# Source: www.pudn.com > sphinx_recipe.zip > make_test_trans.sh



# Make the test transcript files for use with Sphinx

####################
# Nov'92 transcripts

# Fail fast when the required environment is missing. An empty $CMU_WSJ would
# turn "$CMU_WSJ/etc/..." into "/etc/...", and an empty $WSJ0_DIR would make
# an unquoted "cd" switch to $HOME and let find scan the wrong tree.
: "${WSJ0_DIR:?WSJ0_DIR must be set}"
: "${CMU_WSJ:?CMU_WSJ must be set}"
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set}"

# find runs from inside the corpus directory, so the listed paths are
# relative to it. Abort if the directory is unreachable rather than
# overwriting dot_files.txt with a listing of some other directory.
cd "$WSJ0_DIR" || exit 1

# Create a file that contains the filename of all the transcription files.
# The pattern is quoted so that find receives it verbatim; unquoted, the shell
# would expand it whenever the current directory itself holds a *.dot file.
# The explicit "." start path yields the same "./..." output as GNU find's
# implicit default and also works with find implementations that require one.
find . -iname '*.dot' >"$CMU_WSJ/dot_files.txt"

# Create the transcription file for the complete test set.
# NOTE(review): the meaning of the literal "0 1" arguments is defined by
# CreateTrans.pl, which is not visible here; the comment further down implies
# that OOV pruning is disabled -- confirm against that script.
perl "$CMU_SCRIPTS/CreateTrans.pl" \
  "$CMU_WSJ/etc/wsj_test_all.fileids" \
  "$CMU_WSJ/dot_files.txt" \
  "$CMU_WSJ/etc/wsj_all.dic" \
  "$CMU_WSJ/etc/wsj_test_all.transcription" \
  "$CMU_WSJ/etc/wsj_test_all_pruned.fileids" \
  0 1 \
  "$CMU_WSJ/test_all_missing.txt" \
  >"$CMU_WSJ/test_all_missing.log"

# Create the Nov'92 transcription file
# NOTE(review): this log is opened in append mode (>>), unlike the other
# CreateTrans.pl logs written by this script, which are truncated (>). It
# therefore accumulates across runs -- confirm whether that is intended.
perl "$CMU_SCRIPTS/CreateTrans.pl" \
  "$CMU_WSJ/etc/wsj_test_nov92.fileids" \
  "$CMU_WSJ/dot_files.txt" \
  "$CMU_WSJ/etc/wsj_all.dic" \
  "$CMU_WSJ/etc/wsj_test_nov92.transcription" \
  "$CMU_WSJ/etc/wsj_test_nov92_pruned.fileids" \
  0 1 \
  "$CMU_WSJ/test_nov92_missing.txt" \
  >>"$CMU_WSJ/test_nov92_missing.log"

# The pruned files should be identical since we aren't eliminating OOVs.
# But we'll copy them over the old files since they are in the same
# order as the transcript file which is required for sclite to work
# when we score the output.
cp "$CMU_WSJ/etc/wsj_test_all_pruned.fileids" "$CMU_WSJ/etc/wsj_test_all.fileids"
cp "$CMU_WSJ/etc/wsj_test_nov92_pruned.fileids" "$CMU_WSJ/etc/wsj_test_nov92.fileids"
rm -f "$CMU_WSJ/etc/wsj_test_all_pruned.fileids"
rm -f "$CMU_WSJ/etc/wsj_test_nov92_pruned.fileids"

##############################
# Hub 2 si_dt_s2 SJM sentences
# Fail fast when the required environment is missing. An empty $CMU_WSJ would
# turn "$CMU_WSJ/etc/..." into "/etc/...", and an empty $WSJ1_DIR would make
# an unquoted "cd" switch to $HOME and let find scan the wrong tree.
: "${WSJ1_DIR:?WSJ1_DIR must be set}"
: "${CMU_WSJ:?CMU_WSJ must be set}"
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set}"

# Abort if the corpus directory is unreachable rather than overwriting
# dot_files.txt with a listing of some other directory.
cd "$WSJ1_DIR" || exit 1

# Create a file that contains the filename of all the transcription files.
# This overwrites the dot_files.txt listing with one taken from $WSJ1_DIR.
# The pattern is quoted so that find receives it verbatim instead of the shell
# expanding it against the current directory; the explicit "." start path
# gives the same "./..." output as GNU find's implicit default.
find . -iname '*.dot' >"$CMU_WSJ/dot_files.txt"

# Create the si_dt_s2 transcription file.
# NOTE(review): the meaning of the literal "0 1" arguments is defined by
# CreateTrans.pl, which is not visible here -- confirm against that script.
perl "$CMU_SCRIPTS/CreateTrans.pl" \
  "$CMU_WSJ/etc/wsj_test_si_dt_s2.fileids" \
  "$CMU_WSJ/dot_files.txt" \
  "$CMU_WSJ/etc/wsj_all.dic" \
  "$CMU_WSJ/etc/wsj_test_si_dt_s2.transcription" \
  "$CMU_WSJ/etc/wsj_test_si_dt_s2_pruned.fileids" \
  0 1 \
  "$CMU_WSJ/test_si_dt_s2_missing.txt" \
  >"$CMU_WSJ/test_si_dt_s2_missing.log"

# The pruned files should be identical since we aren't eliminating OOVs.
# Replace the original fileids list with the pruned one, then drop the
# temporary pruned copy.
cp "$CMU_WSJ/etc/wsj_test_si_dt_s2_pruned.fileids" "$CMU_WSJ/etc/wsj_test_si_dt_s2.fileids"
rm -f "$CMU_WSJ/etc/wsj_test_si_dt_s2_pruned.fileids"