# Source: www.pudn.com > sphinx_recipe.zip > make_test_trans.sh
# Make the test transcript files for use with Sphinx
#
# Required environment variables:
#   WSJ0_DIR     directory searched for the Nov'92 .dot transcription files
#   WSJ1_DIR     directory searched for the si_dt_s2 .dot transcription files
#   CMU_WSJ      working directory; reads and writes etc/wsj_test_*.fileids,
#                etc/wsj_test_*.transcription, dot_files.txt and the
#                test_*_missing.{txt,log} files below it
#   CMU_SCRIPTS  directory containing CreateTrans.pl

# Abort early if a required variable is unset or empty; otherwise the paths
# below would silently resolve to / or to the caller's home directory.
: "${WSJ0_DIR:?WSJ0_DIR must be set}"
: "${WSJ1_DIR:?WSJ1_DIR must be set}"
: "${CMU_WSJ:?CMU_WSJ must be set}"
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set}"

# Write the path of every *.dot file (case-insensitive) found below the
# current directory to $CMU_WSJ/dot_files.txt.
# The pattern is quoted so the shell cannot expand it before find sees it.
list_dot_files() {
  find . -iname '*.dot' >"$CMU_WSJ/dot_files.txt"
}

# Run CreateTrans.pl for the test set named $1 (e.g. "all", "nov92").
# CreateTrans.pl's stdout is passed through so the caller chooses the log file.
# NOTE(review): the meaning of the literal "0 1" arguments is defined by
# CreateTrans.pl and is not visible from this script -- confirm there.
create_trans() {
  perl "$CMU_SCRIPTS/CreateTrans.pl" \
    "$CMU_WSJ/etc/wsj_test_${1}.fileids" \
    "$CMU_WSJ/dot_files.txt" \
    "$CMU_WSJ/etc/wsj_all.dic" \
    "$CMU_WSJ/etc/wsj_test_${1}.transcription" \
    "$CMU_WSJ/etc/wsj_test_${1}_pruned.fileids" \
    0 1 \
    "$CMU_WSJ/test_${1}_missing.txt"
}

# Replace the fileids list of test set $1 with its pruned counterpart.
# The pruned file is removed only after the copy succeeded, so a failed
# copy never destroys the only remaining copy.
promote_pruned() {
  cp "$CMU_WSJ/etc/wsj_test_${1}_pruned.fileids" \
    "$CMU_WSJ/etc/wsj_test_${1}.fileids" &&
    rm -f "$CMU_WSJ/etc/wsj_test_${1}_pruned.fileids"
}

####################
# Nov'92 transcripts
cd "$WSJ0_DIR" || exit 1

# Create a file that contains the filename of all the transcription files
list_dot_files

create_trans all >"$CMU_WSJ/test_all_missing.log"

# Create the Nov'92 transcription file
# NOTE(review): this log is appended to (>>) while the other two logs are
# truncated (>); kept exactly as in the original -- confirm it is intentional.
create_trans nov92 >>"$CMU_WSJ/test_nov92_missing.log"

# The pruned files should be identical since we aren't eliminating OOVs.
# But we'll copy them over the old files since they are in the same
# order as the transcript file which is required for sclite to work
# when we score the output.
promote_pruned all
promote_pruned nov92

##############################
# Hub 2 si_dt_s2 SJM sentences
cd "$WSJ1_DIR" || exit 1

# Create a file that contains the filename of all the transcription files
list_dot_files

create_trans si_dt_s2 >"$CMU_WSJ/test_si_dt_s2_missing.log"

# The pruned files should be identical since we aren't eliminating OOVs
promote_pruned si_dt_s2