# Source: www.pudn.com > sphinx_recipe.zip > prep_training.sh
# Prepares scripts and config files for the training data.
# Also creates the SI-284 and SI-84 training subsets.
#
# Run from the directory that contains both feat/ (the .mfc feature files)
# and etc/ (where the .fileids lists are written).
#
# Required environment variables:
#   CMU_SCRIPTS - directory holding StripText.pl and PruneWithIndex.pl
#   CMU_WSJ     - directory that receives the generated si284.ndx / si84.ndx
#   WSJ0_DIR    - directory containing WSJ0/DOC/INDICES/TRAIN/TR_S_WV1.NDX
#   WSJ1_DIR    - directory containing doc/indices/si_tr_s.ndx
#
# Outputs:
#   feat/mfc_files.txt, feat/mfc_si284_files.txt, feat/mfc_si84_files.txt,
#   feat/prune.log, etc/wsj_all_train.fileids, etc/wsj_si284_train.fileids,
#   etc/wsj_si84_train.fileids, $CMU_WSJ/si284.ndx, $CMU_WSJ/si84.ndx

# Fail early with a clear message instead of silently building paths such as
# "/si284.ndx" from an unset or empty variable.
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set}"
: "${CMU_WSJ:?CMU_WSJ must be set}"
: "${WSJ0_DIR:?WSJ0_DIR must be set}"
: "${WSJ1_DIR:?WSJ1_DIR must be set}"

# Every relative path below assumes we are inside feat/; stop if we are not,
# otherwise the list files and ../etc outputs would land in the wrong place.
cd feat || {
  echo "prep_training.sh: cannot cd to feat" >&2
  exit 1
}

# The pattern is quoted so that find, not the shell, expands it; the explicit
# "." start path keeps the "./..." output form and works with non-GNU find.
find . -iname '*.mfc' | grep -i "_tr_" >mfc_files.txt

# Create a script file with all the training data
perl "$CMU_SCRIPTS/StripText.pl" mfc_files.txt ./ .mfc \
  >../etc/wsj_all_train.fileids

# Create a SI-284 only training file by combining
# the SI-84 and SI-200 indexes.
cat "$WSJ0_DIR/WSJ0/DOC/INDICES/TRAIN/TR_S_WV1.NDX" \
  "$WSJ1_DIR/doc/indices/si_tr_s.ndx" \
  >"$CMU_WSJ/si284.ndx"
# prune.log is started fresh here; the SI-84 step below appends to it.
perl "$CMU_SCRIPTS/PruneWithIndex.pl" si_tr_s mfc_files.txt \
  "$CMU_WSJ/si284.ndx" mfc_si284_files.txt >prune.log
perl "$CMU_SCRIPTS/StripText.pl" mfc_si284_files.txt ./ .mfc \
  >../etc/wsj_si284_train.fileids

# Create a SI-84 only training file
cat "$WSJ0_DIR/WSJ0/DOC/INDICES/TRAIN/TR_S_WV1.NDX" >"$CMU_WSJ/si84.ndx"
perl "$CMU_SCRIPTS/PruneWithIndex.pl" si_tr_s mfc_files.txt \
  "$CMU_WSJ/si84.ndx" mfc_si84_files.txt >>prune.log
perl "$CMU_SCRIPTS/StripText.pl" mfc_si84_files.txt ./ .mfc \
  >../etc/wsj_si84_train.fileids