# Source: www.pudn.com > sphinx_recipe.zip > prep_training.sh



# Prepares scripts and config files for the training data.
# Also creates the SI-284 and SI-84 training subsets.

# Work from inside feat/ so that find emits ./-relative paths; the later
# steps read mfc_files.txt from here and write to ../etc. Abort if the
# directory is missing rather than running those steps in the wrong place.
cd feat || { echo "prep_training: cannot cd to feat" >&2; exit 1; }

# List every file named *.mfc (case-insensitive) whose path contains "_tr_"
# (case-insensitive). The pattern is quoted so the shell cannot expand it
# against .mfc files in the current directory before find sees it, and the
# start directory is explicit because not every find defaults to ".".
find . -iname '*.mfc' | grep -i "_tr_" >mfc_files.txt

# Create a script file with all the training data.
# StripText.pl receives the file list plus the strings "./" and ".mfc"
# (presumably the prefix and suffix it removes from each entry -- confirm
# against the script); its stdout becomes ../etc/wsj_all_train.fileids.
# Fail early if CMU_SCRIPTS is unset instead of asking perl for /StripText.pl.
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set (directory holding StripText.pl)}"
perl "$CMU_SCRIPTS/StripText.pl" mfc_files.txt ./ .mfc >../etc/wsj_all_train.fileids

# Create a SI-284 only training file by combining
# the SI-84 and SI-200 indexes.
# Every variable used below must be set; otherwise the paths would silently
# resolve relative to "/" and the outputs would be empty or misplaced.
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set (directory holding the perl scripts)}"
: "${CMU_WSJ:?CMU_WSJ must be set (directory receiving si284.ndx)}"
: "${WSJ0_DIR:?WSJ0_DIR must be set}"
: "${WSJ1_DIR:?WSJ1_DIR must be set}"

# Concatenate the WSJ0 index followed by the WSJ1 index into si284.ndx
# (overwrites any previous copy).
cat "$WSJ0_DIR/WSJ0/DOC/INDICES/TRAIN/TR_S_WV1.NDX" \
    "$WSJ1_DIR/doc/indices/si_tr_s.ndx" >"$CMU_WSJ/si284.ndx"

# PruneWithIndex.pl is handed the tag "si_tr_s", the full file list, the
# combined index and an output list name (presumably it keeps only the
# listed files that appear in the index -- confirm against the script).
# Its stdout starts a fresh prune.log.
perl "$CMU_SCRIPTS/PruneWithIndex.pl" si_tr_s mfc_files.txt "$CMU_WSJ/si284.ndx" mfc_si284_files.txt >prune.log
perl "$CMU_SCRIPTS/StripText.pl" mfc_si284_files.txt ./ .mfc >../etc/wsj_si284_train.fileids

# Create a SI-84 only training file.
# Every variable used below must be set; otherwise the paths would silently
# resolve relative to "/" and the outputs would be empty or misplaced.
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set (directory holding the perl scripts)}"
: "${CMU_WSJ:?CMU_WSJ must be set (directory receiving si84.ndx)}"
: "${WSJ0_DIR:?WSJ0_DIR must be set}"

# The SI-84 index is just the WSJ0 training index, copied to si84.ndx.
cat "$WSJ0_DIR/WSJ0/DOC/INDICES/TRAIN/TR_S_WV1.NDX" >"$CMU_WSJ/si84.ndx"

# Same prune/strip sequence as for the other subsets; the prune output is
# appended to prune.log rather than overwriting it.
perl "$CMU_SCRIPTS/PruneWithIndex.pl" si_tr_s mfc_files.txt "$CMU_WSJ/si84.ndx" mfc_si84_files.txt >>prune.log
perl "$CMU_SCRIPTS/StripText.pl" mfc_si84_files.txt ./ .mfc >../etc/wsj_si84_train.fileids