# Source: www.pudn.com > sphinx_recipe.zip > convert_code.sh
# Converts all the WSJ0 and WSJ1 training and test data from the
# original compressed form to normal SPHERE-headed wave
# files that can be coded by Sphinx.
# Setup: validate the environment, build the list of .wv1 files to process
# and generate the config file used by the coding script.
#
# Required environment variables:
#   WSJ_ROOT     directory containing the ./wsj0 and ./wsj1 trees
#   CMU_WSJ      working directory (feat/, wav/, logdir/, etc/ live here)
#   CMU_SCRIPTS  directory holding the helper Perl scripts
#   CMU_ROOT     value substituted for CMU_ROOT in the config template
#
# Abort early when any of them is unset or empty: an empty variable would
# silently turn the paths below into paths relative to / or the current dir.
: "${WSJ_ROOT:?WSJ_ROOT must be set}"
: "${CMU_WSJ:?CMU_WSJ must be set}"
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set}"
: "${CMU_ROOT:?CMU_ROOT must be set}"

# The find commands below use paths relative to WSJ_ROOT, so a failed cd
# must stop the script instead of scanning some unrelated directory.
cd "$WSJ_ROOT" || { echo "cannot cd to $WSJ_ROOT" >&2; exit 1; }

# Clean up any old files
rm -f "$CMU_WSJ/wv1_files.txt"

# Create some directories we'll need
mkdir -p "$CMU_WSJ/feat" "$CMU_WSJ/wav" "$CMU_WSJ/logdir"

# Only get the files in the training and test directories.
#find -iname *.wv1 | grep -E "_tr_|_TR_|_dt_|_ET_" >$CMU_WSJ/wv1_files.txt
# Changed to only get WSJ0 and WSJ1 directories.
# The -iname pattern is quoted so that find receives it verbatim; unquoted,
# the shell would expand it against files in the current directory first.
# NOTE(review): the grep pattern is case-sensitive and lists exactly these
# four spellings -- confirm it matches the directory names of your corpus.
find ./wsj0 -iname '*wv1' | grep -E "_tr_|_TR_|_dt_|_ET_" >"$CMU_WSJ/wv1_files.txt"
find ./wsj1 -iname '*wv1' | grep -E "_tr_|_TR_|_dt_|_ET_" >>"$CMU_WSJ/wv1_files.txt"

# Create a config file for use with the coding script
perl "$CMU_SCRIPTS/CreateConfig.pl" "$CMU_WSJ/etc/code_template.cfg" \
  CMU_ROOT "$CMU_ROOT" BASE_DIR wsj >"$CMU_WSJ/temp_code.cfg"
# We don't really need to keep the uncompressed WSJ data
# around; we just want the feature vectors. We'll process
# the data in chunks: once a chunk has been coded,
# we can delete its wav files.
# Convert and code the file list one chunk at a time so that only one
# chunk's worth of uncompressed wave files exists on disk at any moment.
#
# Uses WSJ_ROOT, CMU_WSJ and CMU_SCRIPTS from the environment; refuse to run
# without them, because the loop recursively deletes "$CMU_WSJ/wav".
: "${WSJ_ROOT:?WSJ_ROOT must be set}"
: "${CMU_WSJ:?CMU_WSJ must be set}"
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set}"

# NOTE(review): the loop runs for I = 0..CHUNKS inclusive (CHUNKS + 1 passes).
# Whether the final pass selects any files depends on OutputEvery.pl, which
# is not visible here -- confirm before changing the bound.
I=0
CHUNKS=500
while [ "$I" -le "$CHUNKS" ]; do
  echo "Converting and coding chunk ${I} of ${CHUNKS}..."

  # Select this chunk's share of the full file list.
  perl "$CMU_SCRIPTS/OutputEvery.pl" "$CMU_WSJ/wv1_files.txt" "$CHUNKS" "$I" \
    >"$CMU_WSJ/temp_wv1_files.txt"

  # Call the script that builds the directory structure and converts
  # each file.
  perl "$CMU_SCRIPTS/ConvertAndCopy.pl" "$CMU_WSJ/temp_wv1_files.txt" \
    "$CMU_WSJ/wav" "$CMU_WSJ/feat"

  # Derive the control file (file ids) from the chunk's file list.
  perl "$CMU_SCRIPTS/StripText.pl" "$CMU_WSJ/temp_wv1_files.txt" ./ .wv1 .WV1 \
    >"$CMU_WSJ/temp.fileids"

  # make_feats.pl is run from inside CMU_WSJ; stop if we cannot get there
  # rather than running it from the wrong directory.
  cd "$CMU_WSJ" || { echo "cannot cd to $CMU_WSJ" >&2; exit 1; }

  # Code up all the files
  perl "$CMU_WSJ/scripts_pl/make_feats.pl" -cfg "$CMU_WSJ/temp_code.cfg" \
    -ctl "$CMU_WSJ/temp.fileids"

  cd "$WSJ_ROOT" || { echo "cannot cd to $WSJ_ROOT" >&2; exit 1; }

  # Remove the following two lines if you want to keep the
  # uncompressed wave files around.
  # ${CMU_WSJ:?} makes the shell abort instead of expanding to "/wav".
  rm -rf -- "${CMU_WSJ:?}/wav"
  mkdir -p "$CMU_WSJ/wav"

  I=$((I + 1))
done
# Remove the temporary files created while converting and coding.
# ${CMU_WSJ:?} aborts if the variable is unset or empty, so the paths can
# never collapse to /temp_*; quoting keeps paths with spaces in one piece.
rm -- "${CMU_WSJ:?CMU_WSJ must be set}/temp_wv1_files.txt" \
  "$CMU_WSJ/temp_code.cfg" \
  "$CMU_WSJ/temp.fileids"