# www.pudn.com > sphinx_recipe.zip > go.sh



# Prepares and trains a Sphinx acoustic model using
# WSJ0 and WSJ1 training data.
#
# Parameters:
#   $1    - "wsj_si84"   to train just on SI-84 data (default)
#           "wsj_si284"  to train just on SI-284 data
#           "wsj_all"    to train on all WSJ training data
#
#   $2-$4 - "skipinit"   if we want to just train and eval using a 
#                        different training data subset (you need to
#                        have previously run without this option)
#         - "skiptrain"  skip the training portion of script
#         - "skipeval"   skip the evaluation portion of script
#
# Copyright 2006 by Keith Vertanen

# Move into the directory named by CMU_WSJ before doing anything else
# (presumably so that relative paths such as etc/config_* resolve there --
# confirm against the helper scripts).
#
# Fail fast instead of continuing in the wrong place: an unset/empty CMU_WSJ
# would otherwise make a bare `cd` jump to $HOME, and a failed `cd` would
# leave the rest of the script running in the caller's directory.
cd "${CMU_WSJ:?CMU_WSJ must be set}" || exit 1

# These config files control the parameters of the model
# built and how the decoder works.

# Active configuration: continuous 8000-senone, 3-state model, decoded on
# the Nov'92 test set.  To use a different setup, replace these two values
# with one of the commented-out alternatives that follow.
CONFIG_TRAIN='etc/config_train_cont_8000'
CONFIG_DECODE='etc/config_decode_nov92_cont_8000'

# Train a semi-continuous 5-state model 
#CONFIG_TRAIN="etc/config_train_semi_8000"

# Sphinx-3 decoding of semi-continuous models
#CONFIG_DECODE="etc/config_decode_nov92_semi_8000"

# Sphinx-2 decoding of semi-continuous models
#CONFIG_DECODE="etc/config_decode_nov92_semi_8000_s2"

# Evaluate on si_dt_s2 using gigaword LM
#CONFIG_DECODE="etc/config_decode_si_dt_s2_cont_8000"

# Training-set selection: the first command-line argument names the data
# subset to train on; when it is missing or empty, fall back to "wsj_si84".
TRAIN_DATA=${1:-wsj_si84}

# Report the environment variables and the run settings chosen above in a
# single here-document.  Variables that are unset simply print as empty.
cat <<EOF
Environment variables:
WSJ0_DIR        = $WSJ0_DIR
WSJ1_DIR        = $WSJ1_DIR
WSJ_ROOT        = $WSJ_ROOT
CMU_SCRIPTS     = $CMU_SCRIPTS
CMU_COMMON      = $CMU_COMMON
CMU_ROOT        = $CMU_ROOT
CMU_ALIGN       = $CMU_ALIGN
CMU_RM1         = $CMU_RM1
CMU_WSJ         = $CMU_WSJ

Training on     = $TRAIN_DATA
Training config = $CONFIG_TRAIN
Decoding config = $CONFIG_DECODE

EOF

# Initial preparation stage.
#
# The stage is skipped when "skipinit" is given as the 2nd, 3rd or 4th
# command-line argument; otherwise every preparation script is run in order,
# each preceded by a progress message.

skip_init="no"
for opt in "$2" "$3" "$4"; do
    if [[ $opt == "skipinit" ]]; then
        skip_init="yes"
    fi
done

if [[ $skip_init == "yes" ]]; then
    echo "Skipping over initial steps..."
else
    # One entry per step, written as "progress message|script".  The scripts
    # are invoked by bare name, so they must be reachable through PATH.
    init_steps=(
        "Converting and coding training/test data...|convert_code.sh"
        "Prepping training scripts...|prep_training.sh"
        "Prepping test scripts...|prep_test.sh"
        "Preparing phone list and dictionary...|prep_dict.sh"
        "Preparing language model...|prep_lm.sh"
        "Making initial transcriptions...|make_trans.sh"
        "Making test set transcriptions...|make_test_trans.sh"
        "Aligning with previous models...|align_trans.sh"
    )

    for step in "${init_steps[@]}"; do
        echo "${step%%|*}"    # text before the '|': the progress message
        "${step##*|}"         # text after the '|': the script to run
    done
fi

# Training stage.
#
# Skipped when "skiptrain" is given as the 2nd, 3rd or 4th command-line
# argument.  Otherwise train.sh is invoked with two arguments: the training
# config file and the name of the training data subset.
if [[ $2 == "skiptrain" || $3 == "skiptrain" || $4 == "skiptrain" ]]; then
    echo "Skipping training..."
else
    echo "Training using $TRAIN_DATA data..."
    # Quoted so that a value containing whitespace or glob characters
    # (TRAIN_DATA comes straight from the command line) reaches train.sh as
    # one unmodified argument instead of being split or expanded.
    train.sh "${CONFIG_TRAIN}" "${TRAIN_DATA}"
fi

# Evaluation stage.
#
# Skipped when "skipeval" is given as the 2nd, 3rd or 4th command-line
# argument.  Otherwise eval.sh is invoked with four arguments: the decoding
# config file, the training data subset name, the literal word DB_NAME, and
# the training data subset name again.
if [[ $2 == "skipeval" || $3 == "skipeval" || $4 == "skipeval" ]]; then
    echo ""
    echo "Skipping evaluation..."
else
    echo ""
    echo "Evaluating on test set..."
    # Quoted so that a value containing whitespace or glob characters
    # (TRAIN_DATA comes straight from the command line) reaches eval.sh as
    # one unmodified argument instead of being split or expanded.
    # NOTE(review): DB_NAME is passed as a literal word, not a variable
    # expansion -- confirm against eval.sh that this is intended.
    eval.sh "${CONFIG_DECODE}" "${TRAIN_DATA}" DB_NAME "${TRAIN_DATA}"
fi