# Source: www.pudn.com > sphinx_recipe.zip > go_all.sh
# Prepares and trains a Sphinx acoustic model using
# WSJ0 and WSJ1 training data.
#
# Parameters:
# $1 - "wsj_si84" to train just on SI-84 data (default)
#      "wsj_si284" to train just on SI-284 data
#      "wsj_all" to train on all WSJ training data
# $2 - how many Gaussians to mix up to (continuous models), 32 default
# $3 - number of Gaussians to start training from (0 for new run)
# $4 - "skipinit" if we want to just train and eval using a
#      different training data subset (you need to
#      have previously run without this option)
#
# Copyright 2006 by Keith Vertanen
############################################################
# These config files control the parameters of the model
# built and how the decoder works.
# Active configuration: continuous 8000-senone, 3-state model, decoded
# against the Nov'92 test set.
CONFIG_TRAIN='etc/config_train_cont_8000'
CONFIG_DECODE='etc/config_decode_nov92_cont_8000'
# Alternative decode configuration: evaluate on si_dt_s2 using the
# gigaword LM. Uncomment to use it instead of the Nov'92 one above.
# CONFIG_DECODE="etc/config_decode_si_dt_s2_cont_8000"
# Capture the positional parameters in named variables. An argument that
# is missing or empty falls back to its default.
TRAIN_DATA=${1:-wsj_si84}  # $1: training data subset
MIX_TO=${2:-32}            # $2: number of Gaussians to mix up to
START_AT=${3:-0}           # $3: number of Gaussians to start from
# Print the environment variables and the effective run settings, one
# "NAME = value" line each, with a blank line after each group.
printf '%s\n' \
  "Environment variables:" \
  "WSJ0_DIR = $WSJ0_DIR" \
  "WSJ1_DIR = $WSJ1_DIR" \
  "WSJ_ROOT = $WSJ_ROOT" \
  "CMU_SCRIPTS = $CMU_SCRIPTS" \
  "CMU_COMMON = $CMU_COMMON" \
  "CMU_ROOT = $CMU_ROOT" \
  "CMU_ALIGN = $CMU_ALIGN" \
  "CMU_RM1 = $CMU_RM1" \
  "CMU_WSJ = $CMU_WSJ" \
  "" \
  "Mix up to = $MIX_TO" \
  "Continue from = $START_AT" \
  "Training on = $TRAIN_DATA" \
  ""
# One-time preparation stage, bypassed when $4 is exactly "skipinit".
# NOTE(review): every preparation script below is commented out, so the
# default branch currently only prints its progress messages. Presumably
# the steps were disabled after a first full run; confirm before relying
# on them.
case "$4" in
  skipinit)
    echo "Skipping over initial steps, going straight to training..."
    ;;
  *)
    echo "Converting and coding training/test data..."
    #convert_code.sh
    echo "Prepping training scripts..."
    #prep_training.sh
    echo "Prepping test scripts..."
    #prep_test.sh
    echo "Preparing phone list and dictionary..."
    #prep_dict.sh
    echo "Preparing language model..."
    #prep_lm.sh
    echo "Making initial transcriptions..."
    #make_trans.sh
    echo "Making test set transcriptions..."
    #make_test_trans.sh
    echo "Aligning with previous models..."
    #align_trans.sh
    ;;
esac
# Train the acoustic model, then decode the test set with it.
# Every expansion is double-quoted so that a value containing whitespace
# or glob characters reaches train.sh / eval.sh as exactly one argument
# instead of being word-split or pathname-expanded by the shell.
echo "Training using $TRAIN_DATA data..."
train.sh "${CONFIG_TRAIN}" "${TRAIN_DATA}" "${MIX_TO}" "${START_AT}"
echo "Evaluating on test set..."
# DB_NAME and CONT_NUM_DENSITIES are passed as literal words, each followed
# by a value (presumably name/value overrides for the decode config;
# verify against eval.sh).
eval.sh "${CONFIG_DECODE}" "${TRAIN_DATA}" DB_NAME "${TRAIN_DATA}" CONT_NUM_DENSITIES "${MIX_TO}"