# www.pudn.com > sphinx_recipe.zip > prep_lm.sh



# Prepares the base LM for evaluating Nov'92 test set.
#
# Required environment:
#   WSJ0_DIR  - directory that contains the WSJ0/LNG_MODL tree
#   CMU_ROOT  - directory whose bin/ holds the lm3g2dmp executable
# Paths under etc/ are relative, so run this from the directory containing etc/.

# Fail with a clear message rather than letting an unset variable expand to
# an absolute path rooted at "/".
: "${WSJ0_DIR:?WSJ0_DIR must be set}"
: "${CMU_ROOT:?CMU_ROOT must be set}"

lm_archive="$WSJ0_DIR/WSJ0/LNG_MODL/BASE_LM/BCB05CNP.Z"
lm_text=etc/wsj0_5k_cnp

# Decompress the language model. If that fails, remove the partial output so
# the dump step is never run on an empty or truncated file.
if ! gunzip -d -c "$lm_archive" >"$lm_text"; then
  printf '%s: failed to decompress %s\n' "${0##*/}" "$lm_archive" >&2
  rm -f "$lm_text"
  exit 1
fi

# Create the binary dump form
# (second argument is presumably the output directory -- confirm against
# the lm3g2dmp usage text).
if ! "$CMU_ROOT/bin/lm3g2dmp" "$lm_text" etc; then
  printf '%s: lm3g2dmp failed on %s\n' "${0##*/}" "$lm_text" >&2
  exit 1
fi

# Produce the 5K dictionary used for decoding.
#
# Required environment:
#   WSJ0_DIR     - directory that contains the WSJ0/LNG_MODL tree
#   CMU_SCRIPTS  - directory that holds WordsToDictionary.pl
# Paths under etc/ are relative, so run this from the directory containing etc/.

# Fail with a clear message rather than letting an unset variable expand to
# an absolute path rooted at "/".
: "${WSJ0_DIR:?WSJ0_DIR must be set}"
: "${CMU_SCRIPTS:?CMU_SCRIPTS must be set}"

vocab_list="$WSJ0_DIR/WSJ0/LNG_MODL/VOCAB/WLIST5C.NVP"

# Use a uniquely named scratch file instead of a fixed "dict_temp" in the
# working directory, so concurrent runs cannot clobber each other and an
# unrelated file with that name is never overwritten.
word_list=$(mktemp "${TMPDIR:-/tmp}/dict_temp.XXXXXX") || {
  printf '%s: cannot create temporary word list\n' "${0##*/}" >&2
  exit 1
}

# Step 1: get rid of the MIT comment lines from the top (this drops every
#         line that contains a "#").
# Step 2: add pronunciations for each word. The arguments are the filtered
#         word list, etc/wsj_all.dic and etc/wsj0_5k_cnp.dic (presumably the
#         full source dictionary and the generated 5K dictionary -- confirm
#         against WordsToDictionary.pl).
# The status is captured rather than exiting immediately so that the scratch
# file is removed on both the success and the failure path.
dict_status=0
{
  grep -v "#" "$vocab_list" >"$word_list" &&
    perl "$CMU_SCRIPTS/WordsToDictionary.pl" "$word_list" etc/wsj_all.dic etc/wsj0_5k_cnp.dic
} || dict_status=$?

rm -f -- "$word_list"

if [ "$dict_status" -ne 0 ]; then
  printf '%s: failed to build etc/wsj0_5k_cnp.dic from %s\n' "${0##*/}" "$vocab_list" >&2
  exit "$dict_status"
fi

# The 5K setup reuses the filler dictionary unchanged.
if ! cp etc/wsj_all.filler etc/wsj0_5k_cnp.filler; then
  printf '%s: failed to copy etc/wsj_all.filler\n' "${0##*/}" >&2
  exit 1
fi