#!/usr/bin/perl
# Source: www.pudn.com > sphinx_recipe.zip > AddBackoffWeights.pl
# CMU lm3g2dmp utility isn't happy with a backoff bigram
# that doesn't have backoff weights for all unigrams
# (which exist because there were no bigrams for that
# word that survived the cutoff value).
#
# Adds back in a -99.0 backoff weight to shut it up.
#
# Copyright 2006 by Keith Vertanen
#
use strict;
use warnings;

# Usage: AddBackoffWeights.pl <lm file>
#
# Reads the language model file named by the first command-line argument
# and writes it to STDOUT line by line.  Inside the unigram section (after
# a line containing "1-grams:" and before a line containing "2-grams:"),
# any line that splits into exactly two whitespace-separated fields gets
# "\t-99.0" appended.  All other lines are passed through, with any CR/LF
# characters stripped and a single "\n" written back.
#
# Exits with status 1 after printing a usage line when no argument is
# given; dies if the file cannot be opened.
if ( @ARGV < 1 )
{
    print "$0 <lm file>\n";
    exit(1);
}

my ($lmFile) = @ARGV;

# Section-tracking state: we are "in" the unigram section between the
# "1-grams:" marker and the "2-grams:" marker, and "done" once the
# latter has been seen so a later "1-grams:" match is ignored.
my $inUnigram   = 0;
my $doneUnigram = 0;

open(my $in, '<', $lmFile) or die "Cannot open $lmFile: $!\n";

while (my $line = <$in>)
{
    # Strip both CR and LF so DOS-style line endings do not end up as
    # part of the last field.
    $line =~ s/[\n\r]//g;

    if ((!$doneUnigram) && (!$inUnigram) && (index($line, "1-grams:") != -1))
    {
        $inUnigram = 1;
    }
    elsif (index($line, "2-grams:") != -1)
    {
        $inUnigram   = 0;
        $doneUnigram = 1;
    }
    elsif ($inUnigram)
    {
        my @chunks = split(/\s{1,}/, $line);

        # Exactly two fields means the third (backoff weight) column is
        # absent.  The length check skips lines whose first field is
        # empty or a single character (e.g. a line starting with
        # whitespace) -- presumably so only real probability values
        # qualify; TODO confirm.
        if ((@chunks == 2) && (length($chunks[0]) > 1))
        {
            $line = $line . "\t-99.0";
        }
    }

    print $line . "\n";
}

close($in);