# Source: www.pudn.com > sphinx_recipe.zip > AddBackoffWeights.pl


#!/usr/bin/perl

# The CMU lm3g2dmp utility isn't happy with a backoff bigram
# language model that doesn't have backoff weights for all
# unigrams (such unigrams exist when no bigrams for that
# word survived the cutoff value).
#
# Adds back in a -99.0 backoff weight to shut it up.
#
# Copyright 2006 by Keith Vertanen
#

use strict;

# Usage: AddBackoffWeights.pl <lm file>
#
# Copies the given language model file to STDOUT line by line.  While
# inside the unigram section (after a line containing "1-grams:" and
# before a line containing "2-grams:"), any line that splits into
# exactly two whitespace-separated fields gets "\t-99.0" appended as
# its backoff weight.  All other lines are echoed unchanged, apart from
# line endings being normalised to "\n".
#
# Exits with status 1 after printing a usage line when no file is
# given, and dies with the system error if the file cannot be opened.
if ( @ARGV < 1 )
{
    print "$0 <lm file>\n";
    exit(1);
}

my ($lmFile) = @ARGV;

# Section state: we are "in" the unigram section between its header
# and the bigram header; once the bigram header has been seen, a later
# "1-grams:" match is never treated as a new unigram section.
my $inUnigram   = 0;
my $doneUnigram = 0;

# Three-argument open with a lexical handle, so characters in the file
# name are never interpreted as an open mode, and failures are reported
# instead of silently producing empty output.
open(my $in, '<', $lmFile) or die "$0: cannot open '$lmFile': $!\n";

while (my $line = <$in>)
{
    # Strip both LF and CR so files with DOS line endings are handled.
    $line =~ s/[\n\r]//g;

    if (!$doneUnigram && !$inUnigram && (index($line, "1-grams:") != -1))
    {
        $inUnigram = 1;
    }
    elsif (index($line, "2-grams:") != -1)
    {
        $inUnigram   = 0;
        $doneUnigram = 1;
    }
    elsif ($inUnigram)
    {
        my @chunks = split(/\s{1,}/, $line);

        # Exactly two fields means the line has no third (backoff)
        # column.  The length check skips lines whose first field is
        # empty or a single character (e.g. a line with leading
        # whitespace, which yields an empty first field).
        if ((@chunks == 2) && (length($chunks[0]) > 1))
        {
            $line .= "\t-99.0";
        }
    }

    print $line . "\n";
}

close($in);