# www.pudn.com > sphinx_recipe.zip > PruneToAligned.pl
#!/usr/bin/perl
# Given a transcript that has been force aligned, prune a list of file ids
# so that it corresponds 1-1 with the transcripts. This eliminates any files
# that failed to get aligned.
#
# The utterances should have been listed in the same order in both files
# before the alignment and we assume this ordering in the pruning.
#
# Copyright 2006 by Keith Vertanen
#
use strict;
# Usage: PruneToAligned.pl <transcript file> <id file>
#
# Walks the ID file and the force-aligned transcript file in parallel.
# An ID line is printed to STDOUT (with its original case) only when the
# current transcript line contains "(<ID>)", compared case-insensitively;
# the transcript then advances to its next line.  ID lines with no
# matching transcript line (utterances that failed alignment) are dropped.
if ( @ARGV < 2 )
{
    print "$0 <transcript file> <id file>\n";
    exit(1);
}

my ($transFile, $idFile) = @ARGV;

# Fail loudly if either input is unreadable; otherwise a bad path would
# silently produce an empty ID list.
open(my $inTrans, '<', $transFile)
    or die "$0: cannot open transcript file '$transFile': $!\n";
open(my $inIds, '<', $idFile)
    or die "$0: cannot open id file '$idFile': $!\n";

# Read the next transcript line from the given handle.
# Returns the line upper-cased with all CR/LF characters removed, or
# undef once the end of the file has been reached.
sub nextTransLine
{
    my ($fh) = @_;
    my $line = <$fh>;
    return unless defined $line;
    $line =~ s/[\n\r]//g;
    return uc($line);
}

my $transLine = nextTransLine($inTrans);

# There should be more lines in the id file than in the transcription file
while (defined(my $idLine = <$inIds>))
{
    # Once the transcript is exhausted no further ID can match.
    last unless defined $transLine;

    $idLine =~ s/[\n\r]//g;
    my $idLineUpper = uc($idLine);

    # Line from ID is like:
    #   SI_TR_S/40O/40OC0209
    # The utterance ID is everything after the last "/" (the whole line
    # when there is no "/", since rindex() then returns -1).
    my $id = substr($idLineUpper, rindex($idLineUpper, "/") + 1);

    # A blank ID would search for "()" and could never be a real match.
    next if $id eq '';

    # See if our current transcript line matches this ID
    if (index($transLine, "(" . $id . ")") != -1)
    {
        print $idLine . "\n";

        # Advance to the next transcript line
        $transLine = nextTransLine($inTrans);
    }
}

# A transcript line still pending here was never matched by any ID, which
# means the two files were not in the same order (or the ID is missing).
if (defined $transLine && $transLine =~ /\S/)
{
    warn "$0: transcript line not matched by any id (files out of order?): $transLine\n";
}

close $inTrans;
close $inIds;