# Source: www.pudn.com > sphinx_recipe.zip > PruneToIntersection.pl
#!/usr/bin/perl
# Prunes a file ID list and transcription file (in 1-1 correspondence)
# to contain only files listed in another file ID list.
#
# Copyright 2005 by Keith Vertanen
#
use strict;
use warnings;

# Command line: five file names are required, in the order they are
# unpacked below:
#   1. input file ID list
#   2. input transcription file (line N belongs to line N of the ID list)
#   3. intersection file ID list (the IDs to keep)
#   4. output file ID list
#   5. output transcription file
if ( @ARGV < 5 )
{
    print "$0 <file ID list> <transcription file> <intersection ID list> "
        . "<output ID list> <output transcription file>\n";
    exit(1);
}

my ($fileID, $fileTrans, $fileInter, $fileOutID, $fileOutTrans) = @ARGV;

# Read in all the IDs from the intersection file into a hash.  Line
# terminators (CR and LF) are stripped and the key is upper-cased, so the
# lookup in the main loop is case-insensitive.
my %ids;
open(my $inInter, '<', $fileInter)
    or die "$0: cannot open intersection ID list '$fileInter': $!\n";
while (my $line = <$inInter>)
{
    $line =~ s/[\n\r]//g;
    $ids{uc($line)} = 1;
}
close($inInter);

# Open both inputs before the outputs so that a bad input path is reported
# before any output file has been created or truncated.
open(my $inID, '<', $fileID)
    or die "$0: cannot open file ID list '$fileID': $!\n";
open(my $inTrans, '<', $fileTrans)
    or die "$0: cannot open transcription file '$fileTrans': $!\n";
open(my $outID, '>', $fileOutID)
    or die "$0: cannot write file ID list '$fileOutID': $!\n";
open(my $outTrans, '>', $fileOutTrans)
    or die "$0: cannot write transcription file '$fileOutTrans': $!\n";

# Walk the ID list and the transcription file in lock step, copying a pair
# of lines to the outputs only when the ID appears in the intersection set.
my $lineNum = 0;
while (my $lineID = <$inID>)
{
    $lineNum++;

    # The two inputs must stay aligned; running out of transcriptions
    # means every later pairing would be wrong, so stop instead of
    # emitting blank transcriptions.
    my $lineTrans = <$inTrans>;
    defined($lineTrans)
        or die "$0: '$fileTrans' ends before '$fileID' (no transcription for ID line $lineNum)\n";

    $lineID    =~ s/[\n\r]//g;
    $lineTrans =~ s/[\n\r]//g;

    if ($ids{uc($lineID)})
    {
        print {$outID}    $lineID . "\n";
        print {$outTrans} $lineTrans . "\n";
    }
}

# Leftover transcription lines do not affect what was written, but they
# indicate the inputs were not in 1-1 correspondence, so mention it.
my $extraTrans = <$inTrans>;
warn "$0: '$fileTrans' has more lines than '$fileID'; extra lines ignored\n"
    if defined($extraTrans);

# Buffered write errors only surface at close, so check the output handles.
close($outID)
    or die "$0: error closing '$fileOutID': $!\n";
close($outTrans)
    or die "$0: error closing '$fileOutTrans': $!\n";
close($inID);
close($inTrans);