# Source: www.pudn.com > sphinx_recipe.zip > PruneToIntersection.pl


#!/usr/bin/perl

# Prunes a file ID list and transcription file (in 1-1 correspondence)
# to contain only files listed in another file ID list.
#
# Copyright 2005 by Keith Vertanen
#

use strict;
use warnings;

# Command line (all five arguments are required, in this order):
#   1. file ID list to prune
#   2. transcription file, line N of which belongs to line N of (1)
#   3. file ID list naming the IDs to keep (the "intersection" list)
#   4. output path for the pruned file ID list
#   5. output path for the pruned transcription file
if ( @ARGV < 5 )
{
    print "$0 <file IDs> <transcriptions> <intersection IDs> <output IDs> <output transcriptions>\n";
    exit(1);
}

my ($fileID, $fileTrans, $fileInter, $fileOutID, $fileOutTrans) = @ARGV;

# Read in all the IDs from the intersection file into a hash.  Keys are
# upper-cased so that the membership test further down ignores case.
my %ids;
open(my $inInter, '<', $fileInter)
    or die "$0: cannot open intersection ID file '$fileInter': $!\n";
while (my $line = <$inInter>)
{
    $line =~ tr/\r\n//d;    # drop both Unix and DOS line terminators
    $ids{uc($line)} = 1;
}
close($inInter);

# Open both inputs before creating the outputs, so that an unreadable input
# aborts the run before any output file is created or truncated.
open(my $inID, '<', $fileID)
    or die "$0: cannot open file ID list '$fileID': $!\n";
open(my $inTrans, '<', $fileTrans)
    or die "$0: cannot open transcription file '$fileTrans': $!\n";
open(my $outID, '>', $fileOutID)
    or die "$0: cannot create output ID list '$fileOutID': $!\n";
open(my $outTrans, '>', $fileOutTrans)
    or die "$0: cannot create output transcription file '$fileOutTrans': $!\n";

# Walk the ID list and the transcription file in lockstep: line N of one
# describes line N of the other.
my $lineNum = 0;
while (my $lineID = <$inID>)
{
    $lineNum++;

    # A missing transcription means the two inputs are not aligned; stop
    # rather than silently pairing IDs with empty transcriptions.
    my $lineTrans = <$inTrans>;
    die "$0: '$fileTrans' ends before '$fileID' (no transcription for line $lineNum)\n"
        unless defined $lineTrans;

    $lineID    =~ tr/\r\n//d;
    $lineTrans =~ tr/\r\n//d;

    # Keep the pair only when the ID appears in the intersection list.
    next unless $ids{uc($lineID)};

    print {$outID}    $lineID . "\n";
    print {$outTrans} $lineTrans . "\n";
}

# Extra transcription lines also indicate misaligned inputs, but everything
# already written is correctly paired, so only warn.
warn "$0: '$fileTrans' has more lines than '$fileID'; extra lines ignored\n"
    unless eof($inTrans);

# Buffered write errors (e.g. disk full) only surface at close time.
close($outID)
    or die "$0: error writing '$fileOutID': $!\n";
close($outTrans)
    or die "$0: error writing '$fileOutTrans': $!\n";
close($inID);
close($inTrans);