# Source: www.pudn.com > sphinx_recipe.zip > GetBetween.pl


#!/usr/bin/perl

# Reads in a text file and returns chunks of text that
# are on the same line but between two specified strings.
#
# Outputs the chunks on the same line separated by tabs.
# This can be used to munge result outputs into a nice
# summary table.
#
# An end string of EOL means: return everything after the start string
# up to the end of the line.
#
# Copyright 2006 by Keith Vertanen
#

use strict;

use warnings;

# Command line:
#   GetBetween.pl <input file> <output per line> <description>
#                 <start 1> <end 1> [start 2] [end 2]...
#
#   input file      - text file to scan line by line
#   output per line - true:  print one row for every line on which at least
#                            one start string was found
#                     false: print a single summary row after the whole file
#                            has been read
#   description     - text printed in the first column of every row
#   start N / end N - delimiter pairs; the text between them (on one line)
#                     becomes the value of column N.  An end string that
#                     contains EOL means "until the end of the line".
if ( @ARGV < 5 )
{
    print "$0 <input file> <output per line> <description> <start 1> <end 1> [start 2] [end 2]...\n";
    exit(1);
}

my ($inputFile, $outPerLine, $description) = @ARGV[0 .. 2];

# Collect the start/end delimiter pairs.  A trailing start string without a
# matching end string is ignored.
my @start;
my @end;
for (my $argIndex = 3; $argIndex < @ARGV - 1; $argIndex += 2)
{
    push @start, $ARGV[$argIndex];
    push @end,   $ARGV[$argIndex + 1];
}
my $numPairs = scalar @start;

# $result[$i] holds the most recent chunk extracted for pair $i.  It is
# intentionally NOT reset between lines: in summary mode the last match in
# the file wins, and in per-line mode earlier values carry over to later rows.
my @result;

# Three-argument open so the file name can never be interpreted as a mode or
# a pipe.  A file that cannot be opened is reported on STDERR, but the
# (empty) summary row is still printed so the shape of the output table
# does not change for callers that build tables from many files.
if (open(my $in, '<', $inputFile))
{
    while (my $line = <$in>)
    {
        # Strip every CR and LF so DOS and Unix line endings behave the same.
        $line =~ tr/\r\n//d;

        my $found = 0;

        for my $i (0 .. $numPairs - 1)
        {
            my $posStart = index($line, $start[$i]);
            next if $posStart == -1;

            # The line counts as a hit as soon as the start string is
            # present, even if the end string is missing.
            $found = 1;

            my $afterStart = substr($line, $posStart + length($start[$i]));

            if ($end[$i] =~ /EOL/)
            {
                # Return everything until the end of the current line.
                $result[$i] = $afterStart;
            }
            else
            {
                my $posEnd = index($afterStart, $end[$i]);
                if ($posEnd != -1)
                {
                    $result[$i] = substr($afterStart, 0, $posEnd);
                }
            }
        }

        if ($outPerLine && $found)
        {
            printRow($description, @result);
        }
    }
    close($in);
}
else
{
    warn "$0: cannot open '$inputFile': $!\n";
}

if (!$outPerLine)
{
    printRow($description, @result);
}

# Prints one output row: the description, a tab, then every non-empty value
# followed by a tab, then a newline.  Undefined and empty values are skipped
# (they do not leave an empty column behind).
sub printRow
{
    my ($rowDescription, @values) = @_;

    my @cells = grep { defined $_ && length($_) > 0 } @values;

    print $rowDescription . "\t" . join('', map { $_ . "\t" } @cells) . "\n";
    return;
}