www.pudn.com > sphinx_recipe.zip > MergeDict.pl


#!/usr/bin/perl

# Merges two dictionaries and outputs in alphabetical order.
#
# Copyright 2005 by Keith Vertanen
#

use strict;

if ( @ARGV < 2 )
{
    print "$0  \n";
    exit(1);
}

my $dict1;
my $dict2;
my $makeUpper;

($dict1, $dict2) = @ARGV;

open(IN, $dict1);

my $line;
my $pos;
my $rest;
my $word;
my $pos2;
my %words;
my $firstChar;
my $newPart;

# Read in the first dictionary
while ($line = ) 
{
    if (index($line, "#") != 0) 
    {
	$pos = index($line, " ");
	$pos2 = index($line, "\t");

	if (($pos2 != -1) && ($pos2 < $pos))
	{
	    $pos = $pos2;
	}

	$word = substr($line, 0, $pos);

	if (length($word) > 0)
	{
	    $rest = substr($line, $pos + 1);
	    $rest =~ s/[\n\r]//g;
	    $rest = uc($rest);
	}

	# We may already have a pronunciation for this word
	# so we'll just add a second line to the output part.
	$newPart = $word . "\t" . $rest . "\n";
	
	# Only add if we don't have something identical for this word
	if (index($words{$word}, $newPart) == -1)
	{
	    $words{$word} = $words{$word} . $word . "\t" . $rest . "\n";
	}
    }
}

close IN;

open(IN, $dict2);

# Read in the second dictionary
while ($line = ) 
{
    if (index($line, "#") != 0) 
    {
	$pos = index($line, " ");
	$pos2 = index($line, "\t");

	if (($pos2 != -1) && ($pos2 < $pos))
	{
	    $pos = $pos2;
	}

	$word = substr($line, 0, $pos);

	if (length($word) > 0)
	{
	    $rest = substr($line, $pos + 1);
	    $rest =~ s/[\n\r]//g;
	    $rest = uc($rest);
	}

	# We may already have a pronunciation for this word
	# so we'll just add a second line to the output part.
	$newPart = $word . "\t" . $rest . "\n";
	
	# Only add if we don't have something identical for this word
	if (index($words{$word}, $newPart) == -1)
	{
	    $words{$word} = $words{$word} . $word . "\t" . $rest . "\n";
	}
    }
}

foreach $word (sort keys %words)
{
    if (length($words{$word}) > 0)
    {
	print $words{$word};
    }
}

close IN;