www.pudn.com > vlc.rar > vlc.cpp


/*----------------------------------------------------------------------------------------*
 *                                   IT++                                                 *
 *----------------------------------------------------------------------------------------*
 *        Copyright (c) 1995-2004 by Tony Ottosson, Thomas Eriksson, Pĺl Frenger,         *
 *        Tobias Ringström, and Jonas Samuelsson.                                         *
 *                                                                                        *
 *        Permission to use, copy, modify, and distribute this software and its           *
 *        documentation under the terms of the GNU General Public License is hereby       *
 *        granted. No representations are made about the suitability of this              *
 *        software for any purpose. It is provided "as is" without expressed or           *
 *        implied warranty. See the GNU General Public License for more details.          *
 *----------------------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------------------*
 *        Variable Length codes                                                           *
 *----------------------------------------------------------------------------------------*
 *        This code is Copyright (C) 2004 by Hervé Jégou and Gurvan Le Dromaguet          *
 *        Email : firstname.lastname@irisa.fr                                             *
 *----------------------------------------------------------------------------------------*
 *                                           DISCLAIMER                                   *
 *                                                                                        *
 * This software package is free software ; you can redistribute it and/or modify it      *
 * under the terms of the GNU General Public License as published by the Free Software    * 
 * Foundation ; either version 2 of the License, or any later version.                    *
 * The software package is distributed in the hope that it will be useful, but WITHOUT    *
 * ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or FITNESS FOR     * 
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.             *
 *                                                                                        *
 *----------------------------------------------------------------------------------------*/

/*! 
  \file 
  \brief  Variable Length Codes implementation
  \author Hervé Jégou

  1.0

  2004/04/01 14:02:00
*/

#include 
#include 
#include 
#include "vlc.h"
#include "sourcefunc.h"
#include "base/matfunc.h"

#include "itconfig.h"  // Just in case, if functions max and min are not defined

#define DUMM_NODE  -1

namespace itpp
{

//--------------------------------------------------------------
// Variable Length Code. Basic class
//--------------------------------------------------------------

//--------------------------------------------------------------
// Since the base class for VLC can not be used, there is nothing to do there
VLC::VLC()
{
  nb_symbols = 0;
}

VLC::~VLC()
{
}

//--------------------------------------------------------------
VLC::VLC( int nb_symbols )
{
  setup( nb_symbols );
}


//--------------------------------------------------------------
VLC::VLC( const vec & pdf, const vec & symbols )
{
  setup_source( pdf, symbols );
}


//--------------------------------------------------------------
// Copy constructor
VLC::VLC( const VLC & vlc )
{
  copy( vlc );
}


//--------------------------------------------------------------
bool VLC::is_init() const
{
  // Perhaps this list is not exhaustive, but it should be sufficient.
  if( map.rows() <= get_nb_symbols() 
      || nb_symbols == 0
      || codeslength.length() <= get_nb_symbols()
      || codes.length() <= get_nb_symbols() )
    return false;
  else
    return vlc_init;
}


//--------------------------------------------------------------
const VLC & VLC::copy( const VLC & vlc )
{
  symbols = vlc.symbols;
  codes = vlc.codes;
  codeslength = vlc.codeslength;
  probas = vlc.probas;
  nb_symbols = vlc.nb_symbols;
  map = vlc.map;
  minh = vlc.minh;
  maxh = vlc.maxh;

  return *this;
}


const VLC & VLC::operator = ( const VLC & vlc )
{
  return copy( vlc );
}


//--------------------------------------------------------------
// Initialization of the internal variables
void VLC::setup( int nb_symbols )
{
  this->nb_symbols = nb_symbols;
  nb_nodes = 2 * get_nb_symbols() - 1;

  // By default, the root node is the last node of the node set
  node_root = get_nb_nodes() - 1;

  // Keep the definition of the symbols value while resizing the variable symbol
  map.set_size( get_nb_nodes(), 2 ); 
  map = -1;
}


//--------------------------------------------------------------
// Initialization of the internal variables
void VLC::setup_source( const vec & pdf, 
			const vec & symbols )
{
  // The symbol list is extended so that the nodes of the VLC tree
  // are given a symbol value corresponding to the optimal reconstruction (expectation)
  // If not symbol definition is provided, natural integers are used instead.
  // At this point, it is assumed that the kraft inequality will be verified.
  if( symbols.length() < 1 )
    this->symbols = cumsum( ones( pdf.length() ) ) - 1;
  else
    this->symbols = symbols;

  it_assert( this->symbols.length() == pdf.length(),
	     "The number of symbols must be equal to the number of elements in the pdf" );

  setup( pdf.length() );

  // Variable symbols is extended to internal nodes
  this->symbols.set_size( get_nb_nodes(), true );

  // For normalization of probabilities, the proba law is normalized by its sum
  // The size of probas is increased so that the nodes probas can be stored
  // and the node probabilities (excluding leaves) are initialized to zeros.
  it_assert( is_valid_pdf( pdf ), "Probability law is not a valid pdf" );

  this->probas = pdf;
  this->probas.set_length( get_nb_nodes(), true );
}



//--------------------------------------------------------------
int VLC::get_code( int s ) const
{
  return codes[ s ];
}


//--------------------------------------------------------------
int VLC::get_length( int s ) const
{
  return codeslength[ s ];
}


//--------------------------------------------------------------
// get_symbol:
//   For leaves : the value of the symbol associated to the leaf
//   For nodes  : the expectation of the symbol
double VLC::get_symbol( int s ) const
{
  if( symbols.length() > 0 )
    return symbols[ s ];
  else
    {
      it_warning( "Source definition (variable symbols) is not initialized in function get_symbol" );
      return 0;
    }
}


//--------------------------------------------------------------
vec VLC::get_symbols() const
{
  if( symbols.length() > 0 )
    return symbols.left( get_nb_symbols() );
  else
    {
      it_warning( "Source definition (variable symbols) is not initialized in function get_symbols" );
      return vec();
    }
}


//--------------------------------------------------------------
int VLC::get_nb_symbols() const
{
  return nb_symbols;
}

//--------------------------------------------------------------
int VLC::get_nb_nodes() const
{
  return nb_nodes;
}


//--------------------------------------------------------------
int VLC::get_nb_internal_nodes() const
{
  return get_nb_nodes() - get_nb_symbols();
}

//--------------------------------------------------------------
// Probability of a node/leaf
double VLC::get_proba( int s ) const
{
  if( probas.length() == 0 )
    {
      it_warning( "Source probability is not initialized in function get_proba" );
      return 0;
    }

  if( s >= 0 && s < get_nb_nodes() )
    return probas[ s ];
  else
    {
      it_warning( "Invalid node number" );
      return 0;
    }
}


//--------------------------------------------------------------
// Length of the shortest codeword
int VLC::get_minh() const
{ 
  return minh;
}


//--------------------------------------------------------------
// Length of the longest codeword (i.e. the tree height)
int VLC::get_maxh() const
{ 
  return maxh;
}


//--------------------------------------------------------------
double VLC::get_mdl() const
{
  double mdl = 0;

  for( int s = 0 ; s < get_nb_symbols() ; s++ )
    mdl += get_length( s ) * get_proba( s );

  return mdl;
}


//--------------------------------------------------------------
// Is the node s is an internal node or a leaf
bool VLC::is_leaf( int s ) const
{ 
  return( s < get_nb_symbols() ); 
}


bool VLC::is_node( int s ) const
{ 
  return( s >= get_nb_symbols() ); 
} 


//--------------------------------------------------------------
// Return some nodes ( root of the VLC tree, and child of a given node )
int VLC::get_root() const
{ 
  return( node_root ); 
}


int VLC::get_child0( int s ) const
{ 
  return map( s, 0 ); 
} 
  

int VLC::get_child1( int s ) const
{ 
  return map( s, 1 ); 
}


//--------------------------------------------------------------
// To call this function, the map table must be correctly set
void VLC::affect_codewords()  
{
  codes.set_size( get_nb_nodes() );
  codeslength.set_size( get_nb_nodes() );

  if( symbols.length() > 0 )
    // The first symbols/probabilities are assumed to have the correct values
    symbols.set_size( get_nb_nodes(), true );

  if( probas.length() > 0 )
    probas.set_size( get_nb_nodes(), true );

  codes[ get_root() ] = 0;
  codeslength[ get_root() ] = 0;

  maxh = 0;
  minh = 1000;
  
  affect_codewords( get_root() );
  vlc_init = true;
}

//--------------------------------------------------------------
// Not only the codewords are affected, but the expectation, probability,
// and variance of the node are also processed.
vec VLC::affect_codewords( int s )
{
  // variable res contains, respectively, the probability of the node,
  // its expectation and the associated variance
  vec res( 2 ), resA, resB;
  res.zeros();

  // Symbol is a dumm symbol
  if( s == DUMM_NODE )
    return res;

  // Symbol is a leaf
  if( is_leaf( s ) )
    {
      // Update the minh and maxh values if required
      maxh = std::max( maxh, codeslength[ s ] );
      minh = std::min( minh, codeslength[ s ] );

      if( probas.length() > 0 )
	res( 0 ) = get_proba( s );   // proba

      if( probas.length() > 0 && symbols.length() > 0 )
	res( 1 ) = get_symbol( s );  // expectation

      return res;
    }

  // Otherwise, internal node
  int child0 = get_child0( s );
  int child1 = get_child1( s );

  // The child0 is a valid symbols/leaf
  if( child0 != DUMM_NODE )
    {
      codeslength[ child0 ] = codeslength[ s ] + 1;
      codes[ child0 ] = codes[ s ];
    }

  // Same story
  if( child1 != DUMM_NODE )
    {
      codeslength[ child1 ] = codeslength[ s ] + 1;
      codes[ child1 ] = codes[ s ] + ( 1 << ( codeslength[ s ] ) );
    }

  // Launch the recursion
  resA = affect_codewords( child0 );
  resB = affect_codewords( child1 ); 

  // Process the probability and expectation
  res( 0 ) = resA( 0 ) + resB( 0 );
  res( 1 ) = ( resA( 1 ) * resA( 0 ) + resB( 1 ) * resB( 0 ) ) / res( 0 );

  // By the way, store the results in the object variables (only if these values are accurate)
  if( probas.length() > 0 )
    probas[ s ] = res( 0 );

  if( probas.length() > 0 && symbols.length() > 0 ) 
    symbols[ s ] = res( 1 );

  return res;
}




//--------------------------------------------------------------
int VLC::get_bitstream_length( const ivec & S ) const
{
  // Initialization of occurence, number of times a given symbol is met in the input sequence
  ivec occurence( get_nb_symbols() );
  occurence.zeros();
  int i, KE = 0, K = S.length();

  // Calculation of the number of bits that will be generated
  // occurence contains the number of symbol of each kind that occured
  for( int t = 0 ; t < K ; t++ )
      occurence[ S[ t ] ]++;


  // Then it is possible to calculate the output number of bits
  for( i = 0 ; i < get_nb_symbols() ; i++ )
    KE += get_length( i ) * occurence[ i ];

  return( KE );
}


//--------------------------------------------------------------
ivec VLC::get_cum_bitstream_length( const ivec & S ) const
{
  int K = S.length();
  ivec cum_bs_len( K );

  cum_bs_len[ 0 ] = get_length( S[ 0 ] );

  for( int t = 1 ; t < K ; t++ )
      cum_bs_len[ t ] = cum_bs_len[ t - 1 ] + get_length( S[ t ] );

   return cum_bs_len;
}


//--------------------------------------------------------------
ivec VLC::get_cum_bitstream_length_backward( const ivec & S ) const
{
  int K = S.length();
  ivec cum_bs_len( K );

  cum_bs_len[ K - 1 ] = get_length( S[ K - 1 ] );

  for( int t = K - 2 ; t >= 0 ; t-- )
      cum_bs_len[ t ] = cum_bs_len[ t + 1 ] + get_length( S[ t ] );

   return cum_bs_len;
}


//--------------------------------------------------------------
double VLC::get_kraft_sum() const
{
  double the_sum = 0;

  for( int i = 0 ; i < get_nb_symbols() ; i++ )
    the_sum += pow( 2, -get_length( i ) );

  return the_sum;
}


//--------------------------------------------------------------
// Create the VLC code according to the Huffman algorithm
// The algorithm assumes that the probability distribution function is pdf
void VLC::huffman()
{
  it_assert( is_valid_pdf( probas ) , "The probability distribution function is not valid\nE.g., use function setup_source to provide a correct initialization." );

  int i, min1, min2;

  // Active_symbols allows to know which symbol/node must be added to the tree
  vec active_symbols( get_nb_nodes() );
  active_symbols.zeros();
  active_symbols.set_subvector( 0, get_nb_symbols() - 1, 1 );
  node_root = get_nb_nodes() - 1;

  // Construction of the tree
  for( i = get_nb_symbols() ; i < get_nb_nodes() ; i++ )
    {
      // Retrieve the two smallest probabilities. For this purpose
      // find the minimum probability among active symbols and desactivate this symbol
      min1 = min_index( probas - active_symbols );
      map( i, 0 ) = min1;
      active_symbols[ min1 ] = 0;
 
      min2 = min_index( probas - active_symbols );
      map( i, 1 ) = min2;
      active_symbols[ min2 ] = 0;

      // Process the proba of the new symbol
      probas[ i ] = probas[ min1 ] + probas[ min2 ];

      // expectation which minimizes the reconstruction error
      // Processed only if the symbol values are provided
      if( symbols.length() > 0 )
	symbols[ i ] = ( probas[ min1 ] * symbols[ min1 ]
			 + probas[ min2 ] * symbols[ min2 ] ) / ( probas[ i ] ); 

     // Let the new symbol be active and the symbols min1 and min2 be inactive
      active_symbols[ i ] = 1;
      active_symbols[ min1 ] = 0;
      active_symbols[ min2 ] = 0;
    }

  affect_codewords();
}


//--------------------------------------------------------------
// Create the VLC code according to the Hu-Tucker algorithm
// The algorithm assumes that the probability distribution function is pdf.
// Note: this implementation is not optimized in terms of computionnal cost optimization.
void VLC::hu_tucker()
{
  it_assert( is_valid_pdf( probas ) , "The probability distribution function is not valid\nE.g., use function setup_source to provide a correct initialization." );

  ivec active_symbols( get_nb_symbols() );
  active_symbols.ones();
  active_symbols = cumsum( active_symbols ) - 1;
  node_root = get_nb_nodes() - 1;

  int nrof_act = get_nb_symbols();           // To store the number of active symbols
  int i, j, k, i1, i2, node1, node2;

  // The following rule store the "merging compatibility" between nodes. 
  bmat merg_comp;

  // While some nodes have to be merged
  // n is the number of the current node
  // The loop is not optimal, since only a pair of node is aggregated in each step
  for( int n = get_nb_symbols(); n < get_nb_nodes() ; n++ )
    {
      merg_comp.set_size( nrof_act, nrof_act );
      merg_comp.ones();                       // At this point, all couples are assumed compatible
      
      // Store the probability that is the smallest among compatible nodes and the corresponding indexes
      ivec best_for_i( nrof_act );
      vec best_for_i_prob( nrof_act );
      best_for_i = -1;
      best_for_i_prob = 1.1;

      // Process the merging compatibilities
      for( i = 0 ; i < nrof_act ; i++ )
	{
	  for( j = i + 1 ; j < nrof_act ; j++ )
	    {
	      // Test if there is an original block between i and j
	      for( k = i + 1 ; k < j ; k++ )
		if( is_leaf( active_symbols[ k ] ) )
		  {
		    merg_comp( i, j ) = 0;
		    merg_comp( j, i ) = 0;
		  }
	    }
	  // A node can not be merged with itself
	  merg_comp( i, i ) = 0;
	}

      // Retrieve the couple corresponding to smallest probabilities
      for( i = 0 ; i < nrof_act ; i++ )
	for( j = 0 ; j < nrof_act ; j++ )

	  if( merg_comp( i, j ) == 1 )
	    if( get_proba( active_symbols[ j ] ) < best_for_i_prob[ i ] )
	      {
		best_for_i[ i ] = j;
		best_for_i_prob[ i ] = get_proba( active_symbols[ j ] );
	      }

      // Retrieve at least one of the candidate couples
      // The new index is assigned the index n
      i1 = -1, i2 = -1;
      for( i = 0 ; i < nrof_act && i1 < 0 ; i++ )
	for( j = 0 ; j < nrof_act && i1 < 0 ; j++ )
	  if( merg_comp( i, j ) && ( best_for_i[ i ] == j ) && ( best_for_i[ j ] == i ) )
	    {
	      i1 = i;
	      i2 = j;
	    }

      node1 = active_symbols[ i1 ];
      node2 = active_symbols[ i2 ];

      map( n, 0 ) = node1;
      map( n, 1 ) = node2;
      
      // Process the proba of the new symbol
      probas[ n ] = probas[ node1 ] + probas[ node2 ];

      // Update the list of active symbols
      active_symbols[ i1 ] = n;
      active_symbols.del( i2 );

      nrof_act--; 
    }

  affect_codewords();

  quasi_lexicographic();
}


//--------------------------------------------------------------
// Tranform the tree in order to generate a quasi-lexicographic order
void VLC::quasi_lexicographic() 
{
  // In the present form, this function only applies to full codtree
  if( get_kraft_sum() != 1 )
    {
      it_warning( "Function can not be applied to a incomplete binary codetree (kraft sum must be 1)" );
      return;
    }

  int i, l, curnode = nb_symbols, child0, child1;

  ivec tmp_idx;    // Node/Leaf index
  vec tmp_rec;     // Reconstruction value for nodes/symbols
  int nbtmpsymb;   // Number of symbols/node in this layer

  // Re-initialization of the length
  codeslength.set_subvector( get_nb_symbols(), get_nb_nodes() - 1, 0 );

  for( l = maxh ; l >= 0 ; l-- ) 
    {
      // The highest size is allocated by default
      tmp_idx.set_size( get_nb_nodes() );
      tmp_rec.set_size( get_nb_nodes() );

      // Retrieve the symbol of the current layer
      nbtmpsymb = 0;
      for( i = 0 ; i < nb_symbols * 2 - 1 ; i++ )
	if( codeslength[ i ] == l )  // Is is a node/leaf of the focused layer?
	  {
	    tmp_idx[ nbtmpsymb ] = i;
	    tmp_rec[ nbtmpsymb ] = symbols[ i ];
	    nbtmpsymb++;
	  }

      // We have to sort the vectors tmp_idx and tmp_rec according to
      // the order generated by values of tmp_rec
      ivec sort_order = sort_index( tmp_rec( 0, nbtmpsymb - 1 ) );
      ivec tmp_idx_sub = tmp_idx( sort_order );
      vec tmp_rec_sub = tmp_rec( sort_order );
      tmp_idx.set_subvector( 0, nbtmpsymb - 1, tmp_idx_sub );
      tmp_rec.set_subvector( 0, nbtmpsymb - 1, tmp_rec_sub );

      // Aggregation of symbols of the current layer by pairs
      for( i = 0 ; i < nbtmpsymb / 2 ; i++ )
	{
	  child0 = tmp_idx[ i * 2 ];
	  child1 = tmp_idx[ i * 2 + 1 ];

	  // Update the map table
	  map( curnode, 0 ) = child0;
	  map( curnode, 1 ) = child1;

	  // probas, expectation, length,  of this node
	  probas[ curnode ] = probas[ child0 ] + probas[ child1 ];
	  symbols[ curnode ] = ( probas[ child0 ] * symbols[ child0 ]
	    + probas[ child1 ] * symbols[ child1 ] ) / ( probas[ curnode ] );
	  codeslength[ curnode ] = codeslength[ child0 ] - 1;

	  // update the structure used for the next sort
	  tmp_idx[ curnode ] = curnode;
	  tmp_rec[ curnode ] = symbols[ curnode ];

	  curnode++;
	}
    }

  codes[ get_root() ] = 0;
  
  for( i = get_nb_nodes() - 1 ; i >= nb_symbols ; i-- )
    {
      child0 = map( i, 0 );
      child1 = map( i, 1 );
      codes[ child0 ] = codes[ i ];
      codes[ child1 ] = codes[ i ] + ( 1 << ( codeslength[ i ] ) );
    }
}


using std::cerr;

//--------------------------------------------------------------
// Fixed Length Code
void VLC::flc( int nb_symbols = -1 )
{
  // If default parameter is used, then we assume that the source 
  // is alreadly initialized
  if( nb_symbols > 0 )
    setup( nb_symbols );

  int child0, child1, i, node, firstnode, lastfirstnode, lmax, l;

  // Test if the number of symbols is not a power of two
  if( get_nb_symbols() << 1 != ( get_nb_symbols() ^ ( get_nb_symbols() - 1 ) ) + 1 ) 
    it_error( "The construction of FLCs requires that the number of symbols is a power of two" );
  
  double lmaxdouble = itpp::log2( get_nb_symbols() ) + 0.0000001;
  lmax = int( lmaxdouble );

  // Construction of the tree
  firstnode = 0;
  node = get_nb_symbols();
  for( l = lmax - 1 ; l >= 0 ; l-- )             // Parse the tree layer per layer
    {
      lastfirstnode = firstnode;
      firstnode = node;
      for( i = 0 ; i < ( 1 << l ) ; i++, node++ )  // i indexes a given layer
	{
          child0 = lastfirstnode + ( i * 2 );
	  child1 = lastfirstnode + ( i * 2 ) + 1;
	  map( node, 0 ) = child0;
	  map( node, 1 ) = child1;
	}
    }
  node_root = get_nb_nodes() - 1;

  affect_codewords();
}



//--------------------------------------------------------------
// Related functions
//--------------------------------------------------------------

//--------------------------------------------------------------
// The marginal probability P(O) to have a 0, according to the vlc tree
// and to the source model
double get_proba_marg_0( const VLC & vlc, int l ) 
{
  int cwd, li, nb0;
  double proba0 = 0, tot_proba = 0;
  for( int s = 0 ; s < vlc.get_nb_symbols() ; s++ )
    {
      cwd = vlc.get_code( s );
      li = vlc.get_length( s );

      // Count the number of 0 in the codeword
      nb0 = 0;

      // The proba is computed only if the symbol is long enough
      if( li > l )
	{
	  tot_proba += vlc.get_proba( s );
	  cwd >>= l;

	  // Update the proba of 0
	  if( cwd % 2 == 0 )
	    proba0 += vlc.get_proba( s );
	}
    }
  proba0 /= tot_proba;

  return proba0;
}

//--------------------------------------------------------------
// The marginal probability P(O) to have a 0, according to the vlc tree
// and to the source model
double get_proba_marg_0( const VLC & vlc ) 
{
  double proba0 = 0, totproba = 0;
  for( int l = 1 ; l <= vlc.get_maxh() ; l++ )
    {
      proba0 += get_proba_length_loweq( vlc, l ) * get_proba_marg_0( vlc, l - 1 );
      totproba += get_proba_length_loweq( vlc, l );
    }
    
  return proba0 / totproba;
}


//--------------------------------------------------------------
// Calculation of Pr( L( S_t ) <= L ), for L = 1..maxh
vec get_proba_lengths( const VLC & vlc )
{
  int l, i;
  vec lambda( vlc.get_maxh() );
  lambda.zeros();
  for( l = 0 ; l < vlc.get_minh() ; l++ )
    lambda[ l ] = 1;

  for( l = vlc.get_minh() ; l < vlc.get_maxh() ; l++ ) 
    for( i = 0 ; i < vlc.get_nb_symbols() ; i++ )
      if( vlc.get_length( i ) > l )
	lambda[ l ] += vlc.get_proba( i );

  return lambda;
}

//--------------------------------------------------------------
// The probability to have a codeword of length l
double get_proba_length( const VLC & vlc, int l )
{
  double proba = 0.0;

  for( int s = 0 ; s < vlc.get_nb_symbols() ; s++ )
    if( l == vlc.get_length( s ) )
      proba += vlc.get_proba( s );

  return proba;
}

//--------------------------------------------------------------
// The probability to have a codeword of length equal to or lower than l
double get_proba_length_loweq( const VLC & vlc, int l )
{
  double proba = 0.0;

  for( int s = 0 ; s < vlc.get_nb_symbols() ; s++ )
    if( l <= vlc.get_length( s ) )
      proba += vlc.get_proba( s );

  return proba;
}


//--------------------------------------------------------------
double delta( const VLC & vlc, int node )
{
  int child0, child1;
  double delta;

  // A leaf?
  if( node < vlc.get_nb_symbols() )
    delta = 0;
  else
    {
      child0 = vlc.get_child0( node );
      child1 = vlc.get_child1( node );
      delta = variance( vlc, node ) - ( variance( vlc, child0 ) 
					* vlc.get_proba( child0 )
	+ variance( vlc, child1 ) * vlc.get_proba( child1 ) ) 
	/ vlc.get_proba( node );
    }
  return( delta );
}


//--------------------------------------------------------------
// local recursive function
double variance( const VLC & vlc, int node, double e );

//--------------------------------------------------------------
// Return the variance associated to a subtree

double variance( const VLC & vlc, int node ) 
{
  double E, v;

  // If the node is not really a node, or is a leaf
  if( node == DUMM_NODE || vlc.is_leaf( node ) )
    return 0;

  // If the function has not returned, is is an internal node
  // The expectation of this node is retrieved hereafter
  E = vlc.get_symbol( node );
  return variance( vlc, node, E ) / vlc.get_proba( node );

  return v;
}


// By default, the variance is given for the root node
double variance( const VLC & vlc ) 
{
  return variance( vlc, vlc.get_root() );
}


//--------------------------------------------------------------
// Return the variance associated to a subtree, 
// assuming the expectation of the source is E
double variance( const VLC & vlc, int node, double E ) 
{
  double v;

  // If the node is not really a node
  if( node == DUMM_NODE )
    return 0;

  // Is that node is a symbol ?
  // Then return square error ponderated with the proba
  if( vlc.is_leaf( node ) )
    {
      v = vlc.get_symbol( node ) - E;
      v = v * v * vlc.get_proba( node );
    }

  // Internal node -> recursive call
  else
    v = variance( vlc, vlc.get_child0( node ), E ) 
      + variance( vlc, vlc.get_child1( node ), E );

  return v;
}


//--------------------------------------------------------------
// Return the variances associated to the layers of an VLC tree
vec layer_variance( const VLC & vlc )
{
  int l, s;  // l:layer, s:node
  vec var_lay( vlc.get_maxh() ); var_lay.zeros();
  vec sum_prob( vlc.get_maxh() ); sum_prob.zeros(); // Sum of probabilities for this layer

  for( s = vlc.get_nb_symbols() ; s <= vlc.get_root() ; s++ )
    {
      l = vlc.get_length( s );
      var_lay[ l ] += variance( vlc, s ) * vlc.get_proba( s );
      sum_prob[ l ] += vlc.get_proba( s );
    }

  return( var_lay );
}


//--------------------------------------------------------------
std::ostream & operator << ( std::ostream & os, const VLC & vlc )
{
  // The code is not fully defined
  if( !vlc.is_init() )
    {
      it_warning( "Display function for VLC called on uninitialized object" );
      return os;
    }

  for( int i = 0 ; i < vlc.get_nb_nodes() ; i++ )
    {
      if( vlc.is_leaf( i ) )
	os << endl << "Symbol " << i << " : \t";
      else
	os << endl << "Node " << i << " : \t";

      os << "P= " << vlc.get_proba( i ) << "\t";
      os << "E= " << vlc.get_symbol( i ) << "\t";
      os << "variance= " << variance( vlc, i ) << "\t";
      os << "delta_var= " << delta( vlc, i ) << "\t";

      int code = vlc.get_code( i );

      if( vlc.get_length( i ) == 0 )
	os << '/';
      else
      for( int c = 0 ; c < vlc.get_length( i ) ; c++ )
	{
	  char bit = ( ( ( code % 2 ) > 0 ) ? 1 : 0 );
	  code >>= 1;
	  os << int( bit ) << "";
	}
    }
  os << endl;
  return os;
}

//--------------------------------------------------------------
const VLC & VLC::load( std::istream & is )
{
  int i;

  // A pre-requisite to this function is that the source is already defined
  it_assert( get_nb_symbols() > 0, "The number of symbols must be known in function load" );

  string cwd_string;
  
  // Note that we assume that map is at least of size nb_symbols + 1
  nb_nodes = get_nb_symbols() + 1;
  map = -1;

  // For user-defined codetree, the root is indexed by integer nb_symbols
  node_root = get_nb_symbols();


  // nextnode: the index (in map_tmp) of the next node to be created
  int nextnode = node_root + 1;

  // First store the codeword length and values (integer form of codewords)
  for( i = 0 ; i < get_nb_symbols() ; i++ )
    {

      int curnode = get_root(), childnode, codlen;
      int thebit;
      is >> cwd_string;

      codlen = cwd_string.length();

      // Process the codeword integer value
      for( int b = 0 ; b < codlen ; b++ )
	{
	  if( cwd_string[ b ] == '0' )
	    thebit = 0;

	  else if( cwd_string[ b ] == '1' )
	    thebit = 1;

	  else
	    {
	      it_warning( "Problem occured while processing codeword " );
	      return *this;
	    }

	  // We have to create a pointer on the symbol i
	  if( b == codlen - 1 )
	    {
	      // This leaf is already in use
	      if( map( curnode, thebit ) != -1 )
		it_error( "Loaded code does not define a prefix code" );

	      map( curnode, thebit ) = i;
	    }

	  else
	    // We have to create the pointer on another symbol,
	    // which may be a symbol
	    {
	      // Perhaps the childnode already exists
	      childnode = map( curnode, thebit );
	  
	      // Child node is currently not defined. Define a new node
	      if( childnode < 0 )
		{
		  childnode = nextnode++;
		  // If required, increase the number of nodes
		  if( childnode >= map.rows() )
		    {
		      map.set_size( childnode + 1, 2, true );
		      map( childnode, 0 ) = -1;
		      map( childnode, 1 ) = -1;
		    }
		}

	      // Found prematuraly a symbol : not a prefix code
	      else if( childnode < get_nb_symbols() )
		it_error( "Loaded code does not define a prefix code" );


	      // Make the link to the new node
	      map( curnode, thebit ) = childnode;

	      // Go to the child node for next iteration
	      curnode = childnode;
	    }
	}
    }

  // At this point, nextnode contains the total number of nodes
  // Adjust the size
  nb_nodes = nextnode;
  map.set_size( nextnode, 2, true );

  // Process probabilities and expectations
  affect_codewords();

  return *this;
}
  
 
//--------------------------------------------------------------
const VLC & VLC::load( const std::string & codedef )
{
  // First test if s is the string identifier for a special code
  string s( codedef );

  transform (s.begin(), s.end(),    // source
	     s.begin(),             // destination
	     toupper);              // operation

  if( s == "HUFFMAN" || s == "HUF" || s == "HUFF" )
    {
      huffman();
      return *this;
    }

  else if( s == "HUFFMANLEX" || s == "HUFFMAN LEX" || s == "HUFFMAN LEXICOGRAPHIC" || s == "HUFFMAN LEXICOGRAPHICAL"  )
    {
      huffman();
      quasi_lexicographic();
      return *this;
    }

  else if( s == "HU_TUCKER" || s == "HUTUCKER" || s == "HU-TUCKER" || s == "HU TUCKER" )
    {
      hu_tucker();
      return *this;
    }

  else if( s =="FLC" )
    {
      flc();
      return *this;
    }

  std::stringstream is( s );
  
  return load( is );
}


//--------------------------------------------------------------
const VLC & VLC::operator = ( const std::string & codedef )
{
  return load( codedef );
}


//--------------------------------------------------------------


} // End of namespace itpp

#undef DUMM_NODE