www.pudn.com > huffman.rar > norm2huff.m


function [zipped,info] = norm2huff(vector)
%NORM2HUFF   Huffman codification (encoder)
%   For vectors, NORM2HUFF(X) returns a Huffman coded version of the input vector.
%   For matrices, X(:) is used as input.
%
%   Input must be of uint8 type, while the output is a uint8 array.
%
%   [...,INFO] = ... returns also a structure with data required to convert it back
%   to normal vector:
%
%      INFO.pad        = eventually added bits at the end of bit sequence;
%      INFO.huffcodes  = Huffman codewords;
%      INFO.ratio      = compression ratio;
%      INFO.length     = original data length;
%      INFO.maxcodelen = max codeword length;
%
%   Codewords are stored in the 52 available bits of a double. To avoid anbiguities,
%   after the last codeword bit, a "1" bit is added to terminate the codeword.
%   I.e. the max codeword length can be 51 bits.
%
%   See also HUFF2NORM


%   $Author: Giuseppe Ridino' $
%   $Revision: 1.0 $  $Date: 10-May-2004 15:03:04 $


% ensure to handle uint8 input vector
if ~isa(vector,'uint8'),
	error('input argument must be a uint8 vector')
end

% vector as a row
vector = vector(:)';

% frequency
f = frequency(vector);

% simbols presents in the vector are
simbols = find(f~=0); % first value is 1 not 0!!!
f = f(simbols);

% sort using the frequency
[f,sortindex] = sort(f);
simbols = simbols(sortindex);

% generate the codewords as the 52 bits of a double
len = length(simbols);
simbols_index = num2cell(1:len);
codeword_tmp = cell(len,1);
while length(f)>1,
	index1 = simbols_index{1};
	index2 = simbols_index{2};
	codeword_tmp(index1) = addnode(codeword_tmp(index1),uint8(0));
	codeword_tmp(index2) = addnode(codeword_tmp(index2),uint8(1));
	f = [sum(f(1:2)) f(3:end)];
	simbols_index = [{[index1 index2]} simbols_index(3:end)];
	% resort data in order to have the two nodes with lower frequency as first two
	[f,sortindex] = sort(f);
	simbols_index = simbols_index(sortindex);
end

% arrange cell array to have correspondance simbol <-> codeword
codeword = cell(256,1);
codeword(simbols) = codeword_tmp;

% calculate full string length
len = 0;
for index=1:length(vector),
	len = len+length(codeword{double(vector(index))+1});
end
	
% create the full 01 sequence
string = repmat(uint8(0),1,len);
pointer = 1;
for index=1:length(vector),
	code = codeword{double(vector(index))+1};
	len = length(code);
	string(pointer+(0:len-1)) = code;
	pointer = pointer+len;
end

% calculate if it is necessary to add padding zeros
len = length(string);
pad = 8-mod(len,8);
if pad>0,
	string = [string uint8(zeros(1,pad))];
end

% now save only usefull codewords
codeword = codeword(simbols);
codelen = zeros(size(codeword));
weights = 2.^(0:51);
maxcodelen = 0;
for index = 1:length(codeword),
	len = length(codeword{index});
	if len>maxcodelen,
		maxcodelen = len;
	end
	if len>0,
		code = sum(weights(codeword{index}==1));
		code = bitset(code,len+1);
		codeword{index} = code;
		codelen(index) = len;
	end
end
codeword = [codeword{:}];

% calculate zipped vector
cols = length(string)/8;
string = reshape(string,8,cols);
weights = 2.^(0:7);
zipped = uint8(weights*double(string));

% store data into a sparse matrix
huffcodes = sparse(1,1); % init sparse matrix
for index = 1:numel(codeword),
	huffcodes(codeword(index),1) = simbols(index);
end

% create info structure
info.pad = pad;
info.huffcodes = huffcodes;
info.ratio = cols./length(vector);
info.length = length(vector);
info.maxcodelen = maxcodelen;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function codeword_new = addnode(codeword_old,item)
codeword_new = cell(size(codeword_old));
for index = 1:length(codeword_old),
	codeword_new{index} = [item codeword_old{index}];
end