www.pudn.com > huffman_.zip > huffman_base.cpp
#include "huffman_base.h"
int huffman_base::get_code_len(int index)
{
check();
if (index < 0 || index >= (int)code_lens.size())
throw new huffman_exception("参数非法");
return code_lens[index];
}
vector huffman_base::get_all_code_lens(void)
{
check();
return code_lens;
}
unsigned long huffman_base::get_code(int index)
{
check();
if (index < 0 || index >= (int)codes.size())
throw new huffman_exception("参数非法");
return codes[index];
}
vector huffman_base::get_all_codes(void)
{
check();
return codes;
}
string huffman_base::get_code_str(int index)
{
check();
if (index < 0 || index >= (int)codes.size())
throw new huffman_exception("参数非法");
return long_to_string(codes[index], code_lens[index]);
}
vector huffman_base::get_all_code_strs(void)
{
check();
vector v;
for(int i = 0; i < (int)codes.size(); i++)
v.push_back(long_to_string(codes[i], code_lens[i]));
return v;
}
int huffman_base::find(unsigned long code)
{
check();
for(int i = 0; i < (int)codes.size(); i++)
if (codes[i] == code)
return i;
return -1;
}
int huffman_base::find(const string& code)
{
return find(string_to_long(code.c_str()));
}
inline void huffman_base::check()
{
if (codes.size() <= 0)
throw new huffman_exception("尚未调用过generate_codes()");
}
unsigned long huffman_base::string_to_long(const char* code)
{
unsigned long ret = 0;
int len = (int)strlen(code);
for(int i = len - 1; i >= 0; i--)
if (code[i] == '1')
ret |= (1ul << (len - i - 1));
return ret;
}
string huffman_base::long_to_string(unsigned long code, int code_len)
{
char* buf = new char[code_len + 1];
if (buf == NULL)
throw new huffman_exception("no enough memory.");
memset(buf, 0, code_len + 1);
unsigned long bit = 1 << (code_len - 1);
for(int i = 0; i < code_len; i++)
{
if (code & bit)
buf[i] = '1';
else
buf[i] = '0';
bit >>= 1;
}
string ret(buf); delete buf;
return ret;
}
void huffman_base::generate_canonical_codes()
{
if (code_lens.size() <= 0)
throw new huffman_exception("生成Canonical Huffman编码前,应已知所有元素码长");
int max_code_len = 0;
int min_code_len = 1000;
const int tmp = sizeof(unsigned long) * 8 + 1;
int len_count[tmp];
unsigned long min_code[tmp];
memset(len_count, 0, tmp * sizeof(int));
memset(min_code, 0, tmp * sizeof(unsigned long));
int num = (int)code_lens.size();
// 统计码长信息
for (int i = 0; i < num; i++)
{
int codelen = code_lens[i];
// huffman_base用unsigned long存储编码,因此
// 码长要限制在sizeof(unsigned long)*8以内
// 这里对超长的编码都简单忽略掉了
if ((unsigned long)codelen > sizeof(unsigned long)*8)
continue;
if (codelen > max_code_len)
max_code_len = codelen;
if (codelen < min_code_len)
min_code_len = codelen;
len_count[codelen]++;
}
// 计算特定码长的所有元素中最小的编码,这里使用的是
// Canonical Huffman编码规则,请参见相关文献
for (int i = max_code_len - 1; i >= 0; i--)
min_code[i] = (min_code[i + 1] + len_count[i + 1]) >> 1;
// 已知特定码长的所有元素中最小的编码,同样码长的元素,
// 编码逐个加1就可以了
codes.clear();
for (int i = 0; i < num; i++)
if (code_lens[i] > 0 && (unsigned long)code_lens[i] <= sizeof(unsigned long)*8)
codes.push_back(min_code[code_lens[i]]++);
else
codes.push_back(0);
}
bool huffman_base::verify()
{
check();
int max = 0;
const int code_len_limit = 100; // 这里能检验的最大码长是100
int len_count[code_len_limit + 1];
memset(len_count, 0, sizeof(int)*(code_len_limit+1));
for(int i = 0; i < (int)code_lens.size(); i++)
{
if (code_lens[i] > code_len_limit)
return true; // 如果有超长码,就不检验了
len_count[code_lens[i]]++;
if (code_lens[i] > max) max = code_lens[i];
}
// 从根开始,算每层分支结点数目,如果最后一层不为0,就表明Huffman树有错误
int nonleaf = 1;
for(int i = 1; i <= max; i++)
nonleaf = nonleaf * 2 - len_count[i];
return (nonleaf == 0);
}