www.pudn.com > ChineseProcessing.rar > FreqCounter.cpp
// FreqCounter.cpp: implementation of the CFreqCounter class. // ////////////////////////////////////////////////////////////////////// #include "stdafx.h" #include "CPT.h" #include "FreqCounter.h" #include#ifdef _DEBUG #undef THIS_FILE static char THIS_FILE[]=__FILE__; #define new DEBUG_NEW #endif ////////////////////////////////////////////////////////////////////// // Construction/Destruction ////////////////////////////////////////////////////////////////////// ChChar CFreqCounter::chars[MAXNUMCHINESECHAR]; UINT CFreqCounter::retvalue=CFreqCounter::InitChineseChars(); CFreqCounter::CFreqCounter(UINT nDepth) { m_nSizeArray = 0; m_aucArray = NULL; m_tucRoot=NULL; InitCounterTree(&m_tucRoot, nDepth, 0, MAXNUMCHINESECHAR-1); } CFreqCounter::~CFreqCounter() { if (m_tucRoot) ReleaseCounterTree(m_tucRoot); for (UINT i=0; i nRight || nDepth==0) return; p=new TUCounter; *tucTree = p; (p->ch)[0]=chars[i][0]; (p->ch)[1]=chars[i][1]; p->counter=0; p->left=p->right=NULL; InitCounterTree(&(p->left), nDepth-1, nLeft, i-1); InitCounterTree(&(p->right), nDepth-1, i+1, nRight); } void CFreqCounter::ReleaseCounterTree(TUCounter *root) { if (root==NULL) return; ReleaseCounterTree(root->left); ReleaseCounterTree(root->right); delete root; } void CFreqCounter::AddGram(ChChar first) { TUCounter * p=m_tucRoot; TUCounter **q=&m_tucRoot; while (p) { register int i = CompareChineseChar(first, p->ch); if (i==0) { (p->counter)++; return; } else if (i<0) { q=&(p->left); p=p->left; } else { q=&(p->right); p=p->right; } } p=new TUCounter; p->ch[0]=first[0]; p->ch[1]=first[1]; p->counter=1; p->left=p->right=NULL; *q=p; } void CFreqCounter::AddGram(ChChar first, ChChar second) { int l=0, r=m_nSizeArray-1, j, k; do { k=(l+r)/2; j=CompareChineseChar(first, m_aucArray[k].ch); if (j<0) r=k-1; else if (j>0) l=k+1; else break; } while (1); // must found if (CFreqCounter * p=m_aucArray[k].next) p->AddGram(second); } void CFreqCounter::AddGram(ChChar first, ChChar second, ChChar third) { int l=0, r=m_nSizeArray-1, j, k; do { k=(l+r)/2; j=CompareChineseChar(first, m_aucArray[k].ch); if (j<0) r=k-1; else if (j>0) l=k+1; else break; } while (1); // must found if (CFreqCounter * p=m_aucArray[k].next) p->AddGram(second, third); } void CFreqCounter::PostFirstScan() { m_nSizeArray=CountUniqueGrams(m_tucRoot); if (m_nSizeArray) { m_aucArray = new AUCounter[m_nSizeArray]; UINT i=OutputTreeToArray(0, m_tucRoot); } ReleaseCounterTree(m_tucRoot); m_tucRoot=NULL; } void CFreqCounter::InitSecondScan() { for (UINT i=0; i PostFirstScan(); } void CFreqCounter::InitThirdScan() { for (UINT i=0; i InitSecondScan(); } void CFreqCounter::PostThirdScan() { for (UINT i=0; i PostSecondScan(); } UINT CFreqCounter::CountUniqueGrams(TUCounter * tucTree) { if (tucTree == NULL) return 0; UINT l=CountUniqueGrams(tucTree->left), \ r=CountUniqueGrams(tucTree->right); if (tucTree->counter) return l+r+1; else return l+r; } UINT CFreqCounter::OutputTreeToArray(UINT left, TUCounter * tuc) { if (tuc == NULL) return 0; UINT i=OutputTreeToArray(left, tuc->left); if (tuc->counter) { m_aucArray[left+i].ch[0]=tuc->ch[0]; m_aucArray[left+i].ch[1]=tuc->ch[1]; m_aucArray[left+i].counter=tuc->counter; m_aucArray[left+i].next= NULL; i++; } i += OutputTreeToArray(left+i, tuc->right); return i; } void CFreqCounter::OutputUnigramFrequency(CFile &fout, const void * buf, size_t len) { AUCounter *p=new AUCounter[m_nSizeArray]; memmove(p, m_aucArray, m_nSizeArray*sizeof(AUCounter)); qsort(p, m_nSizeArray, sizeof(AUCounter), AUCounter::CompareCounterUnit); for (UINT i=0; i OutputUnigramFrequency(fout, buf1, len+2); } delete buf1; } void CFreqCounter::OutputTrigramFrequency(CFile &fout, const void * buf, size_t len) { char* buf1=new char[len+sizeof(ChChar)]; if (buf) memmove(buf1, buf, len); for (UINT i=0; i OutputBigramFrequency(fout, buf1, len+2); } delete buf1; } double AUMutual::moffset=0; void CFreqCounter::OutputAllMutual(CFile &fout) { UINT n=0, i, j, k; int l, r, m, ret; ChChar cc; for (i=0; i m_nSizeArray; AUMutual *p=new AUMutual[n]; for (i=0, k=0; i m_nSizeArray; j++, k++) { p[k].ch[0][0]=m_aucArray[i].ch[0]; p[k].ch[0][1]=m_aucArray[i].ch[1]; p[k].ch[1][0]=m_aucArray[i].next->m_aucArray[j].ch[0]; p[k].ch[1][1]=m_aucArray[i].next->m_aucArray[j].ch[1]; cc[0]=p[k].ch[1][0]; cc[1]=p[k].ch[1][1]; p[k].c[0]=m_aucArray[i].counter; p[k].counter=m_aucArray[i].next->m_aucArray[j].counter; l=0; r=m_nSizeArray-1; do { m=(l+r)/2; ret = CompareChineseChar(cc, m_aucArray[m].ch); if (ret < 0) r = m-1; else if (ret > 0) l = m+1; else break; } while (1); // must found p[k].c[1]=m_aucArray[m].counter; p[k].CalculateMutual(); } qsort(p, n, sizeof(AUMutual), AUMutual::CompareMutual); for (i=0; i