www.pudn.com > hmmtrain.rar > hmm.cpp
#include#include #include #include #include #include #include #include "hmm.h" #define DELTA 0.01 using namespace std; /******************************************************* 功能:随机数发生器,这样使得其余部分的代码称为平台无关的。 // 得到一个随机数种子,可以更具自己的情况修改该函数 int HMM::hmmgetseed(void) { return (17*rand()); } ********************************************************/ /*设置随机数种子 void HMM::hmmsetseed() { srand( (unsigned)time( NULL ) ); } void hmmsetseed(int seed) { srand(seed); } **********************************/ //得到0-1之间的一个双精度随机数 double HMM::hmmgetrand(void) { unsigned int seed=5; srand(seed); return (double) (rand()/(double)RAND_MAX); } //初始化 void HMM::InitHMM(HMM *phmm, int N, int M) { int i, j, k; /* 初始化随机数发生器 */ //hmmsetseed(); phmm->M = M; phmm->N = N; for (i = 0; i < phmm->N; i++) { for(j = 0; j < phmm->N; j++) { phmm->A[i][j]=0.000000; //cout< A[i][j]<<" "; } //cout< A[0][1]=1; for (i = 0; i N; i++) { for (j = i; j <= i+1 && j < phmm->N; j++) { //phmm->A[i][j] = hmmgetrand(); phmm->A[i][j] = 0.200000; //cout< A[i][j]<<" "; } //cout< N; j++) { for (k = 0; k < phmm->M; k++) { //phmm->B[j][k] = hmmgetrand(); phmm->B[j][k] = 0.0078125; //cout< B[j][k]<<" "; } //cout< pi[0] = 1; for (i = 1; i < phmm->N; i++) { phmm->pi[i] = 0.00000; //cout< pi[i]<<" "; } //cout< M); fprintf(fp, "N= %d\n", phmm->N); fprintf(fp, "A:\n"); for (i = 1; i <= phmm->N; i++) { for (j = 1; j <= phmm->N; j++) { fprintf(fp, "%f ", phmm->A[i][j] ); } fprintf(fp, "\n"); } fprintf(fp, "B:\n"); for (j = 1; j <= phmm->N; j++) { for (k = 1; k <= phmm->M; k++) { fprintf(fp, "%f ", phmm->B[j][k]); } fprintf(fp, "\n"); } fprintf(fp, "pi:\n"); for (i = 1; i <= phmm->N; i++) { fprintf(fp, "%f ", phmm->pi[i]); } fprintf(fp, "\n\n"); **************************************************************************/ ofstream outfile; outfile.open(str.c_str()); outfile<<"M="< M< N< N; i++) { for (j = 0; j < phmm->N; j++) outfile< A[i][j]<<" "; outfile< N; j++) { for (k = 0; k < phmm->M; k++) outfile< B[j][k]<<" "; outfile< N; i++) outfile< pi[i]<<" "; outfile< N; i++) { scale[t] += alpha[t][i]; } // rescale all the alpha's and smooth them if(scale[t]==0) { cout<<"scale["< N; i++) { alpha[t][i] /= scale[t]; } } //前向算法 void HMM::ForwardWithScale(HMM *phmm, int T, int *O) { int i,j; int t; double x; double logscale[LENGTH]; scalesum=0.0; //1.初始化当t=1时 for (i = 0; i < phmm->N; i++) { alpha[0][i]=0.0; alpha[0][i]=phmm->pi[i]*(phmm->B[i][O[0]]); } rescale_alphas(phmm,0); //2.递归 for (t = 1 ; t < T; t++) for(i = 0; i < phmm->N; i++) alpha[t][i]=0.0; for (t=1;t N;j++) { for(i=0;i N;i++) { x=phmm->A[i][j]*alpha[t-1][i]; alpha[t][j]+=x; } alpha[t][j]*=phmm->B[j][O[t]]; } rescale_alphas(phmm, t); } //3.终止 //pprobf=0.0; logprobf=0.0; /* for (i=0;i< phmm->N;i++) pprobf+=alpha[T-1][i]; logprobf=log10(pprobf); ******************************/ pprobf=1;//scale[0]; for(t=0;t N; i++) { beta[t][i] /= scale[t]; } } /*************************************************************************** ** 函数名称:BackwardWithScale ** 功能:后向算法估计参数(带比例因子修正) ** 参数:phmm:指向HMM的指针 ** T:观察值序列的长度 ** O:观察值序列 ** beta:运算中用到的临时数组 ** scale:比例因子数组 ** pprob:返回值,所要求的概率 **/ void HMM::BackwardWithScale(HMM *phmm, int T, int *O) { int i, j; /* 状态指示 */ int t; /* 时间下标 */ double pprobb; double logp; //double logscale[100]; /* 1. 初始化 */ for (i = 0; i < phmm->N; i++) beta[T-1][i] = 1.0; rescale_betas(phmm, T-1); /* 2. 递归 */ for (t = T - 2; t >= 0; t--) for(i = 0; i < phmm->N; i++) beta[t][i]=0.0; for (t = T - 2; t >= 0; t--) { for (i = 0; i < phmm->N; i++) { for (j = 0; j < phmm->N; j++) beta[t][i] += phmm->A[i][j] * (phmm->B[j][O[t+1]])*beta[t+1][j]; } rescale_betas(phmm, t); } pprobb = 0.0; for (i = 0; i < phmm->N; i++) pprobb += beta[0][i]; logp=log10(pprobb); logprobb=0.0; logprobb=-(logp+scalesum); } void HMM::ComputeGamma(HMM *phmm, int T, int *O) { int i; int t; for (i = 0; i < phmm->N; i++) { igammasum[i]=0.0; for (t = 0; t < T-1; t++) igammasum[i] += alpha[t][i]*beta[t][i]*scale[t]; } } /****************************************************************************** **函数名称:BaumWelch **功能:BaumWelch算法 **参数:phmm:HMM模型指针 ** T:观察序列长度 ** O:观察序列 ** alpha,beta,gamma,pniter均为中间变量 ** plogprobinit:初始概率 ** 最终概率 **/ void HMM::BaumWelch(HMM *phmm, int *NUM) { int i, j, k; int t, s; int T=256; int *O; int m,x; int l; //char ch[2]; O=new int[LENGTH]; double delta; double logprobprev=0.0; double logprobfin=0.0; //double pidenom=0.0; double v[STATE]; double y1[STATE]; double y2[STATE][CODENUM]; //double product[STATE]; double logpk[Sque]; double denomA[STATE]; double numA[STATE][STATE]; double xi; double denomB[STATE]; double numB[STATE][CODENUM]; //cout<<"l="< N;i++) v[i]=0.0; //for(i=0;i N;i++) //pireestim[i]=0.0; for(i=0;i N;i++) y1[i]=0.0; for(i=0; i N; i++) for(k=0;k M;k++) y2[i][k]=0.0; for(i=0; i N; i++) denomA[i]=0.0; for (i = 0; i< phmm->N; i++) { for (j = 0; j < phmm->N; j++) numA[i][j]=0.0; } for (j = 0; j< phmm->N; j++) denomB[j]=0.0; for (j = 0; j< phmm->N; j++) { for (k = 0; k < phmm->M; k++) numB[j][k]=0.0; } for (s=0; s >ch[0]; //infile>>ch[1]; //infile>>T; for (i=0;i >x; O[i]=x; } phmm->ForwardWithScale(phmm, T, O ); logpk[s]=0.0; logpk[s]=logprobf; phmm->BackwardWithScale(phmm, T, O); //计算公式B的第一部分 for(i=0;i N;i++) { v[i]=alpha[0][i]*beta[0][i]*scale[0]; y1[i]+=v[i]; for (k = 0; k < phmm->M; k++) if(O[0]==k) y2[i][k]+=v[i]; }//重新检查 /*prepare for pi reestimation for(i=0;i N;i++) { //pireestim[i]=0.0; product[i]=alpha[0][i]*beta[0][i]*scale[0]*pprobf; pidenom+=product[i]; pireestim[i]+=product[i]; } */ //prepare for A reestimation phmm->ComputeGamma(phmm, T, O); //各个序列的Gamma相加得到A重估公式的分母 for ( i=0; i< phmm->N; i++) denomA[i]+=igammasum[i]; //计算A重估公式的分子 for (i = 0; i< phmm->N; i++) { for (j = 0; j < phmm->N; j++) { for (t = 0; t < T-1; t++) numA[i][j] += alpha[t][i]*beta[t+1][j]*phmm->A[i][j]*phmm->B[j][O[t+1]]; } } //计算B重估公式的分母的第二部分 for (j = 0; j < phmm->N; j++) { //denomB[j]=0.0; for (t = 1; t < T; t++) { xi=0.0; for(i = 0; i< phmm->N; i++) xi+=alpha[t-1][i]*beta[t][j]*phmm->A[i][j]*phmm->B[j][O[t]]; denomB[j]+=xi; } } //计算B重估公式的分子的第二部分 for (j = 0; j < phmm->N; j++) { for(k = 0 ; k < phmm->M; k++) { //numB[j][k]=0.0; for (t = 1; t < T; t++) { xi=0.0; if(O[t]==k) for(i = 0; i< phmm->N; i++) xi+=alpha[t-1][i]*beta[t][j]*phmm->A[i][j]*phmm->B[j][O[t]]; numB[j][k]+=xi; } } } infile.close(); //infile.clear(); } for( s = 0; s < Sque; s++ ) logprobfin += logpk[s]; /* log P(O |初始状态) */ logprobprev = logprobfin; l=0; cout<<"l="< N; i++) phmm->pi[i]=pireestim[i]/pidenom; */ //重估A for (i = 0; i < phmm->N; i++) { for (j = 0; j < phmm->N; j++) phmm->A[i][j] = numA[i][j]/denomA[i]; } //重估B for (j = 0; j < phmm->N; j++) { denomB[j]+=y1[j]; for (k = 0; k < phmm->M; k++) { numB[j][k]+=y2[j][k]; phmm->B[j][k]=numB[j][k]/denomB[j]; //cout< B[j][k]<<" "; } //cout< N;i++) v[i]=0.0; //for(i=0;i N;i++) //pireestim[i]=0.0; for(i=0;i N;i++) y1[i]=0.0; for(i=0; i N; i++) for(k=0;k M;k++) y2[i][k]=0.0; for(i=0; i N; i++) denomA[i]=0.0; for (i = 0; i< phmm->N; i++) { for (j = 0; j < phmm->N; j++) numA[i][j]=0.0; } for (j = 0; j< phmm->N; j++) denomB[j]=0.0; for (j = 0; j< phmm->N; j++) { for (k = 0; k < phmm->M; k++) numB[j][k]=0.0; } for (s=0; s >ch[0]; //infile>>ch[1]; //infile>>T; for (i=0;i >x; O[i]=x; } phmm->ForwardWithScale(phmm, T, O ); logpk[s]=0.0; logpk[s]=logprobf; phmm->BackwardWithScale(phmm, T, O); //prepare for reestimation of part 2 of for(i=0;i N;i++) { v[i]=alpha[0][i]*beta[0][i]*scale[0];//未定义 y1[i]+=v[i]; for (k = 0; k < phmm->M; k++) if(O[0]==k) y2[i][k]+=v[i]; } /*prepare for pi reestimation for(i=0;i N;i++) { //pireestim[i]=0.0; product[i]=alpha[0][i]*beta[0][i]*scale[0]*pprobf; pidenom+=product[i]; pireestim[i]+=product[i]; } */ //prepare for A reestimation phmm->ComputeGamma(phmm, T, O); //各个序列的Gamma相加得到A重估公式的分母 for ( i=0; i< phmm->N; i++) denomA[i]+=igammasum[i]; //计算A重估公式的分子 for (i = 0; i< phmm->N; i++) { for (j = 0; j < phmm->N; j++) { for (t = 0; t < T-1; t++) numA[i][j] += alpha[t][i]*beta[t+1][j]*phmm->A[i][j]*phmm->B[j][O[t+1]]; } } //计算B重估公式的分母的第二部分 for (j = 0; j < phmm->N; j++) { //denomB[j]=0.0; for (t = 1; t < T; t++) { xi=0.0; for(i = 0; i< phmm->N; i++) xi+=alpha[t-1][i]*beta[t][j]*phmm->A[i][j]*phmm->B[j][O[t]]; denomB[j]+=xi; } } //计算B重估公式的分子的第二部分 for (j = 0; j < phmm->N; j++) { for(k = 0 ; k < phmm->M; k++) { //numB[j][k]=0.0; for (t = 1; t < T; t++) { xi=0.0; if(O[t]==k) for(i = 0; i< phmm->N; i++) xi+=alpha[t-1][i]*beta[t][j]*phmm->A[i][j]*phmm->B[j][O[t]]; numB[j][k]+=xi; } } } infile.close(); //infile.clear(); } /* 计算两次直接的概率差 */ logprobfin=0.0; for( s = 0; s < Sque; s++ ) logprobfin += logpk[s]; /* log P(O |初始状态) */ delta =fabs(logprobfin - logprobprev); logprobprev = logprobfin; l++; cout<<"l="< DELTA); /* 如果差的不太大,表明收敛,退出 */ delete []O; } void HMM::smooth(HMM *phmm) { int j,k; int R;//0的个数 for(j = 0; j < phmm->N; j++ ) { R=0; for(k = 0 ; k < phmm->M; k++) { if(phmm->B[j][k] <= 0.00001) { phmm->B[j][k]=e; R++; } } for(k = 0 ; k < phmm->M; k++) if (phmm->B[j][k] != e) phmm->B[j][k]=phmm->B[j][k]*(1-R*e); } } void main() { //int N=5; //int M=64;//N,M可以改变,M是码字 string unshmmname; string hmmname; int i; int *NUM=&i; HMM *hmm=new HMM(); for(i=0;i InitHMM(hmm,hmm->N,hmm->M); *NUM=i; hmm->BaumWelch(hmm,NUM); hmm->PrintHMM(unshmmname,hmm); hmm->smooth(hmm); hmmname="D:\\hmmdata9+7\\modelparameter\\clean40ci\\hmm"+itostr(i)+".txt"; hmm->PrintHMM(hmmname,hmm); } }