/* Source: www.pudn.com > AVS_M_ver10.rar > cod_sad.c */
/*
 ********************************************************************************
 *
 * File        : cod_sad.c
 * Description : Parameters extraction and signal classification
 *
 ********************************************************************************
 */
#include "../include/cod_sad.h"
#include "../include/amr_plus.h"
#include "../include/mem.h"
/* NOTE(review): four <...> include targets were lost from this copy of the
 * file. The four headers below are the ones this file demonstrably needs
 * (fprintf / malloc, free, abs / memset / sqrt) - confirm against the
 * original sources. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "../lib_amr/enc_if.h"
#include "../lib_amr/enc_main.c"
#include "../lib_amr/enc_lpc.c"
#include "../include/s_util.h"
#include "../../c-code/include/open_pitch.h"

/////////////////////////////////
/* Frame count (1-based). Written by syndecid() on every call and read by
 * extract_isf() / extract_ratio() to mute the deviations during start-up. */
int count_frame;
/////////////////////////////////

/*************************************************************************
 *
 * FUNCTION:    extract_isf()
 * input:       isf   - ISF vector of the current frame (M values)
 * output:      synSt->lsf_meanSD, synSt->lsfmean_meanSD
 * DESCRIPTION: extract parameters from 'isf' for music/speech classification
 *
 * Two squared distances are computed for the current ISF vector: one to the
 * previous frame's vector (lsf_old) and one to a running mean (lsfmean).
 * Each distance is then averaged with its four previous values.
 * State updated: lsfmean, pre_lsf_SD, pre_lsfmean_SD, lsf_old.
 *
 *************************************************************************/
void extract_isf(SyndecidSt *synSt , float isf[M])
{
    int i;
    float temp;
    float L_temp;
    float lsf_SD = 0;       /* ISF spectral deviation (vs. previous frame) */
    float lsfmean_SD = 0;   /* ISF spectral deviation (vs. running mean)   */

    /* Squared distance to the previous frame's ISF vector */
    L_temp = 0;
    for (i = 0; i < M; i++)
    {
        temp = isf[i] - synSt->lsf_old[i];
        L_temp += temp * temp;
    }
    /* No previous frame exists on the very first frame */
    if (count_frame == 1)
    {
        lsf_SD = 0;
    }
    else
    {
        lsf_SD = L_temp;
    }

    /* Squared distance to the running ISF mean */
    L_temp = 0;
    for (i = 0; i < M; i++)
    {
        temp = isf[i] - synSt->lsfmean[i];
        L_temp += temp * temp;
    }
    /* The deviation is forced to zero for the first five frames */
    if (count_frame < 6)
    {
        lsfmean_SD = 0;
    }
    else
    {
        lsfmean_SD = L_temp;
    }

    /* Running ISF mean: first-order recursive smoothing */
    for (i = 0; i < M; i++)
    {
        synSt->lsfmean[i] = 0.75 * synSt->lsfmean[i] + 0.25 * isf[i];
    }

    /* Averaging short time ISF spectral deviation (5-frame mean) */
    synSt->lsf_meanSD = 0.2 * synSt->pre_lsf_SD[0] + 0.2 * synSt->pre_lsf_SD[1] + 0.2 * synSt->pre_lsf_SD[2] + 0.2 * synSt->pre_lsf_SD[3] + 0.2 * lsf_SD;
    synSt->pre_lsf_SD[3] = synSt->pre_lsf_SD[2];
    synSt->pre_lsf_SD[2] = synSt->pre_lsf_SD[1];
    synSt->pre_lsf_SD[1] = synSt->pre_lsf_SD[0];
    synSt->pre_lsf_SD[0] = lsf_SD;

    /* Averaging long time ISF spectral deviation (5-frame mean) */
    synSt->lsfmean_meanSD = 0.2 * synSt->pre_lsfmean_SD[0] + 0.2 * synSt->pre_lsfmean_SD[1] + 0.2 * synSt->pre_lsfmean_SD[2] + 0.2 * synSt->pre_lsfmean_SD[3] + 0.2 * lsfmean_SD;
    synSt->pre_lsfmean_SD[3] = synSt->pre_lsfmean_SD[2];
    synSt->pre_lsfmean_SD[2] = synSt->pre_lsfmean_SD[1];
    synSt->pre_lsfmean_SD[1] = synSt->pre_lsfmean_SD[0];
    synSt->pre_lsfmean_SD[0] = lsfmean_SD;

    /* Remember the current vector for the next call */
    for (i = 0; i < M; i++)
    {
        synSt->lsf_old[i] = isf[i];
    }
}

/*************************************************************************
 *
 * FUNCTION:    extract_ratio()
 * input:       st->level[]  - sub-band levels of the current frame (VAD state)
 * output:      synSt->level_energy, synSt->level_meanSD,
 *              synSt->levelmean_meanSD
 * DESCRIPTION: extract music/speech classification parameters of subband
 *              level from VAD module
 *
 * Despite its name, no sub-band energy ratio is stored by this function;
 * only the total level and two normalised level-deviation statistics are
 * produced.
 * State updated: synSt->level_12_mean, pre_level_SD, pre_levelmean_SD,
 * pre_level_energy and st->old_level.
 *
 *************************************************************************/
void extract_ratio(VadVars * st, SyndecidSt *synSt )
{
    Word16 i;
    Word16 temp;
    Word32 L_temp;
    float level_SD;                   /* sub-band level spectral deviation      */
    float levelmean_SD;               /* sub-band level mean spectral deviation */
    double short_mean_level_energy;   /* level sum of current + previous frame  */
    double long_mean_level_energy;    /* level sum of current + 4 previous      */

    /* Total level over all COMPLEN sub-bands */
    synSt->level_energy = 0;
    for (i = 0; i < COMPLEN; i++)
    {
        synSt->level_energy = synSt->level_energy + st->level[i];
    }
    short_mean_level_energy = synSt->level_energy + synSt->pre_level_energy[0];

    /* Sum of absolute level differences to the previous frame */
    L_temp = 0;
    for (i = 0; i < COMPLEN; i++)
    {
        temp = abs(st->level[i] - st->old_level[i]);
        L_temp = L_temp + temp;
    }
    /* Normalise by the two-frame level; guard first frame and zero level */
    if (count_frame == 1)
    {
        level_SD = 0.0;
    }
    else
    {
        if (short_mean_level_energy == 0)
        {
            level_SD = 0.0;
        }
        else
        {
            level_SD = (float)L_temp / short_mean_level_energy;
        }
    }

    long_mean_level_energy = short_mean_level_energy + synSt->pre_level_energy[1] + synSt->pre_level_energy[2] + synSt->pre_level_energy[3];

    /* Sum of absolute level differences to the running per-band mean */
    L_temp = 0;
    for (i = 0; i < COMPLEN; i++)
    {
        temp = abs(st->level[i] - synSt->level_12_mean[i]);
        L_temp = L_temp + temp;
    }
    /* The deviation is forced to zero for the first nine frames */
    if (count_frame < 10)
    {
        levelmean_SD = 0;
    }
    else
    {
        if (long_mean_level_energy == 0)
        {
            levelmean_SD = 0;
        }
        else
        {
            levelmean_SD = (float)L_temp / long_mean_level_energy;
        }
    }

    /* Running per-band level mean: first-order recursive smoothing */
    for (i = 0; i < COMPLEN; i++)
    {
        synSt->level_12_mean[i] = (Word16)(0.75 * synSt->level_12_mean[i] + 0.25 * st->level[i]);
    }

    /* Averaging short time sub-band energy standard deviation */
    synSt->level_meanSD = (0.2 * synSt->pre_level_SD[0] + 0.2 * synSt->pre_level_SD[1] + 0.2 * synSt->pre_level_SD[2] + 0.2 * synSt->pre_level_SD[3] + 0.2 * level_SD);
    synSt->pre_level_SD[3] = synSt->pre_level_SD[2];
    synSt->pre_level_SD[2] = synSt->pre_level_SD[1];
    synSt->pre_level_SD[1] = synSt->pre_level_SD[0];
    synSt->pre_level_SD[0] = level_SD;

    /* Averaging long time sub-band energy standard deviation */
    synSt->levelmean_meanSD = (0.2 * synSt->pre_levelmean_SD[0] + 0.2 * synSt->pre_levelmean_SD[1] + 0.2 * synSt->pre_levelmean_SD[2] + 0.2 * synSt->pre_levelmean_SD[3] + 0.2 * levelmean_SD);
    synSt->pre_levelmean_SD[3] = synSt->pre_levelmean_SD[2];
    synSt->pre_levelmean_SD[2] = synSt->pre_levelmean_SD[1];
    synSt->pre_levelmean_SD[1] = synSt->pre_levelmean_SD[0];
    synSt->pre_levelmean_SD[0] = levelmean_SD;

    /* Shift the level history */
    synSt->pre_level_energy[3] = synSt->pre_level_energy[2];
    synSt->pre_level_energy[2] = synSt->pre_level_energy[1];
    synSt->pre_level_energy[1] = synSt->pre_level_energy[0];
    synSt->pre_level_energy[0] = synSt->level_energy;

    /* Update signal levels of the previous frame (old_level) */
    for (i = 0; i < COMPLEN; i++)
    {
        st->old_level[i] = st->level[i];
    }
}

/***********************************************************
 * Function: pre_big
 * Purpose:  Big subframe (2 subframes) preprocessing
 *
 * Filters 2*L_SUBFR samples of speech[], starting at frameOffset, into
 * wsp[] at the same offset. The LPC set is taken from synSt->A_t at
 * offset 0 when frameOffset is 0, otherwise at offset 2*MP1, advancing
 * by MP1 per subframe. synSt->mem_w is passed to the synthesis filter
 * with its update flag set to 1.
 *
 * Returns: always 0.
 *
 * NOTE(review): both the residual and the synthesis filter are weighted
 * with GAMMA1 - confirm that the second one was not meant to use a
 * different weighting factor.
 ************************************************************/
int pre_big(SyndecidSt *synSt ,
            int frameOffset,   /* i : Start position in speech vector, Q0 */
            float speech[],    /* i : speech, Q0                          */
            float wsp[]        /* o : weighted speech Q0                  */
           )
{
    float Ap1[MP1];   /* A(z) with spectral expansion */
    float Ap2[MP1];   /* A(z) with spectral expansion */
    int aOffset;
    int i;

    if (frameOffset > 0)
    {
        aOffset = 2 * MP1;
    }
    else
    {
        aOffset = 0;
    }

    /* process two subframes (which form the "big" subframe) */
    for (i = 0; i < 2; i++)
    {
        E_LPC_a_weight(&synSt->A_t[aOffset], Ap1 , GAMMA1, M);
        E_LPC_a_weight(&synSt->A_t[aOffset], Ap2, GAMMA1, M);
        E_UTIL_residu(Ap1, &speech[frameOffset], &wsp[frameOffset], L_SUBFR);
        E_UTIL_synthesis(Ap2, &wsp[frameOffset], &wsp[frameOffset], L_SUBFR, synSt->mem_w, 1);
        aOffset = aOffset + MP1;
        frameOffset = frameOffset + L_SUBFR;
    }
    return 0;
}

/*************************************************************************
 *
 * FUNCTION:    extract_op()
 * input:       new_speech - signal used to compute the open loop pitch
 * output:      synSt->pitch, synSt->meangain, synSt->T_op_mean,
 *              synSt->T_op_longmean, synSt->ptone_tone[]
 * DESCRIPTION: extract parameters from 'pitch\tone' for music/speech
 *              classification
 *
 * For each of two half-frames an open-loop lag is searched in the weighted
 * signal and a normalised correlation value is stored in ptone_tone[].
 * Lags and correlation values are then smoothed over the current and the
 * two previous calls, and a lag-stability flag (synSt->pitch) is derived.
 * Also updates st->ol_gain and, without AVS_OPEN_PITCH, st->old_T0_med,
 * st->ada_w and st->ol_wght_flg.
 *
 *************************************************************************/
void extract_op( SyndecidSt *synSt ,
                 Coder_State_Plus * st,   /* i : state struct */
                 float new_speech[])      /* i : signal used to compute the open loop pitch */
{
    int i_subfr, subfrNr;
    float *speech1;
    float t0, cor_max;
    float *p = NULL, *p1 = NULL;
    int i, j;
    float old_d_wsp[(PIT_MAX_MAX/OPL_DECIM)+L_DIV];   /* Weighting speech */
    float *d_wsp = NULL;
    int T_op[2];                                      /* delay of open-loop pitch */
#ifdef AVS_OPEN_PITCH
    float R0, R1, R2;
#endif

    /* Present frame starts L_NEXT samples before new_speech */
    speech1 = new_speech - L_NEXT;

    /* initializing of speech weighting: history first, new samples after it */
    d_wsp = old_d_wsp + PIT_MAX_MAX / OPL_DECIM;
    mvr2r(st->old_d_wsp, old_d_wsp, PIT_MAX_MAX / OPL_DECIM);

    /* Calculating open-loop pitch, using weighting speech wsp other than
       original speech */
    for (subfrNr = 0, i_subfr = 0; subfrNr < 2; subfrNr++, i_subfr += 128)
    {
        float sum;

        /* Pre-processing of big frame */
        pre_big(synSt , i_subfr, speech1, d_wsp);

#ifdef AVS_OPEN_PITCH
        {
            const float *seg = d_wsp + subfrNr * 64;
            int lag;

            T_op[subfrNr] = (int) find_pitch(&d_wsp[subfrNr*64], 20, PIT_MAX,(2*L_SUBFR)/OPL_DECIM);
            lag = T_op[subfrNr];

            /* calculate open pitch gain */
            R0 = R1 = R2 = 0.0F;
            for (j = 0; j < (2*L_SUBFR)/OPL_DECIM; j++)
            {
                R1 += seg[j] * seg[j];
                R2 += seg[j - lag] * seg[j - lag];
                R0 += seg[j] * seg[j - lag];
            }
            st->ol_gain = (Float32)(R0 / (sqrt(R1 * R2) + 1e-5));
        }
#else
        T_op[subfrNr] = E_GAIN_open_loop_search(&d_wsp[subfrNr*64], 20, PIT_MAX, (2 * L_SUBFR) / OPL_DECIM, st->old_T0_med, &(st->ol_gain), st->hp_ol_ltp_mem, st->hp_old_wsp, (unsigned char)st->ol_wght_flg);

        /* A confident lag refreshes the median and the weight; otherwise the
           weight decays by 10% per half-frame */
        if (st->ol_gain > 0.6)
        {
            st->old_T0_med = E_GAIN_olag_median(T_op[subfrNr], st->old_ol_lag);
            st->ada_w = 1.0;
        }
        else
        {
            st->ada_w = st->ada_w * 0.9f;
        }
        if ( st->ada_w < 0.8)
        {
            st->ol_wght_flg = 0;
        }
        else
        {
            st->ol_wght_flg = 1;
        }
#endif

        /* compute max: cross-correlation at the selected lag.
           NOTE(review): the window starts at d_wsp[0] for both half-frames,
           whereas the lag search above starts at d_wsp[subfrNr*64] - confirm
           this is intended. */
        cor_max = 0.0f;
        p = &d_wsp[0];
        p1 = d_wsp - T_op[subfrNr];
        for (j = 0; j < (2*L_SUBFR) / OPL_DECIM; j++)
        {
            cor_max += *p++ * *p1++;
        }

        /* compute energy of the delayed window (small bias avoids a zero
           divisor) */
        t0 = 0.01f;
        p = d_wsp - T_op[subfrNr];
        for (j = 0; j < (2*L_SUBFR) / OPL_DECIM; j++, p++)
        {
            t0 += *p * *p;
        }

        /* NOTE(review): the saturated branch yields 32767 while the regular
           branch yields a ratio below 1, and both are then scaled by
           10000/32768. This looks like a leftover of a fixed-point
           normalisation; behaviour is kept as found - confirm the intended
           range of ptone_tone / meangain. */
        if (cor_max > 0)
        {
            if (cor_max >= t0)
            {
                sum = 32767;
            }
            else
            {
                sum = (cor_max / t0);
            }
        }
        else
        {
            sum = 0;
        }
        sum = (sum * 10000) / 32768;
        synSt->ptone_tone[subfrNr] = sum;
    }

    /* Store the first PIT_MAX_MAX/OPL_DECIM samples of the work buffer back
       into the encoder state */
    mvr2r(old_d_wsp, st->old_d_wsp, PIT_MAX_MAX / OPL_DECIM);

    /* short time smooth: mean over six lags (weights 0.166 each) */
    synSt->T_op_mean = (int) (0.166 * synSt->pre_T_op[0] + 0.166 * synSt->pre_T_op[1] + 0.166 * synSt->pre_T_op[2] + 0.166 * synSt->pre_T_op[3] + 0.166 * T_op[0] + 0.166 * T_op[1]);
    synSt->pre_T_op[3] = synSt->pre_T_op[1];
    synSt->pre_T_op[2] = synSt->pre_T_op[0];
    synSt->pre_T_op[1] = T_op[1];
    synSt->pre_T_op[0] = T_op[0];

    /* long time smooth */
    synSt->T_op_longmean = (int)(0.8 * synSt->T_op_longmean + 0.1 * T_op[0] + 0.1 * T_op[1]);

    /* pitch detection */
    wb_vad_pitch_tone_detection(st->vadSt, st->ol_gain);
    {
        Word16 lagcount = 0;

        synSt->pitch = 0;
        for (i = 0; i < 2; i++)
        {
            /* Count lags that moved by less than 45 from the previous lag */
            if ((abs(synSt->oldlag - T_op[i]) - 45) < 0)
            {
                lagcount = lagcount + 1;
            }
            /* Save the current LTP lag */
            synSt->oldlag = T_op[i];
        }
        /* Pitch is flagged when the last four lags were all stable */
        if ( (synSt->oldlag_count + lagcount - 4) >= 0)
        {
            synSt->pitch = 1;
        }
        synSt->oldlag_count = lagcount;
    }

    /* long time correlation: mean over six correlation values */
    synSt->meangain = 0.166 * synSt->pre_ptone_tone[0] + 0.166 * synSt->pre_ptone_tone[1] + 0.166 * synSt->pre_ptone_tone[2] + 0.166 * synSt->pre_ptone_tone[3] + 0.166 * synSt->ptone_tone[0] + 0.166 * synSt->ptone_tone[1];
    synSt->pre_ptone_tone[3] = synSt->pre_ptone_tone[1];
    synSt->pre_ptone_tone[2] = synSt->pre_ptone_tone[0];
    synSt->pre_ptone_tone[1] = synSt->ptone_tone[1];
    synSt->pre_ptone_tone[0] = synSt->ptone_tone[0];

    return;
}

/*************************************************************************
 *
 * FUNCTION:    syndecid()
 * input:       speech[] - input signal
 *              frame    - zero-based frame number (count_frame = frame + 1)
 *              i        - index into st->stClass->vadFlag[]; also selects
 *                         the analysis position i*L_DIV + L_SUBFR
 * output:      signal_sort (returned): NOISE, SPEECH or MUSIC. UNCERTAIN is
 *              only an intermediate state and is always resolved to SPEECH
 *              or MUSIC before returning.
 * DESCRIPTION: make decision of signal classification
 *
 * Updates the classifier state in synSt (features, hangover and run-length
 * counters) and the global count_frame.
 *
 * NOTE(review): the four *_meanSD hangovers are tested with ">= 0" whereas
 * the pitch hangover is tested with "> 0", so a hangover of 0 still raises
 * the flag once. Kept as found - confirm whether this is intended.
 *
 *************************************************************************/
int syndecid(Coder_State_Plus * st,   /* i : state struct */
             SyndecidSt *synSt ,
             float speech[],
             int frame,
             int i )
{
    float r[M + 1];    /* Autocorrelations of windowed speech */
    float lspnew[M];   /* ISPs of the current analysis        */
    float lsf[M];      /* ISF coefficients                    */
    int k = 0;
    Word16 signal_sort;   /* signal type */
    Word16 music_flag;    /* music flag  */
    Word16 speech_flag;   /* speech flag */
    int vad_flag;         /* vad flag    */
    Word16 pitch_flag;    /* pitch flag after hangover protection */
    /* Music sub-band energy deviation flag after hangover */
    Word16 level_meanSD_low_flag;
    Word16 level_meanSD_high_flag;
    /* Music spectral deviation flag after hangover */
    Word16 lsf_meanSD_low_flag;
    Word16 lsf_meanSD_high_flag;

    /* Set the frame count */
    count_frame = frame + 1;

    /* Set the vad flag according to the previous vad decision */
    vad_flag = st->stClass->vadFlag[i];

    /* Calculate the parameters of sub-band level */
    extract_ratio(st->vadSt, synSt);

    /* Autocorrelations of input signal */
    E_UTIL_autocorrPlus( &speech[(i*L_DIV)+L_SUBFR], r, M, L_WINDOW, st->window);
    /* Lag windowing */
    lag_wind( r, M);
    /* Levinson Durbin */
    E_LPC_lev_dur(&synSt->A_t[MP1 * 3], r, M);
    /* From A(z) to ISP */
    E_LPC_a_isp_conversion(&synSt->A_t[MP1 * 3], lspnew, synSt->lsp_old, M);
    /* Convert isps to frequency domain 0..6400 */
    E_LPC_isp_isf_conversion(lspnew, lsf, M);
    /* Interpolate the LPC sets between the old and the new ISPs */
    int_lpc_np1(synSt->lsp_old, lspnew, synSt->A_t, 4, M);

    /* Calculate isf parameters */
    extract_isf(synSt , lsf);
    for (k = 0; k < M; k++)
    {
        synSt->lsp_old[k] = lspnew[k];
    }

    /* Calculate pitch / tone parameters */
    extract_op(synSt, st, speech);

    /* ------------------------------------------------------------ */
    /* Pitch hangover                                               */
    /* ------------------------------------------------------------ */
    if (synSt->pitch == 1)
    {
        synSt->pitch_counter++;
        synSt->pitch_hangover = 0;
    }
    else
    {
        synSt->pitch_counter = 0;
    }
    /* Set the hangover frame counter */
    if (synSt->pitch_counter > 10)
    {
        synSt->pitch_hangover = 10;
    }
    else if (synSt->pitch_counter > 5)
    {
        synSt->pitch_hangover = 5;
    }
    else if (synSt->pitch_counter > 2)
    {
        synSt->pitch_hangover = 3;
    }
    /* Set the pitch flag after the hangover protection */
    pitch_flag = (synSt->pitch_hangover > 0);
    if (synSt->pitch_hangover > 0)
    {
        synSt->pitch_hangover--;
    }

    /* First, classify signal into two types according to the vad flag */
    if (vad_flag == 1)
    {
        signal_sort = UNCERTAIN;
    }
    else
    {
        signal_sort = NOISE;
    }

    /* Then, classify signal into speech and music */

    /* ------------------------------------------------------------ */
    /* High level_meanSD with hangover                              */
    /* ------------------------------------------------------------ */
    if (synSt->level_meanSD > 0.27)
    {
        synSt->level_meanSD_high_counter++;
    }
    else
    {
        synSt->level_meanSD_high_counter = 0;
    }
    if (synSt->level_meanSD_high_counter > 15)
    {
        synSt->level_meanSD_high_hangover = 10;
    }
    else if (synSt->level_meanSD_high_counter > 10)
    {
        synSt->level_meanSD_high_hangover = 5;
    }
    else if (synSt->level_meanSD_high_counter > 5)
    {
        synSt->level_meanSD_high_hangover = 3;
    }
    level_meanSD_high_flag = ((synSt->level_meanSD > 0.27) || (synSt->level_meanSD_high_hangover >= 0));
    if (synSt->level_meanSD_high_hangover >= 0)
    {
        synSt->level_meanSD_high_hangover--;
    }

    /* ------------------------------------------------------------ */
    /* High lsf_meanSD with hangover                                */
    /* ------------------------------------------------------------ */
    if (synSt->lsf_meanSD > 170000)
    {
        synSt->lsf_meanSD_high_counter++;
    }
    else
    {
        synSt->lsf_meanSD_high_counter = 0;
    }
    if (synSt->lsf_meanSD_high_counter > 15)
    {
        synSt->lsf_meanSD_high_hangover = 10;
    }
    else if (synSt->lsf_meanSD_high_counter > 10)
    {
        synSt->lsf_meanSD_high_hangover = 5;
    }
    else if (synSt->lsf_meanSD_high_counter > 5)
    {
        synSt->lsf_meanSD_high_hangover = 3;
    }
    lsf_meanSD_high_flag = ((synSt->lsf_meanSD > 170000) || (synSt->lsf_meanSD_high_hangover >= 0));
    if (synSt->lsf_meanSD_high_hangover >= 0)
    {
        synSt->lsf_meanSD_high_hangover--;
    }

    /* ------------------------------------------------------------ */
    /* Low level_meanSD with hangover                               */
    /* ------------------------------------------------------------ */
    if (synSt->level_meanSD < 0.07)
    {
        synSt->level_meanSD_low_counter++;
    }
    else
    {
        synSt->level_meanSD_low_counter = 0;
    }
    if (synSt->level_meanSD_low_counter > 20)
    {
        synSt->level_meanSD_low_hangover = 10;
    }
    else if (synSt->level_meanSD_low_counter > 10)
    {
        synSt->level_meanSD_low_hangover = 5;
    }
    else if (synSt->level_meanSD_low_counter > 5)
    {
        synSt->level_meanSD_low_hangover = 2;
    }
    level_meanSD_low_flag = ((synSt->level_meanSD < 0.07) || (synSt->level_meanSD_low_hangover >= 0));
    if (synSt->level_meanSD_low_hangover >= 0)
    {
        synSt->level_meanSD_low_hangover--;
    }

    /* ------------------------------------------------------------ */
    /* Low lsf_meanSD with hangover                                 */
    /* ------------------------------------------------------------ */
    if (synSt->lsf_meanSD < 80000)
    {
        synSt->lsf_meanSD_low_counter++;
    }
    else
    {
        synSt->lsf_meanSD_low_counter = 0;
    }
    if (synSt->lsf_meanSD_low_counter > 20)
    {
        synSt->lsf_meanSD_low_hangover = 10;
    }
    else if (synSt->lsf_meanSD_low_counter > 10)
    {
        synSt->lsf_meanSD_low_hangover = 5;
    }
    else if (synSt->lsf_meanSD_low_counter > 5)
    {
        synSt->lsf_meanSD_low_hangover = 2;
    }
    lsf_meanSD_low_flag = ((synSt->lsf_meanSD < 80000) || (synSt->lsf_meanSD_low_hangover >= 0));
    if (synSt->lsf_meanSD_low_hangover >= 0)
    {
        synSt->lsf_meanSD_low_hangover--;
    }

    /* Reset the low level_meanSD counter and hangover on noise */
    if (signal_sort == NOISE)
    {
        synSt->level_meanSD_low_counter = 0;
        synSt->level_meanSD_low_hangover = 0;
    }

    /* ------------------------------------------------------------ */
    /* Signal decision                                              */
    /* ------------------------------------------------------------ */
    music_flag = 0;
    speech_flag = 0;
    if (signal_sort == UNCERTAIN)
    {
        /* Speech decision */
        if (synSt->lsf_meanSD > 300000)
        {
            speech_flag = 1;
        }
        if ((synSt->pitch == 1) && (synSt->T_op_mean <= 43))
        {
            speech_flag = 1;
        }
        /* The main purpose is to classify the last parts of voiced speech
           as uncertain, then make the speech decision according to the
           voice condition; otherwise classify it as music */
        if (pitch_flag == 1)
        {
            speech_flag = 1;
        }
        /* (A meangain-based speech rule is disabled here.) */
        if (level_meanSD_high_flag || lsf_meanSD_high_flag)
        {
            speech_flag = 1;
        }

        /* Music decision */
        if (lsf_meanSD_low_flag && level_meanSD_low_flag)
        {
            music_flag = 1;
        }

        if ((music_flag == 1) && (speech_flag == 1))
        {
            signal_sort = UNCERTAIN;
        }
        else if (music_flag == 1)
        {
            signal_sort = MUSIC;
        }
        else if (speech_flag == 1)
        {
            signal_sort = SPEECH;
        }

        if (synSt->lsf_meanSD > 400000)
        {
            signal_sort = SPEECH;
        }
        if ((synSt->level_energy < 10000) && (synSt->noise_continue_counter > 5))
        {
            signal_sort = UNCERTAIN;
        }

        /* When pitch_flag=1, if the music flag is 1 too, the frame is
           classified as uncertain. In this condition two parameters
           restrict the classification: when lsf_meanSD is very small and
           the previous frames are not continuous speech frames, the frame
           is classified as music. */
        if ( signal_sort == UNCERTAIN)
        {
            if ((pitch_flag == 1) && (synSt->lsf_meanSD < 80000) && (synSt->speech_continue_counter < 3))
            {
                signal_sort = MUSIC;
            }
        }

        /* When it is speech type, if the previous frames are music and
           lsf_meanSD is small, the frame is classified as music */
        if (signal_sort == SPEECH)
        {
            if (synSt->music_continue_counter > 3)
            {
                if (synSt->lsf_meanSD < 80000)
                {
                    signal_sort = MUSIC;
                }
            }
        }
        /* (A meangain_flag-based speech rule is disabled here.) */

        if (synSt->level_energy < 5000)
        {
            signal_sort = UNCERTAIN;
        }
    }

    /* Start to classify the UNCERTAIN signal after the first decision */
    synSt->speech_hangover_flag = 0;
    synSt->music_hangover_flag = 0;
    if (signal_sort == UNCERTAIN)
    {
        if (synSt->music_continue_counter > 3)
        {
            if (lsf_meanSD_low_flag || level_meanSD_low_flag)
            {
                synSt->music_hangover_flag = 1;
            }
            else
            {
                synSt->music_hangover_flag = 0;
            }
        }
        if (synSt->speech_hangover_flag == 1)
        {
            signal_sort = SPEECH;
        }
        if (synSt->music_hangover_flag == 1)
        {
            signal_sort = MUSIC;
        }
        if ((synSt->music_hangover_flag == 1) && (synSt->speech_hangover_flag == 1))
        {
            signal_sort = UNCERTAIN;
        }
        if (synSt->music_continue_counter > 1)
        {
            if (synSt->lsf_meanSD < 80000)
            {
                signal_sort = MUSIC;
            }
        }
        if ( signal_sort == UNCERTAIN)
        {
            if (synSt->music_continue_counter > 20)
            {
                signal_sort = MUSIC;
            }
            if (synSt->speech_continue_counter > 20)
            {
                signal_sort = SPEECH;
            }
        }
        if ((synSt->level_energy < 5000))
        {
            signal_sort = UNCERTAIN;
        }
    }

    /* Very long runs of one class bias the decision towards that class */
    if (signal_sort != NOISE)
    {
        if (synSt->music_continue_counter > 150)
        {
            if (synSt->lsf_meanSD < 170000)
            {
                signal_sort = MUSIC;
            }
        }
        if (synSt->speech_continue_counter > 150)
        {
            if (synSt->lsf_meanSD > 170000)
            {
                signal_sort = SPEECH;
            }
        }
        if ((synSt->level_energy < 5000))
        {
            signal_sort = UNCERTAIN;
        }
    }

    /* Classify the UNCERTAIN signal into speech or music, add by wangjun.
       The choice follows the preceding class for fewer than 15 consecutive
       uncertain frames and flips afterwards. */
    if (signal_sort == UNCERTAIN)
    {
        synSt->uncertain_continue_counter++;
        if (synSt->speech_continue_counter > 3)
        {
            if (synSt->uncertain_continue_counter < 15)
            {
                signal_sort = SPEECH;
            }
            else
            {
                signal_sort = MUSIC;
            }
        }
        else
        {
            if (synSt->uncertain_continue_counter < 15)
            {
                signal_sort = MUSIC;
            }
            else
            {
                signal_sort = SPEECH;
            }
        }
    }
    else
    {
        synSt->uncertain_continue_counter = 0;
    }

    /* Counter of continuous music signal */
    if (signal_sort == MUSIC)
    {
        synSt->music_continue_counter++;
    }
    else
    {
        synSt->music_continue_counter = 0;
    }
    /* Counter of continuous speech signal */
    if (signal_sort == SPEECH)
    {
        synSt->speech_continue_counter++;
    }
    else
    {
        synSt->speech_continue_counter = 0;
    }
    /* Counter of continuous noise signal */
    if (signal_sort == NOISE)
    {
        synSt->noise_continue_counter++;
    }
    else
    {
        synSt->noise_continue_counter = 0;
    }

    return signal_sort;
}

/******************************************************************************
 *
 * Function:    syndecid_init
 * Description: Allocates state memory and initializes state memory
 *
 * On success *state points to a newly allocated structure that the caller
 * releases with syndecid_exit(). On failure *state is NULL (when state
 * itself is non-NULL) and a message is written to stderr.
 *
 * The structure is allocated zero-filled so that members which are read
 * before they are first written (for example pre_T_op, pre_level_SD and
 * T_op_longmean) do not start with indeterminate values.
 *
 *******************************************************************************
 */
int syndecid_init(/* return: non-zero with error, zero for ok. */
                  SyndecidSt **state   /* i/o : State structure */
                 )
{
    SyndecidSt *s;

    if (state == (SyndecidSt **)NULL)
    {
        fprintf(stderr, "syndecid_init: invalid parameter\n");
        return -1;
    }
    *state = NULL;

    /* allocate zero-initialised memory */
    if ((s = (SyndecidSt*)calloc(1, sizeof(SyndecidSt))) == NULL)
    {
        fprintf(stderr, "syndecid_init: can not malloc state structure\n");
        return -1;
    }

    if (syndecid_reset(s) != 0)
    {
        free(s);
        return -1;
    }

    s->oldlag = 0;
    s->oldlag_count = 0;
    set_zero(s->ptone_tone, 2);
    set_zero(s->pre_ptone_tone, 8);
    set_zero(s->mem_w, M);

    /* added by hongjun */
    s->pitch_counter = 0;
    s->pitch_hangover = 0;
    s->vad_counter = 0;
    s->vad_hangover = 0;
    s->level_meanSD_low_counter = 0;
    s->level_meanSD_low_hangover = 0;
    s->level_meanSD_high_counter = 0;
    s->level_meanSD_high_hangover = 0;
    s->lsf_meanSD_low_counter = 0;
    s->lsf_meanSD_low_hangover = 0;
    s->lsf_meanSD_high_counter = 0;
    s->lsf_meanSD_high_hangover = 0;
    s->music_counter = 0;
    s->music_hangover = 0;
    s->speech_counter = 0;
    s->speech_hangover = 0;
    s->music_continue_counter = 0;
    s->speech_continue_counter = 0;
    s->noise_continue_counter = 0;
    s->uncertain_continue_counter = 0;
    s->speech_hangover_flag = 0;
    s->music_hangover_flag = 0;
    s->meangain_hangover_flag = 0;
    s->meangain_continue_counter = 0;
    s->meangain_flag = 0;
    s->vad_continual_counter = 0;
    s->vad_continual_flag = 0;
    s->vad_hangover_flag = 0;

    *state = s;
    return 0;
}

/******************************************************************************
 *
 * Function:    syndecid_reset
 * Description: Initializes state memory to zero
 *
 * Clears the level history, the ISF/ISP history and the deviation history.
 * Counters and flags are NOT touched here; syndecid_init() sets those.
 *
 * NOTE(review): the second memset clears (5*M + 2*COMPLEN + 79) Word16
 * units starting at level_12_old, i.e. it relies on the member layout of
 * SyndecidSt. Verify this size against the structure definition whenever
 * the structure changes.
 *
 *******************************************************************************
 */
int syndecid_reset ( /* return: non-zero with error, zero for ok. */
                     SyndecidSt *state   /* i/o : State structure */
                   )
{
    if (state == (SyndecidSt *) NULL)
    {
        fprintf(stderr, "syndecid_reset: invalid parameter\n");
        return -1;
    }

    memset(state->pre_level_energy, 0, 4* sizeof(double));
    memset(state->level_12_old, 0 , (5*M + 2*COMPLEN + 79)*sizeof(Word16));

    /* added by hongjun */
    memset(state->lsf_old, 0 , M*sizeof(float));
    memset(state->lsp_old, 0 , M*sizeof(float));
    memset(state->lsfmean, 0 , M*sizeof(float));
    memset(state->pre_lsf_SD, 0 , 4*sizeof(float));
    memset(state->pre_lsfmean_SD, 0 , 4*sizeof(float));

    return 0;
}

/******************************************************************************
 *
 * Function:    syndecid_exit
 * Description: The memory used for state memory is freed
 *
 * Safe to call with a NULL argument or with *state == NULL. After the call
 * *state is NULL.
 *
 *******************************************************************************
 */
void syndecid_exit ( SyndecidSt **state   /* i/o : State structure */
                   )
{
    if (state == NULL || *state == NULL)
    {
        return;
    }

    /* deallocate memory */
    free(*state);
    *state = NULL;

    return;
}