www.pudn.com > SMV_Code.rar > lib_evad.c
/*=========================================================================*/ /* Each of the companies; Ericsson, Lucent, Mindspeed, Motorola, Nokia, */ /* Nortel Networks, and Qualcomm (hereinafter referred to individually as */ /* “Source” or collectively as “Sources”) do hereby state: */ /* */ /* To the extent to which the Source(s) may legally and freely do so, */ /* the Source(s), upon submission of a Contribution, grant(s) a free, */ /* irrevocable, non-exclusive, license to the Third Generation Partnership */ /* Project 2 (3GPP2) and its Organizational Partners: ARIB, CCSA, TIA, */ /* TTA, and TTC, under the Source’s copyright or copyright license rights */ /* in the Contribution, to, in whole or in part, copy, make derivative */ /* works, perform, display and distribute the Contribution and derivative */ /* works thereof consistent with 3GPP2’s and each Organizational Partner’s */ /* policies and procedures, with the right to (i) sublicense the foregoing */ /* rights consistent with 3GPP2’s and each Organizational Partner’s */ /* policies and procedures and (ii) copyright and sell, if applicable) in */ /* 3GPP2's name or each Organizational Partner’s name any 3GPP2 or */ /* transposed Publication even though this Publication may contain the */ /* Contribution or a derivative work thereof. The Contribution shall */ /* disclose any known limitations on the Source’s rights to license as */ /* herein provided. */ /* */ /* When a Contribution is submitted by the Source(s) to assist the */ /* formulating groups of 3GPP2 or any of its Organizational Partners, */ /* it is proposed to the Committee as a basis for discussion and is not */ /* to be construed as a binding proposal on the Source(s). The Source(s) */ /* specifically reserve(s) the right to amend or modify the material */ /* contained in the Contribution. Nothing contained in the Contribution */ /* shall, except as herein expressly provided, be construed as conferring */ /* by implication, estoppel or otherwise, any license or right under */ /* (i) any existing or later issuing patent, whether or not the use of */ /* information in the document necessarily employs an invention of any */ /* existing or later issued patent, (ii) any copyright, (iii) any */ /* trademark, or (iv) any other intellectual property right. */ /* */ /* With respect to the Software necessary for the practice of any or all */ /* Normative portions of the Selectable Mode Vocoder (SMV) as it exists on */ /* the date of submittal of this form, should the SMV be approved as a */ /* Specification or Report by 3GPP2, or as a transposed Standard by any of */ /* the 3GPP2’s Organizational Partners, the Source(s) state(s) that a */ /* worldwide license to reproduce, use and distribute the Software, the */ /* license rights to which are held by the Source(s), will be made */ /* available to applicants under terms and conditions that are reasonable */ /* and non-discriminatory, which may include monetary compensation, */ /* and only to the extent necessary for the practice of any or all of the */ /* Normative portions of the SMV or the field of use of practice of the */ /* SMV Specification, Report, or Standard. The statement contained above */ /* is irrevocable and shall be binding upon the Source(s). In the event */ /* the rights of the Source(s) in and to copyright or copyright license */ /* rights subject to such commitment are assigned or transferred, */ /* the Source(s) shall notify the assignee or transferee of the existence */ /* of such commitments. */ /*=========================================================================*/ /* */ /*-------------------------------------------------------------------*/ /*===================================================================*/ /* LIBRARY: lib_evad.c */ /*-------------------------------------------------------------------*/ /* PURPOSE : Library of Voice Activity Detection based on EVAD VAD. */ /*===================================================================*/ #include#include "typedef.h" #include "lib_evad.h" #include "lib_evad.tab" #ifdef VAD_B #define rint(x) (int)((x) + 0.5) /*...define non-ANSI function...*/ /**********************************************************************/ /* Initialize rate. */ /**********************************************************************/ void initialize_rate ( ENCODER_MEM* e_mem ) { INT32 i,j,k; e_mem->last_rate = EIGHTH; /* Added by D. Pian 12-Sep-94 */ /* rate decision initialization */ e_mem->frame_num=0; e_mem->num_full_frames=0; e_mem->hangover=10; e_mem->hangover_in_progress=0; e_mem->adaptcount=0; e_mem->pitchrun=0; e_mem->frame_energy_sm[0] = 32000;/*changed by D. Pian 08-Feb-95*/ /* changed from 3200000*16 by khaled, Dec., 20, 2000 */ e_mem->frame_energy_sm[1] = 3200;/*changed by D. Pian 08-Feb-95*/ /* changed from 320000*16 by khaled, Dec., 20, 2000 */ e_mem->signal_energy[0] = 3200000*16;/* Added by D. Pian 12-Sep-94 */ e_mem->signal_energy[1] = 320000*16; /* and Changed 5-Apr-95 */ e_mem->snr_stat_once=0; for (j=0; j band_noise_sm[j]= HIGH_THRESH_LIM; for (i=0; i r_filt[j][i]=0; for (k=0; k r_filt[j][i]+=rate_filt[j][k]*rate_filt[j][k+i]; } } } for (j=0; j snr[j] = e_mem->signal_energy[j]/e_mem->band_noise_sm[j]; e_mem->snr_map[j] = 0; } } /**********************************************************************/ /* Band energy. */ /**********************************************************************/ void band_energy_fcn ( FLOAT64* R, FLOAT32* energy, ENCODER_MEM* e_mem ) { INT32 i,j; /* find the energy in each band */ for (j=0; j r_filt[j][0]*R[0]; for(i=1;i r_filt[j][i]*R[i]; } energy[j]*=2.0/3.0; } } /**********************************************************************/ /* Update background. */ /**********************************************************************/ void update_background ( INT16 rate, ENCODER_MEM* e_mem, FLOAT64 beta ) { INT32 i; for(i=0;i frame_energy_sm[i] = SMSNR*e_mem->frame_energy_sm[i] + (1.0-SMSNR)*e_mem->band_power[i]; } /*************************************************************************/ /* Section 2.4.4.2.2 Updating background noise */ /* Now do the threshold adaptation */ /* don't adapt thresholds unless NACF < NACF_ADAP_BGN_THR over ADP frames*/ /* we're assuming the background noise here doesn't have periodicity in */ /* the range of typical speech */ if (beta < NACF_ADAP_BGN_THR) { e_mem->adaptcount++; } else { e_mem->adaptcount = 0; } if(e_mem->adaptcount > ADP) { /* change the predictor filter for the noise whitening */ /* noise */ /* update our background noise estimate */ for(i=0;i band_noise_sm[i]+1 < e_mem->band_noise_sm[i]*INC_FACTOR) { e_mem->band_noise_sm[i] *= INC_FACTOR; } else { e_mem->band_noise_sm[i] += 1.0; } } e_mem->adaptcount = ADP + 1; } else{ /*if SNR map est. is valid & above SNR_MAP_THRESHOLD let background noise */ /* estimate slowly inch up like it does in IS-96. This will allow */ /* the rate decision to perform as well as IS-96 in babble noise and */ /* other non-stationary noises */ if(e_mem->snr_stat_once == 1) { /* snr is valid */ for(i=0;i snr_map[i] > SNR_MAP_THRESHOLD) && (e_mem->band_noise_sm[i] < e_mem->frame_energy_sm[i]) ){ if(e_mem->band_noise_sm[i]+1.0 < e_mem->band_noise_sm[i]*IS96_INC) { e_mem->band_noise_sm[i] *= IS96_INC; } else { e_mem->band_noise_sm[i] += 1.0; } } } } } /* if input energy is lower than noise estimate, */ /* reduce background noise estimate immediately */ for(i=0;i frame_energy_sm[i] < e_mem->band_noise_sm[i]) { e_mem->band_noise_sm[i] = e_mem->frame_energy_sm[i]; } } for(i=0;i band_noise_sm[i] > HIGH_THRESH_LIM) { e_mem->band_noise_sm[i] = HIGH_THRESH_LIM; } if (e_mem->band_noise_sm[i] < LOWEST_LEVEL[i]) { e_mem->band_noise_sm[i] = LOWEST_LEVEL[i]; } } /* end of updating backgound noise energy */ /***************************************************************/ /***************************************************************/ /* Section 2.4.4.2.2 Updating Signal Energy Estimate */ /* pitchrun keeps track of the number of frames in a row with periodicity*/ if (beta > NACF_SOLID_VOICED) { e_mem->pitchrun += 1; } else { e_mem->pitchrun = 0; } if (e_mem->pitchrun > STATVOICED) { /* decrease the signal energy */ /* if we're in a voiced segment */ e_mem->snr_stat_once = 1; /* confidence that we have seen speech */ /* indicates a stationary voiced segment */ for(i=0;i signal_energy[i] *= SCALE_DOWN_ENERGY; } } /* assume that the highest energy sounds are speech signals and */ /* thus the highest energy frames contain speech and can be used */ /* to define the SNR */ for(i=0;i frame_energy_sm[i] > e_mem->signal_energy[i]) { e_mem->signal_energy[i] = e_mem->frame_energy_sm[i]; } } /* end updating signal energy estimate */ /************************************************************************/ /* This is now done near the top of select_mode1() */ /* update SNR estimates */ for(i=0;i band_noise_sm[i] > 0.0) { e_mem->snr[i] = e_mem->signal_energy[i]/e_mem->band_noise_sm[i]; } else { e_mem->snr[i] = 100000000.0; /* a very high snr if noise =0.0 */ } } if(e_mem->snr_stat_once != 0) { /* we have seen some speech and are confident in our SNR measure */ e_mem->snr_stat_once = 1; /* UPDATE THIS FOR GENERAL SNR MAPS! */ for(i=0;i snr[i] > 0.0){ e_mem->snr_map[i] = (INT32) rint((10*log10(e_mem->snr[i])-20.0)/5.0); } else { e_mem->snr_map[i] = 0; } if(e_mem->snr_map[i] < 0) e_mem->snr_map[i]=0; if(e_mem->snr_map[i] > 7) e_mem->snr_map[i]=7; } } else { /* we haven't seen speech, aren't confident in our estimate of the */ /* signal energy and will use a nominal energy of */ /* VOICE_INITIAL dB (-18dbm0) for our signal energy */ for(i=0;i band_noise_sm[i] > 0.0){ if (i == 0) { e_mem->snr_map[i]= (INT32) rint((VOICE_INITIAL- 10*log10(e_mem->band_noise_sm[i])-20.0)/5.0); } else { e_mem->snr_map[i]= (INT32) rint((VOICE_INITIAL_HI- 10*log10(e_mem->band_noise_sm[i])-20.0)/5.0); } } else { e_mem->snr_map[i] = 7; } if(e_mem->snr_map[i] < 0) e_mem->snr_map[i]=0; if(e_mem->snr_map[i] > 7) e_mem->snr_map[i]=7; } } e_mem->last_rate = rate; e_mem->last_rate_2nd_stage = rate; } /**********************************************************************/ /* Select rate. */ /**********************************************************************/ INT16 EVAD_voice_detection ( INT16 smv_mode, FLOAT64* R_interp, INT16 max_rate, INT16 min_rate, FLOAT64 beta, ENCODER_MEM *e_mem ) { static INT16 first = 0; static FLOAT32 (*THRESH_SNR)[TLEVELS][2]; static INT32 *hangover; INT16 rate; INT32 i; INT32 k; // e_mem = &rate_mem; /* done for rate integration with AT&T simulation*/ if(first == 0){ /*do initialization of rate_mem */ initialize_rate(e_mem); first = 1; } if (smv_mode==0) { hangover=hangover0; THRESH_SNR=THRESH_SNR0; } else if (smv_mode==1) { hangover=hangover1; THRESH_SNR=THRESH_SNR1; } else { hangover=hangover2; THRESH_SNR=THRESH_SNR2; } /* this is the acf of the noise reducing prediction filter */ /* now filter the input speech by bandpass filters and */ /* derive the necessary band energies */ band_energy_fcn(R_interp, e_mem->band_power, e_mem); /* now our threshold comparison and background noise estimation */ /* is done for all frequency bands */ /* rate thresholds are calculated below, but not saved */ for(k=0; k band_rate[k]=EIGHTH; for (i=0; i<2; i++) { /* rate_thresh[k][i] = THRESH_SNR[k][e_mem->snr_map[k]][i]* e_mem->band_noise_sm[k]; */ /* since we changed SMSNR to 0.6 from 0.8 */ if (e_mem->band_power[k]>THRESH_SNR[k][e_mem->snr_map[k]][i]* e_mem->band_noise_sm[k]) { if(e_mem->band_rate[k] == EIGHTH) { e_mem->band_rate[k]=HALFRATE_VOICED; } else if (e_mem->band_rate[k] == HALFRATE_VOICED) { e_mem->band_rate[k]=FULLRATE_VOICED; } } } } /* do threshold comparisons for full and half rates */ /* use 10log10(e_mem->band_power[k]/band_noise_sm[k]) */ /* the maximum rate for all bands is chosen as the rate */ rate = e_mem->band_rate[0]; for(k=1;k< FREQBANDS;k++) { if(e_mem->band_rate[k] > rate) rate = e_mem->band_rate[k]; } /* Section 2.4.4.1.3 Calculating Hangover Frames as Function of SNR */ if((e_mem->num_full_frames > FULL_THRESH) || e_mem->hangover_in_progress == 1) { /* only use hangover after so many full rate frames in a row*/ if(rate != FULLRATE_VOICED) { e_mem->hangover += 1; if (e_mem->hangover <= hangover[e_mem->snr_map[0]]) { rate = FULLRATE_VOICED; e_mem->hangover_in_progress = 1; } else { e_mem->hangover_in_progress = 0; } } else { e_mem->hangover_in_progress = 0; } } if((rate == FULLRATE_VOICED) && (e_mem->hangover_in_progress == 0)) { e_mem->hangover=0; e_mem->num_full_frames+=1; } else { e_mem->num_full_frames = 0; } /* end of hangover algorithm */ /* assuming only 1/8,1/2,and full rates pluse some 1/2 and full rates modes */ /* if last rate is full force 1/2 rate before going to 1/8 */ /* Section 2.4.4.1.4 Constraints on Rate Selection */ if ((e_mem->last_rate_1st_stage == FULLRATE_VOICED) && rate == EIGHTH) { rate = HALFRATE_VOICED; } /* allows fast convergence for EIGHTH rate */ if(e_mem->frame_num == 0){ rate = FULLRATE_VOICED; } if (rate > max_rate) { rate = max_rate; } if (rate < min_rate) { rate = min_rate; } e_mem->last_rate_1st_stage = rate; e_mem->last_rate = rate; /* update background noise and signal energy estimates */ update_background(rate,e_mem,beta); e_mem->frame_num += 1; return(rate); } #endif