/* www.pudn.com > SMV_Code.rar > lib_evad.c */


/*=========================================================================*/ 
/* Each of the companies; Ericsson, Lucent, Mindspeed, Motorola, Nokia,    */ 
/* Nortel Networks, and Qualcomm (hereinafter referred to individually as  */ 
/* “Source” or collectively as “Sources”) do hereby state:                 */ 
/*                                                                         */ 
/* To the extent to which the Source(s) may legally and freely do so,      */ 
/* the Source(s), upon submission of a Contribution, grant(s) a free,      */ 
/* irrevocable, non-exclusive, license to the Third Generation Partnership */ 
/* Project 2 (3GPP2) and its Organizational Partners: ARIB, CCSA, TIA,     */ 
/* TTA, and TTC, under the Source’s copyright or copyright license rights  */ 
/* in the Contribution, to, in whole or in part, copy, make derivative     */ 
/* works, perform, display and distribute the Contribution and derivative  */ 
/* works thereof consistent with 3GPP2’s and each Organizational Partner’s */ 
/* policies and procedures, with the right to (i) sublicense the foregoing */ 
/* rights consistent with 3GPP2’s and each Organizational Partner’s        */ 
/* policies and procedures and (ii) copyright and sell, if applicable) in  */ 
/* 3GPP2's name or each Organizational Partner’s name any 3GPP2 or         */ 
/* transposed Publication even though this Publication may contain the     */ 
/* Contribution or a derivative work thereof.  The Contribution shall      */ 
/* disclose any known limitations on the Source’s rights to license as     */ 
/* herein provided.                                                        */ 
/*                                                                         */ 
/* When a Contribution is submitted by the Source(s) to assist the         */ 
/* formulating groups of 3GPP2 or any of its Organizational Partners,      */ 
/* it is proposed to the Committee as a basis for discussion and is not    */ 
/* to be construed as a binding proposal on the Source(s).  The Source(s)  */ 
/* specifically reserve(s) the right to amend or modify the material       */ 
/* contained in the Contribution. Nothing contained in the Contribution    */ 
/* shall, except as herein expressly provided, be construed as conferring  */ 
/* by implication, estoppel or otherwise, any license or right under       */ 
/* (i) any existing or later issuing patent, whether or not the use of     */ 
/* information in the document necessarily employs an invention of any     */ 
/* existing or later issued patent, (ii) any copyright, (iii) any          */ 
/* trademark, or (iv) any other intellectual property right.               */ 
/*                                                                         */ 
/* With respect to the Software necessary for the practice of any or all   */ 
/* Normative portions of the Selectable Mode Vocoder (SMV) as it exists on */ 
/* the date of submittal of this form, should the SMV be approved as a     */ 
/* Specification or Report by 3GPP2, or as a transposed Standard by any of */ 
/* the 3GPP2’s Organizational Partners, the Source(s) state(s) that a      */ 
/* worldwide license to reproduce, use and distribute the Software, the    */ 
/* license rights to which are held by the Source(s), will be made         */ 
/* available to applicants under terms and conditions that are reasonable  */ 
/* and non-discriminatory, which may include monetary compensation,        */ 
/* and only to the extent necessary for the practice of any or all of the  */ 
/* Normative portions of the SMV or the field of use of practice of the    */ 
/* SMV Specification, Report, or Standard.  The statement contained above  */ 
/* is irrevocable and shall be binding upon the Source(s).  In the event   */ 
/* the rights of the Source(s) in and to copyright or copyright license    */ 
/* rights subject to such commitment are assigned or transferred,          */ 
/* the Source(s) shall notify the assignee or transferee of the existence  */ 
/* of such commitments.                                                    */ 
/*=========================================================================*/ 
/*                                                                   */ 
/*-------------------------------------------------------------------*/ 
/*===================================================================*/ 
/* LIBRARY: lib_evad.c                                               */ 
/*-------------------------------------------------------------------*/ 
/* PURPOSE : Library of Voice Activity Detection based on EVAD VAD.  */ 
/*===================================================================*/ 
 
#include <math.h>

#include "typedef.h"
#include "lib_evad.h"
#include "lib_evad.tab"
 
 
#ifdef VAD_B 
 
/* Stand-in for the non-ANSI rint(): adds 0.5 and converts to int.       */
/* The conversion truncates toward zero, so this rounds to nearest only  */
/* for x >= -0.5; callers in this file clamp the result to be >= 0.      */
/* The whole expansion is parenthesised so it is safe in any expression. */
#define rint(x)  ((int)((x) + 0.5))     /*...define non-ANSI function...*/
 
/**********************************************************************/
/*  Initialize rate.                                                  */
/*                                                                    */
/*  Resets the rate-decision state stored in *e_mem: last rate is set */
/*  to EIGHTH, the frame / full-frame / adaptation / pitch-run        */
/*  counters are zeroed, the hangover counter starts at 10, and the   */
/*  smoothed frame energies and signal energies for bands 0 and 1     */
/*  get fixed start values.  The per-band noise estimate, r_filt[][], */
/*  snr[] and snr_map[] are then filled in by the loops at the end.   */
/*                                                                    */
/*  e_mem : encoder state to initialise (written, not read first).    */
/**********************************************************************/
void  initialize_rate ( 
	ENCODER_MEM*  e_mem 
) 
{ 
	INT32 i,j,k; 
 
	e_mem->last_rate = EIGHTH; /* Added by D. Pian 12-Sep-94 */ 
 
	/* rate decision initialization */ 
	e_mem->frame_num=0; 
	 
	e_mem->num_full_frames=0; 
	e_mem->hangover=10; 
	e_mem->hangover_in_progress=0; 
 
	e_mem->adaptcount=0; 
	e_mem->pitchrun=0; 
	e_mem->frame_energy_sm[0] = 32000;/*changed by D. Pian 08-Feb-95*/ 
	/* changed from 3200000*16 by khaled, Dec., 20, 2000 */ 
	e_mem->frame_energy_sm[1] = 3200;/*changed by D. Pian 08-Feb-95*/ 
           /* changed from 320000*16 by khaled, Dec., 20, 2000 */ 
	e_mem->signal_energy[0] = 3200000*16;/* Added by D. Pian 12-Sep-94 */ 
	e_mem->signal_energy[1] = 320000*16; /* and Changed 5-Apr-95 */ 
 
	e_mem->snr_stat_once=0; 
 
	/* NOTE(review): the three loop headers below are truncated.  The   */
	/* text from the '<' of each loop condition up to the '>' of the    */
	/* following "e_mem->" is missing (looks like markup stripping when */
	/* this listing was extracted), so this function does not compile   */
	/* as shown.  The outer j loop presumably runs over FREQBANDS bands */
	/* and sets band_noise_sm[j] to HIGH_THRESH_LIM; the i and k bounds */
	/* are not recoverable from this file.  What survives shows         */
	/* r_filt[j][i] being zeroed and then accumulating                  */
	/* rate_filt[j][k]*rate_filt[j][k+i], i.e. what looks like the      */
	/* lag-i autocorrelation of the band filter taps.                   */
	/* TODO: restore from the reference distribution.                   */
	for (j=0; jband_noise_sm[j]= HIGH_THRESH_LIM; 
		for (i=0; ir_filt[j][i]=0; 
			for (k=0; kr_filt[j][i]+=rate_filt[j][k]*rate_filt[j][k+i]; 
			} 
		} 
	} 
	/* NOTE(review): truncated header again; presumably a loop over the */
	/* bands that sets snr[j] = signal_energy[j]/band_noise_sm[j] and   */
	/* clears snr_map[j] -- confirm against the reference code.         */
	for (j=0; jsnr[j] = e_mem->signal_energy[j]/e_mem->band_noise_sm[j];    
		e_mem->snr_map[j] = 0; 
	} 
} 
 
/**********************************************************************/
/*  Band energy.                                                      */
/*                                                                    */
/*  Computes one energy value per frequency band from the values in   */
/*  R[] weighted by e_mem->r_filt[band][], then scales each band      */
/*  energy by 2.0/3.0.                                                */
/*                                                                    */
/*  R      : input sequence indexed by i (presumably autocorrelation  */
/*           lags of the frame -- confirm with the caller).           */
/*  energy : output array, one entry per band.                        */
/*  e_mem  : encoder state; only r_filt[][] is read in what survives. */
/**********************************************************************/
void  band_energy_fcn ( 
	FLOAT64*        R, 
	FLOAT32*        energy, 
	ENCODER_MEM*  e_mem 
) 
{ 
	INT32 i,j; 
 
	/* find the energy in each band   */ 
	/* NOTE(review): both loop headers below are truncated.  The text   */
	/* from the '<' of the loop condition up to the '>' of the          */
	/* following "e_mem->" is missing, which also removed the left-hand */
	/* side of each statement (and any constant factor in the inner     */
	/* one).  Presumably the outer loop runs j over the bands and       */
	/* starts energy[j] from r_filt[j][0]*R[0], and the inner loop adds */
	/* the r_filt[j][i]*R[i] terms for i >= 1; the inner bound and any  */
	/* scale factor cannot be recovered from this file.                 */
	/* TODO: restore from the reference distribution.                   */
	for (j=0; jr_filt[j][0]*R[0]; 
		for(i=1;ir_filt[j][i]*R[i];  
		} 
		energy[j]*=2.0/3.0; 
	} 
} 
 
/**********************************************************************/
/*  Update background.                                                */
/*                                                                    */
/*  Per-frame update of the per-band statistics kept in *e_mem:       */
/*    1. smooth the band power into frame_energy_sm[],                */
/*    2. adapt the background noise estimate band_noise_sm[],         */
/*    3. adapt the signal energy estimate signal_energy[],            */
/*    4. recompute snr[] and the quantised snr_map[] (clamped 0..7),  */
/*    5. record `rate` in last_rate and last_rate_2nd_stage.          */
/*                                                                    */
/*  rate  : rate chosen for the current frame (only stored).          */
/*  e_mem : encoder state, read and updated in place.                 */
/*  beta  : periodicity measure compared against NACF_ADAP_BGN_THR    */
/*          and NACF_SOLID_VOICED.                                    */
/*                                                                    */
/*  All band loops run over FREQBANDS entries.  The loop headers were */
/*  truncated in the copy this was restored from; the bound matches   */
/*  the intact band loop in EVAD_voice_detection().                   */
/**********************************************************************/
void  update_background (
	INT16         rate,
	ENCODER_MEM*  e_mem,
	FLOAT64         beta
)
{
	INT32    i;

	/* first-order smoothing of the band power with coefficient SMSNR */
	for (i = 0; i < FREQBANDS; i++) {
		e_mem->frame_energy_sm[i] = SMSNR*e_mem->frame_energy_sm[i] + (1.0-SMSNR)*e_mem->band_power[i];
	}

	/*************************************************************************/
	/* Section 2.4.4.2.2 Updating background noise                           */

	/* Now do the threshold adaptation                                       */
	/* don't adapt thresholds unless NACF < NACF_ADAP_BGN_THR over ADP frames*/
	/* we're assuming the background noise here doesn't have periodicity in  */
	/* the range of typical speech                                           */

	if (beta < NACF_ADAP_BGN_THR) {
		e_mem->adaptcount++;
	} else {
		e_mem->adaptcount = 0;
	}

	if (e_mem->adaptcount > ADP) {
		/* enough consecutive non-periodic frames: raise the background */
		/* noise estimate by a factor of INC_FACTOR, or by 1.0 when the */
		/* multiplicative step would be smaller than that               */
		for (i = 0; i < FREQBANDS; i++) {
			if (e_mem->band_noise_sm[i]+1 < e_mem->band_noise_sm[i]*INC_FACTOR) {
				e_mem->band_noise_sm[i] *= INC_FACTOR;
			} else {
				e_mem->band_noise_sm[i] += 1.0;
			}
		}
		/* saturate the counter so it cannot grow without bound */
		e_mem->adaptcount = ADP + 1;
	} else {

		/*if SNR map est. is valid & above SNR_MAP_THRESHOLD let background noise */
		/* estimate slowly inch up like it does in IS-96.  This will allow */
		/* the rate decision to perform as well as IS-96 in babble noise and */
		/* other non-stationary noises    */

		if (e_mem->snr_stat_once == 1) {  /* snr is valid  */
			for (i = 0; i < FREQBANDS; i++) {
				if ((e_mem->snr_map[i] > SNR_MAP_THRESHOLD) &&
				    (e_mem->band_noise_sm[i] < e_mem->frame_energy_sm[i])) {
					if (e_mem->band_noise_sm[i]+1.0 < e_mem->band_noise_sm[i]*IS96_INC) {
						e_mem->band_noise_sm[i] *= IS96_INC;
					} else {
						e_mem->band_noise_sm[i] += 1.0;
					}
				}
			}
		}
	}

	/* if input energy is lower than noise estimate, */
	/* reduce background noise estimate immediately  */

	for (i = 0; i < FREQBANDS; i++) {
		if (e_mem->frame_energy_sm[i] < e_mem->band_noise_sm[i]) {
			e_mem->band_noise_sm[i] = e_mem->frame_energy_sm[i];
		}
	}

	/* keep the noise estimate within [LOWEST_LEVEL[i], HIGH_THRESH_LIM] */
	for (i = 0; i < FREQBANDS; i++) {
		if (e_mem->band_noise_sm[i] > HIGH_THRESH_LIM) {
			e_mem->band_noise_sm[i] = HIGH_THRESH_LIM;
		}
		if (e_mem->band_noise_sm[i] < LOWEST_LEVEL[i]) {
			e_mem->band_noise_sm[i] = LOWEST_LEVEL[i];
		}
	}

	/* end of updating background noise energy */
	/***************************************************************/

	/***************************************************************/
	/* Section 2.4.4.2.2 Updating Signal Energy Estimate */
	/* pitchrun keeps track of the number of frames in a row with periodicity*/

	if (beta > NACF_SOLID_VOICED) {
		e_mem->pitchrun += 1;
	} else {
		e_mem->pitchrun = 0;
	}

	if (e_mem->pitchrun > STATVOICED) {  /* decrease the signal energy   */
	                                     /* if we're in a voiced segment */
		e_mem->snr_stat_once = 1;   /* confidence that we have seen speech */

		/* indicates a stationary voiced segment       */
		for (i = 0; i < FREQBANDS; i++) {
			e_mem->signal_energy[i] *= SCALE_DOWN_ENERGY;
		}
	}

	/* assume that the highest energy sounds are speech signals and */
	/* thus the highest energy frames contain speech and can be used */
	/* to define the SNR */

	for (i = 0; i < FREQBANDS; i++) {
		if (e_mem->frame_energy_sm[i] > e_mem->signal_energy[i]) {
			e_mem->signal_energy[i] = e_mem->frame_energy_sm[i];
		}
	}

	/* end updating signal energy estimate */
	/************************************************************************/

	/* update SNR estimates (an older comment said this had moved to */
	/* select_mode1(); it is still performed here)                    */

	for (i = 0; i < FREQBANDS; i++) {
		if (e_mem->band_noise_sm[i] > 0.0) {
			e_mem->snr[i] = e_mem->signal_energy[i]/e_mem->band_noise_sm[i];
		} else {
			e_mem->snr[i] = 100000000.0;  /* a very high snr if noise =0.0 */
		}
	}

	if (e_mem->snr_stat_once != 0) {
		/* we have seen some speech and are confident in our SNR measure */
		e_mem->snr_stat_once = 1;

		/* UPDATE THIS FOR GENERAL SNR MAPS! */
		/* map the SNR in dB to a level: 5 dB per step starting at 20 dB */
		for (i = 0; i < FREQBANDS; i++) {
			if (e_mem->snr[i] > 0.0) {
				e_mem->snr_map[i] = (INT32) rint((10*log10(e_mem->snr[i])-20.0)/5.0);
			} else {
				e_mem->snr_map[i] = 0;
			}

			if (e_mem->snr_map[i] < 0) e_mem->snr_map[i]=0;
			if (e_mem->snr_map[i] > 7) e_mem->snr_map[i]=7;
		}
	} else {
		/* we haven't seen speech, aren't confident in our estimate of the */
		/* signal energy and will use a nominal energy of */
		/* VOICE_INITIAL dB (-18dbm0) for our signal energy */
		/* (VOICE_INITIAL for band 0, VOICE_INITIAL_HI for the others) */

		for (i = 0; i < FREQBANDS; i++) {
			if (e_mem->band_noise_sm[i] > 0.0) {
				if (i == 0) {
					e_mem->snr_map[i]= (INT32) rint((VOICE_INITIAL- 10*log10(e_mem->band_noise_sm[i])-20.0)/5.0);
				} else {
					e_mem->snr_map[i]= (INT32) rint((VOICE_INITIAL_HI- 10*log10(e_mem->band_noise_sm[i])-20.0)/5.0);
				}
			} else {
				e_mem->snr_map[i] = 7;
			}
			if (e_mem->snr_map[i] < 0) e_mem->snr_map[i]=0;
			if (e_mem->snr_map[i] > 7) e_mem->snr_map[i]=7;
		}
	}

	e_mem->last_rate = rate;
	e_mem->last_rate_2nd_stage = rate;

}
 
/**********************************************************************/
/*  Select rate.                                                      */
/*                                                                    */
/*  Decides the rate for the current frame and updates the state in   */
/*  *e_mem (including the background / signal statistics, through     */
/*  update_background()).                                             */
/*                                                                    */
/*  smv_mode : selects the threshold and hangover tables:             */
/*             0 and 1 use tables 0 and 1, any other value table 2.   */
/*  R_interp : passed to band_energy_fcn() as its input sequence.     */
/*  max_rate : upper clamp applied to the decision.                   */
/*  min_rate : lower clamp applied to the decision (applied last).    */
/*  beta     : periodicity measure forwarded to update_background().  */
/*  e_mem    : encoder state, read and updated in place.              */
/*                                                                    */
/*  Returns the selected rate.                                        */
/*                                                                    */
/*  NOTE(review): `first` is function-static, so initialize_rate()    */
/*  runs only for the e_mem passed on the very first call in the      */
/*  process; a second ENCODER_MEM instance is never initialised here. */
/**********************************************************************/
INT16 EVAD_voice_detection  (
        INT16 smv_mode,
	FLOAT64*  R_interp,
	INT16   max_rate,
	INT16   min_rate,
	FLOAT64   beta,
	ENCODER_MEM *e_mem
	)
{
	static INT16  first = 0;
	/* both table pointers are reassigned on every call, so they do */
	/* not need static storage                                      */
	FLOAT32 (*snr_thresh)[TLEVELS][2];
	INT32 *hangover_tbl;
	INT16  rate;
	INT32    i;
	INT32    k;

	if (first == 0) {
		/* do initialization of rate memory on the first call only */
		initialize_rate(e_mem);
		first = 1;
	}

	/* pick the mode-dependent hangover and SNR threshold tables */
	switch (smv_mode) {
	case 0:
		hangover_tbl = hangover0;
		snr_thresh = THRESH_SNR0;
		break;
	case 1:
		hangover_tbl = hangover1;
		snr_thresh = THRESH_SNR1;
		break;
	default:
		hangover_tbl = hangover2;
		snr_thresh = THRESH_SNR2;
		break;
	}

	/* this is the acf of the noise reducing prediction filter */

	/* now filter the input speech by bandpass filters and     */
	/* derive the necessary band energies                      */

	band_energy_fcn(R_interp, e_mem->band_power, e_mem);

	/* now our threshold comparison and background noise estimation */
	/* is done for all frequency bands                              */
	/* rate thresholds are calculated below, but not saved */

	for (k = 0; k < FREQBANDS; k++) {
		e_mem->band_rate[k]=EIGHTH;
		for (i=0; i<2; i++) {
			/* each threshold exceeded promotes the band one step: */
			/* EIGHTH -> HALFRATE_VOICED -> FULLRATE_VOICED        */
			if (e_mem->band_power[k]>snr_thresh[k][e_mem->snr_map[k]][i]* e_mem->band_noise_sm[k])
			{
				if (e_mem->band_rate[k] == EIGHTH) {
					e_mem->band_rate[k]=HALFRATE_VOICED;
				} else if (e_mem->band_rate[k] == HALFRATE_VOICED) {
					e_mem->band_rate[k]=FULLRATE_VOICED;
				}
			}
		}
	}  /* do threshold comparisons for full and half rates  */

	/* use 10log10(e_mem->band_power[k]/band_noise_sm[k]) */

	/* the maximum rate for all bands is chosen as the rate */
	rate = e_mem->band_rate[0];
	for (k=1;k< FREQBANDS;k++) {
		if (e_mem->band_rate[k] > rate) rate = e_mem->band_rate[k];
	}

	/* Section 2.4.4.1.3 Calculating Hangover Frames as Function of SNR */
	if ((e_mem->num_full_frames > FULL_THRESH) || e_mem->hangover_in_progress == 1) {
		/* only use hangover after so many full rate frames in a row*/
		if (rate != FULLRATE_VOICED) {
			e_mem->hangover += 1;
			/* hangover length depends on the band-0 SNR level */
			if (e_mem->hangover <= hangover_tbl[e_mem->snr_map[0]]) {
				rate = FULLRATE_VOICED;
				e_mem->hangover_in_progress = 1;
			} else {
				e_mem->hangover_in_progress = 0;
			}
		} else {
			e_mem->hangover_in_progress = 0;
		}
	}

	if ((rate == FULLRATE_VOICED) && (e_mem->hangover_in_progress == 0)) {
		e_mem->hangover=0;
		e_mem->num_full_frames+=1;
	} else {
		e_mem->num_full_frames = 0;
	}
	/* end of hangover algorithm     */


	/* assuming only 1/8,1/2,and full rates plus some 1/2 and full rates modes */
	/* if last rate is full force 1/2 rate before going to 1/8                 */

	/* Section 2.4.4.1.4 Constraints on Rate Selection */
	if ((e_mem->last_rate_1st_stage == FULLRATE_VOICED) && rate == EIGHTH) {
		rate = HALFRATE_VOICED;
	}

	/* allows fast convergence for EIGHTH rate */
	if (e_mem->frame_num == 0) {
		rate = FULLRATE_VOICED;
	}

	/* clamp to the allowed range; min_rate wins if the limits conflict */
	if (rate > max_rate) {
		rate = max_rate;
	}
	if (rate < min_rate) {
		rate = min_rate;
	}

	e_mem->last_rate_1st_stage = rate;
	e_mem->last_rate = rate;

	/* update background noise and signal energy estimates */
	update_background(rate,e_mem,beta);

	e_mem->frame_num += 1;

	return(rate);
}
#endif