/* Source listing: www.pudn.com > AVS_M_ver10.rar > cod_sad.c */


/*
********************************************************************************
*
* File : cod_sad.c
* Description : Parameter extraction and signal classification
*
********************************************************************************
*/
#include "../include/cod_sad.h"
#include "../include/amr_plus.h"
#include "../include/mem.h"

#include <float.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "../lib_amr/enc_if.h"
#include "../lib_amr/enc_main.c"
#include "../lib_amr/enc_lpc.c"
#include "../include/s_util.h"

#include "../../c-code/include/open_pitch.h"

/////////////////////////////////
/* Frame count: set to (frame + 1) at the start of every syndecid() call and
 * read by extract_isf() and extract_ratio(), which force their deviation
 * measures to zero while the count is still small. */
int count_frame;
/////////////////////////////////



/*************************************************************************
 *
 * FUNCTION:    extract_isf()
 * input:       isf   - frequency vector of the current frame (M entries)
 * output:      synSt->lsf_meanSD, synSt->lsfmean_meanSD
 * DESCRIPTION: Measures how far the current vector is from (a) the vector
 *              of the previous call and (b) a running mean vector, then
 *              smooths both squared distances over the current and the
 *              four previous calls.  Also updates the running mean, the
 *              two distance histories and the stored previous vector.
 *
 *************************************************************************/
void extract_isf(SyndecidSt *synSt , float isf[M])
{
    int k;
    float delta;
    float dist_prev = 0;    /* squared distance to the previous vector */
    float dist_mean = 0;    /* squared distance to the running mean    */
    float lsf_SD;
    float lsfmean_SD;

    /* Both squared distances are accumulated in a single pass. */
    for (k = 0; k < M; k++)
    {
        delta = isf[k] - synSt->lsf_old[k];
        dist_prev += delta * delta;

        delta = isf[k] - synSt->lsfmean[k];
        dist_mean += delta * delta;
    }

    /* The distances are forced to zero during the first frames
     * (frame 1 for the short-term one, frames 1..5 for the long-term one). */
    lsf_SD     = (count_frame == 1) ? 0.0f : dist_prev;
    lsfmean_SD = (count_frame < 6)  ? 0.0f : dist_mean;

    /* Running mean: 3/4 of the old mean plus 1/4 of the new vector. */
    for (k = 0; k < M; k++)
    {
        synSt->lsfmean[k] = 0.75 * synSt->lsfmean[k] + 0.25 * isf[k];
    }

    /* Short-term deviation: equal-weight average over 5 values. */
    synSt->lsf_meanSD = 0.2 * synSt->pre_lsf_SD[0]
                      + 0.2 * synSt->pre_lsf_SD[1]
                      + 0.2 * synSt->pre_lsf_SD[2]
                      + 0.2 * synSt->pre_lsf_SD[3]
                      + 0.2 * lsf_SD;
    for (k = 3; k > 0; k--)
    {
        synSt->pre_lsf_SD[k] = synSt->pre_lsf_SD[k - 1];
    }
    synSt->pre_lsf_SD[0] = lsf_SD;

    /* Long-term deviation: equal-weight average over 5 values. */
    synSt->lsfmean_meanSD = 0.2 * synSt->pre_lsfmean_SD[0]
                          + 0.2 * synSt->pre_lsfmean_SD[1]
                          + 0.2 * synSt->pre_lsfmean_SD[2]
                          + 0.2 * synSt->pre_lsfmean_SD[3]
                          + 0.2 * lsfmean_SD;
    for (k = 3; k > 0; k--)
    {
        synSt->pre_lsfmean_SD[k] = synSt->pre_lsfmean_SD[k - 1];
    }
    synSt->pre_lsfmean_SD[0] = lsfmean_SD;

    /* Remember the current vector for the next call. */
    for (k = 0; k < M; k++)
    {
        synSt->lsf_old[k] = isf[k];
    }
}

/*************************************************************************
 *
 * FUNCTION:    extract_ratio()
 * input:       st->level[]  - sub-band levels of the current frame
 * output:      synSt->level_energy, synSt->level_meanSD,
 *              synSt->levelmean_meanSD
 * DESCRIPTION: Derives sub-band level features for music/speech
 *              classification:
 *                - level_energy     : sum of the COMPLEN sub-band levels
 *                - level_meanSD     : smoothed frame-to-frame level change,
 *                                     normalised by the energy of the last
 *                                     two frames
 *                - levelmean_meanSD : smoothed distance of the levels to
 *                                     their running mean, normalised by the
 *                                     energy of the last five frames
 *              Also updates the running mean (level_12_mean), the energy
 *              and deviation histories in synSt, and st->old_level.
 *
 *              The earlier sub-band ratio computation was removed: its
 *              results were stored only in locals that were never read.
 *
 *************************************************************************/
void extract_ratio(VadVars * st, SyndecidSt *synSt )
{
    Word16 i;
    Word16 temp;
    Word32 L_temp;
    float level_SD;              /* sub-band level spectral deviation      */
    float levelmean_SD;          /* sub-band level mean spectral deviation */
    double short_mean_level_energy;
    double long_mean_level_energy;

    /* Total level of the current frame. */
    synSt->level_energy = 0;
    for (i = 0; i < COMPLEN; i++)
    {
        synSt->level_energy = synSt->level_energy + st->level[i];
    }

    /* Energy of the current plus the previous frame. */
    short_mean_level_energy = synSt->level_energy + synSt->pre_level_energy[0];

    /* Sum of absolute level changes against the previous frame.
     * NOTE(review): the abs() result is stored in a Word16 before being
     * accumulated; if the level difference can exceed the Word16 range this
     * truncates -- confirm against the declared type of st->level. */
    L_temp = 0;
    for (i = 0; i < COMPLEN; i++)
    {
        temp = abs(st->level[i] - st->old_level[i]);
        L_temp = L_temp + temp;
    }
    if (count_frame == 1)
    {
        /* No previous frame to compare against. */
        level_SD = 0.0;
    }
    else if (short_mean_level_energy == 0)
    {
        /* Avoid dividing by zero on silent input. */
        level_SD = 0.0;
    }
    else
    {
        level_SD = (float)L_temp / short_mean_level_energy;
    }

    /* Energy of the current plus the four previous frames. */
    long_mean_level_energy = short_mean_level_energy + synSt->pre_level_energy[1] +
                             synSt->pre_level_energy[2] + synSt->pre_level_energy[3];

    /* Sum of absolute distances to the running mean levels. */
    L_temp = 0;
    for (i = 0; i < COMPLEN; i++)
    {
        temp = abs(st->level[i] - synSt->level_12_mean[i]);
        L_temp = L_temp + temp;
    }
    if (count_frame < 10)
    {
        /* Forced to zero for the first nine frames. */
        levelmean_SD = 0;
    }
    else if (long_mean_level_energy == 0)
    {
        levelmean_SD = 0;
    }
    else
    {
        levelmean_SD = (float)L_temp / long_mean_level_energy;
    }

    /* Running mean of the levels: 3/4 old + 1/4 new, stored as Word16. */
    for (i = 0; i < COMPLEN; i++)
    {
        synSt->level_12_mean[i] = (Word16)(0.75 * synSt->level_12_mean[i] + 0.25 * st->level[i]);
    }

    /* Short-term deviation: equal-weight average over 5 values. */
    synSt->level_meanSD = (0.2 * synSt->pre_level_SD[0] + 0.2 * synSt->pre_level_SD[1] + 0.2 * synSt->pre_level_SD[2]
                           + 0.2 * synSt->pre_level_SD[3] + 0.2 * level_SD);
    synSt->pre_level_SD[3] = synSt->pre_level_SD[2];
    synSt->pre_level_SD[2] = synSt->pre_level_SD[1];
    synSt->pre_level_SD[1] = synSt->pre_level_SD[0];
    synSt->pre_level_SD[0] = level_SD;

    /* Long-term deviation: equal-weight average over 5 values. */
    synSt->levelmean_meanSD = (0.2 * synSt->pre_levelmean_SD[0] + 0.2 * synSt->pre_levelmean_SD[1] + 0.2 * synSt->pre_levelmean_SD[2]
                               + 0.2 * synSt->pre_levelmean_SD[3] + 0.2 * levelmean_SD);
    synSt->pre_levelmean_SD[3] = synSt->pre_levelmean_SD[2];
    synSt->pre_levelmean_SD[2] = synSt->pre_levelmean_SD[1];
    synSt->pre_levelmean_SD[1] = synSt->pre_levelmean_SD[0];
    synSt->pre_levelmean_SD[0] = levelmean_SD;

    /* Shift the frame-energy history. */
    synSt->pre_level_energy[3] = synSt->pre_level_energy[2];
    synSt->pre_level_energy[2] = synSt->pre_level_energy[1];
    synSt->pre_level_energy[1] = synSt->pre_level_energy[0];
    synSt->pre_level_energy[0] = synSt->level_energy;

    /* Update signal levels of the previous frame (old_level). */
    for (i = 0; i < COMPLEN; i++)
    {
        st->old_level[i] = st->level[i];
    }
}

/***********************************************************
 * Function: pre_big
 * Purpose:  Big subframe (2 subframes) preprocessing.
 *
 * For two consecutive subframes of L_SUBFR samples starting at
 * speech[frameOffset], builds two weighted copies of the LPC
 * coefficients stored in synSt->A_t, runs E_UTIL_residu() from
 * speech[] into wsp[], then runs E_UTIL_synthesis() in place on
 * wsp[] using synSt->mem_w as filter memory.
 *
 * The coefficient sets used are A_t[0] and A_t[MP1] when
 * frameOffset is 0, and A_t[2*MP1] and A_t[3*MP1] otherwise.
 *
 * Always returns 0.
 *
 * NOTE(review): both coefficient sets are weighted with GAMMA1,
 * so the analysis and synthesis filters have identical
 * coefficients.  Confirm whether the second one was meant to
 * use a different weighting factor.
 ************************************************************/

int pre_big(SyndecidSt *synSt ,
            int frameOffset,   /* i : Start position in speech vector, Q0*/
            float speech[],    /* i : speech, Q0*/
            float wsp[]        /* o : weighted speech Q0*/
           )
{
    float Ap1[MP1];   /* A(z) with spectral expansion */
    float Ap2[MP1];   /* A(z) with spectral expansion */
    int aOffset;
    int i;

    /* Second half of the frame uses the last two coefficient sets. */
    aOffset = (frameOffset > 0) ? (2 * MP1) : 0;

    /* process two subframes (which form the "big" subframe) */
    for (i = 0; i < 2; i++)
    {
        E_LPC_a_weight(&synSt->A_t[aOffset], Ap1, GAMMA1, M);
        E_LPC_a_weight(&synSt->A_t[aOffset], Ap2, GAMMA1, M);
        E_UTIL_residu(Ap1, &speech[frameOffset], &wsp[frameOffset], L_SUBFR);

        E_UTIL_synthesis(Ap2, &wsp[frameOffset], &wsp[frameOffset], L_SUBFR, synSt->mem_w, 1);
        aOffset = aOffset + MP1;
        frameOffset = frameOffset + L_SUBFR;
    }

    return 0;
}


/*************************************************************************
 *
 * FUNCTION:    extract_op()
 * input:       new_speech - signal used to compute the open-loop pitch
 * output:      synSt->pitch, synSt->ptone_tone[], synSt->meangain,
 *              synSt->T_op_mean, synSt->T_op_longmean
 * DESCRIPTION: Extracts open-loop pitch features for music/speech
 *              classification.  For each of two sub-blocks it computes the
 *              weighted speech with pre_big(), runs the open-loop pitch
 *              search, and derives a correlation/energy "tone" value.
 *              It then smooths the lags and tone values over time, calls
 *              wb_vad_pitch_tone_detection() with st->ol_gain, and sets
 *              synSt->pitch from lag stability.
 *
 *************************************************************************/
void extract_op( SyndecidSt *synSt ,
                 Coder_State_Plus * st,   /* i : state struct */
                 float new_speech[])      /* i : signal used to compute the open loop pitch */

{
    int i_subfr, subfrNr;
    float *speech1;
    float sum;
    float t0, cor_max;
    float *p = NULL, *p1 = NULL;
    int i, j;

    float old_d_wsp[(PIT_MAX_MAX/OPL_DECIM)+L_DIV];   /* history + weighted speech */
    float *d_wsp = NULL;

    int T_op[2];   /* open-loop pitch delay of each sub-block */

#ifdef AVS_OPEN_PITCH
    float R0, R1, R2;
#endif

    /* Present frame */
    speech1 = new_speech - L_NEXT;

    /* Load the stored history in front of the working area. */
    d_wsp = old_d_wsp + PIT_MAX_MAX / OPL_DECIM;
    mvr2r(st->old_d_wsp, old_d_wsp, PIT_MAX_MAX / OPL_DECIM);

    /* Open-loop pitch is searched on the weighted speech, not the input. */
    for (subfrNr = 0, i_subfr = 0; subfrNr < 2; subfrNr++, i_subfr += 128)
    {
        /* Weighted speech for this big subframe. */
        pre_big(synSt , i_subfr, speech1, d_wsp);

#ifdef AVS_OPEN_PITCH
        T_op[subfrNr] = (int) find_pitch(&d_wsp[subfrNr*64], 20, PIT_MAX, (2*L_SUBFR)/OPL_DECIM);

        /* Open-loop pitch gain: normalised cross-correlation at lag T_op. */
        R0 = R1 = R2 = 0.0F;
        for (j = 0; j < (2*L_SUBFR)/OPL_DECIM; j++)
        {
            R1 += (d_wsp+subfrNr*64)[j] * (d_wsp+subfrNr*64)[j];
            R2 += (d_wsp+subfrNr*64)[j-T_op[subfrNr]] * (d_wsp+subfrNr*64)[j-T_op[subfrNr]];
            R0 += (d_wsp+subfrNr*64)[j] * (d_wsp+subfrNr*64)[j-T_op[subfrNr]];
        }
        st->ol_gain = (Float32)(R0 / (sqrt(R1 * R2) + 1e-5));
#endif
#ifndef AVS_OPEN_PITCH
        T_op[subfrNr] = E_GAIN_open_loop_search(&d_wsp[subfrNr*64], 20, PIT_MAX, (2 * L_SUBFR) / OPL_DECIM, st->old_T0_med, &(st->ol_gain),
                                                st->hp_ol_ltp_mem, st->hp_old_wsp, (unsigned char)st->ol_wght_flg);
        if (st->ol_gain > 0.6)
        {
            st->old_T0_med = E_GAIN_olag_median(T_op[subfrNr], st->old_ol_lag);
            st->ada_w = 1.0;
        }
        else
        {
            st->ada_w = st->ada_w * 0.9f;
        }
        if ( st->ada_w < 0.8)
        {
            st->ol_wght_flg = 0;
        }
        else
        {
            st->ol_wght_flg = 1;
        }
#endif

        /* Cross-correlation between the signal and its copy delayed by T_op.
         * NOTE(review): this window starts at d_wsp[0] for both sub-blocks,
         * whereas the pitch search above uses d_wsp[subfrNr*64] -- confirm. */
        cor_max = 0.0f;
        p = &d_wsp[0];
        p1 = d_wsp - T_op[subfrNr];
        for (j = 0; j < (2*L_SUBFR) / OPL_DECIM; j++)
        {
            cor_max += *p++ * *p1++;
        }

        /* Energy of the delayed signal (small bias keeps it non-zero). */
        t0 = 0.01f;
        p = d_wsp - T_op[subfrNr];
        for (j = 0; j < (2*L_SUBFR) / OPL_DECIM; j++, p++)
        {
            t0 += *p * *p;
        }

        /* NOTE(review): the saturated branch yields 32767 while the normal
         * branch yields a plain ratio below 1; after the common scaling the
         * two ranges differ by several orders of magnitude.  This looks like
         * a leftover from a fixed-point version -- confirm intended scale. */
        if (cor_max > 0)
        {
            if (cor_max >= t0)
            {
                sum = 32767;
            }
            else
            {
                sum = (cor_max / t0);
            }
        }
        else
        {
            sum = 0;
        }
        sum = (sum * 10000) / 32768;
        synSt->ptone_tone[subfrNr] = sum;
    }

    /* Write the history region back to the coder state.
     * NOTE(review): this copies the first PIT_MAX_MAX/OPL_DECIM samples of
     * old_d_wsp, i.e. the region loaded from st->old_d_wsp at entry; no
     * shift of the buffer is visible in this function -- confirm that the
     * history is really meant to stay as is. */
    mvr2r(old_d_wsp, st->old_d_wsp, PIT_MAX_MAX / OPL_DECIM);

    /* Short-term lag smoothing over the last six sub-block lags. */
    synSt->T_op_mean = (int) (0.166 * synSt->pre_T_op[0] + 0.166 * synSt->pre_T_op[1] + 0.166 * synSt->pre_T_op[2]
                              + 0.166 * synSt->pre_T_op[3] + 0.166 * T_op[0] + 0.166 * T_op[1]);
    synSt->pre_T_op[3] = synSt->pre_T_op[1];
    synSt->pre_T_op[2] = synSt->pre_T_op[0];
    synSt->pre_T_op[1] = T_op[1];
    synSt->pre_T_op[0] = T_op[0];

    /* Long-term lag smoothing. */
    synSt->T_op_longmean = (int)(0.8 * synSt->T_op_longmean + 0.1 * T_op[0] + 0.1 * T_op[1]);

    /* Pitch detection */
    wb_vad_pitch_tone_detection(st->vadSt, st->ol_gain);
    {
        Word16 lagcount = 0;
        synSt->pitch = 0;

        /* Count lags that moved by less than 45 from the preceding lag. */
        for (i = 0; i < 2; i++)
        {
            if ((abs(synSt->oldlag - T_op[i]) - 45) < 0)
            {
                lagcount = lagcount + 1;
            }
            /* Save the current lag */
            synSt->oldlag = T_op[i];
        }

        /* Pitch is declared when the counts of this call and the previous
         * call sum to at least 4. */
        if ( (synSt->oldlag_count + lagcount - 4) >= 0)
        {
            synSt->pitch = 1;
        }
        synSt->oldlag_count = lagcount;
    }

    /* Smoothed tone value over the last six sub-blocks. */
    synSt->meangain = 0.166 * synSt->pre_ptone_tone[0] + 0.166 * synSt->pre_ptone_tone[1]
                    + 0.166 * synSt->pre_ptone_tone[2] + 0.166 * synSt->pre_ptone_tone[3]
                    + 0.166 * synSt->ptone_tone[0] + 0.166 * synSt->ptone_tone[1];
    synSt->pre_ptone_tone[3] = synSt->pre_ptone_tone[1];
    synSt->pre_ptone_tone[2] = synSt->pre_ptone_tone[0];
    synSt->pre_ptone_tone[1] = synSt->ptone_tone[1];
    synSt->pre_ptone_tone[0] = synSt->ptone_tone[0];

    return;

}


/*************************************************************************
 *
 * FUNCTION:    syndecid()
 * input:       st      - coder state (VAD flags, VAD state, LPC window)
 *              synSt   - classifier state, updated in place
 *              speech  - input signal
 *              frame   - zero-based frame index (count_frame = frame + 1)
 *              i       - index selecting st->stClass->vadFlag[i] and the
 *                        analysis position speech[(i*L_DIV)+L_SUBFR]
 * output:      returns the signal class: NOISE, SPEECH or MUSIC
 *              (UNCERTAIN is used internally and always resolved to SPEECH
 *              or MUSIC before returning)
 * DESCRIPTION: Makes the signal classification decision.  Runs the feature
 *              extractors (extract_ratio, LPC analysis + extract_isf,
 *              extract_op), applies hangover logic to each feature, combines
 *              them into a speech/music decision, and updates the run-length
 *              counters of each class.
 *
 *************************************************************************/
int syndecid(Coder_State_Plus * st,   /* i : state struct */
             SyndecidSt *synSt ,
             float speech[],
             int frame,
             int i
            )
{

    float r[M + 1];     /* Autocorrelations of windowed speech */
    float lspnew[M];    /* ISPs of the current analysis        */
    float lsf[M];       /* frequency-domain coefficients       */
    int k = 0;

    Word16 signal_sort;     /* signal type */
    Word16 music_flag;      /* music flag  */
    Word16 speech_flag;     /* speech flag */
    int vad_flag;           /* vad flag    */
    Word16 pitch_flag;      /* pitch flag after hangover protection */

    /* Sub-band energy deviation flags after hangover */
    Word16 level_meanSD_low_flag;
    Word16 level_meanSD_high_flag;
    /* Spectral deviation flags after hangover */
    Word16 lsf_meanSD_low_flag;
    Word16 lsf_meanSD_high_flag;

    /* Set the frame count */
    count_frame = frame + 1;

    /* Take the VAD decision already stored in the coder state */
    vad_flag = st->stClass->vadFlag[i];

    /* Sub-band level features */
    extract_ratio(st->vadSt, synSt);

    /* Autocorrelations of input signal */
    E_UTIL_autocorrPlus(&speech[(i*L_DIV)+L_SUBFR], r, M, L_WINDOW, st->window);
    /* Lag windowing */
    lag_wind(r, M);
    /* Levinson Durbin */
    E_LPC_lev_dur(&synSt->A_t[MP1 * 3], r, M);
    /* From A(z) to ISP */
    E_LPC_a_isp_conversion(&synSt->A_t[MP1 * 3], lspnew, synSt->lsp_old, M);
    /* Convert isps to frequency domain 0..6400 */
    E_LPC_isp_isf_conversion(lspnew, lsf, M);

    /* Interpolate LPC coefficients for the subframes */
    int_lpc_np1(synSt->lsp_old, lspnew, synSt->A_t, 4, M);

    /* Spectral deviation features */
    extract_isf(synSt , lsf);

    for (k = 0; k < M; k++)
    {
        synSt->lsp_old[k] = lspnew[k];
    }

    /* Open-loop pitch features */
    extract_op(synSt, st, speech);

    /* ---- Pitch hangover ------------------------------------------------ */
    if (synSt->pitch == 1)
    {
        synSt->pitch_counter++;
        synSt->pitch_hangover = 0;
    }
    else
    {
        synSt->pitch_counter = 0;
    }
    /* Longer pitch runs earn a longer hangover */
    if (synSt->pitch_counter > 10)
    {
        synSt->pitch_hangover = 10;
    }
    else if (synSt->pitch_counter > 5)
    {
        synSt->pitch_hangover = 5;
    }
    else if (synSt->pitch_counter > 2)
    {
        synSt->pitch_hangover = 3;
    }

    /* Pitch flag after the hangover protection */
    pitch_flag = (synSt->pitch_hangover > 0);

    if (synSt->pitch_hangover > 0)
    {
        synSt->pitch_hangover--;
    }

    /* First, classify signal into two types according to the vad flag */
    if (vad_flag == 1)
    {
        signal_sort = UNCERTAIN;
    }
    else
    {
        signal_sort = NOISE;
    }

    /* Then, classify signal into speech and music */

    /* NOTE(review): the four feature hangovers below test ">= 0" whereas
     * the pitch hangover above tests "> 0".  With a hangover that starts at
     * 0 this makes each flag true on the first call and lets the counter
     * drop to -1.  Kept as found; confirm whether this is intended. */

    /* ---- High level_meanSD hangover ------------------------------------ */
    if (synSt->level_meanSD > 0.27)
    {
        synSt->level_meanSD_high_counter++;
    }
    else
    {
        synSt->level_meanSD_high_counter = 0;
    }
    if (synSt->level_meanSD_high_counter > 15)
    {
        synSt->level_meanSD_high_hangover = 10;
    }
    else if (synSt->level_meanSD_high_counter > 10)
    {
        synSt->level_meanSD_high_hangover = 5;
    }
    else if (synSt->level_meanSD_high_counter > 5)
    {
        synSt->level_meanSD_high_hangover = 3;
    }

    /* High level_meanSD flag after the hangover protection */
    level_meanSD_high_flag = ((synSt->level_meanSD > 0.27) || (synSt->level_meanSD_high_hangover >= 0));
    if (synSt->level_meanSD_high_hangover >= 0)
    {
        synSt->level_meanSD_high_hangover--;
    }

    /* ---- High lsf_meanSD hangover -------------------------------------- */
    if (synSt->lsf_meanSD > 170000)
    {
        synSt->lsf_meanSD_high_counter++;
    }
    else
    {
        synSt->lsf_meanSD_high_counter = 0;
    }
    if (synSt->lsf_meanSD_high_counter > 15)
    {
        synSt->lsf_meanSD_high_hangover = 10;
    }
    else if (synSt->lsf_meanSD_high_counter > 10)
    {
        synSt->lsf_meanSD_high_hangover = 5;
    }
    else if (synSt->lsf_meanSD_high_counter > 5)
    {
        synSt->lsf_meanSD_high_hangover = 3;
    }

    lsf_meanSD_high_flag = ((synSt->lsf_meanSD > 170000) || (synSt->lsf_meanSD_high_hangover >= 0));
    if (synSt->lsf_meanSD_high_hangover >= 0)
    {
        synSt->lsf_meanSD_high_hangover--;
    }

    /* ---- Low level_meanSD hangover ------------------------------------- */
    if (synSt->level_meanSD < 0.07)
    {
        synSt->level_meanSD_low_counter++;
    }
    else
    {
        synSt->level_meanSD_low_counter = 0;
    }
    if (synSt->level_meanSD_low_counter > 20)
    {
        synSt->level_meanSD_low_hangover = 10;
    }
    else if (synSt->level_meanSD_low_counter > 10)
    {
        synSt->level_meanSD_low_hangover = 5;
    }
    else if (synSt->level_meanSD_low_counter > 5)
    {
        synSt->level_meanSD_low_hangover = 2;
    }

    level_meanSD_low_flag = ((synSt->level_meanSD < 0.07) || (synSt->level_meanSD_low_hangover >= 0));
    if (synSt->level_meanSD_low_hangover >= 0)
    {
        synSt->level_meanSD_low_hangover--;
    }

    /* ---- Low lsf_meanSD hangover --------------------------------------- */
    if (synSt->lsf_meanSD < 80000)
    {
        synSt->lsf_meanSD_low_counter++;
    }
    else
    {
        synSt->lsf_meanSD_low_counter = 0;
    }
    if (synSt->lsf_meanSD_low_counter > 20)
    {
        synSt->lsf_meanSD_low_hangover = 10;
    }
    else if (synSt->lsf_meanSD_low_counter > 10)
    {
        synSt->lsf_meanSD_low_hangover = 5;
    }
    else if (synSt->lsf_meanSD_low_counter > 5)
    {
        synSt->lsf_meanSD_low_hangover = 2;
    }

    lsf_meanSD_low_flag = ((synSt->lsf_meanSD < 80000) || (synSt->lsf_meanSD_low_hangover >= 0));
    if (synSt->lsf_meanSD_low_hangover >= 0)
    {
        synSt->lsf_meanSD_low_hangover--;
    }

    /* A noise frame restarts the low level_meanSD tracking */
    if (signal_sort == NOISE)
    {
        synSt->level_meanSD_low_counter = 0;
        synSt->level_meanSD_low_hangover = 0;
    }

    /* ---- First decision ------------------------------------------------ */
    music_flag = 0;
    speech_flag = 0;
    if (signal_sort == UNCERTAIN)
    {
        /* Speech decision */
        if (synSt->lsf_meanSD > 300000)
        {
            speech_flag = 1;
        }
        if ((synSt->pitch == 1) && (synSt->T_op_mean <= 43))
        {
            speech_flag = 1;
        }

        /*
         The pitch hangover keeps the tail of voiced speech marked as
         speech here; the conflict with the music flag is resolved below.
        */
        if (pitch_flag == 1)
        {
            speech_flag = 1;
        }
        if (level_meanSD_high_flag || lsf_meanSD_high_flag)
        {
            speech_flag = 1;
        }

        /* Music decision */
        if (lsf_meanSD_low_flag && level_meanSD_low_flag)
        {
            music_flag = 1;
        }

        /* Conflicting flags leave the frame UNCERTAIN */
        if ((music_flag == 1) && (speech_flag == 1))
        {
            signal_sort = UNCERTAIN;
        }
        else if (music_flag == 1)
        {
            signal_sort = MUSIC;
        }
        else if (speech_flag == 1)
        {
            signal_sort = SPEECH;
        }

        /* Very high spectral deviation always means speech */
        if (synSt->lsf_meanSD > 400000)
        {
            signal_sort = SPEECH;
        }

        /* Low energy right after a noise run stays undecided */
        if ((synSt->level_energy < 10000) && (synSt->noise_continue_counter > 5))
        {
            signal_sort = UNCERTAIN;
        }

        /*
         When pitch_flag is 1 and the music flag is 1 too, the frame is
         UNCERTAIN.  If in addition lsf_meanSD is very small and the
         previous frames were not continuous speech, classify it as music.
        */
        if ( signal_sort == UNCERTAIN)
        {
            if ((pitch_flag == 1) && (synSt->lsf_meanSD < 80000) && (synSt->speech_continue_counter < 3))
            {
                signal_sort = MUSIC;
            }
        }

        /*
         A speech frame that follows a music run and has a small
         lsf_meanSD is classified as music.
        */
        if (signal_sort == SPEECH)
        {
            if (synSt->music_continue_counter > 3)
            {
                if (synSt->lsf_meanSD < 80000)
                {
                    signal_sort = MUSIC;
                }
            }
        }

        /* Very low energy overrides everything above */
        if (synSt->level_energy < 5000)
        {
            signal_sort = UNCERTAIN;
        }
    }

    /* ---- Second pass on frames still UNCERTAIN ------------------------- */
    synSt->speech_hangover_flag = 0;
    synSt->music_hangover_flag = 0;
    if (signal_sort == UNCERTAIN)
    {
        if (synSt->music_continue_counter > 3)
        {
            if (lsf_meanSD_low_flag || level_meanSD_low_flag)
            {
                synSt->music_hangover_flag = 1;
            }
            else
            {
                synSt->music_hangover_flag = 0;
            }
        }
        /* speech_hangover_flag was cleared above and is not set anywhere in
         * this function, so the two branches testing it never fire here. */
        if (synSt->speech_hangover_flag == 1)
        {
            signal_sort = SPEECH;
        }
        if (synSt->music_hangover_flag == 1)
        {
            signal_sort = MUSIC;
        }
        if ((synSt->music_hangover_flag == 1) && (synSt->speech_hangover_flag == 1))
        {
            signal_sort = UNCERTAIN;
        }
        if (synSt->music_continue_counter > 1)
        {
            if (synSt->lsf_meanSD < 80000)
            {
                signal_sort = MUSIC;
            }
        }
        if ( signal_sort == UNCERTAIN)
        {
            if (synSt->music_continue_counter > 20)
            {
                signal_sort = MUSIC;
            }
            if (synSt->speech_continue_counter > 20)
            {
                signal_sort = SPEECH;
            }
        }
        if ((synSt->level_energy < 5000))
        {
            signal_sort = UNCERTAIN;
        }
    }

    /* ---- Long-run inertia ---------------------------------------------- */
    if (signal_sort != NOISE)
    {
        if (synSt->music_continue_counter > 150)
        {
            if (synSt->lsf_meanSD < 170000)
            {
                signal_sort = MUSIC;
            }
        }
        if (synSt->speech_continue_counter > 150)
        {
            if (synSt->lsf_meanSD > 170000)
            {
                signal_sort = SPEECH;
            }
        }
        if ((synSt->level_energy < 5000))
        {
            signal_sort = UNCERTAIN;
        }
    }

    /* ---- Resolve UNCERTAIN into speech or music ------------------------ */
    if (signal_sort == UNCERTAIN)
    {
        synSt->uncertain_continue_counter++;
        if (synSt->speech_continue_counter > 3)
        {
            /* After a speech run: stay speech for a while, then switch */
            if (synSt->uncertain_continue_counter < 15)
            {
                signal_sort = SPEECH;
            }
            else
            {
                signal_sort = MUSIC;
            }
        }
        else
        {
            /* Otherwise: music first, speech once the run gets long */
            if (synSt->uncertain_continue_counter < 15)
            {
                signal_sort = MUSIC;
            }
            else
            {
                signal_sort = SPEECH;
            }
        }

    }
    else
    {
        synSt->uncertain_continue_counter = 0;
    }

    /* ---- Run-length counters of the final class ------------------------ */
    if (signal_sort == MUSIC)
    {
        synSt->music_continue_counter++;
    }
    else
    {
        synSt->music_continue_counter = 0;
    }

    if (signal_sort == SPEECH)
    {
        synSt->speech_continue_counter++;
    }
    else
    {
        synSt->speech_continue_counter = 0;
    }

    if (signal_sort == NOISE)
    {
        synSt->noise_continue_counter++;
    }
    else
    {
        synSt->noise_continue_counter = 0;
    }

    return signal_sort;
}


/******************************************************************************
*
* Function: syndecid_init
* Description: Allocates state memory and initializes state memory.
*
* On success stores the new state in *state and returns 0.  Returns -1 when
* state is NULL or when the allocation fails; in the latter case *state is
* left as NULL.  The state must be released with syndecid_exit().
*
* The structure is allocated zero-filled so that fields which are neither
* cleared here nor in syndecid_reset() do not start with indeterminate
* contents.
*
*******************************************************************************
*/
int syndecid_init(/* return: non-zero with error, zero for ok. */
    SyndecidSt **state /* i/o : State structure */
)
{
    SyndecidSt *s;

    if (state == (SyndecidSt **)NULL)
    {
        fprintf(stderr, "syndecid_init: invalid parameter\n");
        return -1;
    }
    *state = NULL;

    /* allocate zero-initialised memory */
    if ((s = (SyndecidSt *)calloc(1, sizeof(SyndecidSt))) == NULL)
    {
        fprintf(stderr, "syndecid_init: can not malloc state structure\n");
        return -1;
    }
    syndecid_reset(s);

    s->oldlag = 0;
    s->oldlag_count = 0;
    set_zero(s->ptone_tone, 2);
    /* NOTE(review): extract_op() only touches pre_ptone_tone[0..3]; confirm
     * that the array really holds 8 elements. */
    set_zero(s->pre_ptone_tone, 8);
    set_zero(s->mem_w, M);

    /* Counters, hangovers and flags of the classifier */
    s->pitch_counter = 0;
    s->pitch_hangover = 0;
    s->vad_counter = 0;
    s->vad_hangover = 0;
    s->level_meanSD_low_counter = 0;
    s->level_meanSD_low_hangover = 0;
    s->level_meanSD_high_counter = 0;
    s->level_meanSD_high_hangover = 0;
    s->lsf_meanSD_low_counter = 0;
    s->lsf_meanSD_low_hangover = 0;
    s->lsf_meanSD_high_counter = 0;
    s->lsf_meanSD_high_hangover = 0;
    s->music_counter = 0;
    s->music_hangover = 0;
    s->speech_counter = 0;
    s->speech_hangover = 0;
    s->music_continue_counter = 0;
    s->speech_continue_counter = 0;
    s->noise_continue_counter = 0;
    s->uncertain_continue_counter = 0;
    s->speech_hangover_flag = 0;
    s->music_hangover_flag = 0;
    s->meangain_hangover_flag = 0;
    s->meangain_continue_counter = 0;
    s->meangain_flag = 0;
    s->vad_continual_counter = 0;
    s->vad_continual_flag = 0;
    s->vad_hangover_flag = 0;

    *state = s;

    return 0;
}

/******************************************************************************
*
* Function: syndecid_reset
* Description: Clears the history buffers of the classifier state.
*
* Returns 0 on success and -1 (after printing a message to stderr) when
* state is NULL.
*
* NOTE(review): the memset that starts at level_12_old covers
* (5*M + 2*COMPLEN + 79) Word16 elements, which presumably spans several
* consecutive members of SyndecidSt -- verify against the struct layout.
*
*******************************************************************************
*/
int syndecid_reset ( /* return: non-zero with error, zero for ok. */
    SyndecidSt *state /* i/o : State structure */
)
{
    if (state != (SyndecidSt *) NULL)
    {
        /* Spectral vectors and their running mean */
        memset(state->lsf_old, 0, M * sizeof(float));
        memset(state->lsp_old, 0, M * sizeof(float));
        memset(state->lsfmean, 0, M * sizeof(float));

        /* Four-entry deviation and energy histories */
        memset(state->pre_lsf_SD, 0, 4 * sizeof(float));
        memset(state->pre_lsfmean_SD, 0, 4 * sizeof(float));
        memset(state->pre_level_energy, 0, 4 * sizeof(double));

        /* Word16 region starting at level_12_old */
        memset(state->level_12_old, 0, (5*M + 2*COMPLEN + 79) * sizeof(Word16));

        return 0;
    }

    fprintf(stderr, "syndecid_reset: invalid parameter\n");
    return -1;
}

/******************************************************************************
*
* Function: syndecid_exit
* Description: Frees the state memory and sets the caller's pointer to NULL.
*              Does nothing when state or *state is NULL.
*
*******************************************************************************
*/
void syndecid_exit (
    SyndecidSt **state /* i/o : State structure */
)
{
    if (state != NULL && *state != NULL)
    {
        /* deallocate memory and clear the caller's handle */
        free(*state);
        *state = NULL;
    }
}