// Source: www.pudn.com > speechVAD.rar > VAD.cpp


    // VAD.cpp: implementation of the CVAD class. 
// 
////////////////////////////////////////////////////////////////////// 
 
#include "stdafx.h" 
#include "VAD.h" 
#include "DllUseLib.h" 
 
 
////////////////////////////////////////////////////////////////////// 
// Construction/Destruction 
////////////////////////////////////////////////////////////////////// 
 
// Coefficient table handed to own_ippsIIR_BiQuadDirect_16s() by
// PER_BandPassAndDownSample(): two rows of six 16-bit values.
// NOTE(review): presumably one row per biquad section, in b0, b1, b2, a0, a1, a2
// order with the fourth value (14) acting as a fixed-point scale factor --
// confirm against the own_ippsIIR_BiQuadDirect_16s implementation.
short pTaps[] = { 
    500,  1001,  500,  14,  -22366,   7824, 
    500,  1001,  500,  14,  -26406,  12198 
}; 
 
// Construct a detector; all set-up work is delegated to VADInit().
CVAD::CVAD()
{
    this->VADInit();
}
 
// Release the three buffers allocated by VADInit().
CVAD::~CVAD()
{
    // The state block is allocated in VADInit() as `new BYTE[cStateBytes]`, so
    // it has to be released through a BYTE pointer: delete[] on the
    // VADStateStruct* would not match the new[] expression that created it.
    delete[] reinterpret_cast<BYTE*>(pState);
    pState = NULL;

    // delete[] on a null pointer is a no-op, so no explicit checks are needed.
    delete[] m_pReadBuffer;
    m_pReadBuffer = NULL;

    delete[] m_pDecResultBuf;
    m_pDecResultBuf = NULL;
}
 
void CVAD::VADInit() 
{ 
    cWinSamps            = WINDOW_LEN_SAMPS;  //samples in one frame 
    cFrameShiftSamps     = FRAME_SHIFT_SAMPS; //samples shift 
    m_nAllDateLenInSamps = VAD_ALL_SAMPS;     //samples in the buffer to be processed 
    cReuseSamps          = cWinSamps - cFrameShiftSamps;//reuse samps 
    m_pReadBuffer = NULL; 
    m_pDecResultBuf = NULL; 
    pState = NULL; 
 
    //alloc DecResultBuf 
    int   nDecNumInFirstVoice = (m_nAllDateLenInSamps-cWinSamps)/cFrameShiftSamps + 1; 
    int   nDecNumInCommVoice  = m_nAllDateLenInSamps / cFrameShiftSamps; 
    m_pDecResultBuf = new int[nDecNumInCommVoice]; 
 
    VAD_GetStateSizeBytes(cWinSamps, &cStateBytes);    /* compute the size in bytes required for the VAD Internal State */ 
 
    pState = (VADStateStruct*)(new BYTE[cStateBytes]);  
    m_pReadBuffer = new short[cWinSamps]; 
 
    VAD_Init(pState, FRAME_SHIFT_MSEC, cWinSamps, SAMPLING_FREQUENCY_HZ); 
 
 
    prevDecisionState = INACTIVE; 
    prevInputSample = 0; 
    m_bIsFirstSegment = TRUE;//when init VAD first voise segment 
} 
 
///////////////////////////// 
//VAD process frame 
/* 
// in:   
// pFrameBuffer:     pointer to this buffer 
// bufferLenInSams:  buffer len in samps // VAD_ALL_SAMPS 
// isFistVoise:      first voise segment 
// out: VAD descion 
*//////////////////////////// 
int CVAD::VADProcessFrame(short * pFrameBuffer, int bufferLenInSmps) 
{ 
    int         nDecNumInFirstVoice = (bufferLenInSmps-cWinSamps)/cFrameShiftSamps + 1; 
    int         nDecNumInCommVoice = bufferLenInSmps / cFrameShiftSamps; 
    int         nReadSampsNum; 
    int         nSmpsLeftInBuf = bufferLenInSmps; 
    int         nIndex = 0; 
    int         VAD_FinalDec;   
    VADDecisionState    curDecisionState = NODECISION; 
 
    if (m_bIsFirstSegment == TRUE) 
    { 
        memset(m_pDecResultBuf, -1, nDecNumInCommVoice*sizeof(int)); 
         
        //read the first frame to process 
        nReadSampsNum = cWinSamps; 
        memcpy(m_pReadBuffer, pFrameBuffer, nReadSampsNum* sizeof(short)); //copy the first frame into process buffer 
        nSmpsLeftInBuf -= nReadSampsNum; 
 
        //remove DC 
        prevInputSample = 0; 
        own_ippsCompensateOffsetQ15_16s_I(m_pReadBuffer, nReadSampsNum, &prevInputSample, 0, DC_RMV_COEF); 
        prevOutputSample = m_pReadBuffer[nReadSampsNum-1]; 
 
        while (nSmpsLeftInBuf >= cFrameShiftSamps)  
        { 
            /* perform VAD */ 
            VAD_ProcessFrame(pState, m_pReadBuffer, cWinSamps, &curDecisionState, &decisionFrameNum); 
 
            /*put this dec in dec buffer*/ 
            if ((nIndex >= 0) && (nIndex < nDecNumInFirstVoice)) 
            { 
                m_pDecResultBuf[nIndex] = curDecisionState; 
            } 
 
            /* save the samples to be re-used from the recently processed frame and read another (lookahead) frame shift of data. */ 
            //ippsCopy_16s(m_pReadBuffer+cFrameShiftSamps, m_pReadBuffer, cReuseSamps); 
            memcpy(m_pReadBuffer, m_pReadBuffer+cFrameShiftSamps, cReuseSamps*sizeof(short)); 
            memcpy(m_pReadBuffer+cReuseSamps, pFrameBuffer+cWinSamps+nIndex*cFrameShiftSamps, cFrameShiftSamps*sizeof(short)); 
             
            nSmpsLeftInBuf -= cFrameShiftSamps; 
            nIndex++; 
            
            /* remove DC */ 
            own_ippsCompensateOffsetQ15_16s_I(m_pReadBuffer+cReuseSamps, cFrameShiftSamps, &prevInputSample, prevOutputSample, DC_RMV_COEF); 
            prevOutputSample = m_pReadBuffer[cReuseSamps+cFrameShiftSamps-1]; 
        } 
 
        /*process the dec buffer*/ 
        VAD_FinalDec = VADProcessDecBuf(&prevDecisionState, nDecNumInFirstVoice, m_pDecResultBuf);//prevDecisionState changed in the func 
 
        m_bIsFirstSegment = FALSE; 
    } 
    else//in the process of qq chat  
    { 
        memset(m_pDecResultBuf, -1, nDecNumInCommVoice*sizeof(int)); 
 
        //copy the first shift frame to buffer to process 
        //ippsCopy_16s(m_pReadBuffer+cFrameShiftSamps, m_pReadBuffer, cReuseSamps); 
        memcpy(m_pReadBuffer, m_pReadBuffer+cFrameShiftSamps, cReuseSamps); 
        memcpy(m_pReadBuffer+cReuseSamps, pFrameBuffer, cFrameShiftSamps*sizeof(short)); 
             
        /* remove DC */ 
        own_ippsCompensateOffsetQ15_16s_I(m_pReadBuffer+cReuseSamps, cFrameShiftSamps, &prevInputSample, prevOutputSample, DC_RMV_COEF); 
        prevOutputSample = m_pReadBuffer[cReuseSamps+cFrameShiftSamps-1]; 
     
        nSmpsLeftInBuf -= cFrameShiftSamps; 
 
        while (nSmpsLeftInBuf >= cFrameShiftSamps) 
        { 
            /* perform VAD */ 
            VAD_ProcessFrame(pState, m_pReadBuffer, cWinSamps, &curDecisionState, &decisionFrameNum); 
            /*put this dec in dec buffer*/ 
            if ((nIndex >= 0) && (nIndex < nDecNumInCommVoice)) 
            { 
                m_pDecResultBuf[nIndex] = curDecisionState; 
            } 
                nIndex++; 
 
             /* save the samples to be re-used from the recently processed frame and read another (lookahead) frame shift of data. */ 
            //ippsCopy_16s(m_pReadBuffer+cFrameShiftSamps, m_pReadBuffer, cReuseSamps); 
            memcpy(m_pReadBuffer, m_pReadBuffer+cFrameShiftSamps, cReuseSamps); 
            memcpy(m_pReadBuffer+cReuseSamps, pFrameBuffer+nIndex*cFrameShiftSamps, cFrameShiftSamps*sizeof(short)); 
             
            nSmpsLeftInBuf -= cFrameShiftSamps; 
            
            /* remove DC */ 
            own_ippsCompensateOffsetQ15_16s_I(m_pReadBuffer+cReuseSamps, cFrameShiftSamps, &prevInputSample, prevOutputSample, DC_RMV_COEF); 
            prevOutputSample = m_pReadBuffer[cReuseSamps+cFrameShiftSamps-1]; 
 
        } 
        /*process the dec buffer*/ 
        VAD_FinalDec = VADProcessDecBuf(&prevDecisionState, nDecNumInFirstVoice, m_pDecResultBuf);//prevDecisionState changed in the func 
 
    } 
 
    return VAD_FinalDec; 
} 
 
// NOTE(review): this span is damaged. The body of VADProcessDecBuf() breaks
// off inside its first `for` statement and runs straight into code that uses
// pInFrame, len, pDecisionState and pDecisionFrame -- none of which are
// parameters of VADProcessDecBuf(). Presumably the rest of VADProcessDecBuf()
// and the head of VAD_ProcessFrame() were lost when the file was extracted;
// the missing text has to be restored from the original source before this
// span can compile. The code below is left exactly as found.
//
// preDec is &prevDecisionState; it is changed inside the function.
 
int CVAD::VADProcessDecBuf(VADDecisionState *preDec, int bufLen, int *pDecBuffer) 
{ 
    int i; 
    int return_Value; 
    BOOL decExist = FALSE; 
 
    if (*preDec == INACTIVE) 
    { 
        /* NOTE(review): text is missing from the middle of the next line */
        for (i=0; iframeNum >= 2147483647) 
    { 
        /* presumably the frame counter is restarted here instead of being
           allowed to pass 2147483647 -- confirm once the lost condition is restored */
        pState->frameNum = 1000; 
    } 
    else 
    { 
       pState->frameNum++; 
    } 
 
 
    /* Energy-Based state update */ 
    E_UpdateEnergyState(pInFrame, pState->frameNum, pState->pEState); 
 
    /* Periodicity-Based state update */ 
    PER_UpdatePerState(pInFrame, pState->frameNum, len, pState->pPerState); 
 
    /* State Machine update */ 
    SM_UpdateState(pState); 
 
    /* Update the output decision variables based on the VAD internal state */ 
    if (pState->pSM->uttHasStartedFlag) 
    { 
        /* if utterance start detected */ 
        *pDecisionState = ACTIVE; 
        *pDecisionFrame = pState->pSM->uttBegFrameNum; 
 
        /* reset the start flag for the next utterance */ 
        pState->pSM->uttHasStartedFlag=0; 
    } 
    else if (pState->pSM->uttHasEndedFlag) 
    { 
        /* if utterance end detected */ 
        *pDecisionState = INACTIVE; 
        *pDecisionFrame = pState->pSM->uttEndFrameNum; 
 
        /* reset the end flag for the next utterance */ 
        pState->pSM->uttHasEndedFlag = 0; 
    } 
    else 
    { 
        /* if neither start or end of utterance was detected */ 
        *pDecisionState = NODECISION; 
        *pDecisionFrame = -1; 
    } 
} 
 
 
/********************************************************************************
// Name:             VAD_GetStateSizeBytes
// Description:      Report how many bytes one complete VAD state block needs:
//                   the VADStateStruct itself plus the energy, periodicity and
//                   state-machine sub-states.
// Input Arguments:
//                   cWinSamps      - size of an input data frame in samples
// Output Arguments:
//                   pNumStateBytes - receives the total size in bytes
// Returns:          None
********************************************************************************/
void CVAD::VAD_GetStateSizeBytes(int cWinSamps, int* pNumStateBytes)
{
    int energyBytes       = 0;
    int periodicityBytes  = 0;
    int stateMachineBytes = 0;

    /* ask each component for its own requirement */
    E_GetStateSizeBytes(&energyBytes);
    PER_GetStateSizeBytes(cWinSamps, &periodicityBytes);
    SM_GetStateSizeBytes(&stateMachineBytes);

    /* fixed part of the VADStateStruct plus the three sub-states */
    *pNumStateBytes = static_cast<int>(sizeof(VADStateStruct))
                    + energyBytes
                    + periodicityBytes
                    + stateMachineBytes;
}
 
/********************************************************************************
// Name:            VAD_Init
// Description:     Lay out the energy, periodicity and state-machine sub-states
//                  one after another in the memory that follows the
//                  VADStateStruct, initialise each of them and reset the frame
//                  counter. The memory itself is allocated by the caller.
//
// Input Arguments:
//                  frameShiftMsec - frame shift for overlapping frames in msec
//                  cWinSamps      - size of an input data frame in samples
//                  sampFreqHz     - sampling frequency of the input data in Hz
// Input/Output Arguments:
//                  pState         - pointer to the VADStateStruct to initialise
//
// Returns:         None
********************************************************************************/
void CVAD::VAD_Init(VADStateStruct* pState, int frameShiftMsec, int cWinSamps, int sampFreqHz)
{
    int   subStateBytes = 0;  /* size of the sub-state just placed */

    /* the first sub-state starts right behind the pMemoryBlock member */
    char* pCursor = reinterpret_cast<char*>(&pState->pMemoryBlock + 1);

    /* energy state */
    pState->pEState = reinterpret_cast<EStateStruct*>(pCursor);
    E_Init(pState->pEState, cWinSamps, frameShiftMsec);
    E_GetStateSizeBytes(&subStateBytes);
    pCursor += subStateBytes;

    /* periodicity state */
    pState->pPerState = reinterpret_cast<PERStateStruct*>(pCursor);
    PER_Init(pState->pPerState, frameShiftMsec, cWinSamps, sampFreqHz);
    PER_GetStateSizeBytes(cWinSamps, &subStateBytes);
    pCursor += subStateBytes;

    /* state machine */
    pState->pSM = reinterpret_cast<SMStruct*>(pCursor);
    SM_Init(pState->pSM, frameShiftMsec);
    SM_GetStateSizeBytes(&subStateBytes);
    pCursor += subStateBytes;

    /* no frame has been processed yet */
    pState->frameNum = 0;
}
 
/********************************************************************************
// Name:             VAD_ProcessEndOfInput
// Description:      Called at the end of the input data stream. If the state
//                   machine has an utterance end pending, that end point is
//                   reported; otherwise END_OF_STREAM is reported.
//
// Input/Output Arguments:
//                  pState         - pointer to the VAD state structure
// Output Arguments:
//                  pDecisionState - receives INACTIVE or END_OF_STREAM
//                  pDecisionFrame - receives the utterance end frame number, or
//                                   -1 when END_OF_STREAM is reported
// Returns:         None
********************************************************************************/
void CVAD::VAD_ProcessEndOfInput(
                              VADStateStruct*    pState,
                              VADDecisionState*  pDecisionState,
                              int*               pDecisionFrame)
{
    SMStruct* const pMachine = pState->pSM;

    if (!pMachine->uttHasEndedFlag)
    {
        /* nothing pending: declare end of stream */
        *pDecisionState = END_OF_STREAM;
        *pDecisionFrame = -1;
        return;
    }

    /* an utterance end is pending: report it ... */
    *pDecisionState = INACTIVE;
    *pDecisionFrame = pMachine->uttEndFrameNum;

    /* ... and clear the flag for the next utterance */
    pMachine->uttHasEndedFlag = 0;
}
 
/* EOF */ 
 
/********************************************************************************
// Name:             E_GetStateSizeBytes
// Description:      Report the size in bytes of the energy state structure.
// Input Arguments:
//                   None
// Output Arguments:
//                   pNumStateBytes - receives sizeof(EStateStruct)
// Returns:          None
********************************************************************************/
void CVAD::E_GetStateSizeBytes(int* pNumStateBytes)
{
    /* the energy state has no variable-length part */
    *pNumStateBytes = static_cast<int>(sizeof(EStateStruct));
}
 
/********************************************************************************
// Name:            E_Init
// Description:     Initialize the energy state structure. Every field that the
//                  energy code reads is given a defined value here, because the
//                  structure lives in raw, uninitialised memory.
//
// Input Arguments:
//                  cWinSamps      - size of an input data frame in samples
//                  frameShiftMsec - frame shift for overlapping frames in msec;
//                                   must be non-zero (it is used as a divisor)
// Input/Output Arguments:
//                  pEState        - pointer to the EStateStruct to initialise
// Returns:         None
********************************************************************************/
void CVAD::E_Init(EStateStruct* pEState, int cWinSamps, int frameShiftMsec)
{
    /* number of samples in a frame */
    pEState->cFrameSamples = cWinSamps;

    /*
    // Number of right shifts applied to each term of the energy summation:
    // the number of significant bits in cWinSamps. It is set unconditionally
    // so that a non-positive cWinSamps yields 0 rather than an indeterminate
    // shift count.
    */
    pEState->cScaleFactor = 0;
    for (int remaining = cWinSamps; remaining > 0; remaining >>= 1)
    {
        pEState->cScaleFactor++;
    }

    /* half the frame shift, used to round the msec -> frames conversion */
    const int halfFrameShiftMsec = frameShiftMsec/2;

    /* number of initial frames over which the noise floor is averaged */
    pEState->cInitNoiseEstFrames = (E_INIT_NOISE_ESTIMATE_MSEC + halfFrameShiftMsec) / frameShiftMsec;

    /* starting value for the running average that initialises the noise floor */
    pEState->noiseFloorDB = E_MIN_ENERGY_DB;

    /*
    // Defined starting values for the fields that are otherwise first written
    // while processing a frame; the threshold uses the same floor + offset
    // relation as E_ComputeNoiseEnergyThreshDB.
    */
    pEState->energyDB      = E_MIN_ENERGY_DB;
    pEState->noiseThreshDB = pEState->noiseFloorDB + E_THRESH_OFFSET_DB_Q15;
}
 
/********************************************************************************
// Name:            E_UpdateEnergyState
// Description:     Bring the energy state up to date for one input frame: the
//                  frame energy first, then the noise threshold that depends
//                  on it.
//
// Input Arguments:
//                  pFrame    - input frame
//                  frameNum  - current frame number, passed on to the noise
//                              threshold computation
// Input/Output Arguments:
//                  pEState   - pointer to the energy state structure
//
// Returns:         None
********************************************************************************/
void CVAD::E_UpdateEnergyState(const short* pFrame, int frameNum, EStateStruct* pEState)
{
    /* step 1: energy of this frame, in DB */
    E_ComputeEnergyDB(pFrame, pEState);

    /* step 2: noise floor / threshold for this frame */
    E_ComputeNoiseEnergyThreshDB(pEState, frameNum);
}
 
/******************************************************************************** 
// Name:            E_ComputeEnergyDB 
// Description:     Compute the energy of the input frame in DB. The energy is computed 
//                  as the variance of the samples in the input frame. The variance is 
//                  subsequently converted into DB (10log10). 
// 
// Input Arguments: 
//                  pFrame         - input frame 
// Input/Output Arguments: 
//                  pEState      - pointer to an EState structure 
// 
// Returns:         None 
********************************************************************************/ 
void CVAD::E_ComputeEnergyDB(const short* pFrame, EStateStruct* pEState) 
{ 
    int  sumSqr;    /* sum square of the input samples */ 
    int     i; 
 
    sumSqr = 0; 
 
    /* compute sum-square and sum of the input samples */ 
    for (i=0; icFrameSamples; i++) 
    { 
        sumSqr  += (pFrame[i]*pFrame[i]) >> pEState->cScaleFactor; 
    } 
 
    /* convert to DB using Intel(R) IPP call */ 
    if (sumSqr > 0) 
    { 
        own_ipps10Log10_32s_Sfs(&sumSqr, &(pEState->energyDB), 1, -Q15); 
    } 
    else 
    { 
        pEState->energyDB = E_MIN_ENERGY_DB; 
    } 
} 
 
/********************************************************************************
// Name:            E_ComputeNoiseEnergyThreshDB
// Description:     Update the noise floor and derive the noise energy threshold
//                  (floor + E_THRESH_OFFSET_DB_Q15) for the current frame.
//
//                  For the first cInitNoiseEstFrames frames the floor is the
//                  running average of the frame energies. Afterwards it is a
//                  weighted average of the frame energy and the old floor:
//                  the slow adaptation factor is used when the frame energy is
//                  above the previous threshold, the fast one otherwise.
//
// Input Arguments:
//                  frameNum  - current frame number (1 for the first frame)
// Input/Output Arguments:
//                  pEState   - pointer to the energy state structure
//
// Returns:         None
********************************************************************************/
void CVAD::E_ComputeNoiseEnergyThreshDB(EStateStruct* pEState, int frameNum)
{
    if (frameNum > pEState->cInitNoiseEstFrames)
    {
        /* steady state: adapt the floor towards the current frame energy */
        if (pEState->energyDB > pEState->noiseThreshDB)
        {
            WEIGHTED_AVG_Q15(pEState->energyDB, pEState->noiseFloorDB, E_SLOW_ADAPT_ALPHA_Q15, pEState->noiseFloorDB)
        }
        else
        {
            WEIGHTED_AVG_Q15(pEState->energyDB, pEState->noiseFloorDB, E_FAST_ADAPT_ALPHA_Q15, pEState->noiseFloorDB)
        }
    }
    else
    {
        /* start-up: running average over the frames seen so far */
        pEState->noiseFloorDB = pEState->noiseFloorDB * (frameNum-1) + pEState->energyDB;
        pEState->noiseFloorDB /= frameNum;
    }

    /* the threshold sits a fixed offset above the floor */
    pEState->noiseThreshDB = pEState->noiseFloorDB + E_THRESH_OFFSET_DB_Q15;
}
 
/* EOF */ 
/********************************************************************************
// Name:             PER_GetStateSizeBytes
// Description:      Report the size in bytes of the periodicity state: the
//                   PERStateStruct itself, the band-pass frame buffer, the
//                   down-sampled frame buffer (each with its alignment bytes)
//                   and the IIR delay line.
// Input Arguments:
//                   cWinSamps      - size of an input data frame in samples
// Output Arguments:
//                   pNumStateBytes - receives the calculated state size
// Returns:          None
// Notes:            The layout assumed here must match the one PER_Init uses.
********************************************************************************/

void CVAD::PER_GetStateSizeBytes(int cWinSamps, int* pNumStateBytes)
{
    int downSampledSamps;
    int partBytes;

    /* fixed part */
    int totalBytes = sizeof(PERStateStruct);

    /* band-pass filtered frame, followed by its alignment bytes */
    partBytes = cWinSamps * sizeof(short);
    totalBytes += partBytes;
    partBytes = partBytes % sizeof(int);
    totalBytes += partBytes;

    /* down-sampled frame, followed by its alignment bytes */
    own_ippsDownSampleSize(cWinSamps, PER_DOWNSAMPLE_FACTOR, PER_DOWNSAMPLE_PHASE, &downSampledSamps);
    partBytes = downSampledSamps * sizeof(short);
    totalBytes += partBytes;
    partBytes = partBytes % sizeof(int);
    totalBytes += partBytes;

    /* delay line of the IIR filter: two ints per biquad */
    partBytes = (NUMBIQUAD*2) * sizeof(int);
    totalBytes += partBytes;

    *pNumStateBytes = totalBytes;
}
 
/********************************************************************************
// Name:            PER_Init
// Description:     Initialize the periodicity state structure and hand out the
//                  memory that follows it to the band-pass buffer, the
//                  down-sampled buffer and the IIR delay line, in that order.
//
// Input Arguments:
//                  frameShiftMsec - frame shift for overlapping frames in msec
//                  cWinSamps      - size of an input data frame in samples
//                  sampFreqHz     - sampling frequency of the input data in Hz
// Input/Output Arguments:
//                  pPerState      - pointer to the PERStateStruct to initialise
// Returns:         None
********************************************************************************/

void CVAD::PER_Init(PERStateStruct* pPerState, int frameShiftMsec, int cWinSamps, int sampFreqHz)
{
    /* half the frame shift, used to round the msec -> frames conversion */
    const int roundingMsec = frameShiftMsec/2;

    /* scalar settings */
    pPerState->cInitPerEstFrames = (PER_INIT_PER_ESTIMATE_MSEC + roundingMsec) / frameShiftMsec;
    pPerState->minPeriodSamps = sampFreqHz / (PER_DOWNSAMPLE_FACTOR * PER_MAX_PITCH_FREQ_HZ);
    pPerState->maxPeriodSamps = sampFreqHz / (PER_DOWNSAMPLE_FACTOR * PER_MIN_PITCH_FREQ_HZ);
    pPerState->smoothPeriodicityQ15 = 0;

    /* the buffers start right behind the pMemoryBlock member */
    char* pCursor = reinterpret_cast<char*>(&pPerState->pMemoryBlock + 1);
    int   bufBytes;

    /* buffer for the band-pass filtered frame, padded to a word boundary */
    pPerState->pBPFrame = reinterpret_cast<short*>(pCursor);
    bufBytes = cWinSamps * sizeof(short);
    bufBytes += (bufBytes % sizeof(int));
    pCursor += bufBytes;

    /* buffer for the down-sampled frame, padded to a word boundary */
    own_ippsDownSampleSize(cWinSamps, PER_DOWNSAMPLE_FACTOR, PER_DOWNSAMPLE_PHASE, &pPerState->cDSFrameSamps);
    pPerState->pDSFrame = reinterpret_cast<short*>(pCursor);
    bufBytes = pPerState->cDSFrameSamps * sizeof(short);
    bufBytes += (bufBytes % sizeof(int));
    pCursor += bufBytes;

    /* delay line of the IIR filter, cleared to zero */
    pPerState->pDelayLine = reinterpret_cast<int*>(pCursor);
    for (int slot = 0; slot < (NUMBIQUAD*2); ++slot)
    {
        pPerState->pDelayLine[slot] = 0;
    }
}
 
/********************************************************************************
// Name:            PER_UpdatePerState
// Description:     Bring the periodicity state up to date for one input frame:
//                  filter and down-sample the frame, measure its periodicity,
//                  then fold the measurement into the smoothed value.
//
// Input Arguments:
//                  pFrame    - input frame
//                  frameNum  - current frame number, passed on to the smoothing
//                  len       - number of samples in the input frame
// Input/Output Arguments:
//                  pPerState - pointer to the periodicity state structure
//
// Returns:         None
********************************************************************************/
void CVAD::PER_UpdatePerState(const short* pFrame, int frameNum, int len, PERStateStruct* pPerState)
{
    /* step 1: band-pass filter and down-sample into pPerState->pDSFrame */
    PER_BandPassAndDownSample(pFrame, len, pPerState);

    /* step 2: periodicity of the down-sampled frame */
    PER_ComputePeriodicity(pPerState->pDSFrame, pPerState->cDSFrameSamps, pPerState);

    /* step 3: smooth against the history */
    PER_SmoothPeriodicity(pPerState, frameNum);
}
 
/********************************************************************************
// Name:            PER_BandPassAndDownSample
// Description:     Filter the input through the biquad cascade described by
//                  pTaps (a 70-1000Hz band-pass according to the original
//                  author's note) into pPerState->pBPFrame, then down-sample
//                  the result into pPerState->pDSFrame.
//
// Input Arguments:
//                  pSrc      - input data
//                  srcLen    - number of samples in the input buffer
// Input/Output Arguments:
//                  pPerState - periodicity state; its pBPFrame, pDSFrame and
//                              pDelayLine buffers are used
// Returns:         None
********************************************************************************/

void CVAD::PER_BandPassAndDownSample(const short* pSrc, int srcLen, PERStateStruct* pPerState)
{
    /* the down-sampler takes its phase through a pointer, so it needs a variable */
    int dsPhase = PER_DOWNSAMPLE_PHASE;

    /* filter */
    own_ippsIIR_BiQuadDirect_16s(pSrc, pPerState->pBPFrame, srcLen, pTaps, NUMBIQUAD, pPerState->pDelayLine);

    /* downsample */
    own_ippsDownSample_16s(pPerState->pBPFrame, srcLen, &dsPhase, pPerState->pDSFrame, PER_DOWNSAMPLE_FACTOR);
}
 
 
/********************************************************************************
// Name:            PER_ComputePeriodicity
// Description:     Measure the periodicity of a frame with
//                  own_ippsPeriodicityLSPE_16s, searching between the minimum
//                  and maximum period stored in the state. The results are
//                  written to pPerState->periodicityQ15 and pPerState->period.
//
// Input Arguments:
//                  pInFrame  - input data
//                  len       - number of samples in the input buffer
// Input/Output Arguments:
//                  pPerState - pointer to the periodicity state structure
//
// Returns:         None
********************************************************************************/

void CVAD::PER_ComputePeriodicity(const short* pInFrame, int len, PERStateStruct* pPerState)
{
    own_ippsPeriodicityLSPE_16s(pInFrame,
                                len,
                                &(pPerState->periodicityQ15),
                                &(pPerState->period),
                                pPerState->maxPeriodSamps,
                                pPerState->minPeriodSamps);
}
 
 
/********************************************************************************
// Name:            PER_SmoothPeriodicity
// Description:     Fold the periodicity of the current frame into the smoothed
//                  periodicity. For the first cInitPerEstFrames frames this is
//                  a running average; afterwards it is a weighted average
//                  using PER_ADAPT_ALPHA_Q15.
//
// Input Arguments:
//                  frameNum  - current frame number (1 for the first frame)
// Input/Output Arguments:
//                  pPerState - pointer to the periodicity state structure
//
// Returns:         None
********************************************************************************/

void CVAD::PER_SmoothPeriodicity(PERStateStruct* pPerState, int frameNum)
{
    if (frameNum > pPerState->cInitPerEstFrames)
    {
        /* steady state: weighted average with the history */
        WEIGHTED_AVG_Q15(pPerState->periodicityQ15, pPerState->smoothPeriodicityQ15, PER_ADAPT_ALPHA_Q15,
          pPerState->smoothPeriodicityQ15)
    }
    else
    {
        /* start-up: running average over the frames seen so far */
        pPerState->smoothPeriodicityQ15 = pPerState->smoothPeriodicityQ15 * (frameNum-1) + pPerState->periodicityQ15;
        pPerState->smoothPeriodicityQ15 /= frameNum;
    }
}
 
/* EOF */ 
/********************************************************************************
// Name:             SM_GetStateSizeBytes
// Description:      Report the size in bytes of the State Machine (SM)
//                   structure.
// Output Arguments:
//                   pNumStateBytes  - receives sizeof(SMStruct)
//
// Returns:          None
********************************************************************************/
void CVAD::SM_GetStateSizeBytes(int* pNumStateBytes)
{
    /* the state machine has no variable-length part */
    *pNumStateBytes = static_cast<int>(sizeof(SMStruct));
}
 
/********************************************************************************
// Name:            SM_Init
// Description:     Initialize the state machine structure: convert the
//                  millisecond thresholds into frame counts, clear all
//                  counters, flags and frame numbers, and start in SILENCE.
//
// Input Arguments:
//                  frameShiftMsec - frame shift for overlapping frames in msec
// Input/Output Arguments:
//                  pSMState       - pointer to the SMStruct to initialise
//
// Returns:         None
********************************************************************************/
void CVAD::SM_Init(SMStruct* pSMState, int frameShiftMsec)
{
    /* adding half a frame shift before dividing rounds to the nearest frame */
    const int roundingMsec = frameShiftMsec/2;

    /* starting state, counters and flags */
    pSMState->state             = SILENCE;
    pSMState->cOnsetFrames      = 0;
    pSMState->cEnergyHangFrames = 0;
    pSMState->cPerHangFrames    = 0;
    pSMState->uttHasStartedFlag = 0;
    pSMState->uttHasEndedFlag   = 0;

    /* no utterance boundaries seen yet */
    pSMState->uttBegFrameNum     = 0;
    pSMState->uttEndFrameNum     = 0;
    pSMState->prevUttEndFrameNum = 0;

    /* thresholds, converted from msec to frames */
    pSMState->cMinOnsetFrames      = (ONSET_THRESHOLD_MSEC + roundingMsec) / frameShiftMsec;
    pSMState->cMinEnergyHangFrames = (ENERGY_HANG_THRESH_MSEC + roundingMsec) / frameShiftMsec;
    pSMState->cMinPerHangFrames    = (PER_HANG_THRESH_MSEC + roundingMsec) / frameShiftMsec;
    pSMState->cUttBegAdjustFrames  = (UTT_BEG_ADJUSTMENT_MSEC + roundingMsec) / frameShiftMsec;
}
 
/********************************************************************************
// Name:            SM_UpdateState
// Description:     Run one step of the state machine from the current energy
//                  and periodicity measures, then post-process any utterance
//                  boundary it flagged: a start frame is moved back by
//                  cUttBegAdjustFrames (but never to or before the previous
//                  utterance end), and an end is remembered for that check.
//
// Input/Output Arguments:
//                  pState - pointer to the VAD state structure
//
// Returns:         None
********************************************************************************/
void CVAD::SM_UpdateState(VADStateStruct* pState)
{
    /* refresh the speech-activity flags, then advance the state machine */
    SM_UpdateSMParams(pState);
    SM_UpdateSMState(pState->pSM, pState->frameNum);

    SMStruct* const pMachine = pState->pSM;

    if (pMachine->uttHasStartedFlag)
    {
        /* move the start frame back by a fixed number of frames ... */
        pMachine->uttBegFrameNum -= pMachine->cUttBegAdjustFrames;

        /* ... without reaching back into the previous utterance */
        if (pMachine->uttBegFrameNum <= pMachine->prevUttEndFrameNum)
        {
            pMachine->uttBegFrameNum = pMachine->prevUttEndFrameNum + 1;
        }
        return;
    }

    if (pMachine->uttHasEndedFlag)
    {
        /* remember where this utterance ended for the next start adjustment */
        pMachine->prevUttEndFrameNum = pState->frameNum;
    }
}
 
/******************************************************************************** 
// Name:            SM_UpdateStateParams 
// Description:     Update the energy and periodicity related flags based on the 
//                  their values with respect to their thresholds. 
// 
// Input/Output Arguments: 
//                  pState       - pointer to the VADState structure. 
// 
// Returns:         None 
********************************************************************************/ 
void CVAD::SM_UpdateSMParams(VADStateStruct* pState)
{
    /*
    // Energy flag: 1 when the current frame energy is strictly above the
    // noise threshold (speech present in this frame), otherwise 0.
    */
    pState->pSM->energySpeechIsActiveFlag =
        (pState->pEState->energyDB > pState->pEState->noiseThreshDB) ? 1 : 0;

    /*
    // Periodicity flag: 1 when the smoothed periodicity is strictly above
    // PER_SPEECH_THRESHOLD_Q15 ((voiced) speech present in this frame), otherwise 0.
    */
    pState->pSM->perSpeechIsActiveFlag =
        (pState->pPerState->smoothPeriodicityQ15 > PER_SPEECH_THRESHOLD_Q15) ? 1 : 0;
}
 
/******************************************************************************** 
// Name:            SM_UpdateSMState
// Description:     Update the state of the VAD state machine based on onset and 
//                  hang times. The VAD state machine can be in one of the following states 
//                  1) SILENCE - No speech is present. 
//                               Allowed transitions - 
//                               SILENCE -> SILENCE : at least one of the energy/periodicity
//                                                    flags indicates speech inactivity.
//                               SILENCE -> ONSET   : both energy/periodicity flag indicate 
//                                                    speech activity 
// 
//                  2) ONSET   - Start of an utterance may have been detected. 
//                               Allowed transitions - 
//                               ONSET -> SPEECH    : both the energy/periodicity flags indicate 
//                                                    speech activity in consecutive ONSET_THRESHOLD_MSEC 
//                                                    frames. This transition sets the utterance start flag. 
//                               ONSET -> SILENCE   : at least one of the energy/periodicity flags does
//                                                    not indicate speech activity in the current frame
//                                                    (false onset); the onset frame count is reset
//                               ONSET -> ONSET     : both energy/periodicity flag indicate 
//                                                    speech activity but ONSET_THRESHOLD_MSEC 
//                                                    has not been reached 
 
//                  3) SPEECH  - Speech is present. 
//                               Allowed transitions - 
//                               SPEECH -> HANG     : at least one of energy/periodicity 
//                                                    flags do not indicate speech activity 
//                               SPEECH -> SPEECH   : both energy/periodicity flag indicate 
//                                                    speech activity 
// 
//                  4) HANG    - End of an utterance may have been detected. 
//                               Allowed transitions - 
//                               HANG -> SPEECH     : if both energy/periodicity flags indicate 
//                                                    speech activity before hang times 
//                                                    ENERGY_HANG_THRESH_MSEC or PER_HANG_THRESH_MSEC 
//                                                    are completed 
//                               HANG -> HANG       : at least one of energy/periodicity 
//                                                    flags do not indicate speech activity in consecutive 
//                                                    frames but hang time ENERGY_HANG_THRESH_MSEC or 
//                                                    PER_HANG_THRESH_MSEC are not yet completed 
//                               HANG -> SILENCE    : at least one of the energy/periodicity flags does not
//                                                    indicate speech activity in consecutive frames and its
//                                                    hang time (ENERGY_HANG_THRESH_MSEC or PER_HANG_THRESH_MSEC)
//                                                    is completed. This transition sets the utterance end flag.
// Input Arguments: 
//                  frameNum     - current frame number used for setting the start/end frame number. 
// Input/Output Arguments:
//                  pSM          - pointer to the SM structure.
// 
// Returns:         None 
********************************************************************************/ 
void CVAD::SM_UpdateSMState(SMStruct* pSM, int frameNum)
{
    /* snapshot of the two per-frame activity indicators; neither is modified here */
    const bool bEnergyActive = pSM->energySpeechIsActiveFlag ? true : false;
    const bool bPerActive    = pSM->perSpeechIsActiveFlag ? true : false;
    const bool bBothActive   = bEnergyActive && bPerActive;

    switch (pSM->state)
    {
        case SILENCE:
            if (!bBothActive)
            {
                /* stay in SILENCE until both measures agree on speech */
                break;
            }

            /* possible speech onset. Transition SILENCE -> ONSET, counting this frame */
            pSM->cOnsetFrames = 1;
            pSM->state = ONSET;
            break;

        case ONSET:
            if (!bBothActive)
            {
                /* false onset: drop the count. Transition ONSET -> SILENCE */
                pSM->cOnsetFrames = 0;
                pSM->state = SILENCE;
                break;
            }

            /* one more consecutive onset frame */
            ++(pSM->cOnsetFrames);

            if (pSM->cOnsetFrames >= pSM->cMinOnsetFrames)
            {
                /* enough onset frames: utterance start. Transition ONSET -> SPEECH */
                pSM->state = SPEECH;
                pSM->uttBegFrameNum = frameNum - pSM->cMinOnsetFrames;
                pSM->uttHasStartedFlag = 1;
            }
            break;

        case SPEECH:
            /* start a hang counter for each measure that went inactive in this frame */
            if (!bEnergyActive)
            {
                pSM->cEnergyHangFrames = 1;
            }
            if (!bPerActive)
            {
                pSM->cPerHangFrames = 1;
            }

            if (!bBothActive)
            {
                /* utterance end may have started. Transition SPEECH -> HANG */
                pSM->state = HANG;
            }
            break;

        case HANG:
            /* consecutive inactive-frame count based on the energy measure */
            if (bEnergyActive)
            {
                pSM->cEnergyHangFrames = 0;
            }
            else
            {
                ++(pSM->cEnergyHangFrames);
            }

            /* consecutive inactive-frame count based on the periodicity measure */
            if (bPerActive)
            {
                pSM->cPerHangFrames = 0;
            }
            else
            {
                ++(pSM->cPerHangFrames);
            }

            if ( (pSM->cEnergyHangFrames >= pSM->cMinEnergyHangFrames)
              || (pSM->cPerHangFrames >= pSM->cMinPerHangFrames) )
            {
                /* a hang time has run out: utterance end. Transition HANG -> SILENCE */
                pSM->state = SILENCE;
                pSM->cPerHangFrames = 0;
                pSM->cEnergyHangFrames = 0;
                pSM->uttHasEndedFlag = 1;
                pSM->uttEndFrameNum = frameNum - 1;
            }
            else if ( (pSM->cEnergyHangFrames == 0)
                   && (pSM->cPerHangFrames == 0) )
            {
                /* both counters were cleared, so the hang was false. Transition HANG -> SPEECH */
                pSM->state = SPEECH;
            }
            break;

        default:
            /* unrecognised state value: leave everything untouched */
            break;
    }
}
 
 
/* EOF */ 
 
//void CVAD::SetLength(int nLen) 
//{ 
//    m_nAllDateLenInSamps = nLen; 
//}