/* www.pudn.com > ETSI_ES_202_212_software.rar > ExtAdvFrontEnd.c */


/*===============================================================================
 *      ETSI ES 202 212   Distributed Speech Recognition
 *      Extended Advanced Front-End Feature Extraction Algorithm & Compression Algorithm
 *      Speech Reconstruction Algorithm.
 *      C-language software implementation                                      
 *      Version 1.1.1   October, 2003                                            
 *===============================================================================*/
/*---------------------------------------------------------------------------
 *
 * FILE NAME: ExtAdvFrontEnd.c
 * PURPOSE:   This file contains the main part of Extended Advanced DSR FE.
 *            Speech samples are read from input waveform file frame by frame.
 *            Feature extraction is performed for each frame by calling the
 *            function DoAdvProcess () (see ParmInterface.c).
 *            Feature vectors are output to file in HTK format. VAD information
 *            is output to file in ASCII format.
 *            Command line arguments are handled by a command line parsing
 *            function.
 *
 *---------------------------------------------------------------------------*/

/*-----------------
 * File Inclusions
 *-----------------*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "fileio.h"
#include "ParmInterface.h"
#include "pitchInterface.h"

#define VERSION_STR "Version 1.1.1, October 2003"

/*-------------------------------
 * Global Definitions and Macros
 *-------------------------------*/
#define PRINTMOD 15
#define IEEE_LE 0
#define IEEE_BE 1

/*---------------
 * HTK constants 
 *---------------*/
#define HTK_PR_MFCC 6
#define HTK_PR_E    0x40
#define HTK_PR_0    0x2000

/*----------------------------------------------
 * Global Variable Definitions and Declarations
 *----------------------------------------------*/
/* Command-line switches; every one of them is off until ParseCommLine() sets it. */
BOOLEAN QuietMode = FALSE;            /* Suppress output to stderr */
BOOLEAN FsSpecified = FALSE;          /* Sampling frequency specified */
BOOLEAN SwapSpecified = FALSE;        /* Byte swap for raw data files specified */
BOOLEAN InputKindSpecified = FALSE;   /* Input file format specified */
BOOLEAN NoOutHeaderSpecified = FALSE; /* No output HTK header option specified */
BOOLEAN Noc0 = FALSE;                 /* No c0 coefficient to output feature vector */
BOOLEAN NologE = FALSE;               /* No logE component to output feature vector */

/* Streams opened by ParseCommLine() and closed at the end of main(). */
FILE *fp_in = NULL;                   /* Input HTK, NIST or raw data file */
FILE *fp_out_htk = NULL;              /* Output HTK file */
FILE *fp_out_vad = NULL;              /* Output VAD file */
FILE *fp_out_pitch = NULL;            /* Output pitch file */
FILE *fp_out_class = NULL;            /* Output class file */

/* Number of leading bytes to skip in the input file (-skip_header_bytes) */
int HeaderBytesToSkip = 0;

char InFilename[199];                 /* Name of input file */
char OutFilename[199];                /* Name of output file */
char VADFilename[199];                /* Name of output VAD file */
char InputKind[10] = "NIST";          /* Input file format */

int SamplingFrequency = 8000;         /* Sampling frequency in Hz */
int NativeByteOrder = IEEE_LE;        /* Native byte ordering */
int InputByteOrder;                   /* Input byte ordering, set in main() */

/* Output parameter kind, MFCC_0_E as default */
int OutParmKind = HTK_PR_MFCC + HTK_PR_E + HTK_PR_0;

/*------------
 * Prototypes
 *------------*/
/* Reads nSamples samples from fp_in into buf; Swap is the byte swap specification. */
static int ReadBufWave (FILE *fp_in, FILE_TYPE *buf, int nSamples, int Swap);
/* Parses the command line arguments and opens the input and output files. */
static BOOLEAN ParseCommLine (int argc, char *argv[]);

/*-----------
 * Functions
 *-----------*/
/*----------------------------------------------------------------------------
 * FUNCTION NAME: ReadBufWave
 *
 * PURPOSE:       Read one buffer of samples from the input file, one sample
 *                at a time, optionally exchanging the two bytes of each
 *                sample.
 *
 * INPUT:
 *   fp_in        Pointer to file
 *   nSamples     Number of samples to read
 *   Swap         Byte swap specification (non-zero: swap every sample)
 *
 * OUTPUT
 *   buf          Pointer to buffer; receives the samples read so far
 *
 * RETURN VALUE
 *   FALSE        As soon as fread() fails to deliver a complete sample
 *                (read error or end of file)
 *   TRUE         Otherwise (all nSamples samples were read)
 *
 * NOTE(review):  The loop header and body had been lost from the reviewed
 *                copy of this file (only "for (i=0; i> 8);" survived); they
 *                are reconstructed here from that residue and from this
 *                header. The swap expression assumes FILE_TYPE is a 16-bit
 *                sample type - confirm against fileio.h.
 *
 *---------------------------------------------------------------------------*/
static int ReadBufWave (FILE *fp_in, FILE_TYPE *buf, int nSamples, int Swap)
{
  FILE_TYPE s;
  int i;

  for (i = 0; i < nSamples; i++)
    {
      if (fread (&s, sizeof (s), 1, fp_in) != 1)
        return FALSE;

      /* Masking before shifting keeps sign-extension bits out of the result */
      if (Swap)
        s = ((s & 0x00ff) << 8) | ((s & 0xff00) >> 8);

      buf[i] = (FILE_TYPE)s;
    }
  return TRUE;
}

/*----------------------------------------------------------------------------
 * FUNCTION NAME: ParseCommLine
 *
 * PURPOSE:       Parses command line arguments, opens input and output files
 *
 * INPUT:
 *   argc         Number of command line arguments
 *   argv         Array of command line arguments
 *
 * OUTPUT
 *   none
 *
 * RETURN VALUE
 *   FALSE        In case of any errors
 *   TRUE         Otherwise
 *
 *---------------------------------------------------------------------------*/
static BOOLEAN ParseCommLine (int argc, char *argv[])
{
  int mark = 0, mand_arg_no = 1;
	
  for ( ; argc; argc--, mark++) 
  {
	if (strcmp (argv[mark], "-q") == 0)
        {
		  QuietMode = TRUE;
        }
	else if (strcmp (argv[mark], "-fs") == 0)
        {
		  FsSpecified = TRUE;
		  --argc;
		  ++mark;
		  SamplingFrequency = 1000 * atoi (argv[mark]);
        }
	else if (strcmp (argv[mark], "-swap") == 0)
        {
		  SwapSpecified = TRUE;
        }
	else if (strcmp (argv[mark], "-F") == 0)
        {
		  InputKindSpecified = TRUE;
		  --argc;
		  ++mark;
		  strcpy (InputKind, argv[mark]);
        }
	else if (strcmp (argv[mark], "-noh") == 0)
        {
		  NoOutHeaderSpecified = TRUE;
        }
	else if (strcmp (argv[mark], "-noc0") == 0)
        {
		  Noc0 = TRUE;
        }
	else if (strcmp (argv[mark], "-nologE") == 0)
        {
		  NologE = TRUE;
        }
        else if (strcmp (argv[mark], "-skip_header_bytes") == 0)
        {
          --argc;
          ++mark;
          HeaderBytesToSkip = atoi (argv[mark]);
        }
	else if (argv[mark][0] == '-')
        {
		  fprintf (stderr, "WARNING:  Un-recognized flag '%s' !\r\n", argv[mark]);
        }
	else
        {
			switch (mand_arg_no)
			{

			case 1: /* First mandatory argument - input file */
					strcpy (InFilename, argv[mark]);
					fp_in = fopen (argv[mark], "rb");
					if (fp_in == NULL)
					{
						fprintf (stderr, "    ERROR:   Could not open file '%s' !\r\n",
						argv[mark]);
						return FALSE;
					}					
					break;
					
			case 2: /* Second mandatory argument - output HTK file */
					strcpy (OutFilename, argv[mark]);
					fp_out_htk = fopen (OutFilename, "wb");
					if (fp_out_htk == NULL)
					{
						fprintf (stderr, "    ERROR:   Could not open file '%s' !\r\n",
						OutFilename);
						return FALSE;
					}
			                strcpy (VADFilename, strcat(strtok(argv[mark], "."), ".vad"));
			                fp_out_vad = fopen (VADFilename, "wb");
			                if (fp_out_vad == NULL)
				        {
				          fprintf (stderr, "ERROR:   Could not open file '%s' !\r\n", VADFilename);
				          return FALSE;
				        }		
					break;

			case 3: /* Third mandatory argument - output pitch file */
					strcpy (OutFilename, argv[mark]);
					fp_out_pitch = fopen (OutFilename, "wb");
					if (fp_out_pitch == NULL)
					{
						fprintf (stderr, "    ERROR:   Could not open file '%s' !\r\n",
						OutFilename);
						return FALSE;
					}
					break;

                	case 4: /* Fourth mandatory argument - output class file */
					strcpy (OutFilename, argv[mark]);
					fp_out_class = fopen (OutFilename, "w"); // ASCII output
					if (fp_out_class == NULL)
					{
						fprintf (stderr, "    ERROR:   Could not open file '%s' !\r\n",
						OutFilename);
						return FALSE;
					}
					break;

			default:
					fprintf (stderr, "    ERROR:   Too many input arguments!\r\n");
					return FALSE;

			} /* end switch on mandatory argument no */
			mand_arg_no++;            

        } // end of loop on arguments
    }

    if (!fp_in || !fp_out_htk || !fp_out_pitch || !fp_out_class)
    {
	  fprintf (stderr, "ERROR:   Input and output files must be given!\r\n");
	  return FALSE;
    }
	
    if (strcmp (InputKind, "NIST") && strcmp (InputKind, "HTK") && strcmp (InputKind, "RAW"))
    {
	  fprintf (stderr, "ERROR:   Invalid input file format '%s'!\r\n", InputKind);
	  return FALSE;
    }

    if (strcmp (InputKind, "RAW") && HeaderBytesToSkip != 0)
    {
      fprintf(stderr, "ERROR: skip_header_bytes can be specified for RAW format only.\r\n");
      return FALSE;
    }
	
    if (strcmp (InputKind, "RAW") && FsSpecified)
	fprintf (stderr, "WARNING:   Sampling frequency needs to be specified only for raw data files.\r\n");
	
    if (strcmp (InputKind, "RAW") && SwapSpecified)
	fprintf (stderr, "WARNING:   Byte swapping needs to be specified only for raw data files if necessary.\r\n");
	
  if (Noc0 && NologE) TextToParmKind( "MFCC", &OutParmKind);
  else if (Noc0) TextToParmKind( "MFCC_E", &OutParmKind);
  else if (NologE) TextToParmKind( "MFCC_0", &OutParmKind);
	
  return TRUE;
}

/*----------------------------------------------------------------------------
 * FUNCTION NAME: PrintProgramBanner
 *
 * PURPOSE:       Print the program name and version to stderr.
 *---------------------------------------------------------------------------*/
static void PrintProgramBanner (void)
{
  fprintf (stderr,"\r\n    ETSI ES 202 212 DSR Extended Advanced Front-End Feature Extraction Algorithm");
  fprintf (stderr,"\r\n    C-language software implementation");
  fprintf (stderr,"\r\n    %s\r\n\n",VERSION_STR);
}

/*----------------------------------------------------------------------------
 * FUNCTION NAME: PrintUsage
 *
 * PURPOSE:       Print the usage text to stderr. The value shown in
 *                parentheses after each option is the current setting of
 *                the corresponding global variable.
 *
 * INPUT:
 *   progName     Program name as invoked (argv[0]); may be NULL
 *---------------------------------------------------------------------------*/
static void PrintUsage (const char *progName)
{
  if (progName == NULL)
    progName = "ExtAdvFrontEnd";

  fprintf (stderr, "\r\n    USAGE:");
  fprintf (stderr, "   %s infile HTK_outfile pitch_outfile class_outfile [options]\r\n", progName);
  fprintf (stderr, "\n   The program outputs also .vad ASCII VAD file\r\n");
  fprintf (stderr, "\r\n    OPTIONS:\r\n");
  fprintf (stderr, "     -q            Quiet Mode                                 (%s)\r\n", QuietMode ? "TRUE" : "FALSE");
  fprintf (stderr, "     -F    format  Input file format (NIST,HTK,RAW)           (%s)\r\n", InputKind);
  fprintf (stderr, "     -fs   freq    Sampling frequency in kHz (%d,%d,%d)        (%d)\r\n", SAMPLING_FREQ_1, SAMPLING_FREQ_2, SAMPLING_FREQ_3, SamplingFrequency / 1000);
  fprintf (stderr, "     -swap         Change input byte ordering                 (%s)\r\n", SwapSpecified ? "Swapped" : "Native");
  fprintf (stderr, "                   (Native byte ordering is %s)\r\n", NativeByteOrder ? "ieee-be" : "ieee-le");
  fprintf (stderr, "     -noh          No HTK header to output file               (%s)\r\n", NoOutHeaderSpecified ? "TRUE" : "FALSE" );
  fprintf (stderr, "     -noc0         No c0 coefficient to output feature vector (%s)\r\n", Noc0 ? "TRUE" : "FALSE" );
  fprintf (stderr, "     -nologE       No logE component to output feature vector (%s)\r\n", NologE ? "TRUE" : "FALSE" );
  fprintf (stderr, "     -skip_header_bytes n - Skip header, first n bytes (Only for F RAW)\r\n");
}

/*----------------------------------------------------------------------------
 * FUNCTION NAME: OutputFeatureFrame
 *
 * PURPOSE:       Write one feature vector to the HTK file and its VAD flag
 *                to the VAD file, and count the frame as speech or
 *                non-speech.
 *
 * INPUT:
 *   pFEParX            Front-end state; only its VAD field is read here
 *   FeatureBuffer      Feature vector to write
 *   nFeatures          Number of components to write
 *
 * OUTPUT
 *   pSpeechFrames      Incremented when VAD equals SPEECH_FRAME
 *   pNonSpeechFrames   Incremented otherwise
 *---------------------------------------------------------------------------*/
static void OutputFeatureFrame (FEParamsX *pFEParX, X_FLOAT32 *FeatureBuffer, short nFeatures,
                                long *pSpeechFrames, long *pNonSpeechFrames)
{
  WriteHTKFeature (fp_out_htk, FeatureBuffer, nFeatures);

  fprintf (fp_out_vad, "%d ", pFEParX->VAD);
  if (pFEParX->VAD == SPEECH_FRAME)
    (*pSpeechFrames)++;
  else
    (*pNonSpeechFrames)++;
}

/*----------------------------------------------------------------------------
 * FUNCTION NAME: main
 *
 * PURPOSE:       Main front-end operations from input speech samples to output
 *                feature vectors. See embedded comments.
 *
 * INPUT:
 *   argc         Number of command line arguments (passed to ParseCommLine)
 *   argv         Array of command line arguments (passed to ParseCommLine)
 *
 * OUTPUT
 *   none
 *
 * RETURN VALUE
 *   TRUE         In case of any errors
 *   FALSE        Otherwise
 *
 *---------------------------------------------------------------------------*/
extern int main (int argc, char *argv[])
{
  FILE_TYPE *SigBuf;
  X_FLOAT32 FeatureBuffer[NUM_CEP_COEFF + 2];
  FEParamsX  *pFEParX;
  HTK_Header InHheader, OutHheader;
  NIST_Header InNheader;

  int rc;
  int NbSamplesToRead;
  short nFeatures;

  long FrameCounter = 0;
  long SpeechFrameCounter = 0;
  long NonSpeechFrameCounter = 0;

  if (!ParseCommLine (argc - 1, argv + 1))
    {
      PrintProgramBanner ();
      PrintUsage (argc > 0 ? argv[0] : NULL);
      return TRUE;
    }

  if (!QuietMode)
    PrintProgramBanner ();

  /* Components per output vector: c0 and logE are each optional */
  nFeatures = (short) (NUM_CEP_COEFF - (Noc0 ? 1:0) + (NologE ? 0:1));

  /*----------------*/
  /* Initialization */
  /*----------------*/
  InputByteOrder = NativeByteOrder;

  /*-----------------------------------------------------------------
   * Read input header, extract sampling frequency and byte ordering
   *-----------------------------------------------------------------*/
  if (!strcmp (InputKind, "NIST"))
    {
      if (!ReadNISTHeader (fp_in, &InNheader))
        {
          fprintf (stderr, "ERROR:   Invalid NIST header !\r\n");
          return TRUE;
        }
      SamplingFrequency = InNheader.SampleRate;
      if (strcmp (InNheader.SampleByteFormat, "10"))
        InputByteOrder = IEEE_LE;
      else
        InputByteOrder = IEEE_BE;
    }
  else if (!strcmp (InputKind, "HTK"))
    {
      /* A non-positive sample period would make the division below meaningless */
      if (!ReadHTKHeader (fp_in, &InHheader, InputByteOrder!=IEEE_BE)
          || InHheader.sampPeriod <= 0)
        {
          fprintf (stderr, "ERROR:   Invalid HTK header !\r\n");
          return TRUE;
        }
      /* 625->16kHz, 1250->8kHz, 909->11kHz */
      SamplingFrequency = 10 * floor ((float) 1e6 / (float) InHheader.sampPeriod);
      InputByteOrder = IEEE_BE;
    }

  /*------------------------------------------------------------
   * Write output header (number of frames to be updated later)
   *------------------------------------------------------------*/
  OutHheader.nSamples = 0;
  OutHheader.sampPeriod = 100000; /* 10000.0 us (100 Hz) */
  OutHheader.sampSize = nFeatures * 4;
  OutHheader.sampKind = OutParmKind;
  if (!NoOutHeaderSpecified) WriteHTKHeader (fp_out_htk, &OutHheader);

  /*------------------------------------------
   * Memory allocation for FE data structures
   *------------------------------------------*/
  pFEParX = AdvProcessAlloc (SamplingFrequency);
  if (pFEParX == NULL)
    {
      fprintf (stderr, "ERROR:   Could not allocate front-end data structures!\r\n");
      return TRUE;
    }

  /* For pitch and class extraction */
  pFEParX->pfInpSpeech = (X_FLOAT32*)malloc(sizeof(X_FLOAT32)*FRAME_LENGTH);
  if ( !(pFEParX->pfInpSpeech))
    {
      fprintf (stderr, "ERROR:  Memory allocation error occured!\r\n");
      return TRUE;
    }
  pFEParX->pfUBSpeech = (X_FLOAT32*)malloc(sizeof(X_FLOAT32)*FRAME_LENGTH);
  if ( !(pFEParX->pfUBSpeech))
    {
      fprintf (stderr, "ERROR:  Memory allocation error occured!\r\n");
      return TRUE;
    }
  pFEParX->pfProcSpeech = (X_FLOAT32*)malloc(sizeof(X_FLOAT32)*(FRAME_LENGTH+HISTORY_LENGTH));
  if ( !(pFEParX->pfProcSpeech))
    {
      fprintf (stderr, "ERROR:  Memory allocation error occured!\r\n");
      return TRUE;
    }
  /* One spare ELEMENT beyond the down-sampled length; the "+1" must be
     inside the element count, otherwise it adds a single byte only */
  pFEParX->pfDownSampledProcSpeech = (X_FLOAT32*)
    malloc(sizeof(X_FLOAT32)*((FRAME_LENGTH+HISTORY_LENGTH)/DOWN_SAMP_FACTOR+1));
  if ( !(pFEParX->pfDownSampledProcSpeech))
    {
      fprintf (stderr, "ERROR:  Memory allocation error occured!\r\n");
      return TRUE;
    }

  /*-------------------------------------------------------
   * Initialization of FE data structures and input buffer
   *-------------------------------------------------------*/
  AdvProcessInit (pFEParX);

  rc = InitPitchRom(&(pFEParX->pPitchRom));
  if (rc != 0)
    {
      fprintf(stderr,"ERROR: Can't initialize PITCH ROM, RC = %d\r\n",rc);
      return TRUE;
    }

  rc = InitPitchEstimator(pFEParX->pPitchRom,&(pFEParX->pPitchEstimator));
  if (rc != 0)
    {
      fprintf (stderr, "ERROR: can't initialize PITCH ESTIMATOR, RC = %d\r\n",rc);
      return TRUE;
    }

  pFEParX->speech_fid = fp_in;
  pFEParX->htk_fid = fp_out_htk;
  pFEParX->vad_fid = fp_out_vad;
  pFEParX->pitch_fid = fp_out_pitch;
  pFEParX->class_fid = fp_out_class;

  NbSamplesToRead = pFEParX->NbSamplesToRead;

  SigBuf = calloc (1, sizeof (SigBuf[0]) * NbSamplesToRead);
  if (SigBuf == NULL)
    {
      fprintf (stderr,"ERROR:  Memory allocation error occured!\r\n");
      return TRUE;
    }

  /*------------
   * Processing
   *------------*/

  /* ParseCommLine only accepts a header skip for RAW input. For NIST and
     HTK input the count is zero, and an unconditional seek to offset 0
     would rewind the file after its header has been read, so the header
     bytes would be processed as speech samples.
     NOTE(review): assumes ReadNISTHeader/ReadHTKHeader leave the stream at
     the first sample - confirm against fileio.c. */
  if (HeaderBytesToSkip != 0
      && fseek (fp_in, (long) HeaderBytesToSkip, SEEK_SET) != 0)
    {
      fprintf (stderr, "ERROR:   Could not skip %d header bytes!\r\n", HeaderBytesToSkip);
      return TRUE;
    }

  while (ReadBufWave (fp_in, SigBuf, NbSamplesToRead,
                      (SwapSpecified || InputByteOrder != NativeByteOrder)))
    {
      FrameCounter++;

      /* A feature vector is written only when DoAdvProcess reports one */
      if (DoAdvProcess (SigBuf, FeatureBuffer, pFEParX))
        OutputFeatureFrame (pFEParX, FeatureBuffer, nFeatures,
                            &SpeechFrameCounter, &NonSpeechFrameCounter);

      /*---------------------------
       * Display processing status
       *---------------------------*/
      if (!QuietMode && !(FrameCounter % PRINTMOD))
        {
          fprintf (stderr, "\rProcessing status: %ld frames ...", FrameCounter);
          fflush (stderr);
        }
    }

  /*----------
   * Flushing
   *----------*/
  while (FlushAdvProcess (FeatureBuffer, pFEParX))
    OutputFeatureFrame (pFEParX, FeatureBuffer, nFeatures,
                        &SpeechFrameCounter, &NonSpeechFrameCounter);

  /*----------------
   * Memory release
   *----------------*/
  DeallocatePitchRom(pFEParX->pPitchRom);
  DeallocatePitchEstimator(pFEParX->pPitchEstimator);
  /* NOTE(review): pfInpSpeech is allocated above but not released here;
     confirm whether AdvProcessDelete frees it before adding a free(). */
  free(pFEParX->pfUBSpeech);
  free(pFEParX->pfProcSpeech);
  free(pFEParX->pfDownSampledProcSpeech);

  AdvProcessDelete (&pFEParX);
  free (SigBuf);

  /*----------------------------------------
   * Correct number of frames in HTK header
   *----------------------------------------*/
  OutHheader.nSamples = SpeechFrameCounter + NonSpeechFrameCounter;
  if (!NoOutHeaderSpecified) WriteHTKHeader (fp_out_htk, &OutHheader);

  /*------------------------------
   * Close input and output files
   *------------------------------*/
  fclose (fp_in);
  fclose (fp_out_htk);
  fclose (fp_out_vad);
  fclose (fp_out_pitch);
  fclose (fp_out_class);

  /*----------------------
   * Display final status
   *----------------------*/
  if (!QuietMode)
    fprintf (stderr, "\rProcessed: %ld Frames.           \r\n", FrameCounter);

  fprintf (stderr, "Extended Advanced Front End: ");
  /* The file name comes from the command line: never use it as a format */
  fprintf (stderr, "%s", InFilename);
  fprintf (stderr, ": %ld Frames, %ld Speech, %ld Non Speech, %ld Lost.\r\n", FrameCounter,
           SpeechFrameCounter, NonSpeechFrameCounter, FrameCounter - SpeechFrameCounter - NonSpeechFrameCounter);

  if (SpeechFrameCounter == 0)
    fprintf (stderr, "NO SPEECH DETECTED !\r\n");

  return FALSE;
}