www.pudn.com > lencod.rar > fast_me.c


/* 
*********************************************************************** 
* COPYRIGHT AND WARRANTY INFORMATION 
* 
* Copyright 2003, Advanced Audio Video Coding Standard, Part II 
* 
* DISCLAIMER OF WARRANTY 
* 
* These software programs are available to the users without any 
* license fee or royalty on an "as is" basis. The AVS disclaims 
* any and all warranties, whether express, implied, or statutory, 
* including any implied warranties of merchantability or of fitness 
* for a particular purpose. In no event shall the contributors or  
* the AVS be liable for any incidental, punitive, or consequential 
* damages of any kind whatsoever arising from the use of this program. 
* 
* This disclaimer of warranty extends to the user of this program 
* and user's customers, employees, agents, transferees, successors, 
* and assigns. 
* 
* The AVS does not represent or warrant that the program furnished 
* hereunder are free of infringement of any third-party patents. 
* Commercial implementations of AVS, including shareware, may be 
* subject to royalty fees to patent holders. Information regarding 
* the AVS patent policy is available from the AVS Web site at 
* http://www.avs.org.cn 
* 
* THIS IS NOT A GRANT OF PATENT RIGHTS - SEE THE AVS PATENT POLICY. 
************************************************************************ 
*/ 
 
/* 
************************************************************************************* 
* File name:  
* Function:  
	 Fast integer pel motion estimation and fractional pel motion estimation 
	 algorithms are described in this file. 
	  1. get_mem_FME() and free_mem_FME() are functions for allocation and release 
	     of memories about motion estimation 
	  2. FME_BlockMotionSearch() is the function for fast integer pel motion  
	     estimation and fractional pel motion estimation 
	  3. DefineThreshold() defined thresholds for early termination 
* 
************************************************************************************* 
*/ 
 
#include  
#include  
#include  
#include  
#include  
 
#include "memalloc.h" 
#include "fast_me.h" 
#include "refbuf.h" 
 
#ifdef TimerCal 
#include  
#include  
#include  
#endif 
 
#define Q_BITS          15 
 
extern  int*   byte_abs; 
extern  int*   mvbits; 
extern  int*   spiral_search_x; 
extern  int*   spiral_search_y; 
 
static pel_t (*PelY_14) (pel_t**, int, int); 
static const int quant_coef[6][4][4] = { 
  {{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243},{13107, 8066,13107, 8066},{ 8066, 5243, 8066, 5243}}, 
  {{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660},{11916, 7490,11916, 7490},{ 7490, 4660, 7490, 4660}}, 
  {{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194},{10082, 6554,10082, 6554},{ 6554, 4194, 6554, 4194}}, 
  {{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647},{ 9362, 5825, 9362, 5825},{ 5825, 3647, 5825, 3647}}, 
  {{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355},{ 8192, 5243, 8192, 5243},{ 5243, 3355, 5243, 3355}}, 
  {{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893},{ 7282, 4559, 7282, 4559},{ 4559, 2893, 4559, 2893}} 
}; 
 
void DefineThreshold() 
{ 
  static float ThresholdFac[8] = {0,8,4,4,2.5,1.5,1.5,1};  
  static int ThreshUp[8] = {0, 1024,512,512,448,384,384,384}; 
 
  AlphaSec[1] = 0.01f; 
  AlphaSec[2] = 0.01f; 
  AlphaSec[3] = 0.01f; 
  AlphaSec[4] = 0.02f; 
  AlphaSec[5] = 0.03f; 
  AlphaSec[6] = 0.03f; 
  AlphaSec[7] = 0.04f; 
 
  AlphaThird[1] = 0.06f; 
  AlphaThird[2] = 0.07f; 
  AlphaThird[3] = 0.07f; 
  AlphaThird[4] = 0.08f; 
  AlphaThird[5] = 0.12f; 
  AlphaThird[6] = 0.11f; 
  AlphaThird[7] = 0.15f; 
 
  DefineThresholdMB(); 
  return; 
} 
 
void DefineThresholdMB() 
{ 
  int gb_qp_per    = (input->qpN-MIN_QP)/6; 
  int gb_qp_rem    = (input->qpN-MIN_QP)%6; 
   
  int gb_q_bits    = Q_BITS+gb_qp_per; 
  int gb_qp_const,Thresh4x4; 
 
  if (img->type == INTRA_IMG) 
    gb_qp_const=(1<InterlaceCodingOption != FRAME_CODING)    
	  img->buf_cycle *= 2;	 
 
  if ((*mv = (int*****)calloc(input->img_width/4,sizeof(int****))) == NULL) 
    no_mem_exit ("get_mem_mv: mv"); 
  for (i=0; iimg_width/4; i++) 
  { 
    if (((*mv)[i] = (int****)calloc(input->img_height/4,sizeof(int***))) == NULL) 
      no_mem_exit ("get_mem_mv: mv"); 
    for (j=0; jimg_height/4; j++) 
    { 
      if (((*mv)[i][j] = (int***)calloc(img->buf_cycle,sizeof(int**))) == NULL) 
        no_mem_exit ("get_mem_mv: mv"); 
      for (k=0; kbuf_cycle; k++) 
      { 
        if (((*mv)[i][j][k] = (int**)calloc(9,sizeof(int*))) == NULL) 
          no_mem_exit ("get_mem_mv: mv"); 
        for (l=0; l<9; l++) 
          if (((*mv)[i][j][k][l] = (int*)calloc(3,sizeof(int))) == NULL) 
            no_mem_exit ("get_mem_mv: mv"); 
      } 
    } 
  } 
  if(input->InterlaceCodingOption != FRAME_CODING)    
	  img->buf_cycle /= 2;	 
 
  return input->img_width/4*input->img_height/4*img->buf_cycle*9*3*sizeof(int); 
} 
/* 
************************************************************************* 
* Function:Dynamic memory allocation of all infomation needed for backward prediction 
* Input: 
* Output: 
* Return: Number of allocated bytes 
* Attention: 
************************************************************************* 
*/ 
 
int get_mem_bwmincost (int****** mv) 
{ 
  int i, j, k, l; 
 
  if(input->InterlaceCodingOption != FRAME_CODING)   img->buf_cycle *= 2;	 
 
  if ((*mv = (int*****)calloc(input->img_width/4,sizeof(int****))) == NULL) 
    no_mem_exit ("get_mem_mv: mv"); 
  for (i=0; iimg_width/4; i++) 
  { 
    if (((*mv)[i] = (int****)calloc(input->img_height/4,sizeof(int***))) == NULL) 
      no_mem_exit ("get_mem_mv: mv"); 
    for (j=0; jimg_height/4; j++) 
    { 
      if (((*mv)[i][j] = (int***)calloc(img->buf_cycle,sizeof(int**))) == NULL) 
        no_mem_exit ("get_mem_mv: mv"); 
      for (k=0; kbuf_cycle; k++) 
      { 
        if (((*mv)[i][j][k] = (int**)calloc(9,sizeof(int*))) == NULL) 
          no_mem_exit ("get_mem_mv: mv"); 
        for (l=0; l<9; l++) 
          if (((*mv)[i][j][k][l] = (int*)calloc(3,sizeof(int))) == NULL) 
            no_mem_exit ("get_mem_mv: mv"); 
      } 
    } 
  } 
  if(input->InterlaceCodingOption != FRAME_CODING)   img->buf_cycle /= 2;	 
 
  return input->img_width/4*input->img_height/4*1*9*3*sizeof(int); 
} 
 
int get_mem_FME() 
{ 
	int memory_size = 0; 
	memory_size += get_mem2Dint(&McostState, 2*input->search_range+1, 2*input->search_range+1); 
	memory_size += get_mem_mincost (&(all_mincost)); 
	memory_size += get_mem_bwmincost(&(all_bwmincost)); 
	memory_size += get_mem2D(&SearchState,7,7); 
	 
	return memory_size; 
} 
 
/* 
************************************************************************* 
* Function:free the memory allocated for of all infomation needed for Fast ME 
* Input: 
* Output: 
* Return:  
* Attention: 
************************************************************************* 
*/ 
 
void free_mem_mincost (int***** mv) 
{ 
  int i, j, k, l; 
  if(input->InterlaceCodingOption != FRAME_CODING)   img->buf_cycle *= 2;	 
 
  for (i=0; iimg_width/4; i++) 
  { 
    for (j=0; jimg_height/4; j++) 
    { 
      for (k=0; kbuf_cycle; k++) 
      { 
        for (l=0; l<9; l++) 
          free (mv[i][j][k][l]); 
        free (mv[i][j][k]); 
      } 
      free (mv[i][j]); 
    } 
    free (mv[i]); 
  } 
  free (mv); 
  if(input->InterlaceCodingOption != FRAME_CODING)   img->buf_cycle /= 2;	 
} 
 
/* 
************************************************************************* 
* Function:free the memory allocated for of all infomation needed for backward prediction 
* Input: 
* Output: 
* Return:  
* Attention: 
************************************************************************* 
*/ 
 
void free_mem_bwmincost (int***** mv) 
{ 
  int i, j, k, l; 
 
  for (i=0; iimg_width/4; i++) 
  { 
    for (j=0; jimg_height/4; j++) 
    { 
      for (k=0; k<1; k++) 
      { 
        for (l=0; l<9; l++) 
          free (mv[i][j][k][l]); 
        free (mv[i][j][k]); 
      } 
      free (mv[i][j]); 
    } 
    free (mv[i]); 
  } 
  free (mv); 
} 
 
void free_mem_FME() 
{ 
  free_mem2Dint(McostState); 
  free_mem_mincost (all_mincost); 
  free_mem_bwmincost(all_bwmincost); 
 
  free_mem2D(SearchState); 
} 
 
void 
FME_SetMotionVectorPredictor (int  pmv[2], 
                          int  **refFrArr, 
                          int  ***tmp_mv, 
                          int  ref_frame, 
                          int  mb_x, 
                          int  mb_y, 
                          int  blockshape_x, 
                          int  blockshape_y, 
						  int  blocktype, 
						  int  ref) 
{ 
  int pic_block_x          = (img->block_x>>1) + (mb_x>>3); 
  int pic_block_y          = (img->block_y>>1) + (mb_y>>3); 
  int mb_nr                = img->current_mb_nr; 
  int mb_width             = img->width/16; 
  int mb_available_up      = (img->mb_y == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width  ].slice_nr);  // jlzheng 6.23 
  int mb_available_left    = (img->mb_x == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-1         ].slice_nr);  // jlzheng 6.23 
  int mb_available_upleft  = (img->mb_x == 0 || 
                              img->mb_y == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width-1].slice_nr);  // jlzheng 6.23 
  int mb_available_upright = (img->mb_x >= mb_width-1 || 
                              img->mb_y == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width+1].slice_nr);  // jlzheng 6.23 
  int block_available_up, block_available_left, block_available_upright, block_available_upleft; 
  int mv_a, mv_b, mv_c, mv_d, pred_vec=0; 
  int mvPredType, rFrameL, rFrameU, rFrameUR; 
  int hv; 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  int SAD_a, SAD_b, SAD_c, SAD_d; 
  int temp_pred_SAD[2]; 
	/*lgp*/ 
  int y_up = 1,y_upright=1,y_upleft=1,off_y=0; 
  int mva[3] , mvb[3],mvc[3]; 
  /*Lou 1016 Start*/ 
   
  int rFrameUL; 
  Macroblock*     currMB = &img->mb_data[img->current_mb_nr]; 
  int smbtypecurr, smbtypeL, smbtypeU, smbtypeUL, smbtypeUR; 
   
	smbtypecurr = -2; 
	smbtypeL = -2; 
	smbtypeU = -2; 
	smbtypeUL = -2; 
	smbtypeUR = -2; 
 
  /*Lou 1016 End*/ 
 
	pred_SAD_space = 0; 
	 
  /* D B C */ 
  /* A X   */ 
   
  /* 1 A, B, D are set to 0 if unavailable       */ 
  /* 2 If C is not available it is replaced by D */ 
 
    block_available_up   = mb_available_up   || (mb_y > 0); 
    block_available_left = mb_available_left || (mb_x > 0); 
 
  if (mb_y > 0) 
  { 
    if (mb_x < 8)  // first column of 8x8 blocks 
    { 
      if (mb_y==8) 
      { 
        if (blockshape_x == 16)      block_available_upright = 0; 
        else                         block_available_upright = 1; 
      } 
      else 
      { 
        if (mb_x+blockshape_x != 8)  block_available_upright = 1; 
        else                         block_available_upright = 0; 
      } 
    } 
    else 
    { 
      if (mb_x+blockshape_x != 16)   block_available_upright = 1; 
      else                           block_available_upright = 0; 
    } 
  } 
  else if (mb_x+blockshape_x != MB_BLOCK_SIZE) 
  { 
    block_available_upright = block_available_up; 
  } 
  else 
  { 
    block_available_upright = mb_available_upright; 
  } 
   
  if (mb_x > 0) 
  { 
    block_available_upleft = (mb_y > 0 ? 1 : block_available_up); 
  } 
  else if (mb_y > 0) 
  { 
    block_available_upleft = block_available_left; 
  } 
  else 
  { 
    block_available_upleft = mb_available_upleft; 
  } 
   
	smbtypecurr = -2; 
	smbtypeL = -2; 
	smbtypeU = -2; 
	smbtypeUL = -2; 
	smbtypeUR = -2; 
 
  /*Lou 1016 Start*/ 
  mvPredType = MVPRED_MEDIAN; 
 
 
  rFrameL    = block_available_left    ? refFrArr[pic_block_y  -off_y]  [pic_block_x-1] : -1; 
  rFrameU    = block_available_up      ? refFrArr[pic_block_y-y_up][pic_block_x]   : -1; 
  rFrameUR   = block_available_upright ? refFrArr[pic_block_y-y_upright][pic_block_x+blockshape_x/8] : 
  block_available_upleft  ? refFrArr[pic_block_y-y_upleft][pic_block_x-1] : -1; 
  rFrameUL   = block_available_upleft  ? refFrArr[pic_block_y-y_upleft][pic_block_x-1] : -1; 
 
 
  if((rFrameL != -1)&&(rFrameU == -1)&&(rFrameUR == -1)) 
    mvPredType = MVPRED_L; 
  else if((rFrameL == -1)&&(rFrameU != -1)&&(rFrameUR == -1)) 
    mvPredType = MVPRED_U; 
  else if((rFrameL == -1)&&(rFrameU == -1)&&(rFrameUR != -1)) 
    mvPredType = MVPRED_UR; 
  /*Lou 1016 End*/ 
 
 
  // Directional predictions  
  else if(blockshape_x == 8 && blockshape_y == 16) 
  { 
    if(mb_x == 0) 
    { 
      if(rFrameL == ref_frame) 
        mvPredType = MVPRED_L; 
    } 
    else 
    { 
      //if( block_available_upright && refFrArr[pic_block_y-1][pic_block_x+blockshape_x/4] == ref_frame) 
      if(rFrameUR == ref_frame) // not correct 
        mvPredType = MVPRED_UR; 
    } 
  } 
 
  else if(blockshape_x == 16 && blockshape_y == 8) 
  { 
    if(mb_y == 0) 
    { 
      if(rFrameU == ref_frame) 
        mvPredType = MVPRED_U; 
    } 
    else 
    { 
      if(rFrameL == ref_frame) 
        mvPredType = MVPRED_L; 
    } 
  } 
   
//#define MEDIAN(a,b,c)  (a>b?a>c?b>c?b:c:a:b>c?a>c?a:c:b) 
#define MEDIAN(a,b,c)  (a + b + c - min(a, min(b, c)) - max(a, max(b, c))); 
   
   
  for (hv=0; hv < 2; hv++) 
  { 
    mva[hv] = mv_a = block_available_left    ? tmp_mv[hv][pic_block_y - off_y/*lgp*/][4+pic_block_x-1]              : 0; 
    mvb[hv] = mv_b = block_available_up      ? tmp_mv[hv][pic_block_y-/*1*/y_up/*lgp*/][4+pic_block_x]                : 0; 
    mv_d = block_available_upleft  ? tmp_mv[hv][pic_block_y-y_upleft][4+pic_block_x-1]: 0; 
    mvc[hv] = mv_c = block_available_upright ? tmp_mv[hv][pic_block_y-/*1*/y_upright/*lgp*/][4+pic_block_x+blockshape_x/8] : mv_d; 
     
    //--- Yulj 2004.07.04 
	// mv_a, mv_b... are not scaled. 
    mva[hv] = scale_motion_vector(mva[hv], ref_frame, rFrameL, smbtypecurr, smbtypeL, pic_block_y-off_y, pic_block_y, ref, 0); 
    mvb[hv] = scale_motion_vector(mvb[hv], ref_frame, rFrameU, smbtypecurr, smbtypeU, pic_block_y-y_up, pic_block_y, ref, 0); 
    mv_d = scale_motion_vector(mv_d, ref_frame, rFrameUL, smbtypecurr, smbtypeUL, pic_block_y-y_upleft, pic_block_y, ref, 0); 
    mvc[hv] = block_available_upright ? scale_motion_vector(mvc[hv], ref_frame, rFrameUR, smbtypecurr, smbtypeUR, pic_block_y-y_upright, pic_block_y, ref, 0): mv_d; 
     
     
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
    SAD_a = block_available_left    ? ((ref == -1) ? all_bwmincost[((img->pix_x+mb_x)>>2) -1][((img->pix_y+mb_y)>>2)][0][blocktype][0] : all_mincost[((img->pix_x+mb_x)>>2) -1][((img->pix_y+mb_y)>>2)][ref_frame][blocktype][0]) : 0; 
    SAD_b = block_available_up      ? ((ref == -1) ? all_bwmincost[((img->pix_x+mb_x)>>2)][((img->pix_y+mb_y)>>2) -1][0][blocktype][0] : all_mincost[((img->pix_x+mb_x)>>2)][((img->pix_y+mb_y)>>2) -1][ref_frame][blocktype][0]) : 0; 
    SAD_d = block_available_upleft  ? ((ref == -1) ? all_bwmincost[((img->pix_x+mb_x)>>2) -1][((img->pix_y+mb_y)>>2) -1][0][blocktype][0] : all_mincost[((img->pix_x+mb_x)>>2) -1][((img->pix_y+mb_y)>>2) -1][ref_frame][blocktype][0]) : 0; 
    SAD_c = block_available_upright ? ((ref == -1) ? all_bwmincost[((img->pix_x+mb_x)>>2) +1][((img->pix_y+mb_y)>>2) -1][0][blocktype][0] : all_mincost[((img->pix_x+mb_x)>>2) +1][((img->pix_y+mb_y)>>2) -1][ref_frame][blocktype][0]) : SAD_d; 
 
    switch (mvPredType) 
    { 
    case MVPRED_MEDIAN: 
      if(hv == 1){ 
          // jlzheng 7.2 
          // !! for A  
           mva[2] = abs(mva[0] - mvb[0])	+ abs(mva[1] - mvb[1]);  
          // !! for B 
    	   mvb[2] = abs(mvb[0] - mvc[0])        + abs(mvb[1] - mvc[1]); 
          // !! for C  
	   mvc[2] = abs(mvc[0] - mva[0])	+ abs(mvc[1] - mva[1]) ; 
           
          pred_vec = MEDIAN(mva[2],mvb[2],mvc[2]); 
           
          if(pred_vec == mva[2]){ 
            pmv[0] = mvc[0]; 
            pmv[1] = mvc[1]; 
            temp_pred_SAD[1] = temp_pred_SAD[0] = SAD_a; 
          } 
          else if(pred_vec == mvb[2]){ 
            pmv[0] = mva[0]; 
            pmv[1] = mva[1]; 
            temp_pred_SAD[1] = temp_pred_SAD[0] = SAD_b; 
          } 
          else{ 
            pmv[0] = mvb[0]; 
            pmv[1] = mvb[1]; 
            temp_pred_SAD[1] = temp_pred_SAD[0] = SAD_c; 
          }   //END 
      } 
  
      break; 
       
    case MVPRED_L: 
      	 pred_vec = mv_a; 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
	 temp_pred_SAD[hv] = SAD_a; 
      break; 
    case MVPRED_U: 
      	 pred_vec = mv_b; 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
	 temp_pred_SAD[hv] = SAD_b; 
      break; 
    case MVPRED_UR: 
      	 pred_vec = mv_c; 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
	 temp_pred_SAD[hv] = SAD_c; 
      break; 
    default: 
      break; 
    } 
	if(mvPredType != MVPRED_MEDIAN) 
	 pmv[hv] = pred_vec; 
  } 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  pred_SAD_space = temp_pred_SAD[0]>temp_pred_SAD[1]?temp_pred_SAD[1]:temp_pred_SAD[0]; 
#undef MEDIAN 
} 
 
 
FME_BlockMotionSearch (int       ref,           // <--  reference frame (0... or -1 (backward)) 
                   int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block 
                   int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block 
                   int       blocktype,     // <--  block type (1-16x16 ... 7-4x4) 
                   int       search_range,  // <--  1-d search range for integer-position search 
                   double    lambda         // <--  lagrangian parameter for determining motion cost 
                   ) 
{ 
  static pel_t   orig_val [256]; 
  static pel_t  *orig_pic  [16] = {orig_val,     orig_val+ 16, orig_val+ 32, orig_val+ 48, 
                                   orig_val+ 64, orig_val+ 80, orig_val+ 96, orig_val+112, 
                                   orig_val+128, orig_val+144, orig_val+160, orig_val+176, 
                                   orig_val+192, orig_val+208, orig_val+224, orig_val+240}; 
 
  int       pred_mv_x, pred_mv_y, mv_x, mv_y, i, j; 
 
  int       max_value = (1<<20); 
  int       min_mcost = max_value; 
  int       mb_x      = pic_pix_x-img->pix_x; 
  int       mb_y      = pic_pix_y-img->pix_y; 
  int       bsx       = input->blc_size[blocktype][0]; 
  int       bsy       = input->blc_size[blocktype][1]; 
  int       refframe  = (ref==-1 ? 0 : ref); 
  int*      pred_mv; 
  int**     ref_array = ((img->type!=B_IMG /*&& img->type!=BS_IMG*/) ? refFrArr : ref>=0 ? fw_refFrArr : bw_refFrArr); 
  int***    mv_array  = ((img->type!=B_IMG /*&& img->type!=BS_IMG*/) ? tmp_mv   : ref>=0 ? tmp_fwMV    : tmp_bwMV); 
  int*****  all_bmv   = img->all_bmv; 
  int*****  all_mv    = (ref<0 ? img->all_bmv : img->all_mv);/*lgp*dct*modify*/ 
  byte**    imgY_org_pic = imgY_org; 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  int       N_Bframe = input->successive_Bframe, n_Bframe =(N_Bframe) ? ((Bframe_ctr%N_Bframe) ? Bframe_ctr%N_Bframe : N_Bframe) : 0 ; 
 
  int       incr_y=1,off_y=0;/*lgp*/ 
  int       b8_x          = (mb_x>>3);/*lgp*/ 
  int       b8_y          = (mb_y>>3);/*lgp*/ 
  int       center_x = pic_pix_x;/*lgp*/ 
  int       center_y = pic_pix_y;/*lgp*/ 
   int MaxMVHRange,MaxMVVRange; 
#ifdef TimerCal 
  LARGE_INTEGER litmp;  
  LONGLONG QPart1,QPart2;  
  double dfMinus, dfFreq, dfTim;  
   
  QueryPerformanceFrequency(&litmp);  
  dfFreq = (double)litmp.QuadPart;  
#endif 
 
  if (!img->picture_structure) // field coding 
  { 
     if (img->type==B_IMG) 
	  refframe = ref<0 ? ref+2 : ref;  // <--forward  backward--> 
		                                              // 1  | 0 | -2 | -1  Yulj 2004.07.15 
  } 
   
   pred_mv = ((img->type!=B_IMG /*&& img->type!=BS_IMG*/) ? img->mv  : ref>=0 ? img->p_fwMV : img->p_bwMV)[mb_x>>3][mb_y>>3][refframe][blocktype]; 
 
  //================================== 
  //=====   GET ORIGINAL BLOCK   ===== 
  //================================== 
  for (j = 0; j < bsy; j++) 
  { 
    for (i = 0; i < bsx; i++) 
    { 
      orig_pic[j][i] = imgY_org_pic[pic_pix_y+/*j*/incr_y*j+off_y/*lgp*/][pic_pix_x+i]; 
    } 
  } 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  if(blocktype>6) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][5][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][5][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][5][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][5][0]); 
	  pred_SAD_uplayer   /= 2;  
	   
  } 
  else if(blocktype>4) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][4][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][4][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][4][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][4][0]); 
	  pred_SAD_uplayer   /= 2;  
	   
  } 
  else if(blocktype == 4) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][2][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][2][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][2][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][2][0]); 
	  pred_SAD_uplayer   /= 2;  
  } 
  else if(blocktype > 1) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][1][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][1][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][1][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][1][0]); 
	  pred_SAD_uplayer   /= 2;  
  } 
   
 
  pred_SAD_uplayer = flag_intra_SAD ? 0 : pred_SAD_uplayer;// for irregular motion 
  
  //coordinate prediction 
  if (img->number > refframe+1) 
  { 
	  pred_SAD_time = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][0]; 
	  pred_MV_time[0] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][1]; 
	  pred_MV_time[1] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][2]; 
  } 
  if (ref == -1 && Bframe_ctr > 1) 
  { 
	  pred_SAD_time = all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][0]; 
	  pred_MV_time[0] = (int)(all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][1] * ((n_Bframe==1) ? (N_Bframe) : (N_Bframe-n_Bframe+1.0)/(N_Bframe-n_Bframe+2.0)) );//should add a factor 
	  pred_MV_time[1] = (int)(all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][2] *((n_Bframe==1) ? (N_Bframe) : (N_Bframe-n_Bframe+1.0)/(N_Bframe-n_Bframe+2.0)) );//should add a factor 
  } 
 
  { 
	  if (refframe > 0) 
	  {//field_mode top_field 
		  pred_SAD_ref = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][(refframe-1)][blocktype][0]; 
		  pred_SAD_ref = flag_intra_SAD ? 0 : pred_SAD_ref;//add this for irregular motion 
		  pred_MV_ref[0] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][(refframe-1)][blocktype][1]; 
		  pred_MV_ref[0] = (int)(pred_MV_ref[0]*(refframe+1)/(float)(refframe)); 
		  pred_MV_ref[1] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][(refframe-1)][blocktype][2]; 
		  pred_MV_ref[1] = (int)(pred_MV_ref[1]*(refframe+1)/(float)(refframe)); 
	  } 
	  if (img->type == B_IMG && ref == 0) 
	  { 
		  pred_SAD_ref = all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][0]; 
		  pred_SAD_ref = flag_intra_SAD ? 0 : pred_SAD_ref;//add this for irregular motion 
		  pred_MV_ref[0] =(int) (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][1]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f)); //should add a factor 
		  pred_MV_ref[1] =(int) ( all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][2]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));  
	  } 
  } 
 
 
  //=========================================== 
  //=====   GET MOTION VECTOR PREDICTOR   ===== 
  //=========================================== 
  FME_SetMotionVectorPredictor (pred_mv, ref_array, mv_array, refframe, mb_x, mb_y, bsx, bsy, blocktype, ref); 
  pred_mv_x = pred_mv[0]; 
  pred_mv_y = pred_mv[1]; 
 
#ifdef TimerCal 
  QueryPerformanceCounter(&litmp);  
  QPart1 = litmp.QuadPart;  
#endif 
  //================================== 
  //=====   INTEGER-PEL SEARCH   ===== 
  //================================== 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  mv_x = pred_mv_x / 4; 
  mv_y = pred_mv_y / 4; 
  if (!input->rdopt) 
  { 
    //--- adjust search center so that the (0,0)-vector is inside --- 
    mv_x = max (-search_range, min (search_range, mv_x)); 
    mv_y = max (-search_range, min (search_range, mv_y)); 
  } 
 
  min_mcost = FastIntegerPelBlockMotionSearch(orig_pic, ref, center_x/*lgp*/, center_y/*lgp*/,/*pic_pix_x, pic_pix_y,*/ blocktype, 
           pred_mv_x, pred_mv_y, &mv_x, &mv_y, search_range, 
           min_mcost, lambda); 
 
 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  for (i=0; i < (bsx>>2); i++) 
  { 
	  for (j=0; j < (bsy>>2); j++) 
	  { 
		  if (ref > -1) 
			  all_mincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][refframe][blocktype][0] = min_mcost; 
		  else 
			  all_bwmincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][0][blocktype][0] = min_mcost; 
	  } 
  } 
#ifdef TimerCal   
  QueryPerformanceCounter(&litmp);  
  QPart2 = litmp.QuadPart;  
  dfMinus = (double)(QPart2 - QPart1);  
  dfTim = dfMinus / dfFreq;  
  integer_time=integer_time + dfTim;//tmp_time; 
#endif 
 
 //============================== 
  //=====   SUB-PEL SEARCH   ===== 
  //============================== 
  if (input->hadamard) 
  { 
    min_mcost = max_value; 
  } 
  
#ifdef TimerCal  
  QueryPerformanceCounter(&litmp);  
  QPart1 = litmp.QuadPart;  
#endif 
 
 
if(blocktype >3) 
{ 
  min_mcost =  FastSubPelBlockMotionSearch (orig_pic, ref, center_x, center_y, blocktype, 
                                        pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, 
                                        min_mcost, lambda, 0); 
} 
else 
{ 
  min_mcost =  SubPelBlockMotionSearch (orig_pic, ref, center_x/*lgp*/, center_y/*lgp*/,/*pic_pix_x, pic_pix_y,*/ blocktype, 
                                        pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, 
                                        min_mcost, lambda); 
} 
 
 
  for (i=0; i < (bsx>>2); i++) 
  { 
	  for (j=0; j < (bsy>>2); j++) 
	  { 
		  if (ref > -1) 
		  { 
			  all_mincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][refframe][blocktype][1] = mv_x; 
			  all_mincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][refframe][blocktype][2] = mv_y; 
		  } 
		  else 
		  { 
			  all_bwmincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][0][blocktype][1] = mv_x; 
			  all_bwmincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][0][blocktype][2] = mv_y; 
		  } 
	  } 
  } 
 
#ifdef TimerCal 
  QueryPerformanceCounter(&litmp);  
  QPart2 = litmp.QuadPart;  
  dfMinus = (double)(QPart2 - QPart1);  
  dfTim = dfMinus / dfFreq;  
  fractional_time=fractional_time + dfTim; 
#endif 
 
  if (!input->rdopt) 
  { 
    // Get the skip mode cost 
    if (blocktype == 1 && img->type == INTER_IMG) 
    { 
      int cost; 
 
      FindSkipModeMotionVector (); 
 
      cost  = GetSkipCostMB (lambda); 
      cost -= (int)floor(8*lambda+0.4999); 
 
      if (cost < min_mcost) 
      { 
        min_mcost = cost; 
        mv_x      = img->all_mv [0][0][0][0][0]; 
        mv_y      = img->all_mv [0][0][0][0][1]; 
      } 
    } 
  } 
    MaxMVHRange= MAX_H_SEARCH_RANGE;   //add by wuzhongmou  
  MaxMVVRange= MAX_V_SEARCH_RANGE; 
  	if(!img->picture_structure) //field coding 
	{ 
		MaxMVVRange=MAX_V_SEARCH_RANGE_FIELD; 
	} 
	if(mv_x < -MaxMVHRange ) 
     mv_x = -MaxMVHRange; 
	if( mv_x> MaxMVHRange-1) 
		mv_x=MaxMVHRange-1; 
   if( mv_y < -MaxMVVRange) 
         mv_y = -MaxMVVRange; 
	if( mv_y > MaxMVVRange-1) 
		 mv_y = MaxMVVRange-1;        // add by wuzhongmou 
  //=============================================== 
  //=====   SET MV'S AND RETURN MOTION COST   ===== 
  //=============================================== 
  /*lgp*/ 
  for (i=0; i < (bsx>>3); i++) 
  { 
    for (j=0; j < (bsy>>3); j++) 
    { 
      all_mv[b8_x+i][b8_y+j][refframe][blocktype][0] = mv_x; 
      all_mv[b8_x+i][b8_y+j][refframe][blocktype][1] = mv_y; 
    } 
  } 
 
   
  return min_mcost; 
} 
 
#ifdef WIN32 
_inline int PartCalMad(pel_t *ref_pic,pel_t** orig_pic,pel_t *(*get_ref_line)(int, pel_t*, int, int), int blocksize_y,int blocksize_x, int blocksize_x4,int mcost,int min_mcost,int cand_x,int cand_y) 
#else 
inline int PartCalMad(pel_t *ref_pic,pel_t** orig_pic,pel_t *(*get_ref_line)(int, pel_t*, int, int), int blocksize_y,int blocksize_x, int blocksize_x4,int mcost,int min_mcost,int cand_x,int cand_y) 
#endif 
{ 
	int y,x4; 
	pel_t *orig_line, *ref_line; 
	for (y=0; y= min_mcost) 
		{ 
			break; 
		} 
    } 
    return mcost; 
} 
 
 
/* 
************************************************************************* 
* Function: FastIntegerPelBlockMotionSearch: fast pixel block motion search  
      this algrithm is called UMHexagonS(see JVT-D016),which includes  
      four steps with different kinds of search patterns 
* Input: 
    pel_t**   orig_pic,     // <--  original picture 
    int       ref,          // <--  reference frame (0... or -1 (backward)) 
    int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block 
    int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block 
	int       blocktype,    // <--  block type (1-16x16 ... 7-4x4) 
	int       pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units 
	int       pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units 
	int*      mv_x,         //  --> motion vector (x) - in pel units 
	int*      mv_y,         //  --> motion vector (y) - in pel units 
	int       search_range, // <--  1-d search range in pel units                          
	int       min_mcost,    // <--  minimum motion cost (cost for center or huge value) 
	double    lambda        // <--  lagrangian parameter for determining motion cost 
* Output: 
* Return:  
* Attention: in this function, three macro definitions is gives, 
	 EARLY_TERMINATION: early termination algrithm, refer to JVT-D016.doc 
	 SEARCH_ONE_PIXEL: search one pixel in search range 
	 SEARCH_ONE_PIXEL1(value_iAbort): search one pixel in search range, 
                                  but give a parameter to show if mincost refeshed 
************************************************************************* 
*/ 
 
int                                     //  ==> minimum motion cost after search 
FastIntegerPelBlockMotionSearch  (pel_t**   orig_pic,     // <--  not used 
								  int       ref,          // <--  reference frame (0... or -1 (backward)) 
								  int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block 
								  int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block 
								  int       blocktype,    // <--  block type (1-16x16 ... 7-4x4) 
								  int       pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units 
								  int       pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units 
								  int*      mv_x,         //  --> motion vector (x) - in pel units 
								  int*      mv_y,         //  --> motion vector (y) - in pel units 
								  int       search_range, // <--  1-d search range in pel units                          
								  int       min_mcost,    // <--  minimum motion cost (cost for center or huge value) 
								  double    lambda)       // <--  lagrangian parameter for determining motion cost 
{ 
	static int Diamond_x[4] = {-1, 0, 1, 0}; 
	static int Diamond_y[4] = {0, 1, 0, -1}; 
	static int Hexagon_x[6] = {2, 1, -1, -2, -1, 1}; 
	static int Hexagon_y[6] = {0, -2, -2, 0,  2, 2}; 
	static int Big_Hexagon_x[16] = {0,-2, -4,-4,-4, -4, -4, -2,  0,  2,  4,  4, 4, 4, 4, 2}; 
	static int Big_Hexagon_y[16] = {4, 3, 2,  1, 0, -1, -2, -3, -4, -3, -2, -1, 0, 1, 2, 3}; 
 
	int   pos, cand_x, cand_y,  mcost; 
	pel_t *(*get_ref_line)(int, pel_t*, int, int); 
	pel_t*  ref_pic       = img->type==B_IMG? Refbuf11 [ref+(!img->picture_structure) +1] : Refbuf11[ref]; 
	int   best_pos      = 0;                                        // position with minimum motion cost 
	int   max_pos       = (2*search_range+1)*(2*search_range+1);    // number of search positions 
	int   lambda_factor = LAMBDA_FACTOR (lambda);                   // factor for determining lagragian motion cost 
	int   mvshift       = 2;                  // motion vector shift for getting sub-pel units 
	int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size 
	int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size 
	int   blocksize_x4  = blocksize_x >> 2;                         // horizontal block size in 4-pel units 
	int   pred_x        = (pic_pix_x << mvshift) + pred_mv_x;       // predicted position x (in sub-pel units) 
	int   pred_y        = (pic_pix_y << mvshift) + pred_mv_y;       // predicted position y (in sub-pel units) 
	int   center_x      = pic_pix_x + *mv_x;                        // center position x (in pel units) 
	int   center_y      = pic_pix_y + *mv_y;                        // center position y (in pel units) 
	int    best_x, best_y; 
	int   check_for_00  = (blocktype==1 && !input->rdopt && img->type!=B_IMG && ref==0); 
	int   search_step,iYMinNow, iXMinNow; 
	int   i,m, iSADLayer;  
	int   iAbort; 
	float betaSec,betaThird; 
 
	int   height        = img->height;/*lgp*/ 
 
	//===== set function for getting reference picture lines ===== 
	if ((center_x > search_range) && (center_x < img->width -1-search_range-blocksize_x) && 
		(center_y > search_range) && (center_y < height-1-search_range-blocksize_y)   ) 
	{ 
		get_ref_line = FastLineX; 
	} 
	else 
	{ 
		get_ref_line = UMVLineX; 
	} 
	 
	memset(McostState[0],0,(2*search_range+1)*(2*search_range+1)*4); 
	 
   ///////////////////////////////////////////////////////////////	 
	if(ref>0)  
	{ 
		if(pred_SAD_ref!=0) 
		{ 
			betaSec = Bsize[blocktype]/(pred_SAD_ref*pred_SAD_ref)-AlphaSec[blocktype]; 
			betaThird = Bsize[blocktype]/(pred_SAD_ref*pred_SAD_ref)-AlphaThird[blocktype]; 
		} 
		else 
		{ 
			betaSec = 0; 
			betaThird = 0; 
		} 
	} 
	else  
	{ 
		if(blocktype==1) 
		{ 
			if(pred_SAD_space !=0) 
			{ 
				betaSec = Bsize[blocktype]/(pred_SAD_space*pred_SAD_space)-AlphaSec[blocktype]; 
				betaThird = Bsize[blocktype]/(pred_SAD_space*pred_SAD_space)-AlphaThird[blocktype]; 
			} 
			else 
			{ 
				betaSec = 0; 
				betaThird = 0; 
			} 
		} 
		else 
		{ 
			if(pred_SAD_uplayer !=0) 
			{ 
				betaSec = Bsize[blocktype]/(pred_SAD_uplayer*pred_SAD_uplayer)-AlphaSec[blocktype]; 
				betaThird = Bsize[blocktype]/(pred_SAD_uplayer*pred_SAD_uplayer)-AlphaThird[blocktype]; 
			} 
			else 
			{ 
				betaSec = 0; 
				betaThird = 0; 
			} 
		} 
	} 
	/*****************************/ 
 
	////////////search around the predictor and (0,0) 
	//check the center median predictor 
	cand_x = center_x ; 
	cand_y = center_y ; 
	mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y); 
	mcost = PartCalMad(ref_pic, orig_pic, get_ref_line,blocksize_y,blocksize_x,blocksize_x4,mcost,min_mcost,cand_x,cand_y); 
	McostState[search_range][search_range] = mcost; 
	if (mcost < min_mcost) 
	{ 
		min_mcost = mcost; 
		best_x = cand_x; 
		best_y = cand_y; 
	} 
 
	iXMinNow = best_x; 
	iYMinNow = best_y; 
	for (m = 0; m < 4; m++) 
	{		 
		cand_x = iXMinNow + Diamond_x[m]; 
		cand_y = iYMinNow + Diamond_y[m];    
		SEARCH_ONE_PIXEL 
	}  
 
	if(center_x != pic_pix_x || center_y != pic_pix_y) 
	{ 
		cand_x = pic_pix_x ; 
		cand_y = pic_pix_y ; 
		SEARCH_ONE_PIXEL 
 
		iXMinNow = best_x; 
		iYMinNow = best_y; 
		for (m = 0; m < 4; m++) 
		{		 
			cand_x = iXMinNow + Diamond_x[m]; 
			cand_y = iYMinNow + Diamond_y[m];    
			SEARCH_ONE_PIXEL 
		}  
	} 
	 
    if(blocktype>1) 
	{ 
		cand_x = pic_pix_x + (pred_MV_uplayer[0]/4); 
		cand_y = pic_pix_y + (pred_MV_uplayer[1]/4); 
		SEARCH_ONE_PIXEL 
		if ((min_mcost-pred_SAD_uplayer)number > 1 + ref && ref!=-1) || (ref == -1 && Bframe_ctr > 1)) 
	{ 
		cand_x = pic_pix_x + pred_MV_time[0]/4; 
		cand_y = pic_pix_y + pred_MV_time[1]/4; 
		SEARCH_ONE_PIXEL 
  } 
 
	//prediciton using mV of last ref moiton vector 
	if ((ref > 0) || (img->type == B_IMG && ref == 0)) 
	{ 
		cand_x = pic_pix_x + pred_MV_ref[0]/4; 
		cand_y = pic_pix_y + pred_MV_ref[1]/4; 
		SEARCH_ONE_PIXEL 
	} 
	//strengthen local search 
	iXMinNow = best_x; 
	iYMinNow = best_y; 
	for (m = 0; m < 4; m++) 
	{		 
		cand_x = iXMinNow + Diamond_x[m]; 
		cand_y = iYMinNow + Diamond_y[m];    
		SEARCH_ONE_PIXEL 
	}  
 
	//early termination algrithm, refer to JVT-D016 
    EARLY_TERMINATION 
	 
	if(blocktype>6) 
		goto sec_step; 
	else 
		goto first_step; 
	 
first_step: //Unsymmetrical-cross search  
	iXMinNow = best_x; 
	iYMinNow = best_y; 
	 
	for(i=1;i<=search_range/2;i++) 
	{ 
		search_step = 2*i - 1; 
		cand_x = iXMinNow + search_step; 
		cand_y = iYMinNow ; 
		SEARCH_ONE_PIXEL		 
		cand_x = iXMinNow - search_step; 
		cand_y = iYMinNow ; 
		SEARCH_ONE_PIXEL 
	} 
	 
	for(i=1;i<=search_range/4;i++) 
	{ 
		search_step = 2*i - 1; 
		cand_x = iXMinNow ; 
		cand_y = iYMinNow + search_step; 
		SEARCH_ONE_PIXEL 
		cand_x = iXMinNow ; 
		cand_y = iYMinNow - search_step; 
		SEARCH_ONE_PIXEL 
	} 
	//early termination algrithm, refer to JVT-D016 
    EARLY_TERMINATION 
	 
	iXMinNow = best_x; 
	iYMinNow = best_y; 
// Uneven Multi-Hexagon-grid Search	 
	for(pos=1;pos<25;pos++) 
	{ 
		cand_x = iXMinNow + spiral_search_x[pos]; 
		cand_y = iYMinNow + spiral_search_y[pos]; 
		SEARCH_ONE_PIXEL 
	} 
	//early termination algrithm, refer to JVT-D016 
    EARLY_TERMINATION 
	 
	for(i=1;i<=search_range/4; i++) 
	{ 
		iAbort = 0;    
		for (m = 0; m < 16; m++) 
		{ 
			cand_x = iXMinNow + Big_Hexagon_x[m]*i; 
			cand_y = iYMinNow + Big_Hexagon_y[m]*i;  
			SEARCH_ONE_PIXEL1(1) 
		} 
		if (iAbort) 
		{	 
			//early termination algrithm, refer to JVT-D016 
			EARLY_TERMINATION 
		} 
	} 
sec_step:  //Extended Hexagon-based Search 
		  iXMinNow = best_x; 
		  iYMinNow = best_y; 
		  for(i=0;ihadamard)) > min_mcost) 
          { 
            abort_search = 1; 
            break; 
          } 
        } 
        else  // copy diff to curr_diff for ABT SATD calculation 
        { 
          for (yy=y0,kk=0; yy minimum motion cost after search 
    FastSubPelBlockMotionSearch (pel_t**   orig_pic,      // <--  original pixel values for the AxB block 
    int       ref,           // <--  reference frame (0... or -1 (backward)) 
    int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block 
    int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block 
    int       blocktype,     // <--  block type (1-16x16 ... 7-4x4) 
    int       pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units 
    int       pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units 
    int*      mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units 
    int*      mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units 
    int       search_pos2,   // <--  search positions for    half-pel search  (default: 9) 
    int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9) 
    int       min_mcost,     // <--  minimum motion cost (cost for center or huge value) 
    double    lambda, 
    int	useABT)        // <--  lagrangian parameter for determining motion cost 
  { 
    static int Diamond_x[4] = {-1, 0, 1, 0}; 
    static int Diamond_y[4] = {0, 1, 0, -1}; 
    int   mcost; 
    int   cand_mv_x, cand_mv_y; 
     
    int   incr            = ref==-1 ? ((!img->fld_type)&&(mref==mref_fld)&&(img->type==B_IMG)) : (mref==mref_fld)&&(img->type==B_IMG) ; 
    pel_t **ref_pic       = img->type==B_IMG? mref [ref+1+incr] : mref [ref]; 
     
    int   lambda_factor   = LAMBDA_FACTOR (lambda); 
    int   mv_shift        = 0; 
    int   check_position0 = (blocktype==1 && *mv_x==0 && *mv_y==0 && input->hadamard && !input->rdopt && img->type!=B_IMG && ref==0); 
    int   blocksize_x     = input->blc_size[blocktype][0]; 
    int   blocksize_y     = input->blc_size[blocktype][1]; 
    int   pic4_pix_x      = (pic_pix_x << 2); 
    int   pic4_pix_y      = (pic_pix_y << 2); 
    int   max_pos_x4      = ((img->width -blocksize_x+1)<<2); 
    int   max_pos_y4      = ((img->height-blocksize_y+1)<<2); 
     
    int   min_pos2        = (input->hadamard ? 0 : 1); 
    int   max_pos2        = (input->hadamard ? max(1,search_pos2) : search_pos2); 
    int   search_range_dynamic,iXMinNow,iYMinNow,i; 
    int   iSADLayer,m,currmv_x,currmv_y,iCurrSearchRange; 
    int   search_range = input->search_range; 
    int   pred_frac_mv_x,pred_frac_mv_y,abort_search; 
    int   mv_cost;  
     
    int   pred_frac_up_mv_x, pred_frac_up_mv_y; 
 
	if (!img->picture_structure) //field coding 
	{ 
		if (img->type==B_IMG) 
		{ 
			incr = 2; 
		} 
	}else 
	{ 
		if(img->type==B_IMG) 
			incr = 1; 
	} 
    ref_pic       = img->type==B_IMG? mref [ref+incr] : mref [ref]; 
     
    *mv_x <<= 2; 
    *mv_y <<= 2; 
    if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 2) && 
      (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 2)   ) 
    { 
      PelY_14 = FastPelY_14; 
    } 
    else 
    { 
      PelY_14 = UMVPelY_14; 
    } 
     
    search_range_dynamic = 3; 
    pred_frac_mv_x = (pred_mv_x - *mv_x)%4; 
    pred_frac_mv_y = (pred_mv_y - *mv_y)%4;  
     
    pred_frac_up_mv_x = (pred_MV_uplayer[0] - *mv_x)%4; 
    pred_frac_up_mv_y = (pred_MV_uplayer[1] - *mv_y)%4; 
     
     
    memset(SearchState[0],0,(2*search_range_dynamic+1)*(2*search_range_dynamic+1)); 
     
    if(input->hadamard) 
    { 
      cand_mv_x = *mv_x;     
      cand_mv_y = *mv_y;     
      mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);		 
      mcost = AddUpSADQuarter(pic_pix_x,pic_pix_y,blocksize_x,blocksize_y,cand_mv_x,cand_mv_y,ref_pic,orig_pic,mv_cost,min_mcost,useABT); 
      SearchState[search_range_dynamic][search_range_dynamic] = 1; 
      if (mcost < min_mcost) 
      { 
        min_mcost = mcost; 
        currmv_x = cand_mv_x; 
        currmv_y = cand_mv_y;	 
      } 
    } 
    else 
    { 
      SearchState[search_range_dynamic][search_range_dynamic] = 1; 
      currmv_x = *mv_x; 
      currmv_y = *mv_y;	 
    } 
     
    if(pred_frac_mv_x!=0 || pred_frac_mv_y!=0) 
    { 
      cand_mv_x = *mv_x + pred_frac_mv_x;     
      cand_mv_y = *mv_y + pred_frac_mv_y;     
      mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);		 
      mcost = AddUpSADQuarter(pic_pix_x,pic_pix_y,blocksize_x,blocksize_y,cand_mv_x,cand_mv_y,ref_pic,orig_pic,mv_cost,min_mcost,useABT); 
      SearchState[cand_mv_y -*mv_y + search_range_dynamic][cand_mv_x - *mv_x + search_range_dynamic] = 1; 
      if (mcost < min_mcost) 
      { 
        min_mcost = mcost; 
        currmv_x = cand_mv_x; 
        currmv_y = cand_mv_y;	 
      } 
    } 
     
      
    iXMinNow = currmv_x; 
    iYMinNow = currmv_y; 
    iCurrSearchRange = 2*search_range_dynamic+1;  
    for(i=0;itype != 0) 
  { 
	  if (img->pix_x == 0 && img->pix_y == 0) 
	  { 
		  flag_intra_SAD = 0; 
	  } 
	  else if (img->pix_x == 0) 
	  { 
		  flag_intra_SAD = flag_intra[(img->pix_x)>>4]; 
	  } 
	  else if (img->pix_y == 0) 
	  { 
		  flag_intra_SAD = flag_intra[((img->pix_x)>>4)-1]; 
	  } 
	  else  
	  { 
		  flag_intra_SAD = ((flag_intra[(img->pix_x)>>4])||(flag_intra[((img->pix_x)>>4)-1])||(flag_intra[((img->pix_x)>>4)+1])) ; 
	  } 
  } 
  return; 
} 
 
void skip_intrabk_SAD(int best_mode, int ref_max) 
{ 
	int i,j,k, ref; 
  if (img->number > 0)  
	  flag_intra[(img->pix_x)>>4] = (best_mode == 9 || best_mode == 10) ? 1:0; 
  if (img->type!=0  && (best_mode == 9 || best_mode == 10)) 
  { 
	  for (i=0; i < 4; i++) 
	  { 
		  for (j=0; j < 4; j++) 
		  { 
			  for (k=1; k < 8;k++) 
			  { 
				  for (ref=0; refpix_x>>2)+i][(img->pix_y>>2)+j][ref][k][0] = 0;    
				  } 
			  } 
		  } 
	  } 
   
  } 
  return; 
} 
 
 
int                                         //  ==> minimum motion cost after search 
FME_BlockMotionSearch_bid (int       ref,           // <--  reference frame (0... or -1 (backward)) 
                   int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block 
                   int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block 
                   int       blocktype,     // <--  block type (1-16x16 ... 7-4x4) 
                   int       search_range,  // <--  1-d search range for integer-position search 
                   double    lambda         // <--  lagrangian parameter for determining motion cost 
                   ) 
{ 
  static pel_t   orig_val [256]; 
  static pel_t  *orig_pic  [16] = {orig_val,     orig_val+ 16, orig_val+ 32, orig_val+ 48, 
                                   orig_val+ 64, orig_val+ 80, orig_val+ 96, orig_val+112, 
                                   orig_val+128, orig_val+144, orig_val+160, orig_val+176, 
                                   orig_val+192, orig_val+208, orig_val+224, orig_val+240}; 
 
  int       pred_mv_x, pred_mv_y, mv_x, mv_y, i, j; 
 
  int       max_value = (1<<20); 
  int       min_mcost = max_value; 
  int       mb_x      = pic_pix_x-img->pix_x; 
  int       mb_y      = pic_pix_y-img->pix_y; 
  int       bsx       = input->blc_size[blocktype][0]; 
  int       bsy       = input->blc_size[blocktype][1]; 
  int       refframe  = (ref==-1 ? 0 : ref); 
  int*      pred_mv; 
  int**     ref_array = ((img->type!=B_IMG /*&& img->type!=BS_IMG*/) ? refFrArr : ref>=0 ? fw_refFrArr : bw_refFrArr); 
  int***    mv_array  = ((img->type!=B_IMG /*&& img->type!=BS_IMG*/) ? tmp_mv   : ref>=0 ? tmp_fwMV    : tmp_bwMV); 
  int*****  all_bmv   = img->all_bmv; 
  int*****  all_mv    = (ref==-1 ? img->all_bmv : img->all_omv);//lgp 
 
  byte**    imgY_org_pic = imgY_org; 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  int       N_Bframe = input->successive_Bframe, n_Bframe =(N_Bframe) ? ((Bframe_ctr%N_Bframe) ? Bframe_ctr%N_Bframe : N_Bframe) : 0 ; 
 
	int       incr_y=1,off_y=0;/*lgp*/ 
  int       b8_x          = (mb_x>>3);/*lgp*/ 
  int       b8_y          = (mb_y>>3);/*lgp*/ 
  int       center_x = pic_pix_x;/*lgp*/ 
  int       center_y = pic_pix_y;/*lgp*/ 
  int MaxMVHRange,MaxMVVRange; 
#ifdef TimerCal 
  LARGE_INTEGER litmp;  
  LONGLONG QPart1,QPart2;  
  double dfMinus, dfFreq, dfTim;  
   
  QueryPerformanceFrequency(&litmp);  
  dfFreq = (double)litmp.QuadPart;  
#endif 
  
  if (!img->picture_structure) // field coding 
  { 
	  if (img->type==B_IMG) 
		  refframe = ref<0 ? ref+2 : ref;  // Yulj 2004.07.15 
  } 
 
	pred_mv = ((img->type!=B_IMG) ? img->mv  : ref>=0 ? img->omv/*img->p_fwMV*/ : img->p_bwMV)[mb_x>>3][mb_y>>3][refframe][blocktype]; 
 
 
  //================================== 
  //=====   GET ORIGINAL BLOCK   ===== 
  //================================== 
  for (j = 0; j < bsy; j++) 
  { 
    for (i = 0; i < bsx; i++) 
    { 
      orig_pic[j][i] = imgY_org_pic[pic_pix_y+/*j*/incr_y*j+off_y/*lgp*/][pic_pix_x+i]; 
//      orig_pic[j][i] = imgY_org_pic[pic_pix_y+j][pic_pix_x+i]; 
    } 
  } 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  if(blocktype>6) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][5][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][5][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][5][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][5][0]); 
	  pred_SAD_uplayer   /= 2;  
	   
  } 
  else if(blocktype>4) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][4][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][4][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][4][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][4][0]); 
	  pred_SAD_uplayer   /= 2;  
	   
  } 
  else if(blocktype == 4) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][2][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][2][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][2][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][2][0]); 
	  pred_SAD_uplayer   /= 2;  
  } 
  else if(blocktype > 1) 
  { 
	  pred_MV_uplayer[0] = all_mv[b8_x][b8_y][refframe][1][0]; 
	  pred_MV_uplayer[1] = all_mv[b8_x][b8_y][refframe][1][1]; 
	  pred_SAD_uplayer    = (ref == -1) ? (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][1][0]) : (all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][1][0]); 
	  pred_SAD_uplayer   /= 2;  
  } 
   
  pred_SAD_uplayer = flag_intra_SAD ? 0 : pred_SAD_uplayer;// for irregular motion 
  
  //coordinate prediction 
  if (img->number > refframe+1) 
  { 
	  pred_SAD_time = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][0]; 
	  pred_MV_time[0] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][1]; 
	  pred_MV_time[1] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][2]; 
  } 
  if (ref == -1 && Bframe_ctr > 1) 
  { 
	  pred_SAD_time = all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][refframe][blocktype][0]; 
	  pred_MV_time[0] = (int)(all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][1] * ((n_Bframe==1) ? (N_Bframe) : (N_Bframe-n_Bframe+1.0)/(N_Bframe-n_Bframe+2.0)) );//should add a factor 
	  pred_MV_time[1] = (int)(all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][2] *((n_Bframe==1) ? (N_Bframe) : (N_Bframe-n_Bframe+1.0)/(N_Bframe-n_Bframe+2.0)) );//should add a factor 
  } 
 
   { 
	  if (refframe > 0) 
	  {//field_mode top_field 
		  pred_SAD_ref = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][(refframe-1)][blocktype][0]; 
		  pred_SAD_ref = flag_intra_SAD ? 0 : pred_SAD_ref;//add this for irregular motion 
		  pred_MV_ref[0] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][(refframe-1)][blocktype][1]; 
		  pred_MV_ref[0] = (int)(pred_MV_ref[0]*(refframe+1)/(float)(refframe)); 
		  pred_MV_ref[1] = all_mincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][(refframe-1)][blocktype][2]; 
		  pred_MV_ref[1] = (int)(pred_MV_ref[1]*(refframe+1)/(float)(refframe)); 
	  } 
	  if (img->type == B_IMG && ref == 0) 
	  { 
		  pred_SAD_ref = all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][0]; 
		  pred_SAD_ref = flag_intra_SAD ? 0 : pred_SAD_ref;//add this for irregular motion 
		  pred_MV_ref[0] =(int) (all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][1]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f)); //should add a factor 
		  pred_MV_ref[1] =(int) ( all_bwmincost[(img->pix_x>>2)+b8_x][(img->pix_y>>2)+b8_y][0][blocktype][2]*(-n_Bframe)/(N_Bframe-n_Bframe+1.0f));  
	  } 
  } 
 
 
  //=========================================== 
  //=====   GET MOTION VECTOR PREDICTOR   ===== 
  //=========================================== 
  FME_SetMotionVectorPredictor (pred_mv, ref_array, mv_array, refframe, mb_x, mb_y, bsx, bsy, blocktype, ref); 
  pred_mv_x = pred_mv[0]; 
  pred_mv_y = pred_mv[1]; 
 
#ifdef TimerCal 
  QueryPerformanceCounter(&litmp);  
  QPart1 = litmp.QuadPart;  
#endif 
  //================================== 
  //=====   INTEGER-PEL SEARCH   ===== 
  //================================== 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  mv_x = pred_mv_x / 4; 
  mv_y = pred_mv_y / 4; 
  if (!input->rdopt) 
  { 
    //--- adjust search center so that the (0,0)-vector is inside --- 
    mv_x = max (-search_range, min (search_range, mv_x)); 
    mv_y = max (-search_range, min (search_range, mv_y)); 
  } 
 
  min_mcost = FastIntegerPelBlockMotionSearch(orig_pic, ref, center_x/*lgp*/, center_y/*lgp*/,/*pic_pix_x, pic_pix_y,*/ blocktype, 
                                            pred_mv_x, pred_mv_y, &mv_x, &mv_y, search_range, 
                                            min_mcost, lambda); 
 
 
 
//FAST MOTION ESTIMATION. ZHIBO CHEN 2003.3 
  for (i=0; i < (bsx>>2); i++) 
  { 
	  for (j=0; j < (bsy>>2); j++) 
	  { 
		  if (ref > -1) 
			  all_mincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][refframe][blocktype][0] = min_mcost; 
		  else 
			  all_bwmincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][0][blocktype][0] = min_mcost; 
	  } 
  } 
#ifdef TimerCal   
  QueryPerformanceCounter(&litmp);  
  QPart2 = litmp.QuadPart;  
  dfMinus = (double)(QPart2 - QPart1);  
  dfTim = dfMinus / dfFreq;  
  integer_time=integer_time + dfTim;//tmp_time; 
#endif 
 
 //============================== 
  //=====   SUB-PEL SEARCH   ===== 
  //============================== 
  if (input->hadamard) 
  { 
    min_mcost = max_value; 
  } 
	 
#ifdef TimerCal  
  QueryPerformanceCounter(&litmp);  
  QPart1 = litmp.QuadPart;  
#endif 
	 
	if(blocktype >3) 
	{ 
		min_mcost =  FastSubPelBlockMotionSearch_bid (orig_pic, ref, center_x, center_y, blocktype, 
													pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9,min_mcost, lambda, 0); 
	} 
	else 
		min_mcost =  SubPelBlockMotionSearch_bid (orig_pic, ref, center_x/*lgp*/, center_y/*lgp*/, blocktype, 
												pred_mv_x, pred_mv_y, &mv_x, &mv_y, 9, 9, min_mcost, lambda); 
	 
  for (i=0; i < (bsx>>2); i++) 
  { 
	  for (j=0; j < (bsy>>2); j++) 
	  { 
		  if (ref > -1) 
		  { 
			  all_mincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][refframe][blocktype][1] = mv_x; 
			  all_mincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][refframe][blocktype][2] = mv_y; 
		  } 
		  else 
		  { 
			  all_bwmincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][0][blocktype][1] = mv_x; 
			  all_bwmincost[(img->pix_x>>2)+b8_x+i][(img->pix_y>>2)+b8_y+j][0][blocktype][2] = mv_y; 
		  } 
	  } 
  } 
 
#ifdef TimerCal 
  QueryPerformanceCounter(&litmp);  
  QPart2 = litmp.QuadPart;  
  dfMinus = (double)(QPart2 - QPart1);  
  dfTim = dfMinus / dfFreq;  
  fractional_time=fractional_time + dfTim; 
#endif 
 
  if (!input->rdopt) 
  { 
    // Get the skip mode cost 
    if (blocktype == 1 && img->type == INTER_IMG) 
    { 
      int cost; 
 
      FindSkipModeMotionVector (); 
 
      cost  = GetSkipCostMB (lambda); 
      cost -= (int)floor(8*lambda+0.4999); 
 
      if (cost < min_mcost) 
      { 
        min_mcost = cost; 
        mv_x      = img->all_mv [0][0][0][0][0]; 
        mv_y      = img->all_mv [0][0][0][0][1]; 
      } 
    } 
  } 
    MaxMVHRange= MAX_H_SEARCH_RANGE;   //add by wuzhongmou  
    MaxMVVRange= MAX_V_SEARCH_RANGE; 
  	if(!img->picture_structure) //field coding 
	{ 
			MaxMVVRange=MAX_V_SEARCH_RANGE_FIELD; 
		} 
		if(mv_x < -MaxMVHRange ) 
	      mv_x = -MaxMVHRange; 
		if( mv_x> MaxMVHRange-1) 
			mv_x=MaxMVHRange-1; 
	    if( mv_y < -MaxMVVRange) 
	         mv_y = -MaxMVVRange; 
		if( mv_y > MaxMVVRange-1) 
		 mv_y = MaxMVVRange-1;        // add by wuzhongmou 
  //=============================================== 
  //=====   SET MV'S AND RETURN MOTION COST   ===== 
  //=============================================== 
  /*lgp*/ 
  for (i=0; i < (bsx>>3); i++) 
  { 
    for (j=0; j < (bsy>>3); j++) 
    { 
      all_mv[b8_x+i][b8_y+j][refframe][blocktype][0] = mv_x; 
      all_mv[b8_x+i][b8_y+j][refframe][blocktype][1] = mv_y; 
    } 
  } 
 
  return min_mcost; 
} 
 
  int                                                   //  ==> minimum motion cost after search 
    FastSubPelBlockMotionSearch_bid (pel_t**   orig_pic,      // <--  original pixel values for the AxB block 
    int       ref,           // <--  reference frame (0... or -1 (backward)) 
    int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block 
    int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block 
    int       blocktype,     // <--  block type (1-16x16 ... 7-4x4) 
    int       pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units 
    int       pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units 
    int*      mv_x,          // <--> in: search center (x) / out: motion vector (x) - in pel units 
    int*      mv_y,          // <--> in: search center (y) / out: motion vector (y) - in pel units 
    int       search_pos2,   // <--  search positions for    half-pel search  (default: 9) 
    int       search_pos4,   // <--  search positions for quarter-pel search  (default: 9) 
    int       min_mcost,     // <--  minimum motion cost (cost for center or huge value) 
    double    lambda, 
    int	useABT)        // <--  lagrangian parameter for determining motion cost 
{ 
	static int Diamond_x[4] = {-1, 0, 1, 0}; 
	static int Diamond_y[4] = {0, 1, 0, -1}; 
    int   mcost; 
    int   cand_mv_x, cand_mv_y; 
     
    int   incr            = ref==-1 ? ((!img->fld_type)&&(mref==mref_fld)&&(img->type==B_IMG)) : (mref==mref_fld)&&(img->type==B_IMG) ; 
    pel_t **ref_pic       = img->type==B_IMG? mref [ref+1+incr] : mref [ref]; 
	pel_t **ref_pic_bid;//lgp 
     
    int   lambda_factor   = LAMBDA_FACTOR (lambda); 
    int   mv_shift        = 0; 
    int   check_position0 = (blocktype==1 && *mv_x==0 && *mv_y==0 && input->hadamard && !input->rdopt && img->type!=B_IMG && ref==0); 
    int   blocksize_x     = input->blc_size[blocktype][0]; 
    int   blocksize_y     = input->blc_size[blocktype][1]; 
    int   pic4_pix_x      = (pic_pix_x << 2); 
    int   pic4_pix_y      = (pic_pix_y << 2); 
    int   max_pos_x4      = ((img->width -blocksize_x+1)<<2); 
    int   max_pos_y4      = ((img->height-blocksize_y+1)<<2); 
     
    int   min_pos2        = (input->hadamard ? 0 : 1); 
    int   max_pos2        = (input->hadamard ? max(1,search_pos2) : search_pos2); 
    int   search_range_dynamic,iXMinNow,iYMinNow,i; 
    int   iSADLayer,m,currmv_x,currmv_y,iCurrSearchRange; 
    int   search_range = input->search_range; 
    int   pred_frac_mv_x,pred_frac_mv_y,abort_search; 
    int   mv_cost;  
    int   pred_frac_up_mv_x, pred_frac_up_mv_y; 
	int delta_P,TRp,DistanceIndexFw,DistanceIndexBw,refframe,delta_PB; 
 
    ref_pic_bid = img->type==B_IMG? mref [0] : mref [ref]; 
	//xyji   
	ref_pic_bid = img->type==B_IMG? mref [img->picture_structure ? 0 : ref/*3-(ref+incr)*/] : mref [ref]; 
 
	refframe = ref;// + incr; 
	delta_P = 2*(img->imgtr_next_P_frm - img->imgtr_last_P_frm); 
	if(img->picture_structure) 
		TRp = (refframe+1)*delta_P; 
	else 
		TRp = delta_P;//ref == 0 ? delta_P-1 : delta_P+1; 
	delta_PB = 2*(img->tr - img->imgtr_last_P_frm); 
	if(!img->picture_structure) 
	{ 
		if(img->current_mb_nr_fld < img->total_number_mb) //top field 
			DistanceIndexFw =  refframe == 0 ? delta_PB-1:delta_PB; 
		else 
			DistanceIndexFw =  refframe == 0 ? delta_PB:delta_PB+1; 
	} 
	else 
		DistanceIndexFw = delta_PB;	    
	DistanceIndexBw    = TRp - DistanceIndexFw; 
 
    *mv_x <<= 2; 
    *mv_y <<= 2; 
    if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 2) && 
      (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 2)   ) 
    { 
      PelY_14 = UMVPelY_14;//FastPelY_14;//lgp 
    } 
    else 
    { 
      PelY_14 = UMVPelY_14; 
    } 
     
    search_range_dynamic = 3; 
    pred_frac_mv_x = (pred_mv_x - *mv_x)%4; 
    pred_frac_mv_y = (pred_mv_y - *mv_y)%4;  
     
    pred_frac_up_mv_x = (pred_MV_uplayer[0] - *mv_x)%4; 
    pred_frac_up_mv_y = (pred_MV_uplayer[1] - *mv_y)%4; 
     
     
    memset(SearchState[0],0,(2*search_range_dynamic+1)*(2*search_range_dynamic+1)); 
     
    if(input->hadamard) 
    { 
      cand_mv_x = *mv_x;     
      cand_mv_y = *mv_y;     
      mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);		 
      mcost = AddUpSADQuarter_bid(pic_pix_x,pic_pix_y,blocksize_x,blocksize_y,cand_mv_x,cand_mv_y,ref_pic, 
									ref_pic_bid,orig_pic,mv_cost,min_mcost,useABT,DistanceIndexFw,DistanceIndexBw); 
      SearchState[search_range_dynamic][search_range_dynamic] = 1; 
      if (mcost < min_mcost) 
      { 
        min_mcost = mcost; 
        currmv_x = cand_mv_x; 
        currmv_y = cand_mv_y;	 
      } 
    } 
    else 
    { 
      SearchState[search_range_dynamic][search_range_dynamic] = 1; 
      currmv_x = *mv_x; 
      currmv_y = *mv_y;	 
    } 
     
    if(pred_frac_mv_x!=0 || pred_frac_mv_y!=0) 
    { 
      cand_mv_x = *mv_x + pred_frac_mv_x;     
      cand_mv_y = *mv_y + pred_frac_mv_y;     
      mv_cost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);		 
      mcost = AddUpSADQuarter_bid(pic_pix_x,pic_pix_y,blocksize_x,blocksize_y,cand_mv_x,cand_mv_y,ref_pic, 
									ref_pic_bid,orig_pic,mv_cost,min_mcost,useABT,DistanceIndexFw,DistanceIndexBw); 
      SearchState[cand_mv_y -*mv_y + search_range_dynamic][cand_mv_x - *mv_x + search_range_dynamic] = 1; 
      if (mcost < min_mcost) 
      { 
        min_mcost = mcost; 
        currmv_x = cand_mv_x; 
        currmv_y = cand_mv_y;	 
      } 
    } 
     
      
    iXMinNow = currmv_x; 
    iYMinNow = currmv_y; 
    iCurrSearchRange = 2*search_range_dynamic+1;  
    for(i=0;i>8); 
	  ry0_bid = ((pic_pix_y+y0)<<2) - ((cand_mv_y*DistanceIndexBw*(512/DistanceIndexFw)+256)>>9); 
      for (x0=0; x0>8); 
		rx0_bid = ((pic_pix_x+x0)<<2) - ((cand_mv_x*DistanceIndexBw*(512/DistanceIndexFw)+256)>>9); 
        d   = diff; 
         
        orig_line = orig_pic [y0  ];    ry=ry0; ry_bid = ry0_bid;//lgp 
        *d++      = orig_line[x0  ]  -  (PelY_14 (ref_pic, ry, rx0   ) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid   ))/2; 
        *d++      = orig_line[x0+1]  -  (PelY_14 (ref_pic, ry, rx0+ 4) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 4))/2; 
        *d++      = orig_line[x0+2]  -  (PelY_14 (ref_pic, ry, rx0+ 8) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 8))/2; 
        *d++      = orig_line[x0+3]  -  (PelY_14 (ref_pic, ry, rx0+12) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+12))/2; 
         
        orig_line = orig_pic [y0+1];    ry=ry0+4;ry_bid=ry0_bid+4; 
        *d++      = orig_line[x0  ]  -  (PelY_14 (ref_pic, ry, rx0   ) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid   ))/2; 
        *d++      = orig_line[x0+1]  -  (PelY_14 (ref_pic, ry, rx0+ 4) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 4))/2; 
        *d++      = orig_line[x0+2]  -  (PelY_14 (ref_pic, ry, rx0+ 8) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 8))/2; 
        *d++      = orig_line[x0+3]  -  (PelY_14 (ref_pic, ry, rx0+12) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+12))/2; 
 
        orig_line = orig_pic [y0+2];    ry=ry0+8;ry_bid=ry0_bid+8; 
        *d++      = orig_line[x0  ]  -  (PelY_14 (ref_pic, ry, rx0   ) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid   ))/2; 
        *d++      = orig_line[x0+1]  -  (PelY_14 (ref_pic, ry, rx0+ 4) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 4))/2; 
        *d++      = orig_line[x0+2]  -  (PelY_14 (ref_pic, ry, rx0+ 8) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 8))/2; 
        *d++      = orig_line[x0+3]  -  (PelY_14 (ref_pic, ry, rx0+12) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+12))/2; 
 
        orig_line = orig_pic [y0+3];    ry=ry0+12;ry_bid=ry0_bid+12; 
        *d++      = orig_line[x0  ]  -  (PelY_14 (ref_pic, ry, rx0   ) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid   ))/2; 
        *d++      = orig_line[x0+1]  -  (PelY_14 (ref_pic, ry, rx0+ 4) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 4))/2; 
        *d++      = orig_line[x0+2]  -  (PelY_14 (ref_pic, ry, rx0+ 8) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+ 8))/2; 
        *d        = orig_line[x0+3]  -  (PelY_14 (ref_pic, ry, rx0+12) + PelY_14 (ref_pic_bid, ry_bid, rx0_bid+12))/2; 
         
        if (!useABT) 
        { 
          if ((mcost += SATD (diff, input->hadamard)) > min_mcost) 
          { 
            abort_search = 1; 
            break; 
          } 
        } 
        else  // copy diff to curr_diff for ABT SATD calculation 
        { 
          for (yy=y0,kk=0; yy