// www.pudn.com > AVS.rar > block.cpp


/************************************************************************* 
 AVS1-P2视频解码器源码 
 版权所有:联合信源数字音视频技术(北京)有限公司, (c) 2005-2006  
 
 AVS1-P2 Video Decoder Source Code 
 (c) Copyright, NSCC All Rights Reserved, 2005-2006 
 ************************************************************************* 
 Distributed under the terms of the GNU General Public License as 
 published by the Free Software Foundation; either version 2 of the 
 License, or (at your option) any later version. 
 
 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 GNU General Public License for more details. 
 
 You should have received a copy of the GNU General Public License 
 along with this program; if not, write to the Free Software 
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*************************************************************************/ 
/************************************************************************* 
  文件名称:	block.cpp 
  描    述: 包含IDCT变换,亮度插值算法和色度插值和其他的一些以块为单位的 
            操作:8x8块重构,拷贝一个8x8块,求两个块的平均值等 
*************************************************************************/ 
/************************************************************************* 
  Revision History 
  date          Modification                                    Author 
  2005-2-8      Created                                          jthou 
 *************************************************************************/ 
#include "block.h" 
#include "global.h" 
 
/************************************************************************/ 
/*  函数功能:IDCT                                                      */ 
/************************************************************************/ 
void inv_transform_B8(AVS_SHORT* psBlock  // block to be inverse transformed. 
   ) 
{ 
  AVS_SHORT  xx, yy; 
  AVS_SHORT  tmp[8]; 
  AVS_SHORT  t; 
  AVS_SHORT  b[8]; 
   
  for(yy=0; yy<8; yy++) 
  {     
    // Horizontal inverse transform 
    // Reorder 
     
    tmp[0]=psBlock[yy*8+0]; 
    tmp[1]=psBlock[yy*8+4]; 
    tmp[2]=psBlock[yy*8+2]; 
    tmp[3]=psBlock[yy*8+6]; 
    tmp[4]=psBlock[yy*8+1]; 
    tmp[5]=psBlock[yy*8+3]; 
    tmp[6]=psBlock[yy*8+5]; 
    tmp[7]=psBlock[yy*8+7]; 
    // Downleft Butterfly 
    b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4]; 
    b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5]; 
    b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6]; 
    b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7]; 
     
    b[4] = ((b[0] + b[1] + b[3])<<1) + b[1]; 
    b[5] = ((b[0] - b[1] + b[2])<<1) + b[0]; 
    b[6] = ((-b[1] - b[2] + b[3])<<1) + b[3]; 
    b[7] = ((b[0] - b[2] - b[3])<<1) - b[2]; 
     
    // Upleft Butterfly 
    t=((tmp[2]*10)+(tmp[3]<<2)); 
    tmp[3]=((tmp[2]<<2)-(tmp[3]*10)); 
    tmp[2]=t; 
     
    t=(tmp[0]+tmp[1])<<3; 
    tmp[1]=(tmp[0]-tmp[1])<<3; 
    tmp[0]=t; 
     
    b[0]=tmp[0]+tmp[2]; 
    b[1]=tmp[1]+tmp[3]; 
    b[2]=tmp[1]-tmp[3]; 
    b[3]=tmp[0]-tmp[2];	  
     
    // Last Butterfly 
     
    psBlock[yy*8+0]=((b[0]+b[4])+(1<<2))>>3; 
    psBlock[yy*8+1]=((b[1]+b[5])+(1<<2))>>3; 
    psBlock[yy*8+2]=((b[2]+b[6])+(1<<2))>>3; 
    psBlock[yy*8+3]=((b[3]+b[7])+(1<<2))>>3; 
    psBlock[yy*8+7]=((b[0]-b[4])+(1<<2))>>3; 
    psBlock[yy*8+6]=((b[1]-b[5])+(1<<2))>>3; 
    psBlock[yy*8+5]=((b[2]-b[6])+(1<<2))>>3; 
    psBlock[yy*8+4]=((b[3]-b[7])+(1<<2))>>3; 
  } 
  // Vertical inverse transform 
  for(xx=0; xx<8; xx++) 
  {      
    // Reorder 
    tmp[0]=psBlock[0*8+xx]; 
    tmp[1]=psBlock[4*8+xx]; 
    tmp[2]=psBlock[2*8+xx]; 
    tmp[3]=psBlock[6*8+xx]; 
    tmp[4]=psBlock[1*8+xx]; 
    tmp[5]=psBlock[3*8+xx]; 
    tmp[6]=psBlock[5*8+xx]; 
    tmp[7]=psBlock[7*8+xx]; 
     
    // Downleft Butterfly 
    b[0] = ((tmp[4] - tmp[7])<<1) + tmp[4]; 
    b[1] = ((tmp[5] + tmp[6])<<1) + tmp[5]; 
    b[2] = ((tmp[5] - tmp[6])<<1) - tmp[6]; 
    b[3] = ((tmp[4] + tmp[7])<<1) + tmp[7]; 
     
    b[4] = ((b[0] + b[1] + b[3])<<1) + b[1]; 
    b[5] = ((b[0] - b[1] + b[2])<<1) + b[0]; 
    b[6] = ((-b[1] - b[2] + b[3])<<1) + b[3]; 
    b[7] = ((b[0] - b[2] - b[3])<<1) - b[2]; 
     
    // Upleft Butterfly 
    t=((tmp[2]*10)+(tmp[3]<<2)); 
    tmp[3]=((tmp[2]<<2)-(tmp[3]*10)); 
    tmp[2]=t; 
     
    t=(tmp[0]+tmp[1])<<3; 
    tmp[1]=(tmp[0]-tmp[1])<<3; 
    tmp[0]=t; 
     
    b[0]=tmp[0]+tmp[2]; 
    b[1]=tmp[1]+tmp[3]; 
    b[2]=tmp[1]-tmp[3]; 
    b[3]=tmp[0]-tmp[2]; 
     
    // Last Butterfly 
    psBlock[0*8+xx]=(Clip3(-32768,32703,b[0]+b[4])+64)>>7; 
    psBlock[1*8+xx]=(Clip3(-32768,32703,b[1]+b[5])+64)>>7; 
    psBlock[2*8+xx]=(Clip3(-32768,32703,b[2]+b[6])+64)>>7; 
    psBlock[3*8+xx]=(Clip3(-32768,32703,b[3]+b[7])+64)>>7; 
    psBlock[7*8+xx]=(Clip3(-32768,32703,b[0]-b[4])+64)>>7; 
    psBlock[6*8+xx]=(Clip3(-32768,32703,b[1]-b[5])+64)>>7; 
    psBlock[5*8+xx]=(Clip3(-32768,32703,b[2]-b[6])+64)>>7; 
    psBlock[4*8+xx]=(Clip3(-32768,32703,b[3]-b[7])+64)>>7; 
  } 
} 
 
/************************************************************************/ 
/*  函数功能:8x8亮度块插值预测                                         */ 
/************************************************************************/ 
 
void GetBlock(MOTIONVECTOR* pMv, AVS_INT iImgX, AVS_INT iImgY, AVS_INT iImgWidth, AVS_INT iImgHeight, const AVS_BYTE* pbRefY, AVS_BYTE* pPred) 
{ 
  int i, j; 
  int imgX = iImgX; 
  int imgY = iImgY; 
 
  int dx = (imgX*4 + pMv->x)&3; 
  int dy = (imgY*4 + pMv->y)&3; 
 
  int refX = (imgX*4 + pMv->x - dx)/4; 
  int refY = (imgY*4 + pMv->y - dy)/4; 
 
  if(dy ==0 && dx == 0)  //D 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        pPred[i*8+j] = *(pbRefY + max(0,min(iImgHeight-1,refY+i))*iImgWidth + max(0,min(iImgWidth-1,refX+j))); 
      } 
    } 
  } 
  else if(dx==2 && dy==0) //b 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int bpie =  
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))); 
        pPred[i*8+j] = Clip((bpie+4)>>3); 
      } 
    } 
  } 
  else if(dy==2 && dx==0) //h 
  { 
    for(i=0; i<8; i++) 
      for(j=0; j<8; j++) 
      { 
        int hpie =  
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j))); 
        pPred[i*8+j] = Clip((hpie+4)>>3); 
      } 
  } 
  else if(dy==2 && dx==2) //j 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
 
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
        pPred[i*8+j] = Clip((jpie + 32)>>6); 
      } 
    } 
  } 
  else if(dy==0 && dx==1) //a 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int eepie =  
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j-2)))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j  ))) 
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))); 
        int Dpie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 8; 
        int bpie =  
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))); 
        int Epie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) * 8; 
        pPred[i*8+j] = Clip((eepie + 7*Dpie + 7*bpie + Epie + 64)>>7); 
      } 
    } 
  } 
  else if(dy==0 && dx==3) //c 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int Dpie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 8; 
        int Epie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) * 8; 
        int bpie =  
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))); 
        int ccpie =  
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))  
          + 5* *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))) 
          -    *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+3))); 
        pPred[i*8+j] = Clip((Dpie + 7*bpie + 7*Epie + ccpie + 64)>>7); 
      } 
    } 
  } 
  else if(dy==1 && dx==0) //d 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int ffpie =  
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-2))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))); 
        
        int Dpie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 8; 
        int hpie =  
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j))); 
                int Hpie = *(pbRefY + max(0,min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 8;  
        int dpie = ffpie + 7*Dpie + 7*hpie + Hpie; 
        pPred[i*8+j] = Clip((dpie + 64)>>7);         
      } 
    } 
  } 
  else if(dy==1 && dx==1) //e 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
         
        int Dpiepie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 64; 
  
        pPred[i*8+j] = Clip((Dpiepie+jpie+64)>>7); 
      } 
    } 
  } 
  else if(dy==1 && dx==2) //f 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int xxpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
 
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
 
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
 
        int hhpie = -xxpie + 5*aapie + 5*bpie - spie; 
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
         
        pPred[i*8+j] = Clip((hhpie+7*bpie*8+7*jpie+spie*8+512)>>10);       
      } 
    } 
  } 
  else if(dy==1 && dx==3) //g 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
 
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
        int Epiepie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) * 64; 
        pPred[i*8+j] = Clip((Epiepie+jpie+64)>>7); 
      } 
    } 
  } 
  else if(dy==2 && dx==1) //i 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
 
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
 
        int aaxxpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-2)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))); 
        int eepie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-2)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))); 
 
        int sxxpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-2)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))); 
        int ddxxpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-2)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))); 
        int ggpie = -aaxxpie + 5*eepie + 5*sxxpie - ddxxpie; 
        int hpie =  
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j))); 
        int mpie =  
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
          + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
          -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))); 
        pPred[i*8+j] = Clip((ggpie+7*hpie*8+7*jpie+mpie*8+512)>>10); 
      } 
    } 
  } 
  else if(dy==2 && dx==3) //k 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int bbpie =  
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1))) 
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1))); 
        int hpie =   
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j))); 
        int mpie =  
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) 
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))); 
        int ccpie =  
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))) 
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))); 
        int jpie = -bbpie + 5*hpie + 5*mpie - ccpie; 
 
        int xxpie =  
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+3))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+3))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+3))) 
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+3))); 
        int kkpie = -hpie + 5*mpie + 5*ccpie - xxpie; 
        pPred[i*8+j] = Clip((hpie*8+7*jpie+7*mpie*8+kkpie+512)>>10); 
 
      } 
    } 
  } 
  else if(dy==3 && dx==0) //n 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int Dpie = *(pbRefY + max(0,min(iImgHeight-1, refY+i))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 8; 
        int hpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j)))); 
        int Hpie = *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 8;   
        int xxpie =  
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
              + 5* *(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) 
              -    *(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j))); 
        pPred[i*8+j] = Clip((Dpie+7*hpie+7*Hpie+xxpie+64)>>7); 
     } 
    } 
 
  } 
  else if(dy==3 && dx==1) //p 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int Hpiepie = *(pbRefY + max(0,min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j))) * 64;   
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
        pPred[i*8+j] = Clip((jpie+Hpiepie+64)>>7); 
      } 
    } 
  } 
  else if(dy==3 && dx==2) //q 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2))));        
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int xxpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
        int jxxpie = -bpie + 5*spie + 5*ddpie - xxpie;      
        int qpie = bpie*8 + 7*jpie + 7*spie*8 + jxxpie; 
        pPred[i*8+j] = Clip((qpie + 512)>>10); 
      } 
    } 
  } 
  else if(dx==3 && dy==3) //r 
  { 
    for(i=0; i<8; i++) 
    { 
      for(j=0; j<8; j++) 
      { 
        int Ipiepie = *(pbRefY + max(0,min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1))) * 64;   
        int aapie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i-1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int bpie =   
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i  ))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int spie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+1))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int ddpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+2))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int xxpie =  
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j-1)))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j  )))) 
              + 5* (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j+1)))) 
              -    (*(pbRefY + max(0, min(iImgHeight-1, refY+i+3))*iImgWidth + max(0, min(iImgWidth-1, refX+j+2)))); 
        int jpie = -aapie + 5*bpie + 5*spie - ddpie; 
        pPred[i*8+j] = Clip((jpie+Ipiepie+64)>>7); 
      } 
    } 
  }  
} 
 
/************************************************************************/
/* Purpose: bilinear chroma prediction, 8 rows x 8 columns of output.   */
/*   (presumably the chroma area of a 16x16 luma partition - confirm)   */
/*                                                                      */
/*   pbRefUV   : reference chroma plane, iUVWidth samples per row       */
/*   iUVx,iUVy : block position in whole chroma samples                 */
/*   iUVWidth, iUVHeight : plane dimensions used for edge clamping      */
/*   pMv       : motion vector; x/y are added in 1/8-sample units       */
/*   pPred     : output buffer, written with a row stride of 8          */
/*                                                                      */
/*   Reference coordinates are clamped to the plane, so positions       */
/*   outside the picture reuse the nearest edge sample.                 */
/************************************************************************/
void GetChromaBlock16x16(const AVS_BYTE* pbRefUV, AVS_INT iUVx, AVS_INT iUVy, AVS_INT iUVWidth, AVS_INT iUVHeight, MOTIONVECTOR* pMv, AVS_BYTE* pPred)
{
  /* Target position in 1/8-sample units, split into fraction and integer part. */
  AVS_INT posX  = iUVx*8 + pMv->x;
  AVS_INT posY  = iUVy*8 + pMv->y;
  AVS_INT fracX = posX & 7;
  AVS_INT fracY = posY & 7;
  AVS_INT baseX = (posX - fracX)/8;
  AVS_INT baseY = (posY - fracY)/8;

  /* Bilinear weights of the four neighbours; they always add up to 64. */
  AVS_INT wTopLeft  = (8-fracX)*(8-fracY);
  AVS_INT wTopRight = fracX*(8-fracY);
  AVS_INT wBotLeft  = (8-fracX)*fracY;
  AVS_INT wBotRight = fracX*fracY;

  AVS_INT row, col;

  for(row=0; row<8; row++)
  {
    /* The two clamped source rows are the same for every column of this row. */
    const AVS_BYTE* pTop = pbRefUV + max(0, min(baseY+row,   iUVHeight-1))*iUVWidth;
    const AVS_BYTE* pBot = pbRefUV + max(0, min(baseY+row+1, iUVHeight-1))*iUVWidth;
    AVS_BYTE* pOut = pPred + row*8;

    for(col=0; col<8; col++)
    {
      AVS_INT xLeft  = max(0, min(baseX+col,   iUVWidth-1));
      AVS_INT xRight = max(0, min(baseX+col+1, iUVWidth-1));
      /* +32 rounds to nearest before the division by 64 */
      AVS_INT acc = wTopLeft*pTop[xLeft] + wTopRight*pTop[xRight]
                  + wBotLeft*pBot[xLeft] + wBotRight*pBot[xRight] + 32;
      pOut[col] = Clip(acc>>6);
    }
  }
}
 
/************************************************************************/
/* Purpose: bilinear chroma prediction, 4 rows x 8 columns of output.   */
/*   (presumably the chroma area of a 16x8 luma partition - confirm)    */
/*                                                                      */
/*   pbRefUV   : reference chroma plane, iUVWidth samples per row       */
/*   iUVx,iUVy : block position in whole chroma samples                 */
/*   iUVWidth, iUVHeight : plane dimensions used for edge clamping      */
/*   pMv       : motion vector; x/y are added in 1/8-sample units       */
/*   pPred     : output buffer, written with a row stride of 8          */
/*                                                                      */
/*   Reference coordinates are clamped to the plane, so positions       */
/*   outside the picture reuse the nearest edge sample.                 */
/************************************************************************/
void GetChromaBlock16x8(const AVS_BYTE* pbRefUV, AVS_INT iUVx, AVS_INT iUVy, AVS_INT iUVWidth, AVS_INT iUVHeight, MOTIONVECTOR* pMv, AVS_BYTE* pPred)
{
  /* Target position in 1/8-sample units, split into fraction and integer part. */
  AVS_INT posX  = iUVx*8 + pMv->x;
  AVS_INT posY  = iUVy*8 + pMv->y;
  AVS_INT fracX = posX & 7;
  AVS_INT fracY = posY & 7;
  AVS_INT baseX = (posX - fracX)/8;
  AVS_INT baseY = (posY - fracY)/8;

  /* Bilinear weights of the four neighbours; they always add up to 64. */
  AVS_INT wTopLeft  = (8-fracX)*(8-fracY);
  AVS_INT wTopRight = fracX*(8-fracY);
  AVS_INT wBotLeft  = (8-fracX)*fracY;
  AVS_INT wBotRight = fracX*fracY;

  AVS_INT row, col;

  for(row=0; row<4; row++)
  {
    /* The two clamped source rows are the same for every column of this row. */
    const AVS_BYTE* pTop = pbRefUV + max(0, min(baseY+row,   iUVHeight-1))*iUVWidth;
    const AVS_BYTE* pBot = pbRefUV + max(0, min(baseY+row+1, iUVHeight-1))*iUVWidth;
    AVS_BYTE* pOut = pPred + row*8;

    for(col=0; col<8; col++)
    {
      AVS_INT xLeft  = max(0, min(baseX+col,   iUVWidth-1));
      AVS_INT xRight = max(0, min(baseX+col+1, iUVWidth-1));
      /* +32 rounds to nearest before the division by 64 */
      AVS_INT acc = wTopLeft*pTop[xLeft] + wTopRight*pTop[xRight]
                  + wBotLeft*pBot[xLeft] + wBotRight*pBot[xRight] + 32;
      pOut[col] = Clip(acc>>6);
    }
  }
}
 
/************************************************************************/
/* Purpose: bilinear chroma prediction, 8 rows x 4 columns of output.   */
/*   (presumably the chroma area of an 8x16 luma partition - confirm)   */
/*                                                                      */
/*   pbRefUV   : reference chroma plane, iUVWidth samples per row       */
/*   iUVx,iUVy : block position in whole chroma samples                 */
/*   iUVWidth, iUVHeight : plane dimensions used for edge clamping      */
/*   pMv       : motion vector; x/y are added in 1/8-sample units       */
/*   pPred     : output buffer, written with a row stride of 8          */
/*               (only the first 4 samples of each row are written)     */
/*                                                                      */
/*   Reference coordinates are clamped to the plane, so positions       */
/*   outside the picture reuse the nearest edge sample.                 */
/************************************************************************/
void GetChromaBlock8x16(const AVS_BYTE* pbRefUV, AVS_INT iUVx, AVS_INT iUVy, AVS_INT iUVWidth, AVS_INT iUVHeight, MOTIONVECTOR* pMv, AVS_BYTE* pPred)
{
  /* Target position in 1/8-sample units, split into fraction and integer part. */
  AVS_INT posX  = iUVx*8 + pMv->x;
  AVS_INT posY  = iUVy*8 + pMv->y;
  AVS_INT fracX = posX & 7;
  AVS_INT fracY = posY & 7;
  AVS_INT baseX = (posX - fracX)/8;
  AVS_INT baseY = (posY - fracY)/8;

  /* Bilinear weights of the four neighbours; they always add up to 64. */
  AVS_INT wTopLeft  = (8-fracX)*(8-fracY);
  AVS_INT wTopRight = fracX*(8-fracY);
  AVS_INT wBotLeft  = (8-fracX)*fracY;
  AVS_INT wBotRight = fracX*fracY;

  AVS_INT row, col;

  for(row=0; row<8; row++)
  {
    /* The two clamped source rows are the same for every column of this row. */
    const AVS_BYTE* pTop = pbRefUV + max(0, min(baseY+row,   iUVHeight-1))*iUVWidth;
    const AVS_BYTE* pBot = pbRefUV + max(0, min(baseY+row+1, iUVHeight-1))*iUVWidth;
    AVS_BYTE* pOut = pPred + row*8;

    for(col=0; col<4; col++)
    {
      AVS_INT xLeft  = max(0, min(baseX+col,   iUVWidth-1));
      AVS_INT xRight = max(0, min(baseX+col+1, iUVWidth-1));
      /* +32 rounds to nearest before the division by 64 */
      AVS_INT acc = wTopLeft*pTop[xLeft] + wTopRight*pTop[xRight]
                  + wBotLeft*pBot[xLeft] + wBotRight*pBot[xRight] + 32;
      pOut[col] = Clip(acc>>6);
    }
  }
}
 
/************************************************************************/
/* Purpose: bilinear chroma prediction, 4 rows x 4 columns of output.   */
/*   (presumably the chroma area of an 8x8 luma partition - confirm)    */
/*                                                                      */
/*   pbRefUV   : reference chroma plane, iUVWidth samples per row       */
/*   iUVx,iUVy : block position in whole chroma samples                 */
/*   iUVWidth, iUVHeight : plane dimensions used for edge clamping      */
/*   pMv       : motion vector; x/y are added in 1/8-sample units       */
/*   pPred     : output buffer, written with a row stride of 8          */
/*               (only the first 4 samples of each row are written)     */
/*                                                                      */
/*   Reference coordinates are clamped to the plane, so positions       */
/*   outside the picture reuse the nearest edge sample.                 */
/************************************************************************/
void GetChromaBlock8x8(const AVS_BYTE* pbRefUV, AVS_INT iUVx, AVS_INT iUVy, AVS_INT iUVWidth, AVS_INT iUVHeight, MOTIONVECTOR* pMv, AVS_BYTE* pPred)
{
  /* Target position in 1/8-sample units, split into fraction and integer part. */
  AVS_INT posX  = iUVx*8 + pMv->x;
  AVS_INT posY  = iUVy*8 + pMv->y;
  AVS_INT fracX = posX & 7;
  AVS_INT fracY = posY & 7;
  AVS_INT baseX = (posX - fracX)/8;
  AVS_INT baseY = (posY - fracY)/8;

  /* Bilinear weights of the four neighbours; they always add up to 64. */
  AVS_INT wTopLeft  = (8-fracX)*(8-fracY);
  AVS_INT wTopRight = fracX*(8-fracY);
  AVS_INT wBotLeft  = (8-fracX)*fracY;
  AVS_INT wBotRight = fracX*fracY;

  AVS_INT row, col;

  for(row=0; row<4; row++)
  {
    /* The two clamped source rows are the same for every column of this row. */
    const AVS_BYTE* pTop = pbRefUV + max(0, min(baseY+row,   iUVHeight-1))*iUVWidth;
    const AVS_BYTE* pBot = pbRefUV + max(0, min(baseY+row+1, iUVHeight-1))*iUVWidth;
    AVS_BYTE* pOut = pPred + row*8;

    for(col=0; col<4; col++)
    {
      AVS_INT xLeft  = max(0, min(baseX+col,   iUVWidth-1));
      AVS_INT xRight = max(0, min(baseX+col+1, iUVWidth-1));
      /* +32 rounds to nearest before the division by 64 */
      AVS_INT acc = wTopLeft*pTop[xLeft] + wTopRight*pTop[xRight]
                  + wBotLeft*pBot[xLeft] + wBotRight*pBot[xRight] + 32;
      pOut[col] = Clip(acc>>6);
    }
  }
}
 
/************************************************************************/
/* Purpose: reconstruct one 8x8 block                                   */
/*          - reconstructed sample = prediction + residual coefficient, */
/*            passed through Clip() before being stored                 */
/*                                                                      */
/*   pDst      : destination; rows are iImgWidth bytes apart            */
/*   iImgWidth : row stride of the destination                          */
/*   pPred     : 64 prediction samples, stored contiguously (stride 8)  */
/*   pCoef     : 64 residual values, stored contiguously (stride 8)     */
/************************************************************************/
void ReconB8(AVS_BYTE* pDst, AVS_INT iImgWidth, AVS_BYTE* pPred, AVS_SHORT* pCoef)
{
  AVS_BYTE* pOutRow = pDst;
  AVS_INT row, col;

  for(row=0; row<8; row++)
  {
    const AVS_BYTE*  pPredRow = pPred + row*8;
    const AVS_SHORT* pCoefRow = pCoef + row*8;

    for(col=0; col<8; col++)
    {
      int sum = pPredRow[col] + pCoefRow[col];
      pOutRow[col] = Clip(sum);
    }
    /* step to the same column on the next destination row */
    pOutRow += iImgWidth;
  }
}
 
/************************************************************************/
/* Purpose: copy one 8x8 block of bytes                                 */
/*          - mainly used when cbp is 0 and the block has no residual,  */
/*            so the prediction is copied straight into the             */
/*            reconstructed picture                                     */
/*                                                                      */
/*   pDst    : destination; rows are iStride bytes apart                */
/*   iStride : row stride of the destination                            */
/*   pSrc    : source block, 8 bytes per row, stored contiguously       */
/************************************************************************/
void CopyB8(AVS_BYTE* pDst, AVS_INT iStride, AVS_BYTE* pSrc)
{
  AVS_INT row, col;
  for(row=0; row<8; row++)
  {
    /* Copy byte by byte instead of through AVS_INT* casts: the casts    */
    /* violate the aliasing rules, can fault on addresses that are not   */
    /* AVS_INT-aligned, and only move 8 bytes when sizeof(AVS_INT) is 4. */
    for(col=0; col<8; col++)
    {
      pDst[col] = pSrc[col];
    }
    pSrc += 8;
    pDst += iStride;
  }
}
 
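/************************************************************************/
/* Purpose: compute the average of two blocks                           */
/*        - mainly used in bi-directional prediction to average the     */
/*          forward and backward predictions; it can also be used to    */
/*          compute other averages                                      */
/*                                                                      */
/*        - the C code supports any block shape; the MMX code only      */
/*          supports the 8x32, 8x8, 8x4, 4x8 and 4x4 block shapes       */
/************************************************************************/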
void AverageB8x(AVS_BYTE* pDst, AVS_BYTE* pSrc, AVS_INT iHeight, AVS_INT iWidth) 
{ 
  AVS_INT i, j; 
  for(i=0; i