/* Source listing: www.pudn.com > MPEG4Codec.zip > basic_prediction_mmx.c */
/**************************************************************************
 *
 * This code has been developed by Eugene Kuznetsov. This software is an
 * implementation of a part of one or more MPEG-4 Video tools as
 * specified in ISO/IEC 14496-2 standard. Those intending to use this
 * software module in hardware or software products are advised that its
 * use may infringe existing patents or copyrights, and any such use
 * would be at such party's own risk. The original developer of this
 * software module and his/her company, and subsequent editors and their
 * companies (including Project Mayo), will have no liability for use of
 * this software or modifications or derivatives thereof.
 *
 * Project Mayo gives users of the Codec a license to this software
 * module or modifications thereof for use in hardware or software
 * products claiming conformance to the MPEG-4 Video Standard as
 * described in the Open DivX license.
 *
 * The complete Open DivX license can be found at
 * http://www.projectmayo.com/opendivx/license.php
 *
 **************************************************************************/
/**
 * Copyright (C) 2001 - Project Mayo
 *
 * Eugene Kuznetsov
 *
 * DivX Advanced Research Center
 **/

#include "basic_prediction.h"

// Purpose: specialized basic motion compensation routines
//
// Every routine reads a square of pixels from Src and writes a square of
// the same size to Dst.  Both buffers use the same row pitch (Stride, in
// bytes).  "Block" routines work on 8x8 pixels, "MBlock" routines on 16x16.
//
//   <no suffix>   plain copy
//   Hor           average of each pixel and its right-hand neighbour
//   Ver           average of each pixel and the pixel one row below
//   HorVer        average of the 2x2 neighbourhood
//   ...Round      same as above with the rounding bias lowered by one
//                 (the result is rounded down more often)
//
// Earlier revisions implemented most of these routines with 32-bit x86 GCC
// inline assembly.  Those asm statements modified registers that were
// declared as inputs only and did not declare their register, memory and
// flag clobbers, so the compiler was entitled to miscompile around them, and
// they could not be assembled for any other target.  The routines below
// compute exactly the same values with plain loops that the compiler is free
// to unroll or vectorize.
//
// NOTE(review): Src and Dst are assumed not to overlap - confirm against
// the callers.

/* Edge length, in pixels, of a block and of a macroblock. */
enum { BP_BLOCK_EDGE = 8, BP_MBLOCK_EDGE = 16 };

/*
 * Copy an edge x edge square of bytes from src to dst.
 *
 * src, dst  top-left pixel of the source / destination square
 * stride    distance in bytes between vertically adjacent pixels
 * edge      width and height of the square
 */
static void CopySquare(const unsigned char *src, unsigned char *dst,
                       int stride, int edge)
{
    int row, col;

    for (row = 0; row < edge; row++) {
        /* Index from the base pointer so no out-of-range pointer is formed
         * after the last row. */
        const unsigned char *s = src + row * stride;
        unsigned char *d = dst + row * stride;

        for (col = 0; col < edge; col++) {
            d[col] = s[col];
        }
    }
}

/*
 * Two-tap interpolation over an edge x edge square:
 *
 *     dst[p] = (src[p] + src[p + step] + bias) >> 1
 *
 * step  offset of the second tap: 1 for the right-hand neighbour,
 *       stride for the pixel one row below
 * bias  1 rounds halves up, 0 truncates
 *
 * Reads one extra column (step == 1) or one extra row (step == stride)
 * beyond the edge x edge source square.
 */
static void AveragePair(const unsigned char *src, unsigned char *dst,
                        int stride, int edge, int step, int bias)
{
    int row, col;

    for (row = 0; row < edge; row++) {
        const unsigned char *s = src + row * stride;
        unsigned char *d = dst + row * stride;

        for (col = 0; col < edge; col++) {
            /* Sum is at most 255 + 255 + 1, so the shifted value fits a byte. */
            d[col] = (unsigned char) ((s[col] + s[col + step] + bias) >> 1);
        }
    }
}

/*
 * Four-tap interpolation over an edge x edge square:
 *
 *     dst[p] = (src[p] + src[p + 1] +
 *               src[p + stride] + src[p + stride + 1] + bias) >> 2
 *
 * bias  2 rounds to nearest (halves up), 1 rounds down more often
 *
 * Reads one extra column and one extra row beyond the edge x edge
 * source square.
 */
static void AverageQuad(const unsigned char *src, unsigned char *dst,
                        int stride, int edge, int bias)
{
    int row, col;

    for (row = 0; row < edge; row++) {
        const unsigned char *s = src + row * stride;
        unsigned char *d = dst + row * stride;

        for (col = 0; col < edge; col++) {
            /* Sum is at most 4 * 255 + 2, so the shifted value fits a byte. */
            d[col] = (unsigned char) ((s[col] + s[col + 1] +
                                       s[col + stride] + s[col + stride + 1] +
                                       bias) >> 2);
        }
    }
}

/*
 * 8x8 block routines
 */

/*
 * Copy an 8x8 block.
 *
 * The previous version copied through "long *", which assumed 4-byte longs,
 * 4-byte aligned buffers and a Stride that is a multiple of 4; none of that
 * is required any more.
 */
void CopyBlock(unsigned char * Src, unsigned char * Dst, int Stride)
{
    CopySquare(Src, Dst, Stride, BP_BLOCK_EDGE);
}

/* 8x8 horizontal interpolation: Dst = (Src[x] + Src[x+1] + 1) >> 1. */
void CopyBlockHor(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_BLOCK_EDGE, 1, 1);
}

/* 8x8 vertical interpolation: Dst = (Src[x] + Src[x+Stride] + 1) >> 1. */
void CopyBlockVer(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_BLOCK_EDGE, Stride, 1);
}

/* 8x8 horizontal interpolation, truncating: Dst = (Src[x] + Src[x+1]) >> 1. */
void CopyBlockHorRound(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_BLOCK_EDGE, 1, 0);
}

/* 8x8 vertical interpolation, truncating: Dst = (Src[x] + Src[x+Stride]) >> 1. */
void CopyBlockVerRound(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_BLOCK_EDGE, Stride, 0);
}

/* 8x8 horver interpolation: Dst = (sum of the 2x2 neighbourhood + 2) >> 2. */
void CopyBlockHorVer(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AverageQuad(Src, Dst, Stride, BP_BLOCK_EDGE, 2);
}

/* 8x8 horver interpolation: Dst = (sum of the 2x2 neighbourhood + 1) >> 2. */
void CopyBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AverageQuad(Src, Dst, Stride, BP_BLOCK_EDGE, 1);
}

/*
 * 16x16 macroblock routines
 */

/* Copy a 16x16 macroblock. */
void CopyMBlock(unsigned char * Src, unsigned char * Dst, int Stride)
{
    CopySquare(Src, Dst, Stride, BP_MBLOCK_EDGE);
}

/* 16x16 horizontal interpolation: Dst = (Src[x] + Src[x+1] + 1) >> 1. */
void CopyMBlockHor(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_MBLOCK_EDGE, 1, 1);
}

/* 16x16 vertical interpolation: Dst = (Src[x] + Src[x+Stride] + 1) >> 1. */
void CopyMBlockVer(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_MBLOCK_EDGE, Stride, 1);
}

/* 16x16 horizontal interpolation, truncating: Dst = (Src[x] + Src[x+1]) >> 1. */
void CopyMBlockHorRound(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_MBLOCK_EDGE, 1, 0);
}

/* 16x16 vertical interpolation, truncating: Dst = (Src[x] + Src[x+Stride]) >> 1. */
void CopyMBlockVerRound(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AveragePair(Src, Dst, Stride, BP_MBLOCK_EDGE, Stride, 0);
}

/* 16x16 horver interpolation: Dst = (sum of the 2x2 neighbourhood + 2) >> 2. */
void CopyMBlockHorVer(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AverageQuad(Src, Dst, Stride, BP_MBLOCK_EDGE, 2);
}

/* 16x16 horver interpolation: Dst = (sum of the 2x2 neighbourhood + 1) >> 2. */
void CopyMBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int Stride)
{
    AverageQuad(Src, Dst, Stride, BP_MBLOCK_EDGE, 1);
}