// Source: www.pudn.com > UYVY2RGB.rar > MMX.CPP
// Small integer coefficients. Each value is approximately the decimal number
// spelled in its name multiplied by 64 (e.g. 1.596 * 64 ~= 102), i.e. they
// look like 6-bit fixed-point forms of the YUV->RGB factors -- TODO confirm
// against the code that consumes them; the function directly below does not
// reference any of them.
static const int p_1164 = 75;    // ~1.164 * 64
static const int p_1596 = 102;   // ~1.596 * 64
static const int p_0391 = 25;    // ~0.391 * 64
static const int p_0813 = 52;    // ~0.813 * 64
static const int p_2018 = 129;   // ~2.018 * 64

// 32-bit masks: bytes 0 and 2 set, and bytes 1 and 3 set, respectively.
static const int ooffooff = 0x00ff00ff;
static const int ffooffoo = 0xff00ff00;
/*
 * yuv2rgb4XmmxC420
 *
 * Converts planar YUV 4:2:0 data to 32-bit pixels using MMX (MSVC inline
 * assembly, 32-bit x86). Work is done in blocks of 8 columns x 2 rows: each
 * block reads 8 luma bytes from two consecutive Y rows plus 4 U and 4 V
 * bytes, and every chroma sample is shared by a 2x2 group of pixels.
 *
 * Per pixel (all channels saturated to 0..255 by packuswb):
 *     r = 1.164(y-16) + 1.596(v-128)
 *     g = 1.164(y-16) - 0.391(u-128) - 0.813(v-128)
 *     b = 1.164(y-16) + 2.018(u-128)
 * Each pixel is stored as one dword 0x00RRGGBB, i.e. bytes B, G, R, 0 in
 * memory.
 *
 * Parameters:
 *     lpY        luma plane, row stride nSrcWidth bytes
 *     lpU, lpV   chroma planes; the row stride is taken to be nSrcWidth/2
 *                bytes (assumption for odd widths -- TODO confirm with callers)
 *     lpRGB      output buffer, row stride nSrcWidth*4 bytes
 *     nSrcHeight image height in pixels
 *     nSrcWidth  image width in pixels
 *
 * Notes:
 *   - Only whole groups of 8 columns and whole row pairs are converted;
 *     trailing columns (nSrcWidth % 8) and an odd last row are left untouched.
 *   - If any pointer is null, or there is not at least one full 8x2 block,
 *     the function returns without touching the output. Both loops are
 *     bottom-tested, so a zero count would otherwise wrap around.
 *   - The 64-bit operands t16, t128, t1164, t0391, t0813, t1596 and t2018 are
 *     file-scope data defined outside this function; presumably each holds
 *     four identical 16-bit words scaled to match the shift that precedes the
 *     corresponding pmulhw -- verify against their definitions.
 */
void yuv2rgb4XmmxC420(unsigned char *lpY,
unsigned char *lpU,
unsigned char *lpV,
unsigned char *lpRGB,
int nSrcHeight,
int nSrcWidth)
{
    int rgbwidth = nSrcWidth << 2;   // bytes per output row (32 bits per pixel)
    int nyw = nSrcWidth;             // bytes per luma row
    int col = nSrcWidth >> 3;        // 8-pixel groups per row
    int row = nSrcHeight >> 1;       // row pairs
    __int64 ty;                      // spill slot for the scaled odd-column luma

    if (!lpY || !lpU || !lpV || !lpRGB)
        return;
    if (col <= 0 || row <= 0)
        return;                      // nothing to convert; avoids counter wrap-around

    // Distances from the end of the last converted 8-pixel group of a row pair
    // to the start of the next row pair. When nSrcWidth is a multiple of 8
    // these are nyw, rgbwidth and 0 respectively.
    int yRowSkip = (nyw << 1) - (col << 3);
    int rgbRowSkip = (rgbwidth << 1) - (col << 5);
    int uvRowSkip = (nSrcWidth >> 1) - (col << 2);

// Registers that hold the chroma contributions for the whole 8x2 block.
#define mmt2018u mm1
#define mmt0813v mm2
#define mmt0391u mm3
#define mmt1596v mm4

    __asm
    {
        // esi = U, edi = V, eax = Y (top row of the pair), edx = output,
        // ecx = 8-pixel groups left in this row pair, ebx = row pairs left
        mov esi,lpU
        mov edi,lpV
        mov eax,lpY
        mov edx,lpRGB
        mov ecx,col
        mov ebx,row
    next_block:
        // ---- chroma terms for 4 U / 4 V samples ----
        pxor mm0,mm0
        movq mm3,qword ptr t128
        movq mm4,qword ptr t0391
        movq mm5,qword ptr t2018
        movq mm6,qword ptr t1596
        movq mm7,qword ptr t0813
        movd mm1,dword ptr [esi]        // u3 u2 u1 u0 (bytes)
        movd mm2,dword ptr [edi]        // v3 v2 v1 v0 (bytes)
        punpcklbw mm1,mm0               // zero-extend U to words
        punpcklbw mm2,mm0               // zero-extend V to words
        psubsw mm1,mm3                  // u - t128
        psubsw mm2,mm3                  // v - t128
        movq mm3,mm1
        psllw mm1,3                     // pre-shift so pmulhw keeps the useful bits
        pmulhw mm3,mm4                  // 0.391(u-128) --> mm3
        pmulhw mm1,mm5                  // 2.018(u-128) --> mm1
        movq mm4,mm2
        psllw mm2,1
        psllw mm4,2
        pmulhw mm2,mm7                  // 0.813(v-128) --> mm2
        pmulhw mm4,mm6                  // 1.596(v-128) --> mm4

        // ---- top row: split 8 luma bytes into even and odd columns ----
        movq mm5,qword ptr [eax]        // y7 y6 y5 y4 y3 y2 y1 y0
        pxor mm0,mm0
        movq mm6,mm5
        punpcklbw mm5,mm0               // 00 y3 00 y2 00 y1 00 y0
        punpckhbw mm0,mm6               // y7 00 y6 00 y5 00 y4 00
        por mm0,mm5                     // y7 y3 y6 y2 y5 y1 y4 y0
        pxor mm6,mm6
        pxor mm5,mm5
        punpckhbw mm6,mm0               // y7 00 y3 00 y6 00 y2 00
        punpcklbw mm0,mm5               // 00 y5 00 y1 00 y4 00 y0
        por mm0,mm6                     // y7 y5 y3 y1 y6 y4 y2 y0
        pxor mm5,mm5
        movq mm6,mm0
        punpckhbw mm6,mm5               // words: y7 y5 y3 y1
        punpcklbw mm0,mm5               // words: y6 y4 y2 y0
        movq mm5,qword ptr t16
        movq mm7,qword ptr t1164
        psubsw mm6,mm5
        psubsw mm0,mm5
        psllw mm6,2
        psllw mm0,2
        pmulhw mm6,mm7                  // 1.164(y-16) for y7 y5 y3 y1
        pmulhw mm0,mm7                  // 1.164(y-16) for y6 y4 y2 y0 --> mm0
        movq qword ptr ty,mm6           // park the odd columns in ty

        // ---- top row, even columns (pixels 0 2 4 6) ----
        pxor mm7,mm7
        movq mm5,mmt1596v
        movq mm6,mm0
        paddsw mm5,mm0                  // r = 1.164(y-16)+1.596(v-128) --> mm5
        psubsw mm6,mmt0391u             // 1.164(y-16)-0.391(u-128)
        psubsw mm6,mmt0813v             // g = ... -0.813(v-128) --> mm6
        paddsw mm0,mmt2018u             // b = 1.164(y-16)+2.018(u-128) --> mm0
        packuswb mm6,mm7                // saturate g to bytes
        packuswb mm0,mm7                // saturate b to bytes
        punpcklbw mm0,mm6               // g6 b6 g4 b4 g2 b2 g0 b0
        packuswb mm5,mm7                // saturate r to bytes
        punpcklbw mm5,mm7               // 00 r6 00 r4 00 r2 00 r0
        movq mm7,mm0
        punpcklwd mm0,mm5               // pixel 2 | pixel 0
        punpckhwd mm7,mm5               // pixel 6 | pixel 4
        movq qword ptr [edx],mm0        // staged in the output buffer, re-read below
        movq mm0,qword ptr ty           // scaled odd-column luma
        movq qword ptr [edx+8],mm7

        // ---- top row, odd columns (pixels 1 3 5 7) ----
        pxor mm7,mm7
        movq mm5,mmt1596v
        paddsw mm5,mm0                  // r --> mm5
        movq mm6,mm0                    // copy of 1.164(y-16)
        psubsw mm6,mmt0391u             // 1.164(y-16)-0.391(u-128)
        psubsw mm6,mmt0813v             // g --> mm6
        paddsw mm0,mmt2018u             // b --> mm0
        packuswb mm6,mm7
        packuswb mm0,mm7
        punpcklbw mm0,mm6               // g7 b7 g5 b5 g3 b3 g1 b1
        packuswb mm5,mm7
        punpcklbw mm5,mm7               // 00 r7 00 r5 00 r3 00 r1
        movq mm7,mm0
        movq mm6,[edx]                  // pixel 2 | pixel 0
        punpcklwd mm0,mm5               // pixel 3 | pixel 1
        punpckhwd mm7,mm5               // pixel 7 | pixel 5
        movq mm5,mm6
        punpckldq mm6,mm0               // pixel 1 | pixel 0
        punpckhdq mm5,mm0               // pixel 3 | pixel 2
        movq mm0,[edx+8]                // pixel 6 | pixel 4
        movq [edx],mm6
        movq [edx+8],mm5
        movq mm6,mm0
        punpckhdq mm0,mm7               // pixel 7 | pixel 6
        punpckldq mm6,mm7               // pixel 5 | pixel 4
        movq [edx+24],mm0
        movq [edx+16],mm6

        // ---- bottom row of the pair: same steps, same chroma terms ----
        add eax,nyw
        add edx,rgbwidth
        movq mm5,qword ptr [eax]        // y7 y6 y5 y4 y3 y2 y1 y0
        pxor mm0,mm0
        movq mm6,mm5
        punpcklbw mm5,mm0               // 00 y3 00 y2 00 y1 00 y0
        punpckhbw mm0,mm6               // y7 00 y6 00 y5 00 y4 00
        por mm0,mm5                     // y7 y3 y6 y2 y5 y1 y4 y0
        pxor mm6,mm6
        pxor mm5,mm5
        punpckhbw mm6,mm0               // y7 00 y3 00 y6 00 y2 00
        punpcklbw mm0,mm5               // 00 y5 00 y1 00 y4 00 y0
        por mm0,mm6                     // y7 y5 y3 y1 y6 y4 y2 y0
        pxor mm5,mm5
        movq mm6,mm0
        punpckhbw mm6,mm5               // words: y7 y5 y3 y1
        punpcklbw mm0,mm5               // words: y6 y4 y2 y0
        movq mm5,qword ptr t16
        movq mm7,qword ptr t1164
        psubsw mm6,mm5
        psubsw mm0,mm5
        psllw mm6,2
        psllw mm0,2
        pmulhw mm6,mm7                  // 1.164(y-16) for y7 y5 y3 y1
        pmulhw mm0,mm7                  // 1.164(y-16) for y6 y4 y2 y0 --> mm0
        movq qword ptr ty,mm6           // park the odd columns in ty

        // ---- bottom row, even columns ----
        pxor mm7,mm7
        movq mm5,mmt1596v
        paddsw mm5,mm0                  // r --> mm5
        movq mm6,mm0                    // copy of 1.164(y-16)
        psubsw mm6,mmt0391u             // 1.164(y-16)-0.391(u-128)
        psubsw mm6,mmt0813v             // g --> mm6
        paddsw mm0,mmt2018u             // b --> mm0
        packuswb mm6,mm7
        packuswb mm0,mm7
        punpcklbw mm0,mm6               // g6 b6 g4 b4 g2 b2 g0 b0
        packuswb mm5,mm7
        punpcklbw mm5,mm7               // 00 r6 00 r4 00 r2 00 r0
        movq mm7,mm0
        punpcklwd mm0,mm5               // pixel 2 | pixel 0
        punpckhwd mm7,mm5               // pixel 6 | pixel 4
        movq qword ptr [edx],mm0        // staged in the output buffer, re-read below
        movq qword ptr [edx+8],mm7

        // ---- bottom row, odd columns ----
        movq mm0,qword ptr ty
        pxor mm7,mm7
        movq mm5,mmt1596v
        paddsw mm5,mm0                  // r --> mm5
        movq mm6,mm0                    // copy of 1.164(y-16)
        psubsw mm6,mmt0391u             // 1.164(y-16)-0.391(u-128)
        psubsw mm6,mmt0813v             // g --> mm6
        paddsw mm0,mmt2018u             // b --> mm0
        packuswb mm6,mm7
        packuswb mm0,mm7
        punpcklbw mm0,mm6               // g7 b7 g5 b5 g3 b3 g1 b1
        packuswb mm5,mm7
        punpcklbw mm5,mm7               // 00 r7 00 r5 00 r3 00 r1
        movq mm7,mm0
        movq mm6,[edx]                  // pixel 2 | pixel 0
        punpcklwd mm0,mm5               // pixel 3 | pixel 1
        punpckhwd mm7,mm5               // pixel 7 | pixel 5
        movq mm5,mm6
        punpckldq mm6,mm0               // pixel 1 | pixel 0
        punpckhdq mm5,mm0               // pixel 3 | pixel 2
        movq mm0,[edx+8]                // pixel 6 | pixel 4
        movq [edx],mm6
        movq [edx+8],mm5
        movq mm6,mm0
        punpckhdq mm0,mm7               // pixel 7 | pixel 6
        punpckldq mm6,mm7               // pixel 5 | pixel 4
        movq [edx+24],mm0
        movq [edx+16],mm6

        // ---- step to the next 8-pixel group of the same row pair ----
        sub eax,nyw                     // back to the top row
        sub edx,rgbwidth
        add esi,4                       // 4 chroma samples consumed
        add edi,4
        add eax,8                       // 8 luma samples consumed
        add edx,32                      // 8 output pixels written
        dec ecx
        jnz next_block

        // ---- step to the next row pair ----
        mov ecx,col
        add eax,yRowSkip                // skip unconverted tail + the bottom row
        add edx,rgbRowSkip
        add esi,uvRowSkip               // skip unconverted chroma tail (0 if width % 8 == 0)
        add edi,uvRowSkip
        dec ebx
        jnz next_block
        emms                            // leave MMX state so x87 code can run again
    }
}