// www.pudn.com > UYVY2RGB.rar > MMX.CPP


 
 
 
 
const static int p_1164 = 75;  
const static int p_1596 = 102;  
const static int p_0391 = 25;  
const static int p_0813 = 52;  
const static int p_2018 = 129;  
 
const static int ooffooff=0x00ff00ff; 
const static int ffooffoo=0xff00ff00; 
 
 
 
void yuv2rgb4XmmxC420(unsigned char *lpY, 
                                    unsigned char *lpU, 
                                    unsigned char *lpV, 
                                    unsigned char *lpRGB, 
                                    int           nSrcHeight, 
                                    int           nSrcWidth) 
{ 
    int rgbwidth=nSrcWidth<<2;// 32 bits rgb0; 
	int nyw=nSrcWidth; 
	int col=nSrcWidth>>3; 
	int row=nSrcHeight>>1; 
	 
#define  mmt2018u  mm1 
#define  mmt0813v  mm2 
#define  mmt0391u  mm3 
#define  mmt1596v  mm4 
	 
	__int64  ty; 
	 
	__asm 
	{ 
		mov        esi,lpU 
			mov        edi,lpV 
			mov        eax,lpY 
			mov        edx,lpRGB 
			mov        ecx,col 
			mov        ebx,row 
			 
rrr:  
		pxor       mm0,mm0 
			 
			movq       mm3,qword ptr t128 
			movq       mm4,qword ptr t0391 
			movq       mm5,qword ptr t2018 
			movq       mm6,qword ptr t1596 
			movq       mm7,qword ptr t0813 
			 
			movd       mm1,dword ptr [esi] 
			movd       mm2,dword ptr [edi] 
			punpcklbw  mm1,mm0 
			punpcklbw  mm2,mm0 
			 
 
		psubsw     mm1,mm3           
		psubsw     mm2,mm3            
		movq       mm3,mm1 
			psllw      mm1,3 
			pmulhw     mm3,mm4         // t0391u-->mm3 
			pmulhw     mm1,mm5         // t2018u-->mm1 
			movq       mm4,mm2 
			psllw      mm2,1 
			psllw      mm4,2 
			pmulhw     mm2,mm7          // t0813v-->mm2 
			pmulhw     mm4,mm6          // t1596v-->mm4 
			 
			movq       mm5,dword ptr [eax]          // 76 54 32 10 
			pxor       mm0,mm0 
			movq       mm6,mm5 
			punpcklbw  mm5,mm0                      // 03 02 01 00                        
			punpckhbw  mm0,mm6                      // 70 60 50 40 
			por        mm0,mm5                      // 73 62 51 40 
			pxor       mm6,mm6 
			pxor       mm5,mm5 
			punpckhbw  mm6,mm0                    // 70 30 60 20  
			punpcklbw  mm0,mm5                    // 05 01 04 00 
			por        mm0,mm6                    // 75 31 64 20 
			pxor       mm5,mm5 
			movq       mm6,mm0 
			punpckhbw  mm6,mm5                    // y7 y5 y3 y1  
			punpcklbw  mm0,mm5 
			movq       mm5,qword ptr t16 
			movq       mm7,qword ptr t1164 
			psubsw     mm6,mm5 
			psubsw     mm0,mm5 
			psllw      mm6,2 
			psllw      mm0,2  
			pmulhw     mm6,mm7 
			pmulhw     mm0,mm7                     // y6 y4 y2 y0 -->mm0 
			movq       qword ptr ty,mm6            // y7 y5 y3 y1 -->ty 
			 
 
        pxor       mm7,mm7 
			movq       mm5,mmt1596v 
			movq       mm6,mm0                     
		paddsw     mm5,mm0        // r=1.164(y-16)+1.596(v-128)            r-->mm5 
			psubsw     mm6,mmt0391u                  //1.0.0.164/(y-16)-0.391(u-128) 
		psubsw     mm6,mmt0813v//g=1.164(y-16)-0.391(u-128)-0.813(v-128)     g-->mm6 
			paddsw     mm0,mmt2018u //  b=1.164(y-16)+2.018(u-128)               b-->mm0 
			 
			// b-->mm0,g-->mm6,r-->mm5 
			packuswb    mm6,mm7 
			packuswb    mm0,mm7 
			punpcklbw   mm0,mm6     // g3 b3 g2 b2 g1 b1 g0 b0  -->mm0 
			packuswb    mm5,mm7 
			punpcklbw   mm5,mm7     // 00 r3 00 r2 00 r1 00 r0  -->mm5 
			movq        mm7,mm0 
			punpcklwd   mm0,mm5     // 00 r1 g1 b1 00 r0 g0 b0 
			punpckhwd   mm7,mm5     // 00 r3 g3 b3 00 r2 g2 b2 
			movq        qword ptr[edx], mm0 
			movq  mm0,qword ptr ty 
			movq        qword ptr[edx+8], mm7 
			 
        pxor       mm7,mm7 
			movq       mm5,mmt1596v 
			paddsw     mm5,mm0        // r=1.164(y-16)+1.596(v-128)            r-->mm5 
			movq       mm6,mm0                    //copy/ 1.164(y-16) 
		psubsw     mm6,mmt0391u                  //1.0.0.164/(y-16)-0.391(u-128) 
		psubsw     mm6,mmt0813v//g=1.164(y-16)-0.391(u-128)-0.813(v-128)     g-->mm6 
			paddsw     mm0,mmt2018u //  b=1.164(y-16)+2.018(u-128)                b-->mm0 
			 
			// b-->mm0,g-->mm6,r-->mm5 
			packuswb    mm6,mm7 
			packuswb    mm0,mm7 
			punpcklbw   mm0,mm6     // g3 b3 g2 b2 g1 b1 g0 b0  -->mm0 
			packuswb    mm5,mm7 
			punpcklbw   mm5,mm7     // 00 r3 00 r2 00 r1 00 r0  -->mm5 
			movq        mm7,mm0 
			movq        mm6,[edx]   // 2 0 
			 
			punpcklwd   mm0,mm5     // 00 r1 g1 b1 00 r0 g0 b0 
			punpckhwd   mm7,mm5     // 00 r3 g3 b3 00 r2 g2 b2   
			 
			movq        mm5,mm6 
			punpckldq   mm6,mm0     // 1 0 
			punpckhdq   mm5,mm0     // 3 2 
			movq        mm0,[edx+8] // 4 6 
			movq        [edx],mm6 
			movq        [edx+8], mm5 
			movq        mm6,mm0 
			punpckhdq   mm0,mm7     // 7 6 
			punpckldq   mm6,mm7     // 5 4 
			movq        [edx+24], mm0 
			movq        [edx+16],mm6 
			 
//next/ row of y  
		add        eax,nyw 
			add        edx,rgbwidth 
			 
			movq       mm5,dword ptr [eax]          // 76 54 32 10 
			pxor       mm0,mm0 
			movq       mm6,mm5 
			punpcklbw  mm5,mm0                      // 03 02 01 00                        
			punpckhbw  mm0,mm6                      // 70 60 50 40 
			por        mm0,mm5                      // 73 62 51 40 
			pxor       mm6,mm6 
			pxor       mm5,mm5 
			punpckhbw  mm6,mm0                    // 70 30 60 20  
			punpcklbw  mm0,mm5                    // 05 01 04 00 
			por        mm0,mm6                    // 75 31 64 20 
			pxor       mm5,mm5 
			movq       mm6,mm0 
			punpckhbw  mm6,mm5                    // y7 y5 y3 y1  
			punpcklbw  mm0,mm5 
			movq       mm5,qword ptr t16 
			movq       mm7,qword ptr t1164 
			psubsw     mm6,mm5 
			psubsw     mm0,mm5 
			psllw      mm6,2 
			psllw      mm0,2 
			pmulhw     mm6,mm7 
			pmulhw     mm0,mm7                      // y6 y4 y2 y0 -->mm0 
			movq       qword ptr ty,mm6           // y7 y5 y3 y1 -->ty 
			 
//compute/ 
        pxor       mm7,mm7 
			movq       mm5,mmt1596v 
			paddsw     mm5,mm0        // r=1.164(y-16)+1.596(v-128)            r-->mm5 
			movq       mm6,mm0                    //copy/ 1.164(y-16) 
		psubsw     mm6,mmt0391u                 //1.0.0.164/(y-16)-0.391(u-128) 
		psubsw     mm6,mmt0813v//g=1.164(y-16)-0.391(u-128)-0.813(v-128)     g-->mm6 
			paddsw     mm0,mmt2018u //  b=1.164(y-16)+2.018(u-128)               b-->mm0 
			 
			// b-->mm0,g-->mm6,r-->mm5 
			packuswb    mm6,mm7 
			packuswb    mm0,mm7 
			punpcklbw   mm0,mm6     // g3 b3 g2 b2 g1 b1 g0 b0  -->mm0 
			packuswb    mm5,mm7 
			punpcklbw   mm5,mm7     // 00 r3 00 r2 00 r1 00 r0  -->mm5 
			movq        mm7,mm0 
			punpcklwd   mm0,mm5     // 00 r1 g1 b1 00 r0 g0 b0 
			punpckhwd   mm7,mm5     // 00 r3 g3 b3 00 r2 g2 b2 
			movq        qword ptr[edx], mm0 
			movq        qword ptr[edx+8], mm7 
			 
//compute/ 
		movq       mm0,qword ptr ty 
			pxor       mm7,mm7 
			movq       mm5,mmt1596v 
			paddsw     mm5,mm0        // r=1.164(y-16)+1.596(v-128)            r-->mm5 
			movq       mm6,mm0                     //copy/ 1.164(y-16) 
		psubsw     mm6,mmt0391u                  //1.0.0.164/(y-16)-0.391(u-128) 
		psubsw     mm6,mmt0813v//g=1.164(y-16)-0.391(u-128)-0.813(v-128)     g-->mm6 
			paddsw     mm0,mmt2018u //  b=1.164(y-16)+2.018(u-128)               b-->mm0 
			 
			// b-->mm0,g-->mm6,r-->mm5 
			packuswb    mm6,mm7 
			packuswb    mm0,mm7 
			punpcklbw   mm0,mm6     // g3 b3 g2 b2 g1 b1 g0 b0  -->mm0 
			packuswb    mm5,mm7 
			punpcklbw   mm5,mm7     // 00 r3 00 r2 00 r1 00 r0  -->mm5 
			movq        mm7,mm0 
			 
			movq        mm6,[edx]   // 2 0 
			 
			punpcklwd   mm0,mm5     // 00 r1 g1 b1 00 r0 g0 b0 
			punpckhwd   mm7,mm5     // 00 r3 g3 b3 00 r2 g2 b2   
			 
			movq        mm5,mm6 
			punpckldq   mm6,mm0     // 1 0 
			punpckhdq   mm5,mm0     // 3 2 
			movq        mm0,[edx+8] // 4 6 
			movq        [edx],mm6 
			movq        [edx+8], mm5 
			movq        mm6,mm0 
			punpckhdq   mm0,mm7     // 7 6 
			punpckldq   mm6,mm7     // 5 4 
			movq        [edx+24], mm0 
			movq        [edx+16],mm6 
			 
			sub        eax,nyw 
			sub        edx,rgbwidth 
			add        esi,4 
			add        edi,4 
			add        eax,8 
			add        edx,32 
			dec        ecx 
			jnz        rrr 
			 
			mov        ecx,col 
			add        eax,nyw 
			add        edx,rgbwidth 
			dec        ebx 
			jnz        rrr 
			emms 
 } 
}