www.pudn.com > mpeg4_DECORE.rar > deblock_vert_choose_p1p2.c


/************************************************************************** 
 *                                                                        * 
 * This code has been developed by John Funnell. This software is an      * 
 * implementation of a part of one or more MPEG-4 Video tools as          * 
 * specified in ISO/IEC 14496-2 standard.  Those intending to use this    * 
 * software module in hardware or software products are advised that its  * 
 * use may infringe existing patents or copyrights, and any such use      * 
 * would be at such party's own risk.  The original developer of this     * 
 * software module and his/her company, and subsequent editors and their  * 
 * companies (including Project Mayo), will have no liability for use of  * 
 * this software or modifications or derivatives thereof.                 * 
 *                                                                        * 
 * Project Mayo gives users of the Codec a license to this software       * 
 * module or modifications thereof for use in hardware or software        * 
 * products claiming conformance to the MPEG-4 Video Standard as          * 
 * described in the Open DivX license.                                    * 
 *                                                                        * 
 * The complete Open DivX license can be found at                         * 
 * http://www.projectmayo.com/opendivx/license.php                        * 
 *                                                                        * 
 **************************************************************************/ 
/** 
*  Copyright (C) 2001 - Project Mayo 
 * 
 * John Funnell 
 * 
 * DivX Advanced Research Center  
* 
**/ 
 
/*** 
 
References:   
 * ISO/IEC 14496-2 
 * MoMuSys-FDIS-V1.0-990812 
 * Intel Architecture Software Developer's Manual 
   Volume 2: Instruction Set Reference 
 
***/ 
 
#include "postprocess_mmx.h" 
 
/* John Funnell, December 2000 */ 
 
 
 
/* This function chooses the "endstops" for the vertial LPF9 filter: p1 and p2 */ 
/* We also convert these to 16-bit values here */ 
INLINE void deblock_vert_choose_p1p2(uint8_t *v, int stride, uint64_t *p1p2, int QP) { 
	uint64_t *pmm1, *pmm2; 
	uint64_t mm_b_qp; 
	#ifdef PP_SELF_CHECK 
	int i; 
	#endif 
 
	/* load QP into every one of the 8 bytes in mm_b_qp */ 
	((uint32_t *)&mm_b_qp)[0] =  
	((uint32_t *)&mm_b_qp)[1] = 0x01010101 * QP;  
 
	pmm1 = (uint64_t *)(&(v[0*stride])); 
	pmm2 = (uint64_t *)(&(v[8*stride])); 
 
	__asm { 
		push eax 
		push ebx 
		push ecx 
		 
		mov eax, pmm1 
		mov ebx, pmm2 
		mov ecx, p1p2 
 
	/* p1 */ 
		pxor     mm7, mm7             /* mm7 = 0                       */ 
		movq     mm0, [eax]          /* mm0 = *pmm1 = v[l0]           */ 
		movq     mm2, mm0             /* mm2 = mm0 = v[l0]             */ 
		add      eax, stride         /* pmm1 += stride                */ 
		movq     mm1, [eax]          /* mm1 = *pmm1 = v[l1]           */ 
		movq     mm3, mm1             /* mm3 = mm1 = v[l1]             */ 
		psubusb  mm0, mm1             /* mm0 -= mm1                    */ 
		psubusb  mm1, mm2             /* mm1 -= mm2                    */ 
		por      mm0, mm1             /* mm0 |= mm1                    */ 
		psubusb  mm0, mm_b_qp         /* mm0 -= QP                     */ 
		/* now a zero byte in mm0 indicates use v0 else use v1              */ 
		pcmpeqb  mm0, mm7             /* zero bytes to ff others to 00 */ 
		movq     mm1, mm0             /* make a copy of mm0            */ 
		/* now ff byte in mm0 indicates use v0 else use v1                  */ 
		pandn    mm0, mm3             /* mask v1 into 00 bytes in mm0  */ 
		pand     mm1, mm2             /* mask v0 into ff bytes in mm0  */ 
		por      mm0, mm1             /* mm0 |= mm1                    */ 
		movq     mm1, mm0             /* make a copy of mm0            */ 
		/* Now we have our result, p1, in mm0.  Next, unpack.               */ 
		punpcklbw mm0, mm7            /* low bytes to mm0              */ 
		punpckhbw mm1, mm7            /* high bytes to mm1             */ 
		/* Store p1 in memory                                               */ 
		movq     [ecx], mm0           /* low words to p1p2[0]          */ 
		movq     8[ecx], mm1          /* high words to p1p2[1]         */ 
	/* p2 */ 
		movq     mm1, [ebx]          /* mm1 = *pmm2 = v[l8]           */ 
		movq     mm3, mm1             /* mm3 = mm1 = v[l8]             */ 
		add      ebx, stride         /* pmm2 += stride                */ 
		movq     mm0, [ebx]          /* mm0 = *pmm2 = v[l9]           */ 
		movq     mm2, mm0             /* mm2 = mm0 = v[l9]             */ 
		psubusb  mm0, mm1             /* mm0 -= mm1                    */ 
		psubusb  mm1, mm2             /* mm1 -= mm2                    */ 
		por      mm0, mm1             /* mm0 |= mm1                    */ 
		psubusb  mm0, mm_b_qp         /* mm0 -= QP                     */ 
		/* now a zero byte in mm0 indicates use v0 else use v1              */ 
		pcmpeqb  mm0, mm7             /* zero bytes to ff others to 00 */ 
		movq     mm1, mm0             /* make a copy of mm0            */ 
		/* now ff byte in mm0 indicates use v0 else use v1                  */ 
		pandn    mm0, mm3             /* mask v1 into 00 bytes in mm0  */ 
		pand     mm1, mm2             /* mask v0 into ff bytes in mm0  */ 
		por      mm0, mm1             /* mm0 |= mm1                    */ 
		movq     mm1, mm0             /* make a copy of mm0            */ 
		/* Now we have our result, p2, in mm0.  Next, unpack.               */ 
		punpcklbw mm0, mm7            /* low bytes to mm0              */ 
		punpckhbw mm1, mm7            /* high bytes to mm1             */ 
		/* Store p2 in memory                                               */ 
		movq     16[ecx], mm0         /* low words to p1p2[2]          */ 
		movq     24[ecx], mm1         /* high words to p1p2[3]         */ 
 
		pop ecx 
		pop ebx 
		pop eax 
 
 
 
	}; 
 
	#ifdef PP_SELF_CHECK 
	/* check p1 and p2 have been calculated correctly */ 
	/* p2 */ 
	for (i=0; i<8; i++) { 
		if ( ((ABS(v[9*stride+i] - v[8*stride+i]) - QP > 0) ? v[8*stride+i] : v[9*stride+i]) 
		     != ((uint16_t *)(&(p1p2[2])))[i] ) { 
			 printf("ERROR: problem with P2\n"); 
		} 
	} 
	/* p1 */ 
	for (i=0; i<8; i++) { 
		if ( ((ABS(v[0*stride+i] - v[1*stride+i]) - QP > 0) ? v[1*stride+i] : v[0*stride+i]) 
		     != ((uint16_t *)(&(p1p2[0])))[i] ) { 
			 printf("ERROR: problem with P1\n"); 
		} 
	} 
	#endif 
 
}