/* Source: www.pudn.com > mpeg4_DECORE.rar > deblock_vert_useDC.c */


/************************************************************************** 
 *                                                                        * 
 * This code has been developed by John Funnell. This software is an      * 
 * implementation of a part of one or more MPEG-4 Video tools as          * 
 * specified in ISO/IEC 14496-2 standard.  Those intending to use this    * 
 * software module in hardware or software products are advised that its  * 
 * use may infringe existing patents or copyrights, and any such use      * 
 * would be at such party's own risk.  The original developer of this     * 
 * software module and his/her company, and subsequent editors and their  * 
 * companies (including Project Mayo), will have no liability for use of  * 
 * this software or modifications or derivatives thereof.                 * 
 *                                                                        * 
 * Project Mayo gives users of the Codec a license to this software       * 
 * module or modifications thereof for use in hardware or software        * 
 * products claiming conformance to the MPEG-4 Video Standard as          * 
 * described in the Open DivX license.                                    * 
 *                                                                        * 
 * The complete Open DivX license can be found at                         * 
 * http://www.projectmayo.com/opendivx/license.php                        * 
 *                                                                        * 
 **************************************************************************/ 
/** 
*  Copyright (C) 2001 - Project Mayo 
 * 
 * John Funnell 
 * 
 * DivX Advanced Research Center  
* 
**/ 
 
/*** 
 
References:   
 * ISO/IEC 14496-2 
 * MoMuSys-FDIS-V1.0-990812 
 * Intel Architecture Software Developer's Manual 
   Volume 2: Instruction Set Reference 
 
***/ 
 
#include "postprocess_mmx.h" 
 
/* John Funnell, December 2000 */ 
 
/* decide DC mode or default mode in assembler */ 
INLINE  int deblock_vert_useDC(uint8_t *v, int stride) { 
	const uint64_t mask   = 0xfefefefefefefefe; 
	uint32_t mm_data1; 
	uint64_t *pmm1; 
	int eq_cnt, useDC; 
	#ifdef PP_SELF_CHECK 
	int useDC2, i, j; 
	#endif 
 
	#ifdef PP_SELF_CHECK 
	/* C-code version for testing */ 
	eq_cnt = 0; 
	for (j=1; j<8; j++) { 
		for (i=0; i<8; i++) { 
			if (ABS(v[j*stride+i] - v[(j+1)*stride+i]) <= 1) eq_cnt++; 
		} 
	} 
	useDC2 = (eq_cnt > DEBLOCK_VERT_USEDC_THR);  
	#endif 
			 
	/* starting pointer is at v[stride] == v1 in mpeg4 notation */ 
	pmm1 = (uint64_t *)(&(v[stride])); 
 
	/* first load some constants into mm4, mm6, mm7 */ 
	__asm { 
		push eax 
		mov eax, pmm1 
 
		movq mm6, mask               /*mm6 = 0xfefefefefefefefe       */ 
		pxor mm7, mm7                /*mm7 = 0x0000000000000000       */ 
 
		movq mm2, [eax]             /* mm2 = *p_data                 */ 
		pxor mm4, mm4                /*mm4 = 0x0000000000000000       */ 
 
		add   eax, stride          /* p_data += stride              */ 
		movq   mm3, mm2              /* mm3 = *p_data                 */ 
 
 
 
 
 
 
	}; 
 
	__asm { 
 
		movq   mm2, [eax]          /* mm2 = *p_data                 */ 
		movq   mm0, mm3             /* mm0 = mm3                     */ 
 
		movq   mm3, mm2             /* mm3 = *p_data                 */ 
		movq   mm1, mm0             /* mm1 = mm0                     */ 
 
		psubusb mm0, mm2            /* mm0 -= mm2                    */ 
		add   eax, stride         /* p_data += stride              */ 
 
		psubusb mm2, mm1            /* mm2 -= mm1                    */ 
		por    mm0, mm2             /* mm0 |= mm2                    */ 
 
		pand   mm0, mm6             /* mm0 &= 0xfefefefefefefefe     */       
		pcmpeqb mm0, mm4            /* is mm0 == 0 ?                 */ 
 
		movq   mm2, [eax]          /* mm2 = *p_data                 */ 
		psubb  mm7, mm0             /* mm7 has running total of eqcnts */ 
 
		movq   mm5, mm3             /* mm5 = mm3                     */ 
		movq   mm3, mm2             /* mm3 = *p_data                 */ 
 
		movq   mm1, mm5             /* mm1 = mm5                     */ 
		psubusb mm5, mm2            /* mm5 -= mm2                    */ 
 
		psubusb mm2, mm1            /* mm2 -= mm1                    */ 
		por    mm5, mm2             /* mm5 |= mm2                    */ 
 
		add   eax, stride         /* p_data += stride              */ 
		pand   mm5, mm6             /* mm5 &= 0xfefefefefefefefe     */       
 
		pcmpeqb mm5, mm4            /* is mm0 == 0 ?                 */ 
		psubb  mm7, mm5             /* mm7 has running total of eqcnts */ 
 
		movq   mm2, [eax]          /* mm2 = *p_data                 */ 
		movq   mm0, mm3             /* mm0 = mm3                     */ 
 
		movq   mm3, mm2             /* mm3 = *p_data                 */ 
		movq   mm1, mm0             /* mm1 = mm0                     */ 
 
		psubusb mm0, mm2            /* mm0 -= mm2                    */ 
		add   eax, stride         /* p_data += stride              */ 
 
		psubusb mm2, mm1            /* mm2 -= mm1                    */ 
		por    mm0, mm2             /* mm0 |= mm2                    */ 
 
		pand   mm0, mm6             /* mm0 &= 0xfefefefefefefefe     */       
		pcmpeqb mm0, mm4            /* is mm0 == 0 ?                 */ 
 
		movq   mm2, [eax]          /* mm2 = *p_data                 */ 
		psubb  mm7, mm0             /* mm7 has running total of eqcnts */ 
 
		movq   mm5, mm3             /* mm5 = mm3                     */ 
		movq   mm3, mm2             /* mm3 = *p_data                 */ 
 
		movq   mm1, mm5             /* mm1 = mm5                     */ 
		psubusb mm5, mm2            /* mm5 -= mm2                    */ 
 
		psubusb mm2, mm1            /* mm2 -= mm1                    */ 
		por    mm5, mm2             /* mm5 |= mm2                    */ 
 
		add   eax, stride         /* p_data += stride              */ 
		pand   mm5, mm6             /* mm5 &= 0xfefefefefefefefe     */       
 
		pcmpeqb mm5, mm4            /* is mm0 == 0 ?                 */ 
		psubb  mm7, mm5             /* mm7 has running total of eqcnts */ 
 
		movq   mm2, [eax]          /* mm2 = *p_data                 */ 
		movq   mm0, mm3             /* mm0 = mm3                     */ 
 
		movq   mm3, mm2             /* mm3 = *p_data                 */ 
		movq   mm1, mm0             /* mm1 = mm0                     */ 
 
		psubusb mm0, mm2            /* mm0 -= mm2                    */ 
		add   eax, stride         /* p_data += stride              */ 
 
		psubusb mm2, mm1            /* mm2 -= mm1                    */ 
		por    mm0, mm2             /* mm0 |= mm2                    */ 
 
		pand   mm0, mm6             /* mm0 &= 0xfefefefefefefefe     */       
		pcmpeqb mm0, mm4            /* is mm0 == 0 ?                 */ 
 
		movq   mm2, [eax]          /* mm2 = *p_data                 */ 
		psubb  mm7, mm0             /* mm7 has running total of eqcnts */ 
 
		movq   mm5, mm3             /* mm5 = mm3                     */ 
		movq   mm3, mm2             /* mm3 = *p_data                 */ 
 
		movq   mm1, mm5             /* mm1 = mm5                     */ 
		psubusb mm5, mm2            /* mm5 -= mm2                    */ 
 
		psubusb mm2, mm1            /* mm2 -= mm1                    */ 
		por    mm5, mm2             /* mm5 |= mm2                    */ 
 
		add   eax, stride         /* p_data += stride              */ 
		pand   mm5, mm6             /* mm5 &= 0xfefefefefefefefe     */       
 
		pcmpeqb mm5, mm4            /* is mm0 == 0 ?                 */ 
		psubb  mm7, mm5             /* mm7 has running total of eqcnts */ 
 
		movq   mm2, [eax]          /* mm2 = *p_data                 */ 
		movq   mm0, mm3             /* mm0 = mm3                     */ 
 
		movq   mm3, mm2             /* mm3 = *p_data                 */ 
		movq   mm1, mm0             /* mm1 = mm0                     */ 
 
		psubusb mm0, mm2            /* mm0 -= mm2                    */ 
		add   eax, stride         /* p_data += stride              */ 
 
		psubusb mm2, mm1            /* mm2 -= mm1                    */ 
		por    mm0, mm2             /* mm0 |= mm2                    */ 
 
		pand   mm0, mm6             /* mm0 &= 0xfefefefefefefefe     */       
		pcmpeqb mm0, mm4            /* is mm0 == 0 ?                 */ 
 
		psubb  mm7, mm0             /* mm7 has running total of eqcnts */ 
 
		pop eax	 
		 
	 
	 
	 
	 
	                                        
	}; 
			 
	/* now mm7 contains negative eq_cnt for all 8-columns */ 
	/* copy this to mm_data1                              */ 
	/* sum all 8 bytes in mm7 */ 
	__asm { 
		movq    mm1, mm7            /* mm1 = mm7             0 1w2 3 4 5 6 7r   */ 
		psrlq   mm7, 32             /* mm7 >>= 32            0 1 2 3 4 5 6 7m   */ 
 
		paddb   mm7, mm1            /* mm7 has running total of eqcnts */ 
 
		movq mm1, mm7               /* mm1 = mm7             0 1w2 3 4 5 6 7r   */ 
		psrlq   mm7, 16             /* mm7 >>= 16            0 1 2 3 4 5 6 7m   */ 
 
		paddb   mm1, mm7            /* mm7 has running total of eqcnts */ 
 
		movq mm7, mm1               /* mm1 = mm7             0 1w2 3 4 5 6 7r   */ 
		psrlq   mm7, 8              /* mm7 >>= 8             0 1 2 3 4 5 6 7m   */ 
 
		paddb   mm7, mm1            /* mm7 has running total of eqcnts */ 
 
		movd mm_data1, mm7          /* mm_data1 = mm7       */ 
 
	}; 
 
	eq_cnt = mm_data1 & 0xff; 
			 
	useDC = (eq_cnt  > DEBLOCK_VERT_USEDC_THR);			 
			 
	#ifdef PP_SELF_CHECK 
	if (useDC != useDC2) printf("ERROR: MMX version of useDC is incorrect\n"); 
	#endif 
	 
	return useDC; 
}