www.pudn.com > mpeg4_DECORE.rar > deblock_vert_copy_and_unpack.c


/************************************************************************** 
 *                                                                        * 
 * This code has been developed by John Funnell. This software is an      * 
 * implementation of a part of one or more MPEG-4 Video tools as          * 
 * specified in ISO/IEC 14496-2 standard.  Those intending to use this    * 
 * software module in hardware or software products are advised that its  * 
 * use may infringe existing patents or copyrights, and any such use      * 
 * would be at such party's own risk.  The original developer of this     * 
 * software module and his/her company, and subsequent editors and their  * 
 * companies (including Project Mayo), will have no liability for use of  * 
 * this software or modifications or derivatives thereof.                 * 
 *                                                                        * 
 * Project Mayo gives users of the Codec a license to this software       * 
 * module or modifications thereof for use in hardware or software        * 
 * products claiming conformance to the MPEG-4 Video Standard as          * 
 * described in the Open DivX license.                                    * 
 *                                                                        * 
 * The complete Open DivX license can be found at                         * 
 * http://www.projectmayo.com/opendivx/license.php                        * 
 *                                                                        * 
 **************************************************************************/ 
/** 
*  Copyright (C) 2001 - Project Mayo 
 * 
 * John Funnell 
 * 
 * DivX Advanced Research Center  
* 
**/ 
 
/*** 
 
References:   
 * ISO/IEC 14496-2 
 * MoMuSys-FDIS-V1.0-990812 
 * Intel Architecture Software Developer's Manual 
   Volume 2: Instruction Set Reference 
 
***/ 
 
#include "postprocess_mmx.h" 
 
 
/* John Funnell, December 2000 */ 
 
 
 
/* function using MMX to copy an 8-pixel wide column and unpack to 16-bit values */ 
/* n is the number of rows to copy - this must be even (two rows are copied per loop pass) */
INLINE void deblock_vert_copy_and_unpack(int stride, uint8_t *source, uint64_t *dest, int n) { 
	uint64_t *pmm1 = (uint64_t *)source; 
	uint64_t *pmm2 = (uint64_t *)dest; 
	int i = -n / 2; 
	#ifdef PP_SELF_CHECK 
	int j, k; 
	#endif 
 
	/* copy block to local store whilst unpacking to 16-bit values */ 
	__asm { 
		push edi 
		push eax 
		push ebx 
		 
		mov edi, i 
		mov eax, pmm1 
		mov ebx, pmm2 
 
		pxor   mm7, mm7                        /* set mm7 = 0                     */ 
	deblock_v_L1:                             /* now p1 is in mm1                */	 
		movq   mm0, [eax]                     /* mm0 = v[0*stride]               */							 
 
		#ifdef PREFETCH_ENABLE 
		prefetcht0 0[ebx]                  
		#endif 
		add   eax, stride                    /* p_data += stride                */ 
		movq   mm1, mm0                        /* mm1 = v[0*stride]               */							 
		punpcklbw mm0, mm7                     /* unpack low bytes (left hand 4)  */ 
 
		movq   mm2, [eax]                     /* mm2 = v[0*stride]               */							 
		punpckhbw mm1, mm7                     /* unpack high bytes (right hand 4)*/ 
 
		movq   mm3, mm2                        /* mm3 = v[0*stride]               */							 
		punpcklbw mm2, mm7                     /* unpack low bytes (left hand 4)  */ 
 
		movq   [ebx], mm0                     /* v_local[n] = mm0 (left)         */ 
		add   eax, stride                    /* p_data += stride                */ 
 
		movq   8[ebx], mm1                    /* v_local[n+8] = mm1 (right)      */ 
		punpckhbw mm3, mm7                     /* unpack high bytes (right hand 4)*/ 
 
		movq   16[ebx], mm2                   /* v_local[n+16] = mm2 (left)      */ 
 
		movq   24[ebx], mm3                   /* v_local[n+24] = mm3 (right)     */ 
 
		add   ebx, 32                        /* p_data2 += 8                    */ 
		 
		add   i, 1                            /* increment loop counter          */ 
		jne    deblock_v_L1              
 
 
		pop ebx 
		pop eax 
		pop edi 
 
 
 
 
 
 
 
 
	}; 
 
	#ifdef PP_SELF_CHECK 
	/* check that MMX copy has worked correctly */ 
	for (k=0; k