www.pudn.com > x264_2007.rar > quant-a.asm
;***************************************************************************** ;* quant-a.asm: h264 encoder library ;***************************************************************************** ;* Copyright (C) 2005 x264 project ;* ;* Authors: Loren Merritt;* ;* This program is free software; you can redistribute it and/or modify ;* it under the terms of the GNU General Public License as published by ;* the Free Software Foundation; either version 2 of the License, or ;* (at your option) any later version. ;* ;* This program is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;* GNU General Public License for more details. ;* ;* You should have received a copy of the GNU General Public License ;* along with this program; if not, write to the Free Software ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. ;***************************************************************************** BITS 32 %include "i386inc.asm" SECTION_RODATA pd_1: times 2 dd 1 SECTION .text %macro QUANT_AC_START 0 mov eax, [esp+ 4] ; dct mov ecx, [esp+ 8] ; mf mov edx, [esp+12] ; bias %endmacro %macro MMX_QUANT_DC_START 0 mov eax, [esp+ 4] ; dct movd mm6, [esp+ 8] ; mf movd mm7, [esp+12] ; bias pshufw mm6, mm6, 0 pshufw mm7, mm7, 0 %endmacro %macro SSE2_QUANT_DC_START 0 mov eax, [esp+ 4] ; dct movd xmm6, [esp+ 8] ; mf movd xmm7, [esp+12] ; bias pshuflw xmm6, xmm6, 0 pshuflw xmm7, xmm7, 0 punpcklqdq xmm6, xmm6 punpcklqdq xmm7, xmm7 %endmacro %macro QUANT_ONE 5 ;;; %1 (m64) dct[y][x] ;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t) ;;; %3 (m64/mmx) bias[y][x] or bias[0][0] (as uint16_t) mov%1 %2m0, %3 ; load dct coeffs pxor %2m1, %2m1 pcmpgtw %2m1, %2m0 ; sign(coeff) pxor %2m0, %2m1 psubw %2m0, %2m1 ; abs(coeff) paddusw %2m0, %5 ; round pmulhuw %2m0, %4 ; divide pxor %2m0, %2m1 ; restore sign psubw %2m0, %2m1 mov%1 %3, %2m0 ; store %endmacro %macro MMX_QUANT_1x4 3 QUANT_ONE q, m, %1, %2, %3 %endmacro %macro SSE2_QUANT_1x8 3 QUANT_ONE dqa, xm, %1, %2, %3 %endmacro %macro SSSE3_QUANT_1x8 3 movdqa xmm1, %1 ; load dct coeffs pabsw xmm0, xmm1 paddusw xmm0, %3 ; round pmulhuw xmm0, %2 ; divide psignw xmm0, xmm1 ; restore sign movdqa %1, xmm0 ; store %endmacro ;----------------------------------------------------------------------------- ; void x264_quant_2x2_dc_mmxext( int16_t dct[4], int mf, int bias ) ;----------------------------------------------------------------------------- cglobal x264_quant_2x2_dc_mmxext MMX_QUANT_DC_START MMX_QUANT_1x4 [eax], mm6, mm7 ret ;----------------------------------------------------------------------------- ; void x264_quant_4x4_dc_mmxext( int16_t dct[16], int mf, int bias ) ;----------------------------------------------------------------------------- cglobal x264_quant_4x4_dc_mmxext MMX_QUANT_DC_START %assign x 0 %rep 4 MMX_QUANT_1x4 [eax+x], mm6, mm7 %assign x (x+8) %endrep ret ;----------------------------------------------------------------------------- ; void x264_quant_4x4_mmx( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ) ;----------------------------------------------------------------------------- cglobal x264_quant_4x4_mmx QUANT_AC_START %assign x 0 %rep 4 MMX_QUANT_1x4 [eax+x], [ecx+x], [edx+x] %assign x (x+8) %endrep ret ;----------------------------------------------------------------------------- ; void x264_quant_8x8_mmx( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ) ;----------------------------------------------------------------------------- cglobal x264_quant_8x8_mmx QUANT_AC_START %assign x 0 %rep 16 MMX_QUANT_1x4 [eax+x], [ecx+x], [edx+x] %assign x (x+8) %endrep ret %macro QUANT_SSE 1 ;----------------------------------------------------------------------------- ; void x264_quant_4x4_dc_sse2( int16_t dct[16], int mf, int bias ) ;----------------------------------------------------------------------------- cglobal x264_quant_4x4_dc_%1 SSE2_QUANT_DC_START %assign x 0 %rep 2 QUANT_1x8 [eax+x], xmm6, xmm7 %assign x (x+16) %endrep ret ;----------------------------------------------------------------------------- ; void x264_quant_4x4_sse2( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] ) ;----------------------------------------------------------------------------- cglobal x264_quant_4x4_%1 QUANT_AC_START %assign x 0 %rep 2 QUANT_1x8 [eax+x], [ecx+x], [edx+x] %assign x (x+16) %endrep ret ;----------------------------------------------------------------------------- ; void x264_quant_8x8_sse2( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] ) ;----------------------------------------------------------------------------- cglobal x264_quant_8x8_%1 QUANT_AC_START %assign x 0 %rep 8 QUANT_1x8 [eax+x], [ecx+x], [edx+x] %assign x (x+16) %endrep ret %endmacro %define QUANT_1x8 SSE2_QUANT_1x8 QUANT_SSE sse2 %ifdef HAVE_SSE3 %define QUANT_1x8 SSSE3_QUANT_1x8 QUANT_SSE ssse3 %endif ;============================================================================= ; dequant ;============================================================================= %macro DEQUANT16_L_1x4 3 ;;; %1 dct[y][x] ;;; %2,%3 dequant_mf[i_mf][y][x] ;;; mm5 i_qbits movq mm1, %2 movq mm2, %3 movq mm0, %1 packssdw mm1, mm2 pmullw mm0, mm1 psllw mm0, mm5 movq %1, mm0 %endmacro %macro DEQUANT16_R_1x4 3 ;;; %1 dct[y][x] ;;; %2,%3 dequant_mf[i_mf][y][x] ;;; mm5 -i_qbits ;;; mm6 f as words movq mm1, %2 movq mm2, %3 movq mm0, %1 packssdw mm1, mm2 pmullw mm0, mm1 paddw mm0, mm6 psraw mm0, mm5 movq %1, mm0 %endmacro %macro DEQUANT32_R_1x4 3 ;;; %1 dct[y][x] ;;; %2,%3 dequant_mf[i_mf][y][x] ;;; mm5 -i_qbits ;;; mm6 f as dwords ;;; mm7 0 movq mm0, %1 movq mm1, mm0 punpcklwd mm0, mm0 punpckhwd mm1, mm1 movq mm2, mm0 movq mm3, mm1 pmulhw mm0, %2 pmulhw mm1, %3 pmullw mm2, %2 pmullw mm3, %3 pslld mm0, 16 pslld mm1, 16 paddd mm0, mm2 paddd mm1, mm3 paddd mm0, mm6 paddd mm1, mm6 psrad mm0, mm5 psrad mm1, mm5 packssdw mm0, mm1 movq %1, mm0 %endmacro ;----------------------------------------------------------------------------- ; void x264_dequant_4x4_mmx( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp ) ;----------------------------------------------------------------------------- %macro DEQUANT_WxH 3 cglobal %1 mov edx, [esp+12] ; i_qp imul eax, edx, 0x2b shr eax, 8 ; i_qbits = i_qp / 6 lea ecx, [eax+eax*2] sub edx, ecx sub edx, ecx ; i_mf = i_qp % 6 shl edx, %3+2 add edx, [esp+8] ; dequant_mf[i_mf] mov ecx, [esp+4] ; dct sub eax, %3 jl .rshift32 ; negative qbits => rightshift .lshift: movd mm5, eax mov eax, 8*(%2-1) .loopl16 %rep 2 DEQUANT16_L_1x4 [ecx+eax], [edx+eax*2], [edx+eax*2+8] sub eax, byte 8 %endrep jge .loopl16 nop ret .rshift32: neg eax picpush ebx picgetgot ebx movq mm6, [pd_1 GOT_ebx] picpop ebx movd mm5, eax pxor mm7, mm7 pslld mm6, mm5 psrld mm6, 1 mov eax, 8*(%2-1) .loopr32 %rep 2 DEQUANT32_R_1x4 [ecx+eax], [edx+eax*2], [edx+eax*2+8] sub eax, byte 8 %endrep jge .loopr32 nop ret %endmacro DEQUANT_WxH x264_dequant_4x4_mmx, 4, 4 DEQUANT_WxH x264_dequant_8x8_mmx, 16, 6