; www.pudn.com > lame3(mp3编码程序和资料).zip > choose_table.nas


; new count bit routine
;	parts of this code originate from
;	new GOGO-no-coda (1999, 2000)
;	Copyright (C) 1999 shigeo
;	modified by Keiichi SAKAI

%include "nasm.h"

	globaldef	choose_table_MMX
	globaldef	MMX_masking

	externdef	largetbl
	externdef	t1l
	externdef	table23
	externdef	table56
	externdef	tableABC
	externdef	tableDEF
	externdef	linbits32
	externdef	choose_table_H

	segment_data
	align	16
; Packed 16-bit constants consumed by the MMX code in this file.
; Each dword below holds two words; the low word is the low 16 bits.
D14_14_14_14	dd	0x000E000E, 0x000E000E	; four words of 14: pcmpgtw threshold ("value > 14")
D15_15_15_15	dd	0xfff0fff0, 0xfff0fff0	; four words of 0xfff0 (not 15): paddusw with this saturates for values >= 15
mul_add		dd	0x00010010, 0x00010010	; pmaddwd weights (16, 1): index = x*16 + y
mul_add23	dd	0x00010003, 0x00010003	; pmaddwd weights (3, 1):  index = x*3 + y (used with table23)
mul_add56	dd	0x00010004, 0x00010004	; pmaddwd weights (4, 1):  index = x*4 + y (used with table56)

; Dispatch table for choose_table_MMX: entry N is the handler for a maximum
; value of N (0..15), reached via "jmp [choose_jump_table_L+eax*4]".
choose_jump_table_L:
	dd	table_MMX.L_case_0
	dd	table_MMX.L_case_1
	dd	table_MMX.L_case_2
	dd	table_MMX.L_case_3
	dd	table_MMX.L_case_45
	dd	table_MMX.L_case_45
	dd	table_MMX.L_case_67
	dd	table_MMX.L_case_67
	dd	table_MMX.L_case_8_15
	dd	table_MMX.L_case_8_15
	dd	table_MMX.L_case_8_15
	dd	table_MMX.L_case_8_15
	dd	table_MMX.L_case_8_15
	dd	table_MMX.L_case_8_15
	dd	table_MMX.L_case_8_15
	dd	table_MMX.L_case_8_15

	segment_code
;
; use MMX
;

	align	16
; int choose_table(int *ix, int *end, int *s)
;
; Finds the maximum of the 32-bit values in [ix, end) with MMX and dispatches
; on it:
;   max 0..15         -> jmp through choose_jump_table_L (handlers follow this
;                        routine; they run with edx = end and the same stack)
;   max 16..8191+15   -> .with_ESC: evaluates two candidate tables taken from
;                        choose_table_H, adds the smaller bit sum to *s and
;                        returns the matching table number in eax
;   max > 8191+15     -> .with_ESC1: stores max into *s and returns -1
;
; In:    [esp+4] = ix, [esp+8] = end, [esp+12] = s   (arguments on the stack)
; Out:   eax = result as described above
; Regs:  eax, ecx, edx and mm0-mm7 are overwritten; ebx and esi are saved and
;        restored on the ESC path. emms is executed before each ret here.
; The loops step by 16 bytes after an optional single 8-byte step, so the
; range must be a non-zero multiple of 8 bytes.
; NOTE(review): an empty range (ix == end) is not handled - confirm callers.
choose_table_MMX:
	mov	ecx,[esp+4]	;ecx = begin
	mov	edx,[esp+8]	;edx = end
	xor	eax,eax
	sub	ecx,edx		;ecx = begin-end(should be minus)
 	pxor	mm0,mm0		;mm0=[0:0]
	pxor	mm1,mm1		;mm1=[0:0]
	test	ecx,8		; length not a multiple of 16 bytes?
	jz	.lp

	; odd number of 8-byte pairs: seed mm1 with the first pair
	movq	mm1,[edx+ecx]
	add	ecx,8
	jz	.exit

	align	4
	; Running maximum per unsigned 16-bit lane: max(a,b) = a + satsub(b,a).
	; ZF from "add ecx,16" is consumed by the jnz; the MMX instructions in
	; between do not modify the integer flags.
.lp:
	movq	mm4,[edx+ecx]
	movq	mm5,[edx+ecx+8]
	add	ecx,16
	psubusw	mm4,mm0	; strictly this should be a dword operation,
	psubusw	mm5,mm1	; but no such instruction exists :-p
	paddw	mm0,mm4 ; however, the values handled here are at most 8191+15, so it is fine
	paddw	mm1,mm5
	jnz	.lp
.exit:
	psubusw	mm1,mm0	; this too should strictly be a dword operation
	paddw	mm0,mm1	; mm0 = max(mm0, mm1)

	; fold the high dword of mm0 into the low dword
	movq	mm4,mm0
	punpckhdq	mm4,mm4
	psubusw	mm4,mm0	; this too should strictly be a dword operation
	paddw	mm0,mm4
	movd	eax,mm0	; eax = low dword of mm0 = maximum value

	cmp	eax,15
	ja	.with_ESC
	jmp	[choose_jump_table_L+eax*4]	; max 0..15: table-specific handler

	; max above 8191+15: report max through *s and return -1
.with_ESC1:
	emms
	mov	ecx, [esp+12]	; ecx = s
	mov	[ecx], eax
	or	eax,-1
	ret

.with_ESC:
	cmp	eax, 8191+15
	ja	.with_ESC1

	sub	eax,15
	push	ebx
	push	esi
	bsr	eax, eax	; eax = index of the highest set bit of (max-15)
%assign _P 4*2			; bytes pushed above (ebx, esi): shifts the argument offsets
	movq    mm5, [D15_15_15_15]
	movq	mm6, [D14_14_14_14]
	movq	mm3, [mul_add]

	mov	ecx, [esp+_P+4]		; = ix
;	mov	edx, [esp+_P+8]		; = end  (not needed: edx still holds end)
	sub	ecx, edx	; ecx = ix-end (negative byte offset)

	xor	esi, esi	; sum = 0
	pxor	mm7, mm7	; linbits_sum, count of values exceeding 14
	test    ecx, 8
	jz	.H_dual_lp1

	; odd number of pairs: handle the first pair alone
	movq	mm0, [edx+ecx]
	packssdw	mm0,mm7	; dwords -> words; upper two words come from mm7 (zero here)
	movq	mm2, mm0
	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6	; greater than 14?
	psubw	mm7, mm2	; when greater than 14, linbits_sum++
	pmaddwd	mm0, mm3	; {0, 0, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	; words are (min-16) when read as signed, so the index is biased by
	; -(16*16+16); the displacement below compensates
	mov	esi, [largetbl+ebx*4+(16*16+16)*4]
	add	ecx,8

	jz	.H_dual_exit

	align   4
	; two pairs per iteration; ZF from "add ecx, 16" survives the psubw
.H_dual_lp1:
	movq	mm0, [edx+ecx]
	movq	mm1, [edx+ecx+8]
	packssdw	mm0,mm1
	movq	mm2, mm0
	paddusw	mm0, mm5	; mm0 = min(ix, 15)+0xfff0
	pcmpgtw	mm2, mm6	; greater than 14?
	pmaddwd	mm0, mm3	; {y, x, y, x}*{1, 16, 1, 16}
	movd	ebx, mm0
	punpckhdq	mm0,mm0
	add	esi, [largetbl+ebx*4+(16*16+16)*4]
	movd	ebx, mm0
	add	esi, [largetbl+ebx*4+(16*16+16)*4]
	add	ecx, 16
	psubw	mm7, mm2	; when greater than 14, linbits_sum++
	jnz	.H_dual_lp1

.H_dual_exit:
	; mm7 holds four per-lane counts {c0,c1,c2,c3}; reduce them so that both
	; dwords hold (c0+c2) in the low word and (c1+c3) in the high word.
	; pmov is not defined in this file - presumably a nasm.h macro for movq.
	pmov	mm1,mm7
	punpckhdq	mm7,mm7
	paddd	mm7,mm1
	punpckldq	mm7,mm7

	pmaddwd	mm7, [linbits32+eax*8]	; linbits
	mov	ax, [choose_table_H+eax*2]	; 16-bit entry: one candidate table number per byte

	; ecx = low dword product + (high dword product << 16)
	movd	ecx, mm7
	punpckhdq	mm7,mm7
	movd	edx,mm7
	emms
	shl	edx, 16
	add	ecx, edx

	add	ecx, esi	; add the largetbl sums accumulated above

	pop	esi
	pop	ebx

	; the 32-bit total carries two 16-bit sums, one per candidate table
	mov	edx, ecx
	and	ecx, 0xffff	; ecx = sum2
	shr	edx, 16	; edx = sum

	cmp	edx, ecx
	jle	.chooseE_s1
	mov	edx, ecx	; sum2 is smaller: use it ...
	shr	eax, 8		; ... and the table number from the high byte
.chooseE_s1:
	mov	ecx, [esp+12] ; ecx = s
	and	eax, 0xff	; return value = selected table number
	add	[ecx], edx	; *s += smaller sum
	ret

; Handler for max == 0, reached by jmp from choose_table_MMX.
; eax still holds the maximum (0), so the routine returns 0; *s is not touched.
table_MMX.L_case_0:
	emms			; leave MMX state before returning
	ret

; Handler for max == 1, reached by jmp from choose_table_MMX.
; On entry: edx = end, [esp+4] = ix, [esp+12] = s.
; For every pair (x, y) of 32-bit values in [ix, end) the byte t1l[2*x + y]
; is added to *s. Returns 1 in eax. ebx is preserved.
table_MMX.L_case_1:
	emms				; MMX state is no longer needed
	push	ebx			; ebx is used as scratch below
	mov	eax, [esp+16]		; eax = s   (offsets shifted by the push)
	mov	ecx, [esp+8]		; ecx = ix
	sub	ecx, edx		; ecx = ix - end (negative byte offset)
.next_pair:
	mov	ebx, [edx+ecx]		; ebx = x
	lea	ebx, [ebx+ebx]		; ebx = 2*x
	add	ebx, [edx+ecx+4]	; ebx = 2*x + y
	movzx	ebx, byte [t1l+ebx]	; table entry for this pair
	add	[eax], ebx		; *s += entry
	add	ecx, 8			; advance one pair; ZF set at end
	jnz	.next_pair
	pop	ebx
	mov	eax, 1			; return value
	ret

; Handlers for max = 4..15, reached by jmp from choose_table_MMX.
; On entry: edx = end, [esp+4] = ix, [esp+12] = s, MMX state still active.
; Each entry point pushes the base number of its group and loads ecx with
; the base address of the table to index, then joins from3.
;
; from3 adds, for every pair (x, y) in [ix, end), the 8-byte table entry at
; ecx + (x*16 + y)*8 into mm2. As consumed below, the accumulated qword
; holds three sums:
;   high dword              = sum  (first candidate)
;   low dword, bits 31..16  = sum1 (second candidate)
;   low dword, bits 15..0   = sum2 (third candidate)
; The smallest sum is added to *s (ties keep the earlier candidate) and
; eax returns base + 0/1/2 accordingly. ebx is preserved.
; NOTE(review): sum1/sum2 are 16-bit fields inside one dword; this assumes
; they never overflow into each other - confirm against the table contents.
table_MMX.L_case_45:
	push	dword 7			; base number for this group
	mov	ecx, tableABC+9*8
	jmp	from3

table_MMX.L_case_67:
	push	dword 10		; base number for this group
	mov	ecx, tableABC
	jmp	from3

table_MMX.L_case_8_15:
	push	dword 13		; base number for this group
	mov	ecx, tableDEF
from3:
	mov	eax,[esp+8]	; eax = ix (argument offsets shifted by the push above)
;	mov	edx,[esp+12]	; not needed: edx still holds end

	push	ebx
	sub	eax, edx	; eax = ix - end (negative byte offset)

	movq	mm5,[mul_add]	; pmaddwd weights: x*16 + y
	pxor	mm2,mm2	;mm2 = sum

	test	eax, 8		; length not a multiple of 16 bytes?
	jz	.choose3_lp1
; odd length: handle the first pair alone
	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
	packssdw	mm0,mm2	; dwords -> words (mm2 is zero here)

	pmaddwd	mm0,mm5
	movd	ebx,mm0		; ebx = x*16 + y

	movq	mm2,  [ecx+ebx*8]

	add	eax,8
	jz	.choose3_exit

	align	4
	; two pairs per iteration; ZF from "add eax,16" survives the paddd
.choose3_lp1:
	movq	mm0,[edx+eax]
	movq	mm1,[edx+eax+8]
	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0,mm5
	movd	ebx,mm0
	punpckhdq	mm0,mm0
	paddd	mm2, [ecx+ebx*8]
	movd	ebx,mm0
	add	eax,16
	paddd	mm2, [ecx+ebx*8]
	jnz	.choose3_lp1
.choose3_exit:
	; eax == 0 here on both paths (the offset has just reached zero), so
	; it doubles as the initial choice index and needs no explicit clear.
	movd	ebx, mm2
	punpckhdq	mm2,mm2
	mov	ecx, ebx
	and	ecx, 0xffff	; ecx = sum2
	shr	ebx, 16	; ebx = sum1
	movd	edx, mm2	; edx = sum

	cmp	edx, ebx
	jle	.choose3_s1
	mov	edx, ebx	; sum1 is smaller
	inc	eax		; choice = 1
.choose3_s1:
	emms
	pop	ebx
	cmp	edx, ecx
	jle	.choose3_s2
	mov	edx, ecx	; sum2 is the smallest
	mov	eax, 2		; choice = 2
.choose3_s2:
	pop	ecx		; ecx = base number pushed at the entry point
	add	eax, ecx	; return value = base + choice
	mov	ecx, [esp+12] ; ecx = s
	add	[ecx], edx	; *s += smallest sum
	ret

; Handlers for max = 2 and max = 3, reached by jmp from choose_table_MMX.
; On entry: edx = end, [esp+4] = ix, [esp+12] = s, MMX state still active.
; Each entry point pushes its base number, loads ecx with the table base and
; mm5 with the pmaddwd weights for that table, then joins from2.
; (pmov is not defined in this file - presumably a nasm.h macro for movq.)
;
; from2 adds, for every pair (x, y) in [ix, end), the dword at
; ecx + idx*4 into edi, where idx = x*3 + y (case 2) or x*4 + y (case 3).
; As consumed below, the accumulated dword holds two sums:
;   bits 31..16 = sum1 (first candidate), bits 15..0 = sum2 (second candidate)
; The smaller sum is added to *s (a tie keeps sum1) and eax returns
; base + 0/1 accordingly. ebx and edi are preserved.
; NOTE(review): assumes the 16-bit sum2 field never carries into sum1 -
; confirm against the table contents.
table_MMX.L_case_2:
	push	dword 2			; base number for this group
	mov	ecx,table23
	pmov	mm5,[mul_add23]		; weights: x*3 + y
	jmp	from2
table_MMX.L_case_3:
	push	dword 5			; base number for this group
	mov	ecx,table56
	pmov	mm5,[mul_add56]		; weights: x*4 + y
from2:
	mov	eax,[esp+8]	; eax = ix (argument offsets shifted by the push above)
;	mov	edx,[esp+12]	; not needed: edx still holds end
	push	ebx
	push	edi

	sub	eax, edx	; eax = ix - end (negative byte offset)
	xor	edi, edi	; edi = packed running sums
	test	eax, 8		; length not a multiple of 16 bytes?
	jz	.choose2_lp1
; odd length: handle the first pair alone
	movq	mm0,[edx+eax]	;mm0 = ix[0] | ix[1]
	pxor	mm2,mm2		; zero, only used as the upper half for packssdw
	packssdw	mm0,mm2

	pmaddwd	mm0,mm5
	movd	ebx,mm0		; ebx = table index for this pair

	mov	edi,  [ecx+ebx*4]

	add	eax,8
	jz	.choose2_exit

	align	4
	; two pairs per iteration
.choose2_lp1:
	movq	mm0,[edx+eax]
	movq	mm1,[edx+eax+8]
	packssdw	mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
	pmaddwd	mm0,mm5
	movd	ebx,mm0
	punpckhdq	mm0,mm0
	add	edi, [ecx+ebx*4]
	movd	ebx, mm0
	add	edi, [ecx+ebx*4]
	add	eax,16
	; the add carries out exactly when the negative offset reaches zero,
	; so CF (rather than ZF as in the other loops) ends the loop here
	jnc	.choose2_lp1
.choose2_exit:
	mov	ecx, edi
	pop	edi
	pop	ebx
	pop	eax ; table num.
	emms

	mov	edx, ecx
	and	ecx, 0xffff	; ecx = sum2
	shr	edx, 16	; edx = sum1

	cmp	edx, ecx
	jle	.choose2_s1
	mov	edx, ecx	; sum2 is smaller
	inc	eax		; return value = base + 1
.choose2_s1:
	mov	ecx, [esp+12] ; ecx = s
	add	[ecx], edx	; *s += smaller sum
	ret

	end