; Origin (archive listing header added by the download site, not part of the original source):
; www.pudn.com > lame3(mp3園鷹殻會才彿創).zip > fft.nas
; for new GOGO-no-coda (1999/09)
; Copyright (C) 1999 shigeo
; special thanks to Keiichi SAKAI, URURI
%include "nasm.h"
globaldef fht_3DN
globaldef fht
externdef costab_fft
externdef sintab_fft
externdef gray_index
; ----------------------------------------------------------------------
; Constants and scratch storage for the 3DNow! FHT routine (fht_3DN).
; Every entry is one or two 32-bit dwords. Pair layouts in the comments
; are written [high dword : low dword], the same notation the register
; comments in the code use.
; ----------------------------------------------------------------------
segment_data
align 16
; Sign mask [0x80000000 : 0]. fht_3DN loads it into mm7 and XORs it into
; a register to negate only the high float of the pair.
D_MSB1_0 dd 0 ,0x80000000
; [sqrt(2) : sqrt(2)]. fht_3DN loads it into mm6 and uses it as a pfmul
; multiplier.
D_SQRT2 dd 1.414213562,1.414213562
; Zero-initialised scratch dwords. They are not referenced in the part of
; the routine visible in this review; going by the original notes they are
; presumably read as the pairs [t_c:t_s] and [-t_s:t_c] -- TODO confirm.
t_s0 dd 0 ;[ t_c:t_s]  (low dword of the first pair)
t_c0 dd 0
t_c1 dd 0 ;[-t_s:t_c]  (low dword of the second pair)
t_s1 dd 0
; Zero-initialised dword pairs. Not referenced in the visible part of the
; routine; the names suggest sine/cosine pairs and their sign-adjusted
; variants -- NOTE(review): confirm against the code that writes them.
D_s1c1 dd 0, 0
D_Mc1s1 dd 0, 0
D_s2c2 dd 0, 0
D_Mc2s2 dd 0, 0
; Float pair with 1.0 in the low dword and 0.0 in the high dword.
D_0_1 dd 1.0, 0.0
; Single-float constants 0.5 and 0.0005 (not used in the visible code).
S_05 DD 0.5
S_00005 DD 0.0005
; Exported via "globaldef fht"; initialised to 0 here.
fht dd 0 ;function pointer
segment_code
;************************************************************************
; by shigeo
; 99/08/16
; 23000 clk -- that was tough
; 18500 clk -- bit reversal from gogo1 by URURI
; C prototype: void fht(float *fz, int n);
align 16
fht_3DN:
push ebx
push esi
push edi
push ebp
%assign _P 4*4
; The very first loop has been moved outside fht().
mov esi,[esp+_P+4] ;esi=fz
mov ecx,[esp+_P+8] ;ecx=n
; Main loop
movq mm7,[D_MSB1_0] ;mm7=[1<<31:0]
%assign LOCAL_STACK 16
sub esp,LOCAL_STACK
%assign _P (_P+LOCAL_STACK)
xor eax,eax
mov [esp],eax ;k=0
%define k dword [esp]
%define kx dword [esp+4]
%define fn dword [esp+8]
.lp30: ;k=0; do{
mov ecx,k
add ecx,2
mov k,ecx
mov eax,1
shl eax,cl ;eax=k1 = 1<>1
mov kx,esi ;保存(後で使う)
mov edi,[esp+_P+4] ;edi=fi=fz
lea ebp,[edi+esi*4] ;ebp=gi=fz+kx
mov esi,[esp+_P+8] ;esi=n
lea esi,[edi+esi*4] ;esi=fn=fz+n
movq mm6,[D_SQRT2] ;mm6=[√2:√2]
.lp31: ;fn=fz+n; do{ FLOAT g0,f0,f1,...
movd mm0,[edi] ;mm0=[0:fi[ 0]]
movd mm1,[edi+eax*4] ;mm1=[0:fi[k1]]
punpckldq mm0,mm0 ;mm0=[fi_0 :fi_0 ]
punpckldq mm1,mm1 ;mm1=[fi_k1:fi_k1]
movd mm2,[edi+ebx*4]
movd mm3,[edi+ecx*4]
punpckldq mm2,mm2 ;mm2=[fi_k2:fi_k2]
punpckldq mm3,mm3 ;mm3=[fi_k3:fi_k3]
pxor mm1,mm7 ;mm1=[-fi_k1:fi_k1]
pxor mm3,mm7 ;mm3=[-fi_k3:fi_k3]
pfadd mm0,mm1 ;mm0=[f1:f0]=[fi_0 -fi_k1 : fi_0 +fi_k1]
pfadd mm2,mm3 ;mm2=[f3:f2]=[fi_k2-fi_k3 : fi_k2+fi_k3]
movq mm3,mm0 ;mm3=[f1:f0]
pfadd mm0,mm2 ;mm0=[f1+f3:f0+f2]
movd [edi],mm0 ;fi[0]=f0+f2
psrlq mm0,32 ;mm0=[0:f1+f3]
pfsub mm3,mm2 ;mm3=[f1-f3:f0-f2]
movd [edi+eax*4],mm0 ;fi[k1]=f1+f3
movd [edi+ebx*4],mm3 ;fi[k2]=f0-f2
psrlq mm3,32 ;mm3=[0:f1-f3]
movd [edi+ecx*4],mm3 ;fi[k3]=f1-f3
movd mm0,[ebp] ;mm0=[0:gi_0]
movd mm1,[ebp+eax*4] ;mm1=[0:gi_k1]
punpckldq mm0,mm0 ;mm0=[gi_0 :gi_0 ]
punpckldq mm1,mm1 ;mm1=[gi_k1:gi_k1]
movd mm2,[ebp+ebx*4] ;mm2=[0:gi_k2]
pxor mm1,mm7 ;mm1=[-gi_k1:gi_k1]
punpckldq mm2,[ebp+ecx*4] ;mm2=[gi_k3:gi_k2]
pfadd mm0,mm1 ;mm0=[g1:g0]=[gi_0 -gi_k1:gi_0 +gi_k1]
pfmul mm2,mm6 ;mm2=[g3:g2]=sqrt2 * [gi_k3:gi_k2]
movq mm1,mm0 ;mm1=[g1:g0]
pfadd mm0,mm2 ;mm0=[g1+g3:g0+g2]
movd [ebp],mm0 ;gi[0]=g0+g2
psrlq mm0,32 ;mm0=[0:g1+g3]
pfsub mm1,mm2 ;mm1=[g1-g3:g0-g2]
movd [ebp+eax*4],mm0 ;gi[k1]=g1+g3
movd [ebp+ebx*4],mm1 ;gi[k2]=g0-g2
psrlq mm1,32 ;mm1=[0:g1-g3]
movd [ebp+ecx*4],mm1 ;gi[k3]=g1-g3
lea edi,[edi+edx*4] ;fi += k4
lea ebp,[ebp+edx*4] ;gi += k4
cmp edi,esi
jc near .lp31 ;}while(fi