www.pudn.com > x28x_FFT.rar > cfft32c.asm


;======================================================================== 
; 
; File Name     : cfft.asm 
;  
; Originator    : Advanced Embeeded Control 
;                 Texas Instruments Inc. 
;  
; Description   : This file contain source code for complex FFT 
;                
; Date          : 26/02/2002 (dd/mm/yyyy) 
;========================================================================     
; Routine Type  : C Callable 
; Description   : 
; void FFTC_calc(FFTxxxx_handle)  
; This function implements complex FFT using radix-2 DIT algo 
;  
; void FFT128C_init(void) 
; This function copies the Twiddle factors from load time address to  
; its run time address 
;======================================================================== 
; COMPLEX FFT MODULES                
;----------------------------------------------------------------------- 
;    typedef struct {                 
;       long *ipcbptr; 
;       long *tfptr;                
;       int size; 
;       int nrstage;              
;       long *magptr; 
;       long *winptr;  
;       long peakmag; 
;       int peakfrq;           
;       void (*init)(void);           
;       void (*izero)(void *);           
;       void (*calc)(void *);            
;       void (*mag)(void *);             
;       void (*win)(void *);               
;       }FFTxxxC;                         
;                                              
;======================================================================== 
 
                .include "sel_q.asm" 
                .def    _CFFT32_calc 
 
COS45K_LSW      .set    799Bh 
COS45K_MSW      .set    5A82h 
;=============================================================================== 
; FFT - Computation Routine  (FFT128R_calc) 
;=============================================================================== 
;   * This routine takes the 128 point complex data sequence, in bit reversed form   
;   * It implements Decimation in time FFT algorithm 
;   * Performs inplace computation 
;   * Output in-order data "IPCB"----> Xr(0),Xi(0),Xr(1),Xi(1)...Xr(127)&Xi(127) 
;===============================================================================             
;---------------------------------------------------- 
; MACRO 'ZEROI'      
; Number of WORDS/Number of Cycles 12 
; USAGE: Called if the TWIDDLE FACTOR: 1 
;  
; P=PR+jPI, Q=QR+jQI, W=WR-jWI 
; P'=P+W*Q and Q'=P-W*Q 
; 
; POINTERS AT ENTRY 
; AR6->(PR,PI,PR+1,PI+1,PR+2,PI+2,....) 
; AR2->(QR,QI,QR+1,QI+1,QR+2,QR+2,....) 
; COMPUTATION PERFORMED: INPLACE COMPUTATION 
; AR6   ->      PR'=(PR+QR)/2  
; AR2   ->      QR'=(PR-QR)/2 
; AR6+1 ->      PI'=(PI+QI)/2 
; AR2+1 ->      QI'=(PI-QI)/2 
; PR and PR', PI and PI', QR and QR', QI and QI' are 
; stored in the same locations 
; POINTERS AT EXIT 
; AR6->(PR+1,PI+1,....)  
; AR2->(QR+1,QI+1,....) 
;------------------------------------------------------ 
;                                                   (Content of REG after exec.) 
ZEROI   .macro                      ;                   AR6     AR2               
            MOVL    ACC,*XAR6       ; ACC:=PR           PR      QR   
            SFR     ACC,#1          ; ACC:=PR/2         PR      QR 
            MOVL    P,*XAR2         ; P=QR              PR      QR 
            ADDL    ACC,P<(PR,PI,PR+1,PI+1,PR+2,PI+2,....) 
; AR2->(QR,QI,QR+1,QI+1,QR+2,QR+2,....) 
; COMPUTATION PERFORMED: INPLACE COMPUTATION 
; AR6   ->      PR'=(PR+QI)/2  
; AR2   ->      QR'=(PR-QI)/2 
; AR6+1 ->      PI'=(PI-QR)/2 
; AR2+1 ->      QI'=(PI+QR)/2 
; PR and PR', PI and PI', QR and QR', QI and QI' are 
; stored in the same locations  
; POINTERS AT EXIT 
; AR6->(PR+1,PI+1,....)  
; AR2->(QR+1,QI+1,....) 
;------------------------------------------------------          
;                                                   (Content of REG after exec.) 
PBY2I   .macro                      ;                   AR6     AR2   
             
            MOVL    XAR5,*XAR2++    ; XAR5=QR           PR      QI 
            MOVL    ACC,*XAR6       ; ACC:=PR           PR      QI 
            SFR     ACC,#1          ; ACC:=PR/2         PR      QI 
            MOVL    P,*XAR2         ; P:=QI             PR      QI 
            ADDL    ACC,P<(PR,PI,PR+1,PI+1,PR+2,PI+2,....) 
; AR2->(QR,QI,QR+1,QI+1,QR+2,QR+2,....) 
; CALCULATION PERFORMED: INPLACE COMPUTATION 
; AR6   ->      PR'=(PR+QI*W+QR*W)/2   
; AR2   ->      QR'=(PR-QI*W-QR*W)/2 
; AR6+1 ->      PI'=(PI+QI*W-QR*W)/2 
; AR2+1 ->      QI'=(PI-QI*W+QR*W)/2 
; PR and PR', PI and PI', QR and QR', QI and QI' are 
; stored in the same locations 
; POINTERS AT EXIT 
; AR6->(PR+1,PI+1,....)  
; AR2->(QR+1,QI+1,....) 
;------------------------------------------------------          
;                                                           (Content of REG after exec.) 
PBY4I   .macro                      ;                       PREG    AR6  AR2          
                                    ;                       -       PR   QR  
            MOVL    ACC,*XAR6       ; ACC=PR                -       PR   QR   
            SFR     ACC,#1          ; ACC=PR/2              -       PR   QR   
            QMPYL   P,XT,*XAR2      ; PREG=W*QR/2           W*QR/2  PR   QR   
            SUBL    ACC,P           ; ACC=(PR-W*QR)/2       W*QR/2  PR   QR 
            MOVL    XAR5,P          ; XAR5=W*QR/2 
            QMPYL   P,XT,*+XAR2[2]  ; ACC=(PR-W*QR)/2       W*QI/2  PR   QR 
            SUBL    ACC,P           ; ACC=(PR-W*QI-W*QR)/2  W*QI/2  PR   QR  
            MOVL    *XAR2++,ACC     ; QR'=(PR-W*QI-W*QR)/2  W*QI/2  PR   QI 
            NEG     ACC             ; ACC=-(PR-W*QI-W*QR)/2 W*QI/2  PR   QI 
            ADDL    *XAR6++,ACC     ; PR'=(PR+W*QI+W*QR)/2  W*QI/2  PI   QI 
 
            MOVL    ACC,*XAR6       ; ACC=PI                W*QI/2  PI   QI 
            SFR     ACC,#1          ; ACC=PI/2              W*QI/2  PI   QI 
            SUBL    ACC,P           ; ACC=(PI-W*QI)/2       W*QI/2  PI   QI 
            ADDL    ACC,XAR5        ; ACC=(PI-W*QI+W*QR)/2  W*QR/2  PI   QI 
            MOVL    *XAR2++,ACC     ; QI'=(PI-W*QI+W*QR)/2  W*QR/2  PI   QR+1 
            NEG     ACC             ; ACC=-(PI-W*QI+W*QR)/2 W*QR/2  PI   QR+1 
            ADDL    *XAR6++,ACC     ; PI'=(PI+W*QI-W*QR)/2  W*QR/2  PR+1 QR+1            
        .endm 
 
 
;---------------------------------------------------- 
; MACRO 'P3BY4I'    SPM set to 1bit right shift, XT=w     
; Number of WORDS/Number of Cycles 16 
; USAGE: Called if the TWIDDLE FACTOR: COS(90+45)-jSIN(90+45)=-0.707-j0.707  
;  
; P=PR+jPI, Q=QR+jQI, W=WR-jWI 
; P'=P+W*Q and Q'=P-W*Q 
; 
; POINTERS AT ENTRY  
; TREG=W=|SIN(135)|=|COS(135)|=05a82h 
; AR6->(PR,PI,PR+1,PI+1,PR+2,PI+2,....) 
; AR2->(QR,QI,QR+1,QI+1,QR+2,QR+2,....) 
; CALCULATION PERFORMED: INPLACE COMPUTATION 
; AR6   ->      PR'=(PR+QI*W-QR*W)/2   
; AR2   ->      QR'=(PR-QI*W+QR*W)/2 
; AR6+1 ->      PI'=(PI-QI*W-QR*W)/2 
; AR2+1 ->      QI'=(PI+QI*W+QR*W)/2 
; PR and PR', PI and PI', QR and QR', QI and QI' are 
; stored in the same locations  
; POINTERS AT EXIT 
; AR6->(PR+1,PI+1,....)  
; AR2->(QR+1,QI+1,....) 
;------------------------------------------------------          
;                                                           (Content of REG after exec.) 
P3BY4I  .macro                      ;                       AR5     PREG    AR6  AR2  
                                    ;                       -       PR   QR  
            MOVL    ACC,*XAR6       ; ACC=PR                -       PR   QR   
            SFR     ACC,#1          ; ACC=PR/2              -       PR   QR   
            QMPYL   P,XT,*XAR2      ; PREG=W*QR/2           W*QR/2  PR   QR  
            ADDL    ACC,P           ; ACC=(PR+W*QR)/2       W*QR/2  PR   QR 
            MOVL    XAR5,P          ; XAR5=W*QR/2           W*QR/2  PR   QR 
            QMPYL   P,XT,*+XAR2[2]  ; ACC=(PR+W*QR)/2       W*QI/2  PR   QR 
            SUBL    ACC,P           ; ACC=(PR-W*QI+W*QR)/2  W*QI/2  PR   QR  
            MOVL    *XAR2++,ACC     ; QR'=(PR-W*QI+W*QR)/2  W*QI/2  PR   QI 
            NEG     ACC             ; ACC=-(PR-W*QI+W*QR)/2 W*QI/2  PR   QI 
            ADDL    *XAR6++,ACC     ; PR'=(PR+W*QI-W*QR)/2  W*QI/2  PI   QI 
 
            MOVL    ACC,*XAR6       ; ACC=PI                W*QI/2  PI   QI 
            SFR     ACC,#1          ; ACC=PI/2              W*QI/2  PI   QI 
            ADDL    ACC,P           ; ACC=(PR+W*QI)/2       W*QI/2  PI   QI 
            ADDL    ACC,XAR5        ; ACC=(PI+W*QI+W*QR)/2  W*QR/2  PI   QI 
            NOP     *,ARP2 
            MOVL    *0++,ACC        ; QI'=(PI+W*QI+W*QR)/2  W*QR/2  PI   QR+1 
            NEG     ACC             ; ACC=-(PI+W*QI+W*QR)/2 W*QR/2  PI   QR+1 
            ADDL    *XAR6,ACC       ; PI'=(PI-W*QI-W*QR)/2  W*QR/2  PI   QR+1 
            NOP     *0++            ;                       W*QR/2  PR+1 QR+1 
        .endm   
 
 
;---------------------------------------------------- 
; MACRO 'BFLY'       SPM set to 1bit right shift 
; Number of WORDS/Number of Cycles 21 
; USAGE: General Butterfly RADIX 2 -> TWIDDLE FACTOR W=WR-jWI  
;  
; P=PR+jPI, Q=QR+jQI, W=WR-jWI 
; P'=P+W*Q and Q'=P-W*Q 
; 
; REGISTER USED: XAR0,XAR2,XAR3,XAR4,XAR5,XAR6,ACC,XT,P  
; 
; POINTERS AT ENTRY 
; AR6->(PR,PI,PR+1,PI+1,PR+2,PI+2,....) 
; AR2->(QR,QI,QR+1,QI+1,QR+2,QR+2,....) 
; AR3-> Real Part of Twiddle Factor: WR(COSx) 
; AR4-> Imaginary Part of Twiddle Factor: WI(SINx) 
; AR5-> Temporary Variable 
; CALCULATION PERFORMED: INPLACE COMPUTATION  
; AR6   ->      PR'=(PR+QR*WR+QI*WI)/2   
; AR2   ->      QR'=(PR-QR*WR-QI*WI)/2 
; AR6+1 ->      PI'=(PI+QI*WR-QR*WI)/2 
; AR2+1 ->      QI'=(PI-QI*WR+QR*WI)/2 
; PR and PR', PI and PI', QR and QR', QI and QI' are 
; stored in the same locations 
; POINTERS AT EXIT 
; AR6->(PR+1,PI+1,....)  
; AR2->(QR+1,QI+1,....) 
; AR3-> Real Part of Next Twiddle Factor 
; AR4-> Imaginary Part ofNext Twiddle Factor 
;------------------------------------------------------             
     
BFLY    .macro  p                   ;                         XT    AR6     AR2     AR3     AR4   
                                    ;                         -     PR      QR      WR      WI  
            MOVL    XT,*XAR3        ; XT=WR                   WR    PR      QR      WR      WI 
            NOP     *0++            ;                         WR    PR      QR      WR+n    WI 
            QMPYL   P,XT,*XAR2++    ; P=QR*WR                 WR    PR      QI      WR+n    WI 
            QMPYL   ACC,XT,*,ARP4   ; ACC=QI*WR               WR    PR      QI      WR+n    WI 
            MOVL    *-SP[TEMP],P    ; TEMP=QR*WR              WR    PR      QI      WR+n    WI 
 
            MOVL    XT,*0++         ; XT=WI                   WR    PR      QI      WR+n    WI+n 
            QMPYL   P,XT,*--XAR2    ; P=WI*QR                 WR    PR      QR      WR+n    WI+n 
            SUBL    ACC,P           ; ACC=(QI*WR-QR*WI)       WR    PR      QR      WR+n    WI+n 
             
            .if(TF_QFMAT==Q30) 
            LSL     ACC,#1          ; ACC=(QI*WR-QR*WI) (Q30) 
            .endif 
 
            MOVL    XAR5,ACC        ; XAR5=(QI*WR-QR*WI) (Q30) 
                         
            QMPYL   ACC,XT,*+XAR2[2]; ACC=WI*QI              WR    PR      QR      WR+n    WI+n 
            ADDL    ACC,*-SP[TEMP]  ; ACC=(QR*WR+QI*WI)      WR    PR      QR      WR+n    WI+n 
 
            .if(TF_QFMAT==Q30) 
            LSL     ACC,#1          ; ACC=(QR*WR+QI*WI) in Q30 
            .endif 
 
            MOVL    P,*XAR6         ; P=PR                    WR    PR      QR      WR+n    WI+n 
            ADDL    ACC,P<(R1,I1) 
; AR2->(R2,I2) 
; AR3->(R3,I3) 
; AR4->(R4,I4) 
; AR5-> Temporary Variable 
; CALCULATION PERFORMED: INPLACE COMPUTATION  
; AR6   ->      R1'=(R1+R2+R3+R4)/4 
; AR2   ->      R2'=(R1-R2+I3-I4)/4 
; AR3   ->      R3'=(R1+R2-R3-R4)/4 
; AR4   ->      R4'=(R1-R2-I3+I4)/4 
; AR6+1 ->      I1'=(I1+I2+I3+I4)/4 
; AR2+1 ->      I2'=(I1-I2-R3+R4)/4 
; AR3+1 ->      I3'=(I1+I2-I3-I4)/4 
; AR4+1 ->      I4'=(I1-I2+R3-R4)/4 
; POINTERS AT EXIT 
; AR6->(R5,I5)  
; AR2->(R6,I6) 
; AR3->(R7,I7)  
; AR4->(R8,I8) 
;---------------------------------------------------          
;                                                           (Content of REG after exec.)                       
COMBO   .macro                      ;                       AR6 AR2 AR3 AR4   
            MOVL    ACC,*XAR3       ; ACC=R3                R1  R2  R3  R4   
            SFR     ACC,#1          ; ACC=R3/2 
            MOVL    P,*XAR4         ; P=R4 
            SUBL    ACC,P<FFT_handle              
            MOVL    XAR2,*XAR7      ; XAR2=ipcbptr 
            MOVL    XAR3,XAR2       ; XAR3=ipcbptr 
            MOVL    XAR4,XAR2       ; XAR4=ipcbptr 
             
            MOVL    XAR6,XAR2       ; XAR6=ipcbptr  
            ADDB    XAR2,#4         ; XAR2=ipcbptr+4 
            ADDB    XAR3,#8         ; XAR3=ipcbptr+8 
            ADDB    XAR4,#12        ; XAR4=ipcbptr+12 
            MOVB    XAR0,#14        ; XAR0=14 
                                                                                                           
;----------------------------------------------------------------- 
; Stage 1 & 2 - Using the RADIX 4 COMBO Macro 
;----------------------------------------------------------------- 
            MOVL    *-SP[2],XAR7    ; (SP-2)=FFT_handle 
            MOV     ACC,*+XAR7[4]<<14 
            SUB     ACC,#1<<14 
            MOV     AR7,AH          ; Loop (N/4 - 1) times 
           
STAGE1_2_LP: 
            COMBO 
            BANZ    STAGE1_2_LP,AR7--   
 
;------------------------------------------------------------------ 
; Stage 3 - Using ZEROI, PBY4I,PBY2I,P3BY4I Macros 
;------------------------------------------------------------------ 
            MOVL    XAR7,*-SP[2]    ; XAR7=FFT_handle 
            MOVL    XAR6,*XAR7      ; XAR6=ipcbptr  
            MOVL    XAR2,*XAR7      ; XAR2=ipcbptr 
            ADDB    XAR2,#16        ; XAR2=ipcbptr+16 
            MOVB    XAR0,#18 
 
            MOV     AL,#COS45K_LSW 
            MOV     AH,#COS45K_MSW 
            MOVL    XT,ACC          ; XT=COS(45) in Q31 format 
                 
            MOV     ACC,*+XAR7[4]<<13 
            SUB     ACC,#1<<13 
            MOV     AR1,AH          ; Loop (N/8 - 1) times 
        
STAGE3_LP:  ZEROI 
            PBY4I 
            PBY2I 
            P3BY4I                  ; AR7 at end, Use *0+ modify 
            BANZ    STAGE3_LP,AR1-- 
                                                                                             
;-------------------------------------------------------- 
; Stage 4 to LOG2(N) - Using BFLY Macro 
;--------------------------------------------------------    
; Initialisation 
; 1. Initialise the Butterfly loop(BLC), Group loop(GLC) & Stage loop counter(SLC) 
; 2. Initialise the Data offset(DOST) and twiddle factor offset (TOST)           
            MOV     ACC,*+XAR7[4]<<12 ; AH=(N/16) 
            MOV     *-SP[GLCB],AH   ; GLCB=N/16 
            MOV     *-SP[GLC],AH    ; GLC=N/16 
            MOVB    *-SP[BLC],#8,UNC; BLC=8 
 
; Twiddle factor offset 
            LSL     ACC,#1 
            MOV     T,AH            ; T=N/8 
            ADDB    XAR7,#13 
            MPY     ACC,T,*XAR7     ; ACC=ratio*N/8 
            SUBB    XAR7,#13             
            MOV     *-SP[TOST],AL   ; TOST=ratio*N/8 
             
            MOV     ACC,*+XAR7[5]   ; ACC=nrstage 
            SUB     ACC,#3          ; ACC=nrstage-3 
            MOV     *-SP[SLC],AL    ; SLC=nrState-3 
            MOVB    *-SP[DOST],#32,UNC ; DOST=32 
 
            MOV     ACC,*+XAR7[4]<<15  ; AH=N/2 
            ADDB    XAR7,#13         
            MOV     T,*XAR7         ; T=ratio 
            SUBB    XAR7,#13         
            MPY     ACC,T,AH        ; ACC=ratio*N/2 
            ADDL    ACC,*+XAR7[2]   ;  
            MOVL    *-SP[WRWIOST],ACC  ; WRWIOST=tfptr+ratio*N/2 ----> COS(Angle) 
 
 
SLP:                                ; Stage loop  
            MOVL    XAR6,*XAR7      ; XAR6=ipcbptr ----->  P           
            MOVL    ACC,*XAR7       ; ACC=ipcbptr 
            ADD     ACC,*-SP[DOST]  ; ACC=ipcbptr+DOST 
            MOVL    XAR2,ACC        ; XAR2->ipcbptr+DOST -----> Q 
             
            MOVZ    AR0,*-SP[TOST]  ; AR0=TOST 
 
             
GLP:                                ; Group loop 
            MOVL    XAR3,*-SP[WRWIOST] 
            MOVL    XAR4,*+XAR7[2]  ; XAR4=tfptr ---> SIN(Angle) 
             
            MOVZ    AR1,*-SP[BLC]   ; AR1=BLC 
            SUBB    XAR1,#1 
BLP:                                ; Butterfly loop 
            BFLY    7 
            BANZ    BLP,AR1--       ; Butterfly loop                       
 
; Operations performed between the group 
; 1. Manuplating the P & Q pointer to point to the next group 
; 2. Reinitialising the WI and WR twiddle factor pointer   
; 3. Decrementing the Group Count by 1, till all the group in the stage is computed 
; 4. Reinitialising the Butterfly loop index for next group 
                                    ; ARP=AR2 
            MOVU    ACC,*-SP[DOST]  ; ACC=DOST 
            ADDL    XAR6,ACC         
            ADDL    XAR2,ACC         
            DEC     *-SP[GLC] 
            BF      GLP,NEQ         ; Group Loop  
   
; Operation Performed, between the stage 
; 1. Mutiply the data offset by 2 
; 2. Divide the Group count by 2 
; 3. Divide the Twiddle offset by 2 
; 4. Multiply the butterfly count by 2 
; 5. Decrementing the stage count by 1, till all the stage gets over 
; 6. Reinitialising the P & Q data pointer to the first group of next stage          
            MOV     ACC,*-SP[DOST]<<1 
            MOV     *-SP[DOST],AL   ; DOST=DOST*2 
 
            MOV     ACC,*-SP[BLC]<<1 
            MOV     *-SP[BLC],AL    ; BLC=BLC*2 
 
            MOV     ACC,*-SP[TOST]<<15 
            MOV     *-SP[TOST],AH   ; TOST=TOST/2 
 
            MOV     ACC,*-SP[GLCB]<<15 
            MOV     *-SP[GLCB],AH   ; GLCB=GLCB/2 
            MOV     *-SP[GLC],AH    ; GLC=GLCB/2 
 
            DEC     *-SP[SLC] 
            BF      SLP,NEQ         ; Stage loop 
                      
            SPM     0               ; SPM set to 0 
             
            SUBB    SP,#FFT_LOCAL_FRAME ; Clear local frame 
            POP     XAR3            ; Context Restore 
            POP     XAR2 
            POP     XAR1  
            NASP                    ; Unalign Stack Pointer 
            LRETR