* www.pudn.com > scaling.rar > scale_h2_h.asm


* ========================================================================= * 
*   NAME                                                                    * 
*       scale_h2 -- Horizontal scaling by 2                                 * 
*                                                                           * 
*   USAGE                                                                   * 
*       This routine has the following C prototype:                         * 
*                                                                           * 
*       void scale_h2_asm(                                                  * 
*                            const unsigned char *restrict inp,             * 
*                            int   cols,                                    * 
*                            unsigned char *restrict outp                   * 
*                        )                                                  * 
*                                                                           * 
*                                                                           * 
*       The scale_h2 function accepts an input array "inp" that contains    * 
*       "cols" contiguous pixels and produces an output array "outp" of     * 
*       "cols/2" pixels. Simple averaging and rounding are performed.       * 
*       This step may also be viewed as simple bi-linear interpolation.     * 
*                                                                           * 
*   DESCRIPTION                                                             * 
*       The following C code model shows the behaviour of the code.         * 
*                                                                           * 
*       void scale_h2_cn(                                                   * 
*                          const unsigned char *restrict inp,               * 
*                          int   cols,                                      * 
*                          unsigned char *restrict outp                     * 
*                        )                                                  * 
*        {                                                                  * 
*            int i;                                                         * 
*                                                                           * 
*            for( i = 0; i < (cols >> 1); i++)                              * 
*            {                                                              * 
*                outp[i] = (inp[2*i] + inp[(2*i)+1] + 1) >> 1;              * 
*            }                                                              * 
*        }                                                                  * 
*                                                                           * 
*      Every two consecutive pixels of the input image are averaged to      * 
*      form an output pixel.                                                * 
*                                                                           * 
*   TECHNIQUES                                                              * 
*      This code can be used to process either one line, or an arbitrary    * 
*      number of lines as long as they are contiguous in memory.            * 
*                                                                           * 
*   ASSUMPTIONS                                                             * 
*      The input array is aligned on a double-word boundary. The output     * 
*      array is aligned on a double word boundary. The number of input      * 
*      pixels is a multiple of 32.                                          * 
*                                                                           * 
*   NOTES                                                                   * 
*     None                                                                  * 
*                                                                           * 
*   SOURCE                                                                  * 
*     None                                                                  * 
*                                                                           * 
*   MEMORY NOTE                                                             * 
*      No bank conflicts should occur irrespective of the alignment of      * 
*      the arrays in memory.                                                * 
*                                                                           * 
*   INTERRUPT NOTE                                                          * 
*      This code is interrupt tolerant though not interruptible.            * 
*                                                                           * 
*   CYCLES                                                                  * 
*      (5*cols)/32 - 5                                                      * 
*                                                                           * 
*   cols = 640,  cycles = 95.                                               * 
*                                                                           * 
* ------------------------------------------------------------------------- * 
*             Copyright (c) 2002 Texas Instruments, Incorporated.           * 
*                            All Rights Reserved.                           * 
* ========================================================================= * 
 
               .sect ".text:hand" 
               .include "scale_h2_h.h64" 
   
_scale_h2_asm: 
* ------------------------------------------------------------------------- *
*  Entry point and one-time setup (two execute packets).                    *
*                                                                           *
*  Register roles on entry, as named by the .asg table below:               *
*      A4 = A_inp   input pixel pointer                                     *
*      B4 = B_cols  number of input pixels                                  *
*      A6 = A_outp  output pixel pointer                                    *
*      B3 = B_return  return address                                        *
*                                                                           *
*  The setup creates a second input pointer and a second output pointer,    *
*  each 8 bytes ahead of the first, so that the A-side and B-side data      *
*  paths can work on alternating double words.  It also saves CSR, builds   *
*  a copy of CSR with bit 0 cleared (B_no_gie), and presets A_pd to 3.      *
*  A_pd is used further down as the predicate that gates the kernel's       *
*  stores.                                                                  *
* ------------------------------------------------------------------------- *
 
*S-------------------------------------------------------------------------S* 
*S                TI Proprietary Information: Internal Data                S* 
*S-------------------------------------------------------------------------S* 
*S Swipe6NG datecode: 020206 (12:26:58 AM) [REL] 
*S Swipe6NG commandline: 
*S swipe6ng -Ma -s -v -l -N -mv6400 prol.sa 
*S Target architecture: "C64xx ISA" 
*S-------------------------------------------------------------------------S* 
*S Total register usage: 6 (a:3 b:3) 
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== * 
        .asg            A4,         A_inp                       ; input pointer, A side
        .asg            B4,         B_cols                      ; pixel count, later loop counter
        .asg            A6,         A_outp                      ; output pointer, A side
        .asg            B6,         B_inp                       ; input pointer, B side  (A_inp + 8)
        .asg            B8,         B_outp                      ; output pointer, B side (A_outp + 8)
        .asg            A0,         A_pd                        ; store-suppress countdown / predicate
        .asg            B3,         B_return                    ; Return 
        .asg            B30,        B_csr                       ; saved copy of CSR
        .asg            B31,        B_no_gie                    ; CSR value with bit 0 cleared (NO GIE)
* ========================================================================= * 
 
        ADD     .L2X    A_inp,      8,          B_inp           ;[ 4,0] B_inp = A_inp + 8 bytes
||      MVC     .S2     CSR,        B_csr                       ; read CSR so it can be restored on exit
 
        MVK     .S1     3,          A_pd                        ;[ 4,0] A_pd = 3
||      ADD     .S2X    A_outp,     8,          B_outp          ;[ 3,0] B_outp = A_outp + 8 bytes
||      AND     .L2     B_csr,      -2,         B_no_gie        ; B_no_gie = B_csr with bit 0 cleared
 
        ; No branch is issued in this section: execution falls through
        ; into the pipelined loop prolog below.
 
*S-------------------------------------------------------------------------S* 
*S                TI Proprietary Information: Internal Data                S* 
*S-------------------------------------------------------------------------S* 
*S Swipe6NG datecode: 020206 (12:26:58 AM) [REL] 
*S Swipe6NG commandline: 
*S swipe6ng -Ma -s -v -2 -mv6400 --Bl scale_h2.cmd 
*S Target architecture: "C64xx ISA" 
*S-------------------------------------------------------------------------S* 
*S 
*S Resource MII: 4 
*S 
*S    L1         L1-unit hardware                             * Bound: 4 
*S    S1         S1-unit hardware                             * Bound: 4 
*S    D1         D1-unit hardware                               Bound: 3 
*S    M1         M1-unit hardware                             * Bound: 4 
*S    T1         T1-unit LD/ST transport (DAB1)                 Bound: 3 
*S    L2         L2-unit hardware                             * Bound: 4 
*S    S2         S2-unit hardware                             * Bound: 4 
*S    D2         D2-unit hardware                               Bound: 3 
*S    M2         M2-unit hardware                             * Bound: 4 
*S    T2         T2-unit LD/ST transport (DAB2)                 Bound: 3 
*S    M1h        M1-unit write port (hi half)                 * Bound: 4 
*S    M2h        M2-unit write port (hi half)                 * Bound: 4 
*S    xoper1     Unshared cross path on 1X                      Bound: 1 
*S    xoper2     Unshared cross path on 2X                      Bound: 1 
*S    slots      Instruction issue slots: 30                  * Bound: 4 
*S 
*S Recurrence MII: 3 
*S 
*S------------------------- Running Adjust Phase --------------------------S* 
*S-------------------------------------------------------------------------S* 
*S File   : scale_h2.cmd 
*S ResMII : 4 
*S RecMII : 3 
*S MII    : 4 
*S II     : 4 
*S No ILP : 0 
*S List   : 18 
*S Length : 16 
*S Start  : 0 
*S Stop   : 15 
*S Operations         : 30 
*S Avg. para @ II     : 7.500000 
*S Calc. Operations   : 30 
*S Avg. para calc @ II: 7.500000 
*S Live Too Long : 0 
*S Unroll Factor : 0 
*S-------------------------------------------------------------------------S* 
*S Total register usage: 29 (a:14 b:15) 
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== * 
* Symbol naming used by the loop: the hex digits list the input bytes held
* in a register, most-significant byte first (A_in7654_0 holds bytes
* 7,6,5,4).  The _0 / _1 suffix distinguishes the first and second pair of
* double words loaded in one iteration.
*
* Several symbols deliberately share one physical register:
*     A19 : A_in6547_0, A_outw_1, A_outw1
*     A20 : A_in6420_0, A_in7531_1
*     A7  : A_in2103_1, A_in6420_1
*     B20 : B_inEDCF_0, B_inECA8_0
*     B21 : B_inA98B_1, B_inECA8_1
*     B22 : B_inA98B_0, B_outw0
* The values therefore have overlapping-by-design lifetimes; do not move
* or reorder any loop instruction without re-checking every one of them.
        .asg            A4,         A_inp                       ; Preferred 
        .asg            B6,         B_inp                       ; Preferred 
        .asg            A6,         A_outp                      ; Preferred 
        .asg            B8,         B_outp                      ; Preferred 
        .asg            B4,         B_cols                      ; Preferred 
        .asg            A0,         A_pd                        ; Preferred 
        .asg            A9,         A_in7654_0 
        .asg            A8,         A_in3210_0 
        .asg            B19,        B_inFEDC_0 
        .asg            B18,        B_inBA98_0 
        .asg            A5,         A_in2103_0 
        .asg            A19,        A_in6547_0 
        .asg            B22,        B_inA98B_0 
        .asg            B20,        B_inEDCF_0 
        .asg            A20,        A_in6420_0 
        .asg            A3,         A_in7531_0 
        .asg            B20,        B_inECA8_0 
        .asg            B7,         B_inFDB9_0 
        .asg            A18,        A_outw0 
        .asg            B24,        B_outw_0 
        .asg            A17,        A_in7654_1 
        .asg            A16,        A_in3210_1 
        .asg            B17,        B_inFEDC_1 
        .asg            B16,        B_inBA98_1 
        .asg            A7,         A_in2103_1 
        .asg            A21,        A_in6547_1 
        .asg            B21,        B_inA98B_1 
        .asg            B9,         B_inEDCF_1 
        .asg            A7,         A_in6420_1 
        .asg            A20,        A_in7531_1 
        .asg            B21,        B_inECA8_1 
        .asg            B5,         B_inFDB9_1 
        .asg            A19,        A_outw_1 
        .asg            B23,        B_outw1 
        .asg            A19,        A_outw1 
        .asg            B22,        B_outw0 
* ========================================================================= * 
* =========================== PIPE LOOP PROLOG ============================ * 
* Collapsed prolog: only the loads of the first iteration are issued here.
* The kernel is then entered directly, and its stores are held off by the
* [!A_pd] predicate until A_pd has counted down to zero.
*
* Packet 1: load the first 16 input bytes (8 through each pointer), advance
*           both pointers by 2 double words, and write B_no_gie to CSR.
        LDDW    .D2T2   *B_inp++[2],            B_inFEDC_0:B_inBA98_0   ;[ 1,1]  
||      LDDW    .D1T1   *A_inp++[2],            A_in7654_0:A_in3210_0   ;[ 1,1]  
||      MVC     .S2     B_no_gie,               CSR                     ; Int. 
 
* Packet 2: load the next 16 input bytes into the _1 register pairs.
        LDDW    .D2T2   *B_inp++[2],            B_inFEDC_1:B_inBA98_1   ;[ 2,1]  
||      LDDW    .D1T1   *A_inp++[2],            A_in7654_1:A_in3210_1   ;[ 2,1]  
         
* Packet 3: B_cols = cols / 32 (one loop pass consumes 32 input pixels).
*           The branch to LOOP is issued this early because it is delayed.
*           NOTE(review): relies on the C64x five-delay-slot branch timing,
*           so the branch lands at the end of the first kernel pass and
*           starts the second one -- confirm against the CPU reference.
        SHRU    .S2     B_cols,                 5,          B_cols      ;[ 3,0]  
||      B       .S1     LOOP                                            ; 
 
* Packet 4: bias the counter by 3 for the BDEC in the kernel.
        SUB     .D2     B_cols,                 3,          B_cols      ;[ 4,0] 
 
* =========================== PIPE LOOP KERNEL ============================ * 
* Steady-state body of the software-pipelined loop: four execute packets
* per pass.  The four LDDWs of a pass fetch 32 input bytes and the two
* STDWs write 16 output bytes.
*
* Data flow for each group of 8 input bytes (hi:lo register pair):
*   ROTL x, 8   /  SHLMB x, x, d   rotate each word left by one byte
*                                  (SHLMB with both sources equal is used
*                                  as the same rotate on an .S unit)
*   PACKH4 hi, lo                  gather bytes 7,5,3,1 (resp. F,D,B,9)
*   PACKH4 rot_hi, rot_lo          gather bytes 6,4,2,0 (resp. E,C,A,8)
*   AVGU4  even, odd               four byte-wise averages of the pairs,
*                                  matching (a + b + 1) >> 1 in the C model
*
* The two MV instructions exchange one result word between the register
* files so that each side stores a complete double word:
*   A side stores A_outw1:A_outw0 = results from input bytes 8-15 : 0-7
*   B side stores B_outw1:B_outw0 = results from the _1 groups
*
* A_pd counts down to zero ([A_pd] SUB) and then stays there.  Both stores
* are predicated on [!A_pd], so nothing is written while A_pd is non-zero.
*
* BDEC branches back to LOOP and decrements B_cols while B_cols >= 0.
*
* The bracketed tags at the end of each line are the scheduler's own
* annotations (they appear to be [cycle, iteration]).
*
* NOTE(review): the loads run one iteration ahead of the arithmetic, so the
* final pass appears to read 32 bytes beyond the end of the input array.
* The data is never used, but the memory must be readable -- confirm.
LOOP: 
        AVGU4   .M2     B_inFDB9_1, B_inECA8_1, B_outw1                 ;[13,1]  
||      PACKH4  .L2     B_inFEDC_1, B_inBA98_1, B_inFDB9_1              ;[ 9,2]  
||      PACKH4  .L1     A_in7654_1, A_in3210_1, A_in7531_1              ;[ 9,2]  
||      SHLMB   .S2     B_inBA98_1, B_inBA98_1, B_inA98B_1              ;[ 9,2]  
||      ROTL    .M1     A_in7654_0, 8,          A_in6547_0              ;[ 9,2]  
||      LDDW    .D2T2   *B_inp++[2],            B_inFEDC_0:B_inBA98_0   ;[ 1,4]  
||      LDDW    .D1T1   *A_inp++[2],            A_in7654_0:A_in3210_0   ;[ 1,4]  
 
  [ A_pd]SUB    .S1     A_pd,       1,          A_pd                    ;[14,1]  
||      MV      .S2X    A_outw_1,   B_outw0                             ;[14,1]  
||      PACKH4  .L1     A_in6547_1, A_in2103_1, A_in6420_1              ;[10,2]  
||      PACKH4  .L2     B_inEDCF_0, B_inA98B_0, B_inECA8_0              ;[10,2]  
||      ROTL    .M2     B_inBA98_0, 8,          B_inA98B_0              ;[ 6,3]  
||      ROTL    .M1     A_in3210_0, 8,          A_in2103_0              ;[ 6,3]  
||      LDDW    .D2T2   *B_inp++[2],            B_inFEDC_1:B_inBA98_1   ;[ 2,4]  
||      LDDW    .D1T1   *A_inp++[2],            A_in7654_1:A_in3210_1   ;[ 2,4]  
 
  [!A_pd]STDW   .D2T2   B_outw1:B_outw0,        *B_outp++[2]            ;[15,1]  
||      MV      .D1X    B_outw_0,   A_outw1                             ;[15,1]  
||      BDEC    .S2     LOOP,       B_cols                              ;[11,2]  
||      AVGU4   .M1     A_in6420_1, A_in7531_1, A_outw_1                ;[11,2]  
||      PACKH4  .L2     B_inEDCF_1, B_inA98B_1, B_inECA8_1              ;[11,2]  
||      PACKH4  .L1     A_in6547_0, A_in2103_0, A_in6420_0              ;[11,2]  
||      SHLMB   .S1     A_in3210_1, A_in3210_1, A_in2103_1              ;[ 7,3]  
||      ROTL    .M2     B_inFEDC_0, 8,          B_inEDCF_0              ;[ 7,3]  
 
  [!A_pd]STDW   .D1T1   A_outw1:A_outw0,        *A_outp++[2]            ;[16,1]  
||      AVGU4   .M2     B_inFDB9_0, B_inECA8_0, B_outw_0                ;[12,2]  
||      AVGU4   .M1     A_in6420_0, A_in7531_0, A_outw0                 ;[12,2]  
||      SHLMB   .S2     B_inFEDC_1, B_inFEDC_1, B_inEDCF_1              ;[ 8,3]  
||      SHLMB   .S1     A_in7654_1, A_in7654_1, A_in6547_1              ;[ 8,3]  
||      PACKH4  .L2     B_inFEDC_0, B_inBA98_0, B_inFDB9_0              ;[ 8,3]  
||      PACKH4  .L1     A_in7654_0, A_in3210_0, A_in7531_0              ;[ 8,3]  
 
* =========================== PIPE LOOP EPILOG ============================ * 
* Pipeline drain: eight execute packets with no further loads.  They repeat
* the kernel's PACKH4 / AVGU4 / MV / STDW steps for the data still in
* flight and issue four more stores (two per side).
*
* The return branch is issued in the third packet; the five packets that
* follow it still execute before control leaves the routine.  The last of
* them writes the saved B_csr back to CSR.
*
* NOTE(review): A_pd is not decremented in the epilog, yet every store here
* is still predicated on [!A_pd].  If the kernel ran fewer than three
* passes, A_pd would still be non-zero and these stores would be skipped.
* The routine therefore appears to require cols >= 96 in addition to the
* documented "multiple of 32" -- confirm against the callers.
 
        AVGU4   .M2     B_inFDB9_1, B_inECA8_1, B_outw1                 ;[13,3]  
||      PACKH4  .L2     B_inFEDC_1, B_inBA98_1, B_inFDB9_1              ;[ 9,4]  
||      PACKH4  .L1     A_in7654_1, A_in3210_1, A_in7531_1              ;[ 9,4]  
||      SHLMB   .S2     B_inBA98_1, B_inBA98_1, B_inA98B_1              ;[ 9,4]  
||      ROTL    .M1     A_in7654_0, 8,          A_in6547_0              ;[ 9,4]  
 
        MV      .S2X    A_outw_1,   B_outw0                             ;[14,3]  
||      PACKH4  .L1     A_in6547_1, A_in2103_1, A_in6420_1              ;[10,4]  
||      PACKH4  .L2     B_inEDCF_0, B_inA98B_0, B_inECA8_0              ;[10,4]  
 
  [!A_pd]STDW   .D2T2   B_outw1:B_outw0,        *B_outp++[2]            ;[15,3]  
||      MV      .D1X    B_outw_0,   A_outw1                             ;[15,3]  
||      AVGU4   .M1     A_in6420_1, A_in7531_1, A_outw_1                ;[11,4]  
||      PACKH4  .L2     B_inEDCF_1, B_inA98B_1, B_inECA8_1              ;[11,4]  
||      PACKH4  .L1     A_in6547_0, A_in2103_0, A_in6420_0              ;[11,4]  
||      B       .S2     B_return                                        ; return to caller (delayed)
 
  [!A_pd]STDW   .D1T1   A_outw1:A_outw0,        *A_outp++[2]            ;[16,3]  
||      AVGU4   .M2     B_inFDB9_0, B_inECA8_0, B_outw_0                ;[12,4]  
||      AVGU4   .M1     A_in6420_0, A_in7531_0, A_outw0                 ;[12,4]  
 
        AVGU4   .M2     B_inFDB9_1, B_inECA8_1, B_outw1                 ;[13,4]  
 
        MV      .S2X    A_outw_1,   B_outw0                             ;[14,4]  
 
  [!A_pd]STDW   .D2T2   B_outw1:B_outw0,        *B_outp++[2]            ;[15,4]  
||      MV      .D1X    B_outw_0,   A_outw1                             ;[15,4]  
 
  [!A_pd]STDW   .D1T1   A_outw1:A_outw0,        *A_outp++[2]            ;[16,4]  
||       MVC    .S2     B_csr,                   CSR                    ; restore the CSR value saved at entry
 
; ===== Branch Occurs ===== 
 
* ========================================================================= * 
*S-------------------------------------------------------------------------S* 
*S                          Register Usage Table                           S* 
*S-------------------------------------------------------------------------S* 
*S    AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA : BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB  S* 
*S    33222222222211111111119876543210 : 33222222222211111111119876543210  S* 
*S    1098765432109876543210           : 1098765432109876543210            S* 
*S-------------------------------------------------------------------------S* 
*S 1: ..........1.1.11......1.11111..1 : .........111..11......111111....  S* 
*S 2: ..........1111........1111111..1 : .......1.11111........111111....  S* 
*S 3: ...........11111......1111111..1 : .......111111111......111111....  S* 
*S 4: ...........11111......1111111..1 : .........1111111.......11111....  S* 
*S-------------------------------------------------------------------------S* 
 
* ========================================================================= * 
*   End of file:  scale_h2_h.asm                                            * 
* ------------------------------------------------------------------------- * 
*             Copyright (c) 2001 Texas Instruments, Incorporated.           * 
*                            All Rights Reserved.                           * 
* ========================================================================= *