* www.pudn.com > scaling.rar > scale_h2_h.asm
* ========================================================================= *
* NAME *
* scale_h2 -- Horizontal scaling by 2 *
* *
* USAGE *
* This routine has the following C prototype: *
* *
* void scale_h2_asm( *
* const unsigned char *restrict inp, *
* int cols, *
* unsigned char *restrict outp *
* ) *
* *
* *
* The scale_h2 function accepts an input array "inp" that contains *
* "cols" contiguous pixels and produces an output array "outp" of *
* "cols/2" pixels. Simple averaging and rounding are performed. *
* This step may also be viewed as simple bi-linear interpolation. *
* *
* DESCRIPTION *
* The following C code model shows the behaviour of the code. *
* *
* void scale_h2_cn( *
* const unsigned char *restrict inp, *
* int cols, *
* unsigned char *restrict outp *
* ) *
* { *
* int i; *
* *
* for( i = 0; i < (cols >> 1); i++) *
* { *
* outp[i] = (inp[2*i] + inp[(2*i)+1] + 1) >> 1; *
* } *
* } *
* *
* Every two consecutive pixels of the input image are averaged to *
* form an output pixel. *
* *
* TECHNIQUES *
* This code can be used to process either one line, or an arbitrary *
* number of lines as long as they are contiguous in memory. *
* *
* ASSUMPTIONS *
* The input array is aligned on a double-word boundary. The output *
* array is aligned on a double-word boundary. The number of input *
* pixels is a multiple of 32. *
* *
* NOTES *
* None *
* *
* SOURCE *
* None *
* *
* MEMORY NOTE *
* No bank conflicts should occur irrespective of the alignment of *
* the arrays in memory. *
* *
* INTERRUPT NOTE *
* This code is interrupt tolerant though not interruptible. *
* *
* CYCLES *
* (5*cols)/32 - 5 *
* *
* cols = 640, cycles = 95. *
* *
* ------------------------------------------------------------------------- *
* Copyright (c) 2002 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
* ------------------------------------------------------------------------- *
* Entry point _scale_h2_asm and one-time setup.
*
* Register roles, as named by the .asg directives below:
*   A4  = A_inp     input pointer (A side)
*   B4  = B_cols    input pixel count, later reused as the loop counter
*   A6  = A_outp    output pointer (A side)
*   B6  = B_inp     second input pointer,  set to A_inp  + 8 bytes
*   B8  = B_outp    second output pointer, set to A_outp + 8 bytes
*   A0  = A_pd      countdown that gates the stores in the pipelined loop
*   B3  = B_return  return address, used by the final branch
*   B30 = B_csr     copy of CSR taken on entry, written back on exit
*   B31 = B_no_gie  that copy with bit 0 cleared
* NOTE(review): A4/B4/A6 for (inp, cols, outp) and B3 for the return
* address presumably follow the TI C6000 C calling convention -- confirm.
* ------------------------------------------------------------------------- *
.sect ".text:hand"
.include "scale_h2_h.h64"
_scale_h2_asm:
*S-------------------------------------------------------------------------S*
*S TI Proprietary Information: Internal Data S*
*S-------------------------------------------------------------------------S*
*S Swipe6NG datecode: 020206 (12:26:58 AM) [REL]
*S Swipe6NG commandline:
*S swipe6ng -Ma -s -v -l -N -mv6400 prol.sa
*S Target architecture: "C64xx ISA"
*S-------------------------------------------------------------------------S*
*S Total register usage: 6 (a:3 b:3)
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
.asg A4, A_inp ; Preferred
.asg B4, B_cols ; Preferred
.asg A6, A_outp ; Preferred
.asg B6, B_inp ; Preferred
.asg B8, B_outp ; Preferred
.asg A0, A_pd ; Preferred
.asg B3, B_return ; Return
.asg B30, B_csr ; CSR
.asg B31, B_no_gie ; NO GIE
* ========================================================================= *
* Setup packet 1: B_inp = A_inp + 8, so the B-side loads start on the
* second double word of the input; CSR is copied into B_csr so that it
* can be written back unchanged on exit.
ADD .L2X A_inp, 8, B_inp ;[ 4,0]
|| MVC .S2 CSR, B_csr ; CSR
* Setup packet 2: A_pd = 3 (the loop's stores are predicated on A_pd
* being zero, and the kernel counts it down once per pass);
* B_outp = A_outp + 8, so the B-side stores start on the second double
* word of the output; B_no_gie = B_csr with bit 0 cleared (bit 0 of CSR
* is GIE, the global interrupt enable, as the symbol name indicates).
MVK .S1 3, A_pd ;[ 4,0]
|| ADD .S2X A_outp, 8, B_outp ;[ 3,0]
|| AND .L2 B_csr, -2, B_no_gie ;
; No branch is issued by the setup code; execution continues into the loop prolog.
*S-------------------------------------------------------------------------S*
*S TI Proprietary Information: Internal Data S*
*S-------------------------------------------------------------------------S*
*S Swipe6NG datecode: 020206 (12:26:58 AM) [REL]
*S Swipe6NG commandline:
*S swipe6ng -Ma -s -v -2 -mv6400 --Bl scale_h2.cmd
*S Target architecture: "C64xx ISA"
*S-------------------------------------------------------------------------S*
*S
*S Resource MII: 4
*S
*S L1 L1-unit hardware * Bound: 4
*S S1 S1-unit hardware * Bound: 4
*S D1 D1-unit hardware Bound: 3
*S M1 M1-unit hardware * Bound: 4
*S T1 T1-unit LD/ST transport (DAB1) Bound: 3
*S L2 L2-unit hardware * Bound: 4
*S S2 S2-unit hardware * Bound: 4
*S D2 D2-unit hardware Bound: 3
*S M2 M2-unit hardware * Bound: 4
*S T2 T2-unit LD/ST transport (DAB2) Bound: 3
*S M1h M1-unit write port (hi half) * Bound: 4
*S M2h M2-unit write port (hi half) * Bound: 4
*S xoper1 Unshared cross path on 1X Bound: 1
*S xoper2 Unshared cross path on 2X Bound: 1
*S slots Instruction issue slots: 30 * Bound: 4
*S
*S Recurrence MII: 3
*S
*S------------------------- Running Adjust Phase --------------------------S*
*S-------------------------------------------------------------------------S*
*S File : scale_h2.cmd
*S ResMII : 4
*S RecMII : 3
*S MII : 4
*S II : 4
*S No ILP : 0
*S List : 18
*S Length : 16
*S Start : 0
*S Stop : 15
*S Operations : 30
*S Avg. para @ II : 7.500000
*S Calc. Operations : 30
*S Avg. para calc @ II: 7.500000
*S Live Too Long : 0
*S Unroll Factor : 0
*S-------------------------------------------------------------------------S*
*S Total register usage: 29 (a:14 b:15)
* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
* Symbol naming used by the pipelined loop:
*   A_ / B_ prefix  register file that holds the value.
*   _0 / _1 suffix  first / second double word loaded on that side in one
*                   loop iteration (the two loads are 16 bytes apart).
*   inXXXX          one 32-bit word of input; each hex digit names the
*                   input byte held in that byte lane, most significant
*                   lane first (NOTE(review): digit-to-address mapping
*                   assumes little-endian loads -- confirm).
*                     in3210 / in7654 / inBA98 / inFEDC : words as loaded
*                     in2103 / in6547 / inA98B / inEDCF : rotated left 8
*                     in7531 / inFDB9 : odd-numbered bytes  (PACKH4 of the
*                                       loaded words)
*                     in6420 / inECA8 : even-numbered bytes (PACKH4 of the
*                                       rotated words)
*   outw            one word of four averaged output bytes.
*
* Register pairs required by the double-word loads and stores:
*   LDDW destinations  A9:A8, B19:B18 (set _0) and A17:A16, B17:B16 (set _1)
*   STDW sources       A19:A18 = A_outw1:A_outw0, B23:B22 = B_outw1:B_outw0
*
* Several symbols deliberately name the same physical register; the
* schedule keeps their live ranges apart, so instruction order matters:
*   A19 : A_in6547_0, A_outw_1, A_outw1     A20 : A_in6420_0, A_in7531_1
*   A7  : A_in2103_1, A_in6420_1            B20 : B_inEDCF_0, B_inECA8_0
*   B21 : B_inA98B_1, B_inECA8_1            B22 : B_inA98B_0, B_outw0
.asg A4, A_inp ; Preferred
.asg B6, B_inp ; Preferred
.asg A6, A_outp ; Preferred
.asg B8, B_outp ; Preferred
.asg B4, B_cols ; Preferred
.asg A0, A_pd ; Preferred
.asg A9, A_in7654_0
.asg A8, A_in3210_0
.asg B19, B_inFEDC_0
.asg B18, B_inBA98_0
.asg A5, A_in2103_0
.asg A19, A_in6547_0
.asg B22, B_inA98B_0
.asg B20, B_inEDCF_0
.asg A20, A_in6420_0
.asg A3, A_in7531_0
.asg B20, B_inECA8_0
.asg B7, B_inFDB9_0
.asg A18, A_outw0
.asg B24, B_outw_0
.asg A17, A_in7654_1
.asg A16, A_in3210_1
.asg B17, B_inFEDC_1
.asg B16, B_inBA98_1
.asg A7, A_in2103_1
.asg A21, A_in6547_1
.asg B21, B_inA98B_1
.asg B9, B_inEDCF_1
.asg A7, A_in6420_1
.asg A20, A_in7531_1
.asg B21, B_inECA8_1
.asg B5, B_inFDB9_1
.asg A19, A_outw_1
.asg B23, B_outw1
.asg A19, A_outw1
.asg B22, B_outw0
* ========================================================================= *
* =========================== PIPE LOOP PROLOG ============================ *
* Starts the software pipeline: issues the loads for the first 32 input
* bytes, disables interrupts, turns the pixel count into the loop counter
* and issues the branch that re-enters the kernel.
*
* Each LDDW reads 8 bytes and post-increments its pointer by 2 double
* words (16 bytes). B_inp was set 8 bytes ahead of A_inp by the setup
* code, so the A and B sides together cover consecutive 8-byte chunks:
* A: +0, +16, ...   B: +8, +24, ...
*
* Prolog packet 1: first double word on each side; CSR is written with
* the GIE bit cleared, so interrupts stay off until CSR is restored in
* the epilog.
LDDW .D2T2 *B_inp++[2], B_inFEDC_0:B_inBA98_0 ;[ 1,1]
|| LDDW .D1T1 *A_inp++[2], A_in7654_0:A_in3210_0 ;[ 1,1]
|| MVC .S2 B_no_gie, CSR ; Int.
* Prolog packet 2: second double word on each side.
LDDW .D2T2 *B_inp++[2], B_inFEDC_1:B_inBA98_1 ;[ 2,1]
|| LDDW .D1T1 *A_inp++[2], A_in7654_1:A_in3210_1 ;[ 2,1]
* Prolog packet 3: B_cols = cols >> 5, the number of 32-pixel groups.
* The branch to LOOP is issued here; with the C64x's five branch delay
* slots (the next packet plus the four kernel packets) the kernel is
* first executed by fall-through and then entered again at LOOP.
SHRU .S2 B_cols, 5, B_cols ;[ 3,0]
|| B .S1 LOOP ;
* Prolog packet 4: bias the group count by -3 to form the initial value
* of the counter tested by BDEC in the kernel.
SUB .D2 B_cols, 3, B_cols ;[ 4,0]
* =========================== PIPE LOOP KERNEL ============================ *
* Steady-state loop body: four execute packets per pass. Each pass issues
* loads for 32 input bytes and stores 16 output bytes belonging to an
* earlier iteration. The trailing ;[c,i] tags come from the scheduler and
* appear to give the cycle c (1..16) within the single-iteration schedule
* and the relative iteration i that the instruction belongs to.
*
* Data flow for one word (identical on both sides and for both sets):
*   rotate the loaded word left by one byte (ROTL by 8, or SHLMB with
*   the same register as both sources), PACKH4 the two loaded words to
*   collect the odd bytes, PACKH4 the two rotated words to collect the
*   even bytes, then AVGU4 the two results to get four rounded unsigned
*   byte averages -- the (a + b + 1) >> 1 of the C model in the header.
*
* Output assembly: A_outw1:A_outw0 (stored through A_outp) holds the
* results of set _0 from both sides, B_outw1:B_outw0 (stored through
* B_outp, which starts 8 bytes further on) holds the results of set _1;
* the two MVs move one result word across register files for this.
* Both output pointers advance 16 bytes per store.
*
* Store gating: A_pd is 3 on entry and is decremented once per pass
* (packet 2) until it reaches 0. The stores in packets 3 and 4 execute
* only when A_pd is 0, so they are suppressed on the first two passes,
* while the pipeline is still filling, and run from the third pass on.
*
* NOTE(review): tracing the counter (cols/32 - 3, BDEC taken while it is
* >= 0, plus the extra pass caused by the prolog branch) gives cols/32
* kernel passes for cols >= 96. For cols of 32 or 64 the kernel still
* runs twice but A_pd never reaches 0, so no store executes at all. The
* file header only requires a multiple of 32 -- confirm the minimum.
* NOTE(review): loads are issued in the prolog and again in every kernel
* pass, so the last pass appears to read 32 bytes beyond inp + cols; the
* loaded values are never used. Confirm callers tolerate this over-read.
* NOTE(review): the kernel is 4 packets per 32 input pixels, whereas the
* CYCLES formula in the file header scales as 5 per 32 pixels; the two
* only agree near cols = 640. Confirm which is intended.
LOOP:
* Kernel packet 1: average for set _1 on the B side; odd-byte gather for
* set _1 on both sides; byte rotations of B_inBA98_1 and A_in7654_0;
* loads of the next set _0 double words.
AVGU4 .M2 B_inFDB9_1, B_inECA8_1, B_outw1 ;[13,1]
|| PACKH4 .L2 B_inFEDC_1, B_inBA98_1, B_inFDB9_1 ;[ 9,2]
|| PACKH4 .L1 A_in7654_1, A_in3210_1, A_in7531_1 ;[ 9,2]
|| SHLMB .S2 B_inBA98_1, B_inBA98_1, B_inA98B_1 ;[ 9,2]
|| ROTL .M1 A_in7654_0, 8, A_in6547_0 ;[ 9,2]
|| LDDW .D2T2 *B_inp++[2], B_inFEDC_0:B_inBA98_0 ;[ 1,4]
|| LDDW .D1T1 *A_inp++[2], A_in7654_0:A_in3210_0 ;[ 1,4]
* Kernel packet 2: count A_pd down (stops at 0); copy the A-side set _1
* result into the low word of the B store pair; even-byte gathers;
* byte rotations of the low words of set _0; loads of the next set _1
* double words.
[ A_pd]SUB .S1 A_pd, 1, A_pd ;[14,1]
|| MV .S2X A_outw_1, B_outw0 ;[14,1]
|| PACKH4 .L1 A_in6547_1, A_in2103_1, A_in6420_1 ;[10,2]
|| PACKH4 .L2 B_inEDCF_0, B_inA98B_0, B_inECA8_0 ;[10,2]
|| ROTL .M2 B_inBA98_0, 8, B_inA98B_0 ;[ 6,3]
|| ROTL .M1 A_in3210_0, 8, A_in2103_0 ;[ 6,3]
|| LDDW .D2T2 *B_inp++[2], B_inFEDC_1:B_inBA98_1 ;[ 2,4]
|| LDDW .D1T1 *A_inp++[2], A_in7654_1:A_in3210_1 ;[ 2,4]
* Kernel packet 3: gated store of the set _1 results through B_outp;
* copy the B-side set _0 result into the high word of the A store pair;
* BDEC branches back to LOOP and decrements B_cols while B_cols >= 0
* (the branch takes effect after the following pass has been issued);
* A-side set _1 average; even-byte gathers; remaining byte rotations.
[!A_pd]STDW .D2T2 B_outw1:B_outw0, *B_outp++[2] ;[15,1]
|| MV .D1X B_outw_0, A_outw1 ;[15,1]
|| BDEC .S2 LOOP, B_cols ;[11,2]
|| AVGU4 .M1 A_in6420_1, A_in7531_1, A_outw_1 ;[11,2]
|| PACKH4 .L2 B_inEDCF_1, B_inA98B_1, B_inECA8_1 ;[11,2]
|| PACKH4 .L1 A_in6547_0, A_in2103_0, A_in6420_0 ;[11,2]
|| SHLMB .S1 A_in3210_1, A_in3210_1, A_in2103_1 ;[ 7,3]
|| ROTL .M2 B_inFEDC_0, 8, B_inEDCF_0 ;[ 7,3]
* Kernel packet 4: gated store of the set _0 results through A_outp;
* set _0 averages on both sides; byte rotations of the high words of
* set _1; odd-byte gathers for set _0.
[!A_pd]STDW .D1T1 A_outw1:A_outw0, *A_outp++[2] ;[16,1]
|| AVGU4 .M2 B_inFDB9_0, B_inECA8_0, B_outw_0 ;[12,2]
|| AVGU4 .M1 A_in6420_0, A_in7531_0, A_outw0 ;[12,2]
|| SHLMB .S2 B_inFEDC_1, B_inFEDC_1, B_inEDCF_1 ;[ 8,3]
|| SHLMB .S1 A_in7654_1, A_in7654_1, A_in6547_1 ;[ 8,3]
|| PACKH4 .L2 B_inFEDC_0, B_inBA98_0, B_inFDB9_0 ;[ 8,3]
|| PACKH4 .L1 A_in7654_0, A_in3210_0, A_in7531_0 ;[ 8,3]
* =========================== PIPE LOOP EPILOG ============================ *
* Drains the pipeline: the same operations as the kernel with the loads,
* the A_pd countdown, BDEC and the earliest-stage rotations removed, so
* the iterations still in flight are finished and stored without
* starting new ones. Eight execute packets in total.
*
* The stores remain predicated on A_pd being 0, exactly as in the kernel.
* The return branch is issued in packet 3 so that its five delay slots
* are filled by packets 4..8; CSR is restored from B_csr in packet 8,
* the last delay slot, which puts the GIE bit back to its entry value.
*
* Epilog packet 1: B-side set _1 average; odd-byte gathers for set _1;
* byte rotations still needed by the last iteration.
AVGU4 .M2 B_inFDB9_1, B_inECA8_1, B_outw1 ;[13,3]
|| PACKH4 .L2 B_inFEDC_1, B_inBA98_1, B_inFDB9_1 ;[ 9,4]
|| PACKH4 .L1 A_in7654_1, A_in3210_1, A_in7531_1 ;[ 9,4]
|| SHLMB .S2 B_inBA98_1, B_inBA98_1, B_inA98B_1 ;[ 9,4]
|| ROTL .M1 A_in7654_0, 8, A_in6547_0 ;[ 9,4]
* Epilog packet 2: move the A-side set _1 result into the B store pair;
* even-byte gathers.
MV .S2X A_outw_1, B_outw0 ;[14,3]
|| PACKH4 .L1 A_in6547_1, A_in2103_1, A_in6420_1 ;[10,4]
|| PACKH4 .L2 B_inEDCF_0, B_inA98B_0, B_inECA8_0 ;[10,4]
* Epilog packet 3: gated B-side store; move the B-side set _0 result
* into the A store pair; last averages and gathers; issue the return.
[!A_pd]STDW .D2T2 B_outw1:B_outw0, *B_outp++[2] ;[15,3]
|| MV .D1X B_outw_0, A_outw1 ;[15,3]
|| AVGU4 .M1 A_in6420_1, A_in7531_1, A_outw_1 ;[11,4]
|| PACKH4 .L2 B_inEDCF_1, B_inA98B_1, B_inECA8_1 ;[11,4]
|| PACKH4 .L1 A_in6547_0, A_in2103_0, A_in6420_0 ;[11,4]
|| B .S2 B_return ;
* Epilog packet 4: gated A-side store; set _0 averages of the final
* iteration.
[!A_pd]STDW .D1T1 A_outw1:A_outw0, *A_outp++[2] ;[16,3]
|| AVGU4 .M2 B_inFDB9_0, B_inECA8_0, B_outw_0 ;[12,4]
|| AVGU4 .M1 A_in6420_0, A_in7531_0, A_outw0 ;[12,4]
* Epilog packets 5-8: finish the final iteration -- B-side set _1
* average, the two cross-file moves, then the last two gated stores.
AVGU4 .M2 B_inFDB9_1, B_inECA8_1, B_outw1 ;[13,4]
MV .S2X A_outw_1, B_outw0 ;[14,4]
[!A_pd]STDW .D2T2 B_outw1:B_outw0, *B_outp++[2] ;[15,4]
|| MV .D1X B_outw_0, A_outw1 ;[15,4]
[!A_pd]STDW .D1T1 A_outw1:A_outw0, *A_outp++[2] ;[16,4]
|| MVC .S2 B_csr, CSR ; CSR
; ===== Branch Occurs =====
* ========================================================================= *
*S-------------------------------------------------------------------------S*
*S Register Usage Table S*
*S-------------------------------------------------------------------------S*
*S AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA : BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB S*
*S 33222222222211111111119876543210 : 33222222222211111111119876543210 S*
*S 1098765432109876543210 : 1098765432109876543210 S*
*S-------------------------------------------------------------------------S*
*S 1: ..........1.1.11......1.11111..1 : .........111..11......111111.... S*
*S 2: ..........1111........1111111..1 : .......1.11111........111111.... S*
*S 3: ...........11111......1111111..1 : .......111111111......111111.... S*
*S 4: ...........11111......1111111..1 : .........1111111.......11111.... S*
*S-------------------------------------------------------------------------S*
* ========================================================================= *
* End of file: scale_h2_h.asm *
* ------------------------------------------------------------------------- *
* Copyright (c) 2001 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *