; Origin (archive listing): www.pudn.com > ccs_encoder.rar > sad16bi_c.asm
;******************************************************************************
;* TMS320C6x C/C++ Codegen PC Version 4.32 *
;* Date/Time created: Thu Apr 21 17:35:38 2005 *
;******************************************************************************
;******************************************************************************
;* GLOBAL FILE PARAMETERS *
;* *
;* Architecture : TMS320C621x *
;* Optimization : Enabled at level 3 *
;* Optimizing for : Speed *
;* Based on options: -o3, no -ms *
;* Endian : Little *
;* Interrupt Thrshld : Disabled *
;* Memory Model : Large *
;* Calls to RTS : Far *
;* Pipelining : Enabled *
;* Speculative Load : Enabled *
;* Memory Aliases : Presume are aliases (pessimistic) *
;* Debug Info : COFF Debug *
;* *
;******************************************************************************
; Symbolic register aliases: FP -> A15, DP -> B14, SP -> B15.
; None of the three aliases is referenced by the code visible in this file.
.asg A15, FP
.asg B14, DP
.asg B15, SP
.global $bss
; Debug file record emitted by the tool chain.
.file "serial_asm"
; Export the routine so it can be linked against from other modules.
.global _sad16bi
; Everything below is placed in the .text section.
.sect ".text"
; Debug file record naming the linear-assembly source this listing was
; generated from.
.file "E:\ccs_dct_q\ccs_encoder01-15\ccs_encoder\motion\sad16bi_c.sa"
; Debug symbol record for _sad16bi, followed by the function-begin marker that
; is closed by the matching .endfunc at the end of the routine.
.sym _sad16bi,_sad16bi, 36, 3, 0
.func 2
;******************************************************************************
;* FUNCTION NAME: _sad16bi *
;* *
;* Regs Modified : A0,A1,A2,A3,A4,A5,A6,B0,B4,B5,B6,B7,B8 *
;* Regs Used : A0,A1,A2,A3,A4,A5,A6,B0,B3,B4,B5,B6,B7,B8 *
;******************************************************************************
;* PURPOSE
;*   For 16 rows of 16 bytes each, accumulates
;*       | cur[i] - ((ref1[i] + ref2[i] + 1) >> 2) |
;*   and returns the total.
;*
;* INTERFACE (from ".cproc cur, ref1, ref2, stride" and the entry moves)
;*   In : A4 = cur, B4 = ref1, A6 = ref2, B6 = stride
;*   Out: A4 = accumulated sum (copied from B4 just before the return)
;*   Returns through B3.
;*
;* REGISTER ROLES INSIDE THE LOOPS
;*   B4 = running sum            B0 = inner (per-row) counter, 16
;*   A6 = cur pointer            A2 = outer (row) counter, 16
;*   B5 = ref1 pointer           B7 = stride
;*   A4 = ref2 pointer           A1 = prolog-collapse predicate
;*   A3/A5/A0/B6 = per-pixel temporaries
;*
;* REVIEW NOTES (not changed here; confirm against the .sa / C reference)
;*   - NOTE(review): the shift amount is 2 (divide by 4) while the generated
;*     comments say "/2". One of the two is presumably wrong.
;*   - NOTE(review): pixels are fetched with LDB, the sign-extending byte
;*     load. If the buffers hold unsigned 8-bit samples, LDBU would be
;*     expected. TODO confirm the data type.
;*   - NOTE(review): after the epilog fix-ups each pointer is 16 bytes past
;*     the start of its row and then has stride added, so the effective row
;*     pitch looks like stride + 16. Verify what callers pass as stride.
;*   - The pipelined loop issues loads beyond the 16th byte of each row; the
;*     tool reports a minimum required memory pad of 5 bytes.
;******************************************************************************
;******************************************************************************
;* *
;* Using -g (debug) with optimization (-o3) may disable key optimizations! *
;* *
;******************************************************************************
_sad16bi:
;** --------------------------------------------------------------------------*
.line 1
;*----------------------------------------------------------------------------*
;* SOFTWARE PIPELINE INFORMATION
;*
;* Loop label : loop
;* Loop source line : 19
;* Loop closing brace source line : 29
;* Known Minimum Trip Count : 16
;* Known Max Trip Count Factor : 1
;* Loop Carried Dependency Bound(^) : 0
;* Unpartitioned Resource Bound : 2
;* Partitioned Resource Bound(*) : 2
;* Resource Partition:
;* A-side B-side
;* .L units 0 1
;* .S units 1 1
;* .D units 2* 1
;* .M units 0 0
;* .X cross paths 1 1
;* .T address paths 2* 1
;* Long read paths 0 0
;* Long write paths 0 0
;* Logical ops (.LS) 1 0 (.L or .S unit)
;* Addition ops (.LSD) 2 2 (.L or .S or .D unit)
;* Bound(.L .S .LS) 1 1
;* Bound(.L .S .D .LS .LSD) 2* 2*
;*
;* Searching for software pipeline schedule at ...
;* ii = 2 Schedule found with 6 iterations in parallel
;*
;* Register Usage Table:
;* +---------------------------------+
;* |AAAAAAAAAAAAAAAA|BBBBBBBBBBBBBBBB|
;* |0000000000111111|0000000000111111|
;* |0123456789012345|0123456789012345|
;* |----------------+----------------|
;* 0: |** **** |* *** |
;* 1: |** **** |* *** |
;* +---------------------------------+
;*
;* Done
;*
;* Collapsed epilog stages : 5
;* Prolog not entirely removed
;* Collapsed prolog stages : 3
;*
;* Minimum required memory pad : 5 bytes
;*
;* Minimum safe trip count : 1
;*----------------------------------------------------------------------------*
;* SINGLE SCHEDULED ITERATION
;*
;* The listing below is one un-overlapped iteration; the leading number is the
;* cycle at which each instruction issues. Note that A5 is used twice: first
;* as ref1+ref2+1 (read by SHR), then as the destination of the cur[] load
;* (read by SUB).
;*
;* loop:
;* 0 LDB .D2T2 *B5++,B6 ; |19| ref1[]
;* || LDB .D1T1 *A4++,A3 ; |20| ref2[]
;* 1 NOP 2
;* 3 LDB .D1T1 *A6++,A5 ; |21| cur[]
;* 4 [ B0] ADD .S2 0xffffffff,B0,B0 ; |28|
;* 5 ADD .L1X B6,A3,A3 ; |22| ref1[]+ref2[]
;* || [ B0] B .S2 loop ; |29|
;* 6 ADD .L1 0x1,A3,A5 ; |23| ref1[]+ref2[]+1
;* 7 SHR .S1 A5,0x2,A0 ; |24| (ref1[]+ref2[]+1)>>2
;* 8 SUB .S1 A5,A0,A0 ; |25| cur[] - shifted sum
;* 9 ABS .L2X A0,B6 ; |26| absolute difference
;* 10 ADD .L2 B4,B6,B4 ; |27| sad += absolute difference
;* ; BRANCH OCCURS ; |29|
;*----------------------------------------------------------------------------*
L1: ; PIPED LOOP PROLOG
.sym cur,4, 4, 4, 32
.sym ref1,21, 4, 4, 32
.sym ref2,0, 4, 4, 32
.sym stride,23, 4, 4, 32
; _sad16bi: .cproc cur, ref1, ref2, stride
.sym sad,20, 4, 4, 32
.sym count1,16, 4, 4, 32
.sym pcur,6, 4, 4, 32
.sym pref1,21, 4, 4, 32
.sym pref2,22, 4, 4, 32
.sym flag,0, 4, 4, 32
.sym pixel,0, 4, 4, 32
; .reg sad, count1,pcur,pref1, pref2,flag,pixel
.sym count2,2, 4, 4, 32
.sym sad0,0, 4, 4, 32
.sym a,0, 4, 4, 32
.sym b,0, 4, 4, 32
.sym c,0, 4, 4, 32
; .reg count2,sad0,a,b,c
; .no_mdep
; loop: .trip 16
NOP 1
; Move the incoming arguments out of the way: A0 = ref2 (from A6),
; B7 = stride (from B6), B5 = ref1 (from B4). This frees B4 for the sum and
; A6/A4 for the cur/ref2 pointers.
MV .D1 A6,A0 ; |2|
|| MV .D2 B6,B7 ; |2|
|| MV .S2 B4,B5 ; |2|
.line 4
ZERO .D2 B4 ; |5| sad=0
.line 5
.line 6
.line 7
.line 9
.line 10
.line 11
MVK .S1 0x10,A2 ; |12| set loop2 count
.line 12
MVK .S2 0x10,B0 ; |13| set loop1 count
.line 13
MV .D1 A4,A6 ; |14| get the cur first address
.line 14
.line 15
MV .S2X A0,B6 ; |16| get the ref2 first address
.line 17
; A4 = ref2 pointer (via B6). The first branch to the kernel is issued here
; so that it takes effect after the prolog loads below have been issued.
MV .S1X B6,A4
|| B .S2 loop ; |29| (P) <0,5>
LDB .D1T1 *A4++,A3 ; |20| (P) <0,0> ref2[]
|| LDB .D2T2 *B5++,B6 ; |19| (P) <0,0> ref1[]
B .S2 loop ; |29| (P) <1,5>
; A1 is the prolog-collapse predicate: the kernel multiplies it by 2 on each
; pass while it is non-zero, and the accumulate into B4 is predicated on
; [!A1], so the first kernel passes (whose pipeline stages hold no valid
; data yet) do not add to the sum.
MVK .S1 0x4000,A1 ; init prolog collapse predicate
|| LDB .D2T2 *B5++,B6 ; |19| (P) <1,0> ref1[]
|| LDB .D1T1 *A4++,A3 ; |20| (P) <1,0> ref2[]
;** --------------------------------------------------------------------------*
; Two-cycle kernel (ii = 2). The <n,c> tag on each instruction gives the
; overlapped iteration it belongs to and its cycle in the single scheduled
; iteration listed above. Instruction order and register reuse here are
; schedule-critical; do not reorder.
loop: ; PIPED LOOP KERNEL
ABS .L2X A0,B6 ; |26| <0,9> |cur[] - shifted sum|
|| SHR .S1 A5,0x2,A0 ; |24| <1,7> (ref1[]+ref2[]+1)>>2
|| [ B0] B .S2 loop ; |29| <2,5>
|| ADD .L1X B6,A3,A3 ; |22| <2,5> ref1[]+ref2[]
|| LDB .D1T1 *A6++,A5 ; |21| <3,3> cur[]
[ A1] MPYSU .M1 2,A1,A1 ; <0,10> advance prolog-collapse predicate
|| [!A1] ADD .L2 B4,B6,B4 ; |27| <0,10> sad += |diff| once A1 is zero
|| SUB .S1 A5,A0,A0 ; |25| <1,8> cur[] - shifted sum
|| ADD .L1 0x1,A3,A5 ; |23| <2,6> ref1[]+ref2[]+1
|| [ B0] ADD .S2 0xffffffff,B0,B0 ; |28| <3,4> inner count--, stops at 0
|| LDB .D2T2 *B5++,B6 ; |19| <5,0> ref1[]
|| LDB .D1T1 *A4++,A3 ; |20| <5,0> ref2[]
;** --------------------------------------------------------------------------*
L3: ; PIPED LOOP EPILOG AND PROLOG
; loop: .trip 16
; Reached when B0 has hit zero and no kernel branch is pending. Because the
; epilog was collapsed into the kernel, extra loads were issued past the end
; of the row; undo their post-increments: 5 for the ref1 (B5) and ref2 (A4)
; pointers, 3 for the cur (A6) pointer.
SUB .D2 B5,5,B5
|| SUB .D1 A6,3,A0
|| SUB .S2X A4,5,B8
.line 29
ADD .S1X A0,B7,A6 ; |30| cur+stride
.line 30
ADD .D2 B5,B7,B5 ; |31| ref1+stride
.line 31
NOP 1
ADD .D2 B8,B7,B6 ; |32| ref2+stride
.line 32
[ A2] ADD .D1 0xffffffff,A2,A2 ; |33| row count--, stops at 0
.line 33
MVK .S2 0x10,B0 ; |34| reset loop1 count
.line 34
; Re-arm the predicate, move the advanced ref2 pointer back to A4 and, while
; rows remain (A2 != 0), replay the prolog: three branches to the kernel
; interleaved with the first two ref1/ref2 load pairs of the next row.
MVK .S1 0x4000,A1 ; init prolog collapse predicate
|| MV .L1X B6,A4
|| [ A2] B .S2 loop ; |35| branch to kernel
NOP 1
[ A2] B .S2 loop ; |29| (P) <0,5>
[ A2] LDB .D2T2 *B5++,B6 ; |19| (P) <0,0> ref1[]
|| [ A2] LDB .D1T1 *A4++,A3 ; |20| (P) <0,0> ref2[]
[ A2] B .S2 loop ; |29| (P) <1,5>
[ A2] LDB .D2T2 *B5++,B6 ; |19| (P) <1,0> ref1[]
|| [ A2] LDB .D1T1 *A4++,A3 ; |20| (P) <1,0> ref2[]
;** --------------------------------------------------------------------------*
; Falls through to here only when A2 == 0 (all rows done): return the sum.
.line 35
MV .S1X B4,A4 ; |36| return value = sad
.line 36
RET .S2 B3 ; |37|
NOP 5
; BRANCH OCCURS ; |37|
.endfunc 37,000000000h,0
; .endproc
;******************************************************************************
;* TYPE INFORMATION *
;******************************************************************************