; Origin: www.pudn.com > ccs_encoder.rar > sad8bi.asm
;******************************************************************************
;* TMS320C6x C/C++ Codegen PC Version 4.32 *
;* Date/Time created: Thu Apr 21 17:35:40 2005 *
;******************************************************************************
;******************************************************************************
;* GLOBAL FILE PARAMETERS *
;* *
;* Architecture : TMS320C621x *
;* Optimization : Enabled at level 3 *
;* Optimizing for : Speed *
;* Based on options: -o3, no -ms *
;* Endian : Little *
;* Interrupt Thrshld : Disabled *
;* Memory Model : Large *
;* Calls to RTS : Far *
;* Pipelining : Enabled *
;* Speculative Load : Enabled *
;* Memory Aliases : Presume are aliases (pessimistic) *
;* Debug Info : COFF Debug *
;* *
;******************************************************************************
; Alternate names FP, DP and SP for registers A15, B14 and B15.
.asg A15, FP
.asg B14, DP
.asg B15, SP
.global $bss
.file "serial_asm"
; Make the entry point visible to the linker and assemble what follows
; into the .text section.
.global _sad8bi
.sect ".text"
; COFF debug records (the header above lists "Debug Info : COFF Debug"):
; path of the linear-assembly source this file was generated from, the
; function's symbol entry, and the start of its line-number scope.
.file "E:\ccs_dct_q\ccs_encoder01-15\ccs_encoder\motion\sad8bi.sa"
.sym _sad8bi,_sad8bi, 36, 3, 0
.func 2
;******************************************************************************
;* FUNCTION NAME: _sad8bi *
;* *
;* Regs Modified : A0,A1,A2,A3,A4,A5,A6,B0,B4,B5,B6,B7,B8 *
;* Regs Used : A0,A1,A2,A3,A4,A5,A6,B0,B3,B4,B5,B6,B7,B8 *
;******************************************************************************
;******************************************************************************
;* *
;* Using -g (debug) with optimization (-o3) may disable key optimizations! *
;* *
;******************************************************************************
;------------------------------------------------------------------------------
; _sad8bi(cur, ref1, ref2, stride)      (argument list per the .cproc line below)
;
; Accumulates, for 8 rows of 8 bytes each,
;       | cur[i] - ((ref1[i] + ref2[i] + 1) >> 1) |
; and returns the total.
;
; In:    A4 = cur, B4 = ref1, A6 = ref2, B6 = stride
; Out:   A4 = accumulated sum (kept in B4 while the loops run)
; Ret:   B3
; Clobb: A0-A6, B0, B4-B8 (see "Regs Modified" in the banner above)
;
; FIX: the averaging shift was "SHR A5,0x2,A0" (divide by 4), which contradicts
;      the documented (ref1+ref2+1)/2 rounding average. It is now 0x1. Unit,
;      latency and operands are unchanged, so the software-pipeline schedule
;      is unaffected. This file is generated from sad8bi.sa (see the .file
;      directive); apply the same change there or it will be lost on rebuild.
;
; NOTE(review): LDB sign-extends each byte. If the buffers hold unsigned
;      pixel data, LDBU would be required -- confirm against the callers.
; NOTE(review): each row leaves the three pointers 8 bytes past the row start
;      before stride is added, so consecutive rows are stride + 8 bytes apart.
;      Confirm that callers pass stride with that in mind.
; NOTE(review): the pipelined loads read up to 5 bytes beyond the last byte
;      of a row ("Minimum required memory pad : 5 bytes" below); the buffers
;      must be readable that far.
;------------------------------------------------------------------------------
_sad8bi:
;** --------------------------------------------------------------------------*
        .line 1
;*----------------------------------------------------------------------------*
;* SOFTWARE PIPELINE INFORMATION
;*
;* Loop label : loop
;* Loop source line : 20
;* Loop closing brace source line : 30
;* Known Minimum Trip Count : 8
;* Known Max Trip Count Factor : 1
;* Loop Carried Dependency Bound(^) : 0
;* Unpartitioned Resource Bound : 2
;* Partitioned Resource Bound(*) : 2
;* Resource Partition:
;* A-side B-side
;* .L units 0 1
;* .S units 1 1
;* .D units 2* 1
;* .M units 0 0
;* .X cross paths 1 1
;* .T address paths 2* 1
;* Long read paths 0 0
;* Long write paths 0 0
;* Logical ops (.LS) 1 0 (.L or .S unit)
;* Addition ops (.LSD) 2 2 (.L or .S or .D unit)
;* Bound(.L .S .LS) 1 1
;* Bound(.L .S .D .LS .LSD) 2* 2*
;*
;* Searching for software pipeline schedule at ...
;* ii = 2 Schedule found with 6 iterations in parallel
;*
;* Register Usage Table:
;* +---------------------------------+
;* |AAAAAAAAAAAAAAAA|BBBBBBBBBBBBBBBB|
;* |0000000000111111|0000000000111111|
;* |0123456789012345|0123456789012345|
;* |----------------+----------------|
;* 0: |** **** |* *** |
;* 1: |** **** |* *** |
;* +---------------------------------+
;*
;* Done
;*
;* Collapsed epilog stages : 5
;* Prolog not entirely removed
;* Collapsed prolog stages : 3
;*
;* Minimum required memory pad : 5 bytes
;*
;* Minimum safe trip count : 1
;*----------------------------------------------------------------------------*
;* SINGLE SCHEDULED ITERATION
;*
;* loop:
;* 0 LDB .D2T2 *B5++,B6 ; |20| ref1[]
;* || LDB .D1T1 *A4++,A3 ; |21| ref2[]
;* 1 NOP 2
;* 3 LDB .D1T1 *A6++,A5 ; |22| cur[]
;* 4 [ B0] ADD .S2 0xffffffff,B0,B0 ; |29|
;* 5 ADD .L1X B6,A3,A3 ; |23| ref1[]+ref2[]
;* || [ B0] B .S2 loop ; |30|
;* 6 ADD .L1 0x1,A3,A5 ; |24|
;* 7 SHR .S1 A5,0x1,A0 ; |25| (ref1[]+ref2[]+1)/2
;* 8 SUB .S1 A5,A0,A0 ; |26|
;* 9 ABS .L2X A0,B6 ; |27|
;* 10 ADD .L2 B4,B6,B4 ; |28|
;* ; BRANCH OCCURS ; |30|
;*----------------------------------------------------------------------------*
L1: ; PIPED LOOP PROLOG
        .sym cur,4, 4, 4, 32
        .sym ref1,21, 4, 4, 32
        .sym ref2,0, 4, 4, 32
        .sym stride,23, 4, 4, 32
; _sad8bi: .cproc cur, ref1, ref2, stride
        .sym sad,20, 4, 4, 32
        .sym count1,16, 4, 4, 32
        .sym pcur,6, 4, 4, 32
        .sym pref1,21, 4, 4, 32
        .sym pref2,22, 4, 4, 32
        .sym flag,0, 4, 4, 32
        .sym pixel,0, 4, 4, 32
; .reg sad, count1,pcur,pref1, pref2,flag,pixel
        .sym count2,2, 4, 4, 32
        .sym sad0,0, 4, 4, 32
        .sym a,0, 4, 4, 32
        .sym b,0, 4, 4, 32
        .sym c,0, 4, 4, 32
; .reg count2,sad0,a,b,c
; .no_mdep
; loop: .trip 8
        NOP 1
; Move the incoming arguments aside so A4/A6/B4/B6 can be reused:
;   A0 = ref2 (from A6), B7 = stride (from B6), B5 = ref1 read pointer (from B4).
        MV .D1 A6,A0 ; |2|
     || MV .D2 B6,B7 ; |2|
     || MV .S2 B4,B5 ; |2|
        .line 4
        ZERO .D2 B4 ; |5| sad=0
        .line 5
        .line 6
        .line 7
        .line 10
        .line 11
        .line 12
        MVK .S1 0x8,A2 ; |13| set loop2 count (rows remaining)
        .line 13
        MVK .S2 0x8,B0 ; |14| set loop1 count (bytes per row)
        .line 14
        MV .D1 A4,A6 ; |15| get the cur first address (A6 = cur read pointer)
        .line 15
        .line 16
        MV .S2X A0,B6 ; |17| get the ref2 first address
        .line 18
; A4 = ref2 read pointer (routed through B6). From here on the prolog issues
; the first two ref1/ref2 load pairs and two unconditional branches so the
; kernel below starts with the pipeline partly primed.
        MV .S1X B6,A4
     || B .S2 loop ; |30| (P) <0,5>
        LDB .D1T1 *A4++,A3 ; |21| (P) <0,0> ref2[]
     || LDB .D2T2 *B5++,B6 ; |20| (P) <0,0> ref1[]
        B .S2 loop ; |30| (P) <1,5>
; A1 gates the accumulate in the kernel: MPYSU doubles it on every pass
; (0x4000 -> 0x8000 -> 0x10000 -> 0), so [!A1] ADD stays off for the three
; collapsed prolog stages, when B6 does not yet hold a valid |difference|.
        MVK .S1 0x4000,A1 ; init prolog collapse predicate
     || LDB .D2T2 *B5++,B6 ; |20| (P) <1,0> ref1[]
     || LDB .D1T1 *A4++,A3 ; |21| (P) <1,0> ref2[]
;** --------------------------------------------------------------------------*
loop: ; PIPED LOOP KERNEL
; Two execute packets per pass (ii = 2). A5 does double duty: SHR reads it as
; ref1+ref2+1 (written by the ADD in the second packet of the previous pass),
; while SUB reads it as the cur[] byte delivered by an earlier LDB. B6 is
; shared the same way between the ref1[] load and the ABS result. Do not
; reorder anything in here.
        ABS .L2X A0,B6 ; |27| <0,9>
     || SHR .S1 A5,0x1,A0 ; |25| <1,7> (ref1[]+ref2[]+1)/2
     || [ B0] B .S2 loop ; |30| <2,5>
     || ADD .L1X B6,A3,A3 ; |23| <2,5> ref1[]+ref2[]
     || LDB .D1T1 *A6++,A5 ; |22| <3,3> cur[]
        [ A1] MPYSU .M1 2,A1,A1 ; <0,10>
     || [!A1] ADD .L2 B4,B6,B4 ; |28| <0,10>
     || SUB .S1 A5,A0,A0 ; |26| <1,8>
     || ADD .L1 0x1,A3,A5 ; |24| <2,6>
     || [ B0] ADD .S2 0xffffffff,B0,B0 ; |29| <3,4>
     || LDB .D2T2 *B5++,B6 ; |20| <5,0> ref1[]
     || LDB .D1T1 *A4++,A3 ; |21| <5,0> ref2[]
;** --------------------------------------------------------------------------*
L3: ; PIPED LOOP EPILOG AND PROLOG
; loop: .trip 8
; Undo the over-fetch of the collapsed epilog: the ref1/ref2 pointers ran
; 5 bytes past the row and the cur pointer 3 bytes past it.
        SUB .D2 B5,5,B5
     || SUB .D1 A6,3,A0
     || SUB .S2X A4,5,B8
        .line 30
        ADD .S1X A0,B7,A6 ; |31| cur+stride
        .line 31
        ADD .D2 B5,B7,B5 ; |32| ref1+stride
        .line 32
; NOTE(review): presumably this NOP lets the last in-flight LDB into B6 land
; before B6 is rewritten by the ADD below -- confirm before removing it.
        NOP 1
        ADD .D2 B8,B7,B6 ; |33| ref2+stride
        .line 33
        [ A2] ADD .D1 0xffffffff,A2,A2 ; |34| one row done
        .line 34
        MVK .S2 0x8,B0 ; |35| reset loop count
        .line 35
; Re-arm the accumulate gate and, while rows remain (A2 != 0), replay the
; prolog: A4 = ref2 read pointer, first two load pairs, three branches back
; into the kernel. When A2 is zero everything below is predicated off and
; execution falls through to the return sequence.
        MVK .S1 0x4000,A1 ; init prolog collapse predicate
     || MV .L1X B6,A4
     || [ A2] B .S2 loop ; |36| branch to kernel
        NOP 1
        [ A2] B .S2 loop ; |30| (P) <0,5>
        [ A2] LDB .D2T2 *B5++,B6 ; |20| (P) <0,0> ref1[]
     || [ A2] LDB .D1T1 *A4++,A3 ; |21| (P) <0,0> ref2[]
        [ A2] B .S2 loop ; |30| (P) <1,5>
        [ A2] LDB .D2T2 *B5++,B6 ; |20| (P) <1,0> ref1[]
     || [ A2] LDB .D1T1 *A4++,A3 ; |21| (P) <1,0> ref2[]
;** --------------------------------------------------------------------------*
        .line 36
        MV .S1X B4,A4 ; |37| return value = accumulated sum
        .line 37
        RET .S2 B3 ; |38|
        NOP 5 ; fill the branch delay slots of the RET
; BRANCH OCCURS ; |38|
        .endfunc 38,000000000h,0
; .endproc
;******************************************************************************
;* TYPE INFORMATION *
;******************************************************************************