; Origin: www.pudn.com > ccs_encoder.rar > sad16_c.asm
;******************************************************************************
;* TMS320C6x C/C++ Codegen PC Version 4.32 *
;* Date/Time created: Thu Apr 21 17:35:37 2005 *
;******************************************************************************
;******************************************************************************
;* GLOBAL FILE PARAMETERS *
;* *
;* Architecture : TMS320C621x *
;* Optimization : Enabled at level 3 *
;* Optimizing for : Speed *
;* Based on options: -o3, no -ms *
;* Endian : Little *
;* Interrupt Thrshld : Disabled *
;* Memory Model : Large *
;* Calls to RTS : Far *
;* Pipelining : Enabled *
;* Speculative Load : Enabled *
;* Memory Aliases : Presume are aliases (pessimistic) *
;* Debug Info : COFF Debug *
;* *
;******************************************************************************
.asg A15, FP
.asg B14, DP
.asg B15, SP
.global $bss
.file "serial_asm"
.global _sad16
.sect ".text"
.file "E:\ccs_dct_q\ccs_encoder01-15\ccs_encoder\motion\sad16_c.sa"
.sym _sad16,_sad16, 36, 3, 0
.func 2
;******************************************************************************
;* FUNCTION NAME: _sad16 *
;* *
;* Regs Modified : A0,A1,A2,A3,A4,A5,B0,B4,B5,B7,B8,B9 *
;* Regs Used : A0,A1,A2,A3,A4,A5,A6,B0,B3,B4,B5,B6,B7,B8,B9 *
;******************************************************************************
;******************************************************************************
;* *
;* Using -g (debug) with optimization (-o3) may disable key optimizations! *
;* *
;******************************************************************************
_sad16:
;** --------------------------------------------------------------------------*
.line 1
.sym pcur,4, 4, 4, 32
.sym pref,21, 4, 4, 32
.sym stride,23, 4, 4, 32
.sym best_sad,22, 4, 4, 32
; _sad16: .cproc pcur,pref,stride,best_sad
.sym sad,24, 4, 4, 32
.sym count1,25, 4, 4, 32
.sym ptr_cur,20, 4, 4, 32
.sym ptr_ref,21, 4, 4, 32
.sym flag,16, 4, 4, 32
; .reg sad, count1,ptr_cur,ptr_ref,flag
.sym count2,2, 4, 4, 32
.sym sad0,0, 4, 4, 32
.sym a,0, 4, 4, 32
.sym b,0, 4, 4, 32
; .reg count2,sad0,a,b
; .no_mdep
MV .D2 B4,B5 ; |2|
|| MV .S2X A6,B7 ; |2|
.line 4
ZERO .D2 B8 ; |5| sad=0
.line 5
.line 6
.line 8
.line 9
MVK .S1 0x10,A2 ; |10| set loop2 count
.line 10
MVK .S2 0x10,B9 ; |11| set loop1 count
.line 11
MV .S2X A4,B4 ; |12| get the cur first address
.line 12
.line 14
;** --------------------------------------------------------------------------*
;** BEGIN LOOP L1
;** --------------------------------------------------------------------------*
L1:
; loop: .trip 16
MV .S1X B5,A4
;*----------------------------------------------------------------------------*
;* SOFTWARE PIPELINE INFORMATION
;*
;* Loop label : loop
;* Loop source line : 16
;* Loop closing brace source line : 22
;* Known Minimum Trip Count : 16
;* Known Max Trip Count Factor : 1
;* Loop Carried Dependency Bound(^) : 0
;* Unpartitioned Resource Bound : 2
;* Partitioned Resource Bound(*) : 2
;* Resource Partition:
;* A-side B-side
;* .L units 2* 0
;* .S units 0 1
;* .D units 1 1
;* .M units 0 0
;* .X cross paths 1 0
;* .T address paths 1 1
;* Long read paths 0 0
;* Long write paths 0 0
;* Logical ops (.LS) 0 0 (.L or .S unit)
;* Addition ops (.LSD) 1 1 (.L or .S or .D unit)
;* Bound(.L .S .LS) 1 1
;* Bound(.L .S .D .LS .LSD) 2* 1
;*
;* Searching for software pipeline schedule at ...
;* ii = 2 Schedule found with 4 iterations in parallel
;*
;* Register Usage Table:
;* +---------------------------------+
;* |AAAAAAAAAAAAAAAA|BBBBBBBBBBBBBBBB|
;* |0000000000111111|0000000000111111|
;* |0123456789012345|0123456789012345|
;* |----------------+----------------|
;* 0: |** ** |* * |
;* 1: |** *** |* ** |
;* +---------------------------------+
;*
;* Done
;*
;* Collapsed epilog stages : 3
;* Prolog not entirely removed
;* Collapsed prolog stages : 1
;*
;* Minimum required memory pad : 3 bytes
;*
;* Minimum safe trip count : 1
;*----------------------------------------------------------------------------*
;* SINGLE SCHEDULED ITERATION
;*
;* loop:
;* 0 LDB .D2T2 *B4++,B5 ; |16| cur[]
;* || LDB .D1T1 *A4++,A0 ; |17| ref[]
;* 1 [ B0] ADD .D2 0xffffffff,B0,B0 ; |21|
;* 2 [ B0] B .S2 loop ; |22|
;* 3 NOP 2
;* 5 SUB .L1X B5,A0,A0 ; |18| cur[]-ref[]
;* 6 ABS .L1 A0,A3 ; |19| sad0=abs(cur[]-ref[])
;* 7 ADD .S1 A5,A3,A5 ; |20|
;* ; BRANCH OCCURS ; |22|
;*----------------------------------------------------------------------------*
L2: ; PIPED LOOP PROLOG
LDB .D2T2 *B4++,B5 ; |16| (P) <0,0> cur[]
|| B .S2 loop ; |22| (P) <0,2>
|| LDB .D1T1 *A4++,A0 ; |17| (P) <0,0> ref[]
NOP 1
LDB .D1T1 *A4++,A0 ; |17| (P) <1,0> ref[]
|| B .S2 loop ; |22| (P) <1,2>
|| LDB .D2T2 *B4++,B5 ; |16| (P) <1,0> cur[]
SUB .D2 B9,2,B0
|| MV .L1X B8,A5
|| MVK .S1 0x1,A1 ; init prolog collapse predicate
;** --------------------------------------------------------------------------*
loop: ; PIPED LOOP KERNEL
ABS .L1 A0,A3 ; |19| <0,6> sad0=abs(cur[]-ref[])
|| [ B0] B .S2 loop ; |22| <2,2>
|| LDB .D2T2 *B4++,B5 ; |16| <3,0> cur[]
|| LDB .D1T1 *A4++,A0 ; |17| <3,0> ref[]
[ A1] SUB .D1 A1,1,A1 ; <0,7>
|| [!A1] ADD .S1 A5,A3,A5 ; |20| <0,7>
|| SUB .L1X B5,A0,A0 ; |18| <1,5> cur[]-ref[]
|| [ B0] ADD .D2 0xffffffff,B0,B0 ; |21| <3,1>
;** --------------------------------------------------------------------------*
L4: ; PIPED LOOP EPILOG
;** --------------------------------------------------------------------------*
SUB .D2 B4,3,B4
MV .S2X A5,B8
NOP 1
SUB .S2X A4,3,B5
.line 22
CMPGT .L2 B8,B6,B0 ; |23|
.line 23
[ B0] B .S2 L6 ; |24| retrun sad when sad>best_sad
[ B0] MV .S1X B8,A4 ; |31|
NOP 4
; BRANCH OCCURS ; |24|
;** --------------------------------------------------------------------------*
.line 24
ADD .D2 B4,B7,B4 ; |25| cur+stride
.line 25
ADD .D2 B5,B7,B5 ; |26| cur+stride
.line 26
[ A2] ADD .D1 0xffffffff,A2,A2 ; |27|
.line 27
MVK .S2 0x10,B9 ; |28| set loop1 count
.line 28
[ A2] B .S1 L1 ; |29| the second cycle
NOP 5
; BRANCH OCCURS ; |29|
;** --------------------------------------------------------------------------*
.line 30
;** --------------------------------------------------------------------------*
done:
MV .S1X B8,A4 ; |31|
;** --------------------------------------------------------------------------*
L6:
.line 31
RET .S2 B3 ; |32|
NOP 5
; BRANCH OCCURS ; |32|
.endfunc 32,000000000h,0
; .endproc
;******************************************************************************
;* TYPE INFORMATION *
;******************************************************************************