; www.pudn.com > ccs_encoder.rar > sad8bi.asm


;****************************************************************************** 
;* TMS320C6x C/C++ Codegen                                    PC Version 4.32 * 
;* Date/Time created: Thu Apr 21 17:35:40 2005                                * 
;****************************************************************************** 
 
;****************************************************************************** 
;* GLOBAL FILE PARAMETERS                                                     * 
;*                                                                            * 
;*   Architecture      : TMS320C621x                                          * 
;*   Optimization      : Enabled at level 3                                   * 
;*   Optimizing for    : Speed                                                * 
;*                       Based on options: -o3, no -ms                        * 
;*   Endian            : Little                                               * 
;*   Interrupt Thrshld : Disabled                                             * 
;*   Memory Model      : Large                                                * 
;*   Calls to RTS      : Far                                                  * 
;*   Pipelining        : Enabled                                              * 
;*   Speculative Load  : Enabled                                              * 
;*   Memory Aliases    : Presume are aliases (pessimistic)                    * 
;*   Debug Info        : COFF Debug                                           * 
;*                                                                            * 
;****************************************************************************** 
 
	.asg	A15, FP 
	.asg	B14, DP 
	.asg	B15, SP 
	.global	$bss 
 
	.file	"serial_asm" 
                   .global	_sad8bi 
 
	.sect	".text" 
	.file	"E:\ccs_dct_q\ccs_encoder01-15\ccs_encoder\motion\sad8bi.sa" 
	.sym	_sad8bi,_sad8bi, 36, 3, 0 
	.func	2 
 
;******************************************************************************
;* FUNCTION NAME: _sad8bi                                                     *
;*                                                                            *
;*   Regs Modified     : A0,A1,A2,A3,A4,A5,A6,B0,B4,B5,B6,B7,B8               *
;*   Regs Used         : A0,A1,A2,A3,A4,A5,A6,B0,B3,B4,B5,B6,B7,B8            *
;******************************************************************************
;*                                                                            *
;* Purpose                                                                    *
;*   Sum of absolute differences between an 8x8 block "cur" and the rounded   *
;*   average of two reference blocks "ref1" and "ref2".  For each byte:       *
;*                                                                            *
;*       sad += abs( cur[i] - ((ref1[i] + ref2[i] + 1) >> 1) )                *
;*                                                                            *
;*   8 bytes per row (inner count B0 = 8), 8 rows (outer count A2 = 8).       *
;*                                                                            *
;* Linear-assembly prototype (from sad8bi.sa):                                *
;*       _sad8bi: .cproc cur, ref1, ref2, stride                              *
;*                                                                            *
;* Inputs (as consumed by the register moves at the top of the prolog)        *
;*   A4 = cur     address of first byte of the current block                  *
;*   B4 = ref1    address of first byte of reference block 1                  *
;*   A6 = ref2    address of first byte of reference block 2                  *
;*   B6 = stride  value added to each pointer when advancing to the next row  *
;*   B3 = return address                                                      *
;*                                                                            *
;* Output                                                                     *
;*   A4 = accumulated sum (held in B4 during the loops)                       *
;*                                                                            *
;* Working registers inside the loops                                         *
;*   A6 = cur pointer, B5 = ref1 pointer, A4 = ref2 pointer, B7 = stride      *
;*   B4 = running sum, B0 = inner (byte) counter, A2 = outer (row) counter    *
;*   A1 = prolog-collapse predicate (accumulate is suppressed while A1 != 0)  *
;*                                                                            *
;* Change history                                                             *
;*   - The averaging shift in the kernel was SHR ...,0x2 (divide by 4).  That *
;*     contradicted both the original "(...+1)/2" comment and the +1 rounding *
;*     term, which is the rounding constant for a divide by 2.  It is now     *
;*     SHR ...,0x1.  The same correction must be made in sad8bi.sa, otherwise *
;*     it is lost the next time this file is regenerated.                     *
;*                                                                            *
;* NOTE(review): the loads are LDB.  If the block data are unsigned 8-bit     *
;*   pixels, LDBU is probably intended -- confirm the data type with the      *
;*   caller before changing.                                                  *
;* NOTE(review): the loads post-increment their pointers, and the row-advance *
;*   code adds "stride" after only undoing the speculative over-increments.   *
;*   By inspection that leaves each pointer at row_start + 8 + stride, so the *
;*   caller appears to be expected to pass (line pitch - 8) -- confirm.       *
;*                                                                            *
;******************************************************************************

;******************************************************************************
;*                                                                            *
;* Using -g (debug) with optimization (-o3) may disable key optimizations!    *
;*                                                                            *
;******************************************************************************
_sad8bi:
;** --------------------------------------------------------------------------*
	.line	1
;*----------------------------------------------------------------------------*
;*   SOFTWARE PIPELINE INFORMATION
;*
;*      Loop label : loop
;*      Loop source line                 : 20
;*      Loop closing brace source line   : 30
;*      Known Minimum Trip Count         : 8
;*      Known Max Trip Count Factor      : 1
;*      Loop Carried Dependency Bound(^) : 0
;*      Unpartitioned Resource Bound     : 2
;*      Partitioned Resource Bound(*)    : 2
;*      Resource Partition:
;*                                A-side   B-side
;*      .L units                     0        1
;*      .S units                     1        1
;*      .D units                     2*       1
;*      .M units                     0        0
;*      .X cross paths               1        1
;*      .T address paths             2*       1
;*      Long read paths              0        0
;*      Long write paths             0        0
;*      Logical  ops (.LS)           1        0     (.L or .S unit)
;*      Addition ops (.LSD)          2        2     (.L or .S or .D unit)
;*      Bound(.L .S .LS)             1        1
;*      Bound(.L .S .D .LS .LSD)     2*       2*
;*
;*      Searching for software pipeline schedule at ...
;*         ii = 2  Schedule found with 6 iterations in parallel
;*
;*      Register Usage Table:
;*          +---------------------------------+
;*          |AAAAAAAAAAAAAAAA|BBBBBBBBBBBBBBBB|
;*          |0000000000111111|0000000000111111|
;*          |0123456789012345|0123456789012345|
;*          |----------------+----------------|
;*       0: |** ****         |*   ***         |
;*       1: |** ****         |*   ***         |
;*          +---------------------------------+
;*
;*      Done
;*
;*      Collapsed epilog stages     : 5
;*      Prolog not entirely removed
;*      Collapsed prolog stages     : 3
;*
;*      Minimum required memory pad : 5 bytes
;*
;*      Minimum safe trip count     : 1
;*----------------------------------------------------------------------------*
;*        SINGLE SCHEDULED ITERATION
;*
;*        (The SHR immediate below reflects the hand-corrected shift of 1.)
;*
;*        A5 carries two different values in one iteration: the rounded sum
;*        written in cycle 6 and read by SHR in cycle 7, and the cur[] byte
;*        requested by the LDB in cycle 3 and read by SUB in cycle 8.  This
;*        relies on the load result arriving after the SHR has read A5 --
;*        see the C6x load delay-slot rules before reordering anything.
;*
;*        loop:
;*   0              LDB     .D2T2   *B5++,B6          ; |20|  ref1[]
;*       ||         LDB     .D1T1   *A4++,A3          ; |21| ref2[]
;*   1              NOP             2
;*   3              LDB     .D1T1   *A6++,A5          ; |22| cur[]
;*   4      [ B0]   ADD     .S2     0xffffffff,B0,B0  ; |29|
;*   5              ADD     .L1X    B6,A3,A3          ; |23|  ref1[]+ref2[]
;*       || [ B0]   B       .S2     loop              ; |30|
;*   6              ADD     .L1     0x1,A3,A5         ; |24|  + 1 (rounding)
;*   7              SHR     .S1     A5,0x1,A0         ; |25|  (ref1[]+ref2[]+1)>>1
;*   8              SUB     .S1     A5,A0,A0          ; |26|  cur[] - average
;*   9              ABS     .L2X    A0,B6             ; |27|
;*  10              ADD     .L2     B4,B6,B4          ; |28|  sad += abs diff
;*                  ; BRANCH OCCURS                   ; |30|
;*----------------------------------------------------------------------------*
L1:    ; PIPED LOOP PROLOG
	.sym	cur,4, 4, 4, 32
	.sym	ref1,21, 4, 4, 32
	.sym	ref2,0, 4, 4, 32
	.sym	stride,23, 4, 4, 32
; _sad8bi: .cproc	cur, ref1, ref2, stride
	.sym	sad,20, 4, 4, 32
	.sym	count1,16, 4, 4, 32
	.sym	pcur,6, 4, 4, 32
	.sym	pref1,21, 4, 4, 32
	.sym	pref2,22, 4, 4, 32
	.sym	flag,0, 4, 4, 32
	.sym	pixel,0, 4, 4, 32
;             .reg  sad, count1,pcur,pref1, pref2,flag,pixel
	.sym	count2,2, 4, 4, 32
	.sym	sad0,0, 4, 4, 32
	.sym	a,0, 4, 4, 32
	.sym	b,0, 4, 4, 32
	.sym	c,0, 4, 4, 32
;             .reg  count2,sad0,a,b,c
;            .no_mdep
; loop:       .trip 8
           NOP             1

; Move the incoming arguments out of the registers the loop will reuse.
           MV      .D1     A6,A0             ; |2|  A0 = ref2 (parked; A6 is reused for cur)
||         MV      .D2     B6,B7             ; |2|  B7 = stride (B6 is reused as load target)
||         MV      .S2     B4,B5             ; |2|  B5 = ref1 pointer (B4 is reused for sad)

	.line	4
           ZERO    .D2     B4                ; |5| sad=0
	.line	5
	.line	6
	.line	7
	.line	10
	.line	11
	.line	12
           MVK     .S1     0x8,A2            ; |13| set loop2 (row) count
	.line	13
           MVK     .S2     0x8,B0            ; |14| set loop1 (byte) count
	.line	14
           MV      .D1     A4,A6             ; |15| get the cur first address
	.line	15
	.line	16
           MV      .S2X    A0,B6             ; |17| get the ref2 first address
	.line	18

; First pipeline fill: two unconditional branches plus two ref1/ref2 load
; pairs are issued here; the remaining prolog stages run inside the kernel
; with the accumulate masked by A1.
           MV      .S1X    B6,A4             ; A4 = ref2 pointer
||         B       .S2     loop              ; |30| (P) <0,5>

           LDB     .D1T1   *A4++,A3          ; |21| (P) <0,0> ref2[]
||         LDB     .D2T2   *B5++,B6          ; |20| (P) <0,0>  ref1[]

           B       .S2     loop              ; |30| (P) <1,5>

           MVK     .S1     0x4000,A1         ; init prolog collapse predicate
||         LDB     .D2T2   *B5++,B6          ; |20| (P) <1,0>  ref1[]
||         LDB     .D1T1   *A4++,A3          ; |21| (P) <1,0> ref2[]

;** --------------------------------------------------------------------------*
loop:    ; PIPED LOOP KERNEL
; Two execute packets per pass (ii = 2).  The <n,c> tags give the pipeline
; stage and the single-iteration cycle of each instruction.

           ABS     .L2X    A0,B6             ; |27| <0,9>  |cur[] - average|
||         SHR     .S1     A5,0x1,A0         ; |25| <1,7>  (ref1[]+ref2[]+1)>>1  (was 0x2: divided by 4)
|| [ B0]   B       .S2     loop              ; |30| <2,5>
||         ADD     .L1X    B6,A3,A3          ; |23| <2,5>  ref1[]+ref2[]
||         LDB     .D1T1   *A6++,A5          ; |22| <3,3> cur[]

; While A1 is non-zero the MPYSU keeps doubling it and the accumulate is
; skipped, so kernel passes that do not yet hold a finished result are not
; summed.  (Presumably A1 reaches zero after the three collapsed prolog
; stages reported above -- depends on MPYSU operand width; see CPU guide.)
   [ A1]   MPYSU   .M1     2,A1,A1           ; <0,10>  advance collapse predicate
|| [!A1]   ADD     .L2     B4,B6,B4          ; |28| <0,10>  sad += abs diff
||         SUB     .S1     A5,A0,A0          ; |26| <1,8>  cur[] - average
||         ADD     .L1     0x1,A3,A5         ; |24| <2,6>  + 1 (rounding)
|| [ B0]   ADD     .S2     0xffffffff,B0,B0  ; |29| <3,4>  byte counter--
||         LDB     .D2T2   *B5++,B6          ; |20| <5,0>  ref1[]
||         LDB     .D1T1   *A4++,A3          ; |21| <5,0> ref2[]

;** --------------------------------------------------------------------------*
L3:    ; PIPED LOOP EPILOG AND PROLOG
; loop:       .trip 8
; Row advance.  The kernel keeps loading past the end of the row (collapsed
; epilog), so the pointers are first pulled back by the over-run amounts
; (5, 3 and 5) and then stepped by stride (B7).

           SUB     .D2     B5,5,B5           ; undo ref1 over-run
||         SUB     .D1     A6,3,A0           ; undo cur over-run
||         SUB     .S2X    A4,5,B8           ; undo ref2 over-run

	.line	30
           ADD     .S1X    A0,B7,A6          ; |31| cur+stride
	.line	31
           ADD     .D2     B5,B7,B5          ; |32| ref1+stride
	.line	32
           NOP             1
           ADD     .D2     B8,B7,B6          ; |33| ref2+stride
	.line	33
   [ A2]   ADD     .D1     0xffffffff,A2,A2  ; |34|  row counter--
	.line	34
           MVK     .S2     0x8,B0            ; |35| reset loop count
	.line	35

; Refill the pipeline for the next row.  Every branch and load below is
; predicated on A2, so after the last row nothing is issued and execution
; falls through to the return sequence.
           MVK     .S1     0x4000,A1         ; init prolog collapse predicate
||         MV      .L1X    B6,A4             ; A4 = ref2 pointer for the new row
|| [ A2]   B       .S2     loop              ; |36| branch to kernel

           NOP             1
   [ A2]   B       .S2     loop              ; |30| (P) <0,5>

   [ A2]   LDB     .D2T2   *B5++,B6          ; |20| (P) <0,0>  ref1[]
|| [ A2]   LDB     .D1T1   *A4++,A3          ; |21| (P) <0,0> ref2[]

   [ A2]   B       .S2     loop              ; |30| (P) <1,5>

   [ A2]   LDB     .D2T2   *B5++,B6          ; |20| (P) <1,0>  ref1[]
|| [ A2]   LDB     .D1T1   *A4++,A3          ; |21| (P) <1,0> ref2[]

;** --------------------------------------------------------------------------*
	.line	36
           MV      .S1X    B4,A4             ; |37|  return value: A4 = sad
	.line	37
           RET     .S2     B3                ; |38|  return to caller
           NOP             5                 ; fill the branch delay slots
           ; BRANCH OCCURS                   ; |38|
	.endfunc	38,000000000h,0
 
 
;             .endproc 
 
;****************************************************************************** 
;* TYPE INFORMATION                                                           * 
;******************************************************************************