/* Source: www.pudn.com > Blackfin_Mpeg_2_4.zip > mve_core.asm */


/******************************************************************************* 
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved. 
Developed by Joint Development Software Application Team, IPDC, Bangalore, India 
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification). 
 
By using this module you agree to the terms of the Analog Devices License 
Agreement for DSP Software.  
******************************************************************************** 
Module Name     : mve_core.asm 
Label Name      : __mve_core 
Version         :   1.0 
Change History  : 
 
                Version     Date          Author            Comments 
                1.0         07/02/2001    Vijay             Original  
 
Description     : The assembly function is used by the motion estimation routine 
                  to compute the best matching macroblock for a given target  
                  block and to compute the motion vectors for that matching  
                  block. The subroutine fetches all the possible blocks within  
                  the search range in the reference frame and determines the  
                  best matching macro block for a given target block based on the 
                  MAD. The MAD of a pair of 16x16 blocks denoted by TGT_BLK and  
                  REF_BLK is computed as follows : 
 
                      MAD = Sum( ABS[TGT_BLK - REF_BLK ] ) 
 
                  The offset between the best matching reference block and the  
                  target block gives the motion vector. Half pixelation is not  
                  done inside this subroutine. 
 
Prototype       : void _mve_core( struct *input_parameters, struct *output); 
 
                  The input to this routine is a set of two pointers. The first  
                  pointer points to a structure containing the following input  
                  parameters. 
 
                  Input : Structure 1 : 
                        struct { 
                                    int hor_size; 
                            -> Horizontal size of the video frame  
                                    int left_search_boundary; 
                            -> (2^search_factor + 1)  
                                    unsigned char *tgt_blk_add; 
                            -> Start address of the target block;  
                                    unsigned char *loc_tgt_blk_in_ref_frame; 
                            -> Location of target block in the reference frame  
                                    unsigned char *search_begin_st_add; 
                            -> Start address from where search should begin  
                                    int hor_search_range; 
                            -> Search range in pixels in the horizontal  
                               direction  
                                    int ver_search_range; 
                            -> Search range in pixels in the vertical direction  
                                }input_parameters; 
 
                  The second structure acts as an output where the elements of  
                  the structure are modified by this routine 
 
                  Output : Structure 2 : 
                        struct { 
                                    int least_mad; 
                            -> The value of the least MAD after searching the  
                               search window fully  
                                    unsigned char *add_least_mad; 
                            -> Address of the block having the least MAD  
                                    int vert_mv; 
                            -> Vertical integer motion vector  
                                    int hor_mv; 
                            -> Horizontal integer motion vector  
                                }output; 
 
Registers used  : A0, A1, R0-R3, R5-R7, I0, I1, I3, M1-M3, L0, L1, L3, P0-P5,  
                  LC0 LC1. 
 
Performance     : 
                Code size : 388 Bytes. 
*******************************************************************************/ 
.section L1_code; 
.align 8; 
.global __mve_core; 
     
__mve_core:
                            // Full-search block matcher. For every candidate
                            // position in the search window the sum of absolute
                            // differences (called MAD in this file) against the
                            // 16x16 target block is computed; the candidate
                            // with the smallest MAD wins, ties being resolved
                            // in favour of the candidate closer to the target
                            // block's own location in the reference frame.
                            //
                            // On entry: R0 = pointer to the input parameter
                            //           structure, R1 = pointer to the output
                            //           structure.
                            //
                            // Register roles inside the routine:
                            //   P5 - input structure base
                            //   I3 - output structure write pointer
                            //   I0 - target block read pointer
                            //   I1 - reference (candidate) block read pointer
                            //   P1 - first candidate address of current row
                            //   P2 - address of the current candidate
                            //   P0 - address of the best candidate so far
                            //   R7 - smallest MAD found so far
                            //   R6 - remaining rows of the search window
                            //   P4 - candidates per search-window row (LC1)
                            //   P3 - rows per block, 16 (LC0)
                            //   M1, M2, M3 - hor_size-16, 16*hor_size,
                            //                hor_size-12 (pointer strides)

    [--SP] = (R7:5, P5:3);  // Save R5-R7 and P3-P5; restored before RTS
    L0 = 0;                 // Zero the length registers paired with I0, I1 and
    L1 = 0;                 // I3 so that post-modify addressing is linear
    L3 = 0;
    P5 = R0;                // P5 -> input parameter structure
    I3 = R1;                // I3 -> output structure
    P3 = 16;                // Loop ctr(for 16 rows) initialized
    R7.L = -1;              // Together with the next line sets R7 = 0x7FFFFFFF,
    R7.H = 0x7FFF;          // the largest positive value; R7 holds the minimum
                            // MAD found so far
    R0 = [P5];              // Horizontal size of the frame
    R1 = R0 << 4 || P1 = [P5 + 16];
                            // R1 = 16 * horizontal size.
                            // P1 = starting address from where the subroutine
                            // must start searching for matching blocks
    M2 = R1;                // 16*Horizontal size
    R0 += -16;
    M1 = R0;                // Horizontal size - 16
    R0 += 4;
    M3 = R0;                // Horizontal size - 12
    MNOP || R1 = [P5 + 8];
    I0 = R1;                // Starting address of target block
    MNOP || R2 = [P5 + 20];
    P4 = R2;                // Horizontal search range
    R6 = [P5 + 24] || NOP;  // Vertical search range
    P2 = 0;                 // Row step is zero for the first pass, so the first
                            // row searched starts exactly at P1
VERTICAL:
                            // One pass per row of the search window. P2 holds
                            // the row step on entry: 0 the first time, the
                            // horizontal frame size on every later pass.
    P1 = P1 + P2;           // Update the reference block row address
    P2 = P1;                // First candidate of this row
    LSETUP(ST_SEARCH, END_SEARCH) LC1 = P4;
                            // Set horizontal span of the search range
ST_SEARCH:
        I1 = P2;            // Fetch the start address of the reference block
/*********************** MEAN ABSOLUTE DIFFERENCE *****************************/
                            // Each pass of the inner loop handles one row of
                            // 16 pixels with four SAA instructions. The
                            // reference side reads five words per row (the
                            // first of them is loaded by the previous pass or
                            // by the pre-load below), the target side reads
                            // four.
                            // NOTE(review): with only four target words per
                            // row, the target block presumably has to be
                            // 4-byte aligned - confirm against the caller.

        A1=A0=0;            // Initialize accumulators for accumulation
        DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
                            // Fetch the first data from the two blocks
        LSETUP (MAD_START, MAD_END) LC0=P3;
MAD_START:  DISALGNEXCPT || R3 = [I1++];
                            // Second reference word of the row
            SAA (R1:0,R3:2) || R1 = [I0++]  || R2 = [I1++];
                            // Compute absolute difference and accumulate
            SAA (R1:0,R3:2) (R) || R0 = [I0++] || R3 = [I1++];
                            // Same, with the register pairs taken in reverse
                            // order
            SAA (R1:0,R3:2) || R1 = [I0 ++ M3] || R2 = [I1++M1];
                            // After the fetch of the 4th word of the target
                            // block (and the 5th word of the reference block)
                            // both pointers are moved to the start of the next
                            // row
MAD_END:    SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
                            // Last four pixels of the row; the parallel loads
                            // fetch the first word of the next row.
                            // NOTE(review): on the 16th pass these loads read
                            // one word past the last row of each block.

        R3=A1.L+A1.H,R2=A0.L+A0.H || I0 -= M2;
                            // Fold the two halves of each accumulator;
                            // I0 steps back by 16 rows
        R0 = R2 + R3 (NS) || I0 -= 4;
                            // Add the accumulated values in both MACs.
                            // I0 steps back over the pre-loaded word and so
                            // points at the start of the target block again


/*************************** MINIMUM MAD COMPUTATION **************************/
        CC = R0 <= R7;      // Check if the latest MAD is less than or equal to
                            // the smallest one seen so far
        IF !CC JUMP END_SEARCH (BP);
                            // Latest MAD is larger: keep the current best and
                            // move on to the next candidate
        CC = R0 < R7;
        IF !CC JUMP EQUAL;  // MADs are equal: go to the tie-break at EQUAL
        R7 = R0;            // Latest MAD is strictly smaller, so it becomes the
                            // minimum
        P0 = P2;            // and corresponding block is better match
        JUMP END_SEARCH;
EQUAL:                      // Tie-break. The motion vector of the previous best
                            // (P0) and of the current candidate (P2) are
                            // computed, and the candidate replaces the previous
                            // best only if its squared vector length is
                            // strictly smaller.
                            //
                            // Compute the MVs for the least, previous MAD
        R1 = P0;
        R0 = R0 - R0 (NS) || R2 = [P5 + 12];
                            // R0 = 0 (row counter); R2 = location of the target
                            // block in the reference frame
        R3 = R1 - R2 (NS) || R5 = [P5];
                            // R3 = signed address offset of the previous best
                            // from the target location; R5 = horizontal size
        R1 = ABS R3 || R2 = [P5 + 4];
                            // Take the absolute value of the difference;
                            // R2 = left_search_boundary
REPEAT1:
        CC = R1 < R5;
        IF CC JUMP FINISH_MOD1;
        R1 = R1 - R5;       // Divide the offset by COLUMNS (repeated
                            // subtraction)
        R0 += 2;            // R0 has twice the quotient and R1 has the
                            // remainder
        JUMP REPEAT1;
FINISH_MOD1:
        CC = R1 <= R2;
        IF CC JUMP NOTRANS1;
        R0 += 2;            // If remainder is greater than
                            // left_search_boundary, increment quotient by two
        R1 = R1 - R5;       // and remainder = remainder - COLUMNS
NOTRANS1:
        R1 <<= 1;           // Remainder*2 gives horizontal motion vector
        R2 = -R0;
        R5 = -R1;
        CC = R3 < 0;        // If R3 is negative (diff. negative), negate both
                            // quotient and remainder
        IF CC R0 = R2;
        IF CC R1 = R5;
        A1 = R0.L*R0.L (IS);// Distance to previous best match from reference
                            // block (sum of squares; only the low 16 bits of
                            // each component enter the multiply)
        A1 += R1.L*R1.L (IS);

                            // Compute the MVs for the least, current MAD
        R1 = P2;
        R0 = R0 - R0 (NS) || R2 = [P5 + 12];
                            // R0 = 0 (row counter); R2 = location of the target
                            // block in the reference frame
        R3 = R1 - R2 (NS) || R5 = [P5];
                            // R3 = signed address offset of the current
                            // candidate from the target location;
                            // R5 = horizontal size
        R1 = ABS R3 || R2 = [P5 + 4];
                            // Take the absolute value of the difference;
                            // R2 = left_search_boundary
REPEAT2:
        CC = R1 < R5;
        IF CC JUMP FINISH_MOD2;
        R1 = R1 - R5;       // Divide the offset by COLUMNS (repeated
                            // subtraction)
        R0 += 2;            // R0 has twice the quotient and R1 has the
                            // remainder
        JUMP REPEAT2;
FINISH_MOD2:
        CC = R1 <= R2;
        IF CC JUMP NOTRANS2;
        R0 += 2;            // If remainder is greater than
                            // left_search_boundary, increment quotient by two
        R1 = R1 - R5;       // and remainder = remainder - COLUMNS
NOTRANS2:
        R1 <<= 1;           // Remainder*2 gives horizontal motion vector
        R2 = -R0;
        R5 = -R1;
        CC = R3 < 0;        // If R3 is negative (diff. negative), negate both
                            // quotient and remainder
        IF CC R0 = R2;
        IF CC R1 = R5;
        A0 = R0.L*R0.L (IS);
        A0 += R1.L*R1.L (IS);
                            // Distance to current block from reference block

        CC = A0 < A1;       // Strictly closer only; on equal distance the
                            // earlier candidate is kept
        IF CC P0 = P2;      // Make current block as the best match till now

END_SEARCH:
        P2 += 1;            // Update the reference block column address
                            // (last instruction of the LC1 hardware loop)
    P2 = [P5];              // Horizontal size of the video frame; used as the
                            // row step at VERTICAL
    R6 += -1;
    CC = R6 <= 0;           // Check if the all the rows in the search area are
                            // completed
    IF !CC JUMP  VERTICAL (BP);
                            // Repeat till the entire search area is matched

                            // Compute the motion vectors of the overall best
                            // match (P0), using the same procedure as in the
                            // tie-break above
    R1 = P0;
    R0 = R0 - R0 (NS) || R2 = [P5 + 12];
                            // R0 = 0 (row counter); R2 = location of the target
                            // block in the reference frame
    R3 = R1 - R2 (NS) || R5 = [P5];
                            // R3 = signed address offset of the best match from
                            // the target location; R5 = horizontal size
    R1 = ABS R3 || R2 = [P5 + 4];
                            // Take the absolute value of the difference;
                            // R2 = left_search_boundary
REPEAT3:
    CC = R1 < R5;
    IF CC JUMP FINISH_MOD3;
    R1 = R1 - R5;           // Divide the offset by COLUMNS (repeated
                            // subtraction)
    R0 += 2;                // R0 has twice the quotient and R1 has the
                            // remainder
    JUMP REPEAT3;
FINISH_MOD3:
    CC = R1 <= R2;
    IF CC JUMP NOTRANS3;
    R0 += 2;                // If remainder is greater than
                            // left_search_boundary, increment quotient by two
    R1 = R1 - R5;           // and remainder = remainder - COLUMNS
NOTRANS3:
    R1 <<= 1;               // Remainder*2 gives horizontal motion vector
                            // NOTE(review): both components are stored as
                            // twice the pixel offset - presumably half-pixel
                            // units; confirm against the caller
    R2 = -R0;
    R5 = -R1;
    CC = R3 < 0;            // If R3 is negative (diff. negative), negate both
                            // quotient and remainder
    IF CC R0 = R2;
    IF CC R1 = R5;
    R3 = P0;
    [I3++] = R7;            // Store the least MAD in the output structure
    [I3++] = R3;            // Store the address of the least MAD in the output
                            // structure
    [I3++] = R0;            // Store the integer vertical MV in the output
                            // structure
    [I3++] = R1;            // Store the integer horizontal MV in the output
                            // structure
    (R7:5, P5:3) = [SP++];  // Restore the registers saved on entry
    RTS;
    NOP;                    //to avoid one stall if LINK or UNLINK happens to be
                            //the next instruction after RTS in the memory.
__mve_core.end: