// Source: www.pudn.com > Blackfin_Mpeg_2_4.zip > hpel_core2.asm


/******************************************************************************* 
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved. 
Developed by Joint Development Software Application Team, IPDC, Bangalore, India 
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification). 
 
By using this module you agree to the terms of the Analog Devices License 
Agreement for DSP Software.  
******************************************************************************** 
Module Name     : hpel_core2.asm 
Label Name      : __hpel_core2 
Version         :   1.0 
Change History  : 
 
                Version     Date          Author            Comments 
                1.0         07/02/2001    Vijay             Original  
 
Description     : The assembly function is used by the half pixelation routine  
                  to compute the average of four CORNER macroblocks which need  
                  not be word aligned. The starting addresses of the macroblocks 
                  are passed in I0 and I1 and the averaged result is stored  in  
                  a buffer pointed to by I3. The MAD between the averaged  
                  (interpolated) block and the target block is then computed. If 
                  the interpolated block has a lower value of MAD than the  
                  previously observed minimum an offset of +1 or -1 is added to  
                  the integer motion vector in the corresponding direction. This 
                  is called the half-pixel correction. 
 
Prototype       : void _hpel_core2(struct *input_parameters, int *temp_array1,  
                                   int *temp_array2); 
 
                  Input : The input to this routine is a pointer to a structure 
                  containing the following elements, and two temporary arrays of 
                  260 bytes each (the routine reads 65 32-bit words from each). 
                  The content of the first temporary array is obtained from the 
                  hpel_core1 routine (i.e., _hpel_core1 should be called before 
                  invoking this routine). 
 
                  Structure : 
                        struct { 
                                    int least_mad; 
                            -> The value of the least MAD after searching the  
                               search window fully  
                                    unsigned char *add_least_mad; 
                            -> Address of the block having the least MAD  
                                    int vert_mv; 
                            -> Vertical integer motion vector  
                                    int hor_mv; 
                            -> Horizontal integer motion vector  
                                    int curr_ver_mv_off; 
                            -> Current hpel offset for the vertical motion  
                               vector  
                                    int curr_hor_mv_off; 
                            -> Current hpel offset for the horizontal motion  
                               vector  
                                    int prev_ver_mv_off; 
                            -> Previous hpel offset for the vertical motion  
                               vector  
                                    int prev_hor_mv_off; 
                            -> Previous hpel offset for the horizontal motion  
                               vector  
                                    int hor_size; 
                            -> Horizontal size of the video frame  
                                    unsigned char *tgt_blk_add; 
                            -> Start address of the target block  
                                }input_parameters; 
 
Registers used  : A0, A1, R0-R3, R7, I0-I3, M0, M1, M3, L0, L1, L3, P1, P2, LC0. 
 
Performance     : 
                Code size   : 332 Bytes. 
*******************************************************************************/ 
.section L1_code; 
.align 8; 
.global __hpel_core2; 
 
__hpel_core2:
// Half-pixel "corner" refinement step.
//
// Arguments, as consumed by the code below:
//   R0 -> input parameter structure              (copied to P1)
//   R1 -> first temporary array, read only       (parked in M0, later I1)
//   R2 -> second temporary array, written first  (I3 = write pointer,
//                                                 I2 = saved start address)
//
// Structure fields are reached through fixed byte offsets from P1, following
// the layout given in the file header:
//   +0  least_mad         +4  add_least_mad     +8  vert_mv
//   +12 hor_mv            +16 curr_ver_mv_off   +20 curr_hor_mv_off
//   +24 prev_ver_mv_off   +28 prev_hor_mv_off   +32 hor_size
//   +36 tgt_blk_add
//
// Outline:
//   1. Average two 16x16 byte blocks of the frame, row by row, into the
//      second temporary array.
//   2. Average that result with the first temporary array and accumulate the
//      sum of absolute differences against the target block.
//   3. If the new sum is lower than least_mad, store it and copy the current
//      offsets into the previous offsets. If it is equal, copy the offsets
//      only when the resulting vector is shorter. Otherwise change nothing.
//
// No value is returned in a register; results are written to the structure.

    [--SP] = R7;            // Save R7: used as scratch, restored on every exit
    L0 = 0;                 // Zero the length registers so that I0, I1 and I3
    L1 = 0;                 // address memory linearly (no circular wrap)
    L3 = 0;
    P1 = R0;                // P1 -> input parameter structure
    M0 = R1;                // Park first temporary array address in M0
    I3 = R2;                // I3 -> second temporary array (write pointer)
    I2 = R2;                // I2 remembers its start for the second loop
    R1 = [P1 + 16]; // Current hpel offset for the vertical motion
                            // vector
    R3 = [P1 + 32]; // Horizontal size of the video frame
    R2 = R1.L * R3.L (IS) || R0 = [P1 + 4];
                            // R2 = curr_ver_mv_off * hor_size (signed multiply
                            // of the low 16-bit halves)
                            // R0 = address of the block having least MAD
    R2 = R0 + R2 (NS) || R7 = [P1 + 20];
                            // R7 = current hpel offset for the horizontal
                            // motion vector
    R2 = R2 + R7 (S);       // R2 = block address + curr_ver_mv_off*hor_size
                            //      + curr_hor_mv_off
    I1 = R2;                // I1 -> second source block for the averaging loop
    R1 = R1 + R7 (S);           // If (VMV + HMV) == 0
    R2 = 1;                 //     address_offset = curr_hor_mv_off
    CC = R1 == 0;           // else
    IF !CC R2 = R3;         //     address_offset = curr_hor_mv_off*hor_size
    P2 = 16;                // Loop counter (16 rows), used by both loops
    R1 = R2.L * R7.L (IS);  // R1 = address_offset selected above
    R0 = R0 + R1;
    I0 = R0;                // I0 -> first source block for the averaging loop
    R3 += -16;
    M1 = R3;                // M1 = hor_size - 16: each row advances a source
                            // pointer by 16 bytes (four post-increments), and
                            // the fifth fetch adds M1, giving one full line
    R3 += 4;
    M3 = R3;                // M3 = hor_size - 12: the target pointer advances
                            // 12 bytes per row, and the fourth fetch adds M3

/***************** AVERAGE ADJACENT BLOCKS ************************************/
// Each iteration handles one 16-byte row of both source blocks and stores four
// averaged words through I3. Five words per row are fetched from each source
// so that a block starting at any byte address is covered; BYTEOP1P picks the
// bytes out of each register pair according to the low address bits of I0/I1.
// The loop is software pipelined: every store writes the average produced by
// the previous instruction, and the first word of the next row is fetched at
// the bottom of the loop. On the last iteration that fetch reads one word from
// the line below the 16th row of each source block; the value is not used.
    LSETUP(LOOP_ST,LP_END) LC0=P2;
    DISALGNEXCPT || R0 = [I0++] || R2  =[I1++];
                            // Fetch 1st words of the two blocks (if misaligned,
                            // they contain partial data)
LOOP_ST:
        DISALGNEXCPT || R1 = [I0++] || R3  =[I1++];
                            // Fetch 2nd words (will contain remaining part of
                            // 1st word)
        R7 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2  =[I1++];
                            // Average 1st output word and fetch 3rd words
        R7 = BYTEOP1P(R1:0,R3:2)(R) || R1 = [I0++] || [I3++] = R7 ;
                            // Average 2nd output word (register pairs taken in
                            // reverse order), fetch 4th word via I0 and store
                            // 1st output word
        DISALGNEXCPT  || R3  =[I1++] || [I3++] = R7;
                            // Fetch 4th word via I1 and store 2nd output word
        R7 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M1] || R2  =[I1++M1];
                            // Average 3rd output word, fetch 5th words and move
                            // both source pointers to the start of the next row
        R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R7 ;
                            // Average 4th output word, fetch 1st word of next
                            // row via I0 and store 3rd output word
LP_END: DISALGNEXCPT || R2  =[I1++] || [I3++] = R7;
                            // Fetch 1st word of next row via I1 and store 4th
                            // output word
/*********************** AVERAGE CORNER BLOCKS & COMPUTE MAD *****************/
// I0 walks the second temporary array (just written), I1 walks the first
// temporary array, and I3 walks the target block. Each row is four
// average/accumulate pairs. One word is pre-fetched before the loop and one
// more at the end of every pair, so 65 words in total are read from each
// temporary array.
// NOTE(review): BYTEOP1P and SAA take their byte alignment from I0/I1, which
// here point into the temporary arrays, while the target block is fetched
// through I3. This presumably requires word-aligned temporary arrays and a
// word-aligned target block - confirm against the caller.

    I0 = I2;                // I0 -> start of the second temporary array
    I1 = M0;                // I1 -> start of the first temporary array
    R0 = [P1 + 36];
    I3 = R0;                // Fetch the start address of the target block
    A1=A0=0 || R0 = [I0++] || R2 = [I1++];
                            // Clear both accumulators and fetch the first word
                            // of each temporary array
    MNOP;                   // NOTE(review): reason for this no-op is not
                            // evident from this file; left untouched

    LSETUP(CAVG_ST,CAVG_END) LC0 = P2;
CAVG_ST:
        R1 = BYTEOP1P(R1:0,R3:2) || R3 = [I3++] || R2 = [I1++];
                            // R1 = average of the two buffer words; fetch the
                            // target word and the next first-array word
        SAA (R1:0,R3:2) (R) || R0 = [I0++];
                            // Accumulate absolute differences between averaged
                            // word and target word; fetch next second-array
                            // word
        R1 = BYTEOP1P(R1:0,R3:2) || R3 = [I3++] || R2 = [I1++];
                            // Average and fetch data from tgt blk
        SAA (R1:0,R3:2) (R) || R0 = [I0++];
                            // MAD and fetch data to be averaged from buffers
        R1 = BYTEOP1P(R1:0,R3:2) || R3 = [I3++] || R2 = [I1++];
                            // Average and fetch data from tgt blk
        SAA (R1:0,R3:2) (R) || R0 = [I0++];
                            // MAD and fetch data to be averaged from buffers
        R1 = BYTEOP1P(R1:0,R3:2) || R3 = [I3++M3] || R2 = [I1++];
                            // Average, fetch the last target word of the row
                            // and move the target pointer to the next row
CAVG_END:
        SAA (R1:0,R3:2) (R) || R0 = [I0++];
                            // MAD and fetch data from next row to be averaged
                            // from buffers
    R3=A1.L+A1.H,R2=A0.L+A0.H;
                            // Fold the 16-bit partial sums of each accumulator
    R0 = R2 + R3 (NS) || R3 = [P1];
                            // R0 = total for the interpolated block;
                            // R3 = least MAD recorded so far

/******************** MINIMUM MAD COMPUTATION ******************************/
// Three outcomes:
//   new > least : leave the structure untouched
//   new < least : store the new minimum, copy current offsets to previous
//   new = least : copy the offsets only if the resulting vector is shorter

    CC = R0 <= R3;          // Is the latest MAD less than or equal to the
                            // recorded minimum?
    IF CC JUMP LESS_OR_EQUAL;
    R7 = [SP++];            // Latest MAD is greater: restore R7 and return
    RTS;
LESS_OR_EQUAL:
    CC = R0 < R3;
    IF !CC JUMP EQUAL;      // Not strictly less, i.e. equal: go to tie-break
    [P1] = R0;              // If latest MAD is less, then store it as the
                            // minimum MAD
    R0 = [P1 + 16];
    [P1 + 24] = R0;         // prev_ver_mv_off = curr_ver_mv_off
    R0 = [P1 + 20];
    [P1 + 28] = R0;         // prev_hor_mv_off = curr_hor_mv_off
    R7 = [SP++];
    RTS;
EQUAL:
// Tie-break: compare squared vector lengths, computed with signed multiplies
// of the low 16-bit halves.
//   A1 = (hor_mv + prev_hor_mv_off)^2 + (vert_mv + prev_ver_mv_off)^2
//   A0 = (vert_mv + curr_ver_mv_off)^2 + (hor_mv + curr_hor_mv_off)^2
    R2 = [P1 + 12];         // Horizontal-vector before half pixel estimation
    R1 = [P1 + 28];         // Previous horizontal hpel offset
    R1 = R2 + R1 (NS) || R3 = [P1 + 8];
                            // R1 = horizontal vector plus previous horizontal
                            // offset; R3 = vertical vector before half pixel
                            // estimation
    R0 = [P1 + 24];         // Previous vertical hpel offset
    R0 = R3 + R0;           // R0 = vertical vector plus previous vertical
                            // offset
    A1 = R1.L*R1.L (IS) || R1 = [P1 + 20];
                            // Start A1; R1 = current horizontal hpel offset
    A1 += R0.L*R0.L (IS) || R0 = [P1 + 16];
                            // A1 = squared length with the previous offsets;
                            // R0 = current vertical hpel offset
    R0 = R0 + R3;           // Vertical vector plus current vertical offset
    R1 = R1 + R2;           // Horizontal vector plus current horizontal offset
    A0 = R0.L*R0.L (IS);    // A0 = squared length with the current offsets
    A0 += R1.L*R1.L (IS);
    CC = A0 < A1;
    IF !CC JUMP FINISH;     // Current vector is not shorter: keep previous
    R0 = [P1 + 16];
    [P1 + 24] = R0;         // prev_ver_mv_off = curr_ver_mv_off
    R0 = [P1 + 20];
    [P1 + 28] = R0;         // prev_hor_mv_off = curr_hor_mv_off
FINISH:
    R7 = [SP++];            // Restore R7 and return
    RTS;

__hpel_core2.end: