/* www.pudn.com > T264-src-0.02.zip > interpolate.c */


/***************************************************************************** 
 * 
 *  T264 AVC CODEC 
 * 
 *  Copyright(C) 2004-2005 llcc  
 *               2004-2005 visionany  
 * 
 *  This program is free software ; you can redistribute it and/or modify 
 *  it under the terms of the GNU General Public License as published by 
 *  the Free Software Foundation ; either version 2 of the License, or 
 *  (at your option) any later version. 
 * 
 *  This program is distributed in the hope that it will be useful, 
 *  but WITHOUT ANY WARRANTY ; without even the implied warranty of 
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 *  GNU General Public License for more details. 
 * 
 *  You should have received a copy of the GNU General Public License 
 *  along with this program ; if not, write to the Free Software 
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA 
 * 
 ****************************************************************************/ 
 
#include "stdio.h" 
#include "T264.h" 
#include "interpolate.h" 
 
//  1/4 pixel search 
uint32_t 
T264_quarter_pixel_search(T264_t* t, uint8_t* src, T264_frame_t* refframe, int32_t offset, T264_vector_t* vec, T264_vector_t* vec_median, uint32_t sad_org, int32_t w, int32_t h, uint8_t* residual, int32_t mb_part) 
{ 
    DECLARE_ALIGNED_MATRIX(data, 16, 16, uint8_t, CACHE_SIZE); 
 
    uint32_t sad = sad_org; 
    uint8_t* ref; 
    int16_t x, y; 
 
    // xxx 
    x = vec[0].x &= ~3; 
    y = vec[0].y &= ~3; 
    ref = refframe->Y[0] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
     
    if (t->flags & USE_HALFPEL) 
    { 
        uint8_t* refcur; 
       // right half pel 
        refcur = refframe->Y[1] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
        sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + 
            t->mb.lambda * (eg_size_se(&t->bs, (x + 2) - vec_median[0].x) +  
            eg_size_se(&t->bs, y - vec_median[0].y)); 
        if (sad < sad_org) 
        { 
            sad_org = sad; 
            vec[0].x = x + 2; 
            vec[0].y = y; 
            ref = refcur; 
        } 
        // left half pel 
        refcur --; 
        sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + 
            t->mb.lambda * (eg_size_se(&t->bs, (x - 2) - vec_median[0].x) +  
            eg_size_se(&t->bs, y - vec_median[0].y)); 
        if (sad < sad_org) 
        { 
            sad_org = sad; 
            vec[0].x = x - 2; 
            vec[0].y = y; 
            ref = refcur; 
        } 
        // bottom half pel 
        refcur = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
        sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + 
            t->mb.lambda * (eg_size_se(&t->bs, x - vec_median[0].x) +  
            eg_size_se(&t->bs, y + 2 - vec_median[0].y)); 
        if (sad < sad_org) 
        { 
            sad_org = sad; 
            vec[0].x = x; 
            vec[0].y = y + 2; 
            ref = refcur; 
        } 
        // top half pel 
        refcur -= t->edged_stride; 
        sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + 
            t->mb.lambda * (eg_size_se(&t->bs, x - vec_median[0].x) +  
            eg_size_se(&t->bs, y - 2 - vec_median[0].y)); 
        if (sad < sad_org) 
        { 
            sad_org = sad; 
            vec[0].x = x; 
            vec[0].y = y - 2; 
            ref = refcur; 
        } 
        t->memcpy_stride_u(ref, w, h, t->edged_stride, residual, 16); 
 
        // quarter pel search 
        if (t->flags & USE_QUARTPEL) 
        { 
            int16_t m, n; 
            int32_t i; 
            struct  
            { 
                uint8_t* p1; 
                uint8_t* p2; 
                int16_t x; 
                int16_t y; 
            }pos[4]; 
 
            x = vec[0].x; 
            y = vec[0].y; 
 
            m = x & 2; 
            n = y & 2; 
            if (n == 0 && m == 0) 
            { 
                pos[0].p1 = refframe->Y[0] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[0].p2 = refframe->Y[1] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[0].x = x + 1; 
                pos[0].y = y; 
 
                pos[1].p1 = pos[0].p1; 
                pos[1].p2 = pos[0].p2 - 1; 
                pos[1].x = x - 1; 
                pos[1].y = y; 
 
                pos[2].p1 = pos[0].p1; 
                pos[2].p2 = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[2].x = x; 
                pos[2].y = y + 1; 
 
                pos[3].p1 = pos[0].p1; 
                pos[3].p2 = pos[2].p2 - t->edged_stride; 
                pos[3].x = x; 
                pos[3].y = y - 1; 
            } 
            else if (n == 0 && m == 2) 
            { 
                pos[0].p1 = refframe->Y[0] + offset + (y >> 2) * t->edged_stride + (x >> 2) + 1; 
                pos[0].p2 = refframe->Y[1] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[0].x = x + 1; 
                pos[0].y = y; 
 
                pos[1].p1 = pos[0].p1 - 1; 
                pos[1].p2 = pos[0].p2; 
                pos[1].x = x - 1; 
                pos[1].y = y; 
 
                pos[2].p1 = refframe->Y[3] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[2].p2 = pos[0].p2; 
                pos[2].x = x; 
                pos[2].y = y + 1; 
 
                pos[3].p1 = pos[2].p1 - t->edged_stride; 
                pos[3].p2 = pos[2].p2; 
                pos[3].x = x; 
                pos[3].y = y - 1; 
            } 
            else if (n == 2 && m == 0) 
            { 
                pos[0].p1 = refframe->Y[3] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[0].p2 = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[0].x = x + 1; 
                pos[0].y = y; 
 
                pos[1].p1 = pos[0].p1 - 1; 
                pos[1].p2 = pos[0].p2; 
                pos[1].x = x - 1; 
                pos[1].y = y; 
 
                pos[2].p1 = refframe->Y[0] + offset + ((y >> 2) + 1) * t->edged_stride + (x >> 2); 
                pos[2].p2 = pos[0].p2; 
                pos[2].x = x; 
                pos[2].y = y + 1; 
 
                pos[3].p1 = pos[2].p1 - t->edged_stride; 
                pos[3].p2 = pos[0].p2; 
                pos[3].x = x; 
                pos[3].y = y - 1; 
            } 
            else 
            { 
                pos[0].p1 = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2) + 1; 
                pos[0].p2 = refframe->Y[3] + offset + (y >> 2) * t->edged_stride + (x >> 2); 
                pos[0].x = x + 1; 
                pos[0].y = y; 
 
                pos[1].p1 = pos[0].p1 - 1; 
                pos[1].p2 = pos[0].p2; 
                pos[1].x = x - 1; 
                pos[1].y = y; 
 
                pos[2].p1 = refframe->Y[1] + offset + ((y >> 2) + 1) * t->edged_stride + (x >> 2); 
                pos[2].p2 = pos[0].p2; 
                pos[2].x = x; 
                pos[2].y = y + 1; 
 
                pos[3].p1 = pos[2].p1 - t->edged_stride; 
                pos[3].p2 = pos[2].p2; 
                pos[3].x = x; 
                pos[3].y = y - 1; 
            } 
            for(i = 0 ; i < 4 ; i ++) 
            { 
                t->pixel_avg(pos[i].p1, pos[i].p2, t->edged_stride, t->edged_stride, data, 16, w, h); 
                sad = t->cmp[mb_part](src, t->stride, data, 16) + 
                    t->mb.lambda * (eg_size_se(&t->bs, pos[i].x - vec_median[0].x) +  
                    eg_size_se(&t->bs, pos[i].y - vec_median[0].y)); 
                if (sad < sad_org) 
                { 
                    sad_org = sad; 
                    vec[0].x = pos[i].x; 
                    vec[0].y = pos[i].y; 
                    t->memcpy_stride_u(data, w, h, 16, residual, 16); 
                } 
            } 
        } 
        sad = sad_org; 
    } 
    else 
    { 
        // x & y always integer pel 
        t->memcpy_stride_u(ref, w, h, t->edged_stride, residual, 16); 
    } 
    return sad; 
} 
 
void 
T264_pixel_avg_c(uint8_t* p1, uint8_t* p2, int32_t p1_stride, int32_t p2_stride, uint8_t* dst, int32_t dst_stride, int32_t w, int32_t h) 
{ 
    int32_t i, j; 
 
    for(i = 0 ; i < h ; i ++) 
    { 
        for(j = 0 ; j < w ; j ++) 
        { 
            dst[j] = (p1[j] + p2[j] + 1) >> 1; 
        } 
        p1 += p1_stride; 
        p2 += p2_stride; 
        dst+= dst_stride; 
    } 
} 
 
void 
T264_eighth_pixel_mc_u_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int16_t mvx, int16_t mvy, int32_t width, int32_t height) 
{ 
    int32_t x, y; 
    int32_t i, j; 
 
    x = mvx & 0x7; 
    y = mvy & 0x7; 
 
    for (i = 0 ; i < height ; i ++) 
    { 
        for(j = 0 ; j < width ; j ++) 
        { 
            dst[j] = ((8 - x) * (8 - y) * src[j]  + x * (8 - y) * src[j + 1] +  
                (8 - x) * y * src[j + src_stride] + x * y * src[j + src_stride+ 1] + 32) >> 6; 
        } 
        src += src_stride; 
        dst += 8; 
    } 
} 
 
static __inline int32_t 
tapfilter_h(uint8_t* p) 
{ 
    return p[-2] - 5 * p[-1] + 20 * p[0] + 20 * p[1] - 5 * p[2] + p[3]; 
} 
 
void 
interpolate_halfpel_h_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height) 
{ 
    int32_t i, j; 
    int32_t tmp; 
 
    for (i = 0 ; i < height ; i ++) 
    { 
        for (j = 0 ; j < width ; j ++) 
        { 
            tmp = (tapfilter_h(src + j) + 16) >> 5; 
            dst[j] = CLIP1(tmp); 
        } 
        src += src_stride; 
        dst += dst_stride; 
    } 
} 
 
static __inline int32_t 
tapfilter_v(uint8_t* p, int32_t stride) 
{ 
    return p[-2 * stride] - 5 * p[-stride] + 20 * p[0] + 20 * p[stride] - 5 * p[2 * stride] + p[3 * stride]; 
} 
 
void 
interpolate_halfpel_v_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height) 
{ 
    int32_t i, j; 
    int32_t tmp; 
 
    for (i = 0 ; i < height ; i ++) 
    { 
        for (j = 0 ; j < width ; j ++) 
        { 
            tmp = (tapfilter_v(src + j, src_stride) + 16) >> 5; 
            dst[j] = CLIP1(tmp); 
        } 
        src += src_stride; 
        dst += dst_stride; 
    } 
} 
 
// use vertical to generate this pic 
void 
interpolate_halfpel_hv_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height) 
{ 
    int32_t i, j; 
    int32_t tmp; 
 
    for (i = 0 ; i < height ; i ++) 
    { 
        for (j = 0 ; j < width ; j ++) 
        { 
            tmp = ( 
                  (src[j - 2 - 2 * src_stride] - 5 * src[j - 1 - 2 * src_stride] + 20 * src[j - 2 * src_stride] + 20 * src[j + 1 - 2 * src_stride] - 5 * src[j + 2 - 2 * src_stride] + src[j + 3 - 2 * src_stride]) + 
           (-5) * (src[j - 2 - 1 * src_stride] - 5 * src[j - 1 - 1 * src_stride] + 20 * src[j - 1 * src_stride] + 20 * src[j + 1 - 1 * src_stride] - 5 * src[j + 2 - 1 * src_stride] + src[j + 3 - 1 * src_stride]) + 
           (20) * (src[j - 2 - 0 * src_stride] - 5 * src[j - 1 - 0 * src_stride] + 20 * src[j - 0 * src_stride] + 20 * src[j + 1 - 0 * src_stride] - 5 * src[j + 2 - 0 * src_stride] + src[j + 3 - 0 * src_stride]) + 
           (20) * (src[j - 2 + 1 * src_stride] - 5 * src[j - 1 + 1 * src_stride] + 20 * src[j + 1 * src_stride] + 20 * src[j + 1 + 1 * src_stride] - 5 * src[j + 2 + 1 * src_stride] + src[j + 3 + 1 * src_stride]) + 
           (-5) * (src[j - 2 + 2 * src_stride] - 5 * src[j - 1 + 2 * src_stride] + 20 * src[j + 2 * src_stride] + 20 * src[j + 1 + 2 * src_stride] - 5 * src[j + 2 + 2 * src_stride] + src[j + 3 + 2 * src_stride]) + 
                  (src[j - 2 + 3 * src_stride] - 5 * src[j - 1 + 3 * src_stride] + 20 * src[j + 3 * src_stride] + 20 * src[j + 1 + 3 * src_stride] - 5 * src[j + 2 + 3 * src_stride] + src[j + 3 + 3 * src_stride]) + 
                  512) >> 10; 
            dst[j] = CLIP1(tmp); 
        } 
        src += src_stride; 
        dst += dst_stride; 
    } 
}