www.pudn.com > T264-src-0.02.zip > interpolate.c
/***************************************************************************** * * T264 AVC CODEC * * Copyright(C) 2004-2005 llcc* 2004-2005 visionany * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * ****************************************************************************/ #include "stdio.h" #include "T264.h" #include "interpolate.h" // 1/4 pixel search uint32_t T264_quarter_pixel_search(T264_t* t, uint8_t* src, T264_frame_t* refframe, int32_t offset, T264_vector_t* vec, T264_vector_t* vec_median, uint32_t sad_org, int32_t w, int32_t h, uint8_t* residual, int32_t mb_part) { DECLARE_ALIGNED_MATRIX(data, 16, 16, uint8_t, CACHE_SIZE); uint32_t sad = sad_org; uint8_t* ref; int16_t x, y; // xxx x = vec[0].x &= ~3; y = vec[0].y &= ~3; ref = refframe->Y[0] + offset + (y >> 2) * t->edged_stride + (x >> 2); if (t->flags & USE_HALFPEL) { uint8_t* refcur; // right half pel refcur = refframe->Y[1] + offset + (y >> 2) * t->edged_stride + (x >> 2); sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + t->mb.lambda * (eg_size_se(&t->bs, (x + 2) - vec_median[0].x) + eg_size_se(&t->bs, y - vec_median[0].y)); if (sad < sad_org) { sad_org = sad; vec[0].x = x + 2; vec[0].y = y; ref = refcur; } // left half pel refcur --; sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + t->mb.lambda * (eg_size_se(&t->bs, (x - 2) - vec_median[0].x) + eg_size_se(&t->bs, y - vec_median[0].y)); if (sad < sad_org) { sad_org = sad; vec[0].x = x - 2; vec[0].y = y; ref = refcur; } // bottom half pel refcur = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2); sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + t->mb.lambda * (eg_size_se(&t->bs, x - vec_median[0].x) + eg_size_se(&t->bs, y + 2 - vec_median[0].y)); if (sad < sad_org) { sad_org = sad; vec[0].x = x; vec[0].y = y + 2; ref = refcur; } // top half pel refcur -= t->edged_stride; sad = t->cmp[mb_part](src, t->stride, refcur, t->edged_stride) + t->mb.lambda * (eg_size_se(&t->bs, x - vec_median[0].x) + eg_size_se(&t->bs, y - 2 - vec_median[0].y)); if (sad < sad_org) { sad_org = sad; vec[0].x = x; vec[0].y = y - 2; ref = refcur; } t->memcpy_stride_u(ref, w, h, t->edged_stride, residual, 16); // quarter pel search if (t->flags & USE_QUARTPEL) { int16_t m, n; int32_t i; struct { uint8_t* p1; uint8_t* p2; int16_t x; int16_t y; }pos[4]; x = vec[0].x; y = vec[0].y; m = x & 2; n = y & 2; if (n == 0 && m == 0) { pos[0].p1 = refframe->Y[0] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[0].p2 = refframe->Y[1] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[0].x = x + 1; pos[0].y = y; pos[1].p1 = pos[0].p1; pos[1].p2 = pos[0].p2 - 1; pos[1].x = x - 1; pos[1].y = y; pos[2].p1 = pos[0].p1; pos[2].p2 = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[2].x = x; pos[2].y = y + 1; pos[3].p1 = pos[0].p1; pos[3].p2 = pos[2].p2 - t->edged_stride; pos[3].x = x; pos[3].y = y - 1; } else if (n == 0 && m == 2) { pos[0].p1 = refframe->Y[0] + offset + (y >> 2) * t->edged_stride + (x >> 2) + 1; pos[0].p2 = refframe->Y[1] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[0].x = x + 1; pos[0].y = y; pos[1].p1 = pos[0].p1 - 1; pos[1].p2 = pos[0].p2; pos[1].x = x - 1; pos[1].y = y; pos[2].p1 = refframe->Y[3] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[2].p2 = pos[0].p2; pos[2].x = x; pos[2].y = y + 1; pos[3].p1 = pos[2].p1 - t->edged_stride; pos[3].p2 = pos[2].p2; pos[3].x = x; pos[3].y = y - 1; } else if (n == 2 && m == 0) { pos[0].p1 = refframe->Y[3] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[0].p2 = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[0].x = x + 1; pos[0].y = y; pos[1].p1 = pos[0].p1 - 1; pos[1].p2 = pos[0].p2; pos[1].x = x - 1; pos[1].y = y; pos[2].p1 = refframe->Y[0] + offset + ((y >> 2) + 1) * t->edged_stride + (x >> 2); pos[2].p2 = pos[0].p2; pos[2].x = x; pos[2].y = y + 1; pos[3].p1 = pos[2].p1 - t->edged_stride; pos[3].p2 = pos[0].p2; pos[3].x = x; pos[3].y = y - 1; } else { pos[0].p1 = refframe->Y[2] + offset + (y >> 2) * t->edged_stride + (x >> 2) + 1; pos[0].p2 = refframe->Y[3] + offset + (y >> 2) * t->edged_stride + (x >> 2); pos[0].x = x + 1; pos[0].y = y; pos[1].p1 = pos[0].p1 - 1; pos[1].p2 = pos[0].p2; pos[1].x = x - 1; pos[1].y = y; pos[2].p1 = refframe->Y[1] + offset + ((y >> 2) + 1) * t->edged_stride + (x >> 2); pos[2].p2 = pos[0].p2; pos[2].x = x; pos[2].y = y + 1; pos[3].p1 = pos[2].p1 - t->edged_stride; pos[3].p2 = pos[2].p2; pos[3].x = x; pos[3].y = y - 1; } for(i = 0 ; i < 4 ; i ++) { t->pixel_avg(pos[i].p1, pos[i].p2, t->edged_stride, t->edged_stride, data, 16, w, h); sad = t->cmp[mb_part](src, t->stride, data, 16) + t->mb.lambda * (eg_size_se(&t->bs, pos[i].x - vec_median[0].x) + eg_size_se(&t->bs, pos[i].y - vec_median[0].y)); if (sad < sad_org) { sad_org = sad; vec[0].x = pos[i].x; vec[0].y = pos[i].y; t->memcpy_stride_u(data, w, h, 16, residual, 16); } } } sad = sad_org; } else { // x & y always integer pel t->memcpy_stride_u(ref, w, h, t->edged_stride, residual, 16); } return sad; } void T264_pixel_avg_c(uint8_t* p1, uint8_t* p2, int32_t p1_stride, int32_t p2_stride, uint8_t* dst, int32_t dst_stride, int32_t w, int32_t h) { int32_t i, j; for(i = 0 ; i < h ; i ++) { for(j = 0 ; j < w ; j ++) { dst[j] = (p1[j] + p2[j] + 1) >> 1; } p1 += p1_stride; p2 += p2_stride; dst+= dst_stride; } } void T264_eighth_pixel_mc_u_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int16_t mvx, int16_t mvy, int32_t width, int32_t height) { int32_t x, y; int32_t i, j; x = mvx & 0x7; y = mvy & 0x7; for (i = 0 ; i < height ; i ++) { for(j = 0 ; j < width ; j ++) { dst[j] = ((8 - x) * (8 - y) * src[j] + x * (8 - y) * src[j + 1] + (8 - x) * y * src[j + src_stride] + x * y * src[j + src_stride+ 1] + 32) >> 6; } src += src_stride; dst += 8; } } static __inline int32_t tapfilter_h(uint8_t* p) { return p[-2] - 5 * p[-1] + 20 * p[0] + 20 * p[1] - 5 * p[2] + p[3]; } void interpolate_halfpel_h_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height) { int32_t i, j; int32_t tmp; for (i = 0 ; i < height ; i ++) { for (j = 0 ; j < width ; j ++) { tmp = (tapfilter_h(src + j) + 16) >> 5; dst[j] = CLIP1(tmp); } src += src_stride; dst += dst_stride; } } static __inline int32_t tapfilter_v(uint8_t* p, int32_t stride) { return p[-2 * stride] - 5 * p[-stride] + 20 * p[0] + 20 * p[stride] - 5 * p[2 * stride] + p[3 * stride]; } void interpolate_halfpel_v_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height) { int32_t i, j; int32_t tmp; for (i = 0 ; i < height ; i ++) { for (j = 0 ; j < width ; j ++) { tmp = (tapfilter_v(src + j, src_stride) + 16) >> 5; dst[j] = CLIP1(tmp); } src += src_stride; dst += dst_stride; } } // use vertical to generate this pic void interpolate_halfpel_hv_c(uint8_t* src, int32_t src_stride, uint8_t* dst, int32_t dst_stride, int32_t width, int32_t height) { int32_t i, j; int32_t tmp; for (i = 0 ; i < height ; i ++) { for (j = 0 ; j < width ; j ++) { tmp = ( (src[j - 2 - 2 * src_stride] - 5 * src[j - 1 - 2 * src_stride] + 20 * src[j - 2 * src_stride] + 20 * src[j + 1 - 2 * src_stride] - 5 * src[j + 2 - 2 * src_stride] + src[j + 3 - 2 * src_stride]) + (-5) * (src[j - 2 - 1 * src_stride] - 5 * src[j - 1 - 1 * src_stride] + 20 * src[j - 1 * src_stride] + 20 * src[j + 1 - 1 * src_stride] - 5 * src[j + 2 - 1 * src_stride] + src[j + 3 - 1 * src_stride]) + (20) * (src[j - 2 - 0 * src_stride] - 5 * src[j - 1 - 0 * src_stride] + 20 * src[j - 0 * src_stride] + 20 * src[j + 1 - 0 * src_stride] - 5 * src[j + 2 - 0 * src_stride] + src[j + 3 - 0 * src_stride]) + (20) * (src[j - 2 + 1 * src_stride] - 5 * src[j - 1 + 1 * src_stride] + 20 * src[j + 1 * src_stride] + 20 * src[j + 1 + 1 * src_stride] - 5 * src[j + 2 + 1 * src_stride] + src[j + 3 + 1 * src_stride]) + (-5) * (src[j - 2 + 2 * src_stride] - 5 * src[j - 1 + 2 * src_stride] + 20 * src[j + 2 * src_stride] + 20 * src[j + 1 + 2 * src_stride] - 5 * src[j + 2 + 2 * src_stride] + src[j + 3 + 2 * src_stride]) + (src[j - 2 + 3 * src_stride] - 5 * src[j - 1 + 3 * src_stride] + 20 * src[j + 3 * src_stride] + 20 * src[j + 1 + 3 * src_stride] - 5 * src[j + 2 + 3 * src_stride] + src[j + 3 + 3 * src_stride]) + 512) >> 10; dst[j] = CLIP1(tmp); } src += src_stride; dst += dst_stride; } }