www.pudn.com > T264-src-0.02.zip > estimation.c
/***************************************************************************** * * T264 AVC CODEC * * Copyright(C) 2004-2005 llcc* 2004-2005 visionany * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * ****************************************************************************/ #include "stdio.h" #include "T264.h" #include "estimation.h" #include "memory.h" #include "assert.h" #define MAX_ITER_SDSP 50 //overlapped check points static int32_t check_vec(int32_t i, T264_vector_t* vec) { int32_t j; for(j = 0 ; j < i ; j ++) { if (vec[i].x == vec[j].x && vec[i].y == vec[j].y && vec[i].refno == vec[j].refno) return 1; } return 0; } static int32_t check_all_vec(T264_vector_t checked_pts[10], T264_vector_t* vec) { int32_t i; for(i = 0 ; i < 10 ; i ++) { if (vec->x == checked_pts[i].x && vec->y == checked_pts[i].y && checked_pts[i].refno != -2) return 1; } return 0; } uint32_t T264_search(T264_t* t, T264_search_context_t* context) { uint32_t sad = -1; int32_t i; int32_t best = 0; T264_vector_t mv_pred; int16_t mb_xy = t->mb.mb_xy; int16_t mb_x = t->mb.mb_x; int16_t mb_y = t->mb.mb_y; int32_t height = context->height; int32_t width = context->width; int32_t limit_x = context->limit_x; int32_t limit_y = context->limit_y; int32_t stride_cur = t->stride; int32_t stride_ref = t->edged_stride; // start point of current and reference 
block int32_t row = context->offset / t->edged_stride; int32_t col = context->offset % t->edged_stride; uint8_t* cur = t->cur.Y[0] + row * stride_cur + col; uint8_t* ref_st = t->refl0[0]->Y[0] + row * stride_ref + col; uint8_t* ref; int8_t best_ref_no; //adaptive thresholds uint32_t th0, th1; th0 = height * width; //256 for median predictor (16x16) // try median vector if (context->vec[0].refno >= 0) { // check this predictor mv_pred.refno = context->vec[0].refno; mv_pred.x = context->vec[0].x >> 2; mv_pred.y = context->vec[0].y >> 2; ref_st = t->refl0[mv_pred.refno]->Y[0] + row * stride_ref + col; ref = ref_st + mv_pred.y * stride_ref + mv_pred.x; sad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) + t->mb.lambda * (eg_size_se(&t->bs, (mv_pred.x << 2) - context->vec[0].x) + eg_size_se(&t->bs, (mv_pred.y << 2) - context->vec[0].y)); if (sad < th0) { context->vec_best = context->vec[0]; return sad; } } // try other predictors (set A) for (i = 1 ; i < context->vec_num ; i ++) { if (context->vec[i].refno >= 0) { if (!check_vec(i, context->vec)) //not checked before { uint32_t cursad; // check this predictor mv_pred.refno = context->vec[i].refno; mv_pred.x = context->vec[i].x >> 2; mv_pred.y = context->vec[i].y >> 2; ref_st = t->refl0[mv_pred.refno]->Y[0] + row * stride_ref + col; ref = ref_st + mv_pred.y * stride_ref + mv_pred.x; cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) + t->mb.lambda * (eg_size_se(&t->bs, (mv_pred.x << 2) - context->vec[0].x) + eg_size_se(&t->bs, (mv_pred.y << 2) - context->vec[0].y)); if (cursad < sad) { best = i; sad = cursad; } } } } //fixed th1 = 1024; //adaptive minimum sad(spatial 3) + 100 // th1 = T264_MIN( T264_MIN(t->mb.sad_ref[0], t->mb.sad_ref[1]), t->mb.sad_ref[2] ); context->vec_best = context->vec[best]; if (sad < th0) return sad; // ref_st of best reference frame best_ref_no = context->vec[best].refno; ref_st = t->refl0[best_ref_no]->Y[0] + row * stride_ref + col; // diamond search sad = 
diamond_search(t, cur, ref_st, context, stride_cur, stride_ref, sad); return sad; } uint32_t diamond_search(T264_t* t, uint8_t* cur, uint8_t* ref_st, T264_search_context_t* context, int32_t stride_cur, int32_t stride_ref, uint32_t sad) { int32_t width = context->width; int32_t height = context->height; int32_t limit_x = context->limit_x; int32_t limit_y = context->limit_y; //start mv int32_t mvx = context->vec_best.x >> 2; int32_t mvy = context->vec_best.y >> 2; //sdsp T264_vector_t sdsp[4]; // sad for start mv uint32_t cursad; int32_t best; uint8_t* ref; uint8_t stop = 0; int32_t i; // checked pts T264_vector_t checked_pts[10]; int32_t checked_no = 0; memset(checked_pts, -2, sizeof(checked_pts)); checked_pts[0] = context->vec_best; sdsp[0].refno = sdsp[1].refno = sdsp[2].refno = sdsp[3].refno = 0; while(!stop) { best = -1; sdsp[0].x = mvx - 1; sdsp[0].y = mvy; sdsp[1].x = mvx + 1; sdsp[1].y = mvy; sdsp[2].x = mvx; sdsp[2].y = mvy - 1; sdsp[3].x = mvx; sdsp[3].y = mvy + 1; // search 4 points of sdsp for(i = 0; i < 4; i++) { if (!check_all_vec(checked_pts, &sdsp[i])) //not checked before { checked_no = (checked_no + 1) % 10; checked_pts[checked_no] = sdsp[i]; ref = ref_st + sdsp[i].y * stride_ref + sdsp[i].x; cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) + t->mb.lambda * (eg_size_se(&t->bs, (sdsp[i].x << 2) - context->vec[0].x) + eg_size_se(&t->bs, (sdsp[i].y << 2) - context->vec[0].y)); if(cursad < sad) { sad = cursad; best = i; } } } // not in center, best != -1 if( best == -1) { stop = 1; } else { mvx = sdsp[best].x; mvy = sdsp[best].y; if (mvx == limit_x || mvx == -limit_x || mvy == limit_y || mvy == -limit_y) { stop = 1; } } } // final mv ref = ref_st + mvy * stride_ref + mvx; context->vec_best.x = mvx << 2; context->vec_best.y = mvy << 2; // mostly we use sad as cmp function if (t->cmp[context->mb_part] == t->sad[context->mb_part]) return sad; sad = t->cmp[context->mb_part](cur, stride_cur, ref, stride_ref) + t->mb.lambda * 
(eg_size_se(&t->bs, context->vec_best.x - context->vec[0].x) + eg_size_se(&t->bs, context->vec_best.y - context->vec[0].y)); return sad; } /* * Full Search */ uint32_t T264_search_full(T264_t* t, T264_search_context_t* context) { uint32_t sad; uint32_t cursad; int32_t i, j; int16_t mb_xy = t->mb.mb_xy; int16_t mb_x = t->mb.mb_x; int16_t mb_y = t->mb.mb_y; int32_t height = context->height; int32_t width = context->width; int32_t limit_x = context->limit_x; int32_t limit_y = context->limit_y; int32_t stride_cur = t->stride; int32_t stride_ref = t->edged_stride; // start point of current and reference block int32_t row = context->offset / t->edged_stride; int32_t col = context->offset % t->edged_stride; uint8_t* cur = t->cur.Y[0] + row * stride_cur + col; uint8_t* ref_st = t->refl0[0]->Y[0] + row * stride_ref + col; uint8_t* ref; context->vec_best.refno = 0; // full search sad = width * height * 255; for(i = -limit_y + (context->vec[0].y >> 2); i <= (limit_y + (context->vec[0].y >> 2)) ; i++) for(j = -limit_x + (context->vec[0].x >> 2); j <= (limit_x + (context->vec[0].x >> 2)) ; j++) { ref = ref_st + i * stride_ref + j; cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) + t->mb.lambda * (eg_size_se(&t->bs, (j << 2) - context->vec[0].x) + eg_size_se(&t->bs, (i << 2) - context->vec[0].y)); if(cursad < sad) { sad = cursad; context->vec_best.y = i; context->vec_best.x = j; } } ref = ref_st + context->vec_best.y * stride_ref + context->vec_best.x; context->vec_best.y <<= 2; context->vec_best.x <<= 2; sad = t->cmp[context->mb_part](cur, t->stride, ref, t->edged_stride) + t->mb.lambda * (eg_size_se(&t->bs, context->vec_best.x - context->vec[0].x) + eg_size_se(&t->bs, context->vec_best.y - context->vec[0].y)); return sad; } // xxx, never used, just for compare to jm80. 
/*
 * Full search visiting candidates in square rings of growing radius around
 * the integer part of context->vec[0] (reference frame 0 only).
 *
 * The window is fixed at +/-16 integer pels in both directions;
 * context->limit_x / limit_y are not consulted.  The first position with the
 * lowest cost wins, so the visiting order decides ties.
 *
 * On return context->vec_best holds the winner (4x integer-pel units,
 * refno 0); the return value is its cost computed with t->cmp.
 */
uint32_t T264_spiral_search_full(T264_t* t, T264_search_context_t* context)
{
    // One constant drives both the table size and the ring loop, so the two
    // cannot drift apart and overflow the tables.
    enum
    {
        SPIRAL_RANGE  = 16,
        SPIRAL_SIDE   = 2 * SPIRAL_RANGE + 1,
        SPIRAL_POINTS = SPIRAL_SIDE * SPIRAL_SIDE
    };

    uint32_t sad;
    uint32_t cursad;
    int32_t  i, j, k, l;

    int32_t height = context->height;
    int32_t width  = context->width;

    int32_t stride_cur = t->stride;
    int32_t stride_ref = t->edged_stride;

    // start point of current and reference block
    int32_t  row    = context->offset / t->edged_stride;
    int32_t  col    = context->offset % t->edged_stride;
    uint8_t* cur    = t->cur.Y[0] + row * stride_cur + col;
    uint8_t* ref_st = t->refl0[0]->Y[0] + row * stride_ref + col;
    uint8_t* ref;

    // Search centre.  Division (rounding toward zero) is kept here on purpose;
    // it is what this routine has always used.
    int32_t pred_x = context->vec[0].x / 4;
    int32_t pred_y = context->vec[0].y / 4;

    int32_t spiral_search_x[SPIRAL_POINTS];
    int32_t spiral_search_y[SPIRAL_POINTS];

    context->vec_best.refno = 0;

    // Offset table: the centre first, then ring l = 1..SPIRAL_RANGE.
    // Each ring contributes 8 * l entries: the two vertical edges without
    // their corners, then the two horizontal edges including the corners.
    spiral_search_x[0] = spiral_search_y[0] = 0;
    k = 1;
    for (l = 1; l <= SPIRAL_RANGE; l++)
    {
        for (i = -l + 1; i < l; i++)
        {
            spiral_search_x[k] = l;
            spiral_search_y[k++] = i;
            spiral_search_x[k] = -l;
            spiral_search_y[k++] = i;
        }
        for (i = -l; i <= l; i++)
        {
            spiral_search_x[k] = i;
            spiral_search_y[k++] = l;
            spiral_search_x[k] = i;
            spiral_search_y[k++] = -l;
        }
    }

    // Seed the result with the centre so that vec_best is well defined even
    // if no candidate beats the initial bound below.
    context->vec_best.x = pred_x;
    context->vec_best.y = pred_y;

    // full search
    sad = width * height * 255;
    for (k = 0; k < SPIRAL_POINTS; k++)
    {
        i = pred_y + spiral_search_y[k];
        j = pred_x + spiral_search_x[k];

        ref = ref_st + i * stride_ref + j;

        // "* 4" rather than "<< 2": left-shifting a negative value is undefined
        cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) +
                 t->mb.lambda * (eg_size_se(&t->bs, j * 4 - context->vec[0].x) +
                                 eg_size_se(&t->bs, i * 4 - context->vec[0].y));

        if (cursad < sad)
        {
            sad = cursad;
            context->vec_best.y = i;
            context->vec_best.x = j;
        }
    }

    ref = ref_st + context->vec_best.y * stride_ref + context->vec_best.x;
    context->vec_best.y *= 4;
    context->vec_best.x *= 4;

    sad = t->cmp[context->mb_part](cur, t->stride, ref, t->edged_stride) +
          t->mb.lambda * (eg_size_se(&t->bs, context->vec_best.x - context->vec[0].x) +
                          eg_size_se(&t->bs, context->vec_best.y - context->vec[0].y));

    return sad;
}