/* www.pudn.com > T264-src-0.02.zip > estimation.c */


/***************************************************************************** 
 * 
 *  T264 AVC CODEC 
 * 
 *  Copyright(C) 2004-2005 llcc  
 *               2004-2005 visionany  
 * 
 *  This program is free software ; you can redistribute it and/or modify 
 *  it under the terms of the GNU General Public License as published by 
 *  the Free Software Foundation ; either version 2 of the License, or 
 *  (at your option) any later version. 
 * 
 *  This program is distributed in the hope that it will be useful, 
 *  but WITHOUT ANY WARRANTY ; without even the implied warranty of 
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 *  GNU General Public License for more details. 
 * 
 *  You should have received a copy of the GNU General Public License 
 *  along with this program ; if not, write to the Free Software 
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA 
 * 
 ****************************************************************************/ 
 
#include "stdio.h" 
#include "T264.h" 
#include "estimation.h" 
#include "memory.h" 
#include "assert.h" 
 
#define MAX_ITER_SDSP 50 
 
//overlapped check points  
/*
 * Duplicate test for a candidate vector list.
 *
 * Returns 1 when vec[i] has the same x, y and refno as at least one of the
 * entries vec[0] .. vec[i - 1], and 0 otherwise (including when i <= 0,
 * in which case nothing is read).
 */
static int32_t
check_vec(int32_t i, T264_vector_t* vec)
{
    int32_t remaining;

    // walk the earlier entries from the nearest one back to index 0
    for (remaining = i; remaining > 0; remaining--)
    {
        const T264_vector_t* earlier = &vec[remaining - 1];
        const T264_vector_t* candidate = &vec[i];

        if (earlier->refno != candidate->refno)
            continue;
        if (earlier->x != candidate->x)
            continue;
        if (earlier->y == candidate->y)
            return 1;
    }

    return 0;
}
 
/*
 * Looks up a position in a 10-entry history of already visited points.
 *
 * Returns 1 when some entry of checked_pts has the same x and y as *vec
 * and a refno other than -2, and 0 otherwise.  The refno of *vec itself
 * is not part of the comparison.
 */
static int32_t
check_all_vec(T264_vector_t checked_pts[10], T264_vector_t* vec)
{
    const T264_vector_t* slot = checked_pts;
    const T264_vector_t* const end = checked_pts + 10;

    while (slot != end)
    {
        // entries whose refno is -2 never count as a match
        if (slot->refno != -2 && slot->x == vec->x && slot->y == vec->y)
            return 1;
        slot++;
    }

    return 0;
}
 
uint32_t 
T264_search(T264_t* t, T264_search_context_t* context) 
{ 
    uint32_t sad = -1; 
    int32_t i; 
    int32_t best = 0; 
 
	T264_vector_t mv_pred; 
 
	int16_t mb_xy = t->mb.mb_xy; 
	int16_t mb_x = t->mb.mb_x; 
	int16_t mb_y = t->mb.mb_y; 
	int32_t height = context->height; 
	int32_t width = context->width; 
	int32_t limit_x = context->limit_x; 
	int32_t limit_y = context->limit_y; 
	int32_t stride_cur = t->stride; 
	int32_t stride_ref = t->edged_stride; 
 
	// start point of current and reference block  
	int32_t row = context->offset / t->edged_stride; 
	int32_t col = context->offset % t->edged_stride; 
	uint8_t* cur = t->cur.Y[0] + row * stride_cur + col; 
	uint8_t* ref_st = t->refl0[0]->Y[0] + row * stride_ref + col; 
 
	uint8_t* ref; 
	int8_t best_ref_no; 
 
	//adaptive thresholds 
	uint32_t th0, th1; 
	th0 = height * width; //256 for median predictor (16x16)	 
	 
    // try median vector 
    if (context->vec[0].refno >= 0) 
    { 
        // check this predictor 
		mv_pred.refno = context->vec[0].refno; 
		mv_pred.x = context->vec[0].x >> 2; 
		mv_pred.y = context->vec[0].y >> 2; 
		ref_st = t->refl0[mv_pred.refno]->Y[0] + row * stride_ref + col; 
		ref = ref_st + mv_pred.y * stride_ref + mv_pred.x; 
		sad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) + 
                t->mb.lambda * (eg_size_se(&t->bs, (mv_pred.x << 2) - context->vec[0].x) +  
                                eg_size_se(&t->bs, (mv_pred.y << 2) - context->vec[0].y)); 
        if (sad < th0) 
        { 
            context->vec_best = context->vec[0]; 
            return sad; 
        } 
    } 
 
	// try other predictors (set A) 
    for (i = 1 ; i < context->vec_num ; i ++) 
    { 
        if (context->vec[i].refno >= 0) 
        { 
            if (!check_vec(i, context->vec)) //not checked before 
            { 
                uint32_t cursad; 
			    // check this predictor 
			    mv_pred.refno = context->vec[i].refno; 
			    mv_pred.x = context->vec[i].x >> 2; 
			    mv_pred.y = context->vec[i].y >> 2; 
			    ref_st = t->refl0[mv_pred.refno]->Y[0] + row * stride_ref + col; 
			    ref = ref_st + mv_pred.y * stride_ref + mv_pred.x; 
			    cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) + 
                    t->mb.lambda * (eg_size_se(&t->bs, (mv_pred.x << 2) - context->vec[0].x) +  
                                    eg_size_se(&t->bs, (mv_pred.y << 2) - context->vec[0].y)); 
 
                if (cursad < sad) 
                { 
                    best = i; 
                    sad = cursad; 
                } 
            } 
        } 
    } 
	//fixed 
	th1 = 1024;  
	//adaptive minimum sad(spatial 3) + 100 
//	th1 = T264_MIN( T264_MIN(t->mb.sad_ref[0], t->mb.sad_ref[1]), t->mb.sad_ref[2] ); 
	 
	context->vec_best = context->vec[best]; 
 
	if (sad < th0) 
	    return sad; 
 
	// ref_st of best reference frame 
	best_ref_no = context->vec[best].refno;	 
	ref_st = t->refl0[best_ref_no]->Y[0] + row * stride_ref + col; 
    // diamond search  
    sad = diamond_search(t, cur, ref_st, context, stride_cur, stride_ref, sad); 
    return sad;	 
} 
 
/*
 * Small-diamond refinement at integer-pel precision.
 *
 * Starting from context->vec_best with its two low bits dropped, the four
 * positions (x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1) around the
 * current centre are evaluated; the centre moves to the cheapest one that
 * lowers the cost.  The loop ends when no neighbour is cheaper or when the
 * centre reaches +/-limit_x or +/-limit_y.
 *
 * t          - encoder instance (sad / cmp functions, lambda, bitstream)
 * cur        - current block
 * ref_st     - reference block at zero displacement
 * context    - search context; vec_best is the start vector on entry and
 *              receives the final vector (integer position * 4) on exit,
 *              its refno is left untouched; vec[0] is the vector the
 *              vector cost is measured against
 * stride_cur - stride of the current frame
 * stride_ref - stride of the reference frame
 * sad        - cost of the start position
 *
 * Returns the cost of the final position.  If t->cmp differs from t->sad
 * for this partition the final position is measured again with t->cmp.
 */
uint32_t
diamond_search(T264_t* t, uint8_t* cur, uint8_t* ref_st, T264_search_context_t* context, int32_t stride_cur, int32_t stride_ref, uint32_t sad)
{
    int32_t limit_x = context->limit_x;
    int32_t limit_y = context->limit_y;
    // start position in integer-pel units
    int32_t mvx = context->vec_best.x >> 2;
    int32_t mvy = context->vec_best.y >> 2;
    // the four diamond points around the centre
    T264_vector_t sdsp[4];
    uint32_t cursad;
    int32_t best;
    uint8_t* ref;
    uint8_t stop = 0;
    int32_t i;
    // ring buffer of the most recently visited positions (integer-pel units)
    T264_vector_t checked_pts[10];
    int32_t checked_no = 0;

    memset(checked_pts, -2, sizeof(checked_pts));
    // memset fills bytes, which only produces the value -2 in a one-byte
    // field; set the "unused slot" marker explicitly so it holds for any width
    for (i = 0; i < 10; i++)
        checked_pts[i].refno = -2;

    // Record the start point in the same integer-pel units as the candidates.
    // Copying vec_best unchanged would store quarter-pel coordinates and make
    // the history lookup compare values of different scale.
    checked_pts[0] = context->vec_best;
    checked_pts[0].x = mvx;
    checked_pts[0].y = mvy;
    sdsp[0].refno = sdsp[1].refno = sdsp[2].refno = sdsp[3].refno = 0;

    while(!stop)
    {
        best = -1;
        sdsp[0].x = mvx - 1; sdsp[0].y = mvy;
        sdsp[1].x = mvx + 1; sdsp[1].y = mvy;
        sdsp[2].x = mvx; sdsp[2].y = mvy - 1;
        sdsp[3].x = mvx; sdsp[3].y = mvy + 1;

        for(i = 0; i < 4; i++)
        {
            // skip positions that are still in the history
            if (check_all_vec(checked_pts, &sdsp[i]))
                continue;

            checked_no = (checked_no + 1) % 10;
            checked_pts[checked_no] = sdsp[i];
            ref = ref_st + sdsp[i].y * stride_ref + sdsp[i].x;
            // "* 4" rather than "<< 2": left-shifting a negative value is undefined
            cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) +
                t->mb.lambda * (eg_size_se(&t->bs, sdsp[i].x * 4 - context->vec[0].x) +
                                eg_size_se(&t->bs, sdsp[i].y * 4 - context->vec[0].y));

            if (cursad < sad)
            {
                sad = cursad;
                best = i;
            }
        }

        if (best == -1)
        {
            // the centre is at least as cheap as every new neighbour
            stop = 1;
        }
        else
        {
            mvx = sdsp[best].x;
            mvy = sdsp[best].y;
            // do not step past the search window
            if (mvx == limit_x || mvx == -limit_x ||
                mvy == limit_y || mvy == -limit_y)
            {
                stop = 1;
            }
        }
    }

    // final vector, back in the units of context->vec[]
    ref = ref_st + mvy * stride_ref + mvx;
    context->vec_best.x = mvx * 4;
    context->vec_best.y = mvy * 4;

    // when cmp is the sad function the accumulated cost is already final
    if (t->cmp[context->mb_part] == t->sad[context->mb_part])
        return sad;

    sad = t->cmp[context->mb_part](cur, stride_cur, ref, stride_ref) +
        t->mb.lambda * (eg_size_se(&t->bs, context->vec_best.x - context->vec[0].x) +
                        eg_size_se(&t->bs, context->vec_best.y - context->vec[0].y));
    return sad;
}
 
/* 
 *	Full Search 
 */ 
 
/*
 * Exhaustive integer-pel search in reference frame 0.
 *
 * Every position in the window
 *   [cx - limit_x, cx + limit_x] x [cy - limit_y, cy + limit_y]
 * is evaluated, where (cx, cy) is context->vec[0] with its two low bits
 * dropped.  The cost of a position is t->sad[context->mb_part] plus lambda
 * times the eg_size_se() sizes of the vector difference against
 * context->vec[0].  Rows are scanned top to bottom, columns left to right;
 * on equal cost the first position found is kept.
 *
 * On return context->vec_best holds the winning position times 4 with
 * refno 0.  The return value is the cost of that position measured with
 * t->cmp[context->mb_part].
 */
uint32_t
T264_search_full(T264_t* t, T264_search_context_t* context)
{
    // Start from the largest uint32_t so the first evaluated position is
    // always accepted; a finite seed such as width * height * 255 can be
    // undercut by no candidate once the vector cost is added, which would
    // leave the result position unset.
    uint32_t sad = (uint32_t)-1;
    uint32_t cursad;
    int32_t i, j;

    int32_t limit_x = context->limit_x;
    int32_t limit_y = context->limit_y;
    int32_t stride_cur = t->stride;
    int32_t stride_ref = t->edged_stride;

    // start point of current and reference block
    int32_t row = context->offset / t->edged_stride;
    int32_t col = context->offset % t->edged_stride;
    uint8_t* cur = t->cur.Y[0] + row * stride_cur + col;
    uint8_t* ref_st = t->refl0[0]->Y[0] + row * stride_ref + col;
    uint8_t* ref;

    // window centre in integer-pel units
    int32_t center_x = context->vec[0].x >> 2;
    int32_t center_y = context->vec[0].y >> 2;
    // best position so far; defaults to the centre if the window is empty
    int32_t best_x = center_x;
    int32_t best_y = center_y;

    context->vec_best.refno = 0;

    for (i = center_y - limit_y; i <= center_y + limit_y; i++)
    {
        for (j = center_x - limit_x; j <= center_x + limit_x; j++)
        {
            ref = ref_st + i * stride_ref + j;
            // "* 4" rather than "<< 2": left-shifting a negative value is undefined
            cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) +
                t->mb.lambda * (eg_size_se(&t->bs, j * 4 - context->vec[0].x) +
                                eg_size_se(&t->bs, i * 4 - context->vec[0].y));
            if (cursad < sad)
            {
                sad = cursad;
                best_y = i;
                best_x = j;
            }
        }
    }

    ref = ref_st + best_y * stride_ref + best_x;
    context->vec_best.y = best_y * 4;
    context->vec_best.x = best_x * 4;

    // the reported cost always uses the cmp function
    sad = t->cmp[context->mb_part](cur, t->stride, ref, t->edged_stride) +
        t->mb.lambda * (eg_size_se(&t->bs, context->vec_best.x - context->vec[0].x) +
                        eg_size_se(&t->bs, context->vec_best.y - context->vec[0].y));

    return sad;
}
 
// xxx, never used, just for compare to jm80. 
/*
 * Exhaustive integer-pel search in reference frame 0, visiting positions
 * ring by ring outwards from the centre.
 *
 * The centre is context->vec[0] divided by 4 (division, not a shift).
 * The search range is fixed at +/-16 in both directions, i.e. 33 * 33
 * positions; context->limit_x / limit_y are not consulted.  The cost of a
 * position is t->sad[context->mb_part] plus lambda times the eg_size_se()
 * sizes of the vector difference against context->vec[0]; on equal cost
 * the position visited first is kept.
 *
 * On return context->vec_best holds the winning position times 4 with
 * refno 0.  The return value is the cost of that position measured with
 * t->cmp[context->mb_part].
 */
uint32_t
T264_spiral_search_full(T264_t* t, T264_search_context_t* context)
{
    enum
    {
        SPIRAL_RANGE = 16,
        SPIRAL_POINTS = (2 * SPIRAL_RANGE + 1) * (2 * SPIRAL_RANGE + 1)
    };

    // Start from the largest uint32_t so the first evaluated position is
    // always accepted; a finite seed such as width * height * 255 can be
    // undercut by no candidate once the vector cost is added, which would
    // leave the result position unset.
    uint32_t sad = (uint32_t)-1;
    uint32_t cursad;
    int32_t i, j, k, l;

    int32_t stride_cur = t->stride;
    int32_t stride_ref = t->edged_stride;

    // start point of current and reference block
    int32_t row = context->offset / t->edged_stride;
    int32_t col = context->offset % t->edged_stride;
    uint8_t* cur = t->cur.Y[0] + row * stride_cur + col;
    uint8_t* ref_st = t->refl0[0]->Y[0] + row * stride_ref + col;
    uint8_t* ref;
    int32_t spiral_search_x[SPIRAL_POINTS];
    int32_t spiral_search_y[SPIRAL_POINTS];

    // centre of the spiral in integer-pel units
    int32_t center_x = context->vec[0].x / 4;
    int32_t center_y = context->vec[0].y / 4;
    int32_t best_x = center_x;
    int32_t best_y = center_y;

    context->vec_best.refno = 0;

    // Offset table: entry 0 is the centre, then for each ring l the right and
    // left columns (without corners) followed by the bottom and top rows
    // (with corners).  Ring l contributes 8 * l entries, 1 + 8 * (1 + ... + 16)
    // = 1089 = 33 * 33 in total.
    spiral_search_x[0] = spiral_search_y[0] = 0;
    k = 1;
    for (l = 1; l <= SPIRAL_RANGE; l++)
    {
        for (i = -l + 1; i < l; i++)
        {
            spiral_search_x[k] = l;   spiral_search_y[k++] = i;
            spiral_search_x[k] = -l;  spiral_search_y[k++] = i;
        }
        for (i = -l; i <= l; i++)
        {
            spiral_search_x[k] = i;  spiral_search_y[k++] = l;
            spiral_search_x[k] = i;  spiral_search_y[k++] = -l;
        }
    }

    for (k = 0; k < SPIRAL_POINTS; k++)
    {
        i = center_y + spiral_search_y[k];
        j = center_x + spiral_search_x[k];

        ref = ref_st + i * stride_ref + j;
        // "* 4" rather than "<< 2": left-shifting a negative value is undefined
        cursad = t->sad[context->mb_part](cur, stride_cur, ref, stride_ref) +
            t->mb.lambda * (eg_size_se(&t->bs, j * 4 - context->vec[0].x) +
                            eg_size_se(&t->bs, i * 4 - context->vec[0].y));
        if (cursad < sad)
        {
            sad = cursad;
            best_y = i;
            best_x = j;
        }
    }

    ref = ref_st + best_y * stride_ref + best_x;
    context->vec_best.y = best_y * 4;
    context->vec_best.x = best_x * 4;

    // the reported cost always uses the cmp function
    sad = t->cmp[context->mb_part](cur, t->stride, ref, t->edged_stride) +
        t->mb.lambda * (eg_size_se(&t->bs, context->vec_best.x - context->vec[0].x) +
                        eg_size_se(&t->bs, context->vec_best.y - context->vec[0].y));

    return sad;
}