/* Source: www.pudn.com > T264-src-0.02.zip > intra.c */
/***************************************************************************** * * T264 AVC CODEC * * Copyright(C) 2004-2005 llcc* 2004-2005 visionany * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * ****************************************************************************/ #include "stdio.h" #include "memory.h" #include "t264.h" #include "intra.h" #include "utility.h" #include "cavlc.h" // // NOTE: (t->flags & (INTRA_16x16 | INTRA_4x4)) != 0 // uint32_t T264_mode_decision_intra_y(_RW T264_t* t) { uint32_t sad16x16 = -1; uint32_t sad4x4 = -1; if (t->flags & USE_INTRA16x16) sad16x16 = T264_mode_decision_intra_16x16(t); if (t->flags & USE_INTRA4x4) sad4x4 = T264_mode_decision_intra_4x4(t); if (sad16x16 < sad4x4) { t->mb.mb_mode = I_16x16; t->mb.sad = sad16x16; } else { t->mb.mb_mode = I_4x4; t->mb.sad = sad4x4; } return t->mb.sad; } uint32_t T264_mode_decision_intra_16x16(_RW T264_t* t) { DECLARE_ALIGNED_MATRIX(pred16x16, 16, 16, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(topcache, 1, 16 + CACHE_SIZE, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcache, 1, 16 + CACHE_SIZE, uint8_t, CACHE_SIZE); uint32_t sad16x16 = -1; uint8_t* pred16x16free0 = pred16x16; uint8_t* pred16x16free1 = t->mb.pred_i16x16; int32_t modes; int32_t bestmode; int32_t preds[9]; int32_t i; uint8_t* top, *left; static uint8_t fixmode[] = { 
Intra_16x16_TOP, Intra_16x16_LEFT, Intra_16x16_DC, Intra_16x16_PLANE, Intra_16x16_DC, Intra_16x16_DC, Intra_16x16_DC }; top = &topcache[CACHE_SIZE]; left = &leftcache[CACHE_SIZE]; T264_intra_16x16_available(t, preds, &modes, top, left); for(i = 0 ; i < modes ; i ++) { int32_t mode = preds[i]; uint32_t sad; // // pred // t->pred16x16[mode]( pred16x16free1, 16, top, left); // Now use satd for 16x16 Intra // Thomascatlee@163.com sad = t->T264_satd_16x16_u(t->mb.src_y, t->stride, pred16x16free1, 16) + t->mb.lambda * eg_size_ue(&t->bs, fixmode[mode]); if (sad < sad16x16) { SWAP(uint8_t, pred16x16free0, pred16x16free1); sad16x16 = sad; bestmode = mode; } } if (pred16x16free0 != t->mb.pred_i16x16) { memcpy(t->mb.pred_i16x16, pred16x16free0, sizeof(t->mb.pred_i16x16)); } //fixed prediction mode DCLEFT DCTOP DC128 = DC t->mb.mode_i16x16 = fixmode[bestmode]; return sad16x16; } uint32_t T264_mode_decision_intra_4x4(T264_t* t) { DECLARE_ALIGNED_MATRIX(pred4x40, 4, 5, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(pred4x41, 4, 5, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(topcache, 8 + CACHE_SIZE, 1, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcache, 4 + CACHE_SIZE, 1, uint8_t, CACHE_SIZE); uint32_t sad_all = 0; uint32_t sad4x4; int32_t i, j; uint8_t* src; uint8_t* dst; uint8_t* p0, *p1; uint8_t* left; uint8_t* top; int32_t preds[9]; int32_t modes; int32_t bestmode; static uint8_t fixmode[] = { Intra_4x4_TOP, Intra_4x4_LEFT, Intra_4x4_DC, Intra_4x4_DIAGONAL_DOWNLEFT, //Intra_4x4_DC, add by cloud Intra_4x4_DIAGONAL_DOWNRIGHT, Intra_4x4_VERTICAL_RIGHT, Intra_4x4_HORIZONTAL_DOWN, Intra_4x4_VERTICAL_LEFT, Intra_4x4_HORIZONTAL_UP, Intra_4x4_DC, Intra_4x4_DC, Intra_4x4_DC }; p0 = pred4x40; p1 = pred4x41; left = &leftcache[CACHE_SIZE]; top = &topcache[CACHE_SIZE]; for(i = 0 ; i < 16 ; i ++) { int32_t row = i / 4; int32_t col = i % 4; int32_t pred_mode; src = t->mb.src_y + (row * t->stride << 2) + (col << 2); dst = t->mb.dst_y + (row * t->edged_stride << 2) + (col << 2); pred_mode 
= T264_mb_predict_intra4x4_mode(t, luma_index[i]); T264_intra_4x4_available(t, i, preds, &modes, dst, left, top); sad4x4 = -1; for(j = 0 ; j < modes ; j ++) { uint32_t sad; int32_t mode = preds[j]; t->pred4x4[mode](p1, 4, top, left); sad = t->cmp[MB_4x4](src, t->stride, p1, 4) + (pred_mode == fixmode[mode] ? 0 : 4 * t->mb.lambda); //t->mb.lambda * (pred_mode == fixmode[mode] ? 1 : 4); if (sad < sad4x4) { SWAP(uint8_t, p0, p1); sad4x4 = sad; bestmode = mode; } } //fixed prediction mode DCLEFT DCTOP DC128 = DC t->mb.i4x4_pred_mode_ref[IPM_LUMA + col + row * 8] = t->mb.mode_i4x4[luma_index[i]] = fixmode[bestmode]; sad_all += sad4x4; T264_encode_intra_4x4(t, p0, i); } sad_all += t->mb.lambda * 24; return sad_all; } void T264_intra_16x16_available(T264_t* t, int32_t preds[], int32_t* modes, uint8_t* top, uint8_t* left) { uint8_t* p; int32_t i; if ((t->mb.mb_neighbour & (MB_LEFT | MB_TOP)) == (MB_LEFT | MB_TOP)) { preds[0] = Intra_16x16_TOP; preds[1] = Intra_16x16_LEFT; preds[2] = Intra_16x16_DC; preds[3] = Intra_16x16_PLANE; *modes = 4; p = t->mb.dst_y - t->edged_stride; for(i = -1 ; i < 16 ; i ++) { top[i] = p[i]; } p --; for(i = -1 ; i < 16 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(t->mb.mb_neighbour & MB_LEFT) { preds[0] = Intra_16x16_LEFT; preds[1] = Intra_16x16_DCLEFT; *modes = 2; p = t->mb.dst_y - 1; for(i = 0 ; i < 16 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(t->mb.mb_neighbour & MB_TOP) { preds[0] = Intra_16x16_TOP; preds[1] = Intra_16x16_DCTOP; *modes = 2; p = t->mb.dst_y - t->edged_stride; for(i = 0 ; i < 16 ; i ++) { top[i] = p[i]; } } else { preds[0] = Intra_16x16_DC128; *modes = 1; } } void T264_intra_4x4_available(T264_t* t, int32_t idx, int32_t preds[], int32_t* modes, uint8_t* dst, uint8_t* left, uint8_t* top) { static const int32_t neighbour[] = { 0, MB_LEFT, MB_LEFT, MB_LEFT, MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_LEFT |MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP| MB_TOPRIGHT, MB_LEFT 
|MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_LEFT |MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP }; static const int32_t fix[] = { ~0, ~0, ~0, ~0, ~0, ~MB_TOPRIGHT, ~0, ~MB_TOPRIGHT, ~0, ~0, ~0, ~MB_TOPRIGHT, ~0, ~MB_TOPRIGHT, ~0, ~MB_TOPRIGHT }; uint8_t* p; int32_t i; int32_t mb_neighbour = (t->mb.mb_neighbour| neighbour[idx]) & fix[idx]; if ((mb_neighbour & (MB_LEFT | MB_TOP)) == (MB_LEFT | MB_TOP)) { preds[0] = Intra_4x4_TOP; preds[1] = Intra_4x4_LEFT; preds[2] = Intra_4x4_DC; //cloud add #if 0 preds[3] = Intra_4x4_DIAGONAL_DOWNLEFT; preds[4] = Intra_4x4_DIAGONAL_DOWNRIGHT; preds[5] = Intra_4x4_VERTICAL_RIGHT; preds[6] = Intra_4x4_HORIZONTAL_DOWN; preds[7] = Intra_4x4_VERTICAL_LEFT; preds[8] = Intra_4x4_HORIZONTAL_UP; *modes = 9; #else // mode 3 & 7 disable preds[3] = Intra_4x4_DIAGONAL_DOWNRIGHT; preds[4] = Intra_4x4_VERTICAL_RIGHT; preds[5] = Intra_4x4_HORIZONTAL_DOWN; preds[6] = Intra_4x4_HORIZONTAL_UP; *modes = 7; #endif p = dst - t->edged_stride; if (mb_neighbour & MB_TOPRIGHT) { for(i = -1 ; i < 8 ; i ++) { top[i] = p[i]; } } else { for(i = -1 ; i < 4 ; i ++) { top[i] = p[i]; } //cloud add,to fill padded 4 positions for( ; i < 8 ; ++ i) top[i] = p[3]; } p --; for(i = -1 ; i < 4 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(mb_neighbour & MB_LEFT) { preds[0] = Intra_4x4_LEFT; preds[1] = Intra_4x4_DCLEFT; //cloud add preds[2] = Intra_4x4_HORIZONTAL_UP; *modes = 2;//3; p = dst - 1; for(i = 0 ; i < 4 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(mb_neighbour & MB_TOP) { preds[0] = Intra_4x4_TOP; preds[1] = Intra_4x4_DCTOP; //cloud add #if 0 preds[2] = Intra_4x4_DIAGONAL_DOWNLEFT; preds[3] = Intra_4x4_VERTICAL_LEFT; *modes = 4; #else *modes = 2; #endif p = dst - t->edged_stride; if (mb_neighbour & MB_TOPRIGHT) { for(i = -1 ; i < 8 ; i ++) { top[i] = p[i]; } } else { for(i = -1 ; i < 4 ; i ++) { top[i] = p[i]; } //cloud add,to fill padded 4 positions for( ; i < 8 ; ++ i) top[i] = p[3]; } } else { preds[0] = 
Intra_4x4_DC128; *modes = 1; } } void T264_encode_intra_y(_RW T264_t* t) { if (t->mb.mb_mode == I_16x16) { T264_encode_intra_16x16(t); } else if (t->mb.mb_mode == I_4x4) { } } void T264_encode_intra_16x16(_RW T264_t* t) { DECLARE_ALIGNED_MATRIX(dct, 17, 16, int16_t, 16); int32_t qp = t->qp_y; int32_t i; int16_t* curdct; t->expand8to16sub(t->mb.pred_i16x16, 16 / 4, 16 / 4, dct, t->mb.src_y, t->stride); curdct = dct; for(i = 0 ; i < 16 ; i ++) { t->fdct4x4(curdct); dct[256 + i] = curdct[0]; t->quant4x4(curdct, qp, TRUE); scan_zig_4x4(t->mb.dct_y_z[luma_index[i]], curdct); t->iquant4x4(curdct, qp); curdct += 16; } t->fdct4x4dc(curdct); t->quant4x4dc(curdct, qp); scan_zig_4x4(t->mb.dc4x4_z, curdct); // i don't know why to do so, if someone knows tell me. t->idct4x4dc(curdct); t->iquant4x4dc(curdct, qp); curdct = dct; for(i = 0 ; i < 16 ; i ++) { curdct[0] = dct[256 + i]; t->idct4x4(curdct); curdct += 16; } t->contract16to8add(dct, 16 / 4, 16 / 4, t->mb.pred_i16x16, t->mb.dst_y, t->edged_stride); } void T264_encode_intra_4x4(_RW T264_t* t, uint8_t* pred, int32_t i) { DECLARE_ALIGNED_MATRIX(dct, 1, 16, int16_t, 16); int32_t qp = t->qp_y; int32_t row = i / 4; int32_t col = i % 4; //residual saved in t->pred_16x16_4x4 uint8_t* src = t->mb.src_y + (row * t->stride << 2) + (col << 2); //reconstructed MB saved in t->dst uint8_t* dst = t->mb.dst_y + (row * t->edged_stride << 2) + (col << 2); t->expand8to16sub(pred, 4 / 4, 4 / 4, dct, src, t->stride); t->fdct4x4(dct); t->quant4x4(dct, qp, t->slice_type == SLICE_I); scan_zig_4x4(t->mb.dct_y_z[luma_index[i]], dct); t->iquant4x4(dct, qp); t->idct4x4(dct); t->contract16to8add(dct, 4 / 4, 4 / 4, pred, dst, t->edged_stride); } uint32_t T264_mode_decision_intra_uv(_RW T264_t* t) { DECLARE_ALIGNED_MATRIX(pred8x8u, 8, 8, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(pred8x8v, 8, 8, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(topcacheu, 1, 8 + CACHE_SIZE, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcacheu, 1, 8 + CACHE_SIZE, 
uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(topcachev, 1, 8 + CACHE_SIZE, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcachev, 1, 8 + CACHE_SIZE, uint8_t, CACHE_SIZE); uint32_t sad8x8 = -1; uint8_t* pred8x8freeu0 = pred8x8u; uint8_t* pred8x8freeu1 = t->mb.pred_i8x8u; uint8_t* pred8x8freev0 = pred8x8v; uint8_t* pred8x8freev1 = t->mb.pred_i8x8v; int32_t modes; int32_t bestmode; int32_t preds[9]; int32_t i; uint8_t* top_u, *left_u; uint8_t* top_v, *left_v; static uint8_t fixmode[] = { Intra_8x8_DC, Intra_8x8_LEFT, Intra_8x8_TOP, Intra_8x8_PLANE, Intra_8x8_DC, Intra_8x8_DC, Intra_8x8_DC }; top_u = &topcacheu[CACHE_SIZE]; top_v = &topcachev[CACHE_SIZE]; left_u = &leftcacheu[CACHE_SIZE]; left_v = &leftcachev[CACHE_SIZE]; T264_intra_8x8_available(t, preds, &modes, top_u, left_u, top_v, left_v); for(i = 0 ; i < modes ; i ++) { int32_t mode = preds[i]; uint32_t sad; t->pred8x8[mode]( pred8x8freeu1, 8, top_u, left_u); t->pred8x8[mode]( pred8x8freev1, 8, top_v, left_v); sad = t->cmp[MB_8x8](t->mb.src_u, t->stride_uv, pred8x8freeu1, 8) + t->cmp[MB_8x8](t->mb.src_v, t->stride_uv, pred8x8freev1, 8) + // t->mb.lambda * eg_size_ue(&t->bs, fixmode[mode]); 0; if (sad < sad8x8) { SWAP(uint8_t, pred8x8freeu0, pred8x8freeu1); SWAP(uint8_t, pred8x8freev0, pred8x8freev1); sad8x8 = sad; bestmode = mode; } } if (pred8x8freeu0 != t->mb.pred_i8x8u) { memcpy(t->mb.pred_i8x8u, pred8x8freeu0, sizeof(t->mb.pred_i8x8u)); } if (pred8x8freev0 != t->mb.pred_i8x8v) { memcpy(t->mb.pred_i8x8v, pred8x8freev0, sizeof(t->mb.pred_i8x8v)); } //fixed prediction mode DCLEFT DCTOP DC128 = DC t->mb.mb_mode_uv = fixmode[bestmode]; return sad8x8; } void T264_intra_8x8_available(T264_t* t, int32_t preds[], int32_t* modes, uint8_t* top_u, uint8_t* left_u, uint8_t* top_v, uint8_t* left_v) { int32_t i; uint8_t* p_u, *p_v; if ((t->mb.mb_neighbour & (MB_LEFT | MB_TOP)) == (MB_LEFT | MB_TOP)) { preds[0] = Intra_8x8_DC; preds[1] = Intra_8x8_TOP; preds[2] = Intra_8x8_LEFT; preds[3] = Intra_8x8_PLANE; *modes = 4; p_u 
= t->mb.dst_u - t->edged_stride_uv; p_v = t->mb.dst_v - t->edged_stride_uv; for(i = -1 ; i < 8 ; i ++) { top_u[i] = p_u[i]; top_v[i] = p_v[i]; } p_u --; p_v --; for(i = -1 ; i < 8 ; i ++) { left_u[i] = p_u[0]; left_v[i] = p_v[0]; p_u += t->edged_stride_uv; p_v += t->edged_stride_uv; } } else if(t->mb.mb_neighbour & MB_LEFT) { preds[0] = Intra_8x8_DCLEFT; preds[1] = Intra_8x8_LEFT; *modes = 2; p_u = t->mb.dst_u - 1; p_v = t->mb.dst_v - 1; for(i = 0 ; i < 8 ; i ++) { left_u[i] = p_u[0]; left_v[i] = p_v[0]; p_u += t->edged_stride_uv; p_v += t->edged_stride_uv; } } else if(t->mb.mb_neighbour & MB_TOP) { preds[0] = Intra_8x8_DCTOP; preds[1] = Intra_8x8_TOP; *modes = 2; p_u = t->mb.dst_u - t->edged_stride_uv; p_v = t->mb.dst_v - t->edged_stride_uv; for(i = 0 ; i < 8 ; i ++) { top_u[i] = p_u[i]; top_v[i] = p_v[i]; } } else { preds[0] = Intra_8x8_DC128; *modes = 1; } } void T264_encode_intra_uv(_RW T264_t* t) { DECLARE_ALIGNED_MATRIX(dct, 10, 8, int16_t, CACHE_SIZE); int32_t qp = t->qp_uv; int32_t i, j; int16_t* curdct; uint8_t* start; uint8_t* dst; uint8_t* src; int32_t intra = t->slice_type == SLICE_I ? 
1 : 0; start = t->mb.pred_i8x8u; src = t->mb.src_u; dst = t->mb.dst_u; for(j = 0 ; j < 2 ; j ++) { t->expand8to16sub(start, 8 / 4, 8 / 4, dct, src, t->stride_uv); curdct = dct; for(i = 0 ; i < 4 ; i ++) { t->fdct4x4(curdct); dct[64 + i] = curdct[0]; t->quant4x4(curdct, qp, intra); scan_zig_4x4(t->mb.dct_uv_z[j][i], curdct); { // we will count coeff cost, from jm80 int32_t run, k; int32_t coeff_cost; run = -1; coeff_cost = 0; for(k = 1 ; k < 16 ; k ++) { run ++; if (t->mb.dct_uv_z[j][i][k] != 0) { if (ABS(t->mb.dct_uv_z[j][i][k]) > 1) { coeff_cost += 16 * 16 * 256; break; } else { coeff_cost += COEFF_COST[run]; run = -1; } } } if (coeff_cost < CHROMA_COEFF_COST) { memset(&t->mb.dct_uv_z[j][i][1], 0, 15 * sizeof(int16_t)); memset(curdct + 1, 0, 15 * sizeof(int16_t)); } } t->iquant4x4(curdct, qp); curdct += 16; } t->fdct2x2dc(curdct); t->quant2x2dc(curdct, qp, intra); scan_zig_2x2(t->mb.dc2x2_z[j], curdct); t->iquant2x2dc(curdct, qp); t->idct2x2dc(curdct); curdct = dct; for(i = 0 ; i < 4 ; i ++) { curdct[0] = dct[64 + i]; t->idct4x4(curdct); curdct += 16; } t->contract16to8add(dct, 8 / 4, 8 / 4, start, dst, t->edged_stride_uv); // // change to v // start = t->mb.pred_i8x8v; dst = t->mb.dst_v; src = t->mb.src_v; } }