www.pudn.com > h264src.zip > block.c
/***************************************************************************** * * T264 AVC CODEC * * Copyright(C) 2004-2005 llcc* 2004-2005 visionany * 2005.2.24 CloudWu added support for B-frame MB16x16 support * 2005.3.2 CloudWu added support for B-frame MB16x8 and MB8x16,MB8x8 support * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * ****************************************************************************/ #include "stdio.h" #include "T264.h" #include "utility.h" #ifndef CHIP_DM642 #include "memory.h" #endif #include "assert.h" #include "block.h" /* intra */ static void __inline T264dec_mb_decode_predict_i16x16_y(T264_t* t, uint8_t mode, uint8_t* pred, uint8_t* src) { DECLARE_ALIGNED_MATRIX(topcache, 1, 16 + CACHE_SIZE, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcache, 1, 16 + CACHE_SIZE, uint8_t, CACHE_SIZE); uint8_t* p; int32_t i; uint8_t* top, *left; top = &topcache[CACHE_SIZE]; left = &leftcache[CACHE_SIZE]; if (mode == Intra_16x16_DC) { if ((t->mb.mb_neighbour & (MB_LEFT | MB_TOP)) == (MB_LEFT | MB_TOP)) { mode = Intra_16x16_DC; p = src - t->edged_stride; for(i = 0 ; i < 16 ; i ++) { top[i] = p[i]; } p = src - 1; for(i = 0 ; i < 16 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(t->mb.mb_neighbour & MB_LEFT) { mode = Intra_16x16_DCLEFT; p = src - 1; for(i = 0 ; i < 16 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(t->mb.mb_neighbour & MB_TOP) { mode = Intra_16x16_DCTOP; p = src - t->edged_stride; for(i = 0 ; i < 16 ; i ++) { top[i] = p[i]; } } else { mode = Intra_16x16_DC128; } } else { switch(mode) { case Intra_16x16_TOP: p = src - t->edged_stride; for(i = 0 ; i < 16 ; i ++) { top[i] = p[i]; } break; case Intra_16x16_LEFT: p = src - 1; for(i = 0 ; i < 16 ; i ++) { left[i] = p[0]; p += t->edged_stride; } break; case Intra_16x16_PLANE: p = src - t->edged_stride; for(i = -1 ; i < 16 ; i ++) { top[i] = p[i]; } p -= 1; for(i = -1 ; i < 16 ; i ++) { left[i] = p[0]; p += t->edged_stride; } break; default: assert(0); break; } } t->pred16x16[mode](pred, 16, top, left); } static void __inline T264dec_mb_decode_predict_i4x4_y(T264_t* t, uint8_t idx, uint8_t mode, uint8_t* pred, uint8_t* src) { DECLARE_ALIGNED_MATRIX(topcache, 8 + CACHE_SIZE, 1, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcache, 4 + CACHE_SIZE, 1, uint8_t, CACHE_SIZE); static const int32_t neighbour[] = { 0, MB_LEFT, MB_LEFT, MB_LEFT, MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_LEFT |MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP| MB_TOPRIGHT, MB_LEFT |MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP, MB_LEFT |MB_TOP| MB_TOPRIGHT, MB_LEFT| MB_TOP }; static const int32_t fix[] = { ~0, ~0, ~0, ~0, ~0, ~MB_TOPRIGHT, ~0, ~MB_TOPRIGHT, ~0, ~0, ~0, ~MB_TOPRIGHT, ~0, ~MB_TOPRIGHT, ~0, ~MB_TOPRIGHT }; uint8_t* p; int32_t i; uint8_t* top = &topcache[CACHE_SIZE]; uint8_t* left = &leftcache[CACHE_SIZE]; if (mode == Intra_4x4_DC) { int32_t mb_neighbour = (t->mb.mb_neighbour| neighbour[idx]) & fix[idx]; if ((mb_neighbour & (MB_LEFT | MB_TOP)) == (MB_LEFT | MB_TOP)) { mode = Intra_4x4_DC; p = src - t->edged_stride; for(i = 0 ; i < 4 ; i ++) { top[i] = p[i]; } p = src - 1; for(i = 0 ; i < 4 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(mb_neighbour & MB_LEFT) { mode = Intra_4x4_DCLEFT; p = src - 1; for(i = 0 ; i < 4 ; i ++) { left[i] = p[0]; p += t->edged_stride; } } else if(mb_neighbour & MB_TOP) { mode = Intra_4x4_DCTOP; p = src - t->edged_stride; for(i = 0 ; i < 4 ; i ++) { top[i] = p[i]; } } else { mode = Intra_4x4_DC128; } } else { switch(mode) { case Intra_4x4_TOP: p = src - t->edged_stride; for(i = 0 ; i < 4 ; i ++) { top[i] = p[i]; } break; case Intra_4x4_LEFT: case Intra_4x4_HORIZONTAL_UP: p = src - 1; for(i = 0 ; i < 4 ; i ++) { left[i] = p[0]; p += t->edged_stride; } break; case Intra_4x4_DIAGONAL_DOWNLEFT: case Intra_4x4_VERTICAL_LEFT: { int32_t mb_neighbour = (t->mb.mb_neighbour| neighbour[idx]) & fix[idx]; p = src - t->edged_stride; if((idx & 3) == 3 && t->mb.mb_x == t->mb_width - 1) //if is the right-most sub-block, if is th last MB in horizontal, no top-right exist mb_neighbour &= ~MB_TOPRIGHT; if (mb_neighbour & MB_TOPRIGHT) { for(i = 0 ; i < 8 ; i ++) { top[i] = p[i]; } } else { for(i = 0 ; i < 4 ; i ++) { top[i] = p[i]; } top[4] = p[3]; top[5] = p[3]; top[6] = p[3]; top[7] = p[3]; } } break; case Intra_4x4_DIAGONAL_DOWNRIGHT: case Intra_4x4_VERTICAL_RIGHT: case Intra_4x4_HORIZONTAL_DOWN: p = src - t->edged_stride; for(i = -1 ; i < 4 ; i ++) { top[i] = p[i]; } p -= 1; for(i = -1 ; i < 4 ; i ++) { left[i] = p[0]; p += t->edged_stride; } break; default: assert(0); break; } } t->pred4x4[mode](pred, 4, top, left); } static void __inline T264dec_mb_decode_predict_i8x8_y(T264_t* t, uint8_t mode, uint8_t* pred_u, uint8_t* pred_v) { DECLARE_ALIGNED_MATRIX(topcacheu, 1, 8 + CACHE_SIZE, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcacheu, 1, 8 + CACHE_SIZE, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(topcachev, 1, 8 + CACHE_SIZE, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(leftcachev, 1, 8 + CACHE_SIZE, uint8_t, CACHE_SIZE); uint8_t* p_u, *p_v; int32_t i; uint8_t* top_u, *left_u; uint8_t* top_v, *left_v; top_u = &topcacheu[CACHE_SIZE]; top_v = &topcachev[CACHE_SIZE]; left_u = &leftcacheu[CACHE_SIZE]; left_v = &leftcachev[CACHE_SIZE]; if (mode == Intra_8x8_DC) { if ((t->mb.mb_neighbour & (MB_LEFT | MB_TOP)) == (MB_LEFT | MB_TOP)) { mode = Intra_8x8_DC; p_u = t->mb.src_u - t->edged_stride_uv; p_v = t->mb.src_v - t->edged_stride_uv; for(i = 0 ; i < 8 ; i ++) { top_u[i] = p_u[i]; top_v[i] = p_v[i]; } p_u = t->mb.src_u - 1; p_v = t->mb.src_v - 1; for(i = 0 ; i < 8 ; i ++) { left_u[i] = p_u[0]; left_v[i] = p_v[0]; p_u += t->edged_stride_uv; p_v += t->edged_stride_uv; } } else if(t->mb.mb_neighbour & MB_LEFT) { mode = Intra_8x8_DCLEFT; p_u = t->mb.src_u - 1; p_v = t->mb.src_v - 1; for(i = 0 ; i < 8 ; i ++) { left_u[i] = p_u[0]; left_v[i] = p_v[0]; p_u += t->edged_stride_uv; p_v += t->edged_stride_uv; } } else if(t->mb.mb_neighbour & MB_TOP) { mode = Intra_8x8_DCTOP; p_u = t->mb.src_u - t->edged_stride_uv; p_v = t->mb.src_v - t->edged_stride_uv; for(i = 0 ; i < 8 ; i ++) { top_u[i] = p_u[i]; top_v[i] = p_v[i]; } } else { mode = Intra_8x8_DC128; } } else { switch(mode) { case Intra_8x8_TOP: p_u = t->mb.src_u - t->edged_stride_uv; p_v = t->mb.src_v - t->edged_stride_uv; for(i = 0 ; i < 8 ; i ++) { top_u[i] = p_u[i]; top_v[i] = p_v[i]; } break; case Intra_8x8_LEFT: p_u = t->mb.src_u - 1; p_v = t->mb.src_v - 1; for(i = 0 ; i < 8 ; i ++) { left_u[i] = p_u[0]; left_v[i] = p_v[0]; p_u += t->edged_stride_uv; p_v += t->edged_stride_uv; } break; case Intra_8x8_PLANE: p_u = t->mb.src_u - t->edged_stride_uv; p_v = t->mb.src_v - t->edged_stride_uv; for(i = -1 ; i < 8 ; i ++) { top_u[i] = p_u[i]; top_v[i] = p_v[i]; } p_u -= 1; p_v -= 1; for(i = -1 ; i < 8 ; i ++) { left_u[i] = p_u[0]; p_u += t->edged_stride_uv; left_v[i] = p_v[0]; p_v += t->edged_stride_uv; } break; default: assert(0); break; } } t->pred8x8[mode](pred_u, 8, top_u, left_u); t->pred8x8[mode](pred_v, 8, top_v, left_v); } static void __inline T264dec_mb_decode_i16x16_y(T264_t* t) { DECLARE_ALIGNED_MATRIX(dct, 1+16, 16, int16_t, CACHE_SIZE); int32_t qp = t->qp_y; int32_t i; int16_t* curdct; uint8_t* src; src = t->mb.src_y; T264dec_mb_decode_predict_i16x16_y(t, t->mb.mode_i16x16, t->mb.pred_i16x16, src); unscan_zig_4x4( t->mb.dc4x4_z, dct + 256 ); t->iquant4x4dc(dct + 256, qp); t->idct4x4dc(dct + 256); curdct = dct; for( i = 0; i < 16; i++ ) { unscan_zig_4x4( t->mb.dct_y_z[luma_index[i]], curdct ); t->iquant4x4( curdct, qp ); curdct[0] = dct[256 + i]; t->idct4x4(curdct); curdct += 16; } t->contract16to8add(dct, 16 / 4, 16 / 4, t->mb.pred_i16x16, src, t->edged_stride); } static void __inline T264dec_mb_decode_i4x4_y(T264_t* t) { DECLARE_ALIGNED_MATRIX(pred, 4, 5, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(dct, 1, 16, int16_t, 16); int32_t qp = t->qp_y; int32_t i; uint8_t* src; for(i = 0 ; i < 16 ; i ++) { int32_t row = i / 4; int32_t col = i % 4; src = t->mb.src_y + (row * t->edged_stride << 2) + (col << 2); T264dec_mb_decode_predict_i4x4_y(t, i, t->mb.mode_i4x4[luma_index[i]], pred, src); unscan_zig_4x4(t->mb.dct_y_z[luma_index[i]], dct); t->iquant4x4(dct, qp); t->idct4x4(dct); t->contract16to8add(dct, 4 / 4, 4 / 4, pred, src, t->edged_stride); } } void T264dec_mb_decode_intra_y(T264_t* t) { if (t->mb.mb_mode == I_4x4) T264dec_mb_decode_i4x4_y(t); else T264dec_mb_decode_i16x16_y(t); } void T264dec_mb_decode_uv(T264_t* t, uint8_t* pred_u, uint8_t* pred_v) { DECLARE_ALIGNED_MATRIX(dct, 10, 8, int16_t, CACHE_SIZE); int32_t qp = t->qp_uv; int32_t i, j; int16_t* curdct; uint8_t* start; uint8_t* src; start = pred_u; src = t->mb.src_u; for(j = 0 ; j < 2 ; j ++) { unscan_zig_2x2(t->mb.dc2x2_z[j], dct + 64); t->iquant2x2dc(dct + 64, qp); t->idct2x2dc(dct + 64); curdct = dct; for(i = 0 ; i < 4 ; i ++) { unscan_zig_4x4(t->mb.dct_uv_z[j][i], curdct); t->iquant4x4(curdct, qp); curdct[0] = dct[64 + i]; t->idct4x4(curdct); curdct += 16; } t->contract16to8add(dct, 8 / 4, 8 / 4, start, src, t->edged_stride_uv); // // change to v // start = pred_v; src = t->mb.src_v; } } void T264dec_mb_decode_intra_uv(T264_t* t) { T264dec_mb_decode_predict_i8x8_y(t, t->mb.mb_mode_uv, t->mb.pred_i8x8u, t->mb.pred_i8x8v); T264dec_mb_decode_uv(t, t->mb.pred_i8x8u, t->mb.pred_i8x8v); } void T264dec_mb_decode_interp_mc(T264_t* t, uint8_t* ref) { T264_vector_t vec; uint8_t* tmp; int32_t x, y; int32_t i; int32_t list_index = 0; static const int8_t index[4][4][6] = { {{0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0}, {1, 1, 0, 0, 0, 0}, {1, 0, 0, 0, 1, 0}}, {{0, 2, 0, 0, 0, 0}, {1, 2, 0, 0, 0, 0}, {1, 3, 0, 0, 0, 0}, {1, 2, 0, 0, 1, 0}}, {{2, 2, 0, 0, 0, 0}, {2, 3, 0, 0, 0, 0}, {3, 3, 0, 0, 0, 0}, {3, 2, 0, 0, 1, 0}}, {{2, 0, 0, 0, 0, 1}, {2, 1, 0, 0, 0, 1}, {3, 1, 0, 0, 0, 1}, {1, 2, 0, 1, 1, 0}} }; switch(t->mb.mb_part) { case MB_16x16: vec = t->mb.vec[0][0]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)); t->memcpy_stride_u(tmp, 16, 16, t->edged_stride, ref, 16); } else { t->pia[MB_16x16](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2], t->ref[list_index][vec.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, ref, 16); } break; case MB_16x8: vec = t->mb.vec[0][0]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)); t->memcpy_stride_u(tmp, 16, 8, t->edged_stride, ref, 16); } else { t->pia[MB_16x8](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2], t->ref[list_index][vec.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, ref, 16); } vec = t->mb.vec[0][8]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)); t->memcpy_stride_u(tmp, 16, 8, t->edged_stride, ref + 16 * 8, 16); } else { t->pia[MB_16x8](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2], t->ref[list_index][vec.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, ref + 16 * 8, 16); } break; case MB_8x16: vec = t->mb.vec[0][0]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)); t->memcpy_stride_u(tmp, 8, 16, t->edged_stride, ref, 16); } else { t->pia[MB_8x16](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2], t->ref[list_index][vec.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, ref, 16); } vec = t->mb.vec[0][2]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + 8; t->memcpy_stride_u(tmp, 8, 16, t->edged_stride, ref + 8, 16); } else { t->pia[MB_8x16](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + 8, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + 8, t->edged_stride, t->edged_stride, ref + 8, 16); } break; case MB_8x8: case MB_8x8ref0: for(i = 0 ; i < 4 ; i ++) { int32_t offset1, offset2; switch(t->mb.submb_part[luma_index[4 * i]]) { case MB_8x8: vec = t->mb.vec[0][luma_index[4 * i]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 8, 8, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_8x8](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } break; case MB_8x4: vec = t->mb.vec[0][luma_index[4 * i]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 8, 4, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_8x4](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } vec = t->mb.vec[0][luma_index[4 * i + 2]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8 + 4) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 8, 4, t->edged_stride, ref + i / 2 * 16 * 8 + 64 + i % 2 * 8, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8 + 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8 + 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_8x4](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8 + 64, 16); } break; case MB_4x8: vec = t->mb.vec[0][luma_index[4 * i]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 8, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_4x8](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } vec = t->mb.vec[0][luma_index[4 * i + 1]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8 + 4; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 8, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8 + 4, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8 + 4; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8 + 4; t->pia[MB_4x8](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8 + 4, 16); } break; case MB_4x4: vec = t->mb.vec[0][luma_index[4 * i]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 4, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_4x4](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8, 16); } vec = t->mb.vec[0][luma_index[4 * i + 1]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8 + 4; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 4, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8 + 4, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8 + 4; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8 + 4; t->pia[MB_4x4](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8 + 4, 16); } vec = t->mb.vec[0][luma_index[4 * i + 2]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8 + 4) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 4, t->edged_stride, ref + i / 2 * 16 * 8 + 64 + i % 2 * 8, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8 + 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8 + 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_4x4](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8 + 64, 16); } vec = t->mb.vec[0][luma_index[4 * i + 3]]; x = (vec.x & 3); y = (vec.y & 3); if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + i / 2 * 8 + 4) * t->edged_stride + ((t->mb.mb_x << 4) + (vec.x >> 2)) + i % 2 * 8 + 4; tmp = t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 4, t->edged_stride, ref + i / 2 * 16 * 8 + 64 + i % 2 * 8 + 4, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][3] + i / 2 * 8 + 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][2] + i % 2 * 8 + 4; offset2 = ((t->mb.mb_y << 4) + (vec.y >> 2) + index[y][x][5] + i / 2 * 8 + 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec.x >> 2) + index[y][x][4] + i % 2 * 8 + 4; t->pia[MB_4x4](t->ref[list_index][vec.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, ref + i / 2 * 16 * 8 + i % 2 * 8 + 64 + 4, 16); } break; } } break; default: assert(0); break; } } void T264dec_mb_decode_interp_transform(T264_t* t, uint8_t* ref) { DECLARE_ALIGNED_MATRIX(dct, 16, 16, int16_t, 16); int16_t* curdct = dct; int32_t i; for(i = 0 ; i < 16 ; i ++) { unscan_zig_4x4(t->mb.dct_y_z[luma_index[i]], curdct); t->iquant4x4(curdct, t->qp_y); t->idct4x4(curdct); curdct += 16; } t->contract16to8add(dct, 16 / 4, 16 / 4, ref, t->mb.src_y, t->edged_stride); } void T264dec_mb_decode_interp_y(T264_t* t) { T264dec_mb_decode_interp_mc(t, t->mb.pred_p16x16); T264dec_mb_decode_interp_transform(t, t->mb.pred_p16x16); } void T264dec_mb_decode_interp_uv(T264_t* t) { DECLARE_ALIGNED_MATRIX(pred_u, 8, 8, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(pred_v, 8, 8, uint8_t, CACHE_SIZE); T264_vector_t vec; uint8_t* src, *dst; uint8_t* src_u, *dst_u; int32_t i; int32_t list_index = 0; switch (t->mb.mb_part) { case MB_16x16: vec = t->mb.vec[0][0]; src = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3); dst = pred_u; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 8); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3); dst = pred_v; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 8); break; case MB_16x8: vec = t->mb.vec[0][0]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3); dst_u = pred_u; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 8, 4); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3); dst = pred_v; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 4); vec = t->mb.vec[0][luma_index[8]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4 * t->edged_stride_uv; dst_u += 4 * 8; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 8, 4); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4 * t->edged_stride_uv; dst += 4 * 8; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 8, 4); break; case MB_8x16: vec = t->mb.vec[0][0]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3); dst_u = pred_u; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 8); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3); dst = pred_v; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 8); vec = t->mb.vec[0][luma_index[4]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4; dst_u += 4; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 8); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + 4; dst += 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 8); break; case MB_8x8: case MB_8x8ref0: for(i = 0 ; i < 4 ; i ++) { switch(t->mb.submb_part[luma_index[4 * i]]) { case MB_8x8: vec = t->mb.vec[0][luma_index[4 * i]]; src = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst = pred_u + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 4); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst = pred_v + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 4); break; case MB_8x4: vec = t->mb.vec[0][luma_index[4 * i]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst_u = pred_u + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 2); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst = pred_v + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 2); vec = t->mb.vec[0][luma_index[4 * i + 2]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2 * t->edged_stride_uv; dst_u += 2 * 8; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 4, 2); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2 * t->edged_stride_uv; dst += 2 * 8; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 4, 2); break; case MB_4x8: vec = t->mb.vec[0][luma_index[4 * i]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst_u = pred_u + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 4); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst = pred_v + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 4); vec = t->mb.vec[0][luma_index[4 * i + 1]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2; dst_u += 2; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 4); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2; dst += 2; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 4); break; case MB_4x4: vec = t->mb.vec[0][luma_index[4 * i]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst_u = pred_u + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4); dst = pred_v + i / 2 * 32 + i % 2 * 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2); vec = t->mb.vec[0][luma_index[4 * i + 1]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2; dst_u += 2; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2; dst += 2; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2); vec = t->mb.vec[0][luma_index[4 * i + 2]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2 * t->edged_stride_uv; dst_u += 2 * 8 - 2; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2 * t->edged_stride_uv; dst += 2 * 8 - 2; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2); vec = t->mb.vec[0][luma_index[4 * i + 3]]; src_u = t->ref[list_index][vec.refno]->U + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2 * t->edged_stride_uv + 2; dst_u += 2; t->eighth_pixel_mc_u(src_u, t->edged_stride_uv, dst_u, vec.x, vec.y, 2, 2); src = t->ref[list_index][vec.refno]->V + ((t->mb.mb_y << 3) + (vec.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec.x >> 3) + (i % 2 * 4) + 2 * t->edged_stride_uv + 2; dst += 2; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2); break; default: break; } } break; default: break; } T264dec_mb_decode_uv(t, pred_u, pred_v); } static const int8_t index[4][4][6] = { {{0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0}, {1, 1, 0, 0, 0, 0}, {1, 0, 0, 0, 1, 0}}, {{0, 2, 0, 0, 0, 0}, {1, 2, 0, 0, 0, 0}, {1, 3, 0, 0, 0, 0}, {1, 2, 0, 0, 1, 0}}, {{2, 2, 0, 0, 0, 0}, {2, 3, 0, 0, 0, 0}, {3, 3, 0, 0, 0, 0}, {3, 2, 0, 0, 1, 0}}, {{2, 0, 0, 0, 0, 1}, {2, 1, 0, 0, 0, 1}, {3, 1, 0, 0, 0, 1}, {1, 2, 0, 1, 1, 0}} }; void T264_mb4x4_interb_uv_mc(T264_t* t,T264_vector_t vecPredicted[2][16],uint8_t* pred_u,uint8_t* pred_v) { DECLARE_ALIGNED_MATRIX(pred_u_l1, 8, 8, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(pred_v_l1, 8, 8, uint8_t, CACHE_SIZE); T264_vector_t vec; uint8_t* src, *dst; int32_t i; int32_t j; int32_t idx; int32_t offset_src,offset_dst; uint8_t *dstv; for(i = 0;i < 4; ++i) { for(j = 0;j < 4; ++j) { //predict each 2x2 block idx = (i * 4) + j; offset_dst = ((i * 2) * 8) + (j << 1); vec = vecPredicted[0][idx]; offset_src = ((t->mb.mb_y << 3) + ((i << 1) + (vec.y >> 3))) * t->edged_stride_uv + (t->mb.mb_x << 3) + (j << 1) + (vec.x >> 3); dstv = pred_v + offset_dst; dst = pred_u + offset_dst; if(vec.refno > -1) { src = t->ref[0][vec.refno]->U + offset_src; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2); src = t->ref[0][vec.refno]->V + offset_src; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dstv, vec.x, vec.y, 2, 2); } vec = vecPredicted[1][idx]; offset_src = ((t->mb.mb_y << 3) + ((i << 1) + (vec.y >> 3))) * t->edged_stride_uv + (t->mb.mb_x << 3) + (j << 1) + (vec.x >> 3); if(vec.refno > -1) { if(vecPredicted[0][idx].refno > -1) { dst = pred_u_l1 + offset_dst; dstv = pred_v_l1 + offset_dst; } src = t->ref[1][vec.refno]->U + offset_src; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec.x, vec.y, 2, 2); src = t->ref[1][vec.refno]->V + offset_src; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dstv, vec.x, vec.y, 2, 2); } if(dst != pred_u + offset_dst) { t->pia[MB_2x2](dst, pred_u + offset_dst, 8, 8, pred_u + offset_dst, 8); t->pia[MB_2x2](dstv, pred_v + offset_dst, 8, 8, pred_v + offset_dst, 8); } } } } void T264_mb4x4_interb_mc(T264_t* t,T264_vector_t vec[2][16],uint8_t* ref) { T264_vector_t vec0,vec1; uint8_t* tmp,*pred_tmp; int32_t x, y,i,j; int32_t list_index, block_idx = 0; int32_t offset1, offset2; DECLARE_ALIGNED_MATRIX_H(pred_16x16bi, 16, 16, uint8_t, CACHE_SIZE); for(i = 0 ; i < 4 ; i ++) { for(j = 0;j < 4; ++j) { int32_t offset_base; vec0 = vec[0][block_idx]; vec1 = vec[1][block_idx]; x = (vec0.x & 3); y = (vec0.y & 3); // offset_base = luma_inverse_y[block_idx] * 16 * 4 + luma_inverse_x[block_idx] * 4; offset_base = i * 16 * 4 + j * 4; pred_tmp = ref + offset_base; if(vec0.refno > -1) { list_index = 0; if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec0.y >> 2) + i * 4) * t->edged_stride + ((t->mb.mb_x << 4) + (vec0.x >> 2)) + j * 4; tmp = t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 4, t->edged_stride, pred_tmp, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][3] + i * 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][2] + j * 4; offset2 = ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][5] + i * 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][4] + j * 4; t->pia[MB_4x4](t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec0.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, pred_tmp,16); } } x = (vec1.x & 3); y = (vec1.y & 3); if(vec1.refno > -1) { list_index = 1; if(vec0.refno > -1) pred_tmp = pred_16x16bi + offset_base; if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec1.y >> 2) + i * 4) * t->edged_stride + ((t->mb.mb_x << 4) + (vec1.x >> 2)) + j * 4; tmp = t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 4, 4, t->edged_stride, pred_tmp, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][3] + i * 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][2] + j * 4; offset2 = ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][5] + i * 4) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][4] + j * 4; t->pia[MB_4x4](t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec1.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, pred_tmp, 16); } } if(pred_tmp != ref + offset_base) t->pia[MB_4x4](pred_tmp,ref + offset_base,16,16,ref + offset_base,16); ++block_idx; } } } void T264dec_mb_decode_interb_mc(T264_t* t, uint8_t* ref) { T264_vector_t vec0,vec1; uint8_t* tmp,*pred_tmp; int32_t x, y,i; int32_t list_index; DECLARE_ALIGNED_MATRIX_H(pred_16x16bi, 16, 16, uint8_t, CACHE_SIZE); if(t->mb.is_copy) T264_mb4x4_interb_mc(t,t->mb.vec,ref); else switch(t->mb.mb_part) { case MB_16x16: vec0 = t->mb.vec[0][0]; vec1 = t->mb.vec[1][0]; x = (vec0.x & 3); y = (vec0.y & 3); pred_tmp = ref; if(vec0.refno > -1) { list_index = 0; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec0.x >> 2)); t->memcpy_stride_u(tmp, 16, 16, t->edged_stride, ref, 16); } else { t->pia[MB_16x16](t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][2], t->ref[list_index][vec0.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, ref, 16); } } if(vec1.refno > -1) { //if bi-pred x = (vec1.x & 3); y = (vec1.y & 3); list_index = 1; if(vec0.refno > -1) //if biPred pred_tmp = pred_16x16bi; else pred_tmp = ref; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec1.x >> 2)); t->memcpy_stride_u(tmp, 16, 16, t->edged_stride, pred_tmp, 16); } else { t->pia[MB_16x16](t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][2], t->ref[list_index][vec1.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, pred_tmp, 16); } } if(pred_tmp != ref) { //if biPred t->pia[MB_16x16](pred_tmp,ref,16,16,ref,16); } break; case MB_16x8: vec0 = t->mb.vec[0][0]; vec1 = t->mb.vec[1][0]; pred_tmp = ref; if(vec0.refno > -1) { list_index = 0; x = (vec0.x & 3); y = (vec0.y & 3); if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec0.x >> 2)); t->memcpy_stride_u(tmp, 16, 8, t->edged_stride, ref, 16); } else { t->pia[MB_16x8](t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][2], t->ref[list_index][vec0.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, ref, 16); } } if(vec1.refno > -1) { x = (vec1.x & 3); y = (vec1.y & 3); list_index = 1; if(vec0.refno > -1) //if biPred pred_tmp = pred_16x16bi; else pred_tmp = ref; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec1.x >> 2)); t->memcpy_stride_u(tmp, 16, 8, t->edged_stride, pred_tmp, 16); } else { t->pia[MB_16x8](t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][2], t->ref[list_index][vec1.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, pred_tmp, 16); } } if(pred_tmp != ref) { //if biPred t->pia[MB_16x8](pred_tmp,ref,16,16,ref,16); } //For second MB16x8 vec0 = t->mb.vec[0][8]; vec1 = t->mb.vec[1][8]; pred_tmp = ref + 16 * 8; if(vec0.refno > -1) { x = (vec0.x & 3); y = (vec0.y & 3); list_index = 0; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec0.x >> 2)); t->memcpy_stride_u(tmp, 16, 8, t->edged_stride, pred_tmp, 16); } else { t->pia[MB_16x8](t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][3] + 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][2], t->ref[list_index][vec0.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][5] + 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, pred_tmp, 16); } } if(vec1.refno > -1) { x = (vec1.x & 3); y = (vec1.y & 3); list_index = 1; if(vec0.refno > -1) //if biPred pred_tmp = pred_16x16bi + 16 * 8; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec1.x >> 2)); t->memcpy_stride_u(tmp, 16, 8, t->edged_stride,pred_tmp, 16); } else { t->pia[MB_16x8](t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][3] + 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][2], t->ref[list_index][vec1.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][5] + 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, pred_tmp, 16); } } if(pred_tmp != ref + 16 * 8) { //if biPred t->pia[MB_16x8](pred_tmp,ref + 16 * 8,16,16,ref + 16 * 8,16); } break; case MB_8x16: pred_tmp = ref; vec0 = t->mb.vec[0][0]; vec1 = t->mb.vec[1][0]; if(vec0.refno > -1) { x = (vec0.x & 3); y = (vec0.y & 3); list_index = 0; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec0.x >> 2)); t->memcpy_stride_u(tmp, 8, 16, t->edged_stride, ref, 16); } else { t->pia[MB_8x16](t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][2], t->ref[list_index][vec0.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride, ref, 16); } } if(vec1.refno > -1) { list_index = 1; x = (vec1.x & 3); y = (vec1.y & 3); if(vec0.refno > -1) //if biPred pred_tmp = pred_16x16bi; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec1.x >> 2)); t->memcpy_stride_u(tmp, 8, 16, t->edged_stride, pred_tmp, 16); } else { t->pia[MB_8x16](t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][2], t->ref[list_index][vec1.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][4], t->edged_stride, t->edged_stride,pred_tmp, 16); } } if(pred_tmp != ref) { //if biPred t->pia[MB_8x16](pred_tmp,ref,16,16,ref,16); } //for second MB8x16 vec0 = t->mb.vec[0][2]; vec1 = t->mb.vec[1][2]; pred_tmp = ref + 8; if(vec0.refno > -1) { x = (vec0.x & 3); y = (vec0.y & 3); list_index = 0; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec0.x >> 2)) + 8; t->memcpy_stride_u(tmp, 8, 16, t->edged_stride, pred_tmp, 16); } else { t->pia[MB_8x16](t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][2] + 8, t->ref[list_index][vec0.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][4] + 8, t->edged_stride, t->edged_stride, pred_tmp, 16); } } if(vec1.refno > -1) { x = (vec1.x & 3); y = (vec1.y & 3); list_index = 1; if(vec0.refno > -1) //if biPred pred_tmp = pred_16x16bi + 8; if (index[y][x][0] == index[y][x][1]) { tmp = t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2)) * t->edged_stride + ((t->mb.mb_x << 4) + (vec1.x >> 2)) + 8; t->memcpy_stride_u(tmp, 8, 16, t->edged_stride, pred_tmp, 16); } else { t->pia[MB_8x16](t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][3]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][2] + 8, t->ref[list_index][vec1.refno]->Y[index[y][x][1]] + ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][5]) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][4] + 8, t->edged_stride, t->edged_stride,pred_tmp, 16); } } if(pred_tmp != ref + 8) { //if biPred t->pia[MB_8x16](pred_tmp,ref + 8,16,16,ref + 8,16); } break; case MB_8x8: for(i = 0 ; i < 4 ; i ++) { int32_t offset1, offset2; switch(t->mb.submb_part[luma_index[4 * i]]) { case MB_8x8: vec0 = t->mb.vec[0][luma_index[4 * i]]; vec1 = t->mb.vec[1][luma_index[4 * i]]; x = (vec0.x & 3); y = (vec0.y & 3); pred_tmp = ref + i / 2 * 16 * 8 + i % 2 * 8; if(vec0.refno > -1) { list_index = 0; if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec0.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec0.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 8, 8, t->edged_stride, pred_tmp, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec0.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec0.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_8x8](t->ref[list_index][vec0.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec0.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, pred_tmp,16); } } x = (vec1.x & 3); y = (vec1.y & 3); if(vec1.refno > -1) { list_index = 1; if(vec0.refno > -1) pred_tmp = pred_16x16bi + i / 2 * 16 * 8 + i % 2 * 8; if (index[y][x][0] == index[y][x][1]) { offset1 = ((t->mb.mb_y << 4) + (vec1.y >> 2) + i / 2 * 8) * t->edged_stride + ((t->mb.mb_x << 4) + (vec1.x >> 2)) + i % 2 * 8; tmp = t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + offset1; t->memcpy_stride_u(tmp, 8, 8, t->edged_stride, pred_tmp, 16); } else { offset1 = ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][3] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][2] + i % 2 * 8; offset2 = ((t->mb.mb_y << 4) + (vec1.y >> 2) + index[y][x][5] + i / 2 * 8) * t->edged_stride + (t->mb.mb_x << 4) + (vec1.x >> 2) + index[y][x][4] + i % 2 * 8; t->pia[MB_8x8](t->ref[list_index][vec1.refno]->Y[index[y][x][0]] + offset1, t->ref[list_index][vec1.refno]->Y[index[y][x][1]] + offset2, t->edged_stride, t->edged_stride, pred_tmp, 16); } } if(pred_tmp != ref + i / 2 * 16 * 8 + i % 2 * 8) t->pia[MB_8x8](pred_tmp,ref + i / 2 * 16 * 8 + i % 2 * 8,16,16,ref + i / 2 * 16 * 8 + i % 2 * 8,16); break; default: assert(0); break; } } break; default: //only support MB16x16 B-frame assert(0); break; } } void T264dec_mb_decode_interb_y(T264_t* t) { T264dec_mb_decode_interb_mc(t, t->mb.pred_p16x16); T264dec_mb_decode_interp_transform(t, t->mb.pred_p16x16); } void T264dec_mb_decode_interb_uv(T264_t* t) { DECLARE_ALIGNED_MATRIX(pred_u, 8, 8, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(pred_v, 8, 8, uint8_t, CACHE_SIZE); DECLARE_ALIGNED_MATRIX(pred_bi, 8, 8, uint8_t, CACHE_SIZE); T264_vector_t vec0,vec1; uint8_t* src, *dst; int32_t list_index,i; if(t->mb.is_copy) { T264_mb4x4_interb_uv_mc(t,t->mb.vec,pred_u,pred_v); }else switch (t->mb.mb_part) { case MB_16x16: vec0 = t->mb.vec[0][0]; vec1 = t->mb.vec[1][0]; dst = pred_u; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->U + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, pred_u, vec0.x, vec0.y, 8, 8); } if(vec1.refno > -1) { list_index = 1; if(vec0.refno > -1) dst = pred_bi; else dst = pred_u; src = t->ref[list_index][vec1.refno]->U + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 8, 8); } if(dst != pred_u) { t->pia[MB_8x8](dst,pred_u,8,8,pred_u,8); } dst = pred_v; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->V + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, pred_v, vec0.x, vec0.y, 8, 8); } if(vec1.refno > -1) { list_index = 1; if(vec0.refno > -1) dst = pred_bi; else dst = pred_v; src = t->ref[list_index][vec1.refno]->V + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 8, 8); } if(dst != pred_v) { t->pia[MB_8x8](dst,pred_v,8,8,pred_v,8); } break; case MB_16x8: vec0 = t->mb.vec[0][0]; vec1 = t->mb.vec[1][0]; dst = pred_u; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->U + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 8, 4); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi; else dst = pred_u; list_index = 1; src = t->ref[list_index][vec1.refno]->U + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 8, 4); } if(dst != pred_u) { t->pia[MB_8x4](dst,pred_u,8,8,pred_u,8); } dst = pred_v; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->V + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 8, 4); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi; else dst = pred_v; list_index = 1; src = t->ref[list_index][vec1.refno]->V + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 8, 4); } if(dst != pred_v) { t->pia[MB_8x4](dst,pred_v,8,8,pred_v,8); } //now for next MB16x8 vec0 = t->mb.vec[0][luma_index[8]]; vec1 = t->mb.vec[1][luma_index[8]]; dst = pred_u + 4 * 8; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->U + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3) + 4 * t->edged_stride_uv; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 8, 4); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi + 4 * 8; else dst = pred_u + 4 * 8; list_index = 1; src = t->ref[list_index][vec1.refno]->U + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3) + 4 * t->edged_stride_uv; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 8, 4); } if(dst != pred_u + 4 * 8) { t->pia[MB_8x4](dst,pred_u + 4 * 8,8,8,pred_u + 4 * 8,8); } //for v dst = pred_v + 4 * 8; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->V + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3) + 4 * t->edged_stride_uv; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 8, 4); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi + 4 * 8; else dst = pred_v + 4 * 8; list_index = 1; src = t->ref[list_index][vec1.refno]->V + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3) + 4 * t->edged_stride_uv; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 8, 4); } if(dst != pred_v + 4 * 8) { t->pia[MB_8x4](dst,pred_v + 4 * 8,8,8,pred_v + 4 * 8,8); } break; case MB_8x16: vec0 = t->mb.vec[0][0]; vec1 = t->mb.vec[1][0]; dst = pred_u; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->U + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 4, 8); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi; else dst = pred_u; list_index = 1; src = t->ref[list_index][vec1.refno]->U + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 4, 8); } if(dst != pred_u) { t->pia[MB_4x8](dst,pred_u,8,8,pred_u,8); } dst = pred_v; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->V + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3); //dst = pred_v; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 4, 8); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi; else dst = pred_v; list_index = 1; src = t->ref[list_index][vec1.refno]->V + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 4, 8); } if(dst != pred_v) { t->pia[MB_4x8](dst,pred_v,8,8,pred_v,8); } //now for next MB8x16 vec0 = t->mb.vec[0][luma_index[4]]; vec1 = t->mb.vec[1][luma_index[4]]; dst = pred_u + 4; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->U + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3) + 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 4, 8); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi + 4; else dst = pred_u + 4; list_index = 1; src = t->ref[list_index][vec1.refno]->U + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3) + 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 4, 8); } if(dst != pred_u + 4) { t->pia[MB_4x8](dst,pred_u + 4,8,8,pred_u + 4,8); } //for v dst = pred_v + 4; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->V + ((t->mb.mb_y << 3) + (vec0.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3) + 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 4, 8); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi + 4; else dst = pred_v + 4; list_index = 1; src = t->ref[list_index][vec1.refno]->V + ((t->mb.mb_y << 3) + (vec1.y >> 3)) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3) + 4; t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 4, 8); } if(dst != pred_v + 4) { t->pia[MB_4x8](dst,pred_v + 4,8,8,pred_v + 4,8); } break; case MB_8x8: for(i = 0 ; i < 4 ; i ++) { switch(t->mb.submb_part[luma_index[4 * i]]) { case MB_8x8: vec0 = t->mb.vec[0][luma_index[4 * i]]; vec1 = t->mb.vec[1][luma_index[4 * i]]; dst = pred_u + i / 2 * 32 + i % 2 * 4; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->U + ((t->mb.mb_y << 3) + (vec0.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3) + (i % 2 * 4); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 4, 4); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi + i / 2 * 32 + i % 2 * 4; list_index = 1; src = t->ref[list_index][vec1.refno]->U + ((t->mb.mb_y << 3) + (vec1.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3) + (i % 2 * 4); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 4, 4); } if(dst != pred_u + i / 2 * 32 + i % 2 * 4) t->pia[MB_4x4](dst,pred_u + i / 2 * 32 + i % 2 * 4,8,8,pred_u + i / 2 * 32 + i % 2 * 4,8); dst = pred_v + i / 2 * 32 + i % 2 * 4; if(vec0.refno > -1) { list_index = 0; src = t->ref[list_index][vec0.refno]->V + ((t->mb.mb_y << 3) + (vec0.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec0.x >> 3) + (i % 2 * 4); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec0.x, vec0.y, 4, 4); } if(vec1.refno > -1) { if(vec0.refno > -1) dst = pred_bi + i / 2 * 32 + i % 2 * 4; list_index = 1; src = t->ref[list_index][vec1.refno]->V + ((t->mb.mb_y << 3) + (vec1.y >> 3) + i / 2 * 4) * t->edged_stride_uv + (t->mb.mb_x << 3) + (vec1.x >> 3) + (i % 2 * 4); t->eighth_pixel_mc_u(src, t->edged_stride_uv, dst, vec1.x, vec1.y, 4, 4); } if(dst != pred_v + i / 2 * 32 + i % 2 * 4) t->pia[MB_4x4](dst,pred_v + i / 2 * 32 + i % 2 * 4,8,8,pred_v + i / 2 * 32 + i % 2 * 4,8); break; default: assert(0); break; } } default: break; } T264dec_mb_decode_uv(t, pred_u, pred_v); }