/* www.pudn.com > t264-src-0.14.rar > utility.c */
/***************************************************************************** * * T264 AVC CODEC * * Copyright(C) 2004-2005 llcc* 2004-2005 visionany * 2005.1.13 CloudWu modify PIA_u_wxh_c() function * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * ****************************************************************************/ #define Q_BITS 15 #define DQ_BITS 6 #define DQ_ROUND (1<<(DQ_BITS-1)) #include "stdio.h" #include "portab.h" #ifndef CHIP_DM642 #include "malloc.h" #endif #include "utility.h" #ifndef CHIP_DM642 #include "memory.h" #endif // // from xvid // void* T264_malloc(int32_t size, int32_t alignment) { uint8_t *mem_ptr; if (!alignment) { /* We have not to satisfy any alignment */ if ((mem_ptr = (uint8_t *) malloc(size + 1)) != NULL) { /* Store (mem_ptr - "real allocated memory") in *(mem_ptr-1) */ *mem_ptr = (uint8_t)1; /* Return the mem_ptr pointer */ return ((void *)(mem_ptr+1)); } } else { uint8_t *tmp; /* Allocate the required size memory + alignment so we * can realign the data if necessary */ if ((tmp = (uint8_t *) malloc(size + alignment)) != NULL) { /* Align the tmp pointer */ mem_ptr = (uint8_t *) ((uint32_t) (tmp + alignment - 1) & (~(uint32_t) (alignment - 1))); /* Special case where malloc have already satisfied the alignment * We must add alignment to mem_ptr because we must store * (mem_ptr - tmp) 
in *(mem_ptr-1) * If we do not add alignment to mem_ptr then *(mem_ptr-1) points * to a forbidden memory space */ if (mem_ptr == tmp) mem_ptr += alignment; /* (mem_ptr - tmp) is stored in *(mem_ptr-1) so we are able to retrieve * the real malloc block allocated and free it in xvid_free */ *(mem_ptr - 1) = (uint8_t) (mem_ptr - tmp); /* Return the aligned pointer */ return ((void *)mem_ptr); } } return(NULL); } void T264_free(void* p) { uint8_t *ptr; if (p == NULL) return; /* Aligned pointer */ ptr = p; /* *(ptr - 1) holds the offset to the real allocated block * we sub that offset os we free the real pointer */ ptr -= *(ptr - 1); /* Free the memory */ free(ptr); } void expand8to16_c(uint8_t* src, int32_t src_stride, int32_t quarter_width, int32_t quarter_height, int16_t* dst) { int32_t i, j; for(i = 0 ; i < quarter_height * 4 ; i ++) { for(j = 0 ; j < quarter_width ; j ++) { dst[i * quarter_width * 4 + j * 4 + 0] = src[0 + j * 4]; dst[i * quarter_width * 4 + j * 4 + 1] = src[1 + j * 4]; dst[i * quarter_width * 4 + j * 4 + 2] = src[2 + j * 4]; dst[i * quarter_width * 4 + j * 4 + 3] = src[3 + j * 4]; } src += src_stride; } } void expand8to16sub_c(uint8_t* pred, int32_t quarter_width, int32_t quarter_height, int16_t* dst, uint8_t* src, int32_t src_stride) { int32_t i, j, k; uint8_t* start_p; uint8_t* start_s; for(i = 0 ; i < quarter_height ; i ++) { for(j = 0 ; j < quarter_width ; j ++) { start_p = pred + i * quarter_width * 4 * 4 + j * 4; start_s = src + i * src_stride * 4 + j * 4; for(k = 0 ; k < 4 ; k ++) { dst[0] = start_s[0] - start_p[0]; dst[1] = start_s[1] - start_p[1]; dst[2] = start_s[2] - start_p[2]; dst[3] = start_s[3] - start_p[3]; dst += 4; start_p += 4 * quarter_width; start_s += src_stride; } } } } void contract16to8_c(int16_t* src, int32_t quarter_width, int32_t quarter_height, uint8_t* dst, int32_t dst_stride) { int32_t i, j; for(i = 0 ; i < quarter_height * 4 ; i ++) { for(j = 0 ; j < quarter_width ; j ++) { int16_t tmp; tmp = src[i * quarter_width * 
4 + j * 4 + 0]; dst[0 + j * 4] = CLIP1(tmp); tmp = src[i * quarter_width * 4 + j * 4 + 1]; dst[1 + j * 4] = CLIP1(tmp); tmp = src[i * quarter_width * 4 + j * 4 + 2]; dst[2 + j * 4] = CLIP1(tmp); tmp = src[i * quarter_width * 4 + j * 4 + 3]; dst[3 + j * 4] = CLIP1(tmp); } dst += dst_stride; } } void //assigned contract16to8add_c(int16_t* src, int32_t quarter_width, int32_t quarter_height, uint8_t* pred, uint8_t* dst, int32_t dst_stride) { int32_t i, j, k; uint8_t* start_p; uint8_t* start_d; for(i = 0 ; i < quarter_height ; i ++) { for(j = 0 ; j < quarter_width ; j ++) { start_p = pred + i * quarter_width * 4 * 4 + j * 4; start_d = dst + i * dst_stride * 4 + j * 4; for(k = 0 ; k < 4 ; k ++) { int16_t tmp; tmp = src[0] + start_p[0]; start_d[0] = CLIP1(tmp); tmp = src[1] + start_p[1]; start_d[1] = CLIP1(tmp); tmp = src[2] + start_p[2]; start_d[2] = CLIP1(tmp); tmp = src[3] + start_p[3]; start_d[3] = CLIP1(tmp); //tmp = (src[0] + (start_p[0] << DQ_BITS) + DQ_ROUND) >> DQ_BITS; //start_d[0] = CLIP1(tmp); //tmp = (src[1] + (start_p[1] << DQ_BITS) + DQ_ROUND) >> DQ_BITS; //start_d[1] = CLIP1(tmp); //tmp = (src[2] + (start_p[2] << DQ_BITS) + DQ_ROUND) >> DQ_BITS; //start_d[2] = CLIP1(tmp); //tmp = (src[3] + (start_p[3] << DQ_BITS) + DQ_ROUND) >> DQ_BITS; //start_d[3] = CLIP1(tmp); src += 4; start_p += 4 * quarter_width; start_d += dst_stride; } } } } void memcpy_stride_u_c(void* src, int32_t width, int32_t height, int32_t src_stride, void* dst, int32_t dst_stride) { int32_t i; uint8_t* s = src; uint8_t* d = dst; for(i = 0 ; i < height ; i ++) { memcpy(d, s, width); s += src_stride; d += dst_stride; } } static __inline uint32_t T264_sad_u_c(uint8_t* src, int32_t src_stride, uint8_t* data, int32_t width, int32_t height, int32_t dst_stride) { int32_t i, j; uint32_t sad; sad = 0; for(i = 0 ; i < height ; i ++) { for(j = 0 ; j < width ; j ++) { int32_t tmp = data[j] - src[j]; sad += ABS(tmp); } src += src_stride; data += dst_stride; } return sad; } //copied from JM,by cloud wu 
/*
 * SATD of one 4x4 block of differences.
 *
 * diff : 16 values in row-major order (diff[4*row + col]).
 *
 * Applies a 4-point Hadamard butterfly down every column, then along every
 * row, sums the absolute values of the 16 coefficients and returns
 * (sum + 1) >> 1.
 */
static __inline uint32_t
_satd_4x4_dif_c(int16_t* diff)
{
    int32_t d[16];
    int32_t satd = 0;
    int32_t k;

    /*===== hadamard transform: vertical pass, one column at a time =====*/
    for (k = 0; k < 4; k++)
    {
        int32_t s03 = diff[k]     + diff[12 + k];
        int32_t s12 = diff[4 + k] + diff[8 + k];
        int32_t d12 = diff[4 + k] - diff[8 + k];
        int32_t d03 = diff[k]     - diff[12 + k];

        d[k]      = s03 + s12;
        d[8 + k]  = s03 - s12;
        d[4 + k]  = d12 + d03;
        d[12 + k] = d03 - d12;
    }

    /*===== hadamard transform: horizontal pass, one row at a time =====*/
    for (k = 0; k < 16; k += 4)
    {
        int32_t s03 = d[k]     + d[k + 3];
        int32_t s12 = d[k + 1] + d[k + 2];
        int32_t d12 = d[k + 1] - d[k + 2];
        int32_t d03 = d[k]     - d[k + 3];

        d[k]     = s03 + s12;
        d[k + 1] = s03 - s12;
        d[k + 2] = d12 + d03;
        d[k + 3] = d03 - d12;
    }

    /*===== sum up =====*/
    /* Plain indexed loop: the former `dd = d[++k]` form fetched d[16], one
     * element past the array, before testing the loop condition. */
    for (k = 0; k < 16; k++)
    {
        int32_t dd = d[k];
        satd += (dd < 0 ? -dd : dd);
    }

    satd = ((satd + 1) >> 1);

    return satd;
}

/*
 * Sum of 4x4 SATDs over a width x height area.
 *
 * src / data           : the two sample planes being compared.
 * src_stride/dst_stride: row pitch of `src` / `data`.
 *
 * The area is walked in 4x4 steps and every step reads a full 4x4 block, so
 * width and height are expected to be multiples of 4.
 */
static __inline uint32_t
T264_satd_u_c(uint8_t* src, int32_t src_stride, uint8_t* data, int32_t width, int32_t height, int32_t dst_stride)
{
    int32_t by, bx, row, col;
    uint32_t total = 0;
    int16_t diff[16];

    for (by = 0; by < height; by += 4)
    {
        for (bx = 0; bx < width; bx += 4)
        {
            const uint8_t* s = src + by * src_stride + bx;
            const uint8_t* d = data + by * dst_stride + bx;

            /* Gather data - src for this 4x4 block in row-major order */
            for (row = 0; row < 4; row++)
            {
                for (col = 0; col < 4; col++)
                    diff[row * 4 + col] = d[col] - s[col];

                d += dst_stride;
                s += src_stride;
            }

            total += _satd_4x4_dif_c(diff);
        }
    }

    return total;
}

/*
 * Generates the public fixed-size entry point
 *   T264_<base>_u_<w>x<h>_c(src, src_stride, data, dst_stride)
 * which forwards to T264_<base>_u_c() with the block size filled in.
 */
#define SADFUNC(w, h, base) \
    uint32_t \
    T264_##base##_u_##w##x##h##_c(uint8_t* src, int32_t src_stride, uint8_t* data, int32_t dst_stride) \
    { \
        return T264_##base##_u_c(src, src_stride, data, w, h, dst_stride); \
    }

SADFUNC(16, 16, sad)
SADFUNC(16, 8,  sad)
SADFUNC(8,  16, sad)
SADFUNC(8,  8,  sad)
SADFUNC(8,  4,  sad)
SADFUNC(4,  8,  sad)
SADFUNC(4,  4,  sad)

SADFUNC(16, 16, satd)
SADFUNC(16, 8,  satd)
SADFUNC(8,  16, satd)
SADFUNC(8,  8,  satd)
SADFUNC(8,  4,  satd)
SADFUNC(4,  8,  satd)
SADFUNC(4,  4,  satd)

/**********************************************************************************
 *
 * Based on the FUNCTION T264_satd_u_c()
 * use SATD for 16x16 Intra
 * Thomascatlee@163.com
 *
 *********************************************************************************/
/*
 * SATD of a 16x16 area; currently identical to T264_satd_u_c() with
 * width = height = 16.
 *
 * An earlier, disabled implementation lived here: it transformed each 4x4
 * block, set aside every block's DC term (>> 2), ran a second 4x4 Hadamard
 * over the 16 DC terms and returned the accumulated sum >> 1.
 */
uint32_t
T264_satd_i16x16_u_c(uint8_t* src, int32_t src_stride, uint8_t* data, int32_t dst_stride)
{
    return T264_satd_u_c(src, src_stride, data, 16, 16, dst_stride);
}

//calculate non-zero counts for an array v[i_count]
int32_t
array_non_zero_count(int16_t *v, int32_t i_count)
{
    int32_t i_nz = 0;
    int32_t i;

    for (i = 0; i < i_count; i++)
    {
        if (v[i] != 0)
            i_nz++;
    }

    return i_nz;
}