// www.pudn.com > coremp4-1.0.zip > softidct.cpp
/*****************************************************************************
*
* This program is free software ; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* The Core Pocket Media Player
* Copyright (c) 2004-2005 Gabor Kovacs
*
****************************************************************************/
#include "Rules.h"
#include "Util.h"
#include "SoftIdct.h"
#ifdef ARM
#include "DynamicArmCode.h"
#endif
#define USE_WMMX //use Wireless MMX, if available by platform and is supported by hardware
//----------------------------
//A motion vector is packed into one int: X in the low 16 bits, Y in the high 16 bits.
//The lowest bit of each component is a flag (see MV_SUB), the remaining bits are the signed offset.
//All arguments and results are fully parenthesized so the macros can be used inside larger
//expressions (e.g. MV_SUB(v)*2 or MV_X(a|b)) without operator-precedence surprises.
//X offset: sign-extend the low 16 bits, then drop the flag bit
#define MV_X(v) (((v)<<16)>>17)
//Y offset: high 16 bits without their flag bit
#define MV_Y(v) ((v)>>17)
//index 0..3 built from the two flag bits: bit 0 = X flag, bit 1 = Y flag
//(used to pick an entry of the CopyBlock / add_block tables)
#define MV_SUB(v) (((v)&1)+(((v)>>15)&2))
//shift converting a luma width / height into the chroma one
#define UVX2 1
#define UVY2 1
//border, in luma pixels, kept around the picture in every frame buffer
#define EDGE 32
//----------------------------
// Constant-block routine, declared for every build; the constructor installs it as the
// default IDCT_Const8x8 handler.
void IDCT_Const8x8(int v, byte * Dst,int DstStride, const byte *Src);
// Free-function implementations declared only when _ARM is NOT defined. The constructor's
// non-ARM branch stores them in the add_block / all_copy_block tables and in the
// CopyBlock8x8, CopyBlock16x16, IDCT_Block4x8 and IDCT_Block8x8 members; its ARM branch
// takes the same entry points from run-time generated code instead.
// NOTE(review): this guard tests _ARM while the rest of the file tests ARM (no underscore);
// confirm the two macros are always defined together.
#ifndef _ARM
// Copy variants; the Hor/Ver/HorVer names match the sub-position index produced by MV_SUB,
// the *Round versions fill the second row of all_copy_block.
void CopyBlock(const byte * Src, byte * Dst, int SrcPitch, int DstPitch);
void CopyBlockHor(const byte * Src, byte * Dst, int SrcPitch, int DstPitch);
void CopyBlockVer(const byte * Src, byte * Dst, int SrcPitch, int DstPitch);
void CopyBlockHorVer(const byte * Src, byte * Dst, int SrcPitch, int DstPitch);
void CopyBlockHorRound(const byte * Src, byte * Dst, int SrcPitch, int DstPitch);
void CopyBlockVerRound(const byte * Src, byte * Dst, int SrcPitch, int DstPitch);
void CopyBlockHorVerRound(const byte * Src, byte * Dst, int SrcPitch, int DstPitch);
// IDCT entry points; Src is the prediction block the callers pass in (temp_buffer_16).
void IDCT_Block4x8(short *Block, byte *Dest, int DestStride, const byte *Src);
void IDCT_Block8x8(short *Block, byte *Dest, int DestStride, const byte *Src);
// AddBlock variants take no destination pitch: the destination pitch is fixed at 8
// (callers pass the 8-byte-pitch temp_buffer_16 as Dst).
void AddBlock(const byte * Src, byte * Dst, int SrcPitch);
void AddBlockHor(const byte * Src, byte * Dst, int SrcPitch);
void AddBlockVer(const byte * Src, byte * Dst, int SrcPitch);
void AddBlockHorVer(const byte * Src, byte * Dst, int SrcPitch);
// Whole-block copies; Src must be aligned (required alignment is not stated here).
void CopyBlock8x8(const byte * Src, byte * Dst, int SrcPitch,int DstPitch);
void CopyBlock16x16(const byte * Src, byte * Dst, int SrcPitch,int DstPitch);
#endif
//----------------------------
// NOTE(review): this span is damaged. On the loop line below, the text between "i" and
// ">>= 1" is missing (it looks as if everything from a '<' up to a later '>' was stripped
// when the file was extracted), so the body of Drop() and the beginning of the following
// function are lost and the span does not compile as shown. Restore it from the original
// sources before editing.
// What remains after the gap is the tail of a function that advances curr_block_ptr by v,
// and curr_back_buf_ptr / curr_forward_buf_ptr too when inc_back / inc_forward are set -
// presumably IncPtr(bool inc_back, bool inc_forward), which the rest of the file calls.
void C_softidct::Drop(){
for(int i=0; i>= 1;
// clear the top (flag) bit so only the offset part of v is added below
v &= 0x7fffffff;
}
curr_block_ptr += v;
if(inc_back)
curr_back_buf_ptr += v;
if(inc_forward)
curr_forward_buf_ptr += v;
}
//----------------------------
// Hands out the three planes of the buffer selected for display.
// Y, U, V - receive pointers to the first pixel inside the EDGE border of each plane;
//           left untouched when the function fails.
// Returns false when no buffer is selected (rendered_buffer_index is negative).
bool C_softidct::GetBuffer(const byte *&Y, const byte *&U, const byte *&V) const{
   if(rendered_buffer_index >= 0){
      //luma: skip EDGE rows and EDGE columns of border
      Y = buffers[rendered_buffer_index].ptr + EDGE*buffer_sx + EDGE;
      //first chroma plane starts right after the luma plane; its border and pitch are
      //the luma ones scaled down by the UVX2 / UVY2 shifts
      U = buffers[rendered_buffer_index].ptr + Y_buffer_size
         + (EDGE >> UVY2)*(buffer_sx >> UVX2) + (EDGE >> UVX2);
      //second chroma plane follows at a distance of one chroma plane
      V = U + (Y_buffer_size >> (UVX2+UVY2));
      return true;
   }
   return false;
}
//----------------------------
// Reconstructs one 8x8 inter-predicted block at curr_block_ptr, then advances the block
// pointers with IncPtr.
// Block  - coefficient data handed to the IDCT routines (only Block[0] is read when Length == 1)
// Length - 0: no residual, motion compensation only;
//          1: only Block[0] is used (IDCT_Const8x8, or a plain copy when it rounds to zero);
//          otherwise: selects between IDCT_Block4x8 and IDCT_Block8x8.
// When ptr_forward_buffer is NULL only the backward reference is used. Otherwise mv_back and
// mv_forward (whichever is non-NULL) choose the references; when both are set the forward
// block is combined into the backward one through add_block.
// Every reference fetch is skipped when the motion-compensated source pointer lies outside
// [reference buffer start, ptr_b_max / ptr_f_max).
void C_softidct::Inter8x8(short* Block, int Length){
if(!ptr_forward_buffer){
// Single (backward) reference.
// NOTE(review): mv_back is dereferenced here without the NULL test used in the branch below.
int MV = *mv_back++;
byte *ptr = curr_back_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
if(Length){
//mcomp and idct (using tmp buffer)
// If the range test fails temp_buffer_16 keeps its previous contents and is still used below.
if(ptr >= ptr_backward_buffer && ptr < ptr_b_max)
CopyBlock[MV_SUB(MV)](ptr, temp_buffer_16, current_pitch, 8);
if(Length == 1){
// Only Block[0] is used: rounded division by 8
int v = (Block[0]+4) >> 3;
if(v)
IDCT_Const8x8(v, curr_block_ptr, current_pitch, temp_buffer_16);
else
// nothing to add - just move the prediction (pitch 8) to the destination
CopyBlock8x8(temp_buffer_16, curr_block_ptr, 8, current_pitch);
}else
// The 4x8 routine is chosen for short blocks, or for medium ones whose dwords 2 and 6 are zero
// (Block[4..5] and Block[12..13], assuming a 32-bit dword - TODO confirm the intent).
if(Length < 15 || (Length<26 && ((dword*)Block)[2]==0 && ((dword*)Block)[6]==0)){
IDCT_Block4x8(Block, curr_block_ptr, current_pitch, temp_buffer_16);
}else{
IDCT_Block8x8(Block, curr_block_ptr, current_pitch, temp_buffer_16);
}
} else {
//only back mcomp
// written straight to the destination, no temporary needed
if(ptr >= ptr_backward_buffer && ptr < ptr_b_max)
CopyBlock[MV_SUB(MV)](ptr, curr_block_ptr, current_pitch, current_pitch);
}
// only the backward reference pointer follows the block pointer here
IncPtr(true, false);
}else{
// A forward reference exists: either motion-vector stream may be absent (NULL).
if(Length){
//mcomp and idct (using tmp buffer)
if(mv_back){
int MV = *mv_back++;
byte *ptr = curr_back_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
if(ptr >= ptr_backward_buffer && ptr < ptr_b_max)
CopyBlock[MV_SUB(MV)](ptr, temp_buffer_16, current_pitch, 8);
if(mv_forward){
// both references: combine the forward block into the temporary
MV = *mv_forward++;
ptr = curr_forward_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
if(ptr >= ptr_forward_buffer && ptr < ptr_f_max)
add_block[MV_SUB(MV)](ptr, temp_buffer_16, current_pitch);
}
}else
if(mv_forward){
// forward reference only
int MV = *mv_forward++;
byte *ptr = curr_forward_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
if(ptr >= ptr_forward_buffer && ptr < ptr_f_max)
CopyBlock[MV_SUB(MV)](ptr,temp_buffer_16, current_pitch, 8);
}
if(Length == 1){
// Only Block[0] is used: rounded division by 8
int v = (Block[0]+4) >> 3;
if(v)
IDCT_Const8x8(v, curr_block_ptr, current_pitch, temp_buffer_16);
else
CopyBlock8x8(temp_buffer_16, curr_block_ptr, 8, current_pitch);
}else
// NOTE(review): unlike the single-reference branch, no "Length<26 with zero dwords" shortcut here.
if(Length < 15){
IDCT_Block4x8(Block, curr_block_ptr, current_pitch, temp_buffer_16);
}else{
IDCT_Block8x8(Block, curr_block_ptr, current_pitch, temp_buffer_16);
}
}else{
//interpolate back and foward (using tmp buffer)
if(mv_back && mv_forward){
int MV = *mv_back++;
byte *ptr = curr_back_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
if(ptr >= ptr_backward_buffer && ptr < ptr_b_max)
CopyBlock[MV_SUB(MV)](ptr, temp_buffer_16, current_pitch, 8);
MV = *mv_forward++;
ptr = curr_forward_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
// NOTE(review): the destination is written only when the forward pointer passes the range
// test; if it fails, the backward block fetched above never reaches curr_block_ptr.
if(ptr >= ptr_forward_buffer && ptr < ptr_f_max){
add_block[MV_SUB(MV)](ptr, temp_buffer_16, current_pitch);
//copy temp_buffer_16 to Dst
all_copy_block[0][0](temp_buffer_16, curr_block_ptr, 8, current_pitch);
}
}else
if(mv_back){
// backward reference only: copy straight to the destination
int MV = *mv_back++;
byte *ptr = curr_back_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
if(ptr >= ptr_backward_buffer && ptr < ptr_b_max)
CopyBlock[MV_SUB(MV)](ptr, curr_block_ptr, current_pitch, current_pitch);
}else
if(mv_forward){
// forward reference only: copy straight to the destination
int MV = *mv_forward++;
byte *ptr = curr_forward_buf_ptr + MV_X(MV) + current_pitch * MV_Y(MV);
if(ptr >= ptr_forward_buffer && ptr < ptr_f_max)
CopyBlock[MV_SUB(MV)](ptr, curr_block_ptr, current_pitch, current_pitch);
}
}
// both reference pointers follow the block pointer
IncPtr(true, true);
}
}
//----------------------------
// Positions the working pointers on the 16x16 macroblock with macroblock coordinates (x, y).
// Sets the current pitch to the luma pitch, points the destination / backward / forward
// pointers at the macroblock's first luma block and restarts the pass_offset sequence.
void C_softidct::BeginBlock(int x, int y){
   //byte offsets of the macroblock inside the luma plane, border included
   const int col = x*16 + EDGE;
   const int row_offs = (y*16 + EDGE) * buffer_sx;

   pass_ptr = pass_offset;
   current_pitch = buffer_sx;

   curr_block_ptr = curr_frame_buffer_ptr + col + row_offs;
   curr_back_buf_ptr = ptr_backward_buffer + col + row_offs;
   curr_forward_buf_ptr = ptr_forward_buffer + col + row_offs;

   //Y[1;1] -> U
   //Undo the position of the last luma block (macroblock offset + 8 rows + 8 columns), then
   //move to the matching spot in the plane that follows the luma plane: half the column
   //offset and a quarter of the row offset (half the rows at half the pitch).
   //The top bit is a flag that the pointer-advance code masks off again.
   pass_offset[3] = (-row_offs-col-8*buffer_sx-8 + Y_buffer_size + (col>>1) + (row_offs>>2)) | 0x80000000;
}
//----------------------------
void C_softidct::Copy16x16(int x, int y, int Forward){
BeginBlock(x, y);
byte *&ref_ptr = Forward ? curr_forward_buf_ptr : curr_back_buf_ptr;
CopyBlock16x16(ref_ptr, curr_block_ptr, current_pitch, current_pitch);
{
//like 4x IncPtr
dword v = (pass_offset[0]+pass_offset[1]+pass_offset[2]+pass_offset[3]) & 0x7fffffff;
current_pitch >>= 1;
curr_block_ptr += v;
curr_back_buf_ptr += v;
curr_forward_buf_ptr += v;
pass_ptr = pass_offset+4;
}
//U
CopyBlock8x8(ref_ptr, curr_block_ptr, current_pitch, current_pitch);
IncPtr(true, true);
//V
CopyBlock8x8(ref_ptr, curr_block_ptr, current_pitch, current_pitch);
}
//----------------------------
// NOTE(review): this span is damaged. On the "for(n=0;n>UVX2" line the text between "n" and
// ">UVX2" is missing (apparently stripped from a '<' up to a later '>'), so the body of
// FillEdge and the beginning of the next function are lost and the span does not compile as
// shown. Restore it from the original sources before editing.
// Visible head: FillEdge receives a plane pointer, the plane's full size (sx, sy) and its
// border sizes (EdgeX, EdgeY); it derives the inner picture size and starts at the first
// inner pixel to handle the left and right borders.
// Visible tail: presumably the end of FillEdgeYUV(ptr) (called from FrameStart) - it calls
// FillEdge on a chroma-sized plane, steps ptr forward by one chroma plane and calls it again.
static void FillEdge(byte *ptr, int sx, int sy, int EdgeX, int EdgeY){
int n;
byte *p;
// size of the picture area without the border on both sides
int InnerWidth = sx - EdgeX*2;
int InnerHeight = sy - EdgeY*2;
//left and right
// first pixel inside the border
p = ptr + EdgeX + EdgeY * sx;
for(n=0;n>UVX2, buffer_sy>>UVY2, EDGE>>UVX2, EDGE>>UVY2);
//V
// advance by the size of one chroma plane
ptr += Y_buffer_size >> (UVX2+UVY2);
FillEdge(ptr, buffer_sx>>UVX2, buffer_sy>>UVY2, EDGE>>UVX2, EDGE>>UVY2);
}
//----------------------------
// Sets the number of frame buffers to n.
// Returns true straight away when at least n buffers already exist, false when n exceeds
// MAXBUF, and otherwise whether bufer_count ended up equal to n.
// NOTE(review): this function is damaged. The two "for(" lines below have lost the text
// between a '<' and a later '>' (loop conditions, the allocation code and the opening of a
// comment block whose closing "*/" is still present), so it does not compile as shown.
// Restore it from the original sources before editing.
bool C_softidct::SetBufferCount(int n){
if(n<=bufer_count)
return true;
int i;
if(n>MAXBUF){
return false;
}
for(i=n; i n)
bufer_count = n;
for(i=bufer_count; i=2 && AvailMemory()<64*1024){
delete[] buffers[i].allocated;
buffer_size = NULL;
max_buffer_count = bufer_count;
break;
}
*/
// Round the allocated address up to a multiple of Align.
// NOTE(review): the pointer is cast through dword; if dword is 32 bits this only works on
// targets with 32-bit pointers.
buffers[i].ptr = (byte*)(((dword)buffers[i].allocated + Align-1) & ~(Align-1));
// a new buffer has no valid border and holds no frame yet
buffers[i].has_border = false;
buffers[i].frame_index = -1;
bufer_count = i+1;
#ifdef _DEBUG
// debug builds clear the third buffer (index 2) to zero
if(i==2) //fill B-frame buffer to green, for easier detection of problems
MemSet(buffers[i].ptr, 0x0, buffer_size);
#endif
}
// the displayed buffer may no longer exist
if(rendered_buffer_index >= bufer_count)
rendered_buffer_index = -1;
return (bufer_count == n);
}
//----------------------------
// Prepares decoding of a new frame.
// frame_index - index stored with the destination buffer
// DstNo       - buffer that receives the decoded frame
// BackNo      - buffer used as backward reference, negative for none
// FwdNo       - buffer used as forward reference, negative for none
// ShowNo      - buffer reported by GetBuffer from now on
void C_softidct::FrameStart(int frame_index, int DstNo, int BackNo, int FwdNo, int ShowNo){
   rendered_buffer_index = ShowNo;

   //claim the destination buffer, remembering which frame it held before
   last_frame_index = buffers[DstNo].frame_index;
   buffers[DstNo].frame_index = frame_index;
   curr_frame_buffer_ptr = buffers[DstNo].ptr;

   //start without references
   ptr_backward_buffer = NULL;
   ptr_forward_buffer = NULL;
   ptr_b_max = NULL;
   ptr_f_max = NULL;

   //backward reference: make sure its border is filled, and compute the upper limit
   //used by the motion-compensation range checks
   if(BackNo >= 0){
      ptr_backward_buffer = buffers[BackNo].ptr;
      if(!buffers[BackNo].has_border){
         buffers[BackNo].has_border = true;
         FillEdgeYUV(ptr_backward_buffer);
      }
      ptr_b_max = ptr_backward_buffer + buffer_size -8-8*UV_buffer_pitch;
   }

   //forward reference: same treatment
   if(FwdNo >= 0){
      ptr_forward_buffer = buffers[FwdNo].ptr;
      if(!buffers[FwdNo].has_border){
         buffers[FwdNo].has_border = true;
         FillEdgeYUV(ptr_forward_buffer);
      }
      ptr_f_max = ptr_forward_buffer + buffer_size -8-8*UV_buffer_pitch;
   }

   //the destination is about to be overwritten, so its border is no longer valid
   buffers[DstNo].has_border = false;
}
//----------------------------
// Builds a decoder back-end for pictures of sx by sy pixels.
// Installs the motion-compensation and IDCT routines (run-time generated code on ARM builds,
// the free functions otherwise), aligns the temporary block buffer and computes the frame
// buffer geometry. No frame buffers are allocated here.
C_softidct::C_softidct(dword sx, dword sy):
bufer_count(0),
buffer_size(0),
#ifdef ARM
dyn_code(NULL),
#endif
image_sx(sx),
image_sy(sy),
rendered_buffer_index(-1),
last_frame_index(-1)
{
   MemSet(buffers, 0, sizeof(buffers));
   //default constant-block routine; the ARM path below may replace it
   IDCT_Const8x8 = ::IDCT_Const8x8;

#ifdef ARM
   //generators of the run-time assembled routines
   void BuildMotionCompensationFunctions(C_dyn_code&, bool use_wmmx);
   void BuildIDCTFunctions(C_dyn_code&, bool use_wmmx);

#ifdef USE_WMMX
   const dword caps = C_dyn_code::GetCpuCaps();
   const bool use_wmmx = (caps & C_dyn_code::CPU_WIRELESS_MMX) != 0;
#else
   const bool use_wmmx = false;
#endif

   dyn_code = C_dyn_code::Create();
   BuildMotionCompensationFunctions(*dyn_code, use_wmmx);
   BuildIDCTFunctions(*dyn_code, use_wmmx);
   dyn_code->CodeBuild();
   //if(((dword*)dyn_code->Code(0))[-1]!=0xe289409d) Fatal("!", 0);

   //entries 0..3: add-block variants
   int slot = 0;
   while(slot < 4){
      add_block[slot] = (t_AddBlock)dyn_code->Code(slot);
      ++slot;
   }
   //entry 4: plain copy, shared by both rows of the table
   all_copy_block[1][0] = (t_CopyBlock)dyn_code->Code(4);
   all_copy_block[0][0] = all_copy_block[1][0];
   //entries 5..10: (row 0, row 1) pairs for table columns 1..3
   for(slot = 1; slot <= 3; slot++){
      all_copy_block[0][slot] = (t_CopyBlock)dyn_code->Code(3 + slot*2);
      all_copy_block[1][slot] = (t_CopyBlock)dyn_code->Code(4 + slot*2);
   }
   //entries 11..14: whole-block copies and the IDCT routines
   CopyBlock8x8 = (t_CopyBlock)dyn_code->Code(11);
   CopyBlock16x16 = (t_CopyBlock)dyn_code->Code(12);
   IDCT_Block4x8 = (t_IDCT_Block)dyn_code->Code(13);
   IDCT_Block8x8 = (t_IDCT_Block)dyn_code->Code(14);
   //entry 15 is taken only when Wireless MMX is in use
   if(use_wmmx)
      IDCT_Const8x8 = (t_IDCT_Const8x8)dyn_code->Code(15);
#else
   //row 0 of the copy table: plain variants
   all_copy_block[0][0] = ::CopyBlock;
   all_copy_block[0][1] = ::CopyBlockHor;
   all_copy_block[0][2] = ::CopyBlockVer;
   all_copy_block[0][3] = ::CopyBlockHorVer;
   //row 1 of the copy table: *Round variants (the plain copy has none)
   all_copy_block[1][0] = ::CopyBlock;
   all_copy_block[1][1] = ::CopyBlockHorRound;
   all_copy_block[1][2] = ::CopyBlockVerRound;
   all_copy_block[1][3] = ::CopyBlockHorVerRound;

   add_block[0] = AddBlock;
   add_block[1] = AddBlockHor;
   add_block[2] = AddBlockVer;
   add_block[3] = AddBlockHorVer;

   CopyBlock8x8 = ::CopyBlock8x8;
   CopyBlock16x16 = ::CopyBlock16x16;
   IDCT_Block4x8 = ::IDCT_Block4x8;
   IDCT_Block8x8 = ::IDCT_Block8x8;
#endif

   //16-byte aligned window inside _temp_buffer
   temp_buffer_16 = (byte*)(((dword)_temp_buffer + 15) & ~15);

   //compute sizes of buffers:
   //picture size rounded up to whole macroblocks, plus the EDGE border on every side
   const int mask_x = (8 << UVX2) - 1;
   const int mask_y = (8 << UVY2) - 1;
   buffer_sx = ((image_sx+mask_x)&~mask_x)+2*EDGE;
   buffer_sy = ((image_sy+mask_y)&~mask_y)+2*EDGE;
   UV_buffer_pitch = buffer_sx >> UVX2;
   Y_buffer_size = buffer_sy * buffer_sx;
   //one luma plane followed by two sub-sampled chroma planes
   buffer_size = Y_buffer_size + 2*(Y_buffer_size >> (UVX2+UVY2));

   //pointer steps between the blocks of a macroblock; entry 3 depends on the macroblock
   //position and is filled in by BeginBlock
   pass_offset[0] = 8;                    //Y[0;0] -> Y[0;1]
   pass_offset[1] = 8*buffer_sx-8;        //Y[0;1] -> Y[1;0]
   pass_offset[2] = 8;                    //Y[1;0] -> Y[1;1]
   pass_offset[4] = Y_buffer_size >> 2;   //U->V
   pass_offset[5] = 0;

   SetRounding(false);
}
//----------------------------
C_softidct::~C_softidct(){
for(int i=0; i