// Source listing: www.pudn.com > src.rar > Copy.cpp
#include "stdafx.h"
// NOTE(review): the name of this system header was lost when the listing was
// extracted (only a bare "#include" survived). <math.h> is presumed; confirm
// against the original project.
#include <math.h>
#include "DirectVobSubFilter.h"
#include "misc.h"

// Per-channel luma lookup tables, defined elsewhere in the project.
extern int c2y_yb[256];
extern int c2y_yg[256];
extern int c2y_yr[256];

// Four 16-bit words (0x0010, 0x0080, 0x0010, 0x0080): the Y/U/Y/V offsets that
// MixLine subtracts from an unpacked YUY2 dword before scaling it.
static const __int64 _8181 = 0x0080001000800010i64;

/*
 * Mixes one scanline of the 32-bit subtitle bitmap `sub` with the picture line
 * `s` and stores the result in `d`.
 *
 *  d       - destination line
 *  s       - source picture line, or NULL when there is no picture behind the
 *            subtitle (the format's black is used as background then)
 *  sub     - subtitle line, 4 bytes per pixel; byte 3 is the weight applied to
 *            the background pixel (0xff leaves the background untouched) and
 *            the colour bytes are added as-is. For YUY2 the colour bytes are
 *            read as already-converted Y/U/V values (see the packing comments).
 *  pitch   - number of destination bytes to process
 *  subtype - output media subtype selecting the pixel format
 */
void MixLine(uint* d, uint* s, uchar* sub, int pitch, const GUID& subtype)
{
	if(subtype == MEDIASUBTYPE_YUY2)
	{
		uint* dstend = d + (pitch>>2);

		if(s != NULL)
		{
			// one iteration = one YUY2 dword = two pixels = two subtitle pixels
			for(; d < dstend; sub+=8, s++, d++)
			{
				int a3 = (sub[3]+sub[7])>>1; // shared weight for the two pixels' chroma

				if(a3 < 0xff)
				{
//					rgb2yuv(sub[2], sub[1], sub[0], sub[6], sub[5], sub[4]);
/*
					dy1 = (((((int)(*s)&0xff)-0x10)*sub[3])>>8) + sub[1]; // + y1;
					dy2 = (((((int)(*s>>16)&0xff)-0x10)*sub[7])>>8) + sub[5]; // + y2;
					du = (((((int)(*s>>8)&0xff)-0x80)*a3)>>8) + sub[0]; // + u;
					dv = (((((int)(*s>>24)&0xff)-0x80)*a3)>>8) + sub[4]; // + v;
					*d = (dv<<24)|(dy2<<16)|(du<<8)|dy1;
*/
					uint ia = (a3<<24)|(sub[7]<<16)|(a3<<8)|sub[3];
					uint c = (sub[4]<<24)|(sub[5]<<16)|(sub[0]<<8)|sub[1]; // (v<<24)|(y2<<16)|(u<<8)|y1;

					// MMX version of the commented-out scalar code above
					__asm
					{
						mov			esi, s
						mov			edi, d
						pxor		mm0, mm0
						movq		mm1, _8181
						movd		mm2, c
						punpcklbw	mm2, mm0
						movd		mm3, [esi]
						punpcklbw	mm3, mm0
						movd		mm4, ia
						punpcklbw	mm4, mm0
						psrlw		mm4, 1
						psubsw		mm3, mm1
						pmullw		mm3, mm4
						psraw		mm3, 7
						paddsw		mm3, mm2
						packuswb	mm3, mm3
						movd		[edi], mm3
					};
				}
				else
				{
					*d = *s;
				}
			}
		}
		else
		{
			for(; d < dstend; sub+=8, d++)
			{
				if((sub[3]+sub[7]) < (0xff<<1))
				{
//					rgb2yuv(sub[2], sub[1], sub[0], sub[6], sub[5], sub[4]);
//					*d = (v<<24)|(y2<<16)|(u<<8)|y1;
					*d = (sub[4]<<24)|(sub[5]<<16)|(sub[0]<<8)|sub[1];
				}
				else
				{
					*d = 0x80108010;
				}
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB555)
	{
		ushort* ss = (ushort*)s;
		ushort* ds = (ushort*)d;
		ushort* dstend = (ushort*)(d + (pitch>>2));

		if(ss != NULL)
		{
			for(; ds < dstend; sub+=4, ss++, ds++)
			{
				if(sub[3] < 0xff)
				{
					*ds = (((((*ss&0x7c00)*sub[3])>>8) + ((*((uint*)sub)>>9)&0x7c00))&0x7c00)
						| (((((*ss&0x03e0)*sub[3])>>8) + ((*((uint*)sub)>>6)&0x03e0))&0x03e0)
						| (((((*ss&0x001f)*sub[3])>>8) + ((*((uint*)sub)>>3)&0x001f))&0x001f);
				}
				else
				{
					*ds = *ss;
				}
			}
		}
		else
		{
			for(; ds < dstend; sub+=4, ds++)
			{
				if(sub[3] < 0xff)
				{
					*ds = ((*((uint*)sub)>>9)&0x7c00)|((*((uint*)sub)>>6)&0x03e0)|((*((uint*)sub)>>3)&0x001f);
				}
				else
				{
					*ds = 0;
				}
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB565)
	{
		ushort* ss = (ushort*)s;
		ushort* ds = (ushort*)d;
		ushort* dstend = (ushort*)(d + (pitch>>2));

		if(ss != NULL)
		{
			for(; ds < dstend; sub+=4, ss++, ds++)
			{
				if(sub[3] < 0xff)
				{
					*ds = (((((*ss&0xf800)*sub[3])>>8) + ((*((uint*)sub)>>8)&0xf800))&0xf800)
						| (((((*ss&0x07e0)*sub[3])>>8) + ((*((uint*)sub)>>5)&0x07e0))&0x07e0)
						| (((((*ss&0x001f)*sub[3])>>8) + ((*((uint*)sub)>>3)&0x001f))&0x001f);
				}
				else
				{
					*ds = *ss;
				}
			}
		}
		else
		{
			for(; ds < dstend; sub+=4, ds++)
			{
				if(sub[3] < 0xff)
				{
					*ds = ((*((uint*)sub)>>8)&0xf800)|((*((uint*)sub)>>5)&0x07e0)|((*((uint*)sub)>>3)&0x001f);
				}
				else
				{
					*ds = 0;
				}
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB24)
	{
		uchar* st = (uchar*)s;
		uchar* dt = (uchar*)d;
		uchar* dstend = dt + pitch;

		if(s != NULL)
		{
			for(; dt < dstend; sub+=4, st+=3, dt+=3)
			{
				if(sub[3] < 0xff)
				{
					dt[0] = ((st[0]*sub[3])>>8) + sub[0];
					dt[1] = ((st[1]*sub[3])>>8) + sub[1];
					dt[2] = ((st[2]*sub[3])>>8) + sub[2];
				}
				else
				{
					dt[0] = st[0];
					dt[1] = st[1];
					dt[2] = st[2];
				}
			}
		}
		else
		{
			for(; dt < dstend; sub+=4, dt+=3)
			{
				if(sub[3] < 0xff)
				{
					dt[0] = sub[0];
					dt[1] = sub[1];
					dt[2] = sub[2];
				}
				else
				{
					dt[0] = dt[1] = dt[2] = 0;
				}
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB32 || subtype == MEDIASUBTYPE_ARGB32)
	{
		uint* dstend = d + (pitch>>2);

		if(s != NULL)
		{
			for(; d < dstend; sub+=4, s++, d++)
			{
				if(sub[3] < 0xff)
				{
					// red+blue and green are scaled in two separate masked passes
					*d = (((((*s&0x00ff00ff)*sub[3])>>8) + (*((uint*)sub)&0x00ff00ff))&0x00ff00ff)
						| (((((*s&0x0000ff00)*sub[3])>>8) + (*((uint*)sub)&0x0000ff00))&0x0000ff00);
				}
				else
				{
					*d = *s;
				}
			}
		}
		else
		{
			for(; d < dstend; sub+=4, d++)
			{
				*d = (sub[3] < 0xff) ? (*((uint*)sub)&0xffffff) : 0;
			}
		}
	}

	__asm emms;
}

/*
 * Mixes one line of a single YV12 plane with the subtitle bitmap.
 *
 *  d, s     - destination / source plane lines (s may be NULL: no picture)
 *  sub      - subtitle line, 4 bytes per pixel, byte 3 = background weight
 *  pitch    - number of destination bytes to process
 *  plane    - 0: luma (one subtitle pixel per byte, value taken from sub[1]);
 *             1 or 2: chroma (one byte per two subtitle pixels, value taken
 *             from sub[0] averaged with the pixel `subPitch` bytes away;
 *             plane 1 starts one subtitle pixel further in)
 *  subPitch - byte offset from `sub` to the second subtitle row used for the
 *             chroma average
 */
void MixLineYV12(uint* d, uint* s, uchar* sub, int pitch, int plane, int subPitch)
{
	if(plane == 0) // y
	{
		BYTE* sb = (BYTE*)s;
		BYTE* db = (BYTE*)d;
		BYTE* dbtend = db + pitch;

		if(s != NULL)
		{
			for(; db < dbtend; sub+=4, sb++, db++)
			{
				if(sub[3] < 0xff)
				{
					*db = (((*sb-0x10)*sub[3])>>8) + sub[1];
				}
				else
				{
					*db = *sb;
				}
			}
		}
		else
		{
			for(; db < dbtend; sub+=4, db++)
			{
				if(sub[3] < 0xff)
				{
					*db = sub[1];
				}
				else
				{
					*db = 0x10;
				}
			}
		}
	}
	else if(plane == 1 || plane == 2) // u, v
	{
		BYTE* sb = (BYTE*)s;
		BYTE* db = (BYTE*)d;
		BYTE* dbtend = db + pitch;

		if(plane == 1) sub += 4;

		if(s != NULL)
		{
			for(; db < dbtend; sub+=8, sb++, db++)
			{
				int ia = (sub[3]+sub[3+subPitch])>>1;
				if(ia < 0xff)
				{
					*db = (((*sb-0x80)*ia)>>8) + ((sub[0]+sub[subPitch])>>1);
				}
				else
				{
					*db = *sb;
				}
			}
		}
		else
		{
			for(; db < dbtend; sub+=8, db++)
			{
				int ia = (sub[3]+sub[3+subPitch])>>1;
				if(ia < 0xff)
				{
					*db = (sub[0]+sub[subPitch])>>1;
				}
				else
				{
					*db = 0x80;
				}
			}
		}
	}
}

/*
 * Copies the pixels of a 32-bit bitmap line `sub` whose byte 3 is below 0xff
 * onto the output line `d`, converting to the output format given by
 * `subtype`. `w` is the number of pixels. For YV12 and YUY2 only luma is
 * written ("w/o colors"); other pixels of `d` are left unchanged.
 */
void BltLineRGB32(uint* d, uchar* sub, int w, const GUID& subtype)
{
	if(subtype == MEDIASUBTYPE_YV12)
	{
		BYTE* db = (BYTE*)d;
		BYTE* dbtend = db + w;

		for(; db < dbtend; sub+=4, db++)
		{
			if(sub[3] < 0xff)
			{
				int y = (c2y_yb[sub[0]] + c2y_yg[sub[1]] + c2y_yr[sub[2]] + 0x108000) >> 16;
				*db = y; // w/o colors
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_YUY2)
	{
		ushort* ds = (ushort*)d;
		ushort* dstend = ds + w;

		for(; ds < dstend; sub+=4, ds++)
		{
			if(sub[3] < 0xff)
			{
				int y = (c2y_yb[sub[0]] + c2y_yg[sub[1]] + c2y_yr[sub[2]] + 0x108000) >> 16;
				*ds = 0x8000|y; // w/o colors
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB555)
	{
		ushort* ds = (ushort*)d;
		ushort* dstend = ds + w;

		for(; ds < dstend; sub+=4, ds++)
		{
			if(sub[3] < 0xff)
			{
				*ds = ((*((uint*)sub)>>9)&0x7c00)|((*((uint*)sub)>>6)&0x03e0)|((*((uint*)sub)>>3)&0x001f);
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB565)
	{
		ushort* ds = (ushort*)d;
		ushort* dstend = ds + w;

		for(; ds < dstend; sub+=4, ds++)
		{
			if(sub[3] < 0xff)
			{
				*ds = ((*((uint*)sub)>>8)&0xf800)|((*((uint*)sub)>>5)&0x07e0)|((*((uint*)sub)>>3)&0x001f);
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB24)
	{
		uchar* dt = (uchar*)d;
		uchar* dstend = dt + w*3;

		for(; dt < dstend; sub+=4, dt+=3)
		{
			if(sub[3] < 0xff)
			{
				dt[0] = sub[0];
				dt[1] = sub[1];
				dt[2] = sub[2];
			}
		}
	}
	else if(subtype == MEDIASUBTYPE_RGB32 || subtype == MEDIASUBTYPE_ARGB32)
	{
		uint* dstend = d + w;

		for(; d < dstend; sub+=4, d++)
		{
			if(sub[3] < 0xff) *d = *((uint*)sub)&0xffffff;
		}
	}
}

/*
 * Fills every odd row of an image with the byte-wise average of the rows
 * directly above and below it. `ptr` is the first row, `pitch` the row size in
 * bytes. When `height` is even the last row has no row below it and is filled
 * with a copy of the row above. Used for formats whose channels are whole
 * bytes.
 */
void AvgLines8(BYTE* ptr, int height, int pitch)
{
	if(height <= 1) return;

	BYTE* s = ptr;
	BYTE* d = ptr + (height-2)*pitch;

	for(; s < d; s += pitch*2)
	{
		BYTE* tmp = s;

		// The x86 `loop` instruction decrements ecx before testing it, so it must
		// not be entered with a zero count; rows shorter than 8 bytes are handled
		// entirely by the scalar tail below.
		if(pitch >= 8)
		{
			__asm
			{
				mov		esi, tmp
				mov		ebx, pitch
				mov		ecx, ebx
				shr		ecx, 3
				pxor	mm7, mm7
AvgLines8_loop:
				movq	mm0, [esi]
				movq	mm1, mm0
				punpcklbw	mm0, mm7
				punpckhbw	mm1, mm7
				movq	mm2, [esi+ebx*2]
				movq	mm3, mm2
				punpcklbw	mm2, mm7
				punpckhbw	mm3, mm7
				paddw	mm0, mm2
				psrlw	mm0, 1
				paddw	mm1, mm3
				psrlw	mm1, 1
				packuswb	mm0, mm1
				movq	[esi+ebx], mm0
				lea		esi, [esi+8]
				loop	AvgLines8_loop
				mov		tmp, esi
			}
		}

		// remaining bytes not covered by the 8-byte MMX steps
		for(int i = pitch&7; i--; tmp++)
		{
			tmp[pitch] = (tmp[0] + tmp[pitch<<1]) >> 1;
		}
	}

	if(!(height&1) && height >= 2)
	{
		ptr += (height-2)*pitch;
		memcpy(ptr + pitch, ptr, pitch);
	}

	__asm emms;
}

/*
 * Same row interpolation as AvgLines8, for 16-bit RGB555 pixels: the 5-bit
 * red, green and blue fields are averaged separately. `pitch` is in bytes.
 */
void AvgLines555(BYTE* ptr, int height, int pitch)
{
	if(height <= 1) return;

	unsigned __int64 __0x7c007c007c007c00 = 0x7c007c007c007c00;
	unsigned __int64 __0x03e003e003e003e0 = 0x03e003e003e003e0;
	unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;

	BYTE* s = ptr;
	BYTE* d = ptr + (height-2)*pitch;

	for(; s < d; s += pitch*2)
	{
		BYTE* tmp = s;

		// `loop` must not be entered with ecx == 0 (it would wrap around).
		if(pitch >= 8)
		{
			__asm
			{
				mov		esi, tmp
				mov		ebx, pitch
				mov		ecx, ebx
				shr		ecx, 3
				movq	mm6, __0x03e003e003e003e0
				movq	mm7, __0x001f001f001f001f
AvgLines555_loop:
				movq	mm0, [esi]
				movq	mm1, mm0
				movq	mm2, mm0
				psrlw	mm0, 10			// red1 bits: mm0 = 001f001f001f001f
				pand	mm1, mm6		// green1 bits: mm1 = 03e003e003e003e0
				pand	mm2, mm7		// blue1 bits: mm2 = 001f001f001f001f
				movq	mm3, [esi+ebx*2]
				movq	mm4, mm3
				movq	mm5, mm3
				psrlw	mm3, 10			// red2 bits: mm3 = 001f001f001f001f
				pand	mm4, mm6		// green2 bits: mm4 = 03e003e003e003e0
				pand	mm5, mm7		// blue2 bits: mm5 = 001f001f001f001f
				paddw	mm0, mm3
				psrlw	mm0, 1			// (red1+red2)/2
				psllw	mm0, 10			// red bits at 7c007c007c007c00
				paddw	mm1, mm4
				psrlw	mm1, 1			// (green1+green2)/2
				pand	mm1, mm6		// green bits at 03e003e003e003e0
				paddw	mm2, mm5
				psrlw	mm2, 1			// (blue1+blue2)/2
				// blue bits at 001f001f001f001f (no need to pand, lower bits were discarded)
				por		mm0, mm1
				por		mm0, mm2
				movq	[esi+ebx], mm0
				lea		esi, [esi+8]
				loop	AvgLines555_loop
				mov		tmp, esi
			}
		}

		// Scalar tail for the pixels the 8-byte MMX steps did not cover. Pixels
		// are 16 bits wide, so they are accessed as ushort and the row offsets
		// are expressed in ushort units (pitch is in bytes).
		ushort* px = (ushort*)tmp;
		const int pitchw = pitch >> 1;

		for(int i = (pitch&7)>>1; i--; px++)
		{
			px[pitchw] =
				((((px[0]&0x7c00) + (px[pitchw<<1]&0x7c00)) >> 1)&0x7c00)|
				((((px[0]&0x03e0) + (px[pitchw<<1]&0x03e0)) >> 1)&0x03e0)|
				((((px[0]&0x001f) + (px[pitchw<<1]&0x001f)) >> 1)&0x001f);
		}
	}

	if(!(height&1) && height >= 2)
	{
		ptr += (height-2)*pitch;
		memcpy(ptr + pitch, ptr, pitch);
	}

	__asm emms;
}

/*
 * Same row interpolation as AvgLines8, for 16-bit RGB565 pixels: the 5-bit
 * red, 6-bit green and 5-bit blue fields are averaged separately. `pitch` is
 * in bytes.
 */
void AvgLines565(BYTE* ptr, int height, int pitch)
{
	if(height <= 1) return;

	unsigned __int64 __0xf800f800f800f800 = 0xf800f800f800f800;
	unsigned __int64 __0x07e007e007e007e0 = 0x07e007e007e007e0;
	unsigned __int64 __0x001f001f001f001f = 0x001f001f001f001f;

	BYTE* s = ptr;
	BYTE* d = ptr + (height-2)*pitch;

	for(; s < d; s += pitch*2)
	{
		ushort* tmp = (ushort*)s;

		// `loop` must not be entered with ecx == 0 (it would wrap around).
		if(pitch >= 8)
		{
			__asm
			{
				mov		esi, tmp
				mov		ebx, pitch
				mov		ecx, ebx
				shr		ecx, 3
				movq	mm6, __0x07e007e007e007e0
				movq	mm7, __0x001f001f001f001f
AvgLines565_loop:
				movq	mm0, [esi]
				movq	mm1, mm0
				movq	mm2, mm0
				psrlw	mm0, 11			// red1 bits: mm0 = 001f001f001f001f
				pand	mm1, mm6		// green1 bits: mm1 = 07e007e007e007e0
				pand	mm2, mm7		// blue1 bits: mm2 = 001f001f001f001f
				movq	mm3, [esi+ebx*2]
				movq	mm4, mm3
				movq	mm5, mm3
				psrlw	mm3, 11			// red2 bits: mm3 = 001f001f001f001f
				pand	mm4, mm6		// green2 bits: mm4 = 07e007e007e007e0
				pand	mm5, mm7		// blue2 bits: mm5 = 001f001f001f001f
				paddw	mm0, mm3
				psrlw	mm0, 1			// (red1+red2)/2
				psllw	mm0, 11			// red bits at f800f800f800f800
				paddw	mm1, mm4
				psrlw	mm1, 1			// (green1+green2)/2
				pand	mm1, mm6		// green bits at 07e007e007e007e0
				paddw	mm2, mm5
				psrlw	mm2, 1			// (blue1+blue2)/2
				// blue bits at 001f001f001f001f (no need to pand, lower bits were discarded)
				por		mm0, mm1
				por		mm0, mm2
				movq	[esi+ebx], mm0
				lea		esi, [esi+8]
				loop	AvgLines565_loop
				mov		tmp, esi
			}
		}

		// Scalar tail for the pixels the 8-byte MMX steps did not cover. `tmp`
		// is a ushort pointer, so the row offsets must be in ushort units
		// (pitch is in bytes) to address the same rows as the MMX code.
		const int pitchw = pitch >> 1;

		for(int i = (pitch&7)>>1; i--; tmp++)
		{
			tmp[pitchw] =
				((((*tmp&0xf800) + (tmp[pitchw<<1]&0xf800)) >> 1)&0xf800)|
				((((*tmp&0x07e0) + (tmp[pitchw<<1]&0x07e0)) >> 1)&0x07e0)|
				((((*tmp&0x001f) + (tmp[pitchw<<1]&0x001f)) >> 1)&0x001f);
		}
	}

	if(!(height&1) && height >= 2)
	{
		ptr += (height-2)*pitch;
		memcpy(ptr + pitch, ptr, pitch);
	}

	__asm emms;
}

/* ResX2 */
/*
 * Scales the input picture `s` (dimensions and format from m_bihIn) to twice
 * its width and height in `d`. Each source row is stretched horizontally into
 * every second destination row (a source pixel followed by the average of it
 * and its right neighbour; the last pixel of a row is duplicated), then the
 * matching AvgLines* routine fills the rows in between. Only the format
 * combinations tested below are handled; row pitches are taken as
 * width * bytes-per-pixel with no padding.
 */
void CDirectVobSubFilter::Scale2x(BYTE* d, BYTE* s)
{
	if(m_bihIn.biCompression == mmioFOURCC('Y', 'V', '1', '2') && m_bihOut.biCompression == mmioFOURCC('Y', 'V', '1', '2'))
	{
		int ww = m_bihIn.biWidth;
		int hh = m_bihIn.biHeight;
		BYTE* ss = s;
		BYTE* dd = d;

		for(int plane = 0; plane < 3; plane++)
		{
			int w = ww;
			int h = hh;
			int pitch = w;

			BYTE* s1;
			BYTE* s2;
			BYTE* d1;

			for(s1 = ss, s2 = ss + (h*pitch), d1 = dd; s1 < s2; d1 += pitch*2) // TODO: replace this mess with mmx code
			{
				BYTE* tmp = s1 + pitch;

				for(BYTE* s3 = s1 + pitch - 1; s1 < s3; s1 += 1, d1 += 2)
				{
					d1[0] = s1[0];
					d1[1] = (s1[0]+s1[1])>>1;
				}

				d1[0] = d1[1] = s1[0];

				s1 += 1;
				d1 += 2;

				s1 = tmp;
			}

			w <<= 1;
			h <<= 1;
			pitch = w;

			AvgLines8(dd, h, pitch);

			// advance to the next plane; the two planes after the first are
			// half the width and height
			if(plane == 0)
			{
				ww >>= 1;
				hh >>= 1;
				int size = m_bihIn.biWidth*m_bihIn.biHeight;
				ss = s + size;
				size <<= 2;
				dd = d + size;
			}
			else if(plane == 1)
			{
				int size = m_bihIn.biWidth*m_bihIn.biHeight;
				ss = s + size + (size>>2);
				size <<= 2;
				dd = d + size + (size>>2);
			}
		}
	}

	if(m_bihIn.biCompression == mmioFOURCC('Y', 'U', 'Y', '2') && m_bihOut.biCompression == mmioFOURCC('Y', 'U', 'Y', '2'))
	{
		unsigned __int64 __0xffffffff00000000 = 0xffffffff00000000;
		unsigned __int64 __0x00000000ffffffff = 0x00000000ffffffff;
		unsigned __int64 __0x00ff00ff00ff00ff = 0x00ff00ff00ff00ff;

		int w = m_bihIn.biWidth;
		int h = m_bihIn.biHeight;
		int pitch = w*2;

		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + (h*pitch), d1 = d; s1 < s2; d1 += pitch*2)
		{
			BYTE* tmp = s1 + pitch;

			// row0, 4 pixels: y1|u1|y2|v1|y3|u2|y4|v2
			// ->
			// row0, 8 pixels: y1|u1|(y1+y2)/2|v1|y2|(u1+u2)/2|(y2+y3)/2|(v1+v2)/2

			// The loop count is (pitch>>2)-1; `loop` must not be entered with a
			// count of zero (or less), the last pixel pair is done in C below.
			if((pitch>>2) > 1)
			{
				__asm
				{
					mov		esi, s1
					mov		edi, d1

					mov		ecx, pitch
					shr		ecx, 2
					dec		ecx

					movq	mm4, __0x00ff00ff00ff00ff
					movq	mm5, __0x00000000ffffffff
					movq	mm6, __0xffffffff00000000
row_loop1:
					movq	mm0, [esi]
					movq	mm2, mm0

					pand	mm0, mm4	// mm0 = 00y400y300y200y1
					psrlw	mm2, 8		// mm2 = 00u200v200u100v1

					movq	mm1, mm0

					pand	mm0, mm5	// mm0 = 0000000000y200y1

					psllq	mm1, 16
					pand	mm1, mm6	// mm1 = 00y300y200000000

					por		mm1, mm0	// mm1 = 00y300y200y200y1

					punpcklwd mm0, mm0	// mm0 = 00y200y200y100y1

					paddw	mm0, mm1
					psrlw	mm0, 1		// mm0 = (mm0 + mm1) / 2

					movq	mm1, mm2
					punpckldq	mm1, mm1 // mm1 = 00u100v100u100v1

					paddw	mm1, mm2
					psrlw	mm1, 1		// mm1 = (mm1 + mm2) / 2

					psllw	mm1, 8
					por		mm0, mm1	// mm0 = (v1+v2)/2|(y2+y3)/2|(u1+u2)/2|y2|v1|(y1+y2)/2|u1|y1

					movq	[edi], mm0

					lea		esi, [esi+4]
					lea		edi, [edi+8]

					loop	row_loop1

					mov		s1, esi
					mov		d1, edi
				};
			}

			// last pixel pair of the row: no right neighbour to average with
			*d1++ = s1[0];
			*d1++ = s1[1];
			*d1++ =(s1[0]+s1[2])>>1;
			*d1++ = s1[3];

			*d1++ = s1[2];
			*d1++ = s1[1];
			*d1++ = s1[2];
			*d1++ = s1[3];

			s1 += 4;

			s1 = tmp;
		}

		w <<= 1;
		h <<= 1;
		pitch = w*2;

		AvgLines8(d, h, pitch);
	}
	else if(m_bihIn.biCompression <= 3 && m_bihOut.biCompression <= 3
		&& m_bihIn.biBitCount == 16 && m_bihOut.biBitCount == 16
		&& m_pOutput->CurrentMediaType().subtype == MEDIASUBTYPE_RGB555)
	{
		int w = m_bihIn.biWidth;
		int h = m_bihIn.biHeight;
		int pitch = w*2;

		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + (h*pitch), d1 = d; s1 < s2; d1 += pitch*2) // TODO: replace this mess with mmx code
		{
			BYTE* tmp = s1 + pitch;

			for(BYTE* s3 = s1 + pitch - 2; s1 < s3; s1 += 2, d1 += 4)
			{
				*((ushort*)d1) = *((ushort*)s1);
				*((ushort*)d1+1) =
					((((*((ushort*)s1)&0x7c00) + (*((ushort*)s1+1)&0x7c00)) >> 1)&0x7c00)|
					((((*((ushort*)s1)&0x03e0) + (*((ushort*)s1+1)&0x03e0)) >> 1)&0x03e0)|
					((((*((ushort*)s1)&0x001f) + (*((ushort*)s1+1)&0x001f)) >> 1)&0x001f);
			}

			*((ushort*)d1) = *((ushort*)s1);
			*((ushort*)d1+1) = *((ushort*)s1);

			s1 += 2;
			d1 += 4;

			s1 = tmp;
		}

		w <<= 1;
		h <<= 1;
		pitch = w*2;

		AvgLines555(d, h, pitch);
	}
	else if(m_bihIn.biCompression <= 3 && m_bihOut.biCompression <= 3
		&& m_bihIn.biBitCount == 16 && m_bihOut.biBitCount == 16
		&& m_pOutput->CurrentMediaType().subtype == MEDIASUBTYPE_RGB565)
	{
		int w = m_bihIn.biWidth;
		int h = m_bihIn.biHeight;
		int pitch = w*2;

		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + (h*pitch), d1 = d; s1 < s2; d1 += pitch*2) // TODO: replace this mess with mmx code
		{
			BYTE* tmp = s1 + pitch;

			for(BYTE* s3 = s1 + pitch - 2; s1 < s3; s1 += 2, d1 += 4)
			{
				*((ushort*)d1) = *((ushort*)s1);
				*((ushort*)d1+1) =
					((((*((ushort*)s1)&0xf800) + (*((ushort*)s1+1)&0xf800)) >> 1)&0xf800)|
					((((*((ushort*)s1)&0x07e0) + (*((ushort*)s1+1)&0x07e0)) >> 1)&0x07e0)|
					((((*((ushort*)s1)&0x001f) + (*((ushort*)s1+1)&0x001f)) >> 1)&0x001f);
			}

			*((ushort*)d1) = *((ushort*)s1);
			*((ushort*)d1+1) = *((ushort*)s1);

			s1 += 2;
			d1 += 4;

			s1 = tmp;
		}

		w <<= 1;
		h <<= 1;
		pitch = w*2;

		AvgLines565(d, h, pitch);
	}
	else if(m_bihIn.biCompression <= 3 && m_bihOut.biCompression <= 3
		&& m_bihIn.biBitCount == 24 && m_bihOut.biBitCount == 24)
	{
		int w = m_bihIn.biWidth;
		int h = m_bihIn.biHeight;
		int pitch = w*3; //(w*3+3)&~3;

		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + (h*pitch), d1 = d; s1 < s2; d1 += pitch*2) // TODO: replace this mess with mmx code
		{
			BYTE* tmp = s1 + pitch;

			for(BYTE* s3 = s1 + pitch - 3; s1 < s3; s1 += 3, d1 += 6)
			{
				d1[0] = s1[0];
				d1[1] = s1[1];
				d1[2] = s1[2];
				d1[3] = (s1[0]+s1[3])>>1;
				d1[4] = (s1[1]+s1[4])>>1;
				d1[5] = (s1[2]+s1[5])>>1;
			}

			d1[0] = d1[3] = s1[0];
			d1[1] = d1[4] = s1[1];
			d1[2] = d1[5] = s1[2];

			s1 += 3;
			d1 += 6;

			s1 = tmp;
		}

		w <<= 1;
		h <<= 1;
		pitch = w*3; //(w*3+3)&~3;

		AvgLines8(d, h, pitch);
	}
	else if(m_bihIn.biCompression <= 3 && m_bihOut.biCompression <= 3
		&& m_bihIn.biBitCount == 32 && m_bihOut.biBitCount == 32)
	{
/*
		{
			DDSURFACEDESC2 ddsd2;
			CComPtr pDDS1, pDDS2;

			// Initialize the surface description.
			ZeroMemory(&ddsd2, sizeof(DDSURFACEDESC2));
			ZeroMemory(&ddsd2.ddpfPixelFormat, sizeof(DDPIXELFORMAT));
			ddsd2.dwSize = sizeof(ddsd2);
			ddsd2.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_LPSURFACE | DDSD_PITCH | DDSD_PIXELFORMAT | DDSD_CAPS;
			ddsd2.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN | DDSCAPS_SYSTEMMEMORY;
			ddsd2.dwWidth = m_bihIn.biWidth;
			ddsd2.dwHeight= m_bihIn.biHeight;
			ddsd2.lPitch = (LONG)4 * m_bihIn.biWidth;
			ddsd2.lpSurface = (LPVOID)s;

			// Set up the pixel format for 24-bit RGB (8-8-8).
			ddsd2.ddpfPixelFormat.dwSize = sizeof(DDPIXELFORMAT);
			ddsd2.ddpfPixelFormat.dwFlags= DDPF_RGB;
			ddsd2.ddpfPixelFormat.dwRGBBitCount = (DWORD)4*8;
			ddsd2.ddpfPixelFormat.dwRBitMask = 0x00FF0000;
			ddsd2.ddpfPixelFormat.dwGBitMask = 0x0000FF00;
			ddsd2.ddpfPixelFormat.dwBBitMask = 0x000000FF;

			HRESULT hr;

			// Create the surface
			hr = m_pDD->CreateSurface(&ddsd2, &pDDS1, NULL);

			ddsd2.dwWidth <<= 1;
			ddsd2.dwHeight<<= 1;
			ddsd2.lPitch <<= 1;
			ddsd2.lpSurface = (LPVOID)d;

			hr = m_pDD->CreateSurface(&ddsd2, &pDDS2, NULL);

			if(pDDS1 && pDDS2)
			{
				hr = pDDS2->Blt(NULL, pDDS1, NULL, DDBLT_WAIT, NULL);
			}

			return;
		}

		// remark: the image quality was disappointing...
*/
		int w = m_bihIn.biWidth;
		int h = m_bihIn.biHeight;
		int pitch = w * 4;

		BYTE* s1;
		BYTE* s2;
		BYTE* d1;

		for(s1 = s, s2 = s + (h*pitch), d1 = d; s1 < s2; d1 += pitch*2)
		{
			BYTE* tmp = s1 + pitch;

			// The loop count is (pitch>>2)-1; `loop` must not be entered with a
			// count of zero, the last pixel is done in C below.
			if((pitch>>2) > 1)
			{
				__asm
				{
					mov		esi, s1
					mov		edi, d1

					mov		ecx, pitch
					shr		ecx, 2
					dec		ecx

					pxor	mm0, mm0
row_loop3:
					movq	mm1, [esi]
					movq	mm2, mm1

					punpcklbw	mm1, mm0	// mm1 = 00xx00r100g100b1
					punpckhbw	mm2, mm0	// mm2 = 00xx00r200g200b2

					paddw	mm2, mm1
					psrlw	mm2, 1		// mm2 = (mm1 + mm2) / 2

					packuswb	mm1, mm2

					movq	[edi], mm1

					lea		esi, [esi+4]
					lea		edi, [edi+8]

					loop	row_loop3

					mov		s1, esi
					mov		d1, edi
				};
			}

			// last pixel of the row is simply duplicated
			*((uint*)d1) = *((uint*)s1);
			*((uint*)d1+1) = *((uint*)s1);

			s1 += 4;
			d1 += 8;

			s1 = tmp;
		}

		w <<= 1;
		h <<= 1;
		pitch = w * 4;

		AvgLines8(d, h, pitch);
	}

	__asm emms;
}

/*
 * Writes one output frame in a packed (YUY2 / RGB) format.
 *
 *  pOut - output buffer, described by m_bihOut
 *  pIn  - input picture, described by m_bihIn (replaced by the 2x scaled copy
 *         in m_pTempPicBuff when m_fResX2Active is set)
 *  img  - subtitle image to mix in, or NULL for a plain copy
 *
 * The output is produced in three vertical bands: lines above the picture,
 * the picture lines, and lines below it. Every line is black on the left
 * (dpLeft bytes) and right (dpRight bytes); the middle (dpMid bytes) is the
 * picture or black, mixed with the subtitle line via MixLine when `img` is
 * set and m_mode >= VOBSUB. Always returns NOERROR.
 */
HRESULT CDirectVobSubFilter::Copy(BYTE* pOut, BYTE* pIn, SubImage* img)
{
	int wIn = m_bihIn.biWidth, wOut = m_bihOut.biWidth;
	int hIn = m_bihIn.biHeight, hOut = abs(m_bihOut.biHeight);
	int bppIn = m_bihIn.biBitCount, bppOut = m_bihOut.biBitCount;
	int pitchIn = wIn * bppIn >> 3, pitchOut = wOut * bppOut >> 3;

	bool fFlip = m_bihOut.biHeight < 0 && m_bihOut.biCompression <= 3; // flip if we are copying rgb and the signs aren't matching (we only check the output height since input is always > 0)
	if(m_fFlipPicture) fFlip = !fFlip;
	if(m_fMSMpeg4Fix) fFlip = !fFlip;
//	if(m_fDivxPlusFix) fFlip = !fFlip;

	bool fFlipSub = !(m_bihOut.biHeight > 0 && m_bihOut.biCompression <= 3); // flip unless the dst bitmap is also a flipped rgb
	if(m_fFlipSubtitles) fFlipSub = !fFlipSub;
//	if(m_fDivxPlusFix) fFlipSub = !fFlipSub;

	/* ResX2 */
	if(m_fResX2Active)
	{
		Scale2x(m_pTempPicBuff, pIn);
		pIn = m_pTempPicBuff;
		wIn <<= 1;
		hIn <<= 1;
		pitchIn <<= 1;
	}

	// round both pitches up to a multiple of 4 bytes
	pitchIn = (pitchIn+3)&~3;
	pitchOut = (pitchOut+3)&~3;

	uint black = (m_bihIn.biCompression == mmioFOURCC('Y', 'U', 'Y', '2')) ? 0x80108010 : 0;

	const GUID& subtype = m_pOutput->CurrentMediaType().subtype;

	BITMAP bm;
	if(img) GetObject(img->hbm, sizeof(BITMAP), &bm);

	int y = 0;

	{
		int dpLeft = (((m_bihSub.biWidth - wIn) >> 1) & ~1) * bppIn >> 3;
		int dpRight = max(pitchOut - (dpLeft + pitchIn), 0);
		int dpMid = pitchOut - dpLeft - dpRight;

		int i = 0, j = 0, k = 0;

		// band 1: lines above the picture
		j += (hOut - hIn) >> 1;

		for(; i < j; i++, pOut += pitchOut) // memsetd(pOut, black, pitchOut);
		{
			memsetd(pOut, black, dpLeft);

			if(img && m_mode >= VOBSUB)
			{
				y = fFlipSub ? (bm.bmHeight - i - 1) : i;
				MixLine((uint*)(pOut + dpLeft), NULL, (uchar*)bm.bmBits + y * bm.bmWidthBytes, dpMid, subtype);
			}
			else memsetd(pOut + dpLeft, black, dpMid);

			memsetd(pOut + dpLeft + dpMid, black, dpRight);
		}

		// band 2: the picture lines
		j += hIn;

		int pitchIn2;
		if(!fFlip)
		{
			if(hIn > hOut) pIn += pitchIn * ((hIn - hOut) >> 1);
			pitchIn2 = pitchIn;
		}
		else
		{
			// read the input bottom-up
			pIn += pitchIn * (j-i-1);
			pitchIn2 = -pitchIn;
		}

		for(k = min(j, hOut); i < k; i++, pIn += pitchIn2, pOut += pitchOut)
		{
			memsetd(pOut, black, dpLeft);

			if(img && m_mode >= VOBSUB)
			{
				y = fFlipSub ? (bm.bmHeight - i - 1) : i;
				MixLine((uint*)(pOut + dpLeft), (uint*)pIn, (uchar*)bm.bmBits + y * bm.bmWidthBytes, dpMid, subtype);
			}
			else memcpy(pOut + dpLeft, pIn, dpMid);

			memsetd(pOut + dpLeft + dpMid, black, dpRight);
		}

		// band 3: lines below the picture
		j = hOut;

		for(; i < j; i++, pOut += pitchOut) // memsetd(pOut, black, pitchOut);
		{
			memsetd(pOut, black, dpLeft);

			if(img && m_mode >= VOBSUB)
			{
				y = fFlipSub ? (bm.bmHeight - i - 1) : i;
				MixLine((uint*)(pOut + dpLeft), NULL, (uchar*)bm.bmBits + y * bm.bmWidthBytes, dpMid, subtype);
			}
			else memsetd(pOut + dpLeft, black, dpMid);

			memsetd(pOut + dpLeft + dpMid, black, dpRight);
		}
	}

	return NOERROR;
}

/*
 * YV12 counterpart of Copy(): writes one output frame plane by plane (plane 0
 * first, then the two half-size planes), using the same three-band layout per
 * plane. Subtitle lines are mixed in with MixLineYV12; for the half-size
 * planes every second subtitle row is addressed (y = i<<1). Black is 0x10 for
 * plane 0 and 0x80 for the other two. Always returns NOERROR.
 */
HRESULT CDirectVobSubFilter::CopyYV12(BYTE* pOut, BYTE* pIn, SubImage* img)
{
	int wIn = m_bihIn.biWidth, wOut = m_bihOut.biWidth;
	int hIn = m_bihIn.biHeight, hOut = abs(m_bihOut.biHeight);
	int pitchIn = wIn, pitchOut = wOut;

	bool fFlip = m_bihOut.biHeight < 0 && m_bihOut.biCompression <= 3; // flip if we are copying rgb and the signs aren't matching (we only check the output height since input is always > 0)
	if(m_fFlipPicture) fFlip = !fFlip;
	if(m_fMSMpeg4Fix) fFlip = !fFlip;
//	if(m_fDivxPlusFix) fFlip = !fFlip;

	bool fFlipSub = !(m_bihOut.biHeight > 0 && m_bihOut.biCompression <= 3); // flip unless the dst bitmap is also a flipped rgb
	if(m_fFlipSubtitles) fFlipSub = !fFlipSub;
//	if(m_fDivxPlusFix) fFlipSub = !fFlipSub;

	/* ResX2 */
	if(m_fResX2Active)
	{
		Scale2x(m_pTempPicBuff, pIn);
		pIn = m_pTempPicBuff;
		wIn <<= 1;
		hIn <<= 1;
		pitchIn <<= 1;
	}

	// round both pitches up to a multiple of 4 bytes
	pitchIn = (pitchIn+3)&~3;
	pitchOut = (pitchOut+3)&~3;

	// start of the planes that follow plane 0
	BYTE* pInVU = pIn + pitchIn*hIn;
	BYTE* pOutVU = pOut + pitchOut*hOut;

	uint black = 0x10;

	// byte offset to the neighbouring subtitle row used for chroma averaging
	int subPitch = (fFlipSub ? -m_bihSub.biWidth : m_bihSub.biWidth)*4;

	const GUID& subtype = m_pOutput->CurrentMediaType().subtype;

	BITMAP bm;
	if(img) GetObject(img->hbm, sizeof(BITMAP), &bm);

	int y = 0;

	int dpLeft = ((m_bihSub.biWidth - wIn) >> 1) & ~1;

	for(int plane = 0; plane < 3; plane++)
	{
		int dpRight = max(pitchOut - (dpLeft + pitchIn), 0);
		int dpMid = pitchOut - dpLeft - dpRight;

		int i = 0, j = 0, k = 0;

		// band 1: lines above the picture
		j += (hOut - hIn) >> 1;

		for(; i < j; i++, pOut += pitchOut) // memset(pOut, black, pitchOut);
		{
			memset(pOut, black, dpLeft);

			if(img && m_mode >= VOBSUB)
			{
				y = (plane == 0) ? i : (i<<1);
				y = fFlipSub ? (bm.bmHeight - y - 1) : y;
				MixLineYV12((uint*)(pOut + dpLeft), NULL, (uchar*)bm.bmBits + y * bm.bmWidthBytes, dpMid, plane, subPitch);
			}
			else memset(pOut + dpLeft, black, dpMid);

			memset(pOut + dpLeft + dpMid, black, dpRight);
		}

		// band 2: the picture lines
		j += hIn;

		int pitchIn2;
		if(!fFlip)
		{
			if(hIn > hOut) pIn += pitchIn * ((hIn - hOut) >> 1);
			pitchIn2 = pitchIn;
		}
		else
		{
			// read the input bottom-up
			pIn += pitchIn * (j-i-1);
			pitchIn2 = -pitchIn;
		}

		for(k = min(j, hOut); i < k; i++, pIn += pitchIn2, pOut += pitchOut)
		{
			memset(pOut, black, dpLeft);

			if(img && m_mode >= VOBSUB)
			{
				y = (plane == 0) ? i : (i<<1);
				y = fFlipSub ? (bm.bmHeight - y - 1) : y;
				MixLineYV12((uint*)(pOut + dpLeft), (uint*)pIn, (uchar*)bm.bmBits + y * bm.bmWidthBytes, dpMid, plane, subPitch);
			}
			else memcpy(pOut + dpLeft, pIn, dpMid);

			memset(pOut + dpLeft + dpMid, black, dpRight);
		}

		// band 3: lines below the picture
		j = hOut;

		for(; i < j; i++, pOut += pitchOut) // memsetd(pOut, black, pitchOut);
		{
			memset(pOut, black, dpLeft);

			if(img && m_mode >= VOBSUB)
			{
				y = (plane == 0) ? i : (i<<1);
				y = fFlipSub ? (bm.bmHeight - y - 1) : y;
				MixLineYV12((uint*)(pOut + dpLeft), NULL, (uchar*)bm.bmBits + y * bm.bmWidthBytes, dpMid, plane, subPitch);
			}
			else memset(pOut + dpLeft, black, dpMid);

			memset(pOut + dpLeft + dpMid, black, dpRight);
		}

		// set up the next plane
		if(plane == 0)
		{
			hIn >>= 1;
			hOut >>= 1;
			pitchIn >>= 1;
			pitchOut >>= 1;
			dpLeft >>= 1;
			pIn = pInVU;
			pOut = pOutVU;
			black = 0x80;
		}
		else if(plane == 1)
		{
			pIn = pInVU + pitchIn*hIn;
			pOut = pOutVU + pitchOut*hOut;
		}
	}

	return NOERROR;
}

/*
 * Draws the on-screen-display text onto the output frame `pOut`.
 *
 * When m_fOSD is set, a statistics message is built (output size and subtype
 * name, frame rates, times, and the entries reported by m_sic). The text is
 * rendered with GDI into m_hbm, the covered rectangle is copied to the output
 * with BltLineRGB32, and each copied bitmap line is then reset to 0xff000000.
 * Does nothing when the message is empty.
 */
void CDirectVobSubFilter::PrintMessages(BYTE* pOut)
{
	const GUID& subtype = m_pOutput->CurrentMediaType().subtype;

	CString msg, tmp;

	if(m_fOSD)
	{
		CString c(GuidNames[subtype]);
		if(!_tcsncmp(c, _T("MEDIASUBTYPE_"), 13)) c = c.Mid(13);
		tmp.Format(_T("%dx%d %s\n"), m_bihOut.biWidth, m_bihOut.biHeight, c);
		msg += tmp;

		int start, stop, pos, size;
		m_sic.GetStats(start, stop, pos, size);

		tmp.Format(_T("real fps: %.3f, current fps: %.3f\nmedia time: %d, subtitle time: %d [ms]\nframe number: %d (calculated)\nrate: %.4f\nstart: %d - stop: %d [ms]\npos: %d - size: %d"),
			m_fps, m_MediaFPS/*m_fMediaFPSEnabled?m_MediaFPS:fabs(m_fps)*/,
			(int)m_tPrev.Millisecs(), CalcCurrentTime(),
			(int)(m_tPrev.m_time * m_fps / 10000000),
			m_pInput->CurrentRate(),
			start, stop, pos, size);
		msg += tmp;

		if(size > 0)
		{
			SubImage* img = NULL;

			CAutoLock cAutoLock(&m_sic.m_csAccessLock);

			for(int i = 0; i < size && (img = m_sic.GetSubImage(i)); i++)
			{
				tmp.Format(_T("\n%d: %d - %d [ms]"), i, img->start, img->stop);
				msg += tmp;
			}
		}
	}

	if(msg.IsEmpty()) return;

	HANDLE hOldBitmap = SelectObject(m_hdc, m_hbm);
	HANDLE hOldFont = SelectObject(m_hdc, m_hfont);

	SetTextColor(m_hdc, 0xffffff);
	SetBkMode(m_hdc, TRANSPARENT);
	SetMapMode(m_hdc, MM_TEXT);

	BITMAP bm;
	GetObject(m_hbm, sizeof(BITMAP), &bm);

	// first pass only measures the text, second pass draws it
	CRect r(0, 0, bm.bmWidth, bm.bmHeight);
	DrawText(m_hdc, msg, _tcslen(msg), &r, DT_CALCRECT|DT_EXTERNALLEADING|DT_NOPREFIX|DT_WORDBREAK);

	r += CPoint(10, 10);
	r &= CRect(0, 0, bm.bmWidth, bm.bmHeight);

	DrawText(m_hdc, msg, _tcslen(msg), &r, DT_LEFT|DT_TOP|DT_NOPREFIX|DT_WORDBREAK);

	BYTE* pIn = (BYTE*)bm.bmBits;
	int pitchIn = bm.bmWidthBytes;
	int pitchOut = m_bihOut.biWidth * m_bihOut.biBitCount >> 3;

	if(m_bihOut.biCompression == mmioFOURCC('Y', 'V', '1', '2')) pitchOut = m_bihOut.biWidth;

	pitchIn = (pitchIn+3)&~3;
	pitchOut = (pitchOut+3)&~3;

	if(m_bihOut.biHeight > 0 && m_bihOut.biCompression <= 3) // flip if the dst bitmap is flipped rgb (m_hbm is a top-down bitmap, not like the subpictures)
	{
		pOut += pitchOut * (abs(m_bihOut.biHeight)-1);
		pitchOut = -pitchOut;
	}

	pIn += pitchIn * r.top;
	pOut += pitchOut * r.top;

	for(int w = min(r.right, m_bihOut.biWidth), h = r.Height(); h--; pIn += pitchIn, pOut += pitchOut)
	{
		BltLineRGB32((uint*)pOut, pIn, w, subtype);
		memsetd(pIn, 0xff000000, r.right*4); // reset the bitmap line after it has been copied
	}

	SelectObject(m_hdc, hOldBitmap);
	SelectObject(m_hdc, hOldFont);
}