// Retrieved from: www.pudn.com > mmxswarm.zip > SSE2Surface32.cpp
// SSE2Surface32.cpp : implementation of the CSSE2Surface32Intrinsic
// class
//
// This is a part of the Microsoft Foundation Classes C++ library.
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// This source code is only intended as a supplement to the
// Microsoft Foundation Classes Reference and related
// electronic documentation provided with the library.
// See these sources for detailed information regarding the
// Microsoft Foundation Classes product.
//
#include "stdafx.h"
#include "SSE2Surface.h"
#include "SSE2Wrapper.h"
// Short alias for the SIMD wrapper type that BlurBits() below does all of
// its pixel arithmetic with.
// NOTE(review): the underlying name suggests unsigned 16-bit lanes with
// saturating arithmetic; presumably declared in SSE2Wrapper.h -- confirm there.
typedef CSSE2Unsigned16Saturated CSSE2;
// This surface processes a 32-bit buffer four pixels at a time, so the
// buffer width has to be a multiple of 4.
//
// AdjustWidth rewrites *pWidth in place: it adds (3 - m_kDeltaX) and then
// clears the two low bits, so the stored result is always a multiple of 4.
// This is an exact "round up to 4" only when m_kDeltaX is 0; for a non-zero
// m_kDeltaX the result can be smaller than the incoming value.
// NOTE(review): presumably the caller passes a width that already includes
// m_kDeltaX -- confirm against the caller.
//
// pWidth  in/out; must not be NULL (checked with ASSERT only).
void CSSE2Surface32Intrinsic::AdjustWidth(int *pWidth)
{
    ASSERT(pWidth != NULL);
    ASSERT(m_kDeltaX <= 3);

    int &rWidth = *pWidth;
    rWidth = (rWidth + 3 - m_kDeltaX) & ~3;
}
void CSSE2Surface32Intrinsic::OnCreated()
{
ASSERT(GetBitDepth() == 32);
ASSERT((GetPitch() & 0xF) == 0);
ASSERT(GetVisibleWidth() && GetVisibleHeight());
ASSERT(sizeof(RGBQUAD) == 4);
int width = GetVisibleWidth();
m_qwpl = GetPitch()/8; // qwords Per Line
m_width = (width+3)/4; // 4 pixels at a time
}
void CSSE2Surface32Intrinsic::BlurBits()
{
int height = GetVisibleHeight();
ULONGLONG *pCur = (ULONGLONG *)GetPixelAddress(0,0);
ASSERT((DWORD_PTR(pCur) & 0xF) == 0);
CSSE2 cFader;
CSSE2 cRight, cLeft;
CSSE2 cUp, cDown, cCur;
CSSE2 cResult;
cFader.UnpackBytesLo( 0x0101010101010101u );
cLeft.Clear();
do {
int width = m_width;
ASSERT((DWORD_PTR(pCur) & 0xF) == 0);
do {
RGBQUAD *pdwCur = (RGBQUAD*)pCur;
ULONGLONG *pNext = (ULONGLONG*)(pdwCur+1);
// Load pixels and do the mmx unpack
cCur.UnpackBytesLo( *pCur );
cRight.UnpackBytesLo( *pNext );
cUp.UnpackBytesLo( pCur[-m_qwpl] );
cDown.UnpackBytesLo( pCur[m_qwpl] );
// Actual math. Don't step on current, or right.
// Sum the 4 around and double the middle
// Do current pixel in this line
cResult = (cDown+cUp+cLeft+cRight+(cCur<<2))>>3;
// Do next pixel
cLeft = cRight; // Slide left!
cCur.UnpackBytesLo( pCur[1] );
cRight.UnpackBytesLo( pNext[1] );
cUp.UnpackBytesLo( pCur[-m_qwpl+1] );
cDown.UnpackBytesLo( pCur[m_qwpl+1] );
cCur = (cDown+cUp+cLeft+cRight+(cCur<<2))>>3;
#if defined(TRIPPY)
cCur += cFader; // increase the fade to white
cResult += cFader; // increase the fade to white
#elif defined (FAST_FADE)
cCur -= cFader; // increase the fade to black
cResult -= cFader; // increase the fade to black
#endif
cLeft = cRight; // Slide left!
cResult.PackBytes(pCur, cCur);
pCur += 2;
} while (--width > 0);
} while (--height > 0);
}