// www.pudn.com > mmxswarm.zip > SSE2Surface24.cpp


// SSE2Surface24.cpp : implementation of the CSSE2Surface24Intrinsic 
// class 
// 
// This is a part of the Microsoft Foundation Classes C++ library. 
// Copyright (c) Microsoft Corporation.  All rights reserved. 
// 
// This source code is only intended as a supplement to the 
// Microsoft Foundation Classes Reference and related 
// electronic documentation provided with the library. 
// See these sources for detailed information regarding the 
// Microsoft Foundation Classes product. 
// 
#include "stdafx.h" 
#include "SSE2Surface.h" 
#include "SSE2Wrapper.h" 
 
typedef CSSE2Unsigned16Saturated CSSE2; 
 
// Optimized for a 4-pixel processing 24 bit buffer 
// Adjusts a requested width, in pixels, in place.
//
// pWidth - in/out, must not be NULL. On return it holds
//          (*pWidth - m_kDeltaX) rounded up to the next multiple of 16.
void CSSE2Surface24Intrinsic::AdjustWidth(int *pWidth)
{
	ASSERT(pWidth != NULL);
	ASSERT(m_kDeltaX <= 15);

	// The pitch must be a multiple of 16 bytes so that every scan line
	// starts on an aligned boundary. A pixel is 3 bytes wide, so the
	// width is rounded to a multiple of 16 pixels: 16 * 3 = 48 bytes,
	// which is itself a multiple of 16 bytes.
	//
	// NOTE(review): m_kDeltaX is declared outside this file; presumably
	// it is a border width in pixels that the caller adds back to the
	// result -- confirm against the class declaration.
	int &width = *pWidth;
	width = (width + 15 - m_kDeltaX) & ~0xF;
}
 
void CSSE2Surface24Intrinsic::OnCreated() 
{ 
	ASSERT(GetBitDepth() == 24); 
	ASSERT((GetPitch() & 0xF) == 0); 
	ASSERT(GetVisibleWidth() && GetVisibleHeight()); 
	ASSERT(sizeof(RGBTRIPLE) == 3); 
 
	int width = GetVisibleWidth(); 
    m_qwpl  = GetPitch()/8; // qwords Per Line 
    m_width = (width*3+15)/16;// (+7/8) // m_qwpl/2 without processing off-screen bits; 
	m_delta = m_qwpl - m_width*2; 
} 
 
void CSSE2Surface24Intrinsic::BlurBits() 
{ 
    int height = GetVisibleHeight(); 
    ULONGLONG *pCur  = (ULONGLONG*)GetPixelAddress(0,0); 
 
	CSSE2 cFader; 
	CSSE2 cRight, cLeft; 
	CSSE2 cUp, cDown, cCur; 
	CSSE2 cResult; 
 
	cFader.UnpackBytesLo( 0x0101010101010101u ); 
	cLeft.Clear(); 
 
	do { 
		int width = m_width; 
		do { 
			BYTE *bpCur = (BYTE *)pCur; 
			// Load pixels and do the mmx unpack 
			cCur.UnpackBytesLo( pCur[0] ); 
			// treating non-aligned data as dwords isn't generally a good idea 
			cRight.UnpackBytesLo( *(ULONGLONG *)(bpCur+3) ); 
			cUp.UnpackBytesLo( pCur[-m_qwpl] ); 
			cDown.UnpackBytesLo( pCur[m_qwpl] ); 
 
			// Sum the 4 around and double the middle 
			// Do current pixel in this line 
			cResult = (cDown+cUp+cLeft+cRight+(cCur<<2))>>3; 
 
			cLeft.UnpackBytesLo( *(ULONGLONG *)(bpCur+5) ); 
			pCur++; 
			bpCur = (BYTE *)pCur; 
			cCur.UnpackBytesLo( pCur[0] ); 
			cRight.UnpackBytesLo( *(ULONGLONG *)(bpCur+3) ); 
			cUp.UnpackBytesLo( pCur[-m_qwpl] ); 
			cDown.UnpackBytesLo( pCur[m_qwpl] ); 
			cCur = (cDown+cUp+cLeft+cRight+(cCur<<2))>>3; 
 
#if defined(TRIPPY) 
			cCur += cFader; // increase the fade to white 
			cResult += cFader; // increase the fade to white 
#elif defined (FAST_FADE) 
			cCur -= cFader; // increase the fade to white 
			cResult -= cFader; // increase the fade to white 
#endif 
			// Reset the left before we write anything out. 
			// treating non-aligned data as dwords isn't generally a good idea 
			cLeft.UnpackBytesLo( *(ULONGLONG *)(bpCur+5) ); 
			cResult.PackBytes(pCur-1, cCur); 
			pCur++; 
		} while (--width > 0); 
		pCur += m_delta; 
	} while (--height > 0); 
}