// www.pudn.com > mmxswarm.zip > MMXSurface16.cpp


// MMXSurface16.cpp : implementation of the CMMXSurface16Intrinsic 
// class 
// 
// This is a part of the Microsoft Foundation Classes C++ library. 
// Copyright (c) Microsoft Corporation.  All rights reserved. 
// 
// This source code is only intended as a supplement to the 
// Microsoft Foundation Classes Reference and related 
// electronic documentation provided with the library. 
// See these sources for detailed information regarding the 
// Microsoft Foundation Classes product. 
// 
#include "stdafx.h" 
#include "MMXSurface.h" 
#include "MMXWrapper.h" 
 
typedef CMMXUnsigned16Saturated CMMX; 
 
// Width adjustment for the 16 bit surface, whose blur loop handles
// four pixels (one 64 bit word) per step.
//
// pWidth - in/out: requested width in pixels; overwritten with
//          (*pWidth + 3 - m_kDeltaX) with its two low bits cleared,
//          i.e. a multiple of 4.
void CMMXSurface16Intrinsic::AdjustWidth(int *pWidth)
{
	ASSERT(pWidth != NULL);
	ASSERT(m_kDeltaX <= 3);

	// A visible width that is a multiple of 4 means the end of each
	// line never needs special-case handling.
	// m_kDeltaX is declared outside this file (presumably the horizontal
	// border, in pixels - confirm against the surface base class).
	const int biased = *pWidth + 3 - m_kDeltaX;
	*pWidth = biased & ~0x3;
}
 
void CMMXSurface16Intrinsic::OnCreated() 
{ 
	ASSERT(GetBitDepth() == 16); 
	ASSERT((GetPitch() & 0x7) == 0); 
	ASSERT(GetVisibleWidth() && GetVisibleHeight()); 
 
	int width = GetVisibleWidth(); 
    m_qwpl  = GetPitch()/8; // qwords Per Line 
    m_width = (width+3)/4; // 4 pixels at a time 
} 
 
// Note: It's still faster than the brute force approach, 
// However, it's slower than choosing 24 bit and blitting to 
// a 16 bit screen. breaking out the bits into MMX friendly sizes 
// doesn't work out well. 
void CMMXSurface16Intrinsic::BlurBits() 
{ 
    int height = GetVisibleHeight(); 
    ULONGLONG *pCur  = (ULONGLONG *)GetPixelAddress(0,0); 
 
	CMMX cUpBase, cDownBase, cCurBase, cLeftBase, cRightBase; 
	CMMX cUp, cDown, cCur, cLeft, cRight; 
	CMMX cDest; 
	CMMX cMask(0x001f001f001f001fu); // colorspace mask - 5 bits per color 
 
	do { 
		int width = m_width; 
		do { 
			// Load pixels and do the mmx unpack 
			// Note: pwCur is used to do non-aligned 
			// data reads - which is not normally recommended. 
			// on X86, it is faster than loading aligned and 
			// shift-oring. 
			WORD *pwCur = (WORD *)pCur; 
			cLeftBase = *(ULONGLONG*)(pwCur-1); 
			cCurBase = pCur[0]; 
			cRightBase = *(ULONGLONG*)(pwCur+1); 
			cUpBase = pCur[-m_qwpl]; 
			cDownBase = pCur[+m_qwpl]; 
 
			cLeft = cLeftBase & cMask; 
			cCur = cCurBase & cMask; 
			cRight = cRightBase & cMask; 
			cUp = cUpBase & cMask; 
			cDown = cDownBase & cMask; 
 
			// Actual math. Don't step on current, or right. 
			// Sum the 4 around and double the middle 
			// Do current pixel in this line 
			cDest = ((cDown+cUp+cLeft+cRight+(cCur<<2))>>3); 
			cMask <<= 5; 
 
			cLeft = cLeftBase & cMask; 
			cCur = cCurBase & cMask; 
			cRight = cRightBase & cMask; 
			cUp = cUpBase & cMask; 
			cDown = cDownBase & cMask; 
 
			// Actual math for next color space 
			cDest |= cMask & ((cDown+cUp+cLeft+cRight+(cCur<<2))>>3); 
			cMask <<= 5; 
 
			cLeft = (cLeftBase & cMask) >> 3; 
			cCur = (cCurBase & cMask) >> 3; 
			cRight = (cRightBase & cMask) >> 3; 
			cUp = (cUpBase & cMask) >> 3; 
			cDown = (cDownBase & cMask) >> 3; 
 
			// Actual math for next color space 
			cDest |= cMask & (cDown+cUp+cLeft+cRight+(cCur<<2)); 
			*pCur++ = cDest; 
			cMask >>= 10; 
		} while (--width > 0); 
	} while (--height > 0); 
}