// Source: www.pudn.com > mmxswarm.zip > SSE2Wrapper.h
// SSE2Wrapper.h : Very thin object wrapper over the SSE2 types
//
// This is a part of the Microsoft Foundation Classes C++ library.
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// This source code is only intended as a supplement to the
// Microsoft Foundation Classes Reference and related
// electronic documentation provided with the library.
// See these sources for detailed information regarding the
// Microsoft Foundation Classes product.
//
#pragma once
// Thin object wrapper that makes the SSE2 integer type (__m128i) more
// C++ friendly.  The register is treated as eight unsigned 16-bit lanes;
// += / -= saturate instead of wrapping.
//
// Only SSE2 intrinsics are used: no MMX (__m64) state is touched, so no
// _mm_empty() is needed after using this class, and no compiler-specific
// union members of __m128i are accessed.
class CSSE2Unsigned16Saturated
{
public:
    // Does not initialize the register; assign, Clear(), Fill() or Load()
    // before reading it.
    CSSE2Unsigned16Saturated()
    {
    }

    // Wraps an existing SSE2 register value.
    CSSE2Unsigned16Saturated( __m128i m128i )
    {
        m_m128i = m128i;
    }

    // Places qw in the low 64 bits; the high 64 bits are zeroed.
    CSSE2Unsigned16Saturated( ULONGLONG qw )
    {
        m_m128i = LoadLowQword( qw );
    }

    CSSE2Unsigned16Saturated& operator=( const CSSE2Unsigned16Saturated& m )
    {
        m_m128i = m.m_m128i;
        return *this;
    }

    // Places qw in the low 64 bits; the high 64 bits are zeroed.
    CSSE2Unsigned16Saturated& operator=( const ULONGLONG &qw )
    {
        m_m128i = LoadLowQword( qw );
        return *this;
    }

    operator __m128i() const
    {
        return m_m128i;
    }

    // Returns the least significant half (low 64 bits) of the register.
    operator ULONGLONG() const
    {
        // Store through memory rather than reading a union member of
        // __m128i, which only some compilers provide.
        ULONGLONG qwLow;
        _mm_storel_epi64( reinterpret_cast<__m128i*>( &qwLow ), m_m128i );
        return qwLow;
    }

    // Sets all 128 bits to zero.
    void Clear()
    {
        m_m128i = _mm_setzero_si128();
    }

    // Replicates qw into both 64-bit halves of the register.
    void Fill( const ULONGLONG &qw )
    {
        // Load into the low half, then duplicate low -> high.  This stays
        // entirely in SSE2 and avoids the MMX __m64 type.
        const __m128i low = LoadLowQword( qw );
        m_m128i = _mm_unpacklo_epi64( low, low );
    }

    // Per-lane unsigned 16-bit add with saturation.
    CSSE2Unsigned16Saturated& operator+=( const CSSE2Unsigned16Saturated& m )
    {
        m_m128i = _mm_adds_epu16( m_m128i, m.m_m128i );
        return *this;
    }

    // Per-lane unsigned 16-bit subtract with saturation.
    CSSE2Unsigned16Saturated& operator-=( const CSSE2Unsigned16Saturated& m )
    {
        m_m128i = _mm_subs_epu16( m_m128i, m.m_m128i );
        return *this;
    }

    // Logical right shift of every 16-bit lane by nBits.
    CSSE2Unsigned16Saturated& operator>>=( int nBits )
    {
        m_m128i = _mm_srli_epi16( m_m128i, nBits );
        return *this;
    }

    // Left shift of every 16-bit lane by nBits.
    CSSE2Unsigned16Saturated& operator<<=( int nBits )
    {
        m_m128i = _mm_slli_epi16( m_m128i, nBits );
        return *this;
    }

    // Bitwise AND over all 128 bits.
    CSSE2Unsigned16Saturated& operator&=( const CSSE2Unsigned16Saturated& m )
    {
        m_m128i = _mm_and_si128( m_m128i, m.m_m128i );
        return *this;
    }

    // Bitwise OR over all 128 bits.
    CSSE2Unsigned16Saturated& operator|=( const CSSE2Unsigned16Saturated& m )
    {
        m_m128i = _mm_or_si128( m_m128i, m.m_m128i );
        return *this;
    }

    // Clears in this object every bit that is set in m:  this = (~m) & this.
    CSSE2Unsigned16Saturated& AndNot( const CSSE2Unsigned16Saturated& m )
    {
        m_m128i = _mm_andnot_si128( m.m_m128i, m_m128i );
        return *this;
    }

    // Packs the eight 16-bit lanes down to eight bytes and returns them as
    // one 64-bit value.
    // NOTE: _mm_packus_epi16 reads its inputs as signed 16-bit values, so
    // lanes above 0x7FFF pack to 0 and lanes in 0x0100..0x7FFF pack to 0xFF.
    ULONGLONG PackBytes() const
    {
        const __m128i packed = _mm_packus_epi16( m_m128i, _mm_setzero_si128() );
        ULONGLONG qwPacked;
        _mm_storel_epi64( reinterpret_cast<__m128i*>( &qwPacked ), packed );
        return qwPacked;
    }

    // Aligned store: pAddr must be 16-byte aligned.
    void Store( void *pAddr ) const
    {
        ASSERT((DWORD_PTR(pAddr)& 0xF) == 0);
        _mm_store_si128( reinterpret_cast<__m128i*>( pAddr ), m_m128i );
    }

    // Unaligned store: pAddr may have any alignment.
    void StoreU( void *pAddr ) const
    {
        _mm_storeu_si128( reinterpret_cast<__m128i*>( pAddr ), m_m128i );
    }

    // Aligned load: pAddr must be 16-byte aligned.
    void Load( const void *pAddr )
    {
        ASSERT((DWORD_PTR(pAddr)& 0xF) == 0);
        m_m128i = _mm_load_si128( reinterpret_cast<const __m128i*>( pAddr ) );
    }

    // Unaligned load: pAddr may have any alignment.
    void LoadU( const void *pAddr )
    {
        m_m128i = _mm_loadu_si128( reinterpret_cast<const __m128i*>( pAddr ) );
    }

    // Packs this object's lanes (low 8 result bytes) and mUpper's lanes
    // (high 8 result bytes) into 16 bytes and stores them.
    // Aligned: pAddr must be 16-byte aligned.
    void PackBytes( void *pAddr, const CSSE2Unsigned16Saturated& mUpper ) const
    {
        ASSERT((DWORD_PTR(pAddr)& 0xF) == 0);
        _mm_store_si128( reinterpret_cast<__m128i*>( pAddr ),
                         _mm_packus_epi16( m_m128i, mUpper.m_m128i ) );
    }

    // Same as PackBytes( pAddr, mUpper ) but pAddr may have any alignment.
    // Unaligned, documented to be slower.
    void PackBytesU( void *pAddr, const CSSE2Unsigned16Saturated& mUpper ) const
    {
        _mm_storeu_si128( reinterpret_cast<__m128i*>( pAddr ),
                          _mm_packus_epi16( m_m128i, mUpper.m_m128i ) );
    }

    // Expands the eight bytes of qw into eight 16-bit lanes, each byte
    // becoming the LOW byte of its lane (i.e. zero-extended).
    void UnpackBytesLo( const ULONGLONG &qw )
    {
        m_m128i = _mm_unpacklo_epi8( LoadLowQword( qw ), _mm_setzero_si128() );
    }

    // Expands the eight bytes of qw into eight 16-bit lanes, each byte
    // becoming the HIGH byte of its lane (i.e. byte << 8).
    void UnpackBytesHi( ULONGLONG qw )
    {
        m_m128i = _mm_unpacklo_epi8( _mm_setzero_si128(), LoadLowQword( qw ) );
    }

    // Treats the register as 16 bytes and zero-extends them to 16 bits:
    // mmLeft receives the upper eight bytes, this object keeps the lower
    // eight.  mmLeft is written first, while m_m128i still holds the bytes.
    void UnpackBytes(CSSE2Unsigned16Saturated &mmLeft)
    {
        const __m128i zero = _mm_setzero_si128();
        mmLeft = _mm_unpackhi_epi8( m_m128i, zero );
        m_m128i = _mm_unpacklo_epi8( m_m128i, zero );
    }

private:
    // Loads qw into the low 64 bits of a register and zeroes the high 64
    // bits.  _mm_loadl_epi64 has no alignment requirement.
    static __m128i LoadLowQword( const ULONGLONG &qw )
    {
        return _mm_loadl_epi64( reinterpret_cast<const __m128i*>( &qw ) );
    }

public:
    __m128i m_m128i;
};
// Per-lane unsigned 16-bit addition with saturation; neither operand is modified.
inline CSSE2Unsigned16Saturated operator+( const CSSE2Unsigned16Saturated& m1, const CSSE2Unsigned16Saturated& m2 )
{
    const __m128i sum = _mm_adds_epu16( m1.m_m128i, m2.m_m128i );
    return CSSE2Unsigned16Saturated( sum );
}
// Per-lane unsigned 16-bit subtraction with saturation; neither operand is modified.
inline CSSE2Unsigned16Saturated operator-( const CSSE2Unsigned16Saturated& m1, const CSSE2Unsigned16Saturated& m2 )
{
    const __m128i difference = _mm_subs_epu16( m1.m_m128i, m2.m_m128i );
    return CSSE2Unsigned16Saturated( difference );
}
// Returns m1 with every 16-bit lane shifted left by nBits.
inline CSSE2Unsigned16Saturated operator<<( const CSSE2Unsigned16Saturated& m1, int nBits )
{
    const __m128i source = m1;
    const __m128i shifted = _mm_slli_epi16( source, nBits );
    return CSSE2Unsigned16Saturated( shifted );
}
// Returns m1 with every 16-bit lane logically shifted right by nBits.
inline CSSE2Unsigned16Saturated operator>>( const CSSE2Unsigned16Saturated& m1, int nBits )
{
    const __m128i source = m1;
    const __m128i shifted = _mm_srli_epi16( source, nBits );
    return CSSE2Unsigned16Saturated( shifted );
}
// Bitwise AND of all 128 bits of m1 and m2; neither operand is modified.
inline CSSE2Unsigned16Saturated operator&( const CSSE2Unsigned16Saturated& m1, const CSSE2Unsigned16Saturated& m2 )
{
    const __m128i lhs = m1;
    const __m128i rhs = m2;
    return CSSE2Unsigned16Saturated( _mm_and_si128( lhs, rhs ) );
}