// Source listing: www.pudn.com > Estereo.rar > misc.cpp
/***************************************************************************
*
* Copyright 2004 by the Massachusetts Institute of Technology. All
* rights reserved.
*
* Developed by David Demirdjian
* at the Computer Sciences and Artificial Intelligence Laboratory,
* MIT, Cambridge, Massachusetts.
*
* Permission to use, copy, or modify this software and its documentation
* for educational and research purposes only and without fee is hereby
* granted, provided that this copyright notice and the original authors's
* names appear on all copies and supporting documentation. If individual
* files are separated from this distribution directory structure, this
* copyright notice must be included. For any other uses of this software,
* in original or modified form, including but not limited to distribution
* in whole or in part, specific prior permission must be obtained from
* MIT. These programs shall not be used, rewritten, or adapted as the
* basis of a commercial software or hardware product without first
* obtaining appropriate licenses from MIT. MIT. makes no representations
* about the suitability of this software for any purpose. It is provided
* "as is" without express or implied warranty.
*
**************************************************************************/
#include "stereoMatching.h"
#include "processingMMX.h"
// createIdxValidPixelsList
//
// NOTE(review): this copy of the function is corrupted. Everything between the
// start of the loop header below and a later ">= *ptBestScore + tol" test has been
// lost (it looks as if the text between '<' and '>' characters was stripped when
// the file was extracted). As a result the loop line does not compile and it
// refers to names (d, ptBestScore, tol, ptDisp) that are not declared in this
// function; they most likely belong to a following function whose header was
// lost too. Restore this region from a pristine copy of misc.cpp rather than
// repairing it in place.
//
// What is still visible: ptIm starts at depth_image and nbPoints is reset to 0
// before the loop. Judging by the parameter names only (TODO confirm against the
// original source), the function is meant to collect into idx_valid_pixels the
// indices of the pixels of depth_image that are not UNDEFINED_DEPTH, scanning
// siz pixels, and to return their count through nbPoints.
void createIdxValidPixelsList(const uchar* depth_image, int* idx_valid_pixels, int& nbPoints, int siz, int UNDEFINED_DEPTH)
{
const uchar *ptIm = depth_image;
nbPoints=0;
// NOTE(review): corrupted line, see the note at the top of this function.
for (int i=0; i=0 && d= *ptBestScore + tol) *ptDisp=0;
}
}
}
// ---------------------------------------------------------------
// checkDisparityValidityAndSearchAround
//
// Checks the disparity stored for each pixel and, when it is rejected, looks at
// the two neighbouring disparities (one score plane before and one after, the
// planes being buffStep bytes apart) to either move the disparity by one step
// or invalidate it.
//
// NOTE(review): the loop header below is corrupted in this copy of the file (text
// between '<' and '>' appears to have been stripped). The declarations of d,
// thresh and ptBuffToCheck, the loop condition and increment, and two opening
// braces are missing, so the function does not compile as shown and the braces
// are unbalanced. The exact validity test is therefore unknown; restore it from a
// pristine copy of misc.cpp. undefined_val, nbDepth and imageSize are not used in
// the part that survived.
void checkDisparityValidityAndSearchAround(uchar* disp, uchar* buff, int buffStep,
uchar* bestScore, uchar tol,
uchar undefined_val, uchar nbDepth, int imageSize)
{
float score_prevDisp, score_nextDisp, minScore;
bool test;
uchar* ptDisp = disp, *ptBuff=buff, *ptBestScore = bestScore;
// NOTE(review): corrupted line, see the note above the function.
for (int i=0; i0 && d= thresh)
{
// pixel not valid: read the scores of the previous and next disparity for the
// same pixel (one buffStep before / after the score being checked)
score_prevDisp = *(ptBuffToCheck-buffStep) ;
score_nextDisp = *(ptBuffToCheck+buffStep) ;
// test is true when the previous disparity has the lower score
test = (score_prevDisp < score_nextDisp);
minScore = (test)?score_prevDisp:score_nextDisp;
// even the better neighbour is above the threshold: mark the pixel with 0
if (minScore > thresh)
*ptDisp=0;
else
{
// otherwise move the disparity one step towards the better neighbour
if (test)
--(*ptDisp);
else
++(*ptDisp);
}
}
}
}
}
// _ABS_DIFF_TRI_320_240(width, sizeImage, Z): one middle step of the unrolled
// loop body of ImgSubandAdd2_320_240. It expands to MSVC inline assembly and is
// only meaningful inside that function's __asm block.
//
// Registers on entry:
//   mm0 = the 8 bytes loaded from [edx+8] (src3 bytes following the current group)
//   mm1 = src1, current 8 bytes (not modified by the macro)
//   mm2 = src2 for this step
//   mm3 = src3 of the previous step shifted right by one byte (top byte is 0)
//   mm7 = shifted copy of mm0 holding the bytes to OR into the top of mm3
// Action:
//   [edi+sizeImage] = saturating_add(|mm1-mm2|, |mm1-mm3|), per unsigned byte
// Registers on exit (prepared for the next step):
//   mm2 = the 8 bytes at [ebx+width]
//   mm3 = this step's src3 shifted right by 8 bits
//   mm7 = mm0 << Z
// mm4, mm5 and mm6 are used as scratch.
// |a-b| is built as (a -sat b) | (b -sat a): with unsigned saturating subtraction
// one of the two differences is always 0.
#define _ABS_DIFF_TRI_320_240(width,sizeImage,Z) __asm \
{ \
__asm movq mm4,mm1 /* mm4=src1 */ \
__asm por mm3,mm7 /* mm3 = src3 for this step (top bytes filled in from mm7) */ \
\
__asm movq mm7, mm0 \
__asm psubusb mm4,mm2 /* mm4 = src1 - src2 (saturated) */ \
\
__asm psubusb mm2,mm1 /* mm2 = src2 - src1 (saturated) */ \
__asm psllq mm7,Z /* mm7 = mm0 << Z: top bytes for the next step's src3 */ \
\
__asm movq mm5,mm1 /* mm5=src1 */ \
__asm por mm4,mm2 /* mm4=|src1-src2| */ \
\
__asm movq mm2,[ebx + width] /* mm2 = bytes at src2 + 'width' = src2 of the next step */ \
__asm psubusb mm5,mm3 /* mm5 = src1 - src3 (saturated) */ \
\
__asm movq mm6,mm3 /* mm6=src3 */ \
__asm psubusb mm6,mm1 /* mm6 = src3 - src1 (saturated) */ \
\
__asm por mm6,mm5 /* mm6=|src1-src3| */ \
__asm paddusb mm4,mm6 /* mm4 = |src1-src2|+|src1-src3| (saturated) */ \
\
__asm movq [edi+sizeImage], mm4 /* store the result; mm1 is still src1 */ \
__asm psrlq mm3, 8 /* mm3 = src3 advanced by one byte, top byte zero until the next por */\
}
// ImgSubandAdd2_320_240: D = saturate(|S1 - S2| + |S1 - S3|), 8 disparities at a time,
// specialised for 320x240 images (MMX, MSVC inline assembly).
//
// For every group of 8 bytes x of the first l bytes, and for k = 0..7:
//   Dest1[k*320*240 + x] = sat_add(|Src1[x] - Src2[x + 320*k]|, |Src1[x] - Src3[x + k]|)
// i.e. one step of k moves Src2 by one 320-byte row and Src3 by one byte, and
// each k is written to its own plane, 320*240 bytes apart.
//
// Src1: right
// Src2: top
// Src3: left
// Dest1: output, 8 planes of 320*240 bytes
// l: number of bytes to process; only whole groups of 8 are handled
//    (l/8 iterations), a remainder is ignored
//
// Returns 0 without doing anything when l < 8, 1 otherwise.
//
// Reads beyond the first l bytes of the inputs: Src3 is read 8 bytes ahead
// ([edx+8]) and Src2 up to 2240 bytes ahead ([ebx+2240]), so both buffers must
// be readable that far.
//
// Fix: the 8th result used to be stored at [edi+2560], which is not the 8th
// plane (planes 1..6 are stored at k*320*240 by the macro steps); it is now
// stored at [edi+7*320*240]. A duplicated initial load of mm0 was removed.
//
// TODO? divide the result by 2 (shift)
int ImgSubandAdd2_320_240(const unsigned char *Src1, const unsigned char *Src2,
                          const unsigned char *Src3,
                          unsigned char* Dest1, int l)
{
    if (l < 8) return 0; // image size must be at least 8 bytes

    __asm
    {
        mov eax, Src1
        mov ebx, Src2
        mov edx, Src3
        mov edi, Dest1
        mov ecx, l
        shr ecx, 3              // ecx = number of 8-byte groups
        movq mm0,[edx]          // mm0 = first 8 bytes of src3
        align 16
    inner_loop:
        movq mm1,[eax]          // mm1=src1
        movq mm3,mm0            // mm3=src3 (loaded during the previous iteration)
        movq mm2,[ebx]          // mm2=src2
        add eax,8
        // -- 1 --------- in: mm1,mm2,mm3  out: [edi]=SAD, mm2=next src2 --
        movq mm4,mm1            // mm4=src1
        psubusb mm4,mm2         // mm4 = src1 - src2 (saturated)
        movq mm0,[edx+8]        // mm0 = next 8 bytes of src3
        psubusb mm2,mm1         // mm2 = src2 - src1 (saturated)
        movq mm5,mm1            // mm5=src1
        por mm4,mm2             // mm4=|src1-src2|
        movq mm2,[ebx+320]      // mm2 = src2 + 'width' = src2 of step 2
        psubusb mm5,mm3         // mm5 = src1 - src3 (saturated)
        movq mm6,mm3            // mm6=src3
        psubusb mm6,mm1         // mm6 = src3 - src1 (saturated)
        movq mm7, mm0
        psrlq mm3, 8            // mm3 = src3 advanced by one byte, top byte zero
        por mm6,mm5             // mm6=|src1-src3|
        paddusb mm4,mm6         // mm4 = |src1-src2|+|src1-src3| (saturated)
        movq [edi], mm4         // plane 0
        psllq mm7, 56           // mm7 = first byte of the next src3 group, moved to the top byte
        // here mm1=src1  mm2=src2 of step 2  mm3/mm7=low/high part of src3 of step 2
        // -------------------------------------------------------------
        // - 2 ----------------
        _ABS_DIFF_TRI_320_240(640,320*240,48)
        // - 3 ----------------
        _ABS_DIFF_TRI_320_240(960,2*320*240,40)
        // - 4 ----------------
        _ABS_DIFF_TRI_320_240(1280,3*320*240,32)
        // - 5 ----------------
        _ABS_DIFF_TRI_320_240(1600,4*320*240,24)
        // - 6 ----------------
        _ABS_DIFF_TRI_320_240(1920,5*320*240,16)
        // - 7 ----------------
        _ABS_DIFF_TRI_320_240(2240,6*320*240,8)
        // - 8 ---------------- (last step: nothing to prepare for a next one)
        movq mm4,mm1            // mm4=src1
        por mm3,mm7             // mm3 = src3 advanced by 7 bytes
        psubusb mm4,mm2         // mm4 = src1 - src2 (saturated)
        psubusb mm2,mm1         // mm2 = src2 - src1 (saturated)
        movq mm5,mm1            // mm5=src1
        por mm4,mm2             // mm4=|src1-src2|
        psubusb mm5,mm3         // mm5 = src1 - src3 (saturated)
        psubusb mm3,mm1         // mm3 = src3 - src1 (saturated)
        por mm3,mm5             // mm3=|src1-src3|
        paddusb mm4,mm3         // mm4 = |src1-src2|+|src1-src3| (saturated)
        movq [edi+7*320*240], mm4   // plane 7 (was [edi+2560], which is not a plane offset)
        // -------------------------------------------------------------
        // advance to the next group of 8 bytes (eax was already advanced above)
        add ebx,8
        add edx,8
        add edi,8
        dec ecx
        jnz inner_loop
        emms                    // leave MMX state so the FPU can be used again
    }
    return 1;
}
// _ABS_DIFF_HORIZ_16(Z): one middle step of the unrolled loop body of
// ImgSubandAdd2_Horiz_16. It expands to MSVC inline assembly and is only
// meaningful inside that function's __asm block; it refers to the enclosing
// function's variable imageSize by name.
//
// Registers on entry:
//   mm0 = the 8 bytes loaded from [edx+8] (src3 bytes following the current group)
//   mm1 = src1, current 8 bytes (not modified by the macro)
//   mm3 = src3 for this step
//   edi = output position of the previous step
// Action:
//   edi += imageSize, then the 8 values |mm1-mm3| are zero-extended to 16 bits
//   and stored as 16 bytes at [edi] (low four) and [edi+8] (high four)
// Registers on exit:
//   mm3 = (src3 >> 8) | (mm0 << Z), i.e. src3 for the next step
// mm5, mm6 and mm7 are used as scratch.
#define _ABS_DIFF_HORIZ_16(Z) __asm \
{ \
__asm movq mm7, mm0 \
__asm add edi, imageSize /* move to the next output plane */ \
__asm movq mm5,mm1 /* mm5=src1 */ \
__asm psllq mm7, Z /* mm7 = mm0 << Z: top bytes of the next src3 */ \
__asm psubusb mm5,mm3 /* mm5 = src1 - src3 (saturated) */ \
__asm movq mm6,mm3 /* mm6=src3 */ \
__asm psubusb mm6,mm1 /* mm6 = src3 - src1 (saturated) */ \
__asm por mm6,mm5 /* mm6=|src1-src3| */ \
__asm psrlq mm3, 8 /* mm3 = src3 advanced by one byte, top byte zero */ \
__asm por mm3,mm7 /* here mm3 = src3 of the next step */ \
\
__asm movq mm7, mm6 /* copy results to mm7 as well */ \
__asm pxor mm5, mm5 /* mm5 = 0, used to zero-extend bytes to words */ \
__asm punpcklbw mm6, mm5 /* mm6 = low four results as 16-bit words */ \
\
__asm movq [edi], mm6 /* store results 0..3 */ \
__asm punpckhbw mm7, mm5 /* mm7 = high four results as 16-bit words */ \
__asm movq [edi+8], mm7 /* store results 4..7 */ \
}
// ImgSubandAdd2_Horiz_16: D = |S1 - S3| stored as 16-bit values
// process 8 disparities at a time (MMX, MSVC inline assembly)
//
// For every group of 8 bytes x of the first l bytes, and for k = 0..7, the value
// |rightIm[x] - leftIm[x + k]| is zero-extended to 16 bits and written to plane k
// of Dest. Planes are imageSize BYTES apart (imageSize is added directly to the
// destination address), and each group of 8 pixels occupies 16 bytes in a plane.
//
// rightIm: Src1 (right)
// leftIm: Src3 (left); it is read 8 bytes ahead ([edx+8]), so it must be readable
//         8 bytes beyond the first l bytes
// Dest: output planes
// l: number of bytes to process; only whole groups of 8 are handled
// imageSize: distance in bytes between two consecutive output planes
// width: not used by this function
//
// Returns 0 without doing anything when l < 8, 1 otherwise.
//
// TODO? divide the result by 2 (shift)
int ImgSubandAdd2_Horiz_16(const unsigned char *rightIm, const unsigned char *leftIm,
unsigned short* Dest, int l, int imageSize, int width)
{
if (l < 8) return 0; // image size must be at least 8 bytes
// the loop body advances edi by imageSize seven times; this is the amount to go back
const int back_step2 = 7*imageSize;
__asm
{
mov eax, rightIm
mov edx, leftIm
mov edi, Dest
mov ecx, l
shr ecx, 3 // ecx = number of 8-byte groups
movq mm0,[edx] // mm0=src3
movq mm0,[edx] // mm0=src3 (same load repeated; redundant)
align 16
inner_loop:
movq mm1,[eax] // mm1=src1
movq mm3,mm0 // mm3=src3 (loaded during the previous iteration)
// -- 1 --------- in : mm1,mm3 out: [edi]=|src1-src3| mm3=src3 of step 2 --
movq mm0,[edx+8] // mm0 = next 8 bytes of src3
add eax,8
movq mm5,mm1 // mm5=src1
psubusb mm5,mm3 // mm5 = src1 - src3 (saturated)
movq mm6,mm3 // mm6=src3
psubusb mm6,mm1 // mm6 = src3 - src1 (saturated)
movq mm7, mm0
psrlq mm3, 8 // mm3 = src3 advanced by one byte, top byte zero
por mm6,mm5 // mm6=|src1-src3|
psllq mm7, 56 // here mm1=src1 mm3=low part of the next src3 mm7=top byte of the next src3
por mm3,mm7 // here mm3 = src3 of step 2
// ------ widen the 8 byte results to 16 bits and store them ---------
movq mm7, mm6 // copy results to mm7 as well
pxor mm5, mm5 // mm5 = 0, used to zero-extend bytes to words
punpcklbw mm6, mm5 // mm6 = low four results as 16-bit words
movq [edi], mm6 /* store results 0..3 of plane 0 */
punpckhbw mm7, mm5 // mm7 = high four results as 16-bit words
movq [edi+8], mm7 /* store results 4..7 of plane 0 */
// - 2 to 7 ---------------- (each step moves edi to the next plane)
_ABS_DIFF_HORIZ_16(48)
_ABS_DIFF_HORIZ_16(40)
_ABS_DIFF_HORIZ_16(32)
_ABS_DIFF_HORIZ_16(24)
_ABS_DIFF_HORIZ_16(16)
_ABS_DIFF_HORIZ_16(8)
// - 8 ---------------- (last step: no src3 to prepare for a next one)
movq mm5,mm1 // mm5=src1
add edi, imageSize // move to plane 7
psubusb mm5,mm3 // mm5 = src1 - src3 (saturated)
psubusb mm3,mm1 // mm3 = src3 - src1 (saturated)
por mm3,mm5 // mm3=|src1-src3|
movq [edi], mm3 // NOTE(review): redundant store of the packed bytes; overwritten by the 16-bit store below
// ------ widen the 8 byte results to 16 bits and store them ---------
movq mm7, mm3 // copy results to mm7 as well
pxor mm5, mm5 // mm5 = 0
punpcklbw mm3, mm5 // mm3 = low four results as 16-bit words
movq [edi], mm3 /* store results 0..3 of plane 7 */
punpckhbw mm7, mm5 // mm7 = high four results as 16-bit words
movq [edi+8], mm7 /* store results 4..7 of plane 7 */
// -------------------------------------------------------------
// advance to the next group: 8 source bytes, 16 output bytes, back in plane 0
add edx,8
sub edi, back_step2
add edi,16
dec ecx
jnz inner_loop
emms // leave MMX state so the FPU can be used again
}
return 1;
}