// Source: www.pudn.com > Estereo.rar > misc.cpp


/***************************************************************************  
* 
* Copyright 2004 by the Massachusetts Institute  of Technology.   All    
* rights reserved.  
*   
* Developed  by David Demirdjian 
* at the Computer Sciences and Artificial Intelligence Laboratory,  
* MIT, Cambridge, Massachusetts.  
*   
* Permission to use, copy, or modify this software and  its documentation  
* for  educational  and  research purposes only and without fee  is hereby  
* granted, provided  that this copyright notice and the original authors's  
* names appear  on all copies and supporting documentation.  If individual  
* files are  separated from  this  distribution directory  structure, this  
* copyright notice must be included.  For any other uses of this software,  
* in original or  modified form, including but not limited to distribution  
* in whole or in  part, specific  prior permission  must be  obtained from  
* MIT.  These programs shall not  be  used, rewritten, or  adapted as  the  
* basis  of  a  commercial  software  or  hardware product  without  first  
* obtaining appropriate licenses  from MIT.  MIT. makes no representations  
* about the suitability of this  software for any purpose.  It is provided  
* "as is" without express or implied warranty.  
*   
**************************************************************************/ 
#include "stereoMatching.h" 
#include "processingMMX.h" 
 
// createIdxValidPixelsList: only the prologue of this function is readable.
// It points a read cursor at depth_image and resets nbPoints to 0.
//
// NOTE(review): this block looks damaged by text extraction and does not
// compile as it stands:
//   - the `for` header below contains a single ';';
//   - it uses `d`, `ptBestScore`, `tol` and `ptDisp`, none of which are
//     declared in this function or passed as parameters;
//   - the block has one '{' but three '}'.
// Presumably everything between a '<' and a later '>' was stripped (HTML-tag
// removal), fusing the start of this function with the tail of a separate
// disparity-checking function -- TODO restore both from the original
// distribution rather than guessing at the lost text.
void createIdxValidPixelsList(const uchar* depth_image, int* idx_valid_pixels, int& nbPoints, int siz, int UNDEFINED_DEPTH) 
{ 
	const uchar *ptIm = depth_image; 	// read cursor over the depth image
	nbPoints=0; 						// output counter starts at zero
	// NOTE(review): malformed line, see the header comment above.
	for (int i=0; i=0 && d= *ptBestScore + tol)  *ptDisp=0; 
		} 
	} 
} 
 
 
// --------------------------------------------------------------- 
// checkDisparityValidityAndSearchAround
//
// Readable part of the logic (inner block below): when the score at
// ptBuffToCheck is >= thresh, the scores one buffStep before and one buffStep
// after ptBuffToCheck are compared. If the smaller of the two is still above
// thresh the disparity is reset to 0; otherwise the disparity is moved by one
// (decremented or incremented) towards the neighbour with the smaller score.
// On a tie the "next" neighbour wins because the comparison is a strict '<'.
//
// NOTE(review): the original one-line description ended mid-sentence
// ("... 'acceptable' values of"), and the `for` line below is damaged:
// `ptBuffToCheck` and `thresh` are never declared in the visible text, `d` is
// undeclared, and two opening braces are missing (the block closes two more
// scopes than it opens). Presumably the text between a '<' and a later '>'
// was stripped by HTML-tag removal -- TODO restore from the original
// distribution. `undefined_val` is not referenced in the visible text.
void checkDisparityValidityAndSearchAround(uchar* disp, uchar* buff, int buffStep, 											 
											uchar* bestScore, uchar tol,  
											uchar undefined_val, uchar nbDepth, int imageSize) 
{ 
	float score_prevDisp, score_nextDisp, minScore; 
	bool test; 
	uchar* ptDisp = disp, *ptBuff=buff, *ptBestScore = bestScore; 
	// NOTE(review): malformed line, see the header comment above.
	for (int i=0; i0 && d= thresh)  
			{ 
				// current disparity rejected: look at the two neighbouring
				// entries, one buffStep before and one buffStep after
				score_prevDisp = *(ptBuffToCheck-buffStep) ; 
				score_nextDisp = *(ptBuffToCheck+buffStep) ; 
				test = (score_prevDisp < score_nextDisp); 
				minScore = (test)?score_prevDisp:score_nextDisp; 
				if (minScore > thresh)  
					*ptDisp=0; 			// neither neighbour is acceptable
				else 
				{ 
					// step the disparity towards the better neighbour
					if (test)  
						--(*ptDisp);  
					else  
						++(*ptDisp); 
				} 
			} 
		} 
	} 
} 
/*
 * _ABS_DIFF_TRI_320_240(width, sizeImage, Z)
 *
 * One step of the 8-pixel pipeline used by ImgSubandAdd2_320_240. It must be
 * expanded inside an MSVC __asm block. It computes
 *     saturate(|src1 - src2| + |src1 - src3|)
 * on 8 unsigned bytes and stores the result at [edi + sizeImage], while
 * preparing the operands of the following step.
 *
 * Parameters (all used as immediates):
 *   width     - byte offset from ebx of the src2 operand to load for the NEXT step
 *   sizeImage - byte offset from edi at which this step's result is stored
 *   Z         - bit count by which a copy of mm0 is shifted left into mm7
 *
 * Registers on entry:
 *   mm0 - 8 bytes that follow the current src3 group (not modified here)
 *   mm1 - src1 (not modified here)
 *   mm2 - src2 for this step
 *   mm3 - src3 already shifted right by one byte (top byte zero)
 *   mm7 - bytes to OR into the top of mm3 to complete src3 for this step
 * Registers on exit:
 *   mm2 - [ebx + width]
 *   mm3 - this step's src3 shifted right by 8 bits
 *   mm7 - mm0 << Z
 *   mm4, mm5, mm6 - clobbered
 *
 * psubusb is an unsigned saturating subtract, so OR-ing (a - b) with (b - a)
 * yields |a - b| per byte. The instructions are interleaved by hand; do not
 * reorder them without re-checking every register dependency.
 */
#define _ABS_DIFF_TRI_320_240(width,sizeImage,Z) __asm \
{ \
	__asm	movq	mm4,mm1		/* mm4=mm1 */ \
	__asm	por		mm3,mm7		/* here mm2=new src2		mm3=new src3 */ \
\
	__asm	movq	mm7, mm0 \
	__asm	psubusb	mm4,mm2		/* mm4 = src1 - src2 */ \
\
	__asm	psubusb	mm2,mm1		/* mm2 = src2 - src1 */ \
	__asm	psllq	mm7,Z	\
\
	__asm	movq	mm5,mm1		/* mm5=src1 */ \
	__asm	por		mm4,mm2		/* mm4=|src1-src2| */ \
\
	__asm	movq	mm2,[ebx + width]	/* mm2= src2 + 'width' = new src2*/ \
	__asm	psubusb	mm5,mm3		/* mm5=src1-src3*/ \
\
	__asm	movq	mm6,mm3		/* mm6=src3*/ \
    __asm   psubusb	mm6,mm1	 	/* mm6=src3-src1*/ \
\
	__asm	por		mm6,mm5		/* mm6=|src1-src3|*/ \
	__asm	paddusb mm4,mm6		/* mm4 = |src1-src2|+|src1-src3| (saturating) */ \
\
	__asm	movq    [edi+sizeImage], mm4	 /* here mm1=src1*/	 \
	__asm	psrlq	mm3, 8		/* mm3 = src3 + '1' ... with [x00000000] at the end*/\
}
 
 
//  ImgSubandAdd2_320_240: D = saturate(|S1 - S2| + |S1 - S3|) 
//  Processes 8 pixels x 8 disparities per loop iteration (MMX), with the
//  image geometry hard-coded to 320x240.
// 
//			Src1: right	 
//			Src2: top	 
//			Src3: left 
// 
//  For every group of 8 pixels at offset p (p = 0, 8, 16, ...), every
//  disparity k = 0..7 and every j = 0..7:
//      Dest1[k*320*240 + p + j] =
//          sat8( |Src1[p+j] - Src2[p+j + k*320]| + |Src1[p+j] - Src3[p+j + k]| )
//  i.e. Src2 is stepped by 320 bytes and Src3 by 1 byte per disparity, and
//  each disparity is written to its own 320*240-byte plane of Dest1.
//
//  Parameters:
//      Src1, Src2, Src3 - 8-bit input images (see the formula above)
//      Dest1            - output, 8 planes of 320*240 bytes
//      l                - number of pixels to process; only the first
//                         (l / 8) * 8 pixels are handled
//  Returns 0 (nothing done) when l < 8, 1 otherwise.
//
//  Buffer requirements implied by the loads/stores below, with
//  n = (l / 8) * 8:
//      Src3  must be readable for n + 8 bytes (the loop always pre-loads the
//            following 8 bytes, also on its last iteration);
//      Src2  must be readable for n + 7*320 bytes;
//      Dest1 must be writable for 7*320*240 + n bytes.
//
//  Relies on the _ABS_DIFF_TRI_320_240 macro and on MSVC x86 inline assembly.
//
// TODO? divide the result by 2 (shift) 
int ImgSubandAdd2_320_240(const unsigned char *Src1, const unsigned char *Src2,  
				 const unsigned char *Src3,  
				 unsigned char* Dest1, int l) 
{ 
 
	if (l < 8) return 0;              // image size must be at least 8 bytes  
 
  __asm  
  {		 
        mov eax, Src1      
        mov ebx, Src2 
		mov edx, Src3 
        mov edi, Dest1    
 
        mov	ecx, l    
        shr	ecx, 3		// ecx = number of 8-pixel groups
	 
		movq	mm0,[edx]	// mm0=src3 (first group; later groups come from the [edx+8] pre-load)
align 16 
inner_loop: 
		movq	mm1,[eax]	// mm1=src1 
		movq	mm3,mm0		// mm3=src3 
 
		movq	mm2,[ebx]	// mm2=src2 
        add eax,8          
  
		// -- 1 --------- in : mm1,mm2,mm3     out: mm4=SAD  mm2=new mm2 -- 
		movq	mm4,mm1		// mm4=mm1 
		psubusb	mm4,mm2		// mm4 = src1 - src2 
 
		movq	mm0,[edx+8]	// mm0 = next 8 bytes of src3 (source of the shifted-in bytes)
		psubusb	mm2,mm1		// mm2 = src2 - src1 
         
		movq	mm5,mm1		// mm5=src1 
		por		mm4,mm2		// mm4=|src1-src2| 
 
		movq	mm2,[ebx+320]	// mm2= src2 + 'width' = new src2 
		psubusb	mm5,mm3		// mm5=src1-src3 
 
		movq	mm6,mm3		// mm6=src3 
        psubusb	mm6,mm1	 	// mm6=src3-src1 
 
		movq	mm7, mm0 
		psrlq	mm3, 8		// mm3 = src3 + '1' ... with [x00000000] at the end 
 
		por		mm6,mm5		// mm6=|src1-src3| 
		paddusb mm4,mm6		// mm4 = |src1-src2|+|src1-src3| 
 
        movq    [edi], mm4	// disparity 0 -> plane 0
		psllq	mm7, 56		// here mm1=src1	mm2=NEW src2	mm3=begin of NEWsrc3 	  mm7=end of NEWsrc3 
		// ------------------------------------------------------------- 
		 
 
		// - 2 ---------------- 
		_ABS_DIFF_TRI_320_240(640,320*240,48) 
	 
 		// - 3 ---------------- 
		_ABS_DIFF_TRI_320_240(960,2*320*240,40) 
 
 		// - 4 ---------------- 
		_ABS_DIFF_TRI_320_240(1280,3*320*240,32) 
 
		// - 5 ---------------- 
		_ABS_DIFF_TRI_320_240(1600,4*320*240,24) 
		 
		// - 6 ---------------- 
		_ABS_DIFF_TRI_320_240(1920,5*320*240,16) 
		 
		// - 7 ---------------- 
		_ABS_DIFF_TRI_320_240(2240,6*320*240,8) 
	 
 
		// - 8 ---------------- 
		movq	mm4,mm1		// mm4=mm1 
		por		mm3,mm7		// here mm2=new src2		mm3=new src3 
 
		psubusb	mm4,mm2		// mm4 = src1 - src2 
		psubusb	mm2,mm1		// mm2 = src2 - src1 
         
		movq	mm5,mm1		// mm5=src1 
		por		mm4,mm2		// mm4=|src1-src2| 
 
		psubusb	mm5,mm3		// mm5=src1-src3 
        psubusb	mm3,mm1	 	// mm3=src3-src1 
 
		por		mm3,mm5		// mm3=|src1-src3| 
		paddusb mm4,mm3		// mm4 = |src1-src2|+|src1-src3| 
		 
		// Disparity 7 belongs in plane 7, like planes 1..6 written by the
		// macro above. The previous offset (2560 = 8*320) left plane 7
		// unwritten and overwrote other planes' data instead.
        movq    [edi+7*320*240], mm4	 // here mm1=src1	 
		// ------------------------------------------------------------- 
 		//  
        add ebx,8 
        add edx,8      
        add edi,8		 
        dec ecx       
        jnz inner_loop     
        emms   		// leave MMX state so x87 floating point can be used again
  } 
	 
  return 1; 
} 
 
 
/*
 * _ABS_DIFF_HORIZ_16(Z)
 *
 * One step of the 8-pixel pipeline used by ImgSubandAdd2_Horiz_16. It must be
 * expanded inside an MSVC __asm block in a scope where a variable named
 * `imageSize` is visible (the macro refers to it by name). It advances edi by
 * imageSize, computes |src1 - src3| on 8 unsigned bytes, zero-extends the 8
 * results to 16-bit words and stores them at [edi] (pixels 0..3) and
 * [edi+8] (pixels 4..7). It also builds src3 for the following step.
 *
 * Parameter:
 *   Z - bit count by which a copy of mm0 is shifted left before being OR-ed
 *       into the right-shifted mm3
 *
 * Registers on entry:
 *   mm0 - 8 bytes that follow the current src3 group (not modified here)
 *   mm1 - src1 (not modified here)
 *   mm3 - src3 for this step
 *   edi - byte address of the previous step's output
 * Registers on exit:
 *   mm3 - (src3 >> 8) | (mm0 << Z), i.e. src3 for the next step
 *   edi - advanced by imageSize
 *   mm5, mm6, mm7 - clobbered
 * (mm2 is not used by this macro.)
 */
#define _ABS_DIFF_HORIZ_16(Z) __asm \
{ \
	__asm  movq	mm7, mm0 \
	__asm  add edi, imageSize \
	__asm  movq	mm5,mm1		/* mm5=src1 */ \
	__asm  psllq	mm7, Z \
	__asm  psubusb	mm5,mm3		/* mm5=src1-src3 */ \
	__asm  movq	mm6,mm3		/* mm6=src3 */ \
    __asm  psubusb	mm6,mm1	 	/* mm6=src3-src1 */ \
	__asm  por		mm6,mm5		/* mm6=|src1-src3| */ \
	__asm  psrlq	mm3, 8		/* mm3 = src3 + '1' ... with [x00000000] at the end */ \
	__asm  por		mm3,mm7		/* here mm3=new src3 */ \
\
	__asm  movq		mm7, mm6	/* copy results to mm7 as well */ \
	__asm  pxor		mm5, mm5  	/* mm5 = 0, used as the high byte of every word */ \
	__asm  punpcklbw mm6, mm5	/* mm6 = results 0..3 as 16-bit words */ \
\
    __asm  movq    [edi], mm6	 /* store words 0..3 */ \
	__asm  punpckhbw mm7, mm5	/* mm7 = results 4..7 as 16-bit words */ \
    __asm  movq    [edi+8], mm7	 /* store words 4..7 */ \
}	
 
//  ImgSubandAdd2_Horiz_16: D = |S1 - S3|, widened to 16 bits 
//  Processes 8 pixels x 8 disparities per loop iteration (MMX).
// 
//			S1: rightIm	 
//			S3: leftIm 
//  (There is no "top" image and no sum in this variant; only one absolute
//  difference per pixel/disparity is produced.)
// 
//  For every group of 8 pixels at offset p (p = 0, 8, 16, ...), every
//  disparity k = 0..7 and every j = 0..7, the 16-bit value
//      |rightIm[p+j] - leftIm[p+j + k]|
//  is stored at byte address (char*)Dest + 2*(p+j) + k*imageSize.
//
//  Parameters:
//      rightIm, leftIm - 8-bit input images
//      Dest            - output, 16 bits per pixel and disparity
//      l               - number of pixels to process; only the first
//                        (l / 8) * 8 pixels are handled
//      imageSize       - added to the byte address in edi between two
//                        consecutive disparities, i.e. the plane stride in
//                        BYTES. NOTE(review): if callers pass a pixel count,
//                        the 16-bit planes overlap -- confirm against callers.
//      width           - not referenced by this function
//  Returns 0 (nothing done) when l < 8, 1 otherwise.
//
//  leftIm must be readable for (l / 8) * 8 + 8 bytes: the loop always
//  pre-loads the following 8 bytes, also on its last iteration.
//
//  Relies on the _ABS_DIFF_HORIZ_16 macro and on MSVC x86 inline assembly.
//
// TODO? divide the result by 2 (shift) 
int ImgSubandAdd2_Horiz_16(const unsigned char *rightIm, const unsigned char *leftIm,  
						   unsigned short* Dest, int l, int imageSize, int width) 
{ 
 
	if (l < 8) return 0;              // image size must be at least 8 bytes  
	// edi is advanced by imageSize seven times per group (disparities 1..7);
	// this is the amount needed to come back to the first plane.
	const int back_step2 = 7*imageSize; 
  __asm  
  {		 
        mov eax, rightIm      
 		mov edx, leftIm 
        mov edi, Dest  
 
        mov	ecx, l    
        shr	ecx, 3		// ecx = number of 8-pixel groups
	 
		movq	mm0,[edx]	// mm0=src3 
		movq	mm0,[edx]	// mm0=src3   NOTE(review): exact repeat of the previous load, has no extra effect
align 16 
inner_loop: 
 
		movq	mm1,[eax]	// mm1=src1 
		movq	mm3,mm0		// mm3=src3 
 
		// -- 1 --------- in : mm1,mm3     out: 8 words at [edi], mm3=new src3 -- 
		movq	mm0,[edx+8]	// mm0 = next 8 bytes of src3 (source of the shifted-in bytes)
        add eax,8          
    
		movq	mm5,mm1		// mm5=src1 
		psubusb	mm5,mm3		// mm5=src1-src3 
 
		movq	mm6,mm3		// mm6=src3 
        psubusb	mm6,mm1	 	// mm6=src3-src1 
 
		movq	mm7, mm0 
		psrlq	mm3, 8		// mm3 = src3 + '1' ... with [x00000000] at the end 
 
		por		mm6,mm5		// mm6=|src1-src3| 
		psllq	mm7, 56		// here mm1=src1	mm3=begin of NEWsrc3 	  mm7=end of NEWsrc3 
		por		mm3,mm7		// here mm3=new src3 
 
		// ------ widen the 8 byte results to 16-bit words --------- 
		movq		mm7, mm6	// copy results to mm7 as well 
		pxor		mm5, mm5	// mm5 = 0
		punpcklbw mm6, mm5	// mm6 = results 0..3 as words 
 
		movq    [edi], mm6	 // store words 0..3 (disparity 0)
		punpckhbw mm7, mm5	// mm7 = results 4..7 as words 
		movq    [edi+8], mm7	 // store words 4..7 (disparity 0)
 
		// - 2 .. 7 ----------- each expansion first advances edi by imageSize
		_ABS_DIFF_HORIZ_16(48) 
		_ABS_DIFF_HORIZ_16(40) 
		_ABS_DIFF_HORIZ_16(32) 
		_ABS_DIFF_HORIZ_16(24) 
		_ABS_DIFF_HORIZ_16(16) 
		_ABS_DIFF_HORIZ_16(8) 
 
		// - 8 ---------------- 
		movq	mm5,mm1		// mm5=src1 
		add edi, imageSize 
 
		psubusb	mm5,mm3		// mm5=src1-src3 
        psubusb	mm3,mm1	 	// mm3=src3-src1 
 
		por		mm3,mm5		// mm3=|src1-src3|		 
		// NOTE(review): this 8-bit store is overwritten by the 16-bit store to
		// the same address a few instructions below, before anything reads it.
        movq    [edi], mm3	  
 
		// ------ widen the 8 byte results to 16-bit words --------- 
		movq		mm7, mm3	// copy results to mm7 as well 
		pxor		mm5, mm5	// mm5 = 0
		punpcklbw mm3, mm5	// mm3 = results 0..3 as words 
 
		movq    [edi], mm3	 // store words 0..3 (disparity 7)
		punpckhbw mm7, mm5	// mm7 = results 4..7 as words 
		movq    [edi+8], mm7	 // store words 4..7 (disparity 7)
		 
		// ------------------------------------------------------------- 
 		//  
        add edx,8      
		sub edi, back_step2	// back to the first disparity plane
        add edi,16		// next group: 8 pixels * 2 bytes
        dec ecx       
        jnz inner_loop     
        emms   		// leave MMX state so x87 floating point can be used again
  } 
	 
  return 1; 
}