www.pudn.com > VirtualVCR-src-v2.6.9.zip > src_filter_asm.cpp


//	SRC_Filter - High Quality Audio Sample-Rate-Conversion  
//	Copyright (C) 2001 Andreas Dittrich 
// 
//	This program is free software; you can redistribute it and/or modify 
//	it under the terms of the GNU General Public License as published by 
//	the Free Software Foundation; either version 2 of the License, or 
//	(at your option) any later version. 
// 
//	This program is distributed in the hope that it will be useful, 
//	but WITHOUT ANY WARRANTY; without even the implied warranty of 
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
//	GNU General Public License for more details. 
// 
//	You should have received a copy of the GNU General Public License 
//	along with this program; if not, write to the Free Software 
//	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 
/******************************************************************************/ 
/*                                                                            */ 
/* src_filter_asm.cpp                                                         */ 
/*                                                                            */ 
/******************************************************************************/ 
/* Author(s)    : Andreas Dittrich                                            */ 
/* Project start: 06.11.2001                                                  */ 
/* Completed    : 28.11.2001                                                  */ 
/* Last change  : 07.03.2002                                                  */ 
/* Time         : 00:45                                                       */ 
/******************************************************************************/ 
/* Changes      : I                                                           */ 
/******************************************************************************/ 
 
/******************************************************************************/ 
/* Last change  : 07.03.2002                                                  */ 
/* Time         : 00:45                                                       */ 
/* By         : Andreas Dittrich                                              */ 
/* Description: Bug conversion long to short corrected in mono and stereo     */ 
/******************************************************************************/ 
 
 
#include "src_filter_asm.h" 
 
static long lZi[N_FILTERORDER]; 
static short sZi[N_FILTERORDER]; 
 
 
static void __declspec(naked) filter_mono( short int *x_ptr, short int *h_ptr1, short int *h_ptr2, long *y_ptr, long N_FO16 ) { 
	__asm { 
		push	ebp 
		push	edi 
		push	esi 
		push	ebx 
 
		mov		edi,[esp+16+16]		; &y 
		mov		ecx,dword ptr [edi]	; y 
		mov		edi,[esp+4+16]		; x_ptr 
		mov		esi,[esp+8+16]		; h_ptr1 
 
		mov		ebp, [esp+20+16]	; N_FILTERORDER16 
 
h1_loop: 
		movsx	eax,word ptr [esi]	; h_ptr[0] 
		movsx	ebx,word ptr [edi]	; x_ptr[0] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+2] 
		movsx	edx,word ptr [esi+2] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
		movsx	eax,word ptr [edi+4] 
		movsx	ebx,word ptr [esi+4] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+6] 
		movsx	edx,word ptr [esi+6] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
		movsx	eax,word ptr [edi+8] 
		movsx	ebx,word ptr [esi+8] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+10] 
		movsx	edx,word ptr [esi+10] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
		movsx	eax,word ptr [edi+12] 
		movsx	ebx,word ptr [esi+12] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+14] 
		movsx	edx,word ptr [esi+14] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
 
		add		edi,16		; x_ptr += 8 
		add		esi,16		; h_ptr1 += 8 
		sub		ebp, 1	 
		jne		h1_loop 
; end of h1_loop 
 
 
		mov		esi,[esp+12+16]		; h_ptr1 
		mov		ebp, [esp+20+16]	; N_FILTERORDER16 
h2_loop: 
		movsx	eax,word ptr [esi-2]	; h_ptr[0] 
		movsx	ebx,word ptr [edi]	; x_ptr[0] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+2] 
		movsx	edx,word ptr [esi-4] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
		movsx	eax,word ptr [edi+4] 
		movsx	ebx,word ptr [esi-6] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+6] 
		movsx	edx,word ptr [esi-8] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
		movsx	eax,word ptr [edi+8] 
		movsx	ebx,word ptr [esi-10] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+10] 
		movsx	edx,word ptr [esi-12] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
		movsx	eax,word ptr [edi+12] 
		movsx	ebx,word ptr [esi-14] 
		imul	ebx,eax 
		movsx	eax,word ptr [edi+14] 
		movsx	edx,word ptr [esi-16] 
		imul	edx,eax 
 
		add		edx,ebx 
		sar		edx,MUL_ACC_SHR 
		add		ecx,edx 
 
		add		edi,16		; x_ptr += 16 
		sub		esi,16		; h_ptr1 -= 16 
		sub		ebp, 1	 
		jne		h2_loop 
; end of h2_loop 
 
		mov		edi,[esp+16+16]		; &y 
		mov		dword ptr [edi],ecx	; y 
 
		pop		ebx 
		pop		esi 
		pop		edi 
		pop		ebp 
		ret 
	} 
} 
 
 
 
int sample_rate_conversion_HQ_16bit_mono( short int *src, long N_src, short int *dst, long N_dst,  
				short int *h ) 
{ 
	short int *src_ptr, *h_ptr1, *h_ptr2, *sZi_end; 
	long phase; 
	long i,j; 
	long mu; 
 
	phase = -N_src; 
	mu = -N_FILTERORDER; 
	sZi_end = sZi + N_FILTERORDER; 
 
	for (i=0;i= N_dst ) { 
			mu++;  
			phase = phase - N_dst; 
		} 
 
		phase_x_N_SUBFILTER = phase*N_SUBFILTER; 
		filter_nr = phase_x_N_SUBFILTER/N_dst + 1; 
		beta = ( ((phase_x_N_SUBFILTER%N_dst)<> MUL_ACC_SHR; 
						y_B += ( sZi_end[nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						y_A += ( src[nu] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						y_B += ( src[nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr1++; 
				} while (--j); 
 
				j = N_FILTERORDER2; 
				do { 
					if (nu<0) { 
						y_A += ( sZi_end[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						y_B += ( sZi_end[nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						y_A += ( src[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						y_B += ( src[nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr2--; 
				} while (--j); 
 
			} else { 
				filter_mono( src_ptr, h_ptr1, h_ptr2, &y_A, (long)N_FILTERORDER16 ); 
				filter_mono( src_ptr, h_ptr1 - N_FILTERORDER2, h_ptr2 + N_FILTERORDER2, &y_B, (long)N_FILTERORDER16 ); 
			} 
 
		}  
		else 
		{ 
			h_ptr1 = h; 
			h_ptr2 = h + N_SUBFILTER * N_FILTERORDER2; 
			src_ptr = src + mu; 
 
			if (mu<0) { 
 
				long nu = mu; 
 
				j = N_FILTERORDER2; 
				do { 
					if (nu<-1) { 
						y_A += ( sZi_end[nu] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						y_B += ( sZi_end[nu+1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else if (nu<0) { 
						y_A += ( sZi_end[-1] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						y_B += ( src[0] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						y_A += ( src[nu] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						y_B += ( src[nu+1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr1++; 
				} while (--j); 
 
				j = N_FILTERORDER2; 
				do { 
					if (nu<-1) { 
						y_A += ( sZi_end[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						y_B += ( sZi_end[nu+1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else if (nu<0) { 
						y_A += ( sZi_end[-1] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						y_B += ( src[0] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						y_A += ( src[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						y_B += ( src[nu+1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr2--; 
				} while (--j); 
 
			} else { 
				filter_mono( src_ptr, h_ptr1, h_ptr2, &y_A, (long)N_FILTERORDER16 ); 
				filter_mono( src_ptr+1, h_ptr1+(N_SUBFILTER-1)*N_FILTERORDER2, h_ptr2-(N_SUBFILTER-1)*N_FILTERORDER2, &y_B, (long)N_FILTERORDER16 ); 
			} 
		} 
 
		long y_C = ( (y_A*alpha+y_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) ); 
 
		if (y_C>32767) 
			*(dst++) = 32767; 
		else if (y_C<-32768) 
			*(dst++) = -32768; 
		else *(dst++) = (short int)y_C; 
 
	} 
 
	memcpy( sZi, src+N_src-N_FILTERORDER, N_FILTERORDER*2 ); 
 
	return 0; 
} 
 
 
 
static void __declspec(naked) filter_stereo( long *x_ptr, short int *h_ptr1, short int *h_ptr2, long *yl_ptr, long *yr_ptr, long N_FO16 ) { 
	__asm { 
		push	ebp 
		push	edi 
		push	esi 
		push	ebx 
		push	0x00000000		; [esp+4] Speicher für yl 
		push	0x00000000		; [esp+0] Speicher für yr 
 
		mov		edi,[esp+4+24]		; x_ptr 
		mov		esi,[esp+8+24]		; h_ptr1 
		mov		ebp, [esp+24+24]	; N_FILTERORDER16 
 
h1_loop: 
		movsx	eax,word ptr [esi+0]	; h_ptr[0] 
		movsx	ebx,word ptr [edi+0]	; x_ptr[0] left  
		movsx	edx,word ptr [edi+2]	; x_ptr[0] right 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi+2] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+4]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+6] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi+4] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+8]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+10] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi+6] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+12]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+14] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi+8] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+16]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+18] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi+10] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+20]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+22] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi+12] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+24]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+26] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi+14] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+28]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+30] 
 
		imul	ebx,eax 
		imul	edx,eax 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		add		[esp+0],edx 
 
 
		add		edi,32 
		add		esi,16 
		sub		ebp, 1	 
		jne		h1_loop 
; end of h1_loop 
 
 
		mov		esi,[esp+12+24]		; h_ptr2 
		mov		ebp, [esp+24+24]	; N_FILTERORDER16 
h2_loop: 
		movsx	eax,word ptr [esi-2] 
		movsx	ebx,word ptr [edi+0]  
		movsx	edx,word ptr [edi+2] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi-4] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+4]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+6] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi-6] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+8]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+10] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi-8] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+12]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+14] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi-10] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+16]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+18] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi-12] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+20]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+22] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi-14] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+24]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+26] 
 
		imul	ebx,eax 
		imul	edx,eax 
		movsx	eax,word ptr [esi-16] 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		movsx	ebx,word ptr [edi+28]  
		add		[esp+0],edx 
		movsx	edx,word ptr [edi+30] 
 
		imul	ebx,eax 
		imul	edx,eax 
		sar		ebx,MUL_ACC_SHR 
		sar		edx,MUL_ACC_SHR 
		add		[esp+4],ebx 
		add		[esp+0],edx 
 
		 
		add		edi,32 
		sub		esi,16 
		sub		ebp, 1	 
		jne		h2_loop 
; end of h2_loop 
 
		mov		edi,[esp+16+24]		; &yl 
		mov		eax, [esp+4] 
		add		[edi],eax 
 
		mov		edi,[esp+20+24]		; &yr 
		mov		eax, [esp+0] 
		add		[edi],eax 
		 
		pop		eax 
		pop		eax 
		pop		ebx 
		pop		esi 
		pop		edi 
		pop		ebp 
		ret 
	} 
} 
 
 
 
 
 
 
 
 
 
int sample_rate_conversion_HQ_16bit_stereo( long *src, long N_src, long *dst, long N_dst,  
				short int *h ) 
{ 
	long *lZi_end; 
	short int *h_ptr1, *h_ptr2; 
	long phase; 
	long i,j; 
	long mu; 
 
	phase = -N_src; 
	mu = -N_FILTERORDER; 
	lZi_end = lZi + N_FILTERORDER; 
 
	for (i=0;i= N_dst ) { 
			mu++;  
			phase = phase - N_dst; 
		} 
 
		phase_x_N_SUBFILTER = phase*N_SUBFILTER; 
		filter_nr = phase_x_N_SUBFILTER/N_dst + 1; 
		beta = ( ((phase_x_N_SUBFILTER%N_dst)<> MUL_ACC_SHR; 
						yr_A += ( siZi_end[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yl_B += ( siZi_end[2*nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( siZi_end[2*nu+1] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						yl_A += ( sisrc[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yr_A += ( sisrc[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yl_B += ( sisrc[2*nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( sisrc[2*nu+1] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr1++; 
				} while (--j); 
 
				j = N_FILTERORDER2; 
				do { 
					if (nu<0) { 
						yl_A += ( siZi_end[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yr_A += ( siZi_end[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yl_B += ( siZi_end[2*nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( siZi_end[2*nu+1] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						yl_A += ( sisrc[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yr_A += ( sisrc[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yl_B += ( sisrc[2*nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( sisrc[2*nu+1] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr2--; 
				} while (--j); 
 
 
			} else { 
				filter_stereo( src+mu, h_ptr1, h_ptr2, &yl_A, &yr_A, (long)N_FILTERORDER16 ); 
				filter_stereo( src+mu, h_ptr1 - N_FILTERORDER2, h_ptr2 + N_FILTERORDER2, &yl_B, &yr_B, (long)N_FILTERORDER16 ); 
			} 
 
		}  
		else 
		{ 
			h_ptr1 = h; 
			h_ptr2 = h + N_SUBFILTER * N_FILTERORDER2; 
 
			if (mu<0) { 
 
				long nu = mu; 
 
				j = N_FILTERORDER2; 
				do { 
					if (nu<-1) { 
						yl_A += ( siZi_end[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yr_A += ( siZi_end[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yl_B += ( siZi_end[2*nu+2] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( siZi_end[2*nu+3] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else if (nu<0) { 
						yl_A += ( siZi_end[-2] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yr_A += ( siZi_end[-1] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yl_B += ( sisrc[0] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( sisrc[1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						yl_A += ( sisrc[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yr_A += ( sisrc[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR; 
						yl_B += ( sisrc[2*nu+2] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( sisrc[2*nu+3] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr1++; 
				} while (--j); 
 
				j = N_FILTERORDER2; 
				do { 
					if (nu<-1) { 
						yl_A += ( siZi_end[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yr_A += ( siZi_end[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yl_B += ( siZi_end[2*nu+2] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( siZi_end[2*nu+3] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else if (nu<0) { 
						yl_A += ( siZi_end[-2] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yr_A += ( siZi_end[-1] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yl_B += ( sisrc[0] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( sisrc[1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} else { 
						yl_A += ( sisrc[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yr_A += ( sisrc[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR; 
						yl_B += ( sisrc[2*nu+2] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
						yr_B += ( sisrc[2*nu+3] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR; 
					} 
					nu++; 
					h_ptr2--; 
				} while (--j); 
 
			} else { 
				filter_stereo( src+mu, h_ptr1, h_ptr2, &yl_A, &yr_A, (long)N_FILTERORDER16 ); 
				filter_stereo( src+mu+1, h_ptr1+(N_SUBFILTER-1)*N_FILTERORDER2, h_ptr2-(N_SUBFILTER-1)*N_FILTERORDER2, &yl_B, &yr_B, (long)N_FILTERORDER16 ); 
			} 
		} 
		 
		long yl = ( (yl_A*alpha+yl_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) ); 
		long yr = ( (yr_A*alpha+yr_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) ); 
 
		if (yl>32767) 
			sidst[0] = 32767; 
		else if (yl<-32768) 
			sidst[0] = -32768; 
		else sidst[0] = (short int)yl; 
 
		if (yr>32767) 
			sidst[1] = 32767; 
		else if (yr<-32768) 
			sidst[1] = -32768; 
		else sidst[1] = (short int)yr; 
 
	} 
 
	memcpy( lZi, src+N_src-N_FILTERORDER, N_FILTERORDER*4 ); 
 
	return 0; 
}