// www.pudn.com > VirtualVCR-src-v2.6.9.zip > src_filter_asm.cpp
// SRC_Filter - High Quality Audio Sample-Rate-Conversion
// Copyright (C) 2001 Andreas Dittrich
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/******************************************************************************/
/* */
/* src_filter_asm.cpp */
/* */
/******************************************************************************/
/* Author(s) : Andreas Dittrich */
/* Project start: 06.11.2001 */
/* Completed : 28.11.2001 */
/* Last change : 07.03.2002 */
/* Time : 00:45 */
/******************************************************************************/
/* Changes : I */
/******************************************************************************/
/******************************************************************************/
/* Last change : 07.03.2002 */
/* Time : 00:45 */
/* By : Andreas Dittrich */
/* Description: Bug conversion long to short corrected in mono and stereo */
/******************************************************************************/
#include "src_filter_asm.h"
// History ("initial condition") buffers carried over between calls.
// Each conversion routine in this file ends by copying the tail of its input
// buffer into one of these arrays, and reads it back through an end pointer
// with negative indices while the filter window still starts before the
// current input buffer.
//
// lZi: filled by sample_rate_conversion_HQ_16bit_stereo (one long per entry).
// NOTE(review): file-scope state shared by every call - the routines are
// therefore not reentrant and cannot serve two independent streams at once.
static long lZi[N_FILTERORDER];
// sZi: filled by sample_rate_conversion_HQ_16bit_mono (one short per entry).
static short sZi[N_FILTERORDER];
// filter_mono - accumulate one filter output for 16-bit mono samples.
//
// Hand-written x86 code in an MSVC __asm block. The function is declared
// naked, so the arguments are addressed relative to esp by the code itself.
//
// Arguments (offsets are valid after the four register pushes below):
//   x_ptr   [esp+4+16]   input samples; read in ascending order across BOTH
//                        loops (edi is not reloaded between them)
//   h_ptr1  [esp+8+16]   coefficients for the first loop, read ascending
//   h_ptr2  [esp+12+16]  coefficients for the second loop, read descending,
//                        starting with the word at h_ptr2[-1]
//   y_ptr   [esp+16+16]  accumulator: *y_ptr is loaded into ecx, summed into
//                        and written back at the end
//   N_FO16  [esp+20+16]  iteration count of EACH loop; one iteration consumes
//                        8 samples and 8 coefficients (16 bytes each)
//
// Arithmetic: two sample*coefficient products are added together first, and
// that pair sum is then shifted right arithmetically by MUL_ACC_SHR before it
// is added to the accumulator.
//
// NOTE(review): both loops decrement and then test (sub/jne), so N_FO16 must
// be at least 1; 0 would wrap around instead of skipping the loop.
// NOTE(review): the function returns with a plain `ret`, i.e. the caller is
// expected to clean up the arguments - presumably __cdecl; confirm the
// project's default calling convention.
static void __declspec(naked) filter_mono( short int *x_ptr, short int *h_ptr1, short int *h_ptr2, long *y_ptr, long N_FO16 ) {
__asm {
// Save the registers this routine uses besides eax/ecx/edx.
push ebp
push edi
push esi
push ebx
// ecx = running accumulator, seeded with the current value of *y_ptr.
mov edi,[esp+16+16] ; &y
mov ecx,dword ptr [edi] ; y
mov edi,[esp+4+16] ; x_ptr
mov esi,[esp+8+16] ; h_ptr1
mov ebp, [esp+20+16] ; N_FILTERORDER16
// First loop: samples ascending (edi), coefficients ascending (esi).
h1_loop:
// taps 0 and 1
movsx eax,word ptr [esi] ; h_ptr[0]
movsx ebx,word ptr [edi] ; x_ptr[0]
imul ebx,eax
movsx eax,word ptr [edi+2]
movsx edx,word ptr [esi+2]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// taps 2 and 3
movsx eax,word ptr [edi+4]
movsx ebx,word ptr [esi+4]
imul ebx,eax
movsx eax,word ptr [edi+6]
movsx edx,word ptr [esi+6]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// taps 4 and 5
movsx eax,word ptr [edi+8]
movsx ebx,word ptr [esi+8]
imul ebx,eax
movsx eax,word ptr [edi+10]
movsx edx,word ptr [esi+10]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// taps 6 and 7
movsx eax,word ptr [edi+12]
movsx ebx,word ptr [esi+12]
imul ebx,eax
movsx eax,word ptr [edi+14]
movsx edx,word ptr [esi+14]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// Advance both pointers by 16 bytes = 8 16-bit words.
add edi,16 ; x_ptr += 8
add esi,16 ; h_ptr1 += 8
sub ebp, 1
jne h1_loop
; end of h1_loop
// NOTE: the trailing comment on the next line says h_ptr1, but [esp+12+16]
// is the third argument, h_ptr2.
mov esi,[esp+12+16] ; h_ptr1
mov ebp, [esp+20+16] ; N_FILTERORDER16
// Second loop: samples keep ascending (edi continues where the first loop
// stopped), coefficients are read backwards from h_ptr2[-1].
h2_loop:
// taps 0 and 1
movsx eax,word ptr [esi-2] ; h_ptr[0]
movsx ebx,word ptr [edi] ; x_ptr[0]
imul ebx,eax
movsx eax,word ptr [edi+2]
movsx edx,word ptr [esi-4]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// taps 2 and 3
movsx eax,word ptr [edi+4]
movsx ebx,word ptr [esi-6]
imul ebx,eax
movsx eax,word ptr [edi+6]
movsx edx,word ptr [esi-8]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// taps 4 and 5
movsx eax,word ptr [edi+8]
movsx ebx,word ptr [esi-10]
imul ebx,eax
movsx eax,word ptr [edi+10]
movsx edx,word ptr [esi-12]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// taps 6 and 7
movsx eax,word ptr [edi+12]
movsx ebx,word ptr [esi-14]
imul ebx,eax
movsx eax,word ptr [edi+14]
movsx edx,word ptr [esi-16]
imul edx,eax
add edx,ebx
sar edx,MUL_ACC_SHR
add ecx,edx
// The two trailing comments below count in bytes (16 bytes = 8 words), and
// esi holds h_ptr2 here, not h_ptr1.
add edi,16 ; x_ptr += 16
sub esi,16 ; h_ptr1 -= 16
sub ebp, 1
jne h2_loop
; end of h2_loop
// Write the accumulator back to *y_ptr and restore the saved registers.
mov edi,[esp+16+16] ; &y
mov dword ptr [edi],ecx ; y
pop ebx
pop esi
pop edi
pop ebp
ret
}
}
// sample_rate_conversion_HQ_16bit_mono - resample a block of 16-bit mono audio.
//
//   src, N_src  input samples and their count
//   dst, N_dst  output buffer; every completed pass of the main loop stores
//               one saturated 16-bit sample through *(dst++)
//   h           table of 16-bit filter coefficients, addressed in steps of
//               N_FILTERORDER2 per sub-filter
//
// For each output sample two filter sums, y_A and y_B, are built (either with
// the plain C loops while the window still reaches into the history buffer,
// i.e. mu < 0, or with filter_mono otherwise). They are blended as
// y_A*alpha + y_B*beta, rounded, shifted and clamped to [-32768, 32767].
//
// Before returning, the last N_FILTERORDER input samples are saved in the
// file-scope buffer sZi so that the next call can index them with negative
// offsets through sZi_end. Always returns 0.
//
// NOTE(review): THIS COPY OF THE FUNCTION IS DAMAGED. Two lines (marked below)
// have lost text - it looks as if everything from a '<' up to a later '>' was
// stripped, e.g. by an HTML extraction step. As a result the loop header, the
// declarations of y_A, y_B, alpha, beta, filter_nr and phase_x_N_SUBFILTER,
// and the opening of the first branch are not visible here and the function
// does not compile as shown. Restore it from a pristine copy of the file.
int sample_rate_conversion_HQ_16bit_mono( short int *src, long N_src, short int *dst, long N_dst,
short int *h )
{
short int *src_ptr, *h_ptr1, *h_ptr2, *sZi_end;
long phase;
long i,j;
long mu;
phase = -N_src;
// mu is the index (relative to src) of the first sample in the filter window;
// it starts N_FILTERORDER samples before the buffer, i.e. inside the history.
mu = -N_FILTERORDER;
// One-past-the-end pointer: sZi_end[nu] with nu < 0 addresses the history.
sZi_end = sZi + N_FILTERORDER;
// NOTE(review): damaged line - text is missing between "i" and "= N_dst".
// What remains of it is the tail of a loop that advances mu by one input
// sample each time N_dst is subtracted from phase.
for (i=0;i= N_dst ) {
mu++;
phase = phase - N_dst;
}
phase_x_N_SUBFILTER = phase*N_SUBFILTER;
filter_nr = phase_x_N_SUBFILTER/N_dst + 1;
// NOTE(review): damaged line - the rest of the beta expression and everything
// up to the middle of the first y_A accumulation is missing. The code resumes
// inside the "history" half of an if/else within a do-while loop (see the
// matching "} else {" and "} while (--j);" below).
beta = ( ((phase_x_N_SUBFILTER%N_dst)<> MUL_ACC_SHR;
y_B += ( sZi_end[nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
y_A += ( src[nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
y_B += ( src[nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr1++;
} while (--j);
// Second half of the window: coefficients are walked backwards via h_ptr2.
j = N_FILTERORDER2;
do {
if (nu<0) {
y_A += ( sZi_end[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
y_B += ( sZi_end[nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
y_A += ( src[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
y_B += ( src[nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr2--;
} while (--j);
} else {
// Whole window lies inside src: use the assembly kernel. y_B uses the
// neighbouring sub-filter (coefficients shifted by N_FILTERORDER2).
filter_mono( src_ptr, h_ptr1, h_ptr2, &y_A, (long)N_FILTERORDER16 );
filter_mono( src_ptr, h_ptr1 - N_FILTERORDER2, h_ptr2 + N_FILTERORDER2, &y_B, (long)N_FILTERORDER16 );
}
}
else
{
// Alternative case (its condition is in the lost text): y_A uses the
// coefficients at the start of h, y_B uses the coefficients offset by
// (N_SUBFILTER-1)*N_FILTERORDER2 applied to the input shifted by one sample.
h_ptr1 = h;
h_ptr2 = h + N_SUBFILTER * N_FILTERORDER2;
src_ptr = src + mu;
if (mu<0) {
long nu = mu;
j = N_FILTERORDER2;
do {
// Three cases because y_B reads sample nu+1: both samples in the history,
// nu == -1 (y_A from the history, y_B from src[0]), or both in src.
if (nu<-1) {
y_A += ( sZi_end[nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
y_B += ( sZi_end[nu+1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else if (nu<0) {
y_A += ( sZi_end[-1] * h_ptr1[0] ) >> MUL_ACC_SHR;
y_B += ( src[0] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
y_A += ( src[nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
y_B += ( src[nu+1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr1++;
} while (--j);
j = N_FILTERORDER2;
do {
if (nu<-1) {
y_A += ( sZi_end[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
y_B += ( sZi_end[nu+1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else if (nu<0) {
y_A += ( sZi_end[-1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
y_B += ( src[0] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
y_A += ( src[nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
y_B += ( src[nu+1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr2--;
} while (--j);
} else {
filter_mono( src_ptr, h_ptr1, h_ptr2, &y_A, (long)N_FILTERORDER16 );
filter_mono( src_ptr+1, h_ptr1+(N_SUBFILTER-1)*N_FILTERORDER2, h_ptr2-(N_SUBFILTER-1)*N_FILTERORDER2, &y_B, (long)N_FILTERORDER16 );
}
}
// Blend the two filter sums with weights alpha/beta, add the rounding offset,
// scale back down and saturate to the 16-bit range before storing.
long y_C = ( (y_A*alpha+y_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) );
if (y_C>32767)
*(dst++) = 32767;
else if (y_C<-32768)
*(dst++) = -32768;
else *(dst++) = (short int)y_C;
}
// Save the last N_FILTERORDER input samples as history for the next call
// (the byte count N_FILTERORDER*2 assumes a 2-byte short).
// NOTE(review): if N_src < N_FILTERORDER the source pointer lies before src;
// no such check is visible in this function.
memcpy( sZi, src+N_src-N_FILTERORDER, N_FILTERORDER*2 );
return 0;
}
// filter_stereo - accumulate one filter output per channel for interleaved
// 16-bit stereo samples.
//
// Hand-written x86 code in an MSVC __asm block. The function is declared
// naked, so the arguments are addressed relative to esp by the code itself.
//
// Arguments (offsets are valid after the six pushes below = 24 bytes):
//   x_ptr   [esp+4+24]   input frames; each 4-byte frame is read as two
//                        16-bit words, offset +0 = left, +2 = right. Read in
//                        ascending order across BOTH loops.
//   h_ptr1  [esp+8+24]   coefficients for the first loop, read ascending
//   h_ptr2  [esp+12+24]  coefficients for the second loop, read descending,
//                        starting with the word at h_ptr2[-1]
//   yl_ptr  [esp+16+24]  left accumulator; the result is ADDED to *yl_ptr
//   yr_ptr  [esp+20+24]  right accumulator; the result is ADDED to *yr_ptr
//   N_FO16  [esp+24+24]  iteration count of EACH loop; one iteration consumes
//                        8 frames (32 bytes) and 8 coefficients (16 bytes)
//
// Arithmetic: every single product is shifted right arithmetically by
// MUL_ACC_SHR before it is added to its channel sum (filter_mono, by
// contrast, shifts the sum of two products).
//
// NOTE(review): both loops decrement and then test (sub/jne), so N_FO16 must
// be at least 1; 0 would wrap around instead of skipping the loop.
// NOTE(review): the function returns with a plain `ret`, i.e. the caller is
// expected to clean up the arguments - presumably __cdecl; confirm the
// project's default calling convention.
static void __declspec(naked) filter_stereo( long *x_ptr, short int *h_ptr1, short int *h_ptr2, long *yl_ptr, long *yr_ptr, long N_FO16 ) {
__asm {
push ebp
push edi
push esi
push ebx
// Two zero-initialised stack slots serve as the channel sums
// (German "Speicher fuer" = "storage for"): [esp+4] = left, [esp+0] = right.
push 0x00000000 ; [esp+4] Speicher für yl
push 0x00000000 ; [esp+0] Speicher für yr
mov edi,[esp+4+24] ; x_ptr
mov esi,[esp+8+24] ; h_ptr1
mov ebp, [esp+24+24] ; N_FILTERORDER16
// First loop: frames ascending (edi), coefficients ascending (esi).
// The instructions of neighbouring taps are interleaved: eax always holds
// the current coefficient, ebx/edx the left/right sample and product.
h1_loop:
movsx eax,word ptr [esi+0] ; h_ptr[0]
movsx ebx,word ptr [edi+0] ; x_ptr[0] left
movsx edx,word ptr [edi+2] ; x_ptr[0] right
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi+2]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+4]
add [esp+0],edx
movsx edx,word ptr [edi+6]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi+4]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+8]
add [esp+0],edx
movsx edx,word ptr [edi+10]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi+6]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+12]
add [esp+0],edx
movsx edx,word ptr [edi+14]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi+8]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+16]
add [esp+0],edx
movsx edx,word ptr [edi+18]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi+10]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+20]
add [esp+0],edx
movsx edx,word ptr [edi+22]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi+12]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+24]
add [esp+0],edx
movsx edx,word ptr [edi+26]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi+14]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+28]
add [esp+0],edx
movsx edx,word ptr [edi+30]
imul ebx,eax
imul edx,eax
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
add [esp+0],edx
// Advance by 8 frames (32 bytes) and 8 coefficients (16 bytes).
add edi,32
add esi,16
sub ebp, 1
jne h1_loop
; end of h1_loop
mov esi,[esp+12+24] ; h_ptr2
mov ebp, [esp+24+24] ; N_FILTERORDER16
// Second loop: frames keep ascending (edi continues where the first loop
// stopped), coefficients are read backwards from h_ptr2[-1].
h2_loop:
movsx eax,word ptr [esi-2]
movsx ebx,word ptr [edi+0]
movsx edx,word ptr [edi+2]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi-4]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+4]
add [esp+0],edx
movsx edx,word ptr [edi+6]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi-6]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+8]
add [esp+0],edx
movsx edx,word ptr [edi+10]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi-8]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+12]
add [esp+0],edx
movsx edx,word ptr [edi+14]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi-10]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+16]
add [esp+0],edx
movsx edx,word ptr [edi+18]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi-12]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+20]
add [esp+0],edx
movsx edx,word ptr [edi+22]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi-14]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+24]
add [esp+0],edx
movsx edx,word ptr [edi+26]
imul ebx,eax
imul edx,eax
movsx eax,word ptr [esi-16]
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
movsx ebx,word ptr [edi+28]
add [esp+0],edx
movsx edx,word ptr [edi+30]
imul ebx,eax
imul edx,eax
sar ebx,MUL_ACC_SHR
sar edx,MUL_ACC_SHR
add [esp+4],ebx
add [esp+0],edx
add edi,32
sub esi,16
sub ebp, 1
jne h2_loop
; end of h2_loop
// Add the two channel sums to the caller's accumulators.
mov edi,[esp+16+24] ; &yl
mov eax, [esp+4]
add [edi],eax
mov edi,[esp+20+24] ; &yr
mov eax, [esp+0]
add [edi],eax
// Drop the two stack slots, then restore the saved registers.
pop eax
pop eax
pop ebx
pop esi
pop edi
pop ebp
ret
}
}
// sample_rate_conversion_HQ_16bit_stereo - resample a block of 16-bit stereo
// audio.
//
//   src, N_src  input frames and their count; each long carries one frame
//               (filter_stereo reads it as a left and a right 16-bit word)
//   dst, N_dst  output frames and their count
//   h           table of 16-bit filter coefficients, addressed in steps of
//               N_FILTERORDER2 per sub-filter
//
// Same scheme as the mono routine, carried out per channel: two filter sums
// per channel (yl_A/yl_B, yr_A/yr_B) are built either with the plain C loops
// while the window still reaches into the history (mu < 0) or with
// filter_stereo otherwise, then blended with alpha/beta, rounded, shifted and
// clamped to [-32768, 32767].
//
// Before returning, the last N_FILTERORDER input frames are saved in the
// file-scope buffer lZi for the next call. Always returns 0.
//
// NOTE(review): THIS COPY OF THE FUNCTION IS DAMAGED. Two lines (marked below)
// have lost text - it looks as if everything from a '<' up to a later '>' was
// stripped, e.g. by an HTML extraction step. The loop header, the declarations
// of the yl_*/yr_* sums, alpha, beta, filter_nr, phase_x_N_SUBFILTER, and of
// siZi_end, sisrc and sidst are not visible here and the function does not
// compile as shown. Restore it from a pristine copy of the file.
// NOTE(review): siZi_end / sisrc / sidst are presumably short-int views of
// lZi_end / src / the current output frame (index 2*nu = left, 2*nu+1 =
// right) - confirm against the original source.
int sample_rate_conversion_HQ_16bit_stereo( long *src, long N_src, long *dst, long N_dst,
short int *h )
{
long *lZi_end;
short int *h_ptr1, *h_ptr2;
long phase;
long i,j;
long mu;
phase = -N_src;
// mu is the index (relative to src) of the first frame in the filter window;
// it starts N_FILTERORDER frames before the buffer, i.e. inside the history.
mu = -N_FILTERORDER;
// One-past-the-end pointer of the history buffer.
lZi_end = lZi + N_FILTERORDER;
// NOTE(review): damaged line - text is missing between "i" and "= N_dst".
// What remains of it is the tail of a loop that advances mu by one input
// frame each time N_dst is subtracted from phase.
for (i=0;i= N_dst ) {
mu++;
phase = phase - N_dst;
}
phase_x_N_SUBFILTER = phase*N_SUBFILTER;
filter_nr = phase_x_N_SUBFILTER/N_dst + 1;
// NOTE(review): damaged line - the rest of the beta expression and everything
// up to the middle of the first yl_A accumulation is missing. The code resumes
// inside the "history" half of an if/else within a do-while loop (see the
// matching "} else {" and "} while (--j);" below).
beta = ( ((phase_x_N_SUBFILTER%N_dst)<> MUL_ACC_SHR;
yr_A += ( siZi_end[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
yl_B += ( siZi_end[2*nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( siZi_end[2*nu+1] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
yl_A += ( sisrc[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
yr_A += ( sisrc[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
yl_B += ( sisrc[2*nu] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( sisrc[2*nu+1] * h_ptr1[-N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr1++;
} while (--j);
// Second half of the window: coefficients are walked backwards via h_ptr2.
j = N_FILTERORDER2;
do {
if (nu<0) {
yl_A += ( siZi_end[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yr_A += ( siZi_end[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yl_B += ( siZi_end[2*nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( siZi_end[2*nu+1] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
yl_A += ( sisrc[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yr_A += ( sisrc[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yl_B += ( sisrc[2*nu] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( sisrc[2*nu+1] * h_ptr2[-1+N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr2--;
} while (--j);
} else {
// Whole window lies inside src: use the assembly kernel. The *_B sums use
// the neighbouring sub-filter (coefficients shifted by N_FILTERORDER2).
filter_stereo( src+mu, h_ptr1, h_ptr2, &yl_A, &yr_A, (long)N_FILTERORDER16 );
filter_stereo( src+mu, h_ptr1 - N_FILTERORDER2, h_ptr2 + N_FILTERORDER2, &yl_B, &yr_B, (long)N_FILTERORDER16 );
}
}
else
{
// Alternative case (its condition is in the lost text): the *_A sums use the
// coefficients at the start of h, the *_B sums use the coefficients offset by
// (N_SUBFILTER-1)*N_FILTERORDER2 applied to the input shifted by one frame.
h_ptr1 = h;
h_ptr2 = h + N_SUBFILTER * N_FILTERORDER2;
if (mu<0) {
long nu = mu;
j = N_FILTERORDER2;
do {
// Three cases because the *_B sums read frame nu+1: both frames in the
// history, nu == -1 (A from the history, B from the first frame of the
// input), or both in the input.
if (nu<-1) {
yl_A += ( siZi_end[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
yr_A += ( siZi_end[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
yl_B += ( siZi_end[2*nu+2] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( siZi_end[2*nu+3] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else if (nu<0) {
yl_A += ( siZi_end[-2] * h_ptr1[0] ) >> MUL_ACC_SHR;
yr_A += ( siZi_end[-1] * h_ptr1[0] ) >> MUL_ACC_SHR;
yl_B += ( sisrc[0] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( sisrc[1] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
yl_A += ( sisrc[2*nu] * h_ptr1[0] ) >> MUL_ACC_SHR;
yr_A += ( sisrc[2*nu+1] * h_ptr1[0] ) >> MUL_ACC_SHR;
yl_B += ( sisrc[2*nu+2] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( sisrc[2*nu+3] * h_ptr1[(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr1++;
} while (--j);
j = N_FILTERORDER2;
do {
if (nu<-1) {
yl_A += ( siZi_end[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yr_A += ( siZi_end[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yl_B += ( siZi_end[2*nu+2] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( siZi_end[2*nu+3] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else if (nu<0) {
yl_A += ( siZi_end[-2] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yr_A += ( siZi_end[-1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yl_B += ( sisrc[0] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( sisrc[1] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
} else {
yl_A += ( sisrc[2*nu] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yr_A += ( sisrc[2*nu+1] * h_ptr2[-1] ) >> MUL_ACC_SHR;
yl_B += ( sisrc[2*nu+2] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
yr_B += ( sisrc[2*nu+3] * h_ptr2[-1-(N_SUBFILTER-1)*N_FILTERORDER2] ) >> MUL_ACC_SHR;
}
nu++;
h_ptr2--;
} while (--j);
} else {
filter_stereo( src+mu, h_ptr1, h_ptr2, &yl_A, &yr_A, (long)N_FILTERORDER16 );
filter_stereo( src+mu+1, h_ptr1+(N_SUBFILTER-1)*N_FILTERORDER2, h_ptr2-(N_SUBFILTER-1)*N_FILTERORDER2, &yl_B, &yr_B, (long)N_FILTERORDER16 );
}
}
// Blend the two filter sums of each channel with weights alpha/beta, add the
// rounding offset, scale back down and saturate to the 16-bit range.
long yl = ( (yl_A*alpha+yl_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) );
long yr = ( (yr_A*alpha+yr_B*beta+ROUNDING_OFFSET)>>(H_DIGITS-1-MUL_ACC_SHR+LINEAR_INTERPOLATION_DIGITS) );
// NOTE(review): sidst is never advanced in the visible code; it is presumably
// re-derived from dst and i in the lost text at the top of the loop - confirm.
if (yl>32767)
sidst[0] = 32767;
else if (yl<-32768)
sidst[0] = -32768;
else sidst[0] = (short int)yl;
if (yr>32767)
sidst[1] = 32767;
else if (yr<-32768)
sidst[1] = -32768;
else sidst[1] = (short int)yr;
}
// Save the last N_FILTERORDER input frames as history for the next call
// (the byte count N_FILTERORDER*4 assumes a 4-byte long).
// NOTE(review): if N_src < N_FILTERORDER the source pointer lies before src;
// no such check is visible in this function.
memcpy( lZi, src+N_src-N_FILTERORDER, N_FILTERORDER*4 );
return 0;
}