www.pudn.com > mediator15src.zip > dct64.c
/*
* Discrete Cosine Tansform (DCT) for subband synthesis
*
* -funroll-loops (for gcc) will remove the loops for better performance
* using loops in the source-code enhances readabillity
*/
/*
* TODO: write an optimized version for the down-sampling modes
* (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero
*/
#include "mpg123.h"
void dct64(float *out0, float *out1, float *samples)
{
float bufs[64];
{
register int i, j;
register float *b1, *b2, *bs, *costab;
b1 = samples;
bs = bufs;
costab = pnts[0] + 16;
b2 = b1 + 32;
for(i = 15; i >= 0; i--)
*bs++ = (*b1++ + *--b2);
for(i = 15; i >= 0; i--)
*bs++ = (*--b2 - *b1++) * *--costab;
b1 = bufs;
costab = pnts[1] + 8;
b2 = b1 + 16;
{
for(i = 7; i >= 0; i--)
*bs++ = (*b1++ + *--b2);
for(i = 7; i >= 0; i--)
*bs++ = (*--b2 - *b1++) * *--costab;
b2 += 32;
costab += 8;
for(i = 7; i >= 0; i--)
*bs++ = (*b1++ + *--b2);
for(i = 7; i >= 0; i--)
*bs++ = (*b1++ - *--b2) * *--costab;
b2 += 32;
}
bs = bufs;
costab = pnts[2];
b2 = b1 + 8;
for (j = 2; j ; j--)
{
for (i = 3; i >= 0; i--)
*bs++ = (*b1++ + *--b2);
for (i = 3; i >= 0; i--)
*bs++ = (*--b2 - *b1++) * costab[i];
b2 += 16;
for (i = 3; i >= 0; i--)
*bs++ = (*b1++ + *--b2);
for (i = 3; i >= 0; i--)
*bs++ = (*b1++ - *--b2) * costab[i];
b2 += 16;
}
b1 = bufs;
costab = pnts[3];
b2 = b1 + 4;
for (j = 4; j ; j--)
{
*bs++ = (*b1++ + *--b2);
*bs++ = (*b1++ + *--b2);
*bs++ = (*--b2 - *b1++) * costab[1];
*bs++ = (*--b2 - *b1++) * costab[0];
b2 += 8;
*bs++ = (*b1++ + *--b2);
*bs++ = (*b1++ + *--b2);
*bs++ = (*b1++ - *--b2) * costab[1];
*bs++ = (*b1++ - *--b2) * costab[0];
b2 += 8;
}
bs = bufs;
costab = pnts[4];
for (j = 8; j; j--)
{
float v0, v1;
v0 = *b1++; v1 = *b1++;
*bs++ = (v0 + v1);
*bs++ = (v0 - v1) * (*costab);
v0 = *b1++; v1 = *b1++;
*bs++ = (v0 + v1);
*bs++ = (v1 - v0) * (*costab);
}
}
{
register float *b1;
register int i;
for (b1 = bufs, i = 8; i; i--, b1 += 4)
b1[2] += b1[3];
for (b1 = bufs, i = 4; i; i--, b1 += 8)
{
b1[4] += b1[6];
b1[6] += b1[5];
b1[5] += b1[7];
}
for (b1 = bufs, i = 2; i; i--, b1 += 16)
{
b1[8] += b1[12];
b1[12] += b1[10];
b1[10] += b1[14];
b1[14] += b1[9];
b1[9] += b1[13];
b1[13] += b1[11];
b1[11] += b1[15];
}
}
out0[0x10*16] = bufs[0];
out0[0x10*15] = bufs[16+0] + bufs[16+8];
out0[0x10*14] = bufs[8];
out0[0x10*13] = bufs[16+8] + bufs[16+4];
out0[0x10*12] = bufs[4];
out0[0x10*11] = bufs[16+4] + bufs[16+12];
out0[0x10*10] = bufs[12];
out0[0x10* 9] = bufs[16+12] + bufs[16+2];
out0[0x10* 8] = bufs[2];
out0[0x10* 7] = bufs[16+2] + bufs[16+10];
out0[0x10* 6] = bufs[10];
out0[0x10* 5] = bufs[16+10] + bufs[16+6];
out0[0x10* 4] = bufs[6];
out0[0x10* 3] = bufs[16+6] + bufs[16+14];
out0[0x10* 2] = bufs[14];
out0[0x10* 1] = bufs[16+14] + bufs[16+1];
out0[0x10* 0] = bufs[1];
out1[0x10* 0] = bufs[1];
out1[0x10* 1] = bufs[16+1] + bufs[16+9];
out1[0x10* 2] = bufs[9];
out1[0x10* 3] = bufs[16+9] + bufs[16+5];
out1[0x10* 4] = bufs[5];
out1[0x10* 5] = bufs[16+5] + bufs[16+13];
out1[0x10* 6] = bufs[13];
out1[0x10* 7] = bufs[16+13] + bufs[16+3];
out1[0x10* 8] = bufs[3];
out1[0x10* 9] = bufs[16+3] + bufs[16+11];
out1[0x10*10] = bufs[11];
out1[0x10*11] = bufs[16+11] + bufs[16+7];
out1[0x10*12] = bufs[7];
out1[0x10*13] = bufs[16+7] + bufs[16+15];
out1[0x10*14] = bufs[15];
out1[0x10*15] = bufs[16+15];
}