www.pudn.com > OpenCV-Intel.zip > cvimgwarp.cpp


/*M/////////////////////////////////////////////////////////////////////////////////////// 
// 
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
// 
//  By downloading, copying, installing or using the software you agree to this license. 
//  If you do not agree to this license, do not download, install, 
//  copy or use the software. 
// 
// 
//                        Intel License Agreement 
//                For Open Source Computer Vision Library 
// 
// Copyright (C) 2000, Intel Corporation, all rights reserved. 
// Third party copyrights are property of their respective owners. 
// 
// Redistribution and use in source and binary forms, with or without modification, 
// are permitted provided that the following conditions are met: 
// 
//   * Redistribution's of source code must retain the above copyright notice, 
//     this list of conditions and the following disclaimer. 
// 
//   * Redistribution's in binary form must reproduce the above copyright notice, 
//     this list of conditions and the following disclaimer in the documentation 
//     and/or other materials provided with the distribution. 
// 
//   * The name of Intel Corporation may not be used to endorse or promote products 
//     derived from this software without specific prior written permission. 
// 
// This software is provided by the copyright holders and contributors "as is" and 
// any express or implied warranties, including, but not limited to, the implied 
// warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, 
// indirect, incidental, special, exemplary, or consequential damages 
// (including, but not limited to, procurement of substitute goods or services; 
// loss of use, data, or profits; or business interruption) however caused 
// and on any theory of liability, whether in contract, strict liability, 
// or tort (including negligence or otherwise) arising in any way out of 
// the use of this software, even if advised of the possibility of such damage. 
// 
//M*/ 
 
/* //////////////////////////////////////////////////////////////////// 
// 
//  Geometrical transforms on images and matrices: rotation, zoom etc. 
// 
// */ 
 
#include "_cv.h" 
 
 
/************** interpolation constants and tables ***************/ 
 
#define ICV_WARP_MUL_ONE_8U(x)  ((x) << ICV_WARP_SHIFT) 
#define ICV_WARP_DESCALE_8U(x)  CV_DESCALE((x), ICV_WARP_SHIFT*2) 
#define ICV_WARP_CLIP_X(x)      ((unsigned)(x) < (unsigned)ssize.width ? \ 
                                (x) : (x) < 0 ? 0 : ssize.width - 1) 
#define ICV_WARP_CLIP_Y(y)      ((unsigned)(y) < (unsigned)ssize.height ? \ 
                                (y) : (y) < 0 ? 0 : ssize.height - 1) 
 
float icvLinearCoeffs[(ICV_LINEAR_TAB_SIZE+1)*2]; 
 
void icvInitLinearCoeffTab() 
{ 
    static int inittab = 0; 
    if( !inittab ) 
    { 
        for( int i = 0; i <= ICV_LINEAR_TAB_SIZE; i++ ) 
        { 
            float x = (float)i/ICV_LINEAR_TAB_SIZE; 
            icvLinearCoeffs[i*2] = x; 
            icvLinearCoeffs[i*2+1] = 1.f - x; 
        } 
 
        inittab = 1; 
    } 
} 
 
 
float icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE+1)*2]; 
 
void icvInitCubicCoeffTab() 
{ 
    static int inittab = 0; 
    if( !inittab ) 
    { 
#if 0 
        // classical Mitchell-Netravali filter 
        const double B = 1./3; 
        const double C = 1./3; 
        const double p0 = (6 - 2*B)/6.; 
        const double p2 = (-18 + 12*B + 6*C)/6.; 
        const double p3 = (12 - 9*B - 6*C)/6.; 
        const double q0 = (8*B + 24*C)/6.; 
        const double q1 = (-12*B - 48*C)/6.; 
        const double q2 = (6*B + 30*C)/6.; 
        const double q3 = (-B - 6*C)/6.; 
 
        #define ICV_CUBIC_1(x)  (((x)*p3 + p2)*(x)*(x) + p0) 
        #define ICV_CUBIC_2(x)  ((((x)*q3 + q2)*(x) + q1)*(x) + q0) 
#else 
        // alternative "sharp" filter 
        const double A = -0.75; 
        #define ICV_CUBIC_1(x)  (((A + 2)*(x) - (A + 3))*(x)*(x) + 1) 
        #define ICV_CUBIC_2(x)  (((A*(x) - 5*A)*(x) + 8*A)*(x) - 4*A) 
#endif 
        for( int i = 0; i <= ICV_CUBIC_TAB_SIZE; i++ ) 
        { 
            float x = (float)i/ICV_CUBIC_TAB_SIZE; 
            icvCubicCoeffs[i*2] = (float)ICV_CUBIC_1(x); 
            x += 1.f; 
            icvCubicCoeffs[i*2+1] = (float)ICV_CUBIC_2(x); 
        } 
 
        inittab = 1; 
    } 
} 
 
 
/****************************************************************************************\ 
*                                         Resize                                         * 
\****************************************************************************************/ 
 
static CvStatus CV_STDCALL 
icvResize_NN_8u_C1R( const uchar* src, int srcstep, CvSize ssize, 
                     uchar* dst, int dststep, CvSize dsize, int pix_size ) 
{ 
    int* x_ofs = (int*)cvStackAlloc( dsize.width * sizeof(x_ofs[0]) ); 
    int pix_size4 = pix_size / sizeof(int); 
    int x, y, t; 
 
    for( x = 0; x < dsize.width; x++ ) 
    { 
        t = (ssize.width*x*2 + MIN(ssize.width, dsize.width) - 1)/(dsize.width*2); 
        t -= t >= ssize.width; 
        x_ofs[x] = t*pix_size; 
    } 
 
    for( y = 0; y < dsize.height; y++, dst += dststep ) 
    { 
        const uchar* tsrc; 
        t = (ssize.height*y*2 + MIN(ssize.height, dsize.height) - 1)/(dsize.height*2); 
        t -= t >= ssize.height; 
        tsrc = src + srcstep*t; 
 
        switch( pix_size ) 
        { 
        case 1: 
            for( x = 0; x <= dsize.width - 2; x += 2 ) 
            { 
                uchar t0 = tsrc[x_ofs[x]]; 
                uchar t1 = tsrc[x_ofs[x+1]]; 
 
                dst[x] = t0; 
                dst[x+1] = t1; 
            } 
 
            for( ; x < dsize.width; x++ ) 
                dst[x] = tsrc[x_ofs[x]]; 
            break; 
        case 2: 
            for( x = 0; x < dsize.width; x++ ) 
                *(ushort*)(dst + x*2) = *(ushort*)(tsrc + x_ofs[x]); 
            break; 
        case 3: 
            for( x = 0; x < dsize.width; x++ ) 
            { 
                const uchar* _tsrc = tsrc + x_ofs[x]; 
                dst[x*3] = _tsrc[0]; dst[x*3+1] = _tsrc[1]; dst[x*3+2] = _tsrc[2]; 
            } 
            break; 
        case 4: 
            for( x = 0; x < dsize.width; x++ ) 
                *(int*)(dst + x*4) = *(int*)(tsrc + x_ofs[x]); 
            break; 
        case 6: 
            for( x = 0; x < dsize.width; x++ ) 
            { 
                const ushort* _tsrc = (const ushort*)(tsrc + x_ofs[x]); 
                ushort* _tdst = (ushort*)(dst + x*6); 
                _tdst[0] = _tsrc[0]; _tdst[1] = _tsrc[1]; _tdst[2] = _tsrc[2]; 
            } 
            break; 
        default: 
            for( x = 0; x < dsize.width; x++ ) 
                CV_MEMCPY_INT( dst + x*pix_size, tsrc + x_ofs[x], pix_size4 ); 
        } 
    } 
 
    return CV_OK; 
} 
 
 
typedef struct CvResizeAlpha 
{ 
    int idx; 
    union 
    { 
        float alpha; 
        int ialpha; 
    }; 
} 
CvResizeAlpha; 
 
 
#define  ICV_DEF_RESIZE_BILINEAR_FUNC( flavor, arrtype, worktype, alpha_field,  \ 
                                       mul_one_macro, descale_macro )           \ 
static CvStatus CV_STDCALL                                                      \ 
icvResize_Bilinear_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\ 
                                   arrtype* dst, int dststep, CvSize dsize,     \ 
                                   int cn, int xmax,                            \ 
                                   const CvResizeAlpha* xofs,                   \ 
                                   const CvResizeAlpha* yofs,                   \ 
                                   worktype* buf0, worktype* buf1 )             \ 
{                                                                               \ 
    int prev_sy0 = -1, prev_sy1 = -1;                                           \ 
    int k, dx, dy;                                                              \ 
                                                                                \ 
    srcstep /= sizeof(src[0]);                                                  \ 
    dststep /= sizeof(dst[0]);                                                  \ 
    dsize.width *= cn;                                                          \ 
    xmax *= cn;                                                                 \ 
                                                                                \ 
    for( dy = 0; dy < dsize.height; dy++, dst += dststep )                      \ 
    {                                                                           \ 
        worktype fy = yofs[dy].alpha_field, *swap_t;                            \ 
        int sy0 = yofs[dy].idx, sy1 = sy0 + (fy > 0 && sy0 < ssize.height-1);   \ 
                                                                                \ 
        if( sy0 == prev_sy0 && sy1 == prev_sy1 )                                \ 
            k = 2;                                                              \ 
        else if( sy0 == prev_sy1 )                                              \ 
        {                                                                       \ 
            CV_SWAP( buf0, buf1, swap_t );                                      \ 
            k = 1;                                                              \ 
        }                                                                       \ 
        else                                                                    \ 
            k = 0;                                                              \ 
                                                                                \ 
        for( ; k < 2; k++ )                                                     \ 
        {                                                                       \ 
            worktype* _buf = k == 0 ? buf0 : buf1;                              \ 
            const arrtype* _src;                                                \ 
            int sy = k == 0 ? sy0 : sy1;                                        \ 
            if( k == 1 && sy1 == sy0 )                                          \ 
            {                                                                   \ 
                memcpy( buf1, buf0, dsize.width*sizeof(buf0[0]) );              \ 
                continue;                                                       \ 
            }                                                                   \ 
                                                                                \ 
            _src = src + sy*srcstep;                                            \ 
            for( dx = 0; dx < xmax; dx++ )                                      \ 
            {                                                                   \ 
                int sx = xofs[dx].idx;                                          \ 
                worktype fx = xofs[dx].alpha_field;                             \ 
                worktype t = _src[sx];                                          \ 
                _buf[dx] = mul_one_macro(t) + fx*(_src[sx+cn] - t);             \ 
            }                                                                   \ 
                                                                                \ 
            for( ; dx < dsize.width; dx++ )                                     \ 
                _buf[dx] = mul_one_macro(_src[xofs[dx].idx]);                   \ 
        }                                                                       \ 
                                                                                \ 
        prev_sy0 = sy0;                                                         \ 
        prev_sy1 = sy1;                                                         \ 
                                                                                \ 
        if( sy0 == sy1 )                                                        \ 
            for( dx = 0; dx < dsize.width; dx++ )                               \ 
                dst[dx] = (arrtype)descale_macro( mul_one_macro(buf0[dx]));     \ 
        else                                                                    \ 
            for( dx = 0; dx < dsize.width; dx++ )                               \ 
                dst[dx] = (arrtype)descale_macro( mul_one_macro(buf0[dx]) +     \ 
                                                  fy*(buf1[dx] - buf0[dx]));    \ 
    }                                                                           \ 
                                                                                \ 
    return CV_OK;                                                               \ 
} 
 
 
typedef struct CvDecimateAlpha 
{ 
    int si, di; 
    float alpha; 
} 
CvDecimateAlpha; 
 
 
#define  ICV_DEF_RESIZE_AREA_FAST_FUNC( flavor, arrtype, worktype, cast_macro ) \ 
static CvStatus CV_STDCALL                                                      \ 
icvResize_AreaFast_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\ 
                              arrtype* dst, int dststep, CvSize dsize, int cn,  \ 
                              const int* ofs, const int* xofs )                 \ 
{                                                                               \ 
    int dy, dx, k = 0;                                                          \ 
    int scale_x = ssize.width/dsize.width;                                      \ 
    int scale_y = ssize.height/dsize.height;                                    \ 
    int area = scale_x*scale_y;                                                 \ 
    float scale = 1.f/(scale_x*scale_y);                                        \ 
                                                                                \ 
    srcstep /= sizeof(src[0]);                                                  \ 
    dststep /= sizeof(dst[0]);                                                  \ 
    dsize.width *= cn;                                                          \ 
                                                                                \ 
    for( dy = 0; dy < dsize.height; dy++, dst += dststep )                      \ 
        for( dx = 0; dx < dsize.width; dx++ )                                   \ 
        {                                                                       \ 
            const arrtype* _src = src + dy*scale_y*srcstep + xofs[dx];          \ 
            worktype sum = 0;                                                   \ 
                                                                                \ 
            for( k = 0; k <= area - 4; k += 4 )                                 \ 
                sum += _src[ofs[k]] + _src[ofs[k+1]] +                          \ 
                       _src[ofs[k+2]] + _src[ofs[k+3]];                         \ 
                                                                                \ 
            for( ; k < area; k++ )                                              \ 
                sum += _src[ofs[k]];                                            \ 
                                                                                \ 
            dst[dx] = (arrtype)cast_macro( sum*scale );                         \ 
        }                                                                       \ 
                                                                                \ 
    return CV_OK;                                                               \ 
} 
 
 
#define  ICV_DEF_RESIZE_AREA_FUNC( flavor, arrtype, load_macro, cast_macro )    \ 
static CvStatus CV_STDCALL                                                      \ 
icvResize_Area_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,   \ 
                               arrtype* dst, int dststep, CvSize dsize,         \ 
                               int cn, const CvDecimateAlpha* xofs,             \ 
                               int xofs_count, float* buf, float* sum )         \ 
{                                                                               \ 
    int k, sy, dx, cur_dy = 0;                                                  \ 
    float scale_y = (float)ssize.height/dsize.height;                           \ 
                                                                                \ 
    srcstep /= sizeof(src[0]);                                                  \ 
    dststep /= sizeof(dst[0]);                                                  \ 
    dsize.width *= cn;                                                          \ 
                                                                                \ 
    for( sy = 0; sy < ssize.height; sy++, src += srcstep )                      \ 
    {                                                                           \ 
        if( cn == 1 )                                                           \ 
            for( k = 0; k < xofs_count; k++ )                                   \ 
            {                                                                   \ 
                int dxn = xofs[k].di;                                           \ 
                float alpha = xofs[k].alpha;                                    \ 
                buf[dxn] = buf[dxn] + load_macro(src[xofs[k].si])*alpha;        \ 
            }                                                                   \ 
        else if( cn == 2 )                                                      \ 
            for( k = 0; k < xofs_count; k++ )                                   \ 
            {                                                                   \ 
                int sxn = xofs[k].si;                                           \ 
                int dxn = xofs[k].di;                                           \ 
                float alpha = xofs[k].alpha;                                    \ 
                float t0 = buf[dxn] + load_macro(src[sxn])*alpha;               \ 
                float t1 = buf[dxn+1] + load_macro(src[sxn+1])*alpha;           \ 
                buf[dxn] = t0; buf[dxn+1] = t1;                                 \ 
            }                                                                   \ 
        else if( cn == 3 )                                                      \ 
            for( k = 0; k < xofs_count; k++ )                                   \ 
            {                                                                   \ 
                int sxn = xofs[k].si;                                           \ 
                int dxn = xofs[k].di;                                           \ 
                float alpha = xofs[k].alpha;                                    \ 
                float t0 = buf[dxn] + load_macro(src[sxn])*alpha;               \ 
                float t1 = buf[dxn+1] + load_macro(src[sxn+1])*alpha;           \ 
                float t2 = buf[dxn+2] + load_macro(src[sxn+2])*alpha;           \ 
                buf[dxn] = t0; buf[dxn+1] = t1; buf[dxn+2] = t2;                \ 
            }                                                                   \ 
        else                                                                    \ 
            for( k = 0; k < xofs_count; k++ )                                   \ 
            {                                                                   \ 
                int sxn = xofs[k].si;                                           \ 
                int dxn = xofs[k].di;                                           \ 
                float alpha = xofs[k].alpha;                                    \ 
                float t0 = buf[dxn] + load_macro(src[sxn])*alpha;               \ 
                float t1 = buf[dxn+1] + load_macro(src[sxn+1])*alpha;           \ 
                buf[dxn] = t0; buf[dxn+1] = t1;                                 \ 
                t0 = buf[dxn+2] + load_macro(src[sxn+2])*alpha;                 \ 
                t1 = buf[dxn+3] + load_macro(src[sxn+3])*alpha;                 \ 
                buf[dxn+2] = t0; buf[dxn+3] = t1;                               \ 
            }                                                                   \ 
                                                                                \ 
        if( (cur_dy + 1)*scale_y <= sy + 1 || sy == ssize.height - 1 )          \ 
        {                                                                       \ 
            float beta = sy + 1 - (cur_dy+1)*scale_y, beta1;                    \ 
            beta = MAX( beta, 0 );                                              \ 
            beta1 = 1 - beta;                                                   \ 
            if( fabs(beta) < 1e-3 )                                             \ 
                for( dx = 0; dx < dsize.width; dx++ )                           \ 
                {                                                               \ 
                    dst[dx] = (arrtype)cast_macro(sum[dx] + buf[dx]);           \ 
                    sum[dx] = buf[dx] = 0;                                      \ 
                }                                                               \ 
            else                                                                \ 
                for( dx = 0; dx < dsize.width; dx++ )                           \ 
                {                                                               \ 
                    dst[dx] = (arrtype)cast_macro(sum[dx] + buf[dx]*beta1);     \ 
                    sum[dx] = buf[dx]*beta;                                     \ 
                    buf[dx] = 0;                                                \ 
                }                                                               \ 
            dst += dststep;                                                     \ 
            cur_dy++;                                                           \ 
        }                                                                       \ 
        else                                                                    \ 
            for( dx = 0; dx < dsize.width; dx += 2 )                            \ 
            {                                                                   \ 
                float t0 = sum[dx] + buf[dx];                                   \ 
                float t1 = sum[dx+1] + buf[dx+1];                               \ 
                sum[dx] = t0; sum[dx+1] = t1;                                   \ 
                buf[dx] = buf[dx+1] = 0;                                        \ 
            }                                                                   \ 
    }                                                                           \ 
                                                                                \ 
    return CV_OK;                                                               \ 
} 
 
 
#define  ICV_DEF_RESIZE_BICUBIC_FUNC( flavor, arrtype, worktype, load_macro,    \ 
                                      cast_macro1, cast_macro2 )                \ 
static CvStatus CV_STDCALL                                                      \ 
icvResize_Bicubic_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\ 
                                  arrtype* dst, int dststep, CvSize dsize,      \ 
                                  int cn, int xmin, int xmax,                   \ 
                                  const CvResizeAlpha* xofs, float** buf )      \ 
{                                                                               \ 
    float scale_y = (float)ssize.height/dsize.height;                           \ 
    int dx, dy, sx, sy, sy2, ify;                                               \ 
    int prev_sy2 = -2;                                                          \ 
                                                                                \ 
    xmin *= cn; xmax *= cn;                                                     \ 
    dsize.width *= cn;                                                          \ 
    ssize.width *= cn;                                                          \ 
    srcstep /= sizeof(src[0]);                                                  \ 
    dststep /= sizeof(dst[0]);                                                  \ 
                                                                                \ 
    for( dy = 0; dy < dsize.height; dy++, dst += dststep )                      \ 
    {                                                                           \ 
        float w0, w1, w2, w3;                                                   \ 
        float fy, x, sum;                                                       \ 
        float *row, *row0, *row1, *row2, *row3;                                 \ 
        int k1, k = 4;                                                          \ 
                                                                                \ 
        fy = dy*scale_y;                                                        \ 
        sy = cvFloor(fy);                                                       \ 
        fy -= sy;                                                               \ 
        ify = cvRound(fy*ICV_CUBIC_TAB_SIZE);                                   \ 
        sy2 = sy + 2;                                                           \ 
                                                                                \ 
        if( sy2 > prev_sy2 )                                                    \ 
        {                                                                       \ 
            int delta = prev_sy2 - sy + 2;                                      \ 
            for( k = 0; k < delta; k++ )                                        \ 
                CV_SWAP( buf[k], buf[k+4-delta], row );                         \ 
        }                                                                       \ 
                                                                                \ 
        for( sy += k - 1; k < 4; k++, sy++ )                                    \ 
        {                                                                       \ 
            const arrtype* _src = src + sy*srcstep;                             \ 
                                                                                \ 
            row = buf[k];                                                       \ 
            if( sy < 0 )                                                        \ 
                continue;                                                       \ 
            if( sy >= ssize.height )                                            \ 
            {                                                                   \ 
                assert( k > 0 );                                                \ 
                memcpy( row, buf[k-1], dsize.width*sizeof(row[0]) );            \ 
                continue;                                                       \ 
            }                                                                   \ 
                                                                                \ 
            for( dx = 0; dx < xmin; dx++ )                                      \ 
            {                                                                   \ 
                int ifx = xofs[dx].ialpha, sx0 = xofs[dx].idx;                  \ 
                sx = sx0 + cn*2;                                                \ 
                while( sx >= ssize.width )                                      \ 
                    sx -= cn;                                                   \ 
                x = load_macro(_src[sx]);                                       \ 
                sum = x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ifx)*2 + 1];       \ 
                if( (unsigned)(sx = sx0 + cn) < (unsigned)ssize.width )         \ 
                    x = load_macro(_src[sx]);                                   \ 
                sum += x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ifx)*2];          \ 
                if( (unsigned)(sx = sx0) < (unsigned)ssize.width )              \ 
                    x = load_macro(_src[sx]);                                   \ 
                sum += x*icvCubicCoeffs[ifx*2];                                 \ 
                if( (unsigned)(sx = sx0 - cn) < (unsigned)ssize.width )         \ 
                    x = load_macro(_src[sx]);                                   \ 
                row[dx] = sum + x*icvCubicCoeffs[ifx*2 + 1];                    \ 
            }                                                                   \ 
                                                                                \ 
            for( ; dx < xmax; dx++ )                                            \ 
            {                                                                   \ 
                int ifx = xofs[dx].ialpha;                                      \ 
                int sx0 = xofs[dx].idx;                                         \ 
                row[dx] = _src[sx0 - cn]*icvCubicCoeffs[ifx*2 + 1] +            \ 
                    _src[sx0]*icvCubicCoeffs[ifx*2] +                           \ 
                    _src[sx0 + cn]*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] + \ 
                    _src[sx0 + cn*2]*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\ 
            }                                                                   \ 
                                                                                \ 
            for( ; dx < dsize.width; dx++ )                                     \ 
            {                                                                   \ 
                int ifx = xofs[dx].ialpha, sx0 = xofs[dx].idx;                  \ 
                x = load_macro(_src[sx0 - cn]);                                 \ 
                sum = x*icvCubicCoeffs[ifx*2 + 1];                              \ 
                if( (unsigned)(sx = sx0) < (unsigned)ssize.width )              \ 
                    x = load_macro(_src[sx]);                                   \ 
                sum += x*icvCubicCoeffs[ifx*2];                                 \ 
                if( (unsigned)(sx = sx0 + cn) < (unsigned)ssize.width )         \ 
                    x = load_macro(_src[sx]);                                   \ 
                sum += x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ifx)*2];          \ 
                if( (unsigned)(sx = sx0 + cn*2) < (unsigned)ssize.width )       \ 
                    x = load_macro(_src[sx]);                                   \ 
                row[dx] = sum + x*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1]; \ 
            }                                                                   \ 
                                                                                \ 
            if( sy == 0 )                                                       \ 
                for( k1 = 0; k1 < k; k1++ )                                     \ 
                    memcpy( buf[k1], row, dsize.width*sizeof(row[0]));          \ 
        }                                                                       \ 
                                                                                \ 
        prev_sy2 = sy2;                                                         \ 
                                                                                \ 
        row0 = buf[0]; row1 = buf[1];                                           \ 
        row2 = buf[2]; row3 = buf[3];                                           \ 
                                                                                \ 
        w0 = icvCubicCoeffs[ify*2+1];                                           \ 
        w1 = icvCubicCoeffs[ify*2];                                             \ 
        w2 = icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ify)*2];                      \ 
        w3 = icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE - ify)*2 + 1];                  \ 
                                                                                \ 
        for( dx = 0; dx < dsize.width; dx++ )                                   \ 
        {                                                                       \ 
            worktype val = cast_macro1( row0[dx]*w0 + row1[dx]*w1 +             \ 
                                        row2[dx]*w2 + row3[dx]*w3 );            \ 
            dst[dx] = cast_macro2(val);                                         \ 
        }                                                                       \ 
    }                                                                           \ 
                                                                                \ 
    return CV_OK;                                                               \ 
} 
 
 
ICV_DEF_RESIZE_BILINEAR_FUNC( 8u, uchar, int, ialpha, 
                              ICV_WARP_MUL_ONE_8U, ICV_WARP_DESCALE_8U ) 
ICV_DEF_RESIZE_BILINEAR_FUNC( 16u, ushort, float, alpha, CV_NOP, cvRound ) 
ICV_DEF_RESIZE_BILINEAR_FUNC( 32f, float, float, alpha, CV_NOP, CV_NOP ) 
 
ICV_DEF_RESIZE_BICUBIC_FUNC( 8u, uchar, int, CV_8TO32F, cvRound, CV_CAST_8U ) 
ICV_DEF_RESIZE_BICUBIC_FUNC( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U ) 
ICV_DEF_RESIZE_BICUBIC_FUNC( 32f, float, float, CV_NOP, CV_NOP, CV_NOP ) 
 
ICV_DEF_RESIZE_AREA_FAST_FUNC( 8u, uchar, int, cvRound ) 
ICV_DEF_RESIZE_AREA_FAST_FUNC( 16u, ushort, int, cvRound ) 
ICV_DEF_RESIZE_AREA_FAST_FUNC( 32f, float, float, CV_NOP ) 
 
ICV_DEF_RESIZE_AREA_FUNC( 8u, uchar, CV_8TO32F, cvRound ) 
ICV_DEF_RESIZE_AREA_FUNC( 16u, ushort, CV_NOP, cvRound ) 
ICV_DEF_RESIZE_AREA_FUNC( 32f, float, CV_NOP, CV_NOP ) 
 
 
static void icvInitResizeTab( CvFuncTable* bilin_tab, 
                              CvFuncTable* bicube_tab, 
                              CvFuncTable* areafast_tab, 
                              CvFuncTable* area_tab ) 
{ 
    bilin_tab->fn_2d[CV_8U] = (void*)icvResize_Bilinear_8u_CnR; 
    bilin_tab->fn_2d[CV_16U] = (void*)icvResize_Bilinear_16u_CnR; 
    bilin_tab->fn_2d[CV_32F] = (void*)icvResize_Bilinear_32f_CnR; 
 
    bicube_tab->fn_2d[CV_8U] = (void*)icvResize_Bicubic_8u_CnR; 
    bicube_tab->fn_2d[CV_16U] = (void*)icvResize_Bicubic_16u_CnR; 
    bicube_tab->fn_2d[CV_32F] = (void*)icvResize_Bicubic_32f_CnR; 
 
    areafast_tab->fn_2d[CV_8U] = (void*)icvResize_AreaFast_8u_CnR; 
    areafast_tab->fn_2d[CV_16U] = (void*)icvResize_AreaFast_16u_CnR; 
    areafast_tab->fn_2d[CV_32F] = (void*)icvResize_AreaFast_32f_CnR; 
 
    area_tab->fn_2d[CV_8U] = (void*)icvResize_Area_8u_CnR; 
    area_tab->fn_2d[CV_16U] = (void*)icvResize_Area_16u_CnR; 
    area_tab->fn_2d[CV_32F] = (void*)icvResize_Area_32f_CnR; 
} 
 
 
typedef CvStatus (CV_STDCALL * CvResizeBilinearFunc) 
                    ( const void* src, int srcstep, CvSize ssize, 
                      void* dst, int dststep, CvSize dsize, 
                      int cn, int xmax, const CvResizeAlpha* xofs, 
                      const CvResizeAlpha* yofs, float* buf0, float* buf1 ); 
 
typedef CvStatus (CV_STDCALL * CvResizeBicubicFunc) 
                    ( const void* src, int srcstep, CvSize ssize, 
                      void* dst, int dststep, CvSize dsize, 
                      int cn, int xmin, int xmax, 
                      const CvResizeAlpha* xofs, float** buf ); 
 
typedef CvStatus (CV_STDCALL * CvResizeAreaFastFunc) 
                    ( const void* src, int srcstep, CvSize ssize, 
                      void* dst, int dststep, CvSize dsize, 
                      int cn, const int* ofs, const int *xofs ); 
 
typedef CvStatus (CV_STDCALL * CvResizeAreaFunc) 
                    ( const void* src, int srcstep, CvSize ssize, 
                      void* dst, int dststep, CvSize dsize, 
                      int cn, const CvDecimateAlpha* xofs, 
                      int xofs_count, float* buf, float* sum ); 
 
 
////////////////////////////////// IPP resize functions ////////////////////////////////// 
 
icvResize_8u_C1R_t icvResize_8u_C1R_p = 0; 
icvResize_8u_C3R_t icvResize_8u_C3R_p = 0; 
icvResize_8u_C4R_t icvResize_8u_C4R_p = 0; 
icvResize_16u_C1R_t icvResize_16u_C1R_p = 0; 
icvResize_16u_C3R_t icvResize_16u_C3R_p = 0; 
icvResize_16u_C4R_t icvResize_16u_C4R_p = 0; 
icvResize_32f_C1R_t icvResize_32f_C1R_p = 0; 
icvResize_32f_C3R_t icvResize_32f_C3R_p = 0; 
icvResize_32f_C4R_t icvResize_32f_C4R_p = 0; 
 
typedef CvStatus (CV_STDCALL * CvResizeIPPFunc) 
( const void* src, CvSize srcsize, int srcstep, CvRect srcroi, 
  void* dst, int dststep, CvSize dstroi, 
  double xfactor, double yfactor, int interpolation ); 
 
////////////////////////////////////////////////////////////////////////////////////////// 
 
CV_IMPL void 
cvResize( const CvArr* srcarr, CvArr* dstarr, int method ) 
{ 
    static CvFuncTable bilin_tab, bicube_tab, areafast_tab, area_tab; 
    static int inittab = 0; 
    void* temp_buf = 0; 
 
    CV_FUNCNAME( "cvResize" ); 
 
    __BEGIN__; 
     
    CvMat srcstub, *src = (CvMat*)srcarr; 
    CvMat dststub, *dst = (CvMat*)dstarr; 
    CvSize ssize, dsize; 
    float scale_x, scale_y; 
    int k, sx, sy, dx, dy; 
    int type, depth, cn; 
     
    CV_CALL( src = cvGetMat( srcarr, &srcstub )); 
    CV_CALL( dst = cvGetMat( dstarr, &dststub )); 
     
    if( CV_ARE_SIZES_EQ( src, dst )) 
        CV_CALL( cvCopy( src, dst )); 
 
    if( !CV_ARE_TYPES_EQ( src, dst )) 
        CV_ERROR( CV_StsUnmatchedFormats, "" ); 
 
    if( !inittab ) 
    { 
        icvInitResizeTab( &bilin_tab, &bicube_tab, &areafast_tab, &area_tab ); 
        inittab = 1; 
    } 
 
    ssize = cvGetMatSize( src ); 
    dsize = cvGetMatSize( dst ); 
    type = CV_MAT_TYPE(src->type); 
    depth = CV_MAT_DEPTH(type); 
    cn = CV_MAT_CN(type); 
    scale_x = (float)ssize.width/dsize.width; 
    scale_y = (float)ssize.height/dsize.height; 
 
    if( method == CV_INTER_CUBIC && 
        (MIN(ssize.width, dsize.width) <= 4 || 
        MIN(ssize.height, dsize.height) <= 4) ) 
        method = CV_INTER_LINEAR; 
 
    if( icvResize_8u_C1R_p && 
        MIN(ssize.width, dsize.width) > 4 && 
        MIN(ssize.height, dsize.height) > 4 ) 
    { 
        CvResizeIPPFunc ipp_func = 
            type == CV_8UC1 ? icvResize_8u_C1R_p : 
            type == CV_8UC3 ? icvResize_8u_C3R_p : 
            type == CV_8UC4 ? icvResize_8u_C4R_p : 
            type == CV_16UC1 ? icvResize_16u_C1R_p : 
            type == CV_16UC3 ? icvResize_16u_C3R_p : 
            type == CV_16UC4 ? icvResize_16u_C4R_p : 
            type == CV_32FC1 ? icvResize_32f_C1R_p : 
            type == CV_32FC3 ? icvResize_32f_C3R_p : 
            type == CV_32FC4 ? icvResize_32f_C4R_p : 0; 
        if( ipp_func && (CV_INTER_NN < method && method < CV_INTER_AREA)) 
        { 
            int srcstep = src->step ? src->step : CV_STUB_STEP; 
            int dststep = dst->step ? dst->step : CV_STUB_STEP; 
            IPPI_CALL( ipp_func( src->data.ptr, ssize, srcstep, 
                                 cvRect(0,0,ssize.width,ssize.height), 
                                 dst->data.ptr, dststep, dsize, 
                                 (double)dsize.width/ssize.width, 
                                 (double)dsize.height/ssize.height, 1 << method )); 
            EXIT; 
        } 
    } 
 
    if( method == CV_INTER_NN ) 
    { 
        IPPI_CALL( icvResize_NN_8u_C1R( src->data.ptr, src->step, ssize, 
                                        dst->data.ptr, dst->step, dsize, 
                                        CV_ELEM_SIZE(src->type))); 
    } 
    else if( method == CV_INTER_LINEAR || method == CV_INTER_AREA ) 
    { 
        if( method == CV_INTER_AREA && 
            ssize.width >= dsize.width && ssize.height >= dsize.height ) 
        { 
            // "area" method for (scale_x > 1 & scale_y > 1) 
            int iscale_x = cvRound(scale_x); 
            int iscale_y = cvRound(scale_y); 
 
            if( fabs(scale_x - iscale_x) < DBL_EPSILON && 
                fabs(scale_y - iscale_y) < DBL_EPSILON ) 
            { 
                int area = iscale_x*iscale_y; 
                int srcstep = src->step / CV_ELEM_SIZE(depth); 
                int* ofs = (int*)cvStackAlloc( (area + dsize.width*cn)*sizeof(int) ); 
                int* xofs = ofs + area; 
                CvResizeAreaFastFunc func = (CvResizeAreaFastFunc)areafast_tab.fn_2d[depth]; 
 
                if( !func ) 
                    CV_ERROR( CV_StsUnsupportedFormat, "" ); 
                 
                for( sy = 0, k = 0; sy < iscale_y; sy++ ) 
                    for( sx = 0; sx < iscale_x; sx++ ) 
                        ofs[k++] = sy*srcstep + sx*cn; 
 
                for( dx = 0; dx < dsize.width; dx++ ) 
                { 
                    sx = dx*iscale_x*cn; 
                    for( k = 0; k < cn; k++ ) 
                        xofs[dx*cn + k] = sx + k; 
                } 
 
                IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr, 
                                 dst->step, dsize, cn, ofs, xofs )); 
            } 
            else 
            { 
                int buf_len = dsize.width*cn + 4, buf_size, xofs_count = 0; 
                float scale = 1.f/(scale_x*scale_y); 
                float *buf, *sum; 
                CvDecimateAlpha* xofs; 
                CvResizeAreaFunc func = (CvResizeAreaFunc)area_tab.fn_2d[depth]; 
 
                if( !func ) 
                    CV_ERROR( CV_StsUnsupportedFormat, "" ); 
 
                buf_size = buf_len*2*sizeof(float) + ssize.width*2*sizeof(CvDecimateAlpha); 
                if( buf_size < CV_MAX_LOCAL_SIZE ) 
                    buf = (float*)cvStackAlloc(buf_size); 
                else 
                    CV_CALL( temp_buf = buf = (float*)cvAlloc(buf_size)); 
                sum = buf + buf_len; 
                xofs = (CvDecimateAlpha*)(sum + buf_len); 
 
                for( dx = 0, k = 0; dx < dsize.width; dx++ ) 
                { 
                    float fsx1 = dx*scale_x, fsx2 = fsx1 + scale_x; 
                    int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2); 
 
                    assert( (unsigned)sx1 < (unsigned)ssize.width ); 
 
                    if( sx1 > fsx1 ) 
                    { 
                        assert( k < ssize.width*2 );             
                        xofs[k].di = dx*cn; 
                        xofs[k].si = (sx1-1)*cn; 
                        xofs[k++].alpha = (sx1 - fsx1)*scale; 
                    } 
 
                    for( sx = sx1; sx < sx2; sx++ ) 
                    { 
                        assert( k < ssize.width*2 ); 
                        xofs[k].di = dx*cn; 
                        xofs[k].si = sx*cn; 
                        xofs[k++].alpha = scale; 
                    } 
 
                    if( fsx2 - sx2 > 1e-3 ) 
                    { 
                        assert( k < ssize.width*2 ); 
                        assert((unsigned)sx2 < (unsigned)ssize.width ); 
                        xofs[k].di = dx*cn; 
                        xofs[k].si = sx2*cn; 
                        xofs[k++].alpha = (fsx2 - sx2)*scale; 
                    } 
                } 
 
                xofs_count = k; 
                memset( sum, 0, buf_len*sizeof(float) ); 
                memset( buf, 0, buf_len*sizeof(float) ); 
 
                IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr, 
                                 dst->step, dsize, cn, xofs, xofs_count, buf, sum )); 
            } 
        } 
        else // true "area" method for the cases (scale_x > 1 & scale_y < 1) and 
             // (scale_x < 1 & scale_y > 1) is not implemented. 
             // instead, it is emulated via some variant of bilinear interpolation. 
        { 
            float inv_scale_x = (float)dsize.width/ssize.width; 
            float inv_scale_y = (float)dsize.height/ssize.height; 
            int xmax = dsize.width, width = dsize.width*cn, buf_size; 
            float *buf0, *buf1; 
            CvResizeAlpha *xofs, *yofs; 
            int area_mode = method == CV_INTER_AREA; 
            float fx, fy; 
            CvResizeBilinearFunc func = (CvResizeBilinearFunc)bilin_tab.fn_2d[depth]; 
 
            if( !func ) 
                CV_ERROR( CV_StsUnsupportedFormat, "" ); 
 
            buf_size = width*2*sizeof(float) + (width + dsize.height)*sizeof(CvResizeAlpha); 
            if( buf_size < CV_MAX_LOCAL_SIZE ) 
                buf0 = (float*)cvStackAlloc(buf_size); 
            else 
                CV_CALL( temp_buf = buf0 = (float*)cvAlloc(buf_size)); 
            buf1 = buf0 + width; 
            xofs = (CvResizeAlpha*)(buf1 + width); 
            yofs = xofs + width; 
 
            for( dx = 0; dx < dsize.width; dx++ ) 
            { 
                if( !area_mode ) 
                { 
                    fx = (float)((dx+0.5)*scale_x - 0.5); 
                    sx = cvFloor(fx); 
                    fx -= sx; 
                } 
                else 
                { 
                    sx = cvFloor(dx*scale_x); 
                    fx = (dx+1) - (sx+1)*inv_scale_x; 
                    fx = fx <= 0 ? 0.f : fx - cvFloor(fx); 
                } 
 
                if( sx >= ssize.width-1 ) 
                { 
                    fx = 0, sx = ssize.width-1; 
                    if( xmax >= dsize.width ) 
                        xmax = dx; 
                } 
                else if( sx < 0 ) 
                    fx = 0, sx = 0; 
 
                if( depth != CV_8U ) 
                    for( k = 0, sx *= cn; k < cn; k++ ) 
                        xofs[dx*cn + k].idx = sx + k, xofs[dx*cn + k].alpha = fx; 
                else 
                    for( k = 0, sx *= cn; k < cn; k++ ) 
                        xofs[dx*cn + k].idx = sx + k, 
                        xofs[dx*cn + k].ialpha = CV_FLT_TO_FIX(fx, ICV_WARP_SHIFT); 
            } 
 
            for( dy = 0; dy < dsize.height; dy++ ) 
            { 
                if( !area_mode ) 
                { 
                    fy = (float)((dy+0.5)*scale_y - 0.5); 
                    sy = cvFloor(fy); 
                    fy -= sy; 
                    if( sy < 0 ) 
                        sy = 0, fy = 0; 
                } 
                else 
                { 
                    sy = cvFloor(dy*scale_y); 
                    fy = (dy+1) - (sy+1)*inv_scale_y; 
                    fy = fy <= 0 ? 0.f : fy - cvFloor(fy); 
                } 
 
                yofs[dy].idx = sy; 
                if( depth != CV_8U ) 
                    yofs[dy].alpha = fy; 
                else 
                    yofs[dy].ialpha = CV_FLT_TO_FIX(fy, ICV_WARP_SHIFT); 
            } 
 
            IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr, 
                             dst->step, dsize, cn, xmax, xofs, yofs, buf0, buf1 )); 
        } 
    } 
    else if( method == CV_INTER_CUBIC ) 
    { 
        int width = dsize.width*cn, buf_size; 
        int xmin = dsize.width, xmax = -1; 
        CvResizeAlpha* xofs; 
        float* buf[4]; 
        CvResizeBicubicFunc func = (CvResizeBicubicFunc)bicube_tab.fn_2d[depth]; 
 
        if( !func ) 
            CV_ERROR( CV_StsUnsupportedFormat, "" ); 
         
        buf_size = width*(4*sizeof(float) + sizeof(xofs[0])); 
        if( buf_size < CV_MAX_LOCAL_SIZE ) 
            buf[0] = (float*)cvStackAlloc(buf_size); 
        else 
            CV_CALL( temp_buf = buf[0] = (float*)cvAlloc(buf_size)); 
 
        for( k = 1; k < 4; k++ ) 
            buf[k] = buf[k-1] + width; 
        xofs = (CvResizeAlpha*)(buf[3] + width); 
 
        icvInitCubicCoeffTab(); 
 
        for( dx = 0; dx < dsize.width; dx++ ) 
        { 
            float fx = dx*scale_x; 
            sx = cvFloor(fx); 
            fx -= sx; 
            int ifx = cvRound(fx*ICV_CUBIC_TAB_SIZE); 
            if( sx-1 >= 0 && xmin > dx ) 
                xmin = dx; 
            if( sx+2 < ssize.width ) 
                xmax = dx + 1; 
         
            // at least one of 4 points should be within the image - to 
            // be able to set other points to the same value. see the loops 
            // for( dx = 0; dx < xmin; dx++ ) ... and for( ; dx < width; dx++ ) ... 
            if( sx < -2 ) 
                sx = -2; 
            else if( sx > ssize.width ) 
                sx = ssize.width; 
 
            for( k = 0; k < cn; k++ ) 
            { 
                xofs[dx*cn + k].idx = sx*cn + k; 
                xofs[dx*cn + k].ialpha = ifx; 
            } 
        } 
     
        IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr, 
                         dst->step, dsize, cn, xmin, xmax, xofs, buf )); 
    } 
    else 
        CV_ERROR( CV_StsBadFlag, "Unknown/unsupported interpolation method" ); 
 
    __END__; 
 
    cvFree( &temp_buf ); 
} 
 
 
/****************************************************************************************\ 
*                                     WarpAffine                                         * 
\****************************************************************************************/ 
 
#define ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( flavor, arrtype, worktype,       \ 
            scale_alpha_macro, mul_one_macro, descale_macro, cast_macro )   \ 
static CvStatus CV_STDCALL                                                  \ 
icvWarpAffine_Bilinear_##flavor##_CnR(                                      \ 
    const arrtype* src, int step, CvSize ssize,                             \ 
    arrtype* dst, int dststep, CvSize dsize,                                \ 
    const double* matrix, int cn,                                           \ 
    const arrtype* fillval, const int* ofs )                                \ 
{                                                                           \ 
    int x, y, k;                                                            \ 
    double  A12 = matrix[1], b1 = matrix[2];                                \ 
    double  A22 = matrix[4], b2 = matrix[5];                                \ 
                                                                            \ 
    step /= sizeof(src[0]);                                                 \ 
    dststep /= sizeof(dst[0]);                                              \ 
                                                                            \ 
    for( y = 0; y < dsize.height; y++, dst += dststep )                     \ 
    {                                                                       \ 
        int xs = CV_FLT_TO_FIX( A12*y + b1, ICV_WARP_SHIFT );               \ 
        int ys = CV_FLT_TO_FIX( A22*y + b2, ICV_WARP_SHIFT );               \ 
                                                                            \ 
        for( x = 0; x < dsize.width; x++ )                                  \ 
        {                                                                   \ 
            int ixs = xs + ofs[x*2];                                        \ 
            int iys = ys + ofs[x*2+1];                                      \ 
            worktype a = scale_alpha_macro( ixs & ICV_WARP_MASK );          \ 
            worktype b = scale_alpha_macro( iys & ICV_WARP_MASK );          \ 
            worktype p0, p1;                                                \ 
            ixs >>= ICV_WARP_SHIFT;                                         \ 
            iys >>= ICV_WARP_SHIFT;                                         \ 
                                                                            \ 
            if( (unsigned)ixs < (unsigned)(ssize.width - 1) &&              \ 
                (unsigned)iys < (unsigned)(ssize.height - 1) )              \ 
            {                                                               \ 
                const arrtype* ptr = src + step*iys + ixs*cn;               \ 
                                                                            \ 
                for( k = 0; k < cn; k++ )                                   \ 
                {                                                           \ 
                    p0 = mul_one_macro(ptr[k]) +                            \ 
                        a * (ptr[k+cn] - ptr[k]);                           \ 
                    p1 = mul_one_macro(ptr[k+step]) +                       \ 
                        a * (ptr[k+cn+step] - ptr[k+step]);                 \ 
                    p0 = descale_macro(mul_one_macro(p0) + b*(p1 - p0));    \ 
                    dst[x*cn+k] = (arrtype)cast_macro(p0);                  \ 
                }                                                           \ 
            }                                                               \ 
            else if( (unsigned)(ixs+1) < (unsigned)(ssize.width+1) &&       \ 
                     (unsigned)(iys+1) < (unsigned)(ssize.height+1))        \ 
            {                                                               \ 
                int x0 = ICV_WARP_CLIP_X( ixs );                            \ 
                int y0 = ICV_WARP_CLIP_Y( iys );                            \ 
                int x1 = ICV_WARP_CLIP_X( ixs + 1 );                        \ 
                int y1 = ICV_WARP_CLIP_Y( iys + 1 );                        \ 
                const arrtype* ptr0, *ptr1, *ptr2, *ptr3;                   \ 
                                                                            \ 
                ptr0 = src + y0*step + x0*cn;                               \ 
                ptr1 = src + y0*step + x1*cn;                               \ 
                ptr2 = src + y1*step + x0*cn;                               \ 
                ptr3 = src + y1*step + x1*cn;                               \ 
                                                                            \ 
                for( k = 0; k < cn; k++ )                                   \ 
                {                                                           \ 
                    p0 = mul_one_macro(ptr0[k]) + a * (ptr1[k] - ptr0[k]);  \ 
                    p1 = mul_one_macro(ptr2[k]) + a * (ptr3[k] - ptr2[k]);  \ 
                    p0 = descale_macro( mul_one_macro(p0) + b*(p1 - p0) );  \ 
                    dst[x*cn+k] = (arrtype)cast_macro(p0);                  \ 
                }                                                           \ 
            }                                                               \ 
            else if( fillval )                                              \ 
                for( k = 0; k < cn; k++ )                                   \ 
                    dst[x*cn+k] = fillval[k];                               \ 
        }                                                                   \ 
    }                                                                       \ 
                                                                            \ 
    return CV_OK;                                                           \ 
} 
 
 
#define ICV_WARP_SCALE_ALPHA(x) ((x)*(1./(ICV_WARP_MASK+1))) 
 
ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 8u, uchar, int, CV_NOP, ICV_WARP_MUL_ONE_8U, 
                                   ICV_WARP_DESCALE_8U, CV_NOP ) 
//ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 8u, uchar, double, ICV_WARP_SCALE_ALPHA, CV_NOP, 
//                                   CV_NOP, ICV_WARP_CAST_8U ) 
ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 16u, ushort, double, ICV_WARP_SCALE_ALPHA, CV_NOP, 
                                   CV_NOP, cvRound ) 
ICV_DEF_WARP_AFFINE_BILINEAR_FUNC( 32f, float, double, ICV_WARP_SCALE_ALPHA, CV_NOP, 
                                   CV_NOP, CV_NOP ) 
 
 
typedef CvStatus (CV_STDCALL * CvWarpAffineFunc)( 
    const void* src, int srcstep, CvSize ssize, 
    void* dst, int dststep, CvSize dsize, 
    const double* matrix, int cn, 
    const void* fillval, const int* ofs ); 
 
static void icvInitWarpAffineTab( CvFuncTable* bilin_tab ) 
{ 
    bilin_tab->fn_2d[CV_8U] = (void*)icvWarpAffine_Bilinear_8u_CnR; 
    bilin_tab->fn_2d[CV_16U] = (void*)icvWarpAffine_Bilinear_16u_CnR; 
    bilin_tab->fn_2d[CV_32F] = (void*)icvWarpAffine_Bilinear_32f_CnR; 
} 
 
 
/////////////////////////////// IPP warpaffine functions ///////////////////////////////// 
 
icvWarpAffineBack_8u_C1R_t icvWarpAffineBack_8u_C1R_p = 0; 
icvWarpAffineBack_8u_C3R_t icvWarpAffineBack_8u_C3R_p = 0; 
icvWarpAffineBack_8u_C4R_t icvWarpAffineBack_8u_C4R_p = 0; 
icvWarpAffineBack_32f_C1R_t icvWarpAffineBack_32f_C1R_p = 0; 
icvWarpAffineBack_32f_C3R_t icvWarpAffineBack_32f_C3R_p = 0; 
icvWarpAffineBack_32f_C4R_t icvWarpAffineBack_32f_C4R_p = 0; 
 
typedef CvStatus (CV_STDCALL * CvWarpAffineBackIPPFunc) 
( const void* src, CvSize srcsize, int srcstep, CvRect srcroi, 
  void* dst, int dststep, CvRect dstroi, 
  const double* coeffs, int interpolation ); 
 
////////////////////////////////////////////////////////////////////////////////////////// 
 
CV_IMPL void 
cvWarpAffine( const CvArr* srcarr, CvArr* dstarr, const CvMat* matrix, 
              int flags, CvScalar fillval ) 
{ 
    static CvFuncTable bilin_tab; 
    static int inittab = 0; 
 
    CV_FUNCNAME( "cvWarpAffine" ); 
 
    __BEGIN__; 
     
    CvMat srcstub, *src = (CvMat*)srcarr; 
    CvMat dststub, *dst = (CvMat*)dstarr; 
    int k, type, depth, cn, *ofs = 0; 
    double src_matrix[6], dst_matrix[6]; 
    double fillbuf[4]; 
    int method = flags & 3; 
    CvMat srcAb = cvMat( 2, 3, CV_64F, src_matrix ), 
          dstAb = cvMat( 2, 3, CV_64F, dst_matrix ), 
          A, b, invA, invAb; 
    CvWarpAffineFunc func; 
    CvSize ssize, dsize; 
     
    if( !inittab ) 
    { 
        icvInitWarpAffineTab( &bilin_tab ); 
        inittab = 1; 
    } 
 
    CV_CALL( src = cvGetMat( srcarr, &srcstub )); 
    CV_CALL( dst = cvGetMat( dstarr, &dststub )); 
     
    if( !CV_ARE_TYPES_EQ( src, dst )) 
        CV_ERROR( CV_StsUnmatchedFormats, "" ); 
 
    if( !CV_IS_MAT(matrix) || CV_MAT_CN(matrix->type) != 1 || 
        CV_MAT_DEPTH(matrix->type) < CV_32F || matrix->rows != 2 || matrix->cols != 3 ) 
        CV_ERROR( CV_StsBadArg, 
        "Transformation matrix should be 2x3 floating-point single-channel matrix" ); 
 
    if( flags & CV_WARP_INVERSE_MAP ) 
        cvConvertScale( matrix, &dstAb ); 
    else 
    { 
        // [R|t] -> [R^-1 | -(R^-1)*t] 
        cvConvertScale( matrix, &srcAb ); 
        cvGetCols( &srcAb, &A, 0, 2 ); 
        cvGetCol( &srcAb, &b, 2 ); 
        cvGetCols( &dstAb, &invA, 0, 2 ); 
        cvGetCol( &dstAb, &invAb, 2 ); 
        cvInvert( &A, &invA, CV_SVD ); 
        cvGEMM( &invA, &b, -1, 0, 0, &invAb ); 
    } 
 
    type = CV_MAT_TYPE(src->type); 
    depth = CV_MAT_DEPTH(type); 
    cn = CV_MAT_CN(type); 
 
    ssize = cvGetMatSize(src); 
    dsize = cvGetMatSize(dst); 
 
    if( icvWarpAffineBack_8u_C1R_p && MIN( ssize.width, dsize.width ) >= 4 && 
        MIN( ssize.height, dsize.height ) >= 4 ) 
    { 
        CvWarpAffineBackIPPFunc ipp_func = 
            type == CV_8UC1 ? icvWarpAffineBack_8u_C1R_p : 
            type == CV_8UC3 ? icvWarpAffineBack_8u_C3R_p : 
            type == CV_8UC4 ? icvWarpAffineBack_8u_C4R_p : 
            type == CV_32FC1 ? icvWarpAffineBack_32f_C1R_p : 
            type == CV_32FC3 ? icvWarpAffineBack_32f_C3R_p : 
            type == CV_32FC4 ? icvWarpAffineBack_32f_C4R_p : 0; 
         
        if( ipp_func && CV_INTER_NN <= method && method <= CV_INTER_AREA ) 
        { 
            int srcstep = src->step ? src->step : CV_STUB_STEP; 
            int dststep = dst->step ? dst->step : CV_STUB_STEP; 
            CvRect srcroi = {0, 0, ssize.width, ssize.height}; 
            CvRect dstroi = {0, 0, dsize.width, dsize.height}; 
 
            // this is not the most efficient way to fill outliers 
            if( flags & CV_WARP_FILL_OUTLIERS ) 
                cvSet( dst, fillval ); 
 
            if( ipp_func( src->data.ptr, ssize, srcstep, srcroi, 
                          dst->data.ptr, dststep, dstroi, 
                          dstAb.data.db, 1 << method ) >= 0 ) 
                EXIT; 
        } 
    } 
 
    cvScalarToRawData( &fillval, fillbuf, CV_MAT_TYPE(src->type), 0 ); 
    ofs = (int*)cvStackAlloc( dst->cols*2*sizeof(ofs[0]) ); 
    for( k = 0; k < dst->cols; k++ ) 
    { 
        ofs[2*k] = CV_FLT_TO_FIX( dst_matrix[0]*k, ICV_WARP_SHIFT ); 
        ofs[2*k+1] = CV_FLT_TO_FIX( dst_matrix[3]*k, ICV_WARP_SHIFT ); 
    } 
 
    /*if( method == CV_INTER_LINEAR )*/ 
    { 
        func = (CvWarpAffineFunc)bilin_tab.fn_2d[depth]; 
        if( !func ) 
            CV_ERROR( CV_StsUnsupportedFormat, "" ); 
 
        IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr, 
                         dst->step, dsize, dst_matrix, cn, 
                         flags & CV_WARP_FILL_OUTLIERS ? fillbuf : 0, ofs )); 
    } 
 
    __END__; 
} 
 
 
CV_IMPL CvMat* 
cv2DRotationMatrix( CvPoint2D32f center, double angle, 
                    double scale, CvMat* matrix ) 
{ 
    CV_FUNCNAME( "cvGetRotationMatrix" ); 
 
    __BEGIN__; 
 
    double m[2][3]; 
    CvMat M = cvMat( 2, 3, CV_64FC1, m ); 
    double alpha, beta; 
 
    if( !matrix ) 
        CV_ERROR( CV_StsNullPtr, "" ); 
 
    angle *= CV_PI/180; 
    alpha = cos(angle)*scale; 
    beta = sin(angle)*scale; 
 
    m[0][0] = alpha; 
    m[0][1] = beta; 
    m[0][2] = (1-alpha)*center.x - beta*center.y; 
    m[1][0] = -beta; 
    m[1][1] = alpha; 
    m[1][2] = beta*center.x + (1-alpha)*center.y; 
 
    cvConvert( &M, matrix ); 
 
    __END__; 
 
    return matrix; 
} 
 
 
/****************************************************************************************\ 
*                                    WarpPerspective                                     * 
\****************************************************************************************/ 
 
#define ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( flavor, arrtype, load_macro, cast_macro )\ 
static CvStatus CV_STDCALL                                                  \ 
icvWarpPerspective_Bilinear_##flavor##_CnR(                                 \ 
    const arrtype* src, int step, CvSize ssize,                             \ 
    arrtype* dst, int dststep, CvSize dsize,                                \ 
    const double* matrix, int cn,                                           \ 
    const arrtype* fillval )                                                \ 
{                                                                           \ 
    int x, y, k;                                                            \ 
    float A11 = (float)matrix[0], A12 = (float)matrix[1], A13 = (float)matrix[2];\ 
    float A21 = (float)matrix[3], A22 = (float)matrix[4], A23 = (float)matrix[5];\ 
    float A31 = (float)matrix[6], A32 = (float)matrix[7], A33 = (float)matrix[8];\ 
                                                                            \ 
    step /= sizeof(src[0]);                                                 \ 
    dststep /= sizeof(dst[0]);                                              \ 
                                                                            \ 
    for( y = 0; y < dsize.height; y++, dst += dststep )                     \ 
    {                                                                       \ 
        float xs0 = A12*y + A13;                                            \ 
        float ys0 = A22*y + A23;                                            \ 
        float ws = A32*y + A33;                                             \ 
                                                                            \ 
        for( x = 0; x < dsize.width; x++, xs0 += A11, ys0 += A21, ws += A31 )\ 
        {                                                                   \ 
            float inv_ws = 1.f/ws;                                          \ 
            float xs = xs0*inv_ws;                                          \ 
            float ys = ys0*inv_ws;                                          \ 
            int ixs = cvFloor(xs);                                          \ 
            int iys = cvFloor(ys);                                          \ 
            float a = xs - ixs;                                             \ 
            float b = ys - iys;                                             \ 
            float p0, p1;                                                   \ 
                                                                            \ 
            if( (unsigned)ixs < (unsigned)(ssize.width - 1) &&              \ 
                (unsigned)iys < (unsigned)(ssize.height - 1) )              \ 
            {                                                               \ 
                const arrtype* ptr = src + step*iys + ixs*cn;               \ 
                                                                            \ 
                for( k = 0; k < cn; k++ )                                   \ 
                {                                                           \ 
                    p0 = load_macro(ptr[k]) +                               \ 
                        a * (load_macro(ptr[k+cn]) - load_macro(ptr[k]));   \ 
                    p1 = load_macro(ptr[k+step]) +                          \ 
                        a * (load_macro(ptr[k+cn+step]) -                   \ 
                             load_macro(ptr[k+step]));                      \ 
                    dst[x*cn+k] = (arrtype)cast_macro(p0 + b*(p1 - p0));    \ 
                }                                                           \ 
            }                                                               \ 
            else if( (unsigned)(ixs+1) < (unsigned)(ssize.width+1) &&       \ 
                     (unsigned)(iys+1) < (unsigned)(ssize.height+1))        \ 
            {                                                               \ 
                int x0 = ICV_WARP_CLIP_X( ixs );                            \ 
                int y0 = ICV_WARP_CLIP_Y( iys );                            \ 
                int x1 = ICV_WARP_CLIP_X( ixs + 1 );                        \ 
                int y1 = ICV_WARP_CLIP_Y( iys + 1 );                        \ 
                const arrtype* ptr0, *ptr1, *ptr2, *ptr3;                   \ 
                                                                            \ 
                ptr0 = src + y0*step + x0*cn;                               \ 
                ptr1 = src + y0*step + x1*cn;                               \ 
                ptr2 = src + y1*step + x0*cn;                               \ 
                ptr3 = src + y1*step + x1*cn;                               \ 
                                                                            \ 
                for( k = 0; k < cn; k++ )                                   \ 
                {                                                           \ 
                    p0 = load_macro(ptr0[k]) +                              \ 
                        a * (load_macro(ptr1[k]) - load_macro(ptr0[k]));    \ 
                    p1 = load_macro(ptr2[k]) +                              \ 
                        a * (load_macro(ptr3[k]) - load_macro(ptr2[k]));    \ 
                    dst[x*cn+k] = (arrtype)cast_macro(p0 + b*(p1 - p0));    \ 
                }                                                           \ 
            }                                                               \ 
            else if( fillval )                                              \ 
                for( k = 0; k < cn; k++ )                                   \ 
                    dst[x*cn+k] = fillval[k];                               \ 
        }                                                                   \ 
    }                                                                       \ 
                                                                            \ 
    return CV_OK;                                                           \ 
} 
 
 
#define ICV_WARP_SCALE_ALPHA(x) ((x)*(1./(ICV_WARP_MASK+1))) 
 
ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( 8u, uchar, CV_8TO32F, cvRound ) 
ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( 16u, ushort, CV_NOP, cvRound ) 
ICV_DEF_WARP_PERSPECTIVE_BILINEAR_FUNC( 32f, float, CV_NOP, CV_NOP ) 
 
typedef CvStatus (CV_STDCALL * CvWarpPerspectiveFunc)( 
    const void* src, int srcstep, CvSize ssize, 
    void* dst, int dststep, CvSize dsize, 
    const double* matrix, int cn, const void* fillval ); 
 
static void icvInitWarpPerspectiveTab( CvFuncTable* bilin_tab ) 
{ 
    bilin_tab->fn_2d[CV_8U] = (void*)icvWarpPerspective_Bilinear_8u_CnR; 
    bilin_tab->fn_2d[CV_16U] = (void*)icvWarpPerspective_Bilinear_16u_CnR; 
    bilin_tab->fn_2d[CV_32F] = (void*)icvWarpPerspective_Bilinear_32f_CnR; 
} 
 
 
/////////////////////////// IPP warpperspective functions //////////////////////////////// 
 
icvWarpPerspectiveBack_8u_C1R_t icvWarpPerspectiveBack_8u_C1R_p = 0; 
icvWarpPerspectiveBack_8u_C3R_t icvWarpPerspectiveBack_8u_C3R_p = 0; 
icvWarpPerspectiveBack_8u_C4R_t icvWarpPerspectiveBack_8u_C4R_p = 0; 
icvWarpPerspectiveBack_32f_C1R_t icvWarpPerspectiveBack_32f_C1R_p = 0; 
icvWarpPerspectiveBack_32f_C3R_t icvWarpPerspectiveBack_32f_C3R_p = 0; 
icvWarpPerspectiveBack_32f_C4R_t icvWarpPerspectiveBack_32f_C4R_p = 0; 
 
icvWarpPerspective_8u_C1R_t icvWarpPerspective_8u_C1R_p = 0; 
icvWarpPerspective_8u_C3R_t icvWarpPerspective_8u_C3R_p = 0; 
icvWarpPerspective_8u_C4R_t icvWarpPerspective_8u_C4R_p = 0; 
icvWarpPerspective_32f_C1R_t icvWarpPerspective_32f_C1R_p = 0; 
icvWarpPerspective_32f_C3R_t icvWarpPerspective_32f_C3R_p = 0; 
icvWarpPerspective_32f_C4R_t icvWarpPerspective_32f_C4R_p = 0; 
 
typedef CvStatus (CV_STDCALL * CvWarpPerspectiveBackIPPFunc) 
( const void* src, CvSize srcsize, int srcstep, CvRect srcroi, 
  void* dst, int dststep, CvRect dstroi, 
  const double* coeffs, int interpolation ); 
 
////////////////////////////////////////////////////////////////////////////////////////// 
 
CV_IMPL void 
cvWarpPerspective( const CvArr* srcarr, CvArr* dstarr, 
                   const CvMat* matrix, int flags, CvScalar fillval ) 
{ 
    static CvFuncTable bilin_tab; 
    static int inittab = 0; 
 
    CV_FUNCNAME( "cvWarpPerspective" ); 
 
    __BEGIN__; 
     
    CvMat srcstub, *src = (CvMat*)srcarr; 
    CvMat dststub, *dst = (CvMat*)dstarr; 
    int type, depth, cn; 
    int method = flags & 3; 
    double src_matrix[9], dst_matrix[9]; 
    double fillbuf[4]; 
    CvMat A = cvMat( 3, 3, CV_64F, src_matrix ), 
          invA = cvMat( 3, 3, CV_64F, dst_matrix ); 
    CvWarpPerspectiveFunc func; 
    CvSize ssize, dsize; 
 
    if( method == CV_INTER_NN || method == CV_INTER_AREA ) 
        method = CV_INTER_LINEAR; 
     
    if( !inittab ) 
    { 
        icvInitWarpPerspectiveTab( &bilin_tab ); 
        inittab = 1; 
    } 
 
    CV_CALL( src = cvGetMat( srcarr, &srcstub )); 
    CV_CALL( dst = cvGetMat( dstarr, &dststub )); 
     
    if( !CV_ARE_TYPES_EQ( src, dst )) 
        CV_ERROR( CV_StsUnmatchedFormats, "" ); 
 
    if( !CV_IS_MAT(matrix) || CV_MAT_CN(matrix->type) != 1 || 
        CV_MAT_DEPTH(matrix->type) < CV_32F || matrix->rows != 3 || matrix->cols != 3 ) 
        CV_ERROR( CV_StsBadArg, 
        "Transformation matrix should be 3x3 floating-point single-channel matrix" ); 
 
    if( flags & CV_WARP_INVERSE_MAP ) 
        cvConvertScale( matrix, &invA ); 
    else 
    { 
        cvConvertScale( matrix, &A ); 
        cvInvert( &A, &invA, CV_SVD ); 
    } 
 
    type = CV_MAT_TYPE(src->type); 
    depth = CV_MAT_DEPTH(type); 
    cn = CV_MAT_CN(type); 
     
    ssize = cvGetMatSize(src); 
    dsize = cvGetMatSize(dst); 
     
    if( icvWarpPerspectiveBack_8u_C1R_p ) 
    { 
        CvWarpPerspectiveBackIPPFunc ipp_func = 
            type == CV_8UC1 ? icvWarpPerspectiveBack_8u_C1R_p : 
            type == CV_8UC3 ? icvWarpPerspectiveBack_8u_C3R_p : 
            type == CV_8UC4 ? icvWarpPerspectiveBack_8u_C4R_p : 
            type == CV_32FC1 ? icvWarpPerspectiveBack_32f_C1R_p : 
            type == CV_32FC3 ? icvWarpPerspectiveBack_32f_C3R_p : 
            type == CV_32FC4 ? icvWarpPerspectiveBack_32f_C4R_p : 0; 
         
        if( ipp_func && CV_INTER_NN <= method && method <= CV_INTER_AREA && 
            MIN(ssize.width,ssize.height) >= 4 && MIN(dsize.width,dsize.height) >= 4 ) 
        { 
            int srcstep = src->step ? src->step : CV_STUB_STEP; 
            int dststep = dst->step ? dst->step : CV_STUB_STEP; 
            CvStatus status; 
            CvRect srcroi = {0, 0, ssize.width, ssize.height}; 
            CvRect dstroi = {0, 0, dsize.width, dsize.height}; 
 
            // this is not the most efficient way to fill outliers 
            if( flags & CV_WARP_FILL_OUTLIERS ) 
                cvSet( dst, fillval ); 
 
            status = ipp_func( src->data.ptr, ssize, srcstep, srcroi, 
                               dst->data.ptr, dststep, dstroi, 
                               invA.data.db, 1 << method ); 
            if( status >= 0 ) 
                EXIT; 
 
            ipp_func = type == CV_8UC1 ? icvWarpPerspective_8u_C1R_p : 
                type == CV_8UC3 ? icvWarpPerspective_8u_C3R_p : 
                type == CV_8UC4 ? icvWarpPerspective_8u_C4R_p : 
                type == CV_32FC1 ? icvWarpPerspective_32f_C1R_p : 
                type == CV_32FC3 ? icvWarpPerspective_32f_C3R_p : 
                type == CV_32FC4 ? icvWarpPerspective_32f_C4R_p : 0; 
 
            if( ipp_func ) 
            { 
                if( flags & CV_WARP_INVERSE_MAP ) 
                    cvInvert( &invA, &A, CV_SVD ); 
 
                status = ipp_func( src->data.ptr, ssize, srcstep, srcroi, 
                               dst->data.ptr, dststep, dstroi, 
                               A.data.db, 1 << method ); 
                if( status >= 0 ) 
                    EXIT; 
            } 
        } 
    } 
 
    cvScalarToRawData( &fillval, fillbuf, CV_MAT_TYPE(src->type), 0 ); 
 
    /*if( method == CV_INTER_LINEAR )*/ 
    { 
        func = (CvWarpPerspectiveFunc)bilin_tab.fn_2d[depth]; 
        if( !func ) 
            CV_ERROR( CV_StsUnsupportedFormat, "" ); 
 
        IPPI_CALL( func( src->data.ptr, src->step, ssize, dst->data.ptr, 
                         dst->step, dsize, dst_matrix, cn, 
                         flags & CV_WARP_FILL_OUTLIERS ? fillbuf : 0 )); 
    } 
 
    __END__; 
} 
 
 
/* Calculates coefficients of perspective transformation 
 * which maps (xi,yi) to (ui,vi), (i=1,2,3,4): 
 * 
 *      c00*xi + c01*yi + c02 
 * ui = --------------------- 
 *      c20*xi + c21*yi + c22 
 * 
 *      c10*xi + c11*yi + c12 
 * vi = --------------------- 
 *      c20*xi + c21*yi + c22 
 * 
 * Coefficients are calculated by solving linear system: 
 * / x0 y0  1  0  0  0 -x0*u0 -y0*u0 \ /c00\ /u0\ 
 * | x1 y1  1  0  0  0 -x1*u1 -y1*u1 | |c01| |u1| 
 * | x2 y2  1  0  0  0 -x2*u2 -y2*u2 | |c02| |u2| 
 * | x3 y3  1  0  0  0 -x3*u3 -y3*u3 |.|c10|=|u3|, 
 * |  0  0  0 x0 y0  1 -x0*v0 -y0*v0 | |c11| |v0| 
 * |  0  0  0 x1 y1  1 -x1*v1 -y1*v1 | |c12| |v1| 
 * |  0  0  0 x2 y2  1 -x2*v2 -y2*v2 | |c20| |v2| 
 * \  0  0  0 x3 y3  1 -x3*v3 -y3*v3 / \c21/ \v3/ 
 * 
 * where: 
 *   cij - matrix coefficients, c22 = 1 
 */ 
CV_IMPL CvMat* 
cvWarpPerspectiveQMatrix( const CvPoint2D32f* src, 
                          const CvPoint2D32f* dst, 
                          CvMat* matrix ) 
{ 
    CV_FUNCNAME( "cvGetPerspectiveTransformMatrix" ); 
 
    __BEGIN__; 
 
    double a[8][8]; 
    double b[8], x[9]; 
 
    CvMat A = cvMat( 8, 8, CV_64FC1, a ); 
    CvMat B = cvMat( 8, 1, CV_64FC1, b ); 
    CvMat X = cvMat( 8, 1, CV_64FC1, x ); 
 
    int i; 
 
    if( !src || !dst || !matrix ) 
        CV_ERROR( CV_StsNullPtr, "" ); 
 
    for( i = 0; i < 4; ++i ) 
    { 
        a[i][0] = a[i+4][3] = src[i].x; 
        a[i][1] = a[i+4][4] = src[i].y; 
        a[i][2] = a[i+4][5] = 1; 
        a[i][3] = a[i][4] = a[i][5] = 
        a[i+4][0] = a[i+4][1] = a[i+4][2] = 0; 
        a[i][6] = -src[i].x*dst[i].x; 
        a[i][7] = -src[i].y*dst[i].x; 
        a[i+4][6] = -src[i].x*dst[i].y; 
        a[i+4][7] = -src[i].y*dst[i].y; 
        b[i] = dst[i].x; 
        b[i+4] = dst[i].y; 
    } 
 
    cvSolve( &A, &B, &X, CV_SVD ); 
    x[8] = 1; 
     
    X = cvMat( 3, 3, CV_64FC1, x ); 
    cvConvert( &X, matrix ); 
 
    __END__; 
 
    return matrix; 
} 
 
 
/****************************************************************************************\ 
*                          Generic Geometric Transformation: Remap                       * 
\****************************************************************************************/ 
 
#define  ICV_DEF_REMAP_BILINEAR_FUNC( flavor, arrtype, load_macro, cast_macro ) \ 
static CvStatus CV_STDCALL                                                      \ 
icvRemap_Bilinear_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize,\ 
                         arrtype* dst, int dststep, CvSize dsize,           \ 
                         const float* mapx, int mxstep,                     \ 
                         const float* mapy, int mystep,                     \ 
                         int cn, const arrtype* fillval )                   \ 
{                                                                           \ 
    int i, j, k;                                                            \ 
    ssize.width--;                                                          \ 
    ssize.height--;                                                         \ 
                                                                            \ 
    srcstep /= sizeof(src[0]);                                              \ 
    dststep /= sizeof(dst[0]);                                              \ 
    mxstep /= sizeof(mapx[0]);                                              \ 
    mystep /= sizeof(mapy[0]);                                              \ 
                                                                            \ 
    for( i = 0; i < dsize.height; i++, dst += dststep,                      \ 
                                  mapx += mxstep, mapy += mystep )          \ 
    {                                                                       \ 
        for( j = 0; j < dsize.width; j++ )                                  \ 
        {                                                                   \ 
            int ix = cvRound(mapx[j]*(1 << ICV_WARP_SHIFT));                \ 
            int iy = cvRound(mapy[j]*(1 << ICV_WARP_SHIFT));                \ 
            int ifx = ix & ICV_WARP_MASK;                                   \ 
            int ify = iy & ICV_WARP_MASK;                                   \ 
            ix >>= ICV_WARP_SHIFT;                                          \ 
            iy >>= ICV_WARP_SHIFT;                                          \ 
                                                                            \ 
            float x0 = icvLinearCoeffs[ifx*2];                              \ 
            float x1 = icvLinearCoeffs[ifx*2 + 1];                          \ 
            float y0 = icvLinearCoeffs[ify*2];                              \ 
            float y1 = icvLinearCoeffs[ify*2 + 1];                          \ 
                                                                            \ 
            if( (unsigned)ix < (unsigned)ssize.width &&                     \ 
                (unsigned)iy < (unsigned)ssize.height )                     \ 
            {                                                               \ 
                const arrtype* s = src + iy*srcstep + ix*cn;                \ 
                for( k = 0; k < cn; k++, s++ )                              \ 
                {                                                           \ 
                    float t0 = x1*load_macro(s[0]) + x0*load_macro(s[cn]);  \ 
                    float t1 = x1*load_macro(s[srcstep]) +                  \ 
                               x0*load_macro(s[srcstep + cn]);              \ 
                    dst[j*cn + k] = (arrtype)cast_macro(y1*t0 + y0*t1);     \ 
                }                                                           \ 
            }                                                               \ 
            else if( fillval )                                              \ 
                for( k = 0; k < cn; k++ )                                   \ 
                    dst[j*cn + k] = fillval[k];                             \ 
        }                                                                   \ 
    }                                                                       \ 
                                                                            \ 
    return CV_OK;                                                           \ 
} 
 
 
#define  ICV_DEF_REMAP_BICUBIC_FUNC( flavor, arrtype, worktype,                 \ 
                                     load_macro, cast_macro1, cast_macro2 )     \ 
static CvStatus CV_STDCALL                                                      \ 
icvRemap_Bicubic_##flavor##_CnR( const arrtype* src, int srcstep, CvSize ssize, \ 
                         arrtype* dst, int dststep, CvSize dsize,               \ 
                         const float* mapx, int mxstep,                         \ 
                         const float* mapy, int mystep,                         \ 
                         int cn, const arrtype* fillval )                       \ 
{                                                                               \ 
    int i, j, k;                                                                \ 
    ssize.width = MAX( ssize.width - 3, 0 );                                    \ 
    ssize.height = MAX( ssize.height - 3, 0 );                                  \ 
                                                                                \ 
    srcstep /= sizeof(src[0]);                                                  \ 
    dststep /= sizeof(dst[0]);                                                  \ 
    mxstep /= sizeof(mapx[0]);                                                  \ 
    mystep /= sizeof(mapy[0]);                                                  \ 
                                                                                \ 
    for( i = 0; i < dsize.height; i++, dst += dststep,                          \ 
                                  mapx += mxstep, mapy += mystep )              \ 
    {                                                                           \ 
        for( j = 0; j < dsize.width; j++ )                                      \ 
        {                                                                       \ 
            int ix = cvRound(mapx[j]*(1 << ICV_WARP_SHIFT));                    \ 
            int iy = cvRound(mapy[j]*(1 << ICV_WARP_SHIFT));                    \ 
            int ifx = ix & ICV_WARP_MASK;                                       \ 
            int ify = iy & ICV_WARP_MASK;                                       \ 
            ix >>= ICV_WARP_SHIFT;                                              \ 
            iy >>= ICV_WARP_SHIFT;                                              \ 
                                                                                \ 
            if( (unsigned)(ix-1) < (unsigned)ssize.width &&                     \ 
                (unsigned)(iy-1) < (unsigned)ssize.height )                     \ 
            {                                                                   \ 
                for( k = 0; k < cn; k++ )                                       \ 
                {                                                               \ 
                    const arrtype* s = src + (iy-1)*srcstep + ix*cn + k;        \ 
                                                                                \ 
                    float t0 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \ 
                               load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \ 
                               load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\ 
                               load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\ 
                                                                                \ 
                    s += srcstep;                                               \ 
                                                                                \ 
                    float t1 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \ 
                               load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \ 
                               load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\ 
                               load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\ 
                                                                                \ 
                    s += srcstep;                                               \ 
                                                                                \ 
                    float t2 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \ 
                               load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \ 
                               load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\ 
                               load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\ 
                                                                                \ 
                    s += srcstep;                                               \ 
                                                                                \ 
                    float t3 = load_macro(s[-cn])*icvCubicCoeffs[ifx*2 + 1] +   \ 
                               load_macro(s[0])*icvCubicCoeffs[ifx*2] +         \ 
                               load_macro(s[cn])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2] +\ 
                               load_macro(s[cn*2])*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ifx)*2+1];\ 
                                                                                \ 
                    worktype t = cast_macro1( t0*icvCubicCoeffs[ify*2 + 1] +    \ 
                               t1*icvCubicCoeffs[ify*2] +                       \ 
                               t2*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ify)*2] +  \ 
                               t3*icvCubicCoeffs[(ICV_CUBIC_TAB_SIZE-ify)*2+1] );\ 
                                                                                \ 
                    dst[j*cn + k] = cast_macro2(t);                             \ 
                }                                                               \ 
            }                                                                   \ 
            else if( fillval )                                                  \ 
                for( k = 0; k < cn; k++ )                                       \ 
                    dst[j*cn + k] = fillval[k];                                 \ 
        }                                                                       \ 
    }                                                                           \ 
                                                                                \ 
    return CV_OK;                                                               \ 
} 
 
 
ICV_DEF_REMAP_BILINEAR_FUNC( 8u, uchar, CV_8TO32F, cvRound ) 
ICV_DEF_REMAP_BILINEAR_FUNC( 16u, ushort, CV_NOP, cvRound ) 
ICV_DEF_REMAP_BILINEAR_FUNC( 32f, float, CV_NOP, CV_NOP ) 
 
ICV_DEF_REMAP_BICUBIC_FUNC( 8u, uchar, int, CV_8TO32F, cvRound, CV_FAST_CAST_8U ) 
ICV_DEF_REMAP_BICUBIC_FUNC( 16u, ushort, int, CV_NOP, cvRound, CV_CAST_16U ) 
ICV_DEF_REMAP_BICUBIC_FUNC( 32f, float, float, CV_NOP, CV_NOP, CV_NOP ) 
 
typedef CvStatus (CV_STDCALL * CvRemapFunc)( 
    const void* src, int srcstep, CvSize ssize, 
    void* dst, int dststep, CvSize dsize, 
    const float* mapx, int mxstep, 
    const float* mapy, int mystep, 
    int cn, const void* fillval ); 
 
static void icvInitRemapTab( CvFuncTable* bilinear_tab, CvFuncTable* bicubic_tab ) 
{ 
    bilinear_tab->fn_2d[CV_8U] = (void*)icvRemap_Bilinear_8u_CnR; 
    bilinear_tab->fn_2d[CV_16U] = (void*)icvRemap_Bilinear_16u_CnR; 
    bilinear_tab->fn_2d[CV_32F] = (void*)icvRemap_Bilinear_32f_CnR; 
 
    bicubic_tab->fn_2d[CV_8U] = (void*)icvRemap_Bicubic_8u_CnR; 
    bicubic_tab->fn_2d[CV_16U] = (void*)icvRemap_Bicubic_16u_CnR; 
    bicubic_tab->fn_2d[CV_32F] = (void*)icvRemap_Bicubic_32f_CnR; 
} 
 
 
/******************** IPP remap functions *********************/ 
 
typedef CvStatus (CV_STDCALL * CvRemapIPPFunc)( 
    const void* src, CvSize srcsize, int srcstep, CvRect srcroi, 
    const float* xmap, int xmapstep, const float* ymap, int ymapstep, 
    void* dst, int dststep, CvSize dstsize, int interpolation );  
 
icvRemap_8u_C1R_t icvRemap_8u_C1R_p = 0; 
icvRemap_8u_C3R_t icvRemap_8u_C3R_p = 0; 
icvRemap_8u_C4R_t icvRemap_8u_C4R_p = 0; 
 
icvRemap_32f_C1R_t icvRemap_32f_C1R_p = 0; 
icvRemap_32f_C3R_t icvRemap_32f_C3R_p = 0; 
icvRemap_32f_C4R_t icvRemap_32f_C4R_p = 0; 
 
/**************************************************************/ 
 
CV_IMPL void 
cvRemap( const CvArr* srcarr, CvArr* dstarr, 
         const CvArr* _mapx, const CvArr* _mapy, 
         int flags, CvScalar fillval ) 
{ 
    static CvFuncTable bilinear_tab; 
    static CvFuncTable bicubic_tab; 
    static int inittab = 0; 
 
    CV_FUNCNAME( "cvRemap" ); 
 
    __BEGIN__; 
     
    CvMat srcstub, *src = (CvMat*)srcarr; 
    CvMat dststub, *dst = (CvMat*)dstarr; 
    CvMat mxstub, *mapx = (CvMat*)_mapx; 
    CvMat mystub, *mapy = (CvMat*)_mapy; 
    int type, depth, cn; 
    int method = flags & 3; 
    double fillbuf[4]; 
    CvSize ssize, dsize; 
 
    if( !inittab ) 
    { 
        icvInitRemapTab( &bilinear_tab, &bicubic_tab ); 
        icvInitLinearCoeffTab(); 
        icvInitCubicCoeffTab(); 
        inittab = 1; 
    } 
 
    CV_CALL( src = cvGetMat( srcarr, &srcstub )); 
    CV_CALL( dst = cvGetMat( dstarr, &dststub )); 
    CV_CALL( mapx = cvGetMat( mapx, &mxstub )); 
    CV_CALL( mapy = cvGetMat( mapy, &mystub )); 
     
    if( !CV_ARE_TYPES_EQ( src, dst )) 
        CV_ERROR( CV_StsUnmatchedFormats, "" ); 
 
    if( !CV_ARE_TYPES_EQ( mapx, mapy ) || CV_MAT_TYPE( mapx->type ) != CV_32FC1 ) 
        CV_ERROR( CV_StsUnmatchedFormats, "Both map arrays must have 32fC1 type" ); 
 
    if( !CV_ARE_SIZES_EQ( mapx, mapy ) || !CV_ARE_SIZES_EQ( mapx, dst )) 
        CV_ERROR( CV_StsUnmatchedSizes, 
        "Both map arrays and the destination array must have the same size" ); 
 
    type = CV_MAT_TYPE(src->type); 
    depth = CV_MAT_DEPTH(type); 
    cn = CV_MAT_CN(type); 
     
    ssize = cvGetMatSize(src); 
    dsize = cvGetMatSize(dst); 
     
    if( icvRemap_8u_C1R_p ) 
    { 
        CvRemapIPPFunc ipp_func = 
            type == CV_8UC1 ? icvRemap_8u_C1R_p : 
            type == CV_8UC3 ? icvRemap_8u_C3R_p : 
            type == CV_8UC4 ? icvRemap_8u_C4R_p : 
            type == CV_32FC1 ? icvRemap_32f_C1R_p : 
            type == CV_32FC3 ? icvRemap_32f_C3R_p : 
            type == CV_32FC4 ? icvRemap_32f_C4R_p : 0; 
         
        if( ipp_func ) 
        { 
            int srcstep = src->step ? src->step : CV_STUB_STEP; 
            int dststep = dst->step ? dst->step : CV_STUB_STEP; 
            int mxstep = mapx->step ? mapx->step : CV_STUB_STEP; 
            int mystep = mapy->step ? mapy->step : CV_STUB_STEP; 
            CvStatus status; 
            CvRect srcroi = {0, 0, ssize.width, ssize.height}; 
 
            // this is not the most efficient way to fill outliers 
            if( flags & CV_WARP_FILL_OUTLIERS ) 
                cvSet( dst, fillval ); 
 
            status = ipp_func( src->data.ptr, ssize, srcstep, srcroi, 
                               mapx->data.fl, mxstep, mapy->data.fl, mystep, 
                               dst->data.ptr, dststep, dsize, 
                               1 << (method == CV_INTER_NN || method == CV_INTER_LINEAR || 
                               method == CV_INTER_CUBIC ? method : CV_INTER_LINEAR) ); 
            if( status >= 0 ) 
                EXIT; 
        } 
    } 
 
    cvScalarToRawData( &fillval, fillbuf, CV_MAT_TYPE(src->type), 0 ); 
 
    { 
        CvRemapFunc func = method == CV_INTER_CUBIC ? 
            (CvRemapFunc)bicubic_tab.fn_2d[depth] : 
            (CvRemapFunc)bilinear_tab.fn_2d[depth]; 
 
        if( !func ) 
            CV_ERROR( CV_StsUnsupportedFormat, "" ); 
 
        func( src->data.ptr, src->step, ssize, dst->data.ptr, dst->step, dsize, 
              mapx->data.fl, mapx->step, mapy->data.fl, mapy->step, 
              cn, flags & CV_WARP_FILL_OUTLIERS ? fillbuf : 0 ); 
    } 
 
    __END__; 
} 
 
 
/****************************************************************************************\ 
*                                   Log-Polar Transform                                  * 
\****************************************************************************************/ 
 
/* now it is done via Remap; more correct implementation should use 
   some super-sampling technique outside of the "fovea" circle */ 
CV_IMPL void 
cvLogPolar( const CvArr* srcarr, CvArr* dstarr, 
            CvPoint2D32f center, double M, int flags ) 
{ 
    CvMat* mapx = 0; 
    CvMat* mapy = 0; 
    double* exp_tab = 0; 
    float* buf = 0; 
     
    CV_FUNCNAME( "cvLogPolar" ); 
 
    __BEGIN__; 
 
    CvMat srcstub, *src = (CvMat*)srcarr; 
    CvMat dststub, *dst = (CvMat*)dstarr; 
    CvSize ssize, dsize; 
 
    CV_CALL( src = cvGetMat( srcarr, &srcstub )); 
    CV_CALL( dst = cvGetMat( dstarr, &dststub )); 
     
    if( !CV_ARE_TYPES_EQ( src, dst )) 
        CV_ERROR( CV_StsUnmatchedFormats, "" ); 
 
    if( M <= 0 ) 
        CV_ERROR( CV_StsOutOfRange, "M should be >0" ); 
 
    ssize = cvGetMatSize(src); 
    dsize = cvGetMatSize(dst); 
 
    CV_CALL( mapx = cvCreateMat( dsize.height, dsize.width, CV_32F )); 
    CV_CALL( mapy = cvCreateMat( dsize.height, dsize.width, CV_32F )); 
 
    if( !(flags & CV_WARP_INVERSE_MAP) ) 
    { 
        int phi, rho; 
         
        CV_CALL( exp_tab = (double*)cvAlloc( dsize.width*sizeof(exp_tab[0])) ); 
 
        for( rho = 0; rho < dst->width; rho++ ) 
            exp_tab[rho] = exp(rho/M); 
     
        for( phi = 0; phi < dsize.height; phi++ ) 
        { 
            double cp = cos(phi*2*CV_PI/dsize.height); 
            double sp = sin(phi*2*CV_PI/dsize.height); 
            float* mx = (float*)(mapx->data.ptr + phi*mapx->step); 
            float* my = (float*)(mapy->data.ptr + phi*mapy->step); 
 
            for( rho = 0; rho < dsize.width; rho++ ) 
            { 
                double r = exp_tab[rho]; 
                double x = r*cp + center.x; 
                double y = r*sp + center.y; 
 
                mx[rho] = (float)x; 
                my[rho] = (float)y; 
            } 
        } 
    } 
    else 
    { 
        int x, y; 
        CvMat bufx, bufy, bufp, bufa; 
        double ascale = (ssize.width-1)/(2*CV_PI); 
 
        CV_CALL( buf = (float*)cvAlloc( 4*dsize.width*sizeof(buf[0]) )); 
 
        bufx = cvMat( 1, dsize.width, CV_32F, buf ); 
        bufy = cvMat( 1, dsize.width, CV_32F, buf + dsize.width ); 
        bufp = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*2 ); 
        bufa = cvMat( 1, dsize.width, CV_32F, buf + dsize.width*3 ); 
 
        for( x = 0; x < dsize.width; x++ ) 
            bufx.data.fl[x] = (float)x - center.x; 
 
        for( y = 0; y < dsize.height; y++ ) 
        { 
            float* mx = (float*)(mapx->data.ptr + y*mapx->step); 
            float* my = (float*)(mapy->data.ptr + y*mapy->step); 
             
            for( x = 0; x < dsize.width; x++ ) 
                bufy.data.fl[x] = (float)y - center.y; 
 
#if 1 
            cvCartToPolar( &bufx, &bufy, &bufp, &bufa ); 
 
            for( x = 0; x < dsize.width; x++ ) 
                bufp.data.fl[x] += 1.f; 
 
            cvLog( &bufp, &bufp ); 
             
            for( x = 0; x < dsize.width; x++ ) 
            { 
                double rho = bufp.data.fl[x]*M; 
                double phi = bufa.data.fl[x]*ascale; 
 
                mx[x] = (float)rho; 
                my[x] = (float)phi; 
            } 
#else 
            for( x = 0; x < dsize.width; x++ ) 
            { 
                double xx = bufx.data.fl[x]; 
                double yy = bufy.data.fl[x]; 
 
                double p = log(sqrt(xx*xx + yy*yy) + 1.)*M; 
                double a = atan2(yy,xx); 
                if( a < 0 ) 
                    a = 2*CV_PI + a; 
                a *= ascale; 
 
                mx[x] = (float)p; 
                my[x] = (float)a; 
            } 
#endif 
        } 
    } 
 
    cvRemap( src, dst, mapx, mapy, flags, cvScalarAll(0) ); 
 
    __END__; 
 
    cvFree( (void**)&exp_tab ); 
    cvFree( (void**)&buf ); 
    cvReleaseMat( &mapx ); 
    cvReleaseMat( &mapy ); 
} 
 
/* End of file. */