* Source: www.pudn.com > scaling.rar > scale_horz_h.h62


* ========================================================================= * 
*                                                                           * 
*   TEXAS INSTRUMENTS, INC.                                                 * 
*                                                                           * 
*   NAME                                                                    * 
*       scale_horz                                                          * 
*                                                                           * 
*                                                                           * 
*   USAGE                                                                   * 
*       This routine is C-callable and can be called as:                    * 
*                                                                           * 
*           void scale_horz                                                 * 
*           (                                                               * 
*               unsigned short *in_data,  /* Ptr to unscaled lines      */  * 
*               unsigned int    in_len,   /* Pixels/line unscaled       */  * 
*               short          *out_data, /* Ptr to scaled data lines   */  * 
*               unsigned int    out_len,  /* Pixels/line of scaled data */  * 
*               short          *hh,       /* Ptr to filter taps,            * 
*                                            interleaved odd/even           * 
*                                            outputs                    */  * 
*               unsigned int    l_hh,     /* Length of scaling filters  */  * 
*               unsigned int    n_hh,     /* Number of scaling filters  */  * 
*               short          *patch     /* Ptr to decrement pattern   */  * 
*           );                                                              * 
*                                                                           * 
*   DESCRIPTION                                                             * 
*                                                                           * 
*       This code can scale up or down 1 line of data, in the               * 
*       ratio out_len : in_len.  e.g. 1 to 3, 4:3, 5:6. The                 * 
*       filters are designed outside of the loop using a                    * 
*       general purpose resizing algorithm.                                 * 
*                                                                           * 
*           patch0 = patch + 2;                                             * 
*           filter_count = n_hh;                                            * 
*           ka = 0;                                                         * 
*                                                                           * 
*           line0_x = in_data;                                              * 
*           line0_y = out_data;                                             * 
*           ptr_hh = hh;                                                    * 
*           jump = (int) patch[0]; ka = jump >> 1;                          * 
*           jump = (int) patch[1]; kb = jump >> 1;                          * 
*                                                                           * 
*           for ( i = 0; i < out_len; i += 2)                               * 
*           {                                                               * 
*               y0 = 1 << 5;                                                * 
*               y1 = 1 << 5;                                                * 
*               for ( j = 0; j < l_hh; j+=4)                                * 
*               {                                                           * 
*                   /* even outputs */                                      * 
*                   for (k=0; k < 4; k++)                                   * 
*                   {                                                       * 
*                       h0 = *ptr_hh++;                                     * 
*                       x0 = *(line0_x+ ka + k);                            * 
*                       y0 += ( x0 * h0 );                                  * 
*                   }                                                       * 
*                   jump = (int) (*patch0++);                               * 
*                   ka = ka + (jump>>1);                                    * 
*                   /* odd outputs */                                       * 
*                   for (k=0; k < 4; k++)                                   * 
*                   {                                                       * 
*                       h1 = *ptr_hh++;                                     * 
*                       x1 = *(line0_x + kb + k);                           * 
*                       y1 += ( x1 * h1 );                                  * 
*                   }                                                       * 
*                   jump = (int) (*patch0++);                               * 
*                   kb = kb + (jump>>1);                                    * 
*               }                                                           * 
*               *line0_y++ = (short) (y0 >> 6) ;                            * 
*               *line0_y++ = (short) (y1 >> 6) ;                            * 
*                                                                           * 
*               filter_count -= 2;                                          * 
*               if (!filter_count)                                          * 
*               {                                                           * 
*                   patch0 = patch + 2;                                     * 
*                   ptr_hh = hh;                                            * 
*                   filter_count = n_hh;                                    * 
*               }                                                           * 
*           }                                                               * 
*                                                                           * 
*   ASSUMPTIONS                                                             * 
*       One line of data is produced per function call.                     * 
*                                                                           * 
*       The line must be aligned on a double word boundary and be a         * 
*       multiple of 8 bytes.                                                * 
*                                                                           * 
*       Filters are multiples of 4 taps, maximum number of filters is 256.  * 
*                                                                           * 
*       The computations for each output are interleaved, thus the filters  * 
*       are interleaved on a 4 short interval.                              * 
*                                                                           * 
*       Little ENDIAN Configuration is used and the input and output data   * 
*       is 16 bit unsigned and signed shorts respectively.  The filters     * 
*       are also 16 bit signed shorts in 12 bit precision.                  * 
*                                                                           * 
*       The n_hh filters are all of the same length and are                 * 
*       strung together in a single linear array.                           * 
*                                                                           * 
*       Interrupts are masked by the function for most of its duration.     * 
*                                                                           * 
*   MEMORY NOTE                                                             * 
*       Some bank hits will occur in this code for certain scale            * 
*       factors and filter lengths.                                         * 
*                                                                           * 
*       For 4 taps k = 0, for l_hh 8, k = 0.031, for l_hh = 16, k = 0.015.  * 
*       Different filter lengths can produce different numbers of bank      * 
*       conflicts.  Overall, these bank conflicts have nearly zero effect.  * 
*                                                                           * 
*       For l_hh=4: k=0, l_hh=8: k=1/32, l_hh=12: k=0, l_hh=16: k=1/64      * 
*       For l_hh % 8 == 0, k = 1/(4*l_hh) else k = 0                        * 
*                                                                           * 
*       'k' is the bank conflict between the store and the guidance table   * 
*       load.  Depending on the relative sizes of the filters and           * 
*       memory width, this bank conflict is between 0 and 3.1%              * 
*       overhead.                                                           * 
*                                                                           * 
*   TECHNIQUES                                                              * 
*       The outputs are computed using interleaved inputs. The patch table  * 
*       controls the access of 2 parallel pointers. For example an 8/33     * 
*       scale factor will have the following access pattern.                * 
*                                                                           * 
*                 11111111112222222222333333333344444444445555555555        * 
*       012345678901234567890123456789012345678901234567890123456789        * 
*                                                                           * 
*       0  e xxxxxxxx     <-start point of even output 0                    * 
*       1      o xxxxxxxx      <-start point of odd output 4                * 
*       2          e xxxxxxxx                                               * 
*       3              o xxxxxxxx                                           * 
*       4                  e xxxxxxxx                                       * 
*       5                      o xxxxxxxx                                   * 
*       6                          e xxxxxxxx                               * 
*       7                              o xxxxxxxx                           * 
*       0                                   e xxxxxxxx  <-next start        * 
*       1                                       o xxxxxxxx  <-next start    * 
*                                                                           * 
*                                                                           * 
*       From this diagram the even pointer jumps 4 then another 4 as the    * 
*       filters have 8 taps, it then jumps 4 to get to the next set of      * 
*       input data. The odd pointer does the same. These jumps are          * 
*       interleaved and so are the filter coefficients. The jumps are       * 
*       in multiples of bytes as non-scaled non-aligned double word         * 
*       accesses are used.  In this case the table will be:                 * 
*                                                                           * 
*           short patch[] = {0,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,10,10,8,8};    * 
*                                                                           * 
*       Notice the first 2 entries are the initial starting points for      * 
*       the two pointers. To remove a dependency in the code the last 2     * 
*       entries are copies of the 2nd two. This makes the table almost      * 
*       circular.                                                           * 
*                                                                           * 
*   NOTES                                                                   * 
*       Other scale factors can be achieved with the following              * 
*       example tables.                                                     * 
*                                                                           * 
*   Scale Factor Taps  Table short jump[] =                                 * 
*   --------------------------------------------------------------------    * 
*       5/6       4    {0, 1, 2, 2, 2, 3, 3, 2, 2, 2, 3, 3, 2, 2}           * 
*       4/3       8    {0, 4, 4, 4, -3, -2, 4, 4, -2, -3, 4, 4}             * 
*       3/4       12   {0,1,4,4,4,4,-6,-5,4,4,4,4,-5,-6,4,4,4,4,-5,-5,4,4}  * 
*       6/5       16   {0,0,4,4,4,4,4,4,-11,-10,4,4,4,4,4,4,-10,-10,        * 
*                       4,4,4,4,4,4,-10,-11,4,4}                            * 
*                                                                           * 
*       The software to produce these tables and the simple coefficients    * 
*       for an arbitrary scale factor and number of taps can be found       * 
*       in the api document. Note in the case of 3/4, odd scale factors     * 
*       are doubled to make 6/8 instead of 3/4                              * 
*                                                                           * 
*   CYCLES                                                                  * 
*       cycles = 0.5 * out_len * l_hh * (1+k) + 30.                         * 
*       If (l_hh % 8) == 0 then k = 1/(4*l_hh) else k = 0.                  * 
*                                                                           * 
*       For l_hh = 16, in_len = 1024, and out_len = 1366,  cycles = 11129.  * 
*       For l_hh = 8,  in_len = 640,  and out_len = 120,   cycles = 525.    * 
*                                                                           * 
*   CODESIZE                                                                * 
*       296 bytes                                                           * 
* ------------------------------------------------------------------------- * 
*             Copyright (c) 2000 Texas Instruments, Incorporated.           * 
*                            All Rights Reserved.                           * 
* ========================================================================= * 
                    ; Switch assembly to the named section ".opt_scale_horz"; whatever
                    ; is assembled after this point lands in that section until
                    ; another section directive is seen.
                    .sect ".opt_scale_horz" 
                    ; Give the symbol _scale_horz_asm global (cross-file) visibility.
                    ; Its definition is not visible in this header.
                    ; NOTE(review): the usage comment above documents the C name
                    ; scale_horz, while the symbol declared here is _scale_horz_asm --
                    ; confirm which name C callers are expected to use.
                    .global   _scale_horz_asm 
* ========================================================================= * 
*   End of file:  scale_horz_h.h62                                          * 
* ------------------------------------------------------------------------- * 
*             Copyright (c) 2000 Texas Instruments, Incorporated.           * 
*                            All Rights Reserved.                           * 
* ========================================================================= *