www.pudn.com > scaling.rar > scale_2d_yc_doub_buf.c


/* ========================================================================= */ 
/*              Copyright (C) 2000 Texas Instruments Incorporated.           */ 
/*                              All Rights Reserved                          */ 
/* ========================================================================= */ 
/* The code scales 3 planes of y, Cr, Cb data in varying combinations        */ 
/* where Y denotes luma, Cr is red chrominance and Cb is blue chrominance    */ 
/* NOTES                                                                     */ 
/* When the phrase "pixels" is used this measn a color pixel, this has       */ 
/* a color combination of  4:2:2, 4:4:4 or 4:2:0                             */ 
/* for 4:4:4 there are 3 identical coloe spaces all the same size            */ 
/* for 4:2:2 there planes are the same height but only half as wide          */ 
/* for 4:2:0 the planes are 1/2 the width and 1/2 the height                 */ 
/*                                                                           */ 
/* The scaler will automatically filter output data only within the display  */ 
/* if it is larger than the display it will write to the boundary, if smaller*/ 
/* it will centre the image in the middle.                                   */ 
/*                                                                           */ 
/* This function can be applied to R, G, B data also R->Y, G->Cr, B->Cb      */ 
/* ========================================================================= */ 
/*              Copyright (C) 2000 Texas Instruments Incorporated.           */ 
/*                              All Rights Reserved                          */ 
/* ========================================================================= */ 
 
#include      
#include  
 
#include "ycbcr422pl16_to_rgb565_h.h"   
#include "scale_horz_h.h" 
#include "scale_vert_h.h" 
#include "pix_expand_h.h" 
 
#define  f4_4_4 2 
#define  f4_2_2 1 
#define  f4_2_0 0  
#pragma  CODE_SECTION(scale_2d_ycrcb,".opt_gp"); 
/* ========================================================================= */ 
void scale_2d_ycrcb ( 
  int     format,           /* 4:4:4 == 2, 4:2:2 == 1, 4:2:0 == 0            */ 
 
  int     p_h,              /* numerator for horizontal scale factor p/q     */ 
  int     q_h,              /* denominator for horizontal scale factor p/q   */ 
 
  short * hpatch,           /* stride table for cr/b horizontal scaling jumps*/ 
  short * hh,               /* pntr to horz filter bank                      */ 
  int     l_hh,             /* number of taps in each horz filter            */ 
  int     n_hh,             /* number of filters in horz filter bank         */ 
 
  int     p_v,              /* numerator for vertical scale factor p/q       */ 
  int     q_v,              /* denominator for vertical scale factor p/q     */ 
 
  short * vpatch,           /* stride table for vertical scaling jumps       */ 
  short * hv,               /* pntr to vert filter bank                      */ 
  int     l_hv,             /* number of taps in each vert filter            */ 
  int     n_hv,             /* number of filters in vert filter bank         */ 
 
  int     in_pic_height,    /* height of picture in Pixels multiple of 2     */ 
  int     in_pic_width,     /* width  of picture in Pixels multiple of 8     */ 
 
  int     display_height,   /* height of picture in Pixels multiple of 2     */ 
  int     display_width,    /* width  of picture in Pixels multiple of 8     */ 
 
  unsigned char * image_y,  /* pntr to input image planar y values           */ 
  unsigned char * image_cr, /* pntr to input image planar cr values          */ 
  unsigned char * image_cb, /* pntr to input image planar cb values          */ 
  void  * L2scratch_pad,    /* temp space for internal buffers and stores    */ 
                            /* aligned to a double boundary                  */ 
  unsigned char * image_rgb)/* pntr to output image, scaled in color         */ 
/* ========================================================================= */ 
{ 
  short * vpatchy;     /* copy of patch table for y scaling */ 
  short * vpatchc;     /* copy of patch table for cr and cb scaling */ 
  short * t_filt;      /* pntr to temporary buffer length l_hh */ 
 
  int y_width;         /* width of raw y plane image */ 
  int y_height;        /* height of raw y plane image */ 
  int in_y_width;      /* width of data to be scaled, sub section of y buffer */ 
  int in_y_offset;     /* offset from left edge of raw Y plane buffer */ 
  int in_y_offset_1;   /* offset from left edge of raw Y plane buffer */ 
  int y_rd_index;      /* index into the raw Y buffer */ 
  int y_filter_no;     /* the current vertical filter being used for the  
                          vertical filtering of Y */ 
 
  int out_y_width;     /* width of data scaled Y buffer */ 
  int main_out_y_width; /* width of scaled chroma image */ 
  int total_out_y_width;/* width of scaled chroma image */ 
  int hylines;         /* number of horizontally scaled y lines  
                          put in y r(otating)b(uffer) */ 
  int y_rb_write_line; /* index into y r(otating)b(uffer) */ 
  int y_rb_read_line;  /* index into y r(otating)b(uffer) for removal */ 
 
  int out_color_offset;/* offset into the display buffer depends on display  
                          and scaled picture size */ 
 
  int in_vert_offset;  /* generic intermediate offset to top of scaled image */ 
 
  int in_c_offset;     /* offset to (cropped) corner of raw chroma images */ 
  int in_c_offset_1;   /* offset to (cropped) corner of raw chroma images */ 
  int c_width;         /* width of chroma raw image */ 
  int in_c_width;      /* width of cropped raw chroma image */ 
  int out_c_width;     /* width of scaled chroma image */ 
  int c_rd_index;      /* index into the raw C buffers */ 
  int hclines;         /* number of horizontally scaled c lines  
                          put in c r(otating)b(uffer) */ 
  int c_rb_write_line; /* index into c r(otating)b(uffer) for addition*/ 
 
  int c_rb_read_line;  /* index into c r(otating)b(uffer) for removal */ 
  int c_filter_no;     /* the current vertical filter being used for the  
                          vertical filtering of C */ 
 
  int max_clines;      /* number of color lines needed by colr format 
                          e.g. 444 needs 2 lines, 420 needs only 1 */ 
 
  int display_lines;   /* number of color lines written to display so far */ 
  int display_pixels;  /* number of color pixels written to display so far */ 
  int window_height;   /* number of lines to display */ 
 
  int color;           /* number of color elements/pixel rgb=3, 4:2:2 = 1.5 */ 
  int color_shift;     /* divider for whether output is 422,420 or 444 */ 
 
  unsigned char *buf_y0;/*pointer to 1st pingpong buffer for y raw image data */ 
  int    in_y_buf_size;/* size of the input ping or pong buffer */ 
  short  *buf_y1;      /* pointer to 2nd buffer containing expanded y pixels  */ 
  short  *rot_buf_y2;  /* pntr to circular line buffer containing l_hv lines of  
                          horizontally scaled raw y data */ 
  short  *buf_y3;      /* pntr to output buffer 2 lines of vertically &  
                          horizontally scaled y data ready for combining  
                          into color pixels */ 
  short *y_filt_state; /* packed last filter number and input index */ 
 
  unsigned char *buf_cr0;/* pointer to 1st ping pong buffer for c raw image data */ 
  int    in_c_buf_size;/* size of the input ping or pong buffer */ 
  short  *buf_cr1;     /* pointer to 2nd buffer containing expanded c pixels  */ 
  short  *rot_buf_cr2; /* pntr to circular line buffer containing l_hv lines of 
                          horizontally scaled raw c data */ 
  short  *buf_cr3;     /* pntr to buffer of 2 lines of vertically and horizontally 
                          scaled c data ready for combining into color pixels */ 
 
  unsigned char *buf_cb0;/* pointer to 1st ping pong buffer for c raw image data */ 
  short  *buf_cb1;     /* pntr to 2nd buffer containing expanded c pixels */ 
  short  *rot_buf_cb2; /* pntr to circular line buffer containing l_hv lines of 
                          horizontally scaled raw c data */ 
  short  *buf_cb3;     /* pntr to buffer of 2 lines of vertically & horizontally 
                          scaled c data ready for combining into color pixels */ 
  short  *c_filt_state;/* packed last filter number and input index */ 
 
  int    y_scaled;     /*num. of finished y lines in output buffer pre color*/ 
  int    c_scaled;     /*num. of finished c clines in output buffer pre color*/ 
  unsigned char *buf_rgb; /* pntr to ping pong buffer containing 2 lines of  
                             color pixels formed from the y,cr,cb data */ 
 
  short cmatrix[5] = {0x2543, 0x3313, -0x0C8A, -0x1A04, 0x408D};                              
  unsigned char *gp_store; /* pointer to the scratchpad space */ 
 
  int    store_index;      /* index into main scratch pad for buffers */ 
  int    divider;          /* number of strips needed for cache allocation */ 
  int    i;                /* current strip being worked on */ 
 
  int ping_in_y, pong_in_y;/* tags for buffers, ping or pong */ 
  int ping_in_c, pong_in_c;/* tags for buffers, ping or pong */ 
  unsigned int  in_id_y ;  /* input EDMA xfer id             */ 
  unsigned int  in_id_cr;  /* input EDMA xfer id             */ 
  unsigned int  in_id_cb;  /* input EDMA xfer id             */ 
  int           first;     /* tag for out buffer             */ 
  unsigned int  out_id0;   /* output EDMA xfer id            */ 
  int ping_out, pong_out;  /* tags for buffers, ping or pong */ 
  int write_buf; 
 
   
/* ========================= set up dimensions ============================== */ 
/* and align all offsets to 8 element boundaries and counts to mutliples  */ 
/* of 8 elements */ 
 
  color = 2;               /* each color is now 2 bytes, 565 */   
 
  y_width = in_pic_width; 
  y_height = in_pic_height; 
   
  /* the scaled output width is bigger than the display ? */ 
  if (p_h * in_pic_width >=  q_h * display_width) 
  { 
     in_y_offset = (y_width * p_h  - q_h * display_width)/(2*p_h); 
     in_y_width  = (q_h * display_width)/p_h; 
     out_y_width = display_width; 
     out_color_offset =  0; 
  /* the scaled image width is smaller than the display ? */ 
  } else 
  { 
     in_y_offset = 0; 
     in_y_width  = y_width; 
     out_y_width = (y_width * p_h) / q_h; 
     out_color_offset = color*((display_width * q_h - p_h * y_width)/(q_h*2)); 
  } 
 
  /* the scaled output is bigger than the display ? */ 
  if (p_v * in_pic_height >= q_v * display_height) 
  { 
     in_vert_offset= y_width*((y_height * p_v  - q_v * display_height)/(2*p_v)); 
     window_height = display_height; 
  /* the scaled image is smaller than the display ? */ 
  } else 
  { 
     in_vert_offset = 0; 
     window_height = (in_pic_height * p_v)/q_v; 
     window_height = (window_height + 2) & ~ 1; 
     out_color_offset += color * display_width * 
                         ((display_height * q_v - p_v * y_height)/(2*q_v)); 
  }                   
  out_color_offset &= ~3;  
 
  if (format == f4_4_4) 
  { 
     in_c_offset = in_y_offset + in_vert_offset; 
     c_width     = y_width; 
     in_c_width  = in_y_width; 
     out_c_width = out_y_width; 
     max_clines = 2; 
     color_shift = 0; 
  } else if (format == f4_2_2) 
  { 
     c_width = y_width >> 1; 
     in_c_offset = in_y_offset ? (c_width*p_h - q_h*(display_width>>1))/(2*p_h):0; 
     in_c_offset += (in_vert_offset >> 1); 
     in_c_width = in_y_width >> 1; 
     out_c_width = out_y_width >> 1; 
     max_clines = 2; 
     color_shift = 1; 
  } else /* format == f4_2_0 */ 
  { 
     c_width = y_width >> 1; 
     in_c_offset = in_y_offset ? (c_width*p_h - q_h*(display_width>>1))/(4*p_h) : 0; 
     in_c_offset += (in_vert_offset >> 2); 
     in_c_width = in_y_width >> 1; 
     out_c_width = out_y_width >> 1; 
     max_clines = 1; 
     color_shift = 1; 
  } 
  in_y_offset += in_vert_offset; 
  in_y_offset_1 = in_y_offset + (hpatch[1] >> 1); 
  in_c_offset_1 = in_c_offset + (hpatch[1] >> 1); 
 
/* ========================= set up buffers ============================= */ 
  gp_store = (unsigned char *) L2scratch_pad; 
 
  /* scratch pad is already set up to be aligned on a double boundary */ 
  /* Y buffers */ 
  store_index = 16; 
  store_index += 2*l_hh + 16 + 32; 
  store_index += 2*((in_y_width & ~7) + 16); 
  store_index += sizeof(short)*((in_y_width & ~7) + 32); /* */ 
  store_index += sizeof(short)*((out_y_width & ~7) + 16)*l_hv; 
  store_index += 2*sizeof(short)*((out_y_width & ~7) + 16); 
  store_index += 2*((in_c_width & ~7) + 16); 
  store_index += sizeof(short)*((in_c_width & ~7) + 32); /* */ 
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; 
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); 
  /* Cb buffers */ 
  store_index += 2*((in_c_width & ~7) + 16); 
  store_index += sizeof(short)*((in_c_width & ~7) + 32); /* */ 
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; 
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); 
 
  /* total data required is now known */ 
  divider = (store_index + 24*1024)/(16*1024); 
 
  main_out_y_width = out_y_width ;  
  in_y_width /= divider; 
  out_y_width /= divider; 
  in_c_width /= divider; 
 
  /* round up to nearest end */ 
  total_out_y_width = 0; 
  out_y_width += 8;   
  out_y_width &= ~7; 
  out_c_width = out_y_width >> color_shift; 
 
  /* Y buffers */ 
  store_index = 0; 
  t_filt = (short *) &gp_store[0]; 
  store_index += 2*l_hh + 16; 
  y_filt_state = (short *) &gp_store[store_index]; 
  store_index += 16; 
  c_filt_state = (short *) &gp_store[store_index]; 
  store_index += 16; 
 
  buf_y0 = &gp_store[store_index];  
  store_index += 2*((in_y_width & ~7) + 32); /* 16 */ 
  store_index += 64; 
 
  buf_y1 = (short *) &gp_store[store_index]; 
  store_index += sizeof(short)*((in_y_width & ~7) + 16); 
  store_index += 64; 
 
  rot_buf_y2 = (short *) &gp_store[store_index]; 
  store_index += sizeof(short)*((out_y_width & ~7) + 16)*l_hv; 
 
  buf_y3 = (short *) &gp_store[store_index]; 
  store_index += 2*sizeof(short)*((out_y_width & ~7) + 16); 
  
  /* Cr buffers */ 
  buf_cr0 = &gp_store[store_index];  
  store_index += 2*((in_c_width & ~7) + 32); /* 16 */ 
  store_index += 64; 
 
  buf_cr1 = (short *) &gp_store[store_index]; 
  store_index += sizeof(short)*((in_c_width & ~7) + 16); 
  store_index += 64; 
 
  rot_buf_cr2 = (short *) &gp_store[store_index]; 
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; 
 
  buf_cr3 = (short *) &gp_store[store_index]; 
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); 
 
  /* Cb buffers */ 
  buf_cb0 = &gp_store[store_index];  
  store_index += 2*((in_c_width & ~7) + 32); /* 16 */ 
  store_index += 64; 
 
  buf_cb1 = (short *) &gp_store[store_index]; 
  store_index += sizeof(short)*((in_c_width & ~7) + 16); 
  store_index += 64; 
 
  rot_buf_cb2 = (short *) &gp_store[store_index]; 
  store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; 
 
  buf_cb3 = (short *) &gp_store[store_index]; 
  store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); 
         
  /* final color buffer */ 
  /* this is a double buffer holding a total of 4 lines of color data */ 
  buf_rgb = &gp_store[store_index]; 
 
 
/* ========================= main scaling loop =========================== */ 
  /* initialise initial filter count scaled by filt length */ 
 
  y_filt_state[0] = c_filt_state[0] = l_hh*n_hh; 
  y_filt_state[1] = c_filt_state[1] = 0; 
  y_filt_state[2] = c_filt_state[2] = 0; 
  y_filt_state[3] = c_filt_state[3] = 0; 
  y_filt_state[4] = c_filt_state[4] = 0; 
  y_filt_state[5] = c_filt_state[5] = 0; 
 
  for (i=0; i < divider; i++) 
  { 
   total_out_y_width += out_y_width; 
   if (i == divider - 1) 
   { 
     out_y_width += main_out_y_width - total_out_y_width; 
     out_y_width &= ~3; 
     out_c_width = out_y_width >> color_shift; 
   } 
   vpatchy = vpatch; 
   vpatchc = vpatch; 
   y_rd_index = c_rd_index = 0; 
   y_rb_read_line = c_rb_read_line = 0; 
   display_lines = 0; 
   display_pixels = 0; 
   hylines = hclines = 0; 
   y_rb_write_line = c_rb_write_line = 0; 
   first = 1; 
 
   y_filter_no = c_filter_no = 0; 
   y_scaled = c_scaled = 0; 
 
   in_y_offset += y_filt_state[2]; 
   in_y_offset_1 += y_filt_state[3]; 
 
   in_c_offset += c_filt_state[2]; 
   in_c_offset_1 += c_filt_state[3]; 
 
   in_y_buf_size = ((in_y_width & ~15) + 48);  /* 7, 32 */ 
   in_c_buf_size = ((in_c_width & ~15) + 48);  /* 7, 32 */ 
 //  in_y_buf_size = (in_y_width+16) & ~15;  /* 7, 32 */ 
 //  in_c_buf_size = (in_c_width+16) & ~15;  /* 7, 32 */ 
  
   
  /* 1st dma transfer to get the ball rolling */ 
   ping_in_y = 1; pong_in_y = 0; 
   ping_in_c = 1; pong_in_c = 0; 
 
   in_id_y = DAT_copy(&image_y[(in_y_offset & ~7) + (y_rd_index & ~7)], 
                 &buf_y0[pong_in_y * in_y_buf_size], 
                 (unsigned short) in_y_buf_size);    
 
   in_id_cr = DAT_copy(&image_cr[(in_c_offset & ~7) + (c_rd_index & ~7)], 
                 &buf_cr0[pong_in_c * in_c_buf_size], 
                 (unsigned short) in_c_buf_size);    
 
   in_id_cb = DAT_copy(&image_cb[(in_c_offset & ~7) + (c_rd_index & ~7)], 
                 &buf_cb0[pong_in_c * in_c_buf_size], 
                 (unsigned short) in_c_buf_size);    
 
   ping_out = 0; 
   pong_out = 0; 
   write_buf = 0; 
    
 
   while(display_lines < window_height) 
   { 
     if (display_lines >= (window_height - 6)) break; //JS 
      
     if (hylines < l_hv) 
     {   
        /* wait on previous transfer */ 
        DAT_wait(in_id_y); 
        /* transfer data for next line */ 
        in_id_y = DAT_copy(&image_y[(in_y_offset & ~7) +  
                                    ((y_rd_index + y_width) & ~7)], 
                           &buf_y0[ping_in_y * in_y_buf_size], 
                           (unsigned short) in_y_buf_size);    
 
        pix_expand_asm(in_y_buf_size, 
                      &buf_y0[pong_in_y * in_y_buf_size], 
                      buf_y1); 
 
        hpatch[0] = 2*(in_y_offset & 7) + 2*(y_rd_index & 7); 
        hpatch[1] = 2*(in_y_offset_1 & 7) + 2*(y_rd_index & 7); 
                
        scale_horz_asm(buf_y1, 
                      in_y_width,  
                      &rot_buf_y2[((out_y_width & ~7)+8)*y_rb_write_line], 
                      out_y_width, 
                      hh, 
                      l_hh, 
                      n_hh, 
                      hpatch, y_filt_state); 
         
        pong_in_y ^= 1; ping_in_y ^= 1; 
        y_rd_index += y_width; 
        hylines += 1; 
        y_rb_write_line += 1; 
        if (y_rb_write_line == l_hv) y_rb_write_line = 0; 
     }   
      
     if (hclines < l_hv) 
     { 
        DAT_wait(in_id_cr); 
        in_id_cr = DAT_copy(&image_cr[(in_c_offset & ~7) + 
                                      ((c_rd_index + c_width) & ~7)], 
                            &buf_cr0[ping_in_c*in_c_buf_size], 
                            (unsigned short) in_c_buf_size);    
         
        DAT_wait(in_id_cb); 
        in_id_cb = DAT_copy(&image_cb[(in_c_offset & ~7) + 
                                      ((c_rd_index + c_width) & ~7)], 
                            &buf_cb0[ping_in_c*in_c_buf_size], 
                            (unsigned short) in_c_buf_size); 
         
        pix_expand_asm(in_c_buf_size, 
                      &buf_cr0[pong_in_c * in_c_buf_size], 
                      buf_cr1); 
        pix_expand_asm(in_c_buf_size, 
                      &buf_cb0[pong_in_c * in_c_buf_size], 
                      buf_cb1); 
         
        hpatch[0] = 2*(c_rd_index & 7) + 2*(in_c_offset & 7);       
        hpatch[1] = 2*(c_rd_index & 7) + 2*(in_c_offset_1 & 7);       
         
        scale_horz_asm(buf_cr1, 
                       in_c_width,  
                       &rot_buf_cr2[((out_c_width & ~7)+8)*c_rb_write_line], 
                       out_c_width, 
                       hh, 
                       l_hh, 
                       n_hh, 
                       hpatch, c_filt_state); 
                         
        scale_horz_asm(buf_cb1, 
                       in_c_width,  
                       &rot_buf_cb2[((out_c_width & ~7)+8)*c_rb_write_line], 
                       out_c_width, 
                       hh, 
                       l_hh, 
                       n_hh, 
                       hpatch, c_filt_state); 
                           
        pong_in_c ^= 1; ping_in_c ^= 1; 
        hclines += 1; 
        c_rd_index += c_width; 
 
        c_rb_write_line += 1; 
        if (c_rb_write_line == l_hv) c_rb_write_line = 0; 
     } 
      
 
     if (hylines >= l_hv) /* enough lines for vertical y filtering? */ 
     { 
    	scale_vert_asm(rot_buf_y2, 
                      &buf_y3[y_scaled*((out_y_width & ~7)+8)], 
                      (out_y_width & ~7)+8, 
                      &hv[y_filter_no*l_hv], 
                      t_filt, 
                      l_hv, 
                      y_rb_read_line); 
 
        y_scaled += 1; 
        y_rb_read_line += ((int) (vpatchy[0])); 
        y_rb_read_line = y_rb_read_line % l_hv; 
        hylines -= ((int) (vpatchy[0])); 
 
        vpatchy++; 
        y_filter_no++; 
        if (y_filter_no == n_hv) vpatchy = vpatch; 
        if (y_filter_no == n_hv) y_filter_no = 0; 
     } 
 
     /* enough lines for vertical chroma filtering? */ 
     if (hclines >= l_hv && c_scaled < max_clines)   
     { 
    	scale_vert_asm(rot_buf_cb2, 
                      &buf_cb3[c_scaled*((out_c_width & ~7)+8)], 
                      (out_c_width & ~7) + 8, 
                      &hv[c_filter_no*l_hv], 
                      t_filt, 
                      l_hv, 
                      c_rb_read_line); 
 
    	scale_vert_asm(rot_buf_cr2, 
                      &buf_cr3[c_scaled*((out_c_width & ~7)+8)], 
                      (out_c_width & ~7) + 8, 
                      &hv[c_filter_no*l_hv], 
                      t_filt, 
                      l_hv, 
                      c_rb_read_line); 
        c_scaled += 1; 
 
        c_rb_read_line += ((int) (vpatchc[0])); 
        c_rb_read_line = c_rb_read_line % l_hv; 
        hclines -= ((int) (vpatchc[0])); 
 
	    vpatchc++; 
        c_filter_no++; 
        if (c_filter_no >= n_hv) vpatchc = vpatch; 
        if (c_filter_no >= n_hv) c_filter_no = 0; 
     } 
 
   /* color conversion can only occur when hylines >= l_hv and 
      hcrlines >= l_hv but depending on the format c*lines should be reused */ 
 
     if (format != f4_2_0 && y_scaled == 2 && c_scaled == 2) 
     {   
        ycbcr422pl16_to_rgb565_asm(cmatrix, 
                &buf_y3[0], 
                &buf_cr3[0], 
                &buf_cb3[0], 
                (unsigned short *) &buf_rgb[2*ping_out*color*out_y_width], 
                out_y_width); /* in L2 */ 
        ycbcr422pl16_to_rgb565_asm(cmatrix, 
                &buf_y3 [(out_y_width & ~7) + 8], 
                &buf_cr3[(out_c_width & ~7) + 8], 
                &buf_cb3[(out_c_width & ~7) + 8], 
                (unsigned short *) &buf_rgb[(1+2*ping_out)*color*out_y_width], 
                out_y_width); /*<-in L2 */ 
         ping_out ^= 1; 
     } 
     /* this is done to fix the anomally of red being  
        swapped for blue chrom - the api say ycbcr, I say it 
        is ycrcb as is the convention */ 
         
     if (format == f4_2_0 && y_scaled == 2 && c_scaled == 1) 
     {  
        ycbcr422pl16_to_rgb565_asm(cmatrix, 
                &buf_y3[0], 
                &buf_cr3[0], &buf_cb3[0], 
                (unsigned short *) &buf_rgb[2*ping_out*color*out_y_width], 
                out_y_width); /* in L2 */ 
        ycbcr422pl16_to_rgb565_asm(cmatrix, 
                &buf_y3 [(out_y_width & ~7) + 8], 
                &buf_cr3[0], &buf_cb3[0], 
                (unsigned short *) &buf_rgb[(1 + 2*ping_out)*color*out_y_width], 
                out_y_width); /*<-in L2 */ 
 
        ping_out ^= 1; 
     } 
 
     /* this doesn't need to be double buffered as it writes out */ 
     /* so it has no dependency as in the input buffer case */ 
     /* while only a 1/2 fram is being computed at once the 
        output lines are wrtten into every other line so 
        it increments 2lines for each 1 */ 
 
     /* first time around we do not want this to occur but everything else 
        is ok to keep updateing, only the DAT_copy needs delaying */ 
 
     if (write_buf) 
     { 
        if (!first) DAT_wait(out_id0);    
        out_id0 = DAT_copy(&buf_rgb[2*pong_out*color*out_y_width], 
                           &image_rgb[out_color_offset+display_pixels*color], 
                           color*out_y_width); 
        display_pixels += display_width; 
 
        out_id0 = DAT_copy(&buf_rgb[(1+2*pong_out)*color*out_y_width],  
                           &image_rgb[out_color_offset+display_pixels*color], 
                           color*out_y_width); 
        display_pixels += display_width; 
        first = 0; 
        pong_out ^= 1; 
     } 
 
     if (y_scaled == 2 && c_scaled > 0) 
     {         
        /* this becomes 2 display_pixels */ 
        display_lines += 2; 
        y_scaled = 0; c_scaled = 0; 
        write_buf = 1; 
     } else { 
        write_buf = 0; 
     } 
 
     
  }  /* end while */ 
  if (write_buf) 
  { 
    DAT_wait(out_id0);    
    out_id0 = DAT_copy(&buf_rgb[2*pong_out*color*out_y_width], 
                       &image_rgb[out_color_offset+display_pixels*color], 
                       color*out_y_width); 
    display_pixels += display_width; 
 
    out_id0 = DAT_copy(&buf_rgb[(1+2*pong_out)*color*out_y_width],  
                       &image_rgb[out_color_offset+display_pixels*color], 
                       color*out_y_width); 
    display_pixels += display_width; 
  } 
 
  /* clean up at end of for loop for each strip */ 
  DAT_wait(in_id_cr); 
  DAT_wait(in_id_cb); 
  DAT_wait(in_id_y); 
  DAT_wait(out_id0); 
  out_color_offset += color*out_y_width; 
 
  y_filt_state[0] = y_filt_state[4];   /* y filter count */ 
  c_filt_state[0] = c_filt_state[4];   /* c filter count */  
 
 }/* end for */                                           
}  
 
 
/* ========================================================================= */ 
/*              Copyright (C) 2000 Texas Instruments Incorporated.           */ 
/*                              All Rights Reserved                          */ 
/* ========================================================================= */