www.pudn.com > scaling.rar > scale_2d_yc_doub_buf.c
/* ========================================================================= */ /* Copyright (C) 2000 Texas Instruments Incorporated. */ /* All Rights Reserved */ /* ========================================================================= */ /* The code scales 3 planes of y, Cr, Cb data in varying combinations */ /* where Y denotes luma, Cr is red chrominance and Cb is blue chrominance */ /* NOTES */ /* When the phrase "pixels" is used this measn a color pixel, this has */ /* a color combination of 4:2:2, 4:4:4 or 4:2:0 */ /* for 4:4:4 there are 3 identical coloe spaces all the same size */ /* for 4:2:2 there planes are the same height but only half as wide */ /* for 4:2:0 the planes are 1/2 the width and 1/2 the height */ /* */ /* The scaler will automatically filter output data only within the display */ /* if it is larger than the display it will write to the boundary, if smaller*/ /* it will centre the image in the middle. */ /* */ /* This function can be applied to R, G, B data also R->Y, G->Cr, B->Cb */ /* ========================================================================= */ /* Copyright (C) 2000 Texas Instruments Incorporated. */ /* All Rights Reserved */ /* ========================================================================= */ #include#include #include "ycbcr422pl16_to_rgb565_h.h" #include "scale_horz_h.h" #include "scale_vert_h.h" #include "pix_expand_h.h" #define f4_4_4 2 #define f4_2_2 1 #define f4_2_0 0 #pragma CODE_SECTION(scale_2d_ycrcb,".opt_gp"); /* ========================================================================= */ void scale_2d_ycrcb ( int format, /* 4:4:4 == 2, 4:2:2 == 1, 4:2:0 == 0 */ int p_h, /* numerator for horizontal scale factor p/q */ int q_h, /* denominator for horizontal scale factor p/q */ short * hpatch, /* stride table for cr/b horizontal scaling jumps*/ short * hh, /* pntr to horz filter bank */ int l_hh, /* number of taps in each horz filter */ int n_hh, /* number of filters in horz filter bank */ int p_v, /* numerator for vertical scale factor p/q */ int q_v, /* denominator for vertical scale factor p/q */ short * vpatch, /* stride table for vertical scaling jumps */ short * hv, /* pntr to vert filter bank */ int l_hv, /* number of taps in each vert filter */ int n_hv, /* number of filters in vert filter bank */ int in_pic_height, /* height of picture in Pixels multiple of 2 */ int in_pic_width, /* width of picture in Pixels multiple of 8 */ int display_height, /* height of picture in Pixels multiple of 2 */ int display_width, /* width of picture in Pixels multiple of 8 */ unsigned char * image_y, /* pntr to input image planar y values */ unsigned char * image_cr, /* pntr to input image planar cr values */ unsigned char * image_cb, /* pntr to input image planar cb values */ void * L2scratch_pad, /* temp space for internal buffers and stores */ /* aligned to a double boundary */ unsigned char * image_rgb)/* pntr to output image, scaled in color */ /* ========================================================================= */ { short * vpatchy; /* copy of patch table for y scaling */ short * vpatchc; /* copy of patch table for cr and cb scaling */ short * t_filt; /* pntr to temporary buffer length l_hh */ int y_width; /* width of raw y plane image */ int y_height; /* height of raw y plane image */ int in_y_width; /* width of data to be scaled, sub section of y buffer */ int in_y_offset; /* offset from left edge of raw Y plane buffer */ int in_y_offset_1; /* offset from left edge of raw Y plane buffer */ int y_rd_index; /* index into the raw Y buffer */ int y_filter_no; /* the current vertical filter being used for the vertical filtering of Y */ int out_y_width; /* width of data scaled Y buffer */ int main_out_y_width; /* width of scaled chroma image */ int total_out_y_width;/* width of scaled chroma image */ int hylines; /* number of horizontally scaled y lines put in y r(otating)b(uffer) */ int y_rb_write_line; /* index into y r(otating)b(uffer) */ int y_rb_read_line; /* index into y r(otating)b(uffer) for removal */ int out_color_offset;/* offset into the display buffer depends on display and scaled picture size */ int in_vert_offset; /* generic intermediate offset to top of scaled image */ int in_c_offset; /* offset to (cropped) corner of raw chroma images */ int in_c_offset_1; /* offset to (cropped) corner of raw chroma images */ int c_width; /* width of chroma raw image */ int in_c_width; /* width of cropped raw chroma image */ int out_c_width; /* width of scaled chroma image */ int c_rd_index; /* index into the raw C buffers */ int hclines; /* number of horizontally scaled c lines put in c r(otating)b(uffer) */ int c_rb_write_line; /* index into c r(otating)b(uffer) for addition*/ int c_rb_read_line; /* index into c r(otating)b(uffer) for removal */ int c_filter_no; /* the current vertical filter being used for the vertical filtering of C */ int max_clines; /* number of color lines needed by colr format e.g. 444 needs 2 lines, 420 needs only 1 */ int display_lines; /* number of color lines written to display so far */ int display_pixels; /* number of color pixels written to display so far */ int window_height; /* number of lines to display */ int color; /* number of color elements/pixel rgb=3, 4:2:2 = 1.5 */ int color_shift; /* divider for whether output is 422,420 or 444 */ unsigned char *buf_y0;/*pointer to 1st pingpong buffer for y raw image data */ int in_y_buf_size;/* size of the input ping or pong buffer */ short *buf_y1; /* pointer to 2nd buffer containing expanded y pixels */ short *rot_buf_y2; /* pntr to circular line buffer containing l_hv lines of horizontally scaled raw y data */ short *buf_y3; /* pntr to output buffer 2 lines of vertically & horizontally scaled y data ready for combining into color pixels */ short *y_filt_state; /* packed last filter number and input index */ unsigned char *buf_cr0;/* pointer to 1st ping pong buffer for c raw image data */ int in_c_buf_size;/* size of the input ping or pong buffer */ short *buf_cr1; /* pointer to 2nd buffer containing expanded c pixels */ short *rot_buf_cr2; /* pntr to circular line buffer containing l_hv lines of horizontally scaled raw c data */ short *buf_cr3; /* pntr to buffer of 2 lines of vertically and horizontally scaled c data ready for combining into color pixels */ unsigned char *buf_cb0;/* pointer to 1st ping pong buffer for c raw image data */ short *buf_cb1; /* pntr to 2nd buffer containing expanded c pixels */ short *rot_buf_cb2; /* pntr to circular line buffer containing l_hv lines of horizontally scaled raw c data */ short *buf_cb3; /* pntr to buffer of 2 lines of vertically & horizontally scaled c data ready for combining into color pixels */ short *c_filt_state;/* packed last filter number and input index */ int y_scaled; /*num. of finished y lines in output buffer pre color*/ int c_scaled; /*num. of finished c clines in output buffer pre color*/ unsigned char *buf_rgb; /* pntr to ping pong buffer containing 2 lines of color pixels formed from the y,cr,cb data */ short cmatrix[5] = {0x2543, 0x3313, -0x0C8A, -0x1A04, 0x408D}; unsigned char *gp_store; /* pointer to the scratchpad space */ int store_index; /* index into main scratch pad for buffers */ int divider; /* number of strips needed for cache allocation */ int i; /* current strip being worked on */ int ping_in_y, pong_in_y;/* tags for buffers, ping or pong */ int ping_in_c, pong_in_c;/* tags for buffers, ping or pong */ unsigned int in_id_y ; /* input EDMA xfer id */ unsigned int in_id_cr; /* input EDMA xfer id */ unsigned int in_id_cb; /* input EDMA xfer id */ int first; /* tag for out buffer */ unsigned int out_id0; /* output EDMA xfer id */ int ping_out, pong_out; /* tags for buffers, ping or pong */ int write_buf; /* ========================= set up dimensions ============================== */ /* and align all offsets to 8 element boundaries and counts to mutliples */ /* of 8 elements */ color = 2; /* each color is now 2 bytes, 565 */ y_width = in_pic_width; y_height = in_pic_height; /* the scaled output width is bigger than the display ? */ if (p_h * in_pic_width >= q_h * display_width) { in_y_offset = (y_width * p_h - q_h * display_width)/(2*p_h); in_y_width = (q_h * display_width)/p_h; out_y_width = display_width; out_color_offset = 0; /* the scaled image width is smaller than the display ? */ } else { in_y_offset = 0; in_y_width = y_width; out_y_width = (y_width * p_h) / q_h; out_color_offset = color*((display_width * q_h - p_h * y_width)/(q_h*2)); } /* the scaled output is bigger than the display ? */ if (p_v * in_pic_height >= q_v * display_height) { in_vert_offset= y_width*((y_height * p_v - q_v * display_height)/(2*p_v)); window_height = display_height; /* the scaled image is smaller than the display ? */ } else { in_vert_offset = 0; window_height = (in_pic_height * p_v)/q_v; window_height = (window_height + 2) & ~ 1; out_color_offset += color * display_width * ((display_height * q_v - p_v * y_height)/(2*q_v)); } out_color_offset &= ~3; if (format == f4_4_4) { in_c_offset = in_y_offset + in_vert_offset; c_width = y_width; in_c_width = in_y_width; out_c_width = out_y_width; max_clines = 2; color_shift = 0; } else if (format == f4_2_2) { c_width = y_width >> 1; in_c_offset = in_y_offset ? (c_width*p_h - q_h*(display_width>>1))/(2*p_h):0; in_c_offset += (in_vert_offset >> 1); in_c_width = in_y_width >> 1; out_c_width = out_y_width >> 1; max_clines = 2; color_shift = 1; } else /* format == f4_2_0 */ { c_width = y_width >> 1; in_c_offset = in_y_offset ? (c_width*p_h - q_h*(display_width>>1))/(4*p_h) : 0; in_c_offset += (in_vert_offset >> 2); in_c_width = in_y_width >> 1; out_c_width = out_y_width >> 1; max_clines = 1; color_shift = 1; } in_y_offset += in_vert_offset; in_y_offset_1 = in_y_offset + (hpatch[1] >> 1); in_c_offset_1 = in_c_offset + (hpatch[1] >> 1); /* ========================= set up buffers ============================= */ gp_store = (unsigned char *) L2scratch_pad; /* scratch pad is already set up to be aligned on a double boundary */ /* Y buffers */ store_index = 16; store_index += 2*l_hh + 16 + 32; store_index += 2*((in_y_width & ~7) + 16); store_index += sizeof(short)*((in_y_width & ~7) + 32); /* */ store_index += sizeof(short)*((out_y_width & ~7) + 16)*l_hv; store_index += 2*sizeof(short)*((out_y_width & ~7) + 16); store_index += 2*((in_c_width & ~7) + 16); store_index += sizeof(short)*((in_c_width & ~7) + 32); /* */ store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); /* Cb buffers */ store_index += 2*((in_c_width & ~7) + 16); store_index += sizeof(short)*((in_c_width & ~7) + 32); /* */ store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); /* total data required is now known */ divider = (store_index + 24*1024)/(16*1024); main_out_y_width = out_y_width ; in_y_width /= divider; out_y_width /= divider; in_c_width /= divider; /* round up to nearest end */ total_out_y_width = 0; out_y_width += 8; out_y_width &= ~7; out_c_width = out_y_width >> color_shift; /* Y buffers */ store_index = 0; t_filt = (short *) &gp_store[0]; store_index += 2*l_hh + 16; y_filt_state = (short *) &gp_store[store_index]; store_index += 16; c_filt_state = (short *) &gp_store[store_index]; store_index += 16; buf_y0 = &gp_store[store_index]; store_index += 2*((in_y_width & ~7) + 32); /* 16 */ store_index += 64; buf_y1 = (short *) &gp_store[store_index]; store_index += sizeof(short)*((in_y_width & ~7) + 16); store_index += 64; rot_buf_y2 = (short *) &gp_store[store_index]; store_index += sizeof(short)*((out_y_width & ~7) + 16)*l_hv; buf_y3 = (short *) &gp_store[store_index]; store_index += 2*sizeof(short)*((out_y_width & ~7) + 16); /* Cr buffers */ buf_cr0 = &gp_store[store_index]; store_index += 2*((in_c_width & ~7) + 32); /* 16 */ store_index += 64; buf_cr1 = (short *) &gp_store[store_index]; store_index += sizeof(short)*((in_c_width & ~7) + 16); store_index += 64; rot_buf_cr2 = (short *) &gp_store[store_index]; store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; buf_cr3 = (short *) &gp_store[store_index]; store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); /* Cb buffers */ buf_cb0 = &gp_store[store_index]; store_index += 2*((in_c_width & ~7) + 32); /* 16 */ store_index += 64; buf_cb1 = (short *) &gp_store[store_index]; store_index += sizeof(short)*((in_c_width & ~7) + 16); store_index += 64; rot_buf_cb2 = (short *) &gp_store[store_index]; store_index += sizeof(short)*((out_c_width & ~7) + 16) *l_hv; buf_cb3 = (short *) &gp_store[store_index]; store_index += 2*sizeof(short)*((out_c_width & ~7) + 16); /* final color buffer */ /* this is a double buffer holding a total of 4 lines of color data */ buf_rgb = &gp_store[store_index]; /* ========================= main scaling loop =========================== */ /* initialise initial filter count scaled by filt length */ y_filt_state[0] = c_filt_state[0] = l_hh*n_hh; y_filt_state[1] = c_filt_state[1] = 0; y_filt_state[2] = c_filt_state[2] = 0; y_filt_state[3] = c_filt_state[3] = 0; y_filt_state[4] = c_filt_state[4] = 0; y_filt_state[5] = c_filt_state[5] = 0; for (i=0; i < divider; i++) { total_out_y_width += out_y_width; if (i == divider - 1) { out_y_width += main_out_y_width - total_out_y_width; out_y_width &= ~3; out_c_width = out_y_width >> color_shift; } vpatchy = vpatch; vpatchc = vpatch; y_rd_index = c_rd_index = 0; y_rb_read_line = c_rb_read_line = 0; display_lines = 0; display_pixels = 0; hylines = hclines = 0; y_rb_write_line = c_rb_write_line = 0; first = 1; y_filter_no = c_filter_no = 0; y_scaled = c_scaled = 0; in_y_offset += y_filt_state[2]; in_y_offset_1 += y_filt_state[3]; in_c_offset += c_filt_state[2]; in_c_offset_1 += c_filt_state[3]; in_y_buf_size = ((in_y_width & ~15) + 48); /* 7, 32 */ in_c_buf_size = ((in_c_width & ~15) + 48); /* 7, 32 */ // in_y_buf_size = (in_y_width+16) & ~15; /* 7, 32 */ // in_c_buf_size = (in_c_width+16) & ~15; /* 7, 32 */ /* 1st dma transfer to get the ball rolling */ ping_in_y = 1; pong_in_y = 0; ping_in_c = 1; pong_in_c = 0; in_id_y = DAT_copy(&image_y[(in_y_offset & ~7) + (y_rd_index & ~7)], &buf_y0[pong_in_y * in_y_buf_size], (unsigned short) in_y_buf_size); in_id_cr = DAT_copy(&image_cr[(in_c_offset & ~7) + (c_rd_index & ~7)], &buf_cr0[pong_in_c * in_c_buf_size], (unsigned short) in_c_buf_size); in_id_cb = DAT_copy(&image_cb[(in_c_offset & ~7) + (c_rd_index & ~7)], &buf_cb0[pong_in_c * in_c_buf_size], (unsigned short) in_c_buf_size); ping_out = 0; pong_out = 0; write_buf = 0; while(display_lines < window_height) { if (display_lines >= (window_height - 6)) break; //JS if (hylines < l_hv) { /* wait on previous transfer */ DAT_wait(in_id_y); /* transfer data for next line */ in_id_y = DAT_copy(&image_y[(in_y_offset & ~7) + ((y_rd_index + y_width) & ~7)], &buf_y0[ping_in_y * in_y_buf_size], (unsigned short) in_y_buf_size); pix_expand_asm(in_y_buf_size, &buf_y0[pong_in_y * in_y_buf_size], buf_y1); hpatch[0] = 2*(in_y_offset & 7) + 2*(y_rd_index & 7); hpatch[1] = 2*(in_y_offset_1 & 7) + 2*(y_rd_index & 7); scale_horz_asm(buf_y1, in_y_width, &rot_buf_y2[((out_y_width & ~7)+8)*y_rb_write_line], out_y_width, hh, l_hh, n_hh, hpatch, y_filt_state); pong_in_y ^= 1; ping_in_y ^= 1; y_rd_index += y_width; hylines += 1; y_rb_write_line += 1; if (y_rb_write_line == l_hv) y_rb_write_line = 0; } if (hclines < l_hv) { DAT_wait(in_id_cr); in_id_cr = DAT_copy(&image_cr[(in_c_offset & ~7) + ((c_rd_index + c_width) & ~7)], &buf_cr0[ping_in_c*in_c_buf_size], (unsigned short) in_c_buf_size); DAT_wait(in_id_cb); in_id_cb = DAT_copy(&image_cb[(in_c_offset & ~7) + ((c_rd_index + c_width) & ~7)], &buf_cb0[ping_in_c*in_c_buf_size], (unsigned short) in_c_buf_size); pix_expand_asm(in_c_buf_size, &buf_cr0[pong_in_c * in_c_buf_size], buf_cr1); pix_expand_asm(in_c_buf_size, &buf_cb0[pong_in_c * in_c_buf_size], buf_cb1); hpatch[0] = 2*(c_rd_index & 7) + 2*(in_c_offset & 7); hpatch[1] = 2*(c_rd_index & 7) + 2*(in_c_offset_1 & 7); scale_horz_asm(buf_cr1, in_c_width, &rot_buf_cr2[((out_c_width & ~7)+8)*c_rb_write_line], out_c_width, hh, l_hh, n_hh, hpatch, c_filt_state); scale_horz_asm(buf_cb1, in_c_width, &rot_buf_cb2[((out_c_width & ~7)+8)*c_rb_write_line], out_c_width, hh, l_hh, n_hh, hpatch, c_filt_state); pong_in_c ^= 1; ping_in_c ^= 1; hclines += 1; c_rd_index += c_width; c_rb_write_line += 1; if (c_rb_write_line == l_hv) c_rb_write_line = 0; } if (hylines >= l_hv) /* enough lines for vertical y filtering? */ { scale_vert_asm(rot_buf_y2, &buf_y3[y_scaled*((out_y_width & ~7)+8)], (out_y_width & ~7)+8, &hv[y_filter_no*l_hv], t_filt, l_hv, y_rb_read_line); y_scaled += 1; y_rb_read_line += ((int) (vpatchy[0])); y_rb_read_line = y_rb_read_line % l_hv; hylines -= ((int) (vpatchy[0])); vpatchy++; y_filter_no++; if (y_filter_no == n_hv) vpatchy = vpatch; if (y_filter_no == n_hv) y_filter_no = 0; } /* enough lines for vertical chroma filtering? */ if (hclines >= l_hv && c_scaled < max_clines) { scale_vert_asm(rot_buf_cb2, &buf_cb3[c_scaled*((out_c_width & ~7)+8)], (out_c_width & ~7) + 8, &hv[c_filter_no*l_hv], t_filt, l_hv, c_rb_read_line); scale_vert_asm(rot_buf_cr2, &buf_cr3[c_scaled*((out_c_width & ~7)+8)], (out_c_width & ~7) + 8, &hv[c_filter_no*l_hv], t_filt, l_hv, c_rb_read_line); c_scaled += 1; c_rb_read_line += ((int) (vpatchc[0])); c_rb_read_line = c_rb_read_line % l_hv; hclines -= ((int) (vpatchc[0])); vpatchc++; c_filter_no++; if (c_filter_no >= n_hv) vpatchc = vpatch; if (c_filter_no >= n_hv) c_filter_no = 0; } /* color conversion can only occur when hylines >= l_hv and hcrlines >= l_hv but depending on the format c*lines should be reused */ if (format != f4_2_0 && y_scaled == 2 && c_scaled == 2) { ycbcr422pl16_to_rgb565_asm(cmatrix, &buf_y3[0], &buf_cr3[0], &buf_cb3[0], (unsigned short *) &buf_rgb[2*ping_out*color*out_y_width], out_y_width); /* in L2 */ ycbcr422pl16_to_rgb565_asm(cmatrix, &buf_y3 [(out_y_width & ~7) + 8], &buf_cr3[(out_c_width & ~7) + 8], &buf_cb3[(out_c_width & ~7) + 8], (unsigned short *) &buf_rgb[(1+2*ping_out)*color*out_y_width], out_y_width); /*<-in L2 */ ping_out ^= 1; } /* this is done to fix the anomally of red being swapped for blue chrom - the api say ycbcr, I say it is ycrcb as is the convention */ if (format == f4_2_0 && y_scaled == 2 && c_scaled == 1) { ycbcr422pl16_to_rgb565_asm(cmatrix, &buf_y3[0], &buf_cr3[0], &buf_cb3[0], (unsigned short *) &buf_rgb[2*ping_out*color*out_y_width], out_y_width); /* in L2 */ ycbcr422pl16_to_rgb565_asm(cmatrix, &buf_y3 [(out_y_width & ~7) + 8], &buf_cr3[0], &buf_cb3[0], (unsigned short *) &buf_rgb[(1 + 2*ping_out)*color*out_y_width], out_y_width); /*<-in L2 */ ping_out ^= 1; } /* this doesn't need to be double buffered as it writes out */ /* so it has no dependency as in the input buffer case */ /* while only a 1/2 fram is being computed at once the output lines are wrtten into every other line so it increments 2lines for each 1 */ /* first time around we do not want this to occur but everything else is ok to keep updateing, only the DAT_copy needs delaying */ if (write_buf) { if (!first) DAT_wait(out_id0); out_id0 = DAT_copy(&buf_rgb[2*pong_out*color*out_y_width], &image_rgb[out_color_offset+display_pixels*color], color*out_y_width); display_pixels += display_width; out_id0 = DAT_copy(&buf_rgb[(1+2*pong_out)*color*out_y_width], &image_rgb[out_color_offset+display_pixels*color], color*out_y_width); display_pixels += display_width; first = 0; pong_out ^= 1; } if (y_scaled == 2 && c_scaled > 0) { /* this becomes 2 display_pixels */ display_lines += 2; y_scaled = 0; c_scaled = 0; write_buf = 1; } else { write_buf = 0; } } /* end while */ if (write_buf) { DAT_wait(out_id0); out_id0 = DAT_copy(&buf_rgb[2*pong_out*color*out_y_width], &image_rgb[out_color_offset+display_pixels*color], color*out_y_width); display_pixels += display_width; out_id0 = DAT_copy(&buf_rgb[(1+2*pong_out)*color*out_y_width], &image_rgb[out_color_offset+display_pixels*color], color*out_y_width); display_pixels += display_width; } /* clean up at end of for loop for each strip */ DAT_wait(in_id_cr); DAT_wait(in_id_cb); DAT_wait(in_id_y); DAT_wait(out_id0); out_color_offset += color*out_y_width; y_filt_state[0] = y_filt_state[4]; /* y filter count */ c_filt_state[0] = c_filt_state[4]; /* c filter count */ }/* end for */ } /* ========================================================================= */ /* Copyright (C) 2000 Texas Instruments Incorporated. */ /* All Rights Reserved */ /* ========================================================================= */