diff options
author | Jesse Hall <jessehall@google.com> | 2012-07-09 11:27:07 -0700 |
---|---|---|
committer | Jesse Hall <jessehall@google.com> | 2012-07-22 00:35:08 -0700 |
commit | 9682c8870b8ff5e4ac2e4c70b759f791c6f38c1f (patch) | |
tree | ded6ee18c4e1f33df235e53615a6d65e2d64f4ef /distrib/sdl-1.2.15/src/video/SDL_yuv_sw.c | |
parent | 74b55003f76dbca96e4a26d98fe464081ca5341f (diff) | |
download | external_qemu-9682c8870b8ff5e4ac2e4c70b759f791c6f38c1f.zip external_qemu-9682c8870b8ff5e4ac2e4c70b759f791c6f38c1f.tar.gz external_qemu-9682c8870b8ff5e4ac2e4c70b759f791c6f38c1f.tar.bz2 |
Import SDL release-1.2.15
Change-Id: I505c4aea24325cad475f217db5589814b4c75dbf
Diffstat (limited to 'distrib/sdl-1.2.15/src/video/SDL_yuv_sw.c')
-rw-r--r-- | distrib/sdl-1.2.15/src/video/SDL_yuv_sw.c | 1299 |
1 files changed, 1299 insertions, 0 deletions
diff --git a/distrib/sdl-1.2.15/src/video/SDL_yuv_sw.c b/distrib/sdl-1.2.15/src/video/SDL_yuv_sw.c new file mode 100644 index 0000000..c555ce0 --- /dev/null +++ b/distrib/sdl-1.2.15/src/video/SDL_yuv_sw.c @@ -0,0 +1,1299 @@ +/* + SDL - Simple DirectMedia Layer + Copyright (C) 1997-2012 Sam Lantinga + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Sam Lantinga + slouken@libsdl.org +*/ +#include "SDL_config.h" + +/* This is the software implementation of the YUV video overlay support */ + +/* This code was derived from code carrying the following copyright notices: + + * Copyright (c) 1995 The Regents of the University of California. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement is + * hereby granted, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF + * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + + * Copyright (c) 1995 Erik Corry + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement is + * hereby granted, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, + * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF + * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" + * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, + * UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + + * Portions of this software Copyright (c) 1995 Brown University. + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without written agreement + * is hereby granted, provided that the above copyright notice and the + * following two paragraphs appear in all copies of this software. + * + * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT + * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN + * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" + * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "SDL_video.h" +#include "SDL_cpuinfo.h" +#include "SDL_stretch_c.h" +#include "SDL_yuvfuncs.h" +#include "SDL_yuv_sw_c.h" + +/* The functions used to manipulate software video overlays */ +static struct private_yuvhwfuncs sw_yuvfuncs = { + SDL_LockYUV_SW, + SDL_UnlockYUV_SW, + SDL_DisplayYUV_SW, + SDL_FreeYUV_SW +}; + +/* RGB conversion lookup tables */ +struct private_yuvhwdata { + SDL_Surface *stretch; + SDL_Surface *display; + Uint8 *pixels; + int *colortab; + Uint32 *rgb_2_pix; + void (*Display1X)(int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ); + void (*Display2X)(int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ); + + /* These are just so we don't have to allocate them separately */ + Uint16 pitches[3]; + Uint8 *planes[3]; +}; + + +/* The colorspace conversion functions */ + +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES +extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ); +extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ); +#endif + +static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned short* row1; + unsigned short* row2; + unsigned char* lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = (unsigned short*) out; + row2 = row1 + cols + mod; + lum2 = lum + cols; + + mod += cols + mod; + + y = rows / 2; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + ++cr; ++cb; + + L = *lum++; + *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + L = *lum++; + *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + + /* Now, do second row. */ + + L = *lum2++; + *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + L = *lum2++; + *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int value; + unsigned char* row1; + unsigned char* row2; + unsigned char* lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = out; + row2 = row1 + cols*3 + mod*3; + lum2 = lum + cols; + + mod += cols + mod; + mod *= 3; + + y = rows / 2; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + ++cr; ++cb; + + L = *lum++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + *row1++ = (value ) & 0xFF; + *row1++ = (value >> 8) & 0xFF; + *row1++ = (value >> 16) & 0xFF; + + L = *lum++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + *row1++ = (value ) & 0xFF; + *row1++ = (value >> 8) & 0xFF; + *row1++ = (value >> 16) & 0xFF; + + + /* Now, do second row. */ + + L = *lum2++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + *row2++ = (value ) & 0xFF; + *row2++ = (value >> 8) & 0xFF; + *row2++ = (value >> 16) & 0xFF; + + L = *lum2++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + *row2++ = (value ) & 0xFF; + *row2++ = (value >> 8) & 0xFF; + *row2++ = (value >> 16) & 0xFF; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int* row1; + unsigned int* row2; + unsigned char* lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row1 = (unsigned int*) out; + row2 = row1 + cols + mod; + lum2 = lum + cols; + + mod += cols + mod; + + y = rows / 2; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + ++cr; ++cb; + + L = *lum++; + *row1++ = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + L = *lum++; + *row1++ = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + + /* Now, do second row. */ + + L = *lum2++; + *row2++ = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + L = *lum2++; + *row2++ = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +/* + * In this function I make use of a nasty trick. The tables have the lower + * 16 bits replicated in the upper 16. This means I can write ints and get + * the horisontal doubling for free (almost). + */ +static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int* row1 = (unsigned int*) out; + const int next_row = cols+(mod/2); + unsigned int* row2 = row1 + 2*next_row; + unsigned char* lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = (next_row * 3) + (mod/2); + + y = rows / 2; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + ++cr; ++cb; + + L = *lum++; + row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row1++; + + L = *lum++; + row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row1++; + + + /* Now, do second row. */ + + L = *lum2++; + row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row2++; + + L = *lum2++; + row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row2++; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int value; + unsigned char* row1 = out; + const int next_row = (cols*2 + mod) * 3; + unsigned char* row2 = row1 + 2*next_row; + unsigned char* lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = next_row*3 + mod*3; + + y = rows / 2; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + ++cr; ++cb; + + L = *lum++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] = + (value ) & 0xFF; + row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] = + (value >> 8) & 0xFF; + row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] = + (value >> 16) & 0xFF; + row1 += 2*3; + + L = *lum++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] = + (value ) & 0xFF; + row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] = + (value >> 8) & 0xFF; + row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] = + (value >> 16) & 0xFF; + row1 += 2*3; + + + /* Now, do second row. */ + + L = *lum2++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] = + (value ) & 0xFF; + row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] = + (value >> 8) & 0xFF; + row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] = + (value >> 16) & 0xFF; + row2 += 2*3; + + L = *lum2++; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] = + (value ) & 0xFF; + row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] = + (value >> 8) & 0xFF; + row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] = + (value >> 16) & 0xFF; + row2 += 2*3; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int* row1 = (unsigned int*) out; + const int next_row = cols*2+mod; + unsigned int* row2 = row1 + 2*next_row; + unsigned char* lum2; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + lum2 = lum + cols; + + mod = (next_row * 3) + mod; + + y = rows / 2; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + ++cr; ++cb; + + L = *lum++; + row1[0] = row1[1] = row1[next_row] = row1[next_row+1] = + (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row1 += 2; + + L = *lum++; + row1[0] = row1[1] = row1[next_row] = row1[next_row+1] = + (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row1 += 2; + + + /* Now, do second row. */ + + L = *lum2++; + row2[0] = row2[1] = row2[next_row] = row2[next_row+1] = + (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row2 += 2; + + L = *lum2++; + row2[0] = row2[1] = row2[next_row] = row2[next_row+1] = + (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row2 += 2; + } + + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. + */ + lum += cols; + lum2 += cols; + row1 += mod; + row2 += mod; + } +} + +static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned short* row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned short*) out; + + y = rows; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + cr += 4; cb += 4; + + L = *lum; lum += 2; + *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + L = *lum; lum += 2; + *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + } + + row += mod; + } +} + +static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int value; + unsigned char* row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned char*) out; + mod *= 3; + y = rows; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + cr += 4; cb += 4; + + L = *lum; lum += 2; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + *row++ = (value ) & 0xFF; + *row++ = (value >> 8) & 0xFF; + *row++ = (value >> 16) & 0xFF; + + L = *lum; lum += 2; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + *row++ = (value ) & 0xFF; + *row++ = (value >> 8) & 0xFF; + *row++ = (value >> 16) & 0xFF; + + } + row += mod; + } +} + +static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int* row; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + row = (unsigned int*) out; + y = rows; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + cr += 4; cb += 4; + + L = *lum; lum += 2; + *row++ = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + L = *lum; lum += 2; + *row++ = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + + + } + row += mod; + } +} + +/* + * In this function I make use of a nasty trick. The tables have the lower + * 16 bits replicated in the upper 16. This means I can write ints and get + * the horisontal doubling for free (almost). + */ +static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int* row = (unsigned int*) out; + const int next_row = cols+(mod/2); + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + + y = rows; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + cr += 4; cb += 4; + + L = *lum; lum += 2; + row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row++; + + L = *lum; lum += 2; + row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row++; + + } + row += next_row; + } +} + +static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int value; + unsigned char* row = out; + const int next_row = (cols*2 + mod) * 3; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + y = rows; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + cr += 4; cb += 4; + + L = *lum; lum += 2; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] = + (value ) & 0xFF; + row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] = + (value >> 8) & 0xFF; + row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] = + (value >> 16) & 0xFF; + row += 2*3; + + L = *lum; lum += 2; + value = (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] = + (value ) & 0xFF; + row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] = + (value >> 8) & 0xFF; + row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] = + (value >> 16) & 0xFF; + row += 2*3; + + } + row += next_row; + } +} + +static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix, + unsigned char *lum, unsigned char *cr, + unsigned char *cb, unsigned char *out, + int rows, int cols, int mod ) +{ + unsigned int* row = (unsigned int*) out; + const int next_row = cols*2+mod; + int x, y; + int cr_r; + int crb_g; + int cb_b; + int cols_2 = cols / 2; + mod+=mod; + y = rows; + while( y-- ) + { + x = cols_2; + while( x-- ) + { + register int L; + + cr_r = 0*768+256 + colortab[ *cr + 0*256 ]; + crb_g = 1*768+256 + colortab[ *cr + 1*256 ] + + colortab[ *cb + 2*256 ]; + cb_b = 2*768+256 + colortab[ *cb + 3*256 ]; + cr += 4; cb += 4; + + L = *lum; lum += 2; + row[0] = row[1] = row[next_row] = row[next_row+1] = + (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row += 2; + + L = *lum; lum += 2; + row[0] = row[1] = row[next_row] = row[next_row+1] = + (rgb_2_pix[ L + cr_r ] | + rgb_2_pix[ L + crb_g ] | + rgb_2_pix[ L + cb_b ]); + row += 2; + + + } + + row += next_row; + } +} + +/* + * How many 1 bits are there in the Uint32. + * Low performance, do not call often. + */ +static int number_of_bits_set( Uint32 a ) +{ + if(!a) return 0; + if(a & 1) return 1 + number_of_bits_set(a >> 1); + return(number_of_bits_set(a >> 1)); +} + +/* + * How many 0 bits are there at least significant end of Uint32. + * Low performance, do not call often. + */ +static int free_bits_at_bottom( Uint32 a ) +{ + /* assume char is 8 bits */ + if(!a) return sizeof(Uint32) * 8; + if(((Sint32)a) & 1l) return 0; + return 1 + free_bits_at_bottom ( a >> 1); +} + + +SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display) +{ + SDL_Overlay *overlay; + struct private_yuvhwdata *swdata; + int *Cr_r_tab; + int *Cr_g_tab; + int *Cb_g_tab; + int *Cb_b_tab; + Uint32 *r_2_pix_alloc; + Uint32 *g_2_pix_alloc; + Uint32 *b_2_pix_alloc; + int i; + int CR, CB; + Uint32 Rmask, Gmask, Bmask; + + /* Only RGB packed pixel conversion supported */ + if ( (display->format->BytesPerPixel != 2) && + (display->format->BytesPerPixel != 3) && + (display->format->BytesPerPixel != 4) ) { + SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces"); + return(NULL); + } + + /* Verify that we support the format */ + switch (format) { + case SDL_YV12_OVERLAY: + case SDL_IYUV_OVERLAY: + case SDL_YUY2_OVERLAY: + case SDL_UYVY_OVERLAY: + case SDL_YVYU_OVERLAY: + break; + default: + SDL_SetError("Unsupported YUV format"); + return(NULL); + } + + /* Create the overlay structure */ + overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay); + if ( overlay == NULL ) { + SDL_OutOfMemory(); + return(NULL); + } + SDL_memset(overlay, 0, (sizeof *overlay)); + + /* Fill in the basic members */ + overlay->format = format; + overlay->w = width; + overlay->h = height; + + /* Set up the YUV surface function structure */ + overlay->hwfuncs = &sw_yuvfuncs; + + /* Create the pixel data and lookup tables */ + swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata); + overlay->hwdata = swdata; + if ( swdata == NULL ) { + SDL_OutOfMemory(); + SDL_FreeYUVOverlay(overlay); + return(NULL); + } + swdata->stretch = NULL; + swdata->display = display; + swdata->pixels = (Uint8 *) SDL_malloc(width*height*2); + swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int)); + Cr_r_tab = &swdata->colortab[0*256]; + Cr_g_tab = &swdata->colortab[1*256]; + Cb_g_tab = &swdata->colortab[2*256]; + Cb_b_tab = &swdata->colortab[3*256]; + swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32)); + r_2_pix_alloc = &swdata->rgb_2_pix[0*768]; + g_2_pix_alloc = &swdata->rgb_2_pix[1*768]; + b_2_pix_alloc = &swdata->rgb_2_pix[2*768]; + if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) { + SDL_OutOfMemory(); + SDL_FreeYUVOverlay(overlay); + return(NULL); + } + + /* Generate the tables for the display surface */ + for (i=0; i<256; i++) { + /* Gamma correction (luminescence table) and chroma correction + would be done here. See the Berkeley mpeg_play sources. + */ + CB = CR = (i-128); + Cr_r_tab[i] = (int) ( (0.419/0.299) * CR); + Cr_g_tab[i] = (int) (-(0.299/0.419) * CR); + Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); + Cb_b_tab[i] = (int) ( (0.587/0.331) * CB); + } + + /* + * Set up entries 0-255 in rgb-to-pixel value tables. + */ + Rmask = display->format->Rmask; + Gmask = display->format->Gmask; + Bmask = display->format->Bmask; + for ( i=0; i<256; ++i ) { + r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask)); + r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask); + g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask)); + g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask); + b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask)); + b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask); + } + + /* + * If we have 16-bit output depth, then we double the value + * in the top word. This means that we can write out both + * pixels in the pixel doubling mode with one op. It is + * harmless in the normal case as storing a 32-bit value + * through a short pointer will lose the top bits anyway. + */ + if( display->format->BytesPerPixel == 2 ) { + for ( i=0; i<256; ++i ) { + r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16; + g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16; + b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16; + } + } + + /* + * Spread out the values we have to the rest of the array so that + * we do not need to check for overflow. + */ + for ( i=0; i<256; ++i ) { + r_2_pix_alloc[i] = r_2_pix_alloc[256]; + r_2_pix_alloc[i+512] = r_2_pix_alloc[511]; + g_2_pix_alloc[i] = g_2_pix_alloc[256]; + g_2_pix_alloc[i+512] = g_2_pix_alloc[511]; + b_2_pix_alloc[i] = b_2_pix_alloc[256]; + b_2_pix_alloc[i+512] = b_2_pix_alloc[511]; + } + + /* You have chosen wisely... */ + switch (format) { + case SDL_YV12_OVERLAY: + case SDL_IYUV_OVERLAY: + if ( display->format->BytesPerPixel == 2 ) { +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES + /* inline assembly functions */ + if ( SDL_HasMMX() && (Rmask == 0xF800) && + (Gmask == 0x07E0) && + (Bmask == 0x001F) && + (width & 15) == 0) { +/*printf("Using MMX 16-bit 565 dither\n");*/ + swdata->Display1X = Color565DitherYV12MMX1X; + } else { +/*printf("Using C 16-bit dither\n");*/ + swdata->Display1X = Color16DitherYV12Mod1X; + } +#else + swdata->Display1X = Color16DitherYV12Mod1X; +#endif + swdata->Display2X = Color16DitherYV12Mod2X; + } + if ( display->format->BytesPerPixel == 3 ) { + swdata->Display1X = Color24DitherYV12Mod1X; + swdata->Display2X = Color24DitherYV12Mod2X; + } + if ( display->format->BytesPerPixel == 4 ) { +#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES + /* inline assembly functions */ + if ( SDL_HasMMX() && (Rmask == 0x00FF0000) && + (Gmask == 0x0000FF00) && + (Bmask == 0x000000FF) && + (width & 15) == 0) { +/*printf("Using MMX 32-bit dither\n");*/ + swdata->Display1X = ColorRGBDitherYV12MMX1X; + } else { +/*printf("Using C 32-bit dither\n");*/ + swdata->Display1X = Color32DitherYV12Mod1X; + } +#else + swdata->Display1X = Color32DitherYV12Mod1X; +#endif + swdata->Display2X = Color32DitherYV12Mod2X; + } + break; + case SDL_YUY2_OVERLAY: + case SDL_UYVY_OVERLAY: + case SDL_YVYU_OVERLAY: + if ( display->format->BytesPerPixel == 2 ) { + swdata->Display1X = Color16DitherYUY2Mod1X; + swdata->Display2X = Color16DitherYUY2Mod2X; + } + if ( display->format->BytesPerPixel == 3 ) { + swdata->Display1X = Color24DitherYUY2Mod1X; + swdata->Display2X = Color24DitherYUY2Mod2X; + } + if ( display->format->BytesPerPixel == 4 ) { + swdata->Display1X = Color32DitherYUY2Mod1X; + swdata->Display2X = Color32DitherYUY2Mod2X; + } + break; + default: + /* We should never get here (caught above) */ + break; + } + + /* Find the pitch and offset values for the overlay */ + overlay->pitches = swdata->pitches; + overlay->pixels = swdata->planes; + switch (format) { + case SDL_YV12_OVERLAY: + case SDL_IYUV_OVERLAY: + overlay->pitches[0] = overlay->w; + overlay->pitches[1] = overlay->pitches[0] / 2; + overlay->pitches[2] = overlay->pitches[0] / 2; + overlay->pixels[0] = swdata->pixels; + overlay->pixels[1] = overlay->pixels[0] + + overlay->pitches[0] * overlay->h; + overlay->pixels[2] = overlay->pixels[1] + + overlay->pitches[1] * overlay->h / 2; + overlay->planes = 3; + break; + case SDL_YUY2_OVERLAY: + case SDL_UYVY_OVERLAY: + case SDL_YVYU_OVERLAY: + overlay->pitches[0] = overlay->w*2; + overlay->pixels[0] = swdata->pixels; + overlay->planes = 1; + break; + default: + /* We should never get here (caught above) */ + break; + } + + /* We're all done.. */ + return(overlay); +} + +int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay) +{ + return(0); +} + +void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay) +{ + return; +} + +int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst) +{ + struct private_yuvhwdata *swdata; + int stretch; + int scale_2x; + SDL_Surface *display; + Uint8 *lum, *Cr, *Cb; + Uint8 *dstp; + int mod; + + swdata = overlay->hwdata; + stretch = 0; + scale_2x = 0; + if ( src->x || src->y || src->w < overlay->w || src->h < overlay->h ) { + /* The source rectangle has been clipped. + Using a scratch surface is easier than adding clipped + source support to all the blitters, plus that would + slow them down in the general unclipped case. + */ + stretch = 1; + } else if ( (src->w != dst->w) || (src->h != dst->h) ) { + if ( (dst->w == 2*src->w) && + (dst->h == 2*src->h) ) { + scale_2x = 1; + } else { + stretch = 1; + } + } + if ( stretch ) { + if ( ! swdata->stretch ) { + display = swdata->display; + swdata->stretch = SDL_CreateRGBSurface( + SDL_SWSURFACE, + overlay->w, overlay->h, + display->format->BitsPerPixel, + display->format->Rmask, + display->format->Gmask, + display->format->Bmask, 0); + if ( ! swdata->stretch ) { + return(-1); + } + } + display = swdata->stretch; + } else { + display = swdata->display; + } + switch (overlay->format) { + case SDL_YV12_OVERLAY: + lum = overlay->pixels[0]; + Cr = overlay->pixels[1]; + Cb = overlay->pixels[2]; + break; + case SDL_IYUV_OVERLAY: + lum = overlay->pixels[0]; + Cr = overlay->pixels[2]; + Cb = overlay->pixels[1]; + break; + case SDL_YUY2_OVERLAY: + lum = overlay->pixels[0]; + Cr = lum + 3; + Cb = lum + 1; + break; + case SDL_UYVY_OVERLAY: + lum = overlay->pixels[0]+1; + Cr = lum + 1; + Cb = lum - 1; + break; + case SDL_YVYU_OVERLAY: + lum = overlay->pixels[0]; + Cr = lum + 1; + Cb = lum + 3; + break; + default: + SDL_SetError("Unsupported YUV format in blit"); + return(-1); + } + if ( SDL_MUSTLOCK(display) ) { + if ( SDL_LockSurface(display) < 0 ) { + return(-1); + } + } + if ( stretch ) { + dstp = (Uint8 *)swdata->stretch->pixels; + } else { + dstp = (Uint8 *)display->pixels + + dst->x * display->format->BytesPerPixel + + dst->y * display->pitch; + } + mod = (display->pitch / display->format->BytesPerPixel); + + if ( scale_2x ) { + mod -= (overlay->w * 2); + swdata->Display2X(swdata->colortab, swdata->rgb_2_pix, + lum, Cr, Cb, dstp, overlay->h, overlay->w, mod); + } else { + mod -= overlay->w; + swdata->Display1X(swdata->colortab, swdata->rgb_2_pix, + lum, Cr, Cb, dstp, overlay->h, overlay->w, mod); + } + if ( SDL_MUSTLOCK(display) ) { + SDL_UnlockSurface(display); + } + if ( stretch ) { + display = swdata->display; + SDL_SoftStretch(swdata->stretch, src, display, dst); + } + SDL_UpdateRects(display, 1, dst); + + return(0); +} + +void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay) +{ + struct private_yuvhwdata *swdata; + + swdata = overlay->hwdata; + if ( swdata ) { + if ( swdata->stretch ) { + SDL_FreeSurface(swdata->stretch); + } + if ( swdata->pixels ) { + SDL_free(swdata->pixels); + } + if ( swdata->colortab ) { + SDL_free(swdata->colortab); + } + if ( swdata->rgb_2_pix ) { + SDL_free(swdata->rgb_2_pix); + } + SDL_free(swdata); + overlay->hwdata = NULL; + } +} |