1 files changed, 1299 insertions, 0 deletions
diff --git a/apps/plugins/sdl/src/video/SDL_yuv_sw.c b/apps/plugins/sdl/src/video/SDL_yuv_sw.c
new file mode 100644
index 0000000000..c555ce06c2
--- /dev/null
+++ b/apps/plugins/sdl/src/video/SDL_yuv_sw.c
@@ -0,0 +1,1299 @@
+/*
+    SDL - Simple DirectMedia Layer
+    Copyright (C) 1997-2012 Sam Lantinga
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+    Sam Lantinga
+    slouken@libsdl.org
+*/
+#include "SDL_config.h"
+/* This is the software implementation of the YUV video overlay support */
+/* This code was derived from code carrying the following copyright notices:
+ * Copyright (c) 1995 The Regents of the University of California.
+ * All rights reserved.
+ * 
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement is
+ * hereby granted, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ * 
+ * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+ * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+ * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ * Copyright (c) 1995 Erik Corry
+ * All rights reserved.
+ * 
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement is
+ * hereby granted, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ * 
+ * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
+ * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
+ * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
+ * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
+ * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ * Portions of this software Copyright (c) 1995 Brown University.
+ * All rights reserved.
+ * 
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without written agreement
+ * is hereby granted, provided that the above copyright notice and the
+ * following two paragraphs appear in all copies of this software.
+ * 
+ * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+ * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
+ * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
+ * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+#include "SDL_video.h"
+#include "SDL_cpuinfo.h"
+#include "SDL_stretch_c.h"
+#include "SDL_yuvfuncs.h"
+#include "SDL_yuv_sw_c.h"
+/* The functions used to manipulate software video overlays */
+static struct private_yuvhwfuncs sw_yuvfuncs = {
+        SDL_LockYUV_SW,
+        SDL_UnlockYUV_SW,
+        SDL_DisplayYUV_SW,
+        SDL_FreeYUV_SW
+};
+/* RGB conversion lookup tables */
+struct private_yuvhwdata {
+        SDL_Surface *stretch;
+        SDL_Surface *display;
+        Uint8 *pixels;
+        int *colortab;
+        Uint32 *rgb_2_pix;
+        void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
+                          unsigned char *lum, unsigned char *cr,
+                          unsigned char *cb, unsigned char *out,
+                          int rows, int cols, int mod );
+        void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
+                          unsigned char *lum, unsigned char *cr,
+                          unsigned char *cb, unsigned char *out,
+                          int rows, int cols, int mod );
+        /* These are just so we don't have to allocate them separately */
+        Uint16 pitches[3];
+        Uint8 *planes[3];
+};
+/* The colorspace conversion functions */
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
+                                     unsigned char *lum, unsigned char *cr,
+                                     unsigned char *cb, unsigned char *out,
+                                     int rows, int cols, int mod );
+extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
+                                     unsigned char *lum, unsigned char *cr,
+                                     unsigned char *cb, unsigned char *out,
+                                     int rows, int cols, int mod );
+#endif 
+static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned short* row1;
+    unsigned short* row2;
+    unsigned char* lum2;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    row1 = (unsigned short*) out;
+    row2 = row1 + cols + mod;
+    lum2 = lum + cols;
+    mod += cols + mod;
+    y = rows / 2;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            ++cr; ++cb;
+            L = *lum++;
+            *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
+                                       rgb_2_pix[ L + crb_g ] |
+                                       rgb_2_pix[ L + cb_b ]);
+            L = *lum++;
+            *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
+                                       rgb_2_pix[ L + crb_g ] |
+                                       rgb_2_pix[ L + cb_b ]);
+            /* Now, do second row.  */
+            L = *lum2++;
+            *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
+                                       rgb_2_pix[ L + crb_g ] |
+                                       rgb_2_pix[ L + cb_b ]);
+            L = *lum2++;
+            *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
+                                       rgb_2_pix[ L + crb_g ] |
+                                       rgb_2_pix[ L + cb_b ]);
+        }
+        /*
+         * These values are at the start of the next line, (due
+         * to the ++'s above),but they need to be at the start
+         * of the line after that.
+         */
+        lum  += cols;
+        lum2 += cols;
+        row1 += mod;
+        row2 += mod;
+    }
+}
+static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int value;
+    unsigned char* row1;
+    unsigned char* row2;
+    unsigned char* lum2;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    row1 = out;
+    row2 = row1 + cols*3 + mod*3;
+    lum2 = lum + cols;
+    mod += cols + mod;
+    mod *= 3;
+    y = rows / 2;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            ++cr; ++cb;
+            L = *lum++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            *row1++ = (value      ) & 0xFF;
+            *row1++ = (value >>  8) & 0xFF;
+            *row1++ = (value >> 16) & 0xFF;
+            L = *lum++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            *row1++ = (value      ) & 0xFF;
+            *row1++ = (value >>  8) & 0xFF;
+            *row1++ = (value >> 16) & 0xFF;
+            /* Now, do second row.  */
+            L = *lum2++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            *row2++ = (value      ) & 0xFF;
+            *row2++ = (value >>  8) & 0xFF;
+            *row2++ = (value >> 16) & 0xFF;
+            L = *lum2++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            *row2++ = (value      ) & 0xFF;
+            *row2++ = (value >>  8) & 0xFF;
+            *row2++ = (value >> 16) & 0xFF;
+        }
+        /*
+         * These values are at the start of the next line, (due
+         * to the ++'s above),but they need to be at the start
+         * of the line after that.
+         */
+        lum  += cols;
+        lum2 += cols;
+        row1 += mod;
+        row2 += mod;
+    }
+}
+static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int* row1;
+    unsigned int* row2;
+    unsigned char* lum2;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    row1 = (unsigned int*) out;
+    row2 = row1 + cols + mod;
+    lum2 = lum + cols;
+    mod += cols + mod;
+    y = rows / 2;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            ++cr; ++cb;
+            L = *lum++;
+            *row1++ = (rgb_2_pix[ L + cr_r ] |
+                       rgb_2_pix[ L + crb_g ] |
+                       rgb_2_pix[ L + cb_b ]);
+            L = *lum++;
+            *row1++ = (rgb_2_pix[ L + cr_r ] |
+                       rgb_2_pix[ L + crb_g ] |
+                       rgb_2_pix[ L + cb_b ]);
+            /* Now, do second row.  */
+            L = *lum2++;
+            *row2++ = (rgb_2_pix[ L + cr_r ] |
+                       rgb_2_pix[ L + crb_g ] |
+                       rgb_2_pix[ L + cb_b ]);
+            L = *lum2++;
+            *row2++ = (rgb_2_pix[ L + cr_r ] |
+                       rgb_2_pix[ L + crb_g ] |
+                       rgb_2_pix[ L + cb_b ]);
+        }
+        /*
+         * These values are at the start of the next line, (due
+         * to the ++'s above),but they need to be at the start
+         * of the line after that.
+         */
+        lum  += cols;
+        lum2 += cols;
+        row1 += mod;
+        row2 += mod;
+    }
+}
+/*
+ * In this function I make use of a nasty trick. The tables have the lower
+ * 16 bits replicated in the upper 16. This means I can write ints and get
+ * the horisontal doubling for free (almost).
+ */
+static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int* row1 = (unsigned int*) out;
+    const int next_row = cols+(mod/2);
+    unsigned int* row2 = row1 + 2*next_row;
+    unsigned char* lum2;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    lum2 = lum + cols;
+    mod = (next_row * 3) + (mod/2);
+    y = rows / 2;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            ++cr; ++cb;
+            L = *lum++;
+            row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row1++;
+            L = *lum++;
+            row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row1++;
+            /* Now, do second row. */
+            L = *lum2++;
+            row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row2++;
+            L = *lum2++;
+            row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row2++;
+        }
+        /*
+         * These values are at the start of the next line, (due
+         * to the ++'s above),but they need to be at the start
+         * of the line after that.
+         */
+        lum  += cols;
+        lum2 += cols;
+        row1 += mod;
+        row2 += mod;
+    }
+}
+static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int value;
+    unsigned char* row1 = out;
+    const int next_row = (cols*2 + mod) * 3;
+    unsigned char* row2 = row1 + 2*next_row;
+    unsigned char* lum2;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    lum2 = lum + cols;
+    mod = next_row*3 + mod*3;
+    y = rows / 2;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            ++cr; ++cb;
+            L = *lum++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
+                     (value      ) & 0xFF;
+            row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
+                     (value >>  8) & 0xFF;
+            row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
+                     (value >> 16) & 0xFF;
+            row1 += 2*3;
+            L = *lum++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
+                     (value      ) & 0xFF;
+            row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
+                     (value >>  8) & 0xFF;
+            row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
+                     (value >> 16) & 0xFF;
+            row1 += 2*3;
+            /* Now, do second row. */
+            L = *lum2++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
+                     (value      ) & 0xFF;
+            row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
+                     (value >>  8) & 0xFF;
+            row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
+                     (value >> 16) & 0xFF;
+            row2 += 2*3;
+            L = *lum2++;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
+                     (value      ) & 0xFF;
+            row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
+                     (value >>  8) & 0xFF;
+            row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
+                     (value >> 16) & 0xFF;
+            row2 += 2*3;
+        }
+        /*
+         * These values are at the start of the next line, (due
+         * to the ++'s above),but they need to be at the start
+         * of the line after that.
+         */
+        lum  += cols;
+        lum2 += cols;
+        row1 += mod;
+        row2 += mod;
+    }
+}
+static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int* row1 = (unsigned int*) out;
+    const int next_row = cols*2+mod;
+    unsigned int* row2 = row1 + 2*next_row;
+    unsigned char* lum2;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    lum2 = lum + cols;
+    mod = (next_row * 3) + mod;
+    y = rows / 2;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            ++cr; ++cb;
+            L = *lum++;
+            row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
+                                       (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row1 += 2;
+            L = *lum++;
+            row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
+                                       (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row1 += 2;
+            /* Now, do second row. */
+            L = *lum2++;
+            row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
+                                       (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row2 += 2;
+            L = *lum2++;
+            row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
+                                       (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row2 += 2;
+        }
+        /*
+         * These values are at the start of the next line, (due
+         * to the ++'s above),but they need to be at the start
+         * of the line after that.
+         */
+        lum  += cols;
+        lum2 += cols;
+        row1 += mod;
+        row2 += mod;
+    }
+}
+static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned short* row;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    row = (unsigned short*) out;
+    y = rows;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            cr += 4; cb += 4;
+            L = *lum; lum += 2;
+            *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
+                                      rgb_2_pix[ L + crb_g ] |
+                                      rgb_2_pix[ L + cb_b ]);
+            L = *lum; lum += 2;
+            *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
+                                      rgb_2_pix[ L + crb_g ] |
+                                      rgb_2_pix[ L + cb_b ]);
+        }
+        row += mod;
+    }
+}
+static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int value;
+    unsigned char* row;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    row = (unsigned char*) out;
+    mod *= 3;
+    y = rows;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            cr += 4; cb += 4;
+            L = *lum; lum += 2;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            *row++ = (value      ) & 0xFF;
+            *row++ = (value >>  8) & 0xFF;
+            *row++ = (value >> 16) & 0xFF;
+            L = *lum; lum += 2;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            *row++ = (value      ) & 0xFF;
+            *row++ = (value >>  8) & 0xFF;
+            *row++ = (value >> 16) & 0xFF;
+        }
+        row += mod;
+    }
+}
+static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int* row;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    row = (unsigned int*) out;
+    y = rows;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            cr += 4; cb += 4;
+            L = *lum; lum += 2;
+            *row++ = (rgb_2_pix[ L + cr_r ] |
+                       rgb_2_pix[ L + crb_g ] |
+                       rgb_2_pix[ L + cb_b ]);
+            L = *lum; lum += 2;
+            *row++ = (rgb_2_pix[ L + cr_r ] |
+                       rgb_2_pix[ L + crb_g ] |
+                       rgb_2_pix[ L + cb_b ]);
+        }
+        row += mod;
+    }
+}
+/*
+ * In this function I make use of a nasty trick. The tables have the lower
+ * 16 bits replicated in the upper 16. This means I can write ints and get
+ * the horisontal doubling for free (almost).
+ */
+static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int* row = (unsigned int*) out;
+    const int next_row = cols+(mod/2);
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    y = rows;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            cr += 4; cb += 4;
+            L = *lum; lum += 2;
+            row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row++;
+            L = *lum; lum += 2;
+            row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row++;
+        }
+        row += next_row;
+    }
+}
+static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int value;
+    unsigned char* row = out;
+    const int next_row = (cols*2 + mod) * 3;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    y = rows;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            cr += 4; cb += 4;
+            L = *lum; lum += 2;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
+                     (value      ) & 0xFF;
+            row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
+                     (value >>  8) & 0xFF;
+            row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
+                     (value >> 16) & 0xFF;
+            row += 2*3;
+            L = *lum; lum += 2;
+            value = (rgb_2_pix[ L + cr_r ] |
+                     rgb_2_pix[ L + crb_g ] |
+                     rgb_2_pix[ L + cb_b ]);
+            row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
+                     (value      ) & 0xFF;
+            row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
+                     (value >>  8) & 0xFF;
+            row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
+                     (value >> 16) & 0xFF;
+            row += 2*3;
+        }
+        row += next_row;
+    }
+}
+static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
+                                    unsigned char *lum, unsigned char *cr,
+                                    unsigned char *cb, unsigned char *out,
+                                    int rows, int cols, int mod )
+{
+    unsigned int* row = (unsigned int*) out;
+    const int next_row = cols*2+mod;
+    int x, y;
+    int cr_r;
+    int crb_g;
+    int cb_b;
+    int cols_2 = cols / 2;
+    mod+=mod;
+    y = rows;
+    while( y-- )
+    {
+        x = cols_2;
+        while( x-- )
+        {
+            register int L;
+            cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
+            crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
+                               + colortab[ *cb + 2*256 ];
+            cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
+            cr += 4; cb += 4;
+            L = *lum; lum += 2;
+            row[0] = row[1] = row[next_row] = row[next_row+1] =
+                                       (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row += 2;
+            L = *lum; lum += 2;
+            row[0] = row[1] = row[next_row] = row[next_row+1] =
+                                       (rgb_2_pix[ L + cr_r ] |
+                                        rgb_2_pix[ L + crb_g ] |
+                                        rgb_2_pix[ L + cb_b ]);
+            row += 2;
+        }
+        row += next_row;
+    }
+}
+/*
+ * How many 1 bits are there in the Uint32.
+ * Low performance, do not call often.
+ */
+static int number_of_bits_set( Uint32 a )
+{
+    if(!a) return 0;
+    if(a & 1) return 1 + number_of_bits_set(a >> 1);
+    return(number_of_bits_set(a >> 1));
+}
+/*
+ * How many 0 bits are there at least significant end of Uint32.
+ * Low performance, do not call often.
+ */
+static int free_bits_at_bottom( Uint32 a )
+{
+      /* assume char is 8 bits */
+    if(!a) return sizeof(Uint32) * 8;
+    if(((Sint32)a) & 1l) return 0;
+    return 1 + free_bits_at_bottom ( a >> 1);
+}
+SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
+{
+        SDL_Overlay *overlay;
+        struct private_yuvhwdata *swdata;
+        int *Cr_r_tab;
+        int *Cr_g_tab;
+        int *Cb_g_tab;
+        int *Cb_b_tab;
+        Uint32 *r_2_pix_alloc;
+        Uint32 *g_2_pix_alloc;
+        Uint32 *b_2_pix_alloc;
+        int i;
+        int CR, CB;
+        Uint32 Rmask, Gmask, Bmask;
+        /* Only RGB packed pixel conversion supported */
+        if ( (display->format->BytesPerPixel != 2) &&
+             (display->format->BytesPerPixel != 3) &&
+             (display->format->BytesPerPixel != 4) ) {
+                SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
+                return(NULL);
+        }
+        /* Verify that we support the format */
+        switch (format) {
+            case SDL_YV12_OVERLAY:
+            case SDL_IYUV_OVERLAY:
+            case SDL_YUY2_OVERLAY:
+            case SDL_UYVY_OVERLAY:
+            case SDL_YVYU_OVERLAY:
+                break;
+            default:
+                SDL_SetError("Unsupported YUV format");
+                return(NULL);
+        }
+        /* Create the overlay structure */
+        overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
+        if ( overlay == NULL ) {
+                SDL_OutOfMemory();
+                return(NULL);
+        }
+        SDL_memset(overlay, 0, (sizeof *overlay));
+        /* Fill in the basic members */
+        overlay->format = format;
+        overlay->w = width;
+        overlay->h = height;
+        /* Set up the YUV surface function structure */
+        overlay->hwfuncs = &sw_yuvfuncs;
+        /* Create the pixel data and lookup tables */
+        swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
+        overlay->hwdata = swdata;
+        if ( swdata == NULL ) {
+                SDL_OutOfMemory();
+                SDL_FreeYUVOverlay(overlay);
+                return(NULL);
+        }
+        swdata->stretch = NULL;
+        swdata->display = display;
+        swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
+        swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
+        Cr_r_tab = &swdata->colortab[0*256];
+        Cr_g_tab = &swdata->colortab[1*256];
+        Cb_g_tab = &swdata->colortab[2*256];
+        Cb_b_tab = &swdata->colortab[3*256];
+        swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
+        r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
+        g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
+        b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
+        if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
+                SDL_OutOfMemory();
+                SDL_FreeYUVOverlay(overlay);
+                return(NULL);
+        }
+        /* Generate the tables for the display surface */
+        for (i=0; i<256; i++) {
+                /* Gamma correction (luminescence table) and chroma correction
+                   would be done here.  See the Berkeley mpeg_play sources.
+                */
+                CB = CR = (i-128);
+                Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
+                Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
+                Cb_g_tab[i] = (int) (-(0.114/0.331) * CB); 
+                Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
+        }
+        /* 
+         * Set up entries 0-255 in rgb-to-pixel value tables.
+         */
+        Rmask = display->format->Rmask;
+        Gmask = display->format->Gmask;
+        Bmask = display->format->Bmask;
+        for ( i=0; i<256; ++i ) {
+                r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
+                r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
+                g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
+                g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
+                b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
+                b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
+        }
+        /*
+         * If we have 16-bit output depth, then we double the value
+         * in the top word. This means that we can write out both
+         * pixels in the pixel doubling mode with one op. It is 
+         * harmless in the normal case as storing a 32-bit value
+         * through a short pointer will lose the top bits anyway.
+         */
+        if( display->format->BytesPerPixel == 2 ) {
+                for ( i=0; i<256; ++i ) {
+                        r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
+                        g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
+                        b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
+                }
+        }
+        /*
+         * Spread out the values we have to the rest of the array so that
+         * we do not need to check for overflow.
+         */
+        for ( i=0; i<256; ++i ) {
+                r_2_pix_alloc[i] = r_2_pix_alloc[256];
+                r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
+                g_2_pix_alloc[i] = g_2_pix_alloc[256];
+                g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
+                b_2_pix_alloc[i] = b_2_pix_alloc[256];
+                b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
+        }
+        /* You have chosen wisely... */
+        switch (format) {
+            case SDL_YV12_OVERLAY:
+            case SDL_IYUV_OVERLAY:
+                if ( display->format->BytesPerPixel == 2 ) {
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+                        /* inline assembly functions */
+                        if ( SDL_HasMMX() && (Rmask == 0xF800) &&
+                                             (Gmask == 0x07E0) &&
+                                             (Bmask == 0x001F) &&
+                                             (width & 15) == 0) {
+/*printf("Using MMX 16-bit 565 dither\n");*/
+                                swdata->Display1X = Color565DitherYV12MMX1X;
+                        } else {
+/*printf("Using C 16-bit dither\n");*/
+                                swdata->Display1X = Color16DitherYV12Mod1X;
+                        }
+#else
+                        swdata->Display1X = Color16DitherYV12Mod1X;
+#endif
+                        swdata->Display2X = Color16DitherYV12Mod2X;
+                }
+                if ( display->format->BytesPerPixel == 3 ) {
+                        swdata->Display1X = Color24DitherYV12Mod1X;
+                        swdata->Display2X = Color24DitherYV12Mod2X;
+                }
+                if ( display->format->BytesPerPixel == 4 ) {
+#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
+                        /* inline assembly functions */
+                        if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
+                                             (Gmask == 0x0000FF00) &&
+                                             (Bmask == 0x000000FF) && 
+                                             (width & 15) == 0) {
+/*printf("Using MMX 32-bit dither\n");*/
+                                swdata->Display1X = ColorRGBDitherYV12MMX1X;
+                        } else {
+/*printf("Using C 32-bit dither\n");*/
+                                swdata->Display1X = Color32DitherYV12Mod1X;
+                        }
+#else
+                        swdata->Display1X = Color32DitherYV12Mod1X;
+#endif
+                        swdata->Display2X = Color32DitherYV12Mod2X;
+                }
+                break;
+            case SDL_YUY2_OVERLAY:
+            case SDL_UYVY_OVERLAY:
+            case SDL_YVYU_OVERLAY:
+                if ( display->format->BytesPerPixel == 2 ) {
+                        swdata->Display1X = Color16DitherYUY2Mod1X;
+                        swdata->Display2X = Color16DitherYUY2Mod2X;
+                }
+                if ( display->format->BytesPerPixel == 3 ) {
+                        swdata->Display1X = Color24DitherYUY2Mod1X;
+                        swdata->Display2X = Color24DitherYUY2Mod2X;
+                }
+                if ( display->format->BytesPerPixel == 4 ) {
+                        swdata->Display1X = Color32DitherYUY2Mod1X;
+                        swdata->Display2X = Color32DitherYUY2Mod2X;
+                }
+                break;
+            default:
+                /* We should never get here (caught above) */
+                break;
+        }
+        /* Find the pitch and offset values for the overlay */
+        overlay->pitches = swdata->pitches;
+        overlay->pixels = swdata->planes;
+        switch (format) {
+            case SDL_YV12_OVERLAY:
+            case SDL_IYUV_OVERLAY:
+                overlay->pitches[0] = overlay->w;
+                overlay->pitches[1] = overlay->pitches[0] / 2;
+                overlay->pitches[2] = overlay->pitches[0] / 2;
+                overlay->pixels[0] = swdata->pixels;
+                overlay->pixels[1] = overlay->pixels[0] +
+                                     overlay->pitches[0] * overlay->h;
+                overlay->pixels[2] = overlay->pixels[1] +
+                                     overlay->pitches[1] * overlay->h / 2;
+                overlay->planes = 3;
+                break;
+            case SDL_YUY2_OVERLAY:
+            case SDL_UYVY_OVERLAY:
+            case SDL_YVYU_OVERLAY:
+                overlay->pitches[0] = overlay->w*2;
+                overlay->pixels[0] = swdata->pixels;
+                overlay->planes = 1;
+                break;
+            default:
+                /* We should never get here (caught above) */
+                break;
+        }
+        /* We're all done.. */
+        return(overlay);
+}
+int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
+{
+        return(0);
+}
+void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
+{
+        return;
+}
+int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
+{
+        struct private_yuvhwdata *swdata;
+        int stretch;
+        int scale_2x;
+        SDL_Surface *display;
+        Uint8 *lum, *Cr, *Cb;
+        Uint8 *dstp;
+        int mod;
+        swdata = overlay->hwdata;
+        stretch = 0;
+        scale_2x = 0;
+        if ( src->x || src->y || src->w < overlay->w || src->h < overlay->h ) {
+                /* The source rectangle has been clipped.
+                   Using a scratch surface is easier than adding clipped
+                   source support to all the blitters, plus that would
+                   slow them down in the general unclipped case.
+                */
+                stretch = 1;
+        } else if ( (src->w != dst->w) || (src->h != dst->h) ) {
+                if ( (dst->w == 2*src->w) &&
+                     (dst->h == 2*src->h) ) {
+                        scale_2x = 1;
+                } else {
+                        stretch = 1;
+                }
+        }
+        if ( stretch ) {
+                if ( ! swdata->stretch ) {
+                        display = swdata->display;
+                        swdata->stretch = SDL_CreateRGBSurface(
+                                SDL_SWSURFACE,
+                                overlay->w, overlay->h,
+                                display->format->BitsPerPixel,
+                                display->format->Rmask,
+                                display->format->Gmask,
+                                display->format->Bmask, 0);
+                        if ( ! swdata->stretch ) {
+                                return(-1);
+                        }
+                }
+                display = swdata->stretch;
+        } else {
+                display = swdata->display;
+        }
+        switch (overlay->format) {
+            case SDL_YV12_OVERLAY:
+                lum = overlay->pixels[0];
+                Cr =  overlay->pixels[1];
+                Cb =  overlay->pixels[2];
+                break;
+            case SDL_IYUV_OVERLAY:
+                lum = overlay->pixels[0];
+                Cr =  overlay->pixels[2];
+                Cb =  overlay->pixels[1];
+                break;
+            case SDL_YUY2_OVERLAY:
+                lum = overlay->pixels[0];
+                Cr = lum + 3;
+                Cb = lum + 1;
+                break;
+            case SDL_UYVY_OVERLAY:
+                lum = overlay->pixels[0]+1;
+                Cr = lum + 1;
+                Cb = lum - 1;
+                break;
+            case SDL_YVYU_OVERLAY:
+                lum = overlay->pixels[0];
+                Cr = lum + 1;
+                Cb = lum + 3;
+                break;
+            default:
+                SDL_SetError("Unsupported YUV format in blit");
+                return(-1);
+        }
+        if ( SDL_MUSTLOCK(display) ) {
+                if ( SDL_LockSurface(display) < 0 ) {
+                        return(-1);
+                }
+        }
+        if ( stretch ) {
+                dstp = (Uint8 *)swdata->stretch->pixels;
+        } else {
+                dstp = (Uint8 *)display->pixels
+                        + dst->x * display->format->BytesPerPixel
+                        + dst->y * display->pitch;
+        }
+        mod = (display->pitch / display->format->BytesPerPixel);
+        if ( scale_2x ) {
+                mod -= (overlay->w * 2);
+                swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
+                                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
+        } else {
+                mod -= overlay->w;
+                swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
+                                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
+        }
+        if ( SDL_MUSTLOCK(display) ) {
+                SDL_UnlockSurface(display);
+        }
+        if ( stretch ) {
+                display = swdata->display;
+                SDL_SoftStretch(swdata->stretch, src, display, dst);
+        }
+        SDL_UpdateRects(display, 1, dst);
+        return(0);
+}
+void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
+{
+        struct private_yuvhwdata *swdata;
+        swdata = overlay->hwdata;
+        if ( swdata ) {
+                if ( swdata->stretch ) {
+                        SDL_FreeSurface(swdata->stretch);
+                }
+                if ( swdata->pixels ) {
+                        SDL_free(swdata->pixels);
+                }
+                if ( swdata->colortab ) {
+                        SDL_free(swdata->colortab);
+                }
+                if ( swdata->rgb_2_pix ) {
+                        SDL_free(swdata->rgb_2_pix);
+                }
+                SDL_free(swdata);
+                overlay->hwdata = NULL;
+        }
+}