diff options
author | Franklin Wei <git@fwei.tk> | 2017-01-21 15:18:31 -0500 |
---|---|---|
committer | Franklin Wei <git@fwei.tk> | 2017-12-23 21:01:26 -0500 |
commit | a855d6202536ff28e5aae4f22a0f31d8f5b325d0 (patch) | |
tree | 8c75f224dd64ed360505afa8843d016b0d75000b /apps/plugins/sdl/src/video/SDL_RLEaccel.c | |
parent | 01c6dcf6c7b9bb1ad2fa0450f99bacc5f3d3e04b (diff) | |
download | rockbox-a855d6202536ff28e5aae4f22a0f31d8f5b325d0.tar.gz rockbox-a855d6202536ff28e5aae4f22a0f31d8f5b325d0.zip |
Port of Duke Nukem 3D
This ports Fabien Sanglard's Chocolate Duke to run on a version of SDL
for Rockbox.
Change-Id: I8f2c4c78af19de10c1633ed7bb7a997b43256dd9
Diffstat (limited to 'apps/plugins/sdl/src/video/SDL_RLEaccel.c')
-rw-r--r-- | apps/plugins/sdl/src/video/SDL_RLEaccel.c | 1941 |
1 files changed, 1941 insertions, 0 deletions
diff --git a/apps/plugins/sdl/src/video/SDL_RLEaccel.c b/apps/plugins/sdl/src/video/SDL_RLEaccel.c new file mode 100644 index 0000000000..d4b191c272 --- /dev/null +++ b/apps/plugins/sdl/src/video/SDL_RLEaccel.c | |||
@@ -0,0 +1,1941 @@ | |||
1 | /* | ||
2 | SDL - Simple DirectMedia Layer | ||
3 | Copyright (C) 1997-2012 Sam Lantinga | ||
4 | |||
5 | This library is free software; you can redistribute it and/or | ||
6 | modify it under the terms of the GNU Lesser General Public | ||
7 | License as published by the Free Software Foundation; either | ||
8 | version 2.1 of the License, or (at your option) any later version. | ||
9 | |||
10 | This library is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Lesser General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Lesser General Public | ||
16 | License along with this library; if not, write to the Free Software | ||
17 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | |||
19 | Sam Lantinga | ||
20 | slouken@libsdl.org | ||
21 | */ | ||
22 | #include "SDL_config.h" | ||
23 | |||
24 | /* | ||
25 | * RLE encoding for software colorkey and alpha-channel acceleration | ||
26 | * | ||
27 | * Original version by Sam Lantinga | ||
28 | * | ||
29 | * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and | ||
30 | * decoder. Added per-surface alpha blitter. Added per-pixel alpha | ||
31 | * format, encoder and blitter. | ||
32 | * | ||
33 | * Many thanks to Xark and johns for hints, benchmarks and useful comments | ||
34 | * leading to this code. | ||
35 | * | ||
36 | * Welcome to Macro Mayhem. | ||
37 | */ | ||
38 | |||
39 | /* | ||
40 | * The encoding translates the image data to a stream of segments of the form | ||
41 | * | ||
42 | * <skip> <run> <data> | ||
43 | * | ||
44 | * where <skip> is the number of transparent pixels to skip, | ||
45 | * <run> is the number of opaque pixels to blit, | ||
46 | * and <data> are the pixels themselves. | ||
47 | * | ||
48 | * This basic structure is used both for colorkeyed surfaces, used for simple | ||
49 | * binary transparency and for per-surface alpha blending, and for surfaces | ||
50 | * with per-pixel alpha. The details differ, however: | ||
51 | * | ||
52 | * Encoding of colorkeyed surfaces: | ||
53 | * | ||
54 | * Encoded pixels always have the same format as the target surface. | ||
55 | * <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth | ||
56 | * where they are 16 bit. This makes the pixel data aligned at all times. | ||
57 | * Segments never wrap around from one scan line to the next. | ||
58 | * | ||
59 | * The end of the sequence is marked by a zero <skip>,<run> pair at the | ||
60 | * beginning of a line. | ||
61 | * | ||
62 | * Encoding of surfaces with per-pixel alpha: | ||
63 | * | ||
64 | * The sequence begins with a struct RLEDestFormat describing the target | ||
65 | * pixel format, to provide reliable un-encoding. | ||
66 | * | ||
67 | * Each scan line is encoded twice: First all completely opaque pixels, | ||
68 | * encoded in the target format as described above, and then all | ||
69 | * partially transparent (translucent) pixels (where 1 <= alpha <= 254), | ||
70 | * in the following 32-bit format: | ||
71 | * | ||
72 | * For 32-bit targets, each pixel has the target RGB format but with | ||
73 | * the alpha value occupying the highest 8 bits. The <skip> and <run> | ||
74 | * counts are 16 bit. | ||
75 | * | ||
76 | * For 16-bit targets, each pixel has the target RGB format, but with | ||
77 | * the middle component (usually green) shifted 16 steps to the left, | ||
78 | * and the hole filled with the 5 most significant bits of the alpha value. | ||
79 | * i.e. if the target has the format rrrrrggggggbbbbb, | ||
80 | * the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb. | ||
81 | * The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit | ||
82 | * for the translucent lines. Two padding bytes may be inserted | ||
83 | * before each translucent line to keep them 32-bit aligned. | ||
84 | * | ||
85 | * The end of the sequence is marked by a zero <skip>,<run> pair at the | ||
86 | * beginning of an opaque line. | ||
87 | */ | ||
88 | |||
89 | #include "SDL_video.h" | ||
90 | #include "SDL_sysvideo.h" | ||
91 | #include "SDL_blit.h" | ||
92 | #include "SDL_RLEaccel_c.h" | ||
93 | |||
94 | /* Force MMX to 0; this blows up on almost every major compiler now. --ryan. The leading "0 &&" makes the condition below always false, so this block never defines MMX_ASMBLIT and the #ifdef MMX_ASMBLIT sections that follow are compiled out unless the symbol is defined elsewhere. */ | ||
95 | #if 0 && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES | ||
96 | #define MMX_ASMBLIT | ||
97 | #endif | ||
98 | |||
99 | #ifdef MMX_ASMBLIT /* headers needed only by the MMX blitters; presumably they supply the movq/pand style wrappers and SDL_HasMMX() -- confirm */ | ||
100 | #include "mmx.h" | ||
101 | #include "SDL_cpuinfo.h" | ||
102 | #endif | ||
103 | |||
104 | #ifndef MAX /* defined here only if no earlier header already provides it */ | ||
105 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) /* larger of a and b; the selected argument is evaluated twice, so avoid side effects */ | ||
106 | #endif | ||
107 | #ifndef MIN /* defined here only if no earlier header already provides it */ | ||
108 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) /* smaller of a and b; the selected argument is evaluated twice, so avoid side effects */ | ||
109 | #endif | ||
110 | |||
111 | #define PIXEL_COPY(to, from, len, bpp) /* copy len pixels of bpp bytes each from "from" to "to" */ \ | ||
112 | do { \ | ||
113 | if(bpp == 4) { /* 4-byte pixels take the SDL_memcpy4 path, which is passed the pixel count rather than a byte count; presumably it copies 32-bit units -- confirm */ \ | ||
114 | SDL_memcpy4(to, from, (size_t)(len)); \ | ||
115 | } else { /* every other depth: plain byte copy of len * bpp bytes */ \ | ||
116 | SDL_memcpy(to, from, (size_t)(len) * (bpp)); \ | ||
117 | } \ | ||
118 | } while(0) | ||
119 | |||
120 | /* | ||
121 | * Various colorkey blit methods, for opaque and per-surface alpha. All of them take the same (to, from, length, bpp, alpha) argument list so that CHOOSE_BLIT can substitute one for another. | ||
122 | */ | ||
123 | |||
124 | #define OPAQUE_BLIT(to, from, length, bpp, alpha) /* straight pixel copy; the alpha argument is accepted but not used */ \ | ||
125 | PIXEL_COPY(to, from, length, bpp) | ||
126 | |||
127 | #ifdef MMX_ASMBLIT | ||
128 | |||
129 | #define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha) /* MMX per-surface alpha blend of length 32-bit pixels from "from" into "to"; bpp is unused. Only compiled when MMX_ASMBLIT is defined. */ \ | ||
130 | do { \ | ||
131 | Uint32 *srcp = (Uint32 *)(from); \ | ||
132 | Uint32 *dstp = (Uint32 *)(to); \ | ||
133 | int i = 0x00FF00FF; \ | ||
134 | movd_m2r(*(&i), mm3); \ | ||
135 | punpckldq_r2r(mm3, mm3); \ | ||
136 | i = 0xFF000000; \ | ||
137 | movd_m2r(*(&i), mm7); \ | ||
138 | punpckldq_r2r(mm7, mm7); \ | ||
139 | i = alpha | alpha << 16; \ | ||
140 | movd_m2r(*(&i), mm4); \ | ||
141 | punpckldq_r2r(mm4, mm4); \ | ||
142 | pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */ \ | ||
143 | pxor_r2r(mm7, mm5); /* make clear alpha mask */ \ | ||
144 | i = length; \ | ||
145 | if(i & 1) { /* odd length: blend one pixel on its own so the loop below can work in pairs */ \ | ||
146 | movd_m2r((*srcp), mm1); /* src -> mm1 */ \ | ||
147 | punpcklbw_r2r(mm1, mm1); \ | ||
148 | pand_r2r(mm3, mm1); \ | ||
149 | movd_m2r((*dstp), mm2); /* dst -> mm2 */ \ | ||
150 | punpcklbw_r2r(mm2, mm2); \ | ||
151 | pand_r2r(mm3, mm2); \ | ||
152 | psubw_r2r(mm2, mm1); \ | ||
153 | pmullw_r2r(mm4, mm1); \ | ||
154 | psrlw_i2r(8, mm1); \ | ||
155 | paddw_r2r(mm1, mm2); \ | ||
156 | pand_r2r(mm3, mm2); \ | ||
157 | packuswb_r2r(mm2, mm2); \ | ||
158 | pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ | ||
159 | movd_r2m(mm2, *dstp); \ | ||
160 | ++srcp; \ | ||
161 | ++dstp; \ | ||
162 | i--; \ | ||
163 | } \ | ||
164 | for(; i > 0; --i) { /* two pixels per pass: i is decremented here and again at the bottom of the body */ \ | ||
165 | movq_m2r((*srcp), mm0); \ | ||
166 | movq_r2r(mm0, mm1); \ | ||
167 | punpcklbw_r2r(mm0, mm0); \ | ||
168 | movq_m2r((*dstp), mm2); \ | ||
169 | punpckhbw_r2r(mm1, mm1); \ | ||
170 | movq_r2r(mm2, mm6); \ | ||
171 | pand_r2r(mm3, mm0); \ | ||
172 | punpcklbw_r2r(mm2, mm2); \ | ||
173 | pand_r2r(mm3, mm1); \ | ||
174 | punpckhbw_r2r(mm6, mm6); \ | ||
175 | pand_r2r(mm3, mm2); \ | ||
176 | psubw_r2r(mm2, mm0); \ | ||
177 | pmullw_r2r(mm4, mm0); \ | ||
178 | pand_r2r(mm3, mm6); \ | ||
179 | psubw_r2r(mm6, mm1); \ | ||
180 | pmullw_r2r(mm4, mm1); \ | ||
181 | psrlw_i2r(8, mm0); \ | ||
182 | paddw_r2r(mm0, mm2); \ | ||
183 | psrlw_i2r(8, mm1); \ | ||
184 | paddw_r2r(mm1, mm6); \ | ||
185 | pand_r2r(mm3, mm2); \ | ||
186 | pand_r2r(mm3, mm6); \ | ||
187 | packuswb_r2r(mm2, mm2); \ | ||
188 | packuswb_r2r(mm6, mm6); \ | ||
189 | psrlq_i2r(32, mm2); \ | ||
190 | psllq_i2r(32, mm6); \ | ||
191 | por_r2r(mm6, mm2); \ | ||
192 | pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ | ||
193 | movq_r2m(mm2, *dstp); \ | ||
194 | srcp += 2; \ | ||
195 | dstp += 2; \ | ||
196 | i--; \ | ||
197 | } \ | ||
198 | emms(); \ | ||
199 | } while(0) | ||
200 | |||
201 | #define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha) /* MMX per-surface alpha blend of length 16-bit pixels using the masks 0xF800, 0x07E0 and 0x001F; bpp is unused. Only compiled when MMX_ASMBLIT is defined. */ \ | ||
202 | do { \ | ||
203 | int i, n = 0; \ | ||
204 | Uint16 *srcp = (Uint16 *)(from); \ | ||
205 | Uint16 *dstp = (Uint16 *)(to); \ | ||
206 | Uint32 ALPHA = 0xF800; \ | ||
207 | movd_m2r(*(&ALPHA), mm1); \ | ||
208 | punpcklwd_r2r(mm1, mm1); \ | ||
209 | punpcklwd_r2r(mm1, mm1); \ | ||
210 | ALPHA = 0x07E0; \ | ||
211 | movd_m2r(*(&ALPHA), mm4); \ | ||
212 | punpcklwd_r2r(mm4, mm4); \ | ||
213 | punpcklwd_r2r(mm4, mm4); \ | ||
214 | ALPHA = 0x001F; \ | ||
215 | movd_m2r(*(&ALPHA), mm7); \ | ||
216 | punpcklwd_r2r(mm7, mm7); \ | ||
217 | punpcklwd_r2r(mm7, mm7); \ | ||
218 | alpha &= ~(1+2+4); /* clears the low 3 bits of alpha; note that this assigns to the macro argument, so the variable passed in at the expansion site is modified */ \ | ||
219 | i = (Uint32)alpha | (Uint32)alpha << 16; \ | ||
220 | movd_m2r(*(&i), mm0); \ | ||
221 | punpckldq_r2r(mm0, mm0); \ | ||
222 | ALPHA = alpha >> 3; \ | ||
223 | i = ((int)(length) & 3); \ | ||
224 | for(; i > 0; --i) { /* scalar blend of the first (length & 3) pixels, counted in n */ \ | ||
225 | Uint32 s = *srcp++; \ | ||
226 | Uint32 d = *dstp; \ | ||
227 | s = (s | s << 16) & 0x07e0f81f; \ | ||
228 | d = (d | d << 16) & 0x07e0f81f; \ | ||
229 | d += (s - d) * ALPHA >> 5; \ | ||
230 | d &= 0x07e0f81f; \ | ||
231 | *dstp++ = d | d >> 16; \ | ||
232 | n++; \ | ||
233 | } \ | ||
234 | i = (int)(length) - n; \ | ||
235 | for(; i > 0; --i) { /* four pixels per pass: i drops by one here and by three more at the bottom of the body */ \ | ||
236 | movq_m2r((*dstp), mm3); \ | ||
237 | movq_m2r((*srcp), mm2); \ | ||
238 | movq_r2r(mm2, mm5); \ | ||
239 | pand_r2r(mm1 , mm5); \ | ||
240 | psrlq_i2r(11, mm5); \ | ||
241 | movq_r2r(mm3, mm6); \ | ||
242 | pand_r2r(mm1 , mm6); \ | ||
243 | psrlq_i2r(11, mm6); \ | ||
244 | psubw_r2r(mm6, mm5); \ | ||
245 | pmullw_r2r(mm0, mm5); \ | ||
246 | psrlw_i2r(8, mm5); \ | ||
247 | paddw_r2r(mm5, mm6); \ | ||
248 | psllq_i2r(11, mm6); \ | ||
249 | pand_r2r(mm1, mm6); \ | ||
250 | movq_r2r(mm4, mm5); \ | ||
251 | por_r2r(mm7, mm5); \ | ||
252 | pand_r2r(mm5, mm3); \ | ||
253 | por_r2r(mm6, mm3); \ | ||
254 | movq_r2r(mm2, mm5); \ | ||
255 | pand_r2r(mm4 , mm5); \ | ||
256 | psrlq_i2r(5, mm5); \ | ||
257 | movq_r2r(mm3, mm6); \ | ||
258 | pand_r2r(mm4 , mm6); \ | ||
259 | psrlq_i2r(5, mm6); \ | ||
260 | psubw_r2r(mm6, mm5); \ | ||
261 | pmullw_r2r(mm0, mm5); \ | ||
262 | psrlw_i2r(8, mm5); \ | ||
263 | paddw_r2r(mm5, mm6); \ | ||
264 | psllq_i2r(5, mm6); \ | ||
265 | pand_r2r(mm4, mm6); \ | ||
266 | movq_r2r(mm1, mm5); \ | ||
267 | por_r2r(mm7, mm5); \ | ||
268 | pand_r2r(mm5, mm3); \ | ||
269 | por_r2r(mm6, mm3); \ | ||
270 | movq_r2r(mm2, mm5); \ | ||
271 | pand_r2r(mm7 , mm5); \ | ||
272 | movq_r2r(mm3, mm6); \ | ||
273 | pand_r2r(mm7 , mm6); \ | ||
274 | psubw_r2r(mm6, mm5); \ | ||
275 | pmullw_r2r(mm0, mm5); \ | ||
276 | psrlw_i2r(8, mm5); \ | ||
277 | paddw_r2r(mm5, mm6); \ | ||
278 | pand_r2r(mm7, mm6); \ | ||
279 | movq_r2r(mm1, mm5); \ | ||
280 | por_r2r(mm4, mm5); \ | ||
281 | pand_r2r(mm5, mm3); \ | ||
282 | por_r2r(mm6, mm3); \ | ||
283 | movq_r2m(mm3, *dstp); \ | ||
284 | srcp += 4; \ | ||
285 | dstp += 4; \ | ||
286 | i -= 3; \ | ||
287 | } \ | ||
288 | emms(); \ | ||
289 | } while(0) | ||
290 | |||
291 | #define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha) /* MMX per-surface alpha blend of length 16-bit pixels using the masks 0x7C00, 0x03E0 and 0x001F; bpp is unused. Only compiled when MMX_ASMBLIT is defined. */ \ | ||
292 | do { \ | ||
293 | int i, n = 0; \ | ||
294 | Uint16 *srcp = (Uint16 *)(from); \ | ||
295 | Uint16 *dstp = (Uint16 *)(to); \ | ||
296 | Uint32 ALPHA = 0x7C00; \ | ||
297 | movd_m2r(*(&ALPHA), mm1); \ | ||
298 | punpcklwd_r2r(mm1, mm1); \ | ||
299 | punpcklwd_r2r(mm1, mm1); \ | ||
300 | ALPHA = 0x03E0; \ | ||
301 | movd_m2r(*(&ALPHA), mm4); \ | ||
302 | punpcklwd_r2r(mm4, mm4); \ | ||
303 | punpcklwd_r2r(mm4, mm4); \ | ||
304 | ALPHA = 0x001F; \ | ||
305 | movd_m2r(*(&ALPHA), mm7); \ | ||
306 | punpcklwd_r2r(mm7, mm7); \ | ||
307 | punpcklwd_r2r(mm7, mm7); \ | ||
308 | alpha &= ~(1+2+4); /* clears the low 3 bits of alpha; note that this assigns to the macro argument, so the variable passed in at the expansion site is modified */ \ | ||
309 | i = (Uint32)alpha | (Uint32)alpha << 16; \ | ||
310 | movd_m2r(*(&i), mm0); \ | ||
311 | punpckldq_r2r(mm0, mm0); \ | ||
312 | i = ((int)(length) & 3); \ | ||
313 | ALPHA = alpha >> 3; \ | ||
314 | for(; i > 0; --i) { /* scalar blend of the first (length & 3) pixels, counted in n */ \ | ||
315 | Uint32 s = *srcp++; \ | ||
316 | Uint32 d = *dstp; \ | ||
317 | s = (s | s << 16) & 0x03e07c1f; \ | ||
318 | d = (d | d << 16) & 0x03e07c1f; \ | ||
319 | d += (s - d) * ALPHA >> 5; \ | ||
320 | d &= 0x03e07c1f; \ | ||
321 | *dstp++ = d | d >> 16; \ | ||
322 | n++; \ | ||
323 | } \ | ||
324 | i = (int)(length) - n; \ | ||
325 | for(; i > 0; --i) { /* four pixels per pass: i drops by one here and by three more at the bottom of the body */ \ | ||
326 | movq_m2r((*dstp), mm3); \ | ||
327 | movq_m2r((*srcp), mm2); \ | ||
328 | movq_r2r(mm2, mm5); \ | ||
329 | pand_r2r(mm1 , mm5); \ | ||
330 | psrlq_i2r(10, mm5); \ | ||
331 | movq_r2r(mm3, mm6); \ | ||
332 | pand_r2r(mm1 , mm6); \ | ||
333 | psrlq_i2r(10, mm6); \ | ||
334 | psubw_r2r(mm6, mm5); \ | ||
335 | pmullw_r2r(mm0, mm5); \ | ||
336 | psrlw_i2r(8, mm5); \ | ||
337 | paddw_r2r(mm5, mm6); \ | ||
338 | psllq_i2r(10, mm6); \ | ||
339 | pand_r2r(mm1, mm6); \ | ||
340 | movq_r2r(mm4, mm5); \ | ||
341 | por_r2r(mm7, mm5); \ | ||
342 | pand_r2r(mm5, mm3); \ | ||
343 | por_r2r(mm6, mm3); \ | ||
344 | movq_r2r(mm2, mm5); \ | ||
345 | pand_r2r(mm4 , mm5); \ | ||
346 | psrlq_i2r(5, mm5); \ | ||
347 | movq_r2r(mm3, mm6); \ | ||
348 | pand_r2r(mm4 , mm6); \ | ||
349 | psrlq_i2r(5, mm6); \ | ||
350 | psubw_r2r(mm6, mm5); \ | ||
351 | pmullw_r2r(mm0, mm5); \ | ||
352 | psrlw_i2r(8, mm5); \ | ||
353 | paddw_r2r(mm5, mm6); \ | ||
354 | psllq_i2r(5, mm6); \ | ||
355 | pand_r2r(mm4, mm6); \ | ||
356 | movq_r2r(mm1, mm5); \ | ||
357 | por_r2r(mm7, mm5); \ | ||
358 | pand_r2r(mm5, mm3); \ | ||
359 | por_r2r(mm6, mm3); \ | ||
360 | movq_r2r(mm2, mm5); \ | ||
361 | pand_r2r(mm7 , mm5); \ | ||
362 | movq_r2r(mm3, mm6); \ | ||
363 | pand_r2r(mm7 , mm6); \ | ||
364 | psubw_r2r(mm6, mm5); \ | ||
365 | pmullw_r2r(mm0, mm5); \ | ||
366 | psrlw_i2r(8, mm5); \ | ||
367 | paddw_r2r(mm5, mm6); \ | ||
368 | pand_r2r(mm7, mm6); \ | ||
369 | movq_r2r(mm1, mm5); \ | ||
370 | por_r2r(mm4, mm5); \ | ||
371 | pand_r2r(mm5, mm3); \ | ||
372 | por_r2r(mm6, mm3); \ | ||
373 | movq_r2m(mm3, *dstp); \ | ||
374 | srcp += 4; \ | ||
375 | dstp += 4; \ | ||
376 | i -= 3; \ | ||
377 | } \ | ||
378 | emms(); \ | ||
379 | } while(0) | ||
380 | |||
381 | #endif | ||
382 | |||
383 | /* | ||
384 | * For 32bpp pixels of the form 0x00rrggbb: | ||
385 | * If we treat the middle component separately, we can process the two | ||
386 | * remaining in parallel. This is safe to do because of the gap to the left | ||
387 | * of each component, so the bits from the multiplication don't collide. | ||
388 | * This can be used for any RGB permutation of course. | ||
389 | */ | ||
390 | #define ALPHA_BLIT32_888(to, from, length, bpp, alpha) /* blend length 32-bit pixels of "from" into "to", weighting the source by alpha/256; bpp is unused */ \ | ||
391 | do { \ | ||
392 | int i; \ | ||
393 | Uint32 *src = (Uint32 *)(from); \ | ||
394 | Uint32 *dst = (Uint32 *)(to); \ | ||
395 | for(i = 0; i < (int)(length); i++) { \ | ||
396 | Uint32 s = *src++; \ | ||
397 | Uint32 d = *dst; \ | ||
398 | Uint32 s1 = s & 0xff00ff; /* the two outer components of the source */ \ | ||
399 | Uint32 d1 = d & 0xff00ff; /* the two outer components of the destination */ \ | ||
400 | d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; /* blend both outer components in one multiply */ \ | ||
401 | s &= 0xff00; \ | ||
402 | d &= 0xff00; \ | ||
403 | d = (d + ((s - d) * alpha >> 8)) & 0xff00; /* blend the middle component on its own */ \ | ||
404 | *dst++ = d1 | d; /* the masks above leave the top 8 bits of the stored pixel zero */ \ | ||
405 | } \ | ||
406 | } while(0) | ||
407 | |||
408 | /* | ||
409 | * For 16bpp pixels we can go a step further: put the middle component | ||
410 | * in the high 16 bits of a 32 bit word, and process all three RGB | ||
411 | * components at the same time. Since the smallest gap is here just | ||
412 | * 5 bits, we have to scale alpha down to 5 bits as well. | ||
413 | */ | ||
414 | #define ALPHA_BLIT16_565(to, from, length, bpp, alpha) /* blend length 16-bit pixels of "from" into "to"; bpp is unused */ \ | ||
415 | do { \ | ||
416 | int i; \ | ||
417 | Uint16 *src = (Uint16 *)(from); \ | ||
418 | Uint16 *dst = (Uint16 *)(to); \ | ||
419 | Uint32 ALPHA = alpha >> 3; /* alpha reduced to 5 bits, matching the >> 5 below */ \ | ||
420 | for(i = 0; i < (int)(length); i++) { \ | ||
421 | Uint32 s = *src++; \ | ||
422 | Uint32 d = *dst; \ | ||
423 | s = (s | s << 16) & 0x07e0f81f; /* spread: the 0x07e0 field goes to the high half, the 0xf81f fields stay low */ \ | ||
424 | d = (d | d << 16) & 0x07e0f81f; \ | ||
425 | d += (s - d) * ALPHA >> 5; \ | ||
426 | d &= 0x07e0f81f; \ | ||
427 | *dst++ = (Uint16)(d | d >> 16); /* fold the high half back down into one 16-bit pixel */ \ | ||
428 | } \ | ||
429 | } while(0) | ||
430 | |||
431 | #define ALPHA_BLIT16_555(to, from, length, bpp, alpha) /* same scheme as ALPHA_BLIT16_565 but with the 0x03e07c1f spread mask, i.e. three 5-bit fields; bpp is unused */ \ | ||
432 | do { \ | ||
433 | int i; \ | ||
434 | Uint16 *src = (Uint16 *)(from); \ | ||
435 | Uint16 *dst = (Uint16 *)(to); \ | ||
436 | Uint32 ALPHA = alpha >> 3; /* alpha reduced to 5 bits, matching the >> 5 below */ \ | ||
437 | for(i = 0; i < (int)(length); i++) { \ | ||
438 | Uint32 s = *src++; \ | ||
439 | Uint32 d = *dst; \ | ||
440 | s = (s | s << 16) & 0x03e07c1f; /* spread: the 0x03e0 field goes to the high half, the 0x7c1f fields stay low */ \ | ||
441 | d = (d | d << 16) & 0x03e07c1f; \ | ||
442 | d += (s - d) * ALPHA >> 5; \ | ||
443 | d &= 0x03e07c1f; \ | ||
444 | *dst++ = (Uint16)(d | d >> 16); /* fold the high half back down into one 16-bit pixel */ \ | ||
445 | } \ | ||
446 | } while(0) | ||
447 | |||
448 | /* | ||
449 | * The general slow catch-all function, for remaining depths and formats. It handles bpp values of 2, 3 and 4 and splits or rebuilds pixels through RGB_FROM_PIXEL / PIXEL_FROM_RGB using a pixel format named fmt, which is not a macro parameter and must be in scope where the macro is expanded. | ||
450 | */ | ||
451 | #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha) \ | ||
452 | do { \ | ||
453 | int i; \ | ||
454 | Uint8 *src = from; \ | ||
455 | Uint8 *dst = to; \ | ||
456 | for(i = 0; i < (int)(length); i++) { \ | ||
457 | Uint32 s, d; \ | ||
458 | unsigned rs, gs, bs, rd, gd, bd; \ | ||
459 | switch(bpp) { /* load one source and one destination pixel; there is no default case, so s and d stay unset for any other bpp */ \ | ||
460 | case 2: \ | ||
461 | s = *(Uint16 *)src; \ | ||
462 | d = *(Uint16 *)dst; \ | ||
463 | break; \ | ||
464 | case 3: /* 3-byte pixels are assembled byte by byte in the machine's byte order */ \ | ||
465 | if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \ | ||
466 | s = (src[0] << 16) | (src[1] << 8) | src[2]; \ | ||
467 | d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \ | ||
468 | } else { \ | ||
469 | s = (src[2] << 16) | (src[1] << 8) | src[0]; \ | ||
470 | d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \ | ||
471 | } \ | ||
472 | break; \ | ||
473 | case 4: \ | ||
474 | s = *(Uint32 *)src; \ | ||
475 | d = *(Uint32 *)dst; \ | ||
476 | break; \ | ||
477 | } \ | ||
478 | RGB_FROM_PIXEL(s, fmt, rs, gs, bs); \ | ||
479 | RGB_FROM_PIXEL(d, fmt, rd, gd, bd); \ | ||
480 | rd += (rs - rd) * alpha >> 8; /* per-component blend, source weighted by alpha/256 */ \ | ||
481 | gd += (gs - gd) * alpha >> 8; \ | ||
482 | bd += (bs - bd) * alpha >> 8; \ | ||
483 | PIXEL_FROM_RGB(d, fmt, rd, gd, bd); \ | ||
484 | switch(bpp) { /* store the blended pixel, mirroring the load above */ \ | ||
485 | case 2: \ | ||
486 | *(Uint16 *)dst = (Uint16)d; \ | ||
487 | break; \ | ||
488 | case 3: \ | ||
489 | if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \ | ||
490 | dst[0] = (Uint8)(d >> 16); \ | ||
491 | dst[1] = (Uint8)(d >> 8); \ | ||
492 | dst[2] = (Uint8)(d); \ | ||
493 | } else { \ | ||
494 | dst[0] = (Uint8)d; \ | ||
495 | dst[1] = (Uint8)(d >> 8); \ | ||
496 | dst[2] = (Uint8)(d >> 16); \ | ||
497 | } \ | ||
498 | break; \ | ||
499 | case 4: \ | ||
500 | *(Uint32 *)dst = d; \ | ||
501 | break; \ | ||
502 | } \ | ||
503 | src += bpp; \ | ||
504 | dst += bpp; \ | ||
505 | } \ | ||
506 | } while(0) | ||
507 | |||
508 | #ifdef MMX_ASMBLIT | ||
509 | |||
510 | #define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha) /* MMX 50% blend of length 32-bit pixels; bpp and alpha are unused. Only compiled when MMX_ASMBLIT is defined. */ \ | ||
511 | do { \ | ||
512 | Uint32 *srcp = (Uint32 *)(from); \ | ||
513 | Uint32 *dstp = (Uint32 *)(to); \ | ||
514 | int i = 0x00fefefe; \ | ||
515 | movd_m2r(*(&i), mm4); \ | ||
516 | punpckldq_r2r(mm4, mm4); \ | ||
517 | i = 0x00010101; \ | ||
518 | movd_m2r(*(&i), mm3); \ | ||
519 | punpckldq_r2r(mm3, mm3); \ | ||
520 | i = (int)(length); \ | ||
521 | if( i & 1 ) { /* odd length: blend one pixel with scalar code so the loop below can work in pairs */ \ | ||
522 | Uint32 s = *srcp++; \ | ||
523 | Uint32 d = *dstp; \ | ||
524 | *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \ | ||
525 | + (s & d & 0x00010101); \ | ||
526 | i--; \ | ||
527 | } \ | ||
528 | for(; i > 0; --i) { /* two pixels per pass: i is decremented here and again at the bottom of the body */ \ | ||
529 | movq_m2r((*dstp), mm2); /* dst -> mm2 */ \ | ||
530 | movq_r2r(mm2, mm6); /* dst -> mm6 */ \ | ||
531 | movq_m2r((*srcp), mm1); /* src -> mm1 */ \ | ||
532 | movq_r2r(mm1, mm5); /* src -> mm5 */ \ | ||
533 | pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */ \ | ||
534 | pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */ \ | ||
535 | paddd_r2r(mm6, mm5); /* (dst & 0x00fefefe) + (src & 0x00fefefe) -> mm5 */ \ | ||
536 | psrld_i2r(1, mm5); \ | ||
537 | pand_r2r(mm1, mm2); /* s & d -> mm2 */ \ | ||
538 | pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */ \ | ||
539 | paddd_r2r(mm5, mm2); \ | ||
540 | movq_r2m(mm2, (*dstp)); \ | ||
541 | dstp += 2; \ | ||
542 | srcp += 2; \ | ||
543 | i--; \ | ||
544 | } \ | ||
545 | emms(); \ | ||
546 | } while(0) | ||
547 | |||
548 | #endif | ||
549 | |||
550 | /* | ||
551 | * Special case: 50% alpha (alpha=128) | ||
552 | * This is treated specially because it can be optimized very well, and | ||
553 | * since it is good for many cases of semi-translucency. | ||
554 | * The theory is to do all three components at the same time: | ||
555 | * First zero the lowest bit of each component, which gives us room to | ||
556 | * add them. Then shift right and add the sum of the lowest bits. | ||
557 | */ | ||
558 | #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha) /* average length 32-bit pixels of "from" with "to"; bpp and alpha are unused */ \ | ||
559 | do { \ | ||
560 | int i; \ | ||
561 | Uint32 *src = (Uint32 *)(from); \ | ||
562 | Uint32 *dst = (Uint32 *)(to); \ | ||
563 | for(i = 0; i < (int)(length); i++) { \ | ||
564 | Uint32 s = *src++; \ | ||
565 | Uint32 d = *dst; \ | ||
566 | *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) /* halved sum of the components with their low bits cleared */ \ | ||
567 | + (s & d & 0x00010101); /* plus one wherever both low bits were set */ \ | ||
568 | } \ | ||
569 | } while(0) | ||
570 | |||
571 | /* | ||
572 | * For 16bpp, we can actually blend two pixels in parallel, if we take | ||
573 | * care to shift before we add, not after. | ||
574 | */ | ||
575 | |||
576 | /* helper: blend a single 16 bit pixel at 50%. Both dst and src are post-incremented, so they must be modifiable pointer variables; mask selects the bits that take part in the halved sum. */ | ||
577 | #define BLEND16_50(dst, src, mask) \ | ||
578 | do { \ | ||
579 | Uint32 s = *src++; \ | ||
580 | Uint32 d = *dst; \ | ||
581 | *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) + \ | ||
582 | (s & d & (~mask & 0xffff))); /* add back one wherever both pixels had a bit that mask excludes */ \ | ||
583 | } while(0) | ||
584 | |||
585 | /* basic 16bpp blender. mask is the set of bits to keep when adding. bpp and alpha are unused. */ | ||
586 | #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask) \ | ||
587 | do { \ | ||
588 | unsigned n = (length); \ | ||
589 | Uint16 *src = (Uint16 *)(from); \ | ||
590 | Uint16 *dst = (Uint16 *)(to); \ | ||
591 | if(((uintptr_t)src ^ (uintptr_t)dst) & 3) { \ | ||
592 | /* source and destination not in phase, blit one by one */ \ | ||
593 | while(n--) \ | ||
594 | BLEND16_50(dst, src, mask); \ | ||
595 | } else { \ | ||
596 | if((uintptr_t)src & 3) { \ | ||
597 | /* first odd pixel */ \ | ||
598 | BLEND16_50(dst, src, mask); \ | ||
599 | n--; \ | ||
600 | } \ | ||
601 | for(; n > 1; n -= 2) { /* two pixels at a time through 32-bit loads and stores */ \ | ||
602 | Uint32 s = *(Uint32 *)src; \ | ||
603 | Uint32 d = *(Uint32 *)dst; \ | ||
604 | *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) /* each operand is shifted before the add, not after */ \ | ||
605 | + ((d & (mask | mask << 16)) >> 1) \ | ||
606 | + (s & d & (~(mask | mask << 16))); \ | ||
607 | src += 2; \ | ||
608 | dst += 2; \ | ||
609 | } \ | ||
610 | if(n) \ | ||
611 | BLEND16_50(dst, src, mask); /* last odd pixel */ \ | ||
612 | } \ | ||
613 | } while(0) | ||
614 | |||
615 | #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha) /* 50% blend with mask 0xf7de, which leaves out bits 0, 5 and 11 */ \ | ||
616 | ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de) | ||
617 | |||
618 | #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) /* 50% blend with mask 0xfbde, which leaves out bits 0, 5 and 10 */ \ | ||
619 | ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde) | ||
620 | |||
621 | #ifdef MMX_ASMBLIT | ||
622 | |||
623 | #define CHOOSE_BLIT(blitter, alpha, fmt) /* MMX build: expands blitter(bpp, Type, do_blit) with the blit macro that matches fmt and alpha, testing SDL_HasMMX() to pick between the MMX and plain C variants. Type is Uint8 for 1 to 3 byte pixels and Uint16 for 4 byte pixels. */ \ | ||
624 | do { \ | ||
625 | if(alpha == 255) { /* fully opaque: plain copy for every depth */ \ | ||
626 | switch(fmt->BytesPerPixel) { \ | ||
627 | case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \ | ||
628 | case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \ | ||
629 | case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \ | ||
630 | case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \ | ||
631 | } \ | ||
632 | } else { \ | ||
633 | switch(fmt->BytesPerPixel) { \ | ||
634 | case 1: \ | ||
635 | /* No 8bpp alpha blitting */ \ | ||
636 | break; \ | ||
637 | \ | ||
638 | case 2: \ | ||
639 | switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \ | ||
640 | case 0xffff: /* all 16 bits used: fast path only if some component has the 0x07e0 mask */ \ | ||
641 | if(fmt->Gmask == 0x07e0 \ | ||
642 | || fmt->Rmask == 0x07e0 \ | ||
643 | || fmt->Bmask == 0x07e0) { \ | ||
644 | if(alpha == 128) \ | ||
645 | blitter(2, Uint8, ALPHA_BLIT16_565_50); \ | ||
646 | else { \ | ||
647 | if(SDL_HasMMX()) \ | ||
648 | blitter(2, Uint8, ALPHA_BLIT16_565MMX); \ | ||
649 | else \ | ||
650 | blitter(2, Uint8, ALPHA_BLIT16_565); \ | ||
651 | } \ | ||
652 | } else \ | ||
653 | goto general16; \ | ||
654 | break; \ | ||
655 | \ | ||
656 | case 0x7fff: /* 15 bits used: fast path only if some component has the 0x03e0 mask */ \ | ||
657 | if(fmt->Gmask == 0x03e0 \ | ||
658 | || fmt->Rmask == 0x03e0 \ | ||
659 | || fmt->Bmask == 0x03e0) { \ | ||
660 | if(alpha == 128) \ | ||
661 | blitter(2, Uint8, ALPHA_BLIT16_555_50); \ | ||
662 | else { \ | ||
663 | if(SDL_HasMMX()) \ | ||
664 | blitter(2, Uint8, ALPHA_BLIT16_555MMX); \ | ||
665 | else \ | ||
666 | blitter(2, Uint8, ALPHA_BLIT16_555); \ | ||
667 | } \ | ||
668 | break; \ | ||
669 | } \ | ||
670 | /* fallthrough */ \ | ||
671 | \ | ||
672 | default: \ | ||
673 | general16: \ | ||
674 | blitter(2, Uint8, ALPHA_BLIT_ANY); \ | ||
675 | } \ | ||
676 | break; \ | ||
677 | \ | ||
678 | case 3: \ | ||
679 | blitter(3, Uint8, ALPHA_BLIT_ANY); \ | ||
680 | break; \ | ||
681 | \ | ||
682 | case 4: /* fast path needs the RGB masks to cover exactly 0x00ffffff with some component at 0xff00 */ \ | ||
683 | if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \ | ||
684 | && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \ | ||
685 | || fmt->Bmask == 0xff00)) { \ | ||
686 | if(alpha == 128) \ | ||
687 | { \ | ||
688 | if(SDL_HasMMX()) \ | ||
689 | blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);\ | ||
690 | else \ | ||
691 | blitter(4, Uint16, ALPHA_BLIT32_888_50);\ | ||
692 | } \ | ||
693 | else \ | ||
694 | { \ | ||
695 | if(SDL_HasMMX()) \ | ||
696 | blitter(4, Uint16, ALPHA_BLIT32_888MMX);\ | ||
697 | else \ | ||
698 | blitter(4, Uint16, ALPHA_BLIT32_888); \ | ||
699 | } \ | ||
700 | } else \ | ||
701 | blitter(4, Uint16, ALPHA_BLIT_ANY); \ | ||
702 | break; \ | ||
703 | } \ | ||
704 | } \ | ||
705 | } while(0) | ||
706 | |||
707 | #else | ||
708 | |||
709 | #define CHOOSE_BLIT(blitter, alpha, fmt) /* portable build: expands blitter(bpp, Type, do_blit) with the blit macro that matches fmt and alpha. Type is Uint8 for 1 to 3 byte pixels and Uint16 for 4 byte pixels. */ \ | ||
710 | do { \ | ||
711 | if(alpha == 255) { /* fully opaque: plain copy for every depth */ \ | ||
712 | switch(fmt->BytesPerPixel) { \ | ||
713 | case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \ | ||
714 | case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \ | ||
715 | case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \ | ||
716 | case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \ | ||
717 | } \ | ||
718 | } else { \ | ||
719 | switch(fmt->BytesPerPixel) { \ | ||
720 | case 1: \ | ||
721 | /* No 8bpp alpha blitting */ \ | ||
722 | break; \ | ||
723 | \ | ||
724 | case 2: \ | ||
725 | switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \ | ||
726 | case 0xffff: /* all 16 bits used: fast path only if some component has the 0x07e0 mask */ \ | ||
727 | if(fmt->Gmask == 0x07e0 \ | ||
728 | || fmt->Rmask == 0x07e0 \ | ||
729 | || fmt->Bmask == 0x07e0) { \ | ||
730 | if(alpha == 128) \ | ||
731 | blitter(2, Uint8, ALPHA_BLIT16_565_50); \ | ||
732 | else { \ | ||
733 | blitter(2, Uint8, ALPHA_BLIT16_565); \ | ||
734 | } \ | ||
735 | } else \ | ||
736 | goto general16; \ | ||
737 | break; \ | ||
738 | \ | ||
739 | case 0x7fff: /* 15 bits used: fast path only if some component has the 0x03e0 mask */ \ | ||
740 | if(fmt->Gmask == 0x03e0 \ | ||
741 | || fmt->Rmask == 0x03e0 \ | ||
742 | || fmt->Bmask == 0x03e0) { \ | ||
743 | if(alpha == 128) \ | ||
744 | blitter(2, Uint8, ALPHA_BLIT16_555_50); \ | ||
745 | else { \ | ||
746 | blitter(2, Uint8, ALPHA_BLIT16_555); \ | ||
747 | } \ | ||
748 | break; \ | ||
749 | } \ | ||
750 | /* fallthrough */ \ | ||
751 | \ | ||
752 | default: \ | ||
753 | general16: \ | ||
754 | blitter(2, Uint8, ALPHA_BLIT_ANY); \ | ||
755 | } \ | ||
756 | break; \ | ||
757 | \ | ||
758 | case 3: \ | ||
759 | blitter(3, Uint8, ALPHA_BLIT_ANY); \ | ||
760 | break; \ | ||
761 | \ | ||
762 | case 4: /* fast path needs the RGB masks to cover exactly 0x00ffffff with some component at 0xff00 */ \ | ||
763 | if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \ | ||
764 | && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \ | ||
765 | || fmt->Bmask == 0xff00)) { \ | ||
766 | if(alpha == 128) \ | ||
767 | blitter(4, Uint16, ALPHA_BLIT32_888_50); \ | ||
768 | else \ | ||
769 | blitter(4, Uint16, ALPHA_BLIT32_888); \ | ||
770 | } else \ | ||
771 | blitter(4, Uint16, ALPHA_BLIT_ANY); \ | ||
772 | break; \ | ||
773 | } \ | ||
774 | } \ | ||
775 | } while(0) | ||
776 | |||
777 | #endif | ||
778 | |||
779 | /* | ||
780 | * This takes care of the case when the surface is clipped on the left and/or | ||
781 | * right. Top clipping has already been taken care of. Of each encoded line, w pixels wide, only columns from srcrect->x up to but not including srcrect->x + srcrect->w are drawn, for srcrect->h lines or until the end marker of the stream. srcbuf points into the RLE stream and dstbuf at the destination pixel for the first drawn column; alpha selects the blit macro through CHOOSE_BLIT. | ||
782 | */ | ||
783 | static void RLEClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst, | ||
784 | Uint8 *dstbuf, SDL_Rect *srcrect, unsigned alpha) | ||
785 | { | ||
786 | SDL_PixelFormat *fmt = dst->format; | ||
787 | |||
788 | #define RLECLIPBLIT(bpp, Type, do_blit) /* Type is the integer type of the skip and run counters in the stream */ \ | ||
789 | do { \ | ||
790 | int linecount = srcrect->h; \ | ||
791 | int ofs = 0; \ | ||
792 | int left = srcrect->x; \ | ||
793 | int right = left + srcrect->w; \ | ||
794 | dstbuf -= left * bpp; /* rebase so that source column ofs maps straight to dstbuf + ofs * bpp */ \ | ||
795 | for(;;) { \ | ||
796 | int run; \ | ||
797 | ofs += *(Type *)srcbuf; /* skip count: advance past the transparent pixels */ \ | ||
798 | run = ((Type *)srcbuf)[1]; \ | ||
799 | srcbuf += 2 * sizeof(Type); \ | ||
800 | if(run) { \ | ||
801 | /* clip to left and right borders */ \ | ||
802 | if(ofs < right) { \ | ||
803 | int start = 0; \ | ||
804 | int len = run; \ | ||
805 | int startcol; \ | ||
806 | if(left - ofs > 0) { /* the run begins left of the clip area: drop its leading pixels */ \ | ||
807 | start = left - ofs; \ | ||
808 | len -= start; \ | ||
809 | if(len <= 0) \ | ||
810 | goto nocopy ## bpp ## do_blit; \ | ||
811 | } \ | ||
812 | startcol = ofs + start; \ | ||
813 | if(len > right - startcol) /* the run extends past the right edge: shorten it */ \ | ||
814 | len = right - startcol; \ | ||
815 | do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \ | ||
816 | len, bpp, alpha); \ | ||
817 | } \ | ||
818 | nocopy ## bpp ## do_blit: /* label name is pasted from bpp and do_blit so each expansion gets its own */ \ | ||
819 | srcbuf += run * bpp; \ | ||
820 | ofs += run; \ | ||
821 | } else if(!ofs) /* zero skip and zero run at the start of a line: end of the stream */ \ | ||
822 | break; \ | ||
823 | if(ofs == w) { /* a full line has been consumed: move to the next destination row */ \ | ||
824 | ofs = 0; \ | ||
825 | dstbuf += dst->pitch; \ | ||
826 | if(!--linecount) \ | ||
827 | break; \ | ||
828 | } \ | ||
829 | } \ | ||
830 | } while(0) | ||
831 | |||
832 | CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt); | ||
833 | |||
834 | #undef RLECLIPBLIT | ||
835 | |||
836 | } | ||
837 | |||
838 | |||
839 | /* blit a colorkeyed RLE surface: draws the srcrect part of src's RLE data (src->map->sw_data->aux_data) into dst at dstrect->x, dstrect->y. Returns -1 if dst needs locking and the lock fails, otherwise 0. */ | ||
840 | int SDL_RLEBlit(SDL_Surface *src, SDL_Rect *srcrect, | ||
841 | SDL_Surface *dst, SDL_Rect *dstrect) | ||
842 | { | ||
843 | Uint8 *dstbuf; | ||
844 | Uint8 *srcbuf; | ||
845 | int x, y; | ||
846 | int w = src->w; | ||
847 | unsigned alpha; | ||
848 | |||
849 | /* Lock the destination if necessary */ | ||
850 | if ( SDL_MUSTLOCK(dst) ) { | ||
851 | if ( SDL_LockSurface(dst) < 0 ) { | ||
852 | return(-1); | ||
853 | } | ||
854 | } | ||
855 | |||
856 | /* Set up the source and destination pointers */ | ||
857 | x = dstrect->x; | ||
858 | y = dstrect->y; | ||
859 | dstbuf = (Uint8 *)dst->pixels | ||
860 | + y * dst->pitch + x * src->format->BytesPerPixel; /* NOTE(review): the destination offset uses the source format's pixel size; presumably the RLE data is already in the destination's format -- confirm */ | ||
861 | srcbuf = (Uint8 *)src->map->sw_data->aux_data; | ||
862 | |||
863 | { | ||
864 | /* skip lines at the top if necessary */ | ||
865 | int vskip = srcrect->y; | ||
866 | int ofs = 0; | ||
867 | if(vskip) { | ||
868 | |||
869 | #define RLESKIP(bpp, Type) /* walk the stream without drawing until vskip whole lines are consumed; jumps to done if the end marker comes first */ \ | ||
870 | for(;;) { \ | ||
871 | int run; \ | ||
872 | ofs += *(Type *)srcbuf; \ | ||
873 | run = ((Type *)srcbuf)[1]; \ | ||
874 | srcbuf += sizeof(Type) * 2; \ | ||
875 | if(run) { \ | ||
876 | srcbuf += run * bpp; \ | ||
877 | ofs += run; \ | ||
878 | } else if(!ofs) \ | ||
879 | goto done; \ | ||
880 | if(ofs == w) { \ | ||
881 | ofs = 0; \ | ||
882 | if(!--vskip) \ | ||
883 | break; \ | ||
884 | } \ | ||
885 | } | ||
886 | |||
887 | switch(src->format->BytesPerPixel) { | ||
888 | case 1: RLESKIP(1, Uint8); break; | ||
889 | case 2: RLESKIP(2, Uint8); break; | ||
890 | case 3: RLESKIP(3, Uint8); break; | ||
891 | case 4: RLESKIP(4, Uint16); break; | ||
892 | } | ||
893 | |||
894 | #undef RLESKIP | ||
895 | |||
896 | } | ||
897 | } | ||
898 | |||
899 | alpha = (src->flags & SDL_SRCALPHA) == SDL_SRCALPHA /* per-surface alpha only when SDL_SRCALPHA is set, otherwise 255 */ | ||
900 | ? src->format->alpha : 255; | ||
901 | /* if left or right edge clipping needed, call clip blit */ | ||
902 | if ( srcrect->x || srcrect->w != src->w ) { | ||
903 | RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha); | ||
904 | } else { | ||
905 | SDL_PixelFormat *fmt = src->format; | ||
906 | |||
907 | #define RLEBLIT(bpp, Type, do_blit) /* unclipped case: every run is drawn in full at column ofs */ \ | ||
908 | do { \ | ||
909 | int linecount = srcrect->h; \ | ||
910 | int ofs = 0; \ | ||
911 | for(;;) { \ | ||
912 | unsigned run; \ | ||
913 | ofs += *(Type *)srcbuf; \ | ||
914 | run = ((Type *)srcbuf)[1]; \ | ||
915 | srcbuf += 2 * sizeof(Type); \ | ||
916 | if(run) { \ | ||
917 | do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \ | ||
918 | srcbuf += run * bpp; \ | ||
919 | ofs += run; \ | ||
920 | } else if(!ofs) /* zero skip and zero run at the start of a line: end of the stream */ \ | ||
921 | break; \ | ||
922 | if(ofs == w) { /* a full line has been consumed: move to the next destination row */ \ | ||
923 | ofs = 0; \ | ||
924 | dstbuf += dst->pitch; \ | ||
925 | if(!--linecount) \ | ||
926 | break; \ | ||
927 | } \ | ||
928 | } \ | ||
929 | } while(0) | ||
930 | |||
931 | CHOOSE_BLIT(RLEBLIT, alpha, fmt); | ||
932 | |||
933 | #undef RLEBLIT | ||
934 | } | ||
935 | |||
936 | done: | ||
937 | /* Unlock the destination if necessary */ | ||
938 | if ( SDL_MUSTLOCK(dst) ) { | ||
939 | SDL_UnlockSurface(dst); | ||
940 | } | ||
941 | return(0); | ||
942 | } | ||
943 | |||
944 | #undef OPAQUE_BLIT | ||
945 | |||
/*
 * Per-pixel blend macros for translucent pixels.  Each takes an encoded
 * source pixel `src` and a destination lvalue `dst`, and overwrites `dst`
 * with the blended result.
 */

/*
 * 32bpp: the alpha is taken from the top 8 bits of the source pixel.
 * The 0xff00ff channels are interpolated together in one multiply and
 * the 0xff00 channel in a second one.
 */
#define BLIT_TRANSL_888(src, dst)                                       \
    do {                                                                \
        Uint32 s_pix = (src);                                           \
        Uint32 d_pix = (dst);                                           \
        unsigned a_val = s_pix >> 24;                                   \
        Uint32 s_rb = s_pix & 0xff00ff;                                 \
        Uint32 d_rb = d_pix & 0xff00ff;                                 \
        Uint32 s_g = s_pix & 0xff00;                                    \
        Uint32 d_g = d_pix & 0xff00;                                    \
        d_rb = (d_rb + (((s_rb - d_rb) * a_val) >> 8)) & 0xff00ff;      \
        d_g = (d_g + (((s_g - d_g) * a_val) >> 8)) & 0xff00;            \
        (dst) = d_rb | d_g;                                             \
    } while(0)

/*
 * 16bpp: a 5-bit alpha is read from bits 5-9 of the source pixel (mask
 * 0x3e0).  The destination pixel is spread over 32 bits with `mask` so
 * that all three colour components are interpolated in a single multiply,
 * then folded back to 16 bits.
 */
#define BLIT_TRANSL_16_MASKED(src, dst, mask)                           \
    do {                                                                \
        Uint32 s_pix = (src);                                           \
        Uint32 d_pix = (dst);                                           \
        unsigned a_val = (s_pix & 0x3e0) >> 5;                          \
        s_pix &= (mask);                                                \
        d_pix = (d_pix | d_pix << 16) & (mask);                         \
        d_pix += (s_pix - d_pix) * a_val >> 5;                          \
        d_pix &= (mask);                                                \
        (dst) = (Uint16)(d_pix | d_pix >> 16);                          \
    } while(0)

/* 565 destination */
#define BLIT_TRANSL_565(src, dst)                                       \
    BLIT_TRANSL_16_MASKED(src, dst, 0x07e0f81f)

/* 555 destination */
#define BLIT_TRANSL_555(src, dst)                                       \
    BLIT_TRANSL_16_MASKED(src, dst, 0x03e07c1f)
996 | |||
997 | /* used to save the destination format in the encoding. Designed to be | ||
998 | macro-compatible with SDL_PixelFormat but without the unneeded fields */ | ||
999 | typedef struct { | ||
1000 | Uint8 BytesPerPixel; | ||
1001 | Uint8 Rloss; | ||
1002 | Uint8 Gloss; | ||
1003 | Uint8 Bloss; | ||
1004 | Uint8 Rshift; | ||
1005 | Uint8 Gshift; | ||
1006 | Uint8 Bshift; | ||
1007 | Uint8 Ashift; | ||
1008 | Uint32 Rmask; | ||
1009 | Uint32 Gmask; | ||
1010 | Uint32 Bmask; | ||
1011 | Uint32 Amask; | ||
1012 | } RLEDestFormat; | ||
1013 | |||
1014 | /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */ | ||
1015 | static void RLEAlphaClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst, | ||
1016 | Uint8 *dstbuf, SDL_Rect *srcrect) | ||
1017 | { | ||
1018 | SDL_PixelFormat *df = dst->format; | ||
1019 | /* | ||
1020 | * clipped blitter: Ptype is the destination pixel type, | ||
1021 | * Ctype the translucent count type, and do_blend the macro | ||
1022 | * to blend one pixel. | ||
1023 | */ | ||
1024 | #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend) \ | ||
1025 | do { \ | ||
1026 | int linecount = srcrect->h; \ | ||
1027 | int left = srcrect->x; \ | ||
1028 | int right = left + srcrect->w; \ | ||
1029 | dstbuf -= left * sizeof(Ptype); \ | ||
1030 | do { \ | ||
1031 | int ofs = 0; \ | ||
1032 | /* blit opaque pixels on one line */ \ | ||
1033 | do { \ | ||
1034 | unsigned run; \ | ||
1035 | ofs += ((Ctype *)srcbuf)[0]; \ | ||
1036 | run = ((Ctype *)srcbuf)[1]; \ | ||
1037 | srcbuf += 2 * sizeof(Ctype); \ | ||
1038 | if(run) { \ | ||
1039 | /* clip to left and right borders */ \ | ||
1040 | int cofs = ofs; \ | ||
1041 | int crun = run; \ | ||
1042 | if(left - cofs > 0) { \ | ||
1043 | crun -= left - cofs; \ | ||
1044 | cofs = left; \ | ||
1045 | } \ | ||
1046 | if(crun > right - cofs) \ | ||
1047 | crun = right - cofs; \ | ||
1048 | if(crun > 0) \ | ||
1049 | PIXEL_COPY(dstbuf + cofs * sizeof(Ptype), \ | ||
1050 | srcbuf + (cofs - ofs) * sizeof(Ptype), \ | ||
1051 | (unsigned)crun, sizeof(Ptype)); \ | ||
1052 | srcbuf += run * sizeof(Ptype); \ | ||
1053 | ofs += run; \ | ||
1054 | } else if(!ofs) \ | ||
1055 | return; \ | ||
1056 | } while(ofs < w); \ | ||
1057 | /* skip padding if necessary */ \ | ||
1058 | if(sizeof(Ptype) == 2) \ | ||
1059 | srcbuf += (uintptr_t)srcbuf & 2; \ | ||
1060 | /* blit translucent pixels on the same line */ \ | ||
1061 | ofs = 0; \ | ||
1062 | do { \ | ||
1063 | unsigned run; \ | ||
1064 | ofs += ((Uint16 *)srcbuf)[0]; \ | ||
1065 | run = ((Uint16 *)srcbuf)[1]; \ | ||
1066 | srcbuf += 4; \ | ||
1067 | if(run) { \ | ||
1068 | /* clip to left and right borders */ \ | ||
1069 | int cofs = ofs; \ | ||
1070 | int crun = run; \ | ||
1071 | if(left - cofs > 0) { \ | ||
1072 | crun -= left - cofs; \ | ||
1073 | cofs = left; \ | ||
1074 | } \ | ||
1075 | if(crun > right - cofs) \ | ||
1076 | crun = right - cofs; \ | ||
1077 | if(crun > 0) { \ | ||
1078 | Ptype *dst = (Ptype *)dstbuf + cofs; \ | ||
1079 | Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs); \ | ||
1080 | int i; \ | ||
1081 | for(i = 0; i < crun; i++) \ | ||
1082 | do_blend(src[i], dst[i]); \ | ||
1083 | } \ | ||
1084 | srcbuf += run * 4; \ | ||
1085 | ofs += run; \ | ||
1086 | } \ | ||
1087 | } while(ofs < w); \ | ||
1088 | dstbuf += dst->pitch; \ | ||
1089 | } while(--linecount); \ | ||
1090 | } while(0) | ||
1091 | |||
1092 | switch(df->BytesPerPixel) { | ||
1093 | case 2: | ||
1094 | if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0 | ||
1095 | || df->Bmask == 0x07e0) | ||
1096 | RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565); | ||
1097 | else | ||
1098 | RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555); | ||
1099 | break; | ||
1100 | case 4: | ||
1101 | RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888); | ||
1102 | break; | ||
1103 | } | ||
1104 | } | ||
1105 | |||
1106 | /* blit a pixel-alpha RLE surface */ | ||
1107 | int SDL_RLEAlphaBlit(SDL_Surface *src, SDL_Rect *srcrect, | ||
1108 | SDL_Surface *dst, SDL_Rect *dstrect) | ||
1109 | { | ||
1110 | int x, y; | ||
1111 | int w = src->w; | ||
1112 | Uint8 *srcbuf, *dstbuf; | ||
1113 | SDL_PixelFormat *df = dst->format; | ||
1114 | |||
1115 | /* Lock the destination if necessary */ | ||
1116 | if ( SDL_MUSTLOCK(dst) ) { | ||
1117 | if ( SDL_LockSurface(dst) < 0 ) { | ||
1118 | return -1; | ||
1119 | } | ||
1120 | } | ||
1121 | |||
1122 | x = dstrect->x; | ||
1123 | y = dstrect->y; | ||
1124 | dstbuf = (Uint8 *)dst->pixels | ||
1125 | + y * dst->pitch + x * df->BytesPerPixel; | ||
1126 | srcbuf = (Uint8 *)src->map->sw_data->aux_data + sizeof(RLEDestFormat); | ||
1127 | |||
1128 | { | ||
1129 | /* skip lines at the top if necessary */ | ||
1130 | int vskip = srcrect->y; | ||
1131 | if(vskip) { | ||
1132 | int ofs; | ||
1133 | if(df->BytesPerPixel == 2) { | ||
1134 | /* the 16/32 interleaved format */ | ||
1135 | do { | ||
1136 | /* skip opaque line */ | ||
1137 | ofs = 0; | ||
1138 | do { | ||
1139 | int run; | ||
1140 | ofs += srcbuf[0]; | ||
1141 | run = srcbuf[1]; | ||
1142 | srcbuf += 2; | ||
1143 | if(run) { | ||
1144 | srcbuf += 2 * run; | ||
1145 | ofs += run; | ||
1146 | } else if(!ofs) | ||
1147 | goto done; | ||
1148 | } while(ofs < w); | ||
1149 | |||
1150 | /* skip padding */ | ||
1151 | srcbuf += (uintptr_t)srcbuf & 2; | ||
1152 | |||
1153 | /* skip translucent line */ | ||
1154 | ofs = 0; | ||
1155 | do { | ||
1156 | int run; | ||
1157 | ofs += ((Uint16 *)srcbuf)[0]; | ||
1158 | run = ((Uint16 *)srcbuf)[1]; | ||
1159 | srcbuf += 4 * (run + 1); | ||
1160 | ofs += run; | ||
1161 | } while(ofs < w); | ||
1162 | } while(--vskip); | ||
1163 | } else { | ||
1164 | /* the 32/32 interleaved format */ | ||
1165 | vskip <<= 1; /* opaque and translucent have same format */ | ||
1166 | do { | ||
1167 | ofs = 0; | ||
1168 | do { | ||
1169 | int run; | ||
1170 | ofs += ((Uint16 *)srcbuf)[0]; | ||
1171 | run = ((Uint16 *)srcbuf)[1]; | ||
1172 | srcbuf += 4; | ||
1173 | if(run) { | ||
1174 | srcbuf += 4 * run; | ||
1175 | ofs += run; | ||
1176 | } else if(!ofs) | ||
1177 | goto done; | ||
1178 | } while(ofs < w); | ||
1179 | } while(--vskip); | ||
1180 | } | ||
1181 | } | ||
1182 | } | ||
1183 | |||
1184 | /* if left or right edge clipping needed, call clip blit */ | ||
1185 | if(srcrect->x || srcrect->w != src->w) { | ||
1186 | RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect); | ||
1187 | } else { | ||
1188 | |||
1189 | /* | ||
1190 | * non-clipped blitter. Ptype is the destination pixel type, | ||
1191 | * Ctype the translucent count type, and do_blend the | ||
1192 | * macro to blend one pixel. | ||
1193 | */ | ||
1194 | #define RLEALPHABLIT(Ptype, Ctype, do_blend) \ | ||
1195 | do { \ | ||
1196 | int linecount = srcrect->h; \ | ||
1197 | do { \ | ||
1198 | int ofs = 0; \ | ||
1199 | /* blit opaque pixels on one line */ \ | ||
1200 | do { \ | ||
1201 | unsigned run; \ | ||
1202 | ofs += ((Ctype *)srcbuf)[0]; \ | ||
1203 | run = ((Ctype *)srcbuf)[1]; \ | ||
1204 | srcbuf += 2 * sizeof(Ctype); \ | ||
1205 | if(run) { \ | ||
1206 | PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \ | ||
1207 | run, sizeof(Ptype)); \ | ||
1208 | srcbuf += run * sizeof(Ptype); \ | ||
1209 | ofs += run; \ | ||
1210 | } else if(!ofs) \ | ||
1211 | goto done; \ | ||
1212 | } while(ofs < w); \ | ||
1213 | /* skip padding if necessary */ \ | ||
1214 | if(sizeof(Ptype) == 2) \ | ||
1215 | srcbuf += (uintptr_t)srcbuf & 2; \ | ||
1216 | /* blit translucent pixels on the same line */ \ | ||
1217 | ofs = 0; \ | ||
1218 | do { \ | ||
1219 | unsigned run; \ | ||
1220 | ofs += ((Uint16 *)srcbuf)[0]; \ | ||
1221 | run = ((Uint16 *)srcbuf)[1]; \ | ||
1222 | srcbuf += 4; \ | ||
1223 | if(run) { \ | ||
1224 | Ptype *dst = (Ptype *)dstbuf + ofs; \ | ||
1225 | unsigned i; \ | ||
1226 | for(i = 0; i < run; i++) { \ | ||
1227 | Uint32 src = *(Uint32 *)srcbuf; \ | ||
1228 | do_blend(src, *dst); \ | ||
1229 | srcbuf += 4; \ | ||
1230 | dst++; \ | ||
1231 | } \ | ||
1232 | ofs += run; \ | ||
1233 | } \ | ||
1234 | } while(ofs < w); \ | ||
1235 | dstbuf += dst->pitch; \ | ||
1236 | } while(--linecount); \ | ||
1237 | } while(0) | ||
1238 | |||
1239 | switch(df->BytesPerPixel) { | ||
1240 | case 2: | ||
1241 | if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0 | ||
1242 | || df->Bmask == 0x07e0) | ||
1243 | RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565); | ||
1244 | else | ||
1245 | RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555); | ||
1246 | break; | ||
1247 | case 4: | ||
1248 | RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888); | ||
1249 | break; | ||
1250 | } | ||
1251 | } | ||
1252 | |||
1253 | done: | ||
1254 | /* Unlock the destination if necessary */ | ||
1255 | if ( SDL_MUSTLOCK(dst) ) { | ||
1256 | SDL_UnlockSurface(dst); | ||
1257 | } | ||
1258 | return 0; | ||
1259 | } | ||
1260 | |||
1261 | /* | ||
1262 | * Auxiliary functions: | ||
1263 | * The encoding functions take 32bpp rgb + a, and | ||
1264 | * return the number of bytes copied to the destination. | ||
1265 | * The decoding functions copy to 32bpp rgb + a, and | ||
1266 | * return the number of bytes copied from the source. | ||
1267 | * These are only used in the encoder and un-RLE code and are therefore not | ||
1268 | * highly optimised. | ||
1269 | */ | ||
1270 | |||
1271 | /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */ | ||
1272 | static int copy_opaque_16(void *dst, Uint32 *src, int n, | ||
1273 | SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) | ||
1274 | { | ||
1275 | int i; | ||
1276 | Uint16 *d = dst; | ||
1277 | for(i = 0; i < n; i++) { | ||
1278 | unsigned r, g, b; | ||
1279 | RGB_FROM_PIXEL(*src, sfmt, r, g, b); | ||
1280 | PIXEL_FROM_RGB(*d, dfmt, r, g, b); | ||
1281 | src++; | ||
1282 | d++; | ||
1283 | } | ||
1284 | return n * 2; | ||
1285 | } | ||
1286 | |||
1287 | /* decode opaque pixels from 16bpp to 32bpp rgb + a */ | ||
1288 | static int uncopy_opaque_16(Uint32 *dst, void *src, int n, | ||
1289 | RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) | ||
1290 | { | ||
1291 | int i; | ||
1292 | Uint16 *s = src; | ||
1293 | unsigned alpha = dfmt->Amask ? 255 : 0; | ||
1294 | for(i = 0; i < n; i++) { | ||
1295 | unsigned r, g, b; | ||
1296 | RGB_FROM_PIXEL(*s, sfmt, r, g, b); | ||
1297 | PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha); | ||
1298 | s++; | ||
1299 | dst++; | ||
1300 | } | ||
1301 | return n * 2; | ||
1302 | } | ||
1303 | |||
1304 | |||
1305 | |||
1306 | /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */ | ||
1307 | static int copy_transl_565(void *dst, Uint32 *src, int n, | ||
1308 | SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) | ||
1309 | { | ||
1310 | int i; | ||
1311 | Uint32 *d = dst; | ||
1312 | for(i = 0; i < n; i++) { | ||
1313 | unsigned r, g, b, a; | ||
1314 | Uint16 pix; | ||
1315 | RGBA_FROM_8888(*src, sfmt, r, g, b, a); | ||
1316 | PIXEL_FROM_RGB(pix, dfmt, r, g, b); | ||
1317 | *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0); | ||
1318 | src++; | ||
1319 | d++; | ||
1320 | } | ||
1321 | return n * 4; | ||
1322 | } | ||
1323 | |||
1324 | /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */ | ||
1325 | static int copy_transl_555(void *dst, Uint32 *src, int n, | ||
1326 | SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) | ||
1327 | { | ||
1328 | int i; | ||
1329 | Uint32 *d = dst; | ||
1330 | for(i = 0; i < n; i++) { | ||
1331 | unsigned r, g, b, a; | ||
1332 | Uint16 pix; | ||
1333 | RGBA_FROM_8888(*src, sfmt, r, g, b, a); | ||
1334 | PIXEL_FROM_RGB(pix, dfmt, r, g, b); | ||
1335 | *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0); | ||
1336 | src++; | ||
1337 | d++; | ||
1338 | } | ||
1339 | return n * 4; | ||
1340 | } | ||
1341 | |||
1342 | /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */ | ||
1343 | static int uncopy_transl_16(Uint32 *dst, void *src, int n, | ||
1344 | RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) | ||
1345 | { | ||
1346 | int i; | ||
1347 | Uint32 *s = src; | ||
1348 | for(i = 0; i < n; i++) { | ||
1349 | unsigned r, g, b, a; | ||
1350 | Uint32 pix = *s++; | ||
1351 | a = (pix & 0x3e0) >> 2; | ||
1352 | pix = (pix & ~0x3e0) | pix >> 16; | ||
1353 | RGB_FROM_PIXEL(pix, sfmt, r, g, b); | ||
1354 | PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a); | ||
1355 | dst++; | ||
1356 | } | ||
1357 | return n * 4; | ||
1358 | } | ||
1359 | |||
1360 | /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */ | ||
1361 | static int copy_32(void *dst, Uint32 *src, int n, | ||
1362 | SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) | ||
1363 | { | ||
1364 | int i; | ||
1365 | Uint32 *d = dst; | ||
1366 | for(i = 0; i < n; i++) { | ||
1367 | unsigned r, g, b, a; | ||
1368 | Uint32 pixel; | ||
1369 | RGBA_FROM_8888(*src, sfmt, r, g, b, a); | ||
1370 | PIXEL_FROM_RGB(pixel, dfmt, r, g, b); | ||
1371 | *d++ = pixel | a << 24; | ||
1372 | src++; | ||
1373 | } | ||
1374 | return n * 4; | ||
1375 | } | ||
1376 | |||
1377 | /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */ | ||
1378 | static int uncopy_32(Uint32 *dst, void *src, int n, | ||
1379 | RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) | ||
1380 | { | ||
1381 | int i; | ||
1382 | Uint32 *s = src; | ||
1383 | for(i = 0; i < n; i++) { | ||
1384 | unsigned r, g, b, a; | ||
1385 | Uint32 pixel = *s++; | ||
1386 | RGB_FROM_PIXEL(pixel, sfmt, r, g, b); | ||
1387 | a = pixel >> 24; | ||
1388 | PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a); | ||
1389 | dst++; | ||
1390 | } | ||
1391 | return n * 4; | ||
1392 | } | ||
1393 | |||
/* alpha component of `pixel` as laid out by pixel format `fmt` */
#define RLE_PIXEL_ALPHA(pixel, fmt) \
    (((pixel) & (fmt)->Amask) >> (fmt)->Ashift)

/* true when the pixel's alpha is exactly 255 */
#define ISOPAQUE(pixel, fmt) (RLE_PIXEL_ALPHA(pixel, fmt) == 255)

/* true when the pixel's alpha lies in 1..254; the unsigned subtraction
   wraps an alpha of 0 around so that it fails the comparison */
#define ISTRANSL(pixel, fmt) \
    ((unsigned)(RLE_PIXEL_ALPHA(pixel, fmt) - 1U) < 254U)
1398 | |||
1399 | /* convert surface to be quickly alpha-blittable onto dest, if possible */ | ||
1400 | static int RLEAlphaSurface(SDL_Surface *surface) | ||
1401 | { | ||
1402 | SDL_Surface *dest; | ||
1403 | SDL_PixelFormat *df; | ||
1404 | int maxsize = 0; | ||
1405 | int max_opaque_run; | ||
1406 | int max_transl_run = 65535; | ||
1407 | unsigned masksum; | ||
1408 | Uint8 *rlebuf, *dst; | ||
1409 | int (*copy_opaque)(void *, Uint32 *, int, | ||
1410 | SDL_PixelFormat *, SDL_PixelFormat *); | ||
1411 | int (*copy_transl)(void *, Uint32 *, int, | ||
1412 | SDL_PixelFormat *, SDL_PixelFormat *); | ||
1413 | |||
1414 | dest = surface->map->dst; | ||
1415 | if(!dest) | ||
1416 | return -1; | ||
1417 | df = dest->format; | ||
1418 | if(surface->format->BitsPerPixel != 32) | ||
1419 | return -1; /* only 32bpp source supported */ | ||
1420 | |||
1421 | /* find out whether the destination is one we support, | ||
1422 | and determine the max size of the encoded result */ | ||
1423 | masksum = df->Rmask | df->Gmask | df->Bmask; | ||
1424 | switch(df->BytesPerPixel) { | ||
1425 | case 2: | ||
1426 | /* 16bpp: only support 565 and 555 formats */ | ||
1427 | switch(masksum) { | ||
1428 | case 0xffff: | ||
1429 | if(df->Gmask == 0x07e0 | ||
1430 | || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) { | ||
1431 | copy_opaque = copy_opaque_16; | ||
1432 | copy_transl = copy_transl_565; | ||
1433 | } else | ||
1434 | return -1; | ||
1435 | break; | ||
1436 | case 0x7fff: | ||
1437 | if(df->Gmask == 0x03e0 | ||
1438 | || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) { | ||
1439 | copy_opaque = copy_opaque_16; | ||
1440 | copy_transl = copy_transl_555; | ||
1441 | } else | ||
1442 | return -1; | ||
1443 | break; | ||
1444 | default: | ||
1445 | return -1; | ||
1446 | } | ||
1447 | max_opaque_run = 255; /* runs stored as bytes */ | ||
1448 | |||
1449 | /* worst case is alternating opaque and translucent pixels, | ||
1450 | with room for alignment padding between lines */ | ||
1451 | maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2; | ||
1452 | break; | ||
1453 | case 4: | ||
1454 | if(masksum != 0x00ffffff) | ||
1455 | return -1; /* requires unused high byte */ | ||
1456 | copy_opaque = copy_32; | ||
1457 | copy_transl = copy_32; | ||
1458 | max_opaque_run = 255; /* runs stored as short ints */ | ||
1459 | |||
1460 | /* worst case is alternating opaque and translucent pixels */ | ||
1461 | maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4; | ||
1462 | break; | ||
1463 | default: | ||
1464 | return -1; /* anything else unsupported right now */ | ||
1465 | } | ||
1466 | |||
1467 | maxsize += sizeof(RLEDestFormat); | ||
1468 | rlebuf = (Uint8 *)SDL_malloc(maxsize); | ||
1469 | if(!rlebuf) { | ||
1470 | SDL_OutOfMemory(); | ||
1471 | return -1; | ||
1472 | } | ||
1473 | { | ||
1474 | /* save the destination format so we can undo the encoding later */ | ||
1475 | RLEDestFormat *r = (RLEDestFormat *)rlebuf; | ||
1476 | r->BytesPerPixel = df->BytesPerPixel; | ||
1477 | r->Rloss = df->Rloss; | ||
1478 | r->Gloss = df->Gloss; | ||
1479 | r->Bloss = df->Bloss; | ||
1480 | r->Rshift = df->Rshift; | ||
1481 | r->Gshift = df->Gshift; | ||
1482 | r->Bshift = df->Bshift; | ||
1483 | r->Ashift = df->Ashift; | ||
1484 | r->Rmask = df->Rmask; | ||
1485 | r->Gmask = df->Gmask; | ||
1486 | r->Bmask = df->Bmask; | ||
1487 | r->Amask = df->Amask; | ||
1488 | } | ||
1489 | dst = rlebuf + sizeof(RLEDestFormat); | ||
1490 | |||
1491 | /* Do the actual encoding */ | ||
1492 | { | ||
1493 | int x, y; | ||
1494 | int h = surface->h, w = surface->w; | ||
1495 | SDL_PixelFormat *sf = surface->format; | ||
1496 | Uint32 *src = (Uint32 *)surface->pixels; | ||
1497 | Uint8 *lastline = dst; /* end of last non-blank line */ | ||
1498 | |||
1499 | /* opaque counts are 8 or 16 bits, depending on target depth */ | ||
1500 | #define ADD_OPAQUE_COUNTS(n, m) \ | ||
1501 | if(df->BytesPerPixel == 4) { \ | ||
1502 | ((Uint16 *)dst)[0] = n; \ | ||
1503 | ((Uint16 *)dst)[1] = m; \ | ||
1504 | dst += 4; \ | ||
1505 | } else { \ | ||
1506 | dst[0] = n; \ | ||
1507 | dst[1] = m; \ | ||
1508 | dst += 2; \ | ||
1509 | } | ||
1510 | |||
1511 | /* translucent counts are always 16 bit */ | ||
1512 | #define ADD_TRANSL_COUNTS(n, m) \ | ||
1513 | (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4) | ||
1514 | |||
1515 | for(y = 0; y < h; y++) { | ||
1516 | int runstart, skipstart; | ||
1517 | int blankline = 0; | ||
1518 | /* First encode all opaque pixels of a scan line */ | ||
1519 | x = 0; | ||
1520 | do { | ||
1521 | int run, skip, len; | ||
1522 | skipstart = x; | ||
1523 | while(x < w && !ISOPAQUE(src[x], sf)) | ||
1524 | x++; | ||
1525 | runstart = x; | ||
1526 | while(x < w && ISOPAQUE(src[x], sf)) | ||
1527 | x++; | ||
1528 | skip = runstart - skipstart; | ||
1529 | if(skip == w) | ||
1530 | blankline = 1; | ||
1531 | run = x - runstart; | ||
1532 | while(skip > max_opaque_run) { | ||
1533 | ADD_OPAQUE_COUNTS(max_opaque_run, 0); | ||
1534 | skip -= max_opaque_run; | ||
1535 | } | ||
1536 | len = MIN(run, max_opaque_run); | ||
1537 | ADD_OPAQUE_COUNTS(skip, len); | ||
1538 | dst += copy_opaque(dst, src + runstart, len, sf, df); | ||
1539 | runstart += len; | ||
1540 | run -= len; | ||
1541 | while(run) { | ||
1542 | len = MIN(run, max_opaque_run); | ||
1543 | ADD_OPAQUE_COUNTS(0, len); | ||
1544 | dst += copy_opaque(dst, src + runstart, len, sf, df); | ||
1545 | runstart += len; | ||
1546 | run -= len; | ||
1547 | } | ||
1548 | } while(x < w); | ||
1549 | |||
1550 | /* Make sure the next output address is 32-bit aligned */ | ||
1551 | dst += (uintptr_t)dst & 2; | ||
1552 | |||
1553 | /* Next, encode all translucent pixels of the same scan line */ | ||
1554 | x = 0; | ||
1555 | do { | ||
1556 | int run, skip, len; | ||
1557 | skipstart = x; | ||
1558 | while(x < w && !ISTRANSL(src[x], sf)) | ||
1559 | x++; | ||
1560 | runstart = x; | ||
1561 | while(x < w && ISTRANSL(src[x], sf)) | ||
1562 | x++; | ||
1563 | skip = runstart - skipstart; | ||
1564 | blankline &= (skip == w); | ||
1565 | run = x - runstart; | ||
1566 | while(skip > max_transl_run) { | ||
1567 | ADD_TRANSL_COUNTS(max_transl_run, 0); | ||
1568 | skip -= max_transl_run; | ||
1569 | } | ||
1570 | len = MIN(run, max_transl_run); | ||
1571 | ADD_TRANSL_COUNTS(skip, len); | ||
1572 | dst += copy_transl(dst, src + runstart, len, sf, df); | ||
1573 | runstart += len; | ||
1574 | run -= len; | ||
1575 | while(run) { | ||
1576 | len = MIN(run, max_transl_run); | ||
1577 | ADD_TRANSL_COUNTS(0, len); | ||
1578 | dst += copy_transl(dst, src + runstart, len, sf, df); | ||
1579 | runstart += len; | ||
1580 | run -= len; | ||
1581 | } | ||
1582 | if(!blankline) | ||
1583 | lastline = dst; | ||
1584 | } while(x < w); | ||
1585 | |||
1586 | src += surface->pitch >> 2; | ||
1587 | } | ||
1588 | dst = lastline; /* back up past trailing blank lines */ | ||
1589 | ADD_OPAQUE_COUNTS(0, 0); | ||
1590 | } | ||
1591 | |||
1592 | #undef ADD_OPAQUE_COUNTS | ||
1593 | #undef ADD_TRANSL_COUNTS | ||
1594 | |||
1595 | /* Now that we have it encoded, release the original pixels */ | ||
1596 | if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC | ||
1597 | && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { | ||
1598 | SDL_free( surface->pixels ); | ||
1599 | surface->pixels = NULL; | ||
1600 | } | ||
1601 | |||
1602 | /* realloc the buffer to release unused memory */ | ||
1603 | { | ||
1604 | Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); | ||
1605 | if(!p) | ||
1606 | p = rlebuf; | ||
1607 | surface->map->sw_data->aux_data = p; | ||
1608 | } | ||
1609 | |||
1610 | return 0; | ||
1611 | } | ||
1612 | |||
1613 | static Uint32 getpix_8(Uint8 *srcbuf) | ||
1614 | { | ||
1615 | return *srcbuf; | ||
1616 | } | ||
1617 | |||
1618 | static Uint32 getpix_16(Uint8 *srcbuf) | ||
1619 | { | ||
1620 | return *(Uint16 *)srcbuf; | ||
1621 | } | ||
1622 | |||
1623 | static Uint32 getpix_24(Uint8 *srcbuf) | ||
1624 | { | ||
1625 | #if SDL_BYTEORDER == SDL_LIL_ENDIAN | ||
1626 | return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16); | ||
1627 | #else | ||
1628 | return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2]; | ||
1629 | #endif | ||
1630 | } | ||
1631 | |||
1632 | static Uint32 getpix_32(Uint8 *srcbuf) | ||
1633 | { | ||
1634 | return *(Uint32 *)srcbuf; | ||
1635 | } | ||
1636 | |||
1637 | typedef Uint32 (*getpix_func)(Uint8 *); | ||
1638 | |||
1639 | static getpix_func getpixes[4] = { | ||
1640 | getpix_8, getpix_16, getpix_24, getpix_32 | ||
1641 | }; | ||
1642 | |||
1643 | static int RLEColorkeySurface(SDL_Surface *surface) | ||
1644 | { | ||
1645 | Uint8 *rlebuf, *dst; | ||
1646 | int maxn; | ||
1647 | int y; | ||
1648 | Uint8 *srcbuf, *lastline; | ||
1649 | int maxsize = 0; | ||
1650 | int bpp = surface->format->BytesPerPixel; | ||
1651 | getpix_func getpix; | ||
1652 | Uint32 ckey, rgbmask; | ||
1653 | int w, h; | ||
1654 | |||
1655 | /* calculate the worst case size for the compressed surface */ | ||
1656 | switch(bpp) { | ||
1657 | case 1: | ||
1658 | /* worst case is alternating opaque and transparent pixels, | ||
1659 | starting with an opaque pixel */ | ||
1660 | maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2; | ||
1661 | break; | ||
1662 | case 2: | ||
1663 | case 3: | ||
1664 | /* worst case is solid runs, at most 255 pixels wide */ | ||
1665 | maxsize = surface->h * (2 * (surface->w / 255 + 1) | ||
1666 | + surface->w * bpp) + 2; | ||
1667 | break; | ||
1668 | case 4: | ||
1669 | /* worst case is solid runs, at most 65535 pixels wide */ | ||
1670 | maxsize = surface->h * (4 * (surface->w / 65535 + 1) | ||
1671 | + surface->w * 4) + 4; | ||
1672 | break; | ||
1673 | } | ||
1674 | |||
1675 | rlebuf = (Uint8 *)SDL_malloc(maxsize); | ||
1676 | if ( rlebuf == NULL ) { | ||
1677 | SDL_OutOfMemory(); | ||
1678 | return(-1); | ||
1679 | } | ||
1680 | |||
1681 | /* Set up the conversion */ | ||
1682 | srcbuf = (Uint8 *)surface->pixels; | ||
1683 | maxn = bpp == 4 ? 65535 : 255; | ||
1684 | dst = rlebuf; | ||
1685 | rgbmask = ~surface->format->Amask; | ||
1686 | ckey = surface->format->colorkey & rgbmask; | ||
1687 | lastline = dst; | ||
1688 | getpix = getpixes[bpp - 1]; | ||
1689 | w = surface->w; | ||
1690 | h = surface->h; | ||
1691 | |||
1692 | #define ADD_COUNTS(n, m) \ | ||
1693 | if(bpp == 4) { \ | ||
1694 | ((Uint16 *)dst)[0] = n; \ | ||
1695 | ((Uint16 *)dst)[1] = m; \ | ||
1696 | dst += 4; \ | ||
1697 | } else { \ | ||
1698 | dst[0] = n; \ | ||
1699 | dst[1] = m; \ | ||
1700 | dst += 2; \ | ||
1701 | } | ||
1702 | |||
1703 | for(y = 0; y < h; y++) { | ||
1704 | int x = 0; | ||
1705 | int blankline = 0; | ||
1706 | do { | ||
1707 | int run, skip, len; | ||
1708 | int runstart; | ||
1709 | int skipstart = x; | ||
1710 | |||
1711 | /* find run of transparent, then opaque pixels */ | ||
1712 | while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey) | ||
1713 | x++; | ||
1714 | runstart = x; | ||
1715 | while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey) | ||
1716 | x++; | ||
1717 | skip = runstart - skipstart; | ||
1718 | if(skip == w) | ||
1719 | blankline = 1; | ||
1720 | run = x - runstart; | ||
1721 | |||
1722 | /* encode segment */ | ||
1723 | while(skip > maxn) { | ||
1724 | ADD_COUNTS(maxn, 0); | ||
1725 | skip -= maxn; | ||
1726 | } | ||
1727 | len = MIN(run, maxn); | ||
1728 | ADD_COUNTS(skip, len); | ||
1729 | SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); | ||
1730 | dst += len * bpp; | ||
1731 | run -= len; | ||
1732 | runstart += len; | ||
1733 | while(run) { | ||
1734 | len = MIN(run, maxn); | ||
1735 | ADD_COUNTS(0, len); | ||
1736 | SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); | ||
1737 | dst += len * bpp; | ||
1738 | runstart += len; | ||
1739 | run -= len; | ||
1740 | } | ||
1741 | if(!blankline) | ||
1742 | lastline = dst; | ||
1743 | } while(x < w); | ||
1744 | |||
1745 | srcbuf += surface->pitch; | ||
1746 | } | ||
1747 | dst = lastline; /* back up bast trailing blank lines */ | ||
1748 | ADD_COUNTS(0, 0); | ||
1749 | |||
1750 | #undef ADD_COUNTS | ||
1751 | |||
1752 | /* Now that we have it encoded, release the original pixels */ | ||
1753 | if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC | ||
1754 | && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { | ||
1755 | SDL_free( surface->pixels ); | ||
1756 | surface->pixels = NULL; | ||
1757 | } | ||
1758 | |||
1759 | /* realloc the buffer to release unused memory */ | ||
1760 | { | ||
1761 | /* If realloc returns NULL, the original block is left intact */ | ||
1762 | Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); | ||
1763 | if(!p) | ||
1764 | p = rlebuf; | ||
1765 | surface->map->sw_data->aux_data = p; | ||
1766 | } | ||
1767 | |||
1768 | return(0); | ||
1769 | } | ||
1770 | |||
1771 | int SDL_RLESurface(SDL_Surface *surface) | ||
1772 | { | ||
1773 | int retcode; | ||
1774 | |||
1775 | /* Clear any previous RLE conversion */ | ||
1776 | if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { | ||
1777 | SDL_UnRLESurface(surface, 1); | ||
1778 | } | ||
1779 | |||
1780 | /* We don't support RLE encoding of bitmaps */ | ||
1781 | if ( surface->format->BitsPerPixel < 8 ) { | ||
1782 | return(-1); | ||
1783 | } | ||
1784 | |||
1785 | /* Lock the surface if it's in hardware */ | ||
1786 | if ( SDL_MUSTLOCK(surface) ) { | ||
1787 | if ( SDL_LockSurface(surface) < 0 ) { | ||
1788 | return(-1); | ||
1789 | } | ||
1790 | } | ||
1791 | |||
1792 | /* Encode */ | ||
1793 | if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { | ||
1794 | retcode = RLEColorkeySurface(surface); | ||
1795 | } else { | ||
1796 | if((surface->flags & SDL_SRCALPHA) == SDL_SRCALPHA | ||
1797 | && surface->format->Amask != 0) | ||
1798 | retcode = RLEAlphaSurface(surface); | ||
1799 | else | ||
1800 | retcode = -1; /* no RLE for per-surface alpha sans ckey */ | ||
1801 | } | ||
1802 | |||
1803 | /* Unlock the surface if it's in hardware */ | ||
1804 | if ( SDL_MUSTLOCK(surface) ) { | ||
1805 | SDL_UnlockSurface(surface); | ||
1806 | } | ||
1807 | |||
1808 | if(retcode < 0) | ||
1809 | return -1; | ||
1810 | |||
1811 | /* The surface is now accelerated */ | ||
1812 | surface->flags |= SDL_RLEACCEL; | ||
1813 | |||
1814 | return(0); | ||
1815 | } | ||
1816 | |||
1817 | /* | ||
1818 | * Un-RLE a surface with pixel alpha | ||
1819 | * This may not give back exactly the image before RLE-encoding; all | ||
1820 | * completely transparent pixels will be lost, and colour and alpha depth | ||
1821 | * may have been reduced (when encoding for 16bpp targets). | ||
1822 | */ | ||
1823 | static SDL_bool UnRLEAlpha(SDL_Surface *surface) | ||
1824 | { | ||
1825 | Uint8 *srcbuf; | ||
1826 | Uint32 *dst; | ||
1827 | SDL_PixelFormat *sf = surface->format; | ||
1828 | RLEDestFormat *df = surface->map->sw_data->aux_data; | ||
1829 | int (*uncopy_opaque)(Uint32 *, void *, int, | ||
1830 | RLEDestFormat *, SDL_PixelFormat *); | ||
1831 | int (*uncopy_transl)(Uint32 *, void *, int, | ||
1832 | RLEDestFormat *, SDL_PixelFormat *); | ||
1833 | int w = surface->w; | ||
1834 | int bpp = df->BytesPerPixel; | ||
1835 | |||
1836 | if(bpp == 2) { | ||
1837 | uncopy_opaque = uncopy_opaque_16; | ||
1838 | uncopy_transl = uncopy_transl_16; | ||
1839 | } else { | ||
1840 | uncopy_opaque = uncopy_transl = uncopy_32; | ||
1841 | } | ||
1842 | |||
1843 | surface->pixels = SDL_malloc(surface->h * surface->pitch); | ||
1844 | if ( !surface->pixels ) { | ||
1845 | return(SDL_FALSE); | ||
1846 | } | ||
1847 | /* fill background with transparent pixels */ | ||
1848 | SDL_memset(surface->pixels, 0, surface->h * surface->pitch); | ||
1849 | |||
1850 | dst = surface->pixels; | ||
1851 | srcbuf = (Uint8 *)(df + 1); | ||
1852 | for(;;) { | ||
1853 | /* copy opaque pixels */ | ||
1854 | int ofs = 0; | ||
1855 | do { | ||
1856 | unsigned run; | ||
1857 | if(bpp == 2) { | ||
1858 | ofs += srcbuf[0]; | ||
1859 | run = srcbuf[1]; | ||
1860 | srcbuf += 2; | ||
1861 | } else { | ||
1862 | ofs += ((Uint16 *)srcbuf)[0]; | ||
1863 | run = ((Uint16 *)srcbuf)[1]; | ||
1864 | srcbuf += 4; | ||
1865 | } | ||
1866 | if(run) { | ||
1867 | srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf); | ||
1868 | ofs += run; | ||
1869 | } else if(!ofs) | ||
1870 | return(SDL_TRUE); | ||
1871 | } while(ofs < w); | ||
1872 | |||
1873 | /* skip padding if needed */ | ||
1874 | if(bpp == 2) | ||
1875 | srcbuf += (uintptr_t)srcbuf & 2; | ||
1876 | |||
1877 | /* copy translucent pixels */ | ||
1878 | ofs = 0; | ||
1879 | do { | ||
1880 | unsigned run; | ||
1881 | ofs += ((Uint16 *)srcbuf)[0]; | ||
1882 | run = ((Uint16 *)srcbuf)[1]; | ||
1883 | srcbuf += 4; | ||
1884 | if(run) { | ||
1885 | srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf); | ||
1886 | ofs += run; | ||
1887 | } | ||
1888 | } while(ofs < w); | ||
1889 | dst += surface->pitch >> 2; | ||
1890 | } | ||
1891 | /* Make the compiler happy */ | ||
1892 | return(SDL_TRUE); | ||
1893 | } | ||
1894 | |||
1895 | void SDL_UnRLESurface(SDL_Surface *surface, int recode) | ||
1896 | { | ||
1897 | if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { | ||
1898 | surface->flags &= ~SDL_RLEACCEL; | ||
1899 | |||
1900 | if(recode && (surface->flags & SDL_PREALLOC) != SDL_PREALLOC | ||
1901 | && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { | ||
1902 | if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { | ||
1903 | SDL_Rect full; | ||
1904 | unsigned alpha_flag; | ||
1905 | |||
1906 | /* re-create the original surface */ | ||
1907 | surface->pixels = SDL_malloc(surface->h * surface->pitch); | ||
1908 | if ( !surface->pixels ) { | ||
1909 | /* Oh crap... */ | ||
1910 | surface->flags |= SDL_RLEACCEL; | ||
1911 | return; | ||
1912 | } | ||
1913 | |||
1914 | /* fill it with the background colour */ | ||
1915 | SDL_FillRect(surface, NULL, surface->format->colorkey); | ||
1916 | |||
1917 | /* now render the encoded surface */ | ||
1918 | full.x = full.y = 0; | ||
1919 | full.w = surface->w; | ||
1920 | full.h = surface->h; | ||
1921 | alpha_flag = surface->flags & SDL_SRCALPHA; | ||
1922 | surface->flags &= ~SDL_SRCALPHA; /* opaque blit */ | ||
1923 | SDL_RLEBlit(surface, &full, surface, &full); | ||
1924 | surface->flags |= alpha_flag; | ||
1925 | } else { | ||
1926 | if ( !UnRLEAlpha(surface) ) { | ||
1927 | /* Oh crap... */ | ||
1928 | surface->flags |= SDL_RLEACCEL; | ||
1929 | return; | ||
1930 | } | ||
1931 | } | ||
1932 | } | ||
1933 | |||
1934 | if ( surface->map && surface->map->sw_data->aux_data ) { | ||
1935 | SDL_free(surface->map->sw_data->aux_data); | ||
1936 | surface->map->sw_data->aux_data = NULL; | ||
1937 | } | ||
1938 | } | ||
1939 | } | ||
1940 | |||
1941 | |||