diff options
Diffstat (limited to 'apps/plugins/sdl/src/hermes/x86p_32.asm')
-rw-r--r-- | apps/plugins/sdl/src/hermes/x86p_32.asm | 1044 |
1 files changed, 1044 insertions, 0 deletions
diff --git a/apps/plugins/sdl/src/hermes/x86p_32.asm b/apps/plugins/sdl/src/hermes/x86p_32.asm new file mode 100644 index 0000000000..2b478802d1 --- /dev/null +++ b/apps/plugins/sdl/src/hermes/x86p_32.asm | |||
@@ -0,0 +1,1044 @@ | |||
1 | ; | ||
2 | ; x86 format converters for HERMES | ||
3 | ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) | ||
4 | ; This source code is licensed under the GNU LGPL | ||
5 | ; | ||
6 | ; Please refer to the file COPYING.LIB contained in the distribution for | ||
7 | ; licensing conditions | ||
8 | ; | ||
9 | ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission | ||
10 | ; | ||
11 | |||
12 | BITS 32 | ||
13 | |||
14 | %include "common.inc" | ||
15 | |||
16 | SDL_FUNC _ConvertX86p32_32BGR888 | ||
17 | SDL_FUNC _ConvertX86p32_32RGBA888 | ||
18 | SDL_FUNC _ConvertX86p32_32BGRA888 | ||
19 | SDL_FUNC _ConvertX86p32_24RGB888 | ||
20 | SDL_FUNC _ConvertX86p32_24BGR888 | ||
21 | SDL_FUNC _ConvertX86p32_16RGB565 | ||
22 | SDL_FUNC _ConvertX86p32_16BGR565 | ||
23 | SDL_FUNC _ConvertX86p32_16RGB555 | ||
24 | SDL_FUNC _ConvertX86p32_16BGR555 | ||
25 | SDL_FUNC _ConvertX86p32_8RGB332 | ||
26 | |||
27 | SECTION .text | ||
28 | |||
;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX
36 | |||
37 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_32BGR888
;;   Copies ECX dwords from [ESI] to [EDI], exchanging byte 0 and byte 2
;;   of every dword (bytes 1 and 3 keep their position).
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI/EDI advanced past the converted pixels
;; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------

; %1 = register holding one pixel: bswap reverses all four bytes, the
; rotate then brings byte 3 back, leaving only bytes 0 and 2 exchanged.
%macro P32_BGR888_SWIZZLE 1
        bswap   %1
        ror     %1, 8
%endmacro

; Convert exactly one pixel through EDX and step both pointers.
%macro P32_BGR888_ONE_PIXEL 0
        mov     edx, [esi]
        P32_BGR888_SWIZZLE edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
%endmacro

_ConvertX86p32_32BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:
        P32_BGR888_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.unrolled:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep full count for the tail

.quad_loop:
        ; all four loads are issued before any store
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        P32_BGR888_SWIZZLE eax
        P32_BGR888_SWIZZLE ebx
        P32_BGR888_SWIZZLE ecx
        P32_BGR888_SWIZZLE edx

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail_loop:
        P32_BGR888_ONE_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
115 | |||
116 | |||
117 | |||
118 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_32RGBA888
;;   Copies ECX dwords from [ESI] to [EDI], rotating every dword left by
;;   8 bits (the top byte moves to the bottom, the other three move up).
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI/EDI advanced past the converted pixels
;; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------

; Convert exactly one pixel through EDX and step both pointers.
%macro P32_RGBA888_ONE_PIXEL 0
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
%endmacro

_ConvertX86p32_32RGBA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:
        P32_RGBA888_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.unrolled:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep full count for the tail

.quad_loop:
        ; load four pixels (all loads precede the stores)
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        rol     eax, 8
        rol     ebx, 8
        rol     ecx, 8
        rol     edx, 8

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail_loop:
        P32_RGBA888_ONE_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
186 | |||
187 | |||
188 | |||
189 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_32BGRA888
;;   Copies ECX dwords from [ESI] to [EDI], reversing the byte order of
;;   every dword (bswap).
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI/EDI advanced past the converted pixels
;; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------

; Convert exactly one pixel through EDX and step both pointers.
%macro P32_BGRA888_ONE_PIXEL 0
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
%endmacro

_ConvertX86p32_32BGRA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:
        P32_BGRA888_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.unrolled:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep full count for the tail

.quad_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail_loop:
        P32_BGRA888_ONE_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
259 | |||
260 | |||
261 | |||
262 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_24RGB888 - 32 bit RGB 888 to 24 bit RGB 888
;;   For every 4-byte source pixel, writes its three low bytes (offsets
;;   0,1,2) in the same order to a 3-byte destination pixel; the byte at
;;   offset 3 is dropped.
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI advanced 4 bytes / EDI advanced 3 bytes per pixel
;; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------

; Copy one pixel byte by byte (three loads, then three stores).
%macro P32_24RGB888_ONE_PIXEL 0
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
%endmacro

_ConvertX86p32_24RGB888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> aligned/unrolled path
        ja      .align_head

.short_loop:
        P32_24RGB888_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.align_head:
        ; emit single pixels until the destination is dword aligned
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .aligned
        P32_24RGB888_ONE_PIXEL
        dec     ecx
        jmp     SHORT .align_head

.aligned:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep count for the tail

        ; Four source pixels p0..p3 (12 payload bytes) become three dwords.
        ; Register pictures are written high byte ... low byte.
.quad_loop:
        mov     eax, [esi]              ; eax = [x0][p0.2][p0.1][p0.0]
        mov     ebx, [esi+4]            ; ebx = [x1][p1.2][p1.1][p1.0]
        mov     ecx, [esi+12]           ; ecx = [x3][p3.2][p3.1][p3.0]  (fourth pixel)

        shl     eax, 8                  ; eax = [p0.2][p0.1][p0.0][..]
        mov     al, [esi+4]             ; eax = [p0.2][p0.1][p0.0][p1.0]
        ror     eax, 8                  ; eax = [p1.0][p0.2][p0.1][p0.0]  (dword 0 done)

        shl     ebx, 8                  ; ebx = [p1.2][p1.1][p1.0][..]
        mov     bh, [esi+8+1]           ; ebx = [p1.2][p1.1][p2.1][..]
        mov     [edi], eax

        mov     bl, [esi+8+0]           ; ebx = [p1.2][p1.1][p2.1][p2.0]
        rol     ebx, 16                 ; ebx = [p2.1][p2.0][p1.2][p1.1]  (dword 1 done)

        shl     ecx, 8                  ; ecx = [p3.2][p3.1][p3.0][..]
        mov     cl, [esi+8+2]           ; ecx = [p3.2][p3.1][p3.0][p2.2]  (dword 2 done)

        mov     [edi+4], ebx
        mov     [edi+8], ecx

        add     esi, BYTE 4*4
        add     edi, BYTE 3*4
        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail_loop:
        P32_24RGB888_ONE_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
356 | |||
357 | |||
358 | |||
359 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_24BGR888 - 32 bit RGB 888 to 24 bit BGR 888
;;   For every 4-byte source pixel, writes its three low bytes in
;;   reversed order (source offsets 2,1,0) to a 3-byte destination
;;   pixel; the byte at offset 3 is dropped.
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI advanced 4 bytes / EDI advanced 3 bytes per pixel
;; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;; ---------------------------------------------------------------------

; Copy one pixel byte by byte, swapping the outer two bytes.
%macro P32_24BGR888_ONE_PIXEL 0
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
%endmacro

_ConvertX86p32_24BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> aligned/unrolled path
        ja      .align_head

.short_loop:
        P32_24BGR888_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.align_head:
        ; emit single pixels until the destination is dword aligned
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .aligned
        P32_24BGR888_ONE_PIXEL
        dec     ecx
        jmp     SHORT .align_head

.aligned:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep count for the tail

        ; Four source pixels p0..p3 become three dwords.
        ; Register pictures are written high byte ... low byte.
.quad_loop:
        mov     eax, [esi]              ; eax = [x0][p0.2][p0.1][p0.0]
        mov     ebx, [esi+4]            ; ebx = [x1][p1.2][p1.1][p1.0]

        bswap   eax                     ; eax = [p0.0][p0.1][p0.2][x0]
        mov     al, [esi+4+2]           ; eax = [p0.0][p0.1][p0.2][p1.2]
        ror     eax, 8                  ; eax = [p1.2][p0.0][p0.1][p0.2]  (dword 0 done)

        bswap   ebx                     ; ebx = [p1.0][p1.1][p1.2][x1]
        mov     bh, [esi+4+4+1]         ; ebx = [p1.0][p1.1][p2.1][x1]
        mov     bl, [esi+4+4+2]         ; ebx = [p1.0][p1.1][p2.1][p2.2]
        ror     ebx, 16                 ; ebx = [p2.1][p2.2][p1.0][p1.1]  (dword 1 done)

        mov     [edi], eax
        mov     [edi+4], ebx

        mov     ecx, [esi+12]           ; ecx = [x3][p3.2][p3.1][p3.0]
        bswap   ecx                     ; ecx = [p3.0][p3.1][p3.2][x3]
        mov     cl, [esi+8]             ; ecx = [p3.0][p3.1][p3.2][p2.0]  (dword 2 done)
        mov     [edi+8], ecx

        add     esi, BYTE 4*4
        add     edi, BYTE 3*4
        dec     ebp
        jnz     .quad_loop

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail_loop:
        P32_24BGR888_ONE_PIXEL
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn
456 | |||
457 | |||
458 | |||
459 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_16RGB565 - 32 bit RGB 888 to 16 bit RGB 565
;;   Source bytes per pixel: +0 blue, +1 green, +2 red.
;;   Output word: red[7:3] in bits 15..11, green[7:2] in bits 10..5,
;;   blue[7:3] in bits 4..0.
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI advanced 4 bytes / EDI advanced 2 bytes per pixel
;; Clobb: EAX, EBX, ECX, EDX, flags
;; ---------------------------------------------------------------------

; Convert one pixel through AX (BL as scratch) and step both pointers.
%macro P32_16RGB565_ONE_PIXEL 0
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = rrrrrggg ggg00000
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
%endmacro

_ConvertX86p32_16RGB565:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:
        P32_16RGB565_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.head:
        ; destination not dword aligned -> emit one pixel first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned
        P32_16RGB565_ONE_PIXEL
        dec     ecx

.aligned:
        push    ecx                     ; keep count for the odd-pixel tail
        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at their end and count up from -pairs to 0
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]
        neg     ecx

.pair_loop:
        ; first pixel -> low word: green/blue part
        mov     eax, [esi+ecx*8]
        shr     ah, 2
        shr     eax, 3
        and     eax, 000007FFh

        ; second pixel -> high word: green/blue part
        mov     ebx, [esi+ecx*8+4]
        shr     bh, 2
        shl     ebx, 13
        and     ebx, 07FF0000h

        ; red of both pixels: second pixel's dword with the first
        ; pixel's red byte substituted into the low byte
        mov     edx, [esi+ecx*8+4]
        mov     dl, [esi+ecx*8+2]
        shl     edx, 8
        and     edx, 0F800F800h

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax        ; two output pixels at once

        inc     ecx
        jnz     .pair_loop

        pop     ecx
        test    cl, 1                   ; odd count -> one pixel remains
        jz      .done
        P32_16RGB565_ONE_PIXEL

.done:
        retn
571 | |||
572 | |||
573 | |||
574 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_16BGR565 - 32 bit RGB 888 to 16 bit BGR 565
;;   Source bytes per pixel: +0 blue, +1 green, +2 red.
;;   Output word: blue[7:3] in bits 15..11, green[7:2] in bits 10..5,
;;   red[7:3] in bits 4..0.
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI advanced 4 bytes / EDI advanced 2 bytes per pixel
;; Clobb: EAX, EBX, ECX, EDX, flags
;; ---------------------------------------------------------------------

; Convert one pixel through AX (BL as scratch) and step both pointers.
%macro P32_16BGR565_ONE_PIXEL 0
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111100b           ; green -> top 6 bits
        shl     eax, 3                  ; ax = bbbbbggg ggg00000
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
%endmacro

_ConvertX86p32_16BGR565:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:
        P32_16BGR565_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.head:
        ; destination not dword aligned -> emit one pixel first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned
        P32_16BGR565_ONE_PIXEL
        dec     ecx

.aligned:
        push    ecx                     ; keep count for the odd-pixel tail
        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at their end and count up from -pairs to 0
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]
        neg     ecx

.pair_loop:
        ; first pixel -> low word: blue/green part
        mov     ah, [esi+ecx*8]         ; blue
        mov     al, [esi+ecx*8+1]       ; green
        shr     ah, 3
        shl     eax, 3
        and     eax, 0000FFE0h

        ; second pixel -> high word: blue/green part
        mov     bh, [esi+ecx*8+4]       ; blue
        mov     bl, [esi+ecx*8+5]       ; green
        shr     bh, 3
        shl     ebx, 19
        and     ebx, 0FFE00000h

        ; red of both pixels: second pixel's dword with the first
        ; pixel's red byte substituted into the low byte
        mov     edx, [esi+ecx*8+4]
        mov     dl, [esi+ecx*8+2]
        shr     edx, 3
        and     edx, 001F001Fh

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax        ; two output pixels at once

        inc     ecx
        jnz     .pair_loop

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel remains
        jz      .done
        P32_16BGR565_ONE_PIXEL

.done:
        retn
685 | |||
686 | |||
687 | |||
688 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_16RGB555 - 32 bit RGB to 16 bit RGB 555
;;   Source bytes per pixel: +0 blue, +1 green, +2 red.
;;   Output word: red[7:3] in bits 14..10, green[7:3] in bits 9..5,
;;   blue[7:3] in bits 4..0, bit 15 clear.
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI advanced 4 bytes / EDI advanced 2 bytes per pixel
;; Clobb: EAX, EBX, ECX, EDX, flags
;; ---------------------------------------------------------------------

; Convert one pixel through AX (BL as scratch) and step both pointers.
%macro P32_16RGB555_ONE_PIXEL 0
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; red   -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = 0rrrrrgg ggg00000
        shr     bl, 3                   ; blue  -> 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
%endmacro

_ConvertX86p32_16RGB555:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:
        P32_16RGB555_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.head:
        ; destination not dword aligned -> emit one pixel first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned
        P32_16RGB555_ONE_PIXEL
        dec     ecx

.aligned:
        push    ecx                     ; keep count for the odd-pixel tail
        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at their end and count up from -pairs to 0
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]
        neg     ecx

.pair_loop:
        ; first pixel -> low word: green/blue part
        mov     eax, [esi+ecx*8]
        shr     ah, 3
        shr     eax, 3
        and     eax, 000007FFh

        ; second pixel -> high word: green/blue part
        mov     ebx, [esi+ecx*8+4]
        shr     bh, 3
        shl     ebx, 13
        and     ebx, 07FF0000h

        ; red of both pixels: second pixel's dword with the first
        ; pixel's red byte substituted into the low byte
        mov     edx, [esi+ecx*8+4]
        mov     dl, [esi+ecx*8+2]
        shl     edx, 7
        and     edx, 07C007C00h

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax        ; two output pixels at once

        inc     ecx
        jnz     .pair_loop

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel remains
        jz      .done
        P32_16RGB555_ONE_PIXEL

.done:
        retn
796 | |||
797 | |||
798 | |||
799 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_16BGR555 - 32 bit RGB to 16 bit BGR 555
;;   Source bytes per pixel: +0 blue, +1 green, +2 red.
;;   Output word: blue[7:3] in bits 14..10, green[7:3] in bits 9..5,
;;   red[7:3] in bits 4..0, bit 15 clear.
;; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:   ESI advanced 4 bytes / EDI advanced 2 bytes per pixel
;; Clobb: EAX, EBX, ECX, EDX, flags
;; ---------------------------------------------------------------------

; Convert one pixel through AX (BL as scratch) and step both pointers.
%macro P32_16BGR555_ONE_PIXEL 0
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; blue  -> 5 bits
        and     al, 11111000b           ; green -> top 5 bits
        shl     eax, 2                  ; ax = 0bbbbbgg ggg00000
        shr     bl, 3                   ; red   -> 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
%endmacro

_ConvertX86p32_16BGR555:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .head

.short_loop:
        P32_16BGR555_ONE_PIXEL
        dec     ecx
        jnz     .short_loop
        retn

.head:
        ; destination not dword aligned -> emit one pixel first
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned
        P32_16BGR555_ONE_PIXEL
        dec     ecx

.aligned:
        push    ecx                     ; keep count for the odd-pixel tail
        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; point both arrays at their end and count up from -pairs to 0
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]
        neg     ecx

.pair_loop:
        ; first pixel -> low word: blue/green part
        mov     ah, [esi+ecx*8]         ; blue
        mov     al, [esi+ecx*8+1]       ; green
        shr     ah, 3
        shl     eax, 2
        and     eax, 00007FE0h

        ; second pixel -> high word: blue/green part
        mov     bh, [esi+ecx*8+4]       ; blue
        mov     bl, [esi+ecx*8+5]       ; green
        shr     bh, 3
        shl     ebx, 18
        and     ebx, 07FE00000h

        ; red of both pixels: second pixel's dword with the first
        ; pixel's red byte substituted into the low byte
        mov     edx, [esi+ecx*8+4]
        mov     dl, [esi+ecx*8+2]
        shr     edx, 3
        and     edx, 001F001Fh

        add     eax, ebx
        add     eax, edx
        mov     [edi+ecx*4], eax        ; two output pixels at once

        inc     ecx
        jnz     .pair_loop

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel remains
        jz      .done
        P32_16BGR555_ONE_PIXEL

.done:
        retn
911 | |||
912 | |||
913 | |||
914 | |||
915 | |||
;; ---------------------------------------------------------------------
;; _ConvertX86p32_8RGB332 - from 32 bit RGB to 8 bit RGB (rrrgggbb)
;;   Output byte: source byte 2 [7:5] in bits 7..5, source byte 1 [7:5]
;;   in bits 4..2, source byte 0 [7:6] in bits 1..0.
;;   Four pixels are packed into one dword store per iteration; up to
;;   three leftover pixels are then written one byte at a time.
;; In:    ESI = source, EDI = dest, ECX = pixel count
;; Out:   ESI advanced 4 bytes / EDI advanced 1 byte per pixel
;; Clobb: EAX, EBX, ECX, EDX, flags
;; ---------------------------------------------------------------------
_ConvertX86p32_8RGB332:
        push    ecx                     ; keep full count for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      near .tail              ; fewer than four pixels

.quad_loop:
        ; ---- pixel 0 -> al --------------------------------------------
        mov     eax, [esi]
        mov     ebx, eax
        shr     al, 6                   ; byte 0 -> low 2 bits
        and     ah, 0e0h
        shr     ah, 3                   ; byte 1 -> bits 4..2
        shr     ebx, 16
        and     bl, 0e0h                ; byte 2 -> bits 7..5
        or      al, bl
        or      al, ah

        ; ---- pixel 1 -> ah --------------------------------------------
        mov     edx, [esi+4]
        shr     dl, 6
        and     dh, 0e0h
        shr     dh, 3
        mov     ebx, edx
        shr     ebx, 16
        or      dl, dh
        and     bl, 0e0h
        or      dl, bl
        mov     ah, dl

        ror     eax, 16                 ; park pixels 0/1 in the high word

        ; ---- pixel 2 -> al --------------------------------------------
        mov     ebx, [esi+8]
        mov     edx, ebx
        and     bh, 0e0h
        shr     bl, 6
        and     edx, 0e00000h
        shr     edx, 16
        shr     bh, 3
        or      bl, dl
        or      bl, bh
        mov     al, bl

        ; ---- pixel 3 -> ah --------------------------------------------
        mov     edx, [esi+12]
        mov     ebx, edx
        and     dh, 0e0h
        shr     dl, 6
        and     ebx, 0e00000h
        shr     dh, 3
        mov     ah, dl
        shr     ebx, 16
        or      ah, dh
        or      ah, bl

        rol     eax, 16                 ; pixels 0/1 back to the low word, 2/3 on top
        mov     [edi], eax

        add     esi, BYTE 16
        add     edi, BYTE 4
        dec     ecx
        jnz     near .quad_loop

.tail:
        pop     ecx
        and     ecx, BYTE 3             ; number of leftover pixels
        jz      .done

.single_loop:
        mov     eax, [esi]              ; one pixel at a time
        mov     ebx, eax

        shr     al, 6
        and     ah, 0e0h
        shr     ebx, 16
        shr     ah, 3
        and     bl, 0e0h

        or      al, ah
        or      al, bl

        mov     [edi], al

        inc     edi
        add     esi, BYTE 4

        dec     ecx
        jnz     .single_loop

.done:
        retn
1041 | |||
; For elf32 output only: emit an empty .note.GNU-stack section
; (flags: noalloc, noexec, nowrite, progbits).
%ifidn __OUTPUT_FORMAT__, elf32
        section .note.GNU-stack noalloc noexec nowrite progbits
%endif