diff options
author | Franklin Wei <git@fwei.tk> | 2017-01-21 15:18:31 -0500 |
---|---|---|
committer | Franklin Wei <git@fwei.tk> | 2017-12-23 21:01:26 -0500 |
commit | a855d6202536ff28e5aae4f22a0f31d8f5b325d0 (patch) | |
tree | 8c75f224dd64ed360505afa8843d016b0d75000b /apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c | |
parent | 01c6dcf6c7b9bb1ad2fa0450f99bacc5f3d3e04b (diff) | |
download | rockbox-a855d6202536ff28e5aae4f22a0f31d8f5b325d0.tar.gz rockbox-a855d6202536ff28e5aae4f22a0f31d8f5b325d0.zip |
Port of Duke Nukem 3D
This ports Fabien Sanglard's Chocolate Duke to run on a version of SDL
for Rockbox.
Change-Id: I8f2c4c78af19de10c1633ed7bb7a997b43256dd9
Diffstat (limited to 'apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c')
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c | 629 |
1 files changed, 629 insertions, 0 deletions
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c b/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c new file mode 100644 index 0000000000..5e166914c5 --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c | |||
@@ -0,0 +1,629 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [ibm] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "spu_common.h" | ||
29 | |||
30 | #include <spu_intrinsics.h> | ||
31 | #include <spu_mfcio.h> | ||
32 | |||
33 | // Debugging | ||
34 | //#define DEBUG | ||
35 | |||
/*
 * deprintf() - debug-only printf.
 *
 * With DEBUG defined the message is printed to stdout and flushed right
 * away; otherwise the call expands to nothing.
 *
 * The DEBUG variant is wrapped in do { } while (0) so that it behaves as a
 * single statement, e.g. as the body of an unbraced if/else.
 */
#ifdef DEBUG
#define deprintf(fmt, args...) \
    do { \
        fprintf(stdout, fmt, ##args); \
        fflush(stdout); \
    } while (0)
#else
#define deprintf(fmt, args...)
#endif
43 | |||
44 | struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128))); | ||
45 | |||
46 | /* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored | ||
47 | * there might be the need to retrieve misaligned data, adjust | ||
48 | * incoming v and u plane to be able to handle this (add 128) | ||
49 | */ | ||
50 | unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128))); | ||
51 | unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); | ||
52 | unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); | ||
53 | |||
54 | /* A maximum of 4 lines BGRA are stored, 4 byte per pixel */ | ||
55 | unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128))); | ||
56 | |||
57 | /* some vectors needed by the float to int conversion */ | ||
58 | static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; | ||
59 | static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; | ||
60 | |||
61 | void yuv_to_rgb_w16(); | ||
62 | void yuv_to_rgb_w32(); | ||
63 | |||
64 | void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width); | ||
65 | void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width); | ||
66 | |||
67 | |||
68 | int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused))) | ||
69 | { | ||
70 | deprintf("[SPU] yuv2rgb_spu is up... (on SPE #%llu)\n", spe_id); | ||
71 | uint32_t ea_mfc, mbox; | ||
72 | // send ready message | ||
73 | spu_write_out_mbox(SPU_READY); | ||
74 | |||
75 | while (1) { | ||
76 | /* Check mailbox */ | ||
77 | mbox = spu_read_in_mbox(); | ||
78 | deprintf("[SPU] Message is %u\n", mbox); | ||
79 | switch (mbox) { | ||
80 | case SPU_EXIT: | ||
81 | deprintf("[SPU] fb_writer goes down...\n"); | ||
82 | return 0; | ||
83 | case SPU_START: | ||
84 | break; | ||
85 | default: | ||
86 | deprintf("[SPU] Cannot handle message\n"); | ||
87 | continue; | ||
88 | } | ||
89 | |||
90 | /* Tag Manager setup */ | ||
91 | unsigned int tag_id; | ||
92 | tag_id = mfc_multi_tag_reserve(1); | ||
93 | if (tag_id == MFC_TAG_INVALID) { | ||
94 | deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n"); | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | /* DMA transfer for the input parameters */ | ||
99 | ea_mfc = spu_read_in_mbox(); | ||
100 | deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc); | ||
101 | spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD); | ||
102 | DMA_WAIT_TAG(tag_id); | ||
103 | |||
104 | /* There are alignment issues that involve handling of special cases | ||
105 | * a width of 32 results in a width of 16 in the chrominance | ||
106 | * --> choose the proper handling to optimize the performance | ||
107 | */ | ||
108 | deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height); | ||
109 | if (parms_converter.src_pixel_width & 0x1f) { | ||
110 | deprintf("[SPU] Using yuv_to_rgb_w16\n"); | ||
111 | yuv_to_rgb_w16(); | ||
112 | } else { | ||
113 | deprintf("[SPU] Using yuv_to_rgb_w32\n"); | ||
114 | yuv_to_rgb_w32(); | ||
115 | } | ||
116 | |||
117 | mfc_multi_tag_release(tag_id, 1); | ||
118 | deprintf("[SPU] yuv2rgb_spu... done!\n"); | ||
119 | /* Send FIN message */ | ||
120 | spu_write_out_mbox(SPU_FIN); | ||
121 | } | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | |||
127 | /* | ||
128 | * float_to_char() | ||
129 | * | ||
130 | * converts a float to a character using saturated | ||
131 | * arithmetic | ||
132 | * | ||
133 | * @param s float for conversion | ||
134 | * @returns converted character | ||
135 | */ | ||
136 | inline static unsigned char float_to_char(float s) { | ||
137 | vector float vec_s = spu_splats(s); | ||
138 | vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); | ||
139 | vec_s = spu_sel(vec_s, vec_0_1, select_1); | ||
140 | |||
141 | vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); | ||
142 | vec_s = spu_sel(vec_s, vec_255, select_2); | ||
143 | return (unsigned char) spu_extract(vec_s,0); | ||
144 | } | ||
145 | |||
146 | |||
147 | /* | ||
148 | * vfloat_to_vuint() | ||
149 | * | ||
150 | * converts a float vector to an unsinged int vector using saturated | ||
151 | * arithmetic | ||
152 | * | ||
153 | * @param vec_s float vector for conversion | ||
154 | * @returns converted unsigned int vector | ||
155 | */ | ||
156 | inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { | ||
157 | vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); | ||
158 | vec_s = spu_sel(vec_s, vec_0_1, select_1); | ||
159 | |||
160 | vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); | ||
161 | vec_s = spu_sel(vec_s, vec_255, select_2); | ||
162 | return spu_convtu(vec_s,0); | ||
163 | } | ||
164 | |||
165 | |||
166 | void yuv_to_rgb_w16() { | ||
167 | // Pixel dimensions of the picture | ||
168 | uint32_t width, height; | ||
169 | |||
170 | // Extract parameters | ||
171 | width = parms_converter.src_pixel_width; | ||
172 | height = parms_converter.src_pixel_height; | ||
173 | |||
174 | // Plane data management | ||
175 | // Y | ||
176 | unsigned char* ram_addr_y = parms_converter.y_plane; | ||
177 | // V | ||
178 | unsigned char* ram_addr_v = parms_converter.v_plane; | ||
179 | // U | ||
180 | unsigned char* ram_addr_u = parms_converter.u_plane; | ||
181 | |||
182 | // BGRA | ||
183 | unsigned char* ram_addr_bgra = parms_converter.dstBuffer; | ||
184 | |||
185 | // Strides | ||
186 | unsigned int stride_y = width; | ||
187 | unsigned int stride_vu = width>>1; | ||
188 | |||
189 | // Buffer management | ||
190 | unsigned int buf_idx = 0; | ||
191 | unsigned int size_4lines_y = stride_y<<2; | ||
192 | unsigned int size_2lines_y = stride_y<<1; | ||
193 | unsigned int size_2lines_vu = stride_vu<<1; | ||
194 | |||
195 | // 2*width*4byte_per_pixel | ||
196 | unsigned int size_2lines_bgra = width<<3; | ||
197 | |||
198 | |||
199 | // start double-buffered processing | ||
200 | // 4 lines y | ||
201 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
202 | |||
203 | // 2 lines v | ||
204 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
205 | |||
206 | // 2 lines u | ||
207 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
208 | |||
209 | // Wait for these transfers to be completed | ||
210 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
211 | |||
212 | unsigned int i; | ||
213 | for(i=0; i<(height>>2)-1; i++) { | ||
214 | |||
215 | buf_idx^=1; | ||
216 | |||
217 | // 4 lines y | ||
218 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
219 | |||
220 | // 2 lines v | ||
221 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
222 | |||
223 | // 2 lines u | ||
224 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
225 | |||
226 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
227 | |||
228 | buf_idx^=1; | ||
229 | |||
230 | |||
231 | // Convert YUV to BGRA, store it back (first two lines) | ||
232 | yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
233 | |||
234 | // Next two lines | ||
235 | yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, | ||
236 | v_plane[buf_idx] + stride_vu, | ||
237 | u_plane[buf_idx] + stride_vu, | ||
238 | bgra + size_2lines_bgra, | ||
239 | width); | ||
240 | |||
241 | // Wait for previous storing transfer to be completed | ||
242 | DMA_WAIT_TAG(STR_BUF); | ||
243 | |||
244 | // Store converted lines in two steps->max transfer size 16384 | ||
245 | spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
246 | ram_addr_bgra += size_2lines_bgra; | ||
247 | spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
248 | ram_addr_bgra += size_2lines_bgra; | ||
249 | |||
250 | // Move 4 lines | ||
251 | ram_addr_y += size_4lines_y; | ||
252 | ram_addr_v += size_2lines_vu; | ||
253 | ram_addr_u += size_2lines_vu; | ||
254 | |||
255 | buf_idx^=1; | ||
256 | } | ||
257 | |||
258 | // Convert YUV to BGRA, store it back (first two lines) | ||
259 | yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
260 | |||
261 | // Next two lines | ||
262 | yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, | ||
263 | v_plane[buf_idx] + stride_vu, | ||
264 | u_plane[buf_idx] + stride_vu, | ||
265 | bgra + size_2lines_bgra, | ||
266 | width); | ||
267 | |||
268 | // Wait for previous storing transfer to be completed | ||
269 | DMA_WAIT_TAG(STR_BUF); | ||
270 | spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
271 | ram_addr_bgra += size_2lines_bgra; | ||
272 | spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
273 | |||
274 | // wait for previous storing transfer to be completed | ||
275 | DMA_WAIT_TAG(STR_BUF); | ||
276 | |||
277 | } | ||
278 | |||
279 | |||
280 | void yuv_to_rgb_w32() { | ||
281 | // Pixel dimensions of the picture | ||
282 | uint32_t width, height; | ||
283 | |||
284 | // Extract parameters | ||
285 | width = parms_converter.src_pixel_width; | ||
286 | height = parms_converter.src_pixel_height; | ||
287 | |||
288 | // Plane data management | ||
289 | // Y | ||
290 | unsigned char* ram_addr_y = parms_converter.y_plane; | ||
291 | // V | ||
292 | unsigned char* ram_addr_v = parms_converter.v_plane; | ||
293 | // U | ||
294 | unsigned char* ram_addr_u = parms_converter.u_plane; | ||
295 | |||
296 | // BGRA | ||
297 | unsigned char* ram_addr_bgra = parms_converter.dstBuffer; | ||
298 | |||
299 | // Strides | ||
300 | unsigned int stride_y = width; | ||
301 | unsigned int stride_vu = width>>1; | ||
302 | |||
303 | // Buffer management | ||
304 | unsigned int buf_idx = 0; | ||
305 | unsigned int size_4lines_y = stride_y<<2; | ||
306 | unsigned int size_2lines_y = stride_y<<1; | ||
307 | unsigned int size_2lines_vu = stride_vu<<1; | ||
308 | |||
309 | // 2*width*4byte_per_pixel | ||
310 | unsigned int size_2lines_bgra = width<<3; | ||
311 | |||
312 | // start double-buffered processing | ||
313 | // 4 lines y | ||
314 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
315 | // 2 lines v | ||
316 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
317 | // 2 lines u | ||
318 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
319 | |||
320 | // Wait for these transfers to be completed | ||
321 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
322 | |||
323 | unsigned int i; | ||
324 | for(i=0; i < (height>>2)-1; i++) { | ||
325 | buf_idx^=1; | ||
326 | // 4 lines y | ||
327 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
328 | deprintf("4lines = %d\n", size_4lines_y); | ||
329 | // 2 lines v | ||
330 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
331 | deprintf("2lines = %d\n", size_2lines_vu); | ||
332 | // 2 lines u | ||
333 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
334 | deprintf("2lines = %d\n", size_2lines_vu); | ||
335 | |||
336 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
337 | |||
338 | buf_idx^=1; | ||
339 | |||
340 | // Convert YUV to BGRA, store it back (first two lines) | ||
341 | yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
342 | |||
343 | // Next two lines | ||
344 | yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, | ||
345 | v_plane[buf_idx] + stride_vu, | ||
346 | u_plane[buf_idx] + stride_vu, | ||
347 | bgra + size_2lines_bgra, | ||
348 | width); | ||
349 | |||
350 | // Wait for previous storing transfer to be completed | ||
351 | DMA_WAIT_TAG(STR_BUF); | ||
352 | |||
353 | // Store converted lines in two steps->max transfer size 16384 | ||
354 | spu_mfcdma32(bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
355 | ram_addr_bgra += size_2lines_bgra; | ||
356 | spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
357 | ram_addr_bgra += size_2lines_bgra; | ||
358 | |||
359 | // Move 4 lines | ||
360 | ram_addr_y += size_4lines_y; | ||
361 | ram_addr_v += size_2lines_vu; | ||
362 | ram_addr_u += size_2lines_vu; | ||
363 | |||
364 | buf_idx^=1; | ||
365 | } | ||
366 | |||
367 | // Convert YUV to BGRA, store it back (first two lines) | ||
368 | yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
369 | |||
370 | // Next two lines | ||
371 | yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, | ||
372 | v_plane[buf_idx] + stride_vu, | ||
373 | u_plane[buf_idx] + stride_vu, | ||
374 | bgra + size_2lines_bgra, | ||
375 | width); | ||
376 | |||
377 | // Wait for previous storing transfer to be completed | ||
378 | DMA_WAIT_TAG(STR_BUF); | ||
379 | spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
380 | ram_addr_bgra += size_2lines_bgra; | ||
381 | spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
382 | |||
383 | // Wait for previous storing transfer to be completed | ||
384 | DMA_WAIT_TAG(STR_BUF); | ||
385 | } | ||
386 | |||
387 | |||
388 | /* Some vectors needed by the yuv 2 rgb conversion algorithm */ | ||
389 | const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f }; | ||
390 | const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | ||
391 | const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 }; | ||
392 | const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 }; | ||
393 | const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B }; | ||
394 | const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F }; | ||
395 | |||
396 | const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f}; | ||
397 | const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f}; | ||
398 | const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f}; | ||
399 | const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f}; | ||
400 | |||
401 | const vector unsigned int vec_alpha = { 255 << 24, 255 << 24, 255 << 24, 255 << 24 }; | ||
402 | |||
403 | const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 }; | ||
404 | const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F }; | ||
405 | |||
406 | |||
/*
 * yuv_to_rgb_w16_line()
 *
 * Converts two lines of yuv-input to BGRA, one 2x2 pixel block at a time.
 * The width has to be a multiple of 16 (at least even: every step reads
 * and writes the pixels x and x + 1 of both lines).
 *
 * The second input line is expected "width" bytes after the first one and
 * the second output line "width" pixels after the first one.
 *
 * @param y_addr address of the y plane in local store
 * @param v_addr address of the v plane in local store
 * @param u_addr address of the u plane in local store
 * @param bgra_addr_ address of the bgra output buffer
 * @param width the width in pixel
 */
void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) {
    // each pixel is stored as an integer
    unsigned int* bgra_addr = (unsigned int*) bgra_addr_;

    unsigned int x;
    // Step through the line two pixels at a time, since each u and v value
    // applies to 4 pixels (two high, two wide)
    for (x = 0; x < width; x += 2) {
        const unsigned char U = *(u_addr + (x >> 1));
        const unsigned char V = *(v_addr + (x >> 1));

        const float U_minus_128 = (float)U - 128.0f;
        const float V_minus_128 = (float)V - 128.0f;

        // Chrominance contribution shared by all four pixels of the block
        const float R_precalculate = 1.403f * V_minus_128;
        const float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128);
        const float B_precalculate = 1.773f * U_minus_128;

        // Pixel offsets of the 2x2 block: upper pair, then lower pair
        const unsigned int pixel_at[4] = { x, x + 1, x + width, x + width + 1 };

        unsigned int p;
        for (p = 0; p < 4; p++) {
            const unsigned char Y = *(y_addr + pixel_at[p]);

            const unsigned int R = float_to_char(Y + R_precalculate);
            const unsigned int G = float_to_char(Y + G_precalculate);
            const unsigned int B = float_to_char(Y + B_precalculate);

            // Packed in unsigned arithmetic: 255 << 24 does not fit into
            // a signed int
            *(bgra_addr + pixel_at[p]) = B | (G << 8) | (R << 16) | (255u << 24);
        }
    }
}
459 | |||
460 | |||
461 | /* | ||
462 | * yuv_to_rgb_w32_line() | ||
463 | * | ||
464 | * processes two lines of yuv-input, width has to be a multiple of 32 | ||
465 | * two lines of yuv are taken as input | ||
466 | * | ||
467 | * @param y_addr address of the y plane in local store | ||
468 | * @param v_addr address of the v plane in local store | ||
469 | * @param u_addr address of the u plane in local store | ||
470 | * @param bgra_addr_ address of the bgra output buffer | ||
471 | * @param width the width in pixel | ||
472 | */ | ||
473 | void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) { | ||
474 | // each pixel is stored as an integer | ||
475 | unsigned int* bgra_addr = (unsigned int*) bgra_addr_; | ||
476 | |||
477 | unsigned int x; | ||
478 | for(x = 0; x < width; x+=32) { | ||
479 | // Gehe zweischrittig durch die zeile, da jeder u und v wert fuer 4 pixel(zwei hoch, zwei breit) gilt | ||
480 | |||
481 | const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x)); | ||
482 | const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16)); | ||
483 | const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width)); | ||
484 | const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16)); | ||
485 | const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1))); | ||
486 | const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1))); | ||
487 | |||
488 | const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128); | ||
489 | const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_second), 0),vec_minus_128); | ||
490 | const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128); | ||
491 | const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128); | ||
492 | |||
493 | const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128); | ||
494 | const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128); | ||
495 | const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128); | ||
496 | const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128); | ||
497 | |||
498 | vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0); | ||
499 | vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0); | ||
500 | vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0); | ||
501 | vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0); | ||
502 | vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0); | ||
503 | vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0); | ||
504 | vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0); | ||
505 | vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0); | ||
506 | vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0); | ||
507 | vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_second), 0); | ||
508 | vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0); | ||
509 | vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0); | ||
510 | vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0); | ||
511 | vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0); | ||
512 | vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0); | ||
513 | vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0); | ||
514 | |||
515 | const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1); | ||
516 | const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2); | ||
517 | const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3); | ||
518 | const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4); | ||
519 | |||
520 | const vector float R1_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_upper); | ||
521 | const vector float R2_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_lower); | ||
522 | const vector float R3_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_upper); | ||
523 | const vector float R4_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_lower); | ||
524 | const vector float R5_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_upper); | ||
525 | const vector float R6_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_lower); | ||
526 | const vector float R7_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_upper); | ||
527 | const vector float R8_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_lower); | ||
528 | |||
529 | |||
530 | const vector float G1a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff)); | ||
531 | const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff)); | ||
532 | const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff)); | ||
533 | const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff)); | ||
534 | |||
535 | const vector float G1_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_upper); | ||
536 | const vector float G2_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_lower); | ||
537 | const vector float G3_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_upper); | ||
538 | const vector float G4_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_lower); | ||
539 | const vector float G5_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_upper); | ||
540 | const vector float G6_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_lower); | ||
541 | const vector float G7_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_upper); | ||
542 | const vector float G8_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_lower); | ||
543 | |||
544 | |||
545 | const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1); | ||
546 | const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2); | ||
547 | const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3); | ||
548 | const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4); | ||
549 | |||
550 | const vector float B1_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_upper); | ||
551 | const vector float B2_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_lower); | ||
552 | const vector float B3_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_upper); | ||
553 | const vector float B4_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_lower); | ||
554 | const vector float B5_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_upper); | ||
555 | const vector float B6_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_lower); | ||
556 | const vector float B7_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_upper); | ||
557 | const vector float B8_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_lower); | ||
558 | |||
559 | |||
560 | const vector unsigned int R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate)); | ||
561 | const vector unsigned int R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate)); | ||
562 | const vector unsigned int R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate)); | ||
563 | const vector unsigned int R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate)); | ||
564 | const vector unsigned int R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate)); | ||
565 | const vector unsigned int R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate)); | ||
566 | const vector unsigned int R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate)); | ||
567 | const vector unsigned int R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate)); | ||
568 | const vector unsigned int R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate)); | ||
569 | const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate)); | ||
570 | const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate)); | ||
571 | const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate)); | ||
572 | const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate)); | ||
573 | const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate)); | ||
574 | const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate)); | ||
575 | const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate)); | ||
576 | |||
577 | const vector unsigned int G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate)); | ||
578 | const vector unsigned int G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate)); | ||
579 | const vector unsigned int G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate)); | ||
580 | const vector unsigned int G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate)); | ||
581 | const vector unsigned int G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate)); | ||
582 | const vector unsigned int G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate)); | ||
583 | const vector unsigned int G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate)); | ||
584 | const vector unsigned int G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate)); | ||
585 | const vector unsigned int G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate)); | ||
586 | const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate)); | ||
587 | const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate)); | ||
588 | const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate)); | ||
589 | const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate)); | ||
590 | const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate)); | ||
591 | const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate)); | ||
592 | const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate)); | ||
593 | |||
594 | const vector unsigned int B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate)); | ||
595 | const vector unsigned int B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate)); | ||
596 | const vector unsigned int B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate)); | ||
597 | const vector unsigned int B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate)); | ||
598 | const vector unsigned int B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate)); | ||
599 | const vector unsigned int B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate)); | ||
600 | const vector unsigned int B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate)); | ||
601 | const vector unsigned int B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate)); | ||
602 | const vector unsigned int B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate)); | ||
603 | const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate)); | ||
604 | const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate)); | ||
605 | const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate)); | ||
606 | const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate)); | ||
607 | const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate)); | ||
608 | const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate)); | ||
609 | const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate)); | ||
610 | |||
611 | *((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha, B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1))); | ||
612 | *((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha, B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1))); | ||
613 | *((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha, B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1))); | ||
614 | *((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha, B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1))); | ||
615 | *((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha, B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1))); | ||
616 | *((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha, B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1))); | ||
617 | *((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha, B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1))); | ||
618 | *((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha, B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1))); | ||
619 | *((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha, B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1))); | ||
620 | *((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 2),spu_slqwbyte(G_10, 1))); | ||
621 | *((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1))); | ||
622 | *((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1))); | ||
623 | *((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1))); | ||
624 | *((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1))); | ||
625 | *((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1))); | ||
626 | *((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1))); | ||
627 | } | ||
628 | } | ||
629 | |||