summaryrefslogtreecommitdiff
path: root/apps/plugins/sdl/src/video/ps3
diff options
context:
space:
mode:
Diffstat (limited to 'apps/plugins/sdl/src/video/ps3')
-rw-r--r--apps/plugins/sdl/src/video/ps3/SDL_ps3events.c44
-rw-r--r--apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h41
-rw-r--r--apps/plugins/sdl/src/video/ps3/SDL_ps3video.c621
-rw-r--r--apps/plugins/sdl/src/video/ps3/SDL_ps3video.h165
-rw-r--r--apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c340
-rw-r--r--apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h44
-rw-r--r--apps/plugins/sdl/src/video/ps3/spulibs/Makefile83
-rw-r--r--apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c2050
-rw-r--r--apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c193
-rw-r--r--apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h108
-rw-r--r--apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c629
11 files changed, 0 insertions, 4318 deletions
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3events.c b/apps/plugins/sdl/src/video/ps3/SDL_ps3events.c
deleted file mode 100644
index e39efcc4f0..0000000000
--- a/apps/plugins/sdl/src/video/ps3/SDL_ps3events.c
+++ /dev/null
@@ -1,44 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "SDL_config.h"
29
30#include "../../events/SDL_sysevents.h"
31#include "../../events/SDL_events_c.h"
32#include "SDL_ps3video.h"
33#include "SDL_ps3events_c.h"
34
35void PS3_PumpEvents(_THIS)
36{
37 return;
38}
39
40void PS3_InitOSKeymap(_THIS)
41{
42 return;
43}
44
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h b/apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h
deleted file mode 100644
index fd11209af1..0000000000
--- a/apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h
+++ /dev/null
@@ -1,41 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "SDL_config.h"
29
30#ifndef _SDL_ps3events_h
31#define _SDL_ps3events_h
32
33#include "SDL_ps3video.h"
34
35extern void PS3_InitOSKeymap(_THIS);
36extern void PS3_PumpEvents(_THIS);
37
38extern void enable_cursor(int enable);
39
40#endif /* _SDL_ps3events_h */
41
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3video.c b/apps/plugins/sdl/src/video/ps3/SDL_ps3video.c
deleted file mode 100644
index d5519e051e..0000000000
--- a/apps/plugins/sdl/src/video/ps3/SDL_ps3video.c
+++ /dev/null
@@ -1,621 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "SDL_config.h"
29
30#include "SDL_video.h"
31#include "../SDL_sysvideo.h"
32#include "SDL_ps3events_c.h"
33#include "SDL_ps3video.h"
34#include "SDL_ps3yuv_c.h"
35#include "spulibs/spu_common.h"
36
37#include <fcntl.h>
38#include <stdlib.h>
39#include <sys/ioctl.h>
40#include <linux/kd.h>
41#include <sys/mman.h>
42
43#include <linux/fb.h>
44#include <asm/ps3fb.h>
45#include <libspe2.h>
46#include <malloc.h>
47
48/* SDL_VideoDevice functions */
49static int PS3_Available();
50static SDL_VideoDevice *PS3_CreateDevice(int devindex);
51static int PS3_VideoInit(_THIS, SDL_PixelFormat * vformat);
52static void PS3_VideoQuit(_THIS);
53static void PS3_DeleteDevice(SDL_VideoDevice * device);
54static SDL_Surface *PS3_SetVideoMode(_THIS, SDL_Surface * current, int width, int height, int bpp, Uint32 flags);
55static SDL_Rect **PS3_ListModes(_THIS, SDL_PixelFormat * format, Uint32 flags);
56
57/* Hardware surface functions */
58static int PS3_AllocHWSurface(_THIS, SDL_Surface * surface);
59static void PS3_FreeHWSurface(_THIS, SDL_Surface * surface);
60static int PS3_LockHWSurface(_THIS, SDL_Surface * surface);
61static void PS3_UnlockHWSurface(_THIS, SDL_Surface * surface);
62static int PS3_FlipDoubleBuffer(_THIS, SDL_Surface * surface);
63static void PS3_DoubleBufferUpdate(_THIS, int numrects, SDL_Rect * rects);
64
65/* SPU specific functions */
66int SPE_Start(_THIS, spu_data_t * spe_data);
67int SPE_Stop(_THIS, spu_data_t * spe_data);
68int SPE_Boot(_THIS, spu_data_t * spe_data);
69int SPE_Shutdown(_THIS, spu_data_t * spe_data);
70int SPE_SendMsg(_THIS, spu_data_t * spe_data, unsigned int msg);
71int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg);
72void SPE_RunContext(void *thread_argp);
73
74/* Helpers */
75void enable_cursor(int enable);
76
77/* Stores the SPE executable name of fb_writer_spu */
78extern spe_program_handle_t fb_writer_spu;
79
/*
 * SDL PS3 bootstrap function for checking availability.
 *
 * No probing is performed: the driver always reports itself as usable.
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype; it remains compatible with the earlier `PS3_Available()`
 * declaration.
 *
 * Returns 1.
 */
static int PS3_Available(void)
{
    return 1;
}
85
86/* SDL PS3 bootstrap function for creating the device */
87static SDL_VideoDevice *PS3_CreateDevice(int devindex)
88{
89 SDL_VideoDevice *this;
90
91 /* Initialise SDL_VideoDevice */
92 this = (SDL_VideoDevice *) SDL_malloc(sizeof(SDL_VideoDevice));
93 if (this) {
94 memset(this, 0, sizeof *this);
95 this->hidden = (struct SDL_PrivateVideoData *)
96 SDL_malloc(sizeof(struct SDL_PrivateVideoData));
97 }
98 /* Error handling */
99 if ((this == NULL) || (this->hidden == NULL)) {
100 SDL_OutOfMemory();
101 if (this)
102 SDL_free(this);
103 return 0;
104 }
105 memset(this->hidden, 0, sizeof(struct SDL_PrivateVideoData));
106
107 /* Set the function pointers */
108 this->VideoInit = PS3_VideoInit;
109 this->ListModes = PS3_ListModes;
110 this->SetVideoMode = PS3_SetVideoMode;
111 this->SetColors = 0;
112 this->CreateYUVOverlay = PS3_CreateYUVOverlay;
113 this->UpdateRects = 0;
114 this->VideoQuit = PS3_VideoQuit;
115 this->AllocHWSurface = PS3_AllocHWSurface;
116 this->CheckHWBlit = 0;
117 this->FillHWRect = 0;
118 this->SetHWColorKey = 0;
119 this->SetHWAlpha = 0;
120 this->LockHWSurface = PS3_LockHWSurface;
121 this->UnlockHWSurface = PS3_UnlockHWSurface;
122 this->FlipHWSurface = PS3_FlipDoubleBuffer;
123 this->FreeHWSurface = PS3_FreeHWSurface;
124 this->SetCaption = 0;
125 this->SetIcon = 0;
126 this->IconifyWindow = 0;
127 this->GrabInput = 0;
128 this->GetWMInfo = 0;
129 this->InitOSKeymap = PS3_InitOSKeymap;
130 this->PumpEvents = PS3_PumpEvents;
131
132 this->free = PS3_DeleteDevice;
133
134 return this;
135}
136
137
138/* Bootstraping (see SDL_sysvideo.h) */
139VideoBootStrap PS3_bootstrap = {
140 "ps3", "PS3 Cell SPU Driver",
141 PS3_Available, PS3_CreateDevice
142};
143
144
145/* Delete the device */
146static void PS3_DeleteDevice(SDL_VideoDevice * device)
147{
148 free(device->hidden);
149 free(device);
150}
151
152
153/* Initialise the PS3 video device */
154static int PS3_VideoInit(_THIS, SDL_PixelFormat * vformat)
155{
156 /* Hide the cursor */
157 enable_cursor(0);
158
159 /* Create SPU fb_parms and thread structure */
160 fb_parms = (struct fb_writer_parms_t *)
161 memalign(16, sizeof(struct fb_writer_parms_t));
162 fb_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
163 if (fb_parms == NULL || fb_thread_data == NULL) {
164 SDL_OutOfMemory();
165 return -1;
166 }
167 fb_thread_data->program = fb_writer_spu;
168 fb_thread_data->program_name = "fb_writer_spu";
169 fb_thread_data->argp = (void *)fb_parms;
170 fb_thread_data->keepalive = 1;
171 fb_thread_data->booted = 0;
172
173 SPE_Start(this, fb_thread_data);
174
175 /* Open the device */
176 fb_dev_fd = open(PS3_DEV_FB, O_RDWR);
177 if (fb_dev_fd < 0) {
178 SDL_SetError("[PS3] Unable to open device %s", PS3_DEV_FB);
179 return -1;
180 }
181
182 /* Get vscreeninfo */
183 if (ioctl(fb_dev_fd, FBIOGET_VSCREENINFO, &fb_vinfo)) {
184 SDL_SetError("[PS3] Can't get VSCREENINFO");
185 if (fb_dev_fd >= 0)
186 close(fb_dev_fd);
187 fb_dev_fd = -1;
188 return -1;
189 }
190
191 /* Fill in our hardware acceleration capabilities */
192 this->info.current_w = fb_vinfo.xres;
193 this->info.current_h = fb_vinfo.yres;
194 this->info.wm_available = 0;
195 this->info.hw_available = 1;
196
197 /* Backup the original vinfo to restore later */
198 fb_orig_vinfo = fb_vinfo;
199
200 /* 16 and 15 bpp is reported as 16 bpp */
201 fb_bits_per_pixel = fb_vinfo.bits_per_pixel;
202 if (fb_bits_per_pixel == 16)
203 fb_bits_per_pixel =
204 fb_vinfo.red.length + fb_vinfo.green.length +
205 fb_vinfo.blue.length;
206
207 /* Set SDL_PixelFormat */
208 vformat->BitsPerPixel = fb_vinfo.bits_per_pixel;
209
210 fb_vinfo.xres_virtual = fb_vinfo.xres;
211 fb_vinfo.yres_virtual = fb_vinfo.yres;
212
213 /* Put vscreeninfo */
214 if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_vinfo)) {
215 SDL_SetError("[PS3] Can't put VSCREENINFO");
216 if (fb_dev_fd >= 0)
217 close(fb_dev_fd);
218 fb_dev_fd = -1;
219 return -1;
220 }
221
222 s_fb_pixel_size = fb_vinfo.bits_per_pixel / 8;
223
224 s_writeable_width = fb_vinfo.xres;
225 s_writeable_height = fb_vinfo.yres;
226
227 /* Get ps3 screeninfo */
228 if (ioctl(fb_dev_fd, PS3FB_IOCTL_SCREENINFO, (unsigned long)&res) < 0) {
229 SDL_SetError("[PS3] PS3FB_IOCTL_SCREENINFO failed");
230 }
231 deprintf(1, "[PS3] xres:%d yres:%d xoff:%d yoff:%d\n", res.xres, res.yres, res.xoff, res.yoff);
232
233 /* Only use double buffering if enough fb memory is available */
234 if (res.num_frames < 2) {
235 double_buffering = 0;
236 } else {
237 double_buffering = 1;
238 }
239
240 real_width = res.xres;
241 real_height = res.yres;
242
243 /*
244 * Take control of frame buffer from kernel, for details see
245 * http://felter.org/wesley/files/ps3/linux-20061110-docs/ApplicationProgrammingEnvironment.html
246 * kernel will no longer flip the screen itself
247 */
248 ioctl(fb_dev_fd, PS3FB_IOCTL_ON, 0);
249
250 /* Unblank screen */
251 ioctl(fb_dev_fd, FBIOBLANK, 0);
252
253 return 0;
254}
255
256
257/* List available PS3 resolutions */
258static SDL_Rect **PS3_ListModes(_THIS, SDL_PixelFormat * format, Uint32 flags)
259{
260 /* A list of video resolutions that we query for (sorted largest to
261 * smallest)
262 */
263 static SDL_Rect PS3_resolutions[] = {
264 {0, 0, 1920, 1080}, // 1080p 16:9 HD
265 {0, 0, 1600, 1200}, // WUXGA
266 {0, 0, 1280, 1024}, // SXGA
267 {0, 0, 1280, 720}, // 720p 16:9 HD
268 {0, 0, 1024, 768}, // WXGA
269 {0, 0, 1024, 576}, // 576p 16:9
270 {0, 0, 853, 480}, // 480p 16:9
271 {0, 0, 720, 576}, // 576p 4:3 (PAL)
272 {0, 0, 720, 480}, // 480p 16:9 (NTSC)
273 };
274 static SDL_Rect *PS3_modes[] = {
275 &PS3_resolutions[0],
276 &PS3_resolutions[1],
277 &PS3_resolutions[2],
278 &PS3_resolutions[3],
279 &PS3_resolutions[4],
280 &PS3_resolutions[5],
281 &PS3_resolutions[6],
282 &PS3_resolutions[7],
283 &PS3_resolutions[8],
284 NULL
285 };
286 SDL_Rect **modes = PS3_modes;
287
288 return modes;
289}
290
291
292/* Get a list of the available display modes */
293static SDL_Surface *PS3_SetVideoMode(_THIS, SDL_Surface * current, int width, int height, int bpp, Uint32 flags)
294{
295 s_bounded_input_width = width < s_writeable_width ? width : s_writeable_width;
296 s_bounded_input_height = height < s_writeable_height ? height : s_writeable_height;
297 s_bounded_input_width_offset = (s_writeable_width - s_bounded_input_width) >> 1;
298 s_bounded_input_height_offset = (s_writeable_height - s_bounded_input_height) >> 1;
299 s_input_line_length = width * s_fb_pixel_size;
300
301 current->flags |= flags;
302
303 if (ioctl(fb_dev_fd, FBIOGET_FSCREENINFO, &fb_finfo)) {
304 SDL_SetError("[PS3] Can't get fixed screeninfo");
305 return NULL;
306 }
307
308 if (fb_finfo.type != FB_TYPE_PACKED_PIXELS) {
309 SDL_SetError("[PS3] type %s not supported",
310 fb_finfo.type);
311 return NULL;
312 }
313
314 /* Note: on PS3, fb_finfo.smem_len is enough for double buffering */
315 if ((frame_buffer =
316 (uint8_t *) mmap(0, fb_finfo.smem_len,
317 PROT_READ | PROT_WRITE, MAP_SHARED,
318 fb_dev_fd, 0)) == (uint8_t *) - 1) {
319 SDL_SetError("[PS3] Can't mmap for %s", PS3_DEV_FB);
320 return NULL;
321 } else {
322 current->flags |= SDL_DOUBLEBUF;
323 }
324 if (!SDL_ReallocFormat(current, fb_bits_per_pixel, 0, 0, 0, 0)) {
325 return (NULL);
326 }
327
328 /* Blank screen */
329 memset(frame_buffer, 0x00, fb_finfo.smem_len);
330
331 /* Centering */
332 s_center[0] =
333 frame_buffer + s_bounded_input_width_offset * s_fb_pixel_size +
334 s_bounded_input_height_offset * fb_finfo.line_length;
335 s_center[1] = s_center[0] + real_height * fb_finfo.line_length;
336 s_center_index = 0;
337
338 current->flags |= SDL_FULLSCREEN;
339 current->w = width;
340 current->h = height;
341 current->pitch = SDL_CalculatePitch(current);
342
343 /* Alloc aligned mem for current->pixels */
344 s_pixels = memalign(16, current->h * current->pitch);
345 current->pixels = (void *)s_pixels;
346 if (!current->pixels) {
347 SDL_OutOfMemory();
348 return NULL;
349 }
350
351 /* Set the update rectangle function */
352 this->UpdateRects = PS3_DoubleBufferUpdate;
353
354 return current;
355}
356
357
358/* Copy screen to framebuffer and flip */
359void PS3_DoubleBufferUpdate(_THIS, int numrects, SDL_Rect * rects)
360{
361 if (converter_thread_data && converter_thread_data->booted)
362 SPE_WaitForMsg(this, converter_thread_data, SPU_FIN);
363
364 /* Adjust centering */
365 s_bounded_input_width_offset = (s_writeable_width - s_bounded_input_width) >> 1;
366 s_bounded_input_height_offset = (s_writeable_height - s_bounded_input_height) >> 1;
367 s_center[0] = frame_buffer + s_bounded_input_width_offset * s_fb_pixel_size +
368 s_bounded_input_height_offset * fb_finfo.line_length;
369 s_center[1] = s_center[0] + real_height * fb_finfo.line_length;
370
371 /* Set SPU parms for copying the surface to framebuffer */
372 fb_parms->data = (unsigned char *)s_pixels;
373 fb_parms->center = s_center[s_center_index];
374 fb_parms->out_line_stride = fb_finfo.line_length;
375 fb_parms->in_line_stride = s_input_line_length;
376 fb_parms->bounded_input_height = s_bounded_input_height;
377 fb_parms->bounded_input_width = s_bounded_input_width;
378 fb_parms->fb_pixel_size = s_fb_pixel_size;
379
380 deprintf(3, "[PS3->SPU] fb_thread_data->argp = 0x%x\n", fb_thread_data->argp);
381
382 /* Copying.. */
383 SPE_SendMsg(this, fb_thread_data, SPU_START);
384 SPE_SendMsg(this, fb_thread_data, (unsigned int)fb_thread_data->argp);
385
386 SPE_WaitForMsg(this, fb_thread_data, SPU_FIN);
387
388 /* Flip the pages */
389 if (double_buffering)
390 s_center_index = s_center_index ^ 0x01;
391 PS3_FlipDoubleBuffer(this, this->screen);
392}
393
394
/*
 * Enable/Disable cursor.
 *
 * Switches /dev/console to KD_TEXT when enable is non-zero and to
 * KD_GRAPHICS otherwise.  Nothing happens if the console cannot be opened,
 * and the ioctl result is not checked.
 */
void enable_cursor(int enable)
{
    int console = open("/dev/console", O_RDWR | O_NONBLOCK);

    if (console < 0)
        return;

    if (enable)
        ioctl(console, KDSETMODE, KD_TEXT);
    else
        ioctl(console, KDSETMODE, KD_GRAPHICS);

    close(console);
}
404
405
406static int PS3_AllocHWSurface(_THIS, SDL_Surface * surface)
407{
408 return -1;
409}
410
411
412static void PS3_FreeHWSurface(_THIS, SDL_Surface * surface)
413{
414 return;
415}
416
417
418static int PS3_LockHWSurface(_THIS, SDL_Surface * surface)
419{
420 return 0;
421}
422
423
424static void PS3_UnlockHWSurface(_THIS, SDL_Surface * surface)
425{
426 return;
427}
428
429
430/* Blit/Flip buffer to the screen. Must be called after each frame! */
431int PS3_FlipDoubleBuffer(_THIS, SDL_Surface * surface)
432{
433 unsigned long crt = 0;
434 /* Wait for vsync */
435 deprintf(1, "[PS3] Wait for vsync\n");
436 ioctl(fb_dev_fd, FBIO_WAITFORVSYNC, &crt);
437 /* Page flip */
438 deprintf(1, "[PS3] Page flip to buffer #%u 0x%x\n", s_center_index, s_center[s_center_index]);
439 ioctl(fb_dev_fd, PS3FB_IOCTL_FSEL, (unsigned long)&s_center_index);
440 return 1;
441}
442
443
444/* Start the SPE thread */
445int SPE_Start(_THIS, spu_data_t * spe_data)
446{
447 deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name);
448 if (!(spe_data->booted))
449 SPE_Boot(this, spe_data);
450
451 /* To allow re-running of context, spe_ctx_entry has to be set before each call */
452 spe_data->entry = SPE_DEFAULT_ENTRY;
453 spe_data->error_code = 0;
454
455 /* Create SPE thread and run */
456 deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name);
457 if (pthread_create
458 (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) {
459 deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name);
460 SDL_SetError("[PS3->SPU] Could not create pthread for spe");
461 return -1;
462 }
463
464 if (spe_data->keepalive)
465 SPE_WaitForMsg(this, spe_data, SPU_READY);
466}
467
468
469/* Stop the SPE thread */
470int SPE_Stop(_THIS, spu_data_t * spe_data)
471{
472 deprintf(2, "[PS3->SPU] Stop SPE: %s\n", spe_data->program_name);
473 /* Wait for SPE thread to complete */
474 deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name);
475 if (pthread_join(spe_data->thread, NULL)) {
476 deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name);
477 SDL_SetError("[PS3->SPU] Failed joining the thread");
478 return -1;
479 }
480
481 return 0;
482}
483
484
485/* Create SPE context and load program */
486int SPE_Boot(_THIS, spu_data_t * spe_data)
487{
488 /* Create SPE context */
489 deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name);
490 spe_data->ctx = spe_context_create(0, NULL);
491 if (spe_data->ctx == NULL) {
492 deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name);
493 SDL_SetError("[PS3->SPU] Failed creating SPE context");
494 return -1;
495 }
496
497 /* Load SPE object into SPE local store */
498 deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name);
499 if (spe_program_load(spe_data->ctx, &spe_data->program)) {
500 deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name);
501 SDL_SetError
502 ("[PS3->SPU] Failed loading program into SPE context");
503 return -1;
504 }
505 spe_data->booted = 1;
506 deprintf(2, "[PS3->SPU] SPE boot successful\n");
507
508 return 0;
509}
510
511/* (Stop and) shutdown the SPE */
512int SPE_Shutdown(_THIS, spu_data_t * spe_data)
513{
514 if (spe_data->keepalive && spe_data->booted) {
515 SPE_SendMsg(this, spe_data, SPU_EXIT);
516 SPE_Stop(this, spe_data);
517 }
518
519 /* Destroy SPE context */
520 deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name);
521 if (spe_context_destroy(spe_data->ctx)) {
522 deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name);
523 SDL_SetError("[PS3->SPU] Failed destroying context");
524 return -1;
525 }
526 deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name);
527 return 0;
528}
529
530
531/* Send message to the SPE via mailboxe */
532int SPE_SendMsg(_THIS, spu_data_t * spe_data, unsigned int msg)
533{
534 deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name);
535 /* Send one message, block until message was sent */
536 unsigned int spe_in_mbox_msgs[1];
537 spe_in_mbox_msgs[0] = msg;
538 int in_mbox_write = spe_in_mbox_write(spe_data->ctx, spe_in_mbox_msgs, 1, SPE_MBOX_ALL_BLOCKING);
539
540 if (1 > in_mbox_write) {
541 deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name);
542 SDL_SetError("[PS3->SPU] No message could be written");
543 return -1;
544 }
545 return 0;
546}
547
548
549/* Read 1 message from SPE, block until at least 1 message was received */
550int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg)
551{
552 deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name);
553 unsigned int out_messages[1];
554 while (!spe_out_mbox_status(spe_data->ctx));
555 int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1);
556 deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]);
557 if (out_messages[0] == msg)
558 return 0;
559 else
560 return -1;
561}
562
563
564/* Re-runnable invocation of the spe_context_run call */
565void SPE_RunContext(void *thread_argp)
566{
567 /* argp is the pointer to argument to be passed to the SPE program */
568 spu_data_t *args = (spu_data_t *) thread_argp;
569 deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)args->argp);
570
571 /* Run it.. */
572 deprintf(2, "[PS3->SPU] Run SPE program: %s\n", args->program_name);
573 if (spe_context_run
574 (args->ctx, &args->entry, 0, (void *)args->argp, NULL,
575 NULL) < 0) {
576 deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", args->program_name);
577 SDL_SetError("[PS3->SPU] Failed running SPE context: %s", args->program_name);
578 exit(1);
579 }
580
581 pthread_exit(NULL);
582}
583
584
585/* Quits the video driver */
586static void PS3_VideoQuit(_THIS)
587{
588 if (fb_dev_fd > 0) {
589 /* Restore the original video mode */
590 if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_orig_vinfo))
591 SDL_SetError("[PS3] Can't restore original fb_var_screeninfo");
592
593 /* Give control of frame buffer to kernel */
594 ioctl(fb_dev_fd, PS3FB_IOCTL_OFF, 0);
595 close(fb_dev_fd);
596 fb_dev_fd = -1;
597 }
598
599 if (frame_buffer) {
600 munmap(frame_buffer, fb_finfo.smem_len);
601 frame_buffer = 0;
602 }
603
604 if (fb_parms)
605 free((void *)fb_parms);
606 if (fb_thread_data) {
607 SPE_Shutdown(this, fb_thread_data);
608 free((void *)fb_thread_data);
609 }
610
611 if (this->screen) {
612 if (double_buffering && this->screen->pixels) {
613 free(this->screen->pixels);
614 }
615 this->screen->pixels = NULL;
616 }
617
618 enable_cursor(1);
619 deprintf(1, "[PS3] VideoQuit\n");
620}
621
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3video.h b/apps/plugins/sdl/src/video/ps3/SDL_ps3video.h
deleted file mode 100644
index 4fe5a2b42b..0000000000
--- a/apps/plugins/sdl/src/video/ps3/SDL_ps3video.h
+++ /dev/null
@@ -1,165 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "SDL_config.h"
29#include "../SDL_sysvideo.h"
30#include "SDL_mouse.h"
31#include "SDL_mutex.h"
32#include "spulibs/spu_common.h"
33
34#include <libspe2.h>
35#include <pthread.h>
36#include <linux/types.h>
37#include <linux/fb.h>
38#include <asm/ps3fb.h>
39#include <linux/vt.h>
40#include <termios.h>
41
42#ifndef _SDL_ps3video_h
43#define _SDL_ps3video_h
44
/* Debugging
 * 0: No debug messages
 * 1: Video debug messages
 * 2: SPE debug messages
 * 3: Memory addresses
 *
 * A message is printed when its level is <= DEBUG_LEVEL, so a call made
 * with level 0 is printed even at DEBUG_LEVEL 0.
 */
#define DEBUG_LEVEL 0

#ifdef DEBUG_LEVEL
/* Print to stdout and flush; the arguments are not evaluated when the
 * level is filtered out. */
#define deprintf(level, fmt, ...)                       \
    do {                                                \
        if ((unsigned)(level) <= DEBUG_LEVEL) {         \
            fprintf(stdout, fmt, ##__VA_ARGS__);        \
            fflush(stdout);                             \
        }                                               \
    } while (0)
#else
#define deprintf(level, fmt, ...)
#endif
66
67/* Framebuffer device */
68#define PS3_DEV_FB "/dev/fb0"
69
70/* Hidden "this" pointer for the video functions */
71#define _THIS SDL_VideoDevice * this
72
73/* SPU thread data */
74typedef struct spu_data {
75 spe_context_ptr_t ctx;
76 pthread_t thread;
77 spe_program_handle_t program;
78 char * program_name;
79 unsigned int booted;
80 unsigned int keepalive;
81 unsigned int entry;
82 int error_code;
83 void * argp;
84} spu_data_t;
85
86/* Private video driver data needed for Cell support */
87struct SDL_PrivateVideoData
88{
89 const char * const fb_dev_name; /* FB-device name */
90 int fb_dev_fd; /* Descriptor-handle for fb_dev_name */
91 uint8_t * frame_buffer; /* mmap'd access to fbdev */
92
93 /* SPE threading stuff */
94 spu_data_t * fb_thread_data;
95 spu_data_t * scaler_thread_data;
96 spu_data_t * converter_thread_data;
97
98 /* screeninfo (from linux/fb.h) */
99 struct fb_fix_screeninfo fb_finfo;
100 struct fb_var_screeninfo fb_vinfo;
101 struct fb_var_screeninfo fb_orig_vinfo;
102
103 /* screeninfo (from asm/ps3fb.h) */
104 struct ps3fb_ioctl_res res;
105
106 unsigned int double_buffering;
107 uint32_t real_width; // real width of screen
108 uint32_t real_height; // real height of screen
109
110 uint32_t s_fb_pixel_size; // 32: 4 24: 3 16: 2 15: 2
111 uint32_t fb_bits_per_pixel; // 32: 32 24: 24 16: 16 15: 15
112
113 uint32_t config_count;
114
115 uint32_t s_input_line_length; // precalculated: input_width * fb_pixel_size
116 uint32_t s_bounded_input_width; // width of input (bounded by writeable width)
117 uint32_t s_bounded_input_height;// height of input (bounded by writeable height)
118 uint32_t s_bounded_input_width_offset; // offset from the left side (used for centering)
119 uint32_t s_bounded_input_height_offset; // offset from the upper side (used for centering)
120 uint32_t s_writeable_width; // width of screen which is writeable
121 uint32_t s_writeable_height; // height of screen which is writeable
122
123 uint8_t * s_center[2]; // where to begin writing our image (centered?)
124 uint32_t s_center_index;
125
126 volatile void * s_pixels __attribute__((aligned(128)));
127
128 /* Framebuffer data */
129 volatile struct fb_writer_parms_t * fb_parms __attribute__((aligned(128)));
130};
131
/*
 * Shorthand accessors for the private data.  Each one expands to a member
 * of this->hidden, so it can only be used where a `this` device pointer is
 * in scope, and none of these names may be reused for a local variable.
 */

/* Device and SPE threads */
#define fb_dev_name                     (this->hidden->fb_dev_name)
#define fb_dev_fd                       (this->hidden->fb_dev_fd)
#define frame_buffer                    (this->hidden->frame_buffer)
#define fb_thread_data                  (this->hidden->fb_thread_data)
#define scaler_thread_data              (this->hidden->scaler_thread_data)
#define converter_thread_data           (this->hidden->converter_thread_data)
#define fb_parms                        (this->hidden->fb_parms)

/* NOTE(review): struct SDL_PrivateVideoData declares no members with these
 * three names, so the macros below cannot be used as they stand. */
#define SDL_nummodes                    (this->hidden->SDL_nummodes)
#define SDL_modelist                    (this->hidden->SDL_modelist)
#define SDL_videomode                   (this->hidden->SDL_videomode)

/* Screen information */
#define fb_finfo                        (this->hidden->fb_finfo)
#define fb_vinfo                        (this->hidden->fb_vinfo)
#define fb_orig_vinfo                   (this->hidden->fb_orig_vinfo)
#define res                             (this->hidden->res)
#define double_buffering                (this->hidden->double_buffering)
#define real_width                      (this->hidden->real_width)
#define real_height                     (this->hidden->real_height)
#define s_fb_pixel_size                 (this->hidden->s_fb_pixel_size)
#define fb_bits_per_pixel               (this->hidden->fb_bits_per_pixel)
#define config_count                    (this->hidden->config_count)

/* Input geometry and centering */
#define s_input_line_length             (this->hidden->s_input_line_length)
#define s_bounded_input_width           (this->hidden->s_bounded_input_width)
#define s_bounded_input_height          (this->hidden->s_bounded_input_height)
#define s_bounded_input_width_offset    (this->hidden->s_bounded_input_width_offset)
#define s_bounded_input_height_offset   (this->hidden->s_bounded_input_height_offset)
#define s_writeable_width               (this->hidden->s_writeable_width)
#define s_writeable_height              (this->hidden->s_writeable_height)
#define s_center                        (this->hidden->s_center)
#define s_center_index                  (this->hidden->s_center_index)
#define s_pixels                        (this->hidden->s_pixels)
162
163#endif /* _SDL_ps3video_h */
164
165
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c b/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c
deleted file mode 100644
index b1e17dae6d..0000000000
--- a/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c
+++ /dev/null
@@ -1,340 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "SDL_config.h"
29
30#include "SDL_video.h"
31#include "SDL_ps3video.h"
32#include "SDL_ps3yuv_c.h"
33#include "../SDL_yuvfuncs.h"
34#include "spulibs/spu_common.h"
35
36/* Stores the executable name */
37extern spe_program_handle_t yuv2rgb_spu;
38extern spe_program_handle_t bilin_scaler_spu;
39
40int SPE_Start(_THIS, spu_data_t * spe_data);
41int SPE_Stop(_THIS, spu_data_t * spe_data);
42int SPE_Boot(_THIS, spu_data_t * spe_data);
43int SPE_Shutdown(_THIS, spu_data_t * spe_data);
44int SPE_SendMsg(_THIS, spu_data_t * spe_data, unsigned int msg);
45int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg);
46void SPE_RunContext(void *thread_argp);
47
48
49/* The functions used to manipulate software video overlays */
50static struct private_yuvhwfuncs ps3_yuvfuncs = {
51 PS3_LockYUVOverlay,
52 PS3_UnlockYUVOverlay,
53 PS3_DisplayYUVOverlay,
54 PS3_FreeYUVOverlay
55};
56
57
58struct private_yuvhwdata {
59 SDL_Surface *display;
60 SDL_Surface *stretch;
61 volatile void * pixels __attribute__((aligned(128)));
62
63 /* These are just so we don't have to allocate them separately */
64 Uint16 pitches[3];
65 Uint8 * planes[3];
66
67 unsigned int scale;
68
69 /* Scaled YUV picture */
70 Uint8 * scaler_out __attribute__((aligned(128)));
71
72 /* YUV2RGB converter data */
73 volatile struct yuv2rgb_parms_t * converter_parms __attribute__((aligned(128)));
74
75 /* Scaler data */
76 volatile struct scale_parms_t * scaler_parms __attribute__((aligned(128)));
77
78 Uint8 locked;
79};
80
81
82SDL_Overlay *PS3_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display) {
83 /* Only RGB packed pixel conversion supported */
84 if ((display->format->BytesPerPixel != 2) &&
85 (display->format->BytesPerPixel != 3) &&
86 (display->format->BytesPerPixel != 4))
87 {
88 SDL_SetError ("Can't use YUV data on non 16/24/32 bit surfaces");
89 return NULL;
90 }
91
92 /* Double-check the requested format. We'll only support YV12 */
93 switch (format) {
94 case SDL_IYUV_OVERLAY:
95 case SDL_YV12_OVERLAY:
96 /* Supported YUV format */
97 break;
98 default:
99 SDL_SetError("Unsupported YUV format");
100 return NULL;
101 }
102
103 SDL_Overlay* overlay;
104 struct private_yuvhwdata* hwdata;
105
106 /* Create the overlay structure */
107 overlay = (SDL_Overlay *) SDL_calloc(1, sizeof(SDL_Overlay));
108 if (overlay == NULL) {
109 SDL_OutOfMemory();
110 return NULL;
111 }
112 SDL_memset(overlay, 0, (sizeof *overlay));
113
114 /* Set the basic attributes */
115 overlay->format = format;
116 overlay->w = width;
117 overlay->h = height;
118 overlay->hwdata = NULL;
119
120 /* Set up the PS3 YUV surface function structure */
121 overlay->hwfuncs = &ps3_yuvfuncs;
122
123 /* Create the pixel data and lookup tables */
124 hwdata = (struct private_yuvhwdata *) SDL_calloc(1, sizeof(struct private_yuvhwdata));
125 if (hwdata == NULL) {
126 SDL_OutOfMemory();
127 SDL_FreeYUVOverlay(overlay);
128 return NULL;
129 }
130 overlay->hwdata = hwdata;
131
132 hwdata->stretch = NULL;
133 hwdata->display = display;
134
135 /* Create SPU parms structure */
136 hwdata->converter_parms = (struct yuv2rgb_parms_t *) memalign(16, sizeof(struct yuv2rgb_parms_t));
137 hwdata->scaler_parms = (struct scale_parms_t *) memalign(16, sizeof(struct scale_parms_t));
138 if (hwdata->converter_parms == NULL || hwdata->scaler_parms == NULL) {
139 SDL_FreeYUVOverlay(overlay);
140 SDL_OutOfMemory();
141 return(NULL);
142 }
143
144 /* Set up the SPEs */
145 scaler_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
146 converter_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
147 if (converter_thread_data == NULL || scaler_thread_data == NULL) {
148 SDL_FreeYUVOverlay(overlay);
149 SDL_OutOfMemory();
150 return(NULL);
151 }
152
153 scaler_thread_data->program = bilin_scaler_spu;
154 scaler_thread_data->program_name = "bilin_scaler_spu";
155 scaler_thread_data->keepalive = 0;
156 scaler_thread_data->booted = 0;
157
158 converter_thread_data->program = yuv2rgb_spu;
159 converter_thread_data->program_name = "yuv2rgb_spu";
160 converter_thread_data->keepalive = 1;
161 converter_thread_data->booted = 0;
162
163 SPE_Start(this, converter_thread_data);
164
165 hwdata->pixels = (Uint8 *) memalign(16, width * height + ((width * height) >> 1));
166 if (hwdata->pixels == NULL) {
167 SDL_FreeYUVOverlay(overlay);
168 SDL_OutOfMemory();
169 return(NULL);
170 }
171
172 /* Find the pitch and offset values for the overlay */
173 overlay->pitches = hwdata->pitches;
174 overlay->pixels = hwdata->planes;
175 switch (format) {
176 case SDL_YV12_OVERLAY:
177 case SDL_IYUV_OVERLAY:
178 overlay->pitches[0] = overlay->w;
179 overlay->pitches[1] = overlay->pitches[0] / 2;
180 overlay->pitches[2] = overlay->pitches[0] / 2;
181 overlay->pixels[0] = (Uint8 *)hwdata->pixels;
182 overlay->pixels[1] = overlay->pixels[0] +
183 overlay->pitches[0] * overlay->h;
184 overlay->pixels[2] = overlay->pixels[1] +
185 overlay->pitches[1] * overlay->h / 2;
186 overlay->planes = 3;
187 break;
188 default:
189 /* We should never get here (caught above) */
190 break;
191 }
192
193 /* We're all done.. */
194 return overlay;
195}
196
197
198int PS3_LockYUVOverlay(_THIS, SDL_Overlay *overlay) {
199 if (overlay == NULL) {
200 return -1;
201 }
202 overlay->hwdata->locked = 1;
203
204 return 0;
205}
206
207
208void PS3_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay) {
209 if (overlay == NULL) {
210 return;
211 }
212 overlay->hwdata->locked = 0;
213
214 return;
215}
216
217
218int PS3_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst) {
219 if ((overlay == NULL) || (overlay->hwdata == NULL)) {
220 return -1;
221 }
222
223 Uint8 *lum, *Cr, *Cb;
224 struct private_yuvhwdata *hwdata;
225 SDL_Surface *display;
226
227 hwdata = overlay->hwdata;
228 display = hwdata->display;
229
230 /* Do we have to scale? */
231 if ((src->w != dst->w) || (src->h != dst->h) ) {
232 hwdata->scale = 1;
233 deprintf(1, "[PS3] We need to scale\n");
234 } else {
235 hwdata->scale = 0;
236 deprintf(1, "[PS3] No scaling\n");
237 }
238
239 /* Find out where the various portions of the image are */
240 switch (overlay->format) {
241 case SDL_YV12_OVERLAY:
242 lum = (Uint8 *)overlay->pixels[0];
243 Cr = (Uint8 *)overlay->pixels[1];
244 Cb = (Uint8 *)overlay->pixels[2];
245 break;
246 case SDL_IYUV_OVERLAY:
247 lum = (Uint8 *)overlay->pixels[0];
248 Cr = (Uint8 *)overlay->pixels[2];
249 Cb = (Uint8 *)overlay->pixels[1];
250 break;
251 default:
252 SDL_SetError("Unsupported YUV format in blit");
253 return -1;
254 }
255
256 if (hwdata->scale) {
257 /* Alloc mem for scaled YUV picture */
258 hwdata->scaler_out = (Uint8 *) memalign(16, dst->w * dst->h + ((dst->w * dst->h) >> 1));
259 if (hwdata->scaler_out == NULL) {
260 SDL_FreeYUVOverlay(overlay);
261 SDL_OutOfMemory();
262 return -1;
263 }
264
265 /* Set parms for scaling */
266 hwdata->scaler_parms->src_pixel_width = src->w;
267 hwdata->scaler_parms->src_pixel_height = src->h;
268 hwdata->scaler_parms->dst_pixel_width = dst->w;
269 hwdata->scaler_parms->dst_pixel_height = dst->h;
270 hwdata->scaler_parms->y_plane = lum;
271 hwdata->scaler_parms->v_plane = Cr;
272 hwdata->scaler_parms->u_plane = Cb;
273 hwdata->scaler_parms->dstBuffer = hwdata->scaler_out;
274 scaler_thread_data->argp = (void *)hwdata->scaler_parms;
275
276 /* Scale the YUV overlay to given size */
277 SPE_Start(this, scaler_thread_data);
278 SPE_Stop(this, scaler_thread_data);
279
280 /* Set parms for converting after scaling */
281 hwdata->converter_parms->y_plane = hwdata->scaler_out;
282 hwdata->converter_parms->v_plane = hwdata->scaler_out + dst->w * dst->h;
283 hwdata->converter_parms->u_plane = hwdata->scaler_out + dst->w * dst->h + ((dst->w * dst->h) >> 2);
284 } else {
285 /* Set parms for converting */
286 hwdata->converter_parms->y_plane = lum;
287 hwdata->converter_parms->v_plane = Cr;
288 hwdata->converter_parms->u_plane = Cb;
289 }
290
291 hwdata->converter_parms->src_pixel_width = dst->w;
292 hwdata->converter_parms->src_pixel_height = dst->h;
293 hwdata->converter_parms->dstBuffer = (Uint8 *) s_pixels;
294 converter_thread_data->argp = (void *)hwdata->converter_parms;
295
296 /* Convert YUV overlay to RGB */
297 SPE_SendMsg(this, converter_thread_data, SPU_START);
298 SPE_SendMsg(this, converter_thread_data, (unsigned int)converter_thread_data->argp);
299
300 /* Centering */
301 s_bounded_input_width = dst->w;
302 s_bounded_input_height = dst->h;
303
304 /* UpdateRects() will do the rest.. */
305 SDL_UpdateRects(display, 1, dst);
306
307 if (hwdata->scale)
308 SDL_free((void *)hwdata->scaler_out);
309
310 return 0;
311}
312
313
314void PS3_FreeYUVOverlay(_THIS, SDL_Overlay *overlay) {
315 if (overlay == NULL) {
316 return;
317 }
318
319 if (overlay->hwdata == NULL) {
320 return;
321 }
322
323 struct private_yuvhwdata * hwdata;
324 hwdata = overlay->hwdata;
325
326 if (scaler_thread_data)
327 SDL_free(scaler_thread_data);
328 if (converter_thread_data) {
329 SPE_Shutdown(this, converter_thread_data);
330 SDL_free(converter_thread_data);
331 }
332
333 if (hwdata) {
334 if (hwdata->pixels)
335 SDL_free((void *)hwdata->pixels);
336 SDL_free(hwdata);
337 }
338 return;
339}
340
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h b/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h
deleted file mode 100644
index 49f9d70953..0000000000
--- a/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "SDL_config.h"
29
30#ifndef _SDL_ps3yuv_h
31#define _SDL_ps3yuv_h
32
33/* This is the PS3 implementation of YUV video overlays */
34
35#include "SDL_video.h"
36
37extern SDL_Overlay *PS3_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display);
38extern int PS3_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst);
39extern int PS3_LockYUVOverlay(_THIS, SDL_Overlay *overlay);
40extern void PS3_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay);
41extern void PS3_FreeYUVOverlay(_THIS, SDL_Overlay *overlay);
42
43#endif /* _SDL_ps3yuv_h */
44
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/Makefile b/apps/plugins/sdl/src/video/ps3/spulibs/Makefile
deleted file mode 100644
index dc580d9436..0000000000
--- a/apps/plugins/sdl/src/video/ps3/spulibs/Makefile
+++ /dev/null
@@ -1,83 +0,0 @@
# This Makefile is for building the CELL BE SPU libs
# libfb_writer_spu.so, libyuv2rgb_spu.so, libbilin_scaler_spu.so
#
# Each SPU program is compiled with the SPU compiler, wrapped into a PPU
# object file with embedspu and then packaged as a shared and a static library.

# Toolchain
SPU_GCC=/usr/bin/spu-gcc
PPU_GCC=/usr/bin/gcc
PPU_EMBEDSPU=/usr/bin/embedspu
PPU_AR=/usr/bin/ar
PPU_LD=/usr/bin/ld
INSTALL=/usr/bin/install

SPU_CFLAGS=-W -Wall -Winline -Wno-main -I. -I /usr/spu/include -I /opt/cell/sdk/usr/spu/include -finline-limit=10000 -ftree-vectorize -funroll-loops -fmodulo-sched -ffast-math -fPIC -O2

# Usually /usr/lib, depending on your distribution
PREFIX=/usr/lib

# These targets do not produce a file of the same name
.PHONY: all install uninstall clean

all: libfb_writer_spu.a libfb_writer_spu.so \
	libyuv2rgb_spu.so libyuv2rgb_spu.a \
	libbilin_scaler_spu.so libbilin_scaler_spu.a


# fb_writer
fb_writer_spu-embed.o: fb_writer.c spu_common.h
	$(SPU_GCC) $(SPU_CFLAGS) -o fb_writer_spu fb_writer.c -lm
	$(PPU_EMBEDSPU) -m32 fb_writer_spu fb_writer_spu fb_writer_spu-embed.o

libfb_writer_spu.so: fb_writer_spu-embed.o
	$(PPU_LD) -o libfb_writer_spu.so -shared -soname=libfb_writer_spu.so fb_writer_spu-embed.o

# 'r' replaces an existing member, so rebuilding does not append duplicates
libfb_writer_spu.a: fb_writer_spu-embed.o
	$(PPU_AR) -rcs libfb_writer_spu.a fb_writer_spu-embed.o


# yuv2rgb_converter
yuv2rgb_spu-embed.o: yuv2rgb_converter.c spu_common.h
	$(SPU_GCC) $(SPU_CFLAGS) -o yuv2rgb_spu yuv2rgb_converter.c -lm
	$(PPU_EMBEDSPU) -m32 yuv2rgb_spu yuv2rgb_spu yuv2rgb_spu-embed.o

libyuv2rgb_spu.a: yuv2rgb_spu-embed.o
	$(PPU_AR) -rcs libyuv2rgb_spu.a yuv2rgb_spu-embed.o

libyuv2rgb_spu.so: yuv2rgb_spu-embed.o
	$(PPU_LD) -o libyuv2rgb_spu.so -shared -soname=libyuv2rgb_spu.so yuv2rgb_spu-embed.o


# bilin_scaler
bilin_scaler_spu-embed.o: bilin_scaler.c spu_common.h
	$(SPU_GCC) $(SPU_CFLAGS) -o bilin_scaler_spu bilin_scaler.c -lm
	$(PPU_EMBEDSPU) -m32 bilin_scaler_spu bilin_scaler_spu bilin_scaler_spu-embed.o

libbilin_scaler_spu.a: bilin_scaler_spu-embed.o
	$(PPU_AR) -rcs libbilin_scaler_spu.a bilin_scaler_spu-embed.o

libbilin_scaler_spu.so: bilin_scaler_spu-embed.o
	$(PPU_LD) -o libbilin_scaler_spu.so -shared -soname=libbilin_scaler_spu.so bilin_scaler_spu-embed.o

# Shared libraries are installed 0755, static archives 0644
install: libfb_writer_spu.a libfb_writer_spu.so \
	libyuv2rgb_spu.so libyuv2rgb_spu.a \
	libbilin_scaler_spu.so libbilin_scaler_spu.a
	$(INSTALL) -c -m 0755 libfb_writer_spu.so $(PREFIX)/.
	$(INSTALL) -c -m 0644 libfb_writer_spu.a $(PREFIX)/.
	$(INSTALL) -c -m 0755 libyuv2rgb_spu.so $(PREFIX)/.
	$(INSTALL) -c -m 0644 libyuv2rgb_spu.a $(PREFIX)/.
	$(INSTALL) -c -m 0755 libbilin_scaler_spu.so $(PREFIX)/.
	$(INSTALL) -c -m 0644 libbilin_scaler_spu.a $(PREFIX)/.


# No prerequisites: 'rm -f' copes with files that are already gone, while
# listing the installed files as prerequisites would make a partial
# installation impossible to remove
uninstall:
	rm -f $(PREFIX)/libfb_writer_spu.a
	rm -f $(PREFIX)/libfb_writer_spu.so
	rm -f $(PREFIX)/libyuv2rgb_spu.so
	rm -f $(PREFIX)/libyuv2rgb_spu.a
	rm -f $(PREFIX)/libbilin_scaler_spu.so
	rm -f $(PREFIX)/libbilin_scaler_spu.a


clean:
	rm -f bilin_scaler_spu-embed.o libbilin_scaler_spu.so libbilin_scaler_spu.a bilin_scaler_spu
	rm -f yuv2rgb_spu-embed.o libyuv2rgb_spu.so libyuv2rgb_spu.a yuv2rgb_spu
	rm -f fb_writer_spu-embed.o libfb_writer_spu.so libfb_writer_spu.a fb_writer_spu
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c b/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c
deleted file mode 100644
index be9b5c6e8d..0000000000
--- a/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c
+++ /dev/null
@@ -1,2050 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "spu_common.h"
29
30#include <spu_intrinsics.h>
31#include <spu_mfcio.h>
32
// Debugging
//#define DEBUG

/*
 * deprintf(): printf-style debug output to stdout, flushed immediately.
 * Expands to nothing unless DEBUG is defined.
 *
 * The DEBUG variant is wrapped in do { } while (0) so that it behaves as a
 * single statement, e.g. as the body of an unbraced if/else. Call sites have
 * to end with a semicolon.
 */
#ifdef DEBUG
#define deprintf(fmt, args... ) \
	do { \
		fprintf( stdout, fmt, ##args ); \
		fflush( stdout ); \
	} while (0)
#else
#define deprintf( fmt, args... )
#endif
43
44struct scale_parms_t parms __attribute__((aligned(128)));
45
46/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored
47 * there might be the need to retrieve misaligned data, adjust
48 * incoming v and u plane to be able to handle this (add 128)
49 */
50unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128)));
51unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
52unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
53
54/* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */
55unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128)));
56unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
57unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
58
59/* some vectors needed by the float to int conversion */
60static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
61static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
62
63void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
64void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
65
66void scale_srcw16_dstw16();
67void scale_srcw16_dstw32();
68void scale_srcw32_dstw16();
69void scale_srcw32_dstw32();
70
71int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp )
72{
73 deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id);
74 /* DMA transfer for the input parameters */
75 spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD);
76 DMA_WAIT_TAG(TAG_INIT);
77
78 deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height,
79 parms.dst_pixel_width, parms.dst_pixel_height);
80
81 if(parms.src_pixel_width & 0x1f) {
82 if(parms.dst_pixel_width & 0x1F) {
83 deprintf("[SPU] Using scale_srcw16_dstw16\n");
84 scale_srcw16_dstw16();
85 } else {
86 deprintf("[SPU] Using scale_srcw16_dstw32\n");
87 scale_srcw16_dstw32();
88 }
89 } else {
90 if(parms.dst_pixel_width & 0x1F) {
91 deprintf("[SPU] Using scale_srcw32_dstw16\n");
92 scale_srcw32_dstw16();
93 } else {
94 deprintf("[SPU] Using scale_srcw32_dstw32\n");
95 scale_srcw32_dstw32();
96 }
97 }
98 deprintf("[SPU] bilin_scaler_spu... done!\n");
99
100 return 0;
101}
102
103
104/*
105 * vfloat_to_vuint()
106 *
107 * converts a float vector to an unsinged int vector using saturated
108 * arithmetic
109 *
110 * @param vec_s float vector for conversion
111 * @returns converted unsigned int vector
112 */
113inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
114 vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
115 vec_s = spu_sel(vec_s, vec_0_1, select_1);
116
117 vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
118 vec_s = spu_sel(vec_s, vec_255, select_2);
119 return spu_convtu(vec_s,0);
120}
121
122
123/*
124 * scale_srcw16_dstw16()
125 *
126 * processes an input image of width 16
127 * scaling is done to a width 16
128 * result stored in RAM
129 */
130void scale_srcw16_dstw16() {
131 // extract parameters
132 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
133
134 unsigned int src_width = parms.src_pixel_width;
135 unsigned int src_height = parms.src_pixel_height;
136 unsigned int dst_width = parms.dst_pixel_width;
137 unsigned int dst_height = parms.dst_pixel_height;
138
139 // YVU
140 unsigned int src_linestride_y = src_width;
141 unsigned int src_dbl_linestride_y = src_width<<1;
142 unsigned int src_linestride_vu = src_width>>1;
143 unsigned int src_dbl_linestride_vu = src_width;
144
145 // scaled YVU
146 unsigned int scaled_src_linestride_y = dst_width;
147
148 // ram addresses
149 unsigned char* src_addr_y = parms.y_plane;
150 unsigned char* src_addr_v = parms.v_plane;
151 unsigned char* src_addr_u = parms.u_plane;
152
153 // for handling misalignment, addresses are precalculated
154 unsigned char* precalc_src_addr_v = src_addr_v;
155 unsigned char* precalc_src_addr_u = src_addr_u;
156
157 unsigned int dst_picture_size = dst_width*dst_height;
158
159 // Sizes for destination
160 unsigned int dst_dbl_linestride_y = dst_width<<1;
161 unsigned int dst_dbl_linestride_vu = dst_width>>1;
162
163 // Perform address calculation for Y, V and U in main memory with dst_addr as base
164 unsigned char* dst_addr_main_memory_y = dst_addr;
165 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
166 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
167
168 // calculate scale factors
169 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
170 float y_scale = (float)src_height/(float)dst_height;
171
172 // double buffered processing
173 // buffer switching
174 unsigned int curr_src_idx = 0;
175 unsigned int curr_dst_idx = 0;
176 unsigned int next_src_idx, next_dst_idx;
177
178 // 2 lines y as output, upper and lowerline
179 unsigned int curr_interpl_y_upper = 0;
180 unsigned int next_interpl_y_upper;
181 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
182 // only 1 line v/u output, both planes have the same dimension
183 unsigned int curr_interpl_vu = 0;
184 unsigned int next_interpl_vu;
185
186 // weights, calculated in every loop iteration
187 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
188 vector float vf_next_NSweight_y_upper;
189 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
190 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
191 vector float vf_next_NSweight_vu;
192
193 // line indices for the src picture
194 float curr_src_y_upper = 0.0f, next_src_y_upper;
195 float curr_src_y_lower, next_src_y_lower;
196 float curr_src_vu = 0.0f, next_src_vu;
197
198 // line indices for the dst picture
199 unsigned int dst_y=0, dst_vu=0;
200
201 // offset for the v and u plane to handle misalignement
202 unsigned int curr_lsoff_v = 0, next_lsoff_v;
203 unsigned int curr_lsoff_u = 0, next_lsoff_u;
204
205 // calculate lower line indices
206 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
207 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
208 // lower line weight
209 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
210
211
212 // start partially double buffered processing
213 // get initial data, 2 sets of y, 1 set v, 1 set u
214 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
215 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
216 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
217 src_dbl_linestride_y,
218 RETR_BUF,
219 0, 0 );
220 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
221 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
222
223 /* iteration loop
224 * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
225 * the scaled output is 2 lines y, 1 line v, 1 line u
226 * the yuv2rgb-converted output is stored to RAM
227 */
228 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
229 dst_y = dst_vu<<1;
230
231 // calculate next indices
232 next_src_vu = ((float)dst_vu+1)*y_scale;
233 next_src_y_upper = ((float)dst_y+2)*y_scale;
234 next_src_y_lower = ((float)dst_y+3)*y_scale;
235
236 next_interpl_vu = (unsigned int) next_src_vu;
237 next_interpl_y_upper = (unsigned int) next_src_y_upper;
238 next_interpl_y_lower = (unsigned int) next_src_y_lower;
239
240 // calculate weight NORTH-SOUTH
241 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
242 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
243 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
244
245 // get next lines
246 next_src_idx = curr_src_idx^1;
247 next_dst_idx = curr_dst_idx^1;
248
249 // 4 lines y
250 mfc_get( y_plane[next_src_idx],
251 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
252 src_dbl_linestride_y,
253 RETR_BUF+next_src_idx,
254 0, 0 );
255 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
256 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
257 src_dbl_linestride_y,
258 RETR_BUF+next_src_idx,
259 0, 0 );
260
261 // 2 lines v
262 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
263 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
264 mfc_get( v_plane[next_src_idx],
265 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
266 src_dbl_linestride_vu+(next_lsoff_v<<1),
267 RETR_BUF+next_src_idx,
268 0, 0 );
269 // 2 lines u
270 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
271 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
272 mfc_get( u_plane[next_src_idx],
273 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
274 src_dbl_linestride_vu+(next_lsoff_v<<1),
275 RETR_BUF+next_src_idx,
276 0, 0 );
277
278 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
279
280 // scaling
281 // work line y_upper
282 bilinear_scale_line_w16( y_plane[curr_src_idx],
283 scaled_y_plane[curr_src_idx],
284 dst_width,
285 vf_x_scale,
286 vf_curr_NSweight_y_upper,
287 src_linestride_y );
288 // work line y_lower
289 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
290 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
291 dst_width,
292 vf_x_scale,
293 vf_curr_NSweight_y_lower,
294 src_linestride_y );
295 // work line v
296 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
297 scaled_v_plane[curr_src_idx],
298 dst_width>>1,
299 vf_x_scale,
300 vf_curr_NSweight_vu,
301 src_linestride_vu );
302 // work line u
303 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
304 scaled_u_plane[curr_src_idx],
305 dst_width>>1,
306 vf_x_scale,
307 vf_curr_NSweight_vu,
308 src_linestride_vu );
309
310
311 // Store the result back to main memory into a destination buffer in YUV format
312 //---------------------------------------------------------------------------------------------
313 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
314
315 // Perform three DMA transfers to 3 different locations in the main memory!
316 // dst_width: Pixel width of destination image
317 // dst_addr: Destination address in main memory
318 // dst_vu: Counter which is incremented one by one
319 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
320 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
321 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
322 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
323 STR_BUF+curr_dst_idx, // Tag
324 0, 0 );
325
326 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
327 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
328 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
329 STR_BUF+curr_dst_idx, // Tag
330 0, 0 );
331
332 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
333 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
334 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
335 STR_BUF+curr_dst_idx, // Tag
336 0, 0 );
337 //---------------------------------------------------------------------------------------------
338
339
340 // update for next cycle
341 curr_src_idx = next_src_idx;
342 curr_dst_idx = next_dst_idx;
343
344 curr_interpl_y_upper = next_interpl_y_upper;
345 curr_interpl_y_lower = next_interpl_y_lower;
346 curr_interpl_vu = next_interpl_vu;
347
348 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
349 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
350 vf_curr_NSweight_vu = vf_next_NSweight_vu;
351
352 curr_src_y_upper = next_src_y_upper;
353 curr_src_y_lower = next_src_y_lower;
354 curr_src_vu = next_src_vu;
355
356 curr_lsoff_v = next_lsoff_v;
357 curr_lsoff_u = next_lsoff_u;
358 }
359
360
361
362 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
363
364 // scaling
365 // work line y_upper
366 bilinear_scale_line_w16( y_plane[curr_src_idx],
367 scaled_y_plane[curr_src_idx],
368 dst_width,
369 vf_x_scale,
370 vf_curr_NSweight_y_upper,
371 src_linestride_y );
372 // work line y_lower
373 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
374 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
375 dst_width,
376 vf_x_scale,
377 vf_curr_NSweight_y_lower,
378 src_linestride_y );
379 // work line v
380 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
381 scaled_v_plane[curr_src_idx],
382 dst_width>>1,
383 vf_x_scale,
384 vf_curr_NSweight_vu,
385 src_linestride_vu );
386 // work line u
387 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
388 scaled_u_plane[curr_src_idx],
389 dst_width>>1,
390 vf_x_scale,
391 vf_curr_NSweight_vu,
392 src_linestride_vu );
393
394
395 // Store the result back to main memory into a destination buffer in YUV format
396 //---------------------------------------------------------------------------------------------
397 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
398
399 // Perform three DMA transfers to 3 different locations in the main memory!
400 // dst_width: Pixel width of destination image
401 // dst_addr: Destination address in main memory
402 // dst_vu: Counter which is incremented one by one
403 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
404 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
405 (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
406 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
407 STR_BUF+curr_dst_idx, // Tag
408 0, 0 );
409
410 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
411 (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
412 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
413 STR_BUF+curr_dst_idx, // Tag
414 0, 0 );
415
416 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
417 (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
418 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
419 STR_BUF+curr_dst_idx, // Tag
420 0, 0 );
421
422 // wait for completion
423 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
424 //---------------------------------------------------------------------------------------------
425}
426
427
428/*
429 * scale_srcw16_dstw32()
430 *
431 * processes an input image of width 16
432 * scaling is done to a width 32
433 * yuv2rgb conversion on a width of 32
434 * result stored in RAM
435 */
436void scale_srcw16_dstw32() {
437 // extract parameters
438 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
439
440 unsigned int src_width = parms.src_pixel_width;
441 unsigned int src_height = parms.src_pixel_height;
442 unsigned int dst_width = parms.dst_pixel_width;
443 unsigned int dst_height = parms.dst_pixel_height;
444
445 // YVU
446 unsigned int src_linestride_y = src_width;
447 unsigned int src_dbl_linestride_y = src_width<<1;
448 unsigned int src_linestride_vu = src_width>>1;
449 unsigned int src_dbl_linestride_vu = src_width;
450 // scaled YVU
451 unsigned int scaled_src_linestride_y = dst_width;
452
453 // ram addresses
454 unsigned char* src_addr_y = parms.y_plane;
455 unsigned char* src_addr_v = parms.v_plane;
456 unsigned char* src_addr_u = parms.u_plane;
457
458 unsigned int dst_picture_size = dst_width*dst_height;
459
460 // Sizes for destination
461 unsigned int dst_dbl_linestride_y = dst_width<<1;
462 unsigned int dst_dbl_linestride_vu = dst_width>>1;
463
464 // Perform address calculation for Y, V and U in main memory with dst_addr as base
465 unsigned char* dst_addr_main_memory_y = dst_addr;
466 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
467 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
468
469
470 // for handling misalignment, addresses are precalculated
471 unsigned char* precalc_src_addr_v = src_addr_v;
472 unsigned char* precalc_src_addr_u = src_addr_u;
473
474 // calculate scale factors
475 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
476 float y_scale = (float)src_height/(float)dst_height;
477
478 // double buffered processing
479 // buffer switching
480 unsigned int curr_src_idx = 0;
481 unsigned int curr_dst_idx = 0;
482 unsigned int next_src_idx, next_dst_idx;
483
484 // 2 lines y as output, upper and lowerline
485 unsigned int curr_interpl_y_upper = 0;
486 unsigned int next_interpl_y_upper;
487 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
488 // only 1 line v/u output, both planes have the same dimension
489 unsigned int curr_interpl_vu = 0;
490 unsigned int next_interpl_vu;
491
492 // weights, calculated in every loop iteration
493 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
494 vector float vf_next_NSweight_y_upper;
495 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
496 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
497 vector float vf_next_NSweight_vu;
498
499 // line indices for the src picture
500 float curr_src_y_upper = 0.0f, next_src_y_upper;
501 float curr_src_y_lower, next_src_y_lower;
502 float curr_src_vu = 0.0f, next_src_vu;
503
504 // line indices for the dst picture
505 unsigned int dst_y=0, dst_vu=0;
506
507 // offset for the v and u plane to handle misalignement
508 unsigned int curr_lsoff_v = 0, next_lsoff_v;
509 unsigned int curr_lsoff_u = 0, next_lsoff_u;
510
511 // calculate lower line idices
512 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
513 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
514 // lower line weight
515 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
516
517
518 // start partially double buffered processing
519 // get initial data, 2 sets of y, 1 set v, 1 set u
520 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
521 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
522 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
523 src_dbl_linestride_y,
524 RETR_BUF,
525 0, 0 );
526 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
527 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
528
529 // iteration loop
530 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
531 // the scaled output is 2 lines y, 1 line v, 1 line u
532 // the yuv2rgb-converted output is stored to RAM
533 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
534 dst_y = dst_vu<<1;
535
536 // calculate next indices
537 next_src_vu = ((float)dst_vu+1)*y_scale;
538 next_src_y_upper = ((float)dst_y+2)*y_scale;
539 next_src_y_lower = ((float)dst_y+3)*y_scale;
540
541 next_interpl_vu = (unsigned int) next_src_vu;
542 next_interpl_y_upper = (unsigned int) next_src_y_upper;
543 next_interpl_y_lower = (unsigned int) next_src_y_lower;
544
545 // calculate weight NORTH-SOUTH
546 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
547 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
548 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
549
550 // get next lines
551 next_src_idx = curr_src_idx^1;
552 next_dst_idx = curr_dst_idx^1;
553
554 // 4 lines y
555 mfc_get( y_plane[next_src_idx],
556 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
557 src_dbl_linestride_y,
558 RETR_BUF+next_src_idx,
559 0, 0 );
560 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
561 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
562 src_dbl_linestride_y,
563 RETR_BUF+next_src_idx,
564 0, 0 );
565
566 // 2 lines v
567 precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
568 next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
569 mfc_get( v_plane[next_src_idx],
570 ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
571 src_dbl_linestride_vu+(next_lsoff_v<<1),
572 RETR_BUF+next_src_idx,
573 0, 0 );
574 // 2 lines u
575 precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
576 next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
577 mfc_get( u_plane[next_src_idx],
578 ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
579 src_dbl_linestride_vu+(next_lsoff_v<<1),
580 RETR_BUF+next_src_idx,
581 0, 0 );
582
583 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
584
585 // scaling
586 // work line y_upper
587 bilinear_scale_line_w16( y_plane[curr_src_idx],
588 scaled_y_plane[curr_src_idx],
589 dst_width,
590 vf_x_scale,
591 vf_curr_NSweight_y_upper,
592 src_linestride_y );
593 // work line y_lower
594 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
595 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
596 dst_width,
597 vf_x_scale,
598 vf_curr_NSweight_y_lower,
599 src_linestride_y );
600 // work line v
601 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
602 scaled_v_plane[curr_src_idx],
603 dst_width>>1,
604 vf_x_scale,
605 vf_curr_NSweight_vu,
606 src_linestride_vu );
607 // work line u
608 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
609 scaled_u_plane[curr_src_idx],
610 dst_width>>1,
611 vf_x_scale,
612 vf_curr_NSweight_vu,
613 src_linestride_vu );
614
615 //---------------------------------------------------------------------------------------------
616 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
617
618 // Perform three DMA transfers to 3 different locations in the main memory!
619 // dst_width: Pixel width of destination image
620 // dst_addr: Destination address in main memory
621 // dst_vu: Counter which is incremented one by one
622 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
623
624 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
625 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
626 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
627 STR_BUF+curr_dst_idx, // Tag
628 0, 0 );
629
630 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
631 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
632 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
633 STR_BUF+curr_dst_idx, // Tag
634 0, 0 );
635
636 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
637 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
638 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
639 STR_BUF+curr_dst_idx, // Tag
640 0, 0 );
641 //---------------------------------------------------------------------------------------------
642
643
644 // update for next cycle
645 curr_src_idx = next_src_idx;
646 curr_dst_idx = next_dst_idx;
647
648 curr_interpl_y_upper = next_interpl_y_upper;
649 curr_interpl_y_lower = next_interpl_y_lower;
650 curr_interpl_vu = next_interpl_vu;
651
652 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
653 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
654 vf_curr_NSweight_vu = vf_next_NSweight_vu;
655
656 curr_src_y_upper = next_src_y_upper;
657 curr_src_y_lower = next_src_y_lower;
658 curr_src_vu = next_src_vu;
659
660 curr_lsoff_v = next_lsoff_v;
661 curr_lsoff_u = next_lsoff_u;
662 }
663
664
665
666 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
667
668 // scaling
669 // work line y_upper
670 bilinear_scale_line_w16( y_plane[curr_src_idx],
671 scaled_y_plane[curr_src_idx],
672 dst_width,
673 vf_x_scale,
674 vf_curr_NSweight_y_upper,
675 src_linestride_y );
676 // work line y_lower
677 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
678 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
679 dst_width,
680 vf_x_scale,
681 vf_curr_NSweight_y_lower,
682 src_linestride_y );
683 // work line v
684 bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
685 scaled_v_plane[curr_src_idx],
686 dst_width>>1,
687 vf_x_scale,
688 vf_curr_NSweight_vu,
689 src_linestride_vu );
690 // work line u
691 bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
692 scaled_u_plane[curr_src_idx],
693 dst_width>>1,
694 vf_x_scale,
695 vf_curr_NSweight_vu,
696 src_linestride_vu );
697
698 //---------------------------------------------------------------------------------------------
699 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
700
701 // Perform three DMA transfers to 3 different locations in the main memory!
702 // dst_width: Pixel width of destination image
703 // dst_addr: Destination address in main memory
704 // dst_vu: Counter which is incremented one by one
705 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
706
707 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
708 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
709 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
710 STR_BUF+curr_dst_idx, // Tag
711 0, 0 );
712
713 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
714 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
715 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
716 STR_BUF+curr_dst_idx, // Tag
717 0, 0 );
718
719 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
720 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
721 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
722 STR_BUF+curr_dst_idx, // Tag
723 0, 0 );
724
725 // wait for completion
726 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
727 //---------------------------------------------------------------------------------------------
728}
729
730
731/*
732 * scale_srcw32_dstw16()
733 *
734 * processes an input image of width 32
735 * scaling is done to a width 16
736 * yuv2rgb conversion on a width of 16
737 * result stored in RAM
738 */
739void scale_srcw32_dstw16() {
740 // extract parameters
741 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
742
743 unsigned int src_width = parms.src_pixel_width;
744 unsigned int src_height = parms.src_pixel_height;
745 unsigned int dst_width = parms.dst_pixel_width;
746 unsigned int dst_height = parms.dst_pixel_height;
747
748 // YVU
749 unsigned int src_linestride_y = src_width;
750 unsigned int src_dbl_linestride_y = src_width<<1;
751 unsigned int src_linestride_vu = src_width>>1;
752 unsigned int src_dbl_linestride_vu = src_width;
753 // scaled YVU
754 unsigned int scaled_src_linestride_y = dst_width;
755
756 // ram addresses
757 unsigned char* src_addr_y = parms.y_plane;
758 unsigned char* src_addr_v = parms.v_plane;
759 unsigned char* src_addr_u = parms.u_plane;
760
761 unsigned int dst_picture_size = dst_width*dst_height;
762
763 // Sizes for destination
764 unsigned int dst_dbl_linestride_y = dst_width<<1;
765 unsigned int dst_dbl_linestride_vu = dst_width>>1;
766
767 // Perform address calculation for Y, V and U in main memory with dst_addr as base
768 unsigned char* dst_addr_main_memory_y = dst_addr;
769 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
770 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
771
772 // calculate scale factors
773 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
774 float y_scale = (float)src_height/(float)dst_height;
775
776 // double buffered processing
777 // buffer switching
778 unsigned int curr_src_idx = 0;
779 unsigned int curr_dst_idx = 0;
780 unsigned int next_src_idx, next_dst_idx;
781
782 // 2 lines y as output, upper and lowerline
783 unsigned int curr_interpl_y_upper = 0;
784 unsigned int next_interpl_y_upper;
785 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
786 // only 1 line v/u output, both planes have the same dimension
787 unsigned int curr_interpl_vu = 0;
788 unsigned int next_interpl_vu;
789
790 // weights, calculated in every loop iteration
791 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
792 vector float vf_next_NSweight_y_upper;
793 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
794 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
795 vector float vf_next_NSweight_vu;
796
797 // line indices for the src picture
798 float curr_src_y_upper = 0.0f, next_src_y_upper;
799 float curr_src_y_lower, next_src_y_lower;
800 float curr_src_vu = 0.0f, next_src_vu;
801
802 // line indices for the dst picture
803 unsigned int dst_y=0, dst_vu=0;
804
805 // calculate lower line idices
806 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
807 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
808 // lower line weight
809 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
810
811
812 // start partially double buffered processing
813 // get initial data, 2 sets of y, 1 set v, 1 set u
814 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
815 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
816 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
817 src_dbl_linestride_y,
818 RETR_BUF,
819 0, 0 );
820 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
821 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
822
823 // iteration loop
824 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
825 // the scaled output is 2 lines y, 1 line v, 1 line u
826 // the yuv2rgb-converted output is stored to RAM
827 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
828 dst_y = dst_vu<<1;
829
830 // calculate next indices
831 next_src_vu = ((float)dst_vu+1)*y_scale;
832 next_src_y_upper = ((float)dst_y+2)*y_scale;
833 next_src_y_lower = ((float)dst_y+3)*y_scale;
834
835 next_interpl_vu = (unsigned int) next_src_vu;
836 next_interpl_y_upper = (unsigned int) next_src_y_upper;
837 next_interpl_y_lower = (unsigned int) next_src_y_lower;
838
839 // calculate weight NORTH-SOUTH
840 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
841 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
842 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
843
844 // get next lines
845 next_src_idx = curr_src_idx^1;
846 next_dst_idx = curr_dst_idx^1;
847
848 // 4 lines y
849 mfc_get( y_plane[next_src_idx],
850 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
851 src_dbl_linestride_y,
852 RETR_BUF+next_src_idx,
853 0, 0 );
854 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
855 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
856 src_dbl_linestride_y,
857 RETR_BUF+next_src_idx,
858 0, 0 );
859
860 // 2 lines v
861 mfc_get( v_plane[next_src_idx],
862 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
863 src_dbl_linestride_vu,
864 RETR_BUF+next_src_idx,
865 0, 0 );
866 // 2 lines u
867 mfc_get( u_plane[next_src_idx],
868 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
869 src_dbl_linestride_vu,
870 RETR_BUF+next_src_idx,
871 0, 0 );
872
873 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
874
875 // scaling
876 // work line y_upper
877 bilinear_scale_line_w16( y_plane[curr_src_idx],
878 scaled_y_plane[curr_src_idx],
879 dst_width,
880 vf_x_scale,
881 vf_curr_NSweight_y_upper,
882 src_linestride_y );
883 // work line y_lower
884 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
885 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
886 dst_width,
887 vf_x_scale,
888 vf_curr_NSweight_y_lower,
889 src_linestride_y );
890 // work line v
891 bilinear_scale_line_w16( v_plane[curr_src_idx],
892 scaled_v_plane[curr_src_idx],
893 dst_width>>1,
894 vf_x_scale,
895 vf_curr_NSweight_vu,
896 src_linestride_vu );
897 // work line u
898 bilinear_scale_line_w16( u_plane[curr_src_idx],
899 scaled_u_plane[curr_src_idx],
900 dst_width>>1,
901 vf_x_scale,
902 vf_curr_NSweight_vu,
903 src_linestride_vu );
904
905 //---------------------------------------------------------------------------------------------
906 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
907
908 // Perform three DMA transfers to 3 different locations in the main memory!
909 // dst_width: Pixel width of destination image
910 // dst_addr: Destination address in main memory
911 // dst_vu: Counter which is incremented one by one
912 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
913
914 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
915 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
916 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
917 STR_BUF+curr_dst_idx, // Tag
918 0, 0 );
919
920 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
921 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
922 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
923 STR_BUF+curr_dst_idx, // Tag
924 0, 0 );
925
926 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
927 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
928 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
929 STR_BUF+curr_dst_idx, // Tag
930 0, 0 );
931 //---------------------------------------------------------------------------------------------
932
933
934 // update for next cycle
935 curr_src_idx = next_src_idx;
936 curr_dst_idx = next_dst_idx;
937
938 curr_interpl_y_upper = next_interpl_y_upper;
939 curr_interpl_y_lower = next_interpl_y_lower;
940 curr_interpl_vu = next_interpl_vu;
941
942 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
943 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
944 vf_curr_NSweight_vu = vf_next_NSweight_vu;
945
946 curr_src_y_upper = next_src_y_upper;
947 curr_src_y_lower = next_src_y_lower;
948 curr_src_vu = next_src_vu;
949 }
950
951
952
953 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
954
955 // scaling
956 // work line y_upper
957 bilinear_scale_line_w16( y_plane[curr_src_idx],
958 scaled_y_plane[curr_src_idx],
959 dst_width,
960 vf_x_scale,
961 vf_curr_NSweight_y_upper,
962 src_linestride_y );
963 // work line y_lower
964 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
965 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
966 dst_width,
967 vf_x_scale,
968 vf_curr_NSweight_y_lower,
969 src_linestride_y );
970 // work line v
971 bilinear_scale_line_w16( v_plane[curr_src_idx],
972 scaled_v_plane[curr_src_idx],
973 dst_width>>1,
974 vf_x_scale,
975 vf_curr_NSweight_vu,
976 src_linestride_vu );
977 // work line u
978 bilinear_scale_line_w16( u_plane[curr_src_idx],
979 scaled_u_plane[curr_src_idx],
980 dst_width>>1,
981 vf_x_scale,
982 vf_curr_NSweight_vu,
983 src_linestride_vu );
984
985
986 //---------------------------------------------------------------------------------------------
987 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
988
989 // Perform three DMA transfers to 3 different locations in the main memory!
990 // dst_width: Pixel width of destination image
991 // dst_addr: Destination address in main memory
992 // dst_vu: Counter which is incremented one by one
993 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
994
995 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
996 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
997 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
998 STR_BUF+curr_dst_idx, // Tag
999 0, 0 );
1000
1001 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
1002 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
1003 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
1004 STR_BUF+curr_dst_idx, // Tag
1005 0, 0 );
1006
1007 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
1008 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
1009 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
1010 STR_BUF+curr_dst_idx, // Tag
1011 0, 0 );
1012
1013 // wait for completion
1014 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
1015 //---------------------------------------------------------------------------------------------
1016}
1017
1018
/**
 * scale_srcw32_dstw32()
 *
 * processes an input image of width 32
 * scaling is done to a width 32
 * the scaled Y, V and U lines are stored to RAM in YUV format
 */
1027void scale_srcw32_dstw32() {
1028 // extract parameters
1029 unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
1030
1031 unsigned int src_width = parms.src_pixel_width;
1032 unsigned int src_height = parms.src_pixel_height;
1033 unsigned int dst_width = parms.dst_pixel_width;
1034 unsigned int dst_height = parms.dst_pixel_height;
1035
1036 // YVU
1037 unsigned int src_linestride_y = src_width;
1038 unsigned int src_dbl_linestride_y = src_width<<1;
1039 unsigned int src_linestride_vu = src_width>>1;
1040 unsigned int src_dbl_linestride_vu = src_width;
1041
1042 // scaled YVU
1043 unsigned int scaled_src_linestride_y = dst_width;
1044
1045 // ram addresses
1046 unsigned char* src_addr_y = parms.y_plane;
1047 unsigned char* src_addr_v = parms.v_plane;
1048 unsigned char* src_addr_u = parms.u_plane;
1049
1050 unsigned int dst_picture_size = dst_width*dst_height;
1051
1052 // Sizes for destination
1053 unsigned int dst_dbl_linestride_y = dst_width<<1;
1054 unsigned int dst_dbl_linestride_vu = dst_width>>1;
1055
1056 // Perform address calculation for Y, V and U in main memory with dst_addr as base
1057 unsigned char* dst_addr_main_memory_y = dst_addr;
1058 unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
1059 unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
1060
1061 // calculate scale factors
1062 vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
1063 float y_scale = (float)src_height/(float)dst_height;
1064
1065 // double buffered processing
1066 // buffer switching
1067 unsigned int curr_src_idx = 0;
1068 unsigned int curr_dst_idx = 0;
1069 unsigned int next_src_idx, next_dst_idx;
1070
1071 // 2 lines y as output, upper and lowerline
1072 unsigned int curr_interpl_y_upper = 0;
1073 unsigned int next_interpl_y_upper;
1074 unsigned int curr_interpl_y_lower, next_interpl_y_lower;
1075 // only 1 line v/u output, both planes have the same dimension
1076 unsigned int curr_interpl_vu = 0;
1077 unsigned int next_interpl_vu;
1078
1079 // weights, calculated in every loop iteration
1080 vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
1081 vector float vf_next_NSweight_y_upper;
1082 vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
1083 vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
1084 vector float vf_next_NSweight_vu;
1085
1086 // line indices for the src picture
1087 float curr_src_y_upper = 0.0f, next_src_y_upper;
1088 float curr_src_y_lower, next_src_y_lower;
1089 float curr_src_vu = 0.0f, next_src_vu;
1090
1091 // line indices for the dst picture
1092 unsigned int dst_y=0, dst_vu=0;
1093
1094 // calculate lower line idices
1095 curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
1096 curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
1097 // lower line weight
1098 vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
1099
1100
1101 // start partially double buffered processing
1102 // get initial data, 2 sets of y, 1 set v, 1 set u
1103 mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
1104 mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
1105 (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
1106 src_dbl_linestride_y,
1107 RETR_BUF,
1108 0, 0 );
1109 mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
1110 mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
1111
1112 // iteration loop
1113 // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
1114 // the scaled output is 2 lines y, 1 line v, 1 line u
1115 // the yuv2rgb-converted output is stored to RAM
1116 for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
1117 dst_y = dst_vu<<1;
1118
1119 // calculate next indices
1120 next_src_vu = ((float)dst_vu+1)*y_scale;
1121 next_src_y_upper = ((float)dst_y+2)*y_scale;
1122 next_src_y_lower = ((float)dst_y+3)*y_scale;
1123
1124 next_interpl_vu = (unsigned int) next_src_vu;
1125 next_interpl_y_upper = (unsigned int) next_src_y_upper;
1126 next_interpl_y_lower = (unsigned int) next_src_y_lower;
1127
1128 // calculate weight NORTH-SOUTH
1129 vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
1130 vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
1131 vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
1132
1133 // get next lines
1134 next_src_idx = curr_src_idx^1;
1135 next_dst_idx = curr_dst_idx^1;
1136
1137 // 4 lines y
1138 mfc_get( y_plane[next_src_idx],
1139 (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
1140 src_dbl_linestride_y,
1141 RETR_BUF+next_src_idx,
1142 0, 0 );
1143 mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
1144 (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
1145 src_dbl_linestride_y,
1146 RETR_BUF+next_src_idx,
1147 0, 0 );
1148
1149 // 2 lines v
1150 mfc_get( v_plane[next_src_idx],
1151 (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
1152 src_dbl_linestride_vu,
1153 RETR_BUF+next_src_idx,
1154 0, 0 );
1155 // 2 lines u
1156 mfc_get( u_plane[next_src_idx],
1157 (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
1158 src_dbl_linestride_vu,
1159 RETR_BUF+next_src_idx,
1160 0, 0 );
1161
1162 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
1163
1164 // scaling
1165 // work line y_upper
1166 bilinear_scale_line_w16( y_plane[curr_src_idx],
1167 scaled_y_plane[curr_src_idx],
1168 dst_width,
1169 vf_x_scale,
1170 vf_curr_NSweight_y_upper,
1171 src_linestride_y );
1172 // work line y_lower
1173 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
1174 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
1175 dst_width,
1176 vf_x_scale,
1177 vf_curr_NSweight_y_lower,
1178 src_linestride_y );
1179 // work line v
1180 bilinear_scale_line_w16( v_plane[curr_src_idx],
1181 scaled_v_plane[curr_src_idx],
1182 dst_width>>1,
1183 vf_x_scale,
1184 vf_curr_NSweight_vu,
1185 src_linestride_vu );
1186 // work line u
1187 bilinear_scale_line_w16( u_plane[curr_src_idx],
1188 scaled_u_plane[curr_src_idx],
1189 dst_width>>1,
1190 vf_x_scale,
1191 vf_curr_NSweight_vu,
1192 src_linestride_vu );
1193
1194
1195
1196 // Store the result back to main memory into a destination buffer in YUV format
1197 //---------------------------------------------------------------------------------------------
1198 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
1199
1200 // Perform three DMA transfers to 3 different locations in the main memory!
1201 // dst_width: Pixel width of destination image
1202 // dst_addr: Destination address in main memory
1203 // dst_vu: Counter which is incremented one by one
1204 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
1205
1206 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
1207 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
1208 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
1209 STR_BUF+curr_dst_idx, // Tag
1210 0, 0 );
1211
1212 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
1213 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
1214 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
1215 STR_BUF+curr_dst_idx, // Tag
1216 0, 0 );
1217
1218 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
1219 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
1220 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
1221 STR_BUF+curr_dst_idx, // Tag
1222 0, 0 );
1223 //---------------------------------------------------------------------------------------------
1224
1225
1226 // update for next cycle
1227 curr_src_idx = next_src_idx;
1228 curr_dst_idx = next_dst_idx;
1229
1230 curr_interpl_y_upper = next_interpl_y_upper;
1231 curr_interpl_y_lower = next_interpl_y_lower;
1232 curr_interpl_vu = next_interpl_vu;
1233
1234 vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
1235 vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
1236 vf_curr_NSweight_vu = vf_next_NSweight_vu;
1237
1238 curr_src_y_upper = next_src_y_upper;
1239 curr_src_y_lower = next_src_y_lower;
1240 curr_src_vu = next_src_vu;
1241 }
1242
1243
1244
1245 DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
1246
1247 // scaling
1248 // work line y_upper
1249 bilinear_scale_line_w16( y_plane[curr_src_idx],
1250 scaled_y_plane[curr_src_idx],
1251 dst_width,
1252 vf_x_scale,
1253 vf_curr_NSweight_y_upper,
1254 src_linestride_y );
1255 // work line y_lower
1256 bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
1257 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
1258 dst_width,
1259 vf_x_scale,
1260 vf_curr_NSweight_y_lower,
1261 src_linestride_y );
1262 // work line v
1263 bilinear_scale_line_w16( v_plane[curr_src_idx],
1264 scaled_v_plane[curr_src_idx],
1265 dst_width>>1,
1266 vf_x_scale,
1267 vf_curr_NSweight_vu,
1268 src_linestride_vu );
1269 // work line u
1270 bilinear_scale_line_w16( u_plane[curr_src_idx],
1271 scaled_u_plane[curr_src_idx],
1272 dst_width>>1,
1273 vf_x_scale,
1274 vf_curr_NSweight_vu,
1275 src_linestride_vu );
1276
1277
1278 // Store the result back to main memory into a destination buffer in YUV format
1279 //---------------------------------------------------------------------------------------------
1280 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
1281
1282 // Perform three DMA transfers to 3 different locations in the main memory!
1283 // dst_width: Pixel width of destination image
1284 // dst_addr: Destination address in main memory
1285 // dst_vu: Counter which is incremented one by one
1286 // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
1287
1288 mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
1289 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
1290 dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
1291 STR_BUF+curr_dst_idx, // Tag
1292 0, 0 );
1293
1294 mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
1295 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
1296 dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
1297 STR_BUF+curr_dst_idx, // Tag
1298 0, 0 );
1299
1300 mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
1301 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
1302 dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
1303 STR_BUF+curr_dst_idx, // Tag
1304 0, 0 );
1305
1306 // wait for completion
1307 DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
1308 //---------------------------------------------------------------------------------------------
1309}
1310
1311
1312/*
1313 * bilinear_scale_line_w8()
1314 *
1315 * processes a line of yuv-input, width has to be a multiple of 8
1316 * scaled yuv-output is written to local store buffer
1317 *
1318 * @param src buffer for 2 lines input
1319 * @param dst_ buffer for 1 line output
1320 * @param dst_width the width of the destination line
1321 * @param vf_x_scale a float vector, at each entry is the x_scale-factor
1322 * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
1323 * @param src_linestride the stride of the srcline
1324 */
1325void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
1326
1327 unsigned char* dst = dst_;
1328
1329 unsigned int dst_x;
1330 for( dst_x=0; dst_x<dst_width; dst_x+=8) {
1331 // address calculation for loading the 4 surrounding pixel of each calculated
1332 // destination pixel
1333 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
1334 // lower range->first 4 pixel
1335 // upper range->next 4 pixel
1336 vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 };
1337 vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 };
1338 vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range );
1339 vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range );
1340
1341 // calculate weight EAST-WEST
1342 vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 );
1343 vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 );
1344 vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale );
1345 vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale );
1346 vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 );
1347 vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 );
1348 vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 );
1349 vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 );
1350 vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range );
1351 vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range );
1352
1353 // calculate address offset
1354 //
1355 // pixel NORTH WEST
1356 vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range;
1357 vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range;
1358
1359 // pixel NORTH EAST-->(offpixelNW+1)
1360 vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
1361 vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 );
1362 vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 );
1363
1364 // SOUTH-WEST-->(offpixelNW+src_linestride)
1365 vector unsigned int vui_srclinestride = spu_splats( src_linestride );
1366 vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range );
1367 vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range );
1368
1369 // SOUTH-EAST-->(offpixelNW+src_linestride+1)
1370 vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range );
1371 vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range );
1372
1373 // calculate each address
1374 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
1375 vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range );
1376 vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range );
1377 vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range );
1378 vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range );
1379
1380 vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range );
1381 vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range );
1382 vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range );
1383 vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range );
1384
1385 // get each pixel
1386 //
1387 // scalar load, afterwards insertion into the right position
1388 // NORTH WEST
1389 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
1390 vector unsigned char vuc_pixel_NW_lower_range = spu_insert(
1391 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 );
1392 vuc_pixel_NW_lower_range = spu_insert(
1393 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )),
1394 vuc_pixel_NW_lower_range, 7 );
1395 vuc_pixel_NW_lower_range = spu_insert(
1396 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )),
1397 vuc_pixel_NW_lower_range, 11 );
1398 vuc_pixel_NW_lower_range = spu_insert(
1399 *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )),
1400 vuc_pixel_NW_lower_range, 15 );
1401
1402 vector unsigned char vuc_pixel_NW_upper_range = spu_insert(
1403 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 );
1404 vuc_pixel_NW_upper_range = spu_insert(
1405 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )),
1406 vuc_pixel_NW_upper_range, 7 );
1407 vuc_pixel_NW_upper_range = spu_insert(
1408 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )),
1409 vuc_pixel_NW_upper_range, 11 );
1410 vuc_pixel_NW_upper_range = spu_insert(
1411 *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )),
1412 vuc_pixel_NW_upper_range, 15 );
1413
1414 // NORTH EAST
1415 vector unsigned char vuc_pixel_NE_lower_range = spu_insert(
1416 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 );
1417 vuc_pixel_NE_lower_range = spu_insert(
1418 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )),
1419 vuc_pixel_NE_lower_range, 7 );
1420 vuc_pixel_NE_lower_range = spu_insert(
1421 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )),
1422 vuc_pixel_NE_lower_range, 11 );
1423 vuc_pixel_NE_lower_range = spu_insert(
1424 *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )),
1425 vuc_pixel_NE_lower_range, 15 );
1426
1427 vector unsigned char vuc_pixel_NE_upper_range = spu_insert(
1428 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 );
1429 vuc_pixel_NE_upper_range = spu_insert(
1430 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )),
1431 vuc_pixel_NE_upper_range, 7 );
1432 vuc_pixel_NE_upper_range = spu_insert(
1433 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )),
1434 vuc_pixel_NE_upper_range, 11 );
1435 vuc_pixel_NE_upper_range = spu_insert(
1436 *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )),
1437 vuc_pixel_NE_upper_range, 15 );
1438
1439
1440 // SOUTH WEST
1441 vector unsigned char vuc_pixel_SW_lower_range = spu_insert(
1442 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 );
1443 vuc_pixel_SW_lower_range = spu_insert(
1444 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )),
1445 vuc_pixel_SW_lower_range, 7 );
1446 vuc_pixel_SW_lower_range = spu_insert(
1447 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )),
1448 vuc_pixel_SW_lower_range, 11 );
1449 vuc_pixel_SW_lower_range = spu_insert(
1450 *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )),
1451 vuc_pixel_SW_lower_range, 15 );
1452
1453 vector unsigned char vuc_pixel_SW_upper_range = spu_insert(
1454 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 );
1455 vuc_pixel_SW_upper_range = spu_insert(
1456 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )),
1457 vuc_pixel_SW_upper_range, 7 );
1458 vuc_pixel_SW_upper_range = spu_insert(
1459 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )),
1460 vuc_pixel_SW_upper_range, 11 );
1461 vuc_pixel_SW_upper_range = spu_insert(
1462 *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )),
1463 vuc_pixel_SW_upper_range, 15 );
1464
1465 // SOUTH EAST
1466 vector unsigned char vuc_pixel_SE_lower_range = spu_insert(
1467 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 );
1468 vuc_pixel_SE_lower_range = spu_insert(
1469 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )),
1470 vuc_pixel_SE_lower_range, 7 );
1471 vuc_pixel_SE_lower_range = spu_insert(
1472 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )),
1473 vuc_pixel_SE_lower_range, 11 );
1474 vuc_pixel_SE_lower_range = spu_insert(
1475 *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )),
1476 vuc_pixel_SE_lower_range, 15 );
1477
1478 vector unsigned char vuc_pixel_SE_upper_range = spu_insert(
1479 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 );
1480 vuc_pixel_SE_upper_range = spu_insert(
1481 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )),
1482 vuc_pixel_SE_upper_range, 7 );
1483 vuc_pixel_SE_upper_range = spu_insert(
1484 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )),
1485 vuc_pixel_SE_upper_range, 11 );
1486 vuc_pixel_SE_upper_range = spu_insert(
1487 *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )),
1488 vuc_pixel_SE_upper_range, 15 );
1489
1490
1491 // convert to float
1492 vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 );
1493 vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 );
1494
1495 vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 );
1496 vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 );
1497
1498 vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 );
1499 vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 );
1500
1501 vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 );
1502 vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 );
1503
1504
1505
1506 // first linear interpolation: EWtop
1507 // EWtop = NW + EWweight*(NE-NW)
1508 //
1509 // lower range
1510 vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range );
1511 vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range,
1512 vf_EWtop_lower_range_tmp,
1513 vf_pixel_NW_lower_range );
1514
1515 // upper range
1516 vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range );
1517 vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range,
1518 vf_EWtop_upper_range_tmp,
1519 vf_pixel_NW_upper_range );
1520
1521
1522
1523 // second linear interpolation: EWbottom
1524 // EWbottom = SW + EWweight*(SE-SW)
1525 //
1526 // lower range
1527 vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range );
1528 vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range,
1529 vf_EWbottom_lower_range_tmp,
1530 vf_pixel_SW_lower_range );
1531
1532 // upper range
1533 vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range );
1534 vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range,
1535 vf_EWbottom_upper_range_tmp,
1536 vf_pixel_SW_upper_range );
1537
1538
1539
1540 // third linear interpolation: the bilinear interpolated value
1541 // result = EWtop + NSweight*(EWbottom-EWtop);
1542 //
1543 // lower range
1544 vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range );
1545 vector float vf_result_lower_range = spu_madd( vf_NSweight,
1546 vf_result_lower_range_tmp,
1547 vf_EWtop_lower_range );
1548
1549 // upper range
1550 vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range );
1551 vector float vf_result_upper_range = spu_madd( vf_NSweight,
1552 vf_result_upper_range_tmp,
1553 vf_EWtop_upper_range );
1554
1555
1556 // convert back: using saturated arithmetic
1557 vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range );
1558 vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range );
1559
1560 // merge results->lower,upper
1561 vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F,
1562 0x13, 0x17, 0x1B, 0x1F,
1563 0x00, 0x00, 0x00, 0x00,
1564 0x00, 0x00, 0x00, 0x00 };
1565
1566 vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range,
1567 (vector unsigned char) vui_result_upper_range,
1568 vuc_mask_merge_result );
1569
1570 // partial storing
1571 vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00,
1572 0x00, 0x00, 0x00, 0x00,
1573 0xFF, 0xFF, 0xFF, 0xFF,
1574 0xFF, 0xFF, 0xFF, 0xFF };
1575
1576
1577 // get currently stored data
1578 vector unsigned char vuc_orig = *((vector unsigned char*)dst);
1579
1580 // clear currently stored data
1581 vuc_orig = spu_and( vuc_orig,
1582 spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) );
1583
1584 // rotate result according to storing address
1585 vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F );
1586
1587 // store result
1588 *((vector unsigned char*)dst) = spu_or( vuc_result,
1589 vuc_orig );
1590 dst += 8;
1591 }
1592}
1593
1594
1595/*
1596 * bilinear_scale_line_w16()
1597 *
1598 * processes a line of yuv-input, width has to be a multiple of 16
1599 * scaled yuv-output is written to local store buffer
1600 *
1601 * @param src buffer for 2 lines input
1602 * @param dst_ buffer for 1 line output
1603 * @param dst_width the width of the destination line
1604 * @param vf_x_scale a float vector, at each entry is the x_scale-factor
1605 * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
1606 * @param src_linestride the stride of the srcline
1607 */
1608void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
1609
1610 unsigned char* dst = dst_;
1611
1612 unsigned int dst_x;
1613 for( dst_x=0; dst_x<dst_width; dst_x+=16) {
1614 // address calculation for loading the 4 surrounding pixel of each calculated
1615 // destination pixel
1616 vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
1617 // parallelised processing
1618 // first range->pixel 1 2 3 4
1619 // second range->pixel 5 6 7 8
1620 // third range->pixel 9 10 11 12
1621 // fourth range->pixel 13 14 15 16
1622 vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 };
1623 vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 };
1624 vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 };
1625 vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 };
1626 vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range );
1627 vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range );
1628 vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range );
1629 vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range );
1630
1631 // calculate weight EAST-WEST
1632 vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 );
1633 vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 );
1634 vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 );
1635 vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 );
1636 vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale );
1637 vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale );
1638 vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale );
1639 vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale );
1640 vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 );
1641 vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 );
1642 vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 );
1643 vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 );
1644 vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 );
1645 vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 );
1646 vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 );
1647 vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 );
1648 vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range );
1649 vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range );
1650 vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range );
1651 vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range );
1652
1653 // calculate address offset
1654 //
1655 // pixel NORTH WEST
1656 vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range;
1657 vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range;
1658 vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range;
1659 vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range;
1660
1661 // pixel NORTH EAST-->(offpixelNW+1)
1662 vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
1663 vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 );
1664 vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 );
1665 vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 );
1666 vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 );
1667
1668 // SOUTH-WEST-->(offpixelNW+src_linestride)
1669 vector unsigned int vui_srclinestride = spu_splats( src_linestride );
1670 vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range );
1671 vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range );
1672 vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range );
1673 vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range );
1674
1675 // SOUTH-EAST-->(offpixelNW+src_linestride+1)
1676 vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range );
1677 vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range );
1678 vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range );
1679 vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range );
1680
1681 // calculate each address
1682 vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
1683 vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range );
1684 vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range );
1685 vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range );
1686 vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range );
1687
1688 vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range );
1689 vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range );
1690 vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range );
1691 vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range );
1692
1693 vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range );
1694 vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range );
1695 vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range );
1696 vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range );
1697
1698 vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range );
1699 vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range );
1700 vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range );
1701 vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range );
1702
1703
1704 // get each pixel
1705 //
1706 // scalar load, afterwards insertion into the right position
1707 // NORTH WEST
1708 // first range
1709 vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
1710 vector unsigned char vuc_pixel_NW_first_range = spu_insert(
1711 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 );
1712 vuc_pixel_NW_first_range = spu_insert(
1713 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )),
1714 vuc_pixel_NW_first_range, 7 );
1715 vuc_pixel_NW_first_range = spu_insert(
1716 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )),
1717 vuc_pixel_NW_first_range, 11 );
1718 vuc_pixel_NW_first_range = spu_insert(
1719 *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )),
1720 vuc_pixel_NW_first_range, 15 );
1721 // second range
1722 vector unsigned char vuc_pixel_NW_second_range = spu_insert(
1723 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 );
1724 vuc_pixel_NW_second_range = spu_insert(
1725 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )),
1726 vuc_pixel_NW_second_range, 7 );
1727 vuc_pixel_NW_second_range = spu_insert(
1728 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )),
1729 vuc_pixel_NW_second_range, 11 );
1730 vuc_pixel_NW_second_range = spu_insert(
1731 *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )),
1732 vuc_pixel_NW_second_range, 15 );
1733 // third range
1734 vector unsigned char vuc_pixel_NW_third_range = spu_insert(
1735 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 );
1736 vuc_pixel_NW_third_range = spu_insert(
1737 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )),
1738 vuc_pixel_NW_third_range, 7 );
1739 vuc_pixel_NW_third_range = spu_insert(
1740 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )),
1741 vuc_pixel_NW_third_range, 11 );
1742 vuc_pixel_NW_third_range = spu_insert(
1743 *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )),
1744 vuc_pixel_NW_third_range, 15 );
1745 // fourth range
1746 vector unsigned char vuc_pixel_NW_fourth_range = spu_insert(
1747 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 );
1748 vuc_pixel_NW_fourth_range = spu_insert(
1749 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )),
1750 vuc_pixel_NW_fourth_range, 7 );
1751 vuc_pixel_NW_fourth_range = spu_insert(
1752 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )),
1753 vuc_pixel_NW_fourth_range, 11 );
1754 vuc_pixel_NW_fourth_range = spu_insert(
1755 *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )),
1756 vuc_pixel_NW_fourth_range, 15 );
1757
1758 // NORTH EAST
1759 // first range
1760 vector unsigned char vuc_pixel_NE_first_range = spu_insert(
1761 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 );
1762 vuc_pixel_NE_first_range = spu_insert(
1763 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )),
1764 vuc_pixel_NE_first_range, 7 );
1765 vuc_pixel_NE_first_range = spu_insert(
1766 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )),
1767 vuc_pixel_NE_first_range, 11 );
1768 vuc_pixel_NE_first_range = spu_insert(
1769 *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )),
1770 vuc_pixel_NE_first_range, 15 );
1771 // second range
1772 vector unsigned char vuc_pixel_NE_second_range = spu_insert(
1773 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 );
1774 vuc_pixel_NE_second_range = spu_insert(
1775 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )),
1776 vuc_pixel_NE_second_range, 7 );
1777 vuc_pixel_NE_second_range = spu_insert(
1778 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )),
1779 vuc_pixel_NE_second_range, 11 );
1780 vuc_pixel_NE_second_range = spu_insert(
1781 *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )),
1782 vuc_pixel_NE_second_range, 15 );
1783 // third range
1784 vector unsigned char vuc_pixel_NE_third_range = spu_insert(
1785 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 );
1786 vuc_pixel_NE_third_range = spu_insert(
1787 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )),
1788 vuc_pixel_NE_third_range, 7 );
1789 vuc_pixel_NE_third_range = spu_insert(
1790 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )),
1791 vuc_pixel_NE_third_range, 11 );
1792 vuc_pixel_NE_third_range = spu_insert(
1793 *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )),
1794 vuc_pixel_NE_third_range, 15 );
1795 // fourth range
1796 vector unsigned char vuc_pixel_NE_fourth_range = spu_insert(
1797 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 );
1798 vuc_pixel_NE_fourth_range = spu_insert(
1799 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )),
1800 vuc_pixel_NE_fourth_range, 7 );
1801 vuc_pixel_NE_fourth_range = spu_insert(
1802 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )),
1803 vuc_pixel_NE_fourth_range, 11 );
1804 vuc_pixel_NE_fourth_range = spu_insert(
1805 *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )),
1806 vuc_pixel_NE_fourth_range, 15 );
1807
1808 // SOUTH WEST
1809 // first range
1810 vector unsigned char vuc_pixel_SW_first_range = spu_insert(
1811 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 );
1812 vuc_pixel_SW_first_range = spu_insert(
1813 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )),
1814 vuc_pixel_SW_first_range, 7 );
1815 vuc_pixel_SW_first_range = spu_insert(
1816 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )),
1817 vuc_pixel_SW_first_range, 11 );
1818 vuc_pixel_SW_first_range = spu_insert(
1819 *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )),
1820 vuc_pixel_SW_first_range, 15 );
1821 // second range
1822 vector unsigned char vuc_pixel_SW_second_range = spu_insert(
1823 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 );
1824 vuc_pixel_SW_second_range = spu_insert(
1825 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )),
1826 vuc_pixel_SW_second_range, 7 );
1827 vuc_pixel_SW_second_range = spu_insert(
1828 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )),
1829 vuc_pixel_SW_second_range, 11 );
1830 vuc_pixel_SW_second_range = spu_insert(
1831 *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )),
1832 vuc_pixel_SW_second_range, 15 );
1833 // third range
1834 vector unsigned char vuc_pixel_SW_third_range = spu_insert(
1835 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 );
1836 vuc_pixel_SW_third_range = spu_insert(
1837 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )),
1838 vuc_pixel_SW_third_range, 7 );
1839 vuc_pixel_SW_third_range = spu_insert(
1840 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )),
1841 vuc_pixel_SW_third_range, 11 );
1842 vuc_pixel_SW_third_range = spu_insert(
1843 *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )),
1844 vuc_pixel_SW_third_range, 15 );
1845 // fourth range
1846 vector unsigned char vuc_pixel_SW_fourth_range = spu_insert(
1847 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 );
1848 vuc_pixel_SW_fourth_range = spu_insert(
1849 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )),
1850 vuc_pixel_SW_fourth_range, 7 );
1851 vuc_pixel_SW_fourth_range = spu_insert(
1852 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )),
1853 vuc_pixel_SW_fourth_range, 11 );
1854 vuc_pixel_SW_fourth_range = spu_insert(
1855 *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )),
1856 vuc_pixel_SW_fourth_range, 15 );
1857
1858 // SOUTH EAST
1859 // first range
1860 vector unsigned char vuc_pixel_SE_first_range = spu_insert(
1861 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 );
1862 vuc_pixel_SE_first_range = spu_insert(
1863 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )),
1864 vuc_pixel_SE_first_range, 7 );
1865 vuc_pixel_SE_first_range = spu_insert(
1866 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )),
1867 vuc_pixel_SE_first_range, 11 );
1868 vuc_pixel_SE_first_range = spu_insert(
1869 *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )),
1870 vuc_pixel_SE_first_range, 15 );
1871 // second range
1872 vector unsigned char vuc_pixel_SE_second_range = spu_insert(
1873 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 );
1874 vuc_pixel_SE_second_range = spu_insert(
1875 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )),
1876 vuc_pixel_SE_second_range, 7 );
1877 vuc_pixel_SE_second_range = spu_insert(
1878 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )),
1879 vuc_pixel_SE_second_range, 11 );
1880 vuc_pixel_SE_second_range = spu_insert(
1881 *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )),
1882 vuc_pixel_SE_second_range, 15 );
1883 // third range
1884 vector unsigned char vuc_pixel_SE_third_range = spu_insert(
1885 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 );
1886 vuc_pixel_SE_third_range = spu_insert(
1887 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )),
1888 vuc_pixel_SE_third_range, 7 );
1889 vuc_pixel_SE_third_range = spu_insert(
1890 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )),
1891 vuc_pixel_SE_third_range, 11 );
1892 vuc_pixel_SE_third_range = spu_insert(
1893 *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )),
1894 vuc_pixel_SE_third_range, 15 );
1895 // fourth range
1896 vector unsigned char vuc_pixel_SE_fourth_range = spu_insert(
1897 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 );
1898 vuc_pixel_SE_fourth_range = spu_insert(
1899 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )),
1900 vuc_pixel_SE_fourth_range, 7 );
1901 vuc_pixel_SE_fourth_range = spu_insert(
1902 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )),
1903 vuc_pixel_SE_fourth_range, 11 );
1904 vuc_pixel_SE_fourth_range = spu_insert(
1905 *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )),
1906 vuc_pixel_SE_fourth_range, 15 );
1907
1908
1909
1910 // convert to float
1911 vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 );
1912 vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 );
1913 vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 );
1914 vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 );
1915
1916 vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 );
1917 vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 );
1918 vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 );
1919 vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 );
1920
1921 vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 );
1922 vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 );
1923 vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 );
1924 vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 );
1925
1926 vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 );
1927 vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 );
1928 vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 );
1929 vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 );
1930
1931 // first linear interpolation: EWtop
1932 // EWtop = NW + EWweight*(NE-NW)
1933 //
1934 // first range
1935 vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range );
1936 vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range,
1937 vf_EWtop_first_range_tmp,
1938 vf_pixel_NW_first_range );
1939
1940 // second range
1941 vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range );
1942 vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range,
1943 vf_EWtop_second_range_tmp,
1944 vf_pixel_NW_second_range );
1945
1946 // third range
1947 vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range );
1948 vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range,
1949 vf_EWtop_third_range_tmp,
1950 vf_pixel_NW_third_range );
1951
1952 // fourth range
1953 vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range );
1954 vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range,
1955 vf_EWtop_fourth_range_tmp,
1956 vf_pixel_NW_fourth_range );
1957
1958
1959
1960 // second linear interpolation: EWbottom
1961 // EWbottom = SW + EWweight*(SE-SW)
1962 //
1963 // first range
1964 vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range );
1965 vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range,
1966 vf_EWbottom_first_range_tmp,
1967 vf_pixel_SW_first_range );
1968
1969 // second range
1970 vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range );
1971 vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range,
1972 vf_EWbottom_second_range_tmp,
1973 vf_pixel_SW_second_range );
1974 // third range
1975 vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range );
1976 vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range,
1977 vf_EWbottom_third_range_tmp,
1978 vf_pixel_SW_third_range );
1979
1980 // fourth range
1981 vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range );
1982 vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range,
1983 vf_EWbottom_fourth_range_tmp,
1984 vf_pixel_SW_fourth_range );
1985
1986
1987
1988 // third linear interpolation: the bilinear interpolated value
1989 // result = EWtop + NSweight*(EWbottom-EWtop);
1990 //
1991 // first range
1992 vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range );
1993 vector float vf_result_first_range = spu_madd( vf_NSweight,
1994 vf_result_first_range_tmp,
1995 vf_EWtop_first_range );
1996
1997 // second range
1998 vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range );
1999 vector float vf_result_second_range = spu_madd( vf_NSweight,
2000 vf_result_second_range_tmp,
2001 vf_EWtop_second_range );
2002
2003 // third range
2004 vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range );
2005 vector float vf_result_third_range = spu_madd( vf_NSweight,
2006 vf_result_third_range_tmp,
2007 vf_EWtop_third_range );
2008
2009 // fourth range
2010 vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range );
2011 vector float vf_result_fourth_range = spu_madd( vf_NSweight,
2012 vf_result_fourth_range_tmp,
2013 vf_EWtop_fourth_range );
2014
2015
2016
2017 // convert back: using saturated arithmetic
2018 vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range );
2019 vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range );
2020 vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range );
2021 vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range );
2022
2023 // merge results->lower,upper
2024 vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F,
2025 0x13, 0x17, 0x1B, 0x1F,
2026 0x00, 0x00, 0x00, 0x00,
2027 0x00, 0x00, 0x00, 0x00 };
2028
2029 vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00,
2030 0x00, 0x00, 0x00, 0x00,
2031 0x03, 0x07, 0x0B, 0x0F,
2032 0x13, 0x17, 0x1B, 0x1F };
2033
2034 vector unsigned char vuc_result_first_second =
2035 spu_shuffle( (vector unsigned char) vui_result_first_range,
2036 (vector unsigned char) vui_result_second_range,
2037 vuc_mask_merge_result_first_second );
2038
2039 vector unsigned char vuc_result_third_fourth =
2040 spu_shuffle( (vector unsigned char) vui_result_third_range,
2041 (vector unsigned char) vui_result_fourth_range,
2042 vuc_mask_merge_result_third_fourth );
2043
2044 // store result
2045 *((vector unsigned char*)dst) = spu_or( vuc_result_first_second,
2046 vuc_result_third_fourth );
2047 dst += 16;
2048 }
2049}
2050
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c b/apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c
deleted file mode 100644
index 0eb51cc682..0000000000
--- a/apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c
+++ /dev/null
@@ -1,193 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "spu_common.h"
29
30#include <spu_intrinsics.h>
31#include <spu_mfcio.h>
32#include <stdio.h>
33#include <string.h>
34
// Debugging
//#define DEBUG

/*
 * deprintf() - debug trace helper.
 *
 * With DEBUG defined the arguments are printed to stdout with fprintf()
 * and stdout is flushed right away.  Without DEBUG the call compiles to
 * an empty statement and the arguments are not evaluated.
 *
 * Both variants are wrapped in do { } while (0) so that the macro is a
 * single statement and stays correct inside an unbraced if/else.
 */
#ifdef DEBUG
#define deprintf(fmt, args...) \
	do { \
		fprintf(stdout, fmt, ##args); \
		fflush(stdout); \
	} while (0)
#else
#define deprintf(fmt, args...) do { } while (0)
#endif
45
46void cpy_to_fb(unsigned int);
47
48/* fb_writer_spu parms */
49static volatile struct fb_writer_parms_t parms __attribute__ ((aligned(128)));
50
51/* Code running on SPU */
52int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused)))
53{
54 deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id);
55 uint32_t ea_mfc, mbox;
56 // send ready message
57 spu_write_out_mbox(SPU_READY);
58
59 while (1) {
60 /* Check mailbox */
61 mbox = spu_read_in_mbox();
62 deprintf("[SPU] Message is %u\n", mbox);
63 switch (mbox) {
64 case SPU_EXIT:
65 deprintf("[SPU] fb_writer goes down...\n");
66 return 0;
67 case SPU_START:
68 break;
69 default:
70 deprintf("[SPU] Cannot handle message\n");
71 continue;
72 }
73
74 /* Tag Manager setup */
75 unsigned int tags;
76 tags = mfc_multi_tag_reserve(5);
77 if (tags == MFC_TAG_INVALID) {
78 deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n");
79 return 0;
80 }
81
82 /* Framebuffer parms */
83 ea_mfc = spu_read_in_mbox();
84 deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc);
85 spu_mfcdma32(&parms, (unsigned int)ea_mfc,
86 sizeof(struct fb_writer_parms_t), tags,
87 MFC_GET_CMD);
88 deprintf("[SPU] argp = %u\n", (unsigned int)argp);
89 DMA_WAIT_TAG(tags);
90
91 /* Copy parms->data to framebuffer */
92 deprintf("[SPU] Copying to framebuffer started\n");
93 cpy_to_fb(tags);
94 deprintf("[SPU] Copying to framebuffer done!\n");
95
96 mfc_multi_tag_release(tags, 5);
97 deprintf("[SPU] fb_writer_spu... done!\n");
98 /* Send FIN msg */
99 spu_write_out_mbox(SPU_FIN);
100 }
101
102 return 0;
103}
104
105void cpy_to_fb(unsigned int tag_id_base)
106{
107 unsigned int i;
108 unsigned char current_buf;
109 uint8_t *in = parms.data;
110
111 /* Align fb pointer which was centered before */
112 uint8_t *fb =
113 (unsigned char *)((unsigned int)parms.center & 0xFFFFFFF0);
114
115 uint32_t bounded_input_height = parms.bounded_input_height;
116 uint32_t bounded_input_width = parms.bounded_input_width;
117 uint32_t fb_pixel_size = parms.fb_pixel_size;
118
119 uint32_t out_line_stride = parms.out_line_stride;
120 uint32_t in_line_stride = parms.in_line_stride;
121 uint32_t in_line_size = bounded_input_width * fb_pixel_size;
122
123 current_buf = 0;
124
125 /* Local store buffer */
126 static volatile uint8_t buf[4][BUFFER_SIZE]
127 __attribute__ ((aligned(128)));
128 /* do 4-times multibuffering using DMA list, process in two steps */
129 for (i = 0; i < bounded_input_height >> 2; i++) {
130 /* first buffer */
131 DMA_WAIT_TAG(tag_id_base + 1);
132 // retrieve buffer
133 spu_mfcdma32(buf[0], (unsigned int)in, in_line_size,
134 tag_id_base + 1, MFC_GETB_CMD);
135 DMA_WAIT_TAG(tag_id_base + 1);
136 // store buffer
137 spu_mfcdma32(buf[0], (unsigned int)fb, in_line_size,
138 tag_id_base + 1, MFC_PUTB_CMD);
139 in += in_line_stride;
140 fb += out_line_stride;
141 deprintf("[SPU] 1st buffer copied in=0x%x, fb=0x%x\n", in,
142 fb);
143
144 /* second buffer */
145 DMA_WAIT_TAG(tag_id_base + 2);
146 // retrieve buffer
147 spu_mfcdma32(buf[1], (unsigned int)in, in_line_size,
148 tag_id_base + 2, MFC_GETB_CMD);
149 DMA_WAIT_TAG(tag_id_base + 2);
150 // store buffer
151 spu_mfcdma32(buf[1], (unsigned int)fb, in_line_size,
152 tag_id_base + 2, MFC_PUTB_CMD);
153 in += in_line_stride;
154 fb += out_line_stride;
155 deprintf("[SPU] 2nd buffer copied in=0x%x, fb=0x%x\n", in,
156 fb);
157
158 /* third buffer */
159 DMA_WAIT_TAG(tag_id_base + 3);
160 // retrieve buffer
161 spu_mfcdma32(buf[2], (unsigned int)in, in_line_size,
162 tag_id_base + 3, MFC_GETB_CMD);
163 DMA_WAIT_TAG(tag_id_base + 3);
164 // store buffer
165 spu_mfcdma32(buf[2], (unsigned int)fb, in_line_size,
166 tag_id_base + 3, MFC_PUTB_CMD);
167 in += in_line_stride;
168 fb += out_line_stride;
169 deprintf("[SPU] 3rd buffer copied in=0x%x, fb=0x%x\n", in,
170 fb);
171
172 /* fourth buffer */
173 DMA_WAIT_TAG(tag_id_base + 4);
174 // retrieve buffer
175 spu_mfcdma32(buf[3], (unsigned int)in, in_line_size,
176 tag_id_base + 4, MFC_GETB_CMD);
177 DMA_WAIT_TAG(tag_id_base + 4);
178 // store buffer
179 spu_mfcdma32(buf[3], (unsigned int)fb, in_line_size,
180 tag_id_base + 4, MFC_PUTB_CMD);
181 in += in_line_stride;
182 fb += out_line_stride;
183 deprintf("[SPU] 4th buffer copied in=0x%x, fb=0x%x\n", in,
184 fb);
185 deprintf("[SPU] Loop #%i, bounded_input_height=%i\n", i,
186 bounded_input_height >> 2);
187 }
188 DMA_WAIT_TAG(tag_id_base + 2);
189 DMA_WAIT_TAG(tag_id_base + 3);
190 DMA_WAIT_TAG(tag_id_base + 4);
191}
192
193
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h b/apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h
deleted file mode 100644
index 42c328c83d..0000000000
--- a/apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h
+++ /dev/null
@@ -1,108 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28/* Common definitions/macros for SPUs */
29
30#ifndef _SPU_COMMON_H
31#define _SPU_COMMON_H
32
33#include <stdio.h>
34#include <stdint.h>
35#include <string.h>
36
/* Tag management */

/*
 * DMA_WAIT_TAG(tag) - select tag group `tag` in the MFC tag mask and read
 * the tag status with mfc_read_tag_status_all(), i.e. wait for the
 * transfers queued on that tag.
 *
 * The macro is a single statement (do { } while (0)), so it can be used
 * as the body of an unbraced if/else or loop; it must be followed by a
 * semicolon.  The mask is built from an unsigned 1 so that tag 31 does
 * not shift into the sign bit.
 */
#define DMA_WAIT_TAG(_tag) \
	do { \
		mfc_write_tag_mask(1u << (_tag)); \
		mfc_read_tag_status_all(); \
	} while (0)
41
/*
 * Mailbox protocol words.
 * The SPU programs write SPU_READY once at start-up and SPU_FIN after
 * each finished job; they read SPU_START and SPU_EXIT as requests.
 */
#define SPU_READY	0
#define SPU_START	1
#define SPU_FIN		2
#define SPU_EXIT	3

/* Fixed DMA tag ids: retrieving buffers, storing buffers, initialisation */
#define RETR_BUF	0
#define STR_BUF		1
#define TAG_INIT	2

/* Largest picture dimensions the local store buffers are sized for */
#define MAX_HDTV_WIDTH	1920
#define MAX_HDTV_HEIGHT	1080

/* Bytes in one HDTV line (MAX_HDTV_WIDTH pixels of 4 bytes each) */
#define BUFFER_SIZE	7680
58
/*
 * Parameter block handed from the PPU to the fb_writer SPU program.
 * The SPU fetches it by DMA, so the field order and sizes must match on
 * both sides.
 */
struct fb_writer_parms_t {
	uint8_t *data;			/* first source line */
	uint8_t *center;		/* framebuffer start position; the
					 * writer aligns it down to 16 bytes */
	uint32_t out_line_stride;	/* bytes between framebuffer lines */
	uint32_t in_line_stride;	/* bytes between source lines */
	uint32_t bounded_input_height;	/* number of lines to copy */
	uint32_t bounded_input_width;	/* pixels per line to copy */
	uint32_t fb_pixel_size;		/* bytes per pixel */

	/* Filler that rounds the field data up to a multiple of 16 bytes
	 * (assuming 4-byte pointers - TODO confirm).  Adjust it whenever a
	 * field is added or removed. */
	char padding[4];
} __attribute__((aligned(128)));
72
/*
 * Parameter block handed from the PPU to the yuv2rgb SPU program.
 * The SPU fetches it by DMA, so the field order and sizes must match on
 * both sides.
 */
struct yuv2rgb_parms_t {
	/* Source planes */
	uint8_t *y_plane;
	uint8_t *v_plane;
	uint8_t *u_plane;

	/* Destination of the converted picture */
	uint8_t *dstBuffer;

	/* Source picture size in pixels */
	unsigned int src_pixel_width;
	unsigned int src_pixel_height;

	/* Filler up to 128 bytes, derived from the fields above.  Keep the
	 * field counts in this expression in sync with the struct. */
	char padding[128 - ((4 * sizeof(uint8_t *) + 2 * sizeof(unsigned int)) & 0x7F)];
} __attribute__((aligned(128)));
87
/*
 * Parameter block handed from the PPU to the bilin_scaler SPU program.
 * Same layout as yuv2rgb_parms_t, extended by the destination size.
 */
struct scale_parms_t {
	/* Source planes */
	uint8_t *y_plane;
	uint8_t *v_plane;
	uint8_t *u_plane;

	/* Destination buffer */
	uint8_t *dstBuffer;

	/* Source picture size in pixels */
	unsigned int src_pixel_width;
	unsigned int src_pixel_height;

	/* Destination picture size in pixels */
	unsigned int dst_pixel_width;
	unsigned int dst_pixel_height;

	/* Filler up to 128 bytes, derived from the fields above.  Keep the
	 * field counts in this expression in sync with the struct. */
	char padding[128 - ((4 * sizeof(uint8_t *) + 4 * sizeof(unsigned int)) & 0x7F)];
} __attribute__((aligned(128)));
105
106#endif /* _SPU_COMMON_H */
107
108
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c b/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c
deleted file mode 100644
index 5e166914c5..0000000000
--- a/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c
+++ /dev/null
@@ -1,629 +0,0 @@
1/*
2 * SDL - Simple DirectMedia Layer
3 * CELL BE Support for PS3 Framebuffer
4 * Copyright (C) 2008, 2009 International Business Machines Corporation
5 *
6 * This library is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 *
21 * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
22 * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
23 * SPE code based on research by:
24 * Rene Becker
25 * Thimo Emmerich
26 */
27
28#include "spu_common.h"
29
30#include <spu_intrinsics.h>
31#include <spu_mfcio.h>
32
// Debugging
//#define DEBUG

/*
 * deprintf() - debug trace helper.
 *
 * With DEBUG defined the arguments are printed to stdout with fprintf()
 * and stdout is flushed right away.  Without DEBUG the call compiles to
 * an empty statement and the arguments are not evaluated.
 *
 * Both variants are wrapped in do { } while (0) so that the macro is a
 * single statement and stays correct inside an unbraced if/else.
 */
#ifdef DEBUG
#define deprintf(fmt, args...) \
	do { \
		fprintf(stdout, fmt, ##args); \
		fflush(stdout); \
	} while (0)
#else
#define deprintf(fmt, args...) do { } while (0)
#endif
43
44struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128)));
45
46/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored
47 * there might be the need to retrieve misaligned data, adjust
48 * incoming v and u plane to be able to handle this (add 128)
49 */
50unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128)));
51unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
52unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
53
54/* A maximum of 4 lines BGRA are stored, 4 byte per pixel */
55unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128)));
56
57/* some vectors needed by the float to int conversion */
58static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
59static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
60
61void yuv_to_rgb_w16();
62void yuv_to_rgb_w32();
63
64void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width);
65void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width);
66
67
68int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused)))
69{
70 deprintf("[SPU] yuv2rgb_spu is up... (on SPE #%llu)\n", spe_id);
71 uint32_t ea_mfc, mbox;
72 // send ready message
73 spu_write_out_mbox(SPU_READY);
74
75 while (1) {
76 /* Check mailbox */
77 mbox = spu_read_in_mbox();
78 deprintf("[SPU] Message is %u\n", mbox);
79 switch (mbox) {
80 case SPU_EXIT:
81 deprintf("[SPU] fb_writer goes down...\n");
82 return 0;
83 case SPU_START:
84 break;
85 default:
86 deprintf("[SPU] Cannot handle message\n");
87 continue;
88 }
89
90 /* Tag Manager setup */
91 unsigned int tag_id;
92 tag_id = mfc_multi_tag_reserve(1);
93 if (tag_id == MFC_TAG_INVALID) {
94 deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n");
95 return 0;
96 }
97
98 /* DMA transfer for the input parameters */
99 ea_mfc = spu_read_in_mbox();
100 deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc);
101 spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD);
102 DMA_WAIT_TAG(tag_id);
103
104 /* There are alignment issues that involve handling of special cases
105 * a width of 32 results in a width of 16 in the chrominance
106 * --> choose the proper handling to optimize the performance
107 */
108 deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height);
109 if (parms_converter.src_pixel_width & 0x1f) {
110 deprintf("[SPU] Using yuv_to_rgb_w16\n");
111 yuv_to_rgb_w16();
112 } else {
113 deprintf("[SPU] Using yuv_to_rgb_w32\n");
114 yuv_to_rgb_w32();
115 }
116
117 mfc_multi_tag_release(tag_id, 1);
118 deprintf("[SPU] yuv2rgb_spu... done!\n");
119 /* Send FIN message */
120 spu_write_out_mbox(SPU_FIN);
121 }
122
123 return 0;
124}
125
126
127/*
128 * float_to_char()
129 *
130 * converts a float to a character using saturated
131 * arithmetic
132 *
133 * @param s float for conversion
134 * @returns converted character
135 */
136inline static unsigned char float_to_char(float s) {
137 vector float vec_s = spu_splats(s);
138 vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
139 vec_s = spu_sel(vec_s, vec_0_1, select_1);
140
141 vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
142 vec_s = spu_sel(vec_s, vec_255, select_2);
143 return (unsigned char) spu_extract(vec_s,0);
144}
145
146
147/*
148 * vfloat_to_vuint()
149 *
150 * converts a float vector to an unsinged int vector using saturated
151 * arithmetic
152 *
153 * @param vec_s float vector for conversion
154 * @returns converted unsigned int vector
155 */
156inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
157 vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
158 vec_s = spu_sel(vec_s, vec_0_1, select_1);
159
160 vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
161 vec_s = spu_sel(vec_s, vec_255, select_2);
162 return spu_convtu(vec_s,0);
163}
164
165
166void yuv_to_rgb_w16() {
167 // Pixel dimensions of the picture
168 uint32_t width, height;
169
170 // Extract parameters
171 width = parms_converter.src_pixel_width;
172 height = parms_converter.src_pixel_height;
173
174 // Plane data management
175 // Y
176 unsigned char* ram_addr_y = parms_converter.y_plane;
177 // V
178 unsigned char* ram_addr_v = parms_converter.v_plane;
179 // U
180 unsigned char* ram_addr_u = parms_converter.u_plane;
181
182 // BGRA
183 unsigned char* ram_addr_bgra = parms_converter.dstBuffer;
184
185 // Strides
186 unsigned int stride_y = width;
187 unsigned int stride_vu = width>>1;
188
189 // Buffer management
190 unsigned int buf_idx = 0;
191 unsigned int size_4lines_y = stride_y<<2;
192 unsigned int size_2lines_y = stride_y<<1;
193 unsigned int size_2lines_vu = stride_vu<<1;
194
195 // 2*width*4byte_per_pixel
196 unsigned int size_2lines_bgra = width<<3;
197
198
199 // start double-buffered processing
200 // 4 lines y
201 spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD);
202
203 // 2 lines v
204 spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
205
206 // 2 lines u
207 spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
208
209 // Wait for these transfers to be completed
210 DMA_WAIT_TAG((RETR_BUF + buf_idx));
211
212 unsigned int i;
213 for(i=0; i<(height>>2)-1; i++) {
214
215 buf_idx^=1;
216
217 // 4 lines y
218 spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD);
219
220 // 2 lines v
221 spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
222
223 // 2 lines u
224 spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
225
226 DMA_WAIT_TAG((RETR_BUF + buf_idx));
227
228 buf_idx^=1;
229
230
231 // Convert YUV to BGRA, store it back (first two lines)
232 yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
233
234 // Next two lines
235 yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y,
236 v_plane[buf_idx] + stride_vu,
237 u_plane[buf_idx] + stride_vu,
238 bgra + size_2lines_bgra,
239 width);
240
241 // Wait for previous storing transfer to be completed
242 DMA_WAIT_TAG(STR_BUF);
243
244 // Store converted lines in two steps->max transfer size 16384
245 spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
246 ram_addr_bgra += size_2lines_bgra;
247 spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
248 ram_addr_bgra += size_2lines_bgra;
249
250 // Move 4 lines
251 ram_addr_y += size_4lines_y;
252 ram_addr_v += size_2lines_vu;
253 ram_addr_u += size_2lines_vu;
254
255 buf_idx^=1;
256 }
257
258 // Convert YUV to BGRA, store it back (first two lines)
259 yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
260
261 // Next two lines
262 yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y,
263 v_plane[buf_idx] + stride_vu,
264 u_plane[buf_idx] + stride_vu,
265 bgra + size_2lines_bgra,
266 width);
267
268 // Wait for previous storing transfer to be completed
269 DMA_WAIT_TAG(STR_BUF);
270 spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
271 ram_addr_bgra += size_2lines_bgra;
272 spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
273
274 // wait for previous storing transfer to be completed
275 DMA_WAIT_TAG(STR_BUF);
276
277}
278
279
280void yuv_to_rgb_w32() {
281 // Pixel dimensions of the picture
282 uint32_t width, height;
283
284 // Extract parameters
285 width = parms_converter.src_pixel_width;
286 height = parms_converter.src_pixel_height;
287
288 // Plane data management
289 // Y
290 unsigned char* ram_addr_y = parms_converter.y_plane;
291 // V
292 unsigned char* ram_addr_v = parms_converter.v_plane;
293 // U
294 unsigned char* ram_addr_u = parms_converter.u_plane;
295
296 // BGRA
297 unsigned char* ram_addr_bgra = parms_converter.dstBuffer;
298
299 // Strides
300 unsigned int stride_y = width;
301 unsigned int stride_vu = width>>1;
302
303 // Buffer management
304 unsigned int buf_idx = 0;
305 unsigned int size_4lines_y = stride_y<<2;
306 unsigned int size_2lines_y = stride_y<<1;
307 unsigned int size_2lines_vu = stride_vu<<1;
308
309 // 2*width*4byte_per_pixel
310 unsigned int size_2lines_bgra = width<<3;
311
312 // start double-buffered processing
313 // 4 lines y
314 spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
315 // 2 lines v
316 spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
317 // 2 lines u
318 spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
319
320 // Wait for these transfers to be completed
321 DMA_WAIT_TAG((RETR_BUF + buf_idx));
322
323 unsigned int i;
324 for(i=0; i < (height>>2)-1; i++) {
325 buf_idx^=1;
326 // 4 lines y
327 spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
328 deprintf("4lines = %d\n", size_4lines_y);
329 // 2 lines v
330 spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
331 deprintf("2lines = %d\n", size_2lines_vu);
332 // 2 lines u
333 spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
334 deprintf("2lines = %d\n", size_2lines_vu);
335
336 DMA_WAIT_TAG((RETR_BUF + buf_idx));
337
338 buf_idx^=1;
339
340 // Convert YUV to BGRA, store it back (first two lines)
341 yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
342
343 // Next two lines
344 yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y,
345 v_plane[buf_idx] + stride_vu,
346 u_plane[buf_idx] + stride_vu,
347 bgra + size_2lines_bgra,
348 width);
349
350 // Wait for previous storing transfer to be completed
351 DMA_WAIT_TAG(STR_BUF);
352
353 // Store converted lines in two steps->max transfer size 16384
354 spu_mfcdma32(bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
355 ram_addr_bgra += size_2lines_bgra;
356 spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
357 ram_addr_bgra += size_2lines_bgra;
358
359 // Move 4 lines
360 ram_addr_y += size_4lines_y;
361 ram_addr_v += size_2lines_vu;
362 ram_addr_u += size_2lines_vu;
363
364 buf_idx^=1;
365 }
366
367 // Convert YUV to BGRA, store it back (first two lines)
368 yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
369
370 // Next two lines
371 yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y,
372 v_plane[buf_idx] + stride_vu,
373 u_plane[buf_idx] + stride_vu,
374 bgra + size_2lines_bgra,
375 width);
376
377 // Wait for previous storing transfer to be completed
378 DMA_WAIT_TAG(STR_BUF);
379 spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
380 ram_addr_bgra += size_2lines_bgra;
381 spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
382
383 // Wait for previous storing transfer to be completed
384 DMA_WAIT_TAG(STR_BUF);
385}
386
387
388/* Some vectors needed by the yuv 2 rgb conversion algorithm */
389const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f };
390const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
391const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 };
392const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 };
393const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B };
394const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F };
395
396const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f};
397const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f};
398const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f};
399const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f};
400
401const vector unsigned int vec_alpha = { 255 << 24, 255 << 24, 255 << 24, 255 << 24 };
402
403const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 };
404const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F };
405
406
/*
 * Build one BGRA pixel word from a luma sample and the colour offsets that
 * were precomputed from the chroma samples.  Blue occupies bits 0..7,
 * green bits 8..15, red bits 16..23 and alpha (always 255) bits 24..31.
 */
static inline unsigned int yuv_w16_pack_pixel(unsigned char luma, float r_off, float g_off, float b_off)
{
    const unsigned char red   = float_to_char((luma + r_off));
    const unsigned char green = float_to_char((luma + g_off));
    const unsigned char blue  = float_to_char((luma + b_off));

    // 0xFF000000u instead of (255 << 24): the latter shifts a signed int
    // into its sign bit, which is undefined behaviour
    return (unsigned int) blue
         | ((unsigned int) green << 8)
         | ((unsigned int) red << 16)
         | 0xFF000000u;
}


/*
 * yuv_to_rgb_w16_line()
 *
 * scalar conversion of two lines of yuv input to bgra,
 * width has to be a multiple of 16
 *
 * The second line of the y input and of the bgra output is expected
 * directly behind the first one (line stride == width); one line of
 * u and one line of v (width / 2 samples each) serve both lines.
 *
 * @param y_addr address of the y plane in local store
 * @param v_addr address of the v plane in local store
 * @param u_addr address of the u plane in local store
 * @param bgra_addr_ address of the bgra output buffer
 * @param width the width in pixel
 */
void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) {
    // each pixel is stored as an integer
    unsigned int* bgra_addr = (unsigned int*) bgra_addr_;

    unsigned int x;
    for(x = 0; x < width; x+=2) {
        // Step through the line two pixels at a time, because every u and v
        // value applies to 4 pixels (two high, two wide)
        const unsigned char Y_1 = *(y_addr + x);
        const unsigned char Y_2 = *(y_addr + x + 1);
        const unsigned char Y_3 = *(y_addr + x + width);
        const unsigned char Y_4 = *(y_addr + x + width + 1);
        const unsigned char U = *(u_addr + (x >> 1));
        const unsigned char V = *(v_addr + (x >> 1));

        const float V_minus_128 = (float)V - 128.0f;
        const float U_minus_128 = (float)U - 128.0f;

        // Colour offsets shared by all four pixels of the 2x2 group
        const float R_precalculate = 1.403f * V_minus_128;
        const float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128);
        const float B_precalculate = 1.773f * U_minus_128;

        *(bgra_addr + x)             = yuv_w16_pack_pixel(Y_1, R_precalculate, G_precalculate, B_precalculate);
        *(bgra_addr + x + 1)         = yuv_w16_pack_pixel(Y_2, R_precalculate, G_precalculate, B_precalculate);
        *(bgra_addr + x + width)     = yuv_w16_pack_pixel(Y_3, R_precalculate, G_precalculate, B_precalculate);
        *(bgra_addr + x + width + 1) = yuv_w16_pack_pixel(Y_4, R_precalculate, G_precalculate, B_precalculate);
    }
}
459
460
461/*
462 * yuv_to_rgb_w32()
463 *
464 * processes to line of yuv-input, width has to be a multiple of 32
465 * two lines of yuv are taken as input
466 *
467 * @param y_addr address of the y plane in local store
468 * @param v_addr address of the v plane in local store
469 * @param u_addr address of the u plane in local store
470 * @param bgra_addr_ address of the bgra output buffer
471 * @param width the width in pixel
472 */
473void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) {
474 // each pixel is stored as an integer
475 unsigned int* bgra_addr = (unsigned int*) bgra_addr_;
476
477 unsigned int x;
478 for(x = 0; x < width; x+=32) {
479 // Gehe zweischrittig durch die zeile, da jeder u und v wert fuer 4 pixel(zwei hoch, zwei breit) gilt
480
481 const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x));
482 const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16));
483 const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width));
484 const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16));
485 const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1)));
486 const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1)));
487
488 const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128);
489 const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_second), 0),vec_minus_128);
490 const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128);
491 const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128);
492
493 const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128);
494 const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128);
495 const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128);
496 const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128);
497
498 vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0);
499 vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0);
500 vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0);
501 vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0);
502 vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0);
503 vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0);
504 vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0);
505 vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0);
506 vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0);
507 vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_second), 0);
508 vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0);
509 vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0);
510 vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0);
511 vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0);
512 vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0);
513 vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0);
514
515 const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1);
516 const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2);
517 const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3);
518 const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4);
519
520 const vector float R1_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_upper);
521 const vector float R2_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_lower);
522 const vector float R3_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_upper);
523 const vector float R4_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_lower);
524 const vector float R5_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_upper);
525 const vector float R6_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_lower);
526 const vector float R7_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_upper);
527 const vector float R8_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_lower);
528
529
530 const vector float G1a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff));
531 const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff));
532 const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff));
533 const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff));
534
535 const vector float G1_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_upper);
536 const vector float G2_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_lower);
537 const vector float G3_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_upper);
538 const vector float G4_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_lower);
539 const vector float G5_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_upper);
540 const vector float G6_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_lower);
541 const vector float G7_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_upper);
542 const vector float G8_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_lower);
543
544
545 const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1);
546 const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2);
547 const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3);
548 const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4);
549
550 const vector float B1_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_upper);
551 const vector float B2_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_lower);
552 const vector float B3_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_upper);
553 const vector float B4_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_lower);
554 const vector float B5_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_upper);
555 const vector float B6_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_lower);
556 const vector float B7_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_upper);
557 const vector float B8_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_lower);
558
559
560 const vector unsigned int R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate));
561 const vector unsigned int R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate));
562 const vector unsigned int R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate));
563 const vector unsigned int R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate));
564 const vector unsigned int R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate));
565 const vector unsigned int R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate));
566 const vector unsigned int R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate));
567 const vector unsigned int R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate));
568 const vector unsigned int R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate));
569 const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate));
570 const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate));
571 const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate));
572 const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate));
573 const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate));
574 const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate));
575 const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate));
576
577 const vector unsigned int G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate));
578 const vector unsigned int G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate));
579 const vector unsigned int G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate));
580 const vector unsigned int G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate));
581 const vector unsigned int G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate));
582 const vector unsigned int G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate));
583 const vector unsigned int G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate));
584 const vector unsigned int G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate));
585 const vector unsigned int G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate));
586 const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate));
587 const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate));
588 const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate));
589 const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate));
590 const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate));
591 const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate));
592 const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate));
593
594 const vector unsigned int B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate));
595 const vector unsigned int B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate));
596 const vector unsigned int B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate));
597 const vector unsigned int B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate));
598 const vector unsigned int B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate));
599 const vector unsigned int B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate));
600 const vector unsigned int B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate));
601 const vector unsigned int B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate));
602 const vector unsigned int B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate));
603 const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate));
604 const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate));
605 const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate));
606 const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate));
607 const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate));
608 const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate));
609 const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate));
610
611 *((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha, B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1)));
612 *((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha, B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1)));
613 *((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha, B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1)));
614 *((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha, B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1)));
615 *((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha, B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1)));
616 *((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha, B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1)));
617 *((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha, B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1)));
618 *((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha, B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1)));
619 *((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha, B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1)));
620 *((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 2),spu_slqwbyte(G_10, 1)));
621 *((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1)));
622 *((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1)));
623 *((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1)));
624 *((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1)));
625 *((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1)));
626 *((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1)));
627 }
628}
629