diff options
Diffstat (limited to 'apps/plugins/sdl/src/video/ps3')
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/SDL_ps3events.c | 44 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h | 41 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/SDL_ps3video.c | 621 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/SDL_ps3video.h | 165 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c | 340 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h | 44 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/spulibs/Makefile | 83 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c | 2050 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c | 193 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h | 108 | ||||
-rw-r--r-- | apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c | 629 |
11 files changed, 4318 insertions, 0 deletions
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3events.c b/apps/plugins/sdl/src/video/ps3/SDL_ps3events.c new file mode 100644 index 0000000000..e39efcc4f0 --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/SDL_ps3events.c | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "SDL_config.h" | ||
29 | |||
30 | #include "../../events/SDL_sysevents.h" | ||
31 | #include "../../events/SDL_events_c.h" | ||
32 | #include "SDL_ps3video.h" | ||
33 | #include "SDL_ps3events_c.h" | ||
34 | |||
/*
 * Event pump hook for the PS3 video device.
 * The body is intentionally empty: this driver does no work here.
 */
void PS3_PumpEvents(_THIS)
{
    /* nothing to do */
}
39 | |||
/*
 * Keymap initialisation hook for the PS3 video device.
 * The body is intentionally empty: this driver sets up no keymap.
 */
void PS3_InitOSKeymap(_THIS)
{
    /* nothing to do */
}
44 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h b/apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h new file mode 100644 index 0000000000..fd11209af1 --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/SDL_ps3events_c.h | |||
@@ -0,0 +1,41 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "SDL_config.h" | ||
29 | |||
30 | #ifndef _SDL_ps3events_h | ||
31 | #define _SDL_ps3events_h | ||
32 | |||
33 | #include "SDL_ps3video.h" | ||
34 | |||
35 | extern void PS3_InitOSKeymap(_THIS); | ||
36 | extern void PS3_PumpEvents(_THIS); | ||
37 | |||
38 | extern void enable_cursor(int enable); | ||
39 | |||
40 | #endif /* _SDL_ps3events_h */ | ||
41 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3video.c b/apps/plugins/sdl/src/video/ps3/SDL_ps3video.c new file mode 100644 index 0000000000..d5519e051e --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/SDL_ps3video.c | |||
@@ -0,0 +1,621 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "SDL_config.h" | ||
29 | |||
30 | #include "SDL_video.h" | ||
31 | #include "../SDL_sysvideo.h" | ||
32 | #include "SDL_ps3events_c.h" | ||
33 | #include "SDL_ps3video.h" | ||
34 | #include "SDL_ps3yuv_c.h" | ||
35 | #include "spulibs/spu_common.h" | ||
36 | |||
37 | #include <fcntl.h> | ||
38 | #include <stdlib.h> | ||
39 | #include <sys/ioctl.h> | ||
40 | #include <linux/kd.h> | ||
41 | #include <sys/mman.h> | ||
42 | |||
43 | #include <linux/fb.h> | ||
44 | #include <asm/ps3fb.h> | ||
45 | #include <libspe2.h> | ||
46 | #include <malloc.h> | ||
47 | |||
48 | /* SDL_VideoDevice functions */ | ||
49 | static int PS3_Available(); | ||
50 | static SDL_VideoDevice *PS3_CreateDevice(int devindex); | ||
51 | static int PS3_VideoInit(_THIS, SDL_PixelFormat * vformat); | ||
52 | static void PS3_VideoQuit(_THIS); | ||
53 | static void PS3_DeleteDevice(SDL_VideoDevice * device); | ||
54 | static SDL_Surface *PS3_SetVideoMode(_THIS, SDL_Surface * current, int width, int height, int bpp, Uint32 flags); | ||
55 | static SDL_Rect **PS3_ListModes(_THIS, SDL_PixelFormat * format, Uint32 flags); | ||
56 | |||
57 | /* Hardware surface functions */ | ||
58 | static int PS3_AllocHWSurface(_THIS, SDL_Surface * surface); | ||
59 | static void PS3_FreeHWSurface(_THIS, SDL_Surface * surface); | ||
60 | static int PS3_LockHWSurface(_THIS, SDL_Surface * surface); | ||
61 | static void PS3_UnlockHWSurface(_THIS, SDL_Surface * surface); | ||
62 | static int PS3_FlipDoubleBuffer(_THIS, SDL_Surface * surface); | ||
63 | static void PS3_DoubleBufferUpdate(_THIS, int numrects, SDL_Rect * rects); | ||
64 | |||
65 | /* SPU specific functions */ | ||
66 | int SPE_Start(_THIS, spu_data_t * spe_data); | ||
67 | int SPE_Stop(_THIS, spu_data_t * spe_data); | ||
68 | int SPE_Boot(_THIS, spu_data_t * spe_data); | ||
69 | int SPE_Shutdown(_THIS, spu_data_t * spe_data); | ||
70 | int SPE_SendMsg(_THIS, spu_data_t * spe_data, unsigned int msg); | ||
71 | int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg); | ||
72 | void SPE_RunContext(void *thread_argp); | ||
73 | |||
74 | /* Helpers */ | ||
75 | void enable_cursor(int enable); | ||
76 | |||
77 | /* Stores the SPE executable name of fb_writer_spu */ | ||
78 | extern spe_program_handle_t fb_writer_spu; | ||
79 | |||
/*
 * SDL PS3 bootstrap hook: availability probe.
 * Unconditionally reports the driver as available (returns 1); no device
 * is opened or queried here.
 */
static int PS3_Available()
{
    const int available = 1;

    return available;
}
85 | |||
86 | /* SDL PS3 bootstrap function for creating the device */ | ||
87 | static SDL_VideoDevice *PS3_CreateDevice(int devindex) | ||
88 | { | ||
89 | SDL_VideoDevice *this; | ||
90 | |||
91 | /* Initialise SDL_VideoDevice */ | ||
92 | this = (SDL_VideoDevice *) SDL_malloc(sizeof(SDL_VideoDevice)); | ||
93 | if (this) { | ||
94 | memset(this, 0, sizeof *this); | ||
95 | this->hidden = (struct SDL_PrivateVideoData *) | ||
96 | SDL_malloc(sizeof(struct SDL_PrivateVideoData)); | ||
97 | } | ||
98 | /* Error handling */ | ||
99 | if ((this == NULL) || (this->hidden == NULL)) { | ||
100 | SDL_OutOfMemory(); | ||
101 | if (this) | ||
102 | SDL_free(this); | ||
103 | return 0; | ||
104 | } | ||
105 | memset(this->hidden, 0, sizeof(struct SDL_PrivateVideoData)); | ||
106 | |||
107 | /* Set the function pointers */ | ||
108 | this->VideoInit = PS3_VideoInit; | ||
109 | this->ListModes = PS3_ListModes; | ||
110 | this->SetVideoMode = PS3_SetVideoMode; | ||
111 | this->SetColors = 0; | ||
112 | this->CreateYUVOverlay = PS3_CreateYUVOverlay; | ||
113 | this->UpdateRects = 0; | ||
114 | this->VideoQuit = PS3_VideoQuit; | ||
115 | this->AllocHWSurface = PS3_AllocHWSurface; | ||
116 | this->CheckHWBlit = 0; | ||
117 | this->FillHWRect = 0; | ||
118 | this->SetHWColorKey = 0; | ||
119 | this->SetHWAlpha = 0; | ||
120 | this->LockHWSurface = PS3_LockHWSurface; | ||
121 | this->UnlockHWSurface = PS3_UnlockHWSurface; | ||
122 | this->FlipHWSurface = PS3_FlipDoubleBuffer; | ||
123 | this->FreeHWSurface = PS3_FreeHWSurface; | ||
124 | this->SetCaption = 0; | ||
125 | this->SetIcon = 0; | ||
126 | this->IconifyWindow = 0; | ||
127 | this->GrabInput = 0; | ||
128 | this->GetWMInfo = 0; | ||
129 | this->InitOSKeymap = PS3_InitOSKeymap; | ||
130 | this->PumpEvents = PS3_PumpEvents; | ||
131 | |||
132 | this->free = PS3_DeleteDevice; | ||
133 | |||
134 | return this; | ||
135 | } | ||
136 | |||
137 | |||
138 | /* Bootstraping (see SDL_sysvideo.h) */ | ||
139 | VideoBootStrap PS3_bootstrap = { | ||
140 | "ps3", "PS3 Cell SPU Driver", | ||
141 | PS3_Available, PS3_CreateDevice | ||
142 | }; | ||
143 | |||
144 | |||
145 | /* Delete the device */ | ||
146 | static void PS3_DeleteDevice(SDL_VideoDevice * device) | ||
147 | { | ||
148 | free(device->hidden); | ||
149 | free(device); | ||
150 | } | ||
151 | |||
152 | |||
153 | /* Initialise the PS3 video device */ | ||
154 | static int PS3_VideoInit(_THIS, SDL_PixelFormat * vformat) | ||
155 | { | ||
156 | /* Hide the cursor */ | ||
157 | enable_cursor(0); | ||
158 | |||
159 | /* Create SPU fb_parms and thread structure */ | ||
160 | fb_parms = (struct fb_writer_parms_t *) | ||
161 | memalign(16, sizeof(struct fb_writer_parms_t)); | ||
162 | fb_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t)); | ||
163 | if (fb_parms == NULL || fb_thread_data == NULL) { | ||
164 | SDL_OutOfMemory(); | ||
165 | return -1; | ||
166 | } | ||
167 | fb_thread_data->program = fb_writer_spu; | ||
168 | fb_thread_data->program_name = "fb_writer_spu"; | ||
169 | fb_thread_data->argp = (void *)fb_parms; | ||
170 | fb_thread_data->keepalive = 1; | ||
171 | fb_thread_data->booted = 0; | ||
172 | |||
173 | SPE_Start(this, fb_thread_data); | ||
174 | |||
175 | /* Open the device */ | ||
176 | fb_dev_fd = open(PS3_DEV_FB, O_RDWR); | ||
177 | if (fb_dev_fd < 0) { | ||
178 | SDL_SetError("[PS3] Unable to open device %s", PS3_DEV_FB); | ||
179 | return -1; | ||
180 | } | ||
181 | |||
182 | /* Get vscreeninfo */ | ||
183 | if (ioctl(fb_dev_fd, FBIOGET_VSCREENINFO, &fb_vinfo)) { | ||
184 | SDL_SetError("[PS3] Can't get VSCREENINFO"); | ||
185 | if (fb_dev_fd >= 0) | ||
186 | close(fb_dev_fd); | ||
187 | fb_dev_fd = -1; | ||
188 | return -1; | ||
189 | } | ||
190 | |||
191 | /* Fill in our hardware acceleration capabilities */ | ||
192 | this->info.current_w = fb_vinfo.xres; | ||
193 | this->info.current_h = fb_vinfo.yres; | ||
194 | this->info.wm_available = 0; | ||
195 | this->info.hw_available = 1; | ||
196 | |||
197 | /* Backup the original vinfo to restore later */ | ||
198 | fb_orig_vinfo = fb_vinfo; | ||
199 | |||
200 | /* 16 and 15 bpp is reported as 16 bpp */ | ||
201 | fb_bits_per_pixel = fb_vinfo.bits_per_pixel; | ||
202 | if (fb_bits_per_pixel == 16) | ||
203 | fb_bits_per_pixel = | ||
204 | fb_vinfo.red.length + fb_vinfo.green.length + | ||
205 | fb_vinfo.blue.length; | ||
206 | |||
207 | /* Set SDL_PixelFormat */ | ||
208 | vformat->BitsPerPixel = fb_vinfo.bits_per_pixel; | ||
209 | |||
210 | fb_vinfo.xres_virtual = fb_vinfo.xres; | ||
211 | fb_vinfo.yres_virtual = fb_vinfo.yres; | ||
212 | |||
213 | /* Put vscreeninfo */ | ||
214 | if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_vinfo)) { | ||
215 | SDL_SetError("[PS3] Can't put VSCREENINFO"); | ||
216 | if (fb_dev_fd >= 0) | ||
217 | close(fb_dev_fd); | ||
218 | fb_dev_fd = -1; | ||
219 | return -1; | ||
220 | } | ||
221 | |||
222 | s_fb_pixel_size = fb_vinfo.bits_per_pixel / 8; | ||
223 | |||
224 | s_writeable_width = fb_vinfo.xres; | ||
225 | s_writeable_height = fb_vinfo.yres; | ||
226 | |||
227 | /* Get ps3 screeninfo */ | ||
228 | if (ioctl(fb_dev_fd, PS3FB_IOCTL_SCREENINFO, (unsigned long)&res) < 0) { | ||
229 | SDL_SetError("[PS3] PS3FB_IOCTL_SCREENINFO failed"); | ||
230 | } | ||
231 | deprintf(1, "[PS3] xres:%d yres:%d xoff:%d yoff:%d\n", res.xres, res.yres, res.xoff, res.yoff); | ||
232 | |||
233 | /* Only use double buffering if enough fb memory is available */ | ||
234 | if (res.num_frames < 2) { | ||
235 | double_buffering = 0; | ||
236 | } else { | ||
237 | double_buffering = 1; | ||
238 | } | ||
239 | |||
240 | real_width = res.xres; | ||
241 | real_height = res.yres; | ||
242 | |||
243 | /* | ||
244 | * Take control of frame buffer from kernel, for details see | ||
245 | * http://felter.org/wesley/files/ps3/linux-20061110-docs/ApplicationProgrammingEnvironment.html | ||
246 | * kernel will no longer flip the screen itself | ||
247 | */ | ||
248 | ioctl(fb_dev_fd, PS3FB_IOCTL_ON, 0); | ||
249 | |||
250 | /* Unblank screen */ | ||
251 | ioctl(fb_dev_fd, FBIOBLANK, 0); | ||
252 | |||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | |||
257 | /* List available PS3 resolutions */ | ||
258 | static SDL_Rect **PS3_ListModes(_THIS, SDL_PixelFormat * format, Uint32 flags) | ||
259 | { | ||
260 | /* A list of video resolutions that we query for (sorted largest to | ||
261 | * smallest) | ||
262 | */ | ||
263 | static SDL_Rect PS3_resolutions[] = { | ||
264 | {0, 0, 1920, 1080}, // 1080p 16:9 HD | ||
265 | {0, 0, 1600, 1200}, // WUXGA | ||
266 | {0, 0, 1280, 1024}, // SXGA | ||
267 | {0, 0, 1280, 720}, // 720p 16:9 HD | ||
268 | {0, 0, 1024, 768}, // WXGA | ||
269 | {0, 0, 1024, 576}, // 576p 16:9 | ||
270 | {0, 0, 853, 480}, // 480p 16:9 | ||
271 | {0, 0, 720, 576}, // 576p 4:3 (PAL) | ||
272 | {0, 0, 720, 480}, // 480p 16:9 (NTSC) | ||
273 | }; | ||
274 | static SDL_Rect *PS3_modes[] = { | ||
275 | &PS3_resolutions[0], | ||
276 | &PS3_resolutions[1], | ||
277 | &PS3_resolutions[2], | ||
278 | &PS3_resolutions[3], | ||
279 | &PS3_resolutions[4], | ||
280 | &PS3_resolutions[5], | ||
281 | &PS3_resolutions[6], | ||
282 | &PS3_resolutions[7], | ||
283 | &PS3_resolutions[8], | ||
284 | NULL | ||
285 | }; | ||
286 | SDL_Rect **modes = PS3_modes; | ||
287 | |||
288 | return modes; | ||
289 | } | ||
290 | |||
291 | |||
292 | /* Get a list of the available display modes */ | ||
293 | static SDL_Surface *PS3_SetVideoMode(_THIS, SDL_Surface * current, int width, int height, int bpp, Uint32 flags) | ||
294 | { | ||
295 | s_bounded_input_width = width < s_writeable_width ? width : s_writeable_width; | ||
296 | s_bounded_input_height = height < s_writeable_height ? height : s_writeable_height; | ||
297 | s_bounded_input_width_offset = (s_writeable_width - s_bounded_input_width) >> 1; | ||
298 | s_bounded_input_height_offset = (s_writeable_height - s_bounded_input_height) >> 1; | ||
299 | s_input_line_length = width * s_fb_pixel_size; | ||
300 | |||
301 | current->flags |= flags; | ||
302 | |||
303 | if (ioctl(fb_dev_fd, FBIOGET_FSCREENINFO, &fb_finfo)) { | ||
304 | SDL_SetError("[PS3] Can't get fixed screeninfo"); | ||
305 | return NULL; | ||
306 | } | ||
307 | |||
308 | if (fb_finfo.type != FB_TYPE_PACKED_PIXELS) { | ||
309 | SDL_SetError("[PS3] type %s not supported", | ||
310 | fb_finfo.type); | ||
311 | return NULL; | ||
312 | } | ||
313 | |||
314 | /* Note: on PS3, fb_finfo.smem_len is enough for double buffering */ | ||
315 | if ((frame_buffer = | ||
316 | (uint8_t *) mmap(0, fb_finfo.smem_len, | ||
317 | PROT_READ | PROT_WRITE, MAP_SHARED, | ||
318 | fb_dev_fd, 0)) == (uint8_t *) - 1) { | ||
319 | SDL_SetError("[PS3] Can't mmap for %s", PS3_DEV_FB); | ||
320 | return NULL; | ||
321 | } else { | ||
322 | current->flags |= SDL_DOUBLEBUF; | ||
323 | } | ||
324 | if (!SDL_ReallocFormat(current, fb_bits_per_pixel, 0, 0, 0, 0)) { | ||
325 | return (NULL); | ||
326 | } | ||
327 | |||
328 | /* Blank screen */ | ||
329 | memset(frame_buffer, 0x00, fb_finfo.smem_len); | ||
330 | |||
331 | /* Centering */ | ||
332 | s_center[0] = | ||
333 | frame_buffer + s_bounded_input_width_offset * s_fb_pixel_size + | ||
334 | s_bounded_input_height_offset * fb_finfo.line_length; | ||
335 | s_center[1] = s_center[0] + real_height * fb_finfo.line_length; | ||
336 | s_center_index = 0; | ||
337 | |||
338 | current->flags |= SDL_FULLSCREEN; | ||
339 | current->w = width; | ||
340 | current->h = height; | ||
341 | current->pitch = SDL_CalculatePitch(current); | ||
342 | |||
343 | /* Alloc aligned mem for current->pixels */ | ||
344 | s_pixels = memalign(16, current->h * current->pitch); | ||
345 | current->pixels = (void *)s_pixels; | ||
346 | if (!current->pixels) { | ||
347 | SDL_OutOfMemory(); | ||
348 | return NULL; | ||
349 | } | ||
350 | |||
351 | /* Set the update rectangle function */ | ||
352 | this->UpdateRects = PS3_DoubleBufferUpdate; | ||
353 | |||
354 | return current; | ||
355 | } | ||
356 | |||
357 | |||
358 | /* Copy screen to framebuffer and flip */ | ||
359 | void PS3_DoubleBufferUpdate(_THIS, int numrects, SDL_Rect * rects) | ||
360 | { | ||
361 | if (converter_thread_data && converter_thread_data->booted) | ||
362 | SPE_WaitForMsg(this, converter_thread_data, SPU_FIN); | ||
363 | |||
364 | /* Adjust centering */ | ||
365 | s_bounded_input_width_offset = (s_writeable_width - s_bounded_input_width) >> 1; | ||
366 | s_bounded_input_height_offset = (s_writeable_height - s_bounded_input_height) >> 1; | ||
367 | s_center[0] = frame_buffer + s_bounded_input_width_offset * s_fb_pixel_size + | ||
368 | s_bounded_input_height_offset * fb_finfo.line_length; | ||
369 | s_center[1] = s_center[0] + real_height * fb_finfo.line_length; | ||
370 | |||
371 | /* Set SPU parms for copying the surface to framebuffer */ | ||
372 | fb_parms->data = (unsigned char *)s_pixels; | ||
373 | fb_parms->center = s_center[s_center_index]; | ||
374 | fb_parms->out_line_stride = fb_finfo.line_length; | ||
375 | fb_parms->in_line_stride = s_input_line_length; | ||
376 | fb_parms->bounded_input_height = s_bounded_input_height; | ||
377 | fb_parms->bounded_input_width = s_bounded_input_width; | ||
378 | fb_parms->fb_pixel_size = s_fb_pixel_size; | ||
379 | |||
380 | deprintf(3, "[PS3->SPU] fb_thread_data->argp = 0x%x\n", fb_thread_data->argp); | ||
381 | |||
382 | /* Copying.. */ | ||
383 | SPE_SendMsg(this, fb_thread_data, SPU_START); | ||
384 | SPE_SendMsg(this, fb_thread_data, (unsigned int)fb_thread_data->argp); | ||
385 | |||
386 | SPE_WaitForMsg(this, fb_thread_data, SPU_FIN); | ||
387 | |||
388 | /* Flip the pages */ | ||
389 | if (double_buffering) | ||
390 | s_center_index = s_center_index ^ 0x01; | ||
391 | PS3_FlipDoubleBuffer(this, this->screen); | ||
392 | } | ||
393 | |||
394 | |||
/*
 * Enable/Disable cursor.
 *
 * Opens /dev/console and sets its mode to KD_TEXT when enable is non-zero,
 * KD_GRAPHICS otherwise. Does nothing if the console cannot be opened; the
 * ioctl result is not checked.
 */
void enable_cursor(int enable)
{
    int console_fd;
    int mode;

    console_fd = open("/dev/console", O_RDWR | O_NONBLOCK);
    if (console_fd < 0) {
        return;
    }

    if (enable) {
        mode = KD_TEXT;
    } else {
        mode = KD_GRAPHICS;
    }
    ioctl(console_fd, KDSETMODE, mode);
    close(console_fd);
}
404 | |||
405 | |||
406 | static int PS3_AllocHWSurface(_THIS, SDL_Surface * surface) | ||
407 | { | ||
408 | return -1; | ||
409 | } | ||
410 | |||
411 | |||
412 | static void PS3_FreeHWSurface(_THIS, SDL_Surface * surface) | ||
413 | { | ||
414 | return; | ||
415 | } | ||
416 | |||
417 | |||
418 | static int PS3_LockHWSurface(_THIS, SDL_Surface * surface) | ||
419 | { | ||
420 | return 0; | ||
421 | } | ||
422 | |||
423 | |||
424 | static void PS3_UnlockHWSurface(_THIS, SDL_Surface * surface) | ||
425 | { | ||
426 | return; | ||
427 | } | ||
428 | |||
429 | |||
430 | /* Blit/Flip buffer to the screen. Must be called after each frame! */ | ||
431 | int PS3_FlipDoubleBuffer(_THIS, SDL_Surface * surface) | ||
432 | { | ||
433 | unsigned long crt = 0; | ||
434 | /* Wait for vsync */ | ||
435 | deprintf(1, "[PS3] Wait for vsync\n"); | ||
436 | ioctl(fb_dev_fd, FBIO_WAITFORVSYNC, &crt); | ||
437 | /* Page flip */ | ||
438 | deprintf(1, "[PS3] Page flip to buffer #%u 0x%x\n", s_center_index, s_center[s_center_index]); | ||
439 | ioctl(fb_dev_fd, PS3FB_IOCTL_FSEL, (unsigned long)&s_center_index); | ||
440 | return 1; | ||
441 | } | ||
442 | |||
443 | |||
444 | /* Start the SPE thread */ | ||
445 | int SPE_Start(_THIS, spu_data_t * spe_data) | ||
446 | { | ||
447 | deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name); | ||
448 | if (!(spe_data->booted)) | ||
449 | SPE_Boot(this, spe_data); | ||
450 | |||
451 | /* To allow re-running of context, spe_ctx_entry has to be set before each call */ | ||
452 | spe_data->entry = SPE_DEFAULT_ENTRY; | ||
453 | spe_data->error_code = 0; | ||
454 | |||
455 | /* Create SPE thread and run */ | ||
456 | deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name); | ||
457 | if (pthread_create | ||
458 | (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) { | ||
459 | deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name); | ||
460 | SDL_SetError("[PS3->SPU] Could not create pthread for spe"); | ||
461 | return -1; | ||
462 | } | ||
463 | |||
464 | if (spe_data->keepalive) | ||
465 | SPE_WaitForMsg(this, spe_data, SPU_READY); | ||
466 | } | ||
467 | |||
468 | |||
469 | /* Stop the SPE thread */ | ||
470 | int SPE_Stop(_THIS, spu_data_t * spe_data) | ||
471 | { | ||
472 | deprintf(2, "[PS3->SPU] Stop SPE: %s\n", spe_data->program_name); | ||
473 | /* Wait for SPE thread to complete */ | ||
474 | deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name); | ||
475 | if (pthread_join(spe_data->thread, NULL)) { | ||
476 | deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name); | ||
477 | SDL_SetError("[PS3->SPU] Failed joining the thread"); | ||
478 | return -1; | ||
479 | } | ||
480 | |||
481 | return 0; | ||
482 | } | ||
483 | |||
484 | |||
485 | /* Create SPE context and load program */ | ||
486 | int SPE_Boot(_THIS, spu_data_t * spe_data) | ||
487 | { | ||
488 | /* Create SPE context */ | ||
489 | deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name); | ||
490 | spe_data->ctx = spe_context_create(0, NULL); | ||
491 | if (spe_data->ctx == NULL) { | ||
492 | deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name); | ||
493 | SDL_SetError("[PS3->SPU] Failed creating SPE context"); | ||
494 | return -1; | ||
495 | } | ||
496 | |||
497 | /* Load SPE object into SPE local store */ | ||
498 | deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name); | ||
499 | if (spe_program_load(spe_data->ctx, &spe_data->program)) { | ||
500 | deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name); | ||
501 | SDL_SetError | ||
502 | ("[PS3->SPU] Failed loading program into SPE context"); | ||
503 | return -1; | ||
504 | } | ||
505 | spe_data->booted = 1; | ||
506 | deprintf(2, "[PS3->SPU] SPE boot successful\n"); | ||
507 | |||
508 | return 0; | ||
509 | } | ||
510 | |||
511 | /* (Stop and) shutdown the SPE */ | ||
512 | int SPE_Shutdown(_THIS, spu_data_t * spe_data) | ||
513 | { | ||
514 | if (spe_data->keepalive && spe_data->booted) { | ||
515 | SPE_SendMsg(this, spe_data, SPU_EXIT); | ||
516 | SPE_Stop(this, spe_data); | ||
517 | } | ||
518 | |||
519 | /* Destroy SPE context */ | ||
520 | deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name); | ||
521 | if (spe_context_destroy(spe_data->ctx)) { | ||
522 | deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name); | ||
523 | SDL_SetError("[PS3->SPU] Failed destroying context"); | ||
524 | return -1; | ||
525 | } | ||
526 | deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name); | ||
527 | return 0; | ||
528 | } | ||
529 | |||
530 | |||
531 | /* Send message to the SPE via mailboxe */ | ||
532 | int SPE_SendMsg(_THIS, spu_data_t * spe_data, unsigned int msg) | ||
533 | { | ||
534 | deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name); | ||
535 | /* Send one message, block until message was sent */ | ||
536 | unsigned int spe_in_mbox_msgs[1]; | ||
537 | spe_in_mbox_msgs[0] = msg; | ||
538 | int in_mbox_write = spe_in_mbox_write(spe_data->ctx, spe_in_mbox_msgs, 1, SPE_MBOX_ALL_BLOCKING); | ||
539 | |||
540 | if (1 > in_mbox_write) { | ||
541 | deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name); | ||
542 | SDL_SetError("[PS3->SPU] No message could be written"); | ||
543 | return -1; | ||
544 | } | ||
545 | return 0; | ||
546 | } | ||
547 | |||
548 | |||
549 | /* Read 1 message from SPE, block until at least 1 message was received */ | ||
550 | int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg) | ||
551 | { | ||
552 | deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name); | ||
553 | unsigned int out_messages[1]; | ||
554 | while (!spe_out_mbox_status(spe_data->ctx)); | ||
555 | int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1); | ||
556 | deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]); | ||
557 | if (out_messages[0] == msg) | ||
558 | return 0; | ||
559 | else | ||
560 | return -1; | ||
561 | } | ||
562 | |||
563 | |||
564 | /* Re-runnable invocation of the spe_context_run call */ | ||
565 | void SPE_RunContext(void *thread_argp) | ||
566 | { | ||
567 | /* argp is the pointer to argument to be passed to the SPE program */ | ||
568 | spu_data_t *args = (spu_data_t *) thread_argp; | ||
569 | deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)args->argp); | ||
570 | |||
571 | /* Run it.. */ | ||
572 | deprintf(2, "[PS3->SPU] Run SPE program: %s\n", args->program_name); | ||
573 | if (spe_context_run | ||
574 | (args->ctx, &args->entry, 0, (void *)args->argp, NULL, | ||
575 | NULL) < 0) { | ||
576 | deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", args->program_name); | ||
577 | SDL_SetError("[PS3->SPU] Failed running SPE context: %s", args->program_name); | ||
578 | exit(1); | ||
579 | } | ||
580 | |||
581 | pthread_exit(NULL); | ||
582 | } | ||
583 | |||
584 | |||
585 | /* Quits the video driver */ | ||
586 | static void PS3_VideoQuit(_THIS) | ||
587 | { | ||
588 | if (fb_dev_fd > 0) { | ||
589 | /* Restore the original video mode */ | ||
590 | if (ioctl(fb_dev_fd, FBIOPUT_VSCREENINFO, &fb_orig_vinfo)) | ||
591 | SDL_SetError("[PS3] Can't restore original fb_var_screeninfo"); | ||
592 | |||
593 | /* Give control of frame buffer to kernel */ | ||
594 | ioctl(fb_dev_fd, PS3FB_IOCTL_OFF, 0); | ||
595 | close(fb_dev_fd); | ||
596 | fb_dev_fd = -1; | ||
597 | } | ||
598 | |||
599 | if (frame_buffer) { | ||
600 | munmap(frame_buffer, fb_finfo.smem_len); | ||
601 | frame_buffer = 0; | ||
602 | } | ||
603 | |||
604 | if (fb_parms) | ||
605 | free((void *)fb_parms); | ||
606 | if (fb_thread_data) { | ||
607 | SPE_Shutdown(this, fb_thread_data); | ||
608 | free((void *)fb_thread_data); | ||
609 | } | ||
610 | |||
611 | if (this->screen) { | ||
612 | if (double_buffering && this->screen->pixels) { | ||
613 | free(this->screen->pixels); | ||
614 | } | ||
615 | this->screen->pixels = NULL; | ||
616 | } | ||
617 | |||
618 | enable_cursor(1); | ||
619 | deprintf(1, "[PS3] VideoQuit\n"); | ||
620 | } | ||
621 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3video.h b/apps/plugins/sdl/src/video/ps3/SDL_ps3video.h new file mode 100644 index 0000000000..4fe5a2b42b --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/SDL_ps3video.h | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "SDL_config.h" | ||
29 | #include "../SDL_sysvideo.h" | ||
30 | #include "SDL_mouse.h" | ||
31 | #include "SDL_mutex.h" | ||
32 | #include "spulibs/spu_common.h" | ||
33 | |||
34 | #include <libspe2.h> | ||
35 | #include <pthread.h> | ||
36 | #include <linux/types.h> | ||
37 | #include <linux/fb.h> | ||
38 | #include <asm/ps3fb.h> | ||
39 | #include <linux/vt.h> | ||
40 | #include <termios.h> | ||
41 | |||
42 | #ifndef _SDL_ps3video_h | ||
43 | #define _SDL_ps3video_h | ||
44 | |||
/* Debug verbosity; a message is printed when its level is <= DEBUG_LEVEL.
 * 0: No debug messages
 * 1: Video debug messages
 * 2: SPE debug messages
 * 3: Memory addresses
 */
#define DEBUG_LEVEL 0

#ifdef DEBUG_LEVEL
/* Writes a formatted message to stdout and flushes it right away. */
#define deprintf(level, fmt, ...)                       \
    do {                                                \
        if ((unsigned)(level) <= DEBUG_LEVEL) {         \
            fprintf(stdout, fmt, ##__VA_ARGS__);        \
            fflush(stdout);                             \
        }                                               \
    } while (0)
#else
/* Debugging compiled out: the call expands to nothing. */
#define deprintf(level, fmt, ...)
#endif
66 | |||
67 | /* Framebuffer device */ | ||
68 | #define PS3_DEV_FB "/dev/fb0" | ||
69 | |||
70 | /* Hidden "this" pointer for the video functions */ | ||
71 | #define _THIS SDL_VideoDevice * this | ||
72 | |||
/*
 * SPU thread data: PPE-side bookkeeping for one SPE program.
 */
typedef struct spu_data {
    spe_context_ptr_t ctx;          /* SPE context handed to spe_context_run() */
    pthread_t thread;               /* presumably the pthread that executes SPE_RunContext() -- TODO confirm */
    spe_program_handle_t program;   /* embedded SPE program handle (e.g. yuv2rgb_spu, bilin_scaler_spu) */
    char * program_name;            /* name printed in debug and error messages */
    unsigned int booted;            /* creators initialise this to 0; presumably set once the program is loaded -- TODO confirm */
    unsigned int keepalive;         /* set to 0 for the scaler and 1 for the converter; presumably "stays resident between jobs" -- TODO confirm */
    unsigned int entry;             /* entry point, passed by address to spe_context_run() */
    int error_code;                 /* NOTE(review): usage is not visible in this part of the sources */
    void * argp;                    /* argument pointer forwarded to the SPE program by spe_context_run() */
} spu_data_t;
85 | |||
/*
 * Private video driver data needed for Cell support.
 *
 * The members are normally reached through the shorthand macros defined
 * after this struct, which expand to this->hidden-><member>.
 *
 * NOTE(review): __attribute__((aligned(128))) on s_pixels and fb_parms is
 * applied to the pointer members themselves; it does not by itself make the
 * memory they point to 128-byte aligned -- confirm this is what was intended.
 */
struct SDL_PrivateVideoData
{
    const char * const fb_dev_name; /* FB-device name */
    int fb_dev_fd; /* Descriptor-handle for fb_dev_name */
    uint8_t * frame_buffer; /* mmap'd access to fbdev */

    /* SPE threading stuff */
    spu_data_t * fb_thread_data;
    spu_data_t * scaler_thread_data;
    spu_data_t * converter_thread_data;

    /* screeninfo (from linux/fb.h) */
    struct fb_fix_screeninfo fb_finfo;
    struct fb_var_screeninfo fb_vinfo;
    struct fb_var_screeninfo fb_orig_vinfo; /* restored by PS3_VideoQuit() */

    /* screeninfo (from asm/ps3fb.h) */
    struct ps3fb_ioctl_res res;

    unsigned int double_buffering;
    uint32_t real_width; // real width of screen
    uint32_t real_height; // real height of screen

    uint32_t s_fb_pixel_size; // 32: 4 24: 3 16: 2 15: 2
    uint32_t fb_bits_per_pixel; // 32: 32 24: 24 16: 16 15: 15

    uint32_t config_count;

    uint32_t s_input_line_length; // precalculated: input_width * fb_pixel_size
    uint32_t s_bounded_input_width; // width of input (bounded by writeable width)
    uint32_t s_bounded_input_height;// height of input (bounded by writeable height)
    uint32_t s_bounded_input_width_offset; // offset from the left side (used for centering)
    uint32_t s_bounded_input_height_offset; // offset from the upper side (used for centering)
    uint32_t s_writeable_width; // width of screen which is writeable
    uint32_t s_writeable_height; // height of screen which is writeable

    uint8_t * s_center[2]; // where to begin writing our image (centered?)
    uint32_t s_center_index;

    volatile void * s_pixels __attribute__((aligned(128)));

    /* Framebuffer data */
    volatile struct fb_writer_parms_t * fb_parms __attribute__((aligned(128)));
};
131 | |||
/*
 * Shorthand accessors for the private driver data.  Each one expands to the
 * member of this->hidden, so it can only be used where a variable named
 * `this` (see _THIS) is in scope.
 *
 * NOTE(review): SDL_nummodes, SDL_modelist and SDL_videomode expand to
 * members that struct SDL_PrivateVideoData, as declared in this header, does
 * not have; any use of them would fail to compile.
 *
 * NOTE(review): `res` is a very common identifier.  After this header is
 * included, every later use of the bare token `res` is rewritten by the
 * preprocessor.
 */
#define fb_dev_name (this->hidden->fb_dev_name)
#define fb_dev_fd (this->hidden->fb_dev_fd)
#define frame_buffer (this->hidden->frame_buffer)
#define fb_thread_data (this->hidden->fb_thread_data)
#define scaler_thread_data (this->hidden->scaler_thread_data)
#define converter_thread_data (this->hidden->converter_thread_data)
#define fb_parms (this->hidden->fb_parms)
#define SDL_nummodes (this->hidden->SDL_nummodes)
#define SDL_modelist (this->hidden->SDL_modelist)
#define SDL_videomode (this->hidden->SDL_videomode)
#define fb_finfo (this->hidden->fb_finfo)
#define fb_vinfo (this->hidden->fb_vinfo)
#define fb_orig_vinfo (this->hidden->fb_orig_vinfo)
#define res (this->hidden->res)
#define double_buffering (this->hidden->double_buffering)
#define real_width (this->hidden->real_width)
#define real_height (this->hidden->real_height)
#define s_fb_pixel_size (this->hidden->s_fb_pixel_size)
#define fb_bits_per_pixel (this->hidden->fb_bits_per_pixel)
#define config_count (this->hidden->config_count)
#define s_input_line_length (this->hidden->s_input_line_length)
#define s_bounded_input_width (this->hidden->s_bounded_input_width)
#define s_bounded_input_height (this->hidden->s_bounded_input_height)
#define s_bounded_input_width_offset (this->hidden->s_bounded_input_width_offset)
#define s_bounded_input_height_offset (this->hidden->s_bounded_input_height_offset)
#define s_writeable_width (this->hidden->s_writeable_width)
#define s_writeable_height (this->hidden->s_writeable_height)
#define s_center (this->hidden->s_center)
#define s_center_index (this->hidden->s_center_index)
#define s_pixels (this->hidden->s_pixels)
162 | |||
163 | #endif /* _SDL_ps3video_h */ | ||
164 | |||
165 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c b/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c new file mode 100644 index 0000000000..b1e17dae6d --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv.c | |||
@@ -0,0 +1,340 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "SDL_config.h" | ||
29 | |||
30 | #include "SDL_video.h" | ||
31 | #include "SDL_ps3video.h" | ||
32 | #include "SDL_ps3yuv_c.h" | ||
33 | #include "../SDL_yuvfuncs.h" | ||
34 | #include "spulibs/spu_common.h" | ||
35 | |||
/* Handles of the embedded SPE programs.  They are stored in the `program`
 * member of the converter and scaler thread data when an overlay is created. */
extern spe_program_handle_t yuv2rgb_spu;
extern spe_program_handle_t bilin_scaler_spu;

/* SPE control helpers used by this file.  They are declared here rather than
 * taken from a header; SPE_RunContext() is defined in SDL_ps3video.c and the
 * others are presumably defined there as well -- TODO confirm. */
int SPE_Start(_THIS, spu_data_t * spe_data);
int SPE_Stop(_THIS, spu_data_t * spe_data);
int SPE_Boot(_THIS, spu_data_t * spe_data);
int SPE_Shutdown(_THIS, spu_data_t * spe_data);
int SPE_SendMsg(_THIS, spu_data_t * spe_data, unsigned int msg);
int SPE_WaitForMsg(_THIS, spu_data_t * spe_data, unsigned int msg);
void SPE_RunContext(void *thread_argp);
47 | |||
48 | |||
/* The functions used to manipulate software video overlays.
 * The table is initialised positionally with the lock, unlock, display and
 * free callbacks, so the order of the entries has to match the member order
 * of struct private_yuvhwfuncs (declared outside this file). */
static struct private_yuvhwfuncs ps3_yuvfuncs = {
    PS3_LockYUVOverlay,
    PS3_UnlockYUVOverlay,
    PS3_DisplayYUVOverlay,
    PS3_FreeYUVOverlay
};
56 | |||
57 | |||
/*
 * Per-overlay private data, reachable through SDL_Overlay.hwdata.
 *
 * NOTE(review): the aligned(128) attributes below apply to the pointer
 * members themselves, not to the buffers they point to; the buffers are
 * obtained with memalign(16, ...) in this file.
 */
struct private_yuvhwdata {
    SDL_Surface *display;   /* surface the overlay is shown on (set at creation) */
    SDL_Surface *stretch;   /* set to NULL at creation; not used otherwise in this file */
    volatile void * pixels __attribute__((aligned(128))); /* one allocation holding all three planes */

    /* These are just so we don't have to allocate them separately */
    Uint16 pitches[3];      /* backing storage for SDL_Overlay.pitches */
    Uint8 * planes[3];      /* backing storage for SDL_Overlay.pixels */

    unsigned int scale;     /* set per display call: 1 when source and destination sizes differ */

    /* Scaled YUV picture */
    Uint8 * scaler_out __attribute__((aligned(128))); /* temporary buffer, allocated and freed within one display call */

    /* YUV2RGB converter data */
    volatile struct yuv2rgb_parms_t * converter_parms __attribute__((aligned(128)));

    /* Scaler data */
    volatile struct scale_parms_t * scaler_parms __attribute__((aligned(128)));

    Uint8 locked;           /* 1 between lock and unlock; only written in this file */
};
80 | |||
81 | |||
82 | SDL_Overlay *PS3_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display) { | ||
83 | /* Only RGB packed pixel conversion supported */ | ||
84 | if ((display->format->BytesPerPixel != 2) && | ||
85 | (display->format->BytesPerPixel != 3) && | ||
86 | (display->format->BytesPerPixel != 4)) | ||
87 | { | ||
88 | SDL_SetError ("Can't use YUV data on non 16/24/32 bit surfaces"); | ||
89 | return NULL; | ||
90 | } | ||
91 | |||
92 | /* Double-check the requested format. We'll only support YV12 */ | ||
93 | switch (format) { | ||
94 | case SDL_IYUV_OVERLAY: | ||
95 | case SDL_YV12_OVERLAY: | ||
96 | /* Supported YUV format */ | ||
97 | break; | ||
98 | default: | ||
99 | SDL_SetError("Unsupported YUV format"); | ||
100 | return NULL; | ||
101 | } | ||
102 | |||
103 | SDL_Overlay* overlay; | ||
104 | struct private_yuvhwdata* hwdata; | ||
105 | |||
106 | /* Create the overlay structure */ | ||
107 | overlay = (SDL_Overlay *) SDL_calloc(1, sizeof(SDL_Overlay)); | ||
108 | if (overlay == NULL) { | ||
109 | SDL_OutOfMemory(); | ||
110 | return NULL; | ||
111 | } | ||
112 | SDL_memset(overlay, 0, (sizeof *overlay)); | ||
113 | |||
114 | /* Set the basic attributes */ | ||
115 | overlay->format = format; | ||
116 | overlay->w = width; | ||
117 | overlay->h = height; | ||
118 | overlay->hwdata = NULL; | ||
119 | |||
120 | /* Set up the PS3 YUV surface function structure */ | ||
121 | overlay->hwfuncs = &ps3_yuvfuncs; | ||
122 | |||
123 | /* Create the pixel data and lookup tables */ | ||
124 | hwdata = (struct private_yuvhwdata *) SDL_calloc(1, sizeof(struct private_yuvhwdata)); | ||
125 | if (hwdata == NULL) { | ||
126 | SDL_OutOfMemory(); | ||
127 | SDL_FreeYUVOverlay(overlay); | ||
128 | return NULL; | ||
129 | } | ||
130 | overlay->hwdata = hwdata; | ||
131 | |||
132 | hwdata->stretch = NULL; | ||
133 | hwdata->display = display; | ||
134 | |||
135 | /* Create SPU parms structure */ | ||
136 | hwdata->converter_parms = (struct yuv2rgb_parms_t *) memalign(16, sizeof(struct yuv2rgb_parms_t)); | ||
137 | hwdata->scaler_parms = (struct scale_parms_t *) memalign(16, sizeof(struct scale_parms_t)); | ||
138 | if (hwdata->converter_parms == NULL || hwdata->scaler_parms == NULL) { | ||
139 | SDL_FreeYUVOverlay(overlay); | ||
140 | SDL_OutOfMemory(); | ||
141 | return(NULL); | ||
142 | } | ||
143 | |||
144 | /* Set up the SPEs */ | ||
145 | scaler_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t)); | ||
146 | converter_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t)); | ||
147 | if (converter_thread_data == NULL || scaler_thread_data == NULL) { | ||
148 | SDL_FreeYUVOverlay(overlay); | ||
149 | SDL_OutOfMemory(); | ||
150 | return(NULL); | ||
151 | } | ||
152 | |||
153 | scaler_thread_data->program = bilin_scaler_spu; | ||
154 | scaler_thread_data->program_name = "bilin_scaler_spu"; | ||
155 | scaler_thread_data->keepalive = 0; | ||
156 | scaler_thread_data->booted = 0; | ||
157 | |||
158 | converter_thread_data->program = yuv2rgb_spu; | ||
159 | converter_thread_data->program_name = "yuv2rgb_spu"; | ||
160 | converter_thread_data->keepalive = 1; | ||
161 | converter_thread_data->booted = 0; | ||
162 | |||
163 | SPE_Start(this, converter_thread_data); | ||
164 | |||
165 | hwdata->pixels = (Uint8 *) memalign(16, width * height + ((width * height) >> 1)); | ||
166 | if (hwdata->pixels == NULL) { | ||
167 | SDL_FreeYUVOverlay(overlay); | ||
168 | SDL_OutOfMemory(); | ||
169 | return(NULL); | ||
170 | } | ||
171 | |||
172 | /* Find the pitch and offset values for the overlay */ | ||
173 | overlay->pitches = hwdata->pitches; | ||
174 | overlay->pixels = hwdata->planes; | ||
175 | switch (format) { | ||
176 | case SDL_YV12_OVERLAY: | ||
177 | case SDL_IYUV_OVERLAY: | ||
178 | overlay->pitches[0] = overlay->w; | ||
179 | overlay->pitches[1] = overlay->pitches[0] / 2; | ||
180 | overlay->pitches[2] = overlay->pitches[0] / 2; | ||
181 | overlay->pixels[0] = (Uint8 *)hwdata->pixels; | ||
182 | overlay->pixels[1] = overlay->pixels[0] + | ||
183 | overlay->pitches[0] * overlay->h; | ||
184 | overlay->pixels[2] = overlay->pixels[1] + | ||
185 | overlay->pitches[1] * overlay->h / 2; | ||
186 | overlay->planes = 3; | ||
187 | break; | ||
188 | default: | ||
189 | /* We should never get here (caught above) */ | ||
190 | break; | ||
191 | } | ||
192 | |||
193 | /* We're all done.. */ | ||
194 | return overlay; | ||
195 | } | ||
196 | |||
197 | |||
198 | int PS3_LockYUVOverlay(_THIS, SDL_Overlay *overlay) { | ||
199 | if (overlay == NULL) { | ||
200 | return -1; | ||
201 | } | ||
202 | overlay->hwdata->locked = 1; | ||
203 | |||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | |||
208 | void PS3_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay) { | ||
209 | if (overlay == NULL) { | ||
210 | return; | ||
211 | } | ||
212 | overlay->hwdata->locked = 0; | ||
213 | |||
214 | return; | ||
215 | } | ||
216 | |||
217 | |||
218 | int PS3_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst) { | ||
219 | if ((overlay == NULL) || (overlay->hwdata == NULL)) { | ||
220 | return -1; | ||
221 | } | ||
222 | |||
223 | Uint8 *lum, *Cr, *Cb; | ||
224 | struct private_yuvhwdata *hwdata; | ||
225 | SDL_Surface *display; | ||
226 | |||
227 | hwdata = overlay->hwdata; | ||
228 | display = hwdata->display; | ||
229 | |||
230 | /* Do we have to scale? */ | ||
231 | if ((src->w != dst->w) || (src->h != dst->h) ) { | ||
232 | hwdata->scale = 1; | ||
233 | deprintf(1, "[PS3] We need to scale\n"); | ||
234 | } else { | ||
235 | hwdata->scale = 0; | ||
236 | deprintf(1, "[PS3] No scaling\n"); | ||
237 | } | ||
238 | |||
239 | /* Find out where the various portions of the image are */ | ||
240 | switch (overlay->format) { | ||
241 | case SDL_YV12_OVERLAY: | ||
242 | lum = (Uint8 *)overlay->pixels[0]; | ||
243 | Cr = (Uint8 *)overlay->pixels[1]; | ||
244 | Cb = (Uint8 *)overlay->pixels[2]; | ||
245 | break; | ||
246 | case SDL_IYUV_OVERLAY: | ||
247 | lum = (Uint8 *)overlay->pixels[0]; | ||
248 | Cr = (Uint8 *)overlay->pixels[2]; | ||
249 | Cb = (Uint8 *)overlay->pixels[1]; | ||
250 | break; | ||
251 | default: | ||
252 | SDL_SetError("Unsupported YUV format in blit"); | ||
253 | return -1; | ||
254 | } | ||
255 | |||
256 | if (hwdata->scale) { | ||
257 | /* Alloc mem for scaled YUV picture */ | ||
258 | hwdata->scaler_out = (Uint8 *) memalign(16, dst->w * dst->h + ((dst->w * dst->h) >> 1)); | ||
259 | if (hwdata->scaler_out == NULL) { | ||
260 | SDL_FreeYUVOverlay(overlay); | ||
261 | SDL_OutOfMemory(); | ||
262 | return -1; | ||
263 | } | ||
264 | |||
265 | /* Set parms for scaling */ | ||
266 | hwdata->scaler_parms->src_pixel_width = src->w; | ||
267 | hwdata->scaler_parms->src_pixel_height = src->h; | ||
268 | hwdata->scaler_parms->dst_pixel_width = dst->w; | ||
269 | hwdata->scaler_parms->dst_pixel_height = dst->h; | ||
270 | hwdata->scaler_parms->y_plane = lum; | ||
271 | hwdata->scaler_parms->v_plane = Cr; | ||
272 | hwdata->scaler_parms->u_plane = Cb; | ||
273 | hwdata->scaler_parms->dstBuffer = hwdata->scaler_out; | ||
274 | scaler_thread_data->argp = (void *)hwdata->scaler_parms; | ||
275 | |||
276 | /* Scale the YUV overlay to given size */ | ||
277 | SPE_Start(this, scaler_thread_data); | ||
278 | SPE_Stop(this, scaler_thread_data); | ||
279 | |||
280 | /* Set parms for converting after scaling */ | ||
281 | hwdata->converter_parms->y_plane = hwdata->scaler_out; | ||
282 | hwdata->converter_parms->v_plane = hwdata->scaler_out + dst->w * dst->h; | ||
283 | hwdata->converter_parms->u_plane = hwdata->scaler_out + dst->w * dst->h + ((dst->w * dst->h) >> 2); | ||
284 | } else { | ||
285 | /* Set parms for converting */ | ||
286 | hwdata->converter_parms->y_plane = lum; | ||
287 | hwdata->converter_parms->v_plane = Cr; | ||
288 | hwdata->converter_parms->u_plane = Cb; | ||
289 | } | ||
290 | |||
291 | hwdata->converter_parms->src_pixel_width = dst->w; | ||
292 | hwdata->converter_parms->src_pixel_height = dst->h; | ||
293 | hwdata->converter_parms->dstBuffer = (Uint8 *) s_pixels; | ||
294 | converter_thread_data->argp = (void *)hwdata->converter_parms; | ||
295 | |||
296 | /* Convert YUV overlay to RGB */ | ||
297 | SPE_SendMsg(this, converter_thread_data, SPU_START); | ||
298 | SPE_SendMsg(this, converter_thread_data, (unsigned int)converter_thread_data->argp); | ||
299 | |||
300 | /* Centering */ | ||
301 | s_bounded_input_width = dst->w; | ||
302 | s_bounded_input_height = dst->h; | ||
303 | |||
304 | /* UpdateRects() will do the rest.. */ | ||
305 | SDL_UpdateRects(display, 1, dst); | ||
306 | |||
307 | if (hwdata->scale) | ||
308 | SDL_free((void *)hwdata->scaler_out); | ||
309 | |||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | |||
314 | void PS3_FreeYUVOverlay(_THIS, SDL_Overlay *overlay) { | ||
315 | if (overlay == NULL) { | ||
316 | return; | ||
317 | } | ||
318 | |||
319 | if (overlay->hwdata == NULL) { | ||
320 | return; | ||
321 | } | ||
322 | |||
323 | struct private_yuvhwdata * hwdata; | ||
324 | hwdata = overlay->hwdata; | ||
325 | |||
326 | if (scaler_thread_data) | ||
327 | SDL_free(scaler_thread_data); | ||
328 | if (converter_thread_data) { | ||
329 | SPE_Shutdown(this, converter_thread_data); | ||
330 | SDL_free(converter_thread_data); | ||
331 | } | ||
332 | |||
333 | if (hwdata) { | ||
334 | if (hwdata->pixels) | ||
335 | SDL_free((void *)hwdata->pixels); | ||
336 | SDL_free(hwdata); | ||
337 | } | ||
338 | return; | ||
339 | } | ||
340 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h b/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h new file mode 100644 index 0000000000..49f9d70953 --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/SDL_ps3yuv_c.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "SDL_config.h" | ||
29 | |||
30 | #ifndef _SDL_ps3yuv_h | ||
31 | #define _SDL_ps3yuv_h | ||
32 | |||
33 | /* This is the PS3 implementation of YUV video overlays */ | ||
34 | |||
35 | #include "SDL_video.h" | ||
36 | |||
/* NOTE(review): these prototypes use _THIS, which this header does not
 * define itself; the including file has to provide it first. */

/* Creates an overlay on `display`; returns NULL with the SDL error set on failure. */
extern SDL_Overlay *PS3_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display);
/* Scales (when src and dst sizes differ) and converts the overlay, then updates dst; 0 on success, -1 on error. */
extern int PS3_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst);
/* Sets the overlay's locked flag; returns 0, or -1 for a NULL overlay. */
extern int PS3_LockYUVOverlay(_THIS, SDL_Overlay *overlay);
/* Clears the overlay's locked flag. */
extern void PS3_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay);
/* Releases the overlay's private data and the SPE thread data. */
extern void PS3_FreeYUVOverlay(_THIS, SDL_Overlay *overlay);
42 | |||
43 | #endif /* _SDL_ps3yuv_h */ | ||
44 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/Makefile b/apps/plugins/sdl/src/video/ps3/spulibs/Makefile new file mode 100644 index 0000000000..dc580d9436 --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/spulibs/Makefile | |||
@@ -0,0 +1,83 @@ | |||
1 | # This Makefile is for building the CELL BE SPU libs | ||
2 | # libfb_writer_spu.so, libyuv2rgb_spu.so, libbilin_scaler_spu.so | ||
3 | |||
4 | # Toolchain | ||
5 | SPU_GCC=/usr/bin/spu-gcc | ||
6 | PPU_GCC=/usr/bin/gcc | ||
7 | PPU_EMBEDSPU=/usr/bin/embedspu | ||
8 | PPU_AR=/usr/bin/ar | ||
9 | PPU_LD=/usr/bin/ld | ||
10 | INSTALL=/usr/bin/install | ||
11 | |||
12 | SPU_CFLAGS=-W -Wall -Winline -Wno-main -I. -I /usr/spu/include -I /opt/cell/sdk/usr/spu/include -finline-limit=10000 -Winline -ftree-vectorize -funroll-loops -fmodulo-sched -ffast-math -fPIC -O2 | ||
13 | |||
14 | # Usually /usr/lib, depending on your distribution | ||
15 | PREFIX=/usr/lib | ||
16 | |||
17 | |||
18 | all: libfb_writer_spu.a libfb_writer_spu.so \ | ||
19 | libyuv2rgb_spu.so libyuv2rgb_spu.a \ | ||
20 | libbilin_scaler_spu.so libbilin_scaler_spu.a | ||
21 | |||
22 | |||
# fb_writer
# Build the SPU executable, then embed it into a PPU object file.
fb_writer_spu-embed.o: fb_writer.c spu_common.h
	$(SPU_GCC) $(SPU_CFLAGS) -o fb_writer_spu fb_writer.c -lm
	$(PPU_EMBEDSPU) -m32 fb_writer_spu fb_writer_spu fb_writer_spu-embed.o

libfb_writer_spu.so: fb_writer_spu-embed.o
	$(PPU_LD) -o libfb_writer_spu.so -shared -soname=libfb_writer_spu.so fb_writer_spu-embed.o

# "r" replaces an existing member; "q" appended a duplicate on every rebuild.
libfb_writer_spu.a: fb_writer_spu-embed.o
	$(PPU_AR) rcs libfb_writer_spu.a fb_writer_spu-embed.o
33 | |||
34 | |||
# yuv2rgb_converter
# Build the SPU executable, then embed it into a PPU object file.
yuv2rgb_spu-embed.o: yuv2rgb_converter.c spu_common.h
	$(SPU_GCC) $(SPU_CFLAGS) -o yuv2rgb_spu yuv2rgb_converter.c -lm
	$(PPU_EMBEDSPU) -m32 yuv2rgb_spu yuv2rgb_spu yuv2rgb_spu-embed.o

# "r" replaces an existing member; "q" appended a duplicate on every rebuild.
libyuv2rgb_spu.a: yuv2rgb_spu-embed.o
	$(PPU_AR) rcs libyuv2rgb_spu.a yuv2rgb_spu-embed.o

libyuv2rgb_spu.so: yuv2rgb_spu-embed.o
	$(PPU_LD) -o libyuv2rgb_spu.so -shared -soname=libyuv2rgb_spu.so yuv2rgb_spu-embed.o
45 | |||
46 | |||
# bilin_scaler
# Build the SPU executable, then embed it into a PPU object file.
bilin_scaler_spu-embed.o: bilin_scaler.c spu_common.h
	$(SPU_GCC) $(SPU_CFLAGS) -o bilin_scaler_spu bilin_scaler.c -lm
	$(PPU_EMBEDSPU) -m32 bilin_scaler_spu bilin_scaler_spu bilin_scaler_spu-embed.o

# "r" replaces an existing member; "q" appended a duplicate on every rebuild.
libbilin_scaler_spu.a: bilin_scaler_spu-embed.o
	$(PPU_AR) rcs libbilin_scaler_spu.a bilin_scaler_spu-embed.o

libbilin_scaler_spu.so: bilin_scaler_spu-embed.o
	$(PPU_LD) -o libbilin_scaler_spu.so -shared -soname=libbilin_scaler_spu.so bilin_scaler_spu-embed.o
57 | |||
# Install the shared objects (0755) and the static archives (0644) into PREFIX.
.PHONY: install
install: libfb_writer_spu.a libfb_writer_spu.so \
		libyuv2rgb_spu.so libyuv2rgb_spu.a \
		libbilin_scaler_spu.so libbilin_scaler_spu.a
	$(INSTALL) -c -m 0755 libfb_writer_spu.so $(PREFIX)/.
	$(INSTALL) -c -m 0644 libfb_writer_spu.a $(PREFIX)/.
	$(INSTALL) -c -m 0755 libyuv2rgb_spu.so $(PREFIX)/.
	$(INSTALL) -c -m 0644 libyuv2rgb_spu.a $(PREFIX)/.
	$(INSTALL) -c -m 0755 libbilin_scaler_spu.so $(PREFIX)/.
	$(INSTALL) -c -m 0644 libbilin_scaler_spu.a $(PREFIX)/.
67 | |||
68 | |||
# Remove the installed libraries.  The target has no prerequisites, so it also
# works when some of the files are already gone (rm -f ignores missing files).
.PHONY: uninstall
uninstall:
	rm -f $(PREFIX)/libfb_writer_spu.a
	rm -f $(PREFIX)/libfb_writer_spu.so
	rm -f $(PREFIX)/libyuv2rgb_spu.so
	rm -f $(PREFIX)/libyuv2rgb_spu.a
	rm -f $(PREFIX)/libbilin_scaler_spu.so
	rm -f $(PREFIX)/libbilin_scaler_spu.a
78 | |||
79 | |||
# Remove all build products.  Declared phony so that a file named "clean"
# cannot stop the recipe from running.
.PHONY: clean
clean:
	rm -f bilin_scaler_spu-embed.o libbilin_scaler_spu.so libbilin_scaler_spu.a bilin_scaler_spu
	rm -f yuv2rgb_spu-embed.o libyuv2rgb_spu.so libyuv2rgb_spu.a yuv2rgb_spu
	rm -f fb_writer_spu-embed.o libfb_writer_spu.so libfb_writer_spu.a fb_writer_spu
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c b/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c new file mode 100644 index 0000000000..be9b5c6e8d --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/spulibs/bilin_scaler.c | |||
@@ -0,0 +1,2050 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [dot] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "spu_common.h" | ||
29 | |||
30 | #include <spu_intrinsics.h> | ||
31 | #include <spu_mfcio.h> | ||
32 | |||
33 | // Debugging | ||
34 | //#define DEBUG | ||
35 | |||
36 | #ifdef DEBUG | ||
37 | #define deprintf(fmt, args... ) \ | ||
38 | fprintf( stdout, fmt, ##args ); \ | ||
39 | fflush( stdout ); | ||
40 | #else | ||
41 | #define deprintf( fmt, args... ) | ||
42 | #endif | ||
43 | |||
44 | struct scale_parms_t parms __attribute__((aligned(128))); | ||
45 | |||
46 | /* Input line buffers, two of each (index 0/1): together they hold a maximum of | ||
47 | * 8 lines Y and 4 lines each of V and U. Source data may have to be fetched from | ||
48 | * a misaligned address, so every buffer reserves 128 extra bytes per line for that | ||
49 | */ | ||
50 | unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128))); | ||
51 | unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); | ||
52 | unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128))); | ||
53 | |||
54 | /* scaled output buffers, two of each: in total 4 lines Y, therefore 2 lines V, 2 lines U */ | ||
55 | unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128))); | ||
56 | unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); | ||
57 | unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128))); | ||
58 | |||
59 | /* lower and upper clamp limits used by vfloat_to_vuint() for the float to int conversion */ | ||
60 | static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; | ||
61 | static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; | ||
62 | |||
63 | void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); | ||
64 | void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride); | ||
65 | |||
66 | void scale_srcw16_dstw16(); | ||
67 | void scale_srcw16_dstw32(); | ||
68 | void scale_srcw32_dstw16(); | ||
69 | void scale_srcw32_dstw32(); | ||
70 | |||
71 | int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp ) | ||
72 | { | ||
73 | deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id); | ||
74 | /* Fetch the scaling parameters via DMA and wait on the transfer's tag. */ | ||
75 | spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD); | ||
76 | DMA_WAIT_TAG(TAG_INIT); | ||
77 | |||
78 | deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height, | ||
79 | parms.dst_pixel_width, parms.dst_pixel_height); | ||
80 | |||
81 | /* A width that is not a multiple of 32 selects the matching "w16" variant. */ | ||
82 | const int src_is_w16 = (parms.src_pixel_width & 0x1f) != 0; | ||
83 | const int dst_is_w16 = (parms.dst_pixel_width & 0x1F) != 0; | ||
84 | |||
85 | if(src_is_w16 && dst_is_w16) { | ||
86 | deprintf("[SPU] Using scale_srcw16_dstw16\n"); | ||
87 | scale_srcw16_dstw16(); | ||
88 | } else if(src_is_w16) { | ||
89 | deprintf("[SPU] Using scale_srcw16_dstw32\n"); | ||
90 | scale_srcw16_dstw32(); | ||
91 | } else if(dst_is_w16) { | ||
92 | deprintf("[SPU] Using scale_srcw32_dstw16\n"); | ||
93 | scale_srcw32_dstw16(); | ||
94 | } else { | ||
95 | deprintf("[SPU] Using scale_srcw32_dstw32\n"); | ||
96 | scale_srcw32_dstw32(); | ||
97 | } | ||
98 | deprintf("[SPU] bilin_scaler_spu... done!\n"); | ||
99 | |||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | |||
104 | /* | ||
105 | * vfloat_to_vuint() | ||
106 | * | ||
107 | * converts a float vector to an unsigned int vector; every element is first | ||
108 | * limited to the range given by vec_0_1 and vec_255, then converted | ||
109 | * | ||
110 | * @param vec_s float vector for conversion | ||
111 | * @returns converted unsigned int vector | ||
112 | */ | ||
113 | inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { | ||
114 | /* elements smaller than 0.1 are replaced by 0.1 */ | ||
115 | vector unsigned int too_small = spu_cmpgt(vec_0_1, vec_s); | ||
116 | vector float clamped = spu_sel(vec_s, vec_0_1, too_small); | ||
117 | /* elements larger than 255 are replaced by 255 */ | ||
118 | vector unsigned int too_large = spu_cmpgt(clamped, vec_255); | ||
119 | return spu_convtu(spu_sel(clamped, vec_255, too_large),0); | ||
120 | } | ||
121 | |||
122 | |||
123 | /* | ||
124 | * scale_srcw16_dstw16() | ||
125 | * | ||
126 | * used by main() when neither the source nor the destination width is a multiple of 32 | ||
127 | * bilinearly scales the Y, V and U planes, two Y lines and one V/U line per iteration | ||
128 | * the scaled planes are written to parms.dstBuffer in RAM (Y, then V, then U) | ||
129 | */ | ||
130 | void scale_srcw16_dstw16() { | ||
131 | // extract parameters | ||
132 | unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | ||
133 | |||
134 | unsigned int src_width = parms.src_pixel_width; | ||
135 | unsigned int src_height = parms.src_pixel_height; | ||
136 | unsigned int dst_width = parms.dst_pixel_width; | ||
137 | unsigned int dst_height = parms.dst_pixel_height; | ||
138 | |||
139 | // YVU | ||
140 | unsigned int src_linestride_y = src_width; | ||
141 | unsigned int src_dbl_linestride_y = src_width<<1; | ||
142 | unsigned int src_linestride_vu = src_width>>1; | ||
143 | unsigned int src_dbl_linestride_vu = src_width; | ||
144 | |||
145 | // scaled YVU | ||
146 | unsigned int scaled_src_linestride_y = dst_width; | ||
147 | |||
148 | // ram addresses | ||
149 | unsigned char* src_addr_y = parms.y_plane; | ||
150 | unsigned char* src_addr_v = parms.v_plane; | ||
151 | unsigned char* src_addr_u = parms.u_plane; | ||
152 | |||
153 | // for handling misalignment, addresses are precalculated | ||
154 | unsigned char* precalc_src_addr_v = src_addr_v; | ||
155 | unsigned char* precalc_src_addr_u = src_addr_u; | ||
156 | |||
157 | unsigned int dst_picture_size = dst_width*dst_height; | ||
158 | |||
159 | // Sizes for destination | ||
160 | unsigned int dst_dbl_linestride_y = dst_width<<1; | ||
161 | unsigned int dst_dbl_linestride_vu = dst_width>>1; | ||
162 | |||
163 | // Perform address calculation for Y, V and U in main memory with dst_addr as base | ||
164 | unsigned char* dst_addr_main_memory_y = dst_addr; | ||
165 | unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | ||
166 | unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | ||
167 | |||
168 | // calculate scale factors | ||
169 | vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | ||
170 | float y_scale = (float)src_height/(float)dst_height; | ||
171 | |||
172 | // double buffered processing | ||
173 | // buffer switching | ||
174 | unsigned int curr_src_idx = 0; | ||
175 | unsigned int curr_dst_idx = 0; | ||
176 | unsigned int next_src_idx, next_dst_idx; | ||
177 | |||
178 | // 2 lines y as output, upper and lowerline | ||
179 | unsigned int curr_interpl_y_upper = 0; | ||
180 | unsigned int next_interpl_y_upper; | ||
181 | unsigned int curr_interpl_y_lower, next_interpl_y_lower; | ||
182 | // only 1 line v/u output, both planes have the same dimension | ||
183 | unsigned int curr_interpl_vu = 0; | ||
184 | unsigned int next_interpl_vu; | ||
185 | |||
186 | // weights, calculated in every loop iteration | ||
187 | vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
188 | vector float vf_next_NSweight_y_upper; | ||
189 | vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | ||
190 | vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
191 | vector float vf_next_NSweight_vu; | ||
192 | |||
193 | // line indices for the src picture | ||
194 | float curr_src_y_upper = 0.0f, next_src_y_upper; | ||
195 | float curr_src_y_lower, next_src_y_lower; | ||
196 | float curr_src_vu = 0.0f, next_src_vu; | ||
197 | |||
198 | // line indices for the dst picture | ||
199 | unsigned int dst_y=0, dst_vu=0; | ||
200 | |||
201 | // offset for the v and u plane to handle misalignement | ||
202 | unsigned int curr_lsoff_v = 0, next_lsoff_v; | ||
203 | unsigned int curr_lsoff_u = 0, next_lsoff_u; | ||
204 | |||
205 | // calculate lower line indices | ||
206 | curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | ||
207 | curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | ||
208 | // lower line weight | ||
209 | vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | ||
210 | |||
211 | |||
212 | // start partially double buffered processing | ||
213 | // get initial data, 2 sets of y, 1 set v, 1 set u | ||
214 | mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | ||
215 | mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
216 | (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | ||
217 | src_dbl_linestride_y, | ||
218 | RETR_BUF, | ||
219 | 0, 0 ); | ||
220 | mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
221 | mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
222 | |||
223 | /* iteration loop | ||
224 | * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | ||
225 | * the scaled output is 2 lines y, 1 line v, 1 line u | ||
226 | * the scaled YUV output is stored to RAM | ||
227 | */ | ||
228 | for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | ||
229 | dst_y = dst_vu<<1; | ||
230 | |||
231 | // calculate next indices | ||
232 | next_src_vu = ((float)dst_vu+1)*y_scale; | ||
233 | next_src_y_upper = ((float)dst_y+2)*y_scale; | ||
234 | next_src_y_lower = ((float)dst_y+3)*y_scale; | ||
235 | |||
236 | next_interpl_vu = (unsigned int) next_src_vu; | ||
237 | next_interpl_y_upper = (unsigned int) next_src_y_upper; | ||
238 | next_interpl_y_lower = (unsigned int) next_src_y_lower; | ||
239 | |||
240 | // calculate weight NORTH-SOUTH | ||
241 | vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | ||
242 | vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | ||
243 | vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | ||
244 | |||
245 | // get next lines | ||
246 | next_src_idx = curr_src_idx^1; | ||
247 | next_dst_idx = curr_dst_idx^1; | ||
248 | |||
249 | // 4 lines y | ||
250 | mfc_get( y_plane[next_src_idx], | ||
251 | (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | ||
252 | src_dbl_linestride_y, | ||
253 | RETR_BUF+next_src_idx, | ||
254 | 0, 0 ); | ||
255 | mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | ||
256 | (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | ||
257 | src_dbl_linestride_y, | ||
258 | RETR_BUF+next_src_idx, | ||
259 | 0, 0 ); | ||
260 | |||
261 | // 2 lines v | ||
262 | precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); | ||
263 | next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; | ||
264 | mfc_get( v_plane[next_src_idx], | ||
265 | ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, | ||
266 | src_dbl_linestride_vu+(next_lsoff_v<<1), | ||
267 | RETR_BUF+next_src_idx, | ||
268 | 0, 0 ); | ||
269 | // 2 lines u | ||
270 | precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); | ||
271 | next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; | ||
272 | mfc_get( u_plane[next_src_idx], | ||
273 | ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, | ||
274 | src_dbl_linestride_vu+(next_lsoff_u<<1), // sized with the u plane's own misalignment offset | ||
275 | RETR_BUF+next_src_idx, | ||
276 | 0, 0 ); | ||
277 | |||
278 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
279 | |||
280 | // scaling | ||
281 | // work line y_upper | ||
282 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
283 | scaled_y_plane[curr_src_idx], | ||
284 | dst_width, | ||
285 | vf_x_scale, | ||
286 | vf_curr_NSweight_y_upper, | ||
287 | src_linestride_y ); | ||
288 | // work line y_lower | ||
289 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
290 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
291 | dst_width, | ||
292 | vf_x_scale, | ||
293 | vf_curr_NSweight_y_lower, | ||
294 | src_linestride_y ); | ||
295 | // work line v | ||
296 | bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | ||
297 | scaled_v_plane[curr_src_idx], | ||
298 | dst_width>>1, | ||
299 | vf_x_scale, | ||
300 | vf_curr_NSweight_vu, | ||
301 | src_linestride_vu ); | ||
302 | // work line u | ||
303 | bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | ||
304 | scaled_u_plane[curr_src_idx], | ||
305 | dst_width>>1, | ||
306 | vf_x_scale, | ||
307 | vf_curr_NSweight_vu, | ||
308 | src_linestride_vu ); | ||
309 | |||
310 | |||
311 | // Store the result back to main memory into a destination buffer in YUV format | ||
312 | //--------------------------------------------------------------------------------------------- | ||
313 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
314 | |||
315 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
316 | // dst_width: Pixel width of destination image | ||
317 | // dst_addr: Destination address in main memory | ||
318 | // dst_vu: Counter which is incremented one by one | ||
319 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
320 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
321 | (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
322 | dst_dbl_linestride_y, // Two Y lines (depending on the width of the destination resolution) | ||
323 | STR_BUF+curr_dst_idx, // Tag | ||
324 | 0, 0 ); | ||
325 | |||
326 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
327 | (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
328 | dst_dbl_linestride_vu, // One V line (half the width of the destination resolution) | ||
329 | STR_BUF+curr_dst_idx, // Tag | ||
330 | 0, 0 ); | ||
331 | |||
332 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
333 | (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
334 | dst_dbl_linestride_vu, // One U line (half the width of the destination resolution) | ||
335 | STR_BUF+curr_dst_idx, // Tag | ||
336 | 0, 0 ); | ||
337 | //--------------------------------------------------------------------------------------------- | ||
338 | |||
339 | |||
340 | // update for next cycle | ||
341 | curr_src_idx = next_src_idx; | ||
342 | curr_dst_idx = next_dst_idx; | ||
343 | |||
344 | curr_interpl_y_upper = next_interpl_y_upper; | ||
345 | curr_interpl_y_lower = next_interpl_y_lower; | ||
346 | curr_interpl_vu = next_interpl_vu; | ||
347 | // the weights computed for the prefetched lines become the current weights | ||
348 | vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper; | ||
349 | vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower; | ||
350 | vf_curr_NSweight_vu = vf_next_NSweight_vu; | ||
351 | |||
352 | curr_src_y_upper = next_src_y_upper; | ||
353 | curr_src_y_lower = next_src_y_lower; | ||
354 | curr_src_vu = next_src_vu; | ||
355 | |||
356 | curr_lsoff_v = next_lsoff_v; | ||
357 | curr_lsoff_u = next_lsoff_u; | ||
358 | } | ||
359 | |||
360 | |||
361 | // final pass: scale and store the lines fetched last, nothing more is prefetched | ||
362 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
363 | |||
364 | // scaling | ||
365 | // work line y_upper | ||
366 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
367 | scaled_y_plane[curr_src_idx], | ||
368 | dst_width, | ||
369 | vf_x_scale, | ||
370 | vf_curr_NSweight_y_upper, | ||
371 | src_linestride_y ); | ||
372 | // work line y_lower | ||
373 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
374 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
375 | dst_width, | ||
376 | vf_x_scale, | ||
377 | vf_curr_NSweight_y_lower, | ||
378 | src_linestride_y ); | ||
379 | // work line v | ||
380 | bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | ||
381 | scaled_v_plane[curr_src_idx], | ||
382 | dst_width>>1, | ||
383 | vf_x_scale, | ||
384 | vf_curr_NSweight_vu, | ||
385 | src_linestride_vu ); | ||
386 | // work line u | ||
387 | bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | ||
388 | scaled_u_plane[curr_src_idx], | ||
389 | dst_width>>1, | ||
390 | vf_x_scale, | ||
391 | vf_curr_NSweight_vu, | ||
392 | src_linestride_vu ); | ||
393 | |||
394 | |||
395 | // Store the result back to main memory into a destination buffer in YUV format | ||
396 | //--------------------------------------------------------------------------------------------- | ||
397 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
398 | |||
399 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
400 | // dst_width: Pixel width of destination image | ||
401 | // dst_addr: Destination address in main memory | ||
402 | // dst_vu: Counter which is incremented one by one | ||
403 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
404 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
405 | (unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
406 | dst_dbl_linestride_y, // Two Y lines (depending on the width of the destination resolution) | ||
407 | STR_BUF+curr_dst_idx, // Tag | ||
408 | 0, 0 ); | ||
409 | |||
410 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
411 | (unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
412 | dst_dbl_linestride_vu, // One V line (half the width of the destination resolution) | ||
413 | STR_BUF+curr_dst_idx, // Tag | ||
414 | 0, 0 ); | ||
415 | |||
416 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
417 | (unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
418 | dst_dbl_linestride_vu, // One U line (half the width of the destination resolution) | ||
419 | STR_BUF+curr_dst_idx, // Tag | ||
420 | 0, 0 ); | ||
421 | |||
422 | // wait for completion | ||
423 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
424 | //--------------------------------------------------------------------------------------------- | ||
425 | } | ||
426 | |||
427 | |||
428 | /* | ||
429 | * scale_srcw16_dstw32() | ||
430 | * | ||
431 | * used by main() when the source width is not a multiple of 32 but the destination width is | ||
432 | * bilinearly scales the Y, V and U planes, two Y lines and one V/U line per iteration | ||
433 | * no colour conversion is done here, the output stays planar YUV | ||
434 | * the scaled planes are written to parms.dstBuffer in RAM (Y, then V, then U) | ||
435 | */ | ||
436 | void scale_srcw16_dstw32() { | ||
437 | // extract parameters | ||
438 | unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | ||
439 | |||
440 | unsigned int src_width = parms.src_pixel_width; | ||
441 | unsigned int src_height = parms.src_pixel_height; | ||
442 | unsigned int dst_width = parms.dst_pixel_width; | ||
443 | unsigned int dst_height = parms.dst_pixel_height; | ||
444 | |||
445 | // YVU | ||
446 | unsigned int src_linestride_y = src_width; | ||
447 | unsigned int src_dbl_linestride_y = src_width<<1; | ||
448 | unsigned int src_linestride_vu = src_width>>1; | ||
449 | unsigned int src_dbl_linestride_vu = src_width; | ||
450 | // scaled YVU | ||
451 | unsigned int scaled_src_linestride_y = dst_width; | ||
452 | |||
453 | // ram addresses | ||
454 | unsigned char* src_addr_y = parms.y_plane; | ||
455 | unsigned char* src_addr_v = parms.v_plane; | ||
456 | unsigned char* src_addr_u = parms.u_plane; | ||
457 | |||
458 | unsigned int dst_picture_size = dst_width*dst_height; | ||
459 | |||
460 | // Sizes for destination | ||
461 | unsigned int dst_dbl_linestride_y = dst_width<<1; | ||
462 | unsigned int dst_dbl_linestride_vu = dst_width>>1; | ||
463 | |||
464 | // Perform address calculation for Y, V and U in main memory with dst_addr as base | ||
465 | unsigned char* dst_addr_main_memory_y = dst_addr; | ||
466 | unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | ||
467 | unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | ||
468 | |||
469 | |||
470 | // for handling misalignment, addresses are precalculated | ||
471 | unsigned char* precalc_src_addr_v = src_addr_v; | ||
472 | unsigned char* precalc_src_addr_u = src_addr_u; | ||
473 | |||
474 | // calculate scale factors | ||
475 | vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | ||
476 | float y_scale = (float)src_height/(float)dst_height; | ||
477 | |||
478 | // double buffered processing | ||
479 | // buffer switching | ||
480 | unsigned int curr_src_idx = 0; | ||
481 | unsigned int curr_dst_idx = 0; | ||
482 | unsigned int next_src_idx, next_dst_idx; | ||
483 | |||
484 | // 2 lines y as output, upper and lowerline | ||
485 | unsigned int curr_interpl_y_upper = 0; | ||
486 | unsigned int next_interpl_y_upper; | ||
487 | unsigned int curr_interpl_y_lower, next_interpl_y_lower; | ||
488 | // only 1 line v/u output, both planes have the same dimension | ||
489 | unsigned int curr_interpl_vu = 0; | ||
490 | unsigned int next_interpl_vu; | ||
491 | |||
492 | // weights, calculated in every loop iteration | ||
493 | vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
494 | vector float vf_next_NSweight_y_upper; | ||
495 | vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | ||
496 | vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
497 | vector float vf_next_NSweight_vu; | ||
498 | |||
499 | // line indices for the src picture | ||
500 | float curr_src_y_upper = 0.0f, next_src_y_upper; | ||
501 | float curr_src_y_lower, next_src_y_lower; | ||
502 | float curr_src_vu = 0.0f, next_src_vu; | ||
503 | |||
504 | // line indices for the dst picture | ||
505 | unsigned int dst_y=0, dst_vu=0; | ||
506 | |||
507 | // offset for the v and u plane to handle misalignement | ||
508 | unsigned int curr_lsoff_v = 0, next_lsoff_v; | ||
509 | unsigned int curr_lsoff_u = 0, next_lsoff_u; | ||
510 | |||
511 | // calculate lower line indices | ||
512 | curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | ||
513 | curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | ||
514 | // lower line weight | ||
515 | vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | ||
516 | |||
517 | |||
518 | // start partially double buffered processing | ||
519 | // get initial data, 2 sets of y, 1 set v, 1 set u | ||
520 | mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | ||
521 | mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
522 | (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | ||
523 | src_dbl_linestride_y, | ||
524 | RETR_BUF, | ||
525 | 0, 0 ); | ||
526 | mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
527 | mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
528 | |||
529 | // iteration loop | ||
530 | // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | ||
531 | // the scaled output is 2 lines y, 1 line v, 1 line u | ||
532 | // the scaled YUV output is stored to RAM | ||
533 | for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | ||
534 | dst_y = dst_vu<<1; | ||
535 | |||
536 | // calculate next indices | ||
537 | next_src_vu = ((float)dst_vu+1)*y_scale; | ||
538 | next_src_y_upper = ((float)dst_y+2)*y_scale; | ||
539 | next_src_y_lower = ((float)dst_y+3)*y_scale; | ||
540 | |||
541 | next_interpl_vu = (unsigned int) next_src_vu; | ||
542 | next_interpl_y_upper = (unsigned int) next_src_y_upper; | ||
543 | next_interpl_y_lower = (unsigned int) next_src_y_lower; | ||
544 | |||
545 | // calculate weight NORTH-SOUTH | ||
546 | vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | ||
547 | vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | ||
548 | vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | ||
549 | |||
550 | // get next lines | ||
551 | next_src_idx = curr_src_idx^1; | ||
552 | next_dst_idx = curr_dst_idx^1; | ||
553 | |||
554 | // 4 lines y | ||
555 | mfc_get( y_plane[next_src_idx], | ||
556 | (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | ||
557 | src_dbl_linestride_y, | ||
558 | RETR_BUF+next_src_idx, | ||
559 | 0, 0 ); | ||
560 | mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | ||
561 | (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | ||
562 | src_dbl_linestride_y, | ||
563 | RETR_BUF+next_src_idx, | ||
564 | 0, 0 ); | ||
565 | |||
566 | // 2 lines v | ||
567 | precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu); | ||
568 | next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F; | ||
569 | mfc_get( v_plane[next_src_idx], | ||
570 | ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0, | ||
571 | src_dbl_linestride_vu+(next_lsoff_v<<1), | ||
572 | RETR_BUF+next_src_idx, | ||
573 | 0, 0 ); | ||
574 | // 2 lines u | ||
575 | precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu); | ||
576 | next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F; | ||
577 | mfc_get( u_plane[next_src_idx], | ||
578 | ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0, | ||
579 | src_dbl_linestride_vu+(next_lsoff_u<<1), // sized with the u plane's own misalignment offset | ||
580 | RETR_BUF+next_src_idx, | ||
581 | 0, 0 ); | ||
582 | |||
583 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
584 | |||
585 | // scaling | ||
586 | // work line y_upper | ||
587 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
588 | scaled_y_plane[curr_src_idx], | ||
589 | dst_width, | ||
590 | vf_x_scale, | ||
591 | vf_curr_NSweight_y_upper, | ||
592 | src_linestride_y ); | ||
593 | // work line y_lower | ||
594 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
595 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
596 | dst_width, | ||
597 | vf_x_scale, | ||
598 | vf_curr_NSweight_y_lower, | ||
599 | src_linestride_y ); | ||
600 | // work line v | ||
601 | bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | ||
602 | scaled_v_plane[curr_src_idx], | ||
603 | dst_width>>1, | ||
604 | vf_x_scale, | ||
605 | vf_curr_NSweight_vu, | ||
606 | src_linestride_vu ); | ||
607 | // work line u | ||
608 | bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | ||
609 | scaled_u_plane[curr_src_idx], | ||
610 | dst_width>>1, | ||
611 | vf_x_scale, | ||
612 | vf_curr_NSweight_vu, | ||
613 | src_linestride_vu ); | ||
614 | |||
615 | //--------------------------------------------------------------------------------------------- | ||
616 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
617 | |||
618 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
619 | // dst_width: Pixel width of destination image | ||
620 | // dst_addr: Destination address in main memory | ||
621 | // dst_vu: Counter which is incremented one by one | ||
622 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
623 | |||
624 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
625 | (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
626 | dst_dbl_linestride_y, // Two Y lines (depending on the width of the destination resolution) | ||
627 | STR_BUF+curr_dst_idx, // Tag | ||
628 | 0, 0 ); | ||
629 | |||
630 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
631 | (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
632 | dst_dbl_linestride_vu, // One V line (half the width of the destination resolution) | ||
633 | STR_BUF+curr_dst_idx, // Tag | ||
634 | 0, 0 ); | ||
635 | |||
636 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
637 | (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
638 | dst_dbl_linestride_vu, // One U line (half the width of the destination resolution) | ||
639 | STR_BUF+curr_dst_idx, // Tag | ||
640 | 0, 0 ); | ||
641 | //--------------------------------------------------------------------------------------------- | ||
642 | |||
643 | |||
644 | // update for next cycle | ||
645 | curr_src_idx = next_src_idx; | ||
646 | curr_dst_idx = next_dst_idx; | ||
647 | |||
648 | curr_interpl_y_upper = next_interpl_y_upper; | ||
649 | curr_interpl_y_lower = next_interpl_y_lower; | ||
650 | curr_interpl_vu = next_interpl_vu; | ||
651 | // the weights computed for the prefetched lines become the current weights | ||
652 | vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper; | ||
653 | vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower; | ||
654 | vf_curr_NSweight_vu = vf_next_NSweight_vu; | ||
655 | |||
656 | curr_src_y_upper = next_src_y_upper; | ||
657 | curr_src_y_lower = next_src_y_lower; | ||
658 | curr_src_vu = next_src_vu; | ||
659 | |||
660 | curr_lsoff_v = next_lsoff_v; | ||
661 | curr_lsoff_u = next_lsoff_u; | ||
662 | } | ||
663 | |||
664 | |||
665 | // final pass: scale and store the lines fetched last, nothing more is prefetched | ||
666 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
667 | |||
668 | // scaling | ||
669 | // work line y_upper | ||
670 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
671 | scaled_y_plane[curr_src_idx], | ||
672 | dst_width, | ||
673 | vf_x_scale, | ||
674 | vf_curr_NSweight_y_upper, | ||
675 | src_linestride_y ); | ||
676 | // work line y_lower | ||
677 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
678 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
679 | dst_width, | ||
680 | vf_x_scale, | ||
681 | vf_curr_NSweight_y_lower, | ||
682 | src_linestride_y ); | ||
683 | // work line v | ||
684 | bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v, | ||
685 | scaled_v_plane[curr_src_idx], | ||
686 | dst_width>>1, | ||
687 | vf_x_scale, | ||
688 | vf_curr_NSweight_vu, | ||
689 | src_linestride_vu ); | ||
690 | // work line u | ||
691 | bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u, | ||
692 | scaled_u_plane[curr_src_idx], | ||
693 | dst_width>>1, | ||
694 | vf_x_scale, | ||
695 | vf_curr_NSweight_vu, | ||
696 | src_linestride_vu ); | ||
697 | |||
698 | //--------------------------------------------------------------------------------------------- | ||
699 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
700 | |||
701 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
702 | // dst_width: Pixel width of destination image | ||
703 | // dst_addr: Destination address in main memory | ||
704 | // dst_vu: Counter which is incremented one by one | ||
705 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
706 | |||
707 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
708 | (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
709 | dst_dbl_linestride_y, // Two Y lines (depending on the width of the destination resolution) | ||
710 | STR_BUF+curr_dst_idx, // Tag | ||
711 | 0, 0 ); | ||
712 | |||
713 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
714 | (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
715 | dst_dbl_linestride_vu, // One V line (half the width of the destination resolution) | ||
716 | STR_BUF+curr_dst_idx, // Tag | ||
717 | 0, 0 ); | ||
718 | |||
719 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
720 | (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
721 | dst_dbl_linestride_vu, // One U line (half the width of the destination resolution) | ||
722 | STR_BUF+curr_dst_idx, // Tag | ||
723 | 0, 0 ); | ||
724 | |||
725 | // wait for completion | ||
726 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
727 | //--------------------------------------------------------------------------------------------- | ||
728 | } | ||
729 | |||
730 | |||
731 | /* | ||
732 | * scale_srcw32_dstw16() | ||
733 | * | ||
734 | * processes an input image of width 32 | ||
735 | * scaling is done to a width 16 | ||
736 | * yuv2rgb conversion on a width of 16 | ||
737 | * result stored in RAM | ||
738 | */ | ||
739 | void scale_srcw32_dstw16() { | ||
740 | // extract parameters | ||
741 | unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | ||
742 | |||
743 | unsigned int src_width = parms.src_pixel_width; | ||
744 | unsigned int src_height = parms.src_pixel_height; | ||
745 | unsigned int dst_width = parms.dst_pixel_width; | ||
746 | unsigned int dst_height = parms.dst_pixel_height; | ||
747 | |||
748 | // YVU | ||
749 | unsigned int src_linestride_y = src_width; | ||
750 | unsigned int src_dbl_linestride_y = src_width<<1; | ||
751 | unsigned int src_linestride_vu = src_width>>1; | ||
752 | unsigned int src_dbl_linestride_vu = src_width; | ||
753 | // scaled YVU | ||
754 | unsigned int scaled_src_linestride_y = dst_width; | ||
755 | |||
756 | // ram addresses | ||
757 | unsigned char* src_addr_y = parms.y_plane; | ||
758 | unsigned char* src_addr_v = parms.v_plane; | ||
759 | unsigned char* src_addr_u = parms.u_plane; | ||
760 | |||
761 | unsigned int dst_picture_size = dst_width*dst_height; | ||
762 | |||
763 | // Sizes for destination | ||
764 | unsigned int dst_dbl_linestride_y = dst_width<<1; | ||
765 | unsigned int dst_dbl_linestride_vu = dst_width>>1; | ||
766 | |||
767 | // Perform address calculation for Y, V and U in main memory with dst_addr as base | ||
768 | unsigned char* dst_addr_main_memory_y = dst_addr; | ||
769 | unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | ||
770 | unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | ||
771 | |||
772 | // calculate scale factors | ||
773 | vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | ||
774 | float y_scale = (float)src_height/(float)dst_height; | ||
775 | |||
776 | // double buffered processing | ||
777 | // buffer switching | ||
778 | unsigned int curr_src_idx = 0; | ||
779 | unsigned int curr_dst_idx = 0; | ||
780 | unsigned int next_src_idx, next_dst_idx; | ||
781 | |||
782 | // 2 lines y as output, upper and lowerline | ||
783 | unsigned int curr_interpl_y_upper = 0; | ||
784 | unsigned int next_interpl_y_upper; | ||
785 | unsigned int curr_interpl_y_lower, next_interpl_y_lower; | ||
786 | // only 1 line v/u output, both planes have the same dimension | ||
787 | unsigned int curr_interpl_vu = 0; | ||
788 | unsigned int next_interpl_vu; | ||
789 | |||
790 | // weights, calculated in every loop iteration | ||
791 | vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
792 | vector float vf_next_NSweight_y_upper; | ||
793 | vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | ||
794 | vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
795 | vector float vf_next_NSweight_vu; | ||
796 | |||
797 | // line indices for the src picture | ||
798 | float curr_src_y_upper = 0.0f, next_src_y_upper; | ||
799 | float curr_src_y_lower, next_src_y_lower; | ||
800 | float curr_src_vu = 0.0f, next_src_vu; | ||
801 | |||
802 | // line indices for the dst picture | ||
803 | unsigned int dst_y=0, dst_vu=0; | ||
804 | |||
805 | // calculate lower line idices | ||
806 | curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | ||
807 | curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | ||
808 | // lower line weight | ||
809 | vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | ||
810 | |||
811 | |||
812 | // start partially double buffered processing | ||
813 | // get initial data, 2 sets of y, 1 set v, 1 set u | ||
814 | mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | ||
815 | mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
816 | (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | ||
817 | src_dbl_linestride_y, | ||
818 | RETR_BUF, | ||
819 | 0, 0 ); | ||
820 | mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
821 | mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
822 | |||
823 | // iteration loop | ||
824 | // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | ||
825 | // the scaled output is 2 lines y, 1 line v, 1 line u | ||
826 | // the yuv2rgb-converted output is stored to RAM | ||
827 | for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | ||
828 | dst_y = dst_vu<<1; | ||
829 | |||
830 | // calculate next indices | ||
831 | next_src_vu = ((float)dst_vu+1)*y_scale; | ||
832 | next_src_y_upper = ((float)dst_y+2)*y_scale; | ||
833 | next_src_y_lower = ((float)dst_y+3)*y_scale; | ||
834 | |||
835 | next_interpl_vu = (unsigned int) next_src_vu; | ||
836 | next_interpl_y_upper = (unsigned int) next_src_y_upper; | ||
837 | next_interpl_y_lower = (unsigned int) next_src_y_lower; | ||
838 | |||
839 | // calculate weight NORTH-SOUTH | ||
840 | vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | ||
841 | vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | ||
842 | vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | ||
843 | |||
844 | // get next lines | ||
845 | next_src_idx = curr_src_idx^1; | ||
846 | next_dst_idx = curr_dst_idx^1; | ||
847 | |||
848 | // 4 lines y | ||
849 | mfc_get( y_plane[next_src_idx], | ||
850 | (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | ||
851 | src_dbl_linestride_y, | ||
852 | RETR_BUF+next_src_idx, | ||
853 | 0, 0 ); | ||
854 | mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | ||
855 | (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | ||
856 | src_dbl_linestride_y, | ||
857 | RETR_BUF+next_src_idx, | ||
858 | 0, 0 ); | ||
859 | |||
860 | // 2 lines v | ||
861 | mfc_get( v_plane[next_src_idx], | ||
862 | (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), | ||
863 | src_dbl_linestride_vu, | ||
864 | RETR_BUF+next_src_idx, | ||
865 | 0, 0 ); | ||
866 | // 2 lines u | ||
867 | mfc_get( u_plane[next_src_idx], | ||
868 | (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), | ||
869 | src_dbl_linestride_vu, | ||
870 | RETR_BUF+next_src_idx, | ||
871 | 0, 0 ); | ||
872 | |||
873 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
874 | |||
875 | // scaling | ||
876 | // work line y_upper | ||
877 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
878 | scaled_y_plane[curr_src_idx], | ||
879 | dst_width, | ||
880 | vf_x_scale, | ||
881 | vf_curr_NSweight_y_upper, | ||
882 | src_linestride_y ); | ||
883 | // work line y_lower | ||
884 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
885 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
886 | dst_width, | ||
887 | vf_x_scale, | ||
888 | vf_curr_NSweight_y_lower, | ||
889 | src_linestride_y ); | ||
890 | // work line v | ||
891 | bilinear_scale_line_w16( v_plane[curr_src_idx], | ||
892 | scaled_v_plane[curr_src_idx], | ||
893 | dst_width>>1, | ||
894 | vf_x_scale, | ||
895 | vf_curr_NSweight_vu, | ||
896 | src_linestride_vu ); | ||
897 | // work line u | ||
898 | bilinear_scale_line_w16( u_plane[curr_src_idx], | ||
899 | scaled_u_plane[curr_src_idx], | ||
900 | dst_width>>1, | ||
901 | vf_x_scale, | ||
902 | vf_curr_NSweight_vu, | ||
903 | src_linestride_vu ); | ||
904 | |||
905 | //--------------------------------------------------------------------------------------------- | ||
906 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
907 | |||
908 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
909 | // dst_width: Pixel width of destination image | ||
910 | // dst_addr: Destination address in main memory | ||
911 | // dst_vu: Counter which is incremented one by one | ||
912 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
913 | |||
914 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
915 | (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
916 | dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | ||
917 | STR_BUF+curr_dst_idx, // Tag | ||
918 | 0, 0 ); | ||
919 | |||
920 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
921 | (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
922 | dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | ||
923 | STR_BUF+curr_dst_idx, // Tag | ||
924 | 0, 0 ); | ||
925 | |||
926 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
927 | (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
928 | dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | ||
929 | STR_BUF+curr_dst_idx, // Tag | ||
930 | 0, 0 ); | ||
931 | //--------------------------------------------------------------------------------------------- | ||
932 | |||
933 | |||
934 | // update for next cycle | ||
935 | curr_src_idx = next_src_idx; | ||
936 | curr_dst_idx = next_dst_idx; | ||
937 | |||
938 | curr_interpl_y_upper = next_interpl_y_upper; | ||
939 | curr_interpl_y_lower = next_interpl_y_lower; | ||
940 | curr_interpl_vu = next_interpl_vu; | ||
941 | |||
942 | vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; | ||
943 | vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; | ||
944 | vf_curr_NSweight_vu = vf_next_NSweight_vu; | ||
945 | |||
946 | curr_src_y_upper = next_src_y_upper; | ||
947 | curr_src_y_lower = next_src_y_lower; | ||
948 | curr_src_vu = next_src_vu; | ||
949 | } | ||
950 | |||
951 | |||
952 | |||
953 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
954 | |||
955 | // scaling | ||
956 | // work line y_upper | ||
957 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
958 | scaled_y_plane[curr_src_idx], | ||
959 | dst_width, | ||
960 | vf_x_scale, | ||
961 | vf_curr_NSweight_y_upper, | ||
962 | src_linestride_y ); | ||
963 | // work line y_lower | ||
964 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
965 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
966 | dst_width, | ||
967 | vf_x_scale, | ||
968 | vf_curr_NSweight_y_lower, | ||
969 | src_linestride_y ); | ||
970 | // work line v | ||
971 | bilinear_scale_line_w16( v_plane[curr_src_idx], | ||
972 | scaled_v_plane[curr_src_idx], | ||
973 | dst_width>>1, | ||
974 | vf_x_scale, | ||
975 | vf_curr_NSweight_vu, | ||
976 | src_linestride_vu ); | ||
977 | // work line u | ||
978 | bilinear_scale_line_w16( u_plane[curr_src_idx], | ||
979 | scaled_u_plane[curr_src_idx], | ||
980 | dst_width>>1, | ||
981 | vf_x_scale, | ||
982 | vf_curr_NSweight_vu, | ||
983 | src_linestride_vu ); | ||
984 | |||
985 | |||
986 | //--------------------------------------------------------------------------------------------- | ||
987 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
988 | |||
989 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
990 | // dst_width: Pixel width of destination image | ||
991 | // dst_addr: Destination address in main memory | ||
992 | // dst_vu: Counter which is incremented one by one | ||
993 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
994 | |||
995 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
996 | (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
997 | dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | ||
998 | STR_BUF+curr_dst_idx, // Tag | ||
999 | 0, 0 ); | ||
1000 | |||
1001 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
1002 | (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
1003 | dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | ||
1004 | STR_BUF+curr_dst_idx, // Tag | ||
1005 | 0, 0 ); | ||
1006 | |||
1007 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
1008 | (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
1009 | dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | ||
1010 | STR_BUF+curr_dst_idx, // Tag | ||
1011 | 0, 0 ); | ||
1012 | |||
1013 | // wait for completion | ||
1014 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
1015 | //--------------------------------------------------------------------------------------------- | ||
1016 | } | ||
1017 | |||
1018 | |||
1019 | /** | ||
1020 | * scale_srcw32_dstw32() | ||
1021 | * | ||
1022 | * processes an input image of width 32 | ||
1023 | * scaling is done to a width 32 | ||
1024 | * yuv2rgb conversion on a width of 32 | ||
1025 | * result stored in RAM | ||
1026 | */ | ||
1027 | void scale_srcw32_dstw32() { | ||
1028 | // extract parameters | ||
1029 | unsigned char* dst_addr = (unsigned char *)parms.dstBuffer; | ||
1030 | |||
1031 | unsigned int src_width = parms.src_pixel_width; | ||
1032 | unsigned int src_height = parms.src_pixel_height; | ||
1033 | unsigned int dst_width = parms.dst_pixel_width; | ||
1034 | unsigned int dst_height = parms.dst_pixel_height; | ||
1035 | |||
1036 | // YVU | ||
1037 | unsigned int src_linestride_y = src_width; | ||
1038 | unsigned int src_dbl_linestride_y = src_width<<1; | ||
1039 | unsigned int src_linestride_vu = src_width>>1; | ||
1040 | unsigned int src_dbl_linestride_vu = src_width; | ||
1041 | |||
1042 | // scaled YVU | ||
1043 | unsigned int scaled_src_linestride_y = dst_width; | ||
1044 | |||
1045 | // ram addresses | ||
1046 | unsigned char* src_addr_y = parms.y_plane; | ||
1047 | unsigned char* src_addr_v = parms.v_plane; | ||
1048 | unsigned char* src_addr_u = parms.u_plane; | ||
1049 | |||
1050 | unsigned int dst_picture_size = dst_width*dst_height; | ||
1051 | |||
1052 | // Sizes for destination | ||
1053 | unsigned int dst_dbl_linestride_y = dst_width<<1; | ||
1054 | unsigned int dst_dbl_linestride_vu = dst_width>>1; | ||
1055 | |||
1056 | // Perform address calculation for Y, V and U in main memory with dst_addr as base | ||
1057 | unsigned char* dst_addr_main_memory_y = dst_addr; | ||
1058 | unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size; | ||
1059 | unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2); | ||
1060 | |||
1061 | // calculate scale factors | ||
1062 | vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width ); | ||
1063 | float y_scale = (float)src_height/(float)dst_height; | ||
1064 | |||
1065 | // double buffered processing | ||
1066 | // buffer switching | ||
1067 | unsigned int curr_src_idx = 0; | ||
1068 | unsigned int curr_dst_idx = 0; | ||
1069 | unsigned int next_src_idx, next_dst_idx; | ||
1070 | |||
1071 | // 2 lines y as output, upper and lowerline | ||
1072 | unsigned int curr_interpl_y_upper = 0; | ||
1073 | unsigned int next_interpl_y_upper; | ||
1074 | unsigned int curr_interpl_y_lower, next_interpl_y_lower; | ||
1075 | // only 1 line v/u output, both planes have the same dimension | ||
1076 | unsigned int curr_interpl_vu = 0; | ||
1077 | unsigned int next_interpl_vu; | ||
1078 | |||
1079 | // weights, calculated in every loop iteration | ||
1080 | vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
1081 | vector float vf_next_NSweight_y_upper; | ||
1082 | vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower; | ||
1083 | vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f }; | ||
1084 | vector float vf_next_NSweight_vu; | ||
1085 | |||
1086 | // line indices for the src picture | ||
1087 | float curr_src_y_upper = 0.0f, next_src_y_upper; | ||
1088 | float curr_src_y_lower, next_src_y_lower; | ||
1089 | float curr_src_vu = 0.0f, next_src_vu; | ||
1090 | |||
1091 | // line indices for the dst picture | ||
1092 | unsigned int dst_y=0, dst_vu=0; | ||
1093 | |||
1094 | // calculate lower line idices | ||
1095 | curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale; | ||
1096 | curr_interpl_y_lower = (unsigned int)curr_src_y_lower; | ||
1097 | // lower line weight | ||
1098 | vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower ); | ||
1099 | |||
1100 | |||
1101 | // start partially double buffered processing | ||
1102 | // get initial data, 2 sets of y, 1 set v, 1 set u | ||
1103 | mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 ); | ||
1104 | mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
1105 | (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y), | ||
1106 | src_dbl_linestride_y, | ||
1107 | RETR_BUF, | ||
1108 | 0, 0 ); | ||
1109 | mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
1110 | mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 ); | ||
1111 | |||
1112 | // iteration loop | ||
1113 | // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved | ||
1114 | // the scaled output is 2 lines y, 1 line v, 1 line u | ||
1115 | // the yuv2rgb-converted output is stored to RAM | ||
1116 | for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) { | ||
1117 | dst_y = dst_vu<<1; | ||
1118 | |||
1119 | // calculate next indices | ||
1120 | next_src_vu = ((float)dst_vu+1)*y_scale; | ||
1121 | next_src_y_upper = ((float)dst_y+2)*y_scale; | ||
1122 | next_src_y_lower = ((float)dst_y+3)*y_scale; | ||
1123 | |||
1124 | next_interpl_vu = (unsigned int) next_src_vu; | ||
1125 | next_interpl_y_upper = (unsigned int) next_src_y_upper; | ||
1126 | next_interpl_y_lower = (unsigned int) next_src_y_lower; | ||
1127 | |||
1128 | // calculate weight NORTH-SOUTH | ||
1129 | vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu ); | ||
1130 | vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper ); | ||
1131 | vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower ); | ||
1132 | |||
1133 | // get next lines | ||
1134 | next_src_idx = curr_src_idx^1; | ||
1135 | next_dst_idx = curr_dst_idx^1; | ||
1136 | |||
1137 | // 4 lines y | ||
1138 | mfc_get( y_plane[next_src_idx], | ||
1139 | (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y), | ||
1140 | src_dbl_linestride_y, | ||
1141 | RETR_BUF+next_src_idx, | ||
1142 | 0, 0 ); | ||
1143 | mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y, | ||
1144 | (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y), | ||
1145 | src_dbl_linestride_y, | ||
1146 | RETR_BUF+next_src_idx, | ||
1147 | 0, 0 ); | ||
1148 | |||
1149 | // 2 lines v | ||
1150 | mfc_get( v_plane[next_src_idx], | ||
1151 | (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu), | ||
1152 | src_dbl_linestride_vu, | ||
1153 | RETR_BUF+next_src_idx, | ||
1154 | 0, 0 ); | ||
1155 | // 2 lines u | ||
1156 | mfc_get( u_plane[next_src_idx], | ||
1157 | (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu), | ||
1158 | src_dbl_linestride_vu, | ||
1159 | RETR_BUF+next_src_idx, | ||
1160 | 0, 0 ); | ||
1161 | |||
1162 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
1163 | |||
1164 | // scaling | ||
1165 | // work line y_upper | ||
1166 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
1167 | scaled_y_plane[curr_src_idx], | ||
1168 | dst_width, | ||
1169 | vf_x_scale, | ||
1170 | vf_curr_NSweight_y_upper, | ||
1171 | src_linestride_y ); | ||
1172 | // work line y_lower | ||
1173 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
1174 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
1175 | dst_width, | ||
1176 | vf_x_scale, | ||
1177 | vf_curr_NSweight_y_lower, | ||
1178 | src_linestride_y ); | ||
1179 | // work line v | ||
1180 | bilinear_scale_line_w16( v_plane[curr_src_idx], | ||
1181 | scaled_v_plane[curr_src_idx], | ||
1182 | dst_width>>1, | ||
1183 | vf_x_scale, | ||
1184 | vf_curr_NSweight_vu, | ||
1185 | src_linestride_vu ); | ||
1186 | // work line u | ||
1187 | bilinear_scale_line_w16( u_plane[curr_src_idx], | ||
1188 | scaled_u_plane[curr_src_idx], | ||
1189 | dst_width>>1, | ||
1190 | vf_x_scale, | ||
1191 | vf_curr_NSweight_vu, | ||
1192 | src_linestride_vu ); | ||
1193 | |||
1194 | |||
1195 | |||
1196 | // Store the result back to main memory into a destination buffer in YUV format | ||
1197 | //--------------------------------------------------------------------------------------------- | ||
1198 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
1199 | |||
1200 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
1201 | // dst_width: Pixel width of destination image | ||
1202 | // dst_addr: Destination address in main memory | ||
1203 | // dst_vu: Counter which is incremented one by one | ||
1204 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
1205 | |||
1206 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
1207 | (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
1208 | dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | ||
1209 | STR_BUF+curr_dst_idx, // Tag | ||
1210 | 0, 0 ); | ||
1211 | |||
1212 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
1213 | (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
1214 | dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | ||
1215 | STR_BUF+curr_dst_idx, // Tag | ||
1216 | 0, 0 ); | ||
1217 | |||
1218 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
1219 | (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
1220 | dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | ||
1221 | STR_BUF+curr_dst_idx, // Tag | ||
1222 | 0, 0 ); | ||
1223 | //--------------------------------------------------------------------------------------------- | ||
1224 | |||
1225 | |||
1226 | // update for next cycle | ||
1227 | curr_src_idx = next_src_idx; | ||
1228 | curr_dst_idx = next_dst_idx; | ||
1229 | |||
1230 | curr_interpl_y_upper = next_interpl_y_upper; | ||
1231 | curr_interpl_y_lower = next_interpl_y_lower; | ||
1232 | curr_interpl_vu = next_interpl_vu; | ||
1233 | |||
1234 | vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper; | ||
1235 | vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower; | ||
1236 | vf_curr_NSweight_vu = vf_next_NSweight_vu; | ||
1237 | |||
1238 | curr_src_y_upper = next_src_y_upper; | ||
1239 | curr_src_y_lower = next_src_y_lower; | ||
1240 | curr_src_vu = next_src_vu; | ||
1241 | } | ||
1242 | |||
1243 | |||
1244 | |||
1245 | DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) ); | ||
1246 | |||
1247 | // scaling | ||
1248 | // work line y_upper | ||
1249 | bilinear_scale_line_w16( y_plane[curr_src_idx], | ||
1250 | scaled_y_plane[curr_src_idx], | ||
1251 | dst_width, | ||
1252 | vf_x_scale, | ||
1253 | vf_curr_NSweight_y_upper, | ||
1254 | src_linestride_y ); | ||
1255 | // work line y_lower | ||
1256 | bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y, | ||
1257 | scaled_y_plane[curr_src_idx]+scaled_src_linestride_y, | ||
1258 | dst_width, | ||
1259 | vf_x_scale, | ||
1260 | vf_curr_NSweight_y_lower, | ||
1261 | src_linestride_y ); | ||
1262 | // work line v | ||
1263 | bilinear_scale_line_w16( v_plane[curr_src_idx], | ||
1264 | scaled_v_plane[curr_src_idx], | ||
1265 | dst_width>>1, | ||
1266 | vf_x_scale, | ||
1267 | vf_curr_NSweight_vu, | ||
1268 | src_linestride_vu ); | ||
1269 | // work line u | ||
1270 | bilinear_scale_line_w16( u_plane[curr_src_idx], | ||
1271 | scaled_u_plane[curr_src_idx], | ||
1272 | dst_width>>1, | ||
1273 | vf_x_scale, | ||
1274 | vf_curr_NSweight_vu, | ||
1275 | src_linestride_vu ); | ||
1276 | |||
1277 | |||
1278 | // Store the result back to main memory into a destination buffer in YUV format | ||
1279 | //--------------------------------------------------------------------------------------------- | ||
1280 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
1281 | |||
1282 | // Perform three DMA transfers to 3 different locations in the main memory! | ||
1283 | // dst_width: Pixel width of destination image | ||
1284 | // dst_addr: Destination address in main memory | ||
1285 | // dst_vu: Counter which is incremented one by one | ||
1286 | // dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu) | ||
1287 | |||
1288 | mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr) | ||
1289 | (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr) | ||
1290 | dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution) | ||
1291 | STR_BUF+curr_dst_idx, // Tag | ||
1292 | 0, 0 ); | ||
1293 | |||
1294 | mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr) | ||
1295 | (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
1296 | dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution) | ||
1297 | STR_BUF+curr_dst_idx, // Tag | ||
1298 | 0, 0 ); | ||
1299 | |||
1300 | mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr) | ||
1301 | (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr) | ||
1302 | dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution) | ||
1303 | STR_BUF+curr_dst_idx, // Tag | ||
1304 | 0, 0 ); | ||
1305 | |||
1306 | // wait for completion | ||
1307 | DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) ); | ||
1308 | //--------------------------------------------------------------------------------------------- | ||
1309 | } | ||
1310 | |||
1311 | |||
1312 | /* | ||
1313 | * bilinear_scale_line_w8() | ||
1314 | * | ||
1315 | * processes a line of yuv-input, width has to be a multiple of 8 | ||
1316 | * scaled yuv-output is written to local store buffer | ||
1317 | * | ||
1318 | * @param src buffer for 2 lines input | ||
1319 | * @param dst_ buffer for 1 line output | ||
1320 | * @param dst_width the width of the destination line | ||
1321 | * @param vf_x_scale a float vector, at each entry is the x_scale-factor | ||
1322 | * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line | ||
1323 | * @param src_linestride the stride of the srcline | ||
1324 | */ | ||
1325 | void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { | ||
1326 | |||
1327 | unsigned char* dst = dst_; | ||
1328 | |||
1329 | unsigned int dst_x; | ||
1330 | for( dst_x=0; dst_x<dst_width; dst_x+=8) { | ||
1331 | // address calculation for loading the 4 surrounding pixel of each calculated | ||
1332 | // destination pixel | ||
1333 | vector unsigned int vui_dst_x_tmp = spu_splats( dst_x ); | ||
1334 | // lower range->first 4 pixel | ||
1335 | // upper range->next 4 pixel | ||
1336 | vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 }; | ||
1337 | vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 }; | ||
1338 | vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range ); | ||
1339 | vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range ); | ||
1340 | |||
1341 | // calculate weight EAST-WEST | ||
1342 | vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 ); | ||
1343 | vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 ); | ||
1344 | vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale ); | ||
1345 | vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale ); | ||
1346 | vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 ); | ||
1347 | vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 ); | ||
1348 | vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 ); | ||
1349 | vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 ); | ||
1350 | vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range ); | ||
1351 | vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range ); | ||
1352 | |||
1353 | // calculate address offset | ||
1354 | // | ||
1355 | // pixel NORTH WEST | ||
1356 | vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range; | ||
1357 | vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range; | ||
1358 | |||
1359 | // pixel NORTH EAST-->(offpixelNW+1) | ||
1360 | vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; | ||
1361 | vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 ); | ||
1362 | vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 ); | ||
1363 | |||
1364 | // SOUTH-WEST-->(offpixelNW+src_linestride) | ||
1365 | vector unsigned int vui_srclinestride = spu_splats( src_linestride ); | ||
1366 | vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range ); | ||
1367 | vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range ); | ||
1368 | |||
1369 | // SOUTH-EAST-->(offpixelNW+src_linestride+1) | ||
1370 | vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range ); | ||
1371 | vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range ); | ||
1372 | |||
1373 | // calculate each address | ||
1374 | vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); | ||
1375 | vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range ); | ||
1376 | vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range ); | ||
1377 | vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range ); | ||
1378 | vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range ); | ||
1379 | |||
1380 | vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range ); | ||
1381 | vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range ); | ||
1382 | vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range ); | ||
1383 | vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range ); | ||
1384 | |||
1385 | // get each pixel | ||
1386 | // | ||
1387 | // scalar load, afterwards insertion into the right position | ||
1388 | // NORTH WEST | ||
1389 | vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | ||
1390 | vector unsigned char vuc_pixel_NW_lower_range = spu_insert( | ||
1391 | *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 ); | ||
1392 | vuc_pixel_NW_lower_range = spu_insert( | ||
1393 | *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )), | ||
1394 | vuc_pixel_NW_lower_range, 7 ); | ||
1395 | vuc_pixel_NW_lower_range = spu_insert( | ||
1396 | *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )), | ||
1397 | vuc_pixel_NW_lower_range, 11 ); | ||
1398 | vuc_pixel_NW_lower_range = spu_insert( | ||
1399 | *((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )), | ||
1400 | vuc_pixel_NW_lower_range, 15 ); | ||
1401 | |||
1402 | vector unsigned char vuc_pixel_NW_upper_range = spu_insert( | ||
1403 | *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 ); | ||
1404 | vuc_pixel_NW_upper_range = spu_insert( | ||
1405 | *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )), | ||
1406 | vuc_pixel_NW_upper_range, 7 ); | ||
1407 | vuc_pixel_NW_upper_range = spu_insert( | ||
1408 | *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )), | ||
1409 | vuc_pixel_NW_upper_range, 11 ); | ||
1410 | vuc_pixel_NW_upper_range = spu_insert( | ||
1411 | *((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )), | ||
1412 | vuc_pixel_NW_upper_range, 15 ); | ||
1413 | |||
1414 | // NORTH EAST | ||
1415 | vector unsigned char vuc_pixel_NE_lower_range = spu_insert( | ||
1416 | *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 ); | ||
1417 | vuc_pixel_NE_lower_range = spu_insert( | ||
1418 | *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )), | ||
1419 | vuc_pixel_NE_lower_range, 7 ); | ||
1420 | vuc_pixel_NE_lower_range = spu_insert( | ||
1421 | *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )), | ||
1422 | vuc_pixel_NE_lower_range, 11 ); | ||
1423 | vuc_pixel_NE_lower_range = spu_insert( | ||
1424 | *((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )), | ||
1425 | vuc_pixel_NE_lower_range, 15 ); | ||
1426 | |||
1427 | vector unsigned char vuc_pixel_NE_upper_range = spu_insert( | ||
1428 | *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 ); | ||
1429 | vuc_pixel_NE_upper_range = spu_insert( | ||
1430 | *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )), | ||
1431 | vuc_pixel_NE_upper_range, 7 ); | ||
1432 | vuc_pixel_NE_upper_range = spu_insert( | ||
1433 | *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )), | ||
1434 | vuc_pixel_NE_upper_range, 11 ); | ||
1435 | vuc_pixel_NE_upper_range = spu_insert( | ||
1436 | *((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )), | ||
1437 | vuc_pixel_NE_upper_range, 15 ); | ||
1438 | |||
1439 | |||
1440 | // SOUTH WEST | ||
1441 | vector unsigned char vuc_pixel_SW_lower_range = spu_insert( | ||
1442 | *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 ); | ||
1443 | vuc_pixel_SW_lower_range = spu_insert( | ||
1444 | *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )), | ||
1445 | vuc_pixel_SW_lower_range, 7 ); | ||
1446 | vuc_pixel_SW_lower_range = spu_insert( | ||
1447 | *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )), | ||
1448 | vuc_pixel_SW_lower_range, 11 ); | ||
1449 | vuc_pixel_SW_lower_range = spu_insert( | ||
1450 | *((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )), | ||
1451 | vuc_pixel_SW_lower_range, 15 ); | ||
1452 | |||
1453 | vector unsigned char vuc_pixel_SW_upper_range = spu_insert( | ||
1454 | *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 ); | ||
1455 | vuc_pixel_SW_upper_range = spu_insert( | ||
1456 | *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )), | ||
1457 | vuc_pixel_SW_upper_range, 7 ); | ||
1458 | vuc_pixel_SW_upper_range = spu_insert( | ||
1459 | *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )), | ||
1460 | vuc_pixel_SW_upper_range, 11 ); | ||
1461 | vuc_pixel_SW_upper_range = spu_insert( | ||
1462 | *((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )), | ||
1463 | vuc_pixel_SW_upper_range, 15 ); | ||
1464 | |||
1465 | // SOUTH EAST | ||
1466 | vector unsigned char vuc_pixel_SE_lower_range = spu_insert( | ||
1467 | *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 ); | ||
1468 | vuc_pixel_SE_lower_range = spu_insert( | ||
1469 | *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )), | ||
1470 | vuc_pixel_SE_lower_range, 7 ); | ||
1471 | vuc_pixel_SE_lower_range = spu_insert( | ||
1472 | *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )), | ||
1473 | vuc_pixel_SE_lower_range, 11 ); | ||
1474 | vuc_pixel_SE_lower_range = spu_insert( | ||
1475 | *((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )), | ||
1476 | vuc_pixel_SE_lower_range, 15 ); | ||
1477 | |||
1478 | vector unsigned char vuc_pixel_SE_upper_range = spu_insert( | ||
1479 | *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 ); | ||
1480 | vuc_pixel_SE_upper_range = spu_insert( | ||
1481 | *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )), | ||
1482 | vuc_pixel_SE_upper_range, 7 ); | ||
1483 | vuc_pixel_SE_upper_range = spu_insert( | ||
1484 | *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )), | ||
1485 | vuc_pixel_SE_upper_range, 11 ); | ||
1486 | vuc_pixel_SE_upper_range = spu_insert( | ||
1487 | *((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )), | ||
1488 | vuc_pixel_SE_upper_range, 15 ); | ||
1489 | |||
1490 | |||
1491 | // convert to float | ||
1492 | vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 ); | ||
1493 | vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 ); | ||
1494 | |||
1495 | vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 ); | ||
1496 | vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 ); | ||
1497 | |||
1498 | vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 ); | ||
1499 | vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 ); | ||
1500 | |||
1501 | vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 ); | ||
1502 | vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 ); | ||
1503 | |||
1504 | |||
1505 | |||
1506 | // first linear interpolation: EWtop | ||
1507 | // EWtop = NW + EWweight*(NE-NW) | ||
1508 | // | ||
1509 | // lower range | ||
1510 | vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range ); | ||
1511 | vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range, | ||
1512 | vf_EWtop_lower_range_tmp, | ||
1513 | vf_pixel_NW_lower_range ); | ||
1514 | |||
1515 | // upper range | ||
1516 | vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range ); | ||
1517 | vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range, | ||
1518 | vf_EWtop_upper_range_tmp, | ||
1519 | vf_pixel_NW_upper_range ); | ||
1520 | |||
1521 | |||
1522 | |||
1523 | // second linear interpolation: EWbottom | ||
1524 | // EWbottom = SW + EWweight*(SE-SW) | ||
1525 | // | ||
1526 | // lower range | ||
1527 | vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range ); | ||
1528 | vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range, | ||
1529 | vf_EWbottom_lower_range_tmp, | ||
1530 | vf_pixel_SW_lower_range ); | ||
1531 | |||
1532 | // upper range | ||
1533 | vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range ); | ||
1534 | vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range, | ||
1535 | vf_EWbottom_upper_range_tmp, | ||
1536 | vf_pixel_SW_upper_range ); | ||
1537 | |||
1538 | |||
1539 | |||
1540 | // third linear interpolation: the bilinear interpolated value | ||
1541 | // result = EWtop + NSweight*(EWbottom-EWtop); | ||
1542 | // | ||
1543 | // lower range | ||
1544 | vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range ); | ||
1545 | vector float vf_result_lower_range = spu_madd( vf_NSweight, | ||
1546 | vf_result_lower_range_tmp, | ||
1547 | vf_EWtop_lower_range ); | ||
1548 | |||
1549 | // upper range | ||
1550 | vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range ); | ||
1551 | vector float vf_result_upper_range = spu_madd( vf_NSweight, | ||
1552 | vf_result_upper_range_tmp, | ||
1553 | vf_EWtop_upper_range ); | ||
1554 | |||
1555 | |||
1556 | // convert back: using saturated arithmetic | ||
1557 | vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range ); | ||
1558 | vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range ); | ||
1559 | |||
1560 | // merge results->lower,upper | ||
1561 | vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F, | ||
1562 | 0x13, 0x17, 0x1B, 0x1F, | ||
1563 | 0x00, 0x00, 0x00, 0x00, | ||
1564 | 0x00, 0x00, 0x00, 0x00 }; | ||
1565 | |||
1566 | vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range, | ||
1567 | (vector unsigned char) vui_result_upper_range, | ||
1568 | vuc_mask_merge_result ); | ||
1569 | |||
1570 | // partial storing | ||
1571 | vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00, | ||
1572 | 0x00, 0x00, 0x00, 0x00, | ||
1573 | 0xFF, 0xFF, 0xFF, 0xFF, | ||
1574 | 0xFF, 0xFF, 0xFF, 0xFF }; | ||
1575 | |||
1576 | |||
1577 | // get currently stored data | ||
1578 | vector unsigned char vuc_orig = *((vector unsigned char*)dst); | ||
1579 | |||
1580 | // clear currently stored data | ||
1581 | vuc_orig = spu_and( vuc_orig, | ||
1582 | spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) ); | ||
1583 | |||
1584 | // rotate result according to storing address | ||
1585 | vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F ); | ||
1586 | |||
1587 | // store result | ||
1588 | *((vector unsigned char*)dst) = spu_or( vuc_result, | ||
1589 | vuc_orig ); | ||
1590 | dst += 8; | ||
1591 | } | ||
1592 | } | ||
1593 | |||
1594 | |||
1595 | /* | ||
1596 | * bilinear_scale_line_w16() | ||
1597 | * | ||
1598 | * processes a line of yuv-input, width has to be a multiple of 16 | ||
1599 | * scaled yuv-output is written to local store buffer | ||
1600 | * | ||
1601 | * @param src buffer for 2 lines input | ||
1602 | * @param dst_ buffer for 1 line output | ||
1603 | * @param dst_width the width of the destination line | ||
1604 | * @param vf_x_scale a float vector holding the x scale factor in every element | ||
1605 | * @param vf_NSweight a float vector holding the NORTH/SOUTH weight for the current line in every element | ||
1606 | * @param src_linestride the stride of the srcline | ||
1607 | */ | ||
1608 | void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) { | ||
1609 | |||
1610 | unsigned char* dst = dst_; | ||
1611 | |||
1612 | unsigned int dst_x; | ||
1613 | for( dst_x=0; dst_x<dst_width; dst_x+=16) { | ||
1614 | // address calculation for loading the 4 surrounding pixel of each calculated | ||
1615 | // destination pixel | ||
1616 | vector unsigned int vui_dst_x_tmp = spu_splats( dst_x ); | ||
1617 | // parallelised processing | ||
1618 | // first range->pixel 1 2 3 4 | ||
1619 | // second range->pixel 5 6 7 8 | ||
1620 | // third range->pixel 9 10 11 12 | ||
1621 | // fourth range->pixel 13 14 15 16 | ||
1622 | vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 }; | ||
1623 | vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 }; | ||
1624 | vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 }; | ||
1625 | vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 }; | ||
1626 | vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range ); | ||
1627 | vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range ); | ||
1628 | vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range ); | ||
1629 | vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range ); | ||
1630 | |||
1631 | // calculate weight EAST-WEST | ||
1632 | vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 ); | ||
1633 | vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 ); | ||
1634 | vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 ); | ||
1635 | vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 ); | ||
1636 | vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale ); | ||
1637 | vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale ); | ||
1638 | vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale ); | ||
1639 | vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale ); | ||
1640 | vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 ); | ||
1641 | vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 ); | ||
1642 | vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 ); | ||
1643 | vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 ); | ||
1644 | vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 ); | ||
1645 | vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 ); | ||
1646 | vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 ); | ||
1647 | vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 ); | ||
1648 | vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range ); | ||
1649 | vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range ); | ||
1650 | vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range ); | ||
1651 | vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range ); | ||
1652 | |||
1653 | // calculate address offset | ||
1654 | // | ||
1655 | // pixel NORTH WEST | ||
1656 | vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range; | ||
1657 | vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range; | ||
1658 | vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range; | ||
1659 | vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range; | ||
1660 | |||
1661 | // pixel NORTH EAST-->(offpixelNW+1) | ||
1662 | vector unsigned int vui_add_1 = { 1, 1, 1, 1 }; | ||
1663 | vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 ); | ||
1664 | vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 ); | ||
1665 | vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 ); | ||
1666 | vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 ); | ||
1667 | |||
1668 | // SOUTH-WEST-->(offpixelNW+src_linestride) | ||
1669 | vector unsigned int vui_srclinestride = spu_splats( src_linestride ); | ||
1670 | vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range ); | ||
1671 | vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range ); | ||
1672 | vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range ); | ||
1673 | vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range ); | ||
1674 | |||
1675 | // SOUTH-EAST-->(offpixelNW+src_linestride+1) | ||
1676 | vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range ); | ||
1677 | vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range ); | ||
1678 | vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range ); | ||
1679 | vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range ); | ||
1680 | |||
1681 | // calculate each address | ||
1682 | vector unsigned int vui_src_ls = spu_splats( (unsigned int) src ); | ||
1683 | vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range ); | ||
1684 | vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range ); | ||
1685 | vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range ); | ||
1686 | vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range ); | ||
1687 | |||
1688 | vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range ); | ||
1689 | vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range ); | ||
1690 | vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range ); | ||
1691 | vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range ); | ||
1692 | |||
1693 | vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range ); | ||
1694 | vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range ); | ||
1695 | vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range ); | ||
1696 | vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range ); | ||
1697 | |||
1698 | vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range ); | ||
1699 | vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range ); | ||
1700 | vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range ); | ||
1701 | vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range ); | ||
1702 | |||
1703 | |||
1704 | // get each pixel | ||
1705 | // | ||
1706 | // scalar load, afterwards insertion into the right position | ||
1707 | // NORTH WEST | ||
1708 | // first range | ||
1709 | vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | ||
1710 | vector unsigned char vuc_pixel_NW_first_range = spu_insert( | ||
1711 | *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 ); | ||
1712 | vuc_pixel_NW_first_range = spu_insert( | ||
1713 | *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )), | ||
1714 | vuc_pixel_NW_first_range, 7 ); | ||
1715 | vuc_pixel_NW_first_range = spu_insert( | ||
1716 | *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )), | ||
1717 | vuc_pixel_NW_first_range, 11 ); | ||
1718 | vuc_pixel_NW_first_range = spu_insert( | ||
1719 | *((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )), | ||
1720 | vuc_pixel_NW_first_range, 15 ); | ||
1721 | // second range | ||
1722 | vector unsigned char vuc_pixel_NW_second_range = spu_insert( | ||
1723 | *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 ); | ||
1724 | vuc_pixel_NW_second_range = spu_insert( | ||
1725 | *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )), | ||
1726 | vuc_pixel_NW_second_range, 7 ); | ||
1727 | vuc_pixel_NW_second_range = spu_insert( | ||
1728 | *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )), | ||
1729 | vuc_pixel_NW_second_range, 11 ); | ||
1730 | vuc_pixel_NW_second_range = spu_insert( | ||
1731 | *((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )), | ||
1732 | vuc_pixel_NW_second_range, 15 ); | ||
1733 | // third range | ||
1734 | vector unsigned char vuc_pixel_NW_third_range = spu_insert( | ||
1735 | *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 ); | ||
1736 | vuc_pixel_NW_third_range = spu_insert( | ||
1737 | *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )), | ||
1738 | vuc_pixel_NW_third_range, 7 ); | ||
1739 | vuc_pixel_NW_third_range = spu_insert( | ||
1740 | *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )), | ||
1741 | vuc_pixel_NW_third_range, 11 ); | ||
1742 | vuc_pixel_NW_third_range = spu_insert( | ||
1743 | *((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )), | ||
1744 | vuc_pixel_NW_third_range, 15 ); | ||
1745 | // fourth range | ||
1746 | vector unsigned char vuc_pixel_NW_fourth_range = spu_insert( | ||
1747 | *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 ); | ||
1748 | vuc_pixel_NW_fourth_range = spu_insert( | ||
1749 | *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )), | ||
1750 | vuc_pixel_NW_fourth_range, 7 ); | ||
1751 | vuc_pixel_NW_fourth_range = spu_insert( | ||
1752 | *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )), | ||
1753 | vuc_pixel_NW_fourth_range, 11 ); | ||
1754 | vuc_pixel_NW_fourth_range = spu_insert( | ||
1755 | *((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )), | ||
1756 | vuc_pixel_NW_fourth_range, 15 ); | ||
1757 | |||
1758 | // NORTH EAST | ||
1759 | // first range | ||
1760 | vector unsigned char vuc_pixel_NE_first_range = spu_insert( | ||
1761 | *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 ); | ||
1762 | vuc_pixel_NE_first_range = spu_insert( | ||
1763 | *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )), | ||
1764 | vuc_pixel_NE_first_range, 7 ); | ||
1765 | vuc_pixel_NE_first_range = spu_insert( | ||
1766 | *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )), | ||
1767 | vuc_pixel_NE_first_range, 11 ); | ||
1768 | vuc_pixel_NE_first_range = spu_insert( | ||
1769 | *((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )), | ||
1770 | vuc_pixel_NE_first_range, 15 ); | ||
1771 | // second range | ||
1772 | vector unsigned char vuc_pixel_NE_second_range = spu_insert( | ||
1773 | *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 ); | ||
1774 | vuc_pixel_NE_second_range = spu_insert( | ||
1775 | *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )), | ||
1776 | vuc_pixel_NE_second_range, 7 ); | ||
1777 | vuc_pixel_NE_second_range = spu_insert( | ||
1778 | *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )), | ||
1779 | vuc_pixel_NE_second_range, 11 ); | ||
1780 | vuc_pixel_NE_second_range = spu_insert( | ||
1781 | *((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )), | ||
1782 | vuc_pixel_NE_second_range, 15 ); | ||
1783 | // third range | ||
1784 | vector unsigned char vuc_pixel_NE_third_range = spu_insert( | ||
1785 | *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 ); | ||
1786 | vuc_pixel_NE_third_range = spu_insert( | ||
1787 | *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )), | ||
1788 | vuc_pixel_NE_third_range, 7 ); | ||
1789 | vuc_pixel_NE_third_range = spu_insert( | ||
1790 | *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )), | ||
1791 | vuc_pixel_NE_third_range, 11 ); | ||
1792 | vuc_pixel_NE_third_range = spu_insert( | ||
1793 | *((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )), | ||
1794 | vuc_pixel_NE_third_range, 15 ); | ||
1795 | // fourth range | ||
1796 | vector unsigned char vuc_pixel_NE_fourth_range = spu_insert( | ||
1797 | *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 ); | ||
1798 | vuc_pixel_NE_fourth_range = spu_insert( | ||
1799 | *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )), | ||
1800 | vuc_pixel_NE_fourth_range, 7 ); | ||
1801 | vuc_pixel_NE_fourth_range = spu_insert( | ||
1802 | *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )), | ||
1803 | vuc_pixel_NE_fourth_range, 11 ); | ||
1804 | vuc_pixel_NE_fourth_range = spu_insert( | ||
1805 | *((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )), | ||
1806 | vuc_pixel_NE_fourth_range, 15 ); | ||
1807 | |||
1808 | // SOUTH WEST | ||
1809 | // first range | ||
1810 | vector unsigned char vuc_pixel_SW_first_range = spu_insert( | ||
1811 | *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 ); | ||
1812 | vuc_pixel_SW_first_range = spu_insert( | ||
1813 | *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )), | ||
1814 | vuc_pixel_SW_first_range, 7 ); | ||
1815 | vuc_pixel_SW_first_range = spu_insert( | ||
1816 | *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )), | ||
1817 | vuc_pixel_SW_first_range, 11 ); | ||
1818 | vuc_pixel_SW_first_range = spu_insert( | ||
1819 | *((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )), | ||
1820 | vuc_pixel_SW_first_range, 15 ); | ||
1821 | // second range | ||
1822 | vector unsigned char vuc_pixel_SW_second_range = spu_insert( | ||
1823 | *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 ); | ||
1824 | vuc_pixel_SW_second_range = spu_insert( | ||
1825 | *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )), | ||
1826 | vuc_pixel_SW_second_range, 7 ); | ||
1827 | vuc_pixel_SW_second_range = spu_insert( | ||
1828 | *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )), | ||
1829 | vuc_pixel_SW_second_range, 11 ); | ||
1830 | vuc_pixel_SW_second_range = spu_insert( | ||
1831 | *((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )), | ||
1832 | vuc_pixel_SW_second_range, 15 ); | ||
1833 | // third range | ||
1834 | vector unsigned char vuc_pixel_SW_third_range = spu_insert( | ||
1835 | *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 ); | ||
1836 | vuc_pixel_SW_third_range = spu_insert( | ||
1837 | *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )), | ||
1838 | vuc_pixel_SW_third_range, 7 ); | ||
1839 | vuc_pixel_SW_third_range = spu_insert( | ||
1840 | *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )), | ||
1841 | vuc_pixel_SW_third_range, 11 ); | ||
1842 | vuc_pixel_SW_third_range = spu_insert( | ||
1843 | *((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )), | ||
1844 | vuc_pixel_SW_third_range, 15 ); | ||
1845 | // fourth range | ||
1846 | vector unsigned char vuc_pixel_SW_fourth_range = spu_insert( | ||
1847 | *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 ); | ||
1848 | vuc_pixel_SW_fourth_range = spu_insert( | ||
1849 | *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )), | ||
1850 | vuc_pixel_SW_fourth_range, 7 ); | ||
1851 | vuc_pixel_SW_fourth_range = spu_insert( | ||
1852 | *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )), | ||
1853 | vuc_pixel_SW_fourth_range, 11 ); | ||
1854 | vuc_pixel_SW_fourth_range = spu_insert( | ||
1855 | *((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )), | ||
1856 | vuc_pixel_SW_fourth_range, 15 ); | ||
1857 | |||
1858 | // SOUTH EAST | ||
1859 | // first range | ||
1860 | vector unsigned char vuc_pixel_SE_first_range = spu_insert( | ||
1861 | *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 ); | ||
1862 | vuc_pixel_SE_first_range = spu_insert( | ||
1863 | *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )), | ||
1864 | vuc_pixel_SE_first_range, 7 ); | ||
1865 | vuc_pixel_SE_first_range = spu_insert( | ||
1866 | *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )), | ||
1867 | vuc_pixel_SE_first_range, 11 ); | ||
1868 | vuc_pixel_SE_first_range = spu_insert( | ||
1869 | *((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )), | ||
1870 | vuc_pixel_SE_first_range, 15 ); | ||
1871 | // second range | ||
1872 | vector unsigned char vuc_pixel_SE_second_range = spu_insert( | ||
1873 | *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 ); | ||
1874 | vuc_pixel_SE_second_range = spu_insert( | ||
1875 | *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )), | ||
1876 | vuc_pixel_SE_second_range, 7 ); | ||
1877 | vuc_pixel_SE_second_range = spu_insert( | ||
1878 | *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )), | ||
1879 | vuc_pixel_SE_second_range, 11 ); | ||
1880 | vuc_pixel_SE_second_range = spu_insert( | ||
1881 | *((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )), | ||
1882 | vuc_pixel_SE_second_range, 15 ); | ||
1883 | // third range | ||
1884 | vector unsigned char vuc_pixel_SE_third_range = spu_insert( | ||
1885 | *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 ); | ||
1886 | vuc_pixel_SE_third_range = spu_insert( | ||
1887 | *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )), | ||
1888 | vuc_pixel_SE_third_range, 7 ); | ||
1889 | vuc_pixel_SE_third_range = spu_insert( | ||
1890 | *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )), | ||
1891 | vuc_pixel_SE_third_range, 11 ); | ||
1892 | vuc_pixel_SE_third_range = spu_insert( | ||
1893 | *((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )), | ||
1894 | vuc_pixel_SE_third_range, 15 ); | ||
1895 | // fourth range | ||
1896 | vector unsigned char vuc_pixel_SE_fourth_range = spu_insert( | ||
1897 | *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 ); | ||
1898 | vuc_pixel_SE_fourth_range = spu_insert( | ||
1899 | *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )), | ||
1900 | vuc_pixel_SE_fourth_range, 7 ); | ||
1901 | vuc_pixel_SE_fourth_range = spu_insert( | ||
1902 | *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )), | ||
1903 | vuc_pixel_SE_fourth_range, 11 ); | ||
1904 | vuc_pixel_SE_fourth_range = spu_insert( | ||
1905 | *((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )), | ||
1906 | vuc_pixel_SE_fourth_range, 15 ); | ||
1907 | |||
1908 | |||
1909 | |||
1910 | // convert to float | ||
1911 | vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 ); | ||
1912 | vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 ); | ||
1913 | vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 ); | ||
1914 | vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 ); | ||
1915 | |||
1916 | vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 ); | ||
1917 | vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 ); | ||
1918 | vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 ); | ||
1919 | vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 ); | ||
1920 | |||
1921 | vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 ); | ||
1922 | vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 ); | ||
1923 | vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 ); | ||
1924 | vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 ); | ||
1925 | |||
1926 | vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 ); | ||
1927 | vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 ); | ||
1928 | vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 ); | ||
1929 | vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 ); | ||
1930 | |||
1931 | // first linear interpolation: EWtop | ||
1932 | // EWtop = NW + EWweight*(NE-NW) | ||
1933 | // | ||
1934 | // first range | ||
1935 | vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range ); | ||
1936 | vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range, | ||
1937 | vf_EWtop_first_range_tmp, | ||
1938 | vf_pixel_NW_first_range ); | ||
1939 | |||
1940 | // second range | ||
1941 | vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range ); | ||
1942 | vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range, | ||
1943 | vf_EWtop_second_range_tmp, | ||
1944 | vf_pixel_NW_second_range ); | ||
1945 | |||
1946 | // third range | ||
1947 | vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range ); | ||
1948 | vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range, | ||
1949 | vf_EWtop_third_range_tmp, | ||
1950 | vf_pixel_NW_third_range ); | ||
1951 | |||
1952 | // fourth range | ||
1953 | vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range ); | ||
1954 | vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range, | ||
1955 | vf_EWtop_fourth_range_tmp, | ||
1956 | vf_pixel_NW_fourth_range ); | ||
1957 | |||
1958 | |||
1959 | |||
1960 | // second linear interpolation: EWbottom | ||
1961 | // EWbottom = SW + EWweight*(SE-SW) | ||
1962 | // | ||
1963 | // first range | ||
1964 | vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range ); | ||
1965 | vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range, | ||
1966 | vf_EWbottom_first_range_tmp, | ||
1967 | vf_pixel_SW_first_range ); | ||
1968 | |||
1969 | // second range | ||
1970 | vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range ); | ||
1971 | vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range, | ||
1972 | vf_EWbottom_second_range_tmp, | ||
1973 | vf_pixel_SW_second_range ); | ||
1974 | // third range | ||
1975 | vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range ); | ||
1976 | vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range, | ||
1977 | vf_EWbottom_third_range_tmp, | ||
1978 | vf_pixel_SW_third_range ); | ||
1979 | |||
1980 | // fourth range | ||
1981 | vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range ); | ||
1982 | vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range, | ||
1983 | vf_EWbottom_fourth_range_tmp, | ||
1984 | vf_pixel_SW_fourth_range ); | ||
1985 | |||
1986 | |||
1987 | |||
1988 | // third linear interpolation: the bilinear interpolated value | ||
1989 | // result = EWtop + NSweight*(EWbottom-EWtop); | ||
1990 | // | ||
1991 | // first range | ||
1992 | vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range ); | ||
1993 | vector float vf_result_first_range = spu_madd( vf_NSweight, | ||
1994 | vf_result_first_range_tmp, | ||
1995 | vf_EWtop_first_range ); | ||
1996 | |||
1997 | // second range | ||
1998 | vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range ); | ||
1999 | vector float vf_result_second_range = spu_madd( vf_NSweight, | ||
2000 | vf_result_second_range_tmp, | ||
2001 | vf_EWtop_second_range ); | ||
2002 | |||
2003 | // third range | ||
2004 | vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range ); | ||
2005 | vector float vf_result_third_range = spu_madd( vf_NSweight, | ||
2006 | vf_result_third_range_tmp, | ||
2007 | vf_EWtop_third_range ); | ||
2008 | |||
2009 | // fourth range | ||
2010 | vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range ); | ||
2011 | vector float vf_result_fourth_range = spu_madd( vf_NSweight, | ||
2012 | vf_result_fourth_range_tmp, | ||
2013 | vf_EWtop_fourth_range ); | ||
2014 | |||
2015 | |||
2016 | |||
2017 | // convert back: using saturated arithmetic | ||
2018 | vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range ); | ||
2019 | vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range ); | ||
2020 | vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range ); | ||
2021 | vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range ); | ||
2022 | |||
2023 | // merge results->lower,upper | ||
2024 | vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F, | ||
2025 | 0x13, 0x17, 0x1B, 0x1F, | ||
2026 | 0x00, 0x00, 0x00, 0x00, | ||
2027 | 0x00, 0x00, 0x00, 0x00 }; | ||
2028 | |||
2029 | vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00, | ||
2030 | 0x00, 0x00, 0x00, 0x00, | ||
2031 | 0x03, 0x07, 0x0B, 0x0F, | ||
2032 | 0x13, 0x17, 0x1B, 0x1F }; | ||
2033 | |||
2034 | vector unsigned char vuc_result_first_second = | ||
2035 | spu_shuffle( (vector unsigned char) vui_result_first_range, | ||
2036 | (vector unsigned char) vui_result_second_range, | ||
2037 | vuc_mask_merge_result_first_second ); | ||
2038 | |||
2039 | vector unsigned char vuc_result_third_fourth = | ||
2040 | spu_shuffle( (vector unsigned char) vui_result_third_range, | ||
2041 | (vector unsigned char) vui_result_fourth_range, | ||
2042 | vuc_mask_merge_result_third_fourth ); | ||
2043 | |||
2044 | // store result | ||
2045 | *((vector unsigned char*)dst) = spu_or( vuc_result_first_second, | ||
2046 | vuc_result_third_fourth ); | ||
2047 | dst += 16; | ||
2048 | } | ||
2049 | } | ||
2050 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c b/apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c new file mode 100644 index 0000000000..0eb51cc682 --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/spulibs/fb_writer.c | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [ibm] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "spu_common.h" | ||
29 | |||
30 | #include <spu_intrinsics.h> | ||
31 | #include <spu_mfcio.h> | ||
32 | #include <stdio.h> | ||
33 | #include <string.h> | ||
34 | |||
35 | // Debugging | ||
36 | //#define DEBUG | ||
37 | |||
/*
 * deprintf(): printf-style debug trace.
 *
 * With DEBUG defined the message is written to stdout and flushed
 * immediately; otherwise the macro expands to nothing.
 *
 * The DEBUG expansion is wrapped in do { } while (0) so that the macro
 * behaves as ONE statement. The previous two-statement form silently
 * moved the fflush() out of an unbraced `if` body and did not compile
 * in front of an `else`.
 */
#ifdef DEBUG
#define deprintf(fmt, args...) \
	do { \
		fprintf(stdout, fmt, ##args); \
		fflush(stdout); \
	} while (0)
#else
#define deprintf(fmt, args...)
#endif
45 | |||
46 | void cpy_to_fb(unsigned int); /* copies parms.data to the framebuffer; the argument is the base MFC tag id (defined below) */ | ||
47 | |||
48 | /* fb_writer_spu parms: local copy of the parameter block, fetched by DMA in main() from the address received through the inbound mailbox */ | ||
49 | static volatile struct fb_writer_parms_t parms __attribute__ ((aligned(128))); | ||
50 | |||
51 | /* Code running on SPU */ | ||
52 | int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused))) | ||
53 | { | ||
54 | deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id); | ||
55 | uint32_t ea_mfc, mbox; | ||
56 | // send ready message | ||
57 | spu_write_out_mbox(SPU_READY); | ||
58 | |||
59 | while (1) { | ||
60 | /* Check mailbox */ | ||
61 | mbox = spu_read_in_mbox(); | ||
62 | deprintf("[SPU] Message is %u\n", mbox); | ||
63 | switch (mbox) { | ||
64 | case SPU_EXIT: | ||
65 | deprintf("[SPU] fb_writer goes down...\n"); | ||
66 | return 0; | ||
67 | case SPU_START: | ||
68 | break; | ||
69 | default: | ||
70 | deprintf("[SPU] Cannot handle message\n"); | ||
71 | continue; | ||
72 | } | ||
73 | |||
74 | /* Tag Manager setup */ | ||
75 | unsigned int tags; | ||
76 | tags = mfc_multi_tag_reserve(5); | ||
77 | if (tags == MFC_TAG_INVALID) { | ||
78 | deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n"); | ||
79 | return 0; | ||
80 | } | ||
81 | |||
82 | /* Framebuffer parms */ | ||
83 | ea_mfc = spu_read_in_mbox(); | ||
84 | deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc); | ||
85 | spu_mfcdma32(&parms, (unsigned int)ea_mfc, | ||
86 | sizeof(struct fb_writer_parms_t), tags, | ||
87 | MFC_GET_CMD); | ||
88 | deprintf("[SPU] argp = %u\n", (unsigned int)argp); | ||
89 | DMA_WAIT_TAG(tags); | ||
90 | |||
91 | /* Copy parms->data to framebuffer */ | ||
92 | deprintf("[SPU] Copying to framebuffer started\n"); | ||
93 | cpy_to_fb(tags); | ||
94 | deprintf("[SPU] Copying to framebuffer done!\n"); | ||
95 | |||
96 | mfc_multi_tag_release(tags, 5); | ||
97 | deprintf("[SPU] fb_writer_spu... done!\n"); | ||
98 | /* Send FIN msg */ | ||
99 | spu_write_out_mbox(SPU_FIN); | ||
100 | } | ||
101 | |||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | void cpy_to_fb(unsigned int tag_id_base) | ||
106 | { | ||
107 | unsigned int i; | ||
108 | unsigned char current_buf; | ||
109 | uint8_t *in = parms.data; | ||
110 | |||
111 | /* Align fb pointer which was centered before */ | ||
112 | uint8_t *fb = | ||
113 | (unsigned char *)((unsigned int)parms.center & 0xFFFFFFF0); | ||
114 | |||
115 | uint32_t bounded_input_height = parms.bounded_input_height; | ||
116 | uint32_t bounded_input_width = parms.bounded_input_width; | ||
117 | uint32_t fb_pixel_size = parms.fb_pixel_size; | ||
118 | |||
119 | uint32_t out_line_stride = parms.out_line_stride; | ||
120 | uint32_t in_line_stride = parms.in_line_stride; | ||
121 | uint32_t in_line_size = bounded_input_width * fb_pixel_size; | ||
122 | |||
123 | current_buf = 0; | ||
124 | |||
125 | /* Local store buffer */ | ||
126 | static volatile uint8_t buf[4][BUFFER_SIZE] | ||
127 | __attribute__ ((aligned(128))); | ||
128 | /* do 4-times multibuffering using DMA list, process in two steps */ | ||
129 | for (i = 0; i < bounded_input_height >> 2; i++) { | ||
130 | /* first buffer */ | ||
131 | DMA_WAIT_TAG(tag_id_base + 1); | ||
132 | // retrieve buffer | ||
133 | spu_mfcdma32(buf[0], (unsigned int)in, in_line_size, | ||
134 | tag_id_base + 1, MFC_GETB_CMD); | ||
135 | DMA_WAIT_TAG(tag_id_base + 1); | ||
136 | // store buffer | ||
137 | spu_mfcdma32(buf[0], (unsigned int)fb, in_line_size, | ||
138 | tag_id_base + 1, MFC_PUTB_CMD); | ||
139 | in += in_line_stride; | ||
140 | fb += out_line_stride; | ||
141 | deprintf("[SPU] 1st buffer copied in=0x%x, fb=0x%x\n", in, | ||
142 | fb); | ||
143 | |||
144 | /* second buffer */ | ||
145 | DMA_WAIT_TAG(tag_id_base + 2); | ||
146 | // retrieve buffer | ||
147 | spu_mfcdma32(buf[1], (unsigned int)in, in_line_size, | ||
148 | tag_id_base + 2, MFC_GETB_CMD); | ||
149 | DMA_WAIT_TAG(tag_id_base + 2); | ||
150 | // store buffer | ||
151 | spu_mfcdma32(buf[1], (unsigned int)fb, in_line_size, | ||
152 | tag_id_base + 2, MFC_PUTB_CMD); | ||
153 | in += in_line_stride; | ||
154 | fb += out_line_stride; | ||
155 | deprintf("[SPU] 2nd buffer copied in=0x%x, fb=0x%x\n", in, | ||
156 | fb); | ||
157 | |||
158 | /* third buffer */ | ||
159 | DMA_WAIT_TAG(tag_id_base + 3); | ||
160 | // retrieve buffer | ||
161 | spu_mfcdma32(buf[2], (unsigned int)in, in_line_size, | ||
162 | tag_id_base + 3, MFC_GETB_CMD); | ||
163 | DMA_WAIT_TAG(tag_id_base + 3); | ||
164 | // store buffer | ||
165 | spu_mfcdma32(buf[2], (unsigned int)fb, in_line_size, | ||
166 | tag_id_base + 3, MFC_PUTB_CMD); | ||
167 | in += in_line_stride; | ||
168 | fb += out_line_stride; | ||
169 | deprintf("[SPU] 3rd buffer copied in=0x%x, fb=0x%x\n", in, | ||
170 | fb); | ||
171 | |||
172 | /* fourth buffer */ | ||
173 | DMA_WAIT_TAG(tag_id_base + 4); | ||
174 | // retrieve buffer | ||
175 | spu_mfcdma32(buf[3], (unsigned int)in, in_line_size, | ||
176 | tag_id_base + 4, MFC_GETB_CMD); | ||
177 | DMA_WAIT_TAG(tag_id_base + 4); | ||
178 | // store buffer | ||
179 | spu_mfcdma32(buf[3], (unsigned int)fb, in_line_size, | ||
180 | tag_id_base + 4, MFC_PUTB_CMD); | ||
181 | in += in_line_stride; | ||
182 | fb += out_line_stride; | ||
183 | deprintf("[SPU] 4th buffer copied in=0x%x, fb=0x%x\n", in, | ||
184 | fb); | ||
185 | deprintf("[SPU] Loop #%i, bounded_input_height=%i\n", i, | ||
186 | bounded_input_height >> 2); | ||
187 | } | ||
188 | DMA_WAIT_TAG(tag_id_base + 2); | ||
189 | DMA_WAIT_TAG(tag_id_base + 3); | ||
190 | DMA_WAIT_TAG(tag_id_base + 4); | ||
191 | } | ||
192 | |||
193 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h b/apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h new file mode 100644 index 0000000000..42c328c83d --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/spulibs/spu_common.h | |||
@@ -0,0 +1,108 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [ibm] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | /* Common definitions/macros for SPUs */ | ||
29 | |||
30 | #ifndef _SPU_COMMON_H | ||
31 | #define _SPU_COMMON_H | ||
32 | |||
33 | #include <stdio.h> | ||
34 | #include <stdint.h> | ||
35 | #include <string.h> | ||
36 | |||
/*
 * Tag management
 *
 * DMA_WAIT_TAG(_tag): select the single tag group `_tag` in the MFC tag
 * mask, then read the tag status with the "all" condition.
 *
 * Written as do { } while (0) so the macro is one statement and must be
 * followed by a semicolon; it is safe inside unbraced if/else bodies.
 * The mask is built from an unsigned 1 so that tag 31 does not shift
 * into the sign bit.
 */
#define DMA_WAIT_TAG(_tag) \
	do { \
		mfc_write_tag_mask(1u << (_tag)); \
		mfc_read_tag_status_all(); \
	} while (0)
41 | |||
42 | /* SPU mailbox messages: 32-bit words exchanged through the SPU inbound/outbound mailboxes */ | ||
43 | #define SPU_READY 0 /* SPU -> outbound mailbox: written once at the start of main() */ | ||
44 | #define SPU_START 1 /* inbound: start a job; main() then reads a second word holding the parameter block address */ | ||
45 | #define SPU_FIN 2 /* SPU -> outbound mailbox: written after a job has been processed */ | ||
46 | #define SPU_EXIT 3 /* inbound: main() returns 0 */ | ||
47 | |||
48 | /* Tags: fixed MFC tag ids */ | ||
49 | #define RETR_BUF 0 /* base tag for fetching input; the yuv2rgb converter uses RETR_BUF + buf_idx with buf_idx 0 or 1 */ | ||
50 | #define STR_BUF 1 /* tag for storing results; NOTE(review): same value as RETR_BUF + 1 */ | ||
51 | #define TAG_INIT 2 /* NOTE(review): no use visible in this part of the sources - confirm */ | ||
52 | |||
53 | /* Buffersizes */ | ||
54 | #define MAX_HDTV_WIDTH 1920 | ||
55 | #define MAX_HDTV_HEIGHT 1080 | ||
56 | /* One stride of HDTV; 7680 = 1920 * 4, presumably MAX_HDTV_WIDTH pixels at 4 bytes each - confirm */ | ||
57 | #define BUFFER_SIZE 7680 | ||
58 | |||
59 | /* fb_writer ppu/spu exchange parms; read by cpy_to_fb() in fb_writer.c */ | ||
60 | struct fb_writer_parms_t { | ||
61 | uint8_t *data; /* source address of the first line to copy */ | ||
62 | uint8_t *center; /* destination address in the framebuffer; cpy_to_fb() masks it with 0xFFFFFFF0 before use */ | ||
63 | uint32_t out_line_stride; /* added to the destination pointer after each line */ | ||
64 | uint32_t in_line_stride; /* added to the source pointer after each line */ | ||
65 | uint32_t bounded_input_height; /* number of lines; cpy_to_fb() copies (height >> 2) groups of four */ | ||
66 | uint32_t bounded_input_width; /* multiplied by fb_pixel_size to get the bytes copied per line */ | ||
67 | uint32_t fb_pixel_size; /* see bounded_input_width */ | ||
68 | |||
69 | /* This padding is to fulfill the need for 16 byte alignment. On parm change, update! (The aligned(128) attribute below additionally rounds sizeof up to a multiple of 128.) */ | ||
70 | char padding[4]; | ||
71 | } __attribute__((aligned(128))); | ||
72 | |||
73 | /* yuv2rgb ppu/spu exchange parms; fetched into parms_converter by the yuv2rgb converter's main() */ | ||
74 | struct yuv2rgb_parms_t { | ||
75 | uint8_t* y_plane; /* source address of the Y plane; read with a line stride of src_pixel_width bytes */ | ||
76 | uint8_t* v_plane; /* source address of the V plane; read with a line stride of src_pixel_width >> 1 bytes */ | ||
77 | uint8_t* u_plane; /* source address of the U plane; same stride as v_plane */ | ||
78 | |||
79 | uint8_t* dstBuffer; /* destination of the converted BGRA lines, src_pixel_width * 4 bytes per line */ | ||
80 | |||
81 | unsigned int src_pixel_width; /* main() selects the w16 path when (width & 0x1f) is non-zero, else the w32 path */ | ||
82 | unsigned int src_pixel_height; /* processed in groups of four lines (height >> 2) */ | ||
83 | |||
84 | /* This padding is to fulfill the need for 16 byte alignment. On parm change, update! The expression fills the members summed below up to the next multiple of 128 bytes (a full 128 if the sum already is one), assuming no compiler-inserted padding between them. */ | ||
85 | char padding[128 - ((4 * sizeof(uint8_t *) + 2 * sizeof(unsigned int)) & 0x7F)]; | ||
86 | } __attribute__((aligned(128))); | ||
87 | |||
88 | /* bilin_scaler ppu/spu exchange parms; NOTE(review): the consumer (bilin_scaler.c) is not visible here, field notes below are inferred from the names - confirm */ | ||
89 | struct scale_parms_t { | ||
90 | uint8_t* y_plane; /* presumably the source Y plane */ | ||
91 | uint8_t* v_plane; /* presumably the source V plane */ | ||
92 | uint8_t* u_plane; /* presumably the source U plane */ | ||
93 | |||
94 | uint8_t* dstBuffer; /* presumably the destination of the scaled image */ | ||
95 | |||
96 | unsigned int src_pixel_width; | ||
97 | unsigned int src_pixel_height; | ||
98 | |||
99 | unsigned int dst_pixel_width; | ||
100 | unsigned int dst_pixel_height; | ||
101 | |||
102 | /* This padding is to fulfill the need for 16 byte alignment. On parm change, update! The expression fills the members summed below up to the next multiple of 128 bytes (a full 128 if the sum already is one), assuming no compiler-inserted padding between them. */ | ||
103 | char padding[128 - ((4 * sizeof(uint8_t *) + 4 * sizeof(unsigned int)) & 0x7F)]; | ||
104 | } __attribute__((aligned(128))); | ||
105 | |||
106 | #endif /* _SPU_COMMON_H */ | ||
107 | |||
108 | |||
diff --git a/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c b/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c new file mode 100644 index 0000000000..5e166914c5 --- /dev/null +++ b/apps/plugins/sdl/src/video/ps3/spulibs/yuv2rgb_converter.c | |||
@@ -0,0 +1,629 @@ | |||
1 | /* | ||
2 | * SDL - Simple DirectMedia Layer | ||
3 | * CELL BE Support for PS3 Framebuffer | ||
4 | * Copyright (C) 2008, 2009 International Business Machines Corporation | ||
5 | * | ||
6 | * This library is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU Lesser General Public License as published | ||
8 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This library is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with this library; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 | ||
19 | * USA | ||
20 | * | ||
21 | * Martin Lowinski <lowinski [at] de [dot] ibm [ibm] com> | ||
22 | * Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com> | ||
23 | * SPE code based on research by: | ||
24 | * Rene Becker | ||
25 | * Thimo Emmerich | ||
26 | */ | ||
27 | |||
28 | #include "spu_common.h" | ||
29 | |||
30 | #include <spu_intrinsics.h> | ||
31 | #include <spu_mfcio.h> | ||
32 | |||
33 | // Debugging | ||
34 | //#define DEBUG | ||
35 | |||
/*
 * deprintf(): printf-style debug trace.
 *
 * With DEBUG defined the message is written to stdout and flushed
 * immediately; otherwise the macro expands to nothing.
 *
 * The DEBUG expansion is wrapped in do { } while (0) so that the macro
 * behaves as ONE statement. The previous two-statement form silently
 * moved the fflush() out of an unbraced `if` body and did not compile
 * in front of an `else`.
 */
#ifdef DEBUG
#define deprintf(fmt, args...) \
	do { \
		fprintf(stdout, fmt, ##args); \
		fflush(stdout); \
	} while (0)
#else
#define deprintf(fmt, args...)
#endif
43 | |||
44 | struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128))); /* local copy of the job parameters, filled by DMA in main() */ | ||
45 | |||
46 | /* A maximum of 8 lines Y (2 buffers of 4 lines), therefore 4 lines V, 4 lines U are stored | ||
47 | * there might be the need to retrieve misaligned data, adjust | ||
48 | * incoming v and u plane to be able to handle this (add 128) | ||
49 | */ | ||
50 | unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128))); | ||
51 | unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); | ||
52 | unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128))); | ||
53 | |||
54 | /* A maximum of 4 lines BGRA are stored, 4 byte per pixel; single staging buffer for the converted output */ | ||
55 | unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128))); | ||
56 | |||
57 | /* some vectors needed by the float to int conversion: upper (255) and lower (0.1) clamp values used by float_to_char() and vfloat_to_vuint() */ | ||
58 | static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f }; | ||
59 | static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f }; | ||
60 | |||
61 | void yuv_to_rgb_w16(); /* chosen by main() when (src_pixel_width & 0x1f) is non-zero */ | ||
62 | void yuv_to_rgb_w32(); /* chosen by main() when src_pixel_width is a multiple of 32 */ | ||
63 | |||
64 | void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width); | ||
65 | void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width); | ||
66 | |||
67 | |||
68 | int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused))) | ||
69 | { | ||
70 | deprintf("[SPU] yuv2rgb_spu is up... (on SPE #%llu)\n", spe_id); | ||
71 | uint32_t ea_mfc, mbox; | ||
72 | // send ready message | ||
73 | spu_write_out_mbox(SPU_READY); | ||
74 | |||
75 | while (1) { | ||
76 | /* Check mailbox */ | ||
77 | mbox = spu_read_in_mbox(); | ||
78 | deprintf("[SPU] Message is %u\n", mbox); | ||
79 | switch (mbox) { | ||
80 | case SPU_EXIT: | ||
81 | deprintf("[SPU] fb_writer goes down...\n"); | ||
82 | return 0; | ||
83 | case SPU_START: | ||
84 | break; | ||
85 | default: | ||
86 | deprintf("[SPU] Cannot handle message\n"); | ||
87 | continue; | ||
88 | } | ||
89 | |||
90 | /* Tag Manager setup */ | ||
91 | unsigned int tag_id; | ||
92 | tag_id = mfc_multi_tag_reserve(1); | ||
93 | if (tag_id == MFC_TAG_INVALID) { | ||
94 | deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n"); | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | /* DMA transfer for the input parameters */ | ||
99 | ea_mfc = spu_read_in_mbox(); | ||
100 | deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc); | ||
101 | spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD); | ||
102 | DMA_WAIT_TAG(tag_id); | ||
103 | |||
104 | /* There are alignment issues that involve handling of special cases | ||
105 | * a width of 32 results in a width of 16 in the chrominance | ||
106 | * --> choose the proper handling to optimize the performance | ||
107 | */ | ||
108 | deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height); | ||
109 | if (parms_converter.src_pixel_width & 0x1f) { | ||
110 | deprintf("[SPU] Using yuv_to_rgb_w16\n"); | ||
111 | yuv_to_rgb_w16(); | ||
112 | } else { | ||
113 | deprintf("[SPU] Using yuv_to_rgb_w32\n"); | ||
114 | yuv_to_rgb_w32(); | ||
115 | } | ||
116 | |||
117 | mfc_multi_tag_release(tag_id, 1); | ||
118 | deprintf("[SPU] yuv2rgb_spu... done!\n"); | ||
119 | /* Send FIN message */ | ||
120 | spu_write_out_mbox(SPU_FIN); | ||
121 | } | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | |||
127 | /* | ||
128 | * float_to_char() | ||
129 | * | ||
130 | * converts a float to a character using saturated | ||
131 | * arithmetic | ||
132 | * | ||
133 | * @param s float for conversion | ||
134 | * @returns converted character | ||
135 | */ | ||
136 | inline static unsigned char float_to_char(float s) { | ||
137 | vector float vec_s = spu_splats(s); | ||
138 | vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); | ||
139 | vec_s = spu_sel(vec_s, vec_0_1, select_1); | ||
140 | |||
141 | vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); | ||
142 | vec_s = spu_sel(vec_s, vec_255, select_2); | ||
143 | return (unsigned char) spu_extract(vec_s,0); | ||
144 | } | ||
145 | |||
146 | |||
147 | /* | ||
148 | * vfloat_to_vuint() | ||
149 | * | ||
150 | * converts a float vector to an unsinged int vector using saturated | ||
151 | * arithmetic | ||
152 | * | ||
153 | * @param vec_s float vector for conversion | ||
154 | * @returns converted unsigned int vector | ||
155 | */ | ||
156 | inline static vector unsigned int vfloat_to_vuint(vector float vec_s) { | ||
157 | vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s); | ||
158 | vec_s = spu_sel(vec_s, vec_0_1, select_1); | ||
159 | |||
160 | vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255); | ||
161 | vec_s = spu_sel(vec_s, vec_255, select_2); | ||
162 | return spu_convtu(vec_s,0); | ||
163 | } | ||
164 | |||
165 | |||
166 | void yuv_to_rgb_w16() { | ||
167 | // Pixel dimensions of the picture | ||
168 | uint32_t width, height; | ||
169 | |||
170 | // Extract parameters | ||
171 | width = parms_converter.src_pixel_width; | ||
172 | height = parms_converter.src_pixel_height; | ||
173 | |||
174 | // Plane data management | ||
175 | // Y | ||
176 | unsigned char* ram_addr_y = parms_converter.y_plane; | ||
177 | // V | ||
178 | unsigned char* ram_addr_v = parms_converter.v_plane; | ||
179 | // U | ||
180 | unsigned char* ram_addr_u = parms_converter.u_plane; | ||
181 | |||
182 | // BGRA | ||
183 | unsigned char* ram_addr_bgra = parms_converter.dstBuffer; | ||
184 | |||
185 | // Strides | ||
186 | unsigned int stride_y = width; | ||
187 | unsigned int stride_vu = width>>1; | ||
188 | |||
189 | // Buffer management | ||
190 | unsigned int buf_idx = 0; | ||
191 | unsigned int size_4lines_y = stride_y<<2; | ||
192 | unsigned int size_2lines_y = stride_y<<1; | ||
193 | unsigned int size_2lines_vu = stride_vu<<1; | ||
194 | |||
195 | // 2*width*4byte_per_pixel | ||
196 | unsigned int size_2lines_bgra = width<<3; | ||
197 | |||
198 | |||
199 | // start double-buffered processing | ||
200 | // 4 lines y | ||
201 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
202 | |||
203 | // 2 lines v | ||
204 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
205 | |||
206 | // 2 lines u | ||
207 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
208 | |||
209 | // Wait for these transfers to be completed | ||
210 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
211 | |||
212 | unsigned int i; | ||
213 | for(i=0; i<(height>>2)-1; i++) { | ||
214 | |||
215 | buf_idx^=1; | ||
216 | |||
217 | // 4 lines y | ||
218 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
219 | |||
220 | // 2 lines v | ||
221 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
222 | |||
223 | // 2 lines u | ||
224 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD); | ||
225 | |||
226 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
227 | |||
228 | buf_idx^=1; | ||
229 | |||
230 | |||
231 | // Convert YUV to BGRA, store it back (first two lines) | ||
232 | yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
233 | |||
234 | // Next two lines | ||
235 | yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, | ||
236 | v_plane[buf_idx] + stride_vu, | ||
237 | u_plane[buf_idx] + stride_vu, | ||
238 | bgra + size_2lines_bgra, | ||
239 | width); | ||
240 | |||
241 | // Wait for previous storing transfer to be completed | ||
242 | DMA_WAIT_TAG(STR_BUF); | ||
243 | |||
244 | // Store converted lines in two steps->max transfer size 16384 | ||
245 | spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
246 | ram_addr_bgra += size_2lines_bgra; | ||
247 | spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
248 | ram_addr_bgra += size_2lines_bgra; | ||
249 | |||
250 | // Move 4 lines | ||
251 | ram_addr_y += size_4lines_y; | ||
252 | ram_addr_v += size_2lines_vu; | ||
253 | ram_addr_u += size_2lines_vu; | ||
254 | |||
255 | buf_idx^=1; | ||
256 | } | ||
257 | |||
258 | // Convert YUV to BGRA, store it back (first two lines) | ||
259 | yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
260 | |||
261 | // Next two lines | ||
262 | yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y, | ||
263 | v_plane[buf_idx] + stride_vu, | ||
264 | u_plane[buf_idx] + stride_vu, | ||
265 | bgra + size_2lines_bgra, | ||
266 | width); | ||
267 | |||
268 | // Wait for previous storing transfer to be completed | ||
269 | DMA_WAIT_TAG(STR_BUF); | ||
270 | spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
271 | ram_addr_bgra += size_2lines_bgra; | ||
272 | spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
273 | |||
274 | // wait for previous storing transfer to be completed | ||
275 | DMA_WAIT_TAG(STR_BUF); | ||
276 | |||
277 | } | ||
278 | |||
279 | |||
280 | void yuv_to_rgb_w32() { | ||
281 | // Pixel dimensions of the picture | ||
282 | uint32_t width, height; | ||
283 | |||
284 | // Extract parameters | ||
285 | width = parms_converter.src_pixel_width; | ||
286 | height = parms_converter.src_pixel_height; | ||
287 | |||
288 | // Plane data management | ||
289 | // Y | ||
290 | unsigned char* ram_addr_y = parms_converter.y_plane; | ||
291 | // V | ||
292 | unsigned char* ram_addr_v = parms_converter.v_plane; | ||
293 | // U | ||
294 | unsigned char* ram_addr_u = parms_converter.u_plane; | ||
295 | |||
296 | // BGRA | ||
297 | unsigned char* ram_addr_bgra = parms_converter.dstBuffer; | ||
298 | |||
299 | // Strides | ||
300 | unsigned int stride_y = width; | ||
301 | unsigned int stride_vu = width>>1; | ||
302 | |||
303 | // Buffer management | ||
304 | unsigned int buf_idx = 0; | ||
305 | unsigned int size_4lines_y = stride_y<<2; | ||
306 | unsigned int size_2lines_y = stride_y<<1; | ||
307 | unsigned int size_2lines_vu = stride_vu<<1; | ||
308 | |||
309 | // 2*width*4byte_per_pixel | ||
310 | unsigned int size_2lines_bgra = width<<3; | ||
311 | |||
312 | // start double-buffered processing | ||
313 | // 4 lines y | ||
314 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
315 | // 2 lines v | ||
316 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
317 | // 2 lines u | ||
318 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
319 | |||
320 | // Wait for these transfers to be completed | ||
321 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
322 | |||
323 | unsigned int i; | ||
324 | for(i=0; i < (height>>2)-1; i++) { | ||
325 | buf_idx^=1; | ||
326 | // 4 lines y | ||
327 | spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
328 | deprintf("4lines = %d\n", size_4lines_y); | ||
329 | // 2 lines v | ||
330 | spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
331 | deprintf("2lines = %d\n", size_2lines_vu); | ||
332 | // 2 lines u | ||
333 | spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD); | ||
334 | deprintf("2lines = %d\n", size_2lines_vu); | ||
335 | |||
336 | DMA_WAIT_TAG((RETR_BUF + buf_idx)); | ||
337 | |||
338 | buf_idx^=1; | ||
339 | |||
340 | // Convert YUV to BGRA, store it back (first two lines) | ||
341 | yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
342 | |||
343 | // Next two lines | ||
344 | yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, | ||
345 | v_plane[buf_idx] + stride_vu, | ||
346 | u_plane[buf_idx] + stride_vu, | ||
347 | bgra + size_2lines_bgra, | ||
348 | width); | ||
349 | |||
350 | // Wait for previous storing transfer to be completed | ||
351 | DMA_WAIT_TAG(STR_BUF); | ||
352 | |||
353 | // Store converted lines in two steps->max transfer size 16384 | ||
354 | spu_mfcdma32(bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
355 | ram_addr_bgra += size_2lines_bgra; | ||
356 | spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
357 | ram_addr_bgra += size_2lines_bgra; | ||
358 | |||
359 | // Move 4 lines | ||
360 | ram_addr_y += size_4lines_y; | ||
361 | ram_addr_v += size_2lines_vu; | ||
362 | ram_addr_u += size_2lines_vu; | ||
363 | |||
364 | buf_idx^=1; | ||
365 | } | ||
366 | |||
367 | // Convert YUV to BGRA, store it back (first two lines) | ||
368 | yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width); | ||
369 | |||
370 | // Next two lines | ||
371 | yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y, | ||
372 | v_plane[buf_idx] + stride_vu, | ||
373 | u_plane[buf_idx] + stride_vu, | ||
374 | bgra + size_2lines_bgra, | ||
375 | width); | ||
376 | |||
377 | // Wait for previous storing transfer to be completed | ||
378 | DMA_WAIT_TAG(STR_BUF); | ||
379 | spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
380 | ram_addr_bgra += size_2lines_bgra; | ||
381 | spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD); | ||
382 | |||
383 | // Wait for previous storing transfer to be completed | ||
384 | DMA_WAIT_TAG(STR_BUF); | ||
385 | } | ||
386 | |||
387 | |||
388 | /* Some vectors needed by the yuv 2 rgb conversion algorithm */ | ||
389 | const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f }; | ||
390 | const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | ||
391 | const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 }; | ||
392 | const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 }; | ||
393 | const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B }; | ||
394 | const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F }; | ||
395 | |||
396 | const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f}; | ||
397 | const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f}; | ||
398 | const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f}; | ||
399 | const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f}; | ||
400 | |||
401 | const vector unsigned int vec_alpha = { 255 << 24, 255 << 24, 255 << 24, 255 << 24 }; | ||
402 | |||
403 | const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 }; | ||
404 | const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F }; | ||
405 | |||
406 | |||
/*
 * yuv_to_rgb_w16_line()
 *
 * Scalar conversion of two adjacent picture lines from planar YUV to 32-bit
 * pixels. The loop advances two pixels at a time; the callers use it for
 * pictures whose width is a multiple of 16.
 *
 * Every U and V sample is shared by a square of four pixels (two wide, on
 * both lines). The second line is read at y_addr + width and written width
 * pixels after the first one.
 *
 * @param y_addr address of the y plane in local store
 * @param v_addr address of the v plane in local store
 * @param u_addr address of the u plane in local store
 * @param bgra_addr_ address of the bgra output buffer
 * @param width the width in pixel
 */
void yuv_to_rgb_w16_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) {
	// each pixel is stored as an integer
	unsigned int* bgra_addr = (unsigned int*) bgra_addr_;

	// Opaque alpha in the top byte. Unsigned literal on purpose:
	// 255 << 24 overflows a signed int.
	const unsigned int alpha = 0xFF000000u;

	unsigned int x;
	for(x = 0; x < width; x+=2) {
		// Walk the line in steps of two, because every u and v value applies
		// to 4 pixels (two high, two wide)
		const unsigned char Y_1 = *(y_addr + x);
		const unsigned char Y_2 = *(y_addr + x + 1);
		const unsigned char Y_3 = *(y_addr + x + width);
		const unsigned char Y_4 = *(y_addr + x + width + 1);
		const unsigned char U = *(u_addr + (x >> 1));
		const unsigned char V = *(v_addr + (x >> 1));

		// Chroma centred on zero
		const float V_minus_128 = (float)((float)V - 128.0f);
		const float U_minus_128 = (float)((float)U - 128.0f);

		// Chroma contribution per channel, shared by the four pixels
		const float R_precalculate = 1.403f * V_minus_128;
		const float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128);
		const float B_precalculate = 1.773f * U_minus_128;

		// float_to_char() is defined elsewhere in this file; presumably it
		// saturates the value to 0..255 - confirm against its definition
		const unsigned char R_1 = float_to_char((Y_1 + R_precalculate));
		const unsigned char R_2 = float_to_char((Y_2 + R_precalculate));
		const unsigned char R_3 = float_to_char((Y_3 + R_precalculate));
		const unsigned char R_4 = float_to_char((Y_4 + R_precalculate));
		const unsigned char G_1 = float_to_char((Y_1 + G_precalculate));
		const unsigned char G_2 = float_to_char((Y_2 + G_precalculate));
		const unsigned char G_3 = float_to_char((Y_3 + G_precalculate));
		const unsigned char G_4 = float_to_char((Y_4 + G_precalculate));
		const unsigned char B_1 = float_to_char((Y_1 + B_precalculate));
		const unsigned char B_2 = float_to_char((Y_2 + B_precalculate));
		const unsigned char B_3 = float_to_char((Y_3 + B_precalculate));
		const unsigned char B_4 = float_to_char((Y_4 + B_precalculate));

		// Pack each pixel as the word alpha<<24 | R<<16 | G<<8 | B
		*(bgra_addr + x) = (unsigned int)B_1 | ((unsigned int)G_1 << 8) | ((unsigned int)R_1 << 16) | alpha;
		*(bgra_addr + x + 1) = (unsigned int)B_2 | ((unsigned int)G_2 << 8) | ((unsigned int)R_2 << 16) | alpha;
		*(bgra_addr + x + width) = (unsigned int)B_3 | ((unsigned int)G_3 << 8) | ((unsigned int)R_3 << 16) | alpha;
		*(bgra_addr + x + width + 1) = (unsigned int)B_4 | ((unsigned int)G_4 << 8) | ((unsigned int)R_4 << 16) | alpha;
	}
}
459 | |||
460 | |||
461 | /* | ||
462 | * yuv_to_rgb_w32_line() | ||
463 | * | ||
464 | * Vectorized conversion of two adjacent lines of planar YUV input to 32-bit output pixels, 32 pixels of each line per loop pass; width has to be a multiple of 32. | ||
465 | * Every U and V sample is applied to a square of four pixels (two wide, on both lines). | ||
466 | * | ||
467 | * @param y_addr address of the y plane in local store (the second line is read at y_addr + width) | ||
468 | * @param v_addr address of the v plane in local store | ||
469 | * @param u_addr address of the u plane in local store | ||
470 | * @param bgra_addr_ address of the bgra output buffer (the second line is written width pixels after the first) | ||
471 | * @param width the width in pixel | ||
472 | */ | ||
473 | void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) { | ||
474 | // Each output pixel is stored as one 32-bit integer, so bgra_addr is indexed in pixels | ||
475 | unsigned int* bgra_addr = (unsigned int*) bgra_addr_; | ||
476 | |||
477 | unsigned int x; | ||
478 | for(x = 0; x < width; x+=32) { | ||
479 | // Each pass handles 32 pixels of both lines; every u and v value applies to 4 pixels (two high, two wide) | ||
480 | |||
    // Load 2 x 16 luma bytes for each of the two lines, plus the 16 U and 16 V bytes that belong to them.
    // NOTE(review): the pointers are dereferenced as whole vectors, so they are presumably expected to be 16-byte aligned - confirm.
481 | const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x)); | ||
482 | const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16)); | ||
483 | const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width)); | ||
484 | const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16)); | ||
485 | const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1))); | ||
486 | const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1))); | ||
487 | |||
    // Widen U and V to float, four samples per vector (the vec_char2int_* patterns pick bytes 0-3, 4-7, 8-11, 12-15), and add the -128 bias
488 | const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128); | ||
489 | const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_second), 0),vec_minus_128); | ||
490 | const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128); | ||
491 | const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128); | ||
492 | |||
493 | const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128); | ||
494 | const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128); | ||
495 | const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128); | ||
496 | const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128); | ||
497 | |||
    // Widen luma to float, four pixels per vector: Y_1..Y_8 hold the first line, Y_9..Y_16 the second line
498 | vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0); | ||
499 | vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0); | ||
500 | vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0); | ||
501 | vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0); | ||
502 | vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0); | ||
503 | vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0); | ||
504 | vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0); | ||
505 | vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0); | ||
506 | vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0); | ||
507 | vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_second), 0); | ||
508 | vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0); | ||
509 | vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0); | ||
510 | vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0); | ||
511 | vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0); | ||
512 | vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0); | ||
513 | vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0); | ||
514 | |||
    // Red chroma term: R coefficient * (V - 128), one value per chroma sample
515 | const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1); | ||
516 | const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2); | ||
517 | const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3); | ||
518 | const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4); | ||
519 | |||
    // Duplicate every term for its two horizontal pixels: { f0, f1, f2, f3 } becomes { f0, f0, f1, f1 } (upper) and { f2, f2, f3, f3 } (lower)
520 | const vector float R1_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_upper); | ||
521 | const vector float R2_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_lower); | ||
522 | const vector float R3_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_upper); | ||
523 | const vector float R4_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_lower); | ||
524 | const vector float R5_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_upper); | ||
525 | const vector float R6_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_lower); | ||
526 | const vector float R7_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_upper); | ||
527 | const vector float R8_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_lower); | ||
528 | |||
529 | |||
    // Green chroma term: Gu coefficient * (U - 128) + Gv coefficient * (V - 128), both coefficients being negative
530 | const vector float G1a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff)); | ||
531 | const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff)); | ||
532 | const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff)); | ||
533 | const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff)); | ||
534 | |||
535 | const vector float G1_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_upper); | ||
536 | const vector float G2_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_lower); | ||
537 | const vector float G3_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_upper); | ||
538 | const vector float G4_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_lower); | ||
539 | const vector float G5_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_upper); | ||
540 | const vector float G6_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_lower); | ||
541 | const vector float G7_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_upper); | ||
542 | const vector float G8_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_lower); | ||
543 | |||
544 | |||
    // Blue chroma term: B coefficient * (U - 128)
545 | const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1); | ||
546 | const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2); | ||
547 | const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3); | ||
548 | const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4); | ||
549 | |||
550 | const vector float B1_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_upper); | ||
551 | const vector float B2_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_lower); | ||
552 | const vector float B3_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_upper); | ||
553 | const vector float B4_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_lower); | ||
554 | const vector float B5_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_upper); | ||
555 | const vector float B6_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_lower); | ||
556 | const vector float B7_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_upper); | ||
557 | const vector float B8_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_lower); | ||
558 | |||
559 | |||
    // Add luma to the chroma terms. The same eight chroma vectors serve both lines:
    // results 1..8 belong to the first line, 9..16 to the second line.
    // vfloat_to_vuint() is defined elsewhere in this file.
560 | const vector unsigned int R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate)); | ||
561 | const vector unsigned int R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate)); | ||
562 | const vector unsigned int R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate)); | ||
563 | const vector unsigned int R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate)); | ||
564 | const vector unsigned int R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate)); | ||
565 | const vector unsigned int R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate)); | ||
566 | const vector unsigned int R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate)); | ||
567 | const vector unsigned int R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate)); | ||
568 | const vector unsigned int R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate)); | ||
569 | const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate)); | ||
570 | const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate)); | ||
571 | const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate)); | ||
572 | const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate)); | ||
573 | const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate)); | ||
574 | const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate)); | ||
575 | const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate)); | ||
576 | |||
577 | const vector unsigned int G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate)); | ||
578 | const vector unsigned int G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate)); | ||
579 | const vector unsigned int G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate)); | ||
580 | const vector unsigned int G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate)); | ||
581 | const vector unsigned int G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate)); | ||
582 | const vector unsigned int G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate)); | ||
583 | const vector unsigned int G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate)); | ||
584 | const vector unsigned int G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate)); | ||
585 | const vector unsigned int G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate)); | ||
586 | const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate)); | ||
587 | const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate)); | ||
588 | const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate)); | ||
589 | const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate)); | ||
590 | const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate)); | ||
591 | const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate)); | ||
592 | const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate)); | ||
593 | |||
594 | const vector unsigned int B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate)); | ||
595 | const vector unsigned int B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate)); | ||
596 | const vector unsigned int B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate)); | ||
597 | const vector unsigned int B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate)); | ||
598 | const vector unsigned int B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate)); | ||
599 | const vector unsigned int B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate)); | ||
600 | const vector unsigned int B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate)); | ||
601 | const vector unsigned int B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate)); | ||
602 | const vector unsigned int B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate)); | ||
603 | const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate)); | ||
604 | const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate)); | ||
605 | const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate)); | ||
606 | const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate)); | ||
607 | const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate)); | ||
608 | const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate)); | ||
609 | const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate)); | ||
610 | |||
    // Merge the channels into pixel words and store four pixels per statement: alpha | R moved up 2 bytes | G moved up 1 byte | B.
    // NOTE(review): spu_slqwbyte shifts the whole 128-bit register, not each lane, so this is only correct
    // if every lane of R and G is at most 255 - presumably guaranteed by vfloat_to_vuint(); confirm.
611 | *((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha, B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1))); | ||
612 | *((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha, B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1))); | ||
613 | *((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha, B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1))); | ||
614 | *((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha, B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1))); | ||
615 | *((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha, B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1))); | ||
616 | *((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha, B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1))); | ||
617 | *((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha, B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1))); | ||
618 | *((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha, B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1))); | ||
619 | *((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha, B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1))); | ||
620 | *((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 2),spu_slqwbyte(G_10, 1))); | ||
621 | *((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1))); | ||
622 | *((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1))); | ||
623 | *((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1))); | ||
624 | *((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1))); | ||
625 | *((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1))); | ||
626 | *((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1))); | ||
627 | } | ||
628 | } | ||
629 | |||