diff options
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/spc.c | 309 | ||||
-rw-r--r-- | apps/codecs/spc/spc_codec.h | 100 | ||||
-rw-r--r-- | apps/codecs/spc/spc_dsp.c | 232 |
3 files changed, 518 insertions, 123 deletions
diff --git a/apps/codecs/spc.c b/apps/codecs/spc.c index f2890cd4a4..ae3026354d 100644 --- a/apps/codecs/spc.c +++ b/apps/codecs/spc.c | |||
@@ -185,12 +185,253 @@ static int LoadID666(unsigned char *buf) { | |||
185 | } | 185 | } |
186 | 186 | ||
187 | /**************** Codec ****************/ | 187 | /**************** Codec ****************/ |
188 | enum {SAMPLE_RATE = 32000}; | ||
189 | static struct Spc_Emu spc_emu IDATA_ATTR CACHEALIGN_ATTR; | ||
188 | 190 | ||
189 | static int32_t samples[WAV_CHUNK_SIZE*2] IBSS_ATTR; | 191 | #if SPC_DUAL_CORE |
192 | /** Implementations for pipelined dual-core operation **/ | ||
193 | static int spc_emu_thread_stack[DEFAULT_STACK_SIZE/sizeof(int)] | ||
194 | CACHEALIGN_ATTR; | ||
190 | 195 | ||
191 | static struct Spc_Emu spc_emu IDATA_ATTR; | 196 | static const unsigned char * const spc_emu_thread_name = "spc emu"; |
197 | static struct thread_entry *emu_thread_p; | ||
192 | 198 | ||
193 | enum {SAMPLE_RATE = 32000}; | 199 | enum |
200 | { | ||
201 | SPC_EMU_AUDIO = 0, | ||
202 | SPC_EMU_LOAD, | ||
203 | SPC_EMU_QUIT, | ||
204 | }; | ||
205 | |||
206 | struct spc_load | ||
207 | { | ||
208 | uint8_t *buf; | ||
209 | size_t size; | ||
210 | }; | ||
211 | |||
212 | /* sample queue */ | ||
213 | #define WAV_NUM_CHUNKS 2 | ||
214 | #define WAV_CHUNK_MASK (WAV_NUM_CHUNKS-1) | ||
215 | struct sample_queue_chunk | ||
216 | { | ||
217 | long id; | ||
218 | union | ||
219 | { | ||
220 | intptr_t data; | ||
221 | int32_t audio[WAV_CHUNK_SIZE*2]; | ||
222 | }; | ||
223 | }; | ||
224 | |||
225 | static struct | ||
226 | { | ||
227 | int head, tail; | ||
228 | struct semaphore emu_sem_head; | ||
229 | struct semaphore emu_sem_tail; | ||
230 | struct event emu_evt_reply; | ||
231 | intptr_t retval; | ||
232 | struct sample_queue_chunk wav_chunk[WAV_NUM_CHUNKS]; | ||
233 | } sample_queue NOCACHEBSS_ATTR; | ||
234 | |||
235 | static inline void samples_release_wrbuf(void) | ||
236 | { | ||
237 | sample_queue.tail++; | ||
238 | ci->semaphore_release(&sample_queue.emu_sem_head); | ||
239 | } | ||
240 | |||
241 | static inline struct sample_queue_chunk * samples_get_wrbuf(void) | ||
242 | { | ||
243 | ci->semaphore_wait(&sample_queue.emu_sem_tail); | ||
244 | return &sample_queue.wav_chunk[sample_queue.tail & WAV_CHUNK_MASK]; | ||
245 | } | ||
246 | |||
247 | static inline void samples_release_rdbuf(void) | ||
248 | { | ||
249 | if (sample_queue.head != sample_queue.tail) { | ||
250 | sample_queue.head++; | ||
251 | } | ||
252 | |||
253 | ci->semaphore_release(&sample_queue.emu_sem_tail); | ||
254 | } | ||
255 | |||
256 | static inline int32_t * samples_get_rdbuf(void) | ||
257 | { | ||
258 | ci->semaphore_wait(&sample_queue.emu_sem_head); | ||
259 | |||
260 | if (ci->stop_codec || ci->new_track) | ||
261 | { | ||
262 | /* Told to stop. Buffer must be released. */ | ||
263 | samples_release_rdbuf(); | ||
264 | return NULL; | ||
265 | } | ||
266 | |||
267 | return sample_queue.wav_chunk[sample_queue.head & WAV_CHUNK_MASK].audio; | ||
268 | } | ||
269 | |||
270 | static intptr_t emu_thread_send_msg(long id, intptr_t data) | ||
271 | { | ||
272 | struct sample_queue_chunk *chunk; | ||
273 | /* Grab an audio output buffer */ | ||
274 | ci->semaphore_wait(&sample_queue.emu_sem_head); | ||
275 | chunk = &sample_queue.wav_chunk[sample_queue.head & WAV_CHUNK_MASK]; | ||
276 | /* Place a message in it instead of audio */ | ||
277 | chunk->id = id; | ||
278 | chunk->data = data; | ||
279 | /* Release it to the emu thread */ | ||
280 | samples_release_rdbuf(); | ||
281 | /* Wait for a response */ | ||
282 | ci->event_wait(&sample_queue.emu_evt_reply, STATE_SIGNALED); | ||
283 | return sample_queue.retval; | ||
284 | } | ||
285 | |||
286 | /* thread function */ | ||
287 | static bool emu_thread_process_msg(struct sample_queue_chunk *chunk) | ||
288 | { | ||
289 | long id = chunk->id; | ||
290 | bool ret = id != SPC_EMU_QUIT; | ||
291 | |||
292 | chunk->id = SPC_EMU_AUDIO; /* Reset chunk type to audio */ | ||
293 | sample_queue.retval = 0; | ||
294 | |||
295 | if (id == SPC_EMU_LOAD) | ||
296 | { | ||
297 | struct spc_load *ld = (struct spc_load *)chunk->data; | ||
298 | invalidate_icache(); | ||
299 | SPC_Init(&spc_emu); | ||
300 | sample_queue.retval = SPC_load_spc(&spc_emu, ld->buf, ld->size); | ||
301 | } | ||
302 | |||
303 | /* Empty the audio queue */ | ||
304 | /* This is a dirty hack that a timeout-based wait would make unnecessary, but | ||
305 | it is still safe because the other thread is known to be waiting for a reply | ||
306 | and is not using the objects. */ | ||
307 | ci->semaphore_init(&sample_queue.emu_sem_tail, 2, 2); | ||
308 | ci->semaphore_init(&sample_queue.emu_sem_head, 2, 0); | ||
309 | sample_queue.head = sample_queue.tail = 0; | ||
310 | ci->event_set_state(&sample_queue.emu_evt_reply, STATE_SIGNALED); | ||
311 | |||
312 | return ret; | ||
313 | } | ||
314 | |||
315 | static void spc_emu_thread(void) | ||
316 | { | ||
317 | CPU_Init(&spc_emu); | ||
318 | |||
319 | while (1) { | ||
320 | /* get a buffer for output */ | ||
321 | struct sample_queue_chunk *chunk = samples_get_wrbuf(); | ||
322 | |||
323 | if (chunk->id != SPC_EMU_AUDIO) { | ||
324 | /* This chunk doesn't contain audio but a command */ | ||
325 | if (!emu_thread_process_msg(chunk)) | ||
326 | break; | ||
327 | /* Have to re-get this pointer to keep semaphore counts correct */ | ||
328 | continue; | ||
329 | } | ||
330 | |||
331 | ENTER_TIMER(render); | ||
332 | /* fill samples buffer */ | ||
333 | if ( SPC_play(&spc_emu, WAV_CHUNK_SIZE*2, chunk->audio) ) | ||
334 | assert( false ); | ||
335 | EXIT_TIMER(render); | ||
336 | |||
337 | /* done so release it to output */ | ||
338 | samples_release_wrbuf(); | ||
339 | ci->yield(); | ||
340 | } | ||
341 | } | ||
342 | |||
343 | static bool spc_emu_start(void) | ||
344 | { | ||
345 | emu_thread_p = ci->create_thread(spc_emu_thread, spc_emu_thread_stack, | ||
346 | sizeof(spc_emu_thread_stack), CREATE_THREAD_FROZEN, | ||
347 | spc_emu_thread_name IF_PRIO(, PRIORITY_PLAYBACK), COP); | ||
348 | |||
349 | if (emu_thread_p == NULL) | ||
350 | return false; | ||
351 | |||
352 | /* Initialize audio queue as full to prevent emu thread from trying to run the | ||
353 | emulator before loading something */ | ||
354 | ci->event_init(&sample_queue.emu_evt_reply, | ||
355 | EVENT_AUTOMATIC | STATE_NONSIGNALED); | ||
356 | ci->semaphore_init(&sample_queue.emu_sem_tail, 2, 0); | ||
357 | ci->semaphore_init(&sample_queue.emu_sem_head, 2, 2); | ||
358 | sample_queue.head = 0; | ||
359 | sample_queue.tail = 2; | ||
360 | |||
361 | /* Start it running */ | ||
362 | ci->thread_thaw(emu_thread_p); | ||
363 | return true; | ||
364 | } | ||
365 | |||
366 | /* load a new program on the emu thread */ | ||
367 | static inline int load_spc_buffer(uint8_t *buf, size_t size) | ||
368 | { | ||
369 | struct spc_load ld = { buf, size }; | ||
370 | flush_icache(); | ||
371 | return emu_thread_send_msg(SPC_EMU_LOAD, (intptr_t)&ld); | ||
372 | } | ||
373 | |||
374 | static inline void spc_emu_quit(void) | ||
375 | { | ||
376 | emu_thread_send_msg(SPC_EMU_QUIT, 0); | ||
377 | /* Wait for emu thread to be killed */ | ||
378 | ci->thread_wait(emu_thread_p); | ||
379 | } | ||
380 | |||
381 | static inline bool spc_play_get_samples(int32_t **samples) | ||
382 | { | ||
383 | /* obtain filled samples buffer */ | ||
384 | *samples = samples_get_rdbuf(); | ||
385 | return *samples != NULL; | ||
386 | } | ||
387 | |||
388 | static inline void spc_play_send_samples(int32_t *samples) | ||
389 | { | ||
390 | ci->pcmbuf_insert(samples, samples+WAV_CHUNK_SIZE, WAV_CHUNK_SIZE); | ||
391 | /* done with chunk so release it to emu thread */ | ||
392 | samples_release_rdbuf(); | ||
393 | } | ||
394 | |||
395 | #else /* !SPC_DUAL_CORE */ | ||
396 | /** Implementations for single-core operation **/ | ||
397 | int32_t wav_chunk[WAV_CHUNK_SIZE*2] IBSS_ATTR; | ||
398 | |||
399 | /* load a new program into emu */ | ||
400 | static inline int load_spc_buffer(uint8_t *buf, size_t size) | ||
401 | { | ||
402 | SPC_Init(&spc_emu); | ||
403 | return SPC_load_spc(&spc_emu, buf, size); | ||
404 | } | ||
405 | |||
406 | static inline bool spc_emu_start(void) | ||
407 | { | ||
408 | #ifdef CPU_COLDFIRE | ||
409 | /* signed integer mode with saturation */ | ||
410 | coldfire_set_macsr(EMAC_SATURATE); | ||
411 | #endif | ||
412 | CPU_Init(&spc_emu); | ||
413 | return true; | ||
414 | } | ||
415 | |||
416 | static inline void spc_play_send_samples(int32_t *samples) | ||
417 | { | ||
418 | ci->pcmbuf_insert(samples, samples+WAV_CHUNK_SIZE, WAV_CHUNK_SIZE); | ||
419 | } | ||
420 | |||
421 | #define spc_emu_quit() | ||
422 | #define samples_release_rdbuf() | ||
423 | |||
424 | static inline bool spc_play_get_samples(int32_t **samples) | ||
425 | { | ||
426 | ENTER_TIMER(render); | ||
427 | /* fill samples buffer */ | ||
428 | if ( SPC_play(&spc_emu,WAV_CHUNK_SIZE*2,wav_chunk) ) | ||
429 | assert( false ); | ||
430 | EXIT_TIMER(render); | ||
431 | *samples = wav_chunk; | ||
432 | return true; | ||
433 | } | ||
434 | #endif /* SPC_DUAL_CORE */ | ||
194 | 435 | ||
195 | /* The main decoder loop */ | 436 | /* The main decoder loop */ |
196 | static int play_track( void ) | 437 | static int play_track( void ) |
@@ -206,7 +447,7 @@ static int play_track( void ) | |||
206 | fadedec=0x7fffffffl/(fadeendsample-fadestartsample)+1; | 447 | fadedec=0x7fffffffl/(fadeendsample-fadestartsample)+1; |
207 | 448 | ||
208 | ENTER_TIMER(total); | 449 | ENTER_TIMER(total); |
209 | 450 | ||
210 | while ( 1 ) | 451 | while ( 1 ) |
211 | { | 452 | { |
212 | ci->yield(); | 453 | ci->yield(); |
@@ -224,14 +465,12 @@ static int play_track( void ) | |||
224 | } | 465 | } |
225 | ci->seek_complete(); | 466 | ci->seek_complete(); |
226 | } | 467 | } |
227 | 468 | ||
228 | ENTER_TIMER(render); | 469 | int32_t *samples; |
229 | /* fill samples buffer */ | 470 | if (!spc_play_get_samples(&samples)) |
230 | if ( SPC_play(&spc_emu,WAV_CHUNK_SIZE*2,samples) ) | 471 | break; |
231 | assert( false ); | 472 | |
232 | EXIT_TIMER(render); | 473 | sampleswritten += WAV_CHUNK_SIZE; |
233 | |||
234 | sampleswritten+=WAV_CHUNK_SIZE; | ||
235 | 474 | ||
236 | /* is track timed? */ | 475 | /* is track timed? */ |
237 | if (ci->global_settings->repeat_mode!=REPEAT_ONE && ci->id3->length) { | 476 | if (ci->global_settings->repeat_mode!=REPEAT_ONE && ci->id3->length) { |
@@ -241,11 +480,11 @@ static int play_track( void ) | |||
241 | /* fade? */ | 480 | /* fade? */ |
242 | if (curtime>ID666.length) | 481 | if (curtime>ID666.length) |
243 | { | 482 | { |
244 | #ifdef CPU_COLDFIRE | 483 | #ifdef CPU_COLDFIRE |
245 | /* Have to switch modes to do this */ | 484 | /* Have to switch modes to do this */ |
246 | long macsr = coldfire_get_macsr(); | 485 | long macsr = coldfire_get_macsr(); |
247 | coldfire_set_macsr(EMAC_SATURATE | EMAC_FRACTIONAL | EMAC_ROUND); | 486 | coldfire_set_macsr(EMAC_SATURATE | EMAC_FRACTIONAL | EMAC_ROUND); |
248 | #endif | 487 | #endif |
249 | int i; | 488 | int i; |
250 | for (i=0;i<WAV_CHUNK_SIZE;i++) { | 489 | for (i=0;i<WAV_CHUNK_SIZE;i++) { |
251 | if (lasttimesample+i>fadestartsample) { | 490 | if (lasttimesample+i>fadestartsample) { |
@@ -256,42 +495,43 @@ static int play_track( void ) | |||
256 | fadevol-=fadedec; | 495 | fadevol-=fadedec; |
257 | } | 496 | } |
258 | } | 497 | } |
259 | #ifdef CPU_COLDFIRE | 498 | #ifdef CPU_COLDFIRE |
260 | coldfire_set_macsr(macsr); | 499 | coldfire_set_macsr(macsr); |
261 | #endif | 500 | #endif |
262 | } | 501 | } |
263 | /* end? */ | 502 | /* end? */ |
264 | if (lasttimesample>=fadeendsample) | 503 | if (lasttimesample>=fadeendsample) |
504 | { | ||
505 | samples_release_rdbuf(); | ||
265 | break; | 506 | break; |
507 | } | ||
266 | } | 508 | } |
267 | 509 | ||
268 | ci->pcmbuf_insert(samples, samples+WAV_CHUNK_SIZE, WAV_CHUNK_SIZE); | 510 | spc_play_send_samples(samples); |
269 | 511 | ||
270 | if (ci->global_settings->repeat_mode!=REPEAT_ONE) | 512 | if (ci->global_settings->repeat_mode!=REPEAT_ONE) |
271 | ci->set_elapsed(sampleswritten*1000LL/SAMPLE_RATE); | 513 | ci->set_elapsed(sampleswritten*1000LL/SAMPLE_RATE); |
272 | else | 514 | else |
273 | ci->set_elapsed(0); | 515 | ci->set_elapsed(0); |
274 | } | 516 | } |
275 | 517 | ||
276 | EXIT_TIMER(total); | 518 | EXIT_TIMER(total); |
277 | |||
278 | return 0; | 519 | return 0; |
279 | } | 520 | } |
280 | 521 | ||
281 | /* this is the codec entry point */ | 522 | /* this is the codec entry point */ |
282 | enum codec_status codec_main(void) | 523 | enum codec_status codec_main(void) |
283 | { | 524 | { |
284 | #ifdef CPU_COLDFIRE | 525 | enum codec_status stat = CODEC_ERROR; |
285 | /* signed integer mode with saturation */ | 526 | |
286 | coldfire_set_macsr(EMAC_SATURATE); | 527 | if (!spc_emu_start()) |
287 | #endif | 528 | goto codec_quit; |
288 | CPU_Init(&spc_emu); | ||
289 | 529 | ||
290 | do | 530 | do |
291 | { | 531 | { |
292 | DEBUGF("SPC: next_track\n"); | 532 | DEBUGF("SPC: next_track\n"); |
293 | if (codec_init()) { | 533 | if (codec_init()) { |
294 | return CODEC_ERROR; | 534 | goto codec_quit; |
295 | } | 535 | } |
296 | DEBUGF("SPC: after init\n"); | 536 | DEBUGF("SPC: after init\n"); |
297 | 537 | ||
@@ -301,7 +541,7 @@ enum codec_status codec_main(void) | |||
301 | 541 | ||
302 | /* wait for track info to load */ | 542 | /* wait for track info to load */ |
303 | while (!*ci->taginfo_ready && !ci->stop_codec) | 543 | while (!*ci->taginfo_ready && !ci->stop_codec) |
304 | ci->sleep(1); | 544 | ci->yield(); |
305 | 545 | ||
306 | codec_set_replaygain(ci->id3); | 546 | codec_set_replaygain(ci->id3); |
307 | 547 | ||
@@ -313,20 +553,19 @@ enum codec_status codec_main(void) | |||
313 | size_t buffersize; | 553 | size_t buffersize; |
314 | uint8_t* buffer = ci->request_buffer(&buffersize, ci->filesize); | 554 | uint8_t* buffer = ci->request_buffer(&buffersize, ci->filesize); |
315 | if (!buffer) { | 555 | if (!buffer) { |
316 | return CODEC_ERROR; | 556 | goto codec_quit; |
317 | } | 557 | } |
318 | 558 | ||
319 | DEBUGF("SPC: read size = 0x%lx\n",(unsigned long)buffersize); | 559 | DEBUGF("SPC: read size = 0x%lx\n",(unsigned long)buffersize); |
320 | do | 560 | do |
321 | { | 561 | { |
322 | SPC_Init(&spc_emu); | 562 | if (load_spc_buffer(buffer, buffersize)) { |
323 | if (SPC_load_spc(&spc_emu,buffer,buffersize)) { | ||
324 | DEBUGF("SPC load failure\n"); | 563 | DEBUGF("SPC load failure\n"); |
325 | return CODEC_ERROR; | 564 | goto codec_quit; |
326 | } | 565 | } |
327 | 566 | ||
328 | LoadID666(buffer+0x2e); | 567 | LoadID666(buffer+0x2e); |
329 | 568 | ||
330 | if (ci->global_settings->repeat_mode!=REPEAT_ONE && ID666.length==0) { | 569 | if (ci->global_settings->repeat_mode!=REPEAT_ONE && ID666.length==0) { |
331 | ID666.length=3*60*1000; /* 3 minutes */ | 570 | ID666.length=3*60*1000; /* 3 minutes */ |
332 | ID666.fade=5*1000; /* 5 seconds */ | 571 | ID666.fade=5*1000; /* 5 seconds */ |
@@ -340,12 +579,16 @@ enum codec_status codec_main(void) | |||
340 | 579 | ||
341 | reset_profile_timers(); | 580 | reset_profile_timers(); |
342 | } | 581 | } |
343 | |||
344 | while ( play_track() ); | 582 | while ( play_track() ); |
345 | 583 | ||
346 | print_timers(ci->id3->path); | 584 | print_timers(ci->id3->path); |
347 | } | 585 | } |
348 | while ( ci->request_next_track() ); | 586 | while ( ci->request_next_track() ); |
587 | |||
588 | stat = CODEC_OK; | ||
589 | |||
590 | codec_quit: | ||
591 | spc_emu_quit(); | ||
349 | 592 | ||
350 | return CODEC_OK; | 593 | return stat; |
351 | } | 594 | } |
diff --git a/apps/codecs/spc/spc_codec.h b/apps/codecs/spc/spc_codec.h index f2677df04a..c785acc468 100644 --- a/apps/codecs/spc/spc_codec.h +++ b/apps/codecs/spc/spc_codec.h | |||
@@ -32,38 +32,51 @@ | |||
32 | 32 | ||
33 | /** Basic configuration options **/ | 33 | /** Basic configuration options **/ |
34 | 34 | ||
35 | /* TGB is the only target fast enough for gaussian and realtime BRR decode */ | 35 | #define SPC_DUAL_CORE 1 |
36 | /* echo is almost fast enough but not quite */ | ||
37 | #ifndef TOSHIBA_GIGABEAT_F | ||
38 | /* Cache BRR waves */ | ||
39 | #define SPC_BRRCACHE 1 | ||
40 | 36 | ||
41 | /* Disable gaussian interpolation */ | 37 | #if !defined(SPC_DUAL_CORE) || NUM_CORES == 1 |
42 | #define SPC_NOINTERP 1 | 38 | #undef SPC_DUAL_CORE |
43 | 39 | #define SPC_DUAL_CORE 0 | |
44 | #ifndef CPU_COLDFIRE | ||
45 | /* Disable echo processing */ | ||
46 | #define SPC_NOECHO 1 | ||
47 | #else | ||
48 | /* Enable echo processing */ | ||
49 | #define SPC_NOECHO 0 | ||
50 | #endif | 40 | #endif |
51 | #else | 41 | |
42 | /* TGB is the only target fast enough for gaussian and realtime BRR decode */ | ||
43 | /* echo is almost fast enough but not quite */ | ||
44 | #if defined(TOSHIBA_GIGABEAT_F) || defined(SIMULATOR) | ||
52 | /* Don't cache BRR waves */ | 45 | /* Don't cache BRR waves */ |
53 | #define SPC_BRRCACHE 0 | 46 | #define SPC_BRRCACHE 0 |
54 | 47 | ||
55 | /* Allow gaussian interpolation */ | 48 | /* Allow gaussian interpolation */ |
56 | #define SPC_NOINTERP 0 | 49 | #define SPC_NOINTERP 0 |
50 | |||
51 | /* Allow echo processing */ | ||
52 | #define SPC_NOECHO 0 | ||
53 | #elif defined(CPU_COLDFIRE) | ||
54 | /* Cache BRR waves */ | ||
55 | #define SPC_BRRCACHE 1 | ||
57 | 56 | ||
57 | /* Disable gaussian interpolation */ | ||
58 | #define SPC_NOINTERP 1 | ||
59 | |||
58 | /* Allow echo processing */ | 60 | /* Allow echo processing */ |
59 | #define SPC_NOECHO 0 | 61 | #define SPC_NOECHO 0 |
60 | #endif | 62 | #elif defined (CPU_PP) && SPC_DUAL_CORE |
63 | /* Cache BRR waves */ | ||
64 | #define SPC_BRRCACHE 1 | ||
65 | |||
66 | /* Disable gaussian interpolation */ | ||
67 | #define SPC_NOINTERP 1 | ||
61 | 68 | ||
62 | /* Samples per channel per iteration */ | 69 | /* Allow echo processing */ |
63 | #ifdef CPU_COLDFIRE | 70 | #define SPC_NOECHO 0 |
64 | #define WAV_CHUNK_SIZE 1024 | ||
65 | #else | 71 | #else |
66 | #define WAV_CHUNK_SIZE 2048 | 72 | /* Cache BRR waves */ |
73 | #define SPC_BRRCACHE 1 | ||
74 | |||
75 | /* Disable gaussian interpolation */ | ||
76 | #define SPC_NOINTERP 1 | ||
77 | |||
78 | /* Disable echo processing */ | ||
79 | #define SPC_NOECHO 1 | ||
67 | #endif | 80 | #endif |
68 | 81 | ||
69 | #ifdef CPU_ARM | 82 | #ifdef CPU_ARM |
@@ -72,6 +85,26 @@ | |||
72 | 85 | ||
73 | #undef IDATA_ATTR | 86 | #undef IDATA_ATTR |
74 | #define IDATA_ATTR | 87 | #define IDATA_ATTR |
88 | |||
89 | #undef ICONST_ATTR | ||
90 | #define ICONST_ATTR | ||
91 | |||
92 | #undef IBSS_ATTR | ||
93 | #define IBSS_ATTR | ||
94 | |||
95 | #if SPC_DUAL_CORE | ||
96 | #undef NOCACHEBSS_ATTR | ||
97 | #define NOCACHEBSS_ATTR __attribute__ ((section(".ibss"))) | ||
98 | #undef NOCACHEDATA_ATTR | ||
99 | #define NOCACHEDATA_ATTR __attribute__((section(".idata"))) | ||
100 | #endif | ||
101 | #endif | ||
102 | |||
103 | /* Samples per channel per iteration */ | ||
104 | #if defined(CPU_PP) && NUM_CORES == 1 | ||
105 | #define WAV_CHUNK_SIZE 2048 | ||
106 | #else | ||
107 | #define WAV_CHUNK_SIZE 1024 | ||
75 | #endif | 108 | #endif |
76 | 109 | ||
77 | /**************** Little-endian handling ****************/ | 110 | /**************** Little-endian handling ****************/ |
@@ -231,16 +264,26 @@ extern int16_t BRRcache [BRR_CACHE_SIZE]; | |||
231 | 264 | ||
232 | enum { FIR_BUF_HALF = 8 }; | 265 | enum { FIR_BUF_HALF = 8 }; |
233 | 266 | ||
234 | #ifdef CPU_COLDFIRE | 267 | #if defined(CPU_COLDFIRE) |
235 | /* global because of the large alignment requirement for hardware masking - | 268 | /* global because of the large alignment requirement for hardware masking - |
236 | * L-R interleaved 16-bit samples for easy loading and mac.w use. | 269 | * L-R interleaved 16-bit samples for easy loading and mac.w use. |
237 | */ | 270 | */ |
238 | enum | 271 | enum |
239 | { | 272 | { |
240 | FIR_BUF_SIZE = FIR_BUF_HALF * sizeof ( int32_t ), | 273 | FIR_BUF_CNT = FIR_BUF_HALF, |
241 | FIR_BUF_MASK = ~FIR_BUF_SIZE | 274 | FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ), |
275 | FIR_BUF_ALIGN = FIR_BUF_SIZE * 2, | ||
276 | FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1)) | ||
277 | }; | ||
278 | #elif defined (CPU_ARM) | ||
279 | enum | ||
280 | { | ||
281 | FIR_BUF_CNT = FIR_BUF_HALF * 2 * 2, | ||
282 | FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ), | ||
283 | FIR_BUF_ALIGN = FIR_BUF_SIZE, | ||
284 | FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) * 2 - 1)) | ||
242 | }; | 285 | }; |
243 | #endif /* CPU_COLDFIRE */ | 286 | #endif /* CPU_* */ |
244 | 287 | ||
245 | struct Spc_Dsp | 288 | struct Spc_Dsp |
246 | { | 289 | { |
@@ -257,14 +300,19 @@ struct Spc_Dsp | |||
257 | int noise_count; | 300 | int noise_count; |
258 | uint16_t noise; /* also read as int16_t */ | 301 | uint16_t noise; /* also read as int16_t */ |
259 | 302 | ||
260 | #ifdef CPU_COLDFIRE | 303 | #if defined(CPU_COLDFIRE) |
261 | /* circularly hardware masked address */ | 304 | /* circularly hardware masked address */ |
262 | int32_t *fir_ptr; | 305 | int32_t *fir_ptr; |
263 | /* wrapped address just behind current position - | 306 | /* wrapped address just behind current position - |
264 | allows mac.w to increment and mask fir_ptr */ | 307 | allows mac.w to increment and mask fir_ptr */ |
265 | int32_t *last_fir_ptr; | 308 | int32_t *last_fir_ptr; |
266 | /* copy of echo FIR constants as int16_t for use with mac.w */ | 309 | /* copy of echo FIR constants as int16_t for use with mac.w */ |
267 | int16_t fir_coeff[VOICE_COUNT]; | 310 | int16_t fir_coeff [VOICE_COUNT]; |
311 | #elif defined (CPU_ARM) | ||
312 | /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */ | ||
313 | int32_t *fir_ptr; | ||
314 | /* copy of echo FIR constants as int32_t, for faster access */ | ||
315 | int32_t fir_coeff [VOICE_COUNT]; | ||
268 | #else | 316 | #else |
269 | /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */ | 317 | /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */ |
270 | int fir_pos; /* (0 to 7) */ | 318 | int fir_pos; /* (0 to 7) */ |
diff --git a/apps/codecs/spc/spc_dsp.c b/apps/codecs/spc/spc_dsp.c index 8881788cf1..19986fd8a8 100644 --- a/apps/codecs/spc/spc_dsp.c +++ b/apps/codecs/spc/spc_dsp.c | |||
@@ -25,14 +25,13 @@ | |||
25 | #include "spc_codec.h" | 25 | #include "spc_codec.h" |
26 | #include "spc_profiler.h" | 26 | #include "spc_profiler.h" |
27 | 27 | ||
28 | #ifdef CPU_COLDFIRE | 28 | #if defined(CPU_COLDFIRE) || defined (CPU_ARM) |
29 | static int32_t fir_buf[FIR_BUF_HALF] | 29 | int32_t fir_buf[FIR_BUF_CNT] |
30 | __attribute__ ((aligned (FIR_BUF_SIZE*2))) IBSS_ATTR; | 30 | __attribute__ ((aligned (FIR_BUF_ALIGN*1))) IBSS_ATTR; |
31 | #endif | 31 | #endif |
32 | |||
33 | #if SPC_BRRCACHE | 32 | #if SPC_BRRCACHE |
34 | /* a little extra for samples that go past end */ | 33 | /* a little extra for samples that go past end */ |
35 | int16_t BRRcache [0x20000 + 32]; | 34 | int16_t BRRcache [BRR_CACHE_SIZE]; |
36 | #endif | 35 | #endif |
37 | 36 | ||
38 | void DSP_write( struct Spc_Dsp* this, int i, int data ) | 37 | void DSP_write( struct Spc_Dsp* this, int i, int data ) |
@@ -58,11 +57,12 @@ void DSP_write( struct Spc_Dsp* this, int i, int data ) | |||
58 | 57 | ||
59 | /* if ( n < -32768 ) out = -32768; */ | 58 | /* if ( n < -32768 ) out = -32768; */ |
60 | /* if ( n > 32767 ) out = 32767; */ | 59 | /* if ( n > 32767 ) out = 32767; */ |
61 | #define CLAMP16( n, out )\ | 60 | #define CLAMP16( n ) \ |
62 | {\ | 61 | ({ \ |
63 | if ( (int16_t) n != n )\ | 62 | if ( (int16_t) n != n ) \ |
64 | out = 0x7FFF ^ (n >> 31);\ | 63 | n = 0x7FFF ^ (n >> 31); \ |
65 | } | 64 | n; \ |
65 | }) | ||
66 | 66 | ||
67 | #if SPC_BRRCACHE | 67 | #if SPC_BRRCACHE |
68 | static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, | 68 | static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, |
@@ -181,7 +181,7 @@ static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, | |||
181 | smp2 = smp1; | 181 | smp2 = smp1; |
182 | } | 182 | } |
183 | 183 | ||
184 | CLAMP16( delta, delta ); | 184 | delta = CLAMP16( delta ); |
185 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | 185 | smp1 = (int16_t) (delta * 2); /* sign-extend */ |
186 | } | 186 | } |
187 | while ( (offset += 4) != 0 ); | 187 | while ( (offset += 4) != 0 ); |
@@ -359,7 +359,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
359 | #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF) | 359 | #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF) |
360 | #define IF_RBE(...) | 360 | #define IF_RBE(...) |
361 | #endif /* ROCKBOX_BIG_ENDIAN */ | 361 | #endif /* ROCKBOX_BIG_ENDIAN */ |
362 | 362 | ||
363 | #if !SPC_NOINTERP | 363 | #if !SPC_NOINTERP |
364 | int const slow_gaussian = (this->r.g.pitch_mods >> 1) | | 364 | int const slow_gaussian = (this->r.g.pitch_mods >> 1) | |
365 | this->r.g.noise_enables; | 365 | this->r.g.noise_enables; |
@@ -431,7 +431,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
431 | 431 | ||
432 | /* Envelope */ | 432 | /* Envelope */ |
433 | { | 433 | { |
434 | int const env_range = 0x800; | 434 | int const ENV_RANGE = 0x800; |
435 | int env_mode = voice->env_mode; | 435 | int env_mode = voice->env_mode; |
436 | int adsr0 = raw_voice->adsr [0]; | 436 | int adsr0 = raw_voice->adsr [0]; |
437 | int env_timer; | 437 | int env_timer; |
@@ -482,14 +482,14 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
482 | 482 | ||
483 | int envx = voice->envx; | 483 | int envx = voice->envx; |
484 | 484 | ||
485 | int const step = env_range / 64; | 485 | int const step = ENV_RANGE / 64; |
486 | envx += step; | 486 | envx += step; |
487 | if ( t == 15 ) | 487 | if ( t == 15 ) |
488 | envx += env_range / 2 - step; | 488 | envx += ENV_RANGE / 2 - step; |
489 | 489 | ||
490 | if ( envx >= env_range ) | 490 | if ( envx >= ENV_RANGE ) |
491 | { | 491 | { |
492 | envx = env_range - 1; | 492 | envx = ENV_RANGE - 1; |
493 | voice->env_mode = state_decay; | 493 | voice->env_mode = state_decay; |
494 | } | 494 | } |
495 | voice->envx = envx; | 495 | voice->envx = envx; |
@@ -516,7 +516,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
516 | int mode = t >> 5; | 516 | int mode = t >> 5; |
517 | if ( mode <= 5 ) /* decay */ | 517 | if ( mode <= 5 ) /* decay */ |
518 | { | 518 | { |
519 | int step = env_range / 64; | 519 | int step = ENV_RANGE / 64; |
520 | if ( mode == 5 ) /* exponential */ | 520 | if ( mode == 5 ) /* exponential */ |
521 | { | 521 | { |
522 | envx--; /* envx *= 255 / 256 */ | 522 | envx--; /* envx *= 255 / 256 */ |
@@ -531,14 +531,14 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
531 | } | 531 | } |
532 | else /* attack */ | 532 | else /* attack */ |
533 | { | 533 | { |
534 | int const step = env_range / 64; | 534 | int const step = ENV_RANGE / 64; |
535 | envx += step; | 535 | envx += step; |
536 | if ( mode == 7 && | 536 | if ( mode == 7 && |
537 | envx >= env_range * 3 / 4 + step ) | 537 | envx >= ENV_RANGE * 3 / 4 + step ) |
538 | envx += env_range / 256 - step; | 538 | envx += ENV_RANGE / 256 - step; |
539 | 539 | ||
540 | if ( envx >= env_range ) | 540 | if ( envx >= ENV_RANGE ) |
541 | envx = env_range - 1; | 541 | envx = ENV_RANGE - 1; |
542 | } | 542 | } |
543 | voice->envx = envx; | 543 | voice->envx = envx; |
544 | /* TODO: should this be 8? */ | 544 | /* TODO: should this be 8? */ |
@@ -550,7 +550,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
550 | else /* state_release */ | 550 | else /* state_release */ |
551 | { | 551 | { |
552 | int envx = voice->envx; | 552 | int envx = voice->envx; |
553 | if ( (envx -= env_range / 256) > 0 ) | 553 | if ( (envx -= ENV_RANGE / 256) > 0 ) |
554 | { | 554 | { |
555 | voice->envx = envx; | 555 | voice->envx = envx; |
556 | raw_voice->envx = envx >> 8; | 556 | raw_voice->envx = envx >> 8; |
@@ -683,7 +683,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
683 | smp2 = smp1; | 683 | smp2 = smp1; |
684 | } | 684 | } |
685 | 685 | ||
686 | CLAMP16( delta, delta ); | 686 | delta = CLAMP16( delta ); |
687 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | 687 | smp1 = (int16_t) (delta * 2); /* sign-extend */ |
688 | } | 688 | } |
689 | while ( (offset += 4) != 0 ); | 689 | while ( (offset += 4) != 0 ); |
@@ -778,7 +778,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
778 | output = (output + rev [1] * interp [2]) >> 12; | 778 | output = (output + rev [1] * interp [2]) >> 12; |
779 | output = (int16_t) (output * 2); | 779 | output = (int16_t) (output * 2); |
780 | output += ((rev [0] * interp [3]) >> 12) * 2; | 780 | output += ((rev [0] * interp [3]) >> 12) * 2; |
781 | CLAMP16( output, output ); | 781 | output = CLAMP16( output ); |
782 | } | 782 | } |
783 | output = (output * voice->envx) >> 11 & ~1; | 783 | output = (output * voice->envx) >> 11 & ~1; |
784 | 784 | ||
@@ -788,7 +788,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
788 | prev_outx = output; | 788 | prev_outx = output; |
789 | raw_voice->outx = (int8_t) (output >> 8); | 789 | raw_voice->outx = (int8_t) (output >> 8); |
790 | } | 790 | } |
791 | #else | 791 | #else /* SPCNOINTERP */ |
792 | /* two-point linear interpolation */ | 792 | /* two-point linear interpolation */ |
793 | #ifdef CPU_COLDFIRE | 793 | #ifdef CPU_COLDFIRE |
794 | int amp_0 = (int16_t)this->noise; | 794 | int amp_0 = (int16_t)this->noise; |
@@ -822,7 +822,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
822 | /* output = y0 + (result >> 12) */ | 822 | /* output = y0 + (result >> 12) */ |
823 | "asr.l %[sh], %[y1] \r\n" | 823 | "asr.l %[sh], %[y1] \r\n" |
824 | "add.l %[y0], %[y1] \r\n" | 824 | "add.l %[y0], %[y1] \r\n" |
825 | : [f]"+&d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0) | 825 | : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0) |
826 | : [s]"a"(voice->samples), [sh]"d"(12) | 826 | : [s]"a"(voice->samples), [sh]"d"(12) |
827 | ); | 827 | ); |
828 | } | 828 | } |
@@ -861,17 +861,49 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
861 | "movclr.l %%acc1, %[amp_1] \r\n" | 861 | "movclr.l %%acc1, %[amp_1] \r\n" |
862 | : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1) | 862 | : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1) |
863 | ); | 863 | ); |
864 | #else | 864 | #elif defined (CPU_ARM) |
865 | int amp_0, amp_1; | ||
866 | |||
867 | if ( (this->r.g.noise_enables & vbit) != 0 ) { | ||
868 | amp_0 = *(int16_t *)&this->noise; | ||
869 | } else { | ||
870 | uint32_t f = voice->position; | ||
871 | amp_0 = (uint32_t)voice->samples; | ||
865 | 872 | ||
866 | /* Try this one out on ARM and see - similar to above but the asm | 873 | asm volatile( |
867 | on coldfire removes a redundant register load worth 1 or 2%; | 874 | "mov %[y1], %[f], lsr #12 \r\n" |
868 | switching to loading two samples at once may help too. That's | 875 | "eor %[f], %[f], %[y1], lsl #12 \r\n" |
869 | done above and while 6 to 7% faster on cf over two 16 bit loads | 876 | "add %[y1], %[y0], %[y1], lsl #1 \r\n" |
870 | it makes it endian dependant. | 877 | "ldrsh %[y0], [%[y1], #2] \r\n" |
871 | 878 | "ldrsh %[y1], [%[y1], #4] \r\n" | |
872 | measured small improvement (~1.5%) - hcs | 879 | "sub %[y1], %[y1], %[y0] \r\n" |
873 | */ | 880 | "mul %[f], %[y1], %[f] \r\n" |
881 | "add %[y0], %[y0], %[f], asr #12 \r\n" | ||
882 | : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1) | ||
883 | ); | ||
884 | } | ||
885 | |||
886 | voice->position += rate; | ||
887 | |||
888 | asm volatile( | ||
889 | "mul %[amp_1], %[amp_0], %[envx] \r\n" | ||
890 | "mov %[amp_0], %[amp_1], asr #11 \r\n" | ||
891 | "mov %[amp_1], %[amp_0], asr #8 \r\n" | ||
892 | : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1) | ||
893 | : [envx]"r"(voice->envx) | ||
894 | ); | ||
895 | |||
896 | prev_outx = amp_0; | ||
897 | raw_voice->outx = (int8_t)amp_1; | ||
874 | 898 | ||
899 | asm volatile( | ||
900 | "mul %[amp_1], %[amp_0], %[vol_1] \r\n" | ||
901 | "mul %[amp_0], %[vol_0], %[amp_0] \r\n" | ||
902 | : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1) | ||
903 | : [vol_0]"r"((int)voice->volume[0]), | ||
904 | [vol_1]"r"((int)voice->volume[1]) | ||
905 | ); | ||
906 | #else /* Unoptimized CPU */ | ||
875 | int output; | 907 | int output; |
876 | 908 | ||
877 | if ( (this->r.g.noise_enables & vbit) == 0 ) | 909 | if ( (this->r.g.noise_enables & vbit) == 0 ) |
@@ -884,19 +916,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
884 | } | 916 | } |
885 | 917 | ||
886 | voice->position += rate; | 918 | voice->position += rate; |
887 | 919 | ||
888 | /* old version */ | ||
889 | #if 0 | ||
890 | int fraction = voice->position & 0xFFF; | ||
891 | short const* const pos = voice->samples + (voice->position >> 12); | ||
892 | voice->position += rate; | ||
893 | int output = | ||
894 | (pos [2] * fraction + pos [1] * (0x1000 - fraction)) >> 12; | ||
895 | /* no interpolation (hardly faster, and crappy sounding) */ | ||
896 | /*int output = pos [0];*/ | ||
897 | if ( this->r.g.noise_enables & vbit ) | ||
898 | output = *(int16_t*) &this->noise; | ||
899 | #endif | ||
900 | output = (output * voice->envx) >> 11; | 920 | output = (output * voice->envx) >> 11; |
901 | 921 | ||
902 | /* duplicated here to give compiler more to run in parallel */ | 922 | /* duplicated here to give compiler more to run in parallel */ |
@@ -905,8 +925,8 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
905 | 925 | ||
906 | prev_outx = output; | 926 | prev_outx = output; |
907 | raw_voice->outx = (int8_t) (output >> 8); | 927 | raw_voice->outx = (int8_t) (output >> 8); |
908 | #endif /* CPU_COLDFIRE */ | 928 | #endif /* CPU_* */ |
909 | #endif | 929 | #endif /* SPCNOINTERP */ |
910 | 930 | ||
911 | #if SPC_BRRCACHE | 931 | #if SPC_BRRCACHE |
912 | if ( voice->position >= voice->wave_end ) | 932 | if ( voice->position >= voice->wave_end ) |
@@ -1033,7 +1053,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
1033 | "or.l %[sh], %[e0] \r\n" | 1053 | "or.l %[sh], %[e0] \r\n" |
1034 | /* save final feedback into echo buffer */ | 1054 | /* save final feedback into echo buffer */ |
1035 | "move.l %[e0], (%[echo_ptr]) \r\n" | 1055 | "move.l %[e0], (%[echo_ptr]) \r\n" |
1036 | : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1) | 1056 | : [e0]"+d"(echo_0), [e1]"+d"(echo_1) |
1037 | : [out_0]"r"(out_0), [out_1]"r"(out_1), | 1057 | : [out_0]"r"(out_0), [out_1]"r"(out_1), |
1038 | [ef]"r"((int)this->r.g.echo_feedback), | 1058 | [ef]"r"((int)this->r.g.echo_feedback), |
1039 | [echo_ptr]"a"((int32_t *)echo_ptr), | 1059 | [echo_ptr]"a"((int32_t *)echo_ptr), |
@@ -1056,7 +1076,88 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
1056 | out_buf [ 0] = out_0; | 1076 | out_buf [ 0] = out_0; |
1057 | out_buf [WAV_CHUNK_SIZE] = out_1; | 1077 | out_buf [WAV_CHUNK_SIZE] = out_1; |
1058 | out_buf ++; | 1078 | out_buf ++; |
1059 | #else /* !CPU_COLDFIRE */ | 1079 | #elif defined (CPU_ARM) |
1080 | /* Read feedback from echo buffer */ | ||
1081 | int echo_pos = this->echo_pos; | ||
1082 | uint8_t* const echo_ptr = RAM + | ||
1083 | ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF); | ||
1084 | echo_pos += 4; | ||
1085 | if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 ) | ||
1086 | echo_pos = 0; | ||
1087 | this->echo_pos = echo_pos; | ||
1088 | |||
1089 | int fb_0 = GET_LE16SA( echo_ptr ); | ||
1090 | int fb_1 = GET_LE16SA( echo_ptr + 2 ); | ||
1091 | |||
1092 | /* Keep last 8 samples */ | ||
1093 | int32_t *fir_ptr = this->fir_ptr; | ||
1094 | |||
1095 | /* Apply FIR */ | ||
1096 | asm volatile ( | ||
1097 | "str %[fb_0], [%[fir_p]], #4 \r\n" | ||
1098 | "str %[fb_1], [%[fir_p]], #4 \r\n" | ||
1099 | /* duplicate at +8 eliminates wrap checking below */ | ||
1100 | "str %[fb_0], [%[fir_p], #56] \r\n" | ||
1101 | "str %[fb_1], [%[fir_p], #60] \r\n" | ||
1102 | : [fir_p]"+r"(fir_ptr) | ||
1103 | : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1) | ||
1104 | ); | ||
1105 | |||
1106 | this->fir_ptr = (int32_t *)((intptr_t)fir_ptr & FIR_BUF_MASK); | ||
1107 | int32_t *fir_coeff = this->fir_coeff; | ||
1108 | |||
1109 | asm volatile ( | ||
1110 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1111 | "ldmia %[fir_p]!, { r4-r5 } \r\n" | ||
1112 | "mul %[fb_0], r0, %[fb_0] \r\n" | ||
1113 | "mul %[fb_1], r0, %[fb_1] \r\n" | ||
1114 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1115 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1116 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1117 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1118 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1119 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1120 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1121 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1122 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1123 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1124 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1125 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1126 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1127 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1128 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1129 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1130 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1131 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1132 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1133 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1134 | : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1), | ||
1135 | [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff) | ||
1136 | : | ||
1137 | : "r0", "r1", "r2", "r3", "r4", "r5" | ||
1138 | ); | ||
1139 | |||
1140 | /* Generate output */ | ||
1141 | int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0) | ||
1142 | >> global_muting; | ||
1143 | int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) | ||
1144 | >> global_muting; | ||
1145 | |||
1146 | out_buf [ 0] = amp_0; | ||
1147 | out_buf [WAV_CHUNK_SIZE] = amp_1; | ||
1148 | out_buf ++; | ||
1149 | |||
1150 | if ( !(this->r.g.flags & 0x20) ) | ||
1151 | { | ||
1152 | /* Feedback into echo buffer */ | ||
1153 | int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14); | ||
1154 | int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14); | ||
1155 | e0 = CLAMP16( e0 ); | ||
1156 | SET_LE16A( echo_ptr , e0 ); | ||
1157 | e1 = CLAMP16( e1 ); | ||
1158 | SET_LE16A( echo_ptr + 2, e1 ); | ||
1159 | } | ||
1160 | #else /* Unoptimized CPU */ | ||
1060 | /* Read feedback from echo buffer */ | 1161 | /* Read feedback from echo buffer */ |
1061 | int echo_pos = this->echo_pos; | 1162 | int echo_pos = this->echo_pos; |
1062 | uint8_t* const echo_ptr = RAM + | 1163 | uint8_t* const echo_ptr = RAM + |
@@ -1102,25 +1203,25 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
1102 | out_buf [WAV_CHUNK_SIZE] = amp_1; | 1203 | out_buf [WAV_CHUNK_SIZE] = amp_1; |
1103 | out_buf ++; | 1204 | out_buf ++; |
1104 | 1205 | ||
1105 | /* Feedback into echo buffer */ | ||
1106 | int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14); | ||
1107 | int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14); | ||
1108 | if ( !(this->r.g.flags & 0x20) ) | 1206 | if ( !(this->r.g.flags & 0x20) ) |
1109 | { | 1207 | { |
1110 | CLAMP16( e0, e0 ); | 1208 | /* Feedback into echo buffer */ |
1209 | int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14); | ||
1210 | int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14); | ||
1211 | e0 = CLAMP16( e0 ); | ||
1111 | SET_LE16A( echo_ptr , e0 ); | 1212 | SET_LE16A( echo_ptr , e0 ); |
1112 | CLAMP16( e1, e1 ); | 1213 | e1 = CLAMP16( e1 ); |
1113 | SET_LE16A( echo_ptr + 2, e1 ); | 1214 | SET_LE16A( echo_ptr + 2, e1 ); |
1114 | } | 1215 | } |
1115 | #endif /* CPU_COLDFIRE */ | 1216 | #endif /* CPU_* */ |
1116 | #else | 1217 | #else /* SPCNOECHO == 1*/ |
1117 | /* Generate output */ | 1218 | /* Generate output */ |
1118 | int amp_0 = (chans_0 * global_vol_0) >> global_muting; | 1219 | int amp_0 = (chans_0 * global_vol_0) >> global_muting; |
1119 | int amp_1 = (chans_1 * global_vol_1) >> global_muting; | 1220 | int amp_1 = (chans_1 * global_vol_1) >> global_muting; |
1120 | out_buf [ 0] = amp_0; | 1221 | out_buf [ 0] = amp_0; |
1121 | out_buf [WAV_CHUNK_SIZE] = amp_1; | 1222 | out_buf [WAV_CHUNK_SIZE] = amp_1; |
1122 | out_buf ++; | 1223 | out_buf ++; |
1123 | #endif | 1224 | #endif /* SPCNOECHO */ |
1124 | } | 1225 | } |
1125 | while ( --count ); | 1226 | while ( --count ); |
1126 | #if 0 | 1227 | #if 0 |
@@ -1155,10 +1256,13 @@ void DSP_reset( struct Spc_Dsp* this ) | |||
1155 | this->wave_entry [i].start_addr = -1; | 1256 | this->wave_entry [i].start_addr = -1; |
1156 | #endif | 1257 | #endif |
1157 | 1258 | ||
1158 | #ifdef CPU_COLDFIRE | 1259 | #if defined(CPU_COLDFIRE) |
1159 | this->fir_ptr = fir_buf; | 1260 | this->fir_ptr = fir_buf; |
1160 | this->last_fir_ptr = &fir_buf [7]; | 1261 | this->last_fir_ptr = &fir_buf [7]; |
1161 | ci->memset( fir_buf, 0, sizeof fir_buf ); | 1262 | ci->memset( fir_buf, 0, sizeof fir_buf ); |
1263 | #elif defined (CPU_ARM) | ||
1264 | this->fir_ptr = fir_buf; | ||
1265 | ci->memset( fir_buf, 0, sizeof fir_buf ); | ||
1162 | #else | 1266 | #else |
1163 | this->fir_pos = 0; | 1267 | this->fir_pos = 0; |
1164 | ci->memset( this->fir_buf, 0, sizeof this->fir_buf ); | 1268 | ci->memset( this->fir_buf, 0, sizeof this->fir_buf ); |