summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libspc/spc_dsp.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libspc/spc_dsp.c')
-rw-r--r--lib/rbcodec/codecs/libspc/spc_dsp.c1594
1 files changed, 1594 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspc/spc_dsp.c b/lib/rbcodec/codecs/libspc/spc_dsp.c
new file mode 100644
index 0000000000..6350c4c331
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/spc_dsp.c
@@ -0,0 +1,1594 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS)
11 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
12 * Copyright (C) 2004-2007 Shay Green (blargg)
13 * Copyright (C) 2002 Brad Martin
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
19 *
20 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21 * KIND, either express or implied.
22 *
23 ****************************************************************************/
24
25/* The DSP portion (awe!) */
26#include "codeclib.h"
27#include "spc_codec.h"
28#include "spc_profiler.h"
29
/* Working buffer of FIR_BUF_CNT 32-bit entries, aligned to FIR_BUF_ALIGN.
   Only built for ColdFire and ARM targets.
   NOTE(review): presumably the echo FIR history used by the optimized
   paths - confirm against the code that indexes it. */
30#if defined(CPU_COLDFIRE) || defined (CPU_ARM)
31int32_t fir_buf[FIR_BUF_CNT] IBSS_ATTR_SPC
32 __attribute__((aligned(FIR_BUF_ALIGN*1)));
33#endif
/* Storage for predecoded BRR waveforms; decode_brr() writes its output
   starting at BRRcache + start_addr * 2. Only built when SPC_BRRCACHE is
   enabled. */
34#if SPC_BRRCACHE
35/* a little extra for samples that go past end */
36int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR;
37#endif
38
39void DSP_write( struct Spc_Dsp* this, int i, int data )
40{
41 assert( (unsigned) i < REGISTER_COUNT );
42
43 this->r.reg [i] = data;
44 int high = i >> 4;
45 int low = i & 0x0F;
46 if ( low < 2 ) /* voice volumes */
47 {
48 int left = *(int8_t const*) &this->r.reg [i & ~1];
49 int right = *(int8_t const*) &this->r.reg [i | 1];
50 struct voice_t* v = this->voice_state + high;
51 v->volume [0] = left;
52 v->volume [1] = right;
53 }
54 else if ( low == 0x0F ) /* fir coefficients */
55 {
56 this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */
57 }
58}
59
/* Shorthand for clip_sample_16(), which is declared outside this file.
   NOTE(review): presumably saturates to the signed 16-bit range - confirm. */
60#define CLAMP16( n ) clip_sample_16( n )
61
62#if SPC_BRRCACHE
63static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
64 struct voice_t* voice,
65 struct raw_voice_t const* const raw_voice ) ICODE_ATTR_SPC;
66static void decode_brr( struct Spc_Dsp* this, unsigned start_addr,
67 struct voice_t* voice,
68 struct raw_voice_t const* const raw_voice )
69{
70 /* setup same variables as where decode_brr() is called from */
71 #undef RAM
72 #define RAM ram.ram
73
74 struct src_dir const* const sd =
75 &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)];
76 struct cache_entry_t* const wave_entry =
77 &this->wave_entry [raw_voice->waveform];
78
79 /* the following block can be put in place of the call to
80 decode_brr() below
81 */
82 {
83 DEBUGF( "decode at %08x (wave #%d)\n",
84 start_addr, raw_voice->waveform );
85
86 /* see if in cache */
87 int i;
88 for ( i = 0; i < this->oldsize; i++ )
89 {
90 struct cache_entry_t* e = &this->wave_entry_old [i];
91 if ( e->start_addr == start_addr )
92 {
93 DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
94 this->oldsize );
95 *wave_entry = *e;
96 goto wave_in_cache;
97 }
98 }
99
100 wave_entry->start_addr = start_addr;
101
102 uint8_t const* const loop_ptr =
103 RAM + letoh16(sd[raw_voice->waveform].loop);
104 short* loop_start = 0;
105
106 short* out = BRRcache + start_addr * 2;
107 wave_entry->samples = out;
108 *out++ = 0;
109 int smp1 = 0;
110 int smp2 = 0;
111
112 uint8_t const* addr = RAM + start_addr;
113 int block_header;
114 do
115 {
116 if ( addr == loop_ptr )
117 {
118 loop_start = out;
119 DEBUGF( "loop at %08lx (wave #%d)\n",
120 (unsigned long)(addr - RAM), raw_voice->waveform );
121 }
122
123 /* header */
124 block_header = *addr;
125 addr += 9;
126 voice->addr = addr;
127 int const filter = (block_header & 0x0C) - 0x08;
128
129 /* scaling
130 (invalid scaling gives -4096 for neg nybble, 0 for pos) */
131 static unsigned char const right_shifts [16] = {
132 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
133 };
134 static unsigned char const left_shifts [16] = {
135 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
136 };
137 int const scale = block_header >> 4;
138 int const right_shift = right_shifts [scale];
139 int const left_shift = left_shifts [scale];
140
141 /* output position */
142 out += BRR_BLOCK_SIZE;
143 int offset = -BRR_BLOCK_SIZE << 2;
144
145 do /* decode and filter 16 samples */
146 {
147 /* Get nybble, sign-extend, then scale
148 get byte, select which nybble, sign-extend, then shift based
149 on scaling. also handles invalid scaling values. */
150 int delta = (int) (int8_t) (addr [offset >> 3] << (offset & 4))
151 >> right_shift << left_shift;
152
153 out [offset >> 2] = smp2;
154
155 if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
156 {
157 delta -= smp2 >> 1;
158 delta += smp2 >> 5;
159 smp2 = smp1;
160 delta += smp1;
161 delta += (-smp1 - (smp1 >> 1)) >> 5;
162 }
163 else
164 {
165 if ( filter == -4 ) /* mode 0x04 */
166 {
167 delta += smp1 >> 1;
168 delta += (-smp1) >> 5;
169 }
170 else if ( filter > -4 ) /* mode 0x0C */
171 {
172 delta -= smp2 >> 1;
173 delta += (smp2 + (smp2 >> 1)) >> 4;
174 delta += smp1;
175 delta += (-smp1 * 13) >> 7;
176 }
177 smp2 = smp1;
178 }
179
180 delta = CLAMP16( delta );
181 smp1 = (int16_t) (delta * 2); /* sign-extend */
182 }
183 while ( (offset += 4) != 0 );
184
185 /* if next block has end flag set, this block ends early */
186 /* (verified) */
187 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
188 {
189 /* skip last 9 samples */
190 out -= 9;
191 goto early_end;
192 }
193 }
194 while ( !(block_header & 1) && addr < RAM + 0x10000 );
195
196 out [0] = smp2;
197 out [1] = smp1;
198
199 early_end:
200 wave_entry->end = (out - 1 - wave_entry->samples) << 12;
201
202 wave_entry->loop = 0;
203 if ( (block_header & 2) )
204 {
205 if ( loop_start )
206 {
207 int loop = out - loop_start;
208 wave_entry->loop = loop;
209 wave_entry->end += 0x3000;
210 out [2] = loop_start [2];
211 out [3] = loop_start [3];
212 out [4] = loop_start [4];
213 }
214 else
215 {
216 DEBUGF( "loop point outside initial wave\n" );
217 }
218 }
219
220 DEBUGF( "end at %08lx (wave #%d)\n",
221 (unsigned long)(addr - RAM), raw_voice->waveform );
222
223 /* add to cache */
224 this->wave_entry_old [this->oldsize++] = *wave_entry;
225wave_in_cache:;
226 }
227}
228#endif
229
230static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
231 struct src_dir const* const sd,
232 struct raw_voice_t const* const raw_voice,
233 const int key_on_delay, const int vbit) ICODE_ATTR_SPC;
234static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
235 struct src_dir const* const sd,
236 struct raw_voice_t const* const raw_voice,
237 const int key_on_delay, const int vbit) {
238 #undef RAM
239 #define RAM ram.ram
240 int const env_rate_init = 0x7800;
241 voice->key_on_delay = key_on_delay;
242 if ( key_on_delay == 0 )
243 {
244 this->keys_down |= vbit;
245 voice->envx = 0;
246 voice->env_mode = state_attack;
247 voice->env_timer = env_rate_init; /* TODO: inaccurate? */
248 unsigned start_addr = letoh16(sd[raw_voice->waveform].start);
249 #if !SPC_BRRCACHE
250 {
251 voice->addr = RAM + start_addr;
252 /* BRR filter uses previous samples */
253 voice->samples [BRR_BLOCK_SIZE + 1] = 0;
254 voice->samples [BRR_BLOCK_SIZE + 2] = 0;
255 /* decode three samples immediately */
256 voice->position = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1;
257 voice->block_header = 0; /* "previous" BRR header */
258 }
259 #else
260 {
261 voice->position = 3 * 0x1000 - 1;
262 struct cache_entry_t* const wave_entry =
263 &this->wave_entry [raw_voice->waveform];
264
265 /* predecode BRR if not already */
266 if ( wave_entry->start_addr != start_addr )
267 {
268 /* the following line can be replaced by the indicated block
269 in decode_brr() */
270 decode_brr( this, start_addr, voice, raw_voice );
271 }
272
273 voice->samples = wave_entry->samples;
274 voice->wave_end = wave_entry->end;
275 voice->wave_loop = wave_entry->loop;
276 }
277 #endif
278 }
279}
280
281void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
282{
283 #undef RAM
284#if defined(CPU_ARM) && !SPC_BRRCACHE
285 uint8_t* const ram_ = ram.ram;
286 #define RAM ram_
287#else
288 #define RAM ram.ram
289#endif
290#if 0
291 EXIT_TIMER(cpu);
292 ENTER_TIMER(dsp);
293#endif
294
295 /* Here we check for keys on/off. Docs say that successive writes
296 to KON/KOF must be separated by at least 2 Ts periods or risk
297 being neglected. Therefore DSP only looks at these during an
298 update, and not at the time of the write. Only need to do this
299 once however, since the regs haven't changed over the whole
300 period we need to catch up with. */
301
302 {
303 int key_ons = this->r.g.key_ons;
304 int key_offs = this->r.g.key_offs;
305 /* keying on a voice resets that bit in ENDX */
306 this->r.g.wave_ended &= ~key_ons;
307 /* key_off bits prevent key_on from being acknowledged */
308 this->r.g.key_ons = key_ons & key_offs;
309
310 /* process key events outside loop, since they won't re-occur */
311 struct voice_t* voice = this->voice_state + 8;
312 int vbit = 0x80;
313 do
314 {
315 --voice;
316 if ( key_offs & vbit )
317 {
318 voice->env_mode = state_release;
319 voice->key_on_delay = 0;
320 }
321 else if ( key_ons & vbit )
322 {
323 voice->key_on_delay = 8;
324 }
325 }
326 while ( (vbit >>= 1) != 0 );
327 }
328
329 struct src_dir const* const sd =
330 &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)];
331
332 #ifdef ROCKBOX_BIG_ENDIAN
333 /* Convert endiannesses before entering loops - these
334 get used a lot */
335 const uint32_t rates[VOICE_COUNT] =
336 {
337 GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
338 GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
339 GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
340 GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
341 GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
342 GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
343 GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
344 GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
345 };
346 #define VOICE_RATE(x) *(x)
347 #define IF_RBE(...) __VA_ARGS__
348 #ifdef CPU_COLDFIRE
349 /* Initialize mask register with the buffer address mask */
350 asm volatile ("move.l %[m], %%mask" : : [m]"i"(FIR_BUF_MASK));
351 const int echo_wrap = (this->r.g.echo_delay & 15) * 0x800;
352 const int echo_start = this->r.g.echo_page * 0x100;
353 #endif /* CPU_COLDFIRE */
354 #else
355 #define VOICE_RATE(x) (GET_LE16(raw_voice->rate) & 0x3FFF)
356 #define IF_RBE(...)
357 #endif /* ROCKBOX_BIG_ENDIAN */
358
359#if !SPC_NOINTERP
360 int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
361 this->r.g.noise_enables;
362#endif
363 /* (g.flags & 0x40) ? 30 : 14 */
364 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
365 int const global_vol_0 = this->r.g.volume_0;
366 int const global_vol_1 = this->r.g.volume_1;
367
368 /* each rate divides exactly into 0x7800 without remainder */
369 int const env_rate_init = 0x7800;
370 static unsigned short const env_rates [0x20] ICONST_ATTR_SPC =
371 {
372 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
373 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
374 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
375 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
376 };
377
378 do /* one pair of output samples per iteration */
379 {
380 /* Noise */
381 if ( this->r.g.noise_enables )
382 {
383 if ( (this->noise_count -=
384 env_rates [this->r.g.flags & 0x1F]) <= 0 )
385 {
386 this->noise_count = env_rate_init;
387 int feedback = (this->noise << 13) ^ (this->noise << 14);
388 this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1);
389 }
390 }
391
392#if !SPC_NOECHO
393 int echo_0 = 0;
394 int echo_1 = 0;
395#endif
396 long prev_outx = 0; /* TODO: correct value for first channel? */
397 int chans_0 = 0;
398 int chans_1 = 0;
399 /* TODO: put raw_voice pointer in voice_t? */
400 struct raw_voice_t * raw_voice = this->r.voice;
401 struct voice_t* voice = this->voice_state;
402 int vbit = 1;
403 IF_RBE( const uint32_t* vr = rates; )
404 for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
405 {
406 /* pregen involves checking keyon, etc */
407#if 0
408 ENTER_TIMER(dsp_pregen);
409#endif
410
411 /* Key on events are delayed */
412 int key_on_delay = voice->key_on_delay;
413
414 if ( UNLIKELY ( --key_on_delay >= 0 ) ) /* <1% of the time */
415 {
416 key_on(this,voice,sd,raw_voice,key_on_delay,vbit);
417 }
418
419 if ( !(this->keys_down & vbit) ) /* Silent channel */
420 {
421 silent_chan:
422 raw_voice->envx = 0;
423 raw_voice->outx = 0;
424 prev_outx = 0;
425 continue;
426 }
427
428 /* Envelope */
429 {
430 int const ENV_RANGE = 0x800;
431 int env_mode = voice->env_mode;
432 int adsr0 = raw_voice->adsr [0];
433 int env_timer;
434 if ( LIKELY ( env_mode != state_release ) ) /* 99% of the time */
435 {
436 env_timer = voice->env_timer;
437 if ( LIKELY ( adsr0 & 0x80 ) ) /* 79% of the time */
438 {
439 int adsr1 = raw_voice->adsr [1];
440 if ( LIKELY ( env_mode == state_sustain ) ) /* 74% of the time */
441 {
442 if ( (env_timer -= env_rates [adsr1 & 0x1F]) > 0 )
443 goto write_env_timer;
444
445 int envx = voice->envx;
446 envx--; /* envx *= 255 / 256 */
447 envx -= envx >> 8;
448 voice->envx = envx;
449 /* TODO: should this be 8? */
450 raw_voice->envx = envx >> 4;
451 goto init_env_timer;
452 }
453 else if ( env_mode < 0 ) /* 25% state_decay */
454 {
455 int envx = voice->envx;
456 if ( (env_timer -=
457 env_rates [(adsr0 >> 3 & 0x0E) + 0x10]) <= 0 )
458 {
459 envx--; /* envx *= 255 / 256 */
460 envx -= envx >> 8;
461 voice->envx = envx;
462 /* TODO: should this be 8? */
463 raw_voice->envx = envx >> 4;
464 env_timer = env_rate_init;
465 }
466
467 int sustain_level = adsr1 >> 5;
468 if ( envx <= (sustain_level + 1) * 0x100 )
469 voice->env_mode = state_sustain;
470
471 goto write_env_timer;
472 }
473 else /* state_attack */
474 {
475 int t = adsr0 & 0x0F;
476 if ( (env_timer -= env_rates [t * 2 + 1]) > 0 )
477 goto write_env_timer;
478
479 int envx = voice->envx;
480
481 int const step = ENV_RANGE / 64;
482 envx += step;
483 if ( t == 15 )
484 envx += ENV_RANGE / 2 - step;
485
486 if ( envx >= ENV_RANGE )
487 {
488 envx = ENV_RANGE - 1;
489 voice->env_mode = state_decay;
490 }
491 voice->envx = envx;
492 /* TODO: should this be 8? */
493 raw_voice->envx = envx >> 4;
494 goto init_env_timer;
495 }
496 }
497 else /* gain mode */
498 {
499 int t = raw_voice->gain;
500 if ( t < 0x80 )
501 {
502 raw_voice->envx = t;
503 voice->envx = t << 4;
504 goto env_end;
505 }
506 else
507 {
508 if ( (env_timer -= env_rates [t & 0x1F]) > 0 )
509 goto write_env_timer;
510
511 int envx = voice->envx;
512 int mode = t >> 5;
513 if ( mode <= 5 ) /* decay */
514 {
515 int step = ENV_RANGE / 64;
516 if ( mode == 5 ) /* exponential */
517 {
518 envx--; /* envx *= 255 / 256 */
519 step = envx >> 8;
520 }
521 if ( (envx -= step) < 0 )
522 {
523 envx = 0;
524 if ( voice->env_mode == state_attack )
525 voice->env_mode = state_decay;
526 }
527 }
528 else /* attack */
529 {
530 int const step = ENV_RANGE / 64;
531 envx += step;
532 if ( mode == 7 &&
533 envx >= ENV_RANGE * 3 / 4 + step )
534 envx += ENV_RANGE / 256 - step;
535
536 if ( envx >= ENV_RANGE )
537 envx = ENV_RANGE - 1;
538 }
539 voice->envx = envx;
540 /* TODO: should this be 8? */
541 raw_voice->envx = envx >> 4;
542 goto init_env_timer;
543 }
544 }
545 }
546 else /* state_release */
547 {
548 int envx = voice->envx;
549 if ( (envx -= ENV_RANGE / 256) > 0 )
550 {
551 voice->envx = envx;
552 raw_voice->envx = envx >> 8;
553 goto env_end;
554 }
555 else
556 {
557 /* bit was set, so this clears it */
558 this->keys_down ^= vbit;
559 voice->envx = 0;
560 goto silent_chan;
561 }
562 }
563 init_env_timer:
564 env_timer = env_rate_init;
565 write_env_timer:
566 voice->env_timer = env_timer;
567 env_end:;
568 }
569#if 0
570 EXIT_TIMER(dsp_pregen);
571
572 ENTER_TIMER(dsp_gen);
573#endif
574 #if !SPC_BRRCACHE
575 /* Decode BRR block */
576 if ( voice->position >= BRR_BLOCK_SIZE * 0x1000 )
577 {
578 voice->position -= BRR_BLOCK_SIZE * 0x1000;
579
580 uint8_t const* addr = voice->addr;
581 if ( addr >= RAM + 0x10000 )
582 addr -= 0x10000;
583
584 /* action based on previous block's header */
585 if ( voice->block_header & 1 )
586 {
587 addr = RAM + letoh16(sd[raw_voice->waveform].loop);
588 this->r.g.wave_ended |= vbit;
589 if ( !(voice->block_header & 2) ) /* 1% of the time */
590 {
591 /* first block was end block;
592 don't play anything (verified) */
593 /* bit was set, so this clears it */
594 this->keys_down ^= vbit;
595
596 /* since voice->envx is 0,
597 samples and position don't matter */
598 raw_voice->envx = 0;
599 voice->envx = 0;
600 goto skip_decode;
601 }
602 }
603
604 /* header */
605 int const block_header = *addr;
606 addr += 9;
607 voice->addr = addr;
608 voice->block_header = block_header;
609
610 /* previous samples */
611 int smp2 = voice->samples [BRR_BLOCK_SIZE + 1];
612 int smp1 = voice->samples [BRR_BLOCK_SIZE + 2];
613 voice->samples [0] = voice->samples [BRR_BLOCK_SIZE];
614
615 /* output position */
616 short* out = voice->samples + (1 + BRR_BLOCK_SIZE);
617 int offset = -BRR_BLOCK_SIZE << 2;
618
619 /* if next block has end flag set,
620 this block ends early (verified) */
621 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
622 {
623 /* arrange for last 9 samples to be skipped */
624 int const skip = 9;
625 out += (skip & 1);
626 voice->samples [skip] = voice->samples [BRR_BLOCK_SIZE];
627 voice->position += skip * 0x1000;
628 offset = (-BRR_BLOCK_SIZE + (skip & ~1)) << 2;
629 addr -= skip / 2;
630 /* force sample to end on next decode */
631 voice->block_header = 1;
632 }
633
634 int const filter = block_header & 0x0c;
635 int const scale = block_header >> 4;
636
637 if ( filter == 0x08 ) /* filter 2 (30-90% of the time) */
638 {
639 /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */
640 do /* decode and filter 16 samples */
641 {
642 /* Get nybble, sign-extend, then scale
643 get byte, select which nybble, sign-extend, then shift
644 based on scaling. */
645 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
646 delta = (delta << scale) >> 1;
647
648 if (scale > 0xc)
649 delta = (delta >> 17) << 11;
650
651 out [offset >> 2] = smp2;
652
653 delta -= smp2 >> 1;
654 delta += smp2 >> 5;
655 delta += smp1;
656 delta += (-smp1 - (smp1 >> 1)) >> 5;
657
658 delta = CLAMP16( delta );
659 smp2 = smp1;
660 smp1 = (int16_t) (delta * 2); /* sign-extend */
661 }
662 while ( (offset += 4) != 0 );
663 }
664 else if ( filter == 0x04 ) /* filter 1 */
665 {
666 /* y[n] = x[n] + 15/16 * y[n-1] */
667 do /* decode and filter 16 samples */
668 {
669 /* Get nybble, sign-extend, then scale
670 get byte, select which nybble, sign-extend, then shift
671 based on scaling. */
672 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
673 delta = (delta << scale) >> 1;
674
675 if (scale > 0xc)
676 delta = (delta >> 17) << 11;
677
678 out [offset >> 2] = smp2;
679
680 delta += smp1 >> 1;
681 delta += (-smp1) >> 5;
682
683 delta = CLAMP16( delta );
684 smp2 = smp1;
685 smp1 = (int16_t) (delta * 2); /* sign-extend */
686 }
687 while ( (offset += 4) != 0 );
688 }
689 else if ( filter == 0x0c ) /* filter 3 */
690 {
691 /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */
692 do /* decode and filter 16 samples */
693 {
694 /* Get nybble, sign-extend, then scale
695 get byte, select which nybble, sign-extend, then shift
696 based on scaling. */
697 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
698 delta = (delta << scale) >> 1;
699
700 if (scale > 0xc)
701 delta = (delta >> 17) << 11;
702
703 out [offset >> 2] = smp2;
704
705 delta -= smp2 >> 1;
706 delta += (smp2 + (smp2 >> 1)) >> 4;
707 delta += smp1;
708 delta += (-smp1 * 13) >> 7;
709
710 delta = CLAMP16( delta );
711 smp2 = smp1;
712 smp1 = (int16_t) (delta * 2); /* sign-extend */
713 }
714 while ( (offset += 4) != 0 );
715 }
716 else /* filter 0 */
717 {
718 /* y[n] = x[n] */
719 do /* decode and filter 16 samples */
720 {
721 /* Get nybble, sign-extend, then scale
722 get byte, select which nybble, sign-extend, then shift
723 based on scaling. */
724 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
725 delta = (delta << scale) >> 1;
726
727 if (scale > 0xc)
728 delta = (delta >> 17) << 11;
729
730 out [offset >> 2] = smp2;
731
732 smp2 = smp1;
733 smp1 = delta * 2;
734 }
735 while ( (offset += 4) != 0 );
736 }
737
738 out [0] = smp2;
739 out [1] = smp1;
740
741 skip_decode:;
742 }
743 #endif /* !SPC_BRRCACHE */
744 /* Get rate (with possible modulation) */
745 int rate = VOICE_RATE(vr);
746 if ( this->r.g.pitch_mods & vbit )
747 rate = (rate * (prev_outx + 32768)) >> 15;
748
749 #if !SPC_NOINTERP
750 /* Interleaved gauss table (to improve cache coherency). */
751 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
752 static short const gauss [512] ICONST_ATTR_SPC MEM_ALIGN_ATTR =
753 {
754370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
755339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
756311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
757283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
758257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
759233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
760210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
761188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
762168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
763150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
764132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
765117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
766102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
767 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
768 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
769 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
770 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
771 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
772 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
773 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
774 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
775 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
776 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
777 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
778 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
779 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
780 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
781 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
782 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
783 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
784 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
785 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
786 };
787 /* Gaussian interpolation using most recent 4 samples */
788 long position = voice->position;
789 voice->position += rate;
790 short const* interp = voice->samples + (position >> 12);
791 int offset = position >> 4 & 0xFF;
792
793 /* Only left half of gaussian kernel is in table, so we must mirror
794 for right half */
795 short const* fwd = gauss + offset * 2;
796 short const* rev = gauss + 510 - offset * 2;
797
798 /* Use faster gaussian interpolation when exact result isn't needed
799 by pitch modulator of next channel */
800 int amp_0, amp_1; /* Also serve as temps _0, and _1 */
801 if ( LIKELY ( !(slow_gaussian & vbit) ) ) /* 99% of the time */
802 {
803 /* Main optimization is lack of clamping. Not a problem since
804 output never goes more than +/- 16 outside 16-bit range and
805 things are clamped later anyway. Other optimization is to
806 preserve fractional accuracy, eliminating several masks. */
807 #if defined (CPU_ARM)
808 int output;
809 int _2, _3; /* All-purpose temps */
810 /* Multiple ASM blocks keep regs free and reduce result
811 * latency issues. */
812 #if ARM_ARCH >= 6
813 /* Interpolate */
814 asm volatile (
815 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
816 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
817 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
818 "ldr %[_3], [%[rev]] \r\n" /* _3=r0r1 */
819 "smuad %[out], %[_0], %[_2] \r\n" /* out=f0*i0 + f1*i1 */
820 "smladx %[out], %[_1], %[_3], %[out] \r\n" /* out+=r1*i2 + r0*i3 */
821 : [out]"=r"(output),
822 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
823 [_2]"=&r"(_2), [_3]"=r"(_3)
824 : [fwd]"r"(fwd), [rev]"r"(rev),
825 [interp]"r"(interp));
826 /* Apply voice envelope */
827 asm volatile (
828 "mov %[_2], %[out], asr #(11-5) \r\n" /* To do >> 16 later */
829 "mul %[out], %[_2], %[envx] \r\n" /* and avoid exp. shift */
830 : [out]"+r"(output), [_2]"=&r"(_2)
831 : [envx]"r"((int)voice->envx));
832 /* Apply left and right volume */
833 asm volatile (
834 "smulwb %[amp_0], %[out], %[vvol_0] \r\n" /* (32x16->48)[47:16]->[31:0] */
835 "smulwb %[amp_1], %[out], %[vvol_1] \r\n"
836 : [out]"+r"(output),
837 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
838 : [vvol_0]"r"(voice->volume[0]),
839 [vvol_1]"r"(voice->volume[1]));
840
841 raw_voice->outx = output >> (8+5); /* 'output' still 5 bits too big */
842 #else /* ARM_ARCH < 6 */
843 /* Perform gaussian interpolation on four samples */
844 asm volatile (
845 "ldrsh %[_0], [%[interp]] \r\n"
846 "ldrsh %[_2], [%[fwd]] \r\n"
847 "ldrsh %[_1], [%[interp], #2] \r\n"
848 "ldrsh %[_3], [%[fwd], #2] \r\n"
849 "mul %[out], %[_0], %[_2] \r\n" /* out= fwd[0]*interp[0] */
850 "ldrsh %[_0], [%[interp], #4] \r\n"
851 "ldrsh %[_2], [%[rev], #2] \r\n"
852 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=fwd[1]*interp[1] */
853 "ldrsh %[_1], [%[interp], #6] \r\n"
854 "ldrsh %[_3], [%[rev]] \r\n"
855 "mla %[out], %[_0], %[_2], %[out] \r\n" /* out+=rev[1]*interp[2] */
856 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=rev[0]*interp[3] */
857 : [out]"=&r"(output),
858 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
859 [_2]"=&r"(_2), [_3]"=&r"(_3)
860 : [fwd]"r"(fwd), [rev]"r"(rev),
861 [interp]"r"(interp));
862 /* Apply voice envelope */
863 asm volatile (
864 "mov %[_2], %[out], asr #11 \r\n"
865 "mul %[out], %[_2], %[envx] \r\n"
866 : [out]"+r"(output), [_2]"=&r"(_2)
867 : [envx]"r"((int)voice->envx));
868 /* Reduce and apply left and right volume */
869 asm volatile (
870 "mov %[out], %[out], asr #11 \r\n"
871 "mul %[amp_0], %[out], %[vvol_0] \r\n"
872 "mul %[amp_1], %[out], %[vvol_1] \r\n"
873 : [out]"+r"(output),
874 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
875 : [vvol_0]"r"((int)voice->volume[0]),
876 [vvol_1]"r"((int)voice->volume[1]));
877
878 raw_voice->outx = output >> 8;
879 #endif /* ARM_ARCH */
880 #else /* Unoptimized CPU */
881 int output = (((fwd [0] * interp [0] +
882 fwd [1] * interp [1] +
883 rev [1] * interp [2] +
884 rev [0] * interp [3] ) >> 11) * voice->envx) >> 11;
885
886 /* duplicated here to give compiler more to run in parallel */
887 amp_0 = voice->volume [0] * output;
888 amp_1 = voice->volume [1] * output;
889
890 raw_voice->outx = output >> 8;
891 #endif /* CPU_* */
892 }
893 else /* slow gaussian */
894 {
895 #if defined(CPU_ARM)
896 #if ARM_ARCH >= 6
897 int output = *(int16_t*) &this->noise;
898
899 if ( !(this->r.g.noise_enables & vbit) )
900 {
901 /* Interpolate */
902 int _2, _3;
903 asm volatile (
904 /* NOTE: often-unaligned accesses */
905 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
906 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
907 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
908 "ldr %[_3], [%[rev]] \r\n" /* _3=f2f3 */
909 "smulbb %[out], %[_0], %[_2] \r\n" /* out=f0*i0 */
910 "smultt %[_0], %[_0], %[_2] \r\n" /* _0=f1*i1 */
911 "smulbt %[_2], %[_1], %[_3] \r\n" /* _2=r1*i2 */
912 "smultb %[_3], %[_1], %[_3] \r\n" /* _3=r0*i3 */
913 : [out]"=r"(output),
914 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
915 [_2]"=&r"(_2), [_3]"=r"(_3)
916 : [fwd]"r"(fwd), [rev]"r"(rev),
917 [interp]"r"(interp));
918 asm volatile (
919 "mov %[out], %[out], asr#12 \r\n"
920 "add %[_0], %[out], %[_0], asr #12 \r\n"
921 "add %[_2], %[_0], %[_2], asr #12 \r\n"
922 "pkhbt %[_0], %[_2], %[_3], asl #4 \r\n" /* _3[31:16], _2[15:0] */
923 "sadd16 %[_0], %[_0], %[_0] \r\n" /* _3[31:16]*2, _2[15:0]*2 */
924 "qsubaddx %[out], %[_0], %[_0] \r\n" /* out[15:0]=
925 * sat16(_3[31:16]+_2[15:0]) */
926 : [out]"+r"(output),
927 [_0]"+r"(amp_0), [_2]"+r"(_2), [_3]"+r"(_3));
928 }
929 /* Apply voice envelope */
930 asm volatile (
931 "smulbb %[out], %[out], %[envx] \r\n"
932 : [out]"+r"(output)
933 : [envx]"r"(voice->envx));
934 /* Reduce and apply left and right volume */
935 asm volatile (
936 "mov %[out], %[out], asr #11 \r\n"
937 "bic %[out], %[out], #0x1 \r\n"
938 "mul %[amp_0], %[out], %[vvol_0] \r\n"
939 "mul %[amp_1], %[out], %[vvol_1] \r\n"
940 : [out]"+r"(output),
941 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
942 : [vvol_0]"r"((int)voice->volume[0]),
943 [vvol_1]"r"((int)voice->volume[1]));
944
945 prev_outx = output;
946 raw_voice->outx = output >> 8;
947 #else /* ARM_ARCH < 6 */
948 int output = *(int16_t*) &this->noise;
949
950 if ( !(this->r.g.noise_enables & vbit) )
951 {
952 /* Interpolate */
953 int _2, _3;
954 asm volatile (
955 "ldrsh %[_0], [%[interp]] \r\n"
956 "ldrsh %[_2], [%[fwd]] \r\n"
957 "ldrsh %[_1], [%[interp], #2] \r\n"
958 "ldrsh %[_3], [%[fwd], #2] \r\n"
959 "mul %[out], %[_2], %[_0] \r\n" /* fwd[0]*interp[0] */
960 "ldrsh %[_2], [%[rev], #2] \r\n"
961 "mul %[_0], %[_3], %[_1] \r\n" /* fwd[1]*interp[1] */
962 "ldrsh %[_1], [%[interp], #4] \r\n"
963 "mov %[out], %[out], asr #12 \r\n"
964 "ldrsh %[_3], [%[rev]] \r\n"
965 "mul %[_2], %[_1], %[_2] \r\n" /* rev[1]*interp[2] */
966 "ldrsh %[_1], [%[interp], #6] \r\n"
967 "add %[_0], %[out], %[_0], asr #12 \r\n"
968 "mul %[_3], %[_1], %[_3] \r\n" /* rev[0]*interp[3] */
969 "add %[_2], %[_0], %[_2], asr #12 \r\n"
970 "mov %[_2], %[_2], lsl #17 \r\n"
971 "mov %[_3], %[_3], asr #12 \r\n"
972 "mov %[_3], %[_3], asl #1 \r\n"
973 "add %[out], %[_3], %[_2], asr #16 \r\n"
974 : [out]"=&r"(output),
975 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
976 [_2]"=&r"(_2), [_3]"=&r"(_3)
977 : [fwd]"r"(fwd), [rev]"r"(rev),
978 [interp]"r"(interp));
979
980 output = CLAMP16(output);
981 }
982 /* Apply voice envelope */
983 asm volatile (
984 "mul %[_0], %[out], %[envx] \r\n"
985 : [_0]"=r"(amp_0)
986 : [out]"r"(output), [envx]"r"((int)voice->envx));
987 /* Reduce and apply left and right volume */
988 asm volatile (
989 "mov %[out], %[amp_0], asr #11 \r\n" /* amp_0 = _0 */
990 "bic %[out], %[out], #0x1 \r\n"
991 "mul %[amp_0], %[out], %[vvol_0] \r\n"
992 "mul %[amp_1], %[out], %[vvol_1] \r\n"
993 : [out]"+r"(output),
994 [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1)
995 : [vvol_0]"r"((int)voice->volume[0]),
996 [vvol_1]"r"((int)voice->volume[1]));
997
998 prev_outx = output;
999 raw_voice->outx = output >> 8;
1000 #endif /* ARM_ARCH >= 6 */
1001 #else /* Unoptimized CPU */
1002 int output = *(int16_t*) &this->noise;
1003
1004 if ( !(this->r.g.noise_enables & vbit) )
1005 {
1006 output = (fwd [0] * interp [0]) & ~0xFFF;
1007 output = (output + fwd [1] * interp [1]) & ~0xFFF;
1008 output = (output + rev [1] * interp [2]) >> 12;
1009 output = (int16_t) (output * 2);
1010 output += ((rev [0] * interp [3]) >> 12) * 2;
1011 output = CLAMP16( output );
1012 }
1013 output = (output * voice->envx) >> 11 & ~1;
1014
1015 /* duplicated here to give compiler more to run in parallel */
1016 amp_0 = voice->volume [0] * output;
1017 amp_1 = voice->volume [1] * output;
1018
1019 prev_outx = output;
1020 raw_voice->outx = output >> 8;
1021 #endif /* CPU_* */
1022 }
1023 #else /* SPCNOINTERP */
1024 /* two-point linear interpolation */
1025 #ifdef CPU_COLDFIRE
1026 int amp_0 = (int16_t)this->noise;
1027 int amp_1;
1028
1029 if ( (this->r.g.noise_enables & vbit) == 0 )
1030 {
1031 uint32_t f = voice->position;
1032 int32_t y0;
1033
1034 /**
1035 * Formula (fastest found so far of MANY):
1036 * output = y0 + f*y1 - f*y0
1037 */
1038 asm volatile (
1039 /* separate fractional and whole parts */
1040 "move.l %[f], %[y1] \r\n"
1041 "and.l #0xfff, %[f] \r\n"
1042 "lsr.l %[sh], %[y1] \r\n"
1043 /* load samples y0 (upper) & y1 (lower) */
1044 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
1045 /* %acc0 = f*y1 */
1046 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
1047 /* %acc0 -= f*y0 */
1048 "msac.w %[f]l, %[y1]u, %%acc0 \r\n"
1049 /* separate out y0 and sign extend */
1050 "swap %[y1] \r\n"
1051 "movea.w %[y1], %[y0] \r\n"
1052 /* fetch result, scale down and add y0 */
1053 "movclr.l %%acc0, %[y1] \r\n"
1054 /* output = y0 + (result >> 12) */
1055 "asr.l %[sh], %[y1] \r\n"
1056 "add.l %[y0], %[y1] \r\n"
1057 : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
1058 : [s]"a"(voice->samples), [sh]"d"(12));
1059 }
1060
1061 /* apply voice envelope to output */
1062 asm volatile (
1063 "mac.w %[out]l, %[envx]l, %%acc0 \r\n"
1064 :
1065 : [out]"r"(amp_0), [envx]"r"(voice->envx));
1066
1067 /* advance voice position */
1068 voice->position += rate;
1069
1070 /* fetch output, scale and apply left and right
1071 voice volume */
1072 asm volatile (
1073 "movclr.l %%acc0, %[out] \r\n"
1074 "asr.l %[sh], %[out] \r\n"
1075 "mac.l %[vvol_0], %[out], %%acc0 \r\n"
1076 "mac.l %[vvol_1], %[out], %%acc1 \r\n"
1077 : [out]"=&d"(amp_0)
1078 : [vvol_0]"r"((int)voice->volume[0]),
1079 [vvol_1]"r"((int)voice->volume[1]),
1080 [sh]"d"(11));
1081
1082 /* save this output into previous, scale and save in
1083 output register */
1084 prev_outx = amp_0;
1085 raw_voice->outx = amp_0 >> 8;
1086
1087 /* fetch final voice output */
1088 asm volatile (
1089 "movclr.l %%acc0, %[amp_0] \r\n"
1090 "movclr.l %%acc1, %[amp_1] \r\n"
1091 : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1));
1092 #elif defined (CPU_ARM)
1093 int amp_0, amp_1;
1094
1095 if ( (this->r.g.noise_enables & vbit) != 0 )
1096 {
1097 amp_0 = *(int16_t *)&this->noise;
1098 }
1099 else
1100 {
1101 uint32_t f = voice->position;
1102 amp_0 = (uint32_t)voice->samples;
1103
1104 asm volatile(
1105 "mov %[y1], %[f], lsr #12 \r\n"
1106 "eor %[f], %[f], %[y1], lsl #12 \r\n"
1107 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
1108 "ldrsh %[y0], [%[y1], #2] \r\n"
1109 "ldrsh %[y1], [%[y1], #4] \r\n"
1110 "sub %[y1], %[y1], %[y0] \r\n"
1111 "mul %[f], %[y1], %[f] \r\n"
1112 "add %[y0], %[y0], %[f], asr #12 \r\n"
1113 : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1));
1114 }
1115
1116 voice->position += rate;
1117
1118 asm volatile(
1119 "mul %[amp_1], %[amp_0], %[envx] \r\n"
1120 "mov %[amp_0], %[amp_1], asr #11 \r\n"
1121 "mov %[amp_1], %[amp_0], asr #8 \r\n"
1122 : [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1)
1123 : [envx]"r"(voice->envx));
1124
1125 prev_outx = amp_0;
1126 raw_voice->outx = (int8_t)amp_1;
1127
1128 asm volatile(
1129 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
1130 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
1131 : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1)
1132 : [vol_0]"r"((int)voice->volume[0]),
1133 [vol_1]"r"((int)voice->volume[1]));
1134 #else /* Unoptimized CPU */
1135 int output;
1136
1137 if ( (this->r.g.noise_enables & vbit) == 0 )
1138 {
1139 int const fraction = voice->position & 0xfff;
1140 short const* const pos = (voice->samples + (voice->position >> 12)) + 1;
1141 output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12);
1142 } else {
1143 output = *(int16_t *)&this->noise;
1144 }
1145
1146 voice->position += rate;
1147
1148 output = (output * voice->envx) >> 11;
1149
1150 /* duplicated here to give compiler more to run in parallel */
1151 int amp_0 = voice->volume [0] * output;
1152 int amp_1 = voice->volume [1] * output;
1153
1154 prev_outx = output;
1155 raw_voice->outx = (int8_t) (output >> 8);
1156 #endif /* CPU_* */
1157 #endif /* SPCNOINTERP */
1158
1159 #if SPC_BRRCACHE
1160 if ( voice->position >= voice->wave_end )
1161 {
1162 long loop_len = voice->wave_loop << 12;
1163 voice->position -= loop_len;
1164 this->r.g.wave_ended |= vbit;
1165 if ( !loop_len )
1166 {
1167 this->keys_down ^= vbit;
1168 raw_voice->envx = 0;
1169 voice->envx = 0;
1170 }
1171 }
1172 #endif
1173#if 0
1174 EXIT_TIMER(dsp_gen);
1175
1176 ENTER_TIMER(dsp_mix);
1177#endif
1178 chans_0 += amp_0;
1179 chans_1 += amp_1;
1180 #if !SPC_NOECHO
1181 if ( this->r.g.echo_ons & vbit )
1182 {
1183 echo_0 += amp_0;
1184 echo_1 += amp_1;
1185 }
1186 #endif
1187#if 0
1188 EXIT_TIMER(dsp_mix);
1189#endif
1190 }
1191 /* end of voice loop */
1192
1193 #if !SPC_NOECHO
1194 #ifdef CPU_COLDFIRE
1195 /* Read feedback from echo buffer */
1196 int echo_pos = this->echo_pos;
1197 uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF);
1198 echo_pos += 4;
1199 if ( echo_pos >= echo_wrap )
1200 echo_pos = 0;
1201 this->echo_pos = echo_pos;
1202 int fb = swap_odd_even32(*(int32_t *)echo_ptr);
1203 int out_0, out_1;
1204
1205 /* Keep last 8 samples */
1206 *this->last_fir_ptr = fb;
1207 this->last_fir_ptr = this->fir_ptr;
1208
1209 /* Apply echo FIR filter to output samples read from echo buffer -
1210 circular buffer is hardware incremented and masked; FIR
1211 coefficients and buffer history are loaded in parallel with
1212 multiply accumulate operations. Shift left by one here and once
1213 again when calculating feedback to have sample values justified
1214 to bit 31 in the output to ease endian swap, interleaving and
1215 clamping before placing result in the program's echo buffer. */
1216 int _0, _1, _2;
1217 asm volatile (
1218 "move.l (%[fir_c]) , %[_2] \r\n"
1219 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
1220 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
1221 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1222 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
1223 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
1224 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1225 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1226 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1227 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1228 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1229 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1230 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1231 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1232 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1233 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1234 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1235 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
1236 [fir_p]"+a"(this->fir_ptr)
1237 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1238 );
1239
1240 /* Generate output */
1241 asm volatile (
1242 /* fetch filter results _after_ gcc loads asm
1243 block parameters to eliminate emac stalls */
1244 "movclr.l %%acc0, %[out_0] \r\n"
1245 "movclr.l %%acc1, %[out_1] \r\n"
1246 /* apply global volume */
1247 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1248 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1249 /* apply echo volume and add to final output */
1250 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1251 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1252 : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
1253 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1254 [ev_0]"r"((int)this->r.g.echo_volume_0),
1255 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
1256 [ev_1]"r"((int)this->r.g.echo_volume_1)
1257 );
1258
1259 /* Feedback into echo buffer */
1260 if ( !(this->r.g.flags & 0x20) )
1261 {
1262 int sh = 1 << 9;
1263
1264 asm volatile (
1265 /* scale echo voices; saturate if overflow */
1266 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1267 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1268 /* add scaled output from FIR filter */
1269 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1270 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1271 /* swap and fetch feedback results - simply
1272 swap_odd_even32 mixed in between macs and
1273 movclrs to mitigate stall issues */
1274 "move.l #0x00ff00ff, %[sh] \r\n"
1275 "movclr.l %%acc1, %[e1] \r\n"
1276 "swap %[e1] \r\n"
1277 "movclr.l %%acc0, %[e0] \r\n"
1278 "move.w %[e1], %[e0] \r\n"
1279 "and.l %[e0], %[sh] \r\n"
1280 "eor.l %[sh], %[e0] \r\n"
1281 "lsl.l #8, %[sh] \r\n"
1282 "lsr.l #8, %[e0] \r\n"
1283 "or.l %[sh], %[e0] \r\n"
1284 /* save final feedback into echo buffer */
1285 "move.l %[e0], (%[echo_ptr]) \r\n"
1286 : [e0]"+d"(echo_0), [e1]"+d"(echo_1), [sh]"+d"(sh)
1287 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1288 [ef]"r"((int)this->r.g.echo_feedback),
1289 [echo_ptr]"a"((int32_t *)echo_ptr)
1290 );
1291 }
1292
1293 /* Output final samples */
1294 asm volatile (
1295 /* fetch output saved in %acc2 and %acc3 */
1296 "movclr.l %%acc2, %[out_0] \r\n"
1297 "movclr.l %%acc3, %[out_1] \r\n"
1298 /* scale right by global_muting shift */
1299 "asr.l %[gm], %[out_0] \r\n"
1300 "asr.l %[gm], %[out_1] \r\n"
1301 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
1302 : [gm]"d"(global_muting)
1303 );
1304
1305 out_buf [ 0] = out_0;
1306 out_buf [WAV_CHUNK_SIZE] = out_1;
1307 out_buf ++;
1308 #elif defined (CPU_ARM)
1309 /* Read feedback from echo buffer */
1310 int echo_pos = this->echo_pos;
1311 uint8_t* const echo_ptr = RAM +
1312 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1313 echo_pos += 4;
1314 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1315 echo_pos = 0;
1316 this->echo_pos = echo_pos;
1317
1318 #if ARM_ARCH >= 6
1319 int32_t *fir_ptr, *fir_coeff;
1320 int fb_0, fb_1;
1321
1322 /* Apply FIR */
1323
1324 /* Keep last 8 samples */
1325 asm volatile (
1326 "ldr %[fb_0], [%[echo_p]] \r\n"
1327 "add %[fir_p], %[t_fir_p], #4 \r\n"
1328 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1329 "str %[fb_0], [%[fir_p], #-4] \r\n"
1330 /* duplicate at +8 eliminates wrap checking below */
1331 "str %[fb_0], [%[fir_p], #28] \r\n"
1332 : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr),
1333 [fb_0]"=&r"(fb_0)
1334 : [echo_p]"r"(echo_ptr), [mask]"i"(~FIR_BUF_MASK));
1335
1336 fir_coeff = (int32_t *)this->fir_coeff;
1337
1338 /* Fugly, but the best version found. */
1339 int _0;
1340 asm volatile ( /* L0R0 = acc0 */
1341 "ldmia %[fir_p]!, { r2-r5 } \r\n" /* L1R1-L4R4 = r2-r5 */
1342 "ldmia %[fir_c]!, { r0-r1 } \r\n" /* C0C1-C2C3 = r0-r1 */
1343 "pkhbt %[_0], %[acc0], r2, asl #16 \r\n" /* L0R0,L1R1->L0L1,R0R1 */
1344 "pkhtb r2, r2, %[acc0], asr #16 \r\n"
1345 "smuad %[acc0], %[_0], r0 \r\n" /* acc0=L0*C0+L1*C1 */
1346 "smuad %[acc1], r2, r0 \r\n" /* acc1=R0*C0+R1*C1 */
1347 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L2R2,L3R3->L2L3,R2R3 */
1348 "pkhtb r4, r4, r3, asr #16 \r\n"
1349 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L2*C2+L3*C3 */
1350 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R2*C2+R3*C3 */
1351 "ldmia %[fir_p], { r2-r4 } \r\n" /* L5R5-L7R7 = r2-r4 */
1352 "ldmia %[fir_c], { r0-r1 } \r\n" /* C4C5-C6C7 = r0-r1 */
1353 "pkhbt %[_0], r5, r2, asl #16 \r\n" /* L4R4,L5R5->L4L5,R4R5 */
1354 "pkhtb r2, r2, r5, asr #16 \r\n"
1355 "smlad %[acc0], %[_0], r0, %[acc0] \r\n" /* acc0+=L4*C4+L5*C5 */
1356 "smlad %[acc1], r2, r0, %[acc1] \r\n" /* acc1+=R4*C4+R5*C5 */
1357 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L6R6,L7R7->L6L7,R6R7 */
1358 "pkhtb r4, r4, r3, asr #16 \r\n"
1359 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L6*C6+L7*C7 */
1360 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R6*C6+R7*C7 */
1361 : [acc0]"+r"(fb_0), [acc1]"=&r"(fb_1), [_0]"=&r"(_0),
1362 [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1363 :
1364 : "r0", "r1", "r2", "r3", "r4", "r5");
1365
1366 /* Generate output */
1367 int amp_0, amp_1;
1368
1369 asm volatile (
1370 "mul %[amp_0], %[gvol_0], %[chans_0] \r\n"
1371 "mul %[amp_1], %[gvol_1], %[chans_1] \r\n"
1372 : [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
1373 : [gvol_0]"r"(global_vol_0), [gvol_1]"r"(global_vol_1),
1374 [chans_0]"r"(chans_0), [chans_1]"r"(chans_1));
1375 asm volatile (
1376 "mla %[amp_0], %[fb_0], %[ev_0], %[amp_0] \r\n"
1377 "mla %[amp_1], %[fb_1], %[ev_1], %[amp_1] \r\n"
1378 : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1)
1379 : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1),
1380 [ev_0]"r"((int)this->r.g.echo_volume_0),
1381 [ev_1]"r"((int)this->r.g.echo_volume_1));
1382
1383 out_buf [ 0] = amp_0 >> global_muting;
1384 out_buf [WAV_CHUNK_SIZE] = amp_1 >> global_muting;
1385 out_buf ++;
1386
1387 if ( !(this->r.g.flags & 0x20) )
1388 {
1389 /* Feedback into echo buffer */
1390 int e0, e1;
1391
1392 asm volatile (
1393 "mov %[e0], %[echo_0], asl #7 \r\n"
1394 "mov %[e1], %[echo_1], asl #7 \r\n"
1395 "mla %[e0], %[fb_0], %[efb], %[e0] \r\n"
1396 "mla %[e1], %[fb_1], %[efb], %[e1] \r\n"
1397 : [e0]"=&r"(e0), [e1]"=&r"(e1)
1398 : [echo_0]"r"(echo_0), [echo_1]"r"(echo_1),
1399 [fb_0]"r"(fb_0), [fb_1]"r"(fb_1),
1400 [efb]"r"((int)this->r.g.echo_feedback));
1401 asm volatile (
1402 "ssat %[e0], #16, %[e0], asr #14 \r\n"
1403 "ssat %[e1], #16, %[e1], asr #14 \r\n"
1404 "pkhbt %[e0], %[e0], %[e1], lsl #16 \r\n"
1405 "str %[e0], [%[echo_p]] \r\n"
1406 : [e0]"+r"(e0), [e1]"+r"(e1)
1407 : [echo_p]"r"(echo_ptr));
1408 }
1409 #else /* ARM_ARCH < 6 */
1410 int fb_0 = GET_LE16SA( echo_ptr );
1411 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1412 int32_t *fir_ptr, *fir_coeff;
1413
1414 /* Keep last 8 samples */
1415
1416 /* Apply FIR */
1417 asm volatile (
1418 "add %[fir_p], %[t_fir_p], #8 \r\n"
1419 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1420 "str %[fb_0], [%[fir_p], #-8] \r\n"
1421 "str %[fb_1], [%[fir_p], #-4] \r\n"
1422 /* duplicate at +8 eliminates wrap checking below */
1423 "str %[fb_0], [%[fir_p], #56] \r\n"
1424 "str %[fb_1], [%[fir_p], #60] \r\n"
1425 : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr)
1426 : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), [mask]"i"(~FIR_BUF_MASK));
1427
1428 fir_coeff = this->fir_coeff;
1429
1430 asm volatile (
1431 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1432 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1433 "mul %[fb_0], r0, %[fb_0] \r\n"
1434 "mul %[fb_1], r0, %[fb_1] \r\n"
1435 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1436 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1437 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1438 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1439 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1440 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1441 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1442 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1443 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1444 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1445 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1446 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1447 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1448 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1449 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1450 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1451 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1452 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1453 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1454 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1455 : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1),
1456 [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1457 :
1458 : "r0", "r1", "r2", "r3", "r4", "r5");
1459
1460 /* Generate output */
1461 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1462 >> global_muting;
1463 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1464 >> global_muting;
1465
1466 out_buf [ 0] = amp_0;
1467 out_buf [WAV_CHUNK_SIZE] = amp_1;
1468 out_buf ++;
1469
1470 if ( !(this->r.g.flags & 0x20) )
1471 {
1472 /* Feedback into echo buffer */
1473 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1474 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1475 e0 = CLAMP16( e0 );
1476 SET_LE16A( echo_ptr , e0 );
1477 e1 = CLAMP16( e1 );
1478 SET_LE16A( echo_ptr + 2, e1 );
1479 }
1480 #endif /* ARM_ARCH */
1481 #else /* Unoptimized CPU */
1482 /* Read feedback from echo buffer */
1483 int echo_pos = this->echo_pos;
1484 uint8_t* const echo_ptr = RAM +
1485 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1486 echo_pos += 4;
1487 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1488 echo_pos = 0;
1489 this->echo_pos = echo_pos;
1490 int fb_0 = GET_LE16SA( echo_ptr );
1491 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1492
1493 /* Keep last 8 samples */
1494 int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos;
1495 this->fir_pos = (this->fir_pos + 1) & (FIR_BUF_HALF - 1);
1496 fir_ptr [ 0] [0] = fb_0;
1497 fir_ptr [ 0] [1] = fb_1;
1498 /* duplicate at +8 eliminates wrap checking below */
1499 fir_ptr [FIR_BUF_HALF] [0] = fb_0;
1500 fir_ptr [FIR_BUF_HALF] [1] = fb_1;
1501
1502 /* Apply FIR */
1503 fb_0 *= this->fir_coeff [0];
1504 fb_1 *= this->fir_coeff [0];
1505
1506 #define DO_PT( i )\
1507 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1508 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1509
1510 DO_PT( 1 )
1511 DO_PT( 2 )
1512 DO_PT( 3 )
1513 DO_PT( 4 )
1514 DO_PT( 5 )
1515 DO_PT( 6 )
1516 DO_PT( 7 )
1517
1518 /* Generate output */
1519 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1520 >> global_muting;
1521 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1522 >> global_muting;
1523 out_buf [ 0] = amp_0;
1524 out_buf [WAV_CHUNK_SIZE] = amp_1;
1525 out_buf ++;
1526
1527 if ( !(this->r.g.flags & 0x20) )
1528 {
1529 /* Feedback into echo buffer */
1530 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1531 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1532 e0 = CLAMP16( e0 );
1533 SET_LE16A( echo_ptr , e0 );
1534 e1 = CLAMP16( e1 );
1535 SET_LE16A( echo_ptr + 2, e1 );
1536 }
1537 #endif /* CPU_* */
1538 #else /* SPCNOECHO == 1*/
1539 /* Generate output */
1540 int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1541 int amp_1 = (chans_1 * global_vol_1) >> global_muting;
1542 out_buf [ 0] = amp_0;
1543 out_buf [WAV_CHUNK_SIZE] = amp_1;
1544 out_buf ++;
1545 #endif /* SPCNOECHO */
1546 }
1547 while ( --count );
1548#if 0
1549 EXIT_TIMER(dsp);
1550 ENTER_TIMER(cpu);
1551#endif
1552}
1553
1554void DSP_reset( struct Spc_Dsp* this )
1555{
1556 this->keys_down = 0;
1557 this->echo_pos = 0;
1558 this->noise_count = 0;
1559 this->noise = 2;
1560
1561 this->r.g.flags = 0xE0; /* reset, mute, echo off */
1562 this->r.g.key_ons = 0;
1563
1564 ci->memset( this->voice_state, 0, sizeof this->voice_state );
1565
1566 int i;
1567 for ( i = VOICE_COUNT; --i >= 0; )
1568 {
1569 struct voice_t* v = this->voice_state + i;
1570 v->env_mode = state_release;
1571 v->addr = ram.ram;
1572 }
1573
1574 #if SPC_BRRCACHE
1575 this->oldsize = 0;
1576 for ( i = 0; i < 256; i++ )
1577 this->wave_entry [i].start_addr = -1;
1578 #endif
1579
1580#if defined(CPU_COLDFIRE)
1581 this->fir_ptr = fir_buf;
1582 this->last_fir_ptr = &fir_buf [7];
1583 ci->memset( fir_buf, 0, sizeof fir_buf );
1584#elif defined (CPU_ARM)
1585 this->fir_ptr = fir_buf;
1586 ci->memset( fir_buf, 0, sizeof fir_buf );
1587#else
1588 this->fir_pos = 0;
1589 ci->memset( this->fir_buf, 0, sizeof this->fir_buf );
1590#endif
1591
1592 assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT );
1593 assert( sizeof (this->r.voice) == REGISTER_COUNT );
1594}