diff options
author | Sean Bartell <wingedtachikoma@gmail.com> | 2011-06-25 21:32:25 -0400 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-04-25 22:13:20 +0200 |
commit | f40bfc9267b13b54e6379dfe7539447662879d24 (patch) | |
tree | 9b20069d5e62809ff434061ad730096836f916f2 /lib/rbcodec/codecs/libspc/spc_dsp.c | |
parent | a0009907de7a0107d49040d8a180f140e2eff299 (diff) | |
download | rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.tar.gz rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.zip |
Add codecs to librbcodec.
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius <nils@rockbox.org>
Tested-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/libspc/spc_dsp.c')
-rw-r--r-- | lib/rbcodec/codecs/libspc/spc_dsp.c | 1594 |
1 files changed, 1594 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspc/spc_dsp.c b/lib/rbcodec/codecs/libspc/spc_dsp.c new file mode 100644 index 0000000000..6350c4c331 --- /dev/null +++ b/lib/rbcodec/codecs/libspc/spc_dsp.c | |||
@@ -0,0 +1,1594 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2007-2008 Michael Sevakis (jhMikeS) | ||
11 | * Copyright (C) 2006-2007 Adam Gashlin (hcs) | ||
12 | * Copyright (C) 2004-2007 Shay Green (blargg) | ||
13 | * Copyright (C) 2002 Brad Martin | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or | ||
16 | * modify it under the terms of the GNU General Public License | ||
17 | * as published by the Free Software Foundation; either version 2 | ||
18 | * of the License, or (at your option) any later version. | ||
19 | * | ||
20 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
21 | * KIND, either express or implied. | ||
22 | * | ||
23 | ****************************************************************************/ | ||
24 | |||
25 | /* The DSP portion (awe!) */ | ||
26 | #include "codeclib.h" | ||
27 | #include "spc_codec.h" | ||
28 | #include "spc_profiler.h" | ||
29 | |||
/* File-scope sample buffers.
 * fir_buf is only defined for ColdFire and ARM builds and is aligned to
 * FIR_BUF_ALIGN; presumably it backs the CPU-specific echo FIR code
 * (the consumer is not visible in this part of the file - confirm). */
30 | #if defined(CPU_COLDFIRE) || defined (CPU_ARM) | ||
31 | int32_t fir_buf[FIR_BUF_CNT] IBSS_ATTR_SPC | ||
32 | __attribute__((aligned(FIR_BUF_ALIGN*1))); | ||
33 | #endif | ||
/* BRRcache holds pre-decoded BRR waveforms when SPC_BRRCACHE is enabled;
 * decode_brr() writes a waveform's samples starting at
 * BRRcache + start_addr * 2. */
34 | #if SPC_BRRCACHE | ||
35 | /* a little extra for samples that go past end */ | ||
36 | int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR; | ||
37 | #endif | ||
38 | |||
/* Write one DSP register and refresh the cached state derived from it.
 *
 * this: DSP instance whose register file (r.reg) is updated
 * i:    register index; asserted to be below REGISTER_COUNT
 * data: value stored into r.reg[i]
 *
 * Registers whose low nibble is 0 or 1 refresh the cached volume pair of
 * voice (i >> 4); registers whose low nibble is 0x0F update one FIR
 * coefficient. Every other register is only stored.
 */
39 | void DSP_write( struct Spc_Dsp* this, int i, int data ) | ||
40 | { | ||
/* NOTE(review): this assert is the only range check on i; if assert()
   compiles to nothing in a given build, nothing guards the store below. */
41 | assert( (unsigned) i < REGISTER_COUNT ); | ||
42 | |||
43 | this->r.reg [i] = data; | ||
44 | int high = i >> 4; | ||
45 | int low = i & 0x0F; | ||
46 | if ( low < 2 ) /* voice volumes */ | ||
47 | { | ||
/* Re-read both bytes of the pair (i & ~1 and i | 1) as signed 8-bit
   values, so both cached volumes are rewritten whichever one changed. */
48 | int left = *(int8_t const*) &this->r.reg [i & ~1]; | ||
49 | int right = *(int8_t const*) &this->r.reg [i | 1]; | ||
50 | struct voice_t* v = this->voice_state + high; | ||
51 | v->volume [0] = left; | ||
52 | v->volume [1] = right; | ||
53 | } | ||
54 | else if ( low == 0x0F ) /* fir coefficients */ | ||
55 | { | ||
/* Coefficients are cached in reverse register order: the register with
   high nibble h lands in fir_coeff [7 - h]. */
56 | this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */ | ||
57 | } | ||
58 | } | ||
59 | |||
/* Shorthand used by the BRR decoders below on each filtered sample before
   it is doubled and truncated to int16_t. Forwards to clip_sample_16(),
   which is not defined in the visible part of this file. */
60 | #define CLAMP16( n ) clip_sample_16( n ) | ||
61 | |||
62 | #if SPC_BRRCACHE | ||
/* Pre-decode a whole BRR waveform into BRRcache (SPC_BRRCACHE builds only).
 *
 * this:       DSP instance; its wave_entry [] slot for raw_voice->waveform
 *             is filled in, and wave_entry_old [] / oldsize may grow
 * start_addr: offset of the waveform's first BRR block within RAM
 * voice:      voice being keyed on; voice->addr is updated to point just
 *             past each block as it is decoded
 * raw_voice:  register view of the voice; only ->waveform is read here
 *
 * If an entry with the same start_addr already exists in wave_entry_old it
 * is copied and no decoding happens. Otherwise blocks of 9 bytes (one
 * header byte followed by packed 4-bit samples) are decoded until a header
 * with the end flag (bit 0) is seen or the address reaches RAM + 0x10000,
 * and the resulting entry is appended to wave_entry_old.
 */
63 | static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, | ||
64 | struct voice_t* voice, | ||
65 | struct raw_voice_t const* const raw_voice ) ICODE_ATTR_SPC; | ||
66 | static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, | ||
67 | struct voice_t* voice, | ||
68 | struct raw_voice_t const* const raw_voice ) | ||
69 | { | ||
70 | /* setup same variables as where decode_brr() is called from */ | ||
71 | #undef RAM | ||
72 | #define RAM ram.ram | ||
73 | |||
74 | struct src_dir const* const sd = | ||
75 | &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)]; | ||
76 | struct cache_entry_t* const wave_entry = | ||
77 | &this->wave_entry [raw_voice->waveform]; | ||
78 | |||
79 | /* the following block can be put in place of the call to | ||
80 | decode_brr() below | ||
81 | */ | ||
82 | { | ||
83 | DEBUGF( "decode at %08x (wave #%d)\n", | ||
84 | start_addr, raw_voice->waveform ); | ||
85 | |||
86 | /* see if in cache */ | ||
87 | int i; | ||
88 | for ( i = 0; i < this->oldsize; i++ ) | ||
89 | { | ||
90 | struct cache_entry_t* e = &this->wave_entry_old [i]; | ||
91 | if ( e->start_addr == start_addr ) | ||
92 | { | ||
93 | DEBUGF( "found in wave_entry_old (oldsize=%d)\n", | ||
94 | this->oldsize ); | ||
95 | *wave_entry = *e; | ||
96 | goto wave_in_cache; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | wave_entry->start_addr = start_addr; | ||
101 | |||
/* Address of the loop block from the source directory; when decoding
   reaches it, the matching output position is remembered in loop_start. */
102 | uint8_t const* const loop_ptr = | ||
103 | RAM + letoh16(sd[raw_voice->waveform].loop); | ||
104 | short* loop_start = 0; | ||
105 | |||
/* Output goes to BRRcache at twice the waveform's RAM offset; the first
   slot is zeroed before any decoded sample is stored. */
106 | short* out = BRRcache + start_addr * 2; | ||
107 | wave_entry->samples = out; | ||
108 | *out++ = 0; | ||
/* smp1 / smp2 are the two most recent decoded samples fed back into the
   BRR filters (smp1 is the newer one). */
109 | int smp1 = 0; | ||
110 | int smp2 = 0; | ||
111 | |||
112 | uint8_t const* addr = RAM + start_addr; | ||
113 | int block_header; | ||
114 | do | ||
115 | { | ||
116 | if ( addr == loop_ptr ) | ||
117 | { | ||
118 | loop_start = out; | ||
119 | DEBUGF( "loop at %08lx (wave #%d)\n", | ||
120 | (unsigned long)(addr - RAM), raw_voice->waveform ); | ||
121 | } | ||
122 | |||
123 | /* header */ | ||
124 | block_header = *addr; | ||
125 | addr += 9; | ||
126 | voice->addr = addr; | ||
/* Filter bits are rebased so the most common mode (0x08) compares as 0:
   -8 = mode 0x00, -4 = mode 0x04, 0 = mode 0x08, 4 = mode 0x0C. */
127 | int const filter = (block_header & 0x0C) - 0x08; | ||
128 | |||
129 | /* scaling | ||
130 | (invalid scaling gives -4096 for neg nybble, 0 for pos) */ | ||
131 | static unsigned char const right_shifts [16] = { | ||
132 | 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29, | ||
133 | }; | ||
134 | static unsigned char const left_shifts [16] = { | ||
135 | 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11 | ||
136 | }; | ||
137 | int const scale = block_header >> 4; | ||
138 | int const right_shift = right_shifts [scale]; | ||
139 | int const left_shift = left_shifts [scale]; | ||
140 | |||
141 | /* output position */ | ||
/* out and addr both point past the current block; offset runs from a
   negative value up to 0 in steps of 4, so offset >> 3 indexes the data
   byte, offset & 4 selects the nybble and offset >> 2 the output slot.
   NOTE(review): assuming BRR_BLOCK_SIZE is positive, the expression below
   left-shifts a negative value, which ISO C leaves undefined even though
   common compilers treat it as multiplication by 4. */
142 | out += BRR_BLOCK_SIZE; | ||
143 | int offset = -BRR_BLOCK_SIZE << 2; | ||
144 | |||
145 | do /* decode and filter 16 samples */ | ||
146 | { | ||
147 | /* Get nybble, sign-extend, then scale | ||
148 | get byte, select which nybble, sign-extend, then shift based | ||
149 | on scaling. also handles invalid scaling values. */ | ||
150 | int delta = (int) (int8_t) (addr [offset >> 3] << (offset & 4)) | ||
151 | >> right_shift << left_shift; | ||
152 | |||
/* The value stored is smp2, i.e. the output lags the decoder by two
   samples; the last two are flushed after the block loop. */
153 | out [offset >> 2] = smp2; | ||
154 | |||
155 | if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */ | ||
156 | { | ||
157 | delta -= smp2 >> 1; | ||
158 | delta += smp2 >> 5; | ||
159 | smp2 = smp1; | ||
160 | delta += smp1; | ||
161 | delta += (-smp1 - (smp1 >> 1)) >> 5; | ||
162 | } | ||
163 | else | ||
164 | { | ||
/* Mode 0x00 (filter == -8) matches neither test below, so delta is
   used unfiltered. */
165 | if ( filter == -4 ) /* mode 0x04 */ | ||
166 | { | ||
167 | delta += smp1 >> 1; | ||
168 | delta += (-smp1) >> 5; | ||
169 | } | ||
170 | else if ( filter > -4 ) /* mode 0x0C */ | ||
171 | { | ||
172 | delta -= smp2 >> 1; | ||
173 | delta += (smp2 + (smp2 >> 1)) >> 4; | ||
174 | delta += smp1; | ||
175 | delta += (-smp1 * 13) >> 7; | ||
176 | } | ||
177 | smp2 = smp1; | ||
178 | } | ||
179 | |||
180 | delta = CLAMP16( delta ); | ||
181 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
182 | } | ||
183 | while ( (offset += 4) != 0 ); | ||
184 | |||
185 | /* if next block has end flag set, this block ends early */ | ||
186 | /* (verified) */ | ||
187 | if ( (block_header & 3) != 3 && (*addr & 3) == 1 ) | ||
188 | { | ||
189 | /* skip last 9 samples */ | ||
190 | out -= 9; | ||
191 | goto early_end; | ||
192 | } | ||
193 | } | ||
194 | while ( !(block_header & 1) && addr < RAM + 0x10000 ); | ||
195 | |||
/* Flush the two samples still held in the filter history (skipped when
   the early-end path above is taken). */
196 | out [0] = smp2; | ||
197 | out [1] = smp1; | ||
198 | |||
199 | early_end: | ||
/* end is stored as a sample count shifted left by 12, the same scale as
   voice->position elsewhere in this file. */
200 | wave_entry->end = (out - 1 - wave_entry->samples) << 12; | ||
201 | |||
202 | wave_entry->loop = 0; | ||
203 | if ( (block_header & 2) ) | ||
204 | { | ||
205 | if ( loop_start ) | ||
206 | { | ||
/* Looping waveform: record the loop length in samples, extend the end
   by three samples (0x3000) and copy three samples from the loop start
   into the slots just past the end. */
207 | int loop = out - loop_start; | ||
208 | wave_entry->loop = loop; | ||
209 | wave_entry->end += 0x3000; | ||
210 | out [2] = loop_start [2]; | ||
211 | out [3] = loop_start [3]; | ||
212 | out [4] = loop_start [4]; | ||
213 | } | ||
214 | else | ||
215 | { | ||
216 | DEBUGF( "loop point outside initial wave\n" ); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | DEBUGF( "end at %08lx (wave #%d)\n", | ||
221 | (unsigned long)(addr - RAM), raw_voice->waveform ); | ||
222 | |||
223 | /* add to cache */ | ||
/* NOTE(review): oldsize is incremented with no bound check visible here;
   confirm that the capacity of wave_entry_old is guaranteed elsewhere. */
224 | this->wave_entry_old [this->oldsize++] = *wave_entry; | ||
225 | wave_in_cache:; | ||
226 | } | ||
227 | } | ||
228 | #endif | ||
229 | |||
/* Advance a pending key-on for one voice and start the voice once the
 * delay has run out.
 *
 * this:         DSP instance (keys_down and, with SPC_BRRCACHE, the
 *               waveform cache are updated)
 * voice:        internal state of the voice being keyed on
 * sd:           source directory used to look up the waveform start address
 * raw_voice:    register view of the voice; ->waveform selects the entry
 * key_on_delay: remaining delay, already decremented by the caller; it is
 *               always stored back into voice->key_on_delay
 * vbit:         bit mask of this voice within keys_down
 *
 * Nothing besides storing the delay happens until key_on_delay reaches 0.
 */
230 | static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, | ||
231 | struct src_dir const* const sd, | ||
232 | struct raw_voice_t const* const raw_voice, | ||
233 | const int key_on_delay, const int vbit) ICODE_ATTR_SPC; | ||
234 | static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, | ||
235 | struct src_dir const* const sd, | ||
236 | struct raw_voice_t const* const raw_voice, | ||
237 | const int key_on_delay, const int vbit) { | ||
238 | #undef RAM | ||
239 | #define RAM ram.ram | ||
240 | int const env_rate_init = 0x7800; | ||
241 | voice->key_on_delay = key_on_delay; | ||
242 | if ( key_on_delay == 0 ) | ||
243 | { | ||
/* Delay expired: mark the voice as sounding and restart its envelope
   from zero in the attack state. */
244 | this->keys_down |= vbit; | ||
245 | voice->envx = 0; | ||
246 | voice->env_mode = state_attack; | ||
247 | voice->env_timer = env_rate_init; /* TODO: inaccurate? */ | ||
/* The directory entry is stored little-endian, hence letoh16(). */
248 | unsigned start_addr = letoh16(sd[raw_voice->waveform].start); | ||
249 | #if !SPC_BRRCACHE | ||
250 | { | ||
251 | voice->addr = RAM + start_addr; | ||
252 | /* BRR filter uses previous samples */ | ||
253 | voice->samples [BRR_BLOCK_SIZE + 1] = 0; | ||
254 | voice->samples [BRR_BLOCK_SIZE + 2] = 0; | ||
255 | /* decode three samples immediately */ | ||
/* The position is set past BRR_BLOCK_SIZE * 0x1000, the threshold
   DSP_run_() tests before decoding the next block. */
256 | voice->position = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1; | ||
257 | voice->block_header = 0; /* "previous" BRR header */ | ||
258 | } | ||
259 | #else | ||
260 | { | ||
261 | voice->position = 3 * 0x1000 - 1; | ||
262 | struct cache_entry_t* const wave_entry = | ||
263 | &this->wave_entry [raw_voice->waveform]; | ||
264 | |||
265 | /* predecode BRR if not already */ | ||
266 | if ( wave_entry->start_addr != start_addr ) | ||
267 | { | ||
268 | /* the following line can be replaced by the indicated block | ||
269 | in decode_brr() */ | ||
270 | decode_brr( this, start_addr, voice, raw_voice ); | ||
271 | } | ||
272 | |||
/* Point the voice at the cached waveform and copy its end and loop
   values. */
273 | voice->samples = wave_entry->samples; | ||
274 | voice->wave_end = wave_entry->end; | ||
275 | voice->wave_loop = wave_entry->loop; | ||
276 | } | ||
277 | #endif | ||
278 | } | ||
279 | } | ||
280 | |||
281 | void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | ||
282 | { | ||
283 | #undef RAM | ||
284 | #if defined(CPU_ARM) && !SPC_BRRCACHE | ||
285 | uint8_t* const ram_ = ram.ram; | ||
286 | #define RAM ram_ | ||
287 | #else | ||
288 | #define RAM ram.ram | ||
289 | #endif | ||
290 | #if 0 | ||
291 | EXIT_TIMER(cpu); | ||
292 | ENTER_TIMER(dsp); | ||
293 | #endif | ||
294 | |||
295 | /* Here we check for keys on/off. Docs say that successive writes | ||
296 | to KON/KOF must be separated by at least 2 Ts periods or risk | ||
297 | being neglected. Therefore DSP only looks at these during an | ||
298 | update, and not at the time of the write. Only need to do this | ||
299 | once however, since the regs haven't changed over the whole | ||
300 | period we need to catch up with. */ | ||
301 | |||
302 | { | ||
303 | int key_ons = this->r.g.key_ons; | ||
304 | int key_offs = this->r.g.key_offs; | ||
305 | /* keying on a voice resets that bit in ENDX */ | ||
306 | this->r.g.wave_ended &= ~key_ons; | ||
307 | /* key_off bits prevent key_on from being acknowledged */ | ||
308 | this->r.g.key_ons = key_ons & key_offs; | ||
309 | |||
310 | /* process key events outside loop, since they won't re-occur */ | ||
311 | struct voice_t* voice = this->voice_state + 8; | ||
312 | int vbit = 0x80; | ||
313 | do | ||
314 | { | ||
315 | --voice; | ||
316 | if ( key_offs & vbit ) | ||
317 | { | ||
318 | voice->env_mode = state_release; | ||
319 | voice->key_on_delay = 0; | ||
320 | } | ||
321 | else if ( key_ons & vbit ) | ||
322 | { | ||
323 | voice->key_on_delay = 8; | ||
324 | } | ||
325 | } | ||
326 | while ( (vbit >>= 1) != 0 ); | ||
327 | } | ||
328 | |||
329 | struct src_dir const* const sd = | ||
330 | &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)]; | ||
331 | |||
332 | #ifdef ROCKBOX_BIG_ENDIAN | ||
333 | /* Convert endiannesses before entering loops - these | ||
334 | get used a lot */ | ||
335 | const uint32_t rates[VOICE_COUNT] = | ||
336 | { | ||
337 | GET_LE16A( this->r.voice[0].rate ) & 0x3FFF, | ||
338 | GET_LE16A( this->r.voice[1].rate ) & 0x3FFF, | ||
339 | GET_LE16A( this->r.voice[2].rate ) & 0x3FFF, | ||
340 | GET_LE16A( this->r.voice[3].rate ) & 0x3FFF, | ||
341 | GET_LE16A( this->r.voice[4].rate ) & 0x3FFF, | ||
342 | GET_LE16A( this->r.voice[5].rate ) & 0x3FFF, | ||
343 | GET_LE16A( this->r.voice[6].rate ) & 0x3FFF, | ||
344 | GET_LE16A( this->r.voice[7].rate ) & 0x3FFF, | ||
345 | }; | ||
346 | #define VOICE_RATE(x) *(x) | ||
347 | #define IF_RBE(...) __VA_ARGS__ | ||
348 | #ifdef CPU_COLDFIRE | ||
349 | /* Initialize mask register with the buffer address mask */ | ||
350 | asm volatile ("move.l %[m], %%mask" : : [m]"i"(FIR_BUF_MASK)); | ||
351 | const int echo_wrap = (this->r.g.echo_delay & 15) * 0x800; | ||
352 | const int echo_start = this->r.g.echo_page * 0x100; | ||
353 | #endif /* CPU_COLDFIRE */ | ||
354 | #else | ||
355 | #define VOICE_RATE(x) (GET_LE16(raw_voice->rate) & 0x3FFF) | ||
356 | #define IF_RBE(...) | ||
357 | #endif /* ROCKBOX_BIG_ENDIAN */ | ||
358 | |||
359 | #if !SPC_NOINTERP | ||
360 | int const slow_gaussian = (this->r.g.pitch_mods >> 1) | | ||
361 | this->r.g.noise_enables; | ||
362 | #endif | ||
363 | /* (g.flags & 0x40) ? 30 : 14 */ | ||
364 | int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8; | ||
365 | int const global_vol_0 = this->r.g.volume_0; | ||
366 | int const global_vol_1 = this->r.g.volume_1; | ||
367 | |||
368 | /* each rate divides exactly into 0x7800 without remainder */ | ||
369 | int const env_rate_init = 0x7800; | ||
370 | static unsigned short const env_rates [0x20] ICONST_ATTR_SPC = | ||
371 | { | ||
372 | 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C, | ||
373 | 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180, | ||
374 | 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00, | ||
375 | 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800 | ||
376 | }; | ||
377 | |||
378 | do /* one pair of output samples per iteration */ | ||
379 | { | ||
380 | /* Noise */ | ||
381 | if ( this->r.g.noise_enables ) | ||
382 | { | ||
383 | if ( (this->noise_count -= | ||
384 | env_rates [this->r.g.flags & 0x1F]) <= 0 ) | ||
385 | { | ||
386 | this->noise_count = env_rate_init; | ||
387 | int feedback = (this->noise << 13) ^ (this->noise << 14); | ||
388 | this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1); | ||
389 | } | ||
390 | } | ||
391 | |||
392 | #if !SPC_NOECHO | ||
393 | int echo_0 = 0; | ||
394 | int echo_1 = 0; | ||
395 | #endif | ||
396 | long prev_outx = 0; /* TODO: correct value for first channel? */ | ||
397 | int chans_0 = 0; | ||
398 | int chans_1 = 0; | ||
399 | /* TODO: put raw_voice pointer in voice_t? */ | ||
400 | struct raw_voice_t * raw_voice = this->r.voice; | ||
401 | struct voice_t* voice = this->voice_state; | ||
402 | int vbit = 1; | ||
403 | IF_RBE( const uint32_t* vr = rates; ) | ||
404 | for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) ) | ||
405 | { | ||
406 | /* pregen involves checking keyon, etc */ | ||
407 | #if 0 | ||
408 | ENTER_TIMER(dsp_pregen); | ||
409 | #endif | ||
410 | |||
411 | /* Key on events are delayed */ | ||
412 | int key_on_delay = voice->key_on_delay; | ||
413 | |||
414 | if ( UNLIKELY ( --key_on_delay >= 0 ) ) /* <1% of the time */ | ||
415 | { | ||
416 | key_on(this,voice,sd,raw_voice,key_on_delay,vbit); | ||
417 | } | ||
418 | |||
419 | if ( !(this->keys_down & vbit) ) /* Silent channel */ | ||
420 | { | ||
421 | silent_chan: | ||
422 | raw_voice->envx = 0; | ||
423 | raw_voice->outx = 0; | ||
424 | prev_outx = 0; | ||
425 | continue; | ||
426 | } | ||
427 | |||
428 | /* Envelope */ | ||
429 | { | ||
430 | int const ENV_RANGE = 0x800; | ||
431 | int env_mode = voice->env_mode; | ||
432 | int adsr0 = raw_voice->adsr [0]; | ||
433 | int env_timer; | ||
434 | if ( LIKELY ( env_mode != state_release ) ) /* 99% of the time */ | ||
435 | { | ||
436 | env_timer = voice->env_timer; | ||
437 | if ( LIKELY ( adsr0 & 0x80 ) ) /* 79% of the time */ | ||
438 | { | ||
439 | int adsr1 = raw_voice->adsr [1]; | ||
440 | if ( LIKELY ( env_mode == state_sustain ) ) /* 74% of the time */ | ||
441 | { | ||
442 | if ( (env_timer -= env_rates [adsr1 & 0x1F]) > 0 ) | ||
443 | goto write_env_timer; | ||
444 | |||
445 | int envx = voice->envx; | ||
446 | envx--; /* envx *= 255 / 256 */ | ||
447 | envx -= envx >> 8; | ||
448 | voice->envx = envx; | ||
449 | /* TODO: should this be 8? */ | ||
450 | raw_voice->envx = envx >> 4; | ||
451 | goto init_env_timer; | ||
452 | } | ||
453 | else if ( env_mode < 0 ) /* 25% state_decay */ | ||
454 | { | ||
455 | int envx = voice->envx; | ||
456 | if ( (env_timer -= | ||
457 | env_rates [(adsr0 >> 3 & 0x0E) + 0x10]) <= 0 ) | ||
458 | { | ||
459 | envx--; /* envx *= 255 / 256 */ | ||
460 | envx -= envx >> 8; | ||
461 | voice->envx = envx; | ||
462 | /* TODO: should this be 8? */ | ||
463 | raw_voice->envx = envx >> 4; | ||
464 | env_timer = env_rate_init; | ||
465 | } | ||
466 | |||
467 | int sustain_level = adsr1 >> 5; | ||
468 | if ( envx <= (sustain_level + 1) * 0x100 ) | ||
469 | voice->env_mode = state_sustain; | ||
470 | |||
471 | goto write_env_timer; | ||
472 | } | ||
473 | else /* state_attack */ | ||
474 | { | ||
475 | int t = adsr0 & 0x0F; | ||
476 | if ( (env_timer -= env_rates [t * 2 + 1]) > 0 ) | ||
477 | goto write_env_timer; | ||
478 | |||
479 | int envx = voice->envx; | ||
480 | |||
481 | int const step = ENV_RANGE / 64; | ||
482 | envx += step; | ||
483 | if ( t == 15 ) | ||
484 | envx += ENV_RANGE / 2 - step; | ||
485 | |||
486 | if ( envx >= ENV_RANGE ) | ||
487 | { | ||
488 | envx = ENV_RANGE - 1; | ||
489 | voice->env_mode = state_decay; | ||
490 | } | ||
491 | voice->envx = envx; | ||
492 | /* TODO: should this be 8? */ | ||
493 | raw_voice->envx = envx >> 4; | ||
494 | goto init_env_timer; | ||
495 | } | ||
496 | } | ||
497 | else /* gain mode */ | ||
498 | { | ||
499 | int t = raw_voice->gain; | ||
500 | if ( t < 0x80 ) | ||
501 | { | ||
502 | raw_voice->envx = t; | ||
503 | voice->envx = t << 4; | ||
504 | goto env_end; | ||
505 | } | ||
506 | else | ||
507 | { | ||
508 | if ( (env_timer -= env_rates [t & 0x1F]) > 0 ) | ||
509 | goto write_env_timer; | ||
510 | |||
511 | int envx = voice->envx; | ||
512 | int mode = t >> 5; | ||
513 | if ( mode <= 5 ) /* decay */ | ||
514 | { | ||
515 | int step = ENV_RANGE / 64; | ||
516 | if ( mode == 5 ) /* exponential */ | ||
517 | { | ||
518 | envx--; /* envx *= 255 / 256 */ | ||
519 | step = envx >> 8; | ||
520 | } | ||
521 | if ( (envx -= step) < 0 ) | ||
522 | { | ||
523 | envx = 0; | ||
524 | if ( voice->env_mode == state_attack ) | ||
525 | voice->env_mode = state_decay; | ||
526 | } | ||
527 | } | ||
528 | else /* attack */ | ||
529 | { | ||
530 | int const step = ENV_RANGE / 64; | ||
531 | envx += step; | ||
532 | if ( mode == 7 && | ||
533 | envx >= ENV_RANGE * 3 / 4 + step ) | ||
534 | envx += ENV_RANGE / 256 - step; | ||
535 | |||
536 | if ( envx >= ENV_RANGE ) | ||
537 | envx = ENV_RANGE - 1; | ||
538 | } | ||
539 | voice->envx = envx; | ||
540 | /* TODO: should this be 8? */ | ||
541 | raw_voice->envx = envx >> 4; | ||
542 | goto init_env_timer; | ||
543 | } | ||
544 | } | ||
545 | } | ||
546 | else /* state_release */ | ||
547 | { | ||
548 | int envx = voice->envx; | ||
549 | if ( (envx -= ENV_RANGE / 256) > 0 ) | ||
550 | { | ||
551 | voice->envx = envx; | ||
552 | raw_voice->envx = envx >> 8; | ||
553 | goto env_end; | ||
554 | } | ||
555 | else | ||
556 | { | ||
557 | /* bit was set, so this clears it */ | ||
558 | this->keys_down ^= vbit; | ||
559 | voice->envx = 0; | ||
560 | goto silent_chan; | ||
561 | } | ||
562 | } | ||
563 | init_env_timer: | ||
564 | env_timer = env_rate_init; | ||
565 | write_env_timer: | ||
566 | voice->env_timer = env_timer; | ||
567 | env_end:; | ||
568 | } | ||
569 | #if 0 | ||
570 | EXIT_TIMER(dsp_pregen); | ||
571 | |||
572 | ENTER_TIMER(dsp_gen); | ||
573 | #endif | ||
574 | #if !SPC_BRRCACHE | ||
575 | /* Decode BRR block */ | ||
576 | if ( voice->position >= BRR_BLOCK_SIZE * 0x1000 ) | ||
577 | { | ||
578 | voice->position -= BRR_BLOCK_SIZE * 0x1000; | ||
579 | |||
580 | uint8_t const* addr = voice->addr; | ||
581 | if ( addr >= RAM + 0x10000 ) | ||
582 | addr -= 0x10000; | ||
583 | |||
584 | /* action based on previous block's header */ | ||
585 | if ( voice->block_header & 1 ) | ||
586 | { | ||
587 | addr = RAM + letoh16(sd[raw_voice->waveform].loop); | ||
588 | this->r.g.wave_ended |= vbit; | ||
589 | if ( !(voice->block_header & 2) ) /* 1% of the time */ | ||
590 | { | ||
591 | /* first block was end block; | ||
592 | don't play anything (verified) */ | ||
593 | /* bit was set, so this clears it */ | ||
594 | this->keys_down ^= vbit; | ||
595 | |||
596 | /* since voice->envx is 0, | ||
597 | samples and position don't matter */ | ||
598 | raw_voice->envx = 0; | ||
599 | voice->envx = 0; | ||
600 | goto skip_decode; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | /* header */ | ||
605 | int const block_header = *addr; | ||
606 | addr += 9; | ||
607 | voice->addr = addr; | ||
608 | voice->block_header = block_header; | ||
609 | |||
610 | /* previous samples */ | ||
611 | int smp2 = voice->samples [BRR_BLOCK_SIZE + 1]; | ||
612 | int smp1 = voice->samples [BRR_BLOCK_SIZE + 2]; | ||
613 | voice->samples [0] = voice->samples [BRR_BLOCK_SIZE]; | ||
614 | |||
615 | /* output position */ | ||
616 | short* out = voice->samples + (1 + BRR_BLOCK_SIZE); | ||
617 | int offset = -BRR_BLOCK_SIZE << 2; | ||
618 | |||
619 | /* if next block has end flag set, | ||
620 | this block ends early (verified) */ | ||
621 | if ( (block_header & 3) != 3 && (*addr & 3) == 1 ) | ||
622 | { | ||
623 | /* arrange for last 9 samples to be skipped */ | ||
624 | int const skip = 9; | ||
625 | out += (skip & 1); | ||
626 | voice->samples [skip] = voice->samples [BRR_BLOCK_SIZE]; | ||
627 | voice->position += skip * 0x1000; | ||
628 | offset = (-BRR_BLOCK_SIZE + (skip & ~1)) << 2; | ||
629 | addr -= skip / 2; | ||
630 | /* force sample to end on next decode */ | ||
631 | voice->block_header = 1; | ||
632 | } | ||
633 | |||
634 | int const filter = block_header & 0x0c; | ||
635 | int const scale = block_header >> 4; | ||
636 | |||
637 | if ( filter == 0x08 ) /* filter 2 (30-90% of the time) */ | ||
638 | { | ||
639 | /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */ | ||
640 | do /* decode and filter 16 samples */ | ||
641 | { | ||
642 | /* Get nybble, sign-extend, then scale | ||
643 | get byte, select which nybble, sign-extend, then shift | ||
644 | based on scaling. */ | ||
645 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
646 | delta = (delta << scale) >> 1; | ||
647 | |||
648 | if (scale > 0xc) | ||
649 | delta = (delta >> 17) << 11; | ||
650 | |||
651 | out [offset >> 2] = smp2; | ||
652 | |||
653 | delta -= smp2 >> 1; | ||
654 | delta += smp2 >> 5; | ||
655 | delta += smp1; | ||
656 | delta += (-smp1 - (smp1 >> 1)) >> 5; | ||
657 | |||
658 | delta = CLAMP16( delta ); | ||
659 | smp2 = smp1; | ||
660 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
661 | } | ||
662 | while ( (offset += 4) != 0 ); | ||
663 | } | ||
664 | else if ( filter == 0x04 ) /* filter 1 */ | ||
665 | { | ||
666 | /* y[n] = x[n] + 15/16 * y[n-1] */ | ||
667 | do /* decode and filter 16 samples */ | ||
668 | { | ||
669 | /* Get nybble, sign-extend, then scale | ||
670 | get byte, select which nybble, sign-extend, then shift | ||
671 | based on scaling. */ | ||
672 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
673 | delta = (delta << scale) >> 1; | ||
674 | |||
675 | if (scale > 0xc) | ||
676 | delta = (delta >> 17) << 11; | ||
677 | |||
678 | out [offset >> 2] = smp2; | ||
679 | |||
680 | delta += smp1 >> 1; | ||
681 | delta += (-smp1) >> 5; | ||
682 | |||
683 | delta = CLAMP16( delta ); | ||
684 | smp2 = smp1; | ||
685 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
686 | } | ||
687 | while ( (offset += 4) != 0 ); | ||
688 | } | ||
689 | else if ( filter == 0x0c ) /* filter 3 */ | ||
690 | { | ||
691 | /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */ | ||
692 | do /* decode and filter 16 samples */ | ||
693 | { | ||
694 | /* Get nybble, sign-extend, then scale | ||
695 | get byte, select which nybble, sign-extend, then shift | ||
696 | based on scaling. */ | ||
697 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
698 | delta = (delta << scale) >> 1; | ||
699 | |||
700 | if (scale > 0xc) | ||
701 | delta = (delta >> 17) << 11; | ||
702 | |||
703 | out [offset >> 2] = smp2; | ||
704 | |||
705 | delta -= smp2 >> 1; | ||
706 | delta += (smp2 + (smp2 >> 1)) >> 4; | ||
707 | delta += smp1; | ||
708 | delta += (-smp1 * 13) >> 7; | ||
709 | |||
710 | delta = CLAMP16( delta ); | ||
711 | smp2 = smp1; | ||
712 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
713 | } | ||
714 | while ( (offset += 4) != 0 ); | ||
715 | } | ||
716 | else /* filter 0 */ | ||
717 | { | ||
718 | /* y[n] = x[n] */ | ||
719 | do /* decode and filter 16 samples */ | ||
720 | { | ||
721 | /* Get nybble, sign-extend, then scale | ||
722 | get byte, select which nybble, sign-extend, then shift | ||
723 | based on scaling. */ | ||
724 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
725 | delta = (delta << scale) >> 1; | ||
726 | |||
727 | if (scale > 0xc) | ||
728 | delta = (delta >> 17) << 11; | ||
729 | |||
730 | out [offset >> 2] = smp2; | ||
731 | |||
732 | smp2 = smp1; | ||
733 | smp1 = delta * 2; | ||
734 | } | ||
735 | while ( (offset += 4) != 0 ); | ||
736 | } | ||
737 | |||
738 | out [0] = smp2; | ||
739 | out [1] = smp1; | ||
740 | |||
741 | skip_decode:; | ||
742 | } | ||
743 | #endif /* !SPC_BRRCACHE */ | ||
744 | /* Get rate (with possible modulation) */ | ||
745 | int rate = VOICE_RATE(vr); | ||
746 | if ( this->r.g.pitch_mods & vbit ) | ||
747 | rate = (rate * (prev_outx + 32768)) >> 15; | ||
748 | |||
749 | #if !SPC_NOINTERP | ||
750 | /* Interleaved gauss table (to improve cache coherency). */ | ||
751 | /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */ | ||
752 | static short const gauss [512] ICONST_ATTR_SPC MEM_ALIGN_ATTR = | ||
753 | { | ||
754 | 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303, | ||
755 | 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299, | ||
756 | 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292, | ||
757 | 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282, | ||
758 | 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269, | ||
759 | 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253, | ||
760 | 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234, | ||
761 | 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213, | ||
762 | 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190, | ||
763 | 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164, | ||
764 | 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136, | ||
765 | 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106, | ||
766 | 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074, | ||
767 | 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040, | ||
768 | 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005, | ||
769 | 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969, | ||
770 | 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932, | ||
771 | 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894, | ||
772 | 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855, | ||
773 | 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816, | ||
774 | 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777, | ||
775 | 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737, | ||
776 | 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698, | ||
777 | 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659, | ||
778 | 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620, | ||
779 | 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582, | ||
780 | 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545, | ||
781 | 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508, | ||
782 | 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473, | ||
783 | 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439, | ||
784 | 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405, | ||
785 | 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374, | ||
786 | }; | ||
787 | /* Gaussian interpolation using most recent 4 samples */ | ||
788 | long position = voice->position; | ||
789 | voice->position += rate; | ||
790 | short const* interp = voice->samples + (position >> 12); | ||
791 | int offset = position >> 4 & 0xFF; | ||
792 | |||
793 | /* Only left half of gaussian kernel is in table, so we must mirror | ||
794 | for right half */ | ||
795 | short const* fwd = gauss + offset * 2; | ||
796 | short const* rev = gauss + 510 - offset * 2; | ||
797 | |||
798 | /* Use faster gaussian interpolation when exact result isn't needed | ||
799 | by pitch modulator of next channel */ | ||
800 | int amp_0, amp_1; /* Also serve as temps _0, and _1 */ | ||
801 | if ( LIKELY ( !(slow_gaussian & vbit) ) ) /* 99% of the time */ | ||
802 | { | ||
803 | /* Main optimization is lack of clamping. Not a problem since | ||
804 | output never goes more than +/- 16 outside 16-bit range and | ||
805 | things are clamped later anyway. Other optimization is to | ||
806 | preserve fractional accuracy, eliminating several masks. */ | ||
807 | #if defined (CPU_ARM) | ||
808 | int output; | ||
809 | int _2, _3; /* All-purpose temps */ | ||
810 | /* Multiple ASM blocks keep regs free and reduce result | ||
811 | * latency issues. */ | ||
812 | #if ARM_ARCH >= 6 | ||
813 | /* Interpolate */ | ||
814 | asm volatile ( | ||
815 | "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */ | ||
816 | "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */ | ||
817 | "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */ | ||
818 | "ldr %[_3], [%[rev]] \r\n" /* _3=r0r1 */ | ||
819 | "smuad %[out], %[_0], %[_2] \r\n" /* out=f0*i0 + f1*i1 */ | ||
820 | "smladx %[out], %[_1], %[_3], %[out] \r\n" /* out+=r1*i2 + r0*i3 */ | ||
821 | : [out]"=r"(output), | ||
822 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
823 | [_2]"=&r"(_2), [_3]"=r"(_3) | ||
824 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
825 | [interp]"r"(interp)); | ||
826 | /* Apply voice envelope */ | ||
827 | asm volatile ( | ||
828 | "mov %[_2], %[out], asr #(11-5) \r\n" /* To do >> 16 later */ | ||
829 | "mul %[out], %[_2], %[envx] \r\n" /* and avoid exp. shift */ | ||
830 | : [out]"+r"(output), [_2]"=&r"(_2) | ||
831 | : [envx]"r"((int)voice->envx)); | ||
832 | /* Apply left and right volume */ | ||
833 | asm volatile ( | ||
834 | "smulwb %[amp_0], %[out], %[vvol_0] \r\n" /* (32x16->48)[47:16]->[31:0] */ | ||
835 | "smulwb %[amp_1], %[out], %[vvol_1] \r\n" | ||
836 | : [out]"+r"(output), | ||
837 | [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
838 | : [vvol_0]"r"(voice->volume[0]), | ||
839 | [vvol_1]"r"(voice->volume[1])); | ||
840 | |||
841 | raw_voice->outx = output >> (8+5); /* 'output' still 5 bits too big */ | ||
842 | #else /* ARM_ARCH < 6 */ | ||
843 | /* Perform gaussian interpolation on four samples */ | ||
844 | asm volatile ( | ||
845 | "ldrsh %[_0], [%[interp]] \r\n" | ||
846 | "ldrsh %[_2], [%[fwd]] \r\n" | ||
847 | "ldrsh %[_1], [%[interp], #2] \r\n" | ||
848 | "ldrsh %[_3], [%[fwd], #2] \r\n" | ||
849 | "mul %[out], %[_0], %[_2] \r\n" /* out= fwd[0]*interp[0] */ | ||
850 | "ldrsh %[_0], [%[interp], #4] \r\n" | ||
851 | "ldrsh %[_2], [%[rev], #2] \r\n" | ||
852 | "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=fwd[1]*interp[1] */ | ||
853 | "ldrsh %[_1], [%[interp], #6] \r\n" | ||
854 | "ldrsh %[_3], [%[rev]] \r\n" | ||
855 | "mla %[out], %[_0], %[_2], %[out] \r\n" /* out+=rev[1]*interp[2] */ | ||
856 | "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=rev[0]*interp[3] */ | ||
857 | : [out]"=&r"(output), | ||
858 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
859 | [_2]"=&r"(_2), [_3]"=&r"(_3) | ||
860 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
861 | [interp]"r"(interp)); | ||
862 | /* Apply voice envelope */ | ||
863 | asm volatile ( | ||
864 | "mov %[_2], %[out], asr #11 \r\n" | ||
865 | "mul %[out], %[_2], %[envx] \r\n" | ||
866 | : [out]"+r"(output), [_2]"=&r"(_2) | ||
867 | : [envx]"r"((int)voice->envx)); | ||
868 | /* Reduce and apply left and right volume */ | ||
869 | asm volatile ( | ||
870 | "mov %[out], %[out], asr #11 \r\n" | ||
871 | "mul %[amp_0], %[out], %[vvol_0] \r\n" | ||
872 | "mul %[amp_1], %[out], %[vvol_1] \r\n" | ||
873 | : [out]"+r"(output), | ||
874 | [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
875 | : [vvol_0]"r"((int)voice->volume[0]), | ||
876 | [vvol_1]"r"((int)voice->volume[1])); | ||
877 | |||
878 | raw_voice->outx = output >> 8; | ||
879 | #endif /* ARM_ARCH */ | ||
880 | #else /* Unoptimized CPU */ | ||
881 | int output = (((fwd [0] * interp [0] + | ||
882 | fwd [1] * interp [1] + | ||
883 | rev [1] * interp [2] + | ||
884 | rev [0] * interp [3] ) >> 11) * voice->envx) >> 11; | ||
885 | |||
886 | /* duplicated here to give compiler more to run in parallel */ | ||
887 | amp_0 = voice->volume [0] * output; | ||
888 | amp_1 = voice->volume [1] * output; | ||
889 | |||
890 | raw_voice->outx = output >> 8; | ||
891 | #endif /* CPU_* */ | ||
892 | } | ||
893 | else /* slow gaussian */ | ||
894 | { | ||
895 | #if defined(CPU_ARM) | ||
896 | #if ARM_ARCH >= 6 | ||
897 | int output = *(int16_t*) &this->noise; | ||
898 | |||
899 | if ( !(this->r.g.noise_enables & vbit) ) | ||
900 | { | ||
901 | /* Interpolate */ | ||
902 | int _2, _3; | ||
903 | asm volatile ( | ||
904 | /* NOTE: often-unaligned accesses */ | ||
905 | "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */ | ||
906 | "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */ | ||
907 | "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */ | ||
908 | "ldr %[_3], [%[rev]] \r\n" /* _3=f2f3 */ | ||
909 | "smulbb %[out], %[_0], %[_2] \r\n" /* out=f0*i0 */ | ||
910 | "smultt %[_0], %[_0], %[_2] \r\n" /* _0=f1*i1 */ | ||
911 | "smulbt %[_2], %[_1], %[_3] \r\n" /* _2=r1*i2 */ | ||
912 | "smultb %[_3], %[_1], %[_3] \r\n" /* _3=r0*i3 */ | ||
913 | : [out]"=r"(output), | ||
914 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
915 | [_2]"=&r"(_2), [_3]"=r"(_3) | ||
916 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
917 | [interp]"r"(interp)); | ||
918 | asm volatile ( | ||
919 | "mov %[out], %[out], asr#12 \r\n" | ||
920 | "add %[_0], %[out], %[_0], asr #12 \r\n" | ||
921 | "add %[_2], %[_0], %[_2], asr #12 \r\n" | ||
922 | "pkhbt %[_0], %[_2], %[_3], asl #4 \r\n" /* _3[31:16], _2[15:0] */ | ||
923 | "sadd16 %[_0], %[_0], %[_0] \r\n" /* _3[31:16]*2, _2[15:0]*2 */ | ||
924 | "qsubaddx %[out], %[_0], %[_0] \r\n" /* out[15:0]= | ||
925 | * sat16(_3[31:16]+_2[15:0]) */ | ||
926 | : [out]"+r"(output), | ||
927 | [_0]"+r"(amp_0), [_2]"+r"(_2), [_3]"+r"(_3)); | ||
928 | } | ||
929 | /* Apply voice envelope */ | ||
930 | asm volatile ( | ||
931 | "smulbb %[out], %[out], %[envx] \r\n" | ||
932 | : [out]"+r"(output) | ||
933 | : [envx]"r"(voice->envx)); | ||
934 | /* Reduce and apply left and right volume */ | ||
935 | asm volatile ( | ||
936 | "mov %[out], %[out], asr #11 \r\n" | ||
937 | "bic %[out], %[out], #0x1 \r\n" | ||
938 | "mul %[amp_0], %[out], %[vvol_0] \r\n" | ||
939 | "mul %[amp_1], %[out], %[vvol_1] \r\n" | ||
940 | : [out]"+r"(output), | ||
941 | [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
942 | : [vvol_0]"r"((int)voice->volume[0]), | ||
943 | [vvol_1]"r"((int)voice->volume[1])); | ||
944 | |||
945 | prev_outx = output; | ||
946 | raw_voice->outx = output >> 8; | ||
947 | #else /* ARM_ARCH < 6 */ | ||
948 | int output = *(int16_t*) &this->noise; | ||
949 | |||
950 | if ( !(this->r.g.noise_enables & vbit) ) | ||
951 | { | ||
952 | /* Interpolate */ | ||
953 | int _2, _3; | ||
954 | asm volatile ( | ||
955 | "ldrsh %[_0], [%[interp]] \r\n" | ||
956 | "ldrsh %[_2], [%[fwd]] \r\n" | ||
957 | "ldrsh %[_1], [%[interp], #2] \r\n" | ||
958 | "ldrsh %[_3], [%[fwd], #2] \r\n" | ||
959 | "mul %[out], %[_2], %[_0] \r\n" /* fwd[0]*interp[0] */ | ||
960 | "ldrsh %[_2], [%[rev], #2] \r\n" | ||
961 | "mul %[_0], %[_3], %[_1] \r\n" /* fwd[1]*interp[1] */ | ||
962 | "ldrsh %[_1], [%[interp], #4] \r\n" | ||
963 | "mov %[out], %[out], asr #12 \r\n" | ||
964 | "ldrsh %[_3], [%[rev]] \r\n" | ||
965 | "mul %[_2], %[_1], %[_2] \r\n" /* rev[1]*interp[2] */ | ||
966 | "ldrsh %[_1], [%[interp], #6] \r\n" | ||
967 | "add %[_0], %[out], %[_0], asr #12 \r\n" | ||
968 | "mul %[_3], %[_1], %[_3] \r\n" /* rev[0]*interp[3] */ | ||
969 | "add %[_2], %[_0], %[_2], asr #12 \r\n" | ||
970 | "mov %[_2], %[_2], lsl #17 \r\n" | ||
971 | "mov %[_3], %[_3], asr #12 \r\n" | ||
972 | "mov %[_3], %[_3], asl #1 \r\n" | ||
973 | "add %[out], %[_3], %[_2], asr #16 \r\n" | ||
974 | : [out]"=&r"(output), | ||
975 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
976 | [_2]"=&r"(_2), [_3]"=&r"(_3) | ||
977 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
978 | [interp]"r"(interp)); | ||
979 | |||
980 | output = CLAMP16(output); | ||
981 | } | ||
982 | /* Apply voice envelope */ | ||
983 | asm volatile ( | ||
984 | "mul %[_0], %[out], %[envx] \r\n" | ||
985 | : [_0]"=r"(amp_0) | ||
986 | : [out]"r"(output), [envx]"r"((int)voice->envx)); | ||
987 | /* Reduce and apply left and right volume */ | ||
988 | asm volatile ( | ||
989 | "mov %[out], %[amp_0], asr #11 \r\n" /* amp_0 = _0 */ | ||
990 | "bic %[out], %[out], #0x1 \r\n" | ||
991 | "mul %[amp_0], %[out], %[vvol_0] \r\n" | ||
992 | "mul %[amp_1], %[out], %[vvol_1] \r\n" | ||
993 | : [out]"+r"(output), | ||
994 | [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1) | ||
995 | : [vvol_0]"r"((int)voice->volume[0]), | ||
996 | [vvol_1]"r"((int)voice->volume[1])); | ||
997 | |||
998 | prev_outx = output; | ||
999 | raw_voice->outx = output >> 8; | ||
1000 | #endif /* ARM_ARCH >= 6 */ | ||
1001 | #else /* Unoptimized CPU */ | ||
1002 | int output = *(int16_t*) &this->noise; | ||
1003 | |||
1004 | if ( !(this->r.g.noise_enables & vbit) ) | ||
1005 | { | ||
1006 | output = (fwd [0] * interp [0]) & ~0xFFF; | ||
1007 | output = (output + fwd [1] * interp [1]) & ~0xFFF; | ||
1008 | output = (output + rev [1] * interp [2]) >> 12; | ||
1009 | output = (int16_t) (output * 2); | ||
1010 | output += ((rev [0] * interp [3]) >> 12) * 2; | ||
1011 | output = CLAMP16( output ); | ||
1012 | } | ||
1013 | output = (output * voice->envx) >> 11 & ~1; | ||
1014 | |||
1015 | /* duplicated here to give compiler more to run in parallel */ | ||
1016 | amp_0 = voice->volume [0] * output; | ||
1017 | amp_1 = voice->volume [1] * output; | ||
1018 | |||
1019 | prev_outx = output; | ||
1020 | raw_voice->outx = output >> 8; | ||
1021 | #endif /* CPU_* */ | ||
1022 | } | ||
1023 | #else /* SPCNOINTERP */ | ||
1024 | /* two-point linear interpolation */ | ||
1025 | #ifdef CPU_COLDFIRE | ||
1026 | int amp_0 = (int16_t)this->noise; | ||
1027 | int amp_1; | ||
1028 | |||
1029 | if ( (this->r.g.noise_enables & vbit) == 0 ) | ||
1030 | { | ||
1031 | uint32_t f = voice->position; | ||
1032 | int32_t y0; | ||
1033 | |||
1034 | /** | ||
1035 | * Formula (fastest found so far of MANY): | ||
1036 | * output = y0 + f*y1 - f*y0 | ||
1037 | */ | ||
1038 | asm volatile ( | ||
1039 | /* separate fractional and whole parts */ | ||
1040 | "move.l %[f], %[y1] \r\n" | ||
1041 | "and.l #0xfff, %[f] \r\n" | ||
1042 | "lsr.l %[sh], %[y1] \r\n" | ||
1043 | /* load samples y0 (upper) & y1 (lower) */ | ||
1044 | "move.l 2(%[s], %[y1].l*2), %[y1] \r\n" | ||
1045 | /* %acc0 = f*y1 */ | ||
1046 | "mac.w %[f]l, %[y1]l, %%acc0 \r\n" | ||
1047 | /* %acc0 -= f*y0 */ | ||
1048 | "msac.w %[f]l, %[y1]u, %%acc0 \r\n" | ||
1049 | /* separate out y0 and sign extend */ | ||
1050 | "swap %[y1] \r\n" | ||
1051 | "movea.w %[y1], %[y0] \r\n" | ||
1052 | /* fetch result, scale down and add y0 */ | ||
1053 | "movclr.l %%acc0, %[y1] \r\n" | ||
1054 | /* output = y0 + (result >> 12) */ | ||
1055 | "asr.l %[sh], %[y1] \r\n" | ||
1056 | "add.l %[y0], %[y1] \r\n" | ||
1057 | : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0) | ||
1058 | : [s]"a"(voice->samples), [sh]"d"(12)); | ||
1059 | } | ||
1060 | |||
1061 | /* apply voice envelope to output */ | ||
1062 | asm volatile ( | ||
1063 | "mac.w %[out]l, %[envx]l, %%acc0 \r\n" | ||
1064 | : | ||
1065 | : [out]"r"(amp_0), [envx]"r"(voice->envx)); | ||
1066 | |||
1067 | /* advance voice position */ | ||
1068 | voice->position += rate; | ||
1069 | |||
1070 | /* fetch output, scale and apply left and right | ||
1071 | voice volume */ | ||
1072 | asm volatile ( | ||
1073 | "movclr.l %%acc0, %[out] \r\n" | ||
1074 | "asr.l %[sh], %[out] \r\n" | ||
1075 | "mac.l %[vvol_0], %[out], %%acc0 \r\n" | ||
1076 | "mac.l %[vvol_1], %[out], %%acc1 \r\n" | ||
1077 | : [out]"=&d"(amp_0) | ||
1078 | : [vvol_0]"r"((int)voice->volume[0]), | ||
1079 | [vvol_1]"r"((int)voice->volume[1]), | ||
1080 | [sh]"d"(11)); | ||
1081 | |||
1082 | /* save this output into previous, scale and save in | ||
1083 | output register */ | ||
1084 | prev_outx = amp_0; | ||
1085 | raw_voice->outx = amp_0 >> 8; | ||
1086 | |||
1087 | /* fetch final voice output */ | ||
1088 | asm volatile ( | ||
1089 | "movclr.l %%acc0, %[amp_0] \r\n" | ||
1090 | "movclr.l %%acc1, %[amp_1] \r\n" | ||
1091 | : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1)); | ||
1092 | #elif defined (CPU_ARM) | ||
1093 | int amp_0, amp_1; | ||
1094 | |||
1095 | if ( (this->r.g.noise_enables & vbit) != 0 ) | ||
1096 | { | ||
1097 | amp_0 = *(int16_t *)&this->noise; | ||
1098 | } | ||
1099 | else | ||
1100 | { | ||
1101 | uint32_t f = voice->position; | ||
1102 | amp_0 = (uint32_t)voice->samples; | ||
1103 | |||
1104 | asm volatile( | ||
1105 | "mov %[y1], %[f], lsr #12 \r\n" | ||
1106 | "eor %[f], %[f], %[y1], lsl #12 \r\n" | ||
1107 | "add %[y1], %[y0], %[y1], lsl #1 \r\n" | ||
1108 | "ldrsh %[y0], [%[y1], #2] \r\n" | ||
1109 | "ldrsh %[y1], [%[y1], #4] \r\n" | ||
1110 | "sub %[y1], %[y1], %[y0] \r\n" | ||
1111 | "mul %[f], %[y1], %[f] \r\n" | ||
1112 | "add %[y0], %[y0], %[f], asr #12 \r\n" | ||
1113 | : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1)); | ||
1114 | } | ||
1115 | |||
1116 | voice->position += rate; | ||
1117 | |||
1118 | asm volatile( | ||
1119 | "mul %[amp_1], %[amp_0], %[envx] \r\n" | ||
1120 | "mov %[amp_0], %[amp_1], asr #11 \r\n" | ||
1121 | "mov %[amp_1], %[amp_0], asr #8 \r\n" | ||
1122 | : [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1) | ||
1123 | : [envx]"r"(voice->envx)); | ||
1124 | |||
1125 | prev_outx = amp_0; | ||
1126 | raw_voice->outx = (int8_t)amp_1; | ||
1127 | |||
1128 | asm volatile( | ||
1129 | "mul %[amp_1], %[amp_0], %[vol_1] \r\n" | ||
1130 | "mul %[amp_0], %[vol_0], %[amp_0] \r\n" | ||
1131 | : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1) | ||
1132 | : [vol_0]"r"((int)voice->volume[0]), | ||
1133 | [vol_1]"r"((int)voice->volume[1])); | ||
1134 | #else /* Unoptimized CPU */ | ||
1135 | int output; | ||
1136 | |||
1137 | if ( (this->r.g.noise_enables & vbit) == 0 ) | ||
1138 | { | ||
1139 | int const fraction = voice->position & 0xfff; | ||
1140 | short const* const pos = (voice->samples + (voice->position >> 12)) + 1; | ||
1141 | output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12); | ||
1142 | } else { | ||
1143 | output = *(int16_t *)&this->noise; | ||
1144 | } | ||
1145 | |||
1146 | voice->position += rate; | ||
1147 | |||
1148 | output = (output * voice->envx) >> 11; | ||
1149 | |||
1150 | /* duplicated here to give compiler more to run in parallel */ | ||
1151 | int amp_0 = voice->volume [0] * output; | ||
1152 | int amp_1 = voice->volume [1] * output; | ||
1153 | |||
1154 | prev_outx = output; | ||
1155 | raw_voice->outx = (int8_t) (output >> 8); | ||
1156 | #endif /* CPU_* */ | ||
1157 | #endif /* SPCNOINTERP */ | ||
1158 | |||
1159 | #if SPC_BRRCACHE | ||
1160 | if ( voice->position >= voice->wave_end ) | ||
1161 | { | ||
1162 | long loop_len = voice->wave_loop << 12; | ||
1163 | voice->position -= loop_len; | ||
1164 | this->r.g.wave_ended |= vbit; | ||
1165 | if ( !loop_len ) | ||
1166 | { | ||
1167 | this->keys_down ^= vbit; | ||
1168 | raw_voice->envx = 0; | ||
1169 | voice->envx = 0; | ||
1170 | } | ||
1171 | } | ||
1172 | #endif | ||
1173 | #if 0 | ||
1174 | EXIT_TIMER(dsp_gen); | ||
1175 | |||
1176 | ENTER_TIMER(dsp_mix); | ||
1177 | #endif | ||
1178 | chans_0 += amp_0; | ||
1179 | chans_1 += amp_1; | ||
1180 | #if !SPC_NOECHO | ||
1181 | if ( this->r.g.echo_ons & vbit ) | ||
1182 | { | ||
1183 | echo_0 += amp_0; | ||
1184 | echo_1 += amp_1; | ||
1185 | } | ||
1186 | #endif | ||
1187 | #if 0 | ||
1188 | EXIT_TIMER(dsp_mix); | ||
1189 | #endif | ||
1190 | } | ||
1191 | /* end of voice loop */ | ||
1192 | |||
1193 | #if !SPC_NOECHO | ||
1194 | #ifdef CPU_COLDFIRE | ||
1195 | /* Read feedback from echo buffer */ | ||
1196 | int echo_pos = this->echo_pos; | ||
1197 | uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF); | ||
1198 | echo_pos += 4; | ||
1199 | if ( echo_pos >= echo_wrap ) | ||
1200 | echo_pos = 0; | ||
1201 | this->echo_pos = echo_pos; | ||
1202 | int fb = swap_odd_even32(*(int32_t *)echo_ptr); | ||
1203 | int out_0, out_1; | ||
1204 | |||
1205 | /* Keep last 8 samples */ | ||
1206 | *this->last_fir_ptr = fb; | ||
1207 | this->last_fir_ptr = this->fir_ptr; | ||
1208 | |||
1209 | /* Apply echo FIR filter to output samples read from echo buffer - | ||
1210 | circular buffer is hardware incremented and masked; FIR | ||
1211 | coefficients and buffer history are loaded in parallel with | ||
1212 | multiply accumulate operations. Shift left by one here and once | ||
1213 | again when calculating feedback to have sample values justified | ||
1214 | to bit 31 in the output to ease endian swap, interleaving and | ||
1215 | clamping before placing result in the program's echo buffer. */ | ||
1216 | int _0, _1, _2; | ||
1217 | asm volatile ( | ||
1218 | "move.l (%[fir_c]) , %[_2] \r\n" | ||
1219 | "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n" | ||
1220 | "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1221 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1222 | "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1223 | "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1224 | "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1225 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1226 | "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1227 | "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1228 | "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1229 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1230 | "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1231 | "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1232 | "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n" | ||
1233 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1234 | "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n" | ||
1235 | : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2), | ||
1236 | [fir_p]"+a"(this->fir_ptr) | ||
1237 | : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb) | ||
1238 | ); | ||
1239 | |||
1240 | /* Generate output */ | ||
1241 | asm volatile ( | ||
1242 | /* fetch filter results _after_ gcc loads asm | ||
1243 | block parameters to eliminate emac stalls */ | ||
1244 | "movclr.l %%acc0, %[out_0] \r\n" | ||
1245 | "movclr.l %%acc1, %[out_1] \r\n" | ||
1246 | /* apply global volume */ | ||
1247 | "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n" | ||
1248 | "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n" | ||
1249 | /* apply echo volume and add to final output */ | ||
1250 | "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n" | ||
1251 | "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n" | ||
1252 | : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1) | ||
1253 | : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0), | ||
1254 | [ev_0]"r"((int)this->r.g.echo_volume_0), | ||
1255 | [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1), | ||
1256 | [ev_1]"r"((int)this->r.g.echo_volume_1) | ||
1257 | ); | ||
1258 | |||
1259 | /* Feedback into echo buffer */ | ||
1260 | if ( !(this->r.g.flags & 0x20) ) | ||
1261 | { | ||
1262 | int sh = 1 << 9; | ||
1263 | |||
1264 | asm volatile ( | ||
1265 | /* scale echo voices; saturate if overflow */ | ||
1266 | "mac.l %[sh], %[e1] , %%acc1 \r\n" | ||
1267 | "mac.l %[sh], %[e0] , %%acc0 \r\n" | ||
1268 | /* add scaled output from FIR filter */ | ||
1269 | "mac.l %[out_1], %[ef], <<, %%acc1 \r\n" | ||
1270 | "mac.l %[out_0], %[ef], <<, %%acc0 \r\n" | ||
1271 | /* swap and fetch feedback results - simply | ||
1272 | swap_odd_even32 mixed in between macs and | ||
1273 | movclrs to mitigate stall issues */ | ||
1274 | "move.l #0x00ff00ff, %[sh] \r\n" | ||
1275 | "movclr.l %%acc1, %[e1] \r\n" | ||
1276 | "swap %[e1] \r\n" | ||
1277 | "movclr.l %%acc0, %[e0] \r\n" | ||
1278 | "move.w %[e1], %[e0] \r\n" | ||
1279 | "and.l %[e0], %[sh] \r\n" | ||
1280 | "eor.l %[sh], %[e0] \r\n" | ||
1281 | "lsl.l #8, %[sh] \r\n" | ||
1282 | "lsr.l #8, %[e0] \r\n" | ||
1283 | "or.l %[sh], %[e0] \r\n" | ||
1284 | /* save final feedback into echo buffer */ | ||
1285 | "move.l %[e0], (%[echo_ptr]) \r\n" | ||
1286 | : [e0]"+d"(echo_0), [e1]"+d"(echo_1), [sh]"+d"(sh) | ||
1287 | : [out_0]"r"(out_0), [out_1]"r"(out_1), | ||
1288 | [ef]"r"((int)this->r.g.echo_feedback), | ||
1289 | [echo_ptr]"a"((int32_t *)echo_ptr) | ||
1290 | ); | ||
1291 | } | ||
1292 | |||
1293 | /* Output final samples */ | ||
1294 | asm volatile ( | ||
1295 | /* fetch output saved in %acc2 and %acc3 */ | ||
1296 | "movclr.l %%acc2, %[out_0] \r\n" | ||
1297 | "movclr.l %%acc3, %[out_1] \r\n" | ||
1298 | /* scale right by global_muting shift */ | ||
1299 | "asr.l %[gm], %[out_0] \r\n" | ||
1300 | "asr.l %[gm], %[out_1] \r\n" | ||
1301 | : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1) | ||
1302 | : [gm]"d"(global_muting) | ||
1303 | ); | ||
1304 | |||
1305 | out_buf [ 0] = out_0; | ||
1306 | out_buf [WAV_CHUNK_SIZE] = out_1; | ||
1307 | out_buf ++; | ||
1308 | #elif defined (CPU_ARM) | ||
1309 | /* Read feedback from echo buffer */ | ||
1310 | int echo_pos = this->echo_pos; | ||
1311 | uint8_t* const echo_ptr = RAM + | ||
1312 | ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF); | ||
1313 | echo_pos += 4; | ||
1314 | if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 ) | ||
1315 | echo_pos = 0; | ||
1316 | this->echo_pos = echo_pos; | ||
1317 | |||
1318 | #if ARM_ARCH >= 6 | ||
1319 | int32_t *fir_ptr, *fir_coeff; | ||
1320 | int fb_0, fb_1; | ||
1321 | |||
1322 | /* Apply FIR */ | ||
1323 | |||
1324 | /* Keep last 8 samples */ | ||
1325 | asm volatile ( | ||
1326 | "ldr %[fb_0], [%[echo_p]] \r\n" | ||
1327 | "add %[fir_p], %[t_fir_p], #4 \r\n" | ||
1328 | "bic %[t_fir_p], %[fir_p], %[mask] \r\n" | ||
1329 | "str %[fb_0], [%[fir_p], #-4] \r\n" | ||
1330 | /* duplicate at +8 eliminates wrap checking below */ | ||
1331 | "str %[fb_0], [%[fir_p], #28] \r\n" | ||
1332 | : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr), | ||
1333 | [fb_0]"=&r"(fb_0) | ||
1334 | : [echo_p]"r"(echo_ptr), [mask]"i"(~FIR_BUF_MASK)); | ||
1335 | |||
1336 | fir_coeff = (int32_t *)this->fir_coeff; | ||
1337 | |||
1338 | /* Fugly, but the best version found. */ | ||
1339 | int _0; | ||
1340 | asm volatile ( /* L0R0 = acc0 */ | ||
1341 | "ldmia %[fir_p]!, { r2-r5 } \r\n" /* L1R1-L4R4 = r2-r5 */ | ||
1342 | "ldmia %[fir_c]!, { r0-r1 } \r\n" /* C0C1-C2C3 = r0-r1 */ | ||
1343 | "pkhbt %[_0], %[acc0], r2, asl #16 \r\n" /* L0R0,L1R1->L0L1,R0R1 */ | ||
1344 | "pkhtb r2, r2, %[acc0], asr #16 \r\n" | ||
1345 | "smuad %[acc0], %[_0], r0 \r\n" /* acc0=L0*C0+L1*C1 */ | ||
1346 | "smuad %[acc1], r2, r0 \r\n" /* acc1=R0*C0+R1*C1 */ | ||
1347 | "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L2R2,L3R3->L2L3,R2R3 */ | ||
1348 | "pkhtb r4, r4, r3, asr #16 \r\n" | ||
1349 | "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L2*C2+L3*C3 */ | ||
1350 | "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R2*C2+R3*C3 */ | ||
1351 | "ldmia %[fir_p], { r2-r4 } \r\n" /* L5R5-L7R7 = r2-r4 */ | ||
1352 | "ldmia %[fir_c], { r0-r1 } \r\n" /* C4C5-C6C7 = r0-r1 */ | ||
1353 | "pkhbt %[_0], r5, r2, asl #16 \r\n" /* L4R4,L5R5->L4L5,R4R5 */ | ||
1354 | "pkhtb r2, r2, r5, asr #16 \r\n" | ||
1355 | "smlad %[acc0], %[_0], r0, %[acc0] \r\n" /* acc0+=L4*C4+L5*C5 */ | ||
1356 | "smlad %[acc1], r2, r0, %[acc1] \r\n" /* acc1+=R4*C4+R5*C5 */ | ||
1357 | "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L6R6,L7R7->L6L7,R6R7 */ | ||
1358 | "pkhtb r4, r4, r3, asr #16 \r\n" | ||
1359 | "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L6*C6+L7*C7 */ | ||
1360 | "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R6*C6+R7*C7 */ | ||
1361 | : [acc0]"+r"(fb_0), [acc1]"=&r"(fb_1), [_0]"=&r"(_0), | ||
1362 | [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff) | ||
1363 | : | ||
1364 | : "r0", "r1", "r2", "r3", "r4", "r5"); | ||
1365 | |||
1366 | /* Generate output */ | ||
1367 | int amp_0, amp_1; | ||
1368 | |||
1369 | asm volatile ( | ||
1370 | "mul %[amp_0], %[gvol_0], %[chans_0] \r\n" | ||
1371 | "mul %[amp_1], %[gvol_1], %[chans_1] \r\n" | ||
1372 | : [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
1373 | : [gvol_0]"r"(global_vol_0), [gvol_1]"r"(global_vol_1), | ||
1374 | [chans_0]"r"(chans_0), [chans_1]"r"(chans_1)); | ||
1375 | asm volatile ( | ||
1376 | "mla %[amp_0], %[fb_0], %[ev_0], %[amp_0] \r\n" | ||
1377 | "mla %[amp_1], %[fb_1], %[ev_1], %[amp_1] \r\n" | ||
1378 | : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1) | ||
1379 | : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), | ||
1380 | [ev_0]"r"((int)this->r.g.echo_volume_0), | ||
1381 | [ev_1]"r"((int)this->r.g.echo_volume_1)); | ||
1382 | |||
1383 | out_buf [ 0] = amp_0 >> global_muting; | ||
1384 | out_buf [WAV_CHUNK_SIZE] = amp_1 >> global_muting; | ||
1385 | out_buf ++; | ||
1386 | |||
1387 | if ( !(this->r.g.flags & 0x20) ) | ||
1388 | { | ||
1389 | /* Feedback into echo buffer */ | ||
1390 | int e0, e1; | ||
1391 | |||
1392 | asm volatile ( | ||
1393 | "mov %[e0], %[echo_0], asl #7 \r\n" | ||
1394 | "mov %[e1], %[echo_1], asl #7 \r\n" | ||
1395 | "mla %[e0], %[fb_0], %[efb], %[e0] \r\n" | ||
1396 | "mla %[e1], %[fb_1], %[efb], %[e1] \r\n" | ||
1397 | : [e0]"=&r"(e0), [e1]"=&r"(e1) | ||
1398 | : [echo_0]"r"(echo_0), [echo_1]"r"(echo_1), | ||
1399 | [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), | ||
1400 | [efb]"r"((int)this->r.g.echo_feedback)); | ||
1401 | asm volatile ( | ||
1402 | "ssat %[e0], #16, %[e0], asr #14 \r\n" | ||
1403 | "ssat %[e1], #16, %[e1], asr #14 \r\n" | ||
1404 | "pkhbt %[e0], %[e0], %[e1], lsl #16 \r\n" | ||
1405 | "str %[e0], [%[echo_p]] \r\n" | ||
1406 | : [e0]"+r"(e0), [e1]"+r"(e1) | ||
1407 | : [echo_p]"r"(echo_ptr)); | ||
1408 | } | ||
1409 | #else /* ARM_ARCH < 6 */ | ||
1410 | int fb_0 = GET_LE16SA( echo_ptr ); | ||
1411 | int fb_1 = GET_LE16SA( echo_ptr + 2 ); | ||
1412 | int32_t *fir_ptr, *fir_coeff; | ||
1413 | |||
1414 | /* Keep last 8 samples */ | ||
1415 | |||
1416 | /* Apply FIR */ | ||
1417 | asm volatile ( | ||
1418 | "add %[fir_p], %[t_fir_p], #8 \r\n" | ||
1419 | "bic %[t_fir_p], %[fir_p], %[mask] \r\n" | ||
1420 | "str %[fb_0], [%[fir_p], #-8] \r\n" | ||
1421 | "str %[fb_1], [%[fir_p], #-4] \r\n" | ||
1422 | /* duplicate at +8 eliminates wrap checking below */ | ||
1423 | "str %[fb_0], [%[fir_p], #56] \r\n" | ||
1424 | "str %[fb_1], [%[fir_p], #60] \r\n" | ||
1425 | : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr) | ||
1426 | : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), [mask]"i"(~FIR_BUF_MASK)); | ||
1427 | |||
1428 | fir_coeff = this->fir_coeff; | ||
1429 | |||
1430 | asm volatile ( | ||
1431 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1432 | "ldmia %[fir_p]!, { r4-r5 } \r\n" | ||
1433 | "mul %[fb_0], r0, %[fb_0] \r\n" | ||
1434 | "mul %[fb_1], r0, %[fb_1] \r\n" | ||
1435 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1436 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1437 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1438 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1439 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1440 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1441 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1442 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1443 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1444 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1445 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1446 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1447 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1448 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1449 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1450 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1451 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1452 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1453 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1454 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1455 | : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1), | ||
1456 | [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff) | ||
1457 | : | ||
1458 | : "r0", "r1", "r2", "r3", "r4", "r5"); | ||
1459 | |||
1460 | /* Generate output */ | ||
1461 | int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0) | ||
1462 | >> global_muting; | ||
1463 | int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) | ||
1464 | >> global_muting; | ||
1465 | |||
1466 | out_buf [ 0] = amp_0; | ||
1467 | out_buf [WAV_CHUNK_SIZE] = amp_1; | ||
1468 | out_buf ++; | ||
1469 | |||
1470 | if ( !(this->r.g.flags & 0x20) ) | ||
1471 | { | ||
1472 | /* Feedback into echo buffer */ | ||
1473 | int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14); | ||
1474 | int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14); | ||
1475 | e0 = CLAMP16( e0 ); | ||
1476 | SET_LE16A( echo_ptr , e0 ); | ||
1477 | e1 = CLAMP16( e1 ); | ||
1478 | SET_LE16A( echo_ptr + 2, e1 ); | ||
1479 | } | ||
1480 | #endif /* ARM_ARCH */ | ||
1481 | #else /* Unoptimized CPU */ | ||
1482 | /* Read feedback from echo buffer */ | ||
1483 | int echo_pos = this->echo_pos; | ||
1484 | uint8_t* const echo_ptr = RAM + | ||
1485 | ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF); | ||
1486 | echo_pos += 4; | ||
1487 | if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 ) | ||
1488 | echo_pos = 0; | ||
1489 | this->echo_pos = echo_pos; | ||
1490 | int fb_0 = GET_LE16SA( echo_ptr ); | ||
1491 | int fb_1 = GET_LE16SA( echo_ptr + 2 ); | ||
1492 | |||
1493 | /* Keep last 8 samples */ | ||
1494 | int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos; | ||
1495 | this->fir_pos = (this->fir_pos + 1) & (FIR_BUF_HALF - 1); | ||
1496 | fir_ptr [ 0] [0] = fb_0; | ||
1497 | fir_ptr [ 0] [1] = fb_1; | ||
1498 | /* duplicate at +8 eliminates wrap checking below */ | ||
1499 | fir_ptr [FIR_BUF_HALF] [0] = fb_0; | ||
1500 | fir_ptr [FIR_BUF_HALF] [1] = fb_1; | ||
1501 | |||
1502 | /* Apply FIR */ | ||
1503 | fb_0 *= this->fir_coeff [0]; | ||
1504 | fb_1 *= this->fir_coeff [0]; | ||
1505 | |||
1506 | #define DO_PT( i )\ | ||
1507 | fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\ | ||
1508 | fb_1 += fir_ptr [i] [1] * this->fir_coeff [i]; | ||
1509 | |||
1510 | DO_PT( 1 ) | ||
1511 | DO_PT( 2 ) | ||
1512 | DO_PT( 3 ) | ||
1513 | DO_PT( 4 ) | ||
1514 | DO_PT( 5 ) | ||
1515 | DO_PT( 6 ) | ||
1516 | DO_PT( 7 ) | ||
1517 | |||
1518 | /* Generate output */ | ||
1519 | int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0) | ||
1520 | >> global_muting; | ||
1521 | int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) | ||
1522 | >> global_muting; | ||
1523 | out_buf [ 0] = amp_0; | ||
1524 | out_buf [WAV_CHUNK_SIZE] = amp_1; | ||
1525 | out_buf ++; | ||
1526 | |||
1527 | if ( !(this->r.g.flags & 0x20) ) | ||
1528 | { | ||
1529 | /* Feedback into echo buffer */ | ||
1530 | int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14); | ||
1531 | int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14); | ||
1532 | e0 = CLAMP16( e0 ); | ||
1533 | SET_LE16A( echo_ptr , e0 ); | ||
1534 | e1 = CLAMP16( e1 ); | ||
1535 | SET_LE16A( echo_ptr + 2, e1 ); | ||
1536 | } | ||
1537 | #endif /* CPU_* */ | ||
1538 | #else /* SPCNOECHO == 1*/ | ||
1539 | /* Generate output */ | ||
1540 | int amp_0 = (chans_0 * global_vol_0) >> global_muting; | ||
1541 | int amp_1 = (chans_1 * global_vol_1) >> global_muting; | ||
1542 | out_buf [ 0] = amp_0; | ||
1543 | out_buf [WAV_CHUNK_SIZE] = amp_1; | ||
1544 | out_buf ++; | ||
1545 | #endif /* SPCNOECHO */ | ||
1546 | } | ||
1547 | while ( --count ); | ||
1548 | #if 0 | ||
1549 | EXIT_TIMER(dsp); | ||
1550 | ENTER_TIMER(cpu); | ||
1551 | #endif | ||
1552 | } | ||
1553 | |||
1554 | void DSP_reset( struct Spc_Dsp* this ) | ||
1555 | { | ||
1556 | this->keys_down = 0; | ||
1557 | this->echo_pos = 0; | ||
1558 | this->noise_count = 0; | ||
1559 | this->noise = 2; | ||
1560 | |||
1561 | this->r.g.flags = 0xE0; /* reset, mute, echo off */ | ||
1562 | this->r.g.key_ons = 0; | ||
1563 | |||
1564 | ci->memset( this->voice_state, 0, sizeof this->voice_state ); | ||
1565 | |||
1566 | int i; | ||
1567 | for ( i = VOICE_COUNT; --i >= 0; ) | ||
1568 | { | ||
1569 | struct voice_t* v = this->voice_state + i; | ||
1570 | v->env_mode = state_release; | ||
1571 | v->addr = ram.ram; | ||
1572 | } | ||
1573 | |||
1574 | #if SPC_BRRCACHE | ||
1575 | this->oldsize = 0; | ||
1576 | for ( i = 0; i < 256; i++ ) | ||
1577 | this->wave_entry [i].start_addr = -1; | ||
1578 | #endif | ||
1579 | |||
1580 | #if defined(CPU_COLDFIRE) | ||
1581 | this->fir_ptr = fir_buf; | ||
1582 | this->last_fir_ptr = &fir_buf [7]; | ||
1583 | ci->memset( fir_buf, 0, sizeof fir_buf ); | ||
1584 | #elif defined (CPU_ARM) | ||
1585 | this->fir_ptr = fir_buf; | ||
1586 | ci->memset( fir_buf, 0, sizeof fir_buf ); | ||
1587 | #else | ||
1588 | this->fir_pos = 0; | ||
1589 | ci->memset( this->fir_buf, 0, sizeof this->fir_buf ); | ||
1590 | #endif | ||
1591 | |||
1592 | assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT ); | ||
1593 | assert( sizeof (this->r.voice) == REGISTER_COUNT ); | ||
1594 | } | ||