diff options
Diffstat (limited to 'lib/rbcodec/codecs/libspc/spc_dsp.c')
-rw-r--r-- | lib/rbcodec/codecs/libspc/spc_dsp.c | 1733 |
1 files changed, 537 insertions, 1196 deletions
diff --git a/lib/rbcodec/codecs/libspc/spc_dsp.c b/lib/rbcodec/codecs/libspc/spc_dsp.c index 6350c4c331..c94fbc990e 100644 --- a/lib/rbcodec/codecs/libspc/spc_dsp.c +++ b/lib/rbcodec/codecs/libspc/spc_dsp.c | |||
@@ -27,15 +27,103 @@ | |||
27 | #include "spc_codec.h" | 27 | #include "spc_codec.h" |
28 | #include "spc_profiler.h" | 28 | #include "spc_profiler.h" |
29 | 29 | ||
30 | #if defined(CPU_COLDFIRE) || defined (CPU_ARM) | 30 | #define CLAMP16( n ) clip_sample_16( n ) |
31 | int32_t fir_buf[FIR_BUF_CNT] IBSS_ATTR_SPC | 31 | |
32 | __attribute__((aligned(FIR_BUF_ALIGN*1))); | 32 | #if defined(CPU_ARM) |
33 | #if ARM_ARCH >= 6 | ||
34 | #include "cpu/spc_dsp_armv6.c" | ||
35 | #else | ||
36 | #include "cpu/spc_dsp_armv4.c" | ||
33 | #endif | 37 | #endif |
34 | #if SPC_BRRCACHE | 38 | #elif defined (CPU_COLDFIRE) |
35 | /* a little extra for samples that go past end */ | 39 | #include "cpu/spc_dsp_coldfire.c" |
36 | int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR; | ||
37 | #endif | 40 | #endif |
38 | 41 | ||
42 | /* Above may still use generic implementations. Also defines final | ||
43 | function names. */ | ||
44 | #include "spc_dsp_generic.c" | ||
45 | |||
46 | /* each rate divides exactly into 0x7800 without remainder */ | ||
47 | static unsigned short const env_rates [0x20] ICONST_ATTR_SPC = | ||
48 | { | ||
49 | 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C, | ||
50 | 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180, | ||
51 | 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00, | ||
52 | 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800 | ||
53 | }; | ||
54 | |||
55 | #if !SPC_NOINTERP | ||
56 | /* Interleaved gauss table (to improve cache coherency). */ | ||
57 | /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */ | ||
58 | static int16_t const gauss_table [512] ICONST_ATTR_SPC MEM_ALIGN_ATTR = | ||
59 | { | ||
60 | 370,1305, 366,1305, 362,1304, 358,1304, | ||
61 | 354,1304, 351,1304, 347,1304, 343,1303, | ||
62 | 339,1303, 336,1303, 332,1302, 328,1302, | ||
63 | 325,1301, 321,1300, 318,1300, 314,1299, | ||
64 | 311,1298, 307,1297, 304,1297, 300,1296, | ||
65 | 297,1295, 293,1294, 290,1293, 286,1292, | ||
66 | 283,1291, 280,1290, 276,1288, 273,1287, | ||
67 | 270,1286, 267,1284, 263,1283, 260,1282, | ||
68 | 257,1280, 254,1279, 251,1277, 248,1275, | ||
69 | 245,1274, 242,1272, 239,1270, 236,1269, | ||
70 | 233,1267, 230,1265, 227,1263, 224,1261, | ||
71 | 221,1259, 218,1257, 215,1255, 212,1253, | ||
72 | 210,1251, 207,1248, 204,1246, 201,1244, | ||
73 | 199,1241, 196,1239, 193,1237, 191,1234, | ||
74 | 188,1232, 186,1229, 183,1227, 180,1224, | ||
75 | 178,1221, 175,1219, 173,1216, 171,1213, | ||
76 | 168,1210, 166,1207, 163,1205, 161,1202, | ||
77 | 159,1199, 156,1196, 154,1193, 152,1190, | ||
78 | 150,1186, 147,1183, 145,1180, 143,1177, | ||
79 | 141,1174, 139,1170, 137,1167, 134,1164, | ||
80 | 132,1160, 130,1157, 128,1153, 126,1150, | ||
81 | 124,1146, 122,1143, 120,1139, 118,1136, | ||
82 | 117,1132, 115,1128, 113,1125, 111,1121, | ||
83 | 109,1117, 107,1113, 106,1109, 104,1106, | ||
84 | 102,1102, 100,1098, 99,1094, 97,1090, | ||
85 | 95,1086, 94,1082, 92,1078, 90,1074, | ||
86 | 89,1070, 87,1066, 86,1061, 84,1057, | ||
87 | 83,1053, 81,1049, 80,1045, 78,1040, | ||
88 | 77,1036, 76,1032, 74,1027, 73,1023, | ||
89 | 71,1019, 70,1014, 69,1010, 67,1005, | ||
90 | 66,1001, 65, 997, 64, 992, 62, 988, | ||
91 | 61, 983, 60, 978, 59, 974, 58, 969, | ||
92 | 56, 965, 55, 960, 54, 955, 53, 951, | ||
93 | 52, 946, 51, 941, 50, 937, 49, 932, | ||
94 | 48, 927, 47, 923, 46, 918, 45, 913, | ||
95 | 44, 908, 43, 904, 42, 899, 41, 894, | ||
96 | 40, 889, 39, 884, 38, 880, 37, 875, | ||
97 | 36, 870, 36, 865, 35, 860, 34, 855, | ||
98 | 33, 851, 32, 846, 32, 841, 31, 836, | ||
99 | 30, 831, 29, 826, 29, 821, 28, 816, | ||
100 | 27, 811, 27, 806, 26, 802, 25, 797, | ||
101 | 24, 792, 24, 787, 23, 782, 23, 777, | ||
102 | 22, 772, 21, 767, 21, 762, 20, 757, | ||
103 | 20, 752, 19, 747, 19, 742, 18, 737, | ||
104 | 17, 732, 17, 728, 16, 723, 16, 718, | ||
105 | 15, 713, 15, 708, 15, 703, 14, 698, | ||
106 | 14, 693, 13, 688, 13, 683, 12, 678, | ||
107 | 12, 674, 11, 669, 11, 664, 11, 659, | ||
108 | 10, 654, 10, 649, 10, 644, 9, 640, | ||
109 | 9, 635, 9, 630, 8, 625, 8, 620, | ||
110 | 8, 615, 7, 611, 7, 606, 7, 601, | ||
111 | 6, 596, 6, 592, 6, 587, 6, 582, | ||
112 | 5, 577, 5, 573, 5, 568, 5, 563, | ||
113 | 4, 559, 4, 554, 4, 550, 4, 545, | ||
114 | 4, 540, 3, 536, 3, 531, 3, 527, | ||
115 | 3, 522, 3, 517, 2, 513, 2, 508, | ||
116 | 2, 504, 2, 499, 2, 495, 2, 491, | ||
117 | 2, 486, 1, 482, 1, 477, 1, 473, | ||
118 | 1, 469, 1, 464, 1, 460, 1, 456, | ||
119 | 1, 451, 1, 447, 1, 443, 1, 439, | ||
120 | 0, 434, 0, 430, 0, 426, 0, 422, | ||
121 | 0, 418, 0, 414, 0, 410, 0, 405, | ||
122 | 0, 401, 0, 397, 0, 393, 0, 389, | ||
123 | 0, 385, 0, 381, 0, 378, 0, 374, | ||
124 | }; | ||
125 | #endif /* !SPC_NOINTERP */ | ||
126 | |||
39 | void DSP_write( struct Spc_Dsp* this, int i, int data ) | 127 | void DSP_write( struct Spc_Dsp* this, int i, int data ) |
40 | { | 128 | { |
41 | assert( (unsigned) i < REGISTER_COUNT ); | 129 | assert( (unsigned) i < REGISTER_COUNT ); |
@@ -51,230 +139,395 @@ void DSP_write( struct Spc_Dsp* this, int i, int data ) | |||
51 | v->volume [0] = left; | 139 | v->volume [0] = left; |
52 | v->volume [1] = right; | 140 | v->volume [1] = right; |
53 | } | 141 | } |
142 | else if ( low < 4 ) /* voice rates */ | ||
143 | { | ||
144 | struct voice_t* v = this->voice_state + high; | ||
145 | v->rate = GET_LE16A( this->r.voice[high].rate ) & 0x3fff; | ||
146 | } | ||
147 | #if !SPC_NOECHO | ||
54 | else if ( low == 0x0F ) /* fir coefficients */ | 148 | else if ( low == 0x0F ) /* fir coefficients */ |
55 | { | 149 | { |
56 | this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */ | 150 | this->fir.coeff [7 - high] = (int8_t) data; /* sign-extend */ |
57 | } | 151 | } |
152 | #endif /* !SPC_NOECHO */ | ||
58 | } | 153 | } |
59 | 154 | ||
60 | #define CLAMP16( n ) clip_sample_16( n ) | 155 | /* Decode BRR block */ |
156 | static inline void | ||
157 | decode_brr_block( struct voice_t* voice, uint8_t const* addr, int16_t* out ) | ||
158 | { | ||
159 | /* header */ | ||
160 | unsigned block_header = *addr; | ||
161 | voice->wave.block_header = block_header; | ||
162 | |||
163 | /* point to next header */ | ||
164 | addr += 9; | ||
165 | voice->wave.addr = addr; | ||
166 | |||
167 | /* previous samples */ | ||
168 | int smp2 = out [0]; | ||
169 | int smp1 = out [1]; | ||
170 | |||
171 | int offset = -BRR_BLOCK_SIZE * 4; | ||
172 | |||
173 | #if !SPC_BRRCACHE | ||
174 | out [-(BRR_BLOCK_SIZE + 1)] = out [-1]; | ||
175 | |||
176 | /* if next block has end flag set, | ||
177 | this block ends early (verified) */ | ||
178 | if ( (block_header & 3) != 3 && (*addr & 3) == 1 ) | ||
179 | { | ||
180 | /* arrange for last 9 samples to be skipped */ | ||
181 | int const skip = 9; | ||
182 | out [skip - (BRR_BLOCK_SIZE + 1)] = out [-1]; | ||
183 | out += (skip & 1); | ||
184 | voice->wave.position += skip * 0x1000; | ||
185 | offset = (-BRR_BLOCK_SIZE + (skip & ~1)) * 4; | ||
186 | addr -= skip / 2; | ||
187 | /* force sample to end on next decode */ | ||
188 | voice->wave.block_header = 1; | ||
189 | } | ||
190 | #endif /* !SPC_BRRCACHE */ | ||
191 | |||
192 | int const filter = block_header & 0x0c; | ||
193 | int const scale = block_header >> 4; | ||
194 | |||
195 | if ( filter == 0x08 ) /* filter 2 (30-90% of the time) */ | ||
196 | { | ||
197 | /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */ | ||
198 | do /* decode and filter 16 samples */ | ||
199 | { | ||
200 | /* Get nybble, sign-extend, then scale | ||
201 | get byte, select which nybble, sign-extend, then shift | ||
202 | based on scaling. */ | ||
203 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
204 | delta = (delta << scale) >> 1; | ||
205 | |||
206 | if (scale > 0xc) | ||
207 | delta = (delta >> 17) << 11; | ||
208 | |||
209 | out [offset >> 2] = smp2; | ||
210 | |||
211 | delta -= smp2 >> 1; | ||
212 | delta += smp2 >> 5; | ||
213 | delta += smp1; | ||
214 | delta += (-smp1 - (smp1 >> 1)) >> 5; | ||
215 | |||
216 | delta = CLAMP16( delta ); | ||
217 | smp2 = smp1; | ||
218 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
219 | } | ||
220 | while ( (offset += 4) != 0 ); | ||
221 | } | ||
222 | else if ( filter == 0x04 ) /* filter 1 */ | ||
223 | { | ||
224 | /* y[n] = x[n] + 15/16 * y[n-1] */ | ||
225 | do /* decode and filter 16 samples */ | ||
226 | { | ||
227 | /* Get nybble, sign-extend, then scale | ||
228 | get byte, select which nybble, sign-extend, then shift | ||
229 | based on scaling. */ | ||
230 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
231 | delta = (delta << scale) >> 1; | ||
232 | |||
233 | if (scale > 0xc) | ||
234 | delta = (delta >> 17) << 11; | ||
235 | |||
236 | out [offset >> 2] = smp2; | ||
237 | |||
238 | delta += smp1 >> 1; | ||
239 | delta += (-smp1) >> 5; | ||
240 | |||
241 | delta = CLAMP16( delta ); | ||
242 | smp2 = smp1; | ||
243 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
244 | } | ||
245 | while ( (offset += 4) != 0 ); | ||
246 | } | ||
247 | else if ( filter == 0x0c ) /* filter 3 */ | ||
248 | { | ||
249 | /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */ | ||
250 | do /* decode and filter 16 samples */ | ||
251 | { | ||
252 | /* Get nybble, sign-extend, then scale | ||
253 | get byte, select which nybble, sign-extend, then shift | ||
254 | based on scaling. */ | ||
255 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
256 | delta = (delta << scale) >> 1; | ||
257 | |||
258 | if (scale > 0xc) | ||
259 | delta = (delta >> 17) << 11; | ||
260 | |||
261 | out [offset >> 2] = smp2; | ||
262 | |||
263 | delta -= smp2 >> 1; | ||
264 | delta += (smp2 + (smp2 >> 1)) >> 4; | ||
265 | delta += smp1; | ||
266 | delta += (-smp1 * 13) >> 7; | ||
267 | |||
268 | delta = CLAMP16( delta ); | ||
269 | smp2 = smp1; | ||
270 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
271 | } | ||
272 | while ( (offset += 4) != 0 ); | ||
273 | } | ||
274 | else /* filter 0 */ | ||
275 | { | ||
276 | /* y[n] = x[n] */ | ||
277 | do /* decode and filter 16 samples */ | ||
278 | { | ||
279 | /* Get nybble, sign-extend, then scale | ||
280 | get byte, select which nybble, sign-extend, then shift | ||
281 | based on scaling. */ | ||
282 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
283 | delta = (delta << scale) >> 1; | ||
284 | |||
285 | if (scale > 0xc) | ||
286 | delta = (delta >> 17) << 11; | ||
287 | |||
288 | out [offset >> 2] = smp2; | ||
289 | |||
290 | smp2 = smp1; | ||
291 | smp1 = delta * 2; | ||
292 | } | ||
293 | while ( (offset += 4) != 0 ); | ||
294 | } | ||
61 | 295 | ||
62 | #if SPC_BRRCACHE | 296 | #if SPC_BRRCACHE |
63 | static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, | 297 | if ( !(block_header & 1) ) |
64 | struct voice_t* voice, | 298 | { |
65 | struct raw_voice_t const* const raw_voice ) ICODE_ATTR_SPC; | 299 | /* save to end of next block (for next call) */ |
66 | static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, | 300 | out [BRR_BLOCK_SIZE ] = smp2; |
67 | struct voice_t* voice, | 301 | out [BRR_BLOCK_SIZE + 1] = smp1; |
68 | struct raw_voice_t const* const raw_voice ) | 302 | } |
303 | else | ||
304 | #endif /* SPC_BRRCACHE */ | ||
305 | { | ||
306 | /* save to end of this block */ | ||
307 | out [0] = smp2; | ||
308 | out [1] = smp1; | ||
309 | } | ||
310 | } | ||
311 | |||
312 | #if SPC_BRRCACHE | ||
313 | static void NO_INLINE ICODE_ATTR_SPC | ||
314 | brr_decode_cache( struct Spc_Dsp* this, struct src_dir const* sd, | ||
315 | unsigned start_addr, struct voice_t* voice, | ||
316 | struct raw_voice_t const* raw_voice ) | ||
69 | { | 317 | { |
70 | /* setup same variables as where decode_brr() is called from */ | 318 | /* a little extra for samples that go past end */ |
71 | #undef RAM | 319 | static int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR; |
72 | #define RAM ram.ram | 320 | |
321 | DEBUGF( "decode at %08x (wave #%d)\n", | ||
322 | start_addr, raw_voice->waveform ); | ||
73 | 323 | ||
74 | struct src_dir const* const sd = | ||
75 | &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)]; | ||
76 | struct cache_entry_t* const wave_entry = | 324 | struct cache_entry_t* const wave_entry = |
77 | &this->wave_entry [raw_voice->waveform]; | 325 | &this->wave_entry [raw_voice->waveform]; |
78 | 326 | ||
79 | /* the following block can be put in place of the call to | 327 | wave_entry->start_addr = start_addr; |
80 | decode_brr() below | 328 | |
81 | */ | 329 | uint8_t const* const loop_ptr = |
330 | ram.ram + letoh16( sd [raw_voice->waveform].loop ); | ||
331 | |||
332 | int16_t* loop_start = NULL; | ||
333 | |||
334 | uint8_t const* addr = ram.ram + start_addr; | ||
335 | |||
336 | int16_t* out = BRRcache + start_addr * 2; | ||
337 | wave_entry->samples = out; | ||
338 | |||
339 | /* BRR filter uses previous samples */ | ||
340 | out [BRR_BLOCK_SIZE + 1] = 0; | ||
341 | out [BRR_BLOCK_SIZE + 2] = 0; | ||
342 | *out++ = 0; | ||
343 | |||
344 | unsigned block_header; | ||
345 | |||
346 | do | ||
347 | { | ||
348 | if ( addr == loop_ptr ) | ||
349 | { | ||
350 | loop_start = out; | ||
351 | DEBUGF( "loop at %08lx (wave #%d)\n", | ||
352 | (unsigned long)(addr - RAM), raw_voice->waveform ); | ||
353 | } | ||
354 | |||
355 | /* output position - preincrement */ | ||
356 | out += BRR_BLOCK_SIZE; | ||
357 | |||
358 | decode_brr_block( voice, addr, out ); | ||
359 | |||
360 | block_header = voice->wave.block_header; | ||
361 | addr = voice->wave.addr; | ||
362 | |||
363 | /* if next block has end flag set, this block ends early */ | ||
364 | /* (verified) */ | ||
365 | if ( (block_header & 3) != 3 && (*addr & 3) == 1 ) | ||
366 | { | ||
367 | /* skip last 9 samples */ | ||
368 | DEBUGF( "block early end\n" ); | ||
369 | out -= 9; | ||
370 | break; | ||
371 | } | ||
372 | } | ||
373 | while ( !(block_header & 1) && addr < RAM + 0x10000 ); | ||
374 | |||
375 | wave_entry->end = (out - 1 - wave_entry->samples) << 12; | ||
376 | wave_entry->loop = 0; | ||
377 | |||
378 | if ( (block_header & 2) ) | ||
379 | { | ||
380 | if ( loop_start ) | ||
381 | { | ||
382 | wave_entry->loop = out - loop_start; | ||
383 | wave_entry->end += 0x3000; | ||
384 | |||
385 | out [2] = loop_start [2]; | ||
386 | out [3] = loop_start [3]; | ||
387 | out [4] = loop_start [4]; | ||
388 | } | ||
389 | else | ||
390 | { | ||
391 | DEBUGF( "loop point outside initial wave\n" ); | ||
392 | } | ||
393 | } | ||
394 | |||
395 | DEBUGF( "end at %08lx (wave #%d)\n", | ||
396 | (unsigned long)(addr - RAM), raw_voice->waveform ); | ||
397 | |||
398 | /* add to cache */ | ||
399 | this->wave_entry_old [this->oldsize++] = *wave_entry; | ||
400 | } | ||
401 | |||
402 | static inline void | ||
403 | brr_key_on( struct Spc_Dsp* this, struct src_dir const* sd, | ||
404 | struct voice_t* voice, struct raw_voice_t const* raw_voice ) | ||
405 | { | ||
406 | unsigned start_addr = letoh16( sd [raw_voice->waveform].start ); | ||
407 | struct cache_entry_t* const wave_entry = | ||
408 | &this->wave_entry [raw_voice->waveform]; | ||
409 | |||
410 | /* predecode BRR if not already */ | ||
411 | if ( wave_entry->start_addr != start_addr ) | ||
82 | { | 412 | { |
83 | DEBUGF( "decode at %08x (wave #%d)\n", | ||
84 | start_addr, raw_voice->waveform ); | ||
85 | |||
86 | /* see if in cache */ | 413 | /* see if in cache */ |
87 | int i; | 414 | for ( int i = 0; i < this->oldsize; i++ ) |
88 | for ( i = 0; i < this->oldsize; i++ ) | ||
89 | { | 415 | { |
90 | struct cache_entry_t* e = &this->wave_entry_old [i]; | 416 | struct cache_entry_t* e = &this->wave_entry_old [i]; |
417 | |||
91 | if ( e->start_addr == start_addr ) | 418 | if ( e->start_addr == start_addr ) |
92 | { | 419 | { |
93 | DEBUGF( "found in wave_entry_old (oldsize=%d)\n", | 420 | DEBUGF( "found in wave_entry_old (oldsize=%d)\n", |
94 | this->oldsize ); | 421 | this->oldsize ); |
95 | *wave_entry = *e; | 422 | *wave_entry = *e; |
96 | goto wave_in_cache; | 423 | goto wave_in_cache; /* Wave in cache */ |
97 | } | 424 | } |
98 | } | 425 | } |
99 | 426 | ||
100 | wave_entry->start_addr = start_addr; | 427 | /* actually decode it */ |
101 | 428 | brr_decode_cache( this, sd, start_addr, voice, raw_voice ); | |
102 | uint8_t const* const loop_ptr = | ||
103 | RAM + letoh16(sd[raw_voice->waveform].loop); | ||
104 | short* loop_start = 0; | ||
105 | |||
106 | short* out = BRRcache + start_addr * 2; | ||
107 | wave_entry->samples = out; | ||
108 | *out++ = 0; | ||
109 | int smp1 = 0; | ||
110 | int smp2 = 0; | ||
111 | |||
112 | uint8_t const* addr = RAM + start_addr; | ||
113 | int block_header; | ||
114 | do | ||
115 | { | ||
116 | if ( addr == loop_ptr ) | ||
117 | { | ||
118 | loop_start = out; | ||
119 | DEBUGF( "loop at %08lx (wave #%d)\n", | ||
120 | (unsigned long)(addr - RAM), raw_voice->waveform ); | ||
121 | } | ||
122 | |||
123 | /* header */ | ||
124 | block_header = *addr; | ||
125 | addr += 9; | ||
126 | voice->addr = addr; | ||
127 | int const filter = (block_header & 0x0C) - 0x08; | ||
128 | |||
129 | /* scaling | ||
130 | (invalid scaling gives -4096 for neg nybble, 0 for pos) */ | ||
131 | static unsigned char const right_shifts [16] = { | ||
132 | 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29, | ||
133 | }; | ||
134 | static unsigned char const left_shifts [16] = { | ||
135 | 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11 | ||
136 | }; | ||
137 | int const scale = block_header >> 4; | ||
138 | int const right_shift = right_shifts [scale]; | ||
139 | int const left_shift = left_shifts [scale]; | ||
140 | |||
141 | /* output position */ | ||
142 | out += BRR_BLOCK_SIZE; | ||
143 | int offset = -BRR_BLOCK_SIZE << 2; | ||
144 | |||
145 | do /* decode and filter 16 samples */ | ||
146 | { | ||
147 | /* Get nybble, sign-extend, then scale | ||
148 | get byte, select which nybble, sign-extend, then shift based | ||
149 | on scaling. also handles invalid scaling values. */ | ||
150 | int delta = (int) (int8_t) (addr [offset >> 3] << (offset & 4)) | ||
151 | >> right_shift << left_shift; | ||
152 | |||
153 | out [offset >> 2] = smp2; | ||
154 | |||
155 | if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */ | ||
156 | { | ||
157 | delta -= smp2 >> 1; | ||
158 | delta += smp2 >> 5; | ||
159 | smp2 = smp1; | ||
160 | delta += smp1; | ||
161 | delta += (-smp1 - (smp1 >> 1)) >> 5; | ||
162 | } | ||
163 | else | ||
164 | { | ||
165 | if ( filter == -4 ) /* mode 0x04 */ | ||
166 | { | ||
167 | delta += smp1 >> 1; | ||
168 | delta += (-smp1) >> 5; | ||
169 | } | ||
170 | else if ( filter > -4 ) /* mode 0x0C */ | ||
171 | { | ||
172 | delta -= smp2 >> 1; | ||
173 | delta += (smp2 + (smp2 >> 1)) >> 4; | ||
174 | delta += smp1; | ||
175 | delta += (-smp1 * 13) >> 7; | ||
176 | } | ||
177 | smp2 = smp1; | ||
178 | } | ||
179 | |||
180 | delta = CLAMP16( delta ); | ||
181 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
182 | } | ||
183 | while ( (offset += 4) != 0 ); | ||
184 | |||
185 | /* if next block has end flag set, this block ends early */ | ||
186 | /* (verified) */ | ||
187 | if ( (block_header & 3) != 3 && (*addr & 3) == 1 ) | ||
188 | { | ||
189 | /* skip last 9 samples */ | ||
190 | out -= 9; | ||
191 | goto early_end; | ||
192 | } | ||
193 | } | ||
194 | while ( !(block_header & 1) && addr < RAM + 0x10000 ); | ||
195 | |||
196 | out [0] = smp2; | ||
197 | out [1] = smp1; | ||
198 | |||
199 | early_end: | ||
200 | wave_entry->end = (out - 1 - wave_entry->samples) << 12; | ||
201 | |||
202 | wave_entry->loop = 0; | ||
203 | if ( (block_header & 2) ) | ||
204 | { | ||
205 | if ( loop_start ) | ||
206 | { | ||
207 | int loop = out - loop_start; | ||
208 | wave_entry->loop = loop; | ||
209 | wave_entry->end += 0x3000; | ||
210 | out [2] = loop_start [2]; | ||
211 | out [3] = loop_start [3]; | ||
212 | out [4] = loop_start [4]; | ||
213 | } | ||
214 | else | ||
215 | { | ||
216 | DEBUGF( "loop point outside initial wave\n" ); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | DEBUGF( "end at %08lx (wave #%d)\n", | ||
221 | (unsigned long)(addr - RAM), raw_voice->waveform ); | ||
222 | |||
223 | /* add to cache */ | ||
224 | this->wave_entry_old [this->oldsize++] = *wave_entry; | ||
225 | wave_in_cache:; | ||
226 | } | 429 | } |
430 | |||
431 | wave_in_cache: | ||
432 | voice->wave.position = 3 * 0x1000 - 1; /* 0x2fff */ | ||
433 | voice->wave.samples = wave_entry->samples; | ||
434 | voice->wave.end = wave_entry->end; | ||
435 | voice->wave.loop = wave_entry->loop; | ||
436 | } | ||
437 | |||
438 | static inline int brr_decode( struct src_dir const* sd, struct voice_t* voice, | ||
439 | struct raw_voice_t const* raw_voice ) | ||
440 | { | ||
441 | if ( voice->wave.position < voice->wave.end ) | ||
442 | return 0; | ||
443 | |||
444 | long loop_len = voice->wave.loop << 12; | ||
445 | |||
446 | if ( !loop_len ) | ||
447 | return 2; | ||
448 | |||
449 | voice->wave.position -= loop_len; | ||
450 | return 1; | ||
451 | |||
452 | (void)sd; (void)raw_voice; | ||
227 | } | 453 | } |
228 | #endif | ||
229 | 454 | ||
230 | static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, | 455 | #else /* !SPC_BRRCACHE */ |
231 | struct src_dir const* const sd, | 456 | |
232 | struct raw_voice_t const* const raw_voice, | 457 | static inline void |
233 | const int key_on_delay, const int vbit) ICODE_ATTR_SPC; | 458 | brr_key_on( struct Spc_Dsp* this, struct src_dir const* sd, |
234 | static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, | 459 | struct voice_t* voice, struct raw_voice_t const* raw_voice ) |
235 | struct src_dir const* const sd, | 460 | { |
236 | struct raw_voice_t const* const raw_voice, | 461 | voice->wave.addr = ram.ram + letoh16( sd [raw_voice->waveform].start ); |
237 | const int key_on_delay, const int vbit) { | 462 | /* BRR filter uses previous samples */ |
463 | voice->wave.samples [BRR_BLOCK_SIZE + 1] = 0; | ||
464 | voice->wave.samples [BRR_BLOCK_SIZE + 2] = 0; | ||
465 | /* force decode on next brr_decode call */ | ||
466 | voice->wave.position = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1; /* 0x12fff */ | ||
467 | voice->wave.block_header = 0; /* "previous" BRR header */ | ||
468 | (void)this; | ||
469 | } | ||
470 | |||
471 | static inline int brr_decode( struct src_dir const* sd, struct voice_t* voice, | ||
472 | struct raw_voice_t const* raw_voice ) | ||
473 | { | ||
238 | #undef RAM | 474 | #undef RAM |
475 | #if defined(CPU_ARM) && !SPC_BRRCACHE | ||
476 | uint8_t* const ram_ = ram.ram; | ||
477 | #define RAM ram_ | ||
478 | #else | ||
239 | #define RAM ram.ram | 479 | #define RAM ram.ram |
240 | int const env_rate_init = 0x7800; | 480 | #endif |
481 | |||
482 | if ( voice->wave.position < BRR_BLOCK_SIZE * 0x1000 ) | ||
483 | return 0; | ||
484 | |||
485 | voice->wave.position -= BRR_BLOCK_SIZE * 0x1000; | ||
486 | |||
487 | uint8_t const* addr = voice->wave.addr; | ||
488 | |||
489 | if ( addr >= RAM + 0x10000 ) | ||
490 | addr -= 0x10000; | ||
491 | |||
492 | unsigned block_header = voice->wave.block_header; | ||
493 | |||
494 | /* action based on previous block's header */ | ||
495 | int dec = 0; | ||
496 | |||
497 | if ( block_header & 1 ) | ||
498 | { | ||
499 | addr = RAM + letoh16( sd [raw_voice->waveform].loop ); | ||
500 | dec = 1; | ||
501 | |||
502 | if ( !(block_header & 2) ) /* 1% of the time */ | ||
503 | { | ||
504 | /* first block was end block; | ||
505 | don't play anything (verified) */ | ||
506 | return 2; | ||
507 | } | ||
508 | } | ||
509 | |||
510 | decode_brr_block( voice, addr, &voice->wave.samples [1 + BRR_BLOCK_SIZE] ); | ||
511 | |||
512 | return dec; | ||
513 | } | ||
514 | #endif /* SPC_BRRCACHE */ | ||
515 | |||
516 | static void NO_INLINE ICODE_ATTR_SPC | ||
517 | key_on( struct Spc_Dsp* const this, struct voice_t* const voice, | ||
518 | struct src_dir const* const sd, | ||
519 | struct raw_voice_t const* const raw_voice, | ||
520 | const int key_on_delay, const int vbit ) | ||
521 | { | ||
241 | voice->key_on_delay = key_on_delay; | 522 | voice->key_on_delay = key_on_delay; |
523 | |||
242 | if ( key_on_delay == 0 ) | 524 | if ( key_on_delay == 0 ) |
243 | { | 525 | { |
244 | this->keys_down |= vbit; | 526 | this->keys_down |= vbit; |
245 | voice->envx = 0; | 527 | voice->envx = 0; |
246 | voice->env_mode = state_attack; | 528 | voice->env_mode = state_attack; |
247 | voice->env_timer = env_rate_init; /* TODO: inaccurate? */ | 529 | voice->env_timer = ENV_RATE_INIT; /* TODO: inaccurate? */ |
248 | unsigned start_addr = letoh16(sd[raw_voice->waveform].start); | 530 | brr_key_on( this, sd, voice, raw_voice ); |
249 | #if !SPC_BRRCACHE | ||
250 | { | ||
251 | voice->addr = RAM + start_addr; | ||
252 | /* BRR filter uses previous samples */ | ||
253 | voice->samples [BRR_BLOCK_SIZE + 1] = 0; | ||
254 | voice->samples [BRR_BLOCK_SIZE + 2] = 0; | ||
255 | /* decode three samples immediately */ | ||
256 | voice->position = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1; | ||
257 | voice->block_header = 0; /* "previous" BRR header */ | ||
258 | } | ||
259 | #else | ||
260 | { | ||
261 | voice->position = 3 * 0x1000 - 1; | ||
262 | struct cache_entry_t* const wave_entry = | ||
263 | &this->wave_entry [raw_voice->waveform]; | ||
264 | |||
265 | /* predecode BRR if not already */ | ||
266 | if ( wave_entry->start_addr != start_addr ) | ||
267 | { | ||
268 | /* the following line can be replaced by the indicated block | ||
269 | in decode_brr() */ | ||
270 | decode_brr( this, start_addr, voice, raw_voice ); | ||
271 | } | ||
272 | |||
273 | voice->samples = wave_entry->samples; | ||
274 | voice->wave_end = wave_entry->end; | ||
275 | voice->wave_loop = wave_entry->loop; | ||
276 | } | ||
277 | #endif | ||
278 | } | 531 | } |
279 | } | 532 | } |
280 | 533 | ||
@@ -287,10 +540,8 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
287 | #else | 540 | #else |
288 | #define RAM ram.ram | 541 | #define RAM ram.ram |
289 | #endif | 542 | #endif |
290 | #if 0 | ||
291 | EXIT_TIMER(cpu); | 543 | EXIT_TIMER(cpu); |
292 | ENTER_TIMER(dsp); | 544 | ENTER_TIMER(dsp); |
293 | #endif | ||
294 | 545 | ||
295 | /* Here we check for keys on/off. Docs say that successive writes | 546 | /* Here we check for keys on/off. Docs say that successive writes |
296 | to KON/KOF must be separated by at least 2 Ts periods or risk | 547 | to KON/KOF must be separated by at least 2 Ts periods or risk |
@@ -327,98 +578,60 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
327 | } | 578 | } |
328 | 579 | ||
329 | struct src_dir const* const sd = | 580 | struct src_dir const* const sd = |
330 | &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)]; | 581 | &ram.sd [this->r.g.wave_page * 0x100/sizeof(struct src_dir)]; |
331 | 582 | ||
332 | #ifdef ROCKBOX_BIG_ENDIAN | ||
333 | /* Convert endiannesses before entering loops - these | ||
334 | get used a lot */ | ||
335 | const uint32_t rates[VOICE_COUNT] = | ||
336 | { | ||
337 | GET_LE16A( this->r.voice[0].rate ) & 0x3FFF, | ||
338 | GET_LE16A( this->r.voice[1].rate ) & 0x3FFF, | ||
339 | GET_LE16A( this->r.voice[2].rate ) & 0x3FFF, | ||
340 | GET_LE16A( this->r.voice[3].rate ) & 0x3FFF, | ||
341 | GET_LE16A( this->r.voice[4].rate ) & 0x3FFF, | ||
342 | GET_LE16A( this->r.voice[5].rate ) & 0x3FFF, | ||
343 | GET_LE16A( this->r.voice[6].rate ) & 0x3FFF, | ||
344 | GET_LE16A( this->r.voice[7].rate ) & 0x3FFF, | ||
345 | }; | ||
346 | #define VOICE_RATE(x) *(x) | ||
347 | #define IF_RBE(...) __VA_ARGS__ | ||
348 | #ifdef CPU_COLDFIRE | ||
349 | /* Initialize mask register with the buffer address mask */ | ||
350 | asm volatile ("move.l %[m], %%mask" : : [m]"i"(FIR_BUF_MASK)); | ||
351 | const int echo_wrap = (this->r.g.echo_delay & 15) * 0x800; | ||
352 | const int echo_start = this->r.g.echo_page * 0x100; | ||
353 | #endif /* CPU_COLDFIRE */ | ||
354 | #else | ||
355 | #define VOICE_RATE(x) (GET_LE16(raw_voice->rate) & 0x3FFF) | ||
356 | #define IF_RBE(...) | ||
357 | #endif /* ROCKBOX_BIG_ENDIAN */ | ||
358 | |||
359 | #if !SPC_NOINTERP | 583 | #if !SPC_NOINTERP |
360 | int const slow_gaussian = (this->r.g.pitch_mods >> 1) | | 584 | int const slow_gaussian = (this->r.g.pitch_mods >> 1) | |
361 | this->r.g.noise_enables; | 585 | this->r.g.noise_enables; |
586 | #endif | ||
587 | #if !SPC_NOECHO | ||
588 | int const echo_start = this->r.g.echo_page * 0x100; | ||
589 | int const echo_delay = (this->r.g.echo_delay & 15) * 0x800; | ||
362 | #endif | 590 | #endif |
363 | /* (g.flags & 0x40) ? 30 : 14 */ | 591 | /* (g.flags & 0x40) ? 30 : 14 */ |
364 | int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8; | 592 | int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8; |
365 | int const global_vol_0 = this->r.g.volume_0; | 593 | int const global_vol_0 = this->r.g.volume_0; |
366 | int const global_vol_1 = this->r.g.volume_1; | 594 | int const global_vol_1 = this->r.g.volume_1; |
367 | 595 | ||
368 | /* each rate divides exactly into 0x7800 without remainder */ | ||
369 | int const env_rate_init = 0x7800; | ||
370 | static unsigned short const env_rates [0x20] ICONST_ATTR_SPC = | ||
371 | { | ||
372 | 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C, | ||
373 | 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180, | ||
374 | 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00, | ||
375 | 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800 | ||
376 | }; | ||
377 | |||
378 | do /* one pair of output samples per iteration */ | 596 | do /* one pair of output samples per iteration */ |
379 | { | 597 | { |
380 | /* Noise */ | 598 | /* Noise */ |
381 | if ( this->r.g.noise_enables ) | 599 | if ( this->r.g.noise_enables ) |
382 | { | 600 | { |
383 | if ( (this->noise_count -= | 601 | this->noise_count -= env_rates [this->r.g.flags & 0x1F]; |
384 | env_rates [this->r.g.flags & 0x1F]) <= 0 ) | 602 | |
603 | if ( this->noise_count <= 0 ) | ||
385 | { | 604 | { |
386 | this->noise_count = env_rate_init; | 605 | this->noise_count = ENV_RATE_INIT; |
387 | int feedback = (this->noise << 13) ^ (this->noise << 14); | 606 | int feedback = (this->noise << 13) ^ (this->noise << 14); |
388 | this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1); | 607 | this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1); |
389 | } | 608 | } |
390 | } | 609 | } |
391 | 610 | ||
392 | #if !SPC_NOECHO | 611 | #if !SPC_NOECHO |
393 | int echo_0 = 0; | 612 | int echo_0 = 0, echo_1 = 0; |
394 | int echo_1 = 0; | 613 | #endif /* !SPC_NOECHO */ |
395 | #endif | ||
396 | long prev_outx = 0; /* TODO: correct value for first channel? */ | 614 | long prev_outx = 0; /* TODO: correct value for first channel? */ |
397 | int chans_0 = 0; | 615 | int chans_0 = 0, chans_1 = 0; |
398 | int chans_1 = 0; | 616 | |
399 | /* TODO: put raw_voice pointer in voice_t? */ | 617 | /* TODO: put raw_voice pointer in voice_t? */ |
400 | struct raw_voice_t * raw_voice = this->r.voice; | 618 | struct raw_voice_t * raw_voice = this->r.voice; |
401 | struct voice_t* voice = this->voice_state; | 619 | struct voice_t* voice = this->voice_state; |
402 | int vbit = 1; | 620 | |
403 | IF_RBE( const uint32_t* vr = rates; ) | 621 | for (int vbit = 1; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice ) |
404 | for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) ) | ||
405 | { | 622 | { |
406 | /* pregen involves checking keyon, etc */ | 623 | /* pregen involves checking keyon, etc */ |
407 | #if 0 | ||
408 | ENTER_TIMER(dsp_pregen); | 624 | ENTER_TIMER(dsp_pregen); |
409 | #endif | ||
410 | 625 | ||
411 | /* Key on events are delayed */ | 626 | /* Key on events are delayed */ |
412 | int key_on_delay = voice->key_on_delay; | 627 | int key_on_delay = voice->key_on_delay; |
413 | 628 | ||
414 | if ( UNLIKELY ( --key_on_delay >= 0 ) ) /* <1% of the time */ | 629 | if ( UNLIKELY ( --key_on_delay >= 0 ) ) /* <1% of the time */ |
415 | { | 630 | key_on( this, voice, sd, raw_voice, key_on_delay, vbit ); |
416 | key_on(this,voice,sd,raw_voice,key_on_delay,vbit); | ||
417 | } | ||
418 | 631 | ||
419 | if ( !(this->keys_down & vbit) ) /* Silent channel */ | 632 | if ( !(this->keys_down & vbit) ) /* Silent channel */ |
420 | { | 633 | { |
421 | silent_chan: | 634 | silent_chan: |
422 | raw_voice->envx = 0; | 635 | raw_voice->envx = 0; |
423 | raw_voice->outx = 0; | 636 | raw_voice->outx = 0; |
424 | prev_outx = 0; | 637 | prev_outx = 0; |
@@ -461,7 +674,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
461 | voice->envx = envx; | 674 | voice->envx = envx; |
462 | /* TODO: should this be 8? */ | 675 | /* TODO: should this be 8? */ |
463 | raw_voice->envx = envx >> 4; | 676 | raw_voice->envx = envx >> 4; |
464 | env_timer = env_rate_init; | 677 | env_timer = ENV_RATE_INIT; |
465 | } | 678 | } |
466 | 679 | ||
467 | int sustain_level = adsr1 >> 5; | 680 | int sustain_level = adsr1 >> 5; |
@@ -561,994 +774,131 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
561 | } | 774 | } |
562 | } | 775 | } |
563 | init_env_timer: | 776 | init_env_timer: |
564 | env_timer = env_rate_init; | 777 | env_timer = ENV_RATE_INIT; |
565 | write_env_timer: | 778 | write_env_timer: |
566 | voice->env_timer = env_timer; | 779 | voice->env_timer = env_timer; |
567 | env_end:; | 780 | env_end:; |
568 | } | 781 | } |
569 | #if 0 | 782 | |
570 | EXIT_TIMER(dsp_pregen); | 783 | EXIT_TIMER(dsp_pregen); |
571 | 784 | ||
572 | ENTER_TIMER(dsp_gen); | 785 | ENTER_TIMER(dsp_gen); |
573 | #endif | ||
574 | #if !SPC_BRRCACHE | ||
575 | /* Decode BRR block */ | ||
576 | if ( voice->position >= BRR_BLOCK_SIZE * 0x1000 ) | ||
577 | { | ||
578 | voice->position -= BRR_BLOCK_SIZE * 0x1000; | ||
579 | |||
580 | uint8_t const* addr = voice->addr; | ||
581 | if ( addr >= RAM + 0x10000 ) | ||
582 | addr -= 0x10000; | ||
583 | |||
584 | /* action based on previous block's header */ | ||
585 | if ( voice->block_header & 1 ) | ||
586 | { | ||
587 | addr = RAM + letoh16(sd[raw_voice->waveform].loop); | ||
588 | this->r.g.wave_ended |= vbit; | ||
589 | if ( !(voice->block_header & 2) ) /* 1% of the time */ | ||
590 | { | ||
591 | /* first block was end block; | ||
592 | don't play anything (verified) */ | ||
593 | /* bit was set, so this clears it */ | ||
594 | this->keys_down ^= vbit; | ||
595 | |||
596 | /* since voice->envx is 0, | ||
597 | samples and position don't matter */ | ||
598 | raw_voice->envx = 0; | ||
599 | voice->envx = 0; | ||
600 | goto skip_decode; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | /* header */ | ||
605 | int const block_header = *addr; | ||
606 | addr += 9; | ||
607 | voice->addr = addr; | ||
608 | voice->block_header = block_header; | ||
609 | |||
610 | /* previous samples */ | ||
611 | int smp2 = voice->samples [BRR_BLOCK_SIZE + 1]; | ||
612 | int smp1 = voice->samples [BRR_BLOCK_SIZE + 2]; | ||
613 | voice->samples [0] = voice->samples [BRR_BLOCK_SIZE]; | ||
614 | |||
615 | /* output position */ | ||
616 | short* out = voice->samples + (1 + BRR_BLOCK_SIZE); | ||
617 | int offset = -BRR_BLOCK_SIZE << 2; | ||
618 | |||
619 | /* if next block has end flag set, | ||
620 | this block ends early (verified) */ | ||
621 | if ( (block_header & 3) != 3 && (*addr & 3) == 1 ) | ||
622 | { | ||
623 | /* arrange for last 9 samples to be skipped */ | ||
624 | int const skip = 9; | ||
625 | out += (skip & 1); | ||
626 | voice->samples [skip] = voice->samples [BRR_BLOCK_SIZE]; | ||
627 | voice->position += skip * 0x1000; | ||
628 | offset = (-BRR_BLOCK_SIZE + (skip & ~1)) << 2; | ||
629 | addr -= skip / 2; | ||
630 | /* force sample to end on next decode */ | ||
631 | voice->block_header = 1; | ||
632 | } | ||
633 | |||
634 | int const filter = block_header & 0x0c; | ||
635 | int const scale = block_header >> 4; | ||
636 | |||
637 | if ( filter == 0x08 ) /* filter 2 (30-90% of the time) */ | ||
638 | { | ||
639 | /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */ | ||
640 | do /* decode and filter 16 samples */ | ||
641 | { | ||
642 | /* Get nybble, sign-extend, then scale | ||
643 | get byte, select which nybble, sign-extend, then shift | ||
644 | based on scaling. */ | ||
645 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
646 | delta = (delta << scale) >> 1; | ||
647 | 786 | ||
648 | if (scale > 0xc) | 787 | switch ( brr_decode( sd, voice, raw_voice ) ) |
649 | delta = (delta >> 17) << 11; | 788 | { |
650 | 789 | case 2: | |
651 | out [offset >> 2] = smp2; | 790 | /* bit was set, so this clears it */ |
652 | 791 | this->keys_down ^= vbit; | |
653 | delta -= smp2 >> 1; | ||
654 | delta += smp2 >> 5; | ||
655 | delta += smp1; | ||
656 | delta += (-smp1 - (smp1 >> 1)) >> 5; | ||
657 | |||
658 | delta = CLAMP16( delta ); | ||
659 | smp2 = smp1; | ||
660 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
661 | } | ||
662 | while ( (offset += 4) != 0 ); | ||
663 | } | ||
664 | else if ( filter == 0x04 ) /* filter 1 */ | ||
665 | { | ||
666 | /* y[n] = x[n] + 15/16 * y[n-1] */ | ||
667 | do /* decode and filter 16 samples */ | ||
668 | { | ||
669 | /* Get nybble, sign-extend, then scale | ||
670 | get byte, select which nybble, sign-extend, then shift | ||
671 | based on scaling. */ | ||
672 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
673 | delta = (delta << scale) >> 1; | ||
674 | |||
675 | if (scale > 0xc) | ||
676 | delta = (delta >> 17) << 11; | ||
677 | |||
678 | out [offset >> 2] = smp2; | ||
679 | |||
680 | delta += smp1 >> 1; | ||
681 | delta += (-smp1) >> 5; | ||
682 | |||
683 | delta = CLAMP16( delta ); | ||
684 | smp2 = smp1; | ||
685 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
686 | } | ||
687 | while ( (offset += 4) != 0 ); | ||
688 | } | ||
689 | else if ( filter == 0x0c ) /* filter 3 */ | ||
690 | { | ||
691 | /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */ | ||
692 | do /* decode and filter 16 samples */ | ||
693 | { | ||
694 | /* Get nybble, sign-extend, then scale | ||
695 | get byte, select which nybble, sign-extend, then shift | ||
696 | based on scaling. */ | ||
697 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
698 | delta = (delta << scale) >> 1; | ||
699 | |||
700 | if (scale > 0xc) | ||
701 | delta = (delta >> 17) << 11; | ||
702 | |||
703 | out [offset >> 2] = smp2; | ||
704 | |||
705 | delta -= smp2 >> 1; | ||
706 | delta += (smp2 + (smp2 >> 1)) >> 4; | ||
707 | delta += smp1; | ||
708 | delta += (-smp1 * 13) >> 7; | ||
709 | |||
710 | delta = CLAMP16( delta ); | ||
711 | smp2 = smp1; | ||
712 | smp1 = (int16_t) (delta * 2); /* sign-extend */ | ||
713 | } | ||
714 | while ( (offset += 4) != 0 ); | ||
715 | } | ||
716 | else /* filter 0 */ | ||
717 | { | ||
718 | /* y[n] = x[n] */ | ||
719 | do /* decode and filter 16 samples */ | ||
720 | { | ||
721 | /* Get nybble, sign-extend, then scale | ||
722 | get byte, select which nybble, sign-extend, then shift | ||
723 | based on scaling. */ | ||
724 | int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4; | ||
725 | delta = (delta << scale) >> 1; | ||
726 | |||
727 | if (scale > 0xc) | ||
728 | delta = (delta >> 17) << 11; | ||
729 | |||
730 | out [offset >> 2] = smp2; | ||
731 | |||
732 | smp2 = smp1; | ||
733 | smp1 = delta * 2; | ||
734 | } | ||
735 | while ( (offset += 4) != 0 ); | ||
736 | } | ||
737 | 792 | ||
738 | out [0] = smp2; | 793 | /* since voice->envx is 0, |
739 | out [1] = smp1; | 794 | samples and position don't matter */ |
740 | 795 | raw_voice->envx = 0; | |
741 | skip_decode:; | 796 | voice->envx = 0; |
797 | case 1: | ||
798 | this->r.g.wave_ended |= vbit; | ||
742 | } | 799 | } |
743 | #endif /* !SPC_BRRCACHE */ | 800 | |
744 | /* Get rate (with possible modulation) */ | 801 | /* Get rate (with possible modulation) */ |
745 | int rate = VOICE_RATE(vr); | 802 | int rate = voice->rate; |
746 | if ( this->r.g.pitch_mods & vbit ) | 803 | if ( this->r.g.pitch_mods & vbit ) |
747 | rate = (rate * (prev_outx + 32768)) >> 15; | 804 | rate = (rate * (prev_outx + 32768)) >> 15; |
748 | 805 | ||
806 | uint32_t position = voice->wave.position; | ||
807 | voice->wave.position += rate; | ||
808 | |||
809 | int output; | ||
810 | int amp_0, amp_1; | ||
811 | |||
749 | #if !SPC_NOINTERP | 812 | #if !SPC_NOINTERP |
750 | /* Interleved gauss table (to improve cache coherency). */ | ||
751 | /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */ | ||
752 | static short const gauss [512] ICONST_ATTR_SPC MEM_ALIGN_ATTR = | ||
753 | { | ||
754 | 370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303, | ||
755 | 339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299, | ||
756 | 311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292, | ||
757 | 283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282, | ||
758 | 257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269, | ||
759 | 233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253, | ||
760 | 210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234, | ||
761 | 188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213, | ||
762 | 168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190, | ||
763 | 150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164, | ||
764 | 132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136, | ||
765 | 117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106, | ||
766 | 102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074, | ||
767 | 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040, | ||
768 | 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005, | ||
769 | 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969, | ||
770 | 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932, | ||
771 | 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894, | ||
772 | 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855, | ||
773 | 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816, | ||
774 | 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777, | ||
775 | 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737, | ||
776 | 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698, | ||
777 | 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659, | ||
778 | 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620, | ||
779 | 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582, | ||
780 | 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545, | ||
781 | 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508, | ||
782 | 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473, | ||
783 | 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439, | ||
784 | 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405, | ||
785 | 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374, | ||
786 | }; | ||
787 | /* Gaussian interpolation using most recent 4 samples */ | 813 | /* Gaussian interpolation using most recent 4 samples */ |
788 | long position = voice->position; | 814 | |
789 | voice->position += rate; | ||
790 | short const* interp = voice->samples + (position >> 12); | ||
791 | int offset = position >> 4 & 0xFF; | ||
792 | |||
793 | /* Only left half of gaussian kernel is in table, so we must mirror | 815 | /* Only left half of gaussian kernel is in table, so we must mirror |
794 | for right half */ | 816 | for right half */ |
795 | short const* fwd = gauss + offset * 2; | 817 | int offset = ( position >> 4 ) & 0xFF; |
796 | short const* rev = gauss + 510 - offset * 2; | 818 | int16_t const* fwd = gauss_table + offset * 2; |
819 | int16_t const* rev = gauss_table + 510 - offset * 2; | ||
797 | 820 | ||
798 | /* Use faster gaussian interpolation when exact result isn't needed | 821 | /* Use faster gaussian interpolation when exact result isn't needed |
799 | by pitch modulator of next channel */ | 822 | by pitch modulator of next channel */ |
800 | int amp_0, amp_1; /* Also serve as temps _0, and _1 */ | ||
801 | if ( LIKELY ( !(slow_gaussian & vbit) ) ) /* 99% of the time */ | 823 | if ( LIKELY ( !(slow_gaussian & vbit) ) ) /* 99% of the time */ |
802 | { | 824 | { |
803 | /* Main optimization is lack of clamping. Not a problem since | 825 | /* Main optimization is lack of clamping. Not a problem since |
804 | output never goes more than +/- 16 outside 16-bit range and | 826 | output never goes more than +/- 16 outside 16-bit range and |
805 | things are clamped later anyway. Other optimization is to | 827 | things are clamped later anyway. Other optimization is to |
806 | preserve fractional accuracy, eliminating several masks. */ | 828 | preserve fractional accuracy, eliminating several masks. */ |
807 | #if defined (CPU_ARM) | 829 | output = gaussian_fast_interp( voice->wave.samples, position, |
808 | int output; | 830 | fwd, rev ); |
809 | int _2, _3; /* All-purpose temps */ | 831 | output = gaussian_fast_amp( voice, output, &amp_0, &amp_1 ); |
810 | /* Multiple ASM blocks keep regs free and reduce result | ||
811 | * latency issues. */ | ||
812 | #if ARM_ARCH >= 6 | ||
813 | /* Interpolate */ | ||
814 | asm volatile ( | ||
815 | "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */ | ||
816 | "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */ | ||
817 | "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */ | ||
818 | "ldr %[_3], [%[rev]] \r\n" /* _3=r0r1 */ | ||
819 | "smuad %[out], %[_0], %[_2] \r\n" /* out=f0*i0 + f1*i1 */ | ||
820 | "smladx %[out], %[_1], %[_3], %[out] \r\n" /* out+=r1*i2 + r0*i3 */ | ||
821 | : [out]"=r"(output), | ||
822 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
823 | [_2]"=&r"(_2), [_3]"=r"(_3) | ||
824 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
825 | [interp]"r"(interp)); | ||
826 | /* Apply voice envelope */ | ||
827 | asm volatile ( | ||
828 | "mov %[_2], %[out], asr #(11-5) \r\n" /* To do >> 16 later */ | ||
829 | "mul %[out], %[_2], %[envx] \r\n" /* and avoid exp. shift */ | ||
830 | : [out]"+r"(output), [_2]"=&r"(_2) | ||
831 | : [envx]"r"((int)voice->envx)); | ||
832 | /* Apply left and right volume */ | ||
833 | asm volatile ( | ||
834 | "smulwb %[amp_0], %[out], %[vvol_0] \r\n" /* (32x16->48)[47:16]->[31:0] */ | ||
835 | "smulwb %[amp_1], %[out], %[vvol_1] \r\n" | ||
836 | : [out]"+r"(output), | ||
837 | [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
838 | : [vvol_0]"r"(voice->volume[0]), | ||
839 | [vvol_1]"r"(voice->volume[1])); | ||
840 | |||
841 | raw_voice->outx = output >> (8+5); /* 'output' still 5 bits too big */ | ||
842 | #else /* ARM_ARCH < 6 */ | ||
843 | /* Perform gaussian interpolation on four samples */ | ||
844 | asm volatile ( | ||
845 | "ldrsh %[_0], [%[interp]] \r\n" | ||
846 | "ldrsh %[_2], [%[fwd]] \r\n" | ||
847 | "ldrsh %[_1], [%[interp], #2] \r\n" | ||
848 | "ldrsh %[_3], [%[fwd], #2] \r\n" | ||
849 | "mul %[out], %[_0], %[_2] \r\n" /* out= fwd[0]*interp[0] */ | ||
850 | "ldrsh %[_0], [%[interp], #4] \r\n" | ||
851 | "ldrsh %[_2], [%[rev], #2] \r\n" | ||
852 | "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=fwd[1]*interp[1] */ | ||
853 | "ldrsh %[_1], [%[interp], #6] \r\n" | ||
854 | "ldrsh %[_3], [%[rev]] \r\n" | ||
855 | "mla %[out], %[_0], %[_2], %[out] \r\n" /* out+=rev[1]*interp[2] */ | ||
856 | "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=rev[0]*interp[3] */ | ||
857 | : [out]"=&r"(output), | ||
858 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
859 | [_2]"=&r"(_2), [_3]"=&r"(_3) | ||
860 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
861 | [interp]"r"(interp)); | ||
862 | /* Apply voice envelope */ | ||
863 | asm volatile ( | ||
864 | "mov %[_2], %[out], asr #11 \r\n" | ||
865 | "mul %[out], %[_2], %[envx] \r\n" | ||
866 | : [out]"+r"(output), [_2]"=&r"(_2) | ||
867 | : [envx]"r"((int)voice->envx)); | ||
868 | /* Reduce and apply left and right volume */ | ||
869 | asm volatile ( | ||
870 | "mov %[out], %[out], asr #11 \r\n" | ||
871 | "mul %[amp_0], %[out], %[vvol_0] \r\n" | ||
872 | "mul %[amp_1], %[out], %[vvol_1] \r\n" | ||
873 | : [out]"+r"(output), | ||
874 | [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
875 | : [vvol_0]"r"((int)voice->volume[0]), | ||
876 | [vvol_1]"r"((int)voice->volume[1])); | ||
877 | |||
878 | raw_voice->outx = output >> 8; | ||
879 | #endif /* ARM_ARCH */ | ||
880 | #else /* Unoptimized CPU */ | ||
881 | int output = (((fwd [0] * interp [0] + | ||
882 | fwd [1] * interp [1] + | ||
883 | rev [1] * interp [2] + | ||
884 | rev [0] * interp [3] ) >> 11) * voice->envx) >> 11; | ||
885 | |||
886 | /* duplicated here to give compiler more to run in parallel */ | ||
887 | amp_0 = voice->volume [0] * output; | ||
888 | amp_1 = voice->volume [1] * output; | ||
889 | |||
890 | raw_voice->outx = output >> 8; | ||
891 | #endif /* CPU_* */ | ||
892 | } | 832 | } |
893 | else /* slow gaussian */ | 833 | else /* slow gaussian */ |
834 | #endif /* !SPC_NOINTERP (else two-point linear interpolation) */ | ||
894 | { | 835 | { |
895 | #if defined(CPU_ARM) | 836 | output = *(int16_t *)&this->noise; |
896 | #if ARM_ARCH >= 6 | ||
897 | int output = *(int16_t*) &this->noise; | ||
898 | |||
899 | if ( !(this->r.g.noise_enables & vbit) ) | ||
900 | { | ||
901 | /* Interpolate */ | ||
902 | int _2, _3; | ||
903 | asm volatile ( | ||
904 | /* NOTE: often-unaligned accesses */ | ||
905 | "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */ | ||
906 | "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */ | ||
907 | "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */ | ||
908 | "ldr %[_3], [%[rev]] \r\n" /* _3=f2f3 */ | ||
909 | "smulbb %[out], %[_0], %[_2] \r\n" /* out=f0*i0 */ | ||
910 | "smultt %[_0], %[_0], %[_2] \r\n" /* _0=f1*i1 */ | ||
911 | "smulbt %[_2], %[_1], %[_3] \r\n" /* _2=r1*i2 */ | ||
912 | "smultb %[_3], %[_1], %[_3] \r\n" /* _3=r0*i3 */ | ||
913 | : [out]"=r"(output), | ||
914 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
915 | [_2]"=&r"(_2), [_3]"=r"(_3) | ||
916 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
917 | [interp]"r"(interp)); | ||
918 | asm volatile ( | ||
919 | "mov %[out], %[out], asr#12 \r\n" | ||
920 | "add %[_0], %[out], %[_0], asr #12 \r\n" | ||
921 | "add %[_2], %[_0], %[_2], asr #12 \r\n" | ||
922 | "pkhbt %[_0], %[_2], %[_3], asl #4 \r\n" /* _3[31:16], _2[15:0] */ | ||
923 | "sadd16 %[_0], %[_0], %[_0] \r\n" /* _3[31:16]*2, _2[15:0]*2 */ | ||
924 | "qsubaddx %[out], %[_0], %[_0] \r\n" /* out[15:0]= | ||
925 | * sat16(_3[31:16]+_2[15:0]) */ | ||
926 | : [out]"+r"(output), | ||
927 | [_0]"+r"(amp_0), [_2]"+r"(_2), [_3]"+r"(_3)); | ||
928 | } | ||
929 | /* Apply voice envelope */ | ||
930 | asm volatile ( | ||
931 | "smulbb %[out], %[out], %[envx] \r\n" | ||
932 | : [out]"+r"(output) | ||
933 | : [envx]"r"(voice->envx)); | ||
934 | /* Reduce and apply left and right volume */ | ||
935 | asm volatile ( | ||
936 | "mov %[out], %[out], asr #11 \r\n" | ||
937 | "bic %[out], %[out], #0x1 \r\n" | ||
938 | "mul %[amp_0], %[out], %[vvol_0] \r\n" | ||
939 | "mul %[amp_1], %[out], %[vvol_1] \r\n" | ||
940 | : [out]"+r"(output), | ||
941 | [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
942 | : [vvol_0]"r"((int)voice->volume[0]), | ||
943 | [vvol_1]"r"((int)voice->volume[1])); | ||
944 | |||
945 | prev_outx = output; | ||
946 | raw_voice->outx = output >> 8; | ||
947 | #else /* ARM_ARCH < 6 */ | ||
948 | int output = *(int16_t*) &this->noise; | ||
949 | |||
950 | if ( !(this->r.g.noise_enables & vbit) ) | ||
951 | { | ||
952 | /* Interpolate */ | ||
953 | int _2, _3; | ||
954 | asm volatile ( | ||
955 | "ldrsh %[_0], [%[interp]] \r\n" | ||
956 | "ldrsh %[_2], [%[fwd]] \r\n" | ||
957 | "ldrsh %[_1], [%[interp], #2] \r\n" | ||
958 | "ldrsh %[_3], [%[fwd], #2] \r\n" | ||
959 | "mul %[out], %[_2], %[_0] \r\n" /* fwd[0]*interp[0] */ | ||
960 | "ldrsh %[_2], [%[rev], #2] \r\n" | ||
961 | "mul %[_0], %[_3], %[_1] \r\n" /* fwd[1]*interp[1] */ | ||
962 | "ldrsh %[_1], [%[interp], #4] \r\n" | ||
963 | "mov %[out], %[out], asr #12 \r\n" | ||
964 | "ldrsh %[_3], [%[rev]] \r\n" | ||
965 | "mul %[_2], %[_1], %[_2] \r\n" /* rev[1]*interp[2] */ | ||
966 | "ldrsh %[_1], [%[interp], #6] \r\n" | ||
967 | "add %[_0], %[out], %[_0], asr #12 \r\n" | ||
968 | "mul %[_3], %[_1], %[_3] \r\n" /* rev[0]*interp[3] */ | ||
969 | "add %[_2], %[_0], %[_2], asr #12 \r\n" | ||
970 | "mov %[_2], %[_2], lsl #17 \r\n" | ||
971 | "mov %[_3], %[_3], asr #12 \r\n" | ||
972 | "mov %[_3], %[_3], asl #1 \r\n" | ||
973 | "add %[out], %[_3], %[_2], asr #16 \r\n" | ||
974 | : [out]"=&r"(output), | ||
975 | [_0]"=&r"(amp_0), [_1]"=&r"(amp_1), | ||
976 | [_2]"=&r"(_2), [_3]"=&r"(_3) | ||
977 | : [fwd]"r"(fwd), [rev]"r"(rev), | ||
978 | [interp]"r"(interp)); | ||
979 | |||
980 | output = CLAMP16(output); | ||
981 | } | ||
982 | /* Apply voice envelope */ | ||
983 | asm volatile ( | ||
984 | "mul %[_0], %[out], %[envx] \r\n" | ||
985 | : [_0]"=r"(amp_0) | ||
986 | : [out]"r"(output), [envx]"r"((int)voice->envx)); | ||
987 | /* Reduce and apply left and right volume */ | ||
988 | asm volatile ( | ||
989 | "mov %[out], %[amp_0], asr #11 \r\n" /* amp_0 = _0 */ | ||
990 | "bic %[out], %[out], #0x1 \r\n" | ||
991 | "mul %[amp_0], %[out], %[vvol_0] \r\n" | ||
992 | "mul %[amp_1], %[out], %[vvol_1] \r\n" | ||
993 | : [out]"+r"(output), | ||
994 | [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1) | ||
995 | : [vvol_0]"r"((int)voice->volume[0]), | ||
996 | [vvol_1]"r"((int)voice->volume[1])); | ||
997 | |||
998 | prev_outx = output; | ||
999 | raw_voice->outx = output >> 8; | ||
1000 | #endif /* ARM_ARCH >= 6 */ | ||
1001 | #else /* Unoptimized CPU */ | ||
1002 | int output = *(int16_t*) &this->noise; | ||
1003 | 837 | ||
1004 | if ( !(this->r.g.noise_enables & vbit) ) | 838 | if ( !(this->r.g.noise_enables & vbit) ) |
1005 | { | 839 | output = interp( voice->wave.samples, position, fwd, rev ); |
1006 | output = (fwd [0] * interp [0]) & ~0xFFF; | ||
1007 | output = (output + fwd [1] * interp [1]) & ~0xFFF; | ||
1008 | output = (output + rev [1] * interp [2]) >> 12; | ||
1009 | output = (int16_t) (output * 2); | ||
1010 | output += ((rev [0] * interp [3]) >> 12) * 2; | ||
1011 | output = CLAMP16( output ); | ||
1012 | } | ||
1013 | output = (output * voice->envx) >> 11 & ~1; | ||
1014 | |||
1015 | /* duplicated here to give compiler more to run in parallel */ | ||
1016 | amp_0 = voice->volume [0] * output; | ||
1017 | amp_1 = voice->volume [1] * output; | ||
1018 | |||
1019 | prev_outx = output; | ||
1020 | raw_voice->outx = output >> 8; | ||
1021 | #endif /* CPU_* */ | ||
1022 | } | ||
1023 | #else /* SPCNOINTERP */ | ||
1024 | /* two-point linear interpolation */ | ||
1025 | #ifdef CPU_COLDFIRE | ||
1026 | int amp_0 = (int16_t)this->noise; | ||
1027 | int amp_1; | ||
1028 | |||
1029 | if ( (this->r.g.noise_enables & vbit) == 0 ) | ||
1030 | { | ||
1031 | uint32_t f = voice->position; | ||
1032 | int32_t y0; | ||
1033 | |||
1034 | /** | ||
1035 | * Formula (fastest found so far of MANY): | ||
1036 | * output = y0 + f*y1 - f*y0 | ||
1037 | */ | ||
1038 | asm volatile ( | ||
1039 | /* separate fractional and whole parts */ | ||
1040 | "move.l %[f], %[y1] \r\n" | ||
1041 | "and.l #0xfff, %[f] \r\n" | ||
1042 | "lsr.l %[sh], %[y1] \r\n" | ||
1043 | /* load samples y0 (upper) & y1 (lower) */ | ||
1044 | "move.l 2(%[s], %[y1].l*2), %[y1] \r\n" | ||
1045 | /* %acc0 = f*y1 */ | ||
1046 | "mac.w %[f]l, %[y1]l, %%acc0 \r\n" | ||
1047 | /* %acc0 -= f*y0 */ | ||
1048 | "msac.w %[f]l, %[y1]u, %%acc0 \r\n" | ||
1049 | /* separate out y0 and sign extend */ | ||
1050 | "swap %[y1] \r\n" | ||
1051 | "movea.w %[y1], %[y0] \r\n" | ||
1052 | /* fetch result, scale down and add y0 */ | ||
1053 | "movclr.l %%acc0, %[y1] \r\n" | ||
1054 | /* output = y0 + (result >> 12) */ | ||
1055 | "asr.l %[sh], %[y1] \r\n" | ||
1056 | "add.l %[y0], %[y1] \r\n" | ||
1057 | : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0) | ||
1058 | : [s]"a"(voice->samples), [sh]"d"(12)); | ||
1059 | } | ||
1060 | 840 | ||
1061 | /* apply voice envelope to output */ | 841 | /* Apply envelope and volume */ |
1062 | asm volatile ( | 842 | output = apply_amp( voice, output, &amp_0, &amp_1 ); |
1063 | "mac.w %[out]l, %[envx]l, %%acc0 \r\n" | ||
1064 | : | ||
1065 | : [out]"r"(amp_0), [envx]"r"(voice->envx)); | ||
1066 | |||
1067 | /* advance voice position */ | ||
1068 | voice->position += rate; | ||
1069 | |||
1070 | /* fetch output, scale and apply left and right | ||
1071 | voice volume */ | ||
1072 | asm volatile ( | ||
1073 | "movclr.l %%acc0, %[out] \r\n" | ||
1074 | "asr.l %[sh], %[out] \r\n" | ||
1075 | "mac.l %[vvol_0], %[out], %%acc0 \r\n" | ||
1076 | "mac.l %[vvol_1], %[out], %%acc1 \r\n" | ||
1077 | : [out]"=&d"(amp_0) | ||
1078 | : [vvol_0]"r"((int)voice->volume[0]), | ||
1079 | [vvol_1]"r"((int)voice->volume[1]), | ||
1080 | [sh]"d"(11)); | ||
1081 | |||
1082 | /* save this output into previous, scale and save in | ||
1083 | output register */ | ||
1084 | prev_outx = amp_0; | ||
1085 | raw_voice->outx = amp_0 >> 8; | ||
1086 | |||
1087 | /* fetch final voice output */ | ||
1088 | asm volatile ( | ||
1089 | "movclr.l %%acc0, %[amp_0] \r\n" | ||
1090 | "movclr.l %%acc1, %[amp_1] \r\n" | ||
1091 | : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1)); | ||
1092 | #elif defined (CPU_ARM) | ||
1093 | int amp_0, amp_1; | ||
1094 | |||
1095 | if ( (this->r.g.noise_enables & vbit) != 0 ) | ||
1096 | { | ||
1097 | amp_0 = *(int16_t *)&this->noise; | ||
1098 | } | ||
1099 | else | ||
1100 | { | ||
1101 | uint32_t f = voice->position; | ||
1102 | amp_0 = (uint32_t)voice->samples; | ||
1103 | |||
1104 | asm volatile( | ||
1105 | "mov %[y1], %[f], lsr #12 \r\n" | ||
1106 | "eor %[f], %[f], %[y1], lsl #12 \r\n" | ||
1107 | "add %[y1], %[y0], %[y1], lsl #1 \r\n" | ||
1108 | "ldrsh %[y0], [%[y1], #2] \r\n" | ||
1109 | "ldrsh %[y1], [%[y1], #4] \r\n" | ||
1110 | "sub %[y1], %[y1], %[y0] \r\n" | ||
1111 | "mul %[f], %[y1], %[f] \r\n" | ||
1112 | "add %[y0], %[y0], %[f], asr #12 \r\n" | ||
1113 | : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1)); | ||
1114 | } | ||
1115 | |||
1116 | voice->position += rate; | ||
1117 | |||
1118 | asm volatile( | ||
1119 | "mul %[amp_1], %[amp_0], %[envx] \r\n" | ||
1120 | "mov %[amp_0], %[amp_1], asr #11 \r\n" | ||
1121 | "mov %[amp_1], %[amp_0], asr #8 \r\n" | ||
1122 | : [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1) | ||
1123 | : [envx]"r"(voice->envx)); | ||
1124 | |||
1125 | prev_outx = amp_0; | ||
1126 | raw_voice->outx = (int8_t)amp_1; | ||
1127 | |||
1128 | asm volatile( | ||
1129 | "mul %[amp_1], %[amp_0], %[vol_1] \r\n" | ||
1130 | "mul %[amp_0], %[vol_0], %[amp_0] \r\n" | ||
1131 | : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1) | ||
1132 | : [vol_0]"r"((int)voice->volume[0]), | ||
1133 | [vol_1]"r"((int)voice->volume[1])); | ||
1134 | #else /* Unoptimized CPU */ | ||
1135 | int output; | ||
1136 | |||
1137 | if ( (this->r.g.noise_enables & vbit) == 0 ) | ||
1138 | { | ||
1139 | int const fraction = voice->position & 0xfff; | ||
1140 | short const* const pos = (voice->samples + (voice->position >> 12)) + 1; | ||
1141 | output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12); | ||
1142 | } else { | ||
1143 | output = *(int16_t *)&this->noise; | ||
1144 | } | 843 | } |
1145 | 844 | ||
1146 | voice->position += rate; | ||
1147 | |||
1148 | output = (output * voice->envx) >> 11; | ||
1149 | |||
1150 | /* duplicated here to give compiler more to run in parallel */ | ||
1151 | int amp_0 = voice->volume [0] * output; | ||
1152 | int amp_1 = voice->volume [1] * output; | ||
1153 | |||
1154 | prev_outx = output; | 845 | prev_outx = output; |
1155 | raw_voice->outx = (int8_t) (output >> 8); | 846 | raw_voice->outx = output >> 8; |
1156 | #endif /* CPU_* */ | ||
1157 | #endif /* SPCNOINTERP */ | ||
1158 | 847 | ||
1159 | #if SPC_BRRCACHE | ||
1160 | if ( voice->position >= voice->wave_end ) | ||
1161 | { | ||
1162 | long loop_len = voice->wave_loop << 12; | ||
1163 | voice->position -= loop_len; | ||
1164 | this->r.g.wave_ended |= vbit; | ||
1165 | if ( !loop_len ) | ||
1166 | { | ||
1167 | this->keys_down ^= vbit; | ||
1168 | raw_voice->envx = 0; | ||
1169 | voice->envx = 0; | ||
1170 | } | ||
1171 | } | ||
1172 | #endif | ||
1173 | #if 0 | ||
1174 | EXIT_TIMER(dsp_gen); | 848 | EXIT_TIMER(dsp_gen); |
1175 | 849 | ||
1176 | ENTER_TIMER(dsp_mix); | 850 | ENTER_TIMER(dsp_mix); |
1177 | #endif | 851 | |
1178 | chans_0 += amp_0; | 852 | chans_0 += amp_0; |
1179 | chans_1 += amp_1; | 853 | chans_1 += amp_1; |
1180 | #if !SPC_NOECHO | 854 | #if !SPC_NOECHO |
1181 | if ( this->r.g.echo_ons & vbit ) | 855 | if ( this->r.g.echo_ons & vbit ) |
1182 | { | 856 | { |
1183 | echo_0 += amp_0; | 857 | echo_0 += amp_0; |
1184 | echo_1 += amp_1; | 858 | echo_1 += amp_1; |
1185 | } | 859 | } |
1186 | #endif | 860 | #endif /* !SPC_NOECHO */ |
1187 | #if 0 | 861 | |
1188 | EXIT_TIMER(dsp_mix); | 862 | EXIT_TIMER(dsp_mix); |
1189 | #endif | ||
1190 | } | 863 | } |
1191 | /* end of voice loop */ | 864 | /* end of voice loop */ |
1192 | 865 | ||
866 | /* Generate output */ | ||
867 | int amp_0, amp_1; | ||
1193 | #if !SPC_NOECHO | 868 | #if !SPC_NOECHO |
1194 | #ifdef CPU_COLDFIRE | ||
1195 | /* Read feedback from echo buffer */ | 869 | /* Read feedback from echo buffer */ |
1196 | int echo_pos = this->echo_pos; | 870 | int echo_pos = this->echo_pos; |
1197 | uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF); | 871 | uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF); |
1198 | echo_pos += 4; | ||
1199 | if ( echo_pos >= echo_wrap ) | ||
1200 | echo_pos = 0; | ||
1201 | this->echo_pos = echo_pos; | ||
1202 | int fb = swap_odd_even32(*(int32_t *)echo_ptr); | ||
1203 | int out_0, out_1; | ||
1204 | |||
1205 | /* Keep last 8 samples */ | ||
1206 | *this->last_fir_ptr = fb; | ||
1207 | this->last_fir_ptr = this->fir_ptr; | ||
1208 | |||
1209 | /* Apply echo FIR filter to output samples read from echo buffer - | ||
1210 | circular buffer is hardware incremented and masked; FIR | ||
1211 | coefficients and buffer history are loaded in parallel with | ||
1212 | multiply accumulate operations. Shift left by one here and once | ||
1213 | again when calculating feedback to have sample values justified | ||
1214 | to bit 31 in the output to ease endian swap, interleaving and | ||
1215 | clamping before placing result in the program's echo buffer. */ | ||
1216 | int _0, _1, _2; | ||
1217 | asm volatile ( | ||
1218 | "move.l (%[fir_c]) , %[_2] \r\n" | ||
1219 | "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n" | ||
1220 | "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1221 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1222 | "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1223 | "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1224 | "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1225 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1226 | "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1227 | "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1228 | "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n" | ||
1229 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1230 | "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n" | ||
1231 | "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n" | ||
1232 | "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n" | ||
1233 | "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n" | ||
1234 | "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n" | ||
1235 | : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2), | ||
1236 | [fir_p]"+a"(this->fir_ptr) | ||
1237 | : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb) | ||
1238 | ); | ||
1239 | |||
1240 | /* Generate output */ | ||
1241 | asm volatile ( | ||
1242 | /* fetch filter results _after_ gcc loads asm | ||
1243 | block parameters to eliminate emac stalls */ | ||
1244 | "movclr.l %%acc0, %[out_0] \r\n" | ||
1245 | "movclr.l %%acc1, %[out_1] \r\n" | ||
1246 | /* apply global volume */ | ||
1247 | "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n" | ||
1248 | "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n" | ||
1249 | /* apply echo volume and add to final output */ | ||
1250 | "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n" | ||
1251 | "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n" | ||
1252 | : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1) | ||
1253 | : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0), | ||
1254 | [ev_0]"r"((int)this->r.g.echo_volume_0), | ||
1255 | [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1), | ||
1256 | [ev_1]"r"((int)this->r.g.echo_volume_1) | ||
1257 | ); | ||
1258 | |||
1259 | /* Feedback into echo buffer */ | ||
1260 | if ( !(this->r.g.flags & 0x20) ) | ||
1261 | { | ||
1262 | int sh = 1 << 9; | ||
1263 | |||
1264 | asm volatile ( | ||
1265 | /* scale echo voices; saturate if overflow */ | ||
1266 | "mac.l %[sh], %[e1] , %%acc1 \r\n" | ||
1267 | "mac.l %[sh], %[e0] , %%acc0 \r\n" | ||
1268 | /* add scaled output from FIR filter */ | ||
1269 | "mac.l %[out_1], %[ef], <<, %%acc1 \r\n" | ||
1270 | "mac.l %[out_0], %[ef], <<, %%acc0 \r\n" | ||
1271 | /* swap and fetch feedback results - simply | ||
1272 | swap_odd_even32 mixed in between macs and | ||
1273 | movclrs to mitigate stall issues */ | ||
1274 | "move.l #0x00ff00ff, %[sh] \r\n" | ||
1275 | "movclr.l %%acc1, %[e1] \r\n" | ||
1276 | "swap %[e1] \r\n" | ||
1277 | "movclr.l %%acc0, %[e0] \r\n" | ||
1278 | "move.w %[e1], %[e0] \r\n" | ||
1279 | "and.l %[e0], %[sh] \r\n" | ||
1280 | "eor.l %[sh], %[e0] \r\n" | ||
1281 | "lsl.l #8, %[sh] \r\n" | ||
1282 | "lsr.l #8, %[e0] \r\n" | ||
1283 | "or.l %[sh], %[e0] \r\n" | ||
1284 | /* save final feedback into echo buffer */ | ||
1285 | "move.l %[e0], (%[echo_ptr]) \r\n" | ||
1286 | : [e0]"+d"(echo_0), [e1]"+d"(echo_1), [sh]"+d"(sh) | ||
1287 | : [out_0]"r"(out_0), [out_1]"r"(out_1), | ||
1288 | [ef]"r"((int)this->r.g.echo_feedback), | ||
1289 | [echo_ptr]"a"((int32_t *)echo_ptr) | ||
1290 | ); | ||
1291 | } | ||
1292 | 872 | ||
1293 | /* Output final samples */ | ||
1294 | asm volatile ( | ||
1295 | /* fetch output saved in %acc2 and %acc3 */ | ||
1296 | "movclr.l %%acc2, %[out_0] \r\n" | ||
1297 | "movclr.l %%acc3, %[out_1] \r\n" | ||
1298 | /* scale right by global_muting shift */ | ||
1299 | "asr.l %[gm], %[out_0] \r\n" | ||
1300 | "asr.l %[gm], %[out_1] \r\n" | ||
1301 | : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1) | ||
1302 | : [gm]"d"(global_muting) | ||
1303 | ); | ||
1304 | |||
1305 | out_buf [ 0] = out_0; | ||
1306 | out_buf [WAV_CHUNK_SIZE] = out_1; | ||
1307 | out_buf ++; | ||
1308 | #elif defined (CPU_ARM) | ||
1309 | /* Read feedback from echo buffer */ | ||
1310 | int echo_pos = this->echo_pos; | ||
1311 | uint8_t* const echo_ptr = RAM + | ||
1312 | ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF); | ||
1313 | echo_pos += 4; | 873 | echo_pos += 4; |
1314 | if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 ) | 874 | |
875 | if ( echo_pos >= echo_delay ) | ||
1315 | echo_pos = 0; | 876 | echo_pos = 0; |
1316 | this->echo_pos = echo_pos; | ||
1317 | 877 | ||
1318 | #if ARM_ARCH >= 6 | 878 | this->echo_pos = echo_pos; |
1319 | int32_t *fir_ptr, *fir_coeff; | ||
1320 | int fb_0, fb_1; | ||
1321 | 879 | ||
1322 | /* Apply FIR */ | 880 | /* Apply FIR */ |
1323 | 881 | int fb_0, fb_1; | |
1324 | /* Keep last 8 samples */ | 882 | echo_apply( this, echo_ptr, &fb_0, &fb_1 ); |
1325 | asm volatile ( | ||
1326 | "ldr %[fb_0], [%[echo_p]] \r\n" | ||
1327 | "add %[fir_p], %[t_fir_p], #4 \r\n" | ||
1328 | "bic %[t_fir_p], %[fir_p], %[mask] \r\n" | ||
1329 | "str %[fb_0], [%[fir_p], #-4] \r\n" | ||
1330 | /* duplicate at +8 eliminates wrap checking below */ | ||
1331 | "str %[fb_0], [%[fir_p], #28] \r\n" | ||
1332 | : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr), | ||
1333 | [fb_0]"=&r"(fb_0) | ||
1334 | : [echo_p]"r"(echo_ptr), [mask]"i"(~FIR_BUF_MASK)); | ||
1335 | |||
1336 | fir_coeff = (int32_t *)this->fir_coeff; | ||
1337 | |||
1338 | /* Fugly, but the best version found. */ | ||
1339 | int _0; | ||
1340 | asm volatile ( /* L0R0 = acc0 */ | ||
1341 | "ldmia %[fir_p]!, { r2-r5 } \r\n" /* L1R1-L4R4 = r2-r5 */ | ||
1342 | "ldmia %[fir_c]!, { r0-r1 } \r\n" /* C0C1-C2C3 = r0-r1 */ | ||
1343 | "pkhbt %[_0], %[acc0], r2, asl #16 \r\n" /* L0R0,L1R1->L0L1,R0R1 */ | ||
1344 | "pkhtb r2, r2, %[acc0], asr #16 \r\n" | ||
1345 | "smuad %[acc0], %[_0], r0 \r\n" /* acc0=L0*C0+L1*C1 */ | ||
1346 | "smuad %[acc1], r2, r0 \r\n" /* acc1=R0*C0+R1*C1 */ | ||
1347 | "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L2R2,L3R3->L2L3,R2R3 */ | ||
1348 | "pkhtb r4, r4, r3, asr #16 \r\n" | ||
1349 | "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L2*C2+L3*C3 */ | ||
1350 | "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R2*C2+R3*C3 */ | ||
1351 | "ldmia %[fir_p], { r2-r4 } \r\n" /* L5R5-L7R7 = r2-r4 */ | ||
1352 | "ldmia %[fir_c], { r0-r1 } \r\n" /* C4C5-C6C7 = r0-r1 */ | ||
1353 | "pkhbt %[_0], r5, r2, asl #16 \r\n" /* L4R4,L5R5->L4L5,R4R5 */ | ||
1354 | "pkhtb r2, r2, r5, asr #16 \r\n" | ||
1355 | "smlad %[acc0], %[_0], r0, %[acc0] \r\n" /* acc0+=L4*C4+L5*C5 */ | ||
1356 | "smlad %[acc1], r2, r0, %[acc1] \r\n" /* acc1+=R4*C4+R5*C5 */ | ||
1357 | "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L6R6,L7R7->L6L7,R6R7 */ | ||
1358 | "pkhtb r4, r4, r3, asr #16 \r\n" | ||
1359 | "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L6*C6+L7*C7 */ | ||
1360 | "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R6*C6+R7*C7 */ | ||
1361 | : [acc0]"+r"(fb_0), [acc1]"=&r"(fb_1), [_0]"=&r"(_0), | ||
1362 | [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff) | ||
1363 | : | ||
1364 | : "r0", "r1", "r2", "r3", "r4", "r5"); | ||
1365 | |||
1366 | /* Generate output */ | ||
1367 | int amp_0, amp_1; | ||
1368 | |||
1369 | asm volatile ( | ||
1370 | "mul %[amp_0], %[gvol_0], %[chans_0] \r\n" | ||
1371 | "mul %[amp_1], %[gvol_1], %[chans_1] \r\n" | ||
1372 | : [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1) | ||
1373 | : [gvol_0]"r"(global_vol_0), [gvol_1]"r"(global_vol_1), | ||
1374 | [chans_0]"r"(chans_0), [chans_1]"r"(chans_1)); | ||
1375 | asm volatile ( | ||
1376 | "mla %[amp_0], %[fb_0], %[ev_0], %[amp_0] \r\n" | ||
1377 | "mla %[amp_1], %[fb_1], %[ev_1], %[amp_1] \r\n" | ||
1378 | : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1) | ||
1379 | : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), | ||
1380 | [ev_0]"r"((int)this->r.g.echo_volume_0), | ||
1381 | [ev_1]"r"((int)this->r.g.echo_volume_1)); | ||
1382 | |||
1383 | out_buf [ 0] = amp_0 >> global_muting; | ||
1384 | out_buf [WAV_CHUNK_SIZE] = amp_1 >> global_muting; | ||
1385 | out_buf ++; | ||
1386 | 883 | ||
1387 | if ( !(this->r.g.flags & 0x20) ) | 884 | if ( !(this->r.g.flags & 0x20) ) |
1388 | { | 885 | { |
1389 | /* Feedback into echo buffer */ | 886 | /* Feedback into echo buffer */ |
1390 | int e0, e1; | 887 | echo_feedback( this, echo_ptr, echo_0, echo_1, fb_0, fb_1 ); |
1391 | |||
1392 | asm volatile ( | ||
1393 | "mov %[e0], %[echo_0], asl #7 \r\n" | ||
1394 | "mov %[e1], %[echo_1], asl #7 \r\n" | ||
1395 | "mla %[e0], %[fb_0], %[efb], %[e0] \r\n" | ||
1396 | "mla %[e1], %[fb_1], %[efb], %[e1] \r\n" | ||
1397 | : [e0]"=&r"(e0), [e1]"=&r"(e1) | ||
1398 | : [echo_0]"r"(echo_0), [echo_1]"r"(echo_1), | ||
1399 | [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), | ||
1400 | [efb]"r"((int)this->r.g.echo_feedback)); | ||
1401 | asm volatile ( | ||
1402 | "ssat %[e0], #16, %[e0], asr #14 \r\n" | ||
1403 | "ssat %[e1], #16, %[e1], asr #14 \r\n" | ||
1404 | "pkhbt %[e0], %[e0], %[e1], lsl #16 \r\n" | ||
1405 | "str %[e0], [%[echo_p]] \r\n" | ||
1406 | : [e0]"+r"(e0), [e1]"+r"(e1) | ||
1407 | : [echo_p]"r"(echo_ptr)); | ||
1408 | } | 888 | } |
1409 | #else /* ARM_ARCH < 6 */ | 889 | #endif /* !SPC_NOECHO */ |
1410 | int fb_0 = GET_LE16SA( echo_ptr ); | ||
1411 | int fb_1 = GET_LE16SA( echo_ptr + 2 ); | ||
1412 | int32_t *fir_ptr, *fir_coeff; | ||
1413 | |||
1414 | /* Keep last 8 samples */ | ||
1415 | |||
1416 | /* Apply FIR */ | ||
1417 | asm volatile ( | ||
1418 | "add %[fir_p], %[t_fir_p], #8 \r\n" | ||
1419 | "bic %[t_fir_p], %[fir_p], %[mask] \r\n" | ||
1420 | "str %[fb_0], [%[fir_p], #-8] \r\n" | ||
1421 | "str %[fb_1], [%[fir_p], #-4] \r\n" | ||
1422 | /* duplicate at +8 eliminates wrap checking below */ | ||
1423 | "str %[fb_0], [%[fir_p], #56] \r\n" | ||
1424 | "str %[fb_1], [%[fir_p], #60] \r\n" | ||
1425 | : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr) | ||
1426 | : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), [mask]"i"(~FIR_BUF_MASK)); | ||
1427 | |||
1428 | fir_coeff = this->fir_coeff; | ||
1429 | |||
1430 | asm volatile ( | ||
1431 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1432 | "ldmia %[fir_p]!, { r4-r5 } \r\n" | ||
1433 | "mul %[fb_0], r0, %[fb_0] \r\n" | ||
1434 | "mul %[fb_1], r0, %[fb_1] \r\n" | ||
1435 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1436 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1437 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1438 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1439 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1440 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1441 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1442 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1443 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1444 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1445 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1446 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1447 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1448 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1449 | "ldmia %[fir_c]!, { r0-r1 } \r\n" | ||
1450 | "ldmia %[fir_p]!, { r2-r5 } \r\n" | ||
1451 | "mla %[fb_0], r2, r0, %[fb_0] \r\n" | ||
1452 | "mla %[fb_1], r3, r0, %[fb_1] \r\n" | ||
1453 | "mla %[fb_0], r4, r1, %[fb_0] \r\n" | ||
1454 | "mla %[fb_1], r5, r1, %[fb_1] \r\n" | ||
1455 | : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1), | ||
1456 | [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff) | ||
1457 | : | ||
1458 | : "r0", "r1", "r2", "r3", "r4", "r5"); | ||
1459 | |||
1460 | /* Generate output */ | ||
1461 | int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0) | ||
1462 | >> global_muting; | ||
1463 | int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) | ||
1464 | >> global_muting; | ||
1465 | |||
1466 | out_buf [ 0] = amp_0; | ||
1467 | out_buf [WAV_CHUNK_SIZE] = amp_1; | ||
1468 | out_buf ++; | ||
1469 | 890 | ||
1470 | if ( !(this->r.g.flags & 0x20) ) | 891 | mix_output( this, global_muting, global_vol_0, global_vol_1, |
1471 | { | 892 | chans_0, chans_1, fb_0, fb_1, &_0, &_1 ); |
1472 | /* Feedback into echo buffer */ | ||
1473 | int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14); | ||
1474 | int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14); | ||
1475 | e0 = CLAMP16( e0 ); | ||
1476 | SET_LE16A( echo_ptr , e0 ); | ||
1477 | e1 = CLAMP16( e1 ); | ||
1478 | SET_LE16A( echo_ptr + 2, e1 ); | ||
1479 | } | ||
1480 | #endif /* ARM_ARCH */ | ||
1481 | #else /* Unoptimized CPU */ | ||
1482 | /* Read feedback from echo buffer */ | ||
1483 | int echo_pos = this->echo_pos; | ||
1484 | uint8_t* const echo_ptr = RAM + | ||
1485 | ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF); | ||
1486 | echo_pos += 4; | ||
1487 | if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 ) | ||
1488 | echo_pos = 0; | ||
1489 | this->echo_pos = echo_pos; | ||
1490 | int fb_0 = GET_LE16SA( echo_ptr ); | ||
1491 | int fb_1 = GET_LE16SA( echo_ptr + 2 ); | ||
1492 | |||
1493 | /* Keep last 8 samples */ | ||
1494 | int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos; | ||
1495 | this->fir_pos = (this->fir_pos + 1) & (FIR_BUF_HALF - 1); | ||
1496 | fir_ptr [ 0] [0] = fb_0; | ||
1497 | fir_ptr [ 0] [1] = fb_1; | ||
1498 | /* duplicate at +8 eliminates wrap checking below */ | ||
1499 | fir_ptr [FIR_BUF_HALF] [0] = fb_0; | ||
1500 | fir_ptr [FIR_BUF_HALF] [1] = fb_1; | ||
1501 | |||
1502 | /* Apply FIR */ | ||
1503 | fb_0 *= this->fir_coeff [0]; | ||
1504 | fb_1 *= this->fir_coeff [0]; | ||
1505 | 893 | ||
1506 | #define DO_PT( i )\ | ||
1507 | fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\ | ||
1508 | fb_1 += fir_ptr [i] [1] * this->fir_coeff [i]; | ||
1509 | |||
1510 | DO_PT( 1 ) | ||
1511 | DO_PT( 2 ) | ||
1512 | DO_PT( 3 ) | ||
1513 | DO_PT( 4 ) | ||
1514 | DO_PT( 5 ) | ||
1515 | DO_PT( 6 ) | ||
1516 | DO_PT( 7 ) | ||
1517 | |||
1518 | /* Generate output */ | ||
1519 | int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0) | ||
1520 | >> global_muting; | ||
1521 | int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) | ||
1522 | >> global_muting; | ||
1523 | out_buf [ 0] = amp_0; | ||
1524 | out_buf [WAV_CHUNK_SIZE] = amp_1; | ||
1525 | out_buf ++; | ||
1526 | |||
1527 | if ( !(this->r.g.flags & 0x20) ) | ||
1528 | { | ||
1529 | /* Feedback into echo buffer */ | ||
1530 | int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14); | ||
1531 | int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14); | ||
1532 | e0 = CLAMP16( e0 ); | ||
1533 | SET_LE16A( echo_ptr , e0 ); | ||
1534 | e1 = CLAMP16( e1 ); | ||
1535 | SET_LE16A( echo_ptr + 2, e1 ); | ||
1536 | } | ||
1537 | #endif /* CPU_* */ | ||
1538 | #else /* SPCNOECHO == 1*/ | ||
1539 | /* Generate output */ | ||
1540 | int amp_0 = (chans_0 * global_vol_0) >> global_muting; | ||
1541 | int amp_1 = (chans_1 * global_vol_1) >> global_muting; | ||
1542 | out_buf [ 0] = amp_0; | 894 | out_buf [ 0] = amp_0; |
1543 | out_buf [WAV_CHUNK_SIZE] = amp_1; | 895 | out_buf [WAV_CHUNK_SIZE] = amp_1; |
1544 | out_buf ++; | 896 | out_buf ++; |
1545 | #endif /* SPCNOECHO */ | ||
1546 | } | 897 | } |
1547 | while ( --count ); | 898 | while ( --count ); |
1548 | #if 0 | 899 | |
1549 | EXIT_TIMER(dsp); | 900 | EXIT_TIMER(dsp); |
1550 | ENTER_TIMER(cpu); | 901 | ENTER_TIMER(cpu); |
1551 | #endif | ||
1552 | } | 902 | } |
1553 | 903 | ||
1554 | void DSP_reset( struct Spc_Dsp* this ) | 904 | void DSP_reset( struct Spc_Dsp* this ) |
@@ -1563,31 +913,22 @@ void DSP_reset( struct Spc_Dsp* this ) | |||
1563 | 913 | ||
1564 | ci->memset( this->voice_state, 0, sizeof this->voice_state ); | 914 | ci->memset( this->voice_state, 0, sizeof this->voice_state ); |
1565 | 915 | ||
1566 | int i; | 916 | for ( int i = VOICE_COUNT; --i >= 0; ) |
1567 | for ( i = VOICE_COUNT; --i >= 0; ) | ||
1568 | { | 917 | { |
1569 | struct voice_t* v = this->voice_state + i; | 918 | struct voice_t* v = this->voice_state + i; |
1570 | v->env_mode = state_release; | 919 | v->env_mode = state_release; |
1571 | v->addr = ram.ram; | 920 | v->wave.addr = ram.ram; |
1572 | } | 921 | } |
1573 | 922 | ||
1574 | #if SPC_BRRCACHE | 923 | #if SPC_BRRCACHE |
1575 | this->oldsize = 0; | 924 | this->oldsize = 0; |
1576 | for ( i = 0; i < 256; i++ ) | 925 | for ( int i = 0; i < 256; i++ ) |
1577 | this->wave_entry [i].start_addr = -1; | 926 | this->wave_entry [i].start_addr = -1; |
1578 | #endif | 927 | #endif /* SPC_BRRCACHE */ |
1579 | 928 | ||
1580 | #if defined(CPU_COLDFIRE) | 929 | #if !SPC_NOECHO |
1581 | this->fir_ptr = fir_buf; | 930 | echo_init(this); |
1582 | this->last_fir_ptr = &fir_buf [7]; | 931 | #endif /* !SPC_NOECHO */ |
1583 | ci->memset( fir_buf, 0, sizeof fir_buf ); | ||
1584 | #elif defined (CPU_ARM) | ||
1585 | this->fir_ptr = fir_buf; | ||
1586 | ci->memset( fir_buf, 0, sizeof fir_buf ); | ||
1587 | #else | ||
1588 | this->fir_pos = 0; | ||
1589 | ci->memset( this->fir_buf, 0, sizeof this->fir_buf ); | ||
1590 | #endif | ||
1591 | 932 | ||
1592 | assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT ); | 933 | assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT ); |
1593 | assert( sizeof (this->r.voice) == REGISTER_COUNT ); | 934 | assert( sizeof (this->r.voice) == REGISTER_COUNT ); |