summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2013-05-18 01:45:03 -0400
committerMichael Sevakis <jethead71@rockbox.org>2013-05-21 00:02:14 -0400
commit87021f7c0ac4620eafd185ff11905ee643f72b6c (patch)
tree03ae48f3d999cd8743af40cc5df933f64f6df2d2
parenta17d6de5bc727b0bb55764ecef2605ae689e8dab (diff)
downloadrockbox-87021f7c0ac4620eafd185ff11905ee643f72b6c.tar.gz
rockbox-87021f7c0ac4620eafd185ff11905ee643f72b6c.zip
SPC Codec: Refactor for CPU and clean up some things.
CPU optimization gets its own files in which to fill in optimizable routines. Some pointless #if 0's for profiling need removal. Those macros are empty if not profiling. Mark some functions that the compiler would otherwise undesirably force-inline as not inlined. Change-Id: Ia7b7e45380d7efb20c9b1a4d52e05db3ef6bbaab
-rw-r--r--lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.c253
-rw-r--r--lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.h45
-rw-r--r--lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.c244
-rw-r--r--lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.h45
-rw-r--r--lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.c198
-rw-r--r--lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.h45
-rw-r--r--lib/rbcodec/codecs/libspc/spc_codec.h147
-rw-r--r--lib/rbcodec/codecs/libspc/spc_cpu.c4
-rw-r--r--lib/rbcodec/codecs/libspc/spc_dsp.c1733
-rw-r--r--lib/rbcodec/codecs/libspc/spc_dsp_generic.c211
-rw-r--r--lib/rbcodec/codecs/libspc/spc_dsp_generic.h45
-rw-r--r--lib/rbcodec/codecs/libspc/spc_emu.c15
12 files changed, 1690 insertions, 1295 deletions
diff --git a/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.c b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.c
new file mode 100644
index 0000000000..7eacc3baf9
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.c
@@ -0,0 +1,253 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2007-2010 Michael Sevakis (jhMikeS)
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#if !SPC_NOINTERP
22
23#define SPC_GAUSSIAN_FAST_INTERP
/* ARMv4 fast gaussian interpolation.
 * With s = samples + (position >> 12), returns the unscaled sum
 *   fwd[0]*s[0] + fwd[1]*s[1] + rev[1]*s[2] + rev[0]*s[3].
 * Loads are interleaved with the multiplies; keep the instruction order. */
24static inline int gaussian_fast_interp( int16_t const* samples,
25 int32_t position,
26 int16_t const* fwd,
27 int16_t const* rev )
28{
29 int output;
30 int t0, t1, t2, t3;
31
32 asm volatile (
33 "ldrsh %[t0], [%[samp]] \n"
34 "ldrsh %[t2], [%[fwd]] \n"
35 "ldrsh %[t1], [%[samp], #2] \n"
36 "ldrsh %[t3], [%[fwd], #2] \n"
37 "mul %[out], %[t0], %[t2] \n" /* out= fwd[0]*samp[0] */
38 "ldrsh %[t0], [%[samp], #4] \n"
39 "ldrsh %[t2], [%[rev], #2] \n"
40 "mla %[out], %[t1], %[t3], %[out] \n" /* out+=fwd[1]*samp[1] */
41 "ldrsh %[t1], [%[samp], #6] \n"
42 "ldrsh %[t3], [%[rev]] \n"
43 "mla %[out], %[t0], %[t2], %[out] \n" /* out+=rev[1]*samp[2] */
44 "mla %[out], %[t1], %[t3], %[out] \n" /* out+=rev[0]*samp[3] */
 /* all outputs are early-clobber: they are written while the pointer
  * inputs are still needed by later loads */
45 : [out]"=&r"(output),
46 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3)
47 : [fwd]"r"(fwd), [rev]"r"(rev),
48 [samp]"r"(samples + (position >> 12)));
49
50 return output;
51}
52
53#define SPC_GAUSSIAN_FAST_AMP
54static inline int gaussian_fast_amp( struct voice_t* voice, int output,
55 int* amp_0, int* amp_1 )
56{
57 int t0;
58
59 asm volatile (
60 "mov %[t0], %[out], asr #11 \n"
61 "mul %[out], %[t0], %[envx] \n"
62 : [out]"+r"(output), [t0]"=&r"(t0)
63 : [envx]"r"((int) voice->envx));
64
65 asm volatile (
66 "mov %[out], %[out], asr #11 \n"
67 "mul %[a0], %[out], %[v0] \n"
68 "mul %[a1], %[out], %[v1] \n"
69 : [out]"+r"(output),
70 [a0]"=&r"(*amp_0), [a1]"=r"(*amp_1)
71 : [v0]"r"((int) voice->volume [0]),
72 [v1]"r"((int) voice->volume [1]));
73
74 return output;
75}
76
77#define SPC_GAUSSIAN_SLOW_INTERP
78static inline int gaussian_slow_interp( int16_t const* samples,
79 int32_t position,
80 int16_t const* fwd,
81 int16_t const* rev )
82{
83 int output;
84 int t0, t1, t2, t3;
85
86 asm volatile (
87 "ldrsh %[t0], [%[samp]] \n"
88 "ldrsh %[t2], [%[fwd]] \n"
89 "ldrsh %[t1], [%[samp], #2] \n"
90 "ldrsh %[t3], [%[fwd], #2] \n"
91 "mul %[out], %[t2], %[t0] \n" /* fwd[0]*samp[0] */
92 "ldrsh %[t2], [%[rev], #2] \n"
93 "mul %[t0], %[t3], %[t1] \n" /* fwd[1]*samp[1] */
94 "ldrsh %[t1], [%[samp], #4] \n"
95 "mov %[out], %[out], asr #12 \n"
96 "ldrsh %[t3], [%[rev]] \n"
97 "mul %[t2], %[t1], %[t2] \n" /* rev[1]*samp[2] */
98 "ldrsh %[t1], [%[samp], #6] \n"
99 "add %[t0], %[out], %[t0], asr #12 \n"
100 "mul %[t3], %[t1], %[t3] \n" /* rev[0]*samp[3] */
101 "add %[t2], %[t0], %[t2], asr #12 \n"
102 "mov %[t2], %[t2], lsl #17 \n"
103 "mov %[t3], %[t3], asr #12 \n"
104 "mov %[t3], %[t3], asl #1 \n"
105 "add %[out], %[t3], %[t2], asr #16 \n"
106 : [out]"=&r"(output),
107 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3)
108 : [fwd]"r"(fwd), [rev]"r"(rev),
109 [samp]"r"(samples + (position >> 12)));
110
111 return CLAMP16( output );
112}
113
114#define SPC_GAUSSIAN_SLOW_AMP
/* ARMv4 envelope/volume stage for the slow gaussian path.
 * Computes t0 = ((output * voice->envx) >> 11) & ~1, then stores
 * t0 * voice->volume[0] in *amp_0 and t0 * voice->volume[1] in *amp_1.
 * Returns t0. */
115static inline int gaussian_slow_amp( struct voice_t* voice, int output,
116 int* amp_0, int* amp_1 )
117{
118 int t0;
119
120 asm volatile (
121 "mul %[t0], %[out], %[envx]"
 /* early-clobber so t0 never shares a register with 'out' (Rm):
  * MUL with Rd == Rm is documented as unpredictable before ARMv6.
  * Same constraint as the identical multiply in linear_amp. */
122 : [t0]"=&r"(t0)
123 : [out]"r"(output), [envx]"r"((int) voice->envx));
124 asm volatile (
125 "mov %[t0], %[t0], asr #11 \n"
126 "bic %[t0], %[t0], #0x1 \n" /* clear bit 0 */
127 "mul %[a0], %[t0], %[v0] \n"
128 "mul %[a1], %[t0], %[v1] \n"
129 : [t0]"+r"(t0),
130 [a0]"=&r"(*amp_0), [a1]"=r"(*amp_1)
131 : [v0]"r"((int) voice->volume [0]),
132 [v1]"r"((int) voice->volume [1]));
133
134 return t0;
135}
136
137#else /* SPC_NOINTERP */
138
139#define SPC_LINEAR_INTERP
140static inline int linear_interp( int16_t const* samples, int32_t position )
141{
142 int output = (int) samples;
143 int y1;
144
145 asm volatile(
146 "mov %[y1], %[f], lsr #12 \n"
147 "eor %[f], %[f], %[y1], lsl #12 \n"
148 "add %[y1], %[y0], %[y1], lsl #1 \n"
149 "ldrsh %[y0], [%[y1], #2] \n"
150 "ldrsh %[y1], [%[y1], #4] \n"
151 "sub %[y1], %[y1], %[y0] \n"
152 "mul %[f], %[y1], %[f] \n"
153 "add %[y0], %[y0], %[f], asr #12 \n"
154 : [f]"+r"(position), [y0]"+r"(output), [y1]"=&r"(y1));
155
156 return output;
157}
158
159#define SPC_LINEAR_AMP
/* ARMv4 envelope/volume stage for linear interpolation.
 * Computes t0 = (output * voice->envx) >> 11, then stores
 * t0 * voice->volume[0] in *amp_0 and t0 * voice->volume[1] in *amp_1.
 * Returns t0. */
160static inline int linear_amp( struct voice_t* voice, int output,
161 int* amp_0, int* amp_1 )
162{
163 int t0;
164
165 asm volatile(
166 "mul %[t0], %[out], %[envx]"
167 : [t0]"=&r"(t0)
 /* envx is a short; widen it explicitly because the 32-bit mul reads
  * the whole register (as the other ARMv4 amp routines do) */
168 : [out]"r"(output), [envx]"r"((int) voice->envx));
169 asm volatile(
170 "mov %[t0], %[t0], asr #11 \n"
171 "mul %[a1], %[t0], %[v1] \n"
172 "mul %[a0], %[t0], %[v0] \n"
173 : [t0]"+r"(t0),
174 [a0]"=&r"(*amp_0), [a1]"=&r"(*amp_1)
175 : [v0]"r"((int) voice->volume [0]),
176 [v1]"r"((int) voice->volume [1]));
177
178 return t0;
179}
180
181#endif /* !SPC_NOINTERP */
182
183
184#if !SPC_NOECHO
185
186#define SPC_DSP_ECHO_APPLY
187
188/* Echo filter history */
189static int32_t fir_buf[FIR_BUF_CNT] IBSS_ATTR_SPC
190 __attribute__(( aligned(FIR_BUF_ALIGN*1) ));
191
192static inline void echo_init( struct Spc_Dsp* this )
193{
194 this->fir.ptr = fir_buf;
195 ci->memset( fir_buf, 0, sizeof fir_buf );
196}
197
198static inline void echo_apply( struct Spc_Dsp* this, uint8_t *echo_ptr,
199 int* out_0, int* out_1 )
200{
201 int t0 = GET_LE16SA( echo_ptr );
202 int t1 = GET_LE16SA( echo_ptr + 2 );
203
204 /* Keep last 8 samples */
205 int32_t *fir_ptr;
206 asm volatile (
207 "add %[p], %[t_p], #8 \n"
208 "bic %[t_p], %[p], %[mask] \n"
209 "str %[t0], [%[p], #-8] \n"
210 "str %[t1], [%[p], #-4] \n"
211 /* duplicate at +8 eliminates wrap checking below */
212 "str %[t0], [%[p], #56] \n"
213 "str %[t1], [%[p], #60] \n"
214 : [p]"=&r"(fir_ptr), [t_p]"+r"(this->fir.ptr)
215 : [t0]"r"(t0), [t1]"r"(t1), [mask]"i"(~FIR_BUF_MASK));
216
217 int32_t *fir_coeff = this->fir.coeff;
218
219 asm volatile (
220 "ldmia %[c]!, { r0-r1 } \n"
221 "ldmia %[p]!, { r4-r5 } \n"
222 "mul %[acc0], r0, %[acc0] \n"
223 "mul %[acc1], r0, %[acc1] \n"
224 "mla %[acc0], r4, r1, %[acc0] \n"
225 "mla %[acc1], r5, r1, %[acc1] \n"
226 "ldmia %[c]!, { r0-r1 } \n"
227 "ldmia %[p]!, { r2-r5 } \n"
228 "mla %[acc0], r2, r0, %[acc0] \n"
229 "mla %[acc1], r3, r0, %[acc1] \n"
230 "mla %[acc0], r4, r1, %[acc0] \n"
231 "mla %[acc1], r5, r1, %[acc1] \n"
232 "ldmia %[c]!, { r0-r1 } \n"
233 "ldmia %[p]!, { r2-r5 } \n"
234 "mla %[acc0], r2, r0, %[acc0] \n"
235 "mla %[acc1], r3, r0, %[acc1] \n"
236 "mla %[acc0], r4, r1, %[acc0] \n"
237 "mla %[acc1], r5, r1, %[acc1] \n"
238 "ldmia %[c]!, { r0-r1 } \n"
239 "ldmia %[p]!, { r2-r5 } \n"
240 "mla %[acc0], r2, r0, %[acc0] \n"
241 "mla %[acc1], r3, r0, %[acc1] \n"
242 "mla %[acc0], r4, r1, %[acc0] \n"
243 "mla %[acc1], r5, r1, %[acc1] \n"
244 : [acc0]"+r"(t0), [acc1]"+r"(t1),
245 [p]"+r"(fir_ptr), [c]"+r"(fir_coeff)
246 :
247 : "r0", "r1", "r2", "r3", "r4", "r5");
248
249 *out_0 = t0;
250 *out_1 = t1;
251}
252
253#endif /* SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.h b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.h
new file mode 100644
index 0000000000..c9985e124a
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv4.h
@@ -0,0 +1,45 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2007-2010 Michael Sevakis (jhMikeS)
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#if !SPC_NOECHO
22
23#define SPC_DSP_ECHO_APPLY
24
25enum
26{
27 FIR_BUF_CNT = FIR_BUF_HALF * 2 * 2,
28 FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
29 FIR_BUF_ALIGN = FIR_BUF_SIZE,
30 FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) * 2 - 1))
31};
32
33/* Echo filter structure embedded in struct Spc_Dsp */
34struct echo_filter
35{
36 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
37 int32_t* ptr;
38 /* FIR history is interleaved with guard to eliminate wrap checking
39 * when convolving.
40 * |LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|...
41 * |--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--| */
42 /* copy of echo FIR constants as int32_t, for faster access */
43 int32_t coeff [VOICE_COUNT];
44};
45#endif /* SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.c b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.c
new file mode 100644
index 0000000000..2e3de87613
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.c
@@ -0,0 +1,244 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2010 Michael Sevakis (jhMikeS)
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#if !SPC_NOINTERP
22
23#define SPC_GAUSSIAN_FAST_INTERP
24static inline int gaussian_fast_interp( int16_t const* samples,
25 int32_t position,
26 int16_t const* fwd,
27 int16_t const* rev )
28{
29 int output;
30 int t0, t1, t2, t3;
31
32 asm volatile (
33 /* NOTE: often-unaligned accesses */
34 "ldr %[t0], [%[samp]] \n" /* t0=i0i1 */
35 "ldr %[t2], [%[fwd]] \n" /* t2=f0f1 */
36 "ldr %[t1], [%[samp], #4] \n" /* t1=i2i3 */
37 "ldr %[t3], [%[rev]] \n" /* t3=r0r1 */
38 "smuad %[out], %[t0], %[t2] \n" /* out=f0*i0+f1*i1 */
39 "smladx %[out], %[t1], %[t3], %[out] \n" /* out+=r1*i2+r0*i3 */
40 : [out]"=r"(output),
41 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=r"(t3)
42 : [fwd]"r"(fwd), [rev]"r"(rev),
43 [samp]"r"(samples + (position >> 12)));
44
45 return output;
46}
47
48#define SPC_GAUSSIAN_FAST_AMP
49static inline int gaussian_fast_amp( struct voice_t* voice, int output,
50 int* amp_0, int* amp_1 )
51{
52 int t0;
53
54 asm volatile (
55 "mov %[t0], %[out], asr #(11-5) \n" /* To do >> 16 below */
56 "mul %[out], %[t0], %[envx] \n"
57 : [out]"+r"(output), [t0]"=&r"(t0)
58 : [envx]"r"((int) voice->envx));
59
60 asm volatile (
61 "smulwb %[a0], %[out], %[v0] \n" /* amp * vol >> 16 */
62 "smulwb %[a1], %[out], %[v1] \n"
63 : [a0]"=&r"(*amp_0), [a1]"=r"(*amp_1)
64 : [out]"r"(output),
65 [v0]"r"(voice->volume [0]),
66 [v1]"r"(voice->volume [1]));
67
68 return output >> 5; /* 'output' still 5 bits too big */
69}
70
71#define SPC_GAUSSIAN_SLOW_INTERP
/* ARMv6 slow gaussian interpolation.
 * Reads four consecutive samples at samples + (position >> 12) and two
 * coefficient pairs (fwd, rev) as packed 32-bit words. The first asm block
 * forms the four 16x16 products separately; the second scales each product
 * down by 12 bits, sums the first three, and combines that with the fourth
 * using packed halfword arithmetic. */
72static inline int gaussian_slow_interp( int16_t const* samples,
73 int32_t position,
74 int16_t const* fwd,
75 int16_t const* rev )
76{
77 int output;
78 int t0, t1, t2, t3;
79
80 asm volatile (
81 /* NOTE: often-unaligned accesses */
82 "ldr %[t0], [%[samp]] \n" /* t0=i0i1 */
83 "ldr %[t2], [%[fwd]] \n" /* t2=f0f1 */
84 "ldr %[t1], [%[samp], #4] \n" /* t1=i2i3 */
85 "ldr %[t3], [%[rev]] \n" /* t3=r0r1 */
86 "smulbb %[out], %[t0], %[t2] \n" /* out=f0*i0 */
87 "smultt %[t0], %[t0], %[t2] \n" /* t0=f1*i1 */
88 "smulbt %[t2], %[t1], %[t3] \n" /* t2=r1*i2 */
89 "smultb %[t3], %[t1], %[t3] \n" /* t3=r0*i3 */
90 : [out]"=r"(output),
91 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=r"(t3)
92 : [fwd]"r"(fwd), [rev]"r"(rev),
93 [samp]"r"(samples + (position >> 12)));
94
95 asm volatile (
96 "mov %[out], %[out], asr #12 \n"
97 "add %[t0], %[out], %[t0], asr #12 \n"
98 "add %[t2], %[t0], %[t2], asr #12 \n"
99 "pkhbt %[t0], %[t2], %[t3], asl #4 \n" /* t3[31:16], t2[15:0] */
100 "sadd16 %[t0], %[t0], %[t0] \n" /* t3[31:16]*2, t2[15:0]*2 */
101 "qsubaddx %[out], %[t0], %[t0] \n" /* out[15:0]=
102 * sat16(t3[31:16]+t2[15:0]) */
103 : [out]"+r"(output),
104 [t0]"+r"(t0), [t2]"+r"(t2), [t3]"+r"(t3));
105
106 /* output will be sign-extended in next step */
107 return output;
108}
109
110#define SPC_GAUSSIAN_SLOW_AMP
111static inline int gaussian_slow_amp( struct voice_t* voice, int output,
112 int* amp_0, int* amp_1 )
113{
114 asm volatile (
115 "smulbb %[out], %[out], %[envx]"
116 : [out]"+r"(output)
117 : [envx]"r"(voice->envx));
118
119 asm volatile (
120 "mov %[out], %[out], asr #11 \n"
121 "bic %[out], %[out], #0x1 \n"
122 "smulbb %[amp_0], %[out], %[v0] \n"
123 "smulbb %[amp_1], %[out], %[v1] \n"
124 : [out]"+r"(output),
125 [amp_0]"=&r"(*amp_0), [amp_1]"=r"(*amp_1)
126 : [v0]"r"(voice->volume[0]), [v1]"r"(voice->volume[1]));
127
128 return output;
129}
130
131#endif /* !SPC_NOINTERP */
132
133#if !SPC_NOECHO
134
135#define SPC_DSP_ECHO_APPLY
136
137/* Echo filter history */
138static int32_t fir_buf[FIR_BUF_CNT] IBSS_ATTR_SPC
139 __attribute__(( aligned(FIR_BUF_ALIGN*1) ));
140
141static inline void echo_init( struct Spc_Dsp* this )
142{
143 this->fir.ptr = fir_buf;
144 ci->memset( fir_buf, 0, sizeof fir_buf );
145}
146
147static inline void echo_apply(struct Spc_Dsp* this,
148 uint8_t* const echo_ptr, int* out_0, int* out_1)
149{
150 /* Keep last 8 samples */
151 int32_t* fir_ptr;
152 int t0;
153 asm volatile (
154 "ldr %[t0], [%[ep]] \n"
155 "add %[p], %[t_p], #4 \n"
156 "bic %[t_p], %[p], %[mask] \n"
157 "str %[t0], [%[p], #-4] \n"
158 /* duplicate at +8 eliminates wrap checking below */
159 "str %[t0], [%[p], #28] \n"
160 : [p]"=&r"(fir_ptr), [t_p]"+r"(this->fir.ptr),
161 [t0]"=&r"(t0)
162 : [ep]"r"(echo_ptr), [mask]"i"(~FIR_BUF_MASK));
163
164 int32_t* fir_coeff = (int32_t *)this->fir.coeff;
165
166 asm volatile ( /* L0R0 = acc0 */
167 "ldmia %[p]!, { r2-r5 } \n" /* L1R1-L4R4 = r2-r5 */
168 "ldmia %[c]!, { r0-r1 } \n" /* C0C1-C2C3 = r0-r1 */
169 "pkhbt %[acc0], %[t0], r2, asl #16 \n" /* L0R0,L1R1->L0L1,R0R1 */
170 "pkhtb r2, r2, %[t0], asr #16 \n"
171 "smuad %[acc0], %[acc0], r0 \n" /* acc0=L0*C0+L1*C1 */
172 "smuad %[acc1], r2, r0 \n" /* acc1=R0*C0+R1*C1 */
173 "pkhbt %[t0], r3, r4, asl #16 \n" /* L2R2,L3R3->L2L3,R2R3 */
174 "pkhtb r4, r4, r3, asr #16 \n"
175 "smlad %[acc0], %[t0], r1, %[acc0] \n" /* acc0+=L2*C2+L3*C3 */
176 "smlad %[acc1], r4, r1, %[acc1] \n" /* acc1+=R2*C2+R3*C3 */
177 "ldmia %[p], { r2-r4 } \n" /* L5R5-L7R7 = r2-r4 */
178 "ldmia %[c], { r0-r1 } \n" /* C4C5-C6C7 = r0-r1 */
179 "pkhbt %[t0], r5, r2, asl #16 \n" /* L4R4,L5R5->L4L5,R4R5 */
180 "pkhtb r2, r2, r5, asr #16 \n"
181 "smlad %[acc0], %[t0], r0, %[acc0] \n" /* acc0+=L4*C4+L5*C5 */
182 "smlad %[acc1], r2, r0, %[acc1] \n" /* acc1+=R4*C4+R5*C5 */
183 "pkhbt %[t0], r3, r4, asl #16 \n" /* L6R6,L7R7->L6L7,R6R7 */
184 "pkhtb r4, r4, r3, asr #16 \n"
185 "smlad %[acc0], %[t0], r1, %[acc0] \n" /* acc0+=L6*C6+L7*C7 */
186 "smlad %[acc1], r4, r1, %[acc1] \n" /* acc1+=R6*C6+R7*C7 */
187 : [t0]"+r"(t0), [acc0]"=&r"(*out_0), [acc1]"=&r"(*out_1),
188 [p]"+r"(fir_ptr), [c]"+r"(fir_coeff)
189 :
190 : "r0", "r1", "r2", "r3", "r4", "r5");
191}
192
193#define SPC_DSP_ECHO_FEEDBACK
194static inline void echo_feedback(struct Spc_Dsp* this, uint8_t* echo_ptr,
195 int echo_0, int echo_1, int fb_0, int fb_1)
196{
197 int e0, e1;
198 asm volatile (
199 "mov %[e0], %[ei0], asl #7 \n"
200 "mov %[e1], %[ei1], asl #7 \n"
201 "mla %[e0], %[fb0], %[ef], %[e0] \n"
202 "mla %[e1], %[fb1], %[ef], %[e1] \n"
203 : [e0]"=&r"(e0), [e1]"=&r"(e1)
204 : [ei0]"r"(echo_0), [ei1]"r"(echo_1),
205 [fb0]"r"(fb_0), [fb1]"r"(fb_1),
206 [ef]"r"((int)this->r.g.echo_feedback));
207 asm volatile (
208 "ssat %[e0], #16, %[e0], asr #14 \n"
209 "ssat %[e1], #16, %[e1], asr #14 \n"
210 "pkhbt %[e0], %[e0], %[e1], lsl #16 \n"
211 "str %[e0], [%[ep]] \n"
212 : [e0]"+r"(e0), [e1]"+r"(e1)
213 : [ep]"r"((int32_t *)echo_ptr));
214}
215
216#define SPC_DSP_GENERATE_OUTPUT
/* ARMv6 final output mix.
 * For each channel n: *out_n = (global_vol_n * chans_n
 *                               + fb_n * this->r.g.echo_volume_n)
 *                              >> global_muting   (arithmetic shift). */
217static inline void echo_output( struct Spc_Dsp* this, int global_muting,
218 int global_vol_0, int global_vol_1, int chans_0, int chans_1,
219 int fb_0, int fb_1, int* out_0, int* out_1 )
220{
221 int t0, t1;
222
223 asm volatile (
 /* main voices scaled by global volume */
224 "mul %[t0], %[gv0], %[ch0] \n"
225 "mul %[t1], %[gv1], %[ch1] \n"
226 : [t0]"=&r"(t0), [t1]"=r"(t1)
227 : [gv0]"r"(global_vol_0), [gv1]"r"(global_vol_1),
228 [ch0]"r"(chans_0), [ch1]"r"(chans_1));
229 asm volatile (
 /* accumulate fb_0/fb_1 scaled by echo volume */
230 "mla %[t0], %[i0], %[ev0], %[t0] \n"
231 "mla %[t1], %[i1], %[ev1], %[t1] \n"
232 : [t0]"+r"(t0), [t1]"+r"(t1)
233 : [i0]"r"(fb_0), [i1]"r"(fb_1),
234 [ev0]"r"((int)this->r.g.echo_volume_0),
235 [ev1]"r"((int)this->r.g.echo_volume_1));
236 asm volatile (
 /* shift count comes from a register, not an immediate */
237 "mov %[o0], %[t0], asr %[gm] \n"
238 "mov %[o1], %[t1], asr %[gm] \n"
239 : [o0]"=&r"(*out_0), [o1]"=r"(*out_1)
240 : [t0]"r"(t0), [t1]"r"(t1),
241 [gm]"r"(global_muting));
242}
243
244#endif /* SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.h b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.h
new file mode 100644
index 0000000000..a36d8166c2
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_armv6.h
@@ -0,0 +1,45 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2010 Michael Sevakis (jhMikeS)
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#if !SPC_NOECHO
22
23#define SPC_DSP_ECHO_APPLY
24
25enum
26{
27 FIR_BUF_CNT = FIR_BUF_HALF * 2,
28 FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
29 FIR_BUF_ALIGN = FIR_BUF_SIZE,
30 FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1))
31};
32
33/* Echo filter structure embedded in struct Spc_Dsp */
34struct echo_filter
35{ /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
36 int32_t* ptr;
37 /* FIR history is interleaved with guard to eliminate wrap checking
38 * when convolving.
39 * |LR|LR|LR|LR|LR|LR|LR|LR|--|--|--|--|--|--|--|--| */
40 /* copy of echo FIR constants as int16_t, loaded as int32 for
41 * halfword, packed multiplies */
42 int16_t coeff [VOICE_COUNT];
43};
44
45#endif /* SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.c b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.c
new file mode 100644
index 0000000000..b0d14d157e
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.c
@@ -0,0 +1,198 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2007 Michael Sevakis (jhMikeS)
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#if SPC_NOINTERP
22
23#define SPC_LINEAR_INTERP
24static inline int linear_interp( int16_t const* samples, int32_t position )
25{
26 uint32_t f = position;
27 int32_t y0, y1;
28
29 /**
30 * output = y0 + f*y1 - f*y0
31 */
32 asm volatile (
33 "move.l %[f], %[y1] \n" /* separate frac and whole */
34 "and.l #0xfff, %[f] \n"
35 "asr.l %[sh], %[y1] \n"
36 "move.l 2(%[s], %[y1].l*2), %[y1] \n" /* y0=upper, y1=lower */
37 "mac.w %[f]l, %[y1]l, %%acc0 \n" /* %acc0 = f*y1 */
38 "msac.w %[f]l, %[y1]u, %%acc0 \n" /* %acc0 -= f*y0 */
39 "swap %[y1] \n" /* separate out y0 and sign extend */
40 "movea.w %[y1], %[y0] \n"
41 "movclr.l %%acc0, %[y1] \n" /* fetch, scale down, add y0 */
42 "asr.l %[sh], %[y1] \n" /* output = y0 + (result >> 12) */
43 "add.l %[y0], %[y1] \n"
44 : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(y1)
45 : [s]"a"(samples), [sh]"d"(12));
46
47 return y1;
48}
49
50#define SPC_LINEAR_AMP
51static inline int linear_amp( struct voice_t* voice, int output,
52 int* amp_0, int* amp_1 )
53{
54 asm volatile (
55 "mac.w %[out]l, %[envx]l, %%acc0"
56 :
57 : [out]"r"(output), [envx]"r"(voice->envx));
58 asm volatile (
59 "movclr.l %%acc0, %[out] \n"
60 "asr.l #8, %[out] \n"
61 "mac.l %[v0], %[out], %%acc0 \n"
62 "mac.l %[v1], %[out], %%acc1 \n"
63 "asr.l #3, %[out] \n"
64 : [out]"+r"(output)
65 : [v0]"r"((int) voice->volume [0]),
66 [v1]"r"((int) voice->volume [1]));
67 asm volatile (
68 "movclr.l %%acc0, %[a0] \n"
69 "asr.l #3, %[a0] \n"
70 "movclr.l %%acc1, %[a1] \n"
71 "asr.l #3, %[a1] \n"
72 : [a0]"=d"(*amp_0), [a1]"=d"(*amp_1));
73
74 return output;
75}
76
77#endif /* SPC_NOINTERP */
78
79
80#if !SPC_NOECHO
81
82#define SPC_DSP_ECHO_APPLY
83
84/* Echo filter history */
85static int32_t fir_buf[FIR_BUF_CNT] IBSS_ATTR_SPC
86 __attribute__(( aligned(FIR_BUF_ALIGN*1) ));
87
88static inline void echo_init( struct Spc_Dsp* this )
89{
90 /* Initialize mask register with the buffer address mask */
91 asm volatile ("move.l %0, %%mask" : : "i"(FIR_BUF_MASK));
92 this->fir.ptr = fir_buf;
93 this->fir.hist_ptr = &fir_buf [1];
94 ci->memset( fir_buf, 0, sizeof fir_buf );
95}
96
97static inline void echo_apply( struct Spc_Dsp* this, uint8_t* echo_ptr,
98 int* out_0, int* out_1 )
99{
100 int t0, t1, t2;
101
102 t1 = swap_odd_even32( *(int32_t *)echo_ptr );
103
104 /* Keep last 8 samples */
105 *this->fir.ptr = t1;
106 this->fir.ptr = this->fir.hist_ptr;
107
108 asm volatile (
109 "move.l (%[c]) , %[t2] \n"
110 "mac.w %[t1]u, %[t2]u, <<, (%[p])+&, %[t0], %%acc0 \n"
111 "mac.w %[t1]l, %[t2]u, <<, (%[p])& , %[t1], %%acc1 \n"
112 "mac.w %[t0]u, %[t2]l, << , %%acc0 \n"
113 "mac.w %[t0]l, %[t2]l, <<, 4(%[c]) , %[t2], %%acc1 \n"
114 "mac.w %[t1]u, %[t2]u, <<, 4(%[p])& , %[t0], %%acc0 \n"
115 "mac.w %[t1]l, %[t2]u, <<, 8(%[p])& , %[t1], %%acc1 \n"
116 "mac.w %[t0]u, %[t2]l, << , %%acc0 \n"
117 "mac.w %[t0]l, %[t2]l, <<, 8(%[c]) , %[t2], %%acc1 \n"
118 "mac.w %[t1]u, %[t2]u, <<, 12(%[p])& , %[t0], %%acc0 \n"
119 "mac.w %[t1]l, %[t2]u, <<, 16(%[p])& , %[t1], %%acc1 \n"
120 "mac.w %[t0]u, %[t2]l, << , %%acc0 \n"
121 "mac.w %[t0]l, %[t2]l, <<, 12(%[c]) , %[t2], %%acc1 \n"
122 "mac.w %[t1]u, %[t2]u, <<, 20(%[p])& , %[t0], %%acc0 \n"
123 "mac.w %[t1]l, %[t2]u, << , %%acc1 \n"
124 "mac.w %[t0]u, %[t2]l, << , %%acc0 \n"
125 "mac.w %[t0]l, %[t2]l, << , %%acc1 \n"
126 : [t0]"=&r"(t0), [t1]"+r"(t1), [t2]"=&r"(t2),
127 [p]"+a"(this->fir.hist_ptr)
128 : [c]"a"(this->fir.coeff));
129 asm volatile (
130 "movclr.l %%acc0, %[o0] \n"
131 "movclr.l %%acc1, %[o1] \n"
132 "mac.l %[ev0], %[o0], >>, %%acc2 \n" /* echo volume */
133 "mac.l %[ev1], %[o1], >>, %%acc3 \n"
134 : [o0]"=&r"(*out_0), [o1]"=&r"(*out_1)
135 : [ev0]"r"((int) this->r.g.echo_volume_0),
136 [ev1]"r"((int) this->r.g.echo_volume_1));
137}
138
139#define SPC_DSP_ECHO_FEEDBACK
140static inline void echo_feedback( struct Spc_Dsp* this, uint8_t* echo_ptr,
141 int echo_0, int echo_1, int fb_0, int fb_1 )
142{
143 asm volatile (
144 /* scale echo voices; saturate if overflow */
145 "mac.l %[sh], %[e1] , %%acc1 \n"
146 "mac.l %[sh], %[e0] , %%acc0 \n"
147 /* add scaled output from FIR filter */
148 "mac.l %[fb1], %[ef], <<, %%acc1 \n"
149 "mac.l %[fb0], %[ef], <<, %%acc0 \n"
150 :
151 : [e0]"d"(echo_0), [e1]"d"(echo_1),
152 [fb0]"r"(fb_0), [fb1]"r"(fb_1),
153 [ef]"r"((int)this->r.g.echo_feedback),
154 [sh]"r"(1 << 9));
155 /* swap and fetch feedback results */
156 int t0;
157 asm volatile(
158 "move.l #0x00ff00ff, %[t0] \n"
159 "movclr.l %%acc1, %[e1] \n"
160 "swap.w %[e1] \n"
161 "movclr.l %%acc0, %[e0] \n"
162 "move.w %[e1], %[e0] \n"
163 "and.l %[e0], %[t0] \n"
164 "eor.l %[t0], %[e0] \n"
165 "lsl.l #8, %[t0] \n"
166 "lsr.l #8, %[e0] \n"
167 "or.l %[e0], %[t0] \n"
168 : [e0]"=&d"(echo_0), [e1]"=&d"(echo_1),
169 [t0]"=&d"(t0));
170
171 /* save final feedback into echo buffer */
172 *(int32_t *)echo_ptr = t0;
173}
174
175#define SPC_DSP_GENERATE_OUTPUT
176static inline void echo_output( struct Spc_Dsp* this, int global_muting,
177 int global_vol_0, int global_vol_1, int chans_0, int chans_1,
178 int fb_0, int fb_1, int* out_0, int* out_1 )
179{
180 asm volatile (
181 "mac.l %[ch0], %[gv0], %%acc2 \n" /* global volume */
182 "mac.l %[ch1], %[gv1], %%acc3 \n"
183 :
184 : [ch0]"r"(chans_0), [gv0]"r"(global_vol_0),
185 [ch1]"r"(chans_1), [gv1]"r"(global_vol_1));
186 asm volatile (
187 "movclr.l %%acc2, %[a0] \n" /* fetch mixed output */
188 "movclr.l %%acc3, %[a1] \n"
189 "asr.l %[gm], %[a0] \n" /* scale by global_muting shift */
190 "asr.l %[gm], %[a1] \n"
191 : [a0]"=&d"(*out_0), [a1]"=&d"(*out_1)
192 : [gm]"d"(global_muting));
193
194 /* scaled echo is stored in %acc2 and %acc3 */
195 (void)this; (void)fb_0; (void)fb_1;
196}
197
198#endif /* !SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.h b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.h
new file mode 100644
index 0000000000..f9aafabd18
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/cpu/spc_dsp_coldfire.h
@@ -0,0 +1,45 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2007 Michael Sevakis (jhMikeS)
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21#if !SPC_NOECHO
22
23#define SPC_DSP_ECHO_APPLY
24
25enum
26{
27 FIR_BUF_CNT = FIR_BUF_HALF,
28 FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
29 FIR_BUF_ALIGN = FIR_BUF_SIZE * 2,
30 FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1))
31};
32
33/* Echo filter structure embedded in struct Spc_Dsp */
34struct echo_filter
35{
36 /* FIR history is interleaved. Hardware handles wrapping by mask.
37 * |LR|LR|LR|LR|LR|LR|LR|LR| */
38 int32_t* ptr;
39 /* wrapped address just behind current position -
40 allows mac.w to increment and mask ptr */
41 int32_t* hist_ptr;
42 /* copy of echo FIR constants as int16_t for use with mac.w */
43 int16_t coeff [VOICE_COUNT];
44};
45#endif /* !SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/spc_codec.h b/lib/rbcodec/codecs/libspc/spc_codec.h
index 7f6b6e2e9f..a8eee6bfef 100644
--- a/lib/rbcodec/codecs/libspc/spc_codec.h
+++ b/lib/rbcodec/codecs/libspc/spc_codec.h
@@ -213,7 +213,9 @@ struct cpu_ram_t
213#define RAM ram.ram 213#define RAM ram.ram
214extern struct cpu_ram_t ram; 214extern struct cpu_ram_t ram;
215 215
216long CPU_run( THIS, long start_time ) ICODE_ATTR_SPC; 216long CPU_run( THIS, long start_time )
217 ICODE_ATTR_SPC;
218
217void CPU_Init( THIS ); 219void CPU_Init( THIS );
218 220
219/* The DSP portion (awe!) */ 221/* The DSP portion (awe!) */
@@ -261,6 +263,7 @@ struct globals_t
261 char unused9 [2]; 263 char unused9 [2];
262}; 264};
263 265
266enum { ENV_RATE_INIT = 0x7800 };
264enum state_t 267enum state_t
265{ /* -1, 0, +1 allows more efficient if statements */ 268{ /* -1, 0, +1 allows more efficient if statements */
266 state_decay = -1, 269 state_decay = -1,
@@ -278,64 +281,61 @@ struct cache_entry_t
278}; 281};
279 282
280enum { BRR_BLOCK_SIZE = 16 }; 283enum { BRR_BLOCK_SIZE = 16 };
281enum { BRR_CACHE_SIZE = 0x20000 + 32} ; 284enum { BRR_CACHE_SIZE = 0x20000 + 32};
285
286#if SPC_BRRCACHE
/* Per-voice waveform state when BRR data is pre-decoded into a cache:
 * the voice only references decoded samples and tracks its position
 * against the cached end/loop values. */
287struct voice_wave_t
288{
289 int16_t const* samples; /* decoded samples in cache */
290 long position; /* position in samples buffer, 12-bit frac */
291 long end; /* end position in samples buffer */
292 int loop; /* length of looping area */
293 unsigned block_header; /* header byte from current BRR block */
294 uint8_t const* addr; /* BRR waveform address in RAM */
295};
296#else /* !SPC_BRRCACHE */
/* Per-voice waveform state when BRR blocks are decoded on demand: the
 * voice owns a small buffer holding the most recently decoded block
 * (BRR_BLOCK_SIZE samples) plus a few extra entries around it. */
297struct voice_wave_t
298{
299 int16_t samples [3 + BRR_BLOCK_SIZE + 1]; /* last decoded block */
300 int32_t position; /* position in samples buffer, 12-bit frac */
301 unsigned block_header; /* header byte from current BRR block */
302 uint8_t const* addr; /* BRR waveform address in RAM */
303};
304#endif /* SPC_BRRCACHE */
282 305
283struct voice_t 306struct voice_t
284{ 307{
285#if SPC_BRRCACHE 308 struct voice_wave_t wave;
286 int16_t const* samples;
287 long wave_end;
288 int wave_loop;
289#else
290 int16_t samples [3 + BRR_BLOCK_SIZE + 1];
291 int block_header; /* header byte from current block */
292#endif
293 uint8_t const* addr;
294 short volume [2]; 309 short volume [2];
295 long position;/* position in samples buffer, with 12-bit fraction */
296 short envx; 310 short envx;
297 short env_mode; 311 short env_mode;
298 short env_timer; 312 short env_timer;
299 short key_on_delay; 313 short key_on_delay;
314 short rate;
300}; 315};
301 316
302#if SPC_BRRCACHE 317#if !SPC_NOECHO
303/* a little extra for samples that go past end */ 318enum { FIR_BUF_HALF = 8 };
304extern int16_t BRRcache [BRR_CACHE_SIZE];
305#endif 319#endif
306 320
307enum { FIR_BUF_HALF = 8 }; 321struct Spc_Dsp;
308 322
309#if defined(CPU_COLDFIRE) 323/* These must go before the definition of struct Spc_Dsp because a
310/* global because of the large aligment requirement for hardware masking - 324 definition of struct echo_filter is required. Only declarations
311 * L-R interleaved 16-bit samples for easy loading and mac.w use. 325 are created unless SPC_DSP_C is defined before including these. */
312 */ 326#if defined(CPU_ARM)
313enum
314{
315 FIR_BUF_CNT = FIR_BUF_HALF,
316 FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
317 FIR_BUF_ALIGN = FIR_BUF_SIZE * 2,
318 FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1))
319};
320#elif defined (CPU_ARM)
321#if ARM_ARCH >= 6 327#if ARM_ARCH >= 6
322enum 328#include "cpu/spc_dsp_armv6.h"
323{
324 FIR_BUF_CNT = FIR_BUF_HALF * 2,
325 FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
326 FIR_BUF_ALIGN = FIR_BUF_SIZE,
327 FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) - 1))
328};
329#else 329#else
330enum 330#include "cpu/spc_dsp_armv4.h"
331{ 331#endif
332 FIR_BUF_CNT = FIR_BUF_HALF * 2 * 2, 332#elif defined (CPU_COLDFIRE)
333 FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ), 333#include "cpu/spc_dsp_coldfire.h"
334 FIR_BUF_ALIGN = FIR_BUF_SIZE, 334#endif
335 FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) * 2 - 1)) 335
336}; 336/* Above may still use generic implementations. Also defines final
337#endif /* ARM_ARCH */ 337 function names. */
338#endif /* CPU_* */ 338#include "spc_dsp_generic.h"
339 339
340struct Spc_Dsp 340struct Spc_Dsp
341{ 341{
@@ -347,47 +347,15 @@ struct Spc_Dsp
347 int16_t align; 347 int16_t align;
348 } r; 348 } r;
349 349
350 unsigned echo_pos;
351 int keys_down; 350 int keys_down;
352 int noise_count; 351 int noise_count;
353 uint16_t noise; /* also read as int16_t */ 352 uint16_t noise; /* also read as int16_t */
354
355#if defined(CPU_COLDFIRE)
356 /* FIR history is interleaved. Hardware handles wrapping by mask.
357 * |LR|LR|LR|LR|LR|LR|LR|LR| */
358 int32_t *fir_ptr;
359 /* wrapped address just behind current position -
360 allows mac.w to increment and mask fir_ptr */
361 int32_t *last_fir_ptr;
362 /* copy of echo FIR constants as int16_t for use with mac.w */
363 int16_t fir_coeff [VOICE_COUNT];
364#elif defined (CPU_ARM)
365 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
366 int32_t *fir_ptr;
367#if ARM_ARCH >= 6
368 /* FIR history is interleaved with guard to eliminate wrap checking
369 * when convolving.
370 * |LR|LR|LR|LR|LR|LR|LR|LR|--|--|--|--|--|--|--|--| */
371 /* copy of echo FIR constants as int16_t, loaded as int32 for
372 * halfword, packed multiples */
373 int16_t fir_coeff [VOICE_COUNT];
374#else
375 /* FIR history is interleaved with guard to eliminate wrap checking
376 * when convolving.
377 * |LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|LL|RR|...
378 * |--|--|--|--|--|--|--|--|--|--|--|--|--|--|--|--| */
379 /* copy of echo FIR constants as int32_t, for faster access */
380 int32_t fir_coeff [VOICE_COUNT];
381#endif /* ARM_ARCH */
382#else /* Unoptimized CPU */
383 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
384 int fir_pos; /* (0 to 7) */
385 int fir_buf [FIR_BUF_HALF * 2] [2];
386 /* copy of echo FIR constants as int, for faster access */
387 int fir_coeff [VOICE_COUNT];
388#endif
389
390 struct voice_t voice_state [VOICE_COUNT]; 353 struct voice_t voice_state [VOICE_COUNT];
354
355#if !SPC_NOECHO
356 unsigned echo_pos;
357 struct echo_filter fir;
358#endif /* !SPC_NOECHO */
391 359
392#if SPC_BRRCACHE 360#if SPC_BRRCACHE
393 uint8_t oldsize; 361 uint8_t oldsize;
@@ -396,7 +364,9 @@ struct Spc_Dsp
396#endif 364#endif
397}; 365};
398 366
399void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) ICODE_ATTR_SPC; 367void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
368 ICODE_ATTR_SPC;
369
400void DSP_reset( struct Spc_Dsp* this ); 370void DSP_reset( struct Spc_Dsp* this );
401 371
402static inline void DSP_run( struct Spc_Dsp* this, long count, int32_t* out ) 372static inline void DSP_run( struct Spc_Dsp* this, long count, int32_t* out )
@@ -474,7 +444,8 @@ void SPC_Init( THIS );
474int SPC_load_spc( THIS, const void* data, long size ); 444int SPC_load_spc( THIS, const void* data, long size );
475 445
476/**************** DSP interaction ****************/ 446/**************** DSP interaction ****************/
477void DSP_write( struct Spc_Dsp* this, int i, int data ) ICODE_ATTR_SPC; 447void DSP_write( struct Spc_Dsp* this, int i, int data )
448 ICODE_ATTR_SPC;
478 449
479static inline int DSP_read( struct Spc_Dsp* this, int i ) 450static inline int DSP_read( struct Spc_Dsp* this, int i )
480{ 451{
@@ -482,10 +453,14 @@ static inline int DSP_read( struct Spc_Dsp* this, int i )
482 return this->r.reg [i]; 453 return this->r.reg [i];
483} 454}
484 455
485int SPC_read( THIS, unsigned addr, long const time ) ICODE_ATTR_SPC; 456int SPC_read( THIS, unsigned addr, long const time )
486void SPC_write( THIS, unsigned addr, int data, long const time ) ICODE_ATTR_SPC; 457 ICODE_ATTR_SPC;
458
459void SPC_write( THIS, unsigned addr, int data, long const time )
460 ICODE_ATTR_SPC;
487 461
488/**************** Sample generation ****************/ 462/**************** Sample generation ****************/
489int SPC_play( THIS, long count, int32_t* out ) ICODE_ATTR_SPC; 463int SPC_play( THIS, long count, int32_t* out )
464 ICODE_ATTR_SPC;
490 465
491#endif /* _SPC_CODEC_H_ */ 466#endif /* _SPC_CODEC_H_ */
diff --git a/lib/rbcodec/codecs/libspc/spc_cpu.c b/lib/rbcodec/codecs/libspc/spc_cpu.c
index 23dcc257de..dbbc6cda0f 100644
--- a/lib/rbcodec/codecs/libspc/spc_cpu.c
+++ b/lib/rbcodec/codecs/libspc/spc_cpu.c
@@ -113,9 +113,7 @@ enum { st_c = 0x01 };
113 113
114long CPU_run( THIS, long start_time ) 114long CPU_run( THIS, long start_time )
115{ 115{
116#if 0
117 ENTER_TIMER(cpu); 116 ENTER_TIMER(cpu);
118#endif
119 117
120 register long spc_time_ = start_time; 118 register long spc_time_ = start_time;
121 119
@@ -1036,9 +1034,7 @@ out_of_time:
1036 this->r.x = (uint8_t) x; 1034 this->r.x = (uint8_t) x;
1037 this->r.y = (uint8_t) y; 1035 this->r.y = (uint8_t) y;
1038 1036
1039#if 0
1040 EXIT_TIMER(cpu); 1037 EXIT_TIMER(cpu);
1041#endif
1042 return spc_time_; 1038 return spc_time_;
1043} 1039}
1044 1040
diff --git a/lib/rbcodec/codecs/libspc/spc_dsp.c b/lib/rbcodec/codecs/libspc/spc_dsp.c
index 6350c4c331..c94fbc990e 100644
--- a/lib/rbcodec/codecs/libspc/spc_dsp.c
+++ b/lib/rbcodec/codecs/libspc/spc_dsp.c
@@ -27,15 +27,103 @@
27#include "spc_codec.h" 27#include "spc_codec.h"
28#include "spc_profiler.h" 28#include "spc_profiler.h"
29 29
30#if defined(CPU_COLDFIRE) || defined (CPU_ARM) 30#define CLAMP16( n ) clip_sample_16( n )
31int32_t fir_buf[FIR_BUF_CNT] IBSS_ATTR_SPC 31
32 __attribute__((aligned(FIR_BUF_ALIGN*1))); 32#if defined(CPU_ARM)
33#if ARM_ARCH >= 6
34#include "cpu/spc_dsp_armv6.c"
35#else
36#include "cpu/spc_dsp_armv4.c"
33#endif 37#endif
34#if SPC_BRRCACHE 38#elif defined (CPU_COLDFIRE)
35/* a little extra for samples that go past end */ 39#include "cpu/spc_dsp_coldfire.c"
36int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR;
37#endif 40#endif
38 41
42/* Above may still use generic implementations. Also defines final
43 function names. */
44#include "spc_dsp_generic.c"
45
46/* each rate divides exactly into 0x7800 without remainder */
/* Entry 0 is 0 and is the one exception to the note above. The table has
 * 0x20 entries; presumably it is indexed by a 5-bit envelope rate value
 * and counted against ENV_RATE_INIT (0x7800) - the users of this table
 * are outside this view, so confirm there. */
47static unsigned short const env_rates [0x20] ICONST_ATTR_SPC =
48{
49 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
50 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
51 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
52 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
53};
54
55#if !SPC_NOINTERP
56/* Interleaved gauss table (to improve cache coherency). */
57/* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
58static int16_t const gauss_table [512] ICONST_ATTR_SPC MEM_ALIGN_ATTR =
59{
60 370,1305, 366,1305, 362,1304, 358,1304,
61 354,1304, 351,1304, 347,1304, 343,1303,
62 339,1303, 336,1303, 332,1302, 328,1302,
63 325,1301, 321,1300, 318,1300, 314,1299,
64 311,1298, 307,1297, 304,1297, 300,1296,
65 297,1295, 293,1294, 290,1293, 286,1292,
66 283,1291, 280,1290, 276,1288, 273,1287,
67 270,1286, 267,1284, 263,1283, 260,1282,
68 257,1280, 254,1279, 251,1277, 248,1275,
69 245,1274, 242,1272, 239,1270, 236,1269,
70 233,1267, 230,1265, 227,1263, 224,1261,
71 221,1259, 218,1257, 215,1255, 212,1253,
72 210,1251, 207,1248, 204,1246, 201,1244,
73 199,1241, 196,1239, 193,1237, 191,1234,
74 188,1232, 186,1229, 183,1227, 180,1224,
75 178,1221, 175,1219, 173,1216, 171,1213,
76 168,1210, 166,1207, 163,1205, 161,1202,
77 159,1199, 156,1196, 154,1193, 152,1190,
78 150,1186, 147,1183, 145,1180, 143,1177,
79 141,1174, 139,1170, 137,1167, 134,1164,
80 132,1160, 130,1157, 128,1153, 126,1150,
81 124,1146, 122,1143, 120,1139, 118,1136,
82 117,1132, 115,1128, 113,1125, 111,1121,
83 109,1117, 107,1113, 106,1109, 104,1106,
84 102,1102, 100,1098, 99,1094, 97,1090,
85 95,1086, 94,1082, 92,1078, 90,1074,
86 89,1070, 87,1066, 86,1061, 84,1057,
87 83,1053, 81,1049, 80,1045, 78,1040,
88 77,1036, 76,1032, 74,1027, 73,1023,
89 71,1019, 70,1014, 69,1010, 67,1005,
90 66,1001, 65, 997, 64, 992, 62, 988,
91 61, 983, 60, 978, 59, 974, 58, 969,
92 56, 965, 55, 960, 54, 955, 53, 951,
93 52, 946, 51, 941, 50, 937, 49, 932,
94 48, 927, 47, 923, 46, 918, 45, 913,
95 44, 908, 43, 904, 42, 899, 41, 894,
96 40, 889, 39, 884, 38, 880, 37, 875,
97 36, 870, 36, 865, 35, 860, 34, 855,
98 33, 851, 32, 846, 32, 841, 31, 836,
99 30, 831, 29, 826, 29, 821, 28, 816,
100 27, 811, 27, 806, 26, 802, 25, 797,
101 24, 792, 24, 787, 23, 782, 23, 777,
102 22, 772, 21, 767, 21, 762, 20, 757,
103 20, 752, 19, 747, 19, 742, 18, 737,
104 17, 732, 17, 728, 16, 723, 16, 718,
105 15, 713, 15, 708, 15, 703, 14, 698,
106 14, 693, 13, 688, 13, 683, 12, 678,
107 12, 674, 11, 669, 11, 664, 11, 659,
108 10, 654, 10, 649, 10, 644, 9, 640,
109 9, 635, 9, 630, 8, 625, 8, 620,
110 8, 615, 7, 611, 7, 606, 7, 601,
111 6, 596, 6, 592, 6, 587, 6, 582,
112 5, 577, 5, 573, 5, 568, 5, 563,
113 4, 559, 4, 554, 4, 550, 4, 545,
114 4, 540, 3, 536, 3, 531, 3, 527,
115 3, 522, 3, 517, 2, 513, 2, 508,
116 2, 504, 2, 499, 2, 495, 2, 491,
117 2, 486, 1, 482, 1, 477, 1, 473,
118 1, 469, 1, 464, 1, 460, 1, 456,
119 1, 451, 1, 447, 1, 443, 1, 439,
120 0, 434, 0, 430, 0, 426, 0, 422,
121 0, 418, 0, 414, 0, 410, 0, 405,
122 0, 401, 0, 397, 0, 393, 0, 389,
123 0, 385, 0, 381, 0, 378, 0, 374,
124};
125#endif /* !SPC_NOINTERP */
126
39void DSP_write( struct Spc_Dsp* this, int i, int data ) 127void DSP_write( struct Spc_Dsp* this, int i, int data )
40{ 128{
41 assert( (unsigned) i < REGISTER_COUNT ); 129 assert( (unsigned) i < REGISTER_COUNT );
@@ -51,230 +139,395 @@ void DSP_write( struct Spc_Dsp* this, int i, int data )
51 v->volume [0] = left; 139 v->volume [0] = left;
52 v->volume [1] = right; 140 v->volume [1] = right;
53 } 141 }
142 else if ( low < 4 ) /* voice rates */
143 {
144 struct voice_t* v = this->voice_state + high;
145 v->rate = GET_LE16A( this->r.voice[high].rate ) & 0x3fff;
146 }
147#if !SPC_NOECHO
54 else if ( low == 0x0F ) /* fir coefficients */ 148 else if ( low == 0x0F ) /* fir coefficients */
55 { 149 {
56 this->fir_coeff [7 - high] = (int8_t) data; /* sign-extend */ 150 this->fir.coeff [7 - high] = (int8_t) data; /* sign-extend */
57 } 151 }
152#endif /* !SPC_NOECHO */
58} 153}
59 154
60#define CLAMP16( n ) clip_sample_16( n ) 155/* Decode BRR block */
156static inline void
157decode_brr_block( struct voice_t* voice, uint8_t const* addr, int16_t* out )
158{
159 /* header */
160 unsigned block_header = *addr;
161 voice->wave.block_header = block_header;
162
163 /* point to next header */
164 addr += 9;
165 voice->wave.addr = addr;
166
167 /* previous samples */
168 int smp2 = out [0];
169 int smp1 = out [1];
170
171 int offset = -BRR_BLOCK_SIZE * 4;
172
173#if !SPC_BRRCACHE
174 out [-(BRR_BLOCK_SIZE + 1)] = out [-1];
175
176 /* if next block has end flag set,
177 this block ends early (verified) */
178 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
179 {
180 /* arrange for last 9 samples to be skipped */
181 int const skip = 9;
182 out [skip - (BRR_BLOCK_SIZE + 1)] = out [-1];
183 out += (skip & 1);
184 voice->wave.position += skip * 0x1000;
185 offset = (-BRR_BLOCK_SIZE + (skip & ~1)) * 4;
186 addr -= skip / 2;
187 /* force sample to end on next decode */
188 voice->wave.block_header = 1;
189 }
190#endif /* !SPC_BRRCACHE */
191
192 int const filter = block_header & 0x0c;
193 int const scale = block_header >> 4;
194
195 if ( filter == 0x08 ) /* filter 2 (30-90% of the time) */
196 {
197 /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */
198 do /* decode and filter 16 samples */
199 {
200 /* Get nybble, sign-extend, then scale
201 get byte, select which nybble, sign-extend, then shift
202 based on scaling. */
203 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
204 delta = (delta << scale) >> 1;
205
206 if (scale > 0xc)
207 delta = (delta >> 17) << 11;
208
209 out [offset >> 2] = smp2;
210
211 delta -= smp2 >> 1;
212 delta += smp2 >> 5;
213 delta += smp1;
214 delta += (-smp1 - (smp1 >> 1)) >> 5;
215
216 delta = CLAMP16( delta );
217 smp2 = smp1;
218 smp1 = (int16_t) (delta * 2); /* sign-extend */
219 }
220 while ( (offset += 4) != 0 );
221 }
222 else if ( filter == 0x04 ) /* filter 1 */
223 {
224 /* y[n] = x[n] + 15/16 * y[n-1] */
225 do /* decode and filter 16 samples */
226 {
227 /* Get nybble, sign-extend, then scale
228 get byte, select which nybble, sign-extend, then shift
229 based on scaling. */
230 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
231 delta = (delta << scale) >> 1;
232
233 if (scale > 0xc)
234 delta = (delta >> 17) << 11;
235
236 out [offset >> 2] = smp2;
237
238 delta += smp1 >> 1;
239 delta += (-smp1) >> 5;
240
241 delta = CLAMP16( delta );
242 smp2 = smp1;
243 smp1 = (int16_t) (delta * 2); /* sign-extend */
244 }
245 while ( (offset += 4) != 0 );
246 }
247 else if ( filter == 0x0c ) /* filter 3 */
248 {
249 /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */
250 do /* decode and filter 16 samples */
251 {
252 /* Get nybble, sign-extend, then scale
253 get byte, select which nybble, sign-extend, then shift
254 based on scaling. */
255 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
256 delta = (delta << scale) >> 1;
257
258 if (scale > 0xc)
259 delta = (delta >> 17) << 11;
260
261 out [offset >> 2] = smp2;
262
263 delta -= smp2 >> 1;
264 delta += (smp2 + (smp2 >> 1)) >> 4;
265 delta += smp1;
266 delta += (-smp1 * 13) >> 7;
267
268 delta = CLAMP16( delta );
269 smp2 = smp1;
270 smp1 = (int16_t) (delta * 2); /* sign-extend */
271 }
272 while ( (offset += 4) != 0 );
273 }
274 else /* filter 0 */
275 {
276 /* y[n] = x[n] */
277 do /* decode and filter 16 samples */
278 {
279 /* Get nybble, sign-extend, then scale
280 get byte, select which nybble, sign-extend, then shift
281 based on scaling. */
282 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
283 delta = (delta << scale) >> 1;
284
285 if (scale > 0xc)
286 delta = (delta >> 17) << 11;
287
288 out [offset >> 2] = smp2;
289
290 smp2 = smp1;
291 smp1 = delta * 2;
292 }
293 while ( (offset += 4) != 0 );
294 }
61 295
62#if SPC_BRRCACHE 296#if SPC_BRRCACHE
63static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, 297 if ( !(block_header & 1) )
64 struct voice_t* voice, 298 {
65 struct raw_voice_t const* const raw_voice ) ICODE_ATTR_SPC; 299 /* save to end of next block (for next call) */
66static void decode_brr( struct Spc_Dsp* this, unsigned start_addr, 300 out [BRR_BLOCK_SIZE ] = smp2;
67 struct voice_t* voice, 301 out [BRR_BLOCK_SIZE + 1] = smp1;
68 struct raw_voice_t const* const raw_voice ) 302 }
303 else
304#endif /* SPC_BRRCACHE */
305 {
306 /* save to end of this block */
307 out [0] = smp2;
308 out [1] = smp1;
309 }
310}
311
312#if SPC_BRRCACHE
313static void NO_INLINE ICODE_ATTR_SPC
314brr_decode_cache( struct Spc_Dsp* this, struct src_dir const* sd,
315 unsigned start_addr, struct voice_t* voice,
316 struct raw_voice_t const* raw_voice )
69{ 317{
70 /* setup same variables as where decode_brr() is called from */ 318 /* a little extra for samples that go past end */
71 #undef RAM 319 static int16_t BRRcache [BRR_CACHE_SIZE] CACHEALIGN_ATTR;
72 #define RAM ram.ram 320
321 DEBUGF( "decode at %08x (wave #%d)\n",
322 start_addr, raw_voice->waveform );
73 323
74 struct src_dir const* const sd =
75 &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)];
76 struct cache_entry_t* const wave_entry = 324 struct cache_entry_t* const wave_entry =
77 &this->wave_entry [raw_voice->waveform]; 325 &this->wave_entry [raw_voice->waveform];
78 326
79 /* the following block can be put in place of the call to 327 wave_entry->start_addr = start_addr;
80 decode_brr() below 328
81 */ 329 uint8_t const* const loop_ptr =
330 ram.ram + letoh16( sd [raw_voice->waveform].loop );
331
332 int16_t* loop_start = NULL;
333
334 uint8_t const* addr = ram.ram + start_addr;
335
336 int16_t* out = BRRcache + start_addr * 2;
337 wave_entry->samples = out;
338
339 /* BRR filter uses previous samples */
340 out [BRR_BLOCK_SIZE + 1] = 0;
341 out [BRR_BLOCK_SIZE + 2] = 0;
342 *out++ = 0;
343
344 unsigned block_header;
345
346 do
347 {
348 if ( addr == loop_ptr )
349 {
350 loop_start = out;
351 DEBUGF( "loop at %08lx (wave #%d)\n",
352 (unsigned long)(addr - RAM), raw_voice->waveform );
353 }
354
355 /* output position - preincrement */
356 out += BRR_BLOCK_SIZE;
357
358 decode_brr_block( voice, addr, out );
359
360 block_header = voice->wave.block_header;
361 addr = voice->wave.addr;
362
363 /* if next block has end flag set, this block ends early */
364 /* (verified) */
365 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
366 {
367 /* skip last 9 samples */
368 DEBUGF( "block early end\n" );
369 out -= 9;
370 break;
371 }
372 }
373 while ( !(block_header & 1) && addr < RAM + 0x10000 );
374
375 wave_entry->end = (out - 1 - wave_entry->samples) << 12;
376 wave_entry->loop = 0;
377
378 if ( (block_header & 2) )
379 {
380 if ( loop_start )
381 {
382 wave_entry->loop = out - loop_start;
383 wave_entry->end += 0x3000;
384
385 out [2] = loop_start [2];
386 out [3] = loop_start [3];
387 out [4] = loop_start [4];
388 }
389 else
390 {
391 DEBUGF( "loop point outside initial wave\n" );
392 }
393 }
394
395 DEBUGF( "end at %08lx (wave #%d)\n",
396 (unsigned long)(addr - RAM), raw_voice->waveform );
397
398 /* add to cache */
399 this->wave_entry_old [this->oldsize++] = *wave_entry;
400}
401
402static inline void
403brr_key_on( struct Spc_Dsp* this, struct src_dir const* sd,
404 struct voice_t* voice, struct raw_voice_t const* raw_voice )
405{
406 unsigned start_addr = letoh16( sd [raw_voice->waveform].start );
407 struct cache_entry_t* const wave_entry =
408 &this->wave_entry [raw_voice->waveform];
409
410 /* predecode BRR if not already */
411 if ( wave_entry->start_addr != start_addr )
82 { 412 {
83 DEBUGF( "decode at %08x (wave #%d)\n",
84 start_addr, raw_voice->waveform );
85
86 /* see if in cache */ 413 /* see if in cache */
87 int i; 414 for ( int i = 0; i < this->oldsize; i++ )
88 for ( i = 0; i < this->oldsize; i++ )
89 { 415 {
90 struct cache_entry_t* e = &this->wave_entry_old [i]; 416 struct cache_entry_t* e = &this->wave_entry_old [i];
417
91 if ( e->start_addr == start_addr ) 418 if ( e->start_addr == start_addr )
92 { 419 {
93 DEBUGF( "found in wave_entry_old (oldsize=%d)\n", 420 DEBUGF( "found in wave_entry_old (oldsize=%d)\n",
94 this->oldsize ); 421 this->oldsize );
95 *wave_entry = *e; 422 *wave_entry = *e;
96 goto wave_in_cache; 423 goto wave_in_cache; /* Wave in cache */
97 } 424 }
98 } 425 }
99 426
100 wave_entry->start_addr = start_addr; 427 /* actually decode it */
101 428 brr_decode_cache( this, sd, start_addr, voice, raw_voice );
102 uint8_t const* const loop_ptr =
103 RAM + letoh16(sd[raw_voice->waveform].loop);
104 short* loop_start = 0;
105
106 short* out = BRRcache + start_addr * 2;
107 wave_entry->samples = out;
108 *out++ = 0;
109 int smp1 = 0;
110 int smp2 = 0;
111
112 uint8_t const* addr = RAM + start_addr;
113 int block_header;
114 do
115 {
116 if ( addr == loop_ptr )
117 {
118 loop_start = out;
119 DEBUGF( "loop at %08lx (wave #%d)\n",
120 (unsigned long)(addr - RAM), raw_voice->waveform );
121 }
122
123 /* header */
124 block_header = *addr;
125 addr += 9;
126 voice->addr = addr;
127 int const filter = (block_header & 0x0C) - 0x08;
128
129 /* scaling
130 (invalid scaling gives -4096 for neg nybble, 0 for pos) */
131 static unsigned char const right_shifts [16] = {
132 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29, 29, 29,
133 };
134 static unsigned char const left_shifts [16] = {
135 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 11, 11
136 };
137 int const scale = block_header >> 4;
138 int const right_shift = right_shifts [scale];
139 int const left_shift = left_shifts [scale];
140
141 /* output position */
142 out += BRR_BLOCK_SIZE;
143 int offset = -BRR_BLOCK_SIZE << 2;
144
145 do /* decode and filter 16 samples */
146 {
147 /* Get nybble, sign-extend, then scale
148 get byte, select which nybble, sign-extend, then shift based
149 on scaling. also handles invalid scaling values. */
150 int delta = (int) (int8_t) (addr [offset >> 3] << (offset & 4))
151 >> right_shift << left_shift;
152
153 out [offset >> 2] = smp2;
154
155 if ( filter == 0 ) /* mode 0x08 (30-90% of the time) */
156 {
157 delta -= smp2 >> 1;
158 delta += smp2 >> 5;
159 smp2 = smp1;
160 delta += smp1;
161 delta += (-smp1 - (smp1 >> 1)) >> 5;
162 }
163 else
164 {
165 if ( filter == -4 ) /* mode 0x04 */
166 {
167 delta += smp1 >> 1;
168 delta += (-smp1) >> 5;
169 }
170 else if ( filter > -4 ) /* mode 0x0C */
171 {
172 delta -= smp2 >> 1;
173 delta += (smp2 + (smp2 >> 1)) >> 4;
174 delta += smp1;
175 delta += (-smp1 * 13) >> 7;
176 }
177 smp2 = smp1;
178 }
179
180 delta = CLAMP16( delta );
181 smp1 = (int16_t) (delta * 2); /* sign-extend */
182 }
183 while ( (offset += 4) != 0 );
184
185 /* if next block has end flag set, this block ends early */
186 /* (verified) */
187 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
188 {
189 /* skip last 9 samples */
190 out -= 9;
191 goto early_end;
192 }
193 }
194 while ( !(block_header & 1) && addr < RAM + 0x10000 );
195
196 out [0] = smp2;
197 out [1] = smp1;
198
199 early_end:
200 wave_entry->end = (out - 1 - wave_entry->samples) << 12;
201
202 wave_entry->loop = 0;
203 if ( (block_header & 2) )
204 {
205 if ( loop_start )
206 {
207 int loop = out - loop_start;
208 wave_entry->loop = loop;
209 wave_entry->end += 0x3000;
210 out [2] = loop_start [2];
211 out [3] = loop_start [3];
212 out [4] = loop_start [4];
213 }
214 else
215 {
216 DEBUGF( "loop point outside initial wave\n" );
217 }
218 }
219
220 DEBUGF( "end at %08lx (wave #%d)\n",
221 (unsigned long)(addr - RAM), raw_voice->waveform );
222
223 /* add to cache */
224 this->wave_entry_old [this->oldsize++] = *wave_entry;
225wave_in_cache:;
226 } 429 }
430
431wave_in_cache:
432 voice->wave.position = 3 * 0x1000 - 1; /* 0x2fff */
433 voice->wave.samples = wave_entry->samples;
434 voice->wave.end = wave_entry->end;
435 voice->wave.loop = wave_entry->loop;
436}
437
438static inline int brr_decode( struct src_dir const* sd, struct voice_t* voice,
439 struct raw_voice_t const* raw_voice )
440{
441 if ( voice->wave.position < voice->wave.end )
442 return 0;
443
444 long loop_len = voice->wave.loop << 12;
445
446 if ( !loop_len )
447 return 2;
448
449 voice->wave.position -= loop_len;
450 return 1;
451
452 (void)sd; (void)raw_voice;
227} 453}
228#endif
229 454
230static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, 455#else /* !SPC_BRRCACHE */
231 struct src_dir const* const sd, 456
232 struct raw_voice_t const* const raw_voice, 457static inline void
233 const int key_on_delay, const int vbit) ICODE_ATTR_SPC; 458brr_key_on( struct Spc_Dsp* this, struct src_dir const* sd,
234static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, 459 struct voice_t* voice, struct raw_voice_t const* raw_voice )
235 struct src_dir const* const sd, 460{
236 struct raw_voice_t const* const raw_voice, 461 voice->wave.addr = ram.ram + letoh16( sd [raw_voice->waveform].start );
237 const int key_on_delay, const int vbit) { 462 /* BRR filter uses previous samples */
463 voice->wave.samples [BRR_BLOCK_SIZE + 1] = 0;
464 voice->wave.samples [BRR_BLOCK_SIZE + 2] = 0;
465 /* force decode on next brr_decode call */
466 voice->wave.position = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1; /* 0x12fff */
467 voice->wave.block_header = 0; /* "previous" BRR header */
468 (void)this;
469}
470
471static inline int brr_decode( struct src_dir const* sd, struct voice_t* voice,
472 struct raw_voice_t const* raw_voice )
473{
238 #undef RAM 474 #undef RAM
475#if defined(CPU_ARM) && !SPC_BRRCACHE
476 uint8_t* const ram_ = ram.ram;
477 #define RAM ram_
478#else
239 #define RAM ram.ram 479 #define RAM ram.ram
240 int const env_rate_init = 0x7800; 480#endif
481
482 if ( voice->wave.position < BRR_BLOCK_SIZE * 0x1000 )
483 return 0;
484
485 voice->wave.position -= BRR_BLOCK_SIZE * 0x1000;
486
487 uint8_t const* addr = voice->wave.addr;
488
489 if ( addr >= RAM + 0x10000 )
490 addr -= 0x10000;
491
492 unsigned block_header = voice->wave.block_header;
493
494 /* action based on previous block's header */
495 int dec = 0;
496
497 if ( block_header & 1 )
498 {
499 addr = RAM + letoh16( sd [raw_voice->waveform].loop );
500 dec = 1;
501
502 if ( !(block_header & 2) ) /* 1% of the time */
503 {
504 /* first block was end block;
505 don't play anything (verified) */
506 return 2;
507 }
508 }
509
510 decode_brr_block( voice, addr, &voice->wave.samples [1 + BRR_BLOCK_SIZE] );
511
512 return dec;
513}
514#endif /* SPC_BRRCACHE */
515
516static void NO_INLINE ICODE_ATTR_SPC
517key_on( struct Spc_Dsp* const this, struct voice_t* const voice,
518 struct src_dir const* const sd,
519 struct raw_voice_t const* const raw_voice,
520 const int key_on_delay, const int vbit )
521{
241 voice->key_on_delay = key_on_delay; 522 voice->key_on_delay = key_on_delay;
523
242 if ( key_on_delay == 0 ) 524 if ( key_on_delay == 0 )
243 { 525 {
244 this->keys_down |= vbit; 526 this->keys_down |= vbit;
245 voice->envx = 0; 527 voice->envx = 0;
246 voice->env_mode = state_attack; 528 voice->env_mode = state_attack;
247 voice->env_timer = env_rate_init; /* TODO: inaccurate? */ 529 voice->env_timer = ENV_RATE_INIT; /* TODO: inaccurate? */
248 unsigned start_addr = letoh16(sd[raw_voice->waveform].start); 530 brr_key_on( this, sd, voice, raw_voice );
249 #if !SPC_BRRCACHE
250 {
251 voice->addr = RAM + start_addr;
252 /* BRR filter uses previous samples */
253 voice->samples [BRR_BLOCK_SIZE + 1] = 0;
254 voice->samples [BRR_BLOCK_SIZE + 2] = 0;
255 /* decode three samples immediately */
256 voice->position = (BRR_BLOCK_SIZE + 3) * 0x1000 - 1;
257 voice->block_header = 0; /* "previous" BRR header */
258 }
259 #else
260 {
261 voice->position = 3 * 0x1000 - 1;
262 struct cache_entry_t* const wave_entry =
263 &this->wave_entry [raw_voice->waveform];
264
265 /* predecode BRR if not already */
266 if ( wave_entry->start_addr != start_addr )
267 {
268 /* the following line can be replaced by the indicated block
269 in decode_brr() */
270 decode_brr( this, start_addr, voice, raw_voice );
271 }
272
273 voice->samples = wave_entry->samples;
274 voice->wave_end = wave_entry->end;
275 voice->wave_loop = wave_entry->loop;
276 }
277 #endif
278 } 531 }
279} 532}
280 533
@@ -287,10 +540,8 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
287#else 540#else
288 #define RAM ram.ram 541 #define RAM ram.ram
289#endif 542#endif
290#if 0
291 EXIT_TIMER(cpu); 543 EXIT_TIMER(cpu);
292 ENTER_TIMER(dsp); 544 ENTER_TIMER(dsp);
293#endif
294 545
295 /* Here we check for keys on/off. Docs say that successive writes 546 /* Here we check for keys on/off. Docs say that successive writes
296 to KON/KOF must be separated by at least 2 Ts periods or risk 547 to KON/KOF must be separated by at least 2 Ts periods or risk
@@ -327,98 +578,60 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
327 } 578 }
328 579
329 struct src_dir const* const sd = 580 struct src_dir const* const sd =
330 &ram.sd[this->r.g.wave_page * 0x100/sizeof(struct src_dir)]; 581 &ram.sd [this->r.g.wave_page * 0x100/sizeof(struct src_dir)];
331 582
332 #ifdef ROCKBOX_BIG_ENDIAN
333 /* Convert endiannesses before entering loops - these
334 get used alot */
335 const uint32_t rates[VOICE_COUNT] =
336 {
337 GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
338 GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
339 GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
340 GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
341 GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
342 GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
343 GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
344 GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
345 };
346 #define VOICE_RATE(x) *(x)
347 #define IF_RBE(...) __VA_ARGS__
348 #ifdef CPU_COLDFIRE
349 /* Initialize mask register with the buffer address mask */
350 asm volatile ("move.l %[m], %%mask" : : [m]"i"(FIR_BUF_MASK));
351 const int echo_wrap = (this->r.g.echo_delay & 15) * 0x800;
352 const int echo_start = this->r.g.echo_page * 0x100;
353 #endif /* CPU_COLDFIRE */
354 #else
355 #define VOICE_RATE(x) (GET_LE16(raw_voice->rate) & 0x3FFF)
356 #define IF_RBE(...)
357 #endif /* ROCKBOX_BIG_ENDIAN */
358
359#if !SPC_NOINTERP 583#if !SPC_NOINTERP
360 int const slow_gaussian = (this->r.g.pitch_mods >> 1) | 584 int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
361 this->r.g.noise_enables; 585 this->r.g.noise_enables;
586#endif
587#if !SPC_NOECHO
588 int const echo_start = this->r.g.echo_page * 0x100;
589 int const echo_delay = (this->r.g.echo_delay & 15) * 0x800;
362#endif 590#endif
363 /* (g.flags & 0x40) ? 30 : 14 */ 591 /* (g.flags & 0x40) ? 30 : 14 */
364 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8; 592 int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
365 int const global_vol_0 = this->r.g.volume_0; 593 int const global_vol_0 = this->r.g.volume_0;
366 int const global_vol_1 = this->r.g.volume_1; 594 int const global_vol_1 = this->r.g.volume_1;
367 595
368 /* each rate divides exactly into 0x7800 without remainder */
369 int const env_rate_init = 0x7800;
370 static unsigned short const env_rates [0x20] ICONST_ATTR_SPC =
371 {
372 0x0000, 0x000F, 0x0014, 0x0018, 0x001E, 0x0028, 0x0030, 0x003C,
373 0x0050, 0x0060, 0x0078, 0x00A0, 0x00C0, 0x00F0, 0x0140, 0x0180,
374 0x01E0, 0x0280, 0x0300, 0x03C0, 0x0500, 0x0600, 0x0780, 0x0A00,
375 0x0C00, 0x0F00, 0x1400, 0x1800, 0x1E00, 0x2800, 0x3C00, 0x7800
376 };
377
378 do /* one pair of output samples per iteration */ 596 do /* one pair of output samples per iteration */
379 { 597 {
380 /* Noise */ 598 /* Noise */
381 if ( this->r.g.noise_enables ) 599 if ( this->r.g.noise_enables )
382 { 600 {
383 if ( (this->noise_count -= 601 this->noise_count -= env_rates [this->r.g.flags & 0x1F];
384 env_rates [this->r.g.flags & 0x1F]) <= 0 ) 602
603 if ( this->noise_count <= 0 )
385 { 604 {
386 this->noise_count = env_rate_init; 605 this->noise_count = ENV_RATE_INIT;
387 int feedback = (this->noise << 13) ^ (this->noise << 14); 606 int feedback = (this->noise << 13) ^ (this->noise << 14);
388 this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1); 607 this->noise = (feedback & 0x8000) ^ (this->noise >> 1 & ~1);
389 } 608 }
390 } 609 }
391 610
392#if !SPC_NOECHO 611 #if !SPC_NOECHO
393 int echo_0 = 0; 612 int echo_0 = 0, echo_1 = 0;
394 int echo_1 = 0; 613 #endif /* !SPC_NOECHO */
395#endif
396 long prev_outx = 0; /* TODO: correct value for first channel? */ 614 long prev_outx = 0; /* TODO: correct value for first channel? */
397 int chans_0 = 0; 615 int chans_0 = 0, chans_1 = 0;
398 int chans_1 = 0; 616
399 /* TODO: put raw_voice pointer in voice_t? */ 617 /* TODO: put raw_voice pointer in voice_t? */
400 struct raw_voice_t * raw_voice = this->r.voice; 618 struct raw_voice_t * raw_voice = this->r.voice;
401 struct voice_t* voice = this->voice_state; 619 struct voice_t* voice = this->voice_state;
402 int vbit = 1; 620
403 IF_RBE( const uint32_t* vr = rates; ) 621 for (int vbit = 1; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice )
404 for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
405 { 622 {
406 /* pregen involves checking keyon, etc */ 623 /* pregen involves checking keyon, etc */
407#if 0
408 ENTER_TIMER(dsp_pregen); 624 ENTER_TIMER(dsp_pregen);
409#endif
410 625
411 /* Key on events are delayed */ 626 /* Key on events are delayed */
412 int key_on_delay = voice->key_on_delay; 627 int key_on_delay = voice->key_on_delay;
413 628
414 if ( UNLIKELY ( --key_on_delay >= 0 ) ) /* <1% of the time */ 629 if ( UNLIKELY ( --key_on_delay >= 0 ) ) /* <1% of the time */
415 { 630 key_on( this, voice, sd, raw_voice, key_on_delay, vbit );
416 key_on(this,voice,sd,raw_voice,key_on_delay,vbit);
417 }
418 631
419 if ( !(this->keys_down & vbit) ) /* Silent channel */ 632 if ( !(this->keys_down & vbit) ) /* Silent channel */
420 { 633 {
421 silent_chan: 634 silent_chan:
422 raw_voice->envx = 0; 635 raw_voice->envx = 0;
423 raw_voice->outx = 0; 636 raw_voice->outx = 0;
424 prev_outx = 0; 637 prev_outx = 0;
@@ -461,7 +674,7 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
461 voice->envx = envx; 674 voice->envx = envx;
462 /* TODO: should this be 8? */ 675 /* TODO: should this be 8? */
463 raw_voice->envx = envx >> 4; 676 raw_voice->envx = envx >> 4;
464 env_timer = env_rate_init; 677 env_timer = ENV_RATE_INIT;
465 } 678 }
466 679
467 int sustain_level = adsr1 >> 5; 680 int sustain_level = adsr1 >> 5;
@@ -561,994 +774,131 @@ void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
561 } 774 }
562 } 775 }
563 init_env_timer: 776 init_env_timer:
564 env_timer = env_rate_init; 777 env_timer = ENV_RATE_INIT;
565 write_env_timer: 778 write_env_timer:
566 voice->env_timer = env_timer; 779 voice->env_timer = env_timer;
567 env_end:; 780 env_end:;
568 } 781 }
569#if 0 782
570 EXIT_TIMER(dsp_pregen); 783 EXIT_TIMER(dsp_pregen);
571 784
572 ENTER_TIMER(dsp_gen); 785 ENTER_TIMER(dsp_gen);
573#endif
574 #if !SPC_BRRCACHE
575 /* Decode BRR block */
576 if ( voice->position >= BRR_BLOCK_SIZE * 0x1000 )
577 {
578 voice->position -= BRR_BLOCK_SIZE * 0x1000;
579
580 uint8_t const* addr = voice->addr;
581 if ( addr >= RAM + 0x10000 )
582 addr -= 0x10000;
583
584 /* action based on previous block's header */
585 if ( voice->block_header & 1 )
586 {
587 addr = RAM + letoh16(sd[raw_voice->waveform].loop);
588 this->r.g.wave_ended |= vbit;
589 if ( !(voice->block_header & 2) ) /* 1% of the time */
590 {
591 /* first block was end block;
592 don't play anything (verified) */
593 /* bit was set, so this clears it */
594 this->keys_down ^= vbit;
595
596 /* since voice->envx is 0,
597 samples and position don't matter */
598 raw_voice->envx = 0;
599 voice->envx = 0;
600 goto skip_decode;
601 }
602 }
603
604 /* header */
605 int const block_header = *addr;
606 addr += 9;
607 voice->addr = addr;
608 voice->block_header = block_header;
609
610 /* previous samples */
611 int smp2 = voice->samples [BRR_BLOCK_SIZE + 1];
612 int smp1 = voice->samples [BRR_BLOCK_SIZE + 2];
613 voice->samples [0] = voice->samples [BRR_BLOCK_SIZE];
614
615 /* output position */
616 short* out = voice->samples + (1 + BRR_BLOCK_SIZE);
617 int offset = -BRR_BLOCK_SIZE << 2;
618
619 /* if next block has end flag set,
620 this block ends early (verified) */
621 if ( (block_header & 3) != 3 && (*addr & 3) == 1 )
622 {
623 /* arrange for last 9 samples to be skipped */
624 int const skip = 9;
625 out += (skip & 1);
626 voice->samples [skip] = voice->samples [BRR_BLOCK_SIZE];
627 voice->position += skip * 0x1000;
628 offset = (-BRR_BLOCK_SIZE + (skip & ~1)) << 2;
629 addr -= skip / 2;
630 /* force sample to end on next decode */
631 voice->block_header = 1;
632 }
633
634 int const filter = block_header & 0x0c;
635 int const scale = block_header >> 4;
636
637 if ( filter == 0x08 ) /* filter 2 (30-90% of the time) */
638 {
639 /* y[n] = x[n] + 61/32 * y[n-1] - 15/16 * y[n-2] */
640 do /* decode and filter 16 samples */
641 {
642 /* Get nybble, sign-extend, then scale
643 get byte, select which nybble, sign-extend, then shift
644 based on scaling. */
645 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
646 delta = (delta << scale) >> 1;
647 786
648 if (scale > 0xc) 787 switch ( brr_decode( sd, voice, raw_voice ) )
649 delta = (delta >> 17) << 11; 788 {
650 789 case 2:
651 out [offset >> 2] = smp2; 790 /* bit was set, so this clears it */
652 791 this->keys_down ^= vbit;
653 delta -= smp2 >> 1;
654 delta += smp2 >> 5;
655 delta += smp1;
656 delta += (-smp1 - (smp1 >> 1)) >> 5;
657
658 delta = CLAMP16( delta );
659 smp2 = smp1;
660 smp1 = (int16_t) (delta * 2); /* sign-extend */
661 }
662 while ( (offset += 4) != 0 );
663 }
664 else if ( filter == 0x04 ) /* filter 1 */
665 {
666 /* y[n] = x[n] + 15/16 * y[n-1] */
667 do /* decode and filter 16 samples */
668 {
669 /* Get nybble, sign-extend, then scale
670 get byte, select which nybble, sign-extend, then shift
671 based on scaling. */
672 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
673 delta = (delta << scale) >> 1;
674
675 if (scale > 0xc)
676 delta = (delta >> 17) << 11;
677
678 out [offset >> 2] = smp2;
679
680 delta += smp1 >> 1;
681 delta += (-smp1) >> 5;
682
683 delta = CLAMP16( delta );
684 smp2 = smp1;
685 smp1 = (int16_t) (delta * 2); /* sign-extend */
686 }
687 while ( (offset += 4) != 0 );
688 }
689 else if ( filter == 0x0c ) /* filter 3 */
690 {
691 /* y[n] = x[n] + 115/64 * y[n-1] - 13/16 * y[n-2] */
692 do /* decode and filter 16 samples */
693 {
694 /* Get nybble, sign-extend, then scale
695 get byte, select which nybble, sign-extend, then shift
696 based on scaling. */
697 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
698 delta = (delta << scale) >> 1;
699
700 if (scale > 0xc)
701 delta = (delta >> 17) << 11;
702
703 out [offset >> 2] = smp2;
704
705 delta -= smp2 >> 1;
706 delta += (smp2 + (smp2 >> 1)) >> 4;
707 delta += smp1;
708 delta += (-smp1 * 13) >> 7;
709
710 delta = CLAMP16( delta );
711 smp2 = smp1;
712 smp1 = (int16_t) (delta * 2); /* sign-extend */
713 }
714 while ( (offset += 4) != 0 );
715 }
716 else /* filter 0 */
717 {
718 /* y[n] = x[n] */
719 do /* decode and filter 16 samples */
720 {
721 /* Get nybble, sign-extend, then scale
722 get byte, select which nybble, sign-extend, then shift
723 based on scaling. */
724 int delta = (int8_t)(addr [offset >> 3] << (offset & 4)) >> 4;
725 delta = (delta << scale) >> 1;
726
727 if (scale > 0xc)
728 delta = (delta >> 17) << 11;
729
730 out [offset >> 2] = smp2;
731
732 smp2 = smp1;
733 smp1 = delta * 2;
734 }
735 while ( (offset += 4) != 0 );
736 }
737 792
738 out [0] = smp2; 793 /* since voice->envx is 0,
739 out [1] = smp1; 794 samples and position don't matter */
740 795 raw_voice->envx = 0;
741 skip_decode:; 796 voice->envx = 0;
797 case 1:
798 this->r.g.wave_ended |= vbit;
742 } 799 }
743 #endif /* !SPC_BRRCACHE */ 800
744 /* Get rate (with possible modulation) */ 801 /* Get rate (with possible modulation) */
745 int rate = VOICE_RATE(vr); 802 int rate = voice->rate;
746 if ( this->r.g.pitch_mods & vbit ) 803 if ( this->r.g.pitch_mods & vbit )
747 rate = (rate * (prev_outx + 32768)) >> 15; 804 rate = (rate * (prev_outx + 32768)) >> 15;
748 805
806 uint32_t position = voice->wave.position;
807 voice->wave.position += rate;
808
809 int output;
810 int amp_0, amp_1;
811
749 #if !SPC_NOINTERP 812 #if !SPC_NOINTERP
750 /* Interleved gauss table (to improve cache coherency). */
751 /* gauss [i * 2 + j] = normal_gauss [(1 - j) * 256 + i] */
752 static short const gauss [512] ICONST_ATTR_SPC MEM_ALIGN_ATTR =
753 {
754370,1305, 366,1305, 362,1304, 358,1304, 354,1304, 351,1304, 347,1304, 343,1303,
755339,1303, 336,1303, 332,1302, 328,1302, 325,1301, 321,1300, 318,1300, 314,1299,
756311,1298, 307,1297, 304,1297, 300,1296, 297,1295, 293,1294, 290,1293, 286,1292,
757283,1291, 280,1290, 276,1288, 273,1287, 270,1286, 267,1284, 263,1283, 260,1282,
758257,1280, 254,1279, 251,1277, 248,1275, 245,1274, 242,1272, 239,1270, 236,1269,
759233,1267, 230,1265, 227,1263, 224,1261, 221,1259, 218,1257, 215,1255, 212,1253,
760210,1251, 207,1248, 204,1246, 201,1244, 199,1241, 196,1239, 193,1237, 191,1234,
761188,1232, 186,1229, 183,1227, 180,1224, 178,1221, 175,1219, 173,1216, 171,1213,
762168,1210, 166,1207, 163,1205, 161,1202, 159,1199, 156,1196, 154,1193, 152,1190,
763150,1186, 147,1183, 145,1180, 143,1177, 141,1174, 139,1170, 137,1167, 134,1164,
764132,1160, 130,1157, 128,1153, 126,1150, 124,1146, 122,1143, 120,1139, 118,1136,
765117,1132, 115,1128, 113,1125, 111,1121, 109,1117, 107,1113, 106,1109, 104,1106,
766102,1102, 100,1098, 99,1094, 97,1090, 95,1086, 94,1082, 92,1078, 90,1074,
767 89,1070, 87,1066, 86,1061, 84,1057, 83,1053, 81,1049, 80,1045, 78,1040,
768 77,1036, 76,1032, 74,1027, 73,1023, 71,1019, 70,1014, 69,1010, 67,1005,
769 66,1001, 65, 997, 64, 992, 62, 988, 61, 983, 60, 978, 59, 974, 58, 969,
770 56, 965, 55, 960, 54, 955, 53, 951, 52, 946, 51, 941, 50, 937, 49, 932,
771 48, 927, 47, 923, 46, 918, 45, 913, 44, 908, 43, 904, 42, 899, 41, 894,
772 40, 889, 39, 884, 38, 880, 37, 875, 36, 870, 36, 865, 35, 860, 34, 855,
773 33, 851, 32, 846, 32, 841, 31, 836, 30, 831, 29, 826, 29, 821, 28, 816,
774 27, 811, 27, 806, 26, 802, 25, 797, 24, 792, 24, 787, 23, 782, 23, 777,
775 22, 772, 21, 767, 21, 762, 20, 757, 20, 752, 19, 747, 19, 742, 18, 737,
776 17, 732, 17, 728, 16, 723, 16, 718, 15, 713, 15, 708, 15, 703, 14, 698,
777 14, 693, 13, 688, 13, 683, 12, 678, 12, 674, 11, 669, 11, 664, 11, 659,
778 10, 654, 10, 649, 10, 644, 9, 640, 9, 635, 9, 630, 8, 625, 8, 620,
779 8, 615, 7, 611, 7, 606, 7, 601, 6, 596, 6, 592, 6, 587, 6, 582,
780 5, 577, 5, 573, 5, 568, 5, 563, 4, 559, 4, 554, 4, 550, 4, 545,
781 4, 540, 3, 536, 3, 531, 3, 527, 3, 522, 3, 517, 2, 513, 2, 508,
782 2, 504, 2, 499, 2, 495, 2, 491, 2, 486, 1, 482, 1, 477, 1, 473,
783 1, 469, 1, 464, 1, 460, 1, 456, 1, 451, 1, 447, 1, 443, 1, 439,
784 0, 434, 0, 430, 0, 426, 0, 422, 0, 418, 0, 414, 0, 410, 0, 405,
785 0, 401, 0, 397, 0, 393, 0, 389, 0, 385, 0, 381, 0, 378, 0, 374,
786 };
787 /* Gaussian interpolation using most recent 4 samples */ 813 /* Gaussian interpolation using most recent 4 samples */
788 long position = voice->position; 814
789 voice->position += rate;
790 short const* interp = voice->samples + (position >> 12);
791 int offset = position >> 4 & 0xFF;
792
793 /* Only left half of gaussian kernel is in table, so we must mirror 815 /* Only left half of gaussian kernel is in table, so we must mirror
794 for right half */ 816 for right half */
795 short const* fwd = gauss + offset * 2; 817 int offset = ( position >> 4 ) & 0xFF;
796 short const* rev = gauss + 510 - offset * 2; 818 int16_t const* fwd = gauss_table + offset * 2;
819 int16_t const* rev = gauss_table + 510 - offset * 2;
797 820
798 /* Use faster gaussian interpolation when exact result isn't needed 821 /* Use faster gaussian interpolation when exact result isn't needed
799 by pitch modulator of next channel */ 822 by pitch modulator of next channel */
800 int amp_0, amp_1; /* Also serve as temps _0, and _1 */
801 if ( LIKELY ( !(slow_gaussian & vbit) ) ) /* 99% of the time */ 823 if ( LIKELY ( !(slow_gaussian & vbit) ) ) /* 99% of the time */
802 { 824 {
803 /* Main optimization is lack of clamping. Not a problem since 825 /* Main optimization is lack of clamping. Not a problem since
804 output never goes more than +/- 16 outside 16-bit range and 826 output never goes more than +/- 16 outside 16-bit range and
805 things are clamped later anyway. Other optimization is to 827 things are clamped later anyway. Other optimization is to
806 preserve fractional accuracy, eliminating several masks. */ 828 preserve fractional accuracy, eliminating several masks. */
807 #if defined (CPU_ARM) 829 output = gaussian_fast_interp( voice->wave.samples, position,
808 int output; 830 fwd, rev );
809 int _2, _3; /* All-purpose temps */ 831 output = gaussian_fast_amp( voice, output, &amp_0, &amp_1 );
810 /* Multiple ASM blocks keep regs free and reduce result
811 * latency issues. */
812 #if ARM_ARCH >= 6
813 /* Interpolate */
814 asm volatile (
815 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
816 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
817 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
818 "ldr %[_3], [%[rev]] \r\n" /* _3=r0r1 */
819 "smuad %[out], %[_0], %[_2] \r\n" /* out=f0*i0 + f1*i1 */
820 "smladx %[out], %[_1], %[_3], %[out] \r\n" /* out+=r1*i2 + r0*i3 */
821 : [out]"=r"(output),
822 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
823 [_2]"=&r"(_2), [_3]"=r"(_3)
824 : [fwd]"r"(fwd), [rev]"r"(rev),
825 [interp]"r"(interp));
826 /* Apply voice envelope */
827 asm volatile (
828 "mov %[_2], %[out], asr #(11-5) \r\n" /* To do >> 16 later */
829 "mul %[out], %[_2], %[envx] \r\n" /* and avoid exp. shift */
830 : [out]"+r"(output), [_2]"=&r"(_2)
831 : [envx]"r"((int)voice->envx));
832 /* Apply left and right volume */
833 asm volatile (
834 "smulwb %[amp_0], %[out], %[vvol_0] \r\n" /* (32x16->48)[47:16]->[31:0] */
835 "smulwb %[amp_1], %[out], %[vvol_1] \r\n"
836 : [out]"+r"(output),
837 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
838 : [vvol_0]"r"(voice->volume[0]),
839 [vvol_1]"r"(voice->volume[1]));
840
841 raw_voice->outx = output >> (8+5); /* 'output' still 5 bits too big */
842 #else /* ARM_ARCH < 6 */
843 /* Perform gaussian interpolation on four samples */
844 asm volatile (
845 "ldrsh %[_0], [%[interp]] \r\n"
846 "ldrsh %[_2], [%[fwd]] \r\n"
847 "ldrsh %[_1], [%[interp], #2] \r\n"
848 "ldrsh %[_3], [%[fwd], #2] \r\n"
849 "mul %[out], %[_0], %[_2] \r\n" /* out= fwd[0]*interp[0] */
850 "ldrsh %[_0], [%[interp], #4] \r\n"
851 "ldrsh %[_2], [%[rev], #2] \r\n"
852 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=fwd[1]*interp[1] */
853 "ldrsh %[_1], [%[interp], #6] \r\n"
854 "ldrsh %[_3], [%[rev]] \r\n"
855 "mla %[out], %[_0], %[_2], %[out] \r\n" /* out+=rev[1]*interp[2] */
856 "mla %[out], %[_1], %[_3], %[out] \r\n" /* out+=rev[0]*interp[3] */
857 : [out]"=&r"(output),
858 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
859 [_2]"=&r"(_2), [_3]"=&r"(_3)
860 : [fwd]"r"(fwd), [rev]"r"(rev),
861 [interp]"r"(interp));
862 /* Apply voice envelope */
863 asm volatile (
864 "mov %[_2], %[out], asr #11 \r\n"
865 "mul %[out], %[_2], %[envx] \r\n"
866 : [out]"+r"(output), [_2]"=&r"(_2)
867 : [envx]"r"((int)voice->envx));
868 /* Reduce and apply left and right volume */
869 asm volatile (
870 "mov %[out], %[out], asr #11 \r\n"
871 "mul %[amp_0], %[out], %[vvol_0] \r\n"
872 "mul %[amp_1], %[out], %[vvol_1] \r\n"
873 : [out]"+r"(output),
874 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
875 : [vvol_0]"r"((int)voice->volume[0]),
876 [vvol_1]"r"((int)voice->volume[1]));
877
878 raw_voice->outx = output >> 8;
879 #endif /* ARM_ARCH */
880 #else /* Unoptimized CPU */
881 int output = (((fwd [0] * interp [0] +
882 fwd [1] * interp [1] +
883 rev [1] * interp [2] +
884 rev [0] * interp [3] ) >> 11) * voice->envx) >> 11;
885
886 /* duplicated here to give compiler more to run in parallel */
887 amp_0 = voice->volume [0] * output;
888 amp_1 = voice->volume [1] * output;
889
890 raw_voice->outx = output >> 8;
891 #endif /* CPU_* */
892 } 832 }
893 else /* slow gaussian */ 833 else /* slow gaussian */
834 #endif /* !SPC_NOINTERP (else two-point linear interpolation) */
894 { 835 {
895 #if defined(CPU_ARM) 836 output = *(int16_t *)&this->noise;
896 #if ARM_ARCH >= 6
897 int output = *(int16_t*) &this->noise;
898
899 if ( !(this->r.g.noise_enables & vbit) )
900 {
901 /* Interpolate */
902 int _2, _3;
903 asm volatile (
904 /* NOTE: often-unaligned accesses */
905 "ldr %[_0], [%[interp]] \r\n" /* _0=i0i1 */
906 "ldr %[_2], [%[fwd]] \r\n" /* _2=f0f1 */
907 "ldr %[_1], [%[interp], #4] \r\n" /* _1=i2i3 */
908 "ldr %[_3], [%[rev]] \r\n" /* _3=f2f3 */
909 "smulbb %[out], %[_0], %[_2] \r\n" /* out=f0*i0 */
910 "smultt %[_0], %[_0], %[_2] \r\n" /* _0=f1*i1 */
911 "smulbt %[_2], %[_1], %[_3] \r\n" /* _2=r1*i2 */
912 "smultb %[_3], %[_1], %[_3] \r\n" /* _3=r0*i3 */
913 : [out]"=r"(output),
914 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
915 [_2]"=&r"(_2), [_3]"=r"(_3)
916 : [fwd]"r"(fwd), [rev]"r"(rev),
917 [interp]"r"(interp));
918 asm volatile (
919 "mov %[out], %[out], asr#12 \r\n"
920 "add %[_0], %[out], %[_0], asr #12 \r\n"
921 "add %[_2], %[_0], %[_2], asr #12 \r\n"
922 "pkhbt %[_0], %[_2], %[_3], asl #4 \r\n" /* _3[31:16], _2[15:0] */
923 "sadd16 %[_0], %[_0], %[_0] \r\n" /* _3[31:16]*2, _2[15:0]*2 */
924 "qsubaddx %[out], %[_0], %[_0] \r\n" /* out[15:0]=
925 * sat16(_3[31:16]+_2[15:0]) */
926 : [out]"+r"(output),
927 [_0]"+r"(amp_0), [_2]"+r"(_2), [_3]"+r"(_3));
928 }
929 /* Apply voice envelope */
930 asm volatile (
931 "smulbb %[out], %[out], %[envx] \r\n"
932 : [out]"+r"(output)
933 : [envx]"r"(voice->envx));
934 /* Reduce and apply left and right volume */
935 asm volatile (
936 "mov %[out], %[out], asr #11 \r\n"
937 "bic %[out], %[out], #0x1 \r\n"
938 "mul %[amp_0], %[out], %[vvol_0] \r\n"
939 "mul %[amp_1], %[out], %[vvol_1] \r\n"
940 : [out]"+r"(output),
941 [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
942 : [vvol_0]"r"((int)voice->volume[0]),
943 [vvol_1]"r"((int)voice->volume[1]));
944
945 prev_outx = output;
946 raw_voice->outx = output >> 8;
947 #else /* ARM_ARCH < 6 */
948 int output = *(int16_t*) &this->noise;
949
950 if ( !(this->r.g.noise_enables & vbit) )
951 {
952 /* Interpolate */
953 int _2, _3;
954 asm volatile (
955 "ldrsh %[_0], [%[interp]] \r\n"
956 "ldrsh %[_2], [%[fwd]] \r\n"
957 "ldrsh %[_1], [%[interp], #2] \r\n"
958 "ldrsh %[_3], [%[fwd], #2] \r\n"
959 "mul %[out], %[_2], %[_0] \r\n" /* fwd[0]*interp[0] */
960 "ldrsh %[_2], [%[rev], #2] \r\n"
961 "mul %[_0], %[_3], %[_1] \r\n" /* fwd[1]*interp[1] */
962 "ldrsh %[_1], [%[interp], #4] \r\n"
963 "mov %[out], %[out], asr #12 \r\n"
964 "ldrsh %[_3], [%[rev]] \r\n"
965 "mul %[_2], %[_1], %[_2] \r\n" /* rev[1]*interp[2] */
966 "ldrsh %[_1], [%[interp], #6] \r\n"
967 "add %[_0], %[out], %[_0], asr #12 \r\n"
968 "mul %[_3], %[_1], %[_3] \r\n" /* rev[0]*interp[3] */
969 "add %[_2], %[_0], %[_2], asr #12 \r\n"
970 "mov %[_2], %[_2], lsl #17 \r\n"
971 "mov %[_3], %[_3], asr #12 \r\n"
972 "mov %[_3], %[_3], asl #1 \r\n"
973 "add %[out], %[_3], %[_2], asr #16 \r\n"
974 : [out]"=&r"(output),
975 [_0]"=&r"(amp_0), [_1]"=&r"(amp_1),
976 [_2]"=&r"(_2), [_3]"=&r"(_3)
977 : [fwd]"r"(fwd), [rev]"r"(rev),
978 [interp]"r"(interp));
979
980 output = CLAMP16(output);
981 }
982 /* Apply voice envelope */
983 asm volatile (
984 "mul %[_0], %[out], %[envx] \r\n"
985 : [_0]"=r"(amp_0)
986 : [out]"r"(output), [envx]"r"((int)voice->envx));
987 /* Reduce and apply left and right volume */
988 asm volatile (
989 "mov %[out], %[amp_0], asr #11 \r\n" /* amp_0 = _0 */
990 "bic %[out], %[out], #0x1 \r\n"
991 "mul %[amp_0], %[out], %[vvol_0] \r\n"
992 "mul %[amp_1], %[out], %[vvol_1] \r\n"
993 : [out]"+r"(output),
994 [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1)
995 : [vvol_0]"r"((int)voice->volume[0]),
996 [vvol_1]"r"((int)voice->volume[1]));
997
998 prev_outx = output;
999 raw_voice->outx = output >> 8;
1000 #endif /* ARM_ARCH >= 6 */
1001 #else /* Unoptimized CPU */
1002 int output = *(int16_t*) &this->noise;
1003 837
1004 if ( !(this->r.g.noise_enables & vbit) ) 838 if ( !(this->r.g.noise_enables & vbit) )
1005 { 839 output = interp( voice->wave.samples, position, fwd, rev );
1006 output = (fwd [0] * interp [0]) & ~0xFFF;
1007 output = (output + fwd [1] * interp [1]) & ~0xFFF;
1008 output = (output + rev [1] * interp [2]) >> 12;
1009 output = (int16_t) (output * 2);
1010 output += ((rev [0] * interp [3]) >> 12) * 2;
1011 output = CLAMP16( output );
1012 }
1013 output = (output * voice->envx) >> 11 & ~1;
1014
1015 /* duplicated here to give compiler more to run in parallel */
1016 amp_0 = voice->volume [0] * output;
1017 amp_1 = voice->volume [1] * output;
1018
1019 prev_outx = output;
1020 raw_voice->outx = output >> 8;
1021 #endif /* CPU_* */
1022 }
1023 #else /* SPCNOINTERP */
1024 /* two-point linear interpolation */
1025 #ifdef CPU_COLDFIRE
1026 int amp_0 = (int16_t)this->noise;
1027 int amp_1;
1028
1029 if ( (this->r.g.noise_enables & vbit) == 0 )
1030 {
1031 uint32_t f = voice->position;
1032 int32_t y0;
1033
1034 /**
1035 * Formula (fastest found so far of MANY):
1036 * output = y0 + f*y1 - f*y0
1037 */
1038 asm volatile (
1039 /* separate fractional and whole parts */
1040 "move.l %[f], %[y1] \r\n"
1041 "and.l #0xfff, %[f] \r\n"
1042 "lsr.l %[sh], %[y1] \r\n"
1043 /* load samples y0 (upper) & y1 (lower) */
1044 "move.l 2(%[s], %[y1].l*2), %[y1] \r\n"
1045 /* %acc0 = f*y1 */
1046 "mac.w %[f]l, %[y1]l, %%acc0 \r\n"
1047 /* %acc0 -= f*y0 */
1048 "msac.w %[f]l, %[y1]u, %%acc0 \r\n"
1049 /* separate out y0 and sign extend */
1050 "swap %[y1] \r\n"
1051 "movea.w %[y1], %[y0] \r\n"
1052 /* fetch result, scale down and add y0 */
1053 "movclr.l %%acc0, %[y1] \r\n"
1054 /* output = y0 + (result >> 12) */
1055 "asr.l %[sh], %[y1] \r\n"
1056 "add.l %[y0], %[y1] \r\n"
1057 : [f]"+d"(f), [y0]"=&a"(y0), [y1]"=&d"(amp_0)
1058 : [s]"a"(voice->samples), [sh]"d"(12));
1059 }
1060 840
1061 /* apply voice envelope to output */ 841 /* Apply envelope and volume */
1062 asm volatile ( 842 output = apply_amp( voice, output, &amp_0, &amp_1 );
1063 "mac.w %[out]l, %[envx]l, %%acc0 \r\n"
1064 :
1065 : [out]"r"(amp_0), [envx]"r"(voice->envx));
1066
1067 /* advance voice position */
1068 voice->position += rate;
1069
1070 /* fetch output, scale and apply left and right
1071 voice volume */
1072 asm volatile (
1073 "movclr.l %%acc0, %[out] \r\n"
1074 "asr.l %[sh], %[out] \r\n"
1075 "mac.l %[vvol_0], %[out], %%acc0 \r\n"
1076 "mac.l %[vvol_1], %[out], %%acc1 \r\n"
1077 : [out]"=&d"(amp_0)
1078 : [vvol_0]"r"((int)voice->volume[0]),
1079 [vvol_1]"r"((int)voice->volume[1]),
1080 [sh]"d"(11));
1081
1082 /* save this output into previous, scale and save in
1083 output register */
1084 prev_outx = amp_0;
1085 raw_voice->outx = amp_0 >> 8;
1086
1087 /* fetch final voice output */
1088 asm volatile (
1089 "movclr.l %%acc0, %[amp_0] \r\n"
1090 "movclr.l %%acc1, %[amp_1] \r\n"
1091 : [amp_0]"=r"(amp_0), [amp_1]"=r"(amp_1));
1092 #elif defined (CPU_ARM)
1093 int amp_0, amp_1;
1094
1095 if ( (this->r.g.noise_enables & vbit) != 0 )
1096 {
1097 amp_0 = *(int16_t *)&this->noise;
1098 }
1099 else
1100 {
1101 uint32_t f = voice->position;
1102 amp_0 = (uint32_t)voice->samples;
1103
1104 asm volatile(
1105 "mov %[y1], %[f], lsr #12 \r\n"
1106 "eor %[f], %[f], %[y1], lsl #12 \r\n"
1107 "add %[y1], %[y0], %[y1], lsl #1 \r\n"
1108 "ldrsh %[y0], [%[y1], #2] \r\n"
1109 "ldrsh %[y1], [%[y1], #4] \r\n"
1110 "sub %[y1], %[y1], %[y0] \r\n"
1111 "mul %[f], %[y1], %[f] \r\n"
1112 "add %[y0], %[y0], %[f], asr #12 \r\n"
1113 : [f]"+r"(f), [y0]"+r"(amp_0), [y1]"=&r"(amp_1));
1114 }
1115
1116 voice->position += rate;
1117
1118 asm volatile(
1119 "mul %[amp_1], %[amp_0], %[envx] \r\n"
1120 "mov %[amp_0], %[amp_1], asr #11 \r\n"
1121 "mov %[amp_1], %[amp_0], asr #8 \r\n"
1122 : [amp_0]"+r"(amp_0), [amp_1]"=r"(amp_1)
1123 : [envx]"r"(voice->envx));
1124
1125 prev_outx = amp_0;
1126 raw_voice->outx = (int8_t)amp_1;
1127
1128 asm volatile(
1129 "mul %[amp_1], %[amp_0], %[vol_1] \r\n"
1130 "mul %[amp_0], %[vol_0], %[amp_0] \r\n"
1131 : [amp_0]"+r"(amp_0), [amp_1]"=&r"(amp_1)
1132 : [vol_0]"r"((int)voice->volume[0]),
1133 [vol_1]"r"((int)voice->volume[1]));
1134 #else /* Unoptimized CPU */
1135 int output;
1136
1137 if ( (this->r.g.noise_enables & vbit) == 0 )
1138 {
1139 int const fraction = voice->position & 0xfff;
1140 short const* const pos = (voice->samples + (voice->position >> 12)) + 1;
1141 output = pos[0] + ((fraction * (pos[1] - pos[0])) >> 12);
1142 } else {
1143 output = *(int16_t *)&this->noise;
1144 } 843 }
1145 844
1146 voice->position += rate;
1147
1148 output = (output * voice->envx) >> 11;
1149
1150 /* duplicated here to give compiler more to run in parallel */
1151 int amp_0 = voice->volume [0] * output;
1152 int amp_1 = voice->volume [1] * output;
1153
1154 prev_outx = output; 845 prev_outx = output;
1155 raw_voice->outx = (int8_t) (output >> 8); 846 raw_voice->outx = output >> 8;
1156 #endif /* CPU_* */
1157 #endif /* SPCNOINTERP */
1158 847
1159 #if SPC_BRRCACHE
1160 if ( voice->position >= voice->wave_end )
1161 {
1162 long loop_len = voice->wave_loop << 12;
1163 voice->position -= loop_len;
1164 this->r.g.wave_ended |= vbit;
1165 if ( !loop_len )
1166 {
1167 this->keys_down ^= vbit;
1168 raw_voice->envx = 0;
1169 voice->envx = 0;
1170 }
1171 }
1172 #endif
1173#if 0
1174 EXIT_TIMER(dsp_gen); 848 EXIT_TIMER(dsp_gen);
1175 849
1176 ENTER_TIMER(dsp_mix); 850 ENTER_TIMER(dsp_mix);
1177#endif 851
1178 chans_0 += amp_0; 852 chans_0 += amp_0;
1179 chans_1 += amp_1; 853 chans_1 += amp_1;
1180 #if !SPC_NOECHO 854 #if !SPC_NOECHO
1181 if ( this->r.g.echo_ons & vbit ) 855 if ( this->r.g.echo_ons & vbit )
1182 { 856 {
1183 echo_0 += amp_0; 857 echo_0 += amp_0;
1184 echo_1 += amp_1; 858 echo_1 += amp_1;
1185 } 859 }
1186 #endif 860 #endif /* !SPC_NOECHO */
1187#if 0 861
1188 EXIT_TIMER(dsp_mix); 862 EXIT_TIMER(dsp_mix);
1189#endif
1190 } 863 }
1191 /* end of voice loop */ 864 /* end of voice loop */
1192 865
866 /* Generate output */
867 int amp_0, amp_1;
1193 #if !SPC_NOECHO 868 #if !SPC_NOECHO
1194 #ifdef CPU_COLDFIRE
1195 /* Read feedback from echo buffer */ 869 /* Read feedback from echo buffer */
1196 int echo_pos = this->echo_pos; 870 int echo_pos = this->echo_pos;
1197 uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF); 871 uint8_t* const echo_ptr = RAM + ((echo_start + echo_pos) & 0xFFFF);
1198 echo_pos += 4;
1199 if ( echo_pos >= echo_wrap )
1200 echo_pos = 0;
1201 this->echo_pos = echo_pos;
1202 int fb = swap_odd_even32(*(int32_t *)echo_ptr);
1203 int out_0, out_1;
1204
1205 /* Keep last 8 samples */
1206 *this->last_fir_ptr = fb;
1207 this->last_fir_ptr = this->fir_ptr;
1208
1209 /* Apply echo FIR filter to output samples read from echo buffer -
1210 circular buffer is hardware incremented and masked; FIR
1211 coefficients and buffer history are loaded in parallel with
1212 multiply accumulate operations. Shift left by one here and once
1213 again when calculating feedback to have sample values justified
1214 to bit 31 in the output to ease endian swap, interleaving and
1215 clamping before placing result in the program's echo buffer. */
1216 int _0, _1, _2;
1217 asm volatile (
1218 "move.l (%[fir_c]) , %[_2] \r\n"
1219 "mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
1220 "mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
1221 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1222 "mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
1223 "mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
1224 "mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
1225 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1226 "mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
1227 "mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
1228 "mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
1229 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1230 "mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
1231 "mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
1232 "mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
1233 "mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
1234 "mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
1235 : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
1236 [fir_p]"+a"(this->fir_ptr)
1237 : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
1238 );
1239
1240 /* Generate output */
1241 asm volatile (
1242 /* fetch filter results _after_ gcc loads asm
1243 block parameters to eliminate emac stalls */
1244 "movclr.l %%acc0, %[out_0] \r\n"
1245 "movclr.l %%acc1, %[out_1] \r\n"
1246 /* apply global volume */
1247 "mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
1248 "mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
1249 /* apply echo volume and add to final output */
1250 "mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
1251 "mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
1252 : [out_0]"=&r"(out_0), [out_1]"=&r"(out_1)
1253 : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
1254 [ev_0]"r"((int)this->r.g.echo_volume_0),
1255 [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
1256 [ev_1]"r"((int)this->r.g.echo_volume_1)
1257 );
1258
1259 /* Feedback into echo buffer */
1260 if ( !(this->r.g.flags & 0x20) )
1261 {
1262 int sh = 1 << 9;
1263
1264 asm volatile (
1265 /* scale echo voices; saturate if overflow */
1266 "mac.l %[sh], %[e1] , %%acc1 \r\n"
1267 "mac.l %[sh], %[e0] , %%acc0 \r\n"
1268 /* add scaled output from FIR filter */
1269 "mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
1270 "mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
1271 /* swap and fetch feedback results - simply
1272 swap_odd_even32 mixed in between macs and
1273 movclrs to mitigate stall issues */
1274 "move.l #0x00ff00ff, %[sh] \r\n"
1275 "movclr.l %%acc1, %[e1] \r\n"
1276 "swap %[e1] \r\n"
1277 "movclr.l %%acc0, %[e0] \r\n"
1278 "move.w %[e1], %[e0] \r\n"
1279 "and.l %[e0], %[sh] \r\n"
1280 "eor.l %[sh], %[e0] \r\n"
1281 "lsl.l #8, %[sh] \r\n"
1282 "lsr.l #8, %[e0] \r\n"
1283 "or.l %[sh], %[e0] \r\n"
1284 /* save final feedback into echo buffer */
1285 "move.l %[e0], (%[echo_ptr]) \r\n"
1286 : [e0]"+d"(echo_0), [e1]"+d"(echo_1), [sh]"+d"(sh)
1287 : [out_0]"r"(out_0), [out_1]"r"(out_1),
1288 [ef]"r"((int)this->r.g.echo_feedback),
1289 [echo_ptr]"a"((int32_t *)echo_ptr)
1290 );
1291 }
1292 872
1293 /* Output final samples */
1294 asm volatile (
1295 /* fetch output saved in %acc2 and %acc3 */
1296 "movclr.l %%acc2, %[out_0] \r\n"
1297 "movclr.l %%acc3, %[out_1] \r\n"
1298 /* scale right by global_muting shift */
1299 "asr.l %[gm], %[out_0] \r\n"
1300 "asr.l %[gm], %[out_1] \r\n"
1301 : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
1302 : [gm]"d"(global_muting)
1303 );
1304
1305 out_buf [ 0] = out_0;
1306 out_buf [WAV_CHUNK_SIZE] = out_1;
1307 out_buf ++;
1308 #elif defined (CPU_ARM)
1309 /* Read feedback from echo buffer */
1310 int echo_pos = this->echo_pos;
1311 uint8_t* const echo_ptr = RAM +
1312 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1313 echo_pos += 4; 873 echo_pos += 4;
1314 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 ) 874
875 if ( echo_pos >= echo_delay )
1315 echo_pos = 0; 876 echo_pos = 0;
1316 this->echo_pos = echo_pos;
1317 877
1318 #if ARM_ARCH >= 6 878 this->echo_pos = echo_pos;
1319 int32_t *fir_ptr, *fir_coeff;
1320 int fb_0, fb_1;
1321 879
1322 /* Apply FIR */ 880 /* Apply FIR */
1323 881 int fb_0, fb_1;
1324 /* Keep last 8 samples */ 882 echo_apply( this, echo_ptr, &fb_0, &fb_1 );
1325 asm volatile (
1326 "ldr %[fb_0], [%[echo_p]] \r\n"
1327 "add %[fir_p], %[t_fir_p], #4 \r\n"
1328 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1329 "str %[fb_0], [%[fir_p], #-4] \r\n"
1330 /* duplicate at +8 eliminates wrap checking below */
1331 "str %[fb_0], [%[fir_p], #28] \r\n"
1332 : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr),
1333 [fb_0]"=&r"(fb_0)
1334 : [echo_p]"r"(echo_ptr), [mask]"i"(~FIR_BUF_MASK));
1335
1336 fir_coeff = (int32_t *)this->fir_coeff;
1337
1338 /* Fugly, but the best version found. */
1339 int _0;
1340 asm volatile ( /* L0R0 = acc0 */
1341 "ldmia %[fir_p]!, { r2-r5 } \r\n" /* L1R1-L4R4 = r2-r5 */
1342 "ldmia %[fir_c]!, { r0-r1 } \r\n" /* C0C1-C2C3 = r0-r1 */
1343 "pkhbt %[_0], %[acc0], r2, asl #16 \r\n" /* L0R0,L1R1->L0L1,R0R1 */
1344 "pkhtb r2, r2, %[acc0], asr #16 \r\n"
1345 "smuad %[acc0], %[_0], r0 \r\n" /* acc0=L0*C0+L1*C1 */
1346 "smuad %[acc1], r2, r0 \r\n" /* acc1=R0*C0+R1*C1 */
1347 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L2R2,L3R3->L2L3,R2R3 */
1348 "pkhtb r4, r4, r3, asr #16 \r\n"
1349 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L2*C2+L3*C3 */
1350 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R2*C2+R3*C3 */
1351 "ldmia %[fir_p], { r2-r4 } \r\n" /* L5R5-L7R7 = r2-r4 */
1352 "ldmia %[fir_c], { r0-r1 } \r\n" /* C4C5-C6C7 = r0-r1 */
1353 "pkhbt %[_0], r5, r2, asl #16 \r\n" /* L4R4,L5R5->L4L5,R4R5 */
1354 "pkhtb r2, r2, r5, asr #16 \r\n"
1355 "smlad %[acc0], %[_0], r0, %[acc0] \r\n" /* acc0+=L4*C4+L5*C5 */
1356 "smlad %[acc1], r2, r0, %[acc1] \r\n" /* acc1+=R4*C4+R5*C5 */
1357 "pkhbt %[_0], r3, r4, asl #16 \r\n" /* L6R6,L7R7->L6L7,R6R7 */
1358 "pkhtb r4, r4, r3, asr #16 \r\n"
1359 "smlad %[acc0], %[_0], r1, %[acc0] \r\n" /* acc0+=L6*C6+L7*C7 */
1360 "smlad %[acc1], r4, r1, %[acc1] \r\n" /* acc1+=R6*C6+R7*C7 */
1361 : [acc0]"+r"(fb_0), [acc1]"=&r"(fb_1), [_0]"=&r"(_0),
1362 [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1363 :
1364 : "r0", "r1", "r2", "r3", "r4", "r5");
1365
1366 /* Generate output */
1367 int amp_0, amp_1;
1368
1369 asm volatile (
1370 "mul %[amp_0], %[gvol_0], %[chans_0] \r\n"
1371 "mul %[amp_1], %[gvol_1], %[chans_1] \r\n"
1372 : [amp_0]"=&r"(amp_0), [amp_1]"=r"(amp_1)
1373 : [gvol_0]"r"(global_vol_0), [gvol_1]"r"(global_vol_1),
1374 [chans_0]"r"(chans_0), [chans_1]"r"(chans_1));
1375 asm volatile (
1376 "mla %[amp_0], %[fb_0], %[ev_0], %[amp_0] \r\n"
1377 "mla %[amp_1], %[fb_1], %[ev_1], %[amp_1] \r\n"
1378 : [amp_0]"+r"(amp_0), [amp_1]"+r"(amp_1)
1379 : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1),
1380 [ev_0]"r"((int)this->r.g.echo_volume_0),
1381 [ev_1]"r"((int)this->r.g.echo_volume_1));
1382
1383 out_buf [ 0] = amp_0 >> global_muting;
1384 out_buf [WAV_CHUNK_SIZE] = amp_1 >> global_muting;
1385 out_buf ++;
1386 883
1387 if ( !(this->r.g.flags & 0x20) ) 884 if ( !(this->r.g.flags & 0x20) )
1388 { 885 {
1389 /* Feedback into echo buffer */ 886 /* Feedback into echo buffer */
1390 int e0, e1; 887 echo_feedback( this, echo_ptr, echo_0, echo_1, fb_0, fb_1 );
1391
1392 asm volatile (
1393 "mov %[e0], %[echo_0], asl #7 \r\n"
1394 "mov %[e1], %[echo_1], asl #7 \r\n"
1395 "mla %[e0], %[fb_0], %[efb], %[e0] \r\n"
1396 "mla %[e1], %[fb_1], %[efb], %[e1] \r\n"
1397 : [e0]"=&r"(e0), [e1]"=&r"(e1)
1398 : [echo_0]"r"(echo_0), [echo_1]"r"(echo_1),
1399 [fb_0]"r"(fb_0), [fb_1]"r"(fb_1),
1400 [efb]"r"((int)this->r.g.echo_feedback));
1401 asm volatile (
1402 "ssat %[e0], #16, %[e0], asr #14 \r\n"
1403 "ssat %[e1], #16, %[e1], asr #14 \r\n"
1404 "pkhbt %[e0], %[e0], %[e1], lsl #16 \r\n"
1405 "str %[e0], [%[echo_p]] \r\n"
1406 : [e0]"+r"(e0), [e1]"+r"(e1)
1407 : [echo_p]"r"(echo_ptr));
1408 } 888 }
1409 #else /* ARM_ARCH < 6 */ 889 #endif /* !SPC_NOECHO */
1410 int fb_0 = GET_LE16SA( echo_ptr );
1411 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1412 int32_t *fir_ptr, *fir_coeff;
1413
1414 /* Keep last 8 samples */
1415
1416 /* Apply FIR */
1417 asm volatile (
1418 "add %[fir_p], %[t_fir_p], #8 \r\n"
1419 "bic %[t_fir_p], %[fir_p], %[mask] \r\n"
1420 "str %[fb_0], [%[fir_p], #-8] \r\n"
1421 "str %[fb_1], [%[fir_p], #-4] \r\n"
1422 /* duplicate at +8 eliminates wrap checking below */
1423 "str %[fb_0], [%[fir_p], #56] \r\n"
1424 "str %[fb_1], [%[fir_p], #60] \r\n"
1425 : [fir_p]"=&r"(fir_ptr), [t_fir_p]"+r"(this->fir_ptr)
1426 : [fb_0]"r"(fb_0), [fb_1]"r"(fb_1), [mask]"i"(~FIR_BUF_MASK));
1427
1428 fir_coeff = this->fir_coeff;
1429
1430 asm volatile (
1431 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1432 "ldmia %[fir_p]!, { r4-r5 } \r\n"
1433 "mul %[fb_0], r0, %[fb_0] \r\n"
1434 "mul %[fb_1], r0, %[fb_1] \r\n"
1435 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1436 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1437 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1438 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1439 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1440 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1441 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1442 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1443 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1444 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1445 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1446 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1447 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1448 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1449 "ldmia %[fir_c]!, { r0-r1 } \r\n"
1450 "ldmia %[fir_p]!, { r2-r5 } \r\n"
1451 "mla %[fb_0], r2, r0, %[fb_0] \r\n"
1452 "mla %[fb_1], r3, r0, %[fb_1] \r\n"
1453 "mla %[fb_0], r4, r1, %[fb_0] \r\n"
1454 "mla %[fb_1], r5, r1, %[fb_1] \r\n"
1455 : [fb_0]"+r"(fb_0), [fb_1]"+r"(fb_1),
1456 [fir_p]"+r"(fir_ptr), [fir_c]"+r"(fir_coeff)
1457 :
1458 : "r0", "r1", "r2", "r3", "r4", "r5");
1459
1460 /* Generate output */
1461 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1462 >> global_muting;
1463 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1464 >> global_muting;
1465
1466 out_buf [ 0] = amp_0;
1467 out_buf [WAV_CHUNK_SIZE] = amp_1;
1468 out_buf ++;
1469 890
1470 if ( !(this->r.g.flags & 0x20) ) 891 mix_output( this, global_muting, global_vol_0, global_vol_1,
1471 { 892 chans_0, chans_1, fb_0, fb_1, &amp_0, &amp_1 );
1472 /* Feedback into echo buffer */
1473 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1474 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1475 e0 = CLAMP16( e0 );
1476 SET_LE16A( echo_ptr , e0 );
1477 e1 = CLAMP16( e1 );
1478 SET_LE16A( echo_ptr + 2, e1 );
1479 }
1480 #endif /* ARM_ARCH */
1481 #else /* Unoptimized CPU */
1482 /* Read feedback from echo buffer */
1483 int echo_pos = this->echo_pos;
1484 uint8_t* const echo_ptr = RAM +
1485 ((this->r.g.echo_page * 0x100 + echo_pos) & 0xFFFF);
1486 echo_pos += 4;
1487 if ( echo_pos >= (this->r.g.echo_delay & 15) * 0x800 )
1488 echo_pos = 0;
1489 this->echo_pos = echo_pos;
1490 int fb_0 = GET_LE16SA( echo_ptr );
1491 int fb_1 = GET_LE16SA( echo_ptr + 2 );
1492
1493 /* Keep last 8 samples */
1494 int (* const fir_ptr) [2] = this->fir_buf + this->fir_pos;
1495 this->fir_pos = (this->fir_pos + 1) & (FIR_BUF_HALF - 1);
1496 fir_ptr [ 0] [0] = fb_0;
1497 fir_ptr [ 0] [1] = fb_1;
1498 /* duplicate at +8 eliminates wrap checking below */
1499 fir_ptr [FIR_BUF_HALF] [0] = fb_0;
1500 fir_ptr [FIR_BUF_HALF] [1] = fb_1;
1501
1502 /* Apply FIR */
1503 fb_0 *= this->fir_coeff [0];
1504 fb_1 *= this->fir_coeff [0];
1505 893
1506 #define DO_PT( i )\
1507 fb_0 += fir_ptr [i] [0] * this->fir_coeff [i];\
1508 fb_1 += fir_ptr [i] [1] * this->fir_coeff [i];
1509
1510 DO_PT( 1 )
1511 DO_PT( 2 )
1512 DO_PT( 3 )
1513 DO_PT( 4 )
1514 DO_PT( 5 )
1515 DO_PT( 6 )
1516 DO_PT( 7 )
1517
1518 /* Generate output */
1519 int amp_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
1520 >> global_muting;
1521 int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
1522 >> global_muting;
1523 out_buf [ 0] = amp_0;
1524 out_buf [WAV_CHUNK_SIZE] = amp_1;
1525 out_buf ++;
1526
1527 if ( !(this->r.g.flags & 0x20) )
1528 {
1529 /* Feedback into echo buffer */
1530 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
1531 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
1532 e0 = CLAMP16( e0 );
1533 SET_LE16A( echo_ptr , e0 );
1534 e1 = CLAMP16( e1 );
1535 SET_LE16A( echo_ptr + 2, e1 );
1536 }
1537 #endif /* CPU_* */
1538 #else /* SPCNOECHO == 1*/
1539 /* Generate output */
1540 int amp_0 = (chans_0 * global_vol_0) >> global_muting;
1541 int amp_1 = (chans_1 * global_vol_1) >> global_muting;
1542 out_buf [ 0] = amp_0; 894 out_buf [ 0] = amp_0;
1543 out_buf [WAV_CHUNK_SIZE] = amp_1; 895 out_buf [WAV_CHUNK_SIZE] = amp_1;
1544 out_buf ++; 896 out_buf ++;
1545 #endif /* SPCNOECHO */
1546 } 897 }
1547 while ( --count ); 898 while ( --count );
1548#if 0 899
1549 EXIT_TIMER(dsp); 900 EXIT_TIMER(dsp);
1550 ENTER_TIMER(cpu); 901 ENTER_TIMER(cpu);
1551#endif
1552} 902}
1553 903
1554void DSP_reset( struct Spc_Dsp* this ) 904void DSP_reset( struct Spc_Dsp* this )
@@ -1563,31 +913,22 @@ void DSP_reset( struct Spc_Dsp* this )
1563 913
1564 ci->memset( this->voice_state, 0, sizeof this->voice_state ); 914 ci->memset( this->voice_state, 0, sizeof this->voice_state );
1565 915
1566 int i; 916 for ( int i = VOICE_COUNT; --i >= 0; )
1567 for ( i = VOICE_COUNT; --i >= 0; )
1568 { 917 {
1569 struct voice_t* v = this->voice_state + i; 918 struct voice_t* v = this->voice_state + i;
1570 v->env_mode = state_release; 919 v->env_mode = state_release;
1571 v->addr = ram.ram; 920 v->wave.addr = ram.ram;
1572 } 921 }
1573 922
1574 #if SPC_BRRCACHE 923#if SPC_BRRCACHE
1575 this->oldsize = 0; 924 this->oldsize = 0;
1576 for ( i = 0; i < 256; i++ ) 925 for ( int i = 0; i < 256; i++ )
1577 this->wave_entry [i].start_addr = -1; 926 this->wave_entry [i].start_addr = -1;
1578 #endif 927#endif /* SPC_BRRCACHE */
1579 928
1580#if defined(CPU_COLDFIRE) 929#if !SPC_NOECHO
1581 this->fir_ptr = fir_buf; 930 echo_init(this);
1582 this->last_fir_ptr = &fir_buf [7]; 931#endif /* SPC_NOECHO */
1583 ci->memset( fir_buf, 0, sizeof fir_buf );
1584#elif defined (CPU_ARM)
1585 this->fir_ptr = fir_buf;
1586 ci->memset( fir_buf, 0, sizeof fir_buf );
1587#else
1588 this->fir_pos = 0;
1589 ci->memset( this->fir_buf, 0, sizeof this->fir_buf );
1590#endif
1591 932
1592 assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT ); 933 assert( offsetof (struct globals_t,unused9 [2]) == REGISTER_COUNT );
1593 assert( sizeof (this->r.voice) == REGISTER_COUNT ); 934 assert( sizeof (this->r.voice) == REGISTER_COUNT );
diff --git a/lib/rbcodec/codecs/libspc/spc_dsp_generic.c b/lib/rbcodec/codecs/libspc/spc_dsp_generic.c
new file mode 100644
index 0000000000..60e79f8763
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/spc_dsp_generic.c
@@ -0,0 +1,211 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
11 * Copyright (C) 2004-2007 Shay Green (blargg)
12 * Copyright (C) 2002 Brad Martin
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23static inline int apply_gen_envx( struct voice_t* voice, int output )
24{
25 return (output * voice->envx) >> 11;
26}
27
28static inline int apply_gen_volume( struct voice_t* voice, int output,
29 int* amp_0, int* amp_1 )
30{
31 *amp_0 = voice->volume [0] * output;
32 *amp_1 = voice->volume [1] * output;
33 return output;
34}
35
/* Generic amplitude stage: envelope scaling followed by the per-voice
   volume multiply. Returns the enveloped sample and stores the two
   volume-scaled values through amp_0 / amp_1. */
static inline int apply_gen_amp( struct voice_t* voice, int output,
                                 int* amp_0, int* amp_1)
{
    int const enveloped = apply_gen_envx( voice, output );
    return apply_gen_volume( voice, enveloped, amp_0, amp_1 );
}
43
44#if !SPC_NOINTERP
45
46#ifndef SPC_GAUSSIAN_FAST_INTERP
/* Four-tap interpolation without intermediate masking or clamping.
   The integer part of position (position >> 12) selects the first of
   four consecutive samples; fwd [0], fwd [1], rev [1], rev [0] are the
   weights for those samples in that order. The weighted sum is shifted
   right by 11 bits. */
static inline int gaussian_fast_interp( int16_t const* samples,
                                        int32_t position,
                                        int16_t const* fwd,
                                        int16_t const* rev )
{
    int16_t const* const s = samples + (position >> 12);
    int sum;

    sum  = fwd [0] * s [0];
    sum += fwd [1] * s [1];
    sum += rev [1] * s [2];
    sum += rev [0] * s [3];

    return sum >> 11;
}
58#endif /* SPC_GAUSSIAN_FAST_INTERP */
59
#ifndef SPC_GAUSSIAN_FAST_AMP
/* The fast path uses the plain envelope + volume stage. Aliasing this to
   apply_amp would resolve (through the apply_amp macro defined further
   down) to gaussian_slow_amp and silently add the slow path's "& ~1"
   masking to the fast path. */
#define gaussian_fast_amp apply_gen_amp
#endif /* SPC_GAUSSIAN_FAST_AMP */
63
64#ifndef SPC_GAUSSIAN_SLOW_INTERP
/* Four-tap interpolation that masks and truncates between taps.
   The low 12 bits are cleared after each of the first two taps, the
   running sum is shifted right by 12 after the third tap and then
   doubled and truncated to 16 bits, and the fourth tap is shifted and
   doubled separately before being added. The result goes through
   CLAMP16. Statement order matters; do not merge the steps. */
static inline int gaussian_slow_interp( int16_t const* samples,
                                        int32_t position,
                                        int16_t const* fwd,
                                        int16_t const* rev )
{
    int16_t const* const s = samples + (position >> 12);
    int acc;

    acc = (fwd [0] * s [0]) & ~0xFFF;
    acc = (acc + fwd [1] * s [1]) & ~0xFFF;
    acc = (acc + rev [1] * s [2]) >> 12;
    acc = (int16_t) (acc * 2);                /* wrap to 16 bits */
    acc += ((rev [0] * s [3]) >> 12) * 2;

    return CLAMP16( acc );
}
79#endif /* SPC_GAUSSIAN_SLOW_INTERP */
80
81#ifndef SPC_GAUSSIAN_SLOW_AMP
/* Amplitude stage for the slow interpolation path: identical to the
   generic envelope + volume stage except that bit 0 of the enveloped
   sample is cleared before the volume multiply. */
static inline int gaussian_slow_amp( struct voice_t* voice, int output,
                                     int *amp_0, int *amp_1 )
{
    int const enveloped = apply_gen_envx( voice, output ) & ~1;
    return apply_gen_volume( voice, enveloped, amp_0, amp_1 );
}
89#endif /* SPC_GAUSSIAN_SLOW_AMP */
90
91#define interp gaussian_slow_interp
92#define apply_amp gaussian_slow_amp
93
94#else /* SPC_NOINTERP */
95
96#ifndef SPC_LINEAR_INTERP
/* Linear interpolation between two neighbouring samples.
   position is 20.12-style fixed point: the bits above bit 11 pick the
   sample pair and the low 12 bits are the fraction between them.
   Note the pair starts one sample past samples [position >> 12]. */
static inline int linear_interp( int16_t const* samples, int32_t position )
{
    int16_t const* const p = samples + (position >> 12) + 1;
    int32_t const frac = position & 0xfff;
    int const base = p [0];
    int const delta = p [1] - p [0];

    return base + ((frac * delta) >> 12);
}
103#endif /* SPC_LINEAR_INTERP */
104
105#define interp( samp, pos, fwd, rev ) \
106 linear_interp( (samp), (pos) )
107
108#ifndef SPC_LINEAR_AMP
109#define linear_amp apply_gen_amp
110#endif /* SPC_LINEAR_AMP */
111
112#define apply_amp linear_amp
113#endif /* SPC_NOINTERP */
114
115
116#if !SPC_NOECHO
117
118#ifndef SPC_DSP_ECHO_APPLY
119/* Init FIR filter */
120static inline void echo_init( struct Spc_Dsp* this )
121{
122 this->fir.pos = 0;
123 ci->memset( this->fir.buf, 0, sizeof this->fir.buf );
124}
125
126/* Apply FIR filter */
127static inline void echo_apply(struct Spc_Dsp* this,
128 uint8_t* const echo_ptr, int* out_0, int* out_1)
129{
130 int fb_0 = GET_LE16SA( echo_ptr );
131 int fb_1 = GET_LE16SA( echo_ptr + 2 );
132
133 /* Keep last 8 samples */
134 int (* const fir_ptr) [2] = this->fir.buf + this->fir.pos;
135 this->fir.pos = (this->fir.pos + 1) & (FIR_BUF_HALF - 1);
136
137 fir_ptr [ 0] [0] = fb_0;
138 fir_ptr [ 0] [1] = fb_1;
139 /* duplicate at +8 eliminates wrap checking below */
140 fir_ptr [FIR_BUF_HALF] [0] = fb_0;
141 fir_ptr [FIR_BUF_HALF] [1] = fb_1;
142
143 fb_0 *= this->fir.coeff [0];
144 fb_1 *= this->fir.coeff [0];
145
146 #define DO_PT( i ) \
147 fb_0 += fir_ptr [i] [0] * this->fir.coeff [i]; \
148 fb_1 += fir_ptr [i] [1] * this->fir.coeff [i];
149
150 DO_PT( 1 )
151 DO_PT( 2 )
152 DO_PT( 3 )
153 DO_PT( 4 )
154 DO_PT( 5 )
155 DO_PT( 6 )
156 DO_PT( 7 )
157
158 #undef DO_PT
159
160 *out_0 = fb_0;
161 *out_1 = fb_1;
162}
163#endif /* SPC_DSP_ECHO_APPLY */
164
165#ifndef SPC_DSP_ECHO_FEEDBACK
166/* Feedback into echo buffer */
167static inline void echo_feedback( struct Spc_Dsp* this, uint8_t *echo_ptr,
168 int echo_0, int echo_1, int fb_0, int fb_1 )
169{
170 int e0 = (echo_0 >> 7) + ((fb_0 * this->r.g.echo_feedback) >> 14);
171 int e1 = (echo_1 >> 7) + ((fb_1 * this->r.g.echo_feedback) >> 14);
172 e0 = CLAMP16( e0 );
173 SET_LE16A( echo_ptr , e0 );
174 e1 = CLAMP16( e1 );
175 SET_LE16A( echo_ptr + 2, e1 );
176}
177#endif /* SPC_DSP_ECHO_FEEDBACK */
178
179#ifndef SPC_DSP_GENERATE_OUTPUT
180/* Generate final output */
181static inline void echo_output( struct Spc_Dsp* this, int global_muting,
182 int global_vol_0, int global_vol_1, int chans_0, int chans_1,
183 int fb_0, int fb_1, int* out_0, int* out_1 )
184{
185 *out_0 = (chans_0 * global_vol_0 + fb_0 * this->r.g.echo_volume_0)
186 >> global_muting;
187 *out_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
188 >> global_muting;
189}
190#endif /* SPC_DSP_GENERATE_OUTPUT */
191
192#define mix_output echo_output
193
194#else /* SPC_NOECHO */
195
196#ifndef SPC_DSP_GENERATE_OUTPUT
/* Produce the final output pair when echo is compiled out: the voice
   mix times the global volume, shifted right by global_muting.
   The DSP pointer is accepted only to keep the call shape uniform. */
static inline void noecho_output( struct Spc_Dsp* this, int global_muting,
    int global_vol_0, int global_vol_1, int chans_0, int chans_1,
    int* out_0, int* out_1 )
{
    (void)this; /* unused */

    int const scaled_0 = chans_0 * global_vol_0;
    *out_0 = scaled_0 >> global_muting;

    int const scaled_1 = chans_1 * global_vol_1;
    *out_1 = scaled_1 >> global_muting;
}
206#endif /* SPC_DSP_GENERATE_OUTPUT */
207
208#define mix_output(this, gm, gv0, gv1, ch0, ch1, fb_0, fb_1, o0, o1) \
209 noecho_output( (this), (gm), (gv0), (gv1), (ch0), (ch1), (o0), (o1) )
210
211#endif /* !SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/spc_dsp_generic.h b/lib/rbcodec/codecs/libspc/spc_dsp_generic.h
new file mode 100644
index 0000000000..beeb87deb2
--- /dev/null
+++ b/lib/rbcodec/codecs/libspc/spc_dsp_generic.h
@@ -0,0 +1,45 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006-2007 Adam Gashlin (hcs)
11 * Copyright (C) 2004-2007 Shay Green (blargg)
12 * Copyright (C) 2002 Brad Martin
13 *
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
18 *
19 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
20 * KIND, either express or implied.
21 *
22 ****************************************************************************/
23#if !SPC_NOECHO
24
25#ifndef SPC_DSP_ECHO_APPLY
26enum
27{
28 FIR_BUF_CNT = FIR_BUF_HALF * 2 * 2,
29 FIR_BUF_SIZE = FIR_BUF_CNT * sizeof ( int32_t ),
30 FIR_BUF_ALIGN = FIR_BUF_SIZE,
31 FIR_BUF_MASK = ~((FIR_BUF_ALIGN / 2) | (sizeof ( int32_t ) * 2 - 1))
32};
33
34/* Echo filter structure embedded in struct Spc_Dsp */
35struct echo_filter
36{
37 /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
38 int pos; /* (0 to 7) */
39 int buf [FIR_BUF_HALF * 2] [2];
40 /* copy of echo FIR constants as int, for faster access */
41 int coeff [VOICE_COUNT];
42};
43#endif /* SPC_DSP_ECHO_APPLY */
44
45#endif /* !SPC_NOECHO */
diff --git a/lib/rbcodec/codecs/libspc/spc_emu.c b/lib/rbcodec/codecs/libspc/spc_emu.c
index 5ea5b0cdeb..dab4199ef0 100644
--- a/lib/rbcodec/codecs/libspc/spc_emu.c
+++ b/lib/rbcodec/codecs/libspc/spc_emu.c
@@ -32,8 +32,8 @@ struct cpu_ram_t ram IBSS_ATTR_SPC_LARGE_IRAM CACHEALIGN_ATTR;
32 32
33/**************** Timers ****************/ 33/**************** Timers ****************/
34 34
35static void Timer_run_( struct Timer* t, long time ) ICODE_ATTR_SPC; 35static void NO_INLINE ICODE_ATTR_SPC
36static void Timer_run_( struct Timer* t, long time ) 36Timer_run_( struct Timer* t, long time )
37{ 37{
38 /* when disabled, next_tick should always be in the future */ 38 /* when disabled, next_tick should always be in the future */
39 assert( t->enabled ); 39 assert( t->enabled );
@@ -60,7 +60,7 @@ static inline void Timer_run( struct Timer* t, long time )
60/**************** SPC emulator ****************/ 60/**************** SPC emulator ****************/
61/* 1.024 MHz clock / 32000 samples per second */ 61/* 1.024 MHz clock / 32000 samples per second */
62 62
63static void SPC_enable_rom( THIS, int enable ) 63static void NO_INLINE SPC_enable_rom( THIS, int enable )
64{ 64{
65 if ( this->rom_enabled != enable ) 65 if ( this->rom_enabled != enable )
66 { 66 {
@@ -186,8 +186,8 @@ int SPC_load_spc( THIS, const void* data, long size )
186} 186}
187 187
188/**************** DSP interaction ****************/ 188/**************** DSP interaction ****************/
189static void SPC_run_dsp_( THIS, long time ) ICODE_ATTR_SPC; 189static void NO_INLINE ICODE_ATTR_SPC
190static void SPC_run_dsp_( THIS, long time ) 190SPC_run_dsp_( THIS, long time )
191{ 191{
192 /* divide by CLOCKS_PER_SAMPLE */ 192 /* divide by CLOCKS_PER_SAMPLE */
193 int count = ((time - this->next_dsp) >> 5) + 1; 193 int count = ((time - this->next_dsp) >> 5) + 1;
@@ -383,13 +383,10 @@ int SPC_play( THIS, long count, int32_t* out )
383 } 383 }
384 384
385 /* Catch DSP up to present */ 385 /* Catch DSP up to present */
386#if 0
387 ENTER_TIMER(cpu); 386 ENTER_TIMER(cpu);
388#endif
389 SPC_run_dsp( this, -EXTRA_CLOCKS ); 387 SPC_run_dsp( this, -EXTRA_CLOCKS );
390#if 0
391 EXIT_TIMER(cpu); 388 EXIT_TIMER(cpu);
392#endif 389
393 assert( this->next_dsp == CLOCKS_PER_SAMPLE - EXTRA_CLOCKS ); 390 assert( this->next_dsp == CLOCKS_PER_SAMPLE - EXTRA_CLOCKS );
394 assert( this->sample_buf - out == count ); 391 assert( this->sample_buf - out == count );
395 392