summaryrefslogtreecommitdiff
path: root/lib/rbcodec/dsp/dsp_cf.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/dsp/dsp_cf.S')
-rw-r--r--lib/rbcodec/dsp/dsp_cf.S502
1 files changed, 305 insertions, 197 deletions
diff --git a/lib/rbcodec/dsp/dsp_cf.S b/lib/rbcodec/dsp/dsp_cf.S
index 15ec7eb383..c710df5177 100644
--- a/lib/rbcodec/dsp/dsp_cf.S
+++ b/lib/rbcodec/dsp/dsp_cf.S
@@ -19,23 +19,27 @@
19 * KIND, either express or implied. 19 * KIND, either express or implied.
20 * 20 *
21 ****************************************************************************/ 21 ****************************************************************************/
22#include "config.h"
22 23
23/**************************************************************************** 24/****************************************************************************
24 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) 25 * void pga_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p)
25 */ 26 */
26 .section .text 27 .section .text
27 .align 2 28 .align 2
28 .global dsp_apply_gain 29 .global pga_process
29dsp_apply_gain: 30pga_process:
31 | input: 4(sp) = this, 8(sp) = buf_p
32 movem.l 4(%sp), %a0-%a1 | %a0 = this, %a1 = buf_p
33 move.l (%a0), %a0 | %a0 = this->data = &pga_data
34 move.l (%a0), %a0 | %a0 = data->gain
35 move.l (%a1), %a1 | %a1 = buf = *buf_p
30 lea.l -20(%sp), %sp | save registers 36 lea.l -20(%sp), %sp | save registers
31 movem.l %d2-%d4/%a2-%a3, (%sp) | 37 movem.l %d2-%d4/%a2-%a3, (%sp) |
32 movem.l 28(%sp), %a0-%a1 | %a0 = data, 38 clr.l %d1 | %d1 = buf->format.num_channels
33 | %a1 = buf 39 move.b 17(%a1), %d1 |
34 move.l 4(%a0), %d1 | %d1 = data->num_channels
35 move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
3610: | channel loop | 4010: | channel loop |
37 move.l 24(%sp), %d0 | %d0 = count 41 move.l (%a1), %d0 | %d0 = buf->remcount
38 move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1] 42 move.l (%a1, %d1.l*4), %a2 | %a2 = s = buf->p32[ch-1]
39 move.l %a2, %a3 | %a3 = d = s 43 move.l %a2, %a3 | %a3 = d = s
40 move.l (%a2)+, %d2 | %d2 = *s++, 44 move.l (%a2)+, %d2 | %d2 = *s++,
41 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) 45 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
@@ -61,25 +65,29 @@ dsp_apply_gain:
61 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers 65 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
62 lea.l 20(%sp), %sp | cleanup stack 66 lea.l 20(%sp), %sp | cleanup stack
63 rts | 67 rts |
64 .size dsp_apply_gain,.-dsp_apply_gain 68 .size pga_process, .-pga_process
65 69
66/**************************************************************************** 70/****************************************************************************
67 * void apply_crossfeed(int count, int32_t *buf[]) 71 * void crossfeed_process(struct dsp_proc_entry *this,
72 * struct dsp_buffer **buf_p)
68 */ 73 */
69 .section .text 74 .section .text
70 .align 2 75 .align 2
71 .global apply_crossfeed 76 .global crossfeed_process
72apply_crossfeed: 77crossfeed_process:
78 | input: 4(sp) = this, 8(sp) = buf_p
73 lea.l -44(%sp), %sp | 79 lea.l -44(%sp), %sp |
74 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs 80 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
75 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src 81 movem.l 48(%sp), %a1/%a4 | %a1 = this, %a4 = buf_p
76 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] 82 move.l (%a4), %a4 | %a4 = buf = *buf_p
77 lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data 83 movem.l (%a4), %d7/%a4-%a5 | %d7 = buf->remcount, %a4 = buf->p32[0],
84 | %a5 = buf->p32[1]
85 move.l (%a1), %a1 | %a1 = &crossfeed_state
78 move.l (%a1)+, %d6 | %d6 = direct gain 86 move.l (%a1)+, %d6 | %d6 = direct gain
79 movem.l 12(%a1), %d0-%d3 | fetch filter history samples 87 movem.l 12(%a1), %d0-%d3 | fetch filter history samples
80 move.l 132(%a1), %a0 | fetch delay line address 88 lea.l 132(%a1), %a6 | %a6 = delay line wrap limit
89 move.l (%a6), %a0 | fetch delay line address
81 movem.l (%a1), %a1-%a3 | load filter coefs 90 movem.l (%a1), %a1-%a3 | load filter coefs
82 lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
83 bra.b 20f | loop start | go to loop start point 91 bra.b 20f | loop start | go to loop start point
84 /* Register usage in loop: 92 /* Register usage in loop:
85 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), 93 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
@@ -109,174 +117,181 @@ apply_crossfeed:
109 mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n] 117 mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
110 cmp.l %a6, %a0 | wrap %a0 if passed end 118 cmp.l %a6, %a0 | wrap %a0 if passed end
111 bhs.b 30f | wrap buffer | 119 bhs.b 30f | wrap buffer |
112 .word 0x51fb | tpf.l | trap the buffer wrap 120 tpf.l | trap the buffer wrap
11330: | wrap buffer | ...fwd taken branches more costly 12130: | wrap buffer | ...fwd taken branches more costly
114 lea.l -104(%a0), %a0 | wrap it up 122 lea.l -104(%a6), %a0 | wrap it up
115 subq.l #1, %d7 | --count > 0 ? 123 subq.l #1, %d7 | --count > 0 ?
116 bgt.b 10b | loop | yes? do more 124 bgt.b 10b | loop | yes? do more
117 movclr.l %acc0, %d4 | write last outputs 125 movclr.l %acc0, %d4 | write last outputs
118 move.l %d4, (%a4) | . 126 move.l %d4, (%a4) | .
119 movclr.l %acc1, %d5 | . 127 movclr.l %acc1, %d5 | .
120 move.l %d5, (%a5) | . 128 move.l %d5, (%a5) | .
121 lea.l crossfeed_data+16, %a1 | save data back to struct 129 movem.l %d0-%d3, -120(%a6) | ...history
122 movem.l %d0-%d3, (%a1) | ...history 130 move.l %a0, (%a6) | ...delay_p
123 move.l %a0, 120(%a1) | ...delay_p
124 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs 131 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
125 lea.l 44(%sp), %sp | 132 lea.l 44(%sp), %sp |
126 rts | 133 rts |
127 .size apply_crossfeed,.-apply_crossfeed 134 .size crossfeed_process,.-crossfeed_process
128 135
129/**************************************************************************** 136/****************************************************************************
130 * int dsp_downsample(int count, struct dsp_data *data, 137 * int lin_resample_resample(struct resample_data *data,
131 * in32_t *src[], int32_t *dst[]) 138 * struct dsp_buffer *src,
139 * struct dsp_buffer *dst)
132 */ 140 */
133 .section .text 141 .section .text
134 .align 2 142 .align 2
135 .global dsp_downsample 143 .global lin_resample_resample
136dsp_downsample: 144lin_resample_resample:
137 lea.l -40(%sp), %sp | save non-clobberables 145 | input: 4(sp) = data, 8(sp) = src, 12(sp) = dst
138 movem.l %d2-%d7/%a2-%a5, (%sp) | 146 lea.l -44(%sp), %sp | save non-volatiles
139 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count 147 movem.l %d2-%d7/%a2-%a6, (%sp) |
140 | %a0 = data 148 movem.l 48(%sp), %a0-%a2 | %a0 = data
141 | %a1 = src 149 | %a1 = src
142 | %a2 = dst 150 | %a2 = dst
143 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels 151 clr.l %d1 | %d1 = ch = src->format.num_channels
144 | %d4 = delta = data->resample_data.delta 152 move.b 17(%a1), %d1 |
145 moveq.l #16, %d7 | %d7 = shift 153 moveq.l #16, %d7 | %d7 = shift
14610: | channel loop | 154.lrs_channel_loop: |
147 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase 155 movem.l (%a0), %d2-%d3 | %d2 = delta = data->delta,
148 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] 156 | %d3 = phase = data->phase
149 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] 157 move.l (%a1), %d4 | %d4 = srcrem = src->remcount
150 lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.ast_sample[ch-1] 158 move.l 12(%a2), %d5 | %d5 = dstrem = dst->bufcount
151 move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] 159 cmp.l #0x8000, %d4 | %d4 = MIN(srcrem, 0x8000)
152 move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1] 160 ble.b 10f |
153 move.l %d5, %d6 | %d6 = pos = phase >> 16 161 move.l #0x8000, %d4 |
154 lsr.l %d7, %d6 | 16210: |
155 cmp.l %d2, %d6 | past end of samples? 163 move.l (%a1, %d1.l*4), %a3 | %a3 = s = src->p32[ch]
156 bge.b 40f | skip resample loop| yes? skip loop 164 move.l (%a2, %d1.l*4), %a4 | %a4 = d = dst->p32[ch]
157 tst.l %d6 | need last sample of prev. frame? 165 move.l %d3, %d0 | %d0 = pos
158 bne.b 20f | resample loop | no? start main loop 166 lsr.l %d7, %d0 | ...
159 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] 167 beq.b 11f | pos == 0?
160 bra.b 30f | resample start last | start with last (last in %d0) 168 cmp.l %d4, %d0 | pos = MIN(pos, srcrem)
16120: | resample loop | 169 blt.b 12f |
162 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] 170 move.l %d4, %d0 | pos = srcrem
163 movem.l (%a5), %d0-%d1 | 171 move.l -4(%a3, %d0.l*4), %d6 | %d6 = last = s[pos - 1]
16430: | resample start last | 172 bra.w .lrs_channel_complete | at limit; nothing to do but next
165 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] 17311: |
166 move.l %d0, %acc0 | %acc0 = previous sample 174 move.l 4(%a0, %d1.l*4), %d6 | %d6 = last = last_sample[ch]
167 move.l %d5, %d0 | frac = (phase << 16) >> 1 175 tpf.l | trap next move.l (last = s[pos - 1])
17612: |
177 move.l -4(%a3, %d0.l*4), %d6 | %d6 = last = s[pos - 1]
178 cmp.l #0x10000, %d2 | delta >= 1.0?
179 bhs.b .lrs_downsample | yes? downsampling
180 |
181 /** Upsampling **/ |
182 lea.l (%a3, %d0.l*4), %a3 | %a3 = &s[pos]
183 sub.l %d4, %d0 | %d0 = pos - srcrem = -dte
184 lsl.l %d7, %d2 | move delta to bits 30..15
185 lsr.l #1, %d2 |
186 lsl.l %d7, %d3 | move phase to bits 30..15
187 lsr.l #1, %d3 |
188 move.l (%a3)+, %a5 | %a5 = s[pos]
189 move.l %a5, %a6 | %a6 = diff = s[pos] - last
190 sub.l %d6, %a6 |
191 bra.b 22f |
192 /* Funky loop structure is to avoid emac latency stalls */
19320: |
194 move.l (%a3)+, %a5 | %a5 = s[pos]
195 move.l %a5, %a6 | %a6 = diff = s[pos] - last
196 sub.l %d6, %a6 |
19721: |
198 movclr.l %acc0, %d7 | *d++ = %d7 = result
199 move.l %d7, (%a4)+ |
20022: |
201 move.l %d6, %acc0 | %acc0 = last
202 mac.l %d3, %a6, %acc0 | %acc0 += frac * diff
203 subq.l #1, %d5 | dstrem <= 0?
204 ble.b 23f | yes? stop
205 add.l %d2, %d3 | phase += delta
206 bpl.b 21b | load next values?
207 move.l %a5, %d6 |
208 bclr.l #31, %d3 | clear sign bit
209 addq.l #1, %d0 | dte > 0?
210 bmi.b 20b | yes? continue resampling
211 tpf.w | trap next add.l (phase += delta)
21223: |
213 add.l %d2, %d3 | phase += delta
214 lsl.l #1, %d3 | frac -> phase
215 bcs.b 24f | was sign bit set?
216 tpf.l |
21724: |
218 move.l %a5, %d6 | yes? was going to move to new s[pos]
219 addq.l #1, %d0 |
220 movclr.l %acc0, %d7 | *d = %d7 = result
221 move.l %d7, (%a4) |
222 add.l %d4, %d0 | %d0 = -dte + srcrem = pos
223 or.l %d0, %d3 | restore phase
224 swap.w %d3 |
225 moveq.l #16, %d7 | %d7 = shift
226 bra.b .lrs_channel_complete |
227 |
228 /** Downsampling **/ |
229.lrs_downsample: |
230 move.l (%a3, %d0.l*4), %a5 | %a5 = s[pos]
231 bra.b 31f |
23230: |
233 lea.l -4(%a3, %d0.l*4), %a5 | %d6 = s[pos - 1], %a5 = s[pos]
234 movem.l (%a5), %d6/%a5 |
23531: |
236 move.l %d6, %acc0 | %acc0 = last
237 sub.l %d6, %a5 | %a5 = diff = s[pos] - s[pos - 1]
238 move.l %d3, %d0 | frac = (phase << 16) >> 1
168 lsl.l %d7, %d0 | 239 lsl.l %d7, %d0 |
169 lsr.l #1, %d0 | 240 lsr.l #1, %d0 |
170 mac.l %d0, %d1, %acc0 | %acc0 += frac * diff 241 mac.l %d0, %a5, %acc0 | %acc0 += frac * diff
171 add.l %d4, %d5 | phase += delta 242 add.l %d2, %d3 | phase += delta
172 move.l %d5, %d6 | pos = phase >> 16 243 move.l %d3, %d0 | pos = phase >> 16
173 lsr.l %d7, %d6 | 244 lsr.l %d7, %d0 |
174 movclr.l %acc0, %d0 | 245 movclr.l %acc0, %a5 |
175 move.l %d0, (%a4)+ | *d++ = %d0 246 move.l %a5, (%a4)+ | *d++ = %a5
176 cmp.l %d2, %d6 | pos < count? 247 subq.l #1, %d5 | dst full?
177 blt.b 20b | resample loop | yes? continue resampling 248 ble.b 32f | yes? stop
17840: | skip resample loop | 249 cmp.l %d4, %d0 | pos < srcrem?
179 subq.l #1, %d3 | ch > 0? 250 blt.b 30b | yes? continue resampling
180 bgt.b 10b | channel loop | yes? process next channel 251 tpf.l | trap cmp.l and ble.b
181 lsl.l %d7, %d2 | wrap phase to start of next frame 25232: |
182 sub.l %d2, %d5 | data->resample_data.phase = 253 cmp.l %d4, %d0 | pos = MIN(pos, srcrem)
183 move.l %d5, 12(%a0) | ... phase - (count << 16) 254 ble.b 33f |
184 move.l %a4, %d0 | return d - d[0] 255 move.l %d4, %d0 |
185 sub.l (%a2), %d0 | 25633: |
186 asr.l #2, %d0 | convert bytes->samples 257 move.l -4(%a3, %d0.l*4), %d6 | %d6 = s[pos - 1]
187 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables 258 |
188 lea.l 40(%sp), %sp | cleanup stack 259.lrs_channel_complete: |
260 move.l %d6, 4(%a0, %d1.l*4) | last_sample[ch] = last
261 subq.l #1, %d1 | ch > 0?
262 bgt.w .lrs_channel_loop | yes? process next channel
263 |
264 move.l 12(%a2), %d1 | %d1 = dst->bufcount
265 sub.l %d5, %d1 | written = dst->bufcount - dstrem
266 move.l %d1, (%a2) | dst->remcount = written
267 move.l %d0, %d1 | wrap phase to position in next frame
268 lsl.l %d7, %d1 | data->phase = phase - (pos << 16)
269 sub.l %d1, %d3 | ...
270 move.l %d3, 4(%a0) | ...
271 movem.l (%sp), %d2-%d7/%a2-%a6 | restore non-volatiles
272 lea.l 44(%sp), %sp | cleanup stack
189 rts | buh-bye 273 rts | buh-bye
190 .size dsp_downsample,.-dsp_downsample
191 274
192/**************************************************************************** 275 .size lin_resample_resample, .-lin_resample_resample
193 * int dsp_upsample(int count, struct dsp_data *dsp, 276
194 * const int32_t *src[], int32_t *dst[])
195 */
196 .section .text
197 .align 2
198 .global dsp_upsample
199dsp_upsample:
200 lea.l -40(%sp), %sp | save non-clobberables
201 movem.l %d2-%d7/%a2-%a5, (%sp) |
202 movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count
203 | %a0 = data
204 | %a1 = src
205 | %a2 = dst
206 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
207 | %d4 = delta = data->resample_data.delta
208 swap %d4 | swap delta to high word to use...
209 | ...carries to increment position
21010: | channel loop |
211 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
212 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
213 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
214 lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1]
215 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
216 move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
217 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
218 move.l (%a3)+, %d1 | fetch first sample - might throw this...
219 | ...away later but we'll be preincremented
220 move.l %d1, %d6 | save sample value
221 sub.l %d0, %d1 | %d1 = diff = s[0] - last
222 swap %d5 | swap phase to high word to use
223 | carries to increment position
224 move.l %d5, %d7 | %d7 = pos = phase >> 16
225 clr.w %d5 |
226 eor.l %d5, %d7 | pos == 0?
227 beq.b 40f | loop start | yes? start loop
228 cmp.l %d2, %d7 | past end of samples?
229 bge.b 50f | skip resample loop| yes? go to next channel and collect info
230 lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
231 movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
232 move.l %d1, %d6 | save sample value
233 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
234 bra.b 40f | loop start |
23520: | next sample loop |
236 move.l %d6, %d0 | move previous sample to %d0
237 move.l (%a3)+, %d1 | fetch next sample
238 move.l %d1, %d6 | save sample value
239 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
24030: | same sample loop |
241 movclr.l %acc0, %d7 | %d7 = result
242 move.l %d7, (%a4)+ | *d++ = %d7
24340: | loop start |
244 lsr.l #1, %d5 | make phase into frac
245 move.l %d0, %acc0 | %acc0 = s[pos-1]
246 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
247 lsl.l #1, %d5 | restore frac to phase
248 add.l %d4, %d5 | phase += delta
249 bcc.b 30b | same sample loop | load next values?
250 cmp.l %a5, %a3 | src <= src_end?
251 bls.b 20b | next sample loop | yes? continue resampling
252 movclr.l %acc0, %d7 | %d7 = result
253 move.l %d7, (%a4)+ | *d++ = %d7
25450: | skip resample loop |
255 subq.l #1, %d3 | ch > 0?
256 bgt.b 10b | channel loop | yes? process next channel
257 swap %d5 | wrap phase to start of next frame
258 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
259 move.l %a4, %d0 | return d - d[0]
260 sub.l (%a2), %d0 |
261 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
262 asr.l #2, %d0 | convert bytes->samples
263 lea.l 40(%sp), %sp | cleanup stack
264 rts | buh-bye
265 .size dsp_upsample,.-dsp_upsample
266 277
267/**************************************************************************** 278/****************************************************************************
268 * void channels_process_sound_chan_mono(int count, int32_t *buf[]) 279 * void channel_mode_proc_mono(struct dsp_proc_entry *this,
280 * struct dsp_buffer **buf_p)
269 * 281 *
270 * Mix left and right channels 50/50 into a center channel. 282 * Mix left and right channels 50/50 into a center channel.
271 */ 283 */
272 .section .text 284 .section .text
273 .align 2 285 .align 2
274 .global channels_process_sound_chan_mono 286 .global channel_mode_proc_mono
275channels_process_sound_chan_mono: 287channel_mode_proc_mono:
276 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 288 | input: 4(sp) = this, 8(sp) = buf_p
289 move.l 8(%sp), %a0 | %a0 = buf_p
290 move.l (%a0), %a0 | %a0 = buf = *buf_p
277 lea.l -20(%sp), %sp | save registers 291 lea.l -20(%sp), %sp | save registers
278 movem.l %d2-%d4/%a2-%a3, (%sp) | 292 movem.l %d2-%d4/%a2-%a3, (%sp) |
279 movem.l (%a0), %a0-%a1 | get channel pointers 293 movem.l (%a0), %d0/%a0-%a1 | %d0 = buf->remcount, %a0 = buf->p32[0],
294 | %a1 = buf->p32[1]
280 move.l %a0, %a2 | use separate dst pointers since read 295 move.l %a0, %a2 | use separate dst pointers since read
281 move.l %a1, %a3 | pointers run one ahead of write 296 move.l %a1, %a3 | pointers run one ahead of write
282 move.l #0x40000000, %d3 | %d3 = 0.5 297 move.l #0x40000000, %d3 | %d3 = 0.5
@@ -301,26 +316,29 @@ channels_process_sound_chan_mono:
301 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers 316 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
302 lea.l 20(%sp), %sp | cleanup 317 lea.l 20(%sp), %sp | cleanup
303 rts | 318 rts |
304 .size channels_process_sound_chan_mono, \ 319 .size channel_mode_proc_mono, .-channel_mode_proc_mono
305 .-channels_process_sound_chan_mono
306 320
307/**************************************************************************** 321/****************************************************************************
308 * void channels_process_sound_chan_custom(int count, int32_t *buf[]) 322 * void channel_mode_proc_custom(struct dsp_proc_entry *this,
323 * struct dsp_buffer **buf_p)
309 * 324 *
310 * Apply stereo width (narrowing/expanding) effect. 325 * Apply stereo width (narrowing/expanding) effect.
311 */ 326 */
312 .section .text 327 .section .text
313 .align 2 328 .align 2
314 .global channels_process_sound_chan_custom 329 .global channel_mode_proc_custom
315channels_process_sound_chan_custom: 330channel_mode_proc_custom:
316 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 331 | input: 4(sp) = this, 8(sp) = buf_p
317 lea.l -28(%sp), %sp | save registers 332 lea.l -28(%sp), %sp | save registers
318 movem.l %d2-%d6/%a2-%a3, (%sp) | 333 movem.l %d2-%d6/%a2-%a3, (%sp) |
319 movem.l (%a0), %a0-%a1 | get channel pointers 334 movem.l 32(%sp), %a0-%a1 | %a0 = this, %a1 = buf_p
335 move.l (%a1), %a1 | %a1 = buf = *buf_p
336 move.l (%a0), %a2 | %a2 = this->data = &channel_mode_data
337 movem.l (%a1), %d0/%a0-%a1 | %d0 = buf->remcount, %a0 = buf->p32[0],
338 | %a1 = buf->p32[1]
339 movem.l (%a2), %d3-%d4 | %d3 = sw_gain, %d4 = sw_cross
320 move.l %a0, %a2 | use separate dst pointers since read 340 move.l %a0, %a2 | use separate dst pointers since read
321 move.l %a1, %a3 | pointers run one ahead of write 341 move.l %a1, %a3 | pointers run one ahead of write
322 move.l dsp_sw_gain, %d3 | load straight (mid) gain
323 move.l dsp_sw_cross, %d4 | load cross (side) gain
324 move.l (%a0)+, %d1 | prime the input registers 342 move.l (%a0)+, %d1 | prime the input registers
325 move.l (%a1)+, %d2 | 343 move.l (%a1)+, %d2 |
326 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross 344 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
@@ -348,22 +366,25 @@ channels_process_sound_chan_custom:
348 movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers 366 movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
349 lea.l 28(%sp), %sp | cleanup 367 lea.l 28(%sp), %sp | cleanup
350 rts | 368 rts |
351 .size channels_process_sound_chan_custom, \ 369 .size channel_mode_proc_custom, .-channel_mode_proc_custom
352 .-channels_process_sound_chan_custom
353 370
354/**************************************************************************** 371/****************************************************************************
355 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) 372 * void channel_mode_proc_karaoke(struct dsp_proc_entry *this,
373 * struct dsp_buffer **buf_p)
356 * 374 *
357 * Separate channels into side channels. 375 * Separate channels into side channels.
358 */ 376 */
359 .section .text 377 .section .text
360 .align 2 378 .align 2
361 .global channels_process_sound_chan_karaoke 379 .global channel_mode_proc_karaoke
362channels_process_sound_chan_karaoke: 380channel_mode_proc_karaoke:
363 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 381 | input: 4(sp) = this, 8(sp) = buf_p
382 move.l 8(%sp), %a0 | %a0 = buf_p
383 move.l (%a0), %a0 | %a0 = buf = *buf_p
364 lea.l -20(%sp), %sp | save registers 384 lea.l -20(%sp), %sp | save registers
365 movem.l %d2-%d4/%a2-%a3, (%sp) | 385 movem.l %d2-%d4/%a2-%a3, (%sp) |
366 movem.l (%a0), %a0-%a1 | get channel src pointers 386 movem.l (%a0), %d0/%a0-%a1 | %d0 = buf->remcount, %a0 = buf->p32[0],
387 | %a1 = buf->p32[1]
367 move.l %a0, %a2 | use separate dst pointers since read 388 move.l %a0, %a2 | use separate dst pointers since read
368 move.l %a1, %a3 | pointers run one ahead of write 389 move.l %a1, %a3 | pointers run one ahead of write
369 move.l #0x40000000, %d3 | %d3 = 0.5 390 move.l #0x40000000, %d3 | %d3 = 0.5
@@ -390,12 +411,90 @@ channels_process_sound_chan_karaoke:
390 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers 411 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
391 lea.l 20(%sp), %sp | cleanup 412 lea.l 20(%sp), %sp | cleanup
392 rts | 413 rts |
393 .size channels_process_sound_chan_karaoke, \ 414 .size channel_mode_proc_karaoke, .-channel_mode_proc_karaoke
394 .-channels_process_sound_chan_karaoke 415
416/****************************************************************************
417 * void filter_process(struct dsp_filter *f, int32_t *buf[], int count,
418 * unsigned int channels)
419 *
420 * define HIGH_PRECISION as '1' to make filtering calculate lower bits after
421 * shifting. without this, "shift" - 1 of the lower bits will be lost here.
422 */
423#define HIGH_PRECISION 0
424 .text
425 .global filter_process
426filter_process:
427 | input: 4(sp) = f, 8(sp) = buf, 12(sp) = count, 16(sp) = channels
428 lea.l -44(%sp), %sp | save clobbered regs
429#if HIGH_PRECISION
430 movem.l %d2-%d7/%a2-%a6, (%sp) | .
431#else
432 movem.l %d2-%d6/%a2-%a6, (%sp) |
433#endif
434 move.l 48(%sp), %a5 | fetch filter structure address
435 clr.l %d6 | load shift count
436 move.b 52(%a5), %d6 | .
437 subq.l #1, %d6 | EMAC gives us one free shift
438#if HIGH_PRECISION
439 moveq.l #8, %d7
440 sub.l %d6, %d7 | shift for lower part of accumulator
441#endif
442 movem.l (%a5), %a0-%a4 | load coefs
443 lea.l 20(%a5), %a5 | point to filter history
444
44510: | channel loop
446 move.l 52(%sp), %a6 | load input channel pointer
447 addq.l #4, 52(%sp) | point x to next channel
448 move.l (%a6), %a6 |
449 move.l 56(%sp), %d5 | number of samples
450 movem.l (%a5), %d0-%d3 | load filter history
451
452 | d0-d3 = history, d4 = temp, d5 = sample count, d6 = upper shift amount,
453 | d7 = lower shift amount, a0-a4 = coefs, a5 = history pointer, a6 = buf[ch]
45420: | loop
455 | Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
456 | y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
457 | where y[] is output and x[] is input. This is performed out of order
458 | to do parallel load of input value.
459 mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2]
460 move.l %d0, %d1 | fix input history
461 mac.l %a1, %d0, (%a6), %d0, %acc0 | acc += b1*x[i - 1], x[i] -> d0
462 mac.l %a0, %d0, %acc0 | acc += b0*x[i]
463 mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1]
464 mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2]
465 move.l %d2, %d3 | fix output history
466#if HIGH_PRECISION
467 move.l %accext01, %d2 | fetch lower part of accumulator
468 move.b %d2, %d4 | clear upper three bytes
469 lsr.l %d7, %d4 | shift lower bits
470#endif
471 movclr.l %acc0, %d2 | fetch upper part of result
472 asl.l %d6, %d2 | restore fixed point format
473#if HIGH_PRECISION
474 or.l %d2, %d4 | combine lower and upper parts
475#endif
476 move.l %d2, (%a6)+ | save result
477 subq.l #1, %d5 | are we done with this channel?
478 bgt 20b | loop
479
480 movem.l %d0-%d3, (%a5) | save history back to struct
481 lea.l 16(%a5), %a5 | point to next channel's history
482 subq.l #1, 60(%sp) | have we processed both channels?
483 bhi 10b | channel loop
484
485#if HIGH_PRECISION
486 movem.l (%sp), %d2-%d7/%a2-%a6
487#else
488 movem.l (%sp), %d2-%d6/%a2-%a6
489#endif
490 lea.l 44(%sp), %sp
491 rts
492 .size filter_process, .-filter_process
395 493
396/**************************************************************************** 494/****************************************************************************
397 * void sample_output_stereo(int count, struct dsp_data *data, 495 * void sample_output_stereo(struct sample_io_data *this,
398 * const int32_t *src[], int16_t *dst) 496 * struct dsp_buffer *src,
497 * struct dsp_buffer *dst)
399 * 498 *
400 * Framework based on the ubiquitous Rockbox line transfer logic for 499 * Framework based on the ubiquitous Rockbox line transfer logic for
401 * Coldfire CPUs. 500 * Coldfire CPUs.
@@ -417,20 +516,24 @@ channels_process_sound_chan_karaoke:
417 .align 2 516 .align 2
418 .global sample_output_stereo 517 .global sample_output_stereo
419sample_output_stereo: 518sample_output_stereo:
519 | input: 4(sp) = this, 8(sp) = src, 12(sp) = dst
420 lea.l -48(%sp), %sp | save registers 520 lea.l -48(%sp), %sp | save registers
421 move.l %macsr, %d1 | do it now as at many lines will 521 move.l %macsr, %d1 | do it now as at many lines will
422 movem.l %d1-%d7/%a2-%a6, (%sp) | be the far more common condition 522 movem.l %d1-%d7/%a2-%a6, (%sp) | be the far more common condition
423 move.l #0x80, %macsr | put emac unit in signed int mode 523 move.l #0x80, %macsr | put emac unit in signed int mode
424 movem.l 52(%sp), %a0-%a2/%a4 | 524 movem.l 52(%sp), %a0-%a2 | %a0 = this, %a1 = src, %a2 = dst
425 lea.l (%a4, %a0.l*4), %a0 | %a0 = end address 525 move.l (%a0), %a0 | %a0 = this->outcount
426 move.l (%a1), %d1 | %a1 = multiplier: (1 << (16 - scale)) 526 move.l 4(%a2), %a4 | %a4 = dst->p16out
527 lea.l (%a4, %a0.l*4), %a0 | %a0 = count -> end address
528 movem.l 4(%a1), %a2-%a3 | %a2 = src->p32[0], %a3 = src->p32[1]
529 clr.l %d1 | %a1 = multiplier: (1 << (16 - scale))
530 move.b 19(%a1), %d1 | %d1 = src->format.output_scale
427 sub.l #16, %d1 | 531 sub.l #16, %d1 |
428 neg.l %d1 | 532 neg.l %d1 |
429 moveq.l #1, %d0 | 533 moveq.l #1, %d0 |
430 asl.l %d1, %d0 | 534 asl.l %d1, %d0 |
431 move.l %d0, %a1 | 535 move.l %d0, %a1 |
432 move.l #0x8000, %a6 | %a6 = rounding term 536 move.l #0x8000, %a6 | %a6 = rounding term
433 movem.l (%a2), %a2-%a3 | get L/R channel pointers
434 moveq.l #28, %d0 | %d0 = second line bound 537 moveq.l #28, %d0 | %d0 = second line bound
435 add.l %a4, %d0 | 538 add.l %a4, %d0 |
436 and.l #0xfffffff0, %d0 | 539 and.l #0xfffffff0, %d0 |
@@ -447,7 +550,7 @@ sample_output_stereo:
447 mac.l %d2, %a1, %acc1 | shift R to high word 550 mac.l %d2, %a1, %acc1 | shift R to high word
448 movclr.l %acc0, %d1 | get possibly saturated results 551 movclr.l %acc0, %d1 | get possibly saturated results
449 movclr.l %acc1, %d2 | 552 movclr.l %acc1, %d2 |
450 swap %d2 | move R to low word 553 swap.w %d2 | move R to low word
451 move.w %d2, %d1 | interleave MS 16 bits of each 554 move.w %d2, %d1 | interleave MS 16 bits of each
452 move.l %d1, (%a4)+ | ...and write both 555 move.l %d1, (%a4)+ | ...and write both
453 cmp.l %a4, %d0 | 556 cmp.l %a4, %d0 |
@@ -477,10 +580,10 @@ sample_output_stereo:
477 mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation 580 mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation
478 mac.l %d2, %a1, (%a2)+, %d3, %acc2 | 581 mac.l %d2, %a1, (%a2)+, %d3, %acc2 |
479 mac.l %d3, %a1 , %acc3 | 582 mac.l %d3, %a1 , %acc3 |
480 swap %d4 | a) interleave most significant... 583 swap.w %d4 | a) interleave most significant...
481 swap %d5 | 584 swap.w %d5 |
482 swap %d6 | 585 swap.w %d6 |
483 swap %d7 | 586 swap.w %d7 |
484 movclr.l %acc0, %d0 | obtain L results 587 movclr.l %acc0, %d0 | obtain L results
485 movclr.l %acc1, %d1 | 588 movclr.l %acc1, %d1 |
486 movclr.l %acc2, %d2 | 589 movclr.l %acc2, %d2 |
@@ -503,7 +606,7 @@ sample_output_stereo:
503 mac.l %d2, %a1, %acc1 | 606 mac.l %d2, %a1, %acc1 |
504 movclr.l %acc0, %d1 | 607 movclr.l %acc0, %d1 |
505 movclr.l %acc1, %d2 | 608 movclr.l %acc1, %d2 |
506 swap %d2 | 609 swap.w %d2 |
507 move.w %d2, %d1 | 610 move.w %d2, %d1 |
508 move.l %d1, (%a4)+ | 611 move.l %d1, (%a4)+ |
509 cmp.l %a4, %a0 | 612 cmp.l %a4, %a0 |
@@ -516,8 +619,9 @@ sample_output_stereo:
516 .size sample_output_stereo, .-sample_output_stereo 619 .size sample_output_stereo, .-sample_output_stereo
517 620
518/**************************************************************************** 621/****************************************************************************
519 * void sample_output_mono(int count, struct dsp_data *data, 622 * void sample_output_mono(struct sample_io_data *this,
520 * const int32_t *src[], int16_t *dst) 623 * struct dsp_buffer *src,
624 * struct dsp_buffer *dst)
521 * 625 *
522 * Same treatment as sample_output_stereo but for one channel. 626 * Same treatment as sample_output_stereo but for one channel.
523 */ 627 */
@@ -525,19 +629,23 @@ sample_output_stereo:
525 .align 2 629 .align 2
526 .global sample_output_mono 630 .global sample_output_mono
527sample_output_mono: 631sample_output_mono:
632 | input: 4(sp) = this, 8(sp) = src, 12(sp) = dst
528 lea.l -32(%sp), %sp | save registers 633 lea.l -32(%sp), %sp | save registers
529 move.l %macsr, %d1 | do it now as at many lines will 634 move.l %macsr, %d1 | do it now as at many lines will
530 movem.l %d1-%d5/%a2-%a4, (%sp) | be the far more common condition 635 movem.l %d1-%d5/%a2-%a4, (%sp) | be the far more common condition
531 move.l #0x80, %macsr | put emac unit in signed int mode 636 move.l #0x80, %macsr | put emac unit in signed int mode
532 movem.l 36(%sp), %a0-%a3 | 637 movem.l 36(%sp), %a0-%a2 | %a0 = this, %a1 = src, %a2 = dst
533 lea.l (%a3, %a0.l*4), %a0 | %a0 = end address 638 move.l (%a0), %a0 | %a0 = this->outcount
534 move.l (%a1), %d1 | %d5 = multiplier: (1 << (16 - scale)) 639 move.l 4(%a2), %a3 | %a3 = dst->p16out
640 movem.l 4(%a1), %a2 | %a2 = src->p32[0]
641 lea.l (%a3, %a0.l*4), %a0 | %a0 = count -> end address
642 clr.l %d1 | %d5 = multiplier: (1 << (16 - scale))
643 move.b 19(%a1), %d1 | %d1 = src->format.output_scale
535 sub.l #16, %d1 | 644 sub.l #16, %d1 |
536 neg.l %d1 | 645 neg.l %d1 |
537 moveq.l #1, %d5 | 646 moveq.l #1, %d5 |
538 asl.l %d1, %d5 | 647 asl.l %d1, %d5 |
539 move.l #0x8000, %a4 | %a4 = rounding term 648 move.l #0x8000, %a4 | %a4 = rounding term
540 movem.l (%a2), %a2 | get source channel pointer
541 moveq.l #28, %d0 | %d0 = second line bound 649 moveq.l #28, %d0 | %d0 = second line bound
542 add.l %a3, %d0 | 650 add.l %a3, %d0 |
543 and.l #0xfffffff0, %d0 | 651 and.l #0xfffffff0, %d0 |
@@ -552,7 +660,7 @@ sample_output_mono:
552 mac.l %d1, %d5, %acc0 | shift L to high word 660 mac.l %d1, %d5, %acc0 | shift L to high word
553 movclr.l %acc0, %d1 | get possibly saturated results 661 movclr.l %acc0, %d1 | get possibly saturated results
554 move.l %d1, %d2 | 662 move.l %d1, %d2 |
555 swap %d2 | move R to low word 663 swap.w %d2 | move R to low word
556 move.w %d2, %d1 | duplicate single channel into 664 move.w %d2, %d1 | duplicate single channel into
557 move.l %d1, (%a3)+ | L and R 665 move.l %d1, (%a3)+ | L and R
558 cmp.l %a3, %d0 | 666 cmp.l %a3, %d0 |
@@ -575,16 +683,16 @@ sample_output_mono:
575 movclr.l %acc2, %d2 | 683 movclr.l %acc2, %d2 |
576 movclr.l %acc3, %d3 | 684 movclr.l %acc3, %d3 |
577 move.l %d0, %d4 | duplicate single channel 685 move.l %d0, %d4 | duplicate single channel
578 swap %d4 | into L and R 686 swap.w %d4 | into L and R
579 move.w %d4, %d0 | 687 move.w %d4, %d0 |
580 move.l %d1, %d4 | 688 move.l %d1, %d4 |
581 swap %d4 | 689 swap.w %d4 |
582 move.w %d4, %d1 | 690 move.w %d4, %d1 |
583 move.l %d2, %d4 | 691 move.l %d2, %d4 |
584 swap %d4 | 692 swap.w %d4 |
585 move.w %d4, %d2 | 693 move.w %d4, %d2 |
586 move.l %d3, %d4 | 694 move.l %d3, %d4 |
587 swap %d4 | 695 swap.w %d4 |
588 move.w %d4, %d3 | 696 move.w %d4, %d3 |
589 movem.l %d0-%d3, -16(%a3) | write four stereo samples 697 movem.l %d0-%d3, -16(%a3) | write four stereo samples
590 cmp.l %a3, %a1 | 698 cmp.l %a3, %a1 |
@@ -598,7 +706,7 @@ sample_output_mono:
598 mac.l %d1, %d5, %acc0 | the same way as leading ones 706 mac.l %d1, %d5, %acc0 | the same way as leading ones
599 movclr.l %acc0, %d1 | 707 movclr.l %acc0, %d1 |
600 move.l %d1, %d2 | 708 move.l %d1, %d2 |
601 swap %d2 | 709 swap.w %d2 |
602 move.w %d2, %d1 | 710 move.w %d2, %d1 |
603 move.l %d1, (%a3)+ | 711 move.l %d1, (%a3)+ |
604 cmp.l %a3, %a0 | 712 cmp.l %a3, %a0 |