diff options
Diffstat (limited to 'lib/rbcodec/dsp/dsp_cf.S')
-rw-r--r-- | lib/rbcodec/dsp/dsp_cf.S | 502 |
1 files changed, 305 insertions, 197 deletions
diff --git a/lib/rbcodec/dsp/dsp_cf.S b/lib/rbcodec/dsp/dsp_cf.S index 15ec7eb383..c710df5177 100644 --- a/lib/rbcodec/dsp/dsp_cf.S +++ b/lib/rbcodec/dsp/dsp_cf.S | |||
@@ -19,23 +19,27 @@ | |||
19 | * KIND, either express or implied. | 19 | * KIND, either express or implied. |
20 | * | 20 | * |
21 | ****************************************************************************/ | 21 | ****************************************************************************/ |
22 | #include "config.h" | ||
22 | 23 | ||
23 | /**************************************************************************** | 24 | /**************************************************************************** |
24 | * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) | 25 | * void pga_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p) |
25 | */ | 26 | */ |
26 | .section .text | 27 | .section .text |
27 | .align 2 | 28 | .align 2 |
28 | .global dsp_apply_gain | 29 | .global pga_process |
29 | dsp_apply_gain: | 30 | pga_process: |
31 | | input: 4(sp) = this, 8(sp) = buf_p | ||
32 | movem.l 4(%sp), %a0-%a1 | %a0 = this, %a1 = buf_p | ||
33 | move.l (%a0), %a0 | %a0 = this->data = &pga_data | ||
34 | move.l (%a0), %a0 | %a0 = data->gain | ||
35 | move.l (%a1), %a1 | %a1 = buf = *buf_p | ||
30 | lea.l -20(%sp), %sp | save registers | 36 | lea.l -20(%sp), %sp | save registers |
31 | movem.l %d2-%d4/%a2-%a3, (%sp) | | 37 | movem.l %d2-%d4/%a2-%a3, (%sp) | |
32 | movem.l 28(%sp), %a0-%a1 | %a0 = data, | 38 | clr.l %d1 | %d1 = buf->format.num_channels |
33 | | %a1 = buf | 39 | move.b 17(%a1), %d1 | |
34 | move.l 4(%a0), %d1 | %d1 = data->num_channels | ||
35 | move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23) | ||
36 | 10: | channel loop | | 40 | 10: | channel loop | |
37 | move.l 24(%sp), %d0 | %d0 = count | 41 | move.l (%a1), %d0 | %d0 = buf->remcount |
38 | move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1] | 42 | move.l (%a1, %d1.l*4), %a2 | %a2 = s = buf->p32[ch-1] |
39 | move.l %a2, %a3 | %a3 = d = s | 43 | move.l %a2, %a3 | %a3 = d = s |
40 | move.l (%a2)+, %d2 | %d2 = *s++, | 44 | move.l (%a2)+, %d2 | %d2 = *s++, |
41 | mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) | 45 | mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) |
@@ -61,25 +65,29 @@ dsp_apply_gain: | |||
61 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers | 65 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers |
62 | lea.l 20(%sp), %sp | cleanup stack | 66 | lea.l 20(%sp), %sp | cleanup stack |
63 | rts | | 67 | rts | |
64 | .size dsp_apply_gain,.-dsp_apply_gain | 68 | .size pga_process, .-pga_process |
65 | 69 | ||
66 | /**************************************************************************** | 70 | /**************************************************************************** |
67 | * void apply_crossfeed(int count, int32_t *buf[]) | 71 | * void crossfeed_process(struct dsp_proc_entry *this, |
72 | * struct dsp_buffer **buf_p) | ||
68 | */ | 73 | */ |
69 | .section .text | 74 | .section .text |
70 | .align 2 | 75 | .align 2 |
71 | .global apply_crossfeed | 76 | .global crossfeed_process |
72 | apply_crossfeed: | 77 | crossfeed_process: |
78 | | input: 4(sp) = this, 8(sp) = buf_p | ||
73 | lea.l -44(%sp), %sp | | 79 | lea.l -44(%sp), %sp | |
74 | movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs | 80 | movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs |
75 | movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src | 81 | movem.l 48(%sp), %a1/%a4 | %a1 = this, %a4 = buf_p |
76 | movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] | 82 | move.l (%a4), %a4 | %a4 = buf = *buf_p |
77 | lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data | 83 | movem.l (%a4), %d7/%a4-%a5 | %d7 = buf->remcount, %a4 = buf->p32[0], |
84 | | %a5 = buf->p32[1] | ||
85 | move.l (%a1), %a1 | %a1 = &crossfeed_state | ||
78 | move.l (%a1)+, %d6 | %d6 = direct gain | 86 | move.l (%a1)+, %d6 | %d6 = direct gain |
79 | movem.l 12(%a1), %d0-%d3 | fetch filter history samples | 87 | movem.l 12(%a1), %d0-%d3 | fetch filter history samples |
80 | move.l 132(%a1), %a0 | fetch delay line address | 88 | lea.l 132(%a1), %a6 | %a6 = delay line wrap limit |
89 | move.l (%a6), %a0 | fetch delay line address | ||
81 | movem.l (%a1), %a1-%a3 | load filter coefs | 90 | movem.l (%a1), %a1-%a3 | load filter coefs |
82 | lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit | ||
83 | bra.b 20f | loop start | go to loop start point | 91 | bra.b 20f | loop start | go to loop start point |
84 | /* Register usage in loop: | 92 | /* Register usage in loop: |
85 | * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), | 93 | * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), |
@@ -109,174 +117,181 @@ apply_crossfeed: | |||
109 | mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n] | 117 | mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n] |
110 | cmp.l %a6, %a0 | wrap %a0 if passed end | 118 | cmp.l %a6, %a0 | wrap %a0 if passed end |
111 | bhs.b 30f | wrap buffer | | 119 | bhs.b 30f | wrap buffer | |
112 | .word 0x51fb | tpf.l | trap the buffer wrap | 120 | tpf.l | trap the buffer wrap |
113 | 30: | wrap buffer | ...fwd taken branches more costly | 121 | 30: | wrap buffer | ...fwd taken branches more costly |
114 | lea.l -104(%a0), %a0 | wrap it up | 122 | lea.l -104(%a6), %a0 | wrap it up |
115 | subq.l #1, %d7 | --count > 0 ? | 123 | subq.l #1, %d7 | --count > 0 ? |
116 | bgt.b 10b | loop | yes? do more | 124 | bgt.b 10b | loop | yes? do more |
117 | movclr.l %acc0, %d4 | write last outputs | 125 | movclr.l %acc0, %d4 | write last outputs |
118 | move.l %d4, (%a4) | . | 126 | move.l %d4, (%a4) | . |
119 | movclr.l %acc1, %d5 | . | 127 | movclr.l %acc1, %d5 | . |
120 | move.l %d5, (%a5) | . | 128 | move.l %d5, (%a5) | . |
121 | lea.l crossfeed_data+16, %a1 | save data back to struct | 129 | movem.l %d0-%d3, -120(%a6) | ...history |
122 | movem.l %d0-%d3, (%a1) | ...history | 130 | move.l %a0, (%a6) | ...delay_p |
123 | move.l %a0, 120(%a1) | ...delay_p | ||
124 | movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs | 131 | movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs |
125 | lea.l 44(%sp), %sp | | 132 | lea.l 44(%sp), %sp | |
126 | rts | | 133 | rts | |
127 | .size apply_crossfeed,.-apply_crossfeed | 134 | .size crossfeed_process,.-crossfeed_process |
128 | 135 | ||
129 | /**************************************************************************** | 136 | /**************************************************************************** |
130 | * int dsp_downsample(int count, struct dsp_data *data, | 137 | * int lin_resample_resample(struct resample_data *data, |
131 | * in32_t *src[], int32_t *dst[]) | 138 | * struct dsp_buffer *src, |
139 | * struct dsp_buffer *dst) | ||
132 | */ | 140 | */ |
133 | .section .text | 141 | .section .text |
134 | .align 2 | 142 | .align 2 |
135 | .global dsp_downsample | 143 | .global lin_resample_resample |
136 | dsp_downsample: | 144 | lin_resample_resample: |
137 | lea.l -40(%sp), %sp | save non-clobberables | 145 | | input: 4(sp) = data, 8(sp) = src, 12(sp) = dst |
138 | movem.l %d2-%d7/%a2-%a5, (%sp) | | 146 | lea.l -44(%sp), %sp | save non-volatiles |
139 | movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count | 147 | movem.l %d2-%d7/%a2-%a6, (%sp) | |
140 | | %a0 = data | 148 | movem.l 48(%sp), %a0-%a2 | %a0 = data |
141 | | %a1 = src | 149 | | %a1 = src |
142 | | %a2 = dst | 150 | | %a2 = dst |
143 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels | 151 | clr.l %d1 | %d1 = ch = src->format.num_channels |
144 | | %d4 = delta = data->resample_data.delta | 152 | move.b 17(%a1), %d1 | |
145 | moveq.l #16, %d7 | %d7 = shift | 153 | moveq.l #16, %d7 | %d7 = shift |
146 | 10: | channel loop | | 154 | .lrs_channel_loop: | |
147 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase | 155 | movem.l (%a0), %d2-%d3 | %d2 = delta = data->delta, |
148 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] | 156 | | %d3 = phase = data->phase |
149 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] | 157 | move.l (%a1), %d4 | %d4 = srcrem = src->remcount |
150 | lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.ast_sample[ch-1] | 158 | move.l 12(%a2), %d5 | %d5 = dstrem = dst->bufcount |
151 | move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] | 159 | cmp.l #0x8000, %d4 | %d4 = MIN(srcrem, 0x8000) |
152 | move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1] | 160 | ble.b 10f | |
153 | move.l %d5, %d6 | %d6 = pos = phase >> 16 | 161 | move.l #0x8000, %d4 | |
154 | lsr.l %d7, %d6 | | 162 | 10: | |
155 | cmp.l %d2, %d6 | past end of samples? | 163 | move.l (%a1, %d1.l*4), %a3 | %a3 = s = src->p32[ch] |
156 | bge.b 40f | skip resample loop| yes? skip loop | 164 | move.l (%a2, %d1.l*4), %a4 | %a4 = d = dst->p32[ch] |
157 | tst.l %d6 | need last sample of prev. frame? | 165 | move.l %d3, %d0 | %d0 = pos |
158 | bne.b 20f | resample loop | no? start main loop | 166 | lsr.l %d7, %d0 | ... |
159 | move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] | 167 | beq.b 11f | pos == 0? |
160 | bra.b 30f | resample start last | start with last (last in %d0) | 168 | cmp.l %d4, %d0 | pos = MIN(pos, srcrem) |
161 | 20: | resample loop | | 169 | blt.b 12f | |
162 | lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] | 170 | move.l %d4, %d0 | pos = srcrem |
163 | movem.l (%a5), %d0-%d1 | | 171 | move.l -4(%a3, %d0.l*4), %d6 | %d6 = last = s[pos - 1] |
164 | 30: | resample start last | | 172 | bra.w .lrs_channel_complete | at limit; nothing to do but next |
165 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] | 173 | 11: | |
166 | move.l %d0, %acc0 | %acc0 = previous sample | 174 | move.l 4(%a0, %d1.l*4), %d6 | %d6 = last = last_sample[ch] |
167 | move.l %d5, %d0 | frac = (phase << 16) >> 1 | 175 | tpf.l | trap next move.l (last = s[pos - 1]) |
176 | 12: | | ||
177 | move.l -4(%a3, %d0.l*4), %d6 | %d6 = last = s[pos - 1] | ||
178 | cmp.l #0x10000, %d2 | delta >= 1.0? | ||
179 | bhs.b .lrs_downsample | yes? downsampling | ||
180 | | | ||
181 | /** Upsampling **/ | | ||
182 | lea.l (%a3, %d0.l*4), %a3 | %a3 = &s[pos] | ||
183 | sub.l %d4, %d0 | %d0 = pos - srcrem = -dte | ||
184 | lsl.l %d7, %d2 | move delta to bits 30..15 | ||
185 | lsr.l #1, %d2 | | ||
186 | lsl.l %d7, %d3 | move phase to bits 30..15 | ||
187 | lsr.l #1, %d3 | | ||
188 | move.l (%a3)+, %a5 | %a5 = s[pos] | ||
189 | move.l %a5, %a6 | %a6 = diff = s[pos] - last | ||
190 | sub.l %d6, %a6 | | ||
191 | bra.b 22f | | ||
192 | /* Funky loop structure is to avoid emac latency stalls */ | ||
193 | 20: | | ||
194 | move.l (%a3)+, %a5 | %a5 = s[pos] | ||
195 | move.l %a5, %a6 | %a6 = diff = s[pos] - last | ||
196 | sub.l %d6, %a6 | | ||
197 | 21: | | ||
198 | movclr.l %acc0, %d7 | *d++ = %d7 = result | ||
199 | move.l %d7, (%a4)+ | | ||
200 | 22: | | ||
201 | move.l %d6, %acc0 | %acc0 = last | ||
202 | mac.l %d3, %a6, %acc0 | %acc0 += frac * diff | ||
203 | subq.l #1, %d5 | dstrem <= 0? | ||
204 | ble.b 23f | yes? stop | ||
205 | add.l %d2, %d3 | phase += delta | ||
206 | bpl.b 21b | load next values? | ||
207 | move.l %a5, %d6 | | ||
208 | bclr.l #31, %d3 | clear sign bit | ||
209 | addq.l #1, %d0 | dte > 0? | ||
210 | bmi.b 20b | yes? continue resampling | ||
211 | tpf.w | trap next add.l (phase += delta) | ||
212 | 23: | | ||
213 | add.l %d2, %d3 | phase += delta | ||
214 | lsl.l #1, %d3 | frac -> phase | ||
215 | bcs.b 24f | was sign bit set? | ||
216 | tpf.l | | ||
217 | 24: | | ||
218 | move.l %a5, %d6 | yes? was going to move to new s[pos] | ||
219 | addq.l #1, %d0 | | ||
220 | movclr.l %acc0, %d7 | *d = %d7 = result | ||
221 | move.l %d7, (%a4) | | ||
222 | add.l %d4, %d0 | %d0 = -dte + srcrem = pos | ||
223 | or.l %d0, %d3 | restore phase | ||
224 | swap.w %d3 | | ||
225 | moveq.l #16, %d7 | %d7 = shift | ||
226 | bra.b .lrs_channel_complete | | ||
227 | | | ||
228 | /** Downsampling **/ | | ||
229 | .lrs_downsample: | | ||
230 | move.l (%a3, %d0.l*4), %a5 | %a5 = s[pos] | ||
231 | bra.b 31f | | ||
232 | 30: | | ||
233 | lea.l -4(%a3, %d0.l*4), %a5 | %d6 = s[pos - 1], %a5 = s[pos] | ||
234 | movem.l (%a5), %d6/%a5 | | ||
235 | 31: | | ||
236 | move.l %d6, %acc0 | %acc0 = last | ||
237 | sub.l %d6, %a5 | %a5 = diff = s[pos] - s[pos - 1] | ||
238 | move.l %d3, %d0 | frac = (phase << 16) >> 1 | ||
168 | lsl.l %d7, %d0 | | 239 | lsl.l %d7, %d0 | |
169 | lsr.l #1, %d0 | | 240 | lsr.l #1, %d0 | |
170 | mac.l %d0, %d1, %acc0 | %acc0 += frac * diff | 241 | mac.l %d0, %a5, %acc0 | %acc0 += frac * diff |
171 | add.l %d4, %d5 | phase += delta | 242 | add.l %d2, %d3 | phase += delta |
172 | move.l %d5, %d6 | pos = phase >> 16 | 243 | move.l %d3, %d0 | pos = phase >> 16 |
173 | lsr.l %d7, %d6 | | 244 | lsr.l %d7, %d0 | |
174 | movclr.l %acc0, %d0 | | 245 | movclr.l %acc0, %a5 | |
175 | move.l %d0, (%a4)+ | *d++ = %d0 | 246 | move.l %a5, (%a4)+ | *d++ = %a5 |
176 | cmp.l %d2, %d6 | pos < count? | 247 | subq.l #1, %d5 | dst full? |
177 | blt.b 20b | resample loop | yes? continue resampling | 248 | ble.b 32f | yes? stop |
178 | 40: | skip resample loop | | 249 | cmp.l %d4, %d0 | pos < srcrem? |
179 | subq.l #1, %d3 | ch > 0? | 250 | blt.b 30b | yes? continue resampling |
180 | bgt.b 10b | channel loop | yes? process next channel | 251 | tpf.l | trap cmp.l and ble.b |
181 | lsl.l %d7, %d2 | wrap phase to start of next frame | 252 | 32: | |
182 | sub.l %d2, %d5 | data->resample_data.phase = | 253 | cmp.l %d4, %d0 | pos = MIN(pos, srcrem) |
183 | move.l %d5, 12(%a0) | ... phase - (count << 16) | 254 | ble.b 33f | |
184 | move.l %a4, %d0 | return d - d[0] | 255 | move.l %d4, %d0 | |
185 | sub.l (%a2), %d0 | | 256 | 33: | |
186 | asr.l #2, %d0 | convert bytes->samples | 257 | move.l -4(%a3, %d0.l*4), %d6 | %d6 = s[pos - 1] |
187 | movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables | 258 | | |
188 | lea.l 40(%sp), %sp | cleanup stack | 259 | .lrs_channel_complete: | |
260 | move.l %d6, 4(%a0, %d1.l*4) | last_sample[ch] = last | ||
261 | subq.l #1, %d1 | ch > 0? | ||
262 | bgt.w .lrs_channel_loop | yes? process next channel | ||
263 | | | ||
264 | move.l 12(%a2), %d1 | %d1 = dst->bufcount | ||
265 | sub.l %d5, %d1 | written = dst->bufcount - dstrem | ||
266 | move.l %d1, (%a2) | dst->remcount = written | ||
267 | move.l %d0, %d1 | wrap phase to position in next frame | ||
268 | lsl.l %d7, %d1 | data->phase = phase - (pos << 16) | ||
269 | sub.l %d1, %d3 | ... | ||
270 | move.l %d3, 4(%a0) | ... | ||
271 | movem.l (%sp), %d2-%d7/%a2-%a6 | restore non-volatiles | ||
272 | lea.l 44(%sp), %sp | cleanup stack | ||
189 | rts | buh-bye | 273 | rts | buh-bye |
190 | .size dsp_downsample,.-dsp_downsample | ||
191 | 274 | ||
192 | /**************************************************************************** | 275 | .size lin_resample_resample, .-lin_resample_resample |
193 | * int dsp_upsample(int count, struct dsp_data *dsp, | 276 | |
194 | * const int32_t *src[], int32_t *dst[]) | ||
195 | */ | ||
196 | .section .text | ||
197 | .align 2 | ||
198 | .global dsp_upsample | ||
199 | dsp_upsample: | ||
200 | lea.l -40(%sp), %sp | save non-clobberables | ||
201 | movem.l %d2-%d7/%a2-%a5, (%sp) | | ||
202 | movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count | ||
203 | | %a0 = data | ||
204 | | %a1 = src | ||
205 | | %a2 = dst | ||
206 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels | ||
207 | | %d4 = delta = data->resample_data.delta | ||
208 | swap %d4 | swap delta to high word to use... | ||
209 | | ...carries to increment position | ||
210 | 10: | channel loop | | ||
211 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase | ||
212 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] | ||
213 | lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] | ||
214 | lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1] | ||
215 | move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] | ||
216 | move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] | ||
217 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] | ||
218 | move.l (%a3)+, %d1 | fetch first sample - might throw this... | ||
219 | | ...away later but we'll be preincremented | ||
220 | move.l %d1, %d6 | save sample value | ||
221 | sub.l %d0, %d1 | %d1 = diff = s[0] - last | ||
222 | swap %d5 | swap phase to high word to use | ||
223 | | carries to increment position | ||
224 | move.l %d5, %d7 | %d7 = pos = phase >> 16 | ||
225 | clr.w %d5 | | ||
226 | eor.l %d5, %d7 | pos == 0? | ||
227 | beq.b 40f | loop start | yes? start loop | ||
228 | cmp.l %d2, %d7 | past end of samples? | ||
229 | bge.b 50f | skip resample loop| yes? go to next channel and collect info | ||
230 | lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1] | ||
231 | movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos] | ||
232 | move.l %d1, %d6 | save sample value | ||
233 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] | ||
234 | bra.b 40f | loop start | | ||
235 | 20: | next sample loop | | ||
236 | move.l %d6, %d0 | move previous sample to %d0 | ||
237 | move.l (%a3)+, %d1 | fetch next sample | ||
238 | move.l %d1, %d6 | save sample value | ||
239 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] | ||
240 | 30: | same sample loop | | ||
241 | movclr.l %acc0, %d7 | %d7 = result | ||
242 | move.l %d7, (%a4)+ | *d++ = %d7 | ||
243 | 40: | loop start | | ||
244 | lsr.l #1, %d5 | make phase into frac | ||
245 | move.l %d0, %acc0 | %acc0 = s[pos-1] | ||
246 | mac.l %d1, %d5, %acc0 | %acc0 = diff * frac | ||
247 | lsl.l #1, %d5 | restore frac to phase | ||
248 | add.l %d4, %d5 | phase += delta | ||
249 | bcc.b 30b | same sample loop | load next values? | ||
250 | cmp.l %a5, %a3 | src <= src_end? | ||
251 | bls.b 20b | next sample loop | yes? continue resampling | ||
252 | movclr.l %acc0, %d7 | %d7 = result | ||
253 | move.l %d7, (%a4)+ | *d++ = %d7 | ||
254 | 50: | skip resample loop | | ||
255 | subq.l #1, %d3 | ch > 0? | ||
256 | bgt.b 10b | channel loop | yes? process next channel | ||
257 | swap %d5 | wrap phase to start of next frame | ||
258 | move.l %d5, 12(%a0) | ...and save in data->resample_data.phase | ||
259 | move.l %a4, %d0 | return d - d[0] | ||
260 | sub.l (%a2), %d0 | | ||
261 | movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables | ||
262 | asr.l #2, %d0 | convert bytes->samples | ||
263 | lea.l 40(%sp), %sp | cleanup stack | ||
264 | rts | buh-bye | ||
265 | .size dsp_upsample,.-dsp_upsample | ||
266 | 277 | ||
267 | /**************************************************************************** | 278 | /**************************************************************************** |
268 | * void channels_process_sound_chan_mono(int count, int32_t *buf[]) | 279 | * void channel_mode_proc_mono(struct dsp_proc_entry *this, |
280 | * struct dsp_buffer **buf_p) | ||
269 | * | 281 | * |
270 | * Mix left and right channels 50/50 into a center channel. | 282 | * Mix left and right channels 50/50 into a center channel. |
271 | */ | 283 | */ |
272 | .section .text | 284 | .section .text |
273 | .align 2 | 285 | .align 2 |
274 | .global channels_process_sound_chan_mono | 286 | .global channel_mode_proc_mono |
275 | channels_process_sound_chan_mono: | 287 | channel_mode_proc_mono: |
276 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | 288 | | input: 4(sp) = this, 8(sp) = buf_p |
289 | move.l 8(%sp), %a0 | %a0 = buf_p | ||
290 | move.l (%a0), %a0 | %a0 = buf = *buf_p | ||
277 | lea.l -20(%sp), %sp | save registers | 291 | lea.l -20(%sp), %sp | save registers |
278 | movem.l %d2-%d4/%a2-%a3, (%sp) | | 292 | movem.l %d2-%d4/%a2-%a3, (%sp) | |
279 | movem.l (%a0), %a0-%a1 | get channel pointers | 293 | movem.l (%a0), %d0/%a0-%a1 | %d0 = buf->remcount, %a0 = buf->p32[0], |
294 | | %a1 = buf->p32[1] | ||
280 | move.l %a0, %a2 | use separate dst pointers since read | 295 | move.l %a0, %a2 | use separate dst pointers since read |
281 | move.l %a1, %a3 | pointers run one ahead of write | 296 | move.l %a1, %a3 | pointers run one ahead of write |
282 | move.l #0x40000000, %d3 | %d3 = 0.5 | 297 | move.l #0x40000000, %d3 | %d3 = 0.5 |
@@ -301,26 +316,29 @@ channels_process_sound_chan_mono: | |||
301 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers | 316 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers |
302 | lea.l 20(%sp), %sp | cleanup | 317 | lea.l 20(%sp), %sp | cleanup |
303 | rts | | 318 | rts | |
304 | .size channels_process_sound_chan_mono, \ | 319 | .size channel_mode_proc_mono, .-channel_mode_proc_mono |
305 | .-channels_process_sound_chan_mono | ||
306 | 320 | ||
307 | /**************************************************************************** | 321 | /**************************************************************************** |
308 | * void channels_process_sound_chan_custom(int count, int32_t *buf[]) | 322 | * void channel_mode_proc_custom(struct dsp_proc_entry *this, |
323 | * struct dsp_buffer **buf_p) | ||
309 | * | 324 | * |
310 | * Apply stereo width (narrowing/expanding) effect. | 325 | * Apply stereo width (narrowing/expanding) effect. |
311 | */ | 326 | */ |
312 | .section .text | 327 | .section .text |
313 | .align 2 | 328 | .align 2 |
314 | .global channels_process_sound_chan_custom | 329 | .global channel_mode_proc_custom |
315 | channels_process_sound_chan_custom: | 330 | channel_mode_proc_custom: |
316 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | 331 | | input: 4(sp) = this, 8(sp) = buf_p |
317 | lea.l -28(%sp), %sp | save registers | 332 | lea.l -28(%sp), %sp | save registers |
318 | movem.l %d2-%d6/%a2-%a3, (%sp) | | 333 | movem.l %d2-%d6/%a2-%a3, (%sp) | |
319 | movem.l (%a0), %a0-%a1 | get channel pointers | 334 | movem.l 32(%sp), %a0-%a1 | %a0 = this, %a1 = buf_p |
335 | move.l (%a1), %a1 | %a1 = buf = *buf_p | ||
336 | move.l (%a0), %a2 | %a2 = this->data = &channel_mode_data | ||
337 | movem.l (%a1), %d0/%a0-%a1 | %d0 = buf->remcount, %a0 = buf->p32[0], | ||
338 | | %a1 = buf->p32[1] | ||
339 | movem.l (%a2), %d3-%d4 | %d3 = sw_gain, %d4 = sw_cross | ||
320 | move.l %a0, %a2 | use separate dst pointers since read | 340 | move.l %a0, %a2 | use separate dst pointers since read |
321 | move.l %a1, %a3 | pointers run one ahead of write | 341 | move.l %a1, %a3 | pointers run one ahead of write |
322 | move.l dsp_sw_gain, %d3 | load straight (mid) gain | ||
323 | move.l dsp_sw_cross, %d4 | load cross (side) gain | ||
324 | move.l (%a0)+, %d1 | prime the input registers | 342 | move.l (%a0)+, %d1 | prime the input registers |
325 | move.l (%a1)+, %d2 | | 343 | move.l (%a1)+, %d2 | |
326 | mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross | 344 | mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross |
@@ -348,22 +366,25 @@ channels_process_sound_chan_custom: | |||
348 | movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers | 366 | movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers |
349 | lea.l 28(%sp), %sp | cleanup | 367 | lea.l 28(%sp), %sp | cleanup |
350 | rts | | 368 | rts | |
351 | .size channels_process_sound_chan_custom, \ | 369 | .size channel_mode_proc_custom, .-channel_mode_proc_custom |
352 | .-channels_process_sound_chan_custom | ||
353 | 370 | ||
354 | /**************************************************************************** | 371 | /**************************************************************************** |
355 | * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) | 372 | * void channel_mode_proc_karaoke(struct dsp_proc_entry *this, |
373 | * struct dsp_buffer **buf_p) | ||
356 | * | 374 | * |
357 | * Separate channels into side channels. | 375 | * Separate channels into side channels. |
358 | */ | 376 | */ |
359 | .section .text | 377 | .section .text |
360 | .align 2 | 378 | .align 2 |
361 | .global channels_process_sound_chan_karaoke | 379 | .global channel_mode_proc_karaoke |
362 | channels_process_sound_chan_karaoke: | 380 | channel_mode_proc_karaoke: |
363 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | 381 | | input: 4(sp) = this, 8(sp) = buf_p |
382 | move.l 8(%sp), %a0 | %a0 = buf_p | ||
383 | move.l (%a0), %a0 | %a0 = buf = *buf_p | ||
364 | lea.l -20(%sp), %sp | save registers | 384 | lea.l -20(%sp), %sp | save registers |
365 | movem.l %d2-%d4/%a2-%a3, (%sp) | | 385 | movem.l %d2-%d4/%a2-%a3, (%sp) | |
366 | movem.l (%a0), %a0-%a1 | get channel src pointers | 386 | movem.l (%a0), %d0/%a0-%a1 | %d0 = buf->remcount, %a0 = buf->p32[0], |
387 | | %a1 = buf->p32[1] | ||
367 | move.l %a0, %a2 | use separate dst pointers since read | 388 | move.l %a0, %a2 | use separate dst pointers since read |
368 | move.l %a1, %a3 | pointers run one ahead of write | 389 | move.l %a1, %a3 | pointers run one ahead of write |
369 | move.l #0x40000000, %d3 | %d3 = 0.5 | 390 | move.l #0x40000000, %d3 | %d3 = 0.5 |
@@ -390,12 +411,90 @@ channels_process_sound_chan_karaoke: | |||
390 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers | 411 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers |
391 | lea.l 20(%sp), %sp | cleanup | 412 | lea.l 20(%sp), %sp | cleanup |
392 | rts | | 413 | rts | |
393 | .size channels_process_sound_chan_karaoke, \ | 414 | .size channel_mode_proc_karaoke, .-channel_mode_proc_karaoke |
394 | .-channels_process_sound_chan_karaoke | 415 | |
416 | /**************************************************************************** | ||
417 | * void filter_process(struct dsp_filter *f, int32_t *buf[], int count, | ||
418 | * unsigned int channels) | ||
419 | * | ||
420 | * Define HIGH_PRECISION as '1' to make filtering calculate lower bits after | ||
421 | * shifting. Without this, "shift" - 1 of the lower bits will be lost here. | ||
422 | */ | ||
423 | #define HIGH_PRECISION 0 | ||
424 | .text | ||
425 | .global filter_process | ||
426 | filter_process: | ||
427 | | input: 4(sp) = f, 8(sp) = buf, 12(sp) = count, 16(sp) = channels | ||
428 | lea.l -44(%sp), %sp | save clobbered regs | ||
429 | #if HIGH_PRECISION | ||
430 | movem.l %d2-%d7/%a2-%a6, (%sp) | . | ||
431 | #else | ||
432 | movem.l %d2-%d6/%a2-%a6, (%sp) | | ||
433 | #endif | ||
434 | move.l 48(%sp), %a5 | fetch filter structure address | ||
435 | clr.l %d6 | load shift count | ||
436 | move.b 52(%a5), %d6 | . | ||
437 | subq.l #1, %d6 | EMAC gives us one free shift | ||
438 | #if HIGH_PRECISION | ||
439 | moveq.l #8, %d7 | ||
440 | sub.l %d6, %d7 | shift for lower part of accumulator | ||
441 | #endif | ||
442 | movem.l (%a5), %a0-%a4 | load coefs | ||
443 | lea.l 20(%a5), %a5 | point to filter history | ||
444 | |||
445 | 10: | channel loop | ||
446 | move.l 52(%sp), %a6 | load input channel pointer | ||
447 | addq.l #4, 52(%sp) | point x to next channel | ||
448 | move.l (%a6), %a6 | | ||
449 | move.l 56(%sp), %d5 | number of samples | ||
450 | movem.l (%a5), %d0-%d3 | load filter history | ||
451 | |||
452 | | d0-d3 = history, d4 = temp, d5 = sample count, d6 = upper shift amount, | ||
453 | | d7 = lower shift amount, a0-a4 = coefs, a5 = history pointer, a6 = buf[ch] | ||
454 | 20: | loop | ||
455 | | Direct form 1 filtering code. We assume DSP has put EMAC in frac mode. | ||
456 | | y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2], | ||
457 | | where y[] is output and x[] is input. This is performed out of order | ||
458 | | to do parallel load of input value. | ||
459 | mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2] | ||
460 | move.l %d0, %d1 | fix input history | ||
461 | mac.l %a1, %d0, (%a6), %d0, %acc0 | acc += b1*x[i - 1], x[i] -> d0 | ||
462 | mac.l %a0, %d0, %acc0 | acc += b0*x[i] | ||
463 | mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1] | ||
464 | mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2] | ||
465 | move.l %d2, %d3 | fix output history | ||
466 | #if HIGH_PRECISION | ||
467 | move.l %accext01, %d2 | fetch lower part of accumulator | ||
468 | move.b %d2, %d4 | clear upper three bytes | ||
469 | lsr.l %d7, %d4 | shift lower bits | ||
470 | #endif | ||
471 | movclr.l %acc0, %d2 | fetch upper part of result | ||
472 | asl.l %d6, %d2 | restore fixed point format | ||
473 | #if HIGH_PRECISION | ||
474 | or.l %d2, %d4 | combine lower and upper parts | ||
475 | #endif | ||
476 | move.l %d2, (%a6)+ | save result | ||
477 | subq.l #1, %d5 | are we done with this channel? | ||
478 | bgt 20b | loop | ||
479 | |||
480 | movem.l %d0-%d3, (%a5) | save history back to struct | ||
481 | lea.l 16(%a5), %a5 | point to next channel's history | ||
482 | subq.l #1, 60(%sp) | have we processed both channels? | ||
483 | bhi 10b | channel loop | ||
484 | |||
485 | #if HIGH_PRECISION | ||
486 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
487 | #else | ||
488 | movem.l (%sp), %d2-%d6/%a2-%a6 | ||
489 | #endif | ||
490 | lea.l 44(%sp), %sp | ||
491 | rts | ||
492 | .size filter_process, .-filter_process | ||
395 | 493 | ||
396 | /**************************************************************************** | 494 | /**************************************************************************** |
397 | * void sample_output_stereo(int count, struct dsp_data *data, | 495 | * void sample_output_stereo(struct sample_io_data *this, |
398 | * const int32_t *src[], int16_t *dst) | 496 | * struct dsp_buffer *src, |
497 | * struct dsp_buffer *dst) | ||
399 | * | 498 | * |
400 | * Framework based on the ubiquitous Rockbox line transfer logic for | 499 | * Framework based on the ubiquitous Rockbox line transfer logic for |
401 | * Coldfire CPUs. | 500 | * Coldfire CPUs. |
@@ -417,20 +516,24 @@ channels_process_sound_chan_karaoke: | |||
417 | .align 2 | 516 | .align 2 |
418 | .global sample_output_stereo | 517 | .global sample_output_stereo |
419 | sample_output_stereo: | 518 | sample_output_stereo: |
519 | | input: 4(sp) = this, 8(sp) = src, 12(sp) = dst | ||
420 | lea.l -48(%sp), %sp | save registers | 520 | lea.l -48(%sp), %sp | save registers |
421 | move.l %macsr, %d1 | do it now as at many lines will | 521 | move.l %macsr, %d1 | do it now as at many lines will |
422 | movem.l %d1-%d7/%a2-%a6, (%sp) | be the far more common condition | 522 | movem.l %d1-%d7/%a2-%a6, (%sp) | be the far more common condition |
423 | move.l #0x80, %macsr | put emac unit in signed int mode | 523 | move.l #0x80, %macsr | put emac unit in signed int mode |
424 | movem.l 52(%sp), %a0-%a2/%a4 | | 524 | movem.l 52(%sp), %a0-%a2 | %a0 = this, %a1 = src, %a2 = dst |
425 | lea.l (%a4, %a0.l*4), %a0 | %a0 = end address | 525 | move.l (%a0), %a0 | %a0 = this->outcount |
426 | move.l (%a1), %d1 | %a1 = multiplier: (1 << (16 - scale)) | 526 | move.l 4(%a2), %a4 | %a4 = dst->p16out |
527 | lea.l (%a4, %a0.l*4), %a0 | %a0 = count -> end address | ||
528 | movem.l 4(%a1), %a2-%a3 | %a2 = src->p32[0], %a3 = src->p32[1] | ||
529 | clr.l %d1 | %a1 = multiplier: (1 << (16 - scale)) | ||
530 | move.b 19(%a1), %d1 | %d1 = src->format.output_scale | ||
427 | sub.l #16, %d1 | | 531 | sub.l #16, %d1 | |
428 | neg.l %d1 | | 532 | neg.l %d1 | |
429 | moveq.l #1, %d0 | | 533 | moveq.l #1, %d0 | |
430 | asl.l %d1, %d0 | | 534 | asl.l %d1, %d0 | |
431 | move.l %d0, %a1 | | 535 | move.l %d0, %a1 | |
432 | move.l #0x8000, %a6 | %a6 = rounding term | 536 | move.l #0x8000, %a6 | %a6 = rounding term |
433 | movem.l (%a2), %a2-%a3 | get L/R channel pointers | ||
434 | moveq.l #28, %d0 | %d0 = second line bound | 537 | moveq.l #28, %d0 | %d0 = second line bound |
435 | add.l %a4, %d0 | | 538 | add.l %a4, %d0 | |
436 | and.l #0xfffffff0, %d0 | | 539 | and.l #0xfffffff0, %d0 | |
@@ -447,7 +550,7 @@ sample_output_stereo: | |||
447 | mac.l %d2, %a1, %acc1 | shift R to high word | 550 | mac.l %d2, %a1, %acc1 | shift R to high word |
448 | movclr.l %acc0, %d1 | get possibly saturated results | 551 | movclr.l %acc0, %d1 | get possibly saturated results |
449 | movclr.l %acc1, %d2 | | 552 | movclr.l %acc1, %d2 | |
450 | swap %d2 | move R to low word | 553 | swap.w %d2 | move R to low word |
451 | move.w %d2, %d1 | interleave MS 16 bits of each | 554 | move.w %d2, %d1 | interleave MS 16 bits of each |
452 | move.l %d1, (%a4)+ | ...and write both | 555 | move.l %d1, (%a4)+ | ...and write both |
453 | cmp.l %a4, %d0 | | 556 | cmp.l %a4, %d0 | |
@@ -477,10 +580,10 @@ sample_output_stereo: | |||
477 | mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation | 580 | mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation |
478 | mac.l %d2, %a1, (%a2)+, %d3, %acc2 | | 581 | mac.l %d2, %a1, (%a2)+, %d3, %acc2 | |
479 | mac.l %d3, %a1 , %acc3 | | 582 | mac.l %d3, %a1 , %acc3 | |
480 | swap %d4 | a) interleave most significant... | 583 | swap.w %d4 | a) interleave most significant... |
481 | swap %d5 | | 584 | swap.w %d5 | |
482 | swap %d6 | | 585 | swap.w %d6 | |
483 | swap %d7 | | 586 | swap.w %d7 | |
484 | movclr.l %acc0, %d0 | obtain L results | 587 | movclr.l %acc0, %d0 | obtain L results |
485 | movclr.l %acc1, %d1 | | 588 | movclr.l %acc1, %d1 | |
486 | movclr.l %acc2, %d2 | | 589 | movclr.l %acc2, %d2 | |
@@ -503,7 +606,7 @@ sample_output_stereo: | |||
503 | mac.l %d2, %a1, %acc1 | | 606 | mac.l %d2, %a1, %acc1 | |
504 | movclr.l %acc0, %d1 | | 607 | movclr.l %acc0, %d1 | |
505 | movclr.l %acc1, %d2 | | 608 | movclr.l %acc1, %d2 | |
506 | swap %d2 | | 609 | swap.w %d2 | |
507 | move.w %d2, %d1 | | 610 | move.w %d2, %d1 | |
508 | move.l %d1, (%a4)+ | | 611 | move.l %d1, (%a4)+ | |
509 | cmp.l %a4, %a0 | | 612 | cmp.l %a4, %a0 | |
@@ -516,8 +619,9 @@ sample_output_stereo: | |||
516 | .size sample_output_stereo, .-sample_output_stereo | 619 | .size sample_output_stereo, .-sample_output_stereo |
517 | 620 | ||
518 | /**************************************************************************** | 621 | /**************************************************************************** |
519 | * void sample_output_mono(int count, struct dsp_data *data, | 622 | * void sample_output_mono(struct sample_io_data *this, |
520 | * const int32_t *src[], int16_t *dst) | 623 | * struct dsp_buffer *src, |
624 | * struct dsp_buffer *dst) | ||
521 | * | 625 | * |
522 | * Same treatment as sample_output_stereo but for one channel. | 626 | * Same treatment as sample_output_stereo but for one channel. |
523 | */ | 627 | */ |
@@ -525,19 +629,23 @@ sample_output_stereo: | |||
525 | .align 2 | 629 | .align 2 |
526 | .global sample_output_mono | 630 | .global sample_output_mono |
527 | sample_output_mono: | 631 | sample_output_mono: |
632 | | input: 4(sp) = this, 8(sp) = src, 12(sp) = dst | ||
528 | lea.l -32(%sp), %sp | save registers | 633 | lea.l -32(%sp), %sp | save registers |
529 | move.l %macsr, %d1 | do it now as at many lines will | 634 | move.l %macsr, %d1 | do it now as at many lines will |
530 | movem.l %d1-%d5/%a2-%a4, (%sp) | be the far more common condition | 635 | movem.l %d1-%d5/%a2-%a4, (%sp) | be the far more common condition |
531 | move.l #0x80, %macsr | put emac unit in signed int mode | 636 | move.l #0x80, %macsr | put emac unit in signed int mode |
532 | movem.l 36(%sp), %a0-%a3 | | 637 | movem.l 36(%sp), %a0-%a2 | %a0 = this, %a1 = src, %a2 = dst |
533 | lea.l (%a3, %a0.l*4), %a0 | %a0 = end address | 638 | move.l (%a0), %a0 | %a0 = this->outcount |
534 | move.l (%a1), %d1 | %d5 = multiplier: (1 << (16 - scale)) | 639 | move.l 4(%a2), %a3 | %a3 = dst->p16out |
640 | movem.l 4(%a1), %a2 | %a2 = src->p32[0] | ||
641 | lea.l (%a3, %a0.l*4), %a0 | %a0 = count -> end address | ||
642 | clr.l %d1 | %d5 = multiplier: (1 << (16 - scale)) | ||
643 | move.b 19(%a1), %d1 | %d1 = src->format.output_scale | ||
535 | sub.l #16, %d1 | | 644 | sub.l #16, %d1 | |
536 | neg.l %d1 | | 645 | neg.l %d1 | |
537 | moveq.l #1, %d5 | | 646 | moveq.l #1, %d5 | |
538 | asl.l %d1, %d5 | | 647 | asl.l %d1, %d5 | |
539 | move.l #0x8000, %a4 | %a4 = rounding term | 648 | move.l #0x8000, %a4 | %a4 = rounding term |
540 | movem.l (%a2), %a2 | get source channel pointer | ||
541 | moveq.l #28, %d0 | %d0 = second line bound | 649 | moveq.l #28, %d0 | %d0 = second line bound |
542 | add.l %a3, %d0 | | 650 | add.l %a3, %d0 | |
543 | and.l #0xfffffff0, %d0 | | 651 | and.l #0xfffffff0, %d0 | |
@@ -552,7 +660,7 @@ sample_output_mono: | |||
552 | mac.l %d1, %d5, %acc0 | shift L to high word | 660 | mac.l %d1, %d5, %acc0 | shift L to high word |
553 | movclr.l %acc0, %d1 | get possibly saturated results | 661 | movclr.l %acc0, %d1 | get possibly saturated results |
554 | move.l %d1, %d2 | | 662 | move.l %d1, %d2 | |
555 | swap %d2 | move R to low word | 663 | swap.w %d2 | move R to low word |
556 | move.w %d2, %d1 | duplicate single channel into | 664 | move.w %d2, %d1 | duplicate single channel into |
557 | move.l %d1, (%a3)+ | L and R | 665 | move.l %d1, (%a3)+ | L and R |
558 | cmp.l %a3, %d0 | | 666 | cmp.l %a3, %d0 | |
@@ -575,16 +683,16 @@ sample_output_mono: | |||
575 | movclr.l %acc2, %d2 | | 683 | movclr.l %acc2, %d2 | |
576 | movclr.l %acc3, %d3 | | 684 | movclr.l %acc3, %d3 | |
577 | move.l %d0, %d4 | duplicate single channel | 685 | move.l %d0, %d4 | duplicate single channel |
578 | swap %d4 | into L and R | 686 | swap.w %d4 | into L and R |
579 | move.w %d4, %d0 | | 687 | move.w %d4, %d0 | |
580 | move.l %d1, %d4 | | 688 | move.l %d1, %d4 | |
581 | swap %d4 | | 689 | swap.w %d4 | |
582 | move.w %d4, %d1 | | 690 | move.w %d4, %d1 | |
583 | move.l %d2, %d4 | | 691 | move.l %d2, %d4 | |
584 | swap %d4 | | 692 | swap.w %d4 | |
585 | move.w %d4, %d2 | | 693 | move.w %d4, %d2 | |
586 | move.l %d3, %d4 | | 694 | move.l %d3, %d4 | |
587 | swap %d4 | | 695 | swap.w %d4 | |
588 | move.w %d4, %d3 | | 696 | move.w %d4, %d3 | |
589 | movem.l %d0-%d3, -16(%a3) | write four stereo samples | 697 | movem.l %d0-%d3, -16(%a3) | write four stereo samples |
590 | cmp.l %a3, %a1 | | 698 | cmp.l %a3, %a1 | |
@@ -598,7 +706,7 @@ sample_output_mono: | |||
598 | mac.l %d1, %d5, %acc0 | the same way as leading ones | 706 | mac.l %d1, %d5, %acc0 | the same way as leading ones |
599 | movclr.l %acc0, %d1 | | 707 | movclr.l %acc0, %d1 | |
600 | move.l %d1, %d2 | | 708 | move.l %d1, %d2 | |
601 | swap %d2 | | 709 | swap.w %d2 | |
602 | move.w %d2, %d1 | | 710 | move.w %d2, %d1 | |
603 | move.l %d1, (%a3)+ | | 711 | move.l %d1, (%a3)+ | |
604 | cmp.l %a3, %a0 | | 712 | cmp.l %a3, %a0 | |