summary refs log tree commit diff
path: root/apps/dsp_cf.S
diff options
context:
space:
mode:
Diffstat (limited to 'apps/dsp_cf.S')
-rw-r--r-- apps/dsp_cf.S 424
1 files changed, 254 insertions, 170 deletions
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index af9ac1fa4b..e5d3ee8c55 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -19,68 +19,117 @@
19 ****************************************************************************/ 19 ****************************************************************************/
20 20
21/**************************************************************************** 21/****************************************************************************
22 * void apply_crossfeed(int count, int32_t *src[]) 22 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
23 */ 23 */
24 .section .text 24 .section .text
25 .align 2
26 .global dsp_apply_gain
27dsp_apply_gain:
28 lea.l -20(%sp), %sp | save registers
29 movem.l %d2-%d4/%a2-%a3, (%sp) |
30 movem.l 28(%sp), %a0-%a1 | %a0 = data,
31 | %a1 = buf
32 move.l 4(%a0), %d1 | %d1 = data->num_channels
33 move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
3410: | channel loop |
35 move.l 24(%sp), %d0 | %d0 = count
36 move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1]
37 move.l %a2, %a3 | %a3 = d = s
38 move.l (%a2)+, %d2 | %d2 = *s++,
39 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
40 subq.l #1, %d0 | --count > 0 ? : effectively n++
41 ble.b 30f | loop done | no? finish up
4220: | loop |
43 move.l %accext01, %d4 | fetch S(n-1)[7:0]
44 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
45 asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
46 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
47 move.b %d4, %d3 |
48 move.l %d3, (%a3)+ |
49 subq.l #1, %d0 | --count > 0 ? : effectively n++
50 bgt.b 20b | loop | yes? do more samples
5130: | loop done |
52 move.l %accext01, %d4 | fetch S(n-1)[7:0]
53 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
54 asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
55 move.b %d4, %d3 |
56 move.l %d3, (%a3) |
57 subq.l #1, %d1 | next channel
58 bgt.b 10b | channel loop |
59 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
60 lea.l 20(%sp), %sp | cleanup stack
61 rts |
62 .size dsp_apply_gain,.-dsp_apply_gain
63
64/****************************************************************************
65 * void apply_crossfeed(int count, int32_t *buf[])
66 */
67 .section .text
68 .align 2
25 .global apply_crossfeed 69 .global apply_crossfeed
26apply_crossfeed: 70apply_crossfeed:
27 lea.l -44(%sp), %sp 71 lea.l -44(%sp), %sp |
28 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs 72 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
29 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src 73 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
30 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] 74 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
31 lea.l crossfeed_data, %a1 75 lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
32 move.l (%a1)+, %a6 | a6 = direct gain 76 move.l (%a1)+, %d6 | %d6 = direct gain
33 movem.l 12(%a1), %d0-%d3 | fetch filter history samples 77 movem.l 12(%a1), %d0-%d3 | fetch filter history samples
34 move.l 132(%a1), %a0 | fetch delay line address 78 move.l 132(%a1), %a0 | fetch delay line address
35 movem.l (%a1), %a1-%a3 | load filter coefs 79 movem.l (%a1), %a1-%a3 | load filter coefs
80 lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
81 bra.b 20f | loop start | go to loop start point
36 /* Register usage in loop: 82 /* Register usage in loop:
37 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), 83 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
38 * %a4 = src[0], %a5 = src[1], %a6 = direct gain, 84 * %a4 = buf[0], %a5 = buf[1],
85 * %a6 = delay line pointer wrap limit,
39 * %d0..%d3 = history 86 * %d0..%d3 = history
40 * %d4..%d6 = temp. 87 * %d4..%d5 = temp.
88 * %d6 = direct gain,
41 * %d7 = count 89 * %d7 = count
42 */ 90 */
43.cfloop: 9110: | loop |
44 mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n] 92 movclr.l %acc0, %d4 | write outputs
45 mac.l %a1, %d0 , %acc0 | acc += b0*dr[n] 93 move.l %d4, (%a4)+ | .
46 mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L 94 movclr.l %acc1, %d5 | .
47 move.l %acc0, %d1 | get filtered delayed sample 95 move.l %d5, (%a5)+ | .
48 mac.l %a6, %d4, %acc0 | acc += gain*x_l[n] 9620: | loop start |
49 movclr.l %acc0, %d6 | 97 mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n]
50 move.l %d6, (%a4)+ | write result 98 mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n]
51 99 mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R
52 mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n] 100 mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n]
53 mac.l %a1, %d2 , %acc0 | acc += b0*dl[n] 101 mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n]
54 mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R 102 mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L
55 movem.l %d4-%d5, (%a0) | save left & right inputs to delay line 103 movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line
56 move.l %acc0, %d3 | get filtered delayed sample 104 move.l %acc0, %d3 | get filtered delayed left sample (y_l[n])
57 mac.l %a6, %d5, %acc0 | acc += gain*x_r[n] 105 move.l %acc1, %d1 | get filtered delayed right sample (y_r[n])
58 lea.l 8(%a0), %a0 | increment delay pointer 106 mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n]
59 movclr.l %acc0, %d6 | 107 mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
60 move.l %d6, (%a5)+ | write result 108 cmp.l %a6, %a0 | wrap %a0 if passed end
61 109 bhs.b 30f | wrap buffer |
62 cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end 110 .word 0x51fb | tpf.l | trap the buffer wrap
63 bge.b .cfwrap | 11130: | wrap buffer | ...fwd taken branches more costly
64 .word 0x51fb | tpf.l - trap the buffer wrap 112 lea.l -104(%a0), %a0 | wrap it up
65.cfwrap: 113 subq.l #1, %d7 | --count > 0 ?
66 lea.l -104(%a0), %a0 | wrap 114 bgt.b 10b | loop | yes? do more
67 subq.l #1, %d7 | --count < 0 ? 115 movclr.l %acc0, %d4 | write last outputs
68 bgt.b .cfloop | 116 move.l %d4, (%a4) | .
117 movclr.l %acc1, %d5 | .
118 move.l %d5, (%a5) | .
69 lea.l crossfeed_data+16, %a1 | save data back to struct 119 lea.l crossfeed_data+16, %a1 | save data back to struct
70 movem.l %d0-%d3, (%a1) | ...history 120 movem.l %d0-%d3, (%a1) | ...history
71 move.l %a0, 120(%a1) | ...delay_p 121 move.l %a0, 120(%a1) | ...delay_p
72 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs 122 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
73 lea.l 44(%sp), %sp 123 lea.l 44(%sp), %sp |
74 rts 124 rts |
75.cfend: 125 .size apply_crossfeed,.-apply_crossfeed
76 .size apply_crossfeed,.cfend-apply_crossfeed
77
78 126
79/**************************************************************************** 127/****************************************************************************
80 * int dsp_downsample(int count, struct dsp_data *data, 128 * int dsp_downsample(int count, struct dsp_data *data,
81 * int32_t *src[], int32_t *dst[]) 129 * int32_t *src[], int32_t *dst[])
82 */ 130 */
83 .section .text 131 .section .text
132 .align 2
84 .global dsp_downsample 133 .global dsp_downsample
85dsp_downsample: 134dsp_downsample:
86 lea.l -40(%sp), %sp | save non-clobberables 135 lea.l -40(%sp), %sp | save non-clobberables
@@ -92,7 +141,7 @@ dsp_downsample:
92 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels 141 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
93 | %d4 = delta = data->resample_data.delta 142 | %d4 = delta = data->resample_data.delta
94 moveq.l #16, %d7 | %d7 = shift 143 moveq.l #16, %d7 | %d7 = shift
95.dschannel_loop: 14410: | channel loop |
96 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase 145 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
97 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] 146 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
98 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] 147 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
@@ -102,15 +151,15 @@ dsp_downsample:
102 move.l %d5, %d6 | %d6 = pos = phase >> 16 151 move.l %d5, %d6 | %d6 = pos = phase >> 16
103 lsr.l %d7, %d6 | 152 lsr.l %d7, %d6 |
104 cmp.l %d2, %d6 | past end of samples? 153 cmp.l %d2, %d6 | past end of samples?
105 bge.b .dsloop_skip | yes? skip loop 154 bge.b 40f | skip resample loop| yes? skip loop
106 tst.l %d6 | need last sample of prev. frame? 155 tst.l %d6 | need last sample of prev. frame?
107 bne.b .dsloop | no? start main loop 156 bne.b 20f | resample loop | no? start main loop
108 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] 157 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
109 bra.b .dsuse_last_start | start with last (last in %d0) 158 bra.b 30f | resample start last | start with last (last in %d0)
110.dsloop: 15920: | resample loop |
111 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] 160 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
112 movem.l (%a5), %d0-%d1 | 161 movem.l (%a5), %d0-%d1 |
113.dsuse_last_start: 16230: | resample start last |
114 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] 163 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
115 move.l %d0, %acc0 | %acc0 = previous sample 164 move.l %d0, %acc0 | %acc0 = previous sample
116 move.l %d5, %d0 | frac = (phase << 16) >> 1 165 move.l %d5, %d0 | frac = (phase << 16) >> 1
@@ -123,11 +172,11 @@ dsp_downsample:
123 movclr.l %acc0, %d0 | 172 movclr.l %acc0, %d0 |
124 move.l %d0, (%a4)+ | *d++ = %d0 173 move.l %d0, (%a4)+ | *d++ = %d0
125 cmp.l %d2, %d6 | pos < count? 174 cmp.l %d2, %d6 | pos < count?
126 blt.b .dsloop | yes? continue resampling 175 blt.b 20b | resample loop | yes? continue resampling
127.dsloop_skip: 17640: | skip resample loop |
128 subq.l #1, %d3 | ch > 0? 177 subq.l #1, %d3 | ch > 0?
129 bgt.b .dschannel_loop | yes? process next channel 178 bgt.b 10b | channel loop | yes? process next channel
130 asl.l %d7, %d2 | wrap phase to start of next frame 179 lsl.l %d7, %d2 | wrap phase to start of next frame
131 sub.l %d2, %d5 | data->resample_data.phase = 180 sub.l %d2, %d5 | data->resample_data.phase =
132 move.l %d5, 12(%a0) | ... phase - (count << 16) 181 move.l %d5, 12(%a0) | ... phase - (count << 16)
133 move.l %a4, %d0 | return d - d[0] 182 move.l %a4, %d0 | return d - d[0]
@@ -136,14 +185,14 @@ dsp_downsample:
136 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables 185 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
137 lea.l 40(%sp), %sp | cleanup stack 186 lea.l 40(%sp), %sp | cleanup stack
138 rts | buh-bye 187 rts | buh-bye
139.dsend: 188 .size dsp_downsample,.-dsp_downsample
140 .size dsp_downsample,.dsend-dsp_downsample
141 189
142/**************************************************************************** 190/****************************************************************************
143 * int dsp_upsample(int count, struct dsp_data *dsp, 191 * int dsp_upsample(int count, struct dsp_data *dsp,
144 * in32_t *src[], int32_t *dst[]) 192 * int32_t *src[], int32_t *dst[])
145 */ 193 */
146 .section .text 194 .section .text
195 .align 2
147 .global dsp_upsample 196 .global dsp_upsample
148dsp_upsample: 197dsp_upsample:
149 lea.l -40(%sp), %sp | save non-clobberables 198 lea.l -40(%sp), %sp | save non-clobberables
@@ -154,47 +203,55 @@ dsp_upsample:
154 | %a2 = dst 203 | %a2 = dst
155 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels 204 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
156 | %d4 = delta = data->resample_data.delta 205 | %d4 = delta = data->resample_data.delta
157 swap %d4 | swap delta to high word to use 206 swap %d4 | swap delta to high word to use...
158 | carries to increment position 207 | ...carries to increment position
159.uschannel_loop: 20810: | channel loop |
160 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase 209 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
161 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] 210 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
162 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] 211 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
163 lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count] 212 lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1]
164 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] 213 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
165 move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] 214 move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
166 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] 215 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
216 move.l (%a3)+, %d1 | fetch first sample - might throw this...
217 | ...away later but we'll be preincremented
218 move.l %d1, %d6 | save sample value
219 sub.l %d0, %d1 | %d1 = diff = s[0] - last
167 swap %d5 | swap phase to high word to use 220 swap %d5 | swap phase to high word to use
168 | carries to increment position 221 | carries to increment position
169 move.l %d5, %d6 | %d6 = pos = phase >> 16 222 move.l %d5, %d7 | %d7 = pos = phase >> 16
170 clr.w %d5 | 223 clr.w %d5 |
171 eor.l %d5, %d6 | pos == 0? 224 eor.l %d5, %d7 | pos == 0?
172 beq.b .usstart_0 | no? transition from down 225 beq.b 40f | loop start | yes? start loop
173 cmp.l %d2, %d6 | past end of samples? 226 cmp.l %d2, %d7 | past end of samples?
174 bge.b .usloop_skip | yes? skip loop 227 bge.b 50f | skip resample loop| yes? go to next channel and collect info
175 lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous) 228 lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
176 move.l (%a3)+, %d0 | %d0 = *s++ 229 movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
177 .word 0x51fa | tpf.w - trap next instruction 230 move.l %d1, %d6 | save sample value
178.usloop_1: 231 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
232 bra.b 40f | loop start |
23320: | next sample loop |
179 move.l %d6, %d0 | move previous sample to %d0 234 move.l %d6, %d0 | move previous sample to %d0
180.usstart_0:
181 move.l (%a3)+, %d1 | fetch next sample 235 move.l (%a3)+, %d1 | fetch next sample
182 move.l %d1, %d6 | save sample value 236 move.l %d1, %d6 | save sample value
183 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] 237 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
184.usloop_0: 23830: | same sample loop |
239 movclr.l %acc0, %d7 | %d7 = result
240 move.l %d7, (%a4)+ | *d++ = %d7
24140: | loop start |
185 lsr.l #1, %d5 | make phase into frac 242 lsr.l #1, %d5 | make phase into frac
243 move.l %d0, %acc0 | %acc0 = s[pos-1]
186 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac 244 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
187 lsl.l #1, %d5 | restore frac to phase 245 lsl.l #1, %d5 | restore frac to phase
188 movclr.l %acc0, %d7 | %d7 = product
189 add.l %d0, %d7 | %d7 = last + product
190 move.l %d7, (%a4)+ | *d++ = %d7
191 add.l %d4, %d5 | phase += delta 246 add.l %d4, %d5 | phase += delta
192 bcc.b .usloop_0 | load next values? 247 bcc.b 30b | same sample loop | load next values?
193 cmp.l %a5, %a3 | src <= src_end? 248 cmp.l %a5, %a3 | src <= src_end?
194 ble.b .usloop_1 | yes? continue resampling 249 bls.b 20b | next sample loop | yes? continue resampling
195.usloop_skip: 250 movclr.l %acc0, %d7 | %d7 = result
251 move.l %d7, (%a4)+ | *d++ = %d7
25250: | skip resample loop |
196 subq.l #1, %d3 | ch > 0? 253 subq.l #1, %d3 | ch > 0?
197 bgt.b .uschannel_loop | yes? process next channel 254 bgt.b 10b | channel loop | yes? process next channel
198 swap %d5 | wrap phase to start of next frame 255 swap %d5 | wrap phase to start of next frame
199 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase 256 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
200 move.l %a4, %d0 | return d - d[0] 257 move.l %a4, %d0 | return d - d[0]
@@ -203,12 +260,7 @@ dsp_upsample:
203 asr.l #2, %d0 | convert bytes->samples 260 asr.l #2, %d0 | convert bytes->samples
204 lea.l 40(%sp), %sp | cleanup stack 261 lea.l 40(%sp), %sp | cleanup stack
205 rts | buh-bye 262 rts | buh-bye
206.usend: 263 .size dsp_upsample,.-dsp_upsample
207 .size dsp_upsample,.usend-dsp_upsample
208
209/* These routines might benefit from burst transfers but we'll keep them
210 * small for now since they're rather light weight
211 */
212 264
213/**************************************************************************** 265/****************************************************************************
214 * void channels_process_sound_chan_mono(int count, int32_t *buf[]) 266 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
@@ -216,31 +268,39 @@ dsp_upsample:
216 * Mix left and right channels 50/50 into a center channel. 268 * Mix left and right channels 50/50 into a center channel.
217 */ 269 */
218 .section .text 270 .section .text
271 .align 2
219 .global channels_process_sound_chan_mono 272 .global channels_process_sound_chan_mono
220channels_process_sound_chan_mono: 273channels_process_sound_chan_mono:
221 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 274 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
222 lea.l -12(%sp), %sp | save registers 275 lea.l -20(%sp), %sp | save registers
223 move.l %macsr, %d1 | 276 movem.l %d2-%d4/%a2-%a3, (%sp) |
224 movem.l %d1-%d3, (%sp) |
225 move.l #0xb0, %macsr | put emac in rounding fractional mode
226 movem.l (%a0), %a0-%a1 | get channel pointers 277 movem.l (%a0), %a0-%a1 | get channel pointers
278 move.l %a0, %a2 | use separate dst pointers since read
279 move.l %a1, %a3 | pointers run one ahead of write
227 move.l #0x40000000, %d3 | %d3 = 0.5 280 move.l #0x40000000, %d3 | %d3 = 0.5
2281: 281 move.l (%a0)+, %d1 | prime the input registers
229 move.l (%a0), %d1 | L = R = l/2 + r/2 282 move.l (%a1)+, %d2 |
230 mac.l %d1, %d3, (%a1), %d2, %acc0 | 283 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
231 mac.l %d2, %d3, %acc0 | 284 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
232 movclr.l %acc0, %d1 | 285 subq.l #1, %d0 |
233 move.l %d1, (%a0)+ | output to original buffer 286 ble.s 20f | loop done |
234 move.l %d1, (%a1)+ | 28710: | loop |
235 subq.l #1, %d0 | 288 movclr.l %acc0, %d4 | L = R = l/2 + r/2
236 bgt.s 1b | 289 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
237 movem.l (%sp), %d1-%d3 | restore registers 290 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
238 move.l %d1, %macsr | 291 move.l %d4, (%a2)+ | output to original buffer
239 lea.l 12(%sp), %sp | cleanup 292 move.l %d4, (%a3)+ |
240 rts 293 subq.l #1, %d0 |
241.cpmono_end: 294 bgt.s 10b | loop |
242 .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono 29520: | loop done |
243 296 movclr.l %acc0, %d4 | output last sample
297 move.l %d4, (%a2) |
298 move.l %d4, (%a3) |
299 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
300 lea.l 20(%sp), %sp | cleanup
301 rts |
302 .size channels_process_sound_chan_mono, \
303 .-channels_process_sound_chan_mono
244 304
245/**************************************************************************** 305/****************************************************************************
246 * void channels_process_sound_chan_custom(int count, int32_t *buf[]) 306 * void channels_process_sound_chan_custom(int count, int32_t *buf[])
@@ -248,34 +308,47 @@ channels_process_sound_chan_mono:
248 * Apply stereo width (narrowing/expanding) effect. 308 * Apply stereo width (narrowing/expanding) effect.
249 */ 309 */
250 .section .text 310 .section .text
311 .align 2
251 .global channels_process_sound_chan_custom 312 .global channels_process_sound_chan_custom
252channels_process_sound_chan_custom: 313channels_process_sound_chan_custom:
253 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 314 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
254 lea.l -16(%sp), %sp | save registers 315 lea.l -28(%sp), %sp | save registers
255 move.l %macsr, %d1 | 316 movem.l %d2-%d6/%a2-%a3, (%sp) |
256 movem.l %d1-%d4, (%sp) |
257 move.l #0xb0, %macsr | put emac in rounding fractional mode
258 movem.l (%a0), %a0-%a1 | get channel pointers 317 movem.l (%a0), %a0-%a1 | get channel pointers
318 move.l %a0, %a2 | use separate dst pointers since read
319 move.l %a1, %a3 | pointers run one ahead of write
259 move.l dsp_sw_gain, %d3 | load straight (mid) gain 320 move.l dsp_sw_gain, %d3 | load straight (mid) gain
260 move.l dsp_sw_cross, %d4 | load cross (side) gain 321 move.l dsp_sw_cross, %d4 | load cross (side) gain
2611: 322 move.l (%a0)+, %d1 | prime the input registers
262 move.l (%a0), %d1 | 323 move.l (%a1)+, %d2 |
263 mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross 324 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
264 mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross 325 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
265 mac.l %d2, %d4 , %acc0 | 326 mac.l %d2, %d4 , %acc0 |
266 mac.l %d2, %d3 , %acc1 | 327 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
267 movclr.l %acc0, %d1 |
268 movclr.l %acc1, %d2 |
269 move.l %d1, (%a0)+ |
270 move.l %d2, (%a1)+ |
271 subq.l #1, %d0 | 328 subq.l #1, %d0 |
272 bgt.s 1b | 329 ble.b 20f | loop done |
273 movem.l (%sp), %d1-%d4 | restore registers 33010: | loop |
274 move.l %d1, %macsr | 331 movclr.l %acc0, %d5 |
275 lea.l 16(%sp), %sp | cleanup 332 movclr.l %acc1, %d6 |
276 rts 33315: | loop start |
277.cpcustom_end: 334 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
278 .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom 335 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
336 mac.l %d2, %d4 , %acc0 |
337 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
338 move.l %d5, (%a2)+ |
339 move.l %d6, (%a3)+ |
340 subq.l #1, %d0 |
341 bgt.s 10b | loop |
34220: | loop done |
343 movclr.l %acc0, %d5 | output last sample
344 movclr.l %acc1, %d6 |
345 move.l %d5, (%a2) |
346 move.l %d6, (%a3) |
347 movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
348 lea.l 28(%sp), %sp | cleanup
349 rts |
350 .size channels_process_sound_chan_custom, \
351 .-channels_process_sound_chan_custom
279 352
280/**************************************************************************** 353/****************************************************************************
281 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) 354 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
@@ -283,31 +356,42 @@ channels_process_sound_chan_custom:
283 * Separate channels into side channels. 356 * Separate channels into side channels.
284 */ 357 */
285 .section .text 358 .section .text
359 .align 2
286 .global channels_process_sound_chan_karaoke 360 .global channels_process_sound_chan_karaoke
287channels_process_sound_chan_karaoke: 361channels_process_sound_chan_karaoke:
288 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 362 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
289 lea.l -16(%sp), %sp | save registers 363 lea.l -20(%sp), %sp | save registers
290 move.l %macsr, %d1 | 364 movem.l %d2-%d4/%a2-%a3, (%sp) |
291 movem.l %d1-%d4, (%sp) | 365 movem.l (%a0), %a0-%a1 | get channel src pointers
292 move.l #0xb0, %macsr | put emac in rounding fractional mode 366 move.l %a0, %a2 | use separate dst pointers since read
293 movem.l (%a0), %a0-%a1 | get channel pointers 367 move.l %a1, %a3 | pointers run one ahead of write
294 move.l #0x40000000, %d4 | %d3 = 0.5 368 move.l #0x40000000, %d3 | %d3 = 0.5
2951: 369 move.l (%a0)+, %d1 | prime the input registers
296 move.l (%a0), %d1 | 370 move.l (%a1)+, %d2 |
297 msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2 371 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
298 mac.l %d2, %d4 , %acc0 | 372 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
299 movclr.l %acc0, %d1 | 373 subq.l #1, %d0 |
300 move.l %d1, (%a1)+ | 374 ble.b 20f | loop done |
301 neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2 37510: | loop |
302 move.l %d1, (%a0)+ | 376 movclr.l %acc0, %d4 |
303 subq.l #1, %d0 | 377 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
304 bgt.s 1b | 378 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
305 movem.l (%sp), %d1-%d4 | restore registers 379 move.l %d4, (%a2)+ |
306 move.l %d1, %macsr | 380 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
307 lea.l 16(%sp), %sp | cleanup 381 move.l %d4, (%a3)+ |
308 rts 382 subq.l #1, %d0 |
309.cpkaraoke_end: 383 bgt.s 10b | loop |
310 .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke 38420: | loop done |
385 movclr.l %acc0, %d4 | output last sample
386 move.l %d4, (%a2) |
387 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
388 move.l %d4, (%a3) |
389 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
390 lea.l 20(%sp), %sp | cleanup
391 rts |
392 .size channels_process_sound_chan_karaoke, \
393 .-channels_process_sound_chan_karaoke
394
311/**************************************************************************** 395/****************************************************************************
312 * void sample_output_stereo(int count, struct dsp_data *data, 396 * void sample_output_stereo(int count, struct dsp_data *data,
313 * int32_t *src[], int16_t *dst) 397 * int32_t *src[], int16_t *dst)
@@ -329,6 +413,7 @@ channels_process_sound_chan_karaoke:
329 * 413 *
330 */ 414 */
331 .section .text 415 .section .text
416 .align 2
332 .global sample_output_stereo 417 .global sample_output_stereo
333sample_output_stereo: 418sample_output_stereo:
334 lea.l -44(%sp), %sp | save registers 419 lea.l -44(%sp), %sp | save registers
@@ -348,11 +433,11 @@ sample_output_stereo:
348 add.l %a4, %d0 | 433 add.l %a4, %d0 |
349 and.l #0xfffffff0, %d0 | 434 and.l #0xfffffff0, %d0 |
350 cmp.l %a0, %d0 | at least a full line? 435 cmp.l %a0, %d0 | at least a full line?
351 bhi.w .sos_longloop_1_start | no? jump to trailing longword 436 bhi.w 40f | long loop 1 start | no? do as trailing longwords
352 sub.l #16, %d0 | %d0 = first line bound 437 sub.l #16, %d0 | %d0 = first line bound
353 cmp.l %a4, %d0 | any leading longwords? 438 cmp.l %a4, %d0 | any leading longwords?
354 bls.b .sos_lineloop_start | no? jump to line loop 439 bls.b 20f | line loop start | no? start line loop
355.sos_longloop_0: 44010: | long loop 0 |
356 move.l (%a2)+, %d1 | read longword from L and R 441 move.l (%a2)+, %d1 | read longword from L and R
357 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word 442 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
358 mac.l %d2, %a1, %acc1 | shift R to high word 443 mac.l %d2, %a1, %acc1 | shift R to high word
@@ -362,10 +447,10 @@ sample_output_stereo:
362 move.w %d2, %d1 | interleave MS 16 bits of each 447 move.w %d2, %d1 | interleave MS 16 bits of each
363 move.l %d1, (%a4)+ | ...and write both 448 move.l %d1, (%a4)+ | ...and write both
364 cmp.l %a4, %d0 | 449 cmp.l %a4, %d0 |
365 bhi.b .sos_longloop_0 | 450 bhi.b 10b | long loop 0 |
366.sos_lineloop_start: 45120: | line loop start |
367 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound 452 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
368.sos_lineloop: 45330: | line loop |
369 move.l (%a3)+, %d4 | get next 4 R samples and scale 454 move.l (%a3)+, %d4 | get next 4 R samples and scale
370 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation 455 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
371 mac.l %d5, %a1, (%a3)+, %d6, %acc1 | 456 mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
@@ -394,11 +479,11 @@ sample_output_stereo:
394 move.w %d7, %d3 | 479 move.w %d7, %d3 |
395 movem.l %d0-%d3, -16(%a4) | write four stereo samples 480 movem.l %d0-%d3, -16(%a4) | write four stereo samples
396 cmp.l %a4, %a5 | 481 cmp.l %a4, %a5 |
397 bhi.b .sos_lineloop | 482 bhi.b 30b | line loop |
398.sos_longloop_1_start: 48340: | long loop 1 start |
399 cmp.l %a4, %a0 | any longwords left? 484 cmp.l %a4, %a0 | any longwords left?
400 bls.b .sos_done | no? finished. 485 bls.b 60f | output end | no? stop
401.sos_longloop_1: 48650: | long loop 1 |
402 move.l (%a2)+, %d1 | handle trailing longwords 487 move.l (%a2)+, %d1 | handle trailing longwords
403 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones 488 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
404 mac.l %d2, %a1, %acc1 | 489 mac.l %d2, %a1, %acc1 |
@@ -408,14 +493,13 @@ sample_output_stereo:
408 move.w %d2, %d1 | 493 move.w %d2, %d1 |
409 move.l %d1, (%a4)+ | 494 move.l %d1, (%a4)+ |
410 cmp.l %a4, %a0 | 495 cmp.l %a4, %a0 |
411 bhi.b .sos_longloop_1 | 496 bhi.b 50b | long loop 1
412.sos_done: 49760: | output end |
413 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers 498 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers
414 move.l %d1, %macsr | 499 move.l %d1, %macsr |
415 lea.l 44(%sp), %sp | cleanup 500 lea.l 44(%sp), %sp | cleanup
416 rts | 501 rts |
417.sos_end: 502 .size sample_output_stereo, .-sample_output_stereo
418 .size sample_output_stereo, .sos_end-sample_output_stereo
419 503
420/**************************************************************************** 504/****************************************************************************
421 * void sample_output_mono(int count, struct dsp_data *data, 505 * void sample_output_mono(int count, struct dsp_data *data,
@@ -424,6 +508,7 @@ sample_output_stereo:
424 * Same treatment as sample_output_stereo but for one channel. 508 * Same treatment as sample_output_stereo but for one channel.
425 */ 509 */
426 .section .text 510 .section .text
511 .align 2
427 .global sample_output_mono 512 .global sample_output_mono
428sample_output_mono: 513sample_output_mono:
429 lea.l -28(%sp), %sp | save registers 514 lea.l -28(%sp), %sp | save registers
@@ -442,11 +527,11 @@ sample_output_mono:
442 add.l %a3, %d0 | 527 add.l %a3, %d0 |
443 and.l #0xfffffff0, %d0 | 528 and.l #0xfffffff0, %d0 |
444 cmp.l %a0, %d0 | at least a full line? 529 cmp.l %a0, %d0 | at least a full line?
445 bhi.w .som_longloop_1_start | no? jump to trailing longword 530 bhi.w 40f | long loop 1 start | no? do as trailing longwords
446 sub.l #16, %d0 | %d0 = first line bound 531 sub.l #16, %d0 | %d0 = first line bound
447 cmp.l %a3, %d0 | any leading longwords? 532 cmp.l %a3, %d0 | any leading longwords?
448 bls.b .som_lineloop_start | no? jump to line loop 533 bls.b 20f | line loop start | no? start line loop
449.som_longloop_0: 53410: | long loop 0 |
450 move.l (%a2)+, %d1 | read longword from L and R 535 move.l (%a2)+, %d1 | read longword from L and R
451 mac.l %d1, %d5, %acc0 | shift L to high word 536 mac.l %d1, %d5, %acc0 | shift L to high word
452 movclr.l %acc0, %d1 | get possibly saturated results 537 movclr.l %acc0, %d1 | get possibly saturated results
@@ -455,10 +540,10 @@ sample_output_mono:
455 move.w %d2, %d1 | duplicate single channel into 540 move.w %d2, %d1 | duplicate single channel into
456 move.l %d1, (%a3)+ | L and R 541 move.l %d1, (%a3)+ | L and R
457 cmp.l %a3, %d0 | 542 cmp.l %a3, %d0 |
458 bhi.b .som_longloop_0 | 543 bhi.b 10b | long loop 0 |
459.som_lineloop_start: 54420: | line loop start |
460 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound 545 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
461.som_lineloop: 54630: | line loop |
462 move.l (%a2)+, %d0 | get next 4 L samples and scale 547 move.l (%a2)+, %d0 | get next 4 L samples and scale
463 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation 548 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
464 mac.l %d1, %d5, (%a2)+, %d2, %acc1 | 549 mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
@@ -483,11 +568,11 @@ sample_output_mono:
483 move.w %d4, %d3 | 568 move.w %d4, %d3 |
484 movem.l %d0-%d3, -16(%a3) | write four stereo samples 569 movem.l %d0-%d3, -16(%a3) | write four stereo samples
485 cmp.l %a3, %a1 | 570 cmp.l %a3, %a1 |
486 bhi.b .som_lineloop | 571 bhi.b 30b | line loop |
487.som_longloop_1_start: 57240: | long loop 1 start |
488 cmp.l %a3, %a0 | any longwords left? 573 cmp.l %a3, %a0 | any longwords left?
489 bls.b .som_done | no? finished. 574 bls.b 60f | output end | no? stop
490.som_longloop_1: 57550: | long loop 1 |
491 move.l (%a2)+, %d1 | handle trailing longwords 576 move.l (%a2)+, %d1 | handle trailing longwords
492 mac.l %d1, %d5, %acc0 | the same way as leading ones 577 mac.l %d1, %d5, %acc0 | the same way as leading ones
493 movclr.l %acc0, %d1 | 578 movclr.l %acc0, %d1 |
@@ -496,11 +581,10 @@ sample_output_mono:
496 move.w %d2, %d1 | 581 move.w %d2, %d1 |
497 move.l %d1, (%a3)+ | 582 move.l %d1, (%a3)+ |
498 cmp.l %a3, %a0 | 583 cmp.l %a3, %a0 |
499 bhi.b .som_longloop_1 | 584 bhi.b 50b | long loop 1 |
500.som_done: 58560: | output end |
501 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers 586 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers
502 move.l %d1, %macsr | 587 move.l %d1, %macsr |
503 lea.l 28(%sp), %sp | cleanup 588 lea.l 28(%sp), %sp | cleanup
504 rts | 589 rts |
505.som_end: 590 .size sample_output_mono, .-sample_output_mono
506 .size sample_output_mono, .som_end-sample_output_mono