summaryrefslogtreecommitdiff
path: root/apps/dsp_cf.S
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2007-03-25 04:03:44 +0000
committerMichael Sevakis <jethead71@rockbox.org>2007-03-25 04:03:44 +0000
commit369c2a37b7176e4f9c44f00a31b3b74e62b0b5d7 (patch)
tree7620c7da1d611d0d9a339487b6b264e44c6201bd /apps/dsp_cf.S
parentcd630c9e0a2e0aa259a6e53a5af1369f36984b1c (diff)
downloadrockbox-369c2a37b7176e4f9c44f00a31b3b74e62b0b5d7.tar.gz
rockbox-369c2a37b7176e4f9c44f00a31b3b74e62b0b5d7.zip
SWCODEC & Coldfire: Do some more DSP straightening out. Do as much Coldfire optimizing as seems reasonably possible by jumping through some hoops to avoid stalls. Further boost reduction will just be fractional points if taken to extremes-- not worth it. Wrap up the ASM for a while.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12905 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/dsp_cf.S')
-rw-r--r--apps/dsp_cf.S424
1 files changed, 254 insertions, 170 deletions
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
index af9ac1fa4b..e5d3ee8c55 100644
--- a/apps/dsp_cf.S
+++ b/apps/dsp_cf.S
@@ -19,68 +19,117 @@
19 ****************************************************************************/ 19 ****************************************************************************/
20 20
21/**************************************************************************** 21/****************************************************************************
22 * void apply_crossfeed(int count, int32_t *src[]) 22 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
23 */ 23 */
24 .section .text 24 .section .text
25 .align 2
26 .global dsp_apply_gain
27dsp_apply_gain:
28 lea.l -20(%sp), %sp | save registers
29 movem.l %d2-%d4/%a2-%a3, (%sp) |
30 movem.l 28(%sp), %a0-%a1 | %a0 = data,
31 | %a1 = buf
32 move.l 4(%a0), %d1 | %d1 = data->num_channels
33 move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
3410: | channel loop |
35 move.l 24(%sp), %d0 | %d0 = count
36 move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1]
37 move.l %a2, %a3 | %a3 = d = s
38 move.l (%a2)+, %d2 | %d2 = *s++,
39 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
40 subq.l #1, %d0 | --count > 0 ? : effectively n++
41 ble.b 30f | loop done | no? finish up
4220: | loop |
43 move.l %accext01, %d4 | fetch S(n-1)[7:0]
44 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
45 asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
46 mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
47 move.b %d4, %d3 |
48 move.l %d3, (%a3)+ |
49 subq.l #1, %d0 | --count > 0 ? : effectively n++
50 bgt.b 20b | loop | yes? do more samples
5130: | loop done |
52 move.l %accext01, %d4 | fetch S(n-1)[7:0]
53 movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
54 asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
55 move.b %d4, %d3 |
56 move.l %d3, (%a3) |
57 subq.l #1, %d1 | next channel
58 bgt.b 10b | channel loop |
59 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
60 lea.l 20(%sp), %sp | cleanup stack
61 rts |
62 .size dsp_apply_gain,.-dsp_apply_gain
63
64/****************************************************************************
65 * void apply_crossfeed(int count, int32_t *buf[])
66 */
67 .section .text
68 .align 2
25 .global apply_crossfeed 69 .global apply_crossfeed
26apply_crossfeed: 70apply_crossfeed:
27 lea.l -44(%sp), %sp 71 lea.l -44(%sp), %sp |
28 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs 72 movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
29 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src 73 movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
30 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] 74 movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
31 lea.l crossfeed_data, %a1 75 lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
32 move.l (%a1)+, %a6 | a6 = direct gain 76 move.l (%a1)+, %d6 | %d6 = direct gain
33 movem.l 12(%a1), %d0-%d3 | fetch filter history samples 77 movem.l 12(%a1), %d0-%d3 | fetch filter history samples
34 move.l 132(%a1), %a0 | fetch delay line address 78 move.l 132(%a1), %a0 | fetch delay line address
35 movem.l (%a1), %a1-%a3 | load filter coefs 79 movem.l (%a1), %a1-%a3 | load filter coefs
80 lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
81 bra.b 20f | loop start | go to loop start point
36 /* Register usage in loop: 82 /* Register usage in loop:
37 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), 83 * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
38 * %a4 = src[0], %a5 = src[1], %a6 = direct gain, 84 * %a4 = buf[0], %a5 = buf[1],
85 * %a6 = delay line pointer wrap limit,
39 * %d0..%d3 = history 86 * %d0..%d3 = history
40 * %d4..%d6 = temp. 87 * %d4..%d5 = temp.
88 * %d6 = direct gain,
41 * %d7 = count 89 * %d7 = count
42 */ 90 */
43.cfloop: 9110: | loop |
44 mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n] 92 movclr.l %acc0, %d4 | write outputs
45 mac.l %a1, %d0 , %acc0 | acc += b0*dr[n] 93 move.l %d4, (%a4)+ | .
46 mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L 94 movclr.l %acc1, %d5 | .
47 move.l %acc0, %d1 | get filtered delayed sample 95 move.l %d5, (%a5)+ | .
48 mac.l %a6, %d4, %acc0 | acc += gain*x_l[n] 9620: | loop start |
49 movclr.l %acc0, %d6 | 97 mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n]
50 move.l %d6, (%a4)+ | write result 98 mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n]
51 99 mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R
52 mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n] 100 mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n]
53 mac.l %a1, %d2 , %acc0 | acc += b0*dl[n] 101 mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n]
54 mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R 102 mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L
55 movem.l %d4-%d5, (%a0) | save left & right inputs to delay line 103 movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line
56 move.l %acc0, %d3 | get filtered delayed sample 104 move.l %acc0, %d3 | get filtered delayed left sample (y_l[n])
57 mac.l %a6, %d5, %acc0 | acc += gain*x_r[n] 105 move.l %acc1, %d1 | get filtered delayed right sample (y_r[n])
58 lea.l 8(%a0), %a0 | increment delay pointer 106 mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n]
59 movclr.l %acc0, %d6 | 107 mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
60 move.l %d6, (%a5)+ | write result 108 cmp.l %a6, %a0 | wrap %a0 if passed end
61 109 bhs.b 30f | wrap buffer |
62 cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end 110 .word 0x51fb | tpf.l | trap the buffer wrap
63 bge.b .cfwrap | 11130: | wrap buffer | ...fwd taken branches more costly
64 .word 0x51fb | tpf.l - trap the buffer wrap 112 lea.l -104(%a0), %a0 | wrap it up
65.cfwrap: 113 subq.l #1, %d7 | --count > 0 ?
66 lea.l -104(%a0), %a0 | wrap 114 bgt.b 10b | loop | yes? do more
67 subq.l #1, %d7 | --count < 0 ? 115 movclr.l %acc0, %d4 | write last outputs
68 bgt.b .cfloop | 116 move.l %d4, (%a4) | .
117 movclr.l %acc1, %d5 | .
118 move.l %d5, (%a5) | .
69 lea.l crossfeed_data+16, %a1 | save data back to struct 119 lea.l crossfeed_data+16, %a1 | save data back to struct
70 movem.l %d0-%d3, (%a1) | ...history 120 movem.l %d0-%d3, (%a1) | ...history
71 move.l %a0, 120(%a1) | ...delay_p 121 move.l %a0, 120(%a1) | ...delay_p
72 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs 122 movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
73 lea.l 44(%sp), %sp 123 lea.l 44(%sp), %sp |
74 rts 124 rts |
75.cfend: 125 .size apply_crossfeed,.-apply_crossfeed
76 .size apply_crossfeed,.cfend-apply_crossfeed
77
78 126
79/**************************************************************************** 127/****************************************************************************
80 * int dsp_downsample(int count, struct dsp_data *data, 128 * int dsp_downsample(int count, struct dsp_data *data,
81 * int32_t *src[], int32_t *dst[]) 129 * int32_t *src[], int32_t *dst[])
82 */ 130 */
83 .section .text 131 .section .text
132 .align 2
84 .global dsp_downsample 133 .global dsp_downsample
85dsp_downsample: 134dsp_downsample:
86 lea.l -40(%sp), %sp | save non-clobberables 135 lea.l -40(%sp), %sp | save non-clobberables
@@ -92,7 +141,7 @@ dsp_downsample:
92 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels 141 movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
93 | %d4 = delta = data->resample_data.delta 142 | %d4 = delta = data->resample_data.delta
94 moveq.l #16, %d7 | %d7 = shift 143 moveq.l #16, %d7 | %d7 = shift
95.dschannel_loop: 14410: | channel loop |
96 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase 145 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
97 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] 146 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
98 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] 147 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
@@ -102,15 +151,15 @@ dsp_downsample:
102 move.l %d5, %d6 | %d6 = pos = phase >> 16 151 move.l %d5, %d6 | %d6 = pos = phase >> 16
103 lsr.l %d7, %d6 | 152 lsr.l %d7, %d6 |
104 cmp.l %d2, %d6 | past end of samples? 153 cmp.l %d2, %d6 | past end of samples?
105 bge.b .dsloop_skip | yes? skip loop 154 bge.b 40f | skip resample loop| yes? skip loop
106 tst.l %d6 | need last sample of prev. frame? 155 tst.l %d6 | need last sample of prev. frame?
107 bne.b .dsloop | no? start main loop 156 bne.b 20f | resample loop | no? start main loop
108 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] 157 move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
109 bra.b .dsuse_last_start | start with last (last in %d0) 158 bra.b 30f | resample start last | start with last (last in %d0)
110.dsloop: 15920: | resample loop |
111 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] 160 lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
112 movem.l (%a5), %d0-%d1 | 161 movem.l (%a5), %d0-%d1 |
113.dsuse_last_start: 16230: | resample start last |
114 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] 163 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
115 move.l %d0, %acc0 | %acc0 = previous sample 164 move.l %d0, %acc0 | %acc0 = previous sample
116 move.l %d5, %d0 | frac = (phase << 16) >> 1 165 move.l %d5, %d0 | frac = (phase << 16) >> 1
@@ -123,11 +172,11 @@ dsp_downsample:
123 movclr.l %acc0, %d0 | 172 movclr.l %acc0, %d0 |
124 move.l %d0, (%a4)+ | *d++ = %d0 173 move.l %d0, (%a4)+ | *d++ = %d0
125 cmp.l %d2, %d6 | pos < count? 174 cmp.l %d2, %d6 | pos < count?
126 blt.b .dsloop | yes? continue resampling 175 blt.b 20b | resample loop | yes? continue resampling
127.dsloop_skip: 17640: | skip resample loop |
128 subq.l #1, %d3 | ch > 0? 177 subq.l #1, %d3 | ch > 0?
129 bgt.b .dschannel_loop | yes? process next channel 178 bgt.b 10b | channel loop | yes? process next channel
130 asl.l %d7, %d2 | wrap phase to start of next frame 179 lsl.l %d7, %d2 | wrap phase to start of next frame
131 sub.l %d2, %d5 | data->resample_data.phase = 180 sub.l %d2, %d5 | data->resample_data.phase =
132 move.l %d5, 12(%a0) | ... phase - (count << 16) 181 move.l %d5, 12(%a0) | ... phase - (count << 16)
133 move.l %a4, %d0 | return d - d[0] 182 move.l %a4, %d0 | return d - d[0]
@@ -136,14 +185,14 @@ dsp_downsample:
136 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables 185 movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
137 lea.l 40(%sp), %sp | cleanup stack 186 lea.l 40(%sp), %sp | cleanup stack
138 rts | buh-bye 187 rts | buh-bye
139.dsend: 188 .size dsp_downsample,.-dsp_downsample
140 .size dsp_downsample,.dsend-dsp_downsample
141 189
142/**************************************************************************** 190/****************************************************************************
143 * int dsp_upsample(int count, struct dsp_data *dsp, 191 * int dsp_upsample(int count, struct dsp_data *dsp,
144 * in32_t *src[], int32_t *dst[]) 192 * int32_t *src[], int32_t *dst[])
145 */ 193 */
146 .section .text 194 .section .text
195 .align 2
147 .global dsp_upsample 196 .global dsp_upsample
148dsp_upsample: 197dsp_upsample:
149 lea.l -40(%sp), %sp | save non-clobberables 198 lea.l -40(%sp), %sp | save non-clobberables
@@ -154,47 +203,55 @@ dsp_upsample:
154 | %a2 = dst 203 | %a2 = dst
155 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels 204 movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
156 | %d4 = delta = data->resample_data.delta 205 | %d4 = delta = data->resample_data.delta
157 swap %d4 | swap delta to high word to use 206 swap %d4 | swap delta to high word to use...
158 | carries to increment position 207 | ...carries to increment position
159.uschannel_loop: 20810: | channel loop |
160 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase 209 move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
161 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] 210 move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
162 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] 211 lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
163 lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count] 212 lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1]
164 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] 213 move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
165 move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] 214 move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
166 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] 215 move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
216 move.l (%a3)+, %d1 | fetch first sample - might throw this...
217 | ...away later but we'll be preincremented
218 move.l %d1, %d6 | save sample value
219 sub.l %d0, %d1 | %d1 = diff = s[0] - last
167 swap %d5 | swap phase to high word to use 220 swap %d5 | swap phase to high word to use
168 | carries to increment position 221 | carries to increment position
169 move.l %d5, %d6 | %d6 = pos = phase >> 16 222 move.l %d5, %d7 | %d7 = pos = phase >> 16
170 clr.w %d5 | 223 clr.w %d5 |
171 eor.l %d5, %d6 | pos == 0? 224 eor.l %d5, %d7 | pos == 0?
172 beq.b .usstart_0 | no? transistion from down 225 beq.b 40f | loop start | yes? start loop
173 cmp.l %d2, %d6 | past end of samples? 226 cmp.l %d2, %d7 | past end of samples?
174 bge.b .usloop_skip | yes? skip loop 227 bge.b 50f | skip resample loop| yes? go to next channel and collect info
175 lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous) 228 lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
176 move.l (%a3)+, %d0 | %d0 = *s++ 229 movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
177 .word 0x51fa | tpf.w - trap next instruction 230 move.l %d1, %d6 | save sample value
178.usloop_1: 231 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
232 bra.b 40f | loop start |
23320: | next sample loop |
179 move.l %d6, %d0 | move previous sample to %d0 234 move.l %d6, %d0 | move previous sample to %d0
180.usstart_0:
181 move.l (%a3)+, %d1 | fetch next sample 235 move.l (%a3)+, %d1 | fetch next sample
182 move.l %d1, %d6 | save sample value 236 move.l %d1, %d6 | save sample value
183 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] 237 sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
184.usloop_0: 23830: | same sample loop |
239 movclr.l %acc0, %d7 | %d7 = result
240 move.l %d7, (%a4)+ | *d++ = %d7
24140: | loop start |
185 lsr.l #1, %d5 | make phase into frac 242 lsr.l #1, %d5 | make phase into frac
243 move.l %d0, %acc0 | %acc0 = s[pos-1]
186 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac 244 mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
187 lsl.l #1, %d5 | restore frac to phase 245 lsl.l #1, %d5 | restore frac to phase
188 movclr.l %acc0, %d7 | %d7 = product
189 add.l %d0, %d7 | %d7 = last + product
190 move.l %d7, (%a4)+ | *d++ = %d7
191 add.l %d4, %d5 | phase += delta 246 add.l %d4, %d5 | phase += delta
192 bcc.b .usloop_0 | load next values? 247 bcc.b 30b | same sample loop | load next values?
193 cmp.l %a5, %a3 | src <= src_end? 248 cmp.l %a5, %a3 | src <= src_end?
194 ble.b .usloop_1 | yes? continue resampling 249 bls.b 20b | next sample loop | yes? continue resampling
195.usloop_skip: 250 movclr.l %acc0, %d7 | %d7 = result
251 move.l %d7, (%a4)+ | *d++ = %d7
25250: | skip resample loop |
196 subq.l #1, %d3 | ch > 0? 253 subq.l #1, %d3 | ch > 0?
197 bgt.b .uschannel_loop | yes? process next channel 254 bgt.b 10b | channel loop | yes? process next channel
198 swap %d5 | wrap phase to start of next frame 255 swap %d5 | wrap phase to start of next frame
199 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase 256 move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
200 move.l %a4, %d0 | return d - d[0] 257 move.l %a4, %d0 | return d - d[0]
@@ -203,12 +260,7 @@ dsp_upsample:
203 asr.l #2, %d0 | convert bytes->samples 260 asr.l #2, %d0 | convert bytes->samples
204 lea.l 40(%sp), %sp | cleanup stack 261 lea.l 40(%sp), %sp | cleanup stack
205 rts | buh-bye 262 rts | buh-bye
206.usend: 263 .size dsp_upsample,.-dsp_upsample
207 .size dsp_upsample,.usend-dsp_upsample
208
209/* These routines might benefit from burst transfers but we'll keep them
210 * small for now since they're rather light weight
211 */
212 264
213/**************************************************************************** 265/****************************************************************************
214 * void channels_process_sound_chan_mono(int count, int32_t *buf[]) 266 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
@@ -216,31 +268,39 @@ dsp_upsample:
216 * Mix left and right channels 50/50 into a center channel. 268 * Mix left and right channels 50/50 into a center channel.
217 */ 269 */
218 .section .text 270 .section .text
271 .align 2
219 .global channels_process_sound_chan_mono 272 .global channels_process_sound_chan_mono
220channels_process_sound_chan_mono: 273channels_process_sound_chan_mono:
221 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 274 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
222 lea.l -12(%sp), %sp | save registers 275 lea.l -20(%sp), %sp | save registers
223 move.l %macsr, %d1 | 276 movem.l %d2-%d4/%a2-%a3, (%sp) |
224 movem.l %d1-%d3, (%sp) |
225 move.l #0xb0, %macsr | put emac in rounding fractional mode
226 movem.l (%a0), %a0-%a1 | get channel pointers 277 movem.l (%a0), %a0-%a1 | get channel pointers
278 move.l %a0, %a2 | use separate dst pointers since read
279 move.l %a1, %a3 | pointers run one ahead of write
227 move.l #0x40000000, %d3 | %d3 = 0.5 280 move.l #0x40000000, %d3 | %d3 = 0.5
2281: 281 move.l (%a0)+, %d1 | prime the input registers
229 move.l (%a0), %d1 | L = R = l/2 + r/2 282 move.l (%a1)+, %d2 |
230 mac.l %d1, %d3, (%a1), %d2, %acc0 | 283 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
231 mac.l %d2, %d3, %acc0 | 284 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
232 movclr.l %acc0, %d1 | 285 subq.l #1, %d0 |
233 move.l %d1, (%a0)+ | output to original buffer 286 ble.s 20f | loop done |
234 move.l %d1, (%a1)+ | 28710: | loop |
235 subq.l #1, %d0 | 288 movclr.l %acc0, %d4 | L = R = l/2 + r/2
236 bgt.s 1b | 289 mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
237 movem.l (%sp), %d1-%d3 | restore registers 290 mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
238 move.l %d1, %macsr | 291 move.l %d4, (%a2)+ | output to original buffer
239 lea.l 12(%sp), %sp | cleanup 292 move.l %d4, (%a3)+ |
240 rts 293 subq.l #1, %d0 |
241.cpmono_end: 294 bgt.s 10b | loop |
242 .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono 29520: | loop done |
243 296 movclr.l %acc0, %d4 | output last sample
297 move.l %d4, (%a2) |
298 move.l %d4, (%a3) |
299 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
300 lea.l 20(%sp), %sp | cleanup
301 rts |
302 .size channels_process_sound_chan_mono, \
303 .-channels_process_sound_chan_mono
244 304
245/**************************************************************************** 305/****************************************************************************
246 * void channels_process_sound_chan_custom(int count, int32_t *buf[]) 306 * void channels_process_sound_chan_custom(int count, int32_t *buf[])
@@ -248,34 +308,47 @@ channels_process_sound_chan_mono:
248 * Apply stereo width (narrowing/expanding) effect. 308 * Apply stereo width (narrowing/expanding) effect.
249 */ 309 */
250 .section .text 310 .section .text
311 .align 2
251 .global channels_process_sound_chan_custom 312 .global channels_process_sound_chan_custom
252channels_process_sound_chan_custom: 313channels_process_sound_chan_custom:
253 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 314 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
254 lea.l -16(%sp), %sp | save registers 315 lea.l -28(%sp), %sp | save registers
255 move.l %macsr, %d1 | 316 movem.l %d2-%d6/%a2-%a3, (%sp) |
256 movem.l %d1-%d4, (%sp) |
257 move.l #0xb0, %macsr | put emac in rounding fractional mode
258 movem.l (%a0), %a0-%a1 | get channel pointers 317 movem.l (%a0), %a0-%a1 | get channel pointers
318 move.l %a0, %a2 | use separate dst pointers since read
319 move.l %a1, %a3 | pointers run one ahead of write
259 move.l dsp_sw_gain, %d3 | load straight (mid) gain 320 move.l dsp_sw_gain, %d3 | load straight (mid) gain
260 move.l dsp_sw_cross, %d4 | load cross (side) gain 321 move.l dsp_sw_cross, %d4 | load cross (side) gain
2611: 322 move.l (%a0)+, %d1 | prime the input registers
262 move.l (%a0), %d1 | 323 move.l (%a1)+, %d2 |
263 mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross 324 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
264 mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross 325 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
265 mac.l %d2, %d4 , %acc0 | 326 mac.l %d2, %d4 , %acc0 |
266 mac.l %d2, %d3 , %acc1 | 327 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
267 movclr.l %acc0, %d1 |
268 movclr.l %acc1, %d2 |
269 move.l %d1, (%a0)+ |
270 move.l %d2, (%a1)+ |
271 subq.l #1, %d0 | 328 subq.l #1, %d0 |
272 bgt.s 1b | 329 ble.b 20f | loop done |
273 movem.l (%sp), %d1-%d4 | restore registers 33010: | loop |
274 move.l %d1, %macsr | 331 movclr.l %acc0, %d5 |
275 lea.l 16(%sp), %sp | cleanup 332 movclr.l %acc1, %d6 |
276 rts 33315: | loop start |
277.cpcustom_end: 334 mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
278 .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom 335 mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
336 mac.l %d2, %d4 , %acc0 |
337 mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
338 move.l %d5, (%a2)+ |
339 move.l %d6, (%a3)+ |
340 subq.l #1, %d0 |
341 bgt.s 10b | loop |
34220: | loop done |
343 movclr.l %acc0, %d5 | output last sample
344 movclr.l %acc1, %d6 |
345 move.l %d5, (%a2) |
346 move.l %d6, (%a3) |
347 movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
348 lea.l 28(%sp), %sp | cleanup
349 rts |
350 .size channels_process_sound_chan_custom, \
351 .-channels_process_sound_chan_custom
279 352
280/**************************************************************************** 353/****************************************************************************
281 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) 354 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
@@ -283,31 +356,42 @@ channels_process_sound_chan_custom:
283 * Separate channels into side channels. 356 * Separate channels into side channels.
284 */ 357 */
285 .section .text 358 .section .text
359 .align 2
286 .global channels_process_sound_chan_karaoke 360 .global channels_process_sound_chan_karaoke
287channels_process_sound_chan_karaoke: 361channels_process_sound_chan_karaoke:
288 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf 362 movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
289 lea.l -16(%sp), %sp | save registers 363 lea.l -20(%sp), %sp | save registers
290 move.l %macsr, %d1 | 364 movem.l %d2-%d4/%a2-%a3, (%sp) |
291 movem.l %d1-%d4, (%sp) | 365 movem.l (%a0), %a0-%a1 | get channel src pointers
292 move.l #0xb0, %macsr | put emac in rounding fractional mode 366 move.l %a0, %a2 | use separate dst pointers since read
293 movem.l (%a0), %a0-%a1 | get channel pointers 367 move.l %a1, %a3 | pointers run one ahead of write
294 move.l #0x40000000, %d4 | %d3 = 0.5 368 move.l #0x40000000, %d3 | %d3 = 0.5
2951: 369 move.l (%a0)+, %d1 | prime the input registers
296 move.l (%a0), %d1 | 370 move.l (%a1)+, %d2 |
297 msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2 371 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
298 mac.l %d2, %d4 , %acc0 | 372 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
299 movclr.l %acc0, %d1 | 373 subq.l #1, %d0 |
300 move.l %d1, (%a1)+ | 374 ble.b 20f | loop done |
301 neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2 37510: | loop |
302 move.l %d1, (%a0)+ | 376 movclr.l %acc0, %d4 |
303 subq.l #1, %d0 | 377 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
304 bgt.s 1b | 378 msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
305 movem.l (%sp), %d1-%d4 | restore registers 379 move.l %d4, (%a2)+ |
306 move.l %d1, %macsr | 380 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
307 lea.l 16(%sp), %sp | cleanup 381 move.l %d4, (%a3)+ |
308 rts 382 subq.l #1, %d0 |
309.cpkaraoke_end: 383 bgt.s 10b | loop |
310 .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke 38420: | loop done |
385 movclr.l %acc0, %d4 | output last sample
386 move.l %d4, (%a2) |
387 neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
388 move.l %d4, (%a3) |
389 movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
390 lea.l 20(%sp), %sp | cleanup
391 rts |
392 .size channels_process_sound_chan_karaoke, \
393 .-channels_process_sound_chan_karaoke
394
311/**************************************************************************** 395/****************************************************************************
312 * void sample_output_stereo(int count, struct dsp_data *data, 396 * void sample_output_stereo(int count, struct dsp_data *data,
313 * int32_t *src[], int16_t *dst) 397 * int32_t *src[], int16_t *dst)
@@ -329,6 +413,7 @@ channels_process_sound_chan_karaoke:
329 * 413 *
330 */ 414 */
331 .section .text 415 .section .text
416 .align 2
332 .global sample_output_stereo 417 .global sample_output_stereo
333sample_output_stereo: 418sample_output_stereo:
334 lea.l -44(%sp), %sp | save registers 419 lea.l -44(%sp), %sp | save registers
@@ -348,11 +433,11 @@ sample_output_stereo:
348 add.l %a4, %d0 | 433 add.l %a4, %d0 |
349 and.l #0xfffffff0, %d0 | 434 and.l #0xfffffff0, %d0 |
350 cmp.l %a0, %d0 | at least a full line? 435 cmp.l %a0, %d0 | at least a full line?
351 bhi.w .sos_longloop_1_start | no? jump to trailing longword 436 bhi.w 40f | long loop 1 start | no? do as trailing longwords
352 sub.l #16, %d0 | %d0 = first line bound 437 sub.l #16, %d0 | %d0 = first line bound
353 cmp.l %a4, %d0 | any leading longwords? 438 cmp.l %a4, %d0 | any leading longwords?
354 bls.b .sos_lineloop_start | no? jump to line loop 439 bls.b 20f | line loop start | no? start line loop
355.sos_longloop_0: 44010: | long loop 0 |
356 move.l (%a2)+, %d1 | read longword from L and R 441 move.l (%a2)+, %d1 | read longword from L and R
357 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word 442 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
358 mac.l %d2, %a1, %acc1 | shift R to high word 443 mac.l %d2, %a1, %acc1 | shift R to high word
@@ -362,10 +447,10 @@ sample_output_stereo:
362 move.w %d2, %d1 | interleave MS 16 bits of each 447 move.w %d2, %d1 | interleave MS 16 bits of each
363 move.l %d1, (%a4)+ | ...and write both 448 move.l %d1, (%a4)+ | ...and write both
364 cmp.l %a4, %d0 | 449 cmp.l %a4, %d0 |
365 bhi.b .sos_longloop_0 | 450 bhi.b 10b | long loop 0 |
366.sos_lineloop_start: 45120: | line loop start |
367 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound 452 lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
368.sos_lineloop: 45330: | line loop |
369 move.l (%a3)+, %d4 | get next 4 R samples and scale 454 move.l (%a3)+, %d4 | get next 4 R samples and scale
370 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation 455 mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
371 mac.l %d5, %a1, (%a3)+, %d6, %acc1 | 456 mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
@@ -394,11 +479,11 @@ sample_output_stereo:
394 move.w %d7, %d3 | 479 move.w %d7, %d3 |
395 movem.l %d0-%d3, -16(%a4) | write four stereo samples 480 movem.l %d0-%d3, -16(%a4) | write four stereo samples
396 cmp.l %a4, %a5 | 481 cmp.l %a4, %a5 |
397 bhi.b .sos_lineloop | 482 bhi.b 30b | line loop |
398.sos_longloop_1_start: 48340: | long loop 1 start |
399 cmp.l %a4, %a0 | any longwords left? 484 cmp.l %a4, %a0 | any longwords left?
400 bls.b .sos_done | no? finished. 485 bls.b 60f | output end | no? stop
401.sos_longloop_1: 48650: | long loop 1 |
402 move.l (%a2)+, %d1 | handle trailing longwords 487 move.l (%a2)+, %d1 | handle trailing longwords
403 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones 488 mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
404 mac.l %d2, %a1, %acc1 | 489 mac.l %d2, %a1, %acc1 |
@@ -408,14 +493,13 @@ sample_output_stereo:
408 move.w %d2, %d1 | 493 move.w %d2, %d1 |
409 move.l %d1, (%a4)+ | 494 move.l %d1, (%a4)+ |
410 cmp.l %a4, %a0 | 495 cmp.l %a4, %a0 |
411 bhi.b .sos_longloop_1 | 496 bhi.b 50b | long loop 1
412.sos_done: 49760: | output end |
413 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers 498 movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers
414 move.l %d1, %macsr | 499 move.l %d1, %macsr |
415 lea.l 44(%sp), %sp | cleanup 500 lea.l 44(%sp), %sp | cleanup
416 rts | 501 rts |
417.sos_end: 502 .size sample_output_stereo, .-sample_output_stereo
418 .size sample_output_stereo, .sos_end-sample_output_stereo
419 503
420/**************************************************************************** 504/****************************************************************************
421 * void sample_output_mono(int count, struct dsp_data *data, 505 * void sample_output_mono(int count, struct dsp_data *data,
@@ -424,6 +508,7 @@ sample_output_stereo:
424 * Same treatment as sample_output_stereo but for one channel. 508 * Same treatment as sample_output_stereo but for one channel.
425 */ 509 */
426 .section .text 510 .section .text
511 .align 2
427 .global sample_output_mono 512 .global sample_output_mono
428sample_output_mono: 513sample_output_mono:
429 lea.l -28(%sp), %sp | save registers 514 lea.l -28(%sp), %sp | save registers
@@ -442,11 +527,11 @@ sample_output_mono:
442 add.l %a3, %d0 | 527 add.l %a3, %d0 |
443 and.l #0xfffffff0, %d0 | 528 and.l #0xfffffff0, %d0 |
444 cmp.l %a0, %d0 | at least a full line? 529 cmp.l %a0, %d0 | at least a full line?
445 bhi.w .som_longloop_1_start | no? jump to trailing longword 530 bhi.w 40f | long loop 1 start | no? do as trailing longwords
446 sub.l #16, %d0 | %d0 = first line bound 532 sub.l #16, %d0 | %d0 = first line bound
447 cmp.l %a3, %d0 | any leading longwords? 532 cmp.l %a3, %d0 | any leading longwords?
448 bls.b .som_lineloop_start | no? jump to line loop 533 bls.b 20f | line loop start | no? start line loop
449.som_longloop_0: 53410: | long loop 0 |
450 move.l (%a2)+, %d1 | read longword from L and R 535 move.l (%a2)+, %d1 | read longword from L and R
451 mac.l %d1, %d5, %acc0 | shift L to high word 536 mac.l %d1, %d5, %acc0 | shift L to high word
452 movclr.l %acc0, %d1 | get possibly saturated results 537 movclr.l %acc0, %d1 | get possibly saturated results
@@ -455,10 +540,10 @@ sample_output_mono:
455 move.w %d2, %d1 | duplicate single channel into 540 move.w %d2, %d1 | duplicate single channel into
456 move.l %d1, (%a3)+ | L and R 541 move.l %d1, (%a3)+ | L and R
457 cmp.l %a3, %d0 | 542 cmp.l %a3, %d0 |
458 bhi.b .som_longloop_0 | 543 bhi.b 10b | long loop 0 |
459.som_lineloop_start: 54420: | line loop start |
460 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound 545 lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
461.som_lineloop: 54630: | line loop |
462 move.l (%a2)+, %d0 | get next 4 L samples and scale 547 move.l (%a2)+, %d0 | get next 4 L samples and scale
463 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation 548 mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
464 mac.l %d1, %d5, (%a2)+, %d2, %acc1 | 549 mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
@@ -483,11 +568,11 @@ sample_output_mono:
483 move.w %d4, %d3 | 568 move.w %d4, %d3 |
484 movem.l %d0-%d3, -16(%a3) | write four stereo samples 569 movem.l %d0-%d3, -16(%a3) | write four stereo samples
485 cmp.l %a3, %a1 | 570 cmp.l %a3, %a1 |
486 bhi.b .som_lineloop | 571 bhi.b 30b | line loop |
487.som_longloop_1_start: 57240: | long loop 1 start |
488 cmp.l %a3, %a0 | any longwords left? 573 cmp.l %a3, %a0 | any longwords left?
489 bls.b .som_done | no? finished. 574 bls.b 60f | output end | no? stop
490.som_longloop_1: 57550: | long loop 1 |
491 move.l (%a2)+, %d1 | handle trailing longwords 576 move.l (%a2)+, %d1 | handle trailing longwords
492 mac.l %d1, %d5, %acc0 | the same way as leading ones 577 mac.l %d1, %d5, %acc0 | the same way as leading ones
493 movclr.l %acc0, %d1 | 578 movclr.l %acc0, %d1 |
@@ -496,11 +581,10 @@ sample_output_mono:
496 move.w %d2, %d1 | 581 move.w %d2, %d1 |
497 move.l %d1, (%a3)+ | 582 move.l %d1, (%a3)+ |
498 cmp.l %a3, %a0 | 583 cmp.l %a3, %a0 |
499 bhi.b .som_longloop_1 | 584 bhi.b 50b | long loop 1 |
500.som_done: 58560: | output end |
501 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers 586 movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers
502 move.l %d1, %macsr | 587 move.l %d1, %macsr |
503 lea.l 28(%sp), %sp | cleanup 588 lea.l 28(%sp), %sp | cleanup
504 rts | 589 rts |
505.som_end: 590 .size sample_output_mono, .-sample_output_mono
506 .size sample_output_mono, .som_end-sample_output_mono