diff options
Diffstat (limited to 'apps/dsp_cf.S')
-rw-r--r-- | apps/dsp_cf.S | 424 |
1 files changed, 254 insertions, 170 deletions
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S index af9ac1fa4b..e5d3ee8c55 100644 --- a/apps/dsp_cf.S +++ b/apps/dsp_cf.S | |||
@@ -19,68 +19,117 @@ | |||
19 | ****************************************************************************/ | 19 | ****************************************************************************/ |
20 | 20 | ||
21 | /**************************************************************************** | 21 | /**************************************************************************** |
22 | * void apply_crossfeed(int count, int32_t *src[]) | 22 | * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) |
23 | */ | 23 | */ |
24 | .section .text | 24 | .section .text |
25 | .align 2 | ||
26 | .global dsp_apply_gain | ||
27 | dsp_apply_gain: | ||
28 | lea.l -20(%sp), %sp | save registers | ||
29 | movem.l %d2-%d4/%a2-%a3, (%sp) | | ||
30 | movem.l 28(%sp), %a0-%a1 | %a0 = data, | ||
31 | | %a1 = buf | ||
32 | move.l 4(%a0), %d1 | %d1 = data->num_channels | ||
33 | move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23) | ||
34 | 10: | channel loop | | ||
35 | move.l 24(%sp), %d0 | %d0 = count | ||
36 | move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1] | ||
37 | move.l %a2, %a3 | %a3 = d = s | ||
38 | move.l (%a2)+, %d2 | %d2 = *s++, | ||
39 | mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) | ||
40 | subq.l #1, %d0 | --count > 0 ? : effectively n++ | ||
41 | ble.b 30f | loop done | no? finish up | ||
42 | 20: | loop | | ||
43 | move.l %accext01, %d4 | fetch S(n-1)[7:0] | ||
44 | movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0] | ||
45 | asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0] | ||
46 | mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1) | ||
47 | move.b %d4, %d3 | | ||
48 | move.l %d3, (%a3)+ | | ||
49 | subq.l #1, %d0 | --count > 0 ? : effectively n++ | ||
50 | bgt.b 20b | loop | yes? do more samples | ||
51 | 30: | loop done | | ||
52 | move.l %accext01, %d4 | fetch S(n-1)[7:0] | ||
53 | movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0] | ||
54 | asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0] | ||
55 | move.b %d4, %d3 | | ||
56 | move.l %d3, (%a3) | | ||
57 | subq.l #1, %d1 | next channel | ||
58 | bgt.b 10b | channel loop | | ||
59 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers | ||
60 | lea.l 20(%sp), %sp | cleanup stack | ||
61 | rts | | ||
62 | .size dsp_apply_gain,.-dsp_apply_gain | ||
63 | |||
64 | /**************************************************************************** | ||
65 | * void apply_crossfeed(int count, int32_t *buf[]) | ||
66 | */ | ||
67 | .section .text | ||
68 | .align 2 | ||
25 | .global apply_crossfeed | 69 | .global apply_crossfeed |
26 | apply_crossfeed: | 70 | apply_crossfeed: |
27 | lea.l -44(%sp), %sp | 71 | lea.l -44(%sp), %sp | |
28 | movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs | 72 | movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs |
29 | movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src | 73 | movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src |
30 | movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] | 74 | movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] |
31 | lea.l crossfeed_data, %a1 | 75 | lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data |
32 | move.l (%a1)+, %a6 | a6 = direct gain | 76 | move.l (%a1)+, %d6 | %d6 = direct gain |
33 | movem.l 12(%a1), %d0-%d3 | fetch filter history samples | 77 | movem.l 12(%a1), %d0-%d3 | fetch filter history samples |
34 | move.l 132(%a1), %a0 | fetch delay line address | 78 | move.l 132(%a1), %a0 | fetch delay line address |
35 | movem.l (%a1), %a1-%a3 | load filter coefs | 79 | movem.l (%a1), %a1-%a3 | load filter coefs |
80 | lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit | ||
81 | bra.b 20f | loop start | go to loop start point | ||
36 | /* Register usage in loop: | 82 | /* Register usage in loop: |
37 | * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), | 83 | * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), |
38 | * %a4 = src[0], %a5 = src[1], %a6 = direct gain, | 84 | * %a4 = buf[0], %a5 = buf[1], |
85 | * %a6 = delay line pointer wrap limit, | ||
39 | * %d0..%d3 = history | 86 | * %d0..%d3 = history |
40 | * %d4..%d6 = temp. | 87 | * %d4..%d5 = temp. |
88 | * %d6 = direct gain, | ||
41 | * %d7 = count | 89 | * %d7 = count |
42 | */ | 90 | */ |
43 | .cfloop: | 91 | 10: | loop | |
44 | mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n] | 92 | movclr.l %acc0, %d4 | write outputs |
45 | mac.l %a1, %d0 , %acc0 | acc += b0*dr[n] | 93 | move.l %d4, (%a4)+ | . |
46 | mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L | 94 | movclr.l %acc1, %d5 | . |
47 | move.l %acc0, %d1 | get filtered delayed sample | 95 | move.l %d5, (%a5)+ | . |
48 | mac.l %a6, %d4, %acc0 | acc += gain*x_l[n] | 96 | 20: | loop start | |
49 | movclr.l %acc0, %d6 | | 97 | mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n] |
50 | move.l %d6, (%a4)+ | write result | 98 | mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n] |
51 | 99 | mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R | |
52 | mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n] | 100 | mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n] |
53 | mac.l %a1, %d2 , %acc0 | acc += b0*dl[n] | 101 | mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n] |
54 | mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R | 102 | mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L |
55 | movem.l %d4-%d5, (%a0) | save left & right inputs to delay line | 103 | movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line |
56 | move.l %acc0, %d3 | get filtered delayed sample | 104 | move.l %acc0, %d3 | get filtered delayed left sample (y_l[n]) |
57 | mac.l %a6, %d5, %acc0 | acc += gain*x_r[n] | 105 | move.l %acc1, %d1 | get filtered delayed right sample (y_r[n]) |
58 | lea.l 8(%a0), %a0 | increment delay pointer | 106 | mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n] |
59 | movclr.l %acc0, %d6 | | 107 | mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n] |
60 | move.l %d6, (%a5)+ | write result | 108 | cmp.l %a6, %a0 | wrap %a0 if passed end |
61 | 109 | bhs.b 30f | wrap buffer | | |
62 | cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end | 110 | .word 0x51fb | tpf.l | trap the buffer wrap |
63 | bge.b .cfwrap | | 111 | 30: | wrap buffer | ...fwd taken branches more costly |
64 | .word 0x51fb | tpf.l - trap the buffer wrap | 112 | lea.l -104(%a0), %a0 | wrap it up |
65 | .cfwrap: | 113 | subq.l #1, %d7 | --count > 0 ? |
66 | lea.l -104(%a0), %a0 | wrap | 114 | bgt.b 10b | loop | yes? do more |
67 | subq.l #1, %d7 | --count < 0 ? | 115 | movclr.l %acc0, %d4 | write last outputs |
68 | bgt.b .cfloop | | 116 | move.l %d4, (%a4) | . |
117 | movclr.l %acc1, %d5 | . | ||
118 | move.l %d5, (%a5) | . | ||
69 | lea.l crossfeed_data+16, %a1 | save data back to struct | 119 | lea.l crossfeed_data+16, %a1 | save data back to struct |
70 | movem.l %d0-%d3, (%a1) | ...history | 120 | movem.l %d0-%d3, (%a1) | ...history |
71 | move.l %a0, 120(%a1) | ...delay_p | 121 | move.l %a0, 120(%a1) | ...delay_p |
72 | movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs | 122 | movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs |
73 | lea.l 44(%sp), %sp | 123 | lea.l 44(%sp), %sp | |
74 | rts | 124 | rts | |
75 | .cfend: | 125 | .size apply_crossfeed,.-apply_crossfeed |
76 | .size apply_crossfeed,.cfend-apply_crossfeed | ||
77 | |||
78 | 126 | ||
79 | /**************************************************************************** | 127 | /**************************************************************************** |
80 | * int dsp_downsample(int count, struct dsp_data *data, | 128 | * int dsp_downsample(int count, struct dsp_data *data, |
81 | * int32_t *src[], int32_t *dst[]) | 129 | * int32_t *src[], int32_t *dst[]) |
82 | */ | 130 | */ |
83 | .section .text | 131 | .section .text |
132 | .align 2 | ||
84 | .global dsp_downsample | 133 | .global dsp_downsample |
85 | dsp_downsample: | 134 | dsp_downsample: |
86 | lea.l -40(%sp), %sp | save non-clobberables | 135 | lea.l -40(%sp), %sp | save non-clobberables |
@@ -92,7 +141,7 @@ dsp_downsample: | |||
92 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels | 141 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels |
93 | | %d4 = delta = data->resample_data.delta | 142 | | %d4 = delta = data->resample_data.delta |
94 | moveq.l #16, %d7 | %d7 = shift | 143 | moveq.l #16, %d7 | %d7 = shift |
95 | .dschannel_loop: | 144 | 10: | channel loop | |
96 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase | 145 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase |
97 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] | 146 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] |
98 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] | 147 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] |
@@ -102,15 +151,15 @@ dsp_downsample: | |||
102 | move.l %d5, %d6 | %d6 = pos = phase >> 16 | 151 | move.l %d5, %d6 | %d6 = pos = phase >> 16 |
103 | lsr.l %d7, %d6 | | 152 | lsr.l %d7, %d6 | |
104 | cmp.l %d2, %d6 | past end of samples? | 153 | cmp.l %d2, %d6 | past end of samples? |
105 | bge.b .dsloop_skip | yes? skip loop | 154 | bge.b 40f | skip resample loop| yes? skip loop |
106 | tst.l %d6 | need last sample of prev. frame? | 155 | tst.l %d6 | need last sample of prev. frame? |
107 | bne.b .dsloop | no? start main loop | 156 | bne.b 20f | resample loop | no? start main loop |
108 | move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] | 157 | move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] |
109 | bra.b .dsuse_last_start | start with last (last in %d0) | 158 | bra.b 30f | resample start last | start with last (last in %d0) |
110 | .dsloop: | 159 | 20: | resample loop | |
111 | lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] | 160 | lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] |
112 | movem.l (%a5), %d0-%d1 | | 161 | movem.l (%a5), %d0-%d1 | |
113 | .dsuse_last_start: | 162 | 30: | resample start last | |
114 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] | 163 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] |
115 | move.l %d0, %acc0 | %acc0 = previous sample | 164 | move.l %d0, %acc0 | %acc0 = previous sample |
116 | move.l %d5, %d0 | frac = (phase << 16) >> 1 | 165 | move.l %d5, %d0 | frac = (phase << 16) >> 1 |
@@ -123,11 +172,11 @@ dsp_downsample: | |||
123 | movclr.l %acc0, %d0 | | 172 | movclr.l %acc0, %d0 | |
124 | move.l %d0, (%a4)+ | *d++ = %d0 | 173 | move.l %d0, (%a4)+ | *d++ = %d0 |
125 | cmp.l %d2, %d6 | pos < count? | 174 | cmp.l %d2, %d6 | pos < count? |
126 | blt.b .dsloop | yes? continue resampling | 175 | blt.b 20b | resample loop | yes? continue resampling |
127 | .dsloop_skip: | 176 | 40: | skip resample loop | |
128 | subq.l #1, %d3 | ch > 0? | 177 | subq.l #1, %d3 | ch > 0? |
129 | bgt.b .dschannel_loop | yes? process next channel | 178 | bgt.b 10b | channel loop | yes? process next channel |
130 | asl.l %d7, %d2 | wrap phase to start of next frame | 179 | lsl.l %d7, %d2 | wrap phase to start of next frame |
131 | sub.l %d2, %d5 | data->resample_data.phase = | 180 | sub.l %d2, %d5 | data->resample_data.phase = |
132 | move.l %d5, 12(%a0) | ... phase - (count << 16) | 181 | move.l %d5, 12(%a0) | ... phase - (count << 16) |
133 | move.l %a4, %d0 | return d - d[0] | 182 | move.l %a4, %d0 | return d - d[0] |
@@ -136,14 +185,14 @@ dsp_downsample: | |||
136 | movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables | 185 | movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables |
137 | lea.l 40(%sp), %sp | cleanup stack | 186 | lea.l 40(%sp), %sp | cleanup stack |
138 | rts | buh-bye | 187 | rts | buh-bye |
139 | .dsend: | 188 | .size dsp_downsample,.-dsp_downsample |
140 | .size dsp_downsample,.dsend-dsp_downsample | ||
141 | 189 | ||
142 | /**************************************************************************** | 190 | /**************************************************************************** |
143 | * int dsp_upsample(int count, struct dsp_data *dsp, | 191 | * int dsp_upsample(int count, struct dsp_data *dsp, |
144 | * in32_t *src[], int32_t *dst[]) | 192 | * int32_t *src[], int32_t *dst[]) |
145 | */ | 193 | */ |
146 | .section .text | 194 | .section .text |
195 | .align 2 | ||
147 | .global dsp_upsample | 196 | .global dsp_upsample |
148 | dsp_upsample: | 197 | dsp_upsample: |
149 | lea.l -40(%sp), %sp | save non-clobberables | 198 | lea.l -40(%sp), %sp | save non-clobberables |
@@ -154,47 +203,55 @@ dsp_upsample: | |||
154 | | %a2 = dst | 203 | | %a2 = dst |
155 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels | 204 | movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels |
156 | | %d4 = delta = data->resample_data.delta | 205 | | %d4 = delta = data->resample_data.delta |
157 | swap %d4 | swap delta to high word to use | 206 | swap %d4 | swap delta to high word to use... |
158 | | carries to increment position | 207 | | ...carries to increment position |
159 | .uschannel_loop: | 208 | 10: | channel loop | |
160 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase | 209 | move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase |
161 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] | 210 | move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] |
162 | lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] | 211 | lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] |
163 | lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count] | 212 | lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1] |
164 | move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] | 213 | move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] |
165 | move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] | 214 | move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] |
166 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] | 215 | move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] |
216 | move.l (%a3)+, %d1 | fetch first sample - might throw this... | ||
217 | | ...away later but we'll be preincremented | ||
218 | move.l %d1, %d6 | save sample value | ||
219 | sub.l %d0, %d1 | %d1 = diff = s[0] - last | ||
167 | swap %d5 | swap phase to high word to use | 220 | swap %d5 | swap phase to high word to use |
168 | | carries to increment position | 221 | | carries to increment position |
169 | move.l %d5, %d6 | %d6 = pos = phase >> 16 | 222 | move.l %d5, %d7 | %d7 = pos = phase >> 16 |
170 | clr.w %d5 | | 223 | clr.w %d5 | |
171 | eor.l %d5, %d6 | pos == 0? | 224 | eor.l %d5, %d7 | pos == 0? |
172 | beq.b .usstart_0 | no? transition from down | 225 | beq.b 40f | loop start | yes? start loop |
173 | cmp.l %d2, %d6 | past end of samples? | 226 | cmp.l %d2, %d7 | past end of samples? |
174 | bge.b .usloop_skip | yes? skip loop | 227 | bge.b 50f | skip resample loop| yes? go to next channel and collect info |
175 | lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous) | 228 | lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1] |
176 | move.l (%a3)+, %d0 | %d0 = *s++ | 229 | movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos] |
177 | .word 0x51fa | tpf.w - trap next instruction | 230 | move.l %d1, %d6 | save sample value |
178 | .usloop_1: | 231 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] |
232 | bra.b 40f | loop start | | ||
233 | 20: | next sample loop | | ||
179 | move.l %d6, %d0 | move previous sample to %d0 | 234 | move.l %d6, %d0 | move previous sample to %d0 |
180 | .usstart_0: | ||
181 | move.l (%a3)+, %d1 | fetch next sample | 235 | move.l (%a3)+, %d1 | fetch next sample |
182 | move.l %d1, %d6 | save sample value | 236 | move.l %d1, %d6 | save sample value |
183 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] | 237 | sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] |
184 | .usloop_0: | 238 | 30: | same sample loop | |
239 | movclr.l %acc0, %d7 | %d7 = result | ||
240 | move.l %d7, (%a4)+ | *d++ = %d7 | ||
241 | 40: | loop start | | ||
185 | lsr.l #1, %d5 | make phase into frac | 242 | lsr.l #1, %d5 | make phase into frac |
243 | move.l %d0, %acc0 | %acc0 = s[pos-1] | ||
186 | mac.l %d1, %d5, %acc0 | %acc0 = diff * frac | 244 | mac.l %d1, %d5, %acc0 | %acc0 = diff * frac |
187 | lsl.l #1, %d5 | restore frac to phase | 245 | lsl.l #1, %d5 | restore frac to phase |
188 | movclr.l %acc0, %d7 | %d7 = product | ||
189 | add.l %d0, %d7 | %d7 = last + product | ||
190 | move.l %d7, (%a4)+ | *d++ = %d7 | ||
191 | add.l %d4, %d5 | phase += delta | 246 | add.l %d4, %d5 | phase += delta |
192 | bcc.b .usloop_0 | load next values? | 247 | bcc.b 30b | same sample loop | load next values? |
193 | cmp.l %a5, %a3 | src <= src_end? | 248 | cmp.l %a5, %a3 | src <= src_end? |
194 | ble.b .usloop_1 | yes? continue resampling | 249 | bls.b 20b | next sample loop | yes? continue resampling |
195 | .usloop_skip: | 250 | movclr.l %acc0, %d7 | %d7 = result |
251 | move.l %d7, (%a4)+ | *d++ = %d7 | ||
252 | 50: | skip resample loop | | ||
196 | subq.l #1, %d3 | ch > 0? | 253 | subq.l #1, %d3 | ch > 0? |
197 | bgt.b .uschannel_loop | yes? process next channel | 254 | bgt.b 10b | channel loop | yes? process next channel |
198 | swap %d5 | wrap phase to start of next frame | 255 | swap %d5 | wrap phase to start of next frame |
199 | move.l %d5, 12(%a0) | ...and save in data->resample_data.phase | 256 | move.l %d5, 12(%a0) | ...and save in data->resample_data.phase |
200 | move.l %a4, %d0 | return d - d[0] | 257 | move.l %a4, %d0 | return d - d[0] |
@@ -203,12 +260,7 @@ dsp_upsample: | |||
203 | asr.l #2, %d0 | convert bytes->samples | 260 | asr.l #2, %d0 | convert bytes->samples |
204 | lea.l 40(%sp), %sp | cleanup stack | 261 | lea.l 40(%sp), %sp | cleanup stack |
205 | rts | buh-bye | 262 | rts | buh-bye |
206 | .usend: | 263 | .size dsp_upsample,.-dsp_upsample |
207 | .size dsp_upsample,.usend-dsp_upsample | ||
208 | |||
209 | /* These routines might benefit from burst transfers but we'll keep them | ||
210 | * small for now since they're rather light weight | ||
211 | */ | ||
212 | 264 | ||
213 | /**************************************************************************** | 265 | /**************************************************************************** |
214 | * void channels_process_sound_chan_mono(int count, int32_t *buf[]) | 266 | * void channels_process_sound_chan_mono(int count, int32_t *buf[]) |
@@ -216,31 +268,39 @@ dsp_upsample: | |||
216 | * Mix left and right channels 50/50 into a center channel. | 268 | * Mix left and right channels 50/50 into a center channel. |
217 | */ | 269 | */ |
218 | .section .text | 270 | .section .text |
271 | .align 2 | ||
219 | .global channels_process_sound_chan_mono | 272 | .global channels_process_sound_chan_mono |
220 | channels_process_sound_chan_mono: | 273 | channels_process_sound_chan_mono: |
221 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | 274 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf |
222 | lea.l -12(%sp), %sp | save registers | 275 | lea.l -20(%sp), %sp | save registers |
223 | move.l %macsr, %d1 | | 276 | movem.l %d2-%d4/%a2-%a3, (%sp) | |
224 | movem.l %d1-%d3, (%sp) | | ||
225 | move.l #0xb0, %macsr | put emac in rounding fractional mode | ||
226 | movem.l (%a0), %a0-%a1 | get channel pointers | 277 | movem.l (%a0), %a0-%a1 | get channel pointers |
278 | move.l %a0, %a2 | use separate dst pointers since read | ||
279 | move.l %a1, %a3 | pointers run one ahead of write | ||
227 | move.l #0x40000000, %d3 | %d3 = 0.5 | 280 | move.l #0x40000000, %d3 | %d3 = 0.5 |
228 | 1: | 281 | move.l (%a0)+, %d1 | prime the input registers |
229 | move.l (%a0), %d1 | L = R = l/2 + r/2 | 282 | move.l (%a1)+, %d2 | |
230 | mac.l %d1, %d3, (%a1), %d2, %acc0 | | 283 | mac.l %d1, %d3, (%a0)+, %d1, %acc0 | |
231 | mac.l %d2, %d3, %acc0 | | 284 | mac.l %d2, %d3, (%a1)+, %d2, %acc0 | |
232 | movclr.l %acc0, %d1 | | 285 | subq.l #1, %d0 | |
233 | move.l %d1, (%a0)+ | output to original buffer | 286 | ble.s 20f | loop done | |
234 | move.l %d1, (%a1)+ | | 287 | 10: | loop | |
235 | subq.l #1, %d0 | | 288 | movclr.l %acc0, %d4 | L = R = l/2 + r/2 |
236 | bgt.s 1b | | 289 | mac.l %d1, %d3, (%a0)+, %d1, %acc0 | |
237 | movem.l (%sp), %d1-%d3 | restore registers | 290 | mac.l %d2, %d3, (%a1)+, %d2, %acc0 | |
238 | move.l %d1, %macsr | | 291 | move.l %d4, (%a2)+ | output to original buffer |
239 | lea.l 12(%sp), %sp | cleanup | 292 | move.l %d4, (%a3)+ | |
240 | rts | 293 | subq.l #1, %d0 | |
241 | .cpmono_end: | 294 | bgt.s 10b | loop | |
242 | .size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono | 295 | 20: | loop done | |
243 | 296 | movclr.l %acc0, %d4 | output last sample | |
297 | move.l %d4, (%a2) | | ||
298 | move.l %d4, (%a3) | | ||
299 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers | ||
300 | lea.l 20(%sp), %sp | cleanup | ||
301 | rts | | ||
302 | .size channels_process_sound_chan_mono, \ | ||
303 | .-channels_process_sound_chan_mono | ||
244 | 304 | ||
245 | /**************************************************************************** | 305 | /**************************************************************************** |
246 | * void channels_process_sound_chan_custom(int count, int32_t *buf[]) | 306 | * void channels_process_sound_chan_custom(int count, int32_t *buf[]) |
@@ -248,34 +308,47 @@ channels_process_sound_chan_mono: | |||
248 | * Apply stereo width (narrowing/expanding) effect. | 308 | * Apply stereo width (narrowing/expanding) effect. |
249 | */ | 309 | */ |
250 | .section .text | 310 | .section .text |
311 | .align 2 | ||
251 | .global channels_process_sound_chan_custom | 312 | .global channels_process_sound_chan_custom |
252 | channels_process_sound_chan_custom: | 313 | channels_process_sound_chan_custom: |
253 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | 314 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf |
254 | lea.l -16(%sp), %sp | save registers | 315 | lea.l -28(%sp), %sp | save registers |
255 | move.l %macsr, %d1 | | 316 | movem.l %d2-%d6/%a2-%a3, (%sp) | |
256 | movem.l %d1-%d4, (%sp) | | ||
257 | move.l #0xb0, %macsr | put emac in rounding fractional mode | ||
258 | movem.l (%a0), %a0-%a1 | get channel pointers | 317 | movem.l (%a0), %a0-%a1 | get channel pointers |
318 | move.l %a0, %a2 | use separate dst pointers since read | ||
319 | move.l %a1, %a3 | pointers run one ahead of write | ||
259 | move.l dsp_sw_gain, %d3 | load straight (mid) gain | 320 | move.l dsp_sw_gain, %d3 | load straight (mid) gain |
260 | move.l dsp_sw_cross, %d4 | load cross (side) gain | 321 | move.l dsp_sw_cross, %d4 | load cross (side) gain |
261 | 1: | 322 | move.l (%a0)+, %d1 | prime the input registers |
262 | move.l (%a0), %d1 | | 323 | move.l (%a1)+, %d2 | |
263 | mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross | 324 | mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross |
264 | mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross | 325 | mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross |
265 | mac.l %d2, %d4 , %acc0 | | 326 | mac.l %d2, %d4 , %acc0 | |
266 | mac.l %d2, %d3 , %acc1 | | 327 | mac.l %d2, %d3, (%a1)+, %d2, %acc1 | |
267 | movclr.l %acc0, %d1 | | ||
268 | movclr.l %acc1, %d2 | | ||
269 | move.l %d1, (%a0)+ | | ||
270 | move.l %d2, (%a1)+ | | ||
271 | subq.l #1, %d0 | | 328 | subq.l #1, %d0 | |
272 | bgt.s 1b | | 329 | ble.b 20f | loop done | |
273 | movem.l (%sp), %d1-%d4 | restore registers | 330 | 10: | loop | |
274 | move.l %d1, %macsr | | 331 | movclr.l %acc0, %d5 | |
275 | lea.l 16(%sp), %sp | cleanup | 332 | movclr.l %acc1, %d6 | |
276 | rts | 333 | 15: | loop start | |
277 | .cpcustom_end: | 334 | mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross |
278 | .size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom | 335 | mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross |
336 | mac.l %d2, %d4 , %acc0 | | ||
337 | mac.l %d2, %d3, (%a1)+, %d2, %acc1 | | ||
338 | move.l %d5, (%a2)+ | | ||
339 | move.l %d6, (%a3)+ | | ||
340 | subq.l #1, %d0 | | ||
341 | bgt.s 10b | loop | | ||
342 | 20: | loop done | | ||
343 | movclr.l %acc0, %d5 | output last sample | ||
344 | movclr.l %acc1, %d6 | | ||
345 | move.l %d5, (%a2) | | ||
346 | move.l %d6, (%a3) | | ||
347 | movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers | ||
348 | lea.l 28(%sp), %sp | cleanup | ||
349 | rts | | ||
350 | .size channels_process_sound_chan_custom, \ | ||
351 | .-channels_process_sound_chan_custom | ||
279 | 352 | ||
280 | /**************************************************************************** | 353 | /**************************************************************************** |
281 | * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) | 354 | * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) |
@@ -283,31 +356,42 @@ channels_process_sound_chan_custom: | |||
283 | * Separate channels into side channels. | 356 | * Separate channels into side channels. |
284 | */ | 357 | */ |
285 | .section .text | 358 | .section .text |
359 | .align 2 | ||
286 | .global channels_process_sound_chan_karaoke | 360 | .global channels_process_sound_chan_karaoke |
287 | channels_process_sound_chan_karaoke: | 361 | channels_process_sound_chan_karaoke: |
288 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf | 362 | movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf |
289 | lea.l -16(%sp), %sp | save registers | 363 | lea.l -20(%sp), %sp | save registers |
290 | move.l %macsr, %d1 | | 364 | movem.l %d2-%d4/%a2-%a3, (%sp) | |
291 | movem.l %d1-%d4, (%sp) | | 365 | movem.l (%a0), %a0-%a1 | get channel src pointers |
292 | move.l #0xb0, %macsr | put emac in rounding fractional mode | 366 | move.l %a0, %a2 | use separate dst pointers since read |
293 | movem.l (%a0), %a0-%a1 | get channel pointers | 367 | move.l %a1, %a3 | pointers run one ahead of write |
294 | move.l #0x40000000, %d4 | %d3 = 0.5 | 368 | move.l #0x40000000, %d3 | %d3 = 0.5 |
295 | 1: | 369 | move.l (%a0)+, %d1 | prime the input registers |
296 | move.l (%a0), %d1 | | 370 | move.l (%a1)+, %d2 | |
297 | msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2 | 371 | mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 |
298 | mac.l %d2, %d4 , %acc0 | | 372 | msac.l %d2, %d3, (%a1)+, %d2, %acc0 | |
299 | movclr.l %acc0, %d1 | | 373 | subq.l #1, %d0 | |
300 | move.l %d1, (%a1)+ | | 374 | ble.b 20f | loop done | |
301 | neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2 | 375 | 10: | loop | |
302 | move.l %d1, (%a0)+ | | 376 | movclr.l %acc0, %d4 | |
303 | subq.l #1, %d0 | | 377 | mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2 |
304 | bgt.s 1b | | 378 | msac.l %d2, %d3, (%a1)+, %d2, %acc0 | |
305 | movem.l (%sp), %d1-%d4 | restore registers | 379 | move.l %d4, (%a2)+ | |
306 | move.l %d1, %macsr | | 380 | neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2 |
307 | lea.l 16(%sp), %sp | cleanup | 381 | move.l %d4, (%a3)+ | |
308 | rts | 382 | subq.l #1, %d0 | |
309 | .cpkaraoke_end: | 383 | bgt.s 10b | loop | |
310 | .size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke | 384 | 20: | loop done | |
385 | movclr.l %acc0, %d4 | output last sample | ||
386 | move.l %d4, (%a2) | | ||
387 | neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2 | ||
388 | move.l %d4, (%a3) | | ||
389 | movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers | ||
390 | lea.l 20(%sp), %sp | cleanup | ||
391 | rts | | ||
392 | .size channels_process_sound_chan_karaoke, \ | ||
393 | .-channels_process_sound_chan_karaoke | ||
394 | |||
311 | /**************************************************************************** | 395 | /**************************************************************************** |
312 | * void sample_output_stereo(int count, struct dsp_data *data, | 396 | * void sample_output_stereo(int count, struct dsp_data *data, |
313 | * int32_t *src[], int16_t *dst) | 397 | * int32_t *src[], int16_t *dst) |
@@ -329,6 +413,7 @@ channels_process_sound_chan_karaoke: | |||
329 | * | 413 | * |
330 | */ | 414 | */ |
331 | .section .text | 415 | .section .text |
416 | .align 2 | ||
332 | .global sample_output_stereo | 417 | .global sample_output_stereo |
333 | sample_output_stereo: | 418 | sample_output_stereo: |
334 | lea.l -44(%sp), %sp | save registers | 419 | lea.l -44(%sp), %sp | save registers |
@@ -348,11 +433,11 @@ sample_output_stereo: | |||
348 | add.l %a4, %d0 | | 433 | add.l %a4, %d0 | |
349 | and.l #0xfffffff0, %d0 | | 434 | and.l #0xfffffff0, %d0 | |
350 | cmp.l %a0, %d0 | at least a full line? | 435 | cmp.l %a0, %d0 | at least a full line? |
351 | bhi.w .sos_longloop_1_start | no? jump to trailing longword | 436 | bhi.w 40f | long loop 1 start | no? do as trailing longwords |
352 | sub.l #16, %d0 | %d0 = first line bound | 437 | sub.l #16, %d0 | %d0 = first line bound |
353 | cmp.l %a4, %d0 | any leading longwords? | 438 | cmp.l %a4, %d0 | any leading longwords? |
354 | bls.b .sos_lineloop_start | no? jump to line loop | 439 | bls.b 20f | line loop start | no? start line loop |
355 | .sos_longloop_0: | 440 | 10: | long loop 0 | |
356 | move.l (%a2)+, %d1 | read longword from L and R | 441 | move.l (%a2)+, %d1 | read longword from L and R |
357 | mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word | 442 | mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word |
358 | mac.l %d2, %a1, %acc1 | shift R to high word | 443 | mac.l %d2, %a1, %acc1 | shift R to high word |
@@ -362,10 +447,10 @@ sample_output_stereo: | |||
362 | move.w %d2, %d1 | interleave MS 16 bits of each | 447 | move.w %d2, %d1 | interleave MS 16 bits of each |
363 | move.l %d1, (%a4)+ | ...and write both | 448 | move.l %d1, (%a4)+ | ...and write both |
364 | cmp.l %a4, %d0 | | 449 | cmp.l %a4, %d0 | |
365 | bhi.b .sos_longloop_0 | | 450 | bhi.b 10b | long loop 0 | |
366 | .sos_lineloop_start: | 451 | 20: | line loop start | |
367 | lea.l -12(%a0), %a5 | %a5 = at or just before last line bound | 452 | lea.l -12(%a0), %a5 | %a5 = at or just before last line bound |
368 | .sos_lineloop: | 453 | 30: | line loop | |
369 | move.l (%a3)+, %d4 | get next 4 R samples and scale | 454 | move.l (%a3)+, %d4 | get next 4 R samples and scale |
370 | mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation | 455 | mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation |
371 | mac.l %d5, %a1, (%a3)+, %d6, %acc1 | | 456 | mac.l %d5, %a1, (%a3)+, %d6, %acc1 | |
@@ -394,11 +479,11 @@ sample_output_stereo: | |||
394 | move.w %d7, %d3 | | 479 | move.w %d7, %d3 | |
395 | movem.l %d0-%d3, -16(%a4) | write four stereo samples | 480 | movem.l %d0-%d3, -16(%a4) | write four stereo samples |
396 | cmp.l %a4, %a5 | | 481 | cmp.l %a4, %a5 | |
397 | bhi.b .sos_lineloop | | 482 | bhi.b 30b | line loop | |
398 | .sos_longloop_1_start: | 483 | 40: | long loop 1 start | |
399 | cmp.l %a4, %a0 | any longwords left? | 484 | cmp.l %a4, %a0 | any longwords left? |
400 | bls.b .sos_done | no? finished. | 485 | bls.b 60f | output end | no? stop |
401 | .sos_longloop_1: | 486 | 50: | long loop 1 | |
402 | move.l (%a2)+, %d1 | handle trailing longwords | 487 | move.l (%a2)+, %d1 | handle trailing longwords |
403 | mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones | 488 | mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones |
404 | mac.l %d2, %a1, %acc1 | | 489 | mac.l %d2, %a1, %acc1 | |
@@ -408,14 +493,13 @@ sample_output_stereo: | |||
408 | move.w %d2, %d1 | | 493 | move.w %d2, %d1 | |
409 | move.l %d1, (%a4)+ | | 494 | move.l %d1, (%a4)+ | |
410 | cmp.l %a4, %a0 | | 495 | cmp.l %a4, %a0 | |
411 | bhi.b .sos_longloop_1 | | 496 | bhi.b 50b | long loop 1 |
412 | .sos_done: | 497 | 60: | output end | |
413 | movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers | 498 | movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers |
414 | move.l %d1, %macsr | | 499 | move.l %d1, %macsr | |
415 | lea.l 44(%sp), %sp | cleanup | 500 | lea.l 44(%sp), %sp | cleanup |
416 | rts | | 501 | rts | |
417 | .sos_end: | 502 | .size sample_output_stereo, .-sample_output_stereo |
418 | .size sample_output_stereo, .sos_end-sample_output_stereo | ||
419 | 503 | ||
420 | /**************************************************************************** | 504 | /**************************************************************************** |
421 | * void sample_output_mono(int count, struct dsp_data *data, | 505 | * void sample_output_mono(int count, struct dsp_data *data, |
@@ -424,6 +508,7 @@ sample_output_stereo: | |||
424 | * Same treatment as sample_output_stereo but for one channel. | 508 | * Same treatment as sample_output_stereo but for one channel. |
425 | */ | 509 | */ |
426 | .section .text | 510 | .section .text |
511 | .align 2 | ||
427 | .global sample_output_mono | 512 | .global sample_output_mono |
428 | sample_output_mono: | 513 | sample_output_mono: |
429 | lea.l -28(%sp), %sp | save registers | 514 | lea.l -28(%sp), %sp | save registers |
@@ -442,11 +527,11 @@ sample_output_mono: | |||
442 | add.l %a3, %d0 | | 527 | add.l %a3, %d0 | |
443 | and.l #0xfffffff0, %d0 | | 528 | and.l #0xfffffff0, %d0 | |
444 | cmp.l %a0, %d0 | at least a full line? | 529 | cmp.l %a0, %d0 | at least a full line? |
445 | bhi.w .som_longloop_1_start | no? jump to trailing longword | 530 | bhi.w 40f | long loop 1 start | no? do as trailing longwords |
446 | sub.l #16, %d0 | %d0 = first line bound | 531 | sub.l #16, %d0 | %d0 = first line bound |
447 | cmp.l %a3, %d0 | any leading longwords? | 532 | cmp.l %a3, %d0 | any leading longwords? |
448 | bls.b .som_lineloop_start | no? jump to line loop | 533 | bls.b 20f | line loop start | no? start line loop |
449 | .som_longloop_0: | 534 | 10: | long loop 0 | |
450 | move.l (%a2)+, %d1 | read longword from the source channel | 535 | move.l (%a2)+, %d1 | read longword from the source channel |
451 | mac.l %d1, %d5, %acc0 | shift L to high word | 536 | mac.l %d1, %d5, %acc0 | shift L to high word |
452 | movclr.l %acc0, %d1 | get possibly saturated results | 537 | movclr.l %acc0, %d1 | get possibly saturated results |
@@ -455,10 +540,10 @@ sample_output_mono: | |||
455 | move.w %d2, %d1 | duplicate single channel into | 540 | move.w %d2, %d1 | duplicate single channel into |
456 | move.l %d1, (%a3)+ | L and R | 541 | move.l %d1, (%a3)+ | L and R |
457 | cmp.l %a3, %d0 | | 542 | cmp.l %a3, %d0 | |
458 | bhi.b .som_longloop_0 | | 543 | bhi.b 10b | long loop 0 | |
459 | .som_lineloop_start: | 544 | 20: | line loop start | |
460 | lea.l -12(%a0), %a1 | %a1 = at or just before last line bound | 545 | lea.l -12(%a0), %a1 | %a1 = at or just before last line bound |
461 | .som_lineloop: | 546 | 30: | line loop | |
462 | move.l (%a2)+, %d0 | get next 4 L samples and scale | 547 | move.l (%a2)+, %d0 | get next 4 L samples and scale |
463 | mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation | 548 | mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation |
464 | mac.l %d1, %d5, (%a2)+, %d2, %acc1 | | 549 | mac.l %d1, %d5, (%a2)+, %d2, %acc1 | |
@@ -483,11 +568,11 @@ sample_output_mono: | |||
483 | move.w %d4, %d3 | | 568 | move.w %d4, %d3 | |
484 | movem.l %d0-%d3, -16(%a3) | write four stereo samples | 569 | movem.l %d0-%d3, -16(%a3) | write four stereo samples |
485 | cmp.l %a3, %a1 | | 570 | cmp.l %a3, %a1 | |
486 | bhi.b .som_lineloop | | 571 | bhi.b 30b | line loop | |
487 | .som_longloop_1_start: | 572 | 40: | long loop 1 start | |
488 | cmp.l %a3, %a0 | any longwords left? | 573 | cmp.l %a3, %a0 | any longwords left? |
489 | bls.b .som_done | no? finished. | 574 | bls.b 60f | output end | no? stop |
490 | .som_longloop_1: | 575 | 50: | long loop 1 |
491 | move.l (%a2)+, %d1 | handle trailing longwords | 576 | move.l (%a2)+, %d1 | handle trailing longwords |
492 | mac.l %d1, %d5, %acc0 | the same way as leading ones | 577 | mac.l %d1, %d5, %acc0 | the same way as leading ones |
493 | movclr.l %acc0, %d1 | | 578 | movclr.l %acc0, %d1 | |
@@ -496,11 +581,10 @@ sample_output_mono: | |||
496 | move.w %d2, %d1 | | 581 | move.w %d2, %d1 | |
497 | move.l %d1, (%a3)+ | | 582 | move.l %d1, (%a3)+ | |
498 | cmp.l %a3, %a0 | | 583 | cmp.l %a3, %a0 | |
499 | bhi.b .som_longloop_1 | | 584 | bhi.b 50b | long loop 1 | |
500 | .som_done: | 585 | 60: | output end | |
501 | movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers | 586 | movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers |
502 | move.l %d1, %macsr | | 587 | move.l %d1, %macsr | |
503 | lea.l 28(%sp), %sp | cleanup | 588 | lea.l 28(%sp), %sp | cleanup |
504 | rts | | 589 | rts | |
505 | .som_end: | 590 | .size sample_output_mono, .-sample_output_mono |
506 | .size sample_output_mono, .som_end-sample_output_mono | ||