diff options
-rw-r--r-- | apps/dsp_arm.S | 364 |
1 files changed, 218 insertions, 146 deletions
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S index f924569bc5..b4871d1506 100644 --- a/apps/dsp_arm.S +++ b/apps/dsp_arm.S | |||
@@ -33,24 +33,37 @@ | |||
33 | .type channels_process_sound_chan_mono, %function | 33 | .type channels_process_sound_chan_mono, %function |
34 | channels_process_sound_chan_mono: | 34 | channels_process_sound_chan_mono: |
35 | @ input: r0 = count, r1 = buf | 35 | @ input: r0 = count, r1 = buf |
36 | stmfd sp!, {r4-r5, lr} | 36 | stmfd sp!, { r4, lr } @ |
37 | ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1] | 37 | @ |
38 | 38 | ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1] | |
39 | .monoloop: | 39 | subs r0, r0, #1 @ odd: end at 0; even: end at -1 |
40 | ldmia r2, {r4-r5} | 40 | beq .mono_singlesample @ Zero? Only one sample! |
41 | ldmia r3, {r12,lr} | 41 | @ |
42 | mov r4, r4, asr #1 @ r4 = r4/2 | 42 | .monoloop: @ |
43 | add r4, r4, r12, asr #1 @ r4 = r4 + r12/2 = (buf[0]+buf[1])/2 | 43 | ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1 |
44 | mov r5, r5, asr #1 @ r5 = r5/2 | 44 | ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1 |
45 | add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2 | 45 | mov r3, r3, asr #1 @ Mo0 = Li0 / 2 + Ri0 / 2 |
46 | stmia r2!, {r4-r5} | 46 | mov r4, r4, asr #1 @ Mo1 = Li1 / 2 + Ri1 / 2 |
47 | stmia r3!, {r4-r5} | 47 | add r12, r3, r12, asr #1 @ |
48 | subs r0, r0, #2 | 48 | add r14, r4, r14, asr #1 @ |
49 | bgt .monoloop | 49 | subs r0, r0, #2 @ |
50 | 50 | stmia r1!, { r12, r14 } @ store Mo0, Mo1 | |
51 | ldmfd sp!, {r4-r5, pc} | 51 | stmia r2!, { r12, r14 } @ store Mo0, Mo1 |
52 | .monoend: | 52 | bgt .monoloop @ |
53 | .size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono | 53 | @ |
54 | ldmltfd sp!, { r4, pc } @ if count was even, we're done | ||
55 | @ | ||
56 | .mono_singlesample: @ | ||
57 | ldr r3, [r1] @ r3 = Ls | ||
58 | ldr r12, [r2] @ r12 = Rs | ||
59 | mov r3, r3, asr #1 @ Mo = Ls / 2 + Rs / 2 | ||
60 | add r12, r3, r12, asr #1 @ | ||
61 | str r12, [r1] @ store Mo | ||
62 | str r12, [r2] @ store Mo | ||
63 | @ | ||
64 | ldmfd sp!, { r4, pc } @ | ||
65 | .size channels_process_sound_chan_mono, \ | ||
66 | .-channels_process_sound_chan_mono | ||
54 | 67 | ||
55 | /**************************************************************************** | 68 | /**************************************************************************** |
56 | * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) | 69 | * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) |
@@ -64,26 +77,40 @@ channels_process_sound_chan_mono: | |||
64 | .type channels_process_sound_chan_karaoke, %function | 77 | .type channels_process_sound_chan_karaoke, %function |
65 | channels_process_sound_chan_karaoke: | 78 | channels_process_sound_chan_karaoke: |
66 | @ input: r0 = count, r1 = buf | 79 | @ input: r0 = count, r1 = buf |
67 | stmfd sp!, {r4-r5, lr} | 80 | stmfd sp!, { r4, lr } @ |
68 | ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1] | 81 | @ |
69 | 82 | ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1] | |
70 | .karaokeloop: | 83 | subs r0, r0, #1 @ odd: end at 0; even: end at -1 |
71 | ldmia r2, {r4-r5} | 84 | beq .karaoke_singlesample @ Zero? Only one sample! |
72 | ldmia r3, {r12,lr} | 85 | @ |
73 | mov r12, r12, asr #1 @ r12 = r12/2 | 86 | .karaokeloop: @ |
74 | rsb r4, r12, r4, asr #1 @ r4 = -r12 + r4/2 = (buf[0]-buf[1])/2 | 87 | ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1 |
75 | rsb r12, r4, #0 @ r12 = -r4 | 88 | ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1 |
76 | mov lr, lr, asr #1 @ lr = lr/2 | 89 | mov r3, r3, asr #1 @ Lo0 = Li0 / 2 - Ri0 / 2 |
77 | rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2 | 90 | mov r4, r4, asr #1 @ Lo1 = Li1 / 2 - Ri1 / 2 |
78 | rsb lr, r5, #0 @ lr = -r5 | 91 | sub r3, r3, r12, asr #1 @ |
79 | stmia r2!, {r4-r5} | 92 | sub r4, r4, r14, asr #1 @ |
80 | stmia r3!, {r12,lr} | 93 | rsb r12, r3, #0 @ Ro0 = -Lo0 = Ri0 / 2 - Li0 / 2 |
81 | subs r0, r0, #2 | 94 | rsb r14, r4, #0 @ Ro1 = -Lo1 = Ri1 / 2 - Li1 / 2 |
82 | bgt .karaokeloop | 95 | subs r0, r0, #2 @ |
83 | 96 | stmia r1!, { r3, r4 } @ store Lo0, Lo1 | |
84 | ldmfd sp!, {r4-r5, pc} | 97 | stmia r2!, { r12, r14 } @ store Ro0, Ro1 |
85 | .karaokeend: | 98 | bgt .karaokeloop @ |
86 | .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke | 99 | @ |
100 | ldmltfd sp!, { r4, pc } @ if count was even, we're done | ||
101 | @ | ||
102 | .karaoke_singlesample: @ | ||
103 | ldr r3, [r1] @ r3 = Li | ||
104 | ldr r12, [r2] @ r12 = Ri | ||
105 | mov r3, r3, asr #1 @ Lo = Li / 2 - Ri / 2 | |
106 | sub r3, r3, r12, asr #1 @ | |
107 | rsb r12, r3, #0 @ Ro = -Lo = Ri / 2 - Li / 2 | |
108 | str r3, [r1] @ store Lo | ||
109 | str r12, [r2] @ store Ro | ||
110 | @ | ||
111 | ldmfd sp!, { r4, pc } @ | ||
112 | .size channels_process_sound_chan_karaoke, \ | ||
113 | .-channels_process_sound_chan_karaoke | ||
87 | 114 | ||
88 | #if ARM_ARCH < 6 | 115 | #if ARM_ARCH < 6 |
89 | /**************************************************************************** | 116 | /**************************************************************************** |
@@ -99,42 +126,57 @@ channels_process_sound_chan_karaoke: | |||
99 | .type sample_output_mono, %function | 126 | .type sample_output_mono, %function |
100 | sample_output_mono: | 127 | sample_output_mono: |
101 | @ input: r0 = count, r1 = data, r2 = src, r3 = dst | 128 | @ input: r0 = count, r1 = data, r2 = src, r3 = dst |
102 | stmfd sp!, {r4-r7, lr} | 129 | stmfd sp!, { r4-r6, lr } |
103 | 130 | ||
104 | ldr r4, [r2] @ r4 = src[0] | 131 | ldr r1, [r1] @ r1 = data->output_scale |
105 | ldr r5, [r1] @ lr = data->output_scale | 132 | ldr r2, [r2] @ r2 = src[0] |
106 | sub r1, r5, #1 @ r1 = r5-1 | 133 | |
107 | mov r2, #1 | 134 | mov r4, #1 |
108 | mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1) | 135 | mov r4, r4, lsl r1 @ r4 = 1 << (scale-1) |
109 | mvn r1, #0x8000 @ r1 needed for clipping | 136 | mov r4, r4, lsr #1 |
110 | mov r12, #0xff00 | 137 | mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for |
111 | orr r12, r12, #0xff @ r12 needed for masking | 138 | @ clipping and masking |
139 | subs r0, r0, #1 @ | ||
140 | beq .som_singlesample @ Zero? Only one sample! | ||
112 | 141 | ||
113 | .somloop: | 142 | .somloop: |
114 | ldmia r4!, {r6-r7} | 143 | ldmia r2!, { r5, r6 } |
115 | add r6, r6, r2 | 144 | add r5, r5, r4 @ r5 = (r5 + 1<<(scale-1)) >> scale |
116 | mov r6, r6, asr r5 @ r6 = (r6 + 1<<(scale-1)) >> scale | 145 | mov r5, r5, asr r1 |
117 | mov lr, r6, asr #15 | 146 | mov r12, r5, asr #15 |
118 | teq lr, lr, asr #31 | 147 | teq r12, r12, asr #31 |
119 | eorne r6, r1, lr, asr #31 @ Clip (-32768...+32767) | 148 | eorne r5, r14, r5, asr #31 @ Clip (-32768...+32767) |
120 | add r7, r7, r2 | 149 | add r6, r6, r4 |
121 | mov r7, r7, asr r5 @ r7 = (r7 + 1<<(scale-1)) >> scale | 150 | mov r6, r6, asr r1 @ r6 = (r6 + 1<<(scale-1)) >> scale |
122 | mov lr, r7, asr #15 | 151 | mov r12, r6, asr #15 |
123 | teq lr, lr, asr #31 | 152 | teq r12, r12, asr #31 |
124 | eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767) | 153 | eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767) |
125 | 154 | ||
126 | and r6, r6, r12 | 155 | and r5, r5, r14, lsr #16 |
127 | orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word | 156 | and r6, r6, r14, lsr #16 |
128 | and r7, r7, r12 | 157 | orr r5, r5, r5, lsl #16 @ pack first 2 halfwords into 1 word |
129 | orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word | 158 | orr r6, r6, r6, lsl #16 @ pack last 2 halfwords into 1 word |
130 | stmia r3!, {r6-r7} | 159 | stmia r3!, { r5, r6 } |
131 | 160 | ||
132 | subs r0, r0, #2 | 161 | subs r0, r0, #2 |
133 | bgt .somloop | 162 | bgt .somloop |
134 | 163 | ||
135 | ldmfd sp!, {r4-r7, pc} | 164 | ldmltfd sp!, { r4-r6, pc } @ even 'count'? return |
136 | .somend: | 165 | |
137 | .size sample_output_mono,.somend-sample_output_mono | 166 | .som_singlesample: |
167 | ldr r5, [r2] @ do odd sample | ||
168 | add r5, r5, r4 | ||
169 | mov r5, r5, asr r1 | ||
170 | mov r12, r5, asr #15 | ||
171 | teq r12, r12, asr #31 | ||
172 | eorne r5, r14, r5, asr #31 | ||
173 | |||
174 | and r5, r5, r14, lsr #16 @ pack 2 halfwords into 1 word | ||
175 | orr r5, r5, r5, lsl #16 | ||
176 | str r5, [r3] | ||
177 | |||
178 | ldmfd sp!, { r4-r6, pc } | ||
179 | .size sample_output_mono, .-sample_output_mono | ||
138 | 180 | ||
139 | /**************************************************************************** | 181 | /**************************************************************************** |
140 | * void sample_output_stereo(int count, struct dsp_data *data, | 182 | * void sample_output_stereo(int count, struct dsp_data *data, |
@@ -149,54 +191,80 @@ sample_output_mono: | |||
149 | .type sample_output_stereo, %function | 191 | .type sample_output_stereo, %function |
150 | sample_output_stereo: | 192 | sample_output_stereo: |
151 | @ input: r0 = count, r1 = data, r2 = src, r3 = dst | 193 | @ input: r0 = count, r1 = data, r2 = src, r3 = dst |
152 | stmfd sp!, {r4-r10, lr} | 194 | stmfd sp!, { r4-r9, lr } |
153 | 195 | ||
154 | ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1] | 196 | ldr r1, [r1] @ r1 = data->output_scale |
155 | ldr r6, [r1] @ r6 = data->output_scale | 197 | ldmia r2, { r2, r5 } @ r2 = src[0], r5 = src[1] |
156 | sub r1, r6, #1 @ r1 = r6-1 | 198 | |
157 | mov r2, #1 | 199 | mov r4, #1 |
158 | mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1) | 200 | mov r4, r4, lsl r1 @ r4 = 1 << (scale-1) |
159 | mvn r1, #0x8000 @ r1 needed for clipping | 201 | mov r4, r4, lsr #1 @ |
160 | mov r12, #0xff00 | 202 | |
161 | orr r12, r12, #0xff @ r12 needed for masking | 203 | mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for |
204 | @ clipping and masking | ||
205 | subs r0, r0, #1 @ | ||
206 | beq .sos_singlesample @ Zero? Only one sample! | ||
162 | 207 | ||
163 | .sosloop: | 208 | .sosloop: |
164 | ldmia r4!, {r7-r8} | 209 | ldmia r2!, { r6, r7 } @ 2 left |
165 | add r7, r7, r2 | 210 | ldmia r5!, { r8, r9 } @ 2 right |
166 | mov r7, r7, asr r6 @ r7 = (r7 + 1<<(scale-1)) >> scale | 211 | |
167 | mov lr, r7, asr #15 | 212 | add r6, r6, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale |
168 | teq lr, lr, asr #31 | 213 | mov r6, r6, asr r1 |
169 | eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767) | 214 | mov r12, r6, asr #15 |
170 | add r8, r8, r2 | 215 | teq r12, r12, asr #31 |
171 | mov r8, r8, asr r6 @ r8 = (r8 + 1<<(scale-1)) >> scale | 216 | eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767) |
172 | mov lr, r8, asr #15 | 217 | add r7, r7, r4 |
173 | teq lr, lr, asr #31 | 218 | mov r7, r7, asr r1 @ r7 = (r7 + 1<<(scale-1)) >> scale |
174 | eorne r8, r1, lr, asr #31 @ Clip (-32768...+32767) | 219 | mov r12, r7, asr #15 |
220 | teq r12, r12, asr #31 | ||
221 | eorne r7, r14, r7, asr #31 @ Clip (-32768...+32767) | ||
175 | 222 | ||
176 | ldmia r5!, {r9-r10} | 223 | add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale |
177 | add r9, r9, r2 | 224 | mov r8, r8, asr r1 |
178 | mov r9, r9, asr r6 @ r9 = (r9 + 1<<(scale-1)) >> scale | 225 | mov r12, r8, asr #15 |
179 | mov lr, r9, asr #15 | 226 | teq r12, r12, asr #31 |
180 | teq lr, lr, asr #31 | 227 | eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767) |
181 | eorne r9, r1, lr, asr #31 @ Clip (-32768...+32767) | 228 | add r9, r9, r4 @ r9 = (r9 + 1<<(scale-1)) >> scale |
182 | add r10, r10, r2 | 229 | mov r9, r9, asr r1 |
183 | mov r10, r10, asr r6 @ r10 = (r10 + 1<<(scale-1)) >> scale | 230 | mov r12, r9, asr #15 |
184 | mov lr, r10, asr #15 | 231 | teq r12, r12, asr #31 |
185 | teq lr, lr, asr #31 | 232 | eorne r9, r14, r9, asr #31 @ Clip (-32768...+32767) |
186 | eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767) | ||
187 | 233 | ||
188 | and r7, r7, r12 | 234 | and r6, r6, r14, lsr #16 @ pack first 2 halfwords into 1 word |
189 | orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word | 235 | orr r8, r6, r8, asl #16 |
190 | and r8, r8, r12 | 236 | and r7, r7, r14, lsr #16 @ pack last 2 halfwords into 1 word |
191 | orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word | 237 | orr r9, r7, r9, asl #16 |
192 | stmia r3!, {r9-r10} | 238 | |
239 | stmia r3!, { r8, r9 } | ||
193 | 240 | ||
194 | subs r0, r0, #2 | 241 | subs r0, r0, #2 |
195 | bgt .sosloop | 242 | bgt .sosloop |
196 | 243 | ||
197 | ldmfd sp!, {r4-r10, pc} | 244 | ldmltfd sp!, { r4-r9, pc } @ even 'count'? return |
198 | .sosend: | 245 | |
199 | .size sample_output_stereo,.sosend-sample_output_stereo | 246 | .sos_singlesample: |
247 | ldr r6, [r2] @ left odd sample | ||
248 | ldr r8, [r5] @ right odd sample | ||
249 | |||
250 | add r6, r6, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale | |
251 | mov r6, r6, asr r1 | ||
252 | mov r12, r6, asr #15 | ||
253 | teq r12, r12, asr #31 | ||
254 | eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767) | ||
255 | add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale | ||
256 | mov r8, r8, asr r1 | ||
257 | mov r12, r8, asr #15 | ||
258 | teq r12, r12, asr #31 | ||
259 | eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767) | ||
260 | |||
261 | and r6, r6, r14, lsr #16 @ pack 2 halfwords into 1 word | ||
262 | orr r8, r6, r8, asl #16 | ||
263 | |||
264 | str r8, [r3] | ||
265 | |||
266 | ldmfd sp!, { r4-r9, pc } | ||
267 | .size sample_output_stereo, .-sample_output_stereo | ||
200 | #endif /* ARM_ARCH < 6 */ | 268 | #endif /* ARM_ARCH < 6 */ |
201 | 269 | ||
202 | /**************************************************************************** | 270 | /**************************************************************************** |
@@ -259,8 +327,7 @@ apply_crossfeed: | |||
259 | str r0, [r12, #30*4] @ save delay line index | 327 | str r0, [r12, #30*4] @ save delay line index |
260 | add sp, sp, #8 @ remove temp variables from stack | 328 | add sp, sp, #8 @ remove temp variables from stack |
261 | ldmia sp!, { r4-r11, pc } | 329 | ldmia sp!, { r4-r11, pc } |
262 | .cfend: | 330 | .size apply_crossfeed, .-apply_crossfeed |
263 | .size apply_crossfeed,.cfend-apply_crossfeed | ||
264 | 331 | ||
265 | /**************************************************************************** | 332 | /**************************************************************************** |
266 | * int dsp_downsample(int count, struct dsp_data *data, | 333 | * int dsp_downsample(int count, struct dsp_data *data, |
@@ -317,8 +384,7 @@ dsp_downsample: | |||
317 | sub r8, r8, r1 @ dst - &dst[0] | 384 | sub r8, r8, r1 @ dst - &dst[0] |
318 | mov r0, r8, lsr #2 @ convert bytes->samples | 385 | mov r0, r8, lsr #2 @ convert bytes->samples |
319 | ldmia sp!, { r4-r11, pc } @ ... and we're out | 386 | ldmia sp!, { r4-r11, pc } @ ... and we're out |
320 | .dsend: | 387 | .size dsp_downsample, .-dsp_downsample |
321 | .size dsp_downsample,.dsend-dsp_downsample | ||
322 | 388 | ||
323 | /**************************************************************************** | 389 | /**************************************************************************** |
324 | * int dsp_upsample(int count, struct dsp_data *dsp, | 390 | * int dsp_upsample(int count, struct dsp_data *dsp, |
@@ -327,23 +393,22 @@ dsp_downsample: | |||
327 | .section .text | 393 | .section .text |
328 | .global dsp_upsample | 394 | .global dsp_upsample |
329 | dsp_upsample: | 395 | dsp_upsample: |
330 | stmdb sp!, { r4-r11, lr } @ stack modified regs | 396 | stmfd sp!, { r4-r11, lr } @ stack modified regs |
331 | ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta | 397 | ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta |
332 | sub r5, r5, #1 @ pre-decrement num_channels for use | 398 | sub r5, r5, #1 @ pre-decrement num_channels for use |
333 | add r4, r1, #12 @ r4 = &resample_data.phase | 399 | add r4, r1, #12 @ r4 = &resample_data.phase |
334 | stmdb sp!, { r0, r4 } @ stack count and &resample_data.phase | 400 | mov r6, r6, lsl #16 @ we'll use carry to detect pos increments |
401 | stmfd sp!, { r0, r4 } @ stack count and &resample_data.phase | ||
335 | .uschannel_loop: | 402 | .uschannel_loop: |
336 | ldr r12, [r4] @ r12 = resample_data.phase | 403 | ldr r12, [r4] @ r12 = resample_data.phase |
337 | mov r1, r12, ror #16 @ swap halfword positions, we'll use carry | ||
338 | @ to detect pos increments | ||
339 | ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1] | 404 | ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1] |
340 | ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1] | 405 | ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1] |
341 | add r9, r4, #4 @ r9 = &last_sample[0] | 406 | add r9, r4, #4 @ r9 = &last_sample[0] |
342 | ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1] | 407 | mov r1, r12, lsl #16 @ we'll use carry to detect pos increments |
343 | sub r11, r0, #1 | 408 | sub r11, r0, #1 |
344 | ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ... | 409 | ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ... |
410 | ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1] | ||
345 | str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample | 411 | str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample |
346 | add r9, r7, r0, lsl #2 @ r9 = src_end = &src[count] | ||
347 | movs r14, r12, lsr #16 @ pos = resample_data.phase >> 16 | 412 | movs r14, r12, lsr #16 @ pos = resample_data.phase >> 16 |
348 | beq .usstart_0 @ pos = 0 | 413 | beq .usstart_0 @ pos = 0 |
349 | cmp r14, r0 @ if pos >= count, we're already done | 414 | cmp r14, r0 @ if pos >= count, we're already done |
@@ -354,41 +419,38 @@ dsp_upsample: | |||
354 | 419 | ||
355 | @ Register usage in loop: | 420 | @ Register usage in loop: |
356 | @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel, | 421 | @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel, |
357 | @ r6 = delta, r7 = s, r8 = d, r9 = src_end, r10 = s[pos - 1], r11 = s[pos] | 422 | @ r6 = delta, r7 = s, r8 = d, r9 = diff, r10 = s[pos - 1], r11 = s[pos] |
358 | .usloop_1: | 423 | .usloop_1: |
359 | mov r10, r11 @ r10 = previous sample | 424 | mov r10, r11 @ r10 = previous sample |
360 | .usstart_0: | 425 | .usstart_0: |
361 | ldr r11, [r7], #4 @ r11 = next sample | 426 | ldr r11, [r7], #4 @ r11 = next sample |
362 | sub r0, r11, r10 @ r0 = s[pos] - s[pos - 1] | 427 | mov r4, r1, lsr #16 @ r4 = frac = phase >> 16 |
428 | sub r9, r11, r10 @ r9 = diff = s[pos] - s[pos - 1] | ||
363 | .usloop_0: | 429 | .usloop_0: |
430 | smull r12, r14, r4, r9 | ||
431 | adds r1, r1, r6 @ phase += delta << 16 | ||
364 | mov r4, r1, lsr #16 @ r4 = frac = phase >> 16 | 432 | mov r4, r1, lsr #16 @ r4 = frac = phase >> 16 |
365 | smull r12, r14, r4, r0 | ||
366 | add r14, r10, r14, lsl #16 | 433 | add r14, r10, r14, lsl #16 |
367 | add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + frac*diff | 434 | add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + frac*diff |
368 | str r14, [r8], #4 @ *d++ = out | 435 | str r14, [r8], #4 @ *d++ = out |
369 | adds r1, r1, r6, lsl #16 @ phase += delta << 16 | ||
370 | bcc .usloop_0 @ if carry is set, pos is incremented | 436 | bcc .usloop_0 @ if carry is set, pos is incremented |
371 | cmp r7, r9 @ if s < src_end, do another sample | 437 | subs r0, r0, #1 @ if count > 0, do another sample |
372 | blo .usloop_1 | 438 | bgt .usloop_1 |
373 | .usloop_skip: | 439 | .usloop_skip: |
374 | subs r5, r5, #1 | 440 | subs r5, r5, #1 |
375 | ldmia sp, { r0, r4 } @ reload count and &resample_data.phase | 441 | ldmfd sp, { r0, r4 } @ reload count and &resample_data.phase |
376 | bpl .uschannel_loop @ if (--ch) >= 0, do another channel | 442 | bpl .uschannel_loop @ if (--ch) >= 0, do another channel |
377 | mov r1, r1, ror #16 @ wrap phase back to start of next frame | 443 | mov r1, r1, lsr #16 @ wrap phase back to start of next frame |
378 | str r1, [r4] @ store back | 444 | ldr r2, [r3] @ r2 = &dst[0] |
379 | ldr r1, [r3] @ r1 = &dst[0] | 445 | str r1, [r4] @ store phase |
380 | sub r8, r8, r1 @ dst - &dst[0] | 446 | sub r8, r8, r2 @ dst - &dst[0] |
381 | mov r0, r8, lsr #2 @ convert bytes->samples | 447 | mov r0, r8, lsr #2 @ convert bytes->samples |
382 | add sp, sp, #8 @ adjust stack for temp variables | 448 | add sp, sp, #8 @ adjust stack for temp variables |
383 | ldmia sp!, { r4-r11, pc } @ ... and we're out | 449 | ldmfd sp!, { r4-r11, pc } @ ... and we're out |
384 | .usend: | 450 | .size dsp_upsample, .-dsp_upsample |
385 | .size dsp_upsample,.usend-dsp_upsample | ||
386 | 451 | ||
387 | /**************************************************************************** | 452 | /**************************************************************************** |
388 | * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) | 453 | * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) |
389 | * NOTE: The following code processes two samples at once. When count is odd, | ||
390 | * there is an additional obsolete sample processed, which will not be | ||
391 | * used by the calling functions. | ||
392 | */ | 454 | */ |
393 | .section .icode, "ax", %progbits | 455 | .section .icode, "ax", %progbits |
394 | .align 2 | 456 | .align 2 |
@@ -396,30 +458,40 @@ dsp_upsample: | |||
396 | .type dsp_apply_gain, %function | 458 | .type dsp_apply_gain, %function |
397 | dsp_apply_gain: | 459 | dsp_apply_gain: |
398 | @ input: r0 = count, r1 = data, r2 = buf[] | 460 | @ input: r0 = count, r1 = data, r2 = buf[] |
399 | stmfd sp!, {r4-r7, lr} | 461 | stmfd sp!, { r4-r8, lr } |
400 | 462 | ||
401 | ldr r3, [r1, #4] @ r3 = data->num_channels | 463 | ldr r3, [r1, #4] @ r3 = data->num_channels |
402 | ldr r4, [r1, #32] @ r4 = data->gain | 464 | ldr r4, [r1, #32] @ r4 = data->gain |
403 | 465 | ||
404 | .dag_outerloop: | 466 | .dag_outerloop: |
405 | ldr r1, [r2], #4 @ r1 = buf[0] and increment index of buf[] | 467 | ldr r1, [r2], #4 @ r1 = buf[0] and increment index of buf[] |
406 | mov r12, r0 @ r12 = r0 = count | 468 | subs r12, r0, #1 @ r12 = count - 1 |
469 | beq .dag_singlesample @ Zero? Only one sample! | ||
407 | 470 | ||
408 | .dag_innerloop: | 471 | .dag_innerloop: |
409 | ldmia r1, {r5, r6} @ load r5, r6 from r1 | 472 | ldmia r1, { r5, r6 } @ load r5, r6 from r1 |
410 | smull r7, lr, r5, r4 @ r5 = FRACMUL_SHL(r5, r4, 8) | 473 | smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8) |
411 | mov lr, lr, asl #9 | 474 | smull r14, r5, r6, r4 @ r14 = FRACMUL_SHL(r6, r4, 8) |
412 | orr r5, lr, r7, lsr #23 | ||
413 | smull r7, lr, r6, r4 @ r6 = FRACMUL_SHL(r6, r4, 8) | ||
414 | mov lr, lr, asl #9 | ||
415 | orr r6, lr, r7, lsr #23 | ||
416 | stmia r1!, {r5, r6} @ save r5, r6 to r1 and increment r1 | ||
417 | subs r12, r12, #2 | 475 | subs r12, r12, #2 |
476 | mov r7, r7, lsr #23 | ||
477 | mov r14, r14, lsr #23 | ||
478 | orr r7, r7, r8, asl #9 | ||
479 | orr r14, r14, r5, asl #9 | ||
480 | stmia r1!, { r7, r14 } @ save r7, r14 to [r1] and increment r1 | ||
418 | bgt .dag_innerloop @ end of inner loop | 481 | bgt .dag_innerloop @ end of inner loop |
419 | 482 | ||
483 | blt .dag_evencount @ < 0? even count | ||
484 | |||
485 | .dag_singlesample: | ||
486 | ldr r5, [r1] @ handle odd sample | ||
487 | smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8) | ||
488 | mov r7, r7, lsr #23 | ||
489 | orr r7, r7, r8, asl #9 | ||
490 | str r7, [r1] | ||
491 | |||
492 | .dag_evencount: | ||
420 | subs r3, r3, #1 | 493 | subs r3, r3, #1 |
421 | bgt .dag_outerloop @ end of outer loop | 494 | bgt .dag_outerloop @ end of outer loop |
422 | 495 | ||
423 | ldmfd sp!, {r4-r7, pc} | 496 | ldmfd sp!, { r4-r8, pc } |
424 | .dagend: | 497 | .size dsp_apply_gain, .-dsp_apply_gain |
425 | .size dsp_apply_gain,.dagend-dsp_apply_gain | ||