diff options
Diffstat (limited to 'lib/rbcodec/dsp/dsp_arm.S')
-rw-r--r-- | lib/rbcodec/dsp/dsp_arm.S | 84 |
1 files changed, 42 insertions, 42 deletions
diff --git a/lib/rbcodec/dsp/dsp_arm.S b/lib/rbcodec/dsp/dsp_arm.S index ed58bed340..16394b8690 100644 --- a/lib/rbcodec/dsp/dsp_arm.S +++ b/lib/rbcodec/dsp/dsp_arm.S | |||
@@ -196,55 +196,56 @@ crossfeed_process: | |||
196 | @ to keep the count on the stack :/ | 196 | @ to keep the count on the stack :/ |
197 | ldr r1, [r1] @ r1 = buf = *buf_p; | 197 | ldr r1, [r1] @ r1 = buf = *buf_p; |
198 | stmfd sp!, { r4-r11, lr } @ stack modified regs | 198 | stmfd sp!, { r4-r11, lr } @ stack modified regs |
199 | ldr r12, [r1] @ r12 = buf->remcount | 199 | ldr r0, [r0] @ r0 = this->data = &crossfeed_state |
200 | ldr r14, [r0] @ r14 = this->data = &crossfeed_state | 200 | ldmia r1, { r1-r3 } @ r1 = buf->remcount, r2 = buf->p32[0], |
201 | ldmib r1, { r2-r3 } @ r2 = buf->p32[0], r3 = buf->p32[1] | 201 | @ r3 = buf->p32[1] |
202 | ldmia r14!, { r4-r11 } @ load direct gain and filter data | 202 | ldmia r0, { r4-r12, r14 } @ r4 = gain, r5-r7 = coeffs, |
203 | add r0, r14, #13*2*4 @ calculate end of delay | 203 | @ r8-r11 = history, r12 = index, |
204 | stmfd sp!, { r0, r12 } @ stack end of delay adr, count and state | 204 | @ r14 = index_max |
205 | ldr r0, [r0] @ fetch current delay line address | 205 | add r0, r0, #0x28 @ r0 = state->delay |
206 | 206 | stmfd sp!, { r0-r1, r14 } @ stack state->delay, count, index_max | |
207 | /* Register usage in loop: | 207 | |
208 | * r0 = &delay[index][0], r1 = accumulator high, r2 = buf->p32[0], | 208 | /* Register usage in loop: |
209 | * r0 = acc low/count, r1 = acc high, r2 = buf->p32[0], | ||
209 | * r3 = buf->p32[1], r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs), | 210 | * r3 = buf->p32[1], r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs), |
210 | * r8-r11 = filter history, r12 = temp, r14 = accumulator low | 211 | * r8 = dr[n-1], r9 = y_r[n-1], r10 = dl[n-1], r11 = y_l[n-1], |
212 | * r12 = index, r14 = scratch/index_max | ||
211 | */ | 213 | */ |
212 | .cfloop: | 214 | .cfloop: |
213 | smull r14, r1, r6, r8 @ acc = b1*dr[n - 1] | 215 | smull r0, r1, r6, r8 @ acc = b1*dr[n - 1] |
214 | smlal r14, r1, r7, r9 @ acc += a1*y_l[n - 1] | 216 | ldr r8, [r12, #4] @ r8 = dr[n] |
215 | ldr r8, [r0, #4] @ r8 = dr[n] | 217 | smlal r0, r1, r7, r9 @ acc += a1*y_r[n - 1] |
216 | smlal r14, r1, r5, r8 @ acc += b0*dr[n] | 218 | smlal r0, r1, r5, r8 @ acc += b0*dr[n] |
217 | mov r9, r1, lsl #1 @ fix format for filter history | 219 | ldr r14, [r2] @ load left input: x_l[n] |
218 | ldr r12, [r2] @ load left input | 220 | mov r9, r1, asl #1 @ fix format for filter history |
219 | smlal r14, r1, r4, r12 @ acc += gain*x_l[n] | 221 | smlal r0, r1, r4, r14 @ acc += gain*x_l[n] |
220 | mov r1, r1, lsl #1 @ fix format | 222 | mov r1, r1, asl #1 @ fix format |
221 | str r1, [r2], #4 @ save result | 223 | str r1, [r2], #4 @ save result |
222 | 224 | smull r0, r1, r6, r10 @ acc = b1*dl[n - 1] | |
223 | smull r14, r1, r6, r10 @ acc = b1*dl[n - 1] | 225 | ldr r10, [r12] @ r10 = dl[n] |
224 | smlal r14, r1, r7, r11 @ acc += a1*y_r[n - 1] | 226 | smlal r0, r1, r7, r11 @ acc += a1*y_l[n - 1] |
225 | ldr r10, [r0] @ r10 = dl[n] | 227 | smlal r0, r1, r5, r10 @ acc += b0*dl[n] |
226 | str r12, [r0], #4 @ save left input to delay line | 228 | str r14, [r12], #4 @ save left input to delay line |
227 | smlal r14, r1, r5, r10 @ acc += b0*dl[n] | 229 | ldr r14, [r3] @ load right input: x_r[n] |
228 | mov r11, r1, lsl #1 @ fix format for filter history | 230 | mov r11, r1, asl #1 @ fix format for filter history |
229 | ldr r12, [r3] @ load right input | 231 | smlal r0, r1, r4, r14 @ acc += gain*x_r[n] |
230 | smlal r14, r1, r4, r12 @ acc += gain*x_r[n] | 232 | str r14, [r12], #4 @ save right input to delay line |
231 | str r12, [r0], #4 @ save right input to delay line | 233 | ldmib sp, { r0, r14 } @ fetch count and delay end |
232 | mov r1, r1, lsl #1 @ fix format | 234 | mov r1, r1, asl #1 @ fix format |
233 | ldmia sp, { r12, r14 } @ fetch delay line end addr and count from stack | ||
234 | str r1, [r3], #4 @ save result | 235 | str r1, [r3], #4 @ save result |
235 | 236 | ||
236 | cmp r0, r12 @ need to wrap to start of delay? | 237 | cmp r12, r14 @ need to wrap to start of delay? |
237 | subhs r0, r12, #13*2*4 @ wrap back delay line ptr to start | 238 | ldrhs r12, [sp] @ wrap delay index |
238 | 239 | ||
239 | subs r14, r14, #1 @ are we finished? | 240 | subs r0, r0, #1 @ are we finished? |
240 | strgt r14, [sp, #4] @ nope, save count back to stack | 241 | strgt r0, [sp, #4] @ save count to stack |
241 | bgt .cfloop | 242 | bgt .cfloop |
242 | 243 | ||
243 | @ save data back to struct | 244 | @ save data back to struct |
244 | str r0, [r12] @ save delay line index | 245 | ldr r0, [sp] @ fetch state->delay |
245 | sub r12, r12, #13*2*4 + 4*4 @ r12 = data->history | 246 | sub r0, r0, #0x18 @ save filter history and delay index |
246 | stmia r12, { r8-r11 } @ save filter history | 247 | stmia r0, { r8-r12 } @ |
247 | add sp, sp, #8 @ remove temp variables from stack | 248 | add sp, sp, #12 @ remove temp variables from stack |
248 | ldmpc regs=r4-r11 | 249 | ldmpc regs=r4-r11 |
249 | .size crossfeed_process, .-crossfeed_process | 250 | .size crossfeed_process, .-crossfeed_process |
250 | 251 | ||
@@ -260,8 +261,7 @@ crossfeed_meier_process: | |||
260 | ldr r0, [r0] @ r0 = this->data = &crossfeed_state | 261 | ldr r0, [r0] @ r0 = this->data = &crossfeed_state |
261 | stmfd sp!, { r4-r10, lr } @ stack non-volatile context | 262 | stmfd sp!, { r4-r10, lr } @ stack non-volatile context |
262 | ldmia r1, { r1-r3 } @ r1 = buf->remcount, r2=p32[0], r3=p32[1] | 263 | ldmia r1, { r1-r3 } @ r1 = buf->remcount, r2=p32[0], r3=p32[1] |
263 | add r0, r0, #16 @ r0 = &state->vcl | 264 | ldmib r0, { r4-r8 } @ r4 = vcl, r5 = vcr, r6 = vdiff |
264 | ldmia r0, { r4-r8 } @ r4 = vcl, r5 = vcr, r6 = vdiff | ||
265 | @ r7 = coef1, r8 = coef2 | 265 | @ r7 = coef1, r8 = coef2 |
266 | .cfm_loop: | 266 | .cfm_loop: |
267 | ldr r12, [r2] @ r12 = lout | 267 | ldr r12, [r2] @ r12 = lout |
@@ -285,7 +285,7 @@ crossfeed_meier_process: | |||
285 | sub r5, r5, r12 @ r5 = vcr -= res2 | 285 | sub r5, r5, r12 @ r5 = vcr -= res2 |
286 | bgt .cfm_loop @ more samples? | 286 | bgt .cfm_loop @ more samples? |
287 | 287 | ||
288 | stmia r0, { r4-r6 } @ save vcl, vcr, vdiff | 288 | stmib r0, { r4-r6 } @ save vcl, vcr, vdiff |
289 | ldmpc regs=r4-r10 @ restore non-volatile context, return | 289 | ldmpc regs=r4-r10 @ restore non-volatile context, return |
290 | .size crossfeed_meier_process, .-crossfeed_meier_process | 290 | .size crossfeed_meier_process, .-crossfeed_meier_process |
291 | 291 | ||