summary refs log tree commit diff
path: root/lib/rbcodec/dsp/dsp_arm.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/dsp/dsp_arm.S')
-rw-r--r-- lib/rbcodec/dsp/dsp_arm.S 84
1 files changed, 42 insertions, 42 deletions
diff --git a/lib/rbcodec/dsp/dsp_arm.S b/lib/rbcodec/dsp/dsp_arm.S
index ed58bed340..16394b8690 100644
--- a/lib/rbcodec/dsp/dsp_arm.S
+++ b/lib/rbcodec/dsp/dsp_arm.S
@@ -196,55 +196,56 @@ crossfeed_process:
196 @ to keep the count on the stack :/ 196 @ to keep the count on the stack :/
197 ldr r1, [r1] @ r1 = buf = *buf_p; 197 ldr r1, [r1] @ r1 = buf = *buf_p;
198 stmfd sp!, { r4-r11, lr } @ stack modified regs 198 stmfd sp!, { r4-r11, lr } @ stack modified regs
199 ldr r12, [r1] @ r12 = buf->remcount 199 ldr r0, [r0] @ r0 = this->data = &crossfeed_state
200 ldr r14, [r0] @ r14 = this->data = &crossfeed_state 200 ldmia r1, { r1-r3 } @ r1 = buf->remcount, r2 = buf->p32[0],
201 ldmib r1, { r2-r3 } @ r2 = buf->p32[0], r3 = buf->p32[1] 201 @ r3 = buf->p32[1]
202 ldmia r14!, { r4-r11 } @ load direct gain and filter data 202 ldmia r0, { r4-r12, r14 } @ r4 = gain, r5-r7 = coeffs,
203 add r0, r14, #13*2*4 @ calculate end of delay 203 @ r8-r11 = history, r12 = index,
204 stmfd sp!, { r0, r12 } @ stack end of delay adr, count and state 204 @ r14 = index_max
205 ldr r0, [r0] @ fetch current delay line address 205 add r0, r0, #0x28 @ r0 = state->delay
206 206 stmfd sp!, { r0-r1, r14 } @ stack state->delay, count, index_max
207 /* Register usage in loop: 207
208 * r0 = &delay[index][0], r1 = accumulator high, r2 = buf->p32[0], 208 /* Register usage in loop:
209 * r0 = acc low/count, r1 = acc high, r2 = buf->p32[0],
209 * r3 = buf->p32[1], r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs), 210 * r3 = buf->p32[1], r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
210 * r8-r11 = filter history, r12 = temp, r14 = accumulator low 211 * r8 = dr[n-1], r9 = y_r[n-1], r10 = dl[n-1], r11 = y_l[n-1],
212 * r12 = index, r14 = scratch/index_max
211 */ 213 */
212.cfloop: 214.cfloop:
213 smull r14, r1, r6, r8 @ acc = b1*dr[n - 1] 215 smull r0, r1, r6, r8 @ acc = b1*dr[n - 1]
214 smlal r14, r1, r7, r9 @ acc += a1*y_l[n - 1] 216 ldr r8, [r12, #4] @ r8 = dr[n]
215 ldr r8, [r0, #4] @ r8 = dr[n] 217 smlal r0, r1, r7, r9 @ acc += a1*y_r[n - 1]
216 smlal r14, r1, r5, r8 @ acc += b0*dr[n] 218 smlal r0, r1, r5, r8 @ acc += b0*dr[n]
217 mov r9, r1, lsl #1 @ fix format for filter history 219 ldr r14, [r2] @ load left input: x_l[n]
218 ldr r12, [r2] @ load left input 220 mov r9, r1, asl #1 @ fix format for filter history
219 smlal r14, r1, r4, r12 @ acc += gain*x_l[n] 221 smlal r0, r1, r4, r14 @ acc += gain*x_l[n]
220 mov r1, r1, lsl #1 @ fix format 222 mov r1, r1, asl #1 @ fix format
221 str r1, [r2], #4 @ save result 223 str r1, [r2], #4 @ save result
222 224 smull r0, r1, r6, r10 @ acc = b1*dl[n - 1]
223 smull r14, r1, r6, r10 @ acc = b1*dl[n - 1] 225 ldr r10, [r12] @ r10 = dl[n]
224 smlal r14, r1, r7, r11 @ acc += a1*y_r[n - 1] 226 smlal r0, r1, r7, r11 @ acc += a1*y_l[n - 1]
225 ldr r10, [r0] @ r10 = dl[n] 227 smlal r0, r1, r5, r10 @ acc += b0*dl[n]
226 str r12, [r0], #4 @ save left input to delay line 228 str r14, [r12], #4 @ save left input to delay line
227 smlal r14, r1, r5, r10 @ acc += b0*dl[n] 229 ldr r14, [r3] @ load right input: x_r[n]
228 mov r11, r1, lsl #1 @ fix format for filter history 230 mov r11, r1, asl #1 @ fix format for filter history
229 ldr r12, [r3] @ load right input 231 smlal r0, r1, r4, r14 @ acc += gain*x_r[n]
230 smlal r14, r1, r4, r12 @ acc += gain*x_r[n] 232 str r14, [r12], #4 @ save right input to delay line
231 str r12, [r0], #4 @ save right input to delay line 233 ldmib sp, { r0, r14 } @ fetch count and delay end
232 mov r1, r1, lsl #1 @ fix format 234 mov r1, r1, asl #1 @ fix format
233 ldmia sp, { r12, r14 } @ fetch delay line end addr and count from stack
234 str r1, [r3], #4 @ save result 235 str r1, [r3], #4 @ save result
235 236
236 cmp r0, r12 @ need to wrap to start of delay? 237 cmp r12, r14 @ need to wrap to start of delay?
237 subhs r0, r12, #13*2*4 @ wrap back delay line ptr to start 238 ldrhs r12, [sp] @ wrap delay index
238 239
239 subs r14, r14, #1 @ are we finished? 240 subs r0, r0, #1 @ are we finished?
240 strgt r14, [sp, #4] @ nope, save count back to stack 241 strgt r0, [sp, #4] @ save count to stack
241 bgt .cfloop 242 bgt .cfloop
242 243
243 @ save data back to struct 244 @ save data back to struct
244 str r0, [r12] @ save delay line index 245 ldr r0, [sp] @ fetch state->delay
245 sub r12, r12, #13*2*4 + 4*4 @ r12 = data->history 246 sub r0, r0, #0x18 @ save filter history and delay index
246 stmia r12, { r8-r11 } @ save filter history 247 stmia r0, { r8-r12 } @
247 add sp, sp, #8 @ remove temp variables from stack 248 add sp, sp, #12 @ remove temp variables from stack
248 ldmpc regs=r4-r11 249 ldmpc regs=r4-r11
249 .size crossfeed_process, .-crossfeed_process 250 .size crossfeed_process, .-crossfeed_process
250 251
@@ -260,8 +261,7 @@ crossfeed_meier_process:
260 ldr r0, [r0] @ r0 = this->data = &crossfeed_state 261 ldr r0, [r0] @ r0 = this->data = &crossfeed_state
261 stmfd sp!, { r4-r10, lr } @ stack non-volatile context 262 stmfd sp!, { r4-r10, lr } @ stack non-volatile context
262 ldmia r1, { r1-r3 } @ r1 = buf->remcout, r2=p32[0], r3=p32[1] 263 ldmia r1, { r1-r3 } @ r1 = buf->remcout, r2=p32[0], r3=p32[1]
263 add r0, r0, #16 @ r0 = &state->vcl 264 ldmib r0, { r4-r8 } @ r4 = vcl, r5 = vcr, r6 = vdiff
264 ldmia r0, { r4-r8 } @ r4 = vcl, r5 = vcr, r6 = vdiff
265 @ r7 = coef1, r8 = coef2 265 @ r7 = coef1, r8 = coef2
266.cfm_loop: 266.cfm_loop:
267 ldr r12, [r2] @ r12 = lout 267 ldr r12, [r2] @ r12 = lout
@@ -285,7 +285,7 @@ crossfeed_meier_process:
285 sub r5, r5, r12 @ r5 = vcr -= res2 285 sub r5, r5, r12 @ r5 = vcr -= res2
286 bgt .cfm_loop @ more samples? 286 bgt .cfm_loop @ more samples?
287 287
288 stmia r0, { r4-r6 } @ save vcl, vcr, vdiff 288 stmib r0, { r4-r6 } @ save vcl, vcr, vdiff
289 ldmpc regs=r4-r10 @ restore non-volatile context, return 289 ldmpc regs=r4-r10 @ restore non-volatile context, return
290 .size crossfeed_meier_process, .-crossfeed_meier_process 290 .size crossfeed_meier_process, .-crossfeed_meier_process
291 291