summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-11-12 07:07:57 +0000
committer: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-11-12 07:07:57 +0000
commit: ee610d47c7a636a465176cce52b932cdd26fd272 (patch)
tree: 054cdfcbc8d0674fbf12581ab140d4d21badcf27
parent: 3f4e0cf25b525f8acec950547ff7570db5c134a5 (diff)
download: rockbox-ee610d47c7a636a465176cce52b932cdd26fd272.tar.gz
download: rockbox-ee610d47c7a636a465176cce52b932cdd26fd272.zip
Re-submit ARM asm optimizations in mpc synthesis as the performance regressions on S5L870x have been solved with r28561.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28562 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libmusepack/synth_filter_arm.S 27
1 files changed, 10 insertions, 17 deletions
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
index 598f218e45..9bd4e04626 100644
--- a/apps/codecs/libmusepack/synth_filter_arm.S
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -164,7 +164,7 @@ mpc_decoder_windowing_D:
164 * r10 = lo, r11 = hi of 31..17 164 * r10 = lo, r11 = hi of 31..17
165 * r12 = V[31..16] 165 * r12 = V[31..16]
166 *****************************************/ 166 *****************************************/
167 mov lr, #15 167 mov lr, #15*8
168 add r12, r1, #30*4 /* r12 = V[31] */ 168 add r12, r1, #30*4 /* r12 = V[31] */
169.loop15: 169.loop15:
170 ldmia r2!, { r3-r6 } /* load D[00..03] */ 170 ldmia r2!, { r3-r6 } /* load D[00..03] */
@@ -238,21 +238,19 @@ mpc_decoder_windowing_D:
238 /* store Data[01..15] */ 238 /* store Data[01..15] */
239 mov r8, r8, lsr #16 239 mov r8, r8, lsr #16
240 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 240 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
241 str r8, [r0] /* store Data */
242 /* store Data[31..17] */ 241 /* store Data[31..17] */
243 add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */
244 mov r10, r10, lsr #16 242 mov r10, r10, lsr #16
245 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ 243 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
246 rsb r10, r10, #0 /* r10 = -r10 */ 244 rsb r10, r10, #0 /* r10 = -r10 */
247 str r10, [r0], #4 /* store Data */ 245 str r10, [r0, lr] /* store Data */
248 sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ 246 str r8, [r0], #4 /* store Data */
249 /* correct adresses for next loop */ 247 /* correct adresses for next loop */
250 sub r12, r12, #4 /* r12 = V-- */ 248 sub r12, r12, #4 /* r12 = V-- */
251 add r1, r1, #4 /* r1 = V++ */ 249 add r1, r1, #4 /* r1 = V++ */
252 /* next loop */ 250 /* next loop */
253 subs lr, lr, #1 251 subs lr, lr, #8
254 bgt .loop15 252 bgt .loop15
255 253
256 /****************************************** 254 /******************************************
257 * V[16] with internal symmetry 255 * V[16] with internal symmetry
258 *****************************************/ 256 *****************************************/
@@ -293,7 +291,6 @@ mpc_decoder_windowing_D:
293 mov r8, r8, lsr #16 291 mov r8, r8, lsr #16
294 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 292 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
295 str r8, [r0], #4 /* store Data */ 293 str r8, [r0], #4 /* store Data */
296 add r1, r1, #4 /* V++ */
297 294
298 ldmpc regs=r4-r11 295 ldmpc regs=r4-r11
299#elif ARM_ARCH < 6 /* arm9 and above */ 296#elif ARM_ARCH < 6 /* arm9 and above */
@@ -365,7 +362,7 @@ mpc_decoder_windowing_D:
365 * r10 = lo, r11 = hi of 31..17 362 * r10 = lo, r11 = hi of 31..17
366 * r12 = V[31..16] 363 * r12 = V[31..16]
367 *****************************************/ 364 *****************************************/
368 mov lr, #15 365 mov lr, #15*8
369 add r12, r1, #30*4 /* r12 = V[31] */ 366 add r12, r1, #30*4 /* r12 = V[31] */
370.loop15: 367.loop15:
371 ldmia r2!, { r3-r4 } /* load D[00..01] */ 368 ldmia r2!, { r3-r4 } /* load D[00..01] */
@@ -443,19 +440,17 @@ mpc_decoder_windowing_D:
443 /* store Data[01..15] */ 440 /* store Data[01..15] */
444 mov r8, r8, lsr #16 441 mov r8, r8, lsr #16
445 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 442 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
446 str r8, [r0] /* store Data */
447 /* store Data[31..17] */ 443 /* store Data[31..17] */
448 add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */
449 mov r10, r10, lsr #16 444 mov r10, r10, lsr #16
450 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ 445 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
451 rsb r10, r10, #0 /* r10 = -r10 */ 446 rsb r10, r10, #0 /* r10 = -r10 */
452 str r10, [r0], #4 /* store Data */ 447 str r10, [r0, lr] /* store Data */
453 sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ 448 str r8, [r0], #4 /* store Data */
454 /* correct adresses for next loop */ 449 /* correct adresses for next loop */
455 sub r12, r12, #4 /* r12 = V-- */ 450 sub r12, r12, #4 /* r12 = V-- */
456 add r1, r1, #4 /* r1 = V++ */ 451 add r1, r1, #4 /* r1 = V++ */
457 /* next loop */ 452 /* next loop */
458 subs lr, lr, #1 453 subs lr, lr, #8
459 bgt .loop15 454 bgt .loop15
460 455
461 /****************************************** 456 /******************************************
@@ -498,7 +493,6 @@ mpc_decoder_windowing_D:
498 mov r8, r8, lsr #16 493 mov r8, r8, lsr #16
499 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 494 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
500 str r8, [r0], #4 /* store Data */ 495 str r8, [r0], #4 /* store Data */
501 add r1, r1, #4 /* V++ */
502 496
503 ldmpc regs=r4-r11 497 ldmpc regs=r4-r11
504#else 498#else
@@ -645,11 +639,10 @@ mpc_decoder_windowing_D:
645 rsb r11, r11, #0 /* r11 = -r11 */ 639 rsb r11, r11, #0 /* r11 = -r11 */
646 /* store Data[01..15] */ 640 /* store Data[01..15] */
647 mov r9, r9, lsl #2 641 mov r9, r9, lsl #2
648 str r9, [r0] /* store Data */
649 /* store Data[31..17] */ 642 /* store Data[31..17] */
650 mov r11, r11, lsl #2 643 mov r11, r11, lsl #2
651 str r11, [r0, lr] /* store Data */ 644 str r11, [r0, lr] /* store Data */
652 add r0, r0, #4 /* r0++ */ 645 str r9, [r0], #4 /* store Data */
653 /* next loop */ 646 /* next loop */
654 subs lr, lr, #8 647 subs lr, lr, #8
655 bgt .loop15 648 bgt .loop15