diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-11-12 07:07:57 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-11-12 07:07:57 +0000 |
commit | ee610d47c7a636a465176cce52b932cdd26fd272 (patch) | |
tree | 054cdfcbc8d0674fbf12581ab140d4d21badcf27 /apps/codecs | |
parent | 3f4e0cf25b525f8acec950547ff7570db5c134a5 (diff) | |
download | rockbox-ee610d47c7a636a465176cce52b932cdd26fd272.tar.gz rockbox-ee610d47c7a636a465176cce52b932cdd26fd272.zip |
Re-submit ARM asm optimizations in mpc synthesis as the performance regressions on S5L870x have been solved with r28561.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28562 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libmusepack/synth_filter_arm.S | 27 |
1 files changed, 10 insertions, 17 deletions
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S | |||
index 598f218e45..9bd4e04626 100644 | |||
--- a/apps/codecs/libmusepack/synth_filter_arm.S | |||
+++ b/apps/codecs/libmusepack/synth_filter_arm.S | |||
@@ -164,7 +164,7 @@ mpc_decoder_windowing_D: | |||
164 | * r10 = lo, r11 = hi of 31..17 | 164 | * r10 = lo, r11 = hi of 31..17 |
165 | * r12 = V[31..16] | 165 | * r12 = V[31..16] |
166 | *****************************************/ | 166 | *****************************************/ |
167 | mov lr, #15 | 167 | mov lr, #15*8 |
168 | add r12, r1, #30*4 /* r12 = V[31] */ | 168 | add r12, r1, #30*4 /* r12 = V[31] */ |
169 | .loop15: | 169 | .loop15: |
170 | ldmia r2!, { r3-r6 } /* load D[00..03] */ | 170 | ldmia r2!, { r3-r6 } /* load D[00..03] */ |
@@ -238,21 +238,19 @@ mpc_decoder_windowing_D: | |||
238 | /* store Data[01..15] */ | 238 | /* store Data[01..15] */ |
239 | mov r8, r8, lsr #16 | 239 | mov r8, r8, lsr #16 |
240 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ | 240 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ |
241 | str r8, [r0] /* store Data */ | ||
242 | /* store Data[31..17] */ | 241 | /* store Data[31..17] */ |
243 | add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */ | ||
244 | mov r10, r10, lsr #16 | 242 | mov r10, r10, lsr #16 |
245 | orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ | 243 | orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ |
246 | rsb r10, r10, #0 /* r10 = -r10 */ | 244 | rsb r10, r10, #0 /* r10 = -r10 */ |
247 | str r10, [r0], #4 /* store Data */ | 245 | str r10, [r0, lr] /* store Data */ |
248 | sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ | 246 | str r8, [r0], #4 /* store Data */ |
249 | /* correct adresses for next loop */ | 247 | /* correct adresses for next loop */ |
250 | sub r12, r12, #4 /* r12 = V-- */ | 248 | sub r12, r12, #4 /* r12 = V-- */ |
251 | add r1, r1, #4 /* r1 = V++ */ | 249 | add r1, r1, #4 /* r1 = V++ */ |
252 | /* next loop */ | 250 | /* next loop */ |
253 | subs lr, lr, #1 | 251 | subs lr, lr, #8 |
254 | bgt .loop15 | 252 | bgt .loop15 |
255 | 253 | ||
256 | /****************************************** | 254 | /****************************************** |
257 | * V[16] with internal symmetry | 255 | * V[16] with internal symmetry |
258 | *****************************************/ | 256 | *****************************************/ |
@@ -293,7 +291,6 @@ mpc_decoder_windowing_D: | |||
293 | mov r8, r8, lsr #16 | 291 | mov r8, r8, lsr #16 |
294 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ | 292 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ |
295 | str r8, [r0], #4 /* store Data */ | 293 | str r8, [r0], #4 /* store Data */ |
296 | add r1, r1, #4 /* V++ */ | ||
297 | 294 | ||
298 | ldmpc regs=r4-r11 | 295 | ldmpc regs=r4-r11 |
299 | #elif ARM_ARCH < 6 /* arm9 and above */ | 296 | #elif ARM_ARCH < 6 /* arm9 and above */ |
@@ -365,7 +362,7 @@ mpc_decoder_windowing_D: | |||
365 | * r10 = lo, r11 = hi of 31..17 | 362 | * r10 = lo, r11 = hi of 31..17 |
366 | * r12 = V[31..16] | 363 | * r12 = V[31..16] |
367 | *****************************************/ | 364 | *****************************************/ |
368 | mov lr, #15 | 365 | mov lr, #15*8 |
369 | add r12, r1, #30*4 /* r12 = V[31] */ | 366 | add r12, r1, #30*4 /* r12 = V[31] */ |
370 | .loop15: | 367 | .loop15: |
371 | ldmia r2!, { r3-r4 } /* load D[00..01] */ | 368 | ldmia r2!, { r3-r4 } /* load D[00..01] */ |
@@ -443,19 +440,17 @@ mpc_decoder_windowing_D: | |||
443 | /* store Data[01..15] */ | 440 | /* store Data[01..15] */ |
444 | mov r8, r8, lsr #16 | 441 | mov r8, r8, lsr #16 |
445 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ | 442 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ |
446 | str r8, [r0] /* store Data */ | ||
447 | /* store Data[31..17] */ | 443 | /* store Data[31..17] */ |
448 | add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */ | ||
449 | mov r10, r10, lsr #16 | 444 | mov r10, r10, lsr #16 |
450 | orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ | 445 | orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ |
451 | rsb r10, r10, #0 /* r10 = -r10 */ | 446 | rsb r10, r10, #0 /* r10 = -r10 */ |
452 | str r10, [r0], #4 /* store Data */ | 447 | str r10, [r0, lr] /* store Data */ |
453 | sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ | 448 | str r8, [r0], #4 /* store Data */ |
454 | /* correct adresses for next loop */ | 449 | /* correct adresses for next loop */ |
455 | sub r12, r12, #4 /* r12 = V-- */ | 450 | sub r12, r12, #4 /* r12 = V-- */ |
456 | add r1, r1, #4 /* r1 = V++ */ | 451 | add r1, r1, #4 /* r1 = V++ */ |
457 | /* next loop */ | 452 | /* next loop */ |
458 | subs lr, lr, #1 | 453 | subs lr, lr, #8 |
459 | bgt .loop15 | 454 | bgt .loop15 |
460 | 455 | ||
461 | /****************************************** | 456 | /****************************************** |
@@ -498,7 +493,6 @@ mpc_decoder_windowing_D: | |||
498 | mov r8, r8, lsr #16 | 493 | mov r8, r8, lsr #16 |
499 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ | 494 | orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ |
500 | str r8, [r0], #4 /* store Data */ | 495 | str r8, [r0], #4 /* store Data */ |
501 | add r1, r1, #4 /* V++ */ | ||
502 | 496 | ||
503 | ldmpc regs=r4-r11 | 497 | ldmpc regs=r4-r11 |
504 | #else | 498 | #else |
@@ -645,11 +639,10 @@ mpc_decoder_windowing_D: | |||
645 | rsb r11, r11, #0 /* r11 = -r11 */ | 639 | rsb r11, r11, #0 /* r11 = -r11 */ |
646 | /* store Data[01..15] */ | 640 | /* store Data[01..15] */ |
647 | mov r9, r9, lsl #2 | 641 | mov r9, r9, lsl #2 |
648 | str r9, [r0] /* store Data */ | ||
649 | /* store Data[31..17] */ | 642 | /* store Data[31..17] */ |
650 | mov r11, r11, lsl #2 | 643 | mov r11, r11, lsl #2 |
651 | str r11, [r0, lr] /* store Data */ | 644 | str r11, [r0, lr] /* store Data */ |
652 | add r0, r0, #4 /* r0++ */ | 645 | str r9, [r0], #4 /* store Data */ |
653 | /* next loop */ | 646 | /* next loop */ |
654 | subs lr, lr, #8 | 647 | subs lr, lr, #8 |
655 | bgt .loop15 | 648 | bgt .loop15 |