summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNils Wallménius <nils@rockbox.org>2010-11-05 11:49:30 +0000
committerNils Wallménius <nils@rockbox.org>2010-11-05 11:49:30 +0000
commit9d98583a181bdab20263262d121130111cf3be86 (patch)
tree3c9f306a4f045a55e3bc31df3626f978f7c7c69e
parentdbdc0a8a8cbfa4e6b72e5f6fb643f5b0ef4afc27 (diff)
downloadrockbox-9d98583a181bdab20263262d121130111cf3be86.tar.gz
rockbox-9d98583a181bdab20263262d121130111cf3be86.zip
libmusepack: tiny optimization of the ARMv4 mpc_decoder_windowing_D implementations, using register-indexed addressing to store data, saving one instruction in the loop and deleting one instruction after the loop which isn't needed.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28488 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libmusepack/synth_filter_arm.S22
1 files changed, 9 insertions, 13 deletions
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
index 598f218e45..521d690e28 100644
--- a/apps/codecs/libmusepack/synth_filter_arm.S
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -164,7 +164,7 @@ mpc_decoder_windowing_D:
164 * r10 = lo, r11 = hi of 31..17 164 * r10 = lo, r11 = hi of 31..17
165 * r12 = V[31..16] 165 * r12 = V[31..16]
166 *****************************************/ 166 *****************************************/
167 mov lr, #15 167 mov lr, #15*8
168 add r12, r1, #30*4 /* r12 = V[31] */ 168 add r12, r1, #30*4 /* r12 = V[31] */
169.loop15: 169.loop15:
170 ldmia r2!, { r3-r6 } /* load D[00..03] */ 170 ldmia r2!, { r3-r6 } /* load D[00..03] */
@@ -240,19 +240,18 @@ mpc_decoder_windowing_D:
240 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 240 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
241 str r8, [r0] /* store Data */ 241 str r8, [r0] /* store Data */
242 /* store Data[31..17] */ 242 /* store Data[31..17] */
243 add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */
244 mov r10, r10, lsr #16 243 mov r10, r10, lsr #16
245 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ 244 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
246 rsb r10, r10, #0 /* r10 = -r10 */ 245 rsb r10, r10, #0 /* r10 = -r10 */
247 str r10, [r0], #4 /* store Data */ 246 str r10, [r0, lr] /* store Data */
248 sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ 247 add r0, r0, #4 /* r0++ */
249 /* correct adresses for next loop */ 248 /* correct adresses for next loop */
250 sub r12, r12, #4 /* r12 = V-- */ 249 sub r12, r12, #4 /* r12 = V-- */
251 add r1, r1, #4 /* r1 = V++ */ 250 add r1, r1, #4 /* r1 = V++ */
252 /* next loop */ 251 /* next loop */
253 subs lr, lr, #1 252 subs lr, lr, #8
254 bgt .loop15 253 bgt .loop15
255 254
256 /****************************************** 255 /******************************************
257 * V[16] with internal symmetry 256 * V[16] with internal symmetry
258 *****************************************/ 257 *****************************************/
@@ -293,7 +292,6 @@ mpc_decoder_windowing_D:
293 mov r8, r8, lsr #16 292 mov r8, r8, lsr #16
294 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 293 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
295 str r8, [r0], #4 /* store Data */ 294 str r8, [r0], #4 /* store Data */
296 add r1, r1, #4 /* V++ */
297 295
298 ldmpc regs=r4-r11 296 ldmpc regs=r4-r11
299#elif ARM_ARCH < 6 /* arm9 and above */ 297#elif ARM_ARCH < 6 /* arm9 and above */
@@ -365,7 +363,7 @@ mpc_decoder_windowing_D:
365 * r10 = lo, r11 = hi of 31..17 363 * r10 = lo, r11 = hi of 31..17
366 * r12 = V[31..16] 364 * r12 = V[31..16]
367 *****************************************/ 365 *****************************************/
368 mov lr, #15 366 mov lr, #15*8
369 add r12, r1, #30*4 /* r12 = V[31] */ 367 add r12, r1, #30*4 /* r12 = V[31] */
370.loop15: 368.loop15:
371 ldmia r2!, { r3-r4 } /* load D[00..01] */ 369 ldmia r2!, { r3-r4 } /* load D[00..01] */
@@ -445,17 +443,16 @@ mpc_decoder_windowing_D:
445 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 443 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
446 str r8, [r0] /* store Data */ 444 str r8, [r0] /* store Data */
447 /* store Data[31..17] */ 445 /* store Data[31..17] */
448 add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */
449 mov r10, r10, lsr #16 446 mov r10, r10, lsr #16
450 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ 447 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */
451 rsb r10, r10, #0 /* r10 = -r10 */ 448 rsb r10, r10, #0 /* r10 = -r10 */
452 str r10, [r0], #4 /* store Data */ 449 str r10, [r0, lr] /* store Data */
453 sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ 450 add r0, r0, #4 /* r0++ */
454 /* correct adresses for next loop */ 451 /* correct adresses for next loop */
455 sub r12, r12, #4 /* r12 = V-- */ 452 sub r12, r12, #4 /* r12 = V-- */
456 add r1, r1, #4 /* r1 = V++ */ 453 add r1, r1, #4 /* r1 = V++ */
457 /* next loop */ 454 /* next loop */
458 subs lr, lr, #1 455 subs lr, lr, #8
459 bgt .loop15 456 bgt .loop15
460 457
461 /****************************************** 458 /******************************************
@@ -498,7 +495,6 @@ mpc_decoder_windowing_D:
498 mov r8, r8, lsr #16 495 mov r8, r8, lsr #16
499 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ 496 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
500 str r8, [r0], #4 /* store Data */ 497 str r8, [r0], #4 /* store Data */
501 add r1, r1, #4 /* V++ */
502 498
503 ldmpc regs=r4-r11 499 ldmpc regs=r4-r11
504#else 500#else