From 4343011955bffaf61b002a89b0137d48e873d9d0 Mon Sep 17 00:00:00 2001 From: Thomas Martitz Date: Fri, 13 Jan 2012 16:08:52 +0100 Subject: libmad: Get rid of some bad trickery with the stack pointer. Using the stack pointer for anything other than pointing to the current stack can have very bad effects, especially on hosted platforms (e.g. when mixed with signals). Remove this at a very slight performance cost. --- apps/codecs/libmad/synth_full_arm.S | 288 ++++++++++++++++++------------------ 1 file changed, 141 insertions(+), 147 deletions(-) diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S index 27383ed3d0..0a4f9b93c2 100644 --- a/apps/codecs/libmad/synth_full_arm.S +++ b/apps/codecs/libmad/synth_full_arm.S @@ -27,21 +27,21 @@ .global synth_full_odd_sbsample .global synth_full_even_sbsample - ;; r0 = pcm + /* + ;; r0 = pcm (pushed on the stack to free a register) ;; r1 = fo ;; r2 = fe ;; r3 = D0ptr ;; r4 = D1ptr - /*;; r5 = loop counter + ;; r5 = loop counter ;; r6,r7 accumulator1 - ;; r8,r9 accumulator2 */ + ;; r8,r9 accumulator2 + */ synth_full_odd_sbsample: - stmdb sp!, {r4-r11, lr} - ldr r4, [sp, #36] - ldr r5, =synth_full_sp - str sp, [r5] + stmdb sp!, {r0, r4-r11, lr} + ldr r4, [sp, #40] mov r5, #15 add r2, r2, #32 .l: @@ -49,85 +49,87 @@ synth_full_odd_sbsample: add r3, r3, #128 add r4, r4, #128 ldr r7, [r3, #4] - ldmia r1!, {r10, r11, r12, lr} + ldmia r1!, {r0, r10, r11, lr} ldr r9, [r4, #120] - smull r6, r7, r10, r7 - ldr sp, [r3, #60] - smull r8, r9, r10, r9 - ldr r10, [r3, #52] - smlal r6, r7, r11, sp - ldr sp, [r3, #44] - smlal r6, r7, r12, r10 - ldr r10, [r4, #64] - smlal r6, r7, lr, sp - ldr sp, [r4, #72] - smlal r8, r9, r11, r10 - ldr r10, [r4, #80] - smlal r8, r9, r12, sp - smlal r8, r9, lr, r10 - ldr r10, [r3, #36] + smull r6, r7, r0, r7 + ldr r12, [r3, #60] + smull r8, r9, r0, r9 + ldr r0, [r3, #52] + smlal r6, r7, r10, r12 + ldr r12, [r3, #44] + smlal r6, r7, r11, r0 + ldr r0, [r4, #64] + smlal r6, r7, lr, 
r12 + ldr r12, [r4, #72] + smlal r8, r9, r10, r0 + ldr r0, [r4, #80] + smlal r8, r9, r11, r12 + smlal r8, r9, lr, r0 + ldr r0, [r3, #36] - ldmia r1!, {r11, r12, sp, lr} - smlal r6, r7, r11, r10 + ldmia r1!, {r10, r11, r12, lr} + smlal r6, r7, r10, r0 - ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ - smlal r8, r9, r11, r10 + ldr r0, [r4, #88] /*;;1 cycle stall on arm9, but we free up r10*/ + smlal r8, r9, r10, r0 - ldr r10, [r3, #28] - ldr r11, [r3, #20] + ldr r0, [r3, #28] + ldr r10, [r3, #20] + smlal r6, r7, r11, r0 + ldr r0, [r3, #12] smlal r6, r7, r12, r10 - ldr r10, [r3, #12] - smlal r6, r7, sp, r11 - ldr r11, [r4, #96] - smlal r6, r7, lr, r10 - ldr r10, [r4, #104] - smlal r8, r9, r12, r11 - ldr r11, [r4, #112] - smlal r8, r9, sp, r10 - smlal r8, r9, lr, r11 + ldr r10, [r4, #96] + smlal r6, r7, lr, r0 + ldr r0, [r4, #104] + smlal r8, r9, r11, r10 + ldr r10, [r4, #112] + smlal r8, r9, r12, r0 + smlal r8, r9, lr, r10 rsbs r6, r6, #0 rsc r7, r7, #0 /* ;; PROD_A and even half of SB_SAMPLE*/ - ldr r10, [r3, #0] - ldmia r2!, {r11, r12, sp, lr} - smlal r6, r7, r11, r10 + ldr r0, [r3, #0] + ldmia r2!, {r10, r11, r12, lr} + smlal r6, r7, r10, r0 - ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/ - smlal r8, r9, r11, r10 - ldr r11, [r3, #56] - ldr r10, [r3, #48] - smlal r6, r7, r12, r11 - ldr r11, [r3, #40] - smlal r6, r7, sp, r10 - ldr r10, [r4, #68] - smlal r6, r7, lr, r11 - ldr r11, [r4, #76] + ldr r0, [r4, #60] /*;;1 cycle stall on arm9, but we free up r10*/ + smlal r8, r9, r10, r0 + ldr r10, [r3, #56] + ldr r0, [r3, #48] + smlal r6, r7, r11, r10 + ldr r10, [r3, #40] + smlal r6, r7, r12, r0 + ldr r0, [r4, #68] + smlal r6, r7, lr, r10 + ldr r10, [r4, #76] + smlal r8, r9, r11, r0 + ldr r0, [r4, #84] smlal r8, r9, r12, r10 - ldr r10, [r4, #84] - smlal r8, r9, sp, r11 - smlal r8, r9, lr, r10 + smlal r8, r9, lr, r0 - ldr r10, [r3, #32] - ldmia r2!, {r11, r12, sp, lr} - smlal r6, r7, r11, r10 + ldr r0, [r3, #32] + ldmia r2!, {r10, 
r11, r12, lr} + smlal r6, r7, r10, r0 - ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/ - smlal r8, r9, r11, r10 + ldr r0, [r4, #92] /*;;1 cycle stall on arm9, but we free up r10*/ + smlal r8, r9, r10, r0 - ldr r10, [r3, #24] - ldr r11, [r3, #16] + ldr r0, [r3, #24] + ldr r10, [r3, #16] + smlal r6, r7, r11, r0 + ldr r0, [r3, #8] smlal r6, r7, r12, r10 - ldr r10, [r3, #8] - smlal r6, r7, sp, r11 - ldr r11, [r4, #100] - smlal r6, r7, lr, r10 - ldr r10, [r4, #108] - smlal r8, r9, r12, r11 - ldr r11, [r4, #116] - smlal r8, r9, sp, r10 - smlal r8, r9, lr, r11 + ldr r10, [r4, #100] + smlal r6, r7, lr, r0 + ldr r0, [r4, #108] + smlal r8, r9, r11, r10 + ldr r10, [r4, #116] + smlal r8, r9, r12, r0 + smlal r8, r9, lr, r10 + + ldr r0, [sp] movs r6, r6, lsr #16 adc r6, r6, r7, lsl #16 @@ -140,15 +142,11 @@ synth_full_odd_sbsample: subs r5, r5, #1 bne .l - ldr r5, =synth_full_sp - ldr sp, [r5] - ldmpc regs=r4-r11 + ldmpc regs="r0,r4-r11" synth_full_even_sbsample: - stmdb sp!, {r4-r11, lr} - ldr r4, [sp, #36] - ldr r5, =synth_full_sp - str sp, [r5] + stmdb sp!, {r0, r4-r11, lr} + ldr r4, [sp, #40] mov r5, #15 add r2, r2, #32 .l2: @@ -156,84 +154,86 @@ synth_full_even_sbsample: add r3, r3, #128 add r4, r4, #128 ldr r7, [r3, #0] - ldmia r1!, {r10, r11, r12, lr} + ldmia r1!, {r0, r10, r11, lr} ldr r9, [r4, #60] - smull r6, r7, r10, r7 - ldr sp, [r3, #56] - smull r8, r9, r10, r9 - ldr r10, [r3, #48] - smlal r6, r7, r11, sp - ldr sp, [r3, #40] - smlal r6, r7, r12, r10 - ldr r10, [r4, #68] - smlal r6, r7, lr, sp + smull r6, r7, r0, r7 + ldr r12, [r3, #56] + smull r8, r9, r0, r9 + ldr r0, [r3, #48] + smlal r6, r7, r10, r12 + ldr r12, [r3, #40] + smlal r6, r7, r11, r0 + ldr r0, [r4, #68] + smlal r6, r7, lr, r12 - ldr sp, [r4, #76] - smlal r8, r9, r11, r10 - ldr r10, [r4, #84] - smlal r8, r9, r12, sp - smlal r8, r9, lr, r10 + ldr r12, [r4, #76] + smlal r8, r9, r10, r0 + ldr r0, [r4, #84] + smlal r8, r9, r11, r12 + smlal r8, r9, lr, r0 - ldr r10, [r3, #32] - ldmia r1!, 
{r11, r12, sp, lr} + ldr r0, [r3, #32] + ldmia r1!, {r10, r11, r12, lr} - smlal r6, r7, r11, r10 - ldr r10, [r4, #92] - smlal r8, r9, r11, r10 - ldr r10, [r3, #24] - ldr r11, [r3, #16] + smlal r6, r7, r10, r0 + ldr r0, [r4, #92] + smlal r8, r9, r10, r0 + ldr r0, [r3, #24] + ldr r10, [r3, #16] + smlal r6, r7, r11, r0 + ldr r0, [r3, #8] smlal r6, r7, r12, r10 - ldr r10, [r3, #8] - smlal r6, r7, sp, r11 - ldr r11, [r4, #100] - smlal r6, r7, lr, r10 - ldr r10, [r4, #108] - smlal r8, r9, r12, r11 - ldr r11, [r4, #116] - smlal r8, r9, sp, r10 - smlal r8, r9, lr, r11 + ldr r10, [r4, #100] + smlal r6, r7, lr, r0 + ldr r0, [r4, #108] + smlal r8, r9, r11, r10 + ldr r10, [r4, #116] + smlal r8, r9, r12, r0 + smlal r8, r9, lr, r10 rsbs r6, r6, #0 rsc r7, r7, #0 - ldr r10, [r3, #4] - ldmia r2!, {r11, r12, sp, lr} - smlal r6, r7, r11, r10 - ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/ - smlal r8, r9, r11, r10 - ldr r10, [r3, #60] - ldr r11, [r3, #52] + ldr r0, [r3, #4] + ldmia r2!, {r10, r11, r12, lr} + smlal r6, r7, r10, r0 + ldr r0, [r4, #120] /*;;1 cycle stall on arm9, but we free up r10*/ + smlal r8, r9, r10, r0 + ldr r0, [r3, #60] + ldr r10, [r3, #52] + smlal r6, r7, r11, r0 + ldr r0, [r3, #44] smlal r6, r7, r12, r10 - ldr r10, [r3, #44] - smlal r6, r7, sp, r11 - ldr r11, [r4, #64] - smlal r6, r7, lr, r10 + ldr r10, [r4, #64] + smlal r6, r7, lr, r0 - ldr r10, [r4, #72] - smlal r8, r9, r12, r11 - ldr r11, [r4, #80] - smlal r8, r9, sp, r10 + ldr r0, [r4, #72] + smlal r8, r9, r11, r10 + ldr r10, [r4, #80] + smlal r8, r9, r12, r0 - smlal r8, r9, lr, r11 + smlal r8, r9, lr, r10 - ldr r10, [r3, #36] - ldmia r2!, {r11, r12, sp, lr} - smlal r6, r7, r11, r10 - ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ - smlal r8, r9, r11, r10 + ldr r0, [r3, #36] + ldmia r2!, {r10, r11, r12, lr} + smlal r6, r7, r10, r0 + ldr r0, [r4, #88] /*;;1 cycle stall on arm9, but we free up r10*/ + smlal r8, r9, r10, r0 - ldr r10, [r3, #28] - ldr r11, [r3, #20] + 
ldr r0, [r3, #28] + ldr r10, [r3, #20] + smlal r6, r7, r11, r0 + ldr r0, [r3, #12] smlal r6, r7, r12, r10 - ldr r10, [r3, #12] - smlal r6, r7, sp, r11 - ldr r11, [r4, #96] - smlal r6, r7, lr, r10 - ldr r10, [r4, #104] - smlal r8, r9, r12, r11 - ldr r11, [r4, #112] - smlal r8, r9, sp, r10 - smlal r8, r9, lr, r11 + ldr r10, [r4, #96] + smlal r6, r7, lr, r0 + ldr r0, [r4, #104] + smlal r8, r9, r11, r10 + ldr r10, [r4, #112] + smlal r8, r9, r12, r0 + smlal r8, r9, lr, r10 + + ldr r0, [sp] movs r6, r6, lsr #16 adc r6, r6, r7, lsl #16 @@ -246,9 +246,7 @@ synth_full_even_sbsample: subs r5, r5, #1 bne .l2 - ldr r5, =synth_full_sp - ldr sp, [r5] - ldmpc regs=r4-r11 + ldmpc regs="r0,r4-r11" .global III_aliasreduce @@ -340,7 +338,3 @@ III_overlap: ldmia r0!, {r4, r5, r6, r7, r12, lr} stmia r1!, {r4, r5, r6, r7, r12, lr} ldmpc regs=r4-r7 - - .section IBSS_SECTION_MPA_ARM,"aw",%nobits -synth_full_sp: - .space 4 -- cgit v1.2.3