From 56f2ca74adaf64a23c3139fefb3180299ce4237a Mon Sep 17 00:00:00 2001 From: Thom Johansen Date: Wed, 16 Aug 2006 12:38:49 +0000 Subject: Assembler optimised crossfeed routine for ARM. Performance improvement is more than double. Should work fine, but watch your ears nevertheless. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10608 a1c6a512-1295-4272-9138-f99709370657 --- apps/SOURCES | 1 + apps/dsp_arm.S | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ apps/dsp_asm.h | 2 +- 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 apps/dsp_arm.S diff --git a/apps/SOURCES b/apps/SOURCES index 96f3773a36..a6eaae77f5 100644 --- a/apps/SOURCES +++ b/apps/SOURCES @@ -76,6 +76,7 @@ eq.c dsp_cf.S eq_cf.S #elif defined(CPU_ARM) && !defined(SIMULATOR) +dsp_arm.S eq_arm.S #endif eq_menu.c diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S new file mode 100644 index 0000000000..1abfd34983 --- /dev/null +++ b/apps/dsp_arm.S @@ -0,0 +1,79 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 Thom Johansen + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ *
+ ****************************************************************************/
+
+/*
+ * void apply_crossfeed(int32_t* src[], int count)
+ *
+ * Crossfeed filter for ARM, processing both channels in place.
+ *
+ * In:    r0 = src; src[0] is the left sample buffer, src[1] the right one
+ *        r1 = count, number of samples per channel (signed)
+ * Out:   nothing; the samples in src[0] and src[1] are overwritten and the
+ *        filter history and delay line index in crossfeed_data are updated
+ * Clobb: r0-r3, r12, flags. r4-r11 are saved and restored. r14 (lr) is
+ *        used as accumulator low word, so it is stacked and the return is
+ *        done by loading pc from the stack.
+ * Stack: 9 words of saved registers plus 2 words of temporaries.
+ *
+ * For every sample the routine computes
+ *   out_l = gain*x_l[n] + b0*dr[n] + b1*dr[n - 1] + a1*y_l[n - 1]
+ *   out_r = gain*x_r[n] + b0*dl[n] + b1*dl[n - 1] + a1*y_r[n - 1]
+ * where dl/dr are the left/right inputs read back from the delay line
+ * and y_l/y_r are the filtered crossfeed terms kept as filter history
+ * (taken from the accumulator before the direct gain term is added).
+ * The delay slot is read first and then overwritten with the current
+ * input pair, after which the index advances and wraps at CF_DELAY_LEN.
+ *
+ * Products are accumulated in 64 bits and the high word is shifted left by
+ * one to produce the result.
+ * NOTE(review): that shift suggests s0.31 fixed point operands; confirm
+ * against the C reference implementation.
+ * NOTE(review): the stored delay line index is assumed to be in the range
+ * 0..CF_DELAY_LEN-1 on entry; it is not validated here.
+ *
+ * A count of zero or less returns immediately without touching the sample
+ * buffers or crossfeed_data. The loop itself tests the count only after
+ * the first iteration, so without this check a zero count would wrap.
+ */
+    .section .text
+
+    @ Layout of crossfeed_data as accessed by this routine, in 32 bit words:
+    @   [0]      direct gain
+    @   [1..3]   filter coefs b0, b1, a1
+    @   [4..7]   filter history
+    @   [8..33]  delay line, CF_DELAY_LEN pairs of (left, right)
+    @   [34]     delay line index
+    @ These offsets must match the definition of crossfeed_data elsewhere.
+    .equ    CF_DELAY_LEN,   13                      @ sample pairs in delay line
+    .equ    CF_DELAY_SIZE,  CF_DELAY_LEN*4*2        @ delay line size in bytes
+    .equ    CF_HISTORY_OFS, 4*4                     @ byte offset of history
+    .equ    CF_DELAY_OFS,   8*4                     @ byte offset of delay line
+    .equ    CF_INDEX_OFS,   CF_DELAY_OFS + CF_DELAY_SIZE @ byte offset of index
+
+    .global apply_crossfeed
+    .type   apply_crossfeed, %function
+apply_crossfeed:
+    @ unfortunately, we ended up in a bit of a register squeeze here, and need
+    @ to keep both the count and the delay line index on the stack :/
+    stmdb   sp!, { r4-r11, lr }     @ stack modified regs
+    cmp     r1, #0                  @ count is a signed int
+    ble     .Lcf_done               @ nothing to process, just restore regs
+    ldmia   r0, { r2-r3 }           @ r2 = src[0], r3 = src[1]
+
+    ldr     r0, =crossfeed_data
+    ldmia   r0!, { r4-r11 }         @ load direct gain and filter data
+                                    @ r0 now points at the delay line
+    ldr     r12, [r0, #(CF_INDEX_OFS - CF_DELAY_OFS)] @ fetch delay line index
+    add     r0, r0, r12, lsl #3     @ r0 = &delay[index][0], 8 bytes per pair
+    stmdb   sp!, { r1, r12 }        @ stack count at [sp], index at [sp, #4]
+    /* Register usage in loop:
+     * r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1],
+     * r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
+     * r8-r11 = filter history (dr[n - 1], y_l[n - 1], dl[n - 1], y_r[n - 1]),
+     * r12 = temp, r14 = accumulator low
+     */
+.Lcf_loop:
+    smull   r14, r1, r6, r8         @ acc = b1*dr[n - 1]
+    smlal   r14, r1, r7, r9         @ acc += a1*y_l[n - 1]
+    ldr     r8, [r0, #4]            @ r8 = dr[n]
+    smlal   r14, r1, r5, r8         @ acc += b0*dr[n]
+    mov     r9, r1, lsl #1          @ fix format for filter history
+    ldr     r12, [r2]               @ load left input
+    smlal   r14, r1, r4, r12        @ acc += gain*x_l[n]
+    mov     r1, r1, lsl #1          @ fix format
+    str     r1, [r2], #4            @ save result, advance left pointer
+
+    smull   r14, r1, r6, r10        @ acc = b1*dl[n - 1]
+    smlal   r14, r1, r7, r11        @ acc += a1*y_r[n - 1]
+    ldr     r10, [r0]               @ r10 = dl[n]
+    str     r12, [r0], #4           @ save left input to delay line
+    smlal   r14, r1, r5, r10        @ acc += b0*dl[n]
+    mov     r11, r1, lsl #1         @ fix format for filter history
+    ldr     r12, [r3]               @ load right input
+    smlal   r14, r1, r4, r12        @ acc += gain*x_r[n]
+    str     r12, [r0], #4           @ save right input to delay line
+    mov     r1, r1, lsl #1          @ fix format
+    str     r1, [r3], #4            @ save result, advance right pointer
+
+    ldr     r12, [sp, #4]           @ fetch delay line index from stack
+    add     r12, r12, #1            @ increment index
+    cmp     r12, #CF_DELAY_LEN      @ do we need to wrap to start of delay?
+    moveq   r12, #0                 @ yes, wrap index to 0
+    subeq   r0, r0, #CF_DELAY_SIZE  @ also wrap back delay line ptr to start
+    str     r12, [sp, #4]           @ stack delay line index again
+
+    ldr     r1, [sp]                @ fetch count from stack
+    subs    r1, r1, #1              @ are we finished?
+    strne   r1, [sp]                @ nope, save count back to stack
+    bne     .Lcf_loop
+
+    @ save data back to struct, r12 still holds the current delay line index
+    ldr     r0, =crossfeed_data + CF_HISTORY_OFS
+    stmia   r0, { r8-r11 }          @ save filter history
+    str     r12, [r0, #(CF_INDEX_OFS - CF_HISTORY_OFS)] @ save delay line index
+    add     sp, sp, #8              @ remove temp variables from stack
+.Lcf_done:
+    ldmia   sp!, { r4-r11, pc }     @ restore regs and return
+    .size   apply_crossfeed, . - apply_crossfeed
+
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index ad08bc9f4b..04c2848a98 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -22,7 +22,7 @@
 #ifndef _DSP_ASM_H
 #define _DSP_ASM_H
 
-#ifdef CPU_COLDFIRE
+#if defined(CPU_COLDFIRE) || defined(CPU_ARM)
 #define DSP_HAVE_ASM_CROSSFEED
 void apply_crossfeed(int32_t* src[], int count);
 #endif
-- 
cgit v1.2.3