From cc94ae45a7f3b377a359f9652340e8c847da5ac6 Mon Sep 17 00:00:00 2001 From: Thom Johansen Date: Thu, 23 Mar 2006 19:59:52 +0000 Subject: Keep assembler versions of DSP routines in dsp_arch.S files. This also solves the annoying register allocator problem when not using -fomit-frame-pointer for the Coldfire assembler optimised crossfeed routine. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9215 a1c6a512-1295-4272-9138-f99709370657 --- apps/SOURCES | 1 + apps/dsp.c | 92 +++++--------------------------------------------- apps/dsp_asm.h | 31 +++++++++++++++++ apps/dsp_cf.S | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 83 deletions(-) create mode 100644 apps/dsp_asm.h create mode 100644 apps/dsp_cf.S diff --git a/apps/SOURCES b/apps/SOURCES index f34cac9654..dee4e6e002 100644 --- a/apps/SOURCES +++ b/apps/SOURCES @@ -75,6 +75,7 @@ codecs.c dsp.c eq.c #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) +dsp_cf.S eq_cf.S #elif defined(CPU_ARM) && !defined(SIMULATOR) eq_arm.S diff --git a/apps/dsp.c b/apps/dsp.c index c5bbc8fbe9..6e6f702a30 100644 --- a/apps/dsp.c +++ b/apps/dsp.c @@ -28,6 +28,10 @@ #include "replaygain.h" #include "debug.h" +#ifndef SIMULATOR +#include <dsp_asm.h> +#endif + /* The "dither" code to convert the 24-bit samples produced by libmad was * taken from the coolplayer project - coolplayer.sourceforge.net */ @@ -517,90 +521,12 @@ static long dither_sample(int32_t sample, int32_t bias, int32_t mask, return output; } -/* Apply a constant gain to the samples (e.g., for ReplayGain). May update - * the src array if gain was applied. - * Note that this must be called before the resampler. +/* Applies crossfeed to the stereo signal in src. + * Crossfeed is a process where listening over speakers is simulated. This + * is good for old hard panned stereo records, which might be quite fatiguing + * to listen to on headphones with no crossfeed. 
*/ -#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) -static const long crossfeed_coefs[6] ICONST_ATTR = { - LOW, LOW_COMP, HIGH_NEG, HIGH_COMP, ATT, ATT_COMP -}; - -static void apply_crossfeed(int32_t* src[], int count) -{ - asm volatile ( - "lea.l crossfeed_data, %%a1 \n" - "lea.l (16, %%a1), %%a0 \n" - "movem.l (%%a1), %%d0-%%d3 \n" - "move.l (120, %%a1), %%d4 \n" - /* fetch left, right, LOW and LOW_COMP for first iteration */ - "move.l (%[src0]), %%d5 \n" - "move.l (%[src1]), %%d6 \n" - "move.l (%[coef])+, %%a1 \n" - "move.l (%[coef])+, %%a2 \n" - /* Register usage in loop: - * a0 = &delay[0][0], a1 & a2 = coefs - * d0 = low_left, d1 = low_right, - * d2 = high_left, d3 = high_right, - * d4 = delay line index, - * d5 = src[0][i], d6 = src[1][i]. - * The rest are described in asm constraint list. - */ - ".cfloop:" - /* LOW*low_left + LOW_COMP*left */ - "mac.l %%a1, %%d0, %%acc0 \n" - "mac.l %%a2, %%d5, %%acc0 \n" - /* LOW*low_right + LOW_COMP*right */ - "mac.l %%a1, %%d1, (%[coef])+, %%a1, %%acc1 \n" /* a1 = HIGH_NEG */ - "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = HIGH_COMP */ - "movclr.l %%acc0, %%d0 \n" /* get low_left */ - "movclr.l %%acc1, %%d1 \n" /* get low_right */ - /* HIGH_NEG*high_left + HIGH_COMP*left */ - "mac.l %%a1, %%d2, %%acc0 \n" - "mac.l %%a2, %%d5, %%acc0 \n" - /* HIGH_NEG*high_right + HIGH_COMP*right */ - "mac.l %%a1, %%d3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = ATT */ - "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = ATT_COMP */ - "lea.l (-6*4, %[coef]), %[coef] \n" /* coef = &coefs[0] */ - "move.l (%%a0, %%d4*4), %%a3 \n" /* a3=delay[0][idx] */ - "move.l (52, %%a0, %%d4*4), %%d5 \n" /* d5=delay[1][idx] */ - "movclr.l %%acc0, %%d2 \n" /* get high_left */ - "movclr.l %%acc1, %%d3 \n" /* get high_right */ - /* ATT*delay_r + ATT_COMP*high_left */ - "mac.l %%a1, %%d5, (4, %[src0]), %%d5, %%acc0\n" /* d5 = src[0][i+1] */ - "mac.l %%a2, %%d2, (4, %[src1]), %%d6, %%acc0\n" /* d6 = src[1][i+1] */ - /* ATT*delay_l + 
ATT_COMP*high_right */ - "mac.l %%a1, %%a3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = LOW */ - "mac.l %%a2, %%d3, (%[coef])+, %%a2, %%acc1 \n" /* a2 = LOW_COMP */ - - /* save crossfed samples to output */ - "movclr.l %%acc0, %%a3 \n" - "move.l %%a3, (%[src0])+ \n" /* src[0][i++] = out_l */ - "movclr.l %%acc1, %%a3 \n" - "move.l %%a3, (%[src1])+ \n" /* src[1][i++] = out_r */ - "move.l %%d0, (%%a0, %%d4*4) \n" /* delay[0][index] = low_left */ - "move.l %%d1, (52, %%a0, %%d4*4)\n" /* delay[1][index] = low_right */ - "addq.l #1, %%d4 \n" /* index++ */ - "cmp.l #13, %%d4 \n" /* if (index >= 13) { */ - "jlt .nowrap \n" - "clr.l %%d4 \n" /* index = 0 */ - ".nowrap: \n" /* } */ - "subq.l #1, %[count] \n" - "jne .cfloop \n" - /* save data back to struct */ - "lea.l crossfeed_data, %%a1 \n" - "movem.l %%d0-%%d3, (%%a1) \n" - "move.l %%d4, (120, %%a1) \n" - /* NOTE: We _just_ have enough registers for our use here, clobber just - one more and GCC will fail. */ - : - : [count] "d" (count), - [src0] "a" (src[0]), [src1] "a" (src[1]), [coef] "a" (crossfeed_coefs) - : "d0", "d1", "d2", "d3", "d4", "d5", "d6", - "a0", "a1", "a2", "a3" - ); -} -#else +#ifndef DSP_HAVE_ASM_CROSSFEED static void apply_crossfeed(int32_t* src[], int count) { int32_t a; /* accumulator */ diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h new file mode 100644 index 0000000000..ad08bc9f4b --- /dev/null +++ b/apps/dsp_asm.h @@ -0,0 +1,31 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 Thom Johansen + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. 
+ * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include <inttypes.h> + +#ifndef _DSP_ASM_H +#define _DSP_ASM_H + +#ifdef CPU_COLDFIRE +#define DSP_HAVE_ASM_CROSSFEED +void apply_crossfeed(int32_t* src[], int count); +#endif + +#endif + diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S new file mode 100644 index 0000000000..6147ebeea7 --- /dev/null +++ b/apps/dsp_cf.S @@ -0,0 +1,104 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 Thom Johansen + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ * + ****************************************************************************/ + + .section .idata,"aw",@progbits +crossfeed_coefs: + .long 0x4CCCCCCD | LOW + .long 0x33333333 | LOW_COMP + .long -0x66666666 | HIGH_NEG + .long 0x66666666 | HIGH_COMP + .long 0x0CCCCCCD | ATT + .long 0x73333333 | ATT_COMP + + .section .text + .global apply_crossfeed +apply_crossfeed: + lea.l (-44, %sp), %sp + movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs + move.l (44+4, %sp), %a4 + movem.l (%a4), %a4-%a5 | a4 = src[0], a5 = src[1] + move.l (44+8, %sp), %d7 | d7 = count + + lea.l crossfeed_data, %a1 + lea.l crossfeed_coefs, %a6 + lea.l (16, %a1), %a0 | a0 = &delay[0][0] + movem.l (%a1), %d0-%d3 | fetch filter history samples + move.l (120, %a1), %d4 | fetch delay line index + move.l (%a4), %d5 | d5 = left sample + move.l (%a5), %d6 | d6 = right sample + move.l (%a6)+, %a1 | a1 = LOW value + move.l (%a6)+, %a2 | a2 = LOW_COMP value + /* Register usage in loop: + * a0 = &delay[0][0], a1 & a2 = coefs, a3 = temp storage, + * a4 = src[0], a5 = src[1], a6 = &crossfeed_coefs[0], + * d0 = low_left, d1 = low_right, + * d2 = high_left, d3 = high_right, + * d4 = delay line index, + * d5 = src[0][i], d6 = src[1][i]. 
+ * d7 = count + */ +.cfloop: + | LOW*low_left + LOW_COMP*left + mac.l %a1, %d0, %acc0 + mac.l %a2, %d5, %acc0 + | LOW*low_right + LOW_COMP*right + mac.l %a1, %d1, (%a6)+, %a1, %acc1 | a1 = HIGH_NEG + mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = HIGH_COMP + movclr.l %acc0, %d0 | get low_left + movclr.l %acc1, %d1 | get low_right + | HIGH_NEG*high_left + HIGH_COMP*left + mac.l %a1, %d2, %acc0 + mac.l %a2, %d5, %acc0 + | HIGH_NEG*high_right + HIGH_COMP*right + mac.l %a1, %d3, (%a6)+, %a1, %acc1 | a1 = ATT + mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = ATT_COMP + lea.l (-6*4, %a6), %a6 | coef = &coefs[0] + move.l (%a0, %d4*4), %a3 | a3 = delay[0][idx] + move.l (52, %a0, %d4*4), %d5 | d5 = delay[1][idx] + movclr.l %acc0, %d2 | get high_left + movclr.l %acc1, %d3 | get high_right + | ATT*delay_r + ATT_COMP*high_left + mac.l %a1, %d5, (4, %a4), %d5, %acc0 | d5 = src[0][i+1] + mac.l %a2, %d2, (4, %a5), %d6, %acc0 | d6 = src[1][i+1] + | ATT*delay_l + ATT_COMP*high_right + mac.l %a1, %a3, (%a6)+, %a1, %acc1 | a1 = LOW + mac.l %a2, %d3, (%a6)+, %a2, %acc1 | a2 = LOW_COMP + + | save crossfed samples to output + movclr.l %acc0, %a3 + move.l %a3, (%a4)+ | src[0][i++] = out_l + movclr.l %acc1, %a3 + move.l %a3, (%a5)+ | src[1][i++] = out_r + move.l %d0, (%a0, %d4*4) | delay[0][index] = low_left + move.l %d1, (52, %a0, %d4*4) | delay[1][index] = low_right */ + addq.l #1, %d4 | index++ */ + cmp.l #13, %d4 | if (index >= 13) { + jlt .nowrap + clr.l %d4 | index = 0 +.nowrap: | } + subq.l #1, %d7 + jne .cfloop + | save data back to struct + lea.l crossfeed_data, %a1 + movem.l %d0-%d3, (%a1) + move.l %d4, (120, %a1) + movem.l (%sp), %d2-%d7/%a2-%a6 + lea.l (44, %sp), %sp + rts + -- cgit v1.2.3