From 436f4d3a204e8183d32d8c47975e6a294be1c0fa Mon Sep 17 00:00:00 2001 From: Andrew Mahone Date: Sat, 30 Jan 2010 02:20:54 +0000 Subject: Improve libdemac SATURATE slightly on ARMv4/5, move filter buffers and code out of IRAM for sizes that aren't near realtime and extend udiv32_arm reciprocal table. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24376 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/demac/libdemac/decoder.c | 21 +++++++++++++++++---- apps/codecs/demac/libdemac/demac_config.h | 4 ++++ apps/codecs/demac/libdemac/filter.c | 21 +++++++++++++++++++-- apps/codecs/demac/libdemac/filter_1280_15.c | 5 +++++ apps/codecs/demac/libdemac/filter_256_13.c | 5 +++++ apps/codecs/lib/udiv32_arm.S | 10 +++++----- 6 files changed, 55 insertions(+), 11 deletions(-) diff --git a/apps/codecs/demac/libdemac/decoder.c b/apps/codecs/demac/libdemac/decoder.c index 0763c11037..09563e0112 100644 --- a/apps/codecs/demac/libdemac/decoder.c +++ b/apps/codecs/demac/libdemac/decoder.c @@ -33,10 +33,23 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA /* Statically allocate the filter buffers */ +#ifdef FILTER256_IRAM static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2] IBSS_ATTR __attribute__((aligned(16))); /* 2432/4864 bytes */ static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] IBSS_ATTR __attribute__((aligned(16))); /* 5120/10240 bytes */ +#define FILTERBUF64 filterbuf256 +#define FILTERBUF32 filterbuf32 +#define FILTERBUF16 filterbuf32 +#else +static filter_int filterbuf64[(64*3 + FILTER_HISTORY_SIZE) * 2] + IBSS_ATTR __attribute__((aligned(16))); /* 2816/5632 bytes */ +static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2] + __attribute__((aligned(16))); /* 5120/10240 bytes */ +#define FILTERBUF64 filterbuf64 +#define FILTERBUF32 filterbuf64 +#define FILTERBUF16 filterbuf64 +#endif /* This is only needed for "insane" files, and no current Rockbox targets can hope to decode them in realtime, although the Gigabeat 
S comes close. */ @@ -57,22 +70,22 @@ void init_frame_decoder(struct ape_ctx_t* ape_ctx, switch (ape_ctx->compressiontype) { case 2000: - init_filter_16_11(filterbuf32); + init_filter_16_11(FILTERBUF16); break; case 3000: - init_filter_64_11(filterbuf256); + init_filter_64_11(FILTERBUF64); break; case 4000: init_filter_256_13(filterbuf256); - init_filter_32_10(filterbuf32); + init_filter_32_10(FILTERBUF32); break; case 5000: init_filter_1280_15(filterbuf1280); init_filter_256_13(filterbuf256); - init_filter_16_11(filterbuf32); + init_filter_16_11(FILTERBUF32); } } diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h index 1beda2b9cd..13166f69ae 100644 --- a/apps/codecs/demac/libdemac/demac_config.h +++ b/apps/codecs/demac/libdemac/demac_config.h @@ -44,6 +44,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA #define FILTER_BITS 32 #endif +#if !defined(CPU_PP) && !defined(CPU_S5L870X) +#define FILTER256_IRAM +#endif + #if CONFIG_CPU == PP5002 /* Code in IRAM for speed, not enough IRAM for the insane filter buffer. */ #define ICODE_SECTION_DEMAC_ARM .icode diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c index bab830a8bd..93edf39cb2 100644 --- a/apps/codecs/demac/libdemac/filter.c +++ b/apps/codecs/demac/libdemac/filter.c @@ -97,9 +97,26 @@ struct filter_t { #define FP_HALF (1 << (FRACBITS - 1)) /* 0.5 in fixed-point format. */ #define FP_TO_INT(x) ((x + FP_HALF) >> FRACBITS) /* round(x) */ -#if defined(CPU_ARM) && (ARM_ARCH >= 6) +#ifdef CPU_ARM +#if ARM_ARCH >= 6 #define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; }) -#else +#else /* ARM_ARCH < 6 */ +/* Keeping the asr #31 outside of the asm allows loads to be scheduled between + it and the rest of the block on ARM9E, with the load's result latency filled + by the other calculations. 
*/ +#define SATURATE(x) ({ \ + int __res = (x) >> 31; \ + asm volatile ( \ + "teq %0, %1, asr #15\n\t" \ + "moveq %0, %1\n\t" \ + "eorne %0, %0, #0xff\n\t" \ + "eorne %0, %0, #0x7f00" \ + : "+r" (__res) : "r" (x) : "cc" \ + ); \ + __res; \ +}) +#endif /* ARM_ARCH */ +#else /* CPU_ARM */ #define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF) #endif diff --git a/apps/codecs/demac/libdemac/filter_1280_15.c b/apps/codecs/demac/libdemac/filter_1280_15.c index 7077e0ee8e..f2301fb02a 100644 --- a/apps/codecs/demac/libdemac/filter_1280_15.c +++ b/apps/codecs/demac/libdemac/filter_1280_15.c @@ -22,6 +22,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA */ +#include "demac_config.h" +#ifndef FILTER256_IRAM +#undef ICODE_ATTR_DEMAC +#define ICODE_ATTR_DEMAC +#endif #define ORDER 1280 #define FRACBITS 15 #include "filter.c" diff --git a/apps/codecs/demac/libdemac/filter_256_13.c b/apps/codecs/demac/libdemac/filter_256_13.c index 69cf638903..9e4b9fcb13 100644 --- a/apps/codecs/demac/libdemac/filter_256_13.c +++ b/apps/codecs/demac/libdemac/filter_256_13.c @@ -22,6 +22,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA */ +#include "demac_config.h" +#ifndef FILTER256_IRAM +#undef ICODE_ATTR_DEMAC +#define ICODE_ATTR_DEMAC +#endif #define ORDER 256 #define FRACBITS 13 #include "filter.c" diff --git a/apps/codecs/lib/udiv32_arm.S b/apps/codecs/lib/udiv32_arm.S index 33ab7a43a5..8efc92c2e6 100644 --- a/apps/codecs/lib/udiv32_arm.S +++ b/apps/codecs/lib/udiv32_arm.S @@ -90,18 +90,18 @@ #ifdef CPU_PP #if CONFIG_CPU == PP5020 -.set recip_max, 5952 +.set recip_max, 8384 #elif CONFIG_CPU == PP5002 -.set recip_max, 1472 +.set recip_max, 4992 #else -.set recip_max, 14208 +.set recip_max, 16384 #endif #elif CONFIG_CPU == AS3525 .set recip_max, 42752 #elif CONFIG_CPU == S5L8701 -.set recip_max, 9600 +.set recip_max, 13184 #elif CONFIG_CPU == S5L8700 -.set recip_max, 5504 +.set recip_max, 9088 #endif udiv32_arm: -- cgit 
v1.2.3