From eec5eb9ecca46a4d202f7b41b3d0aa4a8626a892 Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Sun, 2 May 2010 15:45:43 +0000 Subject: Refacturate arm version of libmad's synthesis filter. Only two asm macros left, renamed asm-implementation for better clarity. No change in speed or precision. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25777 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libmad/synth.c | 264 ++++-------------------------------- apps/codecs/libmad/synth_full_arm.S | 8 +- 2 files changed, 30 insertions(+), 242 deletions(-) (limited to 'apps') diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c index 9d1bdb7c91..15c1f9decc 100644 --- a/apps/codecs/libmad/synth.c +++ b/apps/codecs/libmad/synth.c @@ -828,60 +828,8 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, #elif defined(FPM_ARM) -#define PROD_ODD_0(hi, lo, f, ptr) \ - do { \ - mad_fixed_t *__p = (f); \ - asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #4]\n\t" \ - "smull %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #60]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #52]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #44]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - "ldmia %2, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #36]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #28]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #20]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #12]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - : "=&r" (lo), "=&r" (hi), "+r" (__p) \ - : "r" (ptr) \ - : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) - -#define PROD_ODD_A(hi, lo, f, ptr) \ - do { \ - mad_fixed_t *__p = (f); \ - asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #4]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #60]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #52]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #44]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - "ldmia %2, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #36]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #28]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #20]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #12]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - : "+r" (lo), "+r" (hi), "+r" (__p) \ - : "r" (ptr) \ - : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) - -#define PROD_EVEN_0(hi, lo, f, ptr) \ - do { \ +#define PROD_O(hi, lo, f, ptr) \ + ({ \ mad_fixed_t *__p = (f); \ asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ "ldr r4, [%3, #0]\n\t" \ @@ -904,10 +852,10 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, : "=&r" (lo), "=&r" (hi), "+r" (__p) \ : "r" (ptr) \ : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) + }) -#define PROD_EVEN_A(hi, lo, f, ptr) \ - do { \ +#define PROD_A(hi, lo, f, ptr) \ + ({ \ mad_fixed_t *__p = (f); \ asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ "ldr r4, [%3, #0]\n\t" \ @@ -930,118 +878,18 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, : "+r" (lo), "+r" (hi), "+r" (__p) \ : "r" (ptr) \ : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) - -#define PROD_EVENBACK_0(hi, lo, f, ptr) \ - do { \ - mad_fixed_t *__p = (f); \ - asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #60]\n\t" \ - "smull %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #68]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #76]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #84]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - "ldmia %2, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #92]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #100]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #108]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #116]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - : "=&r" (lo), "=&r" (hi), "+r" (__p) \ - : "r" (ptr) \ - : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) - -#define PROD_EVENBACK_A(hi, lo, f, ptr) \ - do { \ - mad_fixed_t *__p = (f); \ - asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #60]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #68]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #76]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #84]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - "ldmia %2, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #92]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #100]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #108]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #116]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - : "+r" (lo), "+r" (hi), "+r" (__p) \ - : "r" (ptr) \ - : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) - -#define PROD_ODDBACK_0(hi, lo, f, ptr) \ - do { \ - mad_fixed_t *__p = (f); \ - asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #120]\n\t" \ - "smull %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #64]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #72]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #80]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - "ldmia %2, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #88]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #96]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #104]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #112]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - : "=&r" (lo), "=&r" (hi), "+r" (__p) \ - : "r" (ptr) \ - : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) - -#define PROD_ODDBACK_A(hi, lo, f, ptr) \ - do { \ - mad_fixed_t *__p = (f); \ - asm("ldmia %2!, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #120]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #64]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #72]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #80]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - "ldmia %2, {r0, r1, r2, r3}\n\t" \ - "ldr r4, [%3, #88]\n\t" \ - "smlal %0, %1, r0, r4\n\t" \ - "ldr r4, [%3, #96]\n\t" \ - "smlal %0, %1, r1, r4\n\t" \ - "ldr r4, [%3, #104]\n\t" \ - "smlal %0, %1, r2, r4\n\t" \ - "ldr r4, [%3, #112]\n\t" \ - "smlal %0, %1, r3, r4\n\t" \ - : "+r" (lo), "+r" (hi), "+r" (__p) \ - : "r" (ptr) \ - : "r0", "r1", "r2", "r3", "r4"); \ - } while (0) - -void synth_full1(mad_fixed_t *pcm, mad_fixed_t (*fo)[8], mad_fixed_t (*fe)[8], - mad_fixed_t const (*D0ptr)[32], - mad_fixed_t const (*D1ptr)[32]); -void synth_full2(mad_fixed_t *pcm, mad_fixed_t (*fo)[8], mad_fixed_t (*fe)[8], - mad_fixed_t const (*D0ptr)[32], - mad_fixed_t const (*D1ptr)[32]); + }) + +void synth_full_odd_band (mad_fixed_t *pcm, + mad_fixed_t (*fo)[8], + mad_fixed_t (*fe)[8], + mad_fixed_t const (*D0ptr)[32], + mad_fixed_t const (*D1ptr)[32]); +void synth_full_even_band(mad_fixed_t *pcm, + mad_fixed_t (*fo)[8], + mad_fixed_t (*fe)[8], + mad_fixed_t const (*D0ptr)[32], + mad_fixed_t const (*D1ptr)[32]); static void synth_full(struct mad_synth *synth, struct mad_frame const *frame, @@ -1083,99 +931,39 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, if(s & 1) { ptr = *D0ptr; -/* - ML0(hi, lo, (*fx)[0], ptr[ 1]); - MLA(hi, lo, (*fx)[1], ptr[15]); - MLA(hi, lo, (*fx)[2], ptr[13]); - MLA(hi, lo, (*fx)[3], ptr[11]); - MLA(hi, lo, (*fx)[4], ptr[ 9]); - MLA(hi, lo, (*fx)[5], ptr[ 7]); - MLA(hi, lo, (*fx)[6], ptr[ 5]); - MLA(hi, lo, (*fx)[7], ptr[ 3]); -*/ - PROD_ODD_0(hi, lo, *fx, ptr); + PROD_O(hi, lo, *fo, ptr+1); MLN(hi, lo); -/* - MLA(hi, lo, (*fe)[0], ptr[ 0]); - MLA(hi, lo, (*fe)[1], ptr[14]); - MLA(hi, lo, (*fe)[2], ptr[12]); - MLA(hi, lo, (*fe)[3], ptr[10]); - MLA(hi, lo, (*fe)[4], ptr[ 8]); - MLA(hi, lo, (*fe)[5], ptr[ 6]); - MLA(hi, lo, (*fe)[6], ptr[ 4]); - MLA(hi, lo, (*fe)[7], ptr[ 2]); -*/ - PROD_EVEN_A(hi, lo, *fe, ptr); + PROD_A(hi, lo, *fe, ptr); pcm[0] = SHIFT(MLZ(hi, lo)); pcm += 16; - synth_full1(pcm, fo, fe, D0ptr, D1ptr); + synth_full_odd_band(pcm, fo, fe, D0ptr, D1ptr); D0ptr += 15; D1ptr += 15; fo += 15; fe += 15; ptr = *(D0ptr + 1); - PROD_ODD_0(hi, lo, *fo, ptr); -/* - ML0(hi, lo, (*fo)[0], ptr[ 1]); - MLA(hi, lo, (*fo)[1], ptr[15]); - MLA(hi, lo, (*fo)[2], ptr[13]); - MLA(hi, lo, (*fo)[3], ptr[11]); - MLA(hi, lo, (*fo)[4], ptr[ 9]); - MLA(hi, lo, (*fo)[5], ptr[ 7]); - MLA(hi, lo, (*fo)[6], ptr[ 5]); - MLA(hi, lo, (*fo)[7], ptr[ 3]); -*/ + PROD_O(hi, lo, *fo, ptr+1); pcm[0] = SHIFT(-MLZ(hi, lo)); } else { ptr = *D0ptr; -/* - ML0(hi, lo, (*fx)[0], ptr[ 0]); - MLA(hi, lo, (*fx)[1], ptr[14]); - MLA(hi, lo, (*fx)[2], ptr[12]); - MLA(hi, lo, (*fx)[3], ptr[10]); - MLA(hi, lo, (*fx)[4], ptr[ 8]); - MLA(hi, lo, (*fx)[5], ptr[ 6]); - MLA(hi, lo, (*fx)[6], ptr[ 4]); - MLA(hi, lo, (*fx)[7], ptr[ 2]); -*/ - PROD_EVEN_0(hi, lo, *fx, ptr); + PROD_O(hi, lo, *fx, ptr); MLN(hi, lo); -/* - MLA(hi, lo, (*fe)[0], ptr[ 1]); - MLA(hi, lo, (*fe)[1], ptr[15]); - MLA(hi, lo, (*fe)[2], ptr[13]); - MLA(hi, lo, (*fe)[3], ptr[11]); - MLA(hi, lo, (*fe)[4], ptr[ 9]); - MLA(hi, lo, (*fe)[5], ptr[ 7]); - MLA(hi, lo, (*fe)[6], ptr[ 5]); - MLA(hi, lo, (*fe)[7], ptr[ 3]); -*/ - PROD_ODD_A(hi, lo, *fe, ptr); + PROD_A(hi, lo, *fe, ptr+1); pcm[0] = SHIFT(MLZ(hi, lo)); pcm += 16; - synth_full2(pcm, fo, fe, D0ptr, D1ptr); + synth_full_even_band(pcm, fo, fe, D0ptr, D1ptr); D0ptr += 15; D1ptr += 15; fo += 15; fe += 15; ptr = *(D0ptr + 1); -/* - ML0(hi, lo, (*fo)[0], ptr[ 0]); - MLA(hi, lo, (*fo)[1], ptr[14]); - MLA(hi, lo, (*fo)[2], ptr[12]); - MLA(hi, lo, (*fo)[3], ptr[10]); - MLA(hi, lo, (*fo)[4], ptr[ 8]); - MLA(hi, lo, (*fo)[5], ptr[ 6]); - MLA(hi, lo, (*fo)[6], ptr[ 4]); - MLA(hi, lo, (*fo)[7], ptr[ 2]); -*/ - PROD_EVEN_0(hi, lo, *fo, ptr); + PROD_O(hi, lo, *fo, ptr); pcm[0] = SHIFT(-MLZ(hi, lo)); } @@ -1185,7 +973,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, } } -# else +# else /* not FPM_COLDFIRE_EMAC and not FPM_ARM */ static void synth_full(struct mad_synth *synth, struct mad_frame const *frame, diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S index 419bf2b96e..c5848d3327 100644 --- a/apps/codecs/libmad/synth_full_arm.S +++ b/apps/codecs/libmad/synth_full_arm.S @@ -23,15 +23,15 @@ .section ICODE_SECTION_MPA_ARM,"ax",%progbits - .global synth_full1 - .global synth_full2 + .global synth_full_odd_band + .global synth_full_even_band ;; r0 = pcm ;; r1 = fo ;; r2 = fe ;; r3 = D0ptr ;; r4 = D1ptr -synth_full1: +synth_full_odd_band: stmdb sp!, {r4-r11, lr} ldr r4, [sp, #36] ldr r5, =synth_full_sp @@ -137,7 +137,7 @@ synth_full1: ldr sp, [r5] ldmia sp!, {r4-r11, pc} -synth_full2: +synth_full_even_band: stmdb sp!, {r4-r11, lr} ldr r4, [sp, #36] ldr r5, =synth_full_sp -- cgit v1.2.3