From 52f17dfe9d5221fdc83b2c5d719fe701dce46f24 Mon Sep 17 00:00:00 2001
From: Andree Buschmann <AndreeBuschmann@t-online.de>
Date: Thu, 1 Jul 2010 21:18:42 +0000
Subject: Submit FS#11445. Speed up of faad (aac) decoder via several
 optimizations like refactoring some requantization routines, moving several
 arrays and code tables to IRAM, using an optimized swap32() function and
 inlining several huffman decoder functions. Decoding is sped up by ~10%
 (PP5002, PP5022, MCF5249) and ~22% (MCF5250).

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27225 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/aac.c                     | 14 ++++++-
 apps/codecs/libfaad/bits.c            |  7 ++--
 apps/codecs/libfaad/bits.h            | 34 +++--------------
 apps/codecs/libfaad/codebook/hcb_1.h  |  4 +-
 apps/codecs/libfaad/codebook/hcb_10.h |  4 +-
 apps/codecs/libfaad/codebook/hcb_11.h |  4 +-
 apps/codecs/libfaad/codebook/hcb_2.h  |  4 +-
 apps/codecs/libfaad/codebook/hcb_3.h  |  2 +-
 apps/codecs/libfaad/codebook/hcb_4.h  |  4 +-
 apps/codecs/libfaad/codebook/hcb_5.h  |  2 +-
 apps/codecs/libfaad/codebook/hcb_6.h  |  4 +-
 apps/codecs/libfaad/codebook/hcb_7.h  |  2 +-
 apps/codecs/libfaad/codebook/hcb_8.h  |  4 +-
 apps/codecs/libfaad/codebook/hcb_9.h  |  2 +-
 apps/codecs/libfaad/codebook/hcb_sf.h |  2 +-
 apps/codecs/libfaad/common.h          | 21 ++++++++++-
 apps/codecs/libfaad/huffman.c         | 69 +++++++++++++++++-----------------
 apps/codecs/libfaad/huffman.h         |  2 +-
 apps/codecs/libfaad/specrec.c         | 70 +++++++++++------------------------
 19 files changed, 121 insertions(+), 134 deletions(-)

diff --git a/apps/codecs/aac.c b/apps/codecs/aac.c
index 0f38ea1145..1daa7de11b 100644
--- a/apps/codecs/aac.c
+++ b/apps/codecs/aac.c
@@ -27,6 +27,11 @@
 
 CODEC_HEADER
 
+/* Global buffers to be used in the mdct synthesis. This way the arrays can
+ * be moved to IRAM for some targets */
+ALIGN real_t gb_time_buffer[2][1024] IBSS_ATTR_FAAD_LARGE_IRAM;
+ALIGN real_t gb_fb_intermed[2][1024] IBSS_ATTR_FAAD_LARGE_IRAM;
+
 /* this is the codec entry point */
 enum codec_status codec_main(void)
 {
@@ -105,7 +110,14 @@ next_track:
         err = CODEC_ERROR;
         goto done;
     }
-
+    
+    /* Set pointer to be able to use IRAM and to avoid alloc in decoder. Must
+     * be called after NeAACDecOpen(). */
+    decoder->time_out[0]    = &gb_time_buffer[0][0];
+    decoder->time_out[1]    = &gb_time_buffer[1][0];
+    decoder->fb_intermed[0] = &gb_fb_intermed[0][0];
+    decoder->fb_intermed[1] = &gb_fb_intermed[1][0];
+    
     ci->id3->frequency = s;
 
     i = 0;
diff --git a/apps/codecs/libfaad/bits.c b/apps/codecs/libfaad/bits.c
index a010e830ed..f1a36235bd 100644
--- a/apps/codecs/libfaad/bits.c
+++ b/apps/codecs/libfaad/bits.c
@@ -33,9 +33,10 @@
 #include "bits.h"
 
 /* Need to be large enough to fit the largest compressed sample in a file.
- * Samples a little larger than 1 KB observed in a 256 kbps file.
+ * Samples were observed to need up to 1500 bytes (400 kbps nero aac).
  */
-uint8_t static_buffer[2048];
+#define BUFFER_SIZE 2048
+uint8_t static_buffer[BUFFER_SIZE] IBSS_ATTR;
 
 /* initialize buffer, call once before first getbits or showbits */
 void faad_initbits(bitfile *ld, const void *_buffer, const uint32_t buffer_size)
@@ -47,7 +48,7 @@ void faad_initbits(bitfile *ld, const void *_buffer, const uint32_t buffer_size)
 
     memset(ld, 0, sizeof(bitfile));
 
-    if (buffer_size == 0 || _buffer == NULL)
+    if (buffer_size == 0 || _buffer == NULL || (buffer_size+12)>BUFFER_SIZE)
     {
         ld->error = 1;
         ld->no_more_reading = 1;
diff --git a/apps/codecs/libfaad/bits.h b/apps/codecs/libfaad/bits.h
index 0ebe04b03f..815db59c68 100644
--- a/apps/codecs/libfaad/bits.h
+++ b/apps/codecs/libfaad/bits.h
@@ -55,15 +55,11 @@ typedef struct _bitfile
     void *buffer;
 } bitfile;
 
-
-#if defined (_WIN32) && !defined(_WIN32_WCE) && !defined(__MINGW32__)
-#define BSWAP(a) __asm mov eax,a __asm bswap eax __asm mov a, eax
-#elif defined(LINUX) || defined(DJGPP) || defined(__MINGW32__)
-#define BSWAP(a) __asm__ ( "bswapl %0\n" : "=r" (a) : "0" (a) )
-#else
+/* rockbox: use asm optimized swap32()
 #define BSWAP(a) \
     ((a) = ( ((a)&0xff)<<24) | (((a)&0xff00)<<8) | (((a)>>8)&0xff00) | (((a)>>24)&0xff))
-#endif
+*/
+#define BSWAP(a) swap32(a)
 
 static uint32_t bitmask[] = {
     0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF,
@@ -81,7 +77,7 @@ void faad_initbits_rev(bitfile *ld, void *buffer,
                        uint32_t bits_in_buffer);
 uint8_t faad_byte_align(bitfile *ld);
 uint32_t faad_get_processed_bits(bitfile *ld);
-void faad_flushbits_ex(bitfile *ld, uint32_t bits);
+INLINE void faad_flushbits_ex(bitfile *ld, uint32_t bits);
 void faad_rewindbits(bitfile *ld);
 uint8_t *faad_getbitbuffer(bitfile *ld, uint32_t bits
                        DEBUGDEC);
@@ -93,28 +89,10 @@ uint32_t faad_origbitbuffer_size(bitfile *ld);
 /* circumvent memory alignment errors on ARM */
 static INLINE uint32_t getdword(void *mem)
 {
-#ifdef ARM
-    uint32_t tmp;
 #ifndef ARCH_IS_BIG_ENDIAN
-    ((uint8_t*)&tmp)[0] = ((uint8_t*)mem)[3];
-    ((uint8_t*)&tmp)[1] = ((uint8_t*)mem)[2];
-    ((uint8_t*)&tmp)[2] = ((uint8_t*)mem)[1];
-    ((uint8_t*)&tmp)[3] = ((uint8_t*)mem)[0];
-#else
-    ((uint8_t*)&tmp)[0] = ((uint8_t*)mem)[0];
-    ((uint8_t*)&tmp)[1] = ((uint8_t*)mem)[1];
-    ((uint8_t*)&tmp)[2] = ((uint8_t*)mem)[2];
-    ((uint8_t*)&tmp)[3] = ((uint8_t*)mem)[3];
-#endif
-
-    return tmp;
+    return BSWAP(*(uint32_t*)mem);
 #else
-    uint32_t tmp;
-    tmp = *(uint32_t*)mem;
-#ifndef ARCH_IS_BIG_ENDIAN
-    BSWAP(tmp);
-#endif
-    return tmp;
+    return *(uint32_t*)mem;
 #endif
 }
 
diff --git a/apps/codecs/libfaad/codebook/hcb_1.h b/apps/codecs/libfaad/codebook/hcb_1.h
index 610e578efb..88c04b6012 100644
--- a/apps/codecs/libfaad/codebook/hcb_1.h
+++ b/apps/codecs/libfaad/codebook/hcb_1.h
@@ -33,7 +33,7 @@
  *
  * Used to find offset into 2nd step table and number of extra bits to get
  */
-static hcb hcb1_1[] = {
+static hcb hcb1_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     { /* 00000 */ 0, 0 },
     { /*       */ 0, 0 },
     { /*       */ 0, 0 },
@@ -78,7 +78,7 @@ static hcb hcb1_1[] = {
  *
  * Gives size of codeword and actual data (x,y,v,w)
  */
-static hcb_2_quad hcb1_2[] = {
+static hcb_2_quad hcb1_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     /* 1 bit codeword */
     { 1,  0,  0,  0,  0 },
 
diff --git a/apps/codecs/libfaad/codebook/hcb_10.h b/apps/codecs/libfaad/codebook/hcb_10.h
index 7f1b6a4d7e..971d06aaf0 100644
--- a/apps/codecs/libfaad/codebook/hcb_10.h
+++ b/apps/codecs/libfaad/codebook/hcb_10.h
@@ -33,7 +33,7 @@
  *
  * Used to find offset into 2nd step table and number of extra bits to get
  */
-static hcb hcb10_1[] = {
+static hcb hcb10_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     /* 4 bit codewords */
     { /* 000000 */ 0, 0 },
     { /*        */ 0, 0 },
@@ -115,7 +115,7 @@ static hcb hcb10_1[] = {
  *
  * Gives size of codeword and actual data (x,y,v,w)
  */
-static hcb_2_pair hcb10_2[] = {
+static hcb_2_pair hcb10_2[] ICONST_ATTR = {
     /* 4 bit codewords */
     { 4,  1,  1 },
     { 4,  1,  2 },
diff --git a/apps/codecs/libfaad/codebook/hcb_11.h b/apps/codecs/libfaad/codebook/hcb_11.h
index c5137215f9..49e97af23f 100644
--- a/apps/codecs/libfaad/codebook/hcb_11.h
+++ b/apps/codecs/libfaad/codebook/hcb_11.h
@@ -33,7 +33,7 @@
  *
  * Used to find offset into 2nd step table and number of extra bits to get
  */
-static hcb hcb11_1[] = {
+static hcb hcb11_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     /* 4 bits */
     { /* 00000 */ 0, 0 },
     { /*       */ 0, 0 },
@@ -95,7 +95,7 @@ static hcb hcb11_1[] = {
  *
  * Gives size of codeword and actual data (x,y,v,w)
  */
-static hcb_2_pair hcb11_2[] = {
+static hcb_2_pair hcb11_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     /* 4 */
     { 4,  0,  0 },
     { 4,  1,  1 },
diff --git a/apps/codecs/libfaad/codebook/hcb_2.h b/apps/codecs/libfaad/codebook/hcb_2.h
index bcc8857c15..15e7d57084 100644
--- a/apps/codecs/libfaad/codebook/hcb_2.h
+++ b/apps/codecs/libfaad/codebook/hcb_2.h
@@ -33,7 +33,7 @@
  *
  * Used to find offset into 2nd step table and number of extra bits to get
  */
-static hcb hcb2_1[] = {
+static hcb hcb2_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     { /* 00000 */ 0, 0 },
     { /*       */ 0, 0 },
     { /*       */ 0, 0 },
@@ -82,7 +82,7 @@ static hcb hcb2_1[] = {
  *
  * Gives size of codeword and actual data (x,y,v,w)
  */
-static hcb_2_quad hcb2_2[] = {
+static hcb_2_quad hcb2_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     /* 3 bit codeword */
     { 3,  0,  0,  0,  0 },
 
diff --git a/apps/codecs/libfaad/codebook/hcb_3.h b/apps/codecs/libfaad/codebook/hcb_3.h
index 2672de0a47..3b12fdff60 100644
--- a/apps/codecs/libfaad/codebook/hcb_3.h
+++ b/apps/codecs/libfaad/codebook/hcb_3.h
@@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_3 */
 
 
-static hcb_bin_quad hcb3[] = {
+static hcb_bin_quad hcb3[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     { /*  0 */ 0, {  1,  2, 0, 0 } },
     { /*  1 */ 1, {  0,  0, 0, 0 } }, /* 0 */
     { /*  2 */ 0, {  1,  2, 0, 0 } },
diff --git a/apps/codecs/libfaad/codebook/hcb_4.h b/apps/codecs/libfaad/codebook/hcb_4.h
index 1dc90404f6..67397e1fb0 100644
--- a/apps/codecs/libfaad/codebook/hcb_4.h
+++ b/apps/codecs/libfaad/codebook/hcb_4.h
@@ -33,7 +33,7 @@
  *
  * Used to find offset into 2nd step table and number of extra bits to get
  */
-static hcb hcb4_1[] = {
+static hcb hcb4_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     /* 4 bit codewords */
     { /* 00000 */ 0, 0 },
     { /*       */ 0, 0 },
@@ -85,7 +85,7 @@ static hcb hcb4_1[] = {
  *
  * Gives size of codeword and actual data (x,y,v,w)
  */
-static hcb_2_quad hcb4_2[] = {
+static hcb_2_quad hcb4_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     /* 4 bit codewords */
     { 4,  1,  1,  1,  1 },
     { 4,  0,  1,  1,  1 },
diff --git a/apps/codecs/libfaad/codebook/hcb_5.h b/apps/codecs/libfaad/codebook/hcb_5.h
index d094a6b1c2..0143482901 100644
--- a/apps/codecs/libfaad/codebook/hcb_5.h
+++ b/apps/codecs/libfaad/codebook/hcb_5.h
@@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_5 */
 
 
-static hcb_bin_pair hcb5[] = {
+static hcb_bin_pair hcb5[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     { /*  0 */ 0, {  1,  2 } },
     { /*  1 */ 1, {  0,  0 } }, /* 0 */
     { /*  2 */ 0, {  1,  2 } },
diff --git a/apps/codecs/libfaad/codebook/hcb_6.h b/apps/codecs/libfaad/codebook/hcb_6.h
index 4df2380ad2..9988133552 100644
--- a/apps/codecs/libfaad/codebook/hcb_6.h
+++ b/apps/codecs/libfaad/codebook/hcb_6.h
@@ -33,7 +33,7 @@
  *
  * Used to find offset into 2nd step table and number of extra bits to get
  */
-static hcb hcb6_1[] = {
+static hcb hcb6_1[] ICONST_ATTR = {
     /* 4 bit codewords */
     { /* 00000 */ 0, 0 },
     { /*       */ 0, 0 },
@@ -83,7 +83,7 @@ static hcb hcb6_1[] = {
  *
  * Gives size of codeword and actual data (x,y,v,w)
  */
-static hcb_2_pair hcb6_2[] = {
+static hcb_2_pair hcb6_2[] ICONST_ATTR = {
     /* 4 bit codewords */
     { 4,  0,  0 },
     { 4,  1,  0 },
diff --git a/apps/codecs/libfaad/codebook/hcb_7.h b/apps/codecs/libfaad/codebook/hcb_7.h
index c26a1a328f..3b378dc5cd 100644
--- a/apps/codecs/libfaad/codebook/hcb_7.h
+++ b/apps/codecs/libfaad/codebook/hcb_7.h
@@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_7 */
 
 
-static hcb_bin_pair hcb7[] = {
+static hcb_bin_pair hcb7[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     { /*  0 */ 0, { 1, 2 } },
     { /*  1 */ 1, { 0, 0 } },
     { /*  2 */ 0, { 1, 2 } },
diff --git a/apps/codecs/libfaad/codebook/hcb_8.h b/apps/codecs/libfaad/codebook/hcb_8.h
index d75da9030e..b76eb8f667 100644
--- a/apps/codecs/libfaad/codebook/hcb_8.h
+++ b/apps/codecs/libfaad/codebook/hcb_8.h
@@ -33,7 +33,7 @@
  *
  * Used to find offset into 2nd step table and number of extra bits to get
  */
-static hcb hcb8_1[] = {
+static hcb hcb8_1[] ICONST_ATTR = {
     /* 3 bit codeword */
     { /* 00000 */ 0, 0 },
     { /*       */ 0, 0 },
@@ -87,7 +87,7 @@ static hcb hcb8_1[] = {
  *
  * Gives size of codeword and actual data (x,y,v,w)
  */
-static hcb_2_pair hcb8_2[] = {
+static hcb_2_pair hcb8_2[] ICONST_ATTR = {
     /* 3 bit codeword */
     { 3,  1,  1 },
 
diff --git a/apps/codecs/libfaad/codebook/hcb_9.h b/apps/codecs/libfaad/codebook/hcb_9.h
index 740a6c324e..2932001294 100644
--- a/apps/codecs/libfaad/codebook/hcb_9.h
+++ b/apps/codecs/libfaad/codebook/hcb_9.h
@@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_9 */
 
 
-static hcb_bin_pair hcb9[] = {
+static hcb_bin_pair hcb9[] ICONST_ATTR_FAAD_LARGE_IRAM = {
     { /*  0 */ 0, { 1, 2 } },
     { /*  1 */ 1, { 0, 0 } },
     { /*  2 */ 0, { 1, 2 } },
diff --git a/apps/codecs/libfaad/codebook/hcb_sf.h b/apps/codecs/libfaad/codebook/hcb_sf.h
index aa5ccdbfcf..66762e2fce 100644
--- a/apps/codecs/libfaad/codebook/hcb_sf.h
+++ b/apps/codecs/libfaad/codebook/hcb_sf.h
@@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_SF */
 
 
-static uint8_t hcb_sf[][2] = {
+static uint8_t hcb_sf[][2] ICONST_ATTR_FAAD_LARGE_IRAM = {
     { /*  0 */  1, 2 },
     { /*  1 */  60, 0 },
     { /*  2 */  1, 2 },
diff --git a/apps/codecs/libfaad/common.h b/apps/codecs/libfaad/common.h
index 01164e3746..59ce806e91 100644
--- a/apps/codecs/libfaad/common.h
+++ b/apps/codecs/libfaad/common.h
@@ -51,6 +51,25 @@ extern struct codec_api* ci;
 #define LOGF(...)
 #endif
 
+#if   (CONFIG_CPU == MCF5250) || defined(CPU_S5L870X)
+/* Enough IRAM but performance suffers with ICODE_ATTR. */
+#define IBSS_ATTR_FAAD_LARGE_IRAM   IBSS_ATTR
+#define ICODE_ATTR_FAAD_LARGE_IRAM
+#define ICONST_ATTR_FAAD_LARGE_IRAM ICONST_ATTR
+
+#elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
+/* Enough IRAM to move additional data and code to it. */
+#define IBSS_ATTR_FAAD_LARGE_IRAM   IBSS_ATTR
+#define ICODE_ATTR_FAAD_LARGE_IRAM  ICODE_ATTR
+#define ICONST_ATTR_FAAD_LARGE_IRAM ICONST_ATTR
+
+#else
+/* Not enough IRAM available. */
+#define IBSS_ATTR_FAAD_LARGE_IRAM
+#define ICODE_ATTR_FAAD_LARGE_IRAM
+#define ICONST_ATTR_FAAD_LARGE_IRAM
+#endif
+
 #define INLINE __inline
 #if 0 //defined(_WIN32) && !defined(_WIN32_WCE)
 #define ALIGN __declspec(align(16))
@@ -71,7 +90,7 @@ extern struct codec_api* ci;
 /* #define USE_DOUBLE_PRECISION */
 /* use fixed point reals */
 #define FIXED_POINT
-//#define BIG_IQ_TABLE
+#define BIG_IQ_TABLE /* BIG_IQ_TABLE results in faster requantization */
 
 /* Use if target platform has address generators with autoincrement */
 //#define PREFER_POINTERS
diff --git a/apps/codecs/libfaad/huffman.c b/apps/codecs/libfaad/huffman.c
index c142ad7ac7..bea0dd7bf2 100644
--- a/apps/codecs/libfaad/huffman.c
+++ b/apps/codecs/libfaad/huffman.c
@@ -39,17 +39,18 @@
 
 
 /* static function declarations */
-static INLINE void huffman_sign_bits(bitfile *ld, int16_t *sp, uint8_t len);
+static INLINE void huffman_sign_bits_pair(bitfile *ld, int16_t *sp);
+static INLINE void huffman_sign_bits_quad(bitfile *ld, int16_t *sp);
 static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp);
 static uint8_t huffman_2step_quad(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp);
-static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp);
-static uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
+static INLINE uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp);
+static INLINE uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_quad(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_pair(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
-static int16_t huffman_codebook(uint8_t i);
+static int16_t huffman_codebook(uint8_t i) ICODE_ATTR_FAAD_LARGE_IRAM;
 #ifdef ERROR_RESILIENCE
 static void vcb11_check_LAV(uint8_t cb, int16_t *sp);
 #endif
@@ -75,49 +76,51 @@ int8_t huffman_scale_factor(bitfile *ld)
 }
 
 
-hcb *hcb_table[] = {
+hcb *hcb_table[] ICONST_ATTR = {
     0, hcb1_1, hcb2_1, 0, hcb4_1, 0, hcb6_1, 0, hcb8_1, 0, hcb10_1, hcb11_1
 };
 
-hcb_2_quad *hcb_2_quad_table[] = {
+hcb_2_quad *hcb_2_quad_table[] ICONST_ATTR = {
     0, hcb1_2, hcb2_2, 0, hcb4_2, 0, 0, 0, 0, 0, 0, 0
 };
 
-hcb_2_pair *hcb_2_pair_table[] = {
+hcb_2_pair *hcb_2_pair_table[] ICONST_ATTR = {
     0, 0, 0, 0, 0, 0, hcb6_2, 0, hcb8_2, 0, hcb10_2, hcb11_2
 };
 
-hcb_bin_pair *hcb_bin_table[] = {
+hcb_bin_pair *hcb_bin_table[] ICONST_ATTR = {
     0, 0, 0, 0, 0, hcb5, 0, hcb7, 0, hcb9, 0, 0
 };
 
-uint8_t hcbN[] = { 0, 5, 5, 0, 5, 0, 5, 0, 5, 0, 6, 5 };
+uint8_t hcbN[] ICONST_ATTR = { 0, 5, 5, 0, 5, 0, 5, 0, 5, 0, 6, 5 };
 
 /* defines whether a huffman codebook is unsigned or not */
 /* Table 4.6.2 */
-uint8_t unsigned_cb[] = { 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
-  /* codebook 16 to 31 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+uint8_t unsigned_cb[] ICONST_ATTR = { 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+              /* codebook 16 to 31 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 };
 
-int hcb_2_quad_table_size[] = { 0, 114, 86, 0, 185, 0, 0, 0, 0, 0, 0, 0 };
-int hcb_2_pair_table_size[] = { 0, 0, 0, 0, 0, 0, 126, 0, 83, 0, 210, 373 };
-int hcb_bin_table_size[] = { 0, 0, 0, 161, 0, 161, 0, 127, 0, 337, 0, 0 };
+int hcb_2_quad_table_size[] ICONST_ATTR = { 0, 114, 86, 0, 185, 0, 0, 0, 0, 0, 0, 0 };
+int hcb_2_pair_table_size[] ICONST_ATTR = { 0, 0, 0, 0, 0, 0, 126, 0, 83, 0, 210, 373 };
+int hcb_bin_table_size[] ICONST_ATTR = { 0, 0, 0, 161, 0, 161, 0, 127, 0, 337, 0, 0 };
 
-static INLINE void huffman_sign_bits(bitfile *ld, int16_t *sp, uint8_t len)
+#define FAAD_GET_SIGN(idx)          \
+        if (sp[idx])                \
+            if (faad_get1bit(ld)&1) \
+                sp[idx] = -sp[idx]; \
+
+static INLINE void huffman_sign_bits_pair(bitfile *ld, int16_t *sp)
 {
-    uint8_t i;
+    FAAD_GET_SIGN(0)
+    FAAD_GET_SIGN(1)
+}
 
-    for (i = 0; i < len; i++)
-    {
-        if(sp[i])
-        {
-            if(faad_get1bit(ld
-                DEBUGVAR(1,5,"huffman_sign_bits(): sign bit")) & 1)
-            {
-                sp[i] = -sp[i];
-            }
-        }
-    }
+static INLINE void huffman_sign_bits_quad(bitfile *ld, int16_t *sp)
+{
+    FAAD_GET_SIGN(0)
+    FAAD_GET_SIGN(1)
+    FAAD_GET_SIGN(2)
+    FAAD_GET_SIGN(3)
 }
 
 static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp)
@@ -194,12 +197,12 @@ static uint8_t huffman_2step_quad(uint8_t cb, bitfile *ld, int16_t *sp)
 static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
     uint8_t err = huffman_2step_quad(cb, ld, sp);
-    huffman_sign_bits(ld, sp, QUAD_LEN);
+    huffman_sign_bits_quad(ld, sp);
 
     return err;
 }
 
-static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
+static INLINE uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
 {
     uint32_t cw;
     uint16_t offset = 0;
@@ -232,10 +235,10 @@ static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
     return 0;
 }
 
-static uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
+static INLINE uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
     uint8_t err = huffman_2step_pair(cb, ld, sp);
-    huffman_sign_bits(ld, sp, PAIR_LEN);
+    huffman_sign_bits_pair(ld, sp);
 
     return err;
 }
@@ -269,7 +272,7 @@ static uint8_t huffman_binary_quad(uint8_t cb, bitfile *ld, int16_t *sp)
 static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
     uint8_t err = huffman_binary_quad(cb, ld, sp);
-    huffman_sign_bits(ld, sp, QUAD_LEN);
+    huffman_sign_bits_quad(ld, sp);
 
     return err;
 }
@@ -301,7 +304,7 @@ static uint8_t huffman_binary_pair(uint8_t cb, bitfile *ld, int16_t *sp)
 static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
     uint8_t err = huffman_binary_pair(cb, ld, sp);
-    huffman_sign_bits(ld, sp, PAIR_LEN);
+    huffman_sign_bits_pair(ld, sp);
 
     return err;
 }
diff --git a/apps/codecs/libfaad/huffman.h b/apps/codecs/libfaad/huffman.h
index 683689002d..214d3a220f 100644
--- a/apps/codecs/libfaad/huffman.h
+++ b/apps/codecs/libfaad/huffman.h
@@ -33,7 +33,7 @@ extern "C" {
 #endif
 
 int8_t huffman_scale_factor(bitfile *ld);
-uint8_t huffman_spectral_data(uint8_t cb, bitfile *ld, int16_t *sp);
+uint8_t huffman_spectral_data(uint8_t cb, bitfile *ld, int16_t *sp) ICODE_ATTR_FAAD_LARGE_IRAM;
 #ifdef ERROR_RESILIENCE
 int8_t huffman_spectral_data_2(uint8_t cb, bits_t *ld, int16_t *sp);
 #endif
diff --git a/apps/codecs/libfaad/specrec.c b/apps/codecs/libfaad/specrec.c
index 4998828eff..74bf1f36f6 100644
--- a/apps/codecs/libfaad/specrec.c
+++ b/apps/codecs/libfaad/specrec.c
@@ -414,19 +414,18 @@ uint8_t window_grouping_info(NeAACDecHandle hDecoder, ic_stream *ics)
 /**/
 static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
 {
-#ifdef FIXED_POINT
+#ifndef BIG_IQ_TABLE
 /* For FIXED_POINT the iq_table is prescaled by 3 bits (iq_table[]/8) */
 /* BIG_IQ_TABLE allows you to use the full 8192 value table, if this is not
  * defined a 1026 value table and interpolation will be used
  */
-#ifndef BIG_IQ_TABLE
     static const real_t errcorr[] = {
         REAL_CONST(0), REAL_CONST(1.0/8.0), REAL_CONST(2.0/8.0), REAL_CONST(3.0/8.0),
         REAL_CONST(4.0/8.0),  REAL_CONST(5.0/8.0), REAL_CONST(6.0/8.0), REAL_CONST(7.0/8.0),
         REAL_CONST(0)
     };
     real_t x1, x2;
-#endif
+
     int16_t sgn = 1;
 
     if (q < 0)
@@ -445,7 +444,6 @@ static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
         return sgn * tab[q];
     }
 
-#ifndef BIG_IQ_TABLE
     if (q >= 8192)
     {
         *error = 17;
@@ -456,12 +454,7 @@ static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
     x1 = tab[q>>3];
     x2 = tab[(q>>3) + 1];
     return sgn * 16 * (MUL_R(errcorr[q&7],(x2-x1)) + x1);
-#else
-    *error = 17;
-    return 0;
-#endif
-
-#else
+#else /* #ifndef BIG_IQ_TABLE */
     if (q < 0)
     {
         /* tab contains a value for all possible q [0,8192] */
@@ -547,9 +540,7 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
     uint8_t g, sfb, win;
     uint16_t width, bin, k, gindex, wa, wb;
     uint8_t error = 0; /* Init error flag */
-#ifndef FIXED_POINT
     real_t scf;
-#endif
 
     k = 0;
     gindex = 0;
@@ -597,6 +588,8 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
 
 #ifndef FIXED_POINT
             scf = pow2sf_tab[exp/*+25*/] * pow2_table[frac];
+#else
+            scf = pow2_table[frac];
 #endif
 
             for (win = 0; win < ics->window_group_length[g]; win++)
@@ -612,32 +605,12 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
                     spec_data[wb+3] = iquant(quant_data[k+3], tab, &error) * scf;
                         
 #else
-                    real_t iq0 = iquant(quant_data[k+0], tab, &error);
-                    real_t iq1 = iquant(quant_data[k+1], tab, &error);
-                    real_t iq2 = iquant(quant_data[k+2], tab, &error);
-                    real_t iq3 = iquant(quant_data[k+3], tab, &error);
-
                     wb = wa + bin;
-
-                    if (exp < 0)
-                    {
-                        spec_data[wb+0] = iq0 >>= -exp;
-                        spec_data[wb+1] = iq1 >>= -exp;
-                        spec_data[wb+2] = iq2 >>= -exp;
-                        spec_data[wb+3] = iq3 >>= -exp;
-                    } else {
-                        spec_data[wb+0] = iq0 <<= exp;
-                        spec_data[wb+1] = iq1 <<= exp;
-                        spec_data[wb+2] = iq2 <<= exp;
-                        spec_data[wb+3] = iq3 <<= exp;
-                    }
-                    if (frac != 0)
-                    {
-                        spec_data[wb+0] = MUL_C(spec_data[wb+0],pow2_table[frac]);
-                        spec_data[wb+1] = MUL_C(spec_data[wb+1],pow2_table[frac]);
-                        spec_data[wb+2] = MUL_C(spec_data[wb+2],pow2_table[frac]);
-                        spec_data[wb+3] = MUL_C(spec_data[wb+3],pow2_table[frac]);
-                    }
+                 
+                    spec_data[wb+0] = MUL_C((iquant(quant_data[k+0], tab, &error)<<exp), scf);
+                    spec_data[wb+1] = MUL_C((iquant(quant_data[k+1], tab, &error)<<exp), scf);
+                    spec_data[wb+2] = MUL_C((iquant(quant_data[k+2], tab, &error)<<exp), scf);
+                    spec_data[wb+3] = MUL_C((iquant(quant_data[k+3], tab, &error)<<exp), scf);
 
 //#define SCFS_PRINT
 #ifdef SCFS_PRINT
@@ -855,11 +828,14 @@ static uint8_t allocate_channel_pair(NeAACDecHandle hDecoder,
     return 0;
 }
 
+/* used by reconstruct_single_channel() and reconstruct_channel_pair() */
+ALIGN static real_t spec_coef1[1024] IBSS_ATTR;
+ALIGN static real_t spec_coef2[1024] IBSS_ATTR;
+
 uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
                                    element *sce, int16_t *spec_data)
 {
     uint8_t retval, output_channels;
-    ALIGN static real_t spec_coef[1024];
 
 #ifdef PROFILE
     int64_t count = faad_get_ts();
@@ -893,7 +869,7 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
 
 
     /* dequantisation and scaling */
-    retval = quant_to_spec(hDecoder, ics, spec_data, spec_coef, hDecoder->frameLength);
+    retval = quant_to_spec(hDecoder, ics, spec_data, spec_coef1, hDecoder->frameLength);
     if (retval > 0)
         return retval;
 
@@ -904,14 +880,14 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
 
 
     /* pns decoding */
-    pns_decode(ics, NULL, spec_coef, NULL, hDecoder->frameLength, 0, hDecoder->object_type);
+    pns_decode(ics, NULL, spec_coef1, NULL, hDecoder->frameLength, 0, hDecoder->object_type);
 
 #ifdef MAIN_DEC
     /* MAIN object type prediction */
     if (hDecoder->object_type == MAIN)
     {
         /* intra channel prediction */
-        ic_prediction(ics, spec_coef, hDecoder->pred_stat[sce->channel], hDecoder->frameLength,
+        ic_prediction(ics, spec_coef1, hDecoder->pred_stat[sce->channel], hDecoder->frameLength,
             hDecoder->sf_index);
 
         /* In addition, for scalefactor bands coded by perceptual
@@ -938,7 +914,7 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
 #endif
 
         /* long term prediction */
-        lt_prediction(ics, &(ics->ltp), spec_coef, hDecoder->lt_pred_stat[sce->channel], hDecoder->fb,
+        lt_prediction(ics, &(ics->ltp), spec_coef1, hDecoder->lt_pred_stat[sce->channel], hDecoder->fb,
             ics->window_shape, hDecoder->window_shape_prev[sce->channel],
             hDecoder->sf_index, hDecoder->object_type, hDecoder->frameLength);
     }
@@ -946,13 +922,13 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
 
     /* tns decoding */
     tns_decode_frame(ics, &(ics->tns), hDecoder->sf_index, hDecoder->object_type,
-        spec_coef, hDecoder->frameLength);
+        spec_coef1, hDecoder->frameLength);
 
     /* drc decoding */
     if (hDecoder->drc->present)
     {
         if (!hDecoder->drc->exclude_mask[sce->channel] || !hDecoder->drc->excluded_chns_present)
-            drc_decode(hDecoder->drc, spec_coef);
+            drc_decode(hDecoder->drc, spec_coef1);
     }
 
     /* filter bank */
@@ -961,13 +937,13 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
     {
 #endif
         ifilter_bank(ics->window_sequence,ics->window_shape,
-            hDecoder->window_shape_prev[sce->channel],spec_coef,
+            hDecoder->window_shape_prev[sce->channel],spec_coef1,
             hDecoder->time_out[sce->channel], hDecoder->fb_intermed[sce->channel],
             hDecoder->object_type, hDecoder->frameLength);
 #ifdef SSR_DEC
     } else {
         ssr_decode(&(ics->ssr), hDecoder->fb, ics->window_sequence, ics->window_shape,
-            hDecoder->window_shape_prev[sce->channel], spec_coef, hDecoder->time_out[sce->channel],
+            hDecoder->window_shape_prev[sce->channel], spec_coef1, hDecoder->time_out[sce->channel],
             hDecoder->ssr_overlap[sce->channel], hDecoder->ipqf_buffer[sce->channel], hDecoder->prev_fmd[sce->channel],
             hDecoder->frameLength);
     }
@@ -1051,8 +1027,6 @@ uint8_t reconstruct_channel_pair(NeAACDecHandle hDecoder, ic_stream *ics1, ic_st
                                  element *cpe, int16_t *spec_data1, int16_t *spec_data2)
 {
     uint8_t retval;
-    ALIGN static real_t spec_coef1[1024] IBSS_ATTR;
-    ALIGN static real_t spec_coef2[1024] IBSS_ATTR;
 
 #ifdef PROFILE
     int64_t count = faad_get_ts();
-- 
cgit v1.2.3