Submit part of FS#11498. Major speedup for WMA Professional on ARM and Coldfire CPUs. Introduce asm routines for multiplications, move arrays with major impact on decoding speed to IRAM. Speeds up decoding by 25% on PP5022 and 34% on mcf5249.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27582 a1c6a512-1295-4272-9138-f99709370657
author: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-07-26 21:43:07 +0000
committer: Andree Buschmann <AndreeBuschmann@t-online.de> 2010-07-26 21:43:07 +0000
commit: 87d59ab56c30eadc4691a41ba7540cca868c9b50 (patch)
tree: ba0b3196b5dcabc7a4c5296c1ea9027f033f023f
parent: dbeb1ee07269942997a3676595a795d4aee547eb (diff)
download: rockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.tar.gz
rockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.zip
4 files changed, 240 insertions, 30 deletions
diff --git a/apps/codecs/libwmapro/mdct_tables.c b/apps/codecs/libwmapro/mdct_tables.c
index b87d1b4045..dd8b2a451c 100644
--- a/apps/codecs/libwmapro/mdct_tables.c
+++ b/apps/codecs/libwmapro/mdct_tables.c
@@ -1,3 +1,4 @@
+#include "wmaprodec.h"
 #include <inttypes.h>
  
 /* Tables for fixed-point trig tables for windowing and mdct */
@@ -689,7 +690,7 @@ const int32_t sine_4096[] = {
    0x8000078F, 0x800003DC, 0x80000164, 0x80000028
 };
-const int32_t sine_2048[] = {
+const int32_t sine_2048[] ICONST_ATTR_WMAPRO_WIN_VS_TMP = {
    0xFFF36F03, 0xFFDA4D09, 0xFFC12B0F, 0xFFA80918, 0xFF8EE725, 0xFF75C536, 
    0xFF5CA34B, 0xFF438168, 0xFF2A5F8C, 0xFF113DB7, 0xFEF81BEE, 0xFEDEFA2D, 
    0xFEC5D876, 0xFEACB6CE, 0xFE939531, 0xFE7A73A2, 0xFE615224, 0xFE4830B4, 
@@ -1208,7 +1209,7 @@ const int32_t sine_1024[] = {
    0x800078E8, 0x80003DB0, 0x80001636, 0x80000279
 };
-const int32_t sine_512[] = {
+const int32_t sine_512[] ICONST_ATTR_WMAPRO_WIN_VS_TMP = {
    0xFFCDBC0C, 0xFF693440, 0xFF04ACD1, 0xFEA025FE, 0xFE3BA003, 0xFDD71B21, 
    0xFD72978F, 0xFD0E1595, 0xFCA9956C, 0xFC45174E, 0xFBE09B85, 0xFB7C223F, 
    0xFB17ABC2, 0xFAB33854, 0xFA4EC823, 0xF9EA5B74, 0xF985F28F, 0xF9218DA0, 
@@ -1297,7 +1298,7 @@ const int32_t sine_512[] = {
    0x800058D4, 0x800009DF
 };
-const int32_t sine_256[] = {
+const int32_t sine_256[] ICONST_ATTR_WMAPRO_WIN_VS_TMP = {
    0xFF9B781E, 0xFED26950, 0xFE095D6B, 0xFD40565E, 0xFC775617, 0xFBAE5E8D, 
    0xFAE571A2, 0xFA1C9157, 0xF953BF94, 0xF88AFE40, 0xF7C24F61, 0xF6F9B4C9, 
    0xF6313075, 0xF568C463, 0xF4A07264, 0xF3D83C74, 0xF3102493, 0xF2482C8D, 
@@ -1343,7 +1344,7 @@ const int32_t sine_256[] = {
    0x80078E60, 0x8003DAF2, 0x8001634E, 0x8000277B
 };
-const int32_t sine_128[] = {
+const int32_t sine_128[] ICONST_ATTR = {
    0xFF36F079, 0xFDA4D929, 0xFC12D91C, 0xFA80FFCF, 0xF8EF5CBC, 0xF75DFF6C, 
    0xF5CCF73F, 0xF43C53CB, 0xF2AC2474, 0xF11C7895, 0xEF8D5FC8, 0xEDFEE931, 
    0xEC71244A, 0xEAE4208A, 0xE957ED00, 0xE7CC9912, 0xE642341D, 0xE4B8CD16, 
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h
index 71cc3d33d7..30b9a987ee 100644
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -3,21 +3,187 @@
 #include <inttypes.h>
+/* rockbox: not used
 #define fixtof16(x)       (float)((float)(x) / (float)(1 << 16))
 #define fixtof31(x)       (float)((float)(x) / (float)(1 << 31))
 #define ftofix16(x)       ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
 #define ftofix31(x)       ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))
+*/
-static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
+#if defined(CPU_ARM)
-{
+    /* Calculates: result = (X*Y)>>Z */
-    int64_t temp;
+    #define fixmulshift(X,Y,Z) \
-    temp = x;
+    ({ \
-    temp *= y;
+        int32_t lo; \
+        int32_t hi; \
+        asm volatile ( \
+            "smull %[lo], %[hi], %[x], %[y] \n\t"   /* multiply */ \
+            "mov   %[lo], %[lo], lsr %[shr] \n\t"   /* lo >>= Z */ \
+            "orr   %[lo], %[lo], %[hi], lsl %[shl]" /* lo |= (hi << (32-Z)) */ \
+            : [lo]"=&r"(lo), [hi]"=&r"(hi) \
+            : [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-Z)); \
+        lo; \
+    })
+     
+    /* Calculates: result = (X*Y)>>16 */
+    #define fixmul16(X,Y) \
+     ({ \
+        int32_t lo; \
+        int32_t hi; \
+        asm volatile ( \
+           "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
+           "mov   %[lo], %[lo], lsr #16    \n\t" /* lo >>= 16 */ \
+           "orr   %[lo], %[lo], %[hi], lsl #16"  /* lo |= (hi << 16) */ \
+           : [lo]"=&r"(lo), [hi]"=&r"(hi) \
+           : [x]"r"(X), [y]"r"(Y)); \
+        lo; \
+     })
+     
+    /* Calculates: result = (X*Y)>>24 */
+    #define fixmul24(X,Y) \
+     ({ \
+        int32_t lo; \
+        int32_t hi; \
+        asm volatile ( \
+           "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
+           "mov   %[lo], %[lo], lsr #24    \n\t" /* lo >>= 24 */ \
+           "orr   %[lo], %[lo], %[hi], lsl #8"   /* lo |= (hi << 8) */ \
+           : [lo]"=&r"(lo), [hi]"=&r"(hi) \
+           : [x]"r"(X), [y]"r"(Y)); \
+        lo; \
+     })
+     
+    /* Calculates: result = (X*Y)>>31 */
+    #define fixmul31(X,Y) \
+     ({ \
+        int32_t lo; \
+        int32_t hi; \
+        asm volatile ( \
+           "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
+           "mov   %[lo], %[lo], lsr #31    \n\t" /* lo >>= 31 */ \
+           "orr   %[lo], %[lo], %[hi], lsl #1"   /* lo |= (hi << 1) */ \
+           : [lo]"=&r"(lo), [hi]"=&r"(hi) \
+           : [x]"r"(X), [y]"r"(Y)); \
+        lo; \
+     })
+#elif defined(CPU_COLDFIRE)
+    /* Calculates: result = (X*Y)>>Z */
+    #define fixmulshift(X,Y,Z) \
+    ({ \
+        int32_t t1; \
+        int32_t t2; \
+        asm volatile ( \
+            "mac.l   %[x],%[y],%%acc0\n\t" /* multiply */ \
+            "mulu.l  %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
+            "movclr.l %%acc0,%[t1]   \n\t" /* get higher half */ \
+            "moveq.l #31,%[t2]       \n\t" \
+            "sub.l   %[sh],%[t2]     \n\t" /* t2 = 31 - shift */ \
+            "ble.s   1f              \n\t" \
+            "asl.l   %[t2],%[t1]     \n\t" /* hi <<= 31 - shift */ \
+            "lsr.l   %[sh],%[x]      \n\t" /* (unsigned)lo >>= shift */ \
+            "or.l    %[x],%[t1]      \n\t" /* combine result */ \
+            "bra.s   2f              \n\t" \
+         "1:                         \n\t" \
+            "neg.l   %[t2]           \n\t" /* t2 = shift - 31 */ \
+            "asr.l   %[t2],%[t1]     \n\t" /* hi >>= t2 */ \
+         "2:                         \n" \
+        : [t1]"=&d"(t1), [t2]"=&d"(t2) \
+        : [x] "d"((X)), [y] "d"((Y)), [sh]"d"((Z))); \
+        t1; \
+    })
-    temp >>= shamt;
+    /* Calculates: result = (X*Y)>>16 */
+    #define fixmul16(X,Y) \
+    ({ \
+        int32_t t1, t2; \
+        asm volatile ( \
+            "mac.l   %[x],%[y],%%acc0\n\t" /* multiply */ \
+            "mulu.l  %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
+            "movclr.l %%acc0,%[t1]   \n\t" /* get higher half */ \
+            "moveq.l #15,%[t2]       \n\t" \
+            "asl.l   %[t2],%[t1]     \n\t" /* hi <<= 15, plus one free */ \
+            "moveq.l #16,%[t2]       \n\t" \
+            "lsr.l   %[t2],%[x]      \n\t" /* (unsigned)lo >>= 16 */ \
+            "or.l    %[x],%[t1]      \n\t" /* combine result */ \
+            : [t1]"=&d"(t1), [t2]"=&d"(t2) \
+            : [x] "d" ((X)), [y] "d" ((Y))); \
+        t1; \
+    })
+    
+    /* Calculates: result = (X*Y)>>24 */
+    #define fixmul24(X,Y) \
+    ({ \
+        int32_t t1, t2; \
+        asm volatile ( \
+            "mac.l   %[x],%[y],%%acc0\n\t" /* multiply */ \
+            "mulu.l  %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
+            "movclr.l %%acc0,%[t1]   \n\t" /* get higher half */ \
+            "moveq.l #7,%[t2]        \n\t" \
+            "asl.l   %[t2],%[t1]     \n\t" /* hi <<= 7, plus one free */ \
+            "moveq.l #24,%[t2]       \n\t" \
+            "lsr.l   %[t2],%[x]      \n\t" /* (unsigned)lo >>= 24 */ \
+            "or.l    %[x],%[t1]      \n\t" /* combine result */ \
+            : [t1]"=&d"(t1), [t2]"=&d"(t2) \
+            : [x] "d" ((X)), [y] "d" ((Y))); \
+        t1; \
+    })
-    return (int32_t)temp;
+    /* Calculates: result = (X*Y)>>31 (relies on EMAC fractional mode) */
-}
+    #define fixmul31(X,Y) \
+    ({ \
+       int32_t t; \
+       asm volatile ( \
+          "mac.l %[x], %[y], %%acc0\n\t"   /* multiply */ \
+          "movclr.l %%acc0, %[t]\n\t"      /* get higher half as result */ \
+          : [t] "=d" (t) \
+          : [x] "r" ((X)), [y] "r" ((Y))); \
+       t; \
+    })
+#else
+    static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
+    {
+        int64_t temp;
+        temp = x;
+        temp *= y;
+    
+        temp >>= shamt;
+    
+        return (int32_t)temp;
+    }
+    
+    static inline int32_t fixmul31(int32_t x, int32_t y)
+    {
+        int64_t temp;
+        temp = x;
+        temp *= y;
+    
+        temp >>= 31;
+    
+        return (int32_t)temp;
+    }
+    
+    static inline int32_t fixmul24(int32_t x, int32_t y)
+    {
+        int64_t temp;
+        temp = x;
+        temp *= y;
+    
+        temp >>= 24;
+    
+        return (int32_t)temp;
+    }
+    
+    static inline int32_t fixmul16(int32_t x, int32_t y)
+    {
+        int64_t temp;
+        temp = x;
+        temp *= y;
+    
+        temp >>= 16;
+    
+        return (int32_t)temp;
+    }
+#endif /* CPU_COLDFIRE, CPU_ARM */
 #ifdef CPU_COLDFIRE
 static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0, 
@@ -62,18 +228,18 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
        int32_t s1 = src1[j];
        int32_t wi = -win[i];
        int32_t wj = -win[j];
-        dst[i] = fixmulshift(s0,wj,31) - fixmulshift(s1,wi,31);
+        dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi);
-        dst[j] = fixmulshift(s0,wi,31) + fixmulshift(s1,wj,31);
+        dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj);
    }
 }
 #endif
-static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, int32_t mul,
+static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, 
-                                        int len, int shift)
+                                        int32_t mul, int len)
 {
    int i;
    for(i=0; i<len; i++)
-        dst[i] = fixmulshift(src[i],mul,shift);   
+        dst[i] = fixmul24(src[i], mul);   
 }
 static inline int av_clip(int a, int amin, int amax)
diff --git a/apps/codecs/libwmapro/wmaprodec.c b/apps/codecs/libwmapro/wmaprodec.c
index b7879a2644..1f65157f64 100644
--- a/apps/codecs/libwmapro/wmaprodec.c
+++ b/apps/codecs/libwmapro/wmaprodec.c
@@ -133,6 +133,7 @@
 #define WMAPRO_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
 #define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
 #define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
+#define WMAPRO_OUT_BUF_SIZE   (WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2)
 #define VLCBITS            9
@@ -151,6 +152,12 @@ static VLC              vec1_vlc;         ///< 1 coefficient per symbol
 static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
 //static float            sin64[33];        ///< sinus table for decorrelation
+/* Global defined arrays to allow IRAM usage for some models. */
+static int32_t g_tmp[WMAPRO_BLOCK_MAX_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
+static int32_t g_out_ch0[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR;
+static int32_t g_out_ch1[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
+static int32_t g_out_multichannel[WMAPRO_MAX_CHANNELS-2][WMAPRO_OUT_BUF_SIZE];
 /**
 * @brief frame specific decoder context for a single channel
 */
@@ -171,8 +178,8 @@ typedef struct {
    int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
    int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
    uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
-    int32_t*   coeffs;                                  ///< pointer to the subframe decode buffer
+    int32_t* coeffs;                                  ///< pointer to the subframe decode buffer
-    DECLARE_ALIGNED(16, int32_t, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
+    int32_t* out;                                     ///< output buffer
 } WMAProChannelCtx;
 /**
@@ -195,7 +202,7 @@ typedef struct WMAProDecodeCtx {
    uint8_t          frame_data[MAX_FRAMESIZE +
                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
    PutBitContext    pb;                            ///< context for filling the frame_data buffer
-    DECLARE_ALIGNED(16, int32_t, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT input buffer
+    int32_t*         tmp;                           ///< IMDCT input buffer
    /* frame size dependent frame information (set during initialization) */
    uint32_t         decode_flags;                  ///< used compression features
@@ -229,8 +236,8 @@ typedef struct WMAProDecodeCtx {
    uint32_t         frame_num;                     ///< current frame number
    GetBitContext    gb;                            ///< bitstream reader context
    int              buf_bit_size;                  ///< buffer size in bits
-    int32_t*           samples;
+    int32_t*         samples;
-    int32_t*           samples_end;                   ///< maximum samplebuffer pointer
+    int32_t*         samples_end;                   ///< maximum samplebuffer pointer
    uint8_t          drc_gain;                      ///< gain for the DRC tool
    int8_t           skip_frame;                    ///< skip output step
    int8_t           parsed_all_subframes;          ///< all subframes decoded?
@@ -287,6 +294,15 @@ int decode_init(asf_waveformatex_t *wfx)
    int i;
    int log2_max_num_subframes;
    int num_possible_block_sizes;
+    
+    /* Use globally defined array. Allows IRAM usage for models with large IRAM. */
+    s->tmp = g_tmp;
+    
+    /* Use globally defined arrays. Allows IRAM usage for up to 2 channels. */
+    s->channel[0].out = g_out_ch0;
+    s->channel[1].out = g_out_ch1;
+    for (i=2; i<WMAPRO_MAX_CHANNELS; ++i)
+        s->channel[i].out = g_out_multichannel[i-2];
 #if defined(CPU_COLDFIRE)
    coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
@@ -657,9 +673,9 @@ static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
                chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
                                               (v1 * cosv) + (v2 * sinv);
                chgroup->fixdecorrelation_matrix[y + x * chgroup->num_channels] =
-                                               fixmulshift(f1, fixsinv, 31) - fixmulshift(f2, fixcosv, 31);
+                                               fixmul31(f1, fixsinv) - fixmul31(f2, fixcosv);
                chgroup->fixdecorrelation_matrix[y + i * chgroup->num_channels] =
-                                               fixmulshift(f1, fixcosv, 31) + fixmulshift(f2, fixsinv, 31);
+                                               fixmul31(f1, fixcosv) + fixmul31(f2, fixsinv);
                                               
            }
        }
@@ -1009,20 +1025,21 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
                            data_ptr = data;
                                
                            while (data_ptr < data_end)
-                                sum += fixmulshift(*data_ptr++, *mat++, 16);
+                                sum += fixmul16(*data_ptr++, *mat++);
                            (*ch)[y] = sum;
                        }
                    }
                } else if (s->num_channels == 2) {
+                    /* Scale with sqrt(2). 0x016A09E6 = (sqrt(2)*(1<<24)) */
                    int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
                    vector_fixmul_scalar(ch_data[0] + sfb[0],
                                         ch_data[0] + sfb[0],
-                                         0x00016A00, len,16);
+                                         0x016A09E6, len);
                    vector_fixmul_scalar(ch_data[1] + sfb[0],
-                                                 ch_data[1] + sfb[0],
+                                         ch_data[1] + sfb[0],
-                                                 0x00016A00, len,16);
+                                         0x016A09E6, len);
                }
            }
@@ -1049,7 +1066,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
            winlen = s->subframe_len;
        }
  
-        window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];     
+        window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
            
        winlen >>= 1;
@@ -1261,7 +1278,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
                   
                vector_fixmul_scalar(s->tmp+start, 
                                     s->channel[c].coeffs + start,
-                                     quant, end-start, 24);
+                                     quant, end-start);
               
            }
diff --git a/apps/codecs/libwmapro/wmaprodec.h b/apps/codecs/libwmapro/wmaprodec.h
index 40f3a60db6..3203dda583 100644
--- a/apps/codecs/libwmapro/wmaprodec.h
+++ b/apps/codecs/libwmapro/wmaprodec.h
@@ -1,5 +1,31 @@
+#include "codeclib.h"
 #include "../libasf/asf.h"
+#if   (CONFIG_CPU == MCF5250) || defined(CPU_S5L870X)
+/* Enough IRAM but performance suffers with ICODE_ATTR. */
+#define IBSS_ATTR_WMAPRO_LARGE_IRAM   IBSS_ATTR
+#define ICODE_ATTR_WMAPRO_LARGE_IRAM
+#define ICONST_ATTR_WMAPRO_LARGE_IRAM ICONST_ATTR
+#define ICONST_ATTR_WMAPRO_WIN_VS_TMP
+#elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
+/* Enough IRAM to move additional data and code to it. */
+#define IBSS_ATTR_WMAPRO_LARGE_IRAM   IBSS_ATTR
+#define ICODE_ATTR_WMAPRO_LARGE_IRAM  ICODE_ATTR
+#define ICONST_ATTR_WMAPRO_LARGE_IRAM ICONST_ATTR
+#define ICONST_ATTR_WMAPRO_WIN_VS_TMP
+#else
+/* Not enough IRAM available. */
+#define IBSS_ATTR_WMAPRO_LARGE_IRAM
+#define ICODE_ATTR_WMAPRO_LARGE_IRAM
+#define ICONST_ATTR_WMAPRO_LARGE_IRAM
+/* Models with large IRAM put tmp to IRAM rather than window coefficients as
+ * this is the fastest option. On models with smaller IRAM the 2nd-best option
+ * is to move the window coefficients to IRAM. */
+#define ICONST_ATTR_WMAPRO_WIN_VS_TMP ICONST_ATTR
+#endif
 int decode_init(asf_waveformatex_t *wfx);
 int decode_packet(asf_waveformatex_t *wfx,
                  void *data, int *data_size, void* pktdata, int size);
author	Andree Buschmann <AndreeBuschmann@t-online.de>	2010-07-26 21:43:07 +0000
committer	Andree Buschmann <AndreeBuschmann@t-online.de>	2010-07-26 21:43:07 +0000
commit	87d59ab56c30eadc4691a41ba7540cca868c9b50 (patch)
tree	ba0b3196b5dcabc7a4c5296c1ea9027f033f023f
parent	dbeb1ee07269942997a3676595a795d4aee547eb (diff)
download	rockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.tar.gz rockbox-87d59ab56c30eadc4691a41ba7540cca868c9b50.zip

diff --git a/apps/codecs/libwmapro/mdct_tables.c b/apps/codecs/libwmapro/mdct_tables.c index b87d1b4045..dd8b2a451c 100644 --- a/apps/codecs/libwmapro/mdct_tables.c +++ b/apps/codecs/libwmapro/mdct_tables.c
@@ -1,3 +1,4 @@
		1	#include "wmaprodec.h"
1	#include <inttypes.h>	2	#include <inttypes.h>
2		3
3	/* Tables for fixed-point trig tables for windowing and mdct */	4	/* Tables for fixed-point trig tables for windowing and mdct */
@@ -689,7 +690,7 @@ const int32_t sine_4096[] = {
689	0x8000078F, 0x800003DC, 0x80000164, 0x80000028	690	0x8000078F, 0x800003DC, 0x80000164, 0x80000028
690	};	691	};
691		692
692	const int32_t sine_2048[] = {	693	const int32_t sine_2048[] ICONST_ATTR_WMAPRO_WIN_VS_TMP = {
693	0xFFF36F03, 0xFFDA4D09, 0xFFC12B0F, 0xFFA80918, 0xFF8EE725, 0xFF75C536,	694	0xFFF36F03, 0xFFDA4D09, 0xFFC12B0F, 0xFFA80918, 0xFF8EE725, 0xFF75C536,
694	0xFF5CA34B, 0xFF438168, 0xFF2A5F8C, 0xFF113DB7, 0xFEF81BEE, 0xFEDEFA2D,	695	0xFF5CA34B, 0xFF438168, 0xFF2A5F8C, 0xFF113DB7, 0xFEF81BEE, 0xFEDEFA2D,
695	0xFEC5D876, 0xFEACB6CE, 0xFE939531, 0xFE7A73A2, 0xFE615224, 0xFE4830B4,	696	0xFEC5D876, 0xFEACB6CE, 0xFE939531, 0xFE7A73A2, 0xFE615224, 0xFE4830B4,
@@ -1208,7 +1209,7 @@ const int32_t sine_1024[] = {
1208	0x800078E8, 0x80003DB0, 0x80001636, 0x80000279	1209	0x800078E8, 0x80003DB0, 0x80001636, 0x80000279
1209	};	1210	};
1210		1211
1211	const int32_t sine_512[] = {	1212	const int32_t sine_512[] ICONST_ATTR_WMAPRO_WIN_VS_TMP = {
1212	0xFFCDBC0C, 0xFF693440, 0xFF04ACD1, 0xFEA025FE, 0xFE3BA003, 0xFDD71B21,	1213	0xFFCDBC0C, 0xFF693440, 0xFF04ACD1, 0xFEA025FE, 0xFE3BA003, 0xFDD71B21,
1213	0xFD72978F, 0xFD0E1595, 0xFCA9956C, 0xFC45174E, 0xFBE09B85, 0xFB7C223F,	1214	0xFD72978F, 0xFD0E1595, 0xFCA9956C, 0xFC45174E, 0xFBE09B85, 0xFB7C223F,
1214	0xFB17ABC2, 0xFAB33854, 0xFA4EC823, 0xF9EA5B74, 0xF985F28F, 0xF9218DA0,	1215	0xFB17ABC2, 0xFAB33854, 0xFA4EC823, 0xF9EA5B74, 0xF985F28F, 0xF9218DA0,
@@ -1297,7 +1298,7 @@ const int32_t sine_512[] = {
1297	0x800058D4, 0x800009DF	1298	0x800058D4, 0x800009DF
1298	};	1299	};
1299		1300
1300	const int32_t sine_256[] = {	1301	const int32_t sine_256[] ICONST_ATTR_WMAPRO_WIN_VS_TMP = {
1301	0xFF9B781E, 0xFED26950, 0xFE095D6B, 0xFD40565E, 0xFC775617, 0xFBAE5E8D,	1302	0xFF9B781E, 0xFED26950, 0xFE095D6B, 0xFD40565E, 0xFC775617, 0xFBAE5E8D,
1302	0xFAE571A2, 0xFA1C9157, 0xF953BF94, 0xF88AFE40, 0xF7C24F61, 0xF6F9B4C9,	1303	0xFAE571A2, 0xFA1C9157, 0xF953BF94, 0xF88AFE40, 0xF7C24F61, 0xF6F9B4C9,
1303	0xF6313075, 0xF568C463, 0xF4A07264, 0xF3D83C74, 0xF3102493, 0xF2482C8D,	1304	0xF6313075, 0xF568C463, 0xF4A07264, 0xF3D83C74, 0xF3102493, 0xF2482C8D,
@@ -1343,7 +1344,7 @@ const int32_t sine_256[] = {
1343	0x80078E60, 0x8003DAF2, 0x8001634E, 0x8000277B	1344	0x80078E60, 0x8003DAF2, 0x8001634E, 0x8000277B
1344	};	1345	};
1345		1346
1346	const int32_t sine_128[] = {	1347	const int32_t sine_128[] ICONST_ATTR = {
1347	0xFF36F079, 0xFDA4D929, 0xFC12D91C, 0xFA80FFCF, 0xF8EF5CBC, 0xF75DFF6C,	1348	0xFF36F079, 0xFDA4D929, 0xFC12D91C, 0xFA80FFCF, 0xF8EF5CBC, 0xF75DFF6C,
1348	0xF5CCF73F, 0xF43C53CB, 0xF2AC2474, 0xF11C7895, 0xEF8D5FC8, 0xEDFEE931,	1349	0xF5CCF73F, 0xF43C53CB, 0xF2AC2474, 0xF11C7895, 0xEF8D5FC8, 0xEDFEE931,
1349	0xEC71244A, 0xEAE4208A, 0xE957ED00, 0xE7CC9912, 0xE642341D, 0xE4B8CD16,	1350	0xEC71244A, 0xEAE4208A, 0xE957ED00, 0xE7CC9912, 0xE642341D, 0xE4B8CD16,


diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h index 71cc3d33d7..30b9a987ee 100644 --- a/apps/codecs/libwmapro/wmapro_math.h +++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -3,21 +3,187 @@
3		3
4	#include <inttypes.h>	4	#include <inttypes.h>
5		5
		6	/* rockbox: not used
6	#define fixtof16(x) (float)((float)(x) / (float)(1 << 16))	7	#define fixtof16(x) (float)((float)(x) / (float)(1 << 16))
7	#define fixtof31(x) (float)((float)(x) / (float)(1 << 31))	8	#define fixtof31(x) (float)((float)(x) / (float)(1 << 31))
8	#define ftofix16(x) ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))	9	#define ftofix16(x) ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
9	#define ftofix31(x) ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))	10	#define ftofix31(x) ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))
		11	*/
10		12
11	static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)	13	#if defined(CPU_ARM)
12	{	14	/* Calculates: result = (X*Y)>>Z */
13	int64_t temp;	15	#define fixmulshift(X,Y,Z) \
14	temp = x;	16	({ \
15	temp *= y;	17	int32_t lo; \
		18	int32_t hi; \
		19	asm volatile ( \
		20	"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
		21	"mov %[lo], %[lo], lsr %[shr] \n\t" /* lo >>= Z */ \
		22	"orr %[lo], %[lo], %[hi], lsl %[shl]" /* lo |= (hi << (32-Z)) */ \
		23	: [lo]"=&r"(lo), [hi]"=&r"(hi) \
		24	: [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-Z)); \
		25	lo; \
		26	})
		27
		28	/* Calculates: result = (X*Y)>>16 */
		29	#define fixmul16(X,Y) \
		30	({ \
		31	int32_t lo; \
		32	int32_t hi; \
		33	asm volatile ( \
		34	"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
		35	"mov %[lo], %[lo], lsr #16 \n\t" /* lo >>= 16 */ \
		36	"orr %[lo], %[lo], %[hi], lsl #16" /* lo |= (hi << 16) */ \
		37	: [lo]"=&r"(lo), [hi]"=&r"(hi) \
		38	: [x]"r"(X), [y]"r"(Y)); \
		39	lo; \
		40	})
		41
		42	/* Calculates: result = (X*Y)>>24 */
		43	#define fixmul24(X,Y) \
		44	({ \
		45	int32_t lo; \
		46	int32_t hi; \
		47	asm volatile ( \
		48	"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
		49	"mov %[lo], %[lo], lsr #24 \n\t" /* lo >>= 24 */ \
		50	"orr %[lo], %[lo], %[hi], lsl #8" /* lo |= (hi << 8) */ \
		51	: [lo]"=&r"(lo), [hi]"=&r"(hi) \
		52	: [x]"r"(X), [y]"r"(Y)); \
		53	lo; \
		54	})
		55
		56	/* Calculates: result = (X*Y)>>31 */
		57	#define fixmul31(X,Y) \
		58	({ \
		59	int32_t lo; \
		60	int32_t hi; \
		61	asm volatile ( \
		62	"smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
		63	"mov %[lo], %[lo], lsr #31 \n\t" /* lo >>= 31 */ \
		64	"orr %[lo], %[lo], %[hi], lsl #1" /* lo |= (hi << 1) */ \
		65	: [lo]"=&r"(lo), [hi]"=&r"(hi) \
		66	: [x]"r"(X), [y]"r"(Y)); \
		67	lo; \
		68	})
		69	#elif defined(CPU_COLDFIRE)
		70	/* Calculates: result = (X*Y)>>Z */
		71	#define fixmulshift(X,Y,Z) \
		72	({ \
		73	int32_t t1; \
		74	int32_t t2; \
		75	asm volatile ( \
		76	"mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
		77	"mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
		78	"movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
		79	"moveq.l #31,%[t2] \n\t" \
		80	"sub.l %[sh],%[t2] \n\t" /* t2 = 31 - shift */ \
		81	"ble.s 1f \n\t" \
		82	"asl.l %[t2],%[t1] \n\t" /* hi <<= 31 - shift */ \
		83	"lsr.l %[sh],%[x] \n\t" /* (unsigned)lo >>= shift */ \
		84	"or.l %[x],%[t1] \n\t" /* combine result */ \
		85	"bra.s 2f \n\t" \
		86	"1: \n\t" \
		87	"neg.l %[t2] \n\t" /* t2 = shift - 31 */ \
		88	"asr.l %[t2],%[t1] \n\t" /* hi >>= t2 */ \
		89	"2: \n" \
		90	: [t1]"=&d"(t1), [t2]"=&d"(t2) \
		91	: [x] "d"((X)), [y] "d"((Y)), [sh]"d"((Z))); \
		92	t1; \
		93	})
16		94
17	temp >>= shamt;	95	/* Calculates: result = (X*Y)>>16 */
		96	#define fixmul16(X,Y) \
		97	({ \
		98	int32_t t1, t2; \
		99	asm volatile ( \
		100	"mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
		101	"mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
		102	"movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
		103	"moveq.l #15,%[t2] \n\t" \
		104	"asl.l %[t2],%[t1] \n\t" /* hi <<= 15, plus one free */ \
		105	"moveq.l #16,%[t2] \n\t" \
		106	"lsr.l %[t2],%[x] \n\t" /* (unsigned)lo >>= 16 */ \
		107	"or.l %[x],%[t1] \n\t" /* combine result */ \
		108	: [t1]"=&d"(t1), [t2]"=&d"(t2) \
		109	: [x] "d" ((X)), [y] "d" ((Y))); \
		110	t1; \
		111	})
		112
		113	/* Calculates: result = (X*Y)>>24 */
		114	#define fixmul24(X,Y) \
		115	({ \
		116	int32_t t1, t2; \
		117	asm volatile ( \
		118	"mac.l %[x],%[y],%%acc0\n\t" /* multiply */ \
		119	"mulu.l %[y],%[x] \n\t" /* get lower half, avoid emac stall */ \
		120	"movclr.l %%acc0,%[t1] \n\t" /* get higher half */ \
		121	"moveq.l #7,%[t2] \n\t" \
		122	"asl.l %[t2],%[t1] \n\t" /* hi <<= 7, plus one free */ \
		123	"moveq.l #24,%[t2] \n\t" \
		124	"lsr.l %[t2],%[x] \n\t" /* (unsigned)lo >>= 24 */ \
		125	"or.l %[x],%[t1] \n\t" /* combine result */ \
		126	: [t1]"=&d"(t1), [t2]"=&d"(t2) \
		127	: [x] "d" ((X)), [y] "d" ((Y))); \
		128	t1; \
		129	})
18		130
19	return (int32_t)temp;	131	/* Calculates: result = (X*Y)>>31 (relies on EMAC fractional mode) */
20	}	132	#define fixmul31(X,Y) \
		133	({ \
		134	int32_t t; \
		135	asm volatile ( \
		136	"mac.l %[x], %[y], %%acc0\n\t" /* multiply */ \
		137	"movclr.l %%acc0, %[t]\n\t" /* get higher half as result */ \
		138	: [t] "=d" (t) \
		139	: [x] "r" ((X)), [y] "r" ((Y))); \
		140	t; \
		141	})
		142	#else
		143	static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
		144	{
		145	int64_t temp;
		146	temp = x;
		147	temp *= y;
		148
		149	temp >>= shamt;
		150
		151	return (int32_t)temp;
		152	}
		153
		154	static inline int32_t fixmul31(int32_t x, int32_t y)
		155	{
		156	int64_t temp;
		157	temp = x;
		158	temp *= y;
		159
		160	temp >>= 31;
		161
		162	return (int32_t)temp;
		163	}
		164
		165	static inline int32_t fixmul24(int32_t x, int32_t y)
		166	{
		167	int64_t temp;
		168	temp = x;
		169	temp *= y;
		170
		171	temp >>= 24;
		172
		173	return (int32_t)temp;
		174	}
		175
		176	static inline int32_t fixmul16(int32_t x, int32_t y)
		177	{
		178	int64_t temp;
		179	temp = x;
		180	temp *= y;
		181
		182	temp >>= 16;
		183
		184	return (int32_t)temp;
		185	}
		186	#endif /* CPU_COLDFIRE, CPU_ARM */
21		187
22	#ifdef CPU_COLDFIRE	188	#ifdef CPU_COLDFIRE
23	static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,	189	static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
@@ -62,18 +228,18 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
62	int32_t s1 = src1[j];	228	int32_t s1 = src1[j];
63	int32_t wi = -win[i];	229	int32_t wi = -win[i];
64	int32_t wj = -win[j];	230	int32_t wj = -win[j];
65	dst[i] = fixmulshift(s0,wj,31) - fixmulshift(s1,wi,31);	231	dst[i] = fixmul31(s0, wj) - fixmul31(s1, wi);
66	dst[j] = fixmulshift(s0,wi,31) + fixmulshift(s1,wj,31);	232	dst[j] = fixmul31(s0, wi) + fixmul31(s1, wj);
67	}	233	}
68	}	234	}
69	#endif	235	#endif
70		236
71	static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, int32_t mul,	237	static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
72	int len, int shift)	238	int32_t mul, int len)
73	{	239	{
74	int i;	240	int i;
75	for(i=0; i<len; i++)	241	for(i=0; i<len; i++)
76	dst[i] = fixmulshift(src[i],mul,shift);	242	dst[i] = fixmul24(src[i], mul);
77	}	243	}
78		244
79	static inline int av_clip(int a, int amin, int amax)	245	static inline int av_clip(int a, int amin, int amax)


diff --git a/apps/codecs/libwmapro/wmaprodec.c b/apps/codecs/libwmapro/wmaprodec.c index b7879a2644..1f65157f64 100644 --- a/apps/codecs/libwmapro/wmaprodec.c +++ b/apps/codecs/libwmapro/wmaprodec.c
@@ -133,6 +133,7 @@
133	#define WMAPRO_BLOCK_MAX_BITS 12 ///< log2 of max block size	133	#define WMAPRO_BLOCK_MAX_BITS 12 ///< log2 of max block size
134	#define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS) ///< maximum block size	134	#define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS) ///< maximum block size
135	#define WMAPRO_BLOCK_SIZES (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes	135	#define WMAPRO_BLOCK_SIZES (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
		136	#define WMAPRO_OUT_BUF_SIZE (WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2)
136		137
137		138
138	#define VLCBITS 9	139	#define VLCBITS 9
@@ -151,6 +152,12 @@ static VLC vec1_vlc; ///< 1 coefficient per symbol
151	static VLC coef_vlc[2]; ///< coefficient run length vlc codes	152	static VLC coef_vlc[2]; ///< coefficient run length vlc codes
152	//static float sin64[33]; ///< sinus table for decorrelation	153	//static float sin64[33]; ///< sinus table for decorrelation
153		154
		155	/* Global defined arrays to allow IRAM usage for some models. */
		156	static int32_t g_tmp[WMAPRO_BLOCK_MAX_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
		157	static int32_t g_out_ch0[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR;
		158	static int32_t g_out_ch1[WMAPRO_OUT_BUF_SIZE] IBSS_ATTR_WMAPRO_LARGE_IRAM;
		159	static int32_t g_out_multichannel[WMAPRO_MAX_CHANNELS-2][WMAPRO_OUT_BUF_SIZE];
		160
154	/**	161	/**
155	* @brief frame specific decoder context for a single channel	162	* @brief frame specific decoder context for a single channel
156	*/	163	*/
@@ -171,8 +178,8 @@ typedef struct {
171	int8_t scale_factor_idx; ///< index for the transmitted scale factor values (used for resampling)	178	int8_t scale_factor_idx; ///< index for the transmitted scale factor values (used for resampling)
172	int* scale_factors; ///< pointer to the scale factor values used for decoding	179	int* scale_factors; ///< pointer to the scale factor values used for decoding
173	uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block	180	uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block
174	int32_t* coeffs; ///< pointer to the subframe decode buffer	181	int32_t* coeffs; ///< pointer to the subframe decode buffer
175	DECLARE_ALIGNED(16, int32_t, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer	182	int32_t* out; ///< output buffer
176	} WMAProChannelCtx;	183	} WMAProChannelCtx;
177		184
178	/**	185	/**
@@ -195,7 +202,7 @@ typedef struct WMAProDecodeCtx {
195	uint8_t frame_data[MAX_FRAMESIZE +	202	uint8_t frame_data[MAX_FRAMESIZE +
196	FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data	203	FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
197	PutBitContext pb; ///< context for filling the frame_data buffer	204	PutBitContext pb; ///< context for filling the frame_data buffer
198	DECLARE_ALIGNED(16, int32_t, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT input buffer	205	int32_t* tmp; ///< IMDCT input buffer
199		206
200	/* frame size dependent frame information (set during initialization) */	207	/* frame size dependent frame information (set during initialization) */
201	uint32_t decode_flags; ///< used compression features	208	uint32_t decode_flags; ///< used compression features
@@ -229,8 +236,8 @@ typedef struct WMAProDecodeCtx {
229	uint32_t frame_num; ///< current frame number	236	uint32_t frame_num; ///< current frame number
230	GetBitContext gb; ///< bitstream reader context	237	GetBitContext gb; ///< bitstream reader context
231	int buf_bit_size; ///< buffer size in bits	238	int buf_bit_size; ///< buffer size in bits
232	int32_t* samples;	239	int32_t* samples;
233	int32_t* samples_end; ///< maximum samplebuffer pointer	240	int32_t* samples_end; ///< maximum samplebuffer pointer
234	uint8_t drc_gain; ///< gain for the DRC tool	241	uint8_t drc_gain; ///< gain for the DRC tool
235	int8_t skip_frame; ///< skip output step	242	int8_t skip_frame; ///< skip output step
236	int8_t parsed_all_subframes; ///< all subframes decoded?	243	int8_t parsed_all_subframes; ///< all subframes decoded?
@@ -287,6 +294,15 @@ int decode_init(asf_waveformatex_t *wfx)
287	int i;	294	int i;
288	int log2_max_num_subframes;	295	int log2_max_num_subframes;
289	int num_possible_block_sizes;	296	int num_possible_block_sizes;
		297
		298	/* Use globally defined array. Allows IRAM usage for models with large IRAM. */
		299	s->tmp = g_tmp;
		300
		301	/* Use globally defined arrays. Allows IRAM usage for up to 2 channels. */
		302	s->channel[0].out = g_out_ch0;
		303	s->channel[1].out = g_out_ch1;
		304	for (i=2; i<WMAPRO_MAX_CHANNELS; ++i)
		305	s->channel[i].out = g_out_multichannel[i-2];
290		306
291	#if defined(CPU_COLDFIRE)	307	#if defined(CPU_COLDFIRE)
292	coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);	308	coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
@@ -657,9 +673,9 @@ static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
657	chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =	673	chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
658	(v1 * cosv) + (v2 * sinv);	674	(v1 * cosv) + (v2 * sinv);
659	chgroup->fixdecorrelation_matrix[y + x * chgroup->num_channels] =	675	chgroup->fixdecorrelation_matrix[y + x * chgroup->num_channels] =
660	fixmulshift(f1, fixsinv, 31) - fixmulshift(f2, fixcosv, 31);	676	fixmul31(f1, fixsinv) - fixmul31(f2, fixcosv);
661	chgroup->fixdecorrelation_matrix[y + i * chgroup->num_channels] =	677	chgroup->fixdecorrelation_matrix[y + i * chgroup->num_channels] =
662	fixmulshift(f1, fixcosv, 31) + fixmulshift(f2, fixsinv, 31);	678	fixmul31(f1, fixcosv) + fixmul31(f2, fixsinv);
663		679
664	}	680	}
665	}	681	}
@@ -1009,20 +1025,21 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
1009	data_ptr = data;	1025	data_ptr = data;
1010		1026
1011	while (data_ptr < data_end)	1027	while (data_ptr < data_end)
1012	sum += fixmulshift(*data_ptr++, *mat++, 16);	1028	sum += fixmul16(*data_ptr++, *mat++);
1013		1029
1014	(*ch)[y] = sum;	1030	(*ch)[y] = sum;
1015	}	1031	}
1016	}	1032	}
1017	} else if (s->num_channels == 2) {	1033	} else if (s->num_channels == 2) {
1018		1034
		1035	/* Scale with sqrt(2). 0x016A09E6 = (sqrt(2)*(1<<24)) */
1019	int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];	1036	int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
1020	vector_fixmul_scalar(ch_data[0] + sfb[0],	1037	vector_fixmul_scalar(ch_data[0] + sfb[0],
1021	ch_data[0] + sfb[0],	1038	ch_data[0] + sfb[0],
1022	0x00016A00, len,16);	1039	0x016A09E6, len);
1023	vector_fixmul_scalar(ch_data[1] + sfb[0],	1040	vector_fixmul_scalar(ch_data[1] + sfb[0],
1024	ch_data[1] + sfb[0],	1041	ch_data[1] + sfb[0],
1025	0x00016A00, len,16);	1042	0x016A09E6, len);
1026		1043
1027	}	1044	}
1028	}	1045	}
@@ -1049,7 +1066,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
1049	winlen = s->subframe_len;	1066	winlen = s->subframe_len;
1050	}	1067	}
1051		1068
1052	window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];	1069	window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
1053		1070
1054	winlen >>= 1;	1071	winlen >>= 1;
1055		1072
@@ -1261,7 +1278,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
1261		1278
1262	vector_fixmul_scalar(s->tmp+start,	1279	vector_fixmul_scalar(s->tmp+start,
1263	s->channel[c].coeffs + start,	1280	s->channel[c].coeffs + start,
1264	quant, end-start, 24);	1281	quant, end-start);
1265		1282
1266		1283
1267	}	1284	}


diff --git a/apps/codecs/libwmapro/wmaprodec.h b/apps/codecs/libwmapro/wmaprodec.h index 40f3a60db6..3203dda583 100644 --- a/apps/codecs/libwmapro/wmaprodec.h +++ b/apps/codecs/libwmapro/wmaprodec.h
@@ -1,5 +1,31 @@
		1	#include "codeclib.h"
1	#include "../libasf/asf.h"	2	#include "../libasf/asf.h"
2		3
		4	#if (CONFIG_CPU == MCF5250) || defined(CPU_S5L870X)
		5	/* Enough IRAM but performance suffers with ICODE_ATTR. */
		6	#define IBSS_ATTR_WMAPRO_LARGE_IRAM IBSS_ATTR
		7	#define ICODE_ATTR_WMAPRO_LARGE_IRAM
		8	#define ICONST_ATTR_WMAPRO_LARGE_IRAM ICONST_ATTR
		9	#define ICONST_ATTR_WMAPRO_WIN_VS_TMP
		10
		11	#elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
		12	/* Enough IRAM to move additional data and code to it. */
		13	#define IBSS_ATTR_WMAPRO_LARGE_IRAM IBSS_ATTR
		14	#define ICODE_ATTR_WMAPRO_LARGE_IRAM ICODE_ATTR
		15	#define ICONST_ATTR_WMAPRO_LARGE_IRAM ICONST_ATTR
		16	#define ICONST_ATTR_WMAPRO_WIN_VS_TMP
		17
		18	#else
		19	/* Not enough IRAM available. */
		20	#define IBSS_ATTR_WMAPRO_LARGE_IRAM
		21	#define ICODE_ATTR_WMAPRO_LARGE_IRAM
		22	#define ICONST_ATTR_WMAPRO_LARGE_IRAM
		23	/* Models with large IRAM put tmp to IRAM rather than window coefficients as
		24	* this is the fastest option. On models with smaller IRAM the 2nd-best option
		25	* is to move the window coefficients to IRAM. */
		26	#define ICONST_ATTR_WMAPRO_WIN_VS_TMP ICONST_ATTR
		27	#endif
		28
3	int decode_init(asf_waveformatex_t *wfx);	29	int decode_init(asf_waveformatex_t *wfx);
4	int decode_packet(asf_waveformatex_t *wfx,	30	int decode_packet(asf_waveformatex_t *wfx,
5	void *data, int *data_size, void* pktdata, int size);	31	void *data, int *data_size, void* pktdata, int size);