From 3d6faa08bf95da6e0f65a070a11f014e78c0b682 Mon Sep 17 00:00:00 2001
From: Andree Buschmann <AndreeBuschmann@t-online.de>
Date: Sun, 21 Feb 2010 19:47:05 +0000
Subject: Optimize the cook codec: rework sample output so that the highly
 optimized DSP routines can be used, and move some functions to IRAM. Speeds
 up the codec by 1.3 MHz on PP5022.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24815 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/cook.c                  | 14 +++++----
 apps/codecs/libcook/cook.c          | 12 ++++----
 apps/codecs/libcook/cook.h          |  2 +-
 apps/codecs/libcook/cook_fixpoint.h | 60 ++++++++++---------------------------
 apps/codecs/libcook/main.c          |  2 +-
 5 files changed, 34 insertions(+), 56 deletions(-)

diff --git a/apps/codecs/cook.c b/apps/codecs/cook.c
index 4214d30f38..30255d4fd7 100644
--- a/apps/codecs/cook.c
+++ b/apps/codecs/cook.c
@@ -31,6 +31,7 @@ CODEC_HEADER
 RMContext rmctx;
 RMPacket pkt;
 COOKContext q IBSS_ATTR;
+int32_t rm_outbuf[2048];
 
 static void init_rm(RMContext *rmctx)
 {
@@ -43,7 +44,6 @@ enum codec_status codec_main(void)
     static size_t buff_size;
     int datasize, res, consumed, i, time_offset;
     uint8_t *bit_buffer;
-    int16_t outbuf[2048] __attribute__((aligned(32)));
     uint16_t fs,sps,h;
     uint32_t packet_count;
     int scrambling_unit_size, num_units;
@@ -65,9 +65,11 @@ next_track:
     init_rm(&rmctx);
  
     ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
-    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+    /* cook's fixed-point samples use a 21.11 format (11 fractional bits), so
+     * DSP_SET_SAMPLE_DEPTH = 11 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 26 */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 26);
     ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ?
-                  STEREO_MONO : STEREO_INTERLEAVED);
+                  STEREO_MONO : STEREO_NONINTERLEAVED);
 
     packet_count = rmctx.nb_packets;
     rmctx.audio_framesize = rmctx.block_align;
@@ -155,7 +157,7 @@ seek_start :
                 ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
                 ci->seek_complete(); 
             }    
-            res = cook_decode_frame(&rmctx,&q, outbuf, &datasize, pkt.frames[i], rmctx.block_align);
+            res = cook_decode_frame(&rmctx,&q, rm_outbuf, &datasize, pkt.frames[i], rmctx.block_align);
             rmctx.frame_number++;
 
             /* skip the first two frames; no valid audio */
@@ -166,7 +168,9 @@ seek_start :
                 return CODEC_ERROR;
             }
 
-            ci->pcmbuf_insert(outbuf, NULL, q.samples_per_frame / rmctx.nb_channels);
+            ci->pcmbuf_insert(rm_outbuf, 
+                              rm_outbuf+q.samples_per_channel,
+                              q.samples_per_channel);
             ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);  
         }
         packet_count -= rmctx.audio_pkt_cnt;
diff --git a/apps/codecs/libcook/cook.c b/apps/codecs/libcook/cook.c
index 8d9611c4d9..814250ea32 100644
--- a/apps/codecs/libcook/cook.c
+++ b/apps/codecs/libcook/cook.c
@@ -598,7 +598,7 @@ decode_bytes_and_gain(COOKContext *q, const uint8_t *inbuffer,
 static void
 mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
                       cook_gains *gains, REAL_T *previous_buffer,
-                      int16_t *out, int chan)
+                      int32_t *out, int chan)
 {
     REAL_T *buffer = q->mono_mdct_output;
     int i;
@@ -618,7 +618,9 @@ mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
     memcpy(previous_buffer, buffer+q->samples_per_channel,
            sizeof(REAL_T)*q->samples_per_channel);
 
-    output_math(q, out, chan);
+    /* Copy output to non-interleaved sample buffer */
+    memcpy(out + (chan * q->samples_per_channel), buffer,
+           sizeof(REAL_T)*q->samples_per_channel);
 }
 
 
@@ -634,7 +636,7 @@ mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
 
 
 static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
-                            int sub_packet_size, int16_t *outbuffer) {
+                            int sub_packet_size, int32_t *outbuffer) {
     /* packet dump */
 //    for (i=0 ; i<sub_packet_size ; i++) {
 //        DEBUGF("%02x", inbuffer[i]);
@@ -666,7 +668,7 @@ static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
                                   q->mono_previous_buffer2, outbuffer, 1);
         }
     }
-    return q->samples_per_frame * sizeof(int16_t);
+    return q->samples_per_frame * sizeof(int32_t);
 }
 
 
@@ -677,7 +679,7 @@ static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
  */
 
 int cook_decode_frame(RMContext *rmctx,COOKContext *q,
-            int16_t *outbuffer, int *data_size,
+            int32_t *outbuffer, int *data_size,
             const uint8_t *inbuffer, int buf_size) {
     //COOKContext *q = avctx->priv_data;
     //COOKContext *q;
diff --git a/apps/codecs/libcook/cook.h b/apps/codecs/libcook/cook.h
index 0672553895..93abf9a52d 100644
--- a/apps/codecs/libcook/cook.h
+++ b/apps/codecs/libcook/cook.h
@@ -97,6 +97,6 @@ typedef struct cook {
 
 int cook_decode_init(RMContext *rmctx, COOKContext *q);
 int cook_decode_frame(RMContext *rmctx,COOKContext *q,
-                      int16_t *outbuffer, int *data_size,
+                      int32_t *outbuffer, int *data_size,
                       const uint8_t *inbuffer, int buf_size);
 #endif /*_COOK_H */
diff --git a/apps/codecs/libcook/cook_fixpoint.h b/apps/codecs/libcook/cook_fixpoint.h
index 30e5a3eee2..57c217dc43 100644
--- a/apps/codecs/libcook/cook_fixpoint.h
+++ b/apps/codecs/libcook/cook_fixpoint.h
@@ -79,28 +79,28 @@ static inline FIXP fixp_pow2_neg(FIXP x, int i)
 #else
 static inline FIXP fixp_mult_su(FIXP a, FIXPU b)
 {
-    int32_t hb = (a >> 16) * b; 	 
-    uint32_t lb = (a & 0xffff) * b; 	 
+    int32_t hb = (a >> 16) * b;      
+    uint32_t lb = (a & 0xffff) * b;      
 
-    return hb + (lb >> 16) + ((lb & 0x8000) >> 15); 	 
+    return hb + (lb >> 16) + ((lb & 0x8000) >> 15);      
 }
 #endif
 
 /* Faster version of the above using 32x32=64 bit multiply */
 #ifdef ROCKBOX
 #define fixmul31(x,y) (MULT31(x,y))
-#else 	 
-static inline int32_t fixmul31(int32_t x, int32_t y) 	 
-{ 	 
-    int64_t temp; 	 
+#else    
+static inline int32_t fixmul31(int32_t x, int32_t y)     
+{    
+    int64_t temp;    
 
-    temp = x; 	 
-    temp *= y; 	 
+    temp = x;    
+    temp *= y;   
 
-    temp >>= 31;        //16+31-16 = 31 bits 	 
+    temp >>= 31;        //16+31-16 = 31 bits     
     
-    return (int32_t)temp; 	 
-} 	 
+    return (int32_t)temp;    
+}    
 #endif
 
 /**
@@ -166,7 +166,8 @@ static void scalar_dequant_math(COOKContext *q, int index,
  */
 #include "../lib/mdct_lookup.h"
 
-static inline void imlt_math(COOKContext *q, FIXP *in)
+void imlt_math(COOKContext *q, FIXP *in) ICODE_ATTR;
+void imlt_math(COOKContext *q, FIXP *in)
 {
     const int n = q->samples_per_channel;
     const int step = 2 << (10 - av_log2(n));
@@ -203,7 +204,8 @@ static inline void imlt_math(COOKContext *q, FIXP *in)
  * @param gain              gain correction to apply first to output buffer
  * @param buffer            data to overlap
  */
-static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[])
+void overlap_math(COOKContext *q, int gain, FIXP buffer[]) ICODE_ATTR;
+void overlap_math(COOKContext *q, int gain, FIXP buffer[])
 {
     int i;
 #ifdef ROCKBOX
@@ -280,33 +282,3 @@ static inline FIXP cplscale_math(FIXP x, int table, int i)
 {
   return fixp_mult_su(x, cplscales[table-2][i]);
 }
-
-
-/**
- * Final converion from floating point values to
- * signed, 16 bit sound samples. Round and clip.
- *
- * @param q                 pointer to the COOKContext
- * @param out               pointer to the output buffer
- * @param chan              0: left or single channel, 1: right channel
- */
-static inline void output_math(COOKContext *q, register int16_t *out, int chan)
-{
-#ifdef ROCKBOX
-    register REAL_T * mono_output_ptr = q->mono_mdct_output;
-    register REAL_T * mono_output_end = mono_output_ptr + q->samples_per_channel;
-    out += chan;
-    const int STEP = q->nb_channels;
-    while( mono_output_ptr < mono_output_end )
-    {
-      *out = CLIP_TO_15(fixp_pow2_neg(*mono_output_ptr++, 11));
-      out += STEP;
-    }
-#else
-    int j;
-    for (j = 0; j < q->samples_per_channel; j++) {
-        out[chan + q->nb_channels * j] =
-        av_clip(fixp_pow2(q->mono_mdct_output[j], -11), -32768, 32767);
-    }
-#endif
-}
diff --git a/apps/codecs/libcook/main.c b/apps/codecs/libcook/main.c
index 3f5d3e8528..71d02fb736 100644
--- a/apps/codecs/libcook/main.c
+++ b/apps/codecs/libcook/main.c
@@ -120,7 +120,7 @@ int main(int argc, char *argv[])
     char filename[15];
     int fd_out;
 #endif
-    int16_t outbuf[2048];
+    int32_t outbuf[2048];
     uint16_t fs,sps,h;
     uint32_t packet_count;
     COOKContext q;
-- 
cgit v1.2.3