From f5a0d61a1b24ea4397234e0c65bc0cbb71e4071f Mon Sep 17 00:00:00 2001 From: Andree Buschmann Date: Sun, 14 Feb 2010 14:38:18 +0000 Subject: Further optimization and minor clean up of atrac codec: Unrolling iqmf_dewindowing for non-ARM targets speeds up the decoder by 10% on H300. Removed some unused arrays. The codec is still not fully realtime on Coldfire targets. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24648 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libatrac/atrac3.c | 80 ++++++++++++++++++++++++++++++++++----- apps/codecs/libatrac/atrac3data.h | 11 +++--- apps/codecs/libatrac/main.c | 3 +- 3 files changed, 78 insertions(+), 16 deletions(-) diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index ac63925ce7..467f42f161 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c @@ -98,6 +98,20 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; * @param in input buffer * @param win windowing coefficients * @param nIn size of spectrum buffer + * Reference implementation: + * + * for (j = nIn; j != 0; j--) { + * s1 = fixmul31(in[0], win[0]); + * s2 = fixmul31(in[1], win[1]); + * for (i = 2; i < 48; i += 2) { + * s1 += fixmul31(in[i ], win[i ]); + * s2 += fixmul31(in[i+1], win[i+1]); + * } + * out[0] = s2; + * out[1] = s1; + * in += 2; + * out += 2; + * } */ #if defined(CPU_ARM) @@ -116,16 +130,62 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; int32_t i, j, s1, s2; for (j = nIn; j != 0; j--) { - /* i=0 */ - s1 = fixmul31(win[0], in[0]); - s2 = fixmul31(win[1], in[1]); - - /* i=2..46 */ - for (i = 2; i < 48; i += 2) { - s1 += fixmul31(win[i ], in[i ]); - s2 += fixmul31(win[i+1], in[i+1]); - } - + i = 0; + /* 0.. 
7 */ + s1 = fixmul31(win[i], in[i]); i++; + s2 = fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + /* 8..15 */ + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + /* 16..23 */ + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + /* 24..31 */ + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + /* 32..39 */ + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + /* 40..47 */ + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); i++; + s1 += fixmul31(win[i], in[i]); i++; + s2 += fixmul31(win[i], in[i]); + out[0] = s2; out[1] = s1; diff --git a/apps/codecs/libatrac/atrac3data.h 
b/apps/codecs/libatrac/atrac3data.h index 8e7fe97c32..0da9d9e758 100644 --- a/apps/codecs/libatrac/atrac3data.h +++ b/apps/codecs/libatrac/atrac3data.h @@ -117,18 +117,18 @@ static const int8_t decTable1[18] = {0,0, 0,1, 0,-1, 1,0, -1,0, 1,1, 1,-1, -1,1, /* tables for the scalefactor decoding */ - +/* not needed anymore static const float iMaxQuant[8] = { 0.0, 1.0/1.5, 1.0/2.5, 1.0/3.5, 1.0/4.5, 1.0/7.5, 1.0/15.5, 1.0/31.5 }; - +*/ static const uint16_t subbandTab[33] = { 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024 }; /* transform data */ - +/* not needed anymore static const float qmf_48tap_half[24] = { -0.00001461907, -0.00009205479, -0.000056157569, 0.00030117269, 0.0002422519,-0.00085293897, -0.0005205574, 0.0020340169, @@ -137,8 +137,9 @@ static const float qmf_48tap_half[24] = { -0.007801671, -0.034090221, 0.01880949, 0.054326009, -0.043596379, -0.099384367, 0.13207909, 0.46424159 }; - +*/ /* joint stereo related tables */ +/* not needed anymore static const float matrixCoeffs[8] = {0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0}; - +*/ #endif /* AVCODEC_ATRAC3DATA_H */ diff --git a/apps/codecs/libatrac/main.c b/apps/codecs/libatrac/main.c index 826dacf819..e0a3f8507d 100644 --- a/apps/codecs/libatrac/main.c +++ b/apps/codecs/libatrac/main.c @@ -9,6 +9,8 @@ #include "atrac3.h" #include "../librm/rm.h" +ATRAC3Context q IBSS_ATTR; + static unsigned char wav_header[44]={ 'R','I','F','F',// 0 - ChunkID 0,0,0,0, // 4 - ChunkSize (filesize-8) @@ -101,7 +103,6 @@ int main(int argc, char *argv[]) int16_t outbuf[2048]; uint16_t fs,sps,h; uint32_t packet_count; - ATRAC3Context q; RMContext rmctx; RMPacket pkt; -- cgit v1.2.3