summaryrefslogtreecommitdiff
path: root/apps/codecs
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs')
-rw-r--r--apps/codecs/atrac3_rm.c2
-rw-r--r--apps/codecs/libatrac/SOURCES3
-rw-r--r--apps/codecs/libatrac/atrac3.c42
-rw-r--r--apps/codecs/libatrac/atrac3.h4
-rw-r--r--apps/codecs/libatrac/atrac3_armv5e.S163
5 files changed, 205 insertions, 9 deletions
diff --git a/apps/codecs/atrac3_rm.c b/apps/codecs/atrac3_rm.c
index 6c559ec868..bad9831a25 100644
--- a/apps/codecs/atrac3_rm.c
+++ b/apps/codecs/atrac3_rm.c
@@ -30,7 +30,7 @@ CODEC_HEADER
30 30
31static RMContext rmctx; 31static RMContext rmctx;
32static RMPacket pkt; 32static RMPacket pkt;
33static ATRAC3Context q IBSS_ATTR; 33static ATRAC3Context q IBSS_ATTR __attribute__ ((aligned (32)));
34 34
35static void init_rm(RMContext *rmctx) 35static void init_rm(RMContext *rmctx)
36{ 36{
diff --git a/apps/codecs/libatrac/SOURCES b/apps/codecs/libatrac/SOURCES
index 3eaf4c9c3b..85f011cb87 100644
--- a/apps/codecs/libatrac/SOURCES
+++ b/apps/codecs/libatrac/SOURCES
@@ -1,5 +1,8 @@
1atrac3.c 1atrac3.c
2#if defined(CPU_ARM) 2#if defined(CPU_ARM)
3atrac3_arm.S 3atrac3_arm.S
4#if (ARM_ARCH >= 5)
5atrac3_armv5e.S
6#endif
4#endif 7#endif
5 8
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c
index b93b058bb2..f6085fa2fa 100644
--- a/apps/codecs/libatrac/atrac3.c
+++ b/apps/codecs/libatrac/atrac3.c
@@ -55,7 +55,11 @@
55#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) 55#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
56 56
57static VLC spectral_coeff_tab[7]; 57static VLC spectral_coeff_tab[7];
58#if defined(CPU_ARM) && (ARM_ARCH >= 5) /*ARMv5e+ uses 32x16 multiplication*/
59static int16_t qmf_window[48] IBSS_ATTR __attribute__ ((aligned (32)));
60#else
58static int32_t qmf_window[48] IBSS_ATTR; 61static int32_t qmf_window[48] IBSS_ATTR;
62#endif
59static int32_t atrac3_spectrum [2][1024] IBSS_ATTR __attribute__((aligned(16))); 63static int32_t atrac3_spectrum [2][1024] IBSS_ATTR __attribute__((aligned(16)));
60static int32_t atrac3_IMDCT_buf[2][ 512] IBSS_ATTR __attribute__((aligned(16))); 64static int32_t atrac3_IMDCT_buf[2][ 512] IBSS_ATTR __attribute__((aligned(16)));
61static int32_t atrac3_prevFrame[2][1024] IBSS_ATTR; 65static int32_t atrac3_prevFrame[2][1024] IBSS_ATTR;
@@ -118,12 +122,30 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
118 * } 122 * }
119 */ 123 */
120 124
121#if defined(CPU_ARM) 125#if defined(CPU_ARM) && (ARM_ARCH >= 5)
122 extern void 126 extern void
123 atrac3_iqmf_dewindowing(int32_t *out, 127 atrac3_iqmf_dewindowing_armv5e(int32_t *out,
124 int32_t *in, 128 int32_t *in,
125 int32_t *win, 129 int16_t *win,
126 unsigned int nIn); 130 unsigned int nIn);
131 static inline void
132 atrac3_iqmf_dewindowing(int32_t *out,
133 int32_t *in,
134 int16_t *win,
135 unsigned int nIn)
136 {
137 //atrac3_iqmf_dewindowing_armv5e(out, in, win, nIn);
138
139 }
140
141
142#elif defined(CPU_ARM)
143 extern void
144 atrac3_iqmf_dewindowing(int32_t *out,
145 int32_t *in,
146 int16_t *win,
147 unsigned int nIn);
148
127#elif defined (CPU_COLDFIRE) 149#elif defined (CPU_COLDFIRE)
128 #define MULTIPLY_ADD_BLOCK \ 150 #define MULTIPLY_ADD_BLOCK \
129 "movem.l (%[win]), %%d0-%%d7 \n\t" \ 151 "movem.l (%[win]), %%d0-%%d7 \n\t" \
@@ -206,7 +228,9 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
206 228
207 out[0] = s2; 229 out[0] = s2;
208 out[1] = s1; 230 out[1] = s1;
231
209 } 232 }
233
210 } 234 }
211#endif 235#endif
212 236
@@ -244,6 +268,7 @@ atrac3_imdct_windowing(int32_t *buffer,
244 268
245static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp) 269static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp)
246{ 270{
271
247 /* Restore the delay buffer */ 272 /* Restore the delay buffer */
248 memcpy(temp, delayBuf, 46*sizeof(int32_t)); 273 memcpy(temp, delayBuf, 46*sizeof(int32_t));
249 274
@@ -274,6 +299,7 @@ static void IMLT(int32_t *pInput, int32_t *pOutput)
274 299
275 /* Windowing. */ 300 /* Windowing. */
276 atrac3_imdct_windowing(pOutput, window_lookup); 301 atrac3_imdct_windowing(pOutput, window_lookup);
302
277} 303}
278 304
279 305
@@ -320,9 +346,13 @@ static void init_atrac3_transforms(void)
320 /* Generate the QMF window. */ 346 /* Generate the QMF window. */
321 for (i=0 ; i<24; i++) { 347 for (i=0 ; i<24; i++) {
322 s = qmf_48tap_half_fix[i] << 1; 348 s = qmf_48tap_half_fix[i] << 1;
323 qmf_window[i] = s; 349 #if defined(CPU_ARM) && (ARM_ARCH >= 5)
324 qmf_window[47 - i] = s; 350 qmf_window[i] = qmf_window[47-i] = (int16_t)((s+(1<<15))>>16);
351 #else
352 qmf_window[i] = qmf_window[47-i] = s;
353 #endif
325 } 354 }
355
326} 356}
327 357
328 358
@@ -1229,7 +1259,7 @@ int atrac3_decode_init(ATRAC3Context *q, struct mp3entry *id3)
1229 vlcs_initialized = 1; 1259 vlcs_initialized = 1;
1230 1260
1231 } 1261 }
1232 1262
1233 init_atrac3_transforms(); 1263 init_atrac3_transforms();
1234 1264
1235 /* init the joint-stereo decoding data */ 1265 /* init the joint-stereo decoding data */
diff --git a/apps/codecs/libatrac/atrac3.h b/apps/codecs/libatrac/atrac3.h
index 74dd992e1b..afe582ab72 100644
--- a/apps/codecs/libatrac/atrac3.h
+++ b/apps/codecs/libatrac/atrac3.h
@@ -67,6 +67,7 @@ typedef struct {
67} channel_unit; 67} channel_unit;
68 68
69typedef struct { 69typedef struct {
70 int32_t outSamples[2048];
70 GetBitContext gb; 71 GetBitContext gb;
71 //@{ 72 //@{
72 /** stream data */ 73 /** stream data */
@@ -90,8 +91,7 @@ typedef struct {
90 int weighting_delay[6]; 91 int weighting_delay[6];
91 //@} 92 //@}
92 //@{ 93 //@{
93 /** data buffers */ 94 /** data buffers */
94 int32_t outSamples[2048];
95 uint8_t decoded_bytes_buffer[1024]; 95 uint8_t decoded_bytes_buffer[1024];
96 int32_t tempBuf[1070]; 96 int32_t tempBuf[1070];
97 //@} 97 //@}
diff --git a/apps/codecs/libatrac/atrac3_armv5e.S b/apps/codecs/libatrac/atrac3_armv5e.S
new file mode 100644
index 0000000000..1add5faef5
--- /dev/null
+++ b/apps/codecs/libatrac/atrac3_armv5e.S
@@ -0,0 +1,163 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id:
9 *
10 * Copyright (C) 2010 by Michael Giacomelli
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21
22#include "config.h"
23
24 .section .text, "ax", %progbits
25
26
27/****************************************************************************
28 * atrac3_iqmf_dewindowing_armv5e(int32_t *out,
29 * int32_t *in,
30 * int16_t *win,
31 * unsigned int nIn);
32 *
33 * Dewindowing step within iqmf of atrac3 synthesis using 16 bit filter
34 * coefficients and armv5e packed multiply instructions. Uses 2.5 cycles
35 * per filter coefficient (ideal). Benchmarked 3.54 per coefficient (Clip+).
36 *
37 * Reference implementation:
38 *
39 * for (j = nIn; j != 0; j--) {
40 * s1 = fixmul32(in[0], win[0]);
41 * s2 = fixmul32(in[1], win[1]);
42 * for (i = 2; i < 48; i += 2) {
43 * s1 += fixmul32(in[i ], win[i ]);
44 * s2 += fixmul32(in[i+1], win[i+1]);
45 * }
46 * out[0] = s2 << 1;
47 * out[1] = s1 << 1;
48 * in += 2;
49 * out += 2;
50 * }
51 * Note: r12 is a scratch register and can be used without being restored.
52 ****************************************************************************/
53 .align 2
54 .global atrac3_iqmf_dewindowing_armv5e
55 .type atrac3_iqmf_dewindowing_armv5e, %function
56
57atrac3_iqmf_dewindowing_armv5e:
58 /* r0 = dest */
59 /* r1 = input samples */
60 /* r2 = window coefficients */
61 /* r3 = counter */
62 stmfd sp!, {r4-r11, lr} /* save non-scratch registers */
63
64.iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */
65 /* 0.. 7 */
66 ldmia r2!, {r4, r5, r8, r9} /* load win[0..7] */
67 ldmia r1!, {r6, r7, r10, r11} /* load in[0..3] to avoid stall on arm11 */
68 smulwb lr, r6, r4 /* s1 = in[0] * win[0] */
69 smulwt r12, r7, r4 /* s2 = in[1] * win[1] */
70 smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
71 smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
72
73 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
74 smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
75 smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
76 smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
77 smlawt r12, r11, r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
78
79 /* 8..15 */
80 ldmia r2!, {r4, r5, r8, r9} /* load win[8..15] */
81 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
82 smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
83 smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
84 smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
85 smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
86
87 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
88 smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
89 smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
90 smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
91 smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
92
93 /* 16..23 */
94 ldmia r2!, {r4, r5, r8, r9} /* load win[16..23] */
95 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
96 smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
97 smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
98 smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
99 smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
100
101 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
102 smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
103 smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
104 smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
105 smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
106
107 /* 24..31 */
108 ldmia r2!, {r4, r5, r8, r9} /* load win[24..31] */
109 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
110 smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
111 smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
112 smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
113 smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
114
115 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
116 smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
117 smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
118 smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
119 smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
120
121 /* 32..39 */
122 ldmia r2!, {r4, r5, r8, r9} /* load win[32..39] */
123 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
124 smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
125 smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
126 smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
127 smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
128
129 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
130 smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
131 smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
132 smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
133 smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
134
135 /* 40..47 */
136 ldmia r2!, {r4, r5, r8, r9} /* load win[40..47] */
137 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
138 smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
139 smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
140 smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
141 smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
142
143 ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */
144 smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
145 smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
146 smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
147 smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
148
149
150 mov lr , lr , lsl #1
151 mov r12, r12, lsl #1
152
153 stmia r0!, {r12, lr} /* store result out[0]=s2, out[1]=s1 */
154 sub r1, r1, #184 /* roll back 46 entries = 184 bytes (48 were loaded, so net in += 2) */
155 sub r2, r2, #96 /* roll back 48 entries * 2 bytes = 96 bytes = win[0] */
156
157 subs r3, r3, #1 /* outer loop -= 1 */
158 bgt .iqmf_dewindow_outer_loop
159
160 ldmpc regs=r4-r11 /* restore registers */
161
162.atrac3_iqmf_dewindowing_armv5e_end:
163 .size atrac3_iqmf_dewindowing_armv5e,.atrac3_iqmf_dewindowing_armv5e_end-atrac3_iqmf_dewindowing_armv5e