diff options
Diffstat (limited to 'apps/codecs/libatrac')
-rw-r--r-- | apps/codecs/libatrac/SOURCES | 3 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3.c | 42 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3.h | 4 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3_armv5e.S | 163 |
4 files changed, 204 insertions, 8 deletions
diff --git a/apps/codecs/libatrac/SOURCES b/apps/codecs/libatrac/SOURCES index 3eaf4c9c3b..85f011cb87 100644 --- a/apps/codecs/libatrac/SOURCES +++ b/apps/codecs/libatrac/SOURCES | |||
@@ -1,5 +1,8 @@ | |||
1 | atrac3.c | 1 | atrac3.c |
2 | #if defined(CPU_ARM) | 2 | #if defined(CPU_ARM) |
3 | atrac3_arm.S | 3 | atrac3_arm.S |
4 | #if (ARM_ARCH >= 5) | ||
5 | atrac3_armv5e.S | ||
6 | #endif | ||
4 | #endif | 7 | #endif |
5 | 8 | ||
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index b93b058bb2..f6085fa2fa 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c | |||
@@ -55,7 +55,11 @@ | |||
55 | #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) | 55 | #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) |
56 | 56 | ||
57 | static VLC spectral_coeff_tab[7]; | 57 | static VLC spectral_coeff_tab[7]; |
58 | #if defined(CPU_ARM) && (ARM_ARCH >= 5) /*ARMv5e+ uses 32x16 multiplication*/ | ||
59 | static int16_t qmf_window[48] IBSS_ATTR __attribute__ ((aligned (32))); | ||
60 | #else | ||
58 | static int32_t qmf_window[48] IBSS_ATTR; | 61 | static int32_t qmf_window[48] IBSS_ATTR; |
62 | #endif | ||
59 | static int32_t atrac3_spectrum [2][1024] IBSS_ATTR __attribute__((aligned(16))); | 63 | static int32_t atrac3_spectrum [2][1024] IBSS_ATTR __attribute__((aligned(16))); |
60 | static int32_t atrac3_IMDCT_buf[2][ 512] IBSS_ATTR __attribute__((aligned(16))); | 64 | static int32_t atrac3_IMDCT_buf[2][ 512] IBSS_ATTR __attribute__((aligned(16))); |
61 | static int32_t atrac3_prevFrame[2][1024] IBSS_ATTR; | 65 | static int32_t atrac3_prevFrame[2][1024] IBSS_ATTR; |
@@ -118,12 +122,30 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; | |||
118 | * } | 122 | * } |
119 | */ | 123 | */ |
120 | 124 | ||
121 | #if defined(CPU_ARM) | 125 | #if defined(CPU_ARM) && (ARM_ARCH >= 5) |
122 | extern void | 126 | extern void |
123 | atrac3_iqmf_dewindowing(int32_t *out, | 127 | atrac3_iqmf_dewindowing_armv5e(int32_t *out, |
124 | int32_t *in, | 128 | int32_t *in, |
125 | int32_t *win, | 129 | int16_t *win, |
126 | unsigned int nIn); | 130 | unsigned int nIn); |
131 | static inline void | |
132 | atrac3_iqmf_dewindowing(int32_t *out, | |
133 | int32_t *in, | |
134 | int16_t *win, | |
135 | unsigned int nIn) | |
136 | { | |
137 | /* Forward to the ARMv5E assembly routine (16-bit window coefficients); without this call 'out' is never written. */ | |
138 | atrac3_iqmf_dewindowing_armv5e(out, in, win, nIn); | |
139 | } | |
140 | |||
141 | |||
142 | #elif defined(CPU_ARM) | ||
143 | extern void /* generic ARM assembly dewindowing routine, used on pre-ARMv5 targets */ | |
144 | atrac3_iqmf_dewindowing(int32_t *out, | |
145 | int32_t *in, | |
146 | int32_t *win, /* 32-bit coefficients: qmf_window is int32_t[48] on this branch */ | |
147 | unsigned int nIn); | |
148 | |||
127 | #elif defined (CPU_COLDFIRE) | 149 | #elif defined (CPU_COLDFIRE) |
128 | #define MULTIPLY_ADD_BLOCK \ | 150 | #define MULTIPLY_ADD_BLOCK \ |
129 | "movem.l (%[win]), %%d0-%%d7 \n\t" \ | 151 | "movem.l (%[win]), %%d0-%%d7 \n\t" \ |
@@ -206,7 +228,9 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; | |||
206 | 228 | ||
207 | out[0] = s2; | 229 | out[0] = s2; |
208 | out[1] = s1; | 230 | out[1] = s1; |
231 | |||
209 | } | 232 | } |
233 | |||
210 | } | 234 | } |
211 | #endif | 235 | #endif |
212 | 236 | ||
@@ -244,6 +268,7 @@ atrac3_imdct_windowing(int32_t *buffer, | |||
244 | 268 | ||
245 | static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp) | 269 | static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp) |
246 | { | 270 | { |
271 | |||
247 | /* Restore the delay buffer */ | 272 | /* Restore the delay buffer */ |
248 | memcpy(temp, delayBuf, 46*sizeof(int32_t)); | 273 | memcpy(temp, delayBuf, 46*sizeof(int32_t)); |
249 | 274 | ||
@@ -274,6 +299,7 @@ static void IMLT(int32_t *pInput, int32_t *pOutput) | |||
274 | 299 | ||
275 | /* Windowing. */ | 300 | /* Windowing. */ |
276 | atrac3_imdct_windowing(pOutput, window_lookup); | 301 | atrac3_imdct_windowing(pOutput, window_lookup); |
302 | |||
277 | } | 303 | } |
278 | 304 | ||
279 | 305 | ||
@@ -320,9 +346,13 @@ static void init_atrac3_transforms(void) | |||
320 | /* Generate the QMF window. */ | 346 | /* Generate the QMF window. */ |
321 | for (i=0 ; i<24; i++) { | 347 | for (i=0 ; i<24; i++) { |
322 | s = qmf_48tap_half_fix[i] << 1; | 348 | s = qmf_48tap_half_fix[i] << 1; |
323 | qmf_window[i] = s; | 349 | #if defined(CPU_ARM) && (ARM_ARCH >= 5) |
324 | qmf_window[47 - i] = s; | 350 | qmf_window[i] = qmf_window[47-i] = (int16_t)((s+(1<<15))>>16); |
351 | #else | ||
352 | qmf_window[i] = qmf_window[47-i] = s; | ||
353 | #endif | ||
325 | } | 354 | } |
355 | |||
326 | } | 356 | } |
327 | 357 | ||
328 | 358 | ||
@@ -1229,7 +1259,7 @@ int atrac3_decode_init(ATRAC3Context *q, struct mp3entry *id3) | |||
1229 | vlcs_initialized = 1; | 1259 | vlcs_initialized = 1; |
1230 | 1260 | ||
1231 | } | 1261 | } |
1232 | 1262 | ||
1233 | init_atrac3_transforms(); | 1263 | init_atrac3_transforms(); |
1234 | 1264 | ||
1235 | /* init the joint-stereo decoding data */ | 1265 | /* init the joint-stereo decoding data */ |
diff --git a/apps/codecs/libatrac/atrac3.h b/apps/codecs/libatrac/atrac3.h index 74dd992e1b..afe582ab72 100644 --- a/apps/codecs/libatrac/atrac3.h +++ b/apps/codecs/libatrac/atrac3.h | |||
@@ -67,6 +67,7 @@ typedef struct { | |||
67 | } channel_unit; | 67 | } channel_unit; |
68 | 68 | ||
69 | typedef struct { | 69 | typedef struct { |
70 | int32_t outSamples[2048]; | ||
70 | GetBitContext gb; | 71 | GetBitContext gb; |
71 | //@{ | 72 | //@{ |
72 | /** stream data */ | 73 | /** stream data */ |
@@ -90,8 +91,7 @@ typedef struct { | |||
90 | int weighting_delay[6]; | 91 | int weighting_delay[6]; |
91 | //@} | 92 | //@} |
92 | //@{ | 93 | //@{ |
93 | /** data buffers */ | 94 | /** data buffers */ |
94 | int32_t outSamples[2048]; | ||
95 | uint8_t decoded_bytes_buffer[1024]; | 95 | uint8_t decoded_bytes_buffer[1024]; |
96 | int32_t tempBuf[1070]; | 96 | int32_t tempBuf[1070]; |
97 | //@} | 97 | //@} |
diff --git a/apps/codecs/libatrac/atrac3_armv5e.S b/apps/codecs/libatrac/atrac3_armv5e.S new file mode 100644 index 0000000000..1add5faef5 --- /dev/null +++ b/apps/codecs/libatrac/atrac3_armv5e.S | |||
@@ -0,0 +1,163 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id: | ||
9 | * | ||
10 | * Copyright (C) 2010 by Michael Giacomelli | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | #include "config.h" | ||
23 | |||
24 | .section .text, "ax", %progbits | ||
25 | |||
26 | |||
27 | /**************************************************************************** | ||
28 | * atrac3_iqmf_dewindowing_armv5e(int32_t *out, | ||
29 | * int32_t *in, | ||
30 | * int16_t *win, | |
31 | * unsigned int nIn); | ||
32 | * | ||
33 | * Dewindowing step within iqmf of atrac3 synthesis using 16 bit filter | ||
34 | * coefficients and armv5e packed multiply instructions. Uses 2.5 cycles | ||
35 | * per filter coefficient (ideal). Benchmarked 3.54 per coefficient (Clip+). | ||
36 | * | ||
37 | * Reference implementation: | ||
38 | * | ||
39 | * for (j = nIn; j != 0; j--) { | ||
40 | * s1 = fixmul32(in[0], win[0]); | ||
41 | * s2 = fixmul32(in[1], win[1]); | ||
42 | * for (i = 2; i < 48; i += 2) { | ||
43 | * s1 += fixmul32(in[i ], win[i ]); | ||
44 | * s2 += fixmul32(in[i+1], win[i+1]); | ||
45 | * } | ||
46 | * out[0] = s2 << 1; | ||
47 | * out[1] = s1 << 1; | ||
48 | * in += 2; | ||
49 | * out += 2; | ||
50 | * } | ||
51 | * Note: r12 is a scratch register and can be used without being saved and restored. | |
52 | ****************************************************************************/ | ||
53 | .align 2 | ||
54 | .global atrac3_iqmf_dewindowing_armv5e | ||
55 | .type atrac3_iqmf_dewindowing_armv5e, %function | ||
56 | |||
57 | atrac3_iqmf_dewindowing_armv5e: | |
58 | /* r0 = dest */ | |
59 | /* r1 = input samples */ | |
60 | /* r2 = window coefficients (16 bit, two packed per 32 bit word) */ | |
61 | /* r3 = counter */ | |
62 | stmfd sp!, {r4-r11, lr} /* save non-scratch registers */ | |
63 | |
64 | .iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */ | |
65 | /* 0.. 7 */ | |
66 | ldmia r2!, {r4, r5, r8, r9} /* load win[0..7] */ | |
67 | ldmia r1!, {r6, r7, r10, r11} /* load in[0..3] to avoid stall on arm11 */ | |
68 | smulwb lr, r6, r4 /* s1 = in[0] * win[0] >> 16 */ | |
69 | smulwt r12, r7, r4 /* s2 = in[1] * win[1] >> 16 */ | |
70 | smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
71 | smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
72 | |
73 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
74 | smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
75 | smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
76 | smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
77 | smlawt r12, r11, r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
78 | |
79 | /* 8..15 */ | |
80 | ldmia r2!, {r4, r5, r8, r9} /* load win[8..15] */ | |
81 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
82 | smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
83 | smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
84 | smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
85 | smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
86 | |
87 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
88 | smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
89 | smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
90 | smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
91 | smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
92 | |
93 | /* 16..23 */ | |
94 | ldmia r2!, {r4, r5, r8, r9} /* load win[16..23] */ | |
95 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
96 | smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
97 | smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
98 | smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
99 | smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
100 | |
101 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
102 | smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
103 | smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
104 | smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
105 | smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
106 | |
107 | /* 24..31 */ | |
108 | ldmia r2!, {r4, r5, r8, r9} /* load win[24..31] */ | |
109 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
110 | smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
111 | smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
112 | smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
113 | smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
114 | |
115 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
116 | smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
117 | smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
118 | smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
119 | smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
120 | |
121 | /* 32..39 */ | |
122 | ldmia r2!, {r4, r5, r8, r9} /* load win[32..39] */ | |
123 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
124 | smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
125 | smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
126 | smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
127 | smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
128 | |
129 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
130 | smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
131 | smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
132 | smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
133 | smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
134 | |
135 | /* 40..47 */ | |
136 | ldmia r2!, {r4, r5, r8, r9} /* load win[40..47] */ | |
137 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
138 | smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
139 | smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
140 | smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
141 | smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
142 | |
143 | ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ | |
144 | smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
145 | smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
146 | smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ | |
147 | smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ | |
148 | |
149 | |
150 | mov lr , lr , lsl #1 /* s1 <<= 1, as in the reference implementation */ | |
151 | mov r12, r12, lsl #1 /* s2 <<= 1 */ | |
152 | |
153 | stmia r0!, {r12, lr} /* store result out[0]=s2, out[1]=s1 */ | |
154 | sub r1, r1, #184 /* 12 loads advanced r1 by 192 bytes; roll back 46 entries = 184 bytes, net in += 2 */ | |
155 | sub r2, r2, #96 /* roll back 48 entries * 2 bytes = 96 bytes = win[0] */ | |
156 | |
157 | subs r3, r3, #1 /* outer loop -= 1 */ | |
158 | bgt .iqmf_dewindow_outer_loop /* repeat while counter > 0 */ | |
159 | |
160 | ldmpc regs=r4-r11 /* restore registers */ | |
161 | |
162 | .atrac3_iqmf_dewindowing_armv5e_end: | |
163 | .size atrac3_iqmf_dewindowing_armv5e,.atrac3_iqmf_dewindowing_armv5e_end-atrac3_iqmf_dewindowing_armv5e | |