summary | refs | log | tree | commit | diff
path: root/apps/codecs/libwma
diff options
context:
space:
mode:
author: Dave Hooper <dave@beermex.com> 2010-02-17 00:49:53 +0000
committer: Dave Hooper <dave@beermex.com> 2010-02-17 00:49:53 +0000
commit: 42774d3128b91d5a37344cb40d56d3c4d147e5f2 (patch)
tree: bf336b407992ec9a5e454556f3351e3f8a0d10de /apps/codecs/libwma
parent: 62257ebc38bc0a3095b25dd0f58c4c8215edf602 (diff)
download: rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.tar.gz
rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.zip
Merge from branches/mdctexp - faster ifft+imdct in codec lib
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24712 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwma')
-rw-r--r-- apps/codecs/libwma/wmadec.h 2
-rw-r--r-- apps/codecs/libwma/wmadeci.c 22
-rw-r--r-- apps/codecs/libwma/wmafixed.c 110
-rw-r--r-- apps/codecs/libwma/wmafixed.h 45
4 files changed, 9 insertions, 170 deletions
diff --git a/apps/codecs/libwma/wmadec.h b/apps/codecs/libwma/wmadec.h
index a547ece157..4efaa9b8a2 100644
--- a/apps/codecs/libwma/wmadec.h
+++ b/apps/codecs/libwma/wmadec.h
@@ -23,8 +23,6 @@
23#include "asf.h" 23#include "asf.h"
24#include "bitstream.h" /* For GetBitContext */ 24#include "bitstream.h" /* For GetBitContext */
25#include "types.h" 25#include "types.h"
26//#include "dsputil.h" /* For MDCTContext */
27
28 26
29//#define TRACE 27//#define TRACE
30/* size of blocks */ 28/* size of blocks */
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index ae1a93ecf2..6ff6a176ee 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -452,17 +452,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
452 } 452 }
453 } 453 }
454 454
455 /*Not using the ffmpeg IMDCT anymore*/
456
457 /* mdct_init_global();
458
459 for(i = 0; i < s->nb_block_sizes; ++i)
460 {
461 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
462
463 }
464 */
465
466 /* ffmpeg uses malloc to only allocate as many window sizes as needed. 455 /* ffmpeg uses malloc to only allocate as many window sizes as needed.
467 * However, we're really only interested in the worst case memory usage. 456 * However, we're really only interested in the worst case memory usage.
468 * In the worst case you can have 5 window sizes, 128 doubling up 2048 457 * In the worst case you can have 5 window sizes, 128 doubling up 2048
@@ -1253,14 +1242,9 @@ static int wma_decode_block(WMADecodeContext *s, int32_t *scratch_buffer)
1253 1242
1254 n4 = s->block_len >>1; 1243 n4 = s->block_len >>1;
1255 1244
1256 /*faster IMDCT from Vorbis*/ 1245 ff_imdct_calc( (s->frame_len_bits - bsize + 1),
1257 mdct_backward( (1 << (s->block_len_bits+1)), (int32_t*)(*(s->coefs))[ch], (int32_t*)scratch_buffer); 1246 (int32_t*)scratch_buffer,
1258 1247 (*(s->coefs))[ch]);
1259 /*slower but more easily understood IMDCT from FFMPEG*/
1260 //ff_imdct_calc(&s->mdct_ctx[bsize],
1261 // output,
1262 // (*(s->coefs))[ch]);
1263
1264 1248
1265 /* add in the frame */ 1249 /* add in the frame */
1266 index = (s->frame_len / 2) + s->block_pos - n4; 1250 index = (s->frame_len / 2) + s->block_pos - n4;
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c
index 5569309145..1472ed081c 100644
--- a/apps/codecs/libwma/wmafixed.c
+++ b/apps/codecs/libwma/wmafixed.c
@@ -250,113 +250,3 @@ fixed64 fixdiv64(fixed64 x, fixed64 y)
250 250
251 return (fixed32)(r << (PRECISION / 2)); 251 return (fixed32)(r << (PRECISION / 2));
252} 252}
253
254
255
256/* Inverse gain of circular cordic rotation in s0.31 format. */
257static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */
258
259/* Table of values of atan(2^-i) in 0.32 format fractions of pi where pi = 0xffffffff / 2 */
260static const unsigned long atan_table[] = {
261 0x1fffffff, /* +0.785398163 (or pi/4) */
262 0x12e4051d, /* +0.463647609 */
263 0x09fb385b, /* +0.244978663 */
264 0x051111d4, /* +0.124354995 */
265 0x028b0d43, /* +0.062418810 */
266 0x0145d7e1, /* +0.031239833 */
267 0x00a2f61e, /* +0.015623729 */
268 0x00517c55, /* +0.007812341 */
269 0x0028be53, /* +0.003906230 */
270 0x00145f2e, /* +0.001953123 */
271 0x000a2f98, /* +0.000976562 */
272 0x000517cc, /* +0.000488281 */
273 0x00028be6, /* +0.000244141 */
274 0x000145f3, /* +0.000122070 */
275 0x0000a2f9, /* +0.000061035 */
276 0x0000517c, /* +0.000030518 */
277 0x000028be, /* +0.000015259 */
278 0x0000145f, /* +0.000007629 */
279 0x00000a2f, /* +0.000003815 */
280 0x00000517, /* +0.000001907 */
281 0x0000028b, /* +0.000000954 */
282 0x00000145, /* +0.000000477 */
283 0x000000a2, /* +0.000000238 */
284 0x00000051, /* +0.000000119 */
285 0x00000028, /* +0.000000060 */
286 0x00000014, /* +0.000000030 */
287 0x0000000a, /* +0.000000015 */
288 0x00000005, /* +0.000000007 */
289 0x00000002, /* +0.000000004 */
290 0x00000001, /* +0.000000002 */
291 0x00000000, /* +0.000000001 */
292 0x00000000, /* +0.000000000 */
293};
294
295
296/*
297
298 Below here functions do not use standard fixed precision!
299*/
300
301
302/**
303 * Implements sin and cos using CORDIC rotation.
304 *
305 * @param phase has range from 0 to 0xffffffff, representing 0 and
306 * 2*pi respectively.
307 * @param cos return address for cos
308 * @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX,
309 * representing -1 and 1 respectively.
310 *
311 * Gives at least 24 bits precision (last 2-8 bits or so are probably off)
312 */
313long fsincos(unsigned long phase, fixed32 *cos)
314{
315 int32_t x, x1, y, y1;
316 unsigned long z, z1;
317 int i;
318
319 /* Setup initial vector */
320 x = cordic_circular_gain;
321 y = 0;
322 z = phase;
323
324 /* The phase has to be somewhere between 0..pi for this to work right */
325 if (z < 0xffffffff / 4) {
326 /* z in first quadrant, z += pi/2 to correct */
327 x = -x;
328 z += 0xffffffff / 4;
329 } else if (z < 3 * (0xffffffff / 4)) {
330 /* z in third quadrant, z -= pi/2 to correct */
331 z -= 0xffffffff / 4;
332 } else {
333 /* z in fourth quadrant, z -= 3pi/2 to correct */
334 x = -x;
335 z -= 3 * (0xffffffff / 4);
336 }
337
338 /* Each iteration adds roughly 1-bit of extra precision */
339 for (i = 0; i < 31; i++) {
340 x1 = x >> i;
341 y1 = y >> i;
342 z1 = atan_table[i];
343
344 /* Decided which direction to rotate vector. Pivot point is pi/2 */
345 if (z >= 0xffffffff / 4) {
346 x -= y1;
347 y += x1;
348 z -= z1;
349 } else {
350 x += y1;
351 y -= x1;
352 z += z1;
353 }
354 }
355
356 if (cos)
357 *cos = x;
358
359 return y;
360}
361
362
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h
index 6b5137e044..0ecdc5cfbc 100644
--- a/apps/codecs/libwma/wmafixed.h
+++ b/apps/codecs/libwma/wmafixed.h
@@ -52,10 +52,10 @@ fixed64 fixdiv64(fixed64 x, fixed64 y);
52fixed32 fixsqrt32(fixed32 x); 52fixed32 fixsqrt32(fixed32 x);
53long fsincos(unsigned long phase, fixed32 *cos); 53long fsincos(unsigned long phase, fixed32 *cos);
54 54
55
55#ifdef CPU_ARM 56#ifdef CPU_ARM
56 57
57/*Sign-15.16 format */ 58/*Sign-15.16 format */
58
59#define fixmul32(x, y) \ 59#define fixmul32(x, y) \
60 ({ int32_t __hi; \ 60 ({ int32_t __hi; \
61 uint32_t __lo; \ 61 uint32_t __lo; \
@@ -70,18 +70,6 @@ long fsincos(unsigned long phase, fixed32 *cos);
70 __result; \ 70 __result; \
71 }) 71 })
72 72
73#define fixmul32b(x, y) \
74 ({ int32_t __hi; \
75 uint32_t __lo; \
76 int32_t __result; \
77 asm ("smull %0, %1, %3, %4\n\t" \
78 "movs %2, %1, lsl #1" \
79 : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
80 : "%r" (x), "r" (y) \
81 : "cc"); \
82 __result; \
83 })
84
85#elif defined(CPU_COLDFIRE) 73#elif defined(CPU_COLDFIRE)
86 74
87static inline int32_t fixmul32(int32_t x, int32_t y) 75static inline int32_t fixmul32(int32_t x, int32_t y)
@@ -91,9 +79,9 @@ static inline int32_t fixmul32(int32_t x, int32_t y)
91#endif 79#endif
92 int32_t t1; 80 int32_t t1;
93 asm ( 81 asm (
94 "mac.l %[x], %[y], %%acc0 \n" /* multiply */ 82 "mac.l %[x], %[y], %%acc0 \n" // multiply
95 "mulu.l %[y], %[x] \n" /* get lower half, avoid emac stall */ 83 "mulu.l %[y], %[x] \n" // get lower half, avoid emac stall
96 "movclr.l %%acc0, %[t1] \n" /* get higher half */ 84 "movclr.l %%acc0, %[t1] \n" // get higher half
97 "lsr.l #1, %[t1] \n" 85 "lsr.l #1, %[t1] \n"
98 "move.w %[t1], %[x] \n" 86 "move.w %[t1], %[x] \n"
99 "swap %[x] \n" 87 "swap %[x] \n"
@@ -103,17 +91,6 @@ static inline int32_t fixmul32(int32_t x, int32_t y)
103 return x; 91 return x;
104} 92}
105 93
106static inline int32_t fixmul32b(int32_t x, int32_t y)
107{
108 asm (
109 "mac.l %[x], %[y], %%acc0 \n" /* multiply */
110 "movclr.l %%acc0, %[x] \n" /* get higher half */
111 : [x] "+d" (x)
112 : [y] "d" (y)
113 );
114 return x;
115}
116
117#else 94#else
118 95
119static inline fixed32 fixmul32(fixed32 x, fixed32 y) 96static inline fixed32 fixmul32(fixed32 x, fixed32 y)
@@ -127,17 +104,7 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y)
127 return (fixed32)temp; 104 return (fixed32)temp;
128} 105}
129 106
130static inline fixed32 fixmul32b(fixed32 x, fixed32 y)
131{
132 fixed64 temp;
133
134 temp = x;
135 temp *= y;
136
137 temp >>= 31; //16+31-16 = 31 bits
138
139 return (fixed32)temp;
140}
141
142#endif 107#endif
143 108
109
110/* get fixmul32b from codeclib */