diff options
author | Dave Hooper <dave@beermex.com> | 2010-02-17 00:49:53 +0000 |
---|---|---|
committer | Dave Hooper <dave@beermex.com> | 2010-02-17 00:49:53 +0000 |
commit | 42774d3128b91d5a37344cb40d56d3c4d147e5f2 (patch) | |
tree | bf336b407992ec9a5e454556f3351e3f8a0d10de /apps/codecs/libwma | |
parent | 62257ebc38bc0a3095b25dd0f58c4c8215edf602 (diff) | |
download | rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.tar.gz rockbox-42774d3128b91d5a37344cb40d56d3c4d147e5f2.zip |
Merge from branches/mdctexp - faster ifft+imdct in codec lib
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24712 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libwma')
-rw-r--r-- | apps/codecs/libwma/wmadec.h | 2 | ||||
-rw-r--r-- | apps/codecs/libwma/wmadeci.c | 22 | ||||
-rw-r--r-- | apps/codecs/libwma/wmafixed.c | 110 | ||||
-rw-r--r-- | apps/codecs/libwma/wmafixed.h | 45 |
4 files changed, 9 insertions, 170 deletions
diff --git a/apps/codecs/libwma/wmadec.h b/apps/codecs/libwma/wmadec.h index a547ece157..4efaa9b8a2 100644 --- a/apps/codecs/libwma/wmadec.h +++ b/apps/codecs/libwma/wmadec.h | |||
@@ -23,8 +23,6 @@ | |||
23 | #include "asf.h" | 23 | #include "asf.h" |
24 | #include "bitstream.h" /* For GetBitContext */ | 24 | #include "bitstream.h" /* For GetBitContext */ |
25 | #include "types.h" | 25 | #include "types.h" |
26 | //#include "dsputil.h" /* For MDCTContext */ | ||
27 | |||
28 | 26 | ||
29 | //#define TRACE | 27 | //#define TRACE |
30 | /* size of blocks */ | 28 | /* size of blocks */ |
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c index ae1a93ecf2..6ff6a176ee 100644 --- a/apps/codecs/libwma/wmadeci.c +++ b/apps/codecs/libwma/wmadeci.c | |||
@@ -452,17 +452,6 @@ int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx) | |||
452 | } | 452 | } |
453 | } | 453 | } |
454 | 454 | ||
455 | /*Not using the ffmpeg IMDCT anymore*/ | ||
456 | |||
457 | /* mdct_init_global(); | ||
458 | |||
459 | for(i = 0; i < s->nb_block_sizes; ++i) | ||
460 | { | ||
461 | ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); | ||
462 | |||
463 | } | ||
464 | */ | ||
465 | |||
466 | /* ffmpeg uses malloc to only allocate as many window sizes as needed. | 455 | /* ffmpeg uses malloc to only allocate as many window sizes as needed. |
467 | * However, we're really only interested in the worst case memory usage. | 456 | * However, we're really only interested in the worst case memory usage. |
466 | * In the worst case you can have 5 window sizes, 128 doubling up to 2048 | 457 | * In the worst case you can have 5 window sizes, 128 doubling up to 2048 |
@@ -1253,14 +1242,9 @@ static int wma_decode_block(WMADecodeContext *s, int32_t *scratch_buffer) | |||
1253 | 1242 | ||
1254 | n4 = s->block_len >>1; | 1243 | n4 = s->block_len >>1; |
1255 | 1244 | ||
1256 | /*faster IMDCT from Vorbis*/ | 1245 | ff_imdct_calc( (s->frame_len_bits - bsize + 1), |
1257 | mdct_backward( (1 << (s->block_len_bits+1)), (int32_t*)(*(s->coefs))[ch], (int32_t*)scratch_buffer); | 1246 | (int32_t*)scratch_buffer, |
1258 | 1247 | (*(s->coefs))[ch]); | |
1259 | /*slower but more easily understood IMDCT from FFMPEG*/ | ||
1260 | //ff_imdct_calc(&s->mdct_ctx[bsize], | ||
1261 | // output, | ||
1262 | // (*(s->coefs))[ch]); | ||
1263 | |||
1264 | 1248 | ||
1265 | /* add in the frame */ | 1249 | /* add in the frame */ |
1266 | index = (s->frame_len / 2) + s->block_pos - n4; | 1250 | index = (s->frame_len / 2) + s->block_pos - n4; |
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c index 5569309145..1472ed081c 100644 --- a/apps/codecs/libwma/wmafixed.c +++ b/apps/codecs/libwma/wmafixed.c | |||
@@ -250,113 +250,3 @@ fixed64 fixdiv64(fixed64 x, fixed64 y) | |||
250 | 250 | ||
251 | return (fixed32)(r << (PRECISION / 2)); | 251 | return (fixed32)(r << (PRECISION / 2)); |
252 | } | 252 | } |
253 | |||
254 | |||
255 | |||
256 | /* Inverse gain of circular cordic rotation in s0.31 format. */ | ||
257 | static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */ | ||
258 | |||
259 | /* Table of values of atan(2^-i) in 0.32 format fractions of pi where pi = 0xffffffff / 2 */ | ||
260 | static const unsigned long atan_table[] = { | ||
261 | 0x1fffffff, /* +0.785398163 (or pi/4) */ | ||
262 | 0x12e4051d, /* +0.463647609 */ | ||
263 | 0x09fb385b, /* +0.244978663 */ | ||
264 | 0x051111d4, /* +0.124354995 */ | ||
265 | 0x028b0d43, /* +0.062418810 */ | ||
266 | 0x0145d7e1, /* +0.031239833 */ | ||
267 | 0x00a2f61e, /* +0.015623729 */ | ||
268 | 0x00517c55, /* +0.007812341 */ | ||
269 | 0x0028be53, /* +0.003906230 */ | ||
270 | 0x00145f2e, /* +0.001953123 */ | ||
271 | 0x000a2f98, /* +0.000976562 */ | ||
272 | 0x000517cc, /* +0.000488281 */ | ||
273 | 0x00028be6, /* +0.000244141 */ | ||
274 | 0x000145f3, /* +0.000122070 */ | ||
275 | 0x0000a2f9, /* +0.000061035 */ | ||
276 | 0x0000517c, /* +0.000030518 */ | ||
277 | 0x000028be, /* +0.000015259 */ | ||
278 | 0x0000145f, /* +0.000007629 */ | ||
279 | 0x00000a2f, /* +0.000003815 */ | ||
280 | 0x00000517, /* +0.000001907 */ | ||
281 | 0x0000028b, /* +0.000000954 */ | ||
282 | 0x00000145, /* +0.000000477 */ | ||
283 | 0x000000a2, /* +0.000000238 */ | ||
284 | 0x00000051, /* +0.000000119 */ | ||
285 | 0x00000028, /* +0.000000060 */ | ||
286 | 0x00000014, /* +0.000000030 */ | ||
287 | 0x0000000a, /* +0.000000015 */ | ||
288 | 0x00000005, /* +0.000000007 */ | ||
289 | 0x00000002, /* +0.000000004 */ | ||
290 | 0x00000001, /* +0.000000002 */ | ||
291 | 0x00000000, /* +0.000000001 */ | ||
292 | 0x00000000, /* +0.000000000 */ | ||
293 | }; | ||
294 | |||
295 | |||
296 | /* | ||
297 | |||
298 | Below here functions do not use standard fixed precision! | ||
299 | */ | ||
300 | |||
301 | |||
302 | /** | ||
303 | * Implements sin and cos using CORDIC rotation. | ||
304 | * | ||
305 | * @param phase has range from 0 to 0xffffffff, representing 0 and | ||
306 | * 2*pi respectively. | ||
307 | * @param cos return address for cos | ||
308 | * @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX, | ||
309 | * representing -1 and 1 respectively. | ||
310 | * | ||
311 | * Gives at least 24 bits precision (last 2-8 bits or so are probably off) | ||
312 | */ | ||
313 | long fsincos(unsigned long phase, fixed32 *cos) | ||
314 | { | ||
315 | int32_t x, x1, y, y1; | ||
316 | unsigned long z, z1; | ||
317 | int i; | ||
318 | |||
319 | /* Setup initial vector */ | ||
320 | x = cordic_circular_gain; | ||
321 | y = 0; | ||
322 | z = phase; | ||
323 | |||
324 | /* The phase has to be somewhere between 0..pi for this to work right */ | ||
325 | if (z < 0xffffffff / 4) { | ||
326 | /* z in first quadrant, z += pi/2 to correct */ | ||
327 | x = -x; | ||
328 | z += 0xffffffff / 4; | ||
329 | } else if (z < 3 * (0xffffffff / 4)) { | ||
330 | /* z in second or third quadrant, z -= pi/2 to correct */ | |
331 | z -= 0xffffffff / 4; | ||
332 | } else { | ||
333 | /* z in fourth quadrant, z -= 3pi/2 to correct */ | ||
334 | x = -x; | ||
335 | z -= 3 * (0xffffffff / 4); | ||
336 | } | ||
337 | |||
338 | /* Each iteration adds roughly 1-bit of extra precision */ | ||
339 | for (i = 0; i < 31; i++) { | ||
340 | x1 = x >> i; | ||
341 | y1 = y >> i; | ||
342 | z1 = atan_table[i]; | ||
343 | |||
344 | /* Decide which direction to rotate the vector. The pivot point is pi/2 */ | |
345 | if (z >= 0xffffffff / 4) { | ||
346 | x -= y1; | ||
347 | y += x1; | ||
348 | z -= z1; | ||
349 | } else { | ||
350 | x += y1; | ||
351 | y -= x1; | ||
352 | z += z1; | ||
353 | } | ||
354 | } | ||
355 | |||
356 | if (cos) | ||
357 | *cos = x; | ||
358 | |||
359 | return y; | ||
360 | } | ||
361 | |||
362 | |||
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h index 6b5137e044..0ecdc5cfbc 100644 --- a/apps/codecs/libwma/wmafixed.h +++ b/apps/codecs/libwma/wmafixed.h | |||
@@ -52,10 +52,10 @@ fixed64 fixdiv64(fixed64 x, fixed64 y); | |||
52 | fixed32 fixsqrt32(fixed32 x); | 52 | fixed32 fixsqrt32(fixed32 x); |
53 | long fsincos(unsigned long phase, fixed32 *cos); | 53 | long fsincos(unsigned long phase, fixed32 *cos); |
54 | 54 | ||
55 | |||
55 | #ifdef CPU_ARM | 56 | #ifdef CPU_ARM |
56 | 57 | ||
57 | /*Sign-15.16 format */ | 58 | /*Sign-15.16 format */ |
58 | |||
59 | #define fixmul32(x, y) \ | 59 | #define fixmul32(x, y) \ |
60 | ({ int32_t __hi; \ | 60 | ({ int32_t __hi; \ |
61 | uint32_t __lo; \ | 61 | uint32_t __lo; \ |
@@ -70,18 +70,6 @@ long fsincos(unsigned long phase, fixed32 *cos); | |||
70 | __result; \ | 70 | __result; \ |
71 | }) | 71 | }) |
72 | 72 | ||
73 | #define fixmul32b(x, y) \ | ||
74 | ({ int32_t __hi; \ | ||
75 | uint32_t __lo; \ | ||
76 | int32_t __result; \ | ||
77 | asm ("smull %0, %1, %3, %4\n\t" \ | ||
78 | "movs %2, %1, lsl #1" \ | ||
79 | : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \ | ||
80 | : "%r" (x), "r" (y) \ | ||
81 | : "cc"); \ | ||
82 | __result; \ | ||
83 | }) | ||
84 | |||
85 | #elif defined(CPU_COLDFIRE) | 73 | #elif defined(CPU_COLDFIRE) |
86 | 74 | ||
87 | static inline int32_t fixmul32(int32_t x, int32_t y) | 75 | static inline int32_t fixmul32(int32_t x, int32_t y) |
@@ -91,9 +79,9 @@ static inline int32_t fixmul32(int32_t x, int32_t y) | |||
91 | #endif | 79 | #endif |
92 | int32_t t1; | 80 | int32_t t1; |
93 | asm ( | 81 | asm ( |
94 | "mac.l %[x], %[y], %%acc0 \n" /* multiply */ | 82 | "mac.l %[x], %[y], %%acc0 \n" // multiply |
95 | "mulu.l %[y], %[x] \n" /* get lower half, avoid emac stall */ | 83 | "mulu.l %[y], %[x] \n" // get lower half, avoid emac stall |
96 | "movclr.l %%acc0, %[t1] \n" /* get higher half */ | 84 | "movclr.l %%acc0, %[t1] \n" // get higher half |
97 | "lsr.l #1, %[t1] \n" | 85 | "lsr.l #1, %[t1] \n" |
98 | "move.w %[t1], %[x] \n" | 86 | "move.w %[t1], %[x] \n" |
99 | "swap %[x] \n" | 87 | "swap %[x] \n" |
@@ -103,17 +91,6 @@ static inline int32_t fixmul32(int32_t x, int32_t y) | |||
103 | return x; | 91 | return x; |
104 | } | 92 | } |
105 | 93 | ||
106 | static inline int32_t fixmul32b(int32_t x, int32_t y) | ||
107 | { | ||
108 | asm ( | ||
109 | "mac.l %[x], %[y], %%acc0 \n" /* multiply */ | ||
110 | "movclr.l %%acc0, %[x] \n" /* get higher half */ | ||
111 | : [x] "+d" (x) | ||
112 | : [y] "d" (y) | ||
113 | ); | ||
114 | return x; | ||
115 | } | ||
116 | |||
117 | #else | 94 | #else |
118 | 95 | ||
119 | static inline fixed32 fixmul32(fixed32 x, fixed32 y) | 96 | static inline fixed32 fixmul32(fixed32 x, fixed32 y) |
@@ -127,17 +104,7 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y) | |||
127 | return (fixed32)temp; | 104 | return (fixed32)temp; |
128 | } | 105 | } |
129 | 106 | ||
130 | static inline fixed32 fixmul32b(fixed32 x, fixed32 y) | ||
131 | { | ||
132 | fixed64 temp; | ||
133 | |||
134 | temp = x; | ||
135 | temp *= y; | ||
136 | |||
137 | temp >>= 31; //16+31-16 = 31 bits | ||
138 | |||
139 | return (fixed32)temp; | ||
140 | } | ||
141 | |||
142 | #endif | 107 | #endif |
143 | 108 | ||
109 | |||
110 | /* get fixmul32b from codeclib */ | ||