summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Giacomelli <giac2000@hotmail.com> 2007-08-24 20:02:50 +0000
committer: Michael Giacomelli <giac2000@hotmail.com> 2007-08-24 20:02:50 +0000
commit: 153d74443ae4f187fae0432346d0346bddfe46f3 (patch)
tree: df8d1f2776228bb02f93da6d179b93e961b44c45
parent: c5683b3c18cd64bff14d2213f827e966d9b7982a (diff)
download: rockbox-153d74443ae4f187fae0432346d0346bddfe46f3.tar.gz
rockbox-153d74443ae4f187fae0432346d0346bddfe46f3.zip
Clean up iMDCT coefficient calculations.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14451 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libwma/wmadeci.c 58
1 files changed, 30 insertions, 28 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 2796815d72..3b81669d8f 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -30,7 +30,7 @@
30#include "bitstream.h" 30#include "bitstream.h"
31 31
32 32
33#define VLCBITS 7 /*7 is the lowest without glitching*/ 33#define VLCBITS 7 /*7 is the lowest without glitching*/
34#define VLCMAX ((22+VLCBITS-1)/VLCBITS) 34#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
35 35
36#define EXPVLCBITS 7 36#define EXPVLCBITS 7
@@ -390,7 +390,7 @@ void vector_fmul_add_add(fixed32 *dst, const fixed32 *data, const fixed32 *windo
390 "smull r8, r9, r0, r4;" 390 "smull r8, r9, r0, r4;"
391 391
392 "ldmia %[dst], {r0, r4};" 392 "ldmia %[dst], {r0, r4};"
393 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/ 393 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
394 "smull r8, r9, r1, r5;" 394 "smull r8, r9, r1, r5;"
395 "add r1, r4, r9, lsl #1;" 395 "add r1, r4, r9, lsl #1;"
396 "stmia %[dst]!, {r0, r1};" 396 "stmia %[dst]!, {r0, r1};"
@@ -433,7 +433,7 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
433 * We ensure that when the windows overlap their squared sum 433 * We ensure that when the windows overlap their squared sum
434 * is always 1 (MDCT reconstruction rule). 434 * is always 1 (MDCT reconstruction rule).
435 * 435 *
436 * The Vorbis I spec has a great diagram explaining this process. 436 * The Vorbis I spec has a great diagram explaining this process.
437 * See section 1.3.2.3 of http://xiph.org/vorbis/doc/Vorbis_I_spec.html 437 * See section 1.3.2.3 of http://xiph.org/vorbis/doc/Vorbis_I_spec.html
438 */ 438 */
439 static void wma_window(WMADecodeContext *s, fixed32 *in, fixed32 *out) 439 static void wma_window(WMADecodeContext *s, fixed32 *in, fixed32 *out)
@@ -450,7 +450,7 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
450 vector_fmul_add_add(out, in, s->windows[bsize], block_len); 450 vector_fmul_add_add(out, in, s->windows[bsize], block_len);
451 451
452 } else { 452 } else {
453 /*previous block was smaller or the same size, so use it's size to set the window length*/ 453 /*previous block was smaller or the same size, so use it's size to set the window length*/
454 block_len = 1 << s->prev_block_len_bits; 454 block_len = 1 << s->prev_block_len_bits;
455 /*find the middle of the two overlapped blocks, this will be the first overlapped sample*/ 455 /*find the middle of the two overlapped blocks, this will be the first overlapped sample*/
456 n = (s->block_len - block_len) / 2; 456 n = (s->block_len - block_len) / 2;
@@ -460,10 +460,10 @@ static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const
460 460
461 memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32)); 461 memcpy(out+n+block_len, in+n+block_len, n*sizeof(fixed32));
462 } 462 }
463 /* Advance to the end of the current block and prepare to window it for the next block. 463 /* Advance to the end of the current block and prepare to window it for the next block.
464 * Since the window function needs to be reversed, we do it backwards starting with the 464 * Since the window function needs to be reversed, we do it backwards starting with the
465 * last sample and moving towards the first 465 * last sample and moving towards the first
466 */ 466 */
467 out += s->block_len; 467 out += s->block_len;
468 in += s->block_len; 468 in += s->block_len;
469 469
@@ -1169,7 +1169,7 @@ static int wma_decode_block(WMADecodeContext *s)
1169 int nb_coefs[MAX_CHANNELS]; 1169 int nb_coefs[MAX_CHANNELS];
1170 fixed32 mdct_norm; 1170 fixed32 mdct_norm;
1171 1171
1172 DEBUGF("***decode_block: %d of (%d samples) (%d)\n", s->block_num, s->frame_len, s->block_len); 1172 DEBUGF("***decode_block: %d of (%d samples) (%d)\n", s->block_num, s->frame_len, s->block_len);
1173 1173
1174 /* compute current block length */ 1174 /* compute current block length */
1175 if (s->use_variable_block_len) 1175 if (s->use_variable_block_len)
@@ -1456,14 +1456,22 @@ static int wma_decode_block(WMADecodeContext *s)
1456 coefs1 = s->coefs1[ch]; 1456 coefs1 = s->coefs1[ch];
1457 exponents = s->exponents[ch]; 1457 exponents = s->exponents[ch];
1458 esize = s->exponents_bsize[ch]; 1458 esize = s->exponents_bsize[ch];
1459 mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
1460 mult = fixmul64byfixed(mult, mdct_norm); //what the hell? This is actually fixed64*2^16!
1461 coefs = (*(s->coefs))[ch]; 1459 coefs = (*(s->coefs))[ch];
1462 1460
1463 n=0; 1461 n=0;
1462
1463 /*
1464 * Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could
1465 * overflow here as well, so switch to 17.15 during coefs calculation.
1466 */
1467
1464 1468
1465 if (s->use_noise_coding) 1469 if (s->use_noise_coding)
1466 { 1470 {
1471 /*TODO: mult should be converted to 32 bit to speed up noise coding*/
1472
1473 mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
1474 mult = mult* mdct_norm; //what the hell? This is actually fixed64*2^16!
1467 mult1 = mult; 1475 mult1 = mult;
1468 1476
1469 /* very low freqs : noise */ 1477 /* very low freqs : noise */
@@ -1565,29 +1573,23 @@ static int wma_decode_block(WMADecodeContext *s)
1565 } 1573 }
1566 else 1574 else
1567 { 1575 {
1576 /*Noise coding not used, simply convert from exp to fixed representation*/
1568 1577
1569 /* XXX: optimize more */ 1578
1579 fixed32 mult3 = (fixed32)(fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch])));
1580 mult3 = fixmul32(mult3, mdct_norm);
1570 1581
1571 n = nb_coefs[ch]; 1582 n = nb_coefs[ch];
1572 1583
1584 /* XXX: optimize more, unrolling this loop in asm might be a good idea */
1585
1573 for(i = 0;i < n; ++i) 1586 for(i = 0;i < n; ++i)
1574 { 1587 {
1575 /* 1588 atemp = (coefs1[i] * mult3)>>1;
1576 * Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could 1589 *coefs++=fixmul32(atemp,exponents[i<<bsize>>esize]);
1577 * overflow here as well, so switch to 17.15 now. As a bonus, this saves us a shift later on. 1590 }
1578 */
1579
1580
1581 atemp = (fixed32)(coefs1[i]*mult>>17);
1582 //this "works" in the sense that the mdcts converge
1583 //atemp= ftofix32(coefs1[i] * fixtof64(exponents[i]) * fixtof64(mult>>16));
1584
1585 *coefs++=fixmul32(atemp,exponents[i<<bsize>>esize]);
1586
1587 }
1588 n = s->block_len - s->coefs_end[bsize]; 1591 n = s->block_len - s->coefs_end[bsize];
1589 for(i = 0;i < n; ++i) 1592 memset(coefs, 0, n*sizeof(fixed32));
1590 *coefs++ = 0;
1591 } 1593 }
1592 } 1594 }
1593 } 1595 }