summary refs log tree commit diff
path: root/apps/codecs/libfaad/sbr_dct.c
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/libfaad/sbr_dct.c')
-rw-r--r-- apps/codecs/libfaad/sbr_dct.c 345
1 files changed, 49 insertions, 296 deletions
diff --git a/apps/codecs/libfaad/sbr_dct.c b/apps/codecs/libfaad/sbr_dct.c
index c916a82a61..123514f226 100644
--- a/apps/codecs/libfaad/sbr_dct.c
+++ b/apps/codecs/libfaad/sbr_dct.c
@@ -26,6 +26,9 @@
26**/ 26**/
27 27
28#include "common.h" 28#include "common.h"
29#include "../lib/fft.h"
30#include "../lib/mdct_lookup.h"
31
29 32
30#ifdef SBR_DEC 33#ifdef SBR_DEC
31 34
@@ -1447,267 +1450,9 @@ void DCT2_32_unscaled(real_t *y, real_t *x)
1447 y[17] = f286 - f285; 1450 y[17] = f286 - f285;
1448} 1451}
1449 1452
1450#else 1453#else /* #ifdef SBR_LOW_POWER */
1451
1452
1453#define n 32
1454#define log2n 5
1455
1456// w_array_real[i] = cos(2*M_PI*i/32)
1457static const real_t w_array_real[] = {
1458 FRAC_CONST(1.000000000000000), FRAC_CONST(0.980785279337272),
1459 FRAC_CONST(0.923879528329380), FRAC_CONST(0.831469603195765),
1460 FRAC_CONST(0.707106765732237), FRAC_CONST(0.555570210304169),
1461 FRAC_CONST(0.382683402077046), FRAC_CONST(0.195090284503576),
1462 FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090370246552),
1463 FRAC_CONST(-0.382683482845162), FRAC_CONST(-0.555570282993553),
1464 FRAC_CONST(-0.707106827549476), FRAC_CONST(-0.831469651765257),
1465 FRAC_CONST(-0.923879561784627), FRAC_CONST(-0.980785296392607)
1466};
1467
1468// w_array_imag[i] = sin(-2*M_PI*i/32)
1469static const real_t w_array_imag[] = {
1470 FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090327375064),
1471 FRAC_CONST(-0.382683442461104), FRAC_CONST(-0.555570246648862),
1472 FRAC_CONST(-0.707106796640858), FRAC_CONST(-0.831469627480512),
1473 FRAC_CONST(-0.923879545057005), FRAC_CONST(-0.980785287864940),
1474 FRAC_CONST(-1.000000000000000), FRAC_CONST(-0.980785270809601),
1475 FRAC_CONST(-0.923879511601754), FRAC_CONST(-0.831469578911016),
1476 FRAC_CONST(-0.707106734823616), FRAC_CONST(-0.555570173959476),
1477 FRAC_CONST(-0.382683361692986), FRAC_CONST(-0.195090241632088)
1478};
1479
1480// FFT decimation in frequency
1481// 4*16*2+16=128+16=144 multiplications
1482// 6*16*2+10*8+4*16*2=192+80+128=400 additions
1483static void fft_dif(real_t * Real, real_t * Imag)
1484{
1485 real_t w_real, w_imag; // For faster access
1486 real_t point1_real, point1_imag, point2_real, point2_imag; // For faster access
1487 uint32_t j, i, i2, w_index; // Counters
1488
1489 // First 2 stages of 32 point FFT decimation in frequency
1490 // 4*16*2=64*2=128 multiplications
1491 // 6*16*2=96*2=192 additions
1492 // Stage 1 of 32 point FFT decimation in frequency
1493 for (i = 0; i < 16; i++)
1494 {
1495 point1_real = Real[i];
1496 point1_imag = Imag[i];
1497 i2 = i+16;
1498 point2_real = Real[i2];
1499 point2_imag = Imag[i2];
1500
1501 w_real = w_array_real[i];
1502 w_imag = w_array_imag[i];
1503
1504 // temp1 = x[i] - x[i2]
1505 point1_real -= point2_real;
1506 point1_imag -= point2_imag;
1507
1508 // x[i1] = x[i] + x[i2]
1509 Real[i] += point2_real;
1510 Imag[i] += point2_imag;
1511
1512 // x[i2] = (x[i] - x[i2]) * w
1513 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1514 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1515 }
1516 // Stage 2 of 32 point FFT decimation in frequency
1517 for (j = 0, w_index = 0; j < 8; j++, w_index += 2)
1518 {
1519 w_real = w_array_real[w_index];
1520 w_imag = w_array_imag[w_index];
1521
1522 i = j;
1523 point1_real = Real[i];
1524 point1_imag = Imag[i];
1525 i2 = i+8;
1526 point2_real = Real[i2];
1527 point2_imag = Imag[i2];
1528
1529 // temp1 = x[i] - x[i2]
1530 point1_real -= point2_real;
1531 point1_imag -= point2_imag;
1532
1533 // x[i1] = x[i] + x[i2]
1534 Real[i] += point2_real;
1535 Imag[i] += point2_imag;
1536 1454
1537 // x[i2] = (x[i] - x[i2]) * w 1455static const real_t dct4_64_tab[] ICONST_ATTR = {
1538 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1539 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1540
1541 i = j+16;
1542 point1_real = Real[i];
1543 point1_imag = Imag[i];
1544 i2 = i+8;
1545 point2_real = Real[i2];
1546 point2_imag = Imag[i2];
1547
1548 // temp1 = x[i] - x[i2]
1549 point1_real -= point2_real;
1550 point1_imag -= point2_imag;
1551
1552 // x[i1] = x[i] + x[i2]
1553 Real[i] += point2_real;
1554 Imag[i] += point2_imag;
1555
1556 // x[i2] = (x[i] - x[i2]) * w
1557 Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag));
1558 Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real));
1559 }
1560
1561 // Stage 3 of 32 point FFT decimation in frequency
1562 // 2*4*2=16 multiplications
1563 // 4*4*2+6*4*2=10*8=80 additions
1564 for (i = 0; i < n; i += 8)
1565 {
1566 i2 = i+4;
1567 point1_real = Real[i];
1568 point1_imag = Imag[i];
1569
1570 point2_real = Real[i2];
1571 point2_imag = Imag[i2];
1572
1573 // out[i1] = point1 + point2
1574 Real[i] += point2_real;
1575 Imag[i] += point2_imag;
1576
1577 // out[i2] = point1 - point2
1578 Real[i2] = point1_real - point2_real;
1579 Imag[i2] = point1_imag - point2_imag;
1580 }
1581 w_real = w_array_real[4]; // = sqrt(2)/2
1582 // w_imag = -w_real; // = w_array_imag[4]; // = -sqrt(2)/2
1583 for (i = 1; i < n; i += 8)
1584 {
1585 i2 = i+4;
1586 point1_real = Real[i];
1587 point1_imag = Imag[i];
1588
1589 point2_real = Real[i2];
1590 point2_imag = Imag[i2];
1591
1592 // temp1 = x[i] - x[i2]
1593 point1_real -= point2_real;
1594 point1_imag -= point2_imag;
1595
1596 // x[i1] = x[i] + x[i2]
1597 Real[i] += point2_real;
1598 Imag[i] += point2_imag;
1599
1600 // x[i2] = (x[i] - x[i2]) * w
1601 Real[i2] = MUL_F(point1_real+point1_imag, w_real);
1602 Imag[i2] = MUL_F(point1_imag-point1_real, w_real);
1603 }
1604 for (i = 2; i < n; i += 8)
1605 {
1606 i2 = i+4;
1607 point1_real = Real[i];
1608 point1_imag = Imag[i];
1609
1610 point2_real = Real[i2];
1611 point2_imag = Imag[i2];
1612
1613 // x[i] = x[i] + x[i2]
1614 Real[i] += point2_real;
1615 Imag[i] += point2_imag;
1616
1617 // x[i2] = (x[i] - x[i2]) * (-i)
1618 Real[i2] = point1_imag - point2_imag;
1619 Imag[i2] = point2_real - point1_real;
1620 }
1621 w_real = w_array_real[12]; // = -sqrt(2)/2
1622 // w_imag = w_real; // = w_array_imag[12]; // = -sqrt(2)/2
1623 for (i = 3; i < n; i += 8)
1624 {
1625 i2 = i+4;
1626 point1_real = Real[i];
1627 point1_imag = Imag[i];
1628
1629 point2_real = Real[i2];
1630 point2_imag = Imag[i2];
1631
1632 // temp1 = x[i] - x[i2]
1633 point1_real -= point2_real;
1634 point1_imag -= point2_imag;
1635
1636 // x[i1] = x[i] + x[i2]
1637 Real[i] += point2_real;
1638 Imag[i] += point2_imag;
1639
1640 // x[i2] = (x[i] - x[i2]) * w
1641 Real[i2] = MUL_F(point1_real-point1_imag, w_real);
1642 Imag[i2] = MUL_F(point1_real+point1_imag, w_real);
1643 }
1644
1645
1646 // Stage 4 of 32 point FFT decimation in frequency (no multiplications)
1647 // 16*4=64 additions
1648 for (i = 0; i < n; i += 4)
1649 {
1650 i2 = i+2;
1651 point1_real = Real[i];
1652 point1_imag = Imag[i];
1653
1654 point2_real = Real[i2];
1655 point2_imag = Imag[i2];
1656
1657 // x[i1] = x[i] + x[i2]
1658 Real[i] += point2_real;
1659 Imag[i] += point2_imag;
1660
1661 // x[i2] = x[i] - x[i2]
1662 Real[i2] = point1_real - point2_real;
1663 Imag[i2] = point1_imag - point2_imag;
1664 }
1665 for (i = 1; i < n; i += 4)
1666 {
1667 i2 = i+2;
1668 point1_real = Real[i];
1669 point1_imag = Imag[i];
1670
1671 point2_real = Real[i2];
1672 point2_imag = Imag[i2];
1673
1674 // x[i] = x[i] + x[i2]
1675 Real[i] += point2_real;
1676 Imag[i] += point2_imag;
1677
1678 // x[i2] = (x[i] - x[i2]) * (-i)
1679 Real[i2] = point1_imag - point2_imag;
1680 Imag[i2] = point2_real - point1_real;
1681 }
1682
1683 // Stage 5 of 32 point FFT decimation in frequency (no multiplications)
1684 // 16*4=64 additions
1685 for (i = 0; i < n; i += 2)
1686 {
1687 i2 = i+1;
1688 point1_real = Real[i];
1689 point1_imag = Imag[i];
1690
1691 point2_real = Real[i2];
1692 point2_imag = Imag[i2];
1693
1694 // out[i1] = point1 + point2
1695 Real[i] += point2_real;
1696 Imag[i] += point2_imag;
1697
1698 // out[i2] = point1 - point2
1699 Real[i2] = point1_real - point2_real;
1700 Imag[i2] = point1_imag - point2_imag;
1701 }
1702
1703#ifdef REORDER_IN_FFT
1704 FFTReorder(Real, Imag);
1705#endif // #ifdef REORDER_IN_FFT
1706}
1707#undef n
1708#undef log2n
1709
1710static const real_t dct4_64_tab[] = {
1711 COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507), 1456 COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507),
1712 COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537), 1457 COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537),
1713 COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708), 1458 COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708),
@@ -1806,57 +1551,65 @@ static const real_t dct4_64_tab[] = {
1806 COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382) 1551 COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382)
1807}; 1552};
1808 1553
1554// Table adapted from codeclib to fit into IRAM
1555const uint32_t dct4_revtab[32] ICONST_ATTR = {
1556 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17,
1557 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16};
1558
1809/* size 64 only! */ 1559/* size 64 only! */
1810void dct4_kernel(real_t * in_real, real_t * in_imag, real_t * out_real, real_t * out_imag) 1560void dct4_kernel(real_t *real, real_t *imag)
1811{ 1561{
1812 // Tables with bit reverse values for 5 bits, bit reverse of i at i-th position 1562 uint32_t i, idx;
1813 const uint8_t bit_rev_tab[32] = { 0,16,8,24,4,20,12,28,2,18,10,26,6,22,14,30,1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31 }; 1563 real_t x_re, x_im, tmp;
1814 uint16_t i, i_rev; 1564 FFTComplex xc[32]; /* used for calling codeclib's fft implementation */
1815 1565
1816 /* Step 2: modulate */ 1566 /* Step 2: modulate and pre-rotate for codeclib's fft implementation */
1817 // 3*32=96 multiplications 1567 // 3*32=96 multiplications
1818 // 3*32=96 additions 1568 // 3*32=96 additions
1819 for (i = 0; i < 32; i++) 1569 for (i = 0; i < 32; i++)
1820 { 1570 {
1821 real_t x_re, x_im, tmp; 1571 idx = dct4_revtab[i];
1822 x_re = in_real[i]; 1572 x_re = real[i];
1823 x_im = in_imag[i]; 1573 x_im = imag[i];
1824 tmp = MUL_C(x_re + x_im, dct4_64_tab[i]); 1574 tmp = MUL_C(x_re + x_im, dct4_64_tab[i ]);
1825 in_real[i] = MUL_C(x_im, dct4_64_tab[i + 64]) + tmp; 1575 xc[idx].re = MUL_C(x_im , dct4_64_tab[i + 64]) + tmp;
1826 in_imag[i] = MUL_C(x_re, dct4_64_tab[i + 32]) + tmp; 1576 xc[idx].im = MUL_C(x_re , dct4_64_tab[i + 32]) + tmp;
1827 } 1577 }
1828 1578
1829 /* Step 3: FFT, but with output in bit reverse order */ 1579 /* Step 3: FFT (codeclib's implementation) */
1830 fft_dif(in_real, in_imag); 1580 ff_fft_calc_c(5, xc);
1831 1581
1832 /* Step 4: modulate + bitreverse reordering */ 1582 /* Step 4: modulate + reordering */
1833 // 3*31+2=95 multiplications 1583 // 3*31+2=95 multiplications
1834 // 3*31+2=95 additions 1584 // 3*31+2=95 additions
1835 for (i = 0; i < 16; i++) 1585 x_re = xc[0].re;
1586 x_im = xc[0].im;
1587 tmp = MUL_C(x_re + x_im, dct4_64_tab[0 + 3*32]);
1588 real[0] = MUL_C(x_im , dct4_64_tab[0 + 5*32]) + tmp;
1589 imag[0] = MUL_C(x_re , dct4_64_tab[0 + 4*32]) + tmp;
1590 for (i = 1; i < 16; i++)
1836 { 1591 {
1837 real_t x_re, x_im, tmp; 1592 idx = 32-i;
1838 i_rev = bit_rev_tab[i]; 1593 x_re = xc[idx].re;
1839 x_re = in_real[i_rev]; 1594 x_im = xc[idx].im;
1840 x_im = in_imag[i_rev]; 1595 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
1841 1596 real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp;
1842 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); 1597 imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp;
1843 out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp;
1844 out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;
1845 } 1598 }
1846 // i = 16, i_rev = 1 = rev(16); 1599 // i = 16, idx = 16 = reorder_tab[16];
1847 out_imag[16] = MUL_C(in_imag[1] - in_real[1], dct4_64_tab[16 + 3*32]); 1600 x_re = xc[16].re;
1848 out_real[16] = MUL_C(in_real[1] + in_imag[1], dct4_64_tab[16 + 3*32]); 1601 x_im = xc[16].im;
1602 imag[16] = MUL_C(x_im - x_re, dct4_64_tab[16 + 3*32]);
1603 real[16] = MUL_C(x_re + x_im, dct4_64_tab[16 + 3*32]);
1849 for (i = 17; i < 32; i++) 1604 for (i = 17; i < 32; i++)
1850 { 1605 {
1851 real_t x_re, x_im, tmp; 1606 idx = 32-i;
1852 i_rev = bit_rev_tab[i]; 1607 x_re = xc[idx].re;
1853 x_re = in_real[i_rev]; 1608 x_im = xc[idx].im;
1854 x_im = in_imag[i_rev]; 1609 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]);
1855 tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); 1610 real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp;
1856 out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp; 1611 imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp;
1857 out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp;
1858 } 1612 }
1859
1860} 1613}
1861 1614
1862void DST4_32(real_t *y, real_t *x) 1615void DST4_32(real_t *y, real_t *x)
@@ -2266,6 +2019,6 @@ void DST4_32(real_t *y, real_t *x)
2266 y[0] = MUL_R(REAL_CONST(20.3738781672314530), f304); 2019 y[0] = MUL_R(REAL_CONST(20.3738781672314530), f304);
2267} 2020}
2268 2021
2269#endif 2022#endif /* #ifdef SBR_LOW_POWER */
2270 2023
2271#endif 2024#endif /* #ifdef SBR_DEC */