diff options
Diffstat (limited to 'apps/codecs/libfaad/sbr_dct.c')
-rw-r--r-- | apps/codecs/libfaad/sbr_dct.c | 345 |
1 files changed, 49 insertions, 296 deletions
diff --git a/apps/codecs/libfaad/sbr_dct.c b/apps/codecs/libfaad/sbr_dct.c index c916a82a61..123514f226 100644 --- a/apps/codecs/libfaad/sbr_dct.c +++ b/apps/codecs/libfaad/sbr_dct.c | |||
@@ -26,6 +26,9 @@ | |||
26 | **/ | 26 | **/ |
27 | 27 | ||
28 | #include "common.h" | 28 | #include "common.h" |
29 | #include "../lib/fft.h" | ||
30 | #include "../lib/mdct_lookup.h" | ||
31 | |||
29 | 32 | ||
30 | #ifdef SBR_DEC | 33 | #ifdef SBR_DEC |
31 | 34 | ||
@@ -1447,267 +1450,9 @@ void DCT2_32_unscaled(real_t *y, real_t *x) | |||
1447 | y[17] = f286 - f285; | 1450 | y[17] = f286 - f285; |
1448 | } | 1451 | } |
1449 | 1452 | ||
1450 | #else | 1453 | #else /* #ifdef SBR_LOW_POWER */ |
1451 | |||
1452 | |||
1453 | #define n 32 | ||
1454 | #define log2n 5 | ||
1455 | |||
1456 | // w_array_real[i] = cos(2*M_PI*i/32) | ||
1457 | static const real_t w_array_real[] = { | ||
1458 | FRAC_CONST(1.000000000000000), FRAC_CONST(0.980785279337272), | ||
1459 | FRAC_CONST(0.923879528329380), FRAC_CONST(0.831469603195765), | ||
1460 | FRAC_CONST(0.707106765732237), FRAC_CONST(0.555570210304169), | ||
1461 | FRAC_CONST(0.382683402077046), FRAC_CONST(0.195090284503576), | ||
1462 | FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090370246552), | ||
1463 | FRAC_CONST(-0.382683482845162), FRAC_CONST(-0.555570282993553), | ||
1464 | FRAC_CONST(-0.707106827549476), FRAC_CONST(-0.831469651765257), | ||
1465 | FRAC_CONST(-0.923879561784627), FRAC_CONST(-0.980785296392607) | ||
1466 | }; | ||
1467 | |||
1468 | // w_array_imag[i] = sin(-2*M_PI*i/32) | ||
1469 | static const real_t w_array_imag[] = { | ||
1470 | FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090327375064), | ||
1471 | FRAC_CONST(-0.382683442461104), FRAC_CONST(-0.555570246648862), | ||
1472 | FRAC_CONST(-0.707106796640858), FRAC_CONST(-0.831469627480512), | ||
1473 | FRAC_CONST(-0.923879545057005), FRAC_CONST(-0.980785287864940), | ||
1474 | FRAC_CONST(-1.000000000000000), FRAC_CONST(-0.980785270809601), | ||
1475 | FRAC_CONST(-0.923879511601754), FRAC_CONST(-0.831469578911016), | ||
1476 | FRAC_CONST(-0.707106734823616), FRAC_CONST(-0.555570173959476), | ||
1477 | FRAC_CONST(-0.382683361692986), FRAC_CONST(-0.195090241632088) | ||
1478 | }; | ||
1479 | |||
1480 | // FFT decimation in frequency | ||
1481 | // 4*16*2+16=128+16=144 multiplications | ||
1482 | // 6*16*2+10*8+4*16*2=192+80+128=400 additions | ||
1483 | static void fft_dif(real_t * Real, real_t * Imag) | ||
1484 | { | ||
1485 | real_t w_real, w_imag; // For faster access | ||
1486 | real_t point1_real, point1_imag, point2_real, point2_imag; // For faster access | ||
1487 | uint32_t j, i, i2, w_index; // Counters | ||
1488 | |||
1489 | // First 2 stages of 32 point FFT decimation in frequency | ||
1490 | // 4*16*2=64*2=128 multiplications | ||
1491 | // 6*16*2=96*2=192 additions | ||
1492 | // Stage 1 of 32 point FFT decimation in frequency | ||
1493 | for (i = 0; i < 16; i++) | ||
1494 | { | ||
1495 | point1_real = Real[i]; | ||
1496 | point1_imag = Imag[i]; | ||
1497 | i2 = i+16; | ||
1498 | point2_real = Real[i2]; | ||
1499 | point2_imag = Imag[i2]; | ||
1500 | |||
1501 | w_real = w_array_real[i]; | ||
1502 | w_imag = w_array_imag[i]; | ||
1503 | |||
1504 | // temp1 = x[i] - x[i2] | ||
1505 | point1_real -= point2_real; | ||
1506 | point1_imag -= point2_imag; | ||
1507 | |||
1508 | // x[i1] = x[i] + x[i2] | ||
1509 | Real[i] += point2_real; | ||
1510 | Imag[i] += point2_imag; | ||
1511 | |||
1512 | // x[i2] = (x[i] - x[i2]) * w | ||
1513 | Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag)); | ||
1514 | Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real)); | ||
1515 | } | ||
1516 | // Stage 2 of 32 point FFT decimation in frequency | ||
1517 | for (j = 0, w_index = 0; j < 8; j++, w_index += 2) | ||
1518 | { | ||
1519 | w_real = w_array_real[w_index]; | ||
1520 | w_imag = w_array_imag[w_index]; | ||
1521 | |||
1522 | i = j; | ||
1523 | point1_real = Real[i]; | ||
1524 | point1_imag = Imag[i]; | ||
1525 | i2 = i+8; | ||
1526 | point2_real = Real[i2]; | ||
1527 | point2_imag = Imag[i2]; | ||
1528 | |||
1529 | // temp1 = x[i] - x[i2] | ||
1530 | point1_real -= point2_real; | ||
1531 | point1_imag -= point2_imag; | ||
1532 | |||
1533 | // x[i1] = x[i] + x[i2] | ||
1534 | Real[i] += point2_real; | ||
1535 | Imag[i] += point2_imag; | ||
1536 | 1454 | ||
1537 | // x[i2] = (x[i] - x[i2]) * w | 1455 | static const real_t dct4_64_tab[] ICONST_ATTR = { |
1538 | Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag)); | ||
1539 | Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real)); | ||
1540 | |||
1541 | i = j+16; | ||
1542 | point1_real = Real[i]; | ||
1543 | point1_imag = Imag[i]; | ||
1544 | i2 = i+8; | ||
1545 | point2_real = Real[i2]; | ||
1546 | point2_imag = Imag[i2]; | ||
1547 | |||
1548 | // temp1 = x[i] - x[i2] | ||
1549 | point1_real -= point2_real; | ||
1550 | point1_imag -= point2_imag; | ||
1551 | |||
1552 | // x[i1] = x[i] + x[i2] | ||
1553 | Real[i] += point2_real; | ||
1554 | Imag[i] += point2_imag; | ||
1555 | |||
1556 | // x[i2] = (x[i] - x[i2]) * w | ||
1557 | Real[i2] = (MUL_F(point1_real,w_real) - MUL_F(point1_imag,w_imag)); | ||
1558 | Imag[i2] = (MUL_F(point1_real,w_imag) + MUL_F(point1_imag,w_real)); | ||
1559 | } | ||
1560 | |||
1561 | // Stage 3 of 32 point FFT decimation in frequency | ||
1562 | // 2*4*2=16 multiplications | ||
1563 | // 4*4*2+6*4*2=10*8=80 additions | ||
1564 | for (i = 0; i < n; i += 8) | ||
1565 | { | ||
1566 | i2 = i+4; | ||
1567 | point1_real = Real[i]; | ||
1568 | point1_imag = Imag[i]; | ||
1569 | |||
1570 | point2_real = Real[i2]; | ||
1571 | point2_imag = Imag[i2]; | ||
1572 | |||
1573 | // out[i1] = point1 + point2 | ||
1574 | Real[i] += point2_real; | ||
1575 | Imag[i] += point2_imag; | ||
1576 | |||
1577 | // out[i2] = point1 - point2 | ||
1578 | Real[i2] = point1_real - point2_real; | ||
1579 | Imag[i2] = point1_imag - point2_imag; | ||
1580 | } | ||
1581 | w_real = w_array_real[4]; // = sqrt(2)/2 | ||
1582 | // w_imag = -w_real; // = w_array_imag[4]; // = -sqrt(2)/2 | ||
1583 | for (i = 1; i < n; i += 8) | ||
1584 | { | ||
1585 | i2 = i+4; | ||
1586 | point1_real = Real[i]; | ||
1587 | point1_imag = Imag[i]; | ||
1588 | |||
1589 | point2_real = Real[i2]; | ||
1590 | point2_imag = Imag[i2]; | ||
1591 | |||
1592 | // temp1 = x[i] - x[i2] | ||
1593 | point1_real -= point2_real; | ||
1594 | point1_imag -= point2_imag; | ||
1595 | |||
1596 | // x[i1] = x[i] + x[i2] | ||
1597 | Real[i] += point2_real; | ||
1598 | Imag[i] += point2_imag; | ||
1599 | |||
1600 | // x[i2] = (x[i] - x[i2]) * w | ||
1601 | Real[i2] = MUL_F(point1_real+point1_imag, w_real); | ||
1602 | Imag[i2] = MUL_F(point1_imag-point1_real, w_real); | ||
1603 | } | ||
1604 | for (i = 2; i < n; i += 8) | ||
1605 | { | ||
1606 | i2 = i+4; | ||
1607 | point1_real = Real[i]; | ||
1608 | point1_imag = Imag[i]; | ||
1609 | |||
1610 | point2_real = Real[i2]; | ||
1611 | point2_imag = Imag[i2]; | ||
1612 | |||
1613 | // x[i] = x[i] + x[i2] | ||
1614 | Real[i] += point2_real; | ||
1615 | Imag[i] += point2_imag; | ||
1616 | |||
1617 | // x[i2] = (x[i] - x[i2]) * (-i) | ||
1618 | Real[i2] = point1_imag - point2_imag; | ||
1619 | Imag[i2] = point2_real - point1_real; | ||
1620 | } | ||
1621 | w_real = w_array_real[12]; // = -sqrt(2)/2 | ||
1622 | // w_imag = w_real; // = w_array_imag[12]; // = -sqrt(2)/2 | ||
1623 | for (i = 3; i < n; i += 8) | ||
1624 | { | ||
1625 | i2 = i+4; | ||
1626 | point1_real = Real[i]; | ||
1627 | point1_imag = Imag[i]; | ||
1628 | |||
1629 | point2_real = Real[i2]; | ||
1630 | point2_imag = Imag[i2]; | ||
1631 | |||
1632 | // temp1 = x[i] - x[i2] | ||
1633 | point1_real -= point2_real; | ||
1634 | point1_imag -= point2_imag; | ||
1635 | |||
1636 | // x[i1] = x[i] + x[i2] | ||
1637 | Real[i] += point2_real; | ||
1638 | Imag[i] += point2_imag; | ||
1639 | |||
1640 | // x[i2] = (x[i] - x[i2]) * w | ||
1641 | Real[i2] = MUL_F(point1_real-point1_imag, w_real); | ||
1642 | Imag[i2] = MUL_F(point1_real+point1_imag, w_real); | ||
1643 | } | ||
1644 | |||
1645 | |||
1646 | // Stage 4 of 32 point FFT decimation in frequency (no multiplications) | ||
1647 | // 16*4=64 additions | ||
1648 | for (i = 0; i < n; i += 4) | ||
1649 | { | ||
1650 | i2 = i+2; | ||
1651 | point1_real = Real[i]; | ||
1652 | point1_imag = Imag[i]; | ||
1653 | |||
1654 | point2_real = Real[i2]; | ||
1655 | point2_imag = Imag[i2]; | ||
1656 | |||
1657 | // x[i1] = x[i] + x[i2] | ||
1658 | Real[i] += point2_real; | ||
1659 | Imag[i] += point2_imag; | ||
1660 | |||
1661 | // x[i2] = x[i] - x[i2] | ||
1662 | Real[i2] = point1_real - point2_real; | ||
1663 | Imag[i2] = point1_imag - point2_imag; | ||
1664 | } | ||
1665 | for (i = 1; i < n; i += 4) | ||
1666 | { | ||
1667 | i2 = i+2; | ||
1668 | point1_real = Real[i]; | ||
1669 | point1_imag = Imag[i]; | ||
1670 | |||
1671 | point2_real = Real[i2]; | ||
1672 | point2_imag = Imag[i2]; | ||
1673 | |||
1674 | // x[i] = x[i] + x[i2] | ||
1675 | Real[i] += point2_real; | ||
1676 | Imag[i] += point2_imag; | ||
1677 | |||
1678 | // x[i2] = (x[i] - x[i2]) * (-i) | ||
1679 | Real[i2] = point1_imag - point2_imag; | ||
1680 | Imag[i2] = point2_real - point1_real; | ||
1681 | } | ||
1682 | |||
1683 | // Stage 5 of 32 point FFT decimation in frequency (no multiplications) | ||
1684 | // 16*4=64 additions | ||
1685 | for (i = 0; i < n; i += 2) | ||
1686 | { | ||
1687 | i2 = i+1; | ||
1688 | point1_real = Real[i]; | ||
1689 | point1_imag = Imag[i]; | ||
1690 | |||
1691 | point2_real = Real[i2]; | ||
1692 | point2_imag = Imag[i2]; | ||
1693 | |||
1694 | // out[i1] = point1 + point2 | ||
1695 | Real[i] += point2_real; | ||
1696 | Imag[i] += point2_imag; | ||
1697 | |||
1698 | // out[i2] = point1 - point2 | ||
1699 | Real[i2] = point1_real - point2_real; | ||
1700 | Imag[i2] = point1_imag - point2_imag; | ||
1701 | } | ||
1702 | |||
1703 | #ifdef REORDER_IN_FFT | ||
1704 | FFTReorder(Real, Imag); | ||
1705 | #endif // #ifdef REORDER_IN_FFT | ||
1706 | } | ||
1707 | #undef n | ||
1708 | #undef log2n | ||
1709 | |||
1710 | static const real_t dct4_64_tab[] = { | ||
1711 | COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507), | 1456 | COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507), |
1712 | COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537), | 1457 | COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537), |
1713 | COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708), | 1458 | COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708), |
@@ -1806,57 +1551,65 @@ static const real_t dct4_64_tab[] = { | |||
1806 | COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382) | 1551 | COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382) |
1807 | }; | 1552 | }; |
1808 | 1553 | ||
1554 | // Table adapted from codeclib to fit into IRAM | ||
1555 | const uint32_t dct4_revtab[32] ICONST_ATTR = { | ||
1556 | 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17, | ||
1557 | 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16}; | ||
1558 | |||
1809 | /* size 64 only! */ | 1559 | /* size 64 only! */ |
1810 | void dct4_kernel(real_t * in_real, real_t * in_imag, real_t * out_real, real_t * out_imag) | 1560 | void dct4_kernel(real_t *real, real_t *imag) |
1811 | { | 1561 | { |
1812 | // Tables with bit reverse values for 5 bits, bit reverse of i at i-th position | 1562 | uint32_t i, idx; |
1813 | const uint8_t bit_rev_tab[32] = { 0,16,8,24,4,20,12,28,2,18,10,26,6,22,14,30,1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31 }; | 1563 | real_t x_re, x_im, tmp; |
1814 | uint16_t i, i_rev; | 1564 | FFTComplex xc[32]; /* used for calling codeclib's fft implementation */ |
1815 | 1565 | ||
1816 | /* Step 2: modulate */ | 1566 | /* Step 2: modulate and pre-rotate for codeclib's fft implementation */ |
1817 | // 3*32=96 multiplications | 1567 | // 3*32=96 multiplications |
1818 | // 3*32=96 additions | 1568 | // 3*32=96 additions |
1819 | for (i = 0; i < 32; i++) | 1569 | for (i = 0; i < 32; i++) |
1820 | { | 1570 | { |
1821 | real_t x_re, x_im, tmp; | 1571 | idx = dct4_revtab[i]; |
1822 | x_re = in_real[i]; | 1572 | x_re = real[i]; |
1823 | x_im = in_imag[i]; | 1573 | x_im = imag[i]; |
1824 | tmp = MUL_C(x_re + x_im, dct4_64_tab[i]); | 1574 | tmp = MUL_C(x_re + x_im, dct4_64_tab[i ]); |
1825 | in_real[i] = MUL_C(x_im, dct4_64_tab[i + 64]) + tmp; | 1575 | xc[idx].re = MUL_C(x_im , dct4_64_tab[i + 64]) + tmp; |
1826 | in_imag[i] = MUL_C(x_re, dct4_64_tab[i + 32]) + tmp; | 1576 | xc[idx].im = MUL_C(x_re , dct4_64_tab[i + 32]) + tmp; |
1827 | } | 1577 | } |
1828 | 1578 | ||
1829 | /* Step 3: FFT, but with output in bit reverse order */ | 1579 | /* Step 3: FFT (codeclib's implementation) */ |
1830 | fft_dif(in_real, in_imag); | 1580 | ff_fft_calc_c(5, xc); |
1831 | 1581 | ||
1832 | /* Step 4: modulate + bitreverse reordering */ | 1582 | /* Step 4: modulate + reordering */ |
1833 | // 3*31+2=95 multiplications | 1583 | // 3*31+2=95 multiplications |
1834 | // 3*31+2=95 additions | 1584 | // 3*31+2=95 additions |
1835 | for (i = 0; i < 16; i++) | 1585 | x_re = xc[0].re; |
1586 | x_im = xc[0].im; | ||
1587 | tmp = MUL_C(x_re + x_im, dct4_64_tab[0 + 3*32]); | ||
1588 | real[0] = MUL_C(x_im , dct4_64_tab[0 + 5*32]) + tmp; | ||
1589 | imag[0] = MUL_C(x_re , dct4_64_tab[0 + 4*32]) + tmp; | ||
1590 | for (i = 1; i < 16; i++) | ||
1836 | { | 1591 | { |
1837 | real_t x_re, x_im, tmp; | 1592 | idx = 32-i; |
1838 | i_rev = bit_rev_tab[i]; | 1593 | x_re = xc[idx].re; |
1839 | x_re = in_real[i_rev]; | 1594 | x_im = xc[idx].im; |
1840 | x_im = in_imag[i_rev]; | 1595 | tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); |
1841 | 1596 | real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp; | |
1842 | tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); | 1597 | imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp; |
1843 | out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp; | ||
1844 | out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp; | ||
1845 | } | 1598 | } |
1846 | // i = 16, i_rev = 1 = rev(16); | 1599 | // i = 16, idx = 16 = reorder_tab[16]; |
1847 | out_imag[16] = MUL_C(in_imag[1] - in_real[1], dct4_64_tab[16 + 3*32]); | 1600 | x_re = xc[16].re; |
1848 | out_real[16] = MUL_C(in_real[1] + in_imag[1], dct4_64_tab[16 + 3*32]); | 1601 | x_im = xc[16].im; |
1602 | imag[16] = MUL_C(x_im - x_re, dct4_64_tab[16 + 3*32]); | ||
1603 | real[16] = MUL_C(x_re + x_im, dct4_64_tab[16 + 3*32]); | ||
1849 | for (i = 17; i < 32; i++) | 1604 | for (i = 17; i < 32; i++) |
1850 | { | 1605 | { |
1851 | real_t x_re, x_im, tmp; | 1606 | idx = 32-i; |
1852 | i_rev = bit_rev_tab[i]; | 1607 | x_re = xc[idx].re; |
1853 | x_re = in_real[i_rev]; | 1608 | x_im = xc[idx].im; |
1854 | x_im = in_imag[i_rev]; | 1609 | tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); |
1855 | tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); | 1610 | real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp; |
1856 | out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp; | 1611 | imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp; |
1857 | out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp; | ||
1858 | } | 1612 | } |
1859 | |||
1860 | } | 1613 | } |
1861 | 1614 | ||
1862 | void DST4_32(real_t *y, real_t *x) | 1615 | void DST4_32(real_t *y, real_t *x) |
@@ -2266,6 +2019,6 @@ void DST4_32(real_t *y, real_t *x) | |||
2266 | y[0] = MUL_R(REAL_CONST(20.3738781672314530), f304); | 2019 | y[0] = MUL_R(REAL_CONST(20.3738781672314530), f304); |
2267 | } | 2020 | } |
2268 | 2021 | ||
2269 | #endif | 2022 | #endif /* #ifdef SBR_LOW_POWER */ |
2270 | 2023 | ||
2271 | #endif | 2024 | #endif /* #ifdef SBR_DEC */ |