summaryrefslogtreecommitdiff
path: root/apps/codecs/libmad/layer3.c
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/libmad/layer3.c')
-rw-r--r--apps/codecs/libmad/layer3.c330
1 files changed, 12 insertions, 318 deletions
diff --git a/apps/codecs/libmad/layer3.c b/apps/codecs/libmad/layer3.c
index 0198e66a3d..d8243f13c4 100644
--- a/apps/codecs/libmad/layer3.c
+++ b/apps/codecs/libmad/layer3.c
@@ -392,7 +392,7 @@ mad_fixed_t const imdct_s[6][6] ICONST_ATTR = {
392# include "imdct_s.dat" 392# include "imdct_s.dat"
393}; 393};
394 394
395# if !defined(ASO_IMDCT) 395# if !defined(FPM_ARM)
396/* 396/*
397 * windowing coefficients for long blocks 397 * windowing coefficients for long blocks
398 * derived from section 2.4.3.4.10.3 of ISO/IEC 11172-3 398 * derived from section 2.4.3.4.10.3 of ISO/IEC 11172-3
@@ -422,7 +422,7 @@ mad_fixed_t const window_l[36] ICONST_ATTR = {
422 MAD_F(0x04cfb0e2) /* 0.300705800 */, MAD_F(0x03768962) /* 0.216439614 */, 422 MAD_F(0x04cfb0e2) /* 0.300705800 */, MAD_F(0x03768962) /* 0.216439614 */,
423 MAD_F(0x0216a2a2) /* 0.130526192 */, MAD_F(0x00b2aa3e) /* 0.043619387 */, 423 MAD_F(0x0216a2a2) /* 0.130526192 */, MAD_F(0x00b2aa3e) /* 0.043619387 */,
424}; 424};
425# endif /* ASO_IMDCT */ 425# endif /* FPM_ARM */
426 426
427/* 427/*
428 * windowing coefficients for short blocks 428 * windowing coefficients for short blocks
@@ -1580,9 +1580,6 @@ void III_aliasreduce(mad_fixed_t xr[576], int lines)
1580 a = xr[-1 - i]; 1580 a = xr[-1 - i];
1581 b = xr[ i]; 1581 b = xr[ i];
1582 1582
1583# if defined(ASO_ZEROCHECK)
1584 if (a | b) {
1585# endif
1586# if defined(CPU_COLDFIRE) 1583# if defined(CPU_COLDFIRE)
1587 (void)hi, (void)lo; 1584 (void)hi, (void)lo;
1588 asm volatile ("mac.l %[a], %[csi], %%acc0\n\t" 1585 asm volatile ("mac.l %[a], %[csi], %%acc0\n\t"
@@ -1608,210 +1605,19 @@ void III_aliasreduce(mad_fixed_t xr[576], int lines)
1608 1605
1609 xr[ i] = MAD_F_MLZ(hi, lo); 1606 xr[ i] = MAD_F_MLZ(hi, lo);
1610# endif 1607# endif
1611# if defined(ASO_ZEROCHECK)
1612 }
1613# endif
1614 } 1608 }
1615 } 1609 }
1616} 1610}
1617#endif 1611#endif
1618 1612
1619# if defined(ASO_IMDCT) 1613# if defined(FPM_ARM)
1620void III_imdct_l(mad_fixed_t const [18], mad_fixed_t [36], unsigned int); 1614void III_imdct_l(mad_fixed_t const [18], mad_fixed_t [36], unsigned int);
1621# else 1615# else
1622# if 0
1623static
1624void fastsdct(mad_fixed_t const x[9], mad_fixed_t y[18])
1625{
1626 mad_fixed_t a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12;
1627 mad_fixed_t a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25;
1628 mad_fixed_t m0, m1, m2, m3, m4, m5, m6, m7;
1629
1630 enum {
1631 c0 = MAD_F(0x1f838b8d), /* 2 * cos( 1 * PI / 18) */
1632 c1 = MAD_F(0x1bb67ae8), /* 2 * cos( 3 * PI / 18) */
1633 c2 = MAD_F(0x18836fa3), /* 2 * cos( 4 * PI / 18) */
1634 c3 = MAD_F(0x1491b752), /* 2 * cos( 5 * PI / 18) */
1635 c4 = MAD_F(0x0af1d43a), /* 2 * cos( 7 * PI / 18) */
1636 c5 = MAD_F(0x058e86a0), /* 2 * cos( 8 * PI / 18) */
1637 c6 = -MAD_F(0x1e11f642) /* 2 * cos(16 * PI / 18) */
1638 };
1639
1640 a0 = x[3] + x[5];
1641 a1 = x[3] - x[5];
1642 a2 = x[6] + x[2];
1643 a3 = x[6] - x[2];
1644 a4 = x[1] + x[7];
1645 a5 = x[1] - x[7];
1646 a6 = x[8] + x[0];
1647 a7 = x[8] - x[0];
1648
1649 a8 = a0 + a2;
1650 a9 = a0 - a2;
1651 a10 = a0 - a6;
1652 a11 = a2 - a6;
1653 a12 = a8 + a6;
1654 a13 = a1 - a3;
1655 a14 = a13 + a7;
1656 a15 = a3 + a7;
1657 a16 = a1 - a7;
1658 a17 = a1 + a3;
1659
1660 m0 = mad_f_mul(a17, -c3);
1661 m1 = mad_f_mul(a16, -c0);
1662 m2 = mad_f_mul(a15, -c4);
1663 m3 = mad_f_mul(a14, -c1);
1664 m4 = mad_f_mul(a5, -c1);
1665 m5 = mad_f_mul(a11, -c6);
1666 m6 = mad_f_mul(a10, -c5);
1667 m7 = mad_f_mul(a9, -c2);
1668
1669 a18 = x[4] + a4;
1670 a19 = 2 * x[4] - a4;
1671 a20 = a19 + m5;
1672 a21 = a19 - m5;
1673 a22 = a19 + m6;
1674 a23 = m4 + m2;
1675 a24 = m4 - m2;
1676 a25 = m4 + m1;
1677
1678 /* output to every other slot for convenience */
1679
1680 y[ 0] = a18 + a12;
1681 y[ 2] = m0 - a25;
1682 y[ 4] = m7 - a20;
1683 y[ 6] = m3;
1684 y[ 8] = a21 - m6;
1685 y[10] = a24 - m1;
1686 y[12] = a12 - 2 * a18;
1687 y[14] = a23 + m0;
1688 y[16] = a22 + m7;
1689}
1690
1691static inline
1692void sdctII(mad_fixed_t const x[18], mad_fixed_t X[18])
1693{
1694 mad_fixed_t tmp[9];
1695 int i;
1696
1697 /* scale[i] = 2 * cos(PI * (2 * i + 1) / (2 * 18)) */
1698 static mad_fixed_t const scale[9] = {
1699 MAD_F(0x1fe0d3b4), MAD_F(0x1ee8dd47), MAD_F(0x1d007930),
1700 MAD_F(0x1a367e59), MAD_F(0x16a09e66), MAD_F(0x125abcf8),
1701 MAD_F(0x0d8616bc), MAD_F(0x08483ee1), MAD_F(0x02c9fad7)
1702 };
1703
1704 /* divide the 18-point SDCT-II into two 9-point SDCT-IIs */
1705
1706 /* even input butterfly */
1707
1708 for (i = 0; i < 9; i += 3) {
1709 tmp[i + 0] = x[i + 0] + x[18 - (i + 0) - 1];
1710 tmp[i + 1] = x[i + 1] + x[18 - (i + 1) - 1];
1711 tmp[i + 2] = x[i + 2] + x[18 - (i + 2) - 1];
1712 }
1713
1714 fastsdct(tmp, &X[0]);
1715
1716 /* odd input butterfly and scaling */
1717
1718 for (i = 0; i < 9; i += 3) {
1719 tmp[i + 0] = mad_f_mul(x[i + 0] - x[18 - (i + 0) - 1], scale[i + 0]);
1720 tmp[i + 1] = mad_f_mul(x[i + 1] - x[18 - (i + 1) - 1], scale[i + 1]);
1721 tmp[i + 2] = mad_f_mul(x[i + 2] - x[18 - (i + 2) - 1], scale[i + 2]);
1722 }
1723
1724 fastsdct(tmp, &X[1]);
1725
1726 /* output accumulation */
1727
1728 for (i = 3; i < 18; i += 8) {
1729 X[i + 0] -= X[(i + 0) - 2];
1730 X[i + 2] -= X[(i + 2) - 2];
1731 X[i + 4] -= X[(i + 4) - 2];
1732 X[i + 6] -= X[(i + 6) - 2];
1733 }
1734}
1735
1736static inline
1737void dctIV(mad_fixed_t const y[18], mad_fixed_t X[18])
1738{
1739 mad_fixed_t tmp[18];
1740 int i;
1741
1742 /* scale[i] = 2 * cos(PI * (2 * i + 1) / (4 * 18)) */
1743 static mad_fixed_t const scale[18] = {
1744 MAD_F(0x1ff833fa), MAD_F(0x1fb9ea93), MAD_F(0x1f3dd120),
1745 MAD_F(0x1e84d969), MAD_F(0x1d906bcf), MAD_F(0x1c62648b),
1746 MAD_F(0x1afd100f), MAD_F(0x1963268b), MAD_F(0x1797c6a4),
1747 MAD_F(0x159e6f5b), MAD_F(0x137af940), MAD_F(0x11318ef3),
1748 MAD_F(0x0ec6a507), MAD_F(0x0c3ef153), MAD_F(0x099f61c5),
1749 MAD_F(0x06ed12c5), MAD_F(0x042d4544), MAD_F(0x0165547c)
1750 };
1751
1752 /* scaling */
1753
1754 for (i = 0; i < 18; i += 3) {
1755 tmp[i + 0] = mad_f_mul(y[i + 0], scale[i + 0]);
1756 tmp[i + 1] = mad_f_mul(y[i + 1], scale[i + 1]);
1757 tmp[i + 2] = mad_f_mul(y[i + 2], scale[i + 2]);
1758 }
1759
1760 /* SDCT-II */
1761
1762 sdctII(tmp, X);
1763
1764 /* scale reduction and output accumulation */
1765
1766 X[0] /= 2;
1767 for (i = 1; i < 17; i += 4) {
1768 X[i + 0] = X[i + 0] / 2 - X[(i + 0) - 1];
1769 X[i + 1] = X[i + 1] / 2 - X[(i + 1) - 1];
1770 X[i + 2] = X[i + 2] / 2 - X[(i + 2) - 1];
1771 X[i + 3] = X[i + 3] / 2 - X[(i + 3) - 1];
1772 }
1773 X[17] = X[17] / 2 - X[16];
1774}
1775
1776/*
1777 * NAME: imdct36
1778 * DESCRIPTION: perform X[18]->x[36] IMDCT using Szu-Wei Lee's fast algorithm
1779 */
1780static inline
1781void imdct36(mad_fixed_t const x[18], mad_fixed_t y[36])
1782{
1783 mad_fixed_t tmp[18];
1784 int i;
1785
1786 /* DCT-IV */
1787
1788 dctIV(x, tmp);
1789
1790 /* convert 18-point DCT-IV to 36-point IMDCT */
1791
1792 for (i = 0; i < 9; i += 3) {
1793 y[i + 0] = tmp[9 + (i + 0)];
1794 y[i + 1] = tmp[9 + (i + 1)];
1795 y[i + 2] = tmp[9 + (i + 2)];
1796 }
1797 for (i = 9; i < 27; i += 3) {
1798 y[i + 0] = -tmp[36 - (9 + (i + 0)) - 1];
1799 y[i + 1] = -tmp[36 - (9 + (i + 1)) - 1];
1800 y[i + 2] = -tmp[36 - (9 + (i + 2)) - 1];
1801 }
1802 for (i = 27; i < 36; i += 3) {
1803 y[i + 0] = -tmp[(i + 0) - 27];
1804 y[i + 1] = -tmp[(i + 1) - 27];
1805 y[i + 2] = -tmp[(i + 2) - 27];
1806 }
1807}
1808# else
1809/* 1616/*
1810 * NAME: imdct36 1617 * NAME: imdct36
1811 * DESCRIPTION: perform X[18]->x[36] IMDCT 1618 * DESCRIPTION: perform X[18]->x[36] IMDCT
1812 */ 1619 */
1813 1620# if defined(CPU_COLDFIRE)
1814# if defined(CPU_COLDFIRE)
1815/* emac optimized imdct36, it is very ugly and i hope to replace it soon. 1621/* emac optimized imdct36, it is very ugly and i hope to replace it soon.
1816 * for now it is actually somewhat faster than the stock implementation. 1622 * for now it is actually somewhat faster than the stock implementation.
1817 */ 1623 */
@@ -2476,7 +2282,7 @@ void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
2476 /* pfew */ 2282 /* pfew */
2477} 2283}
2478 2284
2479#else 2285# else /* if defined(CPU_COLDFIRE) */
2480 2286
2481static inline 2287static inline
2482void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36]) 2288void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
@@ -2732,9 +2538,7 @@ void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
2732 MAD_F_MLA(hi, lo, X[17], -MAD_F(0x00b2aa3e)); 2538 MAD_F_MLA(hi, lo, X[17], -MAD_F(0x00b2aa3e));
2733 x[26] = x[27] = MAD_F_MLZ(hi, lo) + t5; 2539 x[26] = x[27] = MAD_F_MLZ(hi, lo) + t5;
2734} 2540}
2735#endif /* CPU_COLDFIRE */ 2541# endif /* CPU_COLDFIRE */
2736
2737# endif
2738 2542
2739/* 2543/*
2740 * NAME: III_imdct_l() 2544 * NAME: III_imdct_l()
@@ -2754,39 +2558,8 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36],
2754 2558
2755 switch (block_type) { 2559 switch (block_type) {
2756 case 0: /* normal window */ 2560 case 0: /* normal window */
2757# if defined(ASO_INTERLEAVE1) 2561# if 1
2758 { 2562 /* loop unrolled implementation */
2759 register mad_fixed_t tmp1, tmp2;
2760
2761 tmp1 = window_l[0];
2762 tmp2 = window_l[1];
2763
2764 for (i = 0; i < 34; i += 2) {
2765 z[i + 0] = mad_f_mul(z[i + 0], tmp1);
2766 tmp1 = window_l[i + 2];
2767 z[i + 1] = mad_f_mul(z[i + 1], tmp2);
2768 tmp2 = window_l[i + 3];
2769 }
2770
2771 z[34] = mad_f_mul(z[34], tmp1);
2772 z[35] = mad_f_mul(z[35], tmp2);
2773 }
2774# elif defined(ASO_INTERLEAVE2)
2775 {
2776 register mad_fixed_t tmp1, tmp2;
2777
2778 tmp1 = z[0];
2779 tmp2 = window_l[0];
2780
2781 for (i = 0; i < 35; ++i) {
2782 z[i] = mad_f_mul(tmp1, tmp2);
2783 tmp1 = z[i + 1];
2784 tmp2 = window_l[i + 1];
2785 }
2786
2787 z[35] = mad_f_mul(tmp1, tmp2);
2788 }
2789# elif 1
2790 for (i = 0; i < 36; i += 4) { 2563 for (i = 0; i < 36; i += 4) {
2791 z[i + 0] = mad_f_mul(z[i + 0], window_l[i + 0]); 2564 z[i + 0] = mad_f_mul(z[i + 0], window_l[i + 0]);
2792 z[i + 1] = mad_f_mul(z[i + 1], window_l[i + 1]); 2565 z[i + 1] = mad_f_mul(z[i + 1], window_l[i + 1]);
@@ -2794,6 +2567,7 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36],
2794 z[i + 3] = mad_f_mul(z[i + 3], window_l[i + 3]); 2567 z[i + 3] = mad_f_mul(z[i + 3], window_l[i + 3]);
2795 } 2568 }
2796# else 2569# else
2570 /* reference implementation */
2797 for (i = 0; i < 36; ++i) z[i] = mad_f_mul(z[i], window_l[i]); 2571 for (i = 0; i < 36; ++i) z[i] = mad_f_mul(z[i], window_l[i]);
2798# endif 2572# endif
2799 break; 2573 break;
@@ -2821,7 +2595,7 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36],
2821 break; 2595 break;
2822 } 2596 }
2823} 2597}
2824# endif /* ASO_IMDCT */ 2598# endif /* FPM_ARM */
2825 2599
2826/* 2600/*
2827 * NAME: III_imdct_s() 2601 * NAME: III_imdct_s()
@@ -2923,43 +2697,10 @@ void III_overlap(mad_fixed_t const output[36], mad_fixed_t overlap[18],
2923 mad_fixed_t sample[18][32], unsigned int sb) 2697 mad_fixed_t sample[18][32], unsigned int sb)
2924{ 2698{
2925 unsigned int i; 2699 unsigned int i;
2926
2927# if defined(ASO_INTERLEAVE2)
2928 {
2929 register mad_fixed_t tmp1, tmp2;
2930
2931 tmp1 = overlap[0];
2932 tmp2 = overlap[1];
2933
2934 for (i = 0; i < 16; i += 2) {
2935 sample[i + 0][sb] = output[i + 0 + 0] + tmp1;
2936 overlap[i + 0] = output[i + 0 + 18];
2937 tmp1 = overlap[i + 2];
2938
2939 sample[i + 1][sb] = output[i + 1 + 0] + tmp2;
2940 overlap[i + 1] = output[i + 1 + 18];
2941 tmp2 = overlap[i + 3];
2942 }
2943
2944 sample[16][sb] = output[16 + 0] + tmp1;
2945 overlap[16] = output[16 + 18];
2946 sample[17][sb] = output[17 + 0] + tmp2;
2947 overlap[17] = output[17 + 18];
2948 }
2949# elif 0
2950 for (i = 0; i < 18; i += 2) {
2951 sample[i + 0][sb] = output[i + 0 + 0] + overlap[i + 0];
2952 overlap[i + 0] = output[i + 0 + 18];
2953
2954 sample[i + 1][sb] = output[i + 1 + 0] + overlap[i + 1];
2955 overlap[i + 1] = output[i + 1 + 18];
2956 }
2957# else
2958 for (i = 0; i < 18; ++i) { 2700 for (i = 0; i < 18; ++i) {
2959 sample[i][sb] = output[i + 0] + overlap[i]; 2701 sample[i][sb] = output[i + 0] + overlap[i];
2960 overlap[i] = output[i + 18]; 2702 overlap[i] = output[i + 18];
2961 } 2703 }
2962# endif
2963} 2704}
2964#endif 2705#endif
2965 2706
@@ -2972,35 +2713,10 @@ void III_overlap_z(mad_fixed_t overlap[18],
2972 mad_fixed_t sample[18][32], unsigned int sb) 2713 mad_fixed_t sample[18][32], unsigned int sb)
2973{ 2714{
2974 unsigned int i; 2715 unsigned int i;
2975
2976# if defined(ASO_INTERLEAVE2)
2977 {
2978 register mad_fixed_t tmp1, tmp2;
2979
2980 tmp1 = overlap[0];
2981 tmp2 = overlap[1];
2982
2983 for (i = 0; i < 16; i += 2) {
2984 sample[i + 0][sb] = tmp1;
2985 overlap[i + 0] = 0;
2986 tmp1 = overlap[i + 2];
2987
2988 sample[i + 1][sb] = tmp2;
2989 overlap[i + 1] = 0;
2990 tmp2 = overlap[i + 3];
2991 }
2992
2993 sample[16][sb] = tmp1;
2994 overlap[16] = 0;
2995 sample[17][sb] = tmp2;
2996 overlap[17] = 0;
2997 }
2998# else
2999 for (i = 0; i < 18; ++i) { 2716 for (i = 0; i < 18; ++i) {
3000 sample[i][sb] = overlap[i]; 2717 sample[i][sb] = overlap[i];
3001 overlap[i] = 0; 2718 overlap[i] = 0;
3002 } 2719 }
3003# endif
3004} 2720}
3005 2721
3006/* 2722/*
@@ -3011,30 +2727,8 @@ static
3011void III_freqinver(mad_fixed_t sample[18][32], unsigned int sb) 2727void III_freqinver(mad_fixed_t sample[18][32], unsigned int sb)
3012{ 2728{
3013 unsigned int i; 2729 unsigned int i;
3014 2730 for (i = 1; i < 18; i += 2)
3015# if 1 || defined(ASO_INTERLEAVE1) || defined(ASO_INTERLEAVE2) 2731 sample[i][sb] = -sample[i][sb];
3016 {
3017 register mad_fixed_t tmp1, tmp2;
3018
3019 tmp1 = sample[1][sb];
3020 tmp2 = sample[3][sb];
3021
3022 for (i = 1; i < 13; i += 4) {
3023 sample[i + 0][sb] = -tmp1;
3024 tmp1 = sample[i + 4][sb];
3025 sample[i + 2][sb] = -tmp2;
3026 tmp2 = sample[i + 6][sb];
3027 }
3028
3029 sample[13][sb] = -tmp1;
3030 tmp1 = sample[17][sb];
3031 sample[15][sb] = -tmp2;
3032 sample[17][sb] = -tmp1;
3033 }
3034# else
3035 for (i = 1; i < 18; i += 2)
3036 sample[i][sb] = -sample[i][sb];
3037# endif
3038} 2732}
3039 2733
3040/* 2734/*