summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2005-04-13 13:15:58 +0000
committerThom Johansen <thomj@rockbox.org>2005-04-13 13:15:58 +0000
commitb0b013ea0bc1e5401bc1ef11e1706eafa7db08cf (patch)
treef78c7c6ccd2e5dff4b79b328523268526d6a6b83
parentc5056b1d7f6e1d9811b01ef14b3afe2724db0a3b (diff)
downloadrockbox-b0b013ea0bc1e5401bc1ef11e1706eafa7db08cf.tar.gz
rockbox-b0b013ea0bc1e5401bc1ef11e1706eafa7db08cf.zip
Nicer imdct_s, butt-ugly imdct36 that urgently needs replacing. Moved some data to iram.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6280 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libmad/SOURCES3
-rw-r--r--apps/codecs/libmad/imdct_mcf5249.S128
-rw-r--r--apps/codecs/libmad/layer3.c702
3 files changed, 721 insertions, 112 deletions
diff --git a/apps/codecs/libmad/SOURCES b/apps/codecs/libmad/SOURCES
index 862ddad849..34a2a68818 100644
--- a/apps/codecs/libmad/SOURCES
+++ b/apps/codecs/libmad/SOURCES
@@ -9,3 +9,6 @@ stream.c
9synth.c 9synth.c
10timer.c 10timer.c
11version.c 11version.c
12#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
13imdct_mcf5249.S
14#endif
diff --git a/apps/codecs/libmad/imdct_mcf5249.S b/apps/codecs/libmad/imdct_mcf5249.S
new file mode 100644
index 0000000000..be0072f674
--- /dev/null
+++ b/apps/codecs/libmad/imdct_mcf5249.S
@@ -0,0 +1,128 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2005 by Thom Johansen
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19/* this will also be the home to III_imdct_l in the future */
20
21 .global III_imdct_s /* exported; layer3.c declares the C prototype */
22III_imdct_s: /* void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]), arguments on the stack */
23 /* we need to save 9 registers and 36 samples of temp buffer */
24 lea.l (-45*4, %sp), %sp /* reserve 45 longwords: 36 temp + 9 saved registers */
25 movem.l %d2-%d7/%a2-%a4, (36*4, %sp) /* saved registers sit above the temp buffer */
26 move.l (45*4 + 4, %sp), %a2 /* a2 = X */
27 move.l %sp, %a3 /* a3 = temp buffer (yptr), at the bottom of the frame */
28
29 /* IMDCT */
30
31 /* if additional precision is needed in this block, it is possible to
32 * get more low bits out of the accext01 register _before_ doing the
33 * movclrs.
34 */
35 move.l #0xb0, %macsr /* frac mode, saturation, rounding */
36 sub.l %a0, %a0 /* a0 = 0, outer loop counter */
37 .align 2
38.imdctloop: /* outer loop: one pass per group of 6 input samples */
39 lea.l imdct_s, %a1 /* a1 = start of imdct coefs, reset on every outer pass */
40 movem.l (%a2), %d0-%d5 /* load six input samples in d0-d5 */
41 lea.l (6*4, %a2), %a2 /* advance X past the six samples just loaded */
42
43 clr.l %d7 /* d7 = i = 0, inner loop index */
44 move.l (%a1)+, %a4 /* preload first imdct coef in a4 */
45 .align 2
46.macloop: /* inner loop: each pass writes 4 temp buffer entries */
47 mac.l %d0, %a4, (%a1)+, %a4, %acc0 /* acc0 += sample * coef, next coef -> a4 */
48 mac.l %d1, %a4, (%a1)+, %a4, %acc0
49 mac.l %d2, %a4, (%a1)+, %a4, %acc0
50 mac.l %d3, %a4, (%a1)+, %a4, %acc0
51 mac.l %d4, %a4, (%a1)+, %a4, %acc0
52 mac.l %d5, %a4, (%a1)+, %a4, %acc0
53 movclr.l %acc0, %d6 /* get result, left shifted once */
54 asl.l #3, %d6 /* one shift free, shift three more */
55 move.l %d6, (%a3, %d7.l*4) /* yptr[i] = result */
56 neg.l %d6 /* d6 = -result */
57 neg.l %d7 /* d7 = -i, so the indexed stores below count downwards */
58 move.l %d6, (5*4, %a3, %d7.l*4) /* yptr[5 - i] = -result */
59 mac.l %d0, %a4, (%a1)+, %a4, %acc0 /* second mac sequence, next six coefs */
60 mac.l %d1, %a4, (%a1)+, %a4, %acc0
61 mac.l %d2, %a4, (%a1)+, %a4, %acc0
62 mac.l %d3, %a4, (%a1)+, %a4, %acc0
63 mac.l %d4, %a4, (%a1)+, %a4, %acc0
64 mac.l %d5, %a4, (%a1)+, %a4, %acc0
65 movclr.l %acc0, %d6 /* get result */
66 asl.l #3, %d6
67 move.l %d6, (11*4, %a3, %d7.l*4) /* yptr[11 - i] = result (d7 is still -i) */
68 neg.l %d7 /* back to d7 = i */
69 move.l %d6, (6*4, %a3, %d7.l*4) /* yptr[i + 6] = result */
70 addq.l #1, %d7 /* increment inner loop variable */
71 moveq.l #3, %d6 /* d6 is free here, reuse it as the loop bound */
72 cmp.l %d6, %d7 /* we do three inner loop iterations */
73 jne .macloop
74
75 lea.l (12*4, %a3), %a3 /* yptr += 12 for the next outer pass */
76 addq.l #1, %a0 /* increment outer loop variable */
77 moveq.l #3, %d0 /* d0 is reloaded by the movem at the loop top */
78 cmp.l %d0, %a0 /* we do three outer loop iterations */
79 jne .imdctloop
80
81 /* windowing, overlapping and concatenation */
82
83 move.l (45*4 + 8, %sp), %a2 /* a2 = z */
84 move.l %sp, %a3 /* a3 = tmp buffer ptr */
85 lea.l window_s, %a4 /* a4 = window coef pointer */
86
87 moveq.l #6, %d7 /* six iterations */
88 .align 2
89.overlaploop:
90 clr.l (%a2) /* z[i + 0] = 0 */
91 move.l (%a4), %d0 /* d0 = window_s[i] */
92 move.l (%a3), %d2 /* d2 = tmp[i] */
93 mac.l %d0, %d2, (6*4, %a4), %d1, %acc0 /* acc0 += d0 * d2, d1 = window_s[i + 6] */
94 move.l (6*4, %a3), %d2 /* d2 = tmp[i + 6] */
95 movclr.l %acc0, %d6
96 asl.l #3, %d6
97 move.l %d6, (6*4, %a2) /* z[i + 6] = result */
98
99 mac.l %d1, %d2, (12*4, %a3), %d2, %acc0 /* acc0 += d1 * tmp[i + 6], d2 = tmp[i + 12] */
100 mac.l %d0, %d2, (18*4, %a3), %d2, %acc0 /* acc0 += d0 * tmp[i + 12], d2 = tmp[i + 18] */
101 movclr.l %acc0, %d6
102 asl.l #3, %d6
103 move.l %d6, (12*4, %a2) /* z[i + 12] = result */
104
105 mac.l %d1, %d2, (24*4, %a3), %d2, %acc0 /* acc0 += d1 * tmp[i + 18], d2 = tmp[i + 24] */
106 mac.l %d0, %d2, (30*4, %a3), %d2, %acc0 /* acc0 += d0 * tmp[i + 24], d2 = tmp[i + 30] */
107 movclr.l %acc0, %d6
108 asl.l #3, %d6
109 move.l %d6, (18*4, %a2) /* z[i + 18] = result */
110
111 mac.l %d1, %d2, %acc0 /* acc0 += d1 * tmp[i + 30] */
112 movclr.l %acc0, %d6
113 asl.l #3, %d6
114 move.l %d6, (24*4, %a2) /* z[i + 24] = result */
115
116 clr.l (30*4, %a2) /* z[i + 30] = 0 */
117 addq.l #4, %a2 /* increment all pointers */
118 addq.l #4, %a3
119 addq.l #4, %a4
120 subq.l #1, %d7 /* decrement loop counter */
121 jne .overlaploop
122 /* fall through to exit if we're done */
123
124 /* clean up */
125 movem.l (36*4, %sp), %d2-%d7/%a2-%a4 /* restore the registers saved on entry */
126 lea.l (45*4, %sp), %sp /* release temp buffer and save area */
127 rts
128
diff --git a/apps/codecs/libmad/layer3.c b/apps/codecs/libmad/layer3.c
index 7449be71f2..aa46a71c16 100644
--- a/apps/codecs/libmad/layer3.c
+++ b/apps/codecs/libmad/layer3.c
@@ -384,8 +384,7 @@ mad_fixed_t const ca[8] = {
384 * imdct_s[i/even][k] = cos((PI / 24) * (2 * (i / 2) + 7) * (2 * k + 1)) 384 * imdct_s[i/even][k] = cos((PI / 24) * (2 * (i / 2) + 7) * (2 * k + 1))
385 * imdct_s[i /odd][k] = cos((PI / 24) * (2 * (6 + (i-1)/2) + 7) * (2 * k + 1)) 385 * imdct_s[i /odd][k] = cos((PI / 24) * (2 * (6 + (i-1)/2) + 7) * (2 * k + 1))
386 */ 386 */
387static 387mad_fixed_t const imdct_s[6][6] IDATA_ATTR = {
388mad_fixed_t const imdct_s[6][6] = {
389# include "imdct_s.dat" 388# include "imdct_s.dat"
390}; 389};
391 390
@@ -397,7 +396,7 @@ mad_fixed_t const imdct_s[6][6] = {
397 * window_l[i] = sin((PI / 36) * (i + 1/2)) 396 * window_l[i] = sin((PI / 36) * (i + 1/2))
398 */ 397 */
399static 398static
400mad_fixed_t const window_l[36] = { 399mad_fixed_t const window_l[36] IDATA_ATTR = {
401 MAD_F(0x00b2aa3e) /* 0.043619387 */, MAD_F(0x0216a2a2) /* 0.130526192 */, 400 MAD_F(0x00b2aa3e) /* 0.043619387 */, MAD_F(0x0216a2a2) /* 0.130526192 */,
402 MAD_F(0x03768962) /* 0.216439614 */, MAD_F(0x04cfb0e2) /* 0.300705800 */, 401 MAD_F(0x03768962) /* 0.216439614 */, MAD_F(0x04cfb0e2) /* 0.300705800 */,
403 MAD_F(0x061f78aa) /* 0.382683432 */, MAD_F(0x07635284) /* 0.461748613 */, 402 MAD_F(0x061f78aa) /* 0.382683432 */, MAD_F(0x07635284) /* 0.461748613 */,
@@ -427,8 +426,7 @@ mad_fixed_t const window_l[36] = {
427 * 426 *
428 * window_s[i] = sin((PI / 12) * (i + 1/2)) 427 * window_s[i] = sin((PI / 12) * (i + 1/2))
429 */ 428 */
430static 429mad_fixed_t const window_s[12] IDATA_ATTR = {
431mad_fixed_t const window_s[12] = {
432 MAD_F(0x0216a2a2) /* 0.130526192 */, MAD_F(0x061f78aa) /* 0.382683432 */, 430 MAD_F(0x0216a2a2) /* 0.130526192 */, MAD_F(0x061f78aa) /* 0.382683432 */,
433 MAD_F(0x09bd7ca0) /* 0.608761429 */, MAD_F(0x0cb19346) /* 0.793353340 */, 431 MAD_F(0x09bd7ca0) /* 0.608761429 */, MAD_F(0x0cb19346) /* 0.793353340 */,
434 MAD_F(0x0ec835e8) /* 0.923879533 */, MAD_F(0x0fdcf549) /* 0.991444861 */, 432 MAD_F(0x0ec835e8) /* 0.923879533 */, MAD_F(0x0fdcf549) /* 0.991444861 */,
@@ -1575,7 +1573,7 @@ void III_aliasreduce(mad_fixed_t xr[576], int lines)
1575# if defined(ASO_IMDCT) 1573# if defined(ASO_IMDCT)
1576void III_imdct_l(mad_fixed_t const [18], mad_fixed_t [36], unsigned int); 1574void III_imdct_l(mad_fixed_t const [18], mad_fixed_t [36], unsigned int);
1577# else 1575# else
1578# if 1 1576# if 0
1579static 1577static
1580void fastsdct(mad_fixed_t const x[9], mad_fixed_t y[18]) 1578void fastsdct(mad_fixed_t const x[9], mad_fixed_t y[18])
1581{ 1579{
@@ -1766,6 +1764,589 @@ void imdct36(mad_fixed_t const x[18], mad_fixed_t y[36])
1766 * NAME: imdct36 1764 * NAME: imdct36
1767 * DESCRIPTION: perform X[18]->x[36] IMDCT 1765 * DESCRIPTION: perform X[18]->x[36] IMDCT
1768 */ 1766 */
1767
1768# if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
1769/* EMAC-optimized imdct36 (X[18] -> x[36] IMDCT); it is very ugly and I hope to replace it soon.
1770 * For now it is actually somewhat faster than the stock implementation.
1771 */
1772static inline
1773void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
1774{
1775 mad_fixed_t t[16]; /* scratch for intermediate terms, only accessed from the asm below */
1776 /* assumes FRACBITS = 28: every value read from acc0 is shifted left by 3 */
1777 asm volatile (
1778 "move.l #0xb0, %%d0\n\t" /* frac. mode, saturate, round */
1779 "move.l %%d0, %%macsr\n\t"
1780 "move.l (4*4, %[X]), %%d0\n\t"
1781 "move.l #0x0ec835e8, %%d1\n\t"
1782 "mac.l %%d0, %%d1, (13*4, %[X]), %%d0, %%acc0\n\t"
1783 "move.l #0x061f78aa, %%d1\n\t"
1784 "mac.l %%d0, %%d1, (1*4, %[X]), %%d0, %%acc0\n\t"
1785 "move.l %%acc0, %%d7\n\t" /* read without clearing, acc0 keeps accumulating */
1786 "asl.l #3, %%d7\n\t"
1787 "move.l %%d7, (6*4, %[t])\n\t"
1788
1789 "sub.l (10*4, %[X]), %%d0\n\t"
1790 "move.l %%d0, (14*4, %[t])\n\t"
1791 "move.l #0x061f78aa, %%d1\n\t"
1792 "msac.l %%d0, %%d1, (7*4, %[X]), %%d0, %%acc0\n\t"
1793 "add.l (16*4, %[X]), %%d0\n\t"
1794 "move.l %%d0, (15*4, %[t])\n\t"
1795 "move.l #0x0ec835e8, %%d1\n\t"
1796 "msac.l %%d0, %%d1, (%[X]), %%d2, %%acc0\n\t"
1797 "move.l %%acc0, %%d6\n\t" /* again read without clearing */
1798 "asl.l #3, %%d6\n\t"
1799 "move.l %%d6, (%[t])\n\t"
1800
1801 "sub.l (11*4, %[X]), %%d2\n\t" /* store t8-t11 in d2-d5, will need them soon */
1802 "sub.l (12*4, %[X]), %%d2\n\t"
1803 "move.l %%d2, (8*4, %[t])\n\t"
1804 "move.l #0x0216a2a2, %%d1\n\t"
1805 "mac.l %%d2, %%d1, (2*4, %[X]), %%d3, %%acc0\n\t"
1806
1807 "sub.l (9*4, %[X]), %%d3\n\t"
1808 "sub.l (14*4, %[X]), %%d3\n\t"
1809 "move.l %%d3, (9*4, %[t])\n\t"
1810 "move.l #0x09bd7ca0, %%d1\n\t"
1811 "mac.l %%d3, %%d1, (3*4, %[X]), %%d4, %%acc0\n\t"
1812
1813 "sub.l (8*4, %[X]), %%d4\n\t"
1814 "sub.l (15*4, %[X]), %%d4\n\t"
1815 "move.l %%d4, (10*4, %[t])\n\t"
1816 "move.l #0x0cb19346, %%d1\n\t"
1817 "msac.l %%d4, %%d1, (5*4, %[X]), %%d5, %%acc0\n\t"
1818
1819 "sub.l (6*4, %[X]), %%d5\n\t"
1820 "sub.l (17*4, %[X]), %%d5\n\t"
1821 "move.l %%d5, (11*4, %[t])\n\t"
1822 "move.l #0x0fdcf549, %%d1\n\t"
1823 "msac.l %%d5, %%d1, (%[X]), %%d0, %%acc0\n\t"
1824
1825 "movclr.l %%acc0, %%d7\n\t"
1826 "asl.l #3, %%d7\n\t"
1827 "move.l %%d7, (7*4, %[x])\n\t"
1828 "neg.l %%d7\n\t"
1829 "move.l %%d7, (10*4, %[x])\n\t"
1830
1831 "move.l #0x0cb19346, %%d1\n\t"
1832 "msac.l %%d2, %%d1, (3*4, %[X]), %%d2, %%acc0\n\t" /* preload for t12 statement */
1833 "move.l #0x0fdcf549, %%d1\n\t"
1834 "mac.l %%d3, %%d1, (8*4, %[X]), %%d3, %%acc0\n\t"
1835 "move.l #0x0216a2a2, %%d1\n\t"
1836 "mac.l %%d4, %%d1, (11*4, %[X]), %%d4, %%acc0\n\t"
1837 "move.l #0x09bd7ca0, %%d1\n\t"
1838 "msac.l %%d5, %%d1, (12*4, %[X]), %%d5, %%acc0\n\t"
1839 "movclr.l %%acc0, %%d7\n\t"
1840 "asl.l #3, %%d7\n\t"
1841 "sub.l %%d6, %%d7\n\t" /* t0 is still in d6 */
1842 "move.l %%d7, (19*4, %[x])\n\t"
1843 "move.l %%d7, (34*4, %[x])\n\t"
1844
1845 "sub.l %%d2, %%d0\n\t"
1846 "add.l %%d3, %%d0\n\t"
1847 "sub.l %%d4, %%d0\n\t"
1848 "sub.l %%d5, %%d0\n\t"
1849 "add.l (15*4, %[X]), %%d0\n\t"
1850
1851 "move.l (2*4, %[X]), %%d3\n\t"
1852 "add.l (5*4, %[X]), %%d3\n\t"
1853 "sub.l (6*4, %[X]), %%d3\n\t"
1854 "sub.l (9*4, %[X]), %%d3\n\t"
1855 "sub.l (14*4, %[X]), %%d3\n\t"
1856 "sub.l (17*4, %[X]), %%d3\n\t"
1857
1858 "move.l %%d0, (12*4, %[t])\n\t"
1859 "move.l %%d3, (13*4, %[t])\n\t"
1860
1861 "move.l #0x0ec835e8, %%d1\n\t"
1862 "msac.l %%d0, %%d1, (1*4, %[X]), %%d2, %%acc0\n\t"
1863 "move.l #0x061f78aa, %%d1\n\t"
1864 "mac.l %%d3, %%d1, (7*4, %[X]), %%d3, %%acc0\n\t"
1865 "movclr.l %%acc0, %%d7\n\t"
1866 "asl.l #3, %%d7\n\t"
1867 "add.l %%d6, %%d7\n\t"
1868 "move.l %%d7, (22*4, %[x])\n\t"
1869 "move.l %%d7, (31*4, %[x])\n\t"
1870
1871 "move.l #0x09bd7ca0, %%d1\n\t"
1872 "msac.l %%d1, %%d2, (10*4, %[X]), %%d2, %%acc0\n\t"
1873 "move.l #0x0216a2a2, %%d1\n\t"
1874 "mac.l %%d1, %%d3, (16*4, %[X]), %%d3, %%acc0\n\t"
1875 "move.l #0x0fdcf549, %%d1\n\t"
1876 "msac.l %%d1, %%d2, (6*4, %[t]), %%d2, %%acc0\n\t"
1877 "move.l #0x0cb19346, %%d1\n\t"
1878 "mac.l %%d1, %%d3, (%[X]), %%d0, %%acc0\n\t"
1879 "movclr.l %%acc0, %%d7\n\t"
1880 "asl.l #3, %%d7\n\t"
1881 "add.l %%d2, %%d7\n\t"
1882 "move.l %%d7, (1*4, %[t])\n\t"
1883
1884 "move.l #0x03768962, %%d1\n\t"
1885 "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
1886 "move.l #0x0e313245, %%d1\n\t"
1887 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
1888 "move.l #0x0ffc19fd, %%d1\n\t"
1889 "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
1890 "move.l #0x0acf37ad, %%d1\n\t"
1891 "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
1892 "move.l #0x04cfb0e2, %%d1\n\t"
1893 "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
1894 "move.l #0x0898c779, %%d1\n\t"
1895 "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
1896 "move.l #0x0d7e8807, %%d1\n\t"
1897 "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
1898 "move.l #0x0f426cb5, %%d1\n\t"
1899 "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
1900 "move.l #0x0bcbe352, %%d1\n\t"
1901 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
1902 "move.l #0x00b2aa3e, %%d1\n\t"
1903 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
1904 "move.l #0x07635284, %%d1\n\t"
1905 "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
1906 "move.l #0x0f9ee890, %%d1\n\t"
1907 "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
1908 "movclr.l %%acc0, %%d6\n\t"
1909 "asl.l #3, %%d6\n\t"
1910 "add.l %%d7, %%d6\n\t"
1911 "move.l %%d6, (6*4, %[x])\n\t"
1912 "neg.l %%d6\n\t"
1913 "move.l %%d6, (11*4, %[x])\n\t"
1914
1915 "move.l #0x0f426cb5, %%d1\n\t"
1916 "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
1917 "move.l #0x00b2aa3e, %%d1\n\t"
1918 "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
1919 "move.l #0x0898c779, %%d1\n\t"
1920 "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
1921 "move.l #0x0f9ee890, %%d1\n\t"
1922 "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
1923 "move.l #0x0acf37ad, %%d1\n\t"
1924 "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
1925 "move.l #0x07635284, %%d1\n\t"
1926 "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
1927 "move.l #0x0e313245, %%d1\n\t"
1928 "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
1929 "move.l #0x0bcbe352, %%d1\n\t"
1930 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
1931 "move.l #0x03768962, %%d1\n\t"
1932 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
1933 "move.l #0x0d7e8807, %%d1\n\t"
1934 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
1935 "move.l #0x0ffc19fd, %%d1\n\t"
1936 "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
1937 "move.l #0x04cfb0e2, %%d1\n\t"
1938 "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
1939 "movclr.l %%acc0, %%d6\n\t"
1940 "asl.l #3, %%d6\n\t"
1941 "add.l %%d7, %%d6\n\t"
1942 "move.l %%d6, (23*4, %[x])\n\t"
1943 "move.l %%d6, (30*4, %[x])\n\t"
1944
1945 "move.l #0x0bcbe352, %%d1\n\t"
1946 "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
1947 "move.l #0x0d7e8807, %%d1\n\t"
1948 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
1949 "move.l #0x07635284, %%d1\n\t"
1950 "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
1951 "move.l #0x04cfb0e2, %%d1\n\t"
1952 "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
1953 "move.l #0x0f9ee890, %%d1\n\t"
1954 "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
1955 "move.l #0x0ffc19fd, %%d1\n\t"
1956 "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
1957 "move.l #0x00b2aa3e, %%d1\n\t"
1958 "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
1959 "move.l #0x03768962, %%d1\n\t"
1960 "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
1961 "move.l #0x0f426cb5, %%d1\n\t"
1962 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
1963 "move.l #0x0e313245, %%d1\n\t"
1964 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
1965 "move.l #0x0898c779, %%d1\n\t"
1966 "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
1967 "move.l #0x0acf37ad, %%d1\n\t"
1968 "msac.l %%d1, %%d0, (4*4, %[X]), %%d0, %%acc0\n\t"
1969 "movclr.l %%acc0, %%d6\n\t"
1970 "asl.l #3, %%d6\n\t"
1971 "sub.l %%d7, %%d6\n\t"
1972 "move.l %%d6, (18*4, %[x])\n\t"
1973 "move.l %%d6, (35*4, %[x])\n\t"
1974
1975 "move.l #0x061f78aa, %%d1\n\t"
1976 "mac.l %%d1, %%d0, (13*4, %[X]), %%d0, %%acc0\n\t"
1977 "move.l #0x0ec835e8, %%d1\n\t"
1978 "msac.l %%d1, %%d0, (1*4, %[X]), %%d0, %%acc0\n\t"
1979 "move.l %%acc0, %%d5\n\t"
1980 "asl.l #3, %%d5\n\t"
1981 "move.l %%d5, (7*4, %[t])\n\t"
1982
1983 "move.l #0x0cb19346, %%d1\n\t"
1984 "msac.l %%d1, %%d0, (7*4, %[X]), %%d0, %%acc0\n\t"
1985 "move.l #0x0fdcf549, %%d1\n\t"
1986 "mac.l %%d1, %%d0, (10*4, %[X]), %%d0, %%acc0\n\t"
1987 "move.l #0x0216a2a2, %%d1\n\t"
1988 "mac.l %%d1, %%d0, (16*4, %[X]), %%d0, %%acc0\n\t"
1989 "move.l #0x09bd7ca0, %%d1\n\t"
1990 "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
1991 "move.l %%acc0, %%d7\n\t"
1992 "asl.l #3, %%d7\n\t"
1993 "move.l %%d7, (2*4, %[t])\n\t"
1994
1995 "move.l #0x04cfb0e2, %%d1\n\t"
1996 "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
1997 "move.l #0x0ffc19fd, %%d1\n\t"
1998 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
1999 "move.l #0x0d7e8807, %%d1\n\t"
2000 "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2001 "move.l #0x03768962, %%d1\n\t"
2002 "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2003 "move.l #0x0bcbe352, %%d1\n\t"
2004 "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2005 "move.l #0x0e313245, %%d1\n\t"
2006 "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2007 "move.l #0x07635284, %%d1\n\t"
2008 "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2009 "move.l #0x0acf37ad, %%d1\n\t"
2010 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2011 "move.l #0x0f9ee890, %%d1\n\t"
2012 "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2013 "move.l #0x0898c779, %%d1\n\t"
2014 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2015 "move.l #0x00b2aa3e, %%d1\n\t"
2016 "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2017 "move.l #0x0f426cb5, %%d1\n\t"
2018 "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2019 "movclr.l %%acc0, %%d6\n\t"
2020 "asl.l #3, %%d6\n\t"
2021 "move.l %%d6, (5*4, %[x])\n\t"
2022 "neg.l %%d6\n\t"
2023 "move.l %%d6, (12*4, %[x])\n\t"
2024
2025 "move.l #0x0acf37ad, %%d1\n\t"
2026 "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2027 "move.l #0x0898c779, %%d1\n\t"
2028 "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2029 "move.l #0x0e313245, %%d1\n\t"
2030 "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2031 "move.l #0x0f426cb5, %%d1\n\t"
2032 "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2033 "move.l #0x03768962, %%d1\n\t"
2034 "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2035 "move.l #0x00b2aa3e, %%d1\n\t"
2036 "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2037 "move.l #0x0ffc19fd, %%d1\n\t"
2038 "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2039 "move.l #0x0f9ee890, %%d1\n\t"
2040 "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2041 "move.l #0x04cfb0e2, %%d1\n\t"
2042 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2043 "move.l #0x07635284, %%d1\n\t"
2044 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2045 "move.l #0x0d7e8807, %%d1\n\t"
2046 "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2047 "move.l #0x0bcbe352, %%d1\n\t"
2048 "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2049 "movclr.l %%acc0, %%d6\n\t"
2050 "asl.l #3, %%d6\n\t"
2051 "add.l %%d7, %%d6\n\t"
2052 "move.l %%d6, (%[x])\n\t"
2053 "neg.l %%d6\n\t"
2054 "move.l %%d6, (17*4, %[x])\n\t"
2055
2056 "move.l #0x0f9ee890, %%d1\n\t"
2057 "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2058 "move.l #0x07635284, %%d1\n\t"
2059 "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2060 "move.l #0x00b2aa3e, %%d1\n\t"
2061 "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2062 "move.l #0x0bcbe352, %%d1\n\t"
2063 "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2064 "move.l #0x0f426cb5, %%d1\n\t"
2065 "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2066 "move.l #0x0d7e8807, %%d1\n\t"
2067 "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2068 "move.l #0x0898c779, %%d1\n\t"
2069 "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2070 "move.l #0x04cfb0e2, %%d1\n\t"
2071 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2072 "move.l #0x0acf37ad, %%d1\n\t"
2073 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2074 "move.l #0x0ffc19fd, %%d1\n\t"
2075 "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2076 "move.l #0x0e313245, %%d1\n\t"
2077 "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2078 "move.l #0x03768962, %%d1\n\t"
2079 "msac.l %%d1, %%d0, (1*4, %[X]), %%d0, %%acc0\n\t"
2080 "movclr.l %%acc0, %%d6\n\t"
2081 "asl.l #3, %%d6\n\t"
2082 "add.l %%d7, %%d6\n\t"
2083 "move.l %%d6, (24*4, %[x])\n\t"
2084 "move.l %%d6, (29*4, %[x])\n\t"
2085
2086 "move.l #0x0216a2a2, %%d1\n\t"
2087 "msac.l %%d1, %%d0, (7*4, %[X]), %%d0, %%acc0\n\t"
2088 "move.l #0x09bd7ca0, %%d1\n\t"
2089 "msac.l %%d1, %%d0, (10*4, %[X]), %%d0, %%acc0\n\t"
2090 "move.l #0x0cb19346, %%d1\n\t"
2091 "mac.l %%d1, %%d0, (16*4, %[X]), %%d0, %%acc0\n\t"
2092 "move.l #0x0fdcf549, %%d1\n\t"
2093 "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2094 "movclr.l %%acc0, %%d7\n\t"
2095 "asl.l #3, %%d7\n\t"
2096 "add.l %%d5, %%d7\n\t"
2097 "move.l %%d7, (3*4, %[t])\n\t"
2098
2099 "move.l #0x00b2aa3e, %%d1\n\t"
2100 "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2101 "move.l #0x03768962, %%d1\n\t"
2102 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2103 "move.l #0x04cfb0e2, %%d1\n\t"
2104 "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2105 "move.l #0x07635284, %%d1\n\t"
2106 "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2107 "move.l #0x0898c779, %%d1\n\t"
2108 "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2109 "move.l #0x0acf37ad, %%d1\n\t"
2110 "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2111 "move.l #0x0bcbe352, %%d1\n\t"
2112 "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2113 "move.l #0x0d7e8807, %%d1\n\t"
2114 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2115 "move.l #0x0e313245, %%d1\n\t"
2116 "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2117 "move.l #0x0f426cb5, %%d1\n\t"
2118 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2119 "move.l #0x0f9ee890, %%d1\n\t"
2120 "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2121 "move.l #0x0ffc19fd, %%d1\n\t"
2122 "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2123 "movclr.l %%acc0, %%d6\n\t"
2124 "asl.l #3, %%d6\n\t"
2125 "add.l %%d7, %%d6\n\t"
2126 "move.l %%d6, (8*4, %[x])\n\t"
2127 "neg.l %%d6\n\t"
2128 "move.l %%d6, (9*4, %[x])\n\t"
2129
2130 "move.l #0x0e313245, %%d1\n\t"
2131 "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2132 "move.l #0x0bcbe352, %%d1\n\t"
2133 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2134 "move.l #0x0f9ee890, %%d1\n\t"
2135 "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2136 "move.l #0x0898c779, %%d1\n\t"
2137 "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2138 "move.l #0x0ffc19fd, %%d1\n\t"
2139 "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2140 "move.l #0x04cfb0e2, %%d1\n\t"
2141 "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2142 "move.l #0x0f426cb5, %%d1\n\t"
2143 "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2144 "move.l #0x00b2aa3e, %%d1\n\t"
2145 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2146 "move.l #0x0d7e8807, %%d1\n\t"
2147 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2148 "move.l #0x03768962, %%d1\n\t"
2149 "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2150 "move.l #0x0acf37ad, %%d1\n\t"
2151 "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2152 "move.l #0x07635284, %%d1\n\t"
2153 "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2154 "movclr.l %%acc0, %%d6\n\t"
2155 "asl.l #3, %%d6\n\t"
2156 "add.l %%d7, %%d6\n\t"
2157 "move.l %%d6, (21*4, %[x])\n\t"
2158 "move.l %%d6, (32*4, %[x])\n\t"
2159
2160 "move.l #0x0d7e8807, %%d1\n\t"
2161 "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2162 "move.l #0x0f426cb5, %%d1\n\t"
2163 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2164 "move.l #0x0acf37ad, %%d1\n\t"
2165 "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2166 "move.l #0x0ffc19fd, %%d1\n\t"
2167 "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2168 "move.l #0x07635284, %%d1\n\t"
2169 "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2170 "move.l #0x0f9ee890, %%d1\n\t"
2171 "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2172 "move.l #0x03768962, %%d1\n\t"
2173 "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2174 "move.l #0x0e313245, %%d1\n\t"
2175 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2176 "move.l #0x00b2aa3e, %%d1\n\t"
2177 "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2178 "move.l #0x0bcbe352, %%d1\n\t"
2179 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2180 "move.l #0x04cfb0e2, %%d1\n\t"
2181 "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2182 "move.l #0x0898c779, %%d1\n\t"
2183 "msac.l %%d1, %%d0, (14*4, %[t]), %%d0, %%acc0\n\t"
2184 "movclr.l %%acc0, %%d6\n\t"
2185 "asl.l #3, %%d6\n\t"
2186 "sub.l %%d7, %%d6\n\t"
2187 "move.l %%d6, (20*4, %[x])\n\t"
2188 "move.l %%d6, (33*4, %[x])\n\t"
2189
2190 "move.l #0x0ec835e8, %%d1\n\t"
2191 "msac.l %%d1, %%d0, (15*4, %[t]), %%d0, %%acc0\n\t"
2192 "move.l #0x061f78aa, %%d1\n\t"
2193 "mac.l %%d1, %%d0, (12*4, %[t]), %%d0, %%acc0\n\t"
2194 "movclr.l %%acc0, %%d6\n\t"
2195 "asl.l #3, %%d6\n\t"
2196 "sub.l %%d5, %%d6\n\t"
2197 "move.l %%d6, (4*4, %[t])\n\t"
2198
2199 "move.l #0x061f78aa, %%d1\n\t"
2200 "mac.l %%d1, %%d0, (13*4, %[t]), %%d0, %%acc0\n\t"
2201 "move.l #0x0ec835e8, %%d1\n\t"
2202 "mac.l %%d1, %%d0, (8*4, %[t]), %%d0, %%acc0\n\t"
2203 "movclr.l %%acc0, %%d7\n\t" /* don't need t7 anymore */
2204 "asl.l #3, %%d7\n\t"
2205 "add.l %%d6, %%d7\n\t"
2206 "move.l %%d7, (4*4, %[x])\n\t"
2207 "neg.l %%d7\n\t"
2208 "move.l %%d7, (13*4, %[x])\n\t"
2209
2210 "move.l #0x09bd7ca0, %%d1\n\t"
2211 "mac.l %%d1, %%d0, (9*4, %[t]), %%d0, %%acc0\n\t"
2212 "move.l #0x0216a2a2, %%d1\n\t"
2213 "msac.l %%d1, %%d0, (10*4, %[t]), %%d0, %%acc0\n\t"
2214 "move.l #0x0fdcf549, %%d1\n\t"
2215 "mac.l %%d1, %%d0, (11*4, %[t]), %%d0, %%acc0\n\t"
2216 "move.l #0x0cb19346, %%d1\n\t"
2217 "msac.l %%d1, %%d0, (8*4, %[t]), %%d0, %%acc0\n\t"
2218 "movclr.l %%acc0, %%d7\n\t"
2219 "asl.l #3, %%d7\n\t"
2220 "add.l %%d6, %%d7\n\t"
2221 "move.l %%d7, (1*4, %[x])\n\t"
2222 "neg.l %%d7\n\t"
2223 "move.l %%d7, (16*4, %[x])\n\t"
2224
2225 "move.l #0x0fdcf549, %%d1\n\t"
2226 "msac.l %%d1, %%d0, (9*4, %[t]), %%d0, %%acc0\n\t"
2227 "move.l #0x0cb19346, %%d1\n\t"
2228 "msac.l %%d1, %%d0, (10*4, %[t]), %%d0, %%acc0\n\t"
2229 "move.l #0x09bd7ca0, %%d1\n\t"
2230 "msac.l %%d1, %%d0, (11*4, %[t]), %%d0, %%acc0\n\t"
2231 "move.l #0x0216a2a2, %%d1\n\t"
2232 "msac.l %%d1, %%d0, (1*4, %[X]), %%d0, %%acc0\n\t"
2233 "movclr.l %%acc0, %%d7\n\t"
2234 "asl.l #3, %%d7\n\t"
2235 "add.l %%d6, %%d7\n\t"
2236 "move.l %%d7, (25*4, %[x])\n\t"
2237 "move.l %%d7, (28*4, %[x])\n\t"
2238
2239 "move.l #0x0fdcf549, %%d1\n\t"
2240 "msac.l %%d1, %%d0, (7*4, %[X]), %%d0, %%acc0\n\t"
2241 "move.l #0x0cb19346, %%d1\n\t"
2242 "msac.l %%d1, %%d0, (10*4, %[X]), %%d0, %%acc0\n\t"
2243 "move.l #0x09bd7ca0, %%d1\n\t"
2244 "msac.l %%d1, %%d0, (16*4, %[X]), %%d0, %%acc0\n\t"
2245 "move.l #0x0216a2a2, %%d1\n\t"
2246 "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2247 "movclr.l %%acc0, %%d6\n\t"
2248 "asl.l #3, %%d6\n\t"
2249 "sub.l (6*4, %[t]), %%d6\n\t"
2250 "move.l %%d6, (5*4, %[t])\n\t"
2251
2252 "move.l #0x0898c779, %%d1\n\t"
2253 "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2254 "move.l #0x04cfb0e2, %%d1\n\t"
2255 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2256 "move.l #0x0bcbe352, %%d1\n\t"
2257 "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2258 "move.l #0x00b2aa3e, %%d1\n\t"
2259 "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2260 "move.l #0x0e313245, %%d1\n\t"
2261 "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2262 "move.l #0x03768962, %%d1\n\t"
2263 "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2264 "move.l #0x0f9ee890, %%d1\n\t"
2265 "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2266 "move.l #0x07635284, %%d1\n\t"
2267 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2268 "move.l #0x0ffc19fd, %%d1\n\t"
2269 "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2270 "move.l #0x0acf37ad, %%d1\n\t"
2271 "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2272 "move.l #0x0f426cb5, %%d1\n\t"
2273 "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2274 "move.l #0x0d7e8807, %%d1\n\t"
2275 "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2276 "movclr.l %%acc0, %%d7\n\t"
2277 "asl.l #3, %%d7\n\t"
2278 "add.l %%d6, %%d7\n\t"
2279 "move.l %%d7, (2*4, %[x])\n\t"
2280 "neg.l %%d7\n\t"
2281 "move.l %%d7, (15*4, %[x])\n\t"
2282
2283 "move.l #0x07635284, %%d1\n\t"
2284 "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2285 "move.l #0x0acf37ad, %%d1\n\t"
2286 "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2287 "move.l #0x03768962, %%d1\n\t"
2288 "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2289 "move.l #0x0d7e8807, %%d1\n\t"
2290 "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2291 "move.l #0x00b2aa3e, %%d1\n\t"
2292 "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2293 "move.l #0x0f426cb5, %%d1\n\t"
2294 "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2295 "move.l #0x04cfb0e2, %%d1\n\t"
2296 "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2297 "move.l #0x0ffc19fd, %%d1\n\t"
2298 "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2299 "move.l #0x0898c779, %%d1\n\t"
2300 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2301 "move.l #0x0f9ee890, %%d1\n\t"
2302 "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2303 "move.l #0x0bcbe352, %%d1\n\t"
2304 "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2305 "move.l #0x0e313245, %%d1\n\t"
2306 "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2307 "movclr.l %%acc0, %%d7\n\t"
2308 "asl.l #3, %%d7\n\t"
2309 "add.l %%d6, %%d7\n\t"
2310 "move.l %%d7, (3*4, %[x])\n\t"
2311 "neg.l %%d7\n\t"
2312 "move.l %%d7, (14*4, %[x])\n\t"
2313
2314 "move.l #0x0ffc19fd, %%d1\n\t"
2315 "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t"
2316 "move.l #0x0f9ee890, %%d1\n\t"
2317 "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t"
2318 "move.l #0x0f426cb5, %%d1\n\t"
2319 "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t"
2320 "move.l #0x0e313245, %%d1\n\t"
2321 "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t"
2322 "move.l #0x0d7e8807, %%d1\n\t"
2323 "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t"
2324 "move.l #0x0bcbe352, %%d1\n\t"
2325 "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t"
2326 "move.l #0x0acf37ad, %%d1\n\t"
2327 "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t"
2328 "move.l #0x0898c779, %%d1\n\t"
2329 "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t"
2330 "move.l #0x07635284, %%d1\n\t"
2331 "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t"
2332 "move.l #0x04cfb0e2, %%d1\n\t"
2333 "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t"
2334 "move.l #0x03768962, %%d1\n\t"
2335 "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t"
2336 "move.l #0x00b2aa3e, %%d1\n\t"
2337 "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t"
2338 "movclr.l %%acc0, %%d7\n\t"
2339 "asl.l #3, %%d7\n\t"
2340 "add.l %%d6, %%d7\n\t"
2341 "move.l %%d7, (26*4, %[x])\n\t"
2342 "move.l %%d7, (27*4, %[x])\n\t"
2343 : : [X] "a" (X), [x] "a" (x), [t] "a" (t) /* pointer inputs only: the asm reads X[] and writes x[] and t[] through them */
2344 : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "memory", "cc"); /* "memory" keeps the compiler from caching or reordering X[]/x[] accesses across the asm */
2345 /* pfew */
2346}
2347
2348#else
2349
1769static inline 2350static inline
1770void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36]) 2351void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
1771{ 2352{
@@ -2052,6 +2633,8 @@ void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36])
2052 2633
2053 x[26] = x[27] = MAD_F_MLZ(hi, lo) + t5; 2634 x[26] = x[27] = MAD_F_MLZ(hi, lo) + t5;
2054} 2635}
2636#endif /* MCF5249 */
2637
2055# endif 2638# endif
2056 2639
2057/* 2640/*
@@ -2147,112 +2730,7 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36],
2147 */ 2730 */
2148 2731
2149# if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) 2732# if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
2150/* this should probably be stuffed in a .S file somewhere, it's almost 2733void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]);
2151 100% asm as it is.
2152 */
2153static
2154void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36])
2155{
2156 mad_fixed_t y[36], *yptr;
2157 mad_fixed_t const *wptr;
2158
2159 /* IMDCT */
2160 yptr = &y[0];
2161
2162 /* if additional precision is needed in this block, it is possible to
2163 * get more low bits out of the accext01 register _before_ doing the
2164 * movclrs.
2165 */
2166 asm volatile (
2167 "move.l #0x000000b0, %%macsr\n\t" /* frac. mode, saturation, rounding */
2168 "suba.l %%a0, %%a0\n\t" /* clear loop variable */
2169 ".align 2\n\t.imdctloop:\n\t" /* outer loop label */
2170 "lea.l imdct_s, %%a1\n\t" /* load pointer to imdct coefs in a1 */
2171 "movem.l (%[X]), %%d0-%%d5\n\t" /* load input data in d0-d5 */
2172
2173 "clr.l %%d7\n\t" /* init loop variable */
2174 "move.l (%%a1)+, %%a5\n\t" /* load imdct coef in a5 */
2175 ".align 2\n\t.macloop:\n\t" /* inner loop label */
2176 "mac.l %%d0, %%a5, (%%a1)+, %%a5, %%acc0\n\t" /* mac sequence */
2177 "mac.l %%d1, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2178 "mac.l %%d2, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2179 "mac.l %%d3, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2180 "mac.l %%d4, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2181 "mac.l %%d5, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2182 "movclr.l %%acc0, %%d6\n\t" /* get result, left shifted once */
2183 "asl.l #3, %%d6\n\t" /* got one shift free, shift three more */
2184 "mov.l %%d6, (%[yptr], %%d7.l*4)\n\t" /* yptr[i] = result */
2185 "neg.l %%d6\n\t"
2186 "neg.l %%d7\n\t"
2187 "mov.l %%d6, (5*4, %[yptr], %%d7.l*4)\n\t" /* yptr[5 - 1] = -result */
2188 "mac.l %%d0, %%a5, (%%a1)+, %%a5, %%acc0\n\t" /* mac sequence */
2189 "mac.l %%d1, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2190 "mac.l %%d2, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2191 "mac.l %%d3, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2192 "mac.l %%d4, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2193 "mac.l %%d5, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2194 "movclr.l %%acc0, %%d6\n\t" /* get result */
2195 "asl.l #3, %%d6\n\t"
2196 "mov.l %%d6, (11*4, %[yptr], %%d7.l*4)\n\t" /* yptr[11 - i] = result*/
2197 "neg.l %%d7\n\t"
2198 "mov.l %%d6, (6*4, %[yptr], %%d7.l*4)\n\t" /* yptr[i + 6] = result */
2199 "addq.l #1, %%d7\n\t" /* increment inner loop variable */
2200 "cmp.l #3, %%d7\n\t" /* we do three inner loop iterations */
2201 "jne .macloop\n\t"
2202
2203 "adda.l #48, %[yptr]\n\t" /* add pointer increment */
2204 "adda.l #24, %[X]\n\t"
2205 "addq.l #1, %%a0\n\t" /* increment outer loop variable */
2206 "cmpa.l #3, %%a0\n\t" /* we do three outer loop iterations */
2207 "jne .imdctloop\n\t"
2208 : [X] "+a" (X), [yptr] "+a" (yptr)
2209 : : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a0", "a1", "a5");
2210
2211 /* windowing, overlapping and concatenation */
2212
2213 yptr = &y[0];
2214 wptr = &window_s[0];
2215
2216 asm volatile (
2217 "clr.l %%d7\n\t"
2218 ".align 2\n\t.overlaploop:\n\t"
2219 "clr.l (%[z], %%d7.l*4)\n\t" /* z[i + 0] = 0 */
2220 "move.l (%[wptr]), %%d0\n\t"
2221 "move.l (%[yptr]), %%d2\n\t"
2222 "mac.l %%d0, %%d2, 24(%[wptr]), %%d1, %%acc0\n\t"
2223 "movclr.l %%acc0, %%d6\n\t"
2224 "asl.l #3, %%d6\n\t"
2225 "move.l %%d6, (6*4, %[z], %%d7.l*4)\n\t" /* z[i + 6] = result */
2226
2227 "move.l 24(%[yptr]), %%d2\n\t"
2228 "mac.l %%d1, %%d2, 48(%[yptr]), %%d2, %%acc0\n\t"
2229 "mac.l %%d0, %%d2, 72(%[yptr]), %%d2, %%acc0\n\t"
2230 "movclr.l %%acc0, %%d6\n\t"
2231 "asl.l #3, %%d6\n\t"
2232 "move.l %%d6, (12*4, %[z], %%d7.l*4)\n\t" /* z[i + 12] = result */
2233
2234 "mac.l %%d1, %%d2, (24*4, %[yptr]), %%d2, %%acc0\n\t"
2235 "mac.l %%d0, %%d2, (30*4, %[yptr]), %%d2, %%acc0\n\t"
2236 "movclr.l %%acc0, %%d6\n\t"
2237 "asl.l #3, %%d6\n\t"
2238 "move.l %%d6, (18*4, %[z], %%d7.l*4)\n\t" /* z[i + 18] = result */
2239
2240 "mac.l %%d1, %%d2, %%acc0\n\t"
2241 "movclr.l %%acc0, %%d6\n\t"
2242 "asl.l #3, %%d6\n\t"
2243 "move.l %%d6, (24*4, %[z], %%d7.l*4)\n\t" /* z[i + 24] = result */
2244
2245 "clr.l (30*4, %[z], %%d7.l*4)\n\t" /* z[i + 30] = 0 */
2246 "addq.l #1, %%d7\n\t"
2247 "addq.l #4, %[yptr]\n\t"
2248 "addq.l #4, %[wptr]\n\t"
2249 "cmp.l #6, %%d7\n\t" /* six iterations */
2250 "jne .overlaploop\n\t"
2251 : [yptr] "+a" (yptr), [wptr] "+a" (wptr)
2252 : [z] "a" (z)
2253 : "d7");
2254}
2255
2256#else 2734#else
2257 2735
2258static 2736static