summaryrefslogtreecommitdiff
path: root/apps
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2005-03-06 22:13:44 +0000
committerThom Johansen <thomj@rockbox.org>2005-03-06 22:13:44 +0000
commite78186f4cfe0adae87cf499b73a75807d019d9bc (patch)
tree3d051bd1bbc80a5d43bce3bc7b238e76755425cc /apps
parent6a33dd761eab41585fc1eaed933e561724454d99 (diff)
downloadrockbox-e78186f4cfe0adae87cf499b73a75807d019d9bc.tar.gz
rockbox-e78186f4cfe0adae87cf499b73a75807d019d9bc.zip
Added asm optimized short block IMDCT and windowing.
Removed a warning in synth.c. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6159 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r--apps/codecs/libmad/layer3.c112
-rw-r--r--apps/codecs/libmad/synth.c7
2 files changed, 116 insertions, 3 deletions
diff --git a/apps/codecs/libmad/layer3.c b/apps/codecs/libmad/layer3.c
index b1a9919af0..27c8d18430 100644
--- a/apps/codecs/libmad/layer3.c
+++ b/apps/codecs/libmad/layer3.c
@@ -2144,6 +2144,116 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36],
2144 * NAME: III_imdct_s() 2144 * NAME: III_imdct_s()
2145 * DESCRIPTION: perform IMDCT and windowing for short blocks 2145 * DESCRIPTION: perform IMDCT and windowing for short blocks
2146 */ 2146 */
2147
2148# if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
2149/* this should probably be stuffed in a .S file somewhere, it's almost
2150 100% asm as it is.
2151 */
2152static
2153void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36])
2154{
2155 mad_fixed_t y[36], *yptr; /* y: scratch for the raw IMDCT output, consumed by the windowing pass */
2156 mad_fixed_t const *wptr;
2157
2158 /* IMDCT */
2159 yptr = &y[0];
2160
2161 /* if additional precision is needed in this block, it is possible to
2162 * get more low bits out of the accext01 register _before_ doing the
2163 * movclrs. The asm stores into y[] through yptr, hence the "memory" clobber.
2164 */
2165 asm volatile (
2166 "move.l #0x000000b0, %%macsr\n\t" /* frac. mode, saturation, rounding */
2167 "suba.l %%a0, %%a0\n\t" /* clear loop variable */
2168 ".align 2\n\t.imdctloop:\n\t" /* outer loop label; NOTE(review): named label would clash if this body were emitted twice (inlining) - confirm */
2169 "lea.l imdct_s, %%a1\n\t" /* load pointer to imdct coefs in a1 */
2170 "movem.l (%[X]), %%d0-%%d5\n\t" /* load input data in d0-d5 */
2171
2172 "clr.l %%d7\n\t" /* init loop variable */
2173 "move.l (%%a1)+, %%a5\n\t" /* load imdct coef in a5 */
2174 ".align 2\n\t.macloop:\n\t" /* inner loop label */
2175 "mac.l %%d0, %%a5, (%%a1)+, %%a5, %%acc0\n\t" /* mac sequence */
2176 "mac.l %%d1, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2177 "mac.l %%d2, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2178 "mac.l %%d3, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2179 "mac.l %%d4, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2180 "mac.l %%d5, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2181 "movclr.l %%acc0, %%d6\n\t" /* get result, left shifted once */
2182 "asl.l #3, %%d6\n\t" /* got one shift free, shift three more */
2183 "mov.l %%d6, (%[yptr], %%d7.l*4)\n\t" /* yptr[i] = result */
2184 "neg.l %%d6\n\t" /* d6 = -result */
2185 "neg.l %%d7\n\t" /* d7 = -i, so the next two stores index downwards */
2186 "mov.l %%d6, (5*4, %[yptr], %%d7.l*4)\n\t" /* yptr[5 - i] = -result */
2187 "mac.l %%d0, %%a5, (%%a1)+, %%a5, %%acc0\n\t" /* mac sequence */
2188 "mac.l %%d1, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2189 "mac.l %%d2, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2190 "mac.l %%d3, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2191 "mac.l %%d4, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2192 "mac.l %%d5, %%a5, (%%a1)+, %%a5, %%acc0\n\t"
2193 "movclr.l %%acc0, %%d6\n\t" /* get result */
2194 "asl.l #3, %%d6\n\t"
2195 "mov.l %%d6, (11*4, %[yptr], %%d7.l*4)\n\t" /* yptr[11 - i] = result */
2196 "neg.l %%d7\n\t" /* restore d7 = i */
2197 "mov.l %%d6, (6*4, %[yptr], %%d7.l*4)\n\t" /* yptr[i + 6] = result */
2198 "addq.l #1, %%d7\n\t" /* increment inner loop variable */
2199 "cmp.l #3, %%d7\n\t" /* we do three inner loop iterations */
2200 "jne .macloop\n\t"
2201
2202 "adda.l #48, %[yptr]\n\t" /* add pointer increment */
2203 "adda.l #24, %[X]\n\t"
2204 "addq.l #1, %%a0\n\t" /* increment outer loop variable */
2205 "cmpa.l #3, %%a0\n\t" /* we do three outer loop iterations */
2206 "jne .imdctloop\n\t"
2207 : [X] "+a" (X), [yptr] "+a" (yptr)
2208 : : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a0", "a1", "a5", "cc", "memory");
2209
2210 /* windowing, overlapping and concatenation */
2211
2212 yptr = &y[0];
2213 wptr = &window_s[0];
2214
2215 asm volatile ( /* does not set macsr itself; runs with the mode selected by the asm statement above */
2216 "clr.l %%d7\n\t" /* loop variable i */
2217 ".align 2\n\t.overlaploop:\n\t"
2218 "clr.l (%[z], %%d7.l*4)\n\t" /* z[i + 0] = 0 */
2219 "move.l (%[wptr]), %%d0\n\t" /* d0 = wptr[0] */
2220 "move.l (%[yptr]), %%d2\n\t" /* d2 = yptr[0] */
2221 "mac.l %%d0, %%d2, 24(%[wptr]), %%d1, %%acc0\n\t" /* also loads d1 = wptr[6] */
2222 "movclr.l %%acc0, %%d6\n\t"
2223 "asl.l #3, %%d6\n\t"
2224 "move.l %%d6, (6*4, %[z], %%d7.l*4)\n\t" /* z[i + 6] = result */
2225
2226 "move.l 24(%[yptr]), %%d2\n\t"
2227 "mac.l %%d1, %%d2, 48(%[yptr]), %%d2, %%acc0\n\t"
2228 "mac.l %%d0, %%d2, 72(%[yptr]), %%d2, %%acc0\n\t"
2229 "movclr.l %%acc0, %%d6\n\t"
2230 "asl.l #3, %%d6\n\t"
2231 "move.l %%d6, (12*4, %[z], %%d7.l*4)\n\t" /* z[i + 12] = result */
2232
2233 "mac.l %%d1, %%d2, (24*4, %[yptr]), %%d2, %%acc0\n\t"
2234 "mac.l %%d0, %%d2, (30*4, %[yptr]), %%d2, %%acc0\n\t"
2235 "movclr.l %%acc0, %%d6\n\t"
2236 "asl.l #3, %%d6\n\t"
2237 "move.l %%d6, (18*4, %[z], %%d7.l*4)\n\t" /* z[i + 18] = result */
2238
2239 "mac.l %%d1, %%d2, %%acc0\n\t"
2240 "movclr.l %%acc0, %%d6\n\t"
2241 "asl.l #3, %%d6\n\t"
2242 "move.l %%d6, (24*4, %[z], %%d7.l*4)\n\t" /* z[i + 24] = result */
2243
2244 "clr.l (30*4, %[z], %%d7.l*4)\n\t" /* z[i + 30] = 0 */
2245 "addq.l #1, %%d7\n\t"
2246 "addq.l #4, %[yptr]\n\t"
2247 "addq.l #4, %[wptr]\n\t"
2248 "cmp.l #6, %%d7\n\t" /* six iterations */
2249 "jne .overlaploop\n\t"
2250 : [yptr] "+a" (yptr), [wptr] "+a" (wptr)
2251 : [z] "a" (z)
2252 : "d0", "d1", "d2", "d6", "d7", "cc", "memory"); /* every data register the loop writes, plus the stores to z[] */
2253}
2254
2255#else
2256
2147static 2257static
2148void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) 2258void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36])
2149{ 2259{
@@ -2219,6 +2329,8 @@ void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36])
2219 } 2329 }
2220} 2330}
2221 2331
2332#endif
2333
2222/* 2334/*
2223 * NAME: III_overlap() 2335 * NAME: III_overlap()
2224 * DESCRIPTION: perform overlap-add of windowed IMDCT outputs 2336 * DESCRIPTION: perform overlap-add of windowed IMDCT outputs
diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c
index 530f33cdf6..2d212c091f 100644
--- a/apps/codecs/libmad/synth.c
+++ b/apps/codecs/libmad/synth.c
@@ -24,6 +24,7 @@
24# endif 24# endif
25 25
26# include "global.h" 26# include "global.h"
27
27# include "fixed.h" 28# include "fixed.h"
28# include "frame.h" 29# include "frame.h"
29# include "synth.h" 30# include "synth.h"
@@ -100,6 +101,7 @@ void mad_synth_mute(struct mad_synth *synth)
100# endif 101# endif
101 102
102/* possible DCT speed optimization */ 103/* possible DCT speed optimization */
104
103# if defined(OPT_SPEED) && defined(MAD_F_MLX) 105# if defined(OPT_SPEED) && defined(MAD_F_MLX)
104# define OPT_DCTO 106# define OPT_DCTO
105# define MUL(x, y) \ 107# define MUL(x, y) \
@@ -112,6 +114,7 @@ void mad_synth_mute(struct mad_synth *synth)
112# undef OPT_DCTO 114# undef OPT_DCTO
113# define MUL(x, y) mad_f_mul((x), (y)) 115# define MUL(x, y) mad_f_mul((x), (y))
114# endif 116# endif
117
115/* 118/*
116 * NAME: dct32() 119 * NAME: dct32()
117 * DESCRIPTION: perform fast in[32]->out[32] DCT 120 * DESCRIPTION: perform fast in[32]->out[32] DCT
@@ -547,7 +550,6 @@ mad_fixed_t const D[17][32] __attribute__ ((section(".idata"))) = {
547void synth_full(struct mad_synth *, struct mad_frame const *, 550void synth_full(struct mad_synth *, struct mad_frame const *,
548 unsigned int, unsigned int); 551 unsigned int, unsigned int);
549# else 552# else
550
551/* 553/*
552 * NAME: synth->full() 554 * NAME: synth->full()
553 * DESCRIPTION: perform full frequency PCM synthesis 555 * DESCRIPTION: perform full frequency PCM synthesis
@@ -563,7 +565,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame,
563 mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8]; 565 mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8];
564 mad_fixed_t const (*sbsample)[36][32]; 566 mad_fixed_t const (*sbsample)[36][32];
565 mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; 567 mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8];
566 mad_fixed_t const (*Dptr)[32], *ptr; 568 mad_fixed_t const (*Dptr)[32];
567 mad_fixed64hi_t hi = 0; 569 mad_fixed64hi_t hi = 0;
568 mad_fixed64lo_t lo; 570 mad_fixed64lo_t lo;
569 571
@@ -1010,4 +1012,3 @@ void mad_synth_frame(struct mad_synth *synth, struct mad_frame const *frame)
1010 1012
1011 synth->phase = (synth->phase + ns) % 16; 1013 synth->phase = (synth->phase + ns) % 16;
1012} 1014}
1013