diff options
author | Thom Johansen <thomj@rockbox.org> | 2005-03-06 22:13:44 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2005-03-06 22:13:44 +0000 |
commit | e78186f4cfe0adae87cf499b73a75807d019d9bc (patch) | |
tree | 3d051bd1bbc80a5d43bce3bc7b238e76755425cc /apps/codecs | |
parent | 6a33dd761eab41585fc1eaed933e561724454d99 (diff) | |
download | rockbox-e78186f4cfe0adae87cf499b73a75807d019d9bc.tar.gz rockbox-e78186f4cfe0adae87cf499b73a75807d019d9bc.zip |
Added asm optimized short block IMDCT and windowing.
Removed a warning in synth.c.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6159 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/libmad/layer3.c | 112 | ||||
-rw-r--r-- | apps/codecs/libmad/synth.c | 7 |
2 files changed, 116 insertions, 3 deletions
diff --git a/apps/codecs/libmad/layer3.c b/apps/codecs/libmad/layer3.c index b1a9919af0..27c8d18430 100644 --- a/apps/codecs/libmad/layer3.c +++ b/apps/codecs/libmad/layer3.c | |||
@@ -2144,6 +2144,116 @@ void III_imdct_l(mad_fixed_t const X[18], mad_fixed_t z[36], | |||
2144 | * NAME: III_imdct_s() | 2144 | * NAME: III_imdct_s() |
2145 | * DESCRIPTION: perform IMDCT and windowing for short blocks | 2145 | * DESCRIPTION: perform IMDCT and windowing for short blocks |
2146 | */ | 2146 | */ |
2147 | |||
2148 | # if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) | ||
2149 | /* this should probably be stuffed in a .S file somewhere, it's almost | ||
2150 | 100% asm as it is. | ||
2151 | */ | ||
2152 | static | ||
2153 | void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | ||
2154 | { | ||
2155 | mad_fixed_t y[36], *yptr; | ||
2156 | mad_fixed_t const *wptr; | ||
2157 | |||
2158 | /* IMDCT */ | ||
2159 | yptr = &y[0]; | ||
2160 | |||
2161 | /* if additional precision is needed in this block, it is possible to | ||
2162 | * get more low bits out of the accext01 register _before_ doing the | ||
2163 | * movclrs. | ||
2164 | */ | ||
2165 | asm volatile ( | ||
2166 | "move.l #0x000000b0, %%macsr\n\t" /* frac. mode, saturation, rounding */ | ||
2167 | "suba.l %%a0, %%a0\n\t" /* clear loop variable */ | ||
2168 | ".align 2\n\t.imdctloop:\n\t" /* outer loop label */ | ||
2169 | "lea.l imdct_s, %%a1\n\t" /* load pointer to imdct coefs in a1 */ | ||
2170 | "movem.l (%[X]), %%d0-%%d5\n\t" /* load input data in d0-d5 */ | ||
2171 | |||
2172 | "clr.l %%d7\n\t" /* init loop variable */ | ||
2173 | "move.l (%%a1)+, %%a5\n\t" /* load imdct coef in a5 */ | ||
2174 | ".align 2\n\t.macloop:\n\t" /* inner loop label */ | ||
2175 | "mac.l %%d0, %%a5, (%%a1)+, %%a5, %%acc0\n\t" /* mac sequence */ | ||
2176 | "mac.l %%d1, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2177 | "mac.l %%d2, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2178 | "mac.l %%d3, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2179 | "mac.l %%d4, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2180 | "mac.l %%d5, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2181 | "movclr.l %%acc0, %%d6\n\t" /* get result, left shifted once */ | ||
2182 | "asl.l #3, %%d6\n\t" /* got one shift free, shift three more */ | ||
2183 | "mov.l %%d6, (%[yptr], %%d7.l*4)\n\t" /* yptr[i] = result */ | ||
2184 | "neg.l %%d6\n\t" | ||
2185 | "neg.l %%d7\n\t" | ||
2186 | "mov.l %%d6, (5*4, %[yptr], %%d7.l*4)\n\t" /* yptr[5 - 1] = -result */ | ||
2187 | "mac.l %%d0, %%a5, (%%a1)+, %%a5, %%acc0\n\t" /* mac sequence */ | ||
2188 | "mac.l %%d1, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2189 | "mac.l %%d2, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2190 | "mac.l %%d3, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2191 | "mac.l %%d4, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2192 | "mac.l %%d5, %%a5, (%%a1)+, %%a5, %%acc0\n\t" | ||
2193 | "movclr.l %%acc0, %%d6\n\t" /* get result */ | ||
2194 | "asl.l #3, %%d6\n\t" | ||
2195 | "mov.l %%d6, (11*4, %[yptr], %%d7.l*4)\n\t" /* yptr[11 - i] = result*/ | ||
2196 | "neg.l %%d7\n\t" | ||
2197 | "mov.l %%d6, (6*4, %[yptr], %%d7.l*4)\n\t" /* yptr[i + 6] = result */ | ||
2198 | "addq.l #1, %%d7\n\t" /* increment inner loop variable */ | ||
2199 | "cmp.l #3, %%d7\n\t" /* we do three inner loop iterations */ | ||
2200 | "jne .macloop\n\t" | ||
2201 | |||
2202 | "adda.l #48, %[yptr]\n\t" /* add pointer increment */ | ||
2203 | "adda.l #24, %[X]\n\t" | ||
2204 | "addq.l #1, %%a0\n\t" /* increment outer loop variable */ | ||
2205 | "cmpa.l #3, %%a0\n\t" /* we do three outer loop iterations */ | ||
2206 | "jne .imdctloop\n\t" | ||
2207 | : [X] "+a" (X), [yptr] "+a" (yptr) | ||
2208 | : : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a0", "a1", "a5"); | ||
2209 | |||
2210 | /* windowing, overlapping and concatenation */ | ||
2211 | |||
2212 | yptr = &y[0]; | ||
2213 | wptr = &window_s[0]; | ||
2214 | |||
2215 | asm volatile ( | ||
2216 | "clr.l %%d7\n\t" | ||
2217 | ".align 2\n\t.overlaploop:\n\t" | ||
2218 | "clr.l (%[z], %%d7.l*4)\n\t" /* z[i + 0] = 0 */ | ||
2219 | "move.l (%[wptr]), %%d0\n\t" | ||
2220 | "move.l (%[yptr]), %%d2\n\t" | ||
2221 | "mac.l %%d0, %%d2, 24(%[wptr]), %%d1, %%acc0\n\t" | ||
2222 | "movclr.l %%acc0, %%d6\n\t" | ||
2223 | "asl.l #3, %%d6\n\t" | ||
2224 | "move.l %%d6, (6*4, %[z], %%d7.l*4)\n\t" /* z[i + 6] = result */ | ||
2225 | |||
2226 | "move.l 24(%[yptr]), %%d2\n\t" | ||
2227 | "mac.l %%d1, %%d2, 48(%[yptr]), %%d2, %%acc0\n\t" | ||
2228 | "mac.l %%d0, %%d2, 72(%[yptr]), %%d2, %%acc0\n\t" | ||
2229 | "movclr.l %%acc0, %%d6\n\t" | ||
2230 | "asl.l #3, %%d6\n\t" | ||
2231 | "move.l %%d6, (12*4, %[z], %%d7.l*4)\n\t" /* z[i + 12] = result */ | ||
2232 | |||
2233 | "mac.l %%d1, %%d2, (24*4, %[yptr]), %%d2, %%acc0\n\t" | ||
2234 | "mac.l %%d0, %%d2, (30*4, %[yptr]), %%d2, %%acc0\n\t" | ||
2235 | "movclr.l %%acc0, %%d6\n\t" | ||
2236 | "asl.l #3, %%d6\n\t" | ||
2237 | "move.l %%d6, (18*4, %[z], %%d7.l*4)\n\t" /* z[i + 18] = result */ | ||
2238 | |||
2239 | "mac.l %%d1, %%d2, %%acc0\n\t" | ||
2240 | "movclr.l %%acc0, %%d6\n\t" | ||
2241 | "asl.l #3, %%d6\n\t" | ||
2242 | "move.l %%d6, (24*4, %[z], %%d7.l*4)\n\t" /* z[i + 24] = result */ | ||
2243 | |||
2244 | "clr.l (30*4, %[z], %%d7.l*4)\n\t" /* z[i + 30] = 0 */ | ||
2245 | "addq.l #1, %%d7\n\t" | ||
2246 | "addq.l #4, %[yptr]\n\t" | ||
2247 | "addq.l #4, %[wptr]\n\t" | ||
2248 | "cmp.l #6, %%d7\n\t" /* six iterations */ | ||
2249 | "jne .overlaploop\n\t" | ||
2250 | : [yptr] "+a" (yptr), [wptr] "+a" (wptr) | ||
2251 | : [z] "a" (z) | ||
2252 | : "d7"); | ||
2253 | } | ||
2254 | |||
2255 | #else | ||
2256 | |||
2147 | static | 2257 | static |
2148 | void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | 2258 | void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) |
2149 | { | 2259 | { |
@@ -2219,6 +2329,8 @@ void III_imdct_s(mad_fixed_t const X[18], mad_fixed_t z[36]) | |||
2219 | } | 2329 | } |
2220 | } | 2330 | } |
2221 | 2331 | ||
2332 | #endif | ||
2333 | |||
2222 | /* | 2334 | /* |
2223 | * NAME: III_overlap() | 2335 | * NAME: III_overlap() |
2224 | * DESCRIPTION: perform overlap-add of windowed IMDCT outputs | 2336 | * DESCRIPTION: perform overlap-add of windowed IMDCT outputs |
diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c index 530f33cdf6..2d212c091f 100644 --- a/apps/codecs/libmad/synth.c +++ b/apps/codecs/libmad/synth.c | |||
@@ -24,6 +24,7 @@ | |||
24 | # endif | 24 | # endif |
25 | 25 | ||
26 | # include "global.h" | 26 | # include "global.h" |
27 | |||
27 | # include "fixed.h" | 28 | # include "fixed.h" |
28 | # include "frame.h" | 29 | # include "frame.h" |
29 | # include "synth.h" | 30 | # include "synth.h" |
@@ -100,6 +101,7 @@ void mad_synth_mute(struct mad_synth *synth) | |||
100 | # endif | 101 | # endif |
101 | 102 | ||
102 | /* possible DCT speed optimization */ | 103 | /* possible DCT speed optimization */ |
104 | |||
103 | # if defined(OPT_SPEED) && defined(MAD_F_MLX) | 105 | # if defined(OPT_SPEED) && defined(MAD_F_MLX) |
104 | # define OPT_DCTO | 106 | # define OPT_DCTO |
105 | # define MUL(x, y) \ | 107 | # define MUL(x, y) \ |
@@ -112,6 +114,7 @@ void mad_synth_mute(struct mad_synth *synth) | |||
112 | # undef OPT_DCTO | 114 | # undef OPT_DCTO |
113 | # define MUL(x, y) mad_f_mul((x), (y)) | 115 | # define MUL(x, y) mad_f_mul((x), (y)) |
114 | # endif | 116 | # endif |
117 | |||
115 | /* | 118 | /* |
116 | * NAME: dct32() | 119 | * NAME: dct32() |
117 | * DESCRIPTION: perform fast in[32]->out[32] DCT | 120 | * DESCRIPTION: perform fast in[32]->out[32] DCT |
@@ -547,7 +550,6 @@ mad_fixed_t const D[17][32] __attribute__ ((section(".idata"))) = { | |||
547 | void synth_full(struct mad_synth *, struct mad_frame const *, | 550 | void synth_full(struct mad_synth *, struct mad_frame const *, |
548 | unsigned int, unsigned int); | 551 | unsigned int, unsigned int); |
549 | # else | 552 | # else |
550 | |||
551 | /* | 553 | /* |
552 | * NAME: synth->full() | 554 | * NAME: synth->full() |
553 | * DESCRIPTION: perform full frequency PCM synthesis | 555 | * DESCRIPTION: perform full frequency PCM synthesis |
@@ -563,7 +565,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, | |||
563 | mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8]; | 565 | mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8]; |
564 | mad_fixed_t const (*sbsample)[36][32]; | 566 | mad_fixed_t const (*sbsample)[36][32]; |
565 | mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; | 567 | mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; |
566 | mad_fixed_t const (*Dptr)[32], *ptr; | 568 | mad_fixed_t const (*Dptr)[32]; |
567 | mad_fixed64hi_t hi = 0; | 569 | mad_fixed64hi_t hi = 0; |
568 | mad_fixed64lo_t lo; | 570 | mad_fixed64lo_t lo; |
569 | 571 | ||
@@ -1010,4 +1012,3 @@ void mad_synth_frame(struct mad_synth *synth, struct mad_frame const *frame) | |||
1010 | 1012 | ||
1011 | synth->phase = (synth->phase + ns) % 16; | 1013 | synth->phase = (synth->phase + ns) % 16; |
1012 | } | 1014 | } |
1013 | |||