summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org> 2011-09-14 09:34:01 +0000
committer: Nils Wallménius <nils@rockbox.org> 2011-09-14 09:34:01 +0000
commit: 4f27931a583fc4b01054fc143e5a88064f9d6847 (patch)
tree: f8613308ed745645341b1d7bb25b355254d29ff8
parent: c3cffb766e5f548e0827c04783b34ce3978edf32 (diff)
download: rockbox-4f27931a583fc4b01054fc143e5a88064f9d6847.tar.gz
download: rockbox-4f27931a583fc4b01054fc143e5a88064f9d6847.zip
codeclib: further tweaks to the cf fft asm for a small speedup.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30541 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/lib/fft-ffmpeg_cf.h | 32 lines changed
1 file changed, 16 insertions, 16 deletions
diff --git a/apps/codecs/lib/fft-ffmpeg_cf.h b/apps/codecs/lib/fft-ffmpeg_cf.h
index 710e1dd1af..a29464a23d 100644
--- a/apps/codecs/lib/fft-ffmpeg_cf.h
+++ b/apps/codecs/lib/fft-ffmpeg_cf.h
@@ -212,16 +212,15 @@ static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wr
212 "mac.l %%d5, %[wre], (4, %[z2]), %%d4, %%acc0\n\t" 212 "mac.l %%d5, %[wre], (4, %[z2]), %%d4, %%acc0\n\t"
213 "mac.l %%d4, %[wim], %%acc0\n\t" 213 "mac.l %%d4, %[wim], %%acc0\n\t"
214 "mac.l %%d4, %[wre], (%[z3]), %%d6, %%acc1\n\t" 214 "mac.l %%d4, %[wre], (%[z3]), %%d6, %%acc1\n\t"
215 "msac.l %%d5, %[wim], (4,%[z3]), %%d7, %%acc1\n\t" 215 "msac.l %%d5, %[wim], (4, %[z3]), %%d7, %%acc1\n\t"
216 "mac.l %%d6, %[wre], %%acc2\n\t" 216 "mac.l %%d6, %[wre], (%[z])+, %%d4, %%acc2\n\t"
217 "msac.l %%d7, %[wim], %%acc2\n\t" 217 "msac.l %%d7, %[wim], (%[z])+, %%d5, %%acc2\n\t"
218 "mac.l %%d7, %[wre], %%acc3\n\t" 218 "mac.l %%d7, %[wre], %%acc3\n\t"
219 "mac.l %%d6, %[wim], %%acc3\n\t" 219 "mac.l %%d6, %[wim], %%acc3\n\t"
220 220
221 "movclr.l %%acc0, %[wre]\n\t" /* t1 */ 221 "movclr.l %%acc0, %[wre]\n\t" /* t1 */
222 "movclr.l %%acc2, %[wim]\n\t" /* t5 */ 222 "movclr.l %%acc2, %[wim]\n\t" /* t5 */
223 223
224 "movem.l (%[z]), %%d4-%%d5\n\t" /* load z0 */
225 "move.l %%d4, %%d6\n\t" 224 "move.l %%d4, %%d6\n\t"
226 "move.l %[wim], %%d7\n\t" 225 "move.l %[wim], %%d7\n\t"
227 "sub.l %[wre], %[wim]\n\t" /* t5 = t5-t1 */ 226 "sub.l %[wre], %[wim]\n\t" /* t5 = t5-t1 */
@@ -240,7 +239,7 @@ static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wr
240 239
241 "movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */ 240 "movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */
242 "add.l %[wre], %%d5\n\t" /* d5 = a0im + (t2+t6) => a0im */ 241 "add.l %[wre], %%d5\n\t" /* d5 = a0im + (t2+t6) => a0im */
243 "movem.l %%d4-%%d5, (%[z])\n\t" /* store z0 */ 242 "movem.l %%d4-%%d5, (-8, %[z])\n\t" /* store z0 */
244 243
245 "movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */ 244 "movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */
246 "move.l %%d4, %%d6\n\t" 245 "move.l %%d4, %%d6\n\t"
@@ -254,10 +253,11 @@ static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wr
254 "add.l %[wim], %%d5\n\t" 253 "add.l %[wim], %%d5\n\t"
255 "movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */ 254 "movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */
256 255
257 : [wre] "+r" (wre), [wim] "+r" (wim) /* we clobber these after using them */ 256 : [wre] "+r" (wre), [wim] "+r" (wim), /* we clobber these after using them */
258 : [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n]) 257 [z] "+a" (z)
258 : [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n])
259 : "d3", "d4", "d5", "d6", "d7", "cc", "memory"); 259 : "d3", "d4", "d5", "d6", "d7", "cc", "memory");
260 return z+1; 260 return z;
261} 261}
262 262
263static inline FFTComplex* TRANSFORM_W01(FFTComplex * z, unsigned int n, const FFTSample * w) 263static inline FFTComplex* TRANSFORM_W01(FFTComplex * z, unsigned int n, const FFTSample * w)
@@ -311,11 +311,12 @@ static inline FFTComplex* TRANSFORM_ZERO(FFTComplex * z, unsigned int n)
311 311
312static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n) 312static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
313{ 313{
314 asm volatile ("move.l (%[z2]), %%d5\n\t" 314 asm volatile ("movem.l (%[z2]), %%d0-%%d1\n\t"
315 "mac.l %%d5, %[PI2_8], (4, %[z2]), %%d5, %%acc0\n\t" 315 "move.l %[_cPI2_8], %%d2\n\t"
316 "mac.l %%d5, %[PI2_8], (%[z3]), %%d5, %%acc1\n\t" 316 "mac.l %%d0, %%d2, (%[z3]), %%d0, %%acc0\n\t"
317 "mac.l %%d5, %[PI2_8], (4,%[z3]), %%d5, %%acc2\n\t" 317 "mac.l %%d1, %%d2, (4, %[z3]), %%d1, %%acc1\n\t"
318 "mac.l %%d5, %[PI2_8], %%acc3\n\t" 318 "mac.l %%d0, %%d2, (%[z]), %%d4, %%acc2\n\t"
319 "mac.l %%d1, %%d2, (4, %[z]), %%d5, %%acc3\n\t"
319 320
320 "movclr.l %%acc0, %%d0\n\t" 321 "movclr.l %%acc0, %%d0\n\t"
321 "movclr.l %%acc1, %%d1\n\t" 322 "movclr.l %%acc1, %%d1\n\t"
@@ -330,7 +331,6 @@ static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
330 "add.l %%d2, %%d3\n\t" /* d3 == t6 */ 331 "add.l %%d2, %%d3\n\t" /* d3 == t6 */
331 "sub.l %%d7, %%d2\n\t" /* d2 == t5 */ 332 "sub.l %%d7, %%d2\n\t" /* d2 == t5 */
332 333
333 "movem.l (%[z]), %%d4-%%d5\n\t" /* load z0 */
334 "move.l %%d4, %%d6\n\t" 334 "move.l %%d4, %%d6\n\t"
335 "move.l %%d2, %%d7\n\t" 335 "move.l %%d2, %%d7\n\t"
336 "sub.l %%d0, %%d2\n\t" /* t5 = t5-t1 */ 336 "sub.l %%d0, %%d2\n\t" /* t5 = t5-t1 */
@@ -360,8 +360,8 @@ static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
360 "add.l %%d2, %%d5\n\t" 360 "add.l %%d2, %%d5\n\t"
361 "movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */ 361 "movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */
362 362
363 : 363 :: [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n]),
364 : [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n]), [PI2_8] "r" (cPI2_8) 364 [_cPI2_8] "i" (cPI2_8)
365 : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory"); 365 : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
366 366
367 return z+1; 367 return z+1;