summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2010-07-11 13:44:41 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2010-07-11 13:44:41 +0000
commit: 12857cedfa93cdd7a6092902b54b5b9ef4165779 (patch)
tree: b9ae4fa3b98fb2b07eeaf3c82e5ea95948ed8e8f
parent: 6cf66eb08ca2fee1c63147dae685afcb4d2046c4 (diff)
download: rockbox-12857cedfa93cdd7a6092902b54b5b9ef4165779.tar.gz
download: rockbox-12857cedfa93cdd7a6092902b54b5b9ef4165779.zip
EMACify HE-AAC QMF on coldfire: 5% speedup on MCF5249.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27381 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libfaad/iq_table.h | 2
-rw-r--r-- apps/codecs/libfaad/sbr_qmf.c | 72
2 files changed, 54 insertions, 20 deletions
diff --git a/apps/codecs/libfaad/iq_table.h b/apps/codecs/libfaad/iq_table.h
index 84d279c031..52888dd13a 100644
--- a/apps/codecs/libfaad/iq_table.h
+++ b/apps/codecs/libfaad/iq_table.h
@@ -8243,6 +8243,8 @@ ALIGN static const real_t iq_table[IQ_TABLE_SIZE] =
8243 8243
8244#else 8244#else
8245 8245
8246#define BIG_IQ_TABLE /* using BIG_IQ_TABLE creates faster code */
8247
8246#ifdef BIG_IQ_TABLE 8248#ifdef BIG_IQ_TABLE
8247#define IQ_TABLE_SIZE 8192 8249#define IQ_TABLE_SIZE 8192
8248#else 8250#else
diff --git a/apps/codecs/libfaad/sbr_qmf.c b/apps/codecs/libfaad/sbr_qmf.c
index 944e619c5a..beb0c6496e 100644
--- a/apps/codecs/libfaad/sbr_qmf.c
+++ b/apps/codecs/libfaad/sbr_qmf.c
@@ -512,23 +512,7 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
512 p_buf_1 = qmfs->v + qmfs->v_index; 512 p_buf_1 = qmfs->v + qmfs->v_index;
513 513
514 /* calculate 64 output samples and window */ 514 /* calculate 64 output samples and window */
515#if !defined(CPU_ARM) 515#ifdef CPU_ARM
516 for (k = 0; k < 64; k++)
517 {
518 idx0 = k*10;
519 output[out++] = FAAD_SYNTHESIS_SCALE(
520 MUL_F(p_buf_1[k ], qmf_c[idx0 ]) +
521 MUL_F(p_buf_1[k+ 192 ], qmf_c[idx0+1]) +
522 MUL_F(p_buf_1[k+ 256 ], qmf_c[idx0+2]) +
523 MUL_F(p_buf_1[k+ 256+192], qmf_c[idx0+3]) +
524 MUL_F(p_buf_1[k+ 512 ], qmf_c[idx0+4]) +
525 MUL_F(p_buf_1[k+ 512+192], qmf_c[idx0+5]) +
526 MUL_F(p_buf_1[k+ 768 ], qmf_c[idx0+6]) +
527 MUL_F(p_buf_1[k+ 768+192], qmf_c[idx0+7]) +
528 MUL_F(p_buf_1[k+1024 ], qmf_c[idx0+8]) +
529 MUL_F(p_buf_1[k+1024+192], qmf_c[idx0+9]));
530 }
531#else
532 const real_t *qtab = qmf_c; 516 const real_t *qtab = qmf_c;
533 real_t *pbuf = p_buf_1; 517 real_t *pbuf = p_buf_1;
534 for (k = 0; k < 64; k++, pbuf++) 518 for (k = 0; k < 64; k++, pbuf++)
@@ -544,7 +528,7 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
544 "smlal r5, r6, r4, r2 \n\t" 528 "smlal r5, r6, r4, r2 \n\t"
545 "ldr r4, [%[pbuf], #448*4] \n\t" 529 "ldr r4, [%[pbuf], #448*4] \n\t"
546 "smlal r5, r6, r4, r3 \n\t" 530 "smlal r5, r6, r4, r3 \n\t"
547 531
548 "ldmia %[qtab]!, { r0-r3 } \n\t" 532 "ldmia %[qtab]!, { r0-r3 } \n\t"
549 "ldr r4, [%[pbuf], #512*4] \n\t" 533 "ldr r4, [%[pbuf], #512*4] \n\t"
550 "smlal r5, r6, r4, r0 \n\t" 534 "smlal r5, r6, r4, r0 \n\t"
@@ -554,7 +538,7 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
554 "smlal r5, r6, r4, r2 \n\t" 538 "smlal r5, r6, r4, r2 \n\t"
555 "ldr r4, [%[pbuf], #960*4] \n\t" 539 "ldr r4, [%[pbuf], #960*4] \n\t"
556 "smlal r5, r6, r4, r3 \n\t" 540 "smlal r5, r6, r4, r3 \n\t"
557 541
558 "ldmia %[qtab]!, { r0-r1 } \n\t" 542 "ldmia %[qtab]!, { r0-r1 } \n\t"
559 "mov r2, #1024*4 \n\t" 543 "mov r2, #1024*4 \n\t"
560 "ldr r4, [%[pbuf], r2] \n\t" 544 "ldr r4, [%[pbuf], r2] \n\t"
@@ -563,11 +547,59 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
563 "ldr r4, [%[pbuf], r2] \n\t" 547 "ldr r4, [%[pbuf], r2] \n\t"
564 "smlal r5, r6, r4, r1 \n\t" 548 "smlal r5, r6, r4, r1 \n\t"
565 549
566 "str r6, [%[pout]] \n" 550 "str r6, [%[pout]] \n"
567 : [qtab] "+r" (qtab) 551 : [qtab] "+r" (qtab)
568 : [pbuf] "r" (pbuf), [pout] "r" (pout) 552 : [pbuf] "r" (pbuf), [pout] "r" (pout)
569 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "memory"); 553 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "memory");
570 } 554 }
555#elif defined CPU_COLDFIRE
556 const real_t *qtab = qmf_c;
557 real_t *pbuf = p_buf_1;
558 for (k = 0; k < 64; k++, pbuf++)
559 {
560 real_t *pout = &output[out++];
561 asm volatile (
562 "move.l (%[pbuf]), %%d5 \n"
563
564 "movem.l (%[qtab]), %%d0-%%d4 \n"
565 "mac.l %%d0, %%d5, (192*4, %[pbuf]), %%d5, %%acc0 \n"
566 "mac.l %%d1, %%d5, (256*4, %[pbuf]), %%d5, %%acc0 \n"
567 "mac.l %%d2, %%d5, (448*4, %[pbuf]), %%d5, %%acc0 \n"
568 "mac.l %%d3, %%d5, (512*4, %[pbuf]), %%d5, %%acc0 \n"
569 "mac.l %%d4, %%d5, (704*4, %[pbuf]), %%d5, %%acc0 \n"
570 "lea.l (20, %[qtab]), %[qtab] \n"
571
572 "movem.l (%[qtab]), %%d0-%%d4 \n"
573 "mac.l %%d0, %%d5, (768*4, %[pbuf]), %%d5, %%acc0 \n"
574 "mac.l %%d1, %%d5, (960*4, %[pbuf]), %%d5, %%acc0 \n"
575 "mac.l %%d2, %%d5, (1024*4, %[pbuf]), %%d5, %%acc0 \n"
576 "mac.l %%d3, %%d5, (1216*4, %[pbuf]), %%d5, %%acc0 \n"
577 "mac.l %%d4, %%d5, %%acc0 \n"
578 "lea.l (20, %[qtab]), %[qtab] \n"
579
580 "movclr.l %%acc0, %%d0 \n"
581 "move.l %%d0, (%[pout]) \n"
582 : [qtab] "+a" (qtab)
583 : [pbuf] "a" (pbuf),
584 [pout] "a" (pout)
585 : "d0", "d1", "d2", "d3", "d4", "d5", "memory");
586 }
587#else
588 for (k = 0; k < 64; k++)
589 {
590 idx0 = k*10;
591 output[out++] = FAAD_SYNTHESIS_SCALE(
592 MUL_F(p_buf_1[k ], qmf_c[idx0 ]) +
593 MUL_F(p_buf_1[k+ 192 ], qmf_c[idx0+1]) +
594 MUL_F(p_buf_1[k+ 256 ], qmf_c[idx0+2]) +
595 MUL_F(p_buf_1[k+ 256+192], qmf_c[idx0+3]) +
596 MUL_F(p_buf_1[k+ 512 ], qmf_c[idx0+4]) +
597 MUL_F(p_buf_1[k+ 512+192], qmf_c[idx0+5]) +
598 MUL_F(p_buf_1[k+ 768 ], qmf_c[idx0+6]) +
599 MUL_F(p_buf_1[k+ 768+192], qmf_c[idx0+7]) +
600 MUL_F(p_buf_1[k+1024 ], qmf_c[idx0+8]) +
601 MUL_F(p_buf_1[k+1024+192], qmf_c[idx0+9]));
602 }
571#endif 603#endif
572 604
573 /* update ringbuffer index */ 605 /* update ringbuffer index */