summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andree Buschmann <AndreeBuschmann@t-online.de> 2011-02-05 14:29:47 +0000
committer Andree Buschmann <AndreeBuschmann@t-online.de> 2011-02-05 14:29:47 +0000
commit 8b540fa9d9477dd0fc7f08a3bba14cdc582a47ca (patch)
tree 177eab7b0c97fcf8ef9039e60ce93d104118c5ad
parent 62fb090ac491a61c4100abc9ac62354dc38c0fc4 (diff)
download rockbox-8b540fa9d9477dd0fc7f08a3bba14cdc582a47ca.tar.gz
rockbox-8b540fa9d9477dd0fc7f08a3bba14cdc582a47ca.zip
Speed up AAC-HE SBR by 2% on S5L8701. Use MEM_ALIGN on critical arrays and avoid stalls in asm code.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29209 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libfaad/sbr_dct.c | 6
-rw-r--r-- apps/codecs/libfaad/sbr_dec.c | 4
-rw-r--r-- apps/codecs/libfaad/sbr_qmf.c | 56
3 files changed, 33 insertions, 33 deletions
diff --git a/apps/codecs/libfaad/sbr_dct.c b/apps/codecs/libfaad/sbr_dct.c
index f22a24a18e..da0e5e1f59 100644
--- a/apps/codecs/libfaad/sbr_dct.c
+++ b/apps/codecs/libfaad/sbr_dct.c
@@ -1453,7 +1453,7 @@ void DCT2_32_unscaled(real_t *y, real_t *x)
1453#else /* #ifdef SBR_LOW_POWER */ 1453#else /* #ifdef SBR_LOW_POWER */
1454 1454
1455/* table for pre-processing within dct4_kernel() */ 1455/* table for pre-processing within dct4_kernel() */
1456static const real_t dct4_pre_tab[] ICONST_ATTR = { 1456static const real_t dct4_pre_tab[] ICONST_ATTR MEM_ALIGN_ATTR = {
1457 COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002), 1457 COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002),
1458 COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388), 1458 COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388),
1459 COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643), 1459 COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643),
@@ -1489,7 +1489,7 @@ static const real_t dct4_pre_tab[] ICONST_ATTR = {
1489}; 1489};
1490 1490
1491/* table for post-processing within dct4_kernel() */ 1491/* table for post-processing within dct4_kernel() */
1492static const real_t dct4_post_tab[] ICONST_ATTR = { 1492static const real_t dct4_post_tab[] ICONST_ATTR MEM_ALIGN_ATTR = {
1493 COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ), 1493 COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ),
1494 COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382), 1494 COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382),
1495 COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416), 1495 COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416),
@@ -1525,7 +1525,7 @@ static const real_t dct4_post_tab[] ICONST_ATTR = {
1525}; 1525};
1526 1526
1527// Table adapted from codeclib to fit into IRAM 1527// Table adapted from codeclib to fit into IRAM
1528const uint32_t dct4_revtab[32] ICONST_ATTR = { 1528const uint32_t dct4_revtab[32] ICONST_ATTR MEM_ALIGN_ATTR = {
1529 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17, 1529 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17,
1530 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16}; 1530 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16};
1531 1531
diff --git a/apps/codecs/libfaad/sbr_dec.c b/apps/codecs/libfaad/sbr_dec.c
index 60bb2a6bdb..78c9c79d5d 100644
--- a/apps/codecs/libfaad/sbr_dec.c
+++ b/apps/codecs/libfaad/sbr_dec.c
@@ -521,8 +521,8 @@ uint8_t sbrDecodeSingleFrame(sbr_info *sbr, real_t *channel,
521 return 0; 521 return 0;
522} 522}
523 523
524ALIGN qmf_t X_left[MAX_NTSRHFG][64];// = {{0}}; 524qmf_t X_left [MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}};
525ALIGN qmf_t X_right[MAX_NTSRHFG][64];// = {{0}}; /* must set this to 0 */ 525qmf_t X_right[MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}}; /* must set this to 0 */
526 526
527#if (defined(PS_DEC) || defined(DRM_PS)) 527#if (defined(PS_DEC) || defined(DRM_PS))
528uint8_t sbrDecodeSingleFramePS(sbr_info *sbr, real_t *left_channel, real_t *right_channel, 528uint8_t sbrDecodeSingleFramePS(sbr_info *sbr, real_t *left_channel, real_t *right_channel,
diff --git a/apps/codecs/libfaad/sbr_qmf.c b/apps/codecs/libfaad/sbr_qmf.c
index f94c3072a4..bb6e176a3f 100644
--- a/apps/codecs/libfaad/sbr_qmf.c
+++ b/apps/codecs/libfaad/sbr_qmf.c
@@ -78,12 +78,12 @@ void qmfa_end(qmfa_info *qmfa)
78void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input, 78void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input,
79 qmf_t X[MAX_NTSRHFG][64], uint8_t offset, uint8_t kx) 79 qmf_t X[MAX_NTSRHFG][64], uint8_t offset, uint8_t kx)
80{ 80{
81 ALIGN real_t u[64]; 81 real_t u[64] MEM_ALIGN_ATTR;
82#ifndef SBR_LOW_POWER 82#ifndef SBR_LOW_POWER
83 ALIGN real_t real[32]; 83 real_t real[32] MEM_ALIGN_ATTR;
84 ALIGN real_t imag[32]; 84 real_t imag[32] MEM_ALIGN_ATTR;
85#else 85#else
86 ALIGN real_t y[32]; 86 real_t y[32] MEM_ALIGN_ATTR;
87#endif 87#endif
88 qmf_t *pX; 88 qmf_t *pX;
89 uint32_t in = 0; 89 uint32_t in = 0;
@@ -227,8 +227,8 @@ void qmfs_end(qmfs_info *qmfs)
227void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], 227void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
228 real_t *output) 228 real_t *output)
229{ 229{
230 ALIGN real_t x[16]; 230 real_t x[16] MEM_ALIGN_ATTR;
231 ALIGN real_t y[16]; 231 real_t y[16] MEM_ALIGN_ATTR;
232 int16_t n, k, out = 0; 232 int16_t n, k, out = 0;
233 uint8_t l; 233 uint8_t l;
234 234
@@ -291,8 +291,8 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
291void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], 291void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
292 real_t *output) 292 real_t *output)
293{ 293{
294 ALIGN real_t x[64]; 294 real_t x[64] MEM_ALIGN_ATTR;
295 ALIGN real_t y[64]; 295 real_t y[64] MEM_ALIGN_ATTR;
296 int16_t n, k, out = 0; 296 int16_t n, k, out = 0;
297 uint8_t l; 297 uint8_t l;
298 298
@@ -401,8 +401,8 @@ static const complex_t qmf32_pre_twiddle[] =
401void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], 401void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
402 real_t *output) 402 real_t *output)
403{ 403{
404 ALIGN real_t x1[32]; 404 real_t x1[32] MEM_ALIGN_ATTR;
405 ALIGN real_t x2[32]; 405 real_t x2[32] MEM_ALIGN_ATTR;
406 int32_t n, k, idx0, idx1, out = 0; 406 int32_t n, k, idx0, idx1, out = 0;
407 uint32_t l; 407 uint32_t l;
408 408
@@ -464,10 +464,10 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
464void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], 464void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
465 real_t *output) 465 real_t *output)
466{ 466{
467 ALIGN real_t real1[32]; 467 real_t real1[32] MEM_ALIGN_ATTR;
468 ALIGN real_t imag1[32]; 468 real_t imag1[32] MEM_ALIGN_ATTR;
469 ALIGN real_t real2[32]; 469 real_t real2[32] MEM_ALIGN_ATTR;
470 ALIGN real_t imag2[32]; 470 real_t imag2[32] MEM_ALIGN_ATTR;
471 qmf_t *pX; 471 qmf_t *pX;
472 real_t *p_buf_1, *p_buf_3; 472 real_t *p_buf_1, *p_buf_3;
473 int32_t n, k, idx0, idx1, out = 0; 473 int32_t n, k, idx0, idx1, out = 0;
@@ -517,36 +517,36 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
517 asm volatile ( 517 asm volatile (
518 "ldmia %[qtab]!, { r0-r3 } \n\t" 518 "ldmia %[qtab]!, { r0-r3 } \n\t"
519 "ldr r4, [%[pbuf]] \n\t" 519 "ldr r4, [%[pbuf]] \n\t"
520 "ldr r7, [%[pbuf], #192*4] \n\t"
520 "smull r5, r6, r4, r0 \n\t" 521 "smull r5, r6, r4, r0 \n\t"
521 "ldr r4, [%[pbuf], #192*4] \n\t"
522 "smlal r5, r6, r4, r1 \n\t"
523 "ldr r4, [%[pbuf], #256*4] \n\t" 522 "ldr r4, [%[pbuf], #256*4] \n\t"
523 "smlal r5, r6, r7, r1 \n\t"
524 "ldr r7, [%[pbuf], #448*4] \n\t"
524 "smlal r5, r6, r4, r2 \n\t" 525 "smlal r5, r6, r4, r2 \n\t"
525 "ldr r4, [%[pbuf], #448*4] \n\t" 526 "ldr r4, [%[pbuf], #512*4] \n\t"
526 "smlal r5, r6, r4, r3 \n\t" 527 "smlal r5, r6, r7, r3 \n\t"
527 528
528 "ldmia %[qtab]!, { r0-r3 } \n\t" 529 "ldmia %[qtab]!, { r0-r3 } \n\t"
529 "ldr r4, [%[pbuf], #512*4] \n\t" 530 "ldr r7, [%[pbuf], #704*4] \n\t"
530 "smlal r5, r6, r4, r0 \n\t" 531 "smlal r5, r6, r4, r0 \n\t"
531 "ldr r4, [%[pbuf], #704*4] \n\t"
532 "smlal r5, r6, r4, r1 \n\t"
533 "ldr r4, [%[pbuf], #768*4] \n\t" 532 "ldr r4, [%[pbuf], #768*4] \n\t"
533 "smlal r5, r6, r7, r1 \n\t"
534 "ldr r7, [%[pbuf], #960*4] \n\t"
534 "smlal r5, r6, r4, r2 \n\t" 535 "smlal r5, r6, r4, r2 \n\t"
535 "ldr r4, [%[pbuf], #960*4] \n\t" 536 "mov r2, #1024*4 \n\t"
536 "smlal r5, r6, r4, r3 \n\t"
537 537
538 "ldmia %[qtab]!, { r0-r1 } \n\t" 538 "ldmia %[qtab]!, { r0-r1 } \n\t"
539 "mov r2, #1024*4 \n\t"
540 "ldr r4, [%[pbuf], r2] \n\t" 539 "ldr r4, [%[pbuf], r2] \n\t"
541 "smlal r5, r6, r4, r0 \n\t" 540 "smlal r5, r6, r7, r3 \n\t"
542 "mov r2, #1216*4 \n\t" 541 "mov r2, #1216*4 \n\t"
543 "ldr r4, [%[pbuf], r2] \n\t" 542 "ldr r7, [%[pbuf], r2] \n\t"
544 "smlal r5, r6, r4, r1 \n\t" 543 "smlal r5, r6, r4, r0 \n\t"
544 "smlal r5, r6, r7, r1 \n\t"
545 545
546 "str r6, [%[pout]] \n" 546 "str r6, [%[pout]] \n"
547 : [qtab] "+r" (qtab) 547 : [qtab] "+r" (qtab)
548 : [pbuf] "r" (pbuf), [pout] "r" (pout) 548 : [pbuf] "r" (pbuf), [pout] "r" (pout)
549 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "memory"); 549 : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "memory");
550 } 550 }
551#elif defined CPU_COLDFIRE 551#elif defined CPU_COLDFIRE
552 const real_t *qtab = qmf_c; 552 const real_t *qtab = qmf_c;