diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2011-02-05 14:29:47 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2011-02-05 14:29:47 +0000 |
commit | 8b540fa9d9477dd0fc7f08a3bba14cdc582a47ca (patch) | |
tree | 177eab7b0c97fcf8ef9039e60ce93d104118c5ad /apps | |
parent | 62fb090ac491a61c4100abc9ac62354dc38c0fc4 (diff) | |
download | rockbox-8b540fa9d9477dd0fc7f08a3bba14cdc582a47ca.tar.gz rockbox-8b540fa9d9477dd0fc7f08a3bba14cdc582a47ca.zip |
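Speed up AAC-HE SBR decoding by 2% on S5L8701. Use MEM_ALIGN_ATTR on critical arrays and avoid load-use stalls in the ARM asm code by interleaving loads (via an extra scratch register, r7) with the multiply-accumulates.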
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29209 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libfaad/sbr_dct.c | 6 | ||||
-rw-r--r-- | apps/codecs/libfaad/sbr_dec.c | 4 | ||||
-rw-r--r-- | apps/codecs/libfaad/sbr_qmf.c | 56 |
3 files changed, 33 insertions, 33 deletions
diff --git a/apps/codecs/libfaad/sbr_dct.c b/apps/codecs/libfaad/sbr_dct.c index f22a24a18e..da0e5e1f59 100644 --- a/apps/codecs/libfaad/sbr_dct.c +++ b/apps/codecs/libfaad/sbr_dct.c | |||
@@ -1453,7 +1453,7 @@ void DCT2_32_unscaled(real_t *y, real_t *x) | |||
1453 | #else /* #ifdef SBR_LOW_POWER */ | 1453 | #else /* #ifdef SBR_LOW_POWER */ |
1454 | 1454 | ||
1455 | /* table for pre-processing within dct4_kernel() */ | 1455 | /* table for pre-processing within dct4_kernel() */ |
1456 | static const real_t dct4_pre_tab[] ICONST_ATTR = { | 1456 | static const real_t dct4_pre_tab[] ICONST_ATTR MEM_ALIGN_ATTR = { |
1457 | COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002), | 1457 | COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002), |
1458 | COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388), | 1458 | COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388), |
1459 | COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643), | 1459 | COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643), |
@@ -1489,7 +1489,7 @@ static const real_t dct4_pre_tab[] ICONST_ATTR = { | |||
1489 | }; | 1489 | }; |
1490 | 1490 | ||
1491 | /* table for post-processing within dct4_kernel() */ | 1491 | /* table for post-processing within dct4_kernel() */ |
1492 | static const real_t dct4_post_tab[] ICONST_ATTR = { | 1492 | static const real_t dct4_post_tab[] ICONST_ATTR MEM_ALIGN_ATTR = { |
1493 | COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ), | 1493 | COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ), |
1494 | COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382), | 1494 | COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382), |
1495 | COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416), | 1495 | COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416), |
@@ -1525,7 +1525,7 @@ static const real_t dct4_post_tab[] ICONST_ATTR = { | |||
1525 | }; | 1525 | }; |
1526 | 1526 | ||
1527 | // Table adapted from codeclib to fit into IRAM | 1527 | // Table adapted from codeclib to fit into IRAM |
1528 | const uint32_t dct4_revtab[32] ICONST_ATTR = { | 1528 | const uint32_t dct4_revtab[32] ICONST_ATTR MEM_ALIGN_ATTR = { |
1529 | 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17, | 1529 | 0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17, |
1530 | 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16}; | 1530 | 1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16}; |
1531 | 1531 | ||
diff --git a/apps/codecs/libfaad/sbr_dec.c b/apps/codecs/libfaad/sbr_dec.c index 60bb2a6bdb..78c9c79d5d 100644 --- a/apps/codecs/libfaad/sbr_dec.c +++ b/apps/codecs/libfaad/sbr_dec.c | |||
@@ -521,8 +521,8 @@ uint8_t sbrDecodeSingleFrame(sbr_info *sbr, real_t *channel, | |||
521 | return 0; | 521 | return 0; |
522 | } | 522 | } |
523 | 523 | ||
524 | ALIGN qmf_t X_left[MAX_NTSRHFG][64];// = {{0}}; | 524 | qmf_t X_left [MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}}; |
525 | ALIGN qmf_t X_right[MAX_NTSRHFG][64];// = {{0}}; /* must set this to 0 */ | 525 | qmf_t X_right[MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}}; /* must set this to 0 */ |
526 | 526 | ||
527 | #if (defined(PS_DEC) || defined(DRM_PS)) | 527 | #if (defined(PS_DEC) || defined(DRM_PS)) |
528 | uint8_t sbrDecodeSingleFramePS(sbr_info *sbr, real_t *left_channel, real_t *right_channel, | 528 | uint8_t sbrDecodeSingleFramePS(sbr_info *sbr, real_t *left_channel, real_t *right_channel, |
diff --git a/apps/codecs/libfaad/sbr_qmf.c b/apps/codecs/libfaad/sbr_qmf.c index f94c3072a4..bb6e176a3f 100644 --- a/apps/codecs/libfaad/sbr_qmf.c +++ b/apps/codecs/libfaad/sbr_qmf.c | |||
@@ -78,12 +78,12 @@ void qmfa_end(qmfa_info *qmfa) | |||
78 | void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input, | 78 | void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input, |
79 | qmf_t X[MAX_NTSRHFG][64], uint8_t offset, uint8_t kx) | 79 | qmf_t X[MAX_NTSRHFG][64], uint8_t offset, uint8_t kx) |
80 | { | 80 | { |
81 | ALIGN real_t u[64]; | 81 | real_t u[64] MEM_ALIGN_ATTR; |
82 | #ifndef SBR_LOW_POWER | 82 | #ifndef SBR_LOW_POWER |
83 | ALIGN real_t real[32]; | 83 | real_t real[32] MEM_ALIGN_ATTR; |
84 | ALIGN real_t imag[32]; | 84 | real_t imag[32] MEM_ALIGN_ATTR; |
85 | #else | 85 | #else |
86 | ALIGN real_t y[32]; | 86 | real_t y[32] MEM_ALIGN_ATTR; |
87 | #endif | 87 | #endif |
88 | qmf_t *pX; | 88 | qmf_t *pX; |
89 | uint32_t in = 0; | 89 | uint32_t in = 0; |
@@ -227,8 +227,8 @@ void qmfs_end(qmfs_info *qmfs) | |||
227 | void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], | 227 | void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], |
228 | real_t *output) | 228 | real_t *output) |
229 | { | 229 | { |
230 | ALIGN real_t x[16]; | 230 | real_t x[16] MEM_ALIGN_ATTR; |
231 | ALIGN real_t y[16]; | 231 | real_t y[16] MEM_ALIGN_ATTR; |
232 | int16_t n, k, out = 0; | 232 | int16_t n, k, out = 0; |
233 | uint8_t l; | 233 | uint8_t l; |
234 | 234 | ||
@@ -291,8 +291,8 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6 | |||
291 | void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], | 291 | void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], |
292 | real_t *output) | 292 | real_t *output) |
293 | { | 293 | { |
294 | ALIGN real_t x[64]; | 294 | real_t x[64] MEM_ALIGN_ATTR; |
295 | ALIGN real_t y[64]; | 295 | real_t y[64] MEM_ALIGN_ATTR; |
296 | int16_t n, k, out = 0; | 296 | int16_t n, k, out = 0; |
297 | uint8_t l; | 297 | uint8_t l; |
298 | 298 | ||
@@ -401,8 +401,8 @@ static const complex_t qmf32_pre_twiddle[] = | |||
401 | void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], | 401 | void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], |
402 | real_t *output) | 402 | real_t *output) |
403 | { | 403 | { |
404 | ALIGN real_t x1[32]; | 404 | real_t x1[32] MEM_ALIGN_ATTR; |
405 | ALIGN real_t x2[32]; | 405 | real_t x2[32] MEM_ALIGN_ATTR; |
406 | int32_t n, k, idx0, idx1, out = 0; | 406 | int32_t n, k, idx0, idx1, out = 0; |
407 | uint32_t l; | 407 | uint32_t l; |
408 | 408 | ||
@@ -464,10 +464,10 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6 | |||
464 | void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], | 464 | void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], |
465 | real_t *output) | 465 | real_t *output) |
466 | { | 466 | { |
467 | ALIGN real_t real1[32]; | 467 | real_t real1[32] MEM_ALIGN_ATTR; |
468 | ALIGN real_t imag1[32]; | 468 | real_t imag1[32] MEM_ALIGN_ATTR; |
469 | ALIGN real_t real2[32]; | 469 | real_t real2[32] MEM_ALIGN_ATTR; |
470 | ALIGN real_t imag2[32]; | 470 | real_t imag2[32] MEM_ALIGN_ATTR; |
471 | qmf_t *pX; | 471 | qmf_t *pX; |
472 | real_t *p_buf_1, *p_buf_3; | 472 | real_t *p_buf_1, *p_buf_3; |
473 | int32_t n, k, idx0, idx1, out = 0; | 473 | int32_t n, k, idx0, idx1, out = 0; |
@@ -517,36 +517,36 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6 | |||
517 | asm volatile ( | 517 | asm volatile ( |
518 | "ldmia %[qtab]!, { r0-r3 } \n\t" | 518 | "ldmia %[qtab]!, { r0-r3 } \n\t" |
519 | "ldr r4, [%[pbuf]] \n\t" | 519 | "ldr r4, [%[pbuf]] \n\t" |
520 | "ldr r7, [%[pbuf], #192*4] \n\t" | ||
520 | "smull r5, r6, r4, r0 \n\t" | 521 | "smull r5, r6, r4, r0 \n\t" |
521 | "ldr r4, [%[pbuf], #192*4] \n\t" | ||
522 | "smlal r5, r6, r4, r1 \n\t" | ||
523 | "ldr r4, [%[pbuf], #256*4] \n\t" | 522 | "ldr r4, [%[pbuf], #256*4] \n\t" |
523 | "smlal r5, r6, r7, r1 \n\t" | ||
524 | "ldr r7, [%[pbuf], #448*4] \n\t" | ||
524 | "smlal r5, r6, r4, r2 \n\t" | 525 | "smlal r5, r6, r4, r2 \n\t" |
525 | "ldr r4, [%[pbuf], #448*4] \n\t" | 526 | "ldr r4, [%[pbuf], #512*4] \n\t" |
526 | "smlal r5, r6, r4, r3 \n\t" | 527 | "smlal r5, r6, r7, r3 \n\t" |
527 | 528 | ||
528 | "ldmia %[qtab]!, { r0-r3 } \n\t" | 529 | "ldmia %[qtab]!, { r0-r3 } \n\t" |
529 | "ldr r4, [%[pbuf], #512*4] \n\t" | 530 | "ldr r7, [%[pbuf], #704*4] \n\t" |
530 | "smlal r5, r6, r4, r0 \n\t" | 531 | "smlal r5, r6, r4, r0 \n\t" |
531 | "ldr r4, [%[pbuf], #704*4] \n\t" | ||
532 | "smlal r5, r6, r4, r1 \n\t" | ||
533 | "ldr r4, [%[pbuf], #768*4] \n\t" | 532 | "ldr r4, [%[pbuf], #768*4] \n\t" |
533 | "smlal r5, r6, r7, r1 \n\t" | ||
534 | "ldr r7, [%[pbuf], #960*4] \n\t" | ||
534 | "smlal r5, r6, r4, r2 \n\t" | 535 | "smlal r5, r6, r4, r2 \n\t" |
535 | "ldr r4, [%[pbuf], #960*4] \n\t" | 536 | "mov r2, #1024*4 \n\t" |
536 | "smlal r5, r6, r4, r3 \n\t" | ||
537 | 537 | ||
538 | "ldmia %[qtab]!, { r0-r1 } \n\t" | 538 | "ldmia %[qtab]!, { r0-r1 } \n\t" |
539 | "mov r2, #1024*4 \n\t" | ||
540 | "ldr r4, [%[pbuf], r2] \n\t" | 539 | "ldr r4, [%[pbuf], r2] \n\t" |
541 | "smlal r5, r6, r4, r0 \n\t" | 540 | "smlal r5, r6, r7, r3 \n\t" |
542 | "mov r2, #1216*4 \n\t" | 541 | "mov r2, #1216*4 \n\t" |
543 | "ldr r4, [%[pbuf], r2] \n\t" | 542 | "ldr r7, [%[pbuf], r2] \n\t" |
544 | "smlal r5, r6, r4, r1 \n\t" | 543 | "smlal r5, r6, r4, r0 \n\t" |
544 | "smlal r5, r6, r7, r1 \n\t" | ||
545 | 545 | ||
546 | "str r6, [%[pout]] \n" | 546 | "str r6, [%[pout]] \n" |
547 | : [qtab] "+r" (qtab) | 547 | : [qtab] "+r" (qtab) |
548 | : [pbuf] "r" (pbuf), [pout] "r" (pout) | 548 | : [pbuf] "r" (pbuf), [pout] "r" (pout) |
549 | : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "memory"); | 549 | : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "memory"); |
550 | } | 550 | } |
551 | #elif defined CPU_COLDFIRE | 551 | #elif defined CPU_COLDFIRE |
552 | const real_t *qtab = qmf_c; | 552 | const real_t *qtab = qmf_c; |