diff options
author | Thom Johansen <thomj@rockbox.org> | 2006-03-23 19:59:52 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2006-03-23 19:59:52 +0000 |
commit | cc94ae45a7f3b377a359f9652340e8c847da5ac6 (patch) | |
tree | 2b5979b5469cf2c690b644b819446372146343ab /apps | |
parent | 5f9bd4fb987049ecea077c0a1039f0eee31c118c (diff) | |
download | rockbox-cc94ae45a7f3b377a359f9652340e8c847da5ac6.tar.gz rockbox-cc94ae45a7f3b377a359f9652340e8c847da5ac6.zip |
Keep assembler versions of DSP routines in per-architecture dsp_<arch>.S
files (dsp_cf.S for Coldfire). This also solves the annoying register
allocator problem that the Coldfire assembler-optimised crossfeed routine
ran into when built without -fomit-frame-pointer.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9215 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/SOURCES | 1 | ||||
-rw-r--r-- | apps/dsp.c | 92 | ||||
-rw-r--r-- | apps/dsp_asm.h | 31 | ||||
-rw-r--r-- | apps/dsp_cf.S | 104 |
4 files changed, 145 insertions, 83 deletions
diff --git a/apps/SOURCES b/apps/SOURCES index f34cac9654..dee4e6e002 100644 --- a/apps/SOURCES +++ b/apps/SOURCES | |||
@@ -75,6 +75,7 @@ codecs.c | |||
75 | dsp.c | 75 | dsp.c |
76 | eq.c | 76 | eq.c |
77 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) | 77 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) |
78 | dsp_cf.S | ||
78 | eq_cf.S | 79 | eq_cf.S |
79 | #elif defined(CPU_ARM) && !defined(SIMULATOR) | 80 | #elif defined(CPU_ARM) && !defined(SIMULATOR) |
80 | eq_arm.S | 81 | eq_arm.S |
diff --git a/apps/dsp.c b/apps/dsp.c index c5bbc8fbe9..6e6f702a30 100644 --- a/apps/dsp.c +++ b/apps/dsp.c | |||
@@ -28,6 +28,10 @@ | |||
28 | #include "replaygain.h" | 28 | #include "replaygain.h" |
29 | #include "debug.h" | 29 | #include "debug.h" |
30 | 30 | ||
31 | #ifndef SIMULATOR | ||
32 | #include <dsp_asm.h> | ||
33 | #endif | ||
34 | |||
31 | /* The "dither" code to convert the 24-bit samples produced by libmad was | 35 | /* The "dither" code to convert the 24-bit samples produced by libmad was |
32 | * taken from the coolplayer project - coolplayer.sourceforge.net | 36 | * taken from the coolplayer project - coolplayer.sourceforge.net |
33 | */ | 37 | */ |
@@ -517,90 +521,12 @@ static long dither_sample(int32_t sample, int32_t bias, int32_t mask, | |||
517 | return output; | 521 | return output; |
518 | } | 522 | } |
519 | 523 | ||
520 | /* Apply a constant gain to the samples (e.g., for ReplayGain). May update | 524 | /* Applies crossfeed to the stereo signal in src. |
521 | * the src array if gain was applied. | 525 | * Crossfeed is a process where listening over speakers is simulated. This |
522 | * Note that this must be called before the resampler. | 526 | * is good for old hard panned stereo records, which might be quite fatiguing |
527 | * to listen to on headphones with no crossfeed. | ||
523 | */ | 528 | */ |
524 | #if defined(CPU_COLDFIRE) && !defined(SIMULATOR) | 529 | #ifndef DSP_HAVE_ASM_CROSSFEED |
525 | static const long crossfeed_coefs[6] ICONST_ATTR = { | ||
526 | LOW, LOW_COMP, HIGH_NEG, HIGH_COMP, ATT, ATT_COMP | ||
527 | }; | ||
528 | |||
529 | static void apply_crossfeed(int32_t* src[], int count) | ||
530 | { | ||
531 | asm volatile ( | ||
532 | "lea.l crossfeed_data, %%a1 \n" | ||
533 | "lea.l (16, %%a1), %%a0 \n" | ||
534 | "movem.l (%%a1), %%d0-%%d3 \n" | ||
535 | "move.l (120, %%a1), %%d4 \n" | ||
536 | /* fetch left, right, LOW and LOW_COMP for first iteration */ | ||
537 | "move.l (%[src0]), %%d5 \n" | ||
538 | "move.l (%[src1]), %%d6 \n" | ||
539 | "move.l (%[coef])+, %%a1 \n" | ||
540 | "move.l (%[coef])+, %%a2 \n" | ||
541 | /* Register usage in loop: | ||
542 | * a0 = &delay[0][0], a1 & a2 = coefs | ||
543 | * d0 = low_left, d1 = low_right, | ||
544 | * d2 = high_left, d3 = high_right, | ||
545 | * d4 = delay line index, | ||
546 | * d5 = src[0][i], d6 = src[1][i]. | ||
547 | * The rest are described in asm constraint list. | ||
548 | */ | ||
549 | ".cfloop:" | ||
550 | /* LOW*low_left + LOW_COMP*left */ | ||
551 | "mac.l %%a1, %%d0, %%acc0 \n" | ||
552 | "mac.l %%a2, %%d5, %%acc0 \n" | ||
553 | /* LOW*low_right + LOW_COMP*right */ | ||
554 | "mac.l %%a1, %%d1, (%[coef])+, %%a1, %%acc1 \n" /* a1 = HIGH_NEG */ | ||
555 | "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = HIGH_COMP */ | ||
556 | "movclr.l %%acc0, %%d0 \n" /* get low_left */ | ||
557 | "movclr.l %%acc1, %%d1 \n" /* get low_right */ | ||
558 | /* HIGH_NEG*high_left + HIGH_COMP*left */ | ||
559 | "mac.l %%a1, %%d2, %%acc0 \n" | ||
560 | "mac.l %%a2, %%d5, %%acc0 \n" | ||
561 | /* HIGH_NEG*high_right + HIGH_COMP*right */ | ||
562 | "mac.l %%a1, %%d3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = ATT */ | ||
563 | "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = ATT_COMP */ | ||
564 | "lea.l (-6*4, %[coef]), %[coef] \n" /* coef = &coefs[0] */ | ||
565 | "move.l (%%a0, %%d4*4), %%a3 \n" /* a3=delay[0][idx] */ | ||
566 | "move.l (52, %%a0, %%d4*4), %%d5 \n" /* d5=delay[1][idx] */ | ||
567 | "movclr.l %%acc0, %%d2 \n" /* get high_left */ | ||
568 | "movclr.l %%acc1, %%d3 \n" /* get high_right */ | ||
569 | /* ATT*delay_r + ATT_COMP*high_left */ | ||
570 | "mac.l %%a1, %%d5, (4, %[src0]), %%d5, %%acc0\n" /* d5 = src[0][i+1] */ | ||
571 | "mac.l %%a2, %%d2, (4, %[src1]), %%d6, %%acc0\n" /* d6 = src[1][i+1] */ | ||
572 | /* ATT*delay_l + ATT_COMP*high_right */ | ||
573 | "mac.l %%a1, %%a3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = LOW */ | ||
574 | "mac.l %%a2, %%d3, (%[coef])+, %%a2, %%acc1 \n" /* a2 = LOW_COMP */ | ||
575 | |||
576 | /* save crossfed samples to output */ | ||
577 | "movclr.l %%acc0, %%a3 \n" | ||
578 | "move.l %%a3, (%[src0])+ \n" /* src[0][i++] = out_l */ | ||
579 | "movclr.l %%acc1, %%a3 \n" | ||
580 | "move.l %%a3, (%[src1])+ \n" /* src[1][i++] = out_r */ | ||
581 | "move.l %%d0, (%%a0, %%d4*4) \n" /* delay[0][index] = low_left */ | ||
582 | "move.l %%d1, (52, %%a0, %%d4*4)\n" /* delay[1][index] = low_right */ | ||
583 | "addq.l #1, %%d4 \n" /* index++ */ | ||
584 | "cmp.l #13, %%d4 \n" /* if (index >= 13) { */ | ||
585 | "jlt .nowrap \n" | ||
586 | "clr.l %%d4 \n" /* index = 0 */ | ||
587 | ".nowrap: \n" /* } */ | ||
588 | "subq.l #1, %[count] \n" | ||
589 | "jne .cfloop \n" | ||
590 | /* save data back to struct */ | ||
591 | "lea.l crossfeed_data, %%a1 \n" | ||
592 | "movem.l %%d0-%%d3, (%%a1) \n" | ||
593 | "move.l %%d4, (120, %%a1) \n" | ||
594 | /* NOTE: We _just_ have enough registers for our use here, clobber just | ||
595 | one more and GCC will fail. */ | ||
596 | : | ||
597 | : [count] "d" (count), | ||
598 | [src0] "a" (src[0]), [src1] "a" (src[1]), [coef] "a" (crossfeed_coefs) | ||
599 | : "d0", "d1", "d2", "d3", "d4", "d5", "d6", | ||
600 | "a0", "a1", "a2", "a3" | ||
601 | ); | ||
602 | } | ||
603 | #else | ||
604 | static void apply_crossfeed(int32_t* src[], int count) | 530 | static void apply_crossfeed(int32_t* src[], int count) |
605 | { | 531 | { |
606 | int32_t a; /* accumulator */ | 532 | int32_t a; /* accumulator */ |
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h new file mode 100644 index 0000000000..ad08bc9f4b --- /dev/null +++ b/apps/dsp_asm.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 Thom Johansen | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | #include <config.h> | ||
21 | |||
22 | #ifndef _DSP_ASM_H | ||
23 | #define _DSP_ASM_H | ||
24 | |||
25 | #ifdef CPU_COLDFIRE | ||
26 | #define DSP_HAVE_ASM_CROSSFEED | ||
27 | void apply_crossfeed(int32_t* src[], int count); | ||
28 | #endif | ||
29 | |||
30 | #endif | ||
31 | |||
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S new file mode 100644 index 0000000000..6147ebeea7 --- /dev/null +++ b/apps/dsp_cf.S | |||
@@ -0,0 +1,104 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 Thom Johansen | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | .section .idata,"aw",@progbits | ||
21 | crossfeed_coefs: | ||
22 | .long 0x4CCCCCCD | LOW | ||
23 | .long 0x33333333 | LOW_COMP | ||
24 | .long -0x66666666 | HIGH_NEG | ||
25 | .long 0x66666666 | HIGH_COMP | ||
26 | .long 0x0CCCCCCD | ATT | ||
27 | .long 0x73333333 | ATT_COMP | ||
28 | |||
29 | .section .text | ||
30 | .global apply_crossfeed | ||
31 | apply_crossfeed: | ||
32 | lea.l (-44, %sp), %sp | ||
33 | movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs | ||
34 | move.l (44+4, %sp), %a4 | ||
35 | movem.l (%a4), %a4-%a5 | a4 = src[0], a5 = src[1] | ||
36 | move.l (44+8, %sp), %d7 | d7 = count | ||
37 | |||
38 | lea.l crossfeed_data, %a1 | ||
39 | lea.l crossfeed_coefs, %a6 | ||
40 | lea.l (16, %a1), %a0 | a0 = &delay[0][0] | ||
41 | movem.l (%a1), %d0-%d3 | fetch filter history samples | ||
42 | move.l (120, %a1), %d4 | fetch delay line index | ||
43 | move.l (%a4), %d5 | d5 = left sample | ||
44 | move.l (%a5), %d6 | d6 = right sample | ||
45 | move.l (%a6)+, %a1 | a1 = LOW value | ||
46 | move.l (%a6)+, %a2 | a2 = LOW_COMP value | ||
47 | /* Register usage in loop: | ||
48 | * a0 = &delay[0][0], a1 & a2 = coefs, a3 = temp storage, | ||
49 | * a4 = src[0], a5 = src[1], a6 = running pointer into crossfeed_coefs, | ||
50 | * d0 = low_left, d1 = low_right, | ||
51 | * d2 = high_left, d3 = high_right, | ||
52 | * d4 = delay line index, | ||
53 | * d5 = src[0][i], d6 = src[1][i], | ||
54 | * d7 = count. | ||
55 | */ | ||
56 | .cfloop: | ||
57 | | LOW*low_left + LOW_COMP*left | ||
58 | mac.l %a1, %d0, %acc0 | ||
59 | mac.l %a2, %d5, %acc0 | ||
60 | | LOW*low_right + LOW_COMP*right | ||
61 | mac.l %a1, %d1, (%a6)+, %a1, %acc1 | a1 = HIGH_NEG | ||
62 | mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = HIGH_COMP | ||
63 | movclr.l %acc0, %d0 | get low_left | ||
64 | movclr.l %acc1, %d1 | get low_right | ||
65 | | HIGH_NEG*high_left + HIGH_COMP*left | ||
66 | mac.l %a1, %d2, %acc0 | ||
67 | mac.l %a2, %d5, %acc0 | ||
68 | | HIGH_NEG*high_right + HIGH_COMP*right | ||
69 | mac.l %a1, %d3, (%a6)+, %a1, %acc1 | a1 = ATT | ||
70 | mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = ATT_COMP | ||
71 | lea.l (-6*4, %a6), %a6 | rewind a6 to &crossfeed_coefs[0] | ||
72 | move.l (%a0, %d4*4), %a3 | a3 = delay[0][idx] | ||
73 | move.l (52, %a0, %d4*4), %d5 | d5 = delay[1][idx] | ||
74 | movclr.l %acc0, %d2 | get high_left | ||
75 | movclr.l %acc1, %d3 | get high_right | ||
76 | | ATT*delay_r + ATT_COMP*high_left | ||
77 | mac.l %a1, %d5, (4, %a4), %d5, %acc0 | d5 = src[0][i+1] | ||
78 | mac.l %a2, %d2, (4, %a5), %d6, %acc0 | d6 = src[1][i+1] | ||
79 | | ATT*delay_l + ATT_COMP*high_right | ||
80 | mac.l %a1, %a3, (%a6)+, %a1, %acc1 | a1 = LOW | ||
81 | mac.l %a2, %d3, (%a6)+, %a2, %acc1 | a2 = LOW_COMP | ||
82 | |||
83 | | save crossfed samples to output | ||
84 | movclr.l %acc0, %a3 | ||
85 | move.l %a3, (%a4)+ | src[0][i++] = out_l | ||
86 | movclr.l %acc1, %a3 | ||
87 | move.l %a3, (%a5)+ | src[1][i++] = out_r | ||
88 | move.l %d0, (%a0, %d4*4) | delay[0][index] = low_left | ||
89 | move.l %d1, (52, %a0, %d4*4) | delay[1][index] = low_right | ||
90 | addq.l #1, %d4 | index++ | ||
91 | cmp.l #13, %d4 | if (index >= 13) { | ||
92 | jlt .nowrap | ||
93 | clr.l %d4 | index = 0 | ||
94 | .nowrap: | } | ||
95 | subq.l #1, %d7 | ||
96 | jne .cfloop | ||
97 | | save data back to struct | ||
98 | lea.l crossfeed_data, %a1 | ||
99 | movem.l %d0-%d3, (%a1) | ||
100 | move.l %d4, (120, %a1) | ||
101 | movem.l (%sp), %d2-%d7/%a2-%a6 | ||
102 | lea.l (44, %sp), %sp | ||
103 | rts | ||
104 | |||