summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2006-03-23 19:59:52 +0000
committerThom Johansen <thomj@rockbox.org>2006-03-23 19:59:52 +0000
commitcc94ae45a7f3b377a359f9652340e8c847da5ac6 (patch)
tree2b5979b5469cf2c690b644b819446372146343ab
parent5f9bd4fb987049ecea077c0a1039f0eee31c118c (diff)
downloadrockbox-cc94ae45a7f3b377a359f9652340e8c847da5ac6.tar.gz
rockbox-cc94ae45a7f3b377a359f9652340e8c847da5ac6.zip
Keep assembler versions of DSP routines in dsp_arch.S files. This also
solves the annoying register allocator problem when not using -fomit-frame-pointer for the Coldfire assembler optimised crossfeed routine.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@9215 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/SOURCES1
-rw-r--r--apps/dsp.c92
-rw-r--r--apps/dsp_asm.h31
-rw-r--r--apps/dsp_cf.S104
4 files changed, 145 insertions, 83 deletions
diff --git a/apps/SOURCES b/apps/SOURCES
index f34cac9654..dee4e6e002 100644
--- a/apps/SOURCES
+++ b/apps/SOURCES
@@ -75,6 +75,7 @@ codecs.c
75dsp.c 75dsp.c
76eq.c 76eq.c
77#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 77#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
78dsp_cf.S
78eq_cf.S 79eq_cf.S
79#elif defined(CPU_ARM) && !defined(SIMULATOR) 80#elif defined(CPU_ARM) && !defined(SIMULATOR)
80eq_arm.S 81eq_arm.S
diff --git a/apps/dsp.c b/apps/dsp.c
index c5bbc8fbe9..6e6f702a30 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -28,6 +28,10 @@
28#include "replaygain.h" 28#include "replaygain.h"
29#include "debug.h" 29#include "debug.h"
30 30
31#ifndef SIMULATOR
32#include <dsp_asm.h>
33#endif
34
31/* The "dither" code to convert the 24-bit samples produced by libmad was 35/* The "dither" code to convert the 24-bit samples produced by libmad was
32 * taken from the coolplayer project - coolplayer.sourceforge.net 36 * taken from the coolplayer project - coolplayer.sourceforge.net
33 */ 37 */
@@ -517,90 +521,12 @@ static long dither_sample(int32_t sample, int32_t bias, int32_t mask,
517 return output; 521 return output;
518} 522}
519 523
520/* Apply a constant gain to the samples (e.g., for ReplayGain). May update 524/* Applies crossfeed to the stereo signal in src.
521 * the src array if gain was applied. 525 * Crossfeed is a process where listening over speakers is simulated. This
522 * Note that this must be called before the resampler. 526 * is good for old hard panned stereo records, which might be quite fatiguing
527 * to listen to on headphones with no crossfeed.
523 */ 528 */
524#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) 529#ifndef DSP_HAVE_ASM_CROSSFEED
525static const long crossfeed_coefs[6] ICONST_ATTR = {
526 LOW, LOW_COMP, HIGH_NEG, HIGH_COMP, ATT, ATT_COMP
527};
528
529static void apply_crossfeed(int32_t* src[], int count)
530{
531 asm volatile (
532 "lea.l crossfeed_data, %%a1 \n"
533 "lea.l (16, %%a1), %%a0 \n"
534 "movem.l (%%a1), %%d0-%%d3 \n"
535 "move.l (120, %%a1), %%d4 \n"
536 /* fetch left, right, LOW and LOW_COMP for first iteration */
537 "move.l (%[src0]), %%d5 \n"
538 "move.l (%[src1]), %%d6 \n"
539 "move.l (%[coef])+, %%a1 \n"
540 "move.l (%[coef])+, %%a2 \n"
541 /* Register usage in loop:
542 * a0 = &delay[0][0], a1 & a2 = coefs
543 * d0 = low_left, d1 = low_right,
544 * d2 = high_left, d3 = high_right,
545 * d4 = delay line index,
546 * d5 = src[0][i], d6 = src[1][i].
547 * The rest are described in asm constraint list.
548 */
549 ".cfloop:"
550 /* LOW*low_left + LOW_COMP*left */
551 "mac.l %%a1, %%d0, %%acc0 \n"
552 "mac.l %%a2, %%d5, %%acc0 \n"
553 /* LOW*low_right + LOW_COMP*right */
554 "mac.l %%a1, %%d1, (%[coef])+, %%a1, %%acc1 \n" /* a1 = HIGH_NEG */
555 "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = HIGH_COMP */
556 "movclr.l %%acc0, %%d0 \n" /* get low_left */
557 "movclr.l %%acc1, %%d1 \n" /* get low_right */
558 /* HIGH_NEG*high_left + HIGH_COMP*left */
559 "mac.l %%a1, %%d2, %%acc0 \n"
560 "mac.l %%a2, %%d5, %%acc0 \n"
561 /* HIGH_NEG*high_right + HIGH_COMP*right */
562 "mac.l %%a1, %%d3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = ATT */
563 "mac.l %%a2, %%d6, (%[coef])+, %%a2, %%acc1 \n" /* a2 = ATT_COMP */
564 "lea.l (-6*4, %[coef]), %[coef] \n" /* coef = &coefs[0] */
565 "move.l (%%a0, %%d4*4), %%a3 \n" /* a3=delay[0][idx] */
566 "move.l (52, %%a0, %%d4*4), %%d5 \n" /* d5=delay[1][idx] */
567 "movclr.l %%acc0, %%d2 \n" /* get high_left */
568 "movclr.l %%acc1, %%d3 \n" /* get high_right */
569 /* ATT*delay_r + ATT_COMP*high_left */
570 "mac.l %%a1, %%d5, (4, %[src0]), %%d5, %%acc0\n" /* d5 = src[0][i+1] */
571 "mac.l %%a2, %%d2, (4, %[src1]), %%d6, %%acc0\n" /* d6 = src[1][i+1] */
572 /* ATT*delay_l + ATT_COMP*high_right */
573 "mac.l %%a1, %%a3, (%[coef])+, %%a1, %%acc1 \n" /* a1 = LOW */
574 "mac.l %%a2, %%d3, (%[coef])+, %%a2, %%acc1 \n" /* a2 = LOW_COMP */
575
576 /* save crossfed samples to output */
577 "movclr.l %%acc0, %%a3 \n"
578 "move.l %%a3, (%[src0])+ \n" /* src[0][i++] = out_l */
579 "movclr.l %%acc1, %%a3 \n"
580 "move.l %%a3, (%[src1])+ \n" /* src[1][i++] = out_r */
581 "move.l %%d0, (%%a0, %%d4*4) \n" /* delay[0][index] = low_left */
582 "move.l %%d1, (52, %%a0, %%d4*4)\n" /* delay[1][index] = low_right */
583 "addq.l #1, %%d4 \n" /* index++ */
584 "cmp.l #13, %%d4 \n" /* if (index >= 13) { */
585 "jlt .nowrap \n"
586 "clr.l %%d4 \n" /* index = 0 */
587 ".nowrap: \n" /* } */
588 "subq.l #1, %[count] \n"
589 "jne .cfloop \n"
590 /* save data back to struct */
591 "lea.l crossfeed_data, %%a1 \n"
592 "movem.l %%d0-%%d3, (%%a1) \n"
593 "move.l %%d4, (120, %%a1) \n"
594 /* NOTE: We _just_ have enough registers for our use here, clobber just
595 one more and GCC will fail. */
596 :
597 : [count] "d" (count),
598 [src0] "a" (src[0]), [src1] "a" (src[1]), [coef] "a" (crossfeed_coefs)
599 : "d0", "d1", "d2", "d3", "d4", "d5", "d6",
600 "a0", "a1", "a2", "a3"
601 );
602}
603#else
604static void apply_crossfeed(int32_t* src[], int count) 530static void apply_crossfeed(int32_t* src[], int count)
605{ 531{
606 int32_t a; /* accumulator */ 532 int32_t a; /* accumulator */
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
new file mode 100644
index 0000000000..ad08bc9f4b
--- /dev/null
+++ b/apps/dsp_asm.h
@@ -0,0 +1,31 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 Thom Johansen
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#include <config.h> /* NOTE(review): presumably supplies CPU_COLDFIRE - confirm */
21
22#ifndef _DSP_ASM_H
23#define _DSP_ASM_H
24
25#ifdef CPU_COLDFIRE
26#define DSP_HAVE_ASM_CROSSFEED /* dsp.c leaves out its C apply_crossfeed when this is defined */
27void apply_crossfeed(int32_t* src[], int count); /* defined in dsp_cf.S; NOTE(review): int32_t must already be declared by the includer - confirm */
28#endif /* CPU_COLDFIRE */
29
30#endif /* _DSP_ASM_H */
31
diff --git a/apps/dsp_cf.S b/apps/dsp_cf.S
new file mode 100644
index 0000000000..6147ebeea7
--- /dev/null
+++ b/apps/dsp_cf.S
@@ -0,0 +1,104 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2006 Thom Johansen
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20 .section .idata,"aw",@progbits | coefficient table, stored in the order the loop fetches it
21crossfeed_coefs: | NOTE(review): values look like Q0.31 fractions (0.6, 0.4, -0.8, 0.8, 0.1, 0.9) - confirm
22 .long 0x4CCCCCCD | LOW
23 .long 0x33333333 | LOW_COMP
24 .long -0x66666666 | HIGH_NEG
25 .long 0x66666666 | HIGH_COMP
26 .long 0x0CCCCCCD | ATT
27 .long 0x73333333 | ATT_COMP
28
29 .section .text
30 .global apply_crossfeed | void apply_crossfeed(int32_t* src[], int count)
31apply_crossfeed:
32 lea.l (-44, %sp), %sp | make room for 11 saved longwords
33 movem.l %d2-%d7/%a2-%a6, (%sp) | save d2-d7/a2-a6 (11 regs)
34 move.l (44+4, %sp), %a4 | a4 = src (first stack argument, above the save area)
35 movem.l (%a4), %a4-%a5 | a4 = src[0], a5 = src[1]
36 move.l (44+8, %sp), %d7 | d7 = count
37
38 lea.l crossfeed_data, %a1 | a1 = &crossfeed_data (symbol defined outside this file)
39 lea.l crossfeed_coefs, %a6 | a6 = &crossfeed_coefs[0]
40 lea.l (16, %a1), %a0 | a0 = &delay[0][0]
41 movem.l (%a1), %d0-%d3 | fetch filter history samples
42 move.l (120, %a1), %d4 | fetch delay line index (120 = 16 + 2*13*4)
43 move.l (%a4), %d5 | d5 = left sample
44 move.l (%a5), %d6 | d6 = right sample
45 move.l (%a6)+, %a1 | a1 = LOW value
46 move.l (%a6)+, %a2 | a2 = LOW_COMP value
47 /* Register usage in loop:
48 * a0 = &delay[0][0], a1 & a2 = coefs, a3 = temp storage,
49 * a4 = src[0], a5 = src[1], a6 = &crossfeed_coefs[0],
50 * d0 = low_left, d1 = low_right,
51 * d2 = high_left, d3 = high_right,
52 * d4 = delay line index,
53 * d5 = src[0][i], d6 = src[1][i],
54 * d7 = count.
55 */
56.cfloop:
57 | LOW*low_left + LOW_COMP*left
58 mac.l %a1, %d0, %acc0
59 mac.l %a2, %d5, %acc0
60 | LOW*low_right + LOW_COMP*right
61 mac.l %a1, %d1, (%a6)+, %a1, %acc1 | a1 = HIGH_NEG
62 mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = HIGH_COMP
63 movclr.l %acc0, %d0 | get low_left
64 movclr.l %acc1, %d1 | get low_right
65 | HIGH_NEG*high_left + HIGH_COMP*left
66 mac.l %a1, %d2, %acc0
67 mac.l %a2, %d5, %acc0
68 | HIGH_NEG*high_right + HIGH_COMP*right
69 mac.l %a1, %d3, (%a6)+, %a1, %acc1 | a1 = ATT
70 mac.l %a2, %d6, (%a6)+, %a2, %acc1 | a2 = ATT_COMP
71 lea.l (-6*4, %a6), %a6 | rewind a6 to &crossfeed_coefs[0] (6 longs consumed)
72 move.l (%a0, %d4*4), %a3 | a3 = delay[0][idx]
73 move.l (52, %a0, %d4*4), %d5 | d5 = delay[1][idx] (52 = 13*4 bytes past delay[0])
74 movclr.l %acc0, %d2 | get high_left
75 movclr.l %acc1, %d3 | get high_right
76 | ATT*delay_r + ATT_COMP*high_left
77 mac.l %a1, %d5, (4, %a4), %d5, %acc0 | d5 = src[0][i+1]
78 mac.l %a2, %d2, (4, %a5), %d6, %acc0 | d6 = src[1][i+1]
79 | ATT*delay_l + ATT_COMP*high_right
80 mac.l %a1, %a3, (%a6)+, %a1, %acc1 | a1 = LOW
81 mac.l %a2, %d3, (%a6)+, %a2, %acc1 | a2 = LOW_COMP
82 | NOTE(review): the i+1 loads above also run on the last pass, reading src[0][count] and src[1][count] - confirm this is safe
83 | save crossfed samples to output
84 movclr.l %acc0, %a3
85 move.l %a3, (%a4)+ | src[0][i++] = out_l
86 movclr.l %acc1, %a3
87 move.l %a3, (%a5)+ | src[1][i++] = out_r
88 move.l %d0, (%a0, %d4*4) | delay[0][index] = low_left
89 move.l %d1, (52, %a0, %d4*4) | delay[1][index] = low_right
90 addq.l #1, %d4 | index++
91 cmp.l #13, %d4 | if (index >= 13) {
92 jlt .nowrap
93 clr.l %d4 | index = 0
94.nowrap: | }
95 subq.l #1, %d7 | count--
96 jne .cfloop | repeat while count != 0; NOTE(review): count is not tested before the first pass - confirm callers never pass 0
97 | save data back to struct
98 lea.l crossfeed_data, %a1 | a1 held a coefficient in the loop, reload the struct address
99 movem.l %d0-%d3, (%a1) | store filter history samples
100 move.l %d4, (120, %a1) | store delay line index
101 movem.l (%sp), %d2-%d7/%a2-%a6 | restore saved regs
102 lea.l (44, %sp), %sp | release the save area
103 rts
104