summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2008-03-19 13:55:53 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2008-03-19 13:55:53 +0000
commitfd052ec753cade16675e211ced0a2be19c0d545f (patch)
tree094375afe1644abe2a312bb7feee885dcbdb64c0
parent178df1cfcfa529c58ad37922d6d934e1e0328fc5 (diff)
downloadrockbox-fd052ec753cade16675e211ced0a2be19c0d545f.tar.gz
rockbox-fd052ec753cade16675e211ced0a2be19c0d545f.zip
Commit FS#8750. Add ARM assembler for the dsp-functions channels_process_sound_chan_mono(), channels_process_sound_chan_karaoke(), sample_output_mono() and sample_output_stereo(). By measurement the speedup is ~75% for the first three functions and ~40% for sample_output_stereo(). Additionally avoid calling yield() too often in dsp.c -- it is now limited to once per tick.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16717 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/dsp.c9
-rw-r--r--apps/dsp_arm.S177
-rw-r--r--apps/dsp_asm.h4
3 files changed, 189 insertions, 1 deletions
diff --git a/apps/dsp.c b/apps/dsp.c
index 3c2d7f63b1..5bbbe08ac2 100644
--- a/apps/dsp.c
+++ b/apps/dsp.c
@@ -1112,6 +1112,7 @@ int dsp_callback(int msg, intptr_t param)
1112int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count) 1112int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
1113{ 1113{
1114 int32_t *tmp[2]; 1114 int32_t *tmp[2];
1115 long last_yield = current_tick;
1115 int written = 0; 1116 int written = 0;
1116 int samples; 1117 int samples;
1117 1118
@@ -1159,7 +1160,13 @@ int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
1159 1160
1160 written += samples; 1161 written += samples;
1161 dst += samples * sizeof (int16_t) * 2; 1162 dst += samples * sizeof (int16_t) * 2;
1162 yield(); 1163
1164 /* yield at least once each tick */
1165 if (current_tick > last_yield)
1166 {
1167 yield();
1168 last_yield = current_tick;
1169 }
1163 } 1170 }
1164 1171
1165#if defined(CPU_COLDFIRE) 1172#if defined(CPU_COLDFIRE)
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S
index c3e5c7cd05..751e0f5130 100644
--- a/apps/dsp_arm.S
+++ b/apps/dsp_arm.S
@@ -18,6 +18,183 @@
18 ****************************************************************************/ 18 ****************************************************************************/
19 19
20/**************************************************************************** 20/****************************************************************************
21 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
22 *
23 * NOTE: The following code processes two samples at once. When count is odd,
24 * there is an additional obsolete sample processed, which will not be
25 * used by the calling functions.
26 */
27 .section .icode, "ax", %progbits
28 .align 2
29 .global channels_process_sound_chan_mono
30 .type channels_process_sound_chan_mono, %function
31channels_process_sound_chan_mono:
32 @ input: r0 = count, r1 = buf; overwrites both channel buffers in place with buf[0]/2 + buf[1]/2
33 stmfd sp!, {r4-r6, lr} @ save the scratch registers used below plus the return address
34 ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1] (running pointers into the two channels)
35
36.monoloop:
37 ldmia r2, {r4-r5} @ r4, r5 = next two samples of buf[0] (no pointer writeback yet)
38 ldmia r3, {r6,lr} @ r6, lr = next two samples of buf[1] (no pointer writeback yet)
39 mov r4, r4, asr #1 @ r4 = r4 >> 1 (arithmetic), i.e. half of the first buf[0] sample
40 add r4, r4, r6, asr #1 @ r4 = r4 + (r6 >> 1): each channel is halved before summing
41 mov r5, r5, asr #1 @ r5 = r5 >> 1 (arithmetic), i.e. half of the second buf[0] sample
42 add r5, r5, lr, asr #1 @ r5 = r5 + (lr >> 1): same mix for the second sample
43 stmia r2!, {r4-r5} @ store both mixed samples to buf[0] and advance r2 by two words
44 stmia r3!, {r4-r5} @ store the same two samples to buf[1] and advance r3 by two words
45 subs r0, r0, #2 @ two samples consumed per iteration; sets flags for the branch
46 bgt .monoloop @ loop while count > 0; the body has already run once before this first test
47
48 ldmfd sp!, {r4-r6, pc} @ restore registers and return (saved lr is popped into pc)
49.monoend:
50 .size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
51
52/****************************************************************************
53 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
54 * NOTE: The following code processes two samples at once. When count is odd,
55 * there is an additional obsolete sample processed, which will not be
56 * used by the calling functions.
57 */
58 .section .icode, "ax", %progbits
59 .align 2
60 .global channels_process_sound_chan_karaoke
61 .type channels_process_sound_chan_karaoke, %function
62channels_process_sound_chan_karaoke:
63 @ input: r0 = count, r1 = buf; in place: buf[0] = buf[0]/2 - buf[1]/2, buf[1] = -(new buf[0])
64 stmfd sp!, {r4-r6, lr} @ save the scratch registers used below plus the return address
65 ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1] (running pointers into the two channels)
66
67.karaokeloop:
68 ldmia r2, {r4-r5} @ r4, r5 = next two samples of buf[0] (no pointer writeback yet)
69 ldmia r3, {r6,lr} @ r6, lr = next two samples of buf[1] (no pointer writeback yet)
70 mov r6, r6, asr #1 @ r6 = r6 >> 1 (arithmetic), i.e. half of the first buf[1] sample
71 rsb r4, r6, r4, asr #1 @ r4 = (r4 >> 1) - r6: halved channel difference, first sample
72 rsb r6, r4, #0 @ r6 = -r4: buf[1] receives the negated difference
73 mov lr, lr, asr #1 @ lr = lr >> 1 (arithmetic), i.e. half of the second buf[1] sample
74 rsb r5, lr, r5, asr #1 @ r5 = (r5 >> 1) - lr: halved channel difference, second sample
75 rsb lr, r5, #0 @ lr = -r5: buf[1] receives the negated difference
76 stmia r2!, {r4-r5} @ store the two differences to buf[0] and advance r2 by two words
77 stmia r3!, {r6,lr} @ store the two negated differences to buf[1] and advance r3 by two words
78 subs r0, r0, #2 @ two samples consumed per iteration; sets flags for the branch
79 bgt .karaokeloop @ loop while count > 0; the body has already run once before this first test
80
81 ldmfd sp!, {r4-r6, pc} @ restore registers and return (saved lr is popped into pc)
82.karaokeend:
83 .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
84
85/****************************************************************************
86 * void sample_output_mono(int count, struct dsp_data *data,
87 int32_t *src[], int16_t *dst)
88 * NOTE: The following code processes two samples at once. When count is odd,
89 * there is an additional obsolete sample processed, which will not be
90 * used by the calling functions.
91 */
92 .section .icode, "ax", %progbits
93 .align 2
94 .global sample_output_mono
95 .type sample_output_mono, %function
96sample_output_mono:
97 @ input: r0 = count, r1 = data, r2 = src, r3 = dst; only src[0] is read
98 stmfd sp!, {r4-r9, lr} @ NOTE(review): r9 is saved and restored but never referenced in this routine
99
100 ldr r4, [r2] @ r4 = src[0]
101 ldr r5, [r1] @ r5 = data->output_scale (word at offset 0 of *data)
102 sub r1, r5, #1 @ r1 = r5-1
103 mov r2, #1
104 mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1), the rounding term added before the shift
105 mvn r1, #0x8000 @ r1 = ~0x8000 = 0xffff7fff, needed for clipping
106 mov r8, #0xff00
107 orr r8, r8, #0xff @ r8 = 0xffff, needed for masking to the low halfword
108
109.somloop:
110 ldmia r4!, {r6-r7} @ r6, r7 = next two samples of src[0]; r4 advances by two words
111 add r6, r6, r2
112 mov r6, r6, asr r5 @ r6 = (r6 + (1<<(scale-1))) >> scale
113 mov lr, r6, asr #15 @ lr is 0 or -1 exactly when r6 fits in a signed 16-bit value
114 teq lr, lr, asr #31 @ NE if r6 is outside -32768...+32767
115 eorne r6, r1, lr, asr #31 @ Clip: low halfword becomes 0x7fff (positive) or 0x8000 (negative)
116 add r7, r7, r2
117 mov r7, r7, asr r5 @ r7 = (r7 + (1<<(scale-1))) >> scale
118 mov lr, r7, asr #15 @ lr is 0 or -1 exactly when r7 fits in a signed 16-bit value
119 teq lr, lr, asr #31 @ NE if r7 is outside -32768...+32767
120 eorne r7, r1, lr, asr #31 @ Clip: low halfword becomes 0x7fff (positive) or 0x8000 (negative)
121
122 and r6, r6, r8 @ keep only the low 16 bits (drops sign extension / clip residue)
123 orr r6, r6, r6, asl #16 @ duplicate the first sample into both halfwords of one word
124 and r7, r7, r8 @ keep only the low 16 bits (drops sign extension / clip residue)
125 orr r7, r7, r7, asl #16 @ duplicate the second sample into both halfwords of one word
126 stmia r3!, {r6-r7} @ write two words (four halfwords) to dst and advance r3
127
128 subs r0, r0, #2 @ two samples consumed per iteration; sets flags for the branch
129 bgt .somloop @ loop while count > 0; the body has already run once before this first test
130
131 ldmfd sp!, {r4-r9, pc} @ restore registers and return (saved lr is popped into pc)
132.somend:
133 .size sample_output_mono,.somend-sample_output_mono
134
135/****************************************************************************
136 * void sample_output_stereo(int count, struct dsp_data *data,
137 int32_t *src[], int16_t *dst)
138 * NOTE: The following code processes two samples at once. When count is odd,
139 * there is an additional obsolete sample processed, which will not be
140 * used by the calling functions.
141 */
142 .section .icode, "ax", %progbits
143 .align 2
144 .global sample_output_stereo
145 .type sample_output_stereo, %function
146sample_output_stereo:
147 @ input: r0 = count, r1 = data, r2 = src, r3 = dst; interleaves src[0] and src[1] as halfwords
148 stmfd sp!, {r4-r11, lr} @ save the registers used below plus the return address
149
150 ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1]
151 ldr r6, [r1] @ r6 = data->output_scale (word at offset 0 of *data)
152 sub r1, r6, #1 @ r1 = r6-1
153 mov r2, #1
154 mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1), the rounding term added before the shift
155 mvn r1, #0x8000 @ r1 = ~0x8000 = 0xffff7fff, needed for clipping
156 mov r11, #0xff00
157 orr r11, r11, #0xff @ r11 = 0xffff, needed for masking to the low halfword
158
159.sosloop:
160 ldmia r4!, {r7-r8} @ r7, r8 = next two samples of src[0]; r4 advances by two words
161 add r7, r7, r2
162 mov r7, r7, asr r6 @ r7 = (r7 + (1<<(scale-1))) >> scale
163 mov lr, r7, asr #15 @ lr is 0 or -1 exactly when r7 fits in a signed 16-bit value
164 teq lr, lr, asr #31 @ NE if r7 is outside -32768...+32767
165 eorne r7, r1, lr, asr #31 @ Clip: low halfword becomes 0x7fff (positive) or 0x8000 (negative)
166 add r8, r8, r2
167 mov r8, r8, asr r6 @ r8 = (r8 + (1<<(scale-1))) >> scale
168 mov lr, r8, asr #15 @ lr is 0 or -1 exactly when r8 fits in a signed 16-bit value
169 teq lr, lr, asr #31 @ NE if r8 is outside -32768...+32767
170 eorne r8, r1, lr, asr #31 @ Clip: low halfword becomes 0x7fff (positive) or 0x8000 (negative)
171
172 ldmia r5!, {r9-r10} @ r9, r10 = next two samples of src[1]; r5 advances by two words
173 add r9, r9, r2
174 mov r9, r9, asr r6 @ r9 = (r9 + (1<<(scale-1))) >> scale
175 mov lr, r9, asr #15 @ lr is 0 or -1 exactly when r9 fits in a signed 16-bit value
176 teq lr, lr, asr #31 @ NE if r9 is outside -32768...+32767
177 eorne r9, r1, lr, asr #31 @ Clip: low halfword becomes 0x7fff (positive) or 0x8000 (negative)
178 add r10, r10, r2
179 mov r10, r10, asr r6 @ r10 = (r10 + (1<<(scale-1))) >> scale
180 mov lr, r10, asr #15 @ lr is 0 or -1 exactly when r10 fits in a signed 16-bit value
181 teq lr, lr, asr #31 @ NE if r10 is outside -32768...+32767
182 eorne r10, r1, lr, asr #31 @ Clip: low halfword becomes 0x7fff (positive) or 0x8000 (negative)
183
184 and r7, r7, r11 @ keep only the low 16 bits of the first src[0] sample
185 orr r9, r7, r9, asl #16 @ first output word: src[0] sample in the low halfword, src[1] sample in the high halfword
186 and r8, r8, r11 @ keep only the low 16 bits of the second src[0] sample
187 orr r10, r8, r10, asl #16 @ second output word: src[0] sample in the low halfword, src[1] sample in the high halfword
188 stmia r3!, {r9-r10} @ write two words (four halfwords) to dst and advance r3
189
190 subs r0, r0, #2 @ two samples per channel consumed per iteration; sets flags for the branch
191 bgt .sosloop @ loop while count > 0; the body has already run once before this first test
192
193 ldmfd sp!, {r4-r11, pc} @ restore registers and return (saved lr is popped into pc)
194.sosend:
195 .size sample_output_stereo,.sosend-sample_output_stereo
196
197/****************************************************************************
21 * void apply_crossfeed(int count, int32_t* src[]) 198 * void apply_crossfeed(int count, int32_t* src[])
22 */ 199 */
23 .section .text 200 .section .text
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index 02307dbd89..9c40dee8b3 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -26,6 +26,10 @@
26#if defined(CPU_ARM) 26#if defined(CPU_ARM)
27#define DSP_HAVE_ASM_RESAMPLING 27#define DSP_HAVE_ASM_RESAMPLING
28#define DSP_HAVE_ASM_CROSSFEED 28#define DSP_HAVE_ASM_CROSSFEED
29#define DSP_HAVE_ASM_SOUND_CHAN_MONO
30#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
31#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
32#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
29#elif defined (CPU_COLDFIRE) 33#elif defined (CPU_COLDFIRE)
30#define DSP_HAVE_ASM_APPLY_GAIN 34#define DSP_HAVE_ASM_APPLY_GAIN
31#define DSP_HAVE_ASM_RESAMPLING 35#define DSP_HAVE_ASM_RESAMPLING