summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThom Johansen <thomj@rockbox.org>2007-03-11 23:33:58 +0000
committerThom Johansen <thomj@rockbox.org>2007-03-11 23:33:58 +0000
commit1b05ea8ffe7e2ac36d77c5ff712805f6fb476d1e (patch)
tree4e3b61800a5933055868caf085e8edcb14cd0670
parent1b3fc39a658644b85800a900ab7c56303d163aa9 (diff)
downloadrockbox-1b05ea8ffe7e2ac36d77c5ff712805f6fb476d1e.tar.gz
rockbox-1b05ea8ffe7e2ac36d77c5ff712805f6fb476d1e.zip
ARM assembler for resampling. Should provide some gains, though not huge ones.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12732 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/dsp_arm.S130
-rw-r--r--apps/dsp_asm.h5
2 files changed, 129 insertions, 6 deletions
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S
index 27669203f1..c3e5c7cd05 100644
--- a/apps/dsp_arm.S
+++ b/apps/dsp_arm.S
@@ -17,14 +17,14 @@
17 * 17 *
18 ****************************************************************************/ 18 ****************************************************************************/
19 19
20/* 20/****************************************************************************
21 * void apply_crossfeed(int count, int32_t* src[]) 21 * void apply_crossfeed(int count, int32_t* src[])
22 */ 22 */
23 .section .text 23 .section .text
24 .global apply_crossfeed 24 .global apply_crossfeed
25apply_crossfeed: 25apply_crossfeed:
26 @ unfortunately, we ended up in a bit of a register squeeze here, and need 26 @ unfortunately, we ended up in a bit of a register squeeze here, and need
27 @ to keep both the count and the delay line index on the stack :/ 27 @ to keep the count on the stack :/
28 stmdb sp!, { r4-r11, lr } @ stack modified regs 28 stmdb sp!, { r4-r11, lr } @ stack modified regs
29 ldmia r1, { r2-r3 } @ r2 = src[0], r3 = src[1] 29 ldmia r1, { r2-r3 } @ r2 = src[0], r3 = src[1]
30 30
@@ -74,7 +74,131 @@ apply_crossfeed:
74 @ save data back to struct 74 @ save data back to struct
75 ldr r12, =crossfeed_data + 4*4 75 ldr r12, =crossfeed_data + 4*4
76 stmia r12, { r8-r11 } @ save filter history 76 stmia r12, { r8-r11 } @ save filter history
77 str r0, [r12, #30*4] @ save delay line index 77 str r0, [r12, #30*4] @ save delay line index
78 add sp, sp, #8 @ remove temp variables from stack 78 add sp, sp, #8 @ remove temp variables from stack
79 ldmia sp!, { r4-r11, pc } 79 ldmia sp!, { r4-r11, pc }
80.cfend:
81 .size apply_crossfeed,.cfend-apply_crossfeed
82
83/****************************************************************************
84 * int dsp_downsample(int count, struct dsp_data *data,
85 * int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolation resampler. For every channel it steps a fixed-point
 * phase (pos = phase >> 16, frac = phase & 0xffff) through the input by
 * delta per output sample and writes
 *     s[pos - 1] + ((frac * (s[pos] - s[pos - 1])) >> 16)
 * until pos reaches count.
 *
 * In:    r0 = count, r1 = data, r2 = src, r3 = dst
 * Out:   r0 = number of samples written to dst[0]
 * Reads: words at data+4 (num_channels), data+8 (delta), data+12 (phase)
 *        and the per-channel last_sample[] array starting at data+16.
 * Saves and restores r4-r11; r12 and r14 are used as scratch.
 *
 * NOTE(review): s[count - 1] is read unconditionally, so count is assumed
 * to be at least 1 - confirm against the callers.
 * NOTE(review): the loop body fetches s[pos - 1] from the source buffer, so
 * after the first output pos is presumably at least 1 (delta >= 0x10000) -
 * confirm.
86 */
87 .section .text
88 .global dsp_downsample
89dsp_downsample:
90 stmdb sp!, { r4-r11, lr } @ save callee-saved regs and return address
91 ldmib r1, { r5-r6 } @ r5 = num_channels, r6 = resample_data.delta (words at data+4, data+8)
92 sub r5, r5, #1 @ r5 = index of the highest channel, counted down to 0
93 add r4, r1, #12 @ r4 = &resample_data.phase (data+12)
94 mov r12, #0xff @ build the frac mask in two instructions
95 orr r12, r12, #0xff00 @ r12 = 0xffff
96.dschannel_loop:
97 ldr r1, [r4] @ r1 = phase, reloaded so each channel starts from the same value
98 ldr r7, [r2, r5, lsl #2] @ r7 = s = src[cur_channel]
99 ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[cur_channel]
100 add r9, r4, #4 @ r9 = &last_sample[0]
101 ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[cur_channel], used as s[pos - 1] when pos is 0
102 sub r11, r0, #1 @ r11 = count - 1
103 ldr r14, [r7, r11, lsl #2] @ r14 = s[count - 1] ...
104 str r14, [r9, r5, lsl #2] @ ... stored as last_sample[cur_channel] for the next call
105 movs r9, r1, lsr #16 @ r9 = pos = phase >> 16, Z flag set when pos is 0
106 ldreq r11, [r7] @ pos is 0: r11 = s[0], r10 already holds last_sample
107 beq .dsuse_last_start @ ... and enter the loop after the pair load
108 cmp r9, r0 @ pos >= count (signed compare): nothing to output for this channel
109 bge .dsloop_skip
110
111 @ Register usage in loop:
112 @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
113 @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos]
@ r12 = 0xffff (frac mask), r14 = scratch. Within one iteration r9 is also
@ reused for &s[pos], frac and the low word of the product, and r11 for diff.
114.dsloop:
115 add r9, r7, r9, lsl #2 @ r9 = &s[pos]
116 ldmda r9, { r10, r11 } @ r10 = s[pos - 1], r11 = s[pos] (words at r9-4 and r9)
117.dsuse_last_start:
118 sub r11, r11, r10 @ r11 = diff = s[pos] - s[pos - 1]
119 @ keep frac in lower bits to take advantage of multiplier early termination
120 and r9, r1, r12 @ r9 = frac = phase & 0xffff
121 smull r9, r14, r11, r9 @ r14:r9 = signed 64-bit product diff * frac
122 add r10, r10, r14, lsl #16 @ add the high word contribution of (product >> 16)
123 add r10, r10, r9, lsr #16 @ r10 = out = s[pos - 1] + ((frac * diff) >> 16)
124 str r10, [r8], #4 @ *d++ = out
125 add r1, r1, r6 @ phase += delta
126 mov r9, r1, lsr #16 @ pos = phase >> 16
127 cmp r9, r0 @ pos < count?
128 blt .dsloop @ yes, produce another output sample
129.dsloop_skip:
130 subs r5, r5, #1
131 bpl .dschannel_loop @ loop while the decremented channel index is still >= 0
132 sub r1, r1, r0, lsl #16 @ phase -= count << 16, making it relative to the next frame
133 str r1, [r4] @ resample_data.phase = wrapped phase
134 ldr r1, [r3] @ r1 = dst[0], start of the channel 0 output buffer
135 sub r8, r8, r1 @ r8 = d - dst[0] = bytes written (channel 0 was processed last)
136 mov r0, r8, lsr #2 @ return value = bytes / 4 = samples written
137 ldmia sp!, { r4-r11, pc } @ restore saved regs and return
138.dsend:
139 .size dsp_downsample,.dsend-dsp_downsample
140
141/****************************************************************************
142 * int dsp_upsample(int count, struct dsp_data *dsp,
143 * int32_t *src[], int32_t *dst[])
 *
 * Linear-interpolation resampler. For every channel it emits
 *     s[pos - 1] + ((frac * (s[pos] - s[pos - 1])) >> 16)
 * repeatedly for the same pair of input samples, adding the low 16 bits of
 * delta to frac each time, and moves on to the next input sample whenever
 * that addition carries. The channel is finished once the source pointer
 * reaches s + count.
 *
 * In:    r0 = count, r1 = dsp, r2 = src, r3 = dst
 * Out:   r0 = number of samples written to dst[0]
 * Reads: words at dsp+4 (num_channels), dsp+8 (delta), dsp+12 (phase)
 *        and the per-channel last_sample[] array starting at dsp+16.
 * Saves and restores r4-r11; r12 and r14 are used as scratch. Two extra
 * words (count and &resample_data.phase) live on the stack because r0 and
 * r4 are reused inside the inner loop.
 *
 * NOTE(review): s[count - 1] is read unconditionally, so count is assumed
 * to be at least 1 - confirm against the callers.
144 */
145 .section .text
146 .global dsp_upsample
147dsp_upsample:
148 stmdb sp!, { r4-r11, lr } @ save callee-saved regs and return address
149 ldmib r1, { r5-r6 } @ r5 = num_channels, r6 = resample_data.delta (words at dsp+4, dsp+8)
150 sub r5, r5, #1 @ r5 = index of the highest channel, counted down to 0
151 add r4, r1, #12 @ r4 = &resample_data.phase (dsp+12)
152 stmdb sp!, { r0, r4 } @ keep count and &resample_data.phase on the stack (8 bytes)
153.uschannel_loop:
154 ldr r12, [r4] @ r12 = resample_data.phase
155 mov r1, r12, ror #16 @ r1 = phase with halfwords swapped: frac on top, pos below,
156 @ so adding to frac sets carry exactly when pos would increment
157 ldr r7, [r2, r5, lsl #2] @ r7 = s = src[cur_channel]
158 ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[cur_channel]
159 add r9, r4, #4 @ r9 = &last_sample[0]
160 ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[cur_channel], used as s[pos - 1] when pos is 0
161 sub r11, r0, #1 @ r11 = count - 1
162 ldr r14, [r7, r11, lsl #2] @ r14 = s[count - 1] ...
163 str r14, [r9, r5, lsl #2] @ ... stored as last_sample[cur_channel] for the next call
164 add r9, r7, r0, lsl #2 @ r9 = src_end = &s[count]
165 movs r14, r12, lsr #16 @ r14 = pos = resample_data.phase >> 16, Z flag set when pos is 0
166 beq .usstart_0 @ pos is 0: start at s[0] with r10 = last_sample
167 cmp r14, r0 @ pos >= count (signed compare): nothing to output for this channel
168 bge .usloop_skip
169 add r7, r7, r14, lsl #2 @ r7 = &s[pos]
170 ldr r10, [r7, #-4] @ r10 = s[pos - 1]
171 b .usstart_0
172
173 @ Register usage in loop:
174 @ r0 = diff = s[pos] - s[pos - 1], r1 = halfword-swapped phase, r4 = frac,
175 @ r5 = cur_channel, r6 = delta, r7 = s (points one past s[pos]), r8 = d,
@ r9 = src_end, r10 = s[pos - 1], r11 = s[pos], r12 and r14 = scratch.
@ count and &resample_data.phase are NOT in r0 and r4 here; both are
@ reloaded from the stack at .usloop_skip.
176.usloop_1:
177 mov r10, r11 @ the current sample becomes s[pos - 1]
178.usstart_0:
179 ldr r11, [r7], #4 @ r11 = s[pos], then advance s to the next sample
180 sub r0, r11, r10 @ r0 = diff = s[pos] - s[pos - 1] (overwrites count)
181.usloop_0:
182 mov r4, r1, lsr #16 @ r4 = frac = upper halfword of r1 (overwrites &phase)
183 smull r12, r14, r4, r0 @ r14:r12 = signed 64-bit product frac * diff
184 add r14, r10, r14, lsl #16 @ s[pos - 1] plus the high word contribution of (product >> 16)
185 add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + ((frac * diff) >> 16)
186 str r14, [r8], #4 @ *d++ = out
187 adds r1, r1, r6, lsl #16 @ frac += low 16 bits of delta, carry out means pos increments
188 bcc .usloop_0 @ no carry: keep interpolating between the same two samples
189 cmp r7, r9 @ if s < src_end (unsigned), fetch the next input sample
190 blo .usloop_1
191.usloop_skip:
192 subs r5, r5, #1
193 ldmia sp, { r0, r4 } @ reload count and &resample_data.phase (no writeback, flags untouched)
194 bpl .uschannel_loop @ loop while the decremented channel index is still >= 0
195 mov r1, r1, ror #16 @ swap the halfwords back: frac returns to the low half
@ NOTE(review): the adds above only changes the upper halfword of r1, so the
@ pos field stored below is the one loaded at the top of the channel loop.
@ That matches a wrapped phase only if the frame started at pos 0 - confirm.
196 str r1, [r4] @ resample_data.phase = r1
197 ldr r1, [r3] @ r1 = dst[0], start of the channel 0 output buffer
198 sub r8, r8, r1 @ r8 = d - dst[0] = bytes written (channel 0 was processed last)
199 mov r0, r8, lsr #2 @ return value = bytes / 4 = samples written
200 add sp, sp, #8 @ drop the two temporaries pushed after the register save
201 ldmia sp!, { r4-r11, pc } @ restore saved regs and return
202.usend:
203 .size dsp_upsample,.usend-dsp_upsample
80 204
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h
index ee90f5763e..f8df337b37 100644
--- a/apps/dsp_asm.h
+++ b/apps/dsp_asm.h
@@ -27,13 +27,12 @@
27#if defined(CPU_COLDFIRE) || defined(CPU_ARM) 27#if defined(CPU_COLDFIRE) || defined(CPU_ARM)
28#define DSP_HAVE_ASM_CROSSFEED 28#define DSP_HAVE_ASM_CROSSFEED
29void apply_crossfeed(int count, int32_t *buf[]); 29void apply_crossfeed(int count, int32_t *buf[]);
30#endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */
31
32#if defined (CPU_COLDFIRE)
33#define DSP_HAVE_ASM_RESAMPLING 30#define DSP_HAVE_ASM_RESAMPLING
34int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); 31int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]);
35int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); 32int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]);
33#endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */
36 34
35#if defined (CPU_COLDFIRE)
37#define DSP_HAVE_ASM_SOUND_CHAN_MONO 36#define DSP_HAVE_ASM_SOUND_CHAN_MONO
38void channels_process_sound_chan_mono(int count, int32_t *buf[]); 37void channels_process_sound_chan_mono(int count, int32_t *buf[]);
39#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM 38#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM