summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Sevakis <jethead71@rockbox.org>, 2010-05-11 08:40:52 +0000
committer: Michael Sevakis <jethead71@rockbox.org>, 2010-05-11 08:40:52 +0000
commit: ab4c86cbc6a66b3c1df25676d0682c77a842a4a3 (patch)
tree: 69ba10984ec23e0e2765c44425d010b88ec8a177
parent: 156272fced75d2852b2a6c3f68df3d69f0038757 (diff)
download: rockbox-ab4c86cbc6a66b3c1df25676d0682c77a842a4a3.tar.gz
download: rockbox-ab4c86cbc6a66b3c1df25676d0682c77a842a4a3.zip
ARM DSP: Make things a little more pipeline-friendly. Reduce nonvolatile register stacking where possible. Routines now handle odd sample counts properly and will not write past the last sample in that case. Remove a few pointless labels.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25943 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/dsp_arm.S 364
1 file changed, 218 insertions, 146 deletions
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S
index f924569bc5..b4871d1506 100644
--- a/apps/dsp_arm.S
+++ b/apps/dsp_arm.S
@@ -33,24 +33,37 @@
33 .type channels_process_sound_chan_mono, %function 33 .type channels_process_sound_chan_mono, %function
34channels_process_sound_chan_mono: 34channels_process_sound_chan_mono:
35 @ input: r0 = count, r1 = buf 35 @ input: r0 = count, r1 = buf
36 stmfd sp!, {r4-r5, lr} 36 stmfd sp!, { r4, lr } @
37 ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1] 37 @
38 38 ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1]
39.monoloop: 39 subs r0, r0, #1 @ odd: end at 0; even: end at -1
40 ldmia r2, {r4-r5} 40 beq .mono_singlesample @ Zero? Only one sample!
41 ldmia r3, {r12,lr} 41 @
42 mov r4, r4, asr #1 @ r4 = r4/2 42.monoloop: @
43 add r4, r4, r12, asr #1 @ r4 = r4 + r12/2 = (buf[0]+buf[1])/2 43 ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1
44 mov r5, r5, asr #1 @ r5 = r5/2 44 ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1
45 add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2 45 mov r3, r3, asr #1 @ Mo0 = Li0 / 2 + Ri0 / 2
46 stmia r2!, {r4-r5} 46 mov r4, r4, asr #1 @ Mo1 = Li1 / 2 + Ri1 / 2
47 stmia r3!, {r4-r5} 47 add r12, r3, r12, asr #1 @
48 subs r0, r0, #2 48 add r14, r4, r14, asr #1 @
49 bgt .monoloop 49 subs r0, r0, #2 @
50 50 stmia r1!, { r12, r14 } @ store Mo0, Mo1
51 ldmfd sp!, {r4-r5, pc} 51 stmia r2!, { r12, r14 } @ store Mo0, Mo1
52.monoend: 52 bgt .monoloop @
53 .size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono 53 @
54 ldmltfd sp!, { r4, pc } @ if count was even, we're done
55 @
56.mono_singlesample: @
57 ldr r3, [r1] @ r3 = Ls
58 ldr r12, [r2] @ r12 = Rs
59 mov r3, r3, asr #1 @ Mo = Ls / 2 + Rs / 2
60 add r12, r3, r12, asr #1 @
61 str r12, [r1] @ store Mo
62 str r12, [r2] @ store Mo
63 @
64 ldmfd sp!, { r4, pc } @
65 .size channels_process_sound_chan_mono, \
66 .-channels_process_sound_chan_mono
54 67
55/**************************************************************************** 68/****************************************************************************
56 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) 69 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
@@ -64,26 +77,40 @@ channels_process_sound_chan_mono:
64 .type channels_process_sound_chan_karaoke, %function 77 .type channels_process_sound_chan_karaoke, %function
65channels_process_sound_chan_karaoke: 78channels_process_sound_chan_karaoke:
66 @ input: r0 = count, r1 = buf 79 @ input: r0 = count, r1 = buf
67 stmfd sp!, {r4-r5, lr} 80 stmfd sp!, { r4, lr } @
68 ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1] 81 @
69 82 ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1]
70.karaokeloop: 83 subs r0, r0, #1 @ odd: end at 0; even: end at -1
71 ldmia r2, {r4-r5} 84 beq .karaoke_singlesample @ Zero? Only one sample!
72 ldmia r3, {r12,lr} 85 @
73 mov r12, r12, asr #1 @ r12 = r12/2 86.karaokeloop: @
74 rsb r4, r12, r4, asr #1 @ r4 = -r12 + r4/2 = (buf[0]-buf[1])/2 87 ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1
75 rsb r12, r4, #0 @ r12 = -r4 88 ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1
76 mov lr, lr, asr #1 @ lr = lr/2 89 mov r3, r3, asr #1 @ Lo0 = Li0 / 2 - Ri0 / 2
77 rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2 90 mov r4, r4, asr #1 @ Lo1 = Li1 / 2 - Ri1 / 2
78 rsb lr, r5, #0 @ lr = -r5 91 sub r3, r3, r12, asr #1 @
79 stmia r2!, {r4-r5} 92 sub r4, r4, r14, asr #1 @
80 stmia r3!, {r12,lr} 93 rsb r12, r3, #0 @ Ro0 = -Lk0 = Rs0 / 2 - Ls0 / 2
81 subs r0, r0, #2 94 rsb r14, r4, #0 @ Ro1 = -Lk1 = Ri1 / 2 - Li1 / 2
82 bgt .karaokeloop 95 subs r0, r0, #2 @
83 96 stmia r1!, { r3, r4 } @ store Lo0, Lo1
84 ldmfd sp!, {r4-r5, pc} 97 stmia r2!, { r12, r14 } @ store Ro0, Ro1
85.karaokeend: 98 bgt .karaokeloop @
86 .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke 99 @
100 ldmltfd sp!, { r4, pc } @ if count was even, we're done
101 @
102.karaoke_singlesample: @
103 ldr r3, [r1] @ r3 = Li
104 ldr r12, [r2] @ r12 = Ri
105 mov r3, r3, asr #1 @ Lk = Li / 2 - Ri /2
106 sub r3, r3, r12, asr #1 @
107 rsb r12, r3, #0 @ Rk = -Lo = Ri / 2 - Li / 2
108 str r3, [r1] @ store Lo
109 str r12, [r2] @ store Ro
110 @
111 ldmfd sp!, { r4, pc } @
112 .size channels_process_sound_chan_karaoke, \
113 .-channels_process_sound_chan_karaoke
87 114
88#if ARM_ARCH < 6 115#if ARM_ARCH < 6
89/**************************************************************************** 116/****************************************************************************
@@ -99,42 +126,57 @@ channels_process_sound_chan_karaoke:
99 .type sample_output_mono, %function 126 .type sample_output_mono, %function
100sample_output_mono: 127sample_output_mono:
101 @ input: r0 = count, r1 = data, r2 = src, r3 = dst 128 @ input: r0 = count, r1 = data, r2 = src, r3 = dst
102 stmfd sp!, {r4-r7, lr} 129 stmfd sp!, { r4-r6, lr }
103 130
104 ldr r4, [r2] @ r4 = src[0] 131 ldr r1, [r1] @ lr = data->output_scale
105 ldr r5, [r1] @ lr = data->output_scale 132 ldr r2, [r2] @ r2 = src[0]
106 sub r1, r5, #1 @ r1 = r5-1 133
107 mov r2, #1 134 mov r4, #1
108 mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1) 135 mov r4, r4, lsl r1 @ r4 = 1 << (scale-1)
109 mvn r1, #0x8000 @ r1 needed for clipping 136 mov r4, r4, lsr #1
110 mov r12, #0xff00 137 mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for
111 orr r12, r12, #0xff @ r12 needed for masking 138 @ clipping and masking
139 subs r0, r0, #1 @
140 beq .som_singlesample @ Zero? Only one sample!
112 141
113.somloop: 142.somloop:
114 ldmia r4!, {r6-r7} 143 ldmia r2!, { r5, r6 }
115 add r6, r6, r2 144 add r5, r5, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale
116 mov r6, r6, asr r5 @ r6 = (r6 + 1<<(scale-1)) >> scale 145 mov r5, r5, asr r1
117 mov lr, r6, asr #15 146 mov r12, r5, asr #15
118 teq lr, lr, asr #31 147 teq r12, r12, asr #31
119 eorne r6, r1, lr, asr #31 @ Clip (-32768...+32767) 148 eorne r5, r14, r5, asr #31 @ Clip (-32768...+32767)
120 add r7, r7, r2 149 add r6, r6, r4
121 mov r7, r7, asr r5 @ r7 = (r7 + 1<<(scale-1)) >> scale 150 mov r6, r6, asr r1 @ r7 = (r7 + 1<<(scale-1)) >> scale
122 mov lr, r7, asr #15 151 mov r12, r6, asr #15
123 teq lr, lr, asr #31 152 teq r12, r12, asr #31
124 eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767) 153 eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767)
125 154
126 and r6, r6, r12 155 and r5, r5, r14, lsr #16
127 orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word 156 and r6, r6, r14, lsr #16
128 and r7, r7, r12 157 orr r5, r5, r5, lsl #16 @ pack first 2 halfwords into 1 word
129 orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word 158 orr r6, r6, r6, lsl #16 @ pack last 2 halfwords into 1 word
130 stmia r3!, {r6-r7} 159 stmia r3!, { r5, r6 }
131 160
132 subs r0, r0, #2 161 subs r0, r0, #2
133 bgt .somloop 162 bgt .somloop
134 163
135 ldmfd sp!, {r4-r7, pc} 164 ldmltfd sp!, { r4-r6, pc } @ even 'count'? return
136.somend: 165
137 .size sample_output_mono,.somend-sample_output_mono 166.som_singlesample:
167 ldr r5, [r2] @ do odd sample
168 add r5, r5, r4
169 mov r5, r5, asr r1
170 mov r12, r5, asr #15
171 teq r12, r12, asr #31
172 eorne r5, r14, r5, asr #31
173
174 and r5, r5, r14, lsr #16 @ pack 2 halfwords into 1 word
175 orr r5, r5, r5, lsl #16
176 str r5, [r3]
177
178 ldmfd sp!, { r4-r6, pc }
179 .size sample_output_mono, .-sample_output_mono
138 180
139/**************************************************************************** 181/****************************************************************************
140 * void sample_output_stereo(int count, struct dsp_data *data, 182 * void sample_output_stereo(int count, struct dsp_data *data,
@@ -149,54 +191,80 @@ sample_output_mono:
149 .type sample_output_stereo, %function 191 .type sample_output_stereo, %function
150sample_output_stereo: 192sample_output_stereo:
151 @ input: r0 = count, r1 = data, r2 = src, r3 = dst 193 @ input: r0 = count, r1 = data, r2 = src, r3 = dst
152 stmfd sp!, {r4-r10, lr} 194 stmfd sp!, { r4-r9, lr }
153 195
154 ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1] 196 ldr r1, [r1] @ r1 = data->output_scale
155 ldr r6, [r1] @ r6 = data->output_scale 197 ldmia r2, { r2, r5 } @ r2 = src[0], r5 = src[1]
156 sub r1, r6, #1 @ r1 = r6-1 198
157 mov r2, #1 199 mov r4, #1
158 mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1) 200 mov r4, r4, lsl r1 @ r4 = 1 << (scale-1)
159 mvn r1, #0x8000 @ r1 needed for clipping 201 mov r4, r4, lsr #1 @
160 mov r12, #0xff00 202
161 orr r12, r12, #0xff @ r12 needed for masking 203 mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for
204 @ clipping and masking
205 subs r0, r0, #1 @
206 beq .sos_singlesample @ Zero? Only one sample!
162 207
163.sosloop: 208.sosloop:
164 ldmia r4!, {r7-r8} 209 ldmia r2!, { r6, r7 } @ 2 left
165 add r7, r7, r2 210 ldmia r5!, { r8, r9 } @ 2 right
166 mov r7, r7, asr r6 @ r7 = (r7 + 1<<(scale-1)) >> scale 211
167 mov lr, r7, asr #15 212 add r6, r6, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale
168 teq lr, lr, asr #31 213 mov r6, r6, asr r1
169 eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767) 214 mov r12, r6, asr #15
170 add r8, r8, r2 215 teq r12, r12, asr #31
171 mov r8, r8, asr r6 @ r8 = (r8 + 1<<(scale-1)) >> scale 216 eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767)
172 mov lr, r8, asr #15 217 add r7, r7, r4
173 teq lr, lr, asr #31 218 mov r7, r7, asr r1 @ r7 = (r7 + 1<<(scale-1)) >> scale
174 eorne r8, r1, lr, asr #31 @ Clip (-32768...+32767) 219 mov r12, r7, asr #15
220 teq r12, r12, asr #31
221 eorne r7, r14, r7, asr #31 @ Clip (-32768...+32767)
175 222
176 ldmia r5!, {r9-r10} 223 add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale
177 add r9, r9, r2 224 mov r8, r8, asr r1
178 mov r9, r9, asr r6 @ r9 = (r9 + 1<<(scale-1)) >> scale 225 mov r12, r8, asr #15
179 mov lr, r9, asr #15 226 teq r12, r12, asr #31
180 teq lr, lr, asr #31 227 eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767)
181 eorne r9, r1, lr, asr #31 @ Clip (-32768...+32767) 228 add r9, r9, r4 @ r9 = (r9 + 1<<(scale-1)) >> scale
182 add r10, r10, r2 229 mov r9, r9, asr r1
183 mov r10, r10, asr r6 @ r10 = (r10 + 1<<(scale-1)) >> scale 230 mov r12, r9, asr #15
184 mov lr, r10, asr #15 231 teq r12, r12, asr #31
185 teq lr, lr, asr #31 232 eorne r9, r14, r9, asr #31 @ Clip (-32768...+32767)
186 eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767)
187 233
188 and r7, r7, r12 234 and r6, r6, r14, lsr #16 @ pack first 2 halfwords into 1 word
189 orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word 235 orr r8, r6, r8, asl #16
190 and r8, r8, r12 236 and r7, r7, r14, lsr #16 @ pack last 2 halfwords into 1 word
191 orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word 237 orr r9, r7, r9, asl #16
192 stmia r3!, {r9-r10} 238
239 stmia r3!, { r8, r9 }
193 240
194 subs r0, r0, #2 241 subs r0, r0, #2
195 bgt .sosloop 242 bgt .sosloop
196 243
197 ldmfd sp!, {r4-r10, pc} 244 ldmltfd sp!, { r4-r9, pc } @ even 'count'? return
198.sosend: 245
199 .size sample_output_stereo,.sosend-sample_output_stereo 246.sos_singlesample:
247 ldr r6, [r2] @ left odd sample
248 ldr r8, [r5] @ right odd sample
249
250 add r6, r6, r4 @ r6 = (r7 + 1<<(scale-1)) >> scale
251 mov r6, r6, asr r1
252 mov r12, r6, asr #15
253 teq r12, r12, asr #31
254 eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767)
255 add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale
256 mov r8, r8, asr r1
257 mov r12, r8, asr #15
258 teq r12, r12, asr #31
259 eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767)
260
261 and r6, r6, r14, lsr #16 @ pack 2 halfwords into 1 word
262 orr r8, r6, r8, asl #16
263
264 str r8, [r3]
265
266 ldmfd sp!, { r4-r9, pc }
267 .size sample_output_stereo, .-sample_output_stereo
200#endif /* ARM_ARCH < 6 */ 268#endif /* ARM_ARCH < 6 */
201 269
202/**************************************************************************** 270/****************************************************************************
@@ -259,8 +327,7 @@ apply_crossfeed:
259 str r0, [r12, #30*4] @ save delay line index 327 str r0, [r12, #30*4] @ save delay line index
260 add sp, sp, #8 @ remove temp variables from stack 328 add sp, sp, #8 @ remove temp variables from stack
261 ldmia sp!, { r4-r11, pc } 329 ldmia sp!, { r4-r11, pc }
262.cfend: 330 .size apply_crossfeed, .-apply_crossfeed
263 .size apply_crossfeed,.cfend-apply_crossfeed
264 331
265/**************************************************************************** 332/****************************************************************************
266 * int dsp_downsample(int count, struct dsp_data *data, 333 * int dsp_downsample(int count, struct dsp_data *data,
@@ -317,8 +384,7 @@ dsp_downsample:
317 sub r8, r8, r1 @ dst - &dst[0] 384 sub r8, r8, r1 @ dst - &dst[0]
318 mov r0, r8, lsr #2 @ convert bytes->samples 385 mov r0, r8, lsr #2 @ convert bytes->samples
319 ldmia sp!, { r4-r11, pc } @ ... and we're out 386 ldmia sp!, { r4-r11, pc } @ ... and we're out
320.dsend: 387 .size dsp_downsample, .-dsp_downsample
321 .size dsp_downsample,.dsend-dsp_downsample
322 388
323/**************************************************************************** 389/****************************************************************************
324 * int dsp_upsample(int count, struct dsp_data *dsp, 390 * int dsp_upsample(int count, struct dsp_data *dsp,
@@ -327,23 +393,22 @@ dsp_downsample:
327 .section .text 393 .section .text
328 .global dsp_upsample 394 .global dsp_upsample
329dsp_upsample: 395dsp_upsample:
330 stmdb sp!, { r4-r11, lr } @ stack modified regs 396 stmfd sp!, { r4-r11, lr } @ stack modified regs
331 ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta 397 ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta
332 sub r5, r5, #1 @ pre-decrement num_channels for use 398 sub r5, r5, #1 @ pre-decrement num_channels for use
333 add r4, r1, #12 @ r4 = &resample_data.phase 399 add r4, r1, #12 @ r4 = &resample_data.phase
334 stmdb sp!, { r0, r4 } @ stack count and &resample_data.phase 400 mov r6, r6, lsl #16 @ we'll use carry to detect pos increments
401 stmfd sp!, { r0, r4 } @ stack count and &resample_data.phase
335.uschannel_loop: 402.uschannel_loop:
336 ldr r12, [r4] @ r12 = resample_data.phase 403 ldr r12, [r4] @ r12 = resample_data.phase
337 mov r1, r12, ror #16 @ swap halfword positions, we'll use carry
338 @ to detect pos increments
339 ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1] 404 ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1]
340 ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1] 405 ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1]
341 add r9, r4, #4 @ r9 = &last_sample[0] 406 add r9, r4, #4 @ r9 = &last_sample[0]
342 ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1] 407 mov r1, r12, lsl #16 @ we'll use carry to detect pos increments
343 sub r11, r0, #1 408 sub r11, r0, #1
344 ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ... 409 ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ...
410 ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1]
345 str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample 411 str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample
346 add r9, r7, r0, lsl #2 @ r9 = src_end = &src[count]
347 movs r14, r12, lsr #16 @ pos = resample_data.phase >> 16 412 movs r14, r12, lsr #16 @ pos = resample_data.phase >> 16
348 beq .usstart_0 @ pos = 0 413 beq .usstart_0 @ pos = 0
349 cmp r14, r0 @ if pos >= count, we're already done 414 cmp r14, r0 @ if pos >= count, we're already done
@@ -354,41 +419,38 @@ dsp_upsample:
354 419
355 @ Register usage in loop: 420 @ Register usage in loop:
356 @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel, 421 @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
357 @ r6 = delta, r7 = s, r8 = d, r9 = src_end, r10 = s[pos - 1], r11 = s[pos] 422 @ r6 = delta, r7 = s, r8 = d, r9 = diff, r10 = s[pos - 1], r11 = s[pos]
358.usloop_1: 423.usloop_1:
359 mov r10, r11 @ r10 = previous sample 424 mov r10, r11 @ r10 = previous sample
360.usstart_0: 425.usstart_0:
361 ldr r11, [r7], #4 @ r11 = next sample 426 ldr r11, [r7], #4 @ r11 = next sample
362 sub r0, r11, r10 @ r0 = s[pos] - s[pos - 1] 427 mov r4, r1, lsr #16 @ r4 = frac = phase >> 16
428 sub r9, r11, r10 @ r9 = diff = s[pos] - s[pos - 1]
363.usloop_0: 429.usloop_0:
430 smull r12, r14, r4, r9
431 adds r1, r1, r6 @ phase += delta << 16
364 mov r4, r1, lsr #16 @ r4 = frac = phase >> 16 432 mov r4, r1, lsr #16 @ r4 = frac = phase >> 16
365 smull r12, r14, r4, r0
366 add r14, r10, r14, lsl #16 433 add r14, r10, r14, lsl #16
367 add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + frac*diff 434 add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + frac*diff
368 str r14, [r8], #4 @ *d++ = out 435 str r14, [r8], #4 @ *d++ = out
369 adds r1, r1, r6, lsl #16 @ phase += delta << 16
370 bcc .usloop_0 @ if carry is set, pos is incremented 436 bcc .usloop_0 @ if carry is set, pos is incremented
371 cmp r7, r9 @ if s < src_end, do another sample 437 subs r0, r0, #1 @ if count > 0, do another sample
372 blo .usloop_1 438 bgt .usloop_1
373.usloop_skip: 439.usloop_skip:
374 subs r5, r5, #1 440 subs r5, r5, #1
375 ldmia sp, { r0, r4 } @ reload count and &resample_data.phase 441 ldmfd sp, { r0, r4 } @ reload count and &resample_data.phase
376 bpl .uschannel_loop @ if (--ch) >= 0, do another channel 442 bpl .uschannel_loop @ if (--ch) >= 0, do another channel
377 mov r1, r1, ror #16 @ wrap phase back to start of next frame 443 mov r1, r1, lsr #16 @ wrap phase back to start of next frame
378 str r1, [r4] @ store back 444 ldr r2, [r3] @ r1 = &dst[0]
379 ldr r1, [r3] @ r1 = &dst[0] 445 str r1, [r4] @ store phase
380 sub r8, r8, r1 @ dst - &dst[0] 446 sub r8, r8, r2 @ dst - &dst[0]
381 mov r0, r8, lsr #2 @ convert bytes->samples 447 mov r0, r8, lsr #2 @ convert bytes->samples
382 add sp, sp, #8 @ adjust stack for temp variables 448 add sp, sp, #8 @ adjust stack for temp variables
383 ldmia sp!, { r4-r11, pc } @ ... and we're out 449 ldmfd sp!, { r4-r11, pc } @ ... and we're out
384.usend: 450 .size dsp_upsample, .-dsp_upsample
385 .size dsp_upsample,.usend-dsp_upsample
386 451
387/**************************************************************************** 452/****************************************************************************
388 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]) 453 * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
389 * NOTE: The following code processes two samples at once. When count is odd,
390 * there is an additional obsolete sample processed, which will not be
391 * used by the calling functions.
392 */ 454 */
393 .section .icode, "ax", %progbits 455 .section .icode, "ax", %progbits
394 .align 2 456 .align 2
@@ -396,30 +458,40 @@ dsp_upsample:
396 .type dsp_apply_gain, %function 458 .type dsp_apply_gain, %function
397dsp_apply_gain: 459dsp_apply_gain:
398 @ input: r0 = count, r1 = data, r2 = buf[] 460 @ input: r0 = count, r1 = data, r2 = buf[]
399 stmfd sp!, {r4-r7, lr} 461 stmfd sp!, { r4-r8, lr }
400 462
401 ldr r3, [r1, #4] @ r3 = data->num_channels 463 ldr r3, [r1, #4] @ r3 = data->num_channels
402 ldr r4, [r1, #32] @ r5 = data->gain 464 ldr r4, [r1, #32] @ r5 = data->gain
403 465
404.dag_outerloop: 466.dag_outerloop:
405 ldr r1, [r2], #4 @ r1 = buf[0] and increment index of buf[] 467 ldr r1, [r2], #4 @ r1 = buf[0] and increment index of buf[]
406 mov r12, r0 @ r12 = r0 = count 468 subs r12, r0, #1 @ r12 = r0 = count - 1
469 beq .dag_singlesample @ Zero? Only one sample!
407 470
408.dag_innerloop: 471.dag_innerloop:
409 ldmia r1, {r5, r6} @ load r5, r6 from r1 472 ldmia r1, { r5, r6 } @ load r5, r6 from r1
410 smull r7, lr, r5, r4 @ r5 = FRACMUL_SHL(r5, r4, 8) 473 smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8)
411 mov lr, lr, asl #9 474 smull r14, r5, r6, r4 @ r14 = FRACMUL_SHL(r6, r4, 8)
412 orr r5, lr, r7, lsr #23
413 smull r7, lr, r6, r4 @ r6 = FRACMUL_SHL(r6, r4, 8)
414 mov lr, lr, asl #9
415 orr r6, lr, r7, lsr #23
416 stmia r1!, {r5, r6} @ save r5, r6 to r1 and increment r1
417 subs r12, r12, #2 475 subs r12, r12, #2
476 mov r7, r7, lsr #23
477 mov r14, r14, lsr #23
478 orr r7, r7, r8, asl #9
479 orr r14, r14, r5, asl #9
480 stmia r1!, { r7, r14 } @ save r7, r14 to [r1] and increment r1
418 bgt .dag_innerloop @ end of inner loop 481 bgt .dag_innerloop @ end of inner loop
419 482
483 blt .dag_evencount @ < 0? even count
484
485.dag_singlesample:
486 ldr r5, [r1] @ handle odd sample
487 smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8)
488 mov r7, r7, lsr #23
489 orr r7, r7, r8, asl #9
490 str r7, [r1]
491
492.dag_evencount:
420 subs r3, r3, #1 493 subs r3, r3, #1
421 bgt .dag_outerloop @ end of outer loop 494 bgt .dag_outerloop @ end of outer loop
422 495
423 ldmfd sp!, {r4-r7, pc} 496 ldmfd sp!, { r4-r8, pc }
424.dagend: 497 .size dsp_apply_gain, .-dsp_apply_gain
425 .size dsp_apply_gain,.dagend-dsp_apply_gain