diff options
-rw-r--r-- | apps/dsp_arm.S | 130 | ||||
-rw-r--r-- | apps/dsp_asm.h | 5 |
2 files changed, 129 insertions, 6 deletions
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S index 27669203f1..c3e5c7cd05 100644 --- a/apps/dsp_arm.S +++ b/apps/dsp_arm.S | |||
@@ -17,14 +17,14 @@ | |||
17 | * | 17 | * |
18 | ****************************************************************************/ | 18 | ****************************************************************************/ |
19 | 19 | ||
20 | /* | 20 | /**************************************************************************** |
21 | * void apply_crossfeed(int count, int32_t* src[]) | 21 | * void apply_crossfeed(int count, int32_t* src[]) |
22 | */ | 22 | */ |
23 | .section .text | 23 | .section .text |
24 | .global apply_crossfeed | 24 | .global apply_crossfeed |
25 | apply_crossfeed: | 25 | apply_crossfeed: |
26 | @ unfortunately, we ended up in a bit of a register squeeze here, and need | 26 | @ unfortunately, we ended up in a bit of a register squeeze here, and need |
27 | @ to keep both the count and the delay line index on the stack :/ | 27 | @ to keep the count on the stack :/ |
28 | stmdb sp!, { r4-r11, lr } @ stack modified regs | 28 | stmdb sp!, { r4-r11, lr } @ stack modified regs |
29 | ldmia r1, { r2-r3 } @ r2 = src[0], r3 = src[1] | 29 | ldmia r1, { r2-r3 } @ r2 = src[0], r3 = src[1] |
30 | 30 | ||
@@ -74,7 +74,131 @@ apply_crossfeed: | |||
74 | @ save data back to struct | 74 | @ save data back to struct |
75 | ldr r12, =crossfeed_data + 4*4 | 75 | ldr r12, =crossfeed_data + 4*4 |
76 | stmia r12, { r8-r11 } @ save filter history | 76 | stmia r12, { r8-r11 } @ save filter history |
77 | str r0, [r12, #30*4] @ save delay line index | 77 | str r0, [r12, #30*4] @ save delay line index |
78 | add sp, sp, #8 @ remove temp variables from stack | 78 | add sp, sp, #8 @ remove temp variables from stack |
79 | ldmia sp!, { r4-r11, pc } | 79 | ldmia sp!, { r4-r11, pc } |
80 | .cfend: | ||
81 | .size apply_crossfeed,.cfend-apply_crossfeed | ||
82 | |||
83 | /**************************************************************************** | ||
84 | * int dsp_downsample(int count, struct dsp_data *data, | ||
85 | * int32_t *src[], int32_t *dst[]) | ||
86 | */ | ||
87 | .section .text | ||
88 | .global dsp_downsample | ||
89 | dsp_downsample: | ||
90 | stmdb sp!, { r4-r11, lr } @ stack modified regs | ||
91 | ldmib r1, { r5-r6 } @ r5 = num_channels, r6 = resample_data.delta (words at data+4, data+8) | ||
92 | sub r5, r5, #1 @ pre-decrement num_channels: r5 = index of the last channel | ||
93 | add r4, r1, #12 @ r4 = &resample_data.phase | ||
94 | mov r12, #0xff @ build the frac mask in two steps | ||
95 | orr r12, r12, #0xff00 @ r12 = 0xffff | ||
96 | .dschannel_loop: | ||
97 | ldr r1, [r4] @ r1 = resample_data.phase (reloaded for every channel) | ||
98 | ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1] | ||
99 | ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1] | ||
100 | add r9, r4, #4 @ r9 = &last_sample[0] | ||
101 | ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1] | ||
102 | sub r11, r0, #1 @ r11 = count - 1 | ||
103 | ldr r14, [r7, r11, lsl #2] @ r14 = s[count - 1], the last sample in s[] ... | ||
104 | str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample | ||
105 | movs r9, r1, lsr #16 @ r9 = pos = phase >> 16 | ||
106 | ldreq r11, [r7] @ if pos = 0, load s[0] (r10 already holds last_sample) and jump into loop | ||
107 | beq .dsuse_last_start | ||
108 | cmp r9, r0 @ if pos >= count, we're already done | ||
109 | bge .dsloop_skip | ||
110 | |||
111 | @ Register usage in loop: | ||
112 | @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel, | ||
113 | @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos], r12 = 0xffff | ||
114 | .dsloop: | ||
115 | add r9, r7, r9, lsl #2 @ r9 = &s[pos] | ||
116 | ldmda r9, { r10, r11 } @ r10 = s[pos - 1], r11 = s[pos] | ||
117 | .dsuse_last_start: @ pos = 0 enters here with r10 = last_sample, r11 = s[0] | ||
118 | sub r11, r11, r10 @ r11 = diff = s[pos] - s[pos - 1] | ||
119 | @ keep frac in lower bits to take advantage of multiplier early termination | ||
120 | and r9, r1, r12 @ r9 = frac = phase & 0xffff | ||
121 | smull r9, r14, r11, r9 @ r14:r9 = diff * frac (64-bit signed product) | ||
122 | add r10, r10, r14, lsl #16 @ add high word of product, shifted into place | ||
123 | add r10, r10, r9, lsr #16 @ r10 = out = s[pos - 1] + ((frac*diff) >> 16) | ||
124 | str r10, [r8], #4 @ *d++ = out | ||
125 | add r1, r1, r6 @ phase += delta | ||
126 | mov r9, r1, lsr #16 @ pos = phase >> 16 | ||
127 | cmp r9, r0 @ pos < count? | ||
128 | blt .dsloop @ yup, do more samples | ||
129 | .dsloop_skip: | ||
130 | subs r5, r5, #1 | ||
131 | bpl .dschannel_loop @ if (--ch) >= 0, do another channel | ||
132 | sub r1, r1, r0, lsl #16 @ phase -= count << 16: wrap phase back to start | ||
133 | str r1, [r4] @ store back | ||
134 | ldr r1, [r3] @ r1 = dst[0] (start of the buffer for channel index 0) | ||
135 | sub r8, r8, r1 @ d - dst[0]: bytes written for channel index 0, processed last | ||
136 | mov r0, r8, lsr #2 @ return value: convert bytes->samples | ||
137 | ldmia sp!, { r4-r11, pc } @ ... and we're out | ||
138 | .dsend: | ||
139 | .size dsp_downsample,.dsend-dsp_downsample | ||
140 | |||
141 | /**************************************************************************** | ||
142 | * int dsp_upsample(int count, struct dsp_data *data, | ||
143 | * int32_t *src[], int32_t *dst[]) | ||
144 | */ | ||
145 | .section .text | ||
146 | .global dsp_upsample | ||
147 | dsp_upsample: | ||
148 | stmdb sp!, { r4-r11, lr } @ stack modified regs | ||
149 | ldmib r1, { r5-r6 } @ r5 = num_channels, r6 = resample_data.delta (words at data+4, data+8) | ||
150 | sub r5, r5, #1 @ pre-decrement num_channels: r5 = index of the last channel | ||
151 | add r4, r1, #12 @ r4 = &resample_data.phase | ||
152 | stmdb sp!, { r0, r4 } @ stack count and &resample_data.phase (r0 and r4 are reused in the loop) | ||
153 | .uschannel_loop: | ||
154 | ldr r12, [r4] @ r12 = resample_data.phase | ||
155 | mov r1, r12, ror #16 @ swap halfword positions (frac on top), we'll use carry | ||
156 | @ to detect pos increments | ||
157 | ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1] | ||
158 | ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1] | ||
159 | add r9, r4, #4 @ r9 = &last_sample[0] | ||
160 | ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1] | ||
161 | sub r11, r0, #1 @ r11 = count - 1 | ||
162 | ldr r14, [r7, r11, lsl #2] @ r14 = s[count - 1], the last sample in s[] ... | ||
163 | str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample | ||
164 | add r9, r7, r0, lsl #2 @ r9 = src_end = &s[count] | ||
165 | movs r14, r12, lsr #16 @ r14 = pos = resample_data.phase >> 16 | ||
166 | beq .usstart_0 @ pos = 0: r10 already holds last_sample | ||
167 | cmp r14, r0 @ if pos >= count, we're already done | ||
168 | bge .usloop_skip | ||
169 | add r7, r7, r14, lsl #2 @ r7 = &s[pos] | ||
170 | ldr r10, [r7, #-4] @ r10 = s[pos - 1] | ||
171 | b .usstart_0 | ||
172 | |||
173 | @ Register usage in loop: | ||
174 | @ r0 = diff, r1 = phase (halfwords swapped), r4 = frac, r5 = cur_channel, | ||
175 | @ r6 = delta, r7 = s, r8 = d, r9 = src_end, r10 = s[pos - 1], r11 = s[pos] | ||
176 | .usloop_1: | ||
177 | mov r10, r11 @ r10 = previous sample | ||
178 | .usstart_0: | ||
179 | ldr r11, [r7], #4 @ r11 = next sample, s++ | ||
180 | sub r0, r11, r10 @ r0 = diff = s[pos] - s[pos - 1] (count is reloaded from the stack later) | ||
181 | .usloop_0: | ||
182 | mov r4, r1, lsr #16 @ r4 = frac = top halfword of the swapped phase | ||
183 | smull r12, r14, r4, r0 @ r14:r12 = frac * diff (64-bit signed product) | ||
184 | add r14, r10, r14, lsl #16 @ add high word of product, shifted into place | ||
185 | add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + ((frac*diff) >> 16) | ||
186 | str r14, [r8], #4 @ *d++ = out | ||
187 | adds r1, r1, r6, lsl #16 @ phase += delta << 16, carry out = frac overflowed | ||
188 | bcc .usloop_0 @ no carry: same pos, reuse r10 and diff; if carry is set, pos is incremented | ||
189 | cmp r7, r9 @ if s < src_end, do another sample | ||
190 | blo .usloop_1 | ||
191 | .usloop_skip: | ||
192 | subs r5, r5, #1 | ||
193 | ldmia sp, { r0, r4 } @ reload count and &resample_data.phase (flags from subs are untouched) | ||
194 | bpl .uschannel_loop @ if (--ch) >= 0, do another channel | ||
195 | mov r1, r1, ror #16 @ undo the halfword swap; wrap phase back to start of next frame | ||
196 | str r1, [r4] @ store back | ||
197 | ldr r1, [r3] @ r1 = dst[0] (start of the buffer for channel index 0) | ||
198 | sub r8, r8, r1 @ d - dst[0]: bytes written for channel index 0, processed last | ||
199 | mov r0, r8, lsr #2 @ return value: convert bytes->samples | ||
200 | add sp, sp, #8 @ drop the stacked count and &resample_data.phase | ||
201 | ldmia sp!, { r4-r11, pc } @ ... and we're out | ||
202 | .usend: | ||
203 | .size dsp_upsample,.usend-dsp_upsample | ||
80 | 204 | ||
diff --git a/apps/dsp_asm.h b/apps/dsp_asm.h index ee90f5763e..f8df337b37 100644 --- a/apps/dsp_asm.h +++ b/apps/dsp_asm.h | |||
@@ -27,13 +27,12 @@ | |||
27 | #if defined(CPU_COLDFIRE) || defined(CPU_ARM) | 27 | #if defined(CPU_COLDFIRE) || defined(CPU_ARM) |
28 | #define DSP_HAVE_ASM_CROSSFEED | 28 | #define DSP_HAVE_ASM_CROSSFEED |
29 | void apply_crossfeed(int count, int32_t *buf[]); | 29 | void apply_crossfeed(int count, int32_t *buf[]); |
30 | #endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */ | ||
31 | |||
32 | #if defined (CPU_COLDFIRE) | ||
33 | #define DSP_HAVE_ASM_RESAMPLING | 30 | #define DSP_HAVE_ASM_RESAMPLING |
34 | int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); | 31 | int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); |
35 | int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); | 32 | int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); |
33 | #endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */ | ||
36 | 34 | ||
35 | #if defined (CPU_COLDFIRE) | ||
37 | #define DSP_HAVE_ASM_SOUND_CHAN_MONO | 36 | #define DSP_HAVE_ASM_SOUND_CHAN_MONO |
38 | void channels_process_sound_chan_mono(int count, int32_t *buf[]); | 37 | void channels_process_sound_chan_mono(int count, int32_t *buf[]); |
39 | #define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM | 38 | #define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM |