diff options
author | Thomas Martitz <kugel@rockbox.org> | 2012-01-13 16:08:52 +0100 |
---|---|---|
committer | Thomas Martitz <kugel@rockbox.org> | 2012-01-21 18:39:19 +0100 |
commit | 4343011955bffaf61b002a89b0137d48e873d9d0 (patch) | |
tree | 9f25ee7f25daa1733f385f1cf78976e2b79833e8 /apps | |
parent | cdb28789ec3577659aabfc097a9918d304cdb769 (diff) | |
download | rockbox-4343011955bffaf61b002a89b0137d48e873d9d0.tar.gz rockbox-4343011955bffaf61b002a89b0137d48e873d9d0.zip |
libmad: Get rid of some bad trickery with the stack pointer.
Using the stack pointer for anything other than pointing to the
current stack can have very bad effects, especially on hosted
platforms (e.g. when mixed with signals). Remove this at a
very slight performance cost.
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libmad/synth_full_arm.S | 288 |
1 files changed, 141 insertions, 147 deletions
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S index 27383ed3d0..0a4f9b93c2 100644 --- a/apps/codecs/libmad/synth_full_arm.S +++ b/apps/codecs/libmad/synth_full_arm.S | |||
@@ -27,21 +27,21 @@ | |||
27 | .global synth_full_odd_sbsample | 27 | .global synth_full_odd_sbsample |
28 | .global synth_full_even_sbsample | 28 | .global synth_full_even_sbsample |
29 | 29 | ||
30 | ;; r0 = pcm | 30 | /* |
31 | ;; r0 = pcm (pushed on the stack to free a register) | ||
31 | ;; r1 = fo | 32 | ;; r1 = fo |
32 | ;; r2 = fe | 33 | ;; r2 = fe |
33 | ;; r3 = D0ptr | 34 | ;; r3 = D0ptr |
34 | ;; r4 = D1ptr | 35 | ;; r4 = D1ptr |
35 | 36 | ||
36 | /*;; r5 = loop counter | 37 | ;; r5 = loop counter |
37 | ;; r6,r7 accumulator1 | 38 | ;; r6,r7 accumulator1 |
38 | ;; r8,r9 accumulator2 */ | 39 | ;; r8,r9 accumulator2 |
40 | */ | ||
39 | 41 | ||
40 | synth_full_odd_sbsample: | 42 | synth_full_odd_sbsample: |
41 | stmdb sp!, {r4-r11, lr} | 43 | stmdb sp!, {r0, r4-r11, lr} |
42 | ldr r4, [sp, #36] | 44 | ldr r4, [sp, #40] |
43 | ldr r5, =synth_full_sp | ||
44 | str sp, [r5] | ||
45 | mov r5, #15 | 45 | mov r5, #15 |
46 | add r2, r2, #32 | 46 | add r2, r2, #32 |
47 | .l: | 47 | .l: |
@@ -49,85 +49,87 @@ synth_full_odd_sbsample: | |||
49 | add r3, r3, #128 | 49 | add r3, r3, #128 |
50 | add r4, r4, #128 | 50 | add r4, r4, #128 |
51 | ldr r7, [r3, #4] | 51 | ldr r7, [r3, #4] |
52 | ldmia r1!, {r10, r11, r12, lr} | 52 | ldmia r1!, {r0, r10, r11, lr} |
53 | ldr r9, [r4, #120] | 53 | ldr r9, [r4, #120] |
54 | smull r6, r7, r10, r7 | 54 | smull r6, r7, r0, r7 |
55 | ldr sp, [r3, #60] | 55 | ldr r12, [r3, #60] |
56 | smull r8, r9, r10, r9 | 56 | smull r8, r9, r0, r9 |
57 | ldr r10, [r3, #52] | 57 | ldr r0, [r3, #52] |
58 | smlal r6, r7, r11, sp | 58 | smlal r6, r7, r10, r12 |
59 | ldr sp, [r3, #44] | 59 | ldr r12, [r3, #44] |
60 | smlal r6, r7, r12, r10 | 60 | smlal r6, r7, r11, r0 |
61 | ldr r10, [r4, #64] | 61 | ldr r0, [r4, #64] |
62 | smlal r6, r7, lr, sp | 62 | smlal r6, r7, lr, r12 |
63 | ldr sp, [r4, #72] | 63 | ldr r12, [r4, #72] |
64 | smlal r8, r9, r11, r10 | 64 | smlal r8, r9, r10, r0 |
65 | ldr r10, [r4, #80] | 65 | ldr r0, [r4, #80] |
66 | smlal r8, r9, r12, sp | 66 | smlal r8, r9, r11, r12 |
67 | smlal r8, r9, lr, r10 | 67 | smlal r8, r9, lr, r0 |
68 | ldr r10, [r3, #36] | 68 | ldr r0, [r3, #36] |
69 | 69 | ||
70 | ldmia r1!, {r11, r12, sp, lr} | 70 | ldmia r1!, {r10, r11, r12, lr} |
71 | smlal r6, r7, r11, r10 | 71 | smlal r6, r7, r10, r0 |
72 | 72 | ||
73 | ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ | 73 | ldr r0, [r4, #88] /*;;1 cycle stall on arm9, but we free up r10*/ |
74 | smlal r8, r9, r11, r10 | 74 | smlal r8, r9, r10, r0 |
75 | 75 | ||
76 | ldr r10, [r3, #28] | 76 | ldr r0, [r3, #28] |
77 | ldr r11, [r3, #20] | 77 | ldr r10, [r3, #20] |
78 | smlal r6, r7, r11, r0 | ||
79 | ldr r0, [r3, #12] | ||
78 | smlal r6, r7, r12, r10 | 80 | smlal r6, r7, r12, r10 |
79 | ldr r10, [r3, #12] | 81 | ldr r10, [r4, #96] |
80 | smlal r6, r7, sp, r11 | 82 | smlal r6, r7, lr, r0 |
81 | ldr r11, [r4, #96] | 83 | ldr r0, [r4, #104] |
82 | smlal r6, r7, lr, r10 | 84 | smlal r8, r9, r11, r10 |
83 | ldr r10, [r4, #104] | 85 | ldr r10, [r4, #112] |
84 | smlal r8, r9, r12, r11 | 86 | smlal r8, r9, r12, r0 |
85 | ldr r11, [r4, #112] | 87 | smlal r8, r9, lr, r10 |
86 | smlal r8, r9, sp, r10 | ||
87 | smlal r8, r9, lr, r11 | ||
88 | 88 | ||
89 | rsbs r6, r6, #0 | 89 | rsbs r6, r6, #0 |
90 | rsc r7, r7, #0 | 90 | rsc r7, r7, #0 |
91 | 91 | ||
92 | /* ;; PROD_A and even half of SB_SAMPLE*/ | 92 | /* ;; PROD_A and even half of SB_SAMPLE*/ |
93 | ldr r10, [r3, #0] | 93 | ldr r0, [r3, #0] |
94 | ldmia r2!, {r11, r12, sp, lr} | 94 | ldmia r2!, {r10, r11, r12, lr} |
95 | smlal r6, r7, r11, r10 | 95 | smlal r6, r7, r10, r0 |
96 | 96 | ||
97 | ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/ | 97 | ldr r0, [r4, #60] /*;;1 cycle stall on arm9, but we free up r10*/ |
98 | smlal r8, r9, r11, r10 | 98 | smlal r8, r9, r10, r0 |
99 | ldr r11, [r3, #56] | 99 | ldr r10, [r3, #56] |
100 | ldr r10, [r3, #48] | 100 | ldr r0, [r3, #48] |
101 | smlal r6, r7, r12, r11 | 101 | smlal r6, r7, r11, r10 |
102 | ldr r11, [r3, #40] | 102 | ldr r10, [r3, #40] |
103 | smlal r6, r7, sp, r10 | 103 | smlal r6, r7, r12, r0 |
104 | ldr r10, [r4, #68] | 104 | ldr r0, [r4, #68] |
105 | smlal r6, r7, lr, r11 | 105 | smlal r6, r7, lr, r10 |
106 | ldr r11, [r4, #76] | 106 | ldr r10, [r4, #76] |
107 | smlal r8, r9, r11, r0 | ||
108 | ldr r0, [r4, #84] | ||
107 | smlal r8, r9, r12, r10 | 109 | smlal r8, r9, r12, r10 |
108 | ldr r10, [r4, #84] | 110 | smlal r8, r9, lr, r0 |
109 | smlal r8, r9, sp, r11 | ||
110 | smlal r8, r9, lr, r10 | ||
111 | 111 | ||
112 | ldr r10, [r3, #32] | 112 | ldr r0, [r3, #32] |
113 | ldmia r2!, {r11, r12, sp, lr} | 113 | ldmia r2!, {r10, r11, r12, lr} |
114 | smlal r6, r7, r11, r10 | 114 | smlal r6, r7, r10, r0 |
115 | 115 | ||
116 | ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/ | 116 | ldr r0, [r4, #92] /*;;1 cycle stall on arm9, but we free up r10*/ |
117 | smlal r8, r9, r11, r10 | 117 | smlal r8, r9, r10, r0 |
118 | 118 | ||
119 | ldr r10, [r3, #24] | 119 | ldr r0, [r3, #24] |
120 | ldr r11, [r3, #16] | 120 | ldr r10, [r3, #16] |
121 | smlal r6, r7, r11, r0 | ||
122 | ldr r0, [r3, #8] | ||
121 | smlal r6, r7, r12, r10 | 123 | smlal r6, r7, r12, r10 |
122 | ldr r10, [r3, #8] | 124 | ldr r10, [r4, #100] |
123 | smlal r6, r7, sp, r11 | 125 | smlal r6, r7, lr, r0 |
124 | ldr r11, [r4, #100] | 126 | ldr r0, [r4, #108] |
125 | smlal r6, r7, lr, r10 | 127 | smlal r8, r9, r11, r10 |
126 | ldr r10, [r4, #108] | 128 | ldr r10, [r4, #116] |
127 | smlal r8, r9, r12, r11 | 129 | smlal r8, r9, r12, r0 |
128 | ldr r11, [r4, #116] | 130 | smlal r8, r9, lr, r10 |
129 | smlal r8, r9, sp, r10 | 131 | |
130 | smlal r8, r9, lr, r11 | 132 | ldr r0, [sp] |
131 | 133 | ||
132 | movs r6, r6, lsr #16 | 134 | movs r6, r6, lsr #16 |
133 | adc r6, r6, r7, lsl #16 | 135 | adc r6, r6, r7, lsl #16 |
@@ -140,15 +142,11 @@ synth_full_odd_sbsample: | |||
140 | subs r5, r5, #1 | 142 | subs r5, r5, #1 |
141 | bne .l | 143 | bne .l |
142 | 144 | ||
143 | ldr r5, =synth_full_sp | 145 | ldmpc regs="r0,r4-r11" |
144 | ldr sp, [r5] | ||
145 | ldmpc regs=r4-r11 | ||
146 | 146 | ||
147 | synth_full_even_sbsample: | 147 | synth_full_even_sbsample: |
148 | stmdb sp!, {r4-r11, lr} | 148 | stmdb sp!, {r0, r4-r11, lr} |
149 | ldr r4, [sp, #36] | 149 | ldr r4, [sp, #40] |
150 | ldr r5, =synth_full_sp | ||
151 | str sp, [r5] | ||
152 | mov r5, #15 | 150 | mov r5, #15 |
153 | add r2, r2, #32 | 151 | add r2, r2, #32 |
154 | .l2: | 152 | .l2: |
@@ -156,84 +154,86 @@ synth_full_even_sbsample: | |||
156 | add r3, r3, #128 | 154 | add r3, r3, #128 |
157 | add r4, r4, #128 | 155 | add r4, r4, #128 |
158 | ldr r7, [r3, #0] | 156 | ldr r7, [r3, #0] |
159 | ldmia r1!, {r10, r11, r12, lr} | 157 | ldmia r1!, {r0, r10, r11, lr} |
160 | ldr r9, [r4, #60] | 158 | ldr r9, [r4, #60] |
161 | smull r6, r7, r10, r7 | 159 | smull r6, r7, r0, r7 |
162 | ldr sp, [r3, #56] | 160 | ldr r12, [r3, #56] |
163 | smull r8, r9, r10, r9 | 161 | smull r8, r9, r0, r9 |
164 | ldr r10, [r3, #48] | 162 | ldr r0, [r3, #48] |
165 | smlal r6, r7, r11, sp | 163 | smlal r6, r7, r10, r12 |
166 | ldr sp, [r3, #40] | 164 | ldr r12, [r3, #40] |
167 | smlal r6, r7, r12, r10 | 165 | smlal r6, r7, r11, r0 |
168 | ldr r10, [r4, #68] | 166 | ldr r0, [r4, #68] |
169 | smlal r6, r7, lr, sp | 167 | smlal r6, r7, lr, r12 |
170 | 168 | ||
171 | ldr sp, [r4, #76] | 169 | ldr r12, [r4, #76] |
172 | smlal r8, r9, r11, r10 | 170 | smlal r8, r9, r10, r0 |
173 | ldr r10, [r4, #84] | 171 | ldr r0, [r4, #84] |
174 | smlal r8, r9, r12, sp | 172 | smlal r8, r9, r11, r12 |
175 | smlal r8, r9, lr, r10 | 173 | smlal r8, r9, lr, r0 |
176 | 174 | ||
177 | ldr r10, [r3, #32] | 175 | ldr r0, [r3, #32] |
178 | ldmia r1!, {r11, r12, sp, lr} | 176 | ldmia r1!, {r10, r11, r12, lr} |
179 | 177 | ||
180 | smlal r6, r7, r11, r10 | 178 | smlal r6, r7, r10, r0 |
181 | ldr r10, [r4, #92] | 179 | ldr r0, [r4, #92] |
182 | smlal r8, r9, r11, r10 | 180 | smlal r8, r9, r10, r0 |
183 | ldr r10, [r3, #24] | 181 | ldr r0, [r3, #24] |
184 | ldr r11, [r3, #16] | 182 | ldr r10, [r3, #16] |
183 | smlal r6, r7, r11, r0 | ||
184 | ldr r0, [r3, #8] | ||
185 | smlal r6, r7, r12, r10 | 185 | smlal r6, r7, r12, r10 |
186 | ldr r10, [r3, #8] | 186 | ldr r10, [r4, #100] |
187 | smlal r6, r7, sp, r11 | 187 | smlal r6, r7, lr, r0 |
188 | ldr r11, [r4, #100] | 188 | ldr r0, [r4, #108] |
189 | smlal r6, r7, lr, r10 | 189 | smlal r8, r9, r11, r10 |
190 | ldr r10, [r4, #108] | 190 | ldr r10, [r4, #116] |
191 | smlal r8, r9, r12, r11 | 191 | smlal r8, r9, r12, r0 |
192 | ldr r11, [r4, #116] | 192 | smlal r8, r9, lr, r10 |
193 | smlal r8, r9, sp, r10 | ||
194 | smlal r8, r9, lr, r11 | ||
195 | 193 | ||
196 | rsbs r6, r6, #0 | 194 | rsbs r6, r6, #0 |
197 | rsc r7, r7, #0 | 195 | rsc r7, r7, #0 |
198 | 196 | ||
199 | ldr r10, [r3, #4] | 197 | ldr r0, [r3, #4] |
200 | ldmia r2!, {r11, r12, sp, lr} | 198 | ldmia r2!, {r10, r11, r12, lr} |
201 | smlal r6, r7, r11, r10 | 199 | smlal r6, r7, r10, r0 |
202 | ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/ | 200 | ldr r0, [r4, #120] /*;;1 cycle stall on arm9, but we free up r10*/ |
203 | smlal r8, r9, r11, r10 | 201 | smlal r8, r9, r10, r0 |
204 | ldr r10, [r3, #60] | 202 | ldr r0, [r3, #60] |
205 | ldr r11, [r3, #52] | 203 | ldr r10, [r3, #52] |
204 | smlal r6, r7, r11, r0 | ||
205 | ldr r0, [r3, #44] | ||
206 | smlal r6, r7, r12, r10 | 206 | smlal r6, r7, r12, r10 |
207 | ldr r10, [r3, #44] | 207 | ldr r10, [r4, #64] |
208 | smlal r6, r7, sp, r11 | 208 | smlal r6, r7, lr, r0 |
209 | ldr r11, [r4, #64] | ||
210 | smlal r6, r7, lr, r10 | ||
211 | 209 | ||
212 | ldr r10, [r4, #72] | 210 | ldr r0, [r4, #72] |
213 | smlal r8, r9, r12, r11 | 211 | smlal r8, r9, r11, r10 |
214 | ldr r11, [r4, #80] | 212 | ldr r10, [r4, #80] |
215 | smlal r8, r9, sp, r10 | 213 | smlal r8, r9, r12, r0 |
216 | 214 | ||
217 | smlal r8, r9, lr, r11 | 215 | smlal r8, r9, lr, r10 |
218 | 216 | ||
219 | ldr r10, [r3, #36] | 217 | ldr r0, [r3, #36] |
220 | ldmia r2!, {r11, r12, sp, lr} | 218 | ldmia r2!, {r10, r11, r12, lr} |
221 | smlal r6, r7, r11, r10 | 219 | smlal r6, r7, r10, r0 |
222 | ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ | 220 | ldr r0, [r4, #88] /*;;1 cycle stall on arm9, but we free up r10*/ |
223 | smlal r8, r9, r11, r10 | 221 | smlal r8, r9, r10, r0 |
224 | 222 | ||
225 | ldr r10, [r3, #28] | 223 | ldr r0, [r3, #28] |
226 | ldr r11, [r3, #20] | 224 | ldr r10, [r3, #20] |
225 | smlal r6, r7, r11, r0 | ||
226 | ldr r0, [r3, #12] | ||
227 | smlal r6, r7, r12, r10 | 227 | smlal r6, r7, r12, r10 |
228 | ldr r10, [r3, #12] | 228 | ldr r10, [r4, #96] |
229 | smlal r6, r7, sp, r11 | 229 | smlal r6, r7, lr, r0 |
230 | ldr r11, [r4, #96] | 230 | ldr r0, [r4, #104] |
231 | smlal r6, r7, lr, r10 | 231 | smlal r8, r9, r11, r10 |
232 | ldr r10, [r4, #104] | 232 | ldr r10, [r4, #112] |
233 | smlal r8, r9, r12, r11 | 233 | smlal r8, r9, r12, r0 |
234 | ldr r11, [r4, #112] | 234 | smlal r8, r9, lr, r10 |
235 | smlal r8, r9, sp, r10 | 235 | |
236 | smlal r8, r9, lr, r11 | 236 | ldr r0, [sp] |
237 | 237 | ||
238 | movs r6, r6, lsr #16 | 238 | movs r6, r6, lsr #16 |
239 | adc r6, r6, r7, lsl #16 | 239 | adc r6, r6, r7, lsl #16 |
@@ -246,9 +246,7 @@ synth_full_even_sbsample: | |||
246 | subs r5, r5, #1 | 246 | subs r5, r5, #1 |
247 | bne .l2 | 247 | bne .l2 |
248 | 248 | ||
249 | ldr r5, =synth_full_sp | 249 | ldmpc regs="r0,r4-r11" |
250 | ldr sp, [r5] | ||
251 | ldmpc regs=r4-r11 | ||
252 | 250 | ||
253 | .global III_aliasreduce | 251 | .global III_aliasreduce |
254 | 252 | ||
@@ -340,7 +338,3 @@ III_overlap: | |||
340 | ldmia r0!, {r4, r5, r6, r7, r12, lr} | 338 | ldmia r0!, {r4, r5, r6, r7, r12, lr} |
341 | stmia r1!, {r4, r5, r6, r7, r12, lr} | 339 | stmia r1!, {r4, r5, r6, r7, r12, lr} |
342 | ldmpc regs=r4-r7 | 340 | ldmpc regs=r4-r7 |
343 | |||
344 | .section IBSS_SECTION_MPA_ARM,"aw",%nobits | ||
345 | synth_full_sp: | ||
346 | .space 4 | ||