summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Martitz <kugel@rockbox.org>2012-01-13 16:08:52 +0100
committerThomas Martitz <kugel@rockbox.org>2012-01-21 18:39:19 +0100
commit4343011955bffaf61b002a89b0137d48e873d9d0 (patch)
tree9f25ee7f25daa1733f385f1cf78976e2b79833e8
parentcdb28789ec3577659aabfc097a9918d304cdb769 (diff)
downloadrockbox-4343011955bffaf61b002a89b0137d48e873d9d0.tar.gz
rockbox-4343011955bffaf61b002a89b0137d48e873d9d0.zip
libmad: Get rid of some bad trickery with the stack pointer.
Using the stack pointer for anything other than pointing to the current stack can have very bad effects, especially on hosted platforms (e.g. when mixed with signals). Remove this at a very slight performance cost.
-rw-r--r--apps/codecs/libmad/synth_full_arm.S288
1 files changed, 141 insertions, 147 deletions
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S
index 27383ed3d0..0a4f9b93c2 100644
--- a/apps/codecs/libmad/synth_full_arm.S
+++ b/apps/codecs/libmad/synth_full_arm.S
@@ -27,21 +27,21 @@
27 .global synth_full_odd_sbsample 27 .global synth_full_odd_sbsample
28 .global synth_full_even_sbsample 28 .global synth_full_even_sbsample
29 29
30 ;; r0 = pcm 30 /*
31 ;; r0 = pcm (pushed on the stack to free a register)
31 ;; r1 = fo 32 ;; r1 = fo
32 ;; r2 = fe 33 ;; r2 = fe
33 ;; r3 = D0ptr 34 ;; r3 = D0ptr
34 ;; r4 = D1ptr 35 ;; r4 = D1ptr
35 36
36 /*;; r5 = loop counter 37 ;; r5 = loop counter
37 ;; r6,r7 accumulator1 38 ;; r6,r7 accumulator1
38 ;; r8,r9 accumulator2 */ 39 ;; r8,r9 accumulator2
40 */
39 41
40synth_full_odd_sbsample: 42synth_full_odd_sbsample:
41 stmdb sp!, {r4-r11, lr} 43 stmdb sp!, {r0, r4-r11, lr}
42 ldr r4, [sp, #36] 44 ldr r4, [sp, #40]
43 ldr r5, =synth_full_sp
44 str sp, [r5]
45 mov r5, #15 45 mov r5, #15
46 add r2, r2, #32 46 add r2, r2, #32
47.l: 47.l:
@@ -49,85 +49,87 @@ synth_full_odd_sbsample:
49 add r3, r3, #128 49 add r3, r3, #128
50 add r4, r4, #128 50 add r4, r4, #128
51 ldr r7, [r3, #4] 51 ldr r7, [r3, #4]
52 ldmia r1!, {r10, r11, r12, lr} 52 ldmia r1!, {r0, r10, r11, lr}
53 ldr r9, [r4, #120] 53 ldr r9, [r4, #120]
54 smull r6, r7, r10, r7 54 smull r6, r7, r0, r7
55 ldr sp, [r3, #60] 55 ldr r12, [r3, #60]
56 smull r8, r9, r10, r9 56 smull r8, r9, r0, r9
57 ldr r10, [r3, #52] 57 ldr r0, [r3, #52]
58 smlal r6, r7, r11, sp 58 smlal r6, r7, r10, r12
59 ldr sp, [r3, #44] 59 ldr r12, [r3, #44]
60 smlal r6, r7, r12, r10 60 smlal r6, r7, r11, r0
61 ldr r10, [r4, #64] 61 ldr r0, [r4, #64]
62 smlal r6, r7, lr, sp 62 smlal r6, r7, lr, r12
63 ldr sp, [r4, #72] 63 ldr r12, [r4, #72]
64 smlal r8, r9, r11, r10 64 smlal r8, r9, r10, r0
65 ldr r10, [r4, #80] 65 ldr r0, [r4, #80]
66 smlal r8, r9, r12, sp 66 smlal r8, r9, r11, r12
67 smlal r8, r9, lr, r10 67 smlal r8, r9, lr, r0
68 ldr r10, [r3, #36] 68 ldr r0, [r3, #36]
69 69
70 ldmia r1!, {r11, r12, sp, lr} 70 ldmia r1!, {r10, r11, r12, lr}
71 smlal r6, r7, r11, r10 71 smlal r6, r7, r10, r0
72 72
73 ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ 73 ldr r0, [r4, #88] /*;;1 cycle stall on arm9, but we free up r10*/
74 smlal r8, r9, r11, r10 74 smlal r8, r9, r10, r0
75 75
76 ldr r10, [r3, #28] 76 ldr r0, [r3, #28]
77 ldr r11, [r3, #20] 77 ldr r10, [r3, #20]
78 smlal r6, r7, r11, r0
79 ldr r0, [r3, #12]
78 smlal r6, r7, r12, r10 80 smlal r6, r7, r12, r10
79 ldr r10, [r3, #12] 81 ldr r10, [r4, #96]
80 smlal r6, r7, sp, r11 82 smlal r6, r7, lr, r0
81 ldr r11, [r4, #96] 83 ldr r0, [r4, #104]
82 smlal r6, r7, lr, r10 84 smlal r8, r9, r11, r10
83 ldr r10, [r4, #104] 85 ldr r10, [r4, #112]
84 smlal r8, r9, r12, r11 86 smlal r8, r9, r12, r0
85 ldr r11, [r4, #112] 87 smlal r8, r9, lr, r10
86 smlal r8, r9, sp, r10
87 smlal r8, r9, lr, r11
88 88
89 rsbs r6, r6, #0 89 rsbs r6, r6, #0
90 rsc r7, r7, #0 90 rsc r7, r7, #0
91 91
92 /* ;; PROD_A and even half of SB_SAMPLE*/ 92 /* ;; PROD_A and even half of SB_SAMPLE*/
93 ldr r10, [r3, #0] 93 ldr r0, [r3, #0]
94 ldmia r2!, {r11, r12, sp, lr} 94 ldmia r2!, {r10, r11, r12, lr}
95 smlal r6, r7, r11, r10 95 smlal r6, r7, r10, r0
96 96
97 ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/ 97 ldr r0, [r4, #60] /*;;1 cycle stall on arm9, but we free up r10*/
98 smlal r8, r9, r11, r10 98 smlal r8, r9, r10, r0
99 ldr r11, [r3, #56] 99 ldr r10, [r3, #56]
100 ldr r10, [r3, #48] 100 ldr r0, [r3, #48]
101 smlal r6, r7, r12, r11 101 smlal r6, r7, r11, r10
102 ldr r11, [r3, #40] 102 ldr r10, [r3, #40]
103 smlal r6, r7, sp, r10 103 smlal r6, r7, r12, r0
104 ldr r10, [r4, #68] 104 ldr r0, [r4, #68]
105 smlal r6, r7, lr, r11 105 smlal r6, r7, lr, r10
106 ldr r11, [r4, #76] 106 ldr r10, [r4, #76]
107 smlal r8, r9, r11, r0
108 ldr r0, [r4, #84]
107 smlal r8, r9, r12, r10 109 smlal r8, r9, r12, r10
108 ldr r10, [r4, #84] 110 smlal r8, r9, lr, r0
109 smlal r8, r9, sp, r11
110 smlal r8, r9, lr, r10
111 111
112 ldr r10, [r3, #32] 112 ldr r0, [r3, #32]
113 ldmia r2!, {r11, r12, sp, lr} 113 ldmia r2!, {r10, r11, r12, lr}
114 smlal r6, r7, r11, r10 114 smlal r6, r7, r10, r0
115 115
116 ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/ 116 ldr r0, [r4, #92] /*;;1 cycle stall on arm9, but we free up r10*/
117 smlal r8, r9, r11, r10 117 smlal r8, r9, r10, r0
118 118
119 ldr r10, [r3, #24] 119 ldr r0, [r3, #24]
120 ldr r11, [r3, #16] 120 ldr r10, [r3, #16]
121 smlal r6, r7, r11, r0
122 ldr r0, [r3, #8]
121 smlal r6, r7, r12, r10 123 smlal r6, r7, r12, r10
122 ldr r10, [r3, #8] 124 ldr r10, [r4, #100]
123 smlal r6, r7, sp, r11 125 smlal r6, r7, lr, r0
124 ldr r11, [r4, #100] 126 ldr r0, [r4, #108]
125 smlal r6, r7, lr, r10 127 smlal r8, r9, r11, r10
126 ldr r10, [r4, #108] 128 ldr r10, [r4, #116]
127 smlal r8, r9, r12, r11 129 smlal r8, r9, r12, r0
128 ldr r11, [r4, #116] 130 smlal r8, r9, lr, r10
129 smlal r8, r9, sp, r10 131
130 smlal r8, r9, lr, r11 132 ldr r0, [sp]
131 133
132 movs r6, r6, lsr #16 134 movs r6, r6, lsr #16
133 adc r6, r6, r7, lsl #16 135 adc r6, r6, r7, lsl #16
@@ -140,15 +142,11 @@ synth_full_odd_sbsample:
140 subs r5, r5, #1 142 subs r5, r5, #1
141 bne .l 143 bne .l
142 144
143 ldr r5, =synth_full_sp 145 ldmpc regs="r0,r4-r11"
144 ldr sp, [r5]
145 ldmpc regs=r4-r11
146 146
147synth_full_even_sbsample: 147synth_full_even_sbsample:
148 stmdb sp!, {r4-r11, lr} 148 stmdb sp!, {r0, r4-r11, lr}
149 ldr r4, [sp, #36] 149 ldr r4, [sp, #40]
150 ldr r5, =synth_full_sp
151 str sp, [r5]
152 mov r5, #15 150 mov r5, #15
153 add r2, r2, #32 151 add r2, r2, #32
154.l2: 152.l2:
@@ -156,84 +154,86 @@ synth_full_even_sbsample:
156 add r3, r3, #128 154 add r3, r3, #128
157 add r4, r4, #128 155 add r4, r4, #128
158 ldr r7, [r3, #0] 156 ldr r7, [r3, #0]
159 ldmia r1!, {r10, r11, r12, lr} 157 ldmia r1!, {r0, r10, r11, lr}
160 ldr r9, [r4, #60] 158 ldr r9, [r4, #60]
161 smull r6, r7, r10, r7 159 smull r6, r7, r0, r7
162 ldr sp, [r3, #56] 160 ldr r12, [r3, #56]
163 smull r8, r9, r10, r9 161 smull r8, r9, r0, r9
164 ldr r10, [r3, #48] 162 ldr r0, [r3, #48]
165 smlal r6, r7, r11, sp 163 smlal r6, r7, r10, r12
166 ldr sp, [r3, #40] 164 ldr r12, [r3, #40]
167 smlal r6, r7, r12, r10 165 smlal r6, r7, r11, r0
168 ldr r10, [r4, #68] 166 ldr r0, [r4, #68]
169 smlal r6, r7, lr, sp 167 smlal r6, r7, lr, r12
170 168
171 ldr sp, [r4, #76] 169 ldr r12, [r4, #76]
172 smlal r8, r9, r11, r10 170 smlal r8, r9, r10, r0
173 ldr r10, [r4, #84] 171 ldr r0, [r4, #84]
174 smlal r8, r9, r12, sp 172 smlal r8, r9, r11, r12
175 smlal r8, r9, lr, r10 173 smlal r8, r9, lr, r0
176 174
177 ldr r10, [r3, #32] 175 ldr r0, [r3, #32]
178 ldmia r1!, {r11, r12, sp, lr} 176 ldmia r1!, {r10, r11, r12, lr}
179 177
180 smlal r6, r7, r11, r10 178 smlal r6, r7, r10, r0
181 ldr r10, [r4, #92] 179 ldr r0, [r4, #92]
182 smlal r8, r9, r11, r10 180 smlal r8, r9, r10, r0
183 ldr r10, [r3, #24] 181 ldr r0, [r3, #24]
184 ldr r11, [r3, #16] 182 ldr r10, [r3, #16]
183 smlal r6, r7, r11, r0
184 ldr r0, [r3, #8]
185 smlal r6, r7, r12, r10 185 smlal r6, r7, r12, r10
186 ldr r10, [r3, #8] 186 ldr r10, [r4, #100]
187 smlal r6, r7, sp, r11 187 smlal r6, r7, lr, r0
188 ldr r11, [r4, #100] 188 ldr r0, [r4, #108]
189 smlal r6, r7, lr, r10 189 smlal r8, r9, r11, r10
190 ldr r10, [r4, #108] 190 ldr r10, [r4, #116]
191 smlal r8, r9, r12, r11 191 smlal r8, r9, r12, r0
192 ldr r11, [r4, #116] 192 smlal r8, r9, lr, r10
193 smlal r8, r9, sp, r10
194 smlal r8, r9, lr, r11
195 193
196 rsbs r6, r6, #0 194 rsbs r6, r6, #0
197 rsc r7, r7, #0 195 rsc r7, r7, #0
198 196
199 ldr r10, [r3, #4] 197 ldr r0, [r3, #4]
200 ldmia r2!, {r11, r12, sp, lr} 198 ldmia r2!, {r10, r11, r12, lr}
201 smlal r6, r7, r11, r10 199 smlal r6, r7, r10, r0
202 ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/ 200 ldr r0, [r4, #120] /*;;1 cycle stall on arm9, but we free up r10*/
203 smlal r8, r9, r11, r10 201 smlal r8, r9, r10, r0
204 ldr r10, [r3, #60] 202 ldr r0, [r3, #60]
205 ldr r11, [r3, #52] 203 ldr r10, [r3, #52]
204 smlal r6, r7, r11, r0
205 ldr r0, [r3, #44]
206 smlal r6, r7, r12, r10 206 smlal r6, r7, r12, r10
207 ldr r10, [r3, #44] 207 ldr r10, [r4, #64]
208 smlal r6, r7, sp, r11 208 smlal r6, r7, lr, r0
209 ldr r11, [r4, #64]
210 smlal r6, r7, lr, r10
211 209
212 ldr r10, [r4, #72] 210 ldr r0, [r4, #72]
213 smlal r8, r9, r12, r11 211 smlal r8, r9, r11, r10
214 ldr r11, [r4, #80] 212 ldr r10, [r4, #80]
215 smlal r8, r9, sp, r10 213 smlal r8, r9, r12, r0
216 214
217 smlal r8, r9, lr, r11 215 smlal r8, r9, lr, r10
218 216
219 ldr r10, [r3, #36] 217 ldr r0, [r3, #36]
220 ldmia r2!, {r11, r12, sp, lr} 218 ldmia r2!, {r10, r11, r12, lr}
221 smlal r6, r7, r11, r10 219 smlal r6, r7, r10, r0
222 ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ 220 ldr r0, [r4, #88] /*;;1 cycle stall on arm9, but we free up r10*/
223 smlal r8, r9, r11, r10 221 smlal r8, r9, r10, r0
224 222
225 ldr r10, [r3, #28] 223 ldr r0, [r3, #28]
226 ldr r11, [r3, #20] 224 ldr r10, [r3, #20]
225 smlal r6, r7, r11, r0
226 ldr r0, [r3, #12]
227 smlal r6, r7, r12, r10 227 smlal r6, r7, r12, r10
228 ldr r10, [r3, #12] 228 ldr r10, [r4, #96]
229 smlal r6, r7, sp, r11 229 smlal r6, r7, lr, r0
230 ldr r11, [r4, #96] 230 ldr r0, [r4, #104]
231 smlal r6, r7, lr, r10 231 smlal r8, r9, r11, r10
232 ldr r10, [r4, #104] 232 ldr r10, [r4, #112]
233 smlal r8, r9, r12, r11 233 smlal r8, r9, r12, r0
234 ldr r11, [r4, #112] 234 smlal r8, r9, lr, r10
235 smlal r8, r9, sp, r10 235
236 smlal r8, r9, lr, r11 236 ldr r0, [sp]
237 237
238 movs r6, r6, lsr #16 238 movs r6, r6, lsr #16
239 adc r6, r6, r7, lsl #16 239 adc r6, r6, r7, lsl #16
@@ -246,9 +246,7 @@ synth_full_even_sbsample:
246 subs r5, r5, #1 246 subs r5, r5, #1
247 bne .l2 247 bne .l2
248 248
249 ldr r5, =synth_full_sp 249 ldmpc regs="r0,r4-r11"
250 ldr sp, [r5]
251 ldmpc regs=r4-r11
252 250
253 .global III_aliasreduce 251 .global III_aliasreduce
254 252
@@ -340,7 +338,3 @@ III_overlap:
340 ldmia r0!, {r4, r5, r6, r7, r12, lr} 338 ldmia r0!, {r4, r5, r6, r7, r12, lr}
341 stmia r1!, {r4, r5, r6, r7, r12, lr} 339 stmia r1!, {r4, r5, r6, r7, r12, lr}
342 ldmpc regs=r4-r7 340 ldmpc regs=r4-r7
343
344 .section IBSS_SECTION_MPA_ARM,"aw",%nobits
345synth_full_sp:
346 .space 4