summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Michael Giacomelli <giac2000@hotmail.com>, 2010-11-29 22:34:51 +0000
committer: Michael Giacomelli <giac2000@hotmail.com>, 2010-11-29 22:34:51 +0000
commit: 9929512682a999c440606cc9e4d4074a294ca616 (patch)
tree: 8cf3c79a1757f3c0173bcf2c140a7c4de3177493
parent: 90d77fb77ac3ad1f8ec24837fe2e0d340b4b5ba9 (diff)
download: rockbox-9929512682a999c440606cc9e4d4074a294ca616.tar.gz
download: rockbox-9929512682a999c440606cc9e4d4074a294ca616.zip
ARM9-optimized synth_full for libmad. Speeds up MP3 decoding by an even 2 MHz on all ARM9 and later devices. Note that this is only optimized for ARM9 (non-E), although it is also faster on later devices. An ARM9E/ARM11 version will be needed for optimal performance on newer devices.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28710 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libmad/synth_full_arm.S182
1 files changed, 94 insertions, 88 deletions
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S
index dec437f666..8d312de645 100644
--- a/apps/codecs/libmad/synth_full_arm.S
+++ b/apps/codecs/libmad/synth_full_arm.S
@@ -31,7 +31,12 @@
31 ;; r1 = fo 31 ;; r1 = fo
32 ;; r2 = fe 32 ;; r2 = fe
33 ;; r3 = D0ptr 33 ;; r3 = D0ptr
34 ;; r4 = D1ptr 34 ;; r4 = D1ptr
35
36 /*;; r5 = loop counter
37 ;; r6,r7 accumulator1
38 ;; r8,r9 accumulator2 */
39
35synth_full_odd_sbsample: 40synth_full_odd_sbsample:
36 stmdb sp!, {r4-r11, lr} 41 stmdb sp!, {r4-r11, lr}
37 ldr r4, [sp, #36] 42 ldr r4, [sp, #36]
@@ -40,88 +45,89 @@ synth_full_odd_sbsample:
40 mov r5, #15 45 mov r5, #15
41 add r2, r2, #32 46 add r2, r2, #32
42.l: 47.l:
48 /* ;; PROD_O and odd half of SB_SAMPLE*/
43 add r3, r3, #128 49 add r3, r3, #128
44 add r4, r4, #128 50 add r4, r4, #128
45 ldmia r1!, {r10, r11, r12, lr}
46 ldr r7, [r3, #4] 51 ldr r7, [r3, #4]
47 smull r6, r7, r10, r7 52 ldmia r1!, {r10, r11, r12, lr}
48 ldr r9, [r4, #120] 53 ldr r9, [r4, #120]
54 smull r6, r7, r10, r7
55 ldr sp, [r3, #60]
49 smull r8, r9, r10, r9 56 smull r8, r9, r10, r9
50
51 ldr r10, [r3, #60]
52 smlal r6, r7, r11, r10
53 ldr r10, [r3, #52] 57 ldr r10, [r3, #52]
58 smlal r6, r7, r11, sp
59 ldr sp, [r3, #44]
54 smlal r6, r7, r12, r10 60 smlal r6, r7, r12, r10
55 ldr r10, [r3, #44]
56 smlal r6, r7, lr, r10
57
58 ldr r10, [r4, #64] 61 ldr r10, [r4, #64]
62 smlal r6, r7, lr, sp
63 ldr sp, [r4, #72]
59 smlal r8, r9, r11, r10 64 smlal r8, r9, r11, r10
60 ldr r10, [r4, #72]
61 smlal r8, r9, r12, r10
62 ldr r10, [r4, #80] 65 ldr r10, [r4, #80]
66 smlal r8, r9, r12, sp
63 smlal r8, r9, lr, r10 67 smlal r8, r9, lr, r10
64 68 ldr r10, [r3, #36]
69
65 ldmia r1!, {r11, r12, sp, lr} 70 ldmia r1!, {r11, r12, sp, lr}
66 ldr r10, [r3, #36]
67 smlal r6, r7, r11, r10 71 smlal r6, r7, r11, r10
72
73 ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
74 smlal r8, r9, r11, r10
75
68 ldr r10, [r3, #28] 76 ldr r10, [r3, #28]
77 ldr r11, [r3, #20]
69 smlal r6, r7, r12, r10 78 smlal r6, r7, r12, r10
70 ldr r10, [r3, #20]
71 smlal r6, r7, sp, r10
72 ldr r10, [r3, #12] 79 ldr r10, [r3, #12]
80 smlal r6, r7, sp, r11
81 ldr r11, [r4, #96]
73 smlal r6, r7, lr, r10 82 smlal r6, r7, lr, r10
74
75 ldr r10, [r4, #88]
76 smlal r8, r9, r11, r10
77 ldr r10, [r4, #96]
78 smlal r8, r9, r12, r10
79 ldr r10, [r4, #104] 83 ldr r10, [r4, #104]
84 smlal r8, r9, r12, r11
85 ldr r11, [r4, #112]
80 smlal r8, r9, sp, r10 86 smlal r8, r9, sp, r10
81 ldr r10, [r4, #112] 87 smlal r8, r9, lr, r11
82 smlal r8, r9, lr, r10
83 88
84 rsbs r6, r6, #0 89 rsbs r6, r6, #0
85 rsc r7, r7, #0 90 rsc r7, r7, #0
86 91
87 ldmia r2!, {r11, r12, sp, lr} 92 /* ;; PROD_A and even half of SB_SAMPLE*/
88
89 ldr r10, [r3, #0] 93 ldr r10, [r3, #0]
94 ldmia r2!, {r11, r12, sp, lr}
90 smlal r6, r7, r11, r10 95 smlal r6, r7, r11, r10
91 ldr r10, [r3, #56] 96
92 smlal r6, r7, r12, r10 97 ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/
98 smlal r8, r9, r11, r10
99 ldr r11, [r3, #56]
93 ldr r10, [r3, #48] 100 ldr r10, [r3, #48]
101 smlal r6, r7, r12, r11
102 ldr r11, [r3, #40]
94 smlal r6, r7, sp, r10 103 smlal r6, r7, sp, r10
95 ldr r10, [r3, #40]
96 smlal r6, r7, lr, r10
97
98 ldr r10, [r4, #60]
99 smlal r8, r9, r11, r10
100 ldr r10, [r4, #68] 104 ldr r10, [r4, #68]
105 smlal r6, r7, lr, r11
106 ldr r11, [r4, #76]
101 smlal r8, r9, r12, r10 107 smlal r8, r9, r12, r10
102 ldr r10, [r4, #76] 108 ldr r10, [r4, #84]
103 smlal r8, r9, sp, r10 109 smlal r8, r9, sp, r11
104 ldr r10, [r4, #84]
105 smlal r8, r9, lr, r10 110 smlal r8, r9, lr, r10
106 111
107 ldmia r2!, {r11, r12, sp, lr}
108 ldr r10, [r3, #32] 112 ldr r10, [r3, #32]
113 ldmia r2!, {r11, r12, sp, lr}
109 smlal r6, r7, r11, r10 114 smlal r6, r7, r11, r10
115
116 ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/
117 smlal r8, r9, r11, r10
118
110 ldr r10, [r3, #24] 119 ldr r10, [r3, #24]
120 ldr r11, [r3, #16]
111 smlal r6, r7, r12, r10 121 smlal r6, r7, r12, r10
112 ldr r10, [r3, #16]
113 smlal r6, r7, sp, r10
114 ldr r10, [r3, #8] 122 ldr r10, [r3, #8]
123 smlal r6, r7, sp, r11
124 ldr r11, [r4, #100]
115 smlal r6, r7, lr, r10 125 smlal r6, r7, lr, r10
116
117 ldr r10, [r4, #92]
118 smlal r8, r9, r11, r10
119 ldr r10, [r4, #100]
120 smlal r8, r9, r12, r10
121 ldr r10, [r4, #108] 126 ldr r10, [r4, #108]
127 smlal r8, r9, r12, r11
128 ldr r11, [r4, #116]
122 smlal r8, r9, sp, r10 129 smlal r8, r9, sp, r10
123 ldr r10, [r4, #116] 130 smlal r8, r9, lr, r11
124 smlal r8, r9, lr, r10
125 131
126 movs r6, r6, lsr #16 132 movs r6, r6, lsr #16
127 adc r6, r6, r7, lsl #16 133 adc r6, r6, r7, lsl #16
@@ -146,88 +152,88 @@ synth_full_even_sbsample:
146 mov r5, #15 152 mov r5, #15
147 add r2, r2, #32 153 add r2, r2, #32
148.l2: 154.l2:
155 /* ;; PROD_O and odd half of SB_SAMPLE*/
149 add r3, r3, #128 156 add r3, r3, #128
150 add r4, r4, #128 157 add r4, r4, #128
151 ldmia r1!, {r10, r11, r12, lr}
152 ldr r7, [r3, #0] 158 ldr r7, [r3, #0]
153 smull r6, r7, r10, r7 159 ldmia r1!, {r10, r11, r12, lr}
154 ldr r9, [r4, #60] 160 ldr r9, [r4, #60]
161 smull r6, r7, r10, r7
162 ldr sp, [r3, #56]
155 smull r8, r9, r10, r9 163 smull r8, r9, r10, r9
156
157 ldr r10, [r3, #56]
158 smlal r6, r7, r11, r10
159 ldr r10, [r3, #48] 164 ldr r10, [r3, #48]
165 smlal r6, r7, r11, sp
166 ldr sp, [r3, #40]
160 smlal r6, r7, r12, r10 167 smlal r6, r7, r12, r10
161 ldr r10, [r3, #40]
162 smlal r6, r7, lr, r10
163
164 ldr r10, [r4, #68] 168 ldr r10, [r4, #68]
169 smlal r6, r7, lr, sp
170
171 ldr sp, [r4, #76]
165 smlal r8, r9, r11, r10 172 smlal r8, r9, r11, r10
166 ldr r10, [r4, #76]
167 smlal r8, r9, r12, r10
168 ldr r10, [r4, #84] 173 ldr r10, [r4, #84]
174 smlal r8, r9, r12, sp
169 smlal r8, r9, lr, r10 175 smlal r8, r9, lr, r10
170 176
171 ldmia r1!, {r11, r12, sp, lr}
172 ldr r10, [r3, #32] 177 ldr r10, [r3, #32]
178 ldmia r1!, {r11, r12, sp, lr}
179
173 smlal r6, r7, r11, r10 180 smlal r6, r7, r11, r10
181 ldr r10, [r4, #92]
182 smlal r8, r9, r11, r10
174 ldr r10, [r3, #24] 183 ldr r10, [r3, #24]
184 ldr r11, [r3, #16]
175 smlal r6, r7, r12, r10 185 smlal r6, r7, r12, r10
176 ldr r10, [r3, #16]
177 smlal r6, r7, sp, r10
178 ldr r10, [r3, #8] 186 ldr r10, [r3, #8]
187 smlal r6, r7, sp, r11
188 ldr r11, [r4, #100]
179 smlal r6, r7, lr, r10 189 smlal r6, r7, lr, r10
180
181 ldr r10, [r4, #92]
182 smlal r8, r9, r11, r10
183 ldr r10, [r4, #100]
184 smlal r8, r9, r12, r10
185 ldr r10, [r4, #108] 190 ldr r10, [r4, #108]
191 smlal r8, r9, r12, r11
192 ldr r11, [r4, #116]
186 smlal r8, r9, sp, r10 193 smlal r8, r9, sp, r10
187 ldr r10, [r4, #116] 194 smlal r8, r9, lr, r11
188 smlal r8, r9, lr, r10
189 195
190 rsbs r6, r6, #0 196 rsbs r6, r6, #0
191 rsc r7, r7, #0 197 rsc r7, r7, #0
192 198
193 ldmia r2!, {r11, r12, sp, lr}
194
195 ldr r10, [r3, #4] 199 ldr r10, [r3, #4]
200 ldmia r2!, {r11, r12, sp, lr}
196 smlal r6, r7, r11, r10 201 smlal r6, r7, r11, r10
202 ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/
203 smlal r8, r9, r11, r10
197 ldr r10, [r3, #60] 204 ldr r10, [r3, #60]
205 ldr r11, [r3, #52]
198 smlal r6, r7, r12, r10 206 smlal r6, r7, r12, r10
199 ldr r10, [r3, #52] 207 ldr r10, [r3, #44]
200 smlal r6, r7, sp, r10 208 smlal r6, r7, sp, r11
201 ldr r10, [r3, #44] 209 ldr r11, [r4, #64]
202 smlal r6, r7, lr, r10 210 smlal r6, r7, lr, r10
203 211
204 ldr r10, [r4, #120]
205 smlal r8, r9, r11, r10
206 ldr r10, [r4, #64]
207 smlal r8, r9, r12, r10
208 ldr r10, [r4, #72] 212 ldr r10, [r4, #72]
213 smlal r8, r9, r12, r11
214 ldr r11, [r4, #80]
209 smlal r8, r9, sp, r10 215 smlal r8, r9, sp, r10
210 ldr r10, [r4, #80]
211 smlal r8, r9, lr, r10
212 216
213 ldmia r2!, {r11, r12, sp, lr} 217 smlal r8, r9, lr, r11
218
214 ldr r10, [r3, #36] 219 ldr r10, [r3, #36]
220 ldmia r2!, {r11, r12, sp, lr}
215 smlal r6, r7, r11, r10 221 smlal r6, r7, r11, r10
222 ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
223 smlal r8, r9, r11, r10
224
216 ldr r10, [r3, #28] 225 ldr r10, [r3, #28]
226 ldr r11, [r3, #20]
217 smlal r6, r7, r12, r10 227 smlal r6, r7, r12, r10
218 ldr r10, [r3, #20]
219 smlal r6, r7, sp, r10
220 ldr r10, [r3, #12] 228 ldr r10, [r3, #12]
229 smlal r6, r7, sp, r11
230 ldr r11, [r4, #96]
221 smlal r6, r7, lr, r10 231 smlal r6, r7, lr, r10
222
223 ldr r10, [r4, #88]
224 smlal r8, r9, r11, r10
225 ldr r10, [r4, #96]
226 smlal r8, r9, r12, r10
227 ldr r10, [r4, #104] 232 ldr r10, [r4, #104]
233 smlal r8, r9, r12, r11
234 ldr r11, [r4, #112]
228 smlal r8, r9, sp, r10 235 smlal r8, r9, sp, r10
229 ldr r10, [r4, #112] 236 smlal r8, r9, lr, r11
230 smlal r8, r9, lr, r10
231 237
232 movs r6, r6, lsr #16 238 movs r6, r6, lsr #16
233 adc r6, r6, r7, lsl #16 239 adc r6, r6, r7, lsl #16