diff options
author | Michael Giacomelli <giac2000@hotmail.com> | 2010-11-29 22:34:51 +0000 |
---|---|---|
committer | Michael Giacomelli <giac2000@hotmail.com> | 2010-11-29 22:34:51 +0000 |
commit | 9929512682a999c440606cc9e4d4074a294ca616 (patch) | |
tree | 8cf3c79a1757f3c0173bcf2c140a7c4de3177493 /apps | |
parent | 90d77fb77ac3ad1f8ec24837fe2e0d340b4b5ba9 (diff) | |
download | rockbox-9929512682a999c440606cc9e4d4074a294ca616.tar.gz rockbox-9929512682a999c440606cc9e4d4074a294ca616.zip |
ARM9-optimized synth_full for libmad. Speeds up MP3 decoding by an even 2 MHz on all ARM9 and later devices. Note that this is only optimized for ARM9 (non-E), although it is also faster on later devices. An ARM9E/ARM11 version will be needed for optimal performance on newer devices.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28710 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libmad/synth_full_arm.S | 182 |
1 files changed, 94 insertions, 88 deletions
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S index dec437f666..8d312de645 100644 --- a/apps/codecs/libmad/synth_full_arm.S +++ b/apps/codecs/libmad/synth_full_arm.S | |||
@@ -31,7 +31,12 @@ | |||
31 | ;; r1 = fo | 31 | ;; r1 = fo |
32 | ;; r2 = fe | 32 | ;; r2 = fe |
33 | ;; r3 = D0ptr | 33 | ;; r3 = D0ptr |
34 | ;; r4 = D1ptr | 34 | ;; r4 = D1ptr |
35 | |||
36 | /*;; r5 = loop counter | ||
37 | ;; r6,r7 accumulator1 | ||
38 | ;; r8,r9 accumulator2 */ | ||
39 | |||
35 | synth_full_odd_sbsample: | 40 | synth_full_odd_sbsample: |
36 | stmdb sp!, {r4-r11, lr} | 41 | stmdb sp!, {r4-r11, lr} |
37 | ldr r4, [sp, #36] | 42 | ldr r4, [sp, #36] |
@@ -40,88 +45,89 @@ synth_full_odd_sbsample: | |||
40 | mov r5, #15 | 45 | mov r5, #15 |
41 | add r2, r2, #32 | 46 | add r2, r2, #32 |
42 | .l: | 47 | .l: |
48 | /* ;; PROD_O and odd half of SB_SAMPLE*/ | ||
43 | add r3, r3, #128 | 49 | add r3, r3, #128 |
44 | add r4, r4, #128 | 50 | add r4, r4, #128 |
45 | ldmia r1!, {r10, r11, r12, lr} | ||
46 | ldr r7, [r3, #4] | 51 | ldr r7, [r3, #4] |
47 | smull r6, r7, r10, r7 | 52 | ldmia r1!, {r10, r11, r12, lr} |
48 | ldr r9, [r4, #120] | 53 | ldr r9, [r4, #120] |
54 | smull r6, r7, r10, r7 | ||
55 | ldr sp, [r3, #60] | ||
49 | smull r8, r9, r10, r9 | 56 | smull r8, r9, r10, r9 |
50 | |||
51 | ldr r10, [r3, #60] | ||
52 | smlal r6, r7, r11, r10 | ||
53 | ldr r10, [r3, #52] | 57 | ldr r10, [r3, #52] |
58 | smlal r6, r7, r11, sp | ||
59 | ldr sp, [r3, #44] | ||
54 | smlal r6, r7, r12, r10 | 60 | smlal r6, r7, r12, r10 |
55 | ldr r10, [r3, #44] | ||
56 | smlal r6, r7, lr, r10 | ||
57 | |||
58 | ldr r10, [r4, #64] | 61 | ldr r10, [r4, #64] |
62 | smlal r6, r7, lr, sp | ||
63 | ldr sp, [r4, #72] | ||
59 | smlal r8, r9, r11, r10 | 64 | smlal r8, r9, r11, r10 |
60 | ldr r10, [r4, #72] | ||
61 | smlal r8, r9, r12, r10 | ||
62 | ldr r10, [r4, #80] | 65 | ldr r10, [r4, #80] |
66 | smlal r8, r9, r12, sp | ||
63 | smlal r8, r9, lr, r10 | 67 | smlal r8, r9, lr, r10 |
64 | 68 | ldr r10, [r3, #36] | |
69 | |||
65 | ldmia r1!, {r11, r12, sp, lr} | 70 | ldmia r1!, {r11, r12, sp, lr} |
66 | ldr r10, [r3, #36] | ||
67 | smlal r6, r7, r11, r10 | 71 | smlal r6, r7, r11, r10 |
72 | |||
73 | ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ | ||
74 | smlal r8, r9, r11, r10 | ||
75 | |||
68 | ldr r10, [r3, #28] | 76 | ldr r10, [r3, #28] |
77 | ldr r11, [r3, #20] | ||
69 | smlal r6, r7, r12, r10 | 78 | smlal r6, r7, r12, r10 |
70 | ldr r10, [r3, #20] | ||
71 | smlal r6, r7, sp, r10 | ||
72 | ldr r10, [r3, #12] | 79 | ldr r10, [r3, #12] |
80 | smlal r6, r7, sp, r11 | ||
81 | ldr r11, [r4, #96] | ||
73 | smlal r6, r7, lr, r10 | 82 | smlal r6, r7, lr, r10 |
74 | |||
75 | ldr r10, [r4, #88] | ||
76 | smlal r8, r9, r11, r10 | ||
77 | ldr r10, [r4, #96] | ||
78 | smlal r8, r9, r12, r10 | ||
79 | ldr r10, [r4, #104] | 83 | ldr r10, [r4, #104] |
84 | smlal r8, r9, r12, r11 | ||
85 | ldr r11, [r4, #112] | ||
80 | smlal r8, r9, sp, r10 | 86 | smlal r8, r9, sp, r10 |
81 | ldr r10, [r4, #112] | 87 | smlal r8, r9, lr, r11 |
82 | smlal r8, r9, lr, r10 | ||
83 | 88 | ||
84 | rsbs r6, r6, #0 | 89 | rsbs r6, r6, #0 |
85 | rsc r7, r7, #0 | 90 | rsc r7, r7, #0 |
86 | 91 | ||
87 | ldmia r2!, {r11, r12, sp, lr} | 92 | /* ;; PROD_A and even half of SB_SAMPLE*/ |
88 | |||
89 | ldr r10, [r3, #0] | 93 | ldr r10, [r3, #0] |
94 | ldmia r2!, {r11, r12, sp, lr} | ||
90 | smlal r6, r7, r11, r10 | 95 | smlal r6, r7, r11, r10 |
91 | ldr r10, [r3, #56] | 96 | |
92 | smlal r6, r7, r12, r10 | 97 | ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/ |
98 | smlal r8, r9, r11, r10 | ||
99 | ldr r11, [r3, #56] | ||
93 | ldr r10, [r3, #48] | 100 | ldr r10, [r3, #48] |
101 | smlal r6, r7, r12, r11 | ||
102 | ldr r11, [r3, #40] | ||
94 | smlal r6, r7, sp, r10 | 103 | smlal r6, r7, sp, r10 |
95 | ldr r10, [r3, #40] | ||
96 | smlal r6, r7, lr, r10 | ||
97 | |||
98 | ldr r10, [r4, #60] | ||
99 | smlal r8, r9, r11, r10 | ||
100 | ldr r10, [r4, #68] | 104 | ldr r10, [r4, #68] |
105 | smlal r6, r7, lr, r11 | ||
106 | ldr r11, [r4, #76] | ||
101 | smlal r8, r9, r12, r10 | 107 | smlal r8, r9, r12, r10 |
102 | ldr r10, [r4, #76] | 108 | ldr r10, [r4, #84] |
103 | smlal r8, r9, sp, r10 | 109 | smlal r8, r9, sp, r11 |
104 | ldr r10, [r4, #84] | ||
105 | smlal r8, r9, lr, r10 | 110 | smlal r8, r9, lr, r10 |
106 | 111 | ||
107 | ldmia r2!, {r11, r12, sp, lr} | ||
108 | ldr r10, [r3, #32] | 112 | ldr r10, [r3, #32] |
113 | ldmia r2!, {r11, r12, sp, lr} | ||
109 | smlal r6, r7, r11, r10 | 114 | smlal r6, r7, r11, r10 |
115 | |||
116 | ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/ | ||
117 | smlal r8, r9, r11, r10 | ||
118 | |||
110 | ldr r10, [r3, #24] | 119 | ldr r10, [r3, #24] |
120 | ldr r11, [r3, #16] | ||
111 | smlal r6, r7, r12, r10 | 121 | smlal r6, r7, r12, r10 |
112 | ldr r10, [r3, #16] | ||
113 | smlal r6, r7, sp, r10 | ||
114 | ldr r10, [r3, #8] | 122 | ldr r10, [r3, #8] |
123 | smlal r6, r7, sp, r11 | ||
124 | ldr r11, [r4, #100] | ||
115 | smlal r6, r7, lr, r10 | 125 | smlal r6, r7, lr, r10 |
116 | |||
117 | ldr r10, [r4, #92] | ||
118 | smlal r8, r9, r11, r10 | ||
119 | ldr r10, [r4, #100] | ||
120 | smlal r8, r9, r12, r10 | ||
121 | ldr r10, [r4, #108] | 126 | ldr r10, [r4, #108] |
127 | smlal r8, r9, r12, r11 | ||
128 | ldr r11, [r4, #116] | ||
122 | smlal r8, r9, sp, r10 | 129 | smlal r8, r9, sp, r10 |
123 | ldr r10, [r4, #116] | 130 | smlal r8, r9, lr, r11 |
124 | smlal r8, r9, lr, r10 | ||
125 | 131 | ||
126 | movs r6, r6, lsr #16 | 132 | movs r6, r6, lsr #16 |
127 | adc r6, r6, r7, lsl #16 | 133 | adc r6, r6, r7, lsl #16 |
@@ -146,88 +152,88 @@ synth_full_even_sbsample: | |||
146 | mov r5, #15 | 152 | mov r5, #15 |
147 | add r2, r2, #32 | 153 | add r2, r2, #32 |
148 | .l2: | 154 | .l2: |
155 | /* ;; PROD_O and odd half of SB_SAMPLE*/ | ||
149 | add r3, r3, #128 | 156 | add r3, r3, #128 |
150 | add r4, r4, #128 | 157 | add r4, r4, #128 |
151 | ldmia r1!, {r10, r11, r12, lr} | ||
152 | ldr r7, [r3, #0] | 158 | ldr r7, [r3, #0] |
153 | smull r6, r7, r10, r7 | 159 | ldmia r1!, {r10, r11, r12, lr} |
154 | ldr r9, [r4, #60] | 160 | ldr r9, [r4, #60] |
161 | smull r6, r7, r10, r7 | ||
162 | ldr sp, [r3, #56] | ||
155 | smull r8, r9, r10, r9 | 163 | smull r8, r9, r10, r9 |
156 | |||
157 | ldr r10, [r3, #56] | ||
158 | smlal r6, r7, r11, r10 | ||
159 | ldr r10, [r3, #48] | 164 | ldr r10, [r3, #48] |
165 | smlal r6, r7, r11, sp | ||
166 | ldr sp, [r3, #40] | ||
160 | smlal r6, r7, r12, r10 | 167 | smlal r6, r7, r12, r10 |
161 | ldr r10, [r3, #40] | ||
162 | smlal r6, r7, lr, r10 | ||
163 | |||
164 | ldr r10, [r4, #68] | 168 | ldr r10, [r4, #68] |
169 | smlal r6, r7, lr, sp | ||
170 | |||
171 | ldr sp, [r4, #76] | ||
165 | smlal r8, r9, r11, r10 | 172 | smlal r8, r9, r11, r10 |
166 | ldr r10, [r4, #76] | ||
167 | smlal r8, r9, r12, r10 | ||
168 | ldr r10, [r4, #84] | 173 | ldr r10, [r4, #84] |
174 | smlal r8, r9, r12, sp | ||
169 | smlal r8, r9, lr, r10 | 175 | smlal r8, r9, lr, r10 |
170 | 176 | ||
171 | ldmia r1!, {r11, r12, sp, lr} | ||
172 | ldr r10, [r3, #32] | 177 | ldr r10, [r3, #32] |
178 | ldmia r1!, {r11, r12, sp, lr} | ||
179 | |||
173 | smlal r6, r7, r11, r10 | 180 | smlal r6, r7, r11, r10 |
181 | ldr r10, [r4, #92] | ||
182 | smlal r8, r9, r11, r10 | ||
174 | ldr r10, [r3, #24] | 183 | ldr r10, [r3, #24] |
184 | ldr r11, [r3, #16] | ||
175 | smlal r6, r7, r12, r10 | 185 | smlal r6, r7, r12, r10 |
176 | ldr r10, [r3, #16] | ||
177 | smlal r6, r7, sp, r10 | ||
178 | ldr r10, [r3, #8] | 186 | ldr r10, [r3, #8] |
187 | smlal r6, r7, sp, r11 | ||
188 | ldr r11, [r4, #100] | ||
179 | smlal r6, r7, lr, r10 | 189 | smlal r6, r7, lr, r10 |
180 | |||
181 | ldr r10, [r4, #92] | ||
182 | smlal r8, r9, r11, r10 | ||
183 | ldr r10, [r4, #100] | ||
184 | smlal r8, r9, r12, r10 | ||
185 | ldr r10, [r4, #108] | 190 | ldr r10, [r4, #108] |
191 | smlal r8, r9, r12, r11 | ||
192 | ldr r11, [r4, #116] | ||
186 | smlal r8, r9, sp, r10 | 193 | smlal r8, r9, sp, r10 |
187 | ldr r10, [r4, #116] | 194 | smlal r8, r9, lr, r11 |
188 | smlal r8, r9, lr, r10 | ||
189 | 195 | ||
190 | rsbs r6, r6, #0 | 196 | rsbs r6, r6, #0 |
191 | rsc r7, r7, #0 | 197 | rsc r7, r7, #0 |
192 | 198 | ||
193 | ldmia r2!, {r11, r12, sp, lr} | ||
194 | |||
195 | ldr r10, [r3, #4] | 199 | ldr r10, [r3, #4] |
200 | ldmia r2!, {r11, r12, sp, lr} | ||
196 | smlal r6, r7, r11, r10 | 201 | smlal r6, r7, r11, r10 |
202 | ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/ | ||
203 | smlal r8, r9, r11, r10 | ||
197 | ldr r10, [r3, #60] | 204 | ldr r10, [r3, #60] |
205 | ldr r11, [r3, #52] | ||
198 | smlal r6, r7, r12, r10 | 206 | smlal r6, r7, r12, r10 |
199 | ldr r10, [r3, #52] | 207 | ldr r10, [r3, #44] |
200 | smlal r6, r7, sp, r10 | 208 | smlal r6, r7, sp, r11 |
201 | ldr r10, [r3, #44] | 209 | ldr r11, [r4, #64] |
202 | smlal r6, r7, lr, r10 | 210 | smlal r6, r7, lr, r10 |
203 | 211 | ||
204 | ldr r10, [r4, #120] | ||
205 | smlal r8, r9, r11, r10 | ||
206 | ldr r10, [r4, #64] | ||
207 | smlal r8, r9, r12, r10 | ||
208 | ldr r10, [r4, #72] | 212 | ldr r10, [r4, #72] |
213 | smlal r8, r9, r12, r11 | ||
214 | ldr r11, [r4, #80] | ||
209 | smlal r8, r9, sp, r10 | 215 | smlal r8, r9, sp, r10 |
210 | ldr r10, [r4, #80] | ||
211 | smlal r8, r9, lr, r10 | ||
212 | 216 | ||
213 | ldmia r2!, {r11, r12, sp, lr} | 217 | smlal r8, r9, lr, r11 |
218 | |||
214 | ldr r10, [r3, #36] | 219 | ldr r10, [r3, #36] |
220 | ldmia r2!, {r11, r12, sp, lr} | ||
215 | smlal r6, r7, r11, r10 | 221 | smlal r6, r7, r11, r10 |
222 | ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/ | ||
223 | smlal r8, r9, r11, r10 | ||
224 | |||
216 | ldr r10, [r3, #28] | 225 | ldr r10, [r3, #28] |
226 | ldr r11, [r3, #20] | ||
217 | smlal r6, r7, r12, r10 | 227 | smlal r6, r7, r12, r10 |
218 | ldr r10, [r3, #20] | ||
219 | smlal r6, r7, sp, r10 | ||
220 | ldr r10, [r3, #12] | 228 | ldr r10, [r3, #12] |
229 | smlal r6, r7, sp, r11 | ||
230 | ldr r11, [r4, #96] | ||
221 | smlal r6, r7, lr, r10 | 231 | smlal r6, r7, lr, r10 |
222 | |||
223 | ldr r10, [r4, #88] | ||
224 | smlal r8, r9, r11, r10 | ||
225 | ldr r10, [r4, #96] | ||
226 | smlal r8, r9, r12, r10 | ||
227 | ldr r10, [r4, #104] | 232 | ldr r10, [r4, #104] |
233 | smlal r8, r9, r12, r11 | ||
234 | ldr r11, [r4, #112] | ||
228 | smlal r8, r9, sp, r10 | 235 | smlal r8, r9, sp, r10 |
229 | ldr r10, [r4, #112] | 236 | smlal r8, r9, lr, r11 |
230 | smlal r8, r9, lr, r10 | ||
231 | 237 | ||
232 | movs r6, r6, lsr #16 | 238 | movs r6, r6, lsr #16 |
233 | adc r6, r6, r7, lsl #16 | 239 | adc r6, r6, r7, lsl #16 |