diff options
author | Dave Hooper <dave@beermex.com> | 2009-08-29 11:50:15 +0000 |
---|---|---|
committer | Dave Hooper <dave@beermex.com> | 2009-08-29 11:50:15 +0000 |
commit | 59cdbf5efca64962fe6a6c85eb03b64552eae6d2 (patch) | |
tree | 494580f9ee8ec0531abd71ead95fc1f2365d9d1e /apps/codecs/lib | |
parent | eeb1594494729596bd083c298ebfd65ed29411c7 (diff) | |
download | rockbox-59cdbf5efca64962fe6a6c85eb03b64552eae6d2.tar.gz rockbox-59cdbf5efca64962fe6a6c85eb03b64552eae6d2.zip |
Rearrange some registers in butterfly_generic to combine some 2-word stores into 4-word stores and remove some redundant mov instructions. Shave off some additional instructions (stacking and additions) in butterfly_32 by getting butterfly_8 and butterfly_16 to do the address incrementing for us. Add a few comments.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22525 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/lib')
-rw-r--r-- | apps/codecs/lib/mdct_arm.S | 130 |
1 files changed, 69 insertions, 61 deletions
diff --git a/apps/codecs/lib/mdct_arm.S b/apps/codecs/lib/mdct_arm.S index bacc049f6b..76139838a6 100644 --- a/apps/codecs/lib/mdct_arm.S +++ b/apps/codecs/lib/mdct_arm.S | |||
@@ -38,6 +38,9 @@ | |||
38 | .global mdct_butterfly_generic_loop | 38 | .global mdct_butterfly_generic_loop |
39 | 39 | ||
40 | mdct_butterfly_8: | 40 | mdct_butterfly_8: |
41 | @ inputs: r0,r1,r2,r3,r4,r5,r6,r10,r11 &lr | ||
42 | @ uses: r7,r8,r9,r12(scratch) | ||
43 | @ modifies: r0,r1,r2,r3,r4,r5,r6,r10,r11. increments r0 by #8*4 | ||
41 | add r9, r5, r1 @ x4 + x0 | 44 | add r9, r5, r1 @ x4 + x0 |
42 | sub r5, r5, r1 @ x4 - x0 | 45 | sub r5, r5, r1 @ x4 - x0 |
43 | add r7, r6, r2 @ x5 + x1 | 46 | add r7, r6, r2 @ x5 + x1 |
@@ -55,11 +58,15 @@ mdct_butterfly_8: | |||
55 | sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1) | 58 | sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1) |
56 | add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0) | 59 | add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0) |
57 | add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1) | 60 | add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1) |
58 | stmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} | 61 | stmia r0!, {r1, r2, r3, r4, r5, r6, r10, r11} |
59 | 62 | ||
60 | mov pc, lr | 63 | mov pc, lr |
61 | 64 | ||
62 | mdct_butterfly_16: | 65 | mdct_butterfly_16: |
66 | @ inputs: r0,r1 &lr | ||
67 | @ uses: r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12 | ||
68 | @ modifies: r0. increments r0 by #16*4 | ||
69 | @ calls mdct_butterfly_8 via bl so need to stack lr for return address | ||
63 | str lr, [sp, #-4]! | 70 | str lr, [sp, #-4]! |
64 | add r1, r0, #8*4 | 71 | add r1, r0, #8*4 |
65 | 72 | ||
@@ -112,9 +119,13 @@ mdct_butterfly_16: | |||
112 | sub r0, r0, #4*4 | 119 | sub r0, r0, #4*4 |
113 | ldmia r0, {r1, r2, r3, r4} | 120 | ldmia r0, {r1, r2, r3, r4} |
114 | bl mdct_butterfly_8 | 121 | bl mdct_butterfly_8 |
115 | add r0, r0, #8*4 | 122 | |
123 | @ mdct_butterfly_8 will have incremented r0 by #8*4 already | ||
116 | ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} | 124 | ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} |
125 | |||
117 | bl mdct_butterfly_8 | 126 | bl mdct_butterfly_8 |
127 | @ mdct_butterfly_8 increments r0 by another #8*4 here | ||
128 | @ at end, r0 has been incremented by #16*4 | ||
118 | 129 | ||
119 | ldr pc, [sp], #4 | 130 | ldr pc, [sp], #4 |
120 | 131 | ||
@@ -164,23 +175,23 @@ mdct_butterfly_32: | |||
164 | add r7, r7, r3 @ y21 = x21 + x5 | 175 | add r7, r7, r3 @ y21 = x21 + x5 |
165 | rsb r3, r7, r3, asl #1 @ x5 - x21 | 176 | rsb r3, r7, r3, asl #1 @ x5 - x21 |
166 | add r8, r8, r4 @ y22 = x22 + x6 | 177 | add r8, r8, r4 @ y22 = x22 + x6 |
167 | sub r4, r8, r4, asl #1 @ x22 - x6 | 178 | sub r11, r8, r4, asl #1 @ x22 - x6 |
168 | add r9, r9, r5 @ y23 = x23 + x7 | 179 | add r9, r9, r5 @ y23 = x23 + x7 |
169 | rsb r5, r9, r5, asl #1 @ x7 - x23 | 180 | rsb r10, r9, r5, asl #1 @ x7 - x23 |
170 | |||
171 | stmia r1!, {r6, r7, r8, r9} | 181 | stmia r1!, {r6, r7, r8, r9} |
172 | 182 | ||
173 | smull r10, r6, lr, r2 | 183 | @r4,r5,r6,r7,r8,r9 now free |
184 | @ we don't use r5, r8, r9 below | ||
185 | |||
186 | smull r4, r6, lr, r2 | ||
174 | rsb r2, r2, #0 | 187 | rsb r2, r2, #0 |
175 | smlal r10, r6, r12, r3 | 188 | smlal r4, r6, r12, r3 |
176 | smull r10, r7, lr, r3 | 189 | smull r4, r7, lr, r3 |
177 | smlal r10, r7, r12, r2 | 190 | smlal r4, r7, r12, r2 |
178 | mov r6, r6, asl #1 | 191 | mov r6, r6, asl #1 |
179 | mov r7, r7, asl #1 | 192 | mov r7, r7, asl #1 |
180 | 193 | ||
181 | mov r8, r5 | 194 | stmia r0!, {r6, r7, r10, r11} |
182 | mov r9, r4 | ||
183 | stmia r0!, {r6, r7, r8, r9} | ||
184 | 195 | ||
185 | ldmia r0, {r2, r3, r4, r5} | 196 | ldmia r0, {r2, r3, r4, r5} |
186 | ldmia r1, {r6, r7, r8, r9} | 197 | ldmia r1, {r6, r7, r8, r9} |
@@ -221,30 +232,29 @@ mdct_butterfly_32: | |||
221 | add r7, r7, r3 @ y29 = x29 + x13 | 232 | add r7, r7, r3 @ y29 = x29 + x13 |
222 | sub r3, r7, r3, asl #1 @ x29 - x13 | 233 | sub r3, r7, r3, asl #1 @ x29 - x13 |
223 | add r8, r8, r4 @ y30 = x30 + x14 | 234 | add r8, r8, r4 @ y30 = x30 + x14 |
224 | sub r4, r8, r4, asl #1 @ x30 - x14 | 235 | sub r10, r8, r4, asl #1 @ x30 - x14 |
225 | add r9, r9, r5 @ y31 = x31 + x15 | 236 | add r9, r9, r5 @ y31 = x31 + x15 |
226 | sub r5, r9, r5, asl #1 @ x31 - x15 | 237 | sub r11, r9, r5, asl #1 @ x31 - x15 |
227 | |||
228 | stmia r1, {r6, r7, r8, r9} | 238 | stmia r1, {r6, r7, r8, r9} |
229 | 239 | ||
230 | smull r10, r7, r12, r3 | 240 | @ r4,r5,r6,r7,r8,r9 now free |
241 | @ we don't use r5,r8,r9 below | ||
242 | |||
243 | smull r4, r7, r12, r3 | ||
231 | rsb r3, r3, #0 | 244 | rsb r3, r3, #0 |
232 | smlal r10, r7, lr, r2 | 245 | smlal r4, r7, lr, r2 |
233 | smull r10, r6, lr, r3 | 246 | smull r4, r6, lr, r3 |
234 | smlal r10, r6, r12, r2 | 247 | smlal r4, r6, r12, r2 |
235 | mov r6, r6, asl #1 | 248 | mov r6, r6, asl #1 |
236 | mov r7, r7, asl #1 | 249 | mov r7, r7, asl #1 |
237 | 250 | ||
238 | mov r8, r4 | 251 | stmia r0, {r6, r7, r10, r11} |
239 | mov r9, r5 | ||
240 | stmia r0, {r6, r7, r8, r9} | ||
241 | 252 | ||
242 | sub r0, r0, #12*4 | 253 | sub r0, r0, #12*4 |
243 | str r0, [sp, #-4]! | ||
244 | bl mdct_butterfly_16 | 254 | bl mdct_butterfly_16 |
245 | 255 | ||
246 | ldr r0, [sp], #4 | 256 | @ we know mdct_butterfly_16 increments r0 by #16*4 |
247 | add r0, r0, #16*4 | 257 | @ and we wanted to advance by #16*4 anyway, so just call again |
248 | bl mdct_butterfly_16 | 258 | bl mdct_butterfly_16 |
249 | 259 | ||
250 | ldmia sp!, {r4-r11, pc} | 260 | ldmia sp!, {r4-r11, pc} |
@@ -278,19 +288,18 @@ mdct_butterfly_generic_loop: | |||
278 | 288 | ||
279 | mov r8, r8, asl #1 | 289 | mov r8, r8, asl #1 |
280 | mov r9, r9, asl #1 | 290 | mov r9, r9, asl #1 |
281 | stmdb r1!, {r8, r9} | ||
282 | add r2, r2, r3, asl #2 | 291 | add r2, r2, r3, asl #2 |
283 | 292 | ||
284 | ldmia r2, {r6, r7} | 293 | ldmia r2, {r12, r14} |
285 | smull r5, r8, r6, r11 | 294 | smull r5, r6, r12, r11 |
286 | rsb r11, r11, #0 | 295 | rsb r11, r11, #0 |
287 | smlal r5, r8, r7, r10 | 296 | smlal r5, r6, r14, r10 |
288 | smull r5, r9, r6, r10 | 297 | smull r5, r7, r12, r10 |
289 | smlal r5, r9, r7, r11 | 298 | smlal r5, r7, r14, r11 |
290 | 299 | ||
291 | mov r8, r8, asl #1 | 300 | mov r6, r6, asl #1 |
292 | mov r9, r9, asl #1 | 301 | mov r7, r7, asl #1 |
293 | stmdb r1!, {r8, r9} | 302 | stmdb r1!, {r6, r7, r8, r9} |
294 | add r2, r2, r3, asl #2 | 303 | add r2, r2, r3, asl #2 |
295 | 304 | ||
296 | cmp r2, r4 | 305 | cmp r2, r4 |
@@ -321,19 +330,19 @@ mdct_butterfly_generic_loop: | |||
321 | 330 | ||
322 | mov r8, r8, asl #1 | 331 | mov r8, r8, asl #1 |
323 | mov r9, r9, asl #1 | 332 | mov r9, r9, asl #1 |
324 | stmdb r1!, {r8, r9} | 333 | |
325 | sub r2, r2, r3, asl #2 | 334 | sub r2, r2, r3, asl #2 |
326 | 335 | ||
327 | ldmia r2, {r6, r7} | 336 | ldmia r2, {r12, r14} |
328 | smull r5, r9, r6, r11 | 337 | smull r5, r7, r12, r11 |
329 | rsb r11, r11, #0 | 338 | rsb r11, r11, #0 |
330 | smlal r5, r9, r7, r10 | 339 | smlal r5, r7, r14, r10 |
331 | smull r5, r8, r6, r10 | 340 | smull r5, r6, r12, r10 |
332 | smlal r5, r8, r7, r11 | 341 | smlal r5, r6, r14, r11 |
333 | 342 | ||
334 | mov r8, r8, asl #1 | 343 | mov r6, r6, asl #1 |
335 | mov r9, r9, asl #1 | 344 | mov r7, r7, asl #1 |
336 | stmdb r1!, {r8, r9} | 345 | stmdb r1!, {r6, r7, r8, r9} |
337 | sub r2, r2, r3, asl #2 | 346 | sub r2, r2, r3, asl #2 |
338 | 347 | ||
339 | cmp r2, r4 | 348 | cmp r2, r4 |
@@ -364,19 +373,19 @@ mdct_butterfly_generic_loop: | |||
364 | 373 | ||
365 | mov r8, r8, asl #1 | 374 | mov r8, r8, asl #1 |
366 | mov r9, r9, asl #1 | 375 | mov r9, r9, asl #1 |
367 | stmdb r1!, {r8, r9} | 376 | |
368 | add r2, r2, r3, asl #2 | 377 | add r2, r2, r3, asl #2 |
369 | 378 | ||
370 | ldmia r2, {r6, r7} | 379 | ldmia r2, {r12, r14} |
371 | smull r5, r8, r6, r10 | 380 | smull r5, r6, r12, r10 |
372 | rsb r10, r10, #0 | 381 | rsb r10, r10, #0 |
373 | smlal r5, r8, r7, r11 | 382 | smlal r5, r6, r14, r11 |
374 | smull r5, r9, r6, r11 | 383 | smull r5, r7, r12, r11 |
375 | smlal r5, r9, r7, r10 | 384 | smlal r5, r7, r14, r10 |
376 | 385 | ||
377 | mov r8, r8, asl #1 | 386 | mov r6, r6, asl #1 |
378 | mov r9, r9, asl #1 | 387 | mov r7, r7, asl #1 |
379 | stmdb r1!, {r8, r9} | 388 | stmdb r1!, {r6, r7, r8, r9} |
380 | add r2, r2, r3, asl #2 | 389 | add r2, r2, r3, asl #2 |
381 | 390 | ||
382 | cmp r2, r4 | 391 | cmp r2, r4 |
@@ -407,19 +416,18 @@ mdct_butterfly_generic_loop: | |||
407 | 416 | ||
408 | mov r8, r8, asl #1 | 417 | mov r8, r8, asl #1 |
409 | mov r9, r9, asl #1 | 418 | mov r9, r9, asl #1 |
410 | stmdb r1!, {r8, r9} | ||
411 | sub r2, r2, r3, asl #2 | 419 | sub r2, r2, r3, asl #2 |
412 | 420 | ||
413 | ldmia r2, {r6, r7} | 421 | ldmia r2, {r12, r14} |
414 | smull r5, r9, r6, r10 | 422 | smull r5, r7, r12, r10 |
415 | rsb r10, r10, #0 | 423 | rsb r10, r10, #0 |
416 | smlal r5, r9, r7, r11 | 424 | smlal r5, r7, r14, r11 |
417 | smull r5, r8, r6, r11 | 425 | smull r5, r6, r12, r11 |
418 | smlal r5, r8, r7, r10 | 426 | smlal r5, r6, r14, r10 |
419 | 427 | ||
420 | mov r8, r8, asl #1 | 428 | mov r6, r6, asl #1 |
421 | mov r9, r9, asl #1 | 429 | mov r7, r7, asl #1 |
422 | stmdb r1!, {r8, r9} | 430 | stmdb r1!, {r6, r7, r8, r9} |
423 | sub r2, r2, r3, asl #2 | 431 | sub r2, r2, r3, asl #2 |
424 | 432 | ||
425 | cmp r2, r4 | 433 | cmp r2, r4 |