summaryrefslogtreecommitdiff
path: root/apps
diff options
context:
space:
mode:
authorDave Hooper <dave@beermex.com>2009-08-29 11:50:15 +0000
committerDave Hooper <dave@beermex.com>2009-08-29 11:50:15 +0000
commit59cdbf5efca64962fe6a6c85eb03b64552eae6d2 (patch)
tree494580f9ee8ec0531abd71ead95fc1f2365d9d1e /apps
parenteeb1594494729596bd083c298ebfd65ed29411c7 (diff)
downloadrockbox-59cdbf5efca64962fe6a6c85eb03b64552eae6d2.tar.gz
rockbox-59cdbf5efca64962fe6a6c85eb03b64552eae6d2.zip
Rearrange some registers in butterfly_generic to combine some 2-word stores into 4-word stores and remove some redundant mov instructions. Shave off some additional instructions (stacking and additions) in butterfly_32 by getting butterfly_8 and butterfly_16 to do the address incrementing for us. Add a few comments.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22525 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r--apps/codecs/lib/mdct_arm.S130
1 files changed, 69 insertions, 61 deletions
diff --git a/apps/codecs/lib/mdct_arm.S b/apps/codecs/lib/mdct_arm.S
index bacc049f6b..76139838a6 100644
--- a/apps/codecs/lib/mdct_arm.S
+++ b/apps/codecs/lib/mdct_arm.S
@@ -38,6 +38,9 @@
38 .global mdct_butterfly_generic_loop 38 .global mdct_butterfly_generic_loop
39 39
40mdct_butterfly_8: 40mdct_butterfly_8:
41@ inputs: r0,r1,r2,r3,r4,r5,r6,r10,r11 &lr
42@ uses: r8,r9,r12(scratch)
43@ modifies: r0,r1,r2,r3,r4,r5,r6,r10,r11. increments r0 by #8*4
41 add r9, r5, r1 @ x4 + x0 44 add r9, r5, r1 @ x4 + x0
42 sub r5, r5, r1 @ x4 - x0 45 sub r5, r5, r1 @ x4 - x0
43 add r7, r6, r2 @ x5 + x1 46 add r7, r6, r2 @ x5 + x1
@@ -55,11 +58,15 @@ mdct_butterfly_8:
55 sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1) 58 sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1)
56 add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0) 59 add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0)
57 add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1) 60 add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1)
58 stmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} 61 stmia r0!, {r1, r2, r3, r4, r5, r6, r10, r11}
59 62
60 mov pc, lr 63 mov pc, lr
61 64
62mdct_butterfly_16: 65mdct_butterfly_16:
66@ inputs: r0,r1 &lr
67@ uses: r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12
68@ modifies: r0. increments r0 by #16*4
69@ calls mdct_butterfly_8 via bl so need to stack lr for return address
63 str lr, [sp, #-4]! 70 str lr, [sp, #-4]!
64 add r1, r0, #8*4 71 add r1, r0, #8*4
65 72
@@ -112,9 +119,13 @@ mdct_butterfly_16:
112 sub r0, r0, #4*4 119 sub r0, r0, #4*4
113 ldmia r0, {r1, r2, r3, r4} 120 ldmia r0, {r1, r2, r3, r4}
114 bl mdct_butterfly_8 121 bl mdct_butterfly_8
115 add r0, r0, #8*4 122
123 @ mdct_butterfly_8 will have incremented r0 by #8*4 already
116 ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} 124 ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11}
125
117 bl mdct_butterfly_8 126 bl mdct_butterfly_8
127 @ mdct_butterfly_8 increments r0 by another #8*4 here
128 @ at end, r0 has been incremented by #16*4
118 129
119 ldr pc, [sp], #4 130 ldr pc, [sp], #4
120 131
@@ -164,23 +175,23 @@ mdct_butterfly_32:
164 add r7, r7, r3 @ y21 = x21 + x5 175 add r7, r7, r3 @ y21 = x21 + x5
165 rsb r3, r7, r3, asl #1 @ x5 - x21 176 rsb r3, r7, r3, asl #1 @ x5 - x21
166 add r8, r8, r4 @ y22 = x22 + x6 177 add r8, r8, r4 @ y22 = x22 + x6
167 sub r4, r8, r4, asl #1 @ x22 - x6 178 sub r11, r8, r4, asl #1 @ x22 - x6
168 add r9, r9, r5 @ y23 = x23 + x7 179 add r9, r9, r5 @ y23 = x23 + x7
169 rsb r5, r9, r5, asl #1 @ x7 - x23 180 rsb r10, r9, r5, asl #1 @ x7 - x23
170
171 stmia r1!, {r6, r7, r8, r9} 181 stmia r1!, {r6, r7, r8, r9}
172 182
173 smull r10, r6, lr, r2 183 @r4,r5,r6,r7,r8,r9 now free
184 @ we don't use r5, r8, r9 below
185
186 smull r4, r6, lr, r2
174 rsb r2, r2, #0 187 rsb r2, r2, #0
175 smlal r10, r6, r12, r3 188 smlal r4, r6, r12, r3
176 smull r10, r7, lr, r3 189 smull r4, r7, lr, r3
177 smlal r10, r7, r12, r2 190 smlal r4, r7, r12, r2
178 mov r6, r6, asl #1 191 mov r6, r6, asl #1
179 mov r7, r7, asl #1 192 mov r7, r7, asl #1
180 193
181 mov r8, r5 194 stmia r0!, {r6, r7, r10, r11}
182 mov r9, r4
183 stmia r0!, {r6, r7, r8, r9}
184 195
185 ldmia r0, {r2, r3, r4, r5} 196 ldmia r0, {r2, r3, r4, r5}
186 ldmia r1, {r6, r7, r8, r9} 197 ldmia r1, {r6, r7, r8, r9}
@@ -221,30 +232,29 @@ mdct_butterfly_32:
221 add r7, r7, r3 @ y29 = x29 + x13 232 add r7, r7, r3 @ y29 = x29 + x13
222 sub r3, r7, r3, asl #1 @ x29 - x13 233 sub r3, r7, r3, asl #1 @ x29 - x13
223 add r8, r8, r4 @ y30 = x30 + x14 234 add r8, r8, r4 @ y30 = x30 + x14
224 sub r4, r8, r4, asl #1 @ x30 - x14 235 sub r10, r8, r4, asl #1 @ x30 - x14
225 add r9, r9, r5 @ y31 = x31 + x15 236 add r9, r9, r5 @ y31 = x31 + x15
226 sub r5, r9, r5, asl #1 @ x31 - x15 237 sub r11, r9, r5, asl #1 @ x31 - x15
227
228 stmia r1, {r6, r7, r8, r9} 238 stmia r1, {r6, r7, r8, r9}
229 239
230 smull r10, r7, r12, r3 240 @ r4,r5,r6,r7,r8,r9 now free
241 @ we don't use r5,r8,r9 below
242
243 smull r4, r7, r12, r3
231 rsb r3, r3, #0 244 rsb r3, r3, #0
232 smlal r10, r7, lr, r2 245 smlal r4, r7, lr, r2
233 smull r10, r6, lr, r3 246 smull r4, r6, lr, r3
234 smlal r10, r6, r12, r2 247 smlal r4, r6, r12, r2
235 mov r6, r6, asl #1 248 mov r6, r6, asl #1
236 mov r7, r7, asl #1 249 mov r7, r7, asl #1
237 250
238 mov r8, r4 251 stmia r0, {r6, r7, r10, r11}
239 mov r9, r5
240 stmia r0, {r6, r7, r8, r9}
241 252
242 sub r0, r0, #12*4 253 sub r0, r0, #12*4
243 str r0, [sp, #-4]!
244 bl mdct_butterfly_16 254 bl mdct_butterfly_16
245 255
246 ldr r0, [sp], #4 256 @ we know mdct_butterfly_16 increments r0 by #16*4
247 add r0, r0, #16*4 257 @ and we wanted to advance by #16*4 anyway, so just call again
248 bl mdct_butterfly_16 258 bl mdct_butterfly_16
249 259
250 ldmia sp!, {r4-r11, pc} 260 ldmia sp!, {r4-r11, pc}
@@ -278,19 +288,18 @@ mdct_butterfly_generic_loop:
278 288
279 mov r8, r8, asl #1 289 mov r8, r8, asl #1
280 mov r9, r9, asl #1 290 mov r9, r9, asl #1
281 stmdb r1!, {r8, r9}
282 add r2, r2, r3, asl #2 291 add r2, r2, r3, asl #2
283 292
284 ldmia r2, {r6, r7} 293 ldmia r2, {r12, r14}
285 smull r5, r8, r6, r11 294 smull r5, r6, r12, r11
286 rsb r11, r11, #0 295 rsb r11, r11, #0
287 smlal r5, r8, r7, r10 296 smlal r5, r6, r14, r10
288 smull r5, r9, r6, r10 297 smull r5, r7, r12, r10
289 smlal r5, r9, r7, r11 298 smlal r5, r7, r14, r11
290 299
291 mov r8, r8, asl #1 300 mov r6, r6, asl #1
292 mov r9, r9, asl #1 301 mov r7, r7, asl #1
293 stmdb r1!, {r8, r9} 302 stmdb r1!, {r6, r7, r8, r9}
294 add r2, r2, r3, asl #2 303 add r2, r2, r3, asl #2
295 304
296 cmp r2, r4 305 cmp r2, r4
@@ -321,19 +330,19 @@ mdct_butterfly_generic_loop:
321 330
322 mov r8, r8, asl #1 331 mov r8, r8, asl #1
323 mov r9, r9, asl #1 332 mov r9, r9, asl #1
324 stmdb r1!, {r8, r9} 333
325 sub r2, r2, r3, asl #2 334 sub r2, r2, r3, asl #2
326 335
327 ldmia r2, {r6, r7} 336 ldmia r2, {r12, r14}
328 smull r5, r9, r6, r11 337 smull r5, r7, r12, r11
329 rsb r11, r11, #0 338 rsb r11, r11, #0
330 smlal r5, r9, r7, r10 339 smlal r5, r7, r14, r10
331 smull r5, r8, r6, r10 340 smull r5, r6, r12, r10
332 smlal r5, r8, r7, r11 341 smlal r5, r6, r14, r11
333 342
334 mov r8, r8, asl #1 343 mov r6, r6, asl #1
335 mov r9, r9, asl #1 344 mov r7, r7, asl #1
336 stmdb r1!, {r8, r9} 345 stmdb r1!, {r6, r7, r8, r9}
337 sub r2, r2, r3, asl #2 346 sub r2, r2, r3, asl #2
338 347
339 cmp r2, r4 348 cmp r2, r4
@@ -364,19 +373,19 @@ mdct_butterfly_generic_loop:
364 373
365 mov r8, r8, asl #1 374 mov r8, r8, asl #1
366 mov r9, r9, asl #1 375 mov r9, r9, asl #1
367 stmdb r1!, {r8, r9} 376
368 add r2, r2, r3, asl #2 377 add r2, r2, r3, asl #2
369 378
370 ldmia r2, {r6, r7} 379 ldmia r2, {r12, r14}
371 smull r5, r8, r6, r10 380 smull r5, r6, r12, r10
372 rsb r10, r10, #0 381 rsb r10, r10, #0
373 smlal r5, r8, r7, r11 382 smlal r5, r6, r14, r11
374 smull r5, r9, r6, r11 383 smull r5, r7, r12, r11
375 smlal r5, r9, r7, r10 384 smlal r5, r7, r14, r10
376 385
377 mov r8, r8, asl #1 386 mov r6, r6, asl #1
378 mov r9, r9, asl #1 387 mov r7, r7, asl #1
379 stmdb r1!, {r8, r9} 388 stmdb r1!, {r6, r7, r8, r9}
380 add r2, r2, r3, asl #2 389 add r2, r2, r3, asl #2
381 390
382 cmp r2, r4 391 cmp r2, r4
@@ -407,19 +416,18 @@ mdct_butterfly_generic_loop:
407 416
408 mov r8, r8, asl #1 417 mov r8, r8, asl #1
409 mov r9, r9, asl #1 418 mov r9, r9, asl #1
410 stmdb r1!, {r8, r9}
411 sub r2, r2, r3, asl #2 419 sub r2, r2, r3, asl #2
412 420
413 ldmia r2, {r6, r7} 421 ldmia r2, {r12, r14}
414 smull r5, r9, r6, r10 422 smull r5, r7, r12, r10
415 rsb r10, r10, #0 423 rsb r10, r10, #0
416 smlal r5, r9, r7, r11 424 smlal r5, r7, r14, r11
417 smull r5, r8, r6, r11 425 smull r5, r6, r12, r11
418 smlal r5, r8, r7, r10 426 smlal r5, r6, r14, r10
419 427
420 mov r8, r8, asl #1 428 mov r6, r6, asl #1
421 mov r9, r9, asl #1 429 mov r7, r7, asl #1
422 stmdb r1!, {r8, r9} 430 stmdb r1!, {r6, r7, r8, r9}
423 sub r2, r2, r3, asl #2 431 sub r2, r2, r3, asl #2
424 432
425 cmp r2, r4 433 cmp r2, r4