summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2007-10-18 00:14:22 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2007-10-18 00:14:22 +0000
commit: 1cca3ceeefaf23b6da825f20fac7f6a398d7bf80 (patch)
tree: 80f14059b630e1f251a49765f3f1535d61eba180
parent: 7bcfa84c4a5908ad4feeb4ab508b1e053f19925d (diff)
download: rockbox-1cca3ceeefaf23b6da825f20fac7f6a398d7bf80.tar.gz
          rockbox-1cca3ceeefaf23b6da825f20fac7f6a398d7bf80.zip
Less stack hogging in the coldfire IDCT for mpegplayer. Speedup might or might not be measurable.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15175 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/plugins/mpegplayer/idct_coldfire.S 47
1 files changed, 23 insertions, 24 deletions
diff --git a/apps/plugins/mpegplayer/idct_coldfire.S b/apps/plugins/mpegplayer/idct_coldfire.S
index 7c3626d1d2..8db8401a07 100644
--- a/apps/plugins/mpegplayer/idct_coldfire.S
+++ b/apps/plugins/mpegplayer/idct_coldfire.S
@@ -25,11 +25,9 @@
25 25
26 /* The IDCT itself. 26 /* The IDCT itself.
27 * Input: %a0: block pointer 27 * Input: %a0: block pointer
28 * All registers are preserved. */ 28 * Caller must save all registers. */
29 .align 2 29 .align 2
30.idct: 30.idct:
31 lea.l (-15*4,%sp), %sp
32 movem.l %d0-%d7/%a0-%a6, (%sp) | save all registers
33 move.l %a0, %a6 31 move.l %a0, %a6
34 32
35 move.l #0, %macsr | signed integer mode 33 move.l #0, %macsr | signed integer mode
@@ -238,20 +236,19 @@
238 subq.l #1, %d3 | loop 8 times 236 subq.l #1, %d3 | loop 8 times
239 bne.w .col_loop 237 bne.w .col_loop
240 238
241 movem.l (%sp), %d0-%d7/%a0-%a6 | restore all registers
242 lea.l (15*4,%sp), %sp
243 rts 239 rts
244 240
245 .align 2 241 .align 2
246 242
247mpeg2_idct_copy_coldfire: 243mpeg2_idct_copy_coldfire:
248 lea.l (-4*4,%sp), %sp 244 lea.l (-11*4,%sp), %sp
249 movem.l %d2-%d4/%a2, (%sp) | save some registers 245 movem.l %d2-%d7/%a2-%a6, (%sp) | save some registers
250 movem.l (4*4+4,%sp), %a0-%a2| %a0 - block pointer 246 move.l (11*4+4,%sp), %a0 | %a0 - block pointer for idct
251 | %a1 - destination pointer
252 | %a2 - stride
253 247
254 bsr.w .idct | apply idct to block 248 bsr.w .idct | apply idct to block
249 movem.l (11*4+4,%sp), %a0-%a2 | %a0 - block pointer
250 | %a1 - destination pointer
251 | %a2 - stride
255 252
256 move.l #255, %d1 | preload constant for clipping 253 move.l #255, %d1 | preload constant for clipping
257 moveq.l #8, %d4 | loop counter 254 moveq.l #8, %d4 | loop counter
@@ -336,22 +333,23 @@ mpeg2_idct_copy_coldfire:
336 subq.l #1, %d4 | loop 8 times 333 subq.l #1, %d4 | loop 8 times
337 bne.w .copy_clip_loop 334 bne.w .copy_clip_loop
338 335
339 movem.l (%sp), %d2-%d4/%a2 | restore registers 336 movem.l (%sp), %d2-%d7/%a2-%a6
340 lea.l (4*4,%sp), %sp 337 lea.l (11*4,%sp), %sp
341 rts 338 rts
342 339
343 .align 2 340 .align 2
344 341
345mpeg2_idct_add_coldfire: 342mpeg2_idct_add_coldfire:
346 lea.l (-7*4,%sp), %sp 343 lea.l (-11*4,%sp), %sp
347 movem.l %d2-%d7/%a2, (%sp) | save some registers 344 movem.l %d2-%d7/%a2-%a6, (%sp)
348 movem.l (7*4+4,%sp), %d0/%a0-%a2| %d0 - last value 345 movem.l (11*4+4,%sp), %d0/%a0-%a2 | %d0 - last value
349 | %a0 - block pointer 346 | %a0 - block pointer
350 | %a1 - destination pointer 347 | %a1 - destination pointer
351 | %a2 - stride 348 | %a2 - stride
349
352 cmp.l #129, %d0 | last == 129 ? 350 cmp.l #129, %d0 | last == 129 ?
353 bne.b .idct_add | no: perform idct + addition 351 bne.b .idct_add | no: perform idct + addition
354 move.w (%a0), %d0 352 move.w (%a0), %d0
355 ext.l %d0 | ((block[0] 353 ext.l %d0 | ((block[0]
356 asr.l #4, %d0 | >> 4) 354 asr.l #4, %d0 | >> 4)
357 and.l #7, %d0 | & 7) 355 and.l #7, %d0 | & 7)
@@ -359,8 +357,9 @@ mpeg2_idct_add_coldfire:
359 bne.w .dc_add | no: just perform addition 357 bne.w .dc_add | no: just perform addition
360 358
361.idct_add: 359.idct_add:
362 bsr.w .idct | apply idct 360 bsr.w .idct | apply idct
363 361 movem.l (11*4+8,%sp), %a0-%a2 | reload arguments %a0..%a2
362
364 move.l #255, %d2 | preload constant for clipping 363 move.l #255, %d2 | preload constant for clipping
365 clr.l %d3 | used for splitting input words into bytes 364 clr.l %d3 | used for splitting input words into bytes
366 moveq.l #8, %d4 | loop counter 365 moveq.l #8, %d4 | loop counter
@@ -569,6 +568,6 @@ mpeg2_idct_add_coldfire:
569 bne.w .dc_clip_loop 568 bne.w .dc_clip_loop
570 569
571.idct_add_end: 570.idct_add_end:
572 movem.l (%sp), %d2-%d7/%a2 | restore registers 571 movem.l (%sp), %d2-%d7/%a2-%a6
573 lea.l (7*4,%sp), %sp 572 lea.l (11*4,%sp), %sp
574 rts 573 rts