From 4a2feaa30d96a7b38407551c9bece8b73a3a2aac Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Sun, 11 Sep 2005 21:44:48 +0000 Subject: Rockboy: Asm optimised updatepatpix() for coldfire. The vertical-mirroring pattern copy profits from burst mode (line aligned movem). git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7509 a1c6a512-1295-4272-9138-f99709370657 --- apps/plugins/rockboy/lcd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 2 deletions(-) diff --git a/apps/plugins/rockboy/lcd.c b/apps/plugins/rockboy/lcd.c index 9a7ead798f..872311d0e6 100644 --- a/apps/plugins/rockboy/lcd.c +++ b/apps/plugins/rockboy/lcd.c @@ -41,7 +41,11 @@ struct scan scan IDATA_ATTR; #define WT (scan.wt) #define WV (scan.wv) -byte patpix[4096][8][8]; +byte patpix[4096][8][8] +#if CONFIG_CPU == MCF5249 && !defined(SIMULATOR) + __attribute__ ((aligned(16))) /* to profit from burst mode */ +#endif + ; byte patdirty[1024]; byte anydirty; @@ -96,7 +100,7 @@ static byte *vdest; void updatepatpix(void) { int i, j; -#if CONFIG_CPU != SH7034 || defined(SIMULATOR) +#if ((CONFIG_CPU != SH7034) && (CONFIG_CPU != MCF5249)) || defined(SIMULATOR) int k, a, c; #endif byte *vram = lcd.vbank[0]; @@ -179,6 +183,70 @@ void updatepatpix(void) : /* clobbers */ "r0", "r1", "r2" ); +#elif CONFIG_CPU == MCF5249 && !defined(SIMULATOR) + asm volatile ( + "move.b (%2),%%d2 \n" + "move.b (1,%2),%%d1 \n" + + "addq.l #8,%1 \n" + "clr.l %%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.b %%d0,-(%1) \n" + "lsl.l #6,%%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.b %%d0,-(%1) \n" + "lsl.l #6,%%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.b %%d0,-(%1) \n" + "lsl.l #6,%%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.l %%d0,(%0) \n" + "move.b %%d0,-(%1) \n" + "clr.l %%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.b %%d0,-(%1) \n" + "lsl.l #6,%%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.b %%d0,-(%1) \n" + "lsl.l #6,%%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.b %%d0,-(%1) \n" + "lsl.l #6,%%d0 \n" + "lsr.l #1,%%d1 \n" + "addx.l %%d0,%%d0 \n" + "lsr.l #1,%%d2 \n" + "addx.l %%d0,%%d0 \n" + "move.l %%d0,(4,%0) \n" + "move.b %%d0,-(%1) \n" + : /* outputs */ + : /* inputs */ + /* %0 */ "a"(patpix[i+1024][j]), + /* %1 */ "a"(patpix[i][j]), + /* %2 */ "a"(&vram[(i<<4)|(j<<1)]) + : /* clobbers */ + "d0", "d1", "d2" + ); #else a = ((i<<4) | (j<<1)); for (k = 0; k < 8; k++) @@ -270,6 +338,53 @@ void updatepatpix(void) : /* clobbers */ "r0", "r1" ); +#elif CONFIG_CPU == MCF5249 && !defined(SIMULATOR) + asm volatile ( + "movem.l (%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(48,%1) \n" + "movem.l (16,%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(32,%1) \n" + "movem.l (32,%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(16,%1) \n" + "movem.l (48,%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(%1) \n" + + "move.l %2,%%d0 \n" + "add.l %%d0,%0 \n" + "add.l %%d0,%1 \n" + + "movem.l (%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(48,%1) \n" + "movem.l (16,%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(32,%1) \n" + "movem.l (32,%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(16,%1) \n" + "movem.l (48,%0),%%d0-%%d3 \n" + "move.l %%d0,%%d4 \n" + "move.l %%d1,%%d5 \n" + "movem.l %%d2-%%d5,(%1) \n" + : /* outputs */ + : /* inputs */ + /* %0 */ "a"(patpix[i][0]), + /* %1 */ "a"(patpix[i+2048][0]), + /* %2 */ "i"(1024*64) + : /* clobbers */ + "d0", "d1", "d2", "d3", "d4", "d5" + ); #else for (j = 0; j < 8; j++) { -- cgit v1.2.3