From eb65f89f0e1b88e3d7576eee3e490c007d02076d Mon Sep 17 00:00:00 2001 From: Jens Arnold Date: Wed, 22 Feb 2006 01:20:45 +0000 Subject: Added memset16() for filling memory regions with 16 bit values, needed for upcoming 16 bit LCD driver opts. ASM optimised for coldfire. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8773 a1c6a512-1295-4272-9138-f99709370657 --- firmware/common/memset16_a.S | 146 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100755 firmware/common/memset16_a.S (limited to 'firmware/common/memset16_a.S') diff --git a/firmware/common/memset16_a.S b/firmware/common/memset16_a.S new file mode 100755 index 0000000000..9ab1bdcb5b --- /dev/null +++ b/firmware/common/memset16_a.S @@ -0,0 +1,146 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2006 by Jens Arnold + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ +#include "config.h" + + .section .icode,"ax",@progbits + +#ifdef CPU_COLDFIRE + .global memset16 + .type memset16,@function + +/* Fills a memory region with specified word value + * Start address must be word aligned, length is in words + * This version is optimized for speed + * + * arguments: + * (4,%sp) - start address + * (8,%sp) - data + * (12,%sp) - length + * + * return value: + * %d0 - start address + * + * register usage: + * %d0 - data (spread to both words when using long stores) + * %d1 - temporary / data (for burst transfer) + * %d2 - data (for burst transfer) + * %d3 - data (for burst transfer) + * %a0 - start address + * %a1 - current address (runs down from end to start) + * + * For maximum speed this routine uses both long stores and burst mode, + * storing whole lines with movem.l. The routine fills memory from end + * to start in order to ease returning the start address. + */ +memset16: + move.l (4,%sp),%a0 /* start address */ + move.l (8,%sp),%d0 /* data */ + move.l (12,%sp),%a1 /* length */ + add.l %a1,%a1 + add.l %a0,%a1 /* %a1 = end address */ + + move.l %a0,%d1 + addq.l #6,%d1 + and.l #0xFFFFFFFC,%d1 /* %d1 = first long bound + 4 */ + cmp.l %d1,%a1 /* at least one aligned longword to fill? */ + blo.b .no_longs /* no, jump directly to word loop */ + + and.l #0xFFFF,%d0 /* start: spread data to both words */ + move.l %d0,%d1 + swap %d1 + or.l %d1,%d0 /* data now in both words */ + + move.l %a1,%d1 + and.l #0xFFFFFFFC,%d1 /* %d1 = last long bound */ + cmp.l %d1,%a1 /* one extra word? */ + bls.b .end_w1 /* no: skip */ + + move.w %d0,-(%a1) /* set leading word */ + +.end_w1: + moveq.l #30,%d1 + add.l %a0,%d1 + and.l #0xFFFFFFF0,%d1 /* %d1 = first line bound + 16 */ + cmp.l %d1,%a1 /* at least one full line to fill? */ + blo.b .no_lines /* no, jump to longword loop */ + + mov.l %a1,%d1 + and.l #0xFFFFFFF0,%d1 /* %d1 = last line bound */ + cmp.l %d1,%a1 /* any longwords to set? */ + bls.b .end_l1 /* no: skip longword loop */ + + /* leading longword loop: sets 0..3 longwords */ +.loop_l1: + move.l %d0,-(%a1) /* store longword */ + cmp.l %d1,%a1 /* runs %a1 down to last line bound */ + bhi.b .loop_l1 + +.end_l1: + move.l %d2,-(%sp) /* free some registers */ + move.l %d3,-(%sp) + + move.l %d0,%d1 /* spread data to 4 data registers */ + move.l %d0,%d2 + move.l %d0,%d3 + lea.l (14,%a0),%a0 /* start address += 14, acct. for trl. data */ + + /* main loop: set whole lines utilising burst mode */ +.loop_line: + lea.l (-16,%a1),%a1 /* pre-decrement */ + movem.l %d0-%d3,(%a1) /* store line */ + cmp.l %a0,%a1 /* runs %a1 down to first line bound */ + bhi.b .loop_line + + lea.l (-14,%a0),%a0 /* correct start address */ + move.l (%sp)+,%d3 /* restore registers */ + move.l (%sp)+,%d2 + + move.l %a0,%d1 /* %d1 = start address ... */ + addq.l #2,%d1 /* ... +2, account for possible trailing word */ + cmp.l %d1,%a1 /* any longwords left */ + bhi.b .loop_l2 /* yes: jump to longword loop */ + bra.b .no_longs /* no: skip loop */ + +.no_lines: + move.l %a0,%d1 /* %d1 = start address ... */ + addq.l #2,%d1 /* ... +2, account for possible trailing word */ + + /* trailing longword loop */ +.loop_l2: + move.l %d0,-(%a1) /* store longword */ + cmp.l %d1,%a1 /* runs %a1 down to first long bound */ + bhi.b .loop_l2 + +.no_longs: + cmp.l %a0,%a1 /* any words left? */ + bls.b .end_w2 /* no: skip loop */ + + /* trailing word loop */ +.loop_w2: + move.w %d0,-(%a1) /* store word */ + cmp.l %a0,%a1 /* runs %a1 down to start address */ + bhi.b .loop_w2 + +.end_w2: + move.l %a0,%d0 /* return start address */ + rts + +.end: + .size memset16,.end-memset16 +#endif -- cgit v1.2.3