From 418c9eeb141ac751a59572fde1fcbc1e4655f064 Mon Sep 17 00:00:00 2001
From: Nils Wallménius
Date: Tue, 11 May 2010 22:23:43 +0000
Subject: Faster assembler strlen for coldfire using the load-a-whole-word-and-test-i-for-nullbytes-at-one trick, benched 28% faster than the old version

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25959 a1c6a512-1295-4272-9138-f99709370657
---
 firmware/target/coldfire/strlen-coldfire.S | 64 +++++++++++++++++++++++-------
 1 file changed, 50 insertions(+), 14 deletions(-)

diff --git a/firmware/target/coldfire/strlen-coldfire.S b/firmware/target/coldfire/strlen-coldfire.S
index a65b0c3872..f1e5aca981 100644
--- a/firmware/target/coldfire/strlen-coldfire.S
+++ b/firmware/target/coldfire/strlen-coldfire.S
@@ -5,9 +5,9 @@
  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
  *                     \/            \/     \/    \/            \/
- * $Id $
+ * $Id$
  *
- * Copyright (C) 2007 Nils Wallménius
+ * Copyright (C) 2010 Nils Wallménius
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -21,22 +21,58 @@
 
 /* size_t strlen(const char *str) */
 
-	.section	.text,"ax",@progbits
-	.align	2
-	.globl	strlen
-	.type	strlen, @function
+    .section    .text,"ax",@progbits
+    .align      2
+    .globl      strlen
+    .type       strlen, @function
 
 strlen:
-    move.l 4(%sp),%a0 /* %a0 = *str */
-    move.l %a0,%d0    /* %d0 = start address */
+    move.l  4(%sp), %a0     /* %a0 = str, the argument read from 4(%sp) */
+    move.l  %a0, %a1        /* %a1 = start address, subtracted at .done */
+    move.l  %a0, %d0        /* copy the pointer so its low bits can be masked */
+    andi.l  #3, %d0         /* %d0 = %a0 & 3 */
+    jmp.l   (2,%pc,%d0.l*2) /* dispatch on %d0 through the bra.b table below */
+    bra.b   .bytes0         /* %d0 == 0: go straight to the word loop */
+    bra.b   .bytes3         /* %d0 == 1: test 3 single bytes first */
+    bra.b   .bytes2         /* %d0 == 2: test 2 single bytes first */
+    bra.b   .bytes1         /* %d0 == 3: test 1 single byte first */
+.bytes3:
+    tst.b   (%a0)+          /* test one byte, post-increment %a0 */
+    beq.b   .done           /* 0 byte found */
+.bytes2:
+    tst.b   (%a0)+
+    beq.b   .done
+.bytes1:
+    tst.b   (%a0)+
+    beq.b   .done
+.bytes0:                    /* %a0 is a multiple of 4 from here on */
 
 1:
-    tst.b (%a0)+      /* test if %a0 == 0 and increment */
-    bne.b 1b          /* if the test was false repeat */
+    move.l  (%a0)+, %d0     /* load 4 bytes into %d0, post-increment %a0 */
+    /* (w - 0x01010101) & ~w & 0x80808080 is non-zero exactly when w has a 0 byte */
+    move.l  %d0, %d1
+    subi.l  #0x01010101, %d1
+    not.l   %d0
+    and.l   %d1, %d0
+    andi.l  #0x80808080, %d0
+    beq.b   1b              /* result 0: no 0 byte in this word, load the next */
 
-    sub.l %d0,%a0     /* how many times did we repeat? */
-    move.l %a0,%d0
-    subq.l #1,%d0     /* %d0 is 1 too large due to the last increment */
+    /* the word just loaded contains a 0 byte, test its bytes one by one */
+    subq.l  #4, %a0         /* back to the first byte of that word */
+    tst.b   (%a0)+
+    beq.b   .done
+    tst.b   (%a0)+
+    beq.b   .done
+    tst.b   (%a0)+
+    beq.b   .done
+    /* the fourth byte must be the 0, so it is not tested and %a0 is not
+       incremented; the subq at .done therefore has to be skipped */
+    .word   0x51fa          /* trapf.w, shadows the next instruction (the subq) */
+
+.done:
+    subq.l  #1, %a0         /* undo the post-increment past the 0 byte */
+    sub.l   %a1, %a0        /* %a0 = address of the 0 byte - start address */
+    move.l  %a0, %d0        /* return value in %d0 */
     rts
 
-	.size	strlen, .-strlen
+    .size   strlen, .-strlen
-- 
cgit v1.2.3