summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNils Wallménius <nils@rockbox.org>2010-05-11 22:23:43 +0000
committerNils Wallménius <nils@rockbox.org>2010-05-11 22:23:43 +0000
commit418c9eeb141ac751a59572fde1fcbc1e4655f064 (patch)
tree7be639f0f7495bd9d8e78ffe14c5c5a0b9396c4a
parentcdcb4ba4401ce87cdbc9309612d6e7a649971398 (diff)
downloadrockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.gz
rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.zip
Faster assembler strlen for coldfire using the load-a-whole-word-and-test-it-for-nullbytes-at-once trick, benched 28% faster than the old version
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25959 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- firmware/target/coldfire/strlen-coldfire.S 64
1 files changed, 50 insertions, 14 deletions
diff --git a/firmware/target/coldfire/strlen-coldfire.S b/firmware/target/coldfire/strlen-coldfire.S
index a65b0c3872..f1e5aca981 100644
--- a/firmware/target/coldfire/strlen-coldfire.S
+++ b/firmware/target/coldfire/strlen-coldfire.S
@@ -5,9 +5,9 @@
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < 5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ 6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/ 7 * \/ \/ \/ \/ \/
8 * $Id $ 8 * $Id$
9 * 9 *
10 * Copyright (C) 2007 Nils Wallménius 10 * Copyright (C) 2010 Nils Wallménius
11 * 11 *
12 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License 13 * modify it under the terms of the GNU General Public License
@@ -21,22 +21,58 @@
21 21
22/* size_t strlen(const char *str) */ 22/* size_t strlen(const char *str) */
23 23
24 .section .text,"ax",@progbits 24 .section .text,"ax",@progbits
25 .align 2 25 .align 2
26 .globl strlen 26 .globl strlen
27 .type strlen, @function 27 .type strlen, @function
28 28
29strlen: 29strlen: /* size_t strlen(const char *str); the new side returns the length in %d0 */
30 move.l 4(%sp),%a0 /* %a0 = *str */ 30 move.l 4(%sp), %a0 /* %a0 = str (the pointer argument) */
31 move.l %a0,%d0 /* %d0 = start address */ 31 move.l %a0, %a1 /* %a1 = start address, kept for the final subtraction */
32 move.l %a0, %d0 /* copy the pointer so its low bits can be inspected */
33 andi.l #3, %d0 /* %d0 = %a0 & 3 (offset within a 4-byte group) */
34 jmp.l (2,%pc,%d0.l*2) /* jump into the bra.b table below; each entry is 2 bytes, hence the *2 */
35 bra.b .bytes0 /* %a0 & 3 == 0: already 4-byte aligned */
36 bra.b .bytes3 /* %a0 & 3 == 1: 3 single bytes before alignment */
37 bra.b .bytes2 /* %a0 & 3 == 2: 2 single bytes before alignment */
38 bra.b .bytes1 /* %a0 & 3 == 3: 1 single byte before alignment */
39.bytes3:
40 tst.b (%a0)+ /* test one byte and advance %a0 */
41 beq.b .done /* found the terminating 0 byte */
42.bytes2:
43 tst.b (%a0)+
44 beq.b .done
45.bytes1:
46 tst.b (%a0)+
47 beq.b .done
48.bytes0: /* %a0 is 4-byte aligned from here on */
32 49
33 1: 50 1:
34 tst.b (%a0)+ /* test if %a0 == 0 and increment */ 51 move.l (%a0)+, %d0 /* load 4 bytes into %d0 and advance %a0 by 4 */
35 bne.b 1b /* if the test was false repeat */ 52 /* (x - 0x01010101) & ~x & 0x80808080 is nonzero iff some byte of x is 0 */
53 move.l %d0, %d1
54 subi.l #0x01010101, %d1
55 not.l %d0
56 and.l %d1, %d0
57 andi.l #0x80808080, %d0
58 beq.b 1b /* result 0: no 0 byte in these 4 bytes, load the next 4 */
36 59
37 sub.l %d0,%a0 /* how many times did we repeat? */ 60 /* the 4 bytes just loaded contain a 0 byte; test them one at a time */
38 move.l %a0,%d0 61 subq.l #4, %a0 /* step back to the start of those 4 bytes */
39 subq.l #1,%d0 /* %d0 is 1 too large due to the last increment */ 62 tst.b (%a0)+
63 beq.b .done
64 tst.b (%a0)+
65 beq.b .done
66 tst.b (%a0)+
67 beq.b .done
68 /* the fourth byte must be the 0 byte, so it is not tested and %a0 is not
69 incremented; the subq at .done must therefore be skipped */
70 .word 0x51fa /* trapf.w: takes the next 16 bits (the subq.l below) as its operand word, so the subq is not executed */
71
72.done:
73 subq.l #1, %a0 /* %a0 is 1 too large due to the last increment */
74 sub.l %a1, %a0 /* %a0 = address of the 0 byte - start address = length */
75 move.l %a0, %d0 /* return value in %d0 */
40 rts 76 rts
41 .size strlen, .-strlen 77 .size strlen, .-strlen
42 78