diff options
author | Nils Wallménius <nils@rockbox.org> | 2010-05-11 22:23:43 +0000 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2010-05-11 22:23:43 +0000 |
commit | 418c9eeb141ac751a59572fde1fcbc1e4655f064 (patch) | |
tree | 7be639f0f7495bd9d8e78ffe14c5c5a0b9396c4a /firmware | |
parent | cdcb4ba4401ce87cdbc9309612d6e7a649971398 (diff) | |
download | rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.tar.gz rockbox-418c9eeb141ac751a59572fde1fcbc1e4655f064.zip |
Faster assembler strlen for coldfire using the load-a-whole-word-and-test-it-for-null-bytes-at-once trick; benched 28% faster than the old version.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25959 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- | firmware/target/coldfire/strlen-coldfire.S | 64 |
1 files changed, 50 insertions, 14 deletions
diff --git a/firmware/target/coldfire/strlen-coldfire.S b/firmware/target/coldfire/strlen-coldfire.S index a65b0c3872..f1e5aca981 100644 --- a/firmware/target/coldfire/strlen-coldfire.S +++ b/firmware/target/coldfire/strlen-coldfire.S | |||
@@ -5,9 +5,9 @@ | |||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | 5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < |
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | 6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ |
7 | * \/ \/ \/ \/ \/ | 7 | * \/ \/ \/ \/ \/ |
8 | * $Id $ | 8 | * $Id$ |
9 | * | 9 | * |
10 | * Copyright (C) 2007 Nils Wallménius | 10 | * Copyright (C) 2010 Nils Wallménius |
11 | * | 11 | * |
12 | * This program is free software; you can redistribute it and/or | 12 | * This program is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU General Public License | 13 | * modify it under the terms of the GNU General Public License |
@@ -21,22 +21,58 @@ | |||
21 | 21 | ||
22 | /* size_t strlen(const char *str) */ | 22 | /* size_t strlen(const char *str) */ |
23 | 23 | ||
24 | .section .text,"ax",@progbits | 24 | .section .text,"ax",@progbits |
25 | .align 2 | 25 | .align 2 |
26 | .globl strlen | 26 | .globl strlen |
27 | .type strlen, @function | 27 | .type strlen, @function |
28 | 28 | ||
29 | strlen: | 29 | strlen: /* scans str for its first zero byte and returns the byte count before it in %d0 */ |
30 | move.l 4(%sp),%a0 /* %a0 = *str */ | 30 | move.l 4(%sp), %a0 /* %a0 = str, the pointer argument read from the stack */ |
31 | move.l %a0,%d0 /* %d0 = start address */ | 31 | move.l %a0, %a1 /* %a1 = start address, kept for the final subtraction */ |
32 | move.l %a0, %d0 | ||
33 | andi.l #3, %d0 /* %d0 = %a0 & 3, the offset of str within its 4-byte word */ | ||
34 | jmp.l (2,%pc,%d0.l*2) /* jump into the branch table below, entry selected by %d0 */ | ||
35 | bra.b .bytes0 /* %d0 == 0: already aligned, go straight to the word loop */ | ||
36 | bra.b .bytes3 /* %d0 == 1: test 3 single bytes to reach alignment */ | ||
37 | bra.b .bytes2 /* %d0 == 2: test 2 single bytes to reach alignment */ | ||
38 | bra.b .bytes1 /* %d0 == 3: test 1 single byte to reach alignment */ | ||
39 | .bytes3: | ||
40 | tst.b (%a0)+ /* test one byte; %a0 advances past it */ | ||
41 | beq.b .done /* zero byte found before reaching alignment */ | ||
42 | .bytes2: | ||
43 | tst.b (%a0)+ | ||
44 | beq.b .done | ||
45 | .bytes1: | ||
46 | tst.b (%a0)+ | ||
47 | beq.b .done | ||
48 | .bytes0: /* %a0 is 4-byte aligned from here on */ | ||
32 | 49 | ||
33 | 1: | 50 | 1: |
34 | tst.b (%a0)+ /* test if %a0 == 0 and increment */ | 51 | move.l (%a0)+, %d0 /* %d0 = next 4 bytes of the string, %a0 advances by 4 */ |
35 | bne.b 1b /* if the test was false repeat */ | 52 | /* null-byte trick: (x - 0x01010101) & ~x & 0x80808080 is non-zero only if x contains a zero byte */ |
53 | move.l %d0, %d1 /* %d1 = copy of the word x */ | ||
54 | subi.l #0x01010101, %d1 /* %d1 = x - 0x01010101 */ | ||
55 | not.l %d0 /* %d0 = ~x */ | ||
56 | and.l %d1, %d0 /* %d0 = (x - 0x01010101) & ~x */ | ||
57 | andi.l #0x80808080, %d0 /* keep only the top bit of each byte */ | ||
58 | beq.b 1b /* result is zero: no null byte in this word, load the next one */ | ||
36 | 59 | ||
37 | sub.l %d0,%a0 /* how many times did we repeat? */ | 60 | /* the word just loaded contains a zero byte; test its bytes one at a time to locate it */ |
38 | move.l %a0,%d0 | 61 | subq.l #4, %a0 /* step %a0 back to the start of that word */ |
39 | subq.l #1,%d0 /* %d0 is 1 too large due to the last increment */ | 62 | tst.b (%a0)+ |
63 | beq.b .done | ||
64 | tst.b (%a0)+ | ||
65 | beq.b .done | ||
66 | tst.b (%a0)+ | ||
67 | beq.b .done | ||
68 | /* the first three bytes were non-zero, so the fourth must be the zero byte; it is not tested, | ||
69 | so %a0 already points at it and the subq at .done has to be skipped */ | ||
70 | .word 0x51fa /* trapf.w: consumes the following one-word subq as its operand word, so that subq is not executed */ | ||
71 | |||
72 | .done: | ||
73 | subq.l #1, %a0 /* %a0 is 1 past the zero byte because of the post-increment; step back onto it */ | ||
74 | sub.l %a1, %a0 /* %a0 = address of the zero byte - start address = length */ | ||
75 | move.l %a0, %d0 /* return value in %d0 */ | ||
40 | rts | 76 | rts |
41 | .size strlen, .-strlen | 77 | .size strlen, .-strlen |
42 | 78 | ||