diff options
author | Jörg Hohensohn <hohensoh@rockbox.org> | 2004-03-03 07:18:26 +0000 |
---|---|---|
committer | Jörg Hohensohn <hohensoh@rockbox.org> | 2004-03-03 07:18:26 +0000 |
commit | 239a91c28cce4a120af21f7ea598217f54e17d0c (patch) | |
tree | e89ef8a4beb9b6c7c214b9ee004fc98af3432f62 | |
parent | 860586d992a1a434b3d40e594a755e5fb450f394 (diff) | |
download | rockbox-239a91c28cce4a120af21f7ea598217f54e17d0c.tar.gz rockbox-239a91c28cce4a120af21f7ea598217f54e17d0c.zip |
14% faster bitswap, thanks Jens
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4337 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- | firmware/bitswap.S | 81 |
1 files changed, 41 insertions, 40 deletions
diff --git a/firmware/bitswap.S b/firmware/bitswap.S index da628a3b7f..990ecb4d00 100644 --- a/firmware/bitswap.S +++ b/firmware/bitswap.S | |||
@@ -18,7 +18,7 @@ | |||
18 | ****************************************************************************/ | 18 | ****************************************************************************/ |
19 | 19 | ||
20 | .section .icode,"ax",@progbits | 20 | .section .icode,"ax",@progbits |
21 | .align 4 | 21 | .align 2 |
22 | .global _bitswap | 22 | .global _bitswap |
23 | .type _bitswap,@function | 23 | .type _bitswap,@function |
24 | 24 | ||
@@ -26,68 +26,69 @@ | |||
26 | * | 26 | * |
27 | * r0 Temporary (required by some instructions) | 27 | * r0 Temporary (required by some instructions) |
28 | * r1 Low byte | 28 | * r1 Low byte |
29 | * r2 High byte | 29 | * r2 High byte / final result |
30 | * r3 Result after flip | 30 | * r4 &Data |
31 | * r4 Data | ||
32 | * r5 Length | 31 | * r5 Length |
33 | * r6 1 | ||
34 | * r7 Flip table | 32 | * r7 Flip table |
35 | */ | 33 | */ |
36 | 34 | ||
35 | /* The instruction order below is a bit strange, because: | ||
36 | * 1) Keeping load/stores on longword boundaries means the instruction fetch | ||
37 | * won't compete with the memory access (because instructions are fetched | ||
38 | * in pairs). | ||
39 | * 2) Using the result of a fetch in the next instruction causes a stall | ||
40 | * (except in certain circumstances). | ||
41 | * See the SH-1 programming manual for details. | ||
42 | */ | ||
43 | |||
37 | _bitswap: | 44 | _bitswap: |
38 | mov.l .fliptable,r7 | 45 | mov.l .fliptable,r7 |
39 | mov #1,r6 | 46 | add #-2,r4 /* ptr is used shifted by 2 */ |
47 | add r4,r5 /* r5 = end_address - 2 */ | ||
48 | add #-1,r5 /* r5 = &last_byte - 2 */ | ||
40 | mov r4,r0 | 49 | mov r4,r0 |
41 | tst #1,r0 /* odd address? */ | 50 | tst #1,r0 /* even address? */ |
42 | bt .init /* no, address is even */ | 51 | bt .init /* yes */ |
43 | 52 | ||
44 | mov.b @r4,r0 /* swap first byte */ | 53 | add #1,r4 /* r4 now even */ |
54 | mov.b @(1,r4),r0 /* no, swap first byte */ | ||
45 | extu.b r0,r0 | 55 | extu.b r0,r0 |
46 | mov.b @(r0,r7),r0 | 56 | mov.b @(r0,r7),r0 |
47 | mov.b r0,@r4 | 57 | mov.b r0,@(1,r4) |
48 | add #1,r4 | ||
49 | add #-1,r5 | ||
50 | bra .init | ||
51 | 58 | ||
52 | /* The instruction order below is a bit strange, because: | 59 | .init: |
53 | * 1) Keeping load/stores on longword boundaries means the instruction | 60 | cmp/hi r4,r5 /* at least 2 bytes to swap? */ |
54 | * fetch won't compete with the memory access (because instructions | 61 | bf .last /* no, skip main loop */ |
55 | * are fetched in pairs). | ||
56 | * 2) Using the result of a fetch in the next instruction causes a | ||
57 | * stall (except in certain circumstances). | ||
58 | * See the SH-1 programming manual for details. | ||
59 | */ | ||
60 | 62 | ||
61 | .loop: | 63 | .loop: |
62 | mov.w @r4,r1 /* data to flip */ | 64 | mov.w @(2,r4),r0 /* data to flip */ |
63 | add #-2,r5 | 65 | add #2,r4 /* early increment */ |
64 | swap.b r1,r2 /* get high byte */ | 66 | swap.b r0,r2 /* get high byte */ |
67 | extu.b r0,r0 /* prepare low byte */ | ||
68 | mov.b @(r0,r7),r1 /* swap low byte */ | ||
65 | extu.b r2,r0 /* prepare high byte */ | 69 | extu.b r2,r0 /* prepare high byte */ |
66 | mov.b @(r0,r7),r2 /* swap high byte */ | 70 | mov.b @(r0,r7),r2 /* swap high byte */ |
67 | extu.b r1,r0 /* prepare low byte */ | 71 | extu.b r1,r1 /* zero extend low byte */ |
68 | mov.b @(r0,r7),r1 /* swap low byte */ | 72 | shll8 r2 /* shift high byte, low byte zeroed */ |
69 | extu.b r2,r2 /* zero extend high byte */ | 73 | or r1,r2 /* put low byte in result */ |
70 | swap.b r2,r3 /* put high byte in result */ | 74 | mov.w r2,@r4 /* store result, ptr already incr'd */ |
71 | extu.b r1,r0 /* zero extend low byte */ | 75 | cmp/hi r4,r5 /* while &last_byte > data */ |
72 | or r0,r3 /* put low byte in result */ | 76 | bt .loop |
73 | mov.w r3,@r4 /* store result */ | ||
74 | add #2,r4 | ||
75 | .init: | ||
76 | cmp/gt r6,r5 /* while [bytes remaining] > 1 */ | ||
77 | bt .loop /* (at least 2 bytes left) */ | ||
78 | 77 | ||
79 | cmp/eq r6,r5 | 78 | .last: |
80 | bf .exit /* if not 1 byte left, exit */ | 79 | cmp/eq r4,r5 /* if already past (&last_byte - 2), exit */ |
80 | bf .exit | ||
81 | 81 | ||
82 | mov.b @r4,r0 /* swap last byte */ | 82 | mov.b @(2,r4),r0 /* swap last byte */ |
83 | extu.b r0,r0 | 83 | extu.b r0,r0 |
84 | mov.b @(r0,r7),r0 | 84 | mov.b @(r0,r7),r0 |
85 | mov.b r0,@r4 | 85 | mov.b r0,@(2,r4) |
86 | |||
86 | .exit: | 87 | .exit: |
87 | rts | 88 | rts |
88 | nop | 89 | nop |
89 | 90 | ||
90 | .align 4 | 91 | .align 2 |
91 | 92 | ||
92 | .fliptable: | 93 | .fliptable: |
93 | .long _fliptable | 94 | .long _fliptable |