summaryrefslogtreecommitdiff
path: root/firmware/target/sh/bitswap.S
diff options
context:
space:
mode:
Diffstat (limited to 'firmware/target/sh/bitswap.S')
-rw-r--r--firmware/target/sh/bitswap.S150
1 files changed, 150 insertions, 0 deletions
diff --git a/firmware/target/sh/bitswap.S b/firmware/target/sh/bitswap.S
new file mode 100644
index 0000000000..08e609b9e5
--- /dev/null
+++ b/firmware/target/sh/bitswap.S
@@ -0,0 +1,150 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2004 by Jens Arnold
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20 .section .icode,"ax",@progbits
21 .align 2
22 .global _bitswap
23 .type _bitswap,@function
24
25/* Flips the bits of all bytes in a memory area (required for mp3 data on
26 * the Archos). This version is optimized for speed and size.
27 *
28 * arguments:
29 * r4 - start address
30 * r5 - length
31 *
32 * return value: void
33 *
34 * register usage:
35 * r0 - temporary
36 * r1 - bit mask for rounding to long bound / low byte (after swap)
37 * r2 - high byte (after swap) / combined result
38 * r4 - data address - 4
39 * r5 - end address - 4
40 * r7 - flip table (addressing with signed offset)
41 *
42 * The instruction order below is devised in a way to utilize the pipelining
43 * of the SH1 to the max.
44 */
45
46_bitswap:
47 mova _fliptable,r0
48 mov r0,r7
49 add #-4,r4 /* address is shifted by 4 */
50 add r4,r5 /* r5 = end_address - 4 */
51 cmp/hi r4,r5 /* at least something to do? */
52 bf .exit /* no, get out of here! */
53
54 add #-3,r5 /* end offset for flipping 4 bytes per pass */
55 mov r4,r0
56 tst #1,r0 /* even address? */
57 bt .start2_w /* yes, jump into main loop */
58
59 /* no, flip first byte */
60 mov.b @(4,r4),r0 /* load byte, sign extension! */
61 add #1,r4 /* early increment */
62 mov.b @(r0,r7),r0 /* fliptable offset is signed */
63 bra .start2_w /* jump into main loop */
64 mov.b r0,@(3,r4) /* store byte */
65
66 /* main loop: flips 2 words per pass */
67 .align 2
68.loop2_w:
69 mov.w @(6,r4),r0 /* load second word */
70 add #4,r4 /* early increment */
71 swap.b r0,r2 /* get high byte (2nd word) */
72 exts.b r0,r0 /* prepare low byte (2nd word) */
73 mov.b @(r0,r7),r1 /* swap low byte (2nd word) */
74 exts.b r2,r0 /* prepare high byte (2nd word) */
75 mov.b @(r0,r7),r2 /* swap high byte (2nd word) */
76 extu.b r1,r0 /* zero extend low byte (2nd word) */
77 mov.w @r4,r1 /* load first word */
78 shll8 r2 /* shift high byte (2nd word), low byte zeroed */
79 or r2,r0 /* put low byte (2nd word) in result */
80 swap.b r1,r2 /* get high byte (1st word) */
81 mov.w r0,@(2,r4) /* store result (2nd word) */
82 exts.b r1,r0 /* prepare low byte (1st word) */
83 mov.b @(r0,r7),r1 /* swap low byte (1st word) */
84 exts.b r2,r0 /* prepare high byte (1st word) */
85 mov.b @(r0,r7),r2 /* swap high byte (1st word) */
86 extu.b r1,r0 /* zero extend low byte (1st word) */
87 shll8 r2 /* shift high byte (1st word), low byte zeroed */
88 or r2,r0 /* put low byte (1st word) in result */
89 mov.w r0,@r4 /* store result (1st word) */
90.start2_w:
91 cmp/hi r4,r5 /* runs r4 up to last long bound */
92 bt .loop2_w
93
94 bra .start_b2 /* jump into trailing byte loop */
95 add #3,r5 /* reset end offset */
96
97 /* trailing byte loop: flips 0..3 bytes */
98.loop_b2:
99 mov.b @(4,r4),r0 /* loand byte, sign extension! */
100 add #1,r4 /* early increment */
101 mov.b @(r0,r7),r0 /* fliptable offset is signed */
102 mov.b r0,@(3,r4) /* store byte */
103.start_b2:
104 cmp/hi r4,r5 /* runs r4 up to end address */
105 bt .loop_b2
106
107.exit:
108 rts
109 nop
110
111 .align 2
112 .global _fliptable
113
114 .byte 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1
115 .byte 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1
116 .byte 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9
117 .byte 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9
118 .byte 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5
119 .byte 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5
120 .byte 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed
121 .byte 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd
122 .byte 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3
123 .byte 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3
124 .byte 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb
125 .byte 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb
126 .byte 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7
127 .byte 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7
128 .byte 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef
129 .byte 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
130_fliptable:
131 .byte 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0
132 .byte 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0
133 .byte 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8
134 .byte 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8
135 .byte 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4
136 .byte 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4
137 .byte 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec
138 .byte 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc
139 .byte 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2
140 .byte 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2
141 .byte 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea
142 .byte 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa
143 .byte 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6
144 .byte 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6
145 .byte 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee
146 .byte 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe
147
148
149.end:
150 .size _bitswap,.end-_bitswap