diff options
Diffstat (limited to 'firmware/descramble.S')
-rw-r--r-- | firmware/descramble.S | 121 |
1 files changed, 56 insertions, 65 deletions
diff --git a/firmware/descramble.S b/firmware/descramble.S index e124f0ceff..34e4d830c8 100644 --- a/firmware/descramble.S +++ b/firmware/descramble.S | |||
@@ -7,7 +7,7 @@ | |||
7 | * \/ \/ \/ \/ \/ | 7 | * \/ \/ \/ \/ \/ |
8 | * $Id$ | 8 | * $Id$ |
9 | * | 9 | * |
10 | * Copyright (C) 2003 by Magnus Holmgren | 10 | * Copyright (C) 2004 by Jens Arnold |
11 | * | 11 | * |
12 | * All files in this archive are subject to the GNU General Public License. | 12 | * All files in this archive are subject to the GNU General Public License. |
13 | * See the file COPYING in the source tree root for full license agreement. | 13 | * See the file COPYING in the source tree root for full license agreement. |
@@ -34,67 +34,59 @@ | |||
34 | * r4 - source (unsigned char*) | 34 | * r4 - source (unsigned char*) |
35 | * r5 - dest (unsigned char*) | 35 | * r5 - dest (unsigned char*) |
36 | * r6 - len (unsigned int) | 36 | * r6 - len (unsigned int) |
37 | */ | 37 | * |
38 | 38 | * Register usage: | |
39 | /* Register usage: | 39 | * r0 - data |
40 | * i - r0 | 40 | * r1 - temp |
41 | * i4 - r1 | 41 | * r2 - checksum |
42 | * checksum - r2 | 42 | * r3 - current src address |
43 | * addr - r3 | 43 | * r4 - source |
44 | * source - r4 | 44 | * r5 - dest |
45 | * dest - r5 | 45 | * r6 - len -> source_end |
46 | * len - r6 | 46 | * r7 - dest_end |
47 | * len4 - r7 | 47 | * r8 - len / 4 |
48 | * data - r8 | ||
49 | * temp - r9 | ||
50 | */ | 48 | */ |
51 | 49 | ||
52 | _descramble: | 50 | _descramble: |
53 | mov.l r8,@-r15 | 51 | mov.l r8,@-r15 |
54 | mov.l r9,@-r15 | 52 | mov r6,r8 |
55 | mov #0,r0 /* i = 0 */ | 53 | shlr2 r8 /* r8 = len / 4 */ |
56 | mov #0,r1 /* i4 = i / 4 */ | 54 | mov r5,r7 |
57 | mov #0,r2 /* checksum = 0 */ | 55 | add r6,r7 /* dest_end = dest + len */ |
56 | add r4,r6 /* source_end = source + len */ | ||
58 | mov r4,r3 /* addr = source */ | 57 | mov r4,r3 /* addr = source */ |
59 | mov r6,r7 | 58 | mov #0,r2 /* checksum = 0 */ |
60 | shlr2 r7 /* len4 = len / 4 */ | ||
61 | 59 | ||
62 | .loop: | 60 | .loop: |
63 | mov.b @r3,r8 /* data = source[addr] */ | 61 | mov.b @r3,r0 /* data = *addr */ |
64 | add r7,r3 /* addr += len4 */ | 62 | add r8,r3 /* addr += len / 4 */ |
65 | extu.b r8,r8 /* we want the data extended unsigned */ | 63 | extu.b r0,r0 /* zero extend data byte */ |
66 | shlr r8 /* start rotate right of low byte */ | 64 | swap.b r0,r1 /* byte swap low word to temp */ |
67 | movt r9 /* get bit 0 that was shifted out */ | 65 | or r1,r0 /* r0's two lower bytes now identical */ |
68 | shll8 r9 | 66 | shlr r0 /* -> this equals "rotr.b r0" now */ |
69 | shlr r9 /* move it to bit 7 */ | 67 | not r0,r0 /* negate */ |
70 | or r9,r8 /* finish rotate right */ | 67 | not r0,r0 /* invert all bits (one's complement) */ |
71 | not r8,r8 | 69 | mov.b r0,@r5 /* *dest = data */ |
72 | extu.b r8,r8 | 70 | add r0,r2 /* checksum += data */ |
73 | mov.b r8,@(r0,r5) /* dest[i] = data */ | 71 | add #1,r5 /* dest++ */ |
74 | add r8,r2 /* checksum += data[i] */ | 72 | cmp/hi r3,r6 /* addr < source_end ? */ |
75 | add #1,r0 /* i++ */ | 73 | bt .loop |
76 | tst #3,r0 /* reset addr? */ | 74 | |
77 | bf .loop | 75 | add #1,r4 /* source++ */ |
78 | 76 | mov r4,r3 /* addr = source */ | |
79 | add #1,r1 /* i4++ */ | 77 | cmp/hi r5,r7 /* dest < dest_end */ |
80 | mov r4,r3 | 78 | bt .loop |
81 | add r1,r3 /* addr = source + i4 */ | 79 | |
82 | cmp/hs r6,r0 /* all done? */ | 80 | /* 15 clock cycles if no reset of source address, 19 if reset, |
83 | bf .loop | 81 | * avg. 16 cycles per byte. The previous version (by Magnus) needed 17-22 cycles per byte |
84 | 82 | */ | |
85 | /* 17 cycles if no "reset," 22 if reset => average 18.25 cycles per | 83 | |
86 | * byte, assuming no wait states from reads or writes. "Old" algorithm | ||
87 | * needed 24-26 cycles per byte, under the same assumptions. | ||
88 | */ | ||
89 | |||
90 | mov.l @r15+,r9 | ||
91 | mov.l @r15+,r8 | 84 | mov.l @r15+,r8 |
92 | rts | 85 | rts |
93 | extu.w r2,r0 | 86 | extu.w r2,r0 |
94 | 87 | ||
95 | 88 | ||
96 | 89 | /* Move len bytes from source to dest (which must be suitably aligned for | |
97 | /* Move len bytes from source to dest (which must be suitably aligned for | ||
98 | * long moves) and jump to dest + 0x200. | 90 | * long moves) and jump to dest + 0x200. |
99 | * | 91 | * |
100 | * Arguments: | 92 | * Arguments: |
@@ -103,26 +95,25 @@ _descramble: | |||
103 | * r6 - len | 95 | * r6 - len |
104 | */ | 96 | */ |
105 | 97 | ||
98 | .align 2 | ||
106 | .global _rolo_restart | 99 | .global _rolo_restart |
107 | .type _rolo_restart,@function | 100 | .type _rolo_restart,@function |
108 | 101 | ||
109 | _rolo_restart: | 102 | _rolo_restart: |
110 | mov.w .offset,r0 | 103 | mov r5,r0 |
111 | mov r5,r7 | 104 | sub r4,r0 /* r0 = dest - source */ |
112 | add r0,r7 /* start_func() */ | 105 | add #-4,r0 /* adjust for early increment */ |
113 | mov r6,r0 | 106 | add r4,r6 /* r6 = source + len */ |
114 | shlr2 r0 | 107 | mov.w .offset,r1 |
115 | add #1,r0 | 108 | add r1,r5 /* start_func() */ |
116 | .copy: | 109 | |
110 | .copy: /* loop takes 6 cycles per longword */ | ||
117 | mov.l @r4+,r1 | 111 | mov.l @r4+,r1 |
118 | add #-1,r0 | 112 | cmp/hi r4,r6 |
119 | mov.l r1,@r5 | 113 | mov.l r1,@(r0,r4) |
120 | add #4,r5 | 114 | bt .copy |
121 | cmp/eq #0,r0 | 115 | |
122 | bf .copy | 116 | jmp @r5 |
123 | |||
124 | jmp @r7 | ||
125 | |||
126 | nop | 117 | nop |
127 | 118 | ||
128 | .offset: | 119 | .offset: |