diff options
-rw-r--r-- | apps/codecs/demac/libdemac/demac_config.h | 4 | ||||
-rw-r--r-- | apps/codecs/lib/SOURCES | 4 | ||||
-rw-r--r-- | apps/codecs/lib/codeclib.h | 2 | ||||
-rw-r--r-- | apps/codecs/lib/udiv32_arm.S | 319 | ||||
-rw-r--r-- | apps/codecs/lib/udiv32_armv4.S | 134 |
5 files changed, 323 insertions, 140 deletions
diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h index 1bbdef3d56..1beda2b9cd 100644 --- a/apps/codecs/demac/libdemac/demac_config.h +++ b/apps/codecs/demac/libdemac/demac_config.h | |||
@@ -57,11 +57,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
57 | #elif defined(CPU_S5L870X) | 57 | #elif defined(CPU_S5L870X) |
58 | #define ICODE_SECTION_DEMAC_ARM .icode | 58 | #define ICODE_SECTION_DEMAC_ARM .icode |
59 | #define ICODE_ATTR_DEMAC ICODE_ATTR | 59 | #define ICODE_ATTR_DEMAC ICODE_ATTR |
60 | #define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR | 60 | #define IBSS_ATTR_DEMAC_INSANEBUF |
61 | #else | 61 | #else |
62 | #define ICODE_SECTION_DEMAC_ARM .text | 62 | #define ICODE_SECTION_DEMAC_ARM .text |
63 | #define ICODE_ATTR_DEMAC | 63 | #define ICODE_ATTR_DEMAC |
64 | #define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR | 64 | #define IBSS_ATTR_DEMAC_INSANEBUF |
65 | #endif | 65 | #endif |
66 | 66 | ||
67 | #else /* !ROCKBOX */ | 67 | #else /* !ROCKBOX */ |
diff --git a/apps/codecs/lib/SOURCES b/apps/codecs/lib/SOURCES index 3a741a5c81..ffbe1af92e 100644 --- a/apps/codecs/lib/SOURCES +++ b/apps/codecs/lib/SOURCES | |||
@@ -7,9 +7,7 @@ mdct_lookup.c | |||
7 | #ifdef CPU_ARM | 7 | #ifdef CPU_ARM |
8 | mdct_arm.S | 8 | mdct_arm.S |
9 | setjmp_arm.S | 9 | setjmp_arm.S |
10 | #if ARM_ARCH == 4 | 10 | udiv32_arm.S |
11 | udiv32_armv4.S | ||
12 | #endif | ||
13 | #endif | 11 | #endif |
14 | 12 | ||
15 | #ifdef CPU_COLDFIRE | 13 | #ifdef CPU_COLDFIRE |
diff --git a/apps/codecs/lib/codeclib.h b/apps/codecs/lib/codeclib.h index 517264f3a5..926035f05e 100644 --- a/apps/codecs/lib/codeclib.h +++ b/apps/codecs/lib/codeclib.h | |||
@@ -65,7 +65,7 @@ void qsort(void *base, size_t nmemb, size_t size, int(*compar)(const void *, con | |||
65 | 65 | ||
66 | extern void mdct_backward(int n, int32_t *in, int32_t *out); | 66 | extern void mdct_backward(int n, int32_t *in, int32_t *out); |
67 | 67 | ||
68 | #if defined(CPU_ARM) && (ARM_ARCH == 4) | 68 | #ifdef CPU_ARM |
69 | /* optimised unsigned integer division for ARMv4, in IRAM */ | 69 | /* optimised unsigned integer division for ARMv4, in IRAM */ |
70 | unsigned udiv32_arm(unsigned a, unsigned b); | 70 | unsigned udiv32_arm(unsigned a, unsigned b); |
71 | #define UDIV32(a, b) udiv32_arm(a, b) | 71 | #define UDIV32(a, b) udiv32_arm(a, b) |
diff --git a/apps/codecs/lib/udiv32_arm.S b/apps/codecs/lib/udiv32_arm.S new file mode 100644 index 0000000000..c46a09be5c --- /dev/null +++ b/apps/codecs/lib/udiv32_arm.S | |||
@@ -0,0 +1,319 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2008 by Jens Arnold | ||
11 | * Copyright (C) 2009 by Andrew Mahone | ||
12 | * | ||
13 | * Optimised unsigned integer division for ARM (ARMv4 and ARMv5+ variants) | ||
14 | * | ||
15 | * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System | ||
16 | * Developer's Guide | ||
17 | * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) | ||
18 | * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 | ||
19 | * Free Software Foundation, Inc. | ||
20 | * | ||
21 | * This program is free software; you can redistribute it and/or | ||
22 | * modify it under the terms of the GNU General Public License | ||
23 | * as published by the Free Software Foundation; either version 2 | ||
24 | * of the License, or (at your option) any later version. | ||
25 | * | ||
26 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
27 | * KIND, either express or implied. | ||
28 | * | ||
29 | ****************************************************************************/ | ||
30 | |||
31 | #include "config.h" | ||
32 | /* Codecs should not normally do this, but we need to check a macro, and | ||
33 | * codecs.h would confuse the assembler. */ | ||
34 | |||
35 | #ifdef USE_IRAM | ||
36 | #define DIV_RECIP | ||
37 | .section .icode,"ax",%progbits | ||
38 | #else | ||
39 | .text | ||
40 | #endif | ||
41 | .align | ||
42 | .global udiv32_arm | ||
43 | .type udiv32_arm,%function | ||
44 | |||
45 | #if ARM_ARCH < 5 | ||
46 | /* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2) | ||
47 | for dividing a 30-bit value by a 15-bit value, with two operations per | ||
48 | iteration by storing quotient and remainder together and adding the previous | ||
49 | quotient bit during trial subtraction. Modified to work with any dividend | ||
50 | and divisor both less than 1 << 30, and skipping trials by calculating bits | ||
51 | in output. */ | ||
52 | .macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder | ||
53 | |||
54 | mov \bits, #1 /* bits = 1 + number of places the divisor gets shifted left */ | ||
55 | /* Shift the divisor left until it aligns with the numerator. If it already | ||
56 | has the high bit set, this is fine, everything inside .rept will be | ||
57 | skipped, and the add before and adc after will set the one-bit result | ||
58 | to zero. */ | ||
59 | cmn \divisor, \dividend, lsr #16 /* callers pass \divisor negated (see the rsbs in udiv32_arm), so cmn acts as a compare */ | ||
60 | movcs \divisor, \divisor, lsl #16 | ||
61 | addcs \bits, \bits, #16 | ||
62 | cmn \divisor, \dividend, lsr #8 | ||
63 | movcs \divisor, \divisor, lsl #8 | ||
64 | addcs \bits, \bits, #8 | ||
65 | cmn \divisor, \dividend, lsr #4 | ||
66 | movcs \divisor, \divisor, lsl #4 | ||
67 | addcs \bits, \bits, #4 | ||
68 | cmn \divisor, \dividend, lsr #2 | ||
69 | movcs \divisor, \divisor, lsl #2 | ||
70 | addcs \bits, \bits, #2 | ||
71 | cmn \divisor, \dividend, lsr #1 | ||
72 | movcs \divisor, \divisor, lsl #1 | ||
73 | addcs \bits, \bits, #1 | ||
74 | adds \result, \dividend, \divisor /* first trial subtraction (the divisor is negative) */ | ||
75 | subcc \result, \result, \divisor /* no carry: the trial failed, restore the dividend */ | ||
76 | rsb \curbit, \bits, #31 /* number of unrolled steps to skip */ | ||
77 | add pc, pc, \curbit, lsl #3 /* each step below is two instructions = 8 bytes */ | ||
78 | nop /* padding: pc reads as two instructions ahead of the add */ | ||
79 | .rept 30 | ||
80 | adcs \result, \divisor, \result, lsl #1 | ||
81 | /* Fix the remainder portion of the result. This must be done because the | ||
82 | handler for 32-bit numerators needs the remainder. */ | ||
83 | subcc \result, \result, \divisor | ||
84 | .endr | ||
85 | /* Shift remainder/quotient left one, add final quotient bit */ | ||
86 | adc \result, \result, \result | ||
87 | mov \remainder, \result, lsr \bits /* the remainder sits above the low \bits quotient bits */ | ||
88 | eor \quotient, \result, \remainder, lsl \bits /* clear the remainder bits, leaving the quotient */ | ||
89 | .endm | ||
90 | |||
91 | #ifdef CPU_PP | ||
92 | #if CONFIG_CPU == PP5020 | ||
93 | .set recip_max, 5952 /* recip_max: largest divisor served by the reciprocal table in udiv32_arm */ | ||
94 | #elif CONFIG_CPU == PP5002 | ||
95 | .set recip_max, 1472 | ||
96 | #else | ||
97 | .set recip_max, 14208 | ||
98 | #endif | ||
99 | #elif CONFIG_CPU == AS3525 | ||
100 | .set recip_max, 42752 | ||
101 | #elif CONFIG_CPU == S5L8701 | ||
102 | .set recip_max, 9600 | ||
103 | #elif CONFIG_CPU == S5L8700 | ||
104 | .set recip_max, 5504 | ||
105 | #endif /* NOTE(review): no default recip_max for other CPUs - confirm DIV_RECIP is only defined on the targets listed above */ | ||
106 | |||
107 | udiv32_arm: | ||
108 | #ifdef DIV_RECIP | ||
109 | cmp r1, #3 /* divisors 0, 1 and 2 are handled at .L_udiv_tiny */ | ||
110 | bcc .L_udiv_tiny | ||
111 | cmp r1, #recip_max /* divisors above recip_max have no table entry */ | ||
112 | bhi .L_udiv | ||
113 | adr r3, .L_udiv_recip_table-12 /* the table starts at divisor 3, so bias the base by 3 words */ | ||
114 | ldr r2, [r3, r1, lsl #2] /* r2 = table entry for this divisor */ | ||
115 | mov r3, r0 /* keep the numerator for the check below */ | ||
116 | umull ip, r0, r2, r0 /* r0 = high word of entry * numerator (quotient estimate) */ | ||
117 | mul r2, r0, r1 /* r2 = estimate * divisor */ | ||
118 | cmp r3, r2 | ||
119 | bxcs lr /* numerator >= estimate * divisor: return the estimate */ | ||
120 | sub r0, r0, #1 /* otherwise return estimate - 1 */ | ||
121 | bx lr | ||
122 | .L_udiv_tiny: | ||
123 | cmp r1, #1 | ||
124 | movhi r0, r0, lsr #1 /* divisor 2: shift right by one */ | ||
125 | bxcs lr /* divisor 1 or 2: done (r0 is unchanged for 1) */ | ||
126 | b .L_div0 /* divisor 0 */ | ||
127 | #endif | ||
128 | .L_udiv: | ||
129 | /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor | ||
130 | and add the next bit of the result. The correction code at .L_udiv32 | ||
131 | does not need the divisor inverted, but can be modified to work with it, | ||
132 | and this allows the zero divisor test to be done early and without an | ||
133 | explicit comparison. */ | ||
134 | rsbs r1, r1, #0 | ||
135 | #ifndef DIV_RECIP | ||
136 | beq .L_div0 | ||
137 | #endif | ||
138 | tst r0, r0 | ||
139 | /* High bit must be unset, otherwise shift numerator right, calculate, | ||
140 | and correct results. As this case is very uncommon we want to avoid | ||
141 | any other delays on the main path in handling it, so the long divide | ||
142 | calls the short divide as a function. */ | ||
143 | bmi .L_udiv32 | ||
144 | .L_udiv31: | ||
145 | ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1 | ||
146 | bx lr | ||
147 | .L_udiv32: | ||
148 | /* Store the original numerator and the negated divisor; we need them to | ||
149 | correct the result. They go below sp, which is not updated (no writeback). */ | ||
150 | stmdb sp, { r0, r1, lr } | ||
151 | /* The divisor cannot be zero here: zero divisors branch to .L_div0 before | ||
152 | .L_udiv32 is reached. Halve the numerator so that its high bit is clear. */ | ||
153 | mov r0, r0, lsr #1 | ||
154 | bl .L_udiv31 | ||
155 | ldmdb sp, { r2, r3, lr } | ||
156 | /* Move the low bit of the original numerator to the carry bit */ | ||
157 | movs r2, r2, lsr #1 | ||
158 | /* Shift the remainder left one and add in the carry bit */ | ||
159 | adc r1, r1, r1 | ||
160 | /* Add the negated divisor (i.e. subtract the original divisor) to the | ||
161 | remainder, setting carry if the result is non-negative */ | ||
162 | adds r1, r1, r3 | ||
163 | /* Shift quotient left one and add carry bit */ | ||
164 | adc r0, r0, r0 | ||
165 | bx lr | ||
166 | .L_div0: | ||
167 | /* __div0 expects the calling address on the top of the stack */ | ||
168 | stmdb sp!, { lr } | ||
169 | mov r0, #0 | ||
170 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
171 | bl __div0 | ||
172 | #else | ||
173 | ldr pc, [pc, #-4] /* jump through the literal word that follows */ | ||
174 | .word __div0 | ||
175 | #endif | ||
176 | #ifdef DIV_RECIP | ||
177 | .L_udiv_recip_table: | ||
178 | .set div, 3 /* one word is emitted per divisor from 3 to recip_max */ | ||
179 | .rept recip_max - 2 | ||
180 | .if (div - 1) & div | ||
181 | .set q, 0x40000000 / div /* not a power of two: start from 2^30 / div and extend by two bits */ | ||
182 | .set r, (0x40000000 - (q * div))<<1 | ||
183 | .set q, q << 1 | ||
184 | .if r >= div | ||
185 | .set q, q + 1 | ||
186 | .set r, r - div | ||
187 | .endif | ||
188 | .set r, r << 1 | ||
189 | .set q, q << 1 | ||
190 | .if r >= div | ||
191 | .set q, q + 1 | ||
192 | .set r, r - div | ||
193 | .endif | ||
194 | .set q, q + 1 /* entry = floor(2^32 / div) + 1 */ | ||
195 | .else | ||
196 | .set q, 0x40000000 / div * 4 /* power of two: entry = 2^32 / div */ | ||
197 | .endif | ||
198 | .word q | ||
199 | .set div, div+1 | ||
200 | .endr | ||
201 | #endif | ||
202 | .size udiv32_arm, . - udiv32_arm | ||
203 | |||
204 | #else | ||
205 | .macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label | ||
206 | cmp \numerator, \divisor /* numerator < divisor: the quotient is 0 (label 30) */ | ||
207 | clz \bits, \divisor | ||
208 | bcc 30f | ||
209 | mov \inv, \divisor, lsl \bits /* normalise: divisor shifted up by its leading-zero count */ | ||
210 | add \neg, pc, \inv, lsr #25 /* pc-relative address used for the estimate table lookup */ | ||
211 | cmp \inv, #1<<31 | ||
212 | ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] | ||
213 | bls 20f /* inv == 1<<31: power-of-two divisor; inv below that: divisor is 0 */ | ||
214 | subs \bits, \bits, #7 | ||
215 | rsb \neg, \divisor, #0 | ||
216 | movpl \divisor, \inv, lsl \bits | ||
217 | bmi 10f | ||
218 | mul \inv, \divisor, \neg | ||
219 | smlawt \divisor, \divisor, \inv, \divisor | ||
220 | mul \inv, \divisor, \neg | ||
221 | /* This will save a cycle on ARMv6, but does not produce a correct result | ||
222 | if numerator sign bit is set. This case accounts for about 1 in 10^7 of | ||
223 | divisions, done by the APE decoder, so we specialize for the more common | ||
224 | case and handle the uncommon large-numerator separately */ | ||
225 | #if ARM_ARCH >= 6 | ||
226 | tst \numerator, \numerator | ||
227 | smmla \divisor, \divisor, \inv, \divisor | ||
228 | bmi 40f | ||
229 | smmul \inv, \numerator, \divisor | ||
230 | #else | ||
231 | mov \bits, #0 | ||
232 | smlal \bits, \divisor, \inv, \divisor /* multiplier operands ordered so Rm differs from RdHi/RdLo (pre-ARMv6 operand restriction); same product */ | ||
233 | umull \bits, \inv, \numerator, \divisor | ||
234 | #endif | ||
235 | add \numerator, \numerator, \neg | ||
236 | mla \divisor, \inv, \neg, \numerator | ||
237 | mov \quotient, \inv | ||
238 | cmn \divisor, \neg | ||
239 | addcc \quotient, \quotient, #1 | ||
240 | addpl \quotient, \quotient, #2 | ||
241 | bx lr | ||
242 | 10: | ||
243 | rsb \bits, \bits, #0 | ||
244 | sub \inv, \inv, #4 | ||
245 | mov \divisor, \inv, lsr \bits | ||
246 | #if ARM_ARCH >= 6 | ||
247 | tst \numerator, \numerator | ||
248 | smmla \divisor, \divisor, \inv, \divisor | ||
249 | bmi 50f | ||
250 | smmul \inv, \numerator, \divisor | ||
251 | #else | ||
252 | mov \bits, #0 | ||
253 | smlal \bits, \divisor, \inv, \divisor /* multiplier operands ordered so Rm differs from RdHi/RdLo (pre-ARMv6 operand restriction); same product */ | ||
254 | umull \bits, \inv, \numerator, \divisor | ||
255 | #endif | ||
256 | mla \divisor, \inv, \neg, \numerator | ||
257 | mov \quotient, \inv | ||
258 | cmn \neg, \divisor, lsr #1 | ||
259 | addcs \divisor, \divisor, \neg, lsl #1 | ||
260 | addcs \quotient, \quotient, #2 | ||
261 | cmn \neg, \divisor | ||
262 | addcs \quotient, \quotient, #1 | ||
263 | bx lr | ||
264 | 20: | ||
265 | rsb \bits, \bits, #31 /* shift count = 31 - clz(divisor); needed whether or not a div0 label is given */ | ||
266 | .ifnc "", "\div0label" | ||
267 | bne \div0label /* flags are still those of the cmp against 1<<31: ne means the divisor is 0 */ | ||
268 | .endif | ||
269 | mov \quotient, \numerator, lsr \bits | ||
270 | bx lr | ||
271 | 30: | ||
272 | mov \quotient, #0 | ||
273 | bx lr | ||
274 | #if ARM_ARCH >= 6 | ||
275 | 40: | ||
276 | umull \bits, \inv, \numerator, \divisor | ||
277 | add \numerator, \numerator, \neg | ||
278 | mla \divisor, \inv, \neg, \numerator | ||
279 | mov \quotient, \inv | ||
280 | cmn \divisor, \neg | ||
281 | addcc \quotient, \quotient, #1 | ||
282 | addpl \quotient, \quotient, #2 | ||
283 | bx lr | ||
284 | 50: | ||
285 | umull \bits, \inv, \numerator, \divisor | ||
286 | mla \divisor, \inv, \neg, \numerator | ||
287 | mov \quotient, \inv | ||
288 | cmn \neg, \divisor, lsr #1 | ||
289 | addcs \divisor, \divisor, \neg, lsl #1 | ||
290 | addcs \quotient, \quotient, #2 | ||
291 | cmn \neg, \divisor | ||
292 | addcs \quotient, \quotient, #1 | ||
293 | bx lr | ||
294 | #endif | ||
295 | .endm | ||
296 | |||
297 | udiv32_arm: | ||
298 | ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0 /* numerator r0, divisor r1, quotient returned in r0; r2, r3, ip are scratch */ | ||
299 | .L_div0: | ||
300 | /* __div0 expects the calling address on the top of the stack */ | ||
301 | stmdb sp!, { lr } | ||
302 | mov r0, #0 | ||
303 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
304 | bl __div0 | ||
305 | #else | ||
306 | ldr pc, [pc, #-4] /* jump through the literal word that follows */ | ||
307 | .word __div0 | ||
308 | #endif | ||
309 | .L_udiv_est_table: /* 64 one-byte entries, read by the ldrhib in ARMV5_UDIV32_BODY */ | ||
310 | .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6 | ||
311 | .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf | ||
312 | .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc | ||
313 | .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac | ||
314 | .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f | ||
315 | .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93 | ||
316 | .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89 | ||
317 | .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81 | ||
318 | #endif | ||
319 | .size udiv32_arm, . - udiv32_arm | ||
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S deleted file mode 100644 index c4aea14093..0000000000 --- a/apps/codecs/lib/udiv32_armv4.S +++ /dev/null | |||
@@ -1,134 +0,0 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2008 by Jens Arnold | ||
11 | * Copyright (C) 2009 by Andrew Mahone | ||
12 | * | ||
13 | * Optimised unsigned integer division for ARMv4 | ||
14 | * | ||
15 | * Based on: libgcc routines for ARM cpu. | ||
16 | * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) | ||
17 | * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 | ||
18 | * Free Software Foundation, Inc. | ||
19 | * | ||
20 | * This program is free software; you can redistribute it and/or | ||
21 | * modify it under the terms of the GNU General Public License | ||
22 | * as published by the Free Software Foundation; either version 2 | ||
23 | * of the License, or (at your option) any later version. | ||
24 | * | ||
25 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
26 | * KIND, either express or implied. | ||
27 | * | ||
28 | ****************************************************************************/ | ||
29 | |||
30 | #include "config.h" | ||
31 | /* Codecs should not normally do this, but we need to check a macro, and | ||
32 | * codecs.h would confuse the assembler. */ | ||
33 | |||
34 | /* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2) | ||
35 | for dividing a 30-bit value by a 15-bit value, with two operations per | ||
36 | iteration by storing quotient and remainder together and adding the previous | ||
37 | quotient bit during trial subtraction. Modified to work with any dividend | ||
38 | and divisor both less than 1 << 30, and skipping trials by calculating bits | ||
39 | in output. */ | ||
40 | .macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder | ||
41 | |||
42 | mov \bits, #1 | ||
43 | /* Shift the divisor left until it aligns with the numerator. If it already | ||
44 | has the high bit set, this is fine, everything inside .rept will be | ||
45 | skipped, and the add before and adcs after will set the one-bit result | ||
46 | to zero. */ | ||
47 | cmn \divisor, \dividend, lsr #16 | ||
48 | movcs \divisor, \divisor, lsl #16 | ||
49 | addcs \bits, \bits, #16 | ||
50 | cmn \divisor, \dividend, lsr #8 | ||
51 | movcs \divisor, \divisor, lsl #8 | ||
52 | addcs \bits, \bits, #8 | ||
53 | cmn \divisor, \dividend, lsr #4 | ||
54 | movcs \divisor, \divisor, lsl #4 | ||
55 | addcs \bits, \bits, #4 | ||
56 | cmn \divisor, \dividend, lsr #2 | ||
57 | movcs \divisor, \divisor, lsl #2 | ||
58 | addcs \bits, \bits, #2 | ||
59 | cmn \divisor, \dividend, lsr #1 | ||
60 | movcs \divisor, \divisor, lsl #1 | ||
61 | addcs \bits, \bits, #1 | ||
62 | adds \result, \dividend, \divisor | ||
63 | subcc \result, \result, \divisor | ||
64 | rsb \curbit, \bits, #31 | ||
65 | add pc, pc, \curbit, lsl #3 | ||
66 | nop | ||
67 | .rept 30 | ||
68 | adcs \result, \divisor, \result, lsl #1 | ||
69 | /* Fix the remainder portion of the result. This must be done because the | ||
70 | handler for 32-bit numerators needs the remainder. */ | ||
71 | subcc \result, \result, \divisor | ||
72 | .endr | ||
73 | /* Shift remainder/quotient left one, add final quotient bit */ | ||
74 | adc \result, \result, \result | ||
75 | mov \remainder, \result, lsr \bits | ||
76 | eor \quotient, \result, \remainder, lsl \bits | ||
77 | .endm | ||
78 | |||
79 | #ifdef USE_IRAM | ||
80 | .section .icode,"ax",%progbits | ||
81 | #else | ||
82 | .text | ||
83 | #endif | ||
84 | .align | ||
85 | .global udiv32_arm | ||
86 | .type udiv32_arm,%function | ||
87 | |||
88 | udiv32_arm: | ||
89 | /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor | ||
90 | and add the next bit of the result. The correction code at .L_udiv32 | ||
91 | does not need the divisor inverted, but can be modified to work with it, | ||
92 | and this allows the zero divisor test to be done early and without an | ||
93 | explicit comparison. */ | ||
94 | rsbs r1, r1, #0 | ||
95 | beq .L_div0 | ||
96 | tst r0, r0 | ||
97 | /* High bit must be unset, otherwise shift numerator right, calculate, | ||
98 | and correct results. As this case is very uncommon we want to avoid | ||
99 | any other delays on the main path in handling it, so the long divide | ||
100 | calls the short divide as a function. */ | ||
101 | bmi .L_udiv32 | ||
102 | .L_udiv31: | ||
103 | ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1 | ||
104 | bx lr | ||
105 | |||
106 | .L_udiv32: | ||
107 | /* store original numerator and divisor, we'll need them to correct the | ||
108 | result, */ | ||
109 | stmdb sp, { r0, r1, lr } | ||
110 | /* Call __div0 here if divisor is zero, otherwise it would report the wrong | ||
111 | address. */ | ||
112 | mov r0, r0, lsr #1 | ||
113 | bl .L_udiv31 | ||
114 | ldmdb sp, { r2, r3, lr } | ||
115 | /* Move the low bit of the original numerator to the carry bit */ | ||
116 | movs r2, r2, lsr #1 | ||
117 | /* Shift the remainder left one and add in the carry bit */ | ||
118 | adc r1, r1, r1 | ||
119 | /* Subtract the original divisor from the remainder, setting carry if the | ||
120 | result is non-negative */ | ||
121 | adds r1, r1, r3 | ||
122 | /* Shift quotient left one and add carry bit */ | ||
123 | adc r0, r0, r0 | ||
124 | bx lr | ||
125 | .L_div0: | ||
126 | /* __div0 expects the calling address on the top of the stack */ | ||
127 | stmdb sp!, { lr } | ||
128 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
129 | bl __div0 | ||
130 | #else | ||
131 | mov lr, pc | ||
132 | bx r3 | ||
133 | #endif | ||
134 | .size udiv32_arm, . - udiv32_arm | ||