summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Mahone <andrew.mahone@gmail.com>2010-02-01 01:36:46 +0000
committerAndrew Mahone <andrew.mahone@gmail.com>2010-02-01 01:36:46 +0000
commitbff5a35c3c51ebe1fe72ee20147b16ede847971d (patch)
tree549fc29d08c298462b91b9d1cfc054d704faa4bd
parent40a3e806974ff482006d4ee99b718a52d1e85224 (diff)
downloadrockbox-bff5a35c3c51ebe1fe72ee20147b16ede847971d.tar.gz
rockbox-bff5a35c3c51ebe1fe72ee20147b16ede847971d.zip
FS#10943, optimized division and clz routines to replace libgcc routines for ARM. Replaces libgcc support functions for unsigned and signed 32-bit division on ARMv4 and up, and leading-zero count on ARMv4.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24432 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/lib/SOURCES1
-rw-r--r--apps/codecs/lib/codeclib.h2
-rw-r--r--apps/plugins/lib/SOURCES1
-rw-r--r--firmware/SOURCES1
-rw-r--r--firmware/target/arm/support-arm.S699
5 files changed, 703 insertions, 1 deletions
diff --git a/apps/codecs/lib/SOURCES b/apps/codecs/lib/SOURCES
index ffbe1af92e..a8c3feb1aa 100644
--- a/apps/codecs/lib/SOURCES
+++ b/apps/codecs/lib/SOURCES
@@ -8,6 +8,7 @@ mdct_lookup.c
8mdct_arm.S 8mdct_arm.S
9setjmp_arm.S 9setjmp_arm.S
10udiv32_arm.S 10udiv32_arm.S
11../../../firmware/target/arm/support-arm.S
11#endif 12#endif
12 13
13#ifdef CPU_COLDFIRE 14#ifdef CPU_COLDFIRE
diff --git a/apps/codecs/lib/codeclib.h b/apps/codecs/lib/codeclib.h
index 926035f05e..2d34523de5 100644
--- a/apps/codecs/lib/codeclib.h
+++ b/apps/codecs/lib/codeclib.h
@@ -65,7 +65,7 @@ void qsort(void *base, size_t nmemb, size_t size, int(*compar)(const void *, con
65 65
66extern void mdct_backward(int n, int32_t *in, int32_t *out); 66extern void mdct_backward(int n, int32_t *in, int32_t *out);
67 67
68#ifdef CPU_ARM 68#if defined(CPU_ARM) && (ARM_ARCH < 5 || defined(USE_IRAM))
69/* optimised unsigned integer division for ARMv4, in IRAM */ 69/* optimised unsigned integer division for ARMv4, in IRAM */
70unsigned udiv32_arm(unsigned a, unsigned b); 70unsigned udiv32_arm(unsigned a, unsigned b);
71#define UDIV32(a, b) udiv32_arm(a, b) 71#define UDIV32(a, b) udiv32_arm(a, b)
diff --git a/apps/plugins/lib/SOURCES b/apps/plugins/lib/SOURCES
index 00d3ac7c56..82807d15ee 100644
--- a/apps/plugins/lib/SOURCES
+++ b/apps/plugins/lib/SOURCES
@@ -30,6 +30,7 @@ profile_plugin.c
30#endif 30#endif
31#ifdef HAVE_LCD_BITMAP 31#ifdef HAVE_LCD_BITMAP
32#ifdef CPU_ARM 32#ifdef CPU_ARM
33../../../firmware/target/arm/support-arm.S
33pluginlib_jpeg_idct_arm.S 34pluginlib_jpeg_idct_arm.S
34#endif 35#endif
35pluginlib_jpeg_mem.c 36pluginlib_jpeg_mem.c
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 56ab680417..e60bf63419 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -375,6 +375,7 @@ target/coldfire/i2c-coldfire.c
375 375
376#elif defined(CPU_PP) || defined(CPU_ARM) 376#elif defined(CPU_PP) || defined(CPU_ARM)
377/* CPU_PP => CPU_ARM, CPU_ARM !=> CPU_PP */ 377/* CPU_PP => CPU_ARM, CPU_ARM !=> CPU_PP */
378target/arm/support-arm.S
378target/arm/memcpy-arm.S 379target/arm/memcpy-arm.S
379target/arm/memmove-arm.S 380target/arm/memmove-arm.S
380common/strlen.c 381common/strlen.c
diff --git a/firmware/target/arm/support-arm.S b/firmware/target/arm/support-arm.S
new file mode 100644
index 0000000000..8703dd5b0a
--- /dev/null
+++ b/firmware/target/arm/support-arm.S
@@ -0,0 +1,699 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Jens Arnold
11 * Copyright (C) 2009 by Andrew Mahone
12 *
13 * Optimised replacements for libgcc functions
14 *
15 * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
16 * Developer's Guide
17 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
18 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
19 * Free Software Foundation, Inc.
20 *
21 * This program is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation; either version 2
24 * of the License, or (at your option) any later version.
25 *
26 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
27 * KIND, either express or implied.
28 *
29 ****************************************************************************/
30
31#include <config.h>
32
/* ARM_SDIV32_PRE numerator, divisor, sign
 *
 * Sign pre-processing for the ARMv4 signed divider.
 *   \divisor    is negated when its sign bit is clear and left untouched when
 *               it is set, so it leaves this macro as a non-positive value
 *               (the ARMv4 divider works with a negated divisor).
 *   \numerator  is negated when negative.
 *   \sign       receives the sign record consumed by ARM_SDIV32_POST:
 *               bit 31    = divisor sign XOR numerator sign (result sign),
 *               bits 0-30 = copies of the numerator sign.
 * The condition flags are clobbered.
 */
.macro ARM_SDIV32_PRE numerator, divisor, sign
    /* Isolate the divisor sign; Z is set when the divisor is >= 0. */
    ands    \sign, \divisor, #0x80000000
    rsbeq   \divisor, \divisor, #0
    /* XOR in the replicated numerator sign; the shifter carry-out (C) is the
       numerator sign bit. */
    eors    \sign, \sign, \numerator, asr #32
    rsbcs   \numerator, \numerator, #0
.endm
41
/* ARM_SDIV32_POST quotient, remainder, sign
 *
 * Applies the signs recorded in \sign (bit 31 = result sign, bit 30 =
 * numerator sign) to the unsigned results of a division.
 *   \quotient   negated when bit 31 of \sign is set; pass "" to skip.
 *   \remainder  negated when bit 30 of \sign is set; pass "" to skip.
 * \sign is shifted left by one and the condition flags are clobbered.
 */
.macro ARM_SDIV32_POST quotient, remainder, sign
    /* After the shift: C = old bit 31, N = old bit 30. */
    movs    \sign, \sign, lsl #1
.ifnc "", "\quotient"
    rsbcs   \quotient, \quotient, #0
.endif
.ifnc "", "\remainder"
    rsbmi   \remainder, \remainder, #0
.endif
.endm
51
52#if ARM_ARCH < 5
/* ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits,
 *                   div0label, return
 *
 * Unsigned 32-bit division body for ARMv4.
 *   \numerator  in: dividend. Clobbered.
 *   \divisor    in: divisor. Clobbered (negated, then shifted).
 *   \quotient   out: quotient register, or "" if not wanted.
 *   \remainder  out: remainder register, or "" if not wanted.
 *   \tmp, \bits scratch registers.
 *   \div0label  if not "", branch target taken when the divisor is zero.
 *   \return     numeric: non-zero emits "bx lr" at every exit; zero makes
 *               every exit continue at local label 99 at the end of the
 *               macro.
 * Uses the local numeric labels 10, 20 and 99 and the assembler symbol
 * "shift".
 */
.macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
    /* The divider works with the negated divisor, so that additions that
       carry out mean "numerator >= divisor". */
.ifnc "", "\div0label"
    rsbs    \divisor, \divisor, #0
    beq     \div0label
.else
    rsb     \divisor, \divisor, #0
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. Since a shift might be needed for the aligned remainder to
       exceed the divisor, the topmost bit must be unset at the start to avoid
       this overflow case. The original numerator is saved so that the result
       can be corrected after the reduced division completes. */
    cmn     \numerator, \divisor
    /* Carry clear: numerator < divisor, so quotient = 0 and remainder =
       numerator. When only the remainder is wanted and it already lives in
       the numerator register, there is nothing to do but leave. */
.ifc "", "\quotient"
.ifc "\numerator", "\remainder"
.if \return
    bxcc    lr
.else
    /* Must be conditional, like the bxcc above; an unconditional branch
       would skip the division for every input. */
    bcc     99f
.endif
.else
    bcc     20f
.endif
.else
    bcc     20f
.endif
    /* Keep the original numerator in tmp; halve it if the top bit is set. */
    movs    \tmp, \numerator
    movmi   \numerator, \numerator, lsr #1
    /* Binary search (16, 8, 4, 2, 1) for the divisor alignment shift. */
    mov     \bits, #30
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Computed jump into the unrolled loop below: each step is two
       instructions (8 bytes), and pc reads as this instruction + 8, i.e. the
       first step after the nop. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    adc     \numerator, \numerator, \numerator
    /* N set here means the original numerator had its top bit set and was
       halved above; that case is fixed up at 10. */
    movs    \tmp, \tmp, asr #1
    rsb     \bits, \bits, #31
    bmi     10f
    /* numerator now holds the remainder above bit position "bits" and the
       quotient below it; split the two. */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    eor     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    eor     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Correction for a numerator that was halved before the division: shift
       the dropped low bit (in C from the movs above) back into the remainder
       and perform one more division step. */
    mov     \tmp, \numerator, lsr \bits
    eor     \numerator, \numerator, \tmp, lsl \bits
    sub     \bits, \bits, #1
    adc     \tmp, \tmp, \tmp
    adds    \tmp, \tmp, \divisor, asr \bits
.ifnc "", "\quotient"
    adc     \quotient, \numerator, \numerator
.endif
.ifnc "", "\remainder"
    subcc   \remainder, \tmp, \divisor, asr \bits
    movcs   \remainder, \tmp
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
20:
    /* numerator < divisor: quotient is zero, remainder is the numerator. */
.ifnc "", "\remainder"
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    bx      lr
.else
99:
.endif
.endm
151
/* ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign,
 *                   div0label, return
 *
 * Signed 32-bit division body for ARMv4.
 *   \numerator  in: dividend. Clobbered.
 *   \divisor    in: divisor. Clobbered.
 *   \quotient   out: quotient register, or "" if not wanted.
 *   \remainder  out: remainder register, or "" if not wanted.
 *   \bits       scratch register.
 *   \sign       scratch register holding the sign record produced by
 *               ARM_SDIV32_PRE and consumed by ARM_SDIV32_POST.
 *   \div0label  if not "", branch target taken when the divisor is zero.
 *   \return     numeric: non-zero emits "bx lr" at every exit; zero makes
 *               the macro fall out of its end (local label 99) instead.
 * Uses the local numeric labels 20 and 99 and the assembler symbol "shift".
 */
.macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
    /* ARM_SDIV32_PRE records the signs, makes the numerator non-negative and
       leaves the divisor negated, which is the form the divider below needs;
       the zero-divisor test therefore has to follow it. */
    ARM_SDIV32_PRE \numerator, \divisor, \sign
.ifnc "", "\div0label"
    tst     \divisor, \divisor
    beq     \div0label
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. With signed inputs, whose absolute value may not exceed
       1<<31, this may be accomplished simply by subtracting the divisor
       before beginning division, and adding 1 to the quotient. */
    adds    \numerator, \numerator, \divisor
    /* No carry: |numerator| < |divisor|, handled at 20. */
    bcc     20f
    /* Binary search (16, 8, 4, 2, 1) for the divisor alignment shift. */
    mov     \bits, #30
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Computed jump into the unrolled loop: two instructions (8 bytes) per
       step; pc reads as this instruction + 8, the first step after the nop. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    rsb     \bits, \bits, #31
    adc     \numerator, \numerator, \numerator
    /* Split numerator into remainder (above bit "bits") and quotient (below
       it); the +1 accounts for the divisor subtracted up front. */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.if \return
    ARM_SDIV32_POST \quotient, \remainder, \sign
    bx      lr
.else
    b       99f
.endif
20:
    /* |numerator| < |divisor|: undo the up-front subtraction to recover the
       remainder; the quotient is zero and needs no sign fix-up. */
.ifnc "", "\remainder"
    sub     \remainder, \numerator, \divisor
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.if \return
    ARM_SDIV32_POST "", \remainder, \sign
    bx      lr
.else
99:
    ARM_SDIV32_POST \quotient, \remainder, \sign
.endif
.endm
219
220#else
/* ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg,
 *                   div0label, return
 *
 * Unsigned 32-bit division body for ARMv5E and later, based on a table
 * look-up of an approximate reciprocal (.L_udiv_est_table) that is refined
 * with Newton-Raphson steps.
 *   \numerator  in: dividend. Clobbered.
 *   \divisor    in: divisor. Clobbered.
 *   \quotient   out: quotient register, or "" if not wanted.
 *   \remainder  out: remainder register, or "" if not wanted.
 *   \bits, \inv, \neg  scratch registers.
 *   \div0label  if not "", branch target taken when the divisor is zero.
 *   \return     numeric: non-zero emits "bx lr" at every exit; zero makes
 *               every exit continue at local label 99 at the end of the
 *               macro.
 * Uses the local numeric labels 10, 20, 30, 40 (ARMv6 only) and 99.
 */
.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
    cmp     \numerator, \divisor
    clz     \bits, \divisor
    bcc     30f
    /* Normalise the divisor so that its top bit is set (a zero divisor
       yields zero here). */
    mov     \inv, \divisor, lsl \bits
    /* Table address: pc reads as the address of the ldrb two instructions
       below, so the load ends up at table + (inv >> 25) - 64. Do not insert
       instructions between this add and the load. */
    add     \neg, pc, \inv, lsr #25
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    /* Load approximate reciprocal */
    ldrhib  \inv, [\neg, #.L_udiv_est_table-.-64]
    bls     20f
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if ARM_ARCH >= 6
    tst     \numerator, \numerator
    smmla   \divisor, \divisor, \inv, \divisor
    /* Branch to large-numerator handler, or else use smmul if sign bit is not
       set. */
    bmi     40f
    smmul   \inv, \numerator, \divisor
#else
    /* ARMv5e lacks smmul, so always uses umull. */
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct result. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
    umull   \bits, \inv, \numerator, \divisor
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
.else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask. The flags still come from the
       "cmp inv, #1<<31" above: NE here means the normalised divisor was
       below 1<<31, which only happens for a zero divisor. */
.ifnc "", "\remainder"
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\quotient"
    sub     \divisor, \divisor, #1
    rsb     \bits, \bits, #31
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
.else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
.endif
.else
    rsb     \bits, \bits, #31
.ifnc "", "\div0label"
    bne     \div0label
.endif
    mov     \quotient, \numerator, lsr \bits
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is the
       (unmodified) numerator. */
.ifnc "", "\remainder"
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.if \return
    bx      lr
.else
#if ARM_ARCH >= 6
    /* Without this branch the non-returning variant would fall through into
       the large-numerator handler at 40 below. */
    b       99f
#endif
.endif
#if ARM_ARCH >= 6
40:
    /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e code
       above 10:. */
    umull   \bits, \inv, \numerator, \divisor
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    /* Computed exactly once: a second mla would be wrong whenever the
       remainder shares the numerator register. */
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
#endif
99:
.endm
400
/* ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg,
 *                   sign, div0label, return
 *
 * Signed 32-bit division body for ARMv5E and later. Same reciprocal-based
 * scheme as the unsigned ARMv5 body, wrapped in sign handling.
 *   \numerator  in: dividend. Clobbered.
 *   \divisor    in: divisor. Clobbered.
 *   \quotient   out: quotient register, or "" if not wanted.
 *   \remainder  out: remainder register, or "" if not wanted.
 *   \bits, \inv, \neg  scratch registers.
 *   \sign       scratch register for the sign record (bit 31 = result sign,
 *               bits 0-30 = numerator sign) consumed by ARM_SDIV32_POST.
 *   \div0label  if not "", branch target taken when the divisor is zero.
 *   \return     text of the instruction that returns to the caller, emitted
 *               verbatim at every exit; if "", every exit continues at local
 *               label 99 at the end of the macro instead.
 * Uses the local numeric labels 10, 20, 30 and 99.
 */
.macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
    /* sign[31] = divisor sign; make the divisor non-negative. */
    ands    \sign, \divisor, #0x80000000
    rsbne   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    clz     \bits, \divisor
    rsbcs   \numerator, \numerator, #0
    /* On ARMv6, subtract divisor before performing division, which ensures
       numerator sign bit is clear and smmul may be used in place of umull. The
       fixup for the results can be fit entirely into existing delay slots on
       the main division paths. It costs 1c in the num<div path if the
       remainder is to be produced in the numerator's register, and 1c in
       the power-of-2-divisor path only if producing both remainder and
       quotient. */
#if ARM_ARCH >= 6
    subs    \numerator, \numerator, \divisor
#else
    cmp     \numerator, \divisor
#endif
    /* Normalise the divisor (top bit set), or leave for the num < div path. */
    movcs   \inv, \divisor, lsl \bits
    bcc     30f
    /* Test whether divisor is 2^N */
    cmp     \inv, #0x80000000
    /* Table address: pc reads as the address of the ldrb two instructions
       below, so the load ends up at table + (inv >> 25) - 64. Do not insert
       instructions between this add and the load. */
    add     \inv, pc, \inv, lsr #25
    bls     20f
    /* Load approximate reciprocal */
    ldrb    \inv, [\inv, #.L_udiv_est_table-.-64]
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6. */
#if ARM_ARCH >= 6
    smmla   \divisor, \divisor, \inv, \divisor
    smmul   \inv, \numerator, \divisor
#else
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct quotient. On ARMv6 the quotient starts
       one higher to account for the divisor subtracted up front. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value. */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
#if ARM_ARCH >= 6
    smmul   \inv, \numerator, \divisor
#else
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
.else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
#if ARM_ARCH >= 6
    add     \quotient, \inv, #1
#else
    mov     \quotient, \inv
#endif
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask. The flags still come from the compare
       against 1<<31 above: NE here means the normalised divisor was below
       1<<31, which only happens for a zero divisor. */
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\remainder"
.ifnc "", "\quotient"
    rsb     \bits, \bits, #31
#if ARM_ARCH >= 6
    /* Undo the up-front subtraction of the divisor. */
    add     \numerator, \numerator, \divisor
#endif
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
.else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
.endif
.else
    rsb     \bits, \bits, #31
#if ARM_ARCH >= 6
    /* Undo the up-front subtraction of the divisor. */
    add     \numerator, \numerator, \divisor
#endif
    mov     \quotient, \numerator, lsr \bits
.endif
    ARM_SDIV32_POST \quotient, \remainder, \sign
.ifnc "", "\return"
    \return
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
       which must be restored to its original value on ARMv6. */
.ifnc "", "\remainder"
#if ARM_ARCH >= 6
    add     \remainder, \numerator, \divisor
#else
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
#endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
    /* A zero quotient needs no sign fix-up; only the remainder does. */
    ARM_SDIV32_POST "", \remainder, \sign
.ifnc "", "\return"
    \return
.endif
99:
.endm
591#endif
592
593 .section .text
594
/* Divide-by-zero trampoline passed as div0label by the signed ARMv5 entry
   points in this file. Those routines return with "ldr pc, [sp, #-4]", i.e.
   they expect their return address in the word just below sp; this moves sp
   down over that word (presumably so that it counts as pushed - confirm
   against __div0), clears r0 and continues in __div0, which is defined
   outside this file. */
__div0_wrap_s:
    sub     sp, sp, #4
    mov     r0, #0
    b       __div0
    .size   __div0_wrap_s, . - __div0_wrap_s
600
/* Divide-by-zero trampoline passed as div0label by the unsigned entry points
   and by the ARMv4 signed entry points in this file. Pushes lr onto the
   stack (pre-decrementing sp by 4), clears r0 and continues in __div0, which
   is defined outside this file. */
__div0_wrap:
    str     lr, [sp, #-4]!
    mov     r0, #0
    b       __div0
    .size   __div0_wrap, . - __div0_wrap
606
#ifndef __ARM_EABI__
/* Non-EABI builds export the libgcc names for signed and unsigned 32-bit
   division. */
    .global __divsi3
    .type   __divsi3,%function
    .global __udivsi3
    .type   __udivsi3,%function
#else
/* The div+mod averages a fraction of a cycle worse for signed values, and
   slightly better for unsigned, so just alias div to divmod. */
    .global __aeabi_uidivmod
    .type   __aeabi_uidivmod,%function
    .global __aeabi_uidiv
    .type   __aeabi_uidiv,%function
    .set    __aeabi_uidiv,__aeabi_uidivmod
    .global __aeabi_idivmod
    .type   __aeabi_idivmod,%function
    .global __aeabi_idiv
    .type   __aeabi_idiv,%function
    .set    __aeabi_idiv,__aeabi_idivmod
#endif
628
629
630#if ARM_ARCH < 5
/* __clzsi2 - leading-zero count replacement for the libgcc routine on ARMv4,
 * which has no clz instruction.
 *   In:  r0 = value
 *   Out: r0 = byte fetched from the 64-entry table below; a zero input hashes
 *        to index 0 and yields 32.
 * Only r0 is modified; the condition flags are left untouched.
 */
    .global __clzsi2
    .type   __clzsi2, %function

__clzsi2:
    /* OR the value with copies of itself shifted right by 8, 4, 2 and 1. */
    orr     r0, r0, r0, lsr #8
    orr     r0, r0, r0, lsr #4
    orr     r0, r0, r0, lsr #2
    orr     r0, r0, r0, lsr #1
    /* Clear every bit that is also set 16 positions higher. */
    bic     r0, r0, r0, lsr #16
    /* Hash: multiply by (2^14 - 1), (2^11 - 1) and (2^9 - 1) in turn ... */
    rsb     r0, r0, r0, lsl #14
    rsb     r0, r0, r0, lsl #11
    rsb     r0, r0, r0, lsl #9
    /* ... and use the top 6 bits as table index. pc reads as this
       instruction + 8, which is the first table byte right after the bx. */
    ldrb    r0, [pc, r0, lsr #26]
    bx      lr
    .byte   32, 20, 19,  0,  0, 18,  0,  7, 10, 17,  0,  0, 14,  0,  6,  0
    .byte    0,  9,  0, 16,  0,  0,  1, 26,  0, 13,  0,  0, 24,  5,  0,  0
    .byte    0, 21,  0,  8, 11,  0, 15,  0,  0,  0,  0,  2, 27,  0, 25,  0
    .byte   22,  0, 12,  0,  0,  3, 28,  0, 23,  0,  4, 29,  0,  0, 30, 31
    .size   __clzsi2, .-__clzsi2
650
#ifndef __ARM_EABI__
/* ARMv4, non-EABI: numerator in r0, divisor in r1, quotient returned in r0;
   r2 and r3 are scratch. A zero divisor branches to __div0_wrap. */
__udivsi3:
    ARMV4_UDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
    .size __udivsi3, . - __udivsi3

__divsi3:
    ARMV4_SDIV32_BODY r0, r1, r0, "", r2, r3, __div0_wrap, 1
    .size __divsi3, . - __divsi3

#else
/* ARMv4, EABI: numerator in r0, divisor in r1, quotient returned in r0 and
   remainder in r1; r2 and r3 are scratch. A zero divisor branches to
   __div0_wrap. */
__aeabi_uidivmod:
    ARMV4_UDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod

__aeabi_idivmod:
    ARMV4_SDIV32_BODY r0, r1, r0, r1, r2, r3, __div0_wrap, 1
    .size __aeabi_idivmod, . - __aeabi_idivmod
#endif
669
670#else
#ifndef __ARM_EABI__
/* ARMv5+, non-EABI: numerator in r0, divisor in r1, quotient returned in r0.
   The unsigned routine uses r2, r3 and ip as scratch and branches to
   __div0_wrap on a zero divisor. */
__udivsi3:
    ARMV5_UDIV32_BODY r0, r1, r0, "", r2, r3, ip, __div0_wrap, 1
    .size __udivsi3, . - __udivsi3

/* The signed routine needs one more scratch register and uses lr for it, so
   the return address is parked in the word just below sp first; the body
   returns with "ldr pc, [sp, #-4]" and __div0_wrap_s moves sp over that word
   on a zero divisor.
   NOTE(review): sp itself is not moved, so this relies on nothing else
   writing below sp on this stack while the routine runs - confirm for the
   target's exception handling. */
__divsi3:
    str     lr, [sp, #-4]
    ARMV5_SDIV32_BODY r0, r1, r0, "", r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
    .size __divsi3, . - __divsi3

#else
/* ARMv5+, EABI: numerator in r0, divisor in r1, quotient returned in r0 and
   remainder in r1. The unsigned routine uses r2, r3 and ip as scratch and
   branches to __div0_wrap on a zero divisor. */
__aeabi_uidivmod:
    ARMV5_UDIV32_BODY r0, r1, r0, r1, r2, r3, ip, __div0_wrap, 1
    .size __aeabi_uidivmod, . - __aeabi_uidivmod

/* The signed routine needs one more scratch register and uses lr for it, so
   the return address is parked in the word just below sp first; the body
   returns with "ldr pc, [sp, #-4]" and __div0_wrap_s moves sp over that word
   on a zero divisor.
   NOTE(review): sp itself is not moved, so this relies on nothing else
   writing below sp on this stack while the routine runs - confirm for the
   target's exception handling. */
__aeabi_idivmod:
    str     lr, [sp, #-4]
    ARMV5_SDIV32_BODY r0, r1, r0, r1, r2, lr, ip, r3, __div0_wrap_s, "ldr pc, [sp, #-4]"
    .size __aeabi_idivmod, . - __aeabi_idivmod
#endif
689
/* Reciprocal estimates for the ARMv5 division bodies: 64 bytes, indexed by
   (normalised divisor >> 25) - 64. The entries appear to be
   floor(2^14 / (64 + index)), with the first one capped at 0xff. The division
   macros address this table pc-relative with a byte load, so it has to stay
   within that load's offset range of the code that uses it. */
.L_udiv_est_table:
    .byte   0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
    .byte   0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
    .byte   0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
    .byte   0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
    .byte   0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
    .byte   0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
    .byte   0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
    .byte   0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
699#endif