Move udiv32_arm.S into libdemac, as this divider is specialized for the APE codec and an optimized divider is already provided for general use in codeclib.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24506 a1c6a512-1295-4272-9138-f99709370657
author: Andrew Mahone <andrew.mahone@gmail.com> 2010-02-04 05:49:37 +0000
committer: Andrew Mahone <andrew.mahone@gmail.com> 2010-02-04 05:49:37 +0000
commit: 8ed7bda64cb98d491431fd130eb754c6320441a0 (patch)
tree: be9b6340aaaba3820e87a57c7c3db545c9c2cf15 /apps/codecs/demac/libdemac/udiv32_arm.S
parent: 7ed87517f734d7d70ab6f294735a77a65bd22e42 (diff)
download: rockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.tar.gz
rockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.zip
1 files changed, 300 insertions, 0 deletions
diff --git a/apps/codecs/demac/libdemac/udiv32_arm.S b/apps/codecs/demac/libdemac/udiv32_arm.S
new file mode 100644
index 0000000000..4492492d30
--- /dev/null
+++ b/apps/codecs/demac/libdemac/udiv32_arm.S
@@ -0,0 +1,300 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Jens Arnold
+ * Copyright (C) 2009 by Andrew Mahone
+ *
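+ * Optimised unsigned integer division for ARM: a shift-subtract and
+ * reciprocal-table implementation for ARMv4, and a clz-based implementation
+ * for ARMv5 and later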
+ *
+ * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
+ *           Developer's Guide
+ * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+ * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ * Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+/* Codecs should not normally do this, but we need to check a macro, and
+ * codecs.h would confuse the assembler. */
+#ifdef USE_IRAM
+#define DIV_RECIP                       /* enables the reciprocal-table fast path of the pre-ARMv5 udiv32_arm */
+    .section    .icode,"ax",%progbits   /* with USE_IRAM the routine is placed in the .icode section */
+#else
+    .text
+#endif
+    .align
+    .global udiv32_arm
+    .type   udiv32_arm,%function
+#if ARM_ARCH < 5                        /* shift-subtract version; the #else branch relies on clz */
+/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
+   for dividing a 30-bit value by a 15-bit value, with two operations per
+   iteration by storing quotient and remainder together and adding the previous
+   quotient bit during trial subtraction. Modified to work with any dividend
+   and divisor both less than 1 << 30, and skipping trials by calculating bits
+   in output.
+   NOTE(review): the caller in this file uses the macro whenever the top bit
+   of the numerator is clear, i.e. for numerators below 1 << 31 -- confirm
+   which bound is the intended one.
+
+   Operands (all registers):
+     \dividend   in:  numerator
+     \divisor    in:  the NEGATED divisor (the caller does rsbs first), so that
+                      adding it performs the trial subtraction; overwritten
+                      with its left-shifted (aligned) value
+     \result     scratch: remainder and quotient bits packed in one word
+     \bits       scratch: 1 + the total left shift applied to \divisor
+     \curbit     scratch: number of unrolled steps to skip
+     \quotient   out: quotient (may be the same register as \dividend)
+     \remainder  out: remainder (may be the same register as \divisor)
+   The flags are clobbered. The macro falls through at its end. */
+.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
+    mov     \bits, #1
+    /* Shift the divisor left until it aligns with the numerator. If it already
+       has the high bit set, this is fine, everything inside .rept will be
+       skipped, and the add before and adcs after will set the one-bit result
+       to zero. */
+    cmn     \divisor, \dividend, lsr #16    /* carry set when (dividend >> 16) >= divisor */
+    movcs   \divisor, \divisor, lsl #16
+    addcs   \bits, \bits, #16
+    cmn     \divisor, \dividend, lsr #8     /* same test against the divisor as shifted so far */
+    movcs   \divisor, \divisor, lsl #8
+    addcs   \bits, \bits, #8
+    cmn     \divisor, \dividend, lsr #4
+    movcs   \divisor, \divisor, lsl #4
+    addcs   \bits, \bits, #4
+    cmn     \divisor, \dividend, lsr #2
+    movcs   \divisor, \divisor, lsl #2
+    addcs   \bits, \bits, #2
+    cmn     \divisor, \dividend, lsr #1
+    movcs   \divisor, \divisor, lsl #1
+    addcs   \bits, \bits, #1
+    adds    \result, \dividend, \divisor    /* first trial subtraction; carry is the first quotient bit */
+    subcc   \result, \result, \divisor      /* trial failed: undo it */
+    rsb     \curbit, \bits, #31             /* number of unrolled steps that are not needed */
+    add     pc, pc, \curbit, lsl #3         /* skip them: each step is 2 instructions (8 bytes); pc reads as this instruction + 8 */
+    nop                                     /* padding, so that the pc value read above is the first unrolled step */
+    .rept   30
+    adcs    \result, \divisor, \result, lsl #1  /* shift in the previous quotient bit (carry) and trial-subtract */
+    /* Fix the remainder portion of the result. This must be done because the
+       handler for 32-bit numerators needs the remainder. */
+    subcc   \result, \result, \divisor
+    .endr
+    /* Shift remainder/quotient left one, add final quotient bit */
+    adc     \result, \result, \result
+    mov     \remainder, \result, lsr \bits              /* bits above the low \bits bits are the remainder */
+    eor     \quotient, \result, \remainder, lsl \bits   /* clear them, leaving the quotient */
+.endm
+#ifdef CPU_PP /* recip_max is the largest divisor served by the reciprocal
+   table (.L_udiv_recip_table holds one word per divisor from 3 to recip_max);
+   its value is selected per CPU.
+   NOTE(review): no value is set for CPUs other than the ones listed here;
+   presumably DIV_RECIP is only ever defined on these targets -- confirm. */
+#if CONFIG_CPU == PP5020
+.set recip_max, 8384
+#elif CONFIG_CPU == PP5002
+.set recip_max, 4608
+#else
+.set recip_max, 16384
+#endif
+#elif CONFIG_CPU == AS3525
+.set recip_max, 42752
+#elif CONFIG_CPU == S5L8701
+.set recip_max, 12800
+#elif CONFIG_CPU == S5L8700
+.set recip_max, 9088
+#endif
+udiv32_arm: /* Unsigned 32-bit division, pre-ARMv5 version.
+   In:  r0 = numerator, r1 = divisor
+   Out: r0 = quotient (r1 may be overwritten)
+   Scratch: r2, r3, ip and the flags
+   A zero divisor ends up at .L_div0. */
+#ifdef DIV_RECIP
+    cmp     r1, #3
+    bcc     .L_udiv_tiny            /* divisor is 0, 1 or 2 */
+    cmp     r1, #recip_max
+    bhi     .L_udiv                 /* divisor is beyond the table: shift-subtract division */
+    adr     r3, .L_udiv_recip_table-12  /* biased by 3 words because the table starts at divisor 3 */
+    ldr     r2, [r3, r1, lsl #2]    /* r2 = table entry for this divisor */
+    mov     r3, r0                  /* keep the numerator for the check below */
+    umull   ip, r0, r2, r0          /* r0 = high word of numerator * entry: quotient estimate */
+    mul     r2, r0, r1              /* r2 = estimate * divisor */
+    cmp     r3, r2
+    bxcs    lr                      /* numerator >= estimate * divisor: return the estimate */
+    sub     r0, r0, #1              /* otherwise return estimate - 1 */
+    bx      lr
+.L_udiv_tiny:
+    cmp     r1, #1
+    movhi   r0, r0, lsr #1          /* divisor == 2: quotient is numerator >> 1 */
+    bxcs    lr                      /* divisor == 1 or 2: done (r0 is untouched for 1) */
+    b       .L_div0                 /* divisor == 0 */
+#endif
+.L_udiv:
+    /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
+       and add the next bit of the result. The correction code at .L_udiv32
+       does not need the divisor inverted, but can be modified to work with it,
+       and this allows the zero divisor test to be done early and without an
+       explicit comparison. */
+    rsbs    r1, r1, #0              /* r1 = -divisor; Z is set when the divisor is zero */
+#ifndef DIV_RECIP
+    beq .L_div0
+#endif
+    tst     r0, r0                  /* N = top bit of the numerator */
+    /* High bit must be unset, otherwise shift numerator right, calculate,
+       and correct results. As this case is very uncommon we want to avoid
+       any other delays on the main path in handling it, so the long divide
+       calls the short divide as a function. */
+    bmi     .L_udiv32
+.L_udiv31:
+    ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1  /* quotient -> r0, remainder -> r1 */
+    bx      lr
+.L_udiv32:
+    /* Store the original numerator, the negated divisor and lr; they are
+       needed to correct the result. There is no writeback, so the three words
+       are placed below the current sp and sp itself is left unchanged. */
+    stmdb   sp, { r0, r1, lr }
+    /* The divisor cannot be zero here (that case was dispatched to .L_div0
+       before reaching .L_udiv32). Halve the numerator so that its top bit is
+       clear, as the short divide requires. */
+    mov     r0, r0, lsr #1
+    bl      .L_udiv31               /* r0 = (numerator >> 1) / divisor, r1 = remainder */
+    ldmdb   sp, { r2, r3, lr }      /* r2 = original numerator, r3 = negated divisor */
+    /* Move the low bit of the original numerator to the carry bit */
+    movs    r2, r2, lsr #1
+    /* Shift the remainder left one and add in the carry bit */
+    adc     r1, r1, r1
+    /* Add the negated divisor (i.e. subtract the divisor) to the remainder,
+       setting carry if the result is non-negative */
+    adds    r1, r1, r3
+    /* Shift quotient left one and add carry bit */
+    adc     r0, r0, r0
+    bx      lr
+.L_div0:
+    /* __div0 expects the calling address on the top of the stack */
+    stmdb sp!, { lr }
+    mov     r0, #0
+#if defined(__ARM_EABI__) || !defined(USE_IRAM)
+    bl      __div0                  /* NOTE(review): only data or the end of the function follows, so __div0 presumably never returns -- confirm */
+#else
+    ldr     pc, [pc, #-4]           /* pc reads as this instruction + 8, so this loads the word below: a jump to __div0 */
+    .word   __div0
+#endif
+#ifdef DIV_RECIP
+.L_udiv_recip_table:                /* one 32-bit entry per divisor, for divisors 3..recip_max */
+    .set div, 3
+    .rept recip_max - 2
+        .if (div - 1) & div         /* non-zero unless div is a power of two */
+            .set q, 0x40000000 / div                /* q = floor(2^30 / div) */
+            .set r, (0x40000000 - (q * div))<<1     /* its remainder, doubled for the next quotient bit */
+            .set q, q << 1
+            .if r >= div            /* long-division step: q becomes floor(2^31 / div) */
+                .set q, q + 1
+                .set r, r - div
+            .endif
+            .set r, r << 1
+            .set q, q << 1
+            .if r >= div            /* second step: q becomes floor(2^32 / div) */
+                .set q, q + 1
+                .set r, r - div
+            .endif
+            .set q, q + 1           /* stored entry is floor(2^32 / div) + 1 */
+        .else
+            .set q, 0x40000000 / div * 4            /* power of two: exactly 2^32 / div */
+        .endif
+        .word q
+        .set div, div+1
+    .endr
+#endif
+    .size udiv32_arm, . - udiv32_arm
+#else /* ARM_ARCH >= 5 */
+/* Unsigned 32-bit division body for ARMv5 and later (uses clz).
+   Operands (registers unless noted):
+     \numerator  in: numerator; may be overwritten
+     \divisor    in: divisor; may be overwritten
+     \quotient   out: quotient (may be the same register as \numerator, as in
+                 the invocation in this file)
+     \bits, \inv, \neg   scratch
+     \div0label  optional label that is branched to when the divisor is zero
+   The flags are clobbered. Every path ends in bx lr or in the branch to
+   \div0label, so nothing falls through to the code after the macro. The
+   byte table .L_udiv_est_table is addressed relative to pc and must therefore
+   be defined in the same section. */
+.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label
+    cmp     \numerator, \divisor
+    clz     \bits, \divisor
+    bcc     30f                         /* numerator < divisor: the quotient is 0 */
+    mov     \inv, \divisor, lsl \bits   /* normalise: highest set bit of the divisor moves to bit 31 */
+    add     \neg, pc, \inv, lsr #25     /* pc + top 7 bits; pc reads as the address of the ldrhib below */
+    cmp     \inv, #1<<31
+    ldrhib  \inv, [\neg, #.L_udiv_est_table-.-64]   /* not a power of two: \inv = table[top 7 bits - 64] */
+    bls     20f                         /* normalised value is 1<<31 (power of two) or 0 (zero divisor) */
+    subs    \bits, \bits, #7
+    rsb     \neg, \divisor, #0          /* \neg = -divisor */
+    movpl   \divisor, \inv, lsl \bits   /* clz >= 7: scale the 8-bit estimate up */
+    bmi     10f                         /* clz < 7: large divisor, handled at 10 */
+    /* Two refinement steps of the reciprocal estimate held in \divisor
+       (presumably Newton-Raphson iterations -- confirm against the reference
+       algorithm). */
+    mul     \inv, \divisor, \neg
+    smlawt  \divisor, \divisor, \inv, \divisor
+    mul     \inv, \divisor, \neg
+    /* This will save a cycle on ARMv6, but does not produce a correct result
+       if numerator sign bit is set. This case accounts for about 1 in 10^7 of
+       divisions, done by the APE decoder, so we specialize for the more common
+       case and handle the uncommon large-numerator separately */
+#if ARM_ARCH >= 6
+    tst     \numerator, \numerator      /* N = top bit of the numerator (smmla leaves the flags alone) */
+    smmla   \divisor, \divisor, \inv, \divisor  /* \divisor += high word of \divisor * \inv */
+    bmi     40f                         /* top bit set: the signed smmul below would be wrong */
+    smmul   \inv, \numerator, \divisor  /* \inv = high word of numerator * reciprocal: quotient estimate */
+#else
+    mov     \bits, #0                   /* low accumulator word starts at 0 */
+    smlal   \bits, \divisor, \inv, \divisor     /* \divisor += high word of \inv * \divisor */
+    umull   \bits, \inv, \numerator, \divisor   /* \inv = high word of numerator * reciprocal: quotient estimate */
+#endif
+    add     \numerator, \numerator, \neg        /* numerator - divisor */
+    mla     \divisor, \inv, \neg, \numerator    /* (numerator - estimate * divisor) - divisor */
+    mov     \quotient, \inv
+    cmn     \divisor, \neg              /* flags select the correction for an estimate 1 or 2 too small */
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+    bx      lr
+10:
+    rsb     \bits, \bits, #0            /* \bits = 7 - clz(divisor), the right shift for the estimate */
+    sub     \inv, \inv, #4              /* lower the table estimate before scaling */
+    mov     \divisor, \inv, lsr \bits   /* scaled reciprocal estimate */
+    umull   \bits, \inv, \numerator, \divisor   /* \inv = high word: quotient estimate */
+    mla     \divisor, \inv, \neg, \numerator    /* remainder for that estimate */
+    mov     \quotient, \inv
+    cmn     \neg, \divisor, lsr #1      /* carry when remainder / 2 >= divisor */
+    addcs   \divisor, \divisor, \neg, lsl #1    /* remainder -= 2 * divisor */
+    addcs   \quotient, \quotient, #2
+    cmn     \neg, \divisor              /* carry when remainder >= divisor */
+    addcs   \quotient, \quotient, #1
+    bx      lr
+20:
+    /* Power-of-two (or zero) divisor: the quotient is the numerator shifted
+       right by 31 - clz(divisor). The shift count is computed unconditionally,
+       so the macro also works when no \div0label is given (previously the rsb
+       was only assembled together with the div0 branch). rsb does not touch
+       the flags, so the bne still tests the cmp against 1<<31 above. */
+    rsb     \bits, \bits, #31
+.ifnc "", "\div0label"
+    bne     \div0label                  /* not equal to 1<<31 here means the divisor was zero */
+.endif
+    mov     \quotient, \numerator, lsr \bits
+    bx      lr
+30:
+    mov     \quotient, #0
+    bx      lr
+#if ARM_ARCH >= 6
+40:
+    /* Numerator with the top bit set: same final sequence as above, but with
+       an unsigned multiply for the quotient estimate. */
+    umull   \bits, \inv, \numerator, \divisor
+    add     \numerator, \numerator, \neg
+    mla     \divisor, \inv, \neg, \numerator
+    mov     \quotient, \inv
+    cmn     \divisor, \neg
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+    bx      lr
+#endif
+.endm
+udiv32_arm: /* Unsigned 32-bit division, ARMv5 and later.
+   In:  r0 = numerator, r1 = divisor
+   Out: r0 = quotient
+   Scratch: r1, r2, r3, ip and the flags
+   A zero divisor branches to .L_div0. */
+    ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0
+.L_div0:
+    /* __div0 expects the calling address on the top of the stack */
+    stmdb sp!, { lr }
+    mov     r0, #0
+#if defined(__ARM_EABI__) || !defined(USE_IRAM)
+    bl      __div0                  /* NOTE(review): only table data follows, so __div0 presumably never returns -- confirm */
+#else
+    ldr     pc, [pc, #-4]           /* pc reads as this instruction + 8, so this loads the word below: a jump to __div0 */
+    .word   __div0
+#endif
+.L_udiv_est_table: /* 64 one-byte initial reciprocal estimates, indexed by the
+   top 7 bits of the normalised divisor minus 64. The values are approximately
+   2^14 / (64 + index), with the first entry capped at 0xff. */
+    .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
+    .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
+    .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
+    .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
+    .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
+    .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
+    .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
+    .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
+#endif
+    .size udiv32_arm, . - udiv32_arm   /* NOTE(review): the pre-ARMv5 branch already emits its own .size before the #else, so there this directive is issued twice */
author	Andrew Mahone <andrew.mahone@gmail.com>	2010-02-04 05:49:37 +0000
committer	Andrew Mahone <andrew.mahone@gmail.com>	2010-02-04 05:49:37 +0000
commit	8ed7bda64cb98d491431fd130eb754c6320441a0 (patch)
tree	be9b6340aaaba3820e87a57c7c3db545c9c2cf15 /apps/codecs/demac/libdemac/udiv32_arm.S
parent	7ed87517f734d7d70ab6f294735a77a65bd22e42 (diff)
download	rockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.tar.gz rockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.zip

diff --git a/apps/codecs/demac/libdemac/udiv32_arm.S b/apps/codecs/demac/libdemac/udiv32_arm.S new file mode 100644 index 0000000000..4492492d30 --- /dev/null +++ b/apps/codecs/demac/libdemac/udiv32_arm.S
@@ -0,0 +1,300 @@
	1	/***************************************************************************
	2	* __________ __ ___.
	3	* Open \______ \ ____ ____ \| \| _\_ \|__ _______ ___
	4	* Source \| _// _ \_/ ___\\| \|/ /\| __ \ / _ \ \/ /
	5	* Jukebox \| \| ( <_> ) \___\| < \| \_\ ( <_> > < <
	6	* Firmware \|____\|_ /\____/ \___ >__\|_ \\|___ /\____/__/\_ \
	7	* \/ \/ \/ \/ \/
	8	* $Id$
	9	*
	10	* Copyright (C) 2008 by Jens Arnold
	11	* Copyright (C) 2009 by Andrew Mahone
	12	*
	13	* Optimised unsigned integer division for ARM (ARMv4, and a clz-based version for ARMv5 and later)
	14	*
	15	* Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
	16	* Developer's Guide
	17	* Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
	18	* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
	19	* Free Software Foundation, Inc.
	20	*
	21	* This program is free software; you can redistribute it and/or
	22	* modify it under the terms of the GNU General Public License
	23	* as published by the Free Software Foundation; either version 2
	24	* of the License, or (at your option) any later version.
	25	*
	26	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
	27	* KIND, either express or implied.
	28	*
	29	****************************************************************************/
	30
	31	#include "config.h"
	32	/* Codecs should not normally do this, but we need to check a macro, and
	33	* codecs.h would confuse the assembler. */
	34
	35	#ifdef USE_IRAM
	36	#define DIV_RECIP /* enables the reciprocal-table fast path of the pre-ARMv5 udiv32_arm */
	37	.section .icode,"ax",%progbits /* with USE_IRAM the routine is placed in the .icode section */
	38	#else
	39	.text
	40	#endif
	41	.align
	42	.global udiv32_arm
	43	.type udiv32_arm,%function
	44
	45	#if ARM_ARCH < 5 /* shift-subtract version; the #else branch relies on clz */
	46	/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
	47	for dividing a 30-bit value by a 15-bit value, with two operations per
	48	iteration by storing quotient and remainder together and adding the previous
	49	quotient bit during trial subtraction. Modified to work with any dividend
	50	and divisor both less than 1 << 30, and skipping trials by calculating bits
	51	in output. On entry \divisor must hold the NEGATED divisor (the caller does rsbs first); \divisor, \result, \bits, \curbit and the flags are overwritten; the results are left in \quotient and \remainder. */
	52	.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
	53
	54	mov \bits, #1 /* \bits = 1 + the total left shift applied to \divisor */
	55	/* Shift the divisor left until it aligns with the numerator. If it already
	56	has the high bit set, this is fine, everything inside .rept will be
	57	skipped, and the add before and adcs after will set the one-bit result
	58	to zero. */
	59	cmn \divisor, \dividend, lsr #16 /* carry set when (dividend >> 16) >= divisor */
	60	movcs \divisor, \divisor, lsl #16
	61	addcs \bits, \bits, #16
	62	cmn \divisor, \dividend, lsr #8 /* same test against the divisor as shifted so far */
	63	movcs \divisor, \divisor, lsl #8
	64	addcs \bits, \bits, #8
	65	cmn \divisor, \dividend, lsr #4
	66	movcs \divisor, \divisor, lsl #4
	67	addcs \bits, \bits, #4
	68	cmn \divisor, \dividend, lsr #2
	69	movcs \divisor, \divisor, lsl #2
	70	addcs \bits, \bits, #2
	71	cmn \divisor, \dividend, lsr #1
	72	movcs \divisor, \divisor, lsl #1
	73	addcs \bits, \bits, #1
	74	adds \result, \dividend, \divisor /* first trial subtraction; carry is the first quotient bit */
	75	subcc \result, \result, \divisor /* trial failed: undo it */
	76	rsb \curbit, \bits, #31 /* number of unrolled steps that are not needed */
	77	add pc, pc, \curbit, lsl #3 /* skip them: each step is 2 instructions (8 bytes); pc reads as this instruction + 8 */
	78	nop /* padding, so that the pc value read above is the first unrolled step */
	79	.rept 30
	80	adcs \result, \divisor, \result, lsl #1 /* shift in the previous quotient bit (carry) and trial-subtract */
	81	/* Fix the remainder portion of the result. This must be done because the
	82	handler for 32-bit numerators needs the remainder. */
	83	subcc \result, \result, \divisor
	84	.endr
	85	/* Shift remainder/quotient left one, add final quotient bit */
	86	adc \result, \result, \result
	87	mov \remainder, \result, lsr \bits /* bits above the low \bits bits are the remainder */
	88	eor \quotient, \result, \remainder, lsl \bits /* clear them, leaving the quotient */
	89	.endm
	90
	91	#ifdef CPU_PP /* recip_max is the largest divisor served by the reciprocal table (one word per divisor from 3 to recip_max); selected per CPU */
	92	#if CONFIG_CPU == PP5020
	93	.set recip_max, 8384
	94	#elif CONFIG_CPU == PP5002
	95	.set recip_max, 4608
	96	#else
	97	.set recip_max, 16384
	98	#endif
	99	#elif CONFIG_CPU == AS3525
	100	.set recip_max, 42752
	101	#elif CONFIG_CPU == S5L8701
	102	.set recip_max, 12800
	103	#elif CONFIG_CPU == S5L8700
	104	.set recip_max, 9088
	105	#endif /* NOTE(review): no value is set for other CPUs; presumably DIV_RECIP is only defined on the targets listed -- confirm */
	106
	107	udiv32_arm: /* pre-ARMv5 unsigned divide. In: r0 = numerator, r1 = divisor. Out: r0 = quotient. Scratch: r1, r2, r3, ip, flags. */
	108	#ifdef DIV_RECIP
	109	cmp r1, #3
	110	bcc .L_udiv_tiny /* divisor is 0, 1 or 2 */
	111	cmp r1, #recip_max
	112	bhi .L_udiv /* divisor is beyond the table: shift-subtract division */
	113	adr r3, .L_udiv_recip_table-12 /* biased by 3 words because the table starts at divisor 3 */
	114	ldr r2, [r3, r1, lsl #2] /* r2 = table entry for this divisor */
	115	mov r3, r0 /* keep the numerator for the check below */
	116	umull ip, r0, r2, r0 /* r0 = high word of numerator * entry: quotient estimate */
	117	mul r2, r0, r1 /* r2 = estimate * divisor */
	118	cmp r3, r2
	119	bxcs lr /* numerator >= estimate * divisor: return the estimate */
	120	sub r0, r0, #1 /* otherwise return estimate - 1 */
	121	bx lr
	122	.L_udiv_tiny:
	123	cmp r1, #1
	124	movhi r0, r0, lsr #1 /* divisor == 2: quotient is numerator >> 1 */
	125	bxcs lr /* divisor == 1 or 2: done (r0 is untouched for 1) */
	126	b .L_div0 /* divisor == 0 */
	127	#endif
	128	.L_udiv:
	129	/* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
	130	and add the next bit of the result. The correction code at .L_udiv32
	131	does not need the divisor inverted, but can be modified to work with it,
	132	and this allows the zero divisor test to be done early and without an
	133	explicit comparison. */
	134	rsbs r1, r1, #0 /* r1 = -divisor; Z is set when the divisor is zero */
	135	#ifndef DIV_RECIP
	136	beq .L_div0
	137	#endif
	138	tst r0, r0 /* N = top bit of the numerator */
	139	/* High bit must be unset, otherwise shift numerator right, calculate,
	140	and correct results. As this case is very uncommon we want to avoid
	141	any other delays on the main path in handling it, so the long divide
	142	calls the short divide as a function. */
	143	bmi .L_udiv32
	144	.L_udiv31:
	145	ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1 /* quotient -> r0, remainder -> r1 */
	146	bx lr
	147	.L_udiv32:
	148	/* Store the original numerator, the negated divisor and lr below sp (no
	149	writeback, sp is unchanged); they are needed to correct the result. */
	150	stmdb sp, { r0, r1, lr }
	151	/* The divisor cannot be zero here (that case was dispatched to .L_div0
	152	earlier). Halve the numerator so that its top bit is clear. */
	153	mov r0, r0, lsr #1
	154	bl .L_udiv31 /* r0 = (numerator >> 1) / divisor, r1 = remainder */
	155	ldmdb sp, { r2, r3, lr } /* r2 = original numerator, r3 = negated divisor */
	156	/* Move the low bit of the original numerator to the carry bit */
	157	movs r2, r2, lsr #1
	158	/* Shift the remainder left one and add in the carry bit */
	159	adc r1, r1, r1
	160	/* Add the negated divisor (i.e. subtract the divisor) to the remainder,
	161	setting carry if the result is non-negative */
	162	adds r1, r1, r3
	163	/* Shift quotient left one and add carry bit */
	164	adc r0, r0, r0
	165	bx lr
	166	.L_div0:
	167	/* __div0 expects the calling address on the top of the stack */
	168	stmdb sp!, { lr }
	169	mov r0, #0
	170	#if defined(__ARM_EABI__) \|\| !defined(USE_IRAM)
	171	bl __div0 /* NOTE(review): only data or the end of the function follows, so __div0 presumably never returns -- confirm */
	172	#else
	173	ldr pc, [pc, #-4] /* pc reads as this instruction + 8, so this loads the word below: a jump to __div0 */
	174	.word __div0
	175	#endif
	176	#ifdef DIV_RECIP
	177	.L_udiv_recip_table: /* one 32-bit entry per divisor, for divisors 3..recip_max */
	178	.set div, 3
	179	.rept recip_max - 2
	180	.if (div - 1) & div /* non-zero unless div is a power of two */
	181	.set q, 0x40000000 / div /* q = floor(2^30 / div) */
	182	.set r, (0x40000000 - (q * div))<<1 /* its remainder, doubled for the next quotient bit */
	183	.set q, q << 1
	184	.if r >= div /* long-division step: q becomes floor(2^31 / div) */
	185	.set q, q + 1
	186	.set r, r - div
	187	.endif
	188	.set r, r << 1
	189	.set q, q << 1
	190	.if r >= div /* second step: q becomes floor(2^32 / div) */
	191	.set q, q + 1
	192	.set r, r - div
	193	.endif
	194	.set q, q + 1 /* stored entry is floor(2^32 / div) + 1 */
	195	.else
	196	.set q, 0x40000000 / div * 4 /* power of two: exactly 2^32 / div */
	197	.endif
	198	.word q
	199	.set div, div+1
	200	.endr
	201	#endif
	202	.size udiv32_arm, . - udiv32_arm
	203
	204	#else /* ARM_ARCH >= 5 */
	205	.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label /* ARMv5+ unsigned divide body: \quotient = \numerator / \divisor; \bits, \inv, \neg are scratch; \div0label (optional) is branched to for a zero divisor; every path ends in bx lr */
	206	cmp \numerator, \divisor
	207	clz \bits, \divisor
	208	bcc 30f /* numerator < divisor: the quotient is 0 */
	209	mov \inv, \divisor, lsl \bits /* normalise: highest set bit of the divisor moves to bit 31 */
	210	add \neg, pc, \inv, lsr #25 /* pc + top 7 bits; pc reads as the address of the ldrhib below */
	211	cmp \inv, #1<<31
	212	ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] /* not a power of two: \inv = table[top 7 bits - 64] */
	213	bls 20f /* normalised value is 1<<31 (power of two) or 0 (zero divisor) */
	214	subs \bits, \bits, #7
	215	rsb \neg, \divisor, #0 /* \neg = -divisor */
	216	movpl \divisor, \inv, lsl \bits /* clz >= 7: scale the 8-bit estimate up */
	217	bmi 10f /* clz < 7: large divisor, handled at 10 */
	218	mul \inv, \divisor, \neg /* this and the next two instructions refine the reciprocal estimate (presumably Newton-Raphson -- confirm) */
	219	smlawt \divisor, \divisor, \inv, \divisor
	220	mul \inv, \divisor, \neg
	221	/* This will save a cycle on ARMv6, but does not produce a correct result
	222	if numerator sign bit is set. This case accounts for about 1 in 10^7 of
	223	divisions, done by the APE decoder, so we specialize for the more common
	224	case and handle the uncommon large-numerator separately */
	225	#if ARM_ARCH >= 6
	226	tst \numerator, \numerator /* N = top bit of the numerator (smmla leaves the flags alone) */
	227	smmla \divisor, \divisor, \inv, \divisor /* \divisor += high word of \divisor * \inv */
	228	bmi 40f /* top bit set: the signed smmul below would be wrong */
	229	smmul \inv, \numerator, \divisor /* \inv = high word of numerator * reciprocal: quotient estimate */
	230	#else
	231	mov \bits, #0 /* low accumulator word starts at 0 */
	232	smlal \bits, \divisor, \inv, \divisor /* \divisor += high word of \inv * \divisor */
	233	umull \bits, \inv, \numerator, \divisor /* \inv = high word of numerator * reciprocal: quotient estimate */
	234	#endif
	235	add \numerator, \numerator, \neg /* numerator - divisor */
	236	mla \divisor, \inv, \neg, \numerator /* (numerator - estimate * divisor) - divisor */
	237	mov \quotient, \inv
	238	cmn \divisor, \neg /* flags select the correction for an estimate 1 or 2 too small */
	239	addcc \quotient, \quotient, #1
	240	addpl \quotient, \quotient, #2
	241	bx lr
	242	10:
	243	rsb \bits, \bits, #0 /* \bits = 7 - clz(divisor), the right shift for the estimate */
	244	sub \inv, \inv, #4 /* lower the table estimate before scaling */
	245	mov \divisor, \inv, lsr \bits /* scaled reciprocal estimate */
	246	umull \bits, \inv, \numerator, \divisor /* \inv = high word: quotient estimate */
	247	mla \divisor, \inv, \neg, \numerator /* remainder for that estimate */
	248	mov \quotient, \inv
	249	cmn \neg, \divisor, lsr #1 /* carry when remainder / 2 >= divisor */
	250	addcs \divisor, \divisor, \neg, lsl #1 /* remainder -= 2 * divisor */
	251	addcs \quotient, \quotient, #2
	252	cmn \neg, \divisor /* carry when remainder >= divisor */
	253	addcs \quotient, \quotient, #1
	254	bx lr
	255	20:
	256	rsb \bits, \bits, #31 /* shift count 31 - clz(divisor), now computed even when no \div0label is given; rsb leaves the flags of the cmp against 1<<31 intact */
	257	.ifnc "", "\div0label"
	258	bne \div0label /* not equal to 1<<31 here means the divisor was zero */
	259	.endif
	260	mov \quotient, \numerator, lsr \bits /* power-of-two divisor: the quotient is a plain right shift */
	261	bx lr
	262	30:
	263	mov \quotient, #0
	264	bx lr
	265	#if ARM_ARCH >= 6
	266	40:
	267	umull \bits, \inv, \numerator, \divisor /* numerator with top bit set: same final sequence, with an unsigned multiply */
	268	add \numerator, \numerator, \neg
	269	mla \divisor, \inv, \neg, \numerator
	270	mov \quotient, \inv
	271	cmn \divisor, \neg
	272	addcc \quotient, \quotient, #1
	273	addpl \quotient, \quotient, #2
	274	bx lr
	275	#endif
	276	.endm
	277
	278	udiv32_arm: /* ARMv5+ unsigned divide. In: r0 = numerator, r1 = divisor. Out: r0 = quotient. Scratch: r1, r2, r3, ip, flags. */
	279	ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0
	280	.L_div0:
	281	/* __div0 expects the calling address on the top of the stack */
	282	stmdb sp!, { lr }
	283	mov r0, #0
	284	#if defined(__ARM_EABI__) \|\| !defined(USE_IRAM)
	285	bl __div0 /* NOTE(review): only table data follows, so __div0 presumably never returns -- confirm */
	286	#else
	287	ldr pc, [pc, #-4] /* pc reads as this instruction + 8, so this loads the word below: a jump to __div0 */
	288	.word __div0
	289	#endif
	290	.L_udiv_est_table: /* 64 one-byte initial reciprocal estimates, indexed by the top 7 bits of the normalised divisor minus 64; approximately 2^14 / (64 + index), first entry capped at 0xff */
	291	.byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
	292	.byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
	293	.byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
	294	.byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
	295	.byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
	296	.byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
	297	.byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
	298	.byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
	299	#endif
	300	.size udiv32_arm, . - udiv32_arm /* NOTE(review): the pre-ARMv5 branch already emits its own .size before the #else, so there this directive is issued twice */