summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Mahone <andrew.mahone@gmail.com> 2009-12-31 08:32:15 +0000
committer Andrew Mahone <andrew.mahone@gmail.com> 2009-12-31 08:32:15 +0000
commit 822abc12360900030323560b92a440f425b5641a (patch)
tree 037ba9d25b25a1ca842ef66ddbfe2ce9470a7c0d
parent becdbaa12d58850efa65da9a3f623795aed8acfb (diff)
download rockbox-822abc12360900030323560b92a440f425b5641a.tar.gz
rockbox-822abc12360900030323560b92a440f425b5641a.zip
Add 31/31-bit unsigned division in apps/codecs/lib/udiv32_armv4.S, with 2 cycles / iteration, falling back to the previous 32-bit, 3 cycles / iteration code when needed (well under 1% of divisions in sample file). APE normal sample is now 96.90% realtime, approx 1.3% improved vs svn. TODO: unify divisor normalization for both trial subtraction routines, possibly use divisor bits to select 31- vs 32-bit division.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24130 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/lib/udiv32_armv4.S54
1 files changed, 52 insertions, 2 deletions
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S
index 6b34cae1b3..6921c7fbd1 100644
--- a/apps/codecs/lib/udiv32_armv4.S
+++ b/apps/codecs/lib/udiv32_armv4.S
@@ -8,6 +8,7 @@
8 * $Id$ 8 * $Id$
9 * 9 *
10 * Copyright (C) 2008 by Jens Arnold 10 * Copyright (C) 2008 by Jens Arnold
11 * Copyright (C) 2009 by Andrew Mahone
11 * 12 *
12 * Optimised unsigned integer division for ARMv4 13 * Optimised unsigned integer division for ARMv4
13 * 14 *
@@ -30,7 +31,48 @@
30/* Codecs should not normally do this, but we need to check a macro, and 31/* Codecs should not normally do this, but we need to check a macro, and
31 * codecs.h would confuse the assembler. */ 32 * codecs.h would confuse the assembler. */
32 33
33.macro ARM_DIV_BODY dividend, divisor, result, curbit 34/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
35 for dividing a 30-bit value by a 15-bit value, with two operations per
36 iteration by storing quotient and remainder together and adding the previous
37 quotient bit during trial subtraction. Modified to work with any dividend
38 and divisor both less than 1 << 30, and skipping trials by calculating bits
39 in output.
40*/
41.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient
42 /* Overwrites dividend, divisor, result, bits, curbit and the flags; the quotient is written to the quotient operand. */
43 mov \bits, #1 /* bits starts at 1 and accumulates the total left shift applied to divisor below */
44 cmp \divisor, \dividend, lsr #16 /* is divisor <= dividend >> 16 (unsigned)? */
45 movls \divisor, \divisor, lsl #16 /* yes: move divisor up by 16 places ... */
46 addls \bits, \bits, #16 /* ... and count those 16 places */
47 cmp \divisor, \dividend, lsr #8 /* same test-and-shift with a step of 8 */
48 movls \divisor, \divisor, lsl #8
49 addls \bits, \bits, #8
50 cmp \divisor, \dividend, lsr #4 /* step of 4 */
51 movls \divisor, \divisor, lsl #4
52 addls \bits, \bits, #4
53 cmp \divisor, \dividend, lsr #2 /* step of 2 */
54 movls \divisor, \divisor, lsl #2
55 addls \bits, \bits, #2
56 cmp \divisor, \dividend, lsr #1 /* step of 1 */
57 movls \divisor, \divisor, lsl #1
58 addls \bits, \bits, #1
59 rsb \divisor, \divisor, #0 /* negate the shifted divisor so each trial subtraction can be done as an add/adc */
60 adds \result, \dividend, \divisor /* first trial: result = dividend - shifted divisor; carry set when there was no borrow */
61 subcc \result, \result, \divisor /* carry clear (trial failed): undo it; divisor is negated, so this adds it back. Flags are not updated. */
62 rsb \curbit, \bits, #31 /* curbit = 31 - bits, the number of unrolled steps to skip */
63 add pc, pc, \curbit, lsl #3 /* computed jump: each unrolled step below is 2 instructions = 8 bytes */
64 nop /* filler: in ARM state pc reads as the add's address + 8, so curbit == 0 lands on the first step after this nop */
65 .rept 30 /* 30 unrolled trial-subtraction steps */
66 adcs \result, \divisor, \result, lsl #1 /* result = (result << 1) + carry (previous quotient bit) - shifted divisor */
67 subcc \result, \result, \divisor /* trial failed: restore; carry is left intact for the next adcs/adc */
68 .endr
69 /* shift remainder/quotient left one, add final quotient bit */
70 adc \result, \result, \result
71 mov \dividend, \result, lsr \bits /* dividend := result >> bits, the part above the low quotient bits (presumably the remainder) */
72 eor \quotient, \result, \dividend, lsl \bits /* cancel that upper part, leaving only the low `bits` bits of result in quotient */
73.endm
74
75.macro ARM_DIV_32_BODY dividend, divisor, result, curbit
34 76
35 mov \result, \dividend 77 mov \result, \dividend
36 mov \curbit, #90 @ 3 * 30, (calculating branch dest) 78 mov \curbit, #90 @ 3 * 30, (calculating branch dest)
@@ -93,8 +135,16 @@ udiv32_arm:
93 bls 10f 135 bls 10f
94 tst r1, r2 136 tst r1, r2
95 beq 30f 137 beq 30f
138 tst r0, r0
139 /* High bit must be unset, otherwise use ARM_DIV_32_BODY. High bit of
140 divisor is also unset, since dividend has been tested to be >= divisor.
141 */
142 bmi 5f
143 ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0
144 bx lr
96 145
97 ARM_DIV_BODY r0, r1, r2, r3 1465:
147 ARM_DIV_32_BODY r0, r1, r2, r3
98 mov r0, r2 148 mov r0, r2
99 bx lr 149 bx lr
100 150