diff options
author | Andrew Mahone <andrew.mahone@gmail.com> | 2009-12-31 08:32:15 +0000 |
---|---|---|
committer | Andrew Mahone <andrew.mahone@gmail.com> | 2009-12-31 08:32:15 +0000 |
commit | 822abc12360900030323560b92a440f425b5641a (patch) | |
tree | 037ba9d25b25a1ca842ef66ddbfe2ce9470a7c0d /apps/codecs/lib/udiv32_armv4.S | |
parent | becdbaa12d58850efa65da9a3f623795aed8acfb (diff) | |
download | rockbox-822abc12360900030323560b92a440f425b5641a.tar.gz rockbox-822abc12360900030323560b92a440f425b5641a.zip |
Add 31/31-bit unsigned division in apps/codecs/lib/udiv32_armv4.S, at 2 cycles / iteration, falling back to the previous 32-bit, 3 cycles / iteration code when needed (well under 1% of divisions in sample file). APE normal sample is now 96.90% realtime, approx. 1.3% improved vs svn. TODO: unify divisor normalization for both trial subtraction routines, possibly use divisor bits to select 31- vs 32-bit division.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24130 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/lib/udiv32_armv4.S')
-rw-r--r-- | apps/codecs/lib/udiv32_armv4.S | 54 |
1 files changed, 52 insertions, 2 deletions
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S index 6b34cae1b3..6921c7fbd1 100644 --- a/apps/codecs/lib/udiv32_armv4.S +++ b/apps/codecs/lib/udiv32_armv4.S | |||
@@ -8,6 +8,7 @@ | |||
8 | * $Id$ | 8 | * $Id$ |
9 | * | 9 | * |
10 | * Copyright (C) 2008 by Jens Arnold | 10 | * Copyright (C) 2008 by Jens Arnold |
11 | * Copyright (C) 2009 by Andrew Mahone | ||
11 | * | 12 | * |
12 | * Optimised unsigned integer division for ARMv4 | 13 | * Optimised unsigned integer division for ARMv4 |
13 | * | 14 | * |
@@ -30,7 +31,48 @@ | |||
30 | /* Codecs should not normally do this, but we need to check a macro, and | 31 | /* Codecs should not normally do this, but we need to check a macro, and |
31 | * codecs.h would confuse the assembler. */ | 32 | * codecs.h would confuse the assembler. */ |
32 | 33 | ||
33 | .macro ARM_DIV_BODY dividend, divisor, result, curbit | 34 | /* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2) |
35 | for dividing a 30-bit value by a 15-bit value, with two operations per | ||
36 | iteration by storing quotient and remainder together and adding the previous | ||
37 | quotient bit during trial subtraction. Modified to work with any dividend | ||
38 | and divisor both less than 1 << 31, and to skip unneeded trials by first | ||
39 | computing the number of quotient bits. | ||
40 | */ | ||
41 | .macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient | ||
42 | |||
43 | mov \bits, #1 | ||
44 | cmp \divisor, \dividend, lsr #16 | ||
45 | movls \divisor, \divisor, lsl #16 | ||
46 | addls \bits, \bits, #16 | ||
47 | cmp \divisor, \dividend, lsr #8 | ||
48 | movls \divisor, \divisor, lsl #8 | ||
49 | addls \bits, \bits, #8 | ||
50 | cmp \divisor, \dividend, lsr #4 | ||
51 | movls \divisor, \divisor, lsl #4 | ||
52 | addls \bits, \bits, #4 | ||
53 | cmp \divisor, \dividend, lsr #2 | ||
54 | movls \divisor, \divisor, lsl #2 | ||
55 | addls \bits, \bits, #2 | ||
56 | cmp \divisor, \dividend, lsr #1 | ||
57 | movls \divisor, \divisor, lsl #1 | ||
58 | addls \bits, \bits, #1 | ||
59 | rsb \divisor, \divisor, #0 | ||
60 | adds \result, \dividend, \divisor | ||
61 | subcc \result, \result, \divisor | ||
62 | rsb \curbit, \bits, #31 | ||
63 | add pc, pc, \curbit, lsl #3 | ||
64 | nop | ||
65 | .rept 30 | ||
66 | adcs \result, \divisor, \result, lsl #1 | ||
67 | subcc \result, \result, \divisor | ||
68 | .endr | ||
69 | /* shift remainder/quotient left one, add final quotient bit */ | ||
70 | adc \result, \result, \result | ||
71 | mov \dividend, \result, lsr \bits | ||
72 | eor \quotient, \result, \dividend, lsl \bits | ||
73 | .endm | ||
74 | |||
75 | .macro ARM_DIV_32_BODY dividend, divisor, result, curbit | ||
34 | 76 | ||
35 | mov \result, \dividend | 77 | mov \result, \dividend |
36 | mov \curbit, #90 @ 3 * 30, (calculating branch dest) | 78 | mov \curbit, #90 @ 3 * 30, (calculating branch dest) |
@@ -93,8 +135,16 @@ udiv32_arm: | |||
93 | bls 10f | 135 | bls 10f |
94 | tst r1, r2 | 136 | tst r1, r2 |
95 | beq 30f | 137 | beq 30f |
138 | tst r0, r0 | ||
139 | /* High bit of dividend must be unset, otherwise use ARM_DIV_32_BODY. High | ||
140 | bit of divisor is then also unset, since dividend was tested >= divisor. | ||
141 | */ | ||
142 | bmi 5f | ||
143 | ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0 | ||
144 | bx lr | ||
96 | 145 | ||
97 | ARM_DIV_BODY r0, r1, r2, r3 | 146 | 5: |
147 | ARM_DIV_32_BODY r0, r1, r2, r3 | ||
98 | mov r0, r2 | 148 | mov r0, r2 |
99 | bx lr | 149 | bx lr |
100 | 150 | ||