diff options
author | Jens Arnold <amiconn@rockbox.org> | 2008-11-06 21:21:33 +0000 |
---|---|---|
committer | Jens Arnold <amiconn@rockbox.org> | 2008-11-06 21:21:33 +0000 |
commit | 545b51e2e46db6e9c6dd59ef45f73cc86529bace (patch) | |
tree | ce3f11540b1bfb7e7ee1127bdbd3c587289b26e6 /apps/codecs/lib | |
parent | 35823422c225336ac3788a4919a06d6cafaa3c1f (diff) | |
download | rockbox-545b51e2e46db6e9c6dd59ef45f73cc86529bace.tar.gz rockbox-545b51e2e46db6e9c6dd59ef45f73cc86529bace.zip |
ARMv4 unsigned integer division: Using an overflow-safe comparison method in the main calculation allows to put back the 1.5 cyle (average) optimisation. Shaved off another instruction, as we don't need the remainder. * Use the very efficient ffs algorithm from ffs-arm.S for dividing by a power of 2.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19032 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/lib')
-rw-r--r-- | apps/codecs/lib/udiv32_armv4.S | 55 |
1 files changed, 31 insertions, 24 deletions
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S index 2cb6fc8088..6b34cae1b3 100644 --- a/apps/codecs/lib/udiv32_armv4.S +++ b/apps/codecs/lib/udiv32_armv4.S | |||
@@ -33,7 +33,7 @@ | |||
33 | .macro ARM_DIV_BODY dividend, divisor, result, curbit | 33 | .macro ARM_DIV_BODY dividend, divisor, result, curbit |
34 | 34 | ||
35 | mov \result, \dividend | 35 | mov \result, \dividend |
36 | mov \curbit, #93 @ 3 * 31, (calculating branch dest) | 36 | mov \curbit, #90 @ 3 * 30, (calculating branch dest) |
37 | cmp \divisor, \result, lsr #16 | 37 | cmp \divisor, \result, lsr #16 |
38 | movls \result,\result, lsr #16 | 38 | movls \result,\result, lsr #16 |
39 | subls \curbit, \curbit, #48 | 39 | subls \curbit, \curbit, #48 |
@@ -44,40 +44,35 @@ | |||
44 | movls \result,\result, lsr #4 | 44 | movls \result,\result, lsr #4 |
45 | subls \curbit, \curbit, #12 | 45 | subls \curbit, \curbit, #12 |
46 | cmp \divisor, \result, lsr #2 | 46 | cmp \divisor, \result, lsr #2 |
47 | movls \result,\result, lsr #2 | ||
48 | subls \curbit, \curbit, #6 | 47 | subls \curbit, \curbit, #6 |
49 | cmp \divisor, \result, lsr #1 | 48 | @ Calculation is only done down to shift=2, because the shift=1 step |
50 | subls \curbit, \curbit, #3 | 49 | @ would need 3 more cycles, but would only gain 1.5 cycles on average. |
51 | mov \result, #0 | 50 | mov \result, #0 |
52 | add pc, pc, \curbit, lsl #2 | 51 | add pc, pc, \curbit, lsl #2 |
53 | nop | 52 | nop |
54 | .set shift, 32 | 53 | .set shift, 32 |
55 | .rept 32 | 54 | .rept 31 |
56 | .set shift, shift - 1 | 55 | .set shift, shift - 1 |
57 | cmp \dividend, \divisor, lsl #shift | 56 | cmp \divisor, \dividend, lsr #shift |
58 | adc \result, \result, \result | 57 | orrls \result, \result, #(1 << shift) |
59 | subcs \dividend, \dividend, \divisor, lsl #shift | 58 | subls \dividend, \dividend, \divisor, lsl #shift |
60 | .endr | 59 | .endr @ shift==0 in the .rept would cause a warning for lsr #0 |
60 | cmp \divisor, \dividend | ||
61 | orrls \result, \result, #1 | ||
62 | @subls \dividend, \dividend, \divisor @ correct remainder not needed | ||
61 | .endm | 63 | .endm |
62 | 64 | ||
63 | .macro ARM_DIV2_ORDER divisor, order | 65 | .macro ARM_DIV2_ORDER divisor, order |
64 | 66 | ||
65 | cmp \divisor, #(1 << 16) | 67 | @ There's exactly one bit set in the divisor, so ffs() can be used |
66 | movhs \divisor, \divisor, lsr #16 | 68 | @ This is the ffs algorithm devised by D.Seal and posted to |
67 | movhs \order, #16 | 69 | @ comp.sys.arm on 16 Feb 1994. |
68 | movlo \order, #0 | 70 | adr \order, L_ffs_table |
71 | orr \divisor, \divisor, \divisor, lsl #4 @ = X * 0x11 | ||
72 | orr \divisor, \divisor, \divisor, lsl #6 @ = X * 0x451 | ||
73 | rsb \divisor, \divisor, \divisor, lsl #16 @ = X * 0x0450fbaf | ||
69 | 74 | ||
70 | cmp \divisor, #(1 << 8) | 75 | ldrb \order, [\order, \divisor, lsr #26] |
71 | movhs \divisor, \divisor, lsr #8 | ||
72 | addhs \order, \order, #8 | ||
73 | |||
74 | cmp \divisor, #(1 << 4) | ||
75 | movhs \divisor, \divisor, lsr #4 | ||
76 | addhs \order, \order, #4 | ||
77 | |||
78 | cmp \divisor, #(1 << 2) | ||
79 | addhi \order, \order, #3 | ||
80 | addls \order, \order, \divisor, lsr #1 | ||
81 | .endm | 76 | .endm |
82 | 77 | ||
83 | 78 | ||
@@ -113,3 +108,15 @@ udiv32_arm: | |||
113 | ARM_DIV2_ORDER r1, r2 | 108 | ARM_DIV2_ORDER r1, r2 |
114 | mov r0, r0, lsr r2 | 109 | mov r0, r0, lsr r2 |
115 | bx lr | 110 | bx lr |
111 | |||
112 | L_ffs_table: | ||
113 | @ 0 1 2 3 4 5 6 7 | ||
114 | @---------------------------------------------- | ||
115 | .byte 32, 0, 1, 12, 2, 6, 0, 13 @ 0- 7 | ||
116 | .byte 3, 0, 7, 0, 0, 0, 0, 14 @ 8-15 | ||
117 | .byte 10, 4, 0, 0, 8, 0, 0, 25 @ 16-23 | ||
118 | .byte 0, 0, 0, 0, 0, 21, 27, 15 @ 24-31 | ||
119 | .byte 31, 11, 5, 0, 0, 0, 0, 0 @ 32-39 | ||
120 | .byte 9, 0, 0, 24, 0, 0, 20, 26 @ 40-47 | ||
121 | .byte 30, 0, 0, 0, 0, 23, 0, 19 @ 48-55 | ||
122 | .byte 29, 0, 22, 18, 28, 17, 16, 0 @ 56-63 | ||