summary refs log tree commit diff
path: root/apps/codecs/lib/udiv32_armv4.S
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/lib/udiv32_armv4.S')
-rw-r--r-- apps/codecs/lib/udiv32_armv4.S 107
1 files changed, 54 insertions, 53 deletions
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S
index 491cf43862..b54156809c 100644
--- a/apps/codecs/lib/udiv32_armv4.S
+++ b/apps/codecs/lib/udiv32_armv4.S
@@ -36,11 +36,14 @@
36 iteration by storing quotient and remainder together and adding the previous 36 iteration by storing quotient and remainder together and adding the previous
37 quotient bit during trial subtraction. Modified to work with any dividend 37 quotient bit during trial subtraction. Modified to work with any dividend
38 and divisor both less than 1 << 30, and skipping trials by calculating bits 38 and divisor both less than 1 << 30, and skipping trials by calculating bits
39 in output. 39 in output. */
40*/ 40.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
41.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient
42 41
43 mov \bits, #1 42 mov \bits, #1
43 /* Shift the divisor left until it aligns with the numerator. If it already
44 has the high bit set, this is fine: everything inside .rept will be
45 skipped, and the add before and adcs after will set the one-bit result
46 to zero. */
44 cmp \divisor, \dividend, lsr #16 47 cmp \divisor, \dividend, lsr #16
45 movls \divisor, \divisor, lsl #16 48 movls \divisor, \divisor, lsl #16
46 addls \bits, \bits, #16 49 addls \bits, \bits, #16
@@ -56,7 +59,8 @@
56 cmp \divisor, \dividend, lsr #1 59 cmp \divisor, \dividend, lsr #1
57 movls \divisor, \divisor, lsl #1 60 movls \divisor, \divisor, lsl #1
58 addls \bits, \bits, #1 61 addls \bits, \bits, #1
59 rsb \divisor, \divisor, #0 62 rsbs \divisor, \divisor, #0
63 bcs .L_div0
60 adds \result, \dividend, \divisor 64 adds \result, \dividend, \divisor
61 subcc \result, \result, \divisor 65 subcc \result, \result, \divisor
62 rsb \curbit, \bits, #31 66 rsb \curbit, \bits, #31
@@ -64,44 +68,14 @@
64 nop 68 nop
65 .rept 30 69 .rept 30
66 adcs \result, \divisor, \result, lsl #1 70 adcs \result, \divisor, \result, lsl #1
71 /* Fix the remainder portion of the result. This must be done because the
72 handler for 32-bit numerators needs the remainder. */
67 subcc \result, \result, \divisor 73 subcc \result, \result, \divisor
68 .endr 74 .endr
69 /* shift remainder/quotient left one, add final quotient bit */ 75 /* Shift remainder/quotient left one, add final quotient bit */
70 adc \result, \result, \result 76 adc \result, \result, \result
71 mov \dividend, \result, lsr \bits 77 mov \remainder, \result, lsr \bits
72 eor \quotient, \result, \dividend, lsl \bits 78 eor \quotient, \result, \remainder, lsl \bits
73.endm
74
75.macro ARM_DIV_32_BODY dividend, divisor, result, curbit
76
77 mov \result, \dividend
78 mov \curbit, #90 @ 3 * 30, (calculating branch dest)
79 cmp \divisor, \result, lsr #16
80 movls \result,\result, lsr #16
81 subls \curbit, \curbit, #48
82 cmp \divisor, \result, lsr #8
83 movls \result,\result, lsr #8
84 subls \curbit, \curbit, #24
85 cmp \divisor, \result, lsr #4
86 movls \result,\result, lsr #4
87 subls \curbit, \curbit, #12
88 cmp \divisor, \result, lsr #2
89 subls \curbit, \curbit, #6
90 @ Calculation is only done down to shift=2, because the shift=1 step
91 @ would need 3 more cycles, but would only gain 1.5 cycles on average.
92 mov \result, #0
93 add pc, pc, \curbit, lsl #2
94 nop
95 .set shift, 32
96 .rept 31
97 .set shift, shift - 1
98 cmp \divisor, \dividend, lsr #shift
99 orrls \result, \result, #(1 << shift)
100 subls \dividend, \dividend, \divisor, lsl #shift
101 .endr @ shift==0 in the .rept would cause a warning for lsr #0
102 cmp \divisor, \dividend
103 orrls \result, \result, #1
104 @subls \dividend, \dividend, \divisor @ correct remainder not needed
105.endm 79.endm
106 80
107#ifdef USE_IRAM 81#ifdef USE_IRAM
@@ -114,21 +88,48 @@
114 .type udiv32_arm,%function 88 .type udiv32_arm,%function
115 89
116udiv32_arm: 90udiv32_arm:
117 cmp r1, #0
118 beq 20f
119 tst r0, r0 91 tst r0, r0
120 /* High bit must be unset, otherwise use ARM_DIV_32_BODY. High bit of 92 /* High bit must be unset, otherwise shift numerator right, calculate,
121 divisor is also unset dividend has been tested to be >= divisor. 93 and correct results. As this case is very uncommon we want to avoid
122 */ 94 any other delays on the main path in handling it, so the long divide
123 bmi 10f 95 calls the short divide as a function. */
124 ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0 96 bmi .L_udiv32
125 bx lr 97.L_udiv31:
126 98 ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
12710:
128 ARM_DIV_32_BODY r0, r1, r2, r3
129 mov r0, r2
130 bx lr 99 bx lr
131 100
13220: 101.L_udiv32:
133 movne r0, #0 102 /* store original numerator and divisor, we'll need them to correct the
103 result, */
104 stmdb sp, { r0, r1, lr }
105 /* Call __div0 here if divisor is zero, otherwise it would report the wrong
106 address. */
107 mov r0, r0, lsr #1
108 bl .L_udiv31
109 /* This address is never a branch target, but is used to test lr before
110 calling __div0. */
111.L_udiv32_div0_trap:
112 ldmdb sp, { r2, r3, lr }
113 /* Move the low bit of the original numerator to the carry bit */
114 movs r2, r2, lsr #1
115 /* Shift the remainder left one and add in the carry bit */
116 adc r1, r1, r1
117 /* Subtract the original divisor from the remainder, setting carry if the
118 result is non-negative */
119 subs r1, r1, r3
120 /* Shift quotient left one and add carry bit */
121 adc r0, r0, r0
134 bx lr 122 bx lr
123.L_div0:
124 /* Check the return address, since .L_udiv32 uses bl to wrap the 31-bit
125 divider. If the return address is at .L_udiv32_div0_trap, then the
126 return address of the original caller is at sp - 4
127 */
128 adr r2, .L_udiv32_div0_trap
129 cmp r2, lr
130 subeq sp, sp, #4
131 bleq __div0
132 /* Otherwise, push lr to the stack before calling __div0 */
133 stmdb sp!, { lr }
134 bl __div0
135 .size udiv32_arm, . - udiv32_arm