diff options
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/lib/udiv32_armv4.S | 60 |
1 files changed, 24 insertions, 36 deletions
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S index ae2bddb20e..c4aea14093 100644 --- a/apps/codecs/lib/udiv32_armv4.S +++ b/apps/codecs/lib/udiv32_armv4.S | |||
@@ -44,23 +44,21 @@ | |||
44 | has the high bit set, this is fine, everything inside .rept will be | 44 | has the high bit set, this is fine, everything inside .rept will be |
45 | skipped, and the add before and adcs after will set the one-bit result | 45 | skipped, and the add before and adcs after will set the one-bit result |
46 | to zero. */ | 46 | to zero. */ |
47 | cmp \divisor, \dividend, lsr #16 | 47 | cmn \divisor, \dividend, lsr #16 |
48 | movls \divisor, \divisor, lsl #16 | 48 | movcs \divisor, \divisor, lsl #16 |
49 | addls \bits, \bits, #16 | 49 | addcs \bits, \bits, #16 |
50 | cmp \divisor, \dividend, lsr #8 | 50 | cmn \divisor, \dividend, lsr #8 |
51 | movls \divisor, \divisor, lsl #8 | 51 | movcs \divisor, \divisor, lsl #8 |
52 | addls \bits, \bits, #8 | 52 | addcs \bits, \bits, #8 |
53 | cmp \divisor, \dividend, lsr #4 | 53 | cmn \divisor, \dividend, lsr #4 |
54 | movls \divisor, \divisor, lsl #4 | 54 | movcs \divisor, \divisor, lsl #4 |
55 | addls \bits, \bits, #4 | 55 | addcs \bits, \bits, #4 |
56 | cmp \divisor, \dividend, lsr #2 | 56 | cmn \divisor, \dividend, lsr #2 |
57 | movls \divisor, \divisor, lsl #2 | 57 | movcs \divisor, \divisor, lsl #2 |
58 | addls \bits, \bits, #2 | 58 | addcs \bits, \bits, #2 |
59 | cmp \divisor, \dividend, lsr #1 | 59 | cmn \divisor, \dividend, lsr #1 |
60 | movls \divisor, \divisor, lsl #1 | 60 | movcs \divisor, \divisor, lsl #1 |
61 | addls \bits, \bits, #1 | 61 | addcs \bits, \bits, #1 |
62 | rsbs \divisor, \divisor, #0 | ||
63 | bcs .L_div0 | ||
64 | adds \result, \dividend, \divisor | 62 | adds \result, \dividend, \divisor |
65 | subcc \result, \result, \divisor | 63 | subcc \result, \result, \divisor |
66 | rsb \curbit, \bits, #31 | 64 | rsb \curbit, \bits, #31 |
@@ -88,6 +86,13 @@ | |||
88 | .type udiv32_arm,%function | 86 | .type udiv32_arm,%function |
89 | 87 | ||
90 | udiv32_arm: | 88 | udiv32_arm: |
89 | /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor | ||
90 | and add the next bit of the result. The correction code at .L_udiv32 | ||
91 | does not need the divisor inverted, but can be modified to work with it, | ||
92 | and this allows the zero divisor test to be done early and without an | ||
93 | explicit comparison. */ | ||
94 | rsbs r1, r1, #0 | ||
95 | beq .L_div0 | ||
91 | tst r0, r0 | 96 | tst r0, r0 |
92 | /* High bit must be unset, otherwise shift numerator right, calculate, | 97 | /* High bit must be unset, otherwise shift numerator right, calculate, |
93 | and correct results. As this case is very uncommon we want to avoid | 98 | and correct results. As this case is very uncommon we want to avoid |
@@ -106,9 +111,6 @@ udiv32_arm: | |||
106 | address. */ | 111 | address. */ |
107 | mov r0, r0, lsr #1 | 112 | mov r0, r0, lsr #1 |
108 | bl .L_udiv31 | 113 | bl .L_udiv31 |
109 | /* This address is never a branch target, but is used to test lr before | ||
110 | calling __div0. */ | ||
111 | .L_udiv32_div0_trap: | ||
112 | ldmdb sp, { r2, r3, lr } | 114 | ldmdb sp, { r2, r3, lr } |
113 | /* Move the low bit of the original numerator to the carry bit */ | 115 | /* Move the low bit of the original numerator to the carry bit */ |
114 | movs r2, r2, lsr #1 | 116 | movs r2, r2, lsr #1 |
@@ -116,26 +118,12 @@ udiv32_arm: | |||
116 | adc r1, r1, r1 | 118 | adc r1, r1, r1 |
117 | /* Subtract the original divisor from the remainder, setting carry if the | 119 | /* Subtract the original divisor from the remainder, setting carry if the |
118 | result is non-negative */ | 120 | result is non-negative */ |
119 | subs r1, r1, r3 | 121 | adds r1, r1, r3 |
120 | /* Shift quotient left one and add carry bit */ | 122 | /* Shift quotient left one and add carry bit */ |
121 | adc r0, r0, r0 | 123 | adc r0, r0, r0 |
122 | bx lr | 124 | bx lr |
123 | .L_div0: | 125 | .L_div0: |
124 | /* Check the return address, since .L_udiv32 uses bl to wrap the 31-bit | 126 | /* __div0 expects the calling address on the top of the stack */ |
125 | divider. If the return address is at .L_udiv32_div0_trap, then the | ||
126 | return address of the original caller is at sp - 4 | ||
127 | */ | ||
128 | adr r2, .L_udiv32_div0_trap | ||
129 | cmp r2, lr | ||
130 | subeq sp, sp, #4 | ||
131 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
132 | bleq __div0 | ||
133 | #else | ||
134 | ldr r3, =__div0 | ||
135 | moveq lr, pc | ||
136 | bxeq r3 | ||
137 | #endif | ||
138 | /* Otherwise, push lr to the stack before calling __div0 */ | ||
139 | stmdb sp!, { lr } | 127 | stmdb sp!, { lr } |
140 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | 128 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) |
141 | bl __div0 | 129 | bl __div0 |