summaryrefslogtreecommitdiff
path: root/apps/codecs/demac/libdemac/udiv32_arm.S
diff options
context:
space:
mode:
authorAndrew Mahone <andrew.mahone@gmail.com>2010-02-04 05:49:37 +0000
committerAndrew Mahone <andrew.mahone@gmail.com>2010-02-04 05:49:37 +0000
commit8ed7bda64cb98d491431fd130eb754c6320441a0 (patch)
treebe9b6340aaaba3820e87a57c7c3db545c9c2cf15 /apps/codecs/demac/libdemac/udiv32_arm.S
parent7ed87517f734d7d70ab6f294735a77a65bd22e42 (diff)
downloadrockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.tar.gz
rockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.zip
Move udiv32_arm.S into libdemac, as this divider is specialized for the APE codec and an optimized divider is already provided for general use in codeclib.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24506 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac/udiv32_arm.S')
-rw-r--r--apps/codecs/demac/libdemac/udiv32_arm.S300
1 files changed, 300 insertions, 0 deletions
diff --git a/apps/codecs/demac/libdemac/udiv32_arm.S b/apps/codecs/demac/libdemac/udiv32_arm.S
new file mode 100644
index 0000000000..4492492d30
--- /dev/null
+++ b/apps/codecs/demac/libdemac/udiv32_arm.S
@@ -0,0 +1,300 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Jens Arnold
11 * Copyright (C) 2009 by Andrew Mahone
12 *
13 * Optimised unsigned integer division for ARMv4
14 *
15 * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
16 * Developer's Guide
17 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
18 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
19 * Free Software Foundation, Inc.
20 *
21 * This program is free software; you can redistribute it and/or
22 * modify it under the terms of the GNU General Public License
23 * as published by the Free Software Foundation; either version 2
24 * of the License, or (at your option) any later version.
25 *
26 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
27 * KIND, either express or implied.
28 *
29 ****************************************************************************/
30
31#include "config.h"
32/* Codecs should not normally include config.h directly, but configuration
33 * macros have to be tested in this file (USE_IRAM just below, for example),
   * and codecs.h would confuse the assembler. */
34
35#ifdef USE_IRAM
36#define DIV_RECIP /* enables the reciprocal-table code guarded by DIV_RECIP further down */
37 .section .icode,"ax",%progbits /* USE_IRAM builds: place the routine in the .icode section */
38#else
39 .text /* otherwise: ordinary text section, no reciprocal table */
40#endif
41 .align
42 .global udiv32_arm /* exported entry point; defined once per ARM_ARCH variant below */
43 .type udiv32_arm,%function
44
45#if ARM_ARCH < 5
46/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
47 for dividing a 30-bit value by a 15-bit value, with two operations per
48 iteration by storing quotient and remainder together and adding the previous
49 quotient bit during trial subtraction. Modified to work with any dividend
50 and divisor both less than 1 << 30, and skipping trials by calculating bits
51 in output.

   Operands (all registers):
     dividend  - numerator; only read by the alignment steps and the first
                 trial subtraction
     divisor   - must already hold the NEGATED divisor (udiv32_arm negates it
                 with rsbs before expanding this macro); shifted left in place
     result    - scratch; ends up holding remainder (high part) and quotient
                 (low part) packed together
     bits      - scratch; number of quotient bits produced, i.e. 1 plus the
                 alignment shift applied to the divisor
     curbit    - scratch; number of unrolled iterations that are skipped
     quotient  - receives the low "bits" bits of result
     remainder - receives result >> bits
   quotient and remainder are written last, so they may alias dividend and
   divisor (udiv32_arm does exactly that). The flags are clobbered. */
52.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
53
54 mov \bits, #1 /* one quotient bit is always produced */
55 /* Shift the divisor left until it aligns with the numerator. If it already
56 has the high bit set, this is fine, everything inside .rept will be
57 skipped, and the add before and adcs after will set the one-bit result
58 to zero. */
59 cmn \divisor, \dividend, lsr #16 /* divisor is negated: carry set when (dividend >> 16) >= non-zero divisor */
60 movcs \divisor, \divisor, lsl #16
61 addcs \bits, \bits, #16
62 cmn \divisor, \dividend, lsr #8 /* same test for a further shift of 8 */
63 movcs \divisor, \divisor, lsl #8
64 addcs \bits, \bits, #8
65 cmn \divisor, \dividend, lsr #4 /* ... of 4 */
66 movcs \divisor, \divisor, lsl #4
67 addcs \bits, \bits, #4
68 cmn \divisor, \dividend, lsr #2 /* ... of 2 */
69 movcs \divisor, \divisor, lsl #2
70 addcs \bits, \bits, #2
71 cmn \divisor, \dividend, lsr #1 /* ... of 1 */
72 movcs \divisor, \divisor, lsl #1
73 addcs \bits, \bits, #1
74 adds \result, \dividend, \divisor /* first trial subtraction; carry is the first quotient bit */
75 subcc \result, \result, \divisor /* trial failed: undo it (does not touch the flags) */
76 rsb \curbit, \bits, #31 /* curbit = 31 - bits = unrolled iterations to skip */
77 add pc, pc, \curbit, lsl #3 /* computed jump: pc reads as the first .rept instruction, each iteration is 8 bytes; carry is preserved */
78 nop /* filler so that the pc value read above lands on the .rept body */
79 .rept 30
80 adcs \result, \divisor, \result, lsl #1 /* shift result left, add the previous quotient bit, trial-subtract the divisor */
81 /* Fix the remainder portion of the result. This must be done because the
82 handler for 32-bit numerators needs the remainder. */
83 subcc \result, \result, \divisor
84 .endr
85 /* Shift remainder/quotient left one, add final quotient bit */
86 adc \result, \result, \result
87 mov \remainder, \result, lsr \bits /* the part above the low "bits" bits is the remainder */
88 eor \quotient, \result, \remainder, lsl \bits /* clear the remainder part, leaving only the quotient bits */
89.endm
90
/* recip_max is the largest divisor served by the reciprocal table in
   udiv32_arm (larger divisors branch to the generic divider), and it also
   fixes the table length: recip_max - 2 words, for divisors 3..recip_max.
   The values are chosen per CPU; the reasoning behind the specific numbers
   is not visible in this file (presumably the amount of IRAM that can be
   spared - TODO confirm).
   NOTE(review): there is no fallback for a CPU that is not listed here, so
   recip_max stays undefined in that case; that only matters if DIV_RECIP
   is also defined for such a CPU. */
91#ifdef CPU_PP
92#if CONFIG_CPU == PP5020
93.set recip_max, 8384
94#elif CONFIG_CPU == PP5002
95.set recip_max, 4608
96#else
97.set recip_max, 16384 /* any other CPU_PP target */
98#endif
99#elif CONFIG_CPU == AS3525
100.set recip_max, 42752
101#elif CONFIG_CPU == S5L8701
102.set recip_max, 12800
103#elif CONFIG_CPU == S5L8700
104.set recip_max, 9088
105#endif
106
/* udiv32_arm (ARMv4 variant): unsigned 32-bit division.
 *   In:  r0 = numerator, r1 = divisor
 *   Out: r0 = quotient
 *   May clobber r1-r3, ip and the flags.
 * A zero divisor ends up at .L_div0, which hands control to __div0.
 * With DIV_RECIP, divisors 3..recip_max are divided by multiplying with a
 * precomputed reciprocal from .L_udiv_recip_table, and divisors 0..2 are
 * special-cased. Everything else uses the shift-and-subtract macro
 * ARM_DIV_31_BODY; numerators with bit 31 set take a halve / divide /
 * correct detour through .L_udiv32. */
107udiv32_arm:
108#ifdef DIV_RECIP
109 cmp r1, #3
110 bcc .L_udiv_tiny /* divisor is 0, 1 or 2 */
111 cmp r1, #recip_max
112 bhi .L_udiv /* divisor beyond the table: generic divider */
113 adr r3, .L_udiv_recip_table-12 /* first table entry is for divisor 3, so bias the base by 3 words */
114 ldr r2, [r3, r1, lsl #2] /* r2 = reciprocal for this divisor */
115 mov r3, r0 /* keep the numerator for the check below */
116 umull ip, r0, r2, r0 /* r0 = high word of numerator * reciprocal = quotient estimate */
117 mul r2, r0, r1 /* r2 = estimate * divisor */
118 cmp r3, r2
119 bxcs lr /* numerator >= estimate * divisor: accept the estimate */
120 sub r0, r0, #1 /* otherwise step the estimate down by one */
121 bx lr
122.L_udiv_tiny:
123 cmp r1, #1
124 movhi r0, r0, lsr #1 /* divisor 2: halve the numerator */
125 bxcs lr /* divisor 1 or 2: done (divisor 1 leaves r0 untouched) */
126 b .L_div0 /* divisor 0 */
127#endif
128.L_udiv:
129 /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
130 and add the next bit of the result. The correction code at .L_udiv32
131 does not need the divisor inverted, but can be modified to work with it,
132 and this allows the zero divisor test to be done early and without an
133 explicit comparison. */
134 rsbs r1, r1, #0
135#ifndef DIV_RECIP
136 beq .L_div0 /* with DIV_RECIP a zero divisor was already caught at .L_udiv_tiny */
137#endif
138 tst r0, r0
139 /* High bit must be unset, otherwise shift numerator right, calculate,
140 and correct results. As this case is very uncommon we want to avoid
141 any other delays on the main path in handling it, so the long divide
142 calls the short divide as a function. */
143 bmi .L_udiv32
144.L_udiv31:
145 ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
146 bx lr
147.L_udiv32:
148 /* Save the original numerator, the negated divisor and the return address;
149 they are needed to correct the result. */
150 stmdb sp!, { r0, r1, lr }
151 /* The three words are pushed with writeback so that no live data sits below
152 sp while .L_udiv31 runs. A zero divisor can never reach this point. */
153 mov r0, r0, lsr #1
154 bl .L_udiv31
155 ldmia sp!, { r2, r3, lr }
156 /* Move the low bit of the original numerator to the carry bit */
157 movs r2, r2, lsr #1
158 /* Shift the remainder left one and add in the carry bit */
159 adc r1, r1, r1
160 /* Subtract the original divisor from the remainder (by adding its negation,
161 which is what r3 holds), setting carry if the result is non-negative */
162 adds r1, r1, r3
163 /* Shift quotient left one and add carry bit */
164 adc r0, r0, r0
165 bx lr
166.L_div0:
167 /* __div0 expects the calling address on the top of the stack */
168 stmdb sp!, { lr }
169 mov r0, #0
170#if defined(__ARM_EABI__) || !defined(USE_IRAM)
171 bl __div0
172#else
173 ldr pc, [pc, #-4] /* pc reads 8 bytes ahead, so this loads the word below and jumps to it */
174 .word __div0
175#endif
 /* NOTE(review): nothing but table data (or the end of the function) follows,
    so __div0 is presumably not expected to return - confirm. */
176#ifdef DIV_RECIP
 /* One 32-bit reciprocal per divisor 3..recip_max, generated at assembly
    time. For a divisor that is not a power of two the entry is
    floor(2^32 / div) + 1: the division starts from 2^30 and the last two
    quotient bits are produced by two manual long-division steps (presumably
    to keep intermediate values inside the assembler's arithmetic range -
    TODO confirm). For a power of two the entry is exactly 2^32 / div. */
177.L_udiv_recip_table:
178 .set div, 3
179 .rept recip_max - 2
180 .if (div - 1) & div
181 .set q, 0x40000000 / div
182 .set r, (0x40000000 - (q * div))<<1
183 .set q, q << 1
184 .if r >= div
185 .set q, q + 1
186 .set r, r - div
187 .endif
188 .set r, r << 1
189 .set q, q << 1
190 .if r >= div
191 .set q, q + 1
192 .set r, r - div
193 .endif
194 .set q, q + 1
195 .else
196 .set q, 0x40000000 / div * 4
197 .endif
198 .word q
199 .set div, div+1
200 .endr
201#endif
202 .size udiv32_arm, . - udiv32_arm
203
204#else
/* ARMV5_UDIV32_BODY: unsigned 32-bit division for ARMv5 and later, using clz
   and a table-driven reciprocal estimate.
   Operands (all registers except div0label):
     numerator - dividend; modified on the main path
     divisor   - divisor; overwritten with intermediate values
     quotient  - receives the result (may alias numerator, as udiv32_arm does)
     bits, inv, neg - scratch
     div0label - label branched to when the divisor is zero; may be left
                 empty, in which case no zero-divisor branch is emitted
   Every path through the macro ends in "bx lr" (or in the branch to
   div0label), so the expansion returns from the enclosing function by itself.
   The byte table .L_udiv_est_table must be defined by the user of the macro
   within range of the PC-relative load below. The flags are clobbered. */
205.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label
206 cmp \numerator, \divisor
207 clz \bits, \divisor /* leaves the flags from cmp intact */
208 bcc 30f /* numerator < divisor: quotient is 0 */
209 mov \inv, \divisor, lsl \bits /* normalise: highest set bit of the divisor moves to bit 31 */
210 add \neg, pc, \inv, lsr #25 /* pc plus the top 7 bits of the normalised divisor (64..127 when non-zero) */
211 cmp \inv, #1<<31
212 ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] /* not a power of two: fetch the 8-bit estimate; must stay exactly two instructions after the add, whose pc value is this address */
213 bls 20f /* power of two (eq) or zero divisor (ne) */
214 subs \bits, \bits, #7
215 rsb \neg, \divisor, #0 /* neg = -divisor */
216 movpl \divisor, \inv, lsl \bits /* clz >= 7: scale the estimate up by clz - 7 */
217 bmi 10f /* clz < 7: large divisor, handled separately */
218 mul \inv, \divisor, \neg
219 smlawt \divisor, \divisor, \inv, \divisor /* presumably a Newton-Raphson style refinement of the reciprocal - TODO confirm */
220 mul \inv, \divisor, \neg
221 /* This will save a cycle on ARMv6, but does not produce a correct result
222 if numerator sign bit is set. This case accounts for about 1 in 10^7 of
223 divisions, done by the APE decoder, so we specialize for the more common
224 case and handle the uncommon large-numerator separately */
225#if ARM_ARCH >= 6
226 tst \numerator, \numerator /* N = sign bit of the numerator */
227 smmla \divisor, \divisor, \inv, \divisor /* second refinement step; does not alter the flags */
228 bmi 40f /* sign bit set: use the unsigned multiply path */
229 smmul \inv, \numerator, \divisor /* inv = quotient estimate */
230#else
231 mov \bits, #0
232 smlal \bits, \divisor, \inv, \divisor /* second refinement step via a 64-bit accumulate */
233 umull \bits, \inv, \numerator, \divisor /* inv = high word of numerator * reciprocal = quotient estimate */
234#endif
235 add \numerator, \numerator, \neg /* numerator - divisor */
236 mla \divisor, \inv, \neg, \numerator /* numerator - divisor - estimate * divisor */
237 mov \quotient, \inv
238 cmn \divisor, \neg /* subtract the divisor once more; the flags drive the correction below */
239 addcc \quotient, \quotient, #1
240 addpl \quotient, \quotient, #2
241 bx lr
24210: /* large divisor: clz(divisor) < 7 */
243 rsb \bits, \bits, #0 /* bits = 7 - clz */
244 sub \inv, \inv, #4
245 mov \divisor, \inv, lsr \bits /* scale the reduced estimate down instead of up */
246 umull \bits, \inv, \numerator, \divisor /* inv = quotient estimate */
247 mla \divisor, \inv, \neg, \numerator /* remainder for that estimate */
248 mov \quotient, \inv
249 cmn \neg, \divisor, lsr #1 /* carry set if remainder / 2 >= divisor */
250 addcs \divisor, \divisor, \neg, lsl #1 /* remainder -= 2 * divisor */
251 addcs \quotient, \quotient, #2
252 cmn \neg, \divisor /* carry set if remainder >= divisor */
253 addcs \quotient, \quotient, #1
254 bx lr
25520: /* divisor is a power of two (Z set) or zero (Z clear) */
256 rsb \bits, \bits, #31 /* bits = log2(divisor); needed whether or not a div0 label was supplied; flags untouched */
257.ifnc "", "\div0label"
258 bne \div0label
259.endif
260 mov \quotient, \numerator, lsr \bits
261 bx lr
26230: /* numerator < divisor */
263 mov \quotient, #0
264 bx lr
265#if ARM_ARCH >= 6
26640: /* ARMv6 only: numerator has its sign bit set */
267 umull \bits, \inv, \numerator, \divisor /* unsigned multiply in place of smmul */
268 add \numerator, \numerator, \neg
269 mla \divisor, \inv, \neg, \numerator
270 mov \quotient, \inv
271 cmn \divisor, \neg /* same correction sequence as on the main path */
272 addcc \quotient, \quotient, #1
273 addpl \quotient, \quotient, #2
274 bx lr
275#endif
276.endm
277
/* udiv32_arm (ARMv5 and later): unsigned 32-bit division.
 *   In:  r0 = numerator, r1 = divisor
 *   Out: r0 = quotient
 *   r1, r2, r3, ip and the flags are used as scratch by the macro.
 * Every path of ARMV5_UDIV32_BODY returns with "bx lr", so .L_div0 is only
 * reached through the macro's zero-divisor branch. */
278udiv32_arm:
279 ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0
280.L_div0:
281 /* __div0 expects the calling address on the top of the stack */
282 stmdb sp!, { lr }
283 mov r0, #0
284#if defined(__ARM_EABI__) || !defined(USE_IRAM)
285 bl __div0
286#else
287 ldr pc, [pc, #-4] /* pc reads 8 bytes ahead, so this loads the word below and jumps to it */
288 .word __div0
289#endif
 /* NOTE(review): only table data follows, so __div0 is presumably not
    expected to return - confirm. */
 /* 8-bit reciprocal estimates read by ARMV5_UDIV32_BODY, indexed by the top
    7 bits of the normalised divisor minus 64 (the macro's load offset
    subtracts 64). The values appear to be floor(2^14 / (64 + i)), with the
    first entry capped at 0xff. */
290.L_udiv_est_table:
291 .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
292 .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
293 .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
294 .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
295 .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
296 .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
297 .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
298 .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
299#endif
 /* This directive sits after the #endif, so it is assembled for both
    variants; the ARMv4 branch additionally carries its own .size line. */
300 .size udiv32_arm, . - udiv32_arm