diff options
author | Andrew Mahone <andrew.mahone@gmail.com> | 2010-02-04 05:49:37 +0000 |
---|---|---|
committer | Andrew Mahone <andrew.mahone@gmail.com> | 2010-02-04 05:49:37 +0000 |
commit | 8ed7bda64cb98d491431fd130eb754c6320441a0 (patch) | |
tree | be9b6340aaaba3820e87a57c7c3db545c9c2cf15 /apps/codecs/demac/libdemac | |
parent | 7ed87517f734d7d70ab6f294735a77a65bd22e42 (diff) | |
download | rockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.tar.gz rockbox-8ed7bda64cb98d491431fd130eb754c6320441a0.zip |
Move udiv32_arm.S into libdemac, as this divider is specialized for the APE codec and an optimized divider is already provided for general use in codeclib.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24506 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/demac/libdemac')
-rw-r--r-- | apps/codecs/demac/libdemac/SOURCES | 1 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/demac_config.h | 14 | ||||
-rw-r--r-- | apps/codecs/demac/libdemac/udiv32_arm.S | 300 |
3 files changed, 311 insertions, 4 deletions
diff --git a/apps/codecs/demac/libdemac/SOURCES b/apps/codecs/demac/libdemac/SOURCES index 5a4482376c..f9f8f217c7 100644 --- a/apps/codecs/demac/libdemac/SOURCES +++ b/apps/codecs/demac/libdemac/SOURCES | |||
@@ -2,6 +2,7 @@ crc.c | |||
2 | predictor.c | 2 | predictor.c |
3 | #ifdef CPU_ARM | 3 | #ifdef CPU_ARM |
4 | predictor-arm.S | 4 | predictor-arm.S |
5 | udiv32_arm.S | ||
5 | #elif defined CPU_COLDFIRE | 6 | #elif defined CPU_COLDFIRE |
6 | predictor-cf.S | 7 | predictor-cf.S |
7 | #endif | 8 | #endif |
diff --git a/apps/codecs/demac/libdemac/demac_config.h b/apps/codecs/demac/libdemac/demac_config.h index 13166f69ae..7388aa1059 100644 --- a/apps/codecs/demac/libdemac/demac_config.h +++ b/apps/codecs/demac/libdemac/demac_config.h | |||
@@ -91,10 +91,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
91 | 91 | ||
92 | /* Defaults */ | 92 | /* Defaults */ |
93 | 93 | ||
94 | #ifndef UDIV32 | ||
95 | #define UDIV32(a, b) (a / b) | ||
96 | #endif | ||
97 | |||
98 | #ifndef FILTER_HISTORY_SIZE | 94 | #ifndef FILTER_HISTORY_SIZE |
99 | #define FILTER_HISTORY_SIZE 512 | 95 | #define FILTER_HISTORY_SIZE 512 |
100 | #endif | 96 | #endif |
@@ -109,6 +105,16 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA | |||
109 | 105 | ||
110 | 106 | ||
111 | #ifndef __ASSEMBLER__ | 107 | #ifndef __ASSEMBLER__ |
108 | |||
109 | #if defined(CPU_ARM) && (ARM_ARCH < 5 || defined(USE_IRAM)) | ||
110 | /* optimised unsigned integer division, used on ARMv4 and on any ARM target built with USE_IRAM */ | ||
111 | unsigned udiv32_arm(unsigned a, unsigned b); | ||
112 | #define UDIV32(a, b) udiv32_arm(a, b) | ||
113 | #else | ||
114 | /* default: plain C division. NOTE(review): a and b are not parenthesised in the expansion, so only simple operands are safe */ | ||
115 | #define UDIV32(a, b) (a / b) | ||
116 | #endif | ||
117 | |||
112 | #include <inttypes.h> | 118 | #include <inttypes.h> |
113 | #if FILTER_BITS == 32 | 119 | #if FILTER_BITS == 32 |
114 | typedef int32_t filter_int; | 120 | typedef int32_t filter_int; |
diff --git a/apps/codecs/demac/libdemac/udiv32_arm.S b/apps/codecs/demac/libdemac/udiv32_arm.S new file mode 100644 index 0000000000..4492492d30 --- /dev/null +++ b/apps/codecs/demac/libdemac/udiv32_arm.S | |||
@@ -0,0 +1,300 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2008 by Jens Arnold | ||
11 | * Copyright (C) 2009 by Andrew Mahone | ||
12 | * | ||
13 | * Optimised unsigned integer division for ARMv4 | ||
14 | * | ||
15 | * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System | ||
16 | * Developer's Guide | ||
17 | * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) | ||
18 | * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 | ||
19 | * Free Software Foundation, Inc. | ||
20 | * | ||
21 | * This program is free software; you can redistribute it and/or | ||
22 | * modify it under the terms of the GNU General Public License | ||
23 | * as published by the Free Software Foundation; either version 2 | ||
24 | * of the License, or (at your option) any later version. | ||
25 | * | ||
26 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
27 | * KIND, either express or implied. | ||
28 | * | ||
29 | ****************************************************************************/ | ||
30 | |||
31 | #include "config.h" | ||
32 | /* Codecs should not normally do this, but we need to check a macro, and | ||
33 | * codecs.h would confuse the assembler. */ | ||
34 | |||
35 | #ifdef USE_IRAM | ||
36 | #define DIV_RECIP /* enables the reciprocal-table fast path in the ARMv4 udiv32_arm below */ | ||
37 | .section .icode,"ax",%progbits | ||
38 | #else | ||
39 | .text | ||
40 | #endif | ||
41 | .align | ||
42 | .global udiv32_arm | ||
43 | .type udiv32_arm,%function | ||
44 | |||
45 | #if ARM_ARCH < 5 | ||
46 | /* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2) | ||
47 | for dividing a 30-bit value by a 15-bit value, with two operations per | ||
48 | iteration by storing quotient and remainder together and adding the previous | ||
49 | quotient bit during trial subtraction. Modified to work with any dividend | ||
50 | and divisor both less than 1 << 30, and skipping trials by calculating bits | ||
51 | in output. The divisor must be passed in already negated, because the trial subtractions are done with cmn/adds/adcs. */ | ||
52 | .macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder | ||
53 | |||
54 | mov \bits, #1 | ||
55 | /* Shift the divisor left until it aligns with the numerator. If it already | ||
56 | has the high bit set, this is fine, everything inside .rept will be | ||
57 | skipped, and the adds before and adc after will set the one-bit result | ||
58 | to zero. \bits ends up as the number of quotient bits that will be produced. */ | ||
59 | cmn \divisor, \dividend, lsr #16 | ||
60 | movcs \divisor, \divisor, lsl #16 | ||
61 | addcs \bits, \bits, #16 | ||
62 | cmn \divisor, \dividend, lsr #8 | ||
63 | movcs \divisor, \divisor, lsl #8 | ||
64 | addcs \bits, \bits, #8 | ||
65 | cmn \divisor, \dividend, lsr #4 | ||
66 | movcs \divisor, \divisor, lsl #4 | ||
67 | addcs \bits, \bits, #4 | ||
68 | cmn \divisor, \dividend, lsr #2 | ||
69 | movcs \divisor, \divisor, lsl #2 | ||
70 | addcs \bits, \bits, #2 | ||
71 | cmn \divisor, \dividend, lsr #1 | ||
72 | movcs \divisor, \divisor, lsl #1 | ||
73 | addcs \bits, \bits, #1 | ||
74 | adds \result, \dividend, \divisor | ||
75 | subcc \result, \result, \divisor | ||
76 | rsb \curbit, \bits, #31 /* number of unrolled steps to skip */ | ||
77 | add pc, pc, \curbit, lsl #3 /* each step is two instructions (8 bytes); pc reads 8 bytes ahead, so the nop is the slot that is always passed over */ | ||
78 | nop | ||
79 | .rept 30 | ||
80 | adcs \result, \divisor, \result, lsl #1 | ||
81 | /* Fix the remainder portion of the result. This must be done because the | ||
82 | handler for 32-bit numerators needs the remainder. */ | ||
83 | subcc \result, \result, \divisor | ||
84 | .endr | ||
85 | /* Shift remainder/quotient left one, add final quotient bit */ | ||
86 | adc \result, \result, \result | ||
87 | mov \remainder, \result, lsr \bits /* remainder = everything above the low \bits bits */ | ||
88 | eor \quotient, \result, \remainder, lsl \bits /* quotient = the low \bits bits */ | ||
89 | .endm | ||
90 | |||
91 | #ifdef CPU_PP /* recip_max: largest divisor served by the reciprocal table in udiv32_arm, chosen per CPU. NOTE(review): no value is set for non-PP CPUs missing from this list */ | ||
92 | #if CONFIG_CPU == PP5020 | ||
93 | .set recip_max, 8384 | ||
94 | #elif CONFIG_CPU == PP5002 | ||
95 | .set recip_max, 4608 | ||
96 | #else | ||
97 | .set recip_max, 16384 | ||
98 | #endif | ||
99 | #elif CONFIG_CPU == AS3525 | ||
100 | .set recip_max, 42752 | ||
101 | #elif CONFIG_CPU == S5L8701 | ||
102 | .set recip_max, 12800 | ||
103 | #elif CONFIG_CPU == S5L8700 | ||
104 | .set recip_max, 9088 | ||
105 | #endif | ||
106 | |||
107 | udiv32_arm: /* unsigned udiv32_arm(unsigned a, unsigned b): a in r0, b in r1, quotient returned in r0 */ | ||
108 | #ifdef DIV_RECIP | ||
109 | cmp r1, #3 | ||
110 | bcc .L_udiv_tiny /* divisors 0, 1 and 2 are handled separately */ | ||
111 | cmp r1, #recip_max | ||
112 | bhi .L_udiv /* divisor beyond the table: use the shift-and-subtract divider */ | ||
113 | adr r3, .L_udiv_recip_table-12 /* table starts at divisor 3, so bias the base by three words */ | ||
114 | ldr r2, [r3, r1, lsl #2] /* r2 = table entry for this divisor */ | ||
115 | mov r3, r0 /* keep the numerator */ | ||
116 | umull ip, r0, r2, r0 /* r0 = high word of entry * numerator = quotient estimate */ | ||
117 | mul r2, r0, r1 /* r2 = estimate * divisor */ | ||
118 | cmp r3, r2 | ||
119 | bxcs lr /* numerator >= estimate * divisor: return the estimate */ | ||
120 | sub r0, r0, #1 /* otherwise return estimate - 1 */ | ||
121 | bx lr | ||
122 | .L_udiv_tiny: | ||
123 | cmp r1, #1 | ||
124 | movhi r0, r0, lsr #1 /* divisor 2: halve the numerator */ | ||
125 | bxcs lr /* divisor 1 or 2: done */ | ||
126 | b .L_div0 /* divisor 0 */ | ||
127 | #endif | ||
128 | .L_udiv: | ||
129 | /* Negate divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor | ||
130 | and add the next bit of the result. The correction code at .L_udiv32 | ||
131 | does not need the divisor negated, but is written to work with it, | ||
132 | and this allows the zero divisor test to be done early and without an | ||
133 | explicit comparison. */ | ||
134 | rsbs r1, r1, #0 | ||
135 | #ifndef DIV_RECIP | ||
136 | beq .L_div0 | ||
137 | #endif | ||
138 | tst r0, r0 | ||
139 | /* High bit must be unset, otherwise shift numerator right, calculate, | ||
140 | and correct results. As this case is very uncommon we want to avoid | ||
141 | any other delays on the main path in handling it, so the long divide | ||
142 | calls the short divide as a function. */ | ||
143 | bmi .L_udiv32 | ||
144 | .L_udiv31: | ||
145 | ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1 /* in: r0 = numerator, r1 = negated divisor; out: r0 = quotient, r1 = remainder; scratch: r2, r3, ip */ | ||
146 | bx lr | ||
147 | .L_udiv32: | ||
148 | /* Store the original numerator and (negated) divisor, we'll need them to correct the | ||
149 | result. They are written just below sp without moving it; .L_udiv31 does not use the stack. */ | ||
150 | stmdb sp, { r0, r1, lr } | ||
151 | /* A zero divisor never reaches this point: it is diverted to .L_div0 on entry, while lr | ||
152 | still holds the caller's address. Halve the numerator so it fits the 31-bit divider. */ | ||
153 | mov r0, r0, lsr #1 | ||
154 | bl .L_udiv31 | ||
155 | ldmdb sp, { r2, r3, lr } | ||
156 | /* Move the low bit of the original numerator to the carry bit */ | ||
157 | movs r2, r2, lsr #1 | ||
158 | /* Shift the remainder left one and add in the carry bit */ | ||
159 | adc r1, r1, r1 | ||
160 | /* Subtract the divisor from the remainder by adding its negated value (r3), setting carry if the | ||
161 | result is non-negative */ | ||
162 | adds r1, r1, r3 | ||
163 | /* Shift quotient left one and add carry bit */ | ||
164 | adc r0, r0, r0 | ||
165 | bx lr | ||
166 | .L_div0: | ||
167 | /* __div0 expects the calling address on the top of the stack */ | ||
168 | stmdb sp!, { lr } | ||
169 | mov r0, #0 | ||
170 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
171 | bl __div0 /* NOTE(review): no instruction follows, so __div0 is assumed not to return - confirm */ | ||
172 | #else | ||
173 | ldr pc, [pc, #-4] /* load pc from the word that follows: a jump to __div0 */ | ||
174 | .word __div0 | ||
175 | #endif | ||
176 | #ifdef DIV_RECIP | ||
177 | .L_udiv_recip_table: /* one word per divisor from 3 to recip_max. Non-powers of two get floor(2^32 / div) + 1, built from 2^30 / div by two manual long-division steps; powers of two get exactly 2^32 / div */ | ||
178 | .set div, 3 | ||
179 | .rept recip_max - 2 | ||
180 | .if (div - 1) & div | ||
181 | .set q, 0x40000000 / div | ||
182 | .set r, (0x40000000 - (q * div))<<1 | ||
183 | .set q, q << 1 | ||
184 | .if r >= div | ||
185 | .set q, q + 1 | ||
186 | .set r, r - div | ||
187 | .endif | ||
188 | .set r, r << 1 | ||
189 | .set q, q << 1 | ||
190 | .if r >= div | ||
191 | .set q, q + 1 | ||
192 | .set r, r - div | ||
193 | .endif | ||
194 | .set q, q + 1 | ||
195 | .else | ||
196 | .set q, 0x40000000 / div * 4 | ||
197 | .endif | ||
198 | .word q | ||
199 | .set div, div+1 | ||
200 | .endr | ||
201 | #endif | ||
202 | .size udiv32_arm, . - udiv32_arm | ||
203 | |||
204 | #else | ||
205 | .macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label /* unsigned divide for ARMv5 and later; every path ends in bx lr except a zero divisor, which branches to div0label */ | ||
206 | cmp \numerator, \divisor | ||
207 | clz \bits, \divisor | ||
208 | bcc 30f /* numerator < divisor: quotient is 0 */ | ||
209 | mov \inv, \divisor, lsl \bits /* normalise: shift the divisor left by its leading-zero count */ | ||
210 | add \neg, pc, \inv, lsr #25 | ||
211 | cmp \inv, #1<<31 | ||
212 | ldrhib \inv, [\neg, #.L_udiv_est_table-.-64] /* estimate byte, loaded only when the normalised divisor exceeds 1<<31 */ | ||
213 | bls 20f /* normalised divisor <= 1<<31: power of two, or zero */ | ||
214 | subs \bits, \bits, #7 | ||
215 | rsb \neg, \divisor, #0 | ||
216 | movpl \divisor, \inv, lsl \bits | ||
217 | bmi 10f | ||
218 | mul \inv, \divisor, \neg | ||
219 | smlawt \divisor, \divisor, \inv, \divisor | ||
220 | mul \inv, \divisor, \neg | ||
221 | /* This will save a cycle on ARMv6, but does not produce a correct result | ||
222 | if numerator sign bit is set. This case accounts for about 1 in 10^7 of | ||
223 | divisions done by the APE decoder, so we specialize for the more common | ||
224 | case and handle the uncommon large-numerator case separately (label 40). */ | ||
225 | #if ARM_ARCH >= 6 | ||
226 | tst \numerator, \numerator | ||
227 | smmla \divisor, \divisor, \inv, \divisor | ||
228 | bmi 40f | ||
229 | smmul \inv, \numerator, \divisor | ||
230 | #else | ||
231 | mov \bits, #0 | ||
232 | smlal \bits, \divisor, \inv, \divisor | ||
233 | umull \bits, \inv, \numerator, \divisor | ||
234 | #endif | ||
235 | add \numerator, \numerator, \neg | ||
236 | mla \divisor, \inv, \neg, \numerator | ||
237 | mov \quotient, \inv | ||
238 | cmn \divisor, \neg | ||
239 | addcc \quotient, \quotient, #1 | ||
240 | addpl \quotient, \quotient, #2 | ||
241 | bx lr | ||
242 | 10: /* reached when clz(divisor) < 7, i.e. divisor >= 1<<25 */ | ||
243 | rsb \bits, \bits, #0 | ||
244 | sub \inv, \inv, #4 | ||
245 | mov \divisor, \inv, lsr \bits | ||
246 | umull \bits, \inv, \numerator, \divisor | ||
247 | mla \divisor, \inv, \neg, \numerator | ||
248 | mov \quotient, \inv | ||
249 | cmn \neg, \divisor, lsr #1 | ||
250 | addcs \divisor, \divisor, \neg, lsl #1 | ||
251 | addcs \quotient, \quotient, #2 | ||
252 | cmn \neg, \divisor | ||
253 | addcs \quotient, \quotient, #1 | ||
254 | bx lr | ||
255 | 20: /* flags still come from the cmp against 1<<31: ne means the divisor was zero. NOTE(review): the rsb below sits inside the .ifnc, so with an empty div0label the shift count would stay clz rather than 31 - clz */ | ||
256 | .ifnc "", "\div0label" | ||
257 | rsb \bits, \bits, #31 | ||
258 | bne \div0label | ||
259 | .endif | ||
260 | mov \quotient, \numerator, lsr \bits /* power-of-two divisor: the quotient is a right shift */ | ||
261 | bx lr | ||
262 | 30: /* numerator < divisor */ | ||
263 | mov \quotient, #0 | ||
264 | bx lr | ||
265 | #if ARM_ARCH >= 6 | ||
266 | 40: /* ARMv6 only: numerator has its sign bit set, so use the unsigned umull in place of smmul */ | ||
267 | umull \bits, \inv, \numerator, \divisor | ||
268 | add \numerator, \numerator, \neg | ||
269 | mla \divisor, \inv, \neg, \numerator | ||
270 | mov \quotient, \inv | ||
271 | cmn \divisor, \neg | ||
272 | addcc \quotient, \quotient, #1 | ||
273 | addpl \quotient, \quotient, #2 | ||
274 | bx lr | ||
275 | #endif | ||
276 | .endm | ||
277 | |||
278 | udiv32_arm: /* unsigned udiv32_arm(unsigned a, unsigned b): a in r0, b in r1, quotient returned in r0 */ | ||
279 | ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0 /* numerator and quotient share r0; r2, r3 and ip are scratch */ | ||
280 | .L_div0: | ||
281 | /* __div0 expects the calling address on the top of the stack */ | ||
282 | stmdb sp!, { lr } | ||
283 | mov r0, #0 | ||
284 | #if defined(__ARM_EABI__) || !defined(USE_IRAM) | ||
285 | bl __div0 /* NOTE(review): the estimate table follows directly, so __div0 is assumed not to return - confirm */ | ||
286 | #else | ||
287 | ldr pc, [pc, #-4] /* load pc from the word that follows: a jump to __div0 */ | ||
288 | .word __div0 | ||
289 | #endif | ||
290 | .L_udiv_est_table: /* 64 one-byte estimates read by the ldrhib in ARMV5_UDIV32_BODY, indexed by the top bits of the normalised divisor */ | ||
291 | .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6 | ||
292 | .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf | ||
293 | .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc | ||
294 | .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac | ||
295 | .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f | ||
296 | .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93 | ||
297 | .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89 | ||
298 | .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81 | ||
299 | #endif | ||
300 | .size udiv32_arm, . - udiv32_arm /* NOTE(review): for ARM_ARCH < 5 this repeats the .size already emitted before the #else */ | ||