summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Arnold <amiconn@rockbox.org>2008-11-05 00:10:05 +0000
committerJens Arnold <amiconn@rockbox.org>2008-11-05 00:10:05 +0000
commitfe04e40be7a26c758a82e410e58be63c1f3d571c (patch)
tree955b1557f3da7cd8362bc05d96302cac08a72ff2
parent7a835ee0c64bb941f205a2eb915cf0aaf460f1bc (diff)
downloadrockbox-fe04e40be7a26c758a82e410e58be63c1f3d571c.tar.gz
rockbox-fe04e40be7a26c758a82e410e58be63c1f3d571c.zip
Further optimised (vs. libgcc) unsigned 32 bit division for ARMv4 (based on the ARMv5(+) version from libgcc), in IRAM on PP for better performance on PP5002, and put into the codeclib for possible reuse. APE -c1000 is now usable on both PP502x and PP5002 (~138% realtime, they're on par now). Gigabeat F/X should also see an APE speedup.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19009 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/demac/libdemac/rangecoding.h14
-rw-r--r--apps/codecs/lib/SOURCES3
-rw-r--r--apps/codecs/lib/codeclib.h9
-rw-r--r--apps/codecs/lib/udiv32_armv4.S114
4 files changed, 137 insertions, 3 deletions
diff --git a/apps/codecs/demac/libdemac/rangecoding.h b/apps/codecs/demac/libdemac/rangecoding.h
index c96886e32b..645fd1ad92 100644
--- a/apps/codecs/demac/libdemac/rangecoding.h
+++ b/apps/codecs/demac/libdemac/rangecoding.h
@@ -49,6 +49,14 @@ removing the rc parameter from each function (and the RNGC macro)).
49 49
50*/ 50*/
51 51
#ifdef ROCKBOX
#include "../lib/codeclib.h"
/* for UDIV32() */
#endif

#ifndef UDIV32
/* Portable fallback used when no optimised UDIV32() has been provided.
 * Both arguments and the whole expansion are parenthesised so that compound
 * expressions such as UDIV32(x + 1, y + 1) divide the intended operands. */
#define UDIV32(a, b) ((a) / (b))
#endif
52 60
53/* BITSTREAM READING FUNCTIONS */ 61/* BITSTREAM READING FUNCTIONS */
54 62
@@ -121,15 +129,15 @@ static inline void range_dec_normalize(void)
121static inline int range_decode_culfreq(int tot_f) 129static inline int range_decode_culfreq(int tot_f)
122{ 130{
123 range_dec_normalize(); 131 range_dec_normalize();
124 rc.help = rc.range / tot_f; 132 rc.help = UDIV32(rc.range, tot_f);
125 return rc.low / rc.help; 133 return UDIV32(rc.low, rc.help);
126} 134}
127 135
128static inline int range_decode_culshift(int shift) 136static inline int range_decode_culshift(int shift)
129{ 137{
130 range_dec_normalize(); 138 range_dec_normalize();
131 rc.help = rc.range >> shift; 139 rc.help = rc.range >> shift;
132 return rc.low / rc.help; 140 return UDIV32(rc.low, rc.help);
133} 141}
134 142
135 143
diff --git a/apps/codecs/lib/SOURCES b/apps/codecs/lib/SOURCES
index 9c6d4e7ff6..8099620098 100644
--- a/apps/codecs/lib/SOURCES
+++ b/apps/codecs/lib/SOURCES
@@ -5,6 +5,9 @@ codeclib.c
5mdct2.c 5mdct2.c
6#ifdef CPU_ARM 6#ifdef CPU_ARM
7mdct_arm.S 7mdct_arm.S
8#if ARM_ARCH == 4
9udiv32_armv4.S
10#endif
8#endif 11#endif
9 12
10#elif defined(SIMULATOR) && defined(__APPLE__) 13#elif defined(SIMULATOR) && defined(__APPLE__)
diff --git a/apps/codecs/lib/codeclib.h b/apps/codecs/lib/codeclib.h
index 744accb8aa..477818a23d 100644
--- a/apps/codecs/lib/codeclib.h
+++ b/apps/codecs/lib/codeclib.h
@@ -57,6 +57,15 @@ void qsort(void *base, size_t nmemb, size_t size, int(*compar)(const void *, con
57 57
58extern void mdct_backward(int n, int32_t *in, int32_t *out); 58extern void mdct_backward(int n, int32_t *in, int32_t *out);
59 59
#if defined(CPU_ARM) && (ARM_ARCH == 4)
/* Optimised unsigned 32 bit division for ARMv4, implemented in
 * udiv32_armv4.S (placed in the .icode section when USE_IRAM is defined).
 * Returns a / b; that routine returns 0 when b is 0. */
unsigned udiv32_arm(unsigned a, unsigned b);
#define UDIV32(a, b) udiv32_arm(a, b)
#else
/* Default: plain C division. Arguments and result are parenthesised so that
 * compound expressions passed as a or b are divided as a whole. */
#define UDIV32(a, b) ((a) / (b))
#endif
68
60/* Various codec helper functions */ 69/* Various codec helper functions */
61 70
62int codec_init(void); 71int codec_init(void);
diff --git a/apps/codecs/lib/udiv32_armv4.S b/apps/codecs/lib/udiv32_armv4.S
new file mode 100644
index 0000000000..a659a9eb8e
--- /dev/null
+++ b/apps/codecs/lib/udiv32_armv4.S
@@ -0,0 +1,114 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Jens Arnold
11 *
12 * Optimised unsigned integer division for ARMv4
13 *
14 * Based on: libgcc routines for ARM cpu.
15 * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
16 * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
17 * Free Software Foundation, Inc.
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation; either version 2
22 * of the License, or (at your option) any later version.
23 *
24 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
25 * KIND, either express or implied.
26 *
27 ****************************************************************************/
28
29#include "config.h"
30/* Codecs should not normally include config.h directly, but we need to check
31 * the USE_IRAM macro, and codecs.h would confuse the assembler. */
32
/* ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Fully unrolled shift-and-subtract unsigned 32 bit division.
 *
 *   in:   \dividend, \divisor
 *   out:  \result   = quotient
 *         \dividend = remainder
 *         \curbit and the condition flags are clobbered
 *
 * The prologue narrows down how many quotient bits can be non-zero by
 * comparing the divisor against the dividend shifted right by 16, 8, 4 and 2,
 * and turns that into the number of instructions to skip in the unrolled
 * sequence (every step is exactly 3 instructions, so all constants are
 * multiples of 3). The estimate is deliberately coarse and may start up to
 * three bit positions above the real top bit of the quotient.
 *
 * Because of that over-estimate, each step compares the divisor with
 * (dividend >> shift) instead of comparing the dividend with
 * (divisor << shift): the left shift can overflow 32 bits and would then set
 * a bogus quotient bit (e.g. 0xC0000000 / 3 starts at shift 31, where
 * 3 << 31 truncates to 0x80000000). The right shift cannot overflow, and the
 * subtraction is only executed when divisor << shift <= dividend, i.e. when
 * the shifted divisor is known to fit.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

    mov     \result, \dividend
    mov     \curbit, #90                @ 3 * 30, (calculating branch dest)
    cmp     \divisor, \result, lsr #16
    movls   \result, \result, lsr #16
    subls   \curbit, \curbit, #48       @ 3 * 16
    cmp     \divisor, \result, lsr #8
    movls   \result, \result, lsr #8
    subls   \curbit, \curbit, #24       @ 3 * 8
    cmp     \divisor, \result, lsr #4
    movls   \result, \result, lsr #4
    subls   \curbit, \curbit, #12       @ 3 * 4
    cmp     \divisor, \result, lsr #2
    subls   \curbit, \curbit, #6        @ 3 * 2
    @ calculation is only done down to shift=2, because the shift=1 step
    @ would need 3 more cycles, but would only gain 1.5 cycles on average
    mov     \result, #0
    @ pc reads as the address of this instruction + 8, i.e. the first
    @ unrolled step below; the nop fills the slot in between.
    add     pc, pc, \curbit, lsl #2
    nop
    .set    shift, 32
    .rept   31
    .set    shift, shift - 1
    cmp     \divisor, \dividend, lsr #shift
    orrls   \result, \result, #(1 << shift)
    subls   \dividend, \dividend, \divisor, lsl #shift
    .endr
    @ shift == 0, written out so no zero-length shift has to be encoded.
    @ Kept at 3 instructions like every other step.
    cmp     \divisor, \dividend
    orrls   \result, \result, #1
    subls   \dividend, \dividend, \divisor
.endm
61
/* ARM_DIV2_ORDER divisor, order
 *
 * Binary search for the bit position of a power-of-two divisor.
 *
 *   in:   \divisor
 *   out:  \order = number of bits to shift right by in order to divide
 *         \divisor and the condition flags are clobbered
 *
 * The divisor is folded down in steps of 16, 8 and 4 bits, accumulating the
 * shift count. What is left is compared against 4: anything above adds 3,
 * otherwise (divisor >> 1) is added, which maps 1, 2, 4 to 0, 1, 2.
 */
.macro ARM_DIV2_ORDER divisor, order

    cmp     \divisor, #0x10000
    movhs   \divisor, \divisor, lsr #16
    movlo   \order, #0
    movhs   \order, #16

    cmp     \divisor, #0x100
    movhs   \divisor, \divisor, lsr #8
    addhs   \order, \order, #8

    cmp     \divisor, #0x10
    movhs   \divisor, \divisor, lsr #4
    addhs   \order, \order, #4

    cmp     \divisor, #4
    addls   \order, \order, \divisor, lsr #1
    addhi   \order, \order, #3
.endm
81
82
#ifdef USE_IRAM
    .section    .icode,"ax",%progbits
#else
    .text
#endif
    .align
    .global     udiv32_arm
    .type       udiv32_arm,%function

/* unsigned udiv32_arm(unsigned a, unsigned b)
 *
 *   in:   r0 = dividend, r1 = divisor
 *   out:  r0 = quotient; 0 is returned when the divisor is 0
 *   r2 and r3 are used as scratch.
 *
 * Trivial cases are filtered out first (divisor 0 or 1, dividend not larger
 * than the divisor, divisor a power of two); everything else goes through
 * the unrolled ARM_DIV_BODY.
 */
udiv32_arm:
    subs    r2, r1, #1                  @ r2 = divisor - 1
    bxeq    lr                          @ divisor == 1: r0 already holds a / 1
    bcc     2f                          @ borrow: divisor == 0 (flags are NE)
    cmp     r0, r1
    bls     1f                          @ dividend <= divisor: result is 1 or 0
    tst     r1, r2                      @ divisor & (divisor - 1)
    beq     3f                          @ zero: divisor is a power of two

    ARM_DIV_BODY r0, r1, r2, r3
    mov     r0, r2                      @ quotient was built in r2
    bx      lr

1:                                      @ flags from cmp r0, r1
    moveq   r0, #1                      @ dividend == divisor
2:                                      @ also entered with NE for divisor == 0
    movne   r0, #0
    bx      lr

3:                                      @ power-of-two divisor: plain shift
    ARM_DIV2_ORDER r1, r2
    mov     r0, r0, lsr r2
    bx      lr
114 bx lr