author    Nils Wallménius <nils@rockbox.org>    2010-06-17 15:24:23 +0000
committer Nils Wallménius <nils@rockbox.org>    2010-06-17 15:24:23 +0000
commit    7afea915602cf5c172d0ee2ca099f248c780b8d9 (patch)
tree      e73a24165d074dbd3adc8c6a890c0e327d63fde8
parent    231c26f8f4a40c07c2e12d779236c73bebb8411f (diff)
download  rockbox-7afea915602cf5c172d0ee2ca099f248c780b8d9.tar.gz
          rockbox-7afea915602cf5c172d0ee2ca099f248c780b8d9.zip
ARMv6 versions of X(N)PROD31 macros and MULT32 macro. Saves about 1MHz or 3% decoding vorbis on gigabeat S.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26889 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--    apps/codecs/lib/asm_arm.h    142
1 file changed, 86 insertions, 56 deletions
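For orientation, here is a portable-C sketch of the arithmetic these macros implement, read off the inline asm in the diff below. The helper names (mult32_ref and friends) are invented for illustration and are not part of asm_arm.h; the asm in the patch is the authoritative version. MULT32 returns the high 32 bits of the 64-bit signed product (what SMULL's high register or SMMUL delivers), MULT31 shifts that result up by one, and XPROD31/XNPROD31 form the cross products used by the fixed-point Vorbis code, each taken as high word << 1.

    #include <stdint.h>

    /* Illustrative reference versions; not Rockbox's generic C fallback. */
    static inline int32_t mult32_ref(int32_t x, int32_t y)
    {
        return (int32_t)(((int64_t)x * y) >> 32);   /* high word of x*y */
    }

    static inline int32_t mult31_ref(int32_t x, int32_t y)
    {
        return mult32_ref(x, y) << 1;               /* as MULT31 does */
    }

    /* XPROD31: x = hi(a*t + b*v) << 1, y = hi(b*t - a*v) << 1 */
    static inline void xprod31_ref(int32_t a, int32_t b, int32_t t, int32_t v,
                                   int32_t *x, int32_t *y)
    {
        *x = (int32_t)(((int64_t)a * t + (int64_t)b * v) >> 32) << 1;
        *y = (int32_t)(((int64_t)b * t - (int64_t)a * v) >> 32) << 1;
    }

    /* XNPROD31: x = hi(a*t - b*v) << 1, y = hi(b*t + a*v) << 1 */
    static inline void xnprod31_ref(int32_t a, int32_t b, int32_t t, int32_t v,
                                    int32_t *x, int32_t *y)
    {
        *x = (int32_t)(((int64_t)a * t - (int64_t)b * v) >> 32) << 1;
        *y = (int32_t)(((int64_t)b * t + (int64_t)a * v) >> 32) << 1;
    }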
diff --git a/apps/codecs/lib/asm_arm.h b/apps/codecs/lib/asm_arm.h
index 627f4afd78..c0f9440450 100644
--- a/apps/codecs/lib/asm_arm.h
+++ b/apps/codecs/lib/asm_arm.h
@@ -19,20 +19,30 @@
 #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
 #define _V_WIDE_MATH
 
+#if ARM_ARCH >= 6
 static inline int32_t MULT32(int32_t x, int32_t y) {
-    int lo,hi;
-    asm volatile("smull\t%0, %1, %2, %3"
+    int32_t hi;
+    asm volatile("smmul %[hi], %[x], %[y] \n\t"
+                 : [hi] "=&r" (hi)
+                 : [x] "r" (x), [y] "r" (y) );
+    return(hi);
+}
+#else
+static inline int32_t MULT32(int32_t x, int32_t y) {
+    int32_t lo, hi;
+    asm volatile("smull\t%0, %1, %2, %3 \n\t"
                  : "=&r"(lo),"=&r"(hi)
                  : "r"(x),"r"(y) );
     return(hi);
 }
+#endif
 
 static inline int32_t MULT31(int32_t x, int32_t y) {
     return MULT32(x,y)<<1;
 }
 
 static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
-    int lo,hi;
+    int32_t lo,hi;
     asm volatile("smull %0, %1, %2, %3\n\t"
                  "movs %0, %0, lsr #15\n\t"
                  "adc %1, %0, %1, lsl #17\n\t"
@@ -44,75 +54,95 @@ static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
 
 #define XPROD32(a, b, t, v, x, y) \
 { \
-    long l; \
-    asm( "smull %0, %1, %3, %5\n\t" \
+    int32_t l; \
+    asm("smull %0, %1, %3, %5\n\t" \
         "rsb %2, %6, #0\n\t" \
         "smlal %0, %1, %4, %6\n\t" \
         "smull %0, %2, %3, %2\n\t" \
        "smlal %0, %2, %4, %5" \
        : "=&r" (l), "=&r" (x), "=&r" (y) \
        : "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \
 }
 
-static inline void XPROD31(int32_t a, int32_t b,
-                           int32_t t, int32_t v,
-                           int32_t *x, int32_t *y)
-{
-    int x1, y1, l;
-    asm( "smull %0, %1, %3, %5\n\t"
-         "rsb %2, %6, #0\n\t"
-         "smlal %0, %1, %4, %6\n\t"
-         "smull %0, %2, %3, %2\n\t"
-         "smlal %0, %2, %4, %5"
-         : "=&r" (l), "=&r" (x1), "=&r" (y1)
-         : "r" (a), "r" (b), "r" (t), "r" (v) );
-    *x = x1 << 1;
-    *y = y1 << 1;
+#if ARM_ARCH >= 6
+/* These may yield slightly different result from the macros below
+   because only the high 32 bits of the multiplications are accumulated while
+   the below macros use a 64 bit accumulator that is truncated to 32 bits.*/
+#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+    int32_t x1, y1;\
+    asm("smmul %[x1], %[t], %[a] \n\t"\
+        "smmul %[y1], %[t], %[b] \n\t"\
+        "smmla %[x1], %[v], %[b], %[x1] \n\t"\
+        "smmls %[y1], %[v], %[a], %[y1] \n\t"\
+        : [x1] "=&r" (x1), [y1] "=&r" (y1)\
+        : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
+    _x = x1 << 1;\
+    _y = y1 << 1;\
 }
 
-static inline void XNPROD31(int32_t a, int32_t b,
-                            int32_t t, int32_t v,
-                            int32_t *x, int32_t *y)
-{
-    int x1, y1, l;
-    asm( "smull %0, %1, %3, %5\n\t"
-         "rsb %2, %4, #0\n\t"
-         "smlal %0, %1, %2, %6\n\t"
-         "smull %0, %2, %4, %5\n\t"
-         "smlal %0, %2, %3, %6"
-         : "=&r" (l), "=&r" (x1), "=&r" (y1)
-         : "r" (a), "r" (b), "r" (t), "r" (v) );
-    *x = x1 << 1;
-    *y = y1 << 1;
+#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+    int32_t x1, y1;\
+    asm("smmul %[x1], %[t], %[a] \n\t"\
+        "smmul %[y1], %[t], %[b] \n\t"\
+        "smmls %[x1], %[v], %[b], %[x1] \n\t"\
+        "smmla %[y1], %[v], %[a], %[y1] \n\t"\
+        : [x1] "=&r" (x1), [y1] "=&r" (y1)\
+        : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
+    _x = x1 << 1;\
+    _y = y1 << 1;\
 }
-
+#else
 #define XPROD31_R(_a, _b, _t, _v, _x, _y)\
 {\
-    int x1, y1, l;\
-    asm( "smull %0, %1, %5, %3\n\t"\
+    int32_t x1, y1, l;\
+    asm("smull %0, %1, %5, %3\n\t"\
         "rsb %2, %3, #0\n\t"\
         "smlal %0, %1, %6, %4\n\t"\
         "smull %0, %2, %6, %2\n\t"\
        "smlal %0, %2, %5, %4"\
        : "=&r" (l), "=&r" (x1), "=&r" (y1)\
        : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
     _x = x1 << 1;\
     _y = y1 << 1;\
 }
 
 #define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
 {\
-    int x1, y1, l;\
-    asm( "smull %0, %1, %5, %3\n\t"\
+    int32_t x1, y1, l;\
+    asm("smull %0, %1, %5, %3\n\t"\
        "rsb %2, %4, #0\n\t"\
        "smlal %0, %1, %6, %2\n\t"\
        "smull %0, %2, %5, %4\n\t"\
        "smlal %0, %2, %6, %3"\
        : "=&r" (l), "=&r" (x1), "=&r" (y1)\
        : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
     _x = x1 << 1;\
     _y = y1 << 1;\
 }
+#endif
+
+static inline void XPROD31(int32_t a, int32_t b,
+                           int32_t t, int32_t v,
+                           int32_t *x, int32_t *y)
+{
+    int32_t _x1, _y1;
+    XPROD31_R(a, b, t, v, _x1, _y1);
+    *x = _x1;
+    *y = _y1;
+}
+
+static inline void XNPROD31(int32_t a, int32_t b,
+                            int32_t t, int32_t v,
+                            int32_t *x, int32_t *y)
+{
+    int32_t _x1, _y1;
+    XNPROD31_R(a, b, t, v, _x1, _y1);
+    *x = _x1;
+    *y = _y1;
+}
+
 
 #ifndef _V_VECT_OPS
 #define _V_VECT_OPS
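The comment added for the ARMv6 path warns that its results may differ slightly from the generic macros. A rough C model of why follows; the function names are invented and the model shows only the accumulation-order difference described in that comment, not the bit-exact semantics of SMMUL/SMMLA/SMMLS. The generic macros accumulate the full 64-bit products and truncate once at the end, while the ARMv6 sequence reduces each product to its high word first, so a carry generated by the discarded low halves never reaches the result.

    #include <stdint.h>

    /* High word of a 64-bit signed product. */
    static inline int32_t hi32(int64_t p) { return (int32_t)(p >> 32); }

    /* Pre-ARMv6 style: 64-bit accumulator, truncated to 32 bits at the end. */
    static void xprod31_wide(int32_t a, int32_t b, int32_t t, int32_t v,
                             int32_t *x, int32_t *y)
    {
        *x = hi32((int64_t)a * t + (int64_t)b * v) << 1;
        *y = hi32((int64_t)b * t - (int64_t)a * v) << 1;
    }

    /* ARMv6 style (approximate model): only the high 32 bits of each
     * multiplication are combined, so low-half carries are lost. */
    static void xprod31_narrow(int32_t a, int32_t b, int32_t t, int32_t v,
                               int32_t *x, int32_t *y)
    {
        *x = (hi32((int64_t)t * a) + hi32((int64_t)v * b)) << 1;
        *y = (hi32((int64_t)t * b) - hi32((int64_t)v * a)) << 1;
    }

In this model the two forms agree except for an occasional difference of one in the high word before the final << 1, which is the "slightly different result" the comment allows for and why the faster path is acceptable for Vorbis decoding.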