summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRafaël Carré <rafael.carre@gmail.com>2010-08-03 17:41:34 +0000
committerRafaël Carré <rafael.carre@gmail.com>2010-08-03 17:41:34 +0000
commit2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a (patch)
tree656ee2abe40fde6de1fc4224bc1fd4551506d564
parent79d5c0702bb6d7294c306ca65c3f5bade1c2aa51 (diff)
downloadrockbox-2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a.tar.gz
rockbox-2c00cb5e835f99ad17f9bb3a8d7dbbac2b14c05a.zip
wma: move inline functions into .h file
use 'static inline' instead of GCC extension 'inline'; some GCC versions don't support the latter (the Android NDK, for example) git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27679 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libwma/wmadeci.c6
-rw-r--r--apps/codecs/libwma/wmafixed.c129
-rw-r--r--apps/codecs/libwma/wmafixed.h129
3 files changed, 130 insertions, 134 deletions
diff --git a/apps/codecs/libwma/wmadeci.c b/apps/codecs/libwma/wmadeci.c
index 9e448f4b93..a3edea0e97 100644
--- a/apps/codecs/libwma/wmadeci.c
+++ b/apps/codecs/libwma/wmadeci.c
@@ -30,11 +30,7 @@
30#include "wmadata.h" 30#include "wmadata.h"
31 31
32static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len); 32static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len);
33inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data, 33
34 const fixed32 *window, int n);
35inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
36 const fixed32 *src1, int len);
37
38/*declarations of statically allocated variables used to remove malloc calls*/ 34/*declarations of statically allocated variables used to remove malloc calls*/
39 35
40fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR; 36fixed32 coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR;
diff --git a/apps/codecs/libwma/wmafixed.c b/apps/codecs/libwma/wmafixed.c
index 205bab4dd9..bdf87a548b 100644
--- a/apps/codecs/libwma/wmafixed.c
+++ b/apps/codecs/libwma/wmafixed.c
@@ -63,135 +63,6 @@ fixed64 Fixed32To64(fixed32 x)
63 return (fixed64)x; 63 return (fixed64)x;
64} 64}
65 65
66
67/*
68 * Helper functions for wma_window.
69 *
70 *
71 */
72
73#ifdef CPU_ARM
74inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
75 const fixed32 *window, int n)
76{
77 /* Block sizes are always power of two */
78 asm volatile (
79 "0:"
80 "ldmia %[d]!, {r0, r1};"
81 "ldmia %[w]!, {r4, r5};"
82 /* consume the first data and window value so we can use those
83 * registers again */
84 "smull r8, r9, r0, r4;"
85 "ldmia %[dst], {r0, r4};"
86 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
87 "smull r8, r9, r1, r5;"
88 "add r1, r4, r9, lsl #1;"
89 "stmia %[dst]!, {r0, r1};"
90 "subs %[n], %[n], #2;"
91 "bne 0b;"
92 : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
93 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
94}
95
96inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
97 int len)
98{
99 /* Block sizes are always power of two */
100 asm volatile (
101 "add %[s1], %[s1], %[n], lsl #2;"
102 "0:"
103 "ldmia %[s0]!, {r0, r1};"
104 "ldmdb %[s1]!, {r4, r5};"
105 "smull r8, r9, r0, r5;"
106 "mov r0, r9, lsl #1;"
107 "smull r8, r9, r1, r4;"
108 "mov r1, r9, lsl #1;"
109 "stmia %[dst]!, {r0, r1};"
110 "subs %[n], %[n], #2;"
111 "bne 0b;"
112 : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
113 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
114}
115
116#elif defined(CPU_COLDFIRE)
117
118inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
119 const fixed32 *window, int n)
120{
121 /* Block sizes are always power of two. Smallest block is always way bigger
122 * than four too.*/
123 asm volatile (
124 "0:"
125 "movem.l (%[d]), %%d0-%%d3;"
126 "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
127 "mac.l %%d0, %%d4, %%acc0;"
128 "mac.l %%d1, %%d5, %%acc1;"
129 "mac.l %%d2, %%a0, %%acc2;"
130 "mac.l %%d3, %%a1, %%acc3;"
131 "lea.l (16, %[d]), %[d];"
132 "lea.l (16, %[w]), %[w];"
133 "movclr.l %%acc0, %%d0;"
134 "movclr.l %%acc1, %%d1;"
135 "movclr.l %%acc2, %%d2;"
136 "movclr.l %%acc3, %%d3;"
137 "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
138 "add.l %%d4, %%d0;"
139 "add.l %%d5, %%d1;"
140 "add.l %%a0, %%d2;"
141 "add.l %%a1, %%d3;"
142 "movem.l %%d0-%%d3, (%[dst]);"
143 "lea.l (16, %[dst]), %[dst];"
144 "subq.l #4, %[n];"
145 "jne 0b;"
146 : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
147 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
148}
149
150inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
151 int len)
152{
153 /* Block sizes are always power of two. Smallest block is always way bigger
154 * than four too.*/
155 asm volatile (
156 "lea.l (-16, %[s1], %[n]*4), %[s1];"
157 "0:"
158 "movem.l (%[s0]), %%d0-%%d3;"
159 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
160 "mac.l %%d0, %%a1, %%acc0;"
161 "mac.l %%d1, %%a0, %%acc1;"
162 "mac.l %%d2, %%d5, %%acc2;"
163 "mac.l %%d3, %%d4, %%acc3;"
164 "lea.l (16, %[s0]), %[s0];"
165 "lea.l (-16, %[s1]), %[s1];"
166 "movclr.l %%acc0, %%d0;"
167 "movclr.l %%acc1, %%d1;"
168 "movclr.l %%acc2, %%d2;"
169 "movclr.l %%acc3, %%d3;"
170 "movem.l %%d0-%%d3, (%[dst]);"
171 "lea.l (16, %[dst]), %[dst];"
172 "subq.l #4, %[n];"
173 "jne 0b;"
174 : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
175 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
176}
177
178#else
179
180inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
181 int i;
182 for(i=0; i<len; i++)
183 dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
184}
185
186inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
187 int i;
188 src1 += len-1;
189 for(i=0; i<len; i++)
190 dst[i] = fixmul32b(src0[i], src1[-i]);
191}
192
193#endif
194
195/* 66/*
196 Not performance senstitive code here 67 Not performance senstitive code here
197 68
diff --git a/apps/codecs/libwma/wmafixed.h b/apps/codecs/libwma/wmafixed.h
index 7f04a955ef..4225f165c9 100644
--- a/apps/codecs/libwma/wmafixed.h
+++ b/apps/codecs/libwma/wmafixed.h
@@ -106,3 +106,132 @@ static inline fixed32 fixmul32(fixed32 x, fixed32 y)
106} 106}
107 107
108#endif 108#endif
109
110
111/*
112 * Helper functions for wma_window.
113 *
114 *
115 */
116
117#ifdef CPU_ARM
/* NOTE(review): the leading digits on each line ("118", "119", ...) are
 * line numbers from the scraped diff page, not part of the code.
 *
 * ARM inline-asm windowing helper:
 *   dst[i] += (data[i] * window[i]) >> 31   for i in [0, n)
 * (smull leaves the high 32 bits of the 64-bit product in r9, and
 * "r9, lsl #1" turns that into a >>31 fractional product before the add).
 * Two elements are processed per loop iteration ("subs %[n], %[n], #2"),
 * so n must be a positive multiple of 2 — presumably guaranteed because
 * block sizes are powers of two; confirm against callers.
 * All four operands are advanced in place via "+r" constraints. */
118static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
119 const fixed32 *window, int n)
120{
121 /* Block sizes are always power of two */
122 asm volatile (
123 "0:"
124 "ldmia %[d]!, {r0, r1};"
125 "ldmia %[w]!, {r4, r5};"
126 /* consume the first data and window value so we can use those
127 * registers again */
128 "smull r8, r9, r0, r4;"
129 "ldmia %[dst], {r0, r4};"
130 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
131 "smull r8, r9, r1, r5;"
132 "add r1, r4, r9, lsl #1;"
133 "stmia %[dst]!, {r0, r1};"
134 "subs %[n], %[n], #2;"
135 "bne 0b;"
136 : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
137 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
138}
139
/* NOTE(review): the leading digits on each line are line numbers from
 * the scraped diff page, not part of the code.
 *
 * ARM inline-asm reversed multiply:
 *   dst[i] = (src0[i] * src1[len-1-i]) >> 31   for i in [0, len)
 * The first "add" points s1 one past the end (len words); ldmdb then
 * walks it backwards while s0 walks forwards.  Within each pair, r5
 * holds the later src1 word, so r0*r5 / r1*r4 gives the correct
 * reversed pairing.  Two elements per iteration, so len must be a
 * positive multiple of 2 (block sizes are powers of two). */
140static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
141 int len)
142{
143 /* Block sizes are always power of two */
144 asm volatile (
145 "add %[s1], %[s1], %[n], lsl #2;"
146 "0:"
147 "ldmia %[s0]!, {r0, r1};"
148 "ldmdb %[s1]!, {r4, r5};"
149 "smull r8, r9, r0, r5;"
150 "mov r0, r9, lsl #1;"
151 "smull r8, r9, r1, r4;"
152 "mov r1, r9, lsl #1;"
153 "stmia %[dst]!, {r0, r1};"
154 "subs %[n], %[n], #2;"
155 "bne 0b;"
156 : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
157 : : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
158}
159
160#elif defined(CPU_COLDFIRE)
161
/* NOTE(review): the leading digits on each line are line numbers from
 * the scraped diff page, not part of the code.
 *
 * ColdFire EMAC version of the windowing helper: multiplies
 * data[i]*window[i] through accumulators acc0..acc3, retrieves the
 * results with movclr, and adds them into dst[i].  Four elements per
 * iteration ("subq.l #4"), so n must be a positive multiple of 4 (the
 * comment below notes the smallest block is much larger than four).
 * NOTE(review): the product scaling depends on the EMAC MACSR mode
 * configured elsewhere — presumably fractional, matching the ARM
 * ">> 31" variant; confirm before relying on exact values. */
162static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
163 const fixed32 *window, int n)
164{
165 /* Block sizes are always power of two. Smallest block is always way bigger
166 * than four too.*/
167 asm volatile (
168 "0:"
169 "movem.l (%[d]), %%d0-%%d3;"
170 "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
171 "mac.l %%d0, %%d4, %%acc0;"
172 "mac.l %%d1, %%d5, %%acc1;"
173 "mac.l %%d2, %%a0, %%acc2;"
174 "mac.l %%d3, %%a1, %%acc3;"
175 "lea.l (16, %[d]), %[d];"
176 "lea.l (16, %[w]), %[w];"
177 "movclr.l %%acc0, %%d0;"
178 "movclr.l %%acc1, %%d1;"
179 "movclr.l %%acc2, %%d2;"
180 "movclr.l %%acc3, %%d3;"
181 "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
182 "add.l %%d4, %%d0;"
183 "add.l %%d5, %%d1;"
184 "add.l %%a0, %%d2;"
185 "add.l %%a1, %%d3;"
186 "movem.l %%d0-%%d3, (%[dst]);"
187 "lea.l (16, %[dst]), %[dst];"
188 "subq.l #4, %[n];"
189 "jne 0b;"
190 : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
191 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
192}
193
/* NOTE(review): the leading digits on each line are line numbers from
 * the scraped diff page, not part of the code.
 *
 * ColdFire EMAC reversed multiply: dst[i] = product of src0[i] and
 * src1[len-1-i] for i in [0, len).  The first lea points s1 at the last
 * 16-byte group; each iteration pairs d0..d3 (forward src0) against
 * a1,a0,d5,d4 (the same group of src1 in reverse order), then steps s1
 * backwards.  Four elements per iteration, so len must be a positive
 * multiple of 4.
 * NOTE(review): as with the function above, the exact product scaling
 * depends on the EMAC MACSR configuration done elsewhere — confirm. */
194static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1,
195 int len)
196{
197 /* Block sizes are always power of two. Smallest block is always way bigger
198 * than four too.*/
199 asm volatile (
200 "lea.l (-16, %[s1], %[n]*4), %[s1];"
201 "0:"
202 "movem.l (%[s0]), %%d0-%%d3;"
203 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
204 "mac.l %%d0, %%a1, %%acc0;"
205 "mac.l %%d1, %%a0, %%acc1;"
206 "mac.l %%d2, %%d5, %%acc2;"
207 "mac.l %%d3, %%d4, %%acc3;"
208 "lea.l (16, %[s0]), %[s0];"
209 "lea.l (-16, %[s1]), %[s1];"
210 "movclr.l %%acc0, %%d0;"
211 "movclr.l %%acc1, %%d1;"
212 "movclr.l %%acc2, %%d2;"
213 "movclr.l %%acc3, %%d3;"
214 "movem.l %%d0-%%d3, (%[dst]);"
215 "lea.l (16, %[dst]), %[dst];"
216 "subq.l #4, %[n];"
217 "jne 0b;"
218 : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
219 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
220}
221
222#else
223
224static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
225 int i;
226 for(i=0; i<len; i++)
227 dst[i] = fixmul32b(src0[i], src1[i]) + dst[i];
228}
229
230static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0, const fixed32 *src1, int len){
231 int i;
232 src1 += len-1;
233 for(i=0; i<len; i++)
234 dst[i] = fixmul32b(src0[i], src1[-i]);
235}
236
237#endif