author    Nils Wallménius <nils@rockbox.org>    2011-09-21 15:38:54 +0000
committer Nils Wallménius <nils@rockbox.org>    2011-09-21 15:38:54 +0000
commit    151424a6fd0ba1f6e99b2c61bc85a068f4676cd4 (patch)
tree      4388e7e884d87e8f2291f00c96e7c151ecd1453d
parent    ccaf55a8ae707951d9e91a799bfa05970df829c9 (diff)
download  rockbox-151424a6fd0ba1f6e99b2c61bc85a068f4676cd4.tar.gz
          rockbox-151424a6fd0ba1f6e99b2c61bc85a068f4676cd4.zip
codeclib/libtremor: Clean up duplication of inline funcs, constify the ones in codeclib and copy over the slightly faster MULT31_SHIFT15 from tremor for cf.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30578 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--  apps/codecs/lib/asm_arm.h           |   8
-rw-r--r--  apps/codecs/lib/asm_mcf5249.h       |  12
-rw-r--r--  apps/codecs/lib/codeclib_misc.h     |   8
-rw-r--r--  apps/codecs/libtremor/asm_arm.h     |  44
-rw-r--r--  apps/codecs/libtremor/asm_mcf5249.h | 217
-rw-r--r--  apps/codecs/libtremor/misc.h        |  52
-rw-r--r--  apps/codecs/libtremor/os_types.h    |   9
-rw-r--r--  apps/codecs/libtremor/sharedbook.c  |   4
-rw-r--r--  apps/codecs/libtremor/window.c      |   4
9 files changed, 84 insertions, 274 deletions
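
For orientation before the hunks: the fixed-point helpers this commit deduplicates and constifies have the following portable semantics (taken from the generic code in misc.h). This is a reference sketch only, not code from the patch; the ref_ names are illustrative.

#include <stdint.h>

/* high 32 bits of the 64-bit product (what ARM's smull keeps) */
static inline int32_t ref_MULT32(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * (int64_t)y) >> 32);
}

/* same product, one more bit of precision kept */
static inline int32_t ref_MULT31(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * (int64_t)y) >> 31);
}

/* product >> 15, truncated to 32 bits; this is the routine the commit
   message calls "slightly faster" in its coldfire asm form */
static inline int32_t ref_MULT31_SHIFT15(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * (int64_t)y) >> 15);
}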
diff --git a/apps/codecs/lib/asm_arm.h b/apps/codecs/lib/asm_arm.h
index 54ce4b0d98..8e5d0e68df 100644
--- a/apps/codecs/lib/asm_arm.h
+++ b/apps/codecs/lib/asm_arm.h
@@ -166,7 +166,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
 
 /* asm versions of vector operations for block.c, window.c */
 static inline
-void vect_add(int32_t *x, int32_t *y, int n)
+void vect_add(int32_t *x, const int32_t *y, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
@@ -190,7 +190,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_copy(int32_t *x, int32_t *y, int n)
+void vect_copy(int32_t *x, const int32_t *y, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
@@ -208,7 +208,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(int32_t *data, int32_t *window, int n)
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
@@ -237,7 +237,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
 }
 
 static inline
-void vect_mult_bw(int32_t *data, int32_t *window, int n)
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
diff --git a/apps/codecs/lib/asm_mcf5249.h b/apps/codecs/lib/asm_mcf5249.h
index 88d439631d..841c413a94 100644
--- a/apps/codecs/lib/asm_mcf5249.h
+++ b/apps/codecs/lib/asm_mcf5249.h
@@ -44,17 +44,17 @@ static inline int32_t MULT31(int32_t x, int32_t y) {
 }
 
 #define INCL_OPTIMIZED_MULT31_SHIFT15
+/* NOTE: this requires that the emac is *NOT* rounding */
 static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
     int32_t r;
 
     asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
                   "mulu.l %[y], %[x];"        /* get lower half, avoid emac stall */
                   "movclr.l %%acc0, %[r];"    /* get higher half */
-                  "asl.l #8, %[r];"           /* hi<<16, plus one free */
-                  "asl.l #8, %[r];"
+                  "swap %[r];"                /* hi<<16, plus one free */
                   "lsr.l #8, %[x];"           /* (unsigned)lo >> 15 */
                   "lsr.l #7, %[x];"
-                  "or.l %[x], %[r];"          /* logical-or results */
+                  "move.w %[x], %[r];"        /* logical-or results */
                   : [r] "=&d" (r), [x] "+d" (x)
                   : [y] "d" (y)
                   : "cc");
@@ -202,7 +202,7 @@ void vect_add(int32_t *x, const int32_t *y, int n)
 }
 
 static inline
-void vect_copy(int32_t *x, int32_t *y, int n)
+void vect_copy(int32_t *x, const int32_t *y, int n)
 {
     /* align to 16 bytes */
     while(n>0 && (int)x&15) {
@@ -228,7 +228,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(int32_t *data, int32_t *window, int n)
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
     /* ensure data is aligned to 16-bytes */
     while(n>0 && (int)data&15) {
@@ -282,7 +282,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
 }
 
 static inline
-void vect_mult_bw(int32_t *data, int32_t *window, int n)
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
     /* ensure at least data is aligned to 16-bytes */
     while(n>0 && (int)data&15) {
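
Why the imported MULT31_SHIFT15 is faster on coldfire: per the "plus one free" comment, the emac already accumulates the product with one extra left shift, so the high half only has to move up 16 bits. A single swap exchanges the halfwords of %[r] (replacing the two asl.l #8 instructions), and move.w then overwrites the stale low halfword directly instead of needing a separate or.l. A plain C model of the halfword merge being relied on (our illustration, not Rockbox code):

#include <assert.h>
#include <stdint.h>

static uint32_t merge_halves(uint32_t hi, uint32_t lo16)
{
    uint32_t r = (hi << 16) | (hi >> 16);      /* like "swap %[r]" */
    r = (r & 0xffff0000u) | (lo16 & 0xffffu);  /* like "move.w %[x], %[r]" */
    return r;
}

int main(void)
{
    /* same result as the old shift-twice-then-or.l route */
    assert(merge_halves(0xdead1234u, 0xabcdu) == ((0xdead1234u << 16) | 0xabcdu));
    return 0;
}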
diff --git a/apps/codecs/lib/codeclib_misc.h b/apps/codecs/lib/codeclib_misc.h
index 08be93716f..8ebe22e37b 100644
--- a/apps/codecs/lib/codeclib_misc.h
+++ b/apps/codecs/lib/codeclib_misc.h
@@ -187,7 +187,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
 #define _V_VECT_OPS
 
 static inline
-void vect_add(int32_t *x, int32_t *y, int n)
+void vect_add(int32_t *x, const int32_t *y, int n)
 {
     while (n>0) {
         *x++ += *y++;
@@ -196,7 +196,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_copy(int32_t *x, int32_t *y, int n)
+void vect_copy(int32_t *x, const int32_t *y, int n)
 {
     while (n>0) {
         *x++ = *y++;
@@ -205,7 +205,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(int32_t *data, int32_t *window, int n)
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
     while(n>0) {
         *data = MULT31(*data, *window);
@@ -216,7 +216,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
 }
 
 static inline
-void vect_mult_bw(int32_t *data, int32_t *window, int n)
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
     while(n>0) {
         *data = MULT31(*data, *window);
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h
index 99790ea4b3..eb0d3ca789 100644
--- a/apps/codecs/libtremor/asm_arm.h
+++ b/apps/codecs/libtremor/asm_arm.h
@@ -19,33 +19,8 @@
 
 #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
 #define _V_WIDE_MATH
-
-static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
-  int lo,hi;
-  asm volatile("smull\t%0, %1, %2, %3"
-               : "=&r"(lo),"=&r"(hi)
-               : "%r"(x),"r"(y) );
-  return(hi);
-}
-
-static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
-  return MULT32(x,y)<<1;
-}
-
-static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
-  int lo,hi;
-  asm volatile("smull %0, %1, %2, %3\n\t"
-               "movs %0, %0, lsr #15\n\t"
-               "adc %1, %0, %1, lsl #17\n\t"
-               : "=&r"(lo),"=&r"(hi)
-               : "%r"(x),"r"(y)
-               : "cc");
-  return(hi);
-}
-
-#ifndef _V_VECT_OPS
-#define _V_VECT_OPS
-
+#ifndef _TREMOR_VECT_OPS
+#define _TREMOR_VECT_OPS
 /* asm versions of vector operations for block.c, window.c */
 /* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
    NOT do a final shift, meaning that the result of vect_mult_bw is
@@ -114,7 +89,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 
 #if ARM_ARCH >= 6
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* Note, mult_fw uses MULT31 */
     do{
@@ -139,7 +114,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 }
 #else
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* Note, mult_fw uses MULT31 */
     do{
@@ -166,7 +141,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 
 #if ARM_ARCH >= 6
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
     /* On ARM, we can do the shift at the same time as the overlap-add */
@@ -187,7 +162,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 }
 #else
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
     /* On ARM, we can do the shift at the same time as the overlap-add */
@@ -207,14 +182,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
     } while (n);
 }
 #endif
-
-static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
-  memcpy(x,y,n*sizeof(ogg_int32_t));
-}
-
 #endif
-
 #endif
 
 #ifndef _V_LSP_MATH_ASM
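
Two things are worth noting in the libtremor hunks above and below: the generic MULT32/MULT31/MULT31_SHIFT15 inlines disappear from the tremor headers because codeclib_misc.h (now included from misc.h) provides identical ones, and the windowing multiplies are renamed to ogg_vect_mult_fw/ogg_vect_mult_bw, presumably so the tremor-specific variants (the ARM bw version deliberately skips the final shift, per the SOME IMPORTANT NOTES comment) cannot collide with codeclib's plain vect_mult_fw/vect_mult_bw. The resulting pattern, sketched (our simplification, not the literal headers):

#include <stdint.h>

/* shared inline, as codeclib_misc.h defines it for the generic case */
static inline void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
    while (n-- > 0) {
        /* MULT31 stand-in: ((int64_t)a * b) >> 31 */
        *data = (int32_t)(((int64_t)*data * *window++) >> 31);
        data++;
    }
}

/* tremor-side wrapper; arch headers pre-empt it by defining the guard */
#ifndef _TREMOR_VECT_OPS
#define _TREMOR_VECT_OPS
static inline void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
    vect_mult_fw(data, window, n);
}
#endif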
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
index 3266113771..3e7d46983e 100644
--- a/apps/codecs/libtremor/asm_mcf5249.h
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -28,226 +28,37 @@
 
 #define MB()
 
-static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
-
-  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
-                "movclr.l %%acc0, %[x];"    /* move & clear acc */
-                "asr.l #1, %[x];"           /* no overflow test */
-                : [x] "+&d" (x)
-                : [y] "r" (y)
-                : "cc");
-  return x;
-}
-
-static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
-
-  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
-                "movclr.l %%acc0, %[x];"    /* move and clear */
-                : [x] "+&r" (x)
-                : [y] "r" (y)
-                : "cc");
-  return x;
-}
-
-
-static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
-  ogg_int32_t r;
-
-  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
-                "mulu.l %[y], %[x];"        /* get lower half, avoid emac stall */
-                "movclr.l %%acc0, %[r];"    /* get higher half */
-                "swap %[r];"                /* hi<<16, plus one free */
-                "lsr.l #8, %[x];"           /* (unsigned)lo >> 15 */
-                "lsr.l #7, %[x];"
-                "move.w %[x], %[r];"        /* logical-or results */
-                : [r] "=&d" (r), [x] "+d" (x)
-                : [y] "d" (y)
-                : "cc");
-  return r;
-}
-
-#ifndef _V_VECT_OPS
-#define _V_VECT_OPS
-
-/* asm versions of vector operations for block.c, window.c */
-/* assumes MAC is initialized & accumulators cleared */
-static inline
-void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
-  /* align to 16 bytes */
-  while(n>0 && (int)x&15) {
-    *x++ += *y++;
-    n--;
-  }
-  asm volatile ("bra 1f;"
-                "0:"                         /* loop start */
-                "movem.l (%[x]), %%d0-%%d3;" /* fetch values */
-                "movem.l (%[y]), %%a0-%%a3;"
-                /* add */
-                "add.l %%a0, %%d0;"
-                "add.l %%a1, %%d1;"
-                "add.l %%a2, %%d2;"
-                "add.l %%a3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[x]);"
-                "lea.l (4*4, %[x]), %[x];"
-                "lea.l (4*4, %[y]), %[y];"
-                "subq.l #4, %[n];"           /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
-  /* add final elements */
-  while (n>0) {
-    *x++ += *y++;
-    n--;
-  }
-}
+#ifndef _TREMOR_VECT_OPS
+#define _TREMOR_VECT_OPS
 static inline
 void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
   /* coldfire asm has symmetrical versions of vect_add_right_left
      and vect_add_left_right (since symmetrical versions of
      vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
-  vect_add_right_left(x, y, n );
+  vect_add(x, y, n );
 }
 
 static inline
-void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
-  /* align to 16 bytes */
-  while(n>0 && (int)x&15) {
-    *x++ = *y++;
-    n--;
-  }
-  asm volatile ("bra 1f;"
-                "0:"                         /* loop start */
-                "movem.l (%[y]), %%d0-%%d3;" /* fetch values */
-                "movem.l %%d0-%%d3, (%[x]);" /* store */
-                "lea.l (4*4, %[x]), %[x];"   /* advance */
-                "lea.l (4*4, %[y]), %[y];"
-                "subq.l #4, %[n];"           /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
-                : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
-  /* copy final elements */
-  while (n>0) {
-    *x++ = *y++;
-    n--;
-  }
+  /* coldfire asm has symmetrical versions of vect_add_right_left
+     and vect_add_left_right (since symmetrical versions of
+     vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
+  vect_add(x, y, n );
 }
 
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
-  /* ensure data is aligned to 16-bytes */
-  while(n>0 && (int)data&15) {
-    *data = MULT31(*data, *window);
-    data++;
-    window++;
-    n--;
-  }
-  asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
-                "movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
-                "lea.l (4*4, %[w]), %[w];"
-                "bra 1f;"                    /* jump to loop condition */
-                "0:"                         /* loop body */
-                /* multiply and load next window values */
-                "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
-                "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
-                "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
-                "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
-                "movclr.l %%acc0, %%d0;"     /* get the products */
-                "movclr.l %%acc1, %%d1;"
-                "movclr.l %%acc2, %%d2;"
-                "movclr.l %%acc3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[d]);"
-                "lea.l (4*4, %[d]), %[d];"
-                "movem.l (%[d]), %%d0-%%d3;"
-                "subq.l #4, %[n];"           /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                /* multiply final elements */
-                "tst.l %[n];"
-                "beq 1f;"                    /* n=0 */
-                "mac.l %%d0, %%a0, %%acc0;"
-                "movclr.l %%acc0, %%d0;"
-                "move.l %%d0, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"                    /* n=1 */
-                "mac.l %%d1, %%a1, %%acc0;"
-                "movclr.l %%acc0, %%d1;"
-                "move.l %%d1, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"                    /* n=2 */
-                /* otherwise n = 3 */
-                "mac.l %%d2, %%a2, %%acc0;"
-                "movclr.l %%acc0, %%d2;"
-                "move.l %%d2, (%[d])+;"
-                "1:"
-                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
+  vect_mult_fw(data, window, n);
 }
 
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
-  /* ensure at least data is aligned to 16-bytes */
-  while(n>0 && (int)data&15) {
-    *data = MULT31(*data, *window);
-    data++;
-    window--;
-    n--;
-  }
-  asm volatile ("lea.l (-3*4, %[w]), %[w];"  /* loop start */
-                "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
-                "movem.l (%[w]), %%a0-%%a3;"
-                "bra 1f;"                    /* jump to loop condition */
-                "0:"                         /* loop body */
-                /* multiply and load next window value */
-                "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
-                "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
-                "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
-                "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
-                "movclr.l %%acc0, %%d0;"     /* get the products */
-                "movclr.l %%acc1, %%d1;"
-                "movclr.l %%acc2, %%d2;"
-                "movclr.l %%acc3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[d]);"
-                "lea.l (4*4, %[d]), %[d];"
-                "movem.l (%[d]), %%d0-%%d3;"
-                "subq.l #4, %[n];"           /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                /* multiply final elements */
-                "tst.l %[n];"
-                "beq 1f;"                    /* n=0 */
-                "mac.l %%d0, %%a3, %%acc0;"
-                "movclr.l %%acc0, %%d0;"
-                "move.l %%d0, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"                    /* n=1 */
-                "mac.l %%d1, %%a2, %%acc0;"
-                "movclr.l %%acc0, %%d1;"
-                "move.l %%d1, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;"                    /* n=2 */
-                /* otherwise n = 3 */
-                "mac.l %%d2, %%a1, %%acc0;"
-                "movclr.l %%acc0, %%d2;"
-                "move.l %%d2, (%[d])+;"
-                "1:"
-                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
+  vect_mult_bw(data, window, n);
 }
-
 #endif
-
 #endif
 #endif
diff --git a/apps/codecs/libtremor/misc.h b/apps/codecs/libtremor/misc.h
index 39f67057ab..0b0ff4d3a7 100644
--- a/apps/codecs/libtremor/misc.h
+++ b/apps/codecs/libtremor/misc.h
@@ -22,6 +22,8 @@
 #include "ivorbiscodec.h"
 #include "os_types.h"
 
+#include "codeclib_misc.h"
+
 #include "asm_arm.h"
 #include "asm_mcf5249.h"
 
@@ -37,7 +39,7 @@ extern int _ilog(unsigned int v);
 #ifndef _LOW_ACCURACY_
 /* 64 bit multiply */
 /* #include <sys/types.h> */
-
+#if 0
 #if BYTE_ORDER==LITTLE_ENDIAN
 union magic {
   struct {
@@ -70,7 +72,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
   magic.whole = (ogg_int64_t)x * y;
   return ((ogg_uint32_t)(magic.halves.lo)>>15) | ((magic.halves.hi)<<17);
 }
-
+#endif
 #else
 /* 32 bit multiply, more portable but less accurate */
 
@@ -119,10 +121,11 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
 
 /* replaced XPROD32 with a macro to avoid memory reference
    _x, _y are the results (must be l-values) */
+/*
 #define XPROD32(_a, _b, _t, _v, _x, _y) \
   { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
     (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
-
+*/
 
 #ifdef __i386__
 
@@ -134,7 +137,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
     *(_y)=MULT31(_b,_t)+MULT31(_a,_v); }
 
 #else
-
+/*
 static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
                            ogg_int32_t t, ogg_int32_t v,
                            ogg_int32_t *x, ogg_int32_t *y)
@@ -150,8 +153,36 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
   *x = MULT31(a, t) - MULT31(b, v);
   *y = MULT31(b, t) + MULT31(a, v);
 }
+*/
+#endif
+#ifndef _TREMOR_VECT_OPS
+#define _TREMOR_VECT_OPS
+static inline
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+    vect_add(x, y, n );
+}
+
+static inline
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+    vect_add(x, y, n );
+}
+
+static inline
+void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
+{
+    vect_mult_fw(data, window, n);
+}
+
+static inline
+void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
+{
+    vect_mult_bw(data, window, n);
+}
 #endif
 
+#if 0
 #ifndef _V_VECT_OPS
 #define _V_VECT_OPS
 
@@ -174,7 +205,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
   while(n>0) {
     *data = MULT31(*data, *window);
@@ -185,7 +216,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 }
 
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
   while(n>0) {
     *data = MULT31(*data, *window);
@@ -202,8 +233,6 @@ static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
 }
 #endif
 
-#endif
-
 static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
                                       ogg_int32_t b,ogg_int32_t bp,
                                       ogg_int32_t *p){
@@ -218,7 +247,8 @@ static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
   }else
     return 0;
 }
-
+#endif
+#endif
 static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
                                        ogg_int32_t i,
                                        ogg_int32_t *p){
@@ -226,7 +256,7 @@ static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
  int ip=_ilog(abs(i))-31;
  return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
 }
-
+#if 0
 static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
                                      ogg_int32_t b,ogg_int32_t bp,
                                      ogg_int32_t *p){
@@ -268,6 +298,6 @@ static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
   }
   return(a);
 }
-
+#endif
 #endif
 
diff --git a/apps/codecs/libtremor/os_types.h b/apps/codecs/libtremor/os_types.h
index 337c055d54..b5bd0b713a 100644
--- a/apps/codecs/libtremor/os_types.h
+++ b/apps/codecs/libtremor/os_types.h
@@ -19,6 +19,7 @@
 #ifndef _OS_TYPES_H
 #define _OS_TYPES_H
 
+#include <stdint.h>
 #include <stdlib.h>
 #include <codecs.h>
 
@@ -49,9 +50,9 @@ void ogg_free(void *ptr);
 void iram_malloc_init(void);
 void *iram_malloc(size_t size);
 
-   typedef short ogg_int16_t;
-   typedef int ogg_int32_t;
-   typedef unsigned int ogg_uint32_t;
-   typedef long long ogg_int64_t;
+   typedef int16_t ogg_int16_t;
+   typedef int32_t ogg_int32_t;
+   typedef uint32_t ogg_uint32_t;
+   typedef int64_t ogg_int64_t;
 
 #endif /* _OS_TYPES_H */
diff --git a/apps/codecs/libtremor/sharedbook.c b/apps/codecs/libtremor/sharedbook.c
index e9cdd13329..8b046217c7 100644
--- a/apps/codecs/libtremor/sharedbook.c
+++ b/apps/codecs/libtremor/sharedbook.c
@@ -224,7 +224,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
           int indexdiv=1;
           for(k=0;k<b->dim;k++){
             int index= (j/indexdiv)%quantvals;
-            int point=0;
+            ogg_int32_t point=0;
             int val=VFLOAT_MULTI(delta,delpoint,
                                  abs(b->quantlist[index]),&point);
 
@@ -258,7 +258,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
           int lastpoint=0;
 
           for(k=0;k<b->dim;k++){
-            int point=0;
+            ogg_int32_t point=0;
             int val=VFLOAT_MULTI(delta,delpoint,
                                  abs(b->quantlist[j*b->dim+k]),&point);
 
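
The sharedbook.c change is a knock-on effect of the os_types.h hunk: point is passed by address to VFLOAT_MULTI, which takes an ogg_int32_t *, and with ogg_int32_t now mapped to int32_t rather than int the two are no longer guaranteed to be the same type on every target, so point must be declared ogg_int32_t.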
diff --git a/apps/codecs/libtremor/window.c b/apps/codecs/libtremor/window.c
index 7b48886939..e46008aef0 100644
--- a/apps/codecs/libtremor/window.c
+++ b/apps/codecs/libtremor/window.c
@@ -73,8 +73,8 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
    outside that region are not added (so don't need to be zerod). see block.c
    memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); */
 
-  vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
-  vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
+  ogg_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
+  ogg_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
 
   /* Again - memset not needed
   memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */