-rw-r--r--  apps/codecs/lib/asm_arm.h            |   8
-rw-r--r--  apps/codecs/lib/asm_mcf5249.h        |  12
-rw-r--r--  apps/codecs/lib/codeclib_misc.h      |   8
-rw-r--r--  apps/codecs/libtremor/asm_arm.h      |  44
-rw-r--r--  apps/codecs/libtremor/asm_mcf5249.h  | 217
-rw-r--r--  apps/codecs/libtremor/misc.h         |  52
-rw-r--r--  apps/codecs/libtremor/os_types.h     |   9
-rw-r--r--  apps/codecs/libtremor/sharedbook.c   |   4
-rw-r--r--  apps/codecs/libtremor/window.c       |   4
9 files changed, 84 insertions(+), 274 deletions(-)
diff --git a/apps/codecs/lib/asm_arm.h b/apps/codecs/lib/asm_arm.h
index 54ce4b0d98..8e5d0e68df 100644
--- a/apps/codecs/lib/asm_arm.h
+++ b/apps/codecs/lib/asm_arm.h
@@ -166,7 +166,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
 
 /* asm versions of vector operations for block.c, window.c */
 static inline
-void vect_add(int32_t *x, int32_t *y, int n)
+void vect_add(int32_t *x, const int32_t *y, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
@@ -190,7 +190,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_copy(int32_t *x, int32_t *y, int n)
+void vect_copy(int32_t *x, const int32_t *y, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
@@ -208,7 +208,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(int32_t *data, int32_t *window, int n)
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
@@ -237,7 +237,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
 }
 
 static inline
-void vect_mult_bw(int32_t *data, int32_t *window, int n)
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
     while (n>=4) {
         asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
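The four hunks above touch only signatures: the source operand of each vector routine becomes const-qualified, with no change to the generated code. As a reference for what these routines compute, here is a minimal portable sketch of vect_add matching the plain-C fallback that appears in codeclib_misc.h later in this diff (the _ref name is mine, not part of the patch):

    /* Reference semantics of vect_add: x[i] += y[i]; y is only read,
       which is what the new const qualifier documents. */
    static inline void vect_add_ref(int32_t *x, const int32_t *y, int n)
    {
        while (n-- > 0)
            *x++ += *y++;
    }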
diff --git a/apps/codecs/lib/asm_mcf5249.h b/apps/codecs/lib/asm_mcf5249.h
index 88d439631d..841c413a94 100644
--- a/apps/codecs/lib/asm_mcf5249.h
+++ b/apps/codecs/lib/asm_mcf5249.h
@@ -44,17 +44,17 @@ static inline int32_t MULT31(int32_t x, int32_t y) {
 }
 
 #define INCL_OPTIMIZED_MULT31_SHIFT15
+/* NOTE: this requires that the emac is *NOT* rounding */
 static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
     int32_t r;
 
     asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
                   "mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
                   "movclr.l %%acc0, %[r];" /* get higher half */
-                  "asl.l #8, %[r];" /* hi<<16, plus one free */
-                  "asl.l #8, %[r];"
+                  "swap %[r];" /* hi<<16, plus one free */
                   "lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
                   "lsr.l #7, %[x];"
-                  "or.l %[x], %[r];" /* logical-or results */
+                  "move.w %[x], %[r];" /* logical-or results */
                   : [r] "=&d" (r), [x] "+d" (x)
                   : [y] "d" (y)
                   : "cc");
@@ -202,7 +202,7 @@ void vect_add(int32_t *x, const int32_t *y, int n)
 }
 
 static inline
-void vect_copy(int32_t *x, int32_t *y, int n)
+void vect_copy(int32_t *x, const int32_t *y, int n)
 {
     /* align to 16 bytes */
     while(n>0 && (int)x&15) {
@@ -228,7 +228,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(int32_t *data, int32_t *window, int n)
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
     /* ensure data is aligned to 16-bytes */
     while(n>0 && (int)data&15) {
@@ -282,7 +282,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
 }
 
 static inline
-void vect_mult_bw(int32_t *data, int32_t *window, int n)
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
     /* ensure at least data is aligned to 16-bytes */
     while(n>0 && (int)data&15) {
diff --git a/apps/codecs/lib/codeclib_misc.h b/apps/codecs/lib/codeclib_misc.h
index 08be93716f..8ebe22e37b 100644
--- a/apps/codecs/lib/codeclib_misc.h
+++ b/apps/codecs/lib/codeclib_misc.h
@@ -187,7 +187,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
 #define _V_VECT_OPS
 
 static inline
-void vect_add(int32_t *x, int32_t *y, int n)
+void vect_add(int32_t *x, const int32_t *y, int n)
 {
     while (n>0) {
         *x++ += *y++;
@@ -196,7 +196,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_copy(int32_t *x, int32_t *y, int n)
+void vect_copy(int32_t *x, const int32_t *y, int n)
 {
     while (n>0) {
         *x++ = *y++;
@@ -205,7 +205,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(int32_t *data, int32_t *window, int n)
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
     while(n>0) {
         *data = MULT31(*data, *window);
@@ -216,7 +216,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
 }
 
 static inline
-void vect_mult_bw(int32_t *data, int32_t *window, int n)
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
     while(n>0) {
         *data = MULT31(*data, *window);
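These plain-C versions are the fallback the asm headers override, and they define the contract: vect_mult_fw and vect_mult_bw scale by a Q31 window via MULT31, walking the window forward or backward. A hypothetical caller sketch (names invented here, not from the patch):

    /* Apply a Q31 window in place, then overlap-add the previous
       block's tail; window and tail are untouched, hence const. */
    static void apply_window_sketch(int32_t *block, const int32_t *win,
                                    const int32_t *prev_tail, int n)
    {
        vect_mult_fw(block, win, n);    /* block[i] = MULT31(block[i], win[i]) */
        vect_add(block, prev_tail, n);  /* block[i] += prev_tail[i] */
    }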
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h
index 99790ea4b3..eb0d3ca789 100644
--- a/apps/codecs/libtremor/asm_arm.h
+++ b/apps/codecs/libtremor/asm_arm.h
@@ -19,33 +19,8 @@
 
 #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
 #define _V_WIDE_MATH
-
-static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
-  int lo,hi;
-  asm volatile("smull\t%0, %1, %2, %3"
-               : "=&r"(lo),"=&r"(hi)
-               : "%r"(x),"r"(y) );
-  return(hi);
-}
-
-static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
-  return MULT32(x,y)<<1;
-}
-
-static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
-  int lo,hi;
-  asm volatile("smull %0, %1, %2, %3\n\t"
-               "movs %0, %0, lsr #15\n\t"
-               "adc %1, %0, %1, lsl #17\n\t"
-               : "=&r"(lo),"=&r"(hi)
-               : "%r"(x),"r"(y)
-               : "cc");
-  return(hi);
-}
-
-#ifndef _V_VECT_OPS
-#define _V_VECT_OPS
-
+#ifndef _TREMOR_VECT_OPS
+#define _TREMOR_VECT_OPS
 /* asm versions of vector operations for block.c, window.c */
 /* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
    NOT do a final shift, meaning that the result of vect_mult_bw is
@@ -114,7 +89,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 
 #if ARM_ARCH >= 6
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* Note, mult_fw uses MULT31 */
     do{
@@ -139,7 +114,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 }
 #else
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* Note, mult_fw uses MULT31 */
     do{
@@ -166,7 +141,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 
 #if ARM_ARCH >= 6
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
     /* On ARM, we can do the shift at the same time as the overlap-add */
@@ -187,7 +162,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 }
 #else
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
     /* On ARM, we can do the shift at the same time as the overlap-add */
@@ -207,14 +182,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
     } while (n);
 }
 #endif
-
-static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
-    memcpy(x,y,n*sizeof(ogg_int32_t));
-}
-
 #endif
-
 #endif
 
 #ifndef _V_LSP_MATH_ASM
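The renamed ogg_vect_mult_bw keeps the contract spelled out in the notes retained at the top of this file: it produces MULT32-style products (no final shift left), and the caller folds the missing <<1 into the overlap-add. A sketch of that invariant in portable C (illustrative only, not from the patch; assumes <stdint.h>):

    /* d[] is windowed backward without the final shift; the caller
       restores MULT31 scaling during the overlap-add. */
    static void overlap_add_bw_sketch(int32_t *x, int32_t *d,
                                      const int32_t *w, int n)
    {
        for (int i = 0; i < n; i++) {
            int32_t prod = (int32_t)(((int64_t)d[i] * w[n-1-i]) >> 32);
            x[i] += prod << 1;   /* deferred MULT31 shift, as on ARM */
        }
    }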
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
index 3266113771..3e7d46983e 100644
--- a/apps/codecs/libtremor/asm_mcf5249.h
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -28,226 +28,37 @@
 
 #define MB()
 
-static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
-
-  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
-                "movclr.l %%acc0, %[x];" /* move & clear acc */
-                "asr.l #1, %[x];" /* no overflow test */
-                : [x] "+&d" (x)
-                : [y] "r" (y)
-                : "cc");
-  return x;
-}
-
-static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
-
-  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
-                "movclr.l %%acc0, %[x];" /* move and clear */
-                : [x] "+&r" (x)
-                : [y] "r" (y)
-                : "cc");
-  return x;
-}
-
-
-static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
-  ogg_int32_t r;
-
-  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
-                "mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
-                "movclr.l %%acc0, %[r];" /* get higher half */
-                "swap %[r];" /* hi<<16, plus one free */
-                "lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
-                "lsr.l #7, %[x];"
-                "move.w %[x], %[r];" /* logical-or results */
-                : [r] "=&d" (r), [x] "+d" (x)
-                : [y] "d" (y)
-                : "cc");
-  return r;
-}
-
-#ifndef _V_VECT_OPS
-#define _V_VECT_OPS
-
-/* asm versions of vector operations for block.c, window.c */
-/* assumes MAC is initialized & accumulators cleared */
-static inline
-void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
-  /* align to 16 bytes */
-  while(n>0 && (int)x&15) {
-    *x++ += *y++;
-    n--;
-  }
-  asm volatile ("bra 1f;"
-                "0:" /* loop start */
-                "movem.l (%[x]), %%d0-%%d3;" /* fetch values */
-                "movem.l (%[y]), %%a0-%%a3;"
-                /* add */
-                "add.l %%a0, %%d0;"
-                "add.l %%a1, %%d1;"
-                "add.l %%a2, %%d2;"
-                "add.l %%a3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[x]);"
-                "lea.l (4*4, %[x]), %[x];"
-                "lea.l (4*4, %[y]), %[y];"
-                "subq.l #4, %[n];" /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
-  /* add final elements */
-  while (n>0) {
-    *x++ += *y++;
-    n--;
-  }
-}
+#ifndef _TREMOR_VECT_OPS
+#define _TREMOR_VECT_OPS
 static inline
 void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
   /* coldfire asm has symmetrical versions of vect_add_right_left
      and vect_add_left_right (since symmetrical versions of
      vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
-  vect_add_right_left(x, y, n );
+  vect_add(x, y, n );
 }
 
 static inline
-void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
-  /* align to 16 bytes */
-  while(n>0 && (int)x&15) {
-    *x++ = *y++;
-    n--;
-  }
-  asm volatile ("bra 1f;"
-                "0:" /* loop start */
-                "movem.l (%[y]), %%d0-%%d3;" /* fetch values */
-                "movem.l %%d0-%%d3, (%[x]);" /* store */
-                "lea.l (4*4, %[x]), %[x];" /* advance */
-                "lea.l (4*4, %[y]), %[y];"
-                "subq.l #4, %[n];" /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
-                : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
-  /* copy final elements */
-  while (n>0) {
-    *x++ = *y++;
-    n--;
-  }
+  /* coldfire asm has symmetrical versions of vect_add_right_left
+     and vect_add_left_right (since symmetrical versions of
+     vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
+  vect_add(x, y, n );
 }
 
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
 {
-  /* ensure data is aligned to 16-bytes */
-  while(n>0 && (int)data&15) {
-    *data = MULT31(*data, *window);
-    data++;
-    window++;
-    n--;
-  }
-  asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
-                "movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
-                "lea.l (4*4, %[w]), %[w];"
-                "bra 1f;" /* jump to loop condition */
-                "0:" /* loop body */
-                /* multiply and load next window values */
-                "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
-                "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
-                "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
-                "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
-                "movclr.l %%acc0, %%d0;" /* get the products */
-                "movclr.l %%acc1, %%d1;"
-                "movclr.l %%acc2, %%d2;"
-                "movclr.l %%acc3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[d]);"
-                "lea.l (4*4, %[d]), %[d];"
-                "movem.l (%[d]), %%d0-%%d3;"
-                "subq.l #4, %[n];" /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                /* multiply final elements */
-                "tst.l %[n];"
-                "beq 1f;" /* n=0 */
-                "mac.l %%d0, %%a0, %%acc0;"
-                "movclr.l %%acc0, %%d0;"
-                "move.l %%d0, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;" /* n=1 */
-                "mac.l %%d1, %%a1, %%acc0;"
-                "movclr.l %%acc0, %%d1;"
-                "move.l %%d1, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;" /* n=2 */
-                /* otherwise n = 3 */
-                "mac.l %%d2, %%a2, %%acc0;"
-                "movclr.l %%acc0, %%d2;"
-                "move.l %%d2, (%[d])+;"
-                "1:"
-                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
+  vect_mult_fw(data, window, n);
 }
 
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
 {
-  /* ensure at least data is aligned to 16-bytes */
-  while(n>0 && (int)data&15) {
-    *data = MULT31(*data, *window);
-    data++;
-    window--;
-    n--;
-  }
-  asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
-                "movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
-                "movem.l (%[w]), %%a0-%%a3;"
-                "bra 1f;" /* jump to loop condition */
-                "0:" /* loop body */
-                /* multiply and load next window value */
-                "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
-                "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
-                "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
-                "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
-                "movclr.l %%acc0, %%d0;" /* get the products */
-                "movclr.l %%acc1, %%d1;"
-                "movclr.l %%acc2, %%d2;"
-                "movclr.l %%acc3, %%d3;"
-                /* store and advance */
-                "movem.l %%d0-%%d3, (%[d]);"
-                "lea.l (4*4, %[d]), %[d];"
-                "movem.l (%[d]), %%d0-%%d3;"
-                "subq.l #4, %[n];" /* done 4 elements */
-                "1: cmpi.l #4, %[n];"
-                "bge 0b;"
-                /* multiply final elements */
-                "tst.l %[n];"
-                "beq 1f;" /* n=0 */
-                "mac.l %%d0, %%a3, %%acc0;"
-                "movclr.l %%acc0, %%d0;"
-                "move.l %%d0, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;" /* n=1 */
-                "mac.l %%d1, %%a2, %%acc0;"
-                "movclr.l %%acc0, %%d1;"
-                "move.l %%d1, (%[d])+;"
-                "subq.l #1, %[n];"
-                "beq 1f;" /* n=2 */
-                /* otherwise n = 3 */
-                "mac.l %%d2, %%a1, %%acc0;"
-                "movclr.l %%acc0, %%d2;"
-                "move.l %%d2, (%[d])+;"
-                "1:"
-                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
-                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
-                    "cc", "memory");
+  vect_mult_bw(data, window, n);
 }
-
 #endif
-
 #endif
 #endif
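After this change both tremor asm headers shrink to thin forwarding wrappers, and the _TREMOR_VECT_OPS guard decides who provides them: whichever header is seen first (the ARM or ColdFire asm header on those targets, otherwise the plain-C block added to misc.h below) defines the guard and wins. The pattern in miniature (a sketch of the idiom, not a new file):

    #ifndef _TREMOR_VECT_OPS
    #define _TREMOR_VECT_OPS
    /* forward tremor's names to the shared codeclib implementations */
    static inline void ogg_vect_mult_fw(int32_t *d, const int32_t *w, int n)
    {
        vect_mult_fw(d, w, n);
    }
    #endif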
diff --git a/apps/codecs/libtremor/misc.h b/apps/codecs/libtremor/misc.h
index 39f67057ab..0b0ff4d3a7 100644
--- a/apps/codecs/libtremor/misc.h
+++ b/apps/codecs/libtremor/misc.h
@@ -22,6 +22,8 @@
 #include "ivorbiscodec.h"
 #include "os_types.h"
 
+#include "codeclib_misc.h"
+
 #include "asm_arm.h"
 #include "asm_mcf5249.h"
 
@@ -37,7 +39,7 @@ extern int _ilog(unsigned int v);
 #ifndef _LOW_ACCURACY_
 /* 64 bit multiply */
 /* #include <sys/types.h> */
-
+#if 0
 #if BYTE_ORDER==LITTLE_ENDIAN
 union magic {
   struct {
@@ -70,7 +72,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
   magic.whole = (ogg_int64_t)x * y;
   return ((ogg_uint32_t)(magic.halves.lo)>>15) | ((magic.halves.hi)<<17);
 }
-
+#endif
 #else
 /* 32 bit multiply, more portable but less accurate */
 
@@ -119,10 +121,11 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
 
 /* replaced XPROD32 with a macro to avoid memory reference
    _x, _y are the results (must be l-values) */
+/*
 #define XPROD32(_a, _b, _t, _v, _x, _y) \
   { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
     (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
-
+*/
 
 #ifdef __i386__
 
@@ -134,7 +137,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
     *(_y)=MULT31(_b,_t)+MULT31(_a,_v); }
 
 #else
-
+/*
 static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
                            ogg_int32_t t, ogg_int32_t v,
                            ogg_int32_t *x, ogg_int32_t *y)
@@ -150,8 +153,36 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
   *x = MULT31(a, t) - MULT31(b, v);
   *y = MULT31(b, t) + MULT31(a, v);
 }
+*/
+#endif
+#ifndef _TREMOR_VECT_OPS
+#define _TREMOR_VECT_OPS
+static inline
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+    vect_add(x, y, n );
+}
+
+static inline
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+    vect_add(x, y, n );
+}
+
+static inline
+void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
+{
+    vect_mult_fw(data, window, n);
+}
+
+static inline
+void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
+{
+    vect_mult_bw(data, window, n);
+}
 #endif
 
+#if 0
 #ifndef _V_VECT_OPS
 #define _V_VECT_OPS
 
@@ -174,7 +205,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 }
 
 static inline
-void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     while(n>0) {
         *data = MULT31(*data, *window);
@@ -185,7 +216,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 }
 
 static inline
-void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
+void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
     while(n>0) {
         *data = MULT31(*data, *window);
@@ -202,8 +233,6 @@ static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
 }
 #endif
 
-#endif
-
 static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
                                       ogg_int32_t b,ogg_int32_t bp,
                                       ogg_int32_t *p){
@@ -218,7 +247,8 @@ static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
   }else
     return 0;
 }
-
+#endif
+#endif
 static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
                                        ogg_int32_t i,
                                        ogg_int32_t *p){
@@ -226,7 +256,7 @@ static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
   int ip=_ilog(abs(i))-31;
  return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
 }
-
+#if 0
 static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
                                      ogg_int32_t b,ogg_int32_t bp,
                                      ogg_int32_t *p){
@@ -268,6 +298,6 @@ static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
   }
   return(a);
 }
-
+#endif
 #endif
 
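misc.h now takes MULT32/MULT31/XPROD31 and friends from codeclib_misc.h, so its own generic copies are fenced off with #if 0 and comment blocks rather than deleted outright. For reference, the XNPROD31 context kept above is effectively a Q31 complex multiply: reading the inputs as (a + jb) and (t + jv), it computes the identity below (a sketch of the math only; MULT31 is the codeclib Q31 multiply):

    /* x + jy = (a + jb) * (t + jv), all values Q31 */
    *x = MULT31(a, t) - MULT31(b, v);   /* real part: at - bv */
    *y = MULT31(b, t) + MULT31(a, v);   /* imag part: bt + av */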
diff --git a/apps/codecs/libtremor/os_types.h b/apps/codecs/libtremor/os_types.h
index 337c055d54..b5bd0b713a 100644
--- a/apps/codecs/libtremor/os_types.h
+++ b/apps/codecs/libtremor/os_types.h
@@ -19,6 +19,7 @@
 #ifndef _OS_TYPES_H
 #define _OS_TYPES_H
 
+#include <stdint.h>
 #include <stdlib.h>
 #include <codecs.h>
 
@@ -49,9 +50,9 @@ void ogg_free(void *ptr);
 void iram_malloc_init(void);
 void *iram_malloc(size_t size);
 
-typedef short ogg_int16_t;
-typedef int ogg_int32_t;
-typedef unsigned int ogg_uint32_t;
-typedef long long ogg_int64_t;
+typedef int16_t ogg_int16_t;
+typedef int32_t ogg_int32_t;
+typedef uint32_t ogg_uint32_t;
+typedef int64_t ogg_int64_t;
 
 #endif /* _OS_TYPES_H */
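Switching the ogg typedefs to <stdint.h> pins their widths instead of assuming what short, int, and long long happen to be, which the fixed-point code depends on. A hypothetical compile-time check in the same C89-friendly style (not part of the patch):

    /* negative array size if ogg_int32_t is ever not 4 bytes */
    typedef char ogg_int32_size_check[(sizeof(ogg_int32_t) == 4) ? 1 : -1];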
diff --git a/apps/codecs/libtremor/sharedbook.c b/apps/codecs/libtremor/sharedbook.c
index e9cdd13329..8b046217c7 100644
--- a/apps/codecs/libtremor/sharedbook.c
+++ b/apps/codecs/libtremor/sharedbook.c
@@ -224,7 +224,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
       int indexdiv=1;
       for(k=0;k<b->dim;k++){
        int index= (j/indexdiv)%quantvals;
-       int point=0;
+       ogg_int32_t point=0;
        int val=VFLOAT_MULTI(delta,delpoint,
                             abs(b->quantlist[index]),&point);
 
@@ -258,7 +258,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
       int lastpoint=0;
 
      for(k=0;k<b->dim;k++){
-       int point=0;
+       ogg_int32_t point=0;
        int val=VFLOAT_MULTI(delta,delpoint,
                             abs(b->quantlist[j*b->dim+k]),&point);
 
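The type change here follows from the os_types.h hunk: VFLOAT_MULTI writes its exponent back through an ogg_int32_t pointer, so &point only matches the parameter type if point is declared ogg_int32_t rather than int now that the typedef is int32_t. The relevant signature, as kept in misc.h above:

    static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a, ogg_int32_t ap,
                                           ogg_int32_t i, ogg_int32_t *p);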
diff --git a/apps/codecs/libtremor/window.c b/apps/codecs/libtremor/window.c
index 7b48886939..e46008aef0 100644
--- a/apps/codecs/libtremor/window.c
+++ b/apps/codecs/libtremor/window.c
@@ -73,8 +73,8 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
      outside that region are not added (so don't need to be zerod). see block.c
      memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); */
 
-  vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
-  vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
+  ogg_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
+  ogg_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
 
   /* Again - memset not needed
      memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */