diff options
-rw-r--r-- | apps/codecs/Tremor/asm_arm.h | 106 | ||||
-rw-r--r-- | apps/codecs/Tremor/asm_mcf5249.h | 13 | ||||
-rw-r--r-- | apps/codecs/Tremor/block.c | 43 | ||||
-rw-r--r-- | apps/codecs/Tremor/misc.h | 45 | ||||
-rw-r--r-- | apps/codecs/Tremor/window.c | 20 |
5 files changed, 168 insertions, 59 deletions
diff --git a/apps/codecs/Tremor/asm_arm.h b/apps/codecs/Tremor/asm_arm.h index e623ce9b3d..bc09ac5170 100644 --- a/apps/codecs/Tremor/asm_arm.h +++ b/apps/codecs/Tremor/asm_arm.h | |||
@@ -95,6 +95,112 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |||
95 | *y = y1 << 1; | 95 | *y = y1 << 1; |
96 | } | 96 | } |
97 | 97 | ||
98 | #ifndef _V_VECT_OPS | ||
99 | #define _V_VECT_OPS | ||
100 | |||
101 | /* asm versions of vector operations for block.c, window.c */ | ||
102 | static inline | ||
103 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
104 | { | ||
105 | while (n>=4) { | ||
106 | asm volatile ("ldmia %[x], {r0, r1, r2, r3};" | ||
107 | "ldmia %[y]!, {r4, r5, r6, r7};" | ||
108 | "add r0, r0, r4;" | ||
109 | "add r1, r1, r5;" | ||
110 | "add r2, r2, r6;" | ||
111 | "add r3, r3, r7;" | ||
112 | "stmia %[x]!, {r0, r1, r2, r3};" | ||
113 | : [x] "+r" (x), [y] "+r" (y) | ||
114 | : : "r0", "r1", "r2", "r3", | ||
115 | "r4", "r5", "r6", "r7", | ||
116 | "memory"); | ||
117 | n -= 4; | ||
118 | } | ||
119 | /* add final elements */ | ||
120 | while (n>0) { | ||
121 | *x++ += *y++; | ||
122 | n--; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | static inline | ||
127 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
128 | { | ||
129 | while (n>=4) { | ||
130 | asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};" | ||
131 | "stmia %[x]!, {r0, r1, r2, r3};" | ||
132 | : [x] "+r" (x), [y] "+r" (y) | ||
133 | : : "r0", "r1", "r2", "r3", | ||
134 | "memory"); | ||
135 | n -= 4; | ||
136 | } | ||
137 | /* copy final elements */ | ||
138 | while (n>0) { | ||
139 | *x++ = *y++; | ||
140 | n--; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | static inline | ||
145 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
146 | { | ||
147 | while (n>=4) { | ||
148 | asm volatile ("ldmia %[d], {r0, r1, r2, r3};" | ||
149 | "ldmia %[w]!, {r4, r5, r6, r7};" | ||
150 | "smull r8, r9, r0, r4;" | ||
151 | "mov r0, r9, lsl #1;" | ||
152 | "smull r8, r9, r1, r5;" | ||
153 | "mov r1, r9, lsl #1;" | ||
154 | "smull r8, r9, r2, r6;" | ||
155 | "mov r2, r9, lsl #1;" | ||
156 | "smull r8, r9, r3, r7;" | ||
157 | "mov r3, r9, lsl #1;" | ||
158 | "stmia %[d]!, {r0, r1, r2, r3};" | ||
159 | : [d] "+r" (data), [w] "+r" (window) | ||
160 | : : "r0", "r1", "r2", "r3", | ||
161 | "r4", "r5", "r6", "r7", "r8", "r9", | ||
162 | "memory", "cc"); | ||
163 | n -= 4; | ||
164 | } | ||
165 | while(n>0) { | ||
166 | *data = MULT31(*data, *window); | ||
167 | data++; | ||
168 | window++; | ||
169 | n--; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | static inline | ||
174 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
175 | { | ||
176 | while (n>=4) { | ||
177 | asm volatile ("ldmia %[d], {r0, r1, r2, r3};" | ||
178 | "ldmda %[w]!, {r4, r5, r6, r7};" | ||
179 | "smull r8, r9, r0, r7;" | ||
180 | "mov r0, r9, lsl #1;" | ||
181 | "smull r8, r9, r1, r6;" | ||
182 | "mov r1, r9, lsl #1;" | ||
183 | "smull r8, r9, r2, r5;" | ||
184 | "mov r2, r9, lsl #1;" | ||
185 | "smull r8, r9, r3, r4;" | ||
186 | "mov r3, r9, lsl #1;" | ||
187 | "stmia %[d]!, {r0, r1, r2, r3};" | ||
188 | : [d] "+r" (data), [w] "+r" (window) | ||
189 | : : "r0", "r1", "r2", "r3", | ||
190 | "r4", "r5", "r6", "r7", "r8", "r9", | ||
191 | "memory", "cc"); | ||
192 | n -= 4; | ||
193 | } | ||
194 | while(n>0) { | ||
195 | *data = MULT31(*data, *window); | ||
196 | data++; | ||
197 | window--; | ||
198 | n--; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | #endif | ||
203 | |||
98 | #endif | 204 | #endif |
99 | 205 | ||
100 | #ifndef _V_CLIP_MATH | 206 | #ifndef _V_CLIP_MATH |
diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h index 16878d73ee..4d7f92c089 100644 --- a/apps/codecs/Tremor/asm_mcf5249.h +++ b/apps/codecs/Tremor/asm_mcf5249.h | |||
@@ -132,10 +132,13 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |||
132 | [t] "r" (_t), [v] "r" (_v) \ | 132 | [t] "r" (_t), [v] "r" (_v) \ |
133 | : "cc"); | 133 | : "cc"); |
134 | 134 | ||
135 | #ifndef _V_VECT_OPS | ||
136 | #define _V_VECT_OPS | ||
137 | |||
135 | /* asm versions of vector operations for block.c, window.c */ | 138 | /* asm versions of vector operations for block.c, window.c */ |
136 | /* assumes MAC is initialized & accumulators cleared */ | 139 | /* assumes MAC is initialized & accumulators cleared */ |
137 | static inline | 140 | static inline |
138 | void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | 141 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) |
139 | { | 142 | { |
140 | /* align to 16 bytes */ | 143 | /* align to 16 bytes */ |
141 | while(n>0 && (int)x&16) { | 144 | while(n>0 && (int)x&16) { |
@@ -169,7 +172,7 @@ void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | |||
169 | } | 172 | } |
170 | 173 | ||
171 | static inline | 174 | static inline |
172 | void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | 175 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) |
173 | { | 176 | { |
174 | /* align to 16 bytes */ | 177 | /* align to 16 bytes */ |
175 | while(n>0 && (int)x&16) { | 178 | while(n>0 && (int)x&16) { |
@@ -196,7 +199,7 @@ void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | |||
196 | 199 | ||
197 | 200 | ||
198 | static inline | 201 | static inline |
199 | void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | 202 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) |
200 | { | 203 | { |
201 | /* ensure data is aligned to 16-bytes */ | 204 | /* ensure data is aligned to 16-bytes */ |
202 | while(n>0 && (int)data%16) { | 205 | while(n>0 && (int)data%16) { |
@@ -250,7 +253,7 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | |||
250 | } | 253 | } |
251 | 254 | ||
252 | static inline | 255 | static inline |
253 | void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | 256 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) |
254 | { | 257 | { |
255 | /* ensure at least data is aligned to 16-bytes */ | 258 | /* ensure at least data is aligned to 16-bytes */ |
256 | while(n>0 && (int)data%16) { | 259 | while(n>0 && (int)data%16) { |
@@ -338,6 +341,8 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n) | |||
338 | 341 | ||
339 | #endif | 342 | #endif |
340 | 343 | ||
344 | #endif | ||
345 | |||
341 | #ifndef _V_CLIP_MATH | 346 | #ifndef _V_CLIP_MATH |
342 | #define _V_CLIP_MATH | 347 | #define _V_CLIP_MATH |
343 | 348 | ||
diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c index 9dce13c6e5..80cbb7809c 100644 --- a/apps/codecs/Tremor/block.c +++ b/apps/codecs/Tremor/block.c | |||
@@ -262,11 +262,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
262 | vorbis_info *vi=v->vi; | 262 | vorbis_info *vi=v->vi; |
263 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; | 263 | codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; |
264 | private_state *b=v->backend_state; | 264 | private_state *b=v->backend_state; |
265 | #ifdef CPU_COLDFIRE | ||
266 | int j; | 265 | int j; |
267 | #else | ||
268 | int i,j; | ||
269 | #endif | ||
270 | 266 | ||
271 | if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL); | 267 | if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL); |
272 | 268 | ||
@@ -312,47 +308,25 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
312 | /* large/large */ | 308 | /* large/large */ |
313 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 309 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
314 | ogg_int32_t *p=vb->pcm[j]; | 310 | ogg_int32_t *p=vb->pcm[j]; |
315 | #ifdef CPU_COLDFIRE | 311 | vect_add(pcm, p, n1); |
316 | mcf5249_vect_add(pcm, p, n1); | ||
317 | #else | ||
318 | for(i=0;i<n1;i++) | ||
319 | pcm[i]+=p[i]; | ||
320 | #endif | ||
321 | }else{ | 312 | }else{ |
322 | /* large/small */ | 313 | /* large/small */ |
323 | ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; | 314 | ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; |
324 | ogg_int32_t *p=vb->pcm[j]; | 315 | ogg_int32_t *p=vb->pcm[j]; |
325 | #ifdef CPU_COLDFIRE | 316 | vect_add(pcm, p, n0); |
326 | mcf5249_vect_add(pcm, p, n0); | ||
327 | #else | ||
328 | for(i=0;i<n0;i++) | ||
329 | pcm[i]+=p[i]; | ||
330 | #endif | ||
331 | } | 317 | } |
332 | }else{ | 318 | }else{ |
333 | if(v->W){ | 319 | if(v->W){ |
334 | /* small/large */ | 320 | /* small/large */ |
335 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 321 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
336 | ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; | 322 | ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; |
337 | #ifdef CPU_COLDFIRE | 323 | vect_add(pcm, p, n0); |
338 | mcf5249_vect_add(pcm, p, n0); | 324 | vect_copy(&pcm[n0], &p[n0], n1/2-n0/2); |
339 | mcf5249_vect_copy(&pcm[n0], &p[n0], n1/2-n0/2); | ||
340 | #else | ||
341 | for(i=0;i<n0;i++) | ||
342 | pcm[i]+=p[i]; | ||
343 | for(;i<n1/2+n0/2;i++) | ||
344 | pcm[i]=p[i]; | ||
345 | #endif | ||
346 | }else{ | 325 | }else{ |
347 | /* small/small */ | 326 | /* small/small */ |
348 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 327 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
349 | ogg_int32_t *p=vb->pcm[j]; | 328 | ogg_int32_t *p=vb->pcm[j]; |
350 | #ifdef CPU_COLDFIRE | 329 | vect_add(pcm, p, n0); |
351 | mcf5249_vect_add(pcm, p, n0); | ||
352 | #else | ||
353 | for(i=0;i<n0;i++) | ||
354 | pcm[i]+=p[i]; | ||
355 | #endif | ||
356 | } | 330 | } |
357 | } | 331 | } |
358 | 332 | ||
@@ -360,12 +334,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
360 | { | 334 | { |
361 | ogg_int32_t *pcm=v->pcm[j]+thisCenter; | 335 | ogg_int32_t *pcm=v->pcm[j]+thisCenter; |
362 | ogg_int32_t *p=vb->pcm[j]+n; | 336 | ogg_int32_t *p=vb->pcm[j]+n; |
363 | #ifdef CPU_COLDFIRE | 337 | vect_copy(pcm, p, n); |
364 | mcf5249_vect_copy(pcm, p, n); | ||
365 | #else | ||
366 | for(i=0;i<n;i++) | ||
367 | pcm[i]=p[i]; | ||
368 | #endif | ||
369 | } | 338 | } |
370 | } | 339 | } |
371 | 340 | ||
diff --git a/apps/codecs/Tremor/misc.h b/apps/codecs/Tremor/misc.h index 81903e1b95..a6eb0fa04a 100644 --- a/apps/codecs/Tremor/misc.h +++ b/apps/codecs/Tremor/misc.h | |||
@@ -151,6 +151,51 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, | |||
151 | *y = MULT31(b, t) + MULT31(a, v); | 151 | *y = MULT31(b, t) + MULT31(a, v); |
152 | } | 152 | } |
153 | #endif | 153 | #endif |
154 | |||
155 | #ifndef _V_VECT_OPS | ||
156 | #define _V_VECT_OPS | ||
157 | |||
158 | static inline | ||
159 | void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
160 | { | ||
161 | while (n>0) { | ||
162 | *x++ += *y++; | ||
163 | n--; | ||
164 | } | ||
165 | } | ||
166 | |||
167 | static inline | ||
168 | void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) | ||
169 | { | ||
170 | while (n>0) { | ||
171 | *x++ = *y++; | ||
172 | n--; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static inline | ||
177 | void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
178 | { | ||
179 | while(n>0) { | ||
180 | *data = MULT31(*data, *window); | ||
181 | data++; | ||
182 | window++; | ||
183 | n--; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | static inline | ||
188 | void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) | ||
189 | { | ||
190 | while(n>0) { | ||
191 | *data = MULT31(*data, *window); | ||
192 | data++; | ||
193 | window--; | ||
194 | n--; | ||
195 | } | ||
196 | } | ||
197 | #endif | ||
198 | |||
154 | #endif | 199 | #endif |
155 | 200 | ||
156 | #ifndef _V_CLIP_MATH | 201 | #ifndef _V_CLIP_MATH |
diff --git a/apps/codecs/Tremor/window.c b/apps/codecs/Tremor/window.c index 5c7b83f4f2..14d97cf6ac 100644 --- a/apps/codecs/Tremor/window.c +++ b/apps/codecs/Tremor/window.c | |||
@@ -68,27 +68,11 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2], | |||
68 | long rightbegin=n/2+n/4-rn/4; | 68 | long rightbegin=n/2+n/4-rn/4; |
69 | long rightend=rightbegin+rn/2; | 69 | long rightend=rightbegin+rn/2; |
70 | 70 | ||
71 | #ifdef CPU_COLDFIRE | ||
72 | memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); | 71 | memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); |
73 | /* mcf5249_vect_zero(&d[0], leftbegin); */ | 72 | /* mcf5249_vect_zero(&d[0], leftbegin); */ |
74 | mcf5249_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); | 73 | vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); |
75 | mcf5249_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); | 74 | vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); |
76 | memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); | 75 | memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); |
77 | /* mcf5249_vect_zero(&d[rightend], n-rightend); */ | 76 | /* mcf5249_vect_zero(&d[rightend], n-rightend); */ |
78 | #else | ||
79 | int i,p; | ||
80 | |||
81 | for(i=0;i<leftbegin;i++) | ||
82 | d[i]=0; | ||
83 | |||
84 | for(p=0;i<leftend;i++,p++) | ||
85 | d[i]=MULT31(d[i],window[lW][p]); | ||
86 | |||
87 | for(i=rightbegin,p=rn/2-1;i<rightend;i++,p--) | ||
88 | d[i]=MULT31(d[i],window[nW][p]); | ||
89 | |||
90 | for(;i<n;i++) | ||
91 | d[i]=0; | ||
92 | #endif | ||
93 | } | 77 | } |
94 | 78 | ||