summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tomasz Malesinski <tomal@rockbox.org> 2007-03-24 13:45:54 +0000
committer Tomasz Malesinski <tomal@rockbox.org> 2007-03-24 13:45:54 +0000
commit 25046aac17e09467aa1f4d240fb74db51f37e70c (patch)
tree 5d69b6f3faa49457d68879949e76da00329ca71f
parent 467651ae763107d478799586a1061693cafe6dab (diff)
download rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.tar.gz
download rockbox-25046aac17e09467aa1f4d240fb74db51f37e70c.zip
FS #6848 - fast vector operations for ARM in Tremor.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12902 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/Tremor/asm_arm.h 106
-rw-r--r-- apps/codecs/Tremor/asm_mcf5249.h 13
-rw-r--r-- apps/codecs/Tremor/block.c 43
-rw-r--r-- apps/codecs/Tremor/misc.h 45
-rw-r--r-- apps/codecs/Tremor/window.c 20
5 files changed, 168 insertions, 59 deletions
diff --git a/apps/codecs/Tremor/asm_arm.h b/apps/codecs/Tremor/asm_arm.h
index e623ce9b3d..bc09ac5170 100644
--- a/apps/codecs/Tremor/asm_arm.h
+++ b/apps/codecs/Tremor/asm_arm.h
@@ -95,6 +95,112 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
95 *y = y1 << 1; 95 *y = y1 << 1;
96} 96}
97 97
98#ifndef _V_VECT_OPS
99#define _V_VECT_OPS
100
101/* asm versions of vector operations for block.c, window.c */
102static inline
103void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
104{
105 while (n>=4) {
106 asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
107 "ldmia %[y]!, {r4, r5, r6, r7};"
108 "add r0, r0, r4;"
109 "add r1, r1, r5;"
110 "add r2, r2, r6;"
111 "add r3, r3, r7;"
112 "stmia %[x]!, {r0, r1, r2, r3};"
113 : [x] "+r" (x), [y] "+r" (y)
114 : : "r0", "r1", "r2", "r3",
115 "r4", "r5", "r6", "r7",
116 "memory");
117 n -= 4;
118 }
119 /* add final elements */
120 while (n>0) {
121 *x++ += *y++;
122 n--;
123 }
124}
125
126static inline
127void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
128{
129 while (n>=4) {
130 asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
131 "stmia %[x]!, {r0, r1, r2, r3};"
132 : [x] "+r" (x), [y] "+r" (y)
133 : : "r0", "r1", "r2", "r3",
134 "memory");
135 n -= 4;
136 }
137 /* copy final elements */
138 while (n>0) {
139 *x++ = *y++;
140 n--;
141 }
142}
143
144static inline
145void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
146{
147 while (n>=4) {
148 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
149 "ldmia %[w]!, {r4, r5, r6, r7};"
150 "smull r8, r9, r0, r4;"
151 "mov r0, r9, lsl #1;"
152 "smull r8, r9, r1, r5;"
153 "mov r1, r9, lsl #1;"
154 "smull r8, r9, r2, r6;"
155 "mov r2, r9, lsl #1;"
156 "smull r8, r9, r3, r7;"
157 "mov r3, r9, lsl #1;"
158 "stmia %[d]!, {r0, r1, r2, r3};"
159 : [d] "+r" (data), [w] "+r" (window)
160 : : "r0", "r1", "r2", "r3",
161 "r4", "r5", "r6", "r7", "r8", "r9",
162 "memory", "cc");
163 n -= 4;
164 }
165 while(n>0) {
166 *data = MULT31(*data, *window);
167 data++;
168 window++;
169 n--;
170 }
171}
172
173static inline
174void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
175{
176 while (n>=4) {
177 asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
178 "ldmda %[w]!, {r4, r5, r6, r7};"
179 "smull r8, r9, r0, r7;"
180 "mov r0, r9, lsl #1;"
181 "smull r8, r9, r1, r6;"
182 "mov r1, r9, lsl #1;"
183 "smull r8, r9, r2, r5;"
184 "mov r2, r9, lsl #1;"
185 "smull r8, r9, r3, r4;"
186 "mov r3, r9, lsl #1;"
187 "stmia %[d]!, {r0, r1, r2, r3};"
188 : [d] "+r" (data), [w] "+r" (window)
189 : : "r0", "r1", "r2", "r3",
190 "r4", "r5", "r6", "r7", "r8", "r9",
191 "memory", "cc");
192 n -= 4;
193 }
194 while(n>0) {
195 *data = MULT31(*data, *window);
196 data++;
197 window--;
198 n--;
199 }
200}
201
202#endif
203
98#endif 204#endif
99 205
100#ifndef _V_CLIP_MATH 206#ifndef _V_CLIP_MATH
diff --git a/apps/codecs/Tremor/asm_mcf5249.h b/apps/codecs/Tremor/asm_mcf5249.h
index 16878d73ee..4d7f92c089 100644
--- a/apps/codecs/Tremor/asm_mcf5249.h
+++ b/apps/codecs/Tremor/asm_mcf5249.h
@@ -132,10 +132,13 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
132 [t] "r" (_t), [v] "r" (_v) \ 132 [t] "r" (_t), [v] "r" (_v) \
133 : "cc"); 133 : "cc");
134 134
135#ifndef _V_VECT_OPS
136#define _V_VECT_OPS
137
135/* asm versions of vector operations for block.c, window.c */ 138/* asm versions of vector operations for block.c, window.c */
136/* assumes MAC is initialized & accumulators cleared */ 139/* assumes MAC is initialized & accumulators cleared */
137static inline 140static inline
138void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n) 141void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
139{ 142{
140 /* align to 16 bytes */ 143 /* align to 16 bytes */
141 while(n>0 && (int)x&16) { 144 while(n>0 && (int)x&16) {
@@ -169,7 +172,7 @@ void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
169} 172}
170 173
171static inline 174static inline
172void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n) 175void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
173{ 176{
174 /* align to 16 bytes */ 177 /* align to 16 bytes */
175 while(n>0 && (int)x&16) { 178 while(n>0 && (int)x&16) {
@@ -196,7 +199,7 @@ void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
196 199
197 200
198static inline 201static inline
199void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n) 202void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
200{ 203{
201 /* ensure data is aligned to 16-bytes */ 204 /* ensure data is aligned to 16-bytes */
202 while(n>0 && (int)data%16) { 205 while(n>0 && (int)data%16) {
@@ -250,7 +253,7 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
250} 253}
251 254
252static inline 255static inline
253void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n) 256void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
254{ 257{
255 /* ensure at least data is aligned to 16-bytes */ 258 /* ensure at least data is aligned to 16-bytes */
256 while(n>0 && (int)data%16) { 259 while(n>0 && (int)data%16) {
@@ -338,6 +341,8 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
338 341
339#endif 342#endif
340 343
344#endif
345
341#ifndef _V_CLIP_MATH 346#ifndef _V_CLIP_MATH
342#define _V_CLIP_MATH 347#define _V_CLIP_MATH
343 348
diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c
index 9dce13c6e5..80cbb7809c 100644
--- a/apps/codecs/Tremor/block.c
+++ b/apps/codecs/Tremor/block.c
@@ -262,11 +262,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
262 vorbis_info *vi=v->vi; 262 vorbis_info *vi=v->vi;
263 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup; 263 codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
264 private_state *b=v->backend_state; 264 private_state *b=v->backend_state;
265#ifdef CPU_COLDFIRE
266 int j; 265 int j;
267#else
268 int i,j;
269#endif
270 266
271 if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL); 267 if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
272 268
@@ -312,47 +308,25 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
312 /* large/large */ 308 /* large/large */
313 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 309 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
314 ogg_int32_t *p=vb->pcm[j]; 310 ogg_int32_t *p=vb->pcm[j];
315#ifdef CPU_COLDFIRE 311 vect_add(pcm, p, n1);
316 mcf5249_vect_add(pcm, p, n1);
317#else
318 for(i=0;i<n1;i++)
319 pcm[i]+=p[i];
320#endif
321 }else{ 312 }else{
322 /* large/small */ 313 /* large/small */
323 ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; 314 ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
324 ogg_int32_t *p=vb->pcm[j]; 315 ogg_int32_t *p=vb->pcm[j];
325#ifdef CPU_COLDFIRE 316 vect_add(pcm, p, n0);
326 mcf5249_vect_add(pcm, p, n0);
327#else
328 for(i=0;i<n0;i++)
329 pcm[i]+=p[i];
330#endif
331 } 317 }
332 }else{ 318 }else{
333 if(v->W){ 319 if(v->W){
334 /* small/large */ 320 /* small/large */
335 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 321 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
336 ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; 322 ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2;
337#ifdef CPU_COLDFIRE 323 vect_add(pcm, p, n0);
338 mcf5249_vect_add(pcm, p, n0); 324 vect_copy(&pcm[n0], &p[n0], n1/2-n0/2);
339 mcf5249_vect_copy(&pcm[n0], &p[n0], n1/2-n0/2);
340#else
341 for(i=0;i<n0;i++)
342 pcm[i]+=p[i];
343 for(;i<n1/2+n0/2;i++)
344 pcm[i]=p[i];
345#endif
346 }else{ 325 }else{
347 /* small/small */ 326 /* small/small */
348 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 327 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
349 ogg_int32_t *p=vb->pcm[j]; 328 ogg_int32_t *p=vb->pcm[j];
350#ifdef CPU_COLDFIRE 329 vect_add(pcm, p, n0);
351 mcf5249_vect_add(pcm, p, n0);
352#else
353 for(i=0;i<n0;i++)
354 pcm[i]+=p[i];
355#endif
356 } 330 }
357 } 331 }
358 332
@@ -360,12 +334,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
360 { 334 {
361 ogg_int32_t *pcm=v->pcm[j]+thisCenter; 335 ogg_int32_t *pcm=v->pcm[j]+thisCenter;
362 ogg_int32_t *p=vb->pcm[j]+n; 336 ogg_int32_t *p=vb->pcm[j]+n;
363#ifdef CPU_COLDFIRE 337 vect_copy(pcm, p, n);
364 mcf5249_vect_copy(pcm, p, n);
365#else
366 for(i=0;i<n;i++)
367 pcm[i]=p[i];
368#endif
369 } 338 }
370 } 339 }
371 340
diff --git a/apps/codecs/Tremor/misc.h b/apps/codecs/Tremor/misc.h
index 81903e1b95..a6eb0fa04a 100644
--- a/apps/codecs/Tremor/misc.h
+++ b/apps/codecs/Tremor/misc.h
@@ -151,6 +151,51 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
151 *y = MULT31(b, t) + MULT31(a, v); 151 *y = MULT31(b, t) + MULT31(a, v);
152} 152}
153#endif 153#endif
154
155#ifndef _V_VECT_OPS
156#define _V_VECT_OPS
157
158static inline
159void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
160{
161 while (n>0) {
162 *x++ += *y++;
163 n--;
164 }
165}
166
167static inline
168void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
169{
170 while (n>0) {
171 *x++ = *y++;
172 n--;
173 }
174}
175
176static inline
177void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
178{
179 while(n>0) {
180 *data = MULT31(*data, *window);
181 data++;
182 window++;
183 n--;
184 }
185}
186
187static inline
188void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
189{
190 while(n>0) {
191 *data = MULT31(*data, *window);
192 data++;
193 window--;
194 n--;
195 }
196}
197#endif
198
154#endif 199#endif
155 200
156#ifndef _V_CLIP_MATH 201#ifndef _V_CLIP_MATH
diff --git a/apps/codecs/Tremor/window.c b/apps/codecs/Tremor/window.c
index 5c7b83f4f2..14d97cf6ac 100644
--- a/apps/codecs/Tremor/window.c
+++ b/apps/codecs/Tremor/window.c
@@ -68,27 +68,11 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
68 long rightbegin=n/2+n/4-rn/4; 68 long rightbegin=n/2+n/4-rn/4;
69 long rightend=rightbegin+rn/2; 69 long rightend=rightbegin+rn/2;
70 70
71#ifdef CPU_COLDFIRE
72 memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); 71 memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin);
73 /* mcf5249_vect_zero(&d[0], leftbegin); */ 72 /* mcf5249_vect_zero(&d[0], leftbegin); */
74 mcf5249_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin); 73 vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
75 mcf5249_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin); 74 vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
76 memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); 75 memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend));
77 /* mcf5249_vect_zero(&d[rightend], n-rightend); */ 76 /* mcf5249_vect_zero(&d[rightend], n-rightend); */
78#else
79 int i,p;
80
81 for(i=0;i<leftbegin;i++)
82 d[i]=0;
83
84 for(p=0;i<leftend;i++,p++)
85 d[i]=MULT31(d[i],window[lW][p]);
86
87 for(i=rightbegin,p=rn/2-1;i<rightend;i++,p--)
88 d[i]=MULT31(d[i],window[nW][p]);
89
90 for(;i<n;i++)
91 d[i]=0;
92#endif
93} 77}
94 78