summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org> 2011-09-13 16:53:13 +0000
committer: Nils Wallménius <nils@rockbox.org> 2011-09-13 16:53:13 +0000
commit: 85ef10826f7ebf742349acfc22cdd279db29db7c (patch)
tree: bbbbe21d2c113dc404f8df15decc47e2668c1653
parent: b6271900b00548d98a121ded09ee51645b2d0f9b (diff)
download: rockbox-85ef10826f7ebf742349acfc22cdd279db29db7c.tar.gz
rockbox-85ef10826f7ebf742349acfc22cdd279db29db7c.zip
codeclib: Tweak the ColdFire MDCT asm a bit more: flip the usage of mac/msac instead of negating explicitly. This changes the output slightly, but it is as close to the C version as before. Gives about a 0.4 MHz speedup of Vorbis on ColdFire.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30516 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/lib/mdct.c 95
1 file changed, 65 insertions, 30 deletions
diff --git a/apps/codecs/lib/mdct.c b/apps/codecs/lib/mdct.c
index 621b9cbbb8..5524afbbc4 100644
--- a/apps/codecs/lib/mdct.c
+++ b/apps/codecs/lib/mdct.c
@@ -124,7 +124,6 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
124 default: 124 default:
125 { 125 {
126 fixed32 * z1 = (fixed32 *)(&z[0]); 126 fixed32 * z1 = (fixed32 *)(&z[0]);
127 fixed32 * z2 = (fixed32 *)(&z[n4-1]);
128 int magic_step = step>>2; 127 int magic_step = step>>2;
129 int newstep; 128 int newstep;
130 if(n<=1024) 129 if(n<=1024)
@@ -137,47 +136,84 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
137 T = sincos_lookup1; 136 T = sincos_lookup1;
138 newstep = 2; 137 newstep = 2;
139 } 138 }
140 139
141 while(z1<z2)
142 {
143#ifdef CPU_COLDFIRE 140#ifdef CPU_COLDFIRE
141 fixed32 * z2 = (fixed32 *)(&z[n4]);
142 int c = n4;
143 if (newstep == 2)
144 {
144 asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t" 145 asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t"
146 "addq.l #8, %[z1]\n\t"
145 "movem.l (%[T]), %%d2-%%d3\n\t" 147 "movem.l (%[T]), %%d2-%%d3\n\t"
146 "mac.l %%d1, %%d2, %%acc0\n\t" 148 "addq.l #8, %[T]\n\t"
147 "msac.l %%d0, %%d3, %%acc0\n\t" 149 "bra.s 1f\n\t"
148 "mac.l %%d0, %%d2, %%acc1\n\t" 150 "0:\n\t"
149 "mac.l %%d1, %%d3, %%acc1\n\t" 151 "msac.l %%d1, %%d2, (%[T])+, %%a3, %%acc0\n\t"
152 "mac.l %%d0, %%d3, (%[T])+, %%a4, %%acc0\n\t"
153
154 "msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t"
155 "msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t"
150 156
151 "lea (%[newstep]*4, %[T]), %[T]\n\t" 157 "msac.l %%d1, %%a4, (%[T])+, %%d2, %%acc2\n\t"
158 "mac.l %%d0, %%a3, (%[T])+, %%d3, %%acc2\n\t"
159 "msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t"
160 "msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t"
161
162 "movclr.l %%acc0, %%a3\n\t"
163 "movclr.l %%acc3, %%a4\n\t"
164 "movem.l %%a3-%%a4, (-16, %[z1])\n\t"
152 165
153 "movem.l (%[z2]), %%d0-%%d1\n\t" 166 "movclr.l %%acc1, %%a4\n\t"
167 "movclr.l %%acc2, %%a3\n\t"
168 "movem.l %%a3-%%a4, (%[z2])\n\t"
169
170 "subq.l #2, %[n]\n\t"
171 "1:\n\t"
172 "bhi.s 0b\n\t"
173 : [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c)
174 :
175 : "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory");
176 }
177 else
178 {
179 asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t"
180 "addq.l #8, %[z1]\n\t"
154 "movem.l (%[T]), %%d2-%%d3\n\t" 181 "movem.l (%[T]), %%d2-%%d3\n\t"
155 "mac.l %%d1, %%d3, %%acc2\n\t" 182 "lea (%[newstep]*4, %[T]), %[T]\n\t"
156 "msac.l %%d0, %%d2, %%acc2\n\t" 183 "bra.s 1f\n\t"
157 "mac.l %%d0, %%d3, %%acc3\n\t" 184 "0:\n\t"
158 "mac.l %%d1, %%d2, %%acc3\n\t" 185 "msac.l %%d1, %%d2, (%[T]), %%a3, %%acc0\n\t"
186 "mac.l %%d0, %%d3, (4, %[T]), %%a4, %%acc0\n\t"
187 "msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t"
188 "msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t"
159 189
160 "lea (%[newstep]*4, %[T]), %[T]\n\t" 190 "lea (%[newstep]*4, %[T]), %[T]\n\t"
191 "msac.l %%d1, %%a4, (%[T]), %%d2, %%acc2\n\t"
192 "mac.l %%d0, %%a3, (4, %[T]), %%d3, %%acc2\n\t"
193 "msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t"
194 "msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t"
161 195
162 "movclr.l %%acc0, %%d0\n\t" 196 "lea (%[newstep]*4, %[T]), %[T]\n\t"
163 "movclr.l %%acc1, %%d2\n\t"
164 "movclr.l %%acc2, %%d1\n\t"
165 "movclr.l %%acc3, %%d3\n\t"
166 197
167 "neg.l %%d0\n\t" 198 "movclr.l %%acc0, %%a3\n\t"
168 "neg.l %%d1\n\t" 199 "movclr.l %%acc3, %%a4\n\t"
169 "neg.l %%d2\n\t" 200 "movem.l %%a3-%%a4, (-16, %[z1])\n\t"
170 "neg.l %%d3\n\t"
171 201
172 "movem.l %%d0/%%d3, (%[z1])\n\t" 202 "movclr.l %%acc1, %%a4\n\t"
173 "movem.l %%d1/%%d2, (%[z2])\n\t" 203 "movclr.l %%acc2, %%a3\n\t"
204 "movem.l %%a3-%%a4, (%[z2])\n\t"
174 205
175 "addq.l #8, %[z1]\n\t" 206 "subq.l #2, %[n]\n\t"
176 "subq.l #8, %[z2]\n\t" 207 "1:\n\t"
177 : [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T) 208 "bhi.s 0b\n\t"
209 : [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c)
178 : [newstep] "d" (newstep) 210 : [newstep] "d" (newstep)
179 : "d0", "d1", "d2", "d3", "cc", "memory"); 211 : "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory");
212 }
180#else 213#else
214 fixed32 * z2 = (fixed32 *)(&z[n4-1]);
215 while(z1<z2)
216 {
181 fixed32 r0,i0,r1,i1; 217 fixed32 r0,i0,r1,i1;
182 XNPROD31_R(z1[1], z1[0], T[0], T[1], r0, i1 ); T+=newstep; 218 XNPROD31_R(z1[1], z1[0], T[0], T[1], r0, i1 ); T+=newstep;
183 XNPROD31_R(z2[1], z2[0], T[1], T[0], r1, i0 ); T+=newstep; 219 XNPROD31_R(z2[1], z2[0], T[1], T[0], r1, i0 ); T+=newstep;
@@ -187,9 +223,8 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
187 z2[1] = -i1; 223 z2[1] = -i1;
188 z1+=2; 224 z1+=2;
189 z2-=2; 225 z2-=2;
190#endif
191 } 226 }
192 227#endif
193 break; 228 break;
194 } 229 }
195 230