summaryrefslogtreecommitdiff
path: root/apps/codecs/libcook/cook_fixp_mdct.h
diff options
context:
space:
mode:
author: Dave Chapman <dave@dchapman.com> 2009-05-10 22:26:02 +0000
committer: Dave Chapman <dave@dchapman.com> 2009-05-10 22:26:02 +0000
commit: fc28cb4ed5adf4a0bc548af38ca6de95bbf027e5 (patch)
tree: 5902a50d7efe84a5f3e6270c464dbf54572a744a /apps/codecs/libcook/cook_fixp_mdct.h
parent: 3a0a9915eb802d558c0399d17a5ac045934d6be1 (diff)
download: rockbox-fc28cb4ed5adf4a0bc548af38ca6de95bbf027e5.tar.gz
rockbox-fc28cb4ed5adf4a0bc548af38ca6de95bbf027e5.zip
Patch by Mohamed Tarek from FS#10182 - convert codec to fixed-point using patches submitted to the ffmpeg mailing list in 2007 by Ian Braithwaite.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@20901 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libcook/cook_fixp_mdct.h')
-rw-r--r--apps/codecs/libcook/cook_fixp_mdct.h545
1 files changed, 545 insertions, 0 deletions
diff --git a/apps/codecs/libcook/cook_fixp_mdct.h b/apps/codecs/libcook/cook_fixp_mdct.h
new file mode 100644
index 0000000000..dcd6d96227
--- /dev/null
+++ b/apps/codecs/libcook/cook_fixp_mdct.h
@@ -0,0 +1,545 @@
1/*
2 * The following (normalized modified discrete cosine transform)
3 * is taken from the OggVorbis 'TREMOR' source code.
4 *
5 * It has been modified for the ffmpeg cook fixed point decoder.
6 */
7
8/********************************************************************
9 * *
10 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
11 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
12 * *
13 ********************************************************************
14
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions
17 are met:
18
19 - Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21
22 - Redistributions in binary form must reproduce the above copyright
23 notice, this list of conditions and the following disclaimer in the
24 documentation and/or other materials provided with the distribution.
25
26 - Neither the name of the Xiph.org Foundation nor the names of its
27 contributors may be used to endorse or promote products derived from
28 this software without specific prior written permission.
29
30 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION
34 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41
42 *********************************************************************
43
44 function: normalized modified discrete cosine transform
45 power of two length transform only [64 <= n ]
46 last mod: $Id: mdct.c 14281 2004-12-30 12:11:32Z henry $
47
48 Original algorithm adapted long ago from _The use of multirate filter
49 banks for coding of high quality digital audio_, by T. Sporer,
50 K. Brandenburg and B. Edler, collection of the European Signal
51 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
52 211-214
53
54 The below code implements an algorithm that no longer looks much like
55 that presented in the paper, but the basic structure remains if you
56 dig deep enough to see it.
57
58 This module DOES NOT INCLUDE code to generate/apply the window
59 function. Everybody has their own weird favorite including me... I
60 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
61 vehemently disagree.
62
63 ********************************************************************/
64
/* 32-bit signed sample type; the name is kept from the Tremor sources. */
typedef int32_t ogg_int32_t;

/* Linkage/inlining prefix used by the transform helpers in this header. */
#define STIN static inline

/* Element type of the buffers the butterflies and rotations work on. */
#define DATA_TYPE ogg_int32_t
/* Type of the scratch temporaries inside the butterflies. */
#define REG_TYPE register ogg_int32_t
/* Element type of the sincos_lookup table entries read through T. */
#define LOOKUP_T const uint16_t
72
73static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
74 return fixp_mult_su(x, y) >> 1;
75}
76
77static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
78 return fixp_mult_su(x, y);
79}
80
/*
 * Memory-barrier hook: meant to force all values cached in registers to be
 * written back to memory. Whether that helps depends on the architecture
 * and the compiler. In this header it expands to nothing.
 */
#define MB()
87
88/*
89 * The XPROD functions are meant to optimize the cross products found all
90 * over the place in mdct.c by forcing memory operation ordering to avoid
91 * unnecessary register reloads as soon as memory is being written to.
92 * However this is only beneficial on CPUs with a sane number of general
93 * purpose registers which exclude the Intel x86. On Intel, better let the
94 * compiler actually reload registers directly from original memory by using
95 * macros.
96 */
97
98#ifdef __i386__
99
/*
 * Cross product (macro form, used on i386):
 *   *_x = _a*_t + _b*_v
 *   *_y = _b*_t - _a*_v
 * with every product computed by MULT32().
 *
 * The operands _a, _b, _t and _v are each evaluated twice, and the second
 * evaluation happens after *_x has been stored.
 *
 * Wrapped in do { } while (0) so that "XPROD32(...);" is exactly one
 * statement and stays valid in an unbraced if/else.
 */
#define XPROD32(_a, _b, _t, _v, _x, _y)                  \
  do {                                                   \
    *(_x) = MULT32((_a), (_t)) + MULT32((_b), (_v));     \
    *(_y) = MULT32((_b), (_t)) - MULT32((_a), (_v));     \
  } while (0)
/*
 * Cross product (macro form, used on i386):
 *   *_x = _a*_t + _b*_v
 *   *_y = _b*_t - _a*_v
 * with every product computed by MULT31().
 *
 * The operands _a, _b, _t and _v are each evaluated twice, and the second
 * evaluation happens after *_x has been stored.
 *
 * Wrapped in do { } while (0) so that "XPROD31(...);" is exactly one
 * statement and stays valid in an unbraced if/else.
 */
#define XPROD31(_a, _b, _t, _v, _x, _y)                  \
  do {                                                   \
    *(_x) = MULT31((_a), (_t)) + MULT31((_b), (_v));     \
    *(_y) = MULT31((_b), (_t)) - MULT31((_a), (_v));     \
  } while (0)
/*
 * Cross product with the signs swapped (macro form, used on i386):
 *   *_x = _a*_t - _b*_v
 *   *_y = _b*_t + _a*_v
 * with every product computed by MULT31().
 *
 * The operands _a, _b, _t and _v are each evaluated twice, and the second
 * evaluation happens after *_x has been stored.
 *
 * Wrapped in do { } while (0) so that "XNPROD31(...);" is exactly one
 * statement and stays valid in an unbraced if/else.
 */
#define XNPROD31(_a, _b, _t, _v, _x, _y)                 \
  do {                                                   \
    *(_x) = MULT31((_a), (_t)) - MULT31((_b), (_v));     \
    *(_y) = MULT31((_b), (_t)) + MULT31((_a), (_v));     \
  } while (0)
109
110#else
111
112static inline void XPROD32(ogg_int32_t a, ogg_int32_t b,
113 ogg_int32_t t, ogg_int32_t v,
114 ogg_int32_t *x, ogg_int32_t *y)
115{
116 *x = MULT32(a, t) + MULT32(b, v);
117 *y = MULT32(b, t) - MULT32(a, v);
118}
119
120static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
121 ogg_int32_t t, ogg_int32_t v,
122 ogg_int32_t *x, ogg_int32_t *y)
123{
124 *x = MULT31(a, t) + MULT31(b, v);
125 *y = MULT31(b, t) - MULT31(a, v);
126}
127
128static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
129 ogg_int32_t t, ogg_int32_t v,
130 ogg_int32_t *x, ogg_int32_t *y)
131{
132 *x = MULT31(a, t) - MULT31(b, v);
133 *y = MULT31(b, t) + MULT31(a, v);
134}
135
136#endif
137
138
139/* 8 point butterfly (in place) */
140STIN void mdct_butterfly_8(DATA_TYPE *x){
141
142 REG_TYPE r0 = x[4] + x[0];
143 REG_TYPE r1 = x[4] - x[0];
144 REG_TYPE r2 = x[5] + x[1];
145 REG_TYPE r3 = x[5] - x[1];
146 REG_TYPE r4 = x[6] + x[2];
147 REG_TYPE r5 = x[6] - x[2];
148 REG_TYPE r6 = x[7] + x[3];
149 REG_TYPE r7 = x[7] - x[3];
150
151 x[0] = r5 + r3;
152 x[1] = r7 - r1;
153 x[2] = r5 - r3;
154 x[3] = r7 + r1;
155 x[4] = r4 - r0;
156 x[5] = r6 - r2;
157 x[6] = r4 + r0;
158 x[7] = r6 + r2;
159 MB();
160}
161
162/* 16 point butterfly (in place, 4 register) */
163STIN void mdct_butterfly_16(DATA_TYPE *x){
164
165 REG_TYPE r0, r1;
166
167 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
168 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
169 x[ 0] = MULT31((r0 + r1) , cPI2_8);
170 x[ 1] = MULT31((r1 - r0) , cPI2_8);
171 MB();
172
173 r0 = x[10] - x[ 2]; x[10] += x[ 2];
174 r1 = x[ 3] - x[11]; x[11] += x[ 3];
175 x[ 2] = r1; x[ 3] = r0;
176 MB();
177
178 r0 = x[12] - x[ 4]; x[12] += x[ 4];
179 r1 = x[13] - x[ 5]; x[13] += x[ 5];
180 x[ 4] = MULT31((r0 - r1) , cPI2_8);
181 x[ 5] = MULT31((r0 + r1) , cPI2_8);
182 MB();
183
184 r0 = x[14] - x[ 6]; x[14] += x[ 6];
185 r1 = x[15] - x[ 7]; x[15] += x[ 7];
186 x[ 6] = r0; x[ 7] = r1;
187 MB();
188
189 mdct_butterfly_8(x);
190 mdct_butterfly_8(x+8);
191}
192
193/* 32 point butterfly (in place, 4 register) */
194STIN void mdct_butterfly_32(DATA_TYPE *x){
195
196 REG_TYPE r0, r1;
197
198 r0 = x[30] - x[14]; x[30] += x[14];
199 r1 = x[31] - x[15]; x[31] += x[15];
200 x[14] = r0; x[15] = r1;
201 MB();
202
203 r0 = x[28] - x[12]; x[28] += x[12];
204 r1 = x[29] - x[13]; x[29] += x[13];
205 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
206 MB();
207
208 r0 = x[26] - x[10]; x[26] += x[10];
209 r1 = x[27] - x[11]; x[27] += x[11];
210 x[10] = MULT31((r0 - r1) , cPI2_8);
211 x[11] = MULT31((r0 + r1) , cPI2_8);
212 MB();
213
214 r0 = x[24] - x[ 8]; x[24] += x[ 8];
215 r1 = x[25] - x[ 9]; x[25] += x[ 9];
216 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
217 MB();
218
219 r0 = x[22] - x[ 6]; x[22] += x[ 6];
220 r1 = x[ 7] - x[23]; x[23] += x[ 7];
221 x[ 6] = r1; x[ 7] = r0;
222 MB();
223
224 r0 = x[ 4] - x[20]; x[20] += x[ 4];
225 r1 = x[ 5] - x[21]; x[21] += x[ 5];
226 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
227 MB();
228
229 r0 = x[ 2] - x[18]; x[18] += x[ 2];
230 r1 = x[ 3] - x[19]; x[19] += x[ 3];
231 x[ 2] = MULT31((r1 + r0) , cPI2_8);
232 x[ 3] = MULT31((r1 - r0) , cPI2_8);
233 MB();
234
235 r0 = x[ 0] - x[16]; x[16] += x[ 0];
236 r1 = x[ 1] - x[17]; x[17] += x[ 1];
237 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
238 MB();
239
240 mdct_butterfly_16(x);
241 mdct_butterfly_16(x+16);
242}
243
244/* N/stage point generic N stage butterfly (in place, 2 register) */
245STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
246
247 LOOKUP_T *T = sincos_lookup;
248 DATA_TYPE *x1 = x + points - 8;
249 DATA_TYPE *x2 = x + (points>>1) - 8;
250 REG_TYPE r0;
251 REG_TYPE r1;
252
253 //av_log(0, 0, "bfly: points=%d, step=%d\n", points, step);
254
255 do{
256 r0 = x1[6] - x2[6]; x1[6] += x2[6];
257 r1 = x2[7] - x1[7]; x1[7] += x2[7];
258 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
259
260 r0 = x1[4] - x2[4]; x1[4] += x2[4];
261 r1 = x2[5] - x1[5]; x1[5] += x2[5];
262 XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
263
264 r0 = x1[2] - x2[2]; x1[2] += x2[2];
265 r1 = x2[3] - x1[3]; x1[3] += x2[3];
266 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
267
268 r0 = x1[0] - x2[0]; x1[0] += x2[0];
269 r1 = x2[1] - x1[1]; x1[1] += x2[1];
270 XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
271
272 x1-=8; x2-=8;
273 }while(T<sincos_lookup+2048);
274 do{
275 r0 = x1[6] - x2[6]; x1[6] += x2[6];
276 r1 = x1[7] - x2[7]; x1[7] += x2[7];
277 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
278
279 r0 = x1[4] - x2[4]; x1[4] += x2[4];
280 r1 = x1[5] - x2[5]; x1[5] += x2[5];
281 XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
282
283 r0 = x1[2] - x2[2]; x1[2] += x2[2];
284 r1 = x1[3] - x2[3]; x1[3] += x2[3];
285 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
286
287 r0 = x1[0] - x2[0]; x1[0] += x2[0];
288 r1 = x1[1] - x2[1]; x1[1] += x2[1];
289 XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
290
291 x1-=8; x2-=8;
292 }while(T>sincos_lookup);
293 do{
294 r0 = x2[6] - x1[6]; x1[6] += x2[6];
295 r1 = x2[7] - x1[7]; x1[7] += x2[7];
296 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
297
298 r0 = x2[4] - x1[4]; x1[4] += x2[4];
299 r1 = x2[5] - x1[5]; x1[5] += x2[5];
300 XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
301
302 r0 = x2[2] - x1[2]; x1[2] += x2[2];
303 r1 = x2[3] - x1[3]; x1[3] += x2[3];
304 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
305
306 r0 = x2[0] - x1[0]; x1[0] += x2[0];
307 r1 = x2[1] - x1[1]; x1[1] += x2[1];
308 XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
309
310 x1-=8; x2-=8;
311 }while(T<sincos_lookup+2048);
312 do{
313 r0 = x1[6] - x2[6]; x1[6] += x2[6];
314 r1 = x2[7] - x1[7]; x1[7] += x2[7];
315 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
316
317 r0 = x1[4] - x2[4]; x1[4] += x2[4];
318 r1 = x2[5] - x1[5]; x1[5] += x2[5];
319 XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
320
321 r0 = x1[2] - x2[2]; x1[2] += x2[2];
322 r1 = x2[3] - x1[3]; x1[3] += x2[3];
323 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
324
325 r0 = x1[0] - x2[0]; x1[0] += x2[0];
326 r1 = x2[1] - x1[1]; x1[1] += x2[1];
327 XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
328
329 x1-=8; x2-=8;
330 }while(T>sincos_lookup);
331}
332
333STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
334
335 int stages=8-shift;
336 int i,j;
337
338 for(i=0;--stages>0;i++){
339 for(j=0;j<(1<<i);j++)
340 mdct_butterfly_generic(x+(points>>i)*j,points>>i,8<<(i+shift));
341 }
342
343 for(j=0;j<points;j+=32)
344 mdct_butterfly_32(x+j);
345
346}
347
/*
 * bitrev[v] is the 4-bit value v with its bit order reversed.
 * The table is read-only, so it is const.
 */
static const unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};

/*
 * Reverses the bit order of the low 12 bits of x.
 *
 * The three nibbles are reversed individually through the table and then
 * swapped end for end (top nibble to the bottom, bottom nibble to the top).
 * Every table index is masked to four bits, so bits of x above bit 11 are
 * ignored and can never index outside the table.
 *
 * Returns a value in the range 0 .. 0xfff.
 */
static inline int bitrev12(int x){
  const int top = (x >> 8) & 0x00f;
  const int mid = (x >> 4) & 0x00f;
  const int low = x & 0x00f;

  return bitrev[top] | (bitrev[mid] << 4) | (((int)bitrev[low]) << 8);
}
353
354STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
355
356 int bit = 0;
357 DATA_TYPE *w0 = x;
358 DATA_TYPE *w1 = x = w0+(n>>1);
359 LOOKUP_T *T = sincos_lookup+(step>>1);
360 LOOKUP_T *Ttop = T+2048;
361 DATA_TYPE r2;
362
363 //av_log(0, 0, "brev: shift=%d, step=%d\n", shift, step);
364
365 do{
366 DATA_TYPE r3 = bitrev12(bit++);
367 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
368 DATA_TYPE *x1 = x + (r3>>shift);
369
370 REG_TYPE r0 = x0[0] + x1[0];
371 REG_TYPE r1 = x1[1] - x0[1];
372
373 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
374
375 w1 -= 4;
376
377 r0 = (x0[1] + x1[1])>>1;
378 r1 = (x0[0] - x1[0])>>1;
379 w0[0] = r0 + r2;
380 w0[1] = r1 + r3;
381 w1[2] = r0 - r2;
382 w1[3] = r3 - r1;
383
384 r3 = bitrev12(bit++);
385 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
386 x1 = x + (r3>>shift);
387
388 r0 = x0[0] + x1[0];
389 r1 = x1[1] - x0[1];
390
391 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
392
393 r0 = (x0[1] + x1[1])>>1;
394 r1 = (x0[0] - x1[0])>>1;
395 w0[2] = r0 + r2;
396 w0[3] = r1 + r3;
397 w1[0] = r0 - r2;
398 w1[1] = r3 - r1;
399
400 w0 += 4;
401 }while(T<Ttop);
402 do{
403 DATA_TYPE r3 = bitrev12(bit++);
404 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
405 DATA_TYPE *x1 = x + (r3>>shift);
406
407 REG_TYPE r0 = x0[0] + x1[0];
408 REG_TYPE r1 = x1[1] - x0[1];
409
410 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
411
412 w1 -= 4;
413
414 r0 = (x0[1] + x1[1])>>1;
415 r1 = (x0[0] - x1[0])>>1;
416 w0[0] = r0 + r2;
417 w0[1] = r1 + r3;
418 w1[2] = r0 - r2;
419 w1[3] = r3 - r1;
420
421 r3 = bitrev12(bit++);
422 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
423 x1 = x + (r3>>shift);
424
425 r0 = x0[0] + x1[0];
426 r1 = x1[1] - x0[1];
427
428 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
429
430 r0 = (x0[1] + x1[1])>>1;
431 r1 = (x0[0] - x1[0])>>1;
432 w0[2] = r0 + r2;
433 w0[3] = r1 + r3;
434 w1[0] = r0 - r2;
435 w1[1] = r3 - r1;
436
437 w0 += 4;
438 }while(w0<w1);
439}
440
441STIN void cook_mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
442 int n2=n>>1;
443 int n4=n>>2;
444 DATA_TYPE *iX;
445 DATA_TYPE *oX;
446 LOOKUP_T *T;
447 int shift;
448 int step;
449
450 for (shift=6;!(n&(1<<shift));shift++);
451
452 shift=13-shift;
453 step=4<<shift;
454 //step=16;
455 //av_log(0, 0, "mdct: shift=%d, step=%d\n", shift, step);
456
457 /* rotate */
458
459 iX = in+n2-7;
460 oX = out+n2+n4;
461 T = sincos_lookup;
462
463 do{
464 oX-=4;
465 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
466 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
467 iX-=8;
468 }while(iX>=in+n4);
469 do{
470 oX-=4;
471 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
472 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
473 iX-=8;
474 }while(iX>=in);
475
476 iX = in+n2-8;
477 oX = out+n2+n4;
478 T = sincos_lookup;
479
480 do{
481 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
482 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
483 iX-=8;
484 oX+=4;
485 }while(iX>=in+n4);
486 do{
487 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
488 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
489 iX-=8;
490 oX+=4;
491 }while(iX>=in);
492
493 mdct_butterflies(out+n2,n2,shift);
494 mdct_bitreverse(out,n,step,shift);
495
496 /* rotate */
497
498 step>>=2;
499 //step=4;
500 {
501 DATA_TYPE *oX1=out+n2+n4;
502 DATA_TYPE *oX2=out+n2+n4;
503 DATA_TYPE *iX =out;
504
505 T=sincos_lookup+(step>>1);
506 do{
507 oX1-=4;
508 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
509 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
510 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
511 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
512 oX2+=4;
513 iX+=8;
514 }while(iX<oX1);
515
516 iX=out+n2+n4;
517 oX1=out+n4;
518 oX2=oX1;
519
520 do{
521 oX1-=4;
522 iX-=4;
523
524 oX2[0] = -(oX1[3] = iX[3]);
525 oX2[1] = -(oX1[2] = iX[2]);
526 oX2[2] = -(oX1[1] = iX[1]);
527 oX2[3] = -(oX1[0] = iX[0]);
528
529 oX2+=4;
530 }while(oX2<iX);
531
532 iX=out+n2+n4;
533 oX1=out+n2+n4;
534 oX2=out+n2;
535
536 do{
537 oX1-=4;
538 oX1[0]= iX[3];
539 oX1[1]= iX[2];
540 oX1[2]= iX[1];
541 oX1[3]= iX[0];
542 iX+=4;
543 }while(oX1>oX2);
544 }
545}