diff options
author | Tomasz Malesinski <tomal@rockbox.org> | 2007-09-27 21:58:51 +0000 |
---|---|---|
committer | Tomasz Malesinski <tomal@rockbox.org> | 2007-09-27 21:58:51 +0000 |
commit | c13eba29ff5615cc74a7818e42cc9d464a7c7075 (patch) | |
tree | eef1dfc0d4ed2b69e16b119b0d47052801ef827f /apps/codecs | |
parent | 1aaf5dbdb660d29ef384674f25c916f23da505bb (diff) | |
download | rockbox-c13eba29ff5615cc74a7818e42cc9d464a7c7075.tar.gz rockbox-c13eba29ff5615cc74a7818e42cc9d464a7c7075.zip |
FS #7833: Optimizations to the Vorbis codec:
- ARM assembly version of parts of mdct,
- special case for vorbis_book_decodevv_add for 2 channels and even book->dim,
- store the output in vb->pcm if possible, as it is usually in IRAM as
opposed to v->pcm.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14875 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs')
-rw-r--r-- | apps/codecs/Tremor/SOURCES | 3 | ||||
-rw-r--r-- | apps/codecs/Tremor/block.c | 20 | ||||
-rw-r--r-- | apps/codecs/Tremor/codebook.c | 54 | ||||
-rw-r--r-- | apps/codecs/Tremor/ivorbiscodec.h | 1 | ||||
-rw-r--r-- | apps/codecs/Tremor/mdct.c | 15 | ||||
-rw-r--r-- | apps/codecs/Tremor/mdct_arm.S | 419 |
6 files changed, 502 insertions, 10 deletions
diff --git a/apps/codecs/Tremor/SOURCES b/apps/codecs/Tremor/SOURCES index 0877941808..9b8c05e340 100644 --- a/apps/codecs/Tremor/SOURCES +++ b/apps/codecs/Tremor/SOURCES | |||
@@ -7,6 +7,9 @@ framing.c | |||
7 | info.c | 7 | info.c |
8 | mapping0.c | 8 | mapping0.c |
9 | mdct.c | 9 | mdct.c |
10 | #ifdef CPU_ARM | ||
11 | mdct_arm.S | ||
12 | #endif | ||
10 | registry.c | 13 | registry.c |
11 | res012.c | 14 | res012.c |
12 | sharedbook.c | 15 | sharedbook.c |
diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c index 80cbb7809c..e609fc44f7 100644 --- a/apps/codecs/Tremor/block.c +++ b/apps/codecs/Tremor/block.c | |||
@@ -171,6 +171,7 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){ | |||
171 | 171 | ||
172 | v->pcm_storage=ci->blocksizes[1]; | 172 | v->pcm_storage=ci->blocksizes[1]; |
173 | v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); | 173 | v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); |
174 | v->pcmb=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmb)); | ||
174 | v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); | 175 | v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); |
175 | 176 | ||
176 | for(i=0;i<vi->channels;i++) | 177 | for(i=0;i<vi->channels;i++) |
@@ -308,25 +309,28 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
308 | /* large/large */ | 309 | /* large/large */ |
309 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 310 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
310 | ogg_int32_t *p=vb->pcm[j]; | 311 | ogg_int32_t *p=vb->pcm[j]; |
311 | vect_add(pcm, p, n1); | 312 | vect_add(p, pcm, n1); |
313 | v->pcmb[j]=p; | ||
312 | }else{ | 314 | }else{ |
313 | /* large/small */ | 315 | /* large/small */ |
314 | ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; | 316 | ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; |
315 | ogg_int32_t *p=vb->pcm[j]; | 317 | ogg_int32_t *p=vb->pcm[j]; |
316 | vect_add(pcm, p, n0); | 318 | vect_add(pcm, p, n0); |
319 | v->pcmb[j]=v->pcm[j]+prevCenter; | ||
317 | } | 320 | } |
318 | }else{ | 321 | }else{ |
319 | if(v->W){ | 322 | if(v->W){ |
320 | /* small/large */ | 323 | /* small/large */ |
321 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 324 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
322 | ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; | 325 | ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; |
323 | vect_add(pcm, p, n0); | 326 | vect_add(p, pcm, n0); |
324 | vect_copy(&pcm[n0], &p[n0], n1/2-n0/2); | 327 | v->pcmb[j]=p; |
325 | }else{ | 328 | }else{ |
326 | /* small/small */ | 329 | /* small/small */ |
327 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; | 330 | ogg_int32_t *pcm=v->pcm[j]+prevCenter; |
328 | ogg_int32_t *p=vb->pcm[j]; | 331 | ogg_int32_t *p=vb->pcm[j]; |
329 | vect_add(pcm, p, n0); | 332 | vect_add(p, pcm, n0); |
333 | v->pcmb[j]=p; | ||
330 | } | 334 | } |
331 | } | 335 | } |
332 | 336 | ||
@@ -351,10 +355,8 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){ | |||
351 | v->pcm_returned=thisCenter; | 355 | v->pcm_returned=thisCenter; |
352 | v->pcm_current=thisCenter; | 356 | v->pcm_current=thisCenter; |
353 | }else{ | 357 | }else{ |
354 | v->pcm_returned=prevCenter; | 358 | v->pcm_returned=0; |
355 | v->pcm_current=prevCenter+ | 359 | v->pcm_current=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4; |
356 | ci->blocksizes[v->lW]/4+ | ||
357 | ci->blocksizes[v->W]/4; | ||
358 | } | 360 | } |
359 | 361 | ||
360 | } | 362 | } |
@@ -436,7 +438,7 @@ int vorbis_synthesis_pcmout(vorbis_dsp_state *v,ogg_int32_t ***pcm){ | |||
436 | if(pcm){ | 438 | if(pcm){ |
437 | int i; | 439 | int i; |
438 | for(i=0;i<vi->channels;i++) | 440 | for(i=0;i<vi->channels;i++) |
439 | v->pcmret[i]=v->pcm[i]+v->pcm_returned; | 441 | v->pcmret[i]=v->pcmb[i]+v->pcm_returned; |
440 | *pcm=v->pcmret; | 442 | *pcm=v->pcmret; |
441 | } | 443 | } |
442 | return(v->pcm_current-v->pcm_returned); | 444 | return(v->pcm_current-v->pcm_returned); |
diff --git a/apps/codecs/Tremor/codebook.c b/apps/codecs/Tremor/codebook.c index 1287a95011..8c319ab49e 100644 --- a/apps/codecs/Tremor/codebook.c +++ b/apps/codecs/Tremor/codebook.c | |||
@@ -199,7 +199,7 @@ STIN long decode_packed_entry_number(codebook *book, | |||
199 | return(-1); | 199 | return(-1); |
200 | } | 200 | } |
201 | 201 | ||
202 | static inline long decode_packed_block(codebook *book, oggpack_buffer *b, | 202 | static long decode_packed_block(codebook *book, oggpack_buffer *b, |
203 | long *buf, int n){ | 203 | long *buf, int n){ |
204 | long *bufptr = buf; | 204 | long *bufptr = buf; |
205 | long *bufend = buf + n; | 205 | long *bufend = buf + n; |
@@ -399,6 +399,55 @@ long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a, | |||
399 | return(0); | 399 | return(0); |
400 | } | 400 | } |
401 | 401 | ||
402 | static long vorbis_book_decodevv_add_2ch_even(codebook *book,ogg_int32_t **a, | ||
403 | long offset,oggpack_buffer *b, | ||
404 | int n,int point){ | ||
405 | long i,k,chunk,read; | ||
406 | int shift=point-book->binarypoint; /* >=0: values are shifted right, <0: shifted left */ | ||
407 | long entries[32]; /* one batch of decoded entry numbers */ | ||
408 | ogg_int32_t *p0 = &(a[0][offset]); /* write cursor, channel 0 */ | ||
409 | ogg_int32_t *p1 = &(a[1][offset]); /* write cursor, channel 1 */ | ||
410 | /* Adds decoded codebook vectors into a[0] and a[1] starting at offset: the book->dim values of each entry are dealt out alternately to the two channels, two per inner iteration, so book->dim has to be even. Returns 0, or -1 when decode_packed_block() yields fewer entries than requested. */ | ||
411 | if(shift>=0){ | ||
412 | /* NOTE(review): chunk is rounded up below, so when book->dim/2 does not divide n-i the last entry writes past n positions per channel -- confirm callers rule this out or that the buffers have slack. */ | ||
413 | for(i=0;i<n;){ /* i = positions already written per channel */ | ||
414 | chunk=32; | ||
415 | if (chunk*book->dim>(n-i)*2) /* a full batch would exceed the 2*(n-i) values still needed */ | ||
416 | chunk=((n-i)*2+book->dim-1)/book->dim; /* entries needed, rounded up */ | ||
417 | read = decode_packed_block(book,b,entries,chunk); | ||
418 | for(k=0;k<read;k++){ | ||
419 | const ogg_int32_t *t = book->valuelist+entries[k]*book->dim; /* first value of this entry */ | ||
420 | const ogg_int32_t *u = t+book->dim; /* one past its last value */ | ||
421 | do{ | ||
422 | *p0++ += *t++>>shift; | ||
423 | *p1++ += *t++>>shift; | ||
424 | }while(t<u); | ||
425 | } | ||
426 | if (read<chunk)return-1; /* short read: entries decoded so far were already added */ | ||
427 | i += read*book->dim/2; /* each entry covers dim/2 positions per channel */ | ||
428 | } | ||
429 | }else{ | ||
430 | shift = -shift; /* same loop as above, but scaling with a left shift */ | ||
431 | for(i=0;i<n;){ | ||
432 | chunk=32; | ||
433 | if (chunk*book->dim>(n-i)*2) | ||
434 | chunk=((n-i)*2+book->dim-1)/book->dim; | ||
435 | read = decode_packed_block(book,b,entries,chunk); | ||
436 | for(k=0;k<read;k++){ | ||
437 | const ogg_int32_t *t = book->valuelist+entries[k]*book->dim; | ||
438 | const ogg_int32_t *u = t+book->dim; | ||
439 | do{ | ||
440 | *p0++ += *t++<<shift; | ||
441 | *p1++ += *t++<<shift; | ||
442 | }while(t<u); | ||
443 | } | ||
444 | if (read<chunk)return-1; | ||
445 | i += read*book->dim/2; | ||
446 | } | ||
447 | } | ||
448 | return(0); | ||
449 | } | ||
450 | |||
402 | long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a, | 451 | long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a, |
403 | long offset,int ch, | 452 | long offset,int ch, |
404 | oggpack_buffer *b,int n,int point){ | 453 | oggpack_buffer *b,int n,int point){ |
@@ -408,6 +457,9 @@ long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a, | |||
408 | int shift=point-book->binarypoint; | 457 | int shift=point-book->binarypoint; |
409 | long entries[32]; | 458 | long entries[32]; |
410 | 459 | ||
460 | if (!(book->dim&1) && ch==2) | ||
461 | return vorbis_book_decodevv_add_2ch_even(book,a,offset,b,n,point); | ||
462 | |||
411 | if(shift>=0){ | 463 | if(shift>=0){ |
412 | 464 | ||
413 | for(i=offset;i<offset+n;){ | 465 | for(i=offset;i<offset+n;){ |
diff --git a/apps/codecs/Tremor/ivorbiscodec.h b/apps/codecs/Tremor/ivorbiscodec.h index b3e63226ee..2574a11f2a 100644 --- a/apps/codecs/Tremor/ivorbiscodec.h +++ b/apps/codecs/Tremor/ivorbiscodec.h | |||
@@ -59,6 +59,7 @@ typedef struct vorbis_dsp_state{ | |||
59 | vorbis_info *vi; | 59 | vorbis_info *vi; |
60 | 60 | ||
61 | ogg_int32_t **pcm; | 61 | ogg_int32_t **pcm; |
62 | ogg_int32_t **pcmb; | ||
62 | ogg_int32_t **pcmret; | 63 | ogg_int32_t **pcmret; |
63 | int pcm_storage; | 64 | int pcm_storage; |
64 | int pcm_current; | 65 | int pcm_current; |
diff --git a/apps/codecs/Tremor/mdct.c b/apps/codecs/Tremor/mdct.c index 8334cdf3c4..20abdb47f4 100644 --- a/apps/codecs/Tremor/mdct.c +++ b/apps/codecs/Tremor/mdct.c | |||
@@ -38,6 +38,19 @@ | |||
38 | #include "mdct.h" | 38 | #include "mdct.h" |
39 | #include "mdct_lookup.h" | 39 | #include "mdct_lookup.h" |
40 | 40 | ||
41 | #ifdef CPU_ARM | ||
42 | |||
43 | extern void mdct_butterfly_32(DATA_TYPE *x); | ||
44 | extern void mdct_butterfly_generic_loop(DATA_TYPE *x1, DATA_TYPE *x2, | ||
45 | LOOKUP_T *T0, int step, | ||
46 | LOOKUP_T *Ttop); | ||
47 | |||
48 | STIN void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){ /* CPU_ARM build: forwards to the external mdct_butterfly_generic_loop routine */ | ||
49 | mdct_butterfly_generic_loop(x + points, x + (points>>1), /* x1 = one past the end of the block, x2 = its midpoint */ | ||
50 | sincos_lookup0, step, sincos_lookup0+1024); /* T0 = start of the lookup table, Ttop = T0+1024 */ | ||
51 | } | ||
52 | |||
53 | #else | ||
41 | 54 | ||
42 | /* 8 point butterfly (in place) */ | 55 | /* 8 point butterfly (in place) */ |
43 | STIN void mdct_butterfly_8(DATA_TYPE *x){ | 56 | STIN void mdct_butterfly_8(DATA_TYPE *x){ |
@@ -225,6 +238,8 @@ void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){ | |||
225 | }while(T>sincos_lookup0); | 238 | }while(T>sincos_lookup0); |
226 | } | 239 | } |
227 | 240 | ||
241 | #endif /* CPU_ARM */ | ||
242 | |||
228 | STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) { | 243 | STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) { |
229 | 244 | ||
230 | int stages=8-shift; | 245 | int stages=8-shift; |
diff --git a/apps/codecs/Tremor/mdct_arm.S b/apps/codecs/Tremor/mdct_arm.S new file mode 100644 index 0000000000..495e6a17c9 --- /dev/null +++ b/apps/codecs/Tremor/mdct_arm.S | |||
@@ -0,0 +1,419 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id: $ | ||
9 | * | ||
10 | * Copyright (C) 2007 by Tomasz Malesinski | ||
11 | * | ||
12 | * All files in this archive are subject to the GNU General Public License. | ||
13 | * See the file COPYING in the source tree root for full license agreement. | ||
14 | * | ||
15 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
16 | * KIND, either express or implied. | ||
17 | * | ||
18 | ****************************************************************************/ | ||
19 | |||
20 | #define cPI3_8 (0x30fbc54d) | ||
21 | #define cPI2_8 (0x5a82799a) | ||
22 | #define cPI1_8 (0x7641af3d) | ||
23 | |||
24 | .section .icode,"ax",%progbits | ||
25 | .align | ||
26 | |||
27 | .global mdct_butterfly_32 | ||
28 | .global mdct_butterfly_generic_loop | ||
29 | |||
30 | mdct_butterfly_8: @ in: r1-r4 = x0..x3, r5,r6,r10,r11 = x4..x7, r0 = address for the results; r7-r9 and r12 are used as scratch | ||
31 | add r9, r5, r1 @ x4 + x0 | ||
32 | sub r5, r5, r1 @ x4 - x0 | ||
33 | add r7, r6, r2 @ x5 + x1 | ||
34 | sub r6, r6, r2 @ x5 - x1 | ||
35 | add r8, r10, r3 @ x6 + x2 | ||
36 | sub r10, r10, r3 @ x6 - x2 | ||
37 | add r12, r11, r4 @ x7 + x3 | ||
38 | sub r11, r11, r4 @ x7 - x3 | ||
39 | |||
40 | add r1, r10, r6 @ y0 = (x6 - x2) + (x5 - x1) | ||
41 | sub r2, r11, r5 @ y1 = (x7 - x3) - (x4 - x0) | ||
42 | sub r3, r10, r6 @ y2 = (x6 - x2) - (x5 - x1) | ||
43 | add r4, r11, r5 @ y3 = (x7 - x3) + (x4 - x0) | ||
44 | sub r5, r8, r9 @ y4 = (x6 + x2) - (x4 + x0) | ||
45 | sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1) | ||
46 | add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0) | ||
47 | add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1) | ||
48 | stmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} @ write y0..y7 to [r0]; r0 itself is left unchanged | ||
49 | |||
50 | mov pc, lr @ return to the caller | ||
51 | |||
52 | mdct_butterfly_16: @ in: r0 = address of x[0]; works in place on x[0..15] | ||
53 | str lr, [sp, #-4]! @ push the return address (mdct_butterfly_8 is called with bl below) | ||
54 | add r1, r0, #8*4 @ r1 = &x[8] | ||
55 | |||
56 | ldmia r0, {r2, r3, r4, r5} | ||
57 | ldmia r1, {r6, r7, r8, r9} | ||
58 | add r6, r6, r2 @ y8 = x8 + x0 | ||
59 | rsb r2, r6, r2, asl #1 @ x0 - x8 | ||
60 | add r7, r7, r3 @ y9 = x9 + x1 | ||
61 | rsb r3, r7, r3, asl #1 @ x1 - x9 | ||
62 | add r8, r8, r4 @ y10 = x10 + x2 | ||
63 | sub r11, r8, r4, asl #1 @ x10 - x2 | ||
64 | add r9, r9, r5 @ y11 = x11 + x3 | ||
65 | rsb r10, r9, r5, asl #1 @ x3 - x11 | ||
66 | |||
67 | stmia r1!, {r6, r7, r8, r9} @ store y8..y11; r1 now points at x[12] | ||
68 | |||
69 | add r2, r2, r3 @ (x0 - x8) + (x1 - x9) | ||
70 | rsb r3, r2, r3, asl #1 @ (x1 - x9) - (x0 - x8) | ||
71 | |||
72 | ldr r12, =cPI2_8 @ r12 keeps cPI2_8 for both multiply pairs in this routine | ||
73 | smull r8, r5, r2, r12 @ 64-bit product; only the high word (r5) is used | ||
74 | mov r5, r5, asl #1 | ||
75 | smull r8, r6, r3, r12 | ||
76 | mov r6, r6, asl #1 | ||
77 | |||
78 | stmia r0!, {r5, r6, r10, r11} @ store the new x[0..3]; r0 now points at x[4] | ||
79 | |||
80 | ldmia r0, {r2, r3, r4, r5} | ||
81 | ldmia r1, {r6, r7, r8, r9} | ||
82 | add r6, r6, r2 @ y12 = x12 + x4 | ||
83 | sub r2, r6, r2, asl #1 @ x12 - x4 | ||
84 | add r7, r7, r3 @ y13 = x13 + x5 | ||
85 | sub r3, r7, r3, asl #1 @ x13 - x5 | ||
86 | add r8, r8, r4 @ y14 = x14 + x6 | ||
87 | sub r10, r8, r4, asl #1 @ x14 - x6 | ||
88 | add r9, r9, r5 @ y15 = x15 + x7 | ||
89 | sub r11, r9, r5, asl #1 @ x15 - x7 | ||
90 | |||
91 | stmia r1, {r6, r7, r8, r9} @ store y12..y15 | ||
92 | |||
93 | sub r2, r2, r3 @ (x12 - x4) - (x13 - x5) | ||
94 | add r3, r2, r3, asl #1 @ (x12 - x4) + (x13 - x5) | ||
95 | |||
96 | smull r8, r5, r2, r12 | ||
97 | mov r5, r5, asl #1 | ||
98 | smull r8, r6, r3, r12 | ||
99 | mov r6, r6, asl #1 | ||
100 | @ no stmia here, r5, r6, r10, r11 are passed to mdct_butterfly_8 | ||
101 | |||
102 | sub r0, r0, #4*4 @ r0 back to &x[0] | ||
103 | ldmia r0, {r1, r2, r3, r4} @ reload the x[0..3] stored above as the first four inputs | ||
104 | bl mdct_butterfly_8 | ||
105 | add r0, r0, #8*4 @ r0 = &x[8] | ||
106 | ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} @ all eight inputs of the second half come from memory | ||
107 | bl mdct_butterfly_8 | ||
108 | |||
109 | ldr pc, [sp], #4 @ pop the saved return address straight into pc | ||
110 | |||
111 | mdct_butterfly_32: @ exported entry point; in: r0 = address of x[0], works in place on x[0..31] | ||
112 | stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} @ save r4-r12 and the return address | ||
113 | |||
114 | add r1, r0, #16*4 @ r1 = &x[16] | ||
115 | |||
116 | ldmia r0, {r2, r3, r4, r5} | ||
117 | ldmia r1, {r6, r7, r8, r9} | ||
118 | add r6, r6, r2 @ y16 = x16 + x0 | ||
119 | rsb r2, r6, r2, asl #1 @ x0 - x16 | ||
120 | add r7, r7, r3 @ y17 = x17 + x1 | ||
121 | rsb r3, r7, r3, asl #1 @ x1 - x17 | ||
122 | add r8, r8, r4 @ y18 = x18 + x2 | ||
123 | rsb r4, r8, r4, asl #1 @ x2 - x18 | ||
124 | add r9, r9, r5 @ y19 = x19 + x3 | ||
125 | rsb r5, r9, r5, asl #1 @ x3 - x19 | ||
126 | |||
127 | stmia r1!, {r6, r7, r8, r9} @ store y16..y19; r1 advances by four words | ||
128 | |||
129 | ldr r12, =cPI1_8 @ r12 and lr hold cPI1_8 and cPI3_8 for the rest of the routine | ||
130 | ldr lr, =cPI3_8 | ||
131 | smull r10, r6, r2, r12 | ||
132 | smlal r10, r6, r3, lr | ||
133 | rsb r2, r2, #0 | ||
134 | smull r10, r7, r3, r12 | ||
135 | smlal r10, r7, r2, lr | ||
136 | mov r6, r6, asl #1 | ||
137 | mov r7, r7, asl #1 | ||
138 | |||
139 | add r4, r4, r5 @ (x3 - x19) + (x2 - x18) | ||
140 | rsb r5, r4, r5, asl #1 @ (x3 - x19) - (x2 - x18) | ||
141 | |||
142 | ldr r11, =cPI2_8 | ||
143 | smull r10, r8, r4, r11 | ||
144 | mov r8, r8, asl #1 | ||
145 | smull r10, r9, r5, r11 | ||
146 | mov r9, r9, asl #1 | ||
147 | |||
148 | stmia r0!, {r6, r7, r8, r9} @ store the new x[0..3]; r0 advances by four words | ||
149 | |||
150 | ldmia r0, {r2, r3, r4, r5} | ||
151 | ldmia r1, {r6, r7, r8, r9} | ||
152 | add r6, r6, r2 @ y20 = x20 + x4 | ||
153 | rsb r2, r6, r2, asl #1 @ x4 - x20 | ||
154 | add r7, r7, r3 @ y21 = x21 + x5 | ||
155 | rsb r3, r7, r3, asl #1 @ x5 - x21 | ||
156 | add r8, r8, r4 @ y22 = x22 + x6 | ||
157 | sub r4, r8, r4, asl #1 @ x22 - x6 | ||
158 | add r9, r9, r5 @ y23 = x23 + x7 | ||
159 | rsb r5, r9, r5, asl #1 @ x7 - x23 | ||
160 | |||
161 | stmia r1!, {r6, r7, r8, r9} @ store y20..y23 | ||
162 | |||
163 | smull r10, r6, r2, lr | ||
164 | smlal r10, r6, r3, r12 | ||
165 | rsb r2, r2, #0 | ||
166 | smull r10, r7, r3, lr | ||
167 | smlal r10, r7, r2, r12 | ||
168 | mov r6, r6, asl #1 | ||
169 | mov r7, r7, asl #1 | ||
170 | |||
171 | mov r8, r5 @ x7 - x23 is stored as the third word | ||
172 | mov r9, r4 @ x22 - x6 is stored as the fourth word | ||
173 | stmia r0!, {r6, r7, r8, r9} @ store the new x[4..7] | ||
174 | |||
175 | ldmia r0, {r2, r3, r4, r5} | ||
176 | ldmia r1, {r6, r7, r8, r9} | ||
177 | add r6, r6, r2 @ y24 = x24 + x8 | ||
178 | sub r2, r6, r2, asl #1 @ x24 - x8 | ||
179 | add r7, r7, r3 @ y25 = x25 + x9 | ||
180 | sub r3, r7, r3, asl #1 @ x25 - x9 | ||
181 | add r8, r8, r4 @ y26 = x26 + x10 | ||
182 | sub r4, r8, r4, asl #1 @ x26 - x10 | ||
183 | add r9, r9, r5 @ y27 = x27 + x11 | ||
184 | sub r5, r9, r5, asl #1 @ x27 - x11 | ||
185 | |||
186 | stmia r1!, {r6, r7, r8, r9} @ store y24..y27 | ||
187 | |||
188 | smull r10, r7, r2, r12 | ||
189 | smlal r10, r7, r3, lr | ||
190 | rsb r3, r3, #0 | ||
191 | smull r10, r6, r3, r12 | ||
192 | smlal r10, r6, r2, lr | ||
193 | mov r6, r6, asl #1 | ||
194 | mov r7, r7, asl #1 | ||
195 | |||
196 | sub r4, r4, r5 @ (x26 - x10) - (x27 - x11) | ||
197 | add r5, r4, r5, asl #1 @ (x26 - x10) + (x27 - x11) | ||
198 | |||
199 | ldr r11, =cPI2_8 | ||
200 | smull r10, r8, r4, r11 | ||
201 | mov r8, r8, asl #1 | ||
202 | smull r10, r9, r5, r11 | ||
203 | mov r9, r9, asl #1 | ||
204 | |||
205 | stmia r0!, {r6, r7, r8, r9} @ store the new x[8..11]; r0 now points at x[12] | ||
206 | |||
207 | ldmia r0, {r2, r3, r4, r5} | ||
208 | ldmia r1, {r6, r7, r8, r9} | ||
209 | add r6, r6, r2 @ y28 = x28 + x12 | ||
210 | sub r2, r6, r2, asl #1 @ x28 - x12 | ||
211 | add r7, r7, r3 @ y29 = x29 + x13 | ||
212 | sub r3, r7, r3, asl #1 @ x29 - x13 | ||
213 | add r8, r8, r4 @ y30 = x30 + x14 | ||
214 | sub r4, r8, r4, asl #1 @ x30 - x14 | ||
215 | add r9, r9, r5 @ y31 = x31 + x15 | ||
216 | sub r5, r9, r5, asl #1 @ x31 - x15 | ||
217 | |||
218 | stmia r1, {r6, r7, r8, r9} @ store y28..y31 | ||
219 | |||
220 | smull r10, r7, r2, lr | ||
221 | smlal r10, r7, r3, r12 | ||
222 | rsb r3, r3, #0 | ||
223 | smull r10, r6, r3, lr | ||
224 | smlal r10, r6, r2, r12 | ||
225 | mov r6, r6, asl #1 | ||
226 | mov r7, r7, asl #1 | ||
227 | |||
228 | mov r8, r4 | ||
229 | mov r9, r5 | ||
230 | stmia r0, {r6, r7, r8, r9} @ store the new x[12..15] | ||
231 | |||
232 | sub r0, r0, #12*4 @ r0 back to &x[0] after the three write-back stores above | ||
233 | str r0, [sp, #-4]! @ push &x[0]; it is reloaded after the call below | ||
234 | bl mdct_butterfly_16 | ||
235 | |||
236 | ldr r0, [sp], #4 @ pop &x[0] | ||
237 | add r0, r0, #16*4 @ r0 = &x[16] | ||
238 | bl mdct_butterfly_16 | ||
239 | |||
240 | ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} @ restore the saved registers and return | ||
241 | |||
242 | @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop) | ||
243 | mdct_butterfly_generic_loop: @ r0 = x1, r1 = x2, r2 = T0, r3 = step; four loops follow, with r2 (T) walking up to Ttop, down to T0, up and down again | ||
244 | stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} @ ten registers = 40 bytes | ||
245 | str r2, [sp, #-4] @ keep T0 in the word just below sp (no write-back). NOTE(review): nothing reserves this slot, so it only survives if nothing else pushes onto this stack meanwhile -- confirm | ||
246 | ldr r4, [sp, #40] @ word just above the 40 bytes pushed above; presumably the fifth argument, Ttop | ||
247 | 1: | ||
248 | ldmdb r0, {r6, r7, r8, r9} @ the four words below x1 | ||
249 | ldmdb r1, {r10, r11, r12, r14} @ the four words below x2 | ||
250 | |||
251 | add r6, r6, r10 | ||
252 | sub r10, r6, r10, asl #1 | ||
253 | add r7, r7, r11 | ||
254 | rsb r11, r7, r11, asl #1 | ||
255 | add r8, r8, r12 | ||
256 | sub r12, r8, r12, asl #1 | ||
257 | add r9, r9, r14 | ||
258 | rsb r14, r9, r14, asl #1 | ||
259 | |||
260 | stmdb r0!, {r6, r7, r8, r9} @ store the four sums; x1 moves down four words | ||
261 | |||
262 | ldmia r2, {r6, r7} @ r6 = T[0], r7 = T[1] | ||
263 | smull r5, r8, r14, r6 | ||
264 | smlal r5, r8, r12, r7 | ||
265 | rsb r14, r14, #0 | ||
266 | smull r5, r9, r12, r6 | ||
267 | smlal r5, r9, r14, r7 | ||
268 | |||
269 | mov r8, r8, asl #1 | ||
270 | mov r9, r9, asl #1 | ||
271 | stmdb r1!, {r8, r9} @ store two results; x2 moves down two words | ||
272 | add r2, r2, r3, asl #2 @ T += step*4 bytes | ||
273 | |||
274 | ldmia r2, {r6, r7} | ||
275 | smull r5, r8, r11, r6 | ||
276 | smlal r5, r8, r10, r7 | ||
277 | rsb r11, r11, #0 | ||
278 | smull r5, r9, r10, r6 | ||
279 | smlal r5, r9, r11, r7 | ||
280 | |||
281 | mov r8, r8, asl #1 | ||
282 | mov r9, r9, asl #1 | ||
283 | stmdb r1!, {r8, r9} | ||
284 | add r2, r2, r3, asl #2 | ||
285 | |||
286 | cmp r2, r4 | ||
287 | blo 1b @ repeat while T is below the value loaded into r4 (unsigned compare) | ||
288 | |||
289 | ldr r4, [sp, #-4] @ r4 = the T0 value kept below sp on entry | ||
290 | 1: | ||
291 | ldmdb r0, {r6, r7, r8, r9} | ||
292 | ldmdb r1, {r10, r11, r12, r14} | ||
293 | |||
294 | add r6, r6, r10 | ||
295 | sub r10, r6, r10, asl #1 | ||
296 | add r7, r7, r11 | ||
297 | sub r11, r7, r11, asl #1 | ||
298 | add r8, r8, r12 | ||
299 | sub r12, r8, r12, asl #1 | ||
300 | add r9, r9, r14 | ||
301 | sub r14, r9, r14, asl #1 | ||
302 | |||
303 | stmdb r0!, {r6, r7, r8, r9} | ||
304 | |||
305 | ldmia r2, {r6, r7} | ||
306 | smull r5, r9, r14, r6 | ||
307 | smlal r5, r9, r12, r7 | ||
308 | rsb r14, r14, #0 | ||
309 | smull r5, r8, r12, r6 | ||
310 | smlal r5, r8, r14, r7 | ||
311 | |||
312 | mov r8, r8, asl #1 | ||
313 | mov r9, r9, asl #1 | ||
314 | stmdb r1!, {r8, r9} | ||
315 | sub r2, r2, r3, asl #2 @ T -= step*4 bytes | ||
316 | |||
317 | ldmia r2, {r6, r7} | ||
318 | smull r5, r9, r11, r6 | ||
319 | smlal r5, r9, r10, r7 | ||
320 | rsb r11, r11, #0 | ||
321 | smull r5, r8, r10, r6 | ||
322 | smlal r5, r8, r11, r7 | ||
323 | |||
324 | mov r8, r8, asl #1 | ||
325 | mov r9, r9, asl #1 | ||
326 | stmdb r1!, {r8, r9} | ||
327 | sub r2, r2, r3, asl #2 | ||
328 | |||
329 | cmp r2, r4 | ||
330 | bhi 1b @ repeat while T is above T0 (unsigned compare) | ||
331 | |||
332 | ldr r4, [sp, #40] @ upper bound again, same stack word as on entry | ||
333 | 1: | ||
334 | ldmdb r0, {r6, r7, r8, r9} | ||
335 | ldmdb r1, {r10, r11, r12, r14} | ||
336 | |||
337 | add r6, r6, r10 | ||
338 | rsb r10, r6, r10, asl #1 | ||
339 | add r7, r7, r11 | ||
340 | rsb r11, r7, r11, asl #1 | ||
341 | add r8, r8, r12 | ||
342 | rsb r12, r8, r12, asl #1 | ||
343 | add r9, r9, r14 | ||
344 | rsb r14, r9, r14, asl #1 | ||
345 | |||
346 | stmdb r0!, {r6, r7, r8, r9} | ||
347 | |||
348 | ldmia r2, {r6, r7} | ||
349 | smull r5, r8, r12, r6 | ||
350 | smlal r5, r8, r14, r7 | ||
351 | rsb r12, r12, #0 | ||
352 | smull r5, r9, r14, r6 | ||
353 | smlal r5, r9, r12, r7 | ||
354 | |||
355 | mov r8, r8, asl #1 | ||
356 | mov r9, r9, asl #1 | ||
357 | stmdb r1!, {r8, r9} | ||
358 | add r2, r2, r3, asl #2 | ||
359 | |||
360 | ldmia r2, {r6, r7} | ||
361 | smull r5, r8, r10, r6 | ||
362 | smlal r5, r8, r11, r7 | ||
363 | rsb r10, r10, #0 | ||
364 | smull r5, r9, r11, r6 | ||
365 | smlal r5, r9, r10, r7 | ||
366 | |||
367 | mov r8, r8, asl #1 | ||
368 | mov r9, r9, asl #1 | ||
369 | stmdb r1!, {r8, r9} | ||
370 | add r2, r2, r3, asl #2 | ||
371 | |||
372 | cmp r2, r4 | ||
373 | blo 1b | ||
374 | |||
375 | ldr r4, [sp, #-4] @ lower bound again: the T0 value kept below sp | ||
376 | 1: | ||
377 | ldmdb r0, {r6, r7, r8, r9} | ||
378 | ldmdb r1, {r10, r11, r12, r14} | ||
379 | |||
380 | add r6, r6, r10 | ||
381 | sub r10, r6, r10, asl #1 | ||
382 | add r7, r7, r11 | ||
383 | rsb r11, r7, r11, asl #1 | ||
384 | add r8, r8, r12 | ||
385 | sub r12, r8, r12, asl #1 | ||
386 | add r9, r9, r14 | ||
387 | rsb r14, r9, r14, asl #1 | ||
388 | |||
389 | stmdb r0!, {r6, r7, r8, r9} | ||
390 | |||
391 | ldmia r2, {r6, r7} | ||
392 | smull r5, r9, r12, r6 | ||
393 | smlal r5, r9, r14, r7 | ||
394 | rsb r12, r12, #0 | ||
395 | smull r5, r8, r14, r6 | ||
396 | smlal r5, r8, r12, r7 | ||
397 | |||
398 | mov r8, r8, asl #1 | ||
399 | mov r9, r9, asl #1 | ||
400 | stmdb r1!, {r8, r9} | ||
401 | sub r2, r2, r3, asl #2 | ||
402 | |||
403 | ldmia r2, {r6, r7} | ||
404 | smull r5, r9, r10, r6 | ||
405 | smlal r5, r9, r11, r7 | ||
406 | rsb r10, r10, #0 | ||
407 | smull r5, r8, r11, r6 | ||
408 | smlal r5, r8, r10, r7 | ||
409 | |||
410 | mov r8, r8, asl #1 | ||
411 | mov r9, r9, asl #1 | ||
412 | stmdb r1!, {r8, r9} | ||
413 | sub r2, r2, r3, asl #2 | ||
414 | |||
415 | cmp r2, r4 | ||
416 | bhi 1b | ||
417 | |||
418 | ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} @ restore the saved registers and return | ||
419 | |||