summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tomasz Malesinski <tomal@rockbox.org> 2007-09-27 21:58:51 +0000
committer: Tomasz Malesinski <tomal@rockbox.org> 2007-09-27 21:58:51 +0000
commit: c13eba29ff5615cc74a7818e42cc9d464a7c7075 (patch)
tree: eef1dfc0d4ed2b69e16b119b0d47052801ef827f
parent: 1aaf5dbdb660d29ef384674f25c916f23da505bb (diff)
download: rockbox-c13eba29ff5615cc74a7818e42cc9d464a7c7075.tar.gz
download: rockbox-c13eba29ff5615cc74a7818e42cc9d464a7c7075.zip
FS #7833: Optimizations to the Vorbis codec:
- ARM assembly version of parts of mdct,
- special case for vorbis_book_decodevv_add for 2 channels and even book->dim,
- store the output in vb->pcm if possible, as it is usually in IRAM as opposed to v->pcm.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@14875 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/Tremor/SOURCES | 3
-rw-r--r-- apps/codecs/Tremor/block.c | 20
-rw-r--r-- apps/codecs/Tremor/codebook.c | 54
-rw-r--r-- apps/codecs/Tremor/ivorbiscodec.h | 1
-rw-r--r-- apps/codecs/Tremor/mdct.c | 15
-rw-r--r-- apps/codecs/Tremor/mdct_arm.S | 419
6 files changed, 502 insertions, 10 deletions
diff --git a/apps/codecs/Tremor/SOURCES b/apps/codecs/Tremor/SOURCES
index 0877941808..9b8c05e340 100644
--- a/apps/codecs/Tremor/SOURCES
+++ b/apps/codecs/Tremor/SOURCES
@@ -7,6 +7,9 @@ framing.c
7info.c 7info.c
8mapping0.c 8mapping0.c
9mdct.c 9mdct.c
10#ifdef CPU_ARM
11mdct_arm.S
12#endif
10registry.c 13registry.c
11res012.c 14res012.c
12sharedbook.c 15sharedbook.c
diff --git a/apps/codecs/Tremor/block.c b/apps/codecs/Tremor/block.c
index 80cbb7809c..e609fc44f7 100644
--- a/apps/codecs/Tremor/block.c
+++ b/apps/codecs/Tremor/block.c
@@ -171,6 +171,7 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
171 171
172 v->pcm_storage=ci->blocksizes[1]; 172 v->pcm_storage=ci->blocksizes[1];
173 v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm)); 173 v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm));
174 v->pcmb=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmb));
174 v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret)); 175 v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret));
175 176
176 for(i=0;i<vi->channels;i++) 177 for(i=0;i<vi->channels;i++)
@@ -308,25 +309,28 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
308 /* large/large */ 309 /* large/large */
309 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 310 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
310 ogg_int32_t *p=vb->pcm[j]; 311 ogg_int32_t *p=vb->pcm[j];
311 vect_add(pcm, p, n1); 312 vect_add(p, pcm, n1);
313 v->pcmb[j]=p;
312 }else{ 314 }else{
313 /* large/small */ 315 /* large/small */
314 ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2; 316 ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
315 ogg_int32_t *p=vb->pcm[j]; 317 ogg_int32_t *p=vb->pcm[j];
316 vect_add(pcm, p, n0); 318 vect_add(pcm, p, n0);
319 v->pcmb[j]=v->pcm[j]+prevCenter;
317 } 320 }
318 }else{ 321 }else{
319 if(v->W){ 322 if(v->W){
320 /* small/large */ 323 /* small/large */
321 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 324 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
322 ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2; 325 ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2;
323 vect_add(pcm, p, n0); 326 vect_add(p, pcm, n0);
324 vect_copy(&pcm[n0], &p[n0], n1/2-n0/2); 327 v->pcmb[j]=p;
325 }else{ 328 }else{
326 /* small/small */ 329 /* small/small */
327 ogg_int32_t *pcm=v->pcm[j]+prevCenter; 330 ogg_int32_t *pcm=v->pcm[j]+prevCenter;
328 ogg_int32_t *p=vb->pcm[j]; 331 ogg_int32_t *p=vb->pcm[j];
329 vect_add(pcm, p, n0); 332 vect_add(p, pcm, n0);
333 v->pcmb[j]=p;
330 } 334 }
331 } 335 }
332 336
@@ -351,10 +355,8 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
351 v->pcm_returned=thisCenter; 355 v->pcm_returned=thisCenter;
352 v->pcm_current=thisCenter; 356 v->pcm_current=thisCenter;
353 }else{ 357 }else{
354 v->pcm_returned=prevCenter; 358 v->pcm_returned=0;
355 v->pcm_current=prevCenter+ 359 v->pcm_current=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4;
356 ci->blocksizes[v->lW]/4+
357 ci->blocksizes[v->W]/4;
358 } 360 }
359 361
360 } 362 }
@@ -436,7 +438,7 @@ int vorbis_synthesis_pcmout(vorbis_dsp_state *v,ogg_int32_t ***pcm){
436 if(pcm){ 438 if(pcm){
437 int i; 439 int i;
438 for(i=0;i<vi->channels;i++) 440 for(i=0;i<vi->channels;i++)
439 v->pcmret[i]=v->pcm[i]+v->pcm_returned; 441 v->pcmret[i]=v->pcmb[i]+v->pcm_returned;
440 *pcm=v->pcmret; 442 *pcm=v->pcmret;
441 } 443 }
442 return(v->pcm_current-v->pcm_returned); 444 return(v->pcm_current-v->pcm_returned);
diff --git a/apps/codecs/Tremor/codebook.c b/apps/codecs/Tremor/codebook.c
index 1287a95011..8c319ab49e 100644
--- a/apps/codecs/Tremor/codebook.c
+++ b/apps/codecs/Tremor/codebook.c
@@ -199,7 +199,7 @@ STIN long decode_packed_entry_number(codebook *book,
199 return(-1); 199 return(-1);
200} 200}
201 201
202static inline long decode_packed_block(codebook *book, oggpack_buffer *b, 202static long decode_packed_block(codebook *book, oggpack_buffer *b,
203 long *buf, int n){ 203 long *buf, int n){
204 long *bufptr = buf; 204 long *bufptr = buf;
205 long *bufend = buf + n; 205 long *bufend = buf + n;
@@ -399,6 +399,55 @@ long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a,
399 return(0); 399 return(0);
400} 400}
401 401
/* Two-channel variant of vorbis_book_decodevv_add for codebooks whose
 * dimension is even.
 *
 * Decodes codebook entries from b in chunks of at most 32 and adds the
 * entry values alternately to a[0][] and a[1][], starting at index offset
 * in both channels: first value to channel 0, second to channel 1, and so
 * on.  Each value is shifted by (point - book->binarypoint): right when
 * that difference is >= 0, left by its magnitude otherwise.
 *
 * n is the number of samples to produce per channel.
 *
 * Returns 0 on success, or -1 when decode_packed_block() delivers fewer
 * entries than requested.  The entries that were delivered have already
 * been added to the output at that point.
 *
 * NOTE(review): the inner do/while consumes two values per iteration and
 * always runs at least once, so it relies on book->dim being even and
 * non-zero.  The caller visible in this diff only checks !(book->dim&1)
 * and ch==2 -- confirm that dim==0 cannot reach this function.
 */
402static long vorbis_book_decodevv_add_2ch_even(codebook *book,ogg_int32_t **a,
403 long offset,oggpack_buffer *b,
404 int n,int point){
405 long i,k,chunk,read;
406 int shift=point-book->binarypoint;
407 long entries[32]; /* entry numbers decoded for the current chunk */
408 ogg_int32_t *p0 = &(a[0][offset]); /* write cursor, channel 0 */
409 ogg_int32_t *p1 = &(a[1][offset]); /* write cursor, channel 1 */
410
411 if(shift>=0){
412
413 for(i=0;i<n;){ /* i counts samples produced per channel */
414 chunk=32;
415 if (chunk*book->dim>(n-i)*2) /* would 32 entries exceed the 2*(n-i) values still needed? */
416 chunk=((n-i)*2+book->dim-1)/book->dim; /* then request only enough entries, rounded up */
417 read = decode_packed_block(book,b,entries,chunk);
418 for(k=0;k<read;k++){
419 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim; /* first value of this entry */
420 const ogg_int32_t *u = t+book->dim; /* one past its last value */
421 do{
422 *p0++ += *t++>>shift;
423 *p1++ += *t++>>shift;
424 }while(t<u);
425 }
426 if (read<chunk)return-1; /* short read: checked only after the decoded part was added */
427 i += read*book->dim/2; /* each entry contributes dim/2 samples to each channel */
428 }
429 }else{
430 shift = -shift; /* negative difference: same loop as above, shifting left instead */
431 for(i=0;i<n;){
432 chunk=32;
433 if (chunk*book->dim>(n-i)*2)
434 chunk=((n-i)*2+book->dim-1)/book->dim;
435 read = decode_packed_block(book,b,entries,chunk);
436 for(k=0;k<read;k++){
437 const ogg_int32_t *t = book->valuelist+entries[k]*book->dim;
438 const ogg_int32_t *u = t+book->dim;
439 do{
440 *p0++ += *t++<<shift;
441 *p1++ += *t++<<shift;
442 }while(t<u);
443 }
444 if (read<chunk)return-1;
445 i += read*book->dim/2;
446 }
447 }
448 return(0);
449}
450
402long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a, 451long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,
403 long offset,int ch, 452 long offset,int ch,
404 oggpack_buffer *b,int n,int point){ 453 oggpack_buffer *b,int n,int point){
@@ -408,6 +457,9 @@ long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,
408 int shift=point-book->binarypoint; 457 int shift=point-book->binarypoint;
409 long entries[32]; 458 long entries[32];
410 459
460 if (!(book->dim&1) && ch==2)
461 return vorbis_book_decodevv_add_2ch_even(book,a,offset,b,n,point);
462
411 if(shift>=0){ 463 if(shift>=0){
412 464
413 for(i=offset;i<offset+n;){ 465 for(i=offset;i<offset+n;){
diff --git a/apps/codecs/Tremor/ivorbiscodec.h b/apps/codecs/Tremor/ivorbiscodec.h
index b3e63226ee..2574a11f2a 100644
--- a/apps/codecs/Tremor/ivorbiscodec.h
+++ b/apps/codecs/Tremor/ivorbiscodec.h
@@ -59,6 +59,7 @@ typedef struct vorbis_dsp_state{
59 vorbis_info *vi; 59 vorbis_info *vi;
60 60
61 ogg_int32_t **pcm; 61 ogg_int32_t **pcm;
62 ogg_int32_t **pcmb;
62 ogg_int32_t **pcmret; 63 ogg_int32_t **pcmret;
63 int pcm_storage; 64 int pcm_storage;
64 int pcm_current; 65 int pcm_current;
diff --git a/apps/codecs/Tremor/mdct.c b/apps/codecs/Tremor/mdct.c
index 8334cdf3c4..20abdb47f4 100644
--- a/apps/codecs/Tremor/mdct.c
+++ b/apps/codecs/Tremor/mdct.c
@@ -38,6 +38,19 @@
38#include "mdct.h" 38#include "mdct.h"
39#include "mdct_lookup.h" 39#include "mdct_lookup.h"
40 40
41#ifdef CPU_ARM
42
43extern void mdct_butterfly_32(DATA_TYPE *x);
44extern void mdct_butterfly_generic_loop(DATA_TYPE *x1, DATA_TYPE *x2,
45 LOOKUP_T *T0, int step,
46 LOOKUP_T *Ttop);
47
/* CPU_ARM build of mdct_butterfly_generic: the loop body is implemented in
 * assembly as mdct_butterfly_generic_loop (declared extern above).  This
 * wrapper only sets up its arguments:
 *   x1   = x + points        (one past the end of the block)
 *   x2   = x + points/2      (the midpoint of the block)
 *   T0   = sincos_lookup0    (start of the lookup table)
 *   step = step              (passed through unchanged)
 *   Ttop = sincos_lookup0 + 1024
 */
48STIN void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){
49 mdct_butterfly_generic_loop(x + points, x + (points>>1),
50 sincos_lookup0, step, sincos_lookup0+1024);
51}
52
53#else
41 54
42/* 8 point butterfly (in place) */ 55/* 8 point butterfly (in place) */
43STIN void mdct_butterfly_8(DATA_TYPE *x){ 56STIN void mdct_butterfly_8(DATA_TYPE *x){
@@ -225,6 +238,8 @@ void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){
225 }while(T>sincos_lookup0); 238 }while(T>sincos_lookup0);
226} 239}
227 240
241#endif /* CPU_ARM */
242
228STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) { 243STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) {
229 244
230 int stages=8-shift; 245 int stages=8-shift;
diff --git a/apps/codecs/Tremor/mdct_arm.S b/apps/codecs/Tremor/mdct_arm.S
new file mode 100644
index 0000000000..495e6a17c9
--- /dev/null
+++ b/apps/codecs/Tremor/mdct_arm.S
@@ -0,0 +1,419 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id: $
9 *
10 * Copyright (C) 2007 by Tomasz Malesinski
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#define cPI3_8 (0x30fbc54d)
21#define cPI2_8 (0x5a82799a)
22#define cPI1_8 (0x7641af3d)
23
24 .section .icode,"ax",%progbits
25 .align
26
27 .global mdct_butterfly_32
28 .global mdct_butterfly_generic_loop
29
@ mdct_butterfly_8: file-local 8 point butterfly (not exported with .global).
@
@ Uses a private register-based calling convention, not the C one:
@   in:   r0 = address where the 8 results are stored
@         r1, r2, r3, r4   = x0, x1, x2, x3
@         r5, r6, r10, r11 = x4, x5, x6, x7
@   out:  y0..y7 written to [r0] .. [r0, #28]; r0 is left unchanged
@   uses: r7, r8, r9, r12 as scratch; r1-r6, r10, r11 are overwritten
@ The stack is not touched; the routine returns through lr.
30mdct_butterfly_8:
31 add r9, r5, r1 @ x4 + x0
32 sub r5, r5, r1 @ x4 - x0
33 add r7, r6, r2 @ x5 + x1
34 sub r6, r6, r2 @ x5 - x1
35 add r8, r10, r3 @ x6 + x2
36 sub r10, r10, r3 @ x6 - x2
37 add r12, r11, r4 @ x7 + x3
38 sub r11, r11, r4 @ x7 - x3
39
40 add r1, r10, r6 @ y0 = (x6 - x2) + (x5 - x1)
41 sub r2, r11, r5 @ y1 = (x7 - x3) - (x4 - x0)
42 sub r3, r10, r6 @ y2 = (x6 - x2) - (x5 - x1)
43 add r4, r11, r5 @ y3 = (x7 - x3) + (x4 - x0)
44 sub r5, r8, r9 @ y4 = (x6 + x2) - (x4 + x0)
45 sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1)
46 add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0)
47 add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1)
48 stmia r0, {r1, r2, r3, r4, r5, r6, r10, r11} @ store y0..y7, no writeback to r0
49
50 mov pc, lr @ return to caller
51
@ mdct_butterfly_16: file-local 16 point butterfly (not exported with .global).
@
@   in:   r0 = address of x0 (16 consecutive words, processed in place)
@   uses: r1-r12 as scratch; r0 is modified; lr is saved on the stack
@
@ Sums x[k+8] + x[k] are written to the upper 8 words.  The differences are
@ written to the lower 8 words, partly scaled by cPI2_8.  Each half is then
@ passed through mdct_butterfly_8.
@ Scaling uses smull followed by "asl #1": the high 32 bits of the 64-bit
@ product, shifted left by one.
52mdct_butterfly_16:
53 str lr, [sp, #-4]! @ push return address (mdct_butterfly_8 is called with bl below)
54 add r1, r0, #8*4 @ r1 = &x8
55
56 ldmia r0, {r2, r3, r4, r5}
57 ldmia r1, {r6, r7, r8, r9}
58 add r6, r6, r2 @ y8 = x8 + x0
59 rsb r2, r6, r2, asl #1 @ x0 - x8
60 add r7, r7, r3 @ y9 = x9 + x1
61 rsb r3, r7, r3, asl #1 @ x1 - x9
62 add r8, r8, r4 @ y10 = x10 + x2
63 sub r11, r8, r4, asl #1 @ x10 - x2
64 add r9, r9, r5 @ y11 = x11 + x3
65 rsb r10, r9, r5, asl #1 @ x3 - x11
66
67 stmia r1!, {r6, r7, r8, r9} @ store y8..y11; r1 advances to &x12
68
69 add r2, r2, r3 @ (x0 - x8) + (x1 - x9)
70 rsb r3, r2, r3, asl #1 @ (x1 - x9) - (x0 - x8)
71
72 ldr r12, =cPI2_8
73 smull r8, r5, r2, r12
74 mov r5, r5, asl #1
75 smull r8, r6, r3, r12
76 mov r6, r6, asl #1
77
78 stmia r0!, {r5, r6, r10, r11} @ store lower words 0..3; r0 advances to &x4
79
80 ldmia r0, {r2, r3, r4, r5}
81 ldmia r1, {r6, r7, r8, r9}
82 add r6, r6, r2 @ y12 = x12 + x4
83 sub r2, r6, r2, asl #1 @ x12 - x4
84 add r7, r7, r3 @ y13 = x13 + x5
85 sub r3, r7, r3, asl #1 @ x13 - x5
86 add r8, r8, r4 @ y14 = x14 + x6
87 sub r10, r8, r4, asl #1 @ x14 - x6
88 add r9, r9, r5 @ y15 = x15 + x7
89 sub r11, r9, r5, asl #1 @ x15 - x7
90
91 stmia r1, {r6, r7, r8, r9} @ store y12..y15 (no writeback)
92
93 sub r2, r2, r3 @ (x12 - x4) - (x13 - x5)
94 add r3, r2, r3, asl #1 @ (x12 - x4) + (x13 - x5)
95
96 smull r8, r5, r2, r12
97 mov r5, r5, asl #1
98 smull r8, r6, r3, r12
99 mov r6, r6, asl #1
100 @ no stmia here, r5, r6, r10, r11 are passed to mdct_butterfly_8
101
102 sub r0, r0, #4*4 @ back to &x0
103 ldmia r0, {r1, r2, r3, r4} @ reload the four words stored above as inputs x0..x3
104 bl mdct_butterfly_8
105 add r0, r0, #8*4 @ upper half: &x8
106 ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11}
107 bl mdct_butterfly_8
108
109 ldr pc, [sp], #4 @ pop saved lr into pc: return
110
@ mdct_butterfly_32: exported 32 point butterfly, processed in place.
@ Declared on the C side as: void mdct_butterfly_32(DATA_TYPE *x)
@
@   in:   r0 = x (32 consecutive words)
@   Saves and restores r4-r12 and lr on the stack.
@
@ Works through four groups of four word pairs (x[k], x[k+16]).  For each
@ group the sums x[k+16] + x[k] are written back to the upper half.  The
@ differences are either combined with cPI1_8 / cPI3_8 / cPI2_8 through
@ smull/smlal or passed through unchanged, then written to the lower half.
@ Finally mdct_butterfly_16 is run on the lower and on the upper 16 words.
@ As elsewhere in this file, a scaled value is the high word of the 64-bit
@ product shifted left by one.
111mdct_butterfly_32:
112 stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
113
114 add r1, r0, #16*4 @ r1 = &x16
115
116 ldmia r0, {r2, r3, r4, r5}
117 ldmia r1, {r6, r7, r8, r9}
118 add r6, r6, r2 @ y16 = x16 + x0
119 rsb r2, r6, r2, asl #1 @ x0 - x16
120 add r7, r7, r3 @ y17 = x17 + x1
121 rsb r3, r7, r3, asl #1 @ x1 - x17
122 add r8, r8, r4 @ y18 = x18 + x2
123 rsb r4, r8, r4, asl #1 @ x2 - x18
124 add r9, r9, r5 @ y19 = x19 + x3
125 rsb r5, r9, r5, asl #1 @ x3 - x19
126
127 stmia r1!, {r6, r7, r8, r9}
128
129 ldr r12, =cPI1_8 @ r12 and lr hold these two constants for the rest of the routine
130 ldr lr, =cPI3_8
131 smull r10, r6, r2, r12
132 smlal r10, r6, r3, lr
133 rsb r2, r2, #0
134 smull r10, r7, r3, r12
135 smlal r10, r7, r2, lr
136 mov r6, r6, asl #1
137 mov r7, r7, asl #1
138
139 add r4, r4, r5 @ (x3 - x19) + (x2 - x18)
140 rsb r5, r4, r5, asl #1 @ (x3 - x19) - (x2 - x18)
141
142 ldr r11, =cPI2_8
143 smull r10, r8, r4, r11
144 mov r8, r8, asl #1
145 smull r10, r9, r5, r11
146 mov r9, r9, asl #1
147
148 stmia r0!, {r6, r7, r8, r9}
149
150 ldmia r0, {r2, r3, r4, r5}
151 ldmia r1, {r6, r7, r8, r9}
152 add r6, r6, r2 @ y20 = x20 + x4
153 rsb r2, r6, r2, asl #1 @ x4 - x20
154 add r7, r7, r3 @ y21 = x21 + x5
155 rsb r3, r7, r3, asl #1 @ x5 - x21
156 add r8, r8, r4 @ y22 = x22 + x6
157 sub r4, r8, r4, asl #1 @ x22 - x6
158 add r9, r9, r5 @ y23 = x23 + x7
159 rsb r5, r9, r5, asl #1 @ x7 - x23
160
161 stmia r1!, {r6, r7, r8, r9}
162
163 smull r10, r6, r2, lr
164 smlal r10, r6, r3, r12
165 rsb r2, r2, #0
166 smull r10, r7, r3, lr
167 smlal r10, r7, r2, r12
168 mov r6, r6, asl #1
169 mov r7, r7, asl #1
170
171 mov r8, r5 @ x7 - x23 is stored unscaled
172 mov r9, r4 @ x22 - x6 is stored unscaled
173 stmia r0!, {r6, r7, r8, r9}
174
175 ldmia r0, {r2, r3, r4, r5}
176 ldmia r1, {r6, r7, r8, r9}
177 add r6, r6, r2 @ y24 = x24 + x8
178 sub r2, r6, r2, asl #1 @ x24 - x8
179 add r7, r7, r3 @ y25 = x25 + x9
180 sub r3, r7, r3, asl #1 @ x25 - x9
181 add r8, r8, r4 @ y26 = x26 + x10
182 sub r4, r8, r4, asl #1 @ x26 - x10
183 add r9, r9, r5 @ y27 = x27 + x11
184 sub r5, r9, r5, asl #1 @ x27 - x11
185
186 stmia r1!, {r6, r7, r8, r9}
187
188 smull r10, r7, r2, r12
189 smlal r10, r7, r3, lr
190 rsb r3, r3, #0
191 smull r10, r6, r3, r12
192 smlal r10, r6, r2, lr
193 mov r6, r6, asl #1
194 mov r7, r7, asl #1
195
196 sub r4, r4, r5 @ (x26 - x10) - (x27 - x11)
197 add r5, r4, r5, asl #1 @ (x26 - x10) + (x27 - x11)
198
199 ldr r11, =cPI2_8
200 smull r10, r8, r4, r11
201 mov r8, r8, asl #1
202 smull r10, r9, r5, r11
203 mov r9, r9, asl #1
204
205 stmia r0!, {r6, r7, r8, r9}
206
207 ldmia r0, {r2, r3, r4, r5}
208 ldmia r1, {r6, r7, r8, r9}
209 add r6, r6, r2 @ y28 = x28 + x12
210 sub r2, r6, r2, asl #1 @ x28 - x12
211 add r7, r7, r3 @ y29 = x29 + x13
212 sub r3, r7, r3, asl #1 @ x29 - x13
213 add r8, r8, r4 @ y30 = x30 + x14
214 sub r4, r8, r4, asl #1 @ x30 - x14
215 add r9, r9, r5 @ y31 = x31 + x15
216 sub r5, r9, r5, asl #1 @ x31 - x15
217
218 stmia r1, {r6, r7, r8, r9}
219
220 smull r10, r7, r2, lr
221 smlal r10, r7, r3, r12
222 rsb r3, r3, #0
223 smull r10, r6, r3, lr
224 smlal r10, r6, r2, r12
225 mov r6, r6, asl #1
226 mov r7, r7, asl #1
227
228 mov r8, r4 @ x30 - x14 is stored unscaled
229 mov r9, r5 @ x31 - x15 is stored unscaled
230 stmia r0, {r6, r7, r8, r9}
231
232 sub r0, r0, #12*4 @ r0 was advanced by three 16-byte stores; back to &x0
233 str r0, [sp, #-4]! @ push x: mdct_butterfly_16 modifies r0
234 bl mdct_butterfly_16
235
236 ldr r0, [sp], #4 @ pop x
237 add r0, r0, #16*4 @ upper half: &x16
238 bl mdct_butterfly_16
239
240 ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
241
242 @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
@
@ Exported loop body of the generic butterfly.  Called from C with
@   r0 = x1, r1 = x2, r2 = T0, r3 = step, and Ttop as the fifth argument on
@   the stack (read from [sp, #40] after the ten registers pushed here).
@
@ r2 is used as the running table pointer T and is moved by step words
@ ("r3, asl #2" bytes) after each pair of table reads.  The routine runs
@ four loops in sequence:
@   1. T ascends   while T < Ttop
@   2. T descends  while T > T0
@   3. T ascends   while T < Ttop
@   4. T descends  while T > T0
@ Every iteration reads the four words below x1 and below x2, writes the
@ four sums back below x1 (x1 -= 4 words), and writes four products of the
@ differences with the table values below x2 (x2 -= 4 words, done as two
@ 2-word stores).  A product is the high word of the 64-bit smull/smlal
@ result shifted left by one.  The loops differ in which differences are
@ negated and in which order the results are stored.
@
@ NOTE(review): T0 is saved with "str r2, [sp, #-4]", i.e. just below the
@ stack pointer without moving sp, and read back from there later.  Nothing
@ in this routine pushes in between, but anything else that uses this stack
@ asynchronously could overwrite the slot -- confirm this is safe on the
@ targets this file is built for.
243mdct_butterfly_generic_loop:
244 stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
245 str r2, [sp, #-4] @ remember T0 (stored below sp, see note above)
246 ldr r4, [sp, #40] @ r4 = Ttop (fifth argument)
2471: @ loop 1: T ascending
248 ldmdb r0, {r6, r7, r8, r9}
249 ldmdb r1, {r10, r11, r12, r14}
250
251 add r6, r6, r10
252 sub r10, r6, r10, asl #1
253 add r7, r7, r11
254 rsb r11, r7, r11, asl #1
255 add r8, r8, r12
256 sub r12, r8, r12, asl #1
257 add r9, r9, r14
258 rsb r14, r9, r14, asl #1
259
260 stmdb r0!, {r6, r7, r8, r9}
261
262 ldmia r2, {r6, r7}
263 smull r5, r8, r14, r6
264 smlal r5, r8, r12, r7
265 rsb r14, r14, #0
266 smull r5, r9, r12, r6
267 smlal r5, r9, r14, r7
268
269 mov r8, r8, asl #1
270 mov r9, r9, asl #1
271 stmdb r1!, {r8, r9}
272 add r2, r2, r3, asl #2 @ T += step (in words)
273
274 ldmia r2, {r6, r7}
275 smull r5, r8, r11, r6
276 smlal r5, r8, r10, r7
277 rsb r11, r11, #0
278 smull r5, r9, r10, r6
279 smlal r5, r9, r11, r7
280
281 mov r8, r8, asl #1
282 mov r9, r9, asl #1
283 stmdb r1!, {r8, r9}
284 add r2, r2, r3, asl #2
285
286 cmp r2, r4
287 blo 1b @ repeat while T < Ttop
288
289 ldr r4, [sp, #-4] @ r4 = T0
2901: @ loop 2: T descending
291 ldmdb r0, {r6, r7, r8, r9}
292 ldmdb r1, {r10, r11, r12, r14}
293
294 add r6, r6, r10
295 sub r10, r6, r10, asl #1
296 add r7, r7, r11
297 sub r11, r7, r11, asl #1
298 add r8, r8, r12
299 sub r12, r8, r12, asl #1
300 add r9, r9, r14
301 sub r14, r9, r14, asl #1
302
303 stmdb r0!, {r6, r7, r8, r9}
304
305 ldmia r2, {r6, r7}
306 smull r5, r9, r14, r6
307 smlal r5, r9, r12, r7
308 rsb r14, r14, #0
309 smull r5, r8, r12, r6
310 smlal r5, r8, r14, r7
311
312 mov r8, r8, asl #1
313 mov r9, r9, asl #1
314 stmdb r1!, {r8, r9}
315 sub r2, r2, r3, asl #2 @ T -= step (in words)
316
317 ldmia r2, {r6, r7}
318 smull r5, r9, r11, r6
319 smlal r5, r9, r10, r7
320 rsb r11, r11, #0
321 smull r5, r8, r10, r6
322 smlal r5, r8, r11, r7
323
324 mov r8, r8, asl #1
325 mov r9, r9, asl #1
326 stmdb r1!, {r8, r9}
327 sub r2, r2, r3, asl #2
328
329 cmp r2, r4
330 bhi 1b @ repeat while T > T0
331
332 ldr r4, [sp, #40] @ r4 = Ttop
3331: @ loop 3: T ascending
334 ldmdb r0, {r6, r7, r8, r9}
335 ldmdb r1, {r10, r11, r12, r14}
336
337 add r6, r6, r10
338 rsb r10, r6, r10, asl #1
339 add r7, r7, r11
340 rsb r11, r7, r11, asl #1
341 add r8, r8, r12
342 rsb r12, r8, r12, asl #1
343 add r9, r9, r14
344 rsb r14, r9, r14, asl #1
345
346 stmdb r0!, {r6, r7, r8, r9}
347
348 ldmia r2, {r6, r7}
349 smull r5, r8, r12, r6
350 smlal r5, r8, r14, r7
351 rsb r12, r12, #0
352 smull r5, r9, r14, r6
353 smlal r5, r9, r12, r7
354
355 mov r8, r8, asl #1
356 mov r9, r9, asl #1
357 stmdb r1!, {r8, r9}
358 add r2, r2, r3, asl #2
359
360 ldmia r2, {r6, r7}
361 smull r5, r8, r10, r6
362 smlal r5, r8, r11, r7
363 rsb r10, r10, #0
364 smull r5, r9, r11, r6
365 smlal r5, r9, r10, r7
366
367 mov r8, r8, asl #1
368 mov r9, r9, asl #1
369 stmdb r1!, {r8, r9}
370 add r2, r2, r3, asl #2
371
372 cmp r2, r4
373 blo 1b @ repeat while T < Ttop
374
375 ldr r4, [sp, #-4] @ r4 = T0
3761: @ loop 4: T descending
377 ldmdb r0, {r6, r7, r8, r9}
378 ldmdb r1, {r10, r11, r12, r14}
379
380 add r6, r6, r10
381 sub r10, r6, r10, asl #1
382 add r7, r7, r11
383 rsb r11, r7, r11, asl #1
384 add r8, r8, r12
385 sub r12, r8, r12, asl #1
386 add r9, r9, r14
387 rsb r14, r9, r14, asl #1
388
389 stmdb r0!, {r6, r7, r8, r9}
390
391 ldmia r2, {r6, r7}
392 smull r5, r9, r12, r6
393 smlal r5, r9, r14, r7
394 rsb r12, r12, #0
395 smull r5, r8, r14, r6
396 smlal r5, r8, r12, r7
397
398 mov r8, r8, asl #1
399 mov r9, r9, asl #1
400 stmdb r1!, {r8, r9}
401 sub r2, r2, r3, asl #2
402
403 ldmia r2, {r6, r7}
404 smull r5, r9, r10, r6
405 smlal r5, r9, r11, r7
406 rsb r10, r10, #0
407 smull r5, r8, r11, r6
408 smlal r5, r8, r10, r7
409
410 mov r8, r8, asl #1
411 mov r9, r9, asl #1
412 stmdb r1!, {r8, r9}
413 sub r2, r2, r3, asl #2
414
415 cmp r2, r4
416 bhi 1b @ repeat while T > T0
417
418 ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
419