diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-02-13 22:01:24 +0000 |
---|---|---|
committer | Andree Buschmann <AndreeBuschmann@t-online.de> | 2010-02-13 22:01:24 +0000 |
commit | 35024bd54e0e9a75b80ab102c44da4b4f369aec5 (patch) | |
tree | 6fad29eb617ad9b92748a7d1658f95f089a6e4f3 /apps/codecs/libatrac/atrac3_arm.S | |
parent | b4fd5d852a84f141bf5d34cdf33d2d969d051edd (diff) | |
download | rockbox-35024bd54e0e9a75b80ab102c44da4b4f369aec5.tar.gz rockbox-35024bd54e0e9a75b80ab102c44da4b4f369aec5.zip |
Speed up atrac codec for ARM through simple loop unrolling. Saves 9 MHz on PP5022 (14% speed up).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24637 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libatrac/atrac3_arm.S')
-rw-r--r-- | apps/codecs/libatrac/atrac3_arm.S | 127 |
1 files changed, 107 insertions, 20 deletions
diff --git a/apps/codecs/libatrac/atrac3_arm.S b/apps/codecs/libatrac/atrac3_arm.S index be8b2a0e0e..0908d582ed 100644 --- a/apps/codecs/libatrac/atrac3_arm.S +++ b/apps/codecs/libatrac/atrac3_arm.S | |||
@@ -100,38 +100,125 @@ atrac3_iqmf_dewindowing: | |||
100 | /* r1 = input samples */ | 100 | /* r1 = input samples */ |
101 | /* r2 = window coefficients */ | 101 | /* r2 = window coefficients */ |
102 | /* r3 = counter */ | 102 | /* r3 = counter */ |
103 | stmfd sp!, {r4-r10, lr} /* save non-scratch registers */ | 103 | stmfd sp!, {r4-r9, lr} /* save non-scratch registers */ |
104 | 104 | ||
105 | .iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */ | 105 | .iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */ |
106 | 106 | /* 0.. 7 */ | |
107 | ldmia r2!, {r5, r6} /* load win[0..1] */ | 107 | ldmia r2!, {r4, r5} /* load win[0..1] */ |
108 | ldmia r1!, {r7, r8} /* load in[0..1] */ | 108 | ldmia r1!, {r6, r7} /* load in[0..1] */ |
109 | smull lr , r10, r5, r7 /* s1 = win[0] * in[0] */ | 109 | smull lr , r9, r4, r6 /* s1 = win[0] * in[0] */ |
110 | smull r12, r9 , r6, r8 /* s2 = win[1] * in[1] */ | 110 | smull r12, r8, r5, r7 /* s2 = win[1] * in[1] */ |
111 | 111 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | |
112 | mov r4, #46 /* r4 = 46 */ | 112 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ |
113 | .iqmf_dewindow_inner_loop: /* inner loop i=2...48 */ | 113 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ |
114 | ldmia r2!, {r5, r6} /* load win[i...i+1] */ | 114 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ |
115 | ldmia r1!, {r7, r8} /* load in[i...i+1] */ | 115 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ |
116 | smlal lr , r10, r5, r7 /* s1 = win[i ] * in[i ] */ | 116 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ |
117 | smlal r12, r9 , r6, r8 /* s2 = win[i+1] * in[i+1] */ | 117 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ |
118 | 118 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | |
119 | subs r4, r4, #2 /* inner loop -= 2*/ | 119 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ |
120 | bgt .iqmf_dewindow_inner_loop | 120 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ |
121 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
122 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
123 | /* 8..15 */ | ||
124 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
125 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
126 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
127 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
128 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
129 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
130 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
131 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
132 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
133 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
134 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
135 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
136 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
137 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
138 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
139 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
140 | /* 16..23 */ | ||
141 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
142 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
143 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
144 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
145 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
146 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
147 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
148 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
149 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
150 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
151 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
152 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
153 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
154 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
155 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
156 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
157 | /* 24..31 */ | ||
158 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
159 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
160 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
161 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
162 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
163 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
164 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
165 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
166 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
167 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
168 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
169 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
170 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
171 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
172 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
173 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
174 | /* 32..39 */ | ||
175 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
176 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
177 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
178 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
179 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
180 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
181 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
182 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
183 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
184 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
185 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
186 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
187 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
188 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
189 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
190 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
191 | /* 40..47 */ | ||
192 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
193 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
194 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
195 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
196 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
197 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
198 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
199 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
200 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
201 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
202 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
203 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
204 | ldmia r2!, {r4, r5} /* load win[i...i+1] */ | ||
205 | ldmia r1!, {r6, r7} /* load in[i...i+1] */ | ||
206 | smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ | ||
207 | smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ | ||
121 | 208 | ||
122 | mov lr , lr , lsr #31 | 209 | mov lr , lr , lsr #31 |
123 | orr r10, lr , r10, lsl #1 /* s1 = low>>31 || hi<<1 */ | 210 | orr r9, lr , r9, lsl #1 /* s1 = low>>31 || hi<<1 */ |
124 | mov r12, r12, lsr #31 | 211 | mov r12, r12, lsr #31 |
125 | orr r9 , r12, r9 , lsl #1 /* s2 = low>>31 || hi<<1 */ | 212 | orr r8, r12, r8, lsl #1 /* s2 = low>>31 || hi<<1 */ |
126 | 213 | ||
127 | stmia r0!, {r9, r10} /* store result out[0]=s2, out[1]=s1 */ | 214 | stmia r0!, {r8, r9} /* store result out[0]=s2, out[1]=s1 */ |
128 | sub r1, r1, #184 /* roll back 46 entries = 184 bytes */ | 215 | sub r1, r1, #184 /* roll back 46 entries = 184 bytes */ |
129 | sub r2, r2, #192 /* roll back 48 entries = 192 bytes = win[0] */ | 216 | sub r2, r2, #192 /* roll back 48 entries = 192 bytes = win[0] */ |
130 | 217 | ||
131 | subs r3, r3, #1 /* outer loop -= 1 */ | 218 | subs r3, r3, #1 /* outer loop -= 1 */ |
132 | bgt .iqmf_dewindow_outer_loop | 219 | bgt .iqmf_dewindow_outer_loop |
133 | 220 | ||
134 | ldmfd sp!, {r4-r10, pc} /* restore registers */ | 221 | ldmfd sp!, {r4-r9, pc} /* restore registers */ |
135 | 222 | ||
136 | .atrac3_iqmf_dewindowing_end: | 223 | .atrac3_iqmf_dewindowing_end: |
137 | .size atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing | 224 | .size atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing |