summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2010-02-13 22:01:24 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2010-02-13 22:01:24 +0000
commit35024bd54e0e9a75b80ab102c44da4b4f369aec5 (patch)
tree6fad29eb617ad9b92748a7d1658f95f089a6e4f3
parentb4fd5d852a84f141bf5d34cdf33d2d969d051edd (diff)
downloadrockbox-35024bd54e0e9a75b80ab102c44da4b4f369aec5.tar.gz
rockbox-35024bd54e0e9a75b80ab102c44da4b4f369aec5.zip
Speed up atrac codec for ARM through simple loop unrolling. Saves 9 MHz on PP5022 (14% speed up).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24637 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libatrac/atrac3_arm.S127
1 files changed, 107 insertions, 20 deletions
diff --git a/apps/codecs/libatrac/atrac3_arm.S b/apps/codecs/libatrac/atrac3_arm.S
index be8b2a0e0e..0908d582ed 100644
--- a/apps/codecs/libatrac/atrac3_arm.S
+++ b/apps/codecs/libatrac/atrac3_arm.S
@@ -100,38 +100,125 @@ atrac3_iqmf_dewindowing:
100 /* r1 = input samples */ 100 /* r1 = input samples */
101 /* r2 = window coefficients */ 101 /* r2 = window coefficients */
102 /* r3 = counter */ 102 /* r3 = counter */
103 stmfd sp!, {r4-r10, lr} /* save non-scratch registers */ 103 stmfd sp!, {r4-r9, lr} /* save non-scratch registers */
104 104
105.iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */ 105.iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */
106 106 /* 0.. 7 */
107 ldmia r2!, {r5, r6} /* load win[0..1] */ 107 ldmia r2!, {r4, r5} /* load win[0..1] */
108 ldmia r1!, {r7, r8} /* load in[0..1] */ 108 ldmia r1!, {r6, r7} /* load in[0..1] */
109 smull lr , r10, r5, r7 /* s1 = win[0] * in[0] */ 109 smull lr , r9, r4, r6 /* s1 = win[0] * in[0] */
110 smull r12, r9 , r6, r8 /* s2 = win[1] * in[1] */ 110 smull r12, r8, r5, r7 /* s2 = win[1] * in[1] */
111 111 ldmia r2!, {r4, r5} /* load win[i...i+1] */
112 mov r4, #46 /* r4 = 46 */ 112 ldmia r1!, {r6, r7} /* load in[i...i+1] */
113.iqmf_dewindow_inner_loop: /* inner loop i=2...48 */ 113 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
114 ldmia r2!, {r5, r6} /* load win[i...i+1] */ 114 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
115 ldmia r1!, {r7, r8} /* load in[i...i+1] */ 115 ldmia r2!, {r4, r5} /* load win[i...i+1] */
116 smlal lr , r10, r5, r7 /* s1 = win[i ] * in[i ] */ 116 ldmia r1!, {r6, r7} /* load in[i...i+1] */
117 smlal r12, r9 , r6, r8 /* s2 = win[i+1] * in[i+1] */ 117 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
118 118 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
119 subs r4, r4, #2 /* inner loop -= 2*/ 119 ldmia r2!, {r4, r5} /* load win[i...i+1] */
120 bgt .iqmf_dewindow_inner_loop 120 ldmia r1!, {r6, r7} /* load in[i...i+1] */
121 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
122 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
123 /* 8..15 */
124 ldmia r2!, {r4, r5} /* load win[i...i+1] */
125 ldmia r1!, {r6, r7} /* load in[i...i+1] */
126 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
127 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
128 ldmia r2!, {r4, r5} /* load win[i...i+1] */
129 ldmia r1!, {r6, r7} /* load in[i...i+1] */
130 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
131 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
132 ldmia r2!, {r4, r5} /* load win[i...i+1] */
133 ldmia r1!, {r6, r7} /* load in[i...i+1] */
134 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
135 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
136 ldmia r2!, {r4, r5} /* load win[i...i+1] */
137 ldmia r1!, {r6, r7} /* load in[i...i+1] */
138 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
139 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
140 /* 16..23 */
141 ldmia r2!, {r4, r5} /* load win[i...i+1] */
142 ldmia r1!, {r6, r7} /* load in[i...i+1] */
143 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
144 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
145 ldmia r2!, {r4, r5} /* load win[i...i+1] */
146 ldmia r1!, {r6, r7} /* load in[i...i+1] */
147 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
148 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
149 ldmia r2!, {r4, r5} /* load win[i...i+1] */
150 ldmia r1!, {r6, r7} /* load in[i...i+1] */
151 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
152 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
153 ldmia r2!, {r4, r5} /* load win[i...i+1] */
154 ldmia r1!, {r6, r7} /* load in[i...i+1] */
155 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
156 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
157 /* 24..31 */
158 ldmia r2!, {r4, r5} /* load win[i...i+1] */
159 ldmia r1!, {r6, r7} /* load in[i...i+1] */
160 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
161 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
162 ldmia r2!, {r4, r5} /* load win[i...i+1] */
163 ldmia r1!, {r6, r7} /* load in[i...i+1] */
164 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
165 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
166 ldmia r2!, {r4, r5} /* load win[i...i+1] */
167 ldmia r1!, {r6, r7} /* load in[i...i+1] */
168 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
169 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
170 ldmia r2!, {r4, r5} /* load win[i...i+1] */
171 ldmia r1!, {r6, r7} /* load in[i...i+1] */
172 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
173 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
174 /* 32..39 */
175 ldmia r2!, {r4, r5} /* load win[i...i+1] */
176 ldmia r1!, {r6, r7} /* load in[i...i+1] */
177 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
178 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
179 ldmia r2!, {r4, r5} /* load win[i...i+1] */
180 ldmia r1!, {r6, r7} /* load in[i...i+1] */
181 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
182 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
183 ldmia r2!, {r4, r5} /* load win[i...i+1] */
184 ldmia r1!, {r6, r7} /* load in[i...i+1] */
185 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
186 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
187 ldmia r2!, {r4, r5} /* load win[i...i+1] */
188 ldmia r1!, {r6, r7} /* load in[i...i+1] */
189 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
190 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
191 /* 40..47 */
192 ldmia r2!, {r4, r5} /* load win[i...i+1] */
193 ldmia r1!, {r6, r7} /* load in[i...i+1] */
194 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
195 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
196 ldmia r2!, {r4, r5} /* load win[i...i+1] */
197 ldmia r1!, {r6, r7} /* load in[i...i+1] */
198 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
199 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
200 ldmia r2!, {r4, r5} /* load win[i...i+1] */
201 ldmia r1!, {r6, r7} /* load in[i...i+1] */
202 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
203 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
204 ldmia r2!, {r4, r5} /* load win[i...i+1] */
205 ldmia r1!, {r6, r7} /* load in[i...i+1] */
206 smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */
207 smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */
121 208
122 mov lr , lr , lsr #31 209 mov lr , lr , lsr #31
123 orr r10, lr , r10, lsl #1 /* s1 = low>>31 || hi<<1 */ 210 orr r9, lr , r9, lsl #1 /* s1 = low>>31 || hi<<1 */
124 mov r12, r12, lsr #31 211 mov r12, r12, lsr #31
125 orr r9 , r12, r9 , lsl #1 /* s2 = low>>31 || hi<<1 */ 212 orr r8, r12, r8, lsl #1 /* s2 = low>>31 || hi<<1 */
126 213
127 stmia r0!, {r9, r10} /* store result out[0]=s2, out[1]=s1 */ 214 stmia r0!, {r8, r9} /* store result out[0]=s2, out[1]=s1 */
128 sub r1, r1, #184 /* roll back 46 entries = 184 bytes */ 215 sub r1, r1, #184 /* roll back 46 entries = 184 bytes */
129 sub r2, r2, #192 /* roll back 48 entries = 192 bytes = win[0] */ 216 sub r2, r2, #192 /* roll back 48 entries = 192 bytes = win[0] */
130 217
131 subs r3, r3, #1 /* outer loop -= 1 */ 218 subs r3, r3, #1 /* outer loop -= 1 */
132 bgt .iqmf_dewindow_outer_loop 219 bgt .iqmf_dewindow_outer_loop
133 220
134 ldmfd sp!, {r4-r10, pc} /* restore registers */ 221 ldmfd sp!, {r4-r9, pc} /* restore registers */
135 222
136.atrac3_iqmf_dewindowing_end: 223.atrac3_iqmf_dewindowing_end:
137 .size atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing 224 .size atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing