diff options
author | Thom Johansen <thomj@rockbox.org> | 2008-02-08 00:19:16 +0000 |
---|---|---|
committer | Thom Johansen <thomj@rockbox.org> | 2008-02-08 00:19:16 +0000 |
commit | 7667a865ff06565a2a7f91ccde0120cd2e0c977e (patch) | |
tree | ddb18bff68df1a8db98046e26f8fd49afb34735f /apps | |
parent | c04f4976703e516d69f03ce3f9045ed73ac4fcdb (diff) | |
download | rockbox-7667a865ff06565a2a7f91ccde0120cd2e0c977e.tar.gz rockbox-7667a865ff06565a2a7f91ccde0120cd2e0c977e.zip |
Optimize the ARM ASM qmf_synth() clipping stage a bit. Also fix a typo and remove some trailing white space.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16242 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libspeex/filters_arm4.S | 76 |
1 file changed, 36 insertions, 40 deletions
diff --git a/apps/codecs/libspeex/filters_arm4.S b/apps/codecs/libspeex/filters_arm4.S index 2bfa592c24..109556fa92 100644 --- a/apps/codecs/libspeex/filters_arm4.S +++ b/apps/codecs/libspeex/filters_arm4.S | |||
@@ -49,7 +49,7 @@ iir_mem16: | |||
49 | beq .order_10 | 49 | beq .order_10 |
50 | cmp r5, #8 | 50 | cmp r5, #8 |
51 | beq .order_8 | 51 | beq .order_8 |
52 | ldmia sp!, { r4-r11, pc } @ Mon-supported order, return | 52 | ldmia sp!, { r4-r11, pc } @ Non-supported order, return |
53 | 53 | ||
54 | @ TODO: try using direct form 1 filtering | 54 | @ TODO: try using direct form 1 filtering |
55 | .order_8: | 55 | .order_8: |
@@ -67,28 +67,28 @@ iir_mem16: | |||
67 | strh r14, [r2], #2 @ Write result to y[i] | 67 | strh r14, [r2], #2 @ Write result to y[i] |
68 | 68 | ||
69 | ldrsh r4, [r1] | 69 | ldrsh r4, [r1] |
70 | mul r5, r4, r14 | 70 | mul r5, r4, r14 |
71 | sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] | 71 | sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] |
72 | ldrsh r4, [r1, #2] | 72 | ldrsh r4, [r1, #2] |
73 | mul r6, r4, r14 | 73 | mul r6, r4, r14 |
74 | sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] | 74 | sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] |
75 | ldrsh r4, [r1, #4] | 75 | ldrsh r4, [r1, #4] |
76 | mul r7, r4, r14 | 76 | mul r7, r4, r14 |
77 | sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] | 77 | sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] |
78 | ldrsh r4, [r1, #6] | 78 | ldrsh r4, [r1, #6] |
79 | mul r8, r4, r14 | 79 | mul r8, r4, r14 |
80 | sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] | 80 | sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] |
81 | ldrsh r4, [r1, #8] | 81 | ldrsh r4, [r1, #8] |
82 | mul r9, r4, r14 | 82 | mul r9, r4, r14 |
83 | sub r9, r10, r9 @ mem[4] = mem[5] - den[4]*y[i] | 83 | sub r9, r10, r9 @ mem[4] = mem[5] - den[4]*y[i] |
84 | ldrsh r4, [r1, #10] | 84 | ldrsh r4, [r1, #10] |
85 | mul r10, r4, r14 | 85 | mul r10, r4, r14 |
86 | sub r10, r11, r10 @ mem[5] = mem[6] - den[5]*y[i] | 86 | sub r10, r11, r10 @ mem[5] = mem[6] - den[5]*y[i] |
87 | ldrsh r4, [r1, #12] | 87 | ldrsh r4, [r1, #12] |
88 | mul r11, r4, r14 | 88 | mul r11, r4, r14 |
89 | sub r11, r12, r11 @ mem[6] = mem[7] - den[6]*y[i] | 89 | sub r11, r12, r11 @ mem[6] = mem[7] - den[6]*y[i] |
90 | ldrsh r4, [r1, #14] | 90 | ldrsh r4, [r1, #14] |
91 | mul r12, r4, r14 | 91 | mul r12, r4, r14 |
92 | rsb r12, r12, #0 @ mem[7] = -den[7]*y[i] | 92 | rsb r12, r12, #0 @ mem[7] = -den[7]*y[i] |
93 | subs r3, r3, #1 | 93 | subs r3, r3, #1 |
94 | bne 0b | 94 | bne 0b |
@@ -112,48 +112,48 @@ iir_mem16: | |||
112 | ldmia r1!, { r10-r12 } @ r10-r12 = den[0..5] | 112 | ldmia r1!, { r10-r12 } @ r10-r12 = den[0..5] |
113 | mov r5, r10, lsl #16 | 113 | mov r5, r10, lsl #16 |
114 | mov r5, r5, asr #16 | 114 | mov r5, r5, asr #16 |
115 | mul r5, r14, r5 | 115 | mul r5, r14, r5 |
116 | sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] | 116 | sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] |
117 | mov r10, r10, asr #16 | 117 | mov r10, r10, asr #16 |
118 | mul r6, r14, r10 | 118 | mul r6, r14, r10 |
119 | sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] | 119 | sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] |
120 | mov r10, r11, lsl #16 | 120 | mov r10, r11, lsl #16 |
121 | mov r10, r10, asr #16 | 121 | mov r10, r10, asr #16 |
122 | mul r7, r14, r10 | 122 | mul r7, r14, r10 |
123 | sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] | 123 | sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] |
124 | mov r10, r11, asr #16 | 124 | mov r10, r11, asr #16 |
125 | mul r8, r14, r10 | 125 | mul r8, r14, r10 |
126 | sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] | 126 | sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] |
127 | stmia r4!, { r5-r8 } @ Write back mem[0..3], r4 = &mem[4] | 127 | stmia r4!, { r5-r8 } @ Write back mem[0..3], r4 = &mem[4] |
128 | mov r10, r12, lsl #16 | 128 | mov r10, r12, lsl #16 |
129 | mov r10, r10, asr #16 | 129 | mov r10, r10, asr #16 |
130 | mul r5, r14, r10 | 130 | mul r5, r14, r10 |
131 | 131 | ||
132 | ldmib r4, { r6-r10 } @ r6-r10 = mem[5..9] | 132 | ldmib r4, { r6-r10 } @ r6-r10 = mem[5..9] |
133 | sub r5, r6, r5 @ mem[4] = mem[5] - den[4]*y[i] | 133 | sub r5, r6, r5 @ mem[4] = mem[5] - den[4]*y[i] |
134 | mov r12, r12, asr #16 | 134 | mov r12, r12, asr #16 |
135 | mul r6, r14, r12 | 135 | mul r6, r14, r12 |
136 | sub r6, r7, r6 @ mem[5] = mem[6] - den[5]*y[i] | 136 | sub r6, r7, r6 @ mem[5] = mem[6] - den[5]*y[i] |
137 | ldmia r1!, { r11-r12 } @ r11-r12 = den[6..9] | 137 | ldmia r1!, { r11-r12 } @ r11-r12 = den[6..9] |
138 | mov r7, r11, lsl #16 | 138 | mov r7, r11, lsl #16 |
139 | mov r7, r7, asr #16 | 139 | mov r7, r7, asr #16 |
140 | mul r7, r14, r7 | 140 | mul r7, r14, r7 |
141 | sub r7, r8, r7 @ mem[6] = mem[7] - den[6]*y[i] | 141 | sub r7, r8, r7 @ mem[6] = mem[7] - den[6]*y[i] |
142 | mov r11, r11, asr #16 | 142 | mov r11, r11, asr #16 |
143 | mul r8, r14, r11 | 143 | mul r8, r14, r11 |
144 | sub r8, r9, r8 @ mem[7] = mem[8] - den[7]*y[i] | 144 | sub r8, r9, r8 @ mem[7] = mem[8] - den[7]*y[i] |
145 | mov r11, r12, lsl #16 | 145 | mov r11, r12, lsl #16 |
146 | mov r11, r11, asr #16 | 146 | mov r11, r11, asr #16 |
147 | mul r9, r14, r11 | 147 | mul r9, r14, r11 |
148 | sub r9, r10, r9 @ mem[8] = mem[9] - den[8]*y[i] | 148 | sub r9, r10, r9 @ mem[8] = mem[9] - den[8]*y[i] |
149 | mov r12, r12, asr #16 | 149 | mov r12, r12, asr #16 |
150 | mul r10, r14, r12 | 150 | mul r10, r14, r12 |
151 | rsb r10, r10, #0 @ mem[9] = -den[9]*y[i] | 151 | rsb r10, r10, #0 @ mem[9] = -den[9]*y[i] |
152 | stmia r4!, { r5-r10 } @ Write back mem[4..9] | 152 | stmia r4!, { r5-r10 } @ Write back mem[4..9] |
153 | sub r4, r4, #10*4 | 153 | sub r4, r4, #10*4 |
154 | sub r1, r1, #10*2 | 154 | sub r1, r1, #10*2 |
155 | subs r3, r3, #1 | 155 | subs r3, r3, #1 |
156 | bne .order_10 | 156 | bne .order_10 |
157 | ldmia sp!, { r4-r11, pc } @ Exit | 157 | ldmia sp!, { r4-r11, pc } @ Exit |
158 | 158 | ||
159 | 159 | ||
@@ -255,31 +255,27 @@ qmf_synth: | |||
255 | sub r2, r2, r4, lsl #1 @ r2 = &a[0] | 255 | sub r2, r2, r4, lsl #1 @ r2 = &a[0] |
256 | sub r0, r0, r4 @ r0 = &xx1[N2 - 2 - i] | 256 | sub r0, r0, r4 @ r0 = &xx1[N2 - 2 - i] |
257 | sub r1, r1, r4 @ r1 = &xx2[N2 - 2 - i] | 257 | sub r1, r1, r4 @ r1 = &xx2[N2 - 2 - i] |
258 | 258 | ||
259 | mov r10, r10, asr #15 @ Shift outputs down | 259 | mov r10, r10, asr #15 @ Shift outputs down |
260 | mov r11, r11, asr #15 | 260 | mov r11, r11, asr #15 |
261 | mov r12, r12, asr #15 | 261 | mov r12, r12, asr #15 |
262 | mov r14, r14, asr #15 | 262 | mov r14, r14, asr #15 |
263 | 263 | ||
264 | @ TODO: this can be optimized further | 264 | @ Clip output to -32768..32767 range, which works fine despite not being |
265 | mov r9, #0x7f00 @ Clip all four outputs | 265 | @ Speex' usual clipping range. |
266 | orr r9, r9, #0xff @ r9 = 32767 | 266 | mvn r9, #0x8000 |
267 | cmp r10, r9 | 267 | mov r5, r10, asr #15 |
268 | movgt r10, r9 | 268 | teq r5, r5, asr #31 |
269 | cmn r10, r9 | 269 | eorne r10, r9, r5, asr #31 |
270 | rsblt r10, r9, #0 | 270 | mov r5, r11, asr #15 |
271 | cmp r11, r9 | 271 | teq r5, r5, asr #31 |
272 | movgt r11, r9 | 272 | eorne r11, r9, r5, asr #31 |
273 | cmn r11, r9 | 273 | mov r5, r12, asr #15 |
274 | rsblt r11, r9, #0 | 274 | teq r5, r5, asr #31 |
275 | cmp r12, r9 | 275 | eorne r12, r9, r5, asr #31 |
276 | movgt r12, r9 | 276 | mov r5, r14, asr #15 |
277 | cmn r12, r9 | 277 | teq r5, r5, asr #31 |
278 | rsblt r12, r9, #0 | 278 | eorne r14, r9, r5, asr #31 |
279 | cmp r14, r9 | ||
280 | movgt r14, r9 | ||
281 | cmn r14, r9 | ||
282 | rsblt r14, r9, #0 | ||
283 | 279 | ||
284 | strh r10, [r3], #2 @ Write outputs | 280 | strh r10, [r3], #2 @ Write outputs |
285 | strh r11, [r3], #2 | 281 | strh r11, [r3], #2 |