summary refs log tree commit diff
diff options
context:
space:
mode:
author Thom Johansen <thomj@rockbox.org> 2008-02-08 00:19:16 +0000
committer Thom Johansen <thomj@rockbox.org> 2008-02-08 00:19:16 +0000
commit 7667a865ff06565a2a7f91ccde0120cd2e0c977e (patch)
tree ddb18bff68df1a8db98046e26f8fd49afb34735f
parent c04f4976703e516d69f03ce3f9045ed73ac4fcdb (diff)
download rockbox-7667a865ff06565a2a7f91ccde0120cd2e0c977e.tar.gz
rockbox-7667a865ff06565a2a7f91ccde0120cd2e0c977e.zip
Optimize the ARM ASM qmf_synth() clipping stage a bit. Also fix a typo and remove some trailing white space.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16242 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libspeex/filters_arm4.S 76
1 files changed, 36 insertions, 40 deletions
diff --git a/apps/codecs/libspeex/filters_arm4.S b/apps/codecs/libspeex/filters_arm4.S
index 2bfa592c24..109556fa92 100644
--- a/apps/codecs/libspeex/filters_arm4.S
+++ b/apps/codecs/libspeex/filters_arm4.S
@@ -49,7 +49,7 @@ iir_mem16:
49 beq .order_10 49 beq .order_10
50 cmp r5, #8 50 cmp r5, #8
51 beq .order_8 51 beq .order_8
52 ldmia sp!, { r4-r11, pc } @ Mon-supported order, return 52 ldmia sp!, { r4-r11, pc } @ Non-supported order, return
53 53
54 @ TODO: try using direct form 1 filtering 54 @ TODO: try using direct form 1 filtering
55 .order_8: 55 .order_8:
@@ -67,28 +67,28 @@ iir_mem16:
67 strh r14, [r2], #2 @ Write result to y[i] 67 strh r14, [r2], #2 @ Write result to y[i]
68 68
69 ldrsh r4, [r1] 69 ldrsh r4, [r1]
70 mul r5, r4, r14 70 mul r5, r4, r14
71 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] 71 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i]
72 ldrsh r4, [r1, #2] 72 ldrsh r4, [r1, #2]
73 mul r6, r4, r14 73 mul r6, r4, r14
74 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] 74 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i]
75 ldrsh r4, [r1, #4] 75 ldrsh r4, [r1, #4]
76 mul r7, r4, r14 76 mul r7, r4, r14
77 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] 77 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i]
78 ldrsh r4, [r1, #6] 78 ldrsh r4, [r1, #6]
79 mul r8, r4, r14 79 mul r8, r4, r14
80 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] 80 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i]
81 ldrsh r4, [r1, #8] 81 ldrsh r4, [r1, #8]
82 mul r9, r4, r14 82 mul r9, r4, r14
83 sub r9, r10, r9 @ mem[4] = mem[5] - den[4]*y[i] 83 sub r9, r10, r9 @ mem[4] = mem[5] - den[4]*y[i]
84 ldrsh r4, [r1, #10] 84 ldrsh r4, [r1, #10]
85 mul r10, r4, r14 85 mul r10, r4, r14
86 sub r10, r11, r10 @ mem[5] = mem[6] - den[5]*y[i] 86 sub r10, r11, r10 @ mem[5] = mem[6] - den[5]*y[i]
87 ldrsh r4, [r1, #12] 87 ldrsh r4, [r1, #12]
88 mul r11, r4, r14 88 mul r11, r4, r14
89 sub r11, r12, r11 @ mem[6] = mem[7] - den[6]*y[i] 89 sub r11, r12, r11 @ mem[6] = mem[7] - den[6]*y[i]
90 ldrsh r4, [r1, #14] 90 ldrsh r4, [r1, #14]
91 mul r12, r4, r14 91 mul r12, r4, r14
92 rsb r12, r12, #0 @ mem[7] = -den[7]*y[i] 92 rsb r12, r12, #0 @ mem[7] = -den[7]*y[i]
93 subs r3, r3, #1 93 subs r3, r3, #1
94 bne 0b 94 bne 0b
@@ -112,48 +112,48 @@ iir_mem16:
112 ldmia r1!, { r10-r12 } @ r10-r12 = den[0..5] 112 ldmia r1!, { r10-r12 } @ r10-r12 = den[0..5]
113 mov r5, r10, lsl #16 113 mov r5, r10, lsl #16
114 mov r5, r5, asr #16 114 mov r5, r5, asr #16
115 mul r5, r14, r5 115 mul r5, r14, r5
116 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i] 116 sub r5, r6, r5 @ mem[0] = mem[1] - den[0]*y[i]
117 mov r10, r10, asr #16 117 mov r10, r10, asr #16
118 mul r6, r14, r10 118 mul r6, r14, r10
119 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i] 119 sub r6, r7, r6 @ mem[1] = mem[2] - den[1]*y[i]
120 mov r10, r11, lsl #16 120 mov r10, r11, lsl #16
121 mov r10, r10, asr #16 121 mov r10, r10, asr #16
122 mul r7, r14, r10 122 mul r7, r14, r10
123 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i] 123 sub r7, r8, r7 @ mem[2] = mem[3] - den[2]*y[i]
124 mov r10, r11, asr #16 124 mov r10, r11, asr #16
125 mul r8, r14, r10 125 mul r8, r14, r10
126 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i] 126 sub r8, r9, r8 @ mem[3] = mem[4] - den[3]*y[i]
127 stmia r4!, { r5-r8 } @ Write back mem[0..3], r4 = &mem[4] 127 stmia r4!, { r5-r8 } @ Write back mem[0..3], r4 = &mem[4]
128 mov r10, r12, lsl #16 128 mov r10, r12, lsl #16
129 mov r10, r10, asr #16 129 mov r10, r10, asr #16
130 mul r5, r14, r10 130 mul r5, r14, r10
131 131
132 ldmib r4, { r6-r10 } @ r6-r10 = mem[5..9] 132 ldmib r4, { r6-r10 } @ r6-r10 = mem[5..9]
133 sub r5, r6, r5 @ mem[4] = mem[5] - den[4]*y[i] 133 sub r5, r6, r5 @ mem[4] = mem[5] - den[4]*y[i]
134 mov r12, r12, asr #16 134 mov r12, r12, asr #16
135 mul r6, r14, r12 135 mul r6, r14, r12
136 sub r6, r7, r6 @ mem[5] = mem[6] - den[5]*y[i] 136 sub r6, r7, r6 @ mem[5] = mem[6] - den[5]*y[i]
137 ldmia r1!, { r11-r12 } @ r11-r12 = den[6..9] 137 ldmia r1!, { r11-r12 } @ r11-r12 = den[6..9]
138 mov r7, r11, lsl #16 138 mov r7, r11, lsl #16
139 mov r7, r7, asr #16 139 mov r7, r7, asr #16
140 mul r7, r14, r7 140 mul r7, r14, r7
141 sub r7, r8, r7 @ mem[6] = mem[7] - den[6]*y[i] 141 sub r7, r8, r7 @ mem[6] = mem[7] - den[6]*y[i]
142 mov r11, r11, asr #16 142 mov r11, r11, asr #16
143 mul r8, r14, r11 143 mul r8, r14, r11
144 sub r8, r9, r8 @ mem[7] = mem[8] - den[7]*y[i] 144 sub r8, r9, r8 @ mem[7] = mem[8] - den[7]*y[i]
145 mov r11, r12, lsl #16 145 mov r11, r12, lsl #16
146 mov r11, r11, asr #16 146 mov r11, r11, asr #16
147 mul r9, r14, r11 147 mul r9, r14, r11
148 sub r9, r10, r9 @ mem[8] = mem[9] - den[8]*y[i] 148 sub r9, r10, r9 @ mem[8] = mem[9] - den[8]*y[i]
149 mov r12, r12, asr #16 149 mov r12, r12, asr #16
150 mul r10, r14, r12 150 mul r10, r14, r12
151 rsb r10, r10, #0 @ mem[9] = -den[9]*y[i] 151 rsb r10, r10, #0 @ mem[9] = -den[9]*y[i]
152 stmia r4!, { r5-r10 } @ Write back mem[4..9] 152 stmia r4!, { r5-r10 } @ Write back mem[4..9]
153 sub r4, r4, #10*4 153 sub r4, r4, #10*4
154 sub r1, r1, #10*2 154 sub r1, r1, #10*2
155 subs r3, r3, #1 155 subs r3, r3, #1
156 bne .order_10 156 bne .order_10
157 ldmia sp!, { r4-r11, pc } @ Exit 157 ldmia sp!, { r4-r11, pc } @ Exit
158 158
159 159
@@ -255,31 +255,27 @@ qmf_synth:
255 sub r2, r2, r4, lsl #1 @ r2 = &a[0] 255 sub r2, r2, r4, lsl #1 @ r2 = &a[0]
256 sub r0, r0, r4 @ r0 = &xx1[N2 - 2 - i] 256 sub r0, r0, r4 @ r0 = &xx1[N2 - 2 - i]
257 sub r1, r1, r4 @ r1 = &xx2[N2 - 2 - i] 257 sub r1, r1, r4 @ r1 = &xx2[N2 - 2 - i]
258 258
259 mov r10, r10, asr #15 @ Shift outputs down 259 mov r10, r10, asr #15 @ Shift outputs down
260 mov r11, r11, asr #15 260 mov r11, r11, asr #15
261 mov r12, r12, asr #15 261 mov r12, r12, asr #15
262 mov r14, r14, asr #15 262 mov r14, r14, asr #15
263 263
264 @ TODO: this can be optimized further 264 @ Clip output to -32768..32767 range, which works fine despite not being
265 mov r9, #0x7f00 @ Clip all four outputs 265 @ Speex' usual clipping range.
266 orr r9, r9, #0xff @ r9 = 32767 266 mvn r9, #0x8000
267 cmp r10, r9 267 mov r5, r10, asr #15
268 movgt r10, r9 268 teq r5, r5, asr #31
269 cmn r10, r9 269 eorne r10, r9, r5, asr #31
270 rsblt r10, r9, #0 270 mov r5, r11, asr #15
271 cmp r11, r9 271 teq r5, r5, asr #31
272 movgt r11, r9 272 eorne r11, r9, r5, asr #31
273 cmn r11, r9 273 mov r5, r12, asr #15
274 rsblt r11, r9, #0 274 teq r5, r5, asr #31
275 cmp r12, r9 275 eorne r12, r9, r5, asr #31
276 movgt r12, r9 276 mov r5, r14, asr #15
277 cmn r12, r9 277 teq r5, r5, asr #31
278 rsblt r12, r9, #0 278 eorne r14, r9, r5, asr #31
279 cmp r14, r9
280 movgt r14, r9
281 cmn r14, r9
282 rsblt r14, r9, #0
283 279
284 strh r10, [r3], #2 @ Write outputs 280 strh r10, [r3], #2 @ Write outputs
285 strh r11, [r3], #2 281 strh r11, [r3], #2