diff options
Diffstat (limited to 'apps/codecs/libffmpegFLAC')
-rw-r--r-- | apps/codecs/libffmpegFLAC/SOURCES | 2 | ||||
-rw-r--r-- | apps/codecs/libffmpegFLAC/arm.S | 271 | ||||
-rw-r--r-- | apps/codecs/libffmpegFLAC/arm.h | 8 | ||||
-rw-r--r-- | apps/codecs/libffmpegFLAC/decoder.c | 6 |
4 files changed, 287 insertions, 0 deletions
diff --git a/apps/codecs/libffmpegFLAC/SOURCES b/apps/codecs/libffmpegFLAC/SOURCES index 1bd92e8be7..deed19bcec 100644 --- a/apps/codecs/libffmpegFLAC/SOURCES +++ b/apps/codecs/libffmpegFLAC/SOURCES | |||
@@ -3,4 +3,6 @@ decoder.c | |||
3 | shndec.c | 3 | shndec.c |
4 | #if defined(CPU_COLDFIRE) | 4 | #if defined(CPU_COLDFIRE) |
5 | coldfire.S | 5 | coldfire.S |
6 | #elif defined(CPU_ARM) | ||
7 | arm.S | ||
6 | #endif | 8 | #endif |
diff --git a/apps/codecs/libffmpegFLAC/arm.S b/apps/codecs/libffmpegFLAC/arm.S new file mode 100644 index 0000000000..2a2746eefa --- /dev/null +++ b/apps/codecs/libffmpegFLAC/arm.S | |||
@@ -0,0 +1,271 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id$ | ||
9 | * | ||
10 | * Copyright (C) 2006 by Thom Johansen | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | #include "config.h" | ||
23 | |||
24 | /* The following is an assembler optimised version of the LPC filtering | ||
25 | routines needed for FLAC decoding. It is optimised for use with ARM | ||
26 | processors. | ||
27 | All LPC filtering up to order 9 is done in specially optimised unrolled | ||
28 | loops, while every order above this is handled by a slower default routine. | ||
29 | */ | ||
30 | #ifdef USE_IRAM | ||
31 | .section .icode,"ax",%progbits | ||
32 | #else | ||
33 | .text | ||
34 | #endif | ||
35 | .global lpc_decode_arm | ||
36 | lpc_decode_arm: /* in-place LPC filter: every residual word at data[] is replaced by residual + (prediction asr qlevel), where prediction is the sum of the pred_order preceding samples times their coefs */ | ||
37 | stmdb sp!, { r4-r11, lr } /* push r4-r11 and lr: 9 words = 36 bytes */ | ||
38 | ldr r4, [sp, #36] /* r4 = coeffs, read from the caller's stack just above the 36 bytes pushed above */ | ||
39 | /* register use from here on: r0 = blocksize (counted down to zero), r1 = qlevel, r2 = pred_order, | ||
40 | r3 = data, r4 = coeffs. r12 and r14 are used as scratch below; r12 is not saved or restored here. | ||
41 | */ | ||
42 | ||
43 | /* the history pointer always trails the data pointer by 'pred_order' | ||
44 | samples. since we have one loop for each order, we can hard code this | ||
45 | distance and free a register by not keeping a separate data pointer. | ||
46 | */ | ||
47 | sub r3, r3, r2, lsl #2 @ r3 = history | ||
48 | cmp r0, #0 @ no samples to process | ||
49 | beq .exit /* NOTE(review): only a count of exactly zero bails out; the loops below count r0 down with subs/bne */ | ||
50 | cmp r2, #9 @ check if order is too high for unrolled loops | ||
51 | addls pc, pc, r2, lsl #2 @ jump to our unrolled decode loop if it exists | ||
52 | @ jumptable: | ||
53 | b .default @ order too high, go to default routine | ||
54 | b .exit @ zero order filter isn't possible, exit function | ||
55 | b .order1 | ||
56 | b .order2 | ||
57 | b .order3 | ||
58 | b .order4 | ||
59 | b .order5 | ||
60 | b .order6 | ||
61 | b .order7 | ||
62 | b .order8 | ||
63 | ||
64 | @ last jump table entry coincides with target, so leave it out | ||
65 | .order9: /* r5-r12 and r14 receive coeffs[0..8]; the oldest history sample is paired with the last coef */ | ||
66 | ldmia r4, { r5-r12, r14 } @ fetch coefs | ||
67 | .loop9: | ||
68 | ldr r4, [r3], #4 @ load first history sample | ||
69 | mul r2, r4, r14 @ multiply with last coef | ||
70 | ldr r4, [r3], #4 @ rinse and repeat while accumulating sum in r2 | ||
71 | mla r2, r4, r12, r2 | ||
72 | ldr r4, [r3], #4 | ||
73 | mla r2, r4, r11, r2 | ||
74 | ldr r4, [r3], #4 | ||
75 | mla r2, r4, r10, r2 | ||
76 | ldr r4, [r3], #4 | ||
77 | mla r2, r4, r9, r2 | ||
78 | ldr r4, [r3], #4 | ||
79 | mla r2, r4, r8, r2 | ||
80 | ldr r4, [r3], #4 | ||
81 | mla r2, r4, r7, r2 | ||
82 | ldr r4, [r3], #4 | ||
83 | mla r2, r4, r6, r2 | ||
84 | ldr r4, [r3], #4 | ||
85 | mla r2, r4, r5, r2 | ||
86 | ldr r4, [r3] @ r4 = residual | ||
87 | add r2, r4, r2, asr r1 @ shift sum by qlevel bits and add residual | ||
88 | str r2, [r3], #-8*4 @ save result and wrap history pointer back | ||
89 | subs r0, r0, #1 @ check if we're done | ||
90 | bne .loop9 @ nope, jump back | ||
91 | b .exit | ||
92 | ||
93 | .order8: /* r5-r12 receive coeffs[0..7] */ | ||
94 | ldmia r4, { r5-r12 } | ||
95 | .loop8: | ||
96 | @ we have more registers to spare here, so start block reading | ||
97 | ldmia r3!, { r4, r14 } | ||
98 | mul r2, r4, r12 | ||
99 | mla r2, r14, r11, r2 | ||
100 | ldmia r3!, { r4, r14 } | ||
101 | mla r2, r4, r10, r2 | ||
102 | mla r2, r14, r9, r2 | ||
103 | ldmia r3!, { r4, r14 } | ||
104 | mla r2, r4, r8, r2 | ||
105 | mla r2, r14, r7, r2 | ||
106 | ldmia r3!, { r4, r14 } | ||
107 | mla r2, r4, r6, r2 | ||
108 | mla r2, r14, r5, r2 | ||
109 | ldr r4, [r3] /* r4 = residual at the current output position */ | ||
110 | add r2, r4, r2, asr r1 /* result = residual + (sum asr qlevel) */ | ||
111 | str r2, [r3], #-7*4 /* store the sample, then step r3 back so the next window starts one sample later */ | ||
112 | subs r0, r0, #1 | ||
113 | bne .loop8 | ||
114 | b .exit | ||
115 | ||
116 | .order7: /* r5-r11 receive coeffs[0..6]; r12 and r14 are free for history samples */ | ||
117 | ldmia r4, { r5-r11 } | ||
118 | .loop7: | ||
119 | ldmia r3!, { r4, r12, r14 } | ||
120 | mul r2, r4, r11 | ||
121 | mla r2, r12, r10, r2 | ||
122 | mla r2, r14, r9, r2 | ||
123 | ldmia r3!, { r4, r12, r14 } | ||
124 | mla r2, r4, r8, r2 | ||
125 | mla r2, r12, r7, r2 | ||
126 | mla r2, r14, r6, r2 | ||
127 | ldr r4, [r3], #4 | ||
128 | mla r2, r4, r5, r2 | ||
129 | ldr r4, [r3] | ||
130 | add r2, r4, r2, asr r1 | ||
131 | str r2, [r3], #-6*4 /* store the sample and rewind r3 to the next window start */ | ||
132 | subs r0, r0, #1 | ||
133 | bne .loop7 | ||
134 | b .exit | ||
135 | ||
136 | .order6: /* r5-r10 receive coeffs[0..5] */ | ||
137 | ldmia r4, { r5-r10 } | ||
138 | .loop6: | ||
139 | ldmia r3!, { r4, r11-r12, r14 } | ||
140 | mul r2, r4, r10 | ||
141 | mla r2, r11, r9, r2 | ||
142 | mla r2, r12, r8, r2 | ||
143 | mla r2, r14, r7, r2 | ||
144 | ldmia r3!, { r4, r11 } | ||
145 | mla r2, r4, r6, r2 | ||
146 | mla r2, r11, r5, r2 | ||
147 | ldr r4, [r3] | ||
148 | add r2, r4, r2, asr r1 | ||
149 | str r2, [r3], #-5*4 /* store the sample and rewind r3 to the next window start */ | ||
150 | subs r0, r0, #1 | ||
151 | bne .loop6 | ||
152 | b .exit | ||
153 | ||
154 | .order5: /* r5-r9 receive coeffs[0..4]; all five history samples fit in one block load */ | ||
155 | ldmia r4, { r5-r9 } | ||
156 | .loop5: | ||
157 | ldmia r3!, { r4, r10-r12, r14 } | ||
158 | mul r2, r4, r9 | ||
159 | mla r2, r10, r8, r2 | ||
160 | mla r2, r11, r7, r2 | ||
161 | mla r2, r12, r6, r2 | ||
162 | mla r2, r14, r5, r2 | ||
163 | ldr r4, [r3] | ||
164 | add r2, r4, r2, asr r1 | ||
165 | str r2, [r3], #-4*4 /* store the sample and rewind r3 to the next window start */ | ||
166 | subs r0, r0, #1 | ||
167 | bne .loop5 | ||
168 | b .exit | ||
169 | ||
170 | .order4: /* r5-r8 receive coeffs[0..3] */ | ||
171 | ldmia r4, { r5-r8 } | ||
172 | .loop4: | ||
173 | ldmia r3!, { r4, r11-r12, r14 } | ||
174 | mul r2, r4, r8 | ||
175 | mla r2, r11, r7, r2 | ||
176 | mla r2, r12, r6, r2 | ||
177 | mla r2, r14, r5, r2 | ||
178 | ldr r4, [r3] | ||
179 | add r2, r4, r2, asr r1 | ||
180 | str r2, [r3], #-3*4 /* store the sample and rewind r3 to the next window start */ | ||
181 | subs r0, r0, #1 | ||
182 | bne .loop4 | ||
183 | b .exit | ||
184 | ||
185 | .order3: /* r5-r7 receive coeffs[0..2] */ | ||
186 | ldmia r4, { r5-r7 } | ||
187 | .loop3: | ||
188 | ldmia r3!, { r4, r12, r14 } | ||
189 | mul r2, r4, r7 | ||
190 | mla r2, r12, r6, r2 | ||
191 | mla r2, r14, r5, r2 | ||
192 | ldr r4, [r3] | ||
193 | add r2, r4, r2, asr r1 | ||
194 | str r2, [r3], #-2*4 /* store the sample and rewind r3 to the next window start */ | ||
195 | subs r0, r0, #1 | ||
196 | bne .loop3 | ||
197 | b .exit | ||
198 | ||
199 | .order2: /* r5-r6 receive coeffs[0..1] */ | ||
200 | ldmia r4, { r5-r6 } | ||
201 | .loop2: | ||
202 | ldmia r3!, { r4, r14 } | ||
203 | mul r2, r4, r6 | ||
204 | mla r2, r14, r5, r2 | ||
205 | ldr r4, [r3] | ||
206 | add r2, r4, r2, asr r1 | ||
207 | str r2, [r3], #-1*4 /* store the sample and rewind r3 to the next window start */ | ||
208 | subs r0, r0, #1 | ||
209 | bne .loop2 | ||
210 | b .exit | ||
211 | ||
212 | .order1: /* single tap: the previous output stays in r4, so history is loaded from memory only once */ | ||
213 | ldr r5, [r4] @ load the one coef we need | ||
214 | ldr r4, [r3], #4 @ load one history sample, r3 now points to residual | ||
215 | .loop1: | ||
216 | mul r2, r4, r5 @ multiply coef by history sample | ||
217 | ldr r4, [r3] @ load residual | ||
218 | add r4, r4, r2, asr r1 @ add result to residual | ||
219 | str r4, [r3], #4 @ place r3 at next residual, we already have | ||
220 | subs r0, r0, #1 @ the current sample in r4 for the next iteration | ||
221 | bne .loop1 | ||
222 | b .exit | ||
223 | ||
224 | .default: | ||
225 | /* generic path for orders above 9, re-entered once per output sample: accumulate four taps per | ||
226 | .dloop1 pass, then finish the remaining (pred_order & 3) taps by jumping into the chain below. */ | ||
227 | add r5, r4, r2, lsl #2 @ need to start in the other end of coefs | ||
228 | mov r7, r2, lsr #2 @ r7 = coefs/4 | ||
229 | mov r14, #0 @ init accumulator | ||
230 | .dloop1: | ||
231 | ldmdb r5!, { r8-r11 } /* next four coefs, walking down from the end of coeffs[] */ | ||
232 | ldmia r3!, { r6, r12 } /* two history samples, walking up */ | ||
233 | mla r14, r6, r11, r14 | ||
234 | mla r14, r12, r10, r14 | ||
235 | ldmia r3!, { r6, r12 } | ||
236 | mla r14, r6, r9, r14 | ||
237 | mla r14, r12, r8, r14 | ||
238 | subs r7, r7, #1 | ||
239 | bne .dloop1 | ||
240 | ||
241 | and r7, r2, #3 @ get remaining samples to be filtered | ||
242 | add pc, pc, r7, lsl #2 @ jump into accumulator chain | ||
243 | @ jumptable: | ||
244 | b .dsave @ padding | ||
245 | b .dsave /* 0 taps left */ | ||
246 | b .oneleft /* 1 tap left */ | ||
247 | b .twoleft /* 2 taps left */ | ||
248 | @ implicit .threeleft | ||
249 | ldr r12, [r5, #-4]! /* step down to the next coef */ | ||
250 | ldr r8, [r3], #4 /* next history sample */ | ||
251 | mla r14, r12, r8, r14 | ||
252 | .twoleft: | ||
253 | ldr r12, [r5, #-4]! | ||
254 | ldr r8, [r3], #4 | ||
255 | mla r14, r12, r8, r14 | ||
256 | .oneleft: | ||
257 | ldr r12, [r5, #-4]! | ||
258 | ldr r8, [r3], #4 | ||
259 | mla r14, r12, r8, r14 | ||
260 | ||
261 | .dsave: | ||
262 | ldr r12, [r3] @ load residual | ||
263 | add r14, r12, r14, asr r1 @ shift sum by qlevel bits and add residual | ||
264 | str r14, [r3], #4 @ store result | ||
265 | sub r3, r3, r2, lsl #2 @ and wrap history pointer back to next first pos | ||
266 | subs r0, r0, #1 @ are we done? | ||
267 | bne .default @ no, prepare for next sample | ||
268 | ||
269 | .exit: | ||
270 | ldmia sp!, { r4-r11, pc } /* restore r4-r11 and return by popping the saved lr into pc */ | ||
271 | |||
diff --git a/apps/codecs/libffmpegFLAC/arm.h b/apps/codecs/libffmpegFLAC/arm.h new file mode 100644 index 0000000000..39080d7f75 --- /dev/null +++ b/apps/codecs/libffmpegFLAC/arm.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef _FLAC_ARM_H | ||
2 | #define _FLAC_ARM_H | ||
3 | ||
4 | #include "bitstream.h" | ||
5 | /* ARM assembler LPC filter, implemented in arm.S. Overwrites the blocksize residual words starting at data in place; reads the pred_order samples stored immediately before data as initial history; qlevel is the arithmetic right shift applied to each prediction sum; coeffs points to pred_order coefficients. */ | ||
6 | void lpc_decode_arm(int blocksize, int qlevel, int pred_order, int32_t* data, int* coeffs); | ||
7 | ||
8 | #endif /* _FLAC_ARM_H */
diff --git a/apps/codecs/libffmpegFLAC/decoder.c b/apps/codecs/libffmpegFLAC/decoder.c index e5c4b426d5..ed175548f2 100644 --- a/apps/codecs/libffmpegFLAC/decoder.c +++ b/apps/codecs/libffmpegFLAC/decoder.c | |||
@@ -44,6 +44,8 @@ | |||
44 | 44 | ||
45 | #if defined(CPU_COLDFIRE) | 45 | #if defined(CPU_COLDFIRE) |
46 | #include "coldfire.h" | 46 | #include "coldfire.h" |
47 | #elif defined(CPU_ARM) | ||
48 | #include "arm.h" | ||
47 | #endif | 49 | #endif |
48 | 50 | ||
49 | #define FFMAX(a,b) ((a) > (b) ? (a) : (b)) | 51 | #define FFMAX(a,b) ((a) > (b) ? (a) : (b)) |
@@ -262,6 +264,10 @@ static int decode_subframe_lpc(FLACContext *s, int32_t* decoded, int pred_order) | |||
262 | (void)sum; | 264 | (void)sum; |
263 | lpc_decode_emac(s->blocksize - pred_order, qlevel, pred_order, | 265 | lpc_decode_emac(s->blocksize - pred_order, qlevel, pred_order, |
264 | decoded + pred_order, coeffs); | 266 | decoded + pred_order, coeffs); |
267 | #elif defined(CPU_ARM) | ||
268 | (void)sum; | ||
269 | lpc_decode_arm(s->blocksize - pred_order, qlevel, pred_order, | ||
270 | decoded + pred_order, coeffs); | ||
265 | #else | 271 | #else |
266 | for (i = pred_order; i < s->blocksize; i++) | 272 | for (i = pred_order; i < s->blocksize; i++) |
267 | { | 273 | { |