summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thom Johansen <thomj@rockbox.org> 2005-10-27 00:33:38 +0000
committer Thom Johansen <thomj@rockbox.org> 2005-10-27 00:33:38 +0000
commit 0b38c7dcbe283ba7d13531831a5367afae668e69 (patch)
tree dd5428f415fb6db9c860d6867c88b5059ba4f25a
parent 273d2e81f72c7721447ab9c539877f6712faaecc (diff)
download rockbox-0b38c7dcbe283ba7d13531831a5367afae668e69.tar.gz
rockbox-0b38c7dcbe283ba7d13531831a5367afae668e69.zip
Assembler optimised LPC routines for Coldfire. Will enable them when codec has seen further testing.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7657 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/libffmpegFLAC/coldfire.S 237
-rw-r--r-- apps/codecs/libffmpegFLAC/coldfire.h 8
2 files changed, 245 insertions, 0 deletions
diff --git a/apps/codecs/libffmpegFLAC/coldfire.S b/apps/codecs/libffmpegFLAC/coldfire.S
new file mode 100644
index 0000000000..7e19e4b695
--- /dev/null
+++ b/apps/codecs/libffmpegFLAC/coldfire.S
@@ -0,0 +1,237 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2005 by Thom Johansen
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
/* Assembler optimised LPC filtering routine needed for FLAC decoding,
   written for the MCF5249 processor or any other similar ColdFire core
   with the EMAC unit. Syntax is GNU as for m68k/ColdFire ('|' starts a
   line comment).

   void lpc_decode_emac(int blocksize, int qlevel, int pred_order,
                        int32_t* data, int* coeffs);

   For each of the 'blocksize' samples starting at 'data', the routine
   computes
       sum = coeffs[0]*data[i-1] + ... + coeffs[pred_order-1]*data[i-pred_order]
   shifts the sum right arithmetically by 'qlevel' bits and adds it to
   data[i] in place (data[i] holds the residual on entry). The 'pred_order'
   samples in front of 'data' are read as history.

   All LPC filtering up to order 8 is done in specially optimised unrolled
   loops, while every order above this is handled by a slower default
   routine.

   In (stack):  4(sp) blocksize, 8(sp) qlevel, 12(sp) pred_order,
                16(sp) data, 20(sp) coeffs
   Preserved:   d2-d7/a2-a5 (saved in a 40 byte frame)
   Modified:    d0-d1/a0-a1, acc0, condition codes; MACSR is set to 0
                (integer mode) and is not restored.
   A call with blocksize <= 0 or pred_order <= 0 writes no samples.
 */
    .text
    .global lpc_decode_emac
    .align  2
lpc_decode_emac:
    lea.l   (-40, %sp), %sp
    movem.l %d2-%d7/%a2-%a5, (%sp)          | 10 registers = 40 bytes
    movem.l (40+4, %sp), %d0-%d2/%a0-%a1    | skip frame + return address
    /* d0 = blocksize, d1 = qlevel, d2 = pred_order
       a0 = data, a1 = coeffs
     */

    /* the history pointer always trails the data pointer by 'pred_order'
       samples. since we have one loop for each order, we can hard code this
       and free a register by not keeping the data pointer around.
     */
    move.l  %d2, %d3
    neg.l   %d3
    lea.l   (%a0, %d3.l*4), %a0     | a0 = history = data - pred_order
    clr.l   %d3
    move.l  %d3, %macsr             | we'll need integer mode for this
    move.l  %d3, %acc0              | never accumulate onto a stale acc0
    tst.l   %d0
    jle     .exit                   | no samples to process (or bogus count)
    tst.l   %d2
    jle     .exit                   | order <= 0: nothing to predict, and a
                                    | negative order must not index the table
    moveq.l #8, %d3
    cmp.l   %d3, %d2
    jgt     .default                | order is over 8, jump to default case
    lea.l   .jumptable, %a4
    move.l  (%a4, %d2.l*4), %a4     | a4 = handler for this order
    jmp     (%a4)
    .align  4                       | avoid unaligned fetch
.jumptable:
    .long   .exit                   | order 0 (kept so the index stays 1:1)
    .long   .order1
    .long   .order2
    .long   .order3
    .long   .order4
    .long   .order5
    .long   .order6
    .long   .order7
    .long   .order8

    /* Unrolled loops for orders 3..8 share one shape:
         - coeffs[0..N-1] are held in registers; a5 is preloaded with the
           oldest history sample, which pairs with the highest coefficient.
         - each mac.l multiplies a5 by a coefficient and, in parallel, loads
           the next history sample into a5.
         - the last mac.l reloads the second-oldest sample so a5 is already
           primed for the next iteration.
         - a0 then points at the residual; the shifted sum is added there
           and a0 is rewound to the third-oldest sample.
     */
.order8:
    movem.l (%a1), %d3-%d7/%a2-%a4          | load lpc coefs
    move.l  (%a0)+, %a5                     | load first history sample
.loop8:
    mac.l   %a5, %a4, (%a0)+, %a5, %acc0
    mac.l   %a5, %a3, (%a0)+, %a5, %acc0
    mac.l   %a5, %a2, (%a0)+, %a5, %acc0
    mac.l   %a5, %d7, (%a0)+, %a5, %acc0
    mac.l   %a5, %d6, (%a0)+, %a5, %acc0
    mac.l   %a5, %d5, (%a0)+, %a5, %acc0
    mac.l   %a5, %d4, (%a0)+, %a5, %acc0
    mac.l   %a5, %d3, (-7*4, %a0), %a5, %acc0   | load for the next iteration
    movclr.l %acc0, %d2                     | get sum, leave acc0 cleared
    asr.l   %d1, %d2                        | shift sum by lp_quantization bits
    add.l   %d2, (%a0)                      | add residual and save
    lea.l   (-6*4, %a0), %a0                | rewind for the next sample
    subq.l  #1, %d0                         | decrement counter
    jne     .loop8                          | are we done?
    jra     .exit

.order7:
    movem.l (%a1), %d3-%d7/%a2-%a3
    move.l  (%a0)+, %a5
.loop7:
    mac.l   %a5, %a3, (%a0)+, %a5, %acc0
    mac.l   %a5, %a2, (%a0)+, %a5, %acc0
    mac.l   %a5, %d7, (%a0)+, %a5, %acc0
    mac.l   %a5, %d6, (%a0)+, %a5, %acc0
    mac.l   %a5, %d5, (%a0)+, %a5, %acc0
    mac.l   %a5, %d4, (%a0)+, %a5, %acc0
    mac.l   %a5, %d3, (-6*4, %a0), %a5, %acc0
    movclr.l %acc0, %d2
    asr.l   %d1, %d2
    add.l   %d2, (%a0)
    lea.l   (-5*4, %a0), %a0
    subq.l  #1, %d0
    jne     .loop7
    jra     .exit

.order6:
    movem.l (%a1), %d3-%d7/%a2
    move.l  (%a0)+, %a5
.loop6:
    mac.l   %a5, %a2, (%a0)+, %a5, %acc0
    mac.l   %a5, %d7, (%a0)+, %a5, %acc0
    mac.l   %a5, %d6, (%a0)+, %a5, %acc0
    mac.l   %a5, %d5, (%a0)+, %a5, %acc0
    mac.l   %a5, %d4, (%a0)+, %a5, %acc0
    mac.l   %a5, %d3, (-5*4, %a0), %a5, %acc0
    movclr.l %acc0, %d2
    asr.l   %d1, %d2
    add.l   %d2, (%a0)
    lea.l   (-4*4, %a0), %a0
    subq.l  #1, %d0
    jne     .loop6
    jra     .exit

.order5:
    movem.l (%a1), %d3-%d7
    move.l  (%a0)+, %a5
.loop5:
    mac.l   %a5, %d7, (%a0)+, %a5, %acc0
    mac.l   %a5, %d6, (%a0)+, %a5, %acc0
    mac.l   %a5, %d5, (%a0)+, %a5, %acc0
    mac.l   %a5, %d4, (%a0)+, %a5, %acc0
    mac.l   %a5, %d3, (-4*4, %a0), %a5, %acc0
    movclr.l %acc0, %d2
    asr.l   %d1, %d2
    add.l   %d2, (%a0)
    lea.l   (-3*4, %a0), %a0
    subq.l  #1, %d0
    jne     .loop5
    jra     .exit

.order4:
    movem.l (%a1), %d3-%d6
    move.l  (%a0)+, %a5
.loop4:
    mac.l   %a5, %d6, (%a0)+, %a5, %acc0
    mac.l   %a5, %d5, (%a0)+, %a5, %acc0
    mac.l   %a5, %d4, (%a0)+, %a5, %acc0
    mac.l   %a5, %d3, (-3*4, %a0), %a5, %acc0
    movclr.l %acc0, %d2
    asr.l   %d1, %d2
    add.l   %d2, (%a0)
    subq.l  #8, %a0                         | rewind two samples
    subq.l  #1, %d0
    jne     .loop4
    jra     .exit

.order3:
    movem.l (%a1), %d3-%d5
    move.l  (%a0)+, %a5
.loop3:
    mac.l   %a5, %d5, (%a0)+, %a5, %acc0
    mac.l   %a5, %d4, (%a0)+, %a5, %acc0
    mac.l   %a5, %d3, (-2*4, %a0), %a5, %acc0
    movclr.l %acc0, %d2
    asr.l   %d1, %d2
    add.l   %d2, (%a0)
    subq.l  #4, %a0                         | rewind one sample
    subq.l  #1, %d0
    jne     .loop3
    jra     .exit

.order2:
    movem.l (%a1), %d3-%d4
    move.l  (%a0)+, %a5
.loop2:
    mac.l   %a5, %d4, (%a0)+, %a5, %acc0
    mac.l   %a5, %d3, %acc0         | data for next iteration is already loaded
    movclr.l %acc0, %d2
    asr.l   %d1, %d2
    add.l   %d2, (%a0)              | a0 needs no rewind: it already points
                                    | one past the sample held in a5
    subq.l  #1, %d0
    jne     .loop2
    jra     .exit

.order1:
    | no point in using mac here
    move.l  (%a1), %d3              | d3 = coeffs[0]
.loop1:
    move.l  %d3, %d2
    muls.l  (%a0)+, %d2             | d2 = coeffs[0] * previous sample
    asr.l   %d1, %d2
    add.l   %d2, (%a0)
    subq.l  #1, %d0
    jne     .loop1
    jra     .exit

.default:
    /* we do the filtering in an unrolled by 4 loop as far as we can, and then
       do the rest in an ordinary one by one sample loop. this whole section
       is re-entered once per output sample; d2 still holds pred_order here
       (only reached for orders above 8, so the unrolled loop always runs at
       least twice).
     */
    lea.l   (%a1, %d2.l*4), %a2     | need to start in the other end of coefs
    move.l  %a0, %a3                | working copy of history pointer
    move.l  %d2, %d3
    lsr.l   #2, %d3                 | coefs/4, num of iterations needed in next loop
    move.l  (%a3)+, %a5             | preload data for loop
.dloop1:
    lea.l   (-4*4, %a2), %a2        | move lpc coef pointer four samples backwards
    movem.l (%a2), %d4-%d7          | load four coefs
    mac.l   %a5, %d7, (%a3)+, %a5, %acc0
    mac.l   %a5, %d6, (%a3)+, %a5, %acc0
    mac.l   %a5, %d5, (%a3)+, %a5, %acc0
    mac.l   %a5, %d4, (%a3)+, %a5, %acc0
    subq.l  #1, %d3                 | any more unrolled loop operations left?
    jne     .dloop1

    move.l  %d2, %d3
    moveq.l #3, %d4                 | mask 0x00000003
    and.l   %d4, %d3                | get the remaining samples to be filtered
    jeq     .dsave                  | no remaining samples
.dloop2:
    move.l  -(%a2), %d4             | get lpc coef
    mac.l   %a5, %d4, (%a3)+, %a5, %acc0
    subq.l  #1, %d3                 | any more iterations left?
    jne     .dloop2
.dsave:
    movclr.l %acc0, %d3             | get result, leave acc0 cleared
    asr.l   %d1, %d3                | shift lp_quantization bits right
    subq.l  #4, %a3                 | we're one past the save location
    add.l   %d3, (%a3)              | add residual and save
    addq.l  #4, %a0                 | increment history pointer
    subq.l  #1, %d0                 | decrement data_len
    jne     .default                | are we done?
                                    | if so, fall through to exit

.exit:
    movem.l (%sp), %d2-%d7/%a2-%a5
    lea.l   (40, %sp), %sp
    rts
diff --git a/apps/codecs/libffmpegFLAC/coldfire.h b/apps/codecs/libffmpegFLAC/coldfire.h
new file mode 100644
index 0000000000..5493f549f7
--- /dev/null
+++ b/apps/codecs/libffmpegFLAC/coldfire.h
@@ -0,0 +1,8 @@
1#ifndef _FLAC_COLDFIRE_H
2#define _FLAC_COLDFIRE_H
3
4#include "bitstream.h"
5
6void lpc_decode_emac(int blocksize, int qlevel, int pred_order, int32_t* data, int* coeffs);
7
8#endif