summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libtta/filter_arm.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libtta/filter_arm.S')
-rw-r--r--lib/rbcodec/codecs/libtta/filter_arm.S198
1 files changed, 198 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libtta/filter_arm.S b/lib/rbcodec/codecs/libtta/filter_arm.S
new file mode 100644
index 0000000000..f3959b83ca
--- /dev/null
+++ b/lib/rbcodec/codecs/libtta/filter_arm.S
@@ -0,0 +1,198 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2010 Yoshihisa Uchida
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
16 *
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
19 *
20 ****************************************************************************/
21
22#include "config.h"
23
/*
 * void hybrid_filter(fltst *fs, int *in)
 *
 * ARM assembler version of the libtta hybrid filter step.
 *
 * Per call:
 *   1. Depending on the sign of fs->error, the eight coefficients fs->qm[0..7]
 *      are left untouched (error == 0), decreased (error < 0) or increased
 *      (error > 0) by the eight words at pM = fs->dx + fs->index.
 *   2. sum = fs->round + sum over k of qm[k] * pA[k], pA = fs->dl + fs->index.
 *   3. fs->error = *in;  *in += sum >> fs->shift   (arithmetic shift).
 *   4. The four newest dl words and the four newest dx words are rewritten
 *      (see the comments in the tail of the function).
 *   5. fs->index advances by one word and wraps after 16 words; on the wrap
 *      the newest eight words of dl and dx are moved to the array starts.
 *
 * In:     r0 = fs, r1 = in
 * Out:    nothing in registers; *in and *fs are updated in memory
 * Saves:  r4-r12 and the return address (40 bytes of stack)
 * Clobb:  r1, r2, r3, flags (r0 is never written)
 *
 * fs->index is kept as a BYTE offset (0, 4, ..., 60), not an element count.
 *
 * The structure offsets below are the ones this routine has always used;
 * they must match the C declaration of fltst, which is not visible from
 * this file - verify when changing either side.
 */

        .equ    FS_OFS_SHIFT,    12     @ fs->shift
        .equ    FS_OFS_QM,       20     @ fs->qm[0]
        .equ    FS_OFS_DX,       52     @ fs->dx[0]
        .equ    FS_OFS_DL,      148     @ fs->dl[0]
        .equ    HF_TAPS_BYTES,   32     @ 8 filter taps * 4 bytes
        .equ    HF_INDEX_WRAP,   64     @ fs->index wraps after 16 words

/*
 * hf_mac4: sum += qm quad * dl quad
 *   in:  r5-r8 = four coefficients, r9-r12 = four dl words, lr = sum
 *   out: lr = updated sum
 */
        .macro  hf_mac4
        mla     lr, r5, r9,  lr
        mla     lr, r6, r10, lr
        mla     lr, r7, r11, lr
        mla     lr, r8, r12, lr
        .endm

/*
 * hf_adapt4 op: adapt the next four coefficients, op is add or sub
 *   in:  r4 = pB (next qm word), r3 = pM (next dx word)
 *   out: r5-r8 = updated coefficients (also written back to qm),
 *        r9-r12 = the dx words used, r4 and r3 advanced by 16 bytes
 */
        .macro  hf_adapt4 op
        ldmia   r4,  {r5 - r8}
        ldmia   r3!, {r9 - r12}
        \op     r5, r5, r9
        \op     r6, r6, r10
        \op     r7, r7, r11
        \op     r8, r8, r12
        stmia   r4!, {r5 - r8}
        .endm

#ifdef USE_IRAM
        .section .icode, "ax", %progbits
#else
        .text
#endif
        .align
        .global hybrid_filter
        .type   hybrid_filter, %function

hybrid_filter:
        stmdb   sp!, {r4 - r12, lr}

        @ Register roles until the sum is complete:
        @   r2 = pA (walks fs->dl)     r3 = pM (walks fs->dx)
        @   r4 = pB (walks fs->qm)     lr = sum, seeded with fs->round
        ldmia   r0, {r5, r6, lr}        @ r5 = index, r6 = error, lr = round
        add     r2, r0, #FS_OFS_DL
        add     r3, r0, #FS_OFS_DX
        add     r4, r0, #FS_OFS_QM
        add     r2, r2, r5              @ index is a byte offset: plain add
        add     r3, r3, r5

        cmp     r6, #0
        bmi     .Lhf_err_neg
        bne     .Lhf_err_pos

        @ ---- fs->error == 0: coefficients are used unchanged ----
        add     r3, r3, #HF_TAPS_BYTES  @ keep pM in step with the adapting paths
        ldmia   r4!, {r5 - r8}          @ qm[0..3]
        ldmia   r2!, {r9 - r12}         @ pA[0..3]
        hf_mac4
        ldmia   r4!, {r5 - r8}          @ qm[4..7], consumed by the common tail
        b       .Lhf_taps_4_7

.Lhf_err_neg:
        @ ---- fs->error < 0: qm[k] -= pM[k] ----
        hf_adapt4 sub                   @ qm[0..3]
        ldmia   r2!, {r9 - r12}         @ pA[0..3]
        hf_mac4
        hf_adapt4 sub                   @ qm[4..7]
        b       .Lhf_taps_4_7

.Lhf_err_pos:
        @ ---- fs->error > 0: qm[k] += pM[k] ----
        hf_adapt4 add                   @ qm[0..3]
        ldmia   r2!, {r9 - r12}         @ pA[0..3]
        hf_mac4
        hf_adapt4 add                   @ qm[4..7]
        @ falls through

.Lhf_taps_4_7:
        @ All paths arrive with r5-r8 = qm[4..7].  After this load r2 and r3
        @ both point 8 words past their start, and r9..r12 = pA[-4]..pA[-1].
        ldmia   r2!, {r9 - r12}
        hf_mac4

        @ fs->error = *in;  *in += sum >> fs->shift
        ldr     r5, [r1]                @ r5 = original *in
        ldr     r6, [r0, #FS_OFS_SHIFT]
        add     lr, r5, lr, asr r6      @ lr = new *in, later stored as *pA
        str     lr, [r1]

        @ fs->index = (fs->index + 4) & 63.  The Z flag set by ands selects
        @ the exit path below, so nothing up to the conditional instructions
        @ may alter the flags (no S-suffixed instruction in between).
        ldr     r1, [r0]
        add     r1, r1, #4
        ands    r1, r1, #(HF_INDEX_WRAP - 1)
        stmia   r0, {r1, r5}            @ fs->index, fs->error = original *in

        @ New dx words: (x asr 30) | 1 is +1 or -1 following the sign of x.
        @   r5 = pM[-3] = sign(pA[-4])
        @   r6 = pM[-2] = sign(pA[-3]) * 2
        @   r7 = pM[-1] = sign(pA[-2]) * 2
        @   r8 = pM[ 0] = sign(pA[-1]) * 4
        mov     r4, #1
        orr     r5, r4, r9,  asr #30
        orr     r6, r4, r10, asr #30
        orr     r7, r4, r11, asr #30
        orr     r8, r4, r12, asr #30
        mov     r6, r6, lsl #1
        mov     r7, r7, lsl #1
        mov     r8, r8, lsl #2

        @ New dl words; each line uses the result of the line before it.
        sub     r12, lr,  r12           @ pA[-1] = pA[0]  - pA[-1]
        sub     r11, r12, r11           @ pA[-2] = pA[-1] - pA[-2]
        sub     r10, r11, r10           @ pA[-3] = pA[-2] - pA[-3]

        @ index != 0: store in place and return.  stmda puts the highest
        @ numbered register at the base address, so lr lands on *pA and
        @ r8 on *pM, with the rest at descending addresses.
        stmneda r2, {r10 - r12, lr}
        stmneda r3, {r5 - r8}
        ldmpc   cond=ne regs=r4-r12     @ ldmpc is not defined in this file;
                                        @ presumably a return macro from config.h

.Lhf_memshl:
        @ index wrapped to 0: the stores above were skipped.  Here pA is
        @ dl + 23 and pM is dx + 23, so the live registers are
        @   r9..r12, lr = dl[19..23]     r5..r8 = dx[20..23]
        @ Copy words 16..23 of each array down to words 0..7, taking the
        @ freshly computed words straight from the registers.
        add     r2, r0, #(FS_OFS_DL + HF_INDEX_WRAP)
        ldmia   r2, {r1, r3, r4}        @ dl[16..18]
        sub     r2, r2, #HF_INDEX_WRAP  @ r2 = fs->dl
        stmia   r2, {r1, r3, r4, r9 - r12, lr}

        add     r9, r0, #(FS_OFS_DX + HF_INDEX_WRAP)
        ldmia   r9, {r1, r2, r3, r4}    @ dx[16..19]
        sub     r9, r9, #HF_INDEX_WRAP  @ r9 = fs->dx
        stmia   r9, {r1 - r8}
        ldmpc   regs=r4-r12

        .size   hybrid_filter, . - hybrid_filter