summaryrefslogtreecommitdiff
path: root/apps/codecs/libmusepack/synth_filter_arm.S
diff options
context:
space:
mode:
Diffstat (limited to 'apps/codecs/libmusepack/synth_filter_arm.S')
-rwxr-xr-xapps/codecs/libmusepack/synth_filter_arm.S155
1 files changed, 155 insertions, 0 deletions
diff --git a/apps/codecs/libmusepack/synth_filter_arm.S b/apps/codecs/libmusepack/synth_filter_arm.S
new file mode 100755
index 0000000000..ce668e888c
--- /dev/null
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@@ -0,0 +1,155 @@
1/***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
9 *
10 * Copyright (C) 2008 by Andree Buschmann
11 *
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
14 *
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
17 *
18 ****************************************************************************/
19
20#include "mpc_config.h"
21
22 .section .text, "ax", %progbits
23
24/****************************************************************************
25 * void mpc_decoder_windowing_D(...)
26 *
27 * 2nd step within synthesis filter. Does the dewindowing.
28 * 32=32x32 multiplies (OPTIMIZE_FOR_SPEED)
29 * Uses pre-shifted V[] and D[] values.
30 ****************************************************************************/
31#if defined(OPTIMIZE_FOR_SPEED)
/*
 * DEWIN_TAP32 coef, vidx
 * One multiply-accumulate step: loads the word at r1 + vidx*4 into r11 and
 * adds coef * r11 to the 32-bit running sum held in r12.
 */
    .macro  DEWIN_TAP32 coef, vidx
    ldr     r11, [r1, #\vidx*4]
    mla     r12, \coef, r11, r12
    .endm

    .align  2
    .global mpc_decoder_windowing_D
    .type   mpc_decoder_windowing_D, %function
mpc_decoder_windowing_D:
    /*
     * Register usage:
     *   r0      Data[] write pointer, advanced one word per output
     *   r1      V[] read pointer, advanced one word per output
     *   r2      D[] read pointer, advanced 16 words per output
     *   r3-r10  current group of eight D[] coefficients
     *   r11     V[] value of the tap being processed
     *   r12     32-bit accumulator for the current output
     *   lr      number of outputs still to produce
     */
    stmfd   sp!, {r4-r12, lr}

    mov     lr, #32                 /* 32 outputs are produced per call */
.Ldewin32_next:
    ldmia   r2!, {r3-r10}           /* first eight coefficients of this output */
    ldr     r11, [r1]               /* tap 0 starts the sum (mul, not mla) */
    mul     r12, r3, r11
    DEWIN_TAP32 r4,   96            /* tap  1 */
    DEWIN_TAP32 r5,  128            /* tap  2 */
    DEWIN_TAP32 r6,  224            /* tap  3 */
    DEWIN_TAP32 r7,  256            /* tap  4 */
    DEWIN_TAP32 r8,  352            /* tap  5 */
    DEWIN_TAP32 r9,  384            /* tap  6 */
    DEWIN_TAP32 r10, 480            /* tap  7 */
    ldmia   r2!, {r3-r10}           /* last eight coefficients of this output */
    DEWIN_TAP32 r3,  512            /* tap  8 */
    DEWIN_TAP32 r4,  608            /* tap  9 */
    DEWIN_TAP32 r5,  640            /* tap 10 */
    DEWIN_TAP32 r6,  736            /* tap 11 */
    DEWIN_TAP32 r7,  768            /* tap 12 */
    DEWIN_TAP32 r8,  864            /* tap 13 */
    DEWIN_TAP32 r9,  896            /* tap 14 */
    DEWIN_TAP32 r10, 992            /* tap 15 */
    str     r12, [r0], #4           /* write the sum to Data[], post-increment */
    add     r1, r1, #4              /* move on to the next V[] column */

    subs    lr, lr, #1
    bgt     .Ldewin32_next          /* repeat while outputs remain */

    ldmfd   sp!, {r4-r12, pc}       /* restore registers and return */
.Ldewin32_end:
    .size   mpc_decoder_windowing_D, .Ldewin32_end-mpc_decoder_windowing_D
88#else
89/****************************************************************************
90 * void mpc_decoder_windowing_D(...)
91 *
92 * 2nd step within synthesis filter. Does the dewindowing.
93 * 64=32x32 multiplies
94 * Drops lo-part of 64bit multiply results and will therefore lose 1 bit
95 * accuracy. The decoder output is binary identical as this imprecision is
96 * far below the output's 16bit resolution.
97 ****************************************************************************/
/*
 * DEWIN_TAP64 coef, vidx
 * One multiply-accumulate step: loads the word at r1 + vidx*4 into r11, then
 * adds the signed 64-bit product coef * r11 to the register pair r12:r11.
 * Because r11 has just been reloaded, the low word produced by the previous
 * tap is not carried over; only the high word in r12 accumulates across taps.
 */
    .macro  DEWIN_TAP64 coef, vidx
    ldr     r11, [r1, #\vidx*4]
    smlal   r11, r12, \coef, r11
    .endm

    .align  2
    .global mpc_decoder_windowing_D
    .type   mpc_decoder_windowing_D, %function
mpc_decoder_windowing_D:
    /*
     * Register usage:
     *   r0      Data[] write pointer, advanced one word per output
     *   r1      V[] read pointer, advanced one word per output
     *   r2      D[] read pointer, advanced 16 words per output
     *   r3-r10  current group of eight D[] coefficients
     *   r11     V[] value of the tap being processed, also the low result word
     *   r12     high result word, the only part kept between taps
     *   lr      number of outputs still to produce
     */
    stmfd   sp!, {r4-r12, lr}

    mov     lr, #32                 /* 32 outputs are produced per call */
.Ldewin64_next:
    ldmia   r2!, {r3-r10}           /* first eight coefficients of this output */
    ldr     r11, [r1]               /* tap 0 starts the sum (smull, not smlal) */
    smull   r11, r12, r3, r11
    DEWIN_TAP64 r4,   96            /* tap  1 */
    DEWIN_TAP64 r5,  128            /* tap  2 */
    DEWIN_TAP64 r6,  224            /* tap  3 */
    DEWIN_TAP64 r7,  256            /* tap  4 */
    DEWIN_TAP64 r8,  352            /* tap  5 */
    DEWIN_TAP64 r9,  384            /* tap  6 */
    DEWIN_TAP64 r10, 480            /* tap  7 */
    ldmia   r2!, {r3-r10}           /* last eight coefficients of this output */
    DEWIN_TAP64 r3,  512            /* tap  8 */
    DEWIN_TAP64 r4,  608            /* tap  9 */
    DEWIN_TAP64 r5,  640            /* tap 10 */
    DEWIN_TAP64 r6,  736            /* tap 11 */
    DEWIN_TAP64 r7,  768            /* tap 12 */
    DEWIN_TAP64 r8,  864            /* tap 13 */
    DEWIN_TAP64 r9,  896            /* tap 14 */
    DEWIN_TAP64 r10, 992            /* tap 15 */
    mov     r4, r12, lsl #1         /* result = high word shifted left by one */
    str     r4, [r0], #4            /* write the result to Data[], post-increment */
    add     r1, r1, #4              /* move on to the next V[] column */

    subs    lr, lr, #1
    bgt     .Ldewin64_next          /* repeat while outputs remain */

    ldmfd   sp!, {r4-r12, pc}       /* restore registers and return */
.Ldewin64_end:
    .size   mpc_decoder_windowing_D, .Ldewin64_end-mpc_decoder_windowing_D
155#endif