diff options
Diffstat (limited to 'lib/rbcodec/codecs/libatrac/atrac3_armv5e.S')
-rw-r--r-- | lib/rbcodec/codecs/libatrac/atrac3_armv5e.S | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libatrac/atrac3_armv5e.S b/lib/rbcodec/codecs/libatrac/atrac3_armv5e.S new file mode 100644 index 0000000000..1d9d35a5da --- /dev/null +++ b/lib/rbcodec/codecs/libatrac/atrac3_armv5e.S | |||
@@ -0,0 +1,163 @@ | |||
1 | /*************************************************************************** | ||
2 | * __________ __ ___. | ||
3 | * Open \______ \ ____ ____ | | _\_ |__ _______ ___ | ||
4 | * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / | ||
5 | * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < | ||
6 | * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ | ||
7 | * \/ \/ \/ \/ \/ | ||
8 | * $Id: | ||
9 | * | ||
10 | * Copyright (C) 2010 by Michael Giacomelli | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or | ||
13 | * modify it under the terms of the GNU General Public License | ||
14 | * as published by the Free Software Foundation; either version 2 | ||
15 | * of the License, or (at your option) any later version. | ||
16 | * | ||
17 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY | ||
18 | * KIND, either express or implied. | ||
19 | * | ||
20 | ****************************************************************************/ | ||
21 | |||
22 | #include "config.h" | ||
23 | |||
/* All code below is emitted into the executable .text section. */
    .section .text, "ax", %progbits


/****************************************************************************
 * atrac3_iqmf_dewindowing_armv5e(int32_t *out,
 *                                int32_t *in,
 *                                int32_t *win,
 *                                unsigned int nIn);
 *
 * Dewindowing step within iqmf of atrac3 synthesis using 16 bit filter
 * coefficients and armv5e packed multiply instructions. Uses 2.5 cycles
 * per filter coefficient (ideal). Benchmarked 3.54 per coefficient (Clip+).
 *
 * Reference implementation:
 *
 * for (j = nIn; j != 0; j--) {
 *     s1 = fixmul32(in[0], win[0]);
 *     s2 = fixmul32(in[1], win[1]);
 *     for (i = 2; i < 48; i += 2) {
 *         s1 += fixmul32(in[i  ], win[i  ]);
 *         s2 += fixmul32(in[i+1], win[i+1]);
 *     }
 *     out[0] = s2 << 1;
 *     out[1] = s1 << 1;
 *     in  += 2;
 *     out += 2;
 * }
 *
 * How this code maps onto the reference:
 *  - win is read as 48 packed 16 bit coefficients (96 bytes), two per 32 bit
 *    word: the even tap in the bottom halfword, the odd tap in the top one.
 *  - every product is formed by smulwb/smulwt/smlawb/smlawt, i.e.
 *    (32 bit sample * 16 bit coefficient) >> 16.
 *  - s1 lives in lr, s2 lives in r12.
 *
 * Register interface:
 *   r0 = out   two words are stored per iteration (2 * nIn words in total)
 *   r1 = in    48 words are loaded per iteration and the pointer then moves
 *              on by 2 words, so words in[0 .. 2*nIn + 45] are read
 *   r2 = win   rewound to win[0] after every iteration
 *   r3 = nIn   iteration count; 0 returns without touching memory, like the
 *              reference loop. The loop exit is a signed test (bgt), so
 *              counts of 0x80000000 and above are not supported.
 *
 * Clobbers r0-r3, r12 and the flags. r4-r11 and lr are pushed on entry and
 * popped by the ldmpc macro (expected to come from config.h) on exit.
 * Note: r12 is a scratch register and does not need to be preserved.
 ****************************************************************************/

/* After one output pair r1 has advanced by 48 words (192 bytes) while the
 * reference advances in by only 2 words: step back 46 words. */
#define IQMF_IN_REWIND   184    /* (48 - 2) entries * 4 bytes */

/* After one output pair r2 has advanced over all 48 coefficients. */
#define IQMF_WIN_REWIND   96    /* 48 entries * 2 bytes = back to win[0] */

/* Load the next four input words and accumulate them against the four packed
 * coefficients in \wa (taps i, i+1) and \wb (taps i+2, i+3).
 * Even taps are added to s1 (lr), odd taps to s2 (r12).
 * Clobbers r6, r7, r10, r11; post-increments r1 by 16 bytes. */
.macro iqmf_mac4 wa, wb
    ldmia   r1!, {r6, r7, r10, r11}   /* load in[i...i+3] */
    smlawb  lr,  r6,  \wa, lr         /* s1 += in[i  ] * win[i  ] >> 16 */
    smlawt  r12, r7,  \wa, r12        /* s2 += in[i+1] * win[i+1] >> 16 */
    smlawb  lr,  r10, \wb, lr         /* s1 += in[i+2] * win[i+2] >> 16 */
    smlawt  r12, r11, \wb, r12        /* s2 += in[i+3] * win[i+3] >> 16 */
.endm

/* Process eight taps: fetch eight packed coefficients (four words), then run
 * two 4-tap accumulate steps. Clobbers r4-r11; post-increments r2 by 16. */
.macro iqmf_mac8
    ldmia   r2!, {r4, r5, r8, r9}     /* load win[i...i+7] */
    iqmf_mac4 r4, r5
    iqmf_mac4 r8, r9
.endm

    .align  2
    .global atrac3_iqmf_dewindowing_armv5e
    .type   atrac3_iqmf_dewindowing_armv5e, %function

atrac3_iqmf_dewindowing_armv5e:
    /* r0 = dest */
    /* r1 = input samples */
    /* r2 = window coefficients */
    /* r3 = counter */
    stmfd   sp!, {r4-r11, lr}         /* save non-scratch registers */

    cmp     r3, #0                    /* the reference loop runs zero times */
    beq     .iqmf_dewindow_done       /* for nIn == 0: skip all loads/stores */

.iqmf_dewindow_outer_loop:            /* one output pair per pass */
    /* taps 0..7: the first two products initialise s1/s2 instead of
     * accumulating, so this group is written out by hand */
    ldmia   r2!, {r4, r5, r8, r9}     /* load win[0..7] */
    ldmia   r1!, {r6, r7, r10, r11}   /* load in[0..3] to avoid stall on arm11 */
    smulwb  lr,  r6,  r4              /* s1  = in[0] * win[0] >> 16 */
    smulwt  r12, r7,  r4              /* s2  = in[1] * win[1] >> 16 */
    smlawb  lr,  r10, r5, lr          /* s1 += in[2] * win[2] >> 16 */
    smlawt  r12, r11, r5, r12         /* s2 += in[3] * win[3] >> 16 */
    iqmf_mac4 r8, r9                  /* taps 4..7 */

    /* taps 8..47: five further groups of eight taps */
    .rept 5
    iqmf_mac8
    .endr

    mov     lr,  lr,  lsl #1          /* s1 << 1 */
    mov     r12, r12, lsl #1          /* s2 << 1 */

    stmia   r0!, {r12, lr}            /* store result out[0]=s2, out[1]=s1 */
    sub     r1, r1, #IQMF_IN_REWIND   /* net effect: in += 2 */
    sub     r2, r2, #IQMF_WIN_REWIND  /* back to win[0] */

    subs    r3, r3, #1                /* outer loop -= 1 */
    bgt     .iqmf_dewindow_outer_loop

.iqmf_dewindow_done:
    ldmpc   regs=r4-r11               /* restore registers and return */

.atrac3_iqmf_dewindowing_armv5e_end:
    .size   atrac3_iqmf_dewindowing_armv5e,.atrac3_iqmf_dewindowing_armv5e_end-atrac3_iqmf_dewindowing_armv5e