diff options
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h | 288 |
1 files changed, 288 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h b/lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h new file mode 100644 index 0000000000..2934dab776 --- /dev/null +++ b/lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h | |||
@@ -0,0 +1,288 @@ | |||
1 | /* Copyright (c) 2007-2008 CSIRO | ||
2 | Copyright (c) 2007-2008 Xiph.Org Foundation | ||
3 | Written by Jean-Marc Valin */ | ||
4 | /* | ||
5 | Redistribution and use in source and binary forms, with or without | ||
6 | modification, are permitted provided that the following conditions | ||
7 | are met: | ||
8 | |||
9 | - Redistributions of source code must retain the above copyright | ||
10 | notice, this list of conditions and the following disclaimer. | ||
11 | |||
12 | - Redistributions in binary form must reproduce the above copyright | ||
13 | notice, this list of conditions and the following disclaimer in the | ||
14 | documentation and/or other materials provided with the distribution. | ||
15 | |||
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
17 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
20 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
21 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
22 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
27 | */ | ||
28 | |||
29 | /* This is a simple MDCT implementation that uses a N/4 complex FFT | ||
30 | to do most of the work. It should be relatively straightforward to | ||
31 | plug in pretty much and FFT here. | ||
32 | |||
33 | This replaces the Vorbis FFT (and uses the exact same API), which | ||
34 | was a bit too messy and that was ending up duplicating code | ||
35 | (might as well use the same FFT everywhere). | ||
36 | |||
37 | The algorithm is similar to (and inspired from) Fabrice Bellard's | ||
38 | MDCT implementation in FFMPEG, but has differences in signs, ordering | ||
39 | and scaling in many places. | ||
40 | */ | ||
41 | #ifndef __MDCT_MIPSR1_H__ | ||
42 | #define __MDCT_MIPSR1_H__ | ||
43 | |||
44 | #ifndef SKIP_CONFIG_H | ||
45 | #ifdef HAVE_CONFIG_H | ||
46 | #include "config.h" | ||
47 | #endif | ||
48 | #endif | ||
49 | |||
50 | #include "mdct.h" | ||
51 | #include "kiss_fft.h" | ||
52 | #include "_kiss_fft_guts.h" | ||
53 | #include <math.h> | ||
54 | #include "os_support.h" | ||
55 | #include "mathops.h" | ||
56 | #include "stack_alloc.h" | ||
57 | |||
58 | /* Forward MDCT trashes the input array */ | ||
59 | #define OVERRIDE_clt_mdct_forward | ||
60 | void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | ||
61 | const opus_val16 *window, int overlap, int shift, int stride, int arch) | ||
62 | { | ||
63 | int i; | ||
64 | int N, N2, N4; | ||
65 | VARDECL(kiss_fft_scalar, f); | ||
66 | VARDECL(kiss_fft_cpx, f2); | ||
67 | const kiss_fft_state *st = l->kfft[shift]; | ||
68 | const kiss_twiddle_scalar *trig; | ||
69 | opus_val16 scale; | ||
70 | #ifdef FIXED_POINT | ||
71 | /* Allows us to scale with MULT16_32_Q16(), which is faster than | ||
72 | MULT16_32_Q15() on ARM. */ | ||
73 | int scale_shift = st->scale_shift-1; | ||
74 | #endif | ||
75 | |||
76 | (void)arch; | ||
77 | |||
78 | SAVE_STACK; | ||
79 | scale = st->scale; | ||
80 | |||
81 | N = l->n; | ||
82 | trig = l->trig; | ||
83 | for (i=0;i<shift;i++) | ||
84 | { | ||
85 | N >>= 1; | ||
86 | trig += N; | ||
87 | } | ||
88 | N2 = N>>1; | ||
89 | N4 = N>>2; | ||
90 | |||
91 | ALLOC(f, N2, kiss_fft_scalar); | ||
92 | ALLOC(f2, N4, kiss_fft_cpx); | ||
93 | |||
94 | /* Consider the input to be composed of four blocks: [a, b, c, d] */ | ||
95 | /* Window, shuffle, fold */ | ||
96 | { | ||
97 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
98 | const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); | ||
99 | const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); | ||
100 | kiss_fft_scalar * OPUS_RESTRICT yp = f; | ||
101 | const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); | ||
102 | const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; | ||
103 | for(i=0;i<((overlap+3)>>2);i++) | ||
104 | { | ||
105 | /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ | ||
106 | *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2); | ||
107 | *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]); | ||
108 | xp1+=2; | ||
109 | xp2-=2; | ||
110 | wp1+=2; | ||
111 | wp2-=2; | ||
112 | } | ||
113 | wp1 = window; | ||
114 | wp2 = window+overlap-1; | ||
115 | for(;i<N4-((overlap+3)>>2);i++) | ||
116 | { | ||
117 | /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | ||
118 | *yp++ = *xp2; | ||
119 | *yp++ = *xp1; | ||
120 | xp1+=2; | ||
121 | xp2-=2; | ||
122 | } | ||
123 | for(;i<N4;i++) | ||
124 | { | ||
125 | /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | ||
126 | *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]); | ||
127 | *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]); | ||
128 | xp1+=2; | ||
129 | xp2-=2; | ||
130 | wp1+=2; | ||
131 | wp2-=2; | ||
132 | } | ||
133 | } | ||
134 | /* Pre-rotation */ | ||
135 | { | ||
136 | kiss_fft_scalar * OPUS_RESTRICT yp = f; | ||
137 | const kiss_twiddle_scalar *t = &trig[0]; | ||
138 | for(i=0;i<N4;i++) | ||
139 | { | ||
140 | kiss_fft_cpx yc; | ||
141 | kiss_twiddle_scalar t0, t1; | ||
142 | kiss_fft_scalar re, im, yr, yi; | ||
143 | t0 = t[i]; | ||
144 | t1 = t[N4+i]; | ||
145 | re = *yp++; | ||
146 | im = *yp++; | ||
147 | |||
148 | yr = S_MUL_SUB(re,t0,im,t1); | ||
149 | yi = S_MUL_ADD(im,t0,re,t1); | ||
150 | |||
151 | yc.r = yr; | ||
152 | yc.i = yi; | ||
153 | yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); | ||
154 | yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift); | ||
155 | f2[st->bitrev[i]] = yc; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | /* N/4 complex FFT, does not downscale anymore */ | ||
160 | opus_fft_impl(st, f2); | ||
161 | |||
162 | /* Post-rotate */ | ||
163 | { | ||
164 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
165 | const kiss_fft_cpx * OPUS_RESTRICT fp = f2; | ||
166 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out; | ||
167 | kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); | ||
168 | const kiss_twiddle_scalar *t = &trig[0]; | ||
169 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
170 | for(i=0;i<N4;i++) | ||
171 | { | ||
172 | kiss_fft_scalar yr, yi; | ||
173 | yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]); | ||
174 | yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]); | ||
175 | *yp1 = yr; | ||
176 | *yp2 = yi; | ||
177 | fp++; | ||
178 | yp1 += 2*stride; | ||
179 | yp2 -= 2*stride; | ||
180 | } | ||
181 | } | ||
182 | RESTORE_STACK; | ||
183 | } | ||
184 | |||
185 | #define OVERRIDE_clt_mdct_backward | ||
186 | void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | ||
187 | const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) | ||
188 | { | ||
189 | int i; | ||
190 | int N, N2, N4; | ||
191 | const kiss_twiddle_scalar *trig; | ||
192 | |||
193 | (void)arch; | ||
194 | |||
195 | N = l->n; | ||
196 | trig = l->trig; | ||
197 | for (i=0;i<shift;i++) | ||
198 | { | ||
199 | N >>= 1; | ||
200 | trig += N; | ||
201 | } | ||
202 | N2 = N>>1; | ||
203 | N4 = N>>2; | ||
204 | |||
205 | /* Pre-rotate */ | ||
206 | { | ||
207 | /* Temp pointers to make it really clear to the compiler what we're doing */ | ||
208 | const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; | ||
209 | const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); | ||
210 | kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); | ||
211 | const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; | ||
212 | const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; | ||
213 | for(i=0;i<N4;i++) | ||
214 | { | ||
215 | int rev; | ||
216 | kiss_fft_scalar yr, yi; | ||
217 | rev = *bitrev++; | ||
218 | yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]); | ||
219 | yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]); | ||
220 | /* We swap real and imag because we use an FFT instead of an IFFT. */ | ||
221 | yp[2*rev+1] = yr; | ||
222 | yp[2*rev] = yi; | ||
223 | /* Storing the pre-rotation directly in the bitrev order. */ | ||
224 | xp1+=2*stride; | ||
225 | xp2-=2*stride; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); | ||
230 | |||
231 | /* Post-rotate and de-shuffle from both ends of the buffer at once to make | ||
232 | it in-place. */ | ||
233 | { | ||
234 | kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); | ||
235 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; | ||
236 | const kiss_twiddle_scalar *t = &trig[0]; | ||
237 | /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the | ||
238 | middle pair will be computed twice. */ | ||
239 | for(i=0;i<(N4+1)>>1;i++) | ||
240 | { | ||
241 | kiss_fft_scalar re, im, yr, yi; | ||
242 | kiss_twiddle_scalar t0, t1; | ||
243 | /* We swap real and imag because we're using an FFT instead of an IFFT. */ | ||
244 | re = yp0[1]; | ||
245 | im = yp0[0]; | ||
246 | t0 = t[i]; | ||
247 | t1 = t[N4+i]; | ||
248 | /* We'd scale up by 2 here, but instead it's done when mixing the windows */ | ||
249 | yr = S_MUL_ADD(re,t0 , im,t1); | ||
250 | yi = S_MUL_SUB(re,t1 , im,t0); | ||
251 | /* We swap real and imag because we're using an FFT instead of an IFFT. */ | ||
252 | re = yp1[1]; | ||
253 | im = yp1[0]; | ||
254 | yp0[0] = yr; | ||
255 | yp1[1] = yi; | ||
256 | |||
257 | t0 = t[(N4-i-1)]; | ||
258 | t1 = t[(N2-i-1)]; | ||
259 | /* We'd scale up by 2 here, but instead it's done when mixing the windows */ | ||
260 | yr = S_MUL_ADD(re,t0,im,t1); | ||
261 | yi = S_MUL_SUB(re,t1,im,t0); | ||
262 | yp1[0] = yr; | ||
263 | yp0[1] = yi; | ||
264 | yp0 += 2; | ||
265 | yp1 -= 2; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | /* Mirror on both sides for TDAC */ | ||
270 | { | ||
271 | kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; | ||
272 | kiss_fft_scalar * OPUS_RESTRICT yp1 = out; | ||
273 | const opus_val16 * OPUS_RESTRICT wp1 = window; | ||
274 | const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | ||
275 | |||
276 | for(i = 0; i < overlap/2; i++) | ||
277 | { | ||
278 | kiss_fft_scalar x1, x2; | ||
279 | x1 = *xp1; | ||
280 | x2 = *yp1; | ||
281 | *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); | ||
282 | *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); | ||
283 | wp1++; | ||
284 | wp2--; | ||
285 | } | ||
286 | } | ||
287 | } | ||
288 | #endif /* __MDCT_MIPSR1_H__ */ | ||