diff options
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/ltp_bfin.h')
-rw-r--r-- | lib/rbcodec/codecs/libspeex/ltp_bfin.h | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/ltp_bfin.h b/lib/rbcodec/codecs/libspeex/ltp_bfin.h new file mode 100644 index 0000000000..8d7225b017 --- /dev/null +++ b/lib/rbcodec/codecs/libspeex/ltp_bfin.h | |||
@@ -0,0 +1,419 @@ | |||
1 | /* Copyright (C) 2005 Analog Devices */ | ||
2 | /** | ||
3 | @file ltp_bfin.h | ||
4 | @author Jean-Marc Valin | ||
5 | @brief Long-Term Prediction functions (Blackfin version) | ||
6 | */ | ||
7 | /* | ||
8 | Redistribution and use in source and binary forms, with or without | ||
9 | modification, are permitted provided that the following conditions | ||
10 | are met: | ||
11 | |||
12 | - Redistributions of source code must retain the above copyright | ||
13 | notice, this list of conditions and the following disclaimer. | ||
14 | |||
15 | - Redistributions in binary form must reproduce the above copyright | ||
16 | notice, this list of conditions and the following disclaimer in the | ||
17 | documentation and/or other materials provided with the distribution. | ||
18 | |||
19 | - Neither the name of the Xiph.org Foundation nor the names of its | ||
20 | contributors may be used to endorse or promote products derived from | ||
21 | this software without specific prior written permission. | ||
22 | |||
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
24 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
26 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR | ||
27 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
28 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
29 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
30 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
31 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
32 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
33 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
34 | */ | ||
35 | |||
36 | #define OVERRIDE_INNER_PROD | ||
37 | spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) | ||
38 | { | ||
39 | spx_word32_t sum=0; | ||
40 | __asm__ __volatile__ ( | ||
41 | "P0 = %3;\n\t" | ||
42 | "P1 = %1;\n\t" | ||
43 | "P2 = %2;\n\t" | ||
44 | "I0 = P1;\n\t" | ||
45 | "I1 = P2;\n\t" | ||
46 | "L0 = 0;\n\t" | ||
47 | "L1 = 0;\n\t" | ||
48 | "A0 = 0;\n\t" | ||
49 | "R0.L = W[I0++] || R1.L = W[I1++];\n\t" | ||
50 | "LOOP inner%= LC0 = P0;\n\t" | ||
51 | "LOOP_BEGIN inner%=;\n\t" | ||
52 | "A0 += R0.L*R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" | ||
53 | "LOOP_END inner%=;\n\t" | ||
54 | "A0 += R0.L*R1.L (IS);\n\t" | ||
55 | "A0 = A0 >>> 6;\n\t" | ||
56 | "R0 = A0;\n\t" | ||
57 | "%0 = R0;\n\t" | ||
58 | : "=m" (sum) | ||
59 | : "m" (x), "m" (y), "d" (len-1) | ||
60 | : "P0", "P1", "P2", "R0", "R1", "A0", "I0", "I1", "L0", "L1", "R3" | ||
61 | ); | ||
62 | return sum; | ||
63 | } | ||
64 | |||
65 | #define OVERRIDE_PITCH_XCORR | ||
66 | void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack) | ||
67 | { | ||
68 | corr += nb_pitch - 1; | ||
69 | __asm__ __volatile__ ( | ||
70 | "P2 = %0;\n\t" | ||
71 | "I0 = P2;\n\t" /* x in I0 */ | ||
72 | "B0 = P2;\n\t" /* x in B0 */ | ||
73 | "R0 = %3;\n\t" /* len in R0 */ | ||
74 | "P3 = %3;\n\t" | ||
75 | "P3 += -2;\n\t" /* len in R0 */ | ||
76 | "P4 = %4;\n\t" /* nb_pitch in R0 */ | ||
77 | "R1 = R0 << 1;\n\t" /* number of bytes in x */ | ||
78 | "L0 = R1;\n\t" | ||
79 | "P0 = %1;\n\t" | ||
80 | |||
81 | "P1 = %2;\n\t" | ||
82 | "B1 = P1;\n\t" | ||
83 | "L1 = 0;\n\t" /*Disable looping on I1*/ | ||
84 | |||
85 | "r0 = [I0++];\n\t" | ||
86 | "LOOP pitch%= LC0 = P4 >> 1;\n\t" | ||
87 | "LOOP_BEGIN pitch%=;\n\t" | ||
88 | "I1 = P0;\n\t" | ||
89 | "A1 = A0 = 0;\n\t" | ||
90 | "R1 = [I1++];\n\t" | ||
91 | "LOOP inner_prod%= LC1 = P3 >> 1;\n\t" | ||
92 | "LOOP_BEGIN inner_prod%=;\n\t" | ||
93 | "A1 += R0.L*R1.H, A0 += R0.L*R1.L (IS) || R1.L = W[I1++];\n\t" | ||
94 | "A1 += R0.H*R1.L, A0 += R0.H*R1.H (IS) || R1.H = W[I1++] || R0 = [I0++];\n\t" | ||
95 | "LOOP_END inner_prod%=;\n\t" | ||
96 | "A1 += R0.L*R1.H, A0 += R0.L*R1.L (IS) || R1.L = W[I1++];\n\t" | ||
97 | "A1 += R0.H*R1.L, A0 += R0.H*R1.H (IS) || R0 = [I0++];\n\t" | ||
98 | "A0 = A0 >>> 6;\n\t" | ||
99 | "A1 = A1 >>> 6;\n\t" | ||
100 | "R2 = A0, R3 = A1;\n\t" | ||
101 | "[P1--] = r2;\n\t" | ||
102 | "[P1--] = r3;\n\t" | ||
103 | "P0 += 4;\n\t" | ||
104 | "LOOP_END pitch%=;\n\t" | ||
105 | "L0 = 0;\n\t" | ||
106 | : : "m" (_x), "m" (_y), "m" (corr), "m" (len), "m" (nb_pitch) | ||
107 | : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "I0", "I1", "L0", "L1", "B0", "B1", "memory" | ||
108 | ); | ||
109 | } | ||
110 | |||
111 | #define OVERRIDE_COMPUTE_PITCH_ERROR | ||
112 | static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control) | ||
113 | { | ||
114 | spx_word32_t sum; | ||
115 | __asm__ __volatile__ | ||
116 | ( | ||
117 | "A0 = 0;\n\t" | ||
118 | |||
119 | "R0 = W[%1++];\n\t" | ||
120 | "R1.L = %2.L*%5.L (IS);\n\t" | ||
121 | "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
122 | |||
123 | "R1.L = %3.L*%5.L (IS);\n\t" | ||
124 | "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
125 | |||
126 | "R1.L = %4.L*%5.L (IS);\n\t" | ||
127 | "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
128 | |||
129 | "R1.L = %2.L*%3.L (IS);\n\t" | ||
130 | "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
131 | |||
132 | "R1.L = %4.L*%3.L (IS);\n\t" | ||
133 | "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
134 | |||
135 | "R1.L = %4.L*%2.L (IS);\n\t" | ||
136 | "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
137 | |||
138 | "R1.L = %2.L*%2.L (IS);\n\t" | ||
139 | "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
140 | |||
141 | "R1.L = %3.L*%3.L (IS);\n\t" | ||
142 | "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" | ||
143 | |||
144 | "R1.L = %4.L*%4.L (IS);\n\t" | ||
145 | "A0 -= R1.L*R0.L (IS);\n\t" | ||
146 | |||
147 | "%0 = A0;\n\t" | ||
148 | : "=&D" (sum), "=a" (C) | ||
149 | : "d" (g[0]), "d" (g[1]), "d" (g[2]), "d" (pitch_control), "1" (C) | ||
150 | : "R0", "R1", "R2", "A0" | ||
151 | ); | ||
152 | return sum; | ||
153 | } | ||
154 | |||
155 | #define OVERRIDE_OPEN_LOOP_NBEST_PITCH | ||
156 | #ifdef OVERRIDE_OPEN_LOOP_NBEST_PITCH | ||
157 | void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack) | ||
158 | { | ||
159 | int i,j,k; | ||
160 | VARDECL(spx_word32_t *best_score); | ||
161 | VARDECL(spx_word32_t *best_ener); | ||
162 | spx_word32_t e0; | ||
163 | VARDECL(spx_word32_t *corr); | ||
164 | VARDECL(spx_word32_t *energy); | ||
165 | |||
166 | ALLOC(best_score, N, spx_word32_t); | ||
167 | ALLOC(best_ener, N, spx_word32_t); | ||
168 | ALLOC(corr, end-start+1, spx_word32_t); | ||
169 | ALLOC(energy, end-start+2, spx_word32_t); | ||
170 | |||
171 | for (i=0;i<N;i++) | ||
172 | { | ||
173 | best_score[i]=-1; | ||
174 | best_ener[i]=0; | ||
175 | pitch[i]=start; | ||
176 | } | ||
177 | |||
178 | energy[0]=inner_prod(sw-start, sw-start, len); | ||
179 | e0=inner_prod(sw, sw, len); | ||
180 | |||
181 | /* energy update -------------------------------------*/ | ||
182 | |||
183 | __asm__ __volatile__ | ||
184 | ( | ||
185 | " P0 = %0;\n\t" | ||
186 | " I1 = %1;\n\t" | ||
187 | " L1 = 0;\n\t" | ||
188 | " I2 = %2;\n\t" | ||
189 | " L2 = 0;\n\t" | ||
190 | " R2 = [P0++];\n\t" | ||
191 | " R3 = 0;\n\t" | ||
192 | " LSETUP (eu1, eu2) LC1 = %3;\n\t" | ||
193 | "eu1: R1.L = W [I1--] || R0.L = W [I2--] ;\n\t" | ||
194 | " R1 = R1.L * R1.L (IS);\n\t" | ||
195 | " R0 = R0.L * R0.L (IS);\n\t" | ||
196 | " R1 >>>= 6;\n\t" | ||
197 | " R1 = R1 + R2;\n\t" | ||
198 | " R0 >>>= 6;\n\t" | ||
199 | " R1 = R1 - R0;\n\t" | ||
200 | " R2 = MAX(R1,R3);\n\t" | ||
201 | "eu2: [P0++] = R2;\n\t" | ||
202 | : : "d" (energy), "d" (&sw[-start-1]), "d" (&sw[-start+len-1]), | ||
203 | "a" (end-start) | ||
204 | : "P0", "I1", "I2", "R0", "R1", "R2", "R3" | ||
205 | #if (__GNUC__ == 4) | ||
206 | , "LC1" | ||
207 | #endif | ||
208 | ); | ||
209 | |||
210 | pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack); | ||
211 | |||
212 | /* FIXME: Fixed-point and floating-point code should be merged */ | ||
213 | { | ||
214 | VARDECL(spx_word16_t *corr16); | ||
215 | VARDECL(spx_word16_t *ener16); | ||
216 | ALLOC(corr16, end-start+1, spx_word16_t); | ||
217 | ALLOC(ener16, end-start+1, spx_word16_t); | ||
218 | /* Normalize to 180 so we can square it and it still fits in 16 bits */ | ||
219 | normalize16(corr, corr16, 180, end-start+1); | ||
220 | normalize16(energy, ener16, 180, end-start+1); | ||
221 | |||
222 | if (N == 1) { | ||
223 | /* optimised asm to handle N==1 case */ | ||
224 | __asm__ __volatile__ | ||
225 | ( | ||
226 | " I0 = %1;\n\t" /* I0: corr16[] */ | ||
227 | " L0 = 0;\n\t" | ||
228 | " I1 = %2;\n\t" /* I1: energy */ | ||
229 | " L1 = 0;\n\t" | ||
230 | " R2 = -1;\n\t" /* R2: best score */ | ||
231 | " R3 = 0;\n\t" /* R3: best energy */ | ||
232 | " P0 = %4;\n\t" /* P0: best pitch */ | ||
233 | " P1 = %4;\n\t" /* P1: counter */ | ||
234 | " LSETUP (sl1, sl2) LC1 = %3;\n\t" | ||
235 | "sl1: R0.L = W [I0++] || R1.L = W [I1++];\n\t" | ||
236 | " R0 = R0.L * R0.L (IS);\n\t" | ||
237 | " R1 += 1;\n\t" | ||
238 | " R4 = R0.L * R3.L;\n\t" | ||
239 | " R5 = R2.L * R1.L;\n\t" | ||
240 | " cc = R5 < R4;\n\t" | ||
241 | " if cc R2 = R0;\n\t" | ||
242 | " if cc R3 = R1;\n\t" | ||
243 | " if cc P0 = P1;\n\t" | ||
244 | "sl2: P1 += 1;\n\t" | ||
245 | " %0 = P0;\n\t" | ||
246 | : "=&d" (pitch[0]) | ||
247 | : "a" (corr16), "a" (ener16), "a" (end+1-start), "d" (start) | ||
248 | : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5" | ||
249 | #if (__GNUC__ == 4) | ||
250 | , "LC1" | ||
251 | #endif | ||
252 | ); | ||
253 | |||
254 | } | ||
255 | else { | ||
256 | for (i=start;i<=end;i++) | ||
257 | { | ||
258 | spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]); | ||
259 | /* Instead of dividing the tmp by the energy, we multiply on the other side */ | ||
260 | if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) | ||
261 | { | ||
262 | /* We can safely put it last and then check */ | ||
263 | best_score[N-1]=tmp; | ||
264 | best_ener[N-1]=ener16[i-start]+1; | ||
265 | pitch[N-1]=i; | ||
266 | /* Check if it comes in front of others */ | ||
267 | for (j=0;j<N-1;j++) | ||
268 | { | ||
269 | if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start]))) | ||
270 | { | ||
271 | for (k=N-1;k>j;k--) | ||
272 | { | ||
273 | best_score[k]=best_score[k-1]; | ||
274 | best_ener[k]=best_ener[k-1]; | ||
275 | pitch[k]=pitch[k-1]; | ||
276 | } | ||
277 | best_score[j]=tmp; | ||
278 | best_ener[j]=ener16[i-start]+1; | ||
279 | pitch[j]=i; | ||
280 | break; | ||
281 | } | ||
282 | } | ||
283 | } | ||
284 | } | ||
285 | } | ||
286 | } | ||
287 | |||
288 | /* Compute open-loop gain */ | ||
289 | if (gain) | ||
290 | { | ||
291 | for (j=0;j<N;j++) | ||
292 | { | ||
293 | spx_word16_t g; | ||
294 | i=pitch[j]; | ||
295 | g = DIV32(corr[i-start], 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(energy[i-start])),6)); | ||
296 | /* FIXME: g = max(g,corr/energy) */ | ||
297 | if (g<0) | ||
298 | g = 0; | ||
299 | gain[j]=g; | ||
300 | } | ||
301 | } | ||
302 | } | ||
303 | #endif | ||
304 | |||
305 | #define OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ | ||
306 | #ifdef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ | ||
307 | static int pitch_gain_search_3tap_vq( | ||
308 | const signed char *gain_cdbk, | ||
309 | int gain_cdbk_size, | ||
310 | spx_word16_t *C16, | ||
311 | spx_word16_t max_gain | ||
312 | ) | ||
313 | { | ||
314 | const signed char *ptr=gain_cdbk; | ||
315 | int best_cdbk=0; | ||
316 | spx_word32_t best_sum=-VERY_LARGE32; | ||
317 | spx_word32_t sum=0; | ||
318 | spx_word16_t g[3]; | ||
319 | spx_word16_t pitch_control=64; | ||
320 | spx_word16_t gain_sum; | ||
321 | int i; | ||
322 | |||
323 | /* fast asm version of VQ codebook search */ | ||
324 | |||
325 | __asm__ __volatile__ | ||
326 | ( | ||
327 | |||
328 | " P0 = %2;\n\t" /* P0: ptr to gain_cdbk */ | ||
329 | " L1 = 0;\n\t" /* no circ addr for L1 */ | ||
330 | " %0 = 0;\n\t" /* %0: best_sum */ | ||
331 | " %1 = 0;\n\t" /* %1: best_cbdk */ | ||
332 | " P1 = 0;\n\t" /* P1: loop counter */ | ||
333 | |||
334 | " LSETUP (pgs1, pgs2) LC1 = %4;\n\t" | ||
335 | "pgs1: R2 = B [P0++] (X);\n\t" /* R2: g[0] */ | ||
336 | " R3 = B [P0++] (X);\n\t" /* R3: g[1] */ | ||
337 | " R4 = B [P0++] (X);\n\t" /* R4: g[2] */ | ||
338 | " R2 += 32;\n\t" | ||
339 | " R3 += 32;\n\t" | ||
340 | " R4 += 32;\n\t" | ||
341 | " R4.H = 64;\n\t" /* R4.H: pitch_control */ | ||
342 | |||
343 | " R0 = B [P0++] (X);\n\t" | ||
344 | " B0 = R0;\n\t" /* BO: gain_sum */ | ||
345 | |||
346 | /* compute_pitch_error() -------------------------------*/ | ||
347 | |||
348 | " I1 = %3;\n\t" /* I1: ptr to C */ | ||
349 | " A0 = 0;\n\t" | ||
350 | |||
351 | " R0.L = W[I1++];\n\t" | ||
352 | " R1.L = R2.L*R4.H (IS);\n\t" | ||
353 | " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
354 | |||
355 | " R1.L = R3.L*R4.H (IS);\n\t" | ||
356 | " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
357 | |||
358 | " R1.L = R4.L*R4.H (IS);\n\t" | ||
359 | " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
360 | |||
361 | " R1.L = R2.L*R3.L (IS);\n\t" | ||
362 | " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
363 | |||
364 | " R1.L = R4.L*R3.L (IS);\n\t" | ||
365 | " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
366 | |||
367 | " R1.L = R4.L*R2.L (IS);\n\t" | ||
368 | " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
369 | |||
370 | " R1.L = R2.L*R2.L (IS);\n\t" | ||
371 | " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
372 | |||
373 | " R1.L = R3.L*R3.L (IS);\n\t" | ||
374 | " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" | ||
375 | |||
376 | " R1.L = R4.L*R4.L (IS);\n\t" | ||
377 | " R0 = (A0 -= R1.L*R0.L) (IS);\n\t" | ||
378 | |||
379 | /* | ||
380 | Re-arrange the if-then to code efficiently on the Blackfin: | ||
381 | |||
382 | if (sum>best_sum && gain_sum<=max_gain) ------ (1) | ||
383 | |||
384 | if (sum>best_sum && !(gain_sum>max_gain)) ------ (2) | ||
385 | |||
386 | if (max_gain<=gain_sum) { ------ (3) | ||
387 | sum = -VERY_LARGE32; | ||
388 | } | ||
389 | if (best_sum<=sum) | ||
390 | |||
391 | The blackin cc instructions are all of the form: | ||
392 | |||
393 | cc = x < y (or cc = x <= y) | ||
394 | */ | ||
395 | " R1 = B0\n\t" | ||
396 | " R2 = %5\n\t" | ||
397 | " R3 = %6\n\t" | ||
398 | " cc = R2 <= R1;\n\t" | ||
399 | " if cc R0 = R3;\n\t" | ||
400 | " cc = %0 <= R0;\n\t" | ||
401 | " if cc %0 = R0;\n\t" | ||
402 | " if cc %1 = P1;\n\t" | ||
403 | |||
404 | "pgs2: P1 += 1;\n\t" | ||
405 | |||
406 | : "=&d" (best_sum), "=&d" (best_cdbk) | ||
407 | : "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain), | ||
408 | "b" (-VERY_LARGE32) | ||
409 | : "R0", "R1", "R2", "R3", "R4", "P0", | ||
410 | "P1", "I1", "L1", "A0", "B0" | ||
411 | #if (__GNUC__ == 4) | ||
412 | , "LC1" | ||
413 | #endif | ||
414 | ); | ||
415 | |||
416 | return best_cdbk; | ||
417 | } | ||
418 | #endif | ||
419 | |||