diff options
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c b/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c new file mode 100644 index 0000000000..5478568849 --- /dev/null +++ b/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c | |||
@@ -0,0 +1,89 @@ | |||
1 | /* Copyright (c) 2014, Cisco Systems, INC | ||
2 | Written by XiangMingZhu WeiZhou MinPeng YanWang | ||
3 | |||
4 | Redistribution and use in source and binary forms, with or without | ||
5 | modification, are permitted provided that the following conditions | ||
6 | are met: | ||
7 | |||
8 | - Redistributions of source code must retain the above copyright | ||
9 | notice, this list of conditions and the following disclaimer. | ||
10 | |||
11 | - Redistributions in binary form must reproduce the above copyright | ||
12 | notice, this list of conditions and the following disclaimer in the | ||
13 | documentation and/or other materials provided with the distribution. | ||
14 | |||
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
19 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
26 | */ | ||
27 | |||
28 | #ifdef HAVE_CONFIG_H | ||
29 | #include "config.h" | ||
30 | #endif | ||
31 | |||
32 | #include <xmmintrin.h> | ||
33 | #include <emmintrin.h> | ||
34 | #include <smmintrin.h> | ||
35 | #include "celt_lpc.h" | ||
36 | #include "stack_alloc.h" | ||
37 | #include "mathops.h" | ||
38 | #include "pitch.h" | ||
39 | #include "x86cpu.h" | ||
40 | |||
41 | #if defined(FIXED_POINT) | ||
42 | |||
43 | void celt_fir_sse4_1(const opus_val16 *x, | ||
44 | const opus_val16 *num, | ||
45 | opus_val16 *y, | ||
46 | int N, | ||
47 | int ord, | ||
48 | int arch) | ||
49 | { | ||
50 | int i,j; | ||
51 | VARDECL(opus_val16, rnum); | ||
52 | |||
53 | __m128i vecNoA; | ||
54 | opus_int32 noA ; | ||
55 | SAVE_STACK; | ||
56 | |||
57 | ALLOC(rnum, ord, opus_val16); | ||
58 | for(i=0;i<ord;i++) | ||
59 | rnum[i] = num[ord-i-1]; | ||
60 | noA = EXTEND32(1) << SIG_SHIFT >> 1; | ||
61 | vecNoA = _mm_set_epi32(noA, noA, noA, noA); | ||
62 | |||
63 | for (i=0;i<N-3;i+=4) | ||
64 | { | ||
65 | opus_val32 sums[4] = {0}; | ||
66 | __m128i vecSum, vecX; | ||
67 | |||
68 | xcorr_kernel(rnum, x+i-ord, sums, ord, arch); | ||
69 | |||
70 | vecSum = _mm_loadu_si128((__m128i *)sums); | ||
71 | vecSum = _mm_add_epi32(vecSum, vecNoA); | ||
72 | vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT); | ||
73 | vecX = OP_CVTEPI16_EPI32_M64(x + i); | ||
74 | vecSum = _mm_add_epi32(vecSum, vecX); | ||
75 | vecSum = _mm_packs_epi32(vecSum, vecSum); | ||
76 | _mm_storel_epi64((__m128i *)(y + i), vecSum); | ||
77 | } | ||
78 | for (;i<N;i++) | ||
79 | { | ||
80 | opus_val32 sum = 0; | ||
81 | for (j=0;j<ord;j++) | ||
82 | sum = MAC16_16(sum, rnum[j], x[i+j-ord]); | ||
83 | y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT))); | ||
84 | } | ||
85 | |||
86 | RESTORE_STACK; | ||
87 | } | ||
88 | |||
89 | #endif | ||