diff options
author | Sean Bartell <wingedtachikoma@gmail.com> | 2011-06-25 21:32:25 -0400 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-04-25 22:13:20 +0200 |
commit | f40bfc9267b13b54e6379dfe7539447662879d24 (patch) | |
tree | 9b20069d5e62809ff434061ad730096836f916f2 /lib/rbcodec/codecs/libspeex/ltp_sse.h | |
parent | a0009907de7a0107d49040d8a180f140e2eff299 (diff) | |
download | rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.tar.gz rockbox-f40bfc9267b13b54e6379dfe7539447662879d24.zip |
Add codecs to librbcodec.
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius <nils@rockbox.org>
Tested-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/libspeex/ltp_sse.h')
-rw-r--r-- | lib/rbcodec/codecs/libspeex/ltp_sse.h | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libspeex/ltp_sse.h b/lib/rbcodec/codecs/libspeex/ltp_sse.h new file mode 100644 index 0000000000..bed6eaac9a --- /dev/null +++ b/lib/rbcodec/codecs/libspeex/ltp_sse.h | |||
@@ -0,0 +1,92 @@ | |||
1 | /* Copyright (C) 2002 Jean-Marc Valin */ | ||
2 | /** | ||
3 | @file ltp_sse.h | ||
4 | @brief Long-Term Prediction functions (SSE version) | ||
5 | */ | ||
6 | /* | ||
7 | Redistribution and use in source and binary forms, with or without | ||
8 | modification, are permitted provided that the following conditions | ||
9 | are met: | ||
10 | |||
11 | - Redistributions of source code must retain the above copyright | ||
12 | notice, this list of conditions and the following disclaimer. | ||
13 | |||
14 | - Redistributions in binary form must reproduce the above copyright | ||
15 | notice, this list of conditions and the following disclaimer in the | ||
16 | documentation and/or other materials provided with the distribution. | ||
17 | |||
18 | - Neither the name of the Xiph.org Foundation nor the names of its | ||
19 | contributors may be used to endorse or promote products derived from | ||
20 | this software without specific prior written permission. | ||
21 | |||
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
23 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR | ||
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
33 | */ | ||
34 | |||
35 | #include <xmmintrin.h> | ||
36 | |||
37 | #define OVERRIDE_INNER_PROD | ||
/**
 * Inner (dot) product of two float vectors, SSE version.
 *
 * @param a    first input vector (no alignment requirement; unaligned loads are used)
 * @param b    second input vector (no alignment requirement)
 * @param len  number of elements to multiply-accumulate; values <= 0 yield 0
 * @return     sum of a[k]*b[k] for k in [0, len)
 *
 * Exactly `len` elements of each input are read. Lengths that are a multiple
 * of 8 go through the two-vectors-per-pass loop only, in the same operation
 * order as before; other lengths are finished with one 4-wide step and/or a
 * scalar tail instead of reading past the end of the inputs.
 */
float inner_prod(const float *a, const float *b, int len)
{
   int i = 0;
   float ret;
   __m128 sum = _mm_setzero_ps();

   /* Main loop: two 4-wide multiply-accumulates (8 floats) per pass. */
   for (; len - i >= 8; i += 8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
   }

   /* At most one complete group of four floats can remain. */
   if (len - i >= 4)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      i += 4;
   }

   /* Horizontal sum: fold lanes 2,3 onto lanes 0,1, then lane 1 onto lane 0. */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);

   /* Scalar tail for the last len % 4 elements. */
   for (; i < len; i++)
      ret += a[i]*b[i];

   return ret;
}
55 | |||
56 | #define OVERRIDE_PITCH_XCORR | ||
57 | void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack) | ||
58 | { | ||
59 | int i, offset; | ||
60 | VARDECL(__m128 *x); | ||
61 | VARDECL(__m128 *y); | ||
62 | int N, L; | ||
63 | N = len>>2; | ||
64 | L = nb_pitch>>2; | ||
65 | ALLOC(x, N, __m128); | ||
66 | ALLOC(y, N+L, __m128); | ||
67 | for (i=0;i<N;i++) | ||
68 | x[i] = _mm_loadu_ps(_x+(i<<2)); | ||
69 | for (offset=0;offset<4;offset++) | ||
70 | { | ||
71 | for (i=0;i<N+L;i++) | ||
72 | y[i] = _mm_loadu_ps(_y+(i<<2)+offset); | ||
73 | for (i=0;i<L;i++) | ||
74 | { | ||
75 | int j; | ||
76 | __m128 sum, *xx, *yy; | ||
77 | sum = _mm_setzero_ps(); | ||
78 | yy = y+i; | ||
79 | xx = x; | ||
80 | for (j=0;j<N;j+=2) | ||
81 | { | ||
82 | sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0])); | ||
83 | sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1])); | ||
84 | xx += 2; | ||
85 | yy += 2; | ||
86 | } | ||
87 | sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); | ||
88 | sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); | ||
89 | _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum); | ||
90 | } | ||
91 | } | ||
92 | } | ||