summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libopus/celt
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libopus/celt')
-rw-r--r--lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h21
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arch.h66
-rwxr-xr-xlib/rbcodec/codecs/libopus/celt/arm/arm2gnu.pl353
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/arm_celt_map.c160
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/armcpu.c185
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/armcpu.h77
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/armopts.s.in37
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/celt_fft_ne10.c173
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c258
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/celt_neon_intr.c211
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/celt_pitch_xcorr_arm.s551
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/fft_arm.h71
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/fixed_arm64.h35
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h6
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h4
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/mdct_arm.h59
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/pitch_arm.h160
-rw-r--r--lib/rbcodec/codecs/libopus/celt/arm/pitch_neon_intr.c290
-rw-r--r--lib/rbcodec/codecs/libopus/celt/bands.c351
-rw-r--r--lib/rbcodec/codecs/libopus/celt/bands.h26
-rw-r--r--lib/rbcodec/codecs/libopus/celt/celt.c44
-rw-r--r--lib/rbcodec/codecs/libopus/celt/celt.h44
-rw-r--r--lib/rbcodec/codecs/libopus/celt/celt_decoder.c263
-rw-r--r--lib/rbcodec/codecs/libopus/celt/celt_encoder.c2607
-rw-r--r--lib/rbcodec/codecs/libopus/celt/celt_lpc.c73
-rw-r--r--lib/rbcodec/codecs/libopus/celt/celt_lpc.h18
-rw-r--r--lib/rbcodec/codecs/libopus/celt/cpu_support.h20
-rw-r--r--lib/rbcodec/codecs/libopus/celt/cwrs.c8
-rw-r--r--lib/rbcodec/codecs/libopus/celt/dump_modes/Makefile32
-rw-r--r--lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes.c353
-rw-r--r--lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arch.h45
-rw-r--r--lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arm_ne10.c152
-rw-r--r--lib/rbcodec/codecs/libopus/celt/entcode.c2
-rw-r--r--lib/rbcodec/codecs/libopus/celt/entcode.h4
-rw-r--r--lib/rbcodec/codecs/libopus/celt/entdec.h2
-rw-r--r--lib/rbcodec/codecs/libopus/celt/entenc.c2
-rw-r--r--lib/rbcodec/codecs/libopus/celt/entenc.h2
-rw-r--r--lib/rbcodec/codecs/libopus/celt/fixed_c5x.h79
-rw-r--r--lib/rbcodec/codecs/libopus/celt/fixed_c6x.h70
-rw-r--r--lib/rbcodec/codecs/libopus/celt/fixed_debug.h791
-rw-r--r--lib/rbcodec/codecs/libopus/celt/fixed_generic.h27
-rw-r--r--lib/rbcodec/codecs/libopus/celt/float_cast.h16
-rw-r--r--lib/rbcodec/codecs/libopus/celt/kiss_fft.c91
-rw-r--r--lib/rbcodec/codecs/libopus/celt/kiss_fft.h67
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mathops.c7
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mathops.h40
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mdct.c38
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mdct.h56
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mips/celt_mipsr1.h151
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mips/fixed_generic_mipsr1.h126
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mips/kiss_fft_mipsr1.h167
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h288
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mips/pitch_mipsr1.h161
-rw-r--r--lib/rbcodec/codecs/libopus/celt/mips/vq_mipsr1.h122
-rw-r--r--lib/rbcodec/codecs/libopus/celt/modes.c10
-rw-r--r--lib/rbcodec/codecs/libopus/celt/opus_custom_demo.c210
-rw-r--r--lib/rbcodec/codecs/libopus/celt/os_support.h6
-rw-r--r--lib/rbcodec/codecs/libopus/celt/pitch.c138
-rw-r--r--lib/rbcodec/codecs/libopus/celt/pitch.h65
-rw-r--r--lib/rbcodec/codecs/libopus/celt/quant_bands.c13
-rw-r--r--lib/rbcodec/codecs/libopus/celt/rate.c17
-rw-r--r--lib/rbcodec/codecs/libopus/celt/rate.h4
-rw-r--r--lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h65
-rw-r--r--lib/rbcodec/codecs/libopus/celt/static_modes_fixed_arm_ne10.h388
-rw-r--r--lib/rbcodec/codecs/libopus/celt/static_modes_float.h888
-rw-r--r--lib/rbcodec/codecs/libopus/celt/static_modes_float_arm_ne10.h404
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_cwrs32.c161
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_dft.c179
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_entropy.c383
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_laplace.c93
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_mathops.c266
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_mdct.c227
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_rotation.c86
-rw-r--r--lib/rbcodec/codecs/libopus/celt/tests/test_unit_types.c50
-rw-r--r--lib/rbcodec/codecs/libopus/celt/vq.c130
-rw-r--r--lib/rbcodec/codecs/libopus/celt/vq.h29
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse.h66
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c89
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.c185
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.h192
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/pitch_sse2.c95
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/pitch_sse4_1.c195
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/vq_sse.h50
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/vq_sse2.c217
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/x86_celt_map.c167
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/x86cpu.c157
-rw-r--r--lib/rbcodec/codecs/libopus/celt/x86/x86cpu.h95
87 files changed, 13825 insertions, 557 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
index 8ddb9adc96..17392b3e90 100644
--- a/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
+++ b/lib/rbcodec/codecs/libopus/celt/_kiss_fft_guts.h
@@ -58,12 +58,12 @@
58# define S_MUL(a,b) MULT16_32_Q15(b, a) 58# define S_MUL(a,b) MULT16_32_Q15(b, a)
59 59
60# define C_MUL(m,a,b) \ 60# define C_MUL(m,a,b) \
61 do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ 61 do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
62 (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) 62 (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
63 63
64# define C_MULC(m,a,b) \ 64# define C_MULC(m,a,b) \
65 do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ 65 do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
66 (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) 66 (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
67 67
68# define C_MULBYSCALAR( c, s ) \ 68# define C_MULBYSCALAR( c, s ) \
69 do{ (c).r = S_MUL( (c).r , s ) ;\ 69 do{ (c).r = S_MUL( (c).r , s ) ;\
@@ -77,17 +77,17 @@
77 DIVSCALAR( (c).i , div); }while (0) 77 DIVSCALAR( (c).i , div); }while (0)
78 78
79#define C_ADD( res, a,b)\ 79#define C_ADD( res, a,b)\
80 do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \ 80 do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \
81 }while(0) 81 }while(0)
82#define C_SUB( res, a,b)\ 82#define C_SUB( res, a,b)\
83 do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \ 83 do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \
84 }while(0) 84 }while(0)
85#define C_ADDTO( res , a)\ 85#define C_ADDTO( res , a)\
86 do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ 86 do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\
87 }while(0) 87 }while(0)
88 88
89#define C_SUBFROM( res , a)\ 89#define C_SUBFROM( res , a)\
90 do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ 90 do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \
91 }while(0) 91 }while(0)
92 92
93#if defined(OPUS_ARM_INLINE_ASM) 93#if defined(OPUS_ARM_INLINE_ASM)
@@ -97,9 +97,8 @@
97#if defined(OPUS_ARM_INLINE_EDSP) 97#if defined(OPUS_ARM_INLINE_EDSP)
98#include "arm/kiss_fft_armv5e.h" 98#include "arm/kiss_fft_armv5e.h"
99#endif 99#endif
100 100#if defined(MIPSr1_ASM)
101#if defined(OPUS_CF_INLINE_ASM) 101#include "mips/kiss_fft_mipsr1.h"
102#include "cf/kiss_fft_cf.h"
103#endif 102#endif
104 103
105#else /* not FIXED_POINT*/ 104#else /* not FIXED_POINT*/
diff --git a/lib/rbcodec/codecs/libopus/celt/arch.h b/lib/rbcodec/codecs/libopus/celt/arch.h
index 035b92ff29..08b07db598 100644
--- a/lib/rbcodec/codecs/libopus/celt/arch.h
+++ b/lib/rbcodec/codecs/libopus/celt/arch.h
@@ -46,30 +46,54 @@
46# endif 46# endif
47# endif 47# endif
48 48
49#if OPUS_GNUC_PREREQ(3, 0)
50#define opus_likely(x) (__builtin_expect(!!(x), 1))
51#define opus_unlikely(x) (__builtin_expect(!!(x), 0))
52#else
53#define opus_likely(x) (!!(x))
54#define opus_unlikely(x) (!!(x))
55#endif
56
49#define CELT_SIG_SCALE 32768.f 57#define CELT_SIG_SCALE 32768.f
50 58
51#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); 59#define CELT_FATAL(str) celt_fatal(str, __FILE__, __LINE__);
52#ifdef ENABLE_ASSERTIONS 60
61#if defined(ENABLE_ASSERTIONS) || defined(ENABLE_HARDENING)
62#ifdef __GNUC__
63__attribute__((noreturn))
64#endif
65void celt_fatal(const char *str, const char *file, int line);
66
67#if defined(CELT_C) && !defined(OVERRIDE_celt_fatal)
53#include <stdio.h> 68#include <stdio.h>
54#include <stdlib.h> 69#include <stdlib.h>
55#ifdef __GNUC__ 70#ifdef __GNUC__
56__attribute__((noreturn)) 71__attribute__((noreturn))
57#endif 72#endif
58static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) 73void celt_fatal(const char *str, const char *file, int line)
59{ 74{
60 fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); 75 fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
61 abort(); 76 abort();
62} 77}
63#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} 78#endif
64#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} 79
80#define celt_assert(cond) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond);}}
81#define celt_assert2(cond, message) {if (!(cond)) {CELT_FATAL("assertion failed: " #cond "\n" message);}}
82#define MUST_SUCCEED(call) celt_assert((call) == OPUS_OK)
65#else 83#else
66#define celt_assert(cond) 84#define celt_assert(cond)
67#define celt_assert2(cond, message) 85#define celt_assert2(cond, message)
86#define MUST_SUCCEED(call) do {if((call) != OPUS_OK) {RESTORE_STACK; return OPUS_INTERNAL_ERROR;} } while (0)
87#endif
88
89#if defined(ENABLE_ASSERTIONS)
90#define celt_sig_assert(cond) {if (!(cond)) {CELT_FATAL("signal assertion failed: " #cond);}}
91#else
92#define celt_sig_assert(cond)
68#endif 93#endif
69 94
70#define IMUL32(a,b) ((a)*(b)) 95#define IMUL32(a,b) ((a)*(b))
71 96
72#define ABS(x) ((x) < 0 ? (-(x)) : (x))
73#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ 97#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */
74#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ 98#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */
75#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ 99#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */
@@ -79,20 +103,35 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
79#define UADD32(a,b) ((a)+(b)) 103#define UADD32(a,b) ((a)+(b))
80#define USUB32(a,b) ((a)-(b)) 104#define USUB32(a,b) ((a)-(b))
81 105
106/* Set this if opus_int64 is a native type of the CPU. */
107/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
108 (which can be ILP32 for x32) and Win64 (which is LLP64). */
109#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
110#define OPUS_FAST_INT64 1
111#else
112#define OPUS_FAST_INT64 0
113#endif
114
82#define PRINT_MIPS(file) 115#define PRINT_MIPS(file)
83 116
84#ifdef FIXED_POINT 117#ifdef FIXED_POINT
85 118
86typedef opus_int16 opus_val16; 119typedef opus_int16 opus_val16;
87typedef opus_int32 opus_val32; 120typedef opus_int32 opus_val32;
121typedef opus_int64 opus_val64;
88 122
89typedef opus_val32 celt_sig; 123typedef opus_val32 celt_sig;
90typedef opus_val16 celt_norm; 124typedef opus_val16 celt_norm;
91typedef opus_val32 celt_ener; 125typedef opus_val32 celt_ener;
92 126
127#define celt_isnan(x) 0
128
93#define Q15ONE 32767 129#define Q15ONE 32767
94 130
95#define SIG_SHIFT 12 131#define SIG_SHIFT 12
132/* Safe saturation value for 32-bit signals. Should be less than
133 2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
134#define SIG_SAT (300000000)
96 135
97#define NORM_SCALING 16384 136#define NORM_SCALING 16384
98 137
@@ -119,7 +158,9 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
119 158
120#include "fixed_generic.h" 159#include "fixed_generic.h"
121 160
122#ifdef OPUS_ARM_INLINE_EDSP 161#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
162#include "arm/fixed_arm64.h"
163#elif defined (OPUS_ARM_INLINE_EDSP)
123#include "arm/fixed_armv5e.h" 164#include "arm/fixed_armv5e.h"
124#elif defined (OPUS_ARM_INLINE_ASM) 165#elif defined (OPUS_ARM_INLINE_ASM)
125#include "arm/fixed_armv4.h" 166#include "arm/fixed_armv4.h"
@@ -129,8 +170,6 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
129#include "fixed_c5x.h" 170#include "fixed_c5x.h"
130#elif defined (TI_C6X_ASM) 171#elif defined (TI_C6X_ASM)
131#include "fixed_c6x.h" 172#include "fixed_c6x.h"
132#elif defined (OPUS_CF_INLINE_ASM)
133#include "cf/fixed_cf.h"
134#endif 173#endif
135 174
136#endif 175#endif
@@ -139,6 +178,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
139 178
140typedef float opus_val16; 179typedef float opus_val16;
141typedef float opus_val32; 180typedef float opus_val32;
181typedef float opus_val64;
142 182
143typedef float celt_sig; 183typedef float celt_sig;
144typedef float celt_norm; 184typedef float celt_norm;
@@ -178,6 +218,7 @@ static OPUS_INLINE int celt_isnan(float x)
178 218
179#define NEG16(x) (-(x)) 219#define NEG16(x) (-(x))
180#define NEG32(x) (-(x)) 220#define NEG32(x) (-(x))
221#define NEG32_ovflw(x) (-(x))
181#define EXTRACT16(x) (x) 222#define EXTRACT16(x) (x)
182#define EXTEND32(x) (x) 223#define EXTEND32(x) (x)
183#define SHR16(a,shift) (a) 224#define SHR16(a,shift) (a)
@@ -194,6 +235,7 @@ static OPUS_INLINE int celt_isnan(float x)
194#define SATURATE16(x) (x) 235#define SATURATE16(x) (x)
195 236
196#define ROUND16(a,shift) (a) 237#define ROUND16(a,shift) (a)
238#define SROUND16(a,shift) (a)
197#define HALF16(x) (.5f*(x)) 239#define HALF16(x) (.5f*(x))
198#define HALF32(x) (.5f*(x)) 240#define HALF32(x) (.5f*(x))
199 241
@@ -201,6 +243,8 @@ static OPUS_INLINE int celt_isnan(float x)
201#define SUB16(a,b) ((a)-(b)) 243#define SUB16(a,b) ((a)-(b))
202#define ADD32(a,b) ((a)+(b)) 244#define ADD32(a,b) ((a)+(b))
203#define SUB32(a,b) ((a)-(b)) 245#define SUB32(a,b) ((a)-(b))
246#define ADD32_ovflw(a,b) ((a)+(b))
247#define SUB32_ovflw(a,b) ((a)-(b))
204#define MULT16_16_16(a,b) ((a)*(b)) 248#define MULT16_16_16(a,b) ((a)*(b))
205#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) 249#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
206#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) 250#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
@@ -235,9 +279,9 @@ static OPUS_INLINE int celt_isnan(float x)
235 279
236#ifndef GLOBAL_STACK_SIZE 280#ifndef GLOBAL_STACK_SIZE
237#ifdef FIXED_POINT 281#ifdef FIXED_POINT
238#define GLOBAL_STACK_SIZE 100000 282#define GLOBAL_STACK_SIZE 120000
239#else 283#else
240#define GLOBAL_STACK_SIZE 100000 284#define GLOBAL_STACK_SIZE 120000
241#endif 285#endif
242#endif 286#endif
243 287
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/arm2gnu.pl b/lib/rbcodec/codecs/libopus/celt/arm/arm2gnu.pl
new file mode 100755
index 0000000000..a2895f7445
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/arm2gnu.pl
@@ -0,0 +1,353 @@
1#!/usr/bin/perl
2# Copyright (C) 2002-2013 Xiph.org Foundation
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions
6# are met:
7#
8# - Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10#
11# - Redistributions in binary form must reproduce the above copyright
12# notice, this list of conditions and the following disclaimer in the
13# documentation and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27my $bigend; # little/big endian
28my $nxstack;
29my $apple = 0;
30my $symprefix = "";
31
32$nxstack = 0;
33
34eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
35 if $running_under_some_shell;
36
37while ($ARGV[0] =~ /^-/) {
38 $_ = shift;
39 last if /^--$/;
40 if (/^-n$/) {
41 $nflag++;
42 next;
43 }
44 if (/^--apple$/) {
45 $apple = 1;
46 $symprefix = "_";
47 next;
48 }
49 die "I don't recognize this switch: $_\\n";
50}
51$printit++ unless $nflag;
52
53$\ = "\n"; # automatically add newline on print
54$n=0;
55
56$thumb = 0; # ARM mode by default, not Thumb.
57@proc_stack = ();
58
59printf (" .syntax unified\n");
60
61LINE:
62while (<>) {
63
64 # For ADRLs we need to add a new line after the substituted one.
65 $addPadding = 0;
66
67 # First, we do not dare to touch *anything* inside double quotes, do we?
68 # Second, if you want a dollar character in the string,
69 # insert two of them -- that's how ARM C and assembler treat strings.
70 s/^([A-Za-z_]\w*)[ \t]+DCB[ \t]*\"/$1: .ascii \"/ && do { s/\$\$/\$/g; next };
71 s/\bDCB\b[ \t]*\"/.ascii \"/ && do { s/\$\$/\$/g; next };
72 s/^(\S+)\s+RN\s+(\S+)/$1 .req r$2/ && do { s/\$\$/\$/g; next };
73 # If there's nothing on a line but a comment, don't try to apply any further
74 # substitutions (this is a cheap hack to avoid mucking up the license header)
75 s/^([ \t]*);/$1@/ && do { s/\$\$/\$/g; next };
76 # If substituted -- leave immediately !
77
78 s/@/,:/;
79 s/;/@/;
80 while ( /@.*'/ ) {
81 s/(@.*)'/$1/g;
82 }
83 s/\{FALSE\}/0/g;
84 s/\{TRUE\}/1/g;
85 s/\{(\w\w\w\w+)\}/$1/g;
86 s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/;
87 s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/;
88 s/\bIMPORT\b/.extern/;
89 s/\bEXPORT\b\s*/.global $symprefix/;
90 s/^(\s+)\[/$1IF/;
91 s/^(\s+)\|/$1ELSE/;
92 s/^(\s+)\]/$1ENDIF/;
93 s/IF *:DEF:/ .ifdef/;
94 s/IF *:LNOT: *:DEF:/ .ifndef/;
95 s/ELSE/ .else/;
96 s/ENDIF/ .endif/;
97
98 if( /\bIF\b/ ) {
99 s/\bIF\b/ .if/;
100 s/=/==/;
101 }
102 if ( $n == 2) {
103 s/\$/\\/g;
104 }
105 if ($n == 1) {
106 s/\$//g;
107 s/label//g;
108 $n = 2;
109 }
110 if ( /MACRO/ ) {
111 s/MACRO *\n/.macro/;
112 $n=1;
113 }
114 if ( /\bMEND\b/ ) {
115 s/\bMEND\b/.endm/;
116 $n=0;
117 }
118
119 # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there.
120 #
121 if ( /\bAREA\b/ ) {
122 my $align;
123 $align = "2";
124 if ( /ALIGN=(\d+)/ ) {
125 $align = $1;
126 }
127 if ( /CODE/ ) {
128 $nxstack = 1;
129 }
130 s/^(.+)CODE(.+)READONLY(.*)/ .text/;
131 s/^(.+)DATA(.+)READONLY(.*)/ .section .rdata/;
132 s/^(.+)\|\|\.data\|\|(.+)/ .data/;
133 s/^(.+)\|\|\.bss\|\|(.+)/ .bss/;
134 s/$/; .p2align $align/;
135 # Enable NEON instructions but don't produce a binary that requires
136 # ARMv7. RVCT does not have equivalent directives, so we just do this
137 # for all CODE areas.
138 if ( /.text/ ) {
139 # Separating .arch, .fpu, etc., by semicolons does not work (gas
140 # thinks the semicolon is part of the arch name, even when there's
141 # whitespace separating them). Sadly this means our line numbers
142 # won't match the original source file (we could use the .line
143 # directive, which is documented to be obsolete, but then gdb will
144 # show the wrong line in the translated source file).
145 s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/ unless ($apple);
146 }
147 }
148
149 s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/; # ||.constdata$3||
150 s/\|\|\.bss\$(\d+)\|\|/.L_BSS$1/; # ||.bss$2||
151 s/\|\|\.data\$(\d+)\|\|/.L_DATA$1/; # ||.data$2||
152 s/\|\|([a-zA-Z0-9_]+)\@([a-zA-Z0-9_]+)\|\|/@ $&/;
153 s/^(\s+)\%(\s)/ .space $1/;
154
155 s/\|(.+)\.(\d+)\|/\.$1_$2/; # |L80.123| -> .L80_123
156 s/\bCODE32\b/.code 32/ && do {$thumb = 0};
157 s/\bCODE16\b/.code 16/ && do {$thumb = 1};
158 if (/\bPROC\b/)
159 {
160 my $prefix;
161 my $proc;
162 /^([A-Za-z_\.]\w+)\b/;
163 $proc = $1;
164 $prefix = "";
165 if ($proc)
166 {
167 $prefix = $prefix.sprintf("\t.type\t%s, %%function", $proc) unless ($apple);
168 # Make sure $prefix isn't empty here (for the $apple case).
169 # We handle mangling the label here, make sure it doesn't match
170 # the label handling below (if $prefix would be empty).
171 $prefix = $prefix."; ";
172 push(@proc_stack, $proc);
173 s/^[A-Za-z_\.]\w+/$symprefix$&:/;
174 }
175 $prefix = $prefix."\t.thumb_func; " if ($thumb);
176 s/\bPROC\b/@ $&/;
177 $_ = $prefix.$_;
178 }
179 s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/;
180 s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/;
181 if (/\bENDP\b/)
182 {
183 my $proc;
184 s/\bENDP\b/@ $&/;
185 $proc = pop(@proc_stack);
186 $_ = "\t.size $proc, .-$proc".$_ if ($proc && !$apple);
187 }
188 s/\bSUBT\b/@ $&/;
189 s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm guide, p.7-25
190 s/\bKEEP\b/@ $&/;
191 s/\bEXPORTAS\b/@ $&/;
192 s/\|\|(.)+\bEQU\b/@ $&/;
193 s/\|\|([\w\$]+)\|\|/$1/;
194 s/\bENTRY\b/@ $&/;
195 s/\bASSERT\b/@ $&/;
196 s/\bGBLL\b/@ $&/;
197 s/\bGBLA\b/@ $&/;
198 s/^\W+OPT\b/@ $&/;
199 s/:OR:/|/g;
200 s/:SHL:/<</g;
201 s/:SHR:/>>/g;
202 s/:AND:/&/g;
203 s/:LAND:/&&/g;
204 s/CPSR/cpsr/;
205 s/SPSR/spsr/;
206 s/ALIGN$/.balign 4/;
207 s/ALIGN\s+([0-9x]+)$/.balign $1/;
208 s/psr_cxsf/psr_all/;
209 s/LTORG/.ltorg/;
210 s/^([A-Za-z_]\w*)[ \t]+EQU/ .set $1,/;
211 s/^([A-Za-z_]\w*)[ \t]+SETL/ .set $1,/;
212 s/^([A-Za-z_]\w*)[ \t]+SETA/ .set $1,/;
213 s/^([A-Za-z_]\w*)[ \t]+\*/ .set $1,/;
214
215 # {PC} + 0xdeadfeed --> . + 0xdeadfeed
216 s/\{PC\} \+/ \. +/;
217
218 # Single hex constant on the line !
219 #
220 # >>> NOTE <<<
221 # Double-precision floats in gcc are always mixed-endian, which means
222 # bytes in two words are little-endian, but words are big-endian.
223 # So, 0x0000deadfeed0000 would be stored as 0x0000dead at low address
224 # and 0xfeed0000 at high address.
225 #
226 s/\bDCFD\b[ \t]+0x([a-fA-F0-9]{8})([a-fA-F0-9]{8})/.long 0x$1, 0x$2/;
227 # Only decimal constants on the line, no hex !
228 s/\bDCFD\b[ \t]+([0-9\.\-]+)/.double $1/;
229
230 # Single hex constant on the line !
231# s/\bDCFS\b[ \t]+0x([a-f0-9]{8})([a-f0-9]{8})/.long 0x$1, 0x$2/;
232 # Only decimal constants on the line, no hex !
233# s/\bDCFS\b[ \t]+([0-9\.\-]+)/.double $1/;
234 s/\bDCFS[ \t]+0x/.word 0x/;
235 s/\bDCFS\b/.float/;
236
237 s/^([A-Za-z_]\w*)[ \t]+DCD/$1 .word/;
238 s/\bDCD\b/.word/;
239 s/^([A-Za-z_]\w*)[ \t]+DCW/$1 .short/;
240 s/\bDCW\b/.short/;
241 s/^([A-Za-z_]\w*)[ \t]+DCB/$1 .byte/;
242 s/\bDCB\b/.byte/;
243 s/^([A-Za-z_]\w*)[ \t]+\%/.comm $1,/;
244 s/^[A-Za-z_\.]\w+/$&:/;
245 s/^(\d+)/$1:/;
246 s/\%(\d+)/$1b_or_f/;
247 s/\%[Bb](\d+)/$1b/;
248 s/\%[Ff](\d+)/$1f/;
249 s/\%[Ff][Tt](\d+)/$1f/;
250 s/&([\dA-Fa-f]+)/0x$1/;
251 if ( /\b2_[01]+\b/ ) {
252 s/\b2_([01]+)\b/conv$1&&&&/g;
253 while ( /[01][01][01][01]&&&&/ ) {
254 s/0000&&&&/&&&&0/g;
255 s/0001&&&&/&&&&1/g;
256 s/0010&&&&/&&&&2/g;
257 s/0011&&&&/&&&&3/g;
258 s/0100&&&&/&&&&4/g;
259 s/0101&&&&/&&&&5/g;
260 s/0110&&&&/&&&&6/g;
261 s/0111&&&&/&&&&7/g;
262 s/1000&&&&/&&&&8/g;
263 s/1001&&&&/&&&&9/g;
264 s/1010&&&&/&&&&A/g;
265 s/1011&&&&/&&&&B/g;
266 s/1100&&&&/&&&&C/g;
267 s/1101&&&&/&&&&D/g;
268 s/1110&&&&/&&&&E/g;
269 s/1111&&&&/&&&&F/g;
270 }
271 s/000&&&&/&&&&0/g;
272 s/001&&&&/&&&&1/g;
273 s/010&&&&/&&&&2/g;
274 s/011&&&&/&&&&3/g;
275 s/100&&&&/&&&&4/g;
276 s/101&&&&/&&&&5/g;
277 s/110&&&&/&&&&6/g;
278 s/111&&&&/&&&&7/g;
279 s/00&&&&/&&&&0/g;
280 s/01&&&&/&&&&1/g;
281 s/10&&&&/&&&&2/g;
282 s/11&&&&/&&&&3/g;
283 s/0&&&&/&&&&0/g;
284 s/1&&&&/&&&&1/g;
285 s/conv&&&&/0x/g;
286 }
287
288 if ( /commandline/)
289 {
290 if( /-bigend/)
291 {
292 $bigend=1;
293 }
294 }
295
296 if ( /\bDCDU\b/ )
297 {
298 my $cmd=$_;
299 my $value;
300 my $prefix;
301 my $w1;
302 my $w2;
303 my $w3;
304 my $w4;
305
306 s/\s+DCDU\b/@ $&/;
307
308 $cmd =~ /\bDCDU\b\s+0x(\d+)/;
309 $value = $1;
310 $value =~ /(\w\w)(\w\w)(\w\w)(\w\w)/;
311 $w1 = $1;
312 $w2 = $2;
313 $w3 = $3;
314 $w4 = $4;
315
316 if( $bigend ne "")
317 {
318 # big endian
319 $prefix = "\t.byte\t0x".$w1.";".
320 "\t.byte\t0x".$w2.";".
321 "\t.byte\t0x".$w3.";".
322 "\t.byte\t0x".$w4."; ";
323 }
324 else
325 {
326 # little endian
327 $prefix = "\t.byte\t0x".$w4.";".
328 "\t.byte\t0x".$w3.";".
329 "\t.byte\t0x".$w2.";".
330 "\t.byte\t0x".$w1."; ";
331 }
332 $_=$prefix.$_;
333 }
334
335 if ( /\badrl\b/i )
336 {
337 s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i;
338 $addPadding = 1;
339 }
340 s/\bEND\b/@ END/;
341} continue {
342 printf ("%s", $_) if $printit;
343 if ($addPadding != 0)
344 {
345 printf (" mov r0,r0\n");
346 $addPadding = 0;
347 }
348}
349#If we had a code section, mark that this object doesn't need an executable
350# stack.
351if ($nxstack && !$apple) {
352 printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n");
353}
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/arm_celt_map.c b/lib/rbcodec/codecs/libopus/celt/arm/arm_celt_map.c
new file mode 100644
index 0000000000..ca988b66f5
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/arm_celt_map.c
@@ -0,0 +1,160 @@
1/* Copyright (c) 2010 Xiph.Org Foundation
2 * Copyright (c) 2013 Parrot */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "pitch.h"
33#include "kiss_fft.h"
34#include "mdct.h"
35
36#if defined(OPUS_HAVE_RTCD)
37
38# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
39opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N) = {
40 celt_inner_prod_c, /* ARMv4 */
41 celt_inner_prod_c, /* EDSP */
42 celt_inner_prod_c, /* Media */
43 celt_inner_prod_neon /* NEON */
44};
45
46void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
47 int N, opus_val32 *xy1, opus_val32 *xy2) = {
48 dual_inner_prod_c, /* ARMv4 */
49 dual_inner_prod_c, /* EDSP */
50 dual_inner_prod_c, /* Media */
51 dual_inner_prod_neon /* NEON */
52};
53# endif
54
55# if defined(FIXED_POINT)
56# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
57 (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
58 (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
59opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
60 const opus_val16 *, opus_val32 *, int, int, int) = {
61 celt_pitch_xcorr_c, /* ARMv4 */
62 MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
63 MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
64 MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
65};
66
67# endif
68# else /* !FIXED_POINT */
69# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
70void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
71 const opus_val16 *, opus_val32 *, int, int, int) = {
72 celt_pitch_xcorr_c, /* ARMv4 */
73 celt_pitch_xcorr_c, /* EDSP */
74 celt_pitch_xcorr_c, /* Media */
75 celt_pitch_xcorr_float_neon /* Neon */
76};
77# endif
78# endif /* FIXED_POINT */
79
80#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
81 defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
82
83void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
84 const opus_val16 *x,
85 const opus_val16 *y,
86 opus_val32 sum[4],
87 int len
88) = {
89 xcorr_kernel_c, /* ARMv4 */
90 xcorr_kernel_c, /* EDSP */
91 xcorr_kernel_c, /* Media */
92 xcorr_kernel_neon_fixed, /* Neon */
93};
94
95#endif
96
97# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
98# if defined(HAVE_ARM_NE10)
99# if defined(CUSTOM_MODES)
100int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
101 opus_fft_alloc_arch_c, /* ARMv4 */
102 opus_fft_alloc_arch_c, /* EDSP */
103 opus_fft_alloc_arch_c, /* Media */
104 opus_fft_alloc_arm_neon /* Neon with NE10 library support */
105};
106
107void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
108 opus_fft_free_arch_c, /* ARMv4 */
109 opus_fft_free_arch_c, /* EDSP */
110 opus_fft_free_arch_c, /* Media */
111 opus_fft_free_arm_neon /* Neon with NE10 */
112};
113# endif /* CUSTOM_MODES */
114
115void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
116 const kiss_fft_cpx *fin,
117 kiss_fft_cpx *fout) = {
118 opus_fft_c, /* ARMv4 */
119 opus_fft_c, /* EDSP */
120 opus_fft_c, /* Media */
121 opus_fft_neon /* Neon with NE10 */
122};
123
124void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
125 const kiss_fft_cpx *fin,
126 kiss_fft_cpx *fout) = {
127 opus_ifft_c, /* ARMv4 */
128 opus_ifft_c, /* EDSP */
129 opus_ifft_c, /* Media */
130 opus_ifft_neon /* Neon with NE10 */
131};
132
133void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
134 kiss_fft_scalar *in,
135 kiss_fft_scalar * OPUS_RESTRICT out,
136 const opus_val16 *window,
137 int overlap, int shift,
138 int stride, int arch) = {
139 clt_mdct_forward_c, /* ARMv4 */
140 clt_mdct_forward_c, /* EDSP */
141 clt_mdct_forward_c, /* Media */
142 clt_mdct_forward_neon /* Neon with NE10 */
143};
144
145void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
146 kiss_fft_scalar *in,
147 kiss_fft_scalar * OPUS_RESTRICT out,
148 const opus_val16 *window,
149 int overlap, int shift,
150 int stride, int arch) = {
151 clt_mdct_backward_c, /* ARMv4 */
152 clt_mdct_backward_c, /* EDSP */
153 clt_mdct_backward_c, /* Media */
154 clt_mdct_backward_neon /* Neon with NE10 */
155};
156
157# endif /* HAVE_ARM_NE10 */
158# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
159
160#endif /* OPUS_HAVE_RTCD */
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/armcpu.c b/lib/rbcodec/codecs/libopus/celt/arm/armcpu.c
new file mode 100644
index 0000000000..694a63b78e
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/armcpu.c
@@ -0,0 +1,185 @@
1/* Copyright (c) 2010 Xiph.Org Foundation
2 * Copyright (c) 2013 Parrot */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28/* Original code from libtheora, modified to suit Opus */
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#ifdef OPUS_HAVE_RTCD
35
36#include "armcpu.h"
37#include "cpu_support.h"
38#include "os_support.h"
39#include "opus_types.h"
40#include "arch.h"
41
42#define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4)
43#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
44#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
45#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
46
47#if defined(_MSC_VER)
48/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
49# define WIN32_LEAN_AND_MEAN
50# define WIN32_EXTRA_LEAN
51# include <windows.h>
52
/* MSVC/Windows-on-ARM capability probe.
 * Returns a bit mask built from the OPUS_CPU_ARM_*_FLAG values. Each optional
 * instruction-set tier is tested by executing one representative instruction
 * inside a structured-exception block: if the CPU raises
 * EXCEPTION_ILLEGAL_INSTRUCTION the corresponding flag is simply left clear.
 * The probes are nested in the preprocessor so that a tier is only tested
 * when the build may use it (or a higher tier). */
53static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
54 opus_uint32 flags;
55 flags=0;
56 /* MSVC has no inline __asm support for ARM, but it does let you __emit
57 * instructions via their assembled hex code.
58 * All of these instructions should be essentially nops. */
59# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
60 || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Tier 1: EDSP. The flag is set only if the probe instruction completes. */
61 __try{
62 /*PLD [r13]*/
63 __emit(0xF5DDF000);
64 flags|=OPUS_CPU_ARM_EDSP_FLAG;
65 }
66 __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
67 /*Ignore exception.*/
68 }
69# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
70 || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Tier 2: media instructions. */
71 __try{
72 /*SHADD8 r3,r3,r3*/
73 __emit(0xE6333F93);
74 flags|=OPUS_CPU_ARM_MEDIA_FLAG;
75 }
76 __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
77 /*Ignore exception.*/
78 }
79# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Tier 3: NEON. */
80 __try{
81 /*VORR q0,q0,q0*/
82 __emit(0xF2200150);
83 flags|=OPUS_CPU_ARM_NEON_FLAG;
84 }
85 __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
86 /*Ignore exception.*/
87 }
88# endif
89# endif
90# endif
91 return flags;
92}
93
94#elif defined(__linux__)
95/* Linux based */
96opus_uint32 opus_cpu_capabilities(void)
97{
98 opus_uint32 flags = 0;
99 FILE *cpuinfo;
100
101 /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
102 * Android */
103 cpuinfo = fopen("/proc/cpuinfo", "r");
104
105 if(cpuinfo != NULL)
106 {
107 /* 512 should be enough for anybody (it's even enough for all the flags that
108 * x86 has accumulated... so far). */
109 char buf[512];
110
111 while(fgets(buf, 512, cpuinfo) != NULL)
112 {
113# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
114 || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
115 /* Search for edsp and neon flag */
116 if(memcmp(buf, "Features", 8) == 0)
117 {
118 char *p;
119 p = strstr(buf, " edsp");
120 if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
121 flags |= OPUS_CPU_ARM_EDSP_FLAG;
122
123# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
124 p = strstr(buf, " neon");
125 if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
126 flags |= OPUS_CPU_ARM_NEON_FLAG;
127# endif
128 }
129# endif
130
131# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
132 || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
133 /* Search for media capabilities (>= ARMv6) */
134 if(memcmp(buf, "CPU architecture:", 17) == 0)
135 {
136 int version;
137 version = atoi(buf+17);
138
139 if(version >= 6)
140 flags |= OPUS_CPU_ARM_MEDIA_FLAG;
141 }
142# endif
143 }
144
145 fclose(cpuinfo);
146 }
147 return flags;
148}
149#else
150/* The feature registers which can tell us what the processor supports are
151 * accessible in privileged modes only, so we can't have a general user-space
152 * detection method like on x86.*/
153# error "Configured to use ARM asm but no CPU detection method available for " \
154 "your platform. Reconfigure with --disable-rtcd (or send patches)."
155#endif
156
157int opus_select_arch(void)
158{
159 opus_uint32 flags = opus_cpu_capabilities();
160 int arch = 0;
161
162 if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
163 /* Asserts ensure arch values are sequential */
164 celt_assert(arch == OPUS_ARCH_ARM_V4);
165 return arch;
166 }
167 arch++;
168
169 if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
170 celt_assert(arch == OPUS_ARCH_ARM_EDSP);
171 return arch;
172 }
173 arch++;
174
175 if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
176 celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
177 return arch;
178 }
179 arch++;
180
181 celt_assert(arch == OPUS_ARCH_ARM_NEON);
182 return arch;
183}
184
185#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/armcpu.h b/lib/rbcodec/codecs/libopus/celt/arm/armcpu.h
new file mode 100644
index 0000000000..820262ff5f
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/armcpu.h
@@ -0,0 +1,77 @@
1/* Copyright (c) 2010 Xiph.Org Foundation
2 * Copyright (c) 2013 Parrot */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
/* ARM CPU-tier helpers.
 *
 * Two families of name-mangling macros pick the most capable implementation
 * suffix (_c, _edsp, _media, _neon) for a function `name`:
 *   - MAY_HAVE_*  : keyed on OPUS_ARM_MAY_HAVE_*  build options.
 *   - PRESUME_*   : keyed on OPUS_ARM_PRESUME_*   build options.
 * In both families each macro falls back to the next lower tier when its own
 * option is not defined, ending at the plain C implementation (name##_c). */
28#if !defined(ARMCPU_H)
29# define ARMCPU_H
30
/* EDSP tier: name_edsp if the build may use EDSP, else the C version. */
31# if defined(OPUS_ARM_MAY_HAVE_EDSP)
32# define MAY_HAVE_EDSP(name) name ## _edsp
33# else
34# define MAY_HAVE_EDSP(name) name ## _c
35# endif
36
/* Media tier: name_media, else whatever MAY_HAVE_EDSP selects. */
37# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
38# define MAY_HAVE_MEDIA(name) name ## _media
39# else
40# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name)
41# endif
42
/* NEON tier: name_neon, else whatever MAY_HAVE_MEDIA selects. */
43# if defined(OPUS_ARM_MAY_HAVE_NEON)
44# define MAY_HAVE_NEON(name) name ## _neon
45# else
46# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
47# endif
48
/* Same ladder, keyed on the OPUS_ARM_PRESUME_* options. */
49# if defined(OPUS_ARM_PRESUME_EDSP)
50# define PRESUME_EDSP(name) name ## _edsp
51# else
52# define PRESUME_EDSP(name) name ## _c
53# endif
54
55# if defined(OPUS_ARM_PRESUME_MEDIA)
56# define PRESUME_MEDIA(name) name ## _media
57# else
58# define PRESUME_MEDIA(name) PRESUME_EDSP(name)
59# endif
60
61# if defined(OPUS_ARM_PRESUME_NEON)
62# define PRESUME_NEON(name) name ## _neon
63# else
64# define PRESUME_NEON(name) PRESUME_MEDIA(name)
65# endif
66
/* Run-time CPU detection: opus_select_arch() returns one of the
 * OPUS_ARCH_ARM_* indices below. The values are sequential, lowest tier
 * first. */
67# if defined(OPUS_HAVE_RTCD)
68int opus_select_arch(void);
69
70#define OPUS_ARCH_ARM_V4 (0)
71#define OPUS_ARCH_ARM_EDSP (1)
72#define OPUS_ARCH_ARM_MEDIA (2)
73#define OPUS_ARCH_ARM_NEON (3)
74
75# endif
76
77#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/armopts.s.in b/lib/rbcodec/codecs/libopus/celt/arm/armopts.s.in
new file mode 100644
index 0000000000..3d8aaf2754
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/armopts.s.in
@@ -0,0 +1,37 @@
1/* Copyright (C) 2013 Mozilla Corporation */
2/*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
18 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*/
26
27; Set the following to 1 if we have EDSP instructions
28; (LDRD/STRD, etc., ARMv5E and later).
29OPUS_ARM_MAY_HAVE_EDSP * @OPUS_ARM_MAY_HAVE_EDSP@
30
31; Set the following to 1 if we have ARMv6 media instructions.
32OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@
33
34; Set the following to 1 if we have NEON (some ARMv7)
35OPUS_ARM_MAY_HAVE_NEON * @OPUS_ARM_MAY_HAVE_NEON@
36
37END
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/celt_fft_ne10.c b/lib/rbcodec/codecs/libopus/celt/arm/celt_fft_ne10.c
new file mode 100644
index 0000000000..ea5fd7808b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/celt_fft_ne10.c
@@ -0,0 +1,173 @@
1/* Copyright (c) 2015 Xiph.Org Foundation
2 Written by Viswanath Puttagunta */
3/**
4 @file celt_fft_ne10.c
5 @brief ARM Neon optimizations for fft using NE10 library
6 */
7
8/*
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#ifndef SKIP_CONFIG_H
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37#endif
38
39#include <NE10_dsp.h>
40#include "os_support.h"
41#include "kiss_fft.h"
42#include "stack_alloc.h"
43
/* Map the generic NE10_FFT_* names used below onto the NE10 float32 API
 * (floating-point build) or the NE10 int32 API (FIXED_POINT build), so the
 * FFT wrappers can be written once for both builds. */
#if !defined(FIXED_POINT)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
60
61#if defined(CUSTOM_MODES)
62
63/* nfft lengths in NE10 that support scaled fft */
64# define NE10_FFTSCALED_SUPPORT_MAX 4
65static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
66 480, 240, 120, 60
67};
68
69int opus_fft_alloc_arm_neon(kiss_fft_state *st)
70{
71 int i;
72 size_t memneeded = sizeof(struct arch_fft_state);
73
74 st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
75 if (!st->arch_fft)
76 return -1;
77
78 for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
79 if(st->nfft == ne10_fft_scaled_support[i])
80 break;
81 }
82 if (i == NE10_FFTSCALED_SUPPORT_MAX) {
83 /* This nfft length (scaled fft) is not supported in NE10 */
84 st->arch_fft->is_supported = 0;
85 st->arch_fft->priv = NULL;
86 }
87 else {
88 st->arch_fft->is_supported = 1;
89 st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
90 if (st->arch_fft->priv == NULL) {
91 return -1;
92 }
93 }
94 return 0;
95}
96
97void opus_fft_free_arm_neon(kiss_fft_state *st)
98{
99 NE10_FFT_CFG_TYPE_T cfg;
100
101 if (!st->arch_fft)
102 return;
103
104 cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
105 if (cfg)
106 NE10_FFT_DESTROY_C2C_TYPE(cfg);
107 opus_free(st->arch_fft);
108}
109#endif
110
111void opus_fft_neon(const kiss_fft_state *st,
112 const kiss_fft_cpx *fin,
113 kiss_fft_cpx *fout)
114{
115 NE10_FFT_STATE_TYPE_T state;
116 NE10_FFT_CFG_TYPE_T cfg = &state;
117 VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
118 SAVE_STACK;
119 ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
120
121 if (!st->arch_fft->is_supported) {
122 /* This nfft length (scaled fft) not supported in NE10 */
123 opus_fft_c(st, fin, fout);
124 }
125 else {
126 memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
127 state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
128#if !defined(FIXED_POINT)
129 state.is_forward_scaled = 1;
130
131 NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
132 (NE10_FFT_CPX_TYPE_T *)fin,
133 cfg, 0);
134#else
135 NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
136 (NE10_FFT_CPX_TYPE_T *)fin,
137 cfg, 0, 1);
138#endif
139 }
140 RESTORE_STACK;
141}
142
143void opus_ifft_neon(const kiss_fft_state *st,
144 const kiss_fft_cpx *fin,
145 kiss_fft_cpx *fout)
146{
147 NE10_FFT_STATE_TYPE_T state;
148 NE10_FFT_CFG_TYPE_T cfg = &state;
149 VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
150 SAVE_STACK;
151 ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
152
153 if (!st->arch_fft->is_supported) {
154 /* This nfft length (scaled fft) not supported in NE10 */
155 opus_ifft_c(st, fin, fout);
156 }
157 else {
158 memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
159 state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
160#if !defined(FIXED_POINT)
161 state.is_backward_scaled = 0;
162
163 NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
164 (NE10_FFT_CPX_TYPE_T *)fin,
165 cfg, 1);
166#else
167 NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
168 (NE10_FFT_CPX_TYPE_T *)fin,
169 cfg, 1, 0);
170#endif
171 }
172 RESTORE_STACK;
173}
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c b/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c
new file mode 100644
index 0000000000..3531d02d10
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/celt_mdct_ne10.c
@@ -0,0 +1,258 @@
1/* Copyright (c) 2015 Xiph.Org Foundation
2 Written by Viswanath Puttagunta */
3/**
4 @file celt_mdct_ne10.c
5 @brief ARM Neon optimizations for mdct using NE10 library
6 */
7
8/*
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#ifndef SKIP_CONFIG_H
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37#endif
38
39#include "kiss_fft.h"
40#include "_kiss_fft_guts.h"
41#include "mdct.h"
42#include "stack_alloc.h"
43
/* Forward MDCT built around opus_fft().
 *
 *   l       lookup holding the base size (l->n), the twiddle table (l->trig)
 *           and one FFT state per shift (l->kfft[shift]).
 *   in      time-domain input; read at offsets derived from N2 and overlap.
 *   out     output coefficients, written with a step of `stride`.
 *   window  overlap window of `overlap` coefficients.
 *   shift   the transform size is l->n halved `shift` times; the twiddle
 *           pointer is advanced past the tables of the larger sizes.
 *   arch    passed through to opus_fft() to select the implementation.
 *
 * Stages: window/fold the input into N2 scalars, pre-rotate them into N4
 * complex values, run the N4-point FFT, then post-rotate into `out`.
 * Two scratch buffers (f: N2 scalars, f2: N4 complex) are taken from the
 * stack allocator and released by RESTORE_STACK. */
44void clt_mdct_forward_neon(const mdct_lookup *l,
45 kiss_fft_scalar *in,
46 kiss_fft_scalar * OPUS_RESTRICT out,
47 const opus_val16 *window,
48 int overlap, int shift, int stride, int arch)
49{
50 int i;
51 int N, N2, N4;
52 VARDECL(kiss_fft_scalar, f);
53 VARDECL(kiss_fft_cpx, f2);
54 const kiss_fft_state *st = l->kfft[shift];
55 const kiss_twiddle_scalar *trig;
56
57 SAVE_STACK;
58
/* Derive the transform size for this shift and skip the twiddles that belong
   to the larger sizes. */
59 N = l->n;
60 trig = l->trig;
61 for (i=0;i<shift;i++)
62 {
63 N >>= 1;
64 trig += N;
65 }
66 N2 = N>>1;
67 N4 = N>>2;
68
69 ALLOC(f, N2, kiss_fft_scalar);
70 ALLOC(f2, N4, kiss_fft_cpx);
71
72 /* Consider the input to be composed of four blocks: [a, b, c, d] */
73 /* Window, shuffle, fold */
74 {
75 /* Temp pointers to make it really clear to the compiler what we're doing */
76 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
77 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
78 kiss_fft_scalar * OPUS_RESTRICT yp = f;
79 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
80 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
/* First (overlap+3)>>2 pairs: windowed region at the start. */
81 for(i=0;i<((overlap+3)>>2);i++)
82 {
83 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
84 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
85 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
86 xp1+=2;
87 xp2-=2;
88 wp1+=2;
89 wp2-=2;
90 }
/* Restart the window pointers from both ends for the final windowed region. */
91 wp1 = window;
92 wp2 = window+overlap-1;
/* Middle region: no windowing, samples are copied straight through. */
93 for(;i<N4-((overlap+3)>>2);i++)
94 {
95 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
96 *yp++ = *xp2;
97 *yp++ = *xp1;
98 xp1+=2;
99 xp2-=2;
100 }
/* Last (overlap+3)>>2 pairs: windowed region at the end. */
101 for(;i<N4;i++)
102 {
103 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
104 *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
105 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
106 xp1+=2;
107 xp2-=2;
108 wp1+=2;
109 wp2-=2;
110 }
111 }
112 /* Pre-rotation */
113 {
114 kiss_fft_scalar * OPUS_RESTRICT yp = f;
115 const kiss_twiddle_scalar *t = &trig[0];
116 for(i=0;i<N4;i++)
117 {
118 kiss_fft_cpx yc;
119 kiss_twiddle_scalar t0, t1;
120 kiss_fft_scalar re, im, yr, yi;
/* The twiddle table is read as two halves: t[i] and t[N4+i]. */
121 t0 = t[i];
122 t1 = t[N4+i];
123 re = *yp++;
124 im = *yp++;
125 yr = S_MUL(re,t0) - S_MUL(im,t1);
126 yi = S_MUL(im,t0) + S_MUL(re,t1);
127 yc.r = yr;
128 yc.i = yi;
129 f2[i] = yc;
130 }
131 }
132
/* N4-point complex FFT from f2 into f (f is reused as the complex output). */
133 opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
134
135 /* Post-rotate */
136 {
137 /* Temp pointers to make it really clear to the compiler what we're doing */
138 const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
139 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
140 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
141 const kiss_twiddle_scalar *t = &trig[0];
142 /* Temp pointers to make it really clear to the compiler what we're doing */
/* yp1 walks forward and yp2 backward through out, each in steps of 2*stride. */
143 for(i=0;i<N4;i++)
144 {
145 kiss_fft_scalar yr, yi;
146 yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
147 yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
148 *yp1 = yr;
149 *yp2 = yi;
150 fp++;
151 yp1 += 2*stride;
152 yp2 -= 2*stride;
153 }
154 }
155 RESTORE_STACK;
156}
157
158void clt_mdct_backward_neon(const mdct_lookup *l,
159 kiss_fft_scalar *in,
160 kiss_fft_scalar * OPUS_RESTRICT out,
161 const opus_val16 * OPUS_RESTRICT window,
162 int overlap, int shift, int stride, int arch)
163{
164 int i;
165 int N, N2, N4;
166 VARDECL(kiss_fft_scalar, f);
167 const kiss_twiddle_scalar *trig;
168 const kiss_fft_state *st = l->kfft[shift];
169
170 N = l->n;
171 trig = l->trig;
172 for (i=0;i<shift;i++)
173 {
174 N >>= 1;
175 trig += N;
176 }
177 N2 = N>>1;
178 N4 = N>>2;
179
180 ALLOC(f, N2, kiss_fft_scalar);
181
182 /* Pre-rotate */
183 {
184 /* Temp pointers to make it really clear to the compiler what we're doing */
185 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
186 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
187 kiss_fft_scalar * OPUS_RESTRICT yp = f;
188 const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
189 for(i=0;i<N4;i++)
190 {
191 kiss_fft_scalar yr, yi;
192 yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
193 yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
194 yp[2*i] = yr;
195 yp[2*i+1] = yi;
196 xp1+=2*stride;
197 xp2-=2*stride;
198 }
199 }
200
201 opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch);
202
203 /* Post-rotate and de-shuffle from both ends of the buffer at once to make
204 it in-place. */
205 {
206 kiss_fft_scalar * yp0 = out+(overlap>>1);
207 kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
208 const kiss_twiddle_scalar *t = &trig[0];
209 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
210 middle pair will be computed twice. */
211 for(i=0;i<(N4+1)>>1;i++)
212 {
213 kiss_fft_scalar re, im, yr, yi;
214 kiss_twiddle_scalar t0, t1;
215 re = yp0[0];
216 im = yp0[1];
217 t0 = t[i];
218 t1 = t[N4+i];
219 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
220 yr = S_MUL(re,t0) + S_MUL(im,t1);
221 yi = S_MUL(re,t1) - S_MUL(im,t0);
222 re = yp1[0];
223 im = yp1[1];
224 yp0[0] = yr;
225 yp1[1] = yi;
226
227 t0 = t[(N4-i-1)];
228 t1 = t[(N2-i-1)];
229 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
230 yr = S_MUL(re,t0) + S_MUL(im,t1);
231 yi = S_MUL(re,t1) - S_MUL(im,t0);
232 yp1[0] = yr;
233 yp0[1] = yi;
234 yp0 += 2;
235 yp1 -= 2;
236 }
237 }
238
239 /* Mirror on both sides for TDAC */
240 {
241 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
242 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
243 const opus_val16 * OPUS_RESTRICT wp1 = window;
244 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
245
246 for(i = 0; i < overlap/2; i++)
247 {
248 kiss_fft_scalar x1, x2;
249 x1 = *xp1;
250 x2 = *yp1;
251 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
252 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
253 wp1++;
254 wp2--;
255 }
256 }
257 RESTORE_STACK;
258}
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/celt_neon_intr.c b/lib/rbcodec/codecs/libopus/celt/arm/celt_neon_intr.c
new file mode 100644
index 0000000000..effda769d0
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/celt_neon_intr.c
@@ -0,0 +1,211 @@
1/* Copyright (c) 2014-2015 Xiph.Org Foundation
2 Written by Viswanath Puttagunta */
3/**
4 @file celt_neon_intr.c
5 @brief ARM Neon Intrinsic optimizations for celt
6 */
7
8/*
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include <arm_neon.h>
38#include "../pitch.h"
39
40#if defined(FIXED_POINT)
41void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
42{
43 int j;
44 int32x4_t a = vld1q_s32(sum);
45 /* Load y[0...3] */
46 /* This requires len>0 to always be valid (which we assert in the C code). */
47 int16x4_t y0 = vld1_s16(y);
48 y += 4;
49
50 for (j = 0; j + 8 <= len; j += 8)
51 {
52 /* Load x[0...7] */
53 int16x8_t xx = vld1q_s16(x);
54 int16x4_t x0 = vget_low_s16(xx);
55 int16x4_t x4 = vget_high_s16(xx);
56 /* Load y[4...11] */
57 int16x8_t yy = vld1q_s16(y);
58 int16x4_t y4 = vget_low_s16(yy);
59 int16x4_t y8 = vget_high_s16(yy);
60 int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
61 int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
62
63 int16x4_t y1 = vext_s16(y0, y4, 1);
64 int16x4_t y5 = vext_s16(y4, y8, 1);
65 int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
66 int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
67
68 int16x4_t y2 = vext_s16(y0, y4, 2);
69 int16x4_t y6 = vext_s16(y4, y8, 2);
70 int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
71 int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
72
73 int16x4_t y3 = vext_s16(y0, y4, 3);
74 int16x4_t y7 = vext_s16(y4, y8, 3);
75 int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
76 int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
77
78 y0 = y8;
79 a = a7;
80 x += 8;
81 y += 8;
82 }
83
84 for (; j < len; j++)
85 {
86 int16x4_t x0 = vld1_dup_s16(x); /* load next x */
87 int32x4_t a0 = vmlal_s16(a, y0, x0);
88
89 int16x4_t y4 = vld1_dup_s16(y); /* load next y */
90 y0 = vext_s16(y0, y4, 1);
91 a = a0;
92 x++;
93 y++;
94 }
95
96 vst1q_s32(sum, a);
97}
98
99#else
100/*
101 * Function: xcorr_kernel_neon_float
102 * ---------------------------------
103 * Computes 4 correlation values and stores them in sum[4]
 *
 * sum[k] receives the sum of x[j]*y[j+k] over j = 0..len-1, for k = 0..3.
 * The accumulator starts from zero, so any previous contents of sum[] are
 * overwritten rather than added to. Requires len > 0 (asserted below).
 * The input is consumed in blocks of 8, then at most one block of 4, then
 * one sample at a time; the block sizes are chosen so that the vector loads
 * from y stay inside the range described in the comments on each loop.
104 */
105static void xcorr_kernel_neon_float(const float32_t *x, const float32_t *y,
106 float32_t sum[4], int len) {
107 float32x4_t YY[3];
108 float32x4_t YEXT[3];
109 float32x4_t XX[2];
110 float32x2_t XX_2;
111 float32x4_t SUMM;
112 const float32_t *xi = x;
113 const float32_t *yi = y;
114
115 celt_assert(len>0);
116
/* YY[0] always holds the four y values aligned with the next x to consume. */
117 YY[0] = vld1q_f32(yi);
118 SUMM = vdupq_n_f32(0);
119
120 /* Consume 8 elements in x vector and 12 elements in y
121 * vector. However, the 12'th element never really gets
122 * touched in this loop. So, if len == 8, then we only
123 * must access y[0] to y[10]. y[11] must not be accessed
124 * hence make sure len > 8 and not len >= 8
125 */
126 while (len > 8) {
127 yi += 4;
128 YY[1] = vld1q_f32(yi);
129 yi += 4;
130 YY[2] = vld1q_f32(yi);
131
132 XX[0] = vld1q_f32(xi);
133 xi += 4;
134 XX[1] = vld1q_f32(xi);
135 xi += 4;
136
/* First four x values against y windows shifted by 0..3 (YY[0] with YY[1]). */
137 SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
138 YEXT[0] = vextq_f32(YY[0], YY[1], 1);
139 SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
140 YEXT[1] = vextq_f32(YY[0], YY[1], 2);
141 SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
142 YEXT[2] = vextq_f32(YY[0], YY[1], 3);
143 SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
144
/* Next four x values against y windows built from YY[1] and YY[2]. */
145 SUMM = vmlaq_lane_f32(SUMM, YY[1], vget_low_f32(XX[1]), 0);
146 YEXT[0] = vextq_f32(YY[1], YY[2], 1);
147 SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[1]), 1);
148 YEXT[1] = vextq_f32(YY[1], YY[2], 2);
149 SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[1]), 0);
150 YEXT[2] = vextq_f32(YY[1], YY[2], 3);
151 SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[1]), 1);
152
153 YY[0] = YY[2];
154 len -= 8;
155 }
156
157 /* Consume 4 elements in x vector and 8 elements in y
158 * vector. However, the 8'th element in y never really gets
159 * touched in this loop. So, if len == 4, then we only
160 * must access y[0] to y[6]. y[7] must not be accessed
161 * hence make sure len>4 and not len>=4
162 */
163 if (len > 4) {
164 yi += 4;
165 YY[1] = vld1q_f32(yi);
166
167 XX[0] = vld1q_f32(xi);
168 xi += 4;
169
170 SUMM = vmlaq_lane_f32(SUMM, YY[0], vget_low_f32(XX[0]), 0);
171 YEXT[0] = vextq_f32(YY[0], YY[1], 1);
172 SUMM = vmlaq_lane_f32(SUMM, YEXT[0], vget_low_f32(XX[0]), 1);
173 YEXT[1] = vextq_f32(YY[0], YY[1], 2);
174 SUMM = vmlaq_lane_f32(SUMM, YEXT[1], vget_high_f32(XX[0]), 0);
175 YEXT[2] = vextq_f32(YY[0], YY[1], 3);
176 SUMM = vmlaq_lane_f32(SUMM, YEXT[2], vget_high_f32(XX[0]), 1);
177
178 YY[0] = YY[1];
179 len -= 4;
180 }
181
/* Remaining samples one at a time. All but the last are handled here; each
   pass reloads the y window one element further on. */
182 while (--len > 0) {
183 XX_2 = vld1_dup_f32(xi++);
184 SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
185 YY[0]= vld1q_f32(++yi);
186 }
187
/* Last sample: multiply-accumulate without loading a further y window. */
188 XX_2 = vld1_dup_f32(xi);
189 SUMM = vmlaq_lane_f32(SUMM, YY[0], XX_2, 0);
190
191 vst1q_f32(sum, SUMM);
192}
193
194void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
195 opus_val32 *xcorr, int len, int max_pitch, int arch) {
196 int i;
197 (void)arch;
198 celt_assert(max_pitch > 0);
199 celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
200
201 for (i = 0; i < (max_pitch-3); i += 4) {
202 xcorr_kernel_neon_float((const float32_t *)_x, (const float32_t *)_y+i,
203 (float32_t *)xcorr+i, len);
204 }
205
206 /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
207 for (; i < max_pitch; i++) {
208 xcorr[i] = celt_inner_prod_neon(_x, _y+i, len);
209 }
210}
211#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/celt_pitch_xcorr_arm.s b/lib/rbcodec/codecs/libopus/celt/arm/celt_pitch_xcorr_arm.s
new file mode 100644
index 0000000000..6e873afc37
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/celt_pitch_xcorr_arm.s
@@ -0,0 +1,551 @@
1; Copyright (c) 2007-2008 CSIRO
2; Copyright (c) 2007-2009 Xiph.Org Foundation
3; Copyright (c) 2013 Parrot
4; Written by Aurélien Zanelli
5;
6; Redistribution and use in source and binary forms, with or without
7; modification, are permitted provided that the following conditions
8; are met:
9;
10; - Redistributions of source code must retain the above copyright
11; notice, this list of conditions and the following disclaimer.
12;
13; - Redistributions in binary form must reproduce the above copyright
14; notice, this list of conditions and the following disclaimer in the
15; documentation and/or other materials provided with the distribution.
16;
17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 AREA |.text|, CODE, READONLY
30
31 GET celt/arm/armopts.s
32
33IF OPUS_ARM_MAY_HAVE_EDSP
34 EXPORT celt_pitch_xcorr_edsp
35ENDIF
36
37IF OPUS_ARM_MAY_HAVE_NEON
38 EXPORT celt_pitch_xcorr_neon
39ENDIF
40
41IF OPUS_ARM_MAY_HAVE_NEON
42
43; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3
; Internal helper with a register-based interface (see "input"/"output"
; below). It accumulates into q0, so the caller chooses the starting sums, and
; it returns with MOV pc, lr.
44xcorr_kernel_neon PROC
45xcorr_kernel_neon_start
46 ; input:
47 ; r3 = int len
48 ; r4 = opus_val16 *x
49 ; r5 = opus_val16 *y
50 ; q0 = opus_val32 sum[4]
51 ; output:
52 ; q0 = opus_val32 sum[4]
53 ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15
54 ; internal usage:
55 ; r12 = int j
56 ; d3 = y_3|y_2|y_1|y_0
57 ; q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4
58 ; q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0
59 ; q8 = scratch
60 ;
61 ; Load y[0...3]
62 ; This requires len>0 to always be valid (which we assert in the C code).
63 VLD1.16 {d5}, [r5]!
; r12 = len-8; the 8-sample loop is only entered when len > 8.
64 SUBS r12, r3, #8
65 BLE xcorr_kernel_neon_process4
66; Process 8 samples at a time.
67; This loop loads one y value more than we actually need. Therefore we have to
68; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid
69; reading past the end of the array.
70xcorr_kernel_neon_process8
71 ; This loop has 19 total instructions (10 cycles to issue, minimum), with
72 ; - 2 cycles of ARM instructions,
73 ; - 10 cycles of load/store/byte permute instructions, and
74 ; - 9 cycles of data processing instructions.
75 ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the
76 ; latter two categories, meaning the whole loop should run in 10 cycles per
77 ; iteration, barring cache misses.
78 ;
79 ; Load x[0...7]
80 VLD1.16 {d6, d7}, [r4]!
81 ; Unlike VMOV, VAND is a data processing instruction (and doesn't get
82 ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1.
83 VAND d3, d5, d5
84 SUBS r12, r12, #8
85 ; Load y[4...11]
86 VLD1.16 {d4, d5}, [r5]!
87 VMLAL.S16 q0, d3, d6[0]
88 VEXT.16 d16, d3, d4, #1
89 VMLAL.S16 q0, d4, d7[0]
90 VEXT.16 d17, d4, d5, #1
91 VMLAL.S16 q0, d16, d6[1]
92 VEXT.16 d16, d3, d4, #2
93 VMLAL.S16 q0, d17, d7[1]
94 VEXT.16 d17, d4, d5, #2
95 VMLAL.S16 q0, d16, d6[2]
96 VEXT.16 d16, d3, d4, #3
97 VMLAL.S16 q0, d17, d7[2]
98 VEXT.16 d17, d4, d5, #3
99 VMLAL.S16 q0, d16, d6[3]
100 VMLAL.S16 q0, d17, d7[3]
; The branch condition still comes from the SUBS r12 near the top of the loop
; body; no instruction after it in this loop sets the ARM condition flags.
101 BGT xcorr_kernel_neon_process8
102; Process 4 samples here if we have > 4 left (still reading one extra y value).
103xcorr_kernel_neon_process4
104 ADDS r12, r12, #4
105 BLE xcorr_kernel_neon_process2
106 ; Load x[0...3]
107 VLD1.16 d6, [r4]!
108 ; Use VAND since it's a data processing instruction again.
109 VAND d4, d5, d5
110 SUB r12, r12, #4
111 ; Load y[4...7]
112 VLD1.16 d5, [r5]!
113 VMLAL.S16 q0, d4, d6[0]
114 VEXT.16 d16, d4, d5, #1
115 VMLAL.S16 q0, d16, d6[1]
116 VEXT.16 d16, d4, d5, #2
117 VMLAL.S16 q0, d16, d6[2]
118 VEXT.16 d16, d4, d5, #3
119 VMLAL.S16 q0, d16, d6[3]
120; Process 2 samples here if we have > 2 left (still reading one extra y value).
121xcorr_kernel_neon_process2
122 ADDS r12, r12, #2
123 BLE xcorr_kernel_neon_process1
124 ; Load x[0...1]
125 VLD2.16 {d6[],d7[]}, [r4]!
126 ; Use VAND since it's a data processing instruction again.
127 VAND d4, d5, d5
128 SUB r12, r12, #2
129 ; Load y[4...5]
130 VLD1.32 {d5[]}, [r5]!
131 VMLAL.S16 q0, d4, d6
132 VEXT.16 d16, d4, d5, #1
133 ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI
134 ; instead of VEXT, since it's a data-processing instruction.
135 VSRI.64 d5, d4, #32
136 VMLAL.S16 q0, d16, d7
137; Process 1 sample using the extra y value we loaded above.
138xcorr_kernel_neon_process1
139 ; Load next *x
140 VLD1.16 {d6[]}, [r4]!
141 ADDS r12, r12, #1
142 ; y[0...3] are left in d5 from prior iteration(s) (if any)
143 VMLAL.S16 q0, d5, d6
; Return here when the ADDS above left r12 <= 0, i.e. no sample remains.
144 MOVLE pc, lr
145; Now process 1 last sample, not reading ahead.
146 ; Load last *y
147 VLD1.16 {d4[]}, [r5]!
148 VSRI.64 d4, d5, #16
149 ; Load last *x
150 VLD1.16 {d6[]}, [r4]!
151 VMLAL.S16 q0, d4, d6
; Return to the caller.
152 MOV pc, lr
153 ENDP
154
155; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y,
156; opus_val32 *xcorr, int len, int max_pitch, int arch)
; Writes xcorr[0...max_pitch-1], four entries at a time through
; xcorr_kernel_neon and then one at a time for the remainder, and returns the
; largest value written (never less than 1, the initial maxcorr).
157celt_pitch_xcorr_neon PROC
158 ; input:
159 ; r0 = opus_val16 *_x
160 ; r1 = opus_val16 *_y
161 ; r2 = opus_val32 *xcorr
162 ; r3 = int len
163 ; output:
164 ; r0 = int maxcorr
165 ; internal usage:
166 ; r4 = opus_val16 *x (for xcorr_kernel_neon())
167 ; r5 = opus_val16 *y (for xcorr_kernel_neon())
168 ; r6 = int max_pitch
169 ; r12 = int j
170 ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon())
171 ; ignored:
172 ; int arch
; Four words are pushed here, so the first stack argument (max_pitch) is
; found at [sp, #16] afterwards.
173 STMFD sp!, {r4-r6, lr}
174 LDR r6, [sp, #16]
; maxcorr[0...3] = 1
175 VMOV.S32 q15, #1
176 ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done
177 SUBS r6, r6, #4
178 BLT celt_pitch_xcorr_neon_process4_done
179celt_pitch_xcorr_neon_process4
180 ; xcorr_kernel_neon parameters:
181 ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0}
182 MOV r4, r0
183 MOV r5, r1
184 VEOR q0, q0, q0
185 ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3.
186 ; So we don't save/restore any other registers.
187 BL xcorr_kernel_neon_start
188 SUBS r6, r6, #4
189 VST1.32 {q0}, [r2]!
190 ; _y += 4
191 ADD r1, r1, #8
192 VMAX.S32 q15, q15, q0
193 ; if (max_pitch >= 4) goto celt_pitch_xcorr_neon_process4
; (flags are from the SUBS r6 above; otherwise fall through)
194 BGE celt_pitch_xcorr_neon_process4
195; We have less than 4 sums left to compute.
196celt_pitch_xcorr_neon_process4_done
197 ADDS r6, r6, #4
198 ; Reduce maxcorr to a single value
199 VMAX.S32 d30, d30, d31
200 VPMAX.S32 d30, d30, d30
201 ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done
202 BLE celt_pitch_xcorr_neon_done
203; Now compute each remaining sum one at a time.
204celt_pitch_xcorr_neon_process_remaining
205 MOV r4, r0
206 MOV r5, r1
207 VMOV.I32 q0, #0
208 SUBS r12, r3, #8
209 BLT celt_pitch_xcorr_neon_process_remaining4
210; Sum terms 8 at a time.
211celt_pitch_xcorr_neon_process_remaining_loop8
212 ; Load x[0...7]
213 VLD1.16 {q1}, [r4]!
214 ; Load y[0...7]
215 VLD1.16 {q2}, [r5]!
216 SUBS r12, r12, #8
217 VMLAL.S16 q0, d4, d2
218 VMLAL.S16 q0, d5, d3
219 BGE celt_pitch_xcorr_neon_process_remaining_loop8
220; Sum terms 4 at a time.
221celt_pitch_xcorr_neon_process_remaining4
222 ADDS r12, r12, #4
223 BLT celt_pitch_xcorr_neon_process_remaining4_done
224 ; Load x[0...3]
225 VLD1.16 {d2}, [r4]!
226 ; Load y[0...3]
227 VLD1.16 {d3}, [r5]!
228 SUB r12, r12, #4
229 VMLAL.S16 q0, d3, d2
230celt_pitch_xcorr_neon_process_remaining4_done
231 ; Reduce the sum to a single value.
232 VADD.S32 d0, d0, d1
233 VPADDL.S32 d0, d0
234 ADDS r12, r12, #4
235 BLE celt_pitch_xcorr_neon_process_remaining_loop_done
236; Sum terms 1 at a time.
237celt_pitch_xcorr_neon_process_remaining_loop1
238 VLD1.16 {d2[]}, [r4]!
239 VLD1.16 {d3[]}, [r5]!
240 SUBS r12, r12, #1
241 VMLAL.S16 q0, d2, d3
242 BGT celt_pitch_xcorr_neon_process_remaining_loop1
243celt_pitch_xcorr_neon_process_remaining_loop_done
; Store this lag's sum (lane 0 of d0) to *xcorr++ and fold it into maxcorr.
244 VST1.32 {d0[0]}, [r2]!
245 VMAX.S32 d30, d30, d0
246 SUBS r6, r6, #1
247 ; _y++
248 ADD r1, r1, #2
249 ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining
250 BGT celt_pitch_xcorr_neon_process_remaining
251celt_pitch_xcorr_neon_done
; Return maxcorr in r0, restore r4-r6 and return (pc is loaded from the saved lr).
252 VMOV.32 r0, d30[0]
253 LDMFD sp!, {r4-r6, pc}
254 ENDP
255
256ENDIF
257
258IF OPUS_ARM_MAY_HAVE_EDSP
259
260; This will get used on ARMv7 devices without NEON, so it has been optimized
261; to take advantage of dual-issuing where possible.
; Accumulates four cross-correlation sums (lags 0...3 of y against x) into
; r6...r9 using the 16x16 SMLAxy multiply-accumulate instructions.
262xcorr_kernel_edsp PROC
263xcorr_kernel_edsp_start
264 ; input:
265 ; r3 = int len
266 ; r4 = opus_val16 *_x (must be 32-bit aligned)
267 ; r5 = opus_val16 *_y (must be 32-bit aligned)
268 ; r6...r9 = opus_val32 sum[4]
269 ; output:
270 ; r6...r9 = opus_val32 sum[4]
271 ; preserved: r0-r5
272 ; internal usage
273 ; r2 = int j
274 ; r12,r14 = opus_val16 x[4]
275 ; r10,r11 = opus_val16 y[4]
; r2, r4 and r5 are modified below, so they are saved here and restored by
; the LDMFD at xcorr_kernel_edsp_done (which also returns).
276 STMFD sp!, {r2,r4,r5,lr}
277 LDR r10, [r5], #4 ; Load y[0...1]
278 SUBS r2, r3, #4 ; j = len-4
279 LDR r11, [r5], #4 ; Load y[2...3]
280 BLE xcorr_kernel_edsp_process4_done
281 LDR r12, [r4], #4 ; Load x[0...1]
282 ; Stall
283xcorr_kernel_edsp_process4
284 ; The multiplies must issue from pipeline 0, and can't dual-issue with each
285 ; other. Every other instruction here dual-issues with a multiply, and is
286 ; thus "free". There should be no stalls in the body of the loop.
287 SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_0,y_0)
288 LDR r14, [r4], #4 ; Load x[2...3]
289 SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x_0,y_1)
290 SUBS r2, r2, #4 ; j-=4
291 SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_0,y_2)
292 SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x_0,y_3)
293 SMLATT r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_1,y_1)
294 LDR r10, [r5], #4 ; Load y[4...5]
295 SMLATB r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],x_1,y_2)
296 SMLATT r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_1,y_3)
297 SMLATB r9, r12, r10, r9 ; sum[3] = MAC16_16(sum[3],x_1,y_4)
298 LDRGT r12, [r4], #4 ; Load x[0...1]
299 SMLABB r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_2,y_2)
300 SMLABT r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x_2,y_3)
301 SMLABB r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_2,y_4)
302 SMLABT r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x_2,y_5)
303 SMLATT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_3,y_3)
304 LDR r11, [r5], #4 ; Load y[6...7]
305 SMLATB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],x_3,y_4)
306 SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5)
307 SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6)
; Both the LDRGT above and this BGT test the flags set by "SUBS r2, r2, #4".
308 BGT xcorr_kernel_edsp_process4
309xcorr_kernel_edsp_process4_done
; Up to four samples are left; handle them one at a time below.
310 ADDS r2, r2, #4
311 BLE xcorr_kernel_edsp_done
312 LDRH r12, [r4], #2 ; r12 = *x++
313 SUBS r2, r2, #1 ; j--
314 ; Stall
315 SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0)
316 LDRHGT r14, [r4], #2 ; r14 = *x++
317 SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1)
318 SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2)
319 SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3)
320 BLE xcorr_kernel_edsp_done
321 SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1)
322 SUBS r2, r2, #1 ; j--
323 SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2)
324 LDRH r10, [r5], #2 ; r10 = y_4 = *y++
325 SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3)
326 LDRHGT r12, [r4], #2 ; r12 = *x++
327 SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4)
328 BLE xcorr_kernel_edsp_done
329 SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2)
330 CMP r2, #1 ; j-- (compare only: r2 is reloaded with y_5 below)
331 SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3)
332 LDRH r2, [r5], #2 ; r2 = y_5 = *y++
333 SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4)
334 LDRHGT r14, [r4] ; r14 = *x
335 SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5)
336 BLE xcorr_kernel_edsp_done
337 SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3)
338 LDRH r11, [r5] ; r11 = y_6 = *y
339 SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4)
340 SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5)
341 SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6)
342xcorr_kernel_edsp_done
; Restore r2, r4, r5 and return (pc is loaded from the saved lr).
343 LDMFD sp!, {r2,r4,r5,pc}
344 ENDP
345
; Writes xcorr[0...max_pitch-1] for 16-bit x/y using EDSP multiplies and
; returns the largest value written (never less than 1, the initial maxcorr).
; Stages: an optional single sum to bring y to 32-bit alignment, then four
; sums at a time via xcorr_kernel_edsp, then two sums, then one final sum.
346celt_pitch_xcorr_edsp PROC
347 ; input:
348 ; r0 = opus_val16 *_x (must be 32-bit aligned)
349 ; r1 = opus_val16 *_y (only needs to be 16-bit aligned)
350 ; r2 = opus_val32 *xcorr
351 ; r3 = int len
352 ; output:
353 ; r0 = maxcorr
354 ; internal usage
355 ; r4 = opus_val16 *x
356 ; r5 = opus_val16 *y
357 ; r6 = opus_val32 sum0
358 ; r7 = opus_val32 sum1
359 ; r8 = opus_val32 sum2
360 ; r9 = opus_val32 sum3
361 ; r1 = int max_pitch
362 ; r12 = int j
363 ; ignored:
364 ; int arch
; Nine words are pushed here, so the first stack argument (max_pitch) is
; found at [sp, #36] afterwards.
365 STMFD sp!, {r4-r11, lr}
366 MOV r5, r1
367 LDR r1, [sp, #36]
368 MOV r4, r0
; Test whether y is already 32-bit aligned; if so, skip the unaligned sum.
369 TST r5, #3
370 ; maxcorr = 1
371 MOV r0, #1
372 BEQ celt_pitch_xcorr_edsp_process1u_done
373; Compute one sum at the start to make y 32-bit aligned.
374 SUBS r12, r3, #4
375 ; r14 = sum = 0
376 MOV r14, #0
377 LDRH r8, [r5], #2
378 BLE celt_pitch_xcorr_edsp_process1u_loop4_done
379 LDR r6, [r4], #4
380 MOV r8, r8, LSL #16
381celt_pitch_xcorr_edsp_process1u_loop4
382 LDR r9, [r5], #4
383 SMLABT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0)
384 LDR r7, [r4], #4
385 SMLATB r14, r6, r9, r14 ; sum = MAC16_16(sum, x_1, y_1)
386 LDR r8, [r5], #4
387 SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2)
388 SUBS r12, r12, #4 ; j-=4
389 SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3)
390 LDRGT r6, [r4], #4
391 BGT celt_pitch_xcorr_edsp_process1u_loop4
392 MOV r8, r8, LSR #16
393celt_pitch_xcorr_edsp_process1u_loop4_done
394 ADDS r12, r12, #4
395celt_pitch_xcorr_edsp_process1u_loop1
396 LDRHGE r6, [r4], #2
397 ; Stall
398 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
399 SUBSGE r12, r12, #1
400 LDRHGT r8, [r5], #2
401 BGT celt_pitch_xcorr_edsp_process1u_loop1
402 ; Restore _x
403 SUB r4, r4, r3, LSL #1
404 ; Restore and advance _y
405 SUB r5, r5, r3, LSL #1
406 ; maxcorr = max(maxcorr, sum)
407 CMP r0, r14
408 ADD r5, r5, #2
409 MOVLT r0, r14
410 SUBS r1, r1, #1
411 ; xcorr[i] = sum
412 STR r14, [r2], #4
413 BLE celt_pitch_xcorr_edsp_done
414celt_pitch_xcorr_edsp_process1u_done
415 ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2
416 SUBS r1, r1, #4
417 BLT celt_pitch_xcorr_edsp_process2
418celt_pitch_xcorr_edsp_process4
419 ; xcorr_kernel_edsp parameters:
420 ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0}
421 MOV r6, #0
422 MOV r7, #0
423 MOV r8, #0
424 MOV r9, #0
425 BL xcorr_kernel_edsp_start ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len)
426 ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3)
427 CMP r0, r6
428 ; _y+=4
429 ADD r5, r5, #8
430 MOVLT r0, r6
431 CMP r0, r7
432 MOVLT r0, r7
433 CMP r0, r8
434 MOVLT r0, r8
435 CMP r0, r9
436 MOVLT r0, r9
; Store the four sums to xcorr and advance the output pointer.
437 STMIA r2!, {r6-r9}
438 SUBS r1, r1, #4
439 BGE celt_pitch_xcorr_edsp_process4
440celt_pitch_xcorr_edsp_process2
; Fewer than four sums remain; compute two at once if at least two are left.
441 ADDS r1, r1, #2
442 BLT celt_pitch_xcorr_edsp_process1a
443 SUBS r12, r3, #4
444 ; {r10, r11} = {sum0, sum1} = {0, 0}
445 MOV r10, #0
446 MOV r11, #0
447 LDR r8, [r5], #4
448 BLE celt_pitch_xcorr_edsp_process2_loop_done
449 LDR r6, [r4], #4
450 LDR r9, [r5], #4
451celt_pitch_xcorr_edsp_process2_loop4
452 SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
453 LDR r7, [r4], #4
454 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
455 SUBS r12, r12, #4 ; j-=4
456 SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1)
457 LDR r8, [r5], #4
458 SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2)
459 LDRGT r6, [r4], #4
460 SMLABB r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_2, y_2)
461 SMLABT r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_2, y_3)
462 SMLATT r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_3, y_3)
463 LDRGT r9, [r5], #4
464 SMLATB r11, r7, r8, r11 ; sum1 = MAC16_16(sum1, x_3, y_4)
465 BGT celt_pitch_xcorr_edsp_process2_loop4
466celt_pitch_xcorr_edsp_process2_loop_done
467 ADDS r12, r12, #2
468 BLE celt_pitch_xcorr_edsp_process2_1
469 LDR r6, [r4], #4
470 ; Stall
471 SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
472 LDR r9, [r5], #4
473 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
474 SUB r12, r12, #2
475 SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1)
476 MOV r8, r9
477 SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2)
478celt_pitch_xcorr_edsp_process2_1
479 LDRH r6, [r4], #2
480 ADDS r12, r12, #1
481 ; Stall
482 SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0)
483 LDRHGT r7, [r4], #2
484 SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1)
485 BLE celt_pitch_xcorr_edsp_process2_done
486 LDRH r9, [r5], #2
487 SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1)
488 SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2)
489celt_pitch_xcorr_edsp_process2_done
490 ; Restore _x
491 SUB r4, r4, r3, LSL #1
492 ; Restore and advance _y
493 SUB r5, r5, r3, LSL #1
494 ; maxcorr = max(maxcorr, sum0)
495 CMP r0, r10
496 ADD r5, r5, #2
497 MOVLT r0, r10
498 SUB r1, r1, #2
499 ; maxcorr = max(maxcorr, sum1)
500 CMP r0, r11
501 ; xcorr[i] = sum
502 STR r10, [r2], #4
503 MOVLT r0, r11
504 STR r11, [r2], #4
505celt_pitch_xcorr_edsp_process1a
; Compute one last sum if one is still left.
506 ADDS r1, r1, #1
507 BLT celt_pitch_xcorr_edsp_done
508 SUBS r12, r3, #4
509 ; r14 = sum = 0
510 MOV r14, #0
511 BLT celt_pitch_xcorr_edsp_process1a_loop_done
512 LDR r6, [r4], #4
513 LDR r8, [r5], #4
514 LDR r7, [r4], #4
515 LDR r9, [r5], #4
516celt_pitch_xcorr_edsp_process1a_loop4
517 SMLABB r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0)
518 SUBS r12, r12, #4 ; j-=4
519 SMLATT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1)
520 LDRGE r6, [r4], #4
521 SMLABB r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2)
522 LDRGE r8, [r5], #4
523 SMLATT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_3, y_3)
524 LDRGE r7, [r4], #4
525 LDRGE r9, [r5], #4
526 BGE celt_pitch_xcorr_edsp_process1a_loop4
527celt_pitch_xcorr_edsp_process1a_loop_done
528 ADDS r12, r12, #2
529 LDRGE r6, [r4], #4
530 LDRGE r8, [r5], #4
531 ; Stall
532 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0)
533 SUBGE r12, r12, #2
534 SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1)
535 ADDS r12, r12, #1
536 LDRHGE r6, [r4], #2
537 LDRHGE r8, [r5], #2
538 ; Stall
539 SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y)
540 ; maxcorr = max(maxcorr, sum)
541 CMP r0, r14
542 ; xcorr[i] = sum
543 STR r14, [r2], #4
544 MOVLT r0, r14
545celt_pitch_xcorr_edsp_done
; Restore r4-r11 and return maxcorr in r0 (pc is loaded from the saved lr).
546 LDMFD sp!, {r4-r11, pc}
547 ENDP
548
549ENDIF
550
551END
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fft_arm.h b/lib/rbcodec/codecs/libopus/celt/arm/fft_arm.h
new file mode 100644
index 0000000000..0b78175f3a
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fft_arm.h
@@ -0,0 +1,71 @@
1/* Copyright (c) 2015 Xiph.Org Foundation
2 Written by Viswanath Puttagunta */
3/**
4 @file fft_arm.h
5 @brief ARM Neon Intrinsic optimizations for fft using NE10 library
6 */
7
8/*
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33
34#if !defined(FFT_ARM_H)
35#define FFT_ARM_H
36
37#include "kiss_fft.h"
38
/* Everything below is only available when building against the NE10 library. */
39#if defined(HAVE_ARM_NE10)
40
/* Allocate / release the ARM NEON specific part of an FFT state.
   NOTE(review): the meaning of the int return value is not visible from this
   header -- confirm against the implementation. */
41int opus_fft_alloc_arm_neon(kiss_fft_state *st);
42void opus_fft_free_arm_neon(kiss_fft_state *st);
43
/* Forward FFT, NEON version: reads fin, writes fout. */
44void opus_fft_neon(const kiss_fft_state *st,
45 const kiss_fft_cpx *fin,
46 kiss_fft_cpx *fout);
47
/* Inverse FFT, NEON version: reads fin, writes fout. */
48void opus_ifft_neon(const kiss_fft_state *st,
49 const kiss_fft_cpx *fin,
50 kiss_fft_cpx *fout);
51
/* Without run-time CPU detection, bind the generic FFT entry points directly
   to the NEON functions above. The arch argument is evaluated and discarded. */
52#if !defined(OPUS_HAVE_RTCD)
53#define OVERRIDE_OPUS_FFT (1)
54
55#define opus_fft_alloc_arch(_st, arch) \
56 ((void)(arch), opus_fft_alloc_arm_neon(_st))
57
58#define opus_fft_free_arch(_st, arch) \
59 ((void)(arch), opus_fft_free_arm_neon(_st))
60
61#define opus_fft(_st, _fin, _fout, arch) \
62 ((void)(arch), opus_fft_neon(_st, _fin, _fout))
63
64#define opus_ifft(_st, _fin, _fout, arch) \
65 ((void)(arch), opus_ifft_neon(_st, _fin, _fout))
66
67#endif /* OPUS_HAVE_RTCD */
68
69#endif /* HAVE_ARM_NE10 */
70
71#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_arm64.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_arm64.h
new file mode 100644
index 0000000000..c6fbd3db2c
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_arm64.h
@@ -0,0 +1,35 @@
1/* Copyright (C) 2015 Vidyo */
2/*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
18 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*/
26
27#ifndef FIXED_ARM64_H
28#define FIXED_ARM64_H
29
30#include <arm_neon.h>
31
/* AArch64 override of SIG2WORD16: drop any earlier definition, then shift the
   signal down by SIG_SHIFT with PSHR32 and narrow the 32-bit result to 16 bits
   with saturation using the vqmovns_s32 intrinsic from <arm_neon.h>. */
32#undef SIG2WORD16
33#define SIG2WORD16(x) (vqmovns_s32(PSHR32((x), SIG_SHIFT)))
34
35#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
index efb3b1896a..d84888a772 100644
--- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv4.h
@@ -37,7 +37,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
37 "#MULT16_32_Q16\n\t" 37 "#MULT16_32_Q16\n\t"
38 "smull %0, %1, %2, %3\n\t" 38 "smull %0, %1, %2, %3\n\t"
39 : "=&r"(rd_lo), "=&r"(rd_hi) 39 : "=&r"(rd_lo), "=&r"(rd_hi)
40 : "%r"(b),"r"(a<<16) 40 : "%r"(b),"r"(SHL32(a,16))
41 ); 41 );
42 return rd_hi; 42 return rd_hi;
43} 43}
@@ -54,10 +54,10 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
54 "#MULT16_32_Q15\n\t" 54 "#MULT16_32_Q15\n\t"
55 "smull %0, %1, %2, %3\n\t" 55 "smull %0, %1, %2, %3\n\t"
56 : "=&r"(rd_lo), "=&r"(rd_hi) 56 : "=&r"(rd_lo), "=&r"(rd_hi)
57 : "%r"(b), "r"(a<<16) 57 : "%r"(b), "r"(SHL32(a,16))
58 ); 58 );
59 /*We intentionally don't OR in the high bit of rd_lo for speed.*/ 59 /*We intentionally don't OR in the high bit of rd_lo for speed.*/
60 return rd_hi<<1; 60 return SHL32(rd_hi,1);
61} 61}
62#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b)) 62#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
63 63
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
index 36a6321101..6bf73cbace 100644
--- a/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
+++ b/lib/rbcodec/codecs/libopus/celt/arm/fixed_armv5e.h
@@ -59,7 +59,7 @@ static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
59 : "=r"(res) 59 : "=r"(res)
60 : "r"(b), "r"(a) 60 : "r"(b), "r"(a)
61 ); 61 );
62 return res<<1; 62 return SHL32(res,1);
63} 63}
64#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b)) 64#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
65 65
@@ -76,7 +76,7 @@ static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
76 "#MAC16_32_Q15\n\t" 76 "#MAC16_32_Q15\n\t"
77 "smlawb %0, %1, %2, %3;\n" 77 "smlawb %0, %1, %2, %3;\n"
78 : "=r"(res) 78 : "=r"(res)
79 : "r"(b<<1), "r"(a), "r"(c) 79 : "r"(SHL32(b,1)), "r"(a), "r"(c)
80 ); 80 );
81 return res; 81 return res;
82} 82}
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/mdct_arm.h b/lib/rbcodec/codecs/libopus/celt/arm/mdct_arm.h
new file mode 100644
index 0000000000..14200bac4b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/mdct_arm.h
@@ -0,0 +1,59 @@
1/* Copyright (c) 2015 Xiph.Org Foundation
2 Written by Viswanath Puttagunta */
3/**
4 @file mdct_arm.h
5 @brief ARM Neon Intrinsic optimizations for mdct using NE10 library
6 */
7
8/*
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#if !defined(MDCT_ARM_H)
34#define MDCT_ARM_H
35
36#include "mdct.h"
37
/* Everything below is only available when building against the NE10 library. */
38#if defined(HAVE_ARM_NE10)
39/** Compute a forward MDCT and scale by 4/N, trashes the input array */
40void clt_mdct_forward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
41 kiss_fft_scalar * OPUS_RESTRICT out,
42 const opus_val16 *window, int overlap,
43 int shift, int stride, int arch);
44
/** NEON counterpart of clt_mdct_backward; takes the same parameter list as
    the forward transform above. */
45void clt_mdct_backward_neon(const mdct_lookup *l, kiss_fft_scalar *in,
46 kiss_fft_scalar * OPUS_RESTRICT out,
47 const opus_val16 *window, int overlap,
48 int shift, int stride, int arch);
49
/* Without run-time CPU detection, bind the generic MDCT entry points directly
   to the NEON functions above. The macro parameter named _int is passed in
   the position of the prototypes' overlap argument. */
50#if !defined(OPUS_HAVE_RTCD)
51#define OVERRIDE_OPUS_MDCT (1)
52#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
53 clt_mdct_forward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
54#define clt_mdct_backward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
55 clt_mdct_backward_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
56#endif /* OPUS_HAVE_RTCD */
57#endif /* HAVE_ARM_NE10 */
58
59#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/pitch_arm.h b/lib/rbcodec/codecs/libopus/celt/arm/pitch_arm.h
new file mode 100644
index 0000000000..bed8b04eac
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/pitch_arm.h
@@ -0,0 +1,160 @@
1/* Copyright (c) 2010 Xiph.Org Foundation
2 * Copyright (c) 2013 Parrot */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#if !defined(PITCH_ARM_H)
29# define PITCH_ARM_H
30
31# include "armcpu.h"
32/* NEON inner-product helpers: prototypes, plus a build-time binding of the generic names. */
33# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
34opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N);
35void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01,
36 const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
37/* Taken when OPUS_HAVE_RTCD is undefined and OPUS_ARM_PRESUME_NEON is defined; arch is evaluated and discarded. NOTE(review): this test uses OPUS_ARM_PRESUME_NEON while the fallbacks further down test OPUS_ARM_PRESUME_NEON_INTR - confirm that is intended. */
38# if !defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_PRESUME_NEON)
39# define OVERRIDE_CELT_INNER_PROD (1)
40# define OVERRIDE_DUAL_INNER_PROD (1)
41# define celt_inner_prod(x, y, N, arch) ((void)(arch), PRESUME_NEON(celt_inner_prod)(x, y, N))
42# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), PRESUME_NEON(dual_inner_prod)(x, y01, y02, N, xy1, xy2))
43# endif /* !OPUS_HAVE_RTCD && OPUS_ARM_PRESUME_NEON */
44# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
45/* celt_inner_prod() binding when not already overridden: a function-pointer table indexed by (arch & OPUS_ARCHMASK) when OPUS_HAVE_RTCD is defined and NEON intrinsics are optional, otherwise a direct call to celt_inner_prod_neon() when OPUS_ARM_PRESUME_NEON_INTR is defined. */
46# if !defined(OVERRIDE_CELT_INNER_PROD)
47# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
48extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y, int N);
49# define OVERRIDE_CELT_INNER_PROD (1)
50# define celt_inner_prod(x, y, N, arch) ((*CELT_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y, N))
51# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
52# define OVERRIDE_CELT_INNER_PROD (1)
53# define celt_inner_prod(x, y, N, arch) ((void)(arch), celt_inner_prod_neon(x, y, N))
54# endif /* table dispatch / presumed NEON intrinsics */
55# endif /* !OVERRIDE_CELT_INNER_PROD */
56/* dual_inner_prod() binding when not already overridden: same two-way choice as celt_inner_prod(), using DUAL_INNER_PROD_IMPL for the table case and dual_inner_prod_neon() for the presumed case. */
57# if !defined(OVERRIDE_DUAL_INNER_PROD)
58# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
59extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x,
60 const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2);
61# define OVERRIDE_DUAL_INNER_PROD (1)
62# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((*DUAL_INNER_PROD_IMPL[(arch)&OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
63# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
64# define OVERRIDE_DUAL_INNER_PROD (1)
65# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) ((void)(arch), dual_inner_prod_neon(x, y01, y02, N, xy1, xy2))
66# endif /* table dispatch / presumed NEON intrinsics */
67# endif /* !OVERRIDE_DUAL_INNER_PROD */
68
69# if defined(FIXED_POINT)
70/* Fixed-point celt_pitch_xcorr(): per-variant declarations first, then the binding of the generic name. */
71# if defined(OPUS_ARM_MAY_HAVE_NEON)
72opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y,
73 opus_val32 *xcorr, int len, int max_pitch, int arch);
74# endif
75/* No separate MEDIA prototype is declared here: the media name is an alias produced through MAY_HAVE_EDSP(). */
76# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
77# define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr)
78# endif
79
80# if defined(OPUS_ARM_MAY_HAVE_EDSP)
81opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
82 opus_val32 *xcorr, int len, int max_pitch, int arch);
83# endif
84/* Table dispatch is used when OPUS_HAVE_RTCD is defined and at least one of NEON/MEDIA/EDSP is optional rather than presumed; arch both indexes the table and is forwarded as the last argument. */
85# if defined(OPUS_HAVE_RTCD) && \
86 ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
87 (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
88 (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
89extern opus_val32
90(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
91 const opus_val16 *, opus_val32 *, int, int, int);
92# define OVERRIDE_PITCH_XCORR (1)
93# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
94 ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
95 xcorr, len, max_pitch, arch))
96/* Otherwise, if any of EDSP/MEDIA/NEON is presumed, the name is bound at build time through PRESUME_NEON(). */
97# elif defined(OPUS_ARM_PRESUME_EDSP) || \
98 defined(OPUS_ARM_PRESUME_MEDIA) || \
99 defined(OPUS_ARM_PRESUME_NEON)
100# define OVERRIDE_PITCH_XCORR (1)
101# define celt_pitch_xcorr (PRESUME_NEON(celt_pitch_xcorr))
102/* NOTE(review): PRESUME_NEON() is defined outside this view; presumably it resolves to the best presumed variant even when only EDSP or MEDIA is presumed - confirm. */
103# endif
104/* Fixed-point xcorr_kernel(): NEON intrinsics prototype, then the binding of the generic name. */
105# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
106void xcorr_kernel_neon_fixed(
107 const opus_val16 *x,
108 const opus_val16 *y,
109 opus_val32 sum[4],
110 int len);
111# endif
112/* Table dispatch when OPUS_HAVE_RTCD is defined and NEON intrinsics are optional; arch is used only to index the table. */
113# if defined(OPUS_HAVE_RTCD) && \
114 (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
115
116extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
117 const opus_val16 *x,
118 const opus_val16 *y,
119 opus_val32 sum[4],
120 int len);
121
122# define OVERRIDE_XCORR_KERNEL (1)
123# define xcorr_kernel(x, y, sum, len, arch) \
124 ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
125/* Direct call when NEON intrinsics are presumed; arch is evaluated and discarded. */
126# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
127# define OVERRIDE_XCORR_KERNEL (1)
128# define xcorr_kernel(x, y, sum, len, arch) \
129 ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
130/* NOTE(review): the macro above writes (void)arch without parentheses around the argument, unlike the (void)(arch) form used by the inner-product macros in this header. */
131# endif
132
133#else /* Start !FIXED_POINT */
134/* Float case: the NEON routine and the dispatch-table entries return void here, whereas the fixed-point prototypes earlier in this header return opus_val32. */
135#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
136void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
137 opus_val32 *xcorr, int len, int max_pitch, int arch);
138#endif
139/* Table dispatch when OPUS_HAVE_RTCD is defined and NEON intrinsics are optional; arch both indexes the table and is forwarded as the last argument. */
140# if defined(OPUS_HAVE_RTCD) && \
141 (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
142extern void
143(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
144 const opus_val16 *, opus_val32 *, int, int, int);
145
146# define OVERRIDE_PITCH_XCORR (1)
147# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
148 ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
149 xcorr, len, max_pitch, arch))
150/* When NEON intrinsics are presumed, the generic name is a plain alias of the NEON function. */
151# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
152
153# define OVERRIDE_PITCH_XCORR (1)
154# define celt_pitch_xcorr celt_pitch_xcorr_float_neon
155
156# endif
157
158#endif /* end !FIXED_POINT */
159
160#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/arm/pitch_neon_intr.c b/lib/rbcodec/codecs/libopus/celt/arm/pitch_neon_intr.c
new file mode 100644
index 0000000000..1ac38c433a
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/arm/pitch_neon_intr.c
@@ -0,0 +1,290 @@
1/***********************************************************************
2Copyright (c) 2017 Google Inc.
3Redistribution and use in source and binary forms, with or without
4modification, are permitted provided that the following conditions
5are met:
6- Redistributions of source code must retain the above copyright notice,
7this list of conditions and the following disclaimer.
8- Redistributions in binary form must reproduce the above copyright
9notice, this list of conditions and the following disclaimer in the
10documentation and/or other materials provided with the distribution.
11- Neither the name of Internet Society, IETF or IETF Trust, nor the
12names of specific contributors, may be used to endorse or promote
13products derived from this software without specific prior written
14permission.
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25POSSIBILITY OF SUCH DAMAGE.
26***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <arm_neon.h>
33#include "pitch.h"
34
35#ifdef FIXED_POINT
36/* Fixed-point build: returns the inner product of x and y over N samples, accumulated in a four-lane 32-bit NEON register with a scalar tail. */
37opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
38{
39 int i;
40 opus_val32 xy;
41 int16x8_t x_s16x8, y_s16x8;
42 int32x4_t xy_s32x4 = vdupq_n_s32(0);
43 int64x2_t xy_s64x2;
44 int64x1_t xy_s64x1;
45/* Main loop: eight samples per pass, fed to the accumulator as a low half and a high half of four lanes each. */
46 for (i = 0; i < N - 7; i += 8) {
47 x_s16x8 = vld1q_s16(&x[i]);
48 y_s16x8 = vld1q_s16(&y[i]);
49 xy_s32x4 = vmlal_s16(xy_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y_s16x8));
50 xy_s32x4 = vmlal_s16(xy_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y_s16x8));
51 }
52/* At most one further group of four samples is handled with 4-lane loads. */
53 if (N - i >= 4) {
54 const int16x4_t x_s16x4 = vld1_s16(&x[i]);
55 const int16x4_t y_s16x4 = vld1_s16(&y[i]);
56 xy_s32x4 = vmlal_s16(xy_s32x4, x_s16x4, y_s16x4);
57 i += 4;
58 }
59/* Horizontal reduction: pairwise widening add to two 64-bit lanes, add those, then read lane 0 of the 32-bit reinterpretation. */
60 xy_s64x2 = vpaddlq_s32(xy_s32x4);
61 xy_s64x1 = vadd_s64(vget_low_s64(xy_s64x2), vget_high_s64(xy_s64x2));
62 xy = vget_lane_s32(vreinterpret_s32_s64(xy_s64x1), 0);
63/* Scalar loop for whatever is left (fewer than four samples at this point). */
64 for (; i < N; i++) {
65 xy = MAC16_16(xy, x[i], y[i]);
66 }
67/* With OPUS_CHECK_ASM defined, the result must equal celt_inner_prod_c() exactly. */
68#ifdef OPUS_CHECK_ASM
69 celt_assert(celt_inner_prod_c(x, y, N) == xy);
70#endif
71
72 return xy;
73}
74/* Fixed-point build: computes two inner products that share x in one pass; *xy1 receives x.y01 and *xy2 receives x.y02, each over N samples. */
75void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
76 int N, opus_val32 *xy1, opus_val32 *xy2)
77{
78 int i;
79 opus_val32 xy01, xy02;
80 int16x8_t x_s16x8, y01_s16x8, y02_s16x8;
81 int32x4_t xy01_s32x4 = vdupq_n_s32(0);
82 int32x4_t xy02_s32x4 = vdupq_n_s32(0);
83 int64x2_t xy01_s64x2, xy02_s64x2;
84 int64x1_t xy01_s64x1, xy02_s64x1;
85/* Main loop: eight samples per pass; x is loaded once and multiplied into both accumulators. */
86 for (i = 0; i < N - 7; i += 8) {
87 x_s16x8 = vld1q_s16(&x[i]);
88 y01_s16x8 = vld1q_s16(&y01[i]);
89 y02_s16x8 = vld1q_s16(&y02[i]);
90 xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y01_s16x8));
91 xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_low_s16 (x_s16x8), vget_low_s16 (y02_s16x8));
92 xy01_s32x4 = vmlal_s16(xy01_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y01_s16x8));
93 xy02_s32x4 = vmlal_s16(xy02_s32x4, vget_high_s16(x_s16x8), vget_high_s16(y02_s16x8));
94 }
95/* At most one further group of four samples is handled with 4-lane loads. */
96 if (N - i >= 4) {
97 const int16x4_t x_s16x4 = vld1_s16(&x[i]);
98 const int16x4_t y01_s16x4 = vld1_s16(&y01[i]);
99 const int16x4_t y02_s16x4 = vld1_s16(&y02[i]);
100 xy01_s32x4 = vmlal_s16(xy01_s32x4, x_s16x4, y01_s16x4);
101 xy02_s32x4 = vmlal_s16(xy02_s32x4, x_s16x4, y02_s16x4);
102 i += 4;
103 }
104/* Horizontal reduction of each accumulator: pairwise widening add, add the two 64-bit lanes, read lane 0 of the 32-bit reinterpretation. */
105 xy01_s64x2 = vpaddlq_s32(xy01_s32x4);
106 xy02_s64x2 = vpaddlq_s32(xy02_s32x4);
107 xy01_s64x1 = vadd_s64(vget_low_s64(xy01_s64x2), vget_high_s64(xy01_s64x2));
108 xy02_s64x1 = vadd_s64(vget_low_s64(xy02_s64x2), vget_high_s64(xy02_s64x2));
109 xy01 = vget_lane_s32(vreinterpret_s32_s64(xy01_s64x1), 0);
110 xy02 = vget_lane_s32(vreinterpret_s32_s64(xy02_s64x1), 0);
111/* Scalar loop for whatever is left (fewer than four samples at this point). */
112 for (; i < N; i++) {
113 xy01 = MAC16_16(xy01, x[i], y01[i]);
114 xy02 = MAC16_16(xy02, x[i], y02[i]);
115 }
116 *xy1 = xy01;
117 *xy2 = xy02;
118/* With OPUS_CHECK_ASM defined, both outputs must equal those of dual_inner_prod_c() exactly. */
119#ifdef OPUS_CHECK_ASM
120 {
121 opus_val32 xy1_c, xy2_c;
122 dual_inner_prod_c(x, y01, y02, N, &xy1_c, &xy2_c);
123 celt_assert(xy1_c == *xy1);
124 celt_assert(xy2_c == *xy2);
125 }
126#endif
127}
128
129#else /* !FIXED_POINT */
130
131/* ========================================================================== */
132
133#ifdef OPUS_CHECK_ASM
134
135/* This part of code simulates floating-point NEON operations. */
136
137/* celt_inner_prod_neon_float_c_simulation() simulates the floating-point */
138/* operations of celt_inner_prod_neon(), and both functions should have bit */
139/* exact output. */
140static opus_val32 celt_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y, int N)
141{
142 int i;
143 opus_val32 xy, xy0 = 0, xy1 = 0, xy2 = 0, xy3 = 0; /* xy0..xy3 stand in for the four lanes of the vector accumulator */
144 for (i = 0; i < N - 3; i += 4) { /* each group of four samples updates the four partial sums independently */
145 xy0 = MAC16_16(xy0, x[i + 0], y[i + 0]);
146 xy1 = MAC16_16(xy1, x[i + 1], y[i + 1]);
147 xy2 = MAC16_16(xy2, x[i + 2], y[i + 2]);
148 xy3 = MAC16_16(xy3, x[i + 3], y[i + 3]);
149 }
150 xy0 += xy2; /* same pairing as vadd_f32(low half, high half): lane 0 with lane 2 */
151 xy1 += xy3; /* ... and lane 1 with lane 3 */
152 xy = xy0 + xy1; /* final pairwise add, as vpadd_f32 does in the NEON version */
153 for (; i < N; i++) { /* leftover samples are accumulated onto the reduced sum, as in the NEON version */
154 xy = MAC16_16(xy, x[i], y[i]);
155 }
156 return xy;
157}
158
159/* dual_inner_prod_neon_float_c_simulation() simulates the floating-point */
160/* operations of dual_inner_prod_neon(), and both functions should have bit */
161/* exact output. */
162static void dual_inner_prod_neon_float_c_simulation(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
163 int N, opus_val32 *xy1, opus_val32 *xy2)
164{
165 int i;
166 opus_val32 xy01, xy02, xy01_0 = 0, xy01_1 = 0, xy01_2 = 0, xy01_3 = 0, xy02_0 = 0, xy02_1 = 0, xy02_2 = 0, xy02_3 = 0;
167 for (i = 0; i < N - 3; i += 4) { /* four partial sums per output, one per simulated vector lane */
168 xy01_0 = MAC16_16(xy01_0, x[i + 0], y01[i + 0]);
169 xy01_1 = MAC16_16(xy01_1, x[i + 1], y01[i + 1]);
170 xy01_2 = MAC16_16(xy01_2, x[i + 2], y01[i + 2]);
171 xy01_3 = MAC16_16(xy01_3, x[i + 3], y01[i + 3]);
172 xy02_0 = MAC16_16(xy02_0, x[i + 0], y02[i + 0]);
173 xy02_1 = MAC16_16(xy02_1, x[i + 1], y02[i + 1]);
174 xy02_2 = MAC16_16(xy02_2, x[i + 2], y02[i + 2]);
175 xy02_3 = MAC16_16(xy02_3, x[i + 3], y02[i + 3]);
176 }
177 xy01_0 += xy01_2; /* lane 0 with lane 2, lane 1 with lane 3: the vadd_f32(low, high) pairing */
178 xy02_0 += xy02_2;
179 xy01_1 += xy01_3;
180 xy02_1 += xy02_3;
181 xy01 = xy01_0 + xy01_1; /* final pairwise add, as vpadd_f32 does in the NEON version */
182 xy02 = xy02_0 + xy02_1;
183 for (; i < N; i++) { /* leftover samples are accumulated onto the reduced sums */
184 xy01 = MAC16_16(xy01, x[i], y01[i]);
185 xy02 = MAC16_16(xy02, x[i], y02[i]);
186 }
187 *xy1 = xy01;
188 *xy2 = xy02;
189}
190
191#endif /* OPUS_CHECK_ASM */
192
193/* ========================================================================== */
194/* Float build: returns the inner product of x and y over N samples, accumulated in a four-lane float NEON register with a scalar tail. */
195opus_val32 celt_inner_prod_neon(const opus_val16 *x, const opus_val16 *y, int N)
196{
197 int i;
198 opus_val32 xy;
199 float32x4_t xy_f32x4 = vdupq_n_f32(0);
200 float32x2_t xy_f32x2;
201/* Main loop: eight samples per pass as two consecutive 4-lane multiply-accumulates into the same accumulator. */
202 for (i = 0; i < N - 7; i += 8) {
203 float32x4_t x_f32x4, y_f32x4;
204 x_f32x4 = vld1q_f32(&x[i]);
205 y_f32x4 = vld1q_f32(&y[i]);
206 xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
207 x_f32x4 = vld1q_f32(&x[i + 4]);
208 y_f32x4 = vld1q_f32(&y[i + 4]);
209 xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
210 }
211/* At most one further group of four samples. */
212 if (N - i >= 4) {
213 const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
214 const float32x4_t y_f32x4 = vld1q_f32(&y[i]);
215 xy_f32x4 = vmlaq_f32(xy_f32x4, x_f32x4, y_f32x4);
216 i += 4;
217 }
218/* Horizontal reduction: add low and high halves, then a pairwise add; lane 0 holds the total. */
219 xy_f32x2 = vadd_f32(vget_low_f32(xy_f32x4), vget_high_f32(xy_f32x4));
220 xy_f32x2 = vpadd_f32(xy_f32x2, xy_f32x2);
221 xy = vget_lane_f32(xy_f32x2, 0);
222/* Scalar loop for whatever is left (fewer than four samples at this point). */
223 for (; i < N; i++) {
224 xy = MAC16_16(xy, x[i], y[i]);
225 }
226/* With OPUS_CHECK_ASM defined, the result is compared with the C simulation to within VERY_SMALL. */
227#ifdef OPUS_CHECK_ASM
228 celt_assert(ABS32(celt_inner_prod_neon_float_c_simulation(x, y, N) - xy) <= VERY_SMALL);
229#endif
230
231 return xy;
232}
233/* Float build: computes two inner products that share x in one pass; *xy1 receives x.y01 and *xy2 receives x.y02, each over N samples. */
234void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
235 int N, opus_val32 *xy1, opus_val32 *xy2)
236{
237 int i;
238 opus_val32 xy01, xy02;
239 float32x4_t xy01_f32x4 = vdupq_n_f32(0);
240 float32x4_t xy02_f32x4 = vdupq_n_f32(0);
241 float32x2_t xy01_f32x2, xy02_f32x2;
242/* Main loop: eight samples per pass; each 4-lane load of x is multiplied into both accumulators. */
243 for (i = 0; i < N - 7; i += 8) {
244 float32x4_t x_f32x4, y01_f32x4, y02_f32x4;
245 x_f32x4 = vld1q_f32(&x[i]);
246 y01_f32x4 = vld1q_f32(&y01[i]);
247 y02_f32x4 = vld1q_f32(&y02[i]);
248 xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
249 xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
250 x_f32x4 = vld1q_f32(&x[i + 4]);
251 y01_f32x4 = vld1q_f32(&y01[i + 4]);
252 y02_f32x4 = vld1q_f32(&y02[i + 4]);
253 xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
254 xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
255 }
256/* At most one further group of four samples. */
257 if (N - i >= 4) {
258 const float32x4_t x_f32x4 = vld1q_f32(&x[i]);
259 const float32x4_t y01_f32x4 = vld1q_f32(&y01[i]);
260 const float32x4_t y02_f32x4 = vld1q_f32(&y02[i]);
261 xy01_f32x4 = vmlaq_f32(xy01_f32x4, x_f32x4, y01_f32x4);
262 xy02_f32x4 = vmlaq_f32(xy02_f32x4, x_f32x4, y02_f32x4);
263 i += 4;
264 }
265/* Horizontal reduction of each accumulator: add low and high halves, then a pairwise add; lane 0 holds the total. */
266 xy01_f32x2 = vadd_f32(vget_low_f32(xy01_f32x4), vget_high_f32(xy01_f32x4));
267 xy02_f32x2 = vadd_f32(vget_low_f32(xy02_f32x4), vget_high_f32(xy02_f32x4));
268 xy01_f32x2 = vpadd_f32(xy01_f32x2, xy01_f32x2);
269 xy02_f32x2 = vpadd_f32(xy02_f32x2, xy02_f32x2);
270 xy01 = vget_lane_f32(xy01_f32x2, 0);
271 xy02 = vget_lane_f32(xy02_f32x2, 0);
272/* Scalar loop for whatever is left (fewer than four samples at this point). */
273 for (; i < N; i++) {
274 xy01 = MAC16_16(xy01, x[i], y01[i]);
275 xy02 = MAC16_16(xy02, x[i], y02[i]);
276 }
277 *xy1 = xy01;
278 *xy2 = xy02;
279/* With OPUS_CHECK_ASM defined, both outputs are compared with the C simulation to within VERY_SMALL. */
280#ifdef OPUS_CHECK_ASM
281 {
282 opus_val32 xy1_c, xy2_c;
283 dual_inner_prod_neon_float_c_simulation(x, y01, y02, N, &xy1_c, &xy2_c);
284 celt_assert(ABS32(xy1_c - *xy1) <= VERY_SMALL);
285 celt_assert(ABS32(xy2_c - *xy2) <= VERY_SMALL);
286 }
287#endif
288}
289
290#endif /* FIXED_POINT */
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.c b/lib/rbcodec/codecs/libopus/celt/bands.c
index caa70163b4..2702963c37 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/bands.c
@@ -65,19 +65,19 @@ opus_uint32 celt_lcg_rand(opus_uint32 seed)
65 65
66/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness 66/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67 with this approximation is important because it has an impact on the bit allocation */ 67 with this approximation is important because it has an impact on the bit allocation */
68static opus_int16 bitexact_cos(opus_int16 x) 68opus_int16 bitexact_cos(opus_int16 x)
69{ 69{
70 opus_int32 tmp; 70 opus_int32 tmp;
71 opus_int16 x2; 71 opus_int16 x2;
72 tmp = (4096+((opus_int32)(x)*(x)))>>13; 72 tmp = (4096+((opus_int32)(x)*(x)))>>13;
73 celt_assert(tmp<=32767); 73 celt_sig_assert(tmp<=32767);
74 x2 = tmp; 74 x2 = tmp;
75 x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2))))); 75 x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76 celt_assert(x2<=32766); 76 celt_sig_assert(x2<=32766);
77 return 1+x2; 77 return 1+x2;
78} 78}
79 79
80static int bitexact_log2tan(int isin,int icos) 80int bitexact_log2tan(int isin,int icos)
81{ 81{
82 int lc; 82 int lc;
83 int ls; 83 int ls;
@@ -90,13 +90,13 @@ static int bitexact_log2tan(int isin,int icos)
90 -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932); 90 -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91} 91}
92 92
93#if 0
94#ifdef FIXED_POINT 93#ifdef FIXED_POINT
95/* Compute the amplitude (sqrt energy) in each of the bands */ 94/* Compute the amplitude (sqrt energy) in each of the bands */
96void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) 95void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
97{ 96{
98 int i, c, N; 97 int i, c, N;
99 const opus_int16 *eBands = m->eBands; 98 const opus_int16 *eBands = m->eBands;
99 (void)arch;
100 N = m->shortMdctSize<<LM; 100 N = m->shortMdctSize<<LM;
101 c=0; do { 101 c=0; do {
102 for (i=0;i<end;i++) 102 for (i=0;i<end;i++)
@@ -156,7 +156,7 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
156 156
157#else /* FIXED_POINT */ 157#else /* FIXED_POINT */
158/* Compute the amplitude (sqrt energy) in each of the bands */ 158/* Compute the amplitude (sqrt energy) in each of the bands */
159void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) 159void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
160{ 160{
161 int i, c, N; 161 int i, c, N;
162 const opus_int16 *eBands = m->eBands; 162 const opus_int16 *eBands = m->eBands;
@@ -165,7 +165,7 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
165 for (i=0;i<end;i++) 165 for (i=0;i<end;i++)
166 { 166 {
167 opus_val32 sum; 167 opus_val32 sum;
168 sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); 168 sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
169 bandE[i+c*m->nbEBands] = celt_sqrt(sum); 169 bandE[i+c*m->nbEBands] = celt_sqrt(sum);
170 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ 170 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
171 } 171 }
@@ -191,7 +191,6 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
191} 191}
192 192
193#endif /* FIXED_POINT */ 193#endif /* FIXED_POINT */
194#endif
195 194
196/* De-normalise the energy to produce the synthesis from the unit-energy bands */ 195/* De-normalise the energy to produce the synthesis from the unit-energy bands */
197void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, 196void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
@@ -226,9 +225,9 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
226#endif 225#endif
227 j=M*eBands[i]; 226 j=M*eBands[i];
228 band_end = M*eBands[i+1]; 227 band_end = M*eBands[i+1];
229 lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); 228 lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
230#ifndef FIXED_POINT 229#ifndef FIXED_POINT
231 g = celt_exp2(lg); 230 g = celt_exp2(MIN32(32.f, lg));
232#else 231#else
233 /* Handle the integer part of the log energy */ 232 /* Handle the integer part of the log energy */
234 shift = 16-(lg>>DB_SHIFT); 233 shift = 16-(lg>>DB_SHIFT);
@@ -243,12 +242,12 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
243 /* Handle extreme gains with negative shift. */ 242 /* Handle extreme gains with negative shift. */
244 if (shift<0) 243 if (shift<0)
245 { 244 {
246 /* For shift < -2 we'd be likely to overflow, so we're capping 245 /* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
247 the gain here. This shouldn't happen unless the bitstream is 246 capping the gain here, which is equivalent to a cap of 18 on lg.
248 already corrupted. */ 247 This shouldn't trigger unless the bitstream is already corrupted. */
249 if (shift < -2) 248 if (shift <= -2)
250 { 249 {
251 g = 32767; 250 g = 16384;
252 shift = -2; 251 shift = -2;
253 } 252 }
254 do { 253 do {
@@ -268,7 +267,7 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
268/* This prevents energy collapse for transients with multiple short MDCTs */ 267/* This prevents energy collapse for transients with multiple short MDCTs */
269void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, 268void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
270 int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, 269 int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
271 const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed) 270 const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
272{ 271{
273 int c, i, j, k; 272 int c, i, j, k;
274 for (i=start;i<end;i++) 273 for (i=start;i<end;i++)
@@ -283,7 +282,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
283 282
284 N0 = m->eBands[i+1]-m->eBands[i]; 283 N0 = m->eBands[i+1]-m->eBands[i];
285 /* depth in 1/8 bits */ 284 /* depth in 1/8 bits */
286 celt_assert(pulses[i]>=0); 285 celt_sig_assert(pulses[i]>=0);
287 depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; 286 depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
288 287
289#ifdef FIXED_POINT 288#ifdef FIXED_POINT
@@ -357,11 +356,35 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
357 } 356 }
358 /* We just added some energy, so we need to renormalise */ 357 /* We just added some energy, so we need to renormalise */
359 if (renormalize) 358 if (renormalize)
360 renormalise_vector(X, N0<<LM, Q15ONE); 359 renormalise_vector(X, N0<<LM, Q15ONE, arch);
361 } while (++c<C); 360 } while (++c<C);
362 } 361 }
363} 362}
364 363
364/* Compute the weights to use for optimizing normalized distortion across
365 channels. We use the amplitude to weight square distortion, which means
366 that we use the square root of the value we would have been using if we
367 wanted to minimize the MSE in the non-normalized domain. This roughly
368 corresponds to some quick-and-dirty perceptual experiments I ran to
369 measure inter-aural masking (there doesn't seem to be any published data
370 on the topic). Ex yields w[0] and Ey yields w[1]; nothing is returned. */
371static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
372{
373 celt_ener minE;
374#ifdef FIXED_POINT
375 int shift;
376#endif
377 minE = MIN32(Ex, Ey);
378 /* Adjustment to make the weights a bit more conservative. */
379 Ex = ADD32(Ex, minE/3); /* both inputs are raised by a third of the smaller one */
380 Ey = ADD32(Ey, minE/3);
381#ifdef FIXED_POINT
382 shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14; /* common shift derived from the larger adjusted value */
383#endif
384 w[0] = VSHR32(Ex, shift); /* NOTE(review): shift is declared only under FIXED_POINT; presumably VSHR32 discards its second argument in float builds - confirm */
385 w[1] = VSHR32(Ey, shift);
386}
387
365static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) 388static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
366{ 389{
367 int i = bandID; 390 int i = bandID;
@@ -400,7 +423,7 @@ static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
400 } 423 }
401} 424}
402 425
403static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N) 426static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
404{ 427{
405 int j; 428 int j;
406 opus_val32 xp=0, side=0; 429 opus_val32 xp=0, side=0;
@@ -412,11 +435,11 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
412 opus_val32 t, lgain, rgain; 435 opus_val32 t, lgain, rgain;
413 436
414 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ 437 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
415 dual_inner_prod(Y, X, Y, N, &xp, &side); 438 dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
416 /* Compensating for the mid normalization */ 439 /* Compensating for the mid normalization */
417 xp = MULT16_32_Q15(mid, xp); 440 xp = MULT16_32_Q15(mid, xp);
418 /* mid and side are in Q15, not Q14 like X and Y */ 441 /* mid and side are in Q15, not Q14 like X and Y */
419 mid2 = SHR32(mid, 1); 442 mid2 = SHR16(mid, 1);
420 El = MULT16_16(mid2, mid2) + side - 2*xp; 443 El = MULT16_16(mid2, mid2) + side - 2*xp;
421 Er = MULT16_16(mid2, mid2) + side + 2*xp; 444 Er = MULT16_16(mid2, mid2) + side + 2*xp;
422 if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) 445 if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
@@ -452,11 +475,10 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
452 } 475 }
453} 476}
454 477
455#if 0
456/* Decide whether we should spread the pulses in the current frame */ 478/* Decide whether we should spread the pulses in the current frame */
457int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, 479int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
458 int last_decision, int *hf_average, int *tapset_decision, int update_hf, 480 int last_decision, int *hf_average, int *tapset_decision, int update_hf,
459 int end, int C, int M) 481 int end, int C, int M, const int *spread_weight)
460{ 482{
461 int i, c, N0; 483 int i, c, N0;
462 int sum = 0, nbBands=0; 484 int sum = 0, nbBands=0;
@@ -497,8 +519,8 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
497 if (i>m->nbEBands-4) 519 if (i>m->nbEBands-4)
498 hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); 520 hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
499 tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); 521 tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
500 sum += tmp*256; 522 sum += tmp*spread_weight[i];
501 nbBands++; 523 nbBands+=spread_weight[i];
502 } 524 }
503 } while (++c<C); 525 } while (++c<C);
504 526
@@ -522,7 +544,7 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
522 /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ 544 /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
523 celt_assert(nbBands>0); /* end has to be non-zero */ 545 celt_assert(nbBands>0); /* end has to be non-zero */
524 celt_assert(sum>=0); 546 celt_assert(sum>=0);
525 sum = celt_udiv(sum, nbBands); 547 sum = celt_udiv((opus_int32)sum<<8, nbBands);
526 /* Recursive averaging */ 548 /* Recursive averaging */
527 sum = (sum+*average)>>1; 549 sum = (sum+*average)>>1;
528 *average = sum; 550 *average = sum;
@@ -546,7 +568,6 @@ int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
546#endif 568#endif
547 return decision; 569 return decision;
548} 570}
549#endif
550 571
551/* Indexing table for converting from natural Hadamard to ordered Hadamard 572/* Indexing table for converting from natural Hadamard to ordered Hadamard
552 This is essentially a bit-reversed Gray, on top of which we've added 573 This is essentially a bit-reversed Gray, on top of which we've added
@@ -651,6 +672,7 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
651 672
652struct band_ctx { 673struct band_ctx {
653 int encode; 674 int encode;
675 int resynth;
654 const CELTMode *m; 676 const CELTMode *m;
655 int i; 677 int i;
656 int intensity; 678 int intensity;
@@ -660,6 +682,10 @@ struct band_ctx {
660 opus_int32 remaining_bits; 682 opus_int32 remaining_bits;
661 const celt_ener *bandE; 683 const celt_ener *bandE;
662 opus_uint32 seed; 684 opus_uint32 seed;
685 int arch;
686 int theta_round;
687 int disable_inv;
688 int avoid_split_noise;
663}; 689};
664 690
665struct split_ctx { 691struct split_ctx {
@@ -711,14 +737,41 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
711 side and mid. With just that parameter, we can re-scale both 737 side and mid. With just that parameter, we can re-scale both
712 mid and side because we know that 1) they have unit norm and 738 mid and side because we know that 1) they have unit norm and
713 2) they are orthogonal. */ 739 2) they are orthogonal. */
714 itheta = stereo_itheta(X, Y, stereo, N); 740 itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
715 } 741 }
716 tell = ec_tell_frac(ec); 742 tell = ec_tell_frac(ec);
717 if (qn!=1) 743 if (qn!=1)
718 { 744 {
719 if (encode) 745 if (encode)
720 itheta = (itheta*qn+8192)>>14; 746 {
721 747 if (!stereo || ctx->theta_round == 0)
748 {
749 itheta = (itheta*(opus_int32)qn+8192)>>14;
750 if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
751 {
752 /* Check if the selected value of theta will cause the bit allocation
753 to inject noise on one side. If so, make sure the energy of that side
754 is zero. */
755 int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
756 imid = bitexact_cos((opus_int16)unquantized);
757 iside = bitexact_cos((opus_int16)(16384-unquantized));
758 delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
759 if (delta > *b)
760 itheta = qn;
761 else if (delta < -*b)
762 itheta = 0;
763 }
764 } else {
765 int down;
766 /* Bias quantization towards itheta=0 and itheta=16384. */
767 int bias = itheta > 8192 ? 32767/qn : -32767/qn;
768 down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
769 if (ctx->theta_round < 0)
770 itheta = down;
771 else
772 itheta = down+1;
773 }
774 }
722 /* Entropy coding of the angle. We use a uniform pdf for the 775 /* Entropy coding of the angle. We use a uniform pdf for the
723 time split, a step for stereo, and a triangular one for the rest. */ 776 time split, a step for stereo, and a triangular one for the rest. */
724 if (stereo && N>2) 777 if (stereo && N>2)
@@ -796,7 +849,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
796 } else if (stereo) { 849 } else if (stereo) {
797 if (encode) 850 if (encode)
798 { 851 {
799 inv = itheta > 8192; 852 inv = itheta > 8192 && !ctx->disable_inv;
800 if (inv) 853 if (inv)
801 { 854 {
802 int j; 855 int j;
@@ -813,6 +866,9 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
813 inv = ec_dec_bit_logp(ec, 2); 866 inv = ec_dec_bit_logp(ec, 2);
814 } else 867 } else
815 inv = 0; 868 inv = 0;
869 /* inv flag override to avoid problems with downmixing. */
870 if (ctx->disable_inv)
871 inv = 0;
816 itheta = 0; 872 itheta = 0;
817 } 873 }
818 qalloc = ec_tell_frac(ec) - tell; 874 qalloc = ec_tell_frac(ec) - tell;
@@ -848,11 +904,6 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
848static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, 904static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
849 celt_norm *lowband_out) 905 celt_norm *lowband_out)
850{ 906{
851#ifdef RESYNTH
852 int resynth = 1;
853#else
854 int resynth = !ctx->encode;
855#endif
856 int c; 907 int c;
857 int stereo; 908 int stereo;
858 celt_norm *x = X; 909 celt_norm *x = X;
@@ -877,7 +928,7 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
877 ctx->remaining_bits -= 1<<BITRES; 928 ctx->remaining_bits -= 1<<BITRES;
878 b-=1<<BITRES; 929 b-=1<<BITRES;
879 } 930 }
880 if (resynth) 931 if (ctx->resynth)
881 x[0] = sign ? -NORM_SCALING : NORM_SCALING; 932 x[0] = sign ? -NORM_SCALING : NORM_SCALING;
882 x = Y; 933 x = Y;
883 } while (++c<1+stereo); 934 } while (++c<1+stereo);
@@ -902,11 +953,6 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
902 int B0=B; 953 int B0=B;
903 opus_val16 mid=0, side=0; 954 opus_val16 mid=0, side=0;
904 unsigned cm=0; 955 unsigned cm=0;
905#ifdef RESYNTH
906 int resynth = 1;
907#else
908 int resynth = !ctx->encode;
909#endif
910 celt_norm *Y=NULL; 956 celt_norm *Y=NULL;
911 int encode; 957 int encode;
912 const CELTMode *m; 958 const CELTMode *m;
@@ -938,8 +984,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
938 fill = (fill&1)|(fill<<1); 984 fill = (fill&1)|(fill<<1);
939 B = (B+1)>>1; 985 B = (B+1)>>1;
940 986
941 compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, 987 compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
942 LM, 0, &fill);
943 imid = sctx.imid; 988 imid = sctx.imid;
944 iside = sctx.iside; 989 iside = sctx.iside;
945 delta = sctx.delta; 990 delta = sctx.delta;
@@ -973,24 +1018,20 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
973 rebalance = ctx->remaining_bits; 1018 rebalance = ctx->remaining_bits;
974 if (mbits >= sbits) 1019 if (mbits >= sbits)
975 { 1020 {
976 cm = quant_partition(ctx, X, N, mbits, B, 1021 cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
977 lowband, LM,
978 MULT16_16_P15(gain,mid), fill); 1022 MULT16_16_P15(gain,mid), fill);
979 rebalance = mbits - (rebalance-ctx->remaining_bits); 1023 rebalance = mbits - (rebalance-ctx->remaining_bits);
980 if (rebalance > 3<<BITRES && itheta!=0) 1024 if (rebalance > 3<<BITRES && itheta!=0)
981 sbits += rebalance - (3<<BITRES); 1025 sbits += rebalance - (3<<BITRES);
982 cm |= quant_partition(ctx, Y, N, sbits, B, 1026 cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
983 next_lowband2, LM,
984 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); 1027 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
985 } else { 1028 } else {
986 cm = quant_partition(ctx, Y, N, sbits, B, 1029 cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
987 next_lowband2, LM,
988 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); 1030 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
989 rebalance = sbits - (rebalance-ctx->remaining_bits); 1031 rebalance = sbits - (rebalance-ctx->remaining_bits);
990 if (rebalance > 3<<BITRES && itheta!=16384) 1032 if (rebalance > 3<<BITRES && itheta!=16384)
991 mbits += rebalance - (3<<BITRES); 1033 mbits += rebalance - (3<<BITRES);
992 cm |= quant_partition(ctx, X, N, mbits, B, 1034 cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
993 lowband, LM,
994 MULT16_16_P15(gain,mid), fill); 1035 MULT16_16_P15(gain,mid), fill);
995 } 1036 }
996 } else { 1037 } else {
@@ -1015,18 +1056,14 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
1015 /* Finally do the actual quantization */ 1056 /* Finally do the actual quantization */
1016 if (encode) 1057 if (encode)
1017 { 1058 {
1018 cm = alg_quant(X, N, K, spread, B, ec 1059 cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
1019#ifdef RESYNTH
1020 , gain
1021#endif
1022 );
1023 } else { 1060 } else {
1024 cm = alg_unquant(X, N, K, spread, B, ec, gain); 1061 cm = alg_unquant(X, N, K, spread, B, ec, gain);
1025 } 1062 }
1026 } else { 1063 } else {
1027 /* If there's no pulse, fill the band anyway */ 1064 /* If there's no pulse, fill the band anyway */
1028 int j; 1065 int j;
1029 if (resynth) 1066 if (ctx->resynth)
1030 { 1067 {
1031 unsigned cm_mask; 1068 unsigned cm_mask;
1032 /* B can be as large as 16, so this shift might overflow an int on a 1069 /* B can be as large as 16, so this shift might overflow an int on a
@@ -1059,7 +1096,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
1059 } 1096 }
1060 cm = fill; 1097 cm = fill;
1061 } 1098 }
1062 renormalise_vector(X, N, gain); 1099 renormalise_vector(X, N, gain, ctx->arch);
1063 } 1100 }
1064 } 1101 }
1065 } 1102 }
@@ -1083,11 +1120,6 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1083 int recombine=0; 1120 int recombine=0;
1084 int longBlocks; 1121 int longBlocks;
1085 unsigned cm=0; 1122 unsigned cm=0;
1086#ifdef RESYNTH
1087 int resynth = 1;
1088#else
1089 int resynth = !ctx->encode;
1090#endif
1091 int k; 1123 int k;
1092 int encode; 1124 int encode;
1093 int tf_change; 1125 int tf_change;
@@ -1154,11 +1186,10 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1154 deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks); 1186 deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1155 } 1187 }
1156 1188
1157 cm = quant_partition(ctx, X, N, b, B, lowband, 1189 cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
1158 LM, gain, fill);
1159 1190
1160 /* This code is used by the decoder and by the resynthesis-enabled encoder */ 1191 /* This code is used by the decoder and by the resynthesis-enabled encoder */
1161 if (resynth) 1192 if (ctx->resynth)
1162 { 1193 {
1163 /* Undo the sample reorganization going from time order to frequency order */ 1194 /* Undo the sample reorganization going from time order to frequency order */
1164 if (B0>1) 1195 if (B0>1)
@@ -1211,11 +1242,6 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
1211 int inv = 0; 1242 int inv = 0;
1212 opus_val16 mid=0, side=0; 1243 opus_val16 mid=0, side=0;
1213 unsigned cm=0; 1244 unsigned cm=0;
1214#ifdef RESYNTH
1215 int resynth = 1;
1216#else
1217 int resynth = !ctx->encode;
1218#endif
1219 int mbits, sbits, delta; 1245 int mbits, sbits, delta;
1220 int itheta; 1246 int itheta;
1221 int qalloc; 1247 int qalloc;
@@ -1235,8 +1261,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
1235 1261
1236 orig_fill = fill; 1262 orig_fill = fill;
1237 1263
1238 compute_theta(ctx, &sctx, X, Y, N, &b, B, B, 1264 compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
1239 LM, 1, &fill);
1240 inv = sctx.inv; 1265 inv = sctx.inv;
1241 imid = sctx.imid; 1266 imid = sctx.imid;
1242 iside = sctx.iside; 1267 iside = sctx.iside;
@@ -1284,13 +1309,13 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
1284 sign = 1-2*sign; 1309 sign = 1-2*sign;
1285 /* We use orig_fill here because we want to fold the side, but if 1310 /* We use orig_fill here because we want to fold the side, but if
1286 itheta==16384, we'll have cleared the low bits of fill. */ 1311 itheta==16384, we'll have cleared the low bits of fill. */
1287 cm = quant_band(ctx, x2, N, mbits, B, lowband, 1312 cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1288 LM, lowband_out, Q15ONE, lowband_scratch, orig_fill); 1313 lowband_scratch, orig_fill);
1289 /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), 1314 /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1290 and there's no need to worry about mixing with the other channel. */ 1315 and there's no need to worry about mixing with the other channel. */
1291 y2[0] = -sign*x2[1]; 1316 y2[0] = -sign*x2[1];
1292 y2[1] = sign*x2[0]; 1317 y2[1] = sign*x2[0];
1293 if (resynth) 1318 if (ctx->resynth)
1294 { 1319 {
1295 celt_norm tmp; 1320 celt_norm tmp;
1296 X[0] = MULT16_16_Q15(mid, X[0]); 1321 X[0] = MULT16_16_Q15(mid, X[0]);
@@ -1317,41 +1342,35 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
1317 { 1342 {
1318 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized 1343 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1319 mid for folding later. */ 1344 mid for folding later. */
1320 cm = quant_band(ctx, X, N, mbits, B, 1345 cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1321 lowband, LM, lowband_out, 1346 lowband_scratch, fill);
1322 Q15ONE, lowband_scratch, fill);
1323 rebalance = mbits - (rebalance-ctx->remaining_bits); 1347 rebalance = mbits - (rebalance-ctx->remaining_bits);
1324 if (rebalance > 3<<BITRES && itheta!=0) 1348 if (rebalance > 3<<BITRES && itheta!=0)
1325 sbits += rebalance - (3<<BITRES); 1349 sbits += rebalance - (3<<BITRES);
1326 1350
1327 /* For a stereo split, the high bits of fill are always zero, so no 1351 /* For a stereo split, the high bits of fill are always zero, so no
1328 folding will be done to the side. */ 1352 folding will be done to the side. */
1329 cm |= quant_band(ctx, Y, N, sbits, B, 1353 cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1330 NULL, LM, NULL,
1331 side, NULL, fill>>B);
1332 } else { 1354 } else {
1333 /* For a stereo split, the high bits of fill are always zero, so no 1355 /* For a stereo split, the high bits of fill are always zero, so no
1334 folding will be done to the side. */ 1356 folding will be done to the side. */
1335 cm = quant_band(ctx, Y, N, sbits, B, 1357 cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1336 NULL, LM, NULL,
1337 side, NULL, fill>>B);
1338 rebalance = sbits - (rebalance-ctx->remaining_bits); 1358 rebalance = sbits - (rebalance-ctx->remaining_bits);
1339 if (rebalance > 3<<BITRES && itheta!=16384) 1359 if (rebalance > 3<<BITRES && itheta!=16384)
1340 mbits += rebalance - (3<<BITRES); 1360 mbits += rebalance - (3<<BITRES);
1341 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized 1361 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1342 mid for folding later. */ 1362 mid for folding later. */
1343 cm |= quant_band(ctx, X, N, mbits, B, 1363 cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1344 lowband, LM, lowband_out, 1364 lowband_scratch, fill);
1345 Q15ONE, lowband_scratch, fill);
1346 } 1365 }
1347 } 1366 }
1348 1367
1349 1368
1350 /* This code is used by the decoder and by the resynthesis-enabled encoder */ 1369 /* This code is used by the decoder and by the resynthesis-enabled encoder */
1351 if (resynth) 1370 if (ctx->resynth)
1352 { 1371 {
1353 if (N!=2) 1372 if (N!=2)
1354 stereo_merge(X, Y, mid, N); 1373 stereo_merge(X, Y, mid, N, ctx->arch);
1355 if (inv) 1374 if (inv)
1356 { 1375 {
1357 int j; 1376 int j;
@@ -1362,17 +1381,38 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
1362 return cm; 1381 return cm;
1363} 1382}
1364 1383
1384static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
1385{
1386 int n1, n2;
1387 const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1388 n1 = M*(eBands[start+1]-eBands[start]);
1389 n2 = M*(eBands[start+2]-eBands[start+1]);
1390 /* Duplicate enough of the first band folding data to be able to fold the second band.
1391 Copies no data for CELT-only mode. */
1392 OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
1393 if (dual_stereo)
1394 OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
1395}
1365 1396
1366void quant_all_bands(int encode, const CELTMode *m, int start, int end, 1397void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1367 celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, 1398 celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1368 int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, 1399 const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1369 opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed) 1400 int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1401 opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1402 opus_uint32 *seed, int complexity, int arch, int disable_inv)
1370{ 1403{
1371 int i; 1404 int i;
1372 opus_int32 remaining_bits; 1405 opus_int32 remaining_bits;
1373 const opus_int16 * OPUS_RESTRICT eBands = m->eBands; 1406 const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1374 celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2; 1407 celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1375 VARDECL(celt_norm, _norm); 1408 VARDECL(celt_norm, _norm);
1409 VARDECL(celt_norm, _lowband_scratch);
1410 VARDECL(celt_norm, X_save);
1411 VARDECL(celt_norm, Y_save);
1412 VARDECL(celt_norm, X_save2);
1413 VARDECL(celt_norm, Y_save2);
1414 VARDECL(celt_norm, norm_save2);
1415 int resynth_alloc;
1376 celt_norm *lowband_scratch; 1416 celt_norm *lowband_scratch;
1377 int B; 1417 int B;
1378 int M; 1418 int M;
@@ -1380,10 +1420,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1380 int update_lowband = 1; 1420 int update_lowband = 1;
1381 int C = Y_ != NULL ? 2 : 1; 1421 int C = Y_ != NULL ? 2 : 1;
1382 int norm_offset; 1422 int norm_offset;
1423 int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
1383#ifdef RESYNTH 1424#ifdef RESYNTH
1384 int resynth = 1; 1425 int resynth = 1;
1385#else 1426#else
1386 int resynth = !encode; 1427 int resynth = !encode || theta_rdo;
1387#endif 1428#endif
1388 struct band_ctx ctx; 1429 struct band_ctx ctx;
1389 SAVE_STACK; 1430 SAVE_STACK;
@@ -1396,9 +1437,24 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1396 ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm); 1437 ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1397 norm = _norm; 1438 norm = _norm;
1398 norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset; 1439 norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1399 /* We can use the last band as scratch space because we don't need that 1440
1400 scratch space for the last band. */ 1441 /* For decoding, we can use the last band as scratch space because we don't need that
1401 lowband_scratch = X_+M*eBands[m->nbEBands-1]; 1442 scratch space for the last band and we don't care about the data there until we're
1443 decoding the last band. */
1444 if (encode && resynth)
1445 resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
1446 else
1447 resynth_alloc = ALLOC_NONE;
1448 ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
1449 if (encode && resynth)
1450 lowband_scratch = _lowband_scratch;
1451 else
1452 lowband_scratch = X_+M*eBands[m->nbEBands-1];
1453 ALLOC(X_save, resynth_alloc, celt_norm);
1454 ALLOC(Y_save, resynth_alloc, celt_norm);
1455 ALLOC(X_save2, resynth_alloc, celt_norm);
1456 ALLOC(Y_save2, resynth_alloc, celt_norm);
1457 ALLOC(norm_save2, resynth_alloc, celt_norm);
1402 1458
1403 lowband_offset = 0; 1459 lowband_offset = 0;
1404 ctx.bandE = bandE; 1460 ctx.bandE = bandE;
@@ -1408,6 +1464,12 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1408 ctx.m = m; 1464 ctx.m = m;
1409 ctx.seed = *seed; 1465 ctx.seed = *seed;
1410 ctx.spread = spread; 1466 ctx.spread = spread;
1467 ctx.arch = arch;
1468 ctx.disable_inv = disable_inv;
1469 ctx.resynth = resynth;
1470 ctx.theta_round = 0;
1471 /* Avoid injecting noise in the first band on transients. */
1472 ctx.avoid_split_noise = B > 1;
1411 for (i=start;i<end;i++) 1473 for (i=start;i<end;i++)
1412 { 1474 {
1413 opus_int32 tell; 1475 opus_int32 tell;
@@ -1430,6 +1492,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1430 else 1492 else
1431 Y = NULL; 1493 Y = NULL;
1432 N = M*eBands[i+1]-M*eBands[i]; 1494 N = M*eBands[i+1]-M*eBands[i];
1495 celt_assert(N > 0);
1433 tell = ec_tell_frac(ec); 1496 tell = ec_tell_frac(ec);
1434 1497
1435 /* Compute how many bits we want to allocate to this band */ 1498 /* Compute how many bits we want to allocate to this band */
@@ -1445,8 +1508,15 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1445 b = 0; 1508 b = 0;
1446 } 1509 }
1447 1510
1511#ifndef DISABLE_UPDATE_DRAFT
1512 if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
1513 lowband_offset = i;
1514 if (i == start+1)
1515 special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1516#else
1448 if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0)) 1517 if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1449 lowband_offset = i; 1518 lowband_offset = i;
1519#endif
1450 1520
1451 tf_change = tf_res[i]; 1521 tf_change = tf_res[i];
1452 ctx.tf_change = tf_change; 1522 ctx.tf_change = tf_change;
@@ -1457,7 +1527,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1457 Y = norm; 1527 Y = norm;
1458 lowband_scratch = NULL; 1528 lowband_scratch = NULL;
1459 } 1529 }
1460 if (i==end-1) 1530 if (last && !theta_rdo)
1461 lowband_scratch = NULL; 1531 lowband_scratch = NULL;
1462 1532
1463 /* Get a conservative estimate of the collapse_mask's for the bands we're 1533 /* Get a conservative estimate of the collapse_mask's for the bands we're
@@ -1472,7 +1542,11 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1472 fold_start = lowband_offset; 1542 fold_start = lowband_offset;
1473 while(M*eBands[--fold_start] > effective_lowband+norm_offset); 1543 while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1474 fold_end = lowband_offset-1; 1544 fold_end = lowband_offset-1;
1545#ifndef DISABLE_UPDATE_DRAFT
1546 while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
1547#else
1475 while(M*eBands[++fold_end] < effective_lowband+norm_offset+N); 1548 while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1549#endif
1476 x_cm = y_cm = 0; 1550 x_cm = y_cm = 0;
1477 fold_i = fold_start; do { 1551 fold_i = fold_start; do {
1478 x_cm |= collapse_masks[fold_i*C+0]; 1552 x_cm |= collapse_masks[fold_i*C+0];
@@ -1505,13 +1579,79 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1505 } else { 1579 } else {
1506 if (Y!=NULL) 1580 if (Y!=NULL)
1507 { 1581 {
1508 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, 1582 if (theta_rdo && i < intensity)
1509 effective_lowband != -1 ? norm+effective_lowband : NULL, LM, 1583 {
1510 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm); 1584 ec_ctx ec_save, ec_save2;
1585 struct band_ctx ctx_save, ctx_save2;
1586 opus_val32 dist0, dist1;
1587 unsigned cm, cm2;
1588 int nstart_bytes, nend_bytes, save_bytes;
1589 unsigned char *bytes_buf;
1590 unsigned char bytes_save[1275];
1591 opus_val16 w[2];
1592 compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
1593 /* Make a copy. */
1594 cm = x_cm|y_cm;
1595 ec_save = *ec;
1596 ctx_save = ctx;
1597 OPUS_COPY(X_save, X, N);
1598 OPUS_COPY(Y_save, Y, N);
1599 /* Encode and round down. */
1600 ctx.theta_round = -1;
1601 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1602 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1603 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1604 dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1605
1606 /* Save first result. */
1607 cm2 = x_cm;
1608 ec_save2 = *ec;
1609 ctx_save2 = ctx;
1610 OPUS_COPY(X_save2, X, N);
1611 OPUS_COPY(Y_save2, Y, N);
1612 if (!last)
1613 OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
1614 nstart_bytes = ec_save.offs;
1615 nend_bytes = ec_save.storage;
1616 bytes_buf = ec_save.buf+nstart_bytes;
1617 save_bytes = nend_bytes-nstart_bytes;
1618 OPUS_COPY(bytes_save, bytes_buf, save_bytes);
1619
1620 /* Restore */
1621 *ec = ec_save;
1622 ctx = ctx_save;
1623 OPUS_COPY(X, X_save, N);
1624 OPUS_COPY(Y, Y_save, N);
1625#ifndef DISABLE_UPDATE_DRAFT
1626 if (i == start+1)
1627 special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1628#endif
1629 /* Encode and round up. */
1630 ctx.theta_round = 1;
1631 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1632 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1633 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1634 dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1635 if (dist0 >= dist1) {
1636 x_cm = cm2;
1637 *ec = ec_save2;
1638 ctx = ctx_save2;
1639 OPUS_COPY(X, X_save2, N);
1640 OPUS_COPY(Y, Y_save2, N);
1641 if (!last)
1642 OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
1643 OPUS_COPY(bytes_buf, bytes_save, save_bytes);
1644 }
1645 } else {
1646 ctx.theta_round = 0;
1647 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1648 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1649 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
1650 }
1511 } else { 1651 } else {
1512 x_cm = quant_band(&ctx, X, N, b, B, 1652 x_cm = quant_band(&ctx, X, N, b, B,
1513 effective_lowband != -1 ? norm+effective_lowband : NULL, LM, 1653 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1514 last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm); 1654 last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
1515 } 1655 }
1516 y_cm = x_cm; 1656 y_cm = x_cm;
1517 } 1657 }
@@ -1521,6 +1661,9 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1521 1661
1522 /* Update the folding position only as long as we have 1 bit/sample depth. */ 1662 /* Update the folding position only as long as we have 1 bit/sample depth. */
1523 update_lowband = b>(N<<BITRES); 1663 update_lowband = b>(N<<BITRES);
1664 /* We only need to avoid noise on a split for the first band. After that, we
1665 have folding. */
1666 ctx.avoid_split_noise = 0;
1524 } 1667 }
1525 *seed = ctx.seed; 1668 *seed = ctx.seed;
1526 1669
diff --git a/lib/rbcodec/codecs/libopus/celt/bands.h b/lib/rbcodec/codecs/libopus/celt/bands.h
index 69901b1e33..422b32cf75 100644
--- a/lib/rbcodec/codecs/libopus/celt/bands.h
+++ b/lib/rbcodec/codecs/libopus/celt/bands.h
@@ -36,12 +36,15 @@
36#include "entdec.h" 36#include "entdec.h"
37#include "rate.h" 37#include "rate.h"
38 38
39opus_int16 bitexact_cos(opus_int16 x);
40int bitexact_log2tan(int isin,int icos);
41
39/** Compute the amplitude (sqrt energy) in each of the bands 42/** Compute the amplitude (sqrt energy) in each of the bands
40 * @param m Mode data 43 * @param m Mode data
41 * @param X Spectrum 44 * @param X Spectrum
42 * @param bandE Square root of the energy for each band (returned) 45 * @param bandE Square root of the energy for each band (returned)
43 */ 46 */
44void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM); 47void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch);
45 48
46/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ 49/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
47 50
@@ -69,7 +72,7 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
69 72
70int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, 73int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
71 int last_decision, int *hf_average, int *tapset_decision, int update_hf, 74 int last_decision, int *hf_average, int *tapset_decision, int update_hf,
72 int end, int C, int M); 75 int end, int C, int M, const int *spread_weight);
73 76
74#ifdef MEASURE_NORM_MSE 77#ifdef MEASURE_NORM_MSE
75void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C); 78void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C);
@@ -98,15 +101,20 @@ void haar1(celt_norm *X, int N0, int stride);
98 * @param LM log2() of the number of 2.5 subframes in the frame 101 * @param LM log2() of the number of 2.5 subframes in the frame
99 * @param codedBands Last band to receive bits + 1 102 * @param codedBands Last band to receive bits + 1
100 * @param seed Random generator seed 103 * @param seed Random generator seed
104 * @param arch Run-time architecture (see opus_select_arch())
101 */ 105 */
102void quant_all_bands(int encode, const CELTMode *m, int start, int end, 106void quant_all_bands(int encode, const CELTMode *m, int start, int end,
103 celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, 107 celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
104 int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, 108 const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
105 opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed); 109 int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
106 110 opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
107void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, 111 int complexity, int arch, int disable_inv);
108 int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, 112
109 const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed); 113void anti_collapse(const CELTMode *m, celt_norm *X_,
114 unsigned char *collapse_masks, int LM, int C, int size, int start,
115 int end, const opus_val16 *logE, const opus_val16 *prev1logE,
116 const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed,
117 int arch);
110 118
111opus_uint32 celt_lcg_rand(opus_uint32 seed); 119opus_uint32 celt_lcg_rand(opus_uint32 seed);
112 120
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index c0a1e0dab9..9ce234695c 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -89,10 +89,13 @@ int resampling_factor(opus_int32 rate)
89 return ret; 89 return ret;
90} 90}
91 91
92#ifndef OVERRIDE_COMB_FILTER_CONST 92#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
93/* This version should be faster on ARM */ 93/* This version should be faster on ARM */
94#ifdef OPUS_ARM_ASM 94#ifdef OPUS_ARM_ASM
95static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 95#ifndef NON_STATIC_COMB_FILTER_CONST_C
96static
97#endif
98void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
96 opus_val16 g10, opus_val16 g11, opus_val16 g12) 99 opus_val16 g10, opus_val16 g11, opus_val16 g12)
97{ 100{
98 opus_val32 x0, x1, x2, x3, x4; 101 opus_val32 x0, x1, x2, x3, x4;
@@ -108,26 +111,31 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
108 t = MAC16_32_Q16(x[i], g10, x2); 111 t = MAC16_32_Q16(x[i], g10, x2);
109 t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); 112 t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
110 t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); 113 t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
114 t = SATURATE(t, SIG_SAT);
111 y[i] = t; 115 y[i] = t;
112 x4=SHL32(x[i-T+3],1); 116 x4=SHL32(x[i-T+3],1);
113 t = MAC16_32_Q16(x[i+1], g10, x1); 117 t = MAC16_32_Q16(x[i+1], g10, x1);
114 t = MAC16_32_Q16(t, g11, ADD32(x0,x2)); 118 t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
115 t = MAC16_32_Q16(t, g12, ADD32(x4,x3)); 119 t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
120 t = SATURATE(t, SIG_SAT);
116 y[i+1] = t; 121 y[i+1] = t;
117 x3=SHL32(x[i-T+4],1); 122 x3=SHL32(x[i-T+4],1);
118 t = MAC16_32_Q16(x[i+2], g10, x0); 123 t = MAC16_32_Q16(x[i+2], g10, x0);
119 t = MAC16_32_Q16(t, g11, ADD32(x4,x1)); 124 t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
120 t = MAC16_32_Q16(t, g12, ADD32(x3,x2)); 125 t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
126 t = SATURATE(t, SIG_SAT);
121 y[i+2] = t; 127 y[i+2] = t;
122 x2=SHL32(x[i-T+5],1); 128 x2=SHL32(x[i-T+5],1);
123 t = MAC16_32_Q16(x[i+3], g10, x4); 129 t = MAC16_32_Q16(x[i+3], g10, x4);
124 t = MAC16_32_Q16(t, g11, ADD32(x3,x0)); 130 t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
125 t = MAC16_32_Q16(t, g12, ADD32(x2,x1)); 131 t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
132 t = SATURATE(t, SIG_SAT);
126 y[i+3] = t; 133 y[i+3] = t;
127 x1=SHL32(x[i-T+6],1); 134 x1=SHL32(x[i-T+6],1);
128 t = MAC16_32_Q16(x[i+4], g10, x3); 135 t = MAC16_32_Q16(x[i+4], g10, x3);
129 t = MAC16_32_Q16(t, g11, ADD32(x2,x4)); 136 t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
130 t = MAC16_32_Q16(t, g12, ADD32(x1,x0)); 137 t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
138 t = SATURATE(t, SIG_SAT);
131 y[i+4] = t; 139 y[i+4] = t;
132 } 140 }
133#ifdef CUSTOM_MODES 141#ifdef CUSTOM_MODES
@@ -138,6 +146,7 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
138 t = MAC16_32_Q16(x[i], g10, x2); 146 t = MAC16_32_Q16(x[i], g10, x2);
139 t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); 147 t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
140 t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); 148 t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
149 t = SATURATE(t, SIG_SAT);
141 y[i] = t; 150 y[i] = t;
142 x4=x3; 151 x4=x3;
143 x3=x2; 152 x3=x2;
@@ -147,7 +156,10 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
147#endif 156#endif
148} 157}
149#else 158#else
150static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, 159#ifndef NON_STATIC_COMB_FILTER_CONST_C
160static
161#endif
162void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
151 opus_val16 g10, opus_val16 g11, opus_val16 g12) 163 opus_val16 g10, opus_val16 g11, opus_val16 g12)
152{ 164{
153 opus_val32 x0, x1, x2, x3, x4; 165 opus_val32 x0, x1, x2, x3, x4;
@@ -163,6 +175,7 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
163 + MULT16_32_Q15(g10,x2) 175 + MULT16_32_Q15(g10,x2)
164 + MULT16_32_Q15(g11,ADD32(x1,x3)) 176 + MULT16_32_Q15(g11,ADD32(x1,x3))
165 + MULT16_32_Q15(g12,ADD32(x0,x4)); 177 + MULT16_32_Q15(g12,ADD32(x0,x4));
178 y[i] = SATURATE(y[i], SIG_SAT);
166 x4=x3; 179 x4=x3;
167 x3=x2; 180 x3=x2;
168 x2=x1; 181 x2=x1;
@@ -176,7 +189,7 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
176#ifndef OVERRIDE_comb_filter 189#ifndef OVERRIDE_comb_filter
177void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, 190void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
178 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 191 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
179 const opus_val16 *window, int overlap) 192 const opus_val16 *window, int overlap, int arch)
180{ 193{
181 int i; 194 int i;
182 /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ 195 /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
@@ -194,6 +207,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
194 OPUS_MOVE(y, x, N); 207 OPUS_MOVE(y, x, N);
195 return; 208 return;
196 } 209 }
210 /* When the gain is zero, T0 and/or T1 is set to zero. We need
211 to have them be at least 2 to avoid processing garbage data. */
212 T0 = IMAX(T0, COMBFILTER_MINPERIOD);
213 T1 = IMAX(T1, COMBFILTER_MINPERIOD);
197 g00 = MULT16_16_P15(g0, gains[tapset0][0]); 214 g00 = MULT16_16_P15(g0, gains[tapset0][0]);
198 g01 = MULT16_16_P15(g0, gains[tapset0][1]); 215 g01 = MULT16_16_P15(g0, gains[tapset0][1]);
199 g02 = MULT16_16_P15(g0, gains[tapset0][2]); 216 g02 = MULT16_16_P15(g0, gains[tapset0][2]);
@@ -219,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
219 + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2) 236 + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
220 + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3)) 237 + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
221 + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4)); 238 + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
239 y[i] = SATURATE(y[i], SIG_SAT);
222 x4=x3; 240 x4=x3;
223 x3=x2; 241 x3=x2;
224 x2=x1; 242 x2=x1;
@@ -234,15 +252,20 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
234 } 252 }
235 253
236 /* Compute the part with the constant filter. */ 254 /* Compute the part with the constant filter. */
237 comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12); 255 comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12, arch);
238} 256}
239#endif /* OVERRIDE_comb_filter */ 257#endif /* OVERRIDE_comb_filter */
240 258
259/* TF change table. Positive values mean better frequency resolution (longer
260 effective window), whereas negative values mean better time resolution
261 (shorter effective window). The second index is computed as:
262 4*isTransient + 2*tf_select + per_band_flag */
241const signed char tf_select_table[4][8] = { 263const signed char tf_select_table[4][8] = {
242 {0, -1, 0, -1, 0,-1, 0,-1}, 264 /*isTransient=0 isTransient=1 */
243 {0, -1, 0, -2, 1, 0, 1,-1}, 265 {0, -1, 0, -1, 0,-1, 0,-1}, /* 2.5 ms */
244 {0, -2, 0, -3, 2, 0, 1,-1}, 266 {0, -1, 0, -2, 1, 0, 1,-1}, /* 5 ms */
245 {0, -2, 0, -3, 3, 0, 1,-1}, 267 {0, -2, 0, -3, 2, 0, 1,-1}, /* 10 ms */
268 {0, -2, 0, -3, 3, 0, 1,-1}, /* 20 ms */
246}; 269};
247 270
248 271
@@ -280,6 +303,9 @@ const char *opus_strerror(int error)
280const char *opus_get_version_string(void) 303const char *opus_get_version_string(void)
281{ 304{
282 return "libopus " PACKAGE_VERSION 305 return "libopus " PACKAGE_VERSION
306 /* Applications may rely on the presence of this substring in the version
307 string to determine if they have a fixed-point or floating-point build
308 at runtime. */
283#ifdef FIXED_POINT 309#ifdef FIXED_POINT
284 "-fixed" 310 "-fixed"
285#endif 311#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.h b/lib/rbcodec/codecs/libopus/celt/celt.h
index b1967516dc..24b6b2b520 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.h
+++ b/lib/rbcodec/codecs/libopus/celt/celt.h
@@ -50,6 +50,8 @@ extern "C" {
50#define CELTDecoder OpusCustomDecoder 50#define CELTDecoder OpusCustomDecoder
51#define CELTMode OpusCustomMode 51#define CELTMode OpusCustomMode
52 52
53#define LEAK_BANDS 19
54
53typedef struct { 55typedef struct {
54 int valid; 56 int valid;
55 float tonality; 57 float tonality;
@@ -57,17 +59,27 @@ typedef struct {
57 float noisiness; 59 float noisiness;
58 float activity; 60 float activity;
59 float music_prob; 61 float music_prob;
60 int bandwidth; 62 float music_prob_min;
61}AnalysisInfo; 63 float music_prob_max;
64 int bandwidth;
65 float activity_probability;
66 float max_pitch_ratio;
67 /* Store as Q6 char to save space. */
68 unsigned char leak_boost[LEAK_BANDS];
69} AnalysisInfo;
70
71typedef struct {
72 int signalType;
73 int offset;
74} SILKInfo;
62 75
63#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) 76#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
64 77
65#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) 78#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
66 79
67/* Encoder/decoder Requests */ 80#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
68 81
69/* Expose this option again when variable framesize actually works */ 82/* Encoder/decoder Requests */
70#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */
71 83
72 84
73#define CELT_SET_PREDICTION_REQUEST 10002 85#define CELT_SET_PREDICTION_REQUEST 10002
@@ -116,6 +128,9 @@ typedef struct {
116#define OPUS_SET_ENERGY_MASK_REQUEST 10026 128#define OPUS_SET_ENERGY_MASK_REQUEST 10026
117#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) 129#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
118 130
131#define CELT_SET_SILK_INFO_REQUEST 10028
132#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
133
119/* Encoder stuff */ 134/* Encoder stuff */
120 135
121int celt_encoder_get_size(int channels); 136int celt_encoder_get_size(int channels);
@@ -194,6 +209,13 @@ static OPUS_INLINE int fromOpus(unsigned char c)
194 209
195extern const signed char tf_select_table[4][8]; 210extern const signed char tf_select_table[4][8];
196 211
212#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
213void validate_celt_decoder(CELTDecoder *st);
214#define VALIDATE_CELT_DECODER(st) validate_celt_decoder(st)
215#else
216#define VALIDATE_CELT_DECODER(st)
217#endif
218
197int resampling_factor(opus_int32 rate); 219int resampling_factor(opus_int32 rate);
198 220
199void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, 221void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
@@ -201,7 +223,17 @@ void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RES
201 223
202void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, 224void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
203 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, 225 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
204 const opus_val16 *window, int overlap); 226 const opus_val16 *window, int overlap, int arch);
227
228#ifdef NON_STATIC_COMB_FILTER_CONST_C
229void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
230 opus_val16 g10, opus_val16 g11, opus_val16 g12);
231#endif
232
233#ifndef OVERRIDE_COMB_FILTER_CONST
234# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
235 ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
236#endif
205 237
206void init_caps(const CELTMode *m,int *cap,int LM,int C); 238void init_caps(const CELTMode *m,int *cap,int LM,int C);
207 239
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
index 8af96b7931..e6efce9358 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_decoder.c
@@ -51,6 +51,14 @@
51#include "celt_lpc.h" 51#include "celt_lpc.h"
52#include "vq.h" 52#include "vq.h"
53 53
54/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
55 CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
56 current value corresponds to a pitch of 66.67 Hz. */
57#define PLC_PITCH_LAG_MAX (720)
58/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
59 pitch of 480 Hz. */
60#define PLC_PITCH_LAG_MIN (100)
61
54#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT) 62#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
55#define NORM_ALIASING_HACK 63#define NORM_ALIASING_HACK
56#endif 64#endif
@@ -73,6 +81,7 @@ struct OpusCustomDecoder {
73 int downsample; 81 int downsample;
74 int start, end; 82 int start, end;
75 int signalling; 83 int signalling;
84 int disable_inv;
76 int arch; 85 int arch;
77 86
78 /* Everything beyond this point gets cleared on a reset */ 87 /* Everything beyond this point gets cleared on a reset */
@@ -82,6 +91,7 @@ struct OpusCustomDecoder {
82 int error; 91 int error;
83 int last_pitch_index; 92 int last_pitch_index;
84 int loss_count; 93 int loss_count;
94 int skip_plc;
85 int postfilter_period; 95 int postfilter_period;
86 int postfilter_period_old; 96 int postfilter_period_old;
87 opus_val16 postfilter_gain; 97 opus_val16 postfilter_gain;
@@ -99,6 +109,38 @@ struct OpusCustomDecoder {
99 /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */ 109 /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
100}; 110};
101 111
112#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
113/* Make basic checks on the CELT state to ensure we don't end
114 up writing all over memory. */
115void validate_celt_decoder(CELTDecoder *st)
116{
117#ifndef CUSTOM_MODES
118 celt_assert(st->mode == opus_custom_mode_create(48000, 960, NULL));
119 celt_assert(st->overlap == 120);
120#endif
121 celt_assert(st->channels == 1 || st->channels == 2);
122 celt_assert(st->stream_channels == 1 || st->stream_channels == 2);
123 celt_assert(st->downsample > 0);
124 celt_assert(st->start == 0 || st->start == 17);
125 celt_assert(st->start < st->end);
126 celt_assert(st->end <= 21);
127#ifdef OPUS_ARCHMASK
128 celt_assert(st->arch >= 0);
129 celt_assert(st->arch <= OPUS_ARCHMASK);
130#endif
131 celt_assert(st->last_pitch_index <= PLC_PITCH_LAG_MAX);
132 celt_assert(st->last_pitch_index >= PLC_PITCH_LAG_MIN || st->last_pitch_index == 0);
133 celt_assert(st->postfilter_period < MAX_PERIOD);
134 celt_assert(st->postfilter_period >= COMBFILTER_MINPERIOD || st->postfilter_period == 0);
135 celt_assert(st->postfilter_period_old < MAX_PERIOD);
136 celt_assert(st->postfilter_period_old >= COMBFILTER_MINPERIOD || st->postfilter_period_old == 0);
137 celt_assert(st->postfilter_tapset <= 2);
138 celt_assert(st->postfilter_tapset >= 0);
139 celt_assert(st->postfilter_tapset_old <= 2);
140 celt_assert(st->postfilter_tapset_old >= 0);
141}
142#endif
143
102int celt_decoder_get_size(int channels) 144int celt_decoder_get_size(int channels)
103{ 145{
104 const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); 146 const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
@@ -162,10 +204,13 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
162 st->start = 0; 204 st->start = 0;
163 st->end = st->mode->effEBands; 205 st->end = st->mode->effEBands;
164 st->signalling = 1; 206 st->signalling = 1;
207#ifndef DISABLE_UPDATE_DRAFT
208 st->disable_inv = channels == 1;
209#else
210 st->disable_inv = 0;
211#endif
165 st->arch = opus_select_arch(); 212 st->arch = opus_select_arch();
166 213
167 st->loss_count = 0;
168
169 opus_custom_decoder_ctl(st, OPUS_RESET_STATE); 214 opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
170 215
171 return OPUS_OK; 216 return OPUS_OK;
@@ -178,6 +223,36 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
178} 223}
179#endif /* CUSTOM_MODES */ 224#endif /* CUSTOM_MODES */
180 225
226#ifndef CUSTOM_MODES
227/* Special case for stereo with no downsampling and no accumulation. This is
228 quite common and we can make it faster by processing both channels in the
229 same loop, reducing overhead due to the dependency loop in the IIR filter. */
230static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
231 celt_sig *mem)
232{
233 celt_sig * OPUS_RESTRICT x0;
234 celt_sig * OPUS_RESTRICT x1;
235 celt_sig m0, m1;
236 int j;
237 x0=in[0];
238 x1=in[1];
239 m0 = mem[0];
240 m1 = mem[1];
241 for (j=0;j<N;j++)
242 {
243 celt_sig tmp0, tmp1;
244 /* Add VERY_SMALL to x[] first to reduce dependency chain. */
245 tmp0 = x0[j] + VERY_SMALL + m0;
246 tmp1 = x1[j] + VERY_SMALL + m1;
247 m0 = MULT16_32_Q15(coef0, tmp0);
248 m1 = MULT16_32_Q15(coef0, tmp1);
249 pcm[2*j ] = SCALEOUT(SIG2WORD16(tmp0));
250 pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
251 }
252 mem[0] = m0;
253 mem[1] = m1;
254}
255#endif
181 256
182#ifndef RESYNTH 257#ifndef RESYNTH
183static 258static
@@ -191,6 +266,14 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
191 opus_val16 coef0; 266 opus_val16 coef0;
192 VARDECL(celt_sig, scratch); 267 VARDECL(celt_sig, scratch);
193 SAVE_STACK; 268 SAVE_STACK;
269#ifndef CUSTOM_MODES
270 /* Short version for common case. */
271 if (downsample == 1 && C == 2 && !accum)
272 {
273 deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
274 return;
275 }
276#endif
194#ifndef FIXED_POINT 277#ifndef FIXED_POINT
195 (void)accum; 278 (void)accum;
196 celt_assert(accum==0); 279 celt_assert(accum==0);
@@ -226,7 +309,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
226 /* Shortcut for the standard (non-custom modes) case */ 309 /* Shortcut for the standard (non-custom modes) case */
227 for (j=0;j<N;j++) 310 for (j=0;j<N;j++)
228 { 311 {
229 celt_sig tmp = x[j] + m + VERY_SMALL; 312 celt_sig tmp = x[j] + VERY_SMALL + m;
230 m = MULT16_32_Q15(coef0, tmp); 313 m = MULT16_32_Q15(coef0, tmp);
231 scratch[j] = tmp; 314 scratch[j] = tmp;
232 } 315 }
@@ -247,7 +330,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
247 { 330 {
248 for (j=0;j<N;j++) 331 for (j=0;j<N;j++)
249 { 332 {
250 celt_sig tmp = x[j] + m + VERY_SMALL; 333 celt_sig tmp = x[j] + VERY_SMALL + m;
251 m = MULT16_32_Q15(coef0, tmp); 334 m = MULT16_32_Q15(coef0, tmp);
252 y[j*C] = SCALEOUT(SIG2WORD16(tmp)); 335 y[j*C] = SCALEOUT(SIG2WORD16(tmp));
253 } 336 }
@@ -278,8 +361,9 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
278static 361static
279#endif 362#endif
280void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[], 363void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
281 opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient, 364 opus_val16 *oldBandE, int start, int effEnd, int C, int CC,
282 int LM, int downsample, int silence) 365 int isTransient, int LM, int downsample,
366 int silence, int arch)
283{ 367{
284 int c, i; 368 int c, i;
285 int M; 369 int M;
@@ -319,9 +403,9 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
319 freq2 = out_syn[1]+overlap/2; 403 freq2 = out_syn[1]+overlap/2;
320 OPUS_COPY(freq2, freq, N); 404 OPUS_COPY(freq2, freq, N);
321 for (b=0;b<B;b++) 405 for (b=0;b<B;b++)
322 clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); 406 clt_mdct_backward(&mode->mdct, &freq2[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
323 for (b=0;b<B;b++) 407 for (b=0;b<B;b++)
324 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B); 408 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[1]+NB*b, mode->window, overlap, shift, B, arch);
325 } else if (CC==1&&C==2) 409 } else if (CC==1&&C==2)
326 { 410 {
327 /* Downmixing a stereo stream to mono */ 411 /* Downmixing a stereo stream to mono */
@@ -333,18 +417,24 @@ void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
333 denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M, 417 denormalise_bands(mode, X+N, freq2, oldBandE+nbEBands, start, effEnd, M,
334 downsample, silence); 418 downsample, silence);
335 for (i=0;i<N;i++) 419 for (i=0;i<N;i++)
336 freq[i] = HALF32(ADD32(freq[i],freq2[i])); 420 freq[i] = ADD32(HALF32(freq[i]), HALF32(freq2[i]));
337 for (b=0;b<B;b++) 421 for (b=0;b<B;b++)
338 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B); 422 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[0]+NB*b, mode->window, overlap, shift, B, arch);
339 } else { 423 } else {
340 /* Normal case (mono or stereo) */ 424 /* Normal case (mono or stereo) */
341 c=0; do { 425 c=0; do {
342 denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M, 426 denormalise_bands(mode, X+c*N, freq, oldBandE+c*nbEBands, start, effEnd, M,
343 downsample, silence); 427 downsample, silence);
344 for (b=0;b<B;b++) 428 for (b=0;b<B;b++)
345 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B); 429 clt_mdct_backward(&mode->mdct, &freq[b], out_syn[c]+NB*b, mode->window, overlap, shift, B, arch);
346 } while (++c<CC); 430 } while (++c<CC);
347 } 431 }
432 /* Saturate IMDCT output so that we can't overflow in the pitch postfilter
433 or in the processing stages that follow it. */
434 c=0; do {
435 for (i=0;i<N;i++)
436 out_syn[c][i] = SATURATE(out_syn[c][i], SIG_SAT);
437 } while (++c<CC);
348 RESTORE_STACK; 438 RESTORE_STACK;
349} 439}
350 440
@@ -387,14 +477,6 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM,
387 } 477 }
388} 478}
389 479
390/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
391 CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
392 current value corresponds to a pitch of 66.67 Hz. */
393#define PLC_PITCH_LAG_MAX (720)
394/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a
395 pitch of 480 Hz. */
396#define PLC_PITCH_LAG_MIN (100)
397
398static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch) 480static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
399{ 481{
400 int pitch_index; 482 int pitch_index;
@@ -446,7 +528,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
446 528
447 loss_count = st->loss_count; 529 loss_count = st->loss_count;
448 start = st->start; 530 start = st->start;
449 noise_based = loss_count >= 5 || start != 0; 531 noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
450 if (noise_based) 532 if (noise_based)
451 { 533 {
452 /* Noise-based PLC/CNG */ 534 /* Noise-based PLC/CNG */
@@ -456,10 +538,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
456 VARDECL(celt_norm, X); 538 VARDECL(celt_norm, X);
457#endif 539#endif
458 opus_uint32 seed; 540 opus_uint32 seed;
459 opus_val16 *plcLogE;
460 int end; 541 int end;
461 int effEnd; 542 int effEnd;
462 543 opus_val16 decay;
463 end = st->end; 544 end = st->end;
464 effEnd = IMAX(start, IMIN(end, mode->effEBands)); 545 effEnd = IMAX(start, IMIN(end, mode->effEBands));
465 546
@@ -471,19 +552,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
471 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 552 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
472#endif 553#endif
473 554
474 if (loss_count >= 5) 555 /* Energy decay */
475 plcLogE = backgroundLogE; 556 decay = loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
476 else { 557 c=0; do
477 /* Energy decay */ 558 {
478 opus_val16 decay = loss_count==0 ? 559 for (i=start;i<end;i++)
479 QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); 560 oldBandE[c*nbEBands+i] = MAX16(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay);
480 c=0; do 561 } while (++c<C);
481 {
482 for (i=start;i<end;i++)
483 oldBandE[c*nbEBands+i] -= decay;
484 } while (++c<C);
485 plcLogE = oldBandE;
486 }
487 seed = st->rng; 562 seed = st->rng;
488 for (c=0;c<C;c++) 563 for (c=0;c<C;c++)
489 { 564 {
@@ -499,7 +574,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
499 seed = celt_lcg_rand(seed); 574 seed = celt_lcg_rand(seed);
500 X[boffs+j] = (celt_norm)((opus_int32)seed>>20); 575 X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
501 } 576 }
502 renormalise_vector(X+boffs, blen, Q15ONE); 577 renormalise_vector(X+boffs, blen, Q15ONE, st->arch);
503 } 578 }
504 } 579 }
505 st->rng = seed; 580 st->rng = seed;
@@ -509,14 +584,17 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
509 DECODE_BUFFER_SIZE-N+(overlap>>1)); 584 DECODE_BUFFER_SIZE-N+(overlap>>1));
510 } while (++c<C); 585 } while (++c<C);
511 586
512 celt_synthesis(mode, X, out_syn, plcLogE, start, effEnd, C, C, 0, LM, st->downsample, 0); 587 celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
513 } else { 588 } else {
589 int exc_length;
514 /* Pitch-based PLC */ 590 /* Pitch-based PLC */
515 const opus_val16 *window; 591 const opus_val16 *window;
592 opus_val16 *exc;
516 opus_val16 fade = Q15ONE; 593 opus_val16 fade = Q15ONE;
517 int pitch_index; 594 int pitch_index;
518 VARDECL(opus_val32, etmp); 595 VARDECL(opus_val32, etmp);
519 VARDECL(opus_val16, exc); 596 VARDECL(opus_val16, _exc);
597 VARDECL(opus_val16, fir_tmp);
520 598
521 if (loss_count == 0) 599 if (loss_count == 0)
522 { 600 {
@@ -526,8 +604,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
526 fade = QCONST16(.8f,15); 604 fade = QCONST16(.8f,15);
527 } 605 }
528 606
607 /* We want the excitation for 2 pitch periods in order to look for a
608 decaying signal, but we can't get more than MAX_PERIOD. */
609 exc_length = IMIN(2*pitch_index, MAX_PERIOD);
610
529 ALLOC(etmp, overlap, opus_val32); 611 ALLOC(etmp, overlap, opus_val32);
530 ALLOC(exc, MAX_PERIOD, opus_val16); 612 ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
613 ALLOC(fir_tmp, exc_length, opus_val16);
614 exc = _exc+LPC_ORDER;
531 window = mode->window; 615 window = mode->window;
532 c=0; do { 616 c=0; do {
533 opus_val16 decay; 617 opus_val16 decay;
@@ -536,13 +620,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
536 celt_sig *buf; 620 celt_sig *buf;
537 int extrapolation_offset; 621 int extrapolation_offset;
538 int extrapolation_len; 622 int extrapolation_len;
539 int exc_length;
540 int j; 623 int j;
541 624
542 buf = decode_mem[c]; 625 buf = decode_mem[c];
543 for (i=0;i<MAX_PERIOD;i++) { 626 for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
544 exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT); 627 exc[i-LPC_ORDER] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
545 }
546 628
547 if (loss_count == 0) 629 if (loss_count == 0)
548 { 630 {
@@ -568,22 +650,32 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
568#endif 650#endif
569 } 651 }
570 _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER); 652 _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
653#ifdef FIXED_POINT
654 /* For fixed-point, apply bandwidth expansion until we can guarantee that
655 no overflow can happen in the IIR filter. This means:
656 32768*sum(abs(filter)) < 2^31 */
657 while (1) {
658 opus_val16 tmp=Q15ONE;
659 opus_val32 sum=QCONST16(1., SIG_SHIFT);
660 for (i=0;i<LPC_ORDER;i++)
661 sum += ABS16(lpc[c*LPC_ORDER+i]);
662 if (sum < 65535) break;
663 for (i=0;i<LPC_ORDER;i++)
664 {
665 tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
666 lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
667 }
668 }
669#endif
571 } 670 }
572 /* We want the excitation for 2 pitch periods in order to look for a
573 decaying signal, but we can't get more than MAX_PERIOD. */
574 exc_length = IMIN(2*pitch_index, MAX_PERIOD);
575 /* Initialize the LPC history with the samples just before the start 671 /* Initialize the LPC history with the samples just before the start
576 of the region for which we're computing the excitation. */ 672 of the region for which we're computing the excitation. */
577 { 673 {
578 opus_val16 lpc_mem[LPC_ORDER]; 674 /* Compute the excitation for exc_length samples before the loss. We need the copy
579 for (i=0;i<LPC_ORDER;i++) 675 because celt_fir() cannot filter in-place. */
580 {
581 lpc_mem[i] =
582 ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
583 }
584 /* Compute the excitation for exc_length samples before the loss. */
585 celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER, 676 celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
586 exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem); 677 fir_tmp, exc_length, LPC_ORDER, st->arch);
678 OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
587 } 679 }
588 680
589 /* Check if the waveform is decaying, and if so how fast. 681 /* Check if the waveform is decaying, and if so how fast.
@@ -637,9 +729,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
637 tmp = ROUND16( 729 tmp = ROUND16(
638 buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j], 730 buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j],
639 SIG_SHIFT); 731 SIG_SHIFT);
640 S1 += SHR32(MULT16_16(tmp, tmp), 8); 732 S1 += SHR32(MULT16_16(tmp, tmp), 10);
641 } 733 }
642
643 { 734 {
644 opus_val16 lpc_mem[LPC_ORDER]; 735 opus_val16 lpc_mem[LPC_ORDER];
645 /* Copy the last decoded samples (prior to the overlap region) to 736 /* Copy the last decoded samples (prior to the overlap region) to
@@ -650,7 +741,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
650 the signal domain. */ 741 the signal domain. */
651 celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER, 742 celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
652 buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, 743 buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
653 lpc_mem); 744 lpc_mem, st->arch);
745#ifdef FIXED_POINT
746 for (i=0; i < extrapolation_len; i++)
747 buf[DECODE_BUFFER_SIZE-N+i] = SATURATE(buf[DECODE_BUFFER_SIZE-N+i], SIG_SAT);
748#endif
654 } 749 }
655 750
656 /* Check if the synthesis energy is higher than expected, which can 751 /* Check if the synthesis energy is higher than expected, which can
@@ -661,7 +756,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
661 for (i=0;i<extrapolation_len;i++) 756 for (i=0;i<extrapolation_len;i++)
662 { 757 {
663 opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT); 758 opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT);
664 S2 += SHR32(MULT16_16(tmp, tmp), 8); 759 S2 += SHR32(MULT16_16(tmp, tmp), 10);
665 } 760 }
666 /* This checks for an "explosion" in the synthesis. */ 761 /* This checks for an "explosion" in the synthesis. */
667#ifdef FIXED_POINT 762#ifdef FIXED_POINT
@@ -698,7 +793,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
698 comb_filter(etmp, buf+DECODE_BUFFER_SIZE, 793 comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
699 st->postfilter_period, st->postfilter_period, overlap, 794 st->postfilter_period, st->postfilter_period, overlap,
700 -st->postfilter_gain, -st->postfilter_gain, 795 -st->postfilter_gain, -st->postfilter_gain,
701 st->postfilter_tapset, st->postfilter_tapset, NULL, 0); 796 st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
702 797
703 /* Simulate TDAC on the concealed audio so that it blends with the 798 /* Simulate TDAC on the concealed audio so that it blends with the
704 MDCT of the next frame. */ 799 MDCT of the next frame. */
@@ -769,6 +864,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
769 const opus_int16 *eBands; 864 const opus_int16 *eBands;
770 ALLOC_STACK; 865 ALLOC_STACK;
771 866
867 VALIDATE_CELT_DECODER(st);
772 mode = st->mode; 868 mode = st->mode;
773 nbEBands = mode->nbEBands; 869 nbEBands = mode->nbEBands;
774 overlap = mode->overlap; 870 overlap = mode->overlap;
@@ -838,6 +934,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
838 return frame_size/st->downsample; 934 return frame_size/st->downsample;
839 } 935 }
840 936
937 /* Check if there are at least two packets received consecutively before
938 * turning on the pitch-based PLC */
939 st->skip_plc = st->loss_count != 0;
940
841 if (dec == NULL) 941 if (dec == NULL)
842 { 942 {
843 ec_dec_init(&_dec,(unsigned char*)data,len); 943 ec_dec_init(&_dec,(unsigned char*)data,len);
@@ -959,7 +1059,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
959 ALLOC(pulses, nbEBands, int); 1059 ALLOC(pulses, nbEBands, int);
960 ALLOC(fine_priority, nbEBands, int); 1060 ALLOC(fine_priority, nbEBands, int);
961 1061
962 codedBands = compute_allocation(mode, start, end, offsets, cap, 1062 codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
963 alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, 1063 alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
964 fine_quant, fine_priority, C, LM, dec, 0, 0, 0); 1064 fine_quant, fine_priority, C, LM, dec, 0, 0, 0);
965 1065
@@ -982,7 +1082,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
982 1082
983 quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, 1083 quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
984 NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, 1084 NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
985 len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); 1085 len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng, 0,
1086 st->arch, st->disable_inv);
986 1087
987 if (anti_collapse_rsv > 0) 1088 if (anti_collapse_rsv > 0)
988 { 1089 {
@@ -994,7 +1095,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
994 1095
995 if (anti_collapse_on) 1096 if (anti_collapse_on)
996 anti_collapse(mode, X, collapse_masks, LM, C, N, 1097 anti_collapse(mode, X, collapse_masks, LM, C, N,
997 start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); 1098 start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
998 1099
999 if (silence) 1100 if (silence)
1000 { 1101 {
@@ -1002,18 +1103,19 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
1002 oldBandE[i] = -QCONST16(28.f,DB_SHIFT); 1103 oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
1003 } 1104 }
1004 1105
1005 celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, CC, isTransient, LM, st->downsample, silence); 1106 celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
1107 C, CC, isTransient, LM, st->downsample, silence, st->arch);
1006 1108
1007 c=0; do { 1109 c=0; do {
1008 st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); 1110 st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD);
1009 st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD); 1111 st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD);
1010 comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize, 1112 comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize,
1011 st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset, 1113 st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset,
1012 mode->window, overlap); 1114 mode->window, overlap, st->arch);
1013 if (LM!=0) 1115 if (LM!=0)
1014 comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize, 1116 comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize,
1015 st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset, 1117 st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset,
1016 mode->window, overlap); 1118 mode->window, overlap, st->arch);
1017 1119
1018 } while (++c<CC); 1120 } while (++c<CC);
1019 st->postfilter_period_old = st->postfilter_period; 1121 st->postfilter_period_old = st->postfilter_period;
@@ -1035,10 +1137,18 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
1035 /* In case start or end were to change */ 1137 /* In case start or end were to change */
1036 if (!isTransient) 1138 if (!isTransient)
1037 { 1139 {
1140 opus_val16 max_background_increase;
1038 OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands); 1141 OPUS_COPY(oldLogE2, oldLogE, 2*nbEBands);
1039 OPUS_COPY(oldLogE, oldBandE, 2*nbEBands); 1142 OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
1143 /* In normal circumstances, we only allow the noise floor to increase by
1144 up to 2.4 dB/second, but when we're in DTX, we allow up to 6 dB
1145 increase for each update.*/
1146 if (st->loss_count < 10)
1147 max_background_increase = M*QCONST16(0.001f,DB_SHIFT);
1148 else
1149 max_background_increase = QCONST16(1.f,DB_SHIFT);
1040 for (i=0;i<2*nbEBands;i++) 1150 for (i=0;i<2*nbEBands;i++)
1041 backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); 1151 backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]);
1042 } else { 1152 } else {
1043 for (i=0;i<2*nbEBands;i++) 1153 for (i=0;i<2*nbEBands;i++)
1044 oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); 1154 oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
@@ -1195,6 +1305,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
1195 ((char*)&st->DECODER_RESET_START - (char*)st)); 1305 ((char*)&st->DECODER_RESET_START - (char*)st));
1196 for (i=0;i<2*st->mode->nbEBands;i++) 1306 for (i=0;i<2*st->mode->nbEBands;i++)
1197 oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); 1307 oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
1308 st->skip_plc = 1;
1198 } 1309 }
1199 break; 1310 break;
1200 case OPUS_GET_PITCH_REQUEST: 1311 case OPUS_GET_PITCH_REQUEST:
@@ -1227,6 +1338,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
1227 *value=st->rng; 1338 *value=st->rng;
1228 } 1339 }
1229 break; 1340 break;
1341 case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
1342 {
1343 opus_int32 value = va_arg(ap, opus_int32);
1344 if(value<0 || value>1)
1345 {
1346 goto bad_arg;
1347 }
1348 st->disable_inv = value;
1349 }
1350 break;
1351 case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
1352 {
1353 opus_int32 *value = va_arg(ap, opus_int32*);
1354 if (!value)
1355 {
1356 goto bad_arg;
1357 }
1358 *value = st->disable_inv;
1359 }
1360 break;
1230 default: 1361 default:
1231 goto bad_request; 1362 goto bad_request;
1232 } 1363 }
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_encoder.c b/lib/rbcodec/codecs/libopus/celt/celt_encoder.c
new file mode 100644
index 0000000000..44cb0850ab
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/celt_encoder.c
@@ -0,0 +1,2607 @@
1/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2010 Xiph.Org Foundation
3 Copyright (c) 2008 Gregory Maxwell
4 Written by Jean-Marc Valin and Gregory Maxwell */
5/*
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#define CELT_ENCODER_C
35
36#include "cpu_support.h"
37#include "os_support.h"
38#include "mdct.h"
39#include <math.h>
40#include "celt.h"
41#include "pitch.h"
42#include "bands.h"
43#include "modes.h"
44#include "entcode.h"
45#include "quant_bands.h"
46#include "rate.h"
47#include "stack_alloc.h"
48#include "mathops.h"
49#include "float_cast.h"
50#include <stdarg.h>
51#include "celt_lpc.h"
52#include "vq.h"
53
54
/** Encoder state
 @brief Encoder state

 The declared fields are followed in memory by several variable-size arrays
 (listed in the comments at the end of the struct). The total allocation size
 is computed by opus_custom_encoder_get_size().
 */
struct OpusCustomEncoder {
   const OpusCustomMode *mode;     /**< Mode used by the encoder */
   int channels;                   /* Set to the channel count given at init */
   int stream_channels;            /* Initialised to the same value as channels */

   int force_intra;                /* Initialised to 0 */
   int clip;                       /* Initialised to 1 */
   int disable_pf;
   int complexity;                 /* Initialised to 5 */
   int upsample;                   /* 1 by default; celt_encoder_init() sets it from resampling_factor() */
   int start, end;                 /* Initialised to 0 and mode->effEBands respectively */

   opus_int32 bitrate;             /* Initialised to OPUS_BITRATE_MAX */
   int vbr;                        /* Initialised to 0 */
   int signalling;                 /* Initialised to 1 */
   int constrained_vbr;      /* If zero, VBR can do whatever it likes with the rate */
   int loss_rate;
   int lsb_depth;                  /* Initialised to 24 */
   int lfe;
   int disable_inv;
   int arch;                       /* Architecture value handed to the init routine */

   /* Everything beyond this point gets cleared on a reset */
#define ENCODER_RESET_START rng

   opus_uint32 rng;
   int spread_decision;
   opus_val32 delayedIntra;
   int tonal_average;
   int lastCodedBands;
   int hf_average;
   int tapset_decision;

   int prefilter_period;
   opus_val16 prefilter_gain;
   int prefilter_tapset;
#ifdef RESYNTH
   int prefilter_period_old;
   opus_val16 prefilter_gain_old;
   int prefilter_tapset_old;
#endif
   int consec_transient;
   AnalysisInfo analysis;
   SILKInfo silk_info;

   opus_val32 preemph_memE[2];
   opus_val32 preemph_memD[2];

   /* VBR-related parameters */
   opus_int32 vbr_reservoir;
   opus_int32 vbr_drift;
   opus_int32 vbr_offset;
   opus_int32 vbr_count;
   opus_val32 overlap_max;
   opus_val16 stereo_saving;
   int intensity;
   opus_val16 *energy_mask;
   opus_val16 spec_avg;

#ifdef RESYNTH
   /* +MAX_PERIOD/2 to make space for overlap */
   celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2];
#endif

   /* Only the first element is declared; the remaining elements and the
      arrays listed below live in the extra space past the end of the struct. */
   celt_sig in_mem[1]; /* Size = channels*mode->overlap */
   /* celt_sig prefilter_mem[],  Size = channels*COMBFILTER_MAXPERIOD */
   /* opus_val16 oldBandE[],     Size = channels*mode->nbEBands */
   /* opus_val16 oldLogE[],      Size = channels*mode->nbEBands */
   /* opus_val16 oldLogE2[],     Size = channels*mode->nbEBands */
   /* opus_val16 energyError[],  Size = channels*mode->nbEBands */
};
129
130int celt_encoder_get_size(int channels)
131{
132 CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
133 return opus_custom_encoder_get_size(mode, channels);
134}
135
136OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels)
137{
138 int size = sizeof(struct CELTEncoder)
139 + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */
140 + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */
141 + 4*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */
142 /* opus_val16 oldLogE[channels*mode->nbEBands]; */
143 /* opus_val16 oldLogE2[channels*mode->nbEBands]; */
144 /* opus_val16 energyError[channels*mode->nbEBands]; */
145 return size;
146}
147
148#ifdef CUSTOM_MODES
149CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error)
150{
151 int ret;
152 CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels));
153 /* init will handle the NULL case */
154 ret = opus_custom_encoder_init(st, mode, channels);
155 if (ret != OPUS_OK)
156 {
157 opus_custom_encoder_destroy(st);
158 st = NULL;
159 }
160 if (error)
161 *error = ret;
162 return st;
163}
164#endif /* CUSTOM_MODES */
165
166static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode,
167 int channels, int arch)
168{
169 if (channels < 0 || channels > 2)
170 return OPUS_BAD_ARG;
171
172 if (st==NULL || mode==NULL)
173 return OPUS_ALLOC_FAIL;
174
175 OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels));
176
177 st->mode = mode;
178 st->stream_channels = st->channels = channels;
179
180 st->upsample = 1;
181 st->start = 0;
182 st->end = st->mode->effEBands;
183 st->signalling = 1;
184 st->arch = arch;
185
186 st->constrained_vbr = 1;
187 st->clip = 1;
188
189 st->bitrate = OPUS_BITRATE_MAX;
190 st->vbr = 0;
191 st->force_intra = 0;
192 st->complexity = 5;
193 st->lsb_depth=24;
194
195 opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
196
197 return OPUS_OK;
198}
199
200#ifdef CUSTOM_MODES
201int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels)
202{
203 return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch());
204}
205#endif
206
207int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
208 int arch)
209{
210 int ret;
211 ret = opus_custom_encoder_init_arch(st,
212 opus_custom_mode_create(48000, 960, NULL), channels, arch);
213 if (ret != OPUS_OK)
214 return ret;
215 st->upsample = resampling_factor(sampling_rate);
216 return OPUS_OK;
217}
218
219#ifdef CUSTOM_MODES
220void opus_custom_encoder_destroy(CELTEncoder *st)
221{
222 opus_free(st);
223}
224#endif /* CUSTOM_MODES */
225
226
/** Time-domain transient detector.

    For every channel the input is high-pass filtered, the energy of sample
    pairs is smoothed by a forward and a backward masking pass, and an
    "unmask" score is accumulated through inv_table. The largest score over
    all channels (mask_metric) determines the outputs.

    @param in                    Input signal; channel c occupies in[c*len .. c*len+len-1]
    @param len                   Number of samples per channel
    @param C                     Number of channels to analyse
    @param tf_estimate           [out] Metric derived from mask_metric (see the end of the function)
    @param tf_chan               [out] Index of the channel with the largest score; only written
                                 when a channel's score exceeds the running maximum (which starts at 0)
    @param allow_weak_transients When non-zero, a slower forward masking decay is used and
                                 transients with mask_metric below 600 are reported through
                                 *weak_transient instead of the return value
    @param weak_transient        [out] Set to 0 on entry, and to 1 when a weak transient is found
    @return Non-zero when mask_metric exceeds 200 and the transient was not
            downgraded to a weak one. With FUZZING defined the result is random.
 */
static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
                              opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients,
                              int *weak_transient)
{
   int i;
   VARDECL(opus_val16, tmp);
   opus_val32 mem0,mem1;
   int is_transient = 0;
   opus_int32 mask_metric = 0;
   int c;
   opus_val16 tf_max;
   int len2;
   /* Forward masking: 6.7 dB/ms. */
#ifdef FIXED_POINT
   int forward_shift = 4;
#else
   opus_val16 forward_decay = QCONST16(.0625f,15);
#endif
   /* Table of 6*64/x, trained on real data to minimize the average error */
   static const unsigned char inv_table[128] = {
         255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25,
          23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12,
          12, 12, 11, 11, 11, 10, 10, 10,  9,  9,  9,  9,  9,  9,  8,  8,
           8,  8,  8,  7,  7,  7,  7,  7,  7,  6,  6,  6,  6,  6,  6,  6,
           6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,
           5,  5,  5,  5,  5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
           4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  3,  3,
           3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,
   };
   SAVE_STACK;
   ALLOC(tmp, len, opus_val16);

   *weak_transient = 0;
   /* For lower bitrates, let's be more conservative and have a forward masking
      decay of 3.3 dB/ms. This avoids having to code transients at very low
      bitrate (mostly for hybrid), which can result in unstable energy and/or
      partial collapse. */
   if (allow_weak_transients)
   {
#ifdef FIXED_POINT
      forward_shift = 5;
#else
      forward_decay = QCONST16(.03125f,15);
#endif
   }
   /* The masking passes below work on pairs of samples */
   len2=len/2;
   for (c=0;c<C;c++)
   {
      opus_val32 mean;
      opus_int32 unmask=0;
      opus_val32 norm;
      opus_val16 maxE;
      mem0=0;
      mem1=0;
      /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
      for (i=0;i<len;i++)
      {
         opus_val32 x,y;
         x = SHR32(in[i+c*len],SIG_SHIFT);
         y = ADD32(mem0, x);
#ifdef FIXED_POINT
         mem0 = mem1 + y - SHL32(x,1);
         mem1 = x - SHR32(y,1);
#else
         mem0 = mem1 + y - 2*x;
         mem1 = x - .5f*y;
#endif
         tmp[i] = SROUND16(y, 2);
         /*printf("%f ", tmp[i]);*/
      }
      /*printf("\n");*/
      /* First few samples are bad because we don't propagate the memory */
      OPUS_CLEAR(tmp, 12);

#ifdef FIXED_POINT
      /* Normalize tmp to max range */
      {
         int shift=0;
         shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len)));
         if (shift!=0)
         {
            for (i=0;i<len;i++)
               tmp[i] = SHL16(tmp[i], shift);
         }
      }
#endif

      mean=0;
      mem0=0;
      /* Grouping by two to reduce complexity */
      /* Forward pass to compute the post-echo threshold*/
      for (i=0;i<len2;i++)
      {
         opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16);
         mean += x2;
#ifdef FIXED_POINT
         /* FIXME: Use PSHR16() instead */
         tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift);
#else
         tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0);
#endif
         mem0 = tmp[i];
      }

      mem0=0;
      maxE=0;
      /* Backward pass to compute the pre-echo threshold */
      for (i=len2-1;i>=0;i--)
      {
         /* Backward masking: 13.9 dB/ms. */
#ifdef FIXED_POINT
         /* FIXME: Use PSHR16() instead */
         tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
#else
         tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
#endif
         mem0 = tmp[i];
         maxE = MAX16(maxE, mem0);
      }
      /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/

      /* Compute the ratio of the "frame energy" over the harmonic mean of the energy.
         This essentially corresponds to a bitrate-normalized temporal noise-to-mask
         ratio */

      /* As a compromise with the old transient detector, frame energy is the
         geometric mean of the energy and half the max */
#ifdef FIXED_POINT
      /* Costs two sqrt() to avoid overflows */
      mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1)));
#else
      mean = celt_sqrt(mean * maxE*.5*len2);
#endif
      /* Inverse of the mean energy in Q15+6 */
      norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1));
      /* Compute harmonic mean discarding the unreliable boundaries
         The data is smooth, so we only take 1/4th of the samples */
      unmask=0;
      /* We should never see NaNs here. If we find any, then something really bad happened and we better abort
         before it does any damage later on. If these asserts are disabled (no hardening), then the table
         lookup a few lines below (id = ...) is likely to crash due to an out-of-bounds read. DO NOT FIX
         that crash on NaN since it could result in a worse issue later on. */
      celt_assert(!celt_isnan(tmp[0]));
      celt_assert(!celt_isnan(norm));
      for (i=12;i<len2-5;i+=4)
      {
         int id;
         /* id is clamped to 0..127, the valid index range of inv_table */
#ifdef FIXED_POINT
         id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */
#else
         id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */
#endif
         unmask += inv_table[id];
      }
      /*printf("%d\n", unmask);*/
      /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */
      unmask = 64*unmask*4/(6*(len2-17));
      /* Keep the largest score seen so far and the channel it came from */
      if (unmask>mask_metric)
      {
         *tf_chan = c;
         mask_metric = unmask;
      }
   }
   is_transient = mask_metric>200;
   /* For low bitrates, define "weak transients" that need to be
      handled differently to avoid partial collapse. */
   if (allow_weak_transients && is_transient && mask_metric<600) {
      is_transient = 0;
      *weak_transient = 1;
   }
   /* Arbitrary metric for VBR boost */
   tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42);
   /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
   *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28)));
   /*printf("%d %f\n", tf_max, mask_metric);*/
   RESTORE_STACK;
#ifdef FUZZING
   /* Fuzzing builds replace the decision with a random one */
   is_transient = rand()&0x1;
#endif
   /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/
   return is_transient;
}
409
410/* Looks for sudden increases of energy to decide whether we need to patch
411 the transient decision */
412static int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
413 int start, int end, int C)
414{
415 int i, c;
416 opus_val32 mean_diff=0;
417 opus_val16 spread_old[26];
418 /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to
419 avoid false detection caused by irrelevant bands */
420 if (C==1)
421 {
422 spread_old[start] = oldE[start];
423 for (i=start+1;i<end;i++)
424 spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]);
425 } else {
426 spread_old[start] = MAX16(oldE[start],oldE[start+nbEBands]);
427 for (i=start+1;i<end;i++)
428 spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT),
429 MAX16(oldE[i],oldE[i+nbEBands]));
430 }
431 for (i=end-2;i>=start;i--)
432 spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT));
433 /* Compute mean increase */
434 c=0; do {
435 for (i=IMAX(2,start);i<end-1;i++)
436 {
437 opus_val16 x1, x2;
438 x1 = MAX16(0, newE[i + c*nbEBands]);
439 x2 = MAX16(0, spread_old[i]);
440 mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2))));
441 }
442 } while (++c<C);
443 mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start)));
444 /*printf("%f %f %d\n", mean_diff, max_diff, count);*/
445 return mean_diff > QCONST16(1.f, DB_SHIFT);
446}
447
448/** Apply window and compute the MDCT for all sub-frames and
449 all channels in a frame */
450static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
451 celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
452 int arch)
453{
454 const int overlap = mode->overlap;
455 int N;
456 int B;
457 int shift;
458 int i, b, c;
459 if (shortBlocks)
460 {
461 B = shortBlocks;
462 N = mode->shortMdctSize;
463 shift = mode->maxLM;
464 } else {
465 B = 1;
466 N = mode->shortMdctSize<<LM;
467 shift = mode->maxLM-LM;
468 }
469 c=0; do {
470 for (b=0;b<B;b++)
471 {
472 /* Interleaving the sub-frames while doing the MDCTs */
473 clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
474 &out[b+c*N*B], mode->window, overlap, shift, B,
475 arch);
476 }
477 } while (++c<CC);
478 if (CC==2&&C==1)
479 {
480 for (i=0;i<B*N;i++)
481 out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i]));
482 }
483 if (upsample != 1)
484 {
485 c=0; do
486 {
487 int bound = B*N/upsample;
488 for (i=0;i<bound;i++)
489 out[c*B*N+i] *= upsample;
490 OPUS_CLEAR(&out[c*B*N+bound], B*N-bound);
491 } while (++c<C);
492 }
493}
494
495
/** Converts one channel of interleaved PCM to the internal signal
    representation and applies the pre-emphasis filter.

    @param pcmp     Input samples; consecutive samples of the channel are CC entries apart
    @param inp      [out] N filtered samples
    @param N        Number of output samples
    @param CC       Stride between two input samples of the same channel
    @param upsample Upsampling factor; when it is not 1 only N/upsample input
                    samples are read and they are written every upsample-th
                    position of a zeroed buffer
    @param coef     Filter coefficients; coef[1]==0 selects the single-coefficient filter
    @param mem      [in,out] Filter memory, read on entry and updated on exit
    @param clip     When non-zero, float builds clamp the input to +/-65536
                    before filtering; fixed-point builds ignore it
 */
void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
                        int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip)
{
   int i;
   opus_val16 coef0;
   celt_sig m;
   int Nu;

   coef0 = coef[0];
   m = *mem;

   /* Fast path for the normal 48kHz case and no clipping */
   if (coef[1] == 0 && upsample == 1 && !clip)
   {
      for (i=0;i<N;i++)
      {
         opus_val16 x;
         x = SCALEIN(pcmp[CC*i]);
         /* Apply pre-emphasis */
         inp[i] = SHL32(x, SIG_SHIFT) - m;
         m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
      }
      *mem = m;
      return;
   }

   /* General path: first place the (possibly sparse) input samples in inp */
   Nu = N/upsample;
   if (upsample!=1)
   {
      OPUS_CLEAR(inp, N);
   }
   for (i=0;i<Nu;i++)
      inp[i*upsample] = SCALEIN(pcmp[CC*i]);

#ifndef FIXED_POINT
   if (clip)
   {
      /* Clip input to avoid encoding non-portable files */
      for (i=0;i<Nu;i++)
         inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample]));
   }
#else
   (void)clip; /* Avoids a warning about clip being unused. */
#endif
#ifdef CUSTOM_MODES
   /* Filter variant that also uses coef[1] and coef[2] */
   if (coef[1] != 0)
   {
      opus_val16 coef1 = coef[1];
      opus_val16 coef2 = coef[2];
      for (i=0;i<N;i++)
      {
         celt_sig x, tmp;
         x = inp[i];
         /* Apply pre-emphasis */
         tmp = MULT16_16(coef2, x);
         inp[i] = tmp + m;
         m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp);
      }
   } else
#endif
   {
      /* Same filter as the fast path, applied in place on inp */
      for (i=0;i<N;i++)
      {
         opus_val16 x;
         x = inp[i];
         /* Apply pre-emphasis */
         inp[i] = SHL32(x, SIG_SHIFT) - m;
         m = SHR32(MULT16_16(coef0, x), 15-SIG_SHIFT);
      }
   }
   *mem = m;
}
568
569
570
571static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
572{
573 int i;
574 opus_val32 L1;
575 L1 = 0;
576 for (i=0;i<N;i++)
577 L1 += EXTEND32(ABS16(tmp[i]));
578 /* When in doubt, prefer good freq resolution */
579 L1 = MAC16_32_Q15(L1, LM*bias, L1);
580 return L1;
581
582}
583
/** Chooses the time-frequency resolution of each band.

    For every band the L1 metric (l1_metric()) is evaluated after successive
    haar1() transforms, and the level with the smallest metric is recorded.
    A two-state Viterbi search over the bands then picks tf_res[], trading
    the distance to the preferred level against the cost of switching state.

    @param m           Mode; m->eBands supplies the band boundaries
    @param len         Number of bands to analyse
    @param isTransient Non-zero when the frame is coded as a transient
    @param tf_res      [out] Per-band decision (0 or 1), len entries
    @param lambda      Cost added whenever the state changes between adjacent bands
    @param X           Normalised spectrum; the analysed channel starts at X[tf_chan*N0]
    @param N0          Offset between two channels in X
    @param LM          Frame size index (shift applied to the band boundaries)
    @param tf_estimate Transient metric used to derive the bias passed to l1_metric()
    @param tf_chan     Channel to analyse
    @param importance  Per-band weight applied to the distance term
    @return The selected tf_select value (1 is only returned for transients).
            With FUZZING defined both the return value and tf_res[] are random.
 */
static int tf_analysis(const CELTMode *m, int len, int isTransient,
      int *tf_res, int lambda, celt_norm *X, int N0, int LM,
      opus_val16 tf_estimate, int tf_chan, int *importance)
{
   int i;
   VARDECL(int, metric);
   int cost0;
   int cost1;
   VARDECL(int, path0);
   VARDECL(int, path1);
   VARDECL(celt_norm, tmp);
   VARDECL(celt_norm, tmp_1);
   int sel;
   int selcost[2];
   int tf_select=0;
   opus_val16 bias;

   SAVE_STACK;
   bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate));
   /*printf("%f ", bias);*/

   ALLOC(metric, len, int);
   /* The scratch buffers are sized from the width of band len-1 */
   ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
   ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
   ALLOC(path0, len, int);
   ALLOC(path1, len, int);

   for (i=0;i<len;i++)
   {
      int k, N;
      int narrow;
      opus_val32 L1, best_L1;
      int best_level=0;
      N = (m->eBands[i+1]-m->eBands[i])<<LM;
      /* band is too narrow to be split down to LM=-1 */
      narrow = (m->eBands[i+1]-m->eBands[i])==1;
      OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N);
      /* Just add the right channel if we're in stereo */
      /*if (C==2)
         for (j=0;j<N;j++)
            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
      L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
      best_L1 = L1;
      /* Check the -1 case for transients */
      if (isTransient && !narrow)
      {
         OPUS_COPY(tmp_1, tmp, N);
         haar1(tmp_1, N>>LM, 1<<LM);
         L1 = l1_metric(tmp_1, N, LM+1, bias);
         if (L1<best_L1)
         {
            best_L1 = L1;
            best_level = -1;
         }
      }
      /*printf ("%f ", L1);*/
      /* Non-transient frames on bands that are not narrow try one extra level */
      for (k=0;k<LM+!(isTransient||narrow);k++)
      {
         int B;

         if (isTransient)
            B = (LM-k-1);
         else
            B = k+1;

         haar1(tmp, N>>k, 1<<k);

         L1 = l1_metric(tmp, N, B, bias);

         if (L1 < best_L1)
         {
            best_L1 = L1;
            best_level = k+1;
         }
      }
      /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
      /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
      if (isTransient)
         metric[i] = 2*best_level;
      else
         metric[i] = -2*best_level;
      /* For bands that can't be split to -1, set the metric to the half-way point to avoid
         biasing the decision */
      if (narrow && (metric[i]==0 || metric[i]==-2*LM))
         metric[i]-=1;
      /*printf("%d ", metric[i]/2 + (!isTransient)*LM);*/
   }
   /*printf("\n");*/
   /* Search for the optimal tf resolution, including tf_select */
   tf_select = 0;
   for (sel=0;sel<2;sel++)
   {
      cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
      cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+1]) + (isTransient ? 0 : lambda);
      for (i=1;i<len;i++)
      {
         int curr0, curr1;
         curr0 = IMIN(cost0, cost1 + lambda);
         curr1 = IMIN(cost0 + lambda, cost1);
         cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
         cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
      }
      cost0 = IMIN(cost0, cost1);
      selcost[sel]=cost0;
   }
   /* For now, we're conservative and only allow tf_select=1 for transients.
    * If tests confirm it's useful for non-transients, we could allow it. */
   if (selcost[1]<selcost[0] && isTransient)
      tf_select=1;
   /* Run the search again for the chosen tf_select, this time recording the path */
   cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
   cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]) + (isTransient ? 0 : lambda);
   /* Viterbi forward pass */
   for (i=1;i<len;i++)
   {
      int curr0, curr1;
      int from0, from1;

      /* Best predecessor for state 0 */
      from0 = cost0;
      from1 = cost1 + lambda;
      if (from0 < from1)
      {
         curr0 = from0;
         path0[i]= 0;
      } else {
         curr0 = from1;
         path0[i]= 1;
      }

      /* Best predecessor for state 1 */
      from0 = cost0 + lambda;
      from1 = cost1;
      if (from0 < from1)
      {
         curr1 = from0;
         path1[i]= 0;
      } else {
         curr1 = from1;
         path1[i]= 1;
      }
      cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
      cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
   }
   tf_res[len-1] = cost0 < cost1 ? 0 : 1;
   /* Viterbi backward pass to check the decisions */
   for (i=len-2;i>=0;i--)
   {
      if (tf_res[i+1] == 1)
         tf_res[i] = path1[i+1];
      else
         tf_res[i] = path0[i+1];
   }
   /*printf("%d %f\n", *tf_sum, tf_estimate);*/
   RESTORE_STACK;
#ifdef FUZZING
   /* Fuzzing builds replace the decisions with random ones */
   tf_select = rand()&0x1;
   tf_res[0] = rand()&0x1;
   for (i=1;i<len;i++)
      tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0);
#endif
   return tf_select;
}
744
745static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc)
746{
747 int curr, i;
748 int tf_select_rsv;
749 int tf_changed;
750 int logp;
751 opus_uint32 budget;
752 opus_uint32 tell;
753 budget = enc->storage*8;
754 tell = ec_tell(enc);
755 logp = isTransient ? 2 : 4;
756 /* Reserve space to code the tf_select decision. */
757 tf_select_rsv = LM>0 && tell+logp+1 <= budget;
758 budget -= tf_select_rsv;
759 curr = tf_changed = 0;
760 for (i=start;i<end;i++)
761 {
762 if (tell+logp<=budget)
763 {
764 ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp);
765 tell = ec_tell(enc);
766 curr = tf_res[i];
767 tf_changed |= curr;
768 }
769 else
770 tf_res[i] = curr;
771 logp = isTransient ? 4 : 5;
772 }
773 /* Only code tf_select if it would actually make a difference. */
774 if (tf_select_rsv &&
775 tf_select_table[LM][4*isTransient+0+tf_changed]!=
776 tf_select_table[LM][4*isTransient+2+tf_changed])
777 ec_enc_bit_logp(enc, tf_select, 1);
778 else
779 tf_select = 0;
780 for (i=start;i<end;i++)
781 tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
782 /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
783}
784
785
/** Computes the allocation trim index.

    The trim starts from a rate-dependent value (4 below 64000, 5 from 80000,
    interpolated in between) and is then adjusted by the inter-channel
    correlation (stereo only), the spectral tilt, surround_trim, tf_estimate
    and, when available, the tonality slope of the analysis.

    @param m             Mode; m->eBands and m->nbEBands are used
    @param X             Normalised spectrum, second channel at offset N0
    @param bandLogE      Per-band log energies, channel c at offset c*m->nbEBands
    @param end           Number of bands used for the tilt estimate
    @param LM            Frame size index (shift applied to the band boundaries)
    @param C             Number of channels
    @param N0            Offset between two channels in X
    @param analysis      Analysis result; only read when float API support is compiled in
    @param stereo_saving [in,out] Updated in the stereo case
    @param tf_estimate   Transient metric subtracted from the trim
    @param intensity     Upper band limit of the minimum-correlation search
    @param surround_trim Trim offset subtracted from the result
    @param equiv_rate    Rate used to select the starting trim
    @param arch          Architecture value forwarded to celt_inner_prod()
    @return Trim index clamped to 0..10 (random in 0..10 with FUZZING defined)
 */
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
      const opus_val16 *bandLogE, int end, int LM, int C, int N0,
      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
      int intensity, opus_val16 surround_trim, opus_int32 equiv_rate, int arch)
{
   int i;
   opus_val32 diff=0;
   int c;
   int trim_index;
   opus_val16 trim = QCONST16(5.f, 8);
   opus_val16 logXC, logXC2;
   /* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less
      clear what's best, so we're keeping it as it was before, at least for now. */
   if (equiv_rate < 64000) {
      trim = QCONST16(4.f, 8);
   } else if (equiv_rate < 80000) {
      opus_int32 frac = (equiv_rate-64000) >> 10;
      trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac;
   }
   if (C==2)
   {
      opus_val16 sum = 0; /* Q10 */
      opus_val16 minXC; /* Q10 */
      /* Compute inter-channel correlation for low frequencies */
      for (i=0;i<8;i++)
      {
         opus_val32 partial;
         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
               (m->eBands[i+1]-m->eBands[i])<<LM, arch);
         sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
      }
      /* Average over the 8 bands, then keep the magnitude capped at 1 */
      sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
      sum = MIN16(QCONST16(1.f, 10), ABS16(sum));
      minXC = sum;
      /* Smallest correlation magnitude over bands 8..intensity-1 */
      for (i=8;i<intensity;i++)
      {
         opus_val32 partial;
         partial = celt_inner_prod(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)],
               (m->eBands[i+1]-m->eBands[i])<<LM, arch);
         minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18))));
      }
      minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC));
      /*printf ("%f\n", sum);*/
      /* mid-side savings estimations based on the LF average*/
      logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
      /* mid-side savings estimations based on min correlation */
      logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
#ifdef FIXED_POINT
      /* Compensate for Q20 vs Q14 input and convert output to Q8 */
      logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
      logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
#endif

      trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
      *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
   }

   /* Estimate spectral tilt */
   c=0; do {
      for (i=0;i<end-1;i++)
      {
         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);
      }
   } while (++c<C);
   diff /= C*(end-1);
   /*printf("%f\n", diff);*/
   /* The tilt contribution is limited to the range -2..2 */
   trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
   trim -= SHR16(surround_trim, DB_SHIFT-8);
   trim -= 2*SHR16(tf_estimate, 14-8);
#ifndef DISABLE_FLOAT_API
   if (analysis->valid)
   {
      /* Tonality slope contribution, also limited to -2..2 */
      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8),
            (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))));
   }
#else
   (void)analysis;
#endif

   /* Round to the nearest integer index */
#ifdef FIXED_POINT
   trim_index = PSHR32(trim, 8);
#else
   trim_index = (int)floor(.5f+trim);
#endif
   trim_index = IMAX(0, IMIN(10, trim_index));
   /*printf("%d\n", trim_index);*/
#ifdef FUZZING
   /* Fuzzing builds replace the result with a random index */
   trim_index = rand()%11;
#endif
   return trim_index;
}
877
878static int stereo_analysis(const CELTMode *m, const celt_norm *X,
879 int LM, int N0)
880{
881 int i;
882 int thetas;
883 opus_val32 sumLR = EPSILON, sumMS = EPSILON;
884
885 /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */
886 for (i=0;i<13;i++)
887 {
888 int j;
889 for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
890 {
891 opus_val32 L, R, M, S;
892 /* We cast to 32-bit first because of the -32768 case */
893 L = EXTEND32(X[j]);
894 R = EXTEND32(X[N0+j]);
895 M = ADD32(L, R);
896 S = SUB32(L, R);
897 sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R)));
898 sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S)));
899 }
900 }
901 sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS);
902 thetas = 13;
903 /* We don't need thetas for lower bands with LM<=1 */
904 if (LM<=1)
905 thetas -= 8;
906 return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS)
907 > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR);
908}
909
910#define MSWAP(a,b) do {opus_val16 tmp = a;a=b;b=tmp;} while(0)
911static opus_val16 median_of_5(const opus_val16 *x)
912{
913 opus_val16 t0, t1, t2, t3, t4;
914 t2 = x[2];
915 if (x[0] > x[1])
916 {
917 t0 = x[1];
918 t1 = x[0];
919 } else {
920 t0 = x[0];
921 t1 = x[1];
922 }
923 if (x[3] > x[4])
924 {
925 t3 = x[4];
926 t4 = x[3];
927 } else {
928 t3 = x[3];
929 t4 = x[4];
930 }
931 if (t0 > t3)
932 {
933 MSWAP(t0, t3);
934 MSWAP(t1, t4);
935 }
936 if (t2 > t1)
937 {
938 if (t1 < t3)
939 return MIN16(t2, t3);
940 else
941 return MIN16(t4, t1);
942 } else {
943 if (t2 < t3)
944 return MIN16(t1, t3);
945 else
946 return MIN16(t2, t4);
947 }
948}
949
950static opus_val16 median_of_3(const opus_val16 *x)
951{
952 opus_val16 t0, t1, t2;
953 if (x[0] > x[1])
954 {
955 t0 = x[1];
956 t1 = x[0];
957 } else {
958 t0 = x[0];
959 t1 = x[1];
960 }
961 t2 = x[2];
962 if (t1 < t2)
963 return t1;
964 else if (t0 < t2)
965 return t2;
966 else
967 return t0;
968}
969
970static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
971 int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
972 int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
973 int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc,
974 AnalysisInfo *analysis, int *importance, int *spread_weight)
975{
976 int i, c;
977 opus_int32 tot_boost=0;
978 opus_val16 maxDepth;
979 VARDECL(opus_val16, follower);
980 VARDECL(opus_val16, noise_floor);
981 SAVE_STACK;
982 ALLOC(follower, C*nbEBands, opus_val16);
983 ALLOC(noise_floor, C*nbEBands, opus_val16);
984 OPUS_CLEAR(offsets, nbEBands);
985 /* Dynamic allocation code */
986 maxDepth=-QCONST16(31.9f, DB_SHIFT);
987 for (i=0;i<end;i++)
988 {
989 /* Noise floor must take into account eMeans, the depth, the width of the bands
990 and the preemphasis filter (approx. square of bark band ID) */
991 noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i])
992 +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
993 +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
994 }
995 c=0;do
996 {
997 for (i=0;i<end;i++)
998 maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]);
999 } while (++c<C);
1000 {
1001 /* Compute a really simple masking model to avoid taking into account completely masked
1002 bands when computing the spreading decision. */
1003 VARDECL(opus_val16, mask);
1004 VARDECL(opus_val16, sig);
1005 ALLOC(mask, nbEBands, opus_val16);
1006 ALLOC(sig, nbEBands, opus_val16);
1007 for (i=0;i<end;i++)
1008 mask[i] = bandLogE[i]-noise_floor[i];
1009 if (C==2)
1010 {
1011 for (i=0;i<end;i++)
1012 mask[i] = MAX16(mask[i], bandLogE[nbEBands+i]-noise_floor[i]);
1013 }
1014 OPUS_COPY(sig, mask, end);
1015 for (i=1;i<end;i++)
1016 mask[i] = MAX16(mask[i], mask[i-1] - QCONST16(2.f, DB_SHIFT));
1017 for (i=end-2;i>=0;i--)
1018 mask[i] = MAX16(mask[i], mask[i+1] - QCONST16(3.f, DB_SHIFT));
1019 for (i=0;i<end;i++)
1020 {
1021 /* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/
1022 opus_val16 smr = sig[i]-MAX16(MAX16(0, maxDepth-QCONST16(12.f, DB_SHIFT)), mask[i]);
1023 /* Clamp SMR to make sure we're not shifting by something negative or too large. */
1024#ifdef FIXED_POINT
1025 /* FIXME: Use PSHR16() instead */
1026 int shift = -PSHR32(MAX16(-QCONST16(5.f, DB_SHIFT), MIN16(0, smr)), DB_SHIFT);
1027#else
1028 int shift = IMIN(5, IMAX(0, -(int)floor(.5f + smr)));
1029#endif
1030 spread_weight[i] = 32 >> shift;
1031 }
1032 /*for (i=0;i<end;i++)
1033 printf("%d ", spread_weight[i]);
1034 printf("\n");*/
1035 }
1036 /* Make sure that dynamic allocation can't make us bust the budget */
1037 if (effectiveBytes > 50 && LM>=1 && !lfe)
1038 {
1039 int last=0;
1040 c=0;do
1041 {
1042 opus_val16 offset;
1043 opus_val16 tmp;
1044 opus_val16 *f;
1045 f = &follower[c*nbEBands];
1046 f[0] = bandLogE2[c*nbEBands];
1047 for (i=1;i<end;i++)
1048 {
1049 /* The last band to be at least 3 dB higher than the previous one
1050 is the last we'll consider. Otherwise, we run into problems on
1051 bandlimited signals. */
1052 if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
1053 last=i;
1054 f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
1055 }
1056 for (i=last-1;i>=0;i--)
1057 f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
1058
1059 /* Combine with a median filter to avoid dynalloc triggering unnecessarily.
1060 The "offset" value controls how conservative we are -- a higher offset
1061 reduces the impact of the median filter and makes dynalloc use more bits. */
1062 offset = QCONST16(1.f, DB_SHIFT);
1063 for (i=2;i<end-2;i++)
1064 f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset);
1065 tmp = median_of_3(&bandLogE2[c*nbEBands])-offset;
1066 f[0] = MAX16(f[0], tmp);
1067 f[1] = MAX16(f[1], tmp);
1068 tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset;
1069 f[end-2] = MAX16(f[end-2], tmp);
1070 f[end-1] = MAX16(f[end-1], tmp);
1071
1072 for (i=0;i<end;i++)
1073 f[i] = MAX16(f[i], noise_floor[i]);
1074 } while (++c<C);
1075 if (C==2)
1076 {
1077 for (i=start;i<end;i++)
1078 {
1079 /* Consider 24 dB "cross-talk" */
1080 follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT));
1081 follower[ i] = MAX16(follower[ i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT));
1082 follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i]));
1083 }
1084 } else {
1085 for (i=start;i<end;i++)
1086 {
1087 follower[i] = MAX16(0, bandLogE[i]-follower[i]);
1088 }
1089 }
1090 for (i=start;i<end;i++)
1091 follower[i] = MAX16(follower[i], surround_dynalloc[i]);
1092 for (i=start;i<end;i++)
1093 {
1094#ifdef FIXED_POINT
1095 importance[i] = PSHR32(13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))), 16);
1096#else
1097 importance[i] = (int)floor(.5f+13*celt_exp2(MIN16(follower[i], QCONST16(4.f, DB_SHIFT))));
1098#endif
1099 }
1100 /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
1101 if ((!vbr || constrained_vbr)&&!isTransient)
1102 {
1103 for (i=start;i<end;i++)
1104 follower[i] = HALF16(follower[i]);
1105 }
1106 for (i=start;i<end;i++)
1107 {
1108 if (i<8)
1109 follower[i] *= 2;
1110 if (i>=12)
1111 follower[i] = HALF16(follower[i]);
1112 }
1113#ifdef DISABLE_FLOAT_API
1114 (void)analysis;
1115#else
1116 if (analysis->valid)
1117 {
1118 for (i=start;i<IMIN(LEAK_BANDS, end);i++)
1119 follower[i] = follower[i] + QCONST16(1.f/64.f, DB_SHIFT)*analysis->leak_boost[i];
1120 }
1121#endif
1122 for (i=start;i<end;i++)
1123 {
1124 int width;
1125 int boost;
1126 int boost_bits;
1127
1128 follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
1129
1130 width = C*(eBands[i+1]-eBands[i])<<LM;
1131 if (width<6)
1132 {
1133 boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT);
1134 boost_bits = boost*width<<BITRES;
1135 } else if (width > 48) {
1136 boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
1137 boost_bits = (boost*width<<BITRES)/8;
1138 } else {
1139 boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
1140 boost_bits = boost*6<<BITRES;
1141 }
1142 /* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */
1143 if ((!vbr || (constrained_vbr&&!isTransient))
1144 && (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3)
1145 {
1146 opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3);
1147 offsets[i] = cap-tot_boost;
1148 tot_boost = cap;
1149 break;
1150 } else {
1151 offsets[i] = boost;
1152 tot_boost += boost_bits;
1153 }
1154 }
1155 } else {
1156 for (i=start;i<end;i++)
1157 importance[i] = 13;
1158 }
1159 *tot_boost_ = tot_boost;
1160 RESTORE_STACK;
1161 return maxDepth;
1162}
1163
1164
1165static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N,
1166 int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes, AnalysisInfo *analysis)
1167{
1168 int c;
1169 VARDECL(celt_sig, _pre);
1170 celt_sig *pre[2];
1171 const CELTMode *mode;
1172 int pitch_index;
1173 opus_val16 gain1;
1174 opus_val16 pf_threshold;
1175 int pf_on;
1176 int qg;
1177 int overlap;
1178 SAVE_STACK;
1179
1180 mode = st->mode;
1181 overlap = mode->overlap;
1182 ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig);
1183
1184 pre[0] = _pre;
1185 pre[1] = _pre + (N+COMBFILTER_MAXPERIOD);
1186
1187
1188 c=0; do {
1189 OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
1190 OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+overlap)+overlap, N);
1191 } while (++c<CC);
1192
1193 if (enabled)
1194 {
1195 VARDECL(opus_val16, pitch_buf);
1196 ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
1197
1198 pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
1199 /* Don't search for the fir last 1.5 octave of the range because
1200 there's too many false-positives due to short-term correlation */
1201 pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
1202 COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index,
1203 st->arch);
1204 pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
1205
1206 gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
1207 N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch);
1208 if (pitch_index > COMBFILTER_MAXPERIOD-2)
1209 pitch_index = COMBFILTER_MAXPERIOD-2;
1210 gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
1211 /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
1212 if (st->loss_rate>2)
1213 gain1 = HALF32(gain1);
1214 if (st->loss_rate>4)
1215 gain1 = HALF32(gain1);
1216 if (st->loss_rate>8)
1217 gain1 = 0;
1218 } else {
1219 gain1 = 0;
1220 pitch_index = COMBFILTER_MINPERIOD;
1221 }
1222#ifndef DISABLE_FLOAT_API
1223 if (analysis->valid)
1224 gain1 = (opus_val16)(gain1 * analysis->max_pitch_ratio);
1225#else
1226 (void)analysis;
1227#endif
1228 /* Gain threshold for enabling the prefilter/postfilter */
1229 pf_threshold = QCONST16(.2f,15);
1230
1231 /* Adjusting the threshold based on rate and continuity */
1232 if (abs(pitch_index-st->prefilter_period)*10>pitch_index)
1233 pf_threshold += QCONST16(.2f,15);
1234 if (nbAvailableBytes<25)
1235 pf_threshold += QCONST16(.1f,15);
1236 if (nbAvailableBytes<35)
1237 pf_threshold += QCONST16(.1f,15);
1238 if (st->prefilter_gain > QCONST16(.4f,15))
1239 pf_threshold -= QCONST16(.1f,15);
1240 if (st->prefilter_gain > QCONST16(.55f,15))
1241 pf_threshold -= QCONST16(.1f,15);
1242
1243 /* Hard threshold at 0.2 */
1244 pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15));
1245 if (gain1<pf_threshold)
1246 {
1247 gain1 = 0;
1248 pf_on = 0;
1249 qg = 0;
1250 } else {
1251 /*This block is not gated by a total bits check only because
1252 of the nbAvailableBytes check above.*/
1253 if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15))
1254 gain1=st->prefilter_gain;
1255
1256#ifdef FIXED_POINT
1257 qg = ((gain1+1536)>>10)/3-1;
1258#else
1259 qg = (int)floor(.5f+gain1*32/3)-1;
1260#endif
1261 qg = IMAX(0, IMIN(7, qg));
1262 gain1 = QCONST16(0.09375f,15)*(qg+1);
1263 pf_on = 1;
1264 }
1265 /*printf("%d %f\n", pitch_index, gain1);*/
1266
1267 c=0; do {
1268 int offset = mode->shortMdctSize-overlap;
1269 st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
1270 OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap);
1271 if (offset)
1272 comb_filter(in+c*(N+overlap)+overlap, pre[c]+COMBFILTER_MAXPERIOD,
1273 st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain,
1274 st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch);
1275
1276 comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset,
1277 st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1,
1278 st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch);
1279 OPUS_COPY(st->in_mem+c*(overlap), in+c*(N+overlap)+N, overlap);
1280
1281 if (N>COMBFILTER_MAXPERIOD)
1282 {
1283 OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
1284 } else {
1285 OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
1286 OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
1287 }
1288 } while (++c<CC);
1289
1290 RESTORE_STACK;
1291 *gain = gain1;
1292 *pitch = pitch_index;
1293 *qgain = qg;
1294 return pf_on;
1295}
1296
1297static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target,
1298 int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
1299 int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
1300 opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth,
1301 int lfe, int has_surround_mask, opus_val16 surround_masking,
1302 opus_val16 temporal_vbr)
1303{
1304 /* The target rate in 8th bits per frame */
1305 opus_int32 target;
1306 int coded_bins;
1307 int coded_bands;
1308 opus_val16 tf_calibration;
1309 int nbEBands;
1310 const opus_int16 *eBands;
1311
1312 nbEBands = mode->nbEBands;
1313 eBands = mode->eBands;
1314
1315 coded_bands = lastCodedBands ? lastCodedBands : nbEBands;
1316 coded_bins = eBands[coded_bands]<<LM;
1317 if (C==2)
1318 coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM;
1319
1320 target = base_target;
1321
1322 /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
1323#ifndef DISABLE_FLOAT_API
1324 if (analysis->valid && analysis->activity<.4)
1325 target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
1326#endif
1327 /* Stereo savings */
1328 if (C==2)
1329 {
1330 int coded_stereo_bands;
1331 int coded_stereo_dof;
1332 opus_val16 max_frac;
1333 coded_stereo_bands = IMIN(intensity, coded_bands);
1334 coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
1335 /* Maximum fraction of the bits we can save if the signal is mono. */
1336 max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
1337 stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
1338 /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
1339 target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
1340 SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
1341 }
1342 /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
1343 target += tot_boost-(19<<LM);
1344 /* Apply transient boost, compensating for average boost. */
1345 tf_calibration = QCONST16(0.044f,14);
1346 target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
1347
1348#ifndef DISABLE_FLOAT_API
1349 /* Apply tonality boost */
1350 if (analysis->valid && !lfe)
1351 {
1352 opus_int32 tonal_target;
1353 float tonal;
1354
1355 /* Tonality boost (compensating for the average). */
1356 tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f;
1357 tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
1358 if (pitch_change)
1359 tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f);
1360 /*printf("%f %f ", analysis->tonality, tonal);*/
1361 target = tonal_target;
1362 }
1363#else
1364 (void)analysis;
1365 (void)pitch_change;
1366#endif
1367
1368 if (has_surround_mask&&!lfe)
1369 {
1370 opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);
1371 /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
1372 target = IMAX(target/4, surround_target);
1373 }
1374
1375 {
1376 opus_int32 floor_depth;
1377 int bins;
1378 bins = eBands[nbEBands-2]<<LM;
1379 /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
1380 floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
1381 floor_depth = IMAX(floor_depth, target>>2);
1382 target = IMIN(target, floor_depth);
1383 /*printf("%f %d\n", maxDepth, floor_depth);*/
1384 }
1385
1386 /* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate
1387 for long. Needs tuning. */
1388 if ((!has_surround_mask||lfe) && constrained_vbr)
1389 {
1390 target = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), target-base_target);
1391 }
1392
1393 if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
1394 {
1395 opus_val16 amount;
1396 opus_val16 tvbr_factor;
1397 amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));
1398 tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT);
1399 target += (opus_int32)MULT16_32_Q15(tvbr_factor, target);
1400 }
1401
1402 /* Don't allow more than doubling the rate */
1403 target = IMIN(2*base_target, target);
1404
1405 return target;
1406}
1407
1408int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc)
1409{
1410 int i, c, N;
1411 opus_int32 bits;
1412 ec_enc _enc;
1413 VARDECL(celt_sig, in);
1414 VARDECL(celt_sig, freq);
1415 VARDECL(celt_norm, X);
1416 VARDECL(celt_ener, bandE);
1417 VARDECL(opus_val16, bandLogE);
1418 VARDECL(opus_val16, bandLogE2);
1419 VARDECL(int, fine_quant);
1420 VARDECL(opus_val16, error);
1421 VARDECL(int, pulses);
1422 VARDECL(int, cap);
1423 VARDECL(int, offsets);
1424 VARDECL(int, importance);
1425 VARDECL(int, spread_weight);
1426 VARDECL(int, fine_priority);
1427 VARDECL(int, tf_res);
1428 VARDECL(unsigned char, collapse_masks);
1429 celt_sig *prefilter_mem;
1430 opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError;
1431 int shortBlocks=0;
1432 int isTransient=0;
1433 const int CC = st->channels;
1434 const int C = st->stream_channels;
1435 int LM, M;
1436 int tf_select;
1437 int nbFilledBytes, nbAvailableBytes;
1438 int start;
1439 int end;
1440 int effEnd;
1441 int codedBands;
1442 int alloc_trim;
1443 int pitch_index=COMBFILTER_MINPERIOD;
1444 opus_val16 gain1 = 0;
1445 int dual_stereo=0;
1446 int effectiveBytes;
1447 int dynalloc_logp;
1448 opus_int32 vbr_rate;
1449 opus_int32 total_bits;
1450 opus_int32 total_boost;
1451 opus_int32 balance;
1452 opus_int32 tell;
1453 opus_int32 tell0_frac;
1454 int prefilter_tapset=0;
1455 int pf_on;
1456 int anti_collapse_rsv;
1457 int anti_collapse_on=0;
1458 int silence=0;
1459 int tf_chan = 0;
1460 opus_val16 tf_estimate;
1461 int pitch_change=0;
1462 opus_int32 tot_boost;
1463 opus_val32 sample_max;
1464 opus_val16 maxDepth;
1465 const OpusCustomMode *mode;
1466 int nbEBands;
1467 int overlap;
1468 const opus_int16 *eBands;
1469 int secondMdct;
1470 int signalBandwidth;
1471 int transient_got_disabled=0;
1472 opus_val16 surround_masking=0;
1473 opus_val16 temporal_vbr=0;
1474 opus_val16 surround_trim = 0;
1475 opus_int32 equiv_rate;
1476 int hybrid;
1477 int weak_transient = 0;
1478 int enable_tf_analysis;
1479 VARDECL(opus_val16, surround_dynalloc);
1480 ALLOC_STACK;
1481
1482 mode = st->mode;
1483 nbEBands = mode->nbEBands;
1484 overlap = mode->overlap;
1485 eBands = mode->eBands;
1486 start = st->start;
1487 end = st->end;
1488 hybrid = start != 0;
1489 tf_estimate = 0;
1490 if (nbCompressedBytes<2 || pcm==NULL)
1491 {
1492 RESTORE_STACK;
1493 return OPUS_BAD_ARG;
1494 }
1495
1496 frame_size *= st->upsample;
1497 for (LM=0;LM<=mode->maxLM;LM++)
1498 if (mode->shortMdctSize<<LM==frame_size)
1499 break;
1500 if (LM>mode->maxLM)
1501 {
1502 RESTORE_STACK;
1503 return OPUS_BAD_ARG;
1504 }
1505 M=1<<LM;
1506 N = M*mode->shortMdctSize;
1507
1508 prefilter_mem = st->in_mem+CC*(overlap);
1509 oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD));
1510 oldLogE = oldBandE + CC*nbEBands;
1511 oldLogE2 = oldLogE + CC*nbEBands;
1512 energyError = oldLogE2 + CC*nbEBands;
1513
1514 if (enc==NULL)
1515 {
1516 tell0_frac=tell=1;
1517 nbFilledBytes=0;
1518 } else {
1519 tell0_frac=ec_tell_frac(enc);
1520 tell=ec_tell(enc);
1521 nbFilledBytes=(tell+4)>>3;
1522 }
1523
1524#ifdef CUSTOM_MODES
1525 if (st->signalling && enc==NULL)
1526 {
1527 int tmp = (mode->effEBands-end)>>1;
1528 end = st->end = IMAX(1, mode->effEBands-tmp);
1529 compressed[0] = tmp<<5;
1530 compressed[0] |= LM<<3;
1531 compressed[0] |= (C==2)<<2;
1532 /* Convert "standard mode" to Opus header */
1533 if (mode->Fs==48000 && mode->shortMdctSize==120)
1534 {
1535 int c0 = toOpus(compressed[0]);
1536 if (c0<0)
1537 {
1538 RESTORE_STACK;
1539 return OPUS_BAD_ARG;
1540 }
1541 compressed[0] = c0;
1542 }
1543 compressed++;
1544 nbCompressedBytes--;
1545 }
1546#else
1547 celt_assert(st->signalling==0);
1548#endif
1549
1550 /* Can't produce more than 1275 output bytes */
1551 nbCompressedBytes = IMIN(nbCompressedBytes,1275);
1552 nbAvailableBytes = nbCompressedBytes - nbFilledBytes;
1553
1554 if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX)
1555 {
1556 opus_int32 den=mode->Fs>>BITRES;
1557 vbr_rate=(st->bitrate*frame_size+(den>>1))/den;
1558#ifdef CUSTOM_MODES
1559 if (st->signalling)
1560 vbr_rate -= 8<<BITRES;
1561#endif
1562 effectiveBytes = vbr_rate>>(3+BITRES);
1563 } else {
1564 opus_int32 tmp;
1565 vbr_rate = 0;
1566 tmp = st->bitrate*frame_size;
1567 if (tell>1)
1568 tmp += tell;
1569 if (st->bitrate!=OPUS_BITRATE_MAX)
1570 nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
1571 (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
1572 effectiveBytes = nbCompressedBytes - nbFilledBytes;
1573 }
1574 equiv_rate = ((opus_int32)nbCompressedBytes*8*50 >> (3-LM)) - (40*C+20)*((400>>LM) - 50);
1575 if (st->bitrate != OPUS_BITRATE_MAX)
1576 equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50));
1577
1578 if (enc==NULL)
1579 {
1580 ec_enc_init(&_enc, compressed, nbCompressedBytes);
1581 enc = &_enc;
1582 }
1583
1584 if (vbr_rate>0)
1585 {
1586 /* Computes the max bit-rate allowed in VBR mode to avoid violating the
1587 target rate and buffering.
1588 We must do this up front so that bust-prevention logic triggers
1589 correctly if we don't have enough bits. */
1590 if (st->constrained_vbr)
1591 {
1592 opus_int32 vbr_bound;
1593 opus_int32 max_allowed;
1594 /* We could use any multiple of vbr_rate as bound (depending on the
1595 delay).
1596 This is clamped to ensure we use at least two bytes if the encoder
1597 was entirely empty, but to allow 0 in hybrid mode. */
1598 vbr_bound = vbr_rate;
1599 max_allowed = IMIN(IMAX(tell==1?2:0,
1600 (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)),
1601 nbAvailableBytes);
1602 if(max_allowed < nbAvailableBytes)
1603 {
1604 nbCompressedBytes = nbFilledBytes+max_allowed;
1605 nbAvailableBytes = max_allowed;
1606 ec_enc_shrink(enc, nbCompressedBytes);
1607 }
1608 }
1609 }
1610 total_bits = nbCompressedBytes*8;
1611
1612 effEnd = end;
1613 if (effEnd > mode->effEBands)
1614 effEnd = mode->effEBands;
1615
1616 ALLOC(in, CC*(N+overlap), celt_sig);
1617
1618 sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample));
1619 st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample);
1620 sample_max=MAX32(sample_max, st->overlap_max);
1621#ifdef FIXED_POINT
1622 silence = (sample_max==0);
1623#else
1624 silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
1625#endif
1626#ifdef FUZZING
1627 if ((rand()&0x3F)==0)
1628 silence = 1;
1629#endif
1630 if (tell==1)
1631 ec_enc_bit_logp(enc, silence, 15);
1632 else
1633 silence=0;
1634 if (silence)
1635 {
1636 /*In VBR mode there is no need to send more than the minimum. */
1637 if (vbr_rate>0)
1638 {
1639 effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2);
1640 total_bits=nbCompressedBytes*8;
1641 nbAvailableBytes=2;
1642 ec_enc_shrink(enc, nbCompressedBytes);
1643 }
1644 /* Pretend we've filled all the remaining bits with zeros
1645 (that's what the initialiser did anyway) */
1646 tell = nbCompressedBytes*8;
1647 enc->nbits_total+=tell-ec_tell(enc);
1648 }
1649 c=0; do {
1650 int need_clip=0;
1651#ifndef FIXED_POINT
1652 need_clip = st->clip && sample_max>65536.f;
1653#endif
1654 celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample,
1655 mode->preemph, st->preemph_memE+c, need_clip);
1656 } while (++c<CC);
1657
1658
1659
1660 /* Find pitch period and gain */
1661 {
1662 int enabled;
1663 int qg;
1664 enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf
1665 && st->complexity >= 5;
1666
1667 prefilter_tapset = st->tapset_decision;
1668 pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes, &st->analysis);
1669 if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
1670 && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
1671 pitch_change = 1;
1672 if (pf_on==0)
1673 {
1674 if(!hybrid && tell+16<=total_bits)
1675 ec_enc_bit_logp(enc, 0, 1);
1676 } else {
1677 /*This block is not gated by a total bits check only because
1678 of the nbAvailableBytes check above.*/
1679 int octave;
1680 ec_enc_bit_logp(enc, 1, 1);
1681 pitch_index += 1;
1682 octave = EC_ILOG(pitch_index)-5;
1683 ec_enc_uint(enc, octave, 6);
1684 ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave);
1685 pitch_index -= 1;
1686 ec_enc_bits(enc, qg, 3);
1687 ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2);
1688 }
1689 }
1690
1691 isTransient = 0;
1692 shortBlocks = 0;
1693 if (st->complexity >= 1 && !st->lfe)
1694 {
1695 /* Reduces the likelihood of energy instability on fricatives at low bitrate
1696 in hybrid mode. It seems like we still want to have real transients on vowels
1697 though (small SILK quantization offset value). */
1698 int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2;
1699 isTransient = transient_analysis(in, N+overlap, CC,
1700 &tf_estimate, &tf_chan, allow_weak_transients, &weak_transient);
1701 }
1702 if (LM>0 && ec_tell(enc)+3<=total_bits)
1703 {
1704 if (isTransient)
1705 shortBlocks = M;
1706 } else {
1707 isTransient = 0;
1708 transient_got_disabled=1;
1709 }
1710
1711 ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
1712 ALLOC(bandE,nbEBands*CC, celt_ener);
1713 ALLOC(bandLogE,nbEBands*CC, opus_val16);
1714
1715 secondMdct = shortBlocks && st->complexity>=8;
1716 ALLOC(bandLogE2, C*nbEBands, opus_val16);
1717 if (secondMdct)
1718 {
1719 compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
1720 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
1721 amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
1722 for (i=0;i<C*nbEBands;i++)
1723 bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
1724 }
1725
1726 compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
1727 /* This should catch any NaN in the CELT input. Since we're not supposed to see any (they're filtered
1728 at the Opus layer), just abort. */
1729 celt_assert(!celt_isnan(freq[0]) && (C==1 || !celt_isnan(freq[N])));
1730 if (CC==2&&C==1)
1731 tf_chan = 0;
1732 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
1733
1734 if (st->lfe)
1735 {
1736 for (i=2;i<end;i++)
1737 {
1738 bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
1739 bandE[i] = MAX32(bandE[i], EPSILON);
1740 }
1741 }
1742 amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
1743
1744 ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
1745 OPUS_CLEAR(surround_dynalloc, end);
1746 /* This computes how much masking takes place between surround channels */
1747 if (!hybrid&&st->energy_mask&&!st->lfe)
1748 {
1749 int mask_end;
1750 int midband;
1751 int count_dynalloc;
1752 opus_val32 mask_avg=0;
1753 opus_val32 diff=0;
1754 int count=0;
1755 mask_end = IMAX(2,st->lastCodedBands);
1756 for (c=0;c<C;c++)
1757 {
1758 for(i=0;i<mask_end;i++)
1759 {
1760 opus_val16 mask;
1761 mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i],
1762 QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
1763 if (mask > 0)
1764 mask = HALF16(mask);
1765 mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]);
1766 count += eBands[i+1]-eBands[i];
1767 diff += MULT16_16(mask, 1+2*i-mask_end);
1768 }
1769 }
1770 celt_assert(count>0);
1771 mask_avg = DIV32_16(mask_avg,count);
1772 mask_avg += QCONST16(.2f, DB_SHIFT);
1773 diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end);
1774 /* Again, being conservative */
1775 diff = HALF32(diff);
1776 diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT));
1777 /* Find the band that's in the middle of the coded spectrum */
1778 for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++);
1779 count_dynalloc=0;
1780 for(i=0;i<mask_end;i++)
1781 {
1782 opus_val32 lin;
1783 opus_val16 unmask;
1784 lin = mask_avg + diff*(i-midband);
1785 if (C==2)
1786 unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]);
1787 else
1788 unmask = st->energy_mask[i];
1789 unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT));
1790 unmask -= lin;
1791 if (unmask > QCONST16(.25f, DB_SHIFT))
1792 {
1793 surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT);
1794 count_dynalloc++;
1795 }
1796 }
1797 if (count_dynalloc>=3)
1798 {
1799 /* If we need dynalloc in many bands, it's probably because our
1800 initial masking rate was too low. */
1801 mask_avg += QCONST16(.25f, DB_SHIFT);
1802 if (mask_avg>0)
1803 {
1804 /* Something went really wrong in the original calculations,
1805 disabling masking. */
1806 mask_avg = 0;
1807 diff = 0;
1808 OPUS_CLEAR(surround_dynalloc, mask_end);
1809 } else {
1810 for(i=0;i<mask_end;i++)
1811 surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
1812 }
1813 }
1814 mask_avg += QCONST16(.2f, DB_SHIFT);
1815 /* Convert to 1/64th units used for the trim */
1816 surround_trim = 64*diff;
1817 /*printf("%d %d ", mask_avg, surround_trim);*/
1818 surround_masking = mask_avg;
1819 }
1820 /* Temporal VBR (but not for LFE) */
1821 if (!st->lfe)
1822 {
1823 opus_val16 follow=-QCONST16(10.0f,DB_SHIFT);
1824 opus_val32 frame_avg=0;
1825 opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
1826 for(i=start;i<end;i++)
1827 {
1828 follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset);
1829 if (C==2)
1830 follow = MAX16(follow, bandLogE[i+nbEBands]-offset);
1831 frame_avg += follow;
1832 }
1833 frame_avg /= (end-start);
1834 temporal_vbr = SUB16(frame_avg,st->spec_avg);
1835 temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr));
1836 st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr);
1837 }
1838 /*for (i=0;i<21;i++)
1839 printf("%f ", bandLogE[i]);
1840 printf("\n");*/
1841
1842 if (!secondMdct)
1843 {
1844 OPUS_COPY(bandLogE2, bandLogE, C*nbEBands);
1845 }
1846
1847 /* Last chance to catch any transient we might have missed in the
1848 time-domain analysis */
1849 if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe && !hybrid)
1850 {
1851 if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C))
1852 {
1853 isTransient = 1;
1854 shortBlocks = M;
1855 compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
1856 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch);
1857 amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
1858 /* Compensate for the scaling of short vs long mdcts */
1859 for (i=0;i<C*nbEBands;i++)
1860 bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
1861 tf_estimate = QCONST16(.2f,14);
1862 }
1863 }
1864
1865 if (LM>0 && ec_tell(enc)+3<=total_bits)
1866 ec_enc_bit_logp(enc, isTransient, 3);
1867
1868 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
1869
1870 /* Band normalisation */
1871 normalise_bands(mode, freq, X, bandE, effEnd, C, M);
1872
1873 enable_tf_analysis = effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe;
1874
1875 ALLOC(offsets, nbEBands, int);
1876 ALLOC(importance, nbEBands, int);
1877 ALLOC(spread_weight, nbEBands, int);
1878
1879 maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
1880 st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
1881 eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight);
1882
1883 ALLOC(tf_res, nbEBands, int);
1884 /* Disable variable tf resolution for hybrid and at very low bitrate */
1885 if (enable_tf_analysis)
1886 {
1887 int lambda;
1888 lambda = IMAX(80, 20480/effectiveBytes + 2);
1889 tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan, importance);
1890 for (i=effEnd;i<end;i++)
1891 tf_res[i] = tf_res[effEnd-1];
1892 } else if (hybrid && weak_transient)
1893 {
1894 /* For weak transients, we rely on the fact that improving time resolution using
1895 TF on a long window is imperfect and will not result in an energy collapse at
1896 low bitrate. */
1897 for (i=0;i<end;i++)
1898 tf_res[i] = 1;
1899 tf_select=0;
1900 } else if (hybrid && effectiveBytes<15 && st->silk_info.signalType != 2)
1901 {
1902 /* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */
1903 for (i=0;i<end;i++)
1904 tf_res[i] = 0;
1905 tf_select=isTransient;
1906 } else {
1907 for (i=0;i<end;i++)
1908 tf_res[i] = isTransient;
1909 tf_select=0;
1910 }
1911
1912 ALLOC(error, C*nbEBands, opus_val16);
1913 c=0;
1914 do {
1915 for (i=start;i<end;i++)
1916 {
1917 /* When the energy is stable, slightly bias energy quantization towards
1918 the previous error to make the gain more stable (a constant offset is
1919 better than fluctuations). */
1920 if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < QCONST16(2.f, DB_SHIFT))
1921 {
1922 bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15));
1923 }
1924 }
1925 } while (++c < C);
1926 quant_coarse_energy(mode, start, end, effEnd, bandLogE,
1927 oldBandE, total_bits, error, enc,
1928 C, LM, nbAvailableBytes, st->force_intra,
1929 &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe);
1930
1931 tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc);
1932
1933 if (ec_tell(enc)+4<=total_bits)
1934 {
1935 if (st->lfe)
1936 {
1937 st->tapset_decision = 0;
1938 st->spread_decision = SPREAD_NORMAL;
1939 } else if (hybrid)
1940 {
1941 if (st->complexity == 0)
1942 st->spread_decision = SPREAD_NONE;
1943 else if (isTransient)
1944 st->spread_decision = SPREAD_NORMAL;
1945 else
1946 st->spread_decision = SPREAD_AGGRESSIVE;
1947 } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
1948 {
1949 if (st->complexity == 0)
1950 st->spread_decision = SPREAD_NONE;
1951 else
1952 st->spread_decision = SPREAD_NORMAL;
1953 } else {
1954 /* Disable new spreading+tapset estimator until we can show it works
1955 better than the old one. So far it seems like spreading_decision()
1956 works best. */
1957#if 0
1958 if (st->analysis.valid)
1959 {
1960 static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
1961 static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
1962 static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
1963 static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
1964 st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
1965 st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
1966 } else
1967#endif
1968 {
1969 st->spread_decision = spreading_decision(mode, X,
1970 &st->tonal_average, st->spread_decision, &st->hf_average,
1971 &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight);
1972 }
1973 /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
1974 /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
1975 }
1976 ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
1977 }
1978
1979 /* For LFE, everything interesting is in the first band */
1980 if (st->lfe)
1981 offsets[0] = IMIN(8, effectiveBytes/3);
1982 ALLOC(cap, nbEBands, int);
1983 init_caps(mode,cap,LM,C);
1984
1985 dynalloc_logp = 6;
1986 total_bits<<=BITRES;
1987 total_boost = 0;
1988 tell = ec_tell_frac(enc);
1989 for (i=start;i<end;i++)
1990 {
1991 int width, quanta;
1992 int dynalloc_loop_logp;
1993 int boost;
1994 int j;
1995 width = C*(eBands[i+1]-eBands[i])<<LM;
1996 /* quanta is 6 bits, but no more than 1 bit/sample
1997 and no less than 1/8 bit/sample */
1998 quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width));
1999 dynalloc_loop_logp = dynalloc_logp;
2000 boost = 0;
2001 for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost
2002 && boost < cap[i]; j++)
2003 {
2004 int flag;
2005 flag = j<offsets[i];
2006 ec_enc_bit_logp(enc, flag, dynalloc_loop_logp);
2007 tell = ec_tell_frac(enc);
2008 if (!flag)
2009 break;
2010 boost += quanta;
2011 total_boost += quanta;
2012 dynalloc_loop_logp = 1;
2013 }
2014 /* Making dynalloc more likely */
2015 if (j)
2016 dynalloc_logp = IMAX(2, dynalloc_logp-1);
2017 offsets[i] = boost;
2018 }
2019
2020 if (C==2)
2021 {
2022 static const opus_val16 intensity_thresholds[21]=
2023 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/
2024 { 1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134};
2025 static const opus_val16 intensity_histeresis[21]=
2026 { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8};
2027
2028 /* Always use MS for 2.5 ms frames until we can do a better analysis */
2029 if (LM!=0)
2030 dual_stereo = stereo_analysis(mode, X, LM, N);
2031
2032 st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000),
2033 intensity_thresholds, intensity_histeresis, 21, st->intensity);
2034 st->intensity = IMIN(end,IMAX(start, st->intensity));
2035 }
2036
2037 alloc_trim = 5;
2038 if (tell+(6<<BITRES) <= total_bits - total_boost)
2039 {
2040 if (start > 0 || st->lfe)
2041 {
2042 st->stereo_saving = 0;
2043 alloc_trim = 5;
2044 } else {
2045 alloc_trim = alloc_trim_analysis(mode, X, bandLogE,
2046 end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate,
2047 st->intensity, surround_trim, equiv_rate, st->arch);
2048 }
2049 ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
2050 tell = ec_tell_frac(enc);
2051 }
2052
2053 /* Variable bitrate */
2054 if (vbr_rate>0)
2055 {
2056 opus_val16 alpha;
2057 opus_int32 delta;
2058 /* The target rate in 8th bits per frame */
2059 opus_int32 target, base_target;
2060 opus_int32 min_allowed;
2061 int lm_diff = mode->maxLM - LM;
2062
2063 /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
2064 The CELT allocator will just not be able to use more than that anyway. */
2065 nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
2066 if (!hybrid)
2067 {
2068 base_target = vbr_rate - ((40*C+20)<<BITRES);
2069 } else {
2070 base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES));
2071 }
2072
2073 if (st->constrained_vbr)
2074 base_target += (st->vbr_offset>>lm_diff);
2075
2076 if (!hybrid)
2077 {
2078 target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate,
2079 st->lastCodedBands, C, st->intensity, st->constrained_vbr,
2080 st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth,
2081 st->lfe, st->energy_mask!=NULL, surround_masking,
2082 temporal_vbr);
2083 } else {
2084 target = base_target;
2085 /* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */
2086 if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM);
2087 if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM);
2088 /* Boosting bitrate on transients and vowels with significant temporal
2089 spikes. */
2090 target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES));
2091 /* If we have a strong transient, let's make sure it has enough bits to code
2092 the first two bands, so that it can use folding rather than noise. */
2093 if (tf_estimate > QCONST16(.7f,14))
2094 target = IMAX(target, 50<<BITRES);
2095 }
2096 /* The current offset is removed from the target and the space used
2097 so far is added*/
2098 target=target+tell;
2099 /* In VBR mode the frame size must not be reduced so much that it would
2100 result in the encoder running out of bits.
2101 The margin of 2 bytes ensures that none of the bust-prevention logic
2102 in the decoder will have triggered so far. */
2103 min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2;
2104 /* Take into account the 37 bits we need to have left in the packet to
2105 signal a redundant frame in hybrid mode. Creating a shorter packet would
2106 create an entropy coder desync. */
2107 if (hybrid)
2108 min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3));
2109
2110 nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3);
2111 nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes);
2112 nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
2113
2114 /* By how much did we "miss" the target on that frame */
2115 delta = target - vbr_rate;
2116
2117 target=nbAvailableBytes<<(BITRES+3);
2118
2119 /*If the frame is silent we don't adjust our drift, otherwise
2120 the encoder will shoot to very high rates after hitting a
2121 span of silence, but we do allow the bitres to refill.
2122 This means that we'll undershoot our target in CVBR/VBR modes
2123 on files with lots of silence. */
2124 if(silence)
2125 {
2126 nbAvailableBytes = 2;
2127 target = 2*8<<BITRES;
2128 delta = 0;
2129 }
2130
2131 if (st->vbr_count < 970)
2132 {
2133 st->vbr_count++;
2134 alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
2135 } else
2136 alpha = QCONST16(.001f,15);
2137 /* How many bits have we used in excess of what we're allowed */
2138 if (st->constrained_vbr)
2139 st->vbr_reservoir += target - vbr_rate;
2140 /*printf ("%d\n", st->vbr_reservoir);*/
2141
2142 /* Compute the offset we need to apply in order to reach the target */
2143 if (st->constrained_vbr)
2144 {
2145 st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
2146 st->vbr_offset = -st->vbr_drift;
2147 }
2148 /*printf ("%d\n", st->vbr_drift);*/
2149
2150 if (st->constrained_vbr && st->vbr_reservoir < 0)
2151 {
2152 /* We're under the min value -- increase rate */
2153 int adjust = (-st->vbr_reservoir)/(8<<BITRES);
2154 /* Unless we're just coding silence */
2155 nbAvailableBytes += silence?0:adjust;
2156 st->vbr_reservoir = 0;
2157 /*printf ("+%d\n", adjust);*/
2158 }
2159 nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes);
2160 /*printf("%d\n", nbCompressedBytes*50*8);*/
2161 /* This moves the raw bits to take into account the new compressed size */
2162 ec_enc_shrink(enc, nbCompressedBytes);
2163 }
2164
2165 /* Bit allocation */
2166 ALLOC(fine_quant, nbEBands, int);
2167 ALLOC(pulses, nbEBands, int);
2168 ALLOC(fine_priority, nbEBands, int);
2169
2170 /* bits = packet size - where we are - safety*/
2171 bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1;
2172 anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
2173 bits -= anti_collapse_rsv;
2174 signalBandwidth = end-1;
2175#ifndef DISABLE_FLOAT_API
2176 if (st->analysis.valid)
2177 {
2178 int min_bandwidth;
2179 if (equiv_rate < (opus_int32)32000*C)
2180 min_bandwidth = 13;
2181 else if (equiv_rate < (opus_int32)48000*C)
2182 min_bandwidth = 16;
2183 else if (equiv_rate < (opus_int32)60000*C)
2184 min_bandwidth = 18;
2185 else if (equiv_rate < (opus_int32)80000*C)
2186 min_bandwidth = 19;
2187 else
2188 min_bandwidth = 20;
2189 signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth);
2190 }
2191#endif
2192 if (st->lfe)
2193 signalBandwidth = 1;
2194 codedBands = clt_compute_allocation(mode, start, end, offsets, cap,
2195 alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
2196 fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
2197 if (st->lastCodedBands)
2198 st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands));
2199 else
2200 st->lastCodedBands = codedBands;
2201
2202 quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, enc, C);
2203
2204 /* Residual quantisation */
2205 ALLOC(collapse_masks, C*nbEBands, unsigned char);
2206 quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
2207 bandE, pulses, shortBlocks, st->spread_decision,
2208 dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv,
2209 balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv);
2210
2211 if (anti_collapse_rsv > 0)
2212 {
2213 anti_collapse_on = st->consec_transient<2;
2214#ifdef FUZZING
2215 anti_collapse_on = rand()&0x1;
2216#endif
2217 ec_enc_bits(enc, anti_collapse_on, 1);
2218 }
2219 quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C);
2220 OPUS_CLEAR(energyError, nbEBands*CC);
2221 c=0;
2222 do {
2223 for (i=start;i<end;i++)
2224 {
2225 energyError[i+c*nbEBands] = MAX16(-QCONST16(0.5f, 15), MIN16(QCONST16(0.5f, 15), error[i+c*nbEBands]));
2226 }
2227 } while (++c < C);
2228
2229 if (silence)
2230 {
2231 for (i=0;i<C*nbEBands;i++)
2232 oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
2233 }
2234
2235#ifdef RESYNTH
2236 /* Re-synthesis of the coded audio if required */
2237 {
2238 celt_sig *out_mem[2];
2239
2240 if (anti_collapse_on)
2241 {
2242 anti_collapse(mode, X, collapse_masks, LM, C, N,
2243 start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
2244 }
2245
2246 c=0; do {
2247 OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2);
2248 } while (++c<CC);
2249
2250 c=0; do {
2251 out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N;
2252 } while (++c<CC);
2253
2254 celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd,
2255 C, CC, isTransient, LM, st->upsample, silence, st->arch);
2256
2257 c=0; do {
2258 st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD);
2259 st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
2260 comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
2261 st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
2262 mode->window, overlap);
2263 if (LM!=0)
2264 comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
2265 st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
2266 mode->window, overlap);
2267 } while (++c<CC);
2268
2269 /* We reuse freq[] as scratch space for the de-emphasis */
2270 deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
2271 st->prefilter_period_old = st->prefilter_period;
2272 st->prefilter_gain_old = st->prefilter_gain;
2273 st->prefilter_tapset_old = st->prefilter_tapset;
2274 }
2275#endif
2276
2277 st->prefilter_period = pitch_index;
2278 st->prefilter_gain = gain1;
2279 st->prefilter_tapset = prefilter_tapset;
2280#ifdef RESYNTH
2281 if (LM!=0)
2282 {
2283 st->prefilter_period_old = st->prefilter_period;
2284 st->prefilter_gain_old = st->prefilter_gain;
2285 st->prefilter_tapset_old = st->prefilter_tapset;
2286 }
2287#endif
2288
2289 if (CC==2&&C==1) {
2290 OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
2291 }
2292
2293 if (!isTransient)
2294 {
2295 OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands);
2296 OPUS_COPY(oldLogE, oldBandE, CC*nbEBands);
2297 } else {
2298 for (i=0;i<CC*nbEBands;i++)
2299 oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
2300 }
2301 /* In case start or end were to change */
2302 c=0; do
2303 {
2304 for (i=0;i<start;i++)
2305 {
2306 oldBandE[c*nbEBands+i]=0;
2307 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
2308 }
2309 for (i=end;i<nbEBands;i++)
2310 {
2311 oldBandE[c*nbEBands+i]=0;
2312 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
2313 }
2314 } while (++c<CC);
2315
2316 if (isTransient || transient_got_disabled)
2317 st->consec_transient++;
2318 else
2319 st->consec_transient=0;
2320 st->rng = enc->rng;
2321
2322 /* If there's any room left (can only happen for very high rates),
2323 it's already filled with zeros */
2324 ec_enc_done(enc);
2325
2326#ifdef CUSTOM_MODES
2327 if (st->signalling)
2328 nbCompressedBytes++;
2329#endif
2330
2331 RESTORE_STACK;
2332 if (ec_get_error(enc))
2333 return OPUS_INTERNAL_ERROR;
2334 else
2335 return nbCompressedBytes;
2336}
2337
2338
2339#ifdef CUSTOM_MODES
2340
2341#ifdef FIXED_POINT
2342int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
2343{
2344 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
2345}
2346
2347#ifndef DISABLE_FLOAT_API
2348int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
2349{
2350 int j, ret, C, N;
2351 VARDECL(opus_int16, in);
2352 ALLOC_STACK;
2353
2354 if (pcm==NULL)
2355 return OPUS_BAD_ARG;
2356
2357 C = st->channels;
2358 N = frame_size;
2359 ALLOC(in, C*N, opus_int16);
2360
2361 for (j=0;j<C*N;j++)
2362 in[j] = FLOAT2INT16(pcm[j]);
2363
2364 ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
2365#ifdef RESYNTH
2366 for (j=0;j<C*N;j++)
2367 ((float*)pcm)[j]=in[j]*(1.f/32768.f);
2368#endif
2369 RESTORE_STACK;
2370 return ret;
2371}
2372#endif /* DISABLE_FLOAT_API */
2373#else
2374
2375int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
2376{
2377 int j, ret, C, N;
2378 VARDECL(celt_sig, in);
2379 ALLOC_STACK;
2380
2381 if (pcm==NULL)
2382 return OPUS_BAD_ARG;
2383
2384 C=st->channels;
2385 N=frame_size;
2386 ALLOC(in, C*N, celt_sig);
2387 for (j=0;j<C*N;j++) {
2388 in[j] = SCALEOUT(pcm[j]);
2389 }
2390
2391 ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
2392#ifdef RESYNTH
2393 for (j=0;j<C*N;j++)
2394 ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]);
2395#endif
2396 RESTORE_STACK;
2397 return ret;
2398}
2399
2400int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes)
2401{
2402 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL);
2403}
2404
2405#endif
2406
2407#endif /* CUSTOM_MODES */
2408
2409int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
2410{
2411 va_list ap;
2412
2413 va_start(ap, request);
2414 switch (request)
2415 {
2416 case OPUS_SET_COMPLEXITY_REQUEST:
2417 {
2418 int value = va_arg(ap, opus_int32);
2419 if (value<0 || value>10)
2420 goto bad_arg;
2421 st->complexity = value;
2422 }
2423 break;
2424 case CELT_SET_START_BAND_REQUEST:
2425 {
2426 opus_int32 value = va_arg(ap, opus_int32);
2427 if (value<0 || value>=st->mode->nbEBands)
2428 goto bad_arg;
2429 st->start = value;
2430 }
2431 break;
2432 case CELT_SET_END_BAND_REQUEST:
2433 {
2434 opus_int32 value = va_arg(ap, opus_int32);
2435 if (value<1 || value>st->mode->nbEBands)
2436 goto bad_arg;
2437 st->end = value;
2438 }
2439 break;
2440 case CELT_SET_PREDICTION_REQUEST:
2441 {
2442 int value = va_arg(ap, opus_int32);
2443 if (value<0 || value>2)
2444 goto bad_arg;
2445 st->disable_pf = value<=1;
2446 st->force_intra = value==0;
2447 }
2448 break;
2449 case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
2450 {
2451 int value = va_arg(ap, opus_int32);
2452 if (value<0 || value>100)
2453 goto bad_arg;
2454 st->loss_rate = value;
2455 }
2456 break;
2457 case OPUS_SET_VBR_CONSTRAINT_REQUEST:
2458 {
2459 opus_int32 value = va_arg(ap, opus_int32);
2460 st->constrained_vbr = value;
2461 }
2462 break;
2463 case OPUS_SET_VBR_REQUEST:
2464 {
2465 opus_int32 value = va_arg(ap, opus_int32);
2466 st->vbr = value;
2467 }
2468 break;
2469 case OPUS_SET_BITRATE_REQUEST:
2470 {
2471 opus_int32 value = va_arg(ap, opus_int32);
2472 if (value<=500 && value!=OPUS_BITRATE_MAX)
2473 goto bad_arg;
2474 value = IMIN(value, 260000*st->channels);
2475 st->bitrate = value;
2476 }
2477 break;
2478 case CELT_SET_CHANNELS_REQUEST:
2479 {
2480 opus_int32 value = va_arg(ap, opus_int32);
2481 if (value<1 || value>2)
2482 goto bad_arg;
2483 st->stream_channels = value;
2484 }
2485 break;
2486 case OPUS_SET_LSB_DEPTH_REQUEST:
2487 {
2488 opus_int32 value = va_arg(ap, opus_int32);
2489 if (value<8 || value>24)
2490 goto bad_arg;
2491 st->lsb_depth=value;
2492 }
2493 break;
2494 case OPUS_GET_LSB_DEPTH_REQUEST:
2495 {
2496 opus_int32 *value = va_arg(ap, opus_int32*);
2497 *value=st->lsb_depth;
2498 }
2499 break;
2500 case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
2501 {
2502 opus_int32 value = va_arg(ap, opus_int32);
2503 if(value<0 || value>1)
2504 {
2505 goto bad_arg;
2506 }
2507 st->disable_inv = value;
2508 }
2509 break;
2510 case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
2511 {
2512 opus_int32 *value = va_arg(ap, opus_int32*);
2513 if (!value)
2514 {
2515 goto bad_arg;
2516 }
2517 *value = st->disable_inv;
2518 }
2519 break;
2520 case OPUS_RESET_STATE:
2521 {
2522 int i;
2523 opus_val16 *oldBandE, *oldLogE, *oldLogE2;
2524 oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->mode->overlap+COMBFILTER_MAXPERIOD));
2525 oldLogE = oldBandE + st->channels*st->mode->nbEBands;
2526 oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
2527 OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
2528 opus_custom_encoder_get_size(st->mode, st->channels)-
2529 ((char*)&st->ENCODER_RESET_START - (char*)st));
2530 for (i=0;i<st->channels*st->mode->nbEBands;i++)
2531 oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
2532 st->vbr_offset = 0;
2533 st->delayedIntra = 1;
2534 st->spread_decision = SPREAD_NORMAL;
2535 st->tonal_average = 256;
2536 st->hf_average = 0;
2537 st->tapset_decision = 0;
2538 }
2539 break;
2540#ifdef CUSTOM_MODES
2541 case CELT_SET_INPUT_CLIPPING_REQUEST:
2542 {
2543 opus_int32 value = va_arg(ap, opus_int32);
2544 st->clip = value;
2545 }
2546 break;
2547#endif
2548 case CELT_SET_SIGNALLING_REQUEST:
2549 {
2550 opus_int32 value = va_arg(ap, opus_int32);
2551 st->signalling = value;
2552 }
2553 break;
2554 case CELT_SET_ANALYSIS_REQUEST:
2555 {
2556 AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
2557 if (info)
2558 OPUS_COPY(&st->analysis, info, 1);
2559 }
2560 break;
2561 case CELT_SET_SILK_INFO_REQUEST:
2562 {
2563 SILKInfo *info = va_arg(ap, SILKInfo *);
2564 if (info)
2565 OPUS_COPY(&st->silk_info, info, 1);
2566 }
2567 break;
2568 case CELT_GET_MODE_REQUEST:
2569 {
2570 const CELTMode ** value = va_arg(ap, const CELTMode**);
2571 if (value==0)
2572 goto bad_arg;
2573 *value=st->mode;
2574 }
2575 break;
2576 case OPUS_GET_FINAL_RANGE_REQUEST:
2577 {
2578 opus_uint32 * value = va_arg(ap, opus_uint32 *);
2579 if (value==0)
2580 goto bad_arg;
2581 *value=st->rng;
2582 }
2583 break;
2584 case OPUS_SET_LFE_REQUEST:
2585 {
2586 opus_int32 value = va_arg(ap, opus_int32);
2587 st->lfe = value;
2588 }
2589 break;
2590 case OPUS_SET_ENERGY_MASK_REQUEST:
2591 {
2592 opus_val16 *value = va_arg(ap, opus_val16*);
2593 st->energy_mask = value;
2594 }
2595 break;
2596 default:
2597 goto bad_request;
2598 }
2599 va_end(ap);
2600 return OPUS_OK;
2601bad_arg:
2602 va_end(ap);
2603 return OPUS_BAD_ARG;
2604bad_request:
2605 va_end(ap);
2606 return OPUS_UNIMPLEMENTED;
2607}
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
index fa29d626ea..8ecb693ee9 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
@@ -49,8 +49,7 @@ int p
49 float *lpc = _lpc; 49 float *lpc = _lpc;
50#endif 50#endif
51 51
52 for (i = 0; i < p; i++) 52 OPUS_CLEAR(lpc, p);
53 lpc[i] = 0;
54 if (ac[0] != 0) 53 if (ac[0] != 0)
55 { 54 {
56 for (i = 0; i < p; i++) { 55 for (i = 0; i < p; i++) {
@@ -88,56 +87,42 @@ int p
88#endif 87#endif
89} 88}
90 89
91void celt_fir(const opus_val16 *_x, 90
91void celt_fir_c(
92 const opus_val16 *x,
92 const opus_val16 *num, 93 const opus_val16 *num,
93 opus_val16 *_y, 94 opus_val16 *y,
94 int N, 95 int N,
95 int ord, 96 int ord,
96 opus_val16 *mem) 97 int arch)
97{ 98{
98 int i,j; 99 int i,j;
99 VARDECL(opus_val16, rnum); 100 VARDECL(opus_val16, rnum);
100 VARDECL(opus_val16, x);
101 SAVE_STACK; 101 SAVE_STACK;
102 102 celt_assert(x != y);
103 ALLOC(rnum, ord, opus_val16); 103 ALLOC(rnum, ord, opus_val16);
104 ALLOC(x, N+ord, opus_val16);
105 for(i=0;i<ord;i++) 104 for(i=0;i<ord;i++)
106 rnum[i] = num[ord-i-1]; 105 rnum[i] = num[ord-i-1];
107 for(i=0;i<ord;i++)
108 x[i] = mem[ord-i-1];
109 for (i=0;i<N;i++)
110 x[i+ord]=_x[i];
111 for(i=0;i<ord;i++)
112 mem[i] = _x[N-i-1];
113#ifdef SMALL_FOOTPRINT
114 for (i=0;i<N;i++)
115 {
116 opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
117 for (j=0;j<ord;j++)
118 {
119 sum = MAC16_16(sum,rnum[j],x[i+j]);
120 }
121 _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
122 }
123#else
124 for (i=0;i<N-3;i+=4) 106 for (i=0;i<N-3;i+=4)
125 { 107 {
126 opus_val32 sum[4]={0,0,0,0}; 108 opus_val32 sum[4];
127 xcorr_kernel(rnum, x+i, sum, ord); 109 sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT);
128 _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); 110 sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
129 _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); 111 sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
130 _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); 112 sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
131 _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); 113 xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
114 y[i ] = ROUND16(sum[0], SIG_SHIFT);
115 y[i+1] = ROUND16(sum[1], SIG_SHIFT);
116 y[i+2] = ROUND16(sum[2], SIG_SHIFT);
117 y[i+3] = ROUND16(sum[3], SIG_SHIFT);
132 } 118 }
133 for (;i<N;i++) 119 for (;i<N;i++)
134 { 120 {
135 opus_val32 sum = 0; 121 opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
136 for (j=0;j<ord;j++) 122 for (j=0;j<ord;j++)
137 sum = MAC16_16(sum,rnum[j],x[i+j]); 123 sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
138 _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); 124 y[i] = ROUND16(sum, SIG_SHIFT);
139 } 125 }
140#endif
141 RESTORE_STACK; 126 RESTORE_STACK;
142} 127}
143 128
@@ -146,10 +131,12 @@ void celt_iir(const opus_val32 *_x,
146 opus_val32 *_y, 131 opus_val32 *_y,
147 int N, 132 int N,
148 int ord, 133 int ord,
149 opus_val16 *mem) 134 opus_val16 *mem,
135 int arch)
150{ 136{
151#ifdef SMALL_FOOTPRINT 137#ifdef SMALL_FOOTPRINT
152 int i,j; 138 int i,j;
139 (void)arch;
153 for (i=0;i<N;i++) 140 for (i=0;i<N;i++)
154 { 141 {
155 opus_val32 sum = _x[i]; 142 opus_val32 sum = _x[i];
@@ -161,7 +148,7 @@ void celt_iir(const opus_val32 *_x,
161 { 148 {
162 mem[j]=mem[j-1]; 149 mem[j]=mem[j-1];
163 } 150 }
164 mem[0] = ROUND16(sum,SIG_SHIFT); 151 mem[0] = SROUND16(sum, SIG_SHIFT);
165 _y[i] = sum; 152 _y[i] = sum;
166 } 153 }
167#else 154#else
@@ -187,23 +174,23 @@ void celt_iir(const opus_val32 *_x,
187 sum[1]=_x[i+1]; 174 sum[1]=_x[i+1];
188 sum[2]=_x[i+2]; 175 sum[2]=_x[i+2];
189 sum[3]=_x[i+3]; 176 sum[3]=_x[i+3];
190 xcorr_kernel(rden, y+i, sum, ord); 177 xcorr_kernel(rden, y+i, sum, ord, arch);
191 178
192 /* Patch up the result to compensate for the fact that this is an IIR */ 179 /* Patch up the result to compensate for the fact that this is an IIR */
193 y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT); 180 y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
194 _y[i ] = sum[0]; 181 _y[i ] = sum[0];
195 sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); 182 sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
196 y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT); 183 y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
197 _y[i+1] = sum[1]; 184 _y[i+1] = sum[1];
198 sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); 185 sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
199 sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); 186 sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
200 y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT); 187 y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
201 _y[i+2] = sum[2]; 188 _y[i+2] = sum[2];
202 189
203 sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); 190 sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
204 sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); 191 sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
205 sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); 192 sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
206 y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT); 193 y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
207 _y[i+3] = sum[3]; 194 _y[i+3] = sum[3];
208 } 195 }
209 for (;i<N;i++) 196 for (;i<N;i++)
@@ -211,7 +198,7 @@ void celt_iir(const opus_val32 *_x,
211 opus_val32 sum = _x[i]; 198 opus_val32 sum = _x[i];
212 for (j=0;j<ord;j++) 199 for (j=0;j<ord;j++)
213 sum -= MULT16_16(rden[j],y[i+j]); 200 sum -= MULT16_16(rden[j],y[i+j]);
214 y[i+ord] = ROUND16(sum,SIG_SHIFT); 201 y[i+ord] = SROUND16(sum,SIG_SHIFT);
215 _y[i] = sum; 202 _y[i] = sum;
216 } 203 }
217 for(i=0;i<ord;i++) 204 for(i=0;i<ord;i++)
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.h b/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
index dc2a0a3d26..a4c5fd6ea5 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.h
@@ -29,24 +29,36 @@
29#define PLC_H 29#define PLC_H
30 30
31#include "arch.h" 31#include "arch.h"
32#include "cpu_support.h"
33
34#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
35#include "x86/celt_lpc_sse.h"
36#endif
32 37
33#define LPC_ORDER 24 38#define LPC_ORDER 24
34 39
35void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); 40void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
36 41
37void celt_fir(const opus_val16 *x, 42void celt_fir_c(
43 const opus_val16 *x,
38 const opus_val16 *num, 44 const opus_val16 *num,
39 opus_val16 *y, 45 opus_val16 *y,
40 int N, 46 int N,
41 int ord, 47 int ord,
42 opus_val16 *mem); 48 int arch);
49
50#if !defined(OVERRIDE_CELT_FIR)
51#define celt_fir(x, num, y, N, ord, arch) \
52 (celt_fir_c(x, num, y, N, ord, arch))
53#endif
43 54
44void celt_iir(const opus_val32 *x, 55void celt_iir(const opus_val32 *x,
45 const opus_val16 *den, 56 const opus_val16 *den,
46 opus_val32 *y, 57 opus_val32 *y,
47 int N, 58 int N,
48 int ord, 59 int ord,
49 opus_val16 *mem); 60 opus_val16 *mem,
61 int arch);
50 62
51int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, 63int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
52 const opus_val16 *window, int overlap, int lag, int n, int arch); 64 const opus_val16 *window, int overlap, int lag, int n, int arch);
diff --git a/lib/rbcodec/codecs/libopus/celt/cpu_support.h b/lib/rbcodec/codecs/libopus/celt/cpu_support.h
index d68dbe62c5..68fc60678f 100644
--- a/lib/rbcodec/codecs/libopus/celt/cpu_support.h
+++ b/lib/rbcodec/codecs/libopus/celt/cpu_support.h
@@ -31,7 +31,8 @@
31#include "opus_types.h" 31#include "opus_types.h"
32#include "opus_defines.h" 32#include "opus_defines.h"
33 33
34#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM) 34#if defined(OPUS_HAVE_RTCD) && \
35 (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
35#include "arm/armcpu.h" 36#include "arm/armcpu.h"
36 37
37/* We currently support 4 ARM variants: 38/* We currently support 4 ARM variants:
@@ -42,6 +43,22 @@
42 */ 43 */
43#define OPUS_ARCHMASK 3 44#define OPUS_ARCHMASK 3
44 45
46#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
47 (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
48 (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
49 (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
50
51#include "x86/x86cpu.h"
52/* We currently support 5 x86 variants:
53 * arch[0] -> non-sse
54 * arch[1] -> sse
55 * arch[2] -> sse2
56 * arch[3] -> sse4.1
57 * arch[4] -> avx
58 */
59#define OPUS_ARCHMASK 7
60int opus_select_arch(void);
61
45#else 62#else
46#define OPUS_ARCHMASK 0 63#define OPUS_ARCHMASK 0
47 64
@@ -50,5 +67,4 @@ static OPUS_INLINE int opus_select_arch(void)
50 return 0; 67 return 0;
51} 68}
52#endif 69#endif
53
54#endif 70#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/cwrs.c b/lib/rbcodec/codecs/libopus/celt/cwrs.c
index 031a875995..a552e4f0fb 100644
--- a/lib/rbcodec/codecs/libopus/celt/cwrs.c
+++ b/lib/rbcodec/codecs/libopus/celt/cwrs.c
@@ -74,7 +74,7 @@ int log2_frac(opus_uint32 val, int frac)
74/*Although derived separately, the pulse vector coding scheme is equivalent to 74/*Although derived separately, the pulse vector coding scheme is equivalent to
75 a Pyramid Vector Quantizer \cite{Fis86}. 75 a Pyramid Vector Quantizer \cite{Fis86}.
76 Some additional notes about an early version appear at 76 Some additional notes about an early version appear at
77 http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering 77 https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
78 and the definitions of some terms have evolved since that was written. 78 and the definitions of some terms have evolved since that was written.
79 79
80 The conversion from a pulse vector to an integer index (encoding) and back 80 The conversion from a pulse vector to an integer index (encoding) and back
@@ -210,7 +210,7 @@ int log2_frac(opus_uint32 val, int frac)
210#if defined(CUSTOM_MODES) 210#if defined(CUSTOM_MODES)
211static const opus_uint32 CELT_PVQ_U_DATA[1488]={ 211static const opus_uint32 CELT_PVQ_U_DATA[1488]={
212#else 212#else
213static const opus_uint32 CELT_PVQ_U_DATA[1272] ICONST_ATTR ={ 213static const opus_uint32 CELT_PVQ_U_DATA[1272]={
214#endif 214#endif
215 /*N=0, K=0...176:*/ 215 /*N=0, K=0...176:*/
216 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 216 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -418,7 +418,7 @@ static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
418 CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473 418 CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
419}; 419};
420#else 420#else
421static const opus_uint32 *const CELT_PVQ_U_ROW[15] ICONST_ATTR ={ 421static const opus_uint32 *const CELT_PVQ_U_ROW[15]={
422 CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351, 422 CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
423 CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870, 423 CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
424 CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178, 424 CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
@@ -482,7 +482,7 @@ static opus_val32 cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
482 k0=_k; 482 k0=_k;
483 q=row[_n]; 483 q=row[_n];
484 if(q>_i){ 484 if(q>_i){
485 celt_assert(p>q); 485 celt_sig_assert(p>q);
486 _k=_n; 486 _k=_n;
487 do p=CELT_PVQ_U_ROW[--_k][_n]; 487 do p=CELT_PVQ_U_ROW[--_k][_n];
488 while(p>_i); 488 while(p>_i);
diff --git a/lib/rbcodec/codecs/libopus/celt/dump_modes/Makefile b/lib/rbcodec/codecs/libopus/celt/dump_modes/Makefile
new file mode 100644
index 0000000000..93f599fb5b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/dump_modes/Makefile
@@ -0,0 +1,32 @@
1
# Builds the dump_modes generator, which writes the static_modes_*.h tables.
# Optional variables read below: PREFIX (toolchain prefix), HAVE_ARM_NE10,
# NE10_INCDIR and NE10_LIBDIR (location of the Ne10 headers / library).
CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
INCLUDES=-I. -I../ -I../.. -I../../include

SOURCES = dump_modes.c \
          ../modes.c \
          ../cwrs.c \
          ../rate.c \
          ../entcode.c \
          ../entenc.c \
          ../entdec.c \
          ../mathops.c \
          ../mdct.c \
          ../kiss_fft.c

ifdef HAVE_ARM_NE10
CC = gcc
CFLAGS += -mfpu=neon
INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR
LIBS = -L$(NE10_LIBDIR) -lNE10
# The Ne10 FFT glue lives in ../arm/celt_fft_ne10.c.
SOURCES += ../arm/celt_fft_ne10.c \
           dump_modes_arm_ne10.c \
           ../arm/armcpu.c
endif

# Neither of these names a file that is produced.
.PHONY: all clean

all: dump_modes

# Listing the sources as prerequisites makes the tool rebuild when they change.
dump_modes: $(SOURCES)
	$(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBS) -lm

clean:
	rm -f dump_modes
diff --git a/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes.c b/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes.c
new file mode 100644
index 0000000000..9105a5344e
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes.c
@@ -0,0 +1,353 @@
1/* Copyright (c) 2008 CSIRO
2 Copyright (c) 2008-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include <stdlib.h>
34#include <stdio.h>
35#include "modes.h"
36#include "celt.h"
37#include "rate.h"
38#include "dump_modes_arch.h"
39
40#define INT16 "%d"
41#define INT32 "%d"
42#define FLOAT "%#0.8gf"
43
44#ifdef FIXED_POINT
45#define WORD16 INT16
46#define WORD32 INT32
47#else
48#define WORD16 FLOAT
49#define WORD32 FLOAT
50#endif
51
52void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
53{
54 int i, j, k;
55 int mdct_twiddles_size;
56 fprintf(file, "/* The contents of this file was automatically generated by dump_modes.c\n");
57 fprintf(file, " with arguments:");
58 for (i=0;i<nb_modes;i++)
59 {
60 CELTMode *mode = modes[i];
61 fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
62 }
63 fprintf(file, "\n It contains static definitions for some pre-defined modes. */\n");
64 fprintf(file, "#include \"modes.h\"\n");
65 fprintf(file, "#include \"rate.h\"\n");
66 fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
67 fprintf(file, "#define OVERRIDE_FFT 1\n");
68 fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
69 fprintf(file, "#endif\n");
70
71 fprintf(file, "\n");
72
73 for (i=0;i<nb_modes;i++)
74 {
75 CELTMode *mode = modes[i];
76 int mdctSize;
77 int standard, framerate;
78
79 mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
80 standard = (mode->Fs == 400*(opus_int32)mode->shortMdctSize);
81 framerate = mode->Fs/mode->shortMdctSize;
82
83 if (!standard)
84 {
85 fprintf(file, "#ifndef DEF_EBANDS%d_%d\n", mode->Fs, mdctSize);
86 fprintf(file, "#define DEF_EBANDS%d_%d\n", mode->Fs, mdctSize);
87 fprintf (file, "static const opus_int16 eBands%d_%d[%d] = {\n", mode->Fs, mdctSize, mode->nbEBands+2);
88 for (j=0;j<mode->nbEBands+2;j++)
89 fprintf (file, "%d, ", mode->eBands[j]);
90 fprintf (file, "};\n");
91 fprintf(file, "#endif\n");
92 fprintf(file, "\n");
93 }
94
95 fprintf(file, "#ifndef DEF_WINDOW%d\n", mode->overlap);
96 fprintf(file, "#define DEF_WINDOW%d\n", mode->overlap);
97 fprintf (file, "static const opus_val16 window%d[%d] = {\n", mode->overlap, mode->overlap);
98 for (j=0;j<mode->overlap;j++)
99 fprintf (file, WORD16 ",%c", mode->window[j],(j+6)%5==0?'\n':' ');
100 fprintf (file, "};\n");
101 fprintf(file, "#endif\n");
102 fprintf(file, "\n");
103
104 if (!standard)
105 {
106 fprintf(file, "#ifndef DEF_ALLOC_VECTORS%d_%d\n", mode->Fs, mdctSize);
107 fprintf(file, "#define DEF_ALLOC_VECTORS%d_%d\n", mode->Fs, mdctSize);
108 fprintf (file, "static const unsigned char allocVectors%d_%d[%d] = {\n", mode->Fs, mdctSize, mode->nbEBands*mode->nbAllocVectors);
109 for (j=0;j<mode->nbAllocVectors;j++)
110 {
111 for (k=0;k<mode->nbEBands;k++)
112 fprintf (file, "%2d, ", mode->allocVectors[j*mode->nbEBands+k]);
113 fprintf (file, "\n");
114 }
115 fprintf (file, "};\n");
116 fprintf(file, "#endif\n");
117 fprintf(file, "\n");
118 }
119
120 fprintf(file, "#ifndef DEF_LOGN%d\n", framerate);
121 fprintf(file, "#define DEF_LOGN%d\n", framerate);
122 fprintf (file, "static const opus_int16 logN%d[%d] = {\n", framerate, mode->nbEBands);
123 for (j=0;j<mode->nbEBands;j++)
124 fprintf (file, "%d, ", mode->logN[j]);
125 fprintf (file, "};\n");
126 fprintf(file, "#endif\n");
127 fprintf(file, "\n");
128
129 /* Pulse cache */
130 fprintf(file, "#ifndef DEF_PULSE_CACHE%d\n", mode->Fs/mdctSize);
131 fprintf(file, "#define DEF_PULSE_CACHE%d\n", mode->Fs/mdctSize);
132 fprintf (file, "static const opus_int16 cache_index%d[%d] = {\n", mode->Fs/mdctSize, (mode->maxLM+2)*mode->nbEBands);
133 for (j=0;j<mode->nbEBands*(mode->maxLM+2);j++)
134 fprintf (file, "%d,%c", mode->cache.index[j],(j+16)%15==0?'\n':' ');
135 fprintf (file, "};\n");
136 fprintf (file, "static const unsigned char cache_bits%d[%d] = {\n", mode->Fs/mdctSize, mode->cache.size);
137 for (j=0;j<mode->cache.size;j++)
138 fprintf (file, "%d,%c", mode->cache.bits[j],(j+16)%15==0?'\n':' ');
139 fprintf (file, "};\n");
140 fprintf (file, "static const unsigned char cache_caps%d[%d] = {\n", mode->Fs/mdctSize, (mode->maxLM+1)*2*mode->nbEBands);
141 for (j=0;j<(mode->maxLM+1)*2*mode->nbEBands;j++)
142 fprintf (file, "%d,%c", mode->cache.caps[j],(j+16)%15==0?'\n':' ');
143 fprintf (file, "};\n");
144
145 fprintf(file, "#endif\n");
146 fprintf(file, "\n");
147
148 /* FFT twiddles */
149 fprintf(file, "#ifndef FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
150 fprintf(file, "#define FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
151 fprintf (file, "static const kiss_twiddle_cpx fft_twiddles%d_%d[%d] = {\n",
152 mode->Fs, mdctSize, mode->mdct.kfft[0]->nfft);
153 for (j=0;j<mode->mdct.kfft[0]->nfft;j++)
154 fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
155 fprintf (file, "};\n");
156
157#ifdef OVERRIDE_FFT
158 dump_mode_arch(mode);
159#endif
160 /* FFT Bitrev tables */
161 for (k=0;k<=mode->mdct.maxshift;k++)
162 {
163 fprintf(file, "#ifndef FFT_BITREV%d\n", mode->mdct.kfft[k]->nfft);
164 fprintf(file, "#define FFT_BITREV%d\n", mode->mdct.kfft[k]->nfft);
165 fprintf (file, "static const opus_int16 fft_bitrev%d[%d] = {\n",
166 mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft);
167 for (j=0;j<mode->mdct.kfft[k]->nfft;j++)
168 fprintf (file, "%d,%c", mode->mdct.kfft[k]->bitrev[j],(j+16)%15==0?'\n':' ');
169 fprintf (file, "};\n");
170
171 fprintf(file, "#endif\n");
172 fprintf(file, "\n");
173 }
174
175 /* FFT States */
176 for (k=0;k<=mode->mdct.maxshift;k++)
177 {
178 fprintf(file, "#ifndef FFT_STATE%d_%d_%d\n", mode->Fs, mdctSize, k);
179 fprintf(file, "#define FFT_STATE%d_%d_%d\n", mode->Fs, mdctSize, k);
180 fprintf (file, "static const kiss_fft_state fft_state%d_%d_%d = {\n",
181 mode->Fs, mdctSize, k);
182 fprintf (file, "%d, /* nfft */\n", mode->mdct.kfft[k]->nfft);
183 fprintf (file, WORD16 ", /* scale */\n", mode->mdct.kfft[k]->scale);
184#ifdef FIXED_POINT
185 fprintf (file, "%d, /* scale_shift */\n", mode->mdct.kfft[k]->scale_shift);
186#endif
187 fprintf (file, "%d, /* shift */\n", mode->mdct.kfft[k]->shift);
188 fprintf (file, "{");
189 for (j=0;j<2*MAXFACTORS;j++)
190 fprintf (file, "%d, ", mode->mdct.kfft[k]->factors[j]);
191 fprintf (file, "}, /* factors */\n");
192 fprintf (file, "fft_bitrev%d, /* bitrev */\n", mode->mdct.kfft[k]->nfft);
193 fprintf (file, "fft_twiddles%d_%d, /* bitrev */\n", mode->Fs, mdctSize);
194
195 fprintf (file, "#ifdef OVERRIDE_FFT\n");
196 fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
197 fprintf (file, "#else\n");
198 fprintf (file, "NULL,\n");
199 fprintf(file, "#endif\n");
200
201 fprintf (file, "};\n");
202
203 fprintf(file, "#endif\n");
204 fprintf(file, "\n");
205 }
206
207 fprintf(file, "#endif\n");
208 fprintf(file, "\n");
209
210 /* MDCT twiddles */
211 mdct_twiddles_size = mode->mdct.n-(mode->mdct.n/2>>mode->mdct.maxshift);
212 fprintf(file, "#ifndef MDCT_TWIDDLES%d\n", mdctSize);
213 fprintf(file, "#define MDCT_TWIDDLES%d\n", mdctSize);
214 fprintf (file, "static const opus_val16 mdct_twiddles%d[%d] = {\n",
215 mdctSize, mdct_twiddles_size);
216 for (j=0;j<mdct_twiddles_size;j++)
217 fprintf (file, WORD16 ",%c", mode->mdct.trig[j],(j+6)%5==0?'\n':' ');
218 fprintf (file, "};\n");
219
220 fprintf(file, "#endif\n");
221 fprintf(file, "\n");
222
223
224 /* Print the actual mode data */
225 fprintf(file, "static const CELTMode mode%d_%d_%d = {\n", mode->Fs, mdctSize, mode->overlap);
226 fprintf(file, INT32 ", /* Fs */\n", mode->Fs);
227 fprintf(file, "%d, /* overlap */\n", mode->overlap);
228 fprintf(file, "%d, /* nbEBands */\n", mode->nbEBands);
229 fprintf(file, "%d, /* effEBands */\n", mode->effEBands);
230 fprintf(file, "{");
231 for (j=0;j<4;j++)
232 fprintf(file, WORD16 ", ", mode->preemph[j]);
233 fprintf(file, "}, /* preemph */\n");
234 if (standard)
235 fprintf(file, "eband5ms, /* eBands */\n");
236 else
237 fprintf(file, "eBands%d_%d, /* eBands */\n", mode->Fs, mdctSize);
238
239 fprintf(file, "%d, /* maxLM */\n", mode->maxLM);
240 fprintf(file, "%d, /* nbShortMdcts */\n", mode->nbShortMdcts);
241 fprintf(file, "%d, /* shortMdctSize */\n", mode->shortMdctSize);
242
243 fprintf(file, "%d, /* nbAllocVectors */\n", mode->nbAllocVectors);
244 if (standard)
245 fprintf(file, "band_allocation, /* allocVectors */\n");
246 else
247 fprintf(file, "allocVectors%d_%d, /* allocVectors */\n", mode->Fs, mdctSize);
248
249 fprintf(file, "logN%d, /* logN */\n", framerate);
250 fprintf(file, "window%d, /* window */\n", mode->overlap);
251 fprintf(file, "{%d, %d, {", mode->mdct.n, mode->mdct.maxshift);
252 for (k=0;k<=mode->mdct.maxshift;k++)
253 fprintf(file, "&fft_state%d_%d_%d, ", mode->Fs, mdctSize, k);
254 fprintf (file, "}, mdct_twiddles%d}, /* mdct */\n", mdctSize);
255
256 fprintf(file, "{%d, cache_index%d, cache_bits%d, cache_caps%d}, /* cache */\n",
257 mode->cache.size, mode->Fs/mdctSize, mode->Fs/mdctSize, mode->Fs/mdctSize);
258 fprintf(file, "};\n");
259 }
260 fprintf(file, "\n");
261 fprintf(file, "/* List of all the available modes */\n");
262 fprintf(file, "#define TOTAL_MODES %d\n", nb_modes);
263 fprintf(file, "static const CELTMode * const static_mode_list[TOTAL_MODES] = {\n");
264 for (i=0;i<nb_modes;i++)
265 {
266 CELTMode *mode = modes[i];
267 int mdctSize;
268 mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
269 fprintf(file, "&mode%d_%d_%d,\n", mode->Fs, mdctSize, mode->overlap);
270 }
271 fprintf(file, "};\n");
272}
273
274void dump_header(FILE *file, CELTMode **modes, int nb_modes)
275{
276 int i;
277 int channels = 0;
278 int frame_size = 0;
279 int overlap = 0;
280 fprintf (file, "/* This header file is generated automatically*/\n");
281 for (i=0;i<nb_modes;i++)
282 {
283 CELTMode *mode = modes[i];
284 if (frame_size==0)
285 frame_size = mode->shortMdctSize*mode->nbShortMdcts;
286 else if (frame_size != mode->shortMdctSize*mode->nbShortMdcts)
287 frame_size = -1;
288 if (overlap==0)
289 overlap = mode->overlap;
290 else if (overlap != mode->overlap)
291 overlap = -1;
292 }
293 if (channels>0)
294 {
295 fprintf (file, "#define CHANNELS(mode) %d\n", channels);
296 if (channels==1)
297 fprintf (file, "#define DISABLE_STEREO\n");
298 }
299 if (frame_size>0)
300 {
301 fprintf (file, "#define FRAMESIZE(mode) %d\n", frame_size);
302 }
303 if (overlap>0)
304 {
305 fprintf (file, "#define OVERLAP(mode) %d\n", overlap);
306 }
307}
308
309#ifdef FIXED_POINT
310#define BASENAME "static_modes_fixed"
311#else
312#define BASENAME "static_modes_float"
313#endif
314
315int main(int argc, char **argv)
316{
317 int i, nb;
318 FILE *file;
319 CELTMode **m;
320 if (argc%2 != 1 || argc<3)
321 {
322 fprintf (stderr, "Usage: %s rate frame_size [rate frame_size] [rate frame_size]...\n",argv[0]);
323 return 1;
324 }
325 nb = (argc-1)/2;
326 m = malloc(nb*sizeof(CELTMode*));
327 for (i=0;i<nb;i++)
328 {
329 int Fs, frame;
330 Fs = atoi(argv[2*i+1]);
331 frame = atoi(argv[2*i+2]);
332 m[i] = opus_custom_mode_create(Fs, frame, NULL);
333 if (m[i]==NULL)
334 {
335 fprintf(stderr,"Error creating mode with Fs=%s, frame_size=%s\n",
336 argv[2*i+1],argv[2*i+2]);
337 return EXIT_FAILURE;
338 }
339 }
340 file = fopen(BASENAME ".h", "w");
341#ifdef OVERRIDE_FFT
342 dump_modes_arch_init(m, nb);
343#endif
344 dump_modes(file, m, nb);
345 fclose(file);
346#ifdef OVERRIDE_FFT
347 dump_modes_arch_finalize();
348#endif
349 for (i=0;i<nb;i++)
350 opus_custom_mode_destroy(m[i]);
351 free(m);
352 return 0;
353}
diff --git a/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arch.h b/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arch.h
new file mode 100644
index 0000000000..cc0d4be1ec
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arch.h
@@ -0,0 +1,45 @@
1/* Copyright (c) 2015 Xiph.Org Foundation
2 Written by Viswanath Puttagunta */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifndef DUMP_MODE_ARCH_H
29#define DUMP_MODE_ARCH_H
30
31void dump_modes_arch_init();
32void dump_mode_arch(CELTMode *mode);
33void dump_modes_arch_finalize();
34
35#if !defined(FIXED_POINT)
36#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
37#else
38#define ARM_NE10_ARCH_FILE_NAME "static_modes_fixed_arm_ne10.h"
39#endif
40
41#if defined(HAVE_ARM_NE10)
42#define OVERRIDE_FFT (1)
43#endif
44
45#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arm_ne10.c b/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arm_ne10.c
new file mode 100644
index 0000000000..828e7b9fff
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/dump_modes/dump_modes_arm_ne10.c
@@ -0,0 +1,152 @@
1/* Copyright (c) 2015 Xiph.Org Foundation
2 Written by Viswanath Puttagunta */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#if defined(HAVE_CONFIG_H)
29# include "config.h"
30#endif
31
32#include <stdio.h>
33#include <stdlib.h>
34#include "modes.h"
35#include "dump_modes_arch.h"
36#include <NE10_dsp.h>
37
38#if !defined(FIXED_POINT)
39# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
40# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_float32_t"
41# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_float32_t"
42#else
43# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
44# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_int32_t"
45# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_int32_t"
46#endif
47
48static FILE *file;
49
50void dump_modes_arch_init(CELTMode **modes, int nb_modes)
51{
52 int i;
53
54 file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
55 fprintf(file, "/* The contents of this file was automatically generated by\n");
56 fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
57 for (i=0;i<nb_modes;i++)
58 {
59 CELTMode *mode = modes[i];
60 fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
61 }
62 fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
63 fprintf(file, "#include <NE10_types.h>\n\n");
64}
65
66void dump_modes_arch_finalize()
67{
68 fclose(file);
69}
70
71void dump_mode_arch(CELTMode *mode)
72{
73 int k, j;
74 int mdctSize;
75
76 mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
77
78 fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
79 fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
80 /* cfg->factors */
81 for(k=0;k<=mode->mdct.maxshift;k++) {
82 NE10_FFT_CFG_TYPE_T cfg;
83 cfg = (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[k]->arch_fft->priv;
84 if (!cfg)
85 continue;
86 fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
87 mode->mdct.kfft[k]->nfft, (NE10_MAXFACTORS * 2));
88 for(j=0;j<(NE10_MAXFACTORS * 2);j++) {
89 fprintf(file, "%d,%c", cfg->factors[j],(j+16)%15==0?'\n':' ');
90 }
91 fprintf (file, "};\n");
92 }
93
94 /* cfg->twiddles */
95 for(k=0;k<=mode->mdct.maxshift;k++) {
96 NE10_FFT_CFG_TYPE_T cfg;
97 cfg = (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[k]->arch_fft->priv;
98 if (!cfg)
99 continue;
100 fprintf(file, "static const %s ne10_twiddles_%d[%d] = {\n",
101 NE10_FFT_CPX_TYPE_T_STR, mode->mdct.kfft[k]->nfft,
102 mode->mdct.kfft[k]->nfft);
103 for(j=0;j<mode->mdct.kfft[k]->nfft;j++) {
104#if !defined(FIXED_POINT)
105 fprintf(file, "{%#0.8gf,%#0.8gf},%c",
106 cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
107#else
108 fprintf(file, "{%d,%d},%c",
109 cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
110#endif
111 }
112 fprintf (file, "};\n");
113 }
114
115 for(k=0;k<=mode->mdct.maxshift;k++) {
116 NE10_FFT_CFG_TYPE_T cfg;
117 cfg = (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[k]->arch_fft->priv;
118 if (!cfg) {
119 fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
120 mode->mdct.kfft[k]->nfft);
121 fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", mode->mdct.kfft[k]->nfft);
122 fprintf(file, "0,\n");
123 fprintf(file, "NULL\n");
124 fprintf(file, "};\n");
125 continue;
126 }
127 fprintf(file, "static const %s %s_%d = {\n", NE10_FFT_STATE_TYPE_T_STR,
128 NE10_FFT_STATE_TYPE_T_STR, mode->mdct.kfft[k]->nfft);
129 fprintf(file, "%d,\n", cfg->nfft);
130 fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", mode->mdct.kfft[k]->nfft);
131 fprintf(file, "(%s *)ne10_twiddles_%d,\n",
132 NE10_FFT_CPX_TYPE_T_STR, mode->mdct.kfft[k]->nfft);
133 fprintf(file, "NULL,\n"); /* buffer */
134 fprintf(file, "(%s *)&ne10_twiddles_%d[%d],\n",
135 NE10_FFT_CPX_TYPE_T_STR, mode->mdct.kfft[k]->nfft, cfg->nfft);
136#if !defined(FIXED_POINT)
137 fprintf(file, "/* is_forward_scaled = true */\n");
138 fprintf(file, "(ne10_int32_t) 1,\n");
139 fprintf(file, "/* is_backward_scaled = false */\n");
140 fprintf(file, "(ne10_int32_t) 0,\n");
141#endif
142 fprintf(file, "};\n");
143
144 fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n",
145 mode->mdct.kfft[k]->nfft);
146 fprintf(file, "1,\n");
147 fprintf(file, "(void *)&%s_%d,\n",
148 NE10_FFT_STATE_TYPE_T_STR, mode->mdct.kfft[k]->nfft);
149 fprintf(file, "};\n\n");
150 }
151 fprintf(file, "#endif /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize);
152}
diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.c b/lib/rbcodec/codecs/libopus/celt/entcode.c
index 461a36dd55..70f32016ec 100644
--- a/lib/rbcodec/codecs/libopus/celt/entcode.c
+++ b/lib/rbcodec/codecs/libopus/celt/entcode.c
@@ -116,7 +116,7 @@ opus_uint32 ec_tell_frac(ec_ctx *_this){
116 116
117#ifdef USE_SMALL_DIV_TABLE 117#ifdef USE_SMALL_DIV_TABLE
118/* Result of 2^32/(2*i+1), except for i=0. */ 118/* Result of 2^32/(2*i+1), except for i=0. */
119const opus_uint32 SMALL_DIV_TABLE[129] ICONST_ATTR = { 119const opus_uint32 SMALL_DIV_TABLE[129] = {
120 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924, 120 0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
121 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111, 121 0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
122 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C, 122 0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
diff --git a/lib/rbcodec/codecs/libopus/celt/entcode.h b/lib/rbcodec/codecs/libopus/celt/entcode.h
index 13d6c84ef0..3763e3f284 100644
--- a/lib/rbcodec/codecs/libopus/celt/entcode.h
+++ b/lib/rbcodec/codecs/libopus/celt/entcode.h
@@ -122,7 +122,7 @@ opus_uint32 ec_tell_frac(ec_ctx *_this);
122 122
123/* Tested exhaustively for all n and for 1<=d<=256 */ 123/* Tested exhaustively for all n and for 1<=d<=256 */
124static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) { 124static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
125 celt_assert(d>0); 125 celt_sig_assert(d>0);
126#ifdef USE_SMALL_DIV_TABLE 126#ifdef USE_SMALL_DIV_TABLE
127 if (d>256) 127 if (d>256)
128 return n/d; 128 return n/d;
@@ -138,7 +138,7 @@ static OPUS_INLINE opus_uint32 celt_udiv(opus_uint32 n, opus_uint32 d) {
138} 138}
139 139
140static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) { 140static OPUS_INLINE opus_int32 celt_sudiv(opus_int32 n, opus_int32 d) {
141 celt_assert(d>0); 141 celt_sig_assert(d>0);
142#ifdef USE_SMALL_DIV_TABLE 142#ifdef USE_SMALL_DIV_TABLE
143 if (n<0) 143 if (n<0)
144 return -(opus_int32)celt_udiv(-n, d); 144 return -(opus_int32)celt_udiv(-n, d);
diff --git a/lib/rbcodec/codecs/libopus/celt/entdec.h b/lib/rbcodec/codecs/libopus/celt/entdec.h
index d8ab318730..025fc1870d 100644
--- a/lib/rbcodec/codecs/libopus/celt/entdec.h
+++ b/lib/rbcodec/codecs/libopus/celt/entdec.h
@@ -85,7 +85,7 @@ int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
85 The bits must have been encoded with ec_enc_uint(). 85 The bits must have been encoded with ec_enc_uint().
86 No call to ec_dec_update() is necessary after this call. 86 No call to ec_dec_update() is necessary after this call.
87 _ft: The number of integers that can be decoded (one more than the max). 87 _ft: The number of integers that can be decoded (one more than the max).
88 This must be at least one, and no more than 2**32-1. 88 This must be at least 2, and no more than 2**32-1.
89 Return: The decoded bits.*/ 89 Return: The decoded bits.*/
90opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft); 90opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft);
91 91
diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.c b/lib/rbcodec/codecs/libopus/celt/entenc.c
index 271e4d30c5..f1750d25b8 100644
--- a/lib/rbcodec/codecs/libopus/celt/entenc.c
+++ b/lib/rbcodec/codecs/libopus/celt/entenc.c
@@ -98,7 +98,7 @@ static void ec_enc_carry_out(ec_enc *_this,int _c){
98 else _this->ext++; 98 else _this->ext++;
99} 99}
100 100
101static void ec_enc_normalize(ec_enc *_this){ 101static OPUS_INLINE void ec_enc_normalize(ec_enc *_this){
102 /*If the range is too small, output some bits and rescale it.*/ 102 /*If the range is too small, output some bits and rescale it.*/
103 while(_this->rng<=EC_CODE_BOT){ 103 while(_this->rng<=EC_CODE_BOT){
104 ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); 104 ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT));
diff --git a/lib/rbcodec/codecs/libopus/celt/entenc.h b/lib/rbcodec/codecs/libopus/celt/entenc.h
index 796bc4d572..f502eaf662 100644
--- a/lib/rbcodec/codecs/libopus/celt/entenc.h
+++ b/lib/rbcodec/codecs/libopus/celt/entenc.h
@@ -67,7 +67,7 @@ void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
67/*Encodes a raw unsigned integer in the stream. 67/*Encodes a raw unsigned integer in the stream.
68 _fl: The integer to encode. 68 _fl: The integer to encode.
69 _ft: The number of integers that can be encoded (one more than the max). 69 _ft: The number of integers that can be encoded (one more than the max).
70 This must be at least one, and no more than 2**32-1.*/ 70 This must be at least 2, and no more than 2**32-1.*/
71void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft); 71void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft);
72 72
73/*Encodes a sequence of raw bits in the stream. 73/*Encodes a sequence of raw bits in the stream.
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_c5x.h b/lib/rbcodec/codecs/libopus/celt/fixed_c5x.h
new file mode 100644
index 0000000000..ea95a998c3
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_c5x.h
@@ -0,0 +1,79 @@
1/* Copyright (C) 2003 Jean-Marc Valin */
2/**
3 @file fixed_c5x.h
4 @brief Fixed-point operations for the TI C5x DSP family
5*/
6/*
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 - Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#ifndef FIXED_C5X_H
32#define FIXED_C5X_H
33
34#include "dsplib.h"
35
36#undef IMUL32
37static OPUS_INLINE long IMUL32(long i, long j)
38{
39 long ac0, ac1;
40 ac0 = _lmpy(i>>16,j);
41 ac1 = ac0 + _lmpy(i,j>>16);
42 return _lmpyu(i,j) + (ac1<<16);
43}
44
/* Replace the generic min/max helpers with the C5x intrinsics. */
#undef MAX16
#define MAX16(a,b) _max(a,b)

#undef MIN16
#define MIN16(a,b) _min(a,b)

#undef MAX32
#define MAX32(a,b) _lmax(a,b)

#undef MIN32
#define MIN32(a,b) _lmin(a,b)

/* Variable shift right, written as _lshl with the shift count negated. */
#undef VSHR32
#define VSHR32(a, shift) _lshl((a),-(shift))

#undef MULT16_16_Q15
#define MULT16_16_Q15(a,b) (_smpy(a,b))

#undef MULT16_16SU
#define MULT16_16SU(a,b) _lmpysu(a,b)

/* NOTE(review): MULT16_32_Q15 below calls MULT16_16 (no underscore after
   MULT), so this definition of MULT_16_16 may not override the macro that is
   actually used - confirm the intended name before relying on it. */
#undef MULT_16_16
#define MULT_16_16(a,b) _lmpy(a,b)

/* FIXME: This is technically incorrect and is bound to cause problems. Is there any cleaner solution? */
#undef MULT16_32_Q15
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),(b)),15))

#define celt_ilog2(x) (30 - _lnorm(x))
#define OVERRIDE_CELT_ILOG2

/* Arguments are parenthesized so that an expression such as buf+n is cast
   as a whole rather than having the cast bind to its first operand only. */
#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)(x), (len))),-EXTEND32(minval((DATA *)(x), (len))))
#define OVERRIDE_CELT_MAXABS16
78
79#endif /* FIXED_C5X_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_c6x.h b/lib/rbcodec/codecs/libopus/celt/fixed_c6x.h
new file mode 100644
index 0000000000..bb6ad92780
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_c6x.h
@@ -0,0 +1,70 @@
1/* Copyright (C) 2008 CSIRO */
2/**
3 @file fixed_c6x.h
4 @brief Fixed-point operations for the TI C6x DSP family
5*/
6/*
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 - Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
#ifndef FIXED_C6X_H
#define FIXED_C6X_H

/* Signed-by-unsigned 16x16 product via _mpysu. */
#undef MULT16_16SU
#define MULT16_16SU(a,b) _mpysu(a,b)

/* NOTE(review): the macro overridden here is spelled MULT_16_16, not
   MULT16_16 -- possibly a misspelling; confirm before relying on it. */
#undef MULT_16_16
#define MULT_16_16(a,b) _mpy(a,b)

/* Integer log2 computed as 30 minus the result of _norm. */
#define celt_ilog2(x) (30 - _norm(x))
#define OVERRIDE_CELT_ILOG2

/* 16x32 product via _mpylill, shifted right by 15. */
#undef MULT16_32_Q15
#define MULT16_32_Q15(a,b) (_mpylill(a, b) >> 15)

/* The overrides below are compiled out. The #if 0 needs its own #endif so
   that the include guard opened at the top of the file is closed as well. */
#if 0
#include "dsplib.h"

#undef MAX16
#define MAX16(a,b) _max(a,b)

#undef MIN16
#define MIN16(a,b) _min(a,b)

#undef MAX32
#define MAX32(a,b) _lmax(a,b)

#undef MIN32
#define MIN32(a,b) _lmin(a,b)

#undef VSHR32
#define VSHR32(a, shift) _lshl(a,-(shift))

#undef MULT16_16_Q15
#define MULT16_16_Q15(a,b) (_smpy(a,b))

#define celt_maxabs16(x, len) MAX32(EXTEND32(maxval((DATA *)x, len)),-EXTEND32(minval((DATA *)x, len)))
#define OVERRIDE_CELT_MAXABS16
#endif /* 0 */

#endif /* FIXED_C6X_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_debug.h b/lib/rbcodec/codecs/libopus/celt/fixed_debug.h
new file mode 100644
index 0000000000..f435295234
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_debug.h
@@ -0,0 +1,791 @@
1/* Copyright (C) 2003-2008 Jean-Marc Valin
2 Copyright (C) 2007-2012 Xiph.Org Foundation */
3/**
4 @file fixed_debug.h
5 @brief Fixed-point operations with debugging
6*/
7/*
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions
10 are met:
11
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14
15 - Redistributions in binary form must reproduce the above copyright
16 notice, this list of conditions and the following disclaimer in the
17 documentation and/or other materials provided with the distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
23 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*/
31
32#ifndef FIXED_DEBUG_H
33#define FIXED_DEBUG_H
34
35#include <stdio.h>
36#include "opus_defines.h"
37
/* Running operation counter: the helpers in this header add to celt_mips and a
   few composite macros subtract from it. The translation unit that defines
   CELT_C provides the definition; all others only see the extern declaration. */
#ifdef CELT_C
OPUS_EXPORT opus_int64 celt_mips=0;
#else
extern opus_int64 celt_mips;
#endif

/** 16x16 multiplication where a is cast through opus_val16 and b through opus_uint16 */
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
/** 32x32 multiplication assembled from 16-bit partial products that are shifted and summed with ADD32 */
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))

/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))

/** Forwards to MULT16_32_PX with Q fixed at 16 */
#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16)

/** Scale x by 2^bits, add .5 and cast the result to opus_val16 */
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
/** Scale x by 2^bits, add .5 and cast the result to opus_val32 */
#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
54
/** Non-zero when x lies in the signed 16-bit range [-32768, 32767] */
#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
/** Non-zero when x lies in the signed 32-bit range */
#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
/** Non-zero when x does not exceed the largest unsigned 32-bit value.
    The bound is 4294967295 (0xFFFFFFFF); 2147483647<<1 would stop one short
    and flag the maximum value as out of range. */
#define VERIFY_UINT(x) ((x)<=4294967295LLU)
58
/** Unsized shift names map onto the 32-bit versions */
#define SHR(a,b) SHR32(a,b)
#define PSHR(a,b) PSHR32(a,b)

/* The three *_ovflw macros do their arithmetic on opus_uint32 casts of the
   operands, cast the result back to opus_val32, and add 2 to celt_mips. */

/** Add two 32-bit values, ignore any overflows */
#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
/** Subtract two 32-bit values, ignore any overflows */
#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
/** Negate 32-bit value, ignore any overflows */
#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))
69
70static OPUS_INLINE short NEG16(int x)
71{
72 int res;
73 if (!VERIFY_SHORT(x))
74 {
75 fprintf (stderr, "NEG16: input is not short: %d\n", (int)x);
76#ifdef FIXED_DEBUG_ASSERT
77 celt_assert(0);
78#endif
79 }
80 res = -x;
81 if (!VERIFY_SHORT(res))
82 {
83 fprintf (stderr, "NEG16: output is not short: %d\n", (int)res);
84#ifdef FIXED_DEBUG_ASSERT
85 celt_assert(0);
86#endif
87 }
88 celt_mips++;
89 return res;
90}
91static OPUS_INLINE int NEG32(opus_int64 x)
92{
93 opus_int64 res;
94 if (!VERIFY_INT(x))
95 {
96 fprintf (stderr, "NEG16: input is not int: %d\n", (int)x);
97#ifdef FIXED_DEBUG_ASSERT
98 celt_assert(0);
99#endif
100 }
101 res = -x;
102 if (!VERIFY_INT(res))
103 {
104 fprintf (stderr, "NEG16: output is not int: %d\n", (int)res);
105#ifdef FIXED_DEBUG_ASSERT
106 celt_assert(0);
107#endif
108 }
109 celt_mips+=2;
110 return res;
111}
112
113#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__)
114static OPUS_INLINE short EXTRACT16_(int x, char *file, int line)
115{
116 int res;
117 if (!VERIFY_SHORT(x))
118 {
119 fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line);
120#ifdef FIXED_DEBUG_ASSERT
121 celt_assert(0);
122#endif
123 }
124 res = x;
125 celt_mips++;
126 return res;
127}
128
129#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__)
130static OPUS_INLINE int EXTEND32_(int x, char *file, int line)
131{
132 int res;
133 if (!VERIFY_SHORT(x))
134 {
135 fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line);
136#ifdef FIXED_DEBUG_ASSERT
137 celt_assert(0);
138#endif
139 }
140 res = x;
141 celt_mips++;
142 return res;
143}
144
145#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__)
146static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line)
147{
148 int res;
149 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
150 {
151 fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line);
152#ifdef FIXED_DEBUG_ASSERT
153 celt_assert(0);
154#endif
155 }
156 res = a>>shift;
157 if (!VERIFY_SHORT(res))
158 {
159 fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line);
160#ifdef FIXED_DEBUG_ASSERT
161 celt_assert(0);
162#endif
163 }
164 celt_mips++;
165 return res;
166}
167#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__)
168static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line)
169{
170 int res;
171 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift))
172 {
173 fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line);
174#ifdef FIXED_DEBUG_ASSERT
175 celt_assert(0);
176#endif
177 }
178 res = a<<shift;
179 if (!VERIFY_SHORT(res))
180 {
181 fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line);
182#ifdef FIXED_DEBUG_ASSERT
183 celt_assert(0);
184#endif
185 }
186 celt_mips++;
187 return res;
188}
189
190static OPUS_INLINE int SHR32(opus_int64 a, int shift)
191{
192 opus_int64 res;
193 if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
194 {
195 fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift);
196#ifdef FIXED_DEBUG_ASSERT
197 celt_assert(0);
198#endif
199 }
200 res = a>>shift;
201 if (!VERIFY_INT(res))
202 {
203 fprintf (stderr, "SHR32: output is not int: %d\n", (int)res);
204#ifdef FIXED_DEBUG_ASSERT
205 celt_assert(0);
206#endif
207 }
208 celt_mips+=2;
209 return res;
210}
211#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__)
212static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
213{
214 opus_int64 res;
215 if (!VERIFY_INT(a) || !VERIFY_SHORT(shift))
216 {
217 fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line);
218#ifdef FIXED_DEBUG_ASSERT
219 celt_assert(0);
220#endif
221 }
222 res = a<<shift;
223 if (!VERIFY_INT(res))
224 {
225 fprintf (stderr, "SHL32: output is not int: %lld<<%d = %lld in %s: line %d\n", a, shift, res, file, line);
226#ifdef FIXED_DEBUG_ASSERT
227 celt_assert(0);
228#endif
229 }
230 celt_mips+=2;
231 return res;
232}
233
/** Right shift with rounding: adds (1<<shift)>>1 before shifting. The leading
    celt_mips-- offsets part of the count added by the nested ADD32/SHR32. */
#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift))
/** Shift right for a positive count, left (by the negated count) otherwise */
#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))

/** Rounding shift of x by a, then extraction to 16 bits */
#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
/** Same as ROUND16 but passed through SATURATE(..., 32767) before extraction.
    No trailing semicolon, so the macro can be used inside an expression. */
#define SROUND16(x,a) (celt_mips--,EXTRACT16(SATURATE(PSHR32(x,a), 32767)))

/** Divide by two */
#define HALF16(x) (SHR16(x,1))
#define HALF32(x) (SHR32(x,1))
242
243#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
244static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
245{
246 int res;
247 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
248 {
249 fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
250#ifdef FIXED_DEBUG_ASSERT
251 celt_assert(0);
252#endif
253 }
254 res = a+b;
255 if (!VERIFY_SHORT(res))
256 {
257 fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line);
258#ifdef FIXED_DEBUG_ASSERT
259 celt_assert(0);
260#endif
261 }
262 celt_mips++;
263 return res;
264}
265
266#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__)
267static OPUS_INLINE short SUB16_(int a, int b, char *file, int line)
268{
269 int res;
270 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
271 {
272 fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
273#ifdef FIXED_DEBUG_ASSERT
274 celt_assert(0);
275#endif
276 }
277 res = a-b;
278 if (!VERIFY_SHORT(res))
279 {
280 fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line);
281#ifdef FIXED_DEBUG_ASSERT
282 celt_assert(0);
283#endif
284 }
285 celt_mips++;
286 return res;
287}
288
289#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__)
290static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line)
291{
292 opus_int64 res;
293 if (!VERIFY_INT(a) || !VERIFY_INT(b))
294 {
295 fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
296#ifdef FIXED_DEBUG_ASSERT
297 celt_assert(0);
298#endif
299 }
300 res = a+b;
301 if (!VERIFY_INT(res))
302 {
303 fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line);
304#ifdef FIXED_DEBUG_ASSERT
305 celt_assert(0);
306#endif
307 }
308 celt_mips+=2;
309 return res;
310}
311
312#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__)
313static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line)
314{
315 opus_int64 res;
316 if (!VERIFY_INT(a) || !VERIFY_INT(b))
317 {
318 fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
319#ifdef FIXED_DEBUG_ASSERT
320 celt_assert(0);
321#endif
322 }
323 res = a-b;
324 if (!VERIFY_INT(res))
325 {
326 fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line);
327#ifdef FIXED_DEBUG_ASSERT
328 celt_assert(0);
329#endif
330 }
331 celt_mips+=2;
332 return res;
333}
334
335#undef UADD32
336#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__)
337static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line)
338{
339 opus_uint64 res;
340 if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
341 {
342 fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
343#ifdef FIXED_DEBUG_ASSERT
344 celt_assert(0);
345#endif
346 }
347 res = a+b;
348 if (!VERIFY_UINT(res))
349 {
350 fprintf (stderr, "UADD32: output is not uint32: %llu in %s: line %d\n", res, file, line);
351#ifdef FIXED_DEBUG_ASSERT
352 celt_assert(0);
353#endif
354 }
355 celt_mips+=2;
356 return res;
357}
358
359#undef USUB32
360#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__)
361static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line)
362{
363 opus_uint64 res;
364 if (!VERIFY_UINT(a) || !VERIFY_UINT(b))
365 {
366 fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line);
367#ifdef FIXED_DEBUG_ASSERT
368 celt_assert(0);
369#endif
370 }
371 if (a<b)
372 {
373 fprintf (stderr, "USUB32: inputs underflow: %llu < %llu in %s: line %d\n", a, b, file, line);
374#ifdef FIXED_DEBUG_ASSERT
375 celt_assert(0);
376#endif
377 }
378 res = a-b;
379 if (!VERIFY_UINT(res))
380 {
381 fprintf (stderr, "USUB32: output is not uint32: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line);
382#ifdef FIXED_DEBUG_ASSERT
383 celt_assert(0);
384#endif
385 }
386 celt_mips+=2;
387 return res;
388}
389
390/* result fits in 16 bits */
391static OPUS_INLINE short MULT16_16_16(int a, int b)
392{
393 int res;
394 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
395 {
396 fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b);
397#ifdef FIXED_DEBUG_ASSERT
398 celt_assert(0);
399#endif
400 }
401 res = a*b;
402 if (!VERIFY_SHORT(res))
403 {
404 fprintf (stderr, "MULT16_16_16: output is not short: %d\n", res);
405#ifdef FIXED_DEBUG_ASSERT
406 celt_assert(0);
407#endif
408 }
409 celt_mips++;
410 return res;
411}
412
413#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__)
414static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line)
415{
416 opus_int64 res;
417 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
418 {
419 fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
420#ifdef FIXED_DEBUG_ASSERT
421 celt_assert(0);
422#endif
423 }
424 res = ((opus_int64)a)*b;
425 if (!VERIFY_INT(res))
426 {
427 fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line);
428#ifdef FIXED_DEBUG_ASSERT
429 celt_assert(0);
430#endif
431 }
432 celt_mips++;
433 return res;
434}
435
436#define MAC16_16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_16((a),(b))))
437
438#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__)
439static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line)
440{
441 opus_int64 res;
442 if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
443 {
444 fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
445#ifdef FIXED_DEBUG_ASSERT
446 celt_assert(0);
447#endif
448 }
449 if (ABS32(b)>=((opus_val32)(1)<<(15+Q)))
450 {
451 fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line);
452#ifdef FIXED_DEBUG_ASSERT
453 celt_assert(0);
454#endif
455 }
456 res = (((opus_int64)a)*(opus_int64)b) >> Q;
457 if (!VERIFY_INT(res))
458 {
459 fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line);
460#ifdef FIXED_DEBUG_ASSERT
461 celt_assert(0);
462#endif
463 }
464 if (Q==15)
465 celt_mips+=3;
466 else
467 celt_mips+=4;
468 return res;
469}
470
471#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__)
472static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line)
473{
474 opus_int64 res;
475 if (!VERIFY_SHORT(a) || !VERIFY_INT(b))
476 {
477 fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line);
478#ifdef FIXED_DEBUG_ASSERT
479 celt_assert(0);
480#endif
481 }
482 if (ABS32(b)>=((opus_int64)(1)<<(15+Q)))
483 {
484 fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line);
485#ifdef FIXED_DEBUG_ASSERT
486 celt_assert(0);
487#endif
488 }
489 res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<<Q)>>1))>> Q;
490 if (!VERIFY_INT(res))
491 {
492 fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line);
493#ifdef FIXED_DEBUG_ASSERT
494 celt_assert(0);
495#endif
496 }
497 if (Q==15)
498 celt_mips+=4;
499 else
500 celt_mips+=5;
501 return res;
502}
503
/** 16x32 multiplication with a 15-bit right shift, via MULT16_32_QX */
#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
/** Multiply-accumulate forms: c plus the Q15/Q16 product. Each subtracts 2
    from celt_mips before the nested ADD32 and multiply add their own counts. */
#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
#define MAC16_32_Q16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q16((a),(b))))
507
508static OPUS_INLINE int SATURATE(int a, int b)
509{
510 if (a>b)
511 a=b;
512 if (a<-b)
513 a = -b;
514 celt_mips+=3;
515 return a;
516}
517
518static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a)
519{
520 celt_mips+=3;
521 if (a>32767)
522 return 32767;
523 else if (a<-32768)
524 return -32768;
525 else return a;
526}
527
528static OPUS_INLINE int MULT16_16_Q11_32(int a, int b)
529{
530 opus_int64 res;
531 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
532 {
533 fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b);
534#ifdef FIXED_DEBUG_ASSERT
535 celt_assert(0);
536#endif
537 }
538 res = ((opus_int64)a)*b;
539 res >>= 11;
540 if (!VERIFY_INT(res))
541 {
542 fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res);
543#ifdef FIXED_DEBUG_ASSERT
544 celt_assert(0);
545#endif
546 }
547 celt_mips+=3;
548 return res;
549}
550static OPUS_INLINE short MULT16_16_Q13(int a, int b)
551{
552 opus_int64 res;
553 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
554 {
555 fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b);
556#ifdef FIXED_DEBUG_ASSERT
557 celt_assert(0);
558#endif
559 }
560 res = ((opus_int64)a)*b;
561 res >>= 13;
562 if (!VERIFY_SHORT(res))
563 {
564 fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res);
565#ifdef FIXED_DEBUG_ASSERT
566 celt_assert(0);
567#endif
568 }
569 celt_mips+=3;
570 return res;
571}
572static OPUS_INLINE short MULT16_16_Q14(int a, int b)
573{
574 opus_int64 res;
575 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
576 {
577 fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b);
578#ifdef FIXED_DEBUG_ASSERT
579 celt_assert(0);
580#endif
581 }
582 res = ((opus_int64)a)*b;
583 res >>= 14;
584 if (!VERIFY_SHORT(res))
585 {
586 fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res);
587#ifdef FIXED_DEBUG_ASSERT
588 celt_assert(0);
589#endif
590 }
591 celt_mips+=3;
592 return res;
593}
594
595#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__)
596static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line)
597{
598 opus_int64 res;
599 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
600 {
601 fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line);
602#ifdef FIXED_DEBUG_ASSERT
603 celt_assert(0);
604#endif
605 }
606 res = ((opus_int64)a)*b;
607 res >>= 15;
608 if (!VERIFY_SHORT(res))
609 {
610 fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line);
611#ifdef FIXED_DEBUG_ASSERT
612 celt_assert(0);
613#endif
614 }
615 celt_mips+=1;
616 return res;
617}
618
619static OPUS_INLINE short MULT16_16_P13(int a, int b)
620{
621 opus_int64 res;
622 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
623 {
624 fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b);
625#ifdef FIXED_DEBUG_ASSERT
626 celt_assert(0);
627#endif
628 }
629 res = ((opus_int64)a)*b;
630 res += 4096;
631 if (!VERIFY_INT(res))
632 {
633 fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res);
634#ifdef FIXED_DEBUG_ASSERT
635 celt_assert(0);
636#endif
637 }
638 res >>= 13;
639 if (!VERIFY_SHORT(res))
640 {
641 fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res);
642#ifdef FIXED_DEBUG_ASSERT
643 celt_assert(0);
644#endif
645 }
646 celt_mips+=4;
647 return res;
648}
649static OPUS_INLINE short MULT16_16_P14(int a, int b)
650{
651 opus_int64 res;
652 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
653 {
654 fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b);
655#ifdef FIXED_DEBUG_ASSERT
656 celt_assert(0);
657#endif
658 }
659 res = ((opus_int64)a)*b;
660 res += 8192;
661 if (!VERIFY_INT(res))
662 {
663 fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res);
664#ifdef FIXED_DEBUG_ASSERT
665 celt_assert(0);
666#endif
667 }
668 res >>= 14;
669 if (!VERIFY_SHORT(res))
670 {
671 fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res);
672#ifdef FIXED_DEBUG_ASSERT
673 celt_assert(0);
674#endif
675 }
676 celt_mips+=4;
677 return res;
678}
679static OPUS_INLINE short MULT16_16_P15(int a, int b)
680{
681 opus_int64 res;
682 if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b))
683 {
684 fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b);
685#ifdef FIXED_DEBUG_ASSERT
686 celt_assert(0);
687#endif
688 }
689 res = ((opus_int64)a)*b;
690 res += 16384;
691 if (!VERIFY_INT(res))
692 {
693 fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res);
694#ifdef FIXED_DEBUG_ASSERT
695 celt_assert(0);
696#endif
697 }
698 res >>= 15;
699 if (!VERIFY_SHORT(res))
700 {
701 fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res);
702#ifdef FIXED_DEBUG_ASSERT
703 celt_assert(0);
704#endif
705 }
706 celt_mips+=2;
707 return res;
708}
709
710#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__)
711
712static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line)
713{
714 opus_int64 res;
715 if (b==0)
716 {
717 fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
718#ifdef FIXED_DEBUG_ASSERT
719 celt_assert(0);
720#endif
721 return 0;
722 }
723 if (!VERIFY_INT(a) || !VERIFY_SHORT(b))
724 {
725 fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
726#ifdef FIXED_DEBUG_ASSERT
727 celt_assert(0);
728#endif
729 }
730 res = a/b;
731 if (!VERIFY_SHORT(res))
732 {
733 fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line);
734 if (res>32767)
735 res = 32767;
736 if (res<-32768)
737 res = -32768;
738#ifdef FIXED_DEBUG_ASSERT
739 celt_assert(0);
740#endif
741 }
742 celt_mips+=35;
743 return res;
744}
745
746#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__)
747static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line)
748{
749 opus_int64 res;
750 if (b==0)
751 {
752 fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line);
753#ifdef FIXED_DEBUG_ASSERT
754 celt_assert(0);
755#endif
756 return 0;
757 }
758
759 if (!VERIFY_INT(a) || !VERIFY_INT(b))
760 {
761 fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line);
762#ifdef FIXED_DEBUG_ASSERT
763 celt_assert(0);
764#endif
765 }
766 res = a/b;
767 if (!VERIFY_INT(res))
768 {
769 fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line);
770#ifdef FIXED_DEBUG_ASSERT
771 celt_assert(0);
772#endif
773 }
774 celt_mips+=70;
775 return res;
776}
777
778static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
779{
780 x = PSHR32(x, SIG_SHIFT);
781 x = MAX32(x, -32768);
782 x = MIN32(x, 32767);
783 return EXTRACT16(x);
784}
785#define SIG2WORD16(x) (SIG2WORD16_generic(x))
786
787
#undef PRINT_MIPS
/** Print the accumulated celt_mips count to the given stream.
    celt_mips is a signed 64-bit counter, so it is printed with %lld through a
    long long cast instead of the unsigned %llu conversion.
    NOTE(review): the trailing ';' after while (0) is kept so that any existing
    call site written without its own semicolon keeps compiling; it does mean
    the macro must not be used as the unbraced body of an if/else. */
#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %lld MIPS\n", (long long)celt_mips);} while (0);
790
791#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
index ac67d37ce8..5f4abda76e 100644
--- a/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
+++ b/lib/rbcodec/codecs/libopus/celt/fixed_generic.h
@@ -37,16 +37,32 @@
37#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) 37#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
38 38
39/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ 39/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
40#if OPUS_FAST_INT64
41#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
42#else
40#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) 43#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
44#endif
41 45
42/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ 46/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
47#if OPUS_FAST_INT64
48#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
49#else
43#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) 50#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
51#endif
44 52
45/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ 53/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
54#if OPUS_FAST_INT64
55#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
56#else
46#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) 57#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
58#endif
47 59
48/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ 60/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
61#if OPUS_FAST_INT64
62#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
63#else
49#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) 64#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
65#endif
50 66
51/** Compile-time conversion of float constant to 16-bit value */ 67/** Compile-time conversion of float constant to 16-bit value */
52#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) 68#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
@@ -88,6 +104,9 @@
88 104
89/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ 105/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
90#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) 106#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
107/** Shift by a and round-to-neareast 32-bit value. Result is a saturated 16-bit value */
108#define SROUND16(x,a) EXTRACT16(SATURATE(PSHR32(x,a), 32767));
109
91/** Divide by two */ 110/** Divide by two */
92#define HALF16(x) (SHR16(x,1)) 111#define HALF16(x) (SHR16(x,1))
93#define HALF32(x) (SHR32(x,1)) 112#define HALF32(x) (SHR32(x,1))
@@ -101,6 +120,14 @@
101/** Subtract two 32-bit values */ 120/** Subtract two 32-bit values */
102#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b)) 121#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
103 122
123/** Add two 32-bit values, ignore any overflows */
124#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
125/** Subtract two 32-bit values, ignore any overflows */
126#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
127/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
128/** Negate 32-bit value, ignore any overflows */
129#define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))
130
104/** 16x16 multiplication where the result fits in 16 bits */ 131/** 16x16 multiplication where the result fits in 16 bits */
105#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b)))) 132#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))
106 133
diff --git a/lib/rbcodec/codecs/libopus/celt/float_cast.h b/lib/rbcodec/codecs/libopus/celt/float_cast.h
index ede6574860..889dae965f 100644
--- a/lib/rbcodec/codecs/libopus/celt/float_cast.h
+++ b/lib/rbcodec/codecs/libopus/celt/float_cast.h
@@ -61,7 +61,13 @@
61** the config.h file. 61** the config.h file.
62*/ 62*/
63 63
64#if (HAVE_LRINTF) 64/* With GCC, when SSE is available, the fastest conversion is cvtss2si. */
65#if defined(__GNUC__) && defined(__SSE__)
66
67#include <xmmintrin.h>
68static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));}
69
70#elif defined(HAVE_LRINTF)
65 71
66/* These defines enable functionality introduced with the 1999 ISO C 72/* These defines enable functionality introduced with the 1999 ISO C
67** standard. They must be defined before the inclusion of math.h to 73** standard. They must be defined before the inclusion of math.h to
@@ -90,21 +96,21 @@
90#include <math.h> 96#include <math.h>
91#define float2int(x) lrint(x) 97#define float2int(x) lrint(x)
92 98
93#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64)) 99#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1))
94 #include <xmmintrin.h> 100 #include <xmmintrin.h>
95 101
96 __inline long int float2int(float value) 102 static __inline long int float2int(float value)
97 { 103 {
98 return _mm_cvtss_si32(_mm_load_ss(&value)); 104 return _mm_cvtss_si32(_mm_load_ss(&value));
99 } 105 }
100#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32)) 106#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && defined (_M_IX86)
101 #include <math.h> 107 #include <math.h>
102 108
103 /* Win32 doesn't seem to have these functions. 109 /* Win32 doesn't seem to have these functions.
104 ** Therefore implement OPUS_INLINE versions of these functions here. 110 ** Therefore implement OPUS_INLINE versions of these functions here.
105 */ 111 */
106 112
107 __inline long int 113 static __inline long int
108 float2int (float flt) 114 float2int (float flt)
109 { int intgr; 115 { int intgr;
110 116
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
index 833ef5a71f..83775165d8 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.c
@@ -82,8 +82,8 @@ static void kf_bfly2(
82 C_SUB( Fout2[0] , Fout[0] , t ); 82 C_SUB( Fout2[0] , Fout[0] , t );
83 C_ADDTO( Fout[0] , t ); 83 C_ADDTO( Fout[0] , t );
84 84
85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); 85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); 86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
87 C_SUB( Fout2[1] , Fout[1] , t ); 87 C_SUB( Fout2[1] , Fout[1] , t );
88 C_ADDTO( Fout[1] , t ); 88 C_ADDTO( Fout[1] , t );
89 89
@@ -92,8 +92,8 @@ static void kf_bfly2(
92 C_SUB( Fout2[2] , Fout[2] , t ); 92 C_SUB( Fout2[2] , Fout[2] , t );
93 C_ADDTO( Fout[2] , t ); 93 C_ADDTO( Fout[2] , t );
94 94
95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); 95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); 96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
97 C_SUB( Fout2[3] , Fout[3] , t ); 97 C_SUB( Fout2[3] , Fout[3] , t );
98 C_ADDTO( Fout[3] , t ); 98 C_ADDTO( Fout[3] , t );
99 Fout += 8; 99 Fout += 8;
@@ -126,10 +126,10 @@ static void kf_bfly4(
126 C_ADDTO( *Fout , scratch1 ); 126 C_ADDTO( *Fout , scratch1 );
127 C_SUB( scratch1 , Fout[1] , Fout[3] ); 127 C_SUB( scratch1 , Fout[1] , Fout[3] );
128 128
129 Fout[1].r = scratch0.r + scratch1.i; 129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
130 Fout[1].i = scratch0.i - scratch1.r; 130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
131 Fout[3].r = scratch0.r - scratch1.i; 131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
132 Fout[3].i = scratch0.i + scratch1.r; 132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
133 Fout+=4; 133 Fout+=4;
134 } 134 }
135 } else { 135 } else {
@@ -160,10 +160,10 @@ static void kf_bfly4(
160 tw3 += fstride*3; 160 tw3 += fstride*3;
161 C_ADDTO( *Fout , scratch[3] ); 161 C_ADDTO( *Fout , scratch[3] );
162 162
163 Fout[m].r = scratch[5].r + scratch[4].i; 163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
164 Fout[m].i = scratch[5].i - scratch[4].r; 164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
165 Fout[m3].r = scratch[5].r - scratch[4].i; 165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
166 Fout[m3].i = scratch[5].i + scratch[4].r; 166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
167 ++Fout; 167 ++Fout;
168 } 168 }
169 } 169 }
@@ -191,7 +191,7 @@ static void kf_bfly3(
191 191
192 kiss_fft_cpx * Fout_beg = Fout; 192 kiss_fft_cpx * Fout_beg = Fout;
193#ifdef FIXED_POINT 193#ifdef FIXED_POINT
194 epi3.r = -16384; 194 /*epi3.r = -16384;*/ /* Unused */
195 epi3.i = -28378; 195 epi3.i = -28378;
196#else 196#else
197 epi3 = st->twiddles[fstride*m]; 197 epi3 = st->twiddles[fstride*m];
@@ -212,18 +212,18 @@ static void kf_bfly3(
212 tw1 += fstride; 212 tw1 += fstride;
213 tw2 += fstride*2; 213 tw2 += fstride*2;
214 214
215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
217 217
218 C_MULBYSCALAR( scratch[0] , epi3.i ); 218 C_MULBYSCALAR( scratch[0] , epi3.i );
219 219
220 C_ADDTO(*Fout,scratch[3]); 220 C_ADDTO(*Fout,scratch[3]);
221 221
222 Fout[m2].r = Fout[m].r + scratch[0].i; 222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
223 Fout[m2].i = Fout[m].i - scratch[0].r; 223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
224 224
225 Fout[m].r -= scratch[0].i; 225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
226 Fout[m].i += scratch[0].r; 226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
227 227
228 ++Fout; 228 ++Fout;
229 } while(--k); 229 } while(--k);
@@ -282,22 +282,22 @@ static void kf_bfly5(
282 C_ADD( scratch[8],scratch[2],scratch[3]); 282 C_ADD( scratch[8],scratch[2],scratch[3]);
283 C_SUB( scratch[9],scratch[2],scratch[3]); 283 C_SUB( scratch[9],scratch[2],scratch[3]);
284 284
285 Fout0->r += scratch[7].r + scratch[8].r; 285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
286 Fout0->i += scratch[7].i + scratch[8].i; 286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
287 287
288 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); 288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
289 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
290 290
291 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); 291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
292 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); 292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
293 293
294 C_SUB(*Fout1,scratch[5],scratch[6]); 294 C_SUB(*Fout1,scratch[5],scratch[6]);
295 C_ADD(*Fout4,scratch[5],scratch[6]); 295 C_ADD(*Fout4,scratch[5],scratch[6]);
296 296
297 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
298 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); 299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); 300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
301 301
302 C_ADD(*Fout2,scratch[11],scratch[12]); 302 C_ADD(*Fout2,scratch[11],scratch[12]);
303 C_SUB(*Fout3,scratch[11],scratch[12]); 303 C_SUB(*Fout3,scratch[11],scratch[12]);
@@ -423,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
423#endif 423#endif
424} 424}
425 425
/* Generic C fallback for architecture-specific FFT setup: there is nothing
   to allocate, so the state is ignored and 0 is returned.
   opus_fft_alloc_twiddles() treats a nonzero result as a failure. */
426int opus_fft_alloc_arch_c(kiss_fft_state *st) {
427 (void)st;
428 return 0;
429}
430
426/* 431/*
427 * 432 *
428 * Allocates all necessary storage space for the fft and ifft. 433 * Allocates all necessary storage space for the fft and ifft.
429 * The return value is a contiguous block of memory. As such, 434 * The return value is a contiguous block of memory. As such,
430 * It can be freed with free(). 435 * It can be freed with free().
431 * */ 436 * */
432kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base) 437kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
438 const kiss_fft_state *base, int arch)
433{ 439{
434 kiss_fft_state *st=NULL; 440 kiss_fft_state *st=NULL;
435 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ 441 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
@@ -478,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
478 if (st->bitrev==NULL) 484 if (st->bitrev==NULL)
479 goto fail; 485 goto fail;
480 compute_bitrev_table(0, bitrev, 1,1, st->factors,st); 486 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
487
488 /* Initialize architecture specific fft parameters */
489 if (opus_fft_alloc_arch(st, arch))
490 goto fail;
481 } 491 }
482 return st; 492 return st;
483fail: 493fail:
484 opus_fft_free(st); 494 opus_fft_free(st, arch);
485 return NULL; 495 return NULL;
486} 496}
487 497
488kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) 498kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
489{ 499{
490 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); 500 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
491} 501}
492 502
493void opus_fft_free(const kiss_fft_state *cfg) 503void opus_fft_free_arch_c(kiss_fft_state *st) {
504 (void)st;
505}
506
507void opus_fft_free(const kiss_fft_state *cfg, int arch)
494{ 508{
495 if (cfg) 509 if (cfg)
496 { 510 {
511 opus_fft_free_arch((kiss_fft_state *)cfg, arch);
497 opus_free((opus_int16*)cfg->bitrev); 512 opus_free((opus_int16*)cfg->bitrev);
498 if (cfg->shift < 0) 513 if (cfg->shift < 0)
499 opus_free((kiss_twiddle_cpx*)cfg->twiddles); 514 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
@@ -551,8 +566,7 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
551 } 566 }
552} 567}
553 568
554#if 0 569void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
555void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
556{ 570{
557 int i; 571 int i;
558 opus_val16 scale; 572 opus_val16 scale;
@@ -573,11 +587,9 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
573 } 587 }
574 opus_fft_impl(st, fout); 588 opus_fft_impl(st, fout);
575} 589}
576#endif
577 590
578 591
579#ifdef TEST_UNIT_DFT_C 592void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
580void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
581{ 593{
582 int i; 594 int i;
583 celt_assert2 (fin != fout, "In-place FFT not supported"); 595 celt_assert2 (fin != fout, "In-place FFT not supported");
@@ -590,4 +602,3 @@ void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fo
590 for (i=0;i<st->nfft;i++) 602 for (i=0;i<st->nfft;i++)
591 fout[i].i = -fout[i].i; 603 fout[i].i = -fout[i].i;
592} 604}
593#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
index 390b54d948..bffa2bfad6 100644
--- a/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
+++ b/lib/rbcodec/codecs/libopus/celt/kiss_fft.h
@@ -32,6 +32,7 @@
32#include <stdlib.h> 32#include <stdlib.h>
33#include <math.h> 33#include <math.h>
34#include "arch.h" 34#include "arch.h"
35#include "cpu_support.h"
35 36
36#ifdef __cplusplus 37#ifdef __cplusplus
37extern "C" { 38extern "C" {
@@ -77,6 +78,11 @@ typedef struct {
77 4*4*4*2 78 4*4*4*2
78 */ 79 */
79 80
/* Architecture-specific FFT state, reachable from kiss_fft_state through its
   arch_fft pointer. The generic C code visible in this chunk never reads
   either field.
   NOTE(review): presumably filled in by an architecture-specific
   opus_fft_alloc_arch implementation (e.g. the NE10 one) -- confirm there. */
81typedef struct arch_fft_state{
82 int is_supported;
83 void *priv;
84} arch_fft_state;
85
80typedef struct kiss_fft_state{ 86typedef struct kiss_fft_state{
81 int nfft; 87 int nfft;
82 opus_val16 scale; 88 opus_val16 scale;
@@ -87,8 +93,13 @@ typedef struct kiss_fft_state{
87 opus_int16 factors[2*MAXFACTORS]; 93 opus_int16 factors[2*MAXFACTORS];
88 const opus_int16 *bitrev; 94 const opus_int16 *bitrev;
89 const kiss_twiddle_cpx *twiddles; 95 const kiss_twiddle_cpx *twiddles;
96 arch_fft_state *arch_fft;
90} kiss_fft_state; 97} kiss_fft_state;
91 98
99#if defined(HAVE_ARM_NE10)
100#include "arm/fft_arm.h"
101#endif
102
92/*typedef struct kiss_fft_state* kiss_fft_cfg;*/ 103/*typedef struct kiss_fft_state* kiss_fft_cfg;*/
93 104
94/** 105/**
@@ -114,9 +125,9 @@ typedef struct kiss_fft_state{
114 * buffer size in *lenmem. 125 * buffer size in *lenmem.
115 * */ 126 * */
116 127
117kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base); 128kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
118 129
119kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); 130kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
120 131
121/** 132/**
122 * opus_fft(cfg,in_out_buf) 133 * opus_fft(cfg,in_out_buf)
@@ -128,13 +139,59 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
128 * Note that each element is complex and can be accessed like 139 * Note that each element is complex and can be accessed like
129 f[k].r and f[k].i 140 f[k].r and f[k].i
130 * */ 141 * */
131void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 142void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
132void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 143void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
133 144
134void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 145void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
135void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 146void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
136 147
137void opus_fft_free(const kiss_fft_state *cfg); 148void opus_fft_free(const kiss_fft_state *cfg, int arch);
149
150
151void opus_fft_free_arch_c(kiss_fft_state *st);
152int opus_fft_alloc_arch_c(kiss_fft_state *st);
153
/* Dispatch layer for the FFT entry points. Every macro below takes an `arch`
   argument; unless OVERRIDE_OPUS_FFT is defined, it is resolved in one of
   the two ways selected by the inner #if. */
154#if !defined(OVERRIDE_OPUS_FFT)
155/* Is run-time CPU detection enabled on this platform? */
156#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
/* RTCD + NE10 build: each call goes through a table of function pointers
   with OPUS_ARCHMASK+1 entries, indexed by (arch & OPUS_ARCHMASK). The
   tables are only declared here, not defined. */
157
158extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
159 kiss_fft_state *st);
160
161#define opus_fft_alloc_arch(_st, arch) \
162 ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
163
164extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
165 kiss_fft_state *st);
166#define opus_fft_free_arch(_st, arch) \
167 ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
168
169extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
170 const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
171#define opus_fft(_cfg, _fin, _fout, arch) \
172 ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
173
174extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
175 const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
176#define opus_ifft(_cfg, _fin, _fout, arch) \
177 ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
178
179#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
/* No run-time dispatch: call the generic C implementations directly. `arch`
   is evaluated and discarded, so callers can pass it unconditionally. */
180
181#define opus_fft_alloc_arch(_st, arch) \
182 ((void)(arch), opus_fft_alloc_arch_c(_st))
183
184#define opus_fft_free_arch(_st, arch) \
185 ((void)(arch), opus_fft_free_arch_c(_st))
186
187#define opus_fft(_cfg, _fin, _fout, arch) \
188 ((void)(arch), opus_fft_c(_cfg, _fin, _fout))
189
190#define opus_ifft(_cfg, _fin, _fout, arch) \
191 ((void)(arch), opus_ifft_c(_cfg, _fin, _fout))
192
193#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
194#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
138 195
139#ifdef __cplusplus 196#ifdef __cplusplus
140} 197}
diff --git a/lib/rbcodec/codecs/libopus/celt/mathops.c b/lib/rbcodec/codecs/libopus/celt/mathops.c
index 3f8c5dcc0e..6ee9b9e101 100644
--- a/lib/rbcodec/codecs/libopus/celt/mathops.c
+++ b/lib/rbcodec/codecs/libopus/celt/mathops.c
@@ -38,7 +38,8 @@
38#include "mathops.h" 38#include "mathops.h"
39 39
40/*Compute floor(sqrt(_val)) with exact arithmetic. 40/*Compute floor(sqrt(_val)) with exact arithmetic.
41 This has been tested on all possible 32-bit inputs.*/ 41 _val must be greater than 0.
42 This has been tested on all possible 32-bit inputs greater than 0.*/
42unsigned isqrt32(opus_uint32 _val){ 43unsigned isqrt32(opus_uint32 _val){
43 unsigned b; 44 unsigned b;
44 unsigned g; 45 unsigned g;
@@ -164,7 +165,7 @@ opus_val16 celt_cos_norm(opus_val32 x)
164 { 165 {
165 return _celt_cos_pi_2(EXTRACT16(x)); 166 return _celt_cos_pi_2(EXTRACT16(x));
166 } else { 167 } else {
167 return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); 168 return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
168 } 169 }
169 } else { 170 } else {
170 if (x&0x0000ffff) 171 if (x&0x0000ffff)
@@ -182,7 +183,7 @@ opus_val32 celt_rcp(opus_val32 x)
182 int i; 183 int i;
183 opus_val16 n; 184 opus_val16 n;
184 opus_val16 r; 185 opus_val16 r;
185 celt_assert2(x>0, "celt_rcp() only defined for positive values"); 186 celt_sig_assert(x>0);
186 i = celt_ilog2(x); 187 i = celt_ilog2(x);
187 /* n is Q15 with range [0,1). */ 188 /* n is Q15 with range [0,1). */
188 n = VSHR32(x,i-15)-32768; 189 n = VSHR32(x,i-15)-32768;
diff --git a/lib/rbcodec/codecs/libopus/celt/mathops.h b/lib/rbcodec/codecs/libopus/celt/mathops.h
index a0525a9610..f3e5246a39 100644
--- a/lib/rbcodec/codecs/libopus/celt/mathops.h
+++ b/lib/rbcodec/codecs/libopus/celt/mathops.h
@@ -38,11 +38,48 @@
38#include "entcode.h" 38#include "entcode.h"
39#include "os_support.h" 39#include "os_support.h"
40 40
41#define PI 3.141592653f
42
43#ifndef ABS
44#define ABS(a)(((a) < 0) ? - (a) :(a))
45#endif
46
41/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */ 47/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
42#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15) 48#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
43 49
44unsigned isqrt32(opus_uint32 _val); 50unsigned isqrt32(opus_uint32 _val);
45 51
52/* CELT doesn't need it for fixed-point, but analysis.c does. */
53#if !defined(FIXED_POINT) || defined(ANALYSIS_C)
54#define cA 0.43157974f
55#define cB 0.67848403f
56#define cC 0.08595542f
57#define cE ((float)PI/2)
/* Fast approximation of atan2(y, x) built from a ratio of polynomials in
   x*x and y*y (coefficients cA, cB, cC) plus multiples of cE = PI/2.
   Returns 0 when x*x + y*y is below 1e-18.
   NOTE(review): for y == 0 and x < 0 the two cE terms in the second branch
   are both +cE and cancel, so the result is 0 rather than +/-pi -- confirm
   that no caller depends on that case. */
58static OPUS_INLINE float fast_atan2f(float y, float x) {
59 float x2, y2;
60 x2 = x*x;
61 y2 = y*y;
62 /* For very small values, we don't care about the answer, so
63 we can just return 0. */
64 if (x2 + y2 < 1e-18f)
65 {
66 return 0;
67 }
 /* |x| < |y|: evaluate the ratio with x and y swapped and offset the result
    by +/-cE according to the sign of y. */
68 if(x2<y2){
69 float den = (y2 + cB*x2) * (y2 + cC*x2);
70 return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE);
71 }else{
 /* |x| >= |y|: the two cE terms cancel when x*y >= 0 with y >= 0, or when
    x*y < 0 with y < 0; otherwise they add up to +/-2*cE. */
72 float den = (x2 + cB*y2) * (x2 + cC*y2);
73 return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE);
74 }
75}
76#undef cA
77#undef cB
78#undef cC
79#undef cE
80#endif
81
82
46#ifndef OVERRIDE_CELT_MAXABS16 83#ifndef OVERRIDE_CELT_MAXABS16
47static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) 84static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
48{ 85{
@@ -80,7 +117,6 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
80 117
81#ifndef FIXED_POINT 118#ifndef FIXED_POINT
82 119
83#define PI 3.141592653f
84#define celt_sqrt(x) ((float)sqrt(x)) 120#define celt_sqrt(x) ((float)sqrt(x))
85#define celt_rsqrt(x) (1.f/celt_sqrt(x)) 121#define celt_rsqrt(x) (1.f/celt_sqrt(x))
86#define celt_rsqrt_norm(x) (celt_rsqrt(x)) 122#define celt_rsqrt_norm(x) (celt_rsqrt(x))
@@ -147,7 +183,7 @@ static OPUS_INLINE float celt_exp2(float x)
147/** Integer log in base2. Undefined for zero and negative numbers */ 183/** Integer log in base2. Undefined for zero and negative numbers */
148static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x) 184static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
149{ 185{
150 celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers"); 186 celt_sig_assert(x>0);
151 return EC_ILOG(x)-1; 187 return EC_ILOG(x)-1;
152} 188}
153#endif 189#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.c b/lib/rbcodec/codecs/libopus/celt/mdct.c
index 7fa8eaf6bf..5c6dab5b75 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.c
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.c
@@ -60,7 +60,7 @@
60 60
61#ifdef CUSTOM_MODES 61#ifdef CUSTOM_MODES
62 62
63int clt_mdct_init(mdct_lookup *l,int N, int maxshift) 63int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
64{ 64{
65 int i; 65 int i;
66 kiss_twiddle_scalar *trig; 66 kiss_twiddle_scalar *trig;
@@ -71,9 +71,9 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
71 for (i=0;i<=maxshift;i++) 71 for (i=0;i<=maxshift;i++)
72 { 72 {
73 if (i==0) 73 if (i==0)
74 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); 74 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
75 else 75 else
76 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); 76 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
77#ifndef ENABLE_TI_DSPLIB55 77#ifndef ENABLE_TI_DSPLIB55
78 if (l->kfft[i]==NULL) 78 if (l->kfft[i]==NULL)
79 return 0; 79 return 0;
@@ -104,21 +104,20 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
104 return 1; 104 return 1;
105} 105}
106 106
107void clt_mdct_clear(mdct_lookup *l) 107void clt_mdct_clear(mdct_lookup *l, int arch)
108{ 108{
109 int i; 109 int i;
110 for (i=0;i<=l->maxshift;i++) 110 for (i=0;i<=l->maxshift;i++)
111 opus_fft_free(l->kfft[i]); 111 opus_fft_free(l->kfft[i], arch);
112 opus_free((kiss_twiddle_scalar*)l->trig); 112 opus_free((kiss_twiddle_scalar*)l->trig);
113} 113}
114 114
115#endif /* CUSTOM_MODES */ 115#endif /* CUSTOM_MODES */
116 116
117#if 0
118/* Forward MDCT trashes the input array */ 117/* Forward MDCT trashes the input array */
119#ifndef OVERRIDE_clt_mdct_forward 118#ifndef OVERRIDE_clt_mdct_forward
120void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 119void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
121 const opus_val16 *window, int overlap, int shift, int stride) 120 const opus_val16 *window, int overlap, int shift, int stride, int arch)
122{ 121{
123 int i; 122 int i;
124 int N, N2, N4; 123 int N, N2, N4;
@@ -133,6 +132,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
133 int scale_shift = st->scale_shift-1; 132 int scale_shift = st->scale_shift-1;
134#endif 133#endif
135 SAVE_STACK; 134 SAVE_STACK;
135 (void)arch;
136 scale = st->scale; 136 scale = st->scale;
137 137
138 N = l->n; 138 N = l->n;
@@ -237,15 +237,15 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
237 RESTORE_STACK; 237 RESTORE_STACK;
238} 238}
239#endif /* OVERRIDE_clt_mdct_forward */ 239#endif /* OVERRIDE_clt_mdct_forward */
240#endif
241 240
242#ifndef OVERRIDE_clt_mdct_backward 241#ifndef OVERRIDE_clt_mdct_backward
243void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 242void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
244 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 243 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
245{ 244{
246 int i; 245 int i;
247 int N, N2, N4; 246 int N, N2, N4;
248 const kiss_twiddle_scalar *trig; 247 const kiss_twiddle_scalar *trig;
248 (void) arch;
249 249
250 N = l->n; 250 N = l->n;
251 trig = l->trig; 251 trig = l->trig;
@@ -270,8 +270,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
270 int rev; 270 int rev;
271 kiss_fft_scalar yr, yi; 271 kiss_fft_scalar yr, yi;
272 rev = *bitrev++; 272 rev = *bitrev++;
273 yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); 273 yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
274 yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); 274 yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
275 /* We swap real and imag because we use an FFT instead of an IFFT. */ 275 /* We swap real and imag because we use an FFT instead of an IFFT. */
276 yp[2*rev+1] = yr; 276 yp[2*rev+1] = yr;
277 yp[2*rev] = yi; 277 yp[2*rev] = yi;
@@ -301,8 +301,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
301 t0 = t[i]; 301 t0 = t[i];
302 t1 = t[N4+i]; 302 t1 = t[N4+i];
303 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 303 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
304 yr = S_MUL(re,t0) + S_MUL(im,t1); 304 yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
305 yi = S_MUL(re,t1) - S_MUL(im,t0); 305 yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
306 /* We swap real and imag because we're using an FFT instead of an IFFT. */ 306 /* We swap real and imag because we're using an FFT instead of an IFFT. */
307 re = yp1[1]; 307 re = yp1[1];
308 im = yp1[0]; 308 im = yp1[0];
@@ -312,8 +312,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
312 t0 = t[(N4-i-1)]; 312 t0 = t[(N4-i-1)];
313 t1 = t[(N2-i-1)]; 313 t1 = t[(N2-i-1)];
314 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 314 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
315 yr = S_MUL(re,t0) + S_MUL(im,t1); 315 yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
316 yi = S_MUL(re,t1) - S_MUL(im,t0); 316 yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
317 yp1[0] = yr; 317 yp1[0] = yr;
318 yp0[1] = yi; 318 yp0[1] = yi;
319 yp0 += 2; 319 yp0 += 2;
@@ -333,8 +333,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
333 kiss_fft_scalar x1, x2; 333 kiss_fft_scalar x1, x2;
334 x1 = *xp1; 334 x1 = *xp1;
335 x2 = *yp1; 335 x2 = *yp1;
336 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); 336 *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
337 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); 337 *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
338 wp1++; 338 wp1++;
339 wp2--; 339 wp2--;
340 } 340 }
diff --git a/lib/rbcodec/codecs/libopus/celt/mdct.h b/lib/rbcodec/codecs/libopus/celt/mdct.h
index d72182138a..160ae4e0f3 100644
--- a/lib/rbcodec/codecs/libopus/celt/mdct.h
+++ b/lib/rbcodec/codecs/libopus/celt/mdct.h
@@ -53,18 +53,60 @@ typedef struct {
53 const kiss_twiddle_scalar * OPUS_RESTRICT trig; 53 const kiss_twiddle_scalar * OPUS_RESTRICT trig;
54} mdct_lookup; 54} mdct_lookup;
55 55
56int clt_mdct_init(mdct_lookup *l,int N, int maxshift); 56#if defined(HAVE_ARM_NE10)
57void clt_mdct_clear(mdct_lookup *l); 57#include "arm/mdct_arm.h"
58#endif
59
60
61int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
62void clt_mdct_clear(mdct_lookup *l, int arch);
58 63
59/** Compute a forward MDCT and scale by 4/N, trashes the input array */ 64/** Compute a forward MDCT and scale by 4/N, trashes the input array */
60void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, 65void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
61 kiss_fft_scalar * OPUS_RESTRICT out, 66 kiss_fft_scalar * OPUS_RESTRICT out,
62 const opus_val16 *window, int overlap, int shift, int stride); 67 const opus_val16 *window, int overlap,
68 int shift, int stride, int arch);
63 69
64/** Compute a backward MDCT (no scaling) and performs weighted overlap-add 70/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
65 (scales implicitly by 1/2) */ 71 (scales implicitly by 1/2) */
66void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, 72void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
67 kiss_fft_scalar * OPUS_RESTRICT out, 73 kiss_fft_scalar * OPUS_RESTRICT out,
68 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride); 74 const opus_val16 * OPUS_RESTRICT window,
75 int overlap, int shift, int stride, int arch);
76
77#if !defined(OVERRIDE_OPUS_MDCT)
78/* Is run-time CPU detection enabled on this platform? */
79#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10)
80
81extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
82 const mdct_lookup *l, kiss_fft_scalar *in,
83 kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
84 int overlap, int shift, int stride, int arch);
85
/* Run-time dispatch of the forward MDCT: selects the implementation from
   CLT_MDCT_FORWARD_IMPL using the macro's own _arch argument (masked with
   OPUS_ARCHMASK) and forwards all arguments unchanged. The index must use
   _arch, not a caller-scope variable that happens to be named arch. */
86#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
87 ((*CLT_MDCT_FORWARD_IMPL[(_arch)&OPUS_ARCHMASK])(_l, _in, _out, \
88 _window, _overlap, _shift, \
89 _stride, _arch))
90
91extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
92 const mdct_lookup *l, kiss_fft_scalar *in,
93 kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
94 int overlap, int shift, int stride, int arch);
95
/* Run-time dispatch of the backward MDCT: selects the implementation from
   CLT_MDCT_BACKWARD_IMPL using the macro's own _arch argument (masked with
   OPUS_ARCHMASK) and forwards all arguments unchanged. The whole expansion
   is parenthesized, matching clt_mdct_forward. */
96#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
97 ((*CLT_MDCT_BACKWARD_IMPL[(_arch)&OPUS_ARCHMASK])(_l, _in, _out, \
98 _window, _overlap, _shift, \
99 _stride, _arch))
100
101#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */
102
103#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
104 clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
105
106#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
107 clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
108
109#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) */
110#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
69 111
70#endif 112#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/mips/celt_mipsr1.h b/lib/rbcodec/codecs/libopus/celt/mips/celt_mipsr1.h
new file mode 100644
index 0000000000..e85661a661
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/mips/celt_mipsr1.h
@@ -0,0 +1,151 @@
1/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2010 Xiph.Org Foundation
3 Copyright (c) 2008 Gregory Maxwell
4 Written by Jean-Marc Valin and Gregory Maxwell */
5/*
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30#ifndef __CELT_MIPSR1_H__
31#define __CELT_MIPSR1_H__
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#define CELT_C
38
39#include "os_support.h"
40#include "mdct.h"
41#include <math.h>
42#include "celt.h"
43#include "pitch.h"
44#include "bands.h"
45#include "modes.h"
46#include "entcode.h"
47#include "quant_bands.h"
48#include "rate.h"
49#include "stack_alloc.h"
50#include "mathops.h"
51#include "float_cast.h"
52#include <stdarg.h>
53#include "celt_lpc.h"
54#include "vq.h"
55
56#define OVERRIDE_comb_filter
/* MIPS DSP version of comb_filter() (selected via OVERRIDE_comb_filter).
   For each of the N samples it writes y[i] = x[i] + a 5-tap symmetric
   filter of x taken at a lag of T0 (old filter, gain g0, taps from
   gains[tapset0]) and/or T1 (new filter, gain g1, taps from gains[tapset1]).
   Over the first `overlap` samples the two filters are cross-faded with
   weights (Q15ONE - f) and f, where f = window[i] squared; the remaining
   samples use the new filter only.
   x is read at negative indices (down to -T1-2 and -T0-2), so the caller
   must supply that much history before x[0].
   arch is unused. */
57void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
58 opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
59 const opus_val16 *window, int overlap, int arch)
60{
61 int i;
62 opus_val32 x0, x1, x2, x3, x4;
63
64 (void)arch;
65
66 /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
67 opus_val16 g00, g01, g02, g10, g11, g12;
 /* One row of three tap gains (centre, +/-1, +/-2) per tapset index. */
68 static const opus_val16 gains[3][3] = {
69 {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
70 {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
71 {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
72
 /* Both gains zero: the filter is a pass-through. */
73 if (g0==0 && g1==0)
74 {
75 /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
76 if (x!=y)
77 OPUS_MOVE(y, x, N);
78 return;
79 }
80
 /* Fold the overall gains into the per-tap gains of the old (g0x) and new
    (g1x) filters. */
81 g00 = MULT16_16_P15(g0, gains[tapset0][0]);
82 g01 = MULT16_16_P15(g0, gains[tapset0][1]);
83 g02 = MULT16_16_P15(g0, gains[tapset0][2]);
84 g10 = MULT16_16_P15(g1, gains[tapset1][0]);
85 g11 = MULT16_16_P15(g1, gains[tapset1][1]);
86 g12 = MULT16_16_P15(g1, gains[tapset1][2]);
 /* Prime the sliding window of samples around lag T1; x0 is loaded inside
    the loops and the window is shifted by one sample per iteration. */
87 x1 = x[-T1+1];
88 x2 = x[-T1 ];
89 x3 = x[-T1-1];
90 x4 = x[-T1-2];
91 /* If the filter didn't change, we don't need the overlap */
92 if (g0==g1 && T0==T1 && tapset0==tapset1)
93 overlap=0;
94
 /* Cross-fade region. The sum of products is built up in DSP accumulator
    $ac1: MULT starts it, each MADD adds one more product, and EXTR.W reads
    it back shifted right by 15 (semantics per the MIPS DSP ASE -- confirm
    against the instruction manual).
    NOTE(review): the asm statements share $ac1 without declaring it as an
    operand or clobber, so this relies on the compiler neither reordering
    them nor using $ac1 in between -- confirm this is safe for the
    toolchains in use. */
95 for (i=0;i<overlap;i++)
96 {
97 opus_val16 f;
98 opus_val32 res;
99 f = MULT16_16_Q15(window[i],window[i]);
100 x0= x[i-T1+2];
101
102 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
103
104 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
105 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
106 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
107 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
108 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
109
110 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
111
112 y[i] = x[i] + res;
113
114 x4=x3;
115 x3=x2;
116 x2=x1;
117 x1=x0;
118 }
119
 /* Reload the sliding window from x at the current position i (the loop
    above leaves i at the first sample after the cross-fade region). */
120 x4 = x[i-T1-2];
121 x3 = x[i-T1-1];
122 x2 = x[i-T1];
123 x1 = x[i-T1+1];
124
 /* New filter has zero gain: the rest of the output is a plain copy. */
125 if (g1==0)
126 {
127 /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
128 if (x!=y)
129 OPUS_MOVE(y+overlap, x+overlap, N-overlap);
130 return;
131 }
132
 /* Steady-state region: only the new filter (lag T1, gains g10..g12). */
133 for (i=overlap;i<N;i++)
134 {
135 opus_val32 res;
136 x0=x[i-T1+2];
137
138 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
139
140 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
141 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
142 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
143 y[i] = x[i] + res;
144 x4=x3;
145 x3=x2;
146 x2=x1;
147 x1=x0;
148 }
149}
150
151#endif /* __CELT_MIPSR1_H__ */
diff --git a/lib/rbcodec/codecs/libopus/celt/mips/fixed_generic_mipsr1.h b/lib/rbcodec/codecs/libopus/celt/mips/fixed_generic_mipsr1.h
new file mode 100644
index 0000000000..4a05efbf85
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/mips/fixed_generic_mipsr1.h
@@ -0,0 +1,126 @@
1/* Copyright (C) 2007-2009 Xiph.Org Foundation
2 Copyright (C) 2003-2008 Jean-Marc Valin
3 Copyright (C) 2007-2008 CSIRO */
4/**
5 @file fixed_generic_mipsr1.h
6 @brief MIPS DSP overrides of the generic fixed-point operations
7*/
8/*
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#ifndef CELT_FIXED_GENERIC_MIPSR1_H
34#define CELT_FIXED_GENERIC_MIPSR1_H
35
36#undef MULT16_32_Q15_ADD
37static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
38 int m;
39 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
40 asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
41 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
42 return m;
43}
44
45#undef MULT16_32_Q15_SUB
46static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
47 int m;
48 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
49 asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
50 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
51 return m;
52}
53
54#undef MULT16_16_Q15_ADD
55static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
56 int m;
57 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
58 asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
59 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
60 return m;
61}
62
63#undef MULT16_16_Q15_SUB
64static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
65 int m;
66 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
67 asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
68 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
69 return m;
70}
71
72
73#undef MULT16_32_Q16
74static inline int MULT16_32_Q16(int a, int b)
75{
76 int c;
77 asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
78 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
79 return c;
80}
81
82#undef MULT16_32_P16
83static inline int MULT16_32_P16(int a, int b)
84{
85 int c;
86 asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
87 asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
88 return c;
89}
90
91#undef MULT16_32_Q15
92static inline int MULT16_32_Q15(int a, int b)
93{
94 int c;
95 asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
96 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
97 return c;
98}
99
100#undef MULT32_32_Q31
101static inline int MULT32_32_Q31(int a, int b)
102{
103 int r;
104 asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
105 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
106 return r;
107}
108
109#undef PSHR32
110static inline int PSHR32(int a, int shift)
111{
112 int r;
113 asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
114 return r;
115}
116
117#undef MULT16_16_P15
118static inline int MULT16_16_P15(int a, int b)
119{
120 int r;
121 asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
122 asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r): "0" (r), "i"(15));
123 return r;
124}
125
126#endif /* CELT_FIXED_GENERIC_MIPSR1_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/mips/kiss_fft_mipsr1.h b/lib/rbcodec/codecs/libopus/celt/mips/kiss_fft_mipsr1.h
new file mode 100644
index 0000000000..400ca4de9c
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/mips/kiss_fft_mipsr1.h
@@ -0,0 +1,167 @@
1/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
2
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 POSSIBILITY OF SUCH DAMAGE.*/
25
26#ifndef KISS_FFT_MIPSR1_H
27#define KISS_FFT_MIPSR1_H
28
29#if !defined(KISS_FFT_GUTS_H)
30#error "This file should only be included from _kiss_fft_guts.h"
31#endif
32
33#ifdef FIXED_POINT
34
35#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
36#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
37
38#undef S_MUL_ADD
39static inline int S_MUL_ADD(int a, int b, int c, int d) {
40 int m;
41 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
42 asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
43 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
44 return m;
45}
46
47#undef S_MUL_SUB
48static inline int S_MUL_SUB(int a, int b, int c, int d) {
49 int m;
50 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
51 asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
52 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
53 return m;
54}
55
56#undef C_MUL
57# define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
58static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
59 kiss_fft_cpx m;
60
61 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
62 asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
63 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
64 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
65 asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
66 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
67
68 return m;
69}
70#undef C_MULC
71# define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
72static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
73 kiss_fft_cpx m;
74
75 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
76 asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
77 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
78 asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
79 asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
80 asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
81
82 return m;
83}
84
85#endif /* FIXED_POINT */
86
87#define OVERRIDE_kf_bfly5
/* Radix-5 FFT butterfly, operating in place on Fout.
 *
 * Processes N groups; group i starts at Fout + i*mm.  Inside a group the five
 * inputs of each butterfly are m elements apart (Fout0..Fout4) and m
 * butterflies are computed.  Twiddle factors are read from st->twiddles at
 * indices u*fstride, 2*u*fstride, 3*u*fstride and 4*u*fstride.
 *
 * All products go through C_MUL / S_MUL_ADD / S_MUL_SUB.
 */
88static void kf_bfly5(
89 kiss_fft_cpx * Fout,
90 const size_t fstride,
91 const kiss_fft_state *st,
92 int m,
93 int N,
94 int mm
95 )
96{
97 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
98 int i, u;
99 kiss_fft_cpx scratch[13];
100
101 const kiss_twiddle_cpx *tw;
102 kiss_twiddle_cpx ya,yb;
103 kiss_fft_cpx * Fout_beg = Fout;
104
105#ifdef FIXED_POINT
/* Q15 constants: ya = exp(-2*pi*i/5) and yb = exp(-4*pi*i/5), i.e.
   (cos, -sin) of 72 and 144 degrees scaled by 32768. */
106 ya.r = 10126;
107 ya.i = -31164;
108 yb.r = -26510;
109 yb.i = -19261;
110#else
111 ya = st->twiddles[fstride*m];
112 yb = st->twiddles[fstride*2*m];
113#endif
114
115 tw=st->twiddles;
116
117 for (i=0;i<N;i++)
118 {
119 Fout = Fout_beg + i*mm;
120 Fout0=Fout;
121 Fout1=Fout0+m;
122 Fout2=Fout0+2*m;
123 Fout3=Fout0+3*m;
124 Fout4=Fout0+4*m;
125
126 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
127 for ( u=0; u<m; ++u ) {
/* Keep the untouched first input: Fout0 is overwritten below but
   scratch[0] is still needed for outputs 1..4. */
128 scratch[0] = *Fout0;
129
130
/* Apply the twiddles to inputs 1..4. */
131 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
132 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
133 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
134 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
135
/* Sums and differences of the symmetric pairs (1,4) and (2,3). */
136 C_ADD( scratch[7],scratch[1],scratch[4]);
137 C_SUB( scratch[10],scratch[1],scratch[4]);
138 C_ADD( scratch[8],scratch[2],scratch[3]);
139 C_SUB( scratch[9],scratch[2],scratch[3]);
140
/* Output 0: sum of all five (twiddled) inputs. */
141 Fout0->r += scratch[7].r + scratch[8].r;
142 Fout0->i += scratch[7].i + scratch[8].i;
143 scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
144 scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
145
146 scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
147 scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
148
/* Outputs 1 and 4 share scratch[5] +/- scratch[6]. */
149 C_SUB(*Fout1,scratch[5],scratch[6]);
150 C_ADD(*Fout4,scratch[5],scratch[6]);
151
152 scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
153 scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
154
155 scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
156 scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
157
/* Outputs 2 and 3 share scratch[11] +/- scratch[12]. */
158 C_ADD(*Fout2,scratch[11],scratch[12]);
159 C_SUB(*Fout3,scratch[11],scratch[12]);
160
161 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
162 }
163 }
164}
165
166
167#endif /* KISS_FFT_MIPSR1_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h b/lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h
new file mode 100644
index 0000000000..2934dab776
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/mips/mdct_mipsr1.h
@@ -0,0 +1,288 @@
1/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2008 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29/* This is a simple MDCT implementation that uses a N/4 complex FFT
30 to do most of the work. It should be relatively straightforward to
31 plug in pretty much any FFT here.
32
33 This replaces the Vorbis FFT (and uses the exact same API), which
34 was a bit too messy and that was ending up duplicating code
35 (might as well use the same FFT everywhere).
36
37 The algorithm is similar to (and inspired from) Fabrice Bellard's
38 MDCT implementation in FFMPEG, but has differences in signs, ordering
39 and scaling in many places.
40*/
41#ifndef __MDCT_MIPSR1_H__
42#define __MDCT_MIPSR1_H__
43
44#ifndef SKIP_CONFIG_H
45#ifdef HAVE_CONFIG_H
46#include "config.h"
47#endif
48#endif
49
50#include "mdct.h"
51#include "kiss_fft.h"
52#include "_kiss_fft_guts.h"
53#include <math.h>
54#include "os_support.h"
55#include "mathops.h"
56#include "stack_alloc.h"
57
58/* Forward MDCT trashes the input array */
59#define OVERRIDE_clt_mdct_forward
/* Forward MDCT built on an N/4-point complex FFT.
 *
 * l       - MDCT lookup: transform size l->n, trig table l->trig and one FFT
 *           state per shift in l->kfft[].
 * in      - input samples (read through xp1/xp2 below).
 * out     - output coefficients, written `stride` elements apart.
 * window  - window of `overlap` coefficients applied while folding.
 * shift   - selects l->kfft[shift]; N is halved `shift` times and the trig
 *           pointer is advanced past the tables of the larger sizes.
 * arch    - unused here.
 *
 * Steps: window/fold the input into f (N/2 scalars), pre-rotate and scale
 * into f2 in bit-reversed order, run opus_fft_impl(), then post-rotate into
 * out.  The multiply-accumulate pairs use S_MUL_ADD / S_MUL_SUB.
 */
60void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
61 const opus_val16 *window, int overlap, int shift, int stride, int arch)
62{
63 int i;
64 int N, N2, N4;
65 VARDECL(kiss_fft_scalar, f);
66 VARDECL(kiss_fft_cpx, f2);
67 const kiss_fft_state *st = l->kfft[shift];
68 const kiss_twiddle_scalar *trig;
69 opus_val16 scale;
70#ifdef FIXED_POINT
71 /* Allows us to scale with MULT16_32_Q16(), which is faster than
72 MULT16_32_Q15() on ARM. */
73 int scale_shift = st->scale_shift-1;
74#endif
75
76 (void)arch;
77
78 SAVE_STACK;
79 scale = st->scale;
80
81 N = l->n;
82 trig = l->trig;
/* Each shift step halves the transform size and skips the trig entries
   belonging to the previous (larger) size. */
83 for (i=0;i<shift;i++)
84 {
85 N >>= 1;
86 trig += N;
87 }
88 N2 = N>>1;
89 N4 = N>>2;
90
91 ALLOC(f, N2, kiss_fft_scalar);
92 ALLOC(f2, N4, kiss_fft_cpx);
93
94 /* Consider the input to be composed of four blocks: [a, b, c, d] */
95 /* Window, shuffle, fold */
96 {
97 /* Temp pointers to make it really clear to the compiler what we're doing */
98 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
99 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
100 kiss_fft_scalar * OPUS_RESTRICT yp = f;
101 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
102 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
/* First windowed region: (overlap+3)>>2 output pairs. */
103 for(i=0;i<((overlap+3)>>2);i++)
104 {
105 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
106 *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
107 *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
108 xp1+=2;
109 xp2-=2;
110 wp1+=2;
111 wp2-=2;
112 }
113 wp1 = window;
114 wp2 = window+overlap-1;
/* Middle region: samples are copied without windowing. */
115 for(;i<N4-((overlap+3)>>2);i++)
116 {
117 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
118 *yp++ = *xp2;
119 *yp++ = *xp1;
120 xp1+=2;
121 xp2-=2;
122 }
/* Last windowed region, up to N4 pairs in total. */
123 for(;i<N4;i++)
124 {
125 /* Real part arranged as a-bR, Imag part arranged as -c-dR */
126 *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
127 *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
128 xp1+=2;
129 xp2-=2;
130 wp1+=2;
131 wp2-=2;
132 }
133 }
134 /* Pre-rotation */
135 {
136 kiss_fft_scalar * OPUS_RESTRICT yp = f;
137 const kiss_twiddle_scalar *t = &trig[0];
138 for(i=0;i<N4;i++)
139 {
140 kiss_fft_cpx yc;
141 kiss_twiddle_scalar t0, t1;
142 kiss_fft_scalar re, im, yr, yi;
143 t0 = t[i];
144 t1 = t[N4+i];
145 re = *yp++;
146 im = *yp++;
147
148 yr = S_MUL_SUB(re,t0,im,t1);
149 yi = S_MUL_ADD(im,t0,re,t1);
150
151 yc.r = yr;
152 yc.i = yi;
/* Apply the FFT scale factor here, then store at the bit-reversed
   index so the FFT can run directly on f2. */
153 yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
154 yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
155 f2[st->bitrev[i]] = yc;
156 }
157 }
158
159 /* N/4 complex FFT, does not downscale anymore */
160 opus_fft_impl(st, f2);
161
162 /* Post-rotate */
163 {
164 /* Temp pointers to make it really clear to the compiler what we're doing */
165 const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
166 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
167 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
168 const kiss_twiddle_scalar *t = &trig[0];
169 /* Temp pointers to make it really clear to the compiler what we're doing */
/* yp1 walks forward from the start of out and yp2 backward from its
   last element, both in steps of 2*stride. */
170 for(i=0;i<N4;i++)
171 {
172 kiss_fft_scalar yr, yi;
173 yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
174 yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
175 *yp1 = yr;
176 *yp2 = yi;
177 fp++;
178 yp1 += 2*stride;
179 yp2 -= 2*stride;
180 }
181 }
182 RESTORE_STACK;
183}
184
185#define OVERRIDE_clt_mdct_backward
/* Inverse MDCT built on an N/4-point complex FFT, computed inside `out`.
 *
 * l       - MDCT lookup: transform size l->n, trig table l->trig and one FFT
 *           state per shift in l->kfft[].
 * in      - input coefficients, read `stride` elements apart.
 * out     - output buffer; the FFT runs in place at out+(overlap>>1).
 * window  - window of `overlap` coefficients used in the final mirroring.
 * shift   - selects l->kfft[shift]; N is halved `shift` times and the trig
 *           pointer is advanced past the tables of the larger sizes.
 * arch    - unused here.
 *
 * Steps: pre-rotate the input straight into bit-reversed order, run
 * opus_fft_impl(), post-rotate/de-shuffle in place from both ends, then
 * apply the window over the first `overlap` samples.
 */
186void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
187 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
188{
189 int i;
190 int N, N2, N4;
191 const kiss_twiddle_scalar *trig;
192
193 (void)arch;
194
195 N = l->n;
196 trig = l->trig;
/* Each shift step halves the transform size and skips the trig entries
   belonging to the previous (larger) size. */
197 for (i=0;i<shift;i++)
198 {
199 N >>= 1;
200 trig += N;
201 }
202 N2 = N>>1;
203 N4 = N>>2;
204
205 /* Pre-rotate */
206 {
207 /* Temp pointers to make it really clear to the compiler what we're doing */
208 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
209 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
210 kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
211 const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
212 const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
213 for(i=0;i<N4;i++)
214 {
215 int rev;
216 kiss_fft_scalar yr, yi;
217 rev = *bitrev++;
218 yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
219 yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
220 /* We swap real and imag because we use an FFT instead of an IFFT. */
221 yp[2*rev+1] = yr;
222 yp[2*rev] = yi;
223 /* Storing the pre-rotation directly in the bitrev order. */
224 xp1+=2*stride;
225 xp2-=2*stride;
226 }
227 }
228
/* The scalar buffer is reinterpreted as N4 complex values for the FFT. */
229 opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
230
231 /* Post-rotate and de-shuffle from both ends of the buffer at once to make
232 it in-place. */
233 {
234 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
235 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
236 const kiss_twiddle_scalar *t = &trig[0];
237 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
238 middle pair will be computed twice. */
239 for(i=0;i<(N4+1)>>1;i++)
240 {
241 kiss_fft_scalar re, im, yr, yi;
242 kiss_twiddle_scalar t0, t1;
243 /* We swap real and imag because we're using an FFT instead of an IFFT. */
244 re = yp0[1];
245 im = yp0[0];
246 t0 = t[i];
247 t1 = t[N4+i];
248 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
249 yr = S_MUL_ADD(re,t0 , im,t1);
250 yi = S_MUL_SUB(re,t1 , im,t0);
251 /* We swap real and imag because we're using an FFT instead of an IFFT. */
/* The back pair must be loaded before yp1[1] is overwritten below. */
252 re = yp1[1];
253 im = yp1[0];
254 yp0[0] = yr;
255 yp1[1] = yi;
256
257 t0 = t[(N4-i-1)];
258 t1 = t[(N2-i-1)];
259 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
260 yr = S_MUL_ADD(re,t0,im,t1);
261 yi = S_MUL_SUB(re,t1,im,t0);
262 yp1[0] = yr;
263 yp0[1] = yi;
264 yp0 += 2;
265 yp1 -= 2;
266 }
267 }
268
269 /* Mirror on both sides for TDAC */
270 {
271 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
272 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
273 const opus_val16 * OPUS_RESTRICT wp1 = window;
274 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
275
/* xp1 walks down from out[overlap-1] while yp1 walks up from out[0];
   both values are read before either location is written. */
276 for(i = 0; i < overlap/2; i++)
277 {
278 kiss_fft_scalar x1, x2;
279 x1 = *xp1;
280 x2 = *yp1;
281 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
282 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
283 wp1++;
284 wp2--;
285 }
286 }
287}
288#endif /* __MDCT_MIPSR1_H__ */
diff --git a/lib/rbcodec/codecs/libopus/celt/mips/pitch_mipsr1.h b/lib/rbcodec/codecs/libopus/celt/mips/pitch_mipsr1.h
new file mode 100644
index 0000000000..a9500aff58
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/mips/pitch_mipsr1.h
@@ -0,0 +1,161 @@
1/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
4/**
5 @file pitch_mipsr1.h
6 @brief MIPS DSP overrides for pitch analysis
7 */
8
9/*
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13
14 - Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 - Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*/
33
34#ifndef PITCH_MIPSR1_H
35#define PITCH_MIPSR1_H
36
37#define OVERRIDE_DUAL_INNER_PROD
38static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
39 int N, opus_val32 *xy1, opus_val32 *xy2, int arch)
40{
41 int j;
42 opus_val32 xy01=0;
43 opus_val32 xy02=0;
44
45 (void)arch;
46
47 asm volatile("MULT $ac1, $0, $0");
48 asm volatile("MULT $ac2, $0, $0");
49 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
50 for (j=0;j<N;j++)
51 {
52 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
53 asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
54 ++j;
55 asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
56 asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
57 }
58 asm volatile ("mflo %0, $ac1": "=r"(xy01));
59 asm volatile ("mflo %0, $ac2": "=r"(xy02));
60 *xy1 = xy01;
61 *xy2 = xy02;
62}
63
/* Cross-correlation kernel for four consecutive lags.
 *
 * Adds to sum[k] (k = 0..3) the value sum_{j<len} x[j]*y[j+k], so y is read
 * up to index len+2.  The four running totals are kept as 64-bit values and
 * updated with __builtin_mips_madd; only their low 32 bits are stored back.
 *
 * y_0..y_3 act as a rotating window over y: each step loads one new y sample
 * into the slot that is no longer needed, so every y element is loaded once.
 */
64static inline void xcorr_kernel_mips(const opus_val16 * x,
65 const opus_val16 * y, opus_val32 sum[4], int len)
66{
67 int j;
68 opus_val16 y_0, y_1, y_2, y_3;
69
70 opus_int64 sum_0, sum_1, sum_2, sum_3;
71 sum_0 = (opus_int64)sum[0];
72 sum_1 = (opus_int64)sum[1];
73 sum_2 = (opus_int64)sum[2];
74 sum_3 = (opus_int64)sum[3];
75
76 y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
/* Prime the window with the first three y samples. */
77 y_0=*y++;
78 y_1=*y++;
79 y_2=*y++;
/* Main loop: four x samples per iteration, window rotated once per sample. */
80 for (j=0;j<len-3;j+=4)
81 {
82 opus_val16 tmp;
83 tmp = *x++;
84 y_3=*y++;
85
86 sum_0 = __builtin_mips_madd( sum_0, tmp, y_0);
87 sum_1 = __builtin_mips_madd( sum_1, tmp, y_1);
88 sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
89 sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
90
91 tmp=*x++;
92 y_0=*y++;
93
94 sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
95 sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
96 sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
97 sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
98
99 tmp=*x++;
100 y_1=*y++;
101
102 sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
103 sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
104 sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
105 sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
106
107
108 tmp=*x++;
109 y_2=*y++;
110
111 sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 );
112 sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 );
113 sum_2 = __builtin_mips_madd( sum_2, tmp, y_1);
114 sum_3 = __builtin_mips_madd( sum_3, tmp, y_2);
115
116 }
/* Tail: up to three remaining x samples, continuing the same window
   rotation.  The post-increments of j count the samples consumed. */
117 if (j++<len)
118 {
119 opus_val16 tmp = *x++;
120 y_3=*y++;
121
122 sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 );
123 sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 );
124 sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
125 sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
126 }
127
128 if (j++<len)
129 {
130 opus_val16 tmp=*x++;
131 y_0=*y++;
132
133 sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
134 sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
135 sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
136 sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
137 }
138
139 if (j<len)
140 {
141 opus_val16 tmp=*x++;
142 y_1=*y++;
143
144 sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
145 sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
146 sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
147 sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
148
149 }
150
/* Store back the low 32 bits of each running total. */
151 sum[0] = (opus_val32)sum_0;
152 sum[1] = (opus_val32)sum_1;
153 sum[2] = (opus_val32)sum_2;
154 sum[3] = (opus_val32)sum_3;
155}
156
/* Route xcorr_kernel() to the MIPS implementation; the arch argument is
   evaluated and discarded. */
157#define OVERRIDE_XCORR_KERNEL
158#define xcorr_kernel(x, y, sum, len, arch) \
159 ((void)(arch), xcorr_kernel_mips(x, y, sum, len))
160
161#endif /* PITCH_MIPSR1_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/mips/vq_mipsr1.h b/lib/rbcodec/codecs/libopus/celt/mips/vq_mipsr1.h
new file mode 100644
index 0000000000..fd18eab7a9
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/mips/vq_mipsr1.h
@@ -0,0 +1,122 @@
1/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifndef __VQ_MIPSR1_H__
30#define __VQ_MIPSR1_H__
31
32#ifdef HAVE_CONFIG_H
33#include "config.h"
34#endif
35
36#include "mathops.h"
37#include "arch.h"
38
39static void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch);
40
41#define OVERRIDE_vq_exp_rotation1
42static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
43{
44 int i;
45 opus_val16 ms;
46 celt_norm *Xptr;
47 Xptr = X;
48 ms = NEG16(s);
49 for (i=0;i<len-stride;i++)
50 {
51 celt_norm x1, x2;
52 x1 = Xptr[0];
53 x2 = Xptr[stride];
54 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
55 *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
56 }
57 Xptr = &X[len-2*stride-1];
58 for (i=len-2*stride-1;i>=0;i--)
59 {
60 celt_norm x1, x2;
61 x1 = Xptr[0];
62 x2 = Xptr[stride];
63 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
64 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
65 }
66}
67
68#define OVERRIDE_renormalise_vector
69
70#define renormalise_vector(X, N, gain, arch) \
71 (renormalise_vector_mips(X, N, gain, arch))
72
73void renormalise_vector_mips(celt_norm *X, int N, opus_val16 gain, int arch)
74{
75 int i;
76#ifdef FIXED_POINT
77 int k;
78#endif
79 opus_val32 E = EPSILON;
80 opus_val16 g;
81 opus_val32 t;
82 celt_norm *xptr = X;
83 int X0, X1;
84
85 (void)arch;
86
87 asm volatile("mult $ac1, $0, $0");
88 asm volatile("MTLO %0, $ac1" : :"r" (E));
89 /*if(N %4)
90 printf("error");*/
91 for (i=0;i<N-2;i+=2)
92 {
93 X0 = (int)*xptr++;
94 asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
95
96 X1 = (int)*xptr++;
97 asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
98 }
99
100 for (;i<N;i++)
101 {
102 X0 = (int)*xptr++;
103 asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
104 }
105
106 asm volatile("MFLO %0, $ac1" : "=r" (E));
107#ifdef FIXED_POINT
108 k = celt_ilog2(E)>>1;
109#endif
110 t = VSHR32(E, 2*(k-7));
111 g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
112
113 xptr = X;
114 for (i=0;i<N;i++)
115 {
116 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
117 xptr++;
118 }
119 /*return celt_sqrt(E);*/
120}
121
122#endif /* __VQ_MIPSR1_H__ */
diff --git a/lib/rbcodec/codecs/libopus/celt/modes.c b/lib/rbcodec/codecs/libopus/celt/modes.c
index 42e68e1cb7..390c5e8aeb 100644
--- a/lib/rbcodec/codecs/libopus/celt/modes.c
+++ b/lib/rbcodec/codecs/libopus/celt/modes.c
@@ -37,6 +37,7 @@
37#include "os_support.h" 37#include "os_support.h"
38#include "stack_alloc.h" 38#include "stack_alloc.h"
39#include "quant_bands.h" 39#include "quant_bands.h"
40#include "cpu_support.h"
40 41
41static const opus_int16 eband5ms[] = { 42static const opus_int16 eband5ms[] = {
42/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */ 43/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */
@@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
229 opus_val16 *window; 230 opus_val16 *window;
230 opus_int16 *logN; 231 opus_int16 *logN;
231 int LM; 232 int LM;
233 int arch = opus_select_arch();
232 ALLOC_STACK; 234 ALLOC_STACK;
233#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA) 235#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
234 if (global_stack==NULL) 236 if (global_stack==NULL)
@@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
389 compute_pulse_cache(mode, mode->maxLM); 391 compute_pulse_cache(mode, mode->maxLM);
390 392
391 if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, 393 if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
392 mode->maxLM) == 0) 394 mode->maxLM, arch) == 0)
393 goto failure; 395 goto failure;
394 396
395 if (error) 397 if (error)
@@ -408,6 +410,8 @@ failure:
408#ifdef CUSTOM_MODES 410#ifdef CUSTOM_MODES
409void opus_custom_mode_destroy(CELTMode *mode) 411void opus_custom_mode_destroy(CELTMode *mode)
410{ 412{
413 int arch = opus_select_arch();
414
411 if (mode == NULL) 415 if (mode == NULL)
412 return; 416 return;
413#ifndef CUSTOM_MODES_ONLY 417#ifndef CUSTOM_MODES_ONLY
@@ -423,7 +427,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
423 } 427 }
424#endif /* CUSTOM_MODES_ONLY */ 428#endif /* CUSTOM_MODES_ONLY */
425 opus_free((opus_int16*)mode->eBands); 429 opus_free((opus_int16*)mode->eBands);
426 opus_free((opus_int16*)mode->allocVectors); 430 opus_free((unsigned char*)mode->allocVectors);
427 431
428 opus_free((opus_val16*)mode->window); 432 opus_free((opus_val16*)mode->window);
429 opus_free((opus_int16*)mode->logN); 433 opus_free((opus_int16*)mode->logN);
@@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
431 opus_free((opus_int16*)mode->cache.index); 435 opus_free((opus_int16*)mode->cache.index);
432 opus_free((unsigned char*)mode->cache.bits); 436 opus_free((unsigned char*)mode->cache.bits);
433 opus_free((unsigned char*)mode->cache.caps); 437 opus_free((unsigned char*)mode->cache.caps);
434 clt_mdct_clear(&mode->mdct); 438 clt_mdct_clear(&mode->mdct, arch);
435 439
436 opus_free((CELTMode *)mode); 440 opus_free((CELTMode *)mode);
437} 441}
diff --git a/lib/rbcodec/codecs/libopus/celt/opus_custom_demo.c b/lib/rbcodec/codecs/libopus/celt/opus_custom_demo.c
new file mode 100644
index 0000000000..ae41c0de5a
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/opus_custom_demo.c
@@ -0,0 +1,210 @@
1/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include "opus_custom.h"
34#include "arch.h"
35#include <stdio.h>
36#include <stdlib.h>
37#include <math.h>
38#include <string.h>
39
40#define MAX_PACKET 1275
41
42int main(int argc, char *argv[])
43{
44 int err;
45 char *inFile, *outFile;
46 FILE *fin, *fout;
47 OpusCustomMode *mode=NULL;
48 OpusCustomEncoder *enc;
49 OpusCustomDecoder *dec;
50 int len;
51 opus_int32 frame_size, channels, rate;
52 int bytes_per_packet;
53 unsigned char data[MAX_PACKET];
54 int complexity;
55#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
56 int i;
57 double rmsd = 0;
58#endif
59 int count = 0;
60 opus_int32 skip;
61 opus_int16 *in, *out;
62 if (argc != 9 && argc != 8 && argc != 7)
63 {
64 fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> "
65 " <bytes per packet> [<complexity> [packet loss rate]] "
66 "<input> <output>\n");
67 return 1;
68 }
69
70 rate = (opus_int32)atol(argv[1]);
71 channels = atoi(argv[2]);
72 frame_size = atoi(argv[3]);
73 mode = opus_custom_mode_create(rate, frame_size, NULL);
74 if (mode == NULL)
75 {
76 fprintf(stderr, "failed to create a mode\n");
77 return 1;
78 }
79
80 bytes_per_packet = atoi(argv[4]);
81 if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
82 {
83 fprintf (stderr, "bytes per packet must be between 0 and %d\n",
84 MAX_PACKET);
85 return 1;
86 }
87
88 inFile = argv[argc-2];
89 fin = fopen(inFile, "rb");
90 if (!fin)
91 {
92 fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
93 return 1;
94 }
95 outFile = argv[argc-1];
96 fout = fopen(outFile, "wb+");
97 if (!fout)
98 {
99 fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
100 fclose(fin);
101 return 1;
102 }
103
104 enc = opus_custom_encoder_create(mode, channels, &err);
105 if (err != 0)
106 {
107 fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err));
108 fclose(fin);
109 fclose(fout);
110 return 1;
111 }
112 dec = opus_custom_decoder_create(mode, channels, &err);
113 if (err != 0)
114 {
115 fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err));
116 fclose(fin);
117 fclose(fout);
118 return 1;
119 }
120 opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip));
121
122 if (argc>7)
123 {
124 complexity=atoi(argv[5]);
125 opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity));
126 }
127
128 in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
129 out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
130
131 while (!feof(fin))
132 {
133 int ret;
134 err = fread(in, sizeof(short), frame_size*channels, fin);
135 if (feof(fin))
136 break;
137 len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet);
138 if (len <= 0)
139 fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len));
140
141 /* This is for simulating bit errors */
142#if 0
143 int errors = 0;
144 int eid = 0;
145 /* This simulates random bit error */
146 for (i=0;i<len*8;i++)
147 {
148 if (rand()%atoi(argv[8])==0)
149 {
150 if (i<64)
151 {
152 errors++;
153 eid = i;
154 }
155 data[i/8] ^= 1<<(7-(i%8));
156 }
157 }
158 if (errors == 1)
159 data[eid/8] ^= 1<<(7-(eid%8));
160 else if (errors%2 == 1)
161 data[rand()%8] ^= 1<<rand()%8;
162#endif
163
164#if 1 /* Set to zero to use the encoder's output instead */
165 /* This is to simulate packet loss */
166 if (argc==9 && rand()%1000<atoi(argv[argc-3]))
167 /*if (errors && (errors%2==0))*/
168 ret = opus_custom_decode(dec, NULL, len, out, frame_size);
169 else
170 ret = opus_custom_decode(dec, data, len, out, frame_size);
171 if (ret < 0)
172 fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret));
173#else
174 for (i=0;i<ret*channels;i++)
175 out[i] = in[i];
176#endif
177#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
178 for (i=0;i<ret*channels;i++)
179 {
180 rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]);
181 /*out[i] -= in[i];*/
182 }
183#endif
184 count++;
185 fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout);
186 skip = 0;
187 }
188 PRINT_MIPS(stderr);
189
190 opus_custom_encoder_destroy(enc);
191 opus_custom_decoder_destroy(dec);
192 fclose(fin);
193 fclose(fout);
194 opus_custom_mode_destroy(mode);
195 free(in);
196 free(out);
197#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
198 if (rmsd > 0)
199 {
200 rmsd = sqrt(rmsd/(1.0*frame_size*channels*count));
201 fprintf (stderr, "Error: encoder doesn't match decoder\n");
202 fprintf (stderr, "RMS mismatch is %f\n", rmsd);
203 return 1;
204 } else {
205 fprintf (stderr, "Encoder matches decoder!!\n");
206 }
207#endif
208 return 0;
209}
210
diff --git a/lib/rbcodec/codecs/libopus/celt/os_support.h b/lib/rbcodec/codecs/libopus/celt/os_support.h
index 5e47e3cff9..a2171971e9 100644
--- a/lib/rbcodec/codecs/libopus/celt/os_support.h
+++ b/lib/rbcodec/codecs/libopus/celt/os_support.h
@@ -67,18 +67,18 @@ static OPUS_INLINE void opus_free (void *ptr)
67} 67}
68#endif 68#endif
69 69
70/** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking */ 70/** Copy n elements from src to dst. The 0* term provides compile-time type checking */
71#ifndef OVERRIDE_OPUS_COPY 71#ifndef OVERRIDE_OPUS_COPY
72#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 72#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
73#endif 73#endif
74 74
75/** Copy n bytes of memory from src to dst, allowing overlapping regions. The 0* term 75/** Copy n elements from src to dst, allowing overlapping regions. The 0* term
76 provides compile-time type checking */ 76 provides compile-time type checking */
77#ifndef OVERRIDE_OPUS_MOVE 77#ifndef OVERRIDE_OPUS_MOVE
78#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) )) 78#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
79#endif 79#endif
80 80
81/** Set n elements of dst to zero, starting at address s */ 81/** Set n elements of dst to zero */
82#ifndef OVERRIDE_OPUS_CLEAR 82#ifndef OVERRIDE_OPUS_CLEAR
83#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst)))) 83#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
84#endif 84#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.c b/lib/rbcodec/codecs/libopus/celt/pitch.c
index ee56a434f0..872582a48a 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.c
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.c
@@ -102,11 +102,9 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
102 } 102 }
103} 103}
104 104
105static void celt_fir5(const opus_val16 *x, 105static void celt_fir5(opus_val16 *x,
106 const opus_val16 *num, 106 const opus_val16 *num,
107 opus_val16 *y, 107 int N)
108 int N,
109 opus_val16 *mem)
110{ 108{
111 int i; 109 int i;
112 opus_val16 num0, num1, num2, num3, num4; 110 opus_val16 num0, num1, num2, num3, num4;
@@ -116,11 +114,11 @@ static void celt_fir5(const opus_val16 *x,
116 num2=num[2]; 114 num2=num[2];
117 num3=num[3]; 115 num3=num[3];
118 num4=num[4]; 116 num4=num[4];
119 mem0=mem[0]; 117 mem0=0;
120 mem1=mem[1]; 118 mem1=0;
121 mem2=mem[2]; 119 mem2=0;
122 mem3=mem[3]; 120 mem3=0;
123 mem4=mem[4]; 121 mem4=0;
124 for (i=0;i<N;i++) 122 for (i=0;i<N;i++)
125 { 123 {
126 opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); 124 opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
@@ -134,13 +132,8 @@ static void celt_fir5(const opus_val16 *x,
134 mem2 = mem1; 132 mem2 = mem1;
135 mem1 = mem0; 133 mem1 = mem0;
136 mem0 = x[i]; 134 mem0 = x[i];
137 y[i] = ROUND16(sum, SIG_SHIFT); 135 x[i] = ROUND16(sum, SIG_SHIFT);
138 } 136 }
139 mem[0]=mem0;
140 mem[1]=mem1;
141 mem[2]=mem2;
142 mem[3]=mem3;
143 mem[4]=mem4;
144} 137}
145 138
146 139
@@ -150,7 +143,7 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
150 int i; 143 int i;
151 opus_val32 ac[5]; 144 opus_val32 ac[5];
152 opus_val16 tmp=Q15ONE; 145 opus_val16 tmp=Q15ONE;
153 opus_val16 lpc[4], mem[5]={0,0,0,0,0}; 146 opus_val16 lpc[4];
154 opus_val16 lpc2[5]; 147 opus_val16 lpc2[5];
155 opus_val16 c1 = QCONST16(.8f,15); 148 opus_val16 c1 = QCONST16(.8f,15);
156#ifdef FIXED_POINT 149#ifdef FIXED_POINT
@@ -211,28 +204,33 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
211 lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); 204 lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
212 lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); 205 lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
213 lpc2[4] = MULT16_16_Q15(c1,lpc[3]); 206 lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
214 celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); 207 celt_fir5(x_lp, lpc2, len>>1);
215} 208}
216 209
217#if 0 /* This is a simple version of the pitch correlation that should work 210/* Pure C implementation. */
218 well on DSPs like Blackfin and TI C5x/C6x */
219
220#ifdef FIXED_POINT 211#ifdef FIXED_POINT
221opus_val32 212opus_val32
222#else 213#else
223void 214void
224#endif 215#endif
225celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) 216celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
217 opus_val32 *xcorr, int len, int max_pitch, int arch)
226{ 218{
219
220#if 0 /* This is a simple version of the pitch correlation that should work
221 well on DSPs like Blackfin and TI C5x/C6x */
227 int i, j; 222 int i, j;
228#ifdef FIXED_POINT 223#ifdef FIXED_POINT
229 opus_val32 maxcorr=1; 224 opus_val32 maxcorr=1;
230#endif 225#endif
226#if !defined(OVERRIDE_PITCH_XCORR)
227 (void)arch;
228#endif
231 for (i=0;i<max_pitch;i++) 229 for (i=0;i<max_pitch;i++)
232 { 230 {
233 opus_val32 sum = 0; 231 opus_val32 sum = 0;
234 for (j=0;j<len;j++) 232 for (j=0;j<len;j++)
235 sum = MAC16_16(sum, x[j],y[i+j]); 233 sum = MAC16_16(sum, _x[j], _y[i+j]);
236 xcorr[i] = sum; 234 xcorr[i] = sum;
237#ifdef FIXED_POINT 235#ifdef FIXED_POINT
238 maxcorr = MAX32(maxcorr, sum); 236 maxcorr = MAX32(maxcorr, sum);
@@ -241,17 +239,8 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
241#ifdef FIXED_POINT 239#ifdef FIXED_POINT
242 return maxcorr; 240 return maxcorr;
243#endif 241#endif
244}
245 242
246#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ 243#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
247
248#ifdef FIXED_POINT
249opus_val32
250#else
251void
252#endif
253celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
254{
255 int i; 244 int i;
256 /*The EDSP version requires that max_pitch is at least 1, and that _x is 245 /*The EDSP version requires that max_pitch is at least 1, and that _x is
257 32-bit aligned. 246 32-bit aligned.
@@ -260,11 +249,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
260 opus_val32 maxcorr=1; 249 opus_val32 maxcorr=1;
261#endif 250#endif
262 celt_assert(max_pitch>0); 251 celt_assert(max_pitch>0);
263 celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); 252 celt_sig_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
264 for (i=0;i<max_pitch-3;i+=4) 253 for (i=0;i<max_pitch-3;i+=4)
265 { 254 {
266 opus_val32 sum[4]={0,0,0,0}; 255 opus_val32 sum[4]={0,0,0,0};
267 xcorr_kernel(_x, _y+i, sum, len); 256 xcorr_kernel(_x, _y+i, sum, len, arch);
268 xcorr[i]=sum[0]; 257 xcorr[i]=sum[0];
269 xcorr[i+1]=sum[1]; 258 xcorr[i+1]=sum[1];
270 xcorr[i+2]=sum[2]; 259 xcorr[i+2]=sum[2];
@@ -280,7 +269,7 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
280 for (;i<max_pitch;i++) 269 for (;i<max_pitch;i++)
281 { 270 {
282 opus_val32 sum; 271 opus_val32 sum;
283 sum = celt_inner_prod(_x, _y+i, len); 272 sum = celt_inner_prod(_x, _y+i, len, arch);
284 xcorr[i] = sum; 273 xcorr[i] = sum;
285#ifdef FIXED_POINT 274#ifdef FIXED_POINT
286 maxcorr = MAX32(maxcorr, sum); 275 maxcorr = MAX32(maxcorr, sum);
@@ -289,9 +278,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
289#ifdef FIXED_POINT 278#ifdef FIXED_POINT
290 return maxcorr; 279 return maxcorr;
291#endif 280#endif
281#endif
292} 282}
293 283
294#endif
295void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, 284void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
296 int len, int max_pitch, int *pitch, int arch) 285 int len, int max_pitch, int *pitch, int arch)
297{ 286{
@@ -369,7 +358,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
369 for (j=0;j<len>>1;j++) 358 for (j=0;j<len>>1;j++)
370 sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); 359 sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
371#else 360#else
372 sum = celt_inner_prod(x_lp, y+i, len>>1); 361 sum = celt_inner_prod(x_lp, y+i, len>>1, arch);
373#endif 362#endif
374 xcorr[i] = MAX32(-1, sum); 363 xcorr[i] = MAX32(-1, sum);
375#ifdef FIXED_POINT 364#ifdef FIXED_POINT
@@ -403,10 +392,44 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
403 RESTORE_STACK; 392 RESTORE_STACK;
404} 393}
405 394
406#if 0 395#ifdef FIXED_POINT
396static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
397{
398 opus_val32 x2y2;
399 int sx, sy, shift;
400 opus_val32 g;
401 opus_val16 den;
402 if (xy == 0 || xx == 0 || yy == 0)
403 return 0;
404 sx = celt_ilog2(xx)-14;
405 sy = celt_ilog2(yy)-14;
406 shift = sx + sy;
407 x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
408 if (shift & 1) {
409 if (x2y2 < 32768)
410 {
411 x2y2 <<= 1;
412 shift--;
413 } else {
414 x2y2 >>= 1;
415 shift++;
416 }
417 }
418 den = celt_rsqrt_norm(x2y2);
419 g = MULT16_32_Q15(den, xy);
420 g = VSHR32(g, (shift>>1)-1);
421 return EXTRACT16(MIN32(g, Q15ONE));
422}
423#else
424static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
425{
426 return xy/celt_sqrt(1+xx*yy);
427}
428#endif
429
407static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; 430static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
408opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, 431opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
409 int N, int *T0_, int prev_period, opus_val16 prev_gain) 432 int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
410{ 433{
411 int k, i, T, T0; 434 int k, i, T, T0;
412 opus_val16 g, g0; 435 opus_val16 g, g0;
@@ -431,7 +454,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
431 454
432 T = T0 = *T0_; 455 T = T0 = *T0_;
433 ALLOC(yy_lookup, maxperiod+1, opus_val32); 456 ALLOC(yy_lookup, maxperiod+1, opus_val32);
434 dual_inner_prod(x, x, x-T0, N, &xx, &xy); 457 dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
435 yy_lookup[0] = xx; 458 yy_lookup[0] = xx;
436 yy=xx; 459 yy=xx;
437 for (i=1;i<=maxperiod;i++) 460 for (i=1;i<=maxperiod;i++)
@@ -442,18 +465,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
442 yy = yy_lookup[T0]; 465 yy = yy_lookup[T0];
443 best_xy = xy; 466 best_xy = xy;
444 best_yy = yy; 467 best_yy = yy;
445#ifdef FIXED_POINT 468 g = g0 = compute_pitch_gain(xy, xx, yy);
446 {
447 opus_val32 x2y2;
448 int sh, t;
449 x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
450 sh = celt_ilog2(x2y2)>>1;
451 t = VSHR32(x2y2, 2*(sh-7));
452 g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
453 }
454#else
455 g = g0 = xy/celt_sqrt(1+xx*yy);
456#endif
457 /* Look for any pitch at T/k */ 469 /* Look for any pitch at T/k */
458 for (k=2;k<=15;k++) 470 for (k=2;k<=15;k++)
459 { 471 {
@@ -475,25 +487,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
475 { 487 {
476 T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); 488 T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
477 } 489 }
478 dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); 490 dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
479 xy += xy2; 491 xy = HALF32(xy + xy2);
480 yy = yy_lookup[T1] + yy_lookup[T1b]; 492 yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
481#ifdef FIXED_POINT 493 g1 = compute_pitch_gain(xy, xx, yy);
482 {
483 opus_val32 x2y2;
484 int sh, t;
485 x2y2 = 1+MULT32_32_Q31(xx,yy);
486 sh = celt_ilog2(x2y2)>>1;
487 t = VSHR32(x2y2, 2*(sh-7));
488 g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
489 }
490#else
491 g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
492#endif
493 if (abs(T1-prev_period)<=1) 494 if (abs(T1-prev_period)<=1)
494 cont = prev_gain; 495 cont = prev_gain;
495 else if (abs(T1-prev_period)<=2 && 5*k*k < T0) 496 else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
496 cont = HALF32(prev_gain); 497 cont = HALF16(prev_gain);
497 else 498 else
498 cont = 0; 499 cont = 0;
499 thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); 500 thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
@@ -518,7 +519,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
518 pg = SHR32(frac_div32(best_xy,best_yy+1),16); 519 pg = SHR32(frac_div32(best_xy,best_yy+1),16);
519 520
520 for (k=0;k<3;k++) 521 for (k=0;k<3;k++)
521 xcorr[k] = celt_inner_prod(x, x-(T+k-1), N); 522 xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
522 if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) 523 if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
523 offset = 1; 524 offset = 1;
524 else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) 525 else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
@@ -534,4 +535,3 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
534 RESTORE_STACK; 535 RESTORE_STACK;
535 return pg; 536 return pg;
536} 537}
537#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/pitch.h b/lib/rbcodec/codecs/libopus/celt/pitch.h
index 96dbc0d794..e425f56aea 100644
--- a/lib/rbcodec/codecs/libopus/celt/pitch.h
+++ b/lib/rbcodec/codecs/libopus/celt/pitch.h
@@ -37,7 +37,8 @@
37#include "modes.h" 37#include "modes.h"
38#include "cpu_support.h" 38#include "cpu_support.h"
39 39
40#if defined(__SSE__) && !defined(FIXED_POINT) 40#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \
41 || ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT))
41#include "x86/pitch_sse.h" 42#include "x86/pitch_sse.h"
42#endif 43#endif
43 44
@@ -45,8 +46,8 @@
45#include "mips/pitch_mipsr1.h" 46#include "mips/pitch_mipsr1.h"
46#endif 47#endif
47 48
48#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) 49#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
49//# include "arm/pitch_arm.h" 50# include "arm/pitch_arm.h"
50#endif 51#endif
51 52
52void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, 53void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
@@ -56,12 +57,12 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
56 int len, int max_pitch, int *pitch, int arch); 57 int len, int max_pitch, int *pitch, int arch);
57 58
58opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, 59opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
59 int N, int *T0, int prev_period, opus_val16 prev_gain); 60 int N, int *T0, int prev_period, opus_val16 prev_gain, int arch);
61
60 62
61/* OPT: This is the kernel you really want to optimize. It gets used a lot 63/* OPT: This is the kernel you really want to optimize. It gets used a lot
62 by the prefilter and by the PLC. */ 64 by the prefilter and by the PLC. */
63#ifndef OVERRIDE_XCORR_KERNEL 65static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
64static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
65{ 66{
66 int j; 67 int j;
67 opus_val16 y_0, y_1, y_2, y_3; 68 opus_val16 y_0, y_1, y_2, y_3;
@@ -126,10 +127,14 @@ static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y,
126 sum[3] = MAC16_16(sum[3],tmp,y_1); 127 sum[3] = MAC16_16(sum[3],tmp,y_1);
127 } 128 }
128} 129}
130
131#ifndef OVERRIDE_XCORR_KERNEL
132#define xcorr_kernel(x, y, sum, len, arch) \
133 ((void)(arch),xcorr_kernel_c(x, y, sum, len))
129#endif /* OVERRIDE_XCORR_KERNEL */ 134#endif /* OVERRIDE_XCORR_KERNEL */
130 135
131#ifndef OVERRIDE_DUAL_INNER_PROD 136
132static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, 137static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
133 int N, opus_val32 *xy1, opus_val32 *xy2) 138 int N, opus_val32 *xy1, opus_val32 *xy2)
134{ 139{
135 int i; 140 int i;
@@ -143,11 +148,16 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y
143 *xy1 = xy01; 148 *xy1 = xy01;
144 *xy2 = xy02; 149 *xy2 = xy02;
145} 150}
151
152#ifndef OVERRIDE_DUAL_INNER_PROD
153# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
154 ((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2))
146#endif 155#endif
147 156
148#ifndef OVERRIDE_CELT_INNER_PROD 157/*We make sure a C version is always available for cases where the overhead of
149static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_val16 *y, 158 vectorization and passing around an arch flag aren't worth it.*/
150 int N) 159static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x,
160 const opus_val16 *y, int N)
151{ 161{
152 int i; 162 int i;
153 opus_val32 xy=0; 163 opus_val32 xy=0;
@@ -155,35 +165,28 @@ static OPUS_INLINE opus_val32 celt_inner_prod(const opus_val16 *x, const opus_va
155 xy = MAC16_16(xy, x[i], y[i]); 165 xy = MAC16_16(xy, x[i], y[i]);
156 return xy; 166 return xy;
157} 167}
168
169#if !defined(OVERRIDE_CELT_INNER_PROD)
170# define celt_inner_prod(x, y, N, arch) \
171 ((void)(arch),celt_inner_prod_c(x, y, N))
172#endif
173
174#ifdef NON_STATIC_COMB_FILTER_CONST_C
175void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
176 opus_val16 g10, opus_val16 g11, opus_val16 g12);
158#endif 177#endif
159 178
179
160#ifdef FIXED_POINT 180#ifdef FIXED_POINT
161opus_val32 181opus_val32
162#else 182#else
163void 183void
164#endif 184#endif
165celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, 185celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
166 opus_val32 *xcorr, int len, int max_pitch); 186 opus_val32 *xcorr, int len, int max_pitch, int arch);
167 187
168#if !defined(OVERRIDE_PITCH_XCORR) 188#ifndef OVERRIDE_PITCH_XCORR
169/*Is run-time CPU detection enabled on this platform?*/ 189# define celt_pitch_xcorr celt_pitch_xcorr_c
170# if defined(OPUS_HAVE_RTCD)
171extern
172# if defined(FIXED_POINT)
173opus_val32
174# else
175void
176# endif
177(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
178 const opus_val16 *, opus_val32 *, int, int);
179
180# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
181 ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
182 xcorr, len, max_pitch))
183# else
184# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
185 ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch))
186# endif
187#endif 190#endif
188 191
189#endif 192#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/quant_bands.c b/lib/rbcodec/codecs/libopus/celt/quant_bands.c
index ac6952c266..39a221eda5 100644
--- a/lib/rbcodec/codecs/libopus/celt/quant_bands.c
+++ b/lib/rbcodec/codecs/libopus/celt/quant_bands.c
@@ -292,7 +292,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
292#endif 292#endif
293 } 293 }
294 if (lfe) 294 if (lfe)
295 max_decay=3; 295 max_decay = QCONST16(3.f,DB_SHIFT);
296 enc_start_state = *enc; 296 enc_start_state = *enc;
297 297
298 ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16); 298 ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
@@ -418,6 +418,7 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *ol
418 offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); 418 offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
419#endif 419#endif
420 oldEBands[i+c*m->nbEBands] += offset; 420 oldEBands[i+c*m->nbEBands] += offset;
421 error[i+c*m->nbEBands] -= offset;
421 bits_left--; 422 bits_left--;
422 } while (++c < C); 423 } while (++c < C);
423 } 424 }
@@ -456,7 +457,7 @@ void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *ol
456 /* It would be better to express this invariant as a 457 /* It would be better to express this invariant as a
457 test on C at function entry, but that isn't enough 458 test on C at function entry, but that isn't enough
458 to make the static analyzer happy. */ 459 to make the static analyzer happy. */
459 celt_assert(c<2); 460 celt_sig_assert(c<2);
460 tell = ec_tell(dec); 461 tell = ec_tell(dec);
461 if(budget-tell>=15) 462 if(budget-tell>=15)
462 { 463 {
@@ -547,9 +548,15 @@ void amp2Log2(const CELTMode *m, int effEnd, int end,
547 c=0; 548 c=0;
548 do { 549 do {
549 for (i=0;i<effEnd;i++) 550 for (i=0;i<effEnd;i++)
551 {
550 bandLogE[i+c*m->nbEBands] = 552 bandLogE[i+c*m->nbEBands] =
551 celt_log2(SHL32(bandE[i+c*m->nbEBands],2)) 553 celt_log2(bandE[i+c*m->nbEBands])
552 - SHL16((opus_val16)eMeans[i],6); 554 - SHL16((opus_val16)eMeans[i],6);
555#ifdef FIXED_POINT
556 /* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */
557 bandLogE[i+c*m->nbEBands] += QCONST16(2.f, DB_SHIFT);
558#endif
559 }
553 for (i=effEnd;i<end;i++) 560 for (i=effEnd;i<end;i++)
554 bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT); 561 bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
555 } while (++c < C); 562 } while (++c < C);
diff --git a/lib/rbcodec/codecs/libopus/celt/rate.c b/lib/rbcodec/codecs/libopus/celt/rate.c
index f85c3ee63c..465e1ba26c 100644
--- a/lib/rbcodec/codecs/libopus/celt/rate.c
+++ b/lib/rbcodec/codecs/libopus/celt/rate.c
@@ -131,7 +131,7 @@ void compute_pulse_cache(CELTMode *m, int LM)
131 for (i=0;i<nbEntries;i++) 131 for (i=0;i<nbEntries;i++)
132 { 132 {
133 unsigned char *ptr = bits+entryI[i]; 133 unsigned char *ptr = bits+entryI[i];
134 opus_int16 tmp[MAX_PULSES+1]; 134 opus_int16 tmp[CELT_MAX_PULSES+1];
135 get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES); 135 get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES);
136 for (j=1;j<=entryK[i];j++) 136 for (j=1;j<=entryK[i];j++)
137 ptr[j] = tmp[get_pulses(j)]-1; 137 ptr[j] = tmp[get_pulses(j)]-1;
@@ -296,7 +296,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
296 done = 0; 296 done = 0;
297 for (j=end;j-->start;) 297 for (j=end;j-->start;)
298 { 298 {
299 int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS); 299 int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
300 if (tmp < thresh[j] && !done) 300 if (tmp < thresh[j] && !done)
301 { 301 {
302 if (tmp >= alloc_floor) 302 if (tmp >= alloc_floor)
@@ -348,12 +348,17 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
348 /*This if() block is the only part of the allocation function that 348 /*This if() block is the only part of the allocation function that
349 is not a mandatory part of the bitstream: any bands we choose to 349 is not a mandatory part of the bitstream: any bands we choose to
350 skip here must be explicitly signaled.*/ 350 skip here must be explicitly signaled.*/
351 /*Choose a threshold with some hysteresis to keep bands from 351 int depth_threshold;
352 fluctuating in and out.*/ 352 /*We choose a threshold with some hysteresis to keep bands from
353 fluctuating in and out, but we try not to fold below a certain point. */
354 if (codedBands > 17)
355 depth_threshold = j<prev ? 7 : 9;
356 else
357 depth_threshold = 0;
353#ifdef FUZZING 358#ifdef FUZZING
354 if ((rand()&0x1) == 0) 359 if ((rand()&0x1) == 0)
355#else 360#else
356 if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth)) 361 if (codedBands<=start+2 || (band_bits > (depth_threshold*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth))
357#endif 362#endif
358 { 363 {
359 ec_enc_bit_logp(ec, 1, 1); 364 ec_enc_bit_logp(ec, 1, 1);
@@ -524,7 +529,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
524 return codedBands; 529 return codedBands;
525} 530}
526 531
527int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, 532int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo,
528 opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) 533 opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth)
529{ 534{
530 int lo, hi, len, j; 535 int lo, hi, len, j;
diff --git a/lib/rbcodec/codecs/libopus/celt/rate.h b/lib/rbcodec/codecs/libopus/celt/rate.h
index f1e0661129..852b9d6f60 100644
--- a/lib/rbcodec/codecs/libopus/celt/rate.h
+++ b/lib/rbcodec/codecs/libopus/celt/rate.h
@@ -32,7 +32,7 @@
32#define MAX_PSEUDO 40 32#define MAX_PSEUDO 40
33#define LOG_MAX_PSEUDO 6 33#define LOG_MAX_PSEUDO 6
34 34
35#define MAX_PULSES 128 35#define CELT_MAX_PULSES 128
36 36
37#define MAX_FINE_BITS 8 37#define MAX_FINE_BITS 8
38 38
@@ -95,7 +95,7 @@ static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int puls
95 @param pulses Number of pulses per band (returned) 95 @param pulses Number of pulses per band (returned)
96 @return Total number of bits allocated 96 @return Total number of bits allocated
97*/ 97*/
98int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero, 98int clt_compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero,
99 opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth); 99 opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth);
100 100
101#endif 101#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
index 1f13497c69..8717d626cb 100644
--- a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
+++ b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed.h
@@ -4,9 +4,14 @@
4#include "modes.h" 4#include "modes.h"
5#include "rate.h" 5#include "rate.h"
6 6
7#ifdef HAVE_ARM_NE10
8#define OVERRIDE_FFT 1
9#include "static_modes_fixed_arm_ne10.h"
10#endif
11
7#ifndef DEF_WINDOW120 12#ifndef DEF_WINDOW120
8#define DEF_WINDOW120 13#define DEF_WINDOW120
9static const opus_val16 window120[120] ICONST_ATTR = { 14static const opus_val16 window120[120] = {
102, 20, 55, 108, 178, 152, 20, 55, 108, 178,
11266, 372, 494, 635, 792, 16266, 372, 494, 635, 792,
12966, 1157, 1365, 1590, 1831, 17966, 1157, 1365, 1590, 1831,
@@ -36,13 +41,13 @@ static const opus_val16 window120[120] ICONST_ATTR = {
36 41
37#ifndef DEF_LOGN400 42#ifndef DEF_LOGN400
38#define DEF_LOGN400 43#define DEF_LOGN400
39static const opus_int16 logN400[21] ICONST_ATTR = { 44static const opus_int16 logN400[21] = {
400, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, }; 450, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
41#endif 46#endif
42 47
43#ifndef DEF_PULSE_CACHE50 48#ifndef DEF_PULSE_CACHE50
44#define DEF_PULSE_CACHE50 49#define DEF_PULSE_CACHE50
45static const opus_int16 cache_index50[105] ICONST_ATTR = { 50static const opus_int16 cache_index50[105] = {
46-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41, 51-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
4782, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41, 5282, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
4841, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41, 5341, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
@@ -51,7 +56,7 @@ static const opus_int16 cache_index50[105] ICONST_ATTR = {
51305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240, 56305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
52240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387, 57240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
53}; 58};
54static const unsigned char cache_bits50[392] ICONST_ATTR = { 59static const unsigned char cache_bits50[392] = {
5540, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6040, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
567, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 617, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
577, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28, 627, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
@@ -79,7 +84,7 @@ static const unsigned char cache_bits50[392] ICONST_ATTR = {
79106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187, 84106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
80224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127, 85224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
81182, 234, }; 86182, 234, };
82static const unsigned char cache_caps50[168] ICONST_ATTR = { 87static const unsigned char cache_caps50[168] = {
83224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185, 88224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
84178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240, 89178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
85240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160, 90240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
@@ -96,7 +101,7 @@ static const unsigned char cache_caps50[168] ICONST_ATTR = {
96 101
97#ifndef FFT_TWIDDLES48000_960 102#ifndef FFT_TWIDDLES48000_960
98#define FFT_TWIDDLES48000_960 103#define FFT_TWIDDLES48000_960
99static const kiss_twiddle_cpx fft_twiddles48000_960[480] ICONST_ATTR = { 104static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
100{32767, 0}, {32766, -429}, 105{32767, 0}, {32766, -429},
101{32757, -858}, {32743, -1287}, 106{32757, -858}, {32743, -1287},
102{32724, -1715}, {32698, -2143}, 107{32724, -1715}, {32698, -2143},
@@ -424,53 +429,73 @@ static const opus_int16 fft_bitrev60[60] = {
424 429
425#ifndef FFT_STATE48000_960_0 430#ifndef FFT_STATE48000_960_0
426#define FFT_STATE48000_960_0 431#define FFT_STATE48000_960_0
427static const kiss_fft_state fft_state48000_960_0 ICONST_ATTR = { 432static const kiss_fft_state fft_state48000_960_0 = {
428480, /* nfft */ 433480, /* nfft */
42917476, /* scale */ 43417476, /* scale */
4308, /* scale_shift */ 4358, /* scale_shift */
431-1, /* shift */ 436-1, /* shift */
432{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ 437{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
433fft_bitrev480, /* bitrev */ 438fft_bitrev480, /* bitrev */
434fft_twiddles48000_960, /* bitrev */ 439fft_twiddles48000_960, /* bitrev */
440#ifdef OVERRIDE_FFT
441(arch_fft_state *)&cfg_arch_480,
442#else
443NULL,
444#endif
435}; 445};
436#endif 446#endif
437 447
438#ifndef FFT_STATE48000_960_1 448#ifndef FFT_STATE48000_960_1
439#define FFT_STATE48000_960_1 449#define FFT_STATE48000_960_1
440static const kiss_fft_state fft_state48000_960_1 ICONST_ATTR = { 450static const kiss_fft_state fft_state48000_960_1 = {
441240, /* nfft */ 451240, /* nfft */
44217476, /* scale */ 45217476, /* scale */
4437, /* scale_shift */ 4537, /* scale_shift */
4441, /* shift */ 4541, /* shift */
445{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 455{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
446fft_bitrev240, /* bitrev */ 456fft_bitrev240, /* bitrev */
447fft_twiddles48000_960, /* bitrev */ 457fft_twiddles48000_960, /* bitrev */
458#ifdef OVERRIDE_FFT
459(arch_fft_state *)&cfg_arch_240,
460#else
461NULL,
462#endif
448}; 463};
449#endif 464#endif
450 465
451#ifndef FFT_STATE48000_960_2 466#ifndef FFT_STATE48000_960_2
452#define FFT_STATE48000_960_2 467#define FFT_STATE48000_960_2
453static const kiss_fft_state fft_state48000_960_2 ICONST_ATTR = { 468static const kiss_fft_state fft_state48000_960_2 = {
454120, /* nfft */ 469120, /* nfft */
45517476, /* scale */ 47017476, /* scale */
4566, /* scale_shift */ 4716, /* scale_shift */
4572, /* shift */ 4722, /* shift */
458{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 473{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
459fft_bitrev120, /* bitrev */ 474fft_bitrev120, /* bitrev */
460fft_twiddles48000_960, /* bitrev */ 475fft_twiddles48000_960, /* bitrev */
476#ifdef OVERRIDE_FFT
477(arch_fft_state *)&cfg_arch_120,
478#else
479NULL,
480#endif
461}; 481};
462#endif 482#endif
463 483
464#ifndef FFT_STATE48000_960_3 484#ifndef FFT_STATE48000_960_3
465#define FFT_STATE48000_960_3 485#define FFT_STATE48000_960_3
466static const kiss_fft_state fft_state48000_960_3 ICONST_ATTR = { 486static const kiss_fft_state fft_state48000_960_3 = {
46760, /* nfft */ 48760, /* nfft */
46817476, /* scale */ 48817476, /* scale */
4695, /* scale_shift */ 4895, /* scale_shift */
4703, /* shift */ 4903, /* shift */
471{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ 491{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
472fft_bitrev60, /* bitrev */ 492fft_bitrev60, /* bitrev */
473fft_twiddles48000_960, /* bitrev */ 493fft_twiddles48000_960, /* bitrev */
494#ifdef OVERRIDE_FFT
495(arch_fft_state *)&cfg_arch_60,
496#else
497NULL,
498#endif
474}; 499};
475#endif 500#endif
476 501
@@ -478,7 +503,7 @@ fft_twiddles48000_960, /* bitrev */
478 503
479#ifndef MDCT_TWIDDLES960 504#ifndef MDCT_TWIDDLES960
480#define MDCT_TWIDDLES960 505#define MDCT_TWIDDLES960
481static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = { 506static const opus_val16 mdct_twiddles960[1800] = {
48232767, 32767, 32767, 32766, 32765, 50732767, 32767, 32767, 32766, 32765,
48332763, 32761, 32759, 32756, 32753, 50832763, 32761, 32759, 32756, 32753,
48432750, 32746, 32742, 32738, 32733, 50932750, 32746, 32742, 32738, 32733,
@@ -842,7 +867,7 @@ static const opus_val16 mdct_twiddles960[1800] ICONST_ATTR = {
842}; 867};
843#endif 868#endif
844 869
845static const CELTMode mode48000_960_120 ICONST_ATTR = { 870static const CELTMode mode48000_960_120 = {
84648000, /* Fs */ 87148000, /* Fs */
847120, /* overlap */ 872120, /* overlap */
84821, /* nbEBands */ 87321, /* nbEBands */
diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_fixed_arm_ne10.h b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed_arm_ne10.h
new file mode 100644
index 0000000000..7623092192
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/static_modes_fixed_arm_ne10.h
@@ -0,0 +1,388 @@
1/* The contents of this file was automatically generated by
2 * dump_mode_arm_ne10.c with arguments: 48000 960
3 * It contains static definitions for some pre-defined modes. */
4#include <NE10_types.h>
5
6#ifndef NE10_FFT_PARAMS48000_960
7#define NE10_FFT_PARAMS48000_960
8static const ne10_int32_t ne10_factors_480[64] = {
94, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130, 0, 0, 0, };
14static const ne10_int32_t ne10_factors_240[64] = {
153, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190, 0, 0, 0, };
20static const ne10_int32_t ne10_factors_120[64] = {
213, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
220, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
250, 0, 0, 0, };
26static const ne10_int32_t ne10_factors_60[64] = {
272, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
310, 0, 0, 0, };
32static const ne10_fft_cpx_int32_t ne10_twiddles_480[480] = {
33{0,0}, {2147483647,0}, {2147483647,0},
34{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
35{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
36{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
37{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
38{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
39{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
40{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
41{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
42{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
43{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
44{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
45{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
46{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
47{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
48{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
49{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
50{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
51{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
52{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
53{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
54{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
55{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
56{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
57{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
58{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
59{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
60{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
61{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
62{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
63{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
64{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
65{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
66{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
67{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
68{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
69{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
70{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
71{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
72{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
73{2147483647,0}, {2147299667,-28109693}, {2146747758,-56214570},
74{2145828015,-84309815}, {2144540595,-112390613}, {2142885719,-140452154},
75{2140863671,-168489630}, {2138474797,-196498235}, {2135719506,-224473172},
76{2132598271,-252409646}, {2129111626,-280302871}, {2125260168,-308148068},
77{2121044558,-335940465}, {2116465518,-363675300}, {2111523833,-391347822},
78{2106220349,-418953288}, {2100555974,-446486968}, {2094531681,-473944146},
79{2088148500,-501320115}, {2081407525,-528610186}, {2074309912,-555809682},
80{2066856885,-582913912}, {2059049696,-609918325}, {2050889698,-636818231},
81{2042378310,-663608960}, {2033516972,-690285983}, {2024307180,-716844791},
82{2014750533,-743280770}, {2004848691,-769589332}, {1994603329,-795766029},
83{1984016179,-821806435}, {1973089077,-847706028}, {1961823921,-873460313},
84{1950222618,-899064934}, {1938287127,-924515564}, {1926019520,-949807783},
85{1913421927,-974937199}, {1900496481,-999899565}, {1887245364,-1024690661},
86{1873670877,-1049306180}, {1859775377,-1073741851}, {1845561215,-1097993541},
87{1831030826,-1122057097}, {1816186632,-1145928502}, {1801031311,-1169603450},
88{1785567394,-1193077993}, {1769797456,-1216348214}, {1753724345,-1239409914},
89{1737350743,-1262259248}, {1720679456,-1284892300}, {1703713340,-1307305194},
90{1686455222,-1329494189}, {1668908218,-1351455280}, {1651075255,-1373184807},
91{1632959307,-1394679144}, {1614563642,-1415934412}, {1595891331,-1436947067},
92{1576945572,-1457713510}, {1557729613,-1478230181}, {1538246655,-1498493658},
93{1518500216,-1518500282}, {1498493590,-1538246721}, {1478230113,-1557729677},
94{1457713441,-1576945636}, {1436946998,-1595891394}, {1415934341,-1614563704},
95{1394679073,-1632959368}, {1373184735,-1651075315}, {1351455207,-1668908277},
96{1329494115,-1686455280}, {1307305120,-1703713397}, {1284892225,-1720679512},
97{1262259172,-1737350799}, {1239409837,-1753724400}, {1216348136,-1769797510},
98{1193077915,-1785567446}, {1169603371,-1801031362}, {1145928423,-1816186682},
99{1122057017,-1831030875}, {1097993571,-1845561197}, {1073741769,-1859775424},
100{1049305987,-1873670985}, {1024690635,-1887245378}, {999899482,-1900496524},
101{974937230,-1913421912}, {949807699,-1926019561}, {924515422,-1938287195},
102{899064965,-1950222603}, {873460227,-1961823959}, {847705824,-1973089164},
103{821806407,-1984016190}, {795765941,-1994603364}, {769589125,-2004848771},
104{743280682,-2014750566}, {716844642,-2024307233}, {690286016,-2033516961},
105{663608871,-2042378339}, {636818019,-2050889764}, {609918296,-2059049705},
106{582913822,-2066856911}, {555809715,-2074309903}, {528610126,-2081407540},
107{501319962,-2088148536}, {473944148,-2094531680}, {446486876,-2100555994},
108{418953102,-2106220386}, {391347792,-2111523838}, {363675176,-2116465540},
109{335940246,-2121044593}, {308148006,-2125260177}, {280302715,-2129111646},
110{252409648,-2132598271}, {224473078,-2135719516}, {196498046,-2138474814},
111{168489600,-2140863674}, {140452029,-2142885728}, {112390647,-2144540593},
112{84309753,-2145828017}, {56214412,-2146747762}, {28109695,-2147299667},
113{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
114{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
115{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
116{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
117{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
118{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
119{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
120{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
121{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
122{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
123{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
124{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
125{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
126{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
127{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
128{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
129{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
130{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
131{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
132{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
133{-94,-2147483647}, {-56214600,-2146747757}, {-112390835,-2144540584},
134{-168489787,-2140863659}, {-224473265,-2135719496}, {-280302901,-2129111622},
135{-335940431,-2121044564}, {-391347977,-2111523804}, {-446487060,-2100555955},
136{-501320144,-2088148493}, {-555809896,-2074309855}, {-609918476,-2059049651},
137{-663609049,-2042378281}, {-716844819,-2024307170}, {-769589300,-2004848703},
138{-821806581,-1984016118}, {-873460398,-1961823883}, {-924515591,-1938287114},
139{-974937397,-1913421827}, {-1024690575,-1887245411}, {-1073741932,-1859775330},
140{-1122057395,-1831030643}, {-1169603421,-1801031330}, {-1216348291,-1769797403},
141{-1262259116,-1737350839}, {-1307305268,-1703713283}, {-1351455453,-1668908078},
142{-1394679021,-1632959413}, {-1436947137,-1595891268}, {-1478230435,-1557729372},
143{-1518500258,-1518500240}, {-1557729742,-1478230045}, {-1595891628,-1436946738},
144{-1632959429,-1394679001}, {-1668908417,-1351455035}, {-1703713298,-1307305248},
145{-1737350854,-1262259096}, {-1769797708,-1216347848}, {-1801031344,-1169603400},
146{-1831030924,-1122056937}, {-1859775343,-1073741910}, {-1887245423,-1024690552},
147{-1913422071,-974936918}, {-1938287125,-924515568}, {-1961823997,-873460141},
148{-1984016324,-821806084}, {-2004848713,-769589276}, {-2024307264,-716844553},
149{-2042378447,-663608538}, {-2059049731,-609918206}, {-2074309994,-555809377},
150{-2088148499,-501320119}, {-2100556013,-446486785}, {-2111523902,-391347448},
151{-2121044568,-335940406}, {-2129111659,-280302621}, {-2135719499,-224473240},
152{-2140863681,-168489506}, {-2144540612,-112390298}, {-2146747758,-56214574},
153{2147483647,0}, {2145828015,-84309815}, {2140863671,-168489630},
154{2132598271,-252409646}, {2121044558,-335940465}, {2106220349,-418953288},
155{2088148500,-501320115}, {2066856885,-582913912}, {2042378310,-663608960},
156{2014750533,-743280770}, {1984016179,-821806435}, {1950222618,-899064934},
157{1913421927,-974937199}, {1873670877,-1049306180}, {1831030826,-1122057097},
158{1785567394,-1193077993}, {1737350743,-1262259248}, {1686455222,-1329494189},
159{1632959307,-1394679144}, {1576945572,-1457713510}, {1518500216,-1518500282},
160{1457713441,-1576945636}, {1394679073,-1632959368}, {1329494115,-1686455280},
161{1262259172,-1737350799}, {1193077915,-1785567446}, {1122057017,-1831030875},
162{1049305987,-1873670985}, {974937230,-1913421912}, {899064965,-1950222603},
163{821806407,-1984016190}, {743280682,-2014750566}, {663608871,-2042378339},
164{582913822,-2066856911}, {501319962,-2088148536}, {418953102,-2106220386},
165{335940246,-2121044593}, {252409648,-2132598271}, {168489600,-2140863674},
166{84309753,-2145828017}, {-94,-2147483647}, {-84309940,-2145828010},
167{-168489787,-2140863659}, {-252409834,-2132598249}, {-335940431,-2121044564},
168{-418953286,-2106220349}, {-501320144,-2088148493}, {-582914003,-2066856860},
169{-663609049,-2042378281}, {-743280858,-2014750501}, {-821806581,-1984016118},
170{-899065136,-1950222525}, {-974937397,-1913421827}, {-1049306374,-1873670768},
171{-1122057395,-1831030643}, {-1193078284,-1785567199}, {-1262259116,-1737350839},
172{-1329494061,-1686455323}, {-1394679021,-1632959413}, {-1457713485,-1576945595},
173{-1518500258,-1518500240}, {-1576945613,-1457713466}, {-1632959429,-1394679001},
174{-1686455338,-1329494041}, {-1737350854,-1262259096}, {-1785567498,-1193077837},
175{-1831030924,-1122056937}, {-1873671031,-1049305905}, {-1913422071,-974936918},
176{-1950222750,-899064648}, {-1984016324,-821806084}, {-2014750687,-743280354},
177{-2042378447,-663608538}, {-2066856867,-582913978}, {-2088148499,-501320119},
178{-2106220354,-418953261}, {-2121044568,-335940406}, {-2132598282,-252409555},
179{-2140863681,-168489506}, {-2145828021,-84309659}, {-2147483647,188},
180{-2145828006,84310034}, {-2140863651,168489881}, {-2132598237,252409928},
181{-2121044509,335940777}, {-2106220281,418953629}, {-2088148411,501320484},
182{-2066856765,582914339}, {-2042378331,663608895}, {-2014750557,743280706},
183{-1984016181,821806431}, {-1950222593,899064989}, {-1913421900,974937252},
184{-1873670848,1049306232}, {-1831030728,1122057257}, {-1785567289,1193078149},
185{-1737350633,1262259400}, {-1686455106,1329494336}, {-1632959185,1394679287},
186{-1576945358,1457713742}, {-1518499993,1518500506}, {-1457713209,1576945850},
187{-1394678735,1632959656}, {-1329493766,1686455555}, {-1262258813,1737351059},
188{-1193077546,1785567692}, {-1122056638,1831031107}, {-1049305599,1873671202},
189{-974936606,1913422229}, {-899064330,1950222896}, {-821805761,1984016458},
190{-743280025,2014750808}, {-663609179,2042378239}, {-582914134,2066856823},
191{-501320277,2088148461}, {-418953420,2106220322}, {-335940566,2121044542},
192{-252409716,2132598263}, {-168489668,2140863668}, {-84309821,2145828015},
193};
194static const ne10_fft_cpx_int32_t ne10_twiddles_240[240] = {
195{0,0}, {2147483647,0}, {2147483647,0},
196{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
197{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
198{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
199{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
200{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
201{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
202{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
203{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
204{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
205{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
206{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
207{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
208{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
209{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
210{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
211{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
212{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
213{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
214{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
215{2147483647,0}, {2146747758,-56214570}, {2144540595,-112390613},
216{2140863671,-168489630}, {2135719506,-224473172}, {2129111626,-280302871},
217{2121044558,-335940465}, {2111523833,-391347822}, {2100555974,-446486968},
218{2088148500,-501320115}, {2074309912,-555809682}, {2059049696,-609918325},
219{2042378310,-663608960}, {2024307180,-716844791}, {2004848691,-769589332},
220{1984016179,-821806435}, {1961823921,-873460313}, {1938287127,-924515564},
221{1913421927,-974937199}, {1887245364,-1024690661}, {1859775377,-1073741851},
222{1831030826,-1122057097}, {1801031311,-1169603450}, {1769797456,-1216348214},
223{1737350743,-1262259248}, {1703713340,-1307305194}, {1668908218,-1351455280},
224{1632959307,-1394679144}, {1595891331,-1436947067}, {1557729613,-1478230181},
225{1518500216,-1518500282}, {1478230113,-1557729677}, {1436946998,-1595891394},
226{1394679073,-1632959368}, {1351455207,-1668908277}, {1307305120,-1703713397},
227{1262259172,-1737350799}, {1216348136,-1769797510}, {1169603371,-1801031362},
228{1122057017,-1831030875}, {1073741769,-1859775424}, {1024690635,-1887245378},
229{974937230,-1913421912}, {924515422,-1938287195}, {873460227,-1961823959},
230{821806407,-1984016190}, {769589125,-2004848771}, {716844642,-2024307233},
231{663608871,-2042378339}, {609918296,-2059049705}, {555809715,-2074309903},
232{501319962,-2088148536}, {446486876,-2100555994}, {391347792,-2111523838},
233{335940246,-2121044593}, {280302715,-2129111646}, {224473078,-2135719516},
234{168489600,-2140863674}, {112390647,-2144540593}, {56214412,-2146747762},
235{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
236{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
237{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
238{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
239{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
240{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
241{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
242{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
243{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
244{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
245{-94,-2147483647}, {-112390835,-2144540584}, {-224473265,-2135719496},
246{-335940431,-2121044564}, {-446487060,-2100555955}, {-555809896,-2074309855},
247{-663609049,-2042378281}, {-769589300,-2004848703}, {-873460398,-1961823883},
248{-974937397,-1913421827}, {-1073741932,-1859775330}, {-1169603421,-1801031330},
249{-1262259116,-1737350839}, {-1351455453,-1668908078}, {-1436947137,-1595891268},
250{-1518500258,-1518500240}, {-1595891628,-1436946738}, {-1668908417,-1351455035},
251{-1737350854,-1262259096}, {-1801031344,-1169603400}, {-1859775343,-1073741910},
252{-1913422071,-974936918}, {-1961823997,-873460141}, {-2004848713,-769589276},
253{-2042378447,-663608538}, {-2074309994,-555809377}, {-2100556013,-446486785},
254{-2121044568,-335940406}, {-2135719499,-224473240}, {-2144540612,-112390298},
255{2147483647,0}, {2140863671,-168489630}, {2121044558,-335940465},
256{2088148500,-501320115}, {2042378310,-663608960}, {1984016179,-821806435},
257{1913421927,-974937199}, {1831030826,-1122057097}, {1737350743,-1262259248},
258{1632959307,-1394679144}, {1518500216,-1518500282}, {1394679073,-1632959368},
259{1262259172,-1737350799}, {1122057017,-1831030875}, {974937230,-1913421912},
260{821806407,-1984016190}, {663608871,-2042378339}, {501319962,-2088148536},
261{335940246,-2121044593}, {168489600,-2140863674}, {-94,-2147483647},
262{-168489787,-2140863659}, {-335940431,-2121044564}, {-501320144,-2088148493},
263{-663609049,-2042378281}, {-821806581,-1984016118}, {-974937397,-1913421827},
264{-1122057395,-1831030643}, {-1262259116,-1737350839}, {-1394679021,-1632959413},
265{-1518500258,-1518500240}, {-1632959429,-1394679001}, {-1737350854,-1262259096},
266{-1831030924,-1122056937}, {-1913422071,-974936918}, {-1984016324,-821806084},
267{-2042378447,-663608538}, {-2088148499,-501320119}, {-2121044568,-335940406},
268{-2140863681,-168489506}, {-2147483647,188}, {-2140863651,168489881},
269{-2121044509,335940777}, {-2088148411,501320484}, {-2042378331,663608895},
270{-1984016181,821806431}, {-1913421900,974937252}, {-1831030728,1122057257},
271{-1737350633,1262259400}, {-1632959185,1394679287}, {-1518499993,1518500506},
272{-1394678735,1632959656}, {-1262258813,1737351059}, {-1122056638,1831031107},
273{-974936606,1913422229}, {-821805761,1984016458}, {-663609179,2042378239},
274{-501320277,2088148461}, {-335940566,2121044542}, {-168489668,2140863668},
275};
276static const ne10_fft_cpx_int32_t ne10_twiddles_120[120] = {
277{0,0}, {2147483647,0}, {2147483647,0},
278{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
279{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
280{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
281{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
282{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
283{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
284{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
285{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
286{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
287{2147483647,0}, {2144540595,-112390613}, {2135719506,-224473172},
288{2121044558,-335940465}, {2100555974,-446486968}, {2074309912,-555809682},
289{2042378310,-663608960}, {2004848691,-769589332}, {1961823921,-873460313},
290{1913421927,-974937199}, {1859775377,-1073741851}, {1801031311,-1169603450},
291{1737350743,-1262259248}, {1668908218,-1351455280}, {1595891331,-1436947067},
292{1518500216,-1518500282}, {1436946998,-1595891394}, {1351455207,-1668908277},
293{1262259172,-1737350799}, {1169603371,-1801031362}, {1073741769,-1859775424},
294{974937230,-1913421912}, {873460227,-1961823959}, {769589125,-2004848771},
295{663608871,-2042378339}, {555809715,-2074309903}, {446486876,-2100555994},
296{335940246,-2121044593}, {224473078,-2135719516}, {112390647,-2144540593},
297{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
298{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
299{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
300{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
301{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
302{-94,-2147483647}, {-224473265,-2135719496}, {-446487060,-2100555955},
303{-663609049,-2042378281}, {-873460398,-1961823883}, {-1073741932,-1859775330},
304{-1262259116,-1737350839}, {-1436947137,-1595891268}, {-1595891628,-1436946738},
305{-1737350854,-1262259096}, {-1859775343,-1073741910}, {-1961823997,-873460141},
306{-2042378447,-663608538}, {-2100556013,-446486785}, {-2135719499,-224473240},
307{2147483647,0}, {2121044558,-335940465}, {2042378310,-663608960},
308{1913421927,-974937199}, {1737350743,-1262259248}, {1518500216,-1518500282},
309{1262259172,-1737350799}, {974937230,-1913421912}, {663608871,-2042378339},
310{335940246,-2121044593}, {-94,-2147483647}, {-335940431,-2121044564},
311{-663609049,-2042378281}, {-974937397,-1913421827}, {-1262259116,-1737350839},
312{-1518500258,-1518500240}, {-1737350854,-1262259096}, {-1913422071,-974936918},
313{-2042378447,-663608538}, {-2121044568,-335940406}, {-2147483647,188},
314{-2121044509,335940777}, {-2042378331,663608895}, {-1913421900,974937252},
315{-1737350633,1262259400}, {-1518499993,1518500506}, {-1262258813,1737351059},
316{-974936606,1913422229}, {-663609179,2042378239}, {-335940566,2121044542},
317};
318static const ne10_fft_cpx_int32_t ne10_twiddles_60[60] = {
319{0,0}, {2147483647,0}, {2147483647,0},
320{2147483647,0}, {1961823921,-873460313}, {1436946998,-1595891394},
321{2147483647,0}, {1436946998,-1595891394}, {-224473265,-2135719496},
322{2147483647,0}, {663608871,-2042378339}, {-1737350854,-1262259096},
323{2147483647,0}, {-224473265,-2135719496}, {-2100555935,446487152},
324{2147483647,0}, {2135719506,-224473172}, {2100555974,-446486968},
325{2042378310,-663608960}, {1961823921,-873460313}, {1859775377,-1073741851},
326{1737350743,-1262259248}, {1595891331,-1436947067}, {1436946998,-1595891394},
327{1262259172,-1737350799}, {1073741769,-1859775424}, {873460227,-1961823959},
328{663608871,-2042378339}, {446486876,-2100555994}, {224473078,-2135719516},
329{2147483647,0}, {2100555974,-446486968}, {1961823921,-873460313},
330{1737350743,-1262259248}, {1436946998,-1595891394}, {1073741769,-1859775424},
331{663608871,-2042378339}, {224473078,-2135719516}, {-224473265,-2135719496},
332{-663609049,-2042378281}, {-1073741932,-1859775330}, {-1436947137,-1595891268},
333{-1737350854,-1262259096}, {-1961823997,-873460141}, {-2100556013,-446486785},
334{2147483647,0}, {2042378310,-663608960}, {1737350743,-1262259248},
335{1262259172,-1737350799}, {663608871,-2042378339}, {-94,-2147483647},
336{-663609049,-2042378281}, {-1262259116,-1737350839}, {-1737350854,-1262259096},
337{-2042378447,-663608538}, {-2147483647,188}, {-2042378331,663608895},
338{-1737350633,1262259400}, {-1262258813,1737351059}, {-663609179,2042378239},
339};
340static const ne10_fft_state_int32_t ne10_fft_state_int32_t_480 = {
341120,
342(ne10_int32_t *)ne10_factors_480,
343(ne10_fft_cpx_int32_t *)ne10_twiddles_480,
344NULL,
345(ne10_fft_cpx_int32_t *)&ne10_twiddles_480[120],
346};
347static const arch_fft_state cfg_arch_480 = {
3481,
349(void *)&ne10_fft_state_int32_t_480,
350};
351
352static const ne10_fft_state_int32_t ne10_fft_state_int32_t_240 = {
35360,
354(ne10_int32_t *)ne10_factors_240,
355(ne10_fft_cpx_int32_t *)ne10_twiddles_240,
356NULL,
357(ne10_fft_cpx_int32_t *)&ne10_twiddles_240[60],
358};
359static const arch_fft_state cfg_arch_240 = {
3601,
361(void *)&ne10_fft_state_int32_t_240,
362};
363
364static const ne10_fft_state_int32_t ne10_fft_state_int32_t_120 = {
36530,
366(ne10_int32_t *)ne10_factors_120,
367(ne10_fft_cpx_int32_t *)ne10_twiddles_120,
368NULL,
369(ne10_fft_cpx_int32_t *)&ne10_twiddles_120[30],
370};
371static const arch_fft_state cfg_arch_120 = {
3721,
373(void *)&ne10_fft_state_int32_t_120,
374};
375
376static const ne10_fft_state_int32_t ne10_fft_state_int32_t_60 = {
37715,
378(ne10_int32_t *)ne10_factors_60,
379(ne10_fft_cpx_int32_t *)ne10_twiddles_60,
380NULL,
381(ne10_fft_cpx_int32_t *)&ne10_twiddles_60[15],
382};
383static const arch_fft_state cfg_arch_60 = {
3841,
385(void *)&ne10_fft_state_int32_t_60,
386};
387
388#endif /* end NE10_FFT_PARAMS48000_960 */
diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_float.h b/lib/rbcodec/codecs/libopus/celt/static_modes_float.h
new file mode 100644
index 0000000000..e102a38391
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/static_modes_float.h
@@ -0,0 +1,888 @@
1/* The contents of this file was automatically generated by dump_modes.c
2 with arguments: 48000 960
3 It contains static definitions for some pre-defined modes. */
4#include "modes.h"
5#include "rate.h"
6
7#ifdef HAVE_ARM_NE10
8#define OVERRIDE_FFT 1
9#include "static_modes_float_arm_ne10.h"
10#endif
11
12#ifndef DEF_WINDOW120
13#define DEF_WINDOW120
14static const opus_val16 window120[120] = {
156.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
160.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
170.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
180.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f,
190.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f,
200.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f,
210.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f,
220.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f,
230.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f,
240.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f,
250.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f,
260.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f,
270.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f,
280.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f,
290.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f,
300.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f,
310.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f,
320.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f,
330.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f,
340.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f,
350.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
360.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
370.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
380.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
39};
40#endif
41
42#ifndef DEF_LOGN400
43#define DEF_LOGN400
44static const opus_int16 logN400[21] = {
450, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, };
46#endif
47
48#ifndef DEF_PULSE_CACHE50
49#define DEF_PULSE_CACHE50
50static const opus_int16 cache_index50[105] = {
51-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41,
5282, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41,
5341, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41,
5441, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305,
55318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240,
56305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240,
57240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387,
58};
59static const unsigned char cache_bits50[392] = {
6040, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
617, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
627, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28,
6331, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50,
6451, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65,
6566, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61,
6664, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92,
6794, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123,
68124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94,
6997, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139,
70142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35,
7128, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149,
72153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225,
73229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157,
74166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63,
7586, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250,
7625, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180,
77185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89,
78110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41,
7974, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138,
80163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214,
81228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49,
8290, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47,
8387, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57,
84106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187,
85224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127,
86182, 234, };
87static const unsigned char cache_caps50[168] = {
88224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185,
89178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240,
90240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160,
91160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172,
92138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207,
93204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185,
94185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39,
95207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201,
96188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193,
97193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204,
98204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175,
99140, 66, 40, };
100#endif
101
102#ifndef FFT_TWIDDLES48000_960
103#define FFT_TWIDDLES48000_960
104static const kiss_twiddle_cpx fft_twiddles48000_960[480] = {
105{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f},
106{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f},
107{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f},
108{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f},
109{0.99452190f, -0.10452846f}, {0.99306846f, -0.11753740f},
110{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f},
111{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f},
112{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f},
113{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f},
114{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f},
115{0.96592583f, -0.25881905f}, {0.96245524f, -0.27144045f},
116{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f},
117{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f},
118{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f},
119{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f},
120{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f},
121{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f},
122{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f},
123{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f},
124{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f},
125{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f},
126{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f},
127{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f},
128{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f},
129{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f},
130{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f},
131{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f},
132{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f},
133{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f},
134{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f},
135{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f},
136{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f},
137{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f},
138{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f},
139{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f},
140{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f},
141{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f},
142{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f},
143{0.54463904f, -0.83867057f}, {0.53361452f, -0.84572782f},
144{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f},
145{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f},
146{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f},
147{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f},
148{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f},
149{0.40673664f, -0.91354546f}, {0.39474386f, -0.91879121f},
150{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f},
151{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f},
152{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f},
153{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f},
154{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f},
155{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f},
156{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f},
157{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f},
158{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f},
159{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f},
160{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f},
161{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f},
162{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f},
163{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f},
164{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f},
165{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f},
166{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f},
167{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f},
168{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f},
169{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f},
170{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f},
171{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f},
172{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f},
173{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f},
174{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f},
175{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f},
176{-0.28401534f, -0.95881973f}, {-0.29654157f, -0.95501994f},
177{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f},
178{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f},
179{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f},
180{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f},
181{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f},
182{-0.43051110f, -0.90258528f}, {-0.44228869f, -0.89687274f},
183{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f},
184{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f},
185{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f},
186{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f},
187{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f},
188{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f},
189{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f},
190{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f},
191{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f},
192{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f},
193{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f},
194{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f},
195{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f},
196{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f},
197{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f},
198{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f},
199{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f},
200{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f},
201{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f},
202{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f},
203{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f},
204{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f},
205{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f},
206{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f},
207{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f},
208{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f},
209{-0.91354546f, -0.40673664f}, {-0.91879121f, -0.39474386f},
210{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f},
211{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f},
212{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f},
213{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f},
214{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f},
215{-0.96592583f, -0.25881905f}, {-0.96923091f, -0.24615329f},
216{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f},
217{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f},
218{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f},
219{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f},
220{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f},
221{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f},
222{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f},
223{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f},
224{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f},
225{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f},
226{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f},
227{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f},
228{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f},
229{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f},
230{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f},
231{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f},
232{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f},
233{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f},
234{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f},
235{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f},
236{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f},
237{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f},
238{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f},
239{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f},
240{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f},
241{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f},
242{-0.90258528f, 0.43051110f}, {-0.89687274f, 0.44228869f},
243{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f},
244{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f},
245{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f},
246{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f},
247{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f},
248{-0.82412619f, 0.56640624f}, {-0.81664156f, 0.57714519f},
249{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f},
250{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f},
251{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f},
252{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f},
253{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f},
254{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f},
255{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f},
256{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f},
257{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f},
258{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f},
259{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f},
260{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f},
261{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f},
262{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f},
263{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f},
264{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f},
265{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f},
266{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f},
267{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f},
268{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f},
269{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f},
270{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f},
271{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f},
272{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f},
273{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f},
274{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f},
275{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f},
276{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f},
277{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f},
278{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f},
279{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f},
280{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f},
281{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f},
282{-0.078459096f, 0.99691733f}, {-0.065403129f, 0.99785892f},
283{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f},
284{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f},
285{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f},
286{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f},
287{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f},
288{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f},
289{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f},
290{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f},
291{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f},
292{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f},
293{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f},
294{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f},
295{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f},
296{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f},
297{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f},
298{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f},
299{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f},
300{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f},
301{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f},
302{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f},
303{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f},
304{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f},
305{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f},
306{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f},
307{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f},
308{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f},
309{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f},
310{0.60876143f, 0.79335334f}, {0.61909395f, 0.78531693f},
311{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f},
312{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f},
313{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f},
314{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f},
315{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f},
316{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f},
317{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f},
318{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f},
319{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f},
320{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f},
321{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f},
322{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f},
323{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f},
324{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f},
325{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f},
326{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f},
327{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f},
328{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f},
329{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f},
330{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f},
331{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f},
332{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f},
333{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f},
334{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f},
335{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f},
336{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f},
337{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f},
338{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f},
339{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f},
340{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f},
341{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f},
342{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f},
343{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f},
344{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f},
345};
346#ifndef FFT_BITREV480
347#define FFT_BITREV480
348static const opus_int16 fft_bitrev480[480] = {
3490, 96, 192, 288, 384, 32, 128, 224, 320, 416, 64, 160, 256, 352, 448,
3508, 104, 200, 296, 392, 40, 136, 232, 328, 424, 72, 168, 264, 360, 456,
35116, 112, 208, 304, 400, 48, 144, 240, 336, 432, 80, 176, 272, 368, 464,
35224, 120, 216, 312, 408, 56, 152, 248, 344, 440, 88, 184, 280, 376, 472,
3534, 100, 196, 292, 388, 36, 132, 228, 324, 420, 68, 164, 260, 356, 452,
35412, 108, 204, 300, 396, 44, 140, 236, 332, 428, 76, 172, 268, 364, 460,
35520, 116, 212, 308, 404, 52, 148, 244, 340, 436, 84, 180, 276, 372, 468,
35628, 124, 220, 316, 412, 60, 156, 252, 348, 444, 92, 188, 284, 380, 476,
3571, 97, 193, 289, 385, 33, 129, 225, 321, 417, 65, 161, 257, 353, 449,
3589, 105, 201, 297, 393, 41, 137, 233, 329, 425, 73, 169, 265, 361, 457,
35917, 113, 209, 305, 401, 49, 145, 241, 337, 433, 81, 177, 273, 369, 465,
36025, 121, 217, 313, 409, 57, 153, 249, 345, 441, 89, 185, 281, 377, 473,
3615, 101, 197, 293, 389, 37, 133, 229, 325, 421, 69, 165, 261, 357, 453,
36213, 109, 205, 301, 397, 45, 141, 237, 333, 429, 77, 173, 269, 365, 461,
36321, 117, 213, 309, 405, 53, 149, 245, 341, 437, 85, 181, 277, 373, 469,
36429, 125, 221, 317, 413, 61, 157, 253, 349, 445, 93, 189, 285, 381, 477,
3652, 98, 194, 290, 386, 34, 130, 226, 322, 418, 66, 162, 258, 354, 450,
36610, 106, 202, 298, 394, 42, 138, 234, 330, 426, 74, 170, 266, 362, 458,
36718, 114, 210, 306, 402, 50, 146, 242, 338, 434, 82, 178, 274, 370, 466,
36826, 122, 218, 314, 410, 58, 154, 250, 346, 442, 90, 186, 282, 378, 474,
3696, 102, 198, 294, 390, 38, 134, 230, 326, 422, 70, 166, 262, 358, 454,
37014, 110, 206, 302, 398, 46, 142, 238, 334, 430, 78, 174, 270, 366, 462,
37122, 118, 214, 310, 406, 54, 150, 246, 342, 438, 86, 182, 278, 374, 470,
37230, 126, 222, 318, 414, 62, 158, 254, 350, 446, 94, 190, 286, 382, 478,
3733, 99, 195, 291, 387, 35, 131, 227, 323, 419, 67, 163, 259, 355, 451,
37411, 107, 203, 299, 395, 43, 139, 235, 331, 427, 75, 171, 267, 363, 459,
37519, 115, 211, 307, 403, 51, 147, 243, 339, 435, 83, 179, 275, 371, 467,
37627, 123, 219, 315, 411, 59, 155, 251, 347, 443, 91, 187, 283, 379, 475,
3777, 103, 199, 295, 391, 39, 135, 231, 327, 423, 71, 167, 263, 359, 455,
37815, 111, 207, 303, 399, 47, 143, 239, 335, 431, 79, 175, 271, 367, 463,
37923, 119, 215, 311, 407, 55, 151, 247, 343, 439, 87, 183, 279, 375, 471,
38031, 127, 223, 319, 415, 63, 159, 255, 351, 447, 95, 191, 287, 383, 479,
381};
382#endif
383
384#ifndef FFT_BITREV240
385#define FFT_BITREV240
386static const opus_int16 fft_bitrev240[240] = {
3870, 48, 96, 144, 192, 16, 64, 112, 160, 208, 32, 80, 128, 176, 224,
3884, 52, 100, 148, 196, 20, 68, 116, 164, 212, 36, 84, 132, 180, 228,
3898, 56, 104, 152, 200, 24, 72, 120, 168, 216, 40, 88, 136, 184, 232,
39012, 60, 108, 156, 204, 28, 76, 124, 172, 220, 44, 92, 140, 188, 236,
3911, 49, 97, 145, 193, 17, 65, 113, 161, 209, 33, 81, 129, 177, 225,
3925, 53, 101, 149, 197, 21, 69, 117, 165, 213, 37, 85, 133, 181, 229,
3939, 57, 105, 153, 201, 25, 73, 121, 169, 217, 41, 89, 137, 185, 233,
39413, 61, 109, 157, 205, 29, 77, 125, 173, 221, 45, 93, 141, 189, 237,
3952, 50, 98, 146, 194, 18, 66, 114, 162, 210, 34, 82, 130, 178, 226,
3966, 54, 102, 150, 198, 22, 70, 118, 166, 214, 38, 86, 134, 182, 230,
39710, 58, 106, 154, 202, 26, 74, 122, 170, 218, 42, 90, 138, 186, 234,
39814, 62, 110, 158, 206, 30, 78, 126, 174, 222, 46, 94, 142, 190, 238,
3993, 51, 99, 147, 195, 19, 67, 115, 163, 211, 35, 83, 131, 179, 227,
4007, 55, 103, 151, 199, 23, 71, 119, 167, 215, 39, 87, 135, 183, 231,
40111, 59, 107, 155, 203, 27, 75, 123, 171, 219, 43, 91, 139, 187, 235,
40215, 63, 111, 159, 207, 31, 79, 127, 175, 223, 47, 95, 143, 191, 239,
403};
404#endif
405
406#ifndef FFT_BITREV120
407#define FFT_BITREV120
408static const opus_int16 fft_bitrev120[120] = {
4090, 24, 48, 72, 96, 8, 32, 56, 80, 104, 16, 40, 64, 88, 112,
4104, 28, 52, 76, 100, 12, 36, 60, 84, 108, 20, 44, 68, 92, 116,
4111, 25, 49, 73, 97, 9, 33, 57, 81, 105, 17, 41, 65, 89, 113,
4125, 29, 53, 77, 101, 13, 37, 61, 85, 109, 21, 45, 69, 93, 117,
4132, 26, 50, 74, 98, 10, 34, 58, 82, 106, 18, 42, 66, 90, 114,
4146, 30, 54, 78, 102, 14, 38, 62, 86, 110, 22, 46, 70, 94, 118,
4153, 27, 51, 75, 99, 11, 35, 59, 83, 107, 19, 43, 67, 91, 115,
4167, 31, 55, 79, 103, 15, 39, 63, 87, 111, 23, 47, 71, 95, 119,
417};
418#endif
419
420#ifndef FFT_BITREV60
421#define FFT_BITREV60
422static const opus_int16 fft_bitrev60[60] = {
4230, 12, 24, 36, 48, 4, 16, 28, 40, 52, 8, 20, 32, 44, 56,
4241, 13, 25, 37, 49, 5, 17, 29, 41, 53, 9, 21, 33, 45, 57,
4252, 14, 26, 38, 50, 6, 18, 30, 42, 54, 10, 22, 34, 46, 58,
4263, 15, 27, 39, 51, 7, 19, 31, 43, 55, 11, 23, 35, 47, 59,
427};
428#endif
429
430#ifndef FFT_STATE48000_960_0
431#define FFT_STATE48000_960_0
432static const kiss_fft_state fft_state48000_960_0 = {
433480, /* nfft */
4340.002083333f, /* scale */
435-1, /* shift */
436{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
437fft_bitrev480, /* bitrev */
438fft_twiddles48000_960, /* bitrev */
439#ifdef OVERRIDE_FFT
440(arch_fft_state *)&cfg_arch_480,
441#else
442NULL,
443#endif
444};
445#endif
446
447#ifndef FFT_STATE48000_960_1
448#define FFT_STATE48000_960_1
449static const kiss_fft_state fft_state48000_960_1 = {
450240, /* nfft */
4510.004166667f, /* scale */
4521, /* shift */
453{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
454fft_bitrev240, /* bitrev */
455fft_twiddles48000_960, /* bitrev */
456#ifdef OVERRIDE_FFT
457(arch_fft_state *)&cfg_arch_240,
458#else
459NULL,
460#endif
461};
462#endif
463
464#ifndef FFT_STATE48000_960_2
465#define FFT_STATE48000_960_2
466static const kiss_fft_state fft_state48000_960_2 = {
467120, /* nfft */
4680.008333333f, /* scale */
4692, /* shift */
470{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
471fft_bitrev120, /* bitrev */
472fft_twiddles48000_960, /* bitrev */
473#ifdef OVERRIDE_FFT
474(arch_fft_state *)&cfg_arch_120,
475#else
476NULL,
477#endif
478};
479#endif
480
481#ifndef FFT_STATE48000_960_3
482#define FFT_STATE48000_960_3
483static const kiss_fft_state fft_state48000_960_3 = {
48460, /* nfft */
4850.016666667f, /* scale */
4863, /* shift */
487{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
488fft_bitrev60, /* bitrev */
489fft_twiddles48000_960, /* bitrev */
490#ifdef OVERRIDE_FFT
491(arch_fft_state *)&cfg_arch_60,
492#else
493NULL,
494#endif
495};
496#endif
497
498#endif
499
500#ifndef MDCT_TWIDDLES960
501#define MDCT_TWIDDLES960
/* MDCT twiddle table: 1800 values stored as four back-to-back runs of
 * 960, 480, 240 and 120 entries (960 + 480 + 240 + 120 = 1800).
 * Each run decreases from just below 1.0 to just above -1.0.
 * NOTE(review): the values look like cos(2*pi*(i + 1/8)/N) with
 * N = 1920, 960, 480 and 240 for the successive runs -- confirm against
 * the generator before relying on this. */
502static const opus_val16 mdct_twiddles960[1800] = {
/* Run 1: 960 entries. */
5030.99999994f, 0.99999321f, 0.99997580f, 0.99994773f, 0.99990886f,
5040.99985933f, 0.99979913f, 0.99972820f, 0.99964654f, 0.99955416f,
5050.99945110f, 0.99933738f, 0.99921292f, 0.99907774f, 0.99893188f,
5060.99877530f, 0.99860805f, 0.99843007f, 0.99824142f, 0.99804211f,
5070.99783206f, 0.99761140f, 0.99737996f, 0.99713790f, 0.99688518f,
5080.99662173f, 0.99634761f, 0.99606287f, 0.99576741f, 0.99546129f,
5090.99514455f, 0.99481714f, 0.99447906f, 0.99413031f, 0.99377096f,
5100.99340093f, 0.99302030f, 0.99262899f, 0.99222708f, 0.99181455f,
5110.99139136f, 0.99095762f, 0.99051321f, 0.99005818f, 0.98959261f,
5120.98911643f, 0.98862964f, 0.98813224f, 0.98762429f, 0.98710573f,
5130.98657662f, 0.98603696f, 0.98548669f, 0.98492593f, 0.98435456f,
5140.98377270f, 0.98318028f, 0.98257732f, 0.98196387f, 0.98133987f,
5150.98070538f, 0.98006040f, 0.97940493f, 0.97873890f, 0.97806245f,
5160.97737551f, 0.97667813f, 0.97597027f, 0.97525197f, 0.97452319f,
5170.97378403f, 0.97303438f, 0.97227436f, 0.97150391f, 0.97072303f,
5180.96993178f, 0.96913016f, 0.96831810f, 0.96749574f, 0.96666300f,
5190.96581990f, 0.96496642f, 0.96410263f, 0.96322852f, 0.96234411f,
5200.96144938f, 0.96054435f, 0.95962906f, 0.95870346f, 0.95776761f,
5210.95682150f, 0.95586514f, 0.95489854f, 0.95392174f, 0.95293468f,
5220.95193744f, 0.95093000f, 0.94991243f, 0.94888461f, 0.94784665f,
5230.94679856f, 0.94574034f, 0.94467193f, 0.94359344f, 0.94250488f,
5240.94140619f, 0.94029742f, 0.93917859f, 0.93804967f, 0.93691075f,
5250.93576175f, 0.93460274f, 0.93343377f, 0.93225473f, 0.93106574f,
5260.92986679f, 0.92865789f, 0.92743903f, 0.92621022f, 0.92497152f,
5270.92372292f, 0.92246443f, 0.92119598f, 0.91991776f, 0.91862965f,
5280.91733170f, 0.91602397f, 0.91470635f, 0.91337901f, 0.91204184f,
5290.91069490f, 0.90933824f, 0.90797186f, 0.90659571f, 0.90520984f,
5300.90381432f, 0.90240908f, 0.90099424f, 0.89956969f, 0.89813554f,
5310.89669174f, 0.89523834f, 0.89377540f, 0.89230281f, 0.89082074f,
5320.88932908f, 0.88782793f, 0.88631725f, 0.88479710f, 0.88326746f,
5330.88172835f, 0.88017982f, 0.87862182f, 0.87705445f, 0.87547767f,
5340.87389153f, 0.87229604f, 0.87069118f, 0.86907703f, 0.86745358f,
5350.86582077f, 0.86417878f, 0.86252749f, 0.86086690f, 0.85919720f,
5360.85751826f, 0.85583007f, 0.85413277f, 0.85242635f, 0.85071075f,
5370.84898609f, 0.84725231f, 0.84550947f, 0.84375757f, 0.84199661f,
5380.84022665f, 0.83844769f, 0.83665979f, 0.83486289f, 0.83305705f,
5390.83124226f, 0.82941860f, 0.82758605f, 0.82574469f, 0.82389444f,
5400.82203537f, 0.82016748f, 0.81829083f, 0.81640542f, 0.81451124f,
5410.81260836f, 0.81069672f, 0.80877650f, 0.80684757f, 0.80490994f,
5420.80296379f, 0.80100900f, 0.79904562f, 0.79707366f, 0.79509324f,
5430.79310423f, 0.79110676f, 0.78910083f, 0.78708643f, 0.78506362f,
5440.78303236f, 0.78099275f, 0.77894479f, 0.77688843f, 0.77482378f,
5450.77275085f, 0.77066964f, 0.76858020f, 0.76648247f, 0.76437658f,
5460.76226246f, 0.76014024f, 0.75800985f, 0.75587130f, 0.75372469f,
5470.75157005f, 0.74940729f, 0.74723655f, 0.74505776f, 0.74287105f,
5480.74067634f, 0.73847371f, 0.73626316f, 0.73404479f, 0.73181850f,
5490.72958434f, 0.72734243f, 0.72509271f, 0.72283524f, 0.72057003f,
5500.71829706f, 0.71601641f, 0.71372813f, 0.71143216f, 0.70912862f,
5510.70681745f, 0.70449871f, 0.70217246f, 0.69983864f, 0.69749737f,
5520.69514859f, 0.69279242f, 0.69042879f, 0.68805778f, 0.68567938f,
5530.68329364f, 0.68090063f, 0.67850029f, 0.67609268f, 0.67367786f,
5540.67125577f, 0.66882652f, 0.66639012f, 0.66394657f, 0.66149592f,
5550.65903819f, 0.65657341f, 0.65410155f, 0.65162271f, 0.64913690f,
5560.64664418f, 0.64414448f, 0.64163786f, 0.63912445f, 0.63660413f,
5570.63407701f, 0.63154310f, 0.62900239f, 0.62645501f, 0.62390089f,
5580.62134010f, 0.61877263f, 0.61619854f, 0.61361790f, 0.61103064f,
5590.60843682f, 0.60583651f, 0.60322970f, 0.60061646f, 0.59799677f,
5600.59537065f, 0.59273821f, 0.59009939f, 0.58745426f, 0.58480281f,
5610.58214509f, 0.57948118f, 0.57681108f, 0.57413477f, 0.57145232f,
5620.56876373f, 0.56606907f, 0.56336832f, 0.56066155f, 0.55794877f,
5630.55523002f, 0.55250537f, 0.54977477f, 0.54703826f, 0.54429591f,
5640.54154772f, 0.53879374f, 0.53603399f, 0.53326851f, 0.53049731f,
5650.52772039f, 0.52493787f, 0.52214974f, 0.51935595f, 0.51655668f,
5660.51375180f, 0.51094145f, 0.50812566f, 0.50530440f, 0.50247771f,
5670.49964568f, 0.49680826f, 0.49396557f, 0.49111754f, 0.48826426f,
5680.48540577f, 0.48254207f, 0.47967321f, 0.47679919f, 0.47392011f,
5690.47103590f, 0.46814668f, 0.46525243f, 0.46235323f, 0.45944905f,
5700.45653993f, 0.45362595f, 0.45070711f, 0.44778344f, 0.44485497f,
5710.44192174f, 0.43898380f, 0.43604112f, 0.43309379f, 0.43014181f,
5720.42718524f, 0.42422408f, 0.42125839f, 0.41828820f, 0.41531351f,
5730.41233435f, 0.40935081f, 0.40636289f, 0.40337059f, 0.40037400f,
5740.39737311f, 0.39436796f, 0.39135858f, 0.38834500f, 0.38532731f,
5750.38230544f, 0.37927949f, 0.37624949f, 0.37321547f, 0.37017745f,
5760.36713544f, 0.36408952f, 0.36103970f, 0.35798600f, 0.35492846f,
5770.35186714f, 0.34880206f, 0.34573323f, 0.34266070f, 0.33958447f,
5780.33650464f, 0.33342120f, 0.33033419f, 0.32724363f, 0.32414958f,
5790.32105204f, 0.31795108f, 0.31484672f, 0.31173897f, 0.30862790f,
5800.30551350f, 0.30239585f, 0.29927495f, 0.29615086f, 0.29302359f,
5810.28989318f, 0.28675964f, 0.28362307f, 0.28048345f, 0.27734083f,
5820.27419522f, 0.27104670f, 0.26789525f, 0.26474094f, 0.26158381f,
5830.25842386f, 0.25526115f, 0.25209570f, 0.24892756f, 0.24575676f,
5840.24258332f, 0.23940729f, 0.23622867f, 0.23304754f, 0.22986393f,
5850.22667783f, 0.22348931f, 0.22029841f, 0.21710514f, 0.21390954f,
5860.21071166f, 0.20751151f, 0.20430915f, 0.20110460f, 0.19789790f,
5870.19468907f, 0.19147816f, 0.18826519f, 0.18505022f, 0.18183327f,
5880.17861435f, 0.17539354f, 0.17217083f, 0.16894630f, 0.16571994f,
5890.16249183f, 0.15926196f, 0.15603039f, 0.15279715f, 0.14956227f,
5900.14632578f, 0.14308774f, 0.13984816f, 0.13660708f, 0.13336454f,
5910.13012058f, 0.12687522f, 0.12362850f, 0.12038045f, 0.11713112f,
5920.11388054f, 0.11062872f, 0.10737573f, 0.10412160f, 0.10086634f,
5930.097609997f, 0.094352618f, 0.091094226f, 0.087834857f, 0.084574550f,
5940.081313334f, 0.078051247f, 0.074788325f, 0.071524605f, 0.068260118f,
5950.064994894f, 0.061728980f, 0.058462404f, 0.055195201f, 0.051927410f,
5960.048659060f, 0.045390189f, 0.042120833f, 0.038851023f, 0.035580799f,
5970.032310195f, 0.029039243f, 0.025767982f, 0.022496443f, 0.019224664f,
5980.015952680f, 0.012680525f, 0.0094082337f, 0.0061358409f, 0.0028633832f,
599-0.00040910527f, -0.0036815894f, -0.0069540343f, -0.010226404f, -0.013498665f,
600-0.016770782f, -0.020042717f, -0.023314439f, -0.026585912f, -0.029857099f,
601-0.033127967f, -0.036398482f, -0.039668605f, -0.042938303f, -0.046207540f,
602-0.049476285f, -0.052744497f, -0.056012146f, -0.059279196f, -0.062545612f,
603-0.065811358f, -0.069076397f, -0.072340697f, -0.075604223f, -0.078866936f,
604-0.082128808f, -0.085389800f, -0.088649876f, -0.091909006f, -0.095167145f,
605-0.098424271f, -0.10168034f, -0.10493532f, -0.10818918f, -0.11144188f,
606-0.11469338f, -0.11794366f, -0.12119267f, -0.12444039f, -0.12768677f,
607-0.13093179f, -0.13417540f, -0.13741758f, -0.14065829f, -0.14389749f,
608-0.14713514f, -0.15037122f, -0.15360570f, -0.15683852f, -0.16006967f,
609-0.16329910f, -0.16652679f, -0.16975269f, -0.17297678f, -0.17619900f,
610-0.17941935f, -0.18263777f, -0.18585424f, -0.18906870f, -0.19228116f,
611-0.19549155f, -0.19869985f, -0.20190603f, -0.20511003f, -0.20831184f,
612-0.21151142f, -0.21470875f, -0.21790376f, -0.22109644f, -0.22428675f,
613-0.22747467f, -0.23066014f, -0.23384315f, -0.23702365f, -0.24020162f,
614-0.24337701f, -0.24654980f, -0.24971995f, -0.25288740f, -0.25605217f,
615-0.25921419f, -0.26237345f, -0.26552987f, -0.26868346f, -0.27183419f,
616-0.27498198f, -0.27812684f, -0.28126872f, -0.28440759f, -0.28754342f,
617-0.29067615f, -0.29380578f, -0.29693225f, -0.30005556f, -0.30317566f,
618-0.30629250f, -0.30940607f, -0.31251630f, -0.31562322f, -0.31872672f,
619-0.32182685f, -0.32492352f, -0.32801670f, -0.33110636f, -0.33419248f,
620-0.33727503f, -0.34035397f, -0.34342924f, -0.34650084f, -0.34956875f,
621-0.35263291f, -0.35569328f, -0.35874987f, -0.36180258f, -0.36485144f,
622-0.36789638f, -0.37093741f, -0.37397444f, -0.37700745f, -0.38003644f,
623-0.38306138f, -0.38608220f, -0.38909888f, -0.39211139f, -0.39511973f,
624-0.39812380f, -0.40112361f, -0.40411916f, -0.40711036f, -0.41009718f,
625-0.41307965f, -0.41605768f, -0.41903123f, -0.42200032f, -0.42496487f,
626-0.42792490f, -0.43088034f, -0.43383113f, -0.43677729f, -0.43971881f,
627-0.44265559f, -0.44558764f, -0.44851488f, -0.45143735f, -0.45435500f,
628-0.45726776f, -0.46017563f, -0.46307856f, -0.46597654f, -0.46886954f,
629-0.47175750f, -0.47464043f, -0.47751826f, -0.48039100f, -0.48325855f,
630-0.48612097f, -0.48897815f, -0.49183011f, -0.49467680f, -0.49751821f,
631-0.50035429f, -0.50318497f, -0.50601029f, -0.50883019f, -0.51164466f,
632-0.51445359f, -0.51725709f, -0.52005500f, -0.52284735f, -0.52563411f,
633-0.52841520f, -0.53119069f, -0.53396046f, -0.53672451f, -0.53948283f,
634-0.54223537f, -0.54498214f, -0.54772300f, -0.55045801f, -0.55318713f,
635-0.55591035f, -0.55862761f, -0.56133890f, -0.56404412f, -0.56674337f,
636-0.56943649f, -0.57212353f, -0.57480448f, -0.57747924f, -0.58014780f,
637-0.58281022f, -0.58546633f, -0.58811617f, -0.59075975f, -0.59339696f,
638-0.59602785f, -0.59865236f, -0.60127044f, -0.60388207f, -0.60648727f,
639-0.60908598f, -0.61167812f, -0.61426371f, -0.61684275f, -0.61941516f,
640-0.62198097f, -0.62454009f, -0.62709254f, -0.62963831f, -0.63217729f,
641-0.63470948f, -0.63723493f, -0.63975352f, -0.64226526f, -0.64477009f,
642-0.64726806f, -0.64975911f, -0.65224314f, -0.65472025f, -0.65719032f,
643-0.65965337f, -0.66210932f, -0.66455823f, -0.66700000f, -0.66943461f,
644-0.67186207f, -0.67428231f, -0.67669535f, -0.67910111f, -0.68149966f,
645-0.68389088f, -0.68627477f, -0.68865126f, -0.69102043f, -0.69338220f,
646-0.69573659f, -0.69808346f, -0.70042288f, -0.70275480f, -0.70507920f,
647-0.70739603f, -0.70970529f, -0.71200693f, -0.71430099f, -0.71658736f,
648-0.71886611f, -0.72113711f, -0.72340041f, -0.72565591f, -0.72790372f,
649-0.73014367f, -0.73237586f, -0.73460019f, -0.73681659f, -0.73902518f,
650-0.74122584f, -0.74341851f, -0.74560326f, -0.74778003f, -0.74994880f,
651-0.75210953f, -0.75426215f, -0.75640678f, -0.75854325f, -0.76067162f,
652-0.76279181f, -0.76490390f, -0.76700771f, -0.76910341f, -0.77119076f,
653-0.77326995f, -0.77534080f, -0.77740335f, -0.77945763f, -0.78150350f,
654-0.78354102f, -0.78557014f, -0.78759086f, -0.78960317f, -0.79160696f,
655-0.79360235f, -0.79558921f, -0.79756755f, -0.79953730f, -0.80149853f,
656-0.80345118f, -0.80539525f, -0.80733067f, -0.80925739f, -0.81117553f,
657-0.81308490f, -0.81498563f, -0.81687760f, -0.81876087f, -0.82063532f,
658-0.82250100f, -0.82435787f, -0.82620591f, -0.82804507f, -0.82987541f,
659-0.83169687f, -0.83350939f, -0.83531296f, -0.83710766f, -0.83889335f,
660-0.84067005f, -0.84243774f, -0.84419644f, -0.84594607f, -0.84768665f,
661-0.84941816f, -0.85114056f, -0.85285389f, -0.85455805f, -0.85625303f,
662-0.85793889f, -0.85961550f, -0.86128294f, -0.86294121f, -0.86459017f,
663-0.86622989f, -0.86786032f, -0.86948150f, -0.87109333f, -0.87269586f,
664-0.87428904f, -0.87587279f, -0.87744725f, -0.87901229f, -0.88056785f,
665-0.88211405f, -0.88365078f, -0.88517809f, -0.88669586f, -0.88820416f,
666-0.88970292f, -0.89119220f, -0.89267188f, -0.89414203f, -0.89560264f,
667-0.89705360f, -0.89849502f, -0.89992678f, -0.90134889f, -0.90276134f,
668-0.90416414f, -0.90555727f, -0.90694070f, -0.90831441f, -0.90967834f,
669-0.91103262f, -0.91237706f, -0.91371179f, -0.91503674f, -0.91635185f,
670-0.91765714f, -0.91895264f, -0.92023826f, -0.92151409f, -0.92277998f,
671-0.92403603f, -0.92528218f, -0.92651838f, -0.92774469f, -0.92896110f,
672-0.93016750f, -0.93136400f, -0.93255049f, -0.93372697f, -0.93489349f,
673-0.93604994f, -0.93719643f, -0.93833286f, -0.93945926f, -0.94057560f,
674-0.94168180f, -0.94277799f, -0.94386405f, -0.94494003f, -0.94600588f,
675-0.94706154f, -0.94810712f, -0.94914252f, -0.95016778f, -0.95118284f,
676-0.95218778f, -0.95318246f, -0.95416695f, -0.95514119f, -0.95610523f,
677-0.95705903f, -0.95800257f, -0.95893586f, -0.95985889f, -0.96077162f,
678-0.96167403f, -0.96256620f, -0.96344805f, -0.96431959f, -0.96518075f,
679-0.96603161f, -0.96687216f, -0.96770233f, -0.96852213f, -0.96933156f,
680-0.97013056f, -0.97091925f, -0.97169751f, -0.97246534f, -0.97322279f,
681-0.97396982f, -0.97470641f, -0.97543252f, -0.97614825f, -0.97685349f,
682-0.97754824f, -0.97823256f, -0.97890645f, -0.97956979f, -0.98022264f,
683-0.98086500f, -0.98149687f, -0.98211825f, -0.98272908f, -0.98332942f,
684-0.98391914f, -0.98449844f, -0.98506713f, -0.98562527f, -0.98617285f,
685-0.98670989f, -0.98723638f, -0.98775226f, -0.98825759f, -0.98875231f,
686-0.98923647f, -0.98971003f, -0.99017298f, -0.99062532f, -0.99106705f,
687-0.99149817f, -0.99191868f, -0.99232858f, -0.99272782f, -0.99311644f,
688-0.99349445f, -0.99386179f, -0.99421853f, -0.99456459f, -0.99489999f,
689-0.99522477f, -0.99553883f, -0.99584228f, -0.99613506f, -0.99641716f,
690-0.99668860f, -0.99694937f, -0.99719942f, -0.99743885f, -0.99766755f,
691-0.99788558f, -0.99809295f, -0.99828959f, -0.99847561f, -0.99865085f,
692-0.99881548f, -0.99896932f, -0.99911255f, -0.99924499f, -0.99936682f,
693-0.99947786f, -0.99957830f, -0.99966794f, -0.99974692f, -0.99981517f,
694-0.99987274f, -0.99991959f, -0.99995571f, -0.99998116f, -0.99999589f,
/* Run 2: 480 entries. */
6950.99999964f, 0.99997288f, 0.99990326f, 0.99979085f, 0.99963558f,
6960.99943751f, 0.99919659f, 0.99891287f, 0.99858636f, 0.99821711f,
6970.99780506f, 0.99735034f, 0.99685282f, 0.99631262f, 0.99572974f,
6980.99510419f, 0.99443603f, 0.99372530f, 0.99297196f, 0.99217612f,
6990.99133772f, 0.99045694f, 0.98953366f, 0.98856801f, 0.98756003f,
7000.98650974f, 0.98541719f, 0.98428243f, 0.98310548f, 0.98188645f,
7010.98062533f, 0.97932225f, 0.97797716f, 0.97659022f, 0.97516143f,
7020.97369087f, 0.97217858f, 0.97062469f, 0.96902919f, 0.96739221f,
7030.96571374f, 0.96399397f, 0.96223283f, 0.96043050f, 0.95858705f,
7040.95670253f, 0.95477700f, 0.95281059f, 0.95080340f, 0.94875544f,
7050.94666684f, 0.94453770f, 0.94236809f, 0.94015813f, 0.93790787f,
7060.93561745f, 0.93328691f, 0.93091643f, 0.92850608f, 0.92605597f,
7070.92356616f, 0.92103678f, 0.91846794f, 0.91585976f, 0.91321236f,
7080.91052586f, 0.90780038f, 0.90503591f, 0.90223277f, 0.89939094f,
7090.89651060f, 0.89359182f, 0.89063478f, 0.88763964f, 0.88460642f,
7100.88153529f, 0.87842643f, 0.87527996f, 0.87209594f, 0.86887461f,
7110.86561602f, 0.86232042f, 0.85898781f, 0.85561842f, 0.85221243f,
7120.84876984f, 0.84529096f, 0.84177583f, 0.83822471f, 0.83463764f,
7130.83101481f, 0.82735640f, 0.82366252f, 0.81993335f, 0.81616908f,
7140.81236988f, 0.80853581f, 0.80466717f, 0.80076402f, 0.79682660f,
7150.79285502f, 0.78884947f, 0.78481019f, 0.78073722f, 0.77663082f,
7160.77249116f, 0.76831841f, 0.76411277f, 0.75987434f, 0.75560343f,
7170.75130010f, 0.74696463f, 0.74259710f, 0.73819780f, 0.73376691f,
7180.72930455f, 0.72481096f, 0.72028631f, 0.71573079f, 0.71114463f,
7190.70652801f, 0.70188117f, 0.69720417f, 0.69249737f, 0.68776089f,
7200.68299496f, 0.67819971f, 0.67337549f, 0.66852236f, 0.66364062f,
7210.65873051f, 0.65379208f, 0.64882571f, 0.64383155f, 0.63880974f,
7220.63376063f, 0.62868434f, 0.62358117f, 0.61845124f, 0.61329484f,
7230.60811216f, 0.60290343f, 0.59766883f, 0.59240872f, 0.58712316f,
7240.58181250f, 0.57647687f, 0.57111657f, 0.56573176f, 0.56032276f,
7250.55488980f, 0.54943299f, 0.54395270f, 0.53844911f, 0.53292239f,
7260.52737290f, 0.52180082f, 0.51620632f, 0.51058978f, 0.50495136f,
7270.49929130f, 0.49360985f, 0.48790723f, 0.48218375f, 0.47643960f,
7280.47067502f, 0.46489030f, 0.45908567f, 0.45326138f, 0.44741765f,
7290.44155475f, 0.43567297f, 0.42977250f, 0.42385364f, 0.41791660f,
7300.41196167f, 0.40598908f, 0.39999911f, 0.39399201f, 0.38796803f,
7310.38192743f, 0.37587047f, 0.36979741f, 0.36370850f, 0.35760403f,
7320.35148421f, 0.34534934f, 0.33919969f, 0.33303553f, 0.32685706f,
7330.32066461f, 0.31445843f, 0.30823877f, 0.30200592f, 0.29576012f,
7340.28950164f, 0.28323078f, 0.27694780f, 0.27065292f, 0.26434645f,
7350.25802869f, 0.25169984f, 0.24536023f, 0.23901010f, 0.23264973f,
7360.22627939f, 0.21989937f, 0.21350993f, 0.20711134f, 0.20070387f,
7370.19428782f, 0.18786344f, 0.18143101f, 0.17499080f, 0.16854310f,
7380.16208819f, 0.15562633f, 0.14915779f, 0.14268288f, 0.13620184f,
7390.12971498f, 0.12322257f, 0.11672486f, 0.11022217f, 0.10371475f,
7400.097202882f, 0.090686858f, 0.084166944f, 0.077643424f, 0.071116582f,
7410.064586692f, 0.058054037f, 0.051518895f, 0.044981543f, 0.038442269f,
7420.031901345f, 0.025359053f, 0.018815678f, 0.012271495f, 0.0057267868f,
743-0.00081816671f, -0.0073630852f, -0.013907688f, -0.020451695f, -0.026994826f,
744-0.033536803f, -0.040077340f, -0.046616159f, -0.053152986f, -0.059687532f,
745-0.066219524f, -0.072748676f, -0.079274714f, -0.085797355f, -0.092316322f,
746-0.098831341f, -0.10534211f, -0.11184838f, -0.11834986f, -0.12484626f,
747-0.13133731f, -0.13782275f, -0.14430228f, -0.15077563f, -0.15724251f,
748-0.16370267f, -0.17015581f, -0.17660165f, -0.18303993f, -0.18947038f,
749-0.19589271f, -0.20230664f, -0.20871192f, -0.21510825f, -0.22149536f,
750-0.22787298f, -0.23424086f, -0.24059868f, -0.24694622f, -0.25328314f,
751-0.25960925f, -0.26592422f, -0.27222782f, -0.27851975f, -0.28479972f,
752-0.29106751f, -0.29732284f, -0.30356544f, -0.30979502f, -0.31601134f,
753-0.32221413f, -0.32840309f, -0.33457801f, -0.34073856f, -0.34688455f,
754-0.35301566f, -0.35913166f, -0.36523229f, -0.37131724f, -0.37738630f,
755-0.38343921f, -0.38947567f, -0.39549544f, -0.40149832f, -0.40748394f,
756-0.41345215f, -0.41940263f, -0.42533514f, -0.43124944f, -0.43714526f,
757-0.44302234f, -0.44888046f, -0.45471936f, -0.46053877f, -0.46633846f,
758-0.47211814f, -0.47787762f, -0.48361665f, -0.48933494f, -0.49503228f,
759-0.50070840f, -0.50636309f, -0.51199609f, -0.51760709f, -0.52319598f,
760-0.52876246f, -0.53430629f, -0.53982723f, -0.54532504f, -0.55079949f,
761-0.55625033f, -0.56167740f, -0.56708032f, -0.57245898f, -0.57781315f,
762-0.58314258f, -0.58844697f, -0.59372622f, -0.59897995f, -0.60420811f,
763-0.60941035f, -0.61458647f, -0.61973625f, -0.62485951f, -0.62995601f,
764-0.63502556f, -0.64006782f, -0.64508271f, -0.65007001f, -0.65502942f,
765-0.65996075f, -0.66486382f, -0.66973841f, -0.67458433f, -0.67940134f,
766-0.68418926f, -0.68894786f, -0.69367695f, -0.69837630f, -0.70304573f,
767-0.70768511f, -0.71229410f, -0.71687263f, -0.72142041f, -0.72593731f,
768-0.73042315f, -0.73487765f, -0.73930067f, -0.74369204f, -0.74805158f,
769-0.75237900f, -0.75667429f, -0.76093709f, -0.76516730f, -0.76936477f,
770-0.77352923f, -0.77766061f, -0.78175867f, -0.78582323f, -0.78985411f,
771-0.79385114f, -0.79781419f, -0.80174309f, -0.80563760f, -0.80949765f,
772-0.81332302f, -0.81711352f, -0.82086903f, -0.82458937f, -0.82827437f,
773-0.83192390f, -0.83553779f, -0.83911592f, -0.84265804f, -0.84616417f,
774-0.84963393f, -0.85306740f, -0.85646427f, -0.85982448f, -0.86314780f,
775-0.86643422f, -0.86968350f, -0.87289548f, -0.87607014f, -0.87920725f,
776-0.88230664f, -0.88536829f, -0.88839203f, -0.89137769f, -0.89432514f,
777-0.89723432f, -0.90010506f, -0.90293723f, -0.90573072f, -0.90848541f,
778-0.91120118f, -0.91387796f, -0.91651553f, -0.91911387f, -0.92167282f,
779-0.92419231f, -0.92667222f, -0.92911243f, -0.93151283f, -0.93387336f,
780-0.93619382f, -0.93847424f, -0.94071442f, -0.94291431f, -0.94507378f,
781-0.94719279f, -0.94927126f, -0.95130903f, -0.95330608f, -0.95526224f,
782-0.95717752f, -0.95905179f, -0.96088499f, -0.96267700f, -0.96442777f,
783-0.96613729f, -0.96780539f, -0.96943200f, -0.97101706f, -0.97256058f,
784-0.97406244f, -0.97552258f, -0.97694093f, -0.97831738f, -0.97965199f,
785-0.98094457f, -0.98219514f, -0.98340368f, -0.98457009f, -0.98569429f,
786-0.98677629f, -0.98781598f, -0.98881340f, -0.98976845f, -0.99068111f,
787-0.99155134f, -0.99237907f, -0.99316430f, -0.99390697f, -0.99460709f,
788-0.99526459f, -0.99587947f, -0.99645168f, -0.99698120f, -0.99746799f,
789-0.99791211f, -0.99831343f, -0.99867201f, -0.99898779f, -0.99926084f,
790-0.99949104f, -0.99967843f, -0.99982297f, -0.99992472f, -0.99998361f,
/* Run 3: 240 entries. */
7910.99999869f, 0.99989158f, 0.99961317f, 0.99916345f, 0.99854255f,
7920.99775058f, 0.99678761f, 0.99565387f, 0.99434954f, 0.99287480f,
7930.99122995f, 0.98941529f, 0.98743105f, 0.98527765f, 0.98295540f,
7940.98046476f, 0.97780609f, 0.97497988f, 0.97198665f, 0.96882683f,
7950.96550101f, 0.96200979f, 0.95835376f, 0.95453346f, 0.95054960f,
7960.94640291f, 0.94209403f, 0.93762374f, 0.93299282f, 0.92820197f,
7970.92325211f, 0.91814411f, 0.91287869f, 0.90745693f, 0.90187967f,
7980.89614785f, 0.89026248f, 0.88422459f, 0.87803519f, 0.87169534f,
7990.86520612f, 0.85856867f, 0.85178405f, 0.84485358f, 0.83777827f,
8000.83055943f, 0.82319832f, 0.81569612f, 0.80805415f, 0.80027372f,
8010.79235619f, 0.78430289f, 0.77611518f, 0.76779449f, 0.75934225f,
8020.75075996f, 0.74204898f, 0.73321080f, 0.72424710f, 0.71515924f,
8030.70594883f, 0.69661748f, 0.68716675f, 0.67759830f, 0.66791373f,
8040.65811473f, 0.64820296f, 0.63818014f, 0.62804794f, 0.61780810f,
8050.60746247f, 0.59701276f, 0.58646071f, 0.57580817f, 0.56505698f,
8060.55420899f, 0.54326600f, 0.53222996f, 0.52110273f, 0.50988621f,
8070.49858227f, 0.48719296f, 0.47572014f, 0.46416581f, 0.45253196f,
8080.44082057f, 0.42903364f, 0.41717321f, 0.40524128f, 0.39323992f,
8090.38117120f, 0.36903715f, 0.35683987f, 0.34458145f, 0.33226398f,
8100.31988961f, 0.30746040f, 0.29497850f, 0.28244606f, 0.26986524f,
8110.25723818f, 0.24456702f, 0.23185398f, 0.21910121f, 0.20631088f,
8120.19348522f, 0.18062639f, 0.16773662f, 0.15481812f, 0.14187308f,
8130.12890373f, 0.11591230f, 0.10290100f, 0.089872077f, 0.076827750f,
8140.063770257f, 0.050701842f, 0.037624735f, 0.024541186f, 0.011453429f,
815-0.0016362892f, -0.014725727f, -0.027812643f, -0.040894791f, -0.053969935f,
816-0.067035832f, -0.080090240f, -0.093130924f, -0.10615565f, -0.11916219f,
817-0.13214831f, -0.14511178f, -0.15805040f, -0.17096193f, -0.18384418f,
818-0.19669491f, -0.20951195f, -0.22229309f, -0.23503613f, -0.24773891f,
819-0.26039925f, -0.27301496f, -0.28558388f, -0.29810387f, -0.31057280f,
820-0.32298848f, -0.33534884f, -0.34765175f, -0.35989508f, -0.37207675f,
821-0.38419467f, -0.39624676f, -0.40823093f, -0.42014518f, -0.43198743f,
822-0.44375566f, -0.45544785f, -0.46706200f, -0.47859612f, -0.49004826f,
823-0.50141639f, -0.51269865f, -0.52389306f, -0.53499764f, -0.54601061f,
824-0.55693001f, -0.56775403f, -0.57848072f, -0.58910829f, -0.59963489f,
825-0.61005878f, -0.62037814f, -0.63059121f, -0.64069623f, -0.65069145f,
826-0.66057515f, -0.67034572f, -0.68000144f, -0.68954057f, -0.69896162f,
827-0.70826286f, -0.71744281f, -0.72649974f, -0.73543227f, -0.74423873f,
828-0.75291771f, -0.76146764f, -0.76988715f, -0.77817470f, -0.78632891f,
829-0.79434842f, -0.80223179f, -0.80997771f, -0.81758487f, -0.82505190f,
830-0.83237761f, -0.83956063f, -0.84659988f, -0.85349399f, -0.86024189f,
831-0.86684239f, -0.87329435f, -0.87959671f, -0.88574833f, -0.89174819f,
832-0.89759529f, -0.90328854f, -0.90882701f, -0.91420978f, -0.91943592f,
833-0.92450452f, -0.92941469f, -0.93416560f, -0.93875647f, -0.94318646f,
834-0.94745487f, -0.95156091f, -0.95550388f, -0.95928317f, -0.96289814f,
835-0.96634805f, -0.96963239f, -0.97275060f, -0.97570217f, -0.97848648f,
836-0.98110318f, -0.98355180f, -0.98583186f, -0.98794299f, -0.98988485f,
837-0.99165714f, -0.99325943f, -0.99469161f, -0.99595332f, -0.99704438f,
838-0.99796462f, -0.99871385f, -0.99929196f, -0.99969882f, -0.99993443f,
/* Run 4: 120 entries. */
8390.99999464f, 0.99956632f, 0.99845290f, 0.99665523f, 0.99417448f,
8400.99101239f, 0.98717111f, 0.98265326f, 0.97746199f, 0.97160077f,
8410.96507365f, 0.95788515f, 0.95004016f, 0.94154406f, 0.93240267f,
8420.92262226f, 0.91220951f, 0.90117162f, 0.88951606f, 0.87725091f,
8430.86438453f, 0.85092574f, 0.83688372f, 0.82226819f, 0.80708915f,
8440.79135692f, 0.77508235f, 0.75827658f, 0.74095112f, 0.72311783f,
8450.70478898f, 0.68597710f, 0.66669506f, 0.64695615f, 0.62677377f,
8460.60616189f, 0.58513457f, 0.56370622f, 0.54189157f, 0.51970547f,
8470.49716324f, 0.47428027f, 0.45107225f, 0.42755505f, 0.40374488f,
8480.37965798f, 0.35531086f, 0.33072025f, 0.30590299f, 0.28087607f,
8490.25565663f, 0.23026201f, 0.20470956f, 0.17901683f, 0.15320139f,
8500.12728097f, 0.10127331f, 0.075196236f, 0.049067631f, 0.022905400f,
851-0.0032725304f, -0.029448219f, -0.055603724f, -0.081721120f, -0.10778251f,
852-0.13377003f, -0.15966587f, -0.18545228f, -0.21111161f, -0.23662624f,
853-0.26197869f, -0.28715160f, -0.31212771f, -0.33688989f, -0.36142120f,
854-0.38570482f, -0.40972409f, -0.43346253f, -0.45690393f, -0.48003218f,
855-0.50283146f, -0.52528608f, -0.54738069f, -0.56910020f, -0.59042966f,
856-0.61135447f, -0.63186026f, -0.65193301f, -0.67155898f, -0.69072473f,
857-0.70941705f, -0.72762316f, -0.74533063f, -0.76252723f, -0.77920127f,
858-0.79534131f, -0.81093621f, -0.82597536f, -0.84044844f, -0.85434550f,
859-0.86765707f, -0.88037395f, -0.89248747f, -0.90398932f, -0.91487163f,
860-0.92512697f, -0.93474823f, -0.94372886f, -0.95206273f, -0.95974404f,
861-0.96676767f, -0.97312868f, -0.97882277f, -0.98384601f, -0.98819500f,
862-0.99186671f, -0.99485862f, -0.99716878f, -0.99879545f, -0.99973762f,
863};
864#endif
865
/* Static CELT mode with Fs = 48000 and overlap = 120. The short-MDCT
 * settings give 8 * 120 = 960, matching the "960" in the name. */
866static const CELTMode mode48000_960_120 = {
86748000, /* Fs */
868120, /* overlap */
86921, /* nbEBands */
87021, /* effEBands */
871{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, }, /* preemph */
872eband5ms, /* eBands */
8733, /* maxLM */
8748, /* nbShortMdcts */
875120, /* shortMdctSize */
87611, /* nbAllocVectors */
877band_allocation, /* allocVectors */
878logN400, /* logN */
879window120, /* window */
/* mdct: points at the four FFT states fft_state48000_960_0.._3 and the
 * mdct_twiddles960 table.
 * NOTE(review): the leading 1920 and 3 look like the full MDCT size and the
 * maximum shift -- confirm against the mdct_lookup definition. */
880{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */
881{392, cache_index50, cache_bits50, cache_caps50}, /* cache */
882};
883
884/* List of all the available modes */
/* TOTAL_MODES sizes the array below, so it must stay equal to the number of
 * entries listed; mode48000_960_120 is the only one here. */
885#define TOTAL_MODES 1
886static const CELTMode * const static_mode_list[TOTAL_MODES] = {
887&mode48000_960_120,
888};
diff --git a/lib/rbcodec/codecs/libopus/celt/static_modes_float_arm_ne10.h b/lib/rbcodec/codecs/libopus/celt/static_modes_float_arm_ne10.h
new file mode 100644
index 0000000000..66e1abb101
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/static_modes_float_arm_ne10.h
@@ -0,0 +1,404 @@
1/* The contents of this file was automatically generated by
2 * dump_mode_arm_ne10.c with arguments: 48000 960
3 * It contains static definitions for some pre-defined modes. */
4#include <NE10_types.h>
5
6#ifndef NE10_FFT_PARAMS48000_960
7#define NE10_FFT_PARAMS48000_960
8static const ne10_int32_t ne10_factors_480[64] = {
94, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130, 0, 0, 0, };
14static const ne10_int32_t ne10_factors_240[64] = {
153, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190, 0, 0, 0, };
20static const ne10_int32_t ne10_factors_120[64] = {
213, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
220, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
240, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
250, 0, 0, 0, };
26static const ne10_int32_t ne10_factors_60[64] = {
272, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
310, 0, 0, 0, };
32static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
33{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
34{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
35{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
36{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
37{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
38{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
39{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
40{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
41{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
42{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
43{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
44{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
45{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
46{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
47{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
48{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
49{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
50{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
51{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
52{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
53{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
54{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
55{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
56{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
57{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
58{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
59{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
60{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
61{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
62{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
63{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
64{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
65{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
66{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
67{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
68{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
69{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
70{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
71{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
72{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
73{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
74{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
75{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
76{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
77{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
78{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
79{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
80{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
81{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
82{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
83{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
84{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
85{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
86{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
87{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
88{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
89{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
90{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
91{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
92{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
93{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
94{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
95{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
96{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
97{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
98{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
99{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
100{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
101{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
102{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
103{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
104{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
105{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
106{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
107{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
108{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
109{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
110{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
111{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
112{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
113{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
114{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
115{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
116{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
117{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
118{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
119{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
120{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
121{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
122{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
123{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
124{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
125{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
126{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
127{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
128{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
129{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
130{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
131{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
132{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
133{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
134{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
135{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
136{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
137{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
138{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
139{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
140{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
141{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
142{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
143{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
144{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
145{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
146{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
147{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
148{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
149{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
150{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
151{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
152{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
153{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
154{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
155{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
156{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
157{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
158{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
159{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
160{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
161{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
162{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
163{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
164{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
165{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
166{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
167{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
168{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
169{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
170{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
171{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
172{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
173{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
174{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
175{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
176{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
177{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
178{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
179{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
180{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
181{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
182{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
183{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
184{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
185{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
186{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
187{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
188{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
189{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
190{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
191{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
192{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
193};
194static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
195{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
196{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
197{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
198{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
199{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
200{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
201{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
202{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
203{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
204{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
205{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
206{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
207{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
208{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
209{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
210{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
211{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
212{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
213{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
214{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
215{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
216{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
217{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
218{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
219{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
220{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
221{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
222{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
223{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
224{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
225{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
226{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
227{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
228{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
229{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
230{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
231{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
232{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
233{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
234{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
235{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
236{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
237{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
238{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
239{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
240{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
241{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
242{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
243{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
244{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
245{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
246{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
247{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
248{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
249{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
250{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
251{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
252{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
253{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
254{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
255{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
256{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
257{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
258{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
259{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
260{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
261{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
262{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
263{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
264{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
265{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
266{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
267{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
268{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
269{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
270{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
271{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
272{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
273{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
274{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
275};
276static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
277{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
278{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
279{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
280{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
281{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
282{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
283{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
284{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
285{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
286{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
287{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
288{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
289{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
290{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
291{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
292{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
293{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
294{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
295{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
296{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
297{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
298{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
299{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
300{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
301{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
302{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
303{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
304{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
305{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
306{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
307{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
308{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
309{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
310{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
311{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
312{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
313{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
314{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
315{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
316{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
317};
318static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
319{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
320{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
321{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
322{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
323{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
324{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
325{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
326{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
327{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
328{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
329{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
330{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
331{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
332{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
333{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
334{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
335{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
336{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
337{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
338{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
339};
340static const ne10_fft_state_float32_t ne10_fft_state_float32_t_480 = {
341120,
342(ne10_int32_t *)ne10_factors_480,
343(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
344NULL,
345(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
346/* is_forward_scaled = true */
347(ne10_int32_t) 1,
348/* is_backward_scaled = false */
349(ne10_int32_t) 0,
350};
351static const arch_fft_state cfg_arch_480 = {
3521,
353(void *)&ne10_fft_state_float32_t_480,
354};
355
356static const ne10_fft_state_float32_t ne10_fft_state_float32_t_240 = {
35760,
358(ne10_int32_t *)ne10_factors_240,
359(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
360NULL,
361(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
362/* is_forward_scaled = true */
363(ne10_int32_t) 1,
364/* is_backward_scaled = false */
365(ne10_int32_t) 0,
366};
367static const arch_fft_state cfg_arch_240 = {
3681,
369(void *)&ne10_fft_state_float32_t_240,
370};
371
372static const ne10_fft_state_float32_t ne10_fft_state_float32_t_120 = {
37330,
374(ne10_int32_t *)ne10_factors_120,
375(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
376NULL,
377(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
378/* is_forward_scaled = true */
379(ne10_int32_t) 1,
380/* is_backward_scaled = false */
381(ne10_int32_t) 0,
382};
383static const arch_fft_state cfg_arch_120 = {
3841,
385(void *)&ne10_fft_state_float32_t_120,
386};
387
388static const ne10_fft_state_float32_t ne10_fft_state_float32_t_60 = {
38915,
390(ne10_int32_t *)ne10_factors_60,
391(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
392NULL,
393(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
394/* is_forward_scaled = true */
395(ne10_int32_t) 1,
396/* is_backward_scaled = false */
397(ne10_int32_t) 0,
398};
399static const arch_fft_state cfg_arch_60 = {
4001,
401(void *)&ne10_fft_state_float32_t_60,
402};
403
404#endif /* end NE10_FFT_PARAMS48000_960 */
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_cwrs32.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_cwrs32.c
new file mode 100644
index 0000000000..36dd8af5f5
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_cwrs32.c
@@ -0,0 +1,161 @@
1/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
2 Gregory Maxwell
3 Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include <stdio.h>
34#include <string.h>
35
36#ifndef CUSTOM_MODES
37#define CUSTOM_MODES
38#else
39#define TEST_CUSTOM_MODES
40#endif
41
42#define CELT_C
43#include "stack_alloc.h"
44#include "entenc.c"
45#include "entdec.c"
46#include "entcode.c"
47#include "cwrs.c"
48#include "mathops.c"
49#include "rate.h"
50
51#define NMAX (240)
52#define KMAX (128)
53
/* Test-case tables consumed by main():
     pn[t]    - vector dimension N used for test case t;
     pkmax[t] - largest pulse count K exercised for that N (main() stops
                increasing K once get_pulses() exceeds this value).
   TEST_CUSTOM_MODES is set at the top of this file when the build had already
   defined CUSTOM_MODES; it selects the larger 44-entry table set, otherwise
   the 22-entry set is used. */
54#ifdef TEST_CUSTOM_MODES
55
56#define NDIMS (44)
57static const int pn[NDIMS]={
58 2, 3, 4, 5, 6, 7, 8, 9, 10,
59 11, 12, 13, 14, 15, 16, 18, 20, 22,
60 24, 26, 28, 30, 32, 36, 40, 44, 48,
61 52, 56, 60, 64, 72, 80, 88, 96, 104,
62 112, 120, 128, 144, 160, 176, 192, 208
63};
64static const int pkmax[NDIMS]={
65 128, 128, 128, 128, 88, 52, 36, 26, 22,
66 18, 16, 15, 13, 12, 12, 11, 10, 9,
67 9, 8, 8, 7, 7, 7, 7, 6, 6,
68 6, 6, 6, 5, 5, 5, 5, 5, 5,
69 4, 4, 4, 4, 4, 4, 4, 4
70};
71
72#else /* TEST_CUSTOM_MODES */
73
74#define NDIMS (22)
75static const int pn[NDIMS]={
76 2, 3, 4, 6, 8, 9, 11, 12, 16,
77 18, 22, 24, 32, 36, 44, 48, 64, 72,
78 88, 96, 144, 176
79};
80static const int pkmax[NDIMS]={
81 128, 128, 128, 88, 36, 26, 18, 16, 12,
82 11, 9, 9, 7, 7, 6, 6, 5, 5,
83 5, 5, 4, 4
84};
85
86#endif
87
88int main(void){
89 int t;
90 int n;
91 ALLOC_STACK;
92 for(t=0;t<NDIMS;t++){
93 int pseudo;
94 n=pn[t];
95 for(pseudo=1;pseudo<41;pseudo++)
96 {
97 int k;
98#if defined(SMALL_FOOTPRINT)
99 opus_uint32 uu[KMAX+2U];
100#endif
101 opus_uint32 inc;
102 opus_uint32 nc;
103 opus_uint32 i;
104 k=get_pulses(pseudo);
105 if (k>pkmax[t])break;
106 printf("Testing CWRS with N=%i, K=%i...\n",n,k);
107#if defined(SMALL_FOOTPRINT)
108 nc=ncwrs_urow(n,k,uu);
109#else
110 nc=CELT_PVQ_V(n,k);
111#endif
112 inc=nc/20000;
113 if(inc<1)inc=1;
114 for(i=0;i<nc;i+=inc){
115#if defined(SMALL_FOOTPRINT)
116 opus_uint32 u[KMAX+2U];
117#endif
118 int y[NMAX];
119 int sy;
120 opus_uint32 v;
121 opus_uint32 ii;
122 int j;
123#if defined(SMALL_FOOTPRINT)
124 memcpy(u,uu,(k+2U)*sizeof(*u));
125 cwrsi(n,k,i,y,u);
126#else
127 cwrsi(n,k,i,y);
128#endif
129 sy=0;
130 for(j=0;j<n;j++)sy+=abs(y[j]);
131 if(sy!=k){
132 fprintf(stderr,"N=%d Pulse count mismatch in cwrsi (%d!=%d).\n",
133 n,sy,k);
134 return 99;
135 }
136 /*printf("%6u of %u:",i,nc);
137 for(j=0;j<n;j++)printf(" %+3i",y[j]);
138 printf(" ->");*/
139#if defined(SMALL_FOOTPRINT)
140 ii=icwrs(n,k,&v,y,u);
141#else
142 ii=icwrs(n,y);
143 v=CELT_PVQ_V(n,k);
144#endif
145 if(ii!=i){
146 fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n",
147 (long)ii,(long)i);
148 return 1;
149 }
150 if(v!=nc){
151 fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n",
152 (long)v,(long)nc);
153 return 2;
154 }
155 /*printf(" %6u\n",i);*/
156 }
157 /*printf("\n");*/
158 }
159 }
160 return 0;
161}
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_dft.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_dft.c
new file mode 100644
index 0000000000..70f8f4937b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_dft.c
@@ -0,0 +1,179 @@
1/* Copyright (c) 2008 Xiph.Org Foundation
2 Written by Jean-Marc Valin */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <stdio.h>
33
34#include "stack_alloc.h"
35#include "kiss_fft.h"
36#include "mathops.h"
37#include "modes.h"
38
39#ifndef M_PI
40#define M_PI 3.141592653
41#endif
42
43int ret = 0;
44
45void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse)
46{
47 int bin,k;
48 double errpow=0,sigpow=0, snr;
49
50 for (bin=0;bin<nfft;++bin) {
51 double ansr = 0;
52 double ansi = 0;
53 double difr;
54 double difi;
55
56 for (k=0;k<nfft;++k) {
57 double phase = -2*M_PI*bin*k/nfft;
58 double re = cos(phase);
59 double im = sin(phase);
60 if (isinverse)
61 im = -im;
62
63 if (!isinverse)
64 {
65 re /= nfft;
66 im /= nfft;
67 }
68
69 ansr += in[k].r * re - in[k].i * im;
70 ansi += in[k].r * im + in[k].i * re;
71 }
72 /*printf ("%d %d ", (int)ansr, (int)ansi);*/
73 difr = ansr - out[bin].r;
74 difi = ansi - out[bin].i;
75 errpow += difr*difr + difi*difi;
76 sigpow += ansr*ansr+ansi*ansi;
77 }
78 snr = 10*log10(sigpow/errpow);
79 printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
80 if (snr<60) {
81 printf( "** poor snr: %f ** \n", snr);
82 ret = 1;
83 }
84}
85
86void test1d(int nfft,int isinverse,int arch)
87{
88 size_t buflen = sizeof(kiss_fft_cpx)*nfft;
89 kiss_fft_cpx *in;
90 kiss_fft_cpx *out;
91 int k;
92#ifdef CUSTOM_MODES
93 kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch);
94#else
95 int id;
96 const kiss_fft_state *cfg;
97 CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
98 if (nfft == 480) id = 0;
99 else if (nfft == 240) id = 1;
100 else if (nfft == 120) id = 2;
101 else if (nfft == 60) id = 3;
102 else return;
103 cfg = mode->mdct.kfft[id];
104#endif
105
106 in = (kiss_fft_cpx*)malloc(buflen);
107 out = (kiss_fft_cpx*)malloc(buflen);
108
109 for (k=0;k<nfft;++k) {
110 in[k].r = (rand() % 32767) - 16384;
111 in[k].i = (rand() % 32767) - 16384;
112 }
113
114 for (k=0;k<nfft;++k) {
115 in[k].r *= 32768;
116 in[k].i *= 32768;
117 }
118
119 if (isinverse)
120 {
121 for (k=0;k<nfft;++k) {
122 in[k].r /= nfft;
123 in[k].i /= nfft;
124 }
125 }
126
127 /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/
128
129 if (isinverse)
130 opus_ifft(cfg,in,out, arch);
131 else
132 opus_fft(cfg,in,out, arch);
133
134 /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
135
136 check(in,out,nfft,isinverse);
137
138 free(in);
139 free(out);
140#ifdef CUSTOM_MODES
141 opus_fft_free(cfg, arch);
142#endif
143}
144
145int main(int argc,char ** argv)
146{
147 ALLOC_STACK;
148 int arch = opus_select_arch();
149
150 if (argc>1) {
151 int k;
152 for (k=1;k<argc;++k) {
153 test1d(atoi(argv[k]),0,arch);
154 test1d(atoi(argv[k]),1,arch);
155 }
156 }else{
157 test1d(32,0,arch);
158 test1d(32,1,arch);
159 test1d(128,0,arch);
160 test1d(128,1,arch);
161 test1d(256,0,arch);
162 test1d(256,1,arch);
163#ifndef RADIX_TWO_ONLY
164 test1d(36,0,arch);
165 test1d(36,1,arch);
166 test1d(50,0,arch);
167 test1d(50,1,arch);
168 test1d(60,0,arch);
169 test1d(60,1,arch);
170 test1d(120,0,arch);
171 test1d(120,1,arch);
172 test1d(240,0,arch);
173 test1d(240,1,arch);
174 test1d(480,0,arch);
175 test1d(480,1,arch);
176#endif
177 }
178 return ret;
179}
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_entropy.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_entropy.c
new file mode 100644
index 0000000000..7f674529df
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_entropy.c
@@ -0,0 +1,383 @@
1/* Copyright (c) 2007-2011 Xiph.Org Foundation, Mozilla Corporation,
2 Gregory Maxwell
3 Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include <stdlib.h>
34#include <stdio.h>
35#include <math.h>
36#include <time.h>
37#define CELT_C
38#include "entcode.h"
39#include "entenc.h"
40#include "entdec.h"
41#include <string.h>
42
43#include "entenc.c"
44#include "entdec.c"
45#include "entcode.c"
46
47#ifndef M_LOG2E
48# define M_LOG2E 1.4426950408889634074
49#endif
50#define DATA_SIZE 10000000
51#define DATA_SIZE2 10000
52
53int main(int _argc,char **_argv){
54 ec_enc enc;
55 ec_dec dec;
56 long nbits;
57 long nbits2;
58 double entropy;
59 int ft;
60 int ftb;
61 int sz;
62 int i;
63 int ret;
64 unsigned int sym;
65 unsigned int seed;
66 unsigned char *ptr;
67 const char *env_seed;
68 ret=0;
69 entropy=0;
70 if (_argc > 2) {
71 fprintf(stderr, "Usage: %s [<seed>]\n", _argv[0]);
72 return 1;
73 }
74 env_seed = getenv("SEED");
75 if (_argc > 1)
76 seed = atoi(_argv[1]);
77 else if (env_seed)
78 seed = atoi(env_seed);
79 else
80 seed = time(NULL);
81 /*Testing encoding of raw bit values.*/
82 ptr = (unsigned char *)malloc(DATA_SIZE);
83 ec_enc_init(&enc,ptr, DATA_SIZE);
84 for(ft=2;ft<1024;ft++){
85 for(i=0;i<ft;i++){
86 entropy+=log(ft)*M_LOG2E;
87 ec_enc_uint(&enc,i,ft);
88 }
89 }
90 /*Testing encoding of raw bit values.*/
91 for(ftb=1;ftb<16;ftb++){
92 for(i=0;i<(1<<ftb);i++){
93 entropy+=ftb;
94 nbits=ec_tell(&enc);
95 ec_enc_bits(&enc,i,ftb);
96 nbits2=ec_tell(&enc);
97 if(nbits2-nbits!=ftb){
98 fprintf(stderr,"Used %li bits to encode %i bits directly.\n",
99 nbits2-nbits,ftb);
100 ret=-1;
101 }
102 }
103 }
104 nbits=ec_tell_frac(&enc);
105 ec_enc_done(&enc);
106 fprintf(stderr,
107 "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n",
108 entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits);
109 fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc));
110 ec_dec_init(&dec,ptr,DATA_SIZE);
111 for(ft=2;ft<1024;ft++){
112 for(i=0;i<ft;i++){
113 sym=ec_dec_uint(&dec,ft);
114 if(sym!=(unsigned)i){
115 fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft);
116 ret=-1;
117 }
118 }
119 }
120 for(ftb=1;ftb<16;ftb++){
121 for(i=0;i<(1<<ftb);i++){
122 sym=ec_dec_bits(&dec,ftb);
123 if(sym!=(unsigned)i){
124 fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb);
125 ret=-1;
126 }
127 }
128 }
129 nbits2=ec_tell_frac(&dec);
130 if(nbits!=nbits2){
131 fprintf(stderr,
132 "Reported number of bits used was %0.2lf, should be %0.2lf.\n",
133 ldexp(nbits2,-3),ldexp(nbits,-3));
134 ret=-1;
135 }
136 /*Testing an encoder bust prefers range coder data over raw bits.
137 This isn't a general guarantee, will only work for data that is buffered in
138 the encoder state and not yet stored in the user buffer, and should never
139 get used in practice.
140 It's mostly here for code coverage completeness.*/
141 /*Start with a 16-bit buffer.*/
142 ec_enc_init(&enc,ptr,2);
143 /*Write 7 raw bits.*/
144 ec_enc_bits(&enc,0x55,7);
145 /*Write 12.3 bits of range coder data.*/
146 ec_enc_uint(&enc,1,2);
147 ec_enc_uint(&enc,1,3);
148 ec_enc_uint(&enc,1,4);
149 ec_enc_uint(&enc,1,5);
150 ec_enc_uint(&enc,2,6);
151 ec_enc_uint(&enc,6,7);
152 ec_enc_done(&enc);
153 ec_dec_init(&dec,ptr,2);
154 if(!enc.error
155 /*The raw bits should have been overwritten by the range coder data.*/
156 ||ec_dec_bits(&dec,7)!=0x05
157 /*And all the range coder data should have been encoded correctly.*/
158 ||ec_dec_uint(&dec,2)!=1
159 ||ec_dec_uint(&dec,3)!=1
160 ||ec_dec_uint(&dec,4)!=1
161 ||ec_dec_uint(&dec,5)!=1
162 ||ec_dec_uint(&dec,6)!=2
163 ||ec_dec_uint(&dec,7)!=6){
164 fprintf(stderr,"Encoder bust overwrote range coder data with raw bits.\n");
165 ret=-1;
166 }
167 srand(seed);
168 fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536);
169 for(i=0;i<409600;i++){
170 unsigned *data;
171 unsigned *tell;
172 unsigned tell_bits;
173 int j;
174 int zeros;
175 ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10;
176 sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
177 data=(unsigned *)malloc(sz*sizeof(*data));
178 tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
179 ec_enc_init(&enc,ptr,DATA_SIZE2);
180 zeros = rand()%13==0;
181 tell[0]=ec_tell_frac(&enc);
182 for(j=0;j<sz;j++){
183 if (zeros)
184 data[j]=0;
185 else
186 data[j]=rand()%ft;
187 ec_enc_uint(&enc,data[j],ft);
188 tell[j+1]=ec_tell_frac(&enc);
189 }
190 if (rand()%2==0)
191 while(ec_tell(&enc)%8 != 0)
192 ec_enc_uint(&enc, rand()%2, 2);
193 tell_bits = ec_tell(&enc);
194 ec_enc_done(&enc);
195 if(tell_bits!=(unsigned)ec_tell(&enc)){
196 fprintf(stderr,"ec_tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n",
197 ec_tell(&enc),tell_bits,seed);
198 ret=-1;
199 }
200 if ((tell_bits+7)/8 < ec_range_bytes(&enc))
201 {
202 fprintf (stderr, "ec_tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
203 ec_range_bytes(&enc), (tell_bits+7)/8,seed);
204 ret=-1;
205 }
206 ec_dec_init(&dec,ptr,DATA_SIZE2);
207 if(ec_tell_frac(&dec)!=tell[0]){
208 fprintf(stderr,
209 "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
210 0,ec_tell_frac(&dec),tell[0],seed);
211 }
212 for(j=0;j<sz;j++){
213 sym=ec_dec_uint(&dec,ft);
214 if(sym!=data[j]){
215 fprintf(stderr,
216 "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n",
217 sym,data[j],ft,j,sz,seed);
218 ret=-1;
219 }
220 if(ec_tell_frac(&dec)!=tell[j+1]){
221 fprintf(stderr,
222 "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
223 j+1,ec_tell_frac(&dec),tell[j+1],seed);
224 }
225 }
226 free(tell);
227 free(data);
228 }
229 /*Test compatibility between multiple different encode/decode routines.*/
230 for(i=0;i<409600;i++){
231 unsigned *logp1;
232 unsigned *data;
233 unsigned *tell;
234 unsigned *enc_method;
235 int j;
236 sz=rand()/((RAND_MAX>>(rand()%9U))+1U);
237 logp1=(unsigned *)malloc(sz*sizeof(*logp1));
238 data=(unsigned *)malloc(sz*sizeof(*data));
239 tell=(unsigned *)malloc((sz+1)*sizeof(*tell));
240 enc_method=(unsigned *)malloc(sz*sizeof(*enc_method));
241 ec_enc_init(&enc,ptr,DATA_SIZE2);
242 tell[0]=ec_tell_frac(&enc);
243 for(j=0;j<sz;j++){
244 data[j]=rand()/((RAND_MAX>>1)+1);
245 logp1[j]=(rand()%15)+1;
246 enc_method[j]=rand()/((RAND_MAX>>2)+1);
247 switch(enc_method[j]){
248 case 0:{
249 ec_encode(&enc,data[j]?(1<<logp1[j])-1:0,
250 (1<<logp1[j])-(data[j]?0:1),1<<logp1[j]);
251 }break;
252 case 1:{
253 ec_encode_bin(&enc,data[j]?(1<<logp1[j])-1:0,
254 (1<<logp1[j])-(data[j]?0:1),logp1[j]);
255 }break;
256 case 2:{
257 ec_enc_bit_logp(&enc,data[j],logp1[j]);
258 }break;
259 case 3:{
260 unsigned char icdf[2];
261 icdf[0]=1;
262 icdf[1]=0;
263 ec_enc_icdf(&enc,data[j],icdf,logp1[j]);
264 }break;
265 }
266 tell[j+1]=ec_tell_frac(&enc);
267 }
268 ec_enc_done(&enc);
269 if((ec_tell(&enc)+7U)/8U<ec_range_bytes(&enc)){
270 fprintf(stderr,"tell() lied, there's %i bytes instead of %d (Random seed: %u)\n",
271 ec_range_bytes(&enc),(ec_tell(&enc)+7)/8,seed);
272 ret=-1;
273 }
274 ec_dec_init(&dec,ptr,DATA_SIZE2);
275 if(ec_tell_frac(&dec)!=tell[0]){
276 fprintf(stderr,
277 "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
278 0,ec_tell_frac(&dec),tell[0],seed);
279 }
280 for(j=0;j<sz;j++){
281 int fs;
282 int dec_method;
283 dec_method=rand()/((RAND_MAX>>2)+1);
284 switch(dec_method){
285 case 0:{
286 fs=ec_decode(&dec,1<<logp1[j]);
287 sym=fs>=(1<<logp1[j])-1;
288 ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
289 (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
290 }break;
291 case 1:{
292 fs=ec_decode_bin(&dec,logp1[j]);
293 sym=fs>=(1<<logp1[j])-1;
294 ec_dec_update(&dec,sym?(1<<logp1[j])-1:0,
295 (1<<logp1[j])-(sym?0:1),1<<logp1[j]);
296 }break;
297 case 2:{
298 sym=ec_dec_bit_logp(&dec,logp1[j]);
299 }break;
300 case 3:{
301 unsigned char icdf[2];
302 icdf[0]=1;
303 icdf[1]=0;
304 sym=ec_dec_icdf(&dec,icdf,logp1[j]);
305 }break;
306 }
307 if(sym!=data[j]){
308 fprintf(stderr,
309 "Decoded %i instead of %i with logp1 of %i at position %i of %i (Random seed: %u).\n",
310 sym,data[j],logp1[j],j,sz,seed);
311 fprintf(stderr,"Encoding method: %i, decoding method: %i\n",
312 enc_method[j],dec_method);
313 ret=-1;
314 }
315 if(ec_tell_frac(&dec)!=tell[j+1]){
316 fprintf(stderr,
317 "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n",
318 j+1,ec_tell_frac(&dec),tell[j+1],seed);
319 }
320 }
321 free(enc_method);
322 free(tell);
323 free(data);
324 free(logp1);
325 }
326 ec_enc_init(&enc,ptr,DATA_SIZE2);
327 ec_enc_bit_logp(&enc,0,1);
328 ec_enc_bit_logp(&enc,0,1);
329 ec_enc_bit_logp(&enc,0,1);
330 ec_enc_bit_logp(&enc,0,1);
331 ec_enc_bit_logp(&enc,0,2);
332 ec_enc_patch_initial_bits(&enc,3,2);
333 if(enc.error){
334 fprintf(stderr,"patch_initial_bits failed");
335 ret=-1;
336 }
337 ec_enc_patch_initial_bits(&enc,0,5);
338 if(!enc.error){
339 fprintf(stderr,"patch_initial_bits didn't fail when it should have");
340 ret=-1;
341 }
342 ec_enc_done(&enc);
343 if(ec_range_bytes(&enc)!=1||ptr[0]!=192){
344 fprintf(stderr,"Got %d when expecting 192 for patch_initial_bits",ptr[0]);
345 ret=-1;
346 }
347 ec_enc_init(&enc,ptr,DATA_SIZE2);
348 ec_enc_bit_logp(&enc,0,1);
349 ec_enc_bit_logp(&enc,0,1);
350 ec_enc_bit_logp(&enc,1,6);
351 ec_enc_bit_logp(&enc,0,2);
352 ec_enc_patch_initial_bits(&enc,0,2);
353 if(enc.error){
354 fprintf(stderr,"patch_initial_bits failed");
355 ret=-1;
356 }
357 ec_enc_done(&enc);
358 if(ec_range_bytes(&enc)!=2||ptr[0]!=63){
359 fprintf(stderr,"Got %d when expecting 63 for patch_initial_bits",ptr[0]);
360 ret=-1;
361 }
362 ec_enc_init(&enc,ptr,2);
363 ec_enc_bit_logp(&enc,0,2);
364 for(i=0;i<48;i++){
365 ec_enc_bits(&enc,0,1);
366 }
367 ec_enc_done(&enc);
368 if(!enc.error){
369 fprintf(stderr,"Raw bits overfill didn't fail when it should have");
370 ret=-1;
371 }
372 ec_enc_init(&enc,ptr,2);
373 for(i=0;i<17;i++){
374 ec_enc_bits(&enc,0,1);
375 }
376 ec_enc_done(&enc);
377 if(!enc.error){
378 fprintf(stderr,"17 raw bits encoded in two bytes");
379 ret=-1;
380 }
381 free(ptr);
382 return ret;
383}
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_laplace.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_laplace.c
new file mode 100644
index 0000000000..727bf012ef
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_laplace.c
@@ -0,0 +1,93 @@
1/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation
2 Written by Jean-Marc Valin and Timothy B. Terriberry */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <stdio.h>
33#include <stdlib.h>
34#define CELT_C
35#include "laplace.h"
36#include "stack_alloc.h"
37
38#include "entenc.c"
39#include "entdec.c"
40#include "entcode.c"
41#include "laplace.c"
42
43#define DATA_SIZE 40000
44
45int ec_laplace_get_start_freq(int decay)
46{
47 opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1);
48 int fs = (ft*(16384-decay))/(16384+decay);
49 return fs+LAPLACE_MINP;
50}
51
52int main(void)
53{
54 int i;
55 int ret = 0;
56 ec_enc enc;
57 ec_dec dec;
58 unsigned char *ptr;
59 int val[10000], decay[10000];
60 ALLOC_STACK;
61 ptr = (unsigned char *)malloc(DATA_SIZE);
62 ec_enc_init(&enc,ptr,DATA_SIZE);
63
64 val[0] = 3; decay[0] = 6000;
65 val[1] = 0; decay[1] = 5800;
66 val[2] = -1; decay[2] = 5600;
67 for (i=3;i<10000;i++)
68 {
69 val[i] = rand()%15-7;
70 decay[i] = rand()%11000+5000;
71 }
72 for (i=0;i<10000;i++)
73 ec_laplace_encode(&enc, &val[i],
74 ec_laplace_get_start_freq(decay[i]), decay[i]);
75
76 ec_enc_done(&enc);
77
78 ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));
79
80 for (i=0;i<10000;i++)
81 {
82 int d = ec_laplace_decode(&dec,
83 ec_laplace_get_start_freq(decay[i]), decay[i]);
84 if (d != val[i])
85 {
86 fprintf (stderr, "Got %d instead of %d\n", d, val[i]);
87 ret = 1;
88 }
89 }
90
91 free(ptr);
92 return ret;
93}
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_mathops.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_mathops.c
new file mode 100644
index 0000000000..874e9adf0f
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_mathops.c
@@ -0,0 +1,266 @@
1/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
2 Gregory Maxwell
3 Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#ifndef CUSTOM_MODES
34#define CUSTOM_MODES
35#endif
36
37#include <stdio.h>
38#include <math.h>
39#include "mathops.h"
40#include "bands.h"
41
42#ifdef FIXED_POINT
43#define WORD "%d"
44#else
45#define WORD "%f"
46#endif
47
48int ret = 0;
49
50void testdiv(void)
51{
52 opus_int32 i;
53 for (i=1;i<=327670;i++)
54 {
55 double prod;
56 opus_val32 val;
57 val = celt_rcp(i);
58#ifdef FIXED_POINT
59 prod = (1./32768./65526.)*val*i;
60#else
61 prod = val*i;
62#endif
63 if (fabs(prod-1) > .00025)
64 {
65 fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod);
66 ret = 1;
67 }
68 }
69}
70
71void testsqrt(void)
72{
73 opus_int32 i;
74 for (i=1;i<=1000000000;i++)
75 {
76 double ratio;
77 opus_val16 val;
78 val = celt_sqrt(i);
79 ratio = val/sqrt(i);
80 if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2)
81 {
82 fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
83 ret = 1;
84 }
85 i+= i>>10;
86 }
87}
88
89void testbitexactcos(void)
90{
91 int i;
92 opus_int32 min_d,max_d,last,chk;
93 chk=max_d=0;
94 last=min_d=32767;
95 for(i=64;i<=16320;i++)
96 {
97 opus_int32 d;
98 opus_int32 q=bitexact_cos(i);
99 chk ^= q*i;
100 d = last - q;
101 if (d>max_d)max_d=d;
102 if (d<min_d)min_d=d;
103 last = q;
104 }
105 if ((chk!=89408644)||(max_d!=5)||(min_d!=0)||(bitexact_cos(64)!=32767)||
106 (bitexact_cos(16320)!=200)||(bitexact_cos(8192)!=23171))
107 {
108 fprintf (stderr, "bitexact_cos failed\n");
109 ret = 1;
110 }
111}
112
113void testbitexactlog2tan(void)
114{
115 int i,fail;
116 opus_int32 min_d,max_d,last,chk;
117 fail=chk=max_d=0;
118 last=min_d=15059;
119 for(i=64;i<8193;i++)
120 {
121 opus_int32 d;
122 opus_int32 mid=bitexact_cos(i);
123 opus_int32 side=bitexact_cos(16384-i);
124 opus_int32 q=bitexact_log2tan(mid,side);
125 chk ^= q*i;
126 d = last - q;
127 if (q!=-1*bitexact_log2tan(side,mid))
128 fail = 1;
129 if (d>max_d)max_d=d;
130 if (d<min_d)min_d=d;
131 last = q;
132 }
133 if ((chk!=15821257)||(max_d!=61)||(min_d!=-2)||fail||
134 (bitexact_log2tan(32767,200)!=15059)||(bitexact_log2tan(30274,12540)!=2611)||
135 (bitexact_log2tan(23171,23171)!=0))
136 {
137 fprintf (stderr, "bitexact_log2tan failed\n");
138 ret = 1;
139 }
140}
141
142#ifndef FIXED_POINT
143void testlog2(void)
144{
145 float x;
146 for (x=0.001;x<1677700.0;x+=(x/8.0))
147 {
148 float error = fabs((1.442695040888963387*log(x))-celt_log2(x));
149 if (error>0.0009)
150 {
151 fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error);
152 ret = 1;
153 }
154 }
155}
156
157void testexp2(void)
158{
159 float x;
160 for (x=-11.0;x<24.0;x+=0.0007)
161 {
162 float error = fabs(x-(1.442695040888963387*log(celt_exp2(x))));
163 if (error>0.0002)
164 {
165 fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error);
166 ret = 1;
167 }
168 }
169}
170
171void testexp2log2(void)
172{
173 float x;
174 for (x=-11.0;x<24.0;x+=0.0007)
175 {
176 float error = fabs(x-(celt_log2(celt_exp2(x))));
177 if (error>0.001)
178 {
179 fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error);
180 ret = 1;
181 }
182 }
183}
184#else
185void testlog2(void)
186{
187 opus_val32 x;
188 for (x=8;x<1073741824;x+=(x>>3))
189 {
190 float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0);
191 if (error>0.003)
192 {
193 fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error);
194 ret = 1;
195 }
196 }
197}
198
199void testexp2(void)
200{
201 opus_val16 x;
202 for (x=-32768;x<15360;x++)
203 {
204 float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0)));
205 float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0);
206 if (error1>0.0002&&error2>0.00004)
207 {
208 fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2);
209 ret = 1;
210 }
211 }
212}
213
214void testexp2log2(void)
215{
216 opus_val32 x;
217 for (x=8;x<65536;x+=(x>>3))
218 {
219 float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384;
220 if (error>0.004)
221 {
222 fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error);
223 ret = 1;
224 }
225 }
226}
227
228void testilog2(void)
229{
230 opus_val32 x;
231 for (x=1;x<=268435455;x+=127)
232 {
233 opus_val32 lg;
234 opus_val32 y;
235
236 lg = celt_ilog2(x);
237 if (lg<0 || lg>=31)
238 {
239 printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg);
240 ret = 1;
241 }
242 y = 1<<lg;
243
244 if (x<y || (x>>1)>=y)
245 {
246 printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y);
247 ret = 1;
248 }
249 }
250}
251#endif
252
253int main(void)
254{
255 testbitexactcos();
256 testbitexactlog2tan();
257 testdiv();
258 testsqrt();
259 testlog2();
260 testexp2();
261 testexp2log2();
262#ifdef FIXED_POINT
263 testilog2();
264#endif
265 return ret;
266}
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_mdct.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_mdct.c
new file mode 100644
index 0000000000..4a563ccfe3
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_mdct.c
@@ -0,0 +1,227 @@
1/* Copyright (c) 2008-2011 Xiph.Org Foundation
2 Written by Jean-Marc Valin */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <stdio.h>
33
34#include "mdct.h"
35#include "stack_alloc.h"
36#include "kiss_fft.h"
37#include "mdct.h"
38#include "modes.h"
39
40#ifndef M_PI
41#define M_PI 3.141592653
42#endif
43
44int ret = 0;
45void check(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse)
46{
47 int bin,k;
48 double errpow=0,sigpow=0;
49 double snr;
50 for (bin=0;bin<nfft/2;++bin) {
51 double ansr = 0;
52 double difr;
53
54 for (k=0;k<nfft;++k) {
55 double phase = 2*M_PI*(k+.5+.25*nfft)*(bin+.5)/nfft;
56 double re = cos(phase);
57
58 re /= nfft/4;
59
60 ansr += in[k] * re;
61 }
62 /*printf ("%f %f\n", ansr, out[bin]);*/
63 difr = ansr - out[bin];
64 errpow += difr*difr;
65 sigpow += ansr*ansr;
66 }
67 snr = 10*log10(sigpow/errpow);
68 printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
69 if (snr<60) {
70 printf( "** poor snr: %f **\n", snr);
71 ret = 1;
72 }
73}
74
75void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse)
76{
77 int bin,k;
78 double errpow=0,sigpow=0;
79 double snr;
80 for (bin=0;bin<nfft;++bin) {
81 double ansr = 0;
82 double difr;
83
84 for (k=0;k<nfft/2;++k) {
85 double phase = 2*M_PI*(bin+.5+.25*nfft)*(k+.5)/nfft;
86 double re = cos(phase);
87
88 /*re *= 2;*/
89
90 ansr += in[k] * re;
91 }
92 /*printf ("%f %f\n", ansr, out[bin]);*/
93 difr = ansr - out[bin];
94 errpow += difr*difr;
95 sigpow += ansr*ansr;
96 }
97 snr = 10*log10(sigpow/errpow);
98 printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr );
99 if (snr<60) {
100 printf( "** poor snr: %f **\n", snr);
101 ret = 1;
102 }
103}
104
105
106void test1d(int nfft,int isinverse,int arch)
107{
108 size_t buflen = sizeof(kiss_fft_scalar)*nfft;
109 kiss_fft_scalar *in;
110 kiss_fft_scalar *in_copy;
111 kiss_fft_scalar *out;
112 opus_val16 *window;
113 int k;
114
115#ifdef CUSTOM_MODES
116 int shift = 0;
117 const mdct_lookup *cfg;
118 mdct_lookup _cfg;
119 clt_mdct_init(&_cfg, nfft, 0, arch);
120 cfg = &_cfg;
121#else
122 int shift;
123 const mdct_lookup *cfg;
124 CELTMode *mode = opus_custom_mode_create(48000, 960, NULL);
125 if (nfft == 1920) shift = 0;
126 else if (nfft == 960) shift = 1;
127 else if (nfft == 480) shift = 2;
128 else if (nfft == 240) shift = 3;
129 else return;
130 cfg = &mode->mdct;
131#endif
132
133 in = (kiss_fft_scalar*)malloc(buflen);
134 in_copy = (kiss_fft_scalar*)malloc(buflen);
135 out = (kiss_fft_scalar*)malloc(buflen);
136 window = (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
137
138 for (k=0;k<nfft;++k) {
139 in[k] = (rand() % 32768) - 16384;
140 }
141
142 for (k=0;k<nfft/2;++k) {
143 window[k] = Q15ONE;
144 }
145 for (k=0;k<nfft;++k) {
146 in[k] *= 32768;
147 }
148
149 if (isinverse)
150 {
151 for (k=0;k<nfft;++k) {
152 in[k] /= nfft;
153 }
154 }
155
156 for (k=0;k<nfft;++k)
157 in_copy[k] = in[k];
158 /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/
159
160 if (isinverse)
161 {
162 for (k=0;k<nfft;++k)
163 out[k] = 0;
164 clt_mdct_backward(cfg,in,out, window, nfft/2, shift, 1, arch);
165 /* apply TDAC because clt_mdct_backward() no longer does that */
166 for (k=0;k<nfft/4;++k)
167 out[nfft-k-1] = out[nfft/2+k];
168 check_inv(in,out,nfft,isinverse);
169 } else {
170 clt_mdct_forward(cfg,in,out,window, nfft/2, shift, 1, arch);
171 check(in_copy,out,nfft,isinverse);
172 }
173 /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
174
175
176 free(in);
177 free(in_copy);
178 free(out);
179 free(window);
180#ifdef CUSTOM_MODES
181 clt_mdct_clear(&_cfg, arch);
182#endif
183}
184
185int main(int argc,char ** argv)
186{
187 ALLOC_STACK;
188 int arch = opus_select_arch();
189
190 if (argc>1) {
191 int k;
192 for (k=1;k<argc;++k) {
193 test1d(atoi(argv[k]),0,arch);
194 test1d(atoi(argv[k]),1,arch);
195 }
196 }else{
197 test1d(32,0,arch);
198 test1d(32,1,arch);
199 test1d(256,0,arch);
200 test1d(256,1,arch);
201 test1d(512,0,arch);
202 test1d(512,1,arch);
203 test1d(1024,0,arch);
204 test1d(1024,1,arch);
205 test1d(2048,0,arch);
206 test1d(2048,1,arch);
207#ifndef RADIX_TWO_ONLY
208 test1d(36,0,arch);
209 test1d(36,1,arch);
210 test1d(40,0,arch);
211 test1d(40,1,arch);
212 test1d(60,0,arch);
213 test1d(60,1,arch);
214 test1d(120,0,arch);
215 test1d(120,1,arch);
216 test1d(240,0,arch);
217 test1d(240,1,arch);
218 test1d(480,0,arch);
219 test1d(480,1,arch);
220 test1d(960,0,arch);
221 test1d(960,1,arch);
222 test1d(1920,0,arch);
223 test1d(1920,1,arch);
224#endif
225 }
226 return ret;
227}
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_rotation.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_rotation.c
new file mode 100644
index 0000000000..8a31b3f2b1
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_rotation.c
@@ -0,0 +1,86 @@
1/* Copyright (c) 2008-2011 Xiph.Org Foundation
2 Written by Jean-Marc Valin */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#ifndef CUSTOM_MODES
33#define CUSTOM_MODES
34#endif
35
36#include <stdio.h>
37#include <stdlib.h>
38#include "vq.h"
39#include "bands.h"
40#include "stack_alloc.h"
41#include <math.h>
42
43
44#define MAX_SIZE 100
45
46int ret=0;
47void test_rotation(int N, int K)
48{
49 int i;
50 double err = 0, ener = 0, snr, snr0;
51 opus_val16 x0[MAX_SIZE];
52 opus_val16 x1[MAX_SIZE];
53 for (i=0;i<N;i++)
54 x1[i] = x0[i] = rand()%32767-16384;
55 exp_rotation(x1, N, 1, 1, K, SPREAD_NORMAL);
56 for (i=0;i<N;i++)
57 {
58 err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]);
59 ener += x0[i]*(double)x0[i];
60 }
61 snr0 = 20*log10(ener/err);
62 err = ener = 0;
63 exp_rotation(x1, N, -1, 1, K, SPREAD_NORMAL);
64 for (i=0;i<N;i++)
65 {
66 err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]);
67 ener += x0[i]*(double)x0[i];
68 }
69 snr = 20*log10(ener/err);
70 printf ("SNR for size %d (%d pulses) is %f (was %f without inverse)\n", N, K, snr, snr0);
71 if (snr < 60 || snr0 > 20)
72 {
73 fprintf(stderr, "FAIL!\n");
74 ret = 1;
75 }
76}
77
78int main(void)
79{
80 ALLOC_STACK;
81 test_rotation(15, 3);
82 test_rotation(23, 5);
83 test_rotation(50, 3);
84 test_rotation(80, 1);
85 return ret;
86}
diff --git a/lib/rbcodec/codecs/libopus/celt/tests/test_unit_types.c b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_types.c
new file mode 100644
index 0000000000..67a0fb8ed3
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/tests/test_unit_types.c
@@ -0,0 +1,50 @@
1/* Copyright (c) 2008-2011 Xiph.Org Foundation
2 Written by Jean-Marc Valin */
3/*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "opus_types.h"
33#include <stdio.h>
34
35int main(void)
36{
37 opus_int16 i = 1;
38 i <<= 14;
39 if (i>>14 != 1)
40 {
41 fprintf(stderr, "opus_int16 isn't 16 bits\n");
42 return 1;
43 }
44 if (sizeof(opus_int16)*2 != sizeof(opus_int32))
45 {
46 fprintf(stderr, "16*2 != 32\n");
47 return 1;
48 }
49 return 0;
50}
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.c b/lib/rbcodec/codecs/libopus/celt/vq.c
index b047b22774..a6b5552d69 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.c
+++ b/lib/rbcodec/codecs/libopus/celt/vq.c
@@ -39,10 +39,6 @@
39#include "rate.h" 39#include "rate.h"
40#include "pitch.h" 40#include "pitch.h"
41 41
42#if defined(MIPSr1_ASM)
43#include "mips/vq_mipsr1.h"
44#endif
45
46#ifndef OVERRIDE_vq_exp_rotation1 42#ifndef OVERRIDE_vq_exp_rotation1
47static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) 43static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
48{ 44{
@@ -71,7 +67,7 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
71} 67}
72#endif /* OVERRIDE_vq_exp_rotation1 */ 68#endif /* OVERRIDE_vq_exp_rotation1 */
73 69
74static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 70void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
75{ 71{
76 static const int SPREAD_FACTOR[3]={15,10,5}; 72 static const int SPREAD_FACTOR[3]={15,10,5};
77 int i; 73 int i;
@@ -162,42 +158,27 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
162 return collapse_mask; 158 return collapse_mask;
163} 159}
164 160
165unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc 161opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
166#ifdef RESYNTH
167 , opus_val16 gain
168#endif
169 )
170{ 162{
171 VARDECL(celt_norm, y); 163 VARDECL(celt_norm, y);
172 VARDECL(int, iy); 164 VARDECL(int, signx);
173 VARDECL(opus_val16, signx);
174 int i, j; 165 int i, j;
175 opus_val16 s;
176 int pulsesLeft; 166 int pulsesLeft;
177 opus_val32 sum; 167 opus_val32 sum;
178 opus_val32 xy; 168 opus_val32 xy;
179 opus_val16 yy; 169 opus_val16 yy;
180 unsigned collapse_mask;
181 SAVE_STACK; 170 SAVE_STACK;
182 171
183 celt_assert2(K>0, "alg_quant() needs at least one pulse"); 172 (void)arch;
184 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
185
186 ALLOC(y, N, celt_norm); 173 ALLOC(y, N, celt_norm);
187 ALLOC(iy, N, int); 174 ALLOC(signx, N, int);
188 ALLOC(signx, N, opus_val16);
189
190 exp_rotation(X, N, 1, B, K, spread);
191 175
192 /* Get rid of the sign */ 176 /* Get rid of the sign */
193 sum = 0; 177 sum = 0;
194 j=0; do { 178 j=0; do {
195 if (X[j]>0) 179 signx[j] = X[j]<0;
196 signx[j]=1; 180 /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
197 else { 181 X[j] = ABS16(X[j]);
198 signx[j]=-1;
199 X[j]=-X[j];
200 }
201 iy[j] = 0; 182 iy[j] = 0;
202 y[j] = 0; 183 y[j] = 0;
203 } while (++j<N); 184 } while (++j<N);
@@ -229,7 +210,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
229 while (++j<N); 210 while (++j<N);
230 sum = QCONST16(1.f,14); 211 sum = QCONST16(1.f,14);
231 } 212 }
232 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); 213#ifdef FIXED_POINT
214 rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
215#else
216 /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
217 rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
218#endif
233 j=0; do { 219 j=0; do {
234#ifdef FIXED_POINT 220#ifdef FIXED_POINT
235 /* It's really important to round *towards zero* here */ 221 /* It's really important to round *towards zero* here */
@@ -244,12 +230,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
244 pulsesLeft -= iy[j]; 230 pulsesLeft -= iy[j];
245 } while (++j<N); 231 } while (++j<N);
246 } 232 }
247 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); 233 celt_sig_assert(pulsesLeft>=0);
248 234
249 /* This should never happen, but just in case it does (e.g. on silence) 235 /* This should never happen, but just in case it does (e.g. on silence)
250 we fill the first bin with pulses. */ 236 we fill the first bin with pulses. */
251#ifdef FIXED_POINT_DEBUG 237#ifdef FIXED_POINT_DEBUG
252 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); 238 celt_sig_assert(pulsesLeft<=N+3);
253#endif 239#endif
254 if (pulsesLeft > N+3) 240 if (pulsesLeft > N+3)
255 { 241 {
@@ -260,12 +246,12 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
260 pulsesLeft=0; 246 pulsesLeft=0;
261 } 247 }
262 248
263 s = 1;
264 for (i=0;i<pulsesLeft;i++) 249 for (i=0;i<pulsesLeft;i++)
265 { 250 {
251 opus_val16 Rxy, Ryy;
266 int best_id; 252 int best_id;
267 opus_val32 best_num = -VERY_LARGE16; 253 opus_val32 best_num;
268 opus_val16 best_den = 0; 254 opus_val16 best_den;
269#ifdef FIXED_POINT 255#ifdef FIXED_POINT
270 int rshift; 256 int rshift;
271#endif 257#endif
@@ -275,10 +261,23 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
275 best_id = 0; 261 best_id = 0;
276 /* The squared magnitude term gets added anyway, so we might as well 262 /* The squared magnitude term gets added anyway, so we might as well
277 add it outside the loop */ 263 add it outside the loop */
278 yy = ADD32(yy, 1); 264 yy = ADD16(yy, 1);
279 j=0; 265
266 /* Calculations for position 0 are out of the loop, in part to reduce
267 mispredicted branches (since the if condition is usually false)
268 in the loop. */
269 /* Temporary sums of the new pulse(s) */
270 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
271 /* We're multiplying y[j] by two so we don't have to do it here */
272 Ryy = ADD16(yy, y[0]);
273
274 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
275 Rxy is positive because the sign is pre-computed) */
276 Rxy = MULT16_16_Q15(Rxy,Rxy);
277 best_den = Ryy;
278 best_num = Rxy;
279 j=1;
280 do { 280 do {
281 opus_val16 Rxy, Ryy;
282 /* Temporary sums of the new pulse(s) */ 281 /* Temporary sums of the new pulse(s) */
283 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); 282 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
284 /* We're multiplying y[j] by two so we don't have to do it here */ 283 /* We're multiplying y[j] by two so we don't have to do it here */
@@ -289,8 +288,11 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
289 Rxy = MULT16_16_Q15(Rxy,Rxy); 288 Rxy = MULT16_16_Q15(Rxy,Rxy);
290 /* The idea is to check for num/den >= best_num/best_den, but that way 289 /* The idea is to check for num/den >= best_num/best_den, but that way
291 we can do it without any division */ 290 we can do it without any division */
292 /* OPT: Make sure to use conditional moves here */ 291 /* OPT: It's not clear whether a cmov is faster than a branch here
293 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) 292 since the condition is more often false than true and using
293 a cmov introduces data dependencies across iterations. The optimal
294 choice may be architecture-dependent. */
295 if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
294 { 296 {
295 best_den = Ryy; 297 best_den = Ryy;
296 best_num = Rxy; 298 best_num = Rxy;
@@ -305,23 +307,47 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
305 307
306 /* Only now that we've made the final choice, update y/iy */ 308 /* Only now that we've made the final choice, update y/iy */
307 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ 309 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
308 y[best_id] += 2*s; 310 y[best_id] += 2;
309 iy[best_id]++; 311 iy[best_id]++;
310 } 312 }
311 313
312 /* Put the original sign back */ 314 /* Put the original sign back */
313 j=0; 315 j=0;
314 do { 316 do {
315 X[j] = MULT16_16(signx[j],X[j]); 317 /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
316 if (signx[j] < 0) 318 /* OPT: The is more likely to be compiled without a branch than the code above
317 iy[j] = -iy[j]; 319 but has the same performance otherwise. */
320 iy[j] = (iy[j]^-signx[j]) + signx[j];
318 } while (++j<N); 321 } while (++j<N);
322 RESTORE_STACK;
323 return yy;
324}
325
326unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
327 opus_val16 gain, int resynth, int arch)
328{
329 VARDECL(int, iy);
330 opus_val16 yy;
331 unsigned collapse_mask;
332 SAVE_STACK;
333
334 celt_assert2(K>0, "alg_quant() needs at least one pulse");
335 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
336
337 /* Covers vectorization by up to 4. */
338 ALLOC(iy, N+3, int);
339
340 exp_rotation(X, N, 1, B, K, spread);
341
342 yy = op_pvq_search(X, iy, K, N, arch);
343
319 encode_pulses(iy, N, K, enc); 344 encode_pulses(iy, N, K, enc);
320 345
321#ifdef RESYNTH 346 if (resynth)
322 normalise_residual(iy, X, N, yy, gain); 347 {
323 exp_rotation(X, N, -1, B, K, spread); 348 normalise_residual(iy, X, N, yy, gain);
324#endif 349 exp_rotation(X, N, -1, B, K, spread);
350 }
325 351
326 collapse_mask = extract_collapse_mask(iy, N, B); 352 collapse_mask = extract_collapse_mask(iy, N, B);
327 RESTORE_STACK; 353 RESTORE_STACK;
@@ -350,7 +376,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
350} 376}
351 377
352#ifndef OVERRIDE_renormalise_vector 378#ifndef OVERRIDE_renormalise_vector
353void renormalise_vector(celt_norm *X, int N, opus_val16 gain) 379void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
354{ 380{
355 int i; 381 int i;
356#ifdef FIXED_POINT 382#ifdef FIXED_POINT
@@ -360,7 +386,7 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
360 opus_val16 g; 386 opus_val16 g;
361 opus_val32 t; 387 opus_val32 t;
362 celt_norm *xptr; 388 celt_norm *xptr;
363 E = EPSILON + celt_inner_prod(X, X, N); 389 E = EPSILON + celt_inner_prod(X, X, N, arch);
364#ifdef FIXED_POINT 390#ifdef FIXED_POINT
365 k = celt_ilog2(E)>>1; 391 k = celt_ilog2(E)>>1;
366#endif 392#endif
@@ -377,7 +403,7 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
377} 403}
378#endif /* OVERRIDE_renormalise_vector */ 404#endif /* OVERRIDE_renormalise_vector */
379 405
380int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N) 406int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
381{ 407{
382 int i; 408 int i;
383 int itheta; 409 int itheta;
@@ -396,8 +422,8 @@ int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
396 Eside = MAC16_16(Eside, s, s); 422 Eside = MAC16_16(Eside, s, s);
397 } 423 }
398 } else { 424 } else {
399 Emid += celt_inner_prod(X, X, N); 425 Emid += celt_inner_prod(X, X, N, arch);
400 Eside += celt_inner_prod(Y, Y, N); 426 Eside += celt_inner_prod(Y, Y, N, arch);
401 } 427 }
402 mid = celt_sqrt(Emid); 428 mid = celt_sqrt(Emid);
403 side = celt_sqrt(Eside); 429 side = celt_sqrt(Eside);
@@ -405,7 +431,7 @@ int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
405 /* 0.63662 = 2/pi */ 431 /* 0.63662 = 2/pi */
406 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); 432 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
407#else 433#else
408 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); 434 itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid));
409#endif 435#endif
410 436
411 return itheta; 437 return itheta;
diff --git a/lib/rbcodec/codecs/libopus/celt/vq.h b/lib/rbcodec/codecs/libopus/celt/vq.h
index 84115cbcbb..0dfe6af058 100644
--- a/lib/rbcodec/codecs/libopus/celt/vq.h
+++ b/lib/rbcodec/codecs/libopus/celt/vq.h
@@ -37,6 +37,23 @@
37#include "entdec.h" 37#include "entdec.h"
38#include "modes.h" 38#include "modes.h"
39 39
40#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT))
41#include "x86/vq_sse.h"
42#endif
43
44#if defined(MIPSr1_ASM)
45#include "mips/vq_mipsr1.h"
46#endif
47
48void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
49
50opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch);
51
52#if !defined(OVERRIDE_OP_PVQ_SEARCH)
53#define op_pvq_search(x, iy, K, N, arch) \
54 (op_pvq_search_c(x, iy, K, N, arch))
55#endif
56
40/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of 57/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
41 * the pitch and a combination of pulses such that its norm is still equal 58 * the pitch and a combination of pulses such that its norm is still equal
42 * to 1. This is the function that will typically require the most CPU. 59 * to 1. This is the function that will typically require the most CPU.
@@ -46,12 +63,8 @@
46 * @param enc Entropy encoder state 63 * @param enc Entropy encoder state
47 * @ret A mask indicating which blocks in the band received pulses 64 * @ret A mask indicating which blocks in the band received pulses
48*/ 65*/
49unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, 66unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
50 ec_enc *enc 67 opus_val16 gain, int resynth, int arch);
51#ifdef RESYNTH
52 , opus_val16 gain
53#endif
54 );
55 68
56/** Algebraic pulse decoder 69/** Algebraic pulse decoder
57 * @param X Decoded normalised spectrum (returned) 70 * @param X Decoded normalised spectrum (returned)
@@ -63,8 +76,8 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B,
63unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, 76unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
64 ec_dec *dec, opus_val16 gain); 77 ec_dec *dec, opus_val16 gain);
65 78
66void renormalise_vector(celt_norm *X, int N, opus_val16 gain); 79void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch);
67 80
68int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N); 81int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch);
69 82
70#endif /* VQ_H */ 83#endif /* VQ_H */
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse.h b/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse.h
new file mode 100644
index 0000000000..7d1ecf7533
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse.h
@@ -0,0 +1,66 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifndef CELT_LPC_SSE_H
29#define CELT_LPC_SSE_H
30
31#ifdef HAVE_CONFIG_H
32#include "config.h"
33#endif
34
35#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
36#define OVERRIDE_CELT_FIR
37
38void celt_fir_sse4_1(
39 const opus_val16 *x,
40 const opus_val16 *num,
41 opus_val16 *y,
42 int N,
43 int ord,
44 int arch);
45
46#if defined(OPUS_X86_PRESUME_SSE4_1)
47#define celt_fir(x, num, y, N, ord, arch) \
48 ((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch))
49
50#else
51
52extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
53 const opus_val16 *x,
54 const opus_val16 *num,
55 opus_val16 *y,
56 int N,
57 int ord,
58 int arch);
59
60# define celt_fir(x, num, y, N, ord, arch) \
61 ((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch))
62
63#endif
64#endif
65
66#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c b/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c
new file mode 100644
index 0000000000..5478568849
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/celt_lpc_sse4_1.c
@@ -0,0 +1,89 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <xmmintrin.h>
33#include <emmintrin.h>
34#include <smmintrin.h>
35#include "celt_lpc.h"
36#include "stack_alloc.h"
37#include "mathops.h"
38#include "pitch.h"
39#include "x86cpu.h"
40
41#if defined(FIXED_POINT)
42
43void celt_fir_sse4_1(const opus_val16 *x,
44 const opus_val16 *num,
45 opus_val16 *y,
46 int N,
47 int ord,
48 int arch)
49{
50 int i,j;
51 VARDECL(opus_val16, rnum);
52
53 __m128i vecNoA;
54 opus_int32 noA ;
55 SAVE_STACK;
56
57 ALLOC(rnum, ord, opus_val16);
58 for(i=0;i<ord;i++)
59 rnum[i] = num[ord-i-1];
60 noA = EXTEND32(1) << SIG_SHIFT >> 1;
61 vecNoA = _mm_set_epi32(noA, noA, noA, noA);
62
63 for (i=0;i<N-3;i+=4)
64 {
65 opus_val32 sums[4] = {0};
66 __m128i vecSum, vecX;
67
68 xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
69
70 vecSum = _mm_loadu_si128((__m128i *)sums);
71 vecSum = _mm_add_epi32(vecSum, vecNoA);
72 vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
73 vecX = OP_CVTEPI16_EPI32_M64(x + i);
74 vecSum = _mm_add_epi32(vecSum, vecX);
75 vecSum = _mm_packs_epi32(vecSum, vecSum);
76 _mm_storel_epi64((__m128i *)(y + i), vecSum);
77 }
78 for (;i<N;i++)
79 {
80 opus_val32 sum = 0;
81 for (j=0;j<ord;j++)
82 sum = MAC16_16(sum, rnum[j], x[i+j-ord]);
83 y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
84 }
85
86 RESTORE_STACK;
87}
88
89#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.c b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.c
new file mode 100644
index 0000000000..20e73126b6
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.c
@@ -0,0 +1,185 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "macros.h"
33#include "celt_lpc.h"
34#include "stack_alloc.h"
35#include "mathops.h"
36#include "pitch.h"
37
38#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
39
40#include <xmmintrin.h>
41#include "arch.h"
42
43void xcorr_kernel_sse(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
44{
45 int j;
46 __m128 xsum1, xsum2;
47 xsum1 = _mm_loadu_ps(sum);
48 xsum2 = _mm_setzero_ps();
49
50 for (j = 0; j < len-3; j += 4)
51 {
52 __m128 x0 = _mm_loadu_ps(x+j);
53 __m128 yj = _mm_loadu_ps(y+j);
54 __m128 y3 = _mm_loadu_ps(y+j+3);
55
56 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj));
57 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55),
58 _mm_shuffle_ps(yj,y3,0x49)));
59 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa),
60 _mm_shuffle_ps(yj,y3,0x9e)));
61 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3));
62 }
63 if (j < len)
64 {
65 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
66 if (++j < len)
67 {
68 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
69 if (++j < len)
70 {
71 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j)));
72 }
73 }
74 }
75 _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2));
76}
77
78
79void dual_inner_prod_sse(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
80 int N, opus_val32 *xy1, opus_val32 *xy2)
81{
82 int i;
83 __m128 xsum1, xsum2;
84 xsum1 = _mm_setzero_ps();
85 xsum2 = _mm_setzero_ps();
86 for (i=0;i<N-3;i+=4)
87 {
88 __m128 xi = _mm_loadu_ps(x+i);
89 __m128 y1i = _mm_loadu_ps(y01+i);
90 __m128 y2i = _mm_loadu_ps(y02+i);
91 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i));
92 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i));
93 }
94 /* Horizontal sum */
95 xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1));
96 xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55));
97 _mm_store_ss(xy1, xsum1);
98 xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2));
99 xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55));
100 _mm_store_ss(xy2, xsum2);
101 for (;i<N;i++)
102 {
103 *xy1 = MAC16_16(*xy1, x[i], y01[i]);
104 *xy2 = MAC16_16(*xy2, x[i], y02[i]);
105 }
106}
107
108opus_val32 celt_inner_prod_sse(const opus_val16 *x, const opus_val16 *y,
109 int N)
110{
111 int i;
112 float xy;
113 __m128 sum;
114 sum = _mm_setzero_ps();
115 /* FIXME: We should probably go 8-way and use 2 sums. */
116 for (i=0;i<N-3;i+=4)
117 {
118 __m128 xi = _mm_loadu_ps(x+i);
119 __m128 yi = _mm_loadu_ps(y+i);
120 sum = _mm_add_ps(sum,_mm_mul_ps(xi, yi));
121 }
122 /* Horizontal sum */
123 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
124 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
125 _mm_store_ss(&xy, sum);
126 for (;i<N;i++)
127 {
128 xy = MAC16_16(xy, x[i], y[i]);
129 }
130 return xy;
131}
132
/* SSE comb filter with constant gains (float build).
   For each output index i the filter computes
     y[i] = x[i] + g10*x[i-T]
                 + g11*(x[i-T+1] + x[i-T-1])
                 + g12*(x[i-T+2] + x[i-T-2])
   The vector loop produces four outputs per iteration and reads x starting at
   x[-T-2]. The scalar loop for the last N%4 outputs is only compiled when
   CUSTOM_MODES is defined; otherwise those outputs are not written.
   NOTE(review): presumably callers guarantee N is a multiple of 4 in builds
   without CUSTOM_MODES -- confirm. */
133void comb_filter_const_sse(opus_val32 *y, opus_val32 *x, int T, int N,
134 opus_val16 g10, opus_val16 g11, opus_val16 g12)
135{
136 int i;
137 __m128 x0v;
138 __m128 g10v, g11v, g12v;
/* Broadcast each gain to all four lanes. */
139 g10v = _mm_load1_ps(&g10);
140 g11v = _mm_load1_ps(&g11);
141 g12v = _mm_load1_ps(&g12);
/* x0v holds x[i-T-2 .. i-T+1]; it is carried across iterations (see x0v=x4v). */
142 x0v = _mm_loadu_ps(&x[-T-2]);
143 for (i=0;i<N-3;i+=4)
144 {
145 __m128 yi, yi2, x1v, x2v, x3v, x4v;
146 const opus_val32 *xp = &x[i-T-2];
147 yi = _mm_loadu_ps(x+i);
/* x4v = xp[4..7], i.e. the taps at offset +2. */
148 x4v = _mm_loadu_ps(xp+4);
149#if 0
150 /* Slower version with all loads */
151 x1v = _mm_loadu_ps(xp+1);
152 x2v = _mm_loadu_ps(xp+2);
153 x3v = _mm_loadu_ps(xp+3);
154#else
/* Build xp[2..5], xp[1..4] and xp[3..6] from x0v/x4v instead of reloading. */
155 x2v = _mm_shuffle_ps(x0v, x4v, 0x4e);
156 x1v = _mm_shuffle_ps(x0v, x2v, 0x99);
157 x3v = _mm_shuffle_ps(x2v, x4v, 0x99);
158#endif
159
160 yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v));
161#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */
162 yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)));
163 yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
164#else
165 /* Use partial sums */
166 yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)),
167 _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v)));
168 yi = _mm_add_ps(yi, yi2);
169#endif
/* The +2 taps of this iteration are the -2 taps of the next one. */
170 x0v=x4v;
171 _mm_storeu_ps(y+i, yi);
172 }
173#ifdef CUSTOM_MODES
/* Scalar tail for the remaining N%4 outputs. */
174 for (;i<N;i++)
175 {
176 y[i] = x[i]
177 + MULT16_32_Q15(g10,x[i-T])
178 + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1]))
179 + MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2]));
180 }
181#endif
182}
183
184
185#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.h b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.h
new file mode 100644
index 0000000000..e5f87ab51a
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse.h
@@ -0,0 +1,192 @@
1/* Copyright (c) 2013 Jean-Marc Valin and John Ridges
2 Copyright (c) 2014, Cisco Systems, INC MingXiang WeiZhou MinPeng YanWang*/
3/**
4 @file pitch_sse.h
5 @brief Pitch analysis
6 */
7
8/*
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 - Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 - Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
24 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#ifndef PITCH_SSE_H
34#define PITCH_SSE_H
35
36#if defined(HAVE_CONFIG_H)
37#include "config.h"
38#endif
39
/* Prototypes of the x86 cross-correlation kernels, followed by the selection
   of what the xcorr_kernel(x, y, sum, len, arch) macro expands to. */
40#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
41void xcorr_kernel_sse4_1(
42 const opus_int16 *x,
43 const opus_int16 *y,
44 opus_val32 sum[4],
45 int len);
46#endif
47
48#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
49void xcorr_kernel_sse(
50 const opus_val16 *x,
51 const opus_val16 *y,
52 opus_val32 sum[4],
53 int len);
54#endif
55
/* SSE4.1 is presumed (fixed point): call the kernel directly; arch is
   evaluated and discarded. */
56#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
57#define OVERRIDE_XCORR_KERNEL
58#define xcorr_kernel(x, y, sum, len, arch) \
59 ((void)arch, xcorr_kernel_sse4_1(x, y, sum, len))
60
/* SSE is presumed (float): call the kernel directly. */
61#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
62#define OVERRIDE_XCORR_KERNEL
63#define xcorr_kernel(x, y, sum, len, arch) \
64 ((void)arch, xcorr_kernel_sse(x, y, sum, len))
65
/* The instruction set is only possibly available: dispatch through a table of
   function pointers indexed by (arch & OPUS_ARCHMASK). The table is declared
   here and defined in another translation unit. */
66#elif (defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)) || (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
67
68extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
69 const opus_val16 *x,
70 const opus_val16 *y,
71 opus_val32 sum[4],
72 int len);
73
74#define OVERRIDE_XCORR_KERNEL
75#define xcorr_kernel(x, y, sum, len, arch) \
76 ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
77
78#endif
79
/* Prototypes of the x86 inner-product kernels. Each guard matches the guard
   around the corresponding definition. */
#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse4_1(
    const opus_int16 *x,
    const opus_int16 *y,
    int               N);
#endif

#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse2(
    const opus_int16 *x,
    const opus_int16 *y,
    int               N);
#endif

/* celt_inner_prod_sse() is defined in pitch_sse.c under OPUS_X86_MAY_HAVE_SSE
   and is selected below under the SSE (not SSE2) macros, so the prototype must
   be visible whenever SSE may be available. */
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
opus_val32 celt_inner_prod_sse(
    const opus_val16 *x,
    const opus_val16 *y,
    int               N);
#endif
100
101
/* Selects what celt_inner_prod(x, y, N, arch) expands to. The first three
   branches call one kernel directly when the instruction set is presumed at
   build time (arch is evaluated and discarded); the last branch dispatches
   through a function-pointer table indexed by (arch & OPUS_ARCHMASK). */
102#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
103#define OVERRIDE_CELT_INNER_PROD
104#define celt_inner_prod(x, y, N, arch) \
105 ((void)arch, celt_inner_prod_sse4_1(x, y, N))
106
/* SSE2 is presumed and SSE4.1 is not even a run-time candidate. */
107#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
108#define OVERRIDE_CELT_INNER_PROD
109#define celt_inner_prod(x, y, N, arch) \
110 ((void)arch, celt_inner_prod_sse2(x, y, N))
111
112#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
113#define OVERRIDE_CELT_INNER_PROD
114#define celt_inner_prod(x, y, N, arch) \
115 ((void)arch, celt_inner_prod_sse(x, y, N))
116
117
118#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
119 (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
120
/* Declared here, defined in another translation unit. */
121extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
122 const opus_val16 *x,
123 const opus_val16 *y,
124 int N);
125
126#define OVERRIDE_CELT_INNER_PROD
127#define celt_inner_prod(x, y, N, arch) \
128 ((*CELT_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y, N))
129
130#endif
131
/* Float build with SSE possibly available: replace dual_inner_prod() and
   comb_filter_const() with the SSE versions. Any earlier macro definitions of
   the two names are removed first. */
132#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
133
134#define OVERRIDE_DUAL_INNER_PROD
135#define OVERRIDE_COMB_FILTER_CONST
136
137#undef dual_inner_prod
138#undef comb_filter_const
139
140void dual_inner_prod_sse(const opus_val16 *x,
141 const opus_val16 *y01,
142 const opus_val16 *y02,
143 int N,
144 opus_val32 *xy1,
145 opus_val32 *xy2);
146
147void comb_filter_const_sse(opus_val32 *y,
148 opus_val32 *x,
149 int T,
150 int N,
151 opus_val16 g10,
152 opus_val16 g11,
153 opus_val16 g12);
154
155
/* SSE presumed at build time: call the SSE functions directly; arch is
   evaluated and discarded. */
156#if defined(OPUS_X86_PRESUME_SSE)
157# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
158 ((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
159
160# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
161 ((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
162#else
163
/* Otherwise dispatch through function-pointer tables indexed by
   (arch & OPUS_ARCHMASK); the tables are declared here and defined elsewhere. */
164extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
165 const opus_val16 *x,
166 const opus_val16 *y01,
167 const opus_val16 *y02,
168 int N,
169 opus_val32 *xy1,
170 opus_val32 *xy2);
171
172#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
173 ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
174
175extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
176 opus_val32 *y,
177 opus_val32 *x,
178 int T,
179 int N,
180 opus_val16 g10,
181 opus_val16 g11,
182 opus_val16 g12);
183
184#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
185 ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
186
/* NOTE(review): presumably makes the C comb_filter_const_c non-static so the
   dispatch table can reference it -- confirm where this macro is consumed. */
187#define NON_STATIC_COMB_FILTER_CONST_C
188
189#endif
190#endif
191
192#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse2.c b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse2.c
new file mode 100644
index 0000000000..a0e7d1beaf
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse2.c
@@ -0,0 +1,95 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <xmmintrin.h>
33#include <emmintrin.h>
34
35#include "macros.h"
36#include "celt_lpc.h"
37#include "stack_alloc.h"
38#include "mathops.h"
39#include "pitch.h"
40
41#if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT)
42opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y,
43 int N)
44{
45 opus_int i, dataSize16;
46 opus_int32 sum;
47
48 __m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
49 __m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
50
51 sum = 0;
52 dataSize16 = N & ~15;
53
54 acc1 = _mm_setzero_si128();
55 acc2 = _mm_setzero_si128();
56
57 for (i=0;i<dataSize16;i+=16)
58 {
59 inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
60 inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
61
62 inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
63 inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
64
65 inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
66 inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
67
68 acc1 = _mm_add_epi32(acc1, inVec1_76543210);
69 acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
70 }
71
72 acc1 = _mm_add_epi32( acc1, acc2 );
73
74 if (N - i >= 8)
75 {
76 inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
77 inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
78
79 inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
80
81 acc1 = _mm_add_epi32(acc1, inVec1_76543210);
82 i += 8;
83 }
84
85 acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1));
86 acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E));
87 sum += _mm_cvtsi128_si32(acc1);
88
89 for (;i<N;i++) {
90 sum = silk_SMLABB(sum, x[i], y[i]);
91 }
92
93 return sum;
94}
95#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse4_1.c b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse4_1.c
new file mode 100644
index 0000000000..a092c68b24
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/pitch_sse4_1.c
@@ -0,0 +1,195 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include <xmmintrin.h>
33#include <emmintrin.h>
34
35#include "macros.h"
36#include "celt_lpc.h"
37#include "stack_alloc.h"
38#include "mathops.h"
39#include "pitch.h"
40
41#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(FIXED_POINT)
42#include <smmintrin.h>
43#include "x86cpu.h"
44
45opus_val32 celt_inner_prod_sse4_1(const opus_val16 *x, const opus_val16 *y,
46 int N)
47{
48 opus_int i, dataSize16;
49 opus_int32 sum;
50 __m128i inVec1_76543210, inVec1_FEDCBA98, acc1;
51 __m128i inVec2_76543210, inVec2_FEDCBA98, acc2;
52 __m128i inVec1_3210, inVec2_3210;
53
54 sum = 0;
55 dataSize16 = N & ~15;
56
57 acc1 = _mm_setzero_si128();
58 acc2 = _mm_setzero_si128();
59
60 for (i=0;i<dataSize16;i+=16) {
61 inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
62 inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
63
64 inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8]));
65 inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8]));
66
67 inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
68 inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98);
69
70 acc1 = _mm_add_epi32(acc1, inVec1_76543210);
71 acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98);
72 }
73
74 acc1 = _mm_add_epi32(acc1, acc2);
75
76 if (N - i >= 8)
77 {
78 inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0]));
79 inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0]));
80
81 inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210);
82
83 acc1 = _mm_add_epi32(acc1, inVec1_76543210);
84 i += 8;
85 }
86
87 if (N - i >= 4)
88 {
89 inVec1_3210 = OP_CVTEPI16_EPI32_M64(&x[i + 0]);
90 inVec2_3210 = OP_CVTEPI16_EPI32_M64(&y[i + 0]);
91
92 inVec1_3210 = _mm_mullo_epi32(inVec1_3210, inVec2_3210);
93
94 acc1 = _mm_add_epi32(acc1, inVec1_3210);
95 i += 4;
96 }
97
98 acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64(acc1, acc1));
99 acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16(acc1, 0x0E));
100
101 sum += _mm_cvtsi128_si32(acc1);
102
103 for (;i<N;i++)
104 {
105 sum = silk_SMLABB(sum, x[i], y[i]);
106 }
107
108 return sum;
109}
110
111void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[ 4 ], int len)
112{
113 int j;
114
115 __m128i vecX, vecX0, vecX1, vecX2, vecX3;
116 __m128i vecY0, vecY1, vecY2, vecY3;
117 __m128i sum0, sum1, sum2, sum3, vecSum;
118 __m128i initSum;
119
120 celt_assert(len >= 3);
121
122 sum0 = _mm_setzero_si128();
123 sum1 = _mm_setzero_si128();
124 sum2 = _mm_setzero_si128();
125 sum3 = _mm_setzero_si128();
126
127 for (j=0;j<(len-7);j+=8)
128 {
129 vecX = _mm_loadu_si128((__m128i *)(&x[j + 0]));
130 vecY0 = _mm_loadu_si128((__m128i *)(&y[j + 0]));
131 vecY1 = _mm_loadu_si128((__m128i *)(&y[j + 1]));
132 vecY2 = _mm_loadu_si128((__m128i *)(&y[j + 2]));
133 vecY3 = _mm_loadu_si128((__m128i *)(&y[j + 3]));
134
135 sum0 = _mm_add_epi32(sum0, _mm_madd_epi16(vecX, vecY0));
136 sum1 = _mm_add_epi32(sum1, _mm_madd_epi16(vecX, vecY1));
137 sum2 = _mm_add_epi32(sum2, _mm_madd_epi16(vecX, vecY2));
138 sum3 = _mm_add_epi32(sum3, _mm_madd_epi16(vecX, vecY3));
139 }
140
141 sum0 = _mm_add_epi32(sum0, _mm_unpackhi_epi64( sum0, sum0));
142 sum0 = _mm_add_epi32(sum0, _mm_shufflelo_epi16( sum0, 0x0E));
143
144 sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi64( sum1, sum1));
145 sum1 = _mm_add_epi32(sum1, _mm_shufflelo_epi16( sum1, 0x0E));
146
147 sum2 = _mm_add_epi32(sum2, _mm_unpackhi_epi64( sum2, sum2));
148 sum2 = _mm_add_epi32(sum2, _mm_shufflelo_epi16( sum2, 0x0E));
149
150 sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi64( sum3, sum3));
151 sum3 = _mm_add_epi32(sum3, _mm_shufflelo_epi16( sum3, 0x0E));
152
153 vecSum = _mm_unpacklo_epi64(_mm_unpacklo_epi32(sum0, sum1),
154 _mm_unpacklo_epi32(sum2, sum3));
155
156 for (;j<(len-3);j+=4)
157 {
158 vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
159 vecX0 = _mm_shuffle_epi32(vecX, 0x00);
160 vecX1 = _mm_shuffle_epi32(vecX, 0x55);
161 vecX2 = _mm_shuffle_epi32(vecX, 0xaa);
162 vecX3 = _mm_shuffle_epi32(vecX, 0xff);
163
164 vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
165 vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
166 vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);
167 vecY3 = OP_CVTEPI16_EPI32_M64(&y[j + 3]);
168
169 sum0 = _mm_mullo_epi32(vecX0, vecY0);
170 sum1 = _mm_mullo_epi32(vecX1, vecY1);
171 sum2 = _mm_mullo_epi32(vecX2, vecY2);
172 sum3 = _mm_mullo_epi32(vecX3, vecY3);
173
174 sum0 = _mm_add_epi32(sum0, sum1);
175 sum2 = _mm_add_epi32(sum2, sum3);
176 vecSum = _mm_add_epi32(vecSum, sum0);
177 vecSum = _mm_add_epi32(vecSum, sum2);
178 }
179
180 for (;j<len;j++)
181 {
182 vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
183 vecX0 = _mm_shuffle_epi32(vecX, 0x00);
184
185 vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
186
187 sum0 = _mm_mullo_epi32(vecX0, vecY0);
188 vecSum = _mm_add_epi32(vecSum, sum0);
189 }
190
191 initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
192 initSum = _mm_add_epi32(initSum, vecSum);
193 _mm_storeu_si128((__m128i *)sum, initSum);
194}
195#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/vq_sse.h b/lib/rbcodec/codecs/libopus/celt/x86/vq_sse.h
new file mode 100644
index 0000000000..b4efe8f249
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/vq_sse.h
@@ -0,0 +1,50 @@
1/* Copyright (c) 2016 Jean-Marc Valin */
2/*
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6
7 - Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 - Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
18 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*/
26
/* Selects the SSE2 implementation of op_pvq_search() for float builds. */
27#ifndef VQ_SSE_H
28#define VQ_SSE_H
29
30#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
31#define OVERRIDE_OP_PVQ_SEARCH
32
33opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
34
/* SSE2 presumed at build time: call the SSE2 function directly. */
35#if defined(OPUS_X86_PRESUME_SSE2)
36#define op_pvq_search(x, iy, K, N, arch) \
37 (op_pvq_search_sse2(x, iy, K, N, arch))
38
39#else
40
/* Otherwise dispatch through a function-pointer table indexed by
   (arch & OPUS_ARCHMASK); the table is declared here and defined elsewhere. */
41extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
42 celt_norm *_X, int *iy, int K, int N, int arch);
43
44# define op_pvq_search(X, iy, K, N, arch) \
45 ((*OP_PVQ_SEARCH_IMPL[(arch) & OPUS_ARCHMASK])(X, iy, K, N, arch))
46
47#endif
48#endif
49
50#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/vq_sse2.c b/lib/rbcodec/codecs/libopus/celt/x86/vq_sse2.c
new file mode 100644
index 0000000000..775042860d
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/vq_sse2.c
@@ -0,0 +1,217 @@
1/* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2007-2016 Jean-Marc Valin */
4/*
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11
12 - Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include <xmmintrin.h>
34#include <emmintrin.h>
35#include "celt_lpc.h"
36#include "stack_alloc.h"
37#include "mathops.h"
38#include "vq.h"
39#include "x86cpu.h"
40
41
42#ifndef FIXED_POINT
43
/* SSE2 pulse search for the float build (compiled only when FIXED_POINT is
   not defined).
   Places K unit pulses over N positions: an optional projection step gives an
   initial allocation, then the remaining pulses are added one at a time at
   the position that maximizes (xy + X[j]) / sqrt(yy + y[j]).
   _X   : N input values; copied to a local buffer, not modified.
   iy   : output pulse counts carrying the sign of the corresponding input.
          iy is stored four ints at a time, so when N is not a multiple of 4
          up to 3 entries past iy[N-1] are written.
          NOTE(review): confirm callers size iy accordingly.
   K    : number of pulses to place.
   N    : number of positions.
   arch : unused.
   Returns yy, which the code maintains as the sum of the squared pulse
   counts. */
44opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
45{
46 int i, j;
47 int pulsesLeft;
48 float xy, yy;
49 VARDECL(celt_norm, y);
50 VARDECL(celt_norm, X);
51 VARDECL(float, signy);
52 __m128 signmask;
53 __m128 sums;
54 __m128i fours;
55 SAVE_STACK;
56
57 (void)arch;
58 /* All bits set to zero, except for the sign bit. */
59 signmask = _mm_set_ps1(-0.f);
60 fours = _mm_set_epi32(4, 4, 4, 4);
/* N+3 elements each, so the loops below that step by 4 can run past N. */
61 ALLOC(y, N+3, celt_norm);
62 ALLOC(X, N+3, celt_norm);
63 ALLOC(signy, N+3, float);
64
65 OPUS_COPY(X, _X, N);
/* Zero the padding so it does not contribute to the sum of magnitudes. */
66 X[N] = X[N+1] = X[N+2] = 0;
67 sums = _mm_setzero_ps();
68 for (j=0;j<N;j+=4)
69 {
70 __m128 x4, s4;
71 x4 = _mm_loadu_ps(&X[j]);
/* s4 is an all-ones mask in the lanes where the input is negative. */
72 s4 = _mm_cmplt_ps(x4, _mm_setzero_ps());
73 /* Get rid of the sign */
74 x4 = _mm_andnot_ps(signmask, x4);
75 sums = _mm_add_ps(sums, x4);
76 /* Clear y and iy in case we don't do the projection. */
77 _mm_storeu_ps(&y[j], _mm_setzero_ps());
78 _mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
79 _mm_storeu_ps(&X[j], x4);
80 _mm_storeu_ps(&signy[j], s4);
81 }
/* Horizontal add: afterwards every lane of sums holds the sum of |X[j]|. */
82 sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2)));
83 sums = _mm_add_ps(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(2, 3, 0, 1)));
84
85 xy = yy = 0;
86
87 pulsesLeft = K;
88
89 /* Do a pre-search by projecting on the pyramid */
90 if (K > (N>>1))
91 {
92 __m128i pulses_sum;
93 __m128 yy4, xy4;
94 __m128 rcp4;
95 opus_val32 sum = _mm_cvtss_f32(sums);
96 /* If X is too small, just replace it with a pulse at 0 */
97 /* Prevents infinities and NaNs from causing too many pulses
98 to be allocated. 64 is an approximation of infinity here. */
99 if (!(sum > EPSILON && sum < 64))
100 {
101 X[0] = QCONST16(1.f,14);
102 j=1; do
103 X[j]=0;
104 while (++j<N);
105 sums = _mm_set_ps1(1.f);
106 }
107 /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
108 rcp4 = _mm_mul_ps(_mm_set_ps1((float)(K+.8)), _mm_rcp_ps(sums));
109 xy4 = yy4 = _mm_setzero_ps();
110 pulses_sum = _mm_setzero_si128();
111 for (j=0;j<N;j+=4)
112 {
113 __m128 rx4, x4, y4;
114 __m128i iy4;
115 x4 = _mm_loadu_ps(&X[j]);
116 rx4 = _mm_mul_ps(x4, rcp4);
/* Truncating conversion: the initial pulse count for each position. */
117 iy4 = _mm_cvttps_epi32(rx4);
118 pulses_sum = _mm_add_epi32(pulses_sum, iy4);
119 _mm_storeu_si128((__m128i*)&iy[j], iy4);
120 y4 = _mm_cvtepi32_ps(iy4);
121 xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
122 yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
123 /* double the y[] vector so we don't have to do it in the search loop. */
124 _mm_storeu_ps(&y[j], _mm_add_ps(y4, y4));
125 }
/* Horizontal sums of the pulses placed so far and of the xy / yy terms. */
126 pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(1, 0, 3, 2)));
127 pulses_sum = _mm_add_epi32(pulses_sum, _mm_shuffle_epi32(pulses_sum, _MM_SHUFFLE(2, 3, 0, 1)));
128 pulsesLeft -= _mm_cvtsi128_si32(pulses_sum);
129 xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(1, 0, 3, 2)));
130 xy4 = _mm_add_ps(xy4, _mm_shuffle_ps(xy4, xy4, _MM_SHUFFLE(2, 3, 0, 1)));
131 xy = _mm_cvtss_f32(xy4);
132 yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(1, 0, 3, 2)));
133 yy4 = _mm_add_ps(yy4, _mm_shuffle_ps(yy4, yy4, _MM_SHUFFLE(2, 3, 0, 1)));
134 yy = _mm_cvtss_f32(yy4);
135 }
/* Padding lanes get a large negative X and a large y; presumably so that they
   can never be selected as the best position in the search below. */
136 X[N] = X[N+1] = X[N+2] = -100;
137 y[N] = y[N+1] = y[N+2] = 100;
138 celt_sig_assert(pulsesLeft>=0);
139
140 /* This should never happen, but just in case it does (e.g. on silence)
141 we fill the first bin with pulses. */
142 if (pulsesLeft > N+3)
143 {
144 opus_val16 tmp = (opus_val16)pulsesLeft;
/* y[0] holds twice the current count, so this adds tmp*tmp + 2*tmp*count. */
145 yy = MAC16_16(yy, tmp, tmp);
146 yy = MAC16_16(yy, tmp, y[0]);
147 iy[0] += pulsesLeft;
148 pulsesLeft=0;
149 }
150
/* Greedy search: one pulse per iteration. */
151 for (i=0;i<pulsesLeft;i++)
152 {
153 int best_id;
154 __m128 xy4, yy4;
155 __m128 max, max2;
156 __m128i count;
157 __m128i pos;
158 /* The squared magnitude term gets added anyway, so we might as well
159 add it outside the loop */
160 yy = ADD16(yy, 1);
161 xy4 = _mm_load1_ps(&xy);
162 yy4 = _mm_load1_ps(&yy);
163 max = _mm_setzero_ps();
164 pos = _mm_setzero_si128();
/* count holds the position index of each lane. */
165 count = _mm_set_epi32(3, 2, 1, 0);
166 for (j=0;j<N;j+=4)
167 {
168 __m128 x4, y4, r4;
169 x4 = _mm_loadu_ps(&X[j]);
170 y4 = _mm_loadu_ps(&y[j]);
171 x4 = _mm_add_ps(x4, xy4);
172 y4 = _mm_add_ps(y4, yy4);
/* Approximate reciprocal square root; r4 = (xy+X[j]) * rsqrt(yy+y[j]). */
173 y4 = _mm_rsqrt_ps(y4);
174 r4 = _mm_mul_ps(x4, y4);
175 /* Update the index of the max. */
/* NOTE(review): a 16-bit max is applied to 32-bit index lanes; this looks
   like it relies on the indices being non-negative and fitting in 15 bits. */
176 pos = _mm_max_epi16(pos, _mm_and_si128(count, _mm_castps_si128(_mm_cmpgt_ps(r4, max))));
177 /* Update the max. */
178 max = _mm_max_ps(max, r4);
179 /* Update the indices (+4) */
180 count = _mm_add_epi32(count, fours);
181 }
182 /* Horizontal max */
183 max2 = _mm_max_ps(max, _mm_shuffle_ps(max, max, _MM_SHUFFLE(1, 0, 3, 2)));
184 max2 = _mm_max_ps(max2, _mm_shuffle_ps(max2, max2, _MM_SHUFFLE(2, 3, 0, 1)));
185 /* Now that max2 contains the max at all positions, look at which value(s) of the
186 partial max is equal to the global max. */
187 pos = _mm_and_si128(pos, _mm_castps_si128(_mm_cmpeq_ps(max, max2)));
188 pos = _mm_max_epi16(pos, _mm_unpackhi_epi64(pos, pos));
189 pos = _mm_max_epi16(pos, _mm_shufflelo_epi16(pos, _MM_SHUFFLE(1, 0, 3, 2)));
190 best_id = _mm_cvtsi128_si32(pos);
191
192 /* Updating the sums of the new pulse(s) */
193 xy = ADD32(xy, EXTEND32(X[best_id]));
194 /* We're multiplying y[j] by two so we don't have to do it here */
195 yy = ADD16(yy, y[best_id]);
196
197 /* Only now that we've made the final choice, update y/iy */
198 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
199 y[best_id] += 2;
200 iy[best_id]++;
201 }
202
203 /* Put the original sign back */
/* s4 is all ones where the input was negative and zero elsewhere, so
   (iy + s4) ^ s4 yields -iy in the former lanes and iy in the latter. */
204 for (j=0;j<N;j+=4)
205 {
206 __m128i y4;
207 __m128i s4;
208 y4 = _mm_loadu_si128((__m128i*)&iy[j]);
209 s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
210 y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
211 _mm_storeu_si128((__m128i*)&iy[j], y4);
212 }
213 RESTORE_STACK;
214 return yy;
215}
216
217#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/x86_celt_map.c b/lib/rbcodec/codecs/libopus/celt/x86/x86_celt_map.c
new file mode 100644
index 0000000000..d39d88edec
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/x86_celt_map.c
@@ -0,0 +1,167 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#if defined(HAVE_CONFIG_H)
29#include "config.h"
30#endif
31
32#include "x86/x86cpu.h"
33#include "celt_lpc.h"
34#include "pitch.h"
35#include "pitch_sse.h"
36#include "vq.h"
37
38#if defined(OPUS_HAVE_RTCD)
39
40# if defined(FIXED_POINT)
41
42#if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
43
44void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
45 const opus_val16 *x,
46 const opus_val16 *num,
47 opus_val16 *y,
48 int N,
49 int ord,
50 int arch
51) = {
52 celt_fir_c, /* non-sse */
53 celt_fir_c,
54 celt_fir_c,
55 MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */
56 MAY_HAVE_SSE4_1(celt_fir) /* avx */
57};
58
59void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
60 const opus_val16 *x,
61 const opus_val16 *y,
62 opus_val32 sum[4],
63 int len
64) = {
65 xcorr_kernel_c, /* non-sse */
66 xcorr_kernel_c,
67 xcorr_kernel_c,
68 MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */
69 MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */
70};
71
72#endif
73
/* Fixed-point celt_inner_prod() dispatch table.
   It is emitted when run-time selection is actually required:
    - SSE4.1 may be present but is not presumed, or
    - SSE4.1 is not available at all and SSE2 may be present but is not
      presumed.
   The second clause previously tested the misspelled macro
   OPUS_X86_MAY_HAVE_SSE_4_1, which is never defined, so that part of the
   condition was always true. */
#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
    (!defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))

/* Slots follow the arch levels in order: no SSE, SSE, SSE2, SSE4.1, AVX. */
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
    const opus_val16 *x,
    const opus_val16 *y,
    int N
) = {
    celt_inner_prod_c,                /* 0: non-sse */
    celt_inner_prod_c,                /* 1: sse */
    MAY_HAVE_SSE2(celt_inner_prod),   /* 2: sse2 */
    MAY_HAVE_SSE4_1(celt_inner_prod), /* 3: sse4.1 */
    MAY_HAVE_SSE4_1(celt_inner_prod)  /* 4: avx */
};

#endif
90
91# else
92
93#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
94
95void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
96 const opus_val16 *x,
97 const opus_val16 *y,
98 opus_val32 sum[4],
99 int len
100) = {
101 xcorr_kernel_c, /* non-sse */
102 MAY_HAVE_SSE(xcorr_kernel),
103 MAY_HAVE_SSE(xcorr_kernel),
104 MAY_HAVE_SSE(xcorr_kernel),
105 MAY_HAVE_SSE(xcorr_kernel)
106};
107
108opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
109 const opus_val16 *x,
110 const opus_val16 *y,
111 int N
112) = {
113 celt_inner_prod_c, /* non-sse */
114 MAY_HAVE_SSE(celt_inner_prod),
115 MAY_HAVE_SSE(celt_inner_prod),
116 MAY_HAVE_SSE(celt_inner_prod),
117 MAY_HAVE_SSE(celt_inner_prod)
118};
119
120void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
121 const opus_val16 *x,
122 const opus_val16 *y01,
123 const opus_val16 *y02,
124 int N,
125 opus_val32 *xy1,
126 opus_val32 *xy2
127) = {
128 dual_inner_prod_c, /* non-sse */
129 MAY_HAVE_SSE(dual_inner_prod),
130 MAY_HAVE_SSE(dual_inner_prod),
131 MAY_HAVE_SSE(dual_inner_prod),
132 MAY_HAVE_SSE(dual_inner_prod)
133};
134
135void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
136 opus_val32 *y,
137 opus_val32 *x,
138 int T,
139 int N,
140 opus_val16 g10,
141 opus_val16 g11,
142 opus_val16 g12
143) = {
144 comb_filter_const_c, /* non-sse */
145 MAY_HAVE_SSE(comb_filter_const),
146 MAY_HAVE_SSE(comb_filter_const),
147 MAY_HAVE_SSE(comb_filter_const),
148 MAY_HAVE_SSE(comb_filter_const)
149};
150
151
152#endif
153
154#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
155opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
156 celt_norm *_X, int *iy, int K, int N, int arch
157) = {
158 op_pvq_search_c, /* non-sse */
159 op_pvq_search_c,
160 MAY_HAVE_SSE2(op_pvq_search),
161 MAY_HAVE_SSE2(op_pvq_search),
162 MAY_HAVE_SSE2(op_pvq_search)
163};
164#endif
165
166#endif
167#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/x86cpu.c b/lib/rbcodec/codecs/libopus/celt/x86/x86cpu.c
new file mode 100644
index 0000000000..080eb25e41
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/x86cpu.c
@@ -0,0 +1,157 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "cpu_support.h"
33#include "macros.h"
34#include "main.h"
35#include "pitch.h"
36#include "x86cpu.h"
37
38#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
39 (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
40 (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
41 (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
42
43
44#if defined(_MSC_VER)
45
46#include <intrin.h>
47static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
48{
49 __cpuid((int*)CPUInfo, InfoType);
50}
51
52#else
53
54#if defined(CPU_INFO_BY_C)
55#include <cpuid.h>
56#endif
57
/* Query CPUID leaf InfoType and store the resulting EAX, EBX, ECX and EDX
   values in CPUInfo[0..3].

   The access method is chosen at build time:
    - CPU_INFO_BY_ASM: inline assembly issuing the CPUID instruction.
    - CPU_INFO_BY_C:   __get_cpuid() from <cpuid.h>.
    - neither:         no detection method is available.

   This function cannot report failure, so whenever the query cannot be
   performed all four outputs are set to zero. Callers then see no feature
   bits and fall back to the plain C code instead of interpreting stale or
   uninitialised data. */
static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{
#if defined(CPU_INFO_BY_ASM)
#if defined(__i386__) && defined(__PIC__)
/* %ebx is PIC register in 32-bit, so mustn't clobber it. */
    __asm__ __volatile__ (
        "xchg %%ebx, %1\n"
        "cpuid\n"
        "xchg %%ebx, %1\n":
        "=a" (CPUInfo[0]),
        "=r" (CPUInfo[1]),
        "=c" (CPUInfo[2]),
        "=d" (CPUInfo[3]) :
        "0" (InfoType)
    );
#else
    __asm__ __volatile__ (
        "cpuid":
        "=a" (CPUInfo[0]),
        "=b" (CPUInfo[1]),
        "=c" (CPUInfo[2]),
        "=d" (CPUInfo[3]) :
        "0" (InfoType)
    );
#endif
#elif defined(CPU_INFO_BY_C)
    /* __get_cpuid() returns 0 and leaves its outputs untouched when the
       requested leaf is not supported. */
    if (!__get_cpuid(InfoType, &(CPUInfo[0]), &(CPUInfo[1]), &(CPUInfo[2]), &(CPUInfo[3])))
    {
        CPUInfo[0] = CPUInfo[1] = CPUInfo[2] = CPUInfo[3] = 0;
    }
#else
    /* No detection method configured: report "nothing supported". */
    (void)InfoType;
    CPUInfo[0] = CPUInfo[1] = CPUInfo[2] = CPUInfo[3] = 0;
#endif
}
87
88#endif
89
/* x86 SIMD capabilities found at run time. Each field is non-zero when the
   corresponding instruction set was reported by CPUID. */
typedef struct CPU_Feature {
    int HW_SSE;   /* SSE    (128-bit SIMD) */
    int HW_SSE2;  /* SSE2   (128-bit SIMD) */
    int HW_SSE41; /* SSE4.1 (128-bit SIMD) */
    int HW_AVX;   /* AVX    (256-bit SIMD) */
} CPU_Feature;
98
99static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
100{
101 unsigned int info[4] = {0};
102 unsigned int nIds = 0;
103
104 cpuid(info, 0);
105 nIds = info[0];
106
107 if (nIds >= 1){
108 cpuid(info, 1);
109 cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
110 cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
111 cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
112 cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
113 }
114 else {
115 cpu_feature->HW_SSE = 0;
116 cpu_feature->HW_SSE2 = 0;
117 cpu_feature->HW_SSE41 = 0;
118 cpu_feature->HW_AVX = 0;
119 }
120}
121
122int opus_select_arch(void)
123{
124 CPU_Feature cpu_feature;
125 int arch;
126
127 opus_cpu_feature_check(&cpu_feature);
128
129 arch = 0;
130 if (!cpu_feature.HW_SSE)
131 {
132 return arch;
133 }
134 arch++;
135
136 if (!cpu_feature.HW_SSE2)
137 {
138 return arch;
139 }
140 arch++;
141
142 if (!cpu_feature.HW_SSE41)
143 {
144 return arch;
145 }
146 arch++;
147
148 if (!cpu_feature.HW_AVX)
149 {
150 return arch;
151 }
152 arch++;
153
154 return arch;
155}
156
157#endif
diff --git a/lib/rbcodec/codecs/libopus/celt/x86/x86cpu.h b/lib/rbcodec/codecs/libopus/celt/x86/x86cpu.h
new file mode 100644
index 0000000000..1e2bf17b9b
--- /dev/null
+++ b/lib/rbcodec/codecs/libopus/celt/x86/x86cpu.h
@@ -0,0 +1,95 @@
1/* Copyright (c) 2014, Cisco Systems, INC
2 Written by XiangMingZhu WeiZhou MinPeng YanWang
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#if !defined(X86CPU_H)
29# define X86CPU_H
30
/* MAY_HAVE_<ISA>(name) expands to name_<isa> when the matching
   OPUS_X86_MAY_HAVE_<ISA> macro is defined, and to the plain C
   implementation name_c otherwise. */
# ifdef OPUS_X86_MAY_HAVE_SSE
#  define MAY_HAVE_SSE(name) name ## _sse
# else
#  define MAY_HAVE_SSE(name) name ## _c
# endif

# ifdef OPUS_X86_MAY_HAVE_SSE2
#  define MAY_HAVE_SSE2(name) name ## _sse2
# else
#  define MAY_HAVE_SSE2(name) name ## _c
# endif

# ifdef OPUS_X86_MAY_HAVE_SSE4_1
#  define MAY_HAVE_SSE4_1(name) name ## _sse4_1
# else
#  define MAY_HAVE_SSE4_1(name) name ## _c
# endif

# ifdef OPUS_X86_MAY_HAVE_AVX
#  define MAY_HAVE_AVX(name) name ## _avx
# else
#  define MAY_HAVE_AVX(name) name ## _c
# endif

/* Run-time CPU detection entry point; declared only when OPUS_HAVE_RTCD is
   defined. */
# ifdef OPUS_HAVE_RTCD
int opus_select_arch(void);
# endif
58
59/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
60 or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
61 actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
62 reference, these require 16-byte alignment and load a full 16 bytes (instead
63 of 4 or 8), possibly reading out of bounds.
64
65 We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
66 _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
67 reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
68 optimize this out when optimizations ARE enabled.
69
70 Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
71 (which is fair, since technically the compiler is always allowed to do the
72 dereference before invoking the function implementing the intrinsic).
73 However, it is smart enough to eliminate the extra MOVD instruction.
74 For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
75 the extra MOVQ if it's specified explicitly */
76
/* _mm_cvtepi8_epi32() applied to the 32 bits stored at x.
   Optimising non-clang builds dereference x directly as an __m128i; clang and
   unoptimised builds load the 32 bits explicitly with _mm_cvtsi32_si128(). */
# if !defined(__clang__) && defined(__OPTIMIZE__)
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(*(__m128i *)(x)))
# else
#  define OP_CVTEPI8_EPI32_M32(x) \
 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
# endif
84
/* _mm_cvtepi16_epi32() applied to the 64 bits stored at x. The same
   reasoning about the generated instruction sequence applies as for the
   32-bit macro above: optimising non-clang builds dereference x directly as
   an __m128i, while clang and unoptimised builds load the 64 bits explicitly
   with _mm_loadl_epi64(). */
# if !defined(__clang__) && defined(__OPTIMIZE__)
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(*(__m128i *)(x)))
# else
#  define OP_CVTEPI16_EPI32_M64(x) \
 (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
# endif
94
95#endif