1 files changed, 73 insertions, 6 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c
index 3e0ce6e6a5..c0a1e0dab9 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -54,6 +54,10 @@
 #define PACKAGE_VERSION "unknown"
 #endif
+#if defined(MIPSr1_ASM)
+#include "mips/celt_mipsr1.h"
+#endif
 int resampling_factor(opus_int32 rate)
 {
@@ -86,6 +90,63 @@ int resampling_factor(opus_int32 rate)
 }
 #ifndef OVERRIDE_COMB_FILTER_CONST
+/* This version should be faster on ARM.
+ *
+ * comb_filter_const() filters x into y with one fixed set of gains. For
+ * each processed index i the output starts from x[i] and accumulates, via
+ * MAC16_32_Q16, three weighted terms taken from the delayed input around
+ * x[i-T]:
+ *   g10 with x[i-T],
+ *   g11 with x[i-T+1] + x[i-T-1],
+ *   g12 with x[i-T+2] + x[i-T-2].
+ * Every delayed sample is shifted left by one bit with SHL32 before use
+ * (presumably to line up with the Q16 scaling of MAC16_32_Q16 -- confirm
+ * against the macro definitions).
+ *
+ *   y            output buffer that receives the filtered samples
+ *   x            input buffer; delayed samples are read from x[-T-2] up to
+ *                two samples past the delayed position of the last output
+ *   T            delay, in samples, of the centre tap
+ *   N            number of samples to process
+ *   g10,g11,g12  gains for the centre tap, the +/-1 taps and the +/-2 taps
+ *
+ * The main loop is unrolled by five. The five delayed samples live in
+ * x0..x4 and each step overwrites only the sample that is no longer
+ * needed, so the variables rotate roles instead of being copied; after
+ * five steps every variable is back in its starting role.
+ *
+ * NOTE(review): the unrolled loop only handles complete groups of five,
+ * and the remainder loop is compiled only when CUSTOM_MODES is defined. In
+ * other builds a trailing N % 5 samples of y are left unwritten --
+ * presumably N is always a multiple of five there; confirm with callers.
+ */
+#ifdef OPUS_ARM_ASM
+static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
+      opus_val16 g10, opus_val16 g11, opus_val16 g12)
+{
+   opus_val32 x0, x1, x2, x3, x4;
+   int i;
+   x4 = SHL32(x[-T-2], 1); /* tap two samples before the centre */
+   x3 = SHL32(x[-T-1], 1); /* tap one sample before the centre */
+   x2 = SHL32(x[-T], 1); /* centre tap for the first output */
+   x1 = SHL32(x[-T+1], 1); /* tap one sample after the centre */
+   for (i=0;i<N-4;i+=5) /* only complete groups of five outputs */
+   {
+      opus_val32 t;
+      x0=SHL32(x[i-T+2],1); /* load the +2 tap into the free variable */
+      t = MAC16_32_Q16(x[i], g10, x2); /* centre is x2 */
+      t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+      t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+      y[i] = t;
+      x4=SHL32(x[i-T+3],1); /* x4 held the oldest sample; reuse it */
+      t = MAC16_32_Q16(x[i+1], g10, x1); /* centre is x1 */
+      t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
+      t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
+      y[i+1] = t;
+      x3=SHL32(x[i-T+4],1); /* x3 is now the oldest; reuse it */
+      t = MAC16_32_Q16(x[i+2], g10, x0); /* centre is x0 */
+      t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
+      t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
+      y[i+2] = t;
+      x2=SHL32(x[i-T+5],1); /* x2 is now the oldest; reuse it */
+      t = MAC16_32_Q16(x[i+3], g10, x4); /* centre is x4 */
+      t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
+      t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
+      y[i+3] = t;
+      x1=SHL32(x[i-T+6],1); /* x1 is now the oldest; reuse it */
+      t = MAC16_32_Q16(x[i+4], g10, x3); /* centre is x3 */
+      t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
+      t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
+      y[i+4] = t;
+   }
+#ifdef CUSTOM_MODES
+   for (;i<N;i++) /* remainder: one output per pass */
+   {
+      opus_val32 t;
+      x0=SHL32(x[i-T+2],1);
+      t = MAC16_32_Q16(x[i], g10, x2);
+      t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
+      t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
+      y[i] = t;
+      x4=x3; /* slide the five-sample window forward by one */
+      x3=x2;
+      x2=x1;
+      x1=x0;
+   }
+#endif
+}
+#else
 static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
      opus_val16 g10, opus_val16 g11, opus_val16 g12)
 {
@@ -110,7 +171,9 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
 }
 #endif
+#endif
+#ifndef OVERRIDE_comb_filter
 void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
      opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
      const opus_val16 *window, int overlap)
@@ -131,16 +194,19 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
         OPUS_MOVE(y, x, N);
      return;
   }
-   g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
+   g00 = MULT16_16_P15(g0, gains[tapset0][0]);
-   g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
+   g01 = MULT16_16_P15(g0, gains[tapset0][1]);
-   g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
+   g02 = MULT16_16_P15(g0, gains[tapset0][2]);
-   g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
+   g10 = MULT16_16_P15(g1, gains[tapset1][0]);
-   g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
+   g11 = MULT16_16_P15(g1, gains[tapset1][1]);
-   g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
+   g12 = MULT16_16_P15(g1, gains[tapset1][2]);
   x1 = x[-T1+1];
   x2 = x[-T1  ];
   x3 = x[-T1-1];
   x4 = x[-T1-2];
+   /* If the filter didn't change, we don't need the overlap */
+   if (g0==g1 && T0==T1 && tapset0==tapset1)
+      overlap=0;
   for (i=0;i<overlap;i++)
   {
      opus_val16 f;
@@ -170,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
   /* Compute the part with the constant filter. */
   comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
 }
+#endif /* OVERRIDE_comb_filter */
 const signed char tf_select_table[4][8] = {
      {0, -1, 0, -1,    0,-1, 0,-1},

diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c index 3e0ce6e6a5..c0a1e0dab9 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt.c +++ b/lib/rbcodec/codecs/libopus/celt/celt.c
@@ -54,6 +54,10 @@
54	#define PACKAGE_VERSION "unknown"	54	#define PACKAGE_VERSION "unknown"
55	#endif	55	#endif
56		56
		57	#if defined(MIPSr1_ASM)
		58	#include "mips/celt_mipsr1.h"
		59	#endif
		60
57		61
58	int resampling_factor(opus_int32 rate)	62	int resampling_factor(opus_int32 rate)
59	{	63	{
@@ -86,6 +90,63 @@ int resampling_factor(opus_int32 rate)
86	}	90	}
87		91
88	#ifndef OVERRIDE_COMB_FILTER_CONST	92	#ifndef OVERRIDE_COMB_FILTER_CONST
		93	/* This version should be faster on ARM */
		94	#ifdef OPUS_ARM_ASM
		95	static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
		96	opus_val16 g10, opus_val16 g11, opus_val16 g12)
		97	{
		98	opus_val32 x0, x1, x2, x3, x4;
		99	int i;
		100	x4 = SHL32(x[-T-2], 1);
		101	x3 = SHL32(x[-T-1], 1);
		102	x2 = SHL32(x[-T], 1);
		103	x1 = SHL32(x[-T+1], 1);
		104	for (i=0;i<N-4;i+=5)
		105	{
		106	opus_val32 t;
		107	x0=SHL32(x[i-T+2],1);
		108	t = MAC16_32_Q16(x[i], g10, x2);
		109	t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
		110	t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
		111	y[i] = t;
		112	x4=SHL32(x[i-T+3],1);
		113	t = MAC16_32_Q16(x[i+1], g10, x1);
		114	t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
		115	t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
		116	y[i+1] = t;
		117	x3=SHL32(x[i-T+4],1);
		118	t = MAC16_32_Q16(x[i+2], g10, x0);
		119	t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
		120	t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
		121	y[i+2] = t;
		122	x2=SHL32(x[i-T+5],1);
		123	t = MAC16_32_Q16(x[i+3], g10, x4);
		124	t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
		125	t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
		126	y[i+3] = t;
		127	x1=SHL32(x[i-T+6],1);
		128	t = MAC16_32_Q16(x[i+4], g10, x3);
		129	t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
		130	t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
		131	y[i+4] = t;
		132	}
		133	#ifdef CUSTOM_MODES
		134	for (;i<N;i++)
		135	{
		136	opus_val32 t;
		137	x0=SHL32(x[i-T+2],1);
		138	t = MAC16_32_Q16(x[i], g10, x2);
		139	t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
		140	t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
		141	y[i] = t;
		142	x4=x3;
		143	x3=x2;
		144	x2=x1;
		145	x1=x0;
		146	}
		147	#endif
		148	}
		149	#else
89	static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,	150	static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
90	opus_val16 g10, opus_val16 g11, opus_val16 g12)	151	opus_val16 g10, opus_val16 g11, opus_val16 g12)
91	{	152	{
@@ -110,7 +171,9 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
110		171
111	}	172	}
112	#endif	173	#endif
		174	#endif
113		175
		176	#ifndef OVERRIDE_comb_filter
114	void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,	177	void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
115	opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,	178	opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
116	const opus_val16 *window, int overlap)	179	const opus_val16 *window, int overlap)
@@ -131,16 +194,19 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
131	OPUS_MOVE(y, x, N);	194	OPUS_MOVE(y, x, N);
132	return;	195	return;
133	}	196	}
134	g00 = MULT16_16_Q15(g0, gains[tapset0][0]);	197	g00 = MULT16_16_P15(g0, gains[tapset0][0]);
135	g01 = MULT16_16_Q15(g0, gains[tapset0][1]);	198	g01 = MULT16_16_P15(g0, gains[tapset0][1]);
136	g02 = MULT16_16_Q15(g0, gains[tapset0][2]);	199	g02 = MULT16_16_P15(g0, gains[tapset0][2]);
137	g10 = MULT16_16_Q15(g1, gains[tapset1][0]);	200	g10 = MULT16_16_P15(g1, gains[tapset1][0]);
138	g11 = MULT16_16_Q15(g1, gains[tapset1][1]);	201	g11 = MULT16_16_P15(g1, gains[tapset1][1]);
139	g12 = MULT16_16_Q15(g1, gains[tapset1][2]);	202	g12 = MULT16_16_P15(g1, gains[tapset1][2]);
140	x1 = x[-T1+1];	203	x1 = x[-T1+1];
141	x2 = x[-T1 ];	204	x2 = x[-T1 ];
142	x3 = x[-T1-1];	205	x3 = x[-T1-1];
143	x4 = x[-T1-2];	206	x4 = x[-T1-2];
		207	/* If the filter didn't change, we don't need the overlap */
		208	if (g0==g1 && T0==T1 && tapset0==tapset1)
		209	overlap=0;
144	for (i=0;i<overlap;i++)	210	for (i=0;i<overlap;i++)
145	{	211	{
146	opus_val16 f;	212	opus_val16 f;
@@ -170,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
170	/* Compute the part with the constant filter. */	236	/* Compute the part with the constant filter. */
171	comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);	237	comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
172	}	238	}
		239	#endif /* OVERRIDE_comb_filter */
173		240
174	const signed char tf_select_table[4][8] = {	241	const signed char tf_select_table[4][8] = {
175	{0, -1, 0, -1, 0,-1, 0,-1},	242	{0, -1, 0, -1, 0,-1, 0,-1},