Sync opus codec to upstream git

Sync opus codec to upstream commit 02fed471a4568852d6618e041c4f2af0d7730ee2 (August 30 2013). This brings in a lot of optimizations but also makes the diff between our codec and the upstream much smaller, as most of our optimizations have been upstreamed or superseded. Speedups across the board for CELT mode files (64kbps / 128kbps): H300 9.82MHz / 15.48MHz; c200 4.86MHz / 9.63MHz; fuze v1 10.32MHz / 15.92MHz. For the silk mode test file (16kbps), arm targets get a speedup of about 2MHz while the H300 is 7.8MHz slower, likely because it's now using the pseudostack more rather than the real stack, which is in iram. Patches to get around that are upcoming. Change-Id: Ifecf963e461c51ac42e09dac1e91bc4bc3b12fa3
author: Nils Wallménius <nils@rockbox.org> 2013-05-20 22:25:57 +0200
committer: Nils Wallménius <nils@rockbox.org> 2013-08-31 08:30:51 +0200
commit: 580b307fd791c0997a8831bc800bba87797bfb7e (patch)
tree: 807846056f06fd944a750ce41217a877910ebd59 /lib/rbcodec/codecs/libopus/celt/celt_lpc.c
parent: 74761b70acd96cecc0d35450dd56a98ad9ee7d3d (diff)
download: rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.tar.gz
rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.zip
1 files changed, 158 insertions, 38 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
index 66aed1de09..7ffe90a357 100644
--- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
+++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
@@ -26,12 +26,13 @@
 */
 #ifdef HAVE_CONFIG_H
-#include "opus_config.h"
+#include "config.h"
 #endif
 #include "celt_lpc.h"
 #include "stack_alloc.h"
 #include "mathops.h"
+#include "pitch.h"
 void _celt_lpc(
      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
@@ -87,42 +88,71 @@ int          p
 #endif
 }
-void celt_fir(const opus_val16 *x,
+void celt_fir(const opus_val16 *_x,
         const opus_val16 *num,
-         opus_val16 *y,
+         opus_val16 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
   int i,j;
+   VARDECL(opus_val16, rnum);
+   VARDECL(opus_val16, x);
+   SAVE_STACK;
+   ALLOC(rnum, ord, opus_val16);
+   ALLOC(x, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rnum[i] = num[ord-i-1];
+   for(i=0;i<ord;i++)
+      x[i] = mem[ord-i-1];
+   for (i=0;i<N;i++)
+      x[i+ord]=_x[i];
+   for(i=0;i<ord;i++)
+      mem[i] = _x[N-i-1];
+#ifdef SMALL_FOOTPRINT
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
+      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
      for (j=0;j<ord;j++)
      {
-         sum += MULT16_16(num[j],mem[j]);
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
-      }
-      for (j=ord-1;j>=1;j--)
-      {
-         mem[j]=mem[j-1];
      }
-      mem[0] = x[i];
+      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
-      y[i] = ROUND16(sum, SIG_SHIFT);
   }
+#else
+   for (i=0;i<N-3;i+=4)
+   {
+      opus_val32 sum[4]={0,0,0,0};
+      xcorr_kernel(rnum, x+i, sum, ord);
+      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
+      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
+      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
+      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = 0;
+      for (j=0;j<ord;j++)
+         sum = MAC16_16(sum,rnum[j],x[i+j]);
+      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
+   }
+#endif
+   RESTORE_STACK;
 }
-void celt_iir(const opus_val32 *x,
+void celt_iir(const opus_val32 *_x,
         const opus_val16 *den,
-         opus_val32 *y,
+         opus_val32 *_y,
         int N,
         int ord,
         opus_val16 *mem)
 {
+#ifdef SMALL_FOOTPRINT
   int i,j;
   for (i=0;i<N;i++)
   {
-      opus_val32 sum = x[i];
+      opus_val32 sum = _x[i];
      for (j=0;j<ord;j++)
      {
         sum -= MULT16_16(den[j],mem[j]);
@@ -132,11 +162,65 @@ void celt_iir(const opus_val32 *x,
         mem[j]=mem[j-1];
      }
      mem[0] = ROUND16(sum,SIG_SHIFT);
-      y[i] = sum;
+      _y[i] = sum;
   }
+#else
+   int i,j;
+   VARDECL(opus_val16, rden);
+   VARDECL(opus_val16, y);
+   SAVE_STACK;
+   celt_assert((ord&3)==0);
+   ALLOC(rden, ord, opus_val16);
+   ALLOC(y, N+ord, opus_val16);
+   for(i=0;i<ord;i++)
+      rden[i] = den[ord-i-1];
+   for(i=0;i<ord;i++)
+      y[i] = -mem[ord-i-1];
+   for(;i<N+ord;i++)
+      y[i]=0;
+   for (i=0;i<N-3;i+=4)
+   {
+      /* Unroll by 4 as if it were an FIR filter */
+      opus_val32 sum[4];
+      sum[0]=_x[i];
+      sum[1]=_x[i+1];
+      sum[2]=_x[i+2];
+      sum[3]=_x[i+3];
+      xcorr_kernel(rden, y+i, sum, ord);
+      /* Patch up the result to compensate for the fact that this is an IIR */
+      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
+      _y[i  ] = sum[0];
+      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
+      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
+      _y[i+1] = sum[1];
+      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
+      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
+      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
+      _y[i+2] = sum[2];
+      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
+      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
+      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
+      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
+      _y[i+3] = sum[3];
+   }
+   for (;i<N;i++)
+   {
+      opus_val32 sum = _x[i];
+      for (j=0;j<ord;j++)
+         sum -= MULT16_16(rden[j],y[i+j]);
+      y[i+ord] = ROUND16(sum,SIG_SHIFT);
+      _y[i] = sum;
+   }
+   for(i=0;i<ord;i++)
+      mem[i] = _y[N-i-1];
+   RESTORE_STACK;
+#endif
 }
-void _celt_autocorr(
+int _celt_autocorr(
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
                   const opus_val16       *window,
@@ -146,43 +230,79 @@ void _celt_autocorr(
                  )
 {
   opus_val32 d;
-   int i;
+   int i, k;
+   int fastN=n-lag;
+   int shift;
+   const opus_val16 *xptr;
   VARDECL(opus_val16, xx);
   SAVE_STACK;
   ALLOC(xx, n, opus_val16);
   celt_assert(n>0);
   celt_assert(overlap>=0);
-   for (i=0;i<n;i++)
+   if (overlap == 0)
-      xx[i] = x[i];
-   for (i=0;i<overlap;i++)
   {
-      xx[i] = MULT16_16_Q15(x[i],window[i]);
+      xptr = x;
-      xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+   } else {
+      for (i=0;i<n;i++)
+         xx[i] = x[i];
+      for (i=0;i<overlap;i++)
+      {
+         xx[i] = MULT16_16_Q15(x[i],window[i]);
+         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
+      }
+      xptr = xx;
   }
+   shift=0;
 #ifdef FIXED_POINT
   {
-      opus_val32 ac0=0;
+      opus_val32 ac0;
-      int shift;
+      ac0 = 1+(n<<7);
-      for(i=0;i<n;i++)
+      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
-         ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
+      for(i=(n&1);i<n;i+=2)
-      ac0 += 1+n;
+      {
+         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
+         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
+      }
      shift = celt_ilog2(ac0)-30+10;
-      shift = (shift+1)/2;
+      shift = (shift)/2;
-      for(i=0;i<n;i++)
+      if (shift>0)
-         xx[i] = VSHR32(xx[i], shift);
+      {
+         for(i=0;i<n;i++)
+            xx[i] = PSHR32(xptr[i], shift);
+         xptr = xx;
+      } else
+         shift = 0;
   }
 #endif
-   while (lag>=0)
+   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
+   for (k=0;k<=lag;k++)
   {
-      for (i = lag, d = 0; i < n; i++)
+      for (i = k+fastN, d = 0; i < n; i++)
-         d += xx[i] * xx[i-lag];
+         d = MAC16_16(d, xptr[i], xptr[i-k]);
-      ac[lag] = d;
+      ac[k] += d;
-      /*printf ("%f ", ac[lag]);*/
-      lag--;
   }
-   /*printf ("\n");*/
+#ifdef FIXED_POINT
-   ac[0] += 10;
+   shift = 2*shift;
+   if (shift<=0)
+      ac[0] += SHL32((opus_int32)1, -shift);
+   if (ac[0] < 268435456)
+   {
+      int shift2 = 29 - EC_ILOG(ac[0]);
+      for (i=0;i<=lag;i++)
+         ac[i] = SHL32(ac[i], shift2);
+      shift -= shift2;
+   } else if (ac[0] >= 536870912)
+   {
+      int shift2=1;
+      if (ac[0] >= 1073741824)
+         shift2++;
+      for (i=0;i<=lag;i++)
+         ac[i] = SHR32(ac[i], shift2);
+      shift += shift2;
+   }
+#endif
   RESTORE_STACK;
+   return shift;
 }
author	Nils Wallménius <nils@rockbox.org>	2013-05-20 22:25:57 +0200
committer	Nils Wallménius <nils@rockbox.org>	2013-08-31 08:30:51 +0200
commit	580b307fd791c0997a8831bc800bba87797bfb7e (patch)
tree	807846056f06fd944a750ce41217a877910ebd59 /lib/rbcodec/codecs/libopus/celt/celt_lpc.c
parent	74761b70acd96cecc0d35450dd56a98ad9ee7d3d (diff)
download	rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.tar.gz rockbox-580b307fd791c0997a8831bc800bba87797bfb7e.zip

diff --git a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c index 66aed1de09..7ffe90a357 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt_lpc.c +++ b/lib/rbcodec/codecs/libopus/celt/celt_lpc.c
@@ -26,12 +26,13 @@
26	*/	26	*/
27		27
28	#ifdef HAVE_CONFIG_H	28	#ifdef HAVE_CONFIG_H
29	#include "opus_config.h"	29	#include "config.h"
30	#endif	30	#endif
31		31
32	#include "celt_lpc.h"	32	#include "celt_lpc.h"
33	#include "stack_alloc.h"	33	#include "stack_alloc.h"
34	#include "mathops.h"	34	#include "mathops.h"
		35	#include "pitch.h"
35		36
36	void _celt_lpc(	37	void _celt_lpc(
37	opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */	38	opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
@@ -87,42 +88,71 @@ int p
87	#endif	88	#endif
88	}	89	}
89		90
90	void celt_fir(const opus_val16 *x,	91	void celt_fir(const opus_val16 *_x,
91	const opus_val16 *num,	92	const opus_val16 *num,
92	opus_val16 *y,	93	opus_val16 *_y,
93	int N,	94	int N,
94	int ord,	95	int ord,
95	opus_val16 *mem)	96	opus_val16 *mem)
96	{	97	{
97	int i,j;	98	int i,j;
		99	VARDECL(opus_val16, rnum);
		100	VARDECL(opus_val16, x);
		101	SAVE_STACK;
98		102
		103	ALLOC(rnum, ord, opus_val16);
		104	ALLOC(x, N+ord, opus_val16);
		105	for(i=0;i<ord;i++)
		106	rnum[i] = num[ord-i-1];
		107	for(i=0;i<ord;i++)
		108	x[i] = mem[ord-i-1];
		109	for (i=0;i<N;i++)
		110	x[i+ord]=_x[i];
		111	for(i=0;i<ord;i++)
		112	mem[i] = _x[N-i-1];
		113	#ifdef SMALL_FOOTPRINT
99	for (i=0;i<N;i++)	114	for (i=0;i<N;i++)
100	{	115	{
101	opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);	116	opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
102	for (j=0;j<ord;j++)	117	for (j=0;j<ord;j++)
103	{	118	{
104	sum += MULT16_16(num[j],mem[j]);	119	sum = MAC16_16(sum,rnum[j],x[i+j]);
105	}
106	for (j=ord-1;j>=1;j--)
107	{
108	mem[j]=mem[j-1];
109	}	120	}
110	mem[0] = x[i];	121	_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
111	y[i] = ROUND16(sum, SIG_SHIFT);
112	}	122	}
		123	#else
		124	for (i=0;i<N-3;i+=4)
		125	{
		126	opus_val32 sum[4]={0,0,0,0};
		127	xcorr_kernel(rnum, x+i, sum, ord);
		128	_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
		129	_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
		130	_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
		131	_y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
		132	}
		133	for (;i<N;i++)
		134	{
		135	opus_val32 sum = 0;
		136	for (j=0;j<ord;j++)
		137	sum = MAC16_16(sum,rnum[j],x[i+j]);
		138	_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
		139	}
		140	#endif
		141	RESTORE_STACK;
113	}	142	}
114		143
115	void celt_iir(const opus_val32 *x,	144	void celt_iir(const opus_val32 *_x,
116	const opus_val16 *den,	145	const opus_val16 *den,
117	opus_val32 *y,	146	opus_val32 *_y,
118	int N,	147	int N,
119	int ord,	148	int ord,
120	opus_val16 *mem)	149	opus_val16 *mem)
121	{	150	{
		151	#ifdef SMALL_FOOTPRINT
122	int i,j;	152	int i,j;
123	for (i=0;i<N;i++)	153	for (i=0;i<N;i++)
124	{	154	{
125	opus_val32 sum = x[i];	155	opus_val32 sum = _x[i];
126	for (j=0;j<ord;j++)	156	for (j=0;j<ord;j++)
127	{	157	{
128	sum -= MULT16_16(den[j],mem[j]);	158	sum -= MULT16_16(den[j],mem[j]);
@@ -132,11 +162,65 @@ void celt_iir(const opus_val32 *x,
132	mem[j]=mem[j-1];	162	mem[j]=mem[j-1];
133	}	163	}
134	mem[0] = ROUND16(sum,SIG_SHIFT);	164	mem[0] = ROUND16(sum,SIG_SHIFT);
135	y[i] = sum;	165	_y[i] = sum;
136	}	166	}
		167	#else
		168	int i,j;
		169	VARDECL(opus_val16, rden);
		170	VARDECL(opus_val16, y);
		171	SAVE_STACK;
		172
		173	celt_assert((ord&3)==0);
		174	ALLOC(rden, ord, opus_val16);
		175	ALLOC(y, N+ord, opus_val16);
		176	for(i=0;i<ord;i++)
		177	rden[i] = den[ord-i-1];
		178	for(i=0;i<ord;i++)
		179	y[i] = -mem[ord-i-1];
		180	for(;i<N+ord;i++)
		181	y[i]=0;
		182	for (i=0;i<N-3;i+=4)
		183	{
		184	/* Unroll by 4 as if it were an FIR filter */
		185	opus_val32 sum[4];
		186	sum[0]=_x[i];
		187	sum[1]=_x[i+1];
		188	sum[2]=_x[i+2];
		189	sum[3]=_x[i+3];
		190	xcorr_kernel(rden, y+i, sum, ord);
		191
		192	/* Patch up the result to compensate for the fact that this is an IIR */
		193	y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
		194	_y[i ] = sum[0];
		195	sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
		196	y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
		197	_y[i+1] = sum[1];
		198	sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
		199	sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
		200	y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
		201	_y[i+2] = sum[2];
		202
		203	sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
		204	sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
		205	sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
		206	y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
		207	_y[i+3] = sum[3];
		208	}
		209	for (;i<N;i++)
		210	{
		211	opus_val32 sum = _x[i];
		212	for (j=0;j<ord;j++)
		213	sum -= MULT16_16(rden[j],y[i+j]);
		214	y[i+ord] = ROUND16(sum,SIG_SHIFT);
		215	_y[i] = sum;
		216	}
		217	for(i=0;i<ord;i++)
		218	mem[i] = _y[N-i-1];
		219	RESTORE_STACK;
		220	#endif
137	}	221	}
138		222
139	void _celt_autocorr(	223	int _celt_autocorr(
140	const opus_val16 *x, /* in: [0...n-1] samples x */	224	const opus_val16 *x, /* in: [0...n-1] samples x */
141	opus_val32 *ac, /* out: [0...lag-1] ac values */	225	opus_val32 *ac, /* out: [0...lag-1] ac values */
142	const opus_val16 *window,	226	const opus_val16 *window,
@@ -146,43 +230,79 @@ void _celt_autocorr(
146	)	230	)
147	{	231	{
148	opus_val32 d;	232	opus_val32 d;
149	int i;	233	int i, k;
		234	int fastN=n-lag;
		235	int shift;
		236	const opus_val16 *xptr;
150	VARDECL(opus_val16, xx);	237	VARDECL(opus_val16, xx);
151	SAVE_STACK;	238	SAVE_STACK;
152	ALLOC(xx, n, opus_val16);	239	ALLOC(xx, n, opus_val16);
153	celt_assert(n>0);	240	celt_assert(n>0);
154	celt_assert(overlap>=0);	241	celt_assert(overlap>=0);
155	for (i=0;i<n;i++)	242	if (overlap == 0)
156	xx[i] = x[i];
157	for (i=0;i<overlap;i++)
158	{	243	{
159	xx[i] = MULT16_16_Q15(x[i],window[i]);	244	xptr = x;
160	xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);	245	} else {
		246	for (i=0;i<n;i++)
		247	xx[i] = x[i];
		248	for (i=0;i<overlap;i++)
		249	{
		250	xx[i] = MULT16_16_Q15(x[i],window[i]);
		251	xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
		252	}
		253	xptr = xx;
161	}	254	}
		255	shift=0;
162	#ifdef FIXED_POINT	256	#ifdef FIXED_POINT
163	{	257	{
164	opus_val32 ac0=0;	258	opus_val32 ac0;
165	int shift;	259	ac0 = 1+(n<<7);
166	for(i=0;i<n;i++)	260	if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
167	ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);	261	for(i=(n&1);i<n;i+=2)
168	ac0 += 1+n;	262	{
		263	ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
		264	ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
		265	}
169		266
170	shift = celt_ilog2(ac0)-30+10;	267	shift = celt_ilog2(ac0)-30+10;
171	shift = (shift+1)/2;	268	shift = (shift)/2;
172	for(i=0;i<n;i++)	269	if (shift>0)
173	xx[i] = VSHR32(xx[i], shift);	270	{
		271	for(i=0;i<n;i++)
		272	xx[i] = PSHR32(xptr[i], shift);
		273	xptr = xx;
		274	} else
		275	shift = 0;
174	}	276	}
175	#endif	277	#endif
176	while (lag>=0)	278	celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
		279	for (k=0;k<=lag;k++)
177	{	280	{
178	for (i = lag, d = 0; i < n; i++)	281	for (i = k+fastN, d = 0; i < n; i++)
179	d += xx[i] * xx[i-lag];	282	d = MAC16_16(d, xptr[i], xptr[i-k]);
180	ac[lag] = d;	283	ac[k] += d;
181	/*printf ("%f ", ac[lag]);*/
182	lag--;
183	}	284	}
184	/*printf ("\n");*/	285	#ifdef FIXED_POINT
185	ac[0] += 10;	286	shift = 2*shift;
		287	if (shift<=0)
		288	ac[0] += SHL32((opus_int32)1, -shift);
		289	if (ac[0] < 268435456)
		290	{
		291	int shift2 = 29 - EC_ILOG(ac[0]);
		292	for (i=0;i<=lag;i++)
		293	ac[i] = SHL32(ac[i], shift2);
		294	shift -= shift2;
		295	} else if (ac[0] >= 536870912)
		296	{
		297	int shift2=1;
		298	if (ac[0] >= 1073741824)
		299	shift2++;
		300	for (i=0;i<=lag;i++)
		301	ac[i] = SHR32(ac[i], shift2);
		302	shift += shift2;
		303	}
		304	#endif
186		305
187	RESTORE_STACK;	306	RESTORE_STACK;
		307	return shift;
188	}	308	}