1 files changed, 179 insertions, 109 deletions
diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c
index 9531786fd1..a7210bf014 100644
--- a/apps/codecs/demac/libdemac/predictor.c
+++ b/apps/codecs/demac/libdemac/predictor.c
@@ -37,160 +37,230 @@ static const int32_t initial_coeffs[4] = {
  360, 317, -109, 98
 };
-static void init_predictor(struct predictor_t* p)
+#define YDELAYA (18 + PREDICTOR_ORDER*4)
+#define YDELAYB (18 + PREDICTOR_ORDER*3)
+#define XDELAYA (18 + PREDICTOR_ORDER*2)
+#define XDELAYB (18 + PREDICTOR_ORDER)
+#define YADAPTCOEFFSA (18)
+#define XADAPTCOEFFSA (14)
+#define YADAPTCOEFFSB (10)
+#define XADAPTCOEFFSB (5)
+void init_predictor_decoder(struct predictor_t* p)
 {
    /* Zero the history buffers */
-    memset(p->historybuffer, 0, (PREDICTOR_ORDER*4) * sizeof(int32_t));
+    memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t));
-    p->delayA = p->historybuffer + PREDICTOR_ORDER*4;
+    p->buf = p->historybuffer;
-    p->delayB = p->historybuffer + PREDICTOR_ORDER*3;
-    p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2;
-    p->adaptcoeffsB = p->historybuffer + PREDICTOR_ORDER;
    /* Initialise and zero the co-efficients */
-    memcpy(p->coeffsA, initial_coeffs, sizeof(initial_coeffs));
+    memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs));
-    memset(p->coeffsB, 0, sizeof(p->coeffsB));
+    memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs));
+    memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB));
-    p->filterA = 0;
+    memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB));
-    p->filterB = 0;
-    
+    p->YfilterA = 0;
-    p->lastA = 0;
+    p->YfilterB = 0;
+    p->YlastA = 0;
+    p->XfilterA = 0;
+    p->XfilterB = 0;
+    p->XlastA = 0;
 }
-static int do_predictor_decode(struct predictor_t* p, int32_t A, int32_t B)
+#ifdef CPU_COLDFIRE
-{
+/* Putting this in IRAM makes a small speedup (e.g. 186% -> 187%
-    int32_t predictionA, predictionB, currentA;
+   realtime for a -c1000 file on Coldfire), but is slower on PP. */
+int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR;
-    p->delayA[0] = p->lastA;
+#endif
-    p->delayA[-1] = p->delayA[0] - p->delayA[-1];
-    predictionA = scalarproduct4_rev32(p->coeffsA,p->delayA);
-    /*  Apply a scaled first-order filter compression */
-    p->delayB[0] = B - ((p->filterB * 31) >> 5);
-    p->filterB = B;
-    p->delayB[-1] = p->delayB[0] - p->delayB[-1];
-    predictionB = scalarproduct5_rev32(p->coeffsB,p->delayB);
-    currentA = A + ((predictionA + (predictionB >> 1)) >> 10);
+int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count)
+{
-    p->adaptcoeffsA[0] = SIGN(p->delayA[0]);
+    int32_t predictionA, predictionB;
-    p->adaptcoeffsA[-1] = SIGN(p->delayA[-1]);
-    p->adaptcoeffsB[0] = SIGN(p->delayB[0]);
-    p->adaptcoeffsB[-1] = SIGN(p->delayB[-1]);
-    if (A > 0) 
+    while (count--)
-    {
-        vector_sub4_rev32(p->coeffsA, p->adaptcoeffsA);
-        vector_sub5_rev32(p->coeffsB, p->adaptcoeffsB);
-    }
-    else if (A < 0) 
    {
-        vector_add4_rev32(p->coeffsA, p->adaptcoeffsA);
+        /* Predictor Y */
-        vector_add5_rev32(p->coeffsB, p->adaptcoeffsB);
+        p->buf[YDELAYA] = p->YlastA;
-    }
+        p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
-    p->delayA++;
+        p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
-    p->delayB++;
+        p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
-    p->adaptcoeffsA++;
-    p->adaptcoeffsB++;
+        predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + 
+                      (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + 
-    /* Have we filled the history buffer? */
+                      (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + 
-    if (p->delayA == p->historybuffer + HISTORY_SIZE + (PREDICTOR_ORDER*4)) {
+                      (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
-        memmove(p->historybuffer, p->delayA - (PREDICTOR_ORDER*4), 
-                (PREDICTOR_ORDER*4) * sizeof(int32_t));
+        /*  Apply a scaled first-order filter compression */
-        p->delayA = p->historybuffer + PREDICTOR_ORDER*4;
+        p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5);
-        p->delayB = p->historybuffer + PREDICTOR_ORDER*3;
+        p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]);
-        p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2;
+        p->YfilterB = p->XfilterA;
-        p->adaptcoeffsB = p->historybuffer + PREDICTOR_ORDER;
-    }
+        p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1];
+        p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]);
+        predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) + 
+                      (p->buf[YDELAYB-1] * p->YcoeffsB[1]) + 
+                      (p->buf[YDELAYB-2] * p->YcoeffsB[2]) + 
+                      (p->buf[YDELAYB-3] * p->YcoeffsB[3]) + 
+                      (p->buf[YDELAYB-4] * p->YcoeffsB[4]);
+        p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10);
+        p->YfilterA =  p->YlastA + ((p->YfilterA * 31) >> 5);
+        /* Predictor X */
+        p->buf[XDELAYA] = p->XlastA;
+        p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]);
+        p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1];
+        p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]);
+        predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) + 
+                      (p->buf[XDELAYA-1] * p->XcoeffsA[1]) + 
+                      (p->buf[XDELAYA-2] * p->XcoeffsA[2]) + 
+                      (p->buf[XDELAYA-3] * p->XcoeffsA[3]);
+        /*  Apply a scaled first-order filter compression */
+        p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5);
+        p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]);
+        p->XfilterB = p->YfilterA;
+        p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1];
+        p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]);
+        predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) + 
+                      (p->buf[XDELAYB-1] * p->XcoeffsB[1]) + 
+                      (p->buf[XDELAYB-2] * p->XcoeffsB[2]) + 
+                      (p->buf[XDELAYB-3] * p->XcoeffsB[3]) + 
+                      (p->buf[XDELAYB-4] * p->XcoeffsB[4]);
+        p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10); 
+        p->XfilterA =  p->XlastA + ((p->XfilterA * 31) >> 5);
+        if (*decoded0 > 0) 
+        {
+            p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
+            p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
+            p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
+            p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
+            p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB];
+            p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1];
+            p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2];
+            p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3];
+            p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4];
+        }
+        else if (*decoded0 < 0) 
+        {
+            p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
+            p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
+            p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
+            p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
+            p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB];
+            p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1];
+            p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2];
+            p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3];
+            p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4];
+        }
-    p->lastA = currentA;
+        *(decoded0++) = p->YfilterA;
-    p->filterA =  currentA + ((p->filterA * 31) >> 5);
-    return p->filterA;
+        if (*decoded1 > 0) 
-}
+        {
+            p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA];
-static int32_t X;
+            p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1];
+            p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2];
+            p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3];
+            p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB];
+            p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1];
+            p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2];
+            p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3];
+            p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4];
+        }
+        else if (*decoded1 < 0) 
+        {
+            p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA];
+            p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1];
+            p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2];
+            p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3];
+            p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB];
+            p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1];
+            p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2];
+            p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3];
+            p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4];
+        }
-void init_predictor_decoder(struct ape_ctx_t* ape_ctx)
+        *(decoded1++) = p->XfilterA;
-{
-    X = 0;
-    init_predictor(&ape_ctx->predictorY);
+        /* Combined */
-    init_predictor(&ape_ctx->predictorX);
+        p->buf++;
-}
-int predictor_decode_stereo(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR;
+        /* Have we filled the history buffer? */
-int predictor_decode_stereo(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int32_t* decoded1, int count)
+        if (p->buf == p->historybuffer + HISTORY_SIZE) {
-{
+            memmove(p->historybuffer, p->buf, 
-    while (count--)
+                    PREDICTOR_SIZE * sizeof(int32_t));
-    {
+            p->buf = p->historybuffer;
-        *decoded0 = do_predictor_decode(&ape_ctx->predictorY, *decoded0, X);
+        }
-        X = do_predictor_decode(&ape_ctx->predictorX, *decoded1, *(decoded0)++);
-        *(decoded1++) = X;
    }
    return 0;
 }
-int predictor_decode_mono(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int count)
+int predictor_decode_mono(struct predictor_t* p, int32_t* decoded0, int count)
 {
-    struct predictor_t* p = &ape_ctx->predictorY;
    int32_t predictionA, currentA, A;
-    currentA = p->lastA;
+    currentA = p->YlastA;
    while (count--)
    {
        A = *decoded0;
-        p->delayA[0] = currentA;
+        p->buf[YDELAYA] = currentA;
-        p->delayA[-1] = p->delayA[0] - p->delayA[-1];
+        p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
-        predictionA = (p->delayA[0] * p->coeffsA[0]) + 
+        predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + 
-                      (p->delayA[-1] * p->coeffsA[1]) + 
+                      (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + 
-                      (p->delayA[-2] * p->coeffsA[2]) + 
+                      (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + 
-                      (p->delayA[-3] * p->coeffsA[3]);
+                      (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
        currentA = A + (predictionA >> 10);
-        p->adaptcoeffsA[0] = SIGN(p->delayA[0]);
+        p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
-        p->adaptcoeffsA[-1] = SIGN(p->delayA[-1]);
+        p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
        
        if (A > 0) 
        {
-            p->coeffsA[0] -= p->adaptcoeffsA[0];
+            p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
-            p->coeffsA[1] -= p->adaptcoeffsA[-1];
+            p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
-            p->coeffsA[2] -= p->adaptcoeffsA[-2];
+            p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
-            p->coeffsA[3] -= p->adaptcoeffsA[-3];
+            p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
        }
        else if (A < 0) 
        {
-            p->coeffsA[0] += p->adaptcoeffsA[0];
+            p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
-            p->coeffsA[1] += p->adaptcoeffsA[-1];
+            p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
-            p->coeffsA[2] += p->adaptcoeffsA[-2];
+            p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
-            p->coeffsA[3] += p->adaptcoeffsA[-3];
+            p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
        }
-        p->delayA++;
+        p->buf++;
-        p->adaptcoeffsA++;
        /* Have we filled the history buffer? */
-        if (p->delayA == p->historybuffer + HISTORY_SIZE + (PREDICTOR_ORDER*4)) {
+        if (p->buf == p->historybuffer + HISTORY_SIZE) {
-            memmove(p->historybuffer, p->delayA - (PREDICTOR_ORDER*4), 
+            memmove(p->historybuffer, p->buf, 
-                    (PREDICTOR_ORDER*4) * sizeof(int32_t));
+                    PREDICTOR_SIZE * sizeof(int32_t));
-            p->delayA = p->historybuffer + PREDICTOR_ORDER*4;
+            p->buf = p->historybuffer;
-            p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2;
        }
-        p->filterA =  currentA + ((p->filterA * 31) >> 5);
+        p->YfilterA =  currentA + ((p->YfilterA * 31) >> 5);
-        *(decoded0++) = p->filterA;
+        *(decoded0++) = p->YfilterA;
    }
-    p->lastA = currentA;
+    p->YlastA = currentA;
    return 0;
 }

diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c index 9531786fd1..a7210bf014 100644 --- a/apps/codecs/demac/libdemac/predictor.c +++ b/apps/codecs/demac/libdemac/predictor.c
@@ -37,160 +37,230 @@ static const int32_t initial_coeffs[4] = {
37	360, 317, -109, 98	37	360, 317, -109, 98
38	};	38	};
39		39
40	static void init_predictor(struct predictor_t* p)	40	#define YDELAYA (18 + PREDICTOR_ORDER*4)
		41	#define YDELAYB (18 + PREDICTOR_ORDER*3)
		42	#define XDELAYA (18 + PREDICTOR_ORDER*2)
		43	#define XDELAYB (18 + PREDICTOR_ORDER)
		44
		45	#define YADAPTCOEFFSA (18)
		46	#define XADAPTCOEFFSA (14)
		47	#define YADAPTCOEFFSB (10)
		48	#define XADAPTCOEFFSB (5)
		49
		50	void init_predictor_decoder(struct predictor_t* p)
41	{	51	{
42	/* Zero the history buffers */	52	/* Zero the history buffers */
43	memset(p->historybuffer, 0, (PREDICTOR_ORDER*4) * sizeof(int32_t));	53	memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t));
44	p->delayA = p->historybuffer + PREDICTOR_ORDER*4;	54	p->buf = p->historybuffer;
45	p->delayB = p->historybuffer + PREDICTOR_ORDER*3;
46	p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2;
47	p->adaptcoeffsB = p->historybuffer + PREDICTOR_ORDER;
48		55
49	/* Initialise and zero the co-efficients */	56	/* Initialise and zero the co-efficients */
50	memcpy(p->coeffsA, initial_coeffs, sizeof(initial_coeffs));	57	memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs));
51	memset(p->coeffsB, 0, sizeof(p->coeffsB));	58	memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs));
52		59	memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB));
53	p->filterA = 0;	60	memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB));
54	p->filterB = 0;	61
55		62	p->YfilterA = 0;
56	p->lastA = 0;	63	p->YfilterB = 0;
		64	p->YlastA = 0;
		65
		66	p->XfilterA = 0;
		67	p->XfilterB = 0;
		68	p->XlastA = 0;
57	}	69	}
58		70
59	static int do_predictor_decode(struct predictor_t* p, int32_t A, int32_t B)	71	#ifdef CPU_COLDFIRE
60	{	72	/* Putting this in IRAM makes a small speedup (e.g. 186% -> 187%
61	int32_t predictionA, predictionB, currentA;	73	realtime for a -c1000 file on Coldfire), but is slower on PP. */
62		74	int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR;
63	p->delayA[0] = p->lastA;	75	#endif
64	p->delayA[-1] = p->delayA[0] - p->delayA[-1];
65
66	predictionA = scalarproduct4_rev32(p->coeffsA,p->delayA);
67
68	/* Apply a scaled first-order filter compression */
69	p->delayB[0] = B - ((p->filterB * 31) >> 5);
70	p->filterB = B;
71
72	p->delayB[-1] = p->delayB[0] - p->delayB[-1];
73
74	predictionB = scalarproduct5_rev32(p->coeffsB,p->delayB);
75		76
76	currentA = A + ((predictionA + (predictionB >> 1)) >> 10);	77	int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count)
77		78	{
78	p->adaptcoeffsA[0] = SIGN(p->delayA[0]);	79	int32_t predictionA, predictionB;
79	p->adaptcoeffsA[-1] = SIGN(p->delayA[-1]);
80
81	p->adaptcoeffsB[0] = SIGN(p->delayB[0]);
82	p->adaptcoeffsB[-1] = SIGN(p->delayB[-1]);
83		80
84	if (A > 0)	81	while (count--)
85	{
86	vector_sub4_rev32(p->coeffsA, p->adaptcoeffsA);
87	vector_sub5_rev32(p->coeffsB, p->adaptcoeffsB);
88	}
89	else if (A < 0)
90	{	82	{
91	vector_add4_rev32(p->coeffsA, p->adaptcoeffsA);	83	/* Predictor Y */
92	vector_add5_rev32(p->coeffsB, p->adaptcoeffsB);	84	p->buf[YDELAYA] = p->YlastA;
93	}	85	p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
94		86
95	p->delayA++;	87	p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
96	p->delayB++;	88	p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
97	p->adaptcoeffsA++;	89
98	p->adaptcoeffsB++;	90	predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) +
99		91	(p->buf[YDELAYA-1] * p->YcoeffsA[1]) +
100	/* Have we filled the history buffer? */	92	(p->buf[YDELAYA-2] * p->YcoeffsA[2]) +
101	if (p->delayA == p->historybuffer + HISTORY_SIZE + (PREDICTOR_ORDER*4)) {	93	(p->buf[YDELAYA-3] * p->YcoeffsA[3]);
102	memmove(p->historybuffer, p->delayA - (PREDICTOR_ORDER*4),	94
103	(PREDICTOR_ORDER*4) * sizeof(int32_t));	95	/* Apply a scaled first-order filter compression */
104	p->delayA = p->historybuffer + PREDICTOR_ORDER*4;	96	p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5);
105	p->delayB = p->historybuffer + PREDICTOR_ORDER*3;	97	p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]);
106	p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2;	98	p->YfilterB = p->XfilterA;
107	p->adaptcoeffsB = p->historybuffer + PREDICTOR_ORDER;	99
108	}	100	p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1];
		101	p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]);
		102
		103	predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) +
		104	(p->buf[YDELAYB-1] * p->YcoeffsB[1]) +
		105	(p->buf[YDELAYB-2] * p->YcoeffsB[2]) +
		106	(p->buf[YDELAYB-3] * p->YcoeffsB[3]) +
		107	(p->buf[YDELAYB-4] * p->YcoeffsB[4]);
		108
		109	p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10);
		110	p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5);
		111
		112	/* Predictor X */
		113
		114	p->buf[XDELAYA] = p->XlastA;
		115	p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]);
		116	p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1];
		117	p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]);
		118
		119	predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) +
		120	(p->buf[XDELAYA-1] * p->XcoeffsA[1]) +
		121	(p->buf[XDELAYA-2] * p->XcoeffsA[2]) +
		122	(p->buf[XDELAYA-3] * p->XcoeffsA[3]);
		123
		124	/* Apply a scaled first-order filter compression */
		125	p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5);
		126	p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]);
		127	p->XfilterB = p->YfilterA;
		128	p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1];
		129	p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]);
		130
		131	predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) +
		132	(p->buf[XDELAYB-1] * p->XcoeffsB[1]) +
		133	(p->buf[XDELAYB-2] * p->XcoeffsB[2]) +
		134	(p->buf[XDELAYB-3] * p->XcoeffsB[3]) +
		135	(p->buf[XDELAYB-4] * p->XcoeffsB[4]);
		136
		137	p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10);
		138	p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5);
		139
		140	if (*decoded0 > 0)
		141	{
		142	p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
		143	p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
		144	p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
		145	p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
		146
		147	p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB];
		148	p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1];
		149	p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2];
		150	p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3];
		151	p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4];
		152	}
		153	else if (*decoded0 < 0)
		154	{
		155	p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
		156	p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
		157	p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
		158	p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
		159
		160	p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB];
		161	p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1];
		162	p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2];
		163	p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3];
		164	p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4];
		165	}
109		166
110	p->lastA = currentA;	167	*(decoded0++) = p->YfilterA;
111	p->filterA = currentA + ((p->filterA * 31) >> 5);
112		168
113	return p->filterA;	169	if (*decoded1 > 0)
114	}	170	{
115		171	p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA];
116	static int32_t X;	172	p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1];
		173	p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2];
		174	p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3];
		175
		176	p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB];
		177	p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1];
		178	p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2];
		179	p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3];
		180	p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4];
		181	}
		182	else if (*decoded1 < 0)
		183	{
		184	p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA];
		185	p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1];
		186	p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2];
		187	p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3];
		188
		189	p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB];
		190	p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1];
		191	p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2];
		192	p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3];
		193	p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4];
		194	}
117		195
118	void init_predictor_decoder(struct ape_ctx_t* ape_ctx)	196	*(decoded1++) = p->XfilterA;
119	{
120	X = 0;
121		197
122	init_predictor(&ape_ctx->predictorY);	198	/* Combined */
123	init_predictor(&ape_ctx->predictorX);	199	p->buf++;
124	}
125		200
126	int predictor_decode_stereo(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR;	201	/* Have we filled the history buffer? */
127	int predictor_decode_stereo(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int32_t* decoded1, int count)	202	if (p->buf == p->historybuffer + HISTORY_SIZE) {
128	{	203	memmove(p->historybuffer, p->buf,
129	while (count--)	204	PREDICTOR_SIZE * sizeof(int32_t));
130	{	205	p->buf = p->historybuffer;
131	*decoded0 = do_predictor_decode(&ape_ctx->predictorY, *decoded0, X);	206	}
132	X = do_predictor_decode(&ape_ctx->predictorX, *decoded1, *(decoded0)++);
133	*(decoded1++) = X;
134	}	207	}
135		208
136	return 0;	209	return 0;
137	}	210	}
138		211
139	int predictor_decode_mono(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int count)	212	int predictor_decode_mono(struct predictor_t* p, int32_t* decoded0, int count)
140	{	213	{
141	struct predictor_t* p = &ape_ctx->predictorY;
142	int32_t predictionA, currentA, A;	214	int32_t predictionA, currentA, A;
143		215
144	currentA = p->lastA;	216	currentA = p->YlastA;
145		217
146	while (count--)	218	while (count--)
147	{	219	{
148	A = *decoded0;	220	A = *decoded0;
149		221
150	p->delayA[0] = currentA;	222	p->buf[YDELAYA] = currentA;
151	p->delayA[-1] = p->delayA[0] - p->delayA[-1];	223	p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
152		224
153	predictionA = (p->delayA[0] * p->coeffsA[0]) +	225	predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) +
154	(p->delayA[-1] * p->coeffsA[1]) +	226	(p->buf[YDELAYA-1] * p->YcoeffsA[1]) +
155	(p->delayA[-2] * p->coeffsA[2]) +	227	(p->buf[YDELAYA-2] * p->YcoeffsA[2]) +
156	(p->delayA[-3] * p->coeffsA[3]);	228	(p->buf[YDELAYA-3] * p->YcoeffsA[3]);
157		229
158	currentA = A + (predictionA >> 10);	230	currentA = A + (predictionA >> 10);
159		231
160	p->adaptcoeffsA[0] = SIGN(p->delayA[0]);	232	p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
161	p->adaptcoeffsA[-1] = SIGN(p->delayA[-1]);	233	p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
162		234
163	if (A > 0)	235	if (A > 0)
164	{	236	{
165	p->coeffsA[0] -= p->adaptcoeffsA[0];	237	p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
166	p->coeffsA[1] -= p->adaptcoeffsA[-1];	238	p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
167	p->coeffsA[2] -= p->adaptcoeffsA[-2];	239	p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
168	p->coeffsA[3] -= p->adaptcoeffsA[-3];	240	p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
169	}	241	}
170	else if (A < 0)	242	else if (A < 0)
171	{	243	{
172	p->coeffsA[0] += p->adaptcoeffsA[0];	244	p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
173	p->coeffsA[1] += p->adaptcoeffsA[-1];	245	p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
174	p->coeffsA[2] += p->adaptcoeffsA[-2];	246	p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
175	p->coeffsA[3] += p->adaptcoeffsA[-3];	247	p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
176	}	248	}
177		249
178	p->delayA++;	250	p->buf++;
179	p->adaptcoeffsA++;
180		251
181	/* Have we filled the history buffer? */	252	/* Have we filled the history buffer? */
182	if (p->delayA == p->historybuffer + HISTORY_SIZE + (PREDICTOR_ORDER*4)) {	253	if (p->buf == p->historybuffer + HISTORY_SIZE) {
183	memmove(p->historybuffer, p->delayA - (PREDICTOR_ORDER*4),	254	memmove(p->historybuffer, p->buf,
184	(PREDICTOR_ORDER*4) * sizeof(int32_t));	255	PREDICTOR_SIZE * sizeof(int32_t));
185	p->delayA = p->historybuffer + PREDICTOR_ORDER*4;	256	p->buf = p->historybuffer;
186	p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2;
187	}	257	}
188		258
189	p->filterA = currentA + ((p->filterA * 31) >> 5);	259	p->YfilterA = currentA + ((p->YfilterA * 31) >> 5);
190	*(decoded0++) = p->filterA;	260	*(decoded0++) = p->YfilterA;
191	}	261	}
192		262
193	p->lastA = currentA;	263	p->YlastA = currentA;
194		264
195	return 0;	265	return 0;
196	}	266	}