diff options
Diffstat (limited to 'apps/codecs/demac/libdemac/predictor.c')
-rw-r--r-- | apps/codecs/demac/libdemac/predictor.c | 288 |
1 files changed, 179 insertions, 109 deletions
diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c index 9531786fd1..a7210bf014 100644 --- a/apps/codecs/demac/libdemac/predictor.c +++ b/apps/codecs/demac/libdemac/predictor.c | |||
@@ -37,160 +37,230 @@ static const int32_t initial_coeffs[4] = { | |||
37 | 360, 317, -109, 98 | 37 | 360, 317, -109, 98 |
38 | }; | 38 | }; |
39 | 39 | ||
40 | static void init_predictor(struct predictor_t* p) | 40 | #define YDELAYA (18 + PREDICTOR_ORDER*4) |
41 | #define YDELAYB (18 + PREDICTOR_ORDER*3) | ||
42 | #define XDELAYA (18 + PREDICTOR_ORDER*2) | ||
43 | #define XDELAYB (18 + PREDICTOR_ORDER) | ||
44 | |||
45 | #define YADAPTCOEFFSA (18) | ||
46 | #define XADAPTCOEFFSA (14) | ||
47 | #define YADAPTCOEFFSB (10) | ||
48 | #define XADAPTCOEFFSB (5) | ||
49 | |||
50 | void init_predictor_decoder(struct predictor_t* p) | ||
41 | { | 51 | { |
42 | /* Zero the history buffers */ | 52 | /* Zero the history buffers */ |
43 | memset(p->historybuffer, 0, (PREDICTOR_ORDER*4) * sizeof(int32_t)); | 53 | memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t)); |
44 | p->delayA = p->historybuffer + PREDICTOR_ORDER*4; | 54 | p->buf = p->historybuffer; |
45 | p->delayB = p->historybuffer + PREDICTOR_ORDER*3; | ||
46 | p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2; | ||
47 | p->adaptcoeffsB = p->historybuffer + PREDICTOR_ORDER; | ||
48 | 55 | ||
49 | /* Initialise and zero the co-efficients */ | 56 | /* Initialise and zero the co-efficients */ |
50 | memcpy(p->coeffsA, initial_coeffs, sizeof(initial_coeffs)); | 57 | memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs)); |
51 | memset(p->coeffsB, 0, sizeof(p->coeffsB)); | 58 | memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs)); |
52 | 59 | memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB)); | |
53 | p->filterA = 0; | 60 | memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB)); |
54 | p->filterB = 0; | 61 | |
55 | 62 | p->YfilterA = 0; | |
56 | p->lastA = 0; | 63 | p->YfilterB = 0; |
64 | p->YlastA = 0; | ||
65 | |||
66 | p->XfilterA = 0; | ||
67 | p->XfilterB = 0; | ||
68 | p->XlastA = 0; | ||
57 | } | 69 | } |
58 | 70 | ||
59 | static int do_predictor_decode(struct predictor_t* p, int32_t A, int32_t B) | 71 | #ifdef CPU_COLDFIRE |
60 | { | 72 | /* Putting this in IRAM makes a small speedup (e.g. 186% -> 187% |
61 | int32_t predictionA, predictionB, currentA; | 73 | realtime for a -c1000 file on Coldfire), but is slower on PP. */ |
62 | 74 | int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR; | |
63 | p->delayA[0] = p->lastA; | 75 | #endif |
64 | p->delayA[-1] = p->delayA[0] - p->delayA[-1]; | ||
65 | |||
66 | predictionA = scalarproduct4_rev32(p->coeffsA,p->delayA); | ||
67 | |||
68 | /* Apply a scaled first-order filter compression */ | ||
69 | p->delayB[0] = B - ((p->filterB * 31) >> 5); | ||
70 | p->filterB = B; | ||
71 | |||
72 | p->delayB[-1] = p->delayB[0] - p->delayB[-1]; | ||
73 | |||
74 | predictionB = scalarproduct5_rev32(p->coeffsB,p->delayB); | ||
75 | 76 | ||
76 | currentA = A + ((predictionA + (predictionB >> 1)) >> 10); | 77 | int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) |
77 | 78 | { | |
78 | p->adaptcoeffsA[0] = SIGN(p->delayA[0]); | 79 | int32_t predictionA, predictionB; |
79 | p->adaptcoeffsA[-1] = SIGN(p->delayA[-1]); | ||
80 | |||
81 | p->adaptcoeffsB[0] = SIGN(p->delayB[0]); | ||
82 | p->adaptcoeffsB[-1] = SIGN(p->delayB[-1]); | ||
83 | 80 | ||
84 | if (A > 0) | 81 | while (count--) |
85 | { | ||
86 | vector_sub4_rev32(p->coeffsA, p->adaptcoeffsA); | ||
87 | vector_sub5_rev32(p->coeffsB, p->adaptcoeffsB); | ||
88 | } | ||
89 | else if (A < 0) | ||
90 | { | 82 | { |
91 | vector_add4_rev32(p->coeffsA, p->adaptcoeffsA); | 83 | /* Predictor Y */ |
92 | vector_add5_rev32(p->coeffsB, p->adaptcoeffsB); | 84 | p->buf[YDELAYA] = p->YlastA; |
93 | } | 85 | p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); |
94 | 86 | ||
95 | p->delayA++; | 87 | p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1]; |
96 | p->delayB++; | 88 | p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); |
97 | p->adaptcoeffsA++; | 89 | |
98 | p->adaptcoeffsB++; | 90 | predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + |
99 | 91 | (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + | |
100 | /* Have we filled the history buffer? */ | 92 | (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + |
101 | if (p->delayA == p->historybuffer + HISTORY_SIZE + (PREDICTOR_ORDER*4)) { | 93 | (p->buf[YDELAYA-3] * p->YcoeffsA[3]); |
102 | memmove(p->historybuffer, p->delayA - (PREDICTOR_ORDER*4), | 94 | |
103 | (PREDICTOR_ORDER*4) * sizeof(int32_t)); | 95 | /* Apply a scaled first-order filter compression */ |
104 | p->delayA = p->historybuffer + PREDICTOR_ORDER*4; | 96 | p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5); |
105 | p->delayB = p->historybuffer + PREDICTOR_ORDER*3; | 97 | p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]); |
106 | p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2; | 98 | p->YfilterB = p->XfilterA; |
107 | p->adaptcoeffsB = p->historybuffer + PREDICTOR_ORDER; | 99 | |
108 | } | 100 | p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1]; |
101 | p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]); | ||
102 | |||
103 | predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) + | ||
104 | (p->buf[YDELAYB-1] * p->YcoeffsB[1]) + | ||
105 | (p->buf[YDELAYB-2] * p->YcoeffsB[2]) + | ||
106 | (p->buf[YDELAYB-3] * p->YcoeffsB[3]) + | ||
107 | (p->buf[YDELAYB-4] * p->YcoeffsB[4]); | ||
108 | |||
109 | p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10); | ||
110 | p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5); | ||
111 | |||
112 | /* Predictor X */ | ||
113 | |||
114 | p->buf[XDELAYA] = p->XlastA; | ||
115 | p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]); | ||
116 | p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1]; | ||
117 | p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]); | ||
118 | |||
119 | predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) + | ||
120 | (p->buf[XDELAYA-1] * p->XcoeffsA[1]) + | ||
121 | (p->buf[XDELAYA-2] * p->XcoeffsA[2]) + | ||
122 | (p->buf[XDELAYA-3] * p->XcoeffsA[3]); | ||
123 | |||
124 | /* Apply a scaled first-order filter compression */ | ||
125 | p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5); | ||
126 | p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]); | ||
127 | p->XfilterB = p->YfilterA; | ||
128 | p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1]; | ||
129 | p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]); | ||
130 | |||
131 | predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) + | ||
132 | (p->buf[XDELAYB-1] * p->XcoeffsB[1]) + | ||
133 | (p->buf[XDELAYB-2] * p->XcoeffsB[2]) + | ||
134 | (p->buf[XDELAYB-3] * p->XcoeffsB[3]) + | ||
135 | (p->buf[XDELAYB-4] * p->XcoeffsB[4]); | ||
136 | |||
137 | p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10); | ||
138 | p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5); | ||
139 | |||
140 | if (*decoded0 > 0) | ||
141 | { | ||
142 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; | ||
143 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; | ||
144 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; | ||
145 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; | ||
146 | |||
147 | p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB]; | ||
148 | p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1]; | ||
149 | p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2]; | ||
150 | p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3]; | ||
151 | p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4]; | ||
152 | } | ||
153 | else if (*decoded0 < 0) | ||
154 | { | ||
155 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; | ||
156 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; | ||
157 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; | ||
158 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; | ||
159 | |||
160 | p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB]; | ||
161 | p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1]; | ||
162 | p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2]; | ||
163 | p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3]; | ||
164 | p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4]; | ||
165 | } | ||
109 | 166 | ||
110 | p->lastA = currentA; | 167 | *(decoded0++) = p->YfilterA; |
111 | p->filterA = currentA + ((p->filterA * 31) >> 5); | ||
112 | 168 | ||
113 | return p->filterA; | 169 | if (*decoded1 > 0) |
114 | } | 170 | { |
115 | 171 | p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA]; | |
116 | static int32_t X; | 172 | p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1]; |
173 | p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2]; | ||
174 | p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3]; | ||
175 | |||
176 | p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB]; | ||
177 | p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1]; | ||
178 | p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2]; | ||
179 | p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3]; | ||
180 | p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4]; | ||
181 | } | ||
182 | else if (*decoded1 < 0) | ||
183 | { | ||
184 | p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA]; | ||
185 | p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1]; | ||
186 | p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2]; | ||
187 | p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3]; | ||
188 | |||
189 | p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB]; | ||
190 | p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1]; | ||
191 | p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2]; | ||
192 | p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3]; | ||
193 | p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4]; | ||
194 | } | ||
117 | 195 | ||
118 | void init_predictor_decoder(struct ape_ctx_t* ape_ctx) | 196 | *(decoded1++) = p->XfilterA; |
119 | { | ||
120 | X = 0; | ||
121 | 197 | ||
122 | init_predictor(&ape_ctx->predictorY); | 198 | /* Combined */ |
123 | init_predictor(&ape_ctx->predictorX); | 199 | p->buf++; |
124 | } | ||
125 | 200 | ||
126 | int predictor_decode_stereo(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR; | 201 | /* Have we filled the history buffer? */ |
127 | int predictor_decode_stereo(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int32_t* decoded1, int count) | 202 | if (p->buf == p->historybuffer + HISTORY_SIZE) { |
128 | { | 203 | memmove(p->historybuffer, p->buf, |
129 | while (count--) | 204 | PREDICTOR_SIZE * sizeof(int32_t)); |
130 | { | 205 | p->buf = p->historybuffer; |
131 | *decoded0 = do_predictor_decode(&ape_ctx->predictorY, *decoded0, X); | 206 | } |
132 | X = do_predictor_decode(&ape_ctx->predictorX, *decoded1, *(decoded0)++); | ||
133 | *(decoded1++) = X; | ||
134 | } | 207 | } |
135 | 208 | ||
136 | return 0; | 209 | return 0; |
137 | } | 210 | } |
138 | 211 | ||
139 | int predictor_decode_mono(struct ape_ctx_t* ape_ctx, int32_t* decoded0, int count) | 212 | int predictor_decode_mono(struct predictor_t* p, int32_t* decoded0, int count) |
140 | { | 213 | { |
141 | struct predictor_t* p = &ape_ctx->predictorY; | ||
142 | int32_t predictionA, currentA, A; | 214 | int32_t predictionA, currentA, A; |
143 | 215 | ||
144 | currentA = p->lastA; | 216 | currentA = p->YlastA; |
145 | 217 | ||
146 | while (count--) | 218 | while (count--) |
147 | { | 219 | { |
148 | A = *decoded0; | 220 | A = *decoded0; |
149 | 221 | ||
150 | p->delayA[0] = currentA; | 222 | p->buf[YDELAYA] = currentA; |
151 | p->delayA[-1] = p->delayA[0] - p->delayA[-1]; | 223 | p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1]; |
152 | 224 | ||
153 | predictionA = (p->delayA[0] * p->coeffsA[0]) + | 225 | predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + |
154 | (p->delayA[-1] * p->coeffsA[1]) + | 226 | (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + |
155 | (p->delayA[-2] * p->coeffsA[2]) + | 227 | (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + |
156 | (p->delayA[-3] * p->coeffsA[3]); | 228 | (p->buf[YDELAYA-3] * p->YcoeffsA[3]); |
157 | 229 | ||
158 | currentA = A + (predictionA >> 10); | 230 | currentA = A + (predictionA >> 10); |
159 | 231 | ||
160 | p->adaptcoeffsA[0] = SIGN(p->delayA[0]); | 232 | p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]); |
161 | p->adaptcoeffsA[-1] = SIGN(p->delayA[-1]); | 233 | p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]); |
162 | 234 | ||
163 | if (A > 0) | 235 | if (A > 0) |
164 | { | 236 | { |
165 | p->coeffsA[0] -= p->adaptcoeffsA[0]; | 237 | p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA]; |
166 | p->coeffsA[1] -= p->adaptcoeffsA[-1]; | 238 | p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1]; |
167 | p->coeffsA[2] -= p->adaptcoeffsA[-2]; | 239 | p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2]; |
168 | p->coeffsA[3] -= p->adaptcoeffsA[-3]; | 240 | p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3]; |
169 | } | 241 | } |
170 | else if (A < 0) | 242 | else if (A < 0) |
171 | { | 243 | { |
172 | p->coeffsA[0] += p->adaptcoeffsA[0]; | 244 | p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA]; |
173 | p->coeffsA[1] += p->adaptcoeffsA[-1]; | 245 | p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1]; |
174 | p->coeffsA[2] += p->adaptcoeffsA[-2]; | 246 | p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2]; |
175 | p->coeffsA[3] += p->adaptcoeffsA[-3]; | 247 | p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3]; |
176 | } | 248 | } |
177 | 249 | ||
178 | p->delayA++; | 250 | p->buf++; |
179 | p->adaptcoeffsA++; | ||
180 | 251 | ||
181 | /* Have we filled the history buffer? */ | 252 | /* Have we filled the history buffer? */ |
182 | if (p->delayA == p->historybuffer + HISTORY_SIZE + (PREDICTOR_ORDER*4)) { | 253 | if (p->buf == p->historybuffer + HISTORY_SIZE) { |
183 | memmove(p->historybuffer, p->delayA - (PREDICTOR_ORDER*4), | 254 | memmove(p->historybuffer, p->buf, |
184 | (PREDICTOR_ORDER*4) * sizeof(int32_t)); | 255 | PREDICTOR_SIZE * sizeof(int32_t)); |
185 | p->delayA = p->historybuffer + PREDICTOR_ORDER*4; | 256 | p->buf = p->historybuffer; |
186 | p->adaptcoeffsA = p->historybuffer + PREDICTOR_ORDER*2; | ||
187 | } | 257 | } |
188 | 258 | ||
189 | p->filterA = currentA + ((p->filterA * 31) >> 5); | 259 | p->YfilterA = currentA + ((p->YfilterA * 31) >> 5); |
190 | *(decoded0++) = p->filterA; | 260 | *(decoded0++) = p->YfilterA; |
191 | } | 261 | } |
192 | 262 | ||
193 | p->lastA = currentA; | 263 | p->YlastA = currentA; |
194 | 264 | ||
195 | return 0; | 265 | return 0; |
196 | } | 266 | } |