summaryrefslogtreecommitdiff
path: root/apps/codecs/libffmpegFLAC/shndec.c
diff options
context:
space:
mode:
author: Dave Chapman <dave@dchapman.com> 2006-02-07 22:16:35 +0000
committer: Dave Chapman <dave@dchapman.com> 2006-02-07 22:16:35 +0000
commit: 8e46ab85a9a1c50589920897763ce53e593c9369 (patch)
tree: 0a0fe757659050c39d6b5160c22311915557abbd /apps/codecs/libffmpegFLAC/shndec.c
parent: 6099dc8b77e1b536ff47b4b74edf20f1fafda5b6 (diff)
download: rockbox-8e46ab85a9a1c50589920897763ce53e593c9369.tar.gz
rockbox-8e46ab85a9a1c50589920897763ce53e593c9369.zip
Patch #1426489 - Shorten codec optimisations from Mark Arigo
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8615 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/libffmpegFLAC/shndec.c')
-rw-r--r-- apps/codecs/libffmpegFLAC/shndec.c 190
1 files changed, 142 insertions, 48 deletions
diff --git a/apps/codecs/libffmpegFLAC/shndec.c b/apps/codecs/libffmpegFLAC/shndec.c
index 6dde8f7a70..d7fc6a15a6 100644
--- a/apps/codecs/libffmpegFLAC/shndec.c
+++ b/apps/codecs/libffmpegFLAC/shndec.c
@@ -28,12 +28,6 @@
28#include "golomb.h" 28#include "golomb.h"
29#include "shndec.h" 29#include "shndec.h"
30 30
31/* These seem reasonable from my test files.
32 Does MAX_HEADER_SIZE really need to be 16384? */
33#define MAX_PRED_ORDER 16
34#define MAX_HEADER_SIZE DEFAULT_BLOCK_SIZE*4
35//#define MAX_HEADER_SIZE 16384
36
37#define ULONGSIZE 2 31#define ULONGSIZE 2
38 32
39#define WAVE_FORMAT_PCM 0x0001 33#define WAVE_FORMAT_PCM 0x0001
@@ -54,16 +48,6 @@
54#define V2LPCQOFFSET (1 << LPCQUANT) 48#define V2LPCQOFFSET (1 << LPCQUANT)
55 49
56#define FNSIZE 2 50#define FNSIZE 2
57#define FN_DIFF0 0
58#define FN_DIFF1 1
59#define FN_DIFF2 2
60#define FN_DIFF3 3
61#define FN_QUIT 4
62#define FN_BLOCKSIZE 5
63#define FN_BITSHIFT 6
64#define FN_QLPC 7
65#define FN_ZERO 8
66#define FN_VERBATIM 9
67 51
68#define VERBATIM_CKSIZE_SIZE 5 52#define VERBATIM_CKSIZE_SIZE 5
69#define VERBATIM_BYTE_SIZE 8 53#define VERBATIM_BYTE_SIZE 8
@@ -76,22 +60,21 @@
76#define get_le16(gb) bswap_16(get_bits_long(gb, 16)) 60#define get_le16(gb) bswap_16(get_bits_long(gb, 16))
77#define get_le32(gb) bswap_32(get_bits_long(gb, 32)) 61#define get_le32(gb) bswap_32(get_bits_long(gb, 32))
78 62
79static inline uint32_t bswap_32(uint32_t x){ 63static uint32_t bswap_32(uint32_t x){
80 x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF); 64 x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
81 return (x>>16) | (x<<16); 65 return (x>>16) | (x<<16);
82} 66}
83 67
84static inline uint16_t bswap_16(uint16_t x){ 68static uint16_t bswap_16(uint16_t x){
85 return (x>>8) | (x<<8); 69 return (x>>8) | (x<<8);
86} 70}
87 71
88/* converts fourcc string to int */ 72/* converts fourcc string to int */
89static inline int ff_get_fourcc(const char *s){ 73static int ff_get_fourcc(const char *s){
90 //assert( strlen(s)==4 ); 74 //assert( strlen(s)==4 );
91 return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24); 75 return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24);
92} 76}
93 77
94static unsigned int get_uint(ShortenContext *s, int k) ICODE_ATTR;
95static unsigned int get_uint(ShortenContext *s, int k) 78static unsigned int get_uint(ShortenContext *s, int k)
96{ 79{
97 if (s->version != 0) 80 if (s->version != 0)
@@ -99,10 +82,77 @@ static unsigned int get_uint(ShortenContext *s, int k)
99 return get_ur_golomb_shorten(&s->gb, k); 82 return get_ur_golomb_shorten(&s->gb, k);
100} 83}
101 84
102static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded, 85#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
103 int residual_size, int pred_order) ICODE_ATTR; 86static void coldfire_lshift_samples(int n, int shift, int32_t *samples) ICODE_ATTR;
104static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded, 87static void coldfire_lshift_samples(int n, int shift, int32_t *samples)
105 int residual_size, int pred_order) 88{
89/*
90 for (i = 0; i < n; i++)
91 samples[i] <<= shift;
92*/
93 asm volatile (
94 "move.l %[n], %%d0 \n" /* d0 = loop counter */
95 "asr.l #2, %%d0 \n"
96 "beq l1_shift \n"
97 "l2_shift:" /* main loop (unroll by 4) */
98 "movem.l (%[x]), %%d4-%%d7 \n"
99 "asl.l %[s], %%d4 \n"
100 "asl.l %[s], %%d5 \n"
101 "asl.l %[s], %%d6 \n"
102 "asl.l %[s], %%d7 \n"
103 "movem.l %%d4-%%d7, (%[x]) \n"
104 "add.l #16, %[x] \n"
105
106 "subq.l #1, %%d0 \n"
107 "bne l2_shift \n"
108 "l1_shift:" /* any loops left? */
109 "and.l #3, %[n] \n"
110 "beq l4_shift \n"
111 "l3_shift:" /* remaining loops */
112 "move.l (%[x]), %%d4 \n"
113 "asl.l %[s], %%d4 \n"
114 "move.l %%d4, (%[x])+ \n"
115
116 "subq.l #1, %[n] \n"
117 "bne l3_shift \n"
118 "l4_shift:" /* exit */
119 : [n] "+d" (n), /* d1 */
120 [s] "+d" (shift), /* d2 */
121 [x] "+a" (samples) /* a0 */
122 :
123 : "%d0", "%d4", "%d5", "%d6", "%d7"
124 );
125}
126#endif
127
128static inline void fix_bitshift(ShortenContext *s, int32_t *samples)
129{
130 int i;
131
132 /* Wrapped samples don't get bitshifted, so we'll do them during
133 the next iteration. */
134 if (s->bitshift != 0) {
135#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
136 coldfire_lshift_samples(s->blocksize, s->bitshift, samples - s->nwrap);
137#else
138 for (i = -s->nwrap; i < (s->blocksize - s->nwrap); i++)
139 samples[i] <<= s->bitshift;
140#endif
141 }
142
143 /* Also, we have to remember to fix the wrapped samples when
144 the bitshift changes. */
145 if (s->bitshift != s->last_bitshift) {
146 if (s->last_bitshift != 0)
147 for (i = -s->nwrap; i < 0; i++)
148 samples[i] <<= s->last_bitshift;
149
150 s->last_bitshift = s->bitshift;
151 }
152}
153
154static inline void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
155 int residual_size, int pred_order)
106{ 156{
107 int sum, i, j; 157 int sum, i, j;
108 int coeffs[MAX_PRED_ORDER]; 158 int coeffs[MAX_PRED_ORDER];
@@ -121,18 +171,12 @@ static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
121 } 171 }
122} 172}
123 173
124int shorten_decode_frame(ShortenContext *s, 174static inline int shorten_decode_frame(ShortenContext *s, int32_t *decoded,
125 int32_t *decoded, 175 int32_t *offset)
126 int32_t *offset,
127 uint8_t *buf,
128 int buf_size)
129{ 176{
130 int i; 177 int i;
131 int32_t sum; 178 int32_t sum;
132 179
133 init_get_bits(&s->gb, buf, buf_size*8);
134 get_bits(&s->gb, s->bitindex);
135
136 int cmd = get_ur_golomb_shorten(&s->gb, FNSIZE); 180 int cmd = get_ur_golomb_shorten(&s->gb, FNSIZE);
137 switch (cmd) { 181 switch (cmd) {
138 case FN_ZERO: 182 case FN_ZERO:
@@ -201,10 +245,6 @@ int shorten_decode_frame(ShortenContext *s,
201 case FN_QLPC: 245 case FN_QLPC:
202 { 246 {
203 int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE); 247 int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE);
204 if (pred_order > MAX_PRED_ORDER) {
205 return -2;
206 }
207
208 for (i=0; i<pred_order; i++) 248 for (i=0; i<pred_order; i++)
209 decoded[i - pred_order] -= coffset; 249 decoded[i - pred_order] -= coffset;
210 decode_subframe_lpc(s, decoded, residual_size, pred_order); 250 decode_subframe_lpc(s, decoded, residual_size, pred_order);
@@ -231,12 +271,7 @@ int shorten_decode_frame(ShortenContext *s,
231 } 271 }
232 } 272 }
233 273
234 for (i=-s->nwrap; i<0; i++) 274 fix_bitshift(s, decoded);
235 decoded[i] = decoded[i + s->blocksize];
236
237 int scale = s->bitshift + SHN_OUTPUT_DEPTH - s->bits_per_sample;
238 for (i = 0; i < s->blocksize; i++)
239 decoded[i] <<= scale;
240 break; 275 break;
241 } 276 }
242 277
@@ -244,29 +279,88 @@ int shorten_decode_frame(ShortenContext *s,
244 i = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE); 279 i = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
245 while (i--) 280 while (i--)
246 get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE); 281 get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
247 return 4;
248 break; 282 break;
249 283
250 case FN_BITSHIFT: 284 case FN_BITSHIFT:
251 s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE); 285 s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
252 return 3;
253 break; 286 break;
254 287
255 case FN_BLOCKSIZE: 288 case FN_BLOCKSIZE:
256 s->blocksize = get_uint(s, av_log2(s->blocksize)); 289 s->blocksize = get_uint(s, av_log2(s->blocksize));
257 return 2;
258 break; 290 break;
259 291
260 case FN_QUIT: 292 case FN_QUIT:
261 return 1;
262 break; 293 break;
263 294
264 default: 295 default:
265 return -1; 296 return FN_ERROR;
266 break; 297 break;
267 } 298 }
268 299
269 return 0; 300 return cmd;
301}
302
303int shorten_decode_frames(ShortenContext *s, int *nsamples,
304 int32_t *decoded0, int32_t *decoded1,
305 int32_t *offset0, int32_t *offset1,
306 uint8_t *buf, int buf_size,
307 void (*yield)(void))
308{
309 int32_t *decoded, *offset;
310 int cmd;
311
312 *nsamples = 0;
313
314 init_get_bits(&s->gb, buf, buf_size*8);
315 get_bits(&s->gb, s->bitindex);
316
317 int n = 0;
318 while (n < NUM_DEC_LOOPS) {
319 int chan = n%2;
320 if (chan == 0) {
321 decoded = decoded0 + s->nwrap + *nsamples;
322 offset = offset0;
323 } else {
324 decoded = decoded1 + s->nwrap + *nsamples;
325 offset = offset1;
326 }
327
328 yield();
329
330 cmd = shorten_decode_frame(s, decoded, offset);
331
332 if (cmd == FN_VERBATIM || cmd == FN_BITSHIFT || cmd == FN_BLOCKSIZE) {
333 continue;
334 } else if (cmd == FN_QUIT || cmd == FN_ERROR) {
335 break;
336 }
337
338 *nsamples += chan * s->blocksize;
339 n++;
340 }
341
342 if (*nsamples) {
343 /* Wrap the samples for the next loop */
344 int i;
345 for (i = 0; i < s->nwrap; i++) {
346 decoded0[i] = decoded0[*nsamples + i];
347 decoded1[i] = decoded1[*nsamples + i];
348 }
349
350 /* Scale the samples for the pcmbuf */
351 int scale = SHN_OUTPUT_DEPTH - s->bits_per_sample;
352#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
353 coldfire_lshift_samples(*nsamples, scale, decoded0 + s->nwrap);
354 coldfire_lshift_samples(*nsamples, scale, decoded1 + s->nwrap);
355#else
356 for (i = 0; i < *nsamples; i++) {
357 decoded0[i + s->nwrap] <<= scale;
358 decoded1[i + s->nwrap] <<= scale;
359 }
360#endif
361 }
362
363 return cmd;
270} 364}
271 365
272static int decode_wave_header(ShortenContext *s, 366static int decode_wave_header(ShortenContext *s,