summary | refs | log | tree | commit | diff
path: root/apps/codecs/lib/ffmpeg_bitstream.c
diff options
context:
space:
mode:
author: Nils Wallménius <nils@rockbox.org>, 2010-07-15 16:19:17 +0000
committer: Nils Wallménius <nils@rockbox.org>, 2010-07-15 16:19:17 +0000
commit: a87c61854ef614b258ca7d4d0b40db017884e63e (patch)
tree: 4f0129350a8a2d25ee5e5d218aa787ae2dbbeca3 /apps/codecs/lib/ffmpeg_bitstream.c
parent: 328f2f9c285dd9ccec4ddabe4d64a508b0e498fa (diff)
download: rockbox-a87c61854ef614b258ca7d4d0b40db017884e63e.tar.gz
download: rockbox-a87c61854ef614b258ca7d4d0b40db017884e63e.zip
Sync codeclib bitstream code with upstream ffmpeg code. Build ffmpeg_bitstream.c as a part of the codec lib. Use this codeclib implementation in libffmpegFLAC. Implement adapted version of the unaligned longword reading optimization for coldfire from the libwma version of this code. Speeds up cook decoding by 2-3% on h300 and flac by 25% on h300, also speeds up flac decoding by 2% on c200 (decoding speed of cook on c200 is unchanged).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27430 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps/codecs/lib/ffmpeg_bitstream.c')
-rw-r--r-- apps/codecs/lib/ffmpeg_bitstream.c 321
1 files changed, 205 insertions, 116 deletions
diff --git a/apps/codecs/lib/ffmpeg_bitstream.c b/apps/codecs/lib/ffmpeg_bitstream.c
index 88e3cbfe3a..c879661c47 100644
--- a/apps/codecs/lib/ffmpeg_bitstream.c
+++ b/apps/codecs/lib/ffmpeg_bitstream.c
@@ -2,6 +2,7 @@
2 * Common bit i/o utils 2 * Common bit i/o utils
3 * Copyright (c) 2000, 2001 Fabrice Bellard 3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 * Copyright (c) 2010 Loren Merritt
5 * 6 *
6 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> 7 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
7 * 8 *
@@ -22,7 +23,16 @@
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */ 24 */
24 25
25#include "ffmpeg_bitstream.h" 26/**
27 * @file
28 * bitstream api.
29 */
30
31//#include "avcodec.h"
32#include "ffmpeg_get_bits.h"
33//#include "put_bits.h"
34
35#define av_log(...)
26 36
27#ifdef ROCKBOX 37#ifdef ROCKBOX
28#undef DEBUGF 38#undef DEBUGF
@@ -36,27 +46,47 @@ const uint8_t ff_log2_run[32]={
36 8, 9,10,11,12,13,14,15 46 8, 9,10,11,12,13,14,15
37}; 47};
38 48
39/** 49#if 0 // unused in rockbox
40 * Same as av_mallocz_static(), but does a realloc. 50void align_put_bits(PutBitContext *s)
41 * 51{
42 * @param[in] ptr The block of memory to reallocate. 52#ifdef ALT_BITSTREAM_WRITER
43 * @param[in] size The requested size. 53 put_bits(s,( - s->index) & 7,0);
44 * @return Block of memory of requested size. 54#else
45 * @deprecated. Code which uses ff_realloc_static is broken/misdesigned 55 put_bits(s,s->bit_left & 7,0);
46 * and should correctly use static arrays 56#endif
47 */ 57}
48
49 58
50void ff_put_string(PutBitContext * pbc, const char *s, int put_zero) 59void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
51{ 60{
52 while(*s){ 61 while(*string){
53 put_bits(pbc, 8, *s); 62 put_bits(pb, 8, *string);
54 s++; 63 string++;
55 } 64 }
56 if(put_zero) 65 if(terminate_string)
57 put_bits(pbc, 8, 0); 66 put_bits(pb, 8, 0);
58} 67}
59 68
69void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
70{
71 int words= length>>4;
72 int bits= length&15;
73 int i;
74
75 if(length==0) return;
76
77 if(CONFIG_SMALL || words < 16 || put_bits_count(pb)&7){
78 for(i=0; i<words; i++) put_bits(pb, 16, AV_RB16(src + 2*i));
79 }else{
80 for(i=0; put_bits_count(pb)&31; i++)
81 put_bits(pb, 8, src[i]);
82 flush_put_bits(pb);
83 memcpy(put_bits_ptr(pb), src+i, 2*words-i);
84 skip_put_bytes(pb, 2*words-i);
85 }
86
87 put_bits(pb, bits, AV_RB16(src + 2*words)>>(16-bits));
88}
89#endif
60/* VLC decoding */ 90/* VLC decoding */
61 91
62//#define DEBUG_VLC 92//#define DEBUG_VLC
@@ -84,118 +114,143 @@ static int alloc_table(VLC *vlc, int size, int use_static)
84 index = vlc->table_size; 114 index = vlc->table_size;
85 vlc->table_size += size; 115 vlc->table_size += size;
86 if (vlc->table_size > vlc->table_allocated) { 116 if (vlc->table_size > vlc->table_allocated) {
87 if(use_static>1){ 117 if(use_static)
118 {
88 DEBUGF("init_vlc() used with too little memory : table_size > allocated_memory\n"); 119 DEBUGF("init_vlc() used with too little memory : table_size > allocated_memory\n");
120 return -1;
89 } 121 }
90 122// abort(); //cant do anything, init_vlc() is used with too little memory
123// vlc->table_allocated += (1 << vlc->bits);
124// vlc->table = av_realloc(vlc->table,
125// sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
91 if (!vlc->table) 126 if (!vlc->table)
92 return -1; 127 return -1;
93 } 128 }
94 return index; 129 return index;
95} 130}
96 131
97static int build_table(VLC *vlc, int table_nb_bits, 132/*
98 int nb_codes, 133static av_always_inline uint32_t bitswap_32(uint32_t x) {
99 const void *bits, int bits_wrap, int bits_size, 134 return av_reverse[x&0xFF]<<24
100 const void *codes, int codes_wrap, int codes_size, 135 | av_reverse[(x>>8)&0xFF]<<16
101 const void *symbols, int symbols_wrap, int symbols_size, 136 | av_reverse[(x>>16)&0xFF]<<8
102 uint32_t code_prefix, int n_prefix, int flags) 137 | av_reverse[x>>24];
103{ 138}
104 int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2, symbol; 139*/
140
141typedef struct {
142 uint8_t bits;
143 uint16_t symbol;
144 /** codeword, with the first bit-to-be-read in the msb
145 * (even if intended for a little-endian bitstream reader) */
105 uint32_t code; 146 uint32_t code;
147} VLCcode;
148
149static int compare_vlcspec(const void *a, const void *b)
150{
151 const VLCcode *sa=a, *sb=b;
152 return (sa->code >> 1) - (sb->code >> 1);
153}
154
155/**
156 * Build VLC decoding tables suitable for use with get_vlc().
157 *
158 * @param vlc the context to be initted
159 *
160 * @param table_nb_bits max length of vlc codes to store directly in this table
161 * (Longer codes are delegated to subtables.)
162 *
163 * @param nb_codes number of elements in codes[]
164 *
165 * @param codes descriptions of the vlc codes
166 * These must be ordered such that codes going into the same subtable are contiguous.
167 * Sorting by VLCcode.code is sufficient, though not necessary.
168 */
169static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
170 VLCcode *codes, int flags)
171{
172 int table_size, table_index, index, symbol, subtable_bits;
173 int i, j, k, n, nb, inc;
174 uint32_t code, code_prefix;
106 VLC_TYPE (*table)[2]; 175 VLC_TYPE (*table)[2];
107 176
108 table_size = 1 << table_nb_bits; 177 table_size = 1 << table_nb_bits;
109 table_index = alloc_table(vlc, table_size, flags & (INIT_VLC_USE_STATIC|INIT_VLC_USE_NEW_STATIC)); 178 table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
110#ifdef DEBUG_VLC 179#ifdef DEBUG_VLC
111 DEBUGF("new table index=%d size=%d code_prefix=%x n=%d\n", 180 av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
112 table_index, table_size, code_prefix, n_prefix); 181 table_index, table_size);
113#endif 182#endif
114 if (table_index < 0) 183 if (table_index < 0)
115 return -1; 184 return -1;
116 table = &vlc->table[table_index]; 185 table = &vlc->table[table_index];
117 186
118 for(i=0;i<table_size;i++) { 187 for (i = 0; i < table_size; i++) {
119 table[i][1] = 0; //bits 188 table[i][1] = 0; //bits
120 table[i][0] = -1; //codes 189 table[i][0] = -1; //codes
121 } 190 }
122 191
123 /* first pass: map codes and compute auxillary table sizes */ 192 /* first pass: map codes and compute auxillary table sizes */
124 for(i=0;i<nb_codes;i++) { 193 for (i = 0; i < nb_codes; i++) {
125 GET_DATA(n, bits, i, bits_wrap, bits_size); 194 n = codes[i].bits;
126 GET_DATA(code, codes, i, codes_wrap, codes_size); 195 code = codes[i].code;
127 /* we accept tables with holes */ 196 symbol = codes[i].symbol;
128 if (n <= 0)
129 continue;
130 if (!symbols)
131 symbol = i;
132 else
133 GET_DATA(symbol, symbols, i, symbols_wrap, symbols_size);
134#if defined(DEBUG_VLC) && 0 197#if defined(DEBUG_VLC) && 0
135 DEBUGF("i=%d n=%d code=0x%x\n", i, n, code); 198 av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
136#endif 199#endif
137 /* if code matches the prefix, it is in the table */ 200 if (n <= table_nb_bits) {
138 n -= n_prefix; 201 /* no need to add another table */
139 if(flags & INIT_VLC_LE) 202 j = code >> (32 - table_nb_bits);
140 code_prefix2= code & (n_prefix>=32 ? (int)0xffffffff : (1 << n_prefix)-1); 203 nb = 1 << (table_nb_bits - n);
141 else 204 inc = 1;
142 code_prefix2= code >> n; 205/* if (flags & INIT_VLC_LE) {
143 if (n > 0 && code_prefix2 == (int)code_prefix) { 206 j = bitswap_32(code);
144 if (n <= table_nb_bits) { 207 inc = 1 << n;
145 /* no need to add another table */ 208 } */
146 j = (code << (table_nb_bits - n)) & (table_size - 1); 209 for (k = 0; k < nb; k++) {
147 nb = 1 << (table_nb_bits - n);
148 for(k=0;k<nb;k++) {
149 if(flags & INIT_VLC_LE)
150 j = (code >> n_prefix) + (k<<n);
151#ifdef DEBUG_VLC 210#ifdef DEBUG_VLC
152 DEBUGF("%4x: code=%d n=%d\n", 211 av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
153 j, i, n); 212 j, i, n);
154#endif 213#endif
155 if (table[j][1] /*bits*/ != 0) { 214 if (table[j][1] /*bits*/ != 0) {
156 DEBUGF("incorrect codes\n"); 215 av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
157 return -1; 216 return -1;
158 }
159 table[j][1] = n; //bits
160 table[j][0] = symbol;
161 j++;
162 } 217 }
163 } else { 218 table[j][1] = n; //bits
164 n -= table_nb_bits; 219 table[j][0] = symbol;
165 j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1); 220 j += inc;
166#ifdef DEBUG_VLC
167 DEBUGF("%4x: n=%d (subtable)\n",
168 j, n);
169#endif
170 /* compute table size */
171 n1 = -table[j][1]; //bits
172 if (n > n1)
173 n1 = n;
174 table[j][1] = -n1; //bits
175 } 221 }
176 } 222 } else {
177 } 223 /* fill auxiliary table recursively */
178 224 n -= table_nb_bits;
179 /* second pass : fill auxillary tables recursively */ 225 code_prefix = code >> (32 - table_nb_bits);
180 for(i=0;i<table_size;i++) { 226 subtable_bits = n;
181 n = table[i][1]; //bits 227 codes[i].bits = n;
182 if (n < 0) { 228 codes[i].code = code << table_nb_bits;
183 n = -n; 229 for (k = i+1; k < nb_codes; k++) {
184 if (n > table_nb_bits) { 230 n = codes[k].bits - table_nb_bits;
185 n = table_nb_bits; 231 if (n <= 0)
186 table[i][1] = -n; //bits 232 break;
233 code = codes[k].code;
234 if (code >> (32 - table_nb_bits) != code_prefix)
235 break;
236 codes[k].bits = n;
237 codes[k].code = code << table_nb_bits;
238 subtable_bits = FFMAX(subtable_bits, n);
187 } 239 }
188 index = build_table(vlc, n, nb_codes, 240 subtable_bits = FFMIN(subtable_bits, table_nb_bits);
189 bits, bits_wrap, bits_size, 241 j = /*(flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) :*/ code_prefix;
190 codes, codes_wrap, codes_size, 242 table[j][1] = -subtable_bits;
191 symbols, symbols_wrap, symbols_size, 243#ifdef DEBUG_VLC
192 (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i), 244 av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
193 n_prefix + table_nb_bits, flags); 245 j, codes[i].bits + table_nb_bits);
246#endif
247 index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
194 if (index < 0) 248 if (index < 0)
195 return -1; 249 return -1;
196 /* note: realloc has been done, so reload tables */ 250 /* note: realloc has been done, so reload tables */
197 table = &vlc->table[table_index]; 251 table = &vlc->table[table_index];
198 table[i][0] = index; //code 252 table[j][0] = index; //code
253 i = k-1;
199 } 254 }
200 } 255 }
201 return table_index; 256 return table_index;
@@ -228,49 +283,83 @@ static int build_table(VLC *vlc, int table_nb_bits,
228 'use_static' should be set to 1 for tables, which should be freed 283 'use_static' should be set to 1 for tables, which should be freed
229 with av_free_static(), 0 if free_vlc() will be used. 284 with av_free_static(), 0 if free_vlc() will be used.
230*/ 285*/
286
287/* Rockbox: support for INIT_VLC_LE is currently disabled since none of our
288 codecs use it, there's a LUT based bit reverse function for this commented
289 out above (bitswap_32) and an inline asm version in libtremor/codebook.c
290 if we ever want this */
231int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes, 291int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
232 const void *bits, int bits_wrap, int bits_size, 292 const void *bits, int bits_wrap, int bits_size,
233 const void *codes, int codes_wrap, int codes_size, 293 const void *codes, int codes_wrap, int codes_size,
234 const void *symbols, int symbols_wrap, int symbols_size, 294 const void *symbols, int symbols_wrap, int symbols_size,
235 int flags) 295 int flags)
236{ 296{
297 VLCcode buf[nb_codes+1]; /* worst case from cook seems to be nb_codes == 607
298 which would make this about 4.8k... */
299 int i, j, ret;
300
237 vlc->bits = nb_bits; 301 vlc->bits = nb_bits;
238 if(flags & INIT_VLC_USE_NEW_STATIC){ 302 if(flags & INIT_VLC_USE_NEW_STATIC){
239 if(vlc->table_size && vlc->table_size == vlc->table_allocated){ 303 if(vlc->table_size && vlc->table_size == vlc->table_allocated){
240 return 0; 304 return 0;
241 }else if(vlc->table_size){ 305 }else if(vlc->table_size){
242 return -1; // fatal error, we are called on a partially initialized table 306 DEBUGF("fatal error, we are called on a partially initialized table\n");
307 return -1;
308// abort(); // fatal error, we are called on a partially initialized table
243 } 309 }
244 }else if(!(flags & INIT_VLC_USE_STATIC)) { 310 }else {
245 vlc->table = NULL; 311 vlc->table = NULL;
246 vlc->table_allocated = 0; 312 vlc->table_allocated = 0;
247 vlc->table_size = 0; 313 vlc->table_size = 0;
248 } else {
249 /* Static tables are initially always NULL, return
250 if vlc->table != NULL to avoid double allocation */
251 if(vlc->table)
252 return 0;
253 } 314 }
254 315
255#ifdef DEBUG_VLC 316#ifdef DEBUG_VLC
256 DEBUGF("build table nb_codes=%d\n", nb_codes); 317 av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
257#endif 318#endif
258 319
259 if (build_table(vlc, nb_bits, nb_codes, 320// buf = av_malloc((nb_codes+1)*sizeof(VLCcode));
260 bits, bits_wrap, bits_size, 321
261 codes, codes_wrap, codes_size, 322// assert(symbols_size <= 2 || !symbols);
262 symbols, symbols_wrap, symbols_size, 323 j = 0;
263 0, 0, flags) < 0) { 324#define COPY(condition)\
264 //free(&vlc->table); 325 for (i = 0; i < nb_codes; i++) {\
326 GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\
327 if (!(condition))\
328 continue;\
329 GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\
330/* if (flags & INIT_VLC_LE)*/\
331/* buf[j].code = bitswap_32(buf[j].code);*/\
332/* else*/\
333 buf[j].code <<= 32 - buf[j].bits;\
334 if (symbols)\
335 GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\
336 else\
337 buf[j].symbol = i;\
338 j++;\
339 }
340 COPY(buf[j].bits > nb_bits);
341 // qsort is the slowest part of init_vlc, and could probably be improved or avoided
342 qsort(buf, j, sizeof(VLCcode), compare_vlcspec);
343 COPY(buf[j].bits && buf[j].bits <= nb_bits);
344 nb_codes = j;
345
346 ret = build_table(vlc, nb_bits, nb_codes, buf, flags);
347
348// av_free(buf);
349 if (ret < 0) {
350// av_freep(&vlc->table);
265 return -1; 351 return -1;
266 } 352 }
267 /* Changed the following condition to be true if table_size > table_allocated. * 353 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) {
268 * This would be more sensible for static tables since we want warnings for * 354 av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
269 * memory shortages only. */ 355 }
270#ifdef TEST
271 if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size > vlc->table_allocated)
272 DEBUGF("needed %d had %d\n", vlc->table_size, vlc->table_allocated);
273#endif
274 return 0; 356 return 0;
275} 357}
276 358
359/* not used in rockbox
360void free_vlc(VLC *vlc)
361{
362 av_freep(&vlc->table);
363}
364*/
365