summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Chapman <dave@dchapman.com>2007-07-04 20:55:59 +0000
committerDave Chapman <dave@dchapman.com>2007-07-04 20:55:59 +0000
commitb081b94d170866dcea6dababbdc1c8dfb39d65a5 (patch)
tree8ee9241bc36190b166c66e820ab04f0f45e396ab
parent9b96d66621ce0a7266df2f232b60683248d532e8 (diff)
downloadrockbox-b081b94d170866dcea6dababbdc1c8dfb39d65a5.tar.gz
rockbox-b081b94d170866dcea6dababbdc1c8dfb39d65a5.zip
Add tag parsing to the ASF metadata parser. TODO: 1) Stress test to ensure it deals correctly with very long tags and the id3v2/v1 buffers becoming full. 2) Review the entire parser and attempt to reduce the binary size.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13790 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/metadata/asf.c258
1 files changed, 180 insertions, 78 deletions
diff --git a/apps/metadata/asf.c b/apps/metadata/asf.c
index cae33ab375..3af23942d6 100644
--- a/apps/metadata/asf.c
+++ b/apps/metadata/asf.c
@@ -21,16 +21,16 @@
21#include <string.h> 21#include <string.h>
22#include <stdlib.h> 22#include <stdlib.h>
23#include <ctype.h> 23#include <ctype.h>
24#include <atoi.h>
24#include <inttypes.h> 25#include <inttypes.h>
25 26
26#include "id3.h" 27#include "id3.h"
27#include "debug.h" 28#include "debug.h"
28#include "rbunicode.h" 29#include "rbunicode.h"
29#include "metadata_common.h" 30#include "metadata_common.h"
31#include "system.h"
30#include <codecs/libwma/asf.h> 32#include <codecs/libwma/asf.h>
31 33
32static asf_waveformatex_t wfx;
33
34/* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */ 34/* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */
35struct guid_s { 35struct guid_s {
36 uint32_t v1; 36 uint32_t v1;
@@ -121,7 +121,105 @@ static void asf_read_object_header(asf_object_t *obj, int fd)
121 obj->datalen = 0; 121 obj->datalen = 0;
122} 122}
123 123
124static int asf_parse_header(int fd, struct mp3entry* id3) 124/* Parse an integer from the extended content object - we always
125 convert to an int, regardless of native format.
126*/
127static int asf_intdecode(int fd, int type, int length)
128{
129 uint16_t tmp16;
130 uint32_t tmp32;
131 uint64_t tmp64;
132
133 if (type==3) {
134 read_uint32le(fd, &tmp32);
135 lseek(fd,length - 4,SEEK_CUR);
136 return (int)tmp32;
137 } else if (type==4) {
138 read_uint32le(fd, &tmp64);
139 lseek(fd,length - 8,SEEK_CUR);
140 return (int)tmp64;
141 } else if (type == 5) {
142 read_uint16le(fd, &tmp16);
143 lseek(fd,length - 2,SEEK_CUR);
144 return (int)tmp16;
145 }
146
147 return 0;
148}
149
150/* Decode a LE utf16 string from a disk buffer into a fixed-sized
151 utf8 buffer.
152*/
153
154static void asf_utf16LEdecode(int fd,
155 uint16_t utf16bytes,
156 unsigned char **utf8,
157 int* utf8bytes
158 )
159{
160 unsigned long ucs;
161 int n;
162 unsigned char utf16buf[256];
163 unsigned char* utf16 = utf16buf;
164 unsigned char* newutf8;
165
166 n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
167 utf16bytes -= n;
168
169 while (n > 0) {
170 /* Check for a surrogate pair */
171 if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
172 if (n < 4) {
173 /* Run out of utf16 bytes, read some more */
174 utf16buf[0] = utf16[0];
175 utf16buf[1] = utf16[1];
176
177 n = read(fd, utf16buf + 2, MIN(sizeof(utf16buf)-2, utf16bytes));
178 utf16 = utf16buf;
179 utf16bytes -= n;
180 n += 2;
181 }
182
183 if (n < 4) {
184 /* Truncated utf16 string, abort */
185 break;
186 }
187 ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
188 | utf16[2] | ((utf16[3] - 0xDC) << 8));
189 utf16 += 4;
190 n -= 4;
191 } else {
192 ucs = (utf16[0] | (utf16[1] << 8));
193 utf16 += 2;
194 n -= 2;
195 }
196
197 if (*utf8bytes > 6) {
198 newutf8 = utf8encode(ucs, *utf8);
199 *utf8bytes -= (newutf8 - *utf8);
200 *utf8 += (newutf8 - *utf8);
201 }
202
203 /* We have run out of utf16 bytes, read more if available */
204 if ((n == 0) && (utf16bytes > 0)) {
205 n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
206 utf16 = utf16buf;
207 utf16bytes -= n;
208 }
209 }
210
211 *utf8[0] = 0;
212 --*utf8bytes;
213
214 if (utf16bytes > 0) {
215 /* Skip any remaining bytes */
216 lseek(fd, utf16bytes, SEEK_CUR);
217 }
218 return;
219}
220
221static int asf_parse_header(int fd, struct mp3entry* id3,
222 asf_waveformatex_t* wfx)
125{ 223{
126 asf_object_t current; 224 asf_object_t current;
127 asf_object_t header; 225 asf_object_t header;
@@ -129,14 +227,11 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
129 int i; 227 int i;
130 int fileprop = 0; 228 int fileprop = 0;
131 uint64_t play_duration; 229 uint64_t play_duration;
132 uint64_t tmp64;
133 uint32_t tmp32;
134 uint16_t tmp16;
135 uint8_t tmp8;
136 uint16_t flags; 230 uint16_t flags;
137 uint32_t subobjects; 231 uint32_t subobjects;
138 uint8_t utf16buf[512];
139 uint8_t utf8buf[512]; 232 uint8_t utf8buf[512];
233 int id3buf_remaining = sizeof(id3->id3v2buf) + sizeof(id3->id3v1buf);
234 unsigned char* id3buf = (unsigned char*)id3->id3v2buf;
140 235
141 asf_read_object_header((asf_object_t *) &header, fd); 236 asf_read_object_header((asf_object_t *) &header, fd);
142 237
@@ -193,7 +288,7 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
193 288
194 /* Read the packet size - uint32_t at offset 68 */ 289 /* Read the packet size - uint32_t at offset 68 */
195 lseek(fd, 20, SEEK_CUR); 290 lseek(fd, 20, SEEK_CUR);
196 read_uint32le(fd, &wfx.packet_size); 291 read_uint32le(fd, &wfx->packet_size);
197 292
198 /* Skip bytes remaining in object */ 293 /* Skip bytes remaining in object */
199 lseek(fd, current.size - 24 - 72, SEEK_CUR); 294 lseek(fd, current.size - 24 - 72, SEEK_CUR);
@@ -225,7 +320,7 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
225 DEBUGF("Found stream properties for audio stream %d\n",flags&0x7f); 320 DEBUGF("Found stream properties for audio stream %d\n",flags&0x7f);
226 321
227 /* TODO: Check codec_id and find the lowest numbered audio stream in the file */ 322 /* TODO: Check codec_id and find the lowest numbered audio stream in the file */
228 wfx.audiostream = flags&0x7f; 323 wfx->audiostream = flags&0x7f;
229 324
230 if (propdatalen < 18) { 325 if (propdatalen < 18) {
231 return ASF_ERROR_INVALID_LENGTH; 326 return ASF_ERROR_INVALID_LENGTH;
@@ -236,29 +331,25 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
236 return ASF_ERROR_INVALID_LENGTH; 331 return ASF_ERROR_INVALID_LENGTH;
237 } 332 }
238#endif 333#endif
239 read_uint16le(fd, &wfx.codec_id); 334 read_uint16le(fd, &wfx->codec_id);
240 read_uint16le(fd, &wfx.channels); 335 read_uint16le(fd, &wfx->channels);
241 read_uint32le(fd, &wfx.rate); 336 read_uint32le(fd, &wfx->rate);
242 read_uint32le(fd, &wfx.bitrate); 337 read_uint32le(fd, &wfx->bitrate);
243 wfx.bitrate *= 8; 338 wfx->bitrate *= 8;
244 read_uint16le(fd, &wfx.blockalign); 339 read_uint16le(fd, &wfx->blockalign);
245 read_uint16le(fd, &wfx.bitspersample); 340 read_uint16le(fd, &wfx->bitspersample);
246 read_uint16le(fd, &wfx.datalen); 341 read_uint16le(fd, &wfx->datalen);
247 342
248 /* Round bitrate to the nearest kbit */ 343 /* Round bitrate to the nearest kbit */
249 id3->bitrate = (wfx.bitrate + 500) / 1000; 344 id3->bitrate = (wfx->bitrate + 500) / 1000;
250 id3->frequency = wfx.rate; 345 id3->frequency = wfx->rate;
251 346
252 if (wfx.codec_id == ASF_CODEC_ID_WMAV1) { 347 if (wfx->codec_id == ASF_CODEC_ID_WMAV1) {
253 read(fd, wfx.data, 4); 348 read(fd, wfx->data, 4);
254 lseek(fd,current.size - 24 - 72 - 4,SEEK_CUR); 349 lseek(fd,current.size - 24 - 72 - 4,SEEK_CUR);
255 /* A hack - copy the wfx struct to the MP3 TOC field in the id3 struct */ 350 } else if (wfx->codec_id == ASF_CODEC_ID_WMAV2) {
256 memcpy(id3->toc, &wfx, sizeof(wfx)); 351 read(fd, wfx->data, 6);
257 } else if (wfx.codec_id == ASF_CODEC_ID_WMAV2) {
258 read(fd, wfx.data, 6);
259 lseek(fd,current.size - 24 - 72 - 6,SEEK_CUR); 352 lseek(fd,current.size - 24 - 72 - 6,SEEK_CUR);
260 /* A hack - copy the wfx struct to the MP3 TOC field in the id3 struct */
261 memcpy(id3->toc, &wfx, sizeof(wfx));
262 } else { 353 } else {
263 lseek(fd,current.size - 24 - 72,SEEK_CUR); 354 lseek(fd,current.size - 24 - 72,SEEK_CUR);
264 } 355 }
@@ -279,13 +370,24 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
279 DEBUGF("strlength = %u\n",strlength[i]); 370 DEBUGF("strlength = %u\n",strlength[i]);
280 } 371 }
281 372
282 for (i=0; i<5 ; i++) { 373 if (strlength[0] > 0) { /* 0 - Title */
283 if (strlength[i] > 0) { 374 id3->title = id3buf;
284 read(fd, utf16buf, strlength[i]); 375 asf_utf16LEdecode(fd, strlength[0], &id3buf, &id3buf_remaining);
285 utf16LEdecode(utf16buf, utf8buf, strlength[i]); 376 }
286 DEBUGF("TAG %d = %s\n",i,utf8buf); 377
287 } 378 if (strlength[1] > 0) { /* 1 - Artist */
379 id3->artist = id3buf;
380 asf_utf16LEdecode(fd, strlength[1], &id3buf, &id3buf_remaining);
381 }
382
383 lseek(fd, strlength[2], SEEK_CUR); /* 2 - copyright */
384
385 if (strlength[3] > 0) { /* 3 - description */
386 id3->comment = id3buf;
387 asf_utf16LEdecode(fd, strlength[3], &id3buf, &id3buf_remaining);
288 } 388 }
389
390 lseek(fd, strlength[4], SEEK_CUR); /* 4 - rating */
289 } else if (asf_guid_match(&current.guid, &asf_guid_extended_content_description)) { 391 } else if (asf_guid_match(&current.guid, &asf_guid_extended_content_description)) {
290 uint16_t count; 392 uint16_t count;
291 int i; 393 int i;
@@ -298,55 +400,50 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
298 400
299 for (i=0; i < count; i++) { 401 for (i=0; i < count; i++) {
300 uint16_t length, type; 402 uint16_t length, type;
403 unsigned char* utf8 = utf8buf;
404 int utf8length = 512;
301 405
302 read_uint16le(fd, &length); 406 read_uint16le(fd, &length);
303 read(fd, utf16buf, length); 407 asf_utf16LEdecode(fd, length, &utf8, &utf8length);
304 utf16LEdecode(utf16buf, utf8buf, length);
305 DEBUGF("Key=\"%s\" ",utf8buf);
306 bytesleft -= 2 + length; 408 bytesleft -= 2 + length;
307 409
308 read_uint16le(fd, &type); 410 read_uint16le(fd, &type);
309 read_uint16le(fd, &length); 411 read_uint16le(fd, &length);
310 switch(type) 412
311 { 413 if (!strcmp("WM/TrackNumber",utf8buf)) {
312 case 0: /* String */ 414 if (type == 0) {
313 read(fd, utf16buf, length); 415 id3->track_string = id3buf;
314 utf16LEdecode(utf16buf, utf8buf, length); 416 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
315 DEBUGF("Value=\"%s\"\n",utf8buf); 417 id3->tracknum = atoi(id3->track_string);
316 break; 418 } else if ((type >=2) && (type <= 5)) {
317 419 id3->tracknum = asf_intdecode(fd, type, length);
318 case 1: /* Hex string */ 420 } else {
319 DEBUGF("Value=NOT YET IMPLEMENTED (HEX STRING)\n"); 421 lseek(fd, length, SEEK_CUR);
320 lseek(fd,length,SEEK_CUR); 422 }
321 break; 423 } else if ((!strcmp("WM/Genre",utf8buf)) && (type == 0)) {
322 424 id3->genre_string = id3buf;
323 case 2: /* Bool */ 425 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
324 read(fd, &tmp8, 1); 426 } else if ((!strcmp("WM/AlbumTitle",utf8buf)) && (type == 0)) {
325 DEBUGF("Value=%s\n",(tmp8 ? "TRUE" : "FALSE")); 427 id3->album = id3buf;
326 lseek(fd,length - 1,SEEK_CUR); 428 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
327 break; 429 } else if ((!strcmp("WM/AlbumArtist",utf8buf)) && (type == 0)) {
328 430 id3->albumartist = id3buf;
329 case 3: /* 32-bit int */ 431 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
330 read_uint32le(fd, &tmp32); 432 } else if ((!strcmp("WM/Composer",utf8buf)) && (type == 0)) {
331 DEBUGF("Value=%u\n",(unsigned int)tmp32); 433 id3->composer = id3buf;
332 lseek(fd,length - 4,SEEK_CUR); 434 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
333 break; 435 } else if (!strcmp("WM/Year",utf8buf)) {
334 436 if (type == 0) {
335 case 4: /* 64-bit int */ 437 id3->year_string = id3buf;
336 read_uint64le(fd, &tmp64); 438 asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
337 DEBUGF("Value=[64-bit int]\n"); 439 id3->year = atoi(id3->year_string);
338 lseek(fd,length - 8,SEEK_CUR); 440 } else if ((type >=2) && (type <= 5)) {
339 break; 441 id3->year = asf_intdecode(fd, type, length);
340 442 } else {
341 case 5: /* 16-bit int */ 443 lseek(fd, length, SEEK_CUR);
342 read_uint16le(fd, &tmp16); 444 }
343 DEBUGF("Value=%u\n",tmp16); 445 } else {
344 lseek(fd,length - 2,SEEK_CUR); 446 lseek(fd, length, SEEK_CUR);
345 break;
346
347 default:
348 lseek(fd,length,SEEK_CUR);
349 break;
350 } 447 }
351 bytesleft -= 4 + length; 448 bytesleft -= 4 + length;
352 } 449 }
@@ -386,10 +483,11 @@ bool get_asf_metadata(int fd, struct mp3entry* id3)
386{ 483{
387 int res; 484 int res;
388 asf_object_t obj; 485 asf_object_t obj;
486 asf_waveformatex_t wfx;
389 487
390 wfx.audiostream = -1; 488 wfx.audiostream = -1;
391 489
392 res = asf_parse_header(fd, id3); 490 res = asf_parse_header(fd, id3, &wfx);
393 491
394 if (res < 0) { 492 if (res < 0) {
395 DEBUGF("ASF: parsing error - %d\n",res); 493 DEBUGF("ASF: parsing error - %d\n",res);
@@ -419,5 +517,9 @@ bool get_asf_metadata(int fd, struct mp3entry* id3)
419 */ 517 */
420 id3->first_frame_offset = lseek(fd, 0, SEEK_CUR) + 26; 518 id3->first_frame_offset = lseek(fd, 0, SEEK_CUR) + 26;
421 519
520 /* We copy the wfx struct to the MP3 TOC field in the id3 struct so
521 the codec doesn't need to parse the header object again */
522 memcpy(id3->toc, &wfx, sizeof(wfx));
523
422 return true; 524 return true;
423} 525}