From 9d7df9ae4d829204856a19fc14fae166631389bf Mon Sep 17 00:00:00 2001 From: Amaury Pouly Date: Thu, 15 Sep 2011 16:10:31 +0000 Subject: sbtools: move the db parse to its own file and improve error messages git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30557 a1c6a512-1295-4272-9138-f99709370657 --- utils/sbtools/dbparser.c | 626 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 626 insertions(+) create mode 100644 utils/sbtools/dbparser.c (limited to 'utils/sbtools/dbparser.c') diff --git a/utils/sbtools/dbparser.c b/utils/sbtools/dbparser.c new file mode 100644 index 0000000000..20f2d66c0e --- /dev/null +++ b/utils/sbtools/dbparser.c @@ -0,0 +1,626 @@ +/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id$ + * + * Copyright (C) 2011 Amaury Pouly + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include +#include +#include +#include "dbparser.h" + +typedef uint8_t byte; + +extern bool g_debug; +extern void *xmalloc(size_t s); +extern int convxdigit(char digit, byte *val); + +#define bug(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while(0) +#define bugp(...) do { fprintf(stderr, __VA_ARGS__); perror(" "); exit(1); } while(0) + +enum lexem_type_t +{ + LEX_IDENTIFIER, + LEX_LPAREN, + LEX_RPAREN, + LEX_NUMBER, + LEX_STRING, /* double-quoted string */ + LEX_EQUAL, + LEX_SEMICOLON, + LEX_LBRACE, + LEX_RBRACE, + LEX_RANGLE, + LEX_EOF +}; + +struct lexem_t +{ + enum lexem_type_t type; + char *str; + uint32_t num; + int line; + const char *file; +}; + +struct context_t +{ + const char *file; + char *begin; + char *end; + char *ptr; + int line; +}; + +#define parse_error(ctx, ...) \ + do { fprintf(stderr, "%s:%d: ", ctx->file, ctx->line); \ + fprintf(stderr, __VA_ARGS__); exit(2); } while(0) + +static void advance(struct context_t *ctx, int nr_chars) +{ + while(nr_chars--) + { + if(*(ctx->ptr++) == '\n') + ctx->line++; + } +} + +static inline bool eof(struct context_t *ctx) +{ + return ctx->ptr == ctx->end; +} + +static inline bool next_valid(struct context_t *ctx, int nr) +{ + return ctx->ptr + nr < ctx->end; +} + +static inline char cur_char(struct context_t *ctx) +{ + return *ctx->ptr; +} + +static inline char next_char(struct context_t *ctx, int nr) +{ + return ctx->ptr[nr]; +} + +static inline void locate_lexem(struct lexem_t *lex, struct context_t *ctx) +{ + lex->file = ctx->file; + lex->line = ctx->line; +} + +static void __parse_string(struct context_t *ctx, void *user, void (*emit_fn)(void *user, char c)) +{ + while(!eof(ctx)) + { + if(cur_char(ctx) == '"') + break; + else if(cur_char(ctx) == '\\') + { + advance(ctx, 1); + if(eof(ctx)) + parse_error(ctx, "Unfinished string\n"); + if(cur_char(ctx) == '\\') emit_fn(user, '\\'); + else if(cur_char(ctx) == '\'') emit_fn(user, '\''); + else if(cur_char(ctx) == '\"') emit_fn(user, '\"'); + else parse_error(ctx, "Unknown escape sequence \\%c\n", cur_char(ctx)); + advance(ctx, 1); + } + else + { + emit_fn(user, cur_char(ctx)); + advance(ctx, 1); + } + } + if(eof(ctx) || cur_char(ctx) != '"') + parse_error(ctx, "Unfinished string\n"); + advance(ctx, 1); +} + +static void __parse_string_emit(void *user, char c) +{ + char **pstr = (char **)user; + *(*pstr)++ = c; +} + +static void __parse_string_count(void *user, char c) +{ + (void) c; + (*(int *)user)++; +} + +static void parse_string(struct context_t *ctx, struct lexem_t *lexem) +{ + locate_lexem(lexem, ctx); + /* skip " */ + advance(ctx, 1); + /* compute length */ + struct context_t cpy_ctx = *ctx; + int length = 0; + __parse_string(&cpy_ctx, (void *)&length, __parse_string_count); + /* parse again */ + lexem->type = LEX_STRING; + lexem->str = xmalloc(length + 1); + lexem->str[length] = 0; + char *pstr = lexem->str; + __parse_string(ctx, (void *)&pstr, __parse_string_emit); +} + +static void parse_ascii_number(struct context_t *ctx, struct lexem_t *lexem) +{ + locate_lexem(lexem, ctx); + /* skip ' */ + advance(ctx, 1); + /* we expect n<=4 character and then ' */ + int len = 0; + uint32_t value = 0; + while(!eof(ctx)) + { + if(cur_char(ctx) != '\'') + { + value = value << 8 | cur_char(ctx); + len++; + advance(ctx, 1); + } + else + break; + } + if(eof(ctx) || cur_char(ctx) != '\'') + parse_error(ctx, "Unterminated ascii number literal\n"); + if(len == 0 || len > 4) + parse_error(ctx, "Invalid ascii number literal length: only 1 to 4 characters allowed\n"); + /* skip ' */ + advance(ctx, 1); + lexem->type = LEX_NUMBER; + lexem->num = value; +} + +static void parse_number(struct context_t *ctx, struct lexem_t *lexem) +{ + locate_lexem(lexem, ctx); + /* check base */ + int base = 10; + if(cur_char(ctx) == '0' && next_valid(ctx, 1) && next_char(ctx, 1) == 'x') + { + advance(ctx, 2); + base = 16; + } + + lexem->type = LEX_NUMBER; + lexem->num = 0; + while(!eof(ctx) && isxdigit(cur_char(ctx))) + { + if(base == 10 && !isdigit(cur_char(ctx))) + break; + byte v; + if(convxdigit(cur_char(ctx), &v)) + break; + lexem->num = base * lexem->num + v; + advance(ctx, 1); + } +} + +static void parse_identifier(struct context_t *ctx, struct lexem_t *lexem) +{ + locate_lexem(lexem, ctx); + /* remember position */ + char *old = ctx->ptr; + while(!eof(ctx) && (isalnum(cur_char(ctx)) || cur_char(ctx) == '_')) + advance(ctx, 1); + lexem->type = LEX_IDENTIFIER; + int len = ctx->ptr - old; + lexem->str = xmalloc(len + 1); + lexem->str[len] = 0; + memcpy(lexem->str, old, len); +} + +static void next_lexem(struct context_t *ctx, struct lexem_t *lexem) +{ + #define ret_simple(t, adv) \ + do {locate_lexem(lexem, ctx); \ + lexem->type = t; \ + advance(ctx, adv); \ + return;} while(0) + while(!eof(ctx)) + { + char c = cur_char(ctx); + /* skip whitespace */ + if(c == ' ' || c == '\t' || c == '\n' || c == '\r') + { + advance(ctx, 1); + continue; + } + /* skip C++ style comments */ + if(c == '/' && next_valid(ctx, 1) && next_char(ctx, 1) == '/') + { + while(!eof(ctx) && cur_char(ctx) != '\n') + advance(ctx, 1); + continue; + } + /* skip C-style comments */ + if(c == '/' && next_valid(ctx, 1) && next_char(ctx, 1) == '*') + { + advance(ctx, 2); + while(true) + { + if(!next_valid(ctx, 1)) + parse_error(ctx, "Unterminated comment"); + if(cur_char(ctx) == '*' && next_char(ctx, 1) == '/') + { + advance(ctx, 2); + break; + } + advance(ctx, 1); + } + continue; + } + break; + } + if(eof(ctx)) ret_simple(LEX_EOF, 0); + char c = cur_char(ctx); + if(c == '(') ret_simple(LEX_LPAREN, 1); + if(c == ')') ret_simple(LEX_RPAREN, 1); + if(c == '{') ret_simple(LEX_LBRACE, 1); + if(c == '}') ret_simple(LEX_RBRACE, 1); + if(c == '>') ret_simple(LEX_RANGLE, 1); + if(c == '=') ret_simple(LEX_EQUAL, 1); + if(c == ';') ret_simple(LEX_SEMICOLON, 1); + if(c == '"') return parse_string(ctx, lexem); + if(c == '\'') return parse_ascii_number(ctx, lexem); + if(isdigit(c)) return parse_number(ctx, lexem); + if(isalpha(c) || c == '_') return parse_identifier(ctx, lexem); + parse_error(ctx, "Unexpected character '%c'\n", c); + #undef ret_simple +} + +#if 0 +static void log_lexem(struct lexem_t *lexem) +{ + switch(lexem->type) + { + case LEX_EOF: printf(""); break; + case LEX_EQUAL: printf("="); break; + case LEX_IDENTIFIER: printf("id(%s)", lexem->str); break; + case LEX_LPAREN: printf("("); break; + case LEX_RPAREN: printf(")"); break; + case LEX_LBRACE: printf("{"); break; + case LEX_RBRACE: printf("}"); break; + case LEX_SEMICOLON: printf(";"); break; + case LEX_NUMBER: printf("num(%d)", lexem->num); break; + case LEX_STRING: printf("str(%s)", lexem->str); break; + default: printf(""); + } +} +#endif + +struct cmd_source_t *db_find_source_by_id(struct cmd_file_t *cmd_file, const char *id) +{ + struct cmd_source_t *src = cmd_file->source_list; + while(src) + { + if(strcmp(src->identifier, id) == 0) + return src; + src = src->next; + } + return NULL; +} + +static void generate_default_version(struct sb_version_t *ver) +{ + ver->major = 0x999; + ver->minor = 0x999; + ver->revision = 0x999; +} + +#define INVALID_SB_SUBVERSION 0xffff + +static uint16_t parse_sb_subversion(char *str) +{ + int len = strlen(str); + uint16_t n = 0; + if(len == 0 || len > 4) + return INVALID_SB_SUBVERSION; + for(int i = 0; i < len; i++) + { + if(!isdigit(str[i])) + return INVALID_SB_SUBVERSION; + n = n << 4 | (str[i] - '0'); + } + return n; +} + +bool db_parse_sb_version(struct sb_version_t *ver, char *str) +{ + int len = strlen(str); + int cnt = 0; + int pos[2]; + + for(int i = 0; i < len; i++) + { + if(str[i] != '.') + continue; + if(cnt == 2) + return false; + pos[cnt++] = i + 1; + str[i] = 0; + } + if(cnt != 2) + return false; + ver->major = parse_sb_subversion(str); + ver->minor = parse_sb_subversion(str + pos[0]); + ver->revision = parse_sb_subversion(str + pos[1]); + return ver->major != INVALID_SB_SUBVERSION && + ver->minor != INVALID_SB_SUBVERSION && + ver->revision != INVALID_SB_SUBVERSION; +} + +#undef parse_error +#define parse_error(lexem, ...) \ + do { fprintf(stderr, "%s:%d: ", lexem.file, lexem.line); \ + fprintf(stderr, __VA_ARGS__); exit(2); } while(0) + +struct cmd_file_t *db_parse_file(const char *file) +{ + size_t size; + FILE *f = fopen(file, "r"); + if(f == NULL) + bugp("Cannot open file '%s'", file); + fseek(f, 0, SEEK_END); + size = ftell(f); + fseek(f, 0, SEEK_SET); + char *buf = xmalloc(size); + if(fread(buf, size, 1, f) != 1) + bugp("Cannot read file '%s'", file); + fclose(f); + + if(g_debug) + printf("Parsing db file '%s'\n", file); + struct cmd_file_t *cmd_file = xmalloc(sizeof(struct cmd_file_t)); + memset(cmd_file, 0, sizeof(struct cmd_file_t)); + + generate_default_version(&cmd_file->product_ver); + generate_default_version(&cmd_file->component_ver); + + struct lexem_t lexem; + struct context_t ctx; + ctx.file = file; + ctx.line = 1; + ctx.begin = buf; + ctx.ptr = buf; + ctx.end = buf + size; + #define next() next_lexem(&ctx, &lexem) + /* init lexer */ + next(); + /* options ? */ + if(lexem.type == LEX_IDENTIFIER && !strcmp(lexem.str, "options")) + { + next(); + if(lexem.type != LEX_LBRACE) + parse_error(lexem, "'{' expected after 'options'\n"); + + while(true) + { + next(); + if(lexem.type == LEX_RBRACE) + break; + if(lexem.type != LEX_IDENTIFIER) + parse_error(lexem, "Identifier expected in options\n"); + char *opt = lexem.str; + next(); + if(lexem.type != LEX_EQUAL) + parse_error(lexem, "'=' expected after identifier\n"); + next(); + if(!strcmp(opt, "productVersion") || !strcmp(opt, "componentVersion")) + { + if(lexem.type != LEX_STRING) + parse_error(lexem, "String expected after '='\n"); + bool ret; + if(!strcmp(opt, "productVersion")) + ret = db_parse_sb_version(&cmd_file->product_ver, lexem.str); + else + ret = db_parse_sb_version(&cmd_file->component_ver, lexem.str); + if(!ret) + parse_error(lexem, "Invalid product/component version"); + } + else + parse_error(lexem, "Unknown option '%s'\n", opt); + next(); + if(lexem.type != LEX_SEMICOLON) + parse_error(lexem, "';' expected after string\n"); + } + next(); + } + /* sources */ + if(lexem.type != LEX_IDENTIFIER || strcmp(lexem.str, "sources")) + parse_error(lexem, "'sources' expected\n"); + next(); + if(lexem.type != LEX_LBRACE) + parse_error(lexem, "'{' expected after 'sources'\n"); + + while(true) + { + next(); + if(lexem.type == LEX_RBRACE) + break; + struct cmd_source_t *src = xmalloc(sizeof(struct cmd_source_t)); + memset(src, 0, sizeof(struct cmd_source_t)); + src->next = cmd_file->source_list; + if(lexem.type != LEX_IDENTIFIER) + parse_error(lexem, "identifier expected in sources\n"); + src->identifier = lexem.str; + next(); + if(lexem.type != LEX_EQUAL) + parse_error(lexem, "'=' expected after identifier\n"); + next(); + if(lexem.type != LEX_STRING) + parse_error(lexem, "String expected after '='\n"); + src->filename = lexem.str; + next(); + if(lexem.type != LEX_SEMICOLON) + parse_error(lexem, "';' expected after string\n"); + if(db_find_source_by_id(cmd_file, src->identifier) != NULL) + parse_error(lexem, "Duplicate source identifier\n"); + /* type filled later */ + src->type = CMD_SRC_UNK; + cmd_file->source_list = src; + } + + /* sections */ + struct cmd_section_t *end_sec = NULL; + while(true) + { + struct cmd_section_t *sec = xmalloc(sizeof(struct cmd_section_t)); + struct cmd_inst_t *end_list = NULL; + memset(sec, 0, sizeof(struct cmd_section_t)); + next(); + if(lexem.type == LEX_EOF) + break; + if(lexem.type != LEX_IDENTIFIER || strcmp(lexem.str, "section") != 0) + parse_error(lexem, "'section' expected\n"); + next(); + if(lexem.type != LEX_LPAREN) + parse_error(lexem, "'(' expected after 'section'\n"); + next(); + /* can be a number or a 4 character long string */ + if(lexem.type == LEX_NUMBER) + { + sec->identifier = lexem.num; + } + else + parse_error(lexem, "Number expected as section identifier\n"); + + next(); + if(lexem.type != LEX_RPAREN) + parse_error(lexem, "')' expected after section identifier\n"); + next(); + if(lexem.type != LEX_LBRACE) + parse_error(lexem, "'{' expected after section directive\n"); + /* commands */ + while(true) + { + struct cmd_inst_t *inst = xmalloc(sizeof(struct cmd_inst_t)); + memset(inst, 0, sizeof(struct cmd_inst_t)); + next(); + if(lexem.type == LEX_RBRACE) + break; + if(lexem.type != LEX_IDENTIFIER) + parse_error(lexem, "Instruction expected in section\n"); + if(strcmp(lexem.str, "load") == 0) + inst->type = CMD_LOAD; + else if(strcmp(lexem.str, "call") == 0) + inst->type = CMD_CALL; + else if(strcmp(lexem.str, "jump") == 0) + inst->type = CMD_JUMP; + else if(strcmp(lexem.str, "mode") == 0) + inst->type = CMD_MODE; + else + parse_error(lexem, "Instruction expected in section\n"); + next(); + + if(inst->type == CMD_LOAD) + { + if(lexem.type != LEX_IDENTIFIER) + parse_error(lexem, "Identifier expected after instruction\n"); + inst->identifier = lexem.str; + if(db_find_source_by_id(cmd_file, inst->identifier) == NULL) + parse_error(lexem, "Undefined reference to source '%s'\n", inst->identifier); + next(); + if(lexem.type == LEX_RANGLE) + { + // load at + inst->type = CMD_LOAD_AT; + next(); + if(lexem.type != LEX_NUMBER) + parse_error(lexem, "Number expected for loading address\n"); + inst->addr = lexem.num; + next(); + } + if(lexem.type != LEX_SEMICOLON) + parse_error(lexem, "';' expected after command\n"); + } + else if(inst->type == CMD_CALL || inst->type == CMD_JUMP) + { + if(lexem.type == LEX_IDENTIFIER) + { + inst->identifier = lexem.str; + if(db_find_source_by_id(cmd_file, inst->identifier) == NULL) + parse_error(lexem, "Undefined reference to source '%s'\n", inst->identifier); + next(); + } + else if(lexem.type == LEX_NUMBER) + { + inst->type = (inst->type == CMD_CALL) ? CMD_CALL_AT : CMD_JUMP_AT; + inst->addr = lexem.num; + next(); + } + else + parse_error(lexem, "Identifier or number expected after jump/load\n"); + + if(lexem.type == LEX_LPAREN) + { + next(); + if(lexem.type != LEX_NUMBER) + parse_error(lexem, "Expected numeral expression after (\n"); + inst->argument = lexem.num; + next(); + if(lexem.type != LEX_RPAREN) + parse_error(lexem, "Expected closing brace\n"); + next(); + } + if(lexem.type != LEX_SEMICOLON) + parse_error(lexem, "Expected ';' after command\n"); + } + else if(inst->type == CMD_MODE) + { + if(lexem.type != LEX_NUMBER) + parse_error(lexem, "Number expected after 'mode'\n"); + inst->argument = lexem.num; + next(); + if(lexem.type != LEX_SEMICOLON) + parse_error(lexem, "Expected ';' after command\n"); + } + else + parse_error(lexem, "Internal error"); + if(end_list == NULL) + { + sec->inst_list = inst; + end_list = inst; + } + else + { + end_list->next = inst; + end_list = inst; + } + } + + if(end_sec == NULL) + { + cmd_file->section_list = sec; + end_sec = sec; + } + else + { + end_sec->next = sec; + end_sec = sec; + } + } + #undef next + + return cmd_file; +} -- cgit v1.2.3