diff --git a/libretro-db/Makefile b/libretro-db/Makefile
index 6cd5d6a5ca..777ee2f0c4 100644
--- a/libretro-db/Makefile
+++ b/libretro-db/Makefile
@@ -16,6 +16,22 @@ LUA_CONVERTER_C = \
 
 LUA_CONVERTER_C_OBJS := $(LUA_CONVERTER_C:.c=.o)
 
+PLAIN_CONVERTER_C = \
+	djb2.c \
+	lexer.c \
+	parser.c \
+	rmsgpack.c \
+	rmsgpack_dom.c \
+	libretrodb.c \
+	bintree.c \
+	query.c \
+	main.c \
+	$(LIBRETRO_COMMON_DIR)/compat/compat_fnmatch.c \
+	$(LIBRETRO_COMMON_DIR)/file/retro_file.c \
+	$(LIBRETRO_COMMON_DIR)/compat/compat.c
+
+PLAIN_CONVERTER_C_OBJS := $(PLAIN_CONVERTER_C:.c=.o)
+
 RARCHDB_TOOL_C = \
 	rmsgpack.c \
 	rmsgpack_dom.c \
@@ -55,11 +71,14 @@ TESTLIB_FLAGS = ${CFLAGS} ${LUA_FLAGS} -shared -fpic
 
 .PHONY: all clean check
 
-all: rmsgpack_test libretrodb_tool lua_converter
+all: rmsgpack_test libretrodb_tool lua_converter plain_dat_converter
 
 %.o: %.c
 	${CC} $(INCFLAGS) $< -c ${CFLAGS} -o $@
 
+plain_dat_converter: ${PLAIN_CONVERTER_C_OBJS}
+	${CC} $(INCFLAGS) ${PLAIN_CONVERTER_C_OBJS} -o $@
+
 lua_converter: ${LUA_CONVERTER_C_OBJS}
 	${CC} $(INCFLAGS) ${LUA_CONVERTER_C_OBJS} ${LUA_FLAGS} -o $@
 
@@ -79,4 +98,4 @@ clean:
 	rm -rf $(LIBRETRO_COMMON_DIR)/*.o
 	rm -rf $(LIBRETRO_COMMON_DIR)/compat/*.o
 	rm -rf $(LIBRETRO_COMMON_DIR)/file/*.o
-	rm -rf *.o rmsgpack_test lua_converter libretrodb_tool testlib.so
+	rm -rf *.o rmsgpack_test lua_converter plain_dat_converter libretrodb_tool testlib.so
diff --git a/libretro-db/djb2.c b/libretro-db/djb2.c
new file mode 100644
--- /dev/null
+++ b/libretro-db/djb2.c
@@ -0,0 +1,18 @@
+#include "djb2.h"
+
+/*
+ * Computes the djb2 hash (hash = hash * 33 + byte, seeded with 5381) over
+ * the first len bytes of str. The input does not need to be NUL-terminated.
+ */
+unsigned djb2( const char* str, unsigned len )
+{
+  const unsigned char* aux = (const unsigned char*)str;
+  unsigned hash = 5381;
+
+  while ( len-- )
+  {
+    hash = ( hash << 5 ) + hash + *aux++;
+  }
+
+  return hash;
+}
diff --git a/libretro-db/djb2.h b/libretro-db/djb2.h
new file mode 100644
--- /dev/null
+++ b/libretro-db/djb2.h
@@ -0,0 +1,7 @@
+#ifndef DJB2_H
+#define DJB2_H
+
+/* Returns the djb2 hash of the first len bytes of str. */
+unsigned djb2( const char* str, unsigned len );
+
+#endif /* DJB2_H */
diff --git a/libretro-db/lexer.c b/libretro-db/lexer.c
new file mode 100644
--- /dev/null
+++ b/libretro-db/lexer.c
@@ -0,0 +1,161 @@
+#include <ctype.h>
+#include <stddef.h>
+
+#include "lexer.h"
+
+/*
+ * Returns the character at the current position as an unsigned char value
+ * (the form the <ctype.h> functions require), or 0 once the end of the
+ * input has been reached.
+ */
+static int peek( const lx_state_t* lexer )
+{
+  if ( lexer->current < lexer->end )
+    return (unsigned char)*lexer->current;
+
+  return 0;
+}
+
+/* Advances one character, never moving past the end of the input. */
+static void skip( lx_state_t* lexer )
+{
+  if ( lexer->current < lexer->end )
+    lexer->current++;
+}
+
+/*
+ * Scans the next token, storing its kind in lexer->token and its text in
+ * lexer->start / lexer->len (for strings, the text between the quotes).
+ * Returns 0 on success, LX_UNTERMINATED_STRING or LX_INVALID_CHARACTER on
+ * error. A NUL byte between tokens is treated as the end of the input.
+ */
+int lx_next( lx_state_t* lexer )
+{
+  /* skip spaces */
+  for ( ;; )
+  {
+    int c = peek( lexer );
+
+    if ( isspace( c ) )
+    {
+      if ( c == '\n' )
+        lexer->line++;
+
+      skip( lexer );
+    }
+    else if ( c != 0 )
+    {
+      break;
+    }
+    else
+    {
+      /* return LX_EOF if we've reached the end of the input */
+      lexer->start = "<eof>";
+      lexer->len = 5;
+      lexer->token = LX_EOF;
+      return 0;
+    }
+  }
+
+  lexer->start = lexer->current;
+
+  /* if the character is alphabetic or '_', the token is an identifier */
+  if ( isalpha( peek( lexer ) ) || peek( lexer ) == '_' )
+  {
+    /* get all alphanumeric and '_' characters */
+    do
+    {
+      skip( lexer );
+    }
+    while ( isalnum( peek( lexer ) ) || peek( lexer ) == '_' );
+
+    lexer->len = lexer->current - lexer->start;
+    lexer->token = LX_TAG;
+    return 0;
+  }
+
+  /* if the character is an hex digit, the token is a number */
+  if ( isxdigit( peek( lexer ) ) )
+  {
+    do
+    {
+      skip( lexer );
+    }
+    while ( isxdigit( peek( lexer ) ) );
+
+    if ( peek( lexer ) == '-' )
+    {
+      do
+      {
+        skip( lexer );
+      }
+      while ( isxdigit( peek( lexer ) ) );
+
+      lexer->token = LX_VERSION;
+    }
+    else
+    {
+      lexer->token = LX_NUMBER;
+    }
+
+    lexer->len = lexer->current - lexer->start;
+    return 0;
+  }
+
+  /* if the character is a quote, it's a string */
+  if ( peek( lexer ) == '"' )
+  {
+    /* get anything until another quote that isn't preceded by a backslash */
+    do
+    {
+      skip( lexer );
+
+      if ( peek( lexer ) == '"' && lexer->current[ -1 ] != '\\' )
+      {
+        break;
+      }
+    }
+    while ( lexer->current < lexer->end );
+
+    if ( lexer->current == lexer->end )
+    {
+      return LX_UNTERMINATED_STRING;
+    }
+
+    skip( lexer );
+    lexer->start++;
+    lexer->len = lexer->current - lexer->start - 1;
+    lexer->token = LX_STRING;
+    return 0;
+  }
+
+  /* otherwise the token is a symbol */
+  lexer->len = 1;
+
+  switch ( peek( lexer ) )
+  {
+  case '(':
+    lexer->token = LX_LPAREN;
+    break;
+  case ')':
+    lexer->token = LX_RPAREN;
+    break;
+  default:
+    /* stay on the offending character so that callers can report it */
+    return LX_INVALID_CHARACTER;
+  }
+
+  skip( lexer );
+  return 0;
+}
+
+/* Prepares lexer to scan the srclen bytes starting at source. */
+void lx_new( lx_state_t* lexer, const char* source, unsigned srclen )
+{
+  lexer->line = 1;
+  lexer->current = source;
+  lexer->end = source + srclen;
+  lexer->start = NULL;
+  lexer->len = 0;
+  lexer->token = 0;
+}
diff --git a/libretro-db/lexer.h b/libretro-db/lexer.h
new file mode 100644
--- /dev/null
+++ b/libretro-db/lexer.h
@@ -0,0 +1,45 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+/* error codes returned by lx_next */
+enum
+{
+  LX_UNTERMINATED_STRING = -1,
+  LX_INVALID_CHARACTER   = -2
+};
+
+/* token kinds stored in lx_state_t.token */
+enum
+{
+  LX_EOF = 256,
+  LX_TAG,
+  LX_NUMBER,
+  LX_STRING,
+  LX_VERSION,
+  LX_LPAREN,
+  LX_RPAREN
+};
+
+typedef struct
+{
+  /* source code */
+  int line;
+
+  /* lexer state */
+  const char* current;
+  const char* end;
+
+  /* lookahead */
+  int         token;
+  const char* start;
+  unsigned    len;
+}
+lx_state_t;
+
+/* Scans the next token; returns 0 or one of the LX_* error codes. */
+int  lx_next( lx_state_t* lexer );
+
+/* Initializes lexer to scan the srclen bytes starting at source. */
+void lx_new( lx_state_t* lexer, const char* source, unsigned srclen );
+
+#endif /* LEXER_H */
diff --git a/libretro-db/main.c b/libretro-db/main.c
new file mode 100644
--- /dev/null
+++ b/libretro-db/main.c
@@ -0,0 +1,277 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* NOTE(review): assumed to be the header that declares RFILE and retro_f*(); confirm */
+#include <retro_file.h>
+
+#include "parser.h"
+#include "djb2.h"
+#include "libretrodb.h"
+
+/* Formats the character at the lexer's current position for error messages. */
+static const char *printchar( pr_state_t* parser )
+{
+  static char k[ 16 ];
+  unsigned char c = (unsigned char)*parser->lexer.current;
+
+  if ( c < 32 || c > 126 )
+  {
+    snprintf( k, sizeof( k ), "\\x%02x", c );
+  }
+  else
+  {
+    k[ 0 ] = (char)c;
+    k[ 1 ] = 0;
+  }
+
+  return k;
+}
+
+/* Copies the lookahead token, truncated to 255 characters, for error messages. */
+static const char *printtoken( pr_state_t* parser )
+{
+  static char k[ 256 ];
+  unsigned len = parser->lexer.len;
+
+  if ( len > sizeof( k ) - 1 )
+    len = sizeof( k ) - 1;
+
+  memcpy( k, parser->lexer.start, len );
+  k[ len ] = 0;
+  return k;
+}
+
+/* Returns a malloc'ed, NUL-terminated copy of the len bytes at str, or NULL. */
+static char *dup_string( const char* str, unsigned len )
+{
+  char *dup = (char*)malloc( len + 1 );
+
+  if ( dup )
+  {
+    memcpy( dup, str, len );
+    dup[ len ] = 0;
+  }
+
+  return dup;
+}
+
+/*
+ * Decodes the len hexadecimal digits at str into a malloc'ed buffer of
+ * len / 2 bytes. Returns NULL if the allocation fails.
+ */
+static unsigned char *dup_binary( const char* str, unsigned len )
+{
+  char byte[ 3 ];
+  unsigned count = len / 2;
+  unsigned char* dup = (unsigned char*)malloc( count );
+  unsigned char* aux = dup;
+
+  byte[ 2 ] = 0;
+
+  if ( dup )
+  {
+    while ( count-- )
+    {
+      byte[ 0 ] = *str++;
+      byte[ 1 ] = *str++;
+      *aux++ = (unsigned char)strtol( byte, NULL, 16 );
+    }
+  }
+
+  return dup;
+}
+
+/*
+ * Value provider for libretrodb_create: converts the game *ctx points to
+ * into a map and advances *ctx to the next game. Returns 0 when a value
+ * was produced, 1 when there are no more games and -1 when out of memory.
+ * NOTE(review): assumes libretrodb_create treats a negative result as an
+ * error; confirm against libretrodb.c.
+ */
+static int provider( void* ctx, struct rmsgpack_dom_value* out )
+{
+  unsigned i, hash;
+  pr_node_t** game_ptr = (pr_node_t**)ctx;
+  pr_node_t* game = *game_ptr;
+  struct rmsgpack_dom_pair* item;
+
+  if ( game == NULL )
+    return 1;
+
+  *game_ptr = game->next;
+
+  out->type = RDT_MAP;
+  out->val.map.len = game->count;
+  out->val.map.items = calloc( game->count, sizeof( struct rmsgpack_dom_pair ) );
+
+  if ( game->count != 0 && out->val.map.items == NULL )
+  {
+    out->val.map.len = 0;
+    return -1;
+  }
+
+  for ( i = 0; i < game->count; i++ )
+  {
+    item = &out->val.map.items[ i ];
+
+    item->key.type = RDT_STRING;
+    item->key.val.string.len = game->pairs[ i ].key_len;
+    item->key.val.string.buff = dup_string( game->pairs[ i ].key, game->pairs[ i ].key_len );
+
+    if ( item->key.val.string.buff == NULL )
+      return -1;
+
+    hash = djb2( game->pairs[ i ].key, game->pairs[ i ].key_len );
+
+    switch ( hash )
+    {
+    case 0x0b88671dU: /* crc */
+    case 0x0f3ea922U: /* crc32 */
+    case 0x0b888fabU: /* md5 */
+    case 0x7c9de632U: /* sha1 */
+      item->value.type = RDT_BINARY;
+      item->value.val.binary.len = game->pairs[ i ].value_len / 2;
+      item->value.val.binary.buff = dup_binary( game->pairs[ i ].value, game->pairs[ i ].value_len );
+
+      if ( item->value.val.binary.buff == NULL && item->value.val.binary.len != 0 )
+        return -1;
+
+      break;
+
+    case 0x7c9dede0U: /* size */
+      item->value.type = RDT_UINT;
+      item->value.val.uint_ = strtol( game->pairs[ i ].value, NULL, 10 );
+      break;
+
+    default:
+      item->value.type = RDT_STRING;
+      item->value.val.string.len = game->pairs[ i ].value_len;
+      item->value.val.string.buff = dup_string( game->pairs[ i ].value, game->pairs[ i ].value_len );
+
+      if ( item->value.val.string.buff == NULL )
+        return -1;
+
+      break;
+    }
+  }
+
+  return 0;
+}
+
+/* Frees every node of the list of games produced by pr_parse. */
+static void free_games( pr_node_t* game )
+{
+  pr_node_t* next;
+
+  while ( game )
+  {
+    next = game->next;
+    free( game );
+    game = next;
+  }
+}
+
+/*
+ * Converts the clrmamepro DAT file argv[ 2 ] into the libretrodb database
+ * argv[ 1 ]. Returns 0 on success and non-zero on failure.
+ */
+int main( int argc, const char* argv[] )
+{
+  char* source;
+  long size;
+  pr_state_t parser;
+  pr_node_t* game;
+  RFILE *out, *file;
+  int res;
+
+  if ( argc != 3 )
+  {
+    fprintf( stderr, "usage:\ndatconv <db file> <dat file>\n\n" );
+    return 1;
+  }
+
+  file = retro_fopen( argv[ 2 ], RFILE_MODE_READ, -1 );
+
+  if ( !file )
+  {
+    fprintf( stderr, "Error opening DAT file: %s\n", argv[ 2 ] );
+    return 1;
+  }
+
+  retro_fseek( file, 0, SEEK_END );
+  size = (long)retro_ftell( file );
+  retro_fseek( file, 0, SEEK_SET );
+
+  if ( size < 0 )
+  {
+    retro_fclose( file );
+    fprintf( stderr, "Error reading DAT file: %s\n", argv[ 2 ] );
+    return 1;
+  }
+
+  source = (char*)malloc( (size_t)size + 1 );
+
+  if ( source == NULL )
+  {
+    retro_fclose( file );
+    fprintf( stderr, "Out of memory\n" );
+    return 1;
+  }
+
+  /* a short read would leave uninitialized bytes in the buffer being parsed */
+  if ( (long)retro_fread( file, (void*)source, (size_t)size ) != size )
+  {
+    retro_fclose( file );
+    free( source );
+    fprintf( stderr, "Error reading DAT file: %s\n", argv[ 2 ] );
+    return 1;
+  }
+
+  retro_fclose( file );
+  source[ size ] = 0;
+
+  pr_new( &parser, source, (unsigned)size );
+  res = pr_parse( &parser );
+
+  switch ( res )
+  {
+  case PR_UNTERMINATED_STRING:
+    fprintf( stderr, "%s:%d: Unterminated string\n", argv[ 2 ], parser.lexer.line );
+    break;
+  case PR_INVALID_CHARACTER:
+    fprintf( stderr, "%s:%d: Invalid character %s\n", argv[ 2 ], parser.lexer.line, printchar( &parser ) );
+    break;
+  case PR_UNEXPECTED_TOKEN:
+    fprintf( stderr, "%s:%d: Unexpected token \"%s\"\n", argv[ 2 ], parser.lexer.line, printtoken( &parser ) );
+    break;
+  case PR_OUT_OF_MEMORY:
+    fprintf( stderr, "%s:%d: Out of memory\n", argv[ 2 ], parser.lexer.line );
+    break;
+  case PR_TOO_MANY_PAIRS:
+    fprintf( stderr, "%s:%d: Too many keys in a single game (maximum is %d)\n", argv[ 2 ], parser.lexer.line, PR_MAX_PAIRS );
+    break;
+  default:
+    /* provider advances the pointer it is given, so hand it a cursor and
+       keep parser.first intact for freeing the list afterwards */
+    game = parser.first;
+    out = retro_fopen( argv[ 1 ], RFILE_MODE_WRITE, -1 );
+
+    if ( out )
+    {
+      res = libretrodb_create( out, &provider, (void*)&game );
+      retro_fclose( out );
+    }
+    else
+    {
+      fprintf( stderr, "Error opening database file: %s\n", argv[ 1 ] );
+      res = 1;
+    }
+
+    break;
+  }
+
+  free_games( parser.first );
+  free( source );
+  return res;
+}
diff --git a/libretro-db/parser.c b/libretro-db/parser.c
new file mode 100644
--- /dev/null
+++ b/libretro-db/parser.c
@@ -0,0 +1,185 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "djb2.h"
+#include "parser.h"
+
+/* Fetches the next token, aborting the parse on lexer errors. */
+static void match_any( pr_state_t* parser )
+{
+  switch ( lx_next( &parser->lexer ) )
+  {
+  case LX_UNTERMINATED_STRING:
+    longjmp( parser->env, PR_UNTERMINATED_STRING );
+  case LX_INVALID_CHARACTER:
+    longjmp( parser->env, PR_INVALID_CHARACTER );
+  }
+}
+
+/* Consumes the lookahead if it is of the given kind, aborts otherwise. */
+static void match( pr_state_t* parser, int token )
+{
+  if ( parser->lexer.token != token )
+    longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+
+  match_any( parser );
+}
+
+/* Consumes the lookahead if it is exactly the given tag, aborts otherwise. */
+static void match_tag( pr_state_t* parser, const char* tag )
+{
+  size_t taglen = strlen( tag );
+
+  /* compare the lengths too, otherwise any tag starting with tag would match */
+  if ( parser->lexer.token != LX_TAG || parser->lexer.len != taglen || strncmp( parser->lexer.start, tag, taglen ) )
+    longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+
+  match_any( parser );
+}
+
+/*
+ * Stores the lookahead as the value of key in node, replacing the value of
+ * an existing pair with the same key. Inside a rom map the "name" key is
+ * stored as "rom_name".
+ */
+static void parse_value( pr_state_t* parser, const char* key, unsigned keylen, pr_node_t* node, int isrom )
+{
+  unsigned i;
+
+  if ( parser->lexer.token != LX_STRING && parser->lexer.token != LX_NUMBER && parser->lexer.token != LX_TAG )
+    longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+
+  if ( isrom && keylen == 4 && !strncmp( key, "name", 4 ) )
+  {
+    key = "rom_name";
+    keylen = 8;
+  }
+
+  for ( i = 0; i < node->count; i++ )
+  {
+    if ( keylen == node->pairs[ i ].key_len && !strncmp( key, node->pairs[ i ].key, keylen ) )
+      break;
+  }
+
+  if ( i == node->count )
+  {
+    /* never write past the end of the fixed-size pairs array */
+    if ( node->count == PR_MAX_PAIRS )
+      longjmp( parser->env, PR_TOO_MANY_PAIRS );
+
+    node->count++;
+  }
+
+  node->pairs[ i ].key = key;
+  node->pairs[ i ].key_len = keylen;
+
+  node->pairs[ i ].value = parser->lexer.start;
+  node->pairs[ i ].value_len = parser->lexer.len;
+
+  match_any( parser );
+}
+
+/*
+ * Parses a parenthesized list of key/value pairs. When skip is non-zero the
+ * pairs are parsed but discarded; otherwise they are stored in parser->node.
+ * isrom is non-zero while parsing the map that follows a "rom" key.
+ */
+static void parse_map( pr_state_t* parser, int skip, int isrom )
+{
+  pr_node_t dummy;
+  pr_node_t* node;
+  unsigned hash;
+  const char* key;
+  unsigned keylen;
+
+  if ( skip )
+  {
+    node = &dummy;
+    dummy.count = 0;
+  }
+  else
+    node = parser->node;
+
+  match( parser, LX_LPAREN );
+
+  while ( parser->lexer.token != LX_RPAREN )
+  {
+    if ( parser->lexer.token != LX_TAG )
+      longjmp( parser->env, PR_UNEXPECTED_TOKEN );
+
+    key = parser->lexer.start;
+    keylen = parser->lexer.len;
+
+    hash = djb2( key, keylen );
+
+    match_any( parser );
+
+    switch ( hash )
+    {
+    case 0x0b88a693U: /* rom */
+      parse_map( parser, skip, 1 );
+      break;
+
+    default:
+      parse_value( parser, key, keylen, node, isrom );
+      break;
+    }
+  }
+
+  match_any( parser );
+}
+
+/* Parses the "clrmamepro ( ... )" header, discarding its contents. */
+static void parse_clrmamepro( pr_state_t* parser )
+{
+  match_tag( parser, "clrmamepro" );
+  parse_map( parser, 1, 0 );
+}
+
+/* Parses a "game ( ... )" entry and appends it to the list of games. */
+static void parse_game( pr_state_t* parser )
+{
+  pr_node_t* node;
+
+  match_tag( parser, "game" );
+
+  node = (pr_node_t*)malloc( sizeof( pr_node_t ) );
+
+  if ( node == NULL )
+    longjmp( parser->env, PR_OUT_OF_MEMORY );
+
+  node->count = 0;
+  node->next = NULL;
+  parser->node = node;
+  *parser->prev = node;
+  parser->prev = &node->next;
+  parse_map( parser, 0, 0 );
+}
+
+/* Initializes parser to parse the srclen bytes starting at source. */
+void pr_new( pr_state_t* parser, const char* source, unsigned srclen )
+{
+  lx_new( &parser->lexer, source, srclen );
+  parser->node = NULL;
+  parser->first = NULL;
+  parser->prev = &parser->first;
+}
+
+/* Parses the header and every game; returns 0 or a PR_* error code. */
+int pr_parse( pr_state_t* parser )
+{
+  int res;
+
+  if ( ( res = setjmp( parser->env ) ) == 0 )
+  {
+    match_any( parser );
+    parse_clrmamepro( parser );
+
+    while ( parser->lexer.token != LX_EOF )
+      parse_game( parser );
+  }
+
+  /* terminate the list even when the parse was aborted midway */
+  *parser->prev = NULL;
+  return res;
+}
diff --git a/libretro-db/parser.h b/libretro-db/parser.h
new file mode 100644
--- /dev/null
+++ b/libretro-db/parser.h
@@ -0,0 +1,61 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include <setjmp.h>
+
+#include "lexer.h"
+
+/* maximum number of key/value pairs stored for a single game */
+#define PR_MAX_PAIRS 64
+
+/* error codes returned by pr_parse */
+enum
+{
+  PR_UNTERMINATED_STRING = -1,
+  PR_INVALID_CHARACTER   = -2,
+  PR_UNEXPECTED_TOKEN    = -3,
+  PR_OUT_OF_MEMORY       = -4,
+  PR_TOO_MANY_PAIRS      = -5
+};
+
+/* a key/value pair; the strings point into the source text (or to a literal) and are not NUL-terminated at their length */
+typedef struct
+{
+  const char* key;
+  unsigned    key_len;
+  const char* value;
+  unsigned    value_len;
+}
+pr_pair_t;
+
+typedef struct pr_node_t pr_node_t;
+
+/* one game entry; nodes form a singly linked list in source order */
+struct pr_node_t
+{
+  pr_pair_t  pairs[ PR_MAX_PAIRS ];
+  unsigned   count;
+  pr_node_t* next;
+};
+
+typedef struct
+{
+  lx_state_t  lexer;
+  pr_node_t*  node;  /* game being filled in */
+  pr_node_t*  first; /* head of the list of games */
+  pr_node_t** prev;  /* where the next game must be linked */
+  jmp_buf     env;   /* error recovery point set by pr_parse */
+}
+pr_state_t;
+
+/* Initializes parser to parse the srclen bytes starting at source. */
+void pr_new( pr_state_t* parser, const char* source, unsigned srclen );
+
+/*
+ * Parses the whole source. Returns 0 on success or one of the PR_* error
+ * codes. In both cases parser->first is the head of a NULL-terminated list
+ * of malloc'ed nodes that the caller must free.
+ */
+int  pr_parse( pr_state_t* parser );
+
+#endif /* PARSER_H */