/* vgmstream/src/meta/cri_utf.c: reader for CRI "@UTF" tables */

#include "cri_utf.h"
#include "../util/log.h"
/* Upper bound accepted for the schema section of a table (arbitrary max). */
#define UTF_MAX_SCHEMA_SIZE       0x8000

/* Each column starts with an info byte: the high nibble holds the flags
 * (see the column flag enum) and the low nibble holds the value type. */
#define COLUMN_BITMASK_FLAG       0xf0
#define COLUMN_BITMASK_TYPE       0x0f
/* Flag bits found in the high nibble of a column's info byte. */
enum columna_flag_t {
    /* column has name (may be empty) */
    COLUMN_FLAG_NAME      = 1 << 4,
    /* data is found relative to schema start (typically constant value for all rows) */
    COLUMN_FLAG_DEFAULT   = 1 << 5,
    /* data is found relative to row start */
    COLUMN_FLAG_ROW       = 1 << 6,
    /* shouldn't exist */
    COLUMN_FLAG_UNDEFINED = 1 << 7
};
/* Value types found in the low nibble of a column's info byte. */
enum column_type_t {
    COLUMN_TYPE_UINT8       = 0x00,
    COLUMN_TYPE_SINT8       = 0x01,
    COLUMN_TYPE_UINT16      = 0x02,
    COLUMN_TYPE_SINT16      = 0x03,
    COLUMN_TYPE_UINT32      = 0x04,
    COLUMN_TYPE_SINT32      = 0x05,
    COLUMN_TYPE_UINT64      = 0x06,
    COLUMN_TYPE_SINT64      = 0x07,
    COLUMN_TYPE_FLOAT       = 0x08,
    COLUMN_TYPE_DOUBLE      = 0x09,
    COLUMN_TYPE_STRING      = 0x0a, /* stored as a 32-bit offset into the string table */
    COLUMN_TYPE_VLDATA      = 0x0b, /* stored as a 32-bit offset + 32-bit size pair */
    COLUMN_TYPE_UINT128     = 0x0c, /* for GUIDs */
    COLUMN_TYPE_UNDEFINED   = -1
};
2019-12-14 19:50:56 +01:00
struct utf_context {
STREAMFILE* sf;
2019-12-14 19:50:56 +01:00
uint32_t table_offset;
/* header */
uint32_t table_size;
uint16_t version;
uint16_t rows_offset;
uint32_t strings_offset;
uint32_t data_offset;
uint32_t name_offset;
uint16_t columns;
uint16_t row_width;
uint32_t rows;
uint8_t* schema_buf;
struct utf_column_t {
2020-01-24 22:52:04 +01:00
uint8_t flag;
uint8_t type;
const char* name;
uint32_t offset;
} *schema;
2019-12-14 19:50:56 +01:00
/* derived */
uint32_t schema_offset;
uint32_t schema_size;
uint32_t rows_size;
uint32_t data_size;
2019-12-14 19:50:56 +01:00
uint32_t strings_size;
char* string_table;
const char* table_name;
2019-12-14 19:50:56 +01:00
};
/* @UTF table context creation */
2020-01-24 22:52:04 +01:00
utf_context* utf_open(STREAMFILE* sf, uint32_t table_offset, int* p_rows, const char** p_row_name) {
2019-12-14 19:50:56 +01:00
utf_context* utf = NULL;
uint8_t buf[0x20];
int bytes;
2019-12-14 19:50:56 +01:00
utf = calloc(1, sizeof(utf_context));
if (!utf) goto fail;
2019-12-15 19:37:52 +01:00
utf->sf = sf;
2019-12-14 19:50:56 +01:00
utf->table_offset = table_offset;
bytes = read_streamfile(buf, table_offset, sizeof(buf), sf);
if (bytes != sizeof(buf)) goto fail;
2019-12-14 19:50:56 +01:00
/* load table header */
if (get_u32be(buf + 0x00) != get_id32be("@UTF"))
goto fail;
utf->table_size = get_u32be(buf + 0x04) + 0x08;
utf->version = get_u16be(buf + 0x08);
utf->rows_offset = get_u16be(buf + 0x0a) + 0x08;
utf->strings_offset = get_u32be(buf + 0x0c) + 0x08;
utf->data_offset = get_u32be(buf + 0x10) + 0x08;
utf->name_offset = get_u32be(buf + 0x14); /* within string table */
utf->columns = get_u16be(buf + 0x18);
utf->row_width = get_u16be(buf + 0x1a);
utf->rows = get_u32be(buf + 0x1c);
utf->schema_offset = 0x20;
utf->schema_size = utf->rows_offset - utf->schema_offset;
utf->rows_size = utf->strings_offset - utf->rows_offset;
utf->strings_size = utf->data_offset - utf->strings_offset;
utf->data_size = utf->table_size - utf->data_offset;
2019-12-14 19:50:56 +01:00
/* 00: early (32b rows_offset?), 01: +2017 (no apparent differences) */
if (utf->version != 0x00 && utf->version != 0x01) {
vgm_logi("@UTF: unknown version\n");
goto fail;
2019-12-14 19:50:56 +01:00
}
if (utf->table_offset + utf->table_size > get_streamfile_size(sf))
goto fail;
if (utf->rows_offset > utf->table_size || utf->strings_offset > utf->table_size || utf->data_offset > utf->table_size)
2019-12-14 19:50:56 +01:00
goto fail;
if (utf->strings_size <= 0 || utf->name_offset > utf->strings_size)
2019-12-14 19:50:56 +01:00
goto fail;
/* no rows is possible for empty tables (have schema and columns names but no data) [PES 2013 (PC)] */
if (utf->columns <= 0 /*|| utf->rows <= 0 || utf->rows_width <= 0*/)
2019-12-14 19:50:56 +01:00
goto fail;
if (utf->schema_size >= UTF_MAX_SCHEMA_SIZE)
goto fail;
2019-12-14 19:50:56 +01:00
/* load sections linearly (to optimize stream) */
2019-12-14 19:50:56 +01:00
{
/* schema section: small so keep it around (useful to avoid re-reads on column values) */
utf->schema_buf = malloc(utf->schema_size);
if (!utf->schema_buf) goto fail;
bytes = read_streamfile(utf->schema_buf, utf->table_offset + utf->schema_offset, utf->schema_size, sf);
if (bytes != utf->schema_size) goto fail;
2019-12-14 19:50:56 +01:00
/* row section: skip, mid to big (0x10000~0x50000) so not preloaded for now */
/* string section: low to mid size but used to return c-strings */
2019-12-14 19:50:56 +01:00
utf->string_table = calloc(utf->strings_size + 1, sizeof(char));
if (!utf->string_table) goto fail;
bytes = read_streamfile((unsigned char*)utf->string_table, utf->table_offset + utf->strings_offset, utf->strings_size, sf);
if (bytes != utf->strings_size) goto fail;
2019-12-14 19:50:56 +01:00
/* data section: skip (may be big with memory AWB) */
2019-12-14 19:50:56 +01:00
}
/* load column schema */
{
int i;
uint32_t value_size, column_offset = 0;
int schema_pos = 0;
2019-12-14 19:50:56 +01:00
utf->table_name = utf->string_table + utf->name_offset;
2019-12-14 19:50:56 +01:00
utf->schema = malloc(utf->columns * sizeof(struct utf_column_t));
2019-12-14 19:50:56 +01:00
if (!utf->schema) goto fail;
for (i = 0; i < utf->columns; i++) {
uint8_t info = get_u8(utf->schema_buf + schema_pos + 0x00);
uint32_t name_offset = get_u32be(utf->schema_buf + schema_pos + 0x01);
2019-12-14 19:50:56 +01:00
if (name_offset > utf->strings_size)
goto fail;
schema_pos += 0x01 + 0x04;
2020-01-24 22:52:04 +01:00
utf->schema[i].flag = info & COLUMN_BITMASK_FLAG;
utf->schema[i].type = info & COLUMN_BITMASK_TYPE;
utf->schema[i].name = NULL;
utf->schema[i].offset = 0;
/* known flags are name+default or name+row, but name+default+row is mentioned in VGMToolbox
* even though isn't possible in CRI's craft utils (meaningless), and no name is apparently possible */
2020-01-24 22:52:04 +01:00
if ( (utf->schema[i].flag == 0) ||
!(utf->schema[i].flag & COLUMN_FLAG_NAME) ||
((utf->schema[i].flag & COLUMN_FLAG_DEFAULT) && (utf->schema[i].flag & COLUMN_FLAG_ROW)) ||
(utf->schema[i].flag & COLUMN_FLAG_UNDEFINED) ) {
vgm_logi("@UTF: unknown column flag combo found\n");
2020-01-24 22:52:04 +01:00
goto fail;
}
switch (utf->schema[i].type) {
2019-12-14 19:50:56 +01:00
case COLUMN_TYPE_UINT8:
2020-01-24 22:52:04 +01:00
case COLUMN_TYPE_SINT8:
2019-12-14 19:50:56 +01:00
value_size = 0x01;
break;
case COLUMN_TYPE_UINT16:
2020-01-24 22:52:04 +01:00
case COLUMN_TYPE_SINT16:
2019-12-14 19:50:56 +01:00
value_size = 0x02;
break;
case COLUMN_TYPE_UINT32:
2020-01-24 22:52:04 +01:00
case COLUMN_TYPE_SINT32:
2019-12-14 19:50:56 +01:00
case COLUMN_TYPE_FLOAT:
case COLUMN_TYPE_STRING:
value_size = 0x04;
break;
2020-01-24 22:52:04 +01:00
case COLUMN_TYPE_UINT64:
2019-12-14 19:50:56 +01:00
case COLUMN_TYPE_SINT64:
2020-01-24 22:52:04 +01:00
//case COLUMN_TYPE_DOUBLE:
case COLUMN_TYPE_VLDATA:
2019-12-14 19:50:56 +01:00
value_size = 0x08;
break;
2020-01-24 22:52:04 +01:00
//case COLUMN_TYPE_UINT128:
// value_size = 0x16;
2019-12-14 19:50:56 +01:00
default:
vgm_logi("@UTF: unknown column type\n");
2019-12-14 19:50:56 +01:00
goto fail;
}
2020-01-24 22:52:04 +01:00
if (utf->schema[i].flag & COLUMN_FLAG_NAME) {
utf->schema[i].name = utf->string_table + name_offset;
}
if (utf->schema[i].flag & COLUMN_FLAG_DEFAULT) {
utf->schema[i].offset = schema_pos;
schema_pos += value_size;
2020-01-24 22:52:04 +01:00
}
if (utf->schema[i].flag & COLUMN_FLAG_ROW) {
utf->schema[i].offset = column_offset;
column_offset += value_size;
2019-12-14 19:50:56 +01:00
}
}
}
#if 0
VGM_LOG("- %s\n", utf->table_name);
VGM_LOG("utf_o=%08x (%x)\n", utf->table_offset, utf->table_size);
VGM_LOG(" sch_o=%08x (%x), c=%i\n", utf->table_offset + utf->schema_offset, utf->schema_size, utf->columns);
VGM_LOG(" row_o=%08x (%x), r=%i\n", utf->table_offset + utf->rows_offset, utf->rows_size, utf->rows);
VGM_LOG(" str_o=%08x (%x)\n", utf->table_offset + utf->strings_offset, utf->strings_size);
VGM_LOG(" dat_o=%08x (%x))\n", utf->table_offset + utf->data_offset, utf->data_size);
#endif
2019-12-14 19:50:56 +01:00
/* write info */
if (p_rows) *p_rows = utf->rows;
if (p_row_name) *p_row_name = utf->string_table + utf->name_offset;
2019-12-14 19:50:56 +01:00
return utf;
fail:
utf_close(utf);
vgm_logi("@UTF: init failure\n");
2019-12-14 19:50:56 +01:00
return NULL;
}
2020-01-24 22:52:04 +01:00
void utf_close(utf_context* utf) {
2019-12-14 19:50:56 +01:00
if (!utf) return;
free(utf->string_table);
free(utf->schema_buf);
2019-12-14 19:50:56 +01:00
free(utf->schema);
free(utf);
}
/* Finds a column by exact (case-sensitive) name.
 * Returns the index of the first matching column, or -1 if there is none
 * (also when column is NULL, which can't be compared). */
int utf_get_column(utf_context* utf, const char* column) {
    int i;

    if (!column)
        return -1;

    /* find target column */
    for (i = 0; i < utf->columns; i++) {
        struct utf_column_t* col = &utf->schema[i];

        if (col->name == NULL || strcmp(col->name, column) != 0)
            continue;
        return i;
    }

    return -1;
}
/* Result of querying one cell: the column's type plus the decoded value.
 * utf_query() writes the union member that corresponds to 'type'. */
typedef struct {
/* type of the queried column, copied from its schema entry */
enum column_type_t type;
union {
int8_t s8;
uint8_t u8;
int16_t s16;
uint16_t u16;
int32_t s32;
uint32_t u32;
int64_t s64;
uint64_t u64;
float flt;
/* not filled at the moment (DOUBLE reads are disabled in utf_query) */
double dbl;
/* VLDATA: offset/size pair exactly as stored in the table; utf_query_col_data()
 * adds the table and data section offsets to get an absolute position */
struct utf_data_t {
uint32_t offset;
uint32_t size;
} data;
#if 0
struct utf_u128_t {
uint64_t hi;
uint64_t lo;
} value_u128;
#endif
/* STRING: points into the context's string table (not a copy) */
const char* str;
} value;
} utf_result_t;
/* Reads the value of one cell into result.
 *
 * "Default" columns take their (constant) value from the schema, "row" columns from
 * the given row in the file. A column with neither flag yields a zeroed value.
 *
 * Returns 1 on success, 0 if row/column are out of range, the column type is not
 * supported, or a string offset falls outside the string table. */
static int utf_query(utf_context* utf, int row, int column, utf_result_t* result) {

    if (row < 0 || (uint32_t)row >= utf->rows)
        goto fail;
    if (column < 0 || column >= utf->columns)
        goto fail;

    /* get target column */
    {
        struct utf_column_t* col = &utf->schema[column];
        uint32_t data_offset = 0;
        uint8_t* buf = NULL;

        result->type = col->type;

        if (col->flag & COLUMN_FLAG_DEFAULT) {
            if (utf->schema_buf)
                buf = utf->schema_buf + col->offset;
            else
                data_offset = utf->table_offset + utf->schema_offset + col->offset;
        }
        else if (col->flag & COLUMN_FLAG_ROW) {
            /* unsigned math: row and row_width both come from the file and their
             * product may not fit a signed int */
            data_offset = utf->table_offset + utf->rows_offset + (uint32_t)row * utf->row_width + col->offset;
        }
        else {
            /* shouldn't happen */
            memset(&result->value, 0, sizeof(result->value));
            return 1; /* ??? */
        }

        /* read row/constant value (use buf if available) */
        switch (col->type) {
            case COLUMN_TYPE_UINT8:
                result->value.u8 = buf ? get_u8(buf) : read_u8(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_SINT8:
                result->value.s8 = buf ? get_s8(buf) : read_s8(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_UINT16:
                result->value.u16 = buf ? get_u16be(buf) : read_u16be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_SINT16:
                result->value.s16 = buf ? get_s16be(buf) : read_s16be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_UINT32:
                result->value.u32 = buf ? get_u32be(buf) : read_u32be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_SINT32:
                result->value.s32 = buf ? get_s32be(buf) : read_s32be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_UINT64:
                result->value.u64 = buf ? get_u64be(buf) : read_u64be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_SINT64:
                result->value.s64 = buf ? get_s64be(buf) : read_s64be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_FLOAT:
                result->value.flt = buf ? get_f32be(buf) : read_f32be(data_offset, utf->sf);
                break;
#if 0
            case COLUMN_TYPE_DOUBLE:
                result->value.dbl = buf ? get_d64be(buf) : read_d64be(data_offset, utf->sf);
                break;
#endif
            case COLUMN_TYPE_STRING: {
                /* stored value is an offset into the string table */
                uint32_t name_offset = buf ? get_u32be(buf) : read_u32be(data_offset, utf->sf);
                if (name_offset > utf->strings_size)
                    goto fail;
                result->value.str = utf->string_table + name_offset;
                break;
            }
            case COLUMN_TYPE_VLDATA:
                result->value.data.offset = buf ? get_u32be(buf + 0x0) : read_u32be(data_offset + 0x00, utf->sf);
                result->value.data.size   = buf ? get_u32be(buf + 0x4) : read_u32be(data_offset + 0x04, utf->sf);
                break;
#if 0
            case COLUMN_TYPE_UINT128:
                result->value.value_u128.hi = buf ? get_u64be(buf + 0x0) : read_u64be(data_offset + 0x00, utf->sf);
                result->value.value_u128.lo = buf ? get_u64be(buf + 0x8) : read_u64be(data_offset + 0x08, utf->sf);
                break;
#endif
            default:
                goto fail;
        }
    }

    return 1;
fail:
    return 0;
}
/* Queries one cell and copies it to *value, which must point to storage matching
 * the requested type (uint8_t*, int8_t*, ..., const char** for strings).
 *
 * Returns 1 on success; 0 if the query fails, the column's actual type differs
 * from the requested one, or the type isn't one of those copied below
 * (*value is left untouched in those cases). */
static int utf_query_value(utf_context* utf, int row, int column, void* value, enum column_type_t type) {
    utf_result_t result = {0};
    int valid;

    valid = utf_query(utf, row, column, &result);
    if (!valid || result.type != type)
        return 0;

    switch(result.type) {
        case COLUMN_TYPE_UINT8:  (*(uint8_t*)value)     = result.value.u8; break;
        case COLUMN_TYPE_SINT8:  (*(int8_t*)value)      = result.value.s8; break;
        case COLUMN_TYPE_UINT16: (*(uint16_t*)value)    = result.value.u16; break;
        case COLUMN_TYPE_SINT16: (*(int16_t*)value)     = result.value.s16; break;
        case COLUMN_TYPE_UINT32: (*(uint32_t*)value)    = result.value.u32; break;
        case COLUMN_TYPE_SINT32: (*(int32_t*)value)     = result.value.s32; break;
        case COLUMN_TYPE_UINT64: (*(uint64_t*)value)    = result.value.u64; break;
        case COLUMN_TYPE_SINT64: (*(int64_t*)value)     = result.value.s64; break;
        case COLUMN_TYPE_STRING: (*(const char**)value) = result.value.str; break;
        default:
            return 0;
    }

    return 1;
}
/* Reads a signed 8-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't SINT8. */
int utf_query_col_s8(utf_context* utf, int row, int column, int8_t* value) {
    void* dst = value;

    return utf_query_value(utf, row, column, dst, COLUMN_TYPE_SINT8);
}
/* Reads an unsigned 8-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't UINT8. */
int utf_query_col_u8(utf_context* utf, int row, int column, uint8_t* value) {
    return utf_query_value(utf, row, column, (void*)value, COLUMN_TYPE_UINT8);
}
/* Reads a signed 16-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't SINT16. */
int utf_query_col_s16(utf_context* utf, int row, int column, int16_t* value) {
    void* dst = value;

    return utf_query_value(utf, row, column, dst, COLUMN_TYPE_SINT16);
}
/* Reads an unsigned 16-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't UINT16. */
int utf_query_col_u16(utf_context* utf, int row, int column, uint16_t* value) {
    return utf_query_value(utf, row, column, (void*)value, COLUMN_TYPE_UINT16);
}
/* Reads a signed 32-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't SINT32. */
int utf_query_col_s32(utf_context* utf, int row, int column, int32_t* value) {
    void* dst = value;

    return utf_query_value(utf, row, column, dst, COLUMN_TYPE_SINT32);
}
/* Reads an unsigned 32-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't UINT32. */
int utf_query_col_u32(utf_context* utf, int row, int column, uint32_t* value) {
    return utf_query_value(utf, row, column, (void*)value, COLUMN_TYPE_UINT32);
}
/* Reads a signed 64-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't SINT64. */
int utf_query_col_s64(utf_context* utf, int row, int column, int64_t* value) {
    void* dst = value;

    return utf_query_value(utf, row, column, dst, COLUMN_TYPE_SINT64);
}
/* Reads an unsigned 64-bit cell by column index.
 * Returns 1 and fills *value, or 0 if the query fails or the column isn't UINT64. */
int utf_query_col_u64(utf_context* utf, int row, int column, uint64_t* value) {
    void* dst = value;

    return utf_query_value(utf, row, column, dst, COLUMN_TYPE_UINT64);
}
/* Reads a string cell by column index.
 * On success returns 1 and points *value into the context's string table (not a copy;
 * it is freed by utf_close()). Returns 0 if the query fails or the column isn't STRING. */
int utf_query_col_string(utf_context* utf, int row, int column, const char** value) {
    return utf_query_value(utf, row, column, (void*)value, COLUMN_TYPE_STRING);
}
/* Reads a variable-length data cell by column index.
 *
 * p_offset: if not NULL, receives the data position with the table offset and data
 *           section offset already added to the stored value
 * p_size:   if not NULL, receives the stored data size
 *
 * Returns 1 on success, 0 if the query fails or the column isn't VLDATA. */
int utf_query_col_data(utf_context* utf, int row, int column, uint32_t* p_offset, uint32_t* p_size) {
    utf_result_t result = {0};
    int valid;

    valid = utf_query(utf, row, column, &result);
    if (!valid || result.type != COLUMN_TYPE_VLDATA)
        return 0;

    if (p_offset) *p_offset = utf->table_offset + utf->data_offset + result.value.data.offset;
    if (p_size) *p_size = result.value.data.size;
    return 1;
}
/* Reads a signed 8-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't SINT8, or the query fails. */
int utf_query_s8(utf_context* utf, int row, const char* column_name, int8_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_SINT8);
}
/* Reads an unsigned 8-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't UINT8, or the query fails. */
int utf_query_u8(utf_context* utf, int row, const char* column_name, uint8_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_UINT8);
}
/* Reads a signed 16-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't SINT16, or the query fails. */
int utf_query_s16(utf_context* utf, int row, const char* column_name, int16_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_SINT16);
}
/* Reads an unsigned 16-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't UINT16, or the query fails. */
int utf_query_u16(utf_context* utf, int row, const char* column_name, uint16_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_UINT16);
}
/* Reads a signed 32-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't SINT32, or the query fails. */
int utf_query_s32(utf_context* utf, int row, const char* column_name, int32_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_SINT32);
}
/* Reads an unsigned 32-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't UINT32, or the query fails. */
int utf_query_u32(utf_context* utf, int row, const char* column_name, uint32_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_UINT32);
}
/* Reads a signed 64-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't SINT64, or the query fails. */
int utf_query_s64(utf_context* utf, int row, const char* column_name, int64_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_SINT64);
}
/* Reads an unsigned 64-bit cell by column name.
 * Returns 1 and fills *value, or 0 if the column doesn't exist, isn't UINT64, or the query fails. */
int utf_query_u64(utf_context* utf, int row, const char* column_name, uint64_t* value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_UINT64);
}
/* Reads a string cell by column name.
 * On success returns 1 and points *value into the context's string table (not a copy).
 * Returns 0 if the column doesn't exist, isn't STRING, or the query fails. */
int utf_query_string(utf_context* utf, int row, const char* column_name, const char** value) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_value(utf, row, column, value, COLUMN_TYPE_STRING);
}
/* Reads a variable-length data cell by column name; see utf_query_col_data() for
 * the meaning of p_offset/p_size (either may be NULL).
 * Returns 1 on success, 0 if the column doesn't exist, isn't VLDATA, or the query fails. */
int utf_query_data(utf_context* utf, int row, const char* column_name, uint32_t* p_offset, uint32_t* p_size) {
    int column = utf_get_column(utf, column_name); /* -1 if not found, rejected by the query */

    return utf_query_col_data(utf, row, column, p_offset, p_size);
}