/* vgmstream/src/meta/acb_utf.h */
#ifndef _ACB_UTF_H_
#define _ACB_UTF_H_
/* CRI @UTF (Universal Table Format?) is a generic database-like table made of
 * rows/columns that contain numbers/strings/binary data, which also can be other tables.
 *
 * A table starts with "@UTF" and defines some values (row/data/string offsets, counts, etc)
 * then DB schema (columns type+name), then rows, string table and binary data. Formats using @UTF
 * store and read data by row number + column name. Being a generic table with no fixed schema
 * CRI uses it for different purposes (.acf: cues, .cpk: files, .aax: bgm, .usm: video, etc).
 *
 * (adapted from hcs's code to do multiple queries in the same table)
 */
//todo move to src/util subdir

/* opaque struct */
typedef struct utf_context utf_context;
2019-12-14 19:50:56 +01:00
2019-12-15 19:37:52 +01:00
/* open a CRI UTF table at offset, returning table name and rows. Passed streamfile is used internally for next calls */
utf_context* utf_open(STREAMFILE *sf, uint32_t table_offset, int *p_rows, const char **p_row_name);
void utf_close(utf_context *utf);
/* query calls */
int utf_query_s8(utf_context *utf, int row, const char *column, int8_t *value);
int utf_query_s16(utf_context *utf, int row, const char *column, int16_t *value);
int utf_query_string(utf_context *utf, int row, const char *column, const char **value);
int utf_query_data(utf_context *utf, int row, const char *column, uint32_t *offset, uint32_t *size);
2019-12-14 19:50:56 +01:00
/* ************************************************* */
/* INTERNALS */
/* possibly 3b+5b from clUTF decompilation */
/* a column's flags byte: high nibble = where the value is stored, low nibble = value type */
#define COLUMN_BITMASK_STORAGE      0xf0
#define COLUMN_BITMASK_TYPE         0x0f

/* where the value of a column lives (flags & COLUMN_BITMASK_STORAGE) */
enum column_storage_t {
    COLUMN_STORAGE_ZERO         = 0x10, /* nothing stored: queries report an all-zero value */
    COLUMN_STORAGE_CONSTANT     = 0x30, /* single value stored in the schema, right after the column's name offset */
    COLUMN_STORAGE_ROW          = 0x50  /* one value per row, stored inside each row */
  //COLUMN_STORAGE_CONSTANT2    = 0x70  /* from vgmtoolbox */
};

/* type of the value of a column (flags & COLUMN_BITMASK_TYPE); multi-byte values are read as big endian */
enum column_type_t {
    COLUMN_TYPE_SINT8           = 0x00, /* 1 byte */
    COLUMN_TYPE_UINT8           = 0x01, /* 1 byte */
    COLUMN_TYPE_SINT16          = 0x02, /* 2 bytes */
    COLUMN_TYPE_UINT16          = 0x03, /* 2 bytes */
    COLUMN_TYPE_SINT32          = 0x04, /* 4 bytes */
    COLUMN_TYPE_UINT32          = 0x05, /* 4 bytes */
    COLUMN_TYPE_SINT64          = 0x06, /* 8 bytes */
  //COLUMN_TYPE_UINT64          = 0x07,
    COLUMN_TYPE_FLOAT           = 0x08, /* 4 bytes */
  //COLUMN_TYPE_DOUBLE          = 0x09,
    COLUMN_TYPE_STRING          = 0x0a, /* 4 bytes: offset into the string table */
    COLUMN_TYPE_DATA            = 0x0b  /* 8 bytes: 32-bit offset + 32-bit size */
};

typedef struct {
int found;
2019-12-15 19:37:52 +01:00
enum column_type_t type;
2019-12-14 19:50:56 +01:00
union {
int8_t value_s8;
uint8_t value_u8;
int16_t value_s16;
uint16_t value_u16;
int32_t value_s32;
uint32_t value_u32;
int64_t value_s64;
uint64_t value_u64;
float value_float;
double value_double;
struct utf_data_t {
uint32_t offset;
uint32_t size;
} value_data;
2019-12-14 19:50:56 +01:00
const char *value_string;
} value;
2019-12-15 19:37:52 +01:00
} utf_result_t;
2019-12-14 19:50:56 +01:00
struct utf_context {
2019-12-15 19:37:52 +01:00
STREAMFILE *sf;
2019-12-14 19:50:56 +01:00
uint32_t table_offset;
/* header */
uint32_t table_size;
uint16_t version;
uint16_t rows_offset;
uint32_t strings_offset;
uint32_t data_offset;
uint32_t name_offset;
uint16_t columns;
uint16_t row_width;
uint32_t rows;
struct utf_column_t {
uint8_t flags;
const char *name;
uint32_t offset;
} *schema;
2019-12-14 19:50:56 +01:00
/* derived */
uint32_t schema_offset;
uint32_t strings_size;
char *string_table;
2019-12-14 19:50:56 +01:00
const char *table_name;
};
/* @UTF table reading, abridged */
/* Opens the @UTF table found at table_offset inside sf, loading its header, string table and
 * column schema. Row values are not loaded: queries read them from sf on demand, so sf is kept
 * in the context and used by later calls.
 *
 * p_rows / p_row_name (both optional) receive the number of rows and the table's name; the name
 * points into the context's string table, which utf_close frees.
 *
 * Returns a new context (release with utf_close), or NULL if the table fails validation. */
/*static*/ utf_context* utf_open(STREAMFILE *sf, uint32_t table_offset, int *p_rows, const char **p_row_name) {
    utf_context* utf = NULL;


    utf = calloc(1, sizeof(utf_context));
    if (!utf) goto fail;

    utf->sf = sf;
    utf->table_offset = table_offset;


    /* check header */
    if (read_u32be(table_offset + 0x00, sf) != 0x40555446) /* "@UTF" */
        goto fail;

    /* load table header (+0x08 so that sizes/offsets can be used relative to table_offset) */
    utf->table_size     = read_u32be(table_offset + 0x04, sf) + 0x08;
    utf->version        = read_u16be(table_offset + 0x08, sf);
    utf->rows_offset    = read_u16be(table_offset + 0x0a, sf) + 0x08;
    utf->strings_offset = read_u32be(table_offset + 0x0c, sf) + 0x08;
    utf->data_offset    = read_u32be(table_offset + 0x10, sf) + 0x08;
    utf->name_offset    = read_u32be(table_offset + 0x14, sf); /* within string table */
    utf->columns        = read_u16be(table_offset + 0x18, sf);
    utf->row_width      = read_u16be(table_offset + 0x1a, sf);
    utf->rows           = read_u32be(table_offset + 0x1c, sf);

    utf->schema_offset  = 0x20;

    /* 00: early (32b rows_offset?), 01: +2017 (no apparent differences) */
    if (utf->version != 0x00 && utf->version != 0x01) {
        VGM_LOG("@UTF: unknown version\n");
    }

    /* sum in 64 bits: offset + size could wrap around in 32 bits and wrongly pass the check */
    if ((uint64_t)utf->table_offset + utf->table_size > get_streamfile_size(sf))
        goto fail;
    if (utf->rows_offset > utf->table_size || utf->strings_offset > utf->table_size || utf->data_offset > utf->table_size)
        goto fail;

    /* strings must end where data starts: the subtraction is unsigned, so without this check a
     * data_offset before strings_offset would wrap into a huge (or, after the +1 below, zero) size */
    if (utf->data_offset < utf->strings_offset)
        goto fail;
    utf->strings_size = utf->data_offset - utf->strings_offset;
    if (utf->strings_size == 0 || utf->name_offset > utf->strings_size)
        goto fail;

    /* no rows is possible for empty tables (have schema and columns names but no data) [PES 2013 (PC)] */
    if (utf->columns <= 0 /*|| utf->rows <= 0 || utf->rows_width <= 0*/)
        goto fail;


    /* load string table */
    {
        size_t read;

        /* +1 zeroed byte so that every string (even one at offset == strings_size) is terminated */
        utf->string_table = calloc(utf->strings_size + 1, sizeof(char));
        if (!utf->string_table) goto fail;

        utf->table_name = utf->string_table + utf->name_offset;

        read = read_streamfile((unsigned char*)utf->string_table, utf->table_offset + utf->strings_offset, utf->strings_size, sf);
        if (utf->strings_size != read) goto fail;
    }


    /* load column schema */
    {
        int i;
        uint32_t value_size, column_offset = 0; /* column_offset: running position of values inside a row */
        uint32_t schema_offset = utf->table_offset + utf->schema_offset;

        utf->schema = malloc(sizeof(struct utf_column_t) * utf->columns);
        if (!utf->schema) goto fail;

        for (i = 0; i < utf->columns; i++) {
            uint8_t flags = read_u8(schema_offset + 0x00, sf);
            uint32_t name_offset = read_u32be(schema_offset + 0x01, sf);
            if (name_offset > utf->strings_size)
                goto fail;

            utf->schema[i].flags = flags;
            utf->schema[i].name = utf->string_table + name_offset;
            schema_offset += 0x01 + 0x04;

            switch (utf->schema[i].flags & COLUMN_BITMASK_TYPE) {
                case COLUMN_TYPE_SINT8:
                case COLUMN_TYPE_UINT8:
                    value_size = 0x01;
                    break;
                case COLUMN_TYPE_SINT16:
                case COLUMN_TYPE_UINT16:
                    value_size = 0x02;
                    break;
                case COLUMN_TYPE_SINT32:
                case COLUMN_TYPE_UINT32:
                case COLUMN_TYPE_FLOAT:
                case COLUMN_TYPE_STRING:
                    value_size = 0x04;
                    break;
                case COLUMN_TYPE_SINT64:
              //case COLUMN_TYPE_UINT64:
              //case COLUMN_TYPE_DOUBLE:
                case COLUMN_TYPE_DATA:
                    value_size = 0x08;
                    break;
                default:
                    VGM_LOG("@UTF: unknown column type\n");
                    goto fail;
            }

            switch (utf->schema[i].flags & COLUMN_BITMASK_STORAGE) {
                case COLUMN_STORAGE_ROW:
                    utf->schema[i].offset = column_offset;
                    column_offset += value_size;
                    break;
                case COLUMN_STORAGE_CONSTANT:
              //case COLUMN_STORAGE_CONSTANT2:
                    /* value sits in the schema itself, right after this column's flags + name */
                    utf->schema[i].offset = schema_offset - (utf->table_offset + utf->schema_offset); /* relative to schema */
                    schema_offset += value_size;
                    break;
                case COLUMN_STORAGE_ZERO:
                    utf->schema[i].offset = 0; /* ? */
                    break;
                default:
                    VGM_LOG("@UTF: unknown column storage\n");
                    goto fail;
            }
        }
    }


    /* write info */
    if (p_rows) *p_rows = utf->rows;
    if (p_row_name) *p_row_name = utf->string_table + utf->name_offset;

    return utf;
fail:
    utf_close(utf);
    VGM_LOG("@UTF: fail\n");
    return NULL;
}
/* Releases a context created by utf_open, including its string table and schema (so strings
 * previously returned from it must not be used afterwards). The streamfile is left untouched.
 * Passing NULL does nothing. */
/*static*/ void utf_close(utf_context *utf) {
    if (!utf) return;

    free(utf->string_table);
    free(utf->schema);
    free(utf);
}

/* Looks up 'column' by name and reads its value for 'row' into 'result'.
 *
 * Returns 0 if the row is out of range or the found column can't be read (unknown storage/type,
 * string offset outside the string table). Otherwise returns 1, even when no column has that
 * name: callers must also check result->found, and result->type to know which value was set. */
static int utf_query(utf_context *utf, int row, const char *column, utf_result_t *result) {
    int i;


    result->found = 0;

    if (row >= utf->rows || row < 0)
        goto fail;

    /* find target column */
    for (i = 0; i < utf->columns; i++) {
        struct utf_column_t *col = &utf->schema[i];
        uint32_t data_offset;

        if (strcmp(col->name, column) != 0)
            continue;

        result->found = 1;
        result->type = col->flags & COLUMN_BITMASK_TYPE;

        switch (col->flags & COLUMN_BITMASK_STORAGE) {
            case COLUMN_STORAGE_ROW:
                /* multiply as unsigned: row and row_width would otherwise be multiplied as signed
                 * ints, which may overflow (undefined behavior) with big row numbers */
                data_offset = utf->table_offset + utf->rows_offset + (uint32_t)row * utf->row_width + col->offset;
                break;
            case COLUMN_STORAGE_CONSTANT:
          //case COLUMN_STORAGE_CONSTANT2:
                data_offset = utf->table_offset + utf->schema_offset + col->offset;
                break;
            case COLUMN_STORAGE_ZERO:
                /* nothing stored: report a zeroed value (offset 0 marks "don't read" below) */
                data_offset = 0;
                memset(&result->value, 0, sizeof(result->value));
                break;
            default:
                goto fail;
        }

        /* ignore zero value */
        if (!data_offset)
            break;

        /* read row/constant value */
        switch (col->flags & COLUMN_BITMASK_TYPE) {
            case COLUMN_TYPE_SINT8:
                result->value.value_s8 = read_s8(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_UINT8:
                result->value.value_u8 = read_u8(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_SINT16:
                result->value.value_s16 = read_s16be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_UINT16:
                result->value.value_u16 = read_u16be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_SINT32:
                result->value.value_s32 = read_s32be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_UINT32:
                result->value.value_u32 = read_u32be(data_offset, utf->sf);
                break;
            case COLUMN_TYPE_SINT64:
                result->value.value_s64 = read_s64be(data_offset, utf->sf);
                break;
#if 0
            case COLUMN_TYPE_UINT64:
                result->value.value_u64 = read_u64be(data_offset, utf->sf);
                break;
#endif
            case COLUMN_TYPE_FLOAT: {
                result->value.value_float = read_f32be(data_offset, utf->sf);
                break;
            }
#if 0
            case COLUMN_TYPE_DOUBLE: {
                result->value.value_double = read_d64be(data_offset, utf->sf);
                break;
            }
#endif
            case COLUMN_TYPE_STRING: {
                /* stored value is an offset into the string table */
                uint32_t name_offset = read_u32be(data_offset, utf->sf);
                if (name_offset > utf->strings_size)
                    goto fail;
                result->value.value_string = utf->string_table + name_offset;
                break;
            }

            case COLUMN_TYPE_DATA:
                result->value.value_data.offset = read_u32be(data_offset + 0x00, utf->sf);
                result->value.value_data.size   = read_u32be(data_offset + 0x04, utf->sf);
                break;

            default:
                goto fail;
        }

        break; /* column found and read */
    }

    return 1;
fail:
    return 0;
}

/* Queries 'column' at 'row' and copies the result to *value, but only if the column exists and
 * its stored type is exactly 'type' (no conversions are done). 'value' must point to a variable
 * of the C type matching 'type' (int8_t for SINT8, const char* for STRING, etc).
 *
 * Returns 1 on success, 0 otherwise. FLOAT and DATA columns aren't handled here and return 0. */
static int utf_query_value(utf_context *utf, int row, const char *column, void *value, enum column_type_t type) {
    utf_result_t result = {0};
    int valid;

    valid = utf_query(utf, row, column, &result);
    if (!valid || !result.found || result.type != type)
        return 0;

    switch(result.type) {
        case COLUMN_TYPE_SINT8:  (*(int8_t*)value)      = result.value.value_s8; break;
        case COLUMN_TYPE_UINT8:  (*(uint8_t*)value)     = result.value.value_u8; break;
        case COLUMN_TYPE_SINT16: (*(int16_t*)value)     = result.value.value_s16; break;
        case COLUMN_TYPE_UINT16: (*(uint16_t*)value)    = result.value.value_u16; break;
        case COLUMN_TYPE_SINT32: (*(int32_t*)value)     = result.value.value_s32; break;
        case COLUMN_TYPE_UINT32: (*(uint32_t*)value)    = result.value.value_u32; break;
        case COLUMN_TYPE_SINT64: (*(int64_t*)value)     = result.value.value_s64; break;
      //case COLUMN_TYPE_UINT64: (*(uint64_t*)value)    = result.value.value_u64; break;
        case COLUMN_TYPE_STRING: (*(const char**)value) = result.value.value_string; break;
        default:
            return 0;
    }

    return 1;
}

/* Reads a signed 8-bit column into *value; returns 1 on success, 0 if the column is missing or isn't SINT8. */
int utf_query_s8(utf_context *utf, int row, const char *column, int8_t *value) {
    void *out = value;

    return utf_query_value(utf, row, column, out, COLUMN_TYPE_SINT8);
}
/* Reads a signed 16-bit column into *value; returns 1 on success, 0 if the column is missing or isn't SINT16. */
int utf_query_s16(utf_context *utf, int row, const char *column, int16_t *value) {
    return utf_query_value(utf, row, column, (void*)value, COLUMN_TYPE_SINT16);
}

/* Reads a signed 32-bit column into *value; returns 1 on success, 0 if the column is missing or isn't SINT32. */
int utf_query_s32(utf_context *utf, int row, const char *column, int32_t *value) {
    return utf_query_value(utf, row, column, (void*)value, COLUMN_TYPE_SINT32);
}

/* Reads a string column; *value is set to point into the context's string table (not a copy, don't free).
 * Returns 1 on success, 0 if the column is missing or isn't STRING. */
int utf_query_string(utf_context *utf, int row, const char *column, const char **value) {
    return utf_query_value(utf, row, column, (void*)value, COLUMN_TYPE_STRING);
}

/* Reads a binary data column, returning where the data is rather than the data itself.
 * *p_offset gets the stored offset converted to an absolute position (table start + data section
 * + stored offset) and *p_size the stored size; either pointer may be NULL.
 *
 * Returns 1 on success, 0 if the column is missing or isn't DATA. */
int utf_query_data(utf_context *utf, int row, const char *column, uint32_t *p_offset, uint32_t *p_size) {
    utf_result_t result = {0};
    int valid;

    valid = utf_query(utf, row, column, &result);
    if (!valid || !result.found || result.type != COLUMN_TYPE_DATA)
        return 0;

    if (p_offset) *p_offset = utf->table_offset + utf->data_offset + result.value.value_data.offset;
    if (p_size) *p_size = result.value.value_data.size;
    return 1;
}
#endif /* _ACB_UTF_H_ */