3
0
mirror of synced 2024-11-30 18:24:29 +01:00
popnhax_tools/omnimix/verify_data.py

503 lines
19 KiB
Python
Raw Normal View History

2023-07-24 23:48:07 +02:00
import argparse
import os
import sys
import ifstools
import popndll
from enum import Enum
from lxml.etree import tostring, fromstring
from lxml.builder import E
class DataErrors(Enum):
    """Kinds of problems recorded while verifying the data folder.

    The verification functions in this file record each problem as a tuple
    of ``(DataErrors member, database index, list of details)``.
    """

    # --- Song data under the "sd" folder ---
    # The folder named by a chart entry does not exist.
    SD_PATH_NOT_EXIST = 1
    # The .ifs archive named by a chart entry does not exist.
    SD_IFS_NOT_EXIST = 2
    # The charts found in the archives differ from the charts the entry expects.
    SD_CHARTS_NOT_FOUND = 3
    # Only referenced by a currently disabled report of unused chart files.
    SD_CHARTS_UNUSED = 4

    # --- System texture archives ---
    # No kc_XXXX.ifs entry for the song in the kc archive.
    KC_NOT_FOUND = 5
    # No bg_XXXX.ifs entry for the song in the bg archive.
    BG_NOT_FOUND = 6

    # --- Character archives ---
    # The character's .ifs archive does not exist.
    CHARA_IFS_NOT_FOUND = 7
    # An expected image is missing from the character's archive.
    CHARA_IFS_INNER_NOT_FOUND = 8
    # The character's archive could not be read.
    IFS_READ_ERROR = 9

    # --- Chart contents ---
    # verify_chart raised while checking a chart file.
    SD_CHART_ERROR = 10
# File-name suffix of each chart slot ("<filename>_<label>.bin").  The slot
# order matches the idx labels accepted by elem2dict:
# "ep", "np", "hp", "op", "bp_n", "bp_h".  Slots 4 and 5 both map to "bp".
CHART_LABELS = [
    "ep",  # slot 0
    "np",  # slot 1
    "hp",  # slot 2
    "op",  # slot 3
    "bp",  # slot 4
    "bp",  # slot 5
]

# Bit OR-ed into a music entry's 'mask' field when the chart in the matching
# slot is present (0 means the slot contributes no bit).
CHART_MASKS = [
    0x00080000,  # slot 0
    0,           # slot 1
    0x01000000,  # slot 2
    0x02000000,  # slot 3
    0,           # slot 4
    0x04000000,  # slot 5
]
def elem2dict(node):
    """
    Convert an lxml.etree node tree into a dict.

    Source: https://gist.github.com/jacobian/795571#gistcomment-2810160

    The node's ``id`` attribute (or its ``idx`` attribute, which may be a
    number or one of the chart labels "ep", "np", "hp", "op", "bp_n",
    "bp_h") is stored under the ``'_id'`` key as an int.

    Each child element becomes one key of the result:

    * a ``charts`` child becomes a 7-slot list holding the converted chart
      dicts at the position given by their ``'_id'`` (empty slots are 0);
    * a child with non-whitespace text becomes that text, split on
      whitespace when a ``__count`` attribute is present and converted to
      int(s) when ``__type`` is an integer type;
    * any other child is converted recursively.

    Comment / processing-instruction children are ignored.

    Returns:
        dict: the converted tree.
    """
    int_types = ('u8', 's8', 'u16', 's16', 'u32', 's32')
    chart_labels = ["ep", "np", "hp", "op", "bp_n", "bp_h"]

    result = {}

    node_id = node.get('id', None)
    if node_id:
        result['_id'] = int(node_id)

    idx = node.get('idx', None)
    if idx:
        # idx may be given as a chart label instead of a number
        if idx in chart_labels:
            idx = chart_labels.index(idx)
        result['_id'] = int(idx)

    for element in node.iterchildren():
        if not isinstance(element.tag, str):
            # lxml comments and processing instructions expose a callable
            # as .tag; they carry no data, so skip them instead of crashing
            continue

        # Remove namespace prefix
        key = element.tag.split('}')[1] if '}' in element.tag else element.tag

        if key == 'charts':
            value = [0] * 7
            for chart in element.iterchildren():
                chart_data = elem2dict(chart)
                value[chart_data['_id']] = chart_data

        elif element.text and element.text.strip():
            # Leaf value: the element holds non-whitespace text
            elm_type = element.get('__type', None)
            elm_count = element.get('__count', None)

            value = element.text
            if elm_count:
                # split() without an argument tolerates repeated, leading and
                # trailing whitespace (split(' ') would yield '' items that
                # int() rejects)
                value = value.split()

            if elm_type in int_types:
                if isinstance(value, list):
                    value = [int(x) for x in value]
                else:
                    value = int(value)

        else:
            # No text of its own: treat the element as a nested tree
            value = elem2dict(element)

        result[key] = value

    return result
def convert_db_to_dict(db):
    """Return a dict mapping each entry's position in *db* to the entry."""
    return dict(enumerate(db))
def load_patch_dbs(input_db_folder, databases):
    """Merge the XML patch files listed in ``master.xml`` into *databases*.

    ``master.xml`` inside *input_db_folder* lists patch files through its
    ``filename`` elements.  The files are applied in listed order: ``chara``
    elements are merged into ``databases['charadb']`` and ``music`` elements
    into ``databases['musicdb']``, keyed by the element's ``id`` attribute.

    An entry that does not exist yet is added as-is.  For an existing entry
    the patch values overwrite the stored ones; when the existing entry has
    a ``charts`` list, charts are merged slot by slot instead of replacing
    the whole list.

    Args:
        input_db_folder: folder containing ``master.xml`` and the patch files.
        databases: dict with ``'charadb'`` and ``'musicdb'`` dicts; it is
            modified in place.

    Returns:
        The same *databases* object (unchanged when ``master.xml`` is absent).
    """
    def read_xml(path):
        # Read through a context manager so the file handle is closed
        # immediately instead of whenever the object gets collected.
        with open(path, "rb") as xml_file:
            raw = xml_file.read()
        # The literal bytes "shift-jis" are rewritten to "cp932" before
        # parsing (presumably so the declared encoding is decoded as cp932).
        return fromstring(raw.replace(b"shift-jis", b"cp932"))

    def get_sequential_files(db, master_xml_path, target_elm):
        master_xml = read_xml(master_xml_path)

        for filename in master_xml.findall('filename'):
            patch_xml = read_xml(os.path.join(input_db_folder, filename.text))

            for elm in patch_xml.findall(target_elm):
                idx = int(elm.get('id'))
                new_entry = elem2dict(elm)

                if idx not in db:
                    db[idx] = new_entry
                    continue

                existing = db[idx]
                if 'charts' in existing:
                    for chart in new_entry.get('charts', []):
                        if chart == 0:
                            # Empty slot in the patch
                            continue

                        slot = chart['_id']
                        if existing['charts'][slot] == 0:
                            existing['charts'][slot] = chart
                        else:
                            existing['charts'][slot].update(chart)

                    # Charts were merged per slot above; drop them so the
                    # update() below does not replace the merged list.
                    new_entry.pop('charts', None)

                existing.update(new_entry)

        return db

    master_xml_path = os.path.join(input_db_folder, "master.xml")
    if not os.path.exists(master_xml_path):
        return databases

    databases['charadb'] = get_sequential_files(databases['charadb'], master_xml_path, "chara")
    databases['musicdb'] = get_sequential_files(databases['musicdb'], master_xml_path, "music")

    return databases
def verify_chart(data):
    """Run sanity checks on the raw bytes of a chart file.

    The data is read as a sequence of fixed-size events, 8 or 12 bytes each
    (the size is guessed from the data).  Every event starts with a 4-byte
    little-endian timestamp, a marker byte, a byte holding the command (low
    nibble) and a parameter (high nibble), and two parameter bytes; 12-byte
    events carry four more parameter bytes.

    Checks performed (command ids as used by the checks below):
    events are in timestamp order; timing windows (0x08), BPM (0x04) and
    metronome (0x05) events exist and start at timestamp 0; the six timing
    windows are ascending and close to the standard values; notes (0x01),
    keysounds (0x02), measures (0x0a), beats (0x0b) and an ending (0x06)
    exist; exactly one BGM start (0x03) exists; note, keysound and
    auto-keysound (0x07) indices are in range; for 12-byte events no note
    falls inside a hold on the same button; no note sits at timestamp 0.

    Every check is a separate ``assert`` on a descriptively named flag
    because verify_musicdb reports the source text of the failing line.

    Args:
        data: chart file contents as bytes.

    Returns:
        True when every check passes.

    Raises:
        AssertionError: a check failed.
        Exception: the event size could not be determined.
        KeyError: one of the six timing windows is missing.
    """
    assert(len(data) > 0)

    event_size = 12
    if len(data) % 12 != 0:
        # Not a whole number of 12-byte events
        event_size = 8

    elif len(data) % 8 != 0:
        # Not a whole number of 8-byte events
        event_size = 12

    else:
        # The length fits both layouts, so look at the marker byte of every
        # event under each layout: only 0x00 and 0x45 are accepted.
        marker_8_diff = {data[i+4] for i in range(0, len(data), 8)} - {0x00, 0x45}
        marker_12_diff = {data[i+4] for i in range(0, len(data), 12)} - {0x00, 0x45}

        if len(marker_8_diff) > 0 and len(marker_12_diff) == 0:
            event_size = 12

        elif len(marker_8_diff) == 0 and len(marker_12_diff) > 0:
            event_size = 8

        elif len(marker_8_diff) == 0 and len(marker_12_diff) == 0:
            # Inconclusive, do more testing on the command byte
            known_cmds = {1, 2, 3, 4, 5, 6, 7, 8, 10, 11}
            cmd_8_diff = {data[i+5] for i in range(0, len(data), 8)} - known_cmds
            cmd_12_diff = {data[i+5] for i in range(0, len(data), 12)} - known_cmds

            if len(cmd_8_diff) > 0 and len(cmd_12_diff) == 0:
                event_size = 12

            elif len(cmd_8_diff) == 0 and len(cmd_12_diff) > 0:
                event_size = 8

            else:
                raise Exception("Couldn't determine size of chart events")

    # Each event is stored as the tuple
    # (chunk, timestamp, marker, cmd, param1, param2, param3),
    # so x[1] is the timestamp, x[5] the two param2 bytes and x[6] param3.
    events_by_cmd = {}
    events = []
    for i in range(0, len(data), event_size):
        chunk = data[i:i+event_size]

        if len(chunk) != event_size:
            break

        timestamp = int.from_bytes(chunk[:4], 'little')
        marker = chunk[4]
        cmd = chunk[5] & 0x0f
        param1 = chunk[5] >> 4
        param2 = chunk[6:8]
        param3 = chunk[8:] if event_size == 12 else 0

        event = (chunk, timestamp, marker, cmd, param1, param2, param3)
        events.append(event)

        # Disabled check:
        # is_valid_marker = (cmd in [0x0a, 0x0b] and marker == 0) or (cmd not in [0x0a, 0x0b] and marker == 0x45)
        # assert(is_valid_marker == True)

        events_by_cmd.setdefault(cmd, []).append(event)

    chart_is_sequential = events == sorted(events, key=lambda x:x[1])
    assert(chart_is_sequential == True)

    chart_has_timings = 0x08 in events_by_cmd and len(events_by_cmd.get(0x08, [])) >= 6
    assert(chart_has_timings == True)

    chart_has_timings_at_zero = 0x08 in events_by_cmd and min([x[1] for x in events_by_cmd.get(0x08, [])]) == 0
    assert(chart_has_timings_at_zero == True)

    # Timing window index is the high nibble of the second param2 byte
    chart_timings = {x[5][1] >> 4: (x[5][1] & 0x0f) | x[5][0] for x in events_by_cmd.get(0x08, [])}
    chart_has_sequential_timings = sorted([chart_timings[k] for k in range(6)]) == [chart_timings[k] for k in range(6)]
    assert(chart_has_sequential_timings == True)

    standard_timings = [
        0x76, # Early bad
        0x7a, # Early good
        0x7e, # Early great
        0x84, # Late great
        0x88, # Late good
        0x8c, # Late bad
    ]
    chart_has_sensible_timings = [abs(chart_timings[k] - standard_timings[k]) < 15 for k in range(6)]
    chart_has_sensible_timings = list(set(chart_has_sensible_timings)) == [True]
    assert(chart_has_sensible_timings == True)

    chart_has_bpm = 0x04 in events_by_cmd and len(events_by_cmd.get(0x04, [])) > 0
    assert(chart_has_bpm == True)

    chart_has_bpm_at_zero = 0x04 in events_by_cmd and min([x[1] for x in events_by_cmd.get(0x04, [])]) == 0
    assert(chart_has_bpm_at_zero == True)

    chart_has_valid_bpms = 0x04 in events_by_cmd and min([int.from_bytes(x[5], 'little') for x in events_by_cmd.get(0x04, [])]) >= 0
    assert(chart_has_valid_bpms == True)

    chart_has_metronome = 0x05 in events_by_cmd and len(events_by_cmd.get(0x05, [])) > 0
    assert(chart_has_metronome == True)

    chart_has_metronome_at_zero = 0x05 in events_by_cmd and min([x[1] for x in events_by_cmd.get(0x05, [])]) == 0
    assert(chart_has_metronome_at_zero == True)

    used_notes = sorted(list(set([x[5][0] for x in events_by_cmd.get(0x01, [])])))
    is_valid_range_notes = not used_notes or (min(used_notes) >= 0 and max(used_notes) <= 8)
    assert(is_valid_range_notes == True)

    chart_has_notes = len(used_notes) > 0
    assert(chart_has_notes == True)

    used_notes = sorted(list(set([x[5][1] >> 4 for x in events_by_cmd.get(0x02, [])])))
    is_valid_range_keysound_range = not used_notes or (min(used_notes) >= 0 and max(used_notes) <= 8)
    # Assert the keysound flag computed just above (this used to re-assert
    # is_valid_range_notes, so the keysound range was never enforced).
    assert(is_valid_range_keysound_range == True)

    chart_has_keysounds = len(used_notes) > 0
    assert(chart_has_keysounds == True)

    used_notes = sorted(list(set([x[5][1] >> 4 for x in events_by_cmd.get(0x07, [])])))
    is_valid_range_auto_keysound_range = not used_notes or (min(used_notes) >= 0 and max(used_notes) <= 15)
    assert(is_valid_range_auto_keysound_range == True)

    chart_has_measures = len(events_by_cmd.get(0x0a, [])) > 0
    assert(chart_has_measures == True)

    chart_has_beats = len(events_by_cmd.get(0x0b, [])) > 0
    assert(chart_has_beats == True)

    chart_has_bgm_start = len(events_by_cmd.get(0x03, [])) > 0
    assert(chart_has_bgm_start == True)

    chart_has_single_bgm_start = len(events_by_cmd.get(0x03, [])) == 1
    assert(chart_has_single_bgm_start == True)

    chart_has_ending = len(events_by_cmd.get(0x06, [])) > 0
    assert(chart_has_ending == True)

    # Disabled check:
    # chart_has_single_ending = len(events_by_cmd.get(0x06, [])) == 1
    # assert(chart_has_single_ending == True)

    if event_size == 12:
        # Notes whose param3 is non-zero are holds lasting param3 time units:
        # (button, start, end, raw chunk)
        hold_events = [(x[5][0], x[1], x[1] + int.from_bytes(x[6], 'little'), x[0]) for x in events_by_cmd.get(0x01, []) if int.from_bytes(x[6], 'little') > 0]

        for hold_event in hold_events:
            for x in events_by_cmd.get(0x01, []):
                if x[5][0] == hold_event[0] and x[1] != hold_event[1]:
                    # Another note on the same button while the hold is active
                    is_impossible_hold = x[1] >= hold_event[1] and x[1] < hold_event[2]
                    assert(is_impossible_hold == False)

    chart_has_no_notes_at_zero = len([x for x in events_by_cmd.get(0x01, []) if x[1] == 0]) == 0
    assert(chart_has_no_notes_at_zero == True)

    return True
def verify_musicdb(musicdb, input_data_folder, is_mod_ifs):
    """Check that the files referenced by the music database exist and are sane.

    For every non-placeholder song this:

    * ORs the CHART_MASKS bit of every present chart into ``entry['mask']``
      (the entry is modified in place);
    * checks that each chart's folder and .ifs archive exist under
      ``<input_data_folder>/sd``;
    * runs verify_chart on every ``*_<label>.bin`` file inside the archives;
    * compares the chart labels found with the labels of the charts whose
      ``diff`` is non-zero;
    * checks for ``kc_XXXX.ifs`` (and ``bg_XXXX.ifs`` when the entry's
      ``folder`` is <= 21) in the system texture archives.

    Args:
        musicdb: dict of music entries keyed by music index.
        input_data_folder: root of the data folder.
        is_mod_ifs: use ``bg_mod.ifs`` / ``kc_mod.ifs`` instead of
            ``bg_diff.ifs`` / ``kc_diff.ifs``.

    Returns:
        list of ``(DataErrors, music_idx, details)`` tuples.
    """
    import traceback

    def list_ifs_files(ifs_path):
        # Return the names of all files in an archive, always closing it
        archive = ifstools.IFS(ifs_path)
        try:
            return [str(x) for x in archive.tree.all_files]
        finally:
            archive.close()

    errors = []

    sd_path = os.path.join(input_data_folder, "sd")

    bg_ifs_path = os.path.join(input_data_folder, "tex", "system", "bg_mod.ifs" if is_mod_ifs else "bg_diff.ifs")
    bg_ifs_files = set(list_ifs_files(bg_ifs_path))

    kc_ifs_path = os.path.join(input_data_folder, "tex", "system", "kc_mod.ifs" if is_mod_ifs else "kc_diff.ifs")
    kc_ifs_files = set(list_ifs_files(kc_ifs_path))

    # CHART_LABELS repeats "bp"; iterate each label once so that a chart file
    # is not verified (and its error not reported) twice.
    unique_chart_labels = list(dict.fromkeys(CHART_LABELS))

    for music_idx in musicdb:
        entry = musicdb[music_idx]

        if popndll.is_placeholder_song(entry):
            # Skip placeholder entries
            continue

        # Generate mask and expected charts list
        entry.setdefault('mask', 0)

        expected_charts = []
        for chart in entry.get('charts', []):
            if chart == 0:
                continue

            entry['mask'] |= CHART_MASKS[chart['_id']]

            if chart.get('diff', 0) == 0:
                # If a song has a 0 difficulty level then the game won't make it selectable so it doesn't matter if it exists or not
                continue

            if CHART_LABELS[chart['_id']] not in expected_charts:
                expected_charts.append(CHART_LABELS[chart['_id']])

        found_charts = set()
        # Charts of one song usually point at the same archive; open and
        # verify each archive only once per song.
        scanned_ifs_paths = set()

        for chart in entry.get('charts', []):
            if isinstance(chart, int):
                # Doesn't exist
                continue

            sd_game_path = os.path.join(sd_path, chart['folder'])
            if not os.path.exists(sd_game_path):
                print("Could not find", sd_game_path)
                errors.append((DataErrors.SD_PATH_NOT_EXIST, music_idx, [sd_game_path]))

            sd_ifs_base_path = os.path.join(sd_path, chart['folder'], chart['filename'])
            if 0 < chart['file_type'] <= 5:
                sd_ifs_base_path = "%s_%02d" % (sd_ifs_base_path, chart['file_type'])
            elif chart['file_type'] > 5:
                sd_ifs_base_path = "%s_diff" % (sd_ifs_base_path)

            sd_ifs_path = "%s.ifs" % (sd_ifs_base_path)
            if not os.path.exists(sd_ifs_path):
                print("Could not find", sd_ifs_path)
                errors.append((DataErrors.SD_IFS_NOT_EXIST, music_idx, [sd_ifs_path]))
                continue

            if sd_ifs_path in scanned_ifs_paths:
                continue
            scanned_ifs_paths.add(sd_ifs_path)

            ifs = ifstools.IFS(sd_ifs_path)
            try:
                for inner_file in ifs.tree.all_files:
                    inner_name = str(inner_file)

                    for chart_label in unique_chart_labels:
                        if not inner_name.endswith("_%s.bin" % chart_label):
                            continue

                        found_charts.add(chart_label)

                        try:
                            verify_chart(inner_file.load())
                        except Exception as e:
                            # Report the source line of the failing check.
                            # Exception (not BaseException) so Ctrl-C and
                            # sys.exit() still stop the run.
                            failing_line = traceback.extract_tb(e.__traceback__)[-1].line
                            errors.append((DataErrors.SD_CHART_ERROR, music_idx, [inner_name, failing_line]))
                            print(errors[-1])
            finally:
                ifs.close()

        # TODO: Add check to make sure battle hyper chart exists?
        # Chart files that exist but are not expected are ignored here; a
        # DataErrors.SD_CHARTS_UNUSED report for them is currently disabled.
        found_expected_charts = sorted(found_charts & set(expected_charts))
        expected_charts = sorted(expected_charts)

        if found_expected_charts != expected_charts:
            missing_charts = sorted(set(expected_charts) - set(found_expected_charts))
            errors.append((DataErrors.SD_CHARTS_NOT_FOUND, music_idx, [found_expected_charts, expected_charts, missing_charts]))

        kc_path = "kc_%04d.ifs" % (music_idx)
        if kc_path not in kc_ifs_files:
            errors.append((DataErrors.KC_NOT_FOUND, music_idx, [kc_path]))

        if entry.get('folder', 0) <= 21:
            # Later games don't use bg_*.ifs
            bg_path = "bg_%04d.ifs" % (music_idx)
            if bg_path not in bg_ifs_files:
                errors.append((DataErrors.BG_NOT_FOUND, music_idx, [bg_path]))

    return errors
def verify_charadb(charadb, input_data_folder, is_mod_ifs):
    """Check that every character's archive and its images exist.

    For every non-placeholder character the archive
    ``<input_data_folder>/tex/<folder>/<chara_id>[_NN|_diff].ifs`` must exist
    and contain ``tex/<icon1>.png``, ``tex/<icon2>.png`` and ``tex/<gg>.png``.

    A missing archive terminates the process with exit status 1.  A missing
    inner image or an unreadable archive is recorded and verification
    continues with the next character.

    Args:
        charadb: dict of character entries keyed by character index.
        input_data_folder: root of the data folder.
        is_mod_ifs: accepted for symmetry with verify_musicdb; not used here.

    Returns:
        list of ``(DataErrors, chara_idx, details)`` tuples.
    """
    errors = []

    tex_path = os.path.join(input_data_folder, "tex")

    for chara_idx in charadb:
        entry = charadb[chara_idx]

        if popndll.is_placeholder_chara(entry):
            # Skip placeholder entries
            continue

        chara_ifs_base_path = os.path.join(tex_path, entry['folder'], entry['chara_id'])
        if 0 < entry['file_type'] <= 5:
            chara_ifs_base_path = "%s_%02d" % (chara_ifs_base_path, entry['file_type'])
        elif entry['file_type'] > 5:
            chara_ifs_base_path = "%s_diff" % (chara_ifs_base_path)

        chara_ifs_path = "%s.ifs" % (chara_ifs_base_path)
        if not os.path.exists(chara_ifs_path):
            print("chara ifs not found:", chara_ifs_path)
            errors.append((DataErrors.CHARA_IFS_NOT_FOUND, chara_idx, [chara_ifs_path]))
            # NOTE(review): exiting here means errors collected so far are
            # never returned to the caller; kept because it is the existing
            # behavior. sys.exit is used instead of the site-provided exit().
            sys.exit(1)

        chara_ifs = None
        try:
            chara_ifs = ifstools.IFS(chara_ifs_path)
            chara_ifs_files = [str(x) for x in chara_ifs.tree.all_files]
            expected_inner_paths = [
                os.path.join("tex", entry[key]) + ".png"
                for key in ('icon1', 'icon2', 'gg')
            ]
        except Exception:
            print("ifs read error:", chara_ifs_path)
            errors.append((DataErrors.IFS_READ_ERROR, chara_idx, [chara_ifs_path]))
            continue
        finally:
            # Only close an archive that was actually opened for this
            # character (the constructor itself may have failed).
            if chara_ifs is not None:
                chara_ifs.close()

        # Checked outside the try block so a missing image is reported once,
        # as CHARA_IFS_INNER_NOT_FOUND only.  The former exit(1) here was
        # always swallowed by a bare except and turned into a bogus
        # IFS_READ_ERROR, so verification effectively continued; that
        # effective behavior is kept.
        for inner_path in expected_inner_paths:
            if inner_path not in chara_ifs_files:
                print("chara inner file not found:", inner_path)
                errors.append((DataErrors.CHARA_IFS_INNER_NOT_FOUND, chara_idx, [inner_path]))

    return errors
def verify_data(databases, input_data_folder, is_mod_ifs):
    """Verify the music and character databases and print every error.

    The music database is verified first, then the character database; the
    collected error tuples are printed one per line in that order.
    """
    song_problems = verify_musicdb(databases['musicdb'], input_data_folder, is_mod_ifs)
    chara_problems = verify_charadb(databases['charadb'], input_data_folder, is_mod_ifs)

    all_problems = song_problems + chara_problems
    for problem in all_problems:
        print(problem)
if __name__ == "__main__":
    # Command-line entry point: load the databases from the DLL, optionally
    # apply the XML patches from --input-db, then verify the data folder.
    parser = argparse.ArgumentParser()

    # Required inputs
    for flag, description in (
        ('--input-dll', 'Input DLL file'),
        ('--input-xml', 'Input XML file'),
        ('--input-data', 'Input data folder'),
    ):
        parser.add_argument(flag, help=description, default=None, required=True)

    # Optional folder with master.xml and the patch files
    parser.add_argument('--input-db', help='Input db folder', default=None)

    args = parser.parse_args()

    databases = popndll.parse_database_from_dll(args.input_dll, args.input_xml)
    # Re-key every database by entry position
    databases = {name: convert_db_to_dict(databases[name]) for name in databases}

    if args.input_db:
        databases = load_patch_dbs(args.input_db, databases)

    # The "mod" system archives are used whenever --input-db was given
    uses_mod_ifs = args.input_db is not None
    verify_data(databases, args.input_data, uses_mod_ifs)