diff --git a/jubeatools/formats/guess.py b/jubeatools/formats/guess.py
index 4bd1591..dffa041 100644
--- a/jubeatools/formats/guess.py
+++ b/jubeatools/formats/guess.py
@@ -1,6 +1,7 @@
 import json
 import re
 from functools import wraps
+from io import StringIO
 from pathlib import Path
 from typing import Any, Callable, Type
 
@@ -12,38 +13,43 @@ def guess_format(path: Path) -> Format:
     format is unknown"""
     if path.is_dir():
         raise ValueError("Can't guess chart format for a folder")
+
+    contents = path.read_bytes()
+    return guess_file_format(contents)
+
+
+def guess_file_format(contents: bytes) -> Format:
 
     try:
-        return recognize_json_formats(path)
-    except (json.JSONDecodeError, UnicodeDecodeError, ValueError):
+        return recognize_json_formats(contents)
+    except (json.JSONDecodeError, UnicodeError, ValueError):
         pass
 
     try:
-        return recognize_jubeat_analyser_format(path)
-    except (UnicodeDecodeError, ValueError):
+        return recognize_jubeat_analyser_format(contents)
+    except (UnicodeError, ValueError):
         pass
 
-    if looks_like_eve(path):
+    if looks_like_eve(contents):
         return Format.EVE
 
-    if looks_like_jbsq(path):
+    if looks_like_jbsq(contents):
         return Format.JBSQ
 
-    if looks_like_yubiosi_1_0(path):
+    if looks_like_yubiosi_1_0(contents):
         return Format.YUBIOSI_1_0
 
-    if looks_like_yubiosi_1_5(path):
+    if looks_like_yubiosi_1_5(contents):
         return Format.YUBIOSI_1_5
 
-    if looks_like_yubiosi_2_0(path):
+    if looks_like_yubiosi_2_0(contents):
         return Format.YUBIOSI_2_0
 
     raise ValueError("Unrecognized file format")
 
 
-def recognize_json_formats(path: Path) -> Format:
-    with path.open(encoding="utf8") as f:
-        obj = json.load(f)
+def recognize_json_formats(contents: bytes) -> Format:
+    obj = json.loads(contents)
 
     if not isinstance(obj, dict):
         raise ValueError("Top level value is not an object")
@@ -95,9 +101,9 @@ def _dirty_jba_line_strip(line: str) -> str:
     return COMMENT.sub("", line).strip()
 
 
-def recognize_jubeat_analyser_format(path: Path) -> Format:
-    with path.open(encoding="shift-jis-2004", errors="surrogateescape") as f:
-        lines = f.readlines()
+def recognize_jubeat_analyser_format(contents: bytes) -> Format:
+    text = contents.decode(encoding="shift-jis-2004", errors="surrogateescape")
+    lines = text.splitlines()
 
     saw_jubeat_analyser_commands = False
     for raw_line in lines:
@@ -138,10 +144,10 @@ def false_if_raises(
     return decorator
 
 
-@false_if_raises(UnicodeDecodeError, StopIteration)
-def looks_like_eve(path: Path) -> bool:
-    with path.open(encoding="ascii") as f:
-        return looks_like_eve_line(f.readline())
+@false_if_raises(UnicodeError, StopIteration)
+def looks_like_eve(contents: bytes) -> bool:
+    f = StringIO(contents.decode("ascii"))
+    return looks_like_eve_line(f.readline())
 
 
 EVE_COMMANDS = {
@@ -176,30 +182,28 @@ def looks_like_eve_line(line: str) -> bool:
     return True
 
 
-def looks_like_jbsq(path: Path) -> bool:
-    with path.open(mode="rb") as f:
-        magic = f.read(4)
-        return magic in (b"IJBQ", b"IJSQ", b"JBSQ")
+def looks_like_jbsq(contents: bytes) -> bool:
+    magic = contents[:4]
+    return magic in (b"IJBQ", b"IJSQ", b"JBSQ")
 
 
-@false_if_raises(UnicodeDecodeError, ValueError)
-def looks_like_yubiosi_1_0(path: Path) -> bool:
-    with path.open(encoding="shift-jis-2004") as f:
-        lines = f.read().split("\n")
-        (
-            _,  # title
-            _,  # save_data_name
-            raw_bpm,
-            chart_duration_ms,
-            raw_offset,
-            raw_note_count,
-            *raw_times_and_positions,
-        ) = lines
-        float(raw_bpm)
-        int(chart_duration_ms)
-        int(raw_offset)
-        note_count = int(raw_note_count)
-        return len(raw_times_and_positions) == 2 * note_count
+@false_if_raises(UnicodeError, ValueError)
+def looks_like_yubiosi_1_0(contents: bytes) -> bool:
+    lines = contents.decode(encoding="shift-jis-2004").splitlines()
+    (
+        _,  # title
+        _,  # save_data_name
+        raw_bpm,
+        chart_duration_ms,
+        raw_offset,
+        raw_note_count,
+        *raw_times_and_positions,
+    ) = lines
+    float(raw_bpm)
+    int(chart_duration_ms)
+    int(raw_offset)
+    note_count = int(raw_note_count)
+    return len(raw_times_and_positions) == 2 * note_count
 
 
 YUBIOSI_1_5_TAGS = {
@@ -215,12 +219,11 @@ YUBIOSI_1_5_TAGS = {
 }
 
 
-@false_if_raises(UnicodeDecodeError, ValueError)
-def looks_like_yubiosi_1_5(path: Path) -> bool:
-    with path.open(encoding="shift-jis-2004") as f:
-        lines = f.read().split("\n")
-        note_index = lines.index("[Notes]")
-        return any(line_has_yubiosi_tag(line) for line in lines[:note_index])
+@false_if_raises(UnicodeError, ValueError)
+def looks_like_yubiosi_1_5(contents: bytes) -> bool:
+    lines = contents.decode(encoding="shift-jis-2004").splitlines()
+    note_index = lines.index("[Notes]")
+    return any(line_has_yubiosi_tag(line) for line in lines[:note_index])
 
 
 def line_has_yubiosi_tag(line: str) -> bool:
@@ -231,7 +234,9 @@ def line_has_yubiosi_tag(line: str) -> bool:
     return False
 
 
-@false_if_raises(UnicodeDecodeError)
-def looks_like_yubiosi_2_0(path: Path) -> bool:
-    with path.open(encoding="utf-16") as f:
-        return f.readline() == "//Yubiosi 2.0\n"
+@false_if_raises(UnicodeError)
+def looks_like_yubiosi_2_0(contents: bytes) -> bool:
+    text = contents.decode("utf-16")
+    lines = text.splitlines()
+    # `lines and ...` alone would return [] (not False) when there are no lines
+    return bool(lines) and lines[0] == "//Yubiosi 2.0"
diff --git a/jubeatools/formats/tests/test_guess.py b/jubeatools/formats/tests/test_guess.py
new file mode 100644
index 0000000..139393a
--- /dev/null
+++ b/jubeatools/formats/tests/test_guess.py
@@ -0,0 +1,21 @@
+from pathlib import Path
+
+import hypothesis.strategies as st
+from hypothesis import given, settings
+
+from .. import guess
+
+
+@given(st.binary())
+def test_that_guess_format_only_raises_the_specific_value_error(contents: bytes):
+    try:
+        guess.guess_file_format(contents)
+    except ValueError as e:
+        if e.args != ("Unrecognized file format",):
+            raise
+
+
+def test_that_yubiosi_2_0_detection_does_not_raise_exception_for_non_utf16_files():
+    text = "blablabla"
+    bytes_ = text.encode("ascii")
+    guess.looks_like_yubiosi_2_0(bytes_)
\ No newline at end of file