Switch to bytes for file format guessing
This commit is contained in:
parent
ce51b56dd7
commit
65c86de19c
@ -1,6 +1,7 @@
|
||||
import json
|
||||
import re
|
||||
from functools import wraps
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Type
|
||||
|
||||
@ -12,38 +13,43 @@ def guess_format(path: Path) -> Format:
|
||||
format is unknown"""
|
||||
if path.is_dir():
|
||||
raise ValueError("Can't guess chart format for a folder")
|
||||
|
||||
contents = path.read_bytes()
|
||||
|
||||
return guess_file_format(contents)
|
||||
|
||||
|
||||
def guess_file_format(contents: bytes) -> Format:
    """Identify the chart format of a file from its raw bytes.

    Recognizers are tried in a fixed order and the first one that accepts
    ``contents`` decides the result: ``recognize_json_formats``,
    ``recognize_jubeat_analyser_format``, then the ``looks_like_*``
    predicates for EVE, JBSQ and yubiosi 1.0 / 1.5 / 2.0.

    Raises:
        ValueError: with the message "Unrecognized file format" when none
            of the recognizers accepts ``contents``.
    """
    # Recognizers that return a Format directly. Each one is paired with the
    # exception types that are swallowed so that the next candidate is tried.
    raising_recognizers = (
        (
            recognize_json_formats,
            (json.JSONDecodeError, UnicodeError, ValueError),
        ),
        (
            recognize_jubeat_analyser_format,
            (UnicodeError, ValueError),
        ),
    )
    for recognize, ignored_errors in raising_recognizers:
        try:
            return recognize(contents)
        except ignored_errors:
            continue

    # Boolean predicates, each mapped to the format it vouches for.
    # Order matters: the first predicate answering truthy wins.
    sniffers = (
        (looks_like_eve, Format.EVE),
        (looks_like_jbsq, Format.JBSQ),
        (looks_like_yubiosi_1_0, Format.YUBIOSI_1_0),
        (looks_like_yubiosi_1_5, Format.YUBIOSI_1_5),
        (looks_like_yubiosi_2_0, Format.YUBIOSI_2_0),
    )
    for sniff, detected_format in sniffers:
        if sniff(contents):
            return detected_format

    raise ValueError("Unrecognized file format")
|
||||
|
||||
|
||||
def recognize_json_formats(path: Path) -> Format:
|
||||
with path.open(encoding="utf8") as f:
|
||||
obj = json.load(f)
|
||||
def recognize_json_formats(contents: bytes) -> Format:
|
||||
obj = json.loads(contents)
|
||||
|
||||
if not isinstance(obj, dict):
|
||||
raise ValueError("Top level value is not an object")
|
||||
@ -95,9 +101,9 @@ def _dirty_jba_line_strip(line: str) -> str:
|
||||
return COMMENT.sub("", line).strip()
|
||||
|
||||
|
||||
def recognize_jubeat_analyser_format(path: Path) -> Format:
|
||||
with path.open(encoding="shift-jis-2004", errors="surrogateescape") as f:
|
||||
lines = f.readlines()
|
||||
def recognize_jubeat_analyser_format(contents: bytes) -> Format:
|
||||
text = contents.decode(encoding="shift-jis-2004", errors="surrogateescape")
|
||||
lines = text.splitlines()
|
||||
|
||||
saw_jubeat_analyser_commands = False
|
||||
for raw_line in lines:
|
||||
@ -138,10 +144,10 @@ def false_if_raises(
|
||||
return decorator
|
||||
|
||||
|
||||
@false_if_raises(UnicodeError, StopIteration)
def looks_like_eve(contents: bytes) -> bool:
    """Check whether the first line of ``contents`` looks like an EVE line.

    The whole payload is decoded as ASCII; everything up to and including
    the first line feed is then handed to ``looks_like_eve_line``.

    ``UnicodeError`` and ``StopIteration`` are passed to the
    ``false_if_raises`` decorator (defined elsewhere in this module;
    presumably it turns them into a ``False`` result — confirm there).
    """
    decoded = contents.decode("ascii")
    # Keep the terminator attached, exactly as a text stream's readline()
    # would: the first line is the text before the line feed plus the line
    # feed itself (or the whole text when there is no line feed).
    before, line_feed, _remainder = decoded.partition("\n")
    return looks_like_eve_line(before + line_feed)
|
||||
|
||||
|
||||
EVE_COMMANDS = {
|
||||
@ -176,30 +182,28 @@ def looks_like_eve_line(line: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def looks_like_jbsq(contents: bytes) -> bool:
    """Tell whether ``contents`` begins with one of the JBSQ magic numbers.

    The accepted 4-byte signatures are ``IJBQ``, ``IJSQ`` and ``JBSQ``.
    Payloads shorter than four bytes never match.
    """
    known_signatures = (b"IJBQ", b"IJSQ", b"JBSQ")
    # startswith() accepts a tuple and checks every candidate prefix.
    return contents.startswith(known_signatures)
|
||||
|
||||
|
||||
@false_if_raises(UnicodeError, ValueError)
def looks_like_yubiosi_1_0(contents: bytes) -> bool:
    """Check whether ``contents`` has the line layout of a yubiosi 1.0 file.

    After decoding as shift-jis-2004 the text must start with six header
    lines (title, save data name, BPM, chart duration in ms, offset, note
    count) whose numeric fields parse, followed by exactly two lines per
    announced note.

    ``UnicodeError`` and ``ValueError`` are passed to the
    ``false_if_raises`` decorator (defined elsewhere in this module;
    presumably it turns them into a ``False`` result — confirm there).
    """
    rows = contents.decode(encoding="shift-jis-2004").splitlines()

    # Unpacking raises ValueError when fewer than six header lines exist.
    _title, _save_data_name, bpm, duration_ms, offset, count, *note_rows = rows

    # The parsed values are discarded: these calls only validate that the
    # header fields are numeric, raising ValueError when they are not.
    float(bpm)
    int(duration_ms)
    int(offset)
    announced_notes = int(count)

    return 2 * announced_notes == len(note_rows)
|
||||
|
||||
|
||||
YUBIOSI_1_5_TAGS = {
|
||||
@ -215,12 +219,11 @@ YUBIOSI_1_5_TAGS = {
|
||||
}
|
||||
|
||||
|
||||
@false_if_raises(UnicodeError, ValueError)
def looks_like_yubiosi_1_5(contents: bytes) -> bool:
    """Check whether ``contents`` looks like a yubiosi 1.5 file.

    The text (decoded as shift-jis-2004) must contain a ``[Notes]`` line,
    and at least one line before it must satisfy ``line_has_yubiosi_tag``.

    ``UnicodeError`` and ``ValueError`` are passed to the
    ``false_if_raises`` decorator (defined elsewhere in this module;
    presumably it turns them into a ``False`` result — confirm there).
    """
    decoded = contents.decode(encoding="shift-jis-2004")
    rows = decoded.splitlines()
    # list.index raises ValueError when there is no "[Notes]" line at all.
    header_rows = rows[: rows.index("[Notes]")]
    for row in header_rows:
        if line_has_yubiosi_tag(row):
            return True
    return False
|
||||
|
||||
|
||||
def line_has_yubiosi_tag(line: str) -> bool:
|
||||
@ -231,7 +234,8 @@ def line_has_yubiosi_tag(line: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
@false_if_raises(UnicodeError)
def looks_like_yubiosi_2_0(contents: bytes) -> bool:
    """Check whether ``contents`` starts with the yubiosi 2.0 marker line.

    The payload is decoded as UTF-16 and its first line must be exactly
    ``//Yubiosi 2.0``.

    Always returns a real ``bool``: a payload that decodes to no lines at
    all (for instance empty input) yields ``False``. A bare
    ``lines and lines[0] == ...`` would hand back the empty list in that
    case, because ``and`` returns its first falsy operand.

    ``UnicodeError`` is passed to the ``false_if_raises`` decorator
    (defined elsewhere in this module; presumably it turns a decoding
    failure into a ``False`` result — confirm there).
    """
    lines = contents.decode("utf-16").splitlines()
    # Comparing a one-element slice avoids indexing into an empty list and
    # always evaluates to a bool.
    return lines[:1] == ["//Yubiosi 2.0"]
|
||||
|
21
jubeatools/formats/tests/test_guess.py
Normal file
21
jubeatools/formats/tests/test_guess.py
Normal file
@ -0,0 +1,21 @@
|
||||
from pathlib import Path
|
||||
|
||||
import hypothesis.strategies as st
|
||||
from hypothesis import given, settings
|
||||
|
||||
from .. import guess
|
||||
|
||||
|
||||
@given(st.binary())
def test_that_guess_format_only_raises_the_specific_value_error(contents: bytes):
    """Feed arbitrary bytes to ``guess_file_format``.

    The only ``ValueError`` tolerated is the one whose sole argument is
    "Unrecognized file format"; any other ``ValueError`` is re-raised and
    fails the test, as does any other exception type.
    """
    tolerated_args = ("Unrecognized file format",)
    try:
        guess.guess_file_format(contents)
    except ValueError as error:
        if error.args == tolerated_args:
            return
        raise
|
||||
|
||||
|
||||
def test_that_yubiosi_2_0_detection_does_not_raise_exception_for_non_utf16_files():
    """Call the yubiosi 2.0 sniffer on ASCII-encoded bytes.

    The test passes as long as the call returns without raising; the
    returned value itself is not checked.
    """
    ascii_payload = "blablabla".encode("ascii")
    guess.looks_like_yubiosi_2_0(ascii_payload)
|
Loading…
Reference in New Issue
Block a user