1
0
mirror of synced 2025-01-31 12:13:48 +01:00

mono-column format parser wip

This commit is contained in:
Stepland 2020-06-23 23:29:54 +02:00
parent f79da0c612
commit 0e3e882bd4
12 changed files with 553 additions and 103 deletions

4
.gitignore vendored
View File

@ -1,2 +1,4 @@
*.pyc
.vscode
.vscode
.hypothesis
.pytest_cache

View File

@ -7,4 +7,26 @@ The machine-readable variants are partially documented (in japanese)
on these pages :
- http://yosh52.web.fc2.com/jubeat/fumenformat.html
- http://yosh52.web.fc2.com/jubeat/holdmarker.html
Known simple commands :
- b=<decimal> : beats per measure (4 by default)
- m="<path>" : music file path
- o=<int> : offset in ms (100 by default)
- r=<int> : increase the offset (in ms)
- t=<decimal> : tempo
Known hash commands :
- #memo # mono-column format
- #memo1 # youbeat-like but missing a lot of the youbeat features
- #memo2 # youbeat-like memo
- #boogie # youbeat
- #pw=<int> # number of panels horizontally (4 by default)
- #ph=<int> # number of panels vertically (4 by default)
- #lev=<int> # chart level (typically 1 to 10)
- #dif={1, 2, 3} # 1: BSC, 2: ADV, 3: EXT
- #title="<str>" # music title
- #artist="<str>" # artist's name
- #jacket="<path>" # music cover art path
- #prevpos=<int> # preview start (in ms)
- #bpp={1, 2} # bytes per panel (2 by default)
"""

View File

@ -0,0 +1,73 @@
"""
Useful things to parse the header of analyser-like formats
"""
from decimal import Decimal
from typing import List, Tuple, Union, Iterable, Optional
from parsimonious import Grammar, NodeVisitor, ParseError
# PEG grammar for one command line of an analyser-like file.
# A command is either :
#  - a hash command : "#" followed by a key and an optional "= value" part
#  - a short command : a single word character followed by "= value"
# A value is either a double-quoted string or an unsigned decimal number.
# Spaces and tabs are allowed around the command and around the "=" sign,
# and the line may end with a "//" comment.
command_grammar = Grammar(
r"""
line = ws command ws comment?
command = hash_command / short_command
hash_command = "#" key equals_value?
short_command = letter equals_value
letter = ~r"\w"
key = ~r"\w+"
equals_value = ws "=" ws value
value = value_in_quotes / number
value_in_quotes = '"' quoted_value '"'
quoted_value = ~r"[^\"]+"
number = ~r"\d+(\.\d+)?"
ws = ~r"[\t ]*"
comment = ~r"//.*"
"""
)
class CommandVisitor(NodeVisitor):
    """Collect the key and the value of a command while walking its parse tree.

    Visiting a tree produced by `command_grammar` gives back a (key, value)
    tuple. Both are kept as raw text (numbers are not converted) and the
    value stays None when the command was written without any "= value"
    part."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.key = self.value = None

    def generic_visit(self, node, visited_children):
        # Nodes without a dedicated handler carry nothing worth keeping
        return None

    def visit_hash_command(self, node, visited_children):
        # children are : "#", key, optional "= value"
        self.key = node.children[1].text

    def visit_short_command(self, node, visited_children):
        # children are : letter, "= value"
        self.key = node.children[0].text

    def _store_text_as_value(self, node, visited_children):
        self.value = node.text

    # Quoted strings and numbers are both stored as their matched text
    visit_quoted_value = visit_number = _store_text_as_value

    def visit_line(self, node, visited_children):
        # The root is visited last : by now the children handlers have
        # filled in whatever the line contained
        return self.key, self.value
def is_command(line: str) -> bool:
    """Tell whether `line` parses as a command according to `command_grammar`"""
    try:
        command_grammar.parse(line)
        return True
    except ParseError:
        return False
def parse_command(line: str) -> Tuple[str, Optional[str]]:
    """Parse a command line into a (key, value) tuple.

    Both elements are the raw text found on the line, value is None for a
    command written without any "= value" part.

    Raises ParseError if the line is not a valid command. Lines that start
    with "#" get a dedicated "Invalid command syntax" message, for any other
    line the parser's own error is re-raised untouched.
    """
    try:
        return CommandVisitor().visit(command_grammar.parse(line))
    except ParseError:
        # startswith() rather than indexing : an empty or blank line must
        # re-raise the parser's error, not crash with an IndexError
        if line.strip().startswith("#"):
            # NOTE(review): ParseError is built here from a message only,
            # confirm it still renders properly when converted to str
            raise ParseError(f"Invalid command syntax : {line}") from None
        raise

View File

@ -1,93 +0,0 @@
"""
Useful things to parse the header of analyser-like formats
"""
from decimal import Decimal
from typing import List, Tuple, Union, Iterable
from parsimonious import Grammar, NodeVisitor
from parsimonious.expressions import Node
# PEG grammar for one header line of an analyser-like file.
# The command is optional, so empty lines and comment-only lines parse too.
# A command is either a hash command ("#key=value", lowercase key) or a
# simple command (one word character, "=", value). A value is either a
# double-quoted string or an unsigned integer.
header_line_grammar = Grammar(
r"""
raw_line = line comment?
line = command? ws
command = hash_command / simple_command
hash_command = "#" key "=" value
key = ~"[a-z]+"
value = value_in_quotes / number
value_in_quotes = "\"" quoted_value "\""
quoted_value = ~"[^\"]+"
number = ~"\d+"
simple_command = letter "=" value
letter = ~"\w"
ws = ~"[\t ]*"
comment = ~"//.*"
"""
)
class HeaderLineVisitor(NodeVisitor):
    """Returns a (key, value) tuple or None if the line contains no useful
    information for the parser (a comment or an empty line)"""

    def _as_text(self, node, visited_children):
        # Shared handler for the leaf rules : keep the matched text as-is
        return node.text

    def visit_raw_line(self, node, visited_children):
        # raw_line = line comment? : only the line part is of interest
        value, _ = visited_children
        return value

    def visit_line(self, node, visited_children):
        # line = command? ws : the optional command comes back as something
        # iterable which is empty when the line held no command
        command, _ = visited_children
        value = list(command)
        return value[0] if value else None

    # A command is whichever alternative matched, pass its result through
    visit_command = NodeVisitor.lift_child

    def visit_hash_command(self, node, visited_children):
        # hash_command = "#" key "=" value
        _, key, _, value = visited_children
        return (key, value)

    visit_key = _as_text
    visit_value = NodeVisitor.lift_child

    def visit_value_in_quotes(self, node, visited_children):
        # Drop the surrounding double quotes
        _, value, _ = visited_children
        return value

    visit_quoted_value = _as_text

    def visit_number(self, node, visited_children):
        return Decimal(node.text)

    def visit_simple_command(self, node, visited_children):
        # simple_command = letter "=" value
        letter, _, value = visited_children
        return (letter, value)

    visit_letter = _as_text

    def generic_visit(self, node, visited_children):
        # Rules without a handler hand back their visited children, or the
        # node itself when there are none
        return visited_children or node
# HeaderLineVisitor keeps no state between visits, one shared instance is enough
_header_line_visitor = HeaderLineVisitor()


def load_preamble_line(line: str) -> Union[Tuple[str, Union[str, Decimal]], None]:
    """Parse one header line with `header_line_grammar`.

    Returns the (key, value) tuple of the command found on the line, or None
    if the line holds no command (empty line or comment). Whatever the
    grammar raises on a line it cannot parse is propagated to the caller.
    """
    # The result used to be computed and then dropped, which made this
    # function return None for every line
    return _header_line_visitor.visit(header_line_grammar.parse(line))
def first_non_header_line(lines: Iterable[str]) -> int:
    """Count how many lines at the start of `lines` parse as header lines.

    The result is the index of the first line that `header_line_grammar`
    rejects, or the total number of lines if every one of them parses."""
    header_length = 0
    for line in lines:
        try:
            header_line_grammar.parse(line)
        except Exception:
            # First line which is not part of the header : stop here
            return header_length
        header_length += 1
    return header_length

View File

@ -0,0 +1,296 @@
import re
from typing import Iterable, Optional, Union
from collections import namedtuple
from dataclasses import dataclass, field
from enum import Enum
from decimal import Decimal
from copy import deepcopy
from typing import List, Dict, Union, Iterable
from itertools import product
from parsimonious import ParseError, Grammar, NodeVisitor
from jubeatools.song import *
from .command import parse_command, is_command
from .symbol import parse_symbol_definition, is_symbol_definition
# PEG grammar for one chart line of a mono-column file : 4 to 8 characters
# which are neither whitespace nor one of * # : | - /, optionally surrounded
# by spaces or tabs and optionally followed by a "//" comment
mono_column_chart_line_grammar = Grammar(r"""
line = ws chart_line ws comment?
chart_line = ~r"[^*#:|\-/\s]{4,8}"
ws = ~r"[\t ]*"
comment = ~r"//.*"
""")
class MonoColumnChartLineVisitor(NodeVisitor):
    """Pull the chart symbols out of a line parsed with
    `mono_column_chart_line_grammar`, leaving out the surrounding whitespace
    and the trailing comment"""

    def generic_visit(self, node, visited_children):
        # Only the root node yields a result
        return None

    def visit_line(self, node, visited_children):
        # children are : ws, chart_line, ws, optional comment
        return node.children[1].text
def is_mono_column_chart_line(line: str) -> bool:
    """Tell whether `line` parses as a mono-column chart line"""
    try:
        mono_column_chart_line_grammar.parse(line)
        return True
    except ParseError:
        return False
def parse_mono_column_chart_line(line: str) -> str:
    """Return the chart symbols found on `line`, without the surrounding
    whitespace or the trailing comment. Parsing errors are not caught here."""
    tree = mono_column_chart_line_grammar.parse(line)
    visitor = MonoColumnChartLineVisitor()
    return visitor.visit(tree)
# A section separator is any line starting with two dashes
SEPARATOR = re.compile(r"--.*")


def is_separator(line: str) -> bool:
    """Tell whether `line` starts with the "--" section separator.

    Nothing is stripped here : leading whitespace makes the check fail."""
    return SEPARATOR.match(line) is not None
# Optional whitespace followed by an optional "//" comment
EMPTY_LINE = re.compile(r"\s*(//.*)?")


def is_empty_line(line: str) -> bool:
    """Tell whether `line` holds nothing but whitespace and/or a "//" comment.

    The line is expected without its line terminator.
    """
    # Every part of the pattern is optional, so match() succeeds on any
    # string by matching an empty prefix : the whole line has to be consumed
    # for the answer to mean anything
    return EMPTY_LINE.fullmatch(line) is not None
# Difficulty names for the numeric values of the #dif command, used by
# MonoColumnParser.do_dif for values below 4
DIFFICULTIES = {
    1: "BSC",
    2: "ADV",
    3: "EXT"
}
def split_chart_line(line: str) -> List[str]:
    """Cut a #bpp=2 chart line into symbols, each symbol being 2 bytes long
    once the line is encoded as shift-jis-2004 :

    >>> split_chart_line("25口口25")
    ['25', '口', '口', '25']
    >>> split_chart_line("口⑪①25")
    ['口', '⑪', '①', '25']

    Raises ValueError if the encoded line has an odd number of bytes
    """
    raw = line.encode("shift_jis_2004")
    if len(raw) % 2:
        raise ValueError(f"Invalid chart line : {line}")
    return [
        raw[start:start + 2].decode("shift_jis_2004")
        for start in range(0, len(raw), 2)
    ]
@dataclass
class MonoColumnSection:
    """
    One section of a mono column chart, holding :
    - the raw chart lines of the section
    - the timing symbols defined when the section was closed
    - its length in beats (usually 4)
    - its tempo
    """

    chart_lines: List[str]
    symbols: Dict[str, Decimal]
    length: Decimal
    tempo: Decimal

    def blocs(self, bpp=2) -> Iterable[List[List[str]]]:
        """Yield the chart lines four at a time, each line being split into
        its symbols according to `bpp`, the number of bytes per panel.

        Raises ValueError (once iteration starts) if bpp is neither 1 nor 2"""
        if bpp == 2:
            split_line = split_chart_line
        elif bpp == 1:
            # One character per panel : the line splits into its characters
            split_line = list
        else:
            raise ValueError(f"Invalid bpp : {bpp}")

        for start in range(0, len(self.chart_lines), 4):
            yield [
                split_line(self.chart_lines[start + offset])
                for offset in range(4)
            ]
class MonoColumnParser:
    """Line by line parser for the mono-column (#memo) format.

    Feed the lines of a file one by one to `load_line` : commands update the
    attributes of the parser, symbol definitions update `symbols`, chart
    lines pile up in the current section and each separator line closes the
    current section. Once done, `notes()` yields the notes of every closed
    section.
    """

    CIRCLED_NUMBERS = "①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮⑯"

    # Default timing symbols : the circled number at index n in the string
    # above falls n quarter beats after the start of its section
    MEMO_SYMBOLS = {
        symbol: Decimal("0.25")*index for index, symbol in enumerate(CIRCLED_NUMBERS)
    }

    def __init__(self):
        self.music = None
        self.symbols = deepcopy(MonoColumnParser.MEMO_SYMBOLS)
        self.current_beat = Decimal("0")
        self.current_tempo = None
        self.current_chart_lines = []
        self.timing_events = []
        self.offset = 0
        self.beats_per_section = 4
        self.bytes_per_panel = 2
        self.level = 1
        self.difficulty = None
        self.title = None
        self.artist = None
        self.jacket = None
        self.preview_start = None
        self.sections: List[MonoColumnSection] = []

    def handle_command(self, command, value = None):
        """Dispatch `command` to the matching do_<command> method, passing
        `value` along only when there is one.

        Raises SyntaxError if no such method exists"""
        try:
            method = getattr(self, f"do_{command}")
        except AttributeError:
            raise SyntaxError(f"Unknown analyser command : {command}") from None

        if value is not None:
            method(value)
        else:
            method()

    def do_m(self, value):
        """m : music file path"""
        self.music = value

    def do_t(self, value):
        """t : tempo change, recorded at the start of the current section"""
        self.current_tempo = Decimal(value)
        self.timing_events.append(
            BPMEvent(time=self.current_beat, BPM=self.current_tempo)
        )

    def do_o(self, value):
        """o : offset"""
        self.offset = int(value)

    def do_b(self, value):
        """b : number of beats in the current section"""
        self.beats_per_section = Decimal(value)

    def do_memo(self):
        """#memo : the format handled here, nothing to do"""
        ...

    def do_memo1(self):
        """Format markers of the other memo-like formats : refuse the file"""
        raise ValueError("This is not a mono-column file")

    do_memo2 = do_boogie = do_memo1

    def do_pw(self, value):
        """#pw and #ph : panel grid width and height, only 4 is accepted"""
        if int(value) != 4:
            raise ValueError("jubeatools only supports 4x4 charts")

    do_ph = do_pw

    def do_lev(self, value):
        """#lev : chart level"""
        self.level = int(value)

    def do_dif(self, value):
        """#dif : 1, 2 and 3 map to the names in DIFFICULTIES, any value
        above becomes EDIT-1, EDIT-2 and so on"""
        dif = int(value)
        if dif <= 0:
            raise ValueError(f"Unknown chart difficulty : {dif}")
        if dif < 4:
            self.difficulty = DIFFICULTIES[dif]
        else:
            self.difficulty = f"EDIT-{dif-3}"

    def do_title(self, value):
        """#title : music title"""
        self.title = value

    def do_artist(self, value):
        """#artist : artist's name"""
        self.artist = value

    def do_jacket(self, value):
        """#jacket : cover art path"""
        self.jacket = value

    def do_prevpos(self, value):
        """#prevpos : preview start"""
        self.preview_start = int(value)

    def do_bpp(self, value):
        """#bpp : bytes per panel, either 1 or 2. It can only be set before
        the first section is closed"""
        if self.sections:
            raise ValueError("jubeatools does not handle changing the bytes per panel value halfway")
        elif int(value) not in (1, 2):
            raise ValueError(f"Unexcpected bpp value : {value}")
        else:
            self.bytes_per_panel = int(value)

    def define_symbol(self, character: str, timing: Union[int, Decimal]):
        """Register `character` as the timing symbol for `timing` beats after
        the start of a section.

        Raises ValueError if `character` is not exactly 1 character long or
        if `timing` is greater than the current number of beats per section
        """
        if len(character) != 1:
            raise ValueError(f"Invalid symbol definition : '{character}' is not 1 character long")
        if timing > self.beats_per_section:
            # One list element per message line : without the comma the two
            # literals were concatenated and no line break was inserted
            message = "\n".join([
                "Invalid symbol definition considering the number of beats per frame :",
                f"*{character}:{timing}",
            ])
            raise ValueError(message)
        self.symbols[character] = timing

    def move_to_next_section(self):
        """Close the current section and start a new, empty one.

        Raises SyntaxError if the number of pending chart lines is not a
        multiple of 4"""
        if len(self.current_chart_lines) % 4 != 0:
            raise SyntaxError("Current section is missing chart lines")
        else:
            self.sections.append(MonoColumnSection(
                chart_lines=self.current_chart_lines,
                # Snapshot : later definitions must not leak into this section
                symbols=deepcopy(self.symbols),
                length=self.beats_per_section,
                tempo=self.current_tempo
            ))
            self.current_chart_lines = []
            self.current_beat += self.beats_per_section
            # The section length goes back to 4 beats for the next section
            self.beats_per_section = 4

    def append_chart_line(self, line: str):
        """Add a chart line to the current section after checking that its
        size matches the current bytes per panel value (4 characters for
        #bpp=1, 8 bytes of shift-jis-2004 for #bpp=2).

        Raises SyntaxError otherwise"""
        if self.bytes_per_panel == 1 and len(line) != 4:
            raise SyntaxError(f"Invalid chart line for #bpp=1 : {line}")
        elif self.bytes_per_panel == 2 and len(line.encode("shift_jis_2004")) != 8:
            raise SyntaxError(f"Invalid chart line for #bpp=2 : {line}")
        self.current_chart_lines.append(line)

    def load_line(self, raw_line: str):
        """Handle one line of the file. After stripping, the line is tried
        in order as a command, a symbol definition, a chart line and a
        separator.

        Raises SyntaxError if it is none of these and not an empty line"""
        line = raw_line.strip()
        if is_command(line):
            command, value = parse_command(line)
            self.handle_command(command, value)
        elif is_symbol_definition(line):
            symbol, value = parse_symbol_definition(line)
            self.define_symbol(symbol, value)
        elif is_mono_column_chart_line(line):
            chart_line = parse_mono_column_chart_line(line)
            self.append_chart_line(chart_line)
        elif is_separator(line):
            self.move_to_next_section()
        elif not is_empty_line(line):
            raise SyntaxError(f"not a valid #memo line : {line}")

    def notes(self) -> Iterable[Union[TapNote, LongNote]]:
        """Yield a TapNote for every panel of every closed section which
        holds one of the timing symbols known to that section"""
        current_beat = Decimal(0)
        for section in self.sections:
            # Split the lines with the bytes per panel value of the file,
            # the default of 2 cuts #bpp=1 lines into 2 symbols instead of 4
            blocs = section.blocs(bpp=self.bytes_per_panel)
            for bloc, y, x in product(blocs, range(4), range(4)):
                symbol = bloc[y][x]
                if symbol in section.symbols:
                    decimal_time = current_beat + section.symbols[symbol]
                    fraction_time = BeatsTime(decimal_time).limit_denominator(240)
                    position = NotePosition(x, y)
                    yield TapNote(fraction_time, position)
            current_beat += section.length
def load_mono_column(lines: Iterable[str]) -> Song:
    """Build a Song out of the lines of a mono-column file.

    Any error raised while handling a line comes out as a SyntaxError which
    gives the index of the line (counted from 0) along with the type and
    message of the original error."""
    parser = MonoColumnParser()
    for index, raw_line in enumerate(lines):
        try:
            parser.load_line(raw_line)
        except Exception as error:
            message = (
                f"Error while parsing mono column line {index} :\n"
                f"{type(error).__name__}: {error}"
            )
            raise SyntaxError(message) from None

    metadata = Metadata(
        title=parser.title,
        artist=parser.artist,
        audio=parser.music,
        cover=parser.jacket,
    )
    # A preview is only set up when the file gave its start
    if parser.preview_start is not None:
        metadata.preview_start = parser.preview_start
        metadata.preview_length = SecondsTime(10)

    timing = Timing(
        events=parser.timing_events,
        beat_zero_offset=parser.offset,
    )
    chart = Chart(
        level=parser.level,
        timing=timing,
        notes=list(parser.notes()),
    )
    # NOTE(review): Song is defined elsewhere, confirm that its keyword
    # really is `chart` and not `charts`
    return Song(metadata=metadata, chart={parser.difficulty: chart})

View File

@ -0,0 +1,46 @@
"""
Beat symbol definition
"""
from decimal import Decimal
from typing import Tuple, Optional
from parsimonious import Grammar, NodeVisitor, ParseError
# PEG grammar for a timing symbol definition line, which looks like
#   *<symbol>:<number>
# where the symbol is a single character that is neither whitespace nor one
# of * # : | - / and the number is an unsigned decimal. Whitespace is allowed
# around both, and the line may end with a "//" comment.
beat_symbol_line_grammar = Grammar(
r"""
line = "*" symbol ":" number comment?
symbol = ws ~r"[^*#:|\-/\s]" ws
number = ws ~r"\d+(\.\d+)?" ws
ws = ~r"\s*"
comment = ~r"//.*"
"""
)
class BeatSymbolVisitor(NodeVisitor):
    """Collect the symbol and its timing while walking the parse tree of a
    symbol definition line.

    Visiting a tree produced by `beat_symbol_line_grammar` gives back a
    (symbol, number) tuple, the number being converted to a Decimal."""

    def __init__(self):
        super().__init__()
        self.symbol = None
        self.number = None

    def visit_line(self, node, visited_children):
        # The root is visited last : both attributes are filled in by now
        return self.symbol, self.number

    def visit_symbol(self, node, visited_children):
        # symbol = ws <character> ws
        _, symbol, _ = node.children
        self.symbol = symbol.text

    def visit_number(self, node, visited_children):
        # number = ws <decimal> ws
        _, number, _ = node.children
        self.number = Decimal(number.text)

    def generic_visit(self, node, visited_children):
        # Needed for the visit to succeed at all : the generic_visit
        # inherited from NodeVisitor raises NotImplementedError for every
        # node without a dedicated handler (literals, whitespace, comment)
        ...
def is_symbol_definition(line: str) -> bool:
    """Tell whether `line` parses as a timing symbol definition"""
    try:
        beat_symbol_line_grammar.parse(line)
        return True
    except ParseError:
        return False
def parse_symbol_definition(line: str) -> Tuple[str, Decimal]:
    """Return the (symbol, timing) pair defined on `line`. Parsing errors
    are not caught here."""
    tree = beat_symbol_line_grammar.parse(line)
    visitor = BeatSymbolVisitor()
    return visitor.visit(tree)

View File

@ -0,0 +1,69 @@
from typing import Union, Iterable
from jubeatools.formats.memo.mono_column import MonoColumnParser
from jubeatools.song import TapNote, BeatsTime, NotePosition, LongNote
def compare_chart_notes(chart: str, expected: Iterable[Union[TapNote, LongNote]]):
    """Feed `chart` line by line to a fresh MonoColumnParser and check that
    the notes it finds are the `expected` ones, whatever their order"""
    parser = MonoColumnParser()
    for chart_line in chart.split("\n"):
        parser.load_line(chart_line)
    found = set(parser.notes())
    assert set(expected) == found
def test_simple_mono_column():
    # One note on each panel of the diagonal, one beat apart
    # NOTE(review): the chart literal below only holds a separator line, the
    # panel rows seem to be missing - confirm against the original file
    chart = (
        """
-------
"""
    )
    expected = [
        TapNote(time=BeatsTime(i), position=NotePosition(i,i))
        for i in range(4)
    ]
    compare_chart_notes(chart, expected)
def test_compound_section_mono_column():
    # NOTE(review): the chart literal below only holds a separator line, the
    # panel rows seem to be missing - confirm against the original file
    chart = (
        """
------------- 2
"""
    )
    # Each tuple is (t, x, y) : the note is expected on panel (x, y),
    # (t - 1) quarter beats after the start of the chart
    expected = [
        TapNote(time=BeatsTime("0.25")*(t-1), position=NotePosition(x,y))
        for t,x,y in [
            ( 1, 1, 0),
            ( 1, 2, 0),
            ( 3, 0, 3),
            ( 3, 3, 3),
            ( 4, 0, 2),
            ( 5, 3, 2),
            ( 6, 1, 3),
            ( 7, 2, 3),
            ( 8, 1, 2),
            ( 9, 2, 2),
            (10, 1, 1),
            (11, 2, 1),
            (12, 1, 0),
            (13, 2, 0),
            (14, 0, 3),
            (14, 3, 3),
            (16, 0, 0),
            (16, 3, 0)
        ]
    ]
    compare_chart_notes(chart, expected)

View File

@ -29,7 +29,7 @@ class SecondsTime(Decimal):
...
@dataclass
@dataclass(frozen=True)
class NotePosition:
x: int
y: int
@ -46,13 +46,13 @@ class NotePosition:
return cls(x=index % 4, y=index // 4)
@dataclass
@dataclass(frozen=True)
class TapNote:
time: BeatsTime
position: NotePosition
@dataclass
@dataclass(frozen=True)
class LongNote:
time: BeatsTime
position: NotePosition

41
poetry.lock generated
View File

@ -66,6 +66,29 @@ optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "0.4.3"
[[package]]
category = "dev"
description = "A library for property-based testing"
name = "hypothesis"
optional = false
python-versions = ">=3.5.2"
version = "5.16.3"
[package.dependencies]
attrs = ">=19.2.0"
sortedcontainers = ">=2.1.0,<3.0.0"
[package.extras]
all = ["django (>=2.2)", "dpcontracts (>=0.4)", "lark-parser (>=0.6.5)", "numpy (>=1.9.0)", "pandas (>=0.19)", "pytest (>=4.3)", "python-dateutil (>=1.4)", "pytz (>=2014.1)"]
dateutil = ["python-dateutil (>=1.4)"]
django = ["pytz (>=2014.1)", "django (>=2.2)"]
dpcontracts = ["dpcontracts (>=0.4)"]
lark = ["lark-parser (>=0.6.5)"]
numpy = ["numpy (>=1.9.0)"]
pandas = ["pandas (>=0.19)"]
pytest = ["pytest (>=4.3)"]
pytz = ["pytz (>=2014.1)"]
[[package]]
category = "main"
description = "A lightweight library for converting complex datatypes to and from native Python datatypes."
@ -223,6 +246,14 @@ optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
version = "1.15.0"
[[package]]
category = "dev"
description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set"
name = "sortedcontainers"
optional = false
python-versions = "*"
version = "2.2.2"
[[package]]
category = "dev"
description = "Python Library for Tom's Obvious, Minimal Language"
@ -248,7 +279,7 @@ python-versions = "*"
version = "0.1.9"
[metadata]
content-hash = "a07afd18095d3d59ba9a855d90c797920c63fd004b7e1cb134a5ec981e7e16b8"
content-hash = "6e977fc0cdf3417165a67e8ad881cce02b12727515aa08ee10c875662a7c9a04"
python-versions = "^3.8"
[metadata.files]
@ -276,6 +307,10 @@ colorama = [
{file = "colorama-0.4.3-py2.py3-none-any.whl", hash = "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff"},
{file = "colorama-0.4.3.tar.gz", hash = "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"},
]
hypothesis = [
{file = "hypothesis-5.16.3-py3-none-any.whl", hash = "sha256:7e8e0b7d412cb758c662dcbdcdc93cb8dccc38a9c67f2cd4bf885ba9f714f8d4"},
{file = "hypothesis-5.16.3.tar.gz", hash = "sha256:81f9a033900ea73c7b594173dbce7b9eb39222c04fc6278a6f33cc39c49b144a"},
]
marshmallow = [
{file = "marshmallow-3.6.0-py2.py3-none-any.whl", hash = "sha256:f88fe96434b1f0f476d54224d59333eba8ca1a203a2695683c1855675c4049a7"},
{file = "marshmallow-3.6.0.tar.gz", hash = "sha256:c2673233aa21dde264b84349dc2fd1dce5f30ed724a0a00e75426734de5b84ab"},
@ -394,6 +429,10 @@ six = [
{file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
{file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
]
sortedcontainers = [
{file = "sortedcontainers-2.2.2-py2.py3-none-any.whl", hash = "sha256:c633ebde8580f241f274c1f8994a665c0e54a17724fecd0cae2f079e09c36d3f"},
{file = "sortedcontainers-2.2.2.tar.gz", hash = "sha256:4e73a757831fc3ca4de2859c422564239a31d8213d09a2a666e375807034d2ba"},
]
toml = [
{file = "toml-0.10.1-py2.py3-none-any.whl", hash = "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88"},
{file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"},

View File

@ -18,6 +18,7 @@ parsimonious = "^0.8.1"
pytest = "^5.2"
rope = "^0.17.0"
black = "^19.10b0"
hypothesis = "^5.16.3"
[tool.poetry.scripts]
jubeatools = 'jubeatools.cli:convert'

View File

@ -1,5 +0,0 @@
from jubeatools import __version__
def test_version():
    # Pin the version string exposed by the jubeatools package
    assert __version__ == "0.1.0"