Check in some beta code for a new style of if detection that does not work yet.

2024-11-24 06:20:12 +01:00 · 2021-05-02 03:50:19 +00:00 · 2021-05-02 03:50:19 +00:00 · 336ef6fc3d
commit 336ef6fc3d
parent 7493db034f
3 changed files with 176 additions and 3 deletions
--- a/bemani/format/afp/decompile.py
+++ b/bemani/format/afp/decompile.py
@ -94,6 +94,32 @@ class ControlFlow:
        return f"ControlFlow(beginning={self.beginning}, end={self.end}, next={(', '.join(str(n) for n in self.next_flow)) or 'N/A'}"


+class IfResult:
+    def __init__(self, stmt_id: int, path: bool) -> None:
+        self.stmt_id = stmt_id
+        self.path = path
+
+    def makes_tautology(self, other: "IfResult") -> bool:
+        return self.stmt_id == other.stmt_id and self.path != other.path
+
+    def __repr__(self) -> str:
+        return f"IfResult(stmt_id={self.stmt_id}, path={self.path})"
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, IfResult):
+            return NotImplemented
+        return self.stmt_id == other.stmt_id and self.path == other.path
+
+    def __ne__(self, other: object) -> bool:
+        if not isinstance(other, IfResult):
+            return NotImplemented
+        return not (self.stmt_id == other.stmt_id and self.path == other.path)
+
+    def __hash__(self) -> int:
+        # Lower bit will be for true/false, upper bits for statement ID.
+        return (self.stmt_id * 2) + (1 if self.path else 0)
+
+
 class ConvertedAction:
    # An action that has been analyzed and converted to an intermediate representation.
    pass
@ -1917,6 +1943,115 @@ class ByteCodeDecompiler(VerboseOutput):
        self.vprint(f"Finished separating if statements out of graph starting at {start_id}")
        return [c for _, c in chunks_by_id.items()]

+    def __new_separate_ifs(self, start_id: int, end_id: Optional[int], chunks: Sequence[ArbitraryCodeChunk], offset_map: Dict[int, int]) -> List[ArbitraryCodeChunk]:
+        # TODO: This algorithm can possibly do better than the original at identifying cases.
+        # In particular, it handles compound if statements (if x or y) where the previous one
+        # ends up sticking gotos in. The problem is that it needs to know what if statements
+        # exist before combining them, and we can't do that until we walk the stack, and the
+        # stack walking algorithm both a) comes later and b) relies on all ifs being processed.
+        # So, this stays as a beta for now, and will possibly be integrated at a later time.
+        chunks_by_id: Dict[int, ArbitraryCodeChunk] = {chunk.id: chunk for chunk in chunks}
+        chunks_examined: Set[int] = set()
+
+        self.vprint(f"BETA: Separating if statements out of graph starting at {start_id}")
+
+        def walk_children(cur_chunk: ArbitraryCodeChunk, apply_logic: Sequence[IfResult]) -> Dict[int, Set[IfResult]]:
+            # First, if we have any previous if statements to apply to this chunk, do that now.
+            self.vprint(f"BETA: Applying {apply_logic} to {cur_chunk.id}")
+            chunks_to_logic: Dict[int, Set[IfResult]] = {cur_chunk.id: {x for x in apply_logic}}
+
+            # Now, if it is a loop and we haven't already passed over this chunk, recursively
+            # find if statements inside it as well.
+            if isinstance(cur_chunk, Loop):
+                if cur_chunk.id not in chunks_examined:
+                    chunks_examined.add(cur_chunk.id)
+
+                    self.vprint(f"BETA: Examining loop {cur_chunk.id} body for if statements...")
+                    cur_chunk.chunks = self.__new_separate_ifs(cur_chunk.id, None, cur_chunk.chunks, offset_map)
+                    self.vprint(f"BETA: Finished examining loop {cur_chunk.id} body for if statements...")
+
+            # Now, see if we need to split logic up or not.
+            if not cur_chunk.next_chunks:
+                # We are at the end of our walk.
+                return chunks_to_logic
+
+            if len(cur_chunk.next_chunks) == 1:
+                # We only have one child, so follow that link.
+                next_chunk = cur_chunk.next_chunks[0]
+                if next_chunk in chunks_by_id:
+                    for cid, logic in walk_children(chunks_by_id[next_chunk], apply_logic).items():
+                        chunks_to_logic[cid] = {*chunks_to_logic.get(cid, set()), *logic}
+                return chunks_to_logic
+
+            if not isinstance(cur_chunk, ByteCodeChunk):
+                # We should only be looking at bytecode chunks at this point, all other
+                # types should have a single next chunk.
+                raise Exception(f"Logic error, found converted Loop or If chunk {cur_chunk.id} with multiple successors!")
+
+            if len(cur_chunk.next_chunks) != 2:
+                # This needs to be an if statement.
+                raise Exception(f"Logic error, expected 2 successors but got {len(cur_chunk.next_chunks)} in chunk {cur_chunk.id}!")
+            last_action = cur_chunk.actions[-1]
+            if not isinstance(last_action, IfAction):
+                # This needs, again, to be an if statement.
+                raise Exception("Logic error, only IfActions can have multiple successors in chunk {cur_chunk.id}!")
+
+            # Find the true and false jump points, walk those graphs and assign logical predecessors
+            # to each of them.
+            true_jump_point = offset_map[last_action.jump_if_true_offset]
+            false_jump_points = [n for n in cur_chunk.next_chunks if n != true_jump_point]
+            if len(false_jump_points) != 1:
+                raise Exception("Logic error, got more than one false jump point for an if statement!")
+            false_jump_point = false_jump_points[0]
+
+            if true_jump_point == false_jump_point:
+                # This should never happen.
+                raise Exception("Logic error, both true and false jumps are to the same location!")
+
+            self.vprint(f"BETA: Chunk ID {cur_chunk.id} is an if statement with true node {true_jump_point} and false node {false_jump_point}")
+
+            # Walk both halves, assigning the if statement that has to exist to get to each half.
+            if true_jump_point in chunks_by_id:
+                for cid, logic in walk_children(chunks_by_id[true_jump_point], [*apply_logic, IfResult(cur_chunk.id, True)]).items():
+                    chunks_to_logic[cid] = {*chunks_to_logic.get(cid, set()), *logic}
+            if false_jump_point in chunks_by_id:
+                for cid, logic in walk_children(chunks_by_id[false_jump_point], [*apply_logic, IfResult(cur_chunk.id, False)]).items():
+                    chunks_to_logic[cid] = {*chunks_to_logic.get(cid, set()), *logic}
+            return chunks_to_logic
+
+        # First, walk through and identify how we get to each chunk.
+        chunks_by_logic = walk_children(chunks_by_id[start_id], [])
+        self.vprint(f"BETA: List of logics: {chunks_by_logic}")
+
+        # Now, go through each chunk and remove tautologies (where we get to it through a previous
+        # if statement from both true and false paths, meaning this isn't owned by an if statement).
+        for cid in chunks_by_logic:
+            changed: bool = True
+            while changed:
+                # Assume we didn't change anything.
+                changed = False
+
+                # Figure out if there is a tautology existing in this logic.
+                for path in chunks_by_logic[cid]:
+                    remove: Optional[IfResult] = None
+                    for other in chunks_by_logic[cid]:
+                        if path.makes_tautology(other):
+                            remove = other
+                            break
+
+                    if remove:
+                        # We found a tautology, remove both halves.
+                        self.vprint(f"BETA: {path} makes a tautology with {remove}, removing both of them!")
+                        chunks_by_logic[cid].remove(path)
+                        chunks_by_logic[cid].remove(remove)
+                        changed = True
+                        break
+
+        self.vprint(f"BETA: Cleaned up logics: {chunks_by_logic}")
+
+        self.vprint(f"BETA: Finished separating if statements out of graph starting at {start_id}")
+        return [c for _, c in chunks_by_id.items()]
+
    def __check_graph(self, start_id: int, chunks: Sequence[ArbitraryCodeChunk]) -> List[ArbitraryCodeChunk]:
        # Recursively go through and verify that all entries to the graph have only one link.
        # Also, clean up the graph.
@ -2542,7 +2677,7 @@ class ByteCodeDecompiler(VerboseOutput):
                    if isinstance(statement, InsertionLocation):
                        # Convert to any statements we need to insert.
                        if statement.location in insertables:
-                            self.vprint("Inserting temp variable assignments into insertion location {stataement.location}")
+                            self.vprint(f"Inserting temp variable assignments into insertion location {statement.location}")
                            for stmt in insertables[statement.location]:
                                new_statements.append(stmt)
                    else:
--- a/bemani/tests/helpers.py
+++ b/bemani/tests/helpers.py
@ -1,4 +1,5 @@
 # vim: set fileencoding=utf-8
+import sys
 import unittest
 from typing import Container, List, Dict, Any

@ -8,6 +9,10 @@ __unittest = True


 class ExtendedTestCase(unittest.TestCase):
+    @property
+    def verbose(self) -> bool:
+        return ("-v" in sys.argv) or ("--verbose" in sys.argv)
+
    def assertItemsEqual(self, a: Container[Any], b: Container[Any]) -> None:
        a_items = {x for x in a}
        b_items = {x for x in b}
--- a/bemani/tests/test_afp_decompile.py
+++ b/bemani/tests/test_afp_decompile.py
@ -534,7 +534,7 @@ class TestAFPDecompile(ExtendedTestCase):
    def __call_decompile(self, bytecode: ByteCode) -> List[Statement]:
        # Just create a dummy compiler so we can access the internal method for testing.
        bcd = ByteCodeDecompiler(bytecode)
-        bcd.decompile()
+        bcd.decompile(verbose=self.verbose)
        return bcd.statements

    def __equiv(self, statements: List[Statement]) -> List[str]:
@ -602,7 +602,7 @@ class TestAFPDecompile(ExtendedTestCase):
        statements = self.__call_decompile(bytecode)
        self.assertEqual(self.__equiv(statements), ["throw 'exception'"])

-    def test_if_handling_basic(self) -> None:
+    def test_if_handling_basic_flow_to_end(self) -> None:
        # If by itself case.
        bytecode = self.__make_bytecode([
            # Beginning of the if statement.
@ -750,3 +750,36 @@ class TestAFPDecompile(ExtendedTestCase):

        # TODO: The output should be optimized to remove redundant return statements.
        self.assertEqual(self.__equiv(statements), ["if (True) {\n  builtin_StartPlaying()\n  return\n} else {\n  builtin_StopPlaying()\n  return\n}"])
+
+    def test_if_handling_or(self) -> None:
+        # Two ifs that together make an or (if register == 1 or register == 3)
+        bytecode = self.__make_bytecode([
+            # Beginning of the first if statement.
+            PushAction(100, [Register(0), 1]),
+            IfAction(101, IfAction.EQUALS, 104),
+            # False case (circuit not broken, register is not equal to 1)
+            PushAction(102, [Register(0), 2]),
+            IfAction(103, IfAction.NOT_EQUALS, 106),
+            # This is the true case
+            AP2Action(104, AP2Action.PLAY),
+            JumpAction(105, 107),
+            # This is the false case
+            AP2Action(106, AP2Action.STOP),
+            # This is the fall-through after the if.
+            PushAction(107, ['strval']),
+            AP2Action(108, AP2Action.RETURN),
+        ])
+        statements = self.__call_decompile(bytecode)
+
+        # TODO: This should be optimized as a compound if statement.
+        self.assertEqual(self.__equiv(statements), [
+            "if (registers[0] != 1) {\n"
+            "  if (registers[0] != 2) {\n"
+            "    builtin_StopPlaying()\n"
+            "    label_4:\n"
+            "    return 'strval'\n"
+            "  }\n"
+            "}",
+            "builtin_StartPlaying()",
+            "goto label_4",
+        ])