Implement compound or statement detection and folding for AFP decompiler.
This commit is contained in:
parent
de3f9107cd
commit
9b773fd0a8
@ -64,6 +64,8 @@ from .types import (
|
||||
StoreRegisterStatement,
|
||||
ExpressionStatement,
|
||||
IfExpr,
|
||||
AndIf,
|
||||
OrIf,
|
||||
IsUndefinedIf,
|
||||
IsBooleanIf,
|
||||
TwoParameterIf,
|
||||
@ -1381,113 +1383,6 @@ class ByteCodeDecompiler(VerboseOutput):
|
||||
self.vprint(f"Finished separating if statements out of graph starting at {start_id}")
|
||||
return [c for _, c in chunks_by_id.items()]
|
||||
|
||||
def __new_separate_ifs(self, start_id: int, end_id: Optional[int], chunks: Sequence[ArbitraryCodeChunk], offset_map: Dict[int, int]) -> List[ArbitraryCodeChunk]:
    """
    BETA: work out, for every chunk reachable from start_id, which if-branch
    outcomes lead to it.

    The graph is walked from chunks_by_id[start_id]. Every chunk visited is
    tagged with the set of IfResult(chunk_id, True/False) decisions that were
    taken on some path to it. Pairs of results that form a tautology (as
    reported by IfResult.makes_tautology) are then dropped from each chunk's
    set. Loop chunks have their inner chunk list processed recursively (once
    per loop id).

    The computed logic is only reported through vprint; the returned list is
    simply the chunks that were passed in (loop chunks may have had their
    .chunks attribute reassigned). end_id is accepted for signature parity
    but is not read by this implementation.

    Raises Exception on graph shapes that should be impossible: a non-bytecode
    chunk with several successors, a chunk with more than two successors, a
    two-successor chunk not ending in an IfAction, or an if whose true and
    false targets coincide.
    """
    # TODO: This algorithm can possibly do better than the original at identifying cases.
    # In particular, it handles compound if statements (if x or y) where the previous one
    # ends up sticking gotos in. The problem is that it needs to know what if statements
    # exist before combining them, and we can't do that until we walk the stack, and the
    # stack walking algorithm both a) comes later and b) relies on all ifs being processed.
    # So, this stays as a beta for now, and will possibly be integrated at a later time.
    # If we want to use this, we should probably reformat it to work on the finished
    # statement list we get after fully rendering the stack, and use it in the optimization
    # pass phase to rewrite code with fewer (possibly sometimes no) gotos.
    chunks_by_id: Dict[int, ArbitraryCodeChunk] = {chunk.id: chunk for chunk in chunks}
    chunks_examined: Set[int] = set()

    self.vprint(f"BETA: Separating if statements out of graph starting at {start_id}")

    def walk_children(cur_chunk: ArbitraryCodeChunk, apply_logic: Sequence[IfResult]) -> Dict[int, Set[IfResult]]:
        # First, if we have any previous if statements to apply to this chunk, do that now.
        self.vprint(f"BETA: Applying {apply_logic} to {cur_chunk.id}")
        chunks_to_logic: Dict[int, Set[IfResult]] = {cur_chunk.id: set(apply_logic)}

        # Now, if it is a loop and we haven't already passed over this chunk, recursively
        # find if statements inside it as well.
        if isinstance(cur_chunk, Loop):
            if cur_chunk.id not in chunks_examined:
                chunks_examined.add(cur_chunk.id)

                self.vprint(f"BETA: Examining loop {cur_chunk.id} body for if statements...")
                cur_chunk.chunks = self.__new_separate_ifs(cur_chunk.id, None, cur_chunk.chunks, offset_map)
                self.vprint(f"BETA: Finished examining loop {cur_chunk.id} body for if statements...")

        # Now, see if we need to split logic up or not.
        if not cur_chunk.next_chunks:
            # We are at the end of our walk.
            return chunks_to_logic

        if len(cur_chunk.next_chunks) == 1:
            # We only have one child, so follow that link.
            next_chunk = cur_chunk.next_chunks[0]
            if next_chunk in chunks_by_id:
                for cid, logic in walk_children(chunks_by_id[next_chunk], apply_logic).items():
                    chunks_to_logic[cid] = {*chunks_to_logic.get(cid, set()), *logic}
            return chunks_to_logic

        if not isinstance(cur_chunk, ByteCodeChunk):
            # We should only be looking at bytecode chunks at this point, all other
            # types should have a single next chunk.
            raise Exception(f"Logic error, found converted Loop or If chunk {cur_chunk.id} with multiple successors!")

        if len(cur_chunk.next_chunks) != 2:
            # This needs to be an if statement.
            raise Exception(f"Logic error, expected 2 successors but got {len(cur_chunk.next_chunks)} in chunk {cur_chunk.id}!")
        last_action = cur_chunk.actions[-1]
        if not isinstance(last_action, IfAction):
            # This needs, again, to be an if statement. The message must be an
            # f-string so that the offending chunk id is actually interpolated.
            raise Exception(f"Logic error, only IfActions can have multiple successors in chunk {cur_chunk.id}!")

        # Find the true and false jump points, walk those graphs and assign logical predecessors
        # to each of them.
        true_jump_point, false_jump_point = self.__get_jump_points(cur_chunk, offset_map)
        if true_jump_point == false_jump_point:
            # This should never happen.
            raise Exception("Logic error, both true and false jumps are to the same location!")

        self.vprint(f"BETA: Chunk ID {cur_chunk.id} is an if statement with true node {true_jump_point} and false node {false_jump_point}")

        # Walk both halves, assigning the if statement that has to exist to get to each half.
        if true_jump_point in chunks_by_id:
            for cid, logic in walk_children(chunks_by_id[true_jump_point], [*apply_logic, IfResult(cur_chunk.id, True)]).items():
                chunks_to_logic[cid] = {*chunks_to_logic.get(cid, set()), *logic}
        if false_jump_point in chunks_by_id:
            for cid, logic in walk_children(chunks_by_id[false_jump_point], [*apply_logic, IfResult(cur_chunk.id, False)]).items():
                chunks_to_logic[cid] = {*chunks_to_logic.get(cid, set()), *logic}
        return chunks_to_logic

    # First, walk through and identify how we get to each chunk.
    chunks_by_logic = walk_children(chunks_by_id[start_id], [])
    self.vprint(f"BETA: List of logics: {chunks_by_logic}")

    # Now, go through each chunk and remove tautologies (where we get to it through a previous
    # if statement from both true and false paths, meaning this isn't owned by an if statement).
    for cid in chunks_by_logic:
        changed: bool = True
        while changed:
            # Assume we didn't change anything.
            changed = False

            # Figure out if there is a tautology existing in this logic.
            for path in chunks_by_logic[cid]:
                remove: Optional[IfResult] = None
                for other in chunks_by_logic[cid]:
                    if path.makes_tautology(other):
                        remove = other
                        break

                if remove:
                    # We found a tautology, remove both halves. We break out right
                    # after mutating the set, so the iteration above never resumes
                    # over a modified container; the while loop rescans from scratch.
                    self.vprint(f"BETA: {path} makes a tautology with {remove}, removing both of them!")
                    chunks_by_logic[cid].remove(path)
                    chunks_by_logic[cid].remove(remove)
                    changed = True
                    break

    self.vprint(f"BETA: Cleaned up logics: {chunks_by_logic}")

    self.vprint(f"BETA: Finished separating if statements out of graph starting at {start_id}")
    return list(chunks_by_id.values())
|
||||
|
||||
def __check_graph(self, start_id: int, chunks: Sequence[ArbitraryCodeChunk]) -> List[ArbitraryCodeChunk]:
|
||||
# Recursively go through and verify that all entries to the graph have only one link.
|
||||
# Also, clean up the graph.
|
||||
@ -1535,19 +1430,19 @@ class ByteCodeDecompiler(VerboseOutput):
|
||||
# Make a copy of the stack so we can safely modify it ourselves.
|
||||
stack = [s for s in stack]
|
||||
|
||||
# TODO: It's possible for there to be a function/method call with no subsequent use of the return
|
||||
# value and no POP to clear the stack. If this is the case, technically the function WAS called,
|
||||
# just the result was completely ignored. This shows up in a few Pop'n animations. What should
|
||||
# happen is that we check the stack for any leftover function/method calls and re-insert them
|
||||
# into the spot where they were called since we know that they aren't used.
|
||||
|
||||
def make_if_expr(action: IfAction) -> IfExpr:
|
||||
if action.comparison in [IfAction.COMP_IS_UNDEFINED, IfAction.COMP_IS_NOT_UNDEFINED]:
|
||||
if action.comparison == IfAction.COMP_IS_UNDEFINED:
|
||||
conditional = stack.pop()
|
||||
return IsUndefinedIf(conditional, negate=(action.comparison != IfAction.COMP_IS_UNDEFINED))
|
||||
elif action.comparison in [IfAction.COMP_IS_TRUE, IfAction.COMP_IS_FALSE]:
|
||||
return IsUndefinedIf(conditional)
|
||||
elif action.comparison == IfAction.COMP_IS_NOT_UNDEFINED:
|
||||
conditional = stack.pop()
|
||||
return IsBooleanIf(conditional, negate=(action.comparison != IfAction.COMP_IS_TRUE))
|
||||
return IsUndefinedIf(conditional).invert()
|
||||
elif action.comparison == IfAction.COMP_IS_TRUE:
|
||||
conditional = stack.pop()
|
||||
return IsBooleanIf(conditional)
|
||||
elif action.comparison == IfAction.COMP_IS_FALSE:
|
||||
conditional = stack.pop()
|
||||
return IsBooleanIf(conditional).invert()
|
||||
elif action.comparison in [
|
||||
IfAction.COMP_EQUALS,
|
||||
IfAction.COMP_NOT_EQUALS,
|
||||
@ -3192,6 +3087,329 @@ class ByteCodeDecompiler(VerboseOutput):
|
||||
|
||||
return update_ifs(statements, in_loop=False)
|
||||
|
||||
def __gather_flow(self, parent_conditional: IfExpr, statements: Sequence[Statement]) -> Tuple[IfExpr, Dict[int, IfExpr], List[Tuple[IfExpr, Statement]]]:
    """
    Flatten a statement tree into (conditional, statement) pairs, where the
    conditional describes when control reaches that statement.

    parent_conditional is the condition under which the first statement of
    `statements` executes. If statements are recursed into with the parent
    condition and-ed with the if condition (true branch) or its inversion
    (false branch). Other statement kinds, including loops, are recorded as
    single entries and are not descended into.

    Returns a tuple of:
      * the conditional still in effect after the last statement
        (IsBooleanIf(False) when every path returned, threw or jumped away),
      * a mapping from goto target location to the or-ed conditional of every
        goto that jumps there,
      * the flattened (conditional, statement) list, in source order with each
        if statement followed by its true then false branch contents.
    """
    flowed_statements: List[Tuple[IfExpr, Statement]] = []
    running_conditional: IfExpr = parent_conditional
    gotos: Dict[int, IfExpr] = {}

    def merge_gotos(location: int, conditional: IfExpr) -> None:
        # Several gotos can target the same label; the label is reachable if
        # any one of them fires, so their conditions are or-ed together.
        if location in gotos:
            gotos[location] = OrIf(gotos[location], conditional).simplify()
        else:
            gotos[location] = conditional

    for statement in statements:
        if isinstance(statement, IfStatement):
            true_cond, true_gotos, true_statements = self.__gather_flow(AndIf(running_conditional, statement.cond).simplify(), statement.true_statements)
            false_cond, false_gotos, false_statements = self.__gather_flow(AndIf(running_conditional, statement.cond.invert()).simplify(), statement.false_statements)

            # The if statement itself is reached under the running conditional;
            # its children carry the narrowed conditionals computed above.
            flowed_statements.append((running_conditional, statement))
            flowed_statements.extend(true_statements)
            flowed_statements.extend(false_statements)
            for line, goto in true_gotos.items():
                merge_gotos(line, goto)
            for line, goto in false_gotos.items():
                merge_gotos(line, goto)

            if true_cond == IsBooleanIf(False) and false_cond == IsBooleanIf(False):
                # Both conditionals exited.
                running_conditional = IsBooleanIf(False)
            elif true_cond != IsBooleanIf(False) and false_cond == IsBooleanIf(False):
                # The subsequent statements are only parented by the true conditional.
                running_conditional = AndIf(true_cond, running_conditional).simplify()
            elif true_cond == IsBooleanIf(False) and false_cond != IsBooleanIf(False):
                # The subsequent statements are only parented by the false conditional.
                running_conditional = AndIf(false_cond, running_conditional).simplify()
            else:
                # We are parented by either of the true/false cases.
                running_conditional = OrIf(AndIf(true_cond, running_conditional), AndIf(false_cond, running_conditional)).simplify()
        else:
            flowed_statements.append((running_conditional, statement))
            if isinstance(statement, (NullReturnStatement, ReturnStatement, ThrowStatement)):
                # We shouldn't find any more statements after this, unless there's a label.
                running_conditional = IsBooleanIf(False)
            elif isinstance(statement, GotoStatement):
                # The statements after this start from scratch, we shouldn't find any more
                # statements after this unless it's followed by a label.
                merge_gotos(statement.location, running_conditional)
                running_conditional = IsBooleanIf(False)

    if gotos:
        # Second pass: statements that follow a label are also reachable via the
        # gotos targeting that label, so or that condition into their entry.
        goto_conditional: IfExpr = IsBooleanIf(False)
        for i, (cond, stmt) in enumerate(flowed_statements):
            if isinstance(stmt, DefineLabelStatement):
                # This code might not be fully optimized yet and this might be a label
                # to which there is no goto.
                if stmt.location not in gotos:
                    continue
                goto_conditional = OrIf(gotos[stmt.location], goto_conditional).simplify()
            flowed_statements[i] = (OrIf(cond, goto_conditional).simplify(), stmt)
            if isinstance(stmt, (NullReturnStatement, ReturnStatement, ThrowStatement, GotoStatement)):
                # The current running conditional no longer applies after this statement.
                goto_conditional = IsBooleanIf(False)

    return running_conditional, gotos, flowed_statements
|
||||
|
||||
def __gather_candidates(self, statements: Sequence[Statement]) -> List[IfStatement]:
    """
    Collect, depth-first, every if statement that has a non-empty true branch
    and an empty false branch. Both branches of every if statement are
    searched regardless of whether the if itself qualified.
    """
    found: List[IfStatement] = []

    for entry in statements:
        # While loops are deliberately not descended into: gotos are never collapsed
        # across a loop boundary, and loop bodies get their own pass when
        # __rearrange_compound_ifs is invoked on them.
        if not isinstance(entry, IfStatement):
            continue

        has_only_true_branch = bool(entry.true_statements) and not entry.false_statements
        if has_only_true_branch:
            found.append(entry)

        for branch in (entry.true_statements, entry.false_statements):
            found.extend(self.__gather_candidates(branch))

    return found
|
||||
|
||||
def __hoist_compound_ifs(
    self,
    parent_conditional: IfExpr,
    statements: Sequence[Statement],
    candidates: List[IfStatement],
    flow: List[Tuple[IfExpr, Statement]],
) -> Tuple[List[Statement], bool]:
    """
    Rewrite chains of nested single-branch ifs into one compound if statement.

    parent_conditional is the condition under which `statements` executes.
    candidates is the list of if statements eligible to be folded (matched by
    identity), and flow is the (conditional, statement) list describing when
    each original statement is reached.

    A chain `if a { if b { ... } }` is folded when both `parent && a && b` and
    `parent && !(a && b)` appear as conditionals somewhere in flow. The folded
    statement is emitted with the inverted condition and its branches swapped.
    Statements following the chain whose flow conditional equals the false
    condition become part of the folded if; statements inside the chain whose
    flow conditional equals parent_conditional are emitted after it.

    Returns the rewritten statement list and a flag saying whether anything
    was changed. DoWhileStatement objects are modified in place (their body
    attribute is reassigned); if statements are rebuilt as new objects.
    """
    # Statements are used as dictionary keys here, so they must be hashable.
    stmt_to_flow: Dict[Statement, IfExpr] = {f[1]: f[0] for f in flow}
    paths: Set[IfExpr] = {f[0] for f in flow}
    new_statements: List[Statement] = []
    changed: bool = False

    def get_child_candidate(statement: IfStatement) -> Optional[IfStatement]:
        # If the current if statement has any false statements, or its only true
        # statement isn't a child if, then there is no child candidate to add.
        if statement.false_statements or len(statement.true_statements) != 1:
            return None

        # Now, see if the child candidate is in our list of valid candidates.
        # Identity (not equality) is used so that we only match the exact object.
        for candidate in candidates:
            if candidate is statement.true_statements[0]:
                return candidate

        # We didn't find a candidate.
        return None

    def get_compound_if(statements: List[IfStatement]) -> IfExpr:
        # Start with "True", since anding with true is the identity.
        combined: IfExpr = IsBooleanIf(True)

        # Each if statement contributes to the full if.
        for statement in statements:
            combined = AndIf(combined, statement.cond)

        # Finally, simplify it for ease of comparison.
        return combined.simplify()

    # An explicit index is used because the compound branch below consumes a
    # variable number of following statements.
    i = 0
    while i < len(statements):
        statement = statements[i]

        if isinstance(statement, IfStatement):
            # See if this is a compound if pattern.
            candidate_statements: List[IfStatement] = [statement]
            is_candidate = False

            if candidates and (not statement.false_statements) and len(statement.true_statements) == 1:
                while True:
                    # First, is the current combination a valid combined or statement?
                    candidate_true_expr = get_compound_if(candidate_statements)
                    candidate_false_expr = candidate_true_expr.invert().simplify()

                    true_cond = AndIf(parent_conditional, candidate_true_expr).simplify()
                    false_cond = AndIf(parent_conditional, candidate_false_expr).simplify()

                    if true_cond in paths and false_cond in paths:
                        is_candidate = True
                        break

                    # Now, try to add on the next in the layer.
                    new_candidate = get_child_candidate(candidate_statements[-1])
                    if new_candidate is None:
                        # There are no more candidates to try to add.
                        break

                    # Add this to our consideration, retry the logic test.
                    candidate_statements.append(new_candidate)

            if len(candidate_statements) < 2 or not is_candidate:
                # Move past this statement, we don't care about it.
                i += 1

                # However, check its children for any compound ifs that could
                # be collapsed.
                true_statements, child_changed = self.__hoist_compound_ifs(
                    AndIf(parent_conditional, statement.cond).simplify(),
                    statement.true_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed
                false_statements, child_changed = self.__hoist_compound_ifs(
                    AndIf(parent_conditional, statement.cond.invert()).simplify(),
                    statement.false_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed

                new_statements.append(
                    IfStatement(
                        statement.cond,
                        true_statements,
                        false_statements,
                    )
                )
            else:
                true_expr = get_compound_if(candidate_statements)
                false_expr = true_expr.invert().simplify()
                true_cond = AndIf(parent_conditional, true_expr).simplify()
                false_cond = AndIf(parent_conditional, false_expr).simplify()

                hoist_after: List[Statement] = []
                true_statements = []
                false_statements = []

                # Split the innermost if's body: anything reached under exactly the
                # parent conditional does not depend on the compound condition and
                # is moved to after the new if; the rest stays in the true branch.
                for stmt in candidate_statements[-1].true_statements:
                    if stmt_to_flow[stmt] == parent_conditional:
                        hoist_after.append(stmt)
                    else:
                        true_statements.append(stmt)

                # Handled this statement, look at the following statements to figure out
                # which ones are part of the false portion.
                i += 1
                while i < len(statements):
                    statement = statements[i]
                    if stmt_to_flow[statement] == false_cond:
                        false_statements.append(statement)
                        i += 1
                    else:
                        # We don't need to include this statement.
                        break

                # Now, add this new if statement, but make sure to gather up
                # any compound if statements in any of its true/false children
                # that we just updated.
                true_statements, child_changed = self.__hoist_compound_ifs(
                    true_cond,
                    true_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed
                false_statements, child_changed = self.__hoist_compound_ifs(
                    false_cond,
                    false_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed

                # Due to the way decompiling works, negating the if produces
                # more pleasing decompilation. This is due to the fact that
                # these are compound or statements and the easiest way for a
                # compiler to generate code is to do an and of the negation.
                new_statements.append(
                    IfStatement(
                        true_expr.invert(),
                        false_statements,
                        true_statements,
                    )
                )

                # Finally, add the statements we hoisted after the compound
                # expression, being sure to recurse into any if statements therein.
                for stmt in hoist_after:
                    if isinstance(stmt, IfStatement):
                        true_statements, child_changed = self.__hoist_compound_ifs(
                            AndIf(parent_conditional, stmt.cond).simplify(),
                            stmt.true_statements,
                            candidates,
                            flow,
                        )
                        changed = changed or child_changed
                        false_statements, child_changed = self.__hoist_compound_ifs(
                            AndIf(parent_conditional, stmt.cond.invert()).simplify(),
                            stmt.false_statements,
                            candidates,
                            flow,
                        )
                        changed = changed or child_changed
                        new_statements.append(
                            IfStatement(
                                stmt.cond,
                                true_statements,
                                false_statements,
                            )
                        )
                    elif isinstance(stmt, DoWhileStatement):
                        # Loop bodies get an independent candidate/flow analysis.
                        new_body, child_changed = self.__rearrange_compound_ifs(stmt.body)
                        changed = changed or child_changed
                        stmt.body = new_body
                        new_statements.append(stmt)
                    else:
                        new_statements.append(stmt)

                # We hoisted a compound if, so report a change.
                changed = True
        elif isinstance(statement, DoWhileStatement):
            # Loop bodies get an independent candidate/flow analysis.
            new_body, child_changed = self.__rearrange_compound_ifs(statement.body)
            changed = changed or child_changed
            statement.body = new_body
            new_statements.append(statement)
            i += 1
        else:
            new_statements.append(statement)
            i += 1

    return new_statements, changed
|
||||
|
||||
def __rearrange_compound_ifs(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    """
    Optimizer pass entry point: fold nested single-branch ifs in `statements`
    into compound ifs. Returns the rewritten statements and a changed flag.
    """
    foldable = self.__gather_candidates(statements)

    # Flow analysis is only needed when there is something that could be folded.
    flow: List[Tuple[IfExpr, Statement]] = []
    if foldable:
        flow = self.__gather_flow(IsBooleanIf(True), statements)[2]

    return self.__hoist_compound_ifs(IsBooleanIf(True), statements, foldable, flow)
|
||||
|
||||
def _optimize_code(self, statements: Sequence[Statement]) -> List[Statement]:
    """
    Run the optimizer passes over `statements` until a full round changes
    nothing, and return the result as a new list. When self.optimize is
    falsy the statements are returned unmodified (as a list copy).
    """
    optimized: List[Statement] = list(statements)
    if not self.optimize:
        return optimized

    keep_going = True
    while keep_going:
        self.vprint("Running optimizer pass...")
        passes = (
            self.__collapse_identical_labels,
            self.__eliminate_useless_continues,
            self.__eliminate_unused_labels,
            self.__remove_useless_gotos,
            self.__remove_goto_return,
            self.__eliminate_useless_returns,
            self.__convert_loops,
            self.__swap_empty_ifs,
            self.__drop_unneeded_else,
            self.__rearrange_compound_ifs,
        )

        # Each pass takes the statement list and hands back (statements, changed).
        keep_going = False
        for optimizer_pass in passes:
            optimized, pass_changed = optimizer_pass(optimized)
            if pass_changed:
                keep_going = True

        if keep_going:
            self.vprint("Optimizer changed code, running another pass.")
        else:
            self.vprint("Optimizer did not change anything.")

    # TODO: We probably want to collapse some really long if chains to switch
    # statements or if/elif/else blocks for readability but that is left as a
    # future enhancement.

    return optimized
|
||||
|
||||
def __verify_balanced_labels(self, statements: Sequence[Statement]) -> None:
|
||||
gotos: Set[int] = set()
|
||||
labels: Set[int] = set()
|
||||
@ -3228,7 +3446,11 @@ class ByteCodeDecompiler(VerboseOutput):
|
||||
|
||||
self.__walk(statements, check_ifs)
|
||||
|
||||
def __pretty_print(self, statements: Sequence[Statement], prefix: str = "") -> str:
|
||||
def __sanity_check_code(self, statements: Sequence[Statement]) -> None:
    """
    Run the post-decompilation consistency checks over `statements`: first the
    label/goto balance check, then the empty-if check. Nothing is returned;
    presumably the individual checks raise on failure (verify against their
    definitions).
    """
    self.__verify_balanced_labels(statements)
    self.__verify_no_empty_ifs(statements)
|
||||
|
||||
def _pretty_print(self, statements: Sequence[Statement], prefix: str = "") -> str:
|
||||
output: List[str] = []
|
||||
|
||||
for statement in statements:
|
||||
@ -3271,25 +3493,7 @@ class ByteCodeDecompiler(VerboseOutput):
|
||||
statements = self.__eval_chunks(start_id, chunks_loops_and_ifs, offset_map)
|
||||
|
||||
# Now, let's do some clean-up passes.
|
||||
if self.optimize:
|
||||
while True:
|
||||
any_changed = False
|
||||
for func in [
|
||||
self.__collapse_identical_labels,
|
||||
self.__eliminate_useless_continues,
|
||||
self.__eliminate_unused_labels,
|
||||
self.__remove_useless_gotos,
|
||||
self.__remove_goto_return,
|
||||
self.__eliminate_useless_returns,
|
||||
self.__convert_loops,
|
||||
self.__swap_empty_ifs,
|
||||
self.__drop_unneeded_else,
|
||||
]:
|
||||
statements, changed = func(statements)
|
||||
any_changed = any_changed or changed
|
||||
|
||||
if not any_changed:
|
||||
break
|
||||
statements = self._optimize_code(statements)
|
||||
|
||||
# TODO: There's definitely a lot missing from this decompilation process.
|
||||
# For one, function definitions do not include any mention of number of
|
||||
@ -3306,24 +3510,15 @@ class ByteCodeDecompiler(VerboseOutput):
|
||||
# contents in a function call to nail this down, but it is left as a future
|
||||
# enhancement.
|
||||
|
||||
# TODO: If statements still don't support compound or properly, and resort
|
||||
# to using nasty gotos. We have a prototype of an algorithm above with its
|
||||
# own TODO section that can possibly fix this, but I haven't taken the time
|
||||
# to try to fix it up and integrate it. It would produce far more readable
|
||||
# code in some instances. We also would probably want to collapse some really
|
||||
# long if chains to switch statements or if/elif/else blocks for readability
|
||||
# but that is also left as a future enhancement.
|
||||
|
||||
# Let's sanity check the code for a few things that might trip us up.
|
||||
self.__verify_balanced_labels(statements)
|
||||
self.__verify_no_empty_ifs(statements)
|
||||
self.__sanity_check_code(statements)
|
||||
|
||||
# Finally, let's save the code!
|
||||
self.__statements = statements
|
||||
|
||||
def as_string(self, prefix: str = "", verbose: bool = False) -> str:
    """
    Render the decompiled statements as source text.

    prefix is forwarded to the pretty printer. verbose is passed to
    self.debugging() for the duration of the call, and the rendered code is
    also reported through vprint. Returns the rendered code.
    """
    with self.debugging(verbose):
        # Only the renamed _pretty_print helper is called; the stale
        # __pretty_print call was removed because its result was discarded
        # and it referred to the old name.
        code = self._pretty_print(self.statements, prefix=prefix)
        self.vprint(f"Final code:{os.linesep}{code}")
        return code
|
||||
|
||||
|
@ -82,6 +82,8 @@ from .statement import (
|
||||
IsUndefinedIf,
|
||||
IsBooleanIf,
|
||||
TwoParameterIf,
|
||||
AndIf,
|
||||
OrIf,
|
||||
)
|
||||
|
||||
|
||||
@ -156,6 +158,8 @@ __all__ = [
|
||||
'IsUndefinedIf',
|
||||
'IsBooleanIf',
|
||||
'TwoParameterIf',
|
||||
'AndIf',
|
||||
'OrIf',
|
||||
'PushAction',
|
||||
'InitRegisterAction',
|
||||
'StoreRegisterAction',
|
||||
|
@ -1,5 +1,5 @@
|
||||
import os
|
||||
from typing import Any, List, Sequence, Union
|
||||
from typing import Any, List, Optional, Sequence, Union
|
||||
from typing_extensions import Final
|
||||
|
||||
from .expression import (
|
||||
@ -9,6 +9,7 @@ from .expression import (
|
||||
value_ref,
|
||||
name_ref,
|
||||
object_ref,
|
||||
UNDEFINED,
|
||||
)
|
||||
|
||||
|
||||
@ -411,40 +412,451 @@ class IfExpr(ConvertedAction):
|
||||
def swap(self) -> "IfExpr":
    # Base-class placeholder: always raises NotImplementedError. The AndIf
    # override in this file returns a copy with left and right exchanged.
    raise NotImplementedError("Not implemented!")
|
||||
|
||||
def is_always_true(self) -> bool:
    """Report whether this expression is known to always hold; the base class answers False."""
    return False
|
||||
|
||||
def is_always_false(self) -> bool:
    """Report whether this expression is known to never hold; the base class answers False."""
    return False
|
||||
|
||||
def simplify(self) -> "IfExpr":
    """
    Collapse this expression to a boolean constant when its outcome is known
    (always-true is checked before always-false); otherwise return self.
    """
    for is_known, constant in ((self.is_always_true, True), (self.is_always_false, False)):
        if is_known():
            return IsBooleanIf(constant)
    return self
|
||||
|
||||
def __eq__(self, other: object) -> bool:
    """Two if-expressions are equal when their repr() strings match; non-IfExpr objects never compare equal."""
    return isinstance(other, IfExpr) and repr(self) == repr(other)
|
||||
|
||||
def __hash__(self) -> int:
    """Hash of repr(self), matching the repr-based comparison used by IfExpr.__eq__ in this file."""
    return hash(repr(self))
|
||||
|
||||
|
||||
class AndIf(IfExpr):
    """
    Logical conjunction ("left && right") of two if-expressions.

    Both operands are simplified on construction. Results of the truth
    analysis, the inverted form and the hash are cached on the instance.
    Equality and hashing are based on the set of terms returned by
    _gather_and, so term order does not matter.
    """

    def __init__(self, left: IfExpr, right: IfExpr) -> None:
        self.left: Final[IfExpr] = left.simplify()
        self.right: Final[IfExpr] = right.simplify()
        # Cached answers for is_always_true()/is_always_false(); None means "not computed yet".
        self.__true: Optional[bool] = None
        self.__false: Optional[bool] = None
        # Set once this expression is known to be in simplified form, letting simplify() return early.
        self._simplified = False
        # Cached result of invert().
        self.__inverted: Optional[OrIf] = None
        # Not read or written by this class beyond being copied in swap();
        # presumably a cache owned by _gather_and - confirm against that helper.
        self._gathered: Optional[List[IfExpr]] = None
        # Cached result of __hash__().
        self.__hash: Optional[int] = None

    def invert(self) -> "OrIf":
        """Return the negation via De Morgan: !(l && r) == (!l || !r). The result is cached."""
        if self.__inverted is None:
            self.__inverted = OrIf(self.left.invert(), self.right.invert())
            self.__inverted._simplified = self._simplified
        return self.__inverted

    def swap(self) -> "AndIf":
        """Return a new AndIf with the operands exchanged, carrying over the cached analysis state."""
        new_and = AndIf(self.right, self.left)
        new_and.__true = self.__true
        new_and.__false = self.__false
        new_and._simplified = self._simplified
        new_and._gathered = self._gathered
        new_and.__hash = self.__hash
        return new_and

    def is_always_true(self) -> bool:
        """True only when both operands are always true. Cached after the first call."""
        if self.__true is None:
            self.__true = self.left.is_always_true() and self.right.is_always_true()
        return self.__true

    def is_always_false(self) -> bool:
        """True when either operand is always false or the operands are inverses. Cached after the first call."""
        if self.__false is None:
            if self.left.invert() == self.right:
                # If the left and right side are inverses of each other, we know
                # for a fact that this if can never be true.
                self.__false = True
            else:
                self.__false = self.left.is_always_false() or self.right.is_always_false()
        return self.__false

    def simplify(self) -> "IfExpr":
        """
        Return an equivalent, reduced expression.

        The result may be a boolean constant, one of the operands, a single
        combined term, or a new AndIf marked as simplified. Raises Exception
        if _accum_and does not produce an AndIf for a multi-term result.
        """
        # If we already know that we're as simple as we can get, just return ourselves.
        if self._simplified:
            return self

        # Basic superclass stuff.
        if self.is_always_true():
            return IsBooleanIf(True)
        if self.is_always_false():
            return IsBooleanIf(False)

        # Tautology simplifications.
        if self.left.is_always_true() and not self.right.is_always_true():
            return self.right
        if not self.left.is_always_true() and self.right.is_always_true():
            return self.left

        # Equivalent folding (this can get complicated because "x && y && x"
        # should be folded to "x && y"). We use membership tests to fold.
        # Gather up each piece in order, dropping duplicates.
        ifexprs: List[IfExpr] = _gather_and(self)
        final: List[IfExpr] = []

        for expr in ifexprs:
            if expr.is_always_true():
                # Don't bother adding this, it should always be discarded.
                continue
            if expr in final:
                # Don't bother adding this, we already saw it.
                continue

            # Now, make sure that this isn't a negation of a previous term.
            for fexpr in final:
                if fexpr == expr.invert():
                    return IsBooleanIf(False)

            # Now, try to factor this expression out with an existing one to simplify.
            for i, fexpr in enumerate(final):
                factor = _factor_and(fexpr, expr)
                if factor:
                    final[i] = factor
                    break
            else:
                # We did not find a factor. See if there's a negative absorption available.
                for i, fexpr in enumerate(final):
                    absorb = _negative_absorb_and(fexpr, expr)
                    if absorb:
                        final[i] = absorb
                        break
                else:
                    # Nothing simplifies, just add this
                    final.append(expr)

        # Now, grab the last entry, adding it to the right side of and expressions
        # over and over until we have nothing to add.
        if len(final) == 1:
            return final[0]
        new_and = _accum_and(final, simplified=True)
        if not isinstance(new_and, AndIf):
            raise Exception("Logic error!")
        new_and.__true = self.__true
        new_and.__false = self.__false
        new_and._simplified = True
        return new_and

    def __eq__(self, other: object) -> bool:
        """Equal when the other object is an AndIf whose gathered terms form the same set."""
        if not isinstance(other, AndIf):
            return False
        return set(_gather_and(self)) == set(_gather_and(other))

    def __hash__(self) -> int:
        """Order-independent hash built from the sorted hashes of the distinct gathered terms. Cached."""
        if self.__hash is None:
            self.__hash = hash("AND:" + ",".join(sorted(str(hash(s)) for s in set(_gather_and(self)))))
        return self.__hash

    def __repr__(self) -> str:
        """Join the gathered terms with " && ", parenthesizing any term that is itself an AndIf or OrIf."""
        return " && ".join((f"({c!r})" if isinstance(c, (AndIf, OrIf)) else repr(c)) for c in _gather_and(self))
|
||||
|
||||
|
||||
class OrIf(IfExpr):
    """
    Logical OR of two ``IfExpr`` conditions.

    Both operands are simplified on construction. Equality and hashing are
    based on the set of flattened OR terms (see ``_gather_or``), so the order
    and nesting of the terms do not affect comparisons.
    """

    def __init__(self, left: IfExpr, right: IfExpr) -> None:
        self.left: Final[IfExpr] = left.simplify()
        self.right: Final[IfExpr] = right.simplify()
        # Lazily computed caches; None means "not computed yet".
        self.__true: Optional[bool] = None
        self.__false: Optional[bool] = None
        self._simplified = False
        self.__inverted: Optional[AndIf] = None
        self._gathered: Optional[List[IfExpr]] = None
        self.__hash: Optional[int] = None

    def invert(self) -> "AndIf":
        """Return the De Morgan inverse, !left && !right, caching the result."""
        if self.__inverted is None:
            self.__inverted = AndIf(self.left.invert(), self.right.invert())
            self.__inverted._simplified = self._simplified
        return self.__inverted

    def swap(self) -> "OrIf":
        """
        Return a new OrIf with the operands exchanged.

        The order-independent caches (always-true/false flags, simplified
        flag and hash) are carried over. The flattened term list is NOT
        carried over: it is stored in left-then-right order, so reusing it
        would make the swapped expression list its terms in the unswapped
        order whenever the cache happened to be populated already.
        """
        new_or = OrIf(self.right, self.left)
        new_or.__true = self.__true
        new_or.__false = self.__false
        new_or._simplified = self._simplified
        new_or.__hash = self.__hash
        return new_or

    def is_always_true(self) -> bool:
        """Return True when this OR can never evaluate to false."""
        if self.__true is None:
            if self.left.invert() == self.right:
                # If the left and right side are inverses of each other, we know
                # for a fact that this if can never be false.
                self.__true = True
            else:
                self.__true = self.left.is_always_true() or self.right.is_always_true()
        return self.__true

    def is_always_false(self) -> bool:
        """Return True only when both operands are always false."""
        if self.__false is None:
            self.__false = self.left.is_always_false() and self.right.is_always_false()
        return self.__false

    def simplify(self) -> "IfExpr":
        """
        Return an equivalent expression that is as simple as we can make it.

        The result may be a constant ``IsBooleanIf``, one of the operands, a
        factored or absorbed expression, or a fresh ``OrIf`` that is flagged
        as simplified.
        """
        # If we already know that we're as simple as we can get, just return ourselves.
        if self._simplified:
            return self

        # Basic superclass stuff.
        if self.is_always_true():
            return IsBooleanIf(True)
        if self.is_always_false():
            return IsBooleanIf(False)

        # Tautology simplifications: an always-false side contributes nothing.
        if self.left.is_always_false() and not self.right.is_always_false():
            return self.right
        if not self.left.is_always_false() and self.right.is_always_false():
            return self.left

        # Equivalent folding (this can get complicated because "x || y || x"
        # should be folded to "x || y"). We use membership tests to fold.
        # Gather up each piece in order, dropping duplicates.
        ifexprs: List[IfExpr] = _gather_or(self)
        final: List[IfExpr] = []

        for expr in ifexprs:
            if expr.is_always_false():
                # Don't bother adding this, it should always be discarded.
                continue
            if expr in final:
                # Don't bother adding this, we already saw it.
                continue

            # Now, make sure that this isn't a negation of a previous term.
            for fexpr in final:
                if fexpr == expr.invert():
                    return IsBooleanIf(True)

            # Now, try to factor this expression out with an existing one to simplify.
            for i, fexpr in enumerate(final):
                factor = _factor_or(fexpr, expr)
                if factor is not None:
                    final[i] = factor
                    break
            else:
                # We did not find a factor. See if there's a negative absorption available.
                for i, fexpr in enumerate(final):
                    absorb = _negative_absorb_or(fexpr, expr)
                    if absorb is not None:
                        final[i] = absorb
                        break
                else:
                    # Nothing simplifies, just add this
                    final.append(expr)

        # Now, rebuild the expression from the surviving terms. A single
        # survivor is returned as-is; otherwise the terms are chained back
        # into nested or expressions.
        if len(final) == 1:
            return final[0]
        new_or = _accum_or(final, simplified=True)
        if not isinstance(new_or, OrIf):
            raise Exception("Logic error!")
        new_or.__true = self.__true
        new_or.__false = self.__false
        new_or._simplified = True
        return new_or

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, OrIf):
            return False
        # Order and nesting of the terms do not matter for equality.
        return set(_gather_or(self)) == set(_gather_or(other))

    def __hash__(self) -> int:
        if self.__hash is None:
            # Sorted so that the hash agrees with the set-based __eq__ above.
            self.__hash = hash("OR:" + ",".join(sorted(str(hash(s)) for s in set(_gather_or(self)))))
        return self.__hash

    def __repr__(self) -> str:
        # Parenthesize compound terms so the grouping stays visible.
        return " || ".join((f"({c!r})" if isinstance(c, (AndIf, OrIf)) else repr(c)) for c in _gather_or(self))
|
||||
|
||||
|
||||
def _gather_and(obj: IfExpr) -> List[IfExpr]:
    """
    Flatten a tree of nested ``AndIf`` nodes into a list of its terms.

    Anything that is not an ``AndIf`` is returned as a one-element list.
    For an ``AndIf`` the flattened list (left terms, then right terms) is
    cached on the node in ``_gathered`` and that same list is returned on
    later calls.
    """
    if not isinstance(obj, AndIf):
        return [obj]

    if obj._gathered is None:
        obj._gathered = _gather_and(obj.left) + _gather_and(obj.right)
    return obj._gathered
|
||||
|
||||
|
||||
def _accum_and(objs: List[IfExpr], simplified: bool = False) -> IfExpr:
    """
    Chain a list of expressions into right-nested ``AndIf`` nodes.

    ``[a, b, c]`` becomes ``AndIf(a, AndIf(b, c))``. A single-element list
    yields that element unchanged. An empty list yields ``IsBooleanIf(True)``,
    the identity of AND, instead of failing with an ``IndexError``.

    ``simplified`` is written to the ``_simplified`` flag of every ``AndIf``
    created here.
    """
    if not objs:
        # AND over nothing is vacuously true.
        return IsBooleanIf(True)

    accum = objs[-1]
    for obj in reversed(objs[:-1]):
        accum = AndIf(obj, accum)
        # NOTE(review): the original indentation was lost in this view; the
        # flag is assumed to be set per created node - confirm against the
        # original file.
        accum._simplified = simplified
    return accum
|
||||
|
||||
|
||||
def _factor_and(left: IfExpr, right: IfExpr) -> Optional[IfExpr]:
    """
    Try to factor the OR terms shared by both sides of an AND.

    "(a || b) && (a || c)" becomes "a || (b && c)". If one side has nothing
    left once the shared terms are removed, the result is just the shared
    terms. Returns None when the two sides share no OR term.
    """
    lhs_terms = _gather_or(left)
    rhs_terms = _gather_or(right)

    shared: List[IfExpr] = [term for term in lhs_terms if term in rhs_terms]
    if not shared:
        return None

    lhs_rest = [term for term in lhs_terms if term not in shared]
    rhs_rest = [term for term in rhs_terms if term not in shared]
    if not (lhs_rest and rhs_rest):
        # One side consisted solely of shared terms, so it absorbs the other.
        return _accum_or(shared).simplify()

    shared_or = _accum_or(shared)
    remainder = AndIf(_accum_or(lhs_rest), _accum_or(rhs_rest))
    return OrIf(shared_or, remainder).simplify()
|
||||
|
||||
|
||||
def _negative_absorb_and(left: IfExpr, right: IfExpr) -> Optional[IfExpr]:
    """
    Apply negative absorption across an AND, if possible.

    When one side's inverse appears among the OR terms of the other side,
    that term is dropped: "x && (!x || y)" becomes "x && y". The right
    side's terms are checked first, then the left side's. Returns None when
    neither direction applies.
    """
    left_terms = _gather_or(left)
    right_terms = _gather_or(right)
    not_left = left.invert()
    not_right = right.invert()

    hit = next((term for term in right_terms if not_left == term), None)
    if hit is not None:
        survivors = [term for term in right_terms if term is not hit]
        return AndIf(left, _accum_or(survivors)).simplify()

    hit = next((term for term in left_terms if not_right == term), None)
    if hit is not None:
        survivors = [term for term in left_terms if term is not hit]
        return AndIf(_accum_or(survivors), right).simplify()

    return None
|
||||
|
||||
|
||||
def _gather_or(obj: IfExpr) -> List[IfExpr]:
    """
    Flatten a tree of nested ``OrIf`` nodes into a list of its terms.

    Anything that is not an ``OrIf`` is returned as a one-element list.
    For an ``OrIf`` the flattened list (left terms, then right terms) is
    cached on the node in ``_gathered`` and that same list is returned on
    later calls.
    """
    if not isinstance(obj, OrIf):
        return [obj]

    if obj._gathered is None:
        obj._gathered = _gather_or(obj.left) + _gather_or(obj.right)
    return obj._gathered
|
||||
|
||||
|
||||
def _accum_or(objs: List[IfExpr], simplified: bool = False) -> IfExpr:
    """
    Chain a list of expressions into right-nested ``OrIf`` nodes.

    ``[a, b, c]`` becomes ``OrIf(a, OrIf(b, c))``. A single-element list
    yields that element unchanged. An empty list yields ``IsBooleanIf(False)``,
    the identity of OR, instead of failing with an ``IndexError``.

    ``simplified`` is written to the ``_simplified`` flag of every ``OrIf``
    created here.
    """
    if not objs:
        # OR over nothing is vacuously false.
        return IsBooleanIf(False)

    accum = objs[-1]
    for obj in reversed(objs[:-1]):
        accum = OrIf(obj, accum)
        # NOTE(review): the original indentation was lost in this view; the
        # flag is assumed to be set per created node - confirm against the
        # original file.
        accum._simplified = simplified
    return accum
|
||||
|
||||
|
||||
def _factor_or(left: IfExpr, right: IfExpr) -> Optional[IfExpr]:
    """
    Try to factor the AND terms shared by both sides of an OR.

    "(a && b) || (a && c)" becomes "a && (b || c)". If one side has nothing
    left once the shared terms are removed, the result is just the shared
    terms. Returns None when the two sides share no AND term.
    """
    lhs_terms = _gather_and(left)
    rhs_terms = _gather_and(right)

    shared: List[IfExpr] = [term for term in lhs_terms if term in rhs_terms]
    if not shared:
        return None

    lhs_rest = [term for term in lhs_terms if term not in shared]
    rhs_rest = [term for term in rhs_terms if term not in shared]
    if not (lhs_rest and rhs_rest):
        # One side consisted solely of shared terms, so it absorbs the other.
        return _accum_and(shared).simplify()

    shared_and = _accum_and(shared)
    remainder = OrIf(_accum_and(lhs_rest), _accum_and(rhs_rest))
    return AndIf(shared_and, remainder).simplify()
|
||||
|
||||
|
||||
def _negative_absorb_or(left: IfExpr, right: IfExpr) -> Optional[IfExpr]:
    """
    Apply negative absorption across an OR, if possible.

    When one side's inverse appears among the AND terms of the other side,
    that term is dropped: "x || (!x && y)" becomes "x || y". The right
    side's terms are checked first, then the left side's. Returns None when
    neither direction applies.
    """
    left_terms = _gather_and(left)
    right_terms = _gather_and(right)
    not_left = left.invert()
    not_right = right.invert()

    hit = next((term for term in right_terms if not_left == term), None)
    if hit is not None:
        survivors = [term for term in right_terms if term is not hit]
        return OrIf(left, _accum_and(survivors)).simplify()

    hit = next((term for term in left_terms if not_right == term), None)
    if hit is not None:
        survivors = [term for term in left_terms if term is not hit]
        return OrIf(_accum_and(survivors), right).simplify()

    return None
|
||||
|
||||
|
||||
class IsUndefinedIf(IfExpr):
    """
    Condition testing whether a value is (or, when negated, is not) UNDEFINED.

    Instances always start out non-negated; ``invert()`` is the only way to
    obtain the negated form.
    """

    def __init__(self, conditional: Any) -> None:
        self.conditional: Final[Any] = conditional
        self.__negated = False

    def invert(self) -> "IsUndefinedIf":
        """Return a copy that tests the opposite condition."""
        new = IsUndefinedIf(self.conditional)
        new.__negated = not self.__negated
        return new

    def swap(self) -> "IsUndefinedIf":
        """
        Return an equivalent copy of this single-operand test.

        The negation flag is carried over; dropping it would turn
        "x is not UNDEFINED" into "x is UNDEFINED".
        """
        new = IsUndefinedIf(self.conditional)
        new.__negated = self.__negated
        return new

    def is_always_true(self) -> bool:
        # Only decidable when the operand is the UNDEFINED constant itself.
        if self.conditional is UNDEFINED:
            return not self.__negated
        return False

    def is_always_false(self) -> bool:
        # Only decidable when the operand is the UNDEFINED constant itself.
        if self.conditional is UNDEFINED:
            return self.__negated
        return False

    def __repr__(self) -> str:
        val = value_ref(self.conditional, "", parens=True)
        if self.__negated:
            return f"{val} is not UNDEFINED"
        else:
            return f"{val} is UNDEFINED"
|
||||
|
||||
|
||||
class IsBooleanIf(IfExpr):
    """
    Condition testing the truth of a value, optionally negated.

    Instances always start out non-negated; ``invert()`` is the only way to
    obtain the negated form. When the operand is the literal ``True`` or
    ``False`` the outcome is known statically.
    """

    def __init__(self, conditional: Any) -> None:
        self.conditional: Final[Any] = conditional
        self.__negated = False

    def invert(self) -> "IsBooleanIf":
        """Return a copy that tests the opposite condition."""
        new = IsBooleanIf(self.conditional)
        new.__negated = not self.__negated
        return new

    def swap(self) -> "IsBooleanIf":
        """
        Return an equivalent copy of this single-operand test.

        The negation flag is carried over; dropping it would turn "not x"
        into "x".
        """
        new = IsBooleanIf(self.conditional)
        new.__negated = self.__negated
        return new

    def is_always_true(self) -> bool:
        # Identity checks so that only the real booleans count as constants.
        if self.conditional is True:
            return not self.__negated
        elif self.conditional is False:
            return self.__negated
        return False

    def is_always_false(self) -> bool:
        # Identity checks so that only the real booleans count as constants.
        if self.conditional is True:
            return self.__negated
        elif self.conditional is False:
            return not self.__negated
        return False

    def __repr__(self) -> str:
        val = value_ref(self.conditional, "", parens=True)
        if self.__negated:
            return f"not {val}"
        else:
            return f"{val}"
|
||||
@ -473,9 +885,9 @@ class TwoParameterIf(IfExpr):
|
||||
}:
|
||||
raise Exception(f"Invalid comparision {comp}!")
|
||||
|
||||
self.conditional1 = conditional1
|
||||
self.comp = comp
|
||||
self.conditional2 = conditional2
|
||||
self.conditional1: Final[Any] = conditional1
|
||||
self.comp: Final[str] = comp
|
||||
self.conditional2: Final[Any] = conditional2
|
||||
|
||||
def invert(self) -> "TwoParameterIf":
|
||||
if self.comp == self.EQUALS:
|
||||
@ -650,7 +1062,7 @@ class ForStatement(DoWhileStatement):
|
||||
local = ""
|
||||
|
||||
return [
|
||||
f"{prefix}for ({local}{self.inc_variable} = {inc_init}; {self.cond}; {self.inc_variable} = {inc_assign}) {{",
|
||||
f"{prefix}for ({local}{self.inc_variable} = {inc_init}; {self.cond}; {self.inc_variable} = {inc_assign})",
|
||||
f"{prefix}{{",
|
||||
*entries,
|
||||
f"{prefix}}}",
|
||||
@ -680,7 +1092,7 @@ class WhileStatement(DoWhileStatement):
|
||||
entries.extend(statement.render(prefix=prefix + " "))
|
||||
|
||||
return [
|
||||
f"{prefix}while ({self.cond}) {{",
|
||||
f"{prefix}while ({self.cond})",
|
||||
f"{prefix}{{",
|
||||
*entries,
|
||||
f"{prefix}}}",
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user