4115 lines
198 KiB
Python
4115 lines
198 KiB
Python
import os
|
|
from typing import Any, Dict, List, Sequence, Tuple, Set, Union, Optional, Callable, cast
|
|
|
|
from .types import (
|
|
AP2Action,
|
|
JumpAction,
|
|
IfAction,
|
|
PushAction,
|
|
AddNumVariableAction,
|
|
AddNumRegisterAction,
|
|
InitRegisterAction,
|
|
StoreRegisterAction,
|
|
DefineFunction2Action,
|
|
GotoFrame2Action,
|
|
WithAction,
|
|
GetURL2Action,
|
|
StartDragAction,
|
|
Expression,
|
|
StringConstant,
|
|
ArithmeticExpression,
|
|
MethodCall,
|
|
FunctionCall,
|
|
GetTimeFunctionCall,
|
|
GetPathFunctionCall,
|
|
Variable,
|
|
Register,
|
|
Member,
|
|
NewFunction,
|
|
NewObject,
|
|
Array,
|
|
Object,
|
|
NotExpression,
|
|
UNDEFINED,
|
|
GLOBAL,
|
|
object_ref,
|
|
ConvertedAction,
|
|
Statement,
|
|
NopStatement,
|
|
StopMovieStatement,
|
|
PlayMovieStatement,
|
|
GotoFrameStatement,
|
|
GetURL2Statement,
|
|
NextFrameStatement,
|
|
PreviousFrameStatement,
|
|
StopSoundStatement,
|
|
CloneSpriteStatement,
|
|
RemoveSpriteStatement,
|
|
DebugTraceStatement,
|
|
IfStatement,
|
|
SwitchStatement,
|
|
SwitchCase,
|
|
DoWhileStatement,
|
|
WhileStatement,
|
|
ForStatement,
|
|
BreakStatement,
|
|
ContinueStatement,
|
|
ReturnStatement,
|
|
NullReturnStatement,
|
|
ThrowStatement,
|
|
GotoStatement,
|
|
DefineLabelStatement,
|
|
SetVariableStatement,
|
|
DeleteVariableStatement,
|
|
SetMemberStatement,
|
|
DeleteMemberStatement,
|
|
SetLocalStatement,
|
|
StoreRegisterStatement,
|
|
ExpressionStatement,
|
|
IfExpr,
|
|
AndIf,
|
|
OrIf,
|
|
IsUndefinedIf,
|
|
IsBooleanIf,
|
|
TwoParameterIf,
|
|
)
|
|
from .util import VerboseOutput
|
|
|
|
|
|
class ByteCode:
    """A list of bytecodes to execute, optionally carrying a function name."""

    def __init__(self, name: Optional[str], actions: Sequence[AP2Action], end_offset: int) -> None:
        """Store the actions and remember where the bytecode starts and ends.

        ``start_offset`` is the offset of the first action, or None when no
        actions were supplied.
        """
        self.name = name
        self.end_offset = end_offset
        self.actions = list(actions)
        if actions:
            self.start_offset = self.actions[0].offset
        else:
            self.start_offset = None

    def decompile(self, verbose: bool = False) -> str:
        """Run the decompiler over this bytecode and return the rendered source.

        When the bytecode has a name, the body is wrapped in a
        ``name() { ... }`` shell with each piece on its own line.
        """
        decompiler = ByteCodeDecompiler(self)
        decompiler.decompile(verbose=verbose)

        if not self.name:
            return decompiler.as_string(prefix="", verbose=verbose)

        body = decompiler.as_string(prefix=" ", verbose=verbose)
        return os.linesep.join([f"{self.name}()", "{", body, "}"])

    def as_dict(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """Serialize either the decompiled code or the raw actions.

        The ``decompile_bytecode`` keyword selects decompiled output; the
        ``verbose`` keyword is forwarded to the decompiler in that case.
        """
        if not kwargs.get('decompile_bytecode', False):
            return {
                'actions': [a.as_dict(*args, **kwargs) for a in self.actions],
                'end_offset': self.end_offset,
            }

        return {
            'code': self.decompile(verbose=kwargs.get('verbose', False)),
        }

    def __repr__(self) -> str:
        # One output line per line of each action's string form, followed by
        # a synthetic END marker at the end offset.
        lines: List[str] = [
            f" {line}"
            for action in self.actions
            for line in str(action).split(os.linesep)
        ]

        return os.linesep.join([
            "ByteCode(",
            os.linesep.join(lines),
            f" {self.end_offset}: END",
            ")",
        ])
|
|
|
|
|
|
class ControlFlow:
    """A half-open range ``[beginning, end)`` of positions plus its successors.

    ``next_flow`` holds the positions that execution may continue at once
    this range has finished.
    """

    def __init__(self, beginning: int, end: int, next_flow: List[int]) -> None:
        self.beginning = beginning
        self.end = end
        self.next_flow = next_flow

    def contains(self, offset: int) -> bool:
        """Return True when ``offset`` lies inside ``[beginning, end)``."""
        return (self.beginning <= offset) and (offset < self.end)

    def is_first(self, offset: int) -> bool:
        """Return True when ``offset`` is the first position of this range."""
        return self.beginning == offset

    def is_last(self, offset: int) -> bool:
        """Return True when ``offset`` is the last position of this range."""
        return self.end == (offset + 1)

    def split(self, offset: int, link: bool = False) -> Tuple["ControlFlow", "ControlFlow"]:
        """Split this range in two at ``offset`` and return ``(first, second)``.

        ``second`` covers ``[offset, end)`` and inherits this flow's
        ``next_flow`` list (the same list object, not a copy). ``first``
        covers ``[beginning, offset)`` and points at ``second`` only when
        ``link`` is True; otherwise it has no successors.

        Raises:
            Exception: if ``offset`` is not contained in this range.
        """
        if not self.contains(offset):
            raise Exception(f"Logic error, this ControlFlow does not contain offset {offset}")

        # First, make the second half that the first half will point to.
        second = ControlFlow(
            offset,
            self.end,
            self.next_flow,
        )

        # Now, make the first half that we can point to.
        first = ControlFlow(
            self.beginning,
            offset,
            [second.beginning] if link else [],
        )

        return (first, second)

    def __repr__(self) -> str:
        # The closing parenthesis was previously missing, leaving the repr unbalanced.
        return f"ControlFlow(beginning={self.beginning}, end={self.end}, next={(', '.join(str(n) for n in self.next_flow)) or 'N/A'})"
|
|
|
|
|
|
class IfResult:
    """Records which path (true or false) of a given if statement was taken."""

    def __init__(self, stmt_id: int, path: bool) -> None:
        self.stmt_id = stmt_id
        self.path = path

    def makes_tautology(self, other: "IfResult") -> bool:
        """Return True when both results refer to the same statement but opposite paths."""
        same_statement = self.stmt_id == other.stmt_id
        return same_statement and self.path != other.path

    def __repr__(self) -> str:
        return "IfResult(stmt_id={}, path={})".format(self.stmt_id, self.path)

    def __eq__(self, other: object) -> bool:
        if isinstance(other, IfResult):
            return self.stmt_id == other.stmt_id and self.path == other.path
        return NotImplemented

    def __ne__(self, other: object) -> bool:
        if isinstance(other, IfResult):
            return not (self.stmt_id == other.stmt_id and self.path == other.path)
        return NotImplemented

    def __hash__(self) -> int:
        # Lower bit will be for true/false, upper bits for statement ID.
        path_bit = 1 if self.path else 0
        return (self.stmt_id * 2) + path_bit
|
|
|
|
|
|
class TempVariable(Variable):
    """Variable subclass used only as a recognizable marker.

    This is solely for recognizing when a stack which is being reconciled
    already has a variable.
    """

    def __repr__(self) -> str:
        return "TempVariable({})".format(self.name)
|
|
|
|
|
|
class MaybeStackEntry(Expression):
    """Placeholder expression that remembers the ID of a parent stack.

    It is an intermediate marker only; rendering one is treated as a logic error.
    """

    def __init__(self, parent_stack_id: int) -> None:
        self.parent_stack_id = parent_stack_id

    def __repr__(self) -> str:
        return "MaybeStackEntry({})".format(self.parent_stack_id)

    def render(self, parent_prefix: str, nested: bool = False) -> str:
        """Always raises, since this placeholder must be resolved before rendering."""
        message = "Logic error, a MaybeStackEntry should never make it to the render stage!"
        raise Exception(message)
|
|
|
|
|
|
class MultiAction(ConvertedAction):
    """Container for several converted actions produced by a single opcode.

    An action that allows us to expand the number of lines we have to work
    with, for opcodes that perform more than one statement's worth of actions.
    """

    def __init__(self, actions: Sequence[ConvertedAction]) -> None:
        # The sequence is stored as given (not copied).
        self.actions = actions

    def __repr__(self) -> str:
        # We should never emit one of these in printing.
        return "MultiAction({})".format(self.actions)
|
|
|
|
|
|
# Type of the entries stored in a ByteCodeChunk's action list: either an
# AP2Action or a ConvertedAction.
ArbitraryOpcode = Union[AP2Action, ConvertedAction]
|
|
|
|
|
|
class IntermediateIf(ConvertedAction):
    """An if construct holding its originating action and the statements of both branches."""

    def __init__(self, parent_action: Union[IfAction, IfExpr], true_statements: Sequence[Statement], false_statements: Sequence[Statement]) -> None:
        self.parent_action = parent_action
        # Both branches are copied into fresh lists.
        self.true_statements = list(true_statements)
        self.false_statements = list(false_statements)

    def __repr__(self) -> str:
        # Indent every line of every statement's string form by one space.
        true_lines: List[str] = [
            f" {line}"
            for stmt in self.true_statements
            for line in str(stmt).split(os.linesep)
        ]
        false_lines: List[str] = [
            f" {line}"
            for stmt in self.false_statements
            for line in str(stmt).split(os.linesep)
        ]

        pieces: List[str] = [
            f"if <{self.parent_action}> {{",
            os.linesep.join(true_lines),
        ]

        # The else section is only printed when there are false statements.
        if self.false_statements:
            pieces.append("} else {")
            pieces.append(os.linesep.join(false_lines))

        pieces.append("}")
        return os.linesep.join(pieces)
|
|
|
|
|
|
class ByteCodeChunk:
    """A run of actions with the IDs of the chunks that precede and follow it."""

    def __init__(
        self,
        id: int,
        actions: Sequence[ArbitraryOpcode],
        next_chunks: Optional[List[int]] = None,
        previous_chunks: Optional[List[int]] = None,
    ) -> None:
        """Create a chunk.

        Args:
            id: Identifier of this chunk.
            actions: Actions contained in the chunk; copied into a new list.
            next_chunks: IDs of successor chunks. None or an empty list
                yields a fresh empty list.
            previous_chunks: IDs of predecessor chunks. None or an empty
                list yields a fresh empty list.
        """
        # Defaults are None rather than [] so that no list object is ever
        # shared through the function signature.
        self.id = id
        self.actions = list(actions)
        self.next_chunks = next_chunks or []
        self.previous_chunks = previous_chunks or []

    def __repr__(self) -> str:
        entries: List[str] = []
        for action in self.actions:
            if isinstance(action, DefineFunction2Action):
                # Special case, since we will decompile this later, we don't want to print it now.
                entries.append(f" {action.offset}: {AP2Action.action_to_name(action.opcode)}, Name: {action.name or '<anonymous function>'}, Flags: {hex(action.flags)}")
            else:
                entries.extend([f" {s}" for s in str(action).split(os.linesep)])

        return (
            f"ByteCodeChunk({os.linesep}" +
            f" ID: {self.id}{os.linesep}" +
            (f" Previous Chunks: {', '.join(str(n) for n in self.previous_chunks)}{os.linesep}" if self.previous_chunks else f" Start Chunk{os.linesep}") +
            f"{os.linesep.join(entries)}{os.linesep}" +
            (f" Next Chunks: {', '.join(str(n) for n in self.next_chunks)}{os.linesep}" if self.next_chunks else f" End Chunk{os.linesep}") +
            ")"
        )
|
|
|
|
|
|
# Any node that can appear in a list of chunks: a plain ByteCodeChunk, a Loop
# or an IfBody. Loop and IfBody are given as strings because they are defined
# further down in this file.
ArbitraryCodeChunk = Union[ByteCodeChunk, "Loop", "IfBody"]
|
|
|
|
|
|
class Loop:
    """A group of chunks treated as one loop, with its external predecessors and successors."""

    def __init__(self, id: int, chunks: Sequence[ArbitraryCodeChunk]) -> None:
        # The ID is the chunk that other chunks point into, aka the loop header.
        self.id = id
        self.chunks = list(chunks)
        self.post_statements: List[Statement] = []

        # Calculate predecessors (who points into it) and successors (who we point out of).
        # Any ID referenced by a member chunk that is not itself a member is external.
        member_ids = {member.id for member in chunks}

        self.next_chunks: List[int] = [
            target
            for member in chunks
            for target in member.next_chunks
            if target not in member_ids
        ]
        self.previous_chunks: List[int] = [
            source
            for member in chunks
            for source in member.previous_chunks
            if source not in member_ids
        ]

    def __repr__(self) -> str:
        body: List[str] = [
            f" {line}"
            for chunk in self.chunks
            for line in str(chunk).split(os.linesep)
        ]

        if self.previous_chunks:
            previous_line = f" Previous Chunks: {', '.join(str(n) for n in self.previous_chunks)}"
        else:
            previous_line = " Start Chunk"

        if self.next_chunks:
            next_line = f" Next Chunks: {', '.join(str(n) for n in self.next_chunks)}"
        else:
            next_line = " End Chunk"

        return os.linesep.join([
            "Loop(",
            f" ID: {self.id}",
            previous_line,
            os.linesep.join(body),
            next_line,
            ")",
        ])
|
|
|
|
|
|
class IfBody:
    """The true and false chunk lists of an if, with a single predecessor and optional successor."""

    def __init__(self, id: int, true_chunks: Sequence[ArbitraryCodeChunk], false_chunks: Sequence[ArbitraryCodeChunk], next_chunk: Optional[int], previous_chunk: int) -> None:
        # The ID in this case is what the previous block points at. It does not
        # have any bearing on the ID of the true and false chunks.
        self.id = id

        self.true_chunks = list(true_chunks)
        self.false_chunks = list(false_chunks)

        # If bodies are a bit special compared to Loops, we know the previous and next chunks
        # for all of them.
        self.previous_chunks: List[int] = [previous_chunk]
        if next_chunk is None:
            self.next_chunks: List[int] = []
        else:
            self.next_chunks = [next_chunk]

    def __repr__(self) -> str:
        true_lines: List[str] = [
            f" {line}"
            for chunk in self.true_chunks
            for line in str(chunk).split(os.linesep)
        ]
        false_lines: List[str] = [
            f" {line}"
            for chunk in self.false_chunks
            for line in str(chunk).split(os.linesep)
        ]

        if self.previous_chunks:
            previous_line = f" Previous Chunks: {', '.join(str(n) for n in self.previous_chunks)}"
        else:
            previous_line = " Start Chunk"

        if self.next_chunks:
            next_line = f" Next Chunks: {', '.join(str(n) for n in self.next_chunks)}"
        else:
            next_line = " End Chunk"

        return os.linesep.join([
            "IfBody(",
            f" ID: {self.id}",
            previous_line,
            " True Chunks:",
            os.linesep.join(true_lines),
            " False Chunks:",
            os.linesep.join(false_lines),
            next_line,
            ")",
        ])
|
|
|
|
|
|
class InsertionLocation(Statement):
    """Placeholder statement carrying a location number; it must never be rendered."""

    def __init__(self, location: int) -> None:
        self.location = location

    def __repr__(self) -> str:
        return "<INSERTION POINT FOR {}>".format(self.location)

    def render(self, prefix: str, verbose: bool = False) -> List[str]:
        """Always raises, since the placeholder should be gone before rendering."""
        message = "Logic error, an InsertionLocation should never make it to the render stage!"
        raise Exception(message)
|
|
|
|
|
|
class OriginalCallLocation(Statement):
    """Placeholder statement carrying an insertion ID; it must never be rendered."""

    def __init__(self, insertion_id: int) -> None:
        self.insertion_id = insertion_id

    def __repr__(self) -> str:
        # NOTE(review): this text is identical to InsertionLocation's repr, so
        # the two placeholders cannot be told apart in debug output. It is left
        # unchanged here in case anything compares against it; confirm before renaming.
        return f"<INSERTION POINT FOR {self.insertion_id}>"

    def render(self, prefix: str, verbose: bool = False) -> List[str]:
        """Always raises, since the placeholder should be gone before rendering.

        Raises:
            Exception: unconditionally.
        """
        # The message previously named InsertionLocation, pointing whoever hit
        # this error at the wrong class.
        raise Exception("Logic error, an OriginalCallLocation should never make it to the render stage!")
|
|
|
|
|
|
class BitVector:
    """A fixed-length vector of booleans addressed by index ``0 .. length - 1``.

    Mutating methods modify the vector in place and return ``self`` so that
    calls can be chained. Combining or comparing two vectors of different
    lengths is treated as a logic error and raises.
    """

    def __init__(self, length: int, init: bool = False) -> None:
        """Create a vector of ``length`` bits, every bit set to ``init``."""
        self.__bits: Dict[int, bool] = {i: init for i in range(length)}

    def clone(self) -> "BitVector":
        """Return an independent copy of this vector."""
        # Start from an empty vector so we don't build a throwaway set of bits
        # just to overwrite it.
        new = BitVector(0)
        new.__bits = dict(self.__bits)
        return new

    def setAllBitsTo(self, val: bool) -> "BitVector":
        """Set every bit to ``val``."""
        self.__bits = {i: val for i in self.__bits}
        return self

    def setBit(self, bit: int) -> "BitVector":
        """Set a single bit to True.

        Raises:
            Exception: if ``bit`` is outside the vector.
        """
        if bit < 0 or bit >= len(self.__bits):
            raise Exception(f"Logic error, trying to set bit {bit} of a bitvector length {len(self.__bits)}!")
        self.__bits[bit] = True
        return self

    def clearBit(self, bit: int) -> "BitVector":
        """Set a single bit to False.

        Raises:
            Exception: if ``bit`` is outside the vector.
        """
        if bit < 0 or bit >= len(self.__bits):
            # The message previously said "set", a copy-paste from setBit.
            raise Exception(f"Logic error, trying to clear bit {bit} of a bitvector length {len(self.__bits)}!")
        self.__bits[bit] = False
        return self

    def orVector(self, other: "BitVector") -> "BitVector":
        """OR ``other`` into this vector in place.

        Raises:
            Exception: if the two vectors differ in length.
        """
        if len(self.__bits) != len(other.__bits):
            raise Exception(f"Logic error, trying to combine bitvector of size {len(self.__bits)} with another of size {len(other.__bits)}!")
        self.__bits = {i: (self.__bits[i] or other.__bits[i]) for i in self.__bits}
        return self

    def andVector(self, other: "BitVector") -> "BitVector":
        """AND ``other`` into this vector in place.

        Raises:
            Exception: if the two vectors differ in length.
        """
        if len(self.__bits) != len(other.__bits):
            raise Exception(f"Logic error, trying to combine bitvector of size {len(self.__bits)} with another of size {len(other.__bits)}!")
        self.__bits = {i: (self.__bits[i] and other.__bits[i]) for i in self.__bits}
        return self

    def __eq__(self, other: object) -> bool:
        """Compare bit-for-bit with another BitVector of the same length.

        Raises:
            Exception: if the two vectors differ in length.
        """
        if not isinstance(other, BitVector):
            return NotImplemented
        if len(self.__bits) != len(other.__bits):
            raise Exception(f"Logic error, trying to compare bitvector of size {len(self.__bits)} with another of size {len(other.__bits)}!")

        # Both vectors are keyed 0..length-1, so dict equality is a bitwise comparison.
        return self.__bits == other.__bits

    def __ne__(self, other: object) -> bool:
        # Propagate NotImplemented instead of negating it: "not NotImplemented"
        # evaluated to False (claiming equality with a foreign object) and is
        # deprecated in boolean context.
        result = self.__eq__(other)
        if result is NotImplemented:
            return NotImplemented
        return not result

    def __len__(self) -> int:
        return len(self.__bits)

    @property
    def bitsSet(self) -> Set[int]:
        """The set of indexes whose bit is currently true."""
        return {i for i in self.__bits if self.__bits[i]}
|
|
|
|
|
|
class ByteCodeDecompiler(VerboseOutput):
|
|
def __init__(self, bytecode: ByteCode, optimize: bool = True) -> None:
    """Remember the bytecode to decompile and reset all internal state."""
    super().__init__()

    # Inputs supplied by the caller.
    self.optimize = optimize
    self.bytecode = bytecode

    # Internal counters; the goto body counter starts at -1, the other two at 0.
    self.__insertion_id: int = 0
    self.__goto_body_id: int = -1
    self.__tmpvar_id: int = 0

    # Stays None until statements have been stored by decompilation.
    self.__statements: Optional[List[Statement]] = None
|
|
|
|
@property
def statements(self) -> List[Statement]:
    """The stored statement list.

    Raises:
        Exception: if no statements have been stored yet.
    """
    result = self.__statements
    if result is not None:
        return result
    raise Exception("Call decompile() first before retrieving statements!")
|
|
|
|
def _graph_control_flow(self, bytecode: ByteCode) -> Tuple[List[ByteCodeChunk], Dict[int, int]]:
    """Split bytecode into chunks of contiguous execution linked as a graph.

    Every THROW, RETURN, END, JUMP and IF action ends a chunk, and every
    jump destination starts one. Chunks that nothing points at (other than
    the chunk at the start offset) are discarded as dead code. The remaining
    chunks are sorted by offset and renumbered with contiguous IDs. An empty
    sentinel chunk representing the end of the bytecode is appended last.

    Args:
        bytecode: The bytecode whose actions should be graphed.

    Returns:
        A tuple ``(chunks, offset_to_id)``. ``chunks`` is the list of chunks
        including the trailing sentinel. ``offset_to_id`` maps the offset of
        each surviving chunk's first action, plus ``bytecode.end_offset``,
        to the chunk ID it was assigned.

    Raises:
        Exception: if a jump targets an offset that is neither an action nor
            the end offset, if an IF2 opcode is encountered, or if the
            finished graph violates one of the invariants checked at the end
            (which includes bytecode with no actions).
    """
    # Start by assuming that the whole bytecode never directs flow. This is, confusingly,
    # indexed by AP2Action offset, not by actual bytecode offset, so we can avoid the
    # prickly problem of opcodes that take more than one byte in the data.
    flows: Dict[int, ControlFlow] = {}
    end = len(bytecode.actions)
    beginning = 0

    # The end of the program.
    flows[end] = ControlFlow(end, end + 1, [])

    # The rest of the program.
    flows[beginning] = ControlFlow(beginning, end, [end])

    # Index of the first action found at each bytecode offset, computed once so
    # that resolving a jump target doesn't rescan the whole action list.
    action_index_by_offset: Dict[int, int] = {}
    for index, candidate in enumerate(bytecode.actions):
        action_index_by_offset.setdefault(candidate.offset, index)

    # Function that helps us find a flow by position.
    def find(opcodeno: int) -> int:
        for start, cf in flows.items():
            if cf.contains(opcodeno):
                return start

        raise Exception(f"Logic error, offset {opcodeno} somehow not in our control flow graph!")

    # Function that turns a jump's target offset into an action index.
    def resolve_destination(action: AP2Action, target_offset: int, verb: str) -> int:
        dest_action = action_index_by_offset.get(target_offset)
        if dest_action is not None:
            return dest_action
        if target_offset == bytecode.end_offset:
            return end
        raise Exception(f"{action} {verb} to an opcode that doesn't exist!")

    # Function that makes sure some flow begins exactly at the destination action.
    def ensure_flow_starts_at(action: AP2Action, dest_action: int) -> int:
        # If the destination action flow already starts with the jump offset,
        # then we're good, we just need to point our current split at this new
        # offset. If it doesn't start with the jump offset, then we need to split
        # that flow so we can point to the opcode directly.
        dest_action_flow = find(dest_action)
        if not flows[dest_action_flow].is_first(dest_action):
            first, second = flows[dest_action_flow].split(dest_action, link=True)

            self.vprint(f"{action} action required split of {flows[dest_action_flow]} into {first}, {second}")

            flows[dest_action_flow] = first
            flows[dest_action] = second

            # Now, the second is what we want to point at in the next section.
            dest_action_flow = dest_action
        return dest_action_flow

    # Function that ends the current flow after the given action and points it at its successors.
    def redirect_after(action: AP2Action, current_action: int, next_action: int, successors: List[int], target_description: str) -> None:
        current_action_flow = find(current_action)
        next_action_flow = find(next_action)

        if current_action_flow == next_action_flow:
            # We need to split this on the next_action boundary.
            first, second = flows[current_action_flow].split(next_action)
            first.next_flow = successors

            self.vprint(f"{action} action split {flows[current_action_flow]} into {first}, {second}")

            flows[current_action_flow] = first
            flows[next_action] = second
        else:
            # This already was split in two, presumably by something
            # earlier in the chain jumping to the opcode after this.
            # We need to unlink the current flow from the second and
            # link it to the requested successors instead.
            flows[current_action_flow].next_flow = successors

            self.vprint(f"{action} action repointed {flows[current_action_flow]} to {target_description}")

    # Now, walk the entire bytecode, and every control flow point split the graph at that point.
    for i, action in enumerate(bytecode.actions):
        current_action = i
        next_action = i + 1

        if action.opcode in [AP2Action.THROW, AP2Action.RETURN, AP2Action.END]:
            # This should end execution, so we should cap off the current execution
            # and send it to the end.
            redirect_after(action, current_action, next_action, [end], "end")
        elif action.opcode == AP2Action.JUMP:
            # Unconditional control flow redirection after this, we should split the
            # section if necessary and point this section at the new offset.
            # First, we need to find the jump point and make sure that its the start
            # of a section.
            action = cast(JumpAction, action)
            dest_action = resolve_destination(action, action.jump_offset, "jumps")
            dest_action_flow = ensure_flow_starts_at(action, dest_action)

            # Now, we must split the current flow at the point of this jump.
            redirect_after(action, current_action, next_action, [dest_action_flow], "new chunk")
        elif action.opcode == AP2Action.IF:
            # Conditional control flow redirection after this, we should split the
            # section if necessary and point this section at the new offset as well
            # as the second half of the split section.
            # First, we need to find the jump point and make sure that its the start
            # of a section.
            action = cast(IfAction, action)
            dest_action = resolve_destination(action, action.jump_if_true_offset, "conditionally jumps")
            dest_action_flow = ensure_flow_starts_at(action, dest_action)

            # Now, we must split the current flow at the point of this jump. The
            # fall-through successor is listed first, the jump target second.
            redirect_after(action, current_action, next_action, [next_action, dest_action_flow], "new chunk")
        elif action.opcode == AP2Action.IF2:
            # We don't emit this anymore, so this is a problem.
            raise Exception("Logic error, unexpected AP2Action.IF2 opcode which we should not emit in parsing stage!")

    # Finally, return chunks of contiguous execution.
    chunks: List[ByteCodeChunk] = []
    for start, flow in flows.items():
        if start == end:
            # We don't want to render out the end of the graph, it was only there to make
            # the above algorithm easier. We'll add it back later after we fix up the
            # chunks based on start_offset, which the end chunk would not have on account
            # of containing zero instructions.
            continue

        next_chunks: List[int] = []
        for ano in flow.next_flow:
            if ano == end:
                next_chunks.append(bytecode.end_offset)
            else:
                next_chunks.append(bytecode.actions[ano].offset)
        chunks.append(ByteCodeChunk(bytecode.actions[flow.beginning].offset, bytecode.actions[flow.beginning:flow.end], next_chunks))

    # Calculate who points to us as well, for posterity. We can still use chunk.id as
    # the offset of the chunk since we haven't converted yet.
    entries: Dict[int, List[int]] = {}
    for chunk in chunks:
        for next_chunk in chunk.next_chunks:
            entries.setdefault(next_chunk, []).append(chunk.id)

    for chunk in chunks:
        chunk.previous_chunks = entries.get(chunk.id, [])

    # Now, eliminate any dead code since it will trip us up later. Chunk ID is still the
    # offset of the first entry in the chunk since we haven't assigned IDs yet.
    while True:
        dead_chunk_ids = {c.id for c in chunks if not c.previous_chunks and c.id != bytecode.start_offset}
        if dead_chunk_ids:
            self.vprint(f"Eliminating dead code chunks {', '.join(str(d) for d in dead_chunk_ids)}")
            chunks = [c for c in chunks if c.id not in dead_chunk_ids]

            for chunk in chunks:
                for c in chunk.next_chunks:
                    if c in dead_chunk_ids:
                        # Hoo this shouldn't be possible!
                        raise Exception(f"Logic error, chunk ID {chunk.id} points at a dead code chunk we're eliminating!")
                chunk.previous_chunks = [c for c in chunk.previous_chunks if c not in dead_chunk_ids]
        else:
            break

    # Sort by start, so IDs make more sense.
    chunks = sorted(chunks, key=lambda c: c.id)

    # Now, calculate contiguous IDs for each remaining chunk.
    offset_to_id: Dict[int, int] = {}
    chunk_id: int = 0
    for chunk in chunks:
        offset_to_id[chunk.id] = chunk_id
        chunk.id = chunk_id

        chunk_id += 1

    end_chunk_id = chunk_id
    offset_to_id[bytecode.end_offset] = end_chunk_id

    # Now, convert the offsets to chunk ID pointers.
    end_previous_chunks: List[int] = []
    for chunk in chunks:
        if chunk.next_chunks:
            # Normal chunk.
            chunk.next_chunks = [offset_to_id[c] for c in chunk.next_chunks]
            if end_chunk_id in chunk.next_chunks:
                end_previous_chunks.append(chunk.id)
        else:
            # Point this chunk at the end of bytecode sentinel.
            chunk.next_chunks = [end_chunk_id]
            end_previous_chunks.append(chunk.id)
        chunk.previous_chunks = [offset_to_id[c] for c in chunk.previous_chunks]

    # Add the "return" chunk now that we've converted everything.
    chunks.append(ByteCodeChunk(end_chunk_id, [], [], previous_chunks=end_previous_chunks))

    # Verify a few invariants about the tree we just created.
    num_start_chunks = 0
    num_end_chunks = 0
    for chunk in chunks:
        if not chunk.next_chunks:
            num_end_chunks += 1
        if not chunk.previous_chunks:
            if bytecode.start_offset is None:
                raise Exception("Logic error, expected a start offset for bytecode chunk, we shouldn't be decompiling empty bytecode!")
            if chunk.id != offset_to_id[bytecode.start_offset]:
                raise Exception(f"Start of graph found at ID {chunk.id} but expected to be {offset_to_id[bytecode.start_offset]}!")
            num_start_chunks += 1

        if chunk.actions:
            # We haven't done any fixing up, we're guaranteed this is an AP2Action.
            last_action = cast(AP2Action, chunk.actions[-1])

            if last_action.opcode in [AP2Action.THROW, AP2Action.RETURN, AP2Action.JUMP, AP2Action.END] and len(chunk.next_chunks) != 1:
                raise Exception(f"Chunk ID {chunk.id} has control flow action expecting one next chunk but has {len(chunk.next_chunks)}!")
            if len(chunk.next_chunks) == 2 and last_action.opcode != AP2Action.IF:
                raise Exception(f"Chunk ID {chunk.id} has two next chunks but control flow action is not an if statement!")
            if len(chunk.next_chunks) > 2:
                raise Exception(f"Chunk ID {chunk.id} has more than two next chunks!")

    # Num start chunks can be 0 (if the start chunk is a loop beginning) or 1 (if its a normal chunk).
    if num_start_chunks > 1:
        raise Exception(f"Found {num_start_chunks} start chunks but expecting at most 1!")
    # Num end chunks can only be 1 as we created an artificial end chunk.
    if num_end_chunks != 1:
        raise Exception(f"Found {num_end_chunks} end chunks but expecting exactly 1!")

    # Now that we're satisfied with the tree we created, return it.
    return (chunks, offset_to_id)
|
|
|
|
def __get_entry_block(self, chunks: Sequence[ArbitraryCodeChunk]) -> int:
    """Return the ID of the one chunk that has no previous chunks.

    Raises:
        Exception: if there is no such chunk, or more than one.
    """
    entry_ids = [chunk.id for chunk in chunks if not chunk.previous_chunks]

    if len(entry_ids) > 1:
        # This should never happen, we have one entrypoint. If we run into
        # this we might need to do dead code analysis and discarding.
        raise Exception("Logic error, more than one start block found!")

    if not entry_ids:
        # We should never get to this as we always have at least one entrypoint.
        raise Exception("Logic error, no start block found!")

    return entry_ids[0]
|
|
|
|
def __compute_dominators(self, start_id: int, chunks: Sequence[ByteCodeChunk]) -> Dict[int, Set[int]]:
    """Compute, for every chunk, the set of chunk IDs that dominate it.

    Every chunk other than the start begins with all bits set. Each pass
    intersects a chunk's set with those of its predecessors and re-adds the
    chunk itself; passes repeat until one makes no change.

    Args:
        start_id: ID of the entry chunk, which is dominated only by itself.
        chunks: The chunks to analyze. Their IDs must all lie in
            ``0 .. len(chunks) - 1``.

    Returns:
        A mapping from chunk ID to the set of IDs of its dominators.

    Raises:
        Exception: if any chunk ID falls outside ``0 .. len(chunks) - 1``.
    """
    chunklen = len(chunks)

    # Verify that the chunk IDs are contiguous. Otherwise this algorithm fails, since it
    # assigns an integer ID to each bit in a bitfield contiguously. This runs before any
    # BitVector is indexed so that a bad ID is reported with this message.
    for chunk in chunks:
        if chunk.id < 0 or chunk.id >= chunklen:
            # The f prefix was previously missing, so the ID was never interpolated.
            raise Exception(f"Chunk ID {chunk.id} is outside of our created BitVector, the ID space of chunks is non-contiguous!")

    # Compute dominators iteratively, starting from "everything dominates everything"
    # except for the start chunk which is only dominated by itself.
    dominators: Dict[int, BitVector] = {chunk.id: BitVector(chunklen, init=True) for chunk in chunks}
    dominators[start_id].setAllBitsTo(False).setBit(start_id)

    changed = True
    while changed:
        changed = False

        for chunk in chunks:
            if chunk.id == start_id:
                continue

            for previd in chunk.previous_chunks:
                # Snapshot so we can tell whether this predecessor narrowed the set.
                comparison = dominators[chunk.id].clone()
                dominators[chunk.id].andVector(dominators[previd]).setBit(chunk.id)
                if dominators[chunk.id] != comparison:
                    changed = True

    return {chunk.id: dominators[chunk.id].bitsSet for chunk in chunks}
|
|
|
|
def __analyze_loop_jumps(self, loop: Loop, offset_map: Dict[int, int]) -> Loop:
    """
    Rewrite the jumps that leave or restart a loop into explicit statements.

    The header chunk is the chunk whose ID equals ``loop.id``. Its one
    successor outside of the loop is the break point, and the header itself
    is the continue point. For each ByteCodeChunk directly in the loop whose
    last action is still an AP2Action:

    * a JUMP to the break point becomes a BreakStatement, a JUMP to the header
      becomes a ContinueStatement, and a JUMP to any other chunk outside the
      loop becomes a NullReturnStatement (when the target is the chunk that
      the bytecode end offset maps to) or a GotoStatement;
    * an IF with at least one arm matching the cases above is replaced by an
      IntermediateIf carrying the converted arm(s);
    * a RETURN, THROW or END has its link to the end chunk severed.

    Each converted target is removed from the chunk's ``next_chunks``, so when
    this returns every chunk in the loop only links to other chunks in the
    loop. Chunks in the loop and the loop itself are modified in place.

    Parameters:
        loop: the loop to fix up.
        offset_map: lookup from bytecode offset to chunk ID.

    Returns:
        The same loop object, with ``next_chunks`` set to the break point.

    Raises:
        Exception: if the header cannot be identified, the header has zero or
            several external successors, a chunk has an unexpected number of
            successors for its last action, or an external link is left
            unconverted.
    """
    # Go through and try to determine which jumps are "break" and "continue" statements based on
    # where they point (to the header or to the exit point). First, let's try to identify all
    # exits, and which one is the break point and which ones are possibly goto statements
    # (break out of multiple loop depths).
    internal_jump_points = {c.id for c in loop.chunks}

    header_chunks = [c for c in loop.chunks if c.id == loop.id]
    if len(header_chunks) != 1:
        # Should never happen, only one should match ID.
        raise Exception("Logic error, didn't find the header chunk based on Loop ID!")
    header_chunk = header_chunks[0]

    # Identify external jumps from the header.
    break_points = [i for i in header_chunk.next_chunks if i not in internal_jump_points]
    if len(break_points) > 1:
        # We should not have two exits here, if so this isn't a loop!
        raise Exception("Logic error, loop has more than one next chunk to jump to on break!")
    if not break_points:
        # This might be possible, but I don't know how to deal with it.
        raise Exception("Logic error, loop has no chunk to jump to on break!")

    # Identify the break and continue jump points.
    break_point = break_points[0]
    continue_point = header_chunk.id

    self.vprint(f"Loop ID {loop.id} breaks to {break_point} and continues to {continue_point}")

    # Now, go through each chunk, identify whether it has an if, and fix up the
    # if statements.
    for chunk in loop.chunks:
        if not chunk.next_chunks:
            # All chunks need a next chunk of some type, the only one that doesn't
            # is the end chunk which should never be part of a loop.
            raise Exception(f"Logic error, chunk ID {chunk.id} has no successor and we haven't broken the graph yet!")
        if not isinstance(chunk, ByteCodeChunk):
            # We don't need to fix up loops, we already did this in a previous
            # fixup.
            continue

        last_action = chunk.actions[-1]
        if isinstance(last_action, AP2Action):
            if last_action.opcode == AP2Action.JUMP:
                # This is either an unconditional break/continue or an
                # internal jump.
                if len(chunk.next_chunks) != 1:
                    raise Exception(f"Logic error, chunk ID {chunk.id} has jump control action but {len(chunk.next_chunks)} next chunks!")
                next_chunk = chunk.next_chunks[0]

                if next_chunk == break_point:
                    self.vprint("Converting jump to loop break into break statement.")
                    chunk.actions[-1] = BreakStatement()
                    chunk.next_chunks = []
                elif next_chunk == continue_point:
                    self.vprint("Converting jump to loop continue into continue statement.")
                    chunk.actions[-1] = ContinueStatement()
                    chunk.next_chunks = []
                elif next_chunk not in internal_jump_points:
                    if next_chunk == offset_map[self.bytecode.end_offset]:
                        self.vprint("Converting jump to external point into return statement.")
                        chunk.actions[-1] = NullReturnStatement()
                    else:
                        self.vprint("Converting jump to external point into goto statement.")
                        chunk.actions[-1] = GotoStatement(next_chunk)
                    chunk.next_chunks = []
                # A jump that stays inside the loop is left untouched.
                continue

            if last_action.opcode == AP2Action.IF:
                # Calculate true and false jump points.
                true_jump_point, false_jump_point = self.__get_jump_points(chunk, offset_map)
                end_offset = offset_map[self.bytecode.end_offset]

                # Calculate true and false jump points, see if they are break/continue/goto.
                # It's possible for the true and false jump points to be equal if this is an
                # if statement which jumps to the next line of code in the true case. The below
                # code will still work (it will change both the true and false points to a break,
                # continue or return statement).
                true_action: Optional[Statement] = None
                if true_jump_point == break_point:
                    self.vprint("Converting jump if true to loop break into break statement.")
                    true_action = BreakStatement()
                    chunk.next_chunks = [n for n in chunk.next_chunks if n != true_jump_point]
                elif true_jump_point == continue_point:
                    self.vprint("Converting jump if true to loop continue into continue statement.")
                    true_action = ContinueStatement()
                    chunk.next_chunks = [n for n in chunk.next_chunks if n != true_jump_point]
                elif true_jump_point not in internal_jump_points:
                    if true_jump_point == end_offset:
                        self.vprint("Converting jump if true to external point into return statement.")
                        true_action = NullReturnStatement()
                    else:
                        self.vprint("Converting jump if true to external point into goto statement.")
                        true_action = GotoStatement(true_jump_point)
                    chunk.next_chunks = [n for n in chunk.next_chunks if n != true_jump_point]

                false_action: Optional[Statement] = None
                if false_jump_point == break_point:
                    self.vprint("Converting jump if false to loop break into break statement.")
                    false_action = BreakStatement()
                    chunk.next_chunks = [n for n in chunk.next_chunks if n != false_jump_point]
                elif false_jump_point == continue_point:
                    self.vprint("Converting jump if false to loop continue into continue statement.")
                    false_action = ContinueStatement()
                    chunk.next_chunks = [n for n in chunk.next_chunks if n != false_jump_point]
                elif false_jump_point not in internal_jump_points:
                    if false_jump_point == end_offset:
                        self.vprint("Converting jump if false to external point into return statement.")
                        false_action = NullReturnStatement()
                    else:
                        self.vprint("Converting jump if false to external point into goto statement.")
                        false_action = GotoStatement(false_jump_point)
                    chunk.next_chunks = [n for n in chunk.next_chunks if n != false_jump_point]

                if true_action is None and false_action is None:
                    # This is an internal-only if statement, we don't care. We will handle it in
                    # a later if logic step.
                    continue

                chunk.actions[-1] = IntermediateIf(
                    cast(IfAction, last_action),
                    [true_action] if true_action else [],
                    [false_action] if false_action else [],
                )

            if last_action.opcode in [AP2Action.RETURN, AP2Action.THROW, AP2Action.END]:
                if len(chunk.next_chunks) != 1:
                    raise Exception(f"Logic error, chunkd ID {chunk.id} returns, throws or end to multiple blocks!")
                if chunk.next_chunks[0] != offset_map[self.bytecode.end_offset]:
                    raise Exception(f"Expected chunk ID {chunk.id} to jump to return block but jumped elsewhere!")
                # We will convert this later.
                self.vprint("Severing link to return address.")
                chunk.next_chunks = []

    # At this point, all chunks in our list should point only to other chunks in our list.
    for chunk in loop.chunks:
        for n in chunk.next_chunks:
            if n not in internal_jump_points:
                raise Exception(f"Found unconverted next chunk {n} in chunk ID {chunk.id}, for loop ID {loop.id} with break point {break_point}!")
        if isinstance(chunk, ByteCodeChunk):
            last_action = chunk.actions[-1]
            if isinstance(last_action, AP2Action):
                # Untouched actions must still have the successor count their opcode implies.
                if last_action.opcode == AP2Action.IF and len(chunk.next_chunks) != 2:
                    raise Exception(f"Somehow messed up the next pointers on if statement in chunk ID {chunk.id}!")
                if last_action.opcode == AP2Action.JUMP and len(chunk.next_chunks) != 1:
                    raise Exception(f"Somehow messed up the next pointers on control flow statement in chunk ID {chunk.id}!")
                if last_action.opcode in [AP2Action.RETURN, AP2Action.THROW, AP2Action.END] and len(chunk.next_chunks) != 0:
                    raise Exception(f"Somehow messed up the next pointers on control flow statement in chunk ID {chunk.id}!")
            else:
                # The last action was converted above, so at most one internal link may remain.
                if len(chunk.next_chunks) > 1:
                    raise Exception(f"Somehow messed up the next pointers on converted statement in chunk ID {chunk.id}!")

    # Now, we have converted all external jumps to either break or goto, so we don't
    # need to keep track of the next chunk aside from the break location. We know this
    # is the correct location to break from in normal circumstances because we verified
    # it above.
    loop.next_chunks = [break_point]

    return loop
|
|
|
|
def __separate_loops(
    self,
    start_id: int,
    chunks: Sequence[ByteCodeChunk],
    dominators: Dict[int, Set[int]],
    offset_map: Dict[int, int],
) -> List[Union[ByteCodeChunk, Loop]]:
    """
    Detect loops in the chunk graph and fold each one into a Loop object.

    A loop is detected wherever a chunk links to a chunk that dominates it.
    The dominating chunk is the loop header and the linking chunk is the tail;
    the loop body is found by walking predecessors backwards from the tail,
    then extended with any chunk whose dominator set (minus itself) equals
    that of a non-header chunk already in the body.

    Loops are then converted innermost-first: a loop is only converted once
    none of its chunks is the header of another unconverted loop. A converted
    Loop replaces its header in the ID lookup (so it keeps the header's ID)
    and the remaining body chunks are dropped from the top-level list.

    Parameters:
        start_id: ID of the entry chunk. Not read by this method.
        chunks: all chunks in the graph.
        dominators: lookup from chunk ID to the set of chunk IDs dominating it.
        offset_map: lookup from bytecode offset to chunk ID, passed through to
            the loop jump analysis.

    Returns:
        The remaining top-level chunks, with each loop represented by a single
        Loop object.

    Raises:
        Exception: if the same header is found twice, a loop ends up with a
            number of exits other than one, no convertible loop can be found,
            a remaining chunk links into the body of a converted loop, or a
            deleted chunk survives into the result.
    """
    chunks_by_id: Dict[int, Union[ByteCodeChunk, Loop]] = {chunk.id: chunk for chunk in chunks}

    # Go through and gather up all loops in the chunks.
    loops: Dict[int, Set[int]] = {}
    for chunk in chunks:
        for nextid in chunk.next_chunks:
            # If this next chunk dominates us, then that means we found a loop.
            if nextid in dominators[chunk.id]:
                # Calculate the blocks that are in this loop.
                header = nextid
                tail = chunk.id
                blocks = {header}

                # If we don't already have a loop of one block,
                # we need to walk backwards to find all blocks in this
                # loop.
                if header != tail:
                    blocks.add(tail)
                    blocks_to_examine = [tail]

                    while blocks_to_examine:
                        block = blocks_to_examine.pop()
                        for predecessor in chunks_by_id[block].previous_chunks:
                            # The header is already in blocks, which stops the walk
                            # from continuing past the top of the loop.
                            if predecessor not in blocks:
                                blocks.add(predecessor)
                                blocks_to_examine.append(predecessor)

                self.vprint(f"Found loop with header {header} and blocks {', '.join(str(b) for b in blocks)}.")

                # Now, make sure we scoop up any remaining if/else bodies not found in the backwards walk.
                changed: bool = True
                while changed:
                    changed = False

                    for b in blocks:
                        # Explicitly exclude the header here, as it will only point at the break
                        # location which will usually pass the following dominator test.
                        if b == header:
                            continue
                        add_id: Optional[int] = None
                        for cid, doms in dominators.items():
                            if dominators[b] == doms - {cid} and cid not in blocks and cid != header:
                                add_id = cid
                                break
                        if add_id is not None:
                            self.vprint(f"Chunk {cid} should be included in loop list!")
                            blocks.add(add_id)
                            changed = True
                            # The set was just modified, so stop iterating over it and
                            # restart the scan from the top.
                            break

                # We found a loop!
                if header in loops:
                    raise Exception(f"Logic error, loop with header {header} was already found!")
                loops[header] = blocks

    # Now, we need to reduce our list of chunks down to non-loops only. We do this
    # by recursively trying to find inner loops until we find a loop that has no
    # inner loops, and converting that. Once we do that, we remove the chunks from
    # our list, add it to that new loop, and convert all other loops that might
    # reference it to point at the loop instead.
    deleted_chunks: Set[int] = set()
    while loops:
        delete_header: Optional[int] = None
        delete_blocks: Set[int] = set()
        for header, blocks in loops.items():
            # See if any of the blocks in this loop are the header of any other loop.
            for block in blocks:
                if block in loops and loops[block] is not blocks:
                    # This particular block of code is the header of another loop,
                    # so we shouldn't convert this loop until we handle the inner
                    # loop.
                    self.vprint(f"Skipping loop with header {header} for now because it contains another unconverted loop with header {block}.")
                    break
            else:
                # This loop does not contain any loops of its own. It is safe to
                # convert.
                self.vprint(f"Converting loop with header {header} and blocks {', '.join(str(b) for b in blocks)}.")
                new_loop = Loop(header, [chunks_by_id[i] for i in blocks])

                # Eliminate jumps that are to the beginning/end of the loop to
                # make if statement detection later on easier. This also breaks
                # the graph at any spot where we successfully converted a jump
                # to a break/continue/goto.
                new_loop = self.__analyze_loop_jumps(new_loop, offset_map)
                if len(new_loop.next_chunks) != 1:
                    raise Exception(f"Newly created loop ID {new_loop.id} has more than one exit point!")
                chunks_by_id[header] = new_loop

                # These blocks are now part of the loop, so we need to remove them
                # from the IDed chunks as well as from existing loops.
                delete_blocks = {block for block in blocks if block != header}
                delete_header = header
                # Only one loop is converted per pass of the outer while loop.
                break

        if delete_header is None:
            # We must find at LEAST one loop that has no inner loops of its own.
            raise Exception("Logic error, we found no fixable loops, yet have at least one loop to fix up!")

        # Remove this loop from the processing list
        del loops[delete_header]

        # Go through and remove the rest of the chunks from the rest of the loops
        loops = {header: {block for block in blocks if block not in delete_blocks} for (header, blocks) in loops.items()}

        # Also remove the rest of the chunks from our IDed chunks as they are part of this loop now.
        for block in delete_blocks:
            del chunks_by_id[block]

        # Verify that we don't have any existing chunks that point at the non-header portion of the loop.
        for chunk_id, chunk_or_loop in chunks_by_id.items():
            for nextid in chunk_or_loop.next_chunks:
                if nextid in delete_blocks:
                    # Woah, we point at a chunk inside this loop that isn't the header!
                    raise Exception(f"Logic error, chunkd ID {chunk_id} points into loop ID {delete_header} body!")

        # Update our master list of chunks we deleted.
        deleted_chunks.update(delete_blocks)

    # Finally, construct our new list of chunks and verify that we didn't accidentally keep any that we shouldn't have.
    updated_chunks = [chunks_by_id[i] for i in chunks_by_id]
    for new_chunk in updated_chunks:
        if new_chunk.id in deleted_chunks:
            raise Exception(f"Chunk ID {new_chunk.id} in list of chunks we converted but we expected it to be deleted!")
    return updated_chunks
|
|
|
|
def __get_jump_points(self, chunk: ByteCodeChunk, offset_map: Dict[int, int]) -> Tuple[int, int]:
    """
    Work out which successor an if statement takes when true and when false.

    The true target is looked up from the IfAction's ``jump_if_true_offset``.
    The false target is whichever entry of ``chunk.next_chunks`` is not the
    true target. When the first two successors are the same chunk there is no
    distinct false target, and the true target is returned for both.

    Parameters:
        chunk: a chunk whose last action is an IfAction.
        offset_map: lookup from bytecode offset to chunk ID.

    Returns:
        A ``(true_jump_point, false_jump_point)`` tuple of chunk IDs.

    Raises:
        Exception: if the last action is not an IfAction, if the chunk has
            fewer than two successors and no distinct false target, or if
            more than one distinct false target exists.
    """
    action = chunk.actions[-1]
    if not isinstance(action, IfAction):
        # The check above is for IfAction, so say so instead of "JumpAction".
        raise Exception(f"Logic error, expecting IfAction but got {action} in chunk {chunk.id}!")

    true_jump_point = offset_map[action.jump_if_true_offset]
    false_jump_points = [n for n in chunk.next_chunks if n != true_jump_point]
    if len(false_jump_points) == 1:
        return true_jump_point, false_jump_points[0]

    if len(chunk.next_chunks) < 2:
        # Guard the indexing below so a malformed chunk produces a descriptive
        # error instead of a bare IndexError.
        raise Exception(f"Logic error, expected two successors for if statement in chunk {chunk.id} but got {len(chunk.next_chunks)}!")

    if chunk.next_chunks[0] != chunk.next_chunks[1]:
        raise Exception(f"Logic error, got more than one false jump point for if statement in chunk {chunk.id}")

    # Both arms of the if land on the same chunk.
    return true_jump_point, true_jump_point
|
|
|
|
def __break_graph(self, chunks: Sequence[Union[ByteCodeChunk, Loop]], offset_map: Dict[int, int]) -> None:
    """
    Sever links to the end-of-code chunk, replacing them with return statements.

    Each chunk (recursing into Loop bodies) is examined by its last action:

    * THROW, RETURN and END already terminate execution, so all outgoing
      links are dropped.
    * A chunk whose only successor is the end chunk has a trailing JUMP
      replaced by a NullReturnStatement, or a NullReturnStatement appended
      when it simply falls through.
    * An IF with two successors has any arm that targets the end chunk turned
      into a NullReturnStatement inside an IntermediateIf.

    Chunks are modified in place. The end chunk itself, and chunks whose last
    action is not an AP2Action, are left alone.

    Parameters:
        chunks: the chunks and loops to process.
        offset_map: lookup from bytecode offset to chunk ID.

    Raises:
        Exception: if an IF has a single successor, or a chunk with two
            successors does not end in an IF.
    """
    for node in chunks:
        return_target = offset_map[self.bytecode.end_offset]
        if node.id == return_target:
            # Don't examine the sentinel we keep around as a jump point for returns.
            continue

        if isinstance(node, Loop):
            self.vprint(f"Entering into loop {node.id} to break graph...")

            # No if detection has run yet, so the loop body can only hold
            # bytecode chunks and other loops.
            loop_body = cast(List[Union[ByteCodeChunk, Loop]], node.chunks)
            self.__break_graph(loop_body, offset_map)
            continue

        # Everything from here on is decided by the final instruction.
        tail_action = node.actions[-1]
        if not isinstance(tail_action, AP2Action):
            continue

        if tail_action.opcode in [AP2Action.THROW, AP2Action.RETURN, AP2Action.END]:
            # This action already says where execution goes, so cut the chain here.
            self.vprint(f"Breaking chain on {node.id} because it is a {tail_action}.")
            node.next_chunks = []
            continue

        successor_count = len(node.next_chunks)
        if successor_count == 1 and node.next_chunks[0] == return_target:
            # We flow straight into the end of the function. A jump gets swapped
            # for a return, anything else gets a return tacked on after it.
            if tail_action.opcode == AP2Action.JUMP:
                self.vprint(f"Converting jump to end of code in {node.id} into a null return.")
                node.actions[-1] = NullReturnStatement()
            elif tail_action.opcode == AP2Action.IF:
                raise Exception(f"Logic error, unexpected if statement with only one successor in {node.id}!")
            else:
                self.vprint(f"Converting fall-through to end of code in {node.id} into a null return.")
                node.actions.append(NullReturnStatement())
            node.next_chunks = []
            continue

        if successor_count != 2:
            continue

        if tail_action.opcode != AP2Action.IF:
            raise Exception(f"Logic error, expected if statement with two successors in {node.id}!")

        # An if statement: check each arm for a jump to the end of the function.
        true_jump_point, false_jump_point = self.__get_jump_points(node, offset_map)

        # Both arms may share one target in unoptimized code. Handling each arm
        # independently still gives the right answer (a return in both cases).
        true_action: Optional[Statement] = None
        if true_jump_point == return_target:
            self.vprint(f"Converting jump if true to external point into return statement in {node.id}.")
            true_action = NullReturnStatement()
            node.next_chunks = [c for c in node.next_chunks if c != true_jump_point]

        false_action: Optional[Statement] = None
        if false_jump_point == return_target:
            self.vprint(f"Converting jump if false to external point into return statement in {node.id}.")
            false_action = NullReturnStatement()
            node.next_chunks = [c for c in node.next_chunks if c != false_jump_point]

        if not (true_action or false_action):
            # Neither arm returns, leave the if for later if detection.
            continue

        node.actions[-1] = IntermediateIf(
            cast(IfAction, tail_action),
            [true_action] if true_action else [],
            [false_action] if false_action else [],
        )
|
|
|
|
def __find_shallowest_successor(self, start_chunk: int, chunks_by_id: Dict[int, ArbitraryCodeChunk]) -> Optional[int]:
    """
    Find the chunk where the two arms of a branch first come back together.

    Everything reachable from the first successor is collected, then the
    graph is walked level by level from the second successor. The first level
    containing a chunk that is also reachable from the first successor
    identifies the join point. Successors that are not keys of
    ``chunks_by_id`` are ignored, which treats them the same as a path that
    never rejoins.

    Both walks visit each chunk at most once. Re-expanding a chunk cannot
    change the answer: a chunk seen at an earlier level was already tested
    against the first arm, and so were all of its descendants at the levels
    that followed. Skipping it keeps the walk linear on long runs of if/else
    diamonds and guarantees termination even if a cycle is present.

    Parameters:
        start_chunk: ID of the branching chunk; it must have exactly two
            entries in ``next_chunks``.
        chunks_by_id: lookup from chunk ID to chunk for every chunk in scope.

    Returns:
        The ID of the join chunk, or None if the two arms never rejoin.

    Raises:
        Exception: if ``start_chunk`` does not have exactly two successors, or
            if several join candidates are found at the same depth.
    """
    successors = chunks_by_id[start_chunk].next_chunks
    if len(successors) != 2:
        # With any other count there is no branch to find a join point for.
        raise Exception(f"Logic error, chunk {start_chunk} has {len(successors)} successors but finding the shallowest successor requires exactly 2!")

    left, right = successors

    # First, color in everything reachable from the left side. Goto/return/throw
    # targets are not in chunks_by_id and are treated the same as not finding an end.
    left_reachable: Set[int] = set()
    frontier: Set[int] = {left} if left in chunks_by_id else set()
    while frontier:
        left_reachable |= frontier
        frontier = {
            successor
            for candidate in frontier
            for successor in chunks_by_id[candidate].next_chunks
            if successor in chunks_by_id and successor not in left_reachable
        }

    # Now walk the right side one level at a time. The first level that touches
    # something colored in above holds the shallowest common successor.
    right_seen: Set[int] = set()
    frontier = {right} if right in chunks_by_id else set()
    while frontier:
        possible_candidates = frontier & left_reachable
        if len(possible_candidates) == 1:
            return possible_candidates.pop()
        if len(possible_candidates) > 1:
            # This shouldn't be possible. Enforce it as an invariant since it is
            # unclear what it would mean for two chunks to tie.
            raise Exception(f"Logic error, found too many candidates {possible_candidates} as shallowest successor to {start_chunk}!")

        right_seen |= frontier
        frontier = {
            successor
            for candidate in frontier
            for successor in chunks_by_id[candidate].next_chunks
            if successor in chunks_by_id and successor not in right_seen
        }

    # If we didn't find a successor, that means one of the control paths leads to end of execution.
    return None
|
|
|
|
def __gather_chunks(self, start_chunk: int, end_chunk: Optional[int], chunks_by_id: Dict[int, ArbitraryCodeChunk]) -> List[ArbitraryCodeChunk]:
    """
    Collect every chunk reachable from ``start_chunk`` without passing ``end_chunk``.

    The graph is walked depth-first through ``next_chunks``. The end chunk is
    neither returned nor walked through. IDs that are not keys of
    ``chunks_by_id`` are assumed to be goto/return/throw targets and skipped.

    As a side effect, the start chunk (when gathered) has its
    ``previous_chunks`` cleared so that it can later be recognised as the
    entry point of the gathered sequence.

    Parameters:
        start_chunk: ID of the chunk to begin walking from.
        end_chunk: ID of the chunk to stop at, or None to walk until the
            graph runs out.
        chunks_by_id: lookup from chunk ID to chunk for every chunk in scope.

    Returns:
        The gathered chunks in the order they were visited. This may be empty,
        for instance when ``start_chunk`` is the end chunk.

    Raises:
        Exception: if chunks were gathered and the number of them without
            predecessors is not exactly one.
    """
    seen: Set[int] = set()
    gathered: List[ArbitraryCodeChunk] = []
    pending: List[int] = [start_chunk]

    while pending:
        chunk_id = pending.pop()
        if chunk_id in seen:
            # Already gathered, along with everything after it.
            continue
        if chunk_id not in chunks_by_id:
            # Not ours to gather (a goto/return/throw target).
            continue
        if end_chunk is not None and chunk_id == end_chunk:
            # Stop short of the end chunk and don't walk through it.
            continue

        node = chunks_by_id[chunk_id]
        gathered.append(node)
        seen.add(chunk_id)
        pending.extend(node.next_chunks)

    # Detach the start chunk from whatever led into it. Loops are already wrapped
    # up in Loop structures with their chains broken, so nothing inside the
    # gathered chunks relies on these links.
    for node in gathered:
        if node.id == start_chunk:
            node.previous_chunks = []

    # Gathering nothing is fine (an if with no else), but anything we did gather
    # must have one and only one start to the flow.
    entry_count: int = sum(1 for node in gathered if not node.previous_chunks)
    if gathered and entry_count != 1:
        raise Exception(f"Logic error, splitting chunks by start chunk {start_chunk} should leave us with one start, but we got {entry_count}!")

    return gathered
|
|
|
|
def __separate_ifs(self, start_id: int, end_id: Optional[int], chunks: Sequence[ArbitraryCodeChunk], offset_map: Dict[int, int]) -> List[ArbitraryCodeChunk]:
    """
    Fold the branches of if statements into IfBody chunks.

    Starting at ``start_id`` the graph is followed one chunk at a time:

    * Loop chunks have their bodies processed recursively.
    * A chunk with a single successor is stepped over. If that successor is
      not in scope, the link is dropped and (unless the chunk already ends in
      a THROW/RETURN/END/JUMP action) a GotoStatement is added in its place.
    * A chunk with two successors must end in an IfAction. The point where
      its arms rejoin is located, the chunks of each arm are gathered, removed
      from this scope, processed recursively, and wrapped together in a new
      IfBody which becomes the chunk's only successor. Walking then resumes
      at the rejoin point, if there is one.

    Chunks are modified in place. ``self.__goto_body_id`` supplies the IDs for
    synthesized chunks and is decremented after each use.

    Parameters:
        start_id: ID of the chunk to begin at.
        end_id: ID of the chunk that follows this whole sequence, or None.
            Single links to it are dropped since control ends up there anyway.
        chunks: the chunks in scope for this pass.
        offset_map: lookup from bytecode offset to chunk ID.

    Returns:
        The chunks left in scope once if bodies have been folded away,
        including the newly created IfBody chunks.

    Raises:
        Exception: if the graph violates an expectation, such as an IfAction
            with one successor, a non-if chunk with several successors, or an
            if statement with no code in either arm.
    """
    chunks_by_id: Dict[int, ArbitraryCodeChunk] = {chunk.id: chunk for chunk in chunks}
    cur_id = start_id

    self.vprint(f"Separating if statements out of graph starting at {start_id}")

    while True:
        cur_chunk = chunks_by_id[cur_id]
        if isinstance(cur_chunk, Loop):
            self.vprint(f"Examining loop {cur_chunk.id} body for if statements...")
            cur_chunk.chunks = self.__separate_ifs(cur_chunk.id, None, cur_chunk.chunks, offset_map)
            self.vprint(f"Finished examining loop {cur_chunk.id} body for if statements...")

        # Filter out anything pointing at the end chunk, since we know that's where we will end up
        # when we leave this if statement anyway. Don't do this for if statements as we need to
        # preserve the jump point in that case.
        if len(chunks_by_id[cur_id].next_chunks) == 1:
            chunks_by_id[cur_id].next_chunks = [c for c in chunks_by_id[cur_id].next_chunks if c != end_id]

        if not chunks_by_id[cur_id].next_chunks:
            # We're done!
            break

        if len(chunks_by_id[cur_id].next_chunks) == 1:
            if not isinstance(cur_chunk, ByteCodeChunk):
                # This is an already-handled loop or if, don't bother checking for
                # if-goto patterns.
                next_id = chunks_by_id[cur_id].next_chunks[0]
                if next_id not in chunks_by_id:
                    # We need to go to the next chunk, but we don't own it. Convert it to a goto.
                    if isinstance(cur_chunk, Loop):
                        self.vprint(f"Loop ID {cur_id} needs a goto outside of this if.")
                        cur_chunk.post_statements.append(GotoStatement(next_id))
                        chunks_by_id[cur_id].next_chunks = []
                        break
                    else:
                        raise Exception(f"Logic error, we can't jump to chunk {next_id} for if {cur_id} as it is outside of our scope!")

                cur_id = next_id
                continue

            last_action = cur_chunk.actions[-1]
            if isinstance(last_action, IfAction):
                raise Exception(f"Logic error, IfAction with only one child in chunk {cur_chunk}!")

            next_id = chunks_by_id[cur_id].next_chunks[0]
            if isinstance(last_action, AP2Action) and last_action.opcode in [AP2Action.THROW, AP2Action.RETURN, AP2Action.END, AP2Action.JUMP]:
                if next_id not in chunks_by_id:
                    # This is just a goto/chunk, move on to the next one.
                    self.vprint(f"Chunk ID {cur_id} is a goto outside of this if.")
                    chunks_by_id[cur_id].next_chunks = []
                    break

            else:
                if next_id not in chunks_by_id:
                    # We need to go to the next chunk, but we don't own it. Convert it to a goto.
                    self.vprint(f"Chunk ID {cur_id} needs a goto outside of this if.")
                    cur_chunk.actions.append(GotoStatement(next_id))
                    chunks_by_id[cur_id].next_chunks = []
                    break

            cur_id = next_id
            continue

        if not isinstance(cur_chunk, ByteCodeChunk):
            # We should only be looking at bytecode chunks at this point, all other
            # types should have a single next chunk.
            raise Exception(f"Logic error, found converted Loop or If chunk {cur_chunk.id} with multiple successors!")

        if len(chunks_by_id[cur_id].next_chunks) != 2:
            # This needs to be an if statement.
            raise Exception(f"Logic error, expected 2 successors but got {len(chunks_by_id[cur_id].next_chunks)} in chunk {cur_chunk.id}!")
        last_action = cur_chunk.actions[-1]
        if not isinstance(last_action, IfAction):
            # This needs, again, to be an if statement. The message is an f-string so
            # that the offending chunk ID is actually reported.
            raise Exception(f"Logic error, only IfActions can have multiple successors in chunk {cur_chunk.id}!")

        # This should be an if statement. Figure out if it is an if-else or an
        # if, and if both branches return.
        if_end = self.__find_shallowest_successor(cur_id, chunks_by_id)
        true_jump_point, false_jump_point = self.__get_jump_points(cur_chunk, offset_map)
        if true_jump_point == false_jump_point:
            # This is an optimized-away if statement, render it out as an empty intermediate If
            # and set the jump point to the next location.
            self.vprint(f"Chunk ID {cur_id} is an empty if statement")
            chunks_by_id[cur_id].next_chunks = [true_jump_point]
            cur_chunk.actions[-1] = IntermediateIf(
                last_action,
                [],
                [],
            )

            next_id = chunks_by_id[cur_id].next_chunks[0]
            if next_id not in chunks_by_id:
                # We need to go to the next chunk, but we don't own it. Convert it to a goto.
                self.vprint(f"Chunk ID {cur_id} needs a goto after empty if.")
                cur_chunk.actions.append(GotoStatement(next_id))
                chunks_by_id[cur_id].next_chunks = []
                break

            cur_id = next_id
            continue

        self.vprint(f"Chunk ID {cur_id} is an if statement with true node {true_jump_point} and false node {false_jump_point} and ending at {if_end}")

        true_chunks: List[ArbitraryCodeChunk] = []
        if true_jump_point not in chunks_by_id and true_jump_point != if_end:
            # The true arm leaves our scope entirely, so its whole body is a goto.
            self.vprint(f"If statement true jump point {true_jump_point} is a goto!")
            true_chunks.append(ByteCodeChunk(self.__goto_body_id, [GotoStatement(true_jump_point)]))
            self.__goto_body_id -= 1
        elif true_jump_point not in {if_end, end_id}:
            self.vprint(f"Gathering true path starting with {true_jump_point} and ending with {if_end} and detecting if statements within it as well.")

            # First, grab all the chunks in this if statement body.
            true_chunks = self.__gather_chunks(true_jump_point, if_end, chunks_by_id)
            self.vprint(f"True chunks are {', '.join(str(c.id) for c in true_chunks)}")

            # Delete these chunks from our chunk mapping since we're putting them in an if body.
            for chunk in true_chunks:
                del chunks_by_id[chunk.id]

            # Now, recursively attempt to detect if statements within this chunk as well.
            true_chunks = self.__separate_ifs(true_jump_point, if_end if if_end is not None else end_id, true_chunks, offset_map)

        false_chunks: List[ArbitraryCodeChunk] = []
        if false_jump_point not in chunks_by_id and false_jump_point != if_end:
            # The false arm leaves our scope entirely, so its whole body is a goto.
            self.vprint(f"If statement false jump point {false_jump_point} is a goto!")
            false_chunks.append(ByteCodeChunk(self.__goto_body_id, [GotoStatement(false_jump_point)]))
            self.__goto_body_id -= 1
        elif false_jump_point not in {if_end, end_id}:
            self.vprint(f"Gathering false path starting with {false_jump_point} and ending with {if_end} and detecting if statements within it as well.")

            # First, grab all the chunks in this if statement body.
            false_chunks = self.__gather_chunks(false_jump_point, if_end, chunks_by_id)
            self.vprint(f"False chunks are {', '.join(str(c.id) for c in false_chunks)}")

            # Delete these chunks from our chunk mapping since we're putting them in an if body.
            for chunk in false_chunks:
                del chunks_by_id[chunk.id]

            # Now, recursively attempt to detect if statements within this chunk as well.
            false_chunks = self.__separate_ifs(false_jump_point, if_end if if_end is not None else end_id, false_chunks, offset_map)

        if (not true_chunks) and (not false_chunks):
            # We should have at least one!
            raise Exception("Logic error, if statement has no code for if or else!")

        # Lets use a brand new ID here for easier traversal and so we don't accidentally
        # reuse the ID of one of our parents if a jump point is a goto.
        if_id = self.__goto_body_id
        self.__goto_body_id -= 1

        # Add a new if body that this current chunk points to. At this point, chunks_by_id contains
        # none of the chunks in the true or false bodies of the if, so we add it back to the graph
        # in the form of an IfBody.
        self.vprint(f"Created new IfBody for chunk {cur_id} to point at, ending at {if_id}")
        chunks_by_id[if_id] = IfBody(if_id, true_chunks, false_chunks, if_end, cur_id)
        chunks_by_id[cur_id].next_chunks = [if_id]

        if if_end is not None:
            # Skip over the if, we already analyzed it.
            cur_id = if_end
        else:
            # This if statement encompasses all the rest of the statements, we're done.
            break

    self.vprint(f"Finished separating if statements out of graph starting at {start_id}")
    return [c for _, c in chunks_by_id.items()]
|
|
|
|
def __check_graph(self, start_id: int, chunks: Sequence[ArbitraryCodeChunk]) -> List[ArbitraryCodeChunk]:
    """
    Flatten a chunk graph into the chain of chunks reachable from ``start_id``.

    The chain is followed through each chunk's single successor until a chunk
    with no successors is reached. Along the way the contents of Loop chunks
    and both arms of IfBody chunks are cleaned up recursively. Chunks that
    are never reached are left out of the result.

    Parameters:
        start_id: ID of the first chunk in the chain.
        chunks: the chunks making up the graph.

    Returns:
        The reachable chunks, in the order they are visited.

    Raises:
        Exception: if any visited chunk has more than one successor.
        KeyError: if a visited ID is not the ID of any chunk in ``chunks``.
    """
    lookup: Dict[int, ArbitraryCodeChunk] = {c.id: c for c in chunks}
    ordered: List[ArbitraryCodeChunk] = []
    cursor = start_id

    while True:
        node = lookup[cursor]

        # Containers get their own contents tidied before we look at their links.
        if isinstance(node, Loop):
            self.vprint(f"Cleaning up graph of Loop {node.id}")
            node.chunks = self.__check_graph(node.id, node.chunks)
        elif isinstance(node, IfBody):
            if node.true_chunks:
                self.vprint(f"Cleaning up graph of IfBody {node.id} true case")
                node.true_chunks = self.__check_graph(self.__get_entry_block(node.true_chunks), node.true_chunks)
            if node.false_chunks:
                self.vprint(f"Cleaning up graph of IfBody {node.id} false case")
                node.false_chunks = self.__check_graph(self.__get_entry_block(node.false_chunks), node.false_chunks)

        # By this stage every chunk may lead to at most one other chunk.
        exits = node.next_chunks
        if len(exits) > 1:
            raise Exception("Logic error!")

        # We reached this chunk, so it is live and gets kept.
        ordered.append(node)

        if not exits:
            # End of the chain.
            break
        cursor = exits[0]

    # Anything we never visited is dead code (most likely just the return sentinel).
    return ordered
|
|
|
|
def __eval_stack(self, chunk: ByteCodeChunk, stack: List[Any], offset_map: Dict[int, int]) -> Tuple[List[ConvertedAction], List[Any]]:
    # Symbolically execute every action in the chunk against a copy of the
    # given stack. Each entry of chunk.actions is overwritten in place with a
    # converted statement (or a placeholder such as NopStatement, MultiAction
    # or OriginalCallLocation). Afterwards the no-ops are filtered out,
    # MultiActions are flattened, and the resulting statements are returned
    # together with the stack as it stands after the last action. The list
    # passed in by the caller is never modified. Raises Exception for actions
    # that are not implemented, for stack entries of an unexpected type, and
    # for any action that no case recognizes.
    stack = list(stack)

    # If comparisons that consume one stack entry: (expression class, invert it?).
    unary_ifs: Dict[Any, Tuple[Callable[[Any], IfExpr], bool]] = {
        IfAction.COMP_IS_UNDEFINED: (IsUndefinedIf, False),
        IfAction.COMP_IS_NOT_UNDEFINED: (IsUndefinedIf, True),
        IfAction.COMP_IS_TRUE: (IsBooleanIf, False),
        IfAction.COMP_IS_FALSE: (IsBooleanIf, True),
    }

    # If comparisons that consume two stack entries and map directly onto a TwoParameterIf.
    binary_ifs: Dict[Any, Any] = {
        IfAction.COMP_EQUALS: TwoParameterIf.EQUALS,
        IfAction.COMP_NOT_EQUALS: TwoParameterIf.NOT_EQUALS,
        IfAction.COMP_STRICT_EQUALS: TwoParameterIf.STRICT_EQUALS,
        IfAction.COMP_STRICT_NOT_EQUALS: TwoParameterIf.STRICT_NOT_EQUALS,
        IfAction.COMP_LT: TwoParameterIf.LT,
        IfAction.COMP_GT: TwoParameterIf.GT,
        IfAction.COMP_LT_EQUALS: TwoParameterIf.LT_EQUALS,
        IfAction.COMP_GT_EQUALS: TwoParameterIf.GT_EQUALS,
    }

    # Opcodes that turn straight into a statement without touching the stack.
    bare_statements: Dict[Any, Callable[[], Statement]] = {
        AP2Action.STOP: StopMovieStatement,
        AP2Action.PLAY: PlayMovieStatement,
        AP2Action.END: NullReturnStatement,
        AP2Action.NEXT_FRAME: NextFrameStatement,
        AP2Action.PREVIOUS_FRAME: PreviousFrameStatement,
        AP2Action.STOP_SOUND: StopSoundStatement,
    }

    # Opcodes that pop exactly one entry and wrap it in a statement.
    popping_statements: Dict[Any, Callable[[Any], Statement]] = {
        AP2Action.REMOVE_SPRITE: RemoveSpriteStatement,
        AP2Action.RETURN: ReturnStatement,
        AP2Action.THROW: ThrowStatement,
        AP2Action.TRACE: DebugTraceStatement,
    }

    # Opcodes that pop one entry and push a one-argument function call of this name.
    unary_calls: Dict[Any, str] = {
        AP2Action.TO_NUMBER: 'int',
        AP2Action.TO_STRING: 'str',
        AP2Action.TYPEOF: 'typeof',
    }

    # Opcodes that pop one entry and push "<entry> <op> 1".
    step_operators: Dict[Any, str] = {
        AP2Action.INCREMENT: '+',
        AP2Action.DECREMENT: '-',
    }

    # Opcodes that pop the right operand, then the left, and push "<left> <op> <right>".
    # Both right-shift flavors are rendered with the same ">>" operator.
    binary_operators: Dict[Any, str] = {
        AP2Action.ADD2: "+",
        AP2Action.SUBTRACT: "-",
        AP2Action.MULTIPLY: "*",
        AP2Action.DIVIDE: "/",
        AP2Action.MODULO: "%",
        AP2Action.BIT_OR: "|",
        AP2Action.BIT_AND: "&",
        AP2Action.BIT_XOR: "^",
        AP2Action.BIT_L_SHIFT: "<<",
        AP2Action.BIT_R_SHIFT: ">>",
        AP2Action.BIT_U_R_SHIFT: ">>",
        AP2Action.EQUALS2: "==",
        AP2Action.STRICT_EQUALS: "===",
        AP2Action.GREATER: ">",
        AP2Action.LESS2: "<",
    }

    # Opcodes with no conversion available. None of these are understood
    # beyond the fact that they operate on the stack, and IMPLEMENTS_OP
    # appears to be unimplemented/broken in Bishi. They do not appear in any
    # game code encountered so far, so they are left as TODOs.
    unimplemented_opcodes = {
        AP2Action.IMPLEMENTS_OP,
        AP2Action.ENUMERATE2,
        AP2Action.EXTENDS,
        AP2Action.END_DRAG,
        AP2Action.NEW_METHOD,
        AP2Action.GET_TARGET,
    }

    def make_if_expr(action: IfAction) -> IfExpr:
        # Build the conditional expression for an if, consuming its operands from the stack.
        comparison = action.comparison

        if comparison in unary_ifs:
            if_class, inverted = unary_ifs[comparison]
            unary_expr = if_class(stack.pop())
            return unary_expr.invert() if inverted else unary_expr

        if comparison in binary_ifs:
            rhs = stack.pop()
            lhs = stack.pop()
            return TwoParameterIf(lhs, binary_ifs[comparison], rhs)

        if comparison in [IfAction.COMP_BITAND, IfAction.COMP_NOT_BITAND]:
            rhs = stack.pop()
            lhs = stack.pop()
            # "a & b" is truthy exactly when the masked value is nonzero.
            bit_comp = TwoParameterIf.NOT_EQUALS if comparison == IfAction.COMP_BITAND else TwoParameterIf.EQUALS
            return TwoParameterIf(ArithmeticExpression(lhs, "&", rhs), bit_comp, 0)

        raise Exception(f"Logic error, unknown if action {action}!")

    def pop_checked(*allowed: type) -> Any:
        # Pop the top of the stack, insisting that it is one of the allowed types.
        value = stack.pop()
        if not isinstance(value, allowed):
            raise Exception("Logic error!")
        return value

    def pop_local_name() -> Any:
        # Pop the name of a local variable, which must be some form of string.
        local_name = stack.pop()
        if not isinstance(local_name, (str, StringConstant)):
            raise Exception(f"Logic error, local name {local_name} is not a string!")
        return local_name

    def pop_many(count: int) -> List[Any]:
        # Pop `count` entries, top of stack first.
        return [stack.pop() for _ in range(count)]

    def push_value(index: int, value: Any) -> None:
        # Push a pure expression; the action itself produces no statement.
        stack.append(value)
        chunk.actions[index] = NopStatement()

    def push_call(index: int, call: Any) -> None:
        # Push a call expression (already built with the current insertion ID) and
        # leave a marker at the spot it was originally invoked from.
        stack.append(call)
        chunk.actions[index] = OriginalCallLocation(self.__insertion_id)
        self.__insertion_id += 1

    def adjusted(target: Any, amount: Any) -> ArithmeticExpression:
        # Render "target + n" or "target - n" so a negative amount never prints as "+ -n".
        return ArithmeticExpression(target, "+" if amount >= 0 else '-', abs(amount))

    for i, action in enumerate(chunk.actions):
        if isinstance(action, PushAction):
            stack.extend(action.objects)
            chunk.actions[i] = NopStatement()
            continue

        if isinstance(action, DefineFunction2Action):
            decompiler = ByteCodeDecompiler(action.body)
            decompiler.decompile(verbose=self.verbose)

            if action.name:
                # A named function is a global definition, so nothing lands on the stack.
                chunk.actions[i] = SetVariableStatement(action.name, NewFunction(action.flags, decompiler))
            else:
                # An anonymous function object, most likely about to be attached to a member.
                push_value(i, NewFunction(action.flags, decompiler))
            continue

        if isinstance(action, GotoFrame2Action):
            after: Statement = StopMovieStatement() if action.stop else PlayMovieStatement()

            frame = stack.pop()
            if action.additional_frames:
                frame = ArithmeticExpression(frame, '+', action.additional_frames)

            chunk.actions[i] = MultiAction([GotoFrameStatement(frame), after])
            continue

        if isinstance(action, StoreRegisterAction):
            # A register store can expand to zero or more statements, which we cannot
            # splice into the list mid-iteration. They are bundled into a MultiAction
            # and flattened during the cleanup pass below.
            set_value = stack.pop()
            if action.preserve_stack:
                # When exactly one register is written, put the register itself back
                # on the stack rather than the value. The decompiled output reads much
                # better that way, especially when the value is a function call return.
                stack.append(action.registers[0] if len(action.registers) == 1 else set_value)

            store_actions: List[StoreRegisterStatement] = [
                StoreRegisterStatement(reg, set_value) for reg in action.registers
            ]
            chunk.actions[i] = MultiAction(store_actions)
            continue

        if isinstance(action, InitRegisterAction):
            # Like a register store, except every register is set to UNDEFINED.
            init_actions: List[StoreRegisterStatement] = [
                StoreRegisterStatement(reg, UNDEFINED) for reg in action.registers
            ]
            chunk.actions[i] = MultiAction(init_actions)
            continue

        if isinstance(action, JumpAction):
            # This may turn out to be a jump to the very next line; the optimization
            # pass is responsible for noticing that.
            chunk.actions[i] = GotoStatement(offset_map[action.jump_offset])
            continue

        if isinstance(action, IfAction):
            chunk.actions[i] = make_if_expr(action)
            continue

        if isinstance(action, WithAction):
            # TODO: Work out what "with" actually does. It sets some context and
            # local variables, but to what?
            raise Exception(f"TODO: {action}")

        if isinstance(action, GetURL2Action):
            # TODO: Work out what "geturl2" actually does. It has something to do
            # with getting the "URL" of the current movie clip.
            url = stack.pop()
            target = stack.pop()
            chunk.actions[i] = GetURL2Statement(action.action, url, target)
            continue

        if isinstance(action, StartDragAction):
            # TODO: Implement this if it is ever encountered.
            raise Exception(f"TODO: {action}")

        if isinstance(action, AddNumVariableAction):
            variable_name = pop_checked(str, StringConstant)
            chunk.actions[i] = SetVariableStatement(
                variable_name,
                adjusted(Variable(variable_name), action.amount_to_add),
            )
            continue

        if isinstance(action, AddNumRegisterAction):
            chunk.actions[i] = StoreRegisterStatement(
                action.register,
                adjusted(action.register, action.amount_to_add),
            )
            continue

        if isinstance(action, AP2Action):
            opcode = action.opcode

            if opcode in bare_statements:
                chunk.actions[i] = bare_statements[opcode]()
                continue

            if opcode == AP2Action.CLONE_SPRITE:
                depth = pop_checked(int, Expression)
                name = pop_checked(str, Expression)
                obj = stack.pop()
                chunk.actions[i] = CloneSpriteStatement(obj, name, depth)
                continue

            if opcode in popping_statements:
                chunk.actions[i] = popping_statements[opcode](stack.pop())
                continue

            if opcode in unary_calls:
                operand = stack.pop()
                push_call(i, FunctionCall(self.__insertion_id, unary_calls[opcode], [operand]))
                continue

            if opcode in step_operators:
                operand = stack.pop()
                push_value(i, ArithmeticExpression(operand, step_operators[opcode], 1))
                continue

            if opcode == AP2Action.NOT:
                push_value(i, NotExpression(stack.pop()))
                continue

            if opcode == AP2Action.INSTANCEOF:
                name_ref = stack.pop()
                obj_to_check = stack.pop()
                push_call(i, FunctionCall(self.__insertion_id, 'isinstance', [obj_to_check, name_ref]))
                continue

            if opcode == AP2Action.CALL_METHOD:
                method_name = pop_checked(str, int, Expression)
                object_reference = stack.pop()
                num_params = pop_checked(int)
                params = pop_many(num_params)
                push_call(i, MethodCall(self.__insertion_id, object_reference, method_name, params))
                continue

            if opcode == AP2Action.CALL_FUNCTION:
                function_name = pop_checked(str, StringConstant)
                num_params = pop_checked(int)
                params = pop_many(num_params)
                push_call(i, FunctionCall(self.__insertion_id, function_name, params))
                continue

            if opcode == AP2Action.POP:
                # A discard. If what is being thrown away is a function or method
                # call, the call still has to happen; only its return value is unused.
                discard = stack.pop()
                chunk.actions[i] = (
                    ExpressionStatement(discard)
                    if isinstance(discard, (FunctionCall, MethodCall))
                    else NopStatement()
                )
                continue

            if opcode == AP2Action.GET_VARIABLE:
                variable_name = stack.pop()
                if isinstance(variable_name, (str, StringConstant)):
                    push_value(i, Variable(variable_name))
                else:
                    # Probably a variable referenced through string concatenation.
                    push_value(i, Member(GLOBAL, variable_name))
                continue

            if opcode == AP2Action.SET_VARIABLE:
                set_value = stack.pop()
                local_name = stack.pop()
                if isinstance(local_name, (str, StringConstant)):
                    chunk.actions[i] = SetVariableStatement(local_name, set_value)
                else:
                    # Probably a variable referenced through string concatenation.
                    chunk.actions[i] = SetMemberStatement(GLOBAL, local_name, set_value)
                continue

            if opcode == AP2Action.DELETE:
                member_name = pop_checked(str, int, Expression)
                obj_name = stack.pop()
                chunk.actions[i] = DeleteMemberStatement(obj_name, member_name)
                continue

            if opcode == AP2Action.DELETE2:
                variable_name = pop_checked(str, StringConstant)
                chunk.actions[i] = DeleteVariableStatement(variable_name)
                continue

            if opcode == AP2Action.GET_MEMBER:
                member_name = pop_checked(str, int, Expression)
                object_reference = stack.pop()
                push_value(i, Member(object_reference, member_name))
                continue

            if opcode == AP2Action.SET_MEMBER:
                set_value = stack.pop()
                member_name = pop_checked(str, int, Expression)
                object_reference = stack.pop()
                chunk.actions[i] = SetMemberStatement(object_reference, member_name, set_value)
                continue

            if opcode == AP2Action.GET_PROPERTY:
                # Code using this outdated SWF GET_PROPERTY call could in theory
                # compute the property integer dynamically, but probably does not.
                # No code seen so far uses this or SET_PROPERTY, so the integer
                # requirement is mostly here as documentation.
                property_int = pop_checked(int)
                object_reference = stack.pop()
                push_value(i, Member(object_reference, StringConstant(property_int + 0x100)))
                continue

            if opcode == AP2Action.SET_PROPERTY:
                set_value = stack.pop()
                property_int = pop_checked(int)
                object_reference = stack.pop()
                chunk.actions[i] = SetMemberStatement(object_reference, StringConstant(property_int + 0x100), set_value)
                continue

            if opcode == AP2Action.DEFINE_LOCAL:
                set_value = stack.pop()
                chunk.actions[i] = SetLocalStatement(pop_local_name(), set_value)
                continue

            if opcode == AP2Action.DEFINE_LOCAL2:
                chunk.actions[i] = SetLocalStatement(pop_local_name(), UNDEFINED)
                continue

            if opcode == AP2Action.NEW_OBJECT:
                object_name = pop_checked(str, StringConstant)
                num_params = pop_checked(int)
                params = pop_many(num_params)
                push_value(i, NewObject(object_name, params))
                continue

            if opcode == AP2Action.INIT_ARRAY:
                num_entries = pop_checked(int)
                arrparams = pop_many(num_entries)
                push_value(i, Array(arrparams))
                continue

            if opcode == AP2Action.INIT_OBJECT:
                num_entries = pop_checked(int)
                objparams: Dict[Any, Any] = {}
                for _ in range(num_entries):
                    # Each pair sits on the stack with its value above its key.
                    val = stack.pop()
                    key = stack.pop()
                    objparams[key] = val
                push_value(i, Object(objparams))
                continue

            if opcode in binary_operators:
                right = stack.pop()
                left = stack.pop()
                push_value(i, ArithmeticExpression(left, binary_operators[opcode], right))
                continue

            if opcode == AP2Action.PUSH_DUPLICATE:
                dup = stack.pop()
                stack.append(dup)
                push_value(i, dup)
                continue

            if opcode == AP2Action.GET_TIME:
                push_call(i, GetTimeFunctionCall(self.__insertion_id))
                continue

            if opcode == AP2Action.TARGET_PATH:
                clip = stack.pop()
                push_call(i, GetPathFunctionCall(self.__insertion_id, clip))
                continue

            if opcode == AP2Action.CAST_OP:
                obj_ref = stack.pop()
                class_ref = stack.pop()
                push_call(i, FunctionCall(self.__insertion_id, 'cast', [obj_ref, class_ref]))
                continue

            if opcode == AP2Action.STACK_SWAP:
                top = stack.pop()
                below = stack.pop()
                stack.append(top)
                push_value(i, below)
                continue

            if opcode in unimplemented_opcodes:
                raise Exception(f"TODO: {action}")

        if isinstance(action, (NullReturnStatement, ContinueStatement, BreakStatement, GotoStatement)):
            # Already a finished statement from an earlier pass; nothing to do.
            continue

        if isinstance(action, IntermediateIf):
            # A partially-converted if from loop detection. Only its expression is
            # converted here, because that needs the current stack. The bodies are
            # left as-is: our caller has to follow the stack through the statements
            # of this intermediate if to the next jump.
            chunk.actions[i] = IntermediateIf(
                make_if_expr(cast(IfAction, action.parent_action)),
                action.true_statements,
                action.false_statements,
            )
            continue

        raise Exception(f"Unexpected action {action}, the cases above should be exhaustive!")

    # Cleanup pass over the generated code.
    new_actions: List[ConvertedAction] = []
    for converted in chunk.actions:
        if not isinstance(converted, ConvertedAction):
            # Every AP2Action should have been converted by now!
            raise Exception("Logic error!")

        if isinstance(converted, NopStatement):
            # No-ops carry no meaning in the output.
            continue

        if (
            isinstance(converted, NullReturnStatement)
            and new_actions
            and isinstance(new_actions[-1], NullReturnStatement)
        ):
            # Back-to-back returns are redundant; keep only the first.
            continue

        if isinstance(converted, MultiAction):
            # Flatten bundled statements into the main list.
            new_actions.extend(converted.actions)
            continue

        new_actions.append(converted)

    # Hand back the statements plus whatever is still on the stack.
    return new_actions, stack
|
|
|
|
def __eval_chunks(self, start_id: int, chunks: Sequence[ArbitraryCodeChunk], offset_map: Dict[int, int]) -> List[Statement]:
    # Convert a graph of chunks, entered at start_id, into a list of statements.
    # The conversion itself is done by __eval_chunks_impl, which fills in the
    # bookkeeping dictionaries created here. Afterwards the placeholder
    # statements it left behind are resolved, and we verify that every orphaned
    # call was placed and that every chunk's stack was fully consumed. Raises
    # Exception if either of those checks fails.

    # Per-chunk evaluation stacks; the entry chunk starts with an empty one.
    stack: Dict[int, List[Any]] = {start_id: []}
    # Statements to splice in at each InsertionLocation, keyed by location.
    insertables: Dict[int, List[Statement]] = {}
    # Calls whose result was never consumed, keyed by insertion ID.
    orphaned_functions: Dict[int, Union[FunctionCall, MethodCall]] = {}
    other_locs: Dict[int, int] = {}

    # Convert all chunks to a list of statements.
    statements = self.__eval_chunks_impl(start_id, chunks, None, stack, insertables, orphaned_functions, other_locs, offset_map)

    # Now, go through and fix up any insertables.
    def fixup(statements: Sequence[Statement]) -> List[Statement]:
        # Resolve placeholders in this list, recursing into the bodies of
        # do-while and if statements (the only containers handled here).
        new_statements: List[Statement] = []

        for statement in statements:
            if isinstance(statement, DoWhileStatement):
                statement.body = fixup(statement.body)
                new_statements.append(statement)
            elif isinstance(statement, IfStatement):
                statement.true_statements = fixup(statement.true_statements)
                statement.false_statements = fixup(statement.false_statements)
                new_statements.append(statement)
            elif isinstance(statement, InsertionLocation):
                # Replace the marker with whatever was scheduled for it. A marker
                # with nothing scheduled simply disappears.
                if statement.location in insertables:
                    self.vprint(f"Inserting temp variable assignments into insertion location {statement.location}")
                    new_statements.extend(insertables[statement.location])
            elif isinstance(statement, OriginalCallLocation):
                # An orphaned call becomes a bare expression statement at the spot
                # it was originally invoked. Markers for calls that were consumed
                # elsewhere simply disappear.
                if statement.insertion_id in orphaned_functions:
                    self.vprint(f"Inserting orphaned function into insertion location {statement.insertion_id}")
                    new_statements.append(ExpressionStatement(orphaned_functions.pop(statement.insertion_id)))
            else:
                new_statements.append(statement)

        return new_statements

    statements = fixup(statements)

    # fixup removes each orphan as it places it, so anything left never found a home.
    if orphaned_functions:
        raise Exception(f"Unexpected leftover orphan functions {orphaned_functions}!")

    # Make sure we consumed the stack.
    for cid, leftovers in stack.items():
        if leftovers:
            # Report only the offending chunk's entries, not every chunk's stack.
            raise Exception(f"Stack not empty, chunk {cid} contains {leftovers}!")

    # Finally, return the statements!
    return statements
|
|
|
|
def __eval_chunks_impl(
    self,
    start_id: int,
    chunks: Sequence[ArbitraryCodeChunk],
    next_id: Optional[int],
    stacks: Dict[int, List[Any]],
    insertables: Dict[int, List[Statement]],
    orphaned_functions: Dict[int, Union[FunctionCall, MethodCall]],
    other_stack_locs: Dict[int, int],
    offset_map: Dict[int, int],
) -> List[Statement]:
    # Evaluate a chain of chunks into statements, starting at start_id and following
    # each chunk's single next_chunks entry until a chunk has no successor.
    #
    #   start_id           - ID of the first chunk to evaluate.
    #   chunks             - The chunks at this nesting level, looked up by their ID.
    #   next_id            - ID of the chunk following this whole group, used as the
    #                        successor when a chunk has none of its own (None if nothing
    #                        follows).
    #   stacks             - Entry stack for each chunk ID. An entry is consumed when its
    #                        chunk is evaluated and created/merged for its successors.
    #   insertables        - Statements to place at the InsertionLocation of a chunk ID,
    #                        filled in when stacks from different paths are merged.
    #   orphaned_functions - Function/method calls left on the stack at an end of
    #                        execution, keyed by their insertion_ref.
    #   other_stack_locs   - For each chunk ID with a stack, the chunk that first defined it.
    #   offset_map         - Passed through unchanged to __eval_stack.
    #
    # The four dictionaries are shared with recursive calls and are mutated in place.
    # Any inconsistency in the graph raises an Exception.
    chunks_by_id: Dict[int, ArbitraryCodeChunk] = {chunk.id: chunk for chunk in chunks}
    statements: List[Statement] = []

    def reconcile_stacks(cur_chunk: int, new_stack_id: int, new_stack: List[Any]) -> List[Statement]:
        # Record new_stack as the entry stack for new_stack_id. If another path already
        # provided a stack for that chunk, merge the two by routing every differing
        # entry through a temporary variable that both source chunks assign.
        if new_stack_id not in stacks:
            self.vprint(f"Defining stack for chunk ID {new_stack_id} to be {new_stack} based on evaluation of {cur_chunk}")
            other_stack_locs[new_stack_id] = cur_chunk
            stacks[new_stack_id] = new_stack
            return []

        if cur_chunk == other_stack_locs[new_stack_id]:
            raise Exception("Logic error, cannot reconcile variable names with self!")
        other_chunk = other_stack_locs[new_stack_id]

        if len(stacks[new_stack_id]) != len(new_stack):
            min_len = min(len(stacks[new_stack_id]), len(new_stack))
            max_len = max(len(stacks[new_stack_id]), len(new_stack))
            borrows = max_len - min_len
            if borrows <= 0:
                raise Exception("Logic error!")

            # It doesn't matter what it is, just mark the stack entry as being poisoned since
            # we couldn't reconcile it. We want to throw an exception down the line if we
            # run into this value, as we needed it but only sometimes got it.
            borrow_vals = [MaybeStackEntry(new_stack_id) for _ in range(borrows)]

            if min_len > 0:
                # Both stacks keep their top min_len entries. The tail must be unpacked
                # into the new list; nesting it as a single element would make the two
                # stacks differ in length and put a list where an entry belongs.
                stacks[new_stack_id] = [*borrow_vals, *stacks[new_stack_id][-min_len:]]
                new_stack = [*borrow_vals, *new_stack[-min_len:]]
            else:
                stacks[new_stack_id] = [*borrow_vals]
                new_stack = [*borrow_vals]
            self.vprint(f"Chopped off {borrows} values from longest stack and replaced with MaybeStackEntry for {new_stack_id}")

        if len(new_stack) != len(stacks[new_stack_id]):
            raise Exception(f"Logic error, expected {new_stack} and {stacks[new_stack_id]} to be equal length!")

        self.vprint(
            f"Merging stack {stacks[new_stack_id]} for chunk ID {new_stack_id} with {new_stack}, " +
            f"and scheduling chunks {cur_chunk} and {other_chunk} for variable definitions."
        )

        stack: List[Any] = []
        # Walk the stack backwards to mimic the order in which a stack entry would be pulled.
        for i in reversed(range(len(new_stack))):
            new_entry = new_stack[i]
            old_entry = stacks[new_stack_id][i]

            if new_entry != old_entry:
                if isinstance(old_entry, TempVariable):
                    # This is already converted in another stack, so we just need to use the same.
                    tmpname = old_entry.name

                    insertables[cur_chunk] = insertables.get(cur_chunk, []) + [SetVariableStatement(tmpname, new_entry)]

                    stack.append(TempVariable(tmpname))
                    self.vprint(f"Reusing temporary variable {tmpname} to hold stack value {new_stack[i]}")
                else:
                    tmpname = f"tempvar_{self.__tmpvar_id}"
                    self.__tmpvar_id += 1

                    # Both paths must assign the temporary before control reaches the merge point.
                    insertables[cur_chunk] = insertables.get(cur_chunk, []) + [SetVariableStatement(tmpname, new_entry)]
                    insertables[other_chunk] = insertables.get(other_chunk, []) + [SetVariableStatement(tmpname, old_entry)]

                    stack.append(TempVariable(tmpname))
                    self.vprint(f"Creating temporary variable {tmpname} to hold stack values {new_stack[i]} and {stacks[new_stack_id][i]}")
            else:
                stack.append(new_entry)

        self.vprint(f"Redefining stack for chunk ID {new_stack_id} to be {stack} after merging multiple paths")
        # The merged entries were collected top-first, so flip them back into stack order.
        stacks[new_stack_id] = stack[::-1]
        return []

    while True:
        # Grab the chunk to operate on.
        chunk = chunks_by_id[start_id]
        if len(chunk.next_chunks) > 1:
            # We've checked so this should be impossible.
            raise Exception("Logic error!")
        next_chunk_id = chunk.next_chunks[0] if chunk.next_chunks else next_id

        if isinstance(chunk, Loop):
            # Evaluate the loop. No need to update per-chunk stacks here since we will do it in a child eval.
            self.vprint(f"Evaluating graph in Loop {chunk.id}")
            loop_statements = self.__eval_chunks_impl(chunk.id, chunk.chunks, next_chunk_id, stacks, insertables, orphaned_functions, other_stack_locs, offset_map)
            statements.append(DoWhileStatement(loop_statements))
            statements.extend(chunk.post_statements)
        elif isinstance(chunk, IfBody):
            # We should have evaluated this earlier!
            raise Exception("Logic error!")
        else:
            if start_id >= 0:
                # Make sure when we collapse chunks, we don't lose labels.
                statements.append(DefineLabelStatement(start_id))

            # Grab the computed start stack for this ID
            if chunk.id not in stacks:
                # We somehow failed to assign a stack to this chunk but got here anyway?
                raise Exception(f"Logic error, stack for {chunk.id} does not exist!")

            stack = stacks[chunk.id]
            del stacks[chunk.id]

            # Calculate the statements for this chunk, as well as the leftover stack entries.
            self.vprint(f"Evaluating graph of ByteCodeChunk {chunk.id} with stack {stack}")
            new_statements, stack_leftovers = self.__eval_stack(chunk, stack, offset_map)

            # We need to check and see if the last entry is an IfExpr, and hoist it
            # into a statement here.
            if new_statements and isinstance(new_statements[-1], IfExpr):
                if_body = chunk.next_chunks[0]
                if_body_chunk = chunks_by_id[if_body]

                if not isinstance(if_body_chunk, IfBody):
                    # IfBody should always follow a chunk that ends with an if.
                    raise Exception(f"Logic error, expecting an IfBody chunk but got {if_body_chunk}!")

                if if_body in stacks:
                    # Nothing should ever create a stack pointing at an IfBody except this code here.
                    raise Exception(f"Logic error, IfBody ID {if_body} already has a stack {stacks[if_body]}!")

                # Recalculate next chunk ID since we're calculating two chunks here.
                if len(if_body_chunk.next_chunks) > 1:
                    # We've checked so this should be impossible.
                    raise Exception("Logic error!")
                if if_body_chunk.next_chunks:
                    next_chunk_id = if_body_chunk.next_chunks[0]
                else:
                    next_chunk_id = next_id
                self.vprint(f"Recalculated next ID for IfBody {if_body} to be {next_chunk_id}")

                # Make sure if its an if with only one body (true/false) that we track
                # the stack in this case as well. The insertion sentinel goes directly
                # before the if expression itself.
                if_sentinels: List[ConvertedAction] = [InsertionLocation(chunk.id)]
                if_sentinels.append(new_statements[-1])
                new_statements = new_statements[:-1]
                new_statements.extend(if_sentinels)

                # Evaluate the if body
                true_statements: List[Statement] = []
                if if_body_chunk.true_chunks:
                    self.vprint(f"Evaluating graph of IfBody {if_body_chunk.id} true case")
                    true_start = self.__get_entry_block(if_body_chunk.true_chunks)
                    if true_start in stacks:
                        raise Exception("Logic error, unexpected stack for if!")
                    else:
                        # The stack for both of these is the leftovers from the previous evaluation as they
                        # rollover.
                        stacks[true_start] = list(stack_leftovers)
                    self.vprint(f"True start {true_start} of IfBody has stack {stacks[true_start]}")
                    true_statements = self.__eval_chunks_impl(
                        true_start,
                        if_body_chunk.true_chunks,
                        next_chunk_id,
                        stacks,
                        insertables,
                        orphaned_functions,
                        other_stack_locs,
                        offset_map,
                    )
                else:
                    if next_chunk_id is None:
                        raise Exception("Logic error, cannot reconcile stacks when next chunk is the end!")
                    reconcile_stacks(chunk.id, next_chunk_id, stack_leftovers)

                false_statements: List[Statement] = []
                if if_body_chunk.false_chunks:
                    self.vprint(f"Evaluating graph of IfBody {if_body_chunk.id} false case")
                    false_start = self.__get_entry_block(if_body_chunk.false_chunks)
                    if false_start in stacks:
                        raise Exception("Logic error, unexpected stack for if!")
                    else:
                        # The stack for both of these is the leftovers from the previous evaluation as they
                        # rollover.
                        stacks[false_start] = list(stack_leftovers)
                    self.vprint(f"False start {false_start} of IfBody has stack {stacks[false_start]}")
                    false_statements = self.__eval_chunks_impl(
                        false_start,
                        if_body_chunk.false_chunks,
                        next_chunk_id,
                        stacks,
                        insertables,
                        orphaned_functions,
                        other_stack_locs,
                        offset_map,
                    )
                else:
                    if next_chunk_id is None:
                        raise Exception("Logic error, cannot reconcile stacks when next chunk is the end!")
                    reconcile_stacks(chunk.id, next_chunk_id, stack_leftovers)

                # Convert this IfExpr to a full-blown IfStatement.
                new_statements[-1] = IfStatement(
                    cast(IfExpr, new_statements[-1]),
                    true_statements,
                    false_statements,
                )

                # Skip evaluating the IfBody next iteration.
                chunk = if_body_chunk
            else:
                # We must propagate the stack to the next entry. If it already exists we must merge it.
                # NOTE(review): this is a truthiness test, so a successor ID of 0 is treated
                # the same as None here - confirm that 0 can never be a successor chunk.
                new_next_ids: Set[int] = {next_chunk_id} if next_chunk_id else set()
                if new_statements:
                    last_new_statement = new_statements[-1]
                    if isinstance(last_new_statement, GotoStatement):
                        # Replace the next IDs with just the goto.
                        new_next_ids = {last_new_statement.location}
                    elif isinstance(last_new_statement, (ThrowStatement, NullReturnStatement, ReturnStatement)):
                        # We don't have a next ID, we're returning.
                        new_next_ids = set()
                    elif isinstance(last_new_statement, IntermediateIf):
                        # We have potentially more than one next ID, given what statements exist
                        # inside the true/false chunks.
                        intermediates: List[Statement] = []
                        if len(last_new_statement.true_statements) > 1:
                            raise Exception(f"Logic error, expected only one true statement in intermediate if {last_new_statement}!")
                        else:
                            intermediates.extend(last_new_statement.true_statements)
                        if len(last_new_statement.false_statements) > 1:
                            raise Exception(f"Logic error, expected only one false statement in intermediate if {last_new_statement}!")
                        else:
                            intermediates.extend(last_new_statement.false_statements)

                        for stmt in intermediates:
                            if isinstance(stmt, GotoStatement):
                                new_next_ids.add(stmt.location)
                            elif isinstance(stmt, (ThrowStatement, NullReturnStatement, ReturnStatement, ContinueStatement)):
                                # Do nothing. Three of these cases point at the end of the program, one
                                # points back at the top of the loop which we've already covered. Maybe
                                # we should assert here like we do below? Not sure.
                                pass
                            elif isinstance(stmt, BreakStatement):
                                # This points at the next chunk ID after the loop.
                                if next_id is not None:
                                    new_next_ids.add(next_id)
                            else:
                                raise Exception(f"Logic error, unexpected statement {stmt}!")

                if new_next_ids:
                    for new_next_id in new_next_ids:
                        # Hand each successor its own copy so a later merge cannot alias them.
                        reconcile_stacks(chunk.id, new_next_id, list(stack_leftovers))

                    # Insert a sentinel for where temporary variables can be added if we
                    # need to in the future.
                    sentinels: List[Union[Statement, IntermediateIf]] = [InsertionLocation(chunk.id)]

                    # If we have a goto or intermediate if, we need to insert the tempvar assignment before it.
                    # This is because in both cases we will redirect control flow, so we need to make sure
                    # tempvar assignment happens before that redirection for the code to make sense.
                    if new_statements and isinstance(new_statements[-1], (GotoStatement, IntermediateIf)):
                        sentinels.append(new_statements[-1])
                        new_statements = new_statements[:-1]

                    # Add our new statements to the end of the statement list.
                    new_statements.extend(sentinels)
                else:
                    # We have nowhere else to go, verify that we have an empty stack.
                    # Leftover calls are remembered as orphans; MaybeStackEntry values are ignored.
                    orphans = [s for s in stack_leftovers if isinstance(s, (FunctionCall, MethodCall))]
                    stack_leftovers = [s for s in stack_leftovers if not isinstance(s, (MaybeStackEntry, FunctionCall, MethodCall))]
                    for func in orphans:
                        if func.insertion_ref in orphaned_functions:
                            raise Exception(f"Logic error, already have an insertion ID {func.insertion_ref}!")
                        orphaned_functions[func.insertion_ref] = func
                    if stack_leftovers:
                        raise Exception(f"Logic error, reached execution end and have stack entries {stack_leftovers} still!")

            # Verify that we converted all the statements properly.
            for statement in new_statements:
                if isinstance(statement, IntermediateIf):
                    # Intermediate if conditional (such as a break/return/goto inside
                    # a loop.
                    if not isinstance(statement.parent_action, IfExpr):
                        raise Exception(f"Logic error, found unconverted IntermediateIf {statement}!")

                    if not statement.true_statements and not statement.false_statements:
                        self.vprint(f"Skipping adding if statement {statement} because it is an empty sentinel!")
                    else:
                        statements.append(
                            IfStatement(
                                statement.parent_action,
                                statement.true_statements,
                                statement.false_statements,
                            )
                        )
                elif isinstance(statement, Statement):
                    # Regular statement.
                    statements.append(statement)
                else:
                    # We didn't convert a statement properly.
                    raise Exception(f"Logic error, {statement} is not converted!")

        # Go to the next chunk. Note that chunk may have been swapped for the IfBody above.
        if not chunk.next_chunks:
            break
        start_id = chunk.next_chunks[0]

    return statements
|
|
|
|
def __walk(self, statements: Sequence[Statement], do: Callable[[Statement], Optional[Statement]]) -> List[Statement]:
    # Apply the callback to every statement in the tree, depth first, and return
    # the rebuilt list. The callback runs on a statement before its children are
    # visited, and the children that get visited are those of whatever the callback
    # returned. A falsy result from the callback drops the statement.
    walked: List[Statement] = []

    for original in statements:
        replacement = do(original)

        if isinstance(replacement, DoWhileStatement):
            replacement.body = self.__walk(replacement.body, do)
        elif isinstance(replacement, IfStatement):
            replacement.true_statements = self.__walk(replacement.true_statements, do)
            replacement.false_statements = self.__walk(replacement.false_statements, do)
        elif isinstance(replacement, SwitchStatement):
            # Cases are rebuilt rather than edited in place.
            rebuilt_cases = []
            for case in replacement.cases:
                rebuilt_cases.append(SwitchCase(case.const, self.__walk(case.statements, do)))
            replacement.cases = rebuilt_cases
        elif not replacement:
            # The callback asked for this statement to be removed.
            continue

        walked.append(replacement)

    return walked
|
|
|
|
def __collapse_identical_labels(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Go through and find labels that are immediately followed by a goto. Any goto
    # that targets such a label is retargeted at the place the label's own goto
    # ends up, so chains of label -> goto -> label -> goto collapse to one jump.
    # Returns the updated statements and whether any goto was actually changed.
    statements = list(statements)

    def find_labels_and_gotos(statements: Sequence[Statement]) -> Dict[int, int]:
        # Map each label location to the target of the goto directly after it.
        label_and_goto: Dict[int, int] = {}

        for i, cur_statement in enumerate(statements):
            next_statement = statements[i + 1] if (i < len(statements) - 1) else None
            if (
                isinstance(cur_statement, DefineLabelStatement) and
                isinstance(next_statement, GotoStatement)
            ):
                label_and_goto[cur_statement.location] = next_statement.location

            elif isinstance(cur_statement, DoWhileStatement):
                label_and_goto.update(find_labels_and_gotos(cur_statement.body))

            elif isinstance(cur_statement, IfStatement):
                label_and_goto.update(find_labels_and_gotos(cur_statement.true_statements))
                label_and_goto.update(find_labels_and_gotos(cur_statement.false_statements))

            elif isinstance(cur_statement, SwitchStatement):
                for case in cur_statement.cases:
                    label_and_goto.update(find_labels_and_gotos(case.statements))

        return label_and_goto

    def reduce_labels_and_gotos(pairs: Dict[int, int]) -> Dict[int, int]:
        # Follow each chain to its final target. A chain that loops back on itself
        # (for instance a label whose goto points at that same label) stops at the
        # first location it has already visited instead of being followed forever.
        def resolve(label: int) -> int:
            seen: Set[int] = {label}
            target = pairs[label]
            while target in pairs and target not in seen:
                seen.add(target)
                target = pairs[target]
            return target

        return {label: resolve(label) for label in pairs}

    changed: bool = False
    while True:
        redundant_pairs = reduce_labels_and_gotos(find_labels_and_gotos(statements))
        if not redundant_pairs:
            break

        # Whether we change the tree this pass. If not, we should bail.
        updated: bool = False

        def update_gotos(statement: Statement) -> Statement:
            nonlocal updated

            if isinstance(statement, GotoStatement):
                new_location = redundant_pairs.get(statement.location, statement.location)
                # Only count real retargets; rewriting a goto to the location it
                # already has would otherwise keep this loop running forever.
                if new_location != statement.location:
                    statement.location = new_location
                    updated = True
            return statement

        statements = self.__walk(statements, update_gotos)
        changed = changed or updated
        if not updated:
            break

    return statements, changed
|
|
|
|
def __remove_goto_return(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Go through and find labels that point at returns, convert any gotos pointing
    # at them to returns. Returns the updated statements and whether any goto was
    # converted.
    def find_labels(statements: Sequence[Statement], parent_next_statement: Optional[Statement]) -> Set[int]:
        # Collect the locations of labels whose next executed statement is a null return.
        # parent_next_statement is what runs after the last statement of this list.
        labels: Set[int] = set()

        for i, cur_statement in enumerate(statements):
            next_statement = statements[i + 1] if (i < len(statements) - 1) else parent_next_statement
            if (
                isinstance(cur_statement, DefineLabelStatement) and
                isinstance(next_statement, NullReturnStatement)
            ):
                labels.add(cur_statement.location)

            elif isinstance(cur_statement, DoWhileStatement):
                # Hitting the bottom of a loop body continues the loop, it does not
                # flow into whatever follows the loop, so nothing follows the body.
                labels.update(find_labels(cur_statement.body, None))

            elif isinstance(cur_statement, IfStatement):
                labels.update(find_labels(cur_statement.true_statements, next_statement))
                labels.update(find_labels(cur_statement.false_statements, next_statement))

            elif isinstance(cur_statement, SwitchStatement):
                cases = list(cur_statement.cases)
                for index, case in enumerate(cases):
                    # A case falls through into the next case that has statements.
                    # Only the last such case flows out to what follows the switch.
                    following = next_statement
                    for later_case in cases[index + 1:]:
                        if later_case.statements:
                            following = later_case.statements[0]
                            break
                    labels.update(find_labels(case.statements, following))

        return labels

    labels = find_labels(statements, None)

    updated: bool = False

    def update_gotos(statement: Statement) -> Statement:
        nonlocal updated

        if isinstance(statement, GotoStatement):
            if statement.location in labels:
                # Record the change before returning so the caller learns about it.
                updated = True
                return NullReturnStatement()
        return statement

    statements = self.__walk(statements, update_gotos)
    return statements, updated
|
|
|
|
def __eliminate_useless_returns(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Go through and find returns that are on the "last" line. Basically, any
    # return statement where the next statement is another return statement
    # or the end of a function. Returns the updated statements and whether any
    # return was removed.
    def find_returns(statements: Sequence[Statement], parent_next_statement: Statement) -> Set[NullReturnStatement]:
        # parent_next_statement is what runs after the last statement of this list.
        returns: Set[NullReturnStatement] = set()

        for i, cur_statement in enumerate(statements):
            next_statement = statements[i + 1] if (i < len(statements) - 1) else parent_next_statement
            if (
                isinstance(cur_statement, NullReturnStatement) and
                isinstance(next_statement, NullReturnStatement)
            ):
                returns.add(cur_statement)

            elif isinstance(cur_statement, DoWhileStatement):
                # Hitting the bottom of a loop body continues the loop rather than
                # flowing into the statement after it, so a return at the bottom of
                # the body is never redundant with a return that follows the loop.
                returns.update(find_returns(cur_statement.body, NopStatement()))

            elif isinstance(cur_statement, IfStatement):
                returns.update(find_returns(cur_statement.true_statements, next_statement))
                returns.update(find_returns(cur_statement.false_statements, next_statement))

            elif isinstance(cur_statement, SwitchStatement):
                cases = list(cur_statement.cases)
                for index, case in enumerate(cases):
                    # A case falls through into the next case that has statements.
                    # Only the last such case flows out to what follows the switch.
                    following = next_statement
                    for later_case in cases[index + 1:]:
                        if later_case.statements:
                            following = later_case.statements[0]
                            break
                    returns.update(find_returns(case.statements, following))

        return returns

    # Instead of an empty next statement, make up a return so we catch anything
    # without needing multiple conditionals above.
    returns = find_returns(statements, NullReturnStatement())

    updated: bool = False

    def remove_returns(statement: Statement) -> Optional[Statement]:
        nonlocal updated

        # Match by identity so only the exact statements found above are dropped.
        for removable in returns:
            if removable is statement:
                updated = True
                return None
        return statement

    statements = self.__walk(statements, remove_returns)
    return statements, updated
|
|
|
|
def __remove_useless_gotos(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Go through and find gotos that point at the very next line and remove them.
    # This can happen due to the way we analyze if statements. Returns the updated
    # statements and whether any goto was removed.
    statements = list(statements)

    def find_goto_next_line(statements: Sequence[Statement], next_instruction: Statement) -> List[Statement]:
        # next_instruction is what runs after the last statement of this list.
        gotos: List[Statement] = []

        for i, cur_statement in enumerate(statements):
            next_statement = statements[i + 1] if (i < len(statements) - 1) else next_instruction

            # A goto that targets the label directly after it does nothing.
            if (
                isinstance(cur_statement, GotoStatement) and
                isinstance(next_statement, DefineLabelStatement)
            ):
                if cur_statement.location == next_statement.location:
                    gotos.append(cur_statement)

            # A goto directly followed by a goto to the same place is a duplicate.
            if (
                isinstance(cur_statement, GotoStatement) and
                isinstance(next_statement, GotoStatement)
            ):
                if cur_statement.location == next_statement.location:
                    gotos.append(cur_statement)

            elif isinstance(cur_statement, DoWhileStatement):
                # Loops do not "flow" into the next line, they can only "break" to the next
                # line. Goto of the next line has already been converted to a "break" statement.
                gotos.extend(find_goto_next_line(cur_statement.body, NopStatement()))

            elif isinstance(cur_statement, IfStatement):
                # The next statement for both the if and else body is the next statement we have
                # looked up, either the next statement in this group of statements, or the next
                # statement in the parent.
                gotos.extend(find_goto_next_line(cur_statement.true_statements, next_statement))
                gotos.extend(find_goto_next_line(cur_statement.false_statements, next_statement))

            elif isinstance(cur_statement, SwitchStatement):
                # Switch cases do not "flow" into the next line, they flow into the next switch
                # case. Only if they have a 'break' statement do they flow to the parent. So
                # we check for this case, removing breaks when we find them (to simulate falling
                # out of the execution to the parent) and setting the next statement to the next
                # switch case first instruction when we don't. This isn't perfect, but eliminates
                # a lot of gotos in practice.
                cases = cur_statement.cases

                def get_next_instruction(case: SwitchCase) -> Statement:
                    found = False

                    for newcase in cases:
                        if found:
                            # If we identified the case, grab the next statement
                            # from the next available case that has statements.
                            # This must look at the later case, not at the case we
                            # started from, or we would hand back its own first line.
                            if newcase.statements:
                                return newcase.statements[0]
                        if newcase is case:
                            # We found our case, so the next case with statements
                            # is the one we care about.
                            found = True

                    # We failed to find our case, or we failed to find a case
                    # after ours with statements.
                    return NopStatement()

                for case in cases:
                    if case.statements and isinstance(case.statements[-1], BreakStatement):
                        gotos.extend(find_goto_next_line(case.statements[:-1], next_statement))
                    else:
                        gotos.extend(find_goto_next_line(case.statements, get_next_instruction(case)))

        return gotos

    # Whether we made at least one substitution.
    changed: bool = False

    while True:
        # Removing a goto can make another one removable, so repeat until none are found.
        gotos = find_goto_next_line(statements, NopStatement())
        if not gotos:
            break

        def remove_goto(statement: Statement) -> Optional[Statement]:
            nonlocal changed

            # Match by identity so only the exact statements found above are dropped.
            for goto in gotos:
                if statement is goto:
                    changed = True
                    return None
            return statement

        statements = self.__walk(statements, remove_goto)

    return statements, changed
|
|
|
|
def __eliminate_unused_labels(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Drop every label definition that no goto refers to. Returns the pruned
    # statements along with a flag saying whether anything was dropped.
    referenced: Set[int] = set()

    def record_target(node: Statement) -> Statement:
        # First pass: remember where each goto points, leaving the tree as it is.
        if isinstance(node, GotoStatement):
            referenced.add(node.location)
        return node

    self.__walk(statements, record_target)

    removed_any: bool = False

    def drop_if_unreferenced(node: Statement) -> Optional[Statement]:
        # Second pass: keep everything except labels that were never targeted.
        nonlocal removed_any

        if not isinstance(node, DefineLabelStatement):
            return node
        if node.location in referenced:
            return node
        removed_any = True
        return None

    pruned = self.__walk(statements, drop_if_unreferenced)
    return pruned, removed_any
|
|
|
|
def __eliminate_useless_continues(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Go through and find continues that are on the "last" line of a while. Basically, any
    # continue statement where the next statement is another continue statement or the end
    # of a loop. Returns the updated statements and whether any continue was removed.
    def find_continues(statements: Sequence[Statement], parent_next_statement: Statement) -> Set[ContinueStatement]:
        # parent_next_statement is what runs after the last statement of this list.
        continues: Set[ContinueStatement] = set()

        for i, cur_statement in enumerate(statements):
            next_statement = statements[i + 1] if (i < len(statements) - 1) else parent_next_statement
            if (
                isinstance(cur_statement, ContinueStatement) and
                isinstance(next_statement, ContinueStatement)
            ):
                continues.add(cur_statement)

            elif isinstance(cur_statement, DoWhileStatement):
                # Clever hack, where we pretend the next value after the loop is a continue,
                # because hitting the bottom of a loop is actually a continue.
                continues.update(find_continues(cur_statement.body, ContinueStatement()))

            elif isinstance(cur_statement, IfStatement):
                continues.update(find_continues(cur_statement.true_statements, next_statement))
                continues.update(find_continues(cur_statement.false_statements, next_statement))

            elif isinstance(cur_statement, SwitchStatement):
                cases = list(cur_statement.cases)
                for index, case in enumerate(cases):
                    # A case falls through into the next case that has statements, so
                    # a continue at the end of such a case is what stops the fallthrough
                    # and must be kept. Only the last case with statements flows out
                    # to what follows the switch.
                    following = next_statement
                    for later_case in cases[index + 1:]:
                        if later_case.statements:
                            following = later_case.statements[0]
                            break
                    continues.update(find_continues(case.statements, following))

        return continues

    # Instead of an empty next statement, make up a return because that's what
    # falling off the end of execution means.
    continues = find_continues(statements, NullReturnStatement())

    updated: bool = False

    def remove_continues(statement: Statement) -> Optional[Statement]:
        nonlocal updated

        # Match by identity so only the exact statements found above are dropped.
        for removable in continues:
            if removable is statement:
                updated = True
                return None
        return statement

    statements = self.__walk(statements, remove_continues)
    return statements, updated
|
|
|
|
def __eliminate_useless_breaks(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Remove any break whose logically next statement is also a break. Returns the
    # updated statements along with a flag saying whether anything was removed.
    def find_breaks(statements: Sequence[Statement], parent_next_statement: Statement) -> Set[BreakStatement]:
        # parent_next_statement stands in for whatever follows the last statement here.
        redundant: Set[BreakStatement] = set()
        last_index = len(statements) - 1

        for index, current in enumerate(statements):
            follower = parent_next_statement if index >= last_index else statements[index + 1]

            if isinstance(current, BreakStatement) and isinstance(follower, BreakStatement):
                redundant.add(current)
            elif isinstance(current, DoWhileStatement):
                # A break after the loop belongs to a different statement than a break
                # inside it, so the body is scanned with a harmless follower.
                redundant |= find_breaks(current.body, NopStatement())
            elif isinstance(current, IfStatement):
                redundant |= find_breaks(current.true_statements, follower)
                redundant |= find_breaks(current.false_statements, follower)
            elif isinstance(current, SwitchStatement):
                # Same reasoning as for loops: a break after the switch applies elsewhere.
                for case in current.cases:
                    redundant |= find_breaks(case.statements, NopStatement())

        return redundant

    # Falling off the end of execution is a return, so use one as the final follower.
    breaks = find_breaks(statements, NullReturnStatement())

    updated: bool = False

    def remove_breaks(statement: Statement) -> Optional[Statement]:
        nonlocal updated

        # Identity comparison, so only the exact statements collected above go away.
        if any(candidate is statement for candidate in breaks):
            updated = True
            return None
        return statement

    statements = self.__walk(statements, remove_breaks)
    return statements, updated
|
|
|
|
def __is_math(self, expression: Expression, variable: str) -> bool:
    # Report whether the expression is a +, -, * or / operation in which either
    # operand resolves, through object_ref, to the given variable reference.
    if not isinstance(expression, ArithmeticExpression):
        return False
    if expression.op not in {"+", "-", "*", "/"}:
        return False

    def reference_of(operand: Any) -> Any:
        # Operands that object_ref cannot resolve simply count as "no reference".
        try:
            return object_ref(operand, "")
        except Exception:
            return None

    # Both sides are resolved up front, then compared against the variable.
    lhs = reference_of(expression.left)
    rhs = reference_of(expression.right)
    return lhs == variable or rhs == variable
|
|
|
|
def __get_increment_variable(self, statement: Statement) -> Optional[str]:
    # If the statement assigns to a member, register, variable or local and the
    # assigned value is arithmetic involving that same target, return the target's
    # code_equiv() text. Anything else yields None.
    assignment_kinds = (
        SetMemberStatement,
        StoreRegisterStatement,
        SetVariableStatement,
        SetLocalStatement,
    )

    if not isinstance(statement, assignment_kinds):
        return None
    if not isinstance(statement.valueref, Expression):
        return None
    if not self.__is_math(statement.valueref, statement.code_equiv()):
        return None
    return statement.code_equiv()
|
|
|
|
def __get_assignment(self, statement: Statement) -> Any:
    # Return the value being assigned by a member, register, variable or local
    # assignment statement, or None for any other kind of statement.
    assignment_kinds = (
        SetMemberStatement,
        StoreRegisterStatement,
        SetVariableStatement,
        SetLocalStatement,
    )
    return statement.valueref if isinstance(statement, assignment_kinds) else None
|
|
|
|
def __extract_condition(self, possible_if: Statement, required_variable: Optional[str]) -> Tuple[Optional[IfExpr], List[Statement]]:
    # Given a statement that might be "if (cond) { break; } else { ... }", return
    # the condition together with the statements of the false branch. When the
    # statement does not have that shape, or the condition is of an unhandled kind,
    # (None, []) is returned. If required_variable is given, single-operand
    # conditions must reference it, and two-operand conditions are swapped when
    # the variable is found on the second operand.
    if not isinstance(possible_if, IfStatement):
        return None, []

    break_body = possible_if.true_statements
    if len(break_body) != 1 or not isinstance(break_body[0], BreakStatement):
        return None, []

    def reference_of(operand: Any) -> Any:
        # Operands that object_ref cannot resolve simply count as "no reference".
        try:
            return object_ref(operand, "")
        except Exception:
            return None

    # This is possibly a candidate, check the condition's variable usage.
    cond = possible_if.cond

    if isinstance(cond, (IsUndefinedIf, IsBooleanIf)):
        if required_variable is not None:
            if required_variable != reference_of(cond.conditional):
                return None, []
        return cond, possible_if.false_statements

    if isinstance(cond, TwoParameterIf):
        if required_variable is not None:
            if reference_of(cond.conditional1) == required_variable:
                return cond, possible_if.false_statements
            if reference_of(cond.conditional2) == required_variable:
                # Flip the comparison so the required variable ends up first.
                return cond.swap(), possible_if.false_statements
        # Reached both when no variable was required and when neither operand matched.
        return cond, possible_if.false_statements

    return None, []
|
|
|
|
def __convert_loops(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Convert any do {} while loops that resemble while or for statements into actual
    # while/for statements. A loop whose first body statement is a recognized
    # "if (...) break" guard becomes a while statement. An assignment immediately
    # followed by a loop whose last body statement increments the assigned variable
    # becomes a for statement that absorbs the initializer, the exit condition and
    # the increment. This isn't going to be perfectly accurate because there can be
    # all sorts of bizarre for statements, but it should be good enough for most
    # cases to make better code.
    #
    # Returns the rewritten statements and whether any loop was converted. Bodies of
    # if, switch and loop statements are walked recursively and updated in place.
    initializer_types = (SetMemberStatement, StoreRegisterStatement, SetVariableStatement, SetLocalStatement)

    def convert_loops(statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
        new_statements: List[Statement] = []
        # Loops that were turned into for statements while we were looking at the
        # initializer in front of them, keyed by the loop that they replace.
        updated_statements: Dict[DoWhileStatement, DoWhileStatement] = {}
        changed: bool = False

        for i, cur_statement in enumerate(statements):
            next_statement = statements[i + 1] if (i < len(statements) - 1) else None

            if isinstance(cur_statement, IfStatement):
                # Don't care about this, but we need to recursively walk its children.
                cur_statement.true_statements, new_changed = convert_loops(cur_statement.true_statements)
                changed = changed or new_changed

                cur_statement.false_statements, new_changed = convert_loops(cur_statement.false_statements)
                changed = changed or new_changed

                new_statements.append(cur_statement)

            elif isinstance(cur_statement, SwitchStatement):
                for case in cur_statement.cases:
                    case.statements, new_changed = convert_loops(case.statements)
                    changed = changed or new_changed
                new_statements.append(cur_statement)

            elif isinstance(cur_statement, DoWhileStatement):
                # If we addressed this statement, we should use the new statement instead.
                for old, new in updated_statements.items():
                    if old is cur_statement:
                        cur_statement = new
                        break

                if not isinstance(cur_statement, (ForStatement, WhileStatement)):
                    # This might be a candidate for while statement hoisting.
                    if cur_statement.body:
                        # Let's see if the first statement is an if statement with a break.
                        possible_cond, false_body = self.__extract_condition(cur_statement.body[0], None)
                    else:
                        possible_cond = None

                    if possible_cond is not None:
                        # This is a while statement. Let's convert it.
                        cur_statement = WhileStatement(
                            possible_cond.invert(),
                            # Drop the if statement, since we are incorporating it.
                            false_body + cur_statement.body[1:],
                        )
                        changed = True

                # Need to recursively walk through and perform stuff on the body of this.
                cur_statement.body, new_changed = convert_loops(cur_statement.body)
                changed = changed or new_changed

                new_statements.append(cur_statement)

            elif (
                isinstance(cur_statement, initializer_types) and
                isinstance(next_statement, DoWhileStatement) and
                not isinstance(next_statement, ForStatement)
            ):
                # This is a possible conversion that hasn't been converted yet. Let's try to grab
                # the increment variable. An empty loop body has no increment at all.
                inc_variable = None
                if next_statement.body:
                    inc_variable = self.__get_increment_variable(next_statement.body[-1])

                # Now that we know what's being incremented, let's see if it matches our
                # initializer. We only fetch the increment expression once we know there is
                # a matching increment, since it is unused otherwise and indexing the last
                # statement of an empty loop body would raise.
                inc_assignment = None
                if inc_variable is not None:
                    if inc_variable != cur_statement.code_equiv():
                        # This doesn't match, so let's kill our reference.
                        inc_variable = None
                    else:
                        inc_assignment = self.__get_assignment(next_statement.body[-1])

                if inc_variable is not None:
                    # This is a while statement previously converted, possibly due to
                    # an incomplete increment variable hoisting. We can further convert
                    # it to a for statement, but we need the conditional.
                    if isinstance(next_statement, WhileStatement):
                        possible_cond = next_statement.cond.invert()
                        if isinstance(possible_cond, TwoParameterIf):
                            # Best effort: if the right-hand side cannot be resolved to a
                            # reference we simply leave the operand order alone.
                            try:
                                if_variable = object_ref(possible_cond.conditional2, "")
                                if inc_variable == if_variable:
                                    possible_cond = possible_cond.swap()
                            except Exception:
                                pass
                        false_body = []
                    else:
                        # Let's see if the first statement is an if statement with a break.
                        possible_cond, false_body = self.__extract_condition(next_statement.body[0], inc_variable)
                else:
                    possible_cond = None

                if inc_variable is not None and possible_cond is not None:
                    # This is a for statement. Let's convert it. The initializer is not
                    # appended since the for statement absorbs it; the replacement loop is
                    # picked up when we reach the original loop on the next iteration.
                    updated_statements[next_statement] = ForStatement(
                        inc_variable,
                        self.__get_assignment(cur_statement),
                        possible_cond.invert(),
                        inc_assignment,
                        # Drop the increment and the if statement, since we are incorporating them.
                        false_body + (next_statement.body[:-1] if isinstance(next_statement, WhileStatement) else next_statement.body[1:-1]),
                        local=isinstance(cur_statement, SetLocalStatement),
                    )
                    changed = True
                else:
                    new_statements.append(cur_statement)

            else:
                # Don't care about this one, just append it.
                new_statements.append(cur_statement)

        return new_statements, changed

    return convert_loops(statements)
|
|
|
|
def __swap_empty_ifs(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Remove if statements that have no body at all, and flip if statements that only
    # have an else body so that the body lives in the true branch under the inverted
    # condition. The walk is repeated until a full pass makes no modification.
    #
    # Returns the resulting statements and whether any pass modified anything.
    pass_changed: bool = True
    ever_changed: bool = False

    def fix_if(node: Statement) -> Optional[Statement]:
        nonlocal pass_changed
        nonlocal ever_changed

        if not isinstance(node, IfStatement):
            return node
        if node.true_statements:
            # The true branch has content, so there is nothing to swap or drop.
            return node

        pass_changed = True
        ever_changed = True

        if node.false_statements:
            # Only the else branch has content: invert the conditional and swap.
            return IfStatement(
                node.cond.invert(),
                node.false_statements,
                node.true_statements,
            )

        # Neither branch has content: returning None drops the statement.
        return None

    while pass_changed:
        pass_changed = False
        statements = self.__walk(statements, fix_if)

    return statements, ever_changed
|
|
|
|
def __swap_ugly_ifexprs(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Reorder two-parameter comparisons that were written with a constant on the
    # left-hand side and a non-constant expression on the right-hand side, so the
    # expression ends up on the left.
    #
    # Returns the resulting statements and whether any comparison was swapped.
    changed: bool = False
    constant_types = (str, int, bool, float, StringConstant)

    def swap_ugly_ifs(node: Statement) -> Optional[Statement]:
        nonlocal changed

        if not isinstance(node, IfStatement):
            return node

        cond = node.cond
        if not isinstance(cond, TwoParameterIf):
            return node

        lhs_is_constant = isinstance(cond.conditional1, constant_types)
        rhs_is_expression = (
            isinstance(cond.conditional2, Expression) and
            not isinstance(cond.conditional2, StringConstant)
        )
        if not (lhs_is_constant and rhs_is_expression):
            return node

        changed = True
        return IfStatement(
            cond.swap(),
            node.true_statements,
            node.false_statements,
        )

    swapped = self.__walk(statements, swap_ugly_ifs)
    return swapped, changed
|
|
|
|
def __drop_unneeded_else(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # If an if has an else, but the last line of the if is a break/continue/return/throw/goto
    # then the else body doesn't need to exist. For break/continue the else body is hoisted
    # up into the parent directly after the if. For return/throw/goto the two bodies are
    # swapped under the inverted condition so that the false value can be eliminated at
    # some later optimization stage. If the false body also ends in the same kind of exit,
    # it is left alone for aesthetics.
    #
    # Returns the rewritten statements and whether any if statement was restructured.
    # If, loop and switch bodies are walked recursively and updated in place.
    function_exits = (ReturnStatement, NullReturnStatement, ThrowStatement, GotoStatement)
    loop_exits = (BreakStatement, ContinueStatement)

    def update_ifs(statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
        new_statements: List[Statement] = []
        changed: bool = False

        for statement in statements:
            if isinstance(statement, IfStatement):
                if (
                    statement.true_statements and
                    statement.false_statements and
                    isinstance(statement.true_statements[-1], function_exits) and
                    not isinstance(statement.false_statements[-1], function_exits)
                ):
                    # We need to walk both halves still, but once we're done, swap the true/false
                    # locations of the statements, so that the false value can be eliminated at
                    # some later optimization stage.
                    changed = True
                    false_statements, _ = update_ifs(statement.true_statements)
                    true_statements, _ = update_ifs(statement.false_statements)

                    # Now, append the if statement with its halves exchanged.
                    statement.true_statements = true_statements
                    statement.false_statements = false_statements
                    statement.cond = statement.cond.invert()
                    new_statements.append(statement)
                elif (
                    statement.true_statements and
                    statement.false_statements and
                    isinstance(statement.true_statements[-1], loop_exits) and
                    not isinstance(statement.false_statements[-1], loop_exits)
                ):
                    # We need to walk both halves still, but once we're done, hoist the false
                    # statements up to our level.
                    statement.true_statements, _ = update_ifs(statement.true_statements)
                    new_false_statements, _ = update_ifs(statement.false_statements)
                    statement.false_statements = []

                    # Hoisting the else body restructures the code, so it must be reported
                    # as a change regardless of what happened in the children. This cannot
                    # trigger again for the same statement since its else body is now empty.
                    changed = True

                    # Now, append the if statement, and follow up with the body.
                    new_statements.append(statement)
                    new_statements.extend(new_false_statements)
                else:
                    statement.true_statements, new_changed = update_ifs(statement.true_statements)
                    changed = changed or new_changed

                    statement.false_statements, new_changed = update_ifs(statement.false_statements)
                    changed = changed or new_changed

                    new_statements.append(statement)
            elif isinstance(statement, DoWhileStatement):
                # Need to recursively walk through and perform stuff on the body of this.
                statement.body, new_changed = update_ifs(statement.body)
                changed = changed or new_changed

                new_statements.append(statement)
            elif isinstance(statement, SwitchStatement):
                # Need to recursively walk through and perform stuff on the cases of this.
                for case in statement.cases:
                    case.statements, new_changed = update_ifs(case.statements)
                    changed = changed or new_changed

                new_statements.append(statement)
            else:
                # Don't care about this one, just append it.
                new_statements.append(statement)

        return new_statements, changed

    return update_ifs(statements)
|
|
|
|
def __gather_flow(self, parent_conditional: IfExpr, statements: Sequence[Statement]) -> Tuple[IfExpr, Dict[int, IfExpr], List[Tuple[IfExpr, Statement]]]:
    # Walk the statements in order and pair each one, including those nested in the
    # branches of if statements, with the conditional under which it is reached.
    #
    # Returns a tuple of:
    #  - the conditional that still holds after the last statement,
    #  - a map of goto target location to the conditional under which a goto to that
    #    location is taken,
    #  - the flattened list of (conditional, statement) pairs.
    hard_exits = (NullReturnStatement, ReturnStatement, ThrowStatement, BreakStatement, ContinueStatement)

    flowed_statements: List[Tuple[IfExpr, Statement]] = []
    gotos: Dict[int, IfExpr] = {}
    running_conditional: IfExpr = parent_conditional

    def merge_gotos(location: int, conditional: IfExpr) -> None:
        # Several gotos can target the same label, in which case any of their
        # conditionals lead there.
        if location not in gotos:
            gotos[location] = conditional
            return
        gotos[location] = OrIf(gotos[location], conditional).simplify()

    for statement in statements:
        if not isinstance(statement, IfStatement):
            flowed_statements.append((running_conditional, statement))
            if isinstance(statement, hard_exits):
                # We shouldn't find any more statements after this, unless there's a label.
                running_conditional = IsBooleanIf(False)
            elif isinstance(statement, GotoStatement):
                # The statements after this start from scratch, we shouldn't find any more
                # statements after this unless its followed by a label.
                merge_gotos(statement.location, running_conditional)
                running_conditional = IsBooleanIf(False)
            continue

        # Gather the flow of each branch under the conditional that selects it.
        entering_true = AndIf(running_conditional, statement.cond).simplify()
        true_cond, true_gotos, true_flow = self.__gather_flow(entering_true, statement.true_statements)
        entering_false = AndIf(running_conditional, statement.cond.invert()).simplify()
        false_cond, false_gotos, false_flow = self.__gather_flow(entering_false, statement.false_statements)

        flowed_statements.append((running_conditional, statement))
        flowed_statements.extend(true_flow)
        flowed_statements.extend(false_flow)
        for branch_gotos in (true_gotos, false_gotos):
            for line, goto in branch_gotos.items():
                merge_gotos(line, goto)

        true_exited = true_cond == IsBooleanIf(False)
        false_exited = false_cond == IsBooleanIf(False)
        if true_exited and false_exited:
            # Both conditionals exited.
            running_conditional = IsBooleanIf(False)
        elif false_exited:
            # The subsequent statements are only parented by the true conditional.
            running_conditional = AndIf(true_cond, running_conditional).simplify()
        elif true_exited:
            # The subsequent statements are only parented by the false conditional.
            running_conditional = AndIf(false_cond, running_conditional).simplify()
        else:
            # We are parented by either of the true/false cases.
            running_conditional = OrIf(
                AndIf(true_cond, running_conditional),
                AndIf(false_cond, running_conditional),
            ).simplify()

    if gotos:
        # Statements following a label are additionally reachable through any goto
        # that targets that label, until the next statement that leaves the flow.
        goto_conditional: IfExpr = IsBooleanIf(False)
        for index, (cond, stmt) in enumerate(flowed_statements):
            if isinstance(stmt, DefineLabelStatement):
                # This code might not be fully optimized yet and this might be a label
                # to which there is no goto.
                if stmt.location not in gotos:
                    continue
                goto_conditional = OrIf(gotos[stmt.location], goto_conditional).simplify()

            flowed_statements[index] = (OrIf(cond, goto_conditional).simplify(), stmt)

            if isinstance(stmt, hard_exits) or isinstance(stmt, GotoStatement):
                # The current running conditional no longer applies after this statement.
                goto_conditional = IsBooleanIf(False)

    return running_conditional, gotos, flowed_statements
|
|
|
|
def __gather_candidates(self, statements: Sequence[Statement]) -> List[IfStatement]:
    # Collect every if statement, at any if-nesting depth, that has a true body but no
    # else body. Each matching statement is listed before the candidates found in its
    # own branches.
    #
    # We intentionally ignore while loops and switches here, because we don't collapse gotos
    # across while loop/switch boundaries. We will end up running this code again over the
    # contents of any while loop or switch when we call __rearrange_compound_ifs on its body.
    found: List[IfStatement] = []

    for node in statements:
        if not isinstance(node, IfStatement):
            continue

        if node.true_statements and not node.false_statements:
            found.append(node)

        for branch in (node.true_statements, node.false_statements):
            found.extend(self.__gather_candidates(branch))

    return found
|
|
|
|
def __hoist_compound_ifs(
    self,
    parent_conditional: IfExpr,
    statements: Sequence[Statement],
    candidates: List[IfStatement],
    flow: List[Tuple[IfExpr, Statement]],
) -> Tuple[List[Statement], bool]:
    # Collapse chains of directly nested if statements into one if statement with a
    # compound conditional, when the flow information shows that the statements
    # following the chain run under the negation of that compound conditional.
    #
    # parent_conditional is the conditional under which `statements` are reached.
    # candidates are the if statements that may take part in a chain, and flow is the
    # list of (conditional, statement) pairs describing when each statement is reached.
    #
    # Returns the rewritten statements and whether anything was changed.

    # Lookup from a statement to the conditional it runs under, and the set of all
    # conditionals that at least one statement runs under.
    stmt_to_flow: Dict[Statement, IfExpr] = {f[1]: f[0] for f in flow}
    paths: Set[IfExpr] = {f[0] for f in flow}
    new_statements: List[Statement] = []
    changed: bool = False

    def get_child_candidate(statement: IfStatement) -> Optional[IfStatement]:
        # If the current if statement has any false statements, or its only true
        # statement isn't a child if, then there is no child candidate to add.
        if statement.false_statements or len(statement.true_statements) != 1:
            return None

        # Now, see if the child candidate is in our list of valid candidates.
        # This is an identity comparison on purpose, not an equality comparison.
        for candidate in candidates:
            if candidate is statement.true_statements[0]:
                return candidate

        # We didn't find a candidate.
        return None

    def get_compound_if(statements: List[IfStatement]) -> IfExpr:
        # Start with "True", since anding with true is the identity.
        combined: IfExpr = IsBooleanIf(True)

        # Each if statement contributes to the full if.
        for statement in statements:
            combined = AndIf(combined, statement.cond)

        # Finally, simplify it for ease of comparison.
        return combined.simplify()

    def get_candidate_group(conditional: IfExpr, statement: IfStatement) -> Optional[List[IfStatement]]:
        # Starting at the given if statement, keep adding directly nested candidate
        # ifs until the negation of the combined conditional matches a known path.
        # Returns the chain when it holds at least two if statements, None otherwise.
        candidate_statements: List[IfStatement] = [statement]

        if candidates and (not statement.false_statements) and len(statement.true_statements) == 1:
            while True:
                # First, is the current combination a valid combined or statement?
                candidate_true_expr = get_compound_if(candidate_statements)
                candidate_false_expr = candidate_true_expr.invert().simplify()
                false_cond = AndIf(conditional, candidate_false_expr).simplify()

                if false_cond in paths:
                    # A lone if statement that matches is already as compact as it
                    # gets, so only chains of two or more are reported.
                    if len(candidate_statements) < 2:
                        return None
                    else:
                        return candidate_statements

                # Now, try to add on the next in the layer.
                new_candidate = get_child_candidate(candidate_statements[-1])
                if new_candidate is None:
                    # There are no more candidates to try to add.
                    return None

                # Add this to our consideration, retry the logic test.
                candidate_statements.append(new_candidate)
        return None

    # Manual index, since handling a compound if consumes a variable number of the
    # statements that follow it.
    i = 0
    while i < len(statements):
        statement = statements[i]

        if isinstance(statement, IfStatement):
            # See if this is a compound if pattern.
            candidate_statements = get_candidate_group(parent_conditional, statement)
            if candidate_statements is None:
                # Move past this statement, we don't care about it.
                i += 1

                # However, check its children for any compound ifs that could
                # be collapsed.
                true_statements, child_changed = self.__hoist_compound_ifs(
                    AndIf(parent_conditional, statement.cond).simplify(),
                    statement.true_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed
                false_statements, child_changed = self.__hoist_compound_ifs(
                    AndIf(parent_conditional, statement.cond.invert()).simplify(),
                    statement.false_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed

                new_statements.append(
                    IfStatement(
                        statement.cond,
                        true_statements,
                        false_statements,
                    )
                )
            else:
                # Conditionals for the body of the innermost if of the chain (true)
                # and for the statements that follow the chain (false).
                true_expr = get_compound_if(candidate_statements)
                false_expr = true_expr.invert().simplify()
                true_cond = AndIf(parent_conditional, true_expr).simplify()
                false_cond = AndIf(parent_conditional, false_expr).simplify()

                hoist_after: List[Statement] = []
                true_statements = []
                false_statements = []

                hit_after = False
                for stmt in candidate_statements[-1].true_statements:
                    if isinstance(stmt, DefineLabelStatement):
                        hit_after = True

                    # We know that this is going to have a jump into it at some point.
                    # So, gather everything until the jump as the true statements, and
                    # everything after the jump as the hoisted statements.
                    if hit_after:
                        hoist_after.append(stmt)
                    else:
                        true_statements.append(stmt)

                # Handled this statement, look at the following statements to figure out
                # which ones are part of the false portion.
                i += 1
                while i < len(statements):
                    statement = statements[i]
                    if stmt_to_flow[statement] == parent_conditional:
                        # In some alternate cases, this can be a label that is jumped to
                        # by the true case. This should only happen if we never ran into
                        # a hoisted after statement above.
                        if hit_after:
                            raise Exception("Logic error!")
                        break

                    false_statements.append(statement)
                    i += 1

                    # Nothing after an exiting statement belongs to the false portion.
                    if isinstance(statement, (NullReturnStatement, ReturnStatement, ThrowStatement, GotoStatement, BreakStatement, ContinueStatement)):
                        break

                # Now, add this new if statement, but make sure to gather up
                # any compound if statements in any of its true/false children
                # that we just updated.
                true_statements, child_changed = self.__hoist_compound_ifs(
                    true_cond,
                    true_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed
                false_statements, child_changed = self.__hoist_compound_ifs(
                    false_cond,
                    false_statements,
                    candidates,
                    flow,
                )
                changed = changed or child_changed

                # Due to the way decompiling works, negating the if produces
                # more pleasing decompilation. This is due to the fact that
                # these are compound or statements and the easiest way for a
                # compiler to generate code is to do an and of the negation.
                new_statements.append(
                    IfStatement(
                        true_expr.invert(),
                        false_statements,
                        true_statements,
                    )
                )

                # Finally, add the statements we hoisted after the compound
                # expression, being sure to recurse into any if statements therein.
                for stmt in hoist_after:
                    if isinstance(stmt, IfStatement):
                        true_statements, child_changed = self.__hoist_compound_ifs(
                            AndIf(parent_conditional, stmt.cond).simplify(),
                            stmt.true_statements,
                            candidates,
                            flow,
                        )
                        changed = changed or child_changed
                        false_statements, child_changed = self.__hoist_compound_ifs(
                            AndIf(parent_conditional, stmt.cond.invert()).simplify(),
                            stmt.false_statements,
                            candidates,
                            flow,
                        )
                        changed = changed or child_changed
                        new_statements.append(
                            IfStatement(
                                stmt.cond,
                                true_statements,
                                false_statements,
                            )
                        )
                    elif isinstance(stmt, DoWhileStatement):
                        # Loop bodies get their own candidates and flow.
                        new_body, child_changed = self.__rearrange_compound_ifs(stmt.body)
                        changed = changed or child_changed
                        stmt.body = new_body
                        new_statements.append(stmt)
                    else:
                        new_statements.append(stmt)

                # We hoisted a compound if, so report a change.
                changed = True
        elif isinstance(statement, DoWhileStatement):
            # Loop bodies get their own candidates and flow.
            statement.body, child_changed = self.__rearrange_compound_ifs(statement.body)
            changed = changed or child_changed
            new_statements.append(statement)
            i += 1
        elif isinstance(statement, SwitchStatement):
            # Each switch case gets its own candidates and flow.
            for case in statement.cases:
                case.statements, new_changed = self.__rearrange_compound_ifs(case.statements)
                changed = changed or new_changed
            new_statements.append(statement)
            i += 1
        else:
            new_statements.append(statement)
            i += 1

    return new_statements, changed
|
|
|
|
def __rearrange_compound_ifs(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Entry point for compound if collapsing over one list of statements: gather the
    # candidate if statements, work out the flow of the statements only when there is
    # at least one candidate, and hand both to the hoisting pass.
    #
    # Returns the rewritten statements and whether anything was changed.
    candidates = self.__gather_candidates(statements)

    flow: List[Tuple[IfExpr, Statement]] = []
    if candidates:
        # Only the flattened (conditional, statement) pairs are needed here.
        flow = self.__gather_flow(IsBooleanIf(True), statements)[2]

    return self.__hoist_compound_ifs(IsBooleanIf(True), statements, candidates, flow)
|
|
|
|
def __convert_switches(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Convert any cascading if statements comparing the same variable/register against a series
    # of constants into switch statements. This relies on a previous run of another optimizer
    # that ensures that variables show up on the LHS of if statements.
    #
    # This works in two walks over the statements: the first identifies batches of nested
    # if statements that compare the same LHS, the second attempts to replace the head of
    # each batch with a switch statement.
    #
    # Returns the rewritten statements and whether any switch statement was created.

    # Each batch is a chain of nested if statements, outermost first.
    batches: List[Tuple[IfStatement, ...]] = []
    # Every if statement that is currently a member of some batch.
    processed: Set[Statement] = set()
    changed: bool = False

    def get_lhs(statement: IfStatement) -> Optional[Expression]:
        # Return the left-hand side of the comparison if this if statement is an
        # (in)equality test of a variable/register/member against a constant or
        # variable, and None otherwise.
        if not isinstance(statement.cond, TwoParameterIf):
            return None
        if statement.cond.comp not in {TwoParameterIf.EQUALS, TwoParameterIf.NOT_EQUALS, TwoParameterIf.STRICT_EQUALS, TwoParameterIf.STRICT_NOT_EQUALS}:
            return None
        if not isinstance(statement.cond.conditional1, (Variable, Register, Member)):
            return None
        # We intentionally widen this to allow variables, since there are a lot of places in
        # various code that uses variables as constants. We made up the entire language we are
        # decompiling into so we can allow this for readability.
        if not isinstance(statement.cond.conditional2, (str, bool, float, int, StringConstant, Variable)):
            return None

        return statement.cond.conditional1

    def get_next_candidate(statements: List[Statement], lhs: Expression) -> Optional[IfStatement]:
        # A body continues the chain only if it consists of exactly one if statement
        # that compares the same LHS, judged by its string form.
        if len(statements) != 1:
            return None
        statement = statements[0]
        if not isinstance(statement, IfStatement):
            return None
        new_lhs = get_lhs(statement)
        if new_lhs is None:
            return None
        if str(lhs) != str(new_lhs):
            return None
        return statement

    def find_if_batches(statement: Statement) -> Optional[Statement]:
        # Walk callback that records batches; it never modifies the statement.
        nonlocal batches
        nonlocal processed

        if isinstance(statement, IfStatement):
            # Don't sum up statements already grabbed by a previous invocation.
            if statement in processed:
                return statement

            # First, detect if this is a two parameter if statement comparing a recognized LHS
            # against some constant (number, integer, float, bool, string constant).
            lhs = get_lhs(statement)
            if lhs is None:
                return statement

            # Now, collect as many if statement children as we can. The true body is
            # tried before the false body since not-equals comparisons nest in the
            # true body.
            batch: List[IfStatement] = [statement]
            while True:
                candidate = get_next_candidate(batch[-1].true_statements, lhs)
                if candidate:
                    batch.append(candidate)
                    continue
                candidate = get_next_candidate(batch[-1].false_statements, lhs)
                if candidate:
                    batch.append(candidate)
                    continue
                break

            if len(batch) > 1:
                # Found one!
                batches.append(tuple(batch))
                processed.update(batch)

        return statement

    def has_break(statements: Sequence[Statement]) -> bool:
        # We intentionally ignore switch/while/for statements here, because
        # if there is a break inside one of those, it applies to that statement.
        # We only want to know if there are any break statements in our children.

        for statement in statements:
            if isinstance(statement, BreakStatement):
                # This is a break statement.
                return True
            elif isinstance(statement, IfStatement):
                # Need to check the true and false cases.
                if has_break(statement.true_statements):
                    return True
                if has_break(statement.false_statements):
                    return True

        # We found no break statements, we're solid.
        return False

    def replace_if_with_switch(statement: Statement) -> Optional[Statement]:
        # Walk callback that replaces the head of a batch with a switch statement when
        # the batch really has the shape of a switch, and returns the original
        # statement untouched otherwise.
        nonlocal changed
        nonlocal batches
        nonlocal processed

        if isinstance(statement, IfStatement):
            if statement not in processed:
                return statement

            # This is an if statement we should try to handle. Pull its batch out
            # of the pending list.
            our_batches = [b for b in batches if b[0] is statement]
            batches = [b for b in batches if b[0] is not statement]

            if len(our_batches) != 1:
                raise Exception("Logic error!")
            batch = set(our_batches[0])
            new_batch: List[IfStatement] = []
            # Whether or not the conversion succeeds, the members of this batch are
            # not considered again.
            processed.difference_update(batch)

            # First, make sure all the if entries are an equals.
            def swap_not_equals(statement: Statement) -> Optional[Statement]:
                nonlocal batch
                nonlocal new_batch

                if isinstance(statement, IfStatement):
                    if statement not in batch:
                        return statement
                    # We know this is correct, it wouldn't be in our list otherwise.
                    cond = cast(TwoParameterIf, statement.cond)

                    # If its already switched, leave it alone.
                    if cond.comp in {TwoParameterIf.EQUALS, TwoParameterIf.STRICT_EQUALS}:
                        new_batch.append(statement)
                        return statement

                    # Invert the comparison and exchange the bodies so that the
                    # matching case lives in the true body.
                    new_statement = IfStatement(
                        statement.cond.invert(),
                        statement.false_statements,
                        statement.true_statements,
                    )
                    new_batch.append(new_statement)
                    return new_statement
                return statement

            changed_statements = self.__walk([statement], swap_not_equals)
            if len(changed_statements) != 1:
                raise Exception("Logic error!")
            changed_statement = changed_statements[0]
            if not isinstance(changed_statement, IfStatement):
                raise Exception("Logic error!")

            # Now, make sure this is actually a switch pattern.
            cur_statement: Statement = changed_statement
            cases: List[SwitchCase] = []
            for i in range(len(new_batch)):
                # The normalized batch must follow the else chain exactly.
                if new_batch[i] is not cur_statement:
                    raise Exception("Logic error!")

                if not isinstance(cur_statement, IfStatement):
                    # This isn't even an if statement. We should never hit
                    # this but the type checker wants to be happy.
                    return statement
                if has_break(cur_statement.true_statements) or has_break(cur_statement.false_statements):
                    # This code already uses a 'break' statement. If we stuck it
                    # in a switch, it would change the semantics of this statement
                    # so we have no choice but to ignore this.
                    return statement

                if i < (len(new_batch) - 1):
                    # We don't check for the final case, since this will include
                    # the 'default' case in the else body.
                    if len(cur_statement.false_statements) != 1:
                        # This can't be a switch, it needs to be an embedded if.
                        return statement
                    if cur_statement.false_statements[0] is not new_batch[i + 1]:
                        # This can't be a switch, it needs to contain the nested
                        # if we identified.
                        return statement
                    cond = new_batch[i].cond
                    if not isinstance(cond, TwoParameterIf):
                        # This is purely to make the type checker happy, it should
                        # never be hit.
                        return statement

                    cases.append(
                        SwitchCase(
                            cond.conditional2,
                            [
                                *new_batch[i].true_statements,
                                BreakStatement(),
                            ],
                        )
                    )

                    # This is still in the running
                    cur_statement = cur_statement.false_statements[0]
                else:
                    cond = new_batch[i].cond
                    if not isinstance(cond, TwoParameterIf):
                        # This is purely to make the type checker happy, it should
                        # never be hit.
                        return statement

                    cases.append(
                        SwitchCase(
                            cond.conditional2,
                            [
                                *new_batch[i].true_statements,
                                BreakStatement(),
                            ],
                        )
                    )
                    if new_batch[i].false_statements:
                        # Only append a default case if there are any statements
                        # in the last false body.
                        cases.append(
                            SwitchCase(
                                None,
                                [
                                    *new_batch[i].false_statements,
                                    BreakStatement(),
                                ],
                            )
                        )

            cond = new_batch[0].cond
            if not isinstance(cond, TwoParameterIf):
                # This is purely to make the type checker happy, it should
                # never be hit.
                return statement

            changed = True
            return SwitchStatement(
                cond.conditional1,
                cases,
            )
        return statement

    # First, identify any if statements that should be included in a batch.
    self.__walk(statements, find_if_batches)

    # Now, replace them with switch statements.
    statements = self.__walk(statements, replace_if_with_switch)
    return statements, changed
|
|
|
|
def __convert_if_gotos(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Find if statements in the middle of a chunk of code that have no else body and
    # whose last true statement is a return/goto/throw. Take all the following statements
    # and put them in the if statement's false clause. We do this because it allows us to
    # recognize other optimizations we could do on the code, including switch folding and
    # redundant statement elimination. It also often helps get rid of gotos that were only
    # used to skip the false path of an if statement.
    #
    # Returns the rewritten statements and whether any if statement gained an else body.
    # If, switch and loop bodies are processed recursively and updated in place.
    exit_types = (NullReturnStatement, ReturnStatement, ThrowStatement, GotoStatement)

    result: List[Statement] = []
    changed: bool = False
    total = len(statements)

    pos = 0
    while pos < total:
        node = statements[pos]
        # Every branch below consumes at least the current statement.
        pos += 1

        if isinstance(node, IfStatement):
            is_candidate = (
                bool(node.true_statements) and
                not node.false_statements and
                isinstance(node.true_statements[-1], exit_types)
            )
            if is_candidate:
                # This is a candidate! Take all following statements up until the possible
                # label we jump to and make them into the false chunk.
                tail = node.true_statements[-1]
                stop_at_label: Optional[int] = tail.location if isinstance(tail, GotoStatement) else None

                absorbed: List[Statement] = []
                while pos < total:
                    follower = statements[pos]
                    if (
                        stop_at_label is not None and
                        isinstance(follower, DefineLabelStatement) and
                        stop_at_label == follower.location
                    ):
                        # Exit early, the rest of the code including this
                        # label is not part of the else case.
                        break

                    absorbed.append(follower)
                    pos += 1

                # Add all of these to the false case.
                if absorbed:
                    node.false_statements = absorbed
                    changed = True

            # Regardless of whether we modified the if statement, recurse down its
            # true and false path.
            node.true_statements, sub_changed = self.__convert_if_gotos(node.true_statements)
            changed = changed or sub_changed

            node.false_statements, sub_changed = self.__convert_if_gotos(node.false_statements)
            changed = changed or sub_changed

        elif isinstance(node, SwitchStatement):
            for case in node.cases:
                case.statements, sub_changed = self.__convert_if_gotos(case.statements)
                changed = changed or sub_changed

        elif isinstance(node, DoWhileStatement):
            node.body, sub_changed = self.__convert_if_gotos(node.body)
            changed = changed or sub_changed

        result.append(node)

    return result, changed
|
|
|
|
def __convert_switch_gotos(self, statements: Sequence[Statement]) -> Tuple[List[Statement], bool]:
    # Optimizer pass: look inside switch cases for gotos that target the label
    # found directly after the enclosing switch, and swap each one for a break
    # statement. Returns the rewritten statements plus a flag that is True when
    # at least one goto was replaced.
    def collect_breakable_gotos(
        body: Sequence[Statement],
        fallthrough: Statement,
        after_switch: Statement,
    ) -> Set[GotoStatement]:
        # body is the statement list being scanned, fallthrough is the statement
        # that semantically follows the last entry of body, and after_switch is
        # the statement following the innermost switch we are currently inside.
        found: Set[GotoStatement] = set()
        last_index = len(body) - 1

        for index, current in enumerate(body):
            following = body[index + 1] if index < last_index else fallthrough

            if (
                isinstance(current, GotoStatement) and
                isinstance(after_switch, DefineLabelStatement) and
                current.location == after_switch.location
            ):
                # This goto lands exactly where a break would take us.
                found.add(current)

            elif isinstance(current, DoWhileStatement):
                # Gotos sitting directly in a loop body must not be collected, since
                # swapping one for a break would change the flow. Handing down a NOP
                # as the switch successor ensures none of them match.
                found.update(collect_breakable_gotos(current.body, following, NopStatement()))

            elif isinstance(current, IfStatement):
                # Both branches of an if share the same switch successor.
                found.update(collect_breakable_gotos(current.true_statements, following, after_switch))
                found.update(collect_breakable_gotos(current.false_statements, following, after_switch))

            elif isinstance(current, SwitchStatement):
                # Whatever follows this switch is the location a break inside it
                # would reach, so it becomes the new switch successor. The semantic
                # next statement is irrelevant for this call, so a NOP stands in.
                for case in current.cases:
                    found.update(collect_breakable_gotos(case.statements, NopStatement(), following))

        return found

    # Falling off the end of execution is equivalent to returning, so use a
    # made-up return in place of an empty next statement at the top level.
    gotos = collect_breakable_gotos(statements, NullReturnStatement(), NullReturnStatement())

    replaced_any: bool = False

    def swap_for_break(statement: Statement) -> Optional[Statement]:
        nonlocal replaced_any

        # Compare by identity so that only the exact goto objects gathered above
        # are replaced.
        if any(candidate is statement for candidate in gotos):
            replaced_any = True
            return BreakStatement()
        return statement

    rewritten = self.__walk(statements, swap_for_break)
    return rewritten, replaced_any
|
|
|
|
def _optimize_code(self, statements: Sequence[Statement]) -> List[Statement]:
    # Repeatedly run the configured clean-up passes over the statements until a
    # complete round reports no change, then return the resulting list.
    current: List[Statement] = list(statements)

    # With optimization disabled we still run label elimination, since it is
    # required for some sanity checks to pass.
    passes = [
        self.__collapse_identical_labels,
        self.__eliminate_useless_continues,
        self.__eliminate_useless_breaks,
        self.__eliminate_unused_labels,
        self.__remove_useless_gotos,
        self.__remove_goto_return,
        self.__eliminate_useless_returns,
        self.__convert_loops,
        self.__convert_if_gotos,
        self.__swap_empty_ifs,
        self.__drop_unneeded_else,
        self.__swap_ugly_ifexprs,
        self.__rearrange_compound_ifs,
        self.__convert_switches,
        self.__convert_switch_gotos,
    ] if self.optimize else [
        self.__eliminate_unused_labels,
    ]

    # TODO: The bytecode compiler that was used in Bishi seems to like using
    # registers as temporary storage for variables in a lot of cases. We should
    # be able to track register assignment and use and see if there is only one
    # use/assignment of a register after all other optimization passes are done.
    # If this is the case, we should be able to substitute the original assignment
    # for the register when used, and get rid of the assignment altogether. This
    # should slightly clean up a fair bit of code.

    while True:
        self.vprint("Running optimizer pass...")

        # Every pass returns the (possibly rewritten) statements and whether it
        # modified anything; remember if any pass in this round did.
        round_changed = False
        for optimizer_pass in passes:
            current, pass_changed = optimizer_pass(current)
            if pass_changed:
                round_changed = True

        if round_changed:
            self.vprint("Optimizer changed code, running another pass.")
            continue

        self.vprint("Optimizer did not change anything.")
        return current
|
|
|
|
def __verify_balanced_labels(self, statements: Sequence[Statement]) -> None:
    # Sanity check: every goto must target a label that exists and, when the
    # optimizer is enabled, every label must be targeted by at least one goto.
    # Raises an Exception describing the offending labels otherwise.
    goto_targets: Set[int] = set()
    defined_labels: Set[int] = set()

    def record_location(statement: Statement) -> Optional[Statement]:
        # Only observes the statement; it is always handed back untouched.
        if isinstance(statement, GotoStatement):
            goto_targets.add(statement.location)
        elif isinstance(statement, DefineLabelStatement):
            defined_labels.add(statement.location)
        return statement

    self.__walk(statements, record_location)

    dangling_gotos = goto_targets - defined_labels
    orphaned_labels = defined_labels - goto_targets

    if dangling_gotos:
        formatted_labels = ", ".join(f"label_{x}" for x in dangling_gotos)
        raise Exception(f"Logic error, gotos found jumping to the following labels which don't exist: {formatted_labels}")

    # Labels without any goto are only treated as an error when optimizing.
    if orphaned_labels and self.optimize:
        formatted_labels = ", ".join(f"label_{x}" for x in orphaned_labels)
        raise Exception(f"Logic error, labels found with no gotos pointing at them: {formatted_labels}")
|
|
|
|
def __verify_no_empty_ifs(self, statements: Sequence[Statement]) -> None:
    # Sanity check: walk the statements and raise an Exception as soon as an if
    # statement is found whose true and false bodies are both empty.
    def reject_empty_if(statement: Statement) -> Optional[Statement]:
        if not isinstance(statement, IfStatement):
            return statement

        # At least one populated branch makes the if statement acceptable.
        if statement.true_statements or statement.false_statements:
            return statement

        raise Exception(f"If statement {statement} has no true or false statements inside it!")

    self.__walk(statements, reject_empty_if)
|
|
|
|
def __sanity_check_code(self, statements: Sequence[Statement]) -> None:
    # Run every verification step over the statements, in order: balanced
    # labels first, then the check for empty if statements. Each step is
    # expected to raise on failure.
    for verify in (self.__verify_balanced_labels, self.__verify_no_empty_ifs):
        verify(statements)
|
|
|
|
def _pretty_print(self, statements: Sequence[Statement], prefix: str = "") -> str:
    # Render each statement with the given prefix and glue all of the rendered
    # lines together using the platform line separator.
    rendered_lines: List[str] = [
        line
        for statement in statements
        for line in statement.render(prefix)
    ]
    return os.linesep.join(rendered_lines)
|
|
|
|
def __decompile(self) -> None:
    # Run the whole decompilation pipeline over self.bytecode and store the
    # resulting statements on this instance.
    #
    # Raises an Exception when the bytecode has no start offset (it is empty),
    # and propagates anything raised by the individual pipeline stages or by
    # the final sanity checks.

    # Validate the precondition before doing any work so that empty bytecode
    # always fails with this explicit message instead of whatever graphing
    # empty bytecode would happen to do.
    start_offset = self.bytecode.start_offset
    if start_offset is None:
        raise Exception("Logic error, we should not be decompiling empty bytecode!")

    # First, we need to construct a control flow graph.
    self.vprint("Generating control flow graph...")
    chunks, offset_map = self._graph_control_flow(self.bytecode)
    start_id = offset_map[start_offset]

    # Now, compute dominators so we can locate back-refs.
    self.vprint("Generating dominator list...")
    dominators = self.__compute_dominators(start_id, chunks)

    # Now, separate chunks out into chunks and loops.
    self.vprint("Identifying and separating loops...")
    chunks_and_loops = self.__separate_loops(start_id, chunks, dominators, offset_map)

    # Now, break the graph anywhere where we have control
    # flow that ends the execution (return, throw, goto end).
    # NOTE(review): the return value is unused here, so this presumably
    # modifies chunks_and_loops in place -- confirm against __break_graph.
    self.vprint("Breaking control flow graph on non-returnable statements...")
    self.__break_graph(chunks_and_loops, offset_map)

    # Now, identify any remaining control flow logic.
    self.vprint("Identifying and separating ifs...")
    chunks_loops_and_ifs = self.__separate_ifs(start_id, None, chunks_and_loops, offset_map)

    # At this point, we *should* have a directed graph where there are no
    # backwards refs and every fork has been identified as an if. This means
    # we can now walk and recursively generate pseudocode in one pass.
    self.vprint("Cleaning up and checking graph...")
    chunks_loops_and_ifs = self.__check_graph(start_id, chunks_loops_and_ifs)

    # Now, it's safe to start actually evaluating the stack.
    statements = self.__eval_chunks(start_id, chunks_loops_and_ifs, offset_map)

    # Now, let's do some clean-up passes.
    statements = self._optimize_code(statements)

    # TODO: There's definitely a lot missing from this decompilation process.
    # For one, function definitions do not include any mention of number of
    # arguments. It appears that functions take arguments in the registers
    # and when you call a function/method, the values that are popped from
    # the stack for the function/method call are placed into registers for the
    # function itself to access. However, there's some implicit parameters such
    # as "_this" which is even checked for in some Bishi code. Ideally the code
    # can be cross-referenced with function calls to determine the number of
    # arguments and the decompilation can be improved in that regard, but we
    # would need to nail down function call semantics better. The TRACE opcode
    # is still active in Bishi and its output can be coaxed to appear in stdout
    # so it would be possible to craft some bytecode and print out the register
    # contents in a function call to nail this down, but it is left as a future
    # enhancement.

    # Let's sanity check the code for a few things that might trip us up.
    self.__sanity_check_code(statements)

    # Finally, let's save the code!
    self.__statements = statements
|
|
|
|
def as_string(self, prefix: str = "", verbose: bool = False) -> str:
    # Pretty-print the decompiled statements, rendering each with the given
    # prefix, and return the result as a single string. The rendered code is
    # also passed to vprint while the debugging context for `verbose` is active.
    with self.debugging(verbose):
        decompiled = self.statements
        code = self._pretty_print(decompiled, prefix)
        self.vprint(f"Final code:{os.linesep}{code}")
        return code
|
|
|
|
def decompile(self, verbose: bool = False) -> None:
    # Public entry point: decompile self.bytecode and keep the resulting
    # statements on this instance. Bytecode without a start offset is empty
    # and decompiles to an empty statement list without running the pipeline.
    with self.debugging(verbose):
        if self.bytecode.start_offset is not None:
            self.__decompile()
            return

        self.vprint("ByteCode is empty, decompiling to nothing!")
        self.__statements = []
|