573in1/tools/common/decompile.py

236 lines
6.2 KiB
Python
Raw Normal View History

2024-11-29 00:49:07 +01:00
# -*- coding: utf-8 -*-
# 573in1 - Copyright (C) 2022-2024 spicyjpeg
#
# 573in1 is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# 573in1 is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# 573in1. If not, see <https://www.gnu.org/licenses/>.
from collections.abc import ByteString, Generator
from struct import Struct
from typing import Any, BinaryIO, TextIO
2024-11-29 00:49:07 +01:00
from .mips import \
ImmInstruction, Instruction, Opcode, Register, encodeJAL, encodeJR, \
parseInstruction
2024-11-29 00:49:07 +01:00
## Executable analyzer
_EXE_HEADER_STRUCT: Struct = Struct("< 8s 8x 4I 16x 2I 20x 1972s")
_EXE_HEADER_MAGIC: bytes = b"PS-X EXE"
class AnalysisError(Exception):
pass
2024-11-29 00:49:07 +01:00
class PSEXEAnalyzer:
def __init__(self, file: BinaryIO):
(
magic,
entryPoint,
initialGP,
startAddress,
length,
stackOffset,
stackLength,
region
2024-11-29 00:49:07 +01:00
) = \
_EXE_HEADER_STRUCT.unpack(file.read(_EXE_HEADER_STRUCT.size))
2024-11-29 00:49:07 +01:00
if magic != _EXE_HEADER_MAGIC:
raise AnalysisError("file is not a valid PS1 executable")
2024-11-29 00:49:07 +01:00
self.entryPoint: int = entryPoint
self.startAddress: int = startAddress
self.endAddress: int = startAddress + length
self.body: bytes = file.read(length)
#file.close()
def __getitem__(self, key: int | slice) -> Any:
if isinstance(key, slice):
return self.body[self._makeSlice(key.start, key.stop, key.step)]
else:
return self.body[key - self.startAddress]
def _makeSlice(
self,
start: int | None = None,
stop: int | None = None,
step: int = 1
2024-11-29 00:49:07 +01:00
) -> slice:
_start: int = \
0 if (start is None) else (start - self.startAddress)
_stop: int = \
len(self.body) if (stop is None) else (stop - self.startAddress)
# Allow for searching/disassembling backwards by swapping the start and
# stop parameters.
if _start > _stop:
#_start -= step
#_stop -= step
step = -step
return slice(_start, _stop, step)
def disassembleAt(self, address: int) -> Instruction | None:
offset: int = address - self.startAddress
try:
return parseInstruction(address, self.body[offset:offset + 4])
except:
return None
def disassemble(
self,
start: int | None = None,
stop: int | None = None
2024-11-29 00:49:07 +01:00
) -> Generator[Instruction | None, None, None]:
area: slice = self._makeSlice(start, stop, 4)
offset: int = area.start
if (area.start % 4) or (area.stop % 4):
raise ValueError("unaligned start and/or end addresses")
while offset != area.stop:
address: int = self.startAddress + offset
try:
yield parseInstruction(address, self.body[offset:offset + 4])
except:
yield None
offset += area.step
def dumpDisassembly(
self,
output: TextIO,
start: int | None = None,
stop: int | None = None
2024-11-29 00:49:07 +01:00
):
for inst in self.disassemble(start, stop):
if inst is not None:
output.write(f"{inst.address:08x}: {inst.toString()}\n")
def findBytes(
self,
data: ByteString,
start: int | None = None,
stop: int | None = None,
alignment: int = 4
2024-11-29 00:49:07 +01:00
) -> Generator[int, None, None]:
area: slice = self._makeSlice(start, stop)
offset: int = area.start
if area.step > 0:
step: int = len(data)
index: function = \
lambda offset: self.body.index(data, offset, area.stop)
else:
step: int = -len(data)
index: function = \
lambda offset: self.body.rindex(data, area.stop, offset)
while True:
try:
offset = index(offset)
except ValueError:
return
if not (offset % alignment):
yield self.startAddress + offset
offset += step
def findSingleMatch(
self,
data: ByteString,
start: int | None = None,
stop: int | None = None,
alignment: int = 4
) -> int:
matches: Generator[int, None, None] = \
self.findBytes(data, start, stop, alignment)
try:
firstMatch: int = next(matches)
except StopIteration:
raise AnalysisError("no match found")
try:
next(matches)
raise AnalysisError("more than one match found")
except StopIteration:
return firstMatch
2024-11-29 00:49:07 +01:00
def findFunctionReturns(
self,
start: int | None = None,
stop: int | None = None
2024-11-29 00:49:07 +01:00
) -> Generator[int, None, None]:
inst: bytes = encodeJR(Register.RA)
# Yield pointers to the end of the return "statement", skipping the
# instruction itself as well as its delay slot. In most cases these will
# be pointers to the end of a function and thus the beginning of another
# one.
2024-11-29 00:49:07 +01:00
for offset in self.findBytes(inst, start, stop, 4):
yield offset + 8
def findCalls(
self,
target: int,
start: int | None = None,
stop: int | None = None
) -> Generator[int, None, None]:
inst: bytes = encodeJAL(target)
yield from self.findBytes(inst, start, stop, 4)
def findValueLoads(
self,
value: int,
start: int | None = None,
stop: int | None = None,
maxDistance: int = 1
) -> Generator[ImmInstruction, None, None]:
# 32-bit loads are typically encoded as a LUI followed by either ORI or
# ADDIU. Due to ADDIU only supporting signed immediates, the LUI's
# immediate may not actually match the upper 16 bits of the value if the
# ADDIU is supposed to subtract from it.
2024-11-29 00:49:07 +01:00
for inst in self.disassemble(start, stop):
if inst is None:
continue
for offset in range(4, (maxDistance + 1) * 4, 4):
nextInst: Instruction | None = \
self.disassembleAt(inst.address + offset)
match inst, nextInst:
case (
ImmInstruction(
opcode = Opcode.LUI, rt = rt, value = msb
),
ImmInstruction(
opcode = Opcode.ORI, rs = rs, value = lsb
)
) if (rt == rs) and (((msb << 16) | lsb) == value):
yield nextInst
case (
ImmInstruction(
opcode = Opcode.LUI, rt = rt, value = msb
),
ImmInstruction(
opcode = Opcode.ADDIU, rs = rs, value = lsb
)
) if (rt == rs) and (((msb << 16) + lsb) == value):
yield nextInst