mirror of https://github.com/mon/ifstools.git synced 2024-09-24 10:58:21 +02:00

Repacking and better compression tools

Will Toohey 2017-12-18 19:37:16 +10:00
parent da489545c8
commit e0125caf09
9 changed files with 430 additions and 139 deletions

View File

@@ -1,15 +1,19 @@
# ifstools
Extractor for Konmai IFS files.
Requires [kbinxml](https://github.com/mon/kbinxml/).
Features:
- Converts all textures to png without requiring a second program
- Repacks without ingame display issues
- Works on eacloud music ifs files
- Correctly names AFP files
- Converts version.xml, afplist.xml, texturelist.xml to plaintext, to facilitate further experimentation.
- Dumps the ifs manifest so you can explore the format
Todo:
- Repacking
Requires [kbinxml](https://github.com/mon/kbinxml/).
- DXT5 repacking support (current workaround: edit texturelist to use argb8888rev)
- Cache compressed textures (compression is very slow)
- Recursive repacking for ifs inside ifs
I hope the rest is self-explanatory. Confused? Create a new issue and tell me what docs to add.
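For orientation, a minimal Python sketch of the two workflows this commit enables (the import path is an assumption; the extract-vs-repack dispatch mirrors the `__main__` block at the end of this diff):

```python
# Sketch only: import path and filenames are placeholders.
from ifstools import IFS   # the commit defines class IFS in the main script

IFS('song.ifs').extract_all()   # .ifs file -> song_ifs/ plus ifs_manifest.xml

IFS('song_ifs').repack()        # extracted folder -> rebuilt song.ifs
```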

View File

@@ -1,21 +1,77 @@
from os.path import getmtime
from kbinxml import KBinXML
import lxml.etree as etree
from . import escapes
class GenericFile(object):
def __init__(self, ifs, elem, name):
def __init__(self, ifs, path, name, time, start = -1, size = -1):
self.ifs = ifs
self.path = path
self.name = name
self.elem = elem
self.start, self.size, self.time = self._split_ints(elem.text)
self._packed_name = name
self.time = time
self.start = start
self.size = size
def _split_ints(self, text, delim = ' '):
@classmethod
def from_xml(cls, ifs, elem, name):
start, size, time = cls._split_ints(elem.text)
self = cls(ifs, None, name, time, start, size)
return self
@classmethod
def from_filesystem(cls, ifs, path, name):
time = int(getmtime(path))
start = size = -1
self = cls(ifs, path, name, time, start, size)
return self
@staticmethod
def _split_ints(text, delim = ' '):
return list(map(int, text.split(delim)))
def tostring(self, indent = 0):
return '{}{}\n'.format(' ' * indent, self.name)
def load(self, raw = False):
def load(self, convert_kbin = True):
if self.path:
return self._load_from_filesystem(convert_kbin)
else:
return self._load_from_ifs(convert_kbin)
def _load_from_ifs(self, convert_kbin = True):
data = self.ifs.load_file(self.start, self.size)
if not raw:
if self.name.endswith('.xml') and KBinXML.is_binary_xml(data):
data = KBinXML(data).to_text().encode('utf8')
if convert_kbin and self.name.endswith('.xml') and KBinXML.is_binary_xml(data):
data = KBinXML(data).to_text().encode('utf8')
return data
def _load_from_filesystem(self, convert_kbin = True):
with open(self.path, 'rb') as f:
ret = f.read()
self.size = len(ret)
return ret
def repack(self, manifest, data_blob, progress):
if progress:
print(self.name)
elem = etree.SubElement(manifest, self.packed_name)
elem.attrib['__type'] = '3s32'
data = self.load(convert_kbin = False)
if self.name.endswith('.xml') and not KBinXML.is_binary_xml(data):
data = KBinXML(data).to_binary()
# offset, size, timestamp
elem.text = '{} {} {}'.format(len(data_blob.getvalue()), len(data), self.time)
data_blob.write(data)
@property
def packed_name(self):
return self.sanitize_name(self._packed_name)
def sanitize_name(self, n):
for e in escapes[::-1]:
n = n.replace(e[1], e[0])
if n[0].isdigit():
n = '_' + n
return n
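A quick round-trip of the escaping scheme behind `packed_name`, as a sketch: it uses the `escapes` table `[('_E', '.'), ('__', '_')]` from the handlers package, and assumes the truncated final branch of `fix_name` (visible further down in the GenericFolder diff) does `n = n[1:]`.

```python
# Round-trip sketch of the name escaping (escapes table from the handlers package).
escapes = [
    ('_E', '.'),
    ('__', '_'),
]

def sanitize_name(n):
    # filesystem name -> manifest-safe tag (same loop as the property above)
    for e in escapes[::-1]:
        n = n.replace(e[1], e[0])
    if n[0].isdigit():
        n = '_' + n
    return n

def fix_name(n):
    # manifest tag -> filesystem name; assumes the cut-off branch does n = n[1:]
    for e in escapes:
        n = n.replace(*e)
    if n[0] == '_' and n[1].isdigit():
        n = n[1:]
    return n

assert sanitize_name('0.png') == '_0_Epng'
assert fix_name('_0_Epng') == '0.png'
```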

View File

@@ -1,34 +1,68 @@
from itertools import chain
from os.path import getmtime, basename, join
import lxml.etree as etree
from . import get_folder_handlers
from . import get_folder_handlers, escapes
from .GenericFile import GenericFile
escapes = [
('_E', '.'),
('__', '_'),
]
class GenericFolder():
def __init__(self, ifs, element, name = ''):
self.ifs = ifs
self.info_elem = None
self.name = name
self.elem = element
self.time = element.text
self.files = {}
self.folders = {}
def __init__(self, ifs, name, time, files, folders):
self.ifs = ifs
self.name = name
# xml sanitisation performed by the public property
self._packed_name = name
self.time = time
self.files = files
self.folders = folders
@classmethod
def from_xml(cls, ifs, element, name = ''):
time = int(element.text) if element.text else None
files = {}
folders = {}
for child in element.iterchildren(tag=etree.Element):
name = self.fix_name(child.tag)
if name == '_info_': # metadata
self.info_elem = child
filename = cls.fix_name(child.tag)
if filename == '_info_': # metadata
info_elem = child
elif list(child): # folder
handler = get_folder_handlers().get(name, GenericFolder)
self.folders[name] = handler(self.ifs, child, name)
handler = get_folder_handlers().get(filename, GenericFolder)
folders[filename] = handler.from_xml(ifs, child, filename)
else: # file
self.files[name] = GenericFile(self.ifs, child, name)
files[filename] = GenericFile.from_xml(ifs, child, filename)
return cls(ifs, name, time, files, folders)
@classmethod
def from_filesystem(cls, ifs, tree, name = ''):
time = int(getmtime(tree['path']))
files = {}
folders = {}
for folder in tree['folders']:
base = basename(folder['path'])
handler = get_folder_handlers().get(base, GenericFolder)
folders[base] = handler.from_filesystem(ifs, folder, base)
for filename in tree['files']:
path = join(tree['path'], filename)
files[filename] = GenericFile.from_filesystem(ifs, path, filename)
return cls(ifs, name, time, files, folders)
def repack(self, manifest, data_blob, progress):
if self.name:
manifest = etree.SubElement(manifest, self.packed_name)
manifest.attrib['__type'] = 's32'
manifest.text = str(self.time)
if progress:
print(self.name)
for name, entry in chain(self.folders.items(), self.files.items()):
entry.repack(manifest, data_blob, progress)
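To make the shape concrete, a hedged sketch (tag names and numbers invented) of the manifest fragment that `GenericFolder.repack` and `GenericFile.repack` build before `IFS.repack` re-encodes it to kbin:

```python
# Hedged sketch (values invented) of the manifest fragment built during repack.
import lxml.etree as etree

manifest = etree.Element('imgfs')

folder = etree.SubElement(manifest, 'tex')            # packed folder name
folder.attrib['__type'] = 's32'
folder.text = '1513586236'                            # folder timestamp

entry = etree.SubElement(folder, 'texturelist_Exml')  # sanitised file name
entry.attrib['__type'] = '3s32'
entry.text = '0 4096 1513586236'                      # offset size timestamp

print(etree.tostring(manifest, pretty_print=True).decode())
```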
def tostring(self, indent = 0):
ret = ''
@@ -39,7 +73,19 @@ class GenericFolder():
ret += entry.tostring(indent)
return ret
def fix_name(self, n):
@property
def packed_name(self):
return self.sanitize_name(self._packed_name)
def sanitize_name(self, n):
for e in escapes[::-1]:
n = n.replace(e[1], e[0])
if n[0].isdigit():
n = '_' + n
return n
@staticmethod
def fix_name(n):
for e in escapes:
n = n.replace(*e)
if n[0] == '_' and n[1].isdigit():

View File

@@ -2,6 +2,7 @@ from io import BytesIO
from struct import unpack, pack
from PIL import Image
import lxml.etree as etree
from kbinxml import KBinXML
from . import GenericFile
@ -20,8 +21,10 @@ dxt5_end = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
class ImageFile(GenericFile):
def __init__(self, gen_file, image_elem, fmt, compress):
super().__init__(gen_file.ifs, gen_file.elem, gen_file.name + '.png')
super().__init__(gen_file.ifs, gen_file.path,
gen_file.name + '.png', gen_file.time,
gen_file.start, gen_file.size)
self._packed_name = gen_file._packed_name
self.image_elem = image_elem
self.format = fmt
self.compress = compress
@@ -33,8 +36,15 @@
(self.imgrect[3]-self.imgrect[2])//2
)
def load(self):
data = super().load()
@classmethod
def from_xml(cls, ifs, elem, name):
raise Exception('ImageFile must be instantiated from existing element')
@classmethod
def from_filesystem(cls, ifs, tree, name):
raise Exception('ImageFile must be instantiated from existing element')
def _load_from_ifs(self, convert_kbin = False):
data = super()._load_from_ifs()
if self.compress == 'avslz':
uncompressed_size = unpack('>I', data[:4])[0]
@@ -72,3 +82,29 @@ class ImageFile(GenericFile):
b = BytesIO()
im.save(b, format = 'PNG')
return b.getvalue()
def repack(self, manifest, data_blob, progress):
if progress:
print(self.name)
data = self.load()
im = Image.open(BytesIO(data))
if self.format == 'argb8888rev':
data = im.tobytes('raw', 'BGRA')
else:
raise NotImplementedError('Unknown format {}'.format(self.format))
if self.compress == 'avslz':
o = data
uncompressed_size = len(data)
data = lz77.compress(data)
compressed_size = len(data)
data = pack('>I', uncompressed_size) + pack('>I', compressed_size) + data
# offset, size, timestamp
elem = etree.SubElement(manifest, self.packed_name)
elem.attrib['__type'] = '3s32'
elem.text = '{} {} {}'.format(len(data_blob.getvalue()), len(data), self.time)
data_blob.write(data)
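The `avslz` framing handled in `_load_from_ifs` and rebuilt here is just two big-endian u32 length fields in front of the LZ77 stream; a small helper pair as a sketch (function names are mine):

```python
# Sketch (helper names are mine) of the avslz framing used above.
from struct import pack, unpack

def wrap_avslz(uncompressed_len, compressed):
    # big-endian u32 uncompressed size, u32 compressed size, then LZ77 data
    return pack('>I', uncompressed_len) + pack('>I', len(compressed)) + compressed

def unwrap_avslz(blob):
    uncompressed_size, compressed_size = unpack('>II', blob[:8])
    return uncompressed_size, blob[8:8 + compressed_size]
```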

View File

@@ -5,32 +5,53 @@ from kbinxml import KBinXML
from . import GenericFolder
class MD5Folder(GenericFolder):
def __init__(self, ifs, element, name, md5_tag = None):
super().__init__(ifs, element, name)
def __init__(self, ifs, name, time, files, folders):
super().__init__(ifs, name, time, files, folders)
for filename, file in self.files.items():
if filename.endswith('.xml'):
self.info_kbin = file
break
if not self.info_kbin:
raise KeyError('MD5 folder expected but no mapping xml')
raise KeyError('MD5 folder contents have no mapping xml')
self.info_kbin = KBinXML(self.info_kbin.load(True))
self.info_kbin = KBinXML(self.info_kbin.load(convert_kbin = False))
@classmethod
def from_xml(cls, ifs, element, name = '', md5_tag = None, extension = None):
self = super().from_xml(ifs, element, name)
self._apply_md5(md5_tag, extension)
return self
@classmethod
def from_filesystem(cls, ifs, tree, name = '', md5_tag = None, extension = None):
self = super().from_filesystem(ifs, tree, name)
self._apply_md5(md5_tag, extension)
return self
def _apply_md5(self, md5_tag, extension):
if not md5_tag:
md5_tag = name
# findall needs xpath or it'll only search children
md5_tag = self.name
# findall needs xpath or it'll only search direct children
for tag in self.info_kbin.xml_doc.findall('.//' + md5_tag):
filename = tag.attrib['name']
hash = md5(filename.encode(self.info_kbin.encoding)).hexdigest()
# handles subfolders like afp/bsi/
self.rename_recurse(self, hash, filename)
self.rename_recurse(self, hash, filename, extension)
def rename_recurse(self, entry, original, replacement):
def rename_recurse(self, entry, original, replacement, extension):
# handles renamed files (eg tex->png)
if extension and (replacement + extension in entry.files):
entry.files[replacement] = entry.files.pop(replacement + extension)
entry.files[replacement].name = replacement
# handles deobfuscated filesystems
if replacement in entry.files:
entry.files[replacement]._packed_name = original
if original in entry.files:
orig = entry.files.pop(original)
orig.name = replacement
entry.files[replacement] = orig
for name, folder in entry.folders.items():
self.rename_recurse(folder, original, replacement)
self.rename_recurse(folder, original, replacement, extension)
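For reference, the obfuscated names being undone here are just the MD5 hex digests of the readable names from the mapping xml, hashed in that xml's encoding; a sketch with example values:

```python
# Sketch: example name and encoding; mirrors the md5() call in _apply_md5 above.
from hashlib import md5

name = 'tc_title'                 # readable name from the mapping xml
encoding = 'cp932'                # whatever self.info_kbin.encoding reports
obfuscated = md5(name.encode(encoding)).hexdigest()

# rename_recurse() renames the entry keyed by `obfuscated` back to `name`;
# the new `extension` argument also catches files extracted under a renamed
# form such as name + '.png'.
print(obfuscated)
```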

View File

@@ -1,11 +1,23 @@
from . import MD5Folder, GenericFile, ImageFile
class TexFolder(MD5Folder):
def __init__(self, ifs, element, name):
super().__init__(ifs, element, name, 'image')
def __init__(self, ifs, name, time, files, folders):
super().__init__(ifs, name, time, files, folders)
self.compress = self.info_kbin.xml_doc.attrib.get('compress')
@classmethod
def from_xml(cls, ifs, element, name = ''):
self = super().from_xml(ifs, element, name, 'image', '.png')
self._create_images()
return self
@classmethod
def from_filesystem(cls, ifs, tree, name = ''):
self = super().from_filesystem(ifs, tree, name, 'image', '.png')
self._create_images()
return self
def _create_images(self):
for tex in self.info_kbin.xml_doc.iterchildren():
folder = tex.attrib['name']
fmt = tex.attrib['format']

View File

@@ -5,6 +5,11 @@ def get_folder_handlers():
'tex' : TexFolder
}
escapes = [
('_E', '.'),
('__', '_'),
]
from .GenericFile import GenericFile
from .ImageFile import ImageFile

View File

@@ -1,5 +1,7 @@
# consistency with py 2/3
from builtins import bytes
from struct import unpack, pack
from io import BytesIO
WINDOW_SIZE = 0x1000
WINDOW_MASK = WINDOW_SIZE - 1
@@ -7,113 +9,103 @@ THRESHOLD = 3
INPLACE_THRESHOLD = 0xA
LOOK_RANGE = 0x200
MAX_LEN = 0xF + THRESHOLD
MAX_BUFFER = 0x10 + 1
def decompress(input):
input = bytes(input)
input = BytesIO(input)
decompressed = bytearray()
cur_byte = 0
input_length = len(input)
window = [0] * WINDOW_SIZE
window_cursor = 0
while cur_byte < input_length:
flag = input[cur_byte]
cur_byte += 1
while True:
flag = input.read(1)[0]
for i in range(8):
if (flag >> i) & 1 == 1:
decompressed.append(input[cur_byte])
window[window_cursor] = input[cur_byte]
window_cursor = (window_cursor + 1) & WINDOW_MASK
cur_byte += 1
decompressed.append(input.read(1)[0])
else:
w = input[cur_byte] << 8 | input[cur_byte + 1]
cur_byte += 2
if (w >> 4) == 0:
w = unpack('>H', input.read(2))[0]
position = (w >> 4)
length = (w & 0x0F) + THRESHOLD
if position == 0:
return bytes(decompressed)
position = ((window_cursor - (w >> 4)) & WINDOW_MASK)
length = (w & 0x0F) + THRESHOLD
if position > len(decompressed):
diff = position - len(decompressed)
diff = min(diff, length)
decompressed.extend([0]*diff)
length -= diff
# optimise
if -position+length < 0:
decompressed.extend(decompressed[-position:-position+length])
else:
for loop in range(length):
decompressed.append(decompressed[-position])
for loop in range(length):
b = window[position & WINDOW_MASK]
decompressed.append(b)
window[window_cursor] = b
window_cursor = (window_cursor + 1) & WINDOW_MASK
position = position + 1
return bytes(decompressed)
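As a reading aid for the branch above: each clear flag bit is followed by a 16-bit big-endian code word whose high 12 bits are the back-reference distance and whose low 4 bits are the copy length minus THRESHOLD, with distance 0 terminating the stream. A small sketch:

```python
# Sketch: decode one back-reference code word the way the branch above does.
from struct import pack, unpack

THRESHOLD = 3

def decode_code_word(two_bytes):
    w = unpack('>H', two_bytes)[0]
    distance = w >> 4                  # 0 here means "end of stream"
    length = (w & 0x0F) + THRESHOLD    # 3..18 bytes copied from the window
    return distance, length

assert decode_code_word(pack('>H', (0x123 << 4) | (8 - THRESHOLD))) == (0x123, 8)
```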
def match_window(in_data, offset):
'''Find the longest match for the string starting at offset in the preceding data
'''
window_start = max(offset - WINDOW_MASK, 0)
for n in range(MAX_LEN, THRESHOLD-1, -1):
window_end = min(offset + n, len(in_data))
# we've not got enough data left for a meaningful result
if window_end - offset < THRESHOLD:
return None
str_to_find = in_data[offset:window_end]
idx = in_data.rfind(str_to_find, window_start, window_end-n)
if idx != -1:
code_offset = offset - idx # - 1
code_len = len(str_to_find)
return (code_offset, code_len)
def match_current(window, pos, max_len, data, dpos):
length = 0
data_len = len(data)
while dpos + length < data_len and length < max_len and \
window[(pos + length) & WINDOW_MASK] == data[dpos + length] and length < MAX_LEN:
length += 1
return length
def match_window(window, pos, data, dpos):
max_pos = 0;
max_len = 0;
for i in range(THRESHOLD, LOOK_RANGE):
length = match_current(window, (pos - i) & WINDOW_MASK, i, data, dpos)
if length >= INPLACE_THRESHOLD:
return (i, length)
if length >= THRESHOLD:
max_pos = i
max_len = length
if max_len >= THRESHOLD:
return (max_pos, max_len)
else:
return None
return None
def compress(input):
compressed = bytearray()
input = bytes(input)
input = bytes([0]*WINDOW_SIZE) + bytes(input)
input_size = len(input)
window = [0] * WINDOW_SIZE
current_pos = 0
current_window = 0
current_pos = WINDOW_SIZE
bit = 0
buf = [0] * 0x11
while current_pos < input_size:
flag_byte = 0;
current_buffer = 0;
buf = bytearray()
for _ in range(8):
if current_pos >= input_size:
buf[current_buffer] = 0;
window[current_window] = 0;
current_buffer += 1;
current_pos += 1;
current_window += 1;
bit = 0;
else:
match = match_window(window, current_window, input, current_pos)
match = match_window(input, current_pos)
if match:
pos, length = match
byte1 = (pos >> 4)
byte2 = (((pos & 0x0F) << 4) | ((length - THRESHOLD) & 0x0F))
buf[current_buffer] = byte1
buf[current_buffer + 1] = byte2
current_buffer += 2
info = (pos << 4) | ((length - THRESHOLD) & 0x0F)
buf.extend(pack('>H', info))
bit = 0
for _ in range(length):
window[current_window & WINDOW_MASK] = input[current_pos]
current_pos += 1
current_window += 1
current_pos += length
else:
buf[current_buffer] = input[current_pos]
window[current_window] = input[current_pos]
buf.append(input[current_pos])
current_pos += 1
current_window += 1
current_buffer += 1
bit = 1
flag_byte = (flag_byte >> 1) | ((bit & 1) << 7)
current_window = current_window & WINDOW_MASK
compressed.append(flag_byte)
for i in range(current_buffer):
compressed.append(buf[i])
compressed.extend(buf)
compressed.append(0)
compressed.append(0)
compressed.append(0)
return bytes(compressed)
def compress_dummy(input):
input_length = len(input)
compressed = bytearray()
extra_bytes = input_length % 8
for i in range(0, input_length-extra_bytes, 8):
compressed.append(0xFF)
compressed.extend(input[i:i+8])
if extra_bytes > 0:
compressed.append(0xFF >> (8 - extra_bytes))
compressed.extend(input[-extra_bytes:])
compressed.append(0)
compressed.append(0)
compressed.append(0)
return bytes(compressed)
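A minimal round-trip sanity check for the rewritten compressor, as a sketch (the `lz77` import path is an assumption; the diff does not show the module layout):

```python
# Sketch only: assumes this module is importable as `lz77`.
import os
import lz77

payload = b'konmai' * 200 + os.urandom(64)   # repetitive data plus some noise

for pack_fn in (lz77.compress, lz77.compress_dummy):
    blob = pack_fn(payload)
    assert lz77.decompress(blob) == payload
```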

View File

@@ -1,39 +1,90 @@
from os.path import basename, dirname, splitext, join
from os import mkdir, utime
from os.path import basename, dirname, splitext, join, isdir, isfile, getmtime
from os import mkdir, utime, walk
from io import BytesIO
import hashlib
import lxml.etree as etree
from struct import unpack
from time import time as unixtime
from kbinxml.kbinxml import KBinXML
from kbinxml.bytebuffer import ByteBuffer
from handlers import GenericFolder
SIGNATURE = 0x6CAD8F89
KBIN_OFFSET = 36
FILE_VERSION = 3
class IFS:
def __init__(self, path):
if isfile(path):
self._load_ifs(path)
self.is_file = True
elif isdir(path):
self._load_dir(path)
self.is_file = False
else:
raise IOError('Input path does not exist')
def _load_ifs(self, path):
out = splitext(basename(path))[0] + '_ifs'
self.default_out = join(dirname(path), out)
self.ifs_out = path
with open(path, 'rb') as f:
self.file = f.read()
b = ByteBuffer(self.file)
self.signature = b.get_u32()
self.ifs_size = b.get_u32()
self.unk1 = b.get_u32()
self.unk2 = b.get_u32()
signature = b.get_u32()
if signature != SIGNATURE:
raise IOError('Given file was not an IFS file!')
self.file_version = b.get_u16()
# next u16 is just NOT(version)
assert b.get_u16() ^ self.file_version == 0xFFFF
self.time = b.get_u32()
self.tree_size = b.get_u32()
self.header_end = b.get_u32()
# 16 bytes more, unsure
# 16 bytes for manifest md5, unchecked
self.manifest = KBinXML(self.file[KBIN_OFFSET:])
#with open('debug_manifest.xml', 'wb') as f:
# f.write(self.manifest.to_text().encode('utf8'))
self._parse_manifest()
assert self.tree_size == self._tree_size()
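The fixed 36-byte prologue parsed above can be read in one `struct` call; a sketch, assuming kbinxml's ByteBuffer is big-endian here (as the kbin format itself is):

```python
# Sketch: the 36-byte prologue in one struct call (big-endian assumed, as kbin is).
from struct import unpack

def read_ifs_header(blob):
    (signature, version, not_version,
     time, tree_size, header_end) = unpack('>IHHIII', blob[:20])
    assert signature == 0x6CAD8F89
    assert version ^ not_version == 0xFFFF
    manifest_md5 = blob[20:36]          # KBIN_OFFSET = 36: manifest kbin starts here
    return time, tree_size, header_end, manifest_md5
```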
def _load_dir(self, path):
self.default_out = path
self.ifs_out = basename(path).replace('_ifs', '.ifs')
self.file_version = FILE_VERSION
self.time = int(getmtime(path))
self.tree_size = -1
self.header_end = -1
self.manifest = None
os_tree = self._create_dir_tree(path)
self.tree = GenericFolder.from_filesystem(self, os_tree)
def _create_dir_tree(self, path):
tree = self._create_dir_tree_recurse(walk(path))
if 'ifs_manifest.xml' in tree['files']:
tree['files'].remove('ifs_manifest.xml')
return tree
def _create_dir_tree_recurse(self, walker):
tree = {}
root, dirs, files = next(walker)
tree['path'] = root
tree['files'] = files
tree['folders'] = []
for dir in dirs:
tree['folders'].append(self._create_dir_tree_recurse(walker))
return tree
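For reference, the nested dict this builds (and which `GenericFolder.from_filesystem` consumes) has this shape; paths and names are invented for illustration:

```python
# Shape only (paths invented): one os.walk pass, ifs_manifest.xml already removed.
example_tree = {
    'path': 'song_ifs',
    'files': ['version.xml'],
    'folders': [
        {
            'path': 'song_ifs/tex',
            'files': ['texturelist.xml', 'title.png'],
            'folders': [],
        },
    ],
}
```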
def _parse_manifest(self):
self.tree = GenericFolder(self, self.manifest.xml_doc)
self.tree = GenericFolder.from_xml(self, self.manifest.xml_doc)
def tostring(self):
return self.tree.tostring()
@@ -41,10 +92,67 @@ class IFS:
def extract_all(self, progress = True, recurse = True, path = None):
self.out = path if path else self.default_out
self._mkdir(self.out)
with open(join(self.out, 'ifs_manifest.xml'), 'wb') as f:
f.write(self.manifest.to_text().encode('utf8'))
if self.manifest:
with open(join(self.out, 'ifs_manifest.xml'), 'wb') as f:
f.write(self.manifest.to_text().encode('utf8'))
self._extract_tree(self.tree, progress, recurse)
def repack(self, progress = True, path = None):
if path is None:
path = self.ifs_out
data_blob = BytesIO()
self.manifest = KBinXML(etree.Element('imgfs'))
manifest_info = etree.SubElement(self.manifest.xml_doc, '_info_')
# the important bit
self.tree.repack(self.manifest.xml_doc, data_blob, progress)
data = data_blob.getvalue()
data_md5 = etree.SubElement(manifest_info, 'md5')
data_md5.attrib['__type'] = 'bin'
data_md5.attrib['__size'] = '16'
data_md5.text = hashlib.md5(data).hexdigest()
data_size = etree.SubElement(manifest_info, 'size')
data_size.attrib['__type'] = 'u32'
data_size.text = str(len(data))
manifest_bin = self.manifest.to_binary()
self.header_end = 36 + len(manifest_bin)
self.ifs_size = self.header_end + len(data)
self.tree_size = self._tree_size()
manifest_hash = hashlib.md5(manifest_bin).digest()
head = ByteBuffer()
head.append_u32(SIGNATURE)
head.append_u16(self.file_version)
head.append_u16(self.file_version ^ 0xFFFF)
head.append_u32(int(unixtime()))
head.append_u32(self.tree_size)
head.append_u32(self.header_end)
with open(path, 'wb') as ifs_file:
ifs_file.write(head.data)
ifs_file.write(manifest_hash)
ifs_file.write(manifest_bin)
ifs_file.write(data)
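Putting the writes above together, a hedged sketch that re-reads a freshly repacked file and checks the layout (`header_end = 36 + len(manifest_bin)`, data blob after it); the big-endian field order is the same assumption as in the header sketch earlier:

```python
# Hedged sketch: re-read a repacked file and check the layout written above.
import hashlib

def check_repacked(path):
    with open(path, 'rb') as f:
        blob = f.read()
    header_end = int.from_bytes(blob[16:20], 'big')   # last u32 of the 20-byte header
    assert hashlib.md5(blob[36:header_end]).digest() == blob[20:36]
    return blob[header_end:]   # data blob; manifest offsets are relative to this point
```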
# suspected to be the in-memory representation
def _tree_size(self):
BASE_SIZE = 856
return BASE_SIZE + self._tree_size_recurse(self.tree)
def _tree_size_recurse(self, tree, depth = 0):
FILE = 64
FOLDER = 56
DEPTH_MULTIPLIER = 16
size = len(tree.files) * FILE
size += len(tree.folders) * (FOLDER - depth*DEPTH_MULTIPLIER)
for name, folder in tree.folders.items():
size += self._tree_size_recurse(folder, depth+1)
return size
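A worked example of this size heuristic, with purely illustrative counts: a root folder holding 2 files and one subfolder of 10 files gives:

```python
# Illustrative numbers only; constants as defined above.
BASE_SIZE, FILE, FOLDER, DEPTH_MULTIPLIER = 856, 64, 56, 16

root_level = 2 * FILE + 1 * (FOLDER - 0 * DEPTH_MULTIPLIER)   # 128 + 56 = 184
sub_level  = 10 * FILE + 0 * (FOLDER - 1 * DEPTH_MULTIPLIER)  # 640
tree_size  = BASE_SIZE + root_level + sub_level               # 856 + 184 + 640
assert tree_size == 1680
```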
def _extract_tree(self, tree, progress = True, recurse = True, dir = ''):
outdir = join(self.out, dir)
if progress:
@@ -64,6 +172,10 @@ class IFS:
for name, f in tree.folders.items():
self._extract_tree(f, progress, recurse, join(dir, f.name))
# fallback to file timestamp
timestamp = tree.time if tree.time else self.time
utime(outdir, (timestamp, timestamp))
def _mkdir(self, dir):
try:
@@ -72,7 +184,11 @@
pass
def load_file(self, start, size):
return self.file[self.header_end+start:self.header_end+start+size]
start = self.header_end+start
end = start + size
assert start <= len(self.file) and end <= len(self.file)
return self.file[start:end]
def _save_with_time(self, filename, data, time):
with open(filename, 'wb') as f:
@@ -82,7 +198,10 @@
if __name__ == '__main__':
import sys
if len(sys.argv) < 2:
print('ifstools filename.ifs')
print('ifstools filename.ifs OR folder_ifs')
exit()
i = IFS(sys.argv[1])
i.extract_all()
if i.is_file:
i.extract_all()
else:
i.repack()