From cef18f3fd77dae3d38502cd16c6ec8048b5d8247 Mon Sep 17 00:00:00 2001 From: Will Toohey Date: Sat, 16 Dec 2017 03:43:57 +1000 Subject: [PATCH] Initial commit --- .gitignore | 1 + README.md | 15 +++++ handlers/GenericFile.py | 21 +++++++ handlers/GenericFolder.py | 47 +++++++++++++++ handlers/ImageFile.py | 74 ++++++++++++++++++++++++ handlers/MD5Folder.py | 36 ++++++++++++ handlers/TexFolder.py | 19 ++++++ handlers/__init__.py | 13 +++++ handlers/lz77.py | 119 ++++++++++++++++++++++++++++++++++++++ ifstools.py | 88 ++++++++++++++++++++++++++++ 10 files changed, 433 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 handlers/GenericFile.py create mode 100644 handlers/GenericFolder.py create mode 100644 handlers/ImageFile.py create mode 100644 handlers/MD5Folder.py create mode 100644 handlers/TexFolder.py create mode 100644 handlers/__init__.py create mode 100644 handlers/lz77.py create mode 100644 ifstools.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c18dd8d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..9cb6c22 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# ifstools +Extractor for Konmai IFS files. + +Features: +- Converts all textures to png without requiring a second program +- Works on eacloud music ifs files +- Correctly names AFP files +- Converts version.xml, afplist.xml, texturelist.xml to plaintext, to facilitate further experimentation. + +Todo: +- Repacking + +Requires [kbinxml](https://github.com/mon/kbinxml/). + +I hope the rest is self explanatory. Confused? Create a new issue and tell me what docs to add. 
# handlers/GenericFile.py
from kbinxml import KBinXML


class GenericFile(object):
    """A single file entry in an IFS archive manifest.

    The manifest element's text holds three space-separated integers:
    start offset (relative to the archive's data section), size in
    bytes, and a unix timestamp.
    """

    def __init__(self, ifs, elem, name):
        self.ifs = ifs      # owning IFS archive; provides load_file()
        self.name = name
        self.elem = elem
        self.start, self.size, self.time = self._split_ints(elem.text)

    @staticmethod
    def _split_ints(text, delim = ' '):
        """Parse *text* into a list of ints split on *delim*."""
        return [int(part) for part in text.split(delim)]

    def tostring(self, indent = 0):
        """Return a one-line, indented tree representation of this file."""
        return '{}{}\n'.format(' ' * indent, self.name)

    def load(self, raw = False):
        """Read this file's bytes out of the archive.

        When raw is False, payloads named *.xml that are in Konami
        binary-xml form are converted to UTF-8 plaintext.
        """
        data = self.ifs.load_file(self.start, self.size)
        if not raw:
            if self.name.endswith('.xml') and KBinXML.is_binary_xml(data):
                data = KBinXML(data).to_text().encode('utf8')
        return data


# handlers/GenericFolder.py (module continues on the next source line)
from itertools import chain

import lxml.etree as etree
# handlers/GenericFolder.py
from itertools import chain

import lxml.etree as etree

from . import get_folder_handlers
from .GenericFile import GenericFile

# Manifest tags escape characters that are illegal in XML names.
# Order matters: '_E' must be unescaped before '__' is collapsed.
escapes = [
    ('_E', '.'),
    ('__', '_'),
]

class GenericFolder():
    """A folder node in the IFS manifest tree.

    Builds child files/folders from the manifest element, dispatching
    known folder names (afp, tex) to specialised handlers.
    """

    def __init__(self, ifs, element, name = ''):
        self.ifs = ifs
        self.info_elem = None
        self.name = name
        self.elem = element
        self.time = element.text

        self.files = {}
        self.folders = {}
        # tag=etree.Element skips comments and processing instructions
        for child in element.iterchildren(tag=etree.Element):
            # BUG FIX: loop variable used to shadow the 'name' parameter
            child_name = self.fix_name(child.tag)
            if child_name == '_info_':  # metadata
                self.info_elem = child
            elif list(child):  # has children: a folder
                handler = get_folder_handlers().get(child_name, GenericFolder)
                self.folders[child_name] = handler(self.ifs, child, child_name)
            else:  # leaf: a file
                self.files[child_name] = GenericFile(self.ifs, child, child_name)

    def tostring(self, indent = 0):
        """Return a recursive, indented tree listing of this folder."""
        ret = ''
        if self.name:
            ret += '{}{}/\n'.format(' ' * indent, self.name)
            indent += 1
        for entry in chain(self.folders.values(), self.files.values()):
            ret += entry.tostring(indent)
        return ret

    def fix_name(self, n):
        """Reverse the manifest's tag-name escaping."""
        for escaped, plain in escapes:
            n = n.replace(escaped, plain)
        # names starting with a digit get a leading '_' in the manifest;
        # BUG FIX: guard length so a bare '_' cannot raise IndexError
        if len(n) > 1 and n[0] == '_' and n[1].isdigit():
            n = n[1:]
        return n
# handlers/ImageFile.py
from io import BytesIO
from struct import unpack, pack

from PIL import Image
from kbinxml import KBinXML

from . import GenericFile
from . import lz77

# header for a standard DDS with DXT5 compression and RGBA pixels
# gap placed for image height/width insertion
dxt5_start = b'DDS |\x00\x00\x00\x07\x10\x00\x00'
dxt5_end = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
           b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
           b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
           b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x04' + \
           b'\x00\x00\x00DXT5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
           b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00' + \
           b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'

class ImageFile(GenericFile):
    """A texture inside a tex folder, decoded to PNG on load().

    Wraps an already-parsed GenericFile and adds the texturelist.xml
    metadata (format, compression, uv/img rectangles).
    """

    def __init__(self, gen_file, image_elem, fmt, compress):
        # adopt the generic file's manifest info, renamed to *.png
        super().__init__(gen_file.ifs, gen_file.elem, gen_file.name + '.png')

        self.image_elem = image_elem
        self.format = fmt
        self.compress = compress

        self.uvrect = self._split_ints(image_elem.find('uvrect').text)
        self.imgrect = self._split_ints(image_elem.find('imgrect').text)
        # rect coordinates are stored doubled in texturelist.xml
        self.img_size = (
            (self.imgrect[1]-self.imgrect[0])//2,
            (self.imgrect[3]-self.imgrect[2])//2
        )

    def load(self, raw = False):
        """Return this texture as PNG bytes.

        BUG FIX: accept raw like the parent class so the override does
        not narrow GenericFile.load's signature; raw=True returns the
        undecoded archive payload.
        """
        data = super().load()
        if raw:
            return data

        if self.compress == 'avslz':
            uncompressed_size = unpack('>I', data[:4])[0]
            compressed_size = unpack('>I', data[4:8])[0]
            # sometimes the headers are missing: not actually compressed
            # The 2 extra u32 are moved to the end of the file
            # Quality file format.
            if len(data) == compressed_size + 8:
                data = lz77.decompress(data[8:])
                assert len(data) == uncompressed_size
            else:
                data = data[8:] + data[:8]

        if self.format == 'argb8888rev':
            need = self.img_size[0] * self.img_size[1] * 4
            if len(data) < need:
                print('WARNING: Not enough image data for {}, padding'.format(self.name))
                data += b'\x00' * (need-len(data))
            im = Image.frombytes('RGBA', self.img_size, data, 'raw', 'BGRA')
        elif self.format == 'dxt5':
            b = BytesIO()
            b.write(dxt5_start)
            b.write(pack('<2I', self.img_size[1], self.img_size[0]))
            b.write(dxt5_end)
            # the data has swapped endianness for every WORD
            n_words = len(data)//2
            big = unpack('>{}H'.format(n_words), data)
            b.write(pack('<{}H'.format(n_words), *big))
            im = Image.open(b)
        else:
            raise NotImplementedError('Unknown format {}'.format(self.format))

        out = BytesIO()
        im.save(out, format = 'PNG')
        return out.getvalue()
# handlers/MD5Folder.py
from hashlib import md5

from kbinxml import KBinXML

from . import GenericFolder

class MD5Folder(GenericFolder):
    """A folder whose entries are stored under md5 hashes of their names.

    A mapping xml inside the folder lists the real filenames; each is
    hashed and the matching archive entry renamed back to plaintext.
    """

    def __init__(self, ifs, element, name, md5_tag = None):
        super().__init__(ifs, element, name)

        # BUG FIX: initialise before the search, otherwise a folder with
        # no mapping xml raised AttributeError instead of the intended
        # KeyError below
        self.info_kbin = None
        for filename, file in self.files.items():
            if filename.endswith('.xml'):
                self.info_kbin = file
                break
        if not self.info_kbin:
            raise KeyError('MD5 folder expected but no mapping xml')

        self.info_kbin = KBinXML(self.info_kbin.load(True))

        if not md5_tag:
            md5_tag = name
        # findall needs xpath ('.//') or it'll only search direct children
        for tag in self.info_kbin.xml_doc.findall('.//' + md5_tag):
            filename = tag.attrib['name']
            # renamed from 'hash': don't shadow the builtin
            digest = md5(filename.encode(self.info_kbin.encoding)).hexdigest()
            # handles subfolders like afp/bsi/
            self.rename_recurse(self, digest, filename)

    def rename_recurse(self, entry, original, replacement):
        """Rename *original* to *replacement* in entry and all subfolders."""
        if original in entry.files:
            orig = entry.files.pop(original)
            orig.name = replacement
            entry.files[replacement] = orig

        for folder in entry.folders.values():
            self.rename_recurse(folder, original, replacement)
# handlers/TexFolder.py
from . import MD5Folder, GenericFile, ImageFile

class TexFolder(MD5Folder):
    """The tex folder: md5-named textures described by texturelist.xml."""

    def __init__(self, ifs, element, name):
        # 'image' is the tag that carries md5-hashed names in texturelist.xml
        super().__init__(ifs, element, name, 'image')

        self.compress = self.info_kbin.xml_doc.attrib.get('compress')

        for tex in self.info_kbin.xml_doc.iterchildren():
            folder = tex.attrib['name']
            fmt = tex.attrib['format']
            for indiv in tex.iterchildren():
                if indiv.tag == 'size':
                    continue
                elif indiv.tag == 'image':
                    # BUG FIX: don't shadow the 'name' parameter;
                    # upgrade the plain file to an image-aware handler
                    img_name = indiv.attrib['name']
                    self.files[img_name] = ImageFile(self.files[img_name], indiv, fmt, self.compress)
                else:
                    print('Unknown texturelist.xml element {}'.format(indiv.tag))


# handlers/__init__.py
# because we import this circularly, it needs to be a getter
def get_folder_handlers():
    return {
        'afp' : MD5Folder,
        'tex' : TexFolder
    }

from .GenericFile import GenericFile
from .ImageFile import ImageFile

from .GenericFolder import GenericFolder
from .MD5Folder import MD5Folder
from .TexFolder import TexFolder


# handlers/lz77.py (functions continue on the next source line)
# consistency with py 2/3
from builtins import bytes

WINDOW_SIZE = 0x1000
WINDOW_MASK = WINDOW_SIZE - 1
THRESHOLD = 3            # minimum back-reference length
INPLACE_THRESHOLD = 0xA  # a match this long is taken immediately
LOOK_RANGE = 0x200       # how far back compress searches
MAX_LEN = 0xF + THRESHOLD
MAX_BUFFER = 0x10 + 1
# handlers/lz77.py — Konami's LZ77 variant (4KiB window, 12-bit distance,
# 4-bit length). A zero back-reference distance terminates the stream.
# consistency with py 2/3
from builtins import bytes

WINDOW_SIZE = 0x1000
WINDOW_MASK = WINDOW_SIZE - 1
THRESHOLD = 3            # minimum back-reference length
INPLACE_THRESHOLD = 0xA  # a match this long is taken immediately
LOOK_RANGE = 0x200       # how far back compress searches
MAX_LEN = 0xF + THRESHOLD
MAX_BUFFER = 0x10 + 1


def decompress(input):
    """Decompress *input* bytes; stops at the zero-distance terminator."""
    input = bytes(input)
    decompressed = bytearray()
    cur_byte = 0
    input_length = len(input)
    window = [0] * WINDOW_SIZE
    window_cursor = 0

    while cur_byte < input_length:
        # each flag byte describes the next 8 entries, LSB first:
        # bit set = literal byte, bit clear = 2-byte back-reference
        flag = input[cur_byte]
        cur_byte += 1
        for i in range(8):
            if (flag >> i) & 1 == 1:
                decompressed.append(input[cur_byte])
                window[window_cursor] = input[cur_byte]
                window_cursor = (window_cursor + 1) & WINDOW_MASK
                cur_byte += 1
            else:
                w = input[cur_byte] << 8 | input[cur_byte + 1]
                cur_byte += 2
                if (w >> 4) == 0:
                    # distance 0: end of stream
                    return bytes(decompressed)

                position = (window_cursor - (w >> 4)) & WINDOW_MASK
                length = (w & 0x0F) + THRESHOLD

                for _ in range(length):
                    b = window[position & WINDOW_MASK]
                    decompressed.append(b)
                    window[window_cursor] = b
                    window_cursor = (window_cursor + 1) & WINDOW_MASK
                    position = position + 1
    return bytes(decompressed)


def match_current(window, pos, max_len, data, dpos):
    """Length of the match between window[pos:] and data[dpos:], capped."""
    length = 0
    data_len = len(data)
    while dpos + length < data_len and length < max_len and \
          window[(pos + length) & WINDOW_MASK] == data[dpos + length] and length < MAX_LEN:
        length += 1
    return length


def match_window(window, pos, data, dpos):
    """Find a back-reference for data[dpos:], or None.

    Returns (distance, length); a match of INPLACE_THRESHOLD or longer
    is accepted immediately, otherwise the longest candidate wins.
    """
    max_pos = 0
    max_len = 0
    for i in range(THRESHOLD, LOOK_RANGE):
        length = match_current(window, (pos - i) & WINDOW_MASK, i, data, dpos)
        if length >= INPLACE_THRESHOLD:
            return (i, length)
        if length >= THRESHOLD:
            max_pos = i
            max_len = length
    if max_len >= THRESHOLD:
        return (max_pos, max_len)
    return None


def compress(input):
    """Compress *input* bytes; output always ends with the terminator."""
    compressed = bytearray()
    input = bytes(input)
    input_size = len(input)
    window = [0] * WINDOW_SIZE
    current_pos = 0
    current_window = 0
    bit = 0
    buf = [0] * 0x11  # 8 entries of at most 2 bytes each
    while current_pos < input_size:
        flag_byte = 0
        current_buffer = 0
        for _ in range(8):
            if current_pos >= input_size:
                # pad out the 8-entry group; a zero pair decodes as the
                # stream terminator so the decoder stops here
                buf[current_buffer] = 0
                window[current_window] = 0
                current_buffer += 1
                current_pos += 1
                current_window += 1
                bit = 0
            else:
                match = match_window(window, current_window, input, current_pos)
                if match:
                    pos, length = match
                    byte1 = (pos >> 4)
                    byte2 = (((pos & 0x0F) << 4) | ((length - THRESHOLD) & 0x0F))
                    buf[current_buffer] = byte1
                    buf[current_buffer + 1] = byte2
                    current_buffer += 2
                    bit = 0
                    for _ in range(length):
                        window[current_window & WINDOW_MASK] = input[current_pos]
                        current_pos += 1
                        current_window += 1
                else:
                    buf[current_buffer] = input[current_pos]
                    window[current_window] = input[current_pos]
                    current_pos += 1
                    current_window += 1
                    current_buffer += 1
                    bit = 1
            # accumulate this entry's flag, LSB-first after 8 shifts
            flag_byte = (flag_byte >> 1) | ((bit & 1) << 7)
            current_window = current_window & WINDOW_MASK
        compressed.append(flag_byte)
        for i in range(current_buffer):
            compressed.append(buf[i])
    # explicit terminator: flag byte 0 + zero-distance reference
    compressed.append(0)
    compressed.append(0)
    compressed.append(0)
    return bytes(compressed)
# ifstools.py
from os.path import basename, dirname, splitext, join
from os import mkdir, utime
import hashlib
import lxml.etree as etree
from struct import unpack

from kbinxml.kbinxml import KBinXML
from kbinxml.bytebuffer import ByteBuffer

from handlers import GenericFolder

# the kbin manifest starts after the 36-byte IFS header
KBIN_OFFSET = 36

class IFS:
    """Parser/extractor for a Konami IFS archive."""

    def __init__(self, path):
        """Read *path* into memory and parse the header and kbin manifest."""
        out = splitext(basename(path))[0] + '_ifs'
        self.default_out = join(dirname(path), out)

        with open(path, 'rb') as f:
            self.file = f.read()
        b = ByteBuffer(self.file)

        self.signature = b.get_u32()
        self.ifs_size = b.get_u32()
        self.unk1 = b.get_u32()
        self.unk2 = b.get_u32()
        self.header_end = b.get_u32()
        # 16 bytes more, unsure

        self.manifest = KBinXML(self.file[KBIN_OFFSET:])
        self._parse_manifest()

    def _parse_manifest(self):
        """Build the file/folder tree from the manifest xml."""
        self.tree = GenericFolder(self, self.manifest.xml_doc)

    def tostring(self):
        """Return an indented listing of the archive contents."""
        return self.tree.tostring()

    def extract_all(self, progress = True, recurse = True, path = None):
        """Extract every file, plus the manifest, under *path* (or default)."""
        self.out = path if path else self.default_out
        self._mkdir(self.out)
        with open(join(self.out, 'ifs_manifest.xml'), 'wb') as f:
            f.write(self.manifest.to_text().encode('utf8'))
        self._extract_tree(self.tree, progress, recurse)

    def _extract_tree(self, tree, progress = True, recurse = True, subdir = ''):
        """Recursively write *tree* under self.out/subdir.

        When recurse is True, nested .ifs files are extracted in turn.
        """
        outdir = join(self.out, subdir)
        if progress:
            print(outdir)
        self._mkdir(outdir)
        for f in tree.files.values():
            out = join(outdir, f.name)
            if progress:
                print(out)

            data = f.load()
            self._save_with_time(out, data, f.time)
            if recurse and f.name.endswith('.ifs'):
                IFS(out).extract_all()

        for folder in tree.folders.values():
            self._extract_tree(folder, progress, recurse, join(subdir, folder.name))

    def _mkdir(self, dir):
        # best-effort: the directory existing already is fine
        try:
            mkdir(dir)
        except FileExistsError:
            pass

    def load_file(self, start, size):
        """Slice raw file bytes at *start* (relative to the data section)."""
        return self.file[self.header_end+start:self.header_end+start+size]

    def _save_with_time(self, filename, data, time):
        """Write *data* and stamp the file with the manifest timestamp."""
        with open(filename, 'wb') as f:
            f.write(data)
        utime(filename, (time, time))

if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        print('ifstools filename.ifs')
        sys.exit(1)
    # BUG FIX: was IFS(argv[1]) -> NameError, argv was never imported bare
    i = IFS(sys.argv[1])
    i.extract_all()