1
0
mirror of https://github.com/mon/ifstools.git synced 2024-11-27 18:40:48 +01:00

Big fat cleanup and perf improvements

This commit is contained in:
Will Toohey 2017-12-30 15:03:50 +10:00
parent 7d1e389b02
commit 57045b482a
19 changed files with 728 additions and 543 deletions

View File

@ -1,23 +1,45 @@
# ifstools
Extractor for Konmai IFS files.
## Features
- Converts all textures to png without requiring a second program
- Repacks without ingame display issues
- Multithreaded recompression
- Only changed textures are recompressed, the rest are cached
- Works on eacloud music ifs files
- Correctly names files under `afp`, `bsi` and `geo` folders
- Converts internal binary xmls to plaintext, to facilitate further experimentation.
- Dumps the ifs manifest so you can explore the format
## Install
`pip install -r requirements.txt`
## Usage
`python ifstools.py [filename]`
```
usage: ifstools.py [-h] [-y] [-o OUT_DIR] [--tex-only] [--nocache] [-s] [-r]
file.ifs|folder_ifs [file.ifs|folder_ifs ...]
Features:
- Converts all textures to png without requiring a second program
- Repacks without ingame display issues
- Works on eacloud music ifs files
- Correctly names AFP files
- Converts version.xml, afplist.xml, texturelist.xml to plaintext, to facilitate further experimentation.
- Dumps the ifs manifest so you can explore the format
Unpack/pack IFS files and textures
positional arguments:
file.ifs|folder_ifs files/folders to process. Files will be unpacked,
folders will be repacked
optional arguments:
-h, --help show this help message and exit
-y don't prompt for file/folder overwrite
-o OUT_DIR output directory
--tex-only only extract textures
--nocache ignore texture cache, recompress all
-s, --silent don't display files as they are processed
-r, --norecurse if file contains another IFS, don't extract its
contents
```
Notes:
- dxt5 texture repacking is not fully supported - such textures will silently be converted to argb8888rev
Todo:
- DXT5 repacking support (current workaround: edit texturelist to use argb8888rev)
- Cache compressed textures (compression is very slow)
- Recursive repacking for ifs inside ifs
I hope the rest is self-explanatory. Confused? Create a new issue and tell me what docs to add.

View File

@ -1,77 +0,0 @@
from os.path import getmtime
from kbinxml import KBinXML
import lxml.etree as etree
from . import escapes
class GenericFile(object):
    """A single file entry in an IFS archive.

    Instances come either from a packed manifest (from_xml, carrying a
    start/size slice into the archive data blob) or from the filesystem
    (from_filesystem, for repacking).
    """
    def __init__(self, ifs, path, name, time, start = -1, size = -1):
        self.ifs = ifs
        # on-disk path; None when the file lives inside a packed IFS
        self.path = path
        self.name = name
        # name as stored in the packed manifest; may differ after md5 de-hashing
        self._packed_name = name
        self.time = time
        # byte offset/length within the IFS data blob (-1 when backed by disk)
        self.start = start
        self.size = size
    @classmethod
    def from_xml(cls, ifs, elem, name):
        # manifest leaf text is "start size timestamp"
        start, size, time = cls._split_ints(elem.text)
        self = cls(ifs, None, name, time, start, size)
        return self
    @classmethod
    def from_filesystem(cls, ifs, path, name):
        """Create an entry for a real file; size is filled in on first load."""
        time = int(getmtime(path))
        start = size = -1
        self = cls(ifs, path, name, time, start, size)
        return self
    @staticmethod
    def _split_ints(text, delim = ' '):
        return list(map(int, text.split(delim)))
    def tostring(self, indent = 0):
        return '{}{}\n'.format(' ' * indent, self.name)
    def load(self, convert_kbin = True):
        """Return this file's bytes from whichever backing store it has."""
        if self.path:
            return self._load_from_filesystem(convert_kbin)
        else:
            return self._load_from_ifs(convert_kbin)
    def _load_from_ifs(self, convert_kbin = True):
        data = self.ifs.load_file(self.start, self.size)
        # binary kbin xml is converted to plaintext for easy inspection
        if convert_kbin and self.name.endswith('.xml') and KBinXML.is_binary_xml(data):
            data = KBinXML(data).to_text().encode('utf8')
        return data
    def _load_from_filesystem(self, convert_kbin = True):
        # NOTE(review): convert_kbin is accepted but unused here
        with open(self.path, 'rb') as f:
            ret = f.read()
        self.size = len(ret)
        return ret
    def repack(self, manifest, data_blob, progress, recache):
        """Append this file's data to data_blob and write its manifest leaf."""
        if progress:
            print(self.name)
        elem = etree.SubElement(manifest, self.packed_name)
        elem.attrib['__type'] = '3s32'
        data = self.load(convert_kbin = False)
        # plaintext xml must be re-encoded to binary kbin before packing
        if self.name.endswith('.xml') and not KBinXML.is_binary_xml(data):
            data = KBinXML(data).to_binary()
        # offset, size, timestamp
        elem.text = '{} {} {}'.format(len(data_blob.getvalue()), len(data), self.time)
        data_blob.write(data)
    @property
    def packed_name(self):
        return self.sanitize_name(self._packed_name)
    def sanitize_name(self, n):
        # escape characters that are invalid in kbin xml tag names
        for e in escapes[::-1]:
            n = n.replace(e[1], e[0])
        # xml tags cannot start with a digit
        if n[0].isdigit():
            n = '_' + n
        return n

View File

@ -1,93 +0,0 @@
from itertools import chain
from os.path import getmtime, basename, join
import lxml.etree as etree
from . import get_folder_handlers, escapes
from .GenericFile import GenericFile
class GenericFolder(object):
    """A folder node of an IFS tree, holding file and sub-folder entries."""
    def __init__(self, ifs, name, time, files, folders):
        self.ifs = ifs
        self.name = name
        # xml sanitisation performed by the public property
        self._packed_name = name
        self.time = time
        self.files = files
        self.folders = folders
    @classmethod
    def from_xml(cls, ifs, element, name = ''):
        """Recursively build a folder from a packed manifest element."""
        time = int(element.text) if element.text else None
        files = {}
        folders = {}
        # tag=etree.Element skips comments and processing instructions
        for child in element.iterchildren(tag=etree.Element):
            filename = cls.fix_name(child.tag)
            if filename == '_info_': # metadata
                # NOTE(review): captured but never used afterwards
                info_elem = child
            elif list(child): # folder
                handler = get_folder_handlers().get(filename, GenericFolder)
                folders[filename] = handler.from_xml(ifs, child, filename)
            else: # file
                files[filename] = GenericFile.from_xml(ifs, child, filename)
        return cls(ifs, name, time, files, folders)
    @classmethod
    def from_filesystem(cls, ifs, tree, name = ''):
        """Build a folder from the dict tree produced by the dir walker."""
        time = int(getmtime(tree['path']))
        files = {}
        folders = {}
        for folder in tree['folders']:
            base = basename(folder['path'])
            handler = get_folder_handlers().get(base, GenericFolder)
            folders[base] = handler.from_filesystem(ifs, folder, base)
        for filename in tree['files']:
            path = join(tree['path'], filename)
            files[filename] = GenericFile.from_filesystem(ifs, path, filename)
        return cls(ifs, name, time, files, folders)
    def repack(self, manifest, data_blob, progress, recache):
        """Write this folder's manifest element, then repack all children."""
        if self.name:
            manifest = etree.SubElement(manifest, self.packed_name)
            manifest.attrib['__type'] = 's32'
            manifest.text = str(self.time)
        if progress:
            print(self.name)
        for name, entry in chain(self.folders.items(), self.files.items()):
            entry.repack(manifest, data_blob, progress, recache)
    def tostring(self, indent = 0):
        ret = ''
        if self.name:
            ret += '{}{}/\n'.format(' ' * indent, self.name)
            indent += 1
        for name, entry in chain(self.folders.items(), self.files.items()):
            ret += entry.tostring(indent)
        return ret
    @property
    def packed_name(self):
        return self.sanitize_name(self._packed_name)
    def sanitize_name(self, n):
        # escape characters that are invalid in kbin xml tag names
        for e in escapes[::-1]:
            n = n.replace(e[1], e[0])
        # xml tags cannot start with a digit
        if n[0].isdigit():
            n = '_' + n
        return n
    @staticmethod
    def fix_name(n):
        """Reverse sanitize_name: turn a manifest tag back into a filename."""
        for e in escapes:
            n = n.replace(*e)
        if n[0] == '_' and n[1].isdigit():
            n = n[1:]
        return n

View File

@ -1,57 +0,0 @@
from hashlib import md5
from kbinxml import KBinXML
from . import GenericFolder
class MD5Folder(GenericFolder):
    """GenericFolder whose packed file names are md5 hashes of the real
    names; the hash-to-name mapping is read from an xml file inside the
    folder itself.
    """
    def __init__(self, ifs, name, time, files, folders):
        super(MD5Folder, self).__init__(ifs, name, time, files, folders)
        # Locate the mapping xml. Must be initialised to None first: the
        # original left the attribute unset when no xml was present, so the
        # truthiness test below raised AttributeError instead of the
        # intended KeyError.
        self.info_kbin = None
        for filename, file in self.files.items():
            if filename.endswith('.xml'):
                self.info_kbin = file
                break
        if not self.info_kbin:
            raise KeyError('MD5 folder contents have no mapping xml')
        self.info_kbin = KBinXML(self.info_kbin.load(convert_kbin = False))
    @classmethod
    def from_xml(cls, ifs, element, name = '', md5_tag = None, extension = None):
        """Build from a manifest element, then de-hash child names."""
        self = super(MD5Folder, cls).from_xml(ifs, element, name)
        self._apply_md5(md5_tag, extension)
        return self
    @classmethod
    def from_filesystem(cls, ifs, tree, name = '', md5_tag = None, extension = None):
        """Build from an on-disk tree, then de-hash child names."""
        self = super(MD5Folder, cls).from_filesystem(ifs, tree, name)
        self._apply_md5(md5_tag, extension)
        return self
    def _apply_md5(self, md5_tag, extension):
        if not md5_tag:
            md5_tag = self.name
        # findall needs xpath or it'll only search direct children
        for tag in self.info_kbin.xml_doc.findall('.//' + md5_tag):
            filename = tag.attrib['name']
            hash = md5(filename.encode(self.info_kbin.encoding)).hexdigest()
            # handles subfolders like afp/bsi/
            self.rename_recurse(self, hash, filename, extension)
    def rename_recurse(self, entry, original, replacement, extension):
        """Swap hashed <-> real names throughout `entry` and its subfolders."""
        # handles renamed files (eg tex->png)
        if extension and (replacement + extension in entry.files):
            entry.files[replacement] = entry.files.pop(replacement + extension)
            entry.files[replacement].name = replacement
        # handles deobfuscated filesystems
        if replacement in entry.files:
            entry.files[replacement]._packed_name = original
        if original in entry.files:
            orig = entry.files.pop(original)
            orig.name = replacement
            entry.files[replacement] = orig
        for name, folder in entry.folders.items():
            self.rename_recurse(folder, original, replacement, extension)

View File

@ -1,31 +0,0 @@
from . import MD5Folder, GenericFile, ImageFile
class TexFolder(MD5Folder):
    """MD5Folder for `tex` folders: de-hashes names via the 'image' tags
    of texturelist.xml (appending '.png') and wraps entries as ImageFile."""
    def __init__(self, ifs, name, time, files, folders):
        super(TexFolder, self).__init__(ifs, name, time, files, folders)
        # texturelist root attribute, eg 'avslz'; None means uncompressed
        self.compress = self.info_kbin.xml_doc.attrib.get('compress')
    @classmethod
    def from_xml(cls, ifs, element, name = ''):
        self = super(TexFolder, cls).from_xml(ifs, element, name, 'image', '.png')
        self._create_images()
        return self
    @classmethod
    def from_filesystem(cls, ifs, tree, name = ''):
        self = super(TexFolder, cls).from_filesystem(ifs, tree, name, 'image', '.png')
        self._create_images()
        return self
    def _create_images(self):
        """Replace plain file entries with ImageFile using texturelist metadata."""
        for tex in self.info_kbin.xml_doc.iterchildren():
            folder = tex.attrib['name']  # NOTE(review): read but never used
            fmt = tex.attrib['format']
            for indiv in tex.iterchildren():
                if indiv.tag == 'size':
                    continue
                elif indiv.tag == 'image':
                    name = indiv.attrib['name']
                    self.files[name] = ImageFile(self.files[name], indiv, fmt, self.compress)
                else:
                    print('Unknown texturelist.xml element {}'.format(indiv.tag))

View File

@ -1,18 +0,0 @@
# because we import this circularly, it needs to be a getter:
# the handler classes below import from this module at load time,
# so the mapping can only be resolved after all imports complete
def get_folder_handlers():
    """Return the map of special folder names to their handler classes."""
    return {
        'afp' : MD5Folder,
        'tex' : TexFolder
    }
# kbin xml tag escape pairs: applied forward to recover real names,
# reversed to sanitize names for packing
escapes = [
    ('_E', '.'),
    ('__', '_'),
]
from .GenericFile import GenericFile
from .ImageFile import ImageFile
from .GenericFolder import GenericFolder
from .MD5Folder import MD5Folder
from .TexFolder import TexFolder

1
ifs/__init__.py Normal file
View File

@ -0,0 +1 @@
from .ifs import IFS

19
ifs/handlers/AfpFolder.py Normal file
View File

@ -0,0 +1,19 @@
from . import MD5Folder
class AfpFolder(MD5Folder):
    """MD5Folder for `afp` data.

    Besides its own files, de-hashes the matching names in the child
    `bsi` folder and the per-shape files in the sibling `geo` folder.
    """
    def tree_complete(self):
        MD5Folder.tree_complete(self)
        plain_names = []
        shape_names = []
        # findall needs an xpath pattern, otherwise only direct children match
        for entry in self.info_kbin.xml_doc.findall('.//' + self.md5_tag):
            base = entry.attrib['name']
            plain_names.append(base)
            shape_names.extend(
                '{}_shape{}'.format(base, num)
                for geo in entry.findall('geo')
                for num in self._split_ints(geo.text)
            )
        self._apply_md5_folder(plain_names, self.folders['bsi'])
        self._apply_md5_folder(shape_names, self.parent.folders['geo'])

View File

@ -0,0 +1,68 @@
import os
from kbinxml import KBinXML
import lxml.etree as etree
from .Node import Node
from .. import utils
class GenericFile(Node):
    """A file node; reads bytes from the archive blob or from disk."""
    def from_xml(self, element):
        # manifest leaf text is "start size timestamp"
        self.start, self.size, self.time = self._split_ints(element.text)
    def from_filesystem(self, folder):
        self.base_path = self.parent.base_path
        self.time = int(os.path.getmtime(self.disk_path))
        # filled in when the file is first loaded
        self.start = self.size = None
    def extract(self, base, use_cache = True):
        """Write this file under `base`, preserving its timestamp."""
        data = self.load()
        path = os.path.join(base, self.full_path)
        utils.save_with_timestamp(path, data, self.time)
    def load(self, convert_kbin = True):
        """Return this file's bytes from whichever backing store it has."""
        if self.from_ifs:
            return self._load_from_ifs(convert_kbin)
        else:
            return self._load_from_filesystem()
    def _load_from_ifs(self, convert_kbin = True):
        end = self.start + self.size
        # guard against a corrupt manifest pointing outside the blob
        assert self.start <= len(self.ifs_data) and end <= len(self.ifs_data)
        data = self.ifs_data[self.start:end]
        # binary kbin xml is converted to plaintext for easy inspection
        if convert_kbin and self.name.endswith('.xml') and KBinXML.is_binary_xml(data):
            data = KBinXML(data).to_text().encode('utf8')
        return data
    def _load_from_filesystem(self):
        with open(self.disk_path, 'rb') as f:
            ret = f.read()
        self.size = len(ret)
        return ret
    @property
    def needs_preload(self):
        # plain files have nothing to pre-compute; ImageFile overrides this
        return False
    def preload(self, use_cache):
        pass
    def repack(self, manifest, data_blob, tqdm_progress):
        """Append data to data_blob and write the manifest leaf element."""
        if tqdm_progress:
            tqdm_progress.write(self.full_path)
            tqdm_progress.update(1)
        elem = etree.SubElement(manifest, self.packed_name)
        elem.attrib['__type'] = '3s32'
        data = self.load(convert_kbin = False)
        # plaintext xml must be re-encoded to binary kbin before packing
        if self.name.endswith('.xml') and not KBinXML.is_binary_xml(data):
            data = KBinXML(data).to_binary()
        # offset, size, timestamp
        elem.text = '{} {} {}'.format(len(data_blob.getvalue()), len(data), self.time)
        data_blob.write(data)
    @property
    def disk_path(self):
        if self.from_ifs:
            raise Exception('disk_path invalid for IFS file')
        return os.path.join(self.base_path, self.full_path)

View File

@ -0,0 +1,95 @@
from itertools import chain
from os.path import getmtime, basename, join
import lxml.etree as etree
from . import GenericFile
from .Node import Node
class GenericFolder(Node):
    """A folder node: builds its children from manifest xml or the filesystem,
    dispatching special folder names to dedicated handler classes."""
    def __init__(self, ifs_data, obj, parent = None, path = '', name = ''):
        # circular dependencies mean we import here
        from . import AfpFolder, TexFolder
        self.folder_handlers = {
            'afp' : AfpFolder,
            'tex' : TexFolder,
        }
        Node.__init__(self, ifs_data, obj, parent, path, name)
    # class used for created file entries; subclasses may override
    file_handler = GenericFile
    def from_xml(self, element):
        """Recursively build children from a manifest folder element."""
        if element.text:
            self.time = int(element.text)
        self.files = {}
        self.folders = {}
        # tag=etree.Element skips comments and processing instructions
        for child in element.iterchildren(tag=etree.Element):
            filename = Node.fix_name(child.tag)
            if filename == '_info_': # metadata
                continue
            elif list(child): # folder
                handler = self.folder_handlers.get(filename, GenericFolder)
                self.folders[filename] = handler(self.ifs_data, child, self, self.full_path, filename)
            else: # file
                self.files[filename] = self.file_handler(self.ifs_data, child, self, self.full_path, filename)
        if not self.full_path: # root
            self.tree_complete()
    def from_filesystem(self, tree):
        """Build children from the dict tree produced by the dir walker."""
        self.base_path = self.parent.base_path if self.parent else tree['path']
        # NOTE(review): this takes the mtime of base_path (the tree root),
        # not this folder's own tree['path'] -- confirm intended
        self.time = int(getmtime(self.base_path))
        self.files = {}
        self.folders = {}
        for folder in tree['folders']:
            base = basename(folder['path'])
            handler = self.folder_handlers.get(base, GenericFolder)
            self.folders[base] = handler(self.ifs_data, folder, self, self.full_path, base)
        for filename in tree['files']:
            self.files[filename] = self.file_handler(self.ifs_data, None, self, self.full_path, filename)
        if not self.full_path: # root
            self.tree_complete()
    def tree_complete(self):
        # notify all descendants once the whole tree exists
        for f in self.folders.values():
            f.tree_complete()
        for f in self.files.values():
            f.tree_complete()
    def repack(self, manifest, data_blob, tqdm_progress):
        """Write this folder's manifest element, then repack all children."""
        if self.name:
            manifest = etree.SubElement(manifest, self.packed_name)
            manifest.attrib['__type'] = 's32'
            manifest.text = str(self.time)
        for name, entry in chain(self.folders.items(), self.files.items()):
            entry.repack(manifest, data_blob, tqdm_progress)
    @property
    def all_files(self):
        """Every file in this folder and all descendants, as a flat list."""
        files = []
        for f in self.all_folders:
            files.extend(f.files.values())
        return files
    @property
    def all_folders(self):
        """This folder plus all descendant folders (iterative walk)."""
        queue = [self]
        folders = []
        while queue:
            folder = queue.pop()
            folders.append(folder)
            queue.extend(folder.folders.values())
        return folders
    def __str__(self):
        path = self.full_path
        if not path:
            path = '<root>'
        return '{} ({} files, {} folders)'.format(path, len(self.files), len(self.folders))

View File

@ -10,6 +10,7 @@ from kbinxml import KBinXML
from . import GenericFile
from . import lz77
from .. import utils
# header for a standard DDS with DXT5 compression and RGBA pixels
# gap placed for image height/width insertion
@ -23,12 +24,14 @@ dxt5_end = b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + \
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
class ImageFile(GenericFile):
def __init__(self, gen_file, image_elem, fmt, compress):
super(ImageFile, self).__init__(gen_file.ifs, gen_file.path,
gen_file.name + '.png', gen_file.time,
gen_file.start, gen_file.size)
self._packed_name = gen_file._packed_name
self.image_elem = image_elem
def __init__(self, ifs_data, obj, parent = None, path = '', name = ''):
raise Exception('ImageFile must be instantiated from existing GenericFile with ImageFile.upgrade_generic')
@classmethod
def upgrade_generic(cls, gen_file, image_elem, fmt, compress):
self = gen_file
self.__class__ = cls
self.format = fmt
self.compress = compress
@ -39,15 +42,14 @@ class ImageFile(GenericFile):
(self.imgrect[3]-self.imgrect[2])//2
)
@classmethod
def from_xml(cls, ifs, elem, name):
raise Exception('ImageFile must be instantiated from existing element')
@classmethod
def from_filesystem(cls, ifs, tree, name):
raise Exception('ImageFile must be instantiated from existing element')
def extract(self, base, use_cache = True):
GenericFile.extract(self, base)
if use_cache and self.compress and self.from_ifs and self.format == 'argb8888rev':
self.write_cache(GenericFile._load_from_ifs(self), base)
def _load_from_ifs(self, convert_kbin = False):
data = super(ImageFile, self)._load_from_ifs()
data = GenericFile._load_from_ifs(self, False)
if self.compress == 'avslz':
uncompressed_size = unpack('>I', data[:4])[0]
@ -86,12 +88,13 @@ class ImageFile(GenericFile):
im.save(b, format = 'PNG')
return b.getvalue()
def repack(self, manifest, data_blob, progress, recache):
if progress:
print(self.name)
def repack(self, manifest, data_blob, tqdm_progress):
if tqdm_progress:
tqdm_progress.write(self.full_path)
tqdm_progress.update(1)
if self.compress == 'avslz':
data = self.read_cache(recache)
data = self.read_cache()
else:
data = self._load_im()
@ -107,48 +110,45 @@ class ImageFile(GenericFile):
im = Image.open(BytesIO(data))
if im.mode != 'RGBA':
im = im.convert('RGBA')
if self.format == 'argb8888rev':
# we translate dxt5 to arb since dxt5 is lossy and not in python
if self.format == 'argb8888rev' or self.format == 'dxt5':
data = im.tobytes('raw', 'BGRA')
else:
raise NotImplementedError('Unknown format {}'.format(self.format))
return data
def write_cache(self, data):
cache = join(dirname(self.path), '_cache', self.name)
self._mkdir(dirname(cache))
with open(cache, 'wb') as f:
f.write(data)
utime(cache, (self.time, self.time))
def read_cache(self, recache):
cache = join(dirname(self.path), '_cache', self.name)
if isfile(cache) and not recache:
@property
def needs_preload(self):
cache = join(dirname(self.disk_path), '_cache', self._packed_name)
if isfile(cache):
mtime = int(getmtime(cache))
if self.time <= mtime:
with open(cache, 'rb') as f:
return f.read()
print('Not cached/out of date, compressing')
return False
return True
def preload(self, use_cache):
if not self.needs_preload and use_cache:
return
# Not cached/out of date, compressing
data = self._load_im()
uncompressed_size = len(data)
data = lz77.compress(data)
compressed_size = len(data)
data = pack('>I', uncompressed_size) + pack('>I', compressed_size) + data
self.write_cache(data)
return data
def _mkdir(self, dir):
try: # python 3
try:
mkdir(dir)
except FileExistsError:
pass
except NameError: # python 2
try:
mkdir(dir)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise
def write_cache(self, data, base = None):
if not self.from_ifs:
base = self.base_path
cache = join(base, self.path, '_cache', self._packed_name)
utils.mkdir_silent(dirname(cache))
with open(cache, 'wb') as f:
f.write(data)
utime(cache, (self.time, self.time))
def read_cache(self):
cache = join(dirname(self.disk_path), '_cache', self._packed_name)
with open(cache, 'rb') as f:
return f.read()

47
ifs/handlers/MD5Folder.py Normal file
View File

@ -0,0 +1,47 @@
from hashlib import md5
from kbinxml import KBinXML
from . import GenericFolder
class MD5Folder(GenericFolder):
    """A folder whose packed file names are md5 hashes of the real names.

    The mapping lives in an xml file inside the folder. `md5_tag` is the
    element name carrying real names (defaults to the folder name) and
    `extension` is optionally appended to de-hashed names (eg '.png').
    """
    def __init__(self, ifs_data, obj, parent = None, path = '', name = '', md5_tag = None, extension = None):
        # NOTE(review): the original declared these as (parent, obj), but
        # every caller passes (obj, parent) positionally, matching the
        # GenericFolder signature -- renamed to reflect the values received
        GenericFolder.__init__(self, ifs_data, obj, parent, path, name)
        self.md5_tag = md5_tag if md5_tag else self.name
        self.extension = extension
    def tree_complete(self):
        GenericFolder.tree_complete(self)
        # Locate the mapping xml. Must be initialised to None first: the
        # original left the attribute unset when no xml was present, so the
        # truthiness test below raised AttributeError instead of the
        # intended KeyError.
        self.info_file = None
        for filename, file in self.files.items():
            if filename.endswith('.xml'):
                self.info_file = file
                break
        if not self.info_file:
            raise KeyError('MD5 folder contents have no mapping xml')
        self.info_kbin = KBinXML(self.info_file.load(convert_kbin = False))
        self._apply_md5()
    def _apply_md5(self):
        # findall needs xpath or it'll only search direct children
        names = (tag.attrib['name'] for tag in self.info_kbin.xml_doc.findall('.//' + self.md5_tag))
        self._apply_md5_folder(names, self)
    def _apply_md5_folder(self, plain_list, folder):
        """Swap hashed <-> plain names for every name in plain_list."""
        for plain in plain_list:
            hashed = md5(plain.encode(self.info_kbin.encoding)).hexdigest()
            if self.extension:
                plain += self.extension
            # add correct packed name to deobfuscated filesystems
            if plain in folder.files:
                folder.files[plain]._packed_name = hashed
            # deobfuscate packed filesystems
            if hashed in folder.files:
                orig = folder.files.pop(hashed)
                orig.name = plain
                folder.files[plain] = orig

70
ifs/handlers/Node.py Normal file
View File

@ -0,0 +1,70 @@
import os
import lxml.etree as etree
# kbin xml tag escape pairs: tag names cannot contain '.', and '_' needs
# escaping to make the mapping reversible. Applied forward by fix_name,
# in reverse order by sanitize_name.
escapes = [
    ('_E', '.'),
    ('__', '_'),
]
class Node(object):
    """Base class for every IFS tree entry (files and folders).

    Construction dispatches to from_xml (packed manifest element) or
    from_filesystem (walker dict / None) based on the type of `obj`.
    """
    def __init__(self, ifs_data, obj, parent = None, path = '', name = ''):
        self.ifs_data = ifs_data
        self.parent = parent
        self.path = path
        self.name = name
        # xml sanitisation performed by the property
        self._packed_name = name
        self.time = None
        if isinstance(obj, etree._Element):
            self.from_ifs = True
            self.from_xml(obj)
        else:
            self.from_ifs = False
            self.from_filesystem(obj)
    def from_xml(self, elem):
        raise NotImplementedError
    def from_filesystem(self, path):
        raise NotImplementedError
    def tree_complete(self):
        '''Call this when the entire tree is parsed and ready for modification'''
        pass
    def __str__(self):
        return os.path.join(self.path, self.name)
    def __repr__(self):
        return '<{}: {}>'.format(self.__class__.__name__, self.full_path)
    @property
    def packed_name(self):
        # the name as stored in the packed manifest (xml-safe)
        return self.sanitize_name(self._packed_name)
    @property
    def full_path(self):
        return os.path.join(self.path, self.name)
    @staticmethod
    def sanitize_name(n):
        """Escape a real filename into an xml-safe manifest tag name."""
        for e in escapes[::-1]:
            n = n.replace(e[1], e[0])
        # xml tags cannot start with a digit; guard empty names, which
        # previously raised IndexError here
        if n and n[0].isdigit():
            n = '_' + n
        return n
    @staticmethod
    def fix_name(n):
        """Reverse sanitize_name: turn a manifest tag back into a filename."""
        for e in escapes:
            n = n.replace(*e)
        # length guard avoids IndexError on a bare '_' (bug in the original)
        if len(n) > 1 and n[0] == '_' and n[1].isdigit():
            n = n[1:]
        return n
    @staticmethod
    def _split_ints(text, delim = ' '):
        return list(map(int, text.split(delim)))

40
ifs/handlers/TexFolder.py Normal file
View File

@ -0,0 +1,40 @@
from kbinxml import KBinXML
from . import MD5Folder, ImageFile, GenericFile
class TextureList(GenericFile):
    """texturelist.xml stand-in that rewrites texture formats on load."""
    def _load_from_filesystem(self):
        kbin = KBinXML(GenericFile._load_from_filesystem(self))
        # force the only type we can compress
        for tex in kbin.xml_doc.iterchildren():
            tex.attrib['format'] = 'argb8888rev'
        return kbin.to_binary()
class TexFolder(MD5Folder):
    """MD5Folder for `tex`: de-hashes names via the 'image' tags of
    texturelist.xml (appending '.png') and upgrades entries to ImageFile."""
    def __init__(self, ifs_data, obj, parent = None, path = '', name = ''):
        MD5Folder.__init__(self, ifs_data, obj, parent, path, name, 'image', '.png')
    def tree_complete(self):
        MD5Folder.tree_complete(self)
        # texturelist root attribute, eg 'avslz'; None means uncompressed
        self.compress = self.info_kbin.xml_doc.attrib.get('compress')
        # TextureList forces argb8888rev on repack
        self.info_file.__class__ = TextureList
        # the _cache folder is ifstools' own scratch space, never pack it
        if '_cache' in self.folders:
            self.folders.pop('_cache')
        self._create_images()
    def _create_images(self):
        """Upgrade each referenced file entry to an ImageFile in place."""
        for tex in self.info_kbin.xml_doc.iterchildren():
            folder = tex.attrib['name']  # NOTE(review): read but never used
            fmt = tex.attrib['format']
            for indiv in tex.iterchildren():
                if indiv.tag == 'size':
                    continue
                elif indiv.tag == 'image':
                    name = indiv.attrib['name'] + '.png'
                    ImageFile.upgrade_generic(self.files[name], indiv, fmt, self.compress)
                else:
                    print('Unknown texturelist.xml element {}'.format(indiv.tag))

9
ifs/handlers/__init__.py Normal file
View File

@ -0,0 +1,9 @@
from .Node import Node
from .GenericFile import GenericFile
from .ImageFile import ImageFile
from .GenericFolder import GenericFolder
from .MD5Folder import MD5Folder
from .AfpFolder import AfpFolder
from .TexFolder import TexFolder

View File

@ -60,7 +60,7 @@ def match_window(in_data, offset):
return None
def compress(input, progress = True):
def compress(input, progress = False):
pbar = tqdm(total = len(input), leave = False, unit = 'b', unit_scale = True,
desc = 'Compressing', disable = not progress)
compressed = bytearray()

244
ifs/ifs.py Normal file
View File

@ -0,0 +1,244 @@
from multiprocessing import Pool
from os.path import basename, dirname, splitext, join, isdir, isfile, getmtime
from os import utime, walk
from io import BytesIO
import itertools
import hashlib
import lxml.etree as etree
from time import time as unixtime
from tqdm import tqdm
from kbinxml import KBinXML
from kbinxml.bytebuffer import ByteBuffer
from .handlers import GenericFolder, MD5Folder, ImageFile
from . import utils
SIGNATURE = 0x6CAD8F89
HEADER_SIZE = 36
FILE_VERSION = 3
# must be toplevel or can't be pickled
def _extract(args):
f = args[0]
path = args[1]
f.extract(path)
return f.full_path
def _load(args):
f = args[0]
use_cache = args[1]
f.preload(use_cache)
return f.full_path
class IFS:
    """Top-level packed-IFS reader/writer.

    Construct with either a path to a packed .ifs file (unpack mode) or a
    previously-extracted folder (repack mode).
    """
    def __init__(self, path):
        if isfile(path):
            self.load_ifs(path)
        elif isdir(path):
            self.load_dir(path)
        else:
            raise IOError('Input path does not exist')
    def load_ifs(self, path):
        """Parse the header and kbin manifest of a packed IFS file."""
        self.is_file = True
        name = basename(path)
        self.ifs_out = name
        self.folder_out = splitext(name)[0] + '_ifs'
        self.default_out = self.folder_out
        with open(path, 'rb') as f:
            file = ByteBuffer(f.read())
        signature = file.get_u32()
        if signature != SIGNATURE:
            raise IOError('Given file was not an IFS file!')
        self.file_version = file.get_u16()
        # next u16 is just NOT(version)
        assert file.get_u16() ^ self.file_version == 0xFFFF
        self.time = file.get_u32()
        ifs_tree_size = file.get_u32()
        manifest_end = file.get_u32()
        self.data_blob = bytes(file.data[manifest_end:])
        # 16 bytes for manifest md5, unchecked
        self.manifest = KBinXML(file.data[HEADER_SIZE:])
        self.tree = GenericFolder(self.data_blob, self.manifest.xml_doc)
        # sanity-check our tree size heuristic against the header value
        assert ifs_tree_size == self.tree_size
    def load_dir(self, path):
        """Prepare to repack an extracted *_ifs folder."""
        self.is_file = False
        path = path.rstrip('/\\')
        self.folder_out = basename(path)
        if '_ifs' in self.folder_out:
            self.ifs_out = self.folder_out.replace('_ifs', '.ifs')
        else:
            self.ifs_out = self.folder_out + '.ifs'
        self.default_out = self.ifs_out
        self.file_version = FILE_VERSION
        self.time = int(getmtime(path))
        self.data_blob = None
        self.manifest = None
        os_tree = self._create_dir_tree(path)
        self.tree = GenericFolder(None, os_tree)
    def _create_dir_tree(self, path):
        tree = self._create_dir_tree_recurse(walk(path))
        # the manifest dump is regenerated on repack, never packed itself
        if 'ifs_manifest.xml' in tree['files']:
            tree['files'].remove('ifs_manifest.xml')
        return tree
    def _create_dir_tree_recurse(self, walker):
        # os.walk is depth-first, so consuming it recursively mirrors the tree
        tree = {}
        root, dirs, files = next(walker)
        tree['path'] = root
        tree['files'] = files
        tree['folders'] = []
        for dir in dirs:
            tree['folders'].append(self._create_dir_tree_recurse(walker))
        return tree
    def __str__(self):
        return str(self.tree)
    def extract(self, progress = True, use_cache = True, recurse = True, tex_only = False, path = None):
        """Extract the whole tree to `path` (default: <name>_ifs)."""
        if path is None:
            path = self.folder_out
        if tex_only and 'tex' not in self.tree.folders:
            return
        utils.mkdir_silent(path)
        utime(path, (self.time, self.time))
        if self.manifest and not tex_only:
            with open(join(path, 'ifs_manifest.xml'), 'wb') as f:
                f.write(self.manifest.to_text().encode('utf8'))
        # build the tree
        for folder in self.tree.all_folders:
            if tex_only and folder.name != 'tex':
                continue
            f_path = join(path, folder.full_path)
            utils.mkdir_silent(f_path)
            utime(f_path, (self.time, self.time))
        # extract the files
        for f in tqdm(self.tree.all_files):
            if tex_only and not isinstance(f, ImageFile):
                continue
            f.extract(path, use_cache)
            if progress:
                tqdm.write(f.full_path)
            if recurse and f.name.endswith('.ifs'):
                rpath = join(path, f.full_path)
                i = IFS(rpath)
                # BUG FIX: the output path was previously passed positionally
                # into tex_only (a truthy string), leaving path=None. Use
                # keywords so the nested IFS extracts beside its source file.
                i.extract(progress = progress, use_cache = use_cache,
                          recurse = recurse, tex_only = tex_only,
                          path = rpath.replace('.ifs', '_ifs'))
        ''' If you can get shared memory for IFS.data_blob working, this will
        be a lot faster. As it is, it gets pickled for every file, and
        is 3x slower than the serial implementation even with image extraction
        '''
        # extract the tree
        '''p = Pool()
        args = zip(self.tree.all_files, itertools.cycle((path,)))
        for f in tqdm(p.imap_unordered(_extract, args)):
            if progress:
                tqdm.write(f)'''
    def repack(self, progress = True, use_cache = True, path = None):
        """Build and write a packed IFS to `path` (default: <name>.ifs)."""
        if path is None:
            path = self.ifs_out
        # open first in case path is bad; `with` also guarantees the handle
        # is closed if packing fails partway (previously leaked)
        with open(path, 'wb') as ifs_file:
            self.data_blob = BytesIO()
            self.manifest = KBinXML(etree.Element('imgfs'))
            manifest_info = etree.SubElement(self.manifest.xml_doc, '_info_')
            # the important bit
            data = self._repack_tree(progress, use_cache)
            data_md5 = etree.SubElement(manifest_info, 'md5')
            data_md5.attrib['__type'] = 'bin'
            data_md5.attrib['__size'] = '16'
            data_md5.text = hashlib.md5(data).hexdigest()
            data_size = etree.SubElement(manifest_info, 'size')
            data_size.attrib['__type'] = 'u32'
            data_size.text = str(len(data))
            manifest_bin = self.manifest.to_binary()
            manifest_end = HEADER_SIZE + len(manifest_bin)
            manifest_hash = hashlib.md5(manifest_bin).digest()
            head = ByteBuffer()
            head.append_u32(SIGNATURE)
            head.append_u16(self.file_version)
            head.append_u16(self.file_version ^ 0xFFFF)
            head.append_u32(int(unixtime()))
            head.append_u32(self.tree_size)
            head.append_u32(manifest_end)
            ifs_file.write(head.data)
            ifs_file.write(manifest_hash)
            ifs_file.write(manifest_bin)
            ifs_file.write(data)
    def _repack_tree(self, progress = True, use_cache = True):
        """Preload/compress images in parallel, then serially pack the tree."""
        folders = self.tree.all_folders
        files = self.tree.all_files
        # Can't pickle lxml, so to dirty-hack land we go
        kbin_backup = []
        for folder in folders:
            if isinstance(folder, MD5Folder):
                kbin_backup.append(folder.info_kbin)
                folder.info_kbin = None
        needs_preload = (f for f in files if f.needs_preload or not use_cache)
        args = list(zip(needs_preload, itertools.cycle((use_cache,))))
        p = Pool()
        for f in tqdm(p.imap_unordered(_load, args), desc='Caching', total=len(args), disable = not progress):
            if progress:
                tqdm.write(f)
        # restore stuff from before
        for folder in folders:
            if isinstance(folder, MD5Folder):
                folder.info_kbin = kbin_backup.pop(0)
        tqdm_progress = None
        if progress:
            tqdm_progress = tqdm(desc='Writing', total=len(files))
        self.tree.repack(self.manifest.xml_doc, self.data_blob, tqdm_progress)
        return self.data_blob.getvalue()
    # suspected to be the in-memory representation size of the tree
    @property
    def tree_size(self):
        BASE_SIZE = 856
        return BASE_SIZE + self._tree_size_recurse(self.tree)
    def _tree_size_recurse(self, tree, depth = 0):
        # empirically-derived constants; see tree_size
        FILE = 64
        FOLDER = 56
        DEPTH_MULTIPLIER = 16
        size = len(tree.files) * FILE
        size += len(tree.folders) * (FOLDER - depth*DEPTH_MULTIPLIER)
        for name, folder in tree.folders.items():
            size += self._tree_size_recurse(folder, depth+1)
        return size

24
ifs/utils.py Normal file
View File

@ -0,0 +1,24 @@
import errno
import os
def mkdir_silent(dir):
    """Create `dir`, doing nothing if it already exists.

    Works on python 2 and 3 with one code path: FileExistsError (py3) is
    an OSError subclass carrying errno EEXIST, so a single errno check
    replaces the original NameError-probing double try/except.
    """
    try:
        os.mkdir(dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
def save_with_timestamp(filename, data, timestamp):
    """Write `data` (bytes) to `filename`, creating parent dirs as needed,
    then set the file's atime/mtime to `timestamp` (unix seconds)."""
    mkdir_silent(os.path.dirname(filename))
    with open(filename, 'wb') as f:
        f.write(data)
    os.utime(filename, (timestamp,timestamp))

View File

@ -1,218 +1,28 @@
from os.path import basename, dirname, splitext, join, isdir, isfile, getmtime
from os import mkdir, utime, walk
import errno
from io import BytesIO
import hashlib
import lxml.etree as etree
from time import time as unixtime
import argparse
import os
from kbinxml.kbinxml import KBinXML
from kbinxml.bytebuffer import ByteBuffer
from ifs import IFS
from handlers import GenericFolder
SIGNATURE = 0x6CAD8F89
KBIN_OFFSET = 36
FILE_VERSION = 3
class IFS:
def __init__(self, path):
if isfile(path):
self._load_ifs(path)
self.is_file = True
elif isdir(path):
self._load_dir(path)
self.is_file = False
def get_choice(prompt):
while True:
q = input(prompt + ' [Y/n] ').lower()
if not q:
return True # default to yes
elif q == 'y':
return True
elif q == 'n':
return False
else:
raise IOError('Input path does not exist')
def _load_ifs(self, path):
self.ifs_out = basename(path)
self.default_out = splitext(self.ifs_out)[0] + '_ifs'
with open(path, 'rb') as f:
self.file = f.read()
b = ByteBuffer(self.file)
signature = b.get_u32()
if signature != SIGNATURE:
raise IOError('Given file was not an IFS file!')
self.file_version = b.get_u16()
# next u16 is just NOT(version)
assert b.get_u16() ^ self.file_version == 0xFFFF
self.time = b.get_u32()
self.tree_size = b.get_u32()
self.header_end = b.get_u32()
# 16 bytes for manifest md5, unchecked
self.manifest = KBinXML(self.file[KBIN_OFFSET:])
self._parse_manifest()
assert self.tree_size == self._tree_size()
def _load_dir(self, path):
    """Prepare a previously-extracted _ifs directory for repacking."""
    path = path.rstrip('/\\') + '/'
    self.default_out = dirname(path)
    self.ifs_out = self.default_out.replace('_ifs', '.ifs')
    self.file_version = FILE_VERSION
    self.time = int(getmtime(path))
    # unknown until repack() rebuilds the manifest
    self.tree_size = -1
    self.header_end = -1
    self.manifest = None
    fs_tree = self._create_dir_tree(path)
    self.tree = GenericFolder.from_filesystem(self, fs_tree)
def _create_dir_tree(self, path):
    """Walk `path` into a nested dict tree, dropping the manifest dump
    this tool itself writes on extraction."""
    tree = self._create_dir_tree_recurse(walk(path))
    root_files = tree['files']
    if 'ifs_manifest.xml' in root_files:
        root_files.remove('ifs_manifest.xml')
    return tree
def _create_dir_tree_recurse(self, walker):
tree = {}
root, dirs, files = next(walker)
tree['path'] = root
tree['files'] = files
tree['folders'] = []
for dir in dirs:
subdir = self._create_dir_tree_recurse(walker)
# this should probably be moved to TexFolder.py
if basename(subdir['path']) != '_cache':
tree['folders'].append(subdir)
return tree
def _parse_manifest(self):
    """Build the file tree from the decoded manifest kbin."""
    xml_root = self.manifest.xml_doc
    self.tree = GenericFolder.from_xml(self, xml_root)
def tostring(self):
return self.tree.tostring()
def extract_all(self, progress = True, recurse = True, path = None):
    """Extract everything to `path` (default: the _ifs folder name),
    writing the manifest as plaintext xml when one exists."""
    self.out = path or self.default_out
    self._mkdir(self.out)
    if self.manifest:
        manifest_out = join(self.out, 'ifs_manifest.xml')
        with open(manifest_out, 'wb') as manifest_file:
            manifest_file.write(self.manifest.to_text().encode('utf8'))
    self._extract_tree(self.tree, progress, recurse)
def repack(self, progress = True, recache = False, path = None):
    """Serialise the tree back into a packed .ifs at `path`
    (default: self.ifs_out).

    Layout written: 20 byte header, 16 byte manifest md5, the binary
    kbin manifest, then the concatenated file data blob.
    """
    if path is None:
        path = self.ifs_out
    data_blob = BytesIO()
    self.manifest = KBinXML(etree.Element('imgfs'))
    manifest_info = etree.SubElement(self.manifest.xml_doc, '_info_')
    # the important bit
    self.tree.repack(self.manifest.xml_doc, data_blob, progress, recache)
    data = data_blob.getvalue()
    # record the payload md5 and size inside the manifest itself
    data_md5 = etree.SubElement(manifest_info, 'md5')
    data_md5.attrib['__type'] = 'bin'
    data_md5.attrib['__size'] = '16'
    data_md5.text = hashlib.md5(data).hexdigest()
    data_size = etree.SubElement(manifest_info, 'size')
    data_size.attrib['__type'] = 'u32'
    data_size.text = str(len(data))
    manifest_bin = self.manifest.to_binary()
    # header is 36 bytes total: 20 of fields plus the 16 byte md5
    self.header_end = 36 + len(manifest_bin)
    self.ifs_size = self.header_end + len(data)
    self.tree_size = self._tree_size()
    manifest_hash = hashlib.md5(manifest_bin).digest()
    head = ByteBuffer()
    head.append_u32(SIGNATURE)
    head.append_u16(self.file_version)
    # the version is stored alongside its bitwise complement
    head.append_u16(self.file_version ^ 0xFFFF)
    # NOTE(review): writes the current time rather than self.time, so
    # repacked archives get a fresh timestamp - confirm intended
    head.append_u32(int(unixtime()))
    head.append_u32(self.tree_size)
    head.append_u32(self.header_end)
    with open(path, 'wb') as ifs_file:
        ifs_file.write(head.data)
        ifs_file.write(manifest_hash)
        ifs_file.write(manifest_bin)
        ifs_file.write(data)
# suspected to be the in-memory representation
def _tree_size(self):
BASE_SIZE = 856
return BASE_SIZE + self._tree_size_recurse(self.tree)
def _tree_size_recurse(self, tree, depth = 0):
FILE = 64
FOLDER = 56
DEPTH_MULTIPLIER = 16
size = len(tree.files) * FILE
size += len(tree.folders) * (FOLDER - depth*DEPTH_MULTIPLIER)
for name, folder in tree.folders.items():
size += self._tree_size_recurse(folder, depth+1)
return size
def _extract_tree(self, tree, progress = True, recurse = True, dir = ''):
    """Recursively write `tree` out under self.out/dir, optionally
    unpacking any nested .ifs files encountered along the way."""
    outdir = join(self.out, dir)
    if progress:
        print(outdir)
    self._mkdir(outdir)
    for entry in tree.files.values():
        out = join(outdir, entry.name)
        if progress:
            print(out)
        self._save_with_time(out, entry.load(), entry.time)
        if recurse and entry.name.endswith('.ifs'):
            IFS(out).extract_all(progress, recurse)
    for subfolder in tree.folders.values():
        self._extract_tree(subfolder, progress, recurse, join(dir, subfolder.name))
    # stamp the directory: tree time, or fall back to the archive time
    timestamp = tree.time if tree.time else self.time
    utime(outdir, (timestamp, timestamp))
def _mkdir(self, dir):
try: # python 3
try:
mkdir(dir)
except FileExistsError:
pass
except NameError: # python 2
try:
mkdir(dir)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise
def load_file(self, start, size):
start = self.header_end+start
end = start + size
assert start <= len(self.file) and end <= len(self.file)
return self.file[start:end]
def _save_with_time(self, filename, data, time):
with open(filename, 'wb') as f:
f.write(data)
utime(filename, (time,time))
# NOTE(review): this line is the displaced else-branch body of
# get_choice() above; the diff rendering separated it from its function.
print('Please answer y/n')
# Command-line entry point.  NOTE(review): this region interleaves old
# and new lines of a diff - both the old --recache flag and its newer
# --nocache replacement appear below with the same help text.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Unpack/pack IFS files and textures')
    parser.add_argument('files', metavar='file.ifs|folder_ifs', type=str, nargs='+',
        help='files/folders to process. Files will be unpacked, folders will be repacked')
    # NOTE(review): old revision's flag, superseded by --nocache below
    parser.add_argument('--recache', action='store_true', help='ignore texture cache, recompress all')
    parser.add_argument('-y', action='store_true', help='don\'t prompt for file/folder overwrite', dest='overwrite')
    parser.add_argument('-o', default='.', help='output directory', dest='out_dir')
    parser.add_argument('--tex-only', action='store_true', help='only extract textures', dest='tex_only')
    # store_false: cache is used unless --nocache is given
    parser.add_argument('--nocache', action='store_false', help='ignore texture cache, recompress all', dest='use_cache')
    parser.add_argument('-s', '--silent', action='store_false', dest='progress',
        help='don\'t display files as they are processed')
    # NOTE(review): this add_argument call is truncated here by the diff
    # hunk boundary that follows it in this rendering.
    parser.add_argument('-r', '--norecurse', action='store_false', dest='recurse',
@ -221,9 +31,21 @@ if __name__ == '__main__':
args = parser.parse_args()
for f in args.files:
    # NOTE(review): the bare construction below is the old revision's
    # line; the try/except version that follows replaces it.
    i = IFS(f)
    path = None
    if args.progress:
        print(f)
    try:
        i = IFS(f)
    except IOError as e:
        # human friendly
        print('{}: {}'.format(os.path.basename(f), str(e)))
        exit(1)
    # output lands under -o OUT_DIR using the archive's default name
    path = os.path.join(args.out_dir, i.default_out)
    if os.path.exists(path) and not args.overwrite:
        if not get_choice('{} exists. Overwrite?'.format(path)):
            continue
    if i.is_file:
        # NOTE(review): old extract_all() call followed by the new
        # extract() replacement - another diff interleave.
        i.extract_all(args.progress, args.recurse)
        i.extract(args.progress, args.use_cache, args.recurse, args.tex_only, path)
    else:
        i.repack(args.progress, args.recache)
        i.repack(args.progress, args.use_cache, path)