vgmstream/cli/tools/vrts.py

510 lines
17 KiB
Python
Raw Normal View History

# VRTS - VGMSTREAM REGRESSION TESTING SCRIPT
#
# Searches for files in a directory (or optionally subdirs) and compares
# the output of two CLI versions, both wav and stdout, for regression
# testing. This creates and deletes temp files, trying to process all
# extensions found unless specified (except a few).
# TODO reject some .wav but not all (detect created by v)
# TODO capture stdout and enable fuzzy depending on codec
# TODO fix -l option, add decode reset option
import os, argparse, time, datetime, glob, subprocess, struct
# don't try to decode common stuff
# (compared against the lowercased file extension, so keep entries lowercase)
IGNORED_EXTENSIONS = ['.exe', '.dll', '.zip', '.7z', '.rar', '.bat', '.sh', '.txt', '.lnk', '.wav', '.py', '.md', '.idb']
#TODO others
# substrings looked up in the CLI's "encoding:" line to decide if fuzzy compare applies
FUZZY_CODECS = ['ffmpeg', 'vorbis', 'mpeg', 'speex', 'celt']
# default executable names, used when no -cn/-co is given (or when those point to a dir)
DEFAULT_CLI_NEW = 'vgmstream-cli'
DEFAULT_CLI_OLD = 'vgmstream-cli_old'
# result codes, where >= 0: ok (acceptable), <0: ko (not good)
# (callers only test the sign, so the sign is part of each code's meaning)
RESULT_SAME = 0 # no diffs
RESULT_FUZZY = 1 # no diffs allowing +-N
RESULT_NONE = 2 # neither exists
RESULT_DIFFS = -3 # different
RESULT_SIZES = -4 # different sizes
RESULT_MISSING_NEW = -5 # new does not exist
# NOTE(review): positive, so it counts as "ok" under the rule above -- presumably on purpose; confirm
RESULT_MISSING_OLD = 6 # old does not exist
###############################################################################
def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    On top of the declared options, the namespace gets two derived flags:

    - ``performance``: true when any of -p / -pn / -po / -pw was given
    - ``compare``: the negation of ``performance`` (the default mode)

    ``-p`` is also expanded into both ``performance_new`` and
    ``performance_old``. ``-pw`` only contributes to ``performance`` here.
    """
    description = (
        "Compares new vs old vgmstream CLI output, for regression testing"
    )
    epilog = (
        "examples:\n"
        "%(prog)s *.ogg -r -n\n"
        "- checks for differences in ogg of this and subfolders\n"
        "%(prog)s *.adx -nd\n"
        "- checks for differences in adx and doesn't delete wav output\n"
        "%(prog)s -p\n"
        "- compares performance of all files\n"
    )

    # RawTextHelpFormatter keeps the manual line breaks of the epilog/help texts
    ap = argparse.ArgumentParser(description=description, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter)
    ap.add_argument("files", help="files to match", nargs='*', default=["*.*"])
    ap.add_argument("-r","--recursive", help="search files in subfolders", action='store_true')
    ap.add_argument("-z","--fuzzy", help="fuzzy threshold of +-N PCM16LE", type=int, default=1)
    ap.add_argument("-nd","--no-delete", help="don't delete output", action='store_true')
    ap.add_argument("-rd","--result-diffs", help="only report full diffs", action='store_true')
    ap.add_argument("-rz","--result-fuzzy", help="only report full and fuzzy diffs", action='store_true')
    ap.add_argument("-p","--performance-both", help="compare decode performance", action='store_true')
    ap.add_argument("-pn","--performance-new", help="test performance of new CLI", action='store_true')
    ap.add_argument("-po","--performance-old", help="test performance of old CLI", action='store_true')
    ap.add_argument("-pw","--performance-write", help="compare decode+write performance", action='store_true')
    ap.add_argument("-pr","--performance-repeat", help="repeat decoding files N times\n(more files makes easier to see performance changes)", type=int, default=0)
    ap.add_argument("-l","--looping", help="compare looping files (slower)", action='store_true')
    ap.add_argument("-cn","--cli-new", help="sets name of new CLI (can be a path)")
    ap.add_argument("-co","--cli-old", help="sets name of old CLI (can be a path)")

    args = ap.parse_args()

    # derived defaults to simplify
    args.performance = args.performance_both or args.performance_new or args.performance_old or args.performance_write
    args.compare = not args.performance
    if args.performance_both:
        args.performance_new = True
        args.performance_old = True

    return args
###############################################################################
# pre-compiled reader of one signed 16-bit little-endian sample at a given offset
S16_UNPACK = struct.Struct('<h').unpack_from


# Compares 2 files and returns if contents are the same.
# If fuzzy is set detects +- PCM changes (slower).
class VrtsComparator:
    """Compares the file at path1 ("new") against the file at path2 ("old").

    After compare() returns, these attributes describe fuzzy results:
    - fuzzy_count: set to 1 once any chunk only matched within the threshold
    - fuzzy_diff: signed difference (new - old) of the first value that
      exceeded the threshold, 0 if none did
    - fuzzy_offset: file offset of that value
    """
    CHUNK_HEADER = 0x50
    CHUNK_SIZE = 0x00100000

    def __init__(self, path1, path2, fuzzy_max=0):
        self._path1 = path1
        self._path2 = path2
        self._fuzzy_max = fuzzy_max  # allowed +-N per sample, 0 = exact compare only
        self._offset = 0  # file offset of the chunk being compared
        self.fuzzy_count = 0
        self.fuzzy_diff = 0
        self.fuzzy_offset = 0

    def _compare_fuzzy(self, b1, b2):
        """Compares two buffers as PCM16LE samples, allowing +-fuzzy_max per sample.

        Returns RESULT_SIZES if the buffers differ in length, RESULT_DIFFS at
        the first sample over the threshold (recording fuzzy_diff/fuzzy_offset),
        otherwise marks fuzzy_count and returns RESULT_SAME.
        """
        len1 = len(b1)
        len2 = len(b2)
        if len1 != len2:
            return RESULT_SIZES

        # compares PCM16LE bytes allowing +-N diffs between PCM bytes
        # useful when comparing output from floats, that can change slightly due to compiler optimizations
        threshold = self._fuzzy_max

        # only whole samples can be unpacked: an odd trailing byte is checked apart
        # (unpacking it would raise struct.error)
        samples_end = len1 - (len1 % 2)

        pos = 0
        while pos < samples_end:
            pcm1, = S16_UNPACK(b1, pos)
            pcm2, = S16_UNPACK(b2, pos)
            if abs(pcm1 - pcm2) > threshold:
                self.fuzzy_diff = pcm1 - pcm2
                self.fuzzy_offset = self._offset + pos
                return RESULT_DIFFS
            pos += 2

        # leftover byte isn't a full sample, so it must match exactly
        if samples_end != len1 and b1[samples_end] != b2[samples_end]:
            self.fuzzy_diff = b1[samples_end] - b2[samples_end]
            self.fuzzy_offset = self._offset + samples_end
            return RESULT_DIFFS

        self.fuzzy_count = 1
        return RESULT_SAME

    def _compare_bytes(self, b1, b2):
        """Compares two buffers exactly, falling back to fuzzy compare if enabled."""
        # even though python is much slower than C this test is reasonably fast
        if b1 == b2:
            return RESULT_SAME

        # different: fuzzy check if same
        if self._fuzzy_max:
            return self._compare_fuzzy(b1, b2)

        return RESULT_DIFFS

    def _compare_files(self, f1, f2):
        """Compares two open binary files chunk by chunk; returns a result code (<0 on diffs)."""
        # header not part of fuzzyness (no need to get exact with sizes)
        if self._fuzzy_max:
            b1 = f1.read(self.CHUNK_HEADER)
            b2 = f2.read(self.CHUNK_HEADER)
            cmp = self._compare_bytes(b1, b2)
            if cmp < 0:
                return cmp
            self._offset += self.CHUNK_HEADER

        while True:
            b1 = f1.read(self.CHUNK_SIZE)
            b2 = f2.read(self.CHUNK_SIZE)
            if not b1 or not b2:
                break

            cmp = self._compare_bytes(b1, b2)
            if cmp < 0:
                return cmp
            self._offset += self.CHUNK_SIZE

        return RESULT_SAME

    def compare(self):
        """Compares both files and returns one of the RESULT_* codes.

        Missing files and size mismatches are reported without reading
        contents; otherwise contents are compared (fuzzily if fuzzy_max > 0).
        """
        try:
            f1_len = os.path.getsize(self._path1)
        except FileNotFoundError:
            f1_len = -1
        try:
            f2_len = os.path.getsize(self._path2)
        except FileNotFoundError:
            f2_len = -1

        if f1_len < 0 and f2_len < 0:
            return RESULT_NONE
        if f1_len < 0:
            return RESULT_MISSING_NEW
        if f2_len < 0:
            return RESULT_MISSING_OLD
        if f1_len != f2_len:
            return RESULT_SIZES

        with open(self._path1, 'rb') as f1, open(self._path2, 'rb') as f2:
            cmp = self._compare_files(f1, f2)
            if cmp < 0:
                return cmp
            if self.fuzzy_count > 0:
                return RESULT_FUZZY
            return RESULT_SAME
###############################################################################
# prints colored text in CLI
# https://pkg.go.dev/github.com/whitedevops/colors
# https://stackoverflow.com/questions/287871/
class VrtsPrinter:
    """Prints messages to stdout, optionally wrapped in ANSI color escapes."""
    RESET = '\033[0m'
    BOLD = '\033[1m'
    LIGHT_RED = '\033[91m'
    LIGHT_GREEN = '\033[92m'
    LIGHT_YELLOW = '\033[93m'
    LIGHT_BLUE = '\033[94m'
    LIGHT_CYAN = '\033[96m'
    WHITE = '\033[97m'
    LIGHT_GRAY = "\033[37m"
    DARK_GRAY = "\033[90m"

    # result code > color used to print it
    COLOR_RESULT = {
        RESULT_SAME: WHITE,
        RESULT_FUZZY: LIGHT_CYAN,
        RESULT_NONE: LIGHT_YELLOW,
        RESULT_DIFFS: LIGHT_RED,
        RESULT_SIZES: LIGHT_RED,
        RESULT_MISSING_NEW: LIGHT_RED,
        RESULT_MISSING_OLD: LIGHT_YELLOW,
    }
    # result code > description appended to the message
    TEXT_RESULT = {
        RESULT_SAME: 'same',
        RESULT_FUZZY: 'fuzzy same',
        RESULT_NONE: 'neither works',
        RESULT_DIFFS: 'diffs',
        RESULT_SIZES: 'wrong sizes',
        RESULT_MISSING_NEW: 'missing new',
        RESULT_MISSING_OLD: 'missing old',
    }

    def __init__(self):
        # 'color' is a Windows console command (presumably run so the console
        # handles the escapes below -- TODO confirm); elsewhere it would just
        # spawn a shell that fails with "command not found"
        if os.name == 'nt':
            try:
                os.system('color')
            except OSError:
                pass  # best effort: output still works, maybe without colors

    def _print(self, msg, color=None):
        """Prints msg, wrapped in color + RESET when a color is given."""
        if color:
            print("%s%s%s" % (color, msg, self.RESET))
        else:
            print(msg)

    def result(self, msg, code, fuzzy_diff, fuzzy_offset):
        """Prints "msg: <result text>" in the color of the result code.

        Unknown codes are printed as-is without color. When fuzzy_diff is
        non-zero the diff and its (hex) offset are appended.
        """
        text = self.TEXT_RESULT.get(code)
        color = self.COLOR_RESULT.get(code)
        if not text:
            text = code
        msg = "%s: %s" % (msg, text)
        if fuzzy_diff != 0:
            msg += " (%s @0x%x)" % (fuzzy_diff, fuzzy_offset)
        self._print(msg, color)

    def info(self, msg):
        """Prints an informational message followed by the current date and time."""
        msg = "%s (%s)" % (msg, self._get_date())
        self._print(msg, self.DARK_GRAY)

    def _get_date(self):
        """Returns the current local time as 'YYYY-MM-DD HH:MM:SS'."""
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
###############################################################################
class VrtsFiles:
    """Collects the input files to process from the patterns in args.files."""

    def __init__(self, args):
        self._args = args
        self.filenames = []

    def prepare(self):
        """Expands every pattern and appends the usable files to self.filenames.

        Non-files and files with an ignored extension are skipped. In
        performance mode each file is appended performance_repeat extra times.
        """
        opts = self._args
        for pattern in opts.files:
            deep = opts.recursive
            # '**/' lets glob descend into subfolders when recursive is on
            search = '**/' + pattern if deep else pattern

            for path in glob.glob(search, recursive=deep):
                if not os.path.isfile(path):
                    continue

                # ignores non useful files
                extension = os.path.splitext(path)[1]
                if extension.lower() in IGNORED_EXTENSIONS:
                    continue

                self.filenames.append(path)

                # same file N times
                if opts.performance and opts.performance_repeat:
                    self.filenames.extend([path] * opts.performance_repeat)
# calling subprocess with python:
# - os.system(command)
# - not recommended by docs (less flexible and spawns a new process?)
# - subprocess.call
# - wait till complete and returns code
# - subprocess.check_call
# - wait till complete and raise CalledProcessError on nonzero return code
# - subprocess.check_output
# - wait till complete and return STDOUT, raise CalledProcessError on nonzero return code
# - subprocess.run
# - recommended but python 3.5+
# (check=True: raise exceptions like check_*, capture_output: return STDOUT/STDERR)
class VrtsProcess:
    """Runs external programs and classifies how the call went."""

    def call(self, args, stdout=False):
        """Runs a program (list of program + arguments), discarding its output.

        Returns:
            True if the program ran and exited with code 0,
            False if it ran but exited with a non-zero code,
            None if it could not be launched at all (not found, not
            executable, etc).
            With stdout=True a successful call returns res.stdout instead of
            True; as output goes to DEVNULL that is currently always None.
        """
        try:
            #with open(os.devnull, 'wb') as DEVNULL: #python2
            #    res = subprocess.check_call(args, stdout=DEVNULL, stderr=DEVNULL)
            res = subprocess.run(args, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) #capture_output=stdout,
        except subprocess.CalledProcessError:
            #print("call error: ", e) #, e.stderr: disable DEVNULL
            return False #non-zero, exists but returns stderr (ex. ran with no args)
        except OSError:
            # FileNotFoundError, PermissionError and other launch failures:
            # all mean the program can't be used, so report them the same way
            return None #doesn't exist/etc

        if stdout:
            return res.stdout
        return True #exists and returns ok
class VrtsApp:
    """Runs the regression test: finds the CLIs, collects files, then compares or times them."""

    def __init__(self, args):
        self._args = args
        self._files = VrtsFiles(args)
        self._prc = VrtsProcess()
        self._p = VrtsPrinter()
        self._cli_new = None
        self._cli_old = None

    def _find_cli(self, arg_cli, default_cli):
        """Returns the CLI to use, or None if it can't be launched.

        arg_cli may be a dir (default_cli is looked up inside), a file/name,
        or empty (default_cli is used as-is).
        """
        if arg_cli and os.path.isdir(arg_cli):
            cli = os.path.join(arg_cli, default_cli)
        elif arg_cli: #is file
            cli = arg_cli
        else:
            cli = default_cli

        # plain call to see if program is in PATH (any exit code means it exists)
        res = self._prc.call([cli])
        if res is not None:
            return cli
        return None

    # detects CLI location:
    # - defaults to (cli) [new] + (cli)_old [old] assumed to be in PATH
    # - can be passed a dir or file for old/new
    # - old is optional in performance mode
    def _detect_cli(self):
        """Sets _cli_new/_cli_old; raises ValueError if one needed by the current mode is missing."""
        cli = self._find_cli(self._args.cli_new, DEFAULT_CLI_NEW)
        if cli:
            self._cli_new = cli

        cli = self._find_cli(self._args.cli_old, DEFAULT_CLI_OLD)
        if cli:
            self._cli_old = cli

        if not self._cli_new and (self._args.compare or self._args.performance_new):
            raise ValueError("new CLI not found")
        if not self._cli_old and (self._args.compare or self._args.performance_old):
            raise ValueError("old CLI not found")

    def _get_performance_args(self, cli):
        """Returns the command to decode all collected files without writing output."""
        args = [cli, '-O'] #flag to not write files
        if self._args.looping:
            args.append('-i')
        args.extend(self._files.filenames)
        return args

    def _time_cli(self, label, cli):
        """Decodes all files with one CLI and prints the elapsed time."""
        self._p.info("testing %s performance" % label)
        ts_st = time.time()
        self._prc.call(self._get_performance_args(cli))
        ts_ed = time.time()
        self._p.info("done: elapsed %ss" % (ts_ed - ts_st))

    def _performance(self):
        """Times the new and/or old CLI, depending on the performance flags."""
        # passes all files at once, as it's faster than 1 by 1 (that has to init program every time)
        if self._args.performance_new:
            self._time_cli("new", self._cli_new)
        if self._args.performance_old:
            self._time_cli("old", self._cli_old)
        #if self._performance_both:
        #    ...

    # returns max fuzzy count, except for non-fuzzable files (that use int math)
    def _get_fuzzy_count(self, stdout):
        """Returns the fuzzy threshold to use for a file, given the CLI's output.

        - 0 when fuzzy compare is disabled (threshold <= 0)
        - the threshold when there is no output to inspect
        - the threshold when the output's "encoding:" line names one of
          FUZZY_CODECS, otherwise 0
        """
        fuzzy = self._args.fuzzy
        if fuzzy <= 0:
            return 0

        if not stdout:
            return fuzzy

        # output may come as raw bytes or as text
        if isinstance(stdout, bytes):
            stdout = stdout.decode('utf-8', errors='replace')
        if not isinstance(stdout, str):
            return 0

        pos = stdout.find('encoding:')
        if pos < 0:
            return 0 #no codec info: non-fuzzable

        # FUZZY_CODECS entries are lowercase, so compare case-insensitively
        codec_line = stdout[pos:].split('\n', 1)[0].lower()
        if any(fuzzy_codec in codec_line for fuzzy_codec in FUZZY_CODECS):
            return fuzzy
        return 0 #non-fuzzable

    def _get_compare_args(self, cli, outwav, filename):
        """Returns the command to decode filename into outwav."""
        args = [cli, '-o', outwav] #flag to set output name
        if self._args.looping:
            args.append('-i')
        args.append(filename)
        return args

    def _compare(self):
        """Decodes every file with both CLIs, prints each result and a final summary."""
        ts_st = time.time()
        self._p.info("comparing files")

        total_ok = 0
        total_ko = 0
        for filename in self._files.filenames:
            filename_newwav = filename + ".new.wav"
            filename_oldwav = filename + ".old.wav"

            # main decode (ignores errors, comparator already checks them)
            args = self._get_compare_args(self._cli_new, filename_newwav, filename)
            stdout = self._prc.call(args, stdout=True)
            args = self._get_compare_args(self._cli_old, filename_oldwav, filename)
            self._prc.call(args, stdout=False)

            # test results
            fuzzy = self._get_fuzzy_count(stdout)
            cmp = VrtsComparator(filename_newwav, filename_oldwav, fuzzy)
            code = cmp.compare()
            self._p.result(filename, code, cmp.fuzzy_diff, cmp.fuzzy_offset)
            if code < 0:
                total_ko += 1
            else:
                total_ok += 1

            # post cleanup
            if not self._args.no_delete:
                for outwav in (filename_newwav, filename_oldwav):
                    try:
                        os.remove(outwav)
                    except OSError:
                        pass # best effort: output may not exist if decoding failed

        ts_ed = time.time()
        self._p.info("done: ok=%s, ko=%s, elapsed %ss" % (total_ok, total_ko, ts_ed - ts_st))

    def start(self):
        """Detects CLIs, collects files and runs the mode selected in args."""
        self._detect_cli()
        self._files.prepare()
        if self._args.performance:
            self._performance()
        else:
            self._compare()
def main():
    """Script entry point: parses options and runs the app, printing setup errors."""
    options = parse_args()
    if options:
        try:
            VrtsApp(options).start()
        except ValueError as err:
            # raised by the app when a required CLI can't be found
            print(err)


if __name__ == "__main__":
    main()