# 2023-01-15 17:41:58 +01:00
# VRTS - VGMSTREAM REGRESSION TESTING SCRIPT
#
# Searches for files in a directory (or optionally subdirs) and compares
# the output of two CLI versions, both wav and stdout, for regression
# testing. This creates and deletes temp files, trying to process all
# extensions found unless specified (except a few).
# TODO reject some .wav but not all (detect created by v)
# TODO capture stdout and enable fuzzy depending on codec
# TODO fix -l option, add decode reset option
import os , argparse , time , datetime , glob , subprocess , struct
# don't try to decode common stuff (lowercase, leading dot, as returned by os.path.splitext)
IGNORED_EXTENSIONS = ['.exe', '.dll', '.zip', '.7z', '.rar', '.bat', '.sh', '.txt', '.lnk', '.wav', '.py', '.md', '.idb']

# lowercase codec names whose decoded output may legitimately differ by +-N
#TODO others
FUZZY_CODECS = ['ffmpeg', 'vorbis', 'mpeg', 'speex', 'celt']

# default program names, used when no CLI name/path is passed
DEFAULT_CLI_NEW = 'vgmstream-cli'
DEFAULT_CLI_OLD = 'vgmstream-cli_old'

# result codes, where >= 0: ok (acceptable), <0: ko (not good)
RESULT_SAME = 0             # no diffs
RESULT_FUZZY = 1            # no diffs allowing +-N
RESULT_NONE = 2             # neither exists
RESULT_DIFFS = -3           # different
RESULT_SIZES = -4           # different sizes
RESULT_MISSING_NEW = -5     # new does not exist
RESULT_MISSING_OLD = 6      # old does not exist
###############################################################################
def parse_args(argv=None):
    """Parse the command line and return the options namespace.

    Args:
        argv: optional list of argument strings; when None argparse reads
            them from sys.argv.

    Returns:
        argparse.Namespace with every declared option plus two derived flags:
        ``performance`` (any performance option was given) and ``compare``
        (the opposite). ``--performance-both`` also switches on
        ``performance_new`` and ``performance_old``.
    """
    description = (
        "Compares new vs old vgmstream CLI output, for regression testing"
    )
    epilog = (
        "examples:\n"
        "%(prog)s *.ogg -r -n\n"
        "- checks for differences in ogg of this and subfolders\n"
        "%(prog)s *.adx -nd\n"
        "- checks for differences in adx and doesn't delete wav output\n"
        "%(prog)s -p\n"
        "- compares performance of all files\n"
    )

    ap = argparse.ArgumentParser(description=description, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter)
    ap.add_argument("files", help="files to match", nargs='*', default=["*.*"])
    ap.add_argument("-r", "--recursive", help="search files in subfolders", action='store_true')
    ap.add_argument("-z", "--fuzzy", help="fuzzy threshold of +-N PCM16LE", type=int, default=1)
    ap.add_argument("-nd", "--no-delete", help="don't delete output", action='store_true')
    ap.add_argument("-rd", "--result-diffs", help="only report full diffs", action='store_true')
    ap.add_argument("-rz", "--result-fuzzy", help="only report full and fuzzy diffs", action='store_true')
    ap.add_argument("-p", "--performance-both", help="compare decode performance", action='store_true')
    ap.add_argument("-pn", "--performance-new", help="test performance of new CLI", action='store_true')
    ap.add_argument("-po", "--performance-old", help="test performance of old CLI", action='store_true')
    ap.add_argument("-pw", "--performance-write", help="compare decode+write performance", action='store_true')
    ap.add_argument("-pr", "--performance-repeat", help="repeat decoding files N times\n(more files makes easier to see performance changes)", type=int, default=0)
    ap.add_argument("-l", "--looping", help="compare looping files (slower)", action='store_true')
    ap.add_argument("-cn", "--cli-new", help="sets name of new CLI (can be a path)")
    ap.add_argument("-co", "--cli-old", help="sets name of old CLI (can be a path)")

    args = ap.parse_args(argv)

    # derived defaults to simplify
    args.performance = args.performance_both or args.performance_new or args.performance_old or args.performance_write
    args.compare = not args.performance
    if args.performance_both:
        args.performance_new = True
        args.performance_old = True

    return args
###############################################################################
# precompiled reader for one little-endian signed 16-bit sample:
# S16_UNPACK(buffer, offset) returns a 1-tuple with the value
S16_UNPACK = struct.Struct('<h').unpack_from
# Compares 2 files and returns if contents are the same.
# If fuzzy is set detects +- PCM changes (slower).
class VrtsComparator:
    """Compares two files and reports one of the RESULT_* codes.

    With ``fuzzy_max`` > 0, chunks that are not byte-identical are re-read as
    PCM16LE samples and accepted when every sample pair differs by at most
    ``fuzzy_max``. After compare():
      - fuzzy_count: non-zero when some chunk only matched through the fuzzy test
      - fuzzy_diff / fuzzy_offset: sample delta (path1 - path2) and file offset
        of the first difference that exceeded the threshold
    """

    CHUNK_HEADER = 0x50
    CHUNK_SIZE = 0x00100000

    def __init__(self, path1, path2, fuzzy_max=0):
        self._path1 = path1
        self._path2 = path2
        self._fuzzy_max = fuzzy_max
        self._offset = 0
        self.fuzzy_count = 0
        self.fuzzy_diff = 0
        self.fuzzy_offset = 0

    def _compare_fuzzy(self, b1, b2):
        """Compares two buffers as PCM16LE samples allowing +-fuzzy_max per sample."""
        size = len(b1)
        if size != len(b2):
            # buffers of different length can't be "the same"
            return RESULT_SIZES

        # compares PCM16LE bytes allowing +-N diffs between PCM bytes
        # useful when comparing output from floats, that can change slightly due to compiler optimizations
        limit = self._fuzzy_max
        even_size = size - (size % 2)  # an odd trailing byte is not a full sample
        for pos in range(0, even_size, 2):
            pcm1, = S16_UNPACK(b1, pos)
            pcm2, = S16_UNPACK(b2, pos)
            if abs(pcm1 - pcm2) > limit:
                self.fuzzy_diff = pcm1 - pcm2
                self.fuzzy_offset = self._offset + pos
                return RESULT_DIFFS

        # leftover byte (odd-sized buffer) must match exactly
        if even_size != size and b1[even_size] != b2[even_size]:
            self.fuzzy_diff = b1[even_size] - b2[even_size]
            self.fuzzy_offset = self._offset + even_size
            return RESULT_DIFFS

        self.fuzzy_count = 1
        return RESULT_SAME

    def _compare_bytes(self, b1, b2):
        """Exact compare first, falling back to the fuzzy test when enabled."""
        # even though python is much slower than C this test is reasonably fast
        if b1 == b2:
            return RESULT_SAME

        # different: fuzzy check if same
        if self._fuzzy_max:
            return self._compare_fuzzy(b1, b2)
        return RESULT_DIFFS

    def _compare_files(self, f1, f2):
        """Walks both open files chunk by chunk, stopping at the first bad chunk."""
        # in fuzzy mode the first CHUNK_HEADER bytes are read as their own chunk
        # (no need to get exact with sizes)
        if self._fuzzy_max:
            code = self._compare_bytes(f1.read(self.CHUNK_HEADER), f2.read(self.CHUNK_HEADER))
            if code < 0:
                return code
            self._offset += self.CHUNK_HEADER

        while True:
            b1 = f1.read(self.CHUNK_SIZE)
            b2 = f2.read(self.CHUNK_SIZE)
            if not b1 or not b2:
                break

            code = self._compare_bytes(b1, b2)
            if code < 0:
                return code
            self._offset += self.CHUNK_SIZE

        return RESULT_SAME

    def compare(self):
        """Returns a RESULT_* code for path1 (new) vs path2 (old)."""
        # start clean so the same instance can be reused
        self._offset = 0
        self.fuzzy_count = 0
        self.fuzzy_diff = 0
        self.fuzzy_offset = 0

        try:
            f1_len = os.path.getsize(self._path1)
        except FileNotFoundError:
            f1_len = -1
        try:
            f2_len = os.path.getsize(self._path2)
        except FileNotFoundError:
            f2_len = -1

        if f1_len < 0 and f2_len < 0:
            return RESULT_NONE
        if f1_len < 0:
            return RESULT_MISSING_NEW
        if f2_len < 0:
            return RESULT_MISSING_OLD
        if f1_len != f2_len:
            return RESULT_SIZES

        with open(self._path1, 'rb') as f1, open(self._path2, 'rb') as f2:
            code = self._compare_files(f1, f2)
        if code < 0:
            return code
        if self.fuzzy_count > 0:
            return RESULT_FUZZY
        return RESULT_SAME
###############################################################################
# prints colored text in CLI
# https://pkg.go.dev/github.com/whitedevops/colors
# https://stackoverflow.com/questions/287871/
class VrtsPrinter:
    """Prints result and info lines to stdout, colored with ANSI escape codes."""

    RESET = '\033[0m'
    BOLD = '\033[1m'

    LIGHT_RED = '\033[91m'
    LIGHT_GREEN = '\033[92m'
    LIGHT_YELLOW = '\033[93m'
    LIGHT_BLUE = '\033[94m'
    LIGHT_CYAN = '\033[96m'
    WHITE = '\033[97m'
    LIGHT_GRAY = "\033[37m"
    DARK_GRAY = "\033[90m"

    # result code > color of the printed line
    COLOR_RESULT = {
        RESULT_SAME: WHITE,
        RESULT_FUZZY: LIGHT_CYAN,
        RESULT_NONE: LIGHT_YELLOW,
        RESULT_DIFFS: LIGHT_RED,
        RESULT_SIZES: LIGHT_RED,
        RESULT_MISSING_NEW: LIGHT_RED,
        RESULT_MISSING_OLD: LIGHT_YELLOW,
    }

    # result code > text appended to the file name
    TEXT_RESULT = {
        RESULT_SAME: 'same',
        RESULT_FUZZY: 'fuzzy same',
        RESULT_NONE: 'neither works',
        RESULT_DIFFS: 'diffs',
        RESULT_SIZES: 'wrong sizes',
        RESULT_MISSING_NEW: 'missing new',
        RESULT_MISSING_OLD: 'missing old',
    }

    def __init__(self):
        # Windows-only workaround (see the StackOverflow thread linked above this
        # class); elsewhere 'color' is not a command and would only print a shell error
        if os.name == 'nt':
            os.system('color')

    def _print(self, msg, color=None):
        """Prints msg, wrapped in color + RESET when a color is given."""
        if color:
            print("%s%s%s" % (color, msg, self.RESET))
        else:
            print(msg)

    def result(self, msg, code, fuzzy_diff, fuzzy_offset):
        """Prints 'msg: <result text>' in the color mapped to code.

        Codes without a known text are printed as-is and uncolored. A non-zero
        fuzzy_diff is appended together with its offset (in hex).
        """
        text = self.TEXT_RESULT.get(code)
        if not text:
            text = code
        line = "%s: %s" % (msg, text)
        if fuzzy_diff != 0:
            line += " (%s @0x%x)" % (fuzzy_diff, fuzzy_offset)
        self._print(line, self.COLOR_RESULT.get(code))

    def info(self, msg):
        """Prints msg followed by the current local date/time, in dark gray."""
        self._print("%s (%s)" % (msg, self._get_date()), self.DARK_GRAY)

    def _get_date(self):
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
###############################################################################
class VrtsFiles:
    """Collects the list of files to test from the glob patterns in args.files."""

    def __init__(self, args):
        self._args = args
        self.filenames = []

    def prepare(self):
        """Expands every pattern and appends the accepted files to self.filenames.

        Only regular files are kept, and those whose extension is listed in
        IGNORED_EXTENSIONS are skipped. In performance mode each accepted file
        is added performance_repeat extra times.
        """
        args = self._args
        recursive = args.recursive
        # normalized once so lookups don't depend on case/whitespace of the configured list
        ignored = frozenset(ext.strip().lower() for ext in IGNORED_EXTENSIONS)

        # same file N times
        extra = args.performance_repeat if args.performance else 0
        copies = 1 + max(extra, 0)

        for fpattern in args.files:
            if recursive:
                fpattern = '**/' + fpattern

            for path in glob.glob(fpattern, recursive=recursive):
                if not os.path.isfile(path):
                    continue

                # ignores non useful files
                _, ext = os.path.splitext(path)
                if ext.lower() in ignored:
                    continue

                self.filenames.extend([path] * copies)
# calling subprocess with python:
# - os.system(command)
# - not recommended by docs (less flexible and spawns a new process?)
# - subprocess.call
# - wait till complete and returns code
# - subprocess.check_call
# - wait till complete and raise CalledProcessError on nonzero return code
# - subprocess.check_output
# - wait till complete and return STDOUT, raise CalledProcessError on nonzero return code
# - subprocess.run
# - recommended but python 3.5+
# (check=True: raise exceptions like check_*, capture_output: return STDOUT/STDERR)
class VrtsProcess:
    """Runs external programs, hiding their console output."""

    def call(self, args, stdout=False):
        """Runs args (program + parameters) and waits for it to finish.

        Returns:
            - None: program couldn't be launched (doesn't exist/etc)
            - False: program ran but returned a non-zero code (ex. ran with no args)
            - True: program ran ok (stdout=False)
            - bytes: captured STDOUT of a program that ran ok (stdout=True)
        """
        # only capture when asked, otherwise output is discarded (STDERR always is)
        out_target = subprocess.PIPE if stdout else subprocess.DEVNULL
        try:
            res = subprocess.run(args, check=True, stdout=out_target, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return False
        except OSError:
            # FileNotFoundError, or other launch failures such as a non-executable file
            return None

        if stdout:
            return res.stdout
        return True


class VrtsApp:
    """Main logic: finds the CLIs and files, then compares output or measures time."""

    def __init__(self, args):
        self._args = args
        self._files = VrtsFiles(args)
        self._prc = VrtsProcess()
        self._p = VrtsPrinter()
        self._cli_new = None
        self._cli_old = None

    def _find_cli(self, arg_cli, default_cli):
        """Returns the CLI name/path to use, or None if it can't be launched."""
        if arg_cli and os.path.isdir(arg_cli):
            cli = os.path.join(arg_cli, default_cli)
        elif arg_cli:  # is file
            cli = arg_cli
        else:
            cli = default_cli

        # plain call to see if program is in PATH: only None means "couldn't
        # launch", a non-zero exit (False) still proves the program exists
        if self._prc.call([cli]) is None:
            return None
        return cli

    # detects CLI location:
    # - defaults to (cli) [new] + (cli)_old [old] assumed to be in PATH
    # - can be passed a dir or file for old/new
    # - old is optional in performance mode
    def _detect_cli(self):
        self._cli_new = self._find_cli(self._args.cli_new, DEFAULT_CLI_NEW)
        self._cli_old = self._find_cli(self._args.cli_old, DEFAULT_CLI_OLD)

        if not self._cli_new and (self._args.compare or self._args.performance_new):
            raise ValueError("new CLI not found")
        if not self._cli_old and (self._args.compare or self._args.performance_old):
            raise ValueError("old CLI not found")

    def _get_performance_args(self, cli):
        args = [cli, '-O']  # flag to not write files
        if self._args.looping:
            args.append('-i')
        args.extend(self._files.filenames)
        return args

    def _run_performance(self, label, cli):
        """Decodes all files with one CLI call and prints the elapsed time."""
        self._p.info("testing %s performance" % label)
        ts_st = time.perf_counter()
        self._prc.call(self._get_performance_args(cli))
        ts_ed = time.perf_counter()
        self._p.info("done: elapsed %ss" % (ts_ed - ts_st))

    def _performance(self):
        # passes all files at once, as it's faster than 1 by 1 (that has to init program every time)
        # NOTE(review): --performance-write alone selects neither CLI, so nothing runs here
        if self._args.performance_new:
            self._run_performance("new", self._cli_new)
        if self._args.performance_old:
            self._run_performance("old", self._cli_old)

    # returns max fuzzy count, except for non-fuzzable files (that use int math)
    def _get_fuzzy_count(self, stdout):
        fuzzy = self._args.fuzzy
        if fuzzy <= 0:
            return 0
        if not stdout:
            # nothing to inspect: keep the configured threshold
            return fuzzy

        if isinstance(stdout, bytes):
            text = stdout.decode('utf-8', errors='replace')
        else:
            text = str(stdout)

        pos = text.find('encoding:')
        if pos < 0:
            return 0  # codec unknown: non-fuzzable

        # codec names are matched case-insensitively within the 'encoding:' line
        codec_line = text[pos:].split('\n', 1)[0].lower()
        for fuzzy_codec in FUZZY_CODECS:
            if fuzzy_codec.strip().lower() in codec_line:
                return fuzzy
        return 0  # non-fuzzable

    def _get_compare_args(self, cli, outwav, filename):
        args = [cli, '-o', outwav]  # flag to write to a specific output file
        if self._args.looping:
            args.append('-i')
        args.append(filename)
        return args

    @staticmethod
    def _remove_output(path):
        """Deletes a temp output file; it may not exist when decoding failed."""
        try:
            os.remove(path)
        except OSError:
            pass

    def _compare(self):
        ts_st = time.perf_counter()
        self._p.info("comparing files")

        total_ok = 0
        total_ko = 0
        for filename in self._files.filenames:
            filename_newwav = filename + ".new.wav"
            filename_oldwav = filename + ".old.wav"

            # leftovers from a previous run (ex. --no-delete) would hide a failed decode
            self._remove_output(filename_newwav)
            self._remove_output(filename_oldwav)

            # main decode (ignores errors, comparator already checks them)
            args = self._get_compare_args(self._cli_new, filename_newwav, filename)
            stdout = self._prc.call(args, stdout=True)

            args = self._get_compare_args(self._cli_old, filename_oldwav, filename)
            self._prc.call(args, stdout=False)

            # test results
            fuzzy = self._get_fuzzy_count(stdout)
            comparator = VrtsComparator(filename_newwav, filename_oldwav, fuzzy)
            code = comparator.compare()

            self._p.result(filename, code, comparator.fuzzy_diff, comparator.fuzzy_offset)

            if code < 0:
                total_ko += 1
            else:
                total_ok += 1

            # post cleanup
            if not self._args.no_delete:
                self._remove_output(filename_newwav)
                self._remove_output(filename_oldwav)

        ts_ed = time.perf_counter()
        self._p.info("done: ok=%s, ko=%s, elapsed %ss" % (total_ok, total_ko, ts_ed - ts_st))

    def start(self):
        """Detects CLIs and files, then runs the mode selected in args.

        Raises:
            ValueError: a CLI needed by the selected mode wasn't found.
        """
        self._detect_cli()
        self._files.prepare()
        if self._args.performance:
            self._performance()
        else:
            self._compare()
def main():
    """Script entry point.

    Returns:
        0 when the run finished, 1 when it was aborted by a ValueError
        (whose message is printed).
    """
    args = parse_args()
    try:
        VrtsApp(args).start()
    except ValueError as e:
        print(e)
        return 1
    return 0


if __name__ == "__main__":
    # hand the result to the shell so callers can detect an aborted run
    raise SystemExit(main())