wasm3/test/run-spec-test.py

#!/usr/bin/env python3
# Author: Volodymyr Shymanskyy
# Usage:
# ./run-spec-test.py
# ./run-spec-test.py ./core/i32.json
# ./run-spec-test.py ./core/float_exprs.json --line 2070
# ./run-spec-test.py --exec ../custom_build/wasm3
#
# TODO Get more tests from:
# https://github.com/microsoft/ChakraCore/tree/master/test/WasmSpec
import argparse
import os
import os.path
import subprocess
import glob
import sys
import json
import re
import struct
import math
from pprint import pprint

#
# Args handling
#
parser = argparse.ArgumentParser()
parser.add_argument("--exec", metavar="<interpreter>", default="../build/wasm3")
parser.add_argument("--line", metavar="<source line>", type=int)
parser.add_argument("--all", action="store_true")
parser.add_argument("--show-logs", action="store_true")
parser.add_argument("--skip-crashes", action="store_true")
parser.add_argument("--format", choices=["raw", "hex", "fp"], default="fp")
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-s", "--silent", action="store_true")
parser.add_argument("file", nargs='*')
args = parser.parse_args()
if args.line:
    args.show_logs = True
#
# Utilities
#
log = open("spec-test.log","w+")
log.write("======================\n")
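# ANSI escape codes for colorized terminal output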
class ansi:
    ENDC = '\033[0m'
    HEADER = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
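# dict subclass with attribute-style access: d = dotdict(a=1); d.b = 2
# (missing keys read as None, since __getattr__ is dict.get)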
class dotdict(dict):
    def __init__(self, *args, **kwargs):
        super(dotdict, self).__init__(*args, **kwargs)
        for arg in args:
            if isinstance(arg, dict):
                for k, v in arg.items():
                    self[k] = v
        if kwargs:
            for k, v in kwargs.items():
                self[k] = v

    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
def warning(msg):
    log.write("Warning: " + msg + "\n")
    print(f"{ansi.WARNING}Warning:{ansi.ENDC} {msg}")
def run(cmd):
    return subprocess.check_output(cmd, shell=True)

def filename(p):
    _, fn = os.path.split(p)
    return fn
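# Reinterpret an integer bit pattern as an IEEE-754 float of the given type,
# e.g. binaryToFloat("1078530011", "f32") -> ~3.1415927 (0x40490FDB)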
def binaryToFloat(num, t):
    if t == "f32":
        return struct.unpack('!f', struct.pack('!L', int(num)))[0]
    elif t == "f64":
        return struct.unpack('!d', struct.pack('!Q', int(num)))[0]
    else:
        raise Exception(f"Unknown type: {t}")
#
# Value format options
#
def formatValueRaw(num, t):
    return str(num)

def formatValueHex(num, t):
    if t == "f32" or t == "i32":
        return "{0:#0{1}x}".format(int(num), 8+2)
    elif t == "f64" or t == "i64":
        return "{0:#0{1}x}".format(int(num), 16+2)
    else:
        return str(num)

def formatValueFloat(num, t):
    if t == "f32":
        s = 6
    elif t == "f64":
        s = 10
    else:
        return str(num)
    result = "{0:.{1}f}".format(binaryToFloat(num, t), s).rstrip('0')
    if result.endswith('.'): result = result + '0'
    if len(result) > s*2:
        result = "{0:.{1}e}".format(binaryToFloat(num, t), s)
    return result
formatters = {
    'raw': formatValueRaw,
    'hex': formatValueHex,
    'fp':  formatValueFloat,
}
formatValue = formatters[args.format]

if args.format == "fp":
    warning("When using the fp display format, values are compared loosely (some tests may produce false positives)")
#
# Spec tests preparation
#
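# Download the wg_draft2 snapshot of the official spec repo and extract
# the core test .wast files into specDir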
def specTestsFetch():
    from io import BytesIO
    from zipfile import ZipFile
    from urllib.request import urlopen

    officialSpec = "https://github.com/WebAssembly/spec/archive/wg_draft2.zip"

    print(f"Downloading {officialSpec}")
    resp = urlopen(officialSpec)
    with ZipFile(BytesIO(resp.read())) as zipFile:
        for zipInfo in zipFile.infolist():
            if re.match(r"spec-wg_draft2/test/core/.*", zipInfo.filename):
                zipInfo.filename = specDir + filename(zipInfo.filename)
                zipFile.extract(zipInfo)
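# Convert each .wast file to the .json (+ .wasm) form consumed below,
# using the wast2json tool from WABT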
def specTestsPreprocess():
    print("Preprocessing spec files...")

    inputFiles = glob.glob(os.path.join(specDir, "*.wast"))
    inputFiles.sort()
    for fn in inputFiles:
        fn = os.path.basename(fn)

        wast_fn = os.path.join(specDir, fn)
        json_fn = os.path.join(coreDir, os.path.splitext(fn)[0]) + ".json"
        run(f"wast2json --debug-names -o {json_fn} {wast_fn}")
#
# Actual test
#
curDir = os.path.dirname(os.path.abspath(sys.argv[0]))
coreDir = os.path.join(curDir, "core")
specDir = "core/spec/"
stats = dotdict(total_run=0, skipped=0, failed=0, crashed=0, success=0, missing=0)
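# Run a single "invoke" action in the interpreter under test, parse its
# output, and compare it against the expected result (updating global stats)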
def runInvoke(test):
    wasm = os.path.relpath(os.path.join(coreDir, test.module), curDir)

    cmd = [args.exec, wasm, test.action.field]
    displayArgs = []
    for arg in test.action.args:
        cmd.append(arg['value'])
        displayArgs.append(formatValue(arg['value'], arg['type']))

    if args.verbose:
        print(f"Running {' '.join(cmd)}")

    try:
        wasm3 = subprocess.run(cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except ValueError:
        stats.skipped += 1
        return

    output = (wasm3.stdout + wasm3.stderr).strip()

    # Parse the actual output
    actual = None
    actual_val = None
    if len(output) == 0 or wasm3.returncode < 0:
        stats.crashed += 1
        actual = "<Crashed>"
    if not actual:
        result = re.findall(r'^Result: (.*?)$', "\n" + output + "\n", re.MULTILINE)
        if len(result) > 0:
            actual = "result " + result[-1]
            actual_val = result[0]
    if not actual:
        result = re.findall(r'^Error: \[trap\] (.*?) \(', "\n" + output + "\n", re.MULTILINE)
        if len(result) > 0:
            actual = "trap " + result[-1]
    if not actual:
        result = re.findall(r'^Error: (.*?)$', "\n" + output + "\n", re.MULTILINE)
        if len(result) > 0:
            actual = "error " + result[-1]
    if not actual:
        actual = "<No Result>"

    if actual == "error no operation ()":
        actual = "<Not Implemented>"
        stats.missing += 1

    # Prepare the expected result
    expect = None
    if "expected" in test:
        if len(test.expected) == 0:
            expect = "result <Empty Stack>"
        elif len(test.expected) == 1:
            t = test.expected[0]['type']
            value = str(test.expected[0]['value'])
            expect = "result " + value

            if actual_val is not None:
                if (t == "f32" or t == "f64") and (value == "<Canonical NaN>" or value == "<Arithmetic NaN>"):
                    val = binaryToFloat(actual_val, t)
                    #warning(f"{actual_val} => {val}")
                    if math.isnan(val):
                        actual = "<Some NaN>"
                        expect = "<Some NaN>"
                else:
                    expect = "result " + formatValue(value, t)
                    actual = "result " + formatValue(actual_val, t)
        else:
            warning(f"Test {test.source} specifies multiple results")
            expect = "result <Multiple>"
    elif "expected_trap" in test:
        expect = "trap " + str(test.expected_trap)
    else:
        expect = "<Unknown>"

    def showTestResult():
        print(" ----------------------")
        print(f"Test:     {ansi.HEADER}{test.source}{ansi.ENDC} -> {' '.join(cmd)}")
        print(f"Args:     {', '.join(displayArgs)}")
        #print(f"RetCode:  {wasm3.returncode}")
        print(f"Expected: {ansi.OKGREEN}{expect}{ansi.ENDC}")
        print(f"Actual:   {ansi.WARNING}{actual}{ansi.ENDC}")
        if args.show_logs and len(output):
            print(f"Log:")
            print(output)

    log.write(f"{test.source}\t|\t{filename(wasm)} {test.action.field}({', '.join(displayArgs)})\t=>\t\t")
    if actual == expect:
        stats.success += 1
        log.write(f"OK: {actual}\n")
        if args.line:
            showTestResult()
    else:
        stats.failed += 1
        log.write(f"FAIL: {actual}, should be: {expect}\n")
        if args.silent: return
        if args.skip_crashes and actual == "<Crashed>": return
        showTestResult()
        #sys.exit(1)
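# Fetch and preprocess the spec test suite on first run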
if not os.path.isdir(coreDir):
    if not os.path.isdir(specDir):
        specTestsFetch()
    specTestsPreprocess()

# Currently default to running the predefined list of tests
# TODO: Switch to running all tests when wasm spec is implemented
if args.file:
    jsonFiles = args.file
elif args.all:
    jsonFiles = glob.glob(os.path.join(coreDir, "*.json"))
    jsonFiles.sort()
else:
    jsonFiles = list(map(lambda x: f"./core/{x}.json", [
        #--- Complete ---
        "i32", "i64",
        "int_exprs",
        "f32", "f32_cmp", "f32_bitwise",
        "f64", "f64_cmp", "f64_bitwise",
        "float_misc",
        "conversions",
        "stack", "fac",
        "call", "call_indirect",
        "left-to-right",
        "break-drop",
        "forward",
        "func_ptrs",
        "endianness",
        "int_literals",

        #--- Almost ready ---
        #"memory_trap", "address",  -> init memory size + track memory bounds
        #"float_memory",
        #"memory_redundancy", "memory_grow",

        #--- TODO ---
        #"get_local", "set_local", "tee_local",
        #"if", "loop", "labels", "block", "br", "br_if", "br_table", "return",
        #"nop", "unreachable",
        #"align", "memory",
        #"float_literals",
        #"globals",
        #"func",
        #"float_exprs",
        #"elem",
        #"switch",
    ]))
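# Main test loop: parse each wast2json output file and dispatch its commands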
for fn in jsonFiles:
    with open(fn) as f:
        data = json.load(f)

    wast_source = filename(data["source_filename"])
    wast_module = ""

    if wast_source in ["linking.wast", "exports.wast", "names.wast"]:
        count = len(data["commands"])
        stats.skipped += count
        warning(f"Skipped {wast_source} ({count} tests)")
        continue

    print(f"Running {fn}")
    for cmd in data["commands"]:
        test = dotdict()
        test.line = int(cmd["line"])
        test.source = wast_source + ":" + str(test.line)
        test.module = wast_module
        test.type = cmd["type"]

        if test.type == "module":
            wast_module = cmd["filename"]

        elif (  test.type == "action" or
                test.type == "assert_return" or
                test.type == "assert_trap" or
                test.type == "assert_exhaustion" or
                test.type == "assert_return_canonical_nan" or
                test.type == "assert_return_arithmetic_nan"):

            if args.line and test.line != args.line:
                continue

            if args.verbose:
                print(f"Checking {test.source}")

            if test.type == "assert_return":
                test.expected = cmd["expected"]
            elif test.type == "assert_return_canonical_nan":
                test.expected = cmd["expected"]
                test.expected[0]["value"] = "<Canonical NaN>"
            elif test.type == "assert_return_arithmetic_nan":
                test.expected = cmd["expected"]
                test.expected[0]["value"] = "<Arithmetic NaN>"
            elif test.type == "assert_trap":
                test.expected_trap = cmd["text"]
            else:
                stats.skipped += 1
                warning(f"Skipped {test.source} {test.type}")
                continue

            test.action = dotdict(cmd["action"])
            if test.action.type == "invoke":
                stats.total_run += 1
                runInvoke(test)
            else:
                stats.skipped += 1
                warning(f"Unknown action: {test.action}")
                continue

        elif (  test.type == "register" or
                test.type == "assert_invalid" or
                test.type == "assert_malformed" or
                test.type == "assert_unlinkable" or
                test.type == "assert_uninstantiable"):
            stats.skipped += 1
        else:
            raise Exception(f"Unknown command: {test}")
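# Sanity-check the counters and print a summary of the results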
if (stats.failed + stats.success) != stats.total_run:
    warning("Statistics summary invalid")

pprint(stats)

if stats.failed > 0:
    failed = (stats.failed*100)/stats.total_run
    print(f"{ansi.FAIL}=======================")
    print(f" FAILED: {failed:.2f}%")
    if stats.crashed > 0:
        print(f" Crashed: {stats.crashed}")
    print(f"======================={ansi.ENDC}")
elif stats.success > 0:
    print(f"{ansi.OKGREEN}=======================")
    print(f" {stats.success}/{stats.total_run} tests OK")
    if stats.skipped > 0:
        print(f"{ansi.WARNING}  ({stats.skipped} tests skipped){ansi.OKGREEN}")
    print(f"======================={ansi.ENDC}")