You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
wasm3/test/run-spec-test.py

582 lines
17 KiB
Python

5 years ago
#!/usr/bin/env python3
# Author: Volodymyr Shymanskyy
# Usage:
# ./run-spec-test.py
# ./run-spec-test.py ./core/i32.json
# ./run-spec-test.py ./core/float_exprs.json --line 2070
# ./run-spec-test.py --exec ../build-custom/wasm3
# ./run-spec-test.py --engine "wasmer run" --exec ../build-wasi/wasm3.wasm
# ./run-spec-test.py --engine "wasmer run --backend=llvm" --exec ../build-wasi/wasm3.wasm
#
# TODO
# - Get more tests from: https://github.com/microsoft/ChakraCore/tree/master/test/WasmSpec
# - Fix "Empty Stack" check
# - Check Canonical NaN and Arithmetic NaN separately
# - Fix names.wast
5 years ago
import argparse
5 years ago
import os, sys, glob, time
import subprocess
5 years ago
import json
import re
import struct
import math
5 years ago
from pprint import pprint
#
# Args handling
#
# Command-line interface of the spec test runner.
parser = argparse.ArgumentParser()

# How to launch the interpreter under test.
parser.add_argument("--exec", default="../build/wasm3", metavar="<interpreter>")
parser.add_argument("--engine", metavar="<engine>")

# Which tests to run.
parser.add_argument("--line", type=int, metavar="<source line>")
parser.add_argument("--all", action="store_true")

# How results are reported.
parser.add_argument("--show-logs", action="store_true")
parser.add_argument("--skip-crashes", action="store_true")
parser.add_argument("--format", default="fp", choices=["raw", "hex", "fp"])
#parser.add_argument("--wasm-opt", metavar="<opt flags>")
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-s", "--silent", action="store_true")

# Optional explicit list of test files.
parser.add_argument("file", nargs='*')

args = parser.parse_args()

# Selecting a single source line implies showing the interpreter log for it.
args.show_logs = bool(args.line) or args.show_logs
5 years ago
#
# Utilities
#
# Log file that receives every warning and per-test outcome of this run.
# The encoding is pinned to UTF-8 (with escaping as a last resort) so that
# test identifiers containing non-ASCII characters cannot abort the run with
# a UnicodeEncodeError on platforms whose locale encoding is not UTF-8.
log = open("spec-test.log", "w+", encoding="utf-8", errors="backslashreplace")
log.write("======================\n")
5 years ago
class ansi:
    """ANSI escape sequences used to colour and style terminal output."""

    # Resets all attributes back to the terminal default.
    ENDC = '\033[0m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Bright foreground colours.
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    HEADER = '\033[94m'
5 years ago
class dotdict(dict):
    """Dictionary whose entries can also be accessed as attributes.

    Reading an attribute maps to ``dict.get`` (so a missing key yields
    ``None``), assignment maps to ``dict.__setitem__`` and ``del`` maps to
    ``dict.__delitem__`` (so deleting a missing key raises ``KeyError``).
    """

    # Route attribute access straight to the mapping operations.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Re-apply dict arguments and keyword arguments on top of the
        # initial contents.
        for positional in args:
            if isinstance(positional, dict):
                self.update(positional)
        if kwargs:
            self.update(kwargs)
5 years ago
def warning(msg):
    """Write *msg* to the log file as a warning and echo it to stdout."""
    entry = "Warning: " + msg + "\n"
    log.write(entry)
    # Flush right away so the log is current even if the run aborts later.
    log.flush()
    print(f"{ansi.WARNING}Warning:{ansi.ENDC} {msg}")
def fatal(msg):
    """Log *msg* as a fatal error, echo it to stdout and exit with status 1."""
    entry = "Fatal: " + msg + "\n"
    log.write(entry)
    log.flush()
    print(f"{ansi.FAIL}Fatal:{ansi.ENDC} {msg}")
    sys.exit(1)
5 years ago
def filename(p):
    """Return the final component of path *p* (empty if *p* ends in a separator)."""
    return os.path.basename(p)
def binaryToFloat(num, t):
    """Reinterpret the integer bit pattern *num* as a floating point value.

    *num* is anything ``int()`` accepts; *t* is ``"f32"`` (32-bit pattern)
    or ``"f64"`` (64-bit pattern). Any other *t* is reported via ``fatal``.
    """
    # (integer pack format, float unpack format) per wasm type.
    layouts = {
        "f32": ('!L', '!f'),
        "f64": ('!Q', '!d'),
    }
    if t not in layouts:
        fatal(f"Unknown type '{t}'")
        return None

    int_format, float_format = layouts[t]
    raw = struct.pack(int_format, int(num))
    (value,) = struct.unpack(float_format, raw)
    return value
#
# Value format options
#
def formatValueRaw(num, t):
    """Return *num* converted to a string as-is; the type *t* is ignored."""
    text = str(num)
    return text
def formatValueHex(num, t):
    """Return *num* as zero-padded hexadecimal sized for the wasm type *t*.

    32-bit types get 8 hex digits, 64-bit types get 16 (both with a ``0x``
    prefix); values of any other type are returned via ``str``.
    """
    if t in ("f32", "i32"):
        return f"{int(num):#010x}"
    if t in ("f64", "i64"):
        return f"{int(num):#018x}"
    return str(num)
def formatValueFloat(num, t):
    """Render the bit pattern *num* of float type *t* as a decimal string.

    ``f32`` values use 6 fractional digits and ``f64`` values use 10, with
    trailing zeros removed. Results longer than twice that digit count are
    re-rendered in exponent notation. Non-float types are returned via
    ``str``.
    """
    digits = {"f32": 6, "f64": 10}.get(t)
    if digits is None:
        return str(num)

    value = binaryToFloat(num, t)
    text = f"{value:.{digits}f}".rstrip('0')
    # Keep one digit after the decimal point, e.g. "1." becomes "1.0".
    if text.endswith('.'):
        text += '0'
    # Very large magnitudes are easier to read in scientific notation.
    if len(text) > digits * 2:
        text = f"{value:.{digits}e}"
    return text
# Maps each --format choice to the function that renders values for display.
formaters = dict(
    raw=formatValueRaw,
    hex=formatValueHex,
    fp=formatValueFloat,
)

# Formatter selected on the command line; used for arguments and results.
formatValue = formaters[args.format]

if args.format == "fp":
    warning("When using fp display format, values are compared loosely (some tests may produce false positives)")
5 years ago
#
# Spec tests preparation
#
5 years ago
def specTestsFetch():
    """Download the spec test suite archive and unpack its ``core`` files.

    Every archive member whose path matches ``*-master/core/*`` is extracted
    into a flat ``core/`` directory relative to the current working
    directory; all other members are ignored.
    """
    from io import BytesIO
    from zipfile import ZipFile
    from urllib.request import urlopen

    officialSpec = "https://github.com/wasm3/wasm-core-testsuite/archive/master.zip"

    print(f"Downloading {officialSpec}")
    # Read the whole archive into memory and release the HTTP connection
    # right away instead of leaving the response object unclosed.
    with urlopen(officialSpec) as resp:
        archive = BytesIO(resp.read())

    with ZipFile(archive) as zipFile:
        for zipInfo in zipFile.infolist():
            if re.match(r".*-master/core/.*", zipInfo.filename):
                # Drop the archive's directory prefix so the member is
                # written directly under core/.
                zipInfo.filename = "core/" + filename(zipInfo.filename)
                zipFile.extract(zipInfo)
#
# Wasm3 REPL
#
from subprocess import Popen, STDOUT, PIPE
from threading import Thread
from queue import Queue, Empty
import shlex
def get_engine_cmd(engine, exe, wasm):
    """Build the argument list that starts the interpreter REPL on *wasm*.

    Without an *engine*, *exe* is run natively (a ``.wasm`` executable is
    reported via ``fatal``). With an *engine*, *exe* is passed to that host
    runtime using the argument layout the runtime expects; an engine that
    is not recognised is reported via ``fatal``.
    """
    if not engine:
        if exe.endswith(".wasm"):
            fatal("Need engine to execute wasm")
        return shlex.split(exe) + ["--repl", wasm]

    launcher = shlex.split(engine)
    if "wasirun" in engine or "wasm3" in engine:
        tail = [exe, "--repl", wasm]
    elif "wasmer" in engine or "wasmtime" in engine:
        # Both runtimes take the same flags here.
        tail = ["--dir=.", exe, "--", "--repl", wasm]
    elif "iwasm" in engine:
        tail = ["--dir=.", exe, "--repl", wasm]
    elif "wavm" in engine:
        tail = ["--mount-root", ".", exe, "--repl", "/" + wasm]
    else:
        fatal(f"Don't know how to run engine {engine}")
        return None
    return launcher + tail
class Wasm3():
    """Drives an interpreter REPL running as a child process.

    The child's combined stdout/stderr is drained by a background thread
    into a queue; commands are written to its stdin and the reply is
    collected until the REPL prompt shows up again.
    """

    def __init__(self, exe, engine=None):
        """Remember how to launch the interpreter; nothing is started yet.

        exe    -- interpreter executable (or .wasm file when *engine* is set)
        engine -- optional host runtime command line used to run *exe*
        """
        self.exe = exe
        self.engine = engine
        self.p = None           # child process, None while not started
        self.loaded = None      # file passed to the most recent load()
        self.q = Queue()        # output chunks; None marks end of output
        self.t = None           # reader thread of the current child
        self.timeout = 15.0     # seconds to wait for a reply

    def load(self, fn):
        """(Re)start the interpreter with module *fn* and wait for its prompt."""
        if self.p:
            self.terminate()

        self.loaded = fn
        self.p = Popen(
            get_engine_cmd(self.engine, self.exe, fn),
            bufsize=0, stdin=PIPE, stdout=PIPE, stderr=STDOUT
        )

        def _read_output(out, queue):
            # Forward raw output chunks; None signals that the stream ended.
            for data in iter(lambda: out.read(1024), b''):
                queue.put(data)
            queue.put(None)

        self.q = Queue()
        self.t = Thread(target=_read_output, args=(self.p.stdout, self.q))
        self.t.daemon = True
        self.t.start()

        try:
            self._read_until("wasm3> ", False)
        except Exception:
            # Deliberately best-effort: a child that never shows its first
            # prompt is detected (and restarted) by the next invoke().
            pass

    def invoke(self, cmd):
        """Send *cmd* (a sequence of tokens) and return the output it produced.

        Raises Exception("Crashed") or Exception("Timeout") when no prompt
        is seen; the interpreter is restarted before the exception is raised.
        """
        cmd = " ".join(map(str, cmd)) + "\n"
        self._flush_input()
        self._write(cmd)
        return self._read_until("\nwasm3> ")

    def _read_until(self, token, autorestart=True):
        """Return the output preceding the last occurrence of *token*.

        Output is accumulated as bytes and decoded only once the token has
        been found, so a multi-byte UTF-8 character that straddles two
        chunks is decoded correctly. Undecodable bytes are replaced rather
        than raising.
        """
        needle = token.encode("utf-8")
        buff = b""
        tout = time.time() + self.timeout
        error = None

        while time.time() < tout:
            try:
                data = self.q.get(timeout=0.1)
            except Empty:
                continue
            if data is None:
                error = "Crashed"
                break
            buff += data
            idx = buff.rfind(needle)
            if idx >= 0:
                return buff[:idx].decode("utf-8", errors="replace")
        else:
            # The loop ran out of time without seeing the token.
            error = "Timeout"

        # Crash => restart
        if autorestart:
            self.load(self.loaded)

        raise Exception(error)

    def _write(self, data):
        """Write *data* to the child's stdin, restarting the child if needed."""
        if not self._is_running():
            if self.loaded is None:
                raise Exception("Not running")
            self.load(self.loaded)

        self.p.stdin.write(data.encode("utf-8"))
        self.p.stdin.flush()

    def _is_running(self):
        """Return True while a child process exists and has not exited."""
        return self.p is not None and self.p.poll() is None

    def _flush_input(self):
        """Discard output that is still queued from earlier commands."""
        while not self.q.empty():
            self.q.get()

    def terminate(self):
        """Stop the child process; does nothing when none is running."""
        if self.p is None:
            return
        try:
            self.p.stdin.close()
        except OSError:
            # The pipe may already be broken if the child died.
            pass
        self.p.terminate()
        try:
            self.p.wait(timeout=1.0)
        except subprocess.TimeoutExpired:
            # The child ignored the termination request: force it down so
            # no stray interpreter keeps running.
            self.p.kill()
            self.p.wait()
        self.p = None
#
# Blacklist
#
import fnmatch
class Blacklist():
    """Set-like collection of shell-style patterns, queried with ``in``.

    An item is contained when it matches at least one of the patterns given
    to the constructor. With no patterns at all, nothing is contained.
    """

    def __init__(self, patterns):
        """Compile *patterns* (an iterable of fnmatch globs) into one regex."""
        translated = [fnmatch.translate(pattern) for pattern in patterns]
        # Joining zero alternatives would give an empty regex, which matches
        # every string; keep None instead so an empty blacklist stays empty.
        self._regex = re.compile('|'.join(translated)) if translated else None

    def __contains__(self, item):
        """Return True when *item* matches any of the patterns."""
        if self._regex is None:
            return False
        return self._regex.match(item) is not None
5 years ago
#
# Actual test
#
# Directory containing this script, and the spec test directory below it.
curDir = os.path.dirname(os.path.abspath(sys.argv[0]))
coreDir = os.path.join(curDir, "core")

# Interpreter session shared by all tests.
wasm3 = Wasm3(args.exec, args.engine)

# Tests matching any of these patterns are skipped. Patterns are matched
# against "<source>:<line> <wasm file> <function>(<args>)".
blacklist = Blacklist([
    "float_exprs.wast:* f32.nonarithmetic_nan_bitpattern*",
    "*.wast:* *.wasm print32*",
    "*.wast:* *.wasm print64*",
    "names.wast:*",
])

# Counters accumulated over the whole run.
stats = dotdict({
    "total_run": 0,
    "skipped": 0,
    "failed": 0,
    "crashed": 0,
    "timeout": 0,
    "success": 0,
    "missing": 0,
})

# Convert some trap names from the original spec
trapmap = {"unreachable": "unreachable executed"}
5 years ago
def runInvoke(test):
    """Invoke the function described by *test* and record the outcome.

    *test* is a dotdict carrying ``source``, ``wasm`` and ``action`` plus
    one of ``expected``, ``expected_trap`` or ``expected_anything``. The
    command sent to the interpreter is stored in ``test.cmd``. Blacklisted
    tests are counted as skipped; everything else updates the success and
    failure counters in ``stats`` and is written to the log file.
    """
    test.cmd = [test.action.field]

    displayArgs = []
    for arg in test.action.args:
        test.cmd.append(arg['value'])
        displayArgs.append(formatValue(arg['value'], arg['type']))

    # Argument values are not guaranteed to be strings; stringify them once
    # for display, the same way Wasm3.invoke() does before sending them.
    cmdText = [str(part) for part in test.cmd]

    test_id = f"{test.source} {test.wasm} {cmdText[0]}({', '.join(cmdText[1:])})"
    if test_id in blacklist:
        warning(f"Skipping {test_id} (blacklisted)")
        stats.skipped += 1
        return

    if args.verbose:
        print(f"Running {test_id}")

    stats.total_run += 1

    output = ""
    actual = None
    actual_val = None
    force_fail = False

    try:
        output = wasm3.invoke(test.cmd).strip()
    except Exception as e:
        # invoke() reports crashes and timeouts through the exception text.
        actual = f"<{e}>"
        force_fail = True

    # Parse the actual output
    padded = "\n" + output + "\n"
    if not actual:
        result = re.findall(r'Result: (.*?)$', padded, re.MULTILINE)
        if len(result) > 0:
            # Use the last reported result for both the displayed text and
            # the value that gets compared, so the two always agree.
            actual = "result " + result[-1]
            actual_val = result[-1]
    if not actual:
        result = re.findall(r'Error: \[trap\] (.*?) \(', padded, re.MULTILINE)
        if len(result) > 0:
            actual = "trap " + result[-1]
    if not actual:
        result = re.findall(r'Error: (.*?)$', padded, re.MULTILINE)
        if len(result) > 0:
            actual = "error " + result[-1]
    if not actual:
        actual = "<No Result>"
        force_fail = True

    if actual == "error no operation ()":
        actual = "<Not Implemented>"
        stats.missing += 1
        force_fail = True
    elif actual == "<Crashed>":
        stats.crashed += 1
        force_fail = True
    elif actual == "<Timeout>":
        stats.timeout += 1
        force_fail = True

    # Prepare the expected result
    expect = None
    if "expected" in test:
        if len(test.expected) == 0:
            expect = "result <Empty Stack>"
        elif len(test.expected) == 1:
            t = test.expected[0]['type']
            value = str(test.expected[0]['value'])
            expect = "result " + value

            if actual_val is not None:
                try:
                    if t in ("f32", "f64") and value in ("<Canonical NaN>", "<Arithmetic NaN>"):
                        # Any NaN is accepted; the two kinds are not told apart.
                        if math.isnan(binaryToFloat(actual_val, t)):
                            actual = "<Some NaN>"
                            expect = "<Some NaN>"
                    else:
                        shownExpect = "result " + formatValue(value, t)
                        shownActual = "result " + formatValue(actual_val, t)
                        expect, actual = shownExpect, shownActual
                except (ValueError, struct.error):
                    # The interpreter printed something that is not a valid
                    # number for this type. Keep the raw strings so the test
                    # is reported as a mismatch instead of aborting the run.
                    pass
        else:
            warning(f"Test {test.source} specifies multiple results")
            expect = "result <Multiple>"
    elif "expected_trap" in test:
        if test.expected_trap in trapmap:
            test.expected_trap = trapmap[test.expected_trap]

        expect = "trap " + str(test.expected_trap)
    elif "expected_anything" in test:
        expect = "<Anything>"
    else:
        expect = "<Unknown>"

    def showTestResult():
        # Print a human-readable report for this test to stdout.
        print(" ----------------------")
        print(f"Test: {ansi.HEADER}{test.source}{ansi.ENDC} -> {' '.join(cmdText)}")
        print(f"Args: {', '.join(displayArgs)}")
        print(f"Expected: {ansi.OKGREEN}{expect}{ansi.ENDC}")
        print(f"Actual: {ansi.WARNING}{actual}{ansi.ENDC}")
        if args.show_logs and len(output):
            print("Log:")
            print(output)

    log.write(f"{test.source}\t|\t{test.wasm} {test.action.field}({', '.join(displayArgs)})\t=>\t\t")
    if actual == expect or (expect == "<Anything>" and not force_fail):
        stats.success += 1
        log.write(f"OK: {actual}\n")
        if args.line:
            showTestResult()
    else:
        stats.failed += 1
        log.write(f"FAIL: {actual}, should be: {expect}\n")
        if args.silent:
            return
        if args.skip_crashes and actual == "<Crashed>":
            return

        showTestResult()
        #sys.exit(1)
5 years ago
# Fetch the spec tests when the core directory is not there yet.
if not os.path.isdir(coreDir):
    specTestsFetch()

# Currently default to running the predefined list of tests
# TODO: Switch to running all tests when wasm spec is implemented

if args.file:
    # Explicit files given on the command line take precedence.
    jsonFiles = args.file
elif args.all:
    # Every JSON file in the core directory, as sorted relative paths.
    jsonFiles = sorted(
        os.path.relpath(path, curDir)
        for path in glob.glob(os.path.join(coreDir, "*.json"))
    )
else:
    jsonFiles = [f"core/{name}.json" for name in (
        "get_local", "set_local", "tee_local",
        "globals",

        "int_literals",
        "i32", "i64",
        "int_exprs",

        "float_literals",
        "f32", "f32_cmp", "f32_bitwise",
        "f64", "f64_cmp", "f64_bitwise",
        "float_misc",

        "select",
        "conversions",
        "stack", "fac",
        "call", "call_indirect",
        "left-to-right",
        "break-drop",
        "forward",
        "func_ptrs",

        "address", "align", "endianness",
        "memory_redundancy", "float_memory",
        "memory", "memory_trap", "memory_grow",

        "unreachable",
        "switch", "if", "br", "br_if", "br_table", "loop", "block",
        "return", "nop", "start", "unwind", "labels",

        #--- TODO ---
        #"float_exprs",
    )]
5 years ago
# Run every command of every selected test file.
for fn in jsonFiles:
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(fn, encoding="utf-8") as f:
        data = json.load(f)

    wast_source = filename(data["source_filename"])
    wast_module = ""

    print(f"Running {fn}")

    for cmd in data["commands"]:
        test = dotdict()
        test.line = int(cmd["line"])
        test.source = wast_source + ":" + str(test.line)
        test.wasm = wast_module
        test.type = cmd["type"]

        if test.type == "module":
            # A new module replaces the one loaded in the interpreter.
            wast_module = cmd["filename"]

            if args.verbose:
                print(f"Loading {wast_module}")

            try:
                # Separate name so the outer loop variable is not clobbered.
                wasm_path = os.path.relpath(os.path.join(coreDir, wast_module), curDir)
                wasm3.load(wasm_path)
            except Exception as e:
                fatal(str(e))

        elif test.type in ("action",
                           "assert_return",
                           "assert_trap",
                           "assert_exhaustion",
                           "assert_return_canonical_nan",
                           "assert_return_arithmetic_nan"):

            # With --line, only the command from that source line is run.
            if args.line and test.line != args.line:
                continue

            # Every type admitted by the check above is handled below, so
            # no fallback branch is needed.
            if test.type == "action":
                test.expected_anything = True
            elif test.type == "assert_return":
                test.expected = cmd["expected"]
            elif test.type == "assert_return_canonical_nan":
                test.expected = cmd["expected"]
                test.expected[0]["value"] = "<Canonical NaN>"
            elif test.type == "assert_return_arithmetic_nan":
                test.expected = cmd["expected"]
                test.expected[0]["value"] = "<Arithmetic NaN>"
            elif test.type == "assert_trap":
                test.expected_trap = cmd["text"]
            elif test.type == "assert_exhaustion":
                test.expected_trap = "stack overflow"

            test.action = dotdict(cmd["action"])
            if test.action.type == "invoke":

                # TODO: invoking in modules not implemented
                if test.action.module:
                    stats.skipped += 1
                    warning(f"Skipped {test.source} (invoke in module)")
                    continue

                runInvoke(test)
            else:
                warning(f"Unknown action type '{test.action.type}'")

        elif test.type in ("register",
                           "assert_invalid",
                           "assert_malformed",
                           "assert_unlinkable",
                           "assert_uninstantiable"):
            # These command types are counted as skipped without a warning.
            stats.skipped += 1
            #warning(f"Skipped {test.source} ({test.type} not implemented)")
        else:
            fatal(f"Unknown command '{test}'")
5 years ago
# Sanity check: every test that ran must have either failed or succeeded.
if stats.total_run != (stats.failed + stats.success):
    warning("Statistics summary invalid")

pprint(stats)

if stats.failed > 0:
    # Percentage of the executed tests that failed.
    failed = (stats.failed*100)/stats.total_run
    print(f"{ansi.FAIL}=======================")
    print(f" FAILED: {failed:.2f}%")
    if stats.crashed > 0:
        print(f" Crashed: {stats.crashed}")
    print(f"======================={ansi.ENDC}")
    # A non-zero exit status signals the failure to the caller.
    sys.exit(1)

elif stats.success > 0:
    print(f"{ansi.OKGREEN}=======================")
    print(f" {stats.success}/{stats.total_run} tests OK")
    if stats.skipped > 0:
        print(f"{ansi.WARNING} ({stats.skipped} tests skipped){ansi.OKGREEN}")
    print(f"======================={ansi.ENDC}")