wasm3/test/run-spec-test.py

#!/usr/bin/env python3

# Author: Volodymyr Shymanskyy
# Usage:
#   ./run-spec-test.py
#   ./run-spec-test.py ./core/i32.json
#   ./run-spec-test.py ./core/float_exprs.json --line 2070
#   ./run-spec-test.py --exec ../custom_build/wasm3
#
# TODO
# - Get more tests from: https://github.com/microsoft/ChakraCore/tree/master/test/WasmSpec
# - Fix "Empty Stack" check
# - Check Canonical NaN and Arithmetic NaN separately
import argparse
import os
import os.path
import subprocess
import glob
import sys
import json
import re
import struct
import math
from pprint import pprint
#
# Args handling
#
parser = argparse.ArgumentParser()
parser.add_argument("--exec", metavar="<interpreter>", default="../build/wasm3")
parser.add_argument("--line", metavar="<source line>", type=int)
parser.add_argument("--all", action="store_true")
parser.add_argument("--show-logs", action="store_true")
parser.add_argument("--skip-crashes", action="store_true")
parser.add_argument("--format", choices=["raw", "hex", "fp"], default="fp")
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-s", "--silent", action="store_true")
parser.add_argument("file", nargs='*')
args = parser.parse_args()
if args.line:
    args.show_logs = True
#
# Utilities
#
log = open("spec-test.log","w+")
log.write("======================\n")
class ansi:
    ENDC = '\033[0m'
    HEADER = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
class dotdict(dict):
    def __init__(self, *args, **kwargs):
        super(dotdict, self).__init__(*args, **kwargs)
        for arg in args:
            if isinstance(arg, dict):
                for k, v in arg.items():
                    self[k] = v
        if kwargs:
            for k, v in kwargs.items():
                self[k] = v

    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
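
# Example (illustrative): dotdict enables attribute-style access to dict keys,
# e.g. t = dotdict(line=5); t.line == 5, and a missing key reads as None
# (dict.get semantics) rather than raising AttributeError.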
def warning(msg):
    log.write("Warning: " + msg + "\n")
    print(f"{ansi.WARNING}Warning:{ansi.ENDC} {msg}")

def run(cmd):
    return subprocess.check_output(cmd, shell=True)

def filename(p):
    _, fn = os.path.split(p)
    return fn
def binaryToFloat(num, t):
    if t == "f32":
        return struct.unpack('!f', struct.pack('!L', int(num)))[0]
    elif t == "f64":
        return struct.unpack('!d', struct.pack('!Q', int(num)))[0]
    else:
        raise Exception(f"Unknown type: {t}")
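
# Example (illustrative): the spec JSON stores float operands as decimal bit
# patterns, so binaryToFloat("1078530011", "f32") packs 0x40490fdb as a u32
# and unpacks it as an IEEE-754 single, yielding ~3.1415927.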
#
# Value format options
#
def formatValueRaw(num, t):
    return str(num)

def formatValueHex(num, t):
    if t == "f32" or t == "i32":
        return "{0:#0{1}x}".format(int(num), 8+2)
    elif t == "f64" or t == "i64":
        return "{0:#0{1}x}".format(int(num), 16+2)
    else:
        return str(num)

def formatValueFloat(num, t):
    if t == "f32":
        s = 6
    elif t == "f64":
        s = 10
    else:
        return str(num)
    result = "{0:.{1}f}".format(binaryToFloat(num, t), s).rstrip('0')
    if result.endswith('.'): result = result + '0'
    if len(result) > s*2:
        result = "{0:.{1}e}".format(binaryToFloat(num, t), s)
    return result
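
# Example (illustrative): for the f32 bit pattern 1078530011 (0x40490fdb),
# 'raw' prints "1078530011", 'hex' prints "0x40490fdb", and 'fp' decodes the
# bits and prints "3.141593" (6 decimal places for f32, 10 for f64).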
formatters = {
    'raw': formatValueRaw,
    'hex': formatValueHex,
    'fp': formatValueFloat,
}

formatValue = formatters[args.format]
if args.format == "fp":
    warning("When using the fp display format, values are compared loosely (some tests may produce false positives)")
#
# Spec tests preparation
#
def specTestsFetch():
    from io import BytesIO
    from zipfile import ZipFile
    from urllib.request import urlopen

    officialSpec = "https://github.com/WebAssembly/spec/archive/wg_draft2.zip"

    print(f"Downloading {officialSpec}")
    resp = urlopen(officialSpec)
    with ZipFile(BytesIO(resp.read())) as zipFile:
        for zipInfo in zipFile.infolist():
            if re.match(r"spec-wg_draft2/test/core/.*", zipInfo.filename):
                zipInfo.filename = specDir + filename(zipInfo.filename)
                zipFile.extract(zipInfo)
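
# Note (illustrative): the fetch pins the wg_draft2 snapshot of the official
# spec repo and extracts only test/core/*.wast, rewriting each zipInfo.filename
# so the files land flat in specDir.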
def specTestsPreprocess():
    print("Preprocessing spec files...")

    inputFiles = glob.glob(os.path.join(specDir, "*.wast"))
    inputFiles.sort()
    for fn in inputFiles:
        fn = os.path.basename(fn)

        wast_fn = os.path.join(specDir, fn)
        json_fn = os.path.join(coreDir, os.path.splitext(fn)[0]) + ".json"
        run(f"wast2json --debug-names -o {json_fn} {wast_fn}")
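
# Note (illustrative): wast2json is part of the WABT toolkit; for each .wast
# script it emits a .json command manifest plus the compiled modules it
# references (e.g. i32.json + i32.0.wasm), which the main loop below consumes.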
#
# Actual test
#
curDir = os.path.dirname(os.path.abspath(sys.argv[0]))
coreDir = os.path.join(curDir, "core")
specDir = "core/spec/"
stats = dotdict(total_run=0, skipped=0, failed=0, crashed=0, success=0, missing=0)
# Convert some trap names from the original spec
trapmap = {
    "unreachable": "unreachable executed"
}
def runInvoke(test):
    wasm = os.path.relpath(os.path.join(coreDir, test.module), curDir)
    cmd = [args.exec, wasm, test.action.field]

    displayArgs = []
    for arg in test.action.args:
        cmd.append(arg['value'])
        displayArgs.append(formatValue(arg['value'], arg['type']))

    if args.verbose:
        print(f"Running {' '.join(cmd)}")

    try:
        wasm3 = subprocess.run(cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except ValueError:
        stats.skipped += 1
        return
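
    # Note (illustrative): subprocess.run raises ValueError when an argument
    # cannot be passed to the OS (e.g. an embedded NUL byte, which some spec
    # test inputs contain), so such cases are counted as skipped.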
    output = (wasm3.stdout + wasm3.stderr).strip()

    # Parse the actual output
    actual = None
    actual_val = None

    if len(output) == 0 or wasm3.returncode < 0:
        stats.crashed += 1
        actual = "<Crashed>"

    if not actual:
        result = re.findall(r'^Result: (.*?)$', "\n" + output + "\n", re.MULTILINE)
        if len(result) > 0:
            actual = "result " + result[-1]
            actual_val = result[0]

    if not actual:
        result = re.findall(r'^Error: \[trap\] (.*?) \(', "\n" + output + "\n", re.MULTILINE)
        if len(result) > 0:
            actual = "trap " + result[-1]

    if not actual:
        result = re.findall(r'^Error: (.*?)$', "\n" + output + "\n", re.MULTILINE)
        if len(result) > 0:
            actual = "error " + result[-1]

    if not actual:
        actual = "<No Result>"

    if actual == "error no operation ()":
        actual = "<Not Implemented>"
        stats.missing += 1
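
    # Note (illustrative): the regex cascade above assumes wasm3's CLI prints
    # a "Result: <value>" line on success and an "Error: [trap] <name> (...)"
    # line on a trap; anything else is reported as a generic error.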
    # Prepare the expected result
    expect = None
    if "expected" in test:
        if len(test.expected) == 0:
            expect = "result <Empty Stack>"
        elif len(test.expected) == 1:
            t = test.expected[0]['type']
            value = str(test.expected[0]['value'])
            expect = "result " + value

            if actual_val != None:
                if (t == "f32" or t == "f64") and (value == "<Canonical NaN>" or value == "<Arithmetic NaN>"):
                    val = binaryToFloat(actual_val, t)
                    #warning(f"{actual_val} => {val}")
                    if math.isnan(val):
                        actual = "<Some NaN>"
                        expect = "<Some NaN>"
                else:
                    expect = "result " + formatValue(value, t)
                    actual = "result " + formatValue(actual_val, t)
        else:
            warning(f"Test {test.source} specifies multiple results")
            expect = "result <Multiple>"
    elif "expected_trap" in test:
        if test.expected_trap in trapmap:
            test.expected_trap = trapmap[test.expected_trap]
        expect = "trap " + str(test.expected_trap)
    else:
        expect = "<Unknown>"
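
    # Note (illustrative): canonical and arithmetic NaN expectations are both
    # collapsed to "<Some NaN>", so any NaN bit pattern passes; distinguishing
    # the two classes is still on the TODO list at the top of this file.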
    def showTestResult():
        print(" ----------------------")
        print(f"Test:     {ansi.HEADER}{test.source}{ansi.ENDC} -> {' '.join(cmd)}")
        print(f"Args:     {', '.join(displayArgs)}")
        #print(f"RetCode:  {wasm3.returncode}")
        print(f"Expected: {ansi.OKGREEN}{expect}{ansi.ENDC}")
        print(f"Actual:   {ansi.WARNING}{actual}{ansi.ENDC}")
        if args.show_logs and len(output):
            print(f"Log:")
            print(output)
    log.write(f"{test.source}\t|\t{filename(wasm)} {test.action.field}({', '.join(displayArgs)})\t=>\t\t")
    if actual == expect:
        stats.success += 1
        log.write(f"OK: {actual}\n")
        if args.line:
            showTestResult()
    else:
        stats.failed += 1
        log.write(f"FAIL: {actual}, should be: {expect}\n")
        if args.silent: return
        if args.skip_crashes and actual == "<Crashed>": return
        showTestResult()
        #sys.exit(1)
if not os.path.isdir(coreDir):
    if not os.path.isdir(specDir):
        specTestsFetch()
    specTestsPreprocess()
# By default, run the predefined list of tests below
# TODO: Switch to running all tests once the wasm spec is fully implemented
if args.file:
    jsonFiles = args.file
elif args.all:
    jsonFiles = glob.glob(os.path.join(coreDir, "*.json"))
    jsonFiles.sort()
else:
    jsonFiles = list(map(lambda x: f"./core/{x}.json", [
        #--- Complete ---
        "i32", "i64",
        "int_exprs",
        "f32", "f32_cmp", "f32_bitwise",
        "f64", "f64_cmp", "f64_bitwise",
        "float_misc",
        "conversions",
        "stack", "fac",
        "call", "call_indirect",
        "left-to-right",
        "break-drop",
        "forward",
        "func_ptrs",
        "endianness",
        "int_literals",

        #--- Almost ready ---
        #"memory_trap", "address",  -> init memory size + track memory bounds
        #"float_memory",
        #"memory_redundancy", "memory_grow",

        #--- TODO ---
        #"get_local", "set_local", "tee_local",
        #"if", "loop", "labels", "block", "br", "br_if", "br_table", "return",
        #"nop", "unreachable",
        #"align", "memory",
        #"float_literals",
        #"globals",
        #"func",
        #"float_exprs",
        #"elem",
        #"switch",
    ]))
for fn in jsonFiles:
    with open(fn) as f:
        data = json.load(f)

    wast_source = filename(data["source_filename"])
    wast_module = ""

    if wast_source in ["linking.wast", "exports.wast", "names.wast"]:
        count = len(data["commands"])
        stats.skipped += count
        warning(f"Skipped {wast_source} ({count} tests)")
        continue

    print(f"Running {fn}")
    for cmd in data["commands"]:
        test = dotdict()
        test.line = int(cmd["line"])
        test.source = wast_source + ":" + str(test.line)
        test.module = wast_module
        test.type = cmd["type"]

        if test.type == "module":
            wast_module = cmd["filename"]
        elif (test.type == "action" or
              test.type == "assert_return" or
              test.type == "assert_trap" or
              test.type == "assert_exhaustion" or
              test.type == "assert_return_canonical_nan" or
              test.type == "assert_return_arithmetic_nan"):

            if args.line and test.line != args.line:
                continue

            if args.verbose:
                print(f"Checking {test.source}")

            if test.type == "assert_return":
                test.expected = cmd["expected"]
            elif test.type == "assert_return_canonical_nan":
                test.expected = cmd["expected"]
                test.expected[0]["value"] = "<Canonical NaN>"
            elif test.type == "assert_return_arithmetic_nan":
                test.expected = cmd["expected"]
                test.expected[0]["value"] = "<Arithmetic NaN>"
            elif test.type == "assert_trap":
                test.expected_trap = cmd["text"]
            else:
                stats.skipped += 1
                warning(f"Skipped {test.source} {test.type}")
                continue

            test.action = dotdict(cmd["action"])
            if test.action.type == "invoke":
                stats.total_run += 1
                runInvoke(test)
            else:
                stats.skipped += 1
                warning(f"Unknown action: {test.action}")
                continue
        elif (test.type == "register" or
              test.type == "assert_invalid" or
              test.type == "assert_malformed" or
              test.type == "assert_unlinkable" or
              test.type == "assert_uninstantiable"):
            stats.skipped += 1
        else:
            raise Exception(f"Unknown command: {test}")
if (stats.failed + stats.success) != stats.total_run:
    warning("Statistics summary invalid")

pprint(stats)
if stats.failed > 0:
    failed = (stats.failed * 100) / stats.total_run
    print(f"{ansi.FAIL}=======================")
    print(f" FAILED: {failed:.2f}%")
    if stats.crashed > 0:
        print(f" Crashed: {stats.crashed}")
    print(f"======================={ansi.ENDC}")
elif stats.success > 0:
    print(f"{ansi.OKGREEN}=======================")
    print(f" {stats.success}/{stats.total_run} tests OK")
    if stats.skipped > 0:
        print(f"{ansi.WARNING}  ({stats.skipped} tests skipped){ansi.OKGREEN}")
    print(f"======================={ansi.ENDC}")