extensions
Steven Massey 4 years ago
parent 314fe09b05
commit fb70fab0f9

@ -14,10 +14,6 @@
//------------------------------------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------------------------------------
static const IM3Operation c_setSetOps [] = { NULL, op_SetSlot_i32, op_SetSlot_i64, op_SetSlot_f32, op_SetSlot_f64 };
static const u16 c_slotUnused = 0xffff;
#define d_indent " | " #define d_indent " | "
// just want fewer letters and numbers to stare at down the way in the compiler table // just want fewer letters and numbers to stare at down the way in the compiler table
@ -28,6 +24,11 @@ static const u16 c_slotUnused = 0xffff;
#define none c_m3Type_none #define none c_m3Type_none
#define any (u8)-1 #define any (u8)-1
static const IM3Operation c_setSetOps [] = { NULL, op_SetSlot_i32, op_SetSlot_i64, op_SetSlot_f32, op_SetSlot_f64 };
static const u16 c_m3RegisterUnallocated = 0;
static const u16 c_slotUnused = 0xffff;
void ReleaseCompilationCodePage (IM3Compilation o) void ReleaseCompilationCodePage (IM3Compilation o)
{ {
@ -43,10 +44,15 @@ bool IsRegisterLocation (i16 i_location) { return (i_location >= d_m3
bool IsFpRegisterLocation (i16 i_location) { return (i_location == d_m3Fp0SlotAlias); } bool IsFpRegisterLocation (i16 i_location) { return (i_location == d_m3Fp0SlotAlias); }
bool IsIntRegisterLocation (i16 i_location) { return (i_location == d_m3Reg0SlotAlias); } bool IsIntRegisterLocation (i16 i_location) { return (i_location == d_m3Reg0SlotAlias); }
// Returns the type stored on the compilation's current block record (o->block.type).
u8  GetBlockType  (IM3Compilation o)
{
    return o->block.type;
}
// True when the current block's type is anything other than c_m3Type_none.
bool  BlockHasType  (IM3Compilation o)
{
    u8 blockType = GetBlockType (o);

    return blockType != c_m3Type_none;
}
// Difference between the current stack index and the stack index recorded when the
// current block was opened (o->block.initStackIndex).
i16  GetNumBlockValues  (IM3Compilation o)
{
    i16 numValues = o->stackIndex - o->block.initStackIndex;

    return numValues;
}
u32 GetTypeNumSlots (u8 i_type) u16 GetTypeNumSlots (u8 i_type)
{ {
return Is64BitType (i_type) ? 1 : 1; // return 1;
u16 n = Is64BitType (i_type) ? 2 : 1;
return n;
} }
i16 GetStackTopIndex (IM3Compilation o) i16 GetStackTopIndex (IM3Compilation o)
@ -84,24 +90,6 @@ u8 GetStackBottomType (IM3Compilation o, u16 i_offset)
} }
u8 GetBlockType (IM3Compilation o)
{
return o->block.type;
}
bool BlockHasType (IM3Compilation o)
{
return GetBlockType (o) != c_m3Type_none;
}
i16 GetNumBlockValues (IM3Compilation o)
{
return o->stackIndex - o->block.initStackIndex;
}
bool IsStackIndexInRegister (IM3Compilation o, u16 i_stackIndex) bool IsStackIndexInRegister (IM3Compilation o, u16 i_stackIndex)
{ d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o)); { d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o));
if (i_stackIndex < o->stackIndex) if (i_stackIndex < o->stackIndex)
@ -157,46 +145,78 @@ u16 GetStackTopSlotIndex (IM3Compilation o)
} }
// Looks up the slot recorded in 'wasmStack' for the given stack index.
// Asserts that the index lies below the current stack top.
u16  GetSlotForStackIndex  (IM3Compilation o, u16 i_stackIndex)
{
    d_m3Assert (i_stackIndex < o->stackIndex);

    u16 slot = o->wasmStack [i_stackIndex];
    return slot;
}
// True when 'i_slot' is a usable index into the slot table, i.e. below c_m3MaxFunctionSlots.
bool  IsValidSlot  (u16 i_slot)
{
    return (i_slot < c_m3MaxFunctionSlots);
}
// True when the usage count stored in 'm3Slots' for this slot is non-zero.
bool  IsSlotAllocated  (IM3Compilation o, u16 i_slot)
{
    u8 usageCount = o->m3Slots [i_slot];

    return usageCount != 0;
}
// Marks 'i_slot' as in use by setting its usage count to 1. The slot must be free on entry.
// 'numAllocatedSlots' is bumped exactly once, and only for slots at or above
// 'firstDynamicSlotIndex'; slots below that boundary are left out of the count.
void  MarkSlotAllocated  (IM3Compilation o, u16 i_slot)
{
    d_m3Assert (o->m3Slots [i_slot] == 0);      // shouldn't be already allocated

    o->m3Slots [i_slot] = 1;

    if (i_slot >= o->firstDynamicSlotIndex)     // don't track constants
        o->numAllocatedSlots++;
}
bool AllocateSlots (IM3Compilation o, u16 * o_execSlot, u8 i_type) M3Result AllocateSlotsWithinRange (IM3Compilation o, u16 * o_slot, u8 i_type, u16 i_startSlot, u16 i_endSlot)
{ {
bool found = false; M3Result result = m3Err_functionStackOverflow;
u16 numSlots = GetTypeNumSlots (i_type);
u16 searchOffset = numSlots - 1;
// search for empty slot in the execution stack // search for 1 or 2 consecutive slots in the execution stack
i16 i = o->firstSlotIndex; u16 i = i_startSlot;
while (i < d_m3MaxFunctionStackHeight) while (i < i_endSlot)
{ {
if (o->m3Slots [i] == 0) if (o->m3Slots [i] == 0 and o->m3Slots [i + searchOffset] == 0)
{ {
MarkSlotAllocated (o, i); MarkSlotAllocated (o, i);
* o_execSlot = i;
if (numSlots == 2)
MarkSlotAllocated (o, i + 1);
found = true; * o_slot = i;
result = m3Err_none;
break; break;
} }
++i; // keep 2-slot allocations even-aligned
i += numSlots;
} }
return found; return result;
}
// Allocates slot(s) for 'i_type' from the dynamic region: from 'firstDynamicSlotIndex'
// up to (not including) c_m3MaxFunctionSlots. Result is that of AllocateSlotsWithinRange.
M3Result  AllocateSlots  (IM3Compilation o, u16 * o_slot, u8 i_type)
{
    u16 rangeStart = o->firstDynamicSlotIndex;
    u16 rangeEnd   = c_m3MaxFunctionSlots;

    return AllocateSlotsWithinRange (o, o_slot, i_type, rangeStart, rangeEnd);
}
// Allocates slot(s) for 'i_type' from the constant region: from 'firstConstSlotIndex'
// up to (not including) 'firstDynamicSlotIndex'. Result is that of AllocateSlotsWithinRange.
M3Result  AllocateConstantSlots  (IM3Compilation o, u16 * o_slot, u8 i_type)
{
    u16 rangeStart = o->firstConstSlotIndex;
    u16 rangeEnd   = o->firstDynamicSlotIndex;

    return AllocateSlotsWithinRange (o, o_slot, i_type, rangeStart, rangeEnd);
}
M3Result IncrementSlotUsageCount (IM3Compilation o, u16 i_slot) M3Result IncrementSlotUsageCount (IM3Compilation o, u16 i_slot)
{ d_m3Assert (i_slot < d_m3MaxFunctionStackHeight); { d_m3Assert (i_slot < c_m3MaxFunctionSlots);
M3Result result = m3Err_none; d_m3Assert (o->m3Slots [i_slot] > 0); M3Result result = m3Err_none; d_m3Assert (o->m3Slots [i_slot] > 0);
// OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case // OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case
@ -212,9 +232,9 @@ M3Result IncrementSlotUsageCount (IM3Compilation o, u16 i_slot)
void DeallocateSlot (IM3Compilation o, i16 i_slotIndex, u8 i_type) void DeallocateSlot (IM3Compilation o, i16 i_slotIndex, u8 i_type)
{ d_m3Assert (i_slotIndex >= o->firstSlotIndex); { d_m3Assert (i_slotIndex >= o->firstDynamicSlotIndex);
d_m3Assert (o->m3Slots [i_slotIndex]); d_m3Assert (o->m3Slots [i_slotIndex]);
for (u32 i = 0; i < GetTypeNumSlots (i_type); ++i, ++i_slotIndex) for (u16 i = 0; i < GetTypeNumSlots (i_type); ++i, ++i_slotIndex)
{ {
if (-- o->m3Slots [i_slotIndex] == 0) if (-- o->m3Slots [i_slotIndex] == 0)
o->numAllocatedSlots--; o->numAllocatedSlots--;
@ -258,18 +278,18 @@ u16 GetRegisterStackIndex (IM3Compilation o, u32 i_register)
} }
u16 GetMaxExecSlot (IM3Compilation o) u16 GetMaxUsedSlotPlusOne (IM3Compilation o)
{ {
u16 i = o->firstSlotIndex; u16 i = o->firstDynamicSlotIndex;
u32 allocated = o->numAllocatedSlots; u32 allocated = o->numAllocatedSlots;
while (i < d_m3MaxFunctionStackHeight) while (i < c_m3MaxFunctionSlots)
{ {
if (allocated == 0) if (allocated == 0)
break; break;
if (o->m3Slots [i]) if (IsSlotAllocated (o, i))
--allocated; --allocated;
++i; ++i;
@ -294,15 +314,12 @@ M3Result PreserveRegisterIfOccupied (IM3Compilation o, u8 i_registerType)
// and point to a exec slot // and point to a exec slot
u16 slot; u16 slot;
if (AllocateSlots (o, & slot, type)) _ (AllocateSlots (o, & slot, type));
{
o->wasmStack [stackIndex] = slot; o->wasmStack [stackIndex] = slot;
_ (EmitOp (o, c_setSetOps [type])); _ (EmitOp (o, c_setSetOps [type]));
EmitSlotOffset (o, slot); EmitSlotOffset (o, slot);
} }
else _throw (m3Err_functionStackOverflow);
}
_catch: return result; _catch: return result;
} }
@ -397,20 +414,20 @@ M3Result Pop (IM3Compilation o)
{ {
o->stackIndex--; // printf ("pop: %d\n", (i32) o->stackIndex); o->stackIndex--; // printf ("pop: %d\n", (i32) o->stackIndex);
i16 location = o->wasmStack [o->stackIndex]; u16 slot = o->wasmStack [o->stackIndex];
u8 type = o->typeStack [o->stackIndex]; u8 type = o->typeStack [o->stackIndex];
if (IsRegisterLocation (location)) if (IsRegisterLocation (slot))
{ {
u32 regSelect = IsFpRegisterLocation (location); u32 regSelect = IsFpRegisterLocation (slot);
DeallocateRegister (o, regSelect); DeallocateRegister (o, regSelect);
} }
else if (location >= o->firstSlotIndex) else if (slot >= o->firstDynamicSlotIndex)
{ {
DeallocateSlot (o, location, type); DeallocateSlot (o, slot, type);
} }
m3logif (stack, dump_type_stack (o)) // m3logif (stack, dump_type_stack (o))
} }
else if (not IsStackPolymorphic (o)) else if (not IsStackPolymorphic (o))
result = m3Err_functionStackUnderrun; result = m3Err_functionStackUnderrun;
@ -445,14 +462,13 @@ M3Result _PushAllocatedSlotAndEmit (IM3Compilation o, u8 i_type, bool i_doEmit
u16 slot; u16 slot;
if (AllocateSlots (o, & slot, i_type)) _ (AllocateSlots (o, & slot, i_type));
{
_ (Push (o, i_type, slot)); _ (Push (o, i_type, slot));
if (i_doEmit) if (i_doEmit)
EmitSlotOffset (o, slot); EmitSlotOffset (o, slot);
}
else result = m3Err_functionStackOverflow; // printf ("push: %d\n", (u32) slot);
_catch: return result; _catch: return result;
} }
@ -474,36 +490,79 @@ M3Result PushConst (IM3Compilation o, u64 i_word, u8 i_type)
{ {
M3Result result = m3Err_none; M3Result result = m3Err_none;
i16 location = -1; bool matchFound = false;
u32 numConstants = o->constSlotIndex - o->firstConstSlotIndex; u32 numUsedConstSlots = o->maxConstSlotIndex - o->firstConstSlotIndex;
u16 numRequiredSlots = GetTypeNumSlots (i_type);
// search for duplicate matching constant slot to reuse // search for duplicate matching constant slot to reuse
for (u32 i = 0; i < numConstants; ++i) if (numRequiredSlots == 2)
{ {
if (o->constants [i] == i_word) numUsedConstSlots &= ~1; // round down to even num
for (u32 i = 0; i < numUsedConstSlots; i += 2)
{ {
location = o->firstConstSlotIndex + i; u16 slot = o->firstConstSlotIndex + i;
_ (Push (o, i_type, location));
break; if (IsSlotAllocated (o, slot) and IsSlotAllocated (o, slot + 1))
{
u64 * constant = (u64 *) & o->constants [i];
if (* constant == i_word)
{
matchFound = true;
_ (Push (o, i_type, slot));
break;
}
}
} }
} }
else
if (location < 0)
{ {
if (o->constSlotIndex < o->firstSlotIndex) for (u32 i = 0; i < numUsedConstSlots; ++i)
{ {
o->constants [numConstants] = i_word; u16 slot = o->firstConstSlotIndex + i;
location = o->constSlotIndex++;
if (IsSlotAllocated (o, slot))
_ (Push (o, i_type, location)); {
if (o->constants [i] == i_word)
{
matchFound = true;
_ (Push (o, i_type, slot));
break;
}
}
} }
else }
if (not matchFound)
{
u16 slot;
result = AllocateConstantSlots (o, & slot, i_type);
if (result) // no more constant table space; use inline constants
{ {
_ (EmitOp (o, Is64BitType (i_type) ? op_Const64 : op_Const32)); result = m3Err_none;
_ (EmitOp (o, numRequiredSlots == 1 ? op_Const32 : op_Const64));
EmitConstant64 (o, i_word); EmitConstant64 (o, i_word);
_ (PushAllocatedSlotAndEmit (o, i_type)); _ (PushAllocatedSlotAndEmit (o, i_type));
} }
else
{
u16 constTableIndex = slot - o->firstConstSlotIndex;
if (numRequiredSlots == 2)
{
u64 * constant64 = (u64 *) & o->constants [constTableIndex];
* constant64 = i_word;
}
else o->constants [constTableIndex] = (u32) i_word;
_ (Push (o, i_type, slot));
o->maxConstSlotIndex = m3_max (slot + numRequiredSlots, o->maxConstSlotIndex);
}
} }
_catch: return result; _catch: return result;
@ -605,7 +664,10 @@ _ (EmitOp (o, op));
EmitSlotOffset (o, i_destSlot); EmitSlotOffset (o, i_destSlot);
if (not inRegister) if (not inRegister)
EmitSlotOffset (o, o->wasmStack [i_stackIndex]); {
u16 srcSlot = GetSlotForStackIndex (o, i_stackIndex);
EmitSlotOffset (o, srcSlot);
}
_catch: return result; _catch: return result;
} }
@ -703,7 +765,7 @@ M3Result ReturnStackTop (IM3Compilation o)
// if local is unreferenced, o_preservedSlotIndex will be equal to localIndex on return // if local is unreferenced, o_preservedSlotIndex will be equal to localIndex on return
M3Result FindReferencedLocalWithinCurrentBlock (IM3Compilation o, u16 * o_preservedSlotIndex, u32 i_localIndex) M3Result FindReferencedLocalWithinCurrentBlock (IM3Compilation o, u16 * o_preservedSlotIndex, u32 i_localSlot)
{ {
M3Result result = m3Err_none; M3Result result = m3Err_none;
@ -719,18 +781,17 @@ M3Result FindReferencedLocalWithinCurrentBlock (IM3Compilation o, u16 * o_pres
startIndex = scope->initStackIndex; startIndex = scope->initStackIndex;
} }
* o_preservedSlotIndex = (u16) i_localIndex; * o_preservedSlotIndex = (u16) i_localSlot;
for (u32 i = startIndex; i < o->stackIndex; ++i) for (u32 i = startIndex; i < o->stackIndex; ++i)
{ {
if (o->wasmStack [i] == i_localIndex) if (o->wasmStack [i] == i_localSlot)
{ {
if (* o_preservedSlotIndex == i_localIndex) if (* o_preservedSlotIndex == i_localSlot)
{ {
u8 localType = GetStackBottomType (o, i_localIndex); u8 localType = GetStackBottomType (o, i_localSlot);
if (not AllocateSlots (o, o_preservedSlotIndex, localType)) _ (AllocateSlots (o, o_preservedSlotIndex, localType));
_throw (m3Err_functionStackOverflow);
} }
else else
_ (IncrementSlotUsageCount (o, * o_preservedSlotIndex)); _ (IncrementSlotUsageCount (o, * o_preservedSlotIndex));
@ -881,11 +942,13 @@ M3Result Compile_SetLocal (IM3Compilation o, u8 i_opcode)
{ {
M3Result result; M3Result result;
u32 localSlot; u32 localIndex;
_ (ReadLEB_u32 (& localSlot, & o->wasm, o->wasmEnd)); // printf ("--- set local: %d \n", localSlot); _ (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd)); // printf ("--- set local: %d \n", localSlot);
if (localSlot < GetFunctionNumArgsAndLocals (o->function)) if (localIndex < GetFunctionNumArgsAndLocals (o->function))
{ {
u16 localSlot = GetSlotForStackIndex (o, localIndex);
u16 preserveSlot; u16 preserveSlot;
_ (FindReferencedLocalWithinCurrentBlock (o, & preserveSlot, localSlot)); // preserve will be different than local, if referenced _ (FindReferencedLocalWithinCurrentBlock (o, & preserveSlot, localSlot)); // preserve will be different than local, if referenced
@ -908,11 +971,17 @@ M3Result Compile_GetLocal (IM3Compilation o, u8 i_opcode)
M3Result result; M3Result result;
_try { _try {
u32 localIndex; u32 localIndex;
_ (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd)); _ (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd));
u8 type = o->typeStack [localIndex]; if (localIndex >= GetFunctionNumArgsAndLocals (o->function))
_ (Push (o, type, localIndex)); _throw ("local index out of bounds");
u8 type = GetStackBottomType (o, localIndex);
u16 slot = GetSlotForStackIndex (o, localIndex);
_ (Push (o, type, slot));
} _catch: return result; } _catch: return result;
} }
@ -1151,22 +1220,42 @@ _ (AcquirePatch (o, & patch));
} }
M3Result CompileCallArgsReturn (IM3Compilation o, u16 * o_stackOffset, IM3FuncType i_type, bool i_isIndirect) void AlignSlotIndexToType (u16 * io_slotIndex, u8 i_type)
{
// align 64-bit words to even slots
u16 numSlots = GetTypeNumSlots (i_type);
// printf ("%d\n", (u32) numSlots);
u16 mask = numSlots - 1;
* io_slotIndex = (* io_slotIndex + mask) & ~mask;
}
M3Result CompileCallArgsAndReturn (IM3Compilation o, u16 * o_stackOffset, IM3FuncType i_type, bool i_isIndirect)
{ {
M3Result result = m3Err_none; M3Result result = m3Err_none;
_try { _try {
u16 topSlot = GetMaxUsedSlotPlusOne (o);
// force use of at least one stack slot; this is to help ensure // force use of at least one stack slot; this is to help ensure
// the m3 stack overflows (and traps) before the native stack can overflow. // the m3 stack overflows (and traps) before the native stack can overflow.
// e.g. see Wasm spec test 'runaway' in call.wast // e.g. see Wasm spec test 'runaway' in call.wast
u16 execTop = GetMaxExecSlot (o); topSlot = m3_max (1, topSlot);
if (execTop == 0)
execTop = 1;
* o_stackOffset = execTop; // stack frame is 64-bit aligned
AlignSlotIndexToType (& topSlot, c_m3Type_i64);
* o_stackOffset = topSlot;
u32 numArgs = i_type->numArgs + i_isIndirect; // wait to pop this here so that topSlot search is correct
u16 argTop = execTop + numArgs; if (i_isIndirect)
_ (Pop (o));
u32 numArgs = i_type->numArgs;
u16 argTop = topSlot + numArgs;
while (numArgs--) while (numArgs--)
{ {
@ -1178,8 +1267,8 @@ _ (Pop (o));
if (numReturns) if (numReturns)
{ {
MarkSlotAllocated (o, execTop); MarkSlotAllocated (o, topSlot);
_ (Push (o, i_type->returnType, execTop)); _ (Push (o, i_type->returnType, topSlot));
} }
} _catch: return result; } _catch: return result;
@ -1204,7 +1293,7 @@ _ (ReadLEB_u32 (& functionIndex, & o->wasm, o->wasmEnd));
// OPTZ: could avoid arg copy when args are already sequential and at top // OPTZ: could avoid arg copy when args are already sequential and at top
u16 slotTop; u16 slotTop;
_ (CompileCallArgsReturn (o, & slotTop, function->funcType, false)); _ (CompileCallArgsAndReturn (o, & slotTop, function->funcType, false));
IM3Operation op; IM3Operation op;
const void * operand; const void * operand;
@ -1245,18 +1334,23 @@ _ (ReadLEB_u32 (& typeIndex, & o->wasm, o->wasmEnd));
i8 reserved; i8 reserved;
_ (ReadLEB_i7 (& reserved, & o->wasm, o->wasmEnd)); _ (ReadLEB_i7 (& reserved, & o->wasm, o->wasmEnd));
if (typeIndex < o->module->numFuncTypes) if (typeIndex >= o->module->numFuncTypes)
{ _throw ("function type index out of range");
u16 execTop;
IM3FuncType type = & o->module->funcTypes [typeIndex]; if (IsStackTopInRegister (o))
_ (CompileCallArgsReturn (o, & execTop, type, true)); _ (PreserveRegisterIfOccupied (o, c_m3Type_i32));
_ (EmitOp (o, op_CallIndirect)); u16 tableIndexSlot = GetStackTopSlotIndex (o);
EmitPointer (o, o->module);
EmitPointer (o, type); // TODO: unify all types in M3Environment u16 execTop;
EmitSlotOffset (o, execTop); IM3FuncType type = & o->module->funcTypes [typeIndex];
} _ (CompileCallArgsAndReturn (o, & execTop, type, true));
else _throw ("function type index out of range");
_ (EmitOp (o, op_CallIndirect));
EmitSlotOffset (o, tableIndexSlot);
EmitPointer (o, o->module);
EmitPointer (o, type); // TODO: unify all types in M3Environment
EmitSlotOffset (o, execTop);
_catch: return result; _catch: return result;
} }
@ -1302,8 +1396,8 @@ M3Result ReadBlockType (IM3Compilation o, u8 * o_blockType)
i8 type; i8 type;
_ (ReadLEB_i7 (& type, & o->wasm, o->wasmEnd)); _ (ReadLEB_i7 (& type, & o->wasm, o->wasmEnd));
_ (NormalizeType (o_blockType, type)); if (* o_blockType) m3log (compile, d_indent "%s (block_type: 0x%02x normalized: %d)", _ (NormalizeType (o_blockType, type)); if (* o_blockType) m3log (compile, d_indent "%s (type: %s)",
get_indention_string (o), (u32) (u8) type, (u32) * o_blockType); get_indention_string (o), c_waTypes [(u32) * o_blockType]);
_catch: return result; _catch: return result;
} }
@ -1316,7 +1410,7 @@ M3Result PreserveArgsAndLocals (IM3Compilation o)
{ {
M3Result result = m3Err_none; M3Result result = m3Err_none;
if (o->stackIndex > o->firstSlotIndex) if (o->stackIndex > o->firstDynamicStackIndex)
{ {
u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o->function); u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o->function);
@ -2064,29 +2158,27 @@ M3Result Compile_ReserveConstants (IM3Compilation o)
// in the interest of speed, this blindly scans the Wasm code looking for any byte // in the interest of speed, this blindly scans the Wasm code looking for any byte
// that looks like an const opcode. // that looks like an const opcode.
u32 numConstants = 0; u32 numConstantSlots = 0;
bytes_t wa = o->wasm; bytes_t wa = o->wasm;
while (wa < o->wasmEnd) while (wa < o->wasmEnd)
{ {
u8 code = * wa++; u8 code = * wa++;
if (code >= 0x41 and code <= 0x44) if (code == 0x41 or code == 0x43) // i32, f32
++numConstants; numConstantSlots += 1;
} m3log (compile, "estimated constants: %d", numConstants) else if (code == 0x42 or code == 0x44) // i64, f64
numConstantSlots += 2;
o->firstSlotIndex = o->firstConstSlotIndex = o->constSlotIndex = o->stackIndex; } m3log (compile, "estimated constant slots: %d", numConstantSlots)
// if constants overflow their reserved stack space, the compiler simply emits op_Const // if constants overflow their reserved stack space, the compiler simply emits op_Const
// operations as needed. Compiled expressions (global inits) don't pass through this // operations as needed. Compiled expressions (global inits) don't pass through this
// ReserveConstants function and thus always produce inline constants. // ReserveConstants function and thus always produce inline constants.
numConstants = m3_min (numConstants, d_m3MaxConstantTableSize); numConstantSlots = m3_min (numConstantSlots, d_m3MaxConstantTableSize);
u32 freeSlots = d_m3MaxFunctionStackHeight - o->constSlotIndex; o->firstDynamicSlotIndex = o->firstConstSlotIndex + numConstantSlots;
if (numConstants <= freeSlots) if (o->firstDynamicSlotIndex >= c_m3MaxFunctionSlots)
o->firstSlotIndex += numConstants;
else
result = m3Err_functionStackOverflow; result = m3Err_functionStackOverflow;
return result; return result;
@ -2126,21 +2218,35 @@ M3Result Compile_Function (IM3Function io_function)
// push the arg types to the type stack // push the arg types to the type stack
M3FuncType * ft = io_function->funcType; M3FuncType * ft = io_function->funcType;
// all args are 64-bit aligned
u32 argSlotCount = sizeof (u64) / sizeof (m3slot_t);
o->function->numArgSlots = GetFunctionNumArgs (o->function) * argSlotCount;
for (u32 i = 0; i < GetFunctionNumArgs (io_function); ++i) for (u32 i = 0; i < GetFunctionNumArgs (io_function); ++i)
{ {
u8 type = ft->argTypes [i]; u8 type = ft->argTypes [i];
_ (PushAllocatedSlot (o, type)); _ (PushAllocatedSlot (o, type));
o->firstDynamicSlotIndex += argSlotCount; // don't let the allocator fill-in
} }
o->firstLocalSlotIndex = o->firstDynamicSlotIndex;
_ (CompileLocals (o)); _ (CompileLocals (o));
o->firstDynamicSlotIndex = 0;
u16 maxSlot = GetMaxUsedSlotPlusOne (o);
o->function->numLocalBytes = (maxSlot - o->firstLocalSlotIndex) * sizeof (m3slot_t);
_ (Compile_ReserveConstants (o)); o->firstConstSlotIndex = o->maxConstSlotIndex = maxSlot;
// ReserveConstants initializes o->firstDynamicSlotIndex
_ (Compile_ReserveConstants (o));
// start tracking the max stack used (Push() also updates this value) so that op_Entry can precisely detect stack overflow // start tracking the max stack used (Push() also updates this value) so that op_Entry can precisely detect stack overflow
o->function->maxStackSlots = o->firstSlotIndex; o->function->maxStackSlots = o->firstDynamicSlotIndex;
o->numAllocatedSlots = 0; // this var only tracks dynamic slots so clear local+constant allocations o->numAllocatedSlots = 0; // this var only tracks dynamic slots so clear local+constant allocations
o->block.initStackIndex = o->stackIndex; o->block.initStackIndex = o->firstDynamicStackIndex = o->stackIndex; m3log (compile, "start stack index: %d", (u32) o->firstDynamicStackIndex);
_ (EmitOp (o, op_Entry)); _ (EmitOp (o, op_Entry));
EmitPointer (o, io_function); EmitPointer (o, io_function);
@ -2149,13 +2255,13 @@ _ (Compile_BlockStatements (o));
io_function->compiled = pc; io_function->compiled = pc;
u32 numConstants = o->constSlotIndex - o->firstConstSlotIndex; u32 numConstantSlots = o->maxConstSlotIndex - o->firstConstSlotIndex; m3log (compile, "unique constant slots: %d; unused slots: %d", numConstantSlots, o->firstDynamicSlotIndex - o->maxConstSlotIndex);
io_function->numConstantBytes = numConstants * sizeof (m3slot_t); m3log (compile, "unique constants: %d; unused slots: %d", numConstants, o->firstSlotIndex - o->constSlotIndex); io_function->numConstantBytes = numConstantSlots * sizeof (u32);
if (numConstants) if (numConstantSlots)
{ {
_ (m3Alloc (& io_function->constants, m3slot_t, numConstants)); _ (m3Alloc (& io_function->constants, u32, numConstantSlots));
memcpy (io_function->constants, o->constants, io_function->numConstantBytes); memcpy (io_function->constants, o->constants, io_function->numConstantBytes);
} }

@ -62,7 +62,7 @@ M3CompilationScope;
typedef M3CompilationScope * IM3CompilationScope; typedef M3CompilationScope * IM3CompilationScope;
static const u16 c_m3RegisterUnallocated = 0; static const u16 c_m3MaxFunctionSlots = d_m3MaxFunctionStackHeight * 2;
typedef struct typedef struct
{ {
@ -83,20 +83,23 @@ typedef struct
u32 numEmits; u32 numEmits;
u32 numOpcodes; u32 numOpcodes;
u16 firstSlotIndex; // numArgs + numLocals + numReservedConstants. the first mutable slot available to the compiler. u16 firstDynamicStackIndex;
u16 stackIndex; // current stack index u16 stackIndex; // current stack index
u16 firstConstSlotIndex; u16 firstConstSlotIndex;
u16 constSlotIndex; // as const's are encountered during compilation this tracks their location in the "real" stack u16 maxConstSlotIndex; // as const's are encountered during compilation this tracks their location in the "real" stack
u16 firstLocalSlotIndex;
u16 firstDynamicSlotIndex; // numArgs + numLocals + numReservedConstants. the first mutable slot available to the compiler.
u64 constants [d_m3MaxConstantTableSize]; u32 constants [d_m3MaxConstantTableSize];
// 'wasmStack' is unused for args/locals. for the dynamic portion of the stack, 'wasmStack' holds slot locations // 'wasmStack' holds slot locations
u16 wasmStack [d_m3MaxFunctionStackHeight]; u16 wasmStack [d_m3MaxFunctionStackHeight];
u8 typeStack [d_m3MaxFunctionStackHeight]; u8 typeStack [d_m3MaxFunctionStackHeight];
// 'm3Slots' contains allocation usage counts // 'm3Slots' contains allocation usage counts
u8 m3Slots [d_m3MaxFunctionStackHeight]; u8 m3Slots [c_m3MaxFunctionSlots];
u16 numAllocatedSlots; u16 numAllocatedSlots;
@ -145,6 +148,9 @@ extern const M3OpInfo c_operations [];
//----------------------------------------------------------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------------------------------------------------------
u16 GetTypeNumSlots (u8 i_type);
void AlignSlotIndexToType (u16 * io_slotIndex, u8 i_type);
bool IsRegisterAllocated (IM3Compilation o, u32 i_register); bool IsRegisterAllocated (IM3Compilation o, u32 i_register);
bool IsRegisterLocation (i16 i_location); bool IsRegisterLocation (i16 i_location);
bool IsFpRegisterLocation (i16 i_location); bool IsFpRegisterLocation (i16 i_location);
@ -158,7 +164,7 @@ M3Result Compile_BlockStatements (IM3Compilation io);
M3Result Compile_Function (IM3Function io_function); M3Result Compile_Function (IM3Function io_function);
bool PeekNextOpcode (IM3Compilation o, u8 i_opcode); bool PeekNextOpcode (IM3Compilation o, u8 i_opcode);
u16 GetMaxExecSlot (IM3Compilation o); u16 GetMaxUsedSlotPlusOne (IM3Compilation o);
d_m3EndExternC d_m3EndExternC

@ -56,11 +56,11 @@
# define d_m3LogParse 0 // .wasm binary decoding info # define d_m3LogParse 0 // .wasm binary decoding info
# define d_m3LogModule 0 // Wasm module info # define d_m3LogModule 0 // Wasm module info
# define d_m3LogCompile 0 // wasm -> metacode generation phase # define d_m3LogCompile 1 // wasm -> metacode generation phase
# define d_m3LogWasmStack 0 // dump the wasm stack when pushed or popped # define d_m3LogWasmStack 1 // dump the wasm stack when pushed or popped
# define d_m3LogEmit 0 // metacode generation info # define d_m3LogEmit 0 // metacode generation info
# define d_m3LogCodePages 0 // dump metacode pages when released # define d_m3LogCodePages 0 // dump metacode pages when released
# define d_m3LogExec 0 // low-level interpreter specific logs # define d_m3LogExec 1 // low-level interpreter specific logs
# define d_m3LogRuntime 0 // higher-level runtime information # define d_m3LogRuntime 0 // higher-level runtime information
# define d_m3LogStackTrace 0 // dump the call stack when traps occur # define d_m3LogStackTrace 0 // dump the call stack when traps occur
# define d_m3LogNativeStack 0 // track the memory usage of the C-stack # define d_m3LogNativeStack 0 // track the memory usage of the C-stack

@ -48,7 +48,7 @@ typedef const u8 * bytes_t;
typedef const u8 * const cbytes_t; typedef const u8 * const cbytes_t;
typedef i64 m3reg_t; typedef i64 m3reg_t;
typedef u64 m3slot_t; typedef u32 m3slot_t;
typedef m3slot_t * m3stack_t; typedef m3slot_t * m3stack_t;
typedef typedef

@ -234,11 +234,11 @@ M3Result EvaluateExpression (IM3Module i_module, void * o_expressed, u8 i_type
{ {
if (SizeOfType (i_type) == sizeof (u32)) if (SizeOfType (i_type) == sizeof (u32))
{ {
* (u32 *) o_expressed = *stack & 0xFFFFFFFF; * (u32 *) o_expressed = * ((u32 *) stack);
} }
else if (SizeOfType (i_type) == sizeof (u64)) else if (SizeOfType (i_type) == sizeof (u64))
{ {
* (u64 *) o_expressed = *stack; * (u64 *) o_expressed = * ((u64 *) stack);
} }
} }
} }

@ -49,7 +49,10 @@ typedef struct M3Function
u16 maxStackSlots; u16 maxStackSlots;
u16 numArgSlots;
u16 numLocals; // not including args u16 numLocals; // not including args
u16 numLocalBytes;
u16 numConstantBytes; u16 numConstantBytes;
void * constants; void * constants;

@ -51,6 +51,7 @@ d_m3OpDef (Call)
d_m3OpDef (CallIndirect) d_m3OpDef (CallIndirect)
{ {
u32 tableIndex = slot (u32);
IM3Module module = immediate (IM3Module); IM3Module module = immediate (IM3Module);
IM3FuncType type = immediate (IM3FuncType); IM3FuncType type = immediate (IM3FuncType);
i32 stackOffset = immediate (i32); i32 stackOffset = immediate (i32);
@ -58,9 +59,7 @@ d_m3OpDef (CallIndirect)
m3stack_t sp = _sp + stackOffset; m3stack_t sp = _sp + stackOffset;
i32 tableIndex = * (i32 *) (sp + type->numArgs); if (tableIndex < module->table0Size)
if (tableIndex >= 0 and (u32)tableIndex < module->table0Size)
{ {
m3ret_t r = m3Err_none; m3ret_t r = m3Err_none;
@ -208,14 +207,13 @@ d_m3OpDef (Entry)
{ {
function->hits++; m3log (exec, " enter %p > %s %s", _pc - 2, function->name ? function->name : ".unnamed", SPrintFunctionArgList (function, _sp)); function->hits++; m3log (exec, " enter %p > %s %s", _pc - 2, function->name ? function->name : ".unnamed", SPrintFunctionArgList (function, _sp));
m3stack_t stack = _sp + function->funcType->numArgs; u8 * stack = (u8 *) (_sp + function->numArgSlots);
u32 numLocals = function->numLocals;
// zero locals memset (stack, 0x0, function->numLocalBytes);
while (numLocals--) stack += function->numLocalBytes;
* (stack++) = 0;
if (function->constants) { if (function->constants)
{
memcpy (stack, function->constants, function->numConstantBytes); memcpy (stack, function->constants, function->numConstantBytes);
} }

@ -302,7 +302,9 @@ void dump_type_stack (IM3Compilation o)
applied until this compilation stage is finished applied until this compilation stage is finished
-- constants are not statically represented in the type stack (like args & constants) since they don't have/need -- constants are not statically represented in the type stack (like args & constants) since they don't have/need
write counts write counts
-- the number shown for static args and locals (value in wasmStack [i]) represents the write count for the variable -- the number shown for static args and locals (value in wasmStack [i]) represents the write count for the variable
-- (does Wasm ever write to an arg? I dunno/don't remember.) -- (does Wasm ever write to an arg? I dunno/don't remember.)
-- the number for the dynamic stack values represents the slot number. -- the number for the dynamic stack values represents the slot number.
-- if the slot index points to arg, local or constant it's denoted with a lowercase 'a', 'l' or 'c' -- if the slot index points to arg, local or constant it's denoted with a lowercase 'a', 'l' or 'c'
@ -316,24 +318,10 @@ void dump_type_stack (IM3Compilation o)
printf (" "); printf (" ");
printf ("%s %s ", regAllocated [0] ? "(r0)" : " ", regAllocated [1] ? "(fp0)" : " "); printf ("%s %s ", regAllocated [0] ? "(r0)" : " ", regAllocated [1] ? "(fp0)" : " ");
u32 numArgs = GetFunctionNumArgs (o->function); for (u32 i = o->firstDynamicStackIndex; i < o->stackIndex; ++i)
for (u32 i = 0; i < o->stackIndex; ++i)
{ {
if (i == o->firstConstSlotIndex)
printf (" | "); // divide the static & dynamic portion of the stack
// printf (" %d:%s.", i, c_waTypes [o->typeStack [i]]);
printf (" %s", c_waCompactTypes [o->typeStack [i]]); printf (" %s", c_waCompactTypes [o->typeStack [i]]);
if (i < o->firstConstSlotIndex)
{
u16 writeCount = o->wasmStack [i];
printf ((i < numArgs) ? "A" : "L"); // arg / local
printf ("%d", (i32) writeCount); // writeCount
}
else
{
u16 slot = o->wasmStack [i]; u16 slot = o->wasmStack [i];
if (IsRegisterLocation (slot)) if (IsRegisterLocation (slot))
@ -345,19 +333,18 @@ void dump_type_stack (IM3Compilation o)
} }
else else
{ {
if (slot < o->firstSlotIndex) if (slot < o->firstDynamicSlotIndex)
{ {
if (slot >= o->firstConstSlotIndex) if (slot >= o->firstConstSlotIndex)
printf ("c"); printf ("c");
else if (slot >= numArgs) else if (slot >= o->function->numArgSlots)
printf ("l"); printf ("L");
else else
printf ("a"); printf ("a");
} }
printf ("%d", (i32) slot); // slot printf ("%d", (i32) slot); // slot
} }
}
printf (" "); printf (" ");
} }

Loading…
Cancel
Save