Skip to content

Commit 641f40d

Browse files
committed
Merge-in fixes
1 parent 7143dfa commit 641f40d

File tree

3 files changed

+52
-69
lines changed

3 files changed

+52
-69
lines changed

src/m3_compile.c

Lines changed: 49 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -151,14 +151,14 @@ bool IsStackTopMinus1InRegister (IM3Compilation o)
151151
}
152152

153153

154-
void MarkExecSlotAllocated (IM3Compilation o, u16 i_slot)
154+
void MarkSlotAllocated (IM3Compilation o, u16 i_slot)
155155
{ d_m3Assert (o->m3Slots [i_slot] == 0); // shouldn't be already allocated
156156
o->m3Slots [i_slot] = 1;
157157
o->numAllocatedExecSlots++;
158158
}
159159

160160

161-
bool AllocateExecSlot (IM3Compilation o, u16 * o_execSlot)
161+
bool AllocateSlot (IM3Compilation o, u16 * o_execSlot)
162162
{
163163
bool found = false;
164164

@@ -168,7 +168,7 @@ bool AllocateExecSlot (IM3Compilation o, u16 * o_execSlot)
168168
{
169169
if (o->m3Slots [i] == 0)
170170
{
171-
MarkExecSlotAllocated (o, i);
171+
MarkSlotAllocated (o, i);
172172
* o_execSlot = i;
173173

174174
found = true;
@@ -177,16 +177,32 @@ bool AllocateExecSlot (IM3Compilation o, u16 * o_execSlot)
177177

178178
++i;
179179
}
180-
// printf ("allocate %d\n", (i32) i);
181180

182181
return found;
183182
}
184183

185184

185+
M3Result IncrementSlotUsageCount (IM3Compilation o, u16 i_slot)
186+
{ d_m3Assert (i_slot < d_m3MaxFunctionStackHeight);
187+
M3Result result = m3Err_none; d_m3Assert (o->m3Slots [i_slot] > 0);
188+
189+
// OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case
190+
// would scan 'wasmStack' to determine the actual usage count
191+
if (o->m3Slots [i_slot] < 0xFF)
192+
{
193+
o->m3Slots [i_slot]++;
194+
}
195+
else result = "slot usage count overflow";
196+
197+
return result;
198+
}
199+
200+
186201
void DeallocateSlot (IM3Compilation o, i16 i_slotIndex)
187202
{ d_m3Assert (i_slotIndex >= o->firstSlotIndex);
188-
o->numAllocatedExecSlots--; d_m3Assert (o->m3Slots [i_slotIndex]);
189-
o->m3Slots [i_slotIndex] --;
203+
d_m3Assert (o->m3Slots [i_slotIndex]);
204+
if (-- o->m3Slots [i_slotIndex] == 0)
205+
o->numAllocatedExecSlots--;
190206
}
191207

192208

@@ -259,7 +275,7 @@ M3Result PreserveRegisterIfOccupied (IM3Compilation o, u8 i_registerType)
259275

260276
// and point to an exec slot
261277
u16 slot;
262-
if (AllocateExecSlot (o, & slot))
278+
if (AllocateSlot (o, & slot))
263279
{
264280
o->wasmStack [stackIndex] = slot;
265281

@@ -408,7 +424,7 @@ M3Result _PushAllocatedSlotAndEmit (IM3Compilation o, u8 i_m3Type, bool i_doEm
408424

409425
u16 slot;
410426

411-
if (AllocateExecSlot (o, & slot))
427+
if (AllocateSlot (o, & slot))
412428
{
413429
_ (Push (o, i_m3Type, slot));
414430

@@ -647,7 +663,7 @@ M3Result ReturnStackTop (IM3Compilation o)
647663

648664

649665
// if local is unreferenced, o_preservedSlotIndex will be equal to localIndex on return
650-
M3Result IsLocalReferencedWithCurrentBlock (IM3Compilation o, u16 * o_preservedSlotIndex, u32 i_localIndex)
666+
M3Result FindReferencedLocalsWithCurrentBlock (IM3Compilation o, u16 * o_preservedSlotIndex, u32 i_localIndex)
651667
{
652668
M3Result result = m3Err_none;
653669

@@ -671,13 +687,11 @@ M3Result IsLocalReferencedWithCurrentBlock (IM3Compilation o, u16 * o_preserve
671687
{
672688
if (* o_preservedSlotIndex == i_localIndex)
673689
{
674-
if (not AllocateExecSlot (o, o_preservedSlotIndex)) {
690+
if (not AllocateSlot (o, o_preservedSlotIndex))
675691
_throw (m3Err_functionStackOverflow);
676-
}
677-
} else {
678-
o->m3Slots [*o_preservedSlotIndex] += 1;
679-
o->numAllocatedExecSlots++;
680692
}
693+
else
694+
_ (IncrementSlotUsageCount (o, * o_preservedSlotIndex));
681695

682696
o->wasmStack [i] = * o_preservedSlotIndex;
683697
}
@@ -842,7 +856,7 @@ _ (ReadLEB_u32 (& localSlot, & o->wasm, o->wasmEnd)); // printf (
842856
if (localSlot < GetFunctionNumArgsAndLocals (o->function))
843857
{
844858
u16 preserveSlot;
845-
_ (IsLocalReferencedWithCurrentBlock (o, & preserveSlot, localSlot)); // preserve will be different than local, if referenced
859+
_ (FindReferencedLocalsWithCurrentBlock (o, & preserveSlot, localSlot)); // preserve will be different than local, if referenced
846860

847861
if (preserveSlot == localSlot)
848862
_ (CopyTopSlot (o, localSlot))
@@ -1130,9 +1144,7 @@ _ (Pop (o));
11301144

11311145
if (numReturns)
11321146
{
1133-
o->m3Slots [execTop] = 1;
1134-
o->numAllocatedExecSlots++;
1135-
1147+
MarkSlotAllocated (o, execTop);
11361148
_ (Push (o, i_type->returnType, execTop));
11371149
}
11381150

@@ -1266,62 +1278,33 @@ _ (NormalizeType (o_blockType, type)); if (* o_
12661278
// (versus the COW strategy that happens in SetLocal within a block). Initially, I thought I'd have to be clever and
12671279
// retroactively insert preservation code to avoid impacting general performance, but this compilation pattern doesn't
12681280
// really occur in compiled Wasm code, so PreserveArgsAndLocals generally does nothing. Still waiting on a real-world case!
1269-
M3Result PreserveArgsAndLocals (IM3Compilation o) {
1281+
M3Result PreserveArgsAndLocals (IM3Compilation o)
1282+
{
12701283
M3Result result = m3Err_none;
12711284

1272-
if (o->block.initStackIndex >= o->stackIndex) // return if block stack is empty.
1273-
return result;
1274-
1275-
bool needed = false;
1276-
u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o->function);
1277-
1278-
for (u32 i = o->block.initStackIndex; i < o->stackIndex; ++i)
1279-
{
1280-
if (o->wasmStack [i] < numArgsAndLocals)
1281-
{
1282-
needed = true;
1283-
break;
1284-
}
1285-
}
1286-
1287-
if (!needed) // return if no references to locals.
1288-
return result;
1289-
1290-
#if defined(M3_COMPILER_MSVC)
1291-
u16 preservedStackIndex [128]; // hmm, heap allocate?...
1292-
1293-
if (numArgsAndLocals > 128)
1294-
_throw ("argument/local count overflow");
1295-
#else
1296-
u16 preservedStackIndex [numArgsAndLocals];
1297-
#endif
1298-
1299-
memset (preservedStackIndex, 0xff, numArgsAndLocals * sizeof (u16));
1300-
1301-
for (u32 i = o->block.initStackIndex; i < o->stackIndex; ++i)
1285+
if (o->stackIndex > o->firstSlotIndex)
13021286
{
1303-
if (o->wasmStack [i] < numArgsAndLocals)
1287+
u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o->function);
1288+
1289+
for (u32 i = 0; i < numArgsAndLocals; ++i)
13041290
{
1305-
u16 localSlot = o->wasmStack [i];
1306-
1307-
if (preservedStackIndex [localSlot] == 0xffff)
1291+
u16 preservedSlotIndex;
1292+
_ (FindReferencedLocalsWithCurrentBlock (o, & preservedSlotIndex, i));
1293+
1294+
if (preservedSlotIndex != i)
13081295
{
1309-
if (not AllocateExecSlot (o, & preservedStackIndex [localSlot]))
1310-
_throw (m3Err_functionStackOverflow);
1311-
1312-
_ (EmitOp (o, op_CopySlot_64));
1313-
EmitConstant (o, preservedStackIndex [localSlot]);
1314-
EmitConstant (o, localSlot);
1315-
} else {
1316-
o->m3Slots [preservedStackIndex [localSlot]] += 1;
1317-
o->numAllocatedExecSlots++;
1296+
u8 type = GetStackType (o, i);
1297+
IM3Operation op = Is64BitType (type) ? op_CopySlot_64 : op_CopySlot_32;
1298+
1299+
EmitOp (o, op);
1300+
EmitSlotOffset (o, preservedSlotIndex);
1301+
EmitSlotOffset (o, i);
13181302
}
1319-
1320-
o->wasmStack [i] = preservedStackIndex [localSlot];
13211303
}
13221304
}
1323-
1324-
_catch: return result;
1305+
1306+
_catch:
1307+
return result;
13251308
}
13261309

13271310

src/m3_compile.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ typedef struct
9494
u16 wasmStack [d_m3MaxFunctionStackHeight];
9595
u8 typeStack [d_m3MaxFunctionStackHeight];
9696

97-
// OPTZ: this array just contains single bit allocation flags. could be fused with the typeStack to conserve space
97+
// 'm3Slots' contains allocation usage counts
9898
u8 m3Slots [d_m3MaxFunctionStackHeight];
9999

100100
u16 numAllocatedExecSlots;

src/m3_exec.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ d_m3OpDef (CallIndirect)
7070
{
7171
// TODO: this can eventually be simplified. By using a shared set of unique M3FuncType objects in
7272
// M3Environment, the compare can be reduced to a single pointer-compare operation
73-
73+
#if !defined(d_m3SkipCallCheck)
7474
if (type->numArgs != function->funcType->numArgs)
7575
{
7676
return m3Err_trapIndirectCallTypeMismatch;
@@ -88,7 +88,7 @@ d_m3OpDef (CallIndirect)
8888
return m3Err_trapIndirectCallTypeMismatch;
8989
}
9090
}
91-
91+
#endif
9292
if (not function->compiled)
9393
r = Compile_Function (function);
9494

0 commit comments

Comments
 (0)