@@ -151,14 +151,14 @@ bool IsStackTopMinus1InRegister (IM3Compilation o)
151
151
}
152
152
153
153
154
- void MarkExecSlotAllocated (IM3Compilation o , u16 i_slot )
154
+ void MarkSlotAllocated (IM3Compilation o , u16 i_slot )
155
155
{ d_m3Assert (o -> m3Slots [i_slot ] == 0 ); // shouldn't be already allocated
156
156
o -> m3Slots [i_slot ] = 1 ;
157
157
o -> numAllocatedExecSlots ++ ;
158
158
}
159
159
160
160
161
- bool AllocateExecSlot (IM3Compilation o , u16 * o_execSlot )
161
+ bool AllocateSlot (IM3Compilation o , u16 * o_execSlot )
162
162
{
163
163
bool found = false;
164
164
@@ -168,7 +168,7 @@ bool AllocateExecSlot (IM3Compilation o, u16 * o_execSlot)
168
168
{
169
169
if (o -> m3Slots [i ] == 0 )
170
170
{
171
- MarkExecSlotAllocated (o , i );
171
+ MarkSlotAllocated (o , i );
172
172
* o_execSlot = i ;
173
173
174
174
found = true;
@@ -177,16 +177,32 @@ bool AllocateExecSlot (IM3Compilation o, u16 * o_execSlot)
177
177
178
178
++ i ;
179
179
}
180
- // printf ("allocate %d\n", (i32) i);
181
180
182
181
return found ;
183
182
}
184
183
185
184
185
+ M3Result IncrementSlotUsageCount (IM3Compilation o , u16 i_slot )
186
+ { d_m3Assert (i_slot < d_m3MaxFunctionStackHeight );
187
+ M3Result result = m3Err_none ; d_m3Assert (o -> m3Slots [i_slot ] > 0 );
188
+
189
+ // OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case
190
+ // would scan 'wasmStack' to determine the actual usage count
191
+ if (o -> m3Slots [i_slot ] < 0xFF )
192
+ {
193
+ o -> m3Slots [i_slot ]++ ;
194
+ }
195
+ else result = "slot usage count overflow" ;
196
+
197
+ return result ;
198
+ }
199
+
200
+
186
201
// Releases one usage of slot 'i_slotIndex'.
// The slot's usage count is decremented; once it drops to zero the slot is
// free again and the tally of allocated slots is decremented as well.
void DeallocateSlot (IM3Compilation o, i16 i_slotIndex)
{
    d_m3Assert (i_slotIndex >= o->firstSlotIndex);
    d_m3Assert (o->m3Slots [i_slotIndex]);      // must currently be in use

    o->m3Slots [i_slotIndex] -= 1;

    if (o->m3Slots [i_slotIndex] == 0)
    {
        o->numAllocatedExecSlots -= 1;
    }
}
191
207
192
208
@@ -259,7 +275,7 @@ M3Result PreserveRegisterIfOccupied (IM3Compilation o, u8 i_registerType)
259
275
260
276
// and point to an exec slot
261
277
u16 slot ;
262
- if (AllocateExecSlot (o , & slot ))
278
+ if (AllocateSlot (o , & slot ))
263
279
{
264
280
o -> wasmStack [stackIndex ] = slot ;
265
281
@@ -408,7 +424,7 @@ M3Result _PushAllocatedSlotAndEmit (IM3Compilation o, u8 i_m3Type, bool i_doEm
408
424
409
425
u16 slot ;
410
426
411
- if (AllocateExecSlot (o , & slot ))
427
+ if (AllocateSlot (o , & slot ))
412
428
{
413
429
_ (Push (o , i_m3Type , slot ));
414
430
@@ -647,7 +663,7 @@ M3Result ReturnStackTop (IM3Compilation o)
647
663
648
664
649
665
// if local is unreferenced, o_preservedSlotIndex will be equal to localIndex on return
650
- M3Result IsLocalReferencedWithCurrentBlock (IM3Compilation o , u16 * o_preservedSlotIndex , u32 i_localIndex )
666
+ M3Result FindReferencedLocalsWithCurrentBlock (IM3Compilation o , u16 * o_preservedSlotIndex , u32 i_localIndex )
651
667
{
652
668
M3Result result = m3Err_none ;
653
669
@@ -671,13 +687,11 @@ M3Result IsLocalReferencedWithCurrentBlock (IM3Compilation o, u16 * o_preserve
671
687
{
672
688
if (* o_preservedSlotIndex == i_localIndex )
673
689
{
674
- if (not AllocateExecSlot (o , o_preservedSlotIndex )) {
690
+ if (not AllocateSlot (o , o_preservedSlotIndex ))
675
691
_throw (m3Err_functionStackOverflow );
676
- }
677
- } else {
678
- o -> m3Slots [* o_preservedSlotIndex ] += 1 ;
679
- o -> numAllocatedExecSlots ++ ;
680
692
}
693
+ else
694
+ _ (IncrementSlotUsageCount (o , * o_preservedSlotIndex ));
681
695
682
696
o -> wasmStack [i ] = * o_preservedSlotIndex ;
683
697
}
@@ -842,7 +856,7 @@ _ (ReadLEB_u32 (& localSlot, & o->wasm, o->wasmEnd)); // printf (
842
856
if (localSlot < GetFunctionNumArgsAndLocals (o -> function ))
843
857
{
844
858
u16 preserveSlot ;
845
- _ (IsLocalReferencedWithCurrentBlock (o , & preserveSlot , localSlot )); // preserve will be different than local, if referenced
859
+ _ (FindReferencedLocalsWithCurrentBlock (o , & preserveSlot , localSlot )); // preserve will be different than local, if referenced
846
860
847
861
if (preserveSlot == localSlot )
848
862
_ (CopyTopSlot (o , localSlot ))
@@ -1130,9 +1144,7 @@ _ (Pop (o));
1130
1144
1131
1145
if (numReturns )
1132
1146
{
1133
- o -> m3Slots [execTop ] = 1 ;
1134
- o -> numAllocatedExecSlots ++ ;
1135
-
1147
+ MarkSlotAllocated (o , execTop );
1136
1148
_ (Push (o , i_type -> returnType , execTop ));
1137
1149
}
1138
1150
@@ -1266,62 +1278,33 @@ _ (NormalizeType (o_blockType, type)); if (* o_
1266
1278
// (versus the COW strategy that happens in SetLocal within a block). Initially, I thought I'd have to be clever and
1267
1279
// retroactively insert preservation code to avoid impacting general performance, but this compilation pattern doesn't
1268
1280
// really occur in compiled Wasm code, so PreserveArgsAndLocals generally does nothing. Still waiting on a real-world case!
1269
- M3Result PreserveArgsAndLocals (IM3Compilation o ) {
1281
+ M3Result PreserveArgsAndLocals (IM3Compilation o )
1282
+ {
1270
1283
M3Result result = m3Err_none ;
1271
1284
1272
- if (o -> block .initStackIndex >= o -> stackIndex ) // return if block stack is empty.
1273
- return result ;
1274
-
1275
- bool needed = false;
1276
- u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o -> function );
1277
-
1278
- for (u32 i = o -> block .initStackIndex ; i < o -> stackIndex ; ++ i )
1279
- {
1280
- if (o -> wasmStack [i ] < numArgsAndLocals )
1281
- {
1282
- needed = true;
1283
- break ;
1284
- }
1285
- }
1286
-
1287
- if (!needed ) // return if no references to locals.
1288
- return result ;
1289
-
1290
- #if defined(M3_COMPILER_MSVC )
1291
- u16 preservedStackIndex [128 ]; // hmm, heap allocate?...
1292
-
1293
- if (numArgsAndLocals > 128 )
1294
- _throw ("argument/local count overflow" );
1295
- #else
1296
- u16 preservedStackIndex [numArgsAndLocals ];
1297
- #endif
1298
-
1299
- memset (preservedStackIndex , 0xff , numArgsAndLocals * sizeof (u16 ));
1300
-
1301
- for (u32 i = o -> block .initStackIndex ; i < o -> stackIndex ; ++ i )
1285
+ if (o -> stackIndex > o -> firstSlotIndex )
1302
1286
{
1303
- if (o -> wasmStack [i ] < numArgsAndLocals )
1287
+ u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o -> function );
1288
+
1289
+ for (u32 i = 0 ; i < numArgsAndLocals ; ++ i )
1304
1290
{
1305
- u16 localSlot = o -> wasmStack [i ];
1306
-
1307
- if (preservedStackIndex [localSlot ] == 0xffff )
1291
+ u16 preservedSlotIndex ;
1292
+ _ (FindReferencedLocalsWithCurrentBlock (o , & preservedSlotIndex , i ));
1293
+
1294
+ if (preservedSlotIndex != i )
1308
1295
{
1309
- if (not AllocateExecSlot (o , & preservedStackIndex [localSlot ]))
1310
- _throw (m3Err_functionStackOverflow );
1311
-
1312
- _ (EmitOp (o , op_CopySlot_64 ));
1313
- EmitConstant (o , preservedStackIndex [localSlot ]);
1314
- EmitConstant (o , localSlot );
1315
- } else {
1316
- o -> m3Slots [preservedStackIndex [localSlot ]] += 1 ;
1317
- o -> numAllocatedExecSlots ++ ;
1296
+ u8 type = GetStackType (o , i );
1297
+ IM3Operation op = Is64BitType (type ) ? op_CopySlot_64 : op_CopySlot_32 ;
1298
+
1299
+ EmitOp (o , op );
1300
+ EmitSlotOffset (o , preservedSlotIndex );
1301
+ EmitSlotOffset (o , i );
1318
1302
}
1319
-
1320
- o -> wasmStack [i ] = preservedStackIndex [localSlot ];
1321
1303
}
1322
1304
}
1323
-
1324
- _catch : return result ;
1305
+
1306
+ _catch :
1307
+ return result ;
1325
1308
}
1326
1309
1327
1310
0 commit comments