Skip to content

Commit 6a047a9

Browse files
authored
[wasm] More jiterpreter cleanup (#78519)
Move more jiterpreter configuration and constants into options. Fix should_generate_trace_here not scanning across multiple basic blocks. Disable specialized JIT call in threaded wasm mode (though I think it might work, it's better to turn it off to be sure for now). Introduces genmintops.py, a script that automatically generates mintops.ts from mintops.def. Adjust typescript config to make it able to find the generated mintops.ts (and fix ESLint on Linux). Unroll memsets below a certain size into raw wasm opcodes, because v8 generates expensive function calls for memset and memcpy; unrolling memcpy is a TODO for later. Rename "always generate" to "disable heuristic" to more accurately describe what it does. Fix jiterpreter_dump_stats hiding errors if startup failed before cwraps were ready. Misc. code cleanup.
1 parent 38bcdd2 commit 6a047a9

18 files changed

+437
-1879
lines changed

src/mono/mono/mini/interp/interp.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2147,7 +2147,7 @@ typedef struct {
21472147
gpointer *many_args;
21482148
} InterpEntryData;
21492149

2150-
static gboolean
2150+
static MONO_ALWAYS_INLINE gboolean
21512151
is_method_multicastdelegate_invoke (MonoMethod *method)
21522152
{
21532153
return m_class_get_parent (method->klass) == mono_defaults.multicastdelegate_class && !strcmp (method->name, "Invoke");
@@ -2672,16 +2672,17 @@ do_jit_call (ThreadContext *context, stackval *ret_sp, stackval *sp, InterpFrame
26722672
goto epilogue;
26732673
} else {
26742674
int count = cinfo->hit_count;
2675-
if (count == JITERPRETER_JIT_CALL_TRAMPOLINE_HIT_COUNT) {
2675+
if (count == mono_opt_jiterpreter_jit_call_trampoline_hit_count) {
26762676
void *fn = cinfo->no_wrapper ? cinfo->addr : cinfo->wrapper;
26772677
mono_interp_jit_wasm_jit_call_trampoline (
26782678
rmethod, cinfo, fn, rmethod->hasthis, rmethod->param_count,
26792679
rmethod->arg_offsets, mono_aot_mode == MONO_AOT_MODE_LLVMONLY_INTERP
26802680
);
26812681
} else {
2682-
if (count <= JITERPRETER_JIT_CALL_QUEUE_FLUSH_THRESHOLD)
2682+
int excess = count - mono_opt_jiterpreter_jit_call_queue_flush_threshold;
2683+
if (excess <= 0)
26832684
cinfo->hit_count++;
2684-
if (count == JITERPRETER_JIT_CALL_QUEUE_FLUSH_THRESHOLD)
2685+
if (excess == 0)
26852686
mono_interp_flush_jitcall_queue ();
26862687
}
26872688
}

src/mono/mono/mini/interp/jiterpreter.c

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -840,32 +840,35 @@ jiterp_should_abort_trace (InterpInst *ins, gboolean *inside_branch_block)
840840
}
841841

842842
static gboolean
843-
should_generate_trace_here (InterpBasicBlock *bb, InterpInst *last_ins) {
843+
should_generate_trace_here (InterpBasicBlock *bb) {
844844
int current_trace_length = 0;
845845
// A preceding trace may have been in a branch block, but we only care whether the current
846846
// trace will have a branch block opened, because that determines whether calls and branches
847847
// will unconditionally abort the trace or not.
848848
gboolean inside_branch_block = FALSE;
849849

850-
// We scan forward through the entire method body starting from the current block, not just
851-
// the current block (since the actual trace compiler doesn't know about block boundaries).
852-
for (InterpInst *ins = bb->first_ins; (ins != NULL) && (ins != last_ins); ins = ins->next) {
853-
int category = jiterp_should_abort_trace(ins, &inside_branch_block);
854-
switch (category) {
855-
case TRACE_ABORT: {
856-
jiterpreter_abort_counts[ins->opcode]++;
857-
return current_trace_length >= mono_opt_jiterpreter_minimum_trace_length;
850+
while (bb) {
851+
// We scan forward through the entire method body starting from the current block, not just
852+
// the current block (since the actual trace compiler doesn't know about block boundaries).
853+
for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) {
854+
int category = jiterp_should_abort_trace(ins, &inside_branch_block);
855+
switch (category) {
856+
case TRACE_ABORT:
857+
jiterpreter_abort_counts[ins->opcode]++;
858+
return current_trace_length >= mono_opt_jiterpreter_minimum_trace_length;
859+
case TRACE_IGNORE:
860+
break;
861+
default:
862+
current_trace_length++;
863+
break;
858864
}
859-
case TRACE_IGNORE:
860-
break;
861-
default:
862-
current_trace_length++;
863-
break;
865+
866+
// Once we know the trace is long enough we can stop scanning.
867+
if (current_trace_length >= mono_opt_jiterpreter_minimum_trace_length)
868+
return TRUE;
864869
}
865870

866-
// Once we know the trace is long enough we can stop scanning.
867-
if (current_trace_length >= mono_opt_jiterpreter_minimum_trace_length)
868-
return TRUE;
871+
bb = bb->next_bb;
869872
}
870873

871874
return FALSE;
@@ -908,12 +911,12 @@ jiterp_insert_entry_points (void *_td)
908911
// multiple times and waste some work. At present this is unavoidable because
909912
// control flow means we can end up with two traces covering different subsets
910913
// of the same method in order to handle loops and resuming
911-
gboolean should_generate = enabled && should_generate_trace_here(bb, td->last_ins);
914+
gboolean should_generate = enabled && should_generate_trace_here(bb);
912915

913916
if (mono_opt_jiterpreter_call_resume_enabled && bb->contains_call_instruction)
914917
enter_at_next = TRUE;
915918

916-
if (mono_opt_jiterpreter_always_generate)
919+
if (mono_opt_jiterpreter_disable_heuristic)
917920
should_generate = TRUE;
918921

919922
if (enabled && should_generate) {

src/mono/mono/mini/interp/jiterpreter.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,15 @@
33

44
#ifdef HOST_BROWSER
55

6+
#ifdef DISABLE_THREADS
7+
#define JITERPRETER_ENABLE_JIT_CALL_TRAMPOLINES 1
68
// enables specialized mono_llvm_cpp_catch_exception replacement (see jiterpreter-jit-call.ts)
79
// works even if the jiterpreter is otherwise disabled.
810
#define JITERPRETER_ENABLE_SPECIALIZED_JIT_CALL 1
11+
#else
12+
#define JITERPRETER_ENABLE_JIT_CALL_TRAMPOLINES 0
13+
#define JITERPRETER_ENABLE_SPECIALIZED_JIT_CALL 0
14+
#endif // DISABLE_THREADS
915

1016
// mono_interp_tier_prepare_jiterpreter will return these special values if it doesn't
1117
// have a function pointer for a specific entry point.
@@ -14,13 +20,6 @@
1420
// NOT_JITTED indicates that the trace was not jitted and it should be turned into a NOP
1521
#define JITERPRETER_NOT_JITTED 1
1622

17-
#define JITERPRETER_ENABLE_JIT_CALL_TRAMPOLINES 1
18-
// After a do_jit_call call site is hit this many times, we will queue it to be jitted
19-
#define JITERPRETER_JIT_CALL_TRAMPOLINE_HIT_COUNT 2999
20-
// If a do_jit_call site is hit this many times without being jitted (due to waiting in
21-
// the queue), we will flush the queue immediately
22-
#define JITERPRETER_JIT_CALL_QUEUE_FLUSH_THRESHOLD 10000
23-
2423
typedef const ptrdiff_t (*JiterpreterThunk) (void *frame, void *pLocals);
2524
typedef void (*WasmJitCallThunk) (void *extra_arg, void *ret_sp, void *sp, gboolean *thrown);
2625
typedef void (*WasmDoJitCall) (gpointer cb, gpointer arg, gboolean *out_thrown);

src/mono/mono/mini/interp/mintops.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
* optype describes the contents of the instruction, following the dreg/sreg offsets.
99
*/
1010

11+
/*
12+
* This file is parsed by genmintops.py to generate typescript during the wasm build process,
13+
* so if you make any changes to its syntax you will need to update that script.
14+
*/
15+
1116
OPDEF(MINT_NOP, "nop", 1, 0, 0, MintOpNoArgs)
1217
OPDEF(MINT_NIY, "niy", 1, 0, 0, MintOpNoArgs)
1318
OPDEF(MINT_DEF, "def", 2, 1, 0, MintOpNoArgs)

src/mono/mono/mini/interp/transform.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10071,7 +10071,8 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
1007110071
interp_optimize_code (td);
1007210072
interp_alloc_offsets (td);
1007310073
#if HOST_BROWSER
10074-
jiterp_insert_entry_points (td);
10074+
if (mono_interp_tiering_enabled ())
10075+
jiterp_insert_entry_points (td);
1007510076
#endif
1007610077
}
1007710078

src/mono/mono/utils/options-def.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,21 +65,21 @@ DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assem
6565

6666
// the jiterpreter is not yet thread safe due to the need to synchronize function pointers
6767
// and wasm modules between threads. before these can be enabled we need to implement all that
68-
#if FEATURE_WASM_THREADS
68+
#ifdef DISABLE_THREADS
6969
// traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces
70-
DEFINE_BOOL_READONLY(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM")
70+
DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM")
7171
// interp_entry_enabled controls whether specialized interp_entry wrappers will be jitted
72-
DEFINE_BOOL_READONLY(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", FALSE, "JIT specialized WASM interp_entry wrappers")
72+
DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", FALSE, "JIT specialized WASM interp_entry wrappers")
7373
// jit_call_enabled controls whether do_jit_call will use specialized trampolines for hot call sites
74-
DEFINE_BOOL_READONLY(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", FALSE, "JIT specialized WASM do_jit_call trampolines")
74+
DEFINE_BOOL(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", FALSE, "JIT specialized WASM do_jit_call trampolines")
7575
#else
7676
// traces_enabled controls whether the jiterpreter will JIT individual interpreter opcode traces
77-
DEFINE_BOOL(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM")
77+
DEFINE_BOOL_READONLY(jiterpreter_traces_enabled, "jiterpreter-traces-enabled", FALSE, "JIT interpreter opcode traces into WASM")
7878
// interp_entry_enabled controls whether specialized interp_entry wrappers will be jitted
79-
DEFINE_BOOL(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", FALSE, "JIT specialized WASM interp_entry wrappers")
79+
DEFINE_BOOL_READONLY(jiterpreter_interp_entry_enabled, "jiterpreter-interp-entry-enabled", FALSE, "JIT specialized WASM interp_entry wrappers")
8080
// jit_call_enabled controls whether do_jit_call will use specialized trampolines for hot call sites
81-
DEFINE_BOOL(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", FALSE, "JIT specialized WASM do_jit_call trampolines")
82-
#endif // FEATURE_WASM_THREADS
81+
DEFINE_BOOL_READONLY(jiterpreter_jit_call_enabled, "jiterpreter-jit-call-enabled", FALSE, "JIT specialized WASM do_jit_call trampolines")
82+
#endif // DISABLE_THREADS
8383

8484
// enables using WASM try/catch_all instructions where appropriate (currently only do_jit_call),
8585
// will be automatically turned off if the instructions are not available.
@@ -93,16 +93,24 @@ DEFINE_BOOL(jiterpreter_call_resume_enabled, "jiterpreter-call-resume-enabled",
9393
// For locations where the jiterpreter heuristic says we will be unable to generate
9494
// a trace, insert an entry point opcode anyway. This enables collecting accurate
9595
// stats for options like estimateHeat, but raises overhead.
96-
DEFINE_BOOL(jiterpreter_always_generate, "jiterpreter-always-generate", FALSE, "Always insert trace entry points for more accurate statistics")
96+
DEFINE_BOOL(jiterpreter_disable_heuristic, "jiterpreter-disable-heuristic", FALSE, "Always insert trace entry points for more accurate statistics")
9797
// Automatically prints stats at app exit or when jiterpreter_dump_stats is called
9898
DEFINE_BOOL(jiterpreter_stats_enabled, "jiterpreter-stats-enabled", FALSE, "Automatically print jiterpreter statistics")
9999
// Continue counting hits for traces that fail to compile and use it to estimate
100100
// the relative importance of the opcode that caused them to abort
101101
DEFINE_BOOL(jiterpreter_estimate_heat, "jiterpreter-estimate-heat", FALSE, "Maintain accurate hit count for all trace entry points")
102102
// Count the number of times a trace bails out (branch taken, etc) and for what reason
103103
DEFINE_BOOL(jiterpreter_count_bailouts, "jiterpreter-count-bailouts", FALSE, "Maintain accurate count of all trace bailouts based on cause")
104+
// Dump the wasm blob for all compiled traces
105+
DEFINE_BOOL(jiterpreter_dump_traces, "jiterpreter-dump-traces", FALSE, "Dump the wasm blob for all compiled traces to the console")
104106
// any trace that doesn't have at least this many meaningful (non-nop) opcodes in it will be rejected
105107
DEFINE_INT(jiterpreter_minimum_trace_length, "jiterpreter-minimum-trace-length", 8, "Reject traces shorter than this number of meaningful opcodes")
108+
// once a trace entry point is inserted, we only actually JIT code for it once it's been hit this many times
109+
DEFINE_INT(jiterpreter_minimum_trace_hit_count, "jiterpreter-minimum-trace-hit-count", 10000, "JIT trace entry points once they are hit this many times")
110+
// After a do_jit_call call site is hit this many times, we will queue it to be jitted
111+
DEFINE_INT(jiterpreter_jit_call_trampoline_hit_count, "jiterpreter-jit-call-hit-count", 3000, "Queue specialized do_jit_call trampoline for JIT after this many hits")
112+
// After a do_jit_call call site is hit this many times without being jitted, we will flush the JIT queue
113+
DEFINE_INT(jiterpreter_jit_call_queue_flush_threshold, "jiterpreter-jit-call-queue-flush-threshold", 10000, "Flush the do_jit_call JIT queue after an unJITted call site has this many hits")
106114
#endif // HOST_BROWSER
107115

108116
/* Cleanup */

src/mono/mono/utils/options.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ mono_options_print_usage (void)
9090
static GHashTable *_option_hash = NULL;
9191

9292
static GHashTable *
93-
get_option_hash ()
93+
get_option_hash (void)
9494
{
9595
GHashTable *result;
9696

0 commit comments

Comments
 (0)