126 changes: 86 additions & 40 deletions crates/cranelift/src/compiler.rs
@@ -211,39 +211,41 @@ impl wasmtime_environ::Compiler for Compiler {

let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty);

// The `stack_limit` global value below is the implementation of stack
// overflow checks in Wasmtime.
//
// The Wasm spec defines that stack overflows will raise a trap, and
// there's also an added constraint: as an embedder you are frequently
// running host-provided code called from wasm. WebAssembly and
// native code currently share the same call stack, so Wasmtime needs to
// make sure that host-provided code will have enough call-stack
// available to it.
//
// The way that stack overflow is handled here is by adding a prologue
// check to all functions for how much native stack is remaining. The
// `VMContext` pointer is the first argument to all functions, and the
// first field of this structure is `*const VMStoreContext` and the
// first field of that is the stack limit. Note that the stack limit in
// this case means "if the stack pointer goes below this, trap". Each
// function which consumes stack space or isn't a leaf function starts
// off by loading the stack limit, checking it against the stack
// pointer, and trapping if the limit is exceeded.
//
// This manual check allows the embedder to give wasm a relatively
// precise amount of stack allocation. Using this scheme we reserve a
// chunk of stack for wasm code relative to where wasm code was
// called. This ensures that native code called by wasm has native
// stack space to run, and the amount of stack reserved should be
// configurable for various embeddings.
//
// Note that this check is independent of each thread's stack guard page
// here. If the stack guard page is reached that's still considered an
// abort for the whole program since the runtime limits configured by
// the embedder should cause wasm to trap before it reaches that
// (ensuring the host has enough space as well for its functionality).
if !isa.triple().is_pulley() {
// If we're not catching traps with signals, insert checks at the
// beginnings of functions.
if !self.tunables.signals_based_traps && !isa.triple().is_pulley() {
// The `stack_limit` global value below is the implementation of stack
[Review comment from a Member]

If you're up for it, and it's ok to defer this to later, our stack overflow story is complicated enough it might be worth having a dedicated page in docs/*.md to talk about. Just for internal documentation, not necessarily user facing, but I feel like we've got a lot of scattered bits about stack overflow in various places as our strategy has evolved over time. For example the "The Wasm spec..." bit here is duplicated with save_last_wasm_exit_fp_and_pc below and neither gives a complete picture of how stack overflow is handled. In reality, understanding how stack overflow works requires understanding lots of locations, which is why I think a docs/*.md would be good that every place could point back to.

I realize though that reorganizing docs is mostly orthogonal to this PR, so I think it's ok to defer this to later if you'd prefer not to tackle this now.
// overflow checks in Wasmtime.
//
// The Wasm spec defines that stack overflows will raise a trap, and
// there's also an added constraint: as an embedder you are frequently
// running host-provided code called from wasm. WebAssembly and
// native code currently share the same call stack, so Wasmtime needs to
// make sure that host-provided code will have enough call-stack
// available to it.
//
// The way that stack overflow is handled here is by adding a prologue
// check to all functions for how much native stack is remaining. The
// `VMContext` pointer is the first argument to all functions, and the
// first field of this structure is `*const VMStoreContext` and the
// first field of that is the stack limit. Note that the stack limit in
// this case means "if the stack pointer goes below this, trap". Each
// function which consumes stack space or isn't a leaf function starts
// off by loading the stack limit, checking it against the stack
// pointer, and trapping if the limit is exceeded.
//
// This manual check allows the embedder to give wasm a relatively
// precise amount of stack allocation. Using this scheme we reserve a
// chunk of stack for wasm code relative to where wasm code was
// called. This ensures that native code called by wasm has native
// stack space to run, and the amount of stack reserved should be
// configurable for various embeddings.
//
// Note that this check is independent of each thread's stack guard page
// here. If the stack guard page is reached that's still considered an
// abort for the whole program since the runtime limits configured by
// the embedder should cause wasm to trap before it reaches that
// (ensuring the host has enough space as well for its functionality).
let vmctx = context
.func
.create_global_value(ir::GlobalValueData::VMContext);
@@ -259,11 +261,7 @@ impl wasmtime_environ::Compiler for Compiler {
global_type: isa.pointer_type(),
flags: MemFlags::trusted(),
});
if self.tunables.signals_based_traps {
context.func.stack_limit = Some(stack_limit);
} else {
func_env.stack_limit_at_function_entry = Some(stack_limit);
}
func_env.stack_limit_at_function_entry = Some(stack_limit);
}
let FunctionBodyData { validator, body } = input;
let mut validator =
@@ -402,7 +400,14 @@ impl wasmtime_environ::Compiler for Compiler {
caller_vmctx,
i32::from(ptr.vmcontext_store_context()),
);
save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &ptr, vm_store_context);
save_last_wasm_exit_fp_and_pc(
&mut builder,
pointer_type,
&ptr,
vm_store_context,
&self.tunables,
isa,
);

// Spill all wasm arguments to the stack in `ValRaw` slots.
let (args_base, args_len) =
@@ -609,7 +614,14 @@ impl wasmtime_environ::Compiler for Compiler {
vmctx,
ptr_size.vmcontext_store_context(),
);
save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &ptr_size, vm_store_context);
save_last_wasm_exit_fp_and_pc(
&mut builder,
pointer_type,
&ptr_size,
vm_store_context,
&self.tunables,
isa,
);

// Now it's time to delegate to the actual builtin. Forward all our own
// arguments to the libcall itself.
@@ -1183,7 +1195,41 @@ fn save_last_wasm_exit_fp_and_pc(
pointer_type: ir::Type,
ptr: &impl PtrSize,
limits: Value,
tunables: &Tunables,
isa: &dyn TargetIsa,
) {
// If we're catching traps with signals, insert checks into trampolines.
if tunables.signals_based_traps && !isa.triple().is_pulley() {
// The Wasm spec defines that stack overflows will raise a trap, and
// there's also an added constraint: as an embedder you are frequently
// running host-provided code called from wasm. WebAssembly and native code
// currently share the same call stack, so Wasmtime needs to make sure that
// host-provided code will have enough call-stack available to it.
//
// The first field of `VMStoreContext` is the stack limit. If the stack
// pointer is below this limit when we're about to call out of guest code,
// trap. But we don't check this limit as long as we stay within guest or
// trampoline code. Instead, we rely on the guest hitting a guard page,
// which the OS will tell our signal handler about. The following explicit
// check on guest exit ensures that native code called by wasm has enough
// stack space to run without hitting a guard page.
let trampoline_sp = builder.ins().get_stack_pointer(pointer_type);
let stack_limit = builder.ins().load(
pointer_type,
MemFlags::trusted(),
limits,
ptr.vmstore_context_stack_limit(),
);
let is_overflow = builder.ins().icmp(
ir::condcodes::IntCC::UnsignedLessThan,
trampoline_sp,
stack_limit,
);
builder
.ins()
.trapnz(is_overflow, ir::TrapCode::STACK_OVERFLOW);
}

// Save the exit Wasm FP to the limits. We dereference the current FP to get
// the previous FP because the current FP is the trampoline's FP, and we
// want the Wasm function's FP, which is the caller of this trampoline.
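For orientation, the prologue check described in the long comment above amounts to the following when written out by hand. This is a minimal sketch reusing the same FunctionBuilder calls as the trampoline check later in this file; the function name, the vmctx parameter, and the zero field offsets are illustrative assumptions, since Cranelift emits this check itself when `func.stack_limit` is set.

use cranelift_codegen::ir::{self, condcodes::IntCC, MemFlags};
use cranelift_frontend::FunctionBuilder;

// Hedged sketch of the prologue stack-overflow check. `vmctx` is the first
// argument to every wasm function; its first field is `*const VMStoreContext`,
// and the first field of that is the stack limit ("if the stack pointer goes
// below this, trap"). The zero offsets are assumptions for illustration.
fn emit_prologue_stack_check(
    builder: &mut FunctionBuilder,
    pointer_type: ir::Type,
    vmctx: ir::Value,
) {
    let store_ctx = builder
        .ins()
        .load(pointer_type, MemFlags::trusted(), vmctx, 0);
    let stack_limit = builder
        .ins()
        .load(pointer_type, MemFlags::trusted(), store_ctx, 0);
    let sp = builder.ins().get_stack_pointer(pointer_type);
    let is_overflow = builder
        .ins()
        .icmp(IntCC::UnsignedLessThan, sp, stack_limit);
    builder.ins().trapnz(is_overflow, ir::TrapCode::STACK_OVERFLOW);
}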
2 changes: 2 additions & 0 deletions crates/cranelift/src/compiler/component.rs
@@ -1492,6 +1492,8 @@ impl ComponentCompiler for Compiler {
pointer_type,
&c.offsets.ptr,
vm_store_context,
&self.tunables,
&*self.isa,
);
}

15 changes: 9 additions & 6 deletions crates/wasmtime/src/runtime/trap.rs
@@ -100,12 +100,15 @@ pub(crate) fn from_runtime_box(
} => {
let mut err: Error = trap.into();

// If a fault address was present, for example with segfaults,
// then simultaneously assert that it's within a known linear memory
// and additionally translate it to a wasm-local address to be added
// as context to the error.
if let Some(fault) = faulting_addr.and_then(|addr| store.wasm_fault(pc, addr)) {
err = err.context(fault);
// If a fault address was present (for example with segfaults), and if
// it isn't a stack-overflow guard-page fault, then simultaneously
// assert that it's within a known linear memory and additionally
// translate it to a wasm-local address to be added as context to the
// error. Stack-overflow faults land in the stack's guard page rather
// than in any linear memory, so there is no wasm address to translate.
if trap != Trap::StackOverflow {
[Review comment from a Member]

Could you add some comments as to why stack overflow traps are ignored here?
if let Some(fault) = faulting_addr.and_then(|addr| store.wasm_fault(pc, addr)) {
err = err.context(fault);
}
}
(err, Some(pc))
}
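As a usage-level illustration of this error path: an embedder whose guest overflows the stack gets `Trap::StackOverflow` with no linear-memory fault context attached. A hedged sketch against the public wasmtime API, where `store` and `func` (an unboundedly recursive guest export) are assumed to be set up elsewhere:

use wasmtime::{Func, Store, Trap};

// Hedged sketch: what an embedder observes when guest code overflows the
// stack. `func` is assumed to be a guest export that recurses forever.
fn expect_stack_overflow(mut store: Store<()>, func: Func) {
    let err = func.call(&mut store, &[], &mut []).unwrap_err();
    assert_eq!(err.downcast_ref::<Trap>(), Some(&Trap::StackOverflow));
    // No "memory fault at wasm address ..." context is attached here: the
    // faulting address lies in the stack guard page, not in a linear memory.
}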
1 change: 1 addition & 0 deletions crates/wasmtime/src/runtime/vm/interpreter.rs
@@ -168,6 +168,7 @@ impl InterpreterRef<'_> {
let regs = TrapRegisters {
pc: pc.as_ptr() as usize,
fp: self.0.fp() as usize,
sp: self.0.sp() as usize,
};
tls::with(|s| {
let s = s.unwrap();
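The shape of `TrapRegisters` implied by this hunk and the platform hunks below is roughly the following; this is a sketch inferred from the diff, not the crate's exact definition:

// Sketch of the register snapshot each trap handler now fills in.
#[derive(Clone, Copy)]
pub struct TrapRegisters {
    pub pc: usize, // program counter at the faulting instruction
    pub fp: usize, // frame pointer, used to start backtrace capture
    pub sp: usize, // stack pointer, newly threaded through so guard-page
                   // hits can be recognized as stack overflow
}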
39 changes: 25 additions & 14 deletions crates/wasmtime/src/runtime/vm/sys/unix/machports.rs
@@ -307,12 +307,13 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool {

let thread_state_flavor = x86_THREAD_STATE64;

let get_pc_and_fp = |state: &ThreadState| (
let get_pc_and_fp_and_sp = |state: &ThreadState| (
state.__rip as *const u8,
state.__rbp as usize,
state.__rsp as usize,
);

let resume = |state: &mut ThreadState, pc: usize, fp: usize, fault1: usize, fault2: usize, trap: Trap| {
let resume = |state: &mut ThreadState, pc: usize, fp: usize, sp: usize, fault1: usize, fault2: usize, trap: Trap| {
// The x86_64 ABI requires a 16-byte stack alignment for
// functions, so typically we'll be 16-byte aligned. In this
// case we simulate a `call` instruction by decrementing the
@@ -339,22 +340,24 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool {
state.__rip = unwind as u64;
state.__rdi = pc as u64;
state.__rsi = fp as u64;
state.__rdx = fault1 as u64;
state.__rcx = fault2 as u64;
state.__r8 = trap as u64;
state.__rdx = sp as u64;
state.__rcx = fault1 as u64;
state.__r8 = fault2 as u64;
state.__r9 = trap as u64;
};
let mut thread_state = ThreadState::new();
} else if #[cfg(target_arch = "aarch64")] {
type ThreadState = mach2::structs::arm_thread_state64_t;

let thread_state_flavor = ARM_THREAD_STATE64;

let get_pc_and_fp = |state: &ThreadState| (
let get_pc_and_fp_and_sp = |state: &ThreadState| (
state.__pc as *const u8,
state.__fp as usize,
state.__sp as usize,
);

let resume = |state: &mut ThreadState, pc: usize, fp: usize, fault1: usize, fault2: usize, trap: Trap| {
let resume = |state: &mut ThreadState, pc: usize, fp: usize, sp: usize, fault1: usize, fault2: usize, trap: Trap| {
// Clobber LR with the faulting PC, so unwinding resumes at the
// faulting instruction. The previous value of LR has been saved
// by the callee (in Cranelift generated code), so no need to
@@ -365,9 +368,10 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool {
// it looks like a call to unwind.
state.__x[0] = pc as u64;
state.__x[1] = fp as u64;
state.__x[2] = fault1 as u64;
state.__x[3] = fault2 as u64;
state.__x[4] = trap as u64;
state.__x[2] = sp as u64;
state.__x[3] = fault1 as u64;
state.__x[4] = fault2 as u64;
state.__x[5] = trap as u64;
state.__pc = unwind as u64;
};
let mut thread_state = mem::zeroed::<ThreadState>();
@@ -403,7 +407,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool {
// Finally our indirection with a pointer means that we can read the
// pointer value and if `MAP` changes happen after we read our entry that's
// ok since they won't invalidate our entry.
let (pc, fp) = get_pc_and_fp(&thread_state);
let (pc, fp, sp) = get_pc_and_fp_and_sp(&thread_state);
let Some((code, text_offset)) = lookup_code(pc as usize) else {
return false;
};
@@ -420,7 +424,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool {
None => (0, 0),
Some(addr) => (1, addr),
};
resume(&mut thread_state, pc as usize, fp, fault1, fault2, trap);
resume(&mut thread_state, pc as usize, fp, sp, fault1, fault2, trap);
let kret = thread_set_state(
origin_thread,
thread_state_flavor,
@@ -437,10 +441,17 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool {
/// a native backtrace once we've switched back to the thread itself. After
/// the backtrace is captured we can do the usual `longjmp` back to the source
/// of the wasm code.
unsafe extern "C" fn unwind(pc: usize, fp: usize, fault1: usize, fault2: usize, trap: u8) -> ! {
unsafe extern "C" fn unwind(
pc: usize,
fp: usize,
sp: usize,
fault1: usize,
fault2: usize,
trap: u8,
) -> ! {
let jmp_buf = tls::with(|state| {
let state = state.unwrap();
let regs = TrapRegisters { pc, fp };
let regs = TrapRegisters { pc, fp, sp };
let faulting_addr = match fault1 {
0 => None,
_ => Some(fault2),
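The register writes in the two `resume` closures above follow the platform argument-passing conventions, so redirecting the thread's saved state makes it resume as if `unwind` had been called with six arguments. A sketch of the correspondence (register-to-parameter mapping per the System V and AAPCS64 conventions; the body is a placeholder, not the real implementation):

#[allow(unused_variables)]
unsafe extern "C" fn unwind_abi_sketch(
    pc: usize,     // x86_64: rdi, aarch64: x0
    fp: usize,     // x86_64: rsi, aarch64: x1
    sp: usize,     // x86_64: rdx, aarch64: x2
    fault1: usize, // x86_64: rcx, aarch64: x3
    fault2: usize, // x86_64: r8,  aarch64: x4
    trap: u8,      // x86_64: r9,  aarch64: x5
) -> ! {
    // The real `unwind` in this file captures a backtrace via TLS and then
    // longjmps back to the wasm entry point; this stub only shows the ABI.
    unreachable!("sketch only")
}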
11 changes: 11 additions & 0 deletions crates/wasmtime/src/runtime/vm/sys/unix/signals.rs
@@ -270,18 +270,21 @@ unsafe fn get_trap_registers(cx: *mut libc::c_void, _signum: libc::c_int) -> TrapRegisters {
TrapRegisters {
pc: cx.uc_mcontext.gregs[libc::REG_RIP as usize] as usize,
fp: cx.uc_mcontext.gregs[libc::REG_RBP as usize] as usize,
sp: cx.uc_mcontext.gregs[libc::REG_RSP as usize] as usize,
}
} else if #[cfg(all(target_os = "linux", target_arch = "x86"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: cx.uc_mcontext.gregs[libc::REG_EIP as usize] as usize,
fp: cx.uc_mcontext.gregs[libc::REG_EBP as usize] as usize,
sp: cx.uc_mcontext.gregs[libc::REG_ESP as usize] as usize,
}
} else if #[cfg(all(any(target_os = "linux", target_os = "android"), target_arch = "aarch64"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: cx.uc_mcontext.pc as usize,
fp: cx.uc_mcontext.regs[29] as usize,
sp: cx.uc_mcontext.sp as usize,
}
} else if #[cfg(all(target_os = "linux", target_arch = "s390x"))] {
// On s390x, SIGILL and SIGFPE are delivered with the PSW address
@@ -302,48 +305,56 @@ unsafe fn get_trap_registers(cx: *mut libc::c_void, _signum: libc::c_int) -> TrapRegisters {
TrapRegisters {
pc: (cx.uc_mcontext.psw.addr - trap_offset) as usize,
fp: *(cx.uc_mcontext.gregs[15] as *const usize),
sp: cx.uc_mcontext.gregs[15] as usize,
}
} else if #[cfg(all(target_vendor = "apple", target_arch = "x86_64"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: (*cx.uc_mcontext).__ss.__rip as usize,
fp: (*cx.uc_mcontext).__ss.__rbp as usize,
sp: (*cx.uc_mcontext).__ss.__rsp as usize,
}
} else if #[cfg(all(target_vendor = "apple", target_arch = "aarch64"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: (*cx.uc_mcontext).__ss.__pc as usize,
fp: (*cx.uc_mcontext).__ss.__fp as usize,
sp: (*cx.uc_mcontext).__ss.__sp as usize,
}
} else if #[cfg(all(target_os = "freebsd", target_arch = "x86_64"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: cx.uc_mcontext.mc_rip as usize,
fp: cx.uc_mcontext.mc_rbp as usize,
sp: cx.uc_mcontext.mc_rsp as usize,
}
} else if #[cfg(all(target_os = "linux", target_arch = "riscv64"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: cx.uc_mcontext.__gregs[libc::REG_PC] as usize,
fp: cx.uc_mcontext.__gregs[libc::REG_S0] as usize,
sp: cx.uc_mcontext.__gregs[libc::REG_SP] as usize,
}
} else if #[cfg(all(target_os = "freebsd", target_arch = "aarch64"))] {
let cx = &*(cx as *const libc::mcontext_t);
TrapRegisters {
pc: cx.mc_gpregs.gp_elr as usize,
fp: cx.mc_gpregs.gp_x[29] as usize,
sp: cx.mc_gpregs.sp as usize,
}
} else if #[cfg(all(target_os = "openbsd", target_arch = "x86_64"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: cx.sc_rip as usize,
fp: cx.sc_rbp as usize,
sp: cx.sc_rsp as usize,
}
} else if #[cfg(all(target_os = "linux", target_arch = "arm"))] {
let cx = &*(cx as *const libc::ucontext_t);
TrapRegisters {
pc: cx.uc_mcontext.arm_pc as usize,
fp: cx.uc_mcontext.arm_fp as usize,
sp: cx.uc_mcontext.arm_sp as usize,
}
} else {
compile_error!("unsupported platform");
@@ -87,16 +87,19 @@ unsafe extern "system" fn exception_handler(exception_info: *mut EXCEPTION_POINTERS)
let regs = TrapRegisters {
pc: context.Rip as usize,
fp: context.Rbp as usize,
sp: context.Rsp as usize,
};
} else if #[cfg(target_arch = "aarch64")] {
let regs = TrapRegisters {
pc: context.Pc as usize,
fp: context.Anonymous.Anonymous.Fp as usize,
sp: context.Sp as usize,
};
} else if #[cfg(target_arch = "x86")] {
let regs = TrapRegisters {
pc: context.Eip as usize,
fp: context.Ebp as usize,
sp: context.Esp as usize,
};
} else {
compile_error!("unsupported platform");
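With `sp` captured on every platform above, the handler side can distinguish a stack-overflow guard-page hit from an ordinary out-of-bounds memory fault. A minimal sketch of such a classification, assuming an illustrative guard-region size (`GUARD_REGION` and the function below are not actual Wasmtime internals):

// Hedged sketch: a fault at or just below the captured stack pointer lands
// in the stack's guard region and is a stack overflow; anything else may be
// a linear-memory fault worth translating to a wasm address.
const GUARD_REGION: usize = 64 * 1024; // assumed size, for illustration

fn is_stack_overflow_fault(faulting_addr: usize, sp: usize) -> bool {
    faulting_addr <= sp && sp - faulting_addr <= GUARD_REGION
}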