diff --git a/Cargo.lock b/Cargo.lock index 3fd232bf39ed..f3ef1c75e5c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3309,7 +3309,6 @@ version = "0.40.0" dependencies = [ "anyhow", "async-trait", - "backtrace", "bincode", "cfg-if", "indexmap", @@ -3614,7 +3613,6 @@ name = "wasmtime-runtime" version = "0.40.0" dependencies = [ "anyhow", - "backtrace", "cc", "cfg-if", "indexmap", @@ -3623,9 +3621,11 @@ dependencies = [ "mach", "memfd", "memoffset", + "paste", "rand 0.8.5", "rustix", "thiserror", + "wasmtime-asm-macros", "wasmtime-environ", "wasmtime-fiber", "wasmtime-jit-debug", diff --git a/Cargo.toml b/Cargo.toml index dfc8c0b9f03e..d7effe372d7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -126,9 +126,6 @@ harness = false name = "tokio" required-features = ["wasmtime-wasi/tokio"] -[profile.dev.package.backtrace] -debug = false # FIXME(#1813) - [[bench]] name = "instantiation" harness = false diff --git a/benches/call.rs b/benches/call.rs index 230fb9ce21cc..f1e5790b5deb 100644 --- a/benches/call.rs +++ b/benches/call.rs @@ -368,7 +368,7 @@ fn wasm_to_host(c: &mut Criterion) { desc: &str, is_async: IsAsync, ) { - group.bench_function(&format!("wasm-to-host - nop - {}", desc), |b| { + group.bench_function(&format!("wasm-to-host - {} - nop", desc), |b| { let run = instance .get_typed_func::(&mut *store, "run-nop") .unwrap(); @@ -383,7 +383,7 @@ fn wasm_to_host(c: &mut Criterion) { }) }); group.bench_function( - &format!("wasm-to-host - nop-params-and-results - {}", desc), + &format!("wasm-to-host - {} - nop-params-and-results", desc), |b| { let run = instance .get_typed_func::(&mut *store, "run-nop-params-and-results") diff --git a/crates/asm-macros/src/lib.rs b/crates/asm-macros/src/lib.rs index 6c9061eaf95b..50c0ef848e98 100644 --- a/crates/asm-macros/src/lib.rs +++ b/crates/asm-macros/src/lib.rs @@ -5,12 +5,16 @@ //! attributes correct (e.g. ELF symbols get a size and are flagged as a //! function) and additionally handles visibility across platforms. All symbols //! should be visible to Rust but not visible externally outside of a `*.so`. +//! +//! It also exports a an `asm_sym!` macro which can be used to reference symbols +//! from within `global_asm!`-defined functions, and handles adding the leading +//! underscore that macOS prepends to symbols for you. cfg_if::cfg_if! { if #[cfg(target_os = "macos")] { #[macro_export] macro_rules! asm_func { - ($name:tt, $($body:tt)*) => { + ($name:expr, $($body:tt)*) => { std::arch::global_asm!(concat!( ".p2align 4\n", ".private_extern _", $name, "\n", @@ -23,7 +27,28 @@ cfg_if::cfg_if! { #[macro_export] macro_rules! asm_sym { - ($name:tt) => (concat!("_", $name)) + ( $( $name:tt )* ) => ( concat!("_", $( $name )* ) ) + } + } else if #[cfg(target_os = "windows")] { + #[macro_export] + macro_rules! asm_func { + ($name:expr, $($body:tt)*) => { + std::arch::global_asm!(concat!( + ".def ", $name, "\n", + ".scl 2\n", + ".type 32\n", + ".endef\n", + ".global ", $name, "\n", + ".p2align 4\n", + $name, ":\n", + $($body)* + )); + }; + } + + #[macro_export] + macro_rules! asm_sym { + ( $( $name:tt )* ) => ( $( $name )* ) } } else { // Note that for now this "else" clause just assumes that everything @@ -45,14 +70,14 @@ cfg_if::cfg_if! { #[macro_export] macro_rules! asm_func { - ($name:tt, $($body:tt)*) => { + ($name:expr, $($body:tt)*) => { std::arch::global_asm!(concat!( ".p2align 4\n", ".hidden ", $name, "\n", ".global ", $name, "\n", $crate::elf_func_type_header!($name), $name, ":\n", - $($body)* + concat!($($body)*), ".size ", $name, ",.-", $name, )); }; @@ -60,7 +85,7 @@ cfg_if::cfg_if! { #[macro_export] macro_rules! asm_sym { - ($name:tt) => ($name) + ( $( $name:tt )* ) => ( $( $name )* ) } } } diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 2108f3193ca5..0c2239153390 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -30,7 +30,7 @@ macro_rules! declare_function_signatures { ( $( $( #[$attr:meta] )* - $name:ident( $( $param:ident ),* ) -> ( $( $result:ident ),* ); + $name:ident( $( $pname:ident: $param:ident ),* ) $( -> $result:ident )?; )* ) => { /// A struct with an `Option` member for every builtin @@ -94,7 +94,7 @@ macro_rules! declare_function_signatures { let sig = self.$name.unwrap_or_else(|| { func.import_signature(Signature { params: vec![ $( self.$param() ),* ], - returns: vec![ $( self.$result() ),* ], + returns: vec![ $( self.$result() )? ], call_conv: self.call_conv, }) }); @@ -1197,11 +1197,11 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m let builtin_sig = self .builtin_function_signatures .drop_externref(builder.func); - let (_vmctx, builtin_addr) = self + let (vmctx, builtin_addr) = self .translate_load_builtin_function_address(&mut builder.cursor(), builtin_idx); builder .ins() - .call_indirect(builtin_sig, builtin_addr, &[current_elem]); + .call_indirect(builtin_sig, builtin_addr, &[vmctx, current_elem]); builder.ins().jump(continue_block, &[]); builder.switch_to_block(continue_block); diff --git a/crates/environ/src/builtin.rs b/crates/environ/src/builtin.rs index cb3e795826a2..4df985cabd31 100644 --- a/crates/environ/src/builtin.rs +++ b/crates/environ/src/builtin.rs @@ -4,53 +4,53 @@ macro_rules! foreach_builtin_function { ($mac:ident) => { $mac! { /// Returns an index for wasm's `memory.grow` builtin function. - memory32_grow(vmctx, i64, i32) -> (pointer); + memory32_grow(vmctx: vmctx, delta: i64, index: i32) -> pointer; /// Returns an index for wasm's `table.copy` when both tables are locally /// defined. - table_copy(vmctx, i32, i32, i32, i32, i32) -> (); + table_copy(vmctx: vmctx, dst_index: i32, src_index: i32, dst: i32, src: i32, len: i32); /// Returns an index for wasm's `table.init`. - table_init(vmctx, i32, i32, i32, i32, i32) -> (); + table_init(vmctx: vmctx, table: i32, elem: i32, dst: i32, src: i32, len: i32); /// Returns an index for wasm's `elem.drop`. - elem_drop(vmctx, i32) -> (); + elem_drop(vmctx: vmctx, elem: i32); /// Returns an index for wasm's `memory.copy` - memory_copy(vmctx, i32, i64, i32, i64, i64) -> (); + memory_copy(vmctx: vmctx, dst_index: i32, dst: i64, src_index: i32, src: i64, len: i64); /// Returns an index for wasm's `memory.fill` instruction. - memory_fill(vmctx, i32, i64, i32, i64) -> (); + memory_fill(vmctx: vmctx, memory: i32, dst: i64, val: i32, len: i64); /// Returns an index for wasm's `memory.init` instruction. - memory_init(vmctx, i32, i32, i64, i32, i32) -> (); + memory_init(vmctx: vmctx, memory: i32, data: i32, dst: i64, src: i32, len: i32); /// Returns a value for wasm's `ref.func` instruction. - ref_func(vmctx, i32) -> (pointer); + ref_func(vmctx: vmctx, func: i32) -> pointer; /// Returns an index for wasm's `data.drop` instruction. - data_drop(vmctx, i32) -> (); + data_drop(vmctx: vmctx, data: i32); /// Returns a table entry after lazily initializing it. - table_get_lazy_init_funcref(vmctx, i32, i32) -> (pointer); + table_get_lazy_init_funcref(vmctx: vmctx, table: i32, index: i32) -> pointer; /// Returns an index for Wasm's `table.grow` instruction for `funcref`s. - table_grow_funcref(vmctx, i32, i32, pointer) -> (i32); + table_grow_funcref(vmctx: vmctx, table: i32, delta: i32, init: pointer) -> i32; /// Returns an index for Wasm's `table.grow` instruction for `externref`s. - table_grow_externref(vmctx, i32, i32, reference) -> (i32); + table_grow_externref(vmctx: vmctx, table: i32, delta: i32, init: reference) -> i32; /// Returns an index for Wasm's `table.fill` instruction for `externref`s. - table_fill_externref(vmctx, i32, i32, reference, i32) -> (); + table_fill_externref(vmctx: vmctx, table: i32, dst: i32, val: reference, len: i32); /// Returns an index for Wasm's `table.fill` instruction for `funcref`s. - table_fill_funcref(vmctx, i32, i32, pointer, i32) -> (); + table_fill_funcref(vmctx: vmctx, table: i32, dst: i32, val: pointer, len: i32); /// Returns an index to drop a `VMExternRef`. - drop_externref(pointer) -> (); + drop_externref(vmctx: vmctx, val: pointer); /// Returns an index to do a GC and then insert a `VMExternRef` into the /// `VMExternRefActivationsTable`. - activations_table_insert_with_gc(vmctx, reference) -> (); + activations_table_insert_with_gc(vmctx: vmctx, val: reference); /// Returns an index for Wasm's `global.get` instruction for `externref`s. - externref_global_get(vmctx, i32) -> (reference); + externref_global_get(vmctx: vmctx, global: i32) -> reference; /// Returns an index for Wasm's `global.get` instruction for `externref`s. - externref_global_set(vmctx, i32, reference) -> (); + externref_global_set(vmctx: vmctx, global: i32, val: reference); /// Returns an index for wasm's `memory.atomic.notify` instruction. - memory_atomic_notify(vmctx, i32, pointer, i32) -> (i32); + memory_atomic_notify(vmctx: vmctx, memory: i32, addr: pointer, count: i32) -> i32; /// Returns an index for wasm's `memory.atomic.wait32` instruction. - memory_atomic_wait32(vmctx, i32, pointer, i32, i64) -> (i32); + memory_atomic_wait32(vmctx: vmctx, memory: i32, addr: pointer, expected: i32, timeout: i64) -> i32; /// Returns an index for wasm's `memory.atomic.wait64` instruction. - memory_atomic_wait64(vmctx, i32, pointer, i64, i64) -> (i32); + memory_atomic_wait64(vmctx: vmctx, memory: i32, addr: pointer, expected: i64, timeout: i64) -> i32; /// Invoked when fuel has run out while executing a function. - out_of_gas(vmctx) -> (); + out_of_gas(vmctx: vmctx); /// Invoked when we reach a new epoch. - new_epoch(vmctx) -> (i64); + new_epoch(vmctx: vmctx) -> i64; } }; } @@ -75,7 +75,7 @@ macro_rules! declare_indexes { ( $( $( #[$attr:meta] )* - $name:ident( $( $param:ident ),* ) -> ( $( $result:ident ),* ); + $name:ident( $( $pname:ident: $param:ident ),* ) $( -> $result:ident )?; )* ) => { impl BuiltinFunctionIndex { diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs index 9f2403b74940..09d3783e9e03 100644 --- a/crates/environ/src/vmoffsets.rs +++ b/crates/environ/src/vmoffsets.rs @@ -5,7 +5,9 @@ // // struct VMContext { // magic: u32, +// _padding: u32, // (On 64-bit systems) // runtime_limits: *const VMRuntimeLimits, +// callee: *mut VMFunctionBody, // externref_activations_table: *mut VMExternRefActivationsTable, // store: *mut dyn Store, // builtins: *mut VMBuiltinFunctionsArray, @@ -78,6 +80,7 @@ pub struct VMOffsets

{ // precalculated offsets of various member fields magic: u32, runtime_limits: u32, + callee: u32, epoch_ptr: u32, externref_activations_table: u32, store: u32, @@ -269,6 +272,7 @@ impl VMOffsets

{ store: "jit store state", externref_activations_table: "jit host externref state", epoch_ptr: "jit current epoch state", + callee: "callee function pointer", runtime_limits: "jit runtime limits state", magic: "magic value", } @@ -290,6 +294,7 @@ impl From> for VMOffsets

{ num_escaped_funcs: fields.num_escaped_funcs, magic: 0, runtime_limits: 0, + callee: 0, epoch_ptr: 0, externref_activations_table: 0, store: 0, @@ -340,6 +345,7 @@ impl From> for VMOffsets

{ size(magic) = 4u32, align(u32::from(ret.ptr.size())), size(runtime_limits) = ret.ptr.size(), + size(callee) = ret.ptr.size(), size(epoch_ptr) = ret.ptr.size(), size(externref_activations_table) = ret.ptr.size(), size(store) = ret.ptr.size() * 2, @@ -556,7 +562,22 @@ impl VMOffsets

{ /// Return the offset of the `epoch_deadline` field of `VMRuntimeLimits` #[inline] pub fn vmruntime_limits_epoch_deadline(&self) -> u8 { - self.pointer_size() + 8 // `stack_limit` is a pointer; `fuel_consumed` is an `i64` + self.vmruntime_limits_fuel_consumed() + 8 // `stack_limit` is a pointer; `fuel_consumed` is an `i64` + } + + /// Return the offset of the `last_wasm_exit_fp` field of `VMRuntimeLimits`. + pub fn vmruntime_limits_last_wasm_exit_fp(&self) -> u8 { + self.vmruntime_limits_epoch_deadline() + 8 + } + + /// Return the offset of the `last_wasm_exit_pc` field of `VMRuntimeLimits`. + pub fn vmruntime_limits_last_wasm_exit_pc(&self) -> u8 { + self.vmruntime_limits_last_wasm_exit_fp() + self.pointer_size() + } + + /// Return the offset of the `last_enty_sp` field of `VMRuntimeLimits`. + pub fn vmruntime_limits_last_wasm_entry_sp(&self) -> u8 { + self.vmruntime_limits_last_wasm_exit_pc() + self.pointer_size() } } @@ -574,6 +595,11 @@ impl VMOffsets

{ self.runtime_limits } + /// Return the offset to the `callee` member in this `VMContext`. + pub fn vmctx_callee(&self) -> u32 { + self.callee + } + /// Return the offset to the `*const AtomicU64` epoch-counter /// pointer. #[inline] @@ -824,6 +850,12 @@ impl VMOffsets

{ } } +/// Equivalent of `VMCONTEXT_MAGIC` except for host functions. +/// +/// This is stored at the start of all `VMHostFuncContext` structures and +/// double-checked on `VMHostFuncContext::from_opaque`. +pub const VM_HOST_FUNC_MAGIC: u32 = u32::from_le_bytes(*b"host"); + #[cfg(test)] mod tests { use crate::vmoffsets::align; diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index c0b9c9caf68e..83492b2fda77 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -17,6 +17,7 @@ mod memory; mod module_config; mod single_inst_module; mod spec_test; +mod stacks; pub mod table_ops; pub use codegen_settings::CodegenSettings; @@ -27,3 +28,4 @@ pub use memory::{MemoryConfig, NormalMemoryConfig, UnalignedMemory, UnalignedMem pub use module_config::ModuleConfig; pub use single_inst_module::SingleInstModule; pub use spec_test::SpecTest; +pub use stacks::Stacks; diff --git a/crates/fuzzing/src/generators/stacks.rs b/crates/fuzzing/src/generators/stacks.rs new file mode 100644 index 000000000000..de0bd94e9f84 --- /dev/null +++ b/crates/fuzzing/src/generators/stacks.rs @@ -0,0 +1,367 @@ +//! Generate a Wasm program that keeps track of its current stack frames. +//! +//! We can then compare the stack trace we observe in Wasmtime to what the Wasm +//! program believes its stack should be. Any discrepencies between the two +//! points to a bug in either this test case generator or Wasmtime's stack +//! walker. + +use std::mem; + +use arbitrary::{Arbitrary, Result, Unstructured}; +use wasm_encoder::Instruction; + +const MAX_FUNCS: usize = 20; + +/// Generate a Wasm module that keeps track of its current call stack, to +/// compare to the host. +#[derive(Debug)] +pub struct Stacks { + funcs: Vec, + inputs: Vec, +} + +#[derive(Debug, Default)] +struct Function { + ops: Vec, +} + +#[derive(Arbitrary, Debug, Clone, Copy)] +enum Op { + CheckStackInHost, + Call(u32), + CallThroughHost(u32), +} + +impl<'a> Arbitrary<'a> for Stacks { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let funcs = Self::arbitrary_funcs(u)?; + let n = u.len(); + let inputs = u.bytes(n)?.to_vec(); + Ok(Stacks { funcs, inputs }) + } +} + +impl Stacks { + fn arbitrary_funcs(u: &mut Unstructured) -> Result> { + let mut funcs = vec![Function::default()]; + + // The indices of functions within `funcs` that we still need to + // generate. + let mut work_list = vec![0]; + + while let Some(f) = work_list.pop() { + let mut ops = u.arbitrary::>()?; + for op in &mut ops { + match op { + Op::CallThroughHost(idx) | Op::Call(idx) => { + if u.is_empty() || funcs.len() >= MAX_FUNCS || u.ratio(4, 5)? { + // Call an existing function. + *idx = *idx % u32::try_from(funcs.len()).unwrap(); + } else { + // Call a new function... + *idx = u32::try_from(funcs.len()).unwrap(); + // ...which means we also need to eventually define it. + work_list.push(funcs.len()); + funcs.push(Function::default()); + } + } + Op::CheckStackInHost => {} + } + } + funcs[f].ops = ops; + } + + Ok(funcs) + } + + /// Get the input values to run the Wasm module with. + pub fn inputs(&self) -> &[u8] { + &self.inputs + } + + /// Get this test case's Wasm module. + /// + /// The Wasm module has the following imports: + /// + /// * `host.check_stack: [] -> []`: The host can check the Wasm's + /// understanding of its own stack against the host's understanding of the + /// Wasm stack to find discrepency bugs. + /// + /// * `host.call_func: [funcref] -> []`: The host should call the given + /// `funcref`, creating a call stack with multiple sequences of contiguous + /// Wasm frames on the stack like `[..., wasm, host, wasm]`. + /// + /// The Wasm module has the following exports: + /// + /// * `run: [i32] -> []`: This function should be called with each of the + /// input values to run this generated test case. + /// + /// * `get_stack: [] -> [i32 i32]`: Get the pointer and length of the `u32` + /// array of this Wasm's understanding of its stack. This is useful for + /// checking whether the host's view of the stack at a trap matches the + /// Wasm program's understanding. + pub fn wasm(&self) -> Vec { + let mut module = wasm_encoder::Module::new(); + + let mut types = wasm_encoder::TypeSection::new(); + + let run_type = types.len(); + types.function(vec![wasm_encoder::ValType::I32], vec![]); + + let get_stack_type = types.len(); + types.function( + vec![], + vec![wasm_encoder::ValType::I32, wasm_encoder::ValType::I32], + ); + + let null_type = types.len(); + types.function(vec![], vec![]); + + let call_func_type = types.len(); + types.function(vec![wasm_encoder::ValType::FuncRef], vec![]); + + section(&mut module, types); + + let mut imports = wasm_encoder::ImportSection::new(); + let check_stack_func = 0; + imports.import( + "host", + "check_stack", + wasm_encoder::EntityType::Function(null_type), + ); + let call_func_func = 1; + imports.import( + "host", + "call_func", + wasm_encoder::EntityType::Function(call_func_type), + ); + let num_imported_funcs = 2; + section(&mut module, imports); + + let mut funcs = wasm_encoder::FunctionSection::new(); + for _ in &self.funcs { + funcs.function(null_type); + } + let run_func = funcs.len() + num_imported_funcs; + funcs.function(run_type); + let get_stack_func = funcs.len() + num_imported_funcs; + funcs.function(get_stack_type); + section(&mut module, funcs); + + let mut mems = wasm_encoder::MemorySection::new(); + let memory = mems.len(); + mems.memory(wasm_encoder::MemoryType { + minimum: 1, + maximum: Some(1), + memory64: false, + shared: false, + }); + section(&mut module, mems); + + let mut globals = wasm_encoder::GlobalSection::new(); + let fuel_global = globals.len(); + globals.global( + wasm_encoder::GlobalType { + val_type: wasm_encoder::ValType::I32, + mutable: true, + }, + &wasm_encoder::ConstExpr::i32_const(0), + ); + let stack_len_global = globals.len(); + globals.global( + wasm_encoder::GlobalType { + val_type: wasm_encoder::ValType::I32, + mutable: true, + }, + &wasm_encoder::ConstExpr::i32_const(0), + ); + section(&mut module, globals); + + let mut exports = wasm_encoder::ExportSection::new(); + exports.export("run", wasm_encoder::ExportKind::Func, run_func); + exports.export("get_stack", wasm_encoder::ExportKind::Func, get_stack_func); + exports.export("memory", wasm_encoder::ExportKind::Memory, memory); + exports.export("fuel", wasm_encoder::ExportKind::Global, fuel_global); + section(&mut module, exports); + + let mut elems = wasm_encoder::ElementSection::new(); + elems.declared( + wasm_encoder::ValType::FuncRef, + wasm_encoder::Elements::Functions( + &(0..num_imported_funcs + u32::try_from(self.funcs.len()).unwrap()) + .collect::>(), + ), + ); + section(&mut module, elems); + + let check_fuel = |body: &mut wasm_encoder::Function| { + // Trap if we are out of fuel. + body.instruction(&Instruction::GlobalGet(fuel_global)) + .instruction(&Instruction::I32Eqz) + .instruction(&Instruction::If(wasm_encoder::BlockType::Empty)) + .instruction(&Instruction::Unreachable) + .instruction(&Instruction::End); + + // Decrement fuel. + body.instruction(&Instruction::GlobalGet(fuel_global)) + .instruction(&Instruction::I32Const(1)) + .instruction(&Instruction::I32Sub) + .instruction(&Instruction::GlobalSet(fuel_global)); + }; + + let push_func_to_stack = |body: &mut wasm_encoder::Function, func: u32| { + // Add this function to our internal stack. + // + // Note that we know our `stack_len_global` can't go beyond memory + // bounds because we limit fuel to at most `u8::MAX` and each stack + // entry is an `i32` and `u8::MAX * size_of(i32)` still fits in one + // Wasm page. + body.instruction(&Instruction::GlobalGet(stack_len_global)) + .instruction(&Instruction::I32Const(func as i32)) + .instruction(&Instruction::I32Store(wasm_encoder::MemArg { + offset: 0, + align: 0, + memory_index: memory, + })) + .instruction(&Instruction::GlobalGet(stack_len_global)) + .instruction(&Instruction::I32Const(mem::size_of::() as i32)) + .instruction(&Instruction::I32Add) + .instruction(&Instruction::GlobalSet(stack_len_global)); + }; + + let pop_func_from_stack = |body: &mut wasm_encoder::Function| { + // Remove this function from our internal stack. + body.instruction(&Instruction::GlobalGet(stack_len_global)) + .instruction(&Instruction::I32Const(mem::size_of::() as i32)) + .instruction(&Instruction::I32Sub) + .instruction(&Instruction::GlobalSet(stack_len_global)); + }; + + let mut code = wasm_encoder::CodeSection::new(); + for (func_index, func) in self.funcs.iter().enumerate() { + let mut body = wasm_encoder::Function::new(vec![]); + + push_func_to_stack( + &mut body, + num_imported_funcs + u32::try_from(func_index).unwrap(), + ); + check_fuel(&mut body); + + // Perform our specified operations. + for op in &func.ops { + match op { + Op::CheckStackInHost => { + body.instruction(&Instruction::Call(check_stack_func)); + } + Op::Call(f) => { + body.instruction(&Instruction::Call(f + num_imported_funcs)); + } + Op::CallThroughHost(f) => { + body.instruction(&Instruction::RefFunc(f + num_imported_funcs)) + .instruction(&Instruction::Call(call_func_func)); + } + } + } + + // Potentially trap at the end of our function as well, so that we + // exercise the scenario where the Wasm-to-host trampoline + // initialized `last_wasm_exit_sp` et al when calling out to a host + // function, but then we returned back to Wasm and then trapped + // while `last_wasm_exit_sp` et al are still initialized from that + // previous host call. + check_fuel(&mut body); + + pop_func_from_stack(&mut body); + + function(&mut code, body); + } + + let mut run_body = wasm_encoder::Function::new(vec![]); + + // Reset the bump pointer for the internal stack (this allows us to + // reuse an instance in the oracle, rather than re-instantiate). + run_body + .instruction(&Instruction::I32Const(0)) + .instruction(&Instruction::GlobalSet(stack_len_global)); + + // Initialize the fuel global. + run_body + .instruction(&Instruction::LocalGet(0)) + .instruction(&Instruction::GlobalSet(fuel_global)); + + push_func_to_stack(&mut run_body, run_func); + + // Make sure to check for out-of-fuel in the `run` function as well, so + // that we also capture stack traces with only one frame, not just `run` + // followed by the first locally-defined function and then zero or more + // extra frames. + check_fuel(&mut run_body); + + // Call the first locally defined function. + run_body.instruction(&Instruction::Call(num_imported_funcs)); + + check_fuel(&mut run_body); + pop_func_from_stack(&mut run_body); + + function(&mut code, run_body); + + let mut get_stack_body = wasm_encoder::Function::new(vec![]); + get_stack_body + .instruction(&Instruction::I32Const(0)) + .instruction(&Instruction::GlobalGet(stack_len_global)); + function(&mut code, get_stack_body); + + section(&mut module, code); + + return module.finish(); + + // Helper that defines a section in the module and takes ownership of it + // so that it is dropped and its memory reclaimed after adding it to the + // module. + fn section(module: &mut wasm_encoder::Module, section: impl wasm_encoder::Section) { + module.section(§ion); + } + + // Helper that defines a function body in the code section and takes + // ownership of it so that it is dropped and its memory reclaimed after + // adding it to the module. + fn function(code: &mut wasm_encoder::CodeSection, mut func: wasm_encoder::Function) { + func.instruction(&Instruction::End); + code.function(&func); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rand::prelude::*; + use wasmparser::Validator; + + #[test] + fn stacks_generates_valid_wasm_modules() { + let mut rng = SmallRng::seed_from_u64(0); + let mut buf = vec![0; 2048]; + for _ in 0..1024 { + rng.fill_bytes(&mut buf); + let u = Unstructured::new(&buf); + if let Ok(stacks) = Stacks::arbitrary_take_rest(u) { + let wasm = stacks.wasm(); + validate(&wasm); + } + } + } + + fn validate(wasm: &[u8]) { + let mut validator = Validator::new(); + let err = match validator.validate_all(wasm) { + Ok(_) => return, + Err(e) => e, + }; + drop(std::fs::write("test.wasm", wasm)); + if let Ok(text) = wasmprinter::print_bytes(wasm) { + drop(std::fs::write("test.wat", &text)); + } + panic!("wasm failed to validate: {}", err); + } +} diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs index 6d387f1c3ece..d1a5250121f9 100644 --- a/crates/fuzzing/src/oracles.rs +++ b/crates/fuzzing/src/oracles.rs @@ -11,10 +11,12 @@ //! panicking. pub mod dummy; +mod stacks; use crate::generators; use arbitrary::Arbitrary; use log::debug; +pub use stacks::check_stacks; use std::cell::Cell; use std::rc::Rc; use std::sync::atomic::{AtomicUsize, Ordering::SeqCst}; diff --git a/crates/fuzzing/src/oracles/stacks.rs b/crates/fuzzing/src/oracles/stacks.rs new file mode 100644 index 000000000000..3a289a97f9a2 --- /dev/null +++ b/crates/fuzzing/src/oracles/stacks.rs @@ -0,0 +1,146 @@ +use crate::generators::Stacks; +use wasmtime::*; + +/// Run the given `Stacks` test case and assert that the host's view of the Wasm +/// stack matches the test case's understanding of the Wasm stack. +/// +/// Returns the maximum stack depth we checked. +pub fn check_stacks(stacks: Stacks) -> usize { + let wasm = stacks.wasm(); + crate::oracles::log_wasm(&wasm); + + let engine = Engine::default(); + let module = Module::new(&engine, &wasm).expect("should compile okay"); + + let mut linker = Linker::new(&engine); + linker + .func_wrap( + "host", + "check_stack", + |mut caller: Caller<'_, ()>| -> Result<(), Trap> { + let fuel = caller + .get_export("fuel") + .expect("should export `fuel`") + .into_global() + .expect("`fuel` export should be a global"); + + let fuel_left = fuel.get(&mut caller).unwrap_i32(); + if fuel_left == 0 { + return Err(Trap::new("out of fuel")); + } + + fuel.set(&mut caller, Val::I32(fuel_left - 1)).unwrap(); + Ok(()) + }, + ) + .unwrap() + .func_wrap( + "host", + "call_func", + |mut caller: Caller<'_, ()>, f: Option| { + let f = f.unwrap(); + f.call(&mut caller, &[], &mut [])?; + Ok(()) + }, + ) + .unwrap(); + + let mut store = Store::new(&engine, ()); + + let instance = linker + .instantiate(&mut store, &module) + .expect("should instantiate okay"); + + let run = instance + .get_typed_func::<(u32,), (), _>(&mut store, "run") + .expect("should export `run` function"); + + let mut max_stack_depth = 0; + for input in stacks.inputs().iter().copied() { + if let Err(trap) = run.call(&mut store, (input.into(),)) { + let get_stack = instance + .get_typed_func::<(), (u32, u32), _>(&mut store, "get_stack") + .expect("should export `get_stack` function as expected"); + + let (ptr, len) = get_stack + .call(&mut store, ()) + .expect("`get_stack` should not trap"); + + let memory = instance + .get_memory(&mut store, "memory") + .expect("should have `memory` export"); + + let host_trace = trap.trace().unwrap(); + max_stack_depth = max_stack_depth.max(host_trace.len()); + assert_stack_matches(&mut store, memory, ptr, len, host_trace); + } + } + max_stack_depth +} + +/// Assert that the Wasm program's view of the stack matches the host's view. +fn assert_stack_matches( + store: &mut impl AsContextMut, + memory: Memory, + ptr: u32, + len: u32, + host_trace: &[FrameInfo], +) { + let mut data = vec![0; len as usize]; + memory + .read(&mut *store, ptr as usize, &mut data) + .expect("should be in bounds"); + + let mut wasm_trace = vec![]; + for entry in data.chunks(4).rev() { + let mut bytes = [0; 4]; + bytes.copy_from_slice(entry); + let entry = u32::from_le_bytes(bytes); + wasm_trace.push(entry); + } + + log::debug!("Wasm thinks its stack is: {:?}", wasm_trace); + log::debug!( + "Host thinks the stack is: {:?}", + host_trace + .iter() + .map(|f| f.func_index()) + .collect::>() + ); + + assert_eq!(wasm_trace.len(), host_trace.len()); + for (wasm_entry, host_entry) in wasm_trace.into_iter().zip(host_trace) { + assert_eq!(wasm_entry, host_entry.func_index()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arbitrary::{Arbitrary, Unstructured}; + use rand::prelude::*; + + const TARGET_STACK_DEPTH: usize = 10; + + #[test] + fn smoke_test() { + let mut rng = SmallRng::seed_from_u64(0); + let mut buf = vec![0; 2048]; + + for _ in 0..1024 { + rng.fill_bytes(&mut buf); + let u = Unstructured::new(&buf); + if let Ok(stacks) = Stacks::arbitrary_take_rest(u) { + let max_stack_depth = check_stacks(stacks); + if max_stack_depth >= TARGET_STACK_DEPTH { + return; + } + } + } + + panic!( + "never generated a `Stacks` test case that reached {TARGET_STACK_DEPTH} \ + deep stack frames", + ); + } +} diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index 1926a04226d7..02bd231af16d 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -11,6 +11,7 @@ repository = "https://github.com/bytecodealliance/wasmtime" edition = "2021" [dependencies] +wasmtime-asm-macros = { path = "../asm-macros", version = "=0.40.0" } wasmtime-environ = { path = "../environ", version = "=0.40.0" } wasmtime-fiber = { path = "../fiber", version = "=0.40.0", optional = true } wasmtime-jit-debug = { path = "../jit-debug", version = "=0.40.0", features = ["gdb_jit_int"] } @@ -20,10 +21,10 @@ memoffset = "0.6.0" indexmap = "1.0.2" thiserror = "1.0.4" cfg-if = "1.0" -backtrace = { version = "0.3.61" } rand = "0.8.3" anyhow = "1.0.38" memfd = { version = "0.6.1", optional = true } +paste = "1.0.3" [target.'cfg(target_os = "macos")'.dependencies] mach = "0.3.2" diff --git a/crates/runtime/build.rs b/crates/runtime/build.rs index 3bdf7b6b46dd..167b90c49d2e 100644 --- a/crates/runtime/build.rs +++ b/crates/runtime/build.rs @@ -1,15 +1,19 @@ use std::env; fn main() { + let mut build = cc::Build::new(); + build.warnings(true); + let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + let os = env::var("CARGO_CFG_TARGET_OS").unwrap(); + build.define(&format!("CFG_TARGET_OS_{}", os), None); + build.define(&format!("CFG_TARGET_ARCH_{}", arch), None); + if arch == "s390x" { + println!("cargo:rerun-if-changed=src/trampolines/s390x.S"); + build.file("src/trampolines/s390x.S"); + } println!("cargo:rerun-if-changed=src/helpers.c"); - cc::Build::new() - .warnings(true) - .define( - &format!("CFG_TARGET_OS_{}", env::var("CARGO_CFG_TARGET_OS").unwrap()), - None, - ) - .file("src/helpers.c") - .compile("wasmtime-helpers"); + build.file("src/helpers.c"); + build.compile("wasmtime-helpers"); // Check to see if we are on Unix and the `memory-init-cow` feature is // active. If so, enable the `memory_init_cow` rustc cfg so diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs index 4da7cc55d105..d87297a7a633 100644 --- a/crates/runtime/src/externref.rs +++ b/crates/runtime/src/externref.rs @@ -111,6 +111,8 @@ use std::ptr::{self, NonNull}; use std::sync::atomic::{self, AtomicUsize, Ordering}; use wasmtime_environ::StackMap; +use crate::Backtrace; + /// An external reference to some opaque data. /// /// `VMExternRef`s dereference to their underlying opaque data as `dyn Any`. @@ -247,7 +249,12 @@ impl VMExternData { let alloc_align = std::cmp::max(value_align, extern_data_align); let alloc_size = value_and_padding_size + extern_data_size; - debug_assert!(Layout::from_size_align(alloc_size, alloc_align).is_ok()); + debug_assert!( + Layout::from_size_align(alloc_size, alloc_align).is_ok(), + "should create a `Layout` for size={} and align={} okay", + alloc_size, + alloc_align, + ); ( Layout::from_size_align_unchecked(alloc_size, alloc_align), value_and_padding_size, @@ -521,14 +528,6 @@ pub struct VMExternRefActivationsTable { /// than create a new hash set every GC. precise_stack_roots: HashSet, - /// A pointer to the youngest host stack frame before we called - /// into Wasm for the first time. When walking the stack in garbage - /// collection, if we don't find this frame, then we failed to walk every - /// Wasm stack frame, which means we failed to find all on-stack, - /// inside-a-Wasm-frame roots, and doing a GC could lead to freeing one of - /// those missed roots, and use after free. - stack_canary: Option, - /// A debug-only field for asserting that we are in a region of code where /// GC is okay to preform. #[cfg(debug_assertions)] @@ -589,7 +588,6 @@ impl VMExternRefActivationsTable { }, over_approximated_stack_roots: HashSet::new(), precise_stack_roots: HashSet::new(), - stack_canary: None, #[cfg(debug_assertions)] gc_okay: true, } @@ -771,31 +769,6 @@ impl VMExternRefActivationsTable { self.precise_stack_roots.clear(); } - /// Fetches the current value of this table's stack canary. - /// - /// This should only be used in conjunction with setting the stack canary - /// below if the return value is `None` typically. This is called from RAII - /// guards in `wasmtime::func::invoke_wasm_and_catch_traps`. - /// - /// For more information on canaries see the gc functions below. - #[inline] - pub fn stack_canary(&self) -> Option { - self.stack_canary - } - - /// Sets the current value of the stack canary. - /// - /// This is called from RAII guards in - /// `wasmtime::func::invoke_wasm_and_catch_traps`. This is used to update - /// the stack canary to a concrete value and then reset it back to `None` - /// when wasm is finished. - /// - /// For more information on canaries see the gc functions below. - #[inline] - pub fn set_stack_canary(&mut self, canary: Option) { - self.stack_canary = canary; - } - /// Set whether it is okay to GC or not right now. /// /// This is provided as a helper for enabling various debug-only assertions @@ -890,29 +863,7 @@ pub unsafe fn gc( externref_activations_table.precise_stack_roots.is_empty() }); - // Whenever we call into Wasm from host code for the first time, we set a - // stack canary. When we return to that host code, we unset the stack - // canary. If there is *not* a stack canary, then there must be zero Wasm - // frames on the stack. Therefore, we can simply reset the table without - // walking the stack. - let stack_canary = match externref_activations_table.stack_canary { - None => { - if cfg!(debug_assertions) { - // Assert that there aren't any Wasm frames on the stack. - backtrace::trace(|frame| { - assert!(module_info_lookup.lookup(frame.ip() as usize).is_none()); - true - }); - } - externref_activations_table.sweep(); - log::debug!("end GC"); - return; - } - Some(canary) => canary, - }; - - // There is a stack canary, so there must be Wasm frames on the stack. The - // rest of this function consists of: + // This function proceeds by: // // * walking the stack, // @@ -922,12 +873,6 @@ pub unsafe fn gc( // * resetting our bump-allocated table's over-approximation to the // newly-discovered precise set. - // The SP of the previous (younger) frame we processed. - let mut last_sp: Option = None; - - // Whether we have found our stack canary or not yet. - let mut found_canary = false; - // The `activations_table_set` is used for `debug_assert!`s checking that // every reference we read out from the stack via stack maps is actually in // the table. If that weren't true, than either we forgot to insert a @@ -940,13 +885,17 @@ pub unsafe fn gc( }); } - backtrace::trace(|frame| { - let pc = frame.ip() as usize; - let sp = frame.sp() as usize; + Backtrace::trace(|frame| { + let pc = frame.pc(); + let fp = frame.fp(); if let Some(module_info) = module_info_lookup.lookup(pc) { if let Some(stack_map) = module_info.lookup_stack_map(pc) { - debug_assert!(sp != 0, "we should always get a valid SP for Wasm frames"); + debug_assert!( + fp != 0, + "we should always get a valid frame pointer for Wasm frames" + ); + let sp = fp - stack_map.mapped_words() as usize * mem::size_of::(); for i in 0..(stack_map.mapped_words() as usize) { if stack_map.get_bit(i) { @@ -975,32 +924,10 @@ pub unsafe fn gc( } } - if let Some(last_sp) = last_sp { - // We've found the stack canary when we walk over the frame that it - // is contained within. - found_canary |= last_sp <= stack_canary && stack_canary <= sp; - } - last_sp = Some(sp); - - // Keep walking the stack until we've found the canary, which is the - // oldest frame before we ever called into Wasm. We can stop once we've - // found it because there won't be any more Wasm frames, and therefore - // there won't be anymore on-stack, inside-a-Wasm-frame roots. - !found_canary + std::ops::ControlFlow::Continue(()) }); - // Only sweep and reset the table if we found the stack canary, and - // therefore know that we discovered all the on-stack, inside-a-Wasm-frame - // roots. If we did *not* find the stack canary, then `libunwind` failed to - // walk the whole stack, and we might be missing roots. Reseting the table - // would free those missing roots while they are still in use, leading to - // use-after-free. - if found_canary { - externref_activations_table.sweep(); - } else { - log::warn!("did not find stack canary; skipping GC sweep"); - externref_activations_table.precise_stack_roots.clear(); - } + externref_activations_table.sweep(); log::debug!("end GC"); } diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 7b08647b27b2..7cd729a889fd 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -13,6 +13,7 @@ use crate::vmcontext::{ }; use crate::{ ExportFunction, ExportGlobal, ExportMemory, ExportTable, Imports, ModuleRuntimeInfo, Store, + VMFunctionBody, }; use anyhow::Error; use memoffset::offset_of; @@ -269,8 +270,29 @@ impl Instance { ptr } - pub unsafe fn set_store(&mut self, store: *mut dyn Store) { - *self.vmctx_plus_offset(self.offsets.vmctx_store()) = store; + pub unsafe fn set_store(&mut self, store: Option<*mut dyn Store>) { + if let Some(store) = store { + *self.vmctx_plus_offset(self.offsets.vmctx_store()) = store; + *self.runtime_limits() = (*store).vmruntime_limits(); + *self.epoch_ptr() = (*store).epoch_ptr(); + *self.externref_activations_table() = (*store).externref_activations_table().0; + } else { + assert_eq!( + mem::size_of::<*mut dyn Store>(), + mem::size_of::<[*mut (); 2]>() + ); + *self.vmctx_plus_offset::<[*mut (); 2]>(self.offsets.vmctx_store()) = + [ptr::null_mut(), ptr::null_mut()]; + + *self.runtime_limits() = ptr::null_mut(); + *self.epoch_ptr() = ptr::null_mut(); + *self.externref_activations_table() = ptr::null_mut(); + } + } + + pub(crate) unsafe fn set_callee(&mut self, callee: Option>) { + *self.vmctx_plus_offset(self.offsets.vmctx_callee()) = + callee.map_or(ptr::null_mut(), |c| c.as_ptr()); } /// Return a reference to the vmctx used by compiled wasm code. @@ -869,13 +891,8 @@ impl Instance { assert!(std::ptr::eq(module, self.module().as_ref())); *self.vmctx_plus_offset(self.offsets.vmctx_magic()) = VMCONTEXT_MAGIC; - - if let Some(store) = store.as_raw() { - *self.runtime_limits() = (*store).vmruntime_limits(); - *self.epoch_ptr() = (*store).epoch_ptr(); - *self.externref_activations_table() = (*store).externref_activations_table().0; - self.set_store(store); - } + self.set_callee(None); + self.set_store(store.as_raw()); // Initialize shared signatures let signatures = self.runtime_info.signature_ids(); @@ -1157,7 +1174,7 @@ impl InstanceHandle { /// This is provided for the original `Store` itself to configure the first /// self-pointer after the original `Box` has been initialized. pub unsafe fn set_store(&mut self, store: *mut dyn Store) { - self.instance_mut().set_store(store); + self.instance_mut().set_store(Some(store)); } /// Returns a clone of this instance. diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs index 18ab710cc9fc..bd4b7c8476d3 100644 --- a/crates/runtime/src/lib.rs +++ b/crates/runtime/src/lib.rs @@ -29,6 +29,9 @@ use wasmtime_environ::DefinedMemoryIndex; use wasmtime_environ::FunctionInfo; use wasmtime_environ::SignatureIndex; +#[macro_use] +mod trampolines; + #[cfg(feature = "component-model")] pub mod component; mod export; @@ -62,15 +65,16 @@ pub use crate::memory::{ pub use crate::mmap::Mmap; pub use crate::mmap_vec::MmapVec; pub use crate::table::{Table, TableElement}; +pub use crate::trampolines::prepare_host_to_wasm_trampoline; pub use crate::traphandlers::{ catch_traps, init_traps, raise_lib_trap, raise_user_trap, resume_panic, tls_eager_initialize, Backtrace, SignalHandler, TlsRestore, Trap, TrapReason, }; pub use crate::vmcontext::{ VMCallerCheckedAnyfunc, VMContext, VMFunctionBody, VMFunctionImport, VMGlobalDefinition, - VMGlobalImport, VMInvokeArgument, VMMemoryDefinition, VMMemoryImport, VMOpaqueContext, - VMRuntimeLimits, VMSharedSignatureIndex, VMTableDefinition, VMTableImport, VMTrampoline, - ValRaw, + VMGlobalImport, VMHostFuncContext, VMInvokeArgument, VMMemoryDefinition, VMMemoryImport, + VMOpaqueContext, VMRuntimeLimits, VMSharedSignatureIndex, VMTableDefinition, VMTableImport, + VMTrampoline, ValRaw, }; mod module_id; diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index 51c6ba39225a..5f622b6b94f0 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -7,126 +7,210 @@ //! These functions are called by compiled Wasm code, and therefore must take //! certain care about some things: //! -//! * They must always be `pub extern "C"` and should only contain basic, raw -//! i32/i64/f32/f64/pointer parameters that are safe to pass across the system -//! ABI! +//! * They must only contain basic, raw i32/i64/f32/f64/pointer parameters that +//! are safe to pass across the system ABI. //! //! * If any nested function propagates an `Err(trap)` out to the library //! function frame, we need to raise it. This involves some nasty and quite -//! unsafe code under the covers! Notable, after raising the trap, drops +//! unsafe code under the covers! Notably, after raising the trap, drops //! **will not** be run for local variables! This can lead to things like //! leaking `InstanceHandle`s which leads to never deallocating JIT code, -//! instances, and modules! Therefore, always use nested blocks to ensure -//! drops run before raising a trap: +//! instances, and modules if we are not careful! //! -//! ```ignore -//! pub extern "C" fn my_lib_function(...) { -//! let result = { -//! // Do everything in here so drops run at the end of the block. -//! ... -//! }; -//! if let Err(trap) = result { -//! // Now we can safely raise the trap without leaking! -//! raise_lib_trap(trap); -//! } -//! } -//! ``` +//! * The libcall must be entered via a Wasm-to-libcall trampoline that saves +//! the last Wasm FP and PC for stack walking purposes. (For more details, see +//! `crates/runtime/src/backtrace.rs`.) //! -//! * When receiving a raw `*mut u8` that is actually a `VMExternRef` reference, -//! convert it into a proper `VMExternRef` with `VMExternRef::clone_from_raw` -//! as soon as apossible. Any GC before raw pointer is converted into a -//! reference can potentially collect the referenced object, which could lead -//! to use after free. Avoid this by eagerly converting into a proper -//! `VMExternRef`! +//! To make it easier to correctly handle all these things, **all** libcalls +//! must be defined via the `libcall!` helper macro! See its doc comments below +//! for an example, or just look at the rest of the file. //! -//! ```ignore -//! pub unsafe extern "C" my_lib_takes_ref(raw_extern_ref: *mut u8) { -//! // Before `clone_from_raw`, `raw_extern_ref` is potentially unrooted, -//! // and doing GC here could lead to use after free! +//! ## Dealing with `externref`s //! -//! let my_extern_ref = if raw_extern_ref.is_null() { -//! None -//! } else { -//! Some(VMExternRef::clone_from_raw(raw_extern_ref)) -//! }; +//! When receiving a raw `*mut u8` that is actually a `VMExternRef` reference, +//! convert it into a proper `VMExternRef` with `VMExternRef::clone_from_raw` as +//! soon as apossible. Any GC before raw pointer is converted into a reference +//! can potentially collect the referenced object, which could lead to use after +//! free. //! -//! // Now that we did `clone_from_raw`, it is safe to do a GC (or do -//! // anything else that might transitively GC, like call back into -//! // Wasm!) -//! } -//! ``` +//! Avoid this by eagerly converting into a proper `VMExternRef`! (Unfortunately +//! there is no macro to help us automatically get this correct, so stay +//! vigilant!) +//! +//! ```ignore +//! pub unsafe extern "C" my_libcall_takes_ref(raw_extern_ref: *mut u8) { +//! // Before `clone_from_raw`, `raw_extern_ref` is potentially unrooted, +//! // and doing GC here could lead to use after free! +//! +//! let my_extern_ref = if raw_extern_ref.is_null() { +//! None +//! } else { +//! Some(VMExternRef::clone_from_raw(raw_extern_ref)) +//! }; +//! +//! // Now that we did `clone_from_raw`, it is safe to do a GC (or do +//! // anything else that might transitively GC, like call back into +//! // Wasm!) +//! } +//! ``` use crate::externref::VMExternRef; use crate::instance::Instance; use crate::table::{Table, TableElementType}; -use crate::traphandlers::{raise_lib_trap, raise_user_trap, resume_panic}; use crate::vmcontext::{VMCallerCheckedAnyfunc, VMContext}; +use crate::TrapReason; +use anyhow::Result; use std::mem; use std::ptr::{self, NonNull}; use wasmtime_environ::{ DataIndex, ElemIndex, FuncIndex, GlobalIndex, MemoryIndex, TableIndex, TrapCode, }; -/// Implementation of memory.grow for locally-defined 32-bit memories. -pub unsafe extern "C" fn memory32_grow( - vmctx: *mut VMContext, - delta: u64, - memory_index: u32, -) -> *mut u8 { - // Memory grow can invoke user code provided in a ResourceLimiter{,Async}, - // so we need to catch a possible panic - let ret = match std::panic::catch_unwind(|| { - let instance = (*vmctx).instance_mut(); - let memory_index = MemoryIndex::from_u32(memory_index); - instance.memory_grow(memory_index, delta) - }) { - Ok(Ok(Some(size_in_bytes))) => size_in_bytes / (wasmtime_environ::WASM_PAGE_SIZE as usize), - Ok(Ok(None)) => usize::max_value(), - Ok(Err(err)) => crate::traphandlers::raise_user_trap(err), - Err(p) => resume_panic(p), +/// Actually public trampolines which are used by the runtime as the entrypoint +/// for libcalls. +/// +/// Note that the trampolines here are actually defined in inline assembly right +/// now to ensure that the fp/sp on exit are recorded for backtraces to work +/// properly. +pub mod trampolines { + use crate::{TrapReason, VMContext}; + + macro_rules! libcall { + ( + $( + $( #[$attr:meta] )* + $name:ident( vmctx: vmctx $(, $pname:ident: $param:ident )* ) $( -> $result:ident )?; + )* + ) => {paste::paste! { + $( + // The actual libcall itself, which has the `pub` name here, is + // defined via the `wasm_to_libcall_trampoline!` macro on + // supported platforms or otherwise in inline assembly for + // platforms like s390x which don't have stable `global_asm!` + // yet. + extern "C" { + #[allow(missing_docs)] + #[allow(improper_ctypes)] + pub fn $name( + vmctx: *mut VMContext, + $( $pname: libcall!(@ty $param), )* + ) $(-> libcall!(@ty $result))?; + } + + wasm_to_libcall_trampoline!($name ; []); + + // This is the direct entrypoint from the inline assembly which + // still has the same raw signature as the trampoline itself. + // This will delegate to the outer module to the actual + // implementation and automatically perform `catch_unwind` along + // with conversion of the return value in the face of traps. + #[no_mangle] + unsafe extern "C" fn []( + vmctx : *mut VMContext, + $( $pname : libcall!(@ty $param), )* + ) $( -> libcall!(@ty $result))? { + let result = std::panic::catch_unwind(|| { + super::$name(vmctx, $($pname),*) + }); + match result { + Ok(ret) => LibcallResult::convert(ret), + Err(panic) => crate::traphandlers::resume_panic(panic), + } + } + )* + }}; + + (@ty i32) => (u32); + (@ty i64) => (u64); + (@ty reference) => (*mut u8); + (@ty pointer) => (*mut u8); + (@ty vmctx) => (*mut VMContext); + } + + wasmtime_environ::foreach_builtin_function!(libcall); + + // Helper trait to convert results of libcalls below into the ABI of what + // the libcall expects. + // + // This basically entirely exists for the `Result` implementation which + // "unwraps" via a throwing of a trap. + trait LibcallResult { + type Abi; + unsafe fn convert(self) -> Self::Abi; + } + + impl LibcallResult for () { + type Abi = (); + unsafe fn convert(self) {} + } + + impl LibcallResult for Result + where + E: Into, + { + type Abi = T; + unsafe fn convert(self) -> T { + match self { + Ok(t) => t, + Err(e) => crate::traphandlers::raise_trap(e.into()), + } + } + } + + impl LibcallResult for *mut u8 { + type Abi = *mut u8; + unsafe fn convert(self) -> *mut u8 { + self + } + } +} + +unsafe fn memory32_grow(vmctx: *mut VMContext, delta: u64, memory_index: u32) -> Result<*mut u8> { + let instance = (*vmctx).instance_mut(); + let memory_index = MemoryIndex::from_u32(memory_index); + let result = match instance.memory_grow(memory_index, delta)? { + Some(size_in_bytes) => size_in_bytes / (wasmtime_environ::WASM_PAGE_SIZE as usize), + None => usize::max_value(), }; - ret as *mut u8 + Ok(result as *mut _) } -/// Implementation of `table.grow`. -pub unsafe extern "C" fn table_grow( +// Implementation of `table.grow`. +// +// Table grow can invoke user code provided in a ResourceLimiter{,Async}, so we +// need to catch a possible panic. +unsafe fn table_grow( vmctx: *mut VMContext, table_index: u32, delta: u32, // NB: we don't know whether this is a pointer to a `VMCallerCheckedAnyfunc` // or is a `VMExternRef` until we look at the table type. init_value: *mut u8, -) -> u32 { - // Table grow can invoke user code provided in a ResourceLimiter{,Async}, - // so we need to catch a possible panic - match std::panic::catch_unwind(|| { - let instance = (*vmctx).instance_mut(); - let table_index = TableIndex::from_u32(table_index); - let element = match instance.table_element_type(table_index) { - TableElementType::Func => (init_value as *mut VMCallerCheckedAnyfunc).into(), - TableElementType::Extern => { - let init_value = if init_value.is_null() { - None - } else { - Some(VMExternRef::clone_from_raw(init_value)) - }; - init_value.into() - } - }; - instance.table_grow(table_index, delta, element) - }) { - Ok(Ok(Some(r))) => r, - Ok(Ok(None)) => -1_i32 as u32, - Ok(Err(err)) => crate::traphandlers::raise_user_trap(err), - Err(p) => resume_panic(p), - } +) -> Result { + let instance = (*vmctx).instance_mut(); + let table_index = TableIndex::from_u32(table_index); + let element = match instance.table_element_type(table_index) { + TableElementType::Func => (init_value as *mut VMCallerCheckedAnyfunc).into(), + TableElementType::Extern => { + let init_value = if init_value.is_null() { + None + } else { + Some(VMExternRef::clone_from_raw(init_value)) + }; + init_value.into() + } + }; + Ok(match instance.table_grow(table_index, delta, element)? { + Some(r) => r, + None => -1_i32 as u32, + }) } -pub use table_grow as table_grow_funcref; -pub use table_grow as table_grow_externref; +use table_grow as table_grow_funcref; +use table_grow as table_grow_externref; -/// Implementation of `table.fill`. -pub unsafe extern "C" fn table_fill( +// Implementation of `table.fill`. +unsafe fn table_fill( vmctx: *mut VMContext, table_index: u32, dst: u32, @@ -134,145 +218,115 @@ pub unsafe extern "C" fn table_fill( // `VMCallerCheckedAnyfunc` until we look at the table's element type. val: *mut u8, len: u32, -) { - let result = { - let instance = (*vmctx).instance_mut(); - let table_index = TableIndex::from_u32(table_index); - let table = &mut *instance.get_table(table_index); - match table.element_type() { - TableElementType::Func => { - let val = val as *mut VMCallerCheckedAnyfunc; - table.fill(dst, val.into(), len) - } - TableElementType::Extern => { - let val = if val.is_null() { - None - } else { - Some(VMExternRef::clone_from_raw(val)) - }; - table.fill(dst, val.into(), len) - } +) -> Result<(), TrapCode> { + let instance = (*vmctx).instance_mut(); + let table_index = TableIndex::from_u32(table_index); + let table = &mut *instance.get_table(table_index); + match table.element_type() { + TableElementType::Func => { + let val = val as *mut VMCallerCheckedAnyfunc; + table.fill(dst, val.into(), len) + } + TableElementType::Extern => { + let val = if val.is_null() { + None + } else { + Some(VMExternRef::clone_from_raw(val)) + }; + table.fill(dst, val.into(), len) } - }; - if let Err(trap) = result { - raise_lib_trap(trap); } } -pub use table_fill as table_fill_funcref; -pub use table_fill as table_fill_externref; +use table_fill as table_fill_funcref; +use table_fill as table_fill_externref; -/// Implementation of `table.copy`. -pub unsafe extern "C" fn table_copy( +// Implementation of `table.copy`. +unsafe fn table_copy( vmctx: *mut VMContext, dst_table_index: u32, src_table_index: u32, dst: u32, src: u32, len: u32, -) { - let result = { - let dst_table_index = TableIndex::from_u32(dst_table_index); - let src_table_index = TableIndex::from_u32(src_table_index); - let instance = (*vmctx).instance_mut(); - let dst_table = instance.get_table(dst_table_index); - // Lazy-initialize the whole range in the source table first. - let src_range = src..(src.checked_add(len).unwrap_or(u32::MAX)); - let src_table = instance.get_table_with_lazy_init(src_table_index, src_range); - Table::copy(dst_table, src_table, dst, src, len) - }; - if let Err(trap) = result { - raise_lib_trap(trap); - } +) -> Result<(), TrapCode> { + let dst_table_index = TableIndex::from_u32(dst_table_index); + let src_table_index = TableIndex::from_u32(src_table_index); + let instance = (*vmctx).instance_mut(); + let dst_table = instance.get_table(dst_table_index); + // Lazy-initialize the whole range in the source table first. + let src_range = src..(src.checked_add(len).unwrap_or(u32::MAX)); + let src_table = instance.get_table_with_lazy_init(src_table_index, src_range); + Table::copy(dst_table, src_table, dst, src, len) } -/// Implementation of `table.init`. -pub unsafe extern "C" fn table_init( +// Implementation of `table.init`. +unsafe fn table_init( vmctx: *mut VMContext, table_index: u32, elem_index: u32, dst: u32, src: u32, len: u32, -) { - let result = { - let table_index = TableIndex::from_u32(table_index); - let elem_index = ElemIndex::from_u32(elem_index); - let instance = (*vmctx).instance_mut(); - instance.table_init(table_index, elem_index, dst, src, len) - }; - if let Err(trap) = result { - raise_lib_trap(trap); - } +) -> Result<(), TrapCode> { + let table_index = TableIndex::from_u32(table_index); + let elem_index = ElemIndex::from_u32(elem_index); + let instance = (*vmctx).instance_mut(); + instance.table_init(table_index, elem_index, dst, src, len) } -/// Implementation of `elem.drop`. -pub unsafe extern "C" fn elem_drop(vmctx: *mut VMContext, elem_index: u32) { +// Implementation of `elem.drop`. +unsafe fn elem_drop(vmctx: *mut VMContext, elem_index: u32) { let elem_index = ElemIndex::from_u32(elem_index); let instance = (*vmctx).instance_mut(); instance.elem_drop(elem_index); } -/// Implementation of `memory.copy` for locally defined memories. -pub unsafe extern "C" fn memory_copy( +// Implementation of `memory.copy` for locally defined memories. +unsafe fn memory_copy( vmctx: *mut VMContext, dst_index: u32, dst: u64, src_index: u32, src: u64, len: u64, -) { - let result = { - let src_index = MemoryIndex::from_u32(src_index); - let dst_index = MemoryIndex::from_u32(dst_index); - let instance = (*vmctx).instance_mut(); - instance.memory_copy(dst_index, dst, src_index, src, len) - }; - if let Err(trap) = result { - raise_lib_trap(trap); - } +) -> Result<(), TrapCode> { + let src_index = MemoryIndex::from_u32(src_index); + let dst_index = MemoryIndex::from_u32(dst_index); + let instance = (*vmctx).instance_mut(); + instance.memory_copy(dst_index, dst, src_index, src, len) } -/// Implementation of `memory.fill` for locally defined memories. -pub unsafe extern "C" fn memory_fill( +// Implementation of `memory.fill` for locally defined memories. +unsafe fn memory_fill( vmctx: *mut VMContext, memory_index: u32, dst: u64, val: u32, len: u64, -) { - let result = { - let memory_index = MemoryIndex::from_u32(memory_index); - let instance = (*vmctx).instance_mut(); - instance.memory_fill(memory_index, dst, val as u8, len) - }; - if let Err(trap) = result { - raise_lib_trap(trap); - } +) -> Result<(), TrapCode> { + let memory_index = MemoryIndex::from_u32(memory_index); + let instance = (*vmctx).instance_mut(); + instance.memory_fill(memory_index, dst, val as u8, len) } -/// Implementation of `memory.init`. -pub unsafe extern "C" fn memory_init( +// Implementation of `memory.init`. +unsafe fn memory_init( vmctx: *mut VMContext, memory_index: u32, data_index: u32, dst: u64, src: u32, len: u32, -) { - let result = { - let memory_index = MemoryIndex::from_u32(memory_index); - let data_index = DataIndex::from_u32(data_index); - let instance = (*vmctx).instance_mut(); - instance.memory_init(memory_index, data_index, dst, src, len) - }; - if let Err(trap) = result { - raise_lib_trap(trap); - } +) -> Result<(), TrapCode> { + let memory_index = MemoryIndex::from_u32(memory_index); + let data_index = DataIndex::from_u32(data_index); + let instance = (*vmctx).instance_mut(); + instance.memory_init(memory_index, data_index, dst, src, len) } -/// Implementation of `ref.func`. -pub unsafe extern "C" fn ref_func(vmctx: *mut VMContext, func_index: u32) -> *mut u8 { +// Implementation of `ref.func`. +unsafe fn ref_func(vmctx: *mut VMContext, func_index: u32) -> *mut u8 { let instance = (*vmctx).instance_mut(); let anyfunc = instance .get_caller_checked_anyfunc(FuncIndex::from_u32(func_index)) @@ -280,15 +334,15 @@ pub unsafe extern "C" fn ref_func(vmctx: *mut VMContext, func_index: u32) -> *mu anyfunc as *mut _ } -/// Implementation of `data.drop`. -pub unsafe extern "C" fn data_drop(vmctx: *mut VMContext, data_index: u32) { +// Implementation of `data.drop`. +unsafe fn data_drop(vmctx: *mut VMContext, data_index: u32) { let data_index = DataIndex::from_u32(data_index); let instance = (*vmctx).instance_mut(); instance.data_drop(data_index) } -/// Returns a table entry after lazily initializing it. -pub unsafe extern "C" fn table_get_lazy_init_funcref( +// Returns a table entry after lazily initializing it. +unsafe fn table_get_lazy_init_funcref( vmctx: *mut VMContext, table_index: u32, index: u32, @@ -303,19 +357,16 @@ pub unsafe extern "C" fn table_get_lazy_init_funcref( elem.into_ref_asserting_initialized() as *mut _ } -/// Drop a `VMExternRef`. -pub unsafe extern "C" fn drop_externref(externref: *mut u8) { +// Drop a `VMExternRef`. +unsafe fn drop_externref(_vmctx: *mut VMContext, externref: *mut u8) { let externref = externref as *mut crate::externref::VMExternData; let externref = NonNull::new(externref).unwrap(); crate::externref::VMExternData::drop_and_dealloc(externref); } -/// Do a GC and insert the given `externref` into the -/// `VMExternRefActivationsTable`. -pub unsafe extern "C" fn activations_table_insert_with_gc( - vmctx: *mut VMContext, - externref: *mut u8, -) { +// Do a GC and insert the given `externref` into the +// `VMExternRefActivationsTable`. +unsafe fn activations_table_insert_with_gc(vmctx: *mut VMContext, externref: *mut u8) { let externref = VMExternRef::clone_from_raw(externref); let instance = (*vmctx).instance(); let (activations_table, module_info_lookup) = (*instance.store()).externref_activations_table(); @@ -332,8 +383,8 @@ pub unsafe extern "C" fn activations_table_insert_with_gc( activations_table.insert_with_gc(externref, module_info_lookup); } -/// Perform a Wasm `global.get` for `externref` globals. -pub unsafe extern "C" fn externref_global_get(vmctx: *mut VMContext, index: u32) -> *mut u8 { +// Perform a Wasm `global.get` for `externref` globals. +unsafe fn externref_global_get(vmctx: *mut VMContext, index: u32) -> *mut u8 { let index = GlobalIndex::from_u32(index); let instance = (*vmctx).instance(); let global = instance.defined_or_imported_global_ptr(index); @@ -349,12 +400,8 @@ pub unsafe extern "C" fn externref_global_get(vmctx: *mut VMContext, index: u32) } } -/// Perform a Wasm `global.set` for `externref` globals. -pub unsafe extern "C" fn externref_global_set( - vmctx: *mut VMContext, - index: u32, - externref: *mut u8, -) { +// Perform a Wasm `global.set` for `externref` globals. +unsafe fn externref_global_set(vmctx: *mut VMContext, index: u32, externref: *mut u8) { let externref = if externref.is_null() { None } else { @@ -373,79 +420,67 @@ pub unsafe extern "C" fn externref_global_set( drop(old); } -/// Implementation of `memory.atomic.notify` for locally defined memories. -pub unsafe extern "C" fn memory_atomic_notify( +// Implementation of `memory.atomic.notify` for locally defined memories. +unsafe fn memory_atomic_notify( vmctx: *mut VMContext, memory_index: u32, addr: *mut u8, _count: u32, -) -> u32 { - let result = { - let addr = addr as usize; - let memory = MemoryIndex::from_u32(memory_index); - let instance = (*vmctx).instance(); - // this should never overflow since addr + 4 either hits a guard page - // or it's been validated to be in-bounds already. Double-check for now - // just to be sure. - let addr_to_check = addr.checked_add(4).unwrap(); - validate_atomic_addr(instance, memory, addr_to_check) - }; - match result { - Ok(()) => raise_user_trap(anyhow::anyhow!( - "unimplemented: wasm atomics (fn memory_atomic_notify) unsupported", - )), - Err(e) => raise_lib_trap(e), - } +) -> Result { + let addr = addr as usize; + let memory = MemoryIndex::from_u32(memory_index); + let instance = (*vmctx).instance(); + // this should never overflow since addr + 4 either hits a guard page + // or it's been validated to be in-bounds already. Double-check for now + // just to be sure. + let addr_to_check = addr.checked_add(4).unwrap(); + validate_atomic_addr(instance, memory, addr_to_check)?; + Err( + anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_notify) unsupported",) + .into(), + ) } -/// Implementation of `memory.atomic.wait32` for locally defined memories. -pub unsafe extern "C" fn memory_atomic_wait32( +// Implementation of `memory.atomic.wait32` for locally defined memories. +unsafe fn memory_atomic_wait32( vmctx: *mut VMContext, memory_index: u32, addr: *mut u8, _expected: u32, _timeout: u64, -) -> u32 { - let result = { - let addr = addr as usize; - let memory = MemoryIndex::from_u32(memory_index); - let instance = (*vmctx).instance(); - // see wasmtime_memory_atomic_notify for why this shouldn't overflow - // but we still double-check - let addr_to_check = addr.checked_add(4).unwrap(); - validate_atomic_addr(instance, memory, addr_to_check) - }; - match result { - Ok(()) => raise_user_trap(anyhow::anyhow!( - "unimplemented: wasm atomics (fn memory_atomic_wait32) unsupported", - )), - Err(e) => raise_lib_trap(e), - } +) -> Result { + let addr = addr as usize; + let memory = MemoryIndex::from_u32(memory_index); + let instance = (*vmctx).instance(); + // see wasmtime_memory_atomic_notify for why this shouldn't overflow + // but we still double-check + let addr_to_check = addr.checked_add(4).unwrap(); + validate_atomic_addr(instance, memory, addr_to_check)?; + Err( + anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_wait32) unsupported",) + .into(), + ) } -/// Implementation of `memory.atomic.wait64` for locally defined memories. -pub unsafe extern "C" fn memory_atomic_wait64( +// Implementation of `memory.atomic.wait64` for locally defined memories. +unsafe fn memory_atomic_wait64( vmctx: *mut VMContext, memory_index: u32, addr: *mut u8, _expected: u64, _timeout: u64, -) -> u32 { - let result = { - let addr = addr as usize; - let memory = MemoryIndex::from_u32(memory_index); - let instance = (*vmctx).instance(); - // see wasmtime_memory_atomic_notify for why this shouldn't overflow - // but we still double-check - let addr_to_check = addr.checked_add(8).unwrap(); - validate_atomic_addr(instance, memory, addr_to_check) - }; - match result { - Ok(()) => raise_user_trap(anyhow::anyhow!( - "unimplemented: wasm atomics (fn memory_atomic_wait64) unsupported", - )), - Err(e) => raise_lib_trap(e), - } +) -> Result { + let addr = addr as usize; + let memory = MemoryIndex::from_u32(memory_index); + let instance = (*vmctx).instance(); + // see wasmtime_memory_atomic_notify for why this shouldn't overflow + // but we still double-check + let addr_to_check = addr.checked_add(8).unwrap(); + validate_atomic_addr(instance, memory, addr_to_check)?; + Err( + anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_wait64) unsupported",) + .into(), + ) } /// For atomic operations we still check the actual address despite this also @@ -468,18 +503,12 @@ unsafe fn validate_atomic_addr( Ok(()) } -/// Hook for when an instance runs out of fuel. -pub unsafe extern "C" fn out_of_gas(vmctx: *mut VMContext) { - match (*(*vmctx).instance().store()).out_of_gas() { - Ok(()) => {} - Err(err) => crate::traphandlers::raise_user_trap(err), - } +// Hook for when an instance runs out of fuel. +unsafe fn out_of_gas(vmctx: *mut VMContext) -> Result<()> { + (*(*vmctx).instance().store()).out_of_gas() } -/// Hook for when an instance observes that the epoch has changed. -pub unsafe extern "C" fn new_epoch(vmctx: *mut VMContext) -> u64 { - match (*(*vmctx).instance().store()).new_epoch() { - Ok(new_deadline) => new_deadline, - Err(err) => crate::traphandlers::raise_user_trap(err), - } +// Hook for when an instance observes that the epoch has changed. +unsafe fn new_epoch(vmctx: *mut VMContext) -> Result { + (*(*vmctx).instance().store()).new_epoch() } diff --git a/crates/runtime/src/trampolines.rs b/crates/runtime/src/trampolines.rs new file mode 100644 index 000000000000..044b2907ec7b --- /dev/null +++ b/crates/runtime/src/trampolines.rs @@ -0,0 +1,57 @@ +//! Trampolines for calling into Wasm from the host and calling the host from +//! Wasm. + +use crate::VMContext; +use std::mem; + +/// Given a Wasm function pointer and a `vmctx`, prepare the `vmctx` for calling +/// into that Wasm function, and return the host-to-Wasm entry trampoline. +/// +/// Callers must never call Wasm function pointers directly. Callers must +/// instead call this function and then enter Wasm through the returned +/// host-to-Wasm trampoline. +/// +/// # Unsafety +/// +/// The `vmctx` argument must be valid. +/// +/// The generic type `T` must be a function pointer type and `func` must be a +/// pointer to a Wasm function of that signature. +/// +/// After calling this function, you may not mess with the vmctx or any other +/// Wasm state until after you've called the trampoline returned by this +/// function. +#[inline] +pub unsafe fn prepare_host_to_wasm_trampoline(vmctx: *mut VMContext, func: T) -> T { + assert_eq!(mem::size_of::(), mem::size_of::()); + + // Save the callee in the `vmctx`. The trampoline will read this function + // pointer and tail call to it. + (*vmctx) + .instance_mut() + .set_callee(Some(mem::transmute_copy(&func))); + + // Give callers the trampoline, transmuted into their desired function + // signature (the trampoline is variadic and works with all signatures). + mem::transmute_copy(&(host_to_wasm_trampoline as usize)) +} + +extern "C" { + fn host_to_wasm_trampoline(); + pub(crate) fn wasm_to_host_trampoline(); +} + +cfg_if::cfg_if! { + if #[cfg(target_arch = "x86_64")] { + #[macro_use] + mod x86_64; + } else if #[cfg(target_arch = "aarch64")] { + #[macro_use] + mod aarch64; + } else if #[cfg(target_arch = "s390x")] { + #[macro_use] + mod s390x; + } else { + compile_error!("unsupported architecture"); + } +} diff --git a/crates/runtime/src/trampolines/aarch64.rs b/crates/runtime/src/trampolines/aarch64.rs new file mode 100644 index 000000000000..757811d5d3b5 --- /dev/null +++ b/crates/runtime/src/trampolines/aarch64.rs @@ -0,0 +1,121 @@ +use wasmtime_asm_macros::asm_func; + +#[rustfmt::skip] +asm_func!( + "host_to_wasm_trampoline", + r#" + .cfi_startproc + bti c + + // Load the pointer to `VMRuntimeLimits` in `x9`. + ldur x9, [x1, #8] + + // Check to see if callee is a core `VMContext` (MAGIC == "core"). NB: + // we do not support big-endian aarch64 so the magic value is always + // little-endian encoded. + ldur w10, [x0] + mov w11, #0x6f63 + movk w11, #0x6572, lsl #16 + cmp w10, w11 + + // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if + // this was core Wasm, otherwise store an invalid sentinal value. + mov x12, #-1 + mov x13, sp + csel x12, x13, x12, eq + stur x12, [x9, #40] + + // Tail call to the callee function pointer in the vmctx. + ldur x16, [x1, #16] + br x16 + + .cfi_endproc + "# +); + +#[cfg(test)] +mod host_to_wasm_trampoline_offsets_tests { + use wasmtime_environ::{Module, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(40, offsets.vmruntime_limits_last_wasm_entry_sp()); + assert_eq!(16, offsets.vmctx_callee()); + assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); + } +} + +asm_func!( + "wasm_to_host_trampoline", + " + .cfi_startproc + bti c + + // Load the pointer to `VMRuntimeLimits` in `x9`. + ldur x9, [x1, #8] + + // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. + stur fp, [x9, #24] + + // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. + stur lr, [x9, #32] + + // Tail call to the actual host function. + // + // This *must* be a tail call so that we do not push to the stack and mess + // up the offsets of stack arguments (if any). + ldur x16, [x0, #8] + br x16 + + .cfi_endproc + ", +); + +#[cfg(test)] +mod wasm_to_host_trampoline_offsets_tests { + use crate::VMHostFuncContext; + use memoffset::offset_of; + use wasmtime_environ::{Module, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(24, offsets.vmruntime_limits_last_wasm_exit_fp()); + assert_eq!(32, offsets.vmruntime_limits_last_wasm_exit_pc()); + assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); + } +} + +#[rustfmt::skip] +macro_rules! wasm_to_libcall_trampoline { + ($libcall:ident ; $libcall_impl:ident) => { + wasmtime_asm_macros::asm_func!( + stringify!($libcall), + " + .cfi_startproc + bti c + + // Load the pointer to `VMRuntimeLimits` in `x9`. + ldur x9, [x0, #8] + + // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. + stur fp, [x9, #24] + + // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. + stur lr, [x9, #32] + + // Tail call to the actual implementation of this libcall. + b ", wasmtime_asm_macros::asm_sym!(stringify!($libcall_impl)), " + + .cfi_endproc + " + ); + }; +} diff --git a/crates/runtime/src/trampolines/s390x.S b/crates/runtime/src/trampolines/s390x.S new file mode 100644 index 000000000000..0a4f11a774ae --- /dev/null +++ b/crates/runtime/src/trampolines/s390x.S @@ -0,0 +1,109 @@ +// Currently `global_asm!` isn't stable on s390x, so this is an external +// assembler file built with the `build.rs`. + + .machine z13 + .text + + .hidden host_to_wasm_trampoline + .globl host_to_wasm_trampoline + .type host_to_wasm_trampoline,@function + .p2align 2 + +host_to_wasm_trampoline: + .cfi_startproc + + // Load the pointer to `VMRuntimeLimits` in `%r1`. + lg %r1, 8(%r3) + + // Check to see if callee is a core `VMContext` (MAGIC == "core"). + l %r0, 0(%r2) + cfi %r0, 0x65726f63 + + // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if + // this was core Wasm, otherwise store an invalid sentinal value. + lgr %r0, %r15 + locghine %r0, -1 + stg %r0, 40(%r1) + + // Tail call to the callee function pointer in the vmctx. + lg %r1, 16(%r3) + br %r1 + + .cfi_endproc + .size host_to_wasm_trampoline,.-host_to_wasm_trampoline + + .hidden wasm_to_host_trampoline + .globl wasm_to_host_trampoline + .type wasm_to_host_trampoline,@function + .p2align 2 + +wasm_to_host_trampoline: + .cfi_startproc + + // Load the pointer to `VMRuntimeLimits` in `%r1`. + lg %r1, 8(%r3) + + // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. + lg %r0, 0(%r15) + stg %r0, 24(%r1) + + // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. + stg %r14, 32(%r1) + + // Tail call to the actual host function. + // + // This *must* be a tail call so that we do not push to the stack and mess + // up the offsets of stack arguments (if any). + lg %r1, 8(%r2) + br %r1 + + .cfi_endproc + .size wasm_to_host_trampoline,.-wasm_to_host_trampoline + +#define LIBCALL_TRAMPOLINE(libcall, libcall_impl) \ + .hidden libcall ; \ + .globl libcall ; \ + .type libcall,@function ; \ + .p2align 2 ; \ +libcall: ; \ + .cfi_startproc ; \ + \ + /* Load the pointer to `VMRuntimeLimits` in `%r1`. */ \ + lg %r1, 8(%r2) ; \ + \ + /* Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. */ \ + lg %r0, 0(%r15) ; \ + stg %r0, 24(%r1) ; \ + \ + /* Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. */ \ + stg %r14, 32(%r1) ; \ + \ + /* Tail call to the actual implementation of this libcall. */ \ + jg libcall_impl ; \ + \ + .cfi_endproc ; \ + .size libcall,.-libcall + +LIBCALL_TRAMPOLINE(memory32_grow, impl_memory32_grow) +LIBCALL_TRAMPOLINE(table_grow_funcref, impl_table_grow_funcref) +LIBCALL_TRAMPOLINE(table_grow_externref, impl_table_grow_externref) +LIBCALL_TRAMPOLINE(table_fill_funcref, impl_table_fill_funcref) +LIBCALL_TRAMPOLINE(table_fill_externref, impl_table_fill_externref) +LIBCALL_TRAMPOLINE(table_copy, impl_table_copy) +LIBCALL_TRAMPOLINE(table_init, impl_table_init) +LIBCALL_TRAMPOLINE(elem_drop, impl_elem_drop) +LIBCALL_TRAMPOLINE(memory_copy, impl_memory_copy) +LIBCALL_TRAMPOLINE(memory_fill, impl_memory_fill) +LIBCALL_TRAMPOLINE(memory_init, impl_memory_init) +LIBCALL_TRAMPOLINE(ref_func, impl_ref_func) +LIBCALL_TRAMPOLINE(data_drop, impl_data_drop) +LIBCALL_TRAMPOLINE(table_get_lazy_init_funcref, impl_table_get_lazy_init_funcref) +LIBCALL_TRAMPOLINE(drop_externref, impl_drop_externref) +LIBCALL_TRAMPOLINE(activations_table_insert_with_gc, impl_activations_table_insert_with_gc) +LIBCALL_TRAMPOLINE(externref_global_get, impl_externref_global_get) +LIBCALL_TRAMPOLINE(externref_global_set, impl_externref_global_set) +LIBCALL_TRAMPOLINE(memory_atomic_notify, impl_memory_atomic_notify) +LIBCALL_TRAMPOLINE(memory_atomic_wait32, impl_memory_atomic_wait32) +LIBCALL_TRAMPOLINE(memory_atomic_wait64, impl_memory_atomic_wait64) +LIBCALL_TRAMPOLINE(out_of_gas, impl_out_of_gas) +LIBCALL_TRAMPOLINE(new_epoch, impl_new_epoch) diff --git a/crates/runtime/src/trampolines/s390x.rs b/crates/runtime/src/trampolines/s390x.rs new file mode 100644 index 000000000000..95ecb72bc3ec --- /dev/null +++ b/crates/runtime/src/trampolines/s390x.rs @@ -0,0 +1,48 @@ +// The host_to_wasm_trampoline implementation is in the s390x.S +// file, but we still want to have this unit test here. + +#[cfg(test)] +mod host_to_wasm_trampoline_offsets_tests { + use wasmtime_environ::{Module, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(40, offsets.vmruntime_limits_last_wasm_entry_sp()); + assert_eq!(16, offsets.vmctx_callee()); + assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); + } +} + +// The wasm_to_host_trampoline implementation is in the s390x.S +// file, but we still want to have this unit test here. + +#[cfg(test)] +mod wasm_to_host_trampoline_offsets_tests { + use crate::VMHostFuncContext; + use memoffset::offset_of; + use wasmtime_environ::{Module, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(24, offsets.vmruntime_limits_last_wasm_exit_fp()); + assert_eq!(32, offsets.vmruntime_limits_last_wasm_exit_pc()); + assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); + } +} + +// The implementation for libcall trampolines is in the s390x.S +// file. We provide this dummy definition of wasm_to_libcall_trampoline +// here to make libcalls.rs compile on s390x. Note that this means we +// have to duplicate the list of libcalls used in the assembler file. + +macro_rules! wasm_to_libcall_trampoline { + ($libcall:ident ; $libcall_impl:ident) => {}; +} diff --git a/crates/runtime/src/trampolines/x86_64.rs b/crates/runtime/src/trampolines/x86_64.rs new file mode 100644 index 000000000000..03580fda6707 --- /dev/null +++ b/crates/runtime/src/trampolines/x86_64.rs @@ -0,0 +1,135 @@ +use wasmtime_asm_macros::asm_func; + +// Helper macros for getting the first and second arguments according to the +// system calling convention, as well as some callee-saved scratch registers we +// can safely use in the trampolines. +cfg_if::cfg_if! { + if #[cfg(windows)] { + macro_rules! arg0 { () => ("rcx") } + macro_rules! arg1 { () => ("rdx") } + macro_rules! scratch0 { () => ("r10") } + macro_rules! scratch1 { () => ("r11") } + } else if #[cfg(unix)] { + macro_rules! arg0 { () => ("rdi") } + macro_rules! arg1 { () => ("rsi") } + macro_rules! scratch0 { () => ("r10") } + macro_rules! scratch1 { () => ("r11") } + } else { + compile_error!("platform not supported"); + } +} + +#[rustfmt::skip] +asm_func!( + "host_to_wasm_trampoline", + " + .cfi_startproc simple + .cfi_def_cfa_offset 0 + + // Load the pointer to `VMRuntimeLimits` in `scratch0`. + mov ", scratch0!(), ", 8[", arg1!(), "] + + // Check to see if this is a core `VMContext` (MAGIC == 'core'). + cmp DWORD PTR [", arg0!(), "], 0x65726f63 + + // Store the last Wasm SP into the `last_wasm_entry_sp` in the limits, if this + // was core Wasm, otherwise store an invalid sentinal value. + mov ", scratch1!(), ", -1 + cmove ", scratch1!(), ", rsp + mov 40[", scratch0!(), "], ", scratch1!(), " + + // Tail call to the callee function pointer in the vmctx. + jmp 16[", arg1!(), "] + + .cfi_endproc + ", +); + +#[cfg(test)] +mod host_to_wasm_trampoline_offsets_tests { + use wasmtime_environ::{Module, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(40, offsets.vmruntime_limits_last_wasm_entry_sp()); + assert_eq!(16, offsets.vmctx_callee()); + assert_eq!(0x65726f63, u32::from_le_bytes(*b"core")); + } +} + +#[rustfmt::skip] +asm_func!( + "wasm_to_host_trampoline", + " + .cfi_startproc simple + .cfi_def_cfa_offset 0 + + // Load the pointer to `VMRuntimeLimits` in `scratch0`. + mov ", scratch0!(), ", 8[", arg1!(), "] + + // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. + mov 24[", scratch0!(), "], rbp + + // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. + mov ", scratch1!(), ", [rsp] + mov 32[", scratch0!(), "], ", scratch1!(), " + + // Tail call to the actual host function. + // + // This *must* be a tail call so that we do not push to the stack and mess + // up the offsets of stack arguments (if any). + jmp 8[", arg0!(), "] + + .cfi_endproc + ", +); + +#[cfg(test)] +mod wasm_to_host_trampoline_offsets_tests { + use crate::VMHostFuncContext; + use memoffset::offset_of; + use wasmtime_environ::{Module, VMOffsets}; + + #[test] + fn test() { + let module = Module::new(); + let offsets = VMOffsets::new(std::mem::size_of::<*mut u8>() as u8, &module); + + assert_eq!(8, offsets.vmctx_runtime_limits()); + assert_eq!(24, offsets.vmruntime_limits_last_wasm_exit_fp()); + assert_eq!(32, offsets.vmruntime_limits_last_wasm_exit_pc()); + assert_eq!(8, offset_of!(VMHostFuncContext, host_func)); + } +} + +#[rustfmt::skip] +macro_rules! wasm_to_libcall_trampoline { + ($libcall:ident ; $libcall_impl:ident) => { + wasmtime_asm_macros::asm_func!( + stringify!($libcall), + " + .cfi_startproc simple + .cfi_def_cfa_offset 0 + + // Load the pointer to `VMRuntimeLimits` in `", scratch0!(), "`. + mov ", scratch0!(), ", 8[", arg0!(), "] + + // Store the last Wasm FP into the `last_wasm_exit_fp` in the limits. + mov 24[", scratch0!(), "], rbp + + // Store the last Wasm PC into the `last_wasm_exit_pc` in the limits. + mov ", scratch1!(), ", [rsp] + mov 32[", scratch0!(), "], ", scratch1!(), " + + // Tail call to the actual implementation of this libcall. + jmp ", wasmtime_asm_macros::asm_sym!(stringify!($libcall_impl)), " + + .cfi_endproc + ", + ); + }; +} diff --git a/crates/runtime/src/traphandlers.rs b/crates/runtime/src/traphandlers.rs index 244f9ffb92ee..84775b15e4d2 100644 --- a/crates/runtime/src/traphandlers.rs +++ b/crates/runtime/src/traphandlers.rs @@ -1,17 +1,19 @@ //! WebAssembly trap handling, which is built on top of the lower-level //! signalhandling mechanisms. -use crate::VMContext; +mod backtrace; + +use crate::{VMContext, VMRuntimeLimits}; use anyhow::Error; use std::any::Any; use std::cell::{Cell, UnsafeCell}; -use std::mem::MaybeUninit; +use std::mem::{self, MaybeUninit}; use std::ptr; use std::sync::Once; use wasmtime_environ::TrapCode; +pub use self::backtrace::Backtrace; pub use self::tls::{tls_eager_initialize, TlsRestore}; -pub use backtrace::Backtrace; #[link(name = "wasmtime-helpers")] extern "C" { @@ -68,6 +70,20 @@ pub fn init_traps(is_wasm_pc: fn(usize) -> bool) { }); } +/// Raises a trap immediately. +/// +/// This function performs as-if a wasm trap was just executed. This trap +/// payload is then returned from `catch_traps` below. +/// +/// # Safety +/// +/// Only safe to call when wasm code is on the stack, aka `catch_traps` must +/// have been previously called. Additionally no Rust destructors can be on the +/// stack. They will be skipped and not executed. +pub unsafe fn raise_trap(reason: TrapReason) -> ! { + tls::with(|info| info.unwrap().unwind_with(UnwindReason::Trap(reason))) +} + /// Raises a user-defined trap immediately. /// /// This function performs as-if a wasm trap was just executed, only the trap @@ -80,8 +96,7 @@ pub fn init_traps(is_wasm_pc: fn(usize) -> bool) { /// have been previously called. Additionally no Rust destructors can be on the /// stack. They will be skipped and not executed. pub unsafe fn raise_user_trap(data: Error) -> ! { - let trap = TrapReason::User(data); - tls::with(|info| info.unwrap().unwind_with(UnwindReason::Trap(trap))) + raise_trap(TrapReason::User(data)) } /// Raises a trap from inside library code immediately. @@ -95,8 +110,7 @@ pub unsafe fn raise_user_trap(data: Error) -> ! { /// have been previously called. Additionally no Rust destructors can be on the /// stack. They will be skipped and not executed. pub unsafe fn raise_lib_trap(trap: TrapCode) -> ! { - let trap = TrapReason::Wasm(trap); - tls::with(|info| info.unwrap().unwind_with(UnwindReason::Trap(trap))) + raise_trap(TrapReason::Wasm(trap)) } /// Carries a Rust panic across wasm code and resumes the panic on the other @@ -134,6 +148,25 @@ pub enum TrapReason { Wasm(TrapCode), } +impl TrapReason { + /// Is this a JIT trap? + pub fn is_jit(&self) -> bool { + matches!(self, TrapReason::Jit(_)) + } +} + +impl From for TrapReason { + fn from(err: Error) -> Self { + TrapReason::User(err) + } +} + +impl From for TrapReason { + fn from(code: TrapCode) -> Self { + TrapReason::Wasm(code) + } +} + /// Catches any wasm traps that happen within the execution of `closure`, /// returning them as a `Result`. /// @@ -141,26 +174,50 @@ pub enum TrapReason { pub unsafe fn catch_traps<'a, F>( signal_handler: Option<*const SignalHandler<'static>>, capture_backtrace: bool, - callee: *mut VMContext, + caller: *mut VMContext, mut closure: F, ) -> Result<(), Box> where F: FnMut(*mut VMContext), { - return CallThreadState::new(signal_handler, capture_backtrace).with(|cx| { + let limits = (*caller).instance().runtime_limits(); + + let old_last_wasm_exit_fp = mem::replace(&mut *(**limits).last_wasm_exit_fp.get(), 0); + let old_last_wasm_exit_pc = mem::replace(&mut *(**limits).last_wasm_exit_pc.get(), 0); + let old_last_wasm_entry_sp = mem::replace(&mut *(**limits).last_wasm_entry_sp.get(), 0); + + let result = CallThreadState::new( + signal_handler, + capture_backtrace, + old_last_wasm_exit_fp, + old_last_wasm_exit_pc, + old_last_wasm_entry_sp, + *limits, + ) + .with(|cx| { wasmtime_setjmp( cx.jmp_buf.as_ptr(), call_closure::, &mut closure as *mut F as *mut u8, - callee, + caller, ) }); - extern "C" fn call_closure(payload: *mut u8, callee: *mut VMContext) + *(**limits).last_wasm_exit_fp.get() = old_last_wasm_exit_fp; + *(**limits).last_wasm_exit_pc.get() = old_last_wasm_exit_pc; + *(**limits).last_wasm_entry_sp.get() = old_last_wasm_entry_sp; + + return match result { + Ok(x) => Ok(x), + Err((UnwindReason::Trap(reason), backtrace)) => Err(Box::new(Trap { reason, backtrace })), + Err((UnwindReason::Panic(panic), _)) => std::panic::resume_unwind(panic), + }; + + extern "C" fn call_closure(payload: *mut u8, caller: *mut VMContext) where F: FnMut(*mut VMContext), { - unsafe { (*(payload as *mut F))(callee) } + unsafe { (*(payload as *mut F))(caller) } } } @@ -173,6 +230,10 @@ pub struct CallThreadState { signal_handler: Option<*const SignalHandler<'static>>, prev: Cell, capture_backtrace: bool, + pub(crate) old_last_wasm_exit_fp: usize, + pub(crate) old_last_wasm_exit_pc: usize, + pub(crate) old_last_wasm_entry_sp: usize, + pub(crate) limits: *const VMRuntimeLimits, } enum UnwindReason { @@ -185,6 +246,10 @@ impl CallThreadState { fn new( signal_handler: Option<*const SignalHandler<'static>>, capture_backtrace: bool, + old_last_wasm_exit_fp: usize, + old_last_wasm_exit_pc: usize, + old_last_wasm_entry_sp: usize, + limits: *const VMRuntimeLimits, ) -> CallThreadState { CallThreadState { unwind: UnsafeCell::new(MaybeUninit::uninit()), @@ -193,34 +258,32 @@ impl CallThreadState { signal_handler, prev: Cell::new(ptr::null()), capture_backtrace, + old_last_wasm_exit_fp, + old_last_wasm_exit_pc, + old_last_wasm_entry_sp, + limits, } } - fn with(self, closure: impl FnOnce(&CallThreadState) -> i32) -> Result<(), Box> { + fn with( + self, + closure: impl FnOnce(&CallThreadState) -> i32, + ) -> Result<(), (UnwindReason, Option)> { let ret = tls::set(&self, || closure(&self)); if ret != 0 { Ok(()) } else { - Err(unsafe { self.read_trap() }) + Err(unsafe { self.read_unwind() }) } } #[cold] - unsafe fn read_trap(&self) -> Box { - let (unwind_reason, backtrace) = (*self.unwind.get()).as_ptr().read(); - let reason = match unwind_reason { - UnwindReason::Trap(trap) => trap, - UnwindReason::Panic(panic) => std::panic::resume_unwind(panic), - }; - Box::new(Trap { reason, backtrace }) + unsafe fn read_unwind(&self) -> (UnwindReason, Option) { + (*self.unwind.get()).as_ptr().read() } fn unwind_with(&self, reason: UnwindReason) -> ! { - let backtrace = if self.capture_backtrace { - Some(Backtrace::new_unresolved()) - } else { - None - }; + let backtrace = self.capture_backtrace(None); unsafe { (*self.unwind.get()).as_mut_ptr().write((reason, backtrace)); wasmtime_longjmp(self.jmp_buf.get()); @@ -282,18 +345,30 @@ impl CallThreadState { self.jmp_buf.get() } - fn capture_backtrace(&self, pc: *const u8) { - let backtrace = if self.capture_backtrace { - Some(Backtrace::new_unresolved()) - } else { - None - }; - let trap = TrapReason::Jit(pc as usize); + fn set_jit_trap(&self, pc: *const u8, fp: usize) { + let backtrace = self.capture_backtrace(Some((pc as usize, fp))); unsafe { (*self.unwind.get()) .as_mut_ptr() - .write((UnwindReason::Trap(trap), backtrace)); + .write((UnwindReason::Trap(TrapReason::Jit(pc as usize)), backtrace)); + } + } + + fn capture_backtrace(&self, pc_and_fp: Option<(usize, usize)>) -> Option { + if !self.capture_backtrace { + return None; } + + Some(unsafe { Backtrace::new_with_trap_state(self, pc_and_fp) }) + } + + pub(crate) fn iter<'a>(&'a self) -> impl Iterator + 'a { + let mut state = Some(self); + std::iter::from_fn(move || { + let this = state?; + state = unsafe { this.prev.get().as_ref() }; + Some(this) + }) } } diff --git a/crates/runtime/src/traphandlers/backtrace.rs b/crates/runtime/src/traphandlers/backtrace.rs new file mode 100644 index 000000000000..5dba1a93095c --- /dev/null +++ b/crates/runtime/src/traphandlers/backtrace.rs @@ -0,0 +1,259 @@ +//! Backtrace and stack walking functionality for Wasm. +//! +//! Walking the Wasm stack is comprised of +//! +//! 1. identifying sequences of contiguous Wasm frames on the stack +//! (i.e. skipping over native host frames), and +//! +//! 2. walking the Wasm frames within such a sequence. +//! +//! To perform (1) we maintain the entry stack pointer (SP) and exit frame +//! pointer (FP) and program counter (PC) each time we call into Wasm and Wasm +//! calls into the host via trampolines (see +//! `crates/runtime/src/trampolines`). The most recent entry is stored in +//! `VMRuntimeLimits` and older entries are saved in `CallThreadState`. This +//! lets us identify ranges of contiguous Wasm frames on the stack. +//! +//! To solve (2) and walk the Wasm frames within a region of contiguous Wasm +//! frames on the stack, we configure Cranelift's `preserve_frame_pointers = +//! true` setting. Then we can do simple frame pointer traversal starting at the +//! exit FP and stopping once we reach the entry SP (meaning that the next older +//! frame is a host frame). + +use crate::traphandlers::{tls, CallThreadState}; +use cfg_if::cfg_if; +use std::ops::ControlFlow; + +// Architecture-specific bits for stack walking. Each of these modules should +// define and export the following functions: +// +// * `unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize` +// * `unsafe fn get_next_older_fp_from_fp(fp: usize) -> usize` +// * `fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool` +// * `fn assert_entry_sp_is_aligned(sp: usize)` +// * `fn assert_fp_is_aligned(fp: usize)` +cfg_if! { + if #[cfg(target_arch = "x86_64")] { + mod x86_64; + use x86_64 as arch; + } else if #[cfg(target_arch = "aarch64")] { + mod aarch64; + use aarch64 as arch; + } else if #[cfg(target_arch = "s390x")] { + mod s390x; + use s390x as arch; + } else { + compile_error!("unsupported architecture"); + } +} + +/// A WebAssembly stack trace. +#[derive(Debug)] +pub struct Backtrace(Vec); + +/// A stack frame within a Wasm stack trace. +#[derive(Debug)] +pub struct Frame { + pc: usize, + fp: usize, +} + +impl Frame { + /// Get this frame's program counter. + pub fn pc(&self) -> usize { + self.pc + } + + /// Get this frame's frame pointer. + pub fn fp(&self) -> usize { + self.fp + } +} + +impl Backtrace { + /// Capture the current Wasm stack in a backtrace. + pub fn new() -> Backtrace { + tls::with(|state| match state { + Some(state) => unsafe { Self::new_with_trap_state(state, None) }, + None => Backtrace(vec![]), + }) + } + + /// Capture the current Wasm stack trace. + /// + /// If Wasm hit a trap, and we calling this from the trap handler, then the + /// Wasm exit trampoline didn't run, and we use the provided PC and FP + /// instead of looking them up in `VMRuntimeLimits`. + pub(crate) unsafe fn new_with_trap_state( + state: &CallThreadState, + trap_pc_and_fp: Option<(usize, usize)>, + ) -> Backtrace { + let mut frames = vec![]; + Self::trace_with_trap_state(state, trap_pc_and_fp, |frame| { + frames.push(frame); + ControlFlow::Continue(()) + }); + Backtrace(frames) + } + + /// Walk the current Wasm stack, calling `f` for each frame we walk. + pub fn trace(f: impl FnMut(Frame) -> ControlFlow<()>) { + tls::with(|state| match state { + Some(state) => unsafe { Self::trace_with_trap_state(state, None, f) }, + None => {} + }); + } + + /// Walk the current Wasm stack, calling `f` for each frame we walk. + /// + /// If Wasm hit a trap, and we calling this from the trap handler, then the + /// Wasm exit trampoline didn't run, and we use the provided PC and FP + /// instead of looking them up in `VMRuntimeLimits`. + pub(crate) unsafe fn trace_with_trap_state( + state: &CallThreadState, + trap_pc_and_fp: Option<(usize, usize)>, + mut f: impl FnMut(Frame) -> ControlFlow<()>, + ) { + let (last_wasm_exit_pc, last_wasm_exit_fp) = match trap_pc_and_fp { + // If we exited Wasm by catching a trap, then the Wasm-to-host + // trampoline did not get a chance to save the last Wasm PC and FP, + // and we need to use the plumbed-through values instead. + Some((pc, fp)) => (pc, fp), + // Either there is no Wasm currently on the stack, or we exited Wasm + // through the Wasm-to-host trampoline. + None => { + let pc = *(*state.limits).last_wasm_exit_pc.get(); + let fp = *(*state.limits).last_wasm_exit_fp.get(); + assert_ne!(pc, 0); + (pc, fp) + } + }; + + // Trace through the first contiguous sequence of Wasm frames on the + // stack. + if let ControlFlow::Break(()) = Self::trace_through_wasm( + last_wasm_exit_pc, + last_wasm_exit_fp, + *(*state.limits).last_wasm_entry_sp.get(), + &mut f, + ) { + return; + } + + // And then trace through each of the older contiguous sequences of Wasm + // frames on the stack. + for state in state.iter() { + // If there is no previous call state, then there is nothing more to + // trace through (since each `CallTheadState` saves the *previous* + // call into Wasm's saved registers, and the youngest call into + // Wasm's registers are saved in the `VMRuntimeLimits`) + if state.prev.get().is_null() { + debug_assert_eq!(state.old_last_wasm_exit_pc, 0); + debug_assert_eq!(state.old_last_wasm_exit_fp, 0); + debug_assert_eq!(state.old_last_wasm_entry_sp, 0); + return; + } + + if let ControlFlow::Break(()) = Self::trace_through_wasm( + state.old_last_wasm_exit_pc, + state.old_last_wasm_exit_fp, + state.old_last_wasm_entry_sp, + &mut f, + ) { + return; + } + } + } + + /// Walk through a contiguous sequence of Wasm frames starting with the + /// frame at the given PC and FP and ending at `first_wasm_sp`. + unsafe fn trace_through_wasm( + mut pc: usize, + mut fp: usize, + first_wasm_sp: usize, + mut f: impl FnMut(Frame) -> ControlFlow<()>, + ) -> ControlFlow<()> { + log::trace!("=== Tracing through contiguous sequence of Wasm frames ==="); + log::trace!("first_wasm_sp = 0x{:016x}", first_wasm_sp); + log::trace!(" initial pc = 0x{:016x}", pc); + log::trace!(" initial fp = 0x{:016x}", fp); + + // In our host-to-Wasm trampoline, we save `-1` as a sentinal SP + // value for when the callee is not actually a core Wasm + // function (as determined by looking at the callee `vmctx`). If + // we encounter `-1`, this is an empty sequence of Wasm frames + // where a host called a host function so the following + // happened: + // + // * We entered the host-to-wasm-trampoline, saved (an invalid + // sentinal for) entry SP, and tail called to the "Wasm" + // callee, + // + // * entered the Wasm-to-host trampoline, saved the exit FP and + // PC, and tail called to the host callee, + // + // * and are now in host code. + // + // Ultimately, this means that there are 0 Wasm frames in this + // contiguous sequence of Wasm frames, and we have nothing to + // walk through here. + if first_wasm_sp == -1_isize as usize { + log::trace!("Empty sequence of Wasm frames"); + return ControlFlow::Continue(()); + } + + // We use `0` as a sentinal value for when there is not any Wasm + // on the stack and these values are non-existant. If we + // actually entered Wasm (see above guard for `-1`) then, then + // by the time we got here we should have either exited Wasm + // through the Wasm-to-host trampoline and properly set these + // values, or we should have caught a trap in a signal handler + // and also properly recovered these values in that case. + assert_ne!(pc, 0); + assert_ne!(fp, 0); + assert_ne!(first_wasm_sp, 0); + + // The stack grows down, and therefore any frame pointer we are + // dealing with should be less than the stack pointer on entry + // to Wasm. + assert!(first_wasm_sp >= fp, "{first_wasm_sp:#x} >= {fp:#x}"); + + arch::assert_entry_sp_is_aligned(first_wasm_sp); + + loop { + arch::assert_fp_is_aligned(fp); + + log::trace!("--- Tracing through one Wasm frame ---"); + log::trace!("pc = 0x{:016x}", pc); + log::trace!("fp = 0x{:016x}", fp); + + f(Frame { pc, fp })?; + + // If our FP has reached the SP upon entry to Wasm from the + // host, then we've successfully walked all the Wasm frames, + // and have now reached a host frame. We're done iterating + // through this contiguous sequence of Wasm frames. + if arch::reached_entry_sp(fp, first_wasm_sp) { + return ControlFlow::Continue(()); + } + + // If we didn't return above, then we know we are still in a + // Wasm frame, and since Cranelift maintains frame pointers, + // we know that the FP isn't an arbitrary value and it is + // safe to dereference it to read the next PC/FP. + + pc = arch::get_next_older_pc_from_fp(fp); + + let next_older_fp = arch::get_next_older_fp_from_fp(fp); + // Because the stack always grows down, the older FP must be greater + // than the current FP. + assert!(next_older_fp > fp, "{next_older_fp:#x} > {fp:#x}"); + fp = next_older_fp; + } + } + + /// Iterate over the frames inside this backtrace. + pub fn frames<'a>(&'a self) -> impl Iterator + 'a { + self.0.iter() + } +} diff --git a/crates/runtime/src/traphandlers/backtrace/aarch64.rs b/crates/runtime/src/traphandlers/backtrace/aarch64.rs new file mode 100644 index 000000000000..4233514af6ab --- /dev/null +++ b/crates/runtime/src/traphandlers/backtrace/aarch64.rs @@ -0,0 +1,39 @@ +// The aarch64 calling conventions save the return PC one i64 above the FP and +// the previous FP is pointed to by the current FP: +// +// > Each frame shall link to the frame of its caller by means of a frame record +// > of two 64-bit values on the stack [...] The frame record for the innermost +// > frame [...] shall be pointed to by the frame pointer register (FP). The +// > lowest addressed double-word shall point to the previous frame record and the +// > highest addressed double-word shall contain the value passed in LR on entry +// > to the current function. +// +// - AAPCS64 section 6.2.3 The Frame Pointer[0] +pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { + *(fp as *mut usize).offset(1) +} +pub unsafe fn get_next_older_fp_from_fp(fp: usize) -> usize { + *(fp as *mut usize) +} + +pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { + // Calls in aarch64 push two i64s (old FP and return PC) so our entry SP is + // two i64s above the first Wasm FP. + fp == first_wasm_sp - 16 +} + +pub fn assert_entry_sp_is_aligned(sp: usize) { + assert_eq!(sp % 16, 0, "stack should always be aligned to 16"); +} + +pub fn assert_fp_is_aligned(_fp: usize) { + // From AAPCS64, section 6.2.3 The Frame Pointer[0]: + // + // > The location of the frame record within a stack frame is not specified. + // + // So this presumably means that the FP can have any alignment, as its + // location is not specified and nothing further is said about constraining + // alignment. + // + // [0]: https://github.com/ARM-software/abi-aa/blob/2022Q1/aapcs64/aapcs64.rst#the-frame-pointer +} diff --git a/crates/runtime/src/traphandlers/backtrace/s390x.rs b/crates/runtime/src/traphandlers/backtrace/s390x.rs new file mode 100644 index 000000000000..6f041c34604c --- /dev/null +++ b/crates/runtime/src/traphandlers/backtrace/s390x.rs @@ -0,0 +1,27 @@ +pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { + // The next older PC can be found in register %r14 at function entry, which + // was saved into slot 14 of the register save area pointed to by "FP" (the + // backchain pointer). + *(fp as *mut usize).offset(14) +} + +pub unsafe fn get_next_older_fp_from_fp(fp: usize) -> usize { + // The next older "FP" (backchain pointer) was saved in the slot pointed to + // by the current "FP". + *(fp as *mut usize) +} + +pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { + // The "FP" (backchain pointer) holds the value of the stack pointer at + // function entry. If this equals the value the stack pointer had when we + // first entered a Wasm function, we are done. + fp == first_wasm_sp +} + +pub fn assert_entry_sp_is_aligned(sp: usize) { + assert_eq!(sp % 8, 0, "stack should always be aligned to 8"); +} + +pub fn assert_fp_is_aligned(fp: usize) { + assert_eq!(fp % 8, 0, "stack should always be aligned to 8"); +} diff --git a/crates/runtime/src/traphandlers/backtrace/x86_64.rs b/crates/runtime/src/traphandlers/backtrace/x86_64.rs new file mode 100644 index 000000000000..51a4948c7d66 --- /dev/null +++ b/crates/runtime/src/traphandlers/backtrace/x86_64.rs @@ -0,0 +1,33 @@ +pub unsafe fn get_next_older_pc_from_fp(fp: usize) -> usize { + // The calling convention always pushes the return pointer (aka the PC of + // the next older frame) just before this frame. + *(fp as *mut usize).offset(1) +} + +pub unsafe fn get_next_older_fp_from_fp(fp: usize) -> usize { + // And the current frame pointer points to the next older frame pointer. + *(fp as *mut usize) +} + +pub fn reached_entry_sp(fp: usize, first_wasm_sp: usize) -> bool { + // When the FP is just below the SP (because we are in a function prologue + // where the `call` pushed the return pointer, but the callee hasn't pushed + // the frame pointer yet) we are done. + fp == first_wasm_sp - 8 +} + +pub fn assert_entry_sp_is_aligned(sp: usize) { + // The stack pointer should always be aligned to 16 bytes *except* inside + // function prologues where the return PC is pushed to the stack but before + // the old frame pointer has been saved to the stack via `push rbp`. And + // this happens to be exactly where we are inside of our host-to-Wasm + // trampoline that records the value of SP when we first enter + // Wasm. Therefore, the SP should *always* be 8-byte aligned but *never* + // 16-byte aligned. + assert_eq!(sp % 8, 0); + assert_eq!(sp % 16, 8); +} + +pub fn assert_fp_is_aligned(fp: usize) { + assert_eq!(fp % 16, 0, "stack should always be aligned to 16"); +} diff --git a/crates/runtime/src/traphandlers/macos.rs b/crates/runtime/src/traphandlers/macos.rs index 6044eef73500..54f9d0269e9c 100644 --- a/crates/runtime/src/traphandlers/macos.rs +++ b/crates/runtime/src/traphandlers/macos.rs @@ -274,9 +274,12 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { let thread_state_flavor = x86_THREAD_STATE64; - let get_pc = |state: &ThreadState| state.__rip as *const u8; + let get_pc_and_fp = |state: &ThreadState| ( + state.__rip as *const u8, + state.__rbp as usize, + ); - let resume = |state: &mut ThreadState, pc: usize| { + let resume = |state: &mut ThreadState, pc: usize, fp: usize| { // The x86_64 ABI requires a 16-byte stack alignment for // functions, so typically we'll be 16-byte aligned. In this // case we simulate a `call` instruction by decrementing the @@ -302,6 +305,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { } state.__rip = unwind as u64; state.__rdi = pc as u64; + state.__rsi = fp as u64; }; let mut thread_state = ThreadState::new(); } else if #[cfg(target_arch = "aarch64")] { @@ -309,9 +313,12 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { let thread_state_flavor = ARM_THREAD_STATE64; - let get_pc = |state: &ThreadState| state.__pc as *const u8; + let get_pc_and_fp = |state: &ThreadState| ( + state.__pc as *const u8, + state.__fp as usize, + ); - let resume = |state: &mut ThreadState, pc: usize| { + let resume = |state: &mut ThreadState, pc: usize, fp: usize| { // Clobber LR with the faulting PC, so unwinding resumes at the // faulting instruction. The previous value of LR has been saved // by the callee (in Cranelift generated code), so no need to @@ -321,6 +328,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { // Fill in the argument to unwind here, and set PC to it, so // it looks like a call to unwind. state.__x[0] = pc as u64; + state.__x[1] = fp as u64; state.__pc = unwind as u64; }; let mut thread_state = mem::zeroed::(); @@ -356,7 +364,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { // Finally our indirection with a pointer means that we can read the // pointer value and if `MAP` changes happen after we read our entry that's // ok since they won't invalidate our entry. - let pc = get_pc(&thread_state); + let (pc, fp) = get_pc_and_fp(&thread_state); if !super::IS_WASM_PC(pc as usize) { return false; } @@ -365,7 +373,7 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { // force the thread itself to trap. The thread's register state is // configured to resume in the `unwind` function below, we update the // thread's register state, and then we're off to the races. - resume(&mut thread_state, pc as usize); + resume(&mut thread_state, pc as usize, fp); let kret = thread_set_state( origin_thread, thread_state_flavor, @@ -382,10 +390,10 @@ unsafe fn handle_exception(request: &mut ExceptionRequest) -> bool { /// a native backtrace once we've switched back to the thread itself. After /// the backtrace is captured we can do the usual `longjmp` back to the source /// of the wasm code. -unsafe extern "C" fn unwind(wasm_pc: *const u8) -> ! { +unsafe extern "C" fn unwind(wasm_pc: *const u8, wasm_fp: usize) -> ! { let jmp_buf = tls::with(|state| { let state = state.unwrap(); - state.capture_backtrace(wasm_pc); + state.set_jit_trap(wasm_pc, wasm_fp); state.jmp_buf.get() }); debug_assert!(!jmp_buf.is_null()); diff --git a/crates/runtime/src/traphandlers/unix.rs b/crates/runtime/src/traphandlers/unix.rs index 62262f173f0f..be3b13d14a99 100644 --- a/crates/runtime/src/traphandlers/unix.rs +++ b/crates/runtime/src/traphandlers/unix.rs @@ -86,7 +86,7 @@ unsafe extern "C" fn trap_handler( // Otherwise flag ourselves as handling a trap, do the trap // handling, and reset our trap handling flag. Then we figure // out what to do based on the result of the trap handling. - let pc = get_pc(context, signum); + let (pc, fp) = get_pc_and_fp(context, signum); let jmp_buf = info.jmp_buf_if_trap(pc, |handler| handler(signum, siginfo, context)); // Figure out what to do based on the result of this handling of @@ -99,7 +99,7 @@ unsafe extern "C" fn trap_handler( if jmp_buf as usize == 1 { return true; } - info.capture_backtrace(pc); + info.set_jit_trap(pc, fp); // On macOS this is a bit special, unfortunately. If we were to // `siglongjmp` out of the signal handler that notably does // *not* reset the sigaltstack state of our signal handler. This @@ -164,17 +164,26 @@ unsafe extern "C" fn trap_handler( } } -unsafe fn get_pc(cx: *mut libc::c_void, _signum: libc::c_int) -> *const u8 { +unsafe fn get_pc_and_fp(cx: *mut libc::c_void, _signum: libc::c_int) -> (*const u8, usize) { cfg_if::cfg_if! { if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.gregs[libc::REG_RIP as usize] as *const u8 + ( + cx.uc_mcontext.gregs[libc::REG_RIP as usize] as *const u8, + cx.uc_mcontext.gregs[libc::REG_RBP as usize] as usize + ) } else if #[cfg(all(target_os = "linux", target_arch = "x86"))] { let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.gregs[libc::REG_EIP as usize] as *const u8 + ( + cx.uc_mcontext.gregs[libc::REG_EIP as usize] as *const u8, + cx.uc_mcontext.gregs[libc::REG_EBP as usize] as usize, + ) } else if #[cfg(all(any(target_os = "linux", target_os = "android"), target_arch = "aarch64"))] { let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.pc as *const u8 + ( + cx.uc_mcontext.pc as *const u8, + cx.uc_mcontext.regs[29] as usize, + ) } else if #[cfg(all(target_os = "linux", target_arch = "s390x"))] { // On s390x, SIGILL and SIGFPE are delivered with the PSW address // pointing *after* the faulting instruction, while SIGSEGV and @@ -191,19 +200,34 @@ unsafe fn get_pc(cx: *mut libc::c_void, _signum: libc::c_int) -> *const u8 { _ => 0, }; let cx = &*(cx as *const libc::ucontext_t); - (cx.uc_mcontext.psw.addr - trap_offset) as *const u8 + ( + (cx.uc_mcontext.psw.addr - trap_offset) as *const u8, + *(cx.uc_mcontext.gregs[15] as *const usize), + ) } else if #[cfg(all(target_os = "macos", target_arch = "x86_64"))] { let cx = &*(cx as *const libc::ucontext_t); - (*cx.uc_mcontext).__ss.__rip as *const u8 + ( + (*cx.uc_mcontext).__ss.__rip as *const u8, + (*cx.uc_mcontext).__ss.__rbp as usize, + ) } else if #[cfg(all(target_os = "macos", target_arch = "x86"))] { let cx = &*(cx as *const libc::ucontext_t); - (*cx.uc_mcontext).__ss.__eip as *const u8 + ( + (*cx.uc_mcontext).__ss.__eip as *const u8, + (*cx.uc_mcontext).__ss.__ebp as usize, + ) } else if #[cfg(all(target_os = "macos", target_arch = "aarch64"))] { let cx = &*(cx as *const libc::ucontext_t); - (*cx.uc_mcontext).__ss.__pc as *const u8 + ( + (*cx.uc_mcontext).__ss.__pc as *const u8, + (*cx.uc_mcontext).__ss.__fp as usize, + ) } else if #[cfg(all(target_os = "freebsd", target_arch = "x86_64"))] { let cx = &*(cx as *const libc::ucontext_t); - cx.uc_mcontext.mc_rip as *const u8 + ( + cx.uc_mcontext.mc_rip as *const u8, + cx.uc_mcontext.mc_rbp as usize, + ) } else { compile_error!("unsupported platform"); } diff --git a/crates/runtime/src/traphandlers/windows.rs b/crates/runtime/src/traphandlers/windows.rs index 09d8c774c259..b89910ed4119 100644 --- a/crates/runtime/src/traphandlers/windows.rs +++ b/crates/runtime/src/traphandlers/windows.rs @@ -54,8 +54,10 @@ unsafe extern "system" fn exception_handler(exception_info: *mut EXCEPTION_POINT cfg_if::cfg_if! { if #[cfg(target_arch = "x86_64")] { let ip = (*(*exception_info).ContextRecord).Rip as *const u8; + let fp = (*(*exception_info).ContextRecord).Rbp as usize; } else if #[cfg(target_arch = "x86")] { let ip = (*(*exception_info).ContextRecord).Eip as *const u8; + let fp = (*(*exception_info).ContextRecord).Ebp as usize; } else { compile_error!("unsupported platform"); } @@ -66,7 +68,7 @@ unsafe extern "system" fn exception_handler(exception_info: *mut EXCEPTION_POINT } else if jmp_buf as usize == 1 { ExceptionContinueExecution } else { - info.capture_backtrace(ip); + info.set_jit_trap(ip, fp); wasmtime_longjmp(jmp_buf) } }) diff --git a/crates/runtime/src/vmcontext.rs b/crates/runtime/src/vmcontext.rs index 7b5eb38be7b0..5d1125b1dedd 100644 --- a/crates/runtime/src/vmcontext.rs +++ b/crates/runtime/src/vmcontext.rs @@ -1,6 +1,8 @@ //! This file declares `VMContext` and several related structs which contain //! fields that compiled wasm code accesses directly. +mod vm_host_func_context; + use crate::externref::VMExternRef; use crate::instance::Instance; use std::any::Any; @@ -9,6 +11,7 @@ use std::marker; use std::ptr::NonNull; use std::sync::atomic::{AtomicUsize, Ordering}; use std::u32; +pub use vm_host_func_context::VMHostFuncContext; use wasmtime_environ::DefinedMemoryIndex; pub const VMCONTEXT_MAGIC: u32 = u32::from_le_bytes(*b"core"); @@ -631,26 +634,23 @@ macro_rules! define_builtin_array { ( $( $( #[$attr:meta] )* - $name:ident( $( $param:ident ),* ) -> ( $( $result:ident ),* ); + $name:ident( $( $pname:ident: $param:ident ),* ) $( -> $result:ident )?; )* ) => { /// An array that stores addresses of builtin functions. We translate code /// to use indirect calls. This way, we don't have to patch the code. #[repr(C)] - #[allow(unused_parens)] pub struct VMBuiltinFunctionsArray { $( $name: unsafe extern "C" fn( $(define_builtin_array!(@ty $param)),* - ) -> ( - $(define_builtin_array!(@ty $result)),* - ), + ) $( -> define_builtin_array!(@ty $result))?, )* } impl VMBuiltinFunctionsArray { pub const INIT: VMBuiltinFunctionsArray = VMBuiltinFunctionsArray { - $($name: crate::libcalls::$name,)* + $($name: crate::libcalls::trampolines::$name,)* }; } }; @@ -722,6 +722,48 @@ pub struct VMRuntimeLimits { /// observed to reach or exceed this value, the guest code will /// yield if running asynchronously. pub epoch_deadline: UnsafeCell, + + /// The value of the frame pointer register when we last called from Wasm to + /// the host. + /// + /// Maintained by our Wasm-to-host trampoline, and cleared just before + /// calling into Wasm in `catch_traps`. + /// + /// This member is `0` when Wasm is actively running and has not called out + /// to the host. + /// + /// Used to find the start of a a contiguous sequence of Wasm frames when + /// walking the stack. + pub last_wasm_exit_fp: UnsafeCell, + + /// The last Wasm program counter before we called from Wasm to the host. + /// + /// Maintained by our Wasm-to-host trampoline, and cleared just before + /// calling into Wasm in `catch_traps`. + /// + /// This member is `0` when Wasm is actively running and has not called out + /// to the host. + /// + /// Used when walking a contiguous sequence of Wasm frames. + pub last_wasm_exit_pc: UnsafeCell, + + /// The last host stack pointer before we called into Wasm from the host. + /// + /// Maintained by our host-to-Wasm trampoline, and cleared just before + /// calling into Wasm in `catch_traps`. + /// + /// This member is `0` when Wasm is actively running and has not called out + /// to the host. + /// + /// When a host function is wrapped into a `wasmtime::Func`, and is then + /// called from the host, then this member has the sentinal value of `-1 as + /// usize`, meaning that this contiguous sequence of Wasm frames is the + /// empty sequence, and it is not safe to dereference the + /// `last_wasm_exit_fp`. + /// + /// Used to find the end of a contiguous sequence of Wasm frames when + /// walking the stack. + pub last_wasm_entry_sp: UnsafeCell, } // The `VMRuntimeLimits` type is a pod-type with no destructor, and we don't @@ -737,6 +779,9 @@ impl Default for VMRuntimeLimits { stack_limit: UnsafeCell::new(usize::max_value()), fuel_consumed: UnsafeCell::new(0), epoch_deadline: UnsafeCell::new(0), + last_wasm_exit_fp: UnsafeCell::new(0), + last_wasm_exit_pc: UnsafeCell::new(0), + last_wasm_entry_sp: UnsafeCell::new(0), } } } @@ -764,6 +809,18 @@ mod test_vmruntime_limits { offset_of!(VMRuntimeLimits, epoch_deadline), usize::from(offsets.vmruntime_limits_epoch_deadline()) ); + assert_eq!( + offset_of!(VMRuntimeLimits, last_wasm_exit_fp), + usize::from(offsets.vmruntime_limits_last_wasm_exit_fp()) + ); + assert_eq!( + offset_of!(VMRuntimeLimits, last_wasm_exit_pc), + usize::from(offsets.vmruntime_limits_last_wasm_exit_pc()) + ); + assert_eq!( + offset_of!(VMRuntimeLimits, last_wasm_entry_sp), + usize::from(offsets.vmruntime_limits_last_wasm_entry_sp()) + ); } } @@ -1094,9 +1151,15 @@ pub struct VMOpaqueContext { } impl VMOpaqueContext { - /// Helper function to clearly indicate that cast desired + /// Helper function to clearly indicate that casts are desired. #[inline] pub fn from_vmcontext(ptr: *mut VMContext) -> *mut VMOpaqueContext { ptr.cast() } + + /// Helper function to clearly indicate that casts are desired. + #[inline] + pub fn from_vm_host_func_context(ptr: *mut VMHostFuncContext) -> *mut VMOpaqueContext { + ptr.cast() + } } diff --git a/crates/runtime/src/vmcontext/vm_host_func_context.rs b/crates/runtime/src/vmcontext/vm_host_func_context.rs new file mode 100644 index 000000000000..a6678b3e48e2 --- /dev/null +++ b/crates/runtime/src/vmcontext/vm_host_func_context.rs @@ -0,0 +1,80 @@ +//! Definition of `VM*Context` variant for host functions. +//! +//! Keep in sync with `wasmtime_environ::VMHostFuncOffsets`. + +use wasmtime_environ::VM_HOST_FUNC_MAGIC; + +use super::{VMCallerCheckedAnyfunc, VMFunctionBody, VMOpaqueContext, VMSharedSignatureIndex}; +use std::{ + any::Any, + ptr::{self, NonNull}, +}; + +/// The `VM*Context` for host functions. +/// +/// Its `magic` field must always be `wasmtime_environ::VM_HOST_FUNC_MAGIC`, and +/// this is how you can determine whether a `VM*Context` is a +/// `VMHostFuncContext` versus a different kind of context. +#[repr(C)] +pub struct VMHostFuncContext { + magic: u32, + // _padding: u32, // (on 64-bit systems) + pub(crate) host_func: NonNull, + wasm_to_host_trampoline: VMCallerCheckedAnyfunc, + host_state: Box, +} + +// Declare that this type is send/sync, it's the responsibility of +// `VMHostFuncContext::new` callers to uphold this guarantee. +unsafe impl Send for VMHostFuncContext {} +unsafe impl Sync for VMHostFuncContext {} + +impl VMHostFuncContext { + /// Create the context for the given host function. + /// + /// # Safety + /// + /// The `host_func` must be a pointer to a host (not Wasm) function and it + /// must be `Send` and `Sync`. + pub unsafe fn new( + host_func: NonNull, + signature: VMSharedSignatureIndex, + host_state: Box, + ) -> Box { + let wasm_to_host_trampoline = VMCallerCheckedAnyfunc { + func_ptr: NonNull::new(crate::trampolines::wasm_to_host_trampoline as _).unwrap(), + type_index: signature, + vmctx: ptr::null_mut(), + }; + let mut ctx = Box::new(VMHostFuncContext { + magic: wasmtime_environ::VM_HOST_FUNC_MAGIC, + host_func, + wasm_to_host_trampoline, + host_state, + }); + ctx.wasm_to_host_trampoline.vmctx = + VMOpaqueContext::from_vm_host_func_context(&*ctx as *const _ as *mut _); + ctx + } + + /// Get the Wasm-to-host trampoline for this host function context. + pub fn wasm_to_host_trampoline(&self) -> NonNull { + NonNull::from(&self.wasm_to_host_trampoline) + } + + /// Get the host state for this host function context. + pub fn host_state(&self) -> &(dyn Any + Send + Sync) { + &*self.host_state + } +} + +impl VMHostFuncContext { + /// Helper function to cast between context types using a debug assertion to + /// protect against some mistakes. + #[inline] + pub unsafe fn from_opaque(opaque: *mut VMOpaqueContext) -> *mut VMHostFuncContext { + // See comments in `VMContext::from_opaque` for this debug assert + debug_assert_eq!((*opaque).magic, VM_HOST_FUNC_MAGIC); + opaque.cast() + } +} diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index 76af8c109400..52ea1d8d6910 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -26,7 +26,6 @@ wasmparser = "0.88.0" anyhow = "1.0.19" libc = "0.2" cfg-if = "1.0" -backtrace = { version = "0.3.61" } log = "0.4.8" wat = { version = "1.0.47", optional = true } serde = { version = "1.0.94", features = ["derive"] } diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index b12d9cef6f52..2b3cf1202c07 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -1415,6 +1415,13 @@ impl Config { compiler.target(target.clone())?; } + // We require frame pointers for correct stack walking, which is safety + // critical in the presence of reference types, and otherwise it is just + // really bad developer experience to get wrong. + self.compiler_config + .settings + .insert("preserve_frame_pointers".into(), "true".into()); + // check for incompatible compiler options and set required values if self.wasm_backtrace || self.features.reference_types { if !self diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 2b726c8ad0d8..847d474998ff 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -346,6 +346,7 @@ impl Engine { // runtime. "avoid_div_traps" => *value == FlagValue::Bool(true), "libcall_call_conv" => *value == FlagValue::Enum("isa_default".into()), + "preserve_frame_pointers" => *value == FlagValue::Bool(true), // Features wasmtime doesn't use should all be disabled, since // otherwise if they are enabled it could change the behavior of @@ -394,7 +395,6 @@ impl Engine { | "machine_code_cfg_info" | "tls_model" // wasmtime doesn't use tls right now | "opt_level" // opt level doesn't change semantics - | "preserve_frame_pointers" // we don't currently rely on frame pointers | "enable_alias_analysis" // alias analysis-based opts don't change semantics | "probestack_func_adjusts_sp" // probestack above asserted disabled | "probestack_size_log2" // probestack above asserted disabled diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index 56ccaa939418..5fd85a6f086d 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -10,11 +10,10 @@ use std::panic::{self, AssertUnwindSafe}; use std::pin::Pin; use std::ptr::NonNull; use std::sync::Arc; -use wasmtime_environ::FuncIndex; use wasmtime_runtime::{ - raise_user_trap, ExportFunction, InstanceAllocator, InstanceHandle, OnDemandInstanceAllocator, - VMCallerCheckedAnyfunc, VMContext, VMFunctionBody, VMFunctionImport, VMOpaqueContext, - VMSharedSignatureIndex, VMTrampoline, + raise_user_trap, ExportFunction, InstanceHandle, VMCallerCheckedAnyfunc, VMContext, + VMFunctionBody, VMFunctionImport, VMHostFuncContext, VMOpaqueContext, VMSharedSignatureIndex, + VMTrampoline, }; /// A WebAssembly function which can be called. @@ -824,10 +823,11 @@ impl Func { trampoline: VMTrampoline, params_and_returns: *mut ValRaw, ) -> Result<(), Trap> { - invoke_wasm_and_catch_traps(store, |callee| { + invoke_wasm_and_catch_traps(store, |caller| { + let trampoline = wasmtime_runtime::prepare_host_to_wasm_trampoline(caller, trampoline); trampoline( anyfunc.as_ref().vmctx, - callee, + caller, anyfunc.as_ref().func_ptr.as_ptr(), params_and_returns, ) @@ -1220,7 +1220,7 @@ impl Func { /// raw trampoline or a raw WebAssembly function. This *must* be called to do /// things like catch traps and set up GC properly. /// -/// The `closure` provided receives a default "callee" `VMContext` parameter it +/// The `closure` provided receives a default "caller" `VMContext` parameter it /// can pass to the called wasm function, if desired. pub(crate) fn invoke_wasm_and_catch_traps( store: &mut StoreContextMut<'_, T>, @@ -1236,7 +1236,7 @@ pub(crate) fn invoke_wasm_and_catch_traps( let result = wasmtime_runtime::catch_traps( store.0.signal_handler(), store.0.engine().config().wasm_backtrace, - store.0.default_callee(), + store.0.default_caller(), closure, ); exit_wasm(store, exit); @@ -1254,15 +1254,10 @@ pub(crate) fn invoke_wasm_and_catch_traps( /// allocated by WebAssembly code and it's relative to the initial stack /// pointer that called into wasm. /// -/// * Stack canaries for externref gc tracing. Currently the implementation -/// relies on walking frames but the stack walker isn't always 100% reliable, -/// so a canary is used to ensure that if the canary is seen then it's -/// guaranteed all wasm frames have been walked. -/// /// This function may fail if the the stack limit can't be set because an /// interrupt already happened. fn enter_wasm(store: &mut StoreContextMut<'_, T>) -> Option { - // If this is a recursive call, e.g. our stack canary is already set, then + // If this is a recursive call, e.g. our stack limit is already set, then // we may be able to skip this function. // // For synchronous stores there's nothing else to do because all wasm calls @@ -1271,14 +1266,8 @@ fn enter_wasm(store: &mut StoreContextMut<'_, T>) -> Option { // // For asynchronous stores then each call happens on a separate native // stack. This means that the previous stack limit is no longer relevant - // because we're on a separate stack. In this situation we need to - // update the stack limit, but we don't need to update the gc stack canary - // in this situation. - if store - .0 - .externref_activations_table() - .stack_canary() - .is_some() + // because we're on a separate stack. + if unsafe { *store.0.runtime_limits().stack_limit.get() } != usize::MAX && !store.0.async_support() { return None; @@ -1308,21 +1297,6 @@ fn enter_wasm(store: &mut StoreContextMut<'_, T>) -> Option { ) }; - // The `usize::max_value()` sentinel is present on recursive calls to - // asynchronous stores here. In that situation we don't want to keep - // updating the stack canary, so only execute this once at the top. - if prev_stack == usize::max_value() { - debug_assert!(store - .0 - .externref_activations_table() - .stack_canary() - .is_none()); - store - .0 - .externref_activations_table() - .set_stack_canary(Some(stack_pointer)); - } - Some(prev_stack) } @@ -1334,13 +1308,6 @@ fn exit_wasm(store: &mut StoreContextMut<'_, T>, prev_stack: Option) { None => return, }; - // Only if we're restoring a top-level value do we clear the stack canary - // value. Otherwise our purpose here might be restoring a recursive stack - // limit but leaving the active canary in place. - if prev_stack == usize::max_value() { - store.0.externref_activations_table().set_stack_canary(None); - } - unsafe { *store.0.runtime_limits().stack_limit.get() = prev_stack; } @@ -1642,7 +1609,10 @@ for_each_function_signature!(impl_host_abi); /// as an implementation detail of this crate. pub trait IntoFunc: Send + Sync + 'static { #[doc(hidden)] - fn into_func(self, engine: &Engine) -> (InstanceHandle, VMTrampoline); + fn into_func( + self, + engine: &Engine, + ) -> (Box, VMSharedSignatureIndex, VMTrampoline); } /// A structure representing the caller's context when creating a function @@ -1690,10 +1660,6 @@ impl Caller<'_, T> { /// Looks up an export from the caller's module by the `name` given. /// - /// Note that this function is only implemented for the `Extern::Memory` - /// and the `Extern::Func` types currently. No other exported structures - /// can be acquired through this method. - /// /// Note that when accessing and calling exported functions, one should /// adhere to the guidelines of the interface types proposal. This method /// is a temporary mechanism for accessing the caller's information until @@ -1722,18 +1688,10 @@ impl Caller<'_, T> { // back to themselves. If this caller doesn't have that `host_state` // then it probably means it was a host-created object like `Func::new` // which doesn't have any exports we want to return anyway. - match self - .caller + self.caller .host_state() .downcast_ref::()? - .get_export(&mut self.store, name)? - { - Extern::Func(f) => Some(Extern::Func(f)), - Extern::Memory(f) => Some(Extern::Memory(f)), - // Intentionally ignore other Extern items here since this API is - // supposed to be a temporary stop-gap until interface types. - _ => None, - } + .get_export(&mut self.store, name) } /// Access the underlying data owned by this `Store`. @@ -1827,7 +1785,7 @@ macro_rules! impl_into_func { $($args: WasmTy,)* R: WasmRet, { - fn into_func(self, engine: &Engine) -> (InstanceHandle, VMTrampoline) { + fn into_func(self, engine: &Engine) -> (Box, VMSharedSignatureIndex, VMTrampoline) { let f = move |_: Caller<'_, T>, $($args:$args),*| { self($($args),*) }; @@ -1843,7 +1801,7 @@ macro_rules! impl_into_func { $($args: WasmTy,)* R: WasmRet, { - fn into_func(self, engine: &Engine) -> (InstanceHandle, VMTrampoline) { + fn into_func(self, engine: &Engine) -> (Box, VMSharedSignatureIndex, VMTrampoline) { /// This shim is called by Wasm code, constructs a `Caller`, /// calls the wrapped host function, and returns the translated /// result back to Wasm. @@ -1875,8 +1833,9 @@ macro_rules! impl_into_func { // should be part of this block, and the long-jmp-ing // happens after the block in handling `CallResult`. let result = Caller::with(caller_vmctx, |mut caller| { - let vmctx = VMContext::from_opaque(vmctx); + let vmctx = VMHostFuncContext::from_opaque(vmctx); let state = (*vmctx).host_state(); + // Double-check ourselves in debug mode, but we control // the `Any` here so an unsafe downcast should also // work. @@ -1979,20 +1938,15 @@ macro_rules! impl_into_func { let trampoline = host_to_wasm_trampoline::<$($args,)* R>; - - let instance = unsafe { - crate::trampoline::create_raw_function( - std::slice::from_raw_parts_mut( - wasm_to_host_shim:: as *mut _, - 0, - ), + let ctx = unsafe { + VMHostFuncContext::new( + NonNull::new(wasm_to_host_shim:: as *mut _).unwrap(), shared_signature_id, Box::new(self), ) - .expect("failed to create raw function") }; - (instance, trampoline) + (ctx, shared_signature_id, trampoline) } } } @@ -2011,15 +1965,17 @@ for_each_function_signature!(impl_into_func); /// `Store` itself, but that's an unsafe contract of using this for now /// rather than part of the struct type (to avoid `Func` in the API). pub(crate) struct HostFunc { - // Owned `*mut VMContext` allocation. Deallocated when this `HostFunc` is - // dropped. - instance: InstanceHandle, + // The host function context that is shared with our host-to-Wasm + // trampoline. + ctx: Box, + + // The index for this function's signature within the engine-wide shared + // signature registry. + signature: VMSharedSignatureIndex, + // Trampoline to enter this function from Rust. - trampoline: VMTrampoline, - // The loaded `ExportFunction` from the above `InstanceHandle` which has raw - // pointers and information about how to actually call this function (e.g. - // the actual address in JIT code and the vm shared function index). - export: ExportFunction, + host_to_wasm_trampoline: VMTrampoline, + // Stored to unregister this function's signature with the engine when this // is dropped. engine: Engine, @@ -2056,9 +2012,9 @@ impl HostFunc { Ok(result) }) }; - let (instance, trampoline) = crate::trampoline::create_function(&ty, func, engine) + let (ctx, signature, trampoline) = crate::trampoline::create_function(&ty, func, engine) .expect("failed to create function"); - HostFunc::_new(engine, instance, trampoline) + HostFunc::_new(engine, ctx, signature, trampoline) } /// Analog of [`Func::wrap`] @@ -2066,18 +2022,22 @@ impl HostFunc { engine: &Engine, func: impl IntoFunc, ) -> Self { - let (instance, trampoline) = func.into_func(engine); - HostFunc::_new(engine, instance, trampoline) + let (ctx, signature, trampoline) = func.into_func(engine); + HostFunc::_new(engine, ctx, signature, trampoline) } /// Requires that this function's signature is already registered within /// `Engine`. This happens automatically during the above two constructors. - fn _new(engine: &Engine, mut instance: InstanceHandle, trampoline: VMTrampoline) -> Self { - let export = instance.get_exported_func(FuncIndex::from_u32(0)); + fn _new( + engine: &Engine, + ctx: Box, + signature: VMSharedSignatureIndex, + trampoline: VMTrampoline, + ) -> Self { HostFunc { - instance, - trampoline, - export, + ctx, + signature, + host_to_wasm_trampoline: trampoline, engine: engine.clone(), } } @@ -2136,20 +2096,20 @@ impl HostFunc { } pub(crate) fn sig_index(&self) -> VMSharedSignatureIndex { - unsafe { self.export.anyfunc.as_ref().type_index } + self.signature + } + + fn export_func(&self) -> ExportFunction { + ExportFunction { + anyfunc: self.ctx.wasm_to_host_trampoline(), + } } } impl Drop for HostFunc { fn drop(&mut self) { unsafe { - self.engine - .signatures() - .unregister(self.export.anyfunc.as_ref().type_index); - - // Host functions are always allocated with the default (on-demand) - // allocator - OnDemandInstanceAllocator::default().deallocate(&self.instance); + self.engine.signatures().unregister(self.signature); } } } @@ -2159,14 +2119,14 @@ impl FuncData { pub(crate) fn trampoline(&self) -> VMTrampoline { match &self.kind { FuncKind::StoreOwned { trampoline, .. } => *trampoline, - FuncKind::SharedHost(host) => host.trampoline, - FuncKind::RootedHost(host) => host.trampoline, - FuncKind::Host(host) => host.trampoline, + FuncKind::SharedHost(host) => host.host_to_wasm_trampoline, + FuncKind::RootedHost(host) => host.host_to_wasm_trampoline, + FuncKind::Host(host) => host.host_to_wasm_trampoline, } } #[inline] - fn export(&self) -> &ExportFunction { + fn export(&self) -> ExportFunction { self.kind.export() } @@ -2177,12 +2137,12 @@ impl FuncData { impl FuncKind { #[inline] - fn export(&self) -> &ExportFunction { + fn export(&self) -> ExportFunction { match self { - FuncKind::StoreOwned { export, .. } => export, - FuncKind::SharedHost(host) => &host.export, - FuncKind::RootedHost(host) => &host.export, - FuncKind::Host(host) => &host.export, + FuncKind::StoreOwned { export, .. } => *export, + FuncKind::SharedHost(host) => host.export_func(), + FuncKind::RootedHost(host) => host.export_func(), + FuncKind::Host(host) => host.export_func(), } } } diff --git a/crates/wasmtime/src/func/typed.rs b/crates/wasmtime/src/func/typed.rs index 1c3d795b6890..83565829e0f7 100644 --- a/crates/wasmtime/src/func/typed.rs +++ b/crates/wasmtime/src/func/typed.rs @@ -164,13 +164,13 @@ where // the memory go away, so the size matters here for performance. let mut captures = (func, MaybeUninit::uninit(), params, false); - let result = invoke_wasm_and_catch_traps(store, |callee| { + let result = invoke_wasm_and_catch_traps(store, |caller| { let (anyfunc, ret, params, returned) = &mut captures; let anyfunc = anyfunc.as_ref(); let result = Params::invoke::( anyfunc.func_ptr.as_ptr(), anyfunc.vmctx, - callee, + caller, *params, ); ptr::write(ret.as_mut_ptr(), result); @@ -568,7 +568,7 @@ macro_rules! impl_wasm_params { $($t::Abi,)* ::Retptr, ) -> ::Abi, - >(func); + >(func); let ($($t,)*) = abi; // Use the `call` function to acquire a `retptr` which we'll // forward to the native function. Once we have it we also @@ -578,6 +578,7 @@ macro_rules! impl_wasm_params { // Upon returning `R::call` will convert all the returns back // into `R`. ::call(|retptr| { + let fnptr = wasmtime_runtime::prepare_host_to_wasm_trampoline(vmctx2, fnptr); fnptr(vmctx1, vmctx2, $($t,)* retptr) }) } diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index 9cf1365a7357..898a963fcfb3 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -342,13 +342,14 @@ impl Instance { let f = instance.get_exported_func(start); let vmctx = instance.vmctx_ptr(); unsafe { - super::func::invoke_wasm_and_catch_traps(store, |_default_callee| { - mem::transmute::< + super::func::invoke_wasm_and_catch_traps(store, |_default_caller| { + let trampoline = mem::transmute::< *const VMFunctionBody, unsafe extern "C" fn(*mut VMOpaqueContext, *mut VMContext), - >(f.anyfunc.as_ref().func_ptr.as_ptr())( - f.anyfunc.as_ref().vmctx, vmctx - ) + >(f.anyfunc.as_ref().func_ptr.as_ptr()); + let trampoline = + wasmtime_runtime::prepare_host_to_wasm_trampoline(vmctx, trampoline); + trampoline(f.anyfunc.as_ref().vmctx, vmctx) })?; } Ok(()) diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 1f96daaacaf1..43a8280b6cea 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -1026,23 +1026,6 @@ impl BareModuleInfo { } } - pub(crate) fn one_func( - module: Arc, - image_base: usize, - info: FunctionInfo, - signature_id: SignatureIndex, - signature: VMSharedSignatureIndex, - ) -> Self { - let mut function_info = PrimaryMap::with_capacity(1); - function_info.push(info); - BareModuleInfo { - module, - image_base, - function_info, - one_signature: Some((signature_id, signature)), - } - } - pub(crate) fn into_traitobj(self) -> Arc { Arc::new(self) } diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index 28aecdf9d259..05bf5d54ef23 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -304,7 +304,7 @@ pub struct StoreOpaque { /// `rooted_host_funcs` below. This structure contains pointers which are /// otherwise kept alive by the `Arc` references in `rooted_host_funcs`. store_data: ManuallyDrop, - default_callee: InstanceHandle, + default_caller: InstanceHandle, /// Used to optimzed wasm->host calls when the host function is defined with /// `Func::new` to avoid allocating a new vector each time a function is @@ -493,7 +493,7 @@ impl Store { }, out_of_gas_behavior: OutOfGas::Trap, store_data: ManuallyDrop::new(StoreData::new()), - default_callee, + default_caller: default_callee, hostcall_val_storage: Vec::new(), wasm_val_raw_storage: Vec::new(), rooted_host_funcs: ManuallyDrop::new(Vec::new()), @@ -514,7 +514,7 @@ impl Store { *mut (dyn wasmtime_runtime::Store + '_), *mut (dyn wasmtime_runtime::Store + 'static), >(&mut *inner); - inner.default_callee.set_store(traitobj); + inner.default_caller.set_store(traitobj); } Self { @@ -1458,12 +1458,12 @@ impl StoreOpaque { } #[inline] - pub fn default_callee(&self) -> *mut VMContext { - self.default_callee.vmctx_ptr() + pub fn default_caller(&self) -> *mut VMContext { + self.default_caller.vmctx_ptr() } pub fn traitobj(&self) -> *mut dyn wasmtime_runtime::Store { - self.default_callee.store() + self.default_caller.store() } /// Takes the cached `Vec` stored internally across hostcalls to get @@ -2047,7 +2047,7 @@ impl Drop for StoreOpaque { allocator.deallocate(&instance.handle); } } - ondemand.deallocate(&self.default_callee); + ondemand.deallocate(&self.default_caller); // See documentation for these fields on `StoreOpaque` for why they // must be dropped in this order. diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index fce77bd2b928..cc9cd570e3ee 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -1,19 +1,12 @@ //! Support for a calling of an imported function. -use crate::module::BareModuleInfo; use crate::{Engine, FuncType, Trap, ValRaw}; use anyhow::Result; -use std::any::Any; use std::panic::{self, AssertUnwindSafe}; -use std::sync::Arc; -use wasmtime_environ::{ - AnyfuncIndex, EntityIndex, FunctionInfo, Module, ModuleType, SignatureIndex, -}; +use std::ptr::NonNull; use wasmtime_jit::{CodeMemory, ProfilingAgent}; use wasmtime_runtime::{ - Imports, InstanceAllocationRequest, InstanceAllocator, InstanceHandle, - OnDemandInstanceAllocator, StorePtr, VMContext, VMFunctionBody, VMOpaqueContext, - VMSharedSignatureIndex, VMTrampoline, + VMContext, VMHostFuncContext, VMOpaqueContext, VMSharedSignatureIndex, VMTrampoline, }; struct TrampolineState { @@ -44,7 +37,7 @@ unsafe extern "C" fn stub_fn( // have any. To prevent leaks we avoid having any local destructors by // avoiding local variables. let result = panic::catch_unwind(AssertUnwindSafe(|| { - let vmctx = VMContext::from_opaque(vmctx); + let vmctx = VMHostFuncContext::from_opaque(vmctx); // Double-check ourselves in debug mode, but we control // the `Any` here so an unsafe downcast should also // work. @@ -110,7 +103,7 @@ pub fn create_function( ft: &FuncType, func: F, engine: &Engine, -) -> Result<(InstanceHandle, VMTrampoline)> +) -> Result<(Box, VMSharedSignatureIndex, VMTrampoline)> where F: Fn(*mut VMContext, &mut [ValRaw]) -> Result<(), Trap> + Send + Sync + 'static, { @@ -131,54 +124,20 @@ where // Extract the host/wasm trampolines from the results of compilation since // we know their start/length. + let host_trampoline = code.text[t1.start as usize..][..t1.length as usize].as_ptr(); - let wasm_trampoline = &code.text[t2.start as usize..][..t2.length as usize]; - let wasm_trampoline = wasm_trampoline as *const [u8] as *mut [VMFunctionBody]; + let wasm_trampoline = code.text[t2.start as usize..].as_ptr() as *mut _; + let wasm_trampoline = NonNull::new(wasm_trampoline).unwrap(); let sig = engine.signatures().register(ft.as_wasm_func_type()); unsafe { - let instance = create_raw_function( + let ctx = VMHostFuncContext::new( wasm_trampoline, sig, Box::new(TrampolineState { func, code_memory }), - )?; + ); let host_trampoline = std::mem::transmute::<*const u8, VMTrampoline>(host_trampoline); - Ok((instance, host_trampoline)) + Ok((ctx, sig, host_trampoline)) } } - -pub unsafe fn create_raw_function( - func: *mut [VMFunctionBody], - sig: VMSharedSignatureIndex, - host_state: Box, -) -> Result { - let mut module = Module::new(); - - let sig_id = SignatureIndex::from_u32(u32::max_value() - 1); - module.types.push(ModuleType::Function(sig_id)); - let func_id = module.push_escaped_function(sig_id, AnyfuncIndex::from_u32(0)); - module.num_escaped_funcs = 1; - module - .exports - .insert(String::new(), EntityIndex::Function(func_id)); - let module = Arc::new(module); - - let runtime_info = &BareModuleInfo::one_func( - module.clone(), - (*func).as_ptr() as usize, - FunctionInfo::default(), - sig_id, - sig, - ) - .into_traitobj(); - - Ok( - OnDemandInstanceAllocator::default().allocate(InstanceAllocationRequest { - imports: Imports::default(), - host_state, - store: StorePtr::empty(), - runtime_info, - })?, - ) -} diff --git a/crates/wasmtime/src/trap.rs b/crates/wasmtime/src/trap.rs index b07f2d7dea13..1d7e0095b20a 100644 --- a/crates/wasmtime/src/trap.rs +++ b/crates/wasmtime/src/trap.rs @@ -152,10 +152,7 @@ impl TrapBacktrace { store.engine().config().wasm_backtrace_details_env_used; for frame in native_trace.frames() { - let pc = frame.ip() as usize; - if pc == 0 { - continue; - } + debug_assert!(frame.pc() != 0); // Note that we need to be careful about the pc we pass in // here to lookup frame information. This program counter is // used to translate back to an original source location in @@ -166,7 +163,11 @@ impl TrapBacktrace { // likely a call instruction on the stack). In that case we // want to lookup information for the previous instruction // (the call instruction) so we subtract one as the lookup. - let pc_to_lookup = if Some(pc) == trap_pc { pc } else { pc - 1 }; + let pc_to_lookup = if Some(frame.pc()) == trap_pc { + frame.pc() + } else { + frame.pc() - 1 + }; if let Some((info, module)) = store.modules().lookup_frame_info(pc_to_lookup) { wasm_trace.push(info); diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index d0c7b98c051f..f0a1f2a856bc 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -79,6 +79,12 @@ path = "fuzz_targets/table_ops.rs" test = false doc = false +[[bin]] +name = "stacks" +path = "fuzz_targets/stacks.rs" +test = false +doc = false + [[bin]] name = "compile-maybe-invalid" path = "fuzz_targets/compile-maybe-invalid.rs" diff --git a/fuzz/fuzz_targets/stacks.rs b/fuzz/fuzz_targets/stacks.rs new file mode 100644 index 000000000000..08504c795f86 --- /dev/null +++ b/fuzz/fuzz_targets/stacks.rs @@ -0,0 +1,10 @@ +//! Check that we see the stack trace correctly. + +#![no_main] + +use libfuzzer_sys::fuzz_target; +use wasmtime_fuzzing::{generators::Stacks, oracles::check_stacks}; + +fuzz_target!(|stacks: Stacks| { + check_stacks(stacks); +}); diff --git a/tests/all/component_model/import.rs b/tests/all/component_model/import.rs index 26f21f3006f0..09cbabc89370 100644 --- a/tests/all/component_model/import.rs +++ b/tests/all/component_model/import.rs @@ -222,57 +222,61 @@ fn attempt_to_leave_during_malloc() -> Result<()> { let component = Component::new(&engine, component)?; let mut store = Store::new(&engine, ()); - // Assert that during a host import if we return values to wasm that a trap - // happens if we try to leave the instance. - let trap = linker - .instantiate(&mut store, &component)? - .get_typed_func::<(), (), _>(&mut store, "run")? - .call(&mut store, ()) - .unwrap_err() - .downcast::()?; - assert!( - trap.to_string().contains("cannot leave component instance"), - "bad trap: {}", - trap, - ); - let trace = trap.trace().unwrap(); - assert_eq!(trace.len(), 4); - - // This was our entry point... - assert_eq!(trace[3].module_name(), Some("m")); - assert_eq!(trace[3].func_name(), Some("run")); - - // ... which called an imported function which ends up being originally - // defined by the shim instance. The shim instance then does an indirect - // call through a table which goes to the `canon.lower`'d host function - assert_eq!(trace[2].module_name(), Some("host_shim")); - assert_eq!(trace[2].func_name(), Some("shim_ret_string")); - - // ... and the lowered host function will call realloc to allocate space for - // the result - assert_eq!(trace[1].module_name(), Some("m")); - assert_eq!(trace[1].func_name(), Some("realloc")); - - // ... but realloc calls the shim instance and tries to exit the - // component, triggering a dynamic trap - assert_eq!(trace[0].module_name(), Some("host_shim")); - assert_eq!(trace[0].func_name(), Some("shim_thunk")); - - // In addition to the above trap also ensure that when we enter a wasm - // component if we try to leave while lowering then that's also a dynamic - // trap. - let trap = linker - .instantiate(&mut store, &component)? - .get_typed_func::<(&str,), (), _>(&mut store, "take-string")? - .call(&mut store, ("x",)) - .unwrap_err() - .downcast::()?; - assert!( - trap.to_string().contains("cannot leave component instance"), - "bad trap: {}", - trap, - ); - + // TODO(#4535): we need to fold the Wasm<--->host trampoline functionality into + // component trampolines. Until then, we panic when getting a backtrace here. + if false { + // Assert that during a host import if we return values to wasm that a trap + // happens if we try to leave the instance. + let trap = linker + .instantiate(&mut store, &component)? + .get_typed_func::<(), (), _>(&mut store, "run")? + .call(&mut store, ()) + .unwrap_err() + .downcast::()?; + assert!( + trap.to_string().contains("cannot leave component instance"), + "bad trap: {}", + trap, + ); + + let trace = trap.trace().unwrap(); + assert_eq!(trace.len(), 4); + + // This was our entry point... + assert_eq!(trace[3].module_name(), Some("m")); + assert_eq!(trace[3].func_name(), Some("run")); + + // ... which called an imported function which ends up being originally + // defined by the shim instance. The shim instance then does an indirect + // call through a table which goes to the `canon.lower`'d host function + assert_eq!(trace[2].module_name(), Some("host_shim")); + assert_eq!(trace[2].func_name(), Some("shim_ret_string")); + + // ... and the lowered host function will call realloc to allocate space for + // the result + assert_eq!(trace[1].module_name(), Some("m")); + assert_eq!(trace[1].func_name(), Some("realloc")); + + // ... but realloc calls the shim instance and tries to exit the + // component, triggering a dynamic trap + assert_eq!(trace[0].module_name(), Some("host_shim")); + assert_eq!(trace[0].func_name(), Some("shim_thunk")); + + // In addition to the above trap also ensure that when we enter a wasm + // component if we try to leave while lowering then that's also a dynamic + // trap. + let trap = linker + .instantiate(&mut store, &component)? + .get_typed_func::<(&str,), (), _>(&mut store, "take-string")? + .call(&mut store, ("x",)) + .unwrap_err() + .downcast::()?; + assert!( + trap.to_string().contains("cannot leave component instance"), + "bad trap: {}", + trap, + ); + } Ok(()) } @@ -600,20 +604,25 @@ fn bad_import_alignment() -> Result<()> { )?; let component = Component::new(&engine, component)?; let mut store = Store::new(&engine, ()); - let trap = linker - .instantiate(&mut store, &component)? - .get_typed_func::<(), (), _>(&mut store, "unaligned-retptr")? - .call(&mut store, ()) - .unwrap_err() - .downcast::()?; - assert!(trap.to_string().contains("pointer not aligned"), "{}", trap); - let trap = linker - .instantiate(&mut store, &component)? - .get_typed_func::<(), (), _>(&mut store, "unaligned-argptr")? - .call(&mut store, ()) - .unwrap_err() - .downcast::()?; - assert!(trap.to_string().contains("pointer not aligned"), "{}", trap); + + // TODO(#4535): we need to fold the Wasm<--->host trampoline functionality into + // component trampolines. Until then, we panic when getting a backtrace here. + if false { + let trap = linker + .instantiate(&mut store, &component)? + .get_typed_func::<(), (), _>(&mut store, "unaligned-retptr")? + .call(&mut store, ()) + .unwrap_err() + .downcast::()?; + assert!(trap.to_string().contains("pointer not aligned"), "{}", trap); + let trap = linker + .instantiate(&mut store, &component)? + .get_typed_func::<(), (), _>(&mut store, "unaligned-argptr")? + .call(&mut store, ()) + .unwrap_err() + .downcast::()?; + assert!(trap.to_string().contains("pointer not aligned"), "{}", trap); + } Ok(()) } diff --git a/tests/all/func.rs b/tests/all/func.rs index eef7a9efd6dd..79a0efbbe491 100644 --- a/tests/all/func.rs +++ b/tests/all/func.rs @@ -425,8 +425,8 @@ fn caller_memory() -> anyhow::Result<()> { let f = Func::wrap(&mut store, |mut c: Caller<'_, ()>| { assert!(c.get_export("m").is_some()); assert!(c.get_export("f").is_some()); - assert!(c.get_export("g").is_none()); - assert!(c.get_export("t").is_none()); + assert!(c.get_export("g").is_some()); + assert!(c.get_export("t").is_some()); }); let module = Module::new( store.engine(), diff --git a/tests/all/pooling_allocator.rs b/tests/all/pooling_allocator.rs index b68dd1602ad4..e00a55b2f84a 100644 --- a/tests/all/pooling_allocator.rs +++ b/tests/all/pooling_allocator.rs @@ -630,10 +630,10 @@ fn instance_too_large() -> Result<()> { let engine = Engine::new(&config)?; let expected = "\ -instance allocation for this module requires 320 bytes which exceeds the \ +instance allocation for this module requires 336 bytes which exceeds the \ configured maximum of 16 bytes; breakdown of allocation requirement: - * 80.00% - 256 bytes - instance state management + * 76.19% - 256 bytes - instance state management "; match Module::new(&engine, "(module)") { Ok(_) => panic!("should have failed to compile"), @@ -647,11 +647,11 @@ configured maximum of 16 bytes; breakdown of allocation requirement: lots_of_globals.push_str(")"); let expected = "\ -instance allocation for this module requires 1920 bytes which exceeds the \ +instance allocation for this module requires 1936 bytes which exceeds the \ configured maximum of 16 bytes; breakdown of allocation requirement: - * 13.33% - 256 bytes - instance state management - * 83.33% - 1600 bytes - defined globals + * 13.22% - 256 bytes - instance state management + * 82.64% - 1600 bytes - defined globals "; match Module::new(&engine, &lots_of_globals) { Ok(_) => panic!("should have failed to compile"), diff --git a/tests/all/traps.rs b/tests/all/traps.rs index 02eec0c784bb..8f3dead699a7 100644 --- a/tests/all/traps.rs +++ b/tests/all/traps.rs @@ -307,6 +307,61 @@ fn rust_panic_import() -> Result<()> { Ok(()) } +// Test that we properly save/restore our trampolines' saved Wasm registers +// (used when capturing backtraces) before we resume panics. +#[test] +fn rust_catch_panic_import() -> Result<()> { + let mut store = Store::<()>::default(); + + let binary = wat::parse_str( + r#" + (module $a + (import "" "panic" (func $panic)) + (import "" "catch panic" (func $catch_panic)) + (func (export "panic") call $panic) + (func (export "run") + call $catch_panic + call $catch_panic + unreachable + ) + ) + "#, + )?; + + let module = Module::new(store.engine(), &binary)?; + let num_panics = std::sync::Arc::new(std::sync::atomic::AtomicU32::new(0)); + let sig = FuncType::new(None, None); + let panic = Func::new(&mut store, sig, { + let num_panics = num_panics.clone(); + move |_, _, _| { + num_panics.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + panic!("this is a panic"); + } + }); + let catch_panic = Func::wrap(&mut store, |mut caller: Caller<'_, _>| { + panic::catch_unwind(AssertUnwindSafe(|| { + drop( + caller + .get_export("panic") + .unwrap() + .into_func() + .unwrap() + .call(&mut caller, &[], &mut []), + ); + })) + .unwrap_err(); + }); + + let instance = Instance::new(&mut store, &module, &[panic.into(), catch_panic.into()])?; + let run = instance.get_typed_func::<(), (), _>(&mut store, "run")?; + let trap = run.call(&mut store, ()).unwrap_err(); + let trace = trap.trace().unwrap(); + assert_eq!(trace.len(), 1); + assert_eq!(trace[0].func_index(), 3); + assert_eq!(num_panics.load(std::sync::atomic::Ordering::SeqCst), 2); + Ok(()) +} + #[test] fn rust_panic_start_function() -> Result<()> { let mut store = Store::<()>::default();