From 773b90924d571ba9a68705e9a6c12554c9f7b555 Mon Sep 17 00:00:00 2001 From: Danno Ferrin Date: Mon, 27 Jan 2025 04:43:00 -0700 Subject: [PATCH] feat: Add essential EIP-7756 tracing fields (#2023) * Add essential EIP-7756 tracing fields Add (for eof) section and function depth fields, and make pc container relative instead of code section relative. * rm trace_pc and move logic to pc fn * change start of PC to start of bytes * cleanup --------- Co-authored-by: rakita Co-authored-by: rakita --- crates/bytecode/src/bytecode.rs | 24 +++++++++++---- crates/bytecode/src/eof.rs | 1 + crates/bytecode/src/eof/body.rs | 14 ++++----- crates/handler/src/frame.rs | 1 + crates/inspector/src/eip3155.rs | 29 +++++++++++++++++-- .../src/interpreter/ext_bytecode.rs | 13 +++++---- 6 files changed, 60 insertions(+), 22 deletions(-) diff --git a/crates/bytecode/src/bytecode.rs b/crates/bytecode/src/bytecode.rs index a2df02d447..00a9fb1aac 100644 --- a/crates/bytecode/src/bytecode.rs +++ b/crates/bytecode/src/bytecode.rs @@ -142,22 +142,34 @@ impl Bytecode { } } + /// Pointer to the executable bytecode. + /// + /// Note: EOF will return the pointer to the start of the code section. + /// while legacy bytecode will point to the start of the bytes. + pub fn bytecode_ptr(&self) -> *const u8 { + self.bytecode().as_ptr() + } + /// Returns bytes. #[inline] pub fn bytes(&self) -> Bytes { + self.bytes_ref().clone() + } + + /// Returns bytes. + #[inline] + pub fn bytes_ref(&self) -> &Bytes { match self { - Self::LegacyAnalyzed(analyzed) => analyzed.bytecode().clone(), - _ => self.original_bytes(), + Self::LegacyAnalyzed(analyzed) => analyzed.bytecode(), + Self::Eof(eof) => &eof.raw, + Self::Eip7702(code) => code.raw(), } } /// Returns bytes slice. #[inline] pub fn bytes_slice(&self) -> &[u8] { - match self { - Self::LegacyAnalyzed(analyzed) => analyzed.bytecode(), - _ => self.original_byte_slice(), - } + self.bytes_ref() } /// Returns a reference to the original bytecode. 
diff --git a/crates/bytecode/src/eof.rs b/crates/bytecode/src/eof.rs index 5ee155d844..2e167664f3 100644 --- a/crates/bytecode/src/eof.rs +++ b/crates/bytecode/src/eof.rs @@ -43,6 +43,7 @@ impl Default for Eof { code_section: vec![1], // One code section with a STOP byte. code: Bytes::from_static(&[0x00]), + code_offset: 0, container_section: vec![], data_section: Bytes::new(), is_data_filled: true, diff --git a/crates/bytecode/src/eof/body.rs b/crates/bytecode/src/eof/body.rs index 67756c8544..4174d45f42 100644 --- a/crates/bytecode/src/eof/body.rs +++ b/crates/bytecode/src/eof/body.rs @@ -15,6 +15,7 @@ pub struct EofBody { /// Index of the last byte of each code section pub code_section: Vec, pub code: Bytes, + pub code_offset: usize, pub container_section: Vec, pub data_section: Bytes, pub is_data_filled: bool, @@ -34,7 +35,6 @@ impl EofBody { /// Creates an EOF container from this body. pub fn into_eof(self) -> Eof { - // TODO : Add bounds checks. let mut prev_value = 0; let header = EofHeader { types_size: self.types_section.len() as u16 * 4, @@ -59,11 +59,7 @@ impl EofBody { let mut buffer = Vec::new(); header.encode(&mut buffer); self.encode(&mut buffer); - Eof { - header, - body: self, - raw: buffer.into(), - } + Eof::decode(buffer.into()).expect("Failed to encode EOF") } /// Returns offset of the start of indexed code section. @@ -71,10 +67,11 @@ impl EofBody { /// First code section starts at 0. pub fn eof_code_section_start(&self, idx: usize) -> Option { // Starting code section start with 0. + let code_offset = self.code_offset; if idx == 0 { - return Some(0); + return Some(code_offset); } - self.code_section.get(idx - 1).cloned() + self.code_section.get(idx - 1).map(|i| i + code_offset) } /// Encodes this body into the given buffer. 
@@ -118,6 +115,7 @@ impl EofBody { // Extract code section let start = header_len + header.types_size as usize; + body.code_offset = start; let mut code_end = 0; for size in header.code_sizes.iter().map(|x| *x as usize) { code_end += size; diff --git a/crates/handler/src/frame.rs b/crates/handler/src/frame.rs index 83d1a7f0fd..0df9a004ac 100644 --- a/crates/handler/src/frame.rs +++ b/crates/handler/src/frame.rs @@ -161,6 +161,7 @@ where // ExtDelegateCall is not allowed to call non-EOF contracts. if is_ext_delegate_call && !bytecode.bytes_slice().starts_with(&EOF_MAGIC_BYTES) { + context.journal().checkpoint_revert(checkpoint); return return_result(InstructionResult::InvalidExtDelegateCallTarget); } diff --git a/crates/inspector/src/eip3155.rs b/crates/inspector/src/eip3155.rs index 920336e231..4ecaca1c8f 100644 --- a/crates/inspector/src/eip3155.rs +++ b/crates/inspector/src/eip3155.rs @@ -1,4 +1,5 @@ use crate::{inspectors::GasInspector, Inspector}; +use revm::interpreter::interpreter_types::{RuntimeFlag, SubRoutineStack}; use revm::{ bytecode::opcode::OpCode, context::Cfg, @@ -20,7 +21,9 @@ pub struct TracerEip3155 { /// Print summary of the execution. 
print_summary: bool, stack: Vec, - pc: usize, + pc: u64, + section: Option, + function_depth: Option, opcode: u8, gas: u64, refunded: i64, @@ -39,6 +42,9 @@ struct Output { // Required fields: /// Program counter pc: u64, + /// EOF code section + #[serde(default, skip_serializing_if = "Option::is_none")] + section: Option, /// OpCode op: u8, /// Gas left before executing this operation @@ -49,6 +55,9 @@ struct Output { stack: Vec, /// Depth of the call stack depth: u64, + /// Depth of the EOF function call stack + #[serde(default, skip_serializing_if = "Option::is_none")] + function_depth: Option, /// Data returned by the function call return_data: String, /// Amount of **global** gas refunded @@ -140,6 +149,8 @@ where stack: Default::default(), memory: Default::default(), pc: 0, + section: None, + function_depth: None, opcode: 0, gas: 0, refunded: 0, @@ -213,7 +224,17 @@ where } else { None }; - self.pc = interp.bytecode.pc(); + self.pc = interp.bytecode.pc() as u64; + self.section = if interp.runtime_flag.is_eof() { + Some(interp.sub_routine.routine_idx() as u64) + } else { + None + }; + self.function_depth = if interp.runtime_flag.is_eof() { + Some(interp.sub_routine.len() as u64 + 1) + } else { + None + }; self.opcode = interp.bytecode.opcode(); self.mem_size = interp.memory.size(); self.gas = interp.control.gas().remaining(); @@ -228,12 +249,14 @@ where } let value = Output { - pc: self.pc as u64, + pc: self.pc, + section: self.section, op: self.opcode, gas: hex_number(self.gas), gas_cost: hex_number(self.gas_inspector.last_gas_cost()), stack: self.stack.iter().map(hex_number_u256).collect(), depth: context.journal().depth() as u64, + function_depth: self.function_depth, return_data: "0x".to_string(), refund: hex_number(self.refunded as u64), mem_size: self.mem_size.to_string(), diff --git a/crates/interpreter/src/interpreter/ext_bytecode.rs b/crates/interpreter/src/interpreter/ext_bytecode.rs index 4853849e1e..f80155ff8d 100644 --- 
a/crates/interpreter/src/interpreter/ext_bytecode.rs +++ b/crates/interpreter/src/interpreter/ext_bytecode.rs @@ -30,7 +30,7 @@ impl Deref for ExtBytecode { impl ExtBytecode { /// Create new extended bytecode and set the instruction pointer to the start of the bytecode. pub fn new(base: Bytecode) -> Self { - let instruction_pointer = base.bytecode().as_ptr(); + let instruction_pointer = base.bytecode_ptr(); Self { base, instruction_pointer, @@ -40,7 +40,7 @@ impl ExtBytecode { /// Creates new `ExtBytecode` with the given hash. pub fn new_with_hash(base: Bytecode, hash: B256) -> Self { - let instruction_pointer = base.bytecode().as_ptr(); + let instruction_pointer = base.bytecode_ptr(); Self { base, instruction_pointer, @@ -66,10 +66,12 @@ impl Jumps for ExtBytecode { fn relative_jump(&mut self, offset: isize) { self.instruction_pointer = unsafe { self.instruction_pointer.offset(offset) }; } + #[inline] fn absolute_jump(&mut self, offset: usize) { - self.instruction_pointer = unsafe { self.base.bytecode().as_ptr().add(offset) }; + self.instruction_pointer = unsafe { self.base.bytes_ref().as_ptr().add(offset) }; } + #[inline] fn is_valid_legacy_jump(&mut self, offset: usize) -> bool { self.base @@ -83,13 +85,14 @@ impl Jumps for ExtBytecode { // SAFETY: `instruction_pointer` always point to bytecode. unsafe { *self.instruction_pointer } } + #[inline] fn pc(&self) -> usize { - // SAFETY: `instruction_pointer` should be at an offset from the start of the bytecode. + // SAFETY: `instruction_pointer` should be at an offset from the start of the bytes. // In practice this is always true unless a caller modifies the `instruction_pointer` field manually. unsafe { self.instruction_pointer - .offset_from(self.base.bytecode().as_ptr()) as usize + .offset_from(self.base.bytes_ref().as_ptr()) as usize } } }