Skip to content

perf(levm): use specialized PUSH1 and PUSH2 implementations #3262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Perf

### 2025-06-23

- Use specialized PUSH1 and PUSH2 implementations [#3262](https://github.com/lambdaclass/ethrex/pull/3262)

### 2025-05-27

- Improved the performance of shift instructions. [2933](https://github.com/lambdaclass/ethrex/pull/2933)
Expand Down
4 changes: 3 additions & 1 deletion crates/vm/levm/src/execution_handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ impl<'a> VM<'a> {
Opcode::BLOBHASH => self.op_blobhash(),
Opcode::BLOBBASEFEE => self.op_blobbasefee(),
Opcode::PUSH0 => self.op_push0(),
Opcode::PUSH1 => self.op_push1(),
Opcode::PUSH2 => self.op_push2(),
// PUSHn
op if (Opcode::PUSH1..=Opcode::PUSH32).contains(&op) => {
op if (Opcode::PUSH3..=Opcode::PUSH32).contains(&op) => {
let n_bytes = get_n_value(op, Opcode::PUSH1)?;
self.op_push(n_bytes)
}
Expand Down
4 changes: 2 additions & 2 deletions crates/vm/levm/src/opcode_handlers/bitwise_comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,6 @@ pub fn checked_shift_left(value: U256, shift: U256) -> Result<U256, VMError> {
Ok(result)
}

fn u256_from_bool(value: bool) -> U256 {
U256::from(u8::from(value))
const fn u256_from_bool(value: bool) -> U256 {
if value { U256::one() } else { U256::zero() }
}
63 changes: 62 additions & 1 deletion crates/vm/levm/src/opcode_handlers/push.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use ethrex_common::{U256, types::Fork};
// Opcodes: PUSH0, PUSH1 ... PUSH32

impl<'a> VM<'a> {
// PUSH operation
// Generic PUSH operation
pub fn op_push(&mut self, n_bytes: usize) -> Result<OpcodeResult, VMError> {
let current_call_frame = self.current_call_frame_mut()?;
current_call_frame.increase_consumed_gas(gas_cost::PUSHN)?;
Expand All @@ -30,6 +30,42 @@ impl<'a> VM<'a> {
})
}

/// Specialized handler for the PUSH1 opcode.
///
/// PUSH1 and PUSH2 are far more frequent than the wider PUSH variants, so their
/// cost has a significant impact on overall performance. Reading a fixed number
/// of immediate bytes lets the pushed word be built with `U256::from`, which is
/// considerably more performant than `U256::from_big_endian`.
///
/// Charges `gas_cost::PUSHN`, pushes the single immediate byte that follows the
/// opcode onto the stack, and asks the caller to advance the PC by 2.
///
/// # Errors
///
/// Propagates any error from fetching the current call frame, charging gas,
/// reading the immediate byte, or pushing onto the stack.
pub fn op_push1(&mut self) -> Result<OpcodeResult, VMError> {
    let frame = self.current_call_frame_mut()?;
    frame.increase_consumed_gas(gas_cost::PUSHN)?;

    // The helper returns a one-element array; destructure it to get the byte.
    let [byte] = read_bytcode_slice_const::<1>(frame)?;
    frame.stack.push(U256::from(byte))?;

    // 1 byte for the opcode itself + 1 immediate byte that was pushed.
    Ok(OpcodeResult::Continue { pc_increment: 2 })
}

/// Specialized handler for the PUSH2 opcode.
///
/// Charges `gas_cost::PUSHN`, reads the two immediate bytes that follow the
/// opcode, interprets them as a big-endian `u16`, pushes the result onto the
/// stack, and asks the caller to advance the PC by 3.
///
/// # Errors
///
/// Propagates any error from fetching the current call frame, charging gas,
/// reading the immediate bytes, or pushing onto the stack.
pub fn op_push2(&mut self) -> Result<OpcodeResult, VMError> {
    let frame = self.current_call_frame_mut()?;
    frame.increase_consumed_gas(gas_cost::PUSHN)?;

    let immediate = read_bytcode_slice_const::<2>(frame)?;
    // Immediate bytes are stored most-significant byte first.
    let word = U256::from(u16::from_be_bytes(immediate));
    frame.stack.push(word)?;

    // 1 byte for the opcode itself + 2 immediate bytes that were pushed.
    Ok(OpcodeResult::Continue { pc_increment: 3 })
}

// PUSH0
pub fn op_push0(&mut self) -> Result<OpcodeResult, VMError> {
// [EIP-3855] - PUSH0 is only available from SHANGHAI
Expand Down Expand Up @@ -61,3 +97,28 @@ fn read_bytcode_slice(current_call_frame: &CallFrame, n_bytes: usize) -> Result<
.get(pc_offset..pc_offset.checked_add(n_bytes).ok_or(OutOfBounds)?)
.unwrap_or_default())
}

/// Const-generic counterpart of `read_bytcode_slice`: reads the `N` bytes that
/// immediately follow the current instruction and returns them as a fixed-size
/// array.
///
/// If the bytecode does not contain the full `N`-byte range, an all-zero array
/// is returned instead.
///
/// # Errors
///
/// * `InternalError::Overflow` if `pc + 1` overflows.
/// * `OutOfBounds` if adding `N` to the start offset overflows.
/// * `InternalError::TypeConversion` if the slice cannot be converted into an
///   `[u8; N]` (the slice is requested with exactly `N` bytes).
fn read_bytcode_slice_const<const N: usize>(
    current_call_frame: &CallFrame,
) -> Result<[u8; N], VMError> {
    // Skip the byte of the instruction being executed: only the data that
    // comes right after the opcode is of interest here.
    let start = current_call_frame
        .pc
        .checked_add(1)
        .ok_or(InternalError::Overflow)?;
    let end = start.checked_add(N).ok_or(OutOfBounds)?;

    match current_call_frame.bytecode.get(start..end) {
        Some(immediate) => immediate
            .try_into()
            .map_err(|_| VMError::Internal(InternalError::TypeConversion)),
        // The requested range runs past the end of the bytecode.
        None => Ok([0; N]),
    }
}
Loading