lambdaclass · edg-l · Jun 23, 2025 · Jun 23, 2025 · Jun 23, 2025 · Jun 23, 2025
@@ -2,6 +2,10 @@
 
 ## Perf
 
+### 2025-06-23
+
+- Use specialized PUSH1 and PUSH2 implementations [#3262](https://github.com/lambdaclass/ethrex/pull/3262)
+
 ### 2025-05-27
 
 - Improved the performance of shift instructions. [2933](https://github.com/lambdaclass/ethrex/pull/2933)

@@ -75,8 +75,10 @@ impl<'a> VM<'a> {
             Opcode::BLOBHASH => self.op_blobhash(),
             Opcode::BLOBBASEFEE => self.op_blobbasefee(),
             Opcode::PUSH0 => self.op_push0(),
+            Opcode::PUSH1 => self.op_push1(),
+            Opcode::PUSH2 => self.op_push2(),
             // PUSHn
-            op if (Opcode::PUSH1..=Opcode::PUSH32).contains(&op) => {
+            op if (Opcode::PUSH3..=Opcode::PUSH32).contains(&op) => {
                 let n_bytes = get_n_value(op, Opcode::PUSH1)?;
                 self.op_push(n_bytes)
             }

@@ -266,6 +266,6 @@ pub fn checked_shift_left(value: U256, shift: U256) -> Result<U256, VMError> {
     Ok(result)
 }
 
-fn u256_from_bool(value: bool) -> U256 {
-    U256::from(u8::from(value))
+const fn u256_from_bool(value: bool) -> U256 {
+    if value { U256::one() } else { U256::zero() }
 }
@@ -11,7 +11,7 @@ use ethrex_common::{U256, types::Fork};
 // Opcodes: PUSH0, PUSH1 ... PUSH32
 
 impl<'a> VM<'a> {
-    // PUSH operation
+    // Generic PUSH operation
     pub fn op_push(&mut self, n_bytes: usize) -> Result<OpcodeResult, VMError> {
         let current_call_frame = self.current_call_frame_mut()?;
         current_call_frame.increase_consumed_gas(gas_cost::PUSHN)?;
@@ -30,6 +30,42 @@ impl<'a> VM<'a> {
         })
     }
 
+    /// Specialized PUSH1 operation: pushes the single byte that follows the opcode onto the stack.
+    ///
+    /// PUSH1 and PUSH2 get dedicated implementations because they are far more frequent than the
+    /// other PUSHn opcodes, so their impact on performance is significant. Reading a fixed-size
+    /// array lets them use `U256::from`, which is considerably faster than `U256::from_big_endian`.
+    pub fn op_push1(&mut self) -> Result<OpcodeResult, VMError> {
+        let current_call_frame = self.current_call_frame_mut()?;
+        current_call_frame.increase_consumed_gas(gas_cost::PUSHN)?;
+
+        let value = read_bytcode_slice_const::<1>(current_call_frame)?[0];
+
+        current_call_frame.stack.push(U256::from(value))?;
+
+        Ok(OpcodeResult::Continue {
+            // Skip the PUSH1 opcode byte plus the 1 immediate byte that was just read
+            pc_increment: 2,
+        })
+    }
+
+    /// Specialized PUSH2 operation: pushes the 2 bytes after the opcode as a big-endian `u16`.
+    pub fn op_push2(&mut self) -> Result<OpcodeResult, VMError> {
+        let current_call_frame = self.current_call_frame_mut()?;
+        current_call_frame.increase_consumed_gas(gas_cost::PUSHN)?;
+
+        let read_n_bytes = read_bytcode_slice_const::<2>(current_call_frame)?;
+
+        let value = u16::from_be_bytes(read_n_bytes);
+
+        current_call_frame.stack.push(U256::from(value))?;
+
+        Ok(OpcodeResult::Continue {
+            // Skip the PUSH2 opcode byte plus the 2 immediate bytes that were just read
+            pc_increment: 3,
+        })
+    }
+
     // PUSH0
     pub fn op_push0(&mut self) -> Result<OpcodeResult, VMError> {
         // [EIP-3855] - PUSH0 is only available from SHANGHAI
@@ -61,3 +97,28 @@ fn read_bytcode_slice(current_call_frame: &CallFrame, n_bytes: usize) -> Result<
         .get(pc_offset..pc_offset.checked_add(n_bytes).ok_or(OutOfBounds)?)
         .unwrap_or_default())
 }
+
+// Like `read_bytcode_slice`, but the length is the const generic `N` and the result is a `[u8; N]`.
+fn read_bytcode_slice_const<const N: usize>(
+    current_call_frame: &CallFrame,
+) -> Result<[u8; N], VMError> {
+    let current_pc = current_call_frame.pc;
+    let pc_offset = current_pc
+        // Start reading at PC + 1: the byte at PC is the opcode of the
+        // instruction being executed, and only the data that follows
+        // it is wanted here. An overflowing addition is reported as
+        // `InternalError::Overflow`.
+        .checked_add(1)
+        .ok_or(InternalError::Overflow)?;
+
+    if let Some(slice) = current_call_frame
+        .bytecode
+        .get(pc_offset..pc_offset.checked_add(N).ok_or(OutOfBounds)?)
+    {
+        Ok(slice
+            .try_into()
+            .map_err(|_| VMError::Internal(InternalError::TypeConversion))?)
+    } else {
+        Ok([0; N]) // `get` returned `None` for the requested range: fall back to all zeros
+    }
+}