diff --git a/aya/src/maps/perf/async_perf_event_array.rs b/aya/src/maps/perf/async_perf_event_array.rs
index 7828759ae..7f666e07c 100644
--- a/aya/src/maps/perf/async_perf_event_array.rs
+++ b/aya/src/maps/perf/async_perf_event_array.rs
@@ -1,4 +1,4 @@
-use std::borrow::{Borrow, BorrowMut};
+use std::{borrow::{Borrow, BorrowMut}, os::fd::AsFd};
 
 // See https://doc.rust-lang.org/cargo/reference/features.html#mutually-exclusive-features.
 //
@@ -105,6 +105,17 @@ impl<T: BorrowMut<MapData>> AsyncPerfEventArray<T> {
         let buf = Async::new(buf)?;
         Ok(AsyncPerfEventArrayBuffer { buf })
     }
+
+    /// Inserts a perf_event file descriptor at the given index.
+    ///
+    /// ## Errors
+    ///
+    /// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
+    /// if `bpf_map_update_elem` fails.
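+    ///
+    /// # Examples
+    ///
+    /// A minimal sketch, assuming the loaded program declares a perf event array map named
+    /// `DESCRIPTORS` and `event_fd` is a perf event file descriptor obtained from
+    /// [`perf_event_open`](crate::programs::perf_event::perf_event_open):
+    ///
+    /// ```no_run
+    /// # let mut bpf = aya::Bpf::load(&[])?;
+    /// # use std::os::fd::{FromRawFd, OwnedFd};
+    /// # let event_fd = unsafe { OwnedFd::from_raw_fd(42) }; // stand-in fd for the example
+    /// use aya::maps::AsyncPerfEventArray;
+    ///
+    /// let mut descriptors = AsyncPerfEventArray::try_from(bpf.take_map("DESCRIPTORS").unwrap())?;
+    /// descriptors.set(0, &event_fd)?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```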
+    pub fn set<FD: AsFd>(&mut self, index: u32, value: &FD) -> Result<(), MapError> {
+        let Self { perf_map } = self;
+        perf_map.set(index, value)
+    }
 }
 
 impl<T: Borrow<MapData>> AsyncPerfEventArray<T> {
diff --git a/aya/src/maps/perf/perf_event_array.rs b/aya/src/maps/perf/perf_event_array.rs
index c1df535ae..bf1974f67 100644
--- a/aya/src/maps/perf/perf_event_array.rs
+++ b/aya/src/maps/perf/perf_event_array.rs
@@ -8,14 +8,16 @@ use std::{
     sync::Arc,
 };
 
+use aya_obj::generated::BPF_ANY;
 use bytes::BytesMut;
 
 use crate::{
     maps::{
+        check_bounds,
         perf::{Events, PerfBuffer, PerfBufferError},
         MapData, MapError,
     },
-    sys::bpf_map_update_elem,
+    sys::{bpf_map_update_elem, SyscallError},
     util::page_size,
 };
 
@@ -178,6 +180,9 @@ impl<T: Borrow<MapData>> PerfEventArray<T> {
 impl<T: BorrowMut<MapData>> PerfEventArray<T> {
     /// Opens the perf buffer at the given index.
     ///
+    /// A ring buffer of `1 + page_count` pages is created with `mmap`, where `page_count`
+    /// must be a power of two.
+    ///
     /// The returned buffer will receive all the events eBPF programs send at the given index.
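+    ///
+    /// For example (a sketch, assuming the loaded program declares a perf event array map
+    /// named `OUTPUT`):
+    ///
+    /// ```no_run
+    /// # let mut bpf = aya::Bpf::load(&[])?;
+    /// use aya::maps::PerfEventArray;
+    ///
+    /// let mut output = PerfEventArray::try_from(bpf.take_map("OUTPUT").unwrap())?;
+    /// // maps 1 control page + 2 data pages for index 0
+    /// let mut buf = output.open(0, Some(2))?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```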
     pub fn open(
         &mut self,
@@ -197,4 +202,27 @@ impl<T: BorrowMut<MapData>> PerfEventArray<T> {
             _map: self.map.clone(),
         })
     }
+
+    /// Inserts a perf_event file descriptor at the given index.
+    ///
+    /// ## Errors
+    ///
+    /// Returns [`MapError::OutOfBounds`] if `index` is out of bounds, [`MapError::SyscallError`]
+    /// if `bpf_map_update_elem` fails.
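+    ///
+    /// # Examples
+    ///
+    /// A minimal sketch, assuming the loaded program declares a perf event array map named
+    /// `DESCRIPTORS` and `event_fd` is a perf event file descriptor obtained from
+    /// [`perf_event_open`](crate::programs::perf_event::perf_event_open):
+    ///
+    /// ```no_run
+    /// # let mut bpf = aya::Bpf::load(&[])?;
+    /// # use std::os::fd::{FromRawFd, OwnedFd};
+    /// # let event_fd = unsafe { OwnedFd::from_raw_fd(42) }; // stand-in fd for the example
+    /// use aya::maps::PerfEventArray;
+    ///
+    /// let mut descriptors = PerfEventArray::try_from(bpf.take_map("DESCRIPTORS").unwrap())?;
+    /// descriptors.set(0, &event_fd)?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```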
+    pub fn set<FD: AsFd>(&mut self, index: u32, value: &FD) -> Result<(), MapError> {
+        let data: &MapData = self.map.deref().borrow();
+        check_bounds(data, index)?;
+        let fd = data.fd().as_fd();
+
+        // only BPF_ANY or BPF_EXIST are allowed, and for arrays they do the same thing (the elements always exist)
+        let flags = BPF_ANY as u64;
+        let value = value.as_fd().as_raw_fd();
+        bpf_map_update_elem(fd, Some(&index), &value, flags).map_err(|(_, io_error)| {
+            MapError::SyscallError(SyscallError {
+                call: "bpf_map_update_elem",
+                io_error,
+            })
+        })?;
+        Ok(())
+    }
 }
diff --git a/aya/src/programs/perf_event.rs b/aya/src/programs/perf_event.rs
index b7b233cf4..5845ab474 100644
--- a/aya/src/programs/perf_event.rs
+++ b/aya/src/programs/perf_event.rs
@@ -1,6 +1,6 @@
 //! Perf event programs.
 
-use std::os::fd::AsFd as _;
+use std::os::fd::{AsFd as _, OwnedFd};
 
 pub use crate::generated::{
     perf_hw_cache_id, perf_hw_cache_op_id, perf_hw_cache_op_result_id, perf_hw_id, perf_sw_ids,
@@ -20,10 +20,10 @@ use crate::{
         perf_attach::{PerfLinkIdInner, PerfLinkInner},
         FdLink, LinkError, ProgramData, ProgramError,
     },
-    sys::{bpf_link_get_info_by_fd, perf_event_open, SyscallError},
+    sys::{self, bpf_link_get_info_by_fd, SyscallError},
 };
 
-/// The type of perf event
+/// The type of perf event.
 #[repr(u32)]
 #[derive(Debug, Clone)]
 pub enum PerfTypeId {
@@ -41,7 +41,7 @@ pub enum PerfTypeId {
     Breakpoint = PERF_TYPE_BREAKPOINT as u32,
 }
 
-/// Sample Policy
+/// Sample Policy.
 #[derive(Debug, Clone)]
 pub enum SamplePolicy {
     /// Period
@@ -50,30 +50,44 @@ pub enum SamplePolicy {
     Frequency(u64),
 }
 
-/// The scope of a PerfEvent
+/// Bit flags (`PERF_SAMPLE_*` values) that select which fields to include in the event samples.
+#[derive(Debug, Clone)]
+pub struct SampleType(pub u64);
+
+/// "Wake up" policy for overflow notifications.
+/// Overflows are generated only by sampling events.
+#[derive(Debug, Clone)]
+pub enum WakeupPolicy {
+    /// Wake up after n events.
+    Events(u32),
+    /// Wake up after n bytes.
+    Watermark(u32),
+}
+
+/// The scope of a PerfEvent.
 #[derive(Debug, Clone)]
 #[allow(clippy::enum_variant_names)]
 pub enum PerfEventScope {
-    /// Calling process, any cpu
+    /// Calling process, any cpu.
     CallingProcessAnyCpu,
-    /// calling process, one cpu
+    /// Calling process, one cpu.
     CallingProcessOneCpu {
         /// cpu id
         cpu: u32,
     },
-    /// one process, any cpu
+    /// One process, any cpu.
     OneProcessAnyCpu {
         /// process id
         pid: u32,
     },
-    /// one process, one cpu
+    /// One process, one cpu.
     OneProcessOneCpu {
         /// cpu id
         cpu: u32,
         /// process id
         pid: u32,
     },
-    /// all processes, one cpu
+    /// All processes, one cpu.
     AllProcessesOneCpu {
         /// cpu id
         cpu: u32,
@@ -147,33 +161,11 @@ impl PerfEvent {
     ) -> Result<PerfEventLinkId, ProgramError> {
         let prog_fd = self.fd()?;
         let prog_fd = prog_fd.as_fd();
-        let (sample_period, sample_frequency) = match sample_policy {
-            SamplePolicy::Period(period) => (period, None),
-            SamplePolicy::Frequency(frequency) => (0, Some(frequency)),
-        };
-        let (pid, cpu) = match scope {
-            PerfEventScope::CallingProcessAnyCpu => (0, -1),
-            PerfEventScope::CallingProcessOneCpu { cpu } => (0, cpu as i32),
-            PerfEventScope::OneProcessAnyCpu { pid } => (pid as i32, -1),
-            PerfEventScope::OneProcessOneCpu { cpu, pid } => (pid as i32, cpu as i32),
-            PerfEventScope::AllProcessesOneCpu { cpu } => (-1, cpu as i32),
-        };
-        let fd = perf_event_open(
-            perf_type as u32,
-            config,
-            pid,
-            cpu,
-            sample_period,
-            sample_frequency,
-            false,
-            0,
-        )
-        .map_err(|(_code, io_error)| SyscallError {
-            call: "perf_event_open",
-            io_error,
-        })?;
 
-        let link = perf_attach(prog_fd, fd)?;
+        let sampling = Some((sample_policy, SampleType(PERF_SAMPLE_RAW as u64)));
+        let event_fd = perf_event_open(perf_type as u32, config, scope, sampling, None, 0)?;
+
+        let link = perf_attach(prog_fd, event_fd)?;
         self.data.links.insert(PerfEventLink::new(link))
     }
 
@@ -225,3 +217,65 @@ define_link_wrapper!(
     PerfLinkInner,
     PerfLinkIdInner
 );
+
+/// Performs the `perf_event_open` syscall and returns the file descriptor of the opened event.
+///
+/// # Arguments
+///
+/// * `perf_type` - the type of event, see [`crate::generated::perf_type_id`] for a list of types. Note that this list is non-exhaustive, because PMUs (Performance Monitoring Units) can be added to the system. Their ids can be read from sysfs (see the kernel documentation on perf_event_open).
+/// * `config` - the event to open, interpreted according to `perf_type`
+/// * `scope` - which process and cpu to monitor (logical cpu, not physical socket)
+/// * `sampling` - if not `None`, enables sampling mode with the given policy and sample type
+/// * `wakeup` - if not `None`, configures when overflow notifications wake up the consumer
+/// * `flags` - flags combined with a bitwise OR (e.g. `FLAG_A | FLAG_B`); zero means no flags
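+///
+/// # Examples
+///
+/// A minimal sketch: open a counting-mode event (no sampling, no wake-up) that monitors CPU
+/// cycles on logical CPU 0, for all processes:
+///
+/// ```no_run
+/// use aya::programs::perf_event::{perf_event_open, perf_hw_id, PerfEventScope, PerfTypeId};
+///
+/// let event_fd = perf_event_open(
+///     PerfTypeId::Hardware as u32,
+///     perf_hw_id::PERF_COUNT_HW_CPU_CYCLES as u64,
+///     PerfEventScope::AllProcessesOneCpu { cpu: 0 },
+///     None, // counting mode: no sampling
+///     None, // no wake-up
+///     0,
+/// )?;
+/// # Ok::<(), aya::programs::ProgramError>(())
+/// ```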
+pub fn perf_event_open(
+    perf_type: u32,
+    config: u64,
+    scope: PerfEventScope,
+    sampling: Option<(SamplePolicy, SampleType)>,
+    wakeup: Option<WakeupPolicy>,
+    flags: u32,
+) -> Result<OwnedFd, ProgramError> {
+    let mut attr = sys::init_perf_event_attr();
+
+    // Fill in the attributes
+    attr.type_ = perf_type;
+    attr.config = config;
+    match sampling {
+        Some((SamplePolicy::Frequency(f), SampleType(t))) => {
+            attr.set_freq(1);
+            attr.__bindgen_anon_1.sample_freq = f;
+            attr.sample_type = t;
+        }
+        Some((SamplePolicy::Period(p), SampleType(t))) => {
+            attr.__bindgen_anon_1.sample_period = p;
+            attr.sample_type = t;
+        }
+        None => (),
+    };
+    match wakeup {
+        Some(WakeupPolicy::Events(n)) => {
+            attr.__bindgen_anon_2.wakeup_events = n;
+        }
+        Some(WakeupPolicy::Watermark(n)) => {
+            attr.set_watermark(1);
+            attr.__bindgen_anon_2.wakeup_watermark = n;
+        }
+        None => (),
+    };
+
+    let (pid, cpu) = match scope {
+        PerfEventScope::CallingProcessAnyCpu => (0, -1),
+        PerfEventScope::CallingProcessOneCpu { cpu } => (0, cpu as i32),
+        PerfEventScope::OneProcessAnyCpu { pid } => (pid as i32, -1),
+        PerfEventScope::OneProcessOneCpu { cpu, pid } => (pid as i32, cpu as i32),
+        PerfEventScope::AllProcessesOneCpu { cpu } => (-1, cpu as i32),
+    };
+
+    sys::perf_event_sys(attr, pid, cpu, flags).map_err(|(_, io_error)| {
+        ProgramError::SyscallError(SyscallError {
+            call: "perf_event_open",
+            io_error,
+        })
+    })
+}
diff --git a/aya/src/sys/perf_event.rs b/aya/src/sys/perf_event.rs
index b06f4fba4..effc59173 100644
--- a/aya/src/sys/perf_event.rs
+++ b/aya/src/sys/perf_event.rs
@@ -15,8 +15,14 @@ use crate::generated::{
     PERF_FLAG_FD_CLOEXEC,
 };
 
+/// Returns a zeroed `perf_event_attr` with only its `size` field set.
+pub(crate) fn init_perf_event_attr() -> perf_event_attr {
+    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
+    attr.size = mem::size_of::<perf_event_attr>() as u32;
+    attr
+}
+
 #[allow(clippy::too_many_arguments)]
-pub(crate) fn perf_event_open(
+pub(crate) fn perf_event_open_sampled(
     perf_type: u32,
     config: u64,
     pid: pid_t,
@@ -26,10 +32,8 @@ pub(crate) fn perf_event_open(
     wakeup: bool,
     flags: u32,
 ) -> SysResult<OwnedFd> {
-    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
-
+    let mut attr = init_perf_event_attr();
     attr.config = config;
-    attr.size = mem::size_of::<perf_event_attr>() as u32;
     attr.type_ = perf_type;
     attr.sample_type = PERF_SAMPLE_RAW as u64;
     // attr.inherits = if pid > 0 { 1 } else { 0 };
@@ -46,7 +50,7 @@ pub(crate) fn perf_event_open(
 }
 
 pub(crate) fn perf_event_open_bpf(cpu: c_int) -> SysResult<OwnedFd> {
-    perf_event_open(
+    perf_event_open_sampled(
         PERF_TYPE_SOFTWARE as u32,
         PERF_COUNT_SW_BPF_OUTPUT as u64,
         -1,
@@ -67,7 +71,7 @@ pub(crate) fn perf_event_open_probe(
 ) -> SysResult<OwnedFd> {
     use std::os::unix::ffi::OsStrExt as _;
 
-    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
+    let mut attr = init_perf_event_attr();
 
     if let Some(ret_bit) = ret_bit {
         attr.config = 1 << ret_bit;
@@ -75,7 +79,6 @@ pub(crate) fn perf_event_open_probe(
 
     let c_name = CString::new(name.as_bytes()).unwrap();
 
-    attr.size = mem::size_of::<perf_event_attr>() as u32;
     attr.type_ = ty;
     attr.__bindgen_anon_3.config1 = c_name.as_ptr() as u64;
     attr.__bindgen_anon_4.config2 = offset;
@@ -87,9 +90,7 @@ pub(crate) fn perf_event_open_probe(
 }
 
 pub(crate) fn perf_event_open_trace_point(id: u32, pid: Option<pid_t>) -> SysResult<OwnedFd> {
-    let mut attr = unsafe { mem::zeroed::<perf_event_attr>() };
-
-    attr.size = mem::size_of::<perf_event_attr>() as u32;
+    let mut attr = init_perf_event_attr();
     attr.type_ = PERF_TYPE_TRACEPOINT as u32;
     attr.config = id as u64;
 
@@ -112,7 +113,7 @@ pub(crate) fn perf_event_ioctl(
     return crate::sys::TEST_SYSCALL.with(|test_impl| unsafe { test_impl.borrow()(call) });
 }
 
-fn perf_event_sys(attr: perf_event_attr, pid: pid_t, cpu: i32, flags: u32) -> SysResult<OwnedFd> {
+pub(crate) fn perf_event_sys(
+    attr: perf_event_attr,
+    pid: pid_t,
+    cpu: i32,
+    flags: u32,
+) -> SysResult<OwnedFd> {
     let fd = syscall(Syscall::PerfEventOpen {
         attr,
         pid,
diff --git a/bpf/aya-bpf/src/maps/perf/perf_event_array.rs b/bpf/aya-bpf/src/maps/perf/perf_event_array.rs
index f7c874a6c..bd3a4ff40 100644
--- a/bpf/aya-bpf/src/maps/perf/perf_event_array.rs
+++ b/bpf/aya-bpf/src/maps/perf/perf_event_array.rs
@@ -1,12 +1,40 @@
-use core::{cell::UnsafeCell, marker::PhantomData, mem};
+use core::{
+    cell::UnsafeCell,
+    marker::PhantomData,
+    mem::{self, MaybeUninit},
+};
+
+use aya_bpf_bindings::{bindings::bpf_perf_event_value, helpers::bpf_perf_event_read_value};
 
 use crate::{
-    bindings::{bpf_map_def, bpf_map_type::BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_F_CURRENT_CPU},
+    bindings::{
+        bpf_map_def, bpf_map_type::BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_F_CURRENT_CPU,
+        BPF_F_INDEX_MASK,
+    },
     helpers::bpf_perf_event_output,
     maps::PinningType,
     BpfContext,
 };
 
+/// A map of type `BPF_MAP_TYPE_PERF_EVENT_ARRAY`.
+///
+/// # Minimum kernel version
+///
+/// The minimum kernel version required to read perf_event values using [`PerfEventArray`] is 4.15.
+/// This applies to [`PerfEventArray::read_current_cpu()`] and [`PerfEventArray::read_at_index()`].
+///
+/// # Example
+///
+/// ```no_run
+/// use aya_bpf::{bindings::bpf_perf_event_value, macros::map, maps::PerfEventArray};
+///
+/// #[map]
+/// static mut DESCRIPTORS: PerfEventArray<i32> = PerfEventArray::with_max_entries(1, 0);
+///
+/// pub fn read_event() -> Result<u64, i64> {
+///     let event: bpf_perf_event_value = unsafe { DESCRIPTORS.read_current_cpu() }?;
+///     let value: u64 = event.counter;
+///     Ok(value)
+/// }
+/// ```
 #[repr(transparent)]
 pub struct PerfEventArray<T> {
     def: UnsafeCell<bpf_map_def>,
@@ -50,20 +78,55 @@ impl<T> PerfEventArray<T> {
         }
     }
 
-    pub fn output<C: BpfContext>(&self, ctx: &C, data: &T, flags: u32) {
-        self.output_at_index(ctx, BPF_F_CURRENT_CPU as u32, data, flags)
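+    /// Writes `data` to the perf event buffer associated with the current CPU.
+    ///
+    /// On failure, returns the error code of the underlying `bpf_perf_event_output` call.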
+    pub fn output_current_cpu<C: BpfContext>(&self, ctx: &C, data: &T) -> Result<(), i64> {
+        self.output(ctx, data, BPF_F_CURRENT_CPU)
+    }
+
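+    /// Writes `data` to the perf event buffer at the given index.
+    ///
+    /// On failure, returns the error code of the underlying `bpf_perf_event_output` call.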
+    pub fn output_at_index<C: BpfContext>(&self, ctx: &C, data: &T, index: u32) -> Result<(), i64> {
+        self.output(ctx, data, u64::from(index) & BPF_F_INDEX_MASK)
     }
 
-    pub fn output_at_index<C: BpfContext>(&self, ctx: &C, index: u32, data: &T, flags: u32) {
-        let flags = u64::from(flags) << 32 | u64::from(index);
-        unsafe {
+    fn output<C: BpfContext>(&self, ctx: &C, data: &T, flags: u64) -> Result<(), i64> {
+        let ret = unsafe {
             bpf_perf_event_output(
                 ctx.as_ptr(),
                 self.def.get() as *mut _,
                 flags,
                 data as *const _ as *mut _,
                 mem::size_of::<T>() as u64,
-            );
+            )
+        };
+        if ret == 0 {
+            Ok(())
+        } else {
+            Err(ret)
+        }
+    }
+
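+    /// Reads the value of the perf event stored at the index corresponding to the current CPU.
+    ///
+    /// On failure, returns the error code of the underlying `bpf_perf_event_read_value` call.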
+    pub fn read_current_cpu(&self) -> Result<bpf_perf_event_value, i64> {
+        self.read(BPF_F_CURRENT_CPU)
+    }
+
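+    /// Reads the value of the perf event stored at the given index.
+    ///
+    /// On failure, returns the error code of the underlying `bpf_perf_event_read_value` call.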
+    pub fn read_at_index(&self, index: u32) -> Result<bpf_perf_event_value, i64> {
+        self.read(u64::from(index) & BPF_F_INDEX_MASK)
+    }
+
+    fn read(&self, flags: u64) -> Result<bpf_perf_event_value, i64> {
+        let mut buf = MaybeUninit::<bpf_perf_event_value>::uninit();
+        let ret = unsafe {
+            // According to the Linux manual (see `man bpf-helpers`), `bpf_perf_event_read_value` is recommended over `bpf_perf_event_read`.
+            bpf_perf_event_read_value(
+                self.def.get() as *mut _,
+                flags,
+                buf.as_mut_ptr(),
+                mem::size_of::<bpf_perf_event_value>() as u32,
+            )
+        };
+        if ret == 0 {
+            let value = unsafe { buf.assume_init() };
+            Ok(value)
+        } else {
+            Err(ret)
         }
     }
 }
diff --git a/test/integration-ebpf/Cargo.toml b/test/integration-ebpf/Cargo.toml
index d471acf31..9faf21dab 100644
--- a/test/integration-ebpf/Cargo.toml
+++ b/test/integration-ebpf/Cargo.toml
@@ -55,3 +55,7 @@ path = "src/xdp_sec.rs"
 [[bin]]
 name = "ring_buf"
 path = "src/ring_buf.rs"
+
+[[bin]]
+name = "perf_events"
+path = "src/perf_events.rs"
diff --git a/test/integration-ebpf/src/perf_events.rs b/test/integration-ebpf/src/perf_events.rs
new file mode 100644
index 000000000..1afd19f9e
--- /dev/null
+++ b/test/integration-ebpf/src/perf_events.rs
@@ -0,0 +1,58 @@
+#![no_std]
+#![no_main]
+
+use aya_bpf::{
+    bindings::bpf_perf_event_value,
+    helpers::bpf_get_smp_processor_id,
+    macros::{map, perf_event},
+    maps::PerfEventArray,
+    programs::PerfEventContext,
+};
+
+/// Data sent by the bpf program to userspace.
+/// This structure must be defined in the exact same way on the userspace side.
+#[repr(C)]
+struct EventData {
+    value: u64,
+    cpu_id: u32,
+    tag: u8,
+}
+
+/// Input map: file descriptors of the perf events, obtained by calling
+/// `perf_event_open` in user space.
+#[map]
+static mut DESCRIPTORS: PerfEventArray<i32> = PerfEventArray::with_max_entries(1, 0);
+
+#[map]
+static mut OUTPUT: PerfEventArray<EventData> = PerfEventArray::with_max_entries(1, 0);
+
+#[perf_event]
+pub fn on_perf_event(ctx: PerfEventContext) -> i64 {
+    match read_event(&ctx).and_then(|res| write_output(&ctx, res)) {
+        Ok(()) => 0,
+        Err(e) => e,
+    }
+}
+
+fn read_event(ctx: &PerfEventContext) -> Result<EventData, i64> {
+    // read the event value using the file descriptor in the DESCRIPTORS array
+    let event: bpf_perf_event_value = unsafe { DESCRIPTORS.read_current_cpu() }?;
+
+    let cpu_id = unsafe { bpf_get_smp_processor_id() };
+    let res = EventData {
+        value: event.counter,
+        cpu_id,
+        tag: 0xAB,
+    };
+    Ok(res)
+}
+
+fn write_output(ctx: &PerfEventContext, output: EventData) -> Result<(), i64> {
+    unsafe { OUTPUT.output_current_cpu(ctx, &output) }
+}
+
+#[cfg(not(test))]
+#[panic_handler]
+fn panic(_info: &core::panic::PanicInfo) -> ! {
+    loop {}
+}
diff --git a/test/integration-test/Cargo.toml b/test/integration-test/Cargo.toml
index 6ef158336..b40d0dac2 100644
--- a/test/integration-test/Cargo.toml
+++ b/test/integration-test/Cargo.toml
@@ -10,6 +10,7 @@ assert_matches = { workspace = true }
 aya = { workspace = true }
 aya-log = { workspace = true }
 aya-obj = { workspace = true }
+bytes = { workspace = true }
 env_logger = { workspace = true }
 epoll = { workspace = true }
 futures = { workspace = true, features = ["std"] }
diff --git a/test/integration-test/src/lib.rs b/test/integration-test/src/lib.rs
index d47080336..c01b3ab99 100644
--- a/test/integration-test/src/lib.rs
+++ b/test/integration-test/src/lib.rs
@@ -22,6 +22,7 @@ pub const BPF_PROBE_READ: &[u8] =
 pub const REDIRECT: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/redirect"));
 pub const XDP_SEC: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/xdp_sec"));
 pub const RING_BUF: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/ring_buf"));
+pub const PERF_EVENTS: &[u8] = include_bytes_aligned!(concat!(env!("OUT_DIR"), "/perf_events"));
 
 #[cfg(test)]
 mod tests;
diff --git a/test/integration-test/src/tests.rs b/test/integration-test/src/tests.rs
index f37d54bbe..e25125c4c 100644
--- a/test/integration-test/src/tests.rs
+++ b/test/integration-test/src/tests.rs
@@ -3,6 +3,7 @@ mod btf_relocations;
 mod elf;
 mod load;
 mod log;
+mod perf_events;
 mod rbpf;
 mod relocations;
 mod ring_buf;
diff --git a/test/integration-test/src/tests/perf_events.rs b/test/integration-test/src/tests/perf_events.rs
new file mode 100644
index 000000000..31cd64c88
--- /dev/null
+++ b/test/integration-test/src/tests/perf_events.rs
@@ -0,0 +1,161 @@
+use std::os::fd::OwnedFd;
+use std::time::{Duration, Instant};
+
+use aya::maps::perf::Events;
+use aya::maps::{AsyncPerfEventArray, PerfEventArray};
+use aya::programs::perf_event::{perf_event_open, PerfEventLinkId, PerfEventScope};
+use aya::programs::{PerfEvent, PerfTypeId, ProgramError, SamplePolicy};
+use aya::Bpf;
+use aya_obj::generated::{perf_hw_id, perf_sw_ids, perf_type_id};
+use bytes::BytesMut;
+use test_log::test;
+
+/// Data sent by the bpf program to userspace.
+/// This structure must be defined in the exact same way on the bpf side.
+#[derive(Debug)]
+#[repr(C)]
+struct EventData {
+    value: u64,
+    cpu_id: u32,
+    tag: u8,
+}
+
+const CPU_ID: u32 = 0;
+const SAMPLING_FREQUENCY_HZ: u64 = 100;
+const BUF_PAGE_COUNT: usize = 2; // must be a power of two
+const WAIT_TIMEOUT: Duration = Duration::from_secs(1);
+
+/// Opens a hardware perf_event for testing.
+fn open_perf_event_hw() -> Result<OwnedFd, ProgramError> {
+    perf_event_open(
+        perf_type_id::PERF_TYPE_HARDWARE as u32,
+        perf_hw_id::PERF_COUNT_HW_CPU_CYCLES as u64,
+        PerfEventScope::AllProcessesOneCpu { cpu: CPU_ID },
+        None,
+        None,
+        0,
+    )
+}
+
+/// Attaches a PerfEvent bpf program to a software clock event.
+fn attach_bpf_to_clock(program: &mut PerfEvent) -> Result<PerfEventLinkId, ProgramError> {
+    program.attach(
+        PerfTypeId::Software,
+        perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK as u64,
+        PerfEventScope::AllProcessesOneCpu { cpu: CPU_ID },
+        SamplePolicy::Frequency(SAMPLING_FREQUENCY_HZ),
+    )
+}
+
+#[test]
+fn perf_event_read_from_kernel() {
+    // load bpf program
+    let mut bpf = Bpf::load(crate::PERF_EVENTS).expect("failed to load bpf code");
+    let mut descriptors = PerfEventArray::try_from(bpf.take_map("DESCRIPTORS").unwrap()).unwrap();
+    let mut bpf_output = PerfEventArray::try_from(bpf.take_map("OUTPUT").unwrap()).unwrap();
+
+    // open a perf_event
+    let event_fd = open_perf_event_hw().unwrap();
+
+    // store the perf event's file descriptor in the bpf map
+    descriptors
+        .set(0, &event_fd)
+        .expect("failed to put event's fd into the map");
+
+    // load program
+    let program: &mut PerfEvent = bpf
+        .program_mut("on_perf_event")
+        .unwrap()
+        .try_into()
+        .unwrap();
+
+    program
+        .load()
+        .expect("the bpf program should load properly");
+
+    // get buffer to poll the events
+    let mut buf = bpf_output
+        .open(CPU_ID, Some(BUF_PAGE_COUNT))
+        .expect("failed to open output buffer to poll events");
+
+    // attach program
+    attach_bpf_to_clock(program).expect("the bpf program should attach properly");
+
+    // wait for the values to be added to the buffer
+    let t0 = Instant::now();
+    while !buf.readable() {
+        std::thread::sleep(Duration::from_millis(1000 / SAMPLING_FREQUENCY_HZ));
+        assert!(
+            t0.elapsed() < WAIT_TIMEOUT,
+            "timeout elapsed: no data in the buffer"
+        );
+    }
+
+    // read the events and check that the returned data is correct
+    let mut events_data: [BytesMut; BUF_PAGE_COUNT] = std::array::from_fn(|_| BytesMut::new());
+    let Events { read, lost } = buf
+        .read_events(&mut events_data)
+        .expect("failed to poll events");
+
+    for data_buf in events_data.iter_mut().take(read) {
+        let ptr = data_buf.as_ptr() as *const EventData;
+        let data @ EventData { cpu_id, tag, value } = unsafe { ptr.read_unaligned() };
+        assert!(value > 0, "unexpected data: {:?}", value);
+        assert_eq!(cpu_id, CPU_ID, "unexpected data: {:?}", data);
+        assert_eq!(tag, 0xAB, "unexpected data: {:?}", data);
+    }
+    assert_eq!(lost, 0, "lost {} events", lost);
+}
+
+#[test(tokio::test)]
+async fn perf_event_read_from_kernel_async() {
+    // load bpf program
+    let mut bpf = Bpf::load(crate::PERF_EVENTS).expect("failed to load bpf code");
+    let mut descriptors =
+        AsyncPerfEventArray::try_from(bpf.take_map("DESCRIPTORS").unwrap()).unwrap();
+    let mut bpf_output = AsyncPerfEventArray::try_from(bpf.take_map("OUTPUT").unwrap()).unwrap();
+
+    // open a perf_event
+    let event_fd = open_perf_event_hw().unwrap();
+
+    // store the perf event's file descriptor in the bpf map
+    descriptors
+        .set(0, &event_fd)
+        .expect("failed to put event's fd into the map");
+
+    // load program
+    let program: &mut PerfEvent = bpf
+        .program_mut("on_perf_event")
+        .unwrap()
+        .try_into()
+        .unwrap();
+
+    program
+        .load()
+        .expect("the bpf program should load properly");
+
+    // get buffer to poll the events
+    let mut buf = bpf_output
+        .open(CPU_ID, Some(BUF_PAGE_COUNT))
+        .expect("failed to open output buffer to poll events");
+
+    // attach program
+    attach_bpf_to_clock(program).expect("the bpf program should attach properly");
+
+    // read the events as soon as they are available
+    let mut events_data: [BytesMut; BUF_PAGE_COUNT] = std::array::from_fn(|_| BytesMut::new());
+    let Events { read, lost } =
+        tokio::time::timeout(WAIT_TIMEOUT, buf.read_events(&mut events_data))
+            .await
+            .expect("timeout elapsed: no data in the buffer")
+            .expect("failed to poll events");
+
+    // check that the returned data is correct
+    for data_buf in events_data.iter_mut().take(read) {
+        let ptr = data_buf.as_ptr() as *const EventData;
+        let data @ EventData { cpu_id, tag, value } = unsafe { ptr.read_unaligned() };
+        assert!(value > 0, "unexpected data: {:?}", value);
+        assert_eq!(cpu_id, CPU_ID, "unexpected data: {:?}", data);
+        assert_eq!(tag, 0xAB, "unexpected data: {:?}", data);
+    }
+    assert_eq!(lost, 0, "lost {} events", lost)
+}