diff --git a/examples/hpc/Cargo.toml b/examples/hpc/Cargo.toml index e8f8d039..af4a86aa 100644 --- a/examples/hpc/Cargo.toml +++ b/examples/hpc/Cargo.toml @@ -1,3 +1,3 @@ [workspace] -members = ["hello-dla", "hello-hpc"] +members = ["hello-dla", "hello-hpc", "dla-driver"] resolver = "2" diff --git a/examples/hpc/dla-driver/.cargo/config.toml b/examples/hpc/dla-driver/.cargo/config.toml index 9e79aa2d..d9b3f2f0 100644 --- a/examples/hpc/dla-driver/.cargo/config.toml +++ b/examples/hpc/dla-driver/.cargo/config.toml @@ -1,6 +1,6 @@ [target.riscv64imac-unknown-none-elf] runner = "../../../scripts/run_on_hpc.sh" -rustflags = ["-C", "link-arg=-Tmem_hpc.x", "-C", "link-arg=-Tlink.x"] +rustflags = ["-C", "link-arg=-Tsdram_hpc.x", "-C", "link-arg=-Tlink.x"] [build] target = "riscv64imac-unknown-none-elf" diff --git a/examples/hpc/dla-driver/Cargo.toml b/examples/hpc/dla-driver/Cargo.toml index 46b18a49..54c2710f 100644 --- a/examples/hpc/dla-driver/Cargo.toml +++ b/examples/hpc/dla-driver/Cargo.toml @@ -4,9 +4,16 @@ version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +vp = [] [dependencies] panic-halt = "0.2.0" headsail-bsp = { version = "0.1.0", path = "../../headsail-bsp", features = [ "hpc-rt", + "alloc", + "sdram", ] } + + +rand = { version = "0.8.3", features = ["small_rng"], default-features = false } diff --git a/examples/hpc/dla-driver/examples/dla.rs b/examples/hpc/dla-driver/examples/dla.rs index 375c63bd..7f592552 100644 --- a/examples/hpc/dla-driver/examples/dla.rs +++ b/examples/hpc/dla-driver/examples/dla.rs @@ -1,15 +1,15 @@ #![no_std] #![no_main] -use headsail_bsp::{rt::entry, sprintln}; use dla_driver::*; +use headsail_bsp::{rt::entry, sprintln}; use panic_halt as _; #[entry] fn main() -> ! { sprintln!("Hello world!"); - dla_write_str("Hello DLA"); - dla_init(); + let mut dla = Dla::new(); + dla.init_layer(); sprintln!("Dla initalized"); loop {} } diff --git a/examples/hpc/dla-driver/examples/mac_benchmark.rs b/examples/hpc/dla-driver/examples/mac_benchmark.rs new file mode 100644 index 00000000..6a835cc5 --- /dev/null +++ b/examples/hpc/dla-driver/examples/mac_benchmark.rs @@ -0,0 +1,111 @@ +#![no_std] +#![no_main] + +extern crate alloc; + +use dla_driver::*; +use headsail_bsp::{init_alloc, rt::entry, sprint, sprintln}; +use panic_halt as _; + +use rand::rngs::SmallRng; +use rand::RngCore; +use rand::SeedableRng; + +use alloc::vec::Vec; + +fn calculate_conv2d_out_param_dim( + input: (usize, usize), + kernel: (usize, usize), + padding: (usize, usize), + dilation: (usize, usize), + stride: (usize, usize), +) -> (usize, usize) { + let output_width = (input.0 + 2 * padding.0 - dilation.0 * (kernel.0 - 1) - 1) / stride.0 + 1; + let output_height = (input.1 + 2 * padding.1 - dilation.1 * (kernel.1 - 1) - 1) / stride.1 + 1; + (output_width, output_height) +} + +fn generate_random_array(buffer: &mut [u8], size: usize) { + let mut rng = SmallRng::seed_from_u64(1234567890); + for i in 0..size { + buffer[i] = rng.next_u64() as u8; + } +} + +fn generate_random_matrix(height: usize, width: usize, seed: u64) -> Vec { + let mut res: Vec = Vec::new(); + let mut rng = SmallRng::seed_from_u64(seed); + for _ in 0..(height * width) { + res.push((rng.next_u64() & 0xFF) as u8); + } + res +} + +fn generate_random_matrix_small(height: usize, width: usize, seed: u64) -> Vec { + let mut res: Vec = Vec::new(); + let mut rng = SmallRng::seed_from_u64(seed); + for _ in 0..(height * width) { + res.push((rng.next_u64() & 0x1) as u8); + } + res +} + +fn run_random_layer( + dla: &mut Dla, + input_width: usize, + input_height: usize, + kernel_width: usize, + kernel_height: usize, + seed: u64, +) -> Vec { + // Generate input and kernel + dla.init_layer(); + + let mut input = generate_random_matrix(input_width, input_height, seed); + let mut kernel = generate_random_matrix_small(kernel_width, kernel_height, seed * 2); + + dla.set_kernel_size(1, kernel_width, kernel_height); + dla.set_input_size(1, input_width, input_height); + + dla.write_input(&mut input); + dla.write_kernel(&mut kernel); + + // Calculate output size + let (output_width, output_height) = calculate_conv2d_out_param_dim( + (input_width, input_height), + (kernel_width, kernel_height), + (0, 0), + (1, 1), + (1, 1), + ); + + dla.kernel_data_ready(true); + dla.input_data_ready(true); + + // Print the matrix + sprintln!("Waiting for calculation"); + while !dla.handle_handshake() {} + sprintln!("Calculation ready"); + let output: Vec = dla.read_output(output_width * output_height); + output +} + +#[entry] +fn main() -> ! { + init_alloc(); + + let mut dla = Dla::new(); + sprintln!("Starting benchmark.."); + + dla.set_mac_clip(8); + dla.set_pp_clip(8); + + for x in 0..2 { + let res = run_random_layer(&mut dla, 8, 8, 2, 2, x * x); + for x in res { + sprint!("{:?} ", x); + } + sprint!("\n\n"); + } + loop {} +} diff --git a/examples/hpc/dla-driver/src/lib.rs b/examples/hpc/dla-driver/src/lib.rs index 9c5e5eab..7fbef378 100644 --- a/examples/hpc/dla-driver/src/lib.rs +++ b/examples/hpc/dla-driver/src/lib.rs @@ -1,11 +1,100 @@ #![no_std] -mod mmap; +extern crate alloc; +mod mmap; +pub use mmap::{ + DLA0_ADDR, MEMORY_BANK_0_OFFSET, MEMORY_BANK_10_OFFSET, MEMORY_BANK_11_OFFSET, + MEMORY_BANK_12_OFFSET, MEMORY_BANK_13_OFFSET, MEMORY_BANK_14_OFFSET, MEMORY_BANK_15_OFFSET, + MEMORY_BANK_1_OFFSET, MEMORY_BANK_2_OFFSET, MEMORY_BANK_3_OFFSET, MEMORY_BANK_4_OFFSET, + MEMORY_BANK_5_OFFSET, MEMORY_BANK_6_OFFSET, MEMORY_BANK_7_OFFSET, MEMORY_BANK_8_OFFSET, + MEMORY_BANK_9_OFFSET, MEMORY_BANK_BASE_ADDR, +}; + +use alloc::vec::Vec; use core::ptr; -use headsail_bsp::sprint; +use headsail_bsp::{sprint, sprintln}; use mmap::*; +pub struct Dla { +} + +#[derive(Clone, Copy)] +#[rustfmt::skip] +pub enum MemoryBank { + Bank0, Bank1, Bank2, Bank3, Bank4, Bank5, Bank6, Bank7, Bank8, Bank9, + Bank10, Bank11, Bank12, Bank13, Bank14, Bank15, +} + +impl MemoryBank { + + fn from_u32(value: u32) -> MemoryBank { + match value { + 0 => MemoryBank::Bank0, + 1 => MemoryBank::Bank1, + 2 => MemoryBank::Bank2, + 3 => MemoryBank::Bank3, + 4 => MemoryBank::Bank4, + 5 => MemoryBank::Bank5, + 6 => MemoryBank::Bank6, + 7 => MemoryBank::Bank7, + 8=> MemoryBank::Bank8, + 9 => MemoryBank::Bank9 , + 10 => MemoryBank::Bank10, + 11 => MemoryBank::Bank11, + 12 => MemoryBank::Bank12, + 13 => MemoryBank::Bank13, + 14 => MemoryBank::Bank14, + 15 => MemoryBank::Bank15, + _ => MemoryBank::Bank0, + } + } + + fn addr(&self) -> usize { + match self { + MemoryBank::Bank0 => MEMORY_BANK_0_OFFSET, + MemoryBank::Bank1 => MEMORY_BANK_1_OFFSET, + MemoryBank::Bank2 => MEMORY_BANK_2_OFFSET, + MemoryBank::Bank3 => MEMORY_BANK_3_OFFSET, + MemoryBank::Bank4 => MEMORY_BANK_4_OFFSET, + MemoryBank::Bank5 => MEMORY_BANK_5_OFFSET, + MemoryBank::Bank6 => MEMORY_BANK_6_OFFSET, + MemoryBank::Bank7 => MEMORY_BANK_7_OFFSET, + MemoryBank::Bank8 => MEMORY_BANK_8_OFFSET, + MemoryBank::Bank9 => MEMORY_BANK_9_OFFSET, + MemoryBank::Bank10 => MEMORY_BANK_10_OFFSET, + MemoryBank::Bank11 => MEMORY_BANK_11_OFFSET, + MemoryBank::Bank12 => MEMORY_BANK_12_OFFSET, + MemoryBank::Bank13 => MEMORY_BANK_13_OFFSET, + MemoryBank::Bank14 => MEMORY_BANK_14_OFFSET, + MemoryBank::Bank15 => MEMORY_BANK_15_OFFSET, + _ => 0, + } + } + fn value(&self) -> usize { + match self { + MemoryBank::Bank0 => 0, + MemoryBank::Bank1 => 1, + MemoryBank::Bank2 => 2, + MemoryBank::Bank3 => 3, + MemoryBank::Bank4 => 4, + MemoryBank::Bank5 => 5, + MemoryBank::Bank6 => 6, + MemoryBank::Bank7 => 7, + MemoryBank::Bank8 => 8, + MemoryBank::Bank9 => 9, + MemoryBank::Bank10 => 10, + MemoryBank::Bank11 => 11, + MemoryBank::Bank12 => 12, + MemoryBank::Bank13 => 13, + MemoryBank::Bank14 => 14, + MemoryBank::Bank15 => 15, + _ => 0, + } + } +} + +#[derive(Copy, Clone)] pub enum SimdBitMode { EightBits = 0, FourBits = 1, @@ -18,291 +107,496 @@ macro_rules! set_bits { }; } -pub fn dla_write_str(s: &str) { - for b in s.as_bytes() { - unsafe { ptr::write_volatile(DLA0_ADDR as *mut u8, *b) }; - } -} -pub fn dla_write(offset: usize, value: u8) { - unsafe { ptr::write_volatile((offset) as *mut u8, value) }; +macro_rules! get_bits { + ($reg:expr, $mask:expr) => { + ($reg & ($mask as u32)) as u32 + }; } -pub fn dla_write_reg(offset: usize, value: u32) { - unsafe { ptr::write_volatile((DLA0_ADDR + offset) as *mut u32, value) } -} +impl Dla { + pub fn new() -> Self { + Dla { + } + } + pub fn write_u8(&self, offset: usize, value: u8) { + unsafe { ptr::write_volatile((offset) as *mut u8, value) }; + } -pub fn dla_read_reg(offset: usize) -> u32 { - unsafe { ptr::read_volatile((DLA0_ADDR + offset) as *mut u32) } -} + pub fn write_u32(&self, offset: usize, value: u32) { + unsafe { ptr::write_volatile((DLA0_ADDR + offset) as *mut u32, value) } + } -pub fn dla_read(buf: &mut [u8], len: usize, offset: usize) { - for i in 0..len { - unsafe { buf[i] = ptr::read_volatile((DLA0_ADDR + offset + i) as *mut u8) } + pub fn read_u32(&self, offset: usize) -> u32 { + unsafe { ptr::read_volatile((DLA0_ADDR + offset) as *mut u32) } } -} -pub fn dla_write_data_bank(offset: usize, buf: &mut [u8]) { - sprint!("\nWrite to bank {:#x}, data: {:?}", offset, buf); - for (i, b) in buf.iter().enumerate() { - unsafe { ptr::write_volatile((MEMORY_BANK_BASE_ADDR + offset + i) as *mut u8, *b) }; + pub fn read_bytes(&self, offset: usize, len: usize, buf: &mut [u8]) { + for i in 0..len { + unsafe { buf[i] = ptr::read_volatile((DLA0_ADDR + offset + i) as *mut u8) } + } } -} -pub fn dla_set_input_data_bank(bank: usize) { - let mut reg = dla_read_reg(DLA_BUF_DATA_BANK); - reg = set_bits!( - DLA_BUF_DATA_BANK_B_OFFSET, - DLA_BUF_DATA_BANK_B_BITMASK, - reg, - bank - ); - dla_write_reg(DLA_BUF_DATA_BANK, reg); -} + pub fn write_data_bank(&self, offset: usize, buf: &mut [u8]) { + //sprintln!("\nWrite to bank {:#x}, data: {:?}", offset, buf); + for (i, b) in buf.iter().enumerate() { + unsafe { ptr::write_volatile((MEMORY_BANK_BASE_ADDR + offset + i) as *mut u8, *b) }; + } + } -pub fn dla_set_kernel_data_bank(bank: usize) { - let mut reg = dla_read_reg(DLA_BUF_DATA_BANK); - reg = set_bits!( - DLA_BUF_DATA_BANK_A_OFFSET, - DLA_BUF_DATA_BANK_A_BITMASK, - reg, - bank - ); - dla_write_reg(DLA_BUF_DATA_BANK, reg); -} + fn read_data_bank_offset(&self, bank: &MemoryBank, offset: usize) -> u128 { + // NOTE: this function enforces the 128-bit addressing + if cfg!(feature = "vp") { + let mut result: u128 = 0; + for i in 0..4 { + result |= (unsafe { + ptr::read_volatile( + (MEMORY_BANK_BASE_ADDR + bank.addr() + offset + (i * 4)) as *mut u32, + ) + } as u128) + << (32 * i) + } + result + } else { + unsafe { + ptr::read_volatile( + (MEMORY_BANK_BASE_ADDR + bank.addr() + offset ) as *mut u128, + ) + } + } + } -pub fn dla_set_input_size(channels: usize, width: usize, height: usize) { - let mut reg = 0; - reg = set_bits!( - DLA_BUF_INPUT_CHANNELS_OFFSET, - DLA_BUF_INPUT_CHANNELS_BITMASK, - reg, - channels - 1 - ); - reg = set_bits!( - DLA_BUF_INPUT_WIDTH_OFFSET, - DLA_BUF_INPUT_WIDTH_BITMASK, - reg, - width - 1 - ); - reg = set_bits!( - DLA_BUF_INPUT_HEIGHT_OFFSET, - DLA_BUF_INPUT_HEIGHT_BITMASK, - reg, - height - 1 - ); - dla_write_reg(DLA_BUF_INPUT, reg); -} + fn read_data_bank(&self, bank: &MemoryBank, len: usize) -> Vec { + let mut res: Vec = Vec::new(); + + let mut next_bank_offset = 0; + while res.len() < len { + let data = self.read_data_bank_offset(bank, next_bank_offset); + let bytes_remaining = len - res.len(); + let bytes_to_copy = core::cmp::min(16, bytes_remaining); + + // Copy everything from one 128-bit address + for i in 0..bytes_to_copy { + let byte = ((data >> (i * 8)) & 0xFF) as u8; + res.push(byte) + } + next_bank_offset = next_bank_offset + 0x10; + } + res + } -pub fn dla_set_kernel_size(channels: usize, width: usize, height: usize) { - let mut reg = 0; - reg = set_bits!( - DLA_BUF_KERNEL_0_S_CHANNELS_OFFSET, - DLA_BUF_KERNEL_0_S_CHANNELS_BITMASK, - reg, - channels - 1 - ); - reg = set_bits!( - DLA_BUF_KERNEL_0_WIDTH_OFFSET, - DLA_BUF_KERNEL_0_WIDTH_BITMASK, - reg, - width - 1 - ); - reg = set_bits!( - DLA_BUF_KERNEL_0_HEIGHT_OFFSET, - DLA_BUF_KERNEL_0_HEIGHT_BITMASK, - reg, - height - 1 - ); - dla_write_reg(DLA_BUF_KERNEL_0, reg); -} + pub fn read_output(&self, len: usize) -> Vec { + // VP only support reading from banks + if cfg!(feature = "vp") { + return self.read_data_bank(&self.get_output_bank(), len); + } + self.read_data_bank(&MemoryBank::Bank0, len) + } -pub fn dla_input_data_ready(ready: bool) { - let mut reg = dla_read_reg(DLA_BUF_CTRL); - reg = set_bits!( - DLA_READ_B_VALID_OFFSET, - DLA_READ_B_VALID_BITMASK, - reg, - ready as usize - ); - dla_write_reg(DLA_BUF_CTRL, reg); -} + pub fn read_input_bank(&self, len: usize) -> Vec { + self.read_data_bank(&self.get_input_bank(), len) + } -pub fn dla_kernel_data_ready(ready: bool) { - let mut reg = dla_read_reg(DLA_BUF_CTRL); - reg = set_bits!( - DLA_READ_A_VALID_OFFSET, - DLA_READ_A_VALID_BITMASK, - reg, - ready as usize - ); - dla_write_reg(DLA_BUF_CTRL, reg); -} + pub fn read_weight_bank(&self, len: usize) -> Vec { + self.read_data_bank(&self.get_kernel_bank(), len) + } -pub fn dla_enable_relu(enable: bool) { - let mut reg = dla_read_reg(DLA_PP_CTRL); - // Relu is active low - reg = set_bits!( - DLA_ACTIVE_MODE_OFFSET, - DLA_ACTIVE_MODE_BITMASK, - reg, - (!enable) as usize - ); - dla_write_reg(DLA_PP_CTRL, reg); -} + pub fn write_input(&self, input: &mut [u8]) { + // TODO optimize memory bank logic + self.write_data_bank(self.get_input_bank().addr(), input) + } -pub fn dla_enable_bias(enable: bool) { - let mut reg = dla_read_reg(DLA_PP_CTRL); - reg = set_bits!( - DLA_PP_SELECT_OFFSET, - DLA_PP_SELECT_BITMASK, - reg, - enable as usize - ); - dla_write_reg(DLA_PP_CTRL, reg); -} + pub fn write_kernel(&self, kernel: &mut [u8]) { + // TODO optimize memory bank logic + self.write_data_bank(self.get_kernel_bank().addr(), kernel) + } -pub fn dla_set_input_padding(top: usize, right: usize, bottom: usize, left: usize, value: usize) { - let mut reg = 0; - reg = set_bits!(DLA_BUF_PAD_TOP_OFFSET, DLA_BUF_PAD_TOP_BITMASK, reg, top); - reg = set_bits!( - DLA_BUF_PAD_RIGHT_OFFSET, - DLA_BUF_PAD_RIGHT_BITMASK, - reg, - right - ); - reg = set_bits!( - DLA_BUF_PAD_BOTTOM_OFFSET, - DLA_BUF_PAD_BOTTOM_BITMASK, - reg, - bottom - ); - reg = set_bits!(DLA_BUF_PAD_LEFT_OFFSET, DLA_BUF_PAD_LEFT_BITMASK, reg, left); - reg = set_bits!( - DLA_BUF_PAD_VALUE_OFFSET, - DLA_BUF_PAD_VALUE_BITMASK, - reg, - value - ); - dla_write_reg(DLA_BUF_PAD, reg); -} + pub fn set_input_data_bank(&mut self, bank: MemoryBank) { + let mut reg = self.read_u32(DLA_BUF_DATA_BANK); + reg = set_bits!( + DLA_BUF_DATA_BANK_B_OFFSET, + DLA_BUF_DATA_BANK_B_BITMASK, + reg, + bank.value() + ); + self.write_u32(DLA_BUF_DATA_BANK, reg); + } -pub fn dla_set_stride(x: usize, y: usize) { - let mut reg = 0; - reg = set_bits!( - DLA_BUF_STRIDE_X_OFFSET, - DLA_BUF_STRIDE_X_BITMASK, - reg, - x - 1 - ); - reg = set_bits!( - DLA_BUF_STRIDE_Y_OFFSET, - DLA_BUF_STRIDE_Y_BITMASK, - reg, - y - 1 - ); - dla_write_reg(DLA_BUF_STRIDE, reg); -} + pub fn set_kernel_data_bank(&mut self, bank: MemoryBank) { + let mut reg = self.read_u32(DLA_BUF_DATA_BANK); + reg = set_bits!( + DLA_BUF_DATA_BANK_A_OFFSET, + DLA_BUF_DATA_BANK_A_BITMASK, + reg, + bank.value() + ); + self.write_u32(DLA_BUF_DATA_BANK, reg); + } -pub fn dla_set_bias_address(addr: usize) { - let reg = set_bits!( - DLA_PP_AXI_READ_ADDRESS_OFFSET, - DLA_PP_AXI_READ_ADDRESS_BITMASK, - 0, - addr - ); - dla_write_reg(DLA_PP_AXI_READ, reg); -} + pub fn set_output_bank(&mut self, bank: MemoryBank) { + let mut reg = self.read_u32(DLA_PP_AXI_WRITE); + reg = set_bits!( + DLA_PP_AXI_WRITE_ADDRESS_OFFSET, + DLA_PP_AXI_WRITE_ADDRESS_BITMASK, + reg, + bank.addr() + MEMORY_BANK_BASE_ADDR + ); + self.write_u32(DLA_PP_AXI_WRITE, reg); + } -pub fn dla_get_status() -> u32 { - return dla_read_reg(DLA_STATUS_ADDR); -} + pub fn set_input_size(&self, channels: usize, width: usize, height: usize) { + let mut reg = 0; + reg = set_bits!( + DLA_BUF_INPUT_CHANNELS_OFFSET, + DLA_BUF_INPUT_CHANNELS_BITMASK, + reg, + channels - 1 + ); + reg = set_bits!( + DLA_BUF_INPUT_WIDTH_OFFSET, + DLA_BUF_INPUT_WIDTH_BITMASK, + reg, + width - 1 + ); + reg = set_bits!( + DLA_BUF_INPUT_HEIGHT_OFFSET, + DLA_BUF_INPUT_HEIGHT_BITMASK, + reg, + height - 1 + ); + self.write_u32(DLA_BUF_INPUT, reg); + } -pub fn dla_set_simd_select(mode: SimdBitMode) { - let mut reg = dla_read_reg(DLA_MAC_CTRL); - reg = set_bits!( - DLA_SIMD_SELECT_OFFSET, - DLA_SIMD_SELECT_BITMASK, - reg, - mode as usize - ); - dla_write_reg(DLA_MAC_CTRL, reg) -} + pub fn set_kernel_size(&self, channels: usize, width: usize, height: usize) { + let mut reg = 0; + reg = set_bits!( + DLA_BUF_KERNEL_0_S_CHANNELS_OFFSET, + DLA_BUF_KERNEL_0_S_CHANNELS_BITMASK, + reg, + channels - 1 + ); + reg = set_bits!( + DLA_BUF_KERNEL_0_WIDTH_OFFSET, + DLA_BUF_KERNEL_0_WIDTH_BITMASK, + reg, + width - 1 + ); + reg = set_bits!( + DLA_BUF_KERNEL_0_HEIGHT_OFFSET, + DLA_BUF_KERNEL_0_HEIGHT_BITMASK, + reg, + height - 1 + ); + self.write_u32(DLA_BUF_KERNEL_0, reg); + } -pub fn dla_set_mac_clip(clip_amount: usize) { - let mut reg = dla_read_reg(DLA_MAC_CTRL); - // Cap clipping amount - if clip_amount > 21 { - reg = set_bits!(DLA_MAC_CLIP_OFFSET, DLA_MAC_CLIP_BITMASK, reg, 0x1F); - } else { - reg = set_bits!(DLA_MAC_CLIP_OFFSET, DLA_MAC_CLIP_BITMASK, reg, clip_amount); + pub fn input_data_ready(&self, ready: bool) { + let mut reg = self.read_u32(DLA_BUF_CTRL); + reg = set_bits!( + DLA_READ_B_VALID_OFFSET, + DLA_READ_B_VALID_BITMASK, + reg, + ready as usize + ); + self.write_u32(DLA_BUF_CTRL, reg); } - dla_write_reg(DLA_MAC_CTRL, reg) -} -pub fn dla_set_pp_clip(clip_amount: usize) { - let mut reg = dla_read_reg(DLA_PP_CTRL); - // Cap clipping amount - if clip_amount > 0x1F { - reg = set_bits!(DLA_PP_CLIP_OFFSET, DLA_PP_CLIP_BITMASK, reg, 0x1F); - } else { - reg = set_bits!(DLA_PP_CLIP_OFFSET, DLA_PP_CLIP_BITMASK, reg, clip_amount); + pub fn kernel_data_ready(&self, ready: bool) { + let mut reg = self.read_u32(DLA_BUF_CTRL); + reg = set_bits!( + DLA_READ_A_VALID_OFFSET, + DLA_READ_A_VALID_BITMASK, + reg, + ready as usize + ); + self.write_u32(DLA_BUF_CTRL, reg); } - dla_write_reg(DLA_PP_CTRL, reg) -} -pub fn dla_set_pp_rounding(enable: bool) { - let mut reg = dla_read_reg(DLA_PP_CTRL); - reg = set_bits!( - DLA_ROUNDING_OFFSET, - DLA_ROUNDING_BITMASK, - reg, - enable as usize - ); - dla_write_reg(DLA_PP_CTRL, reg); -} + pub fn enable_pp(&self, enable: bool) { + let mut reg = self.read_u32(DLA_HANDSHAKE); + reg = set_bits!( + DLA_HANDSHAKE_BYPASS_ENABLE_OFFSET, + DLA_HANDSHAKE_BYPASS_ENABLE_BITMASK, + reg, + enable as usize + ); + self.write_u32(DLA_HANDSHAKE, reg); + } -pub fn dla_set_bias_addr(addr: u32) { - dla_write_reg(DLA_PP_AXI_READ, addr); -} + pub fn enable_relu(&self, enable: bool) { + let mut reg = self.read_u32(DLA_HANDSHAKE); + reg = set_bits!( + DLA_HANDSHAKE_ACTIVE_ENABLE_OFFSET, + DLA_HANDSHAKE_ACTIVE_ENABLE_BITMASK, + reg, + enable as usize + ); + self.write_u32(DLA_HANDSHAKE, reg); + } + + pub fn enable_bias(&self, enable: bool) { + let mut reg = self.read_u32(DLA_HANDSHAKE); + reg = set_bits!( + DLA_HANDSHAKE_BIAS_ENABLE_OFFSET, + DLA_HANDSHAKE_BIAS_ENABLE_BITMASK, + reg, + enable as usize + ); + self.write_u32(DLA_HANDSHAKE, reg); + } + + pub fn set_input_padding( + &self, + top: usize, + right: usize, + bottom: usize, + left: usize, + value: usize, + ) { + let mut reg = 0; + reg = set_bits!(DLA_BUF_PAD_TOP_OFFSET, DLA_BUF_PAD_TOP_BITMASK, reg, top); + reg = set_bits!( + DLA_BUF_PAD_RIGHT_OFFSET, + DLA_BUF_PAD_RIGHT_BITMASK, + reg, + right + ); + reg = set_bits!( + DLA_BUF_PAD_BOTTOM_OFFSET, + DLA_BUF_PAD_BOTTOM_BITMASK, + reg, + bottom + ); + reg = set_bits!(DLA_BUF_PAD_LEFT_OFFSET, DLA_BUF_PAD_LEFT_BITMASK, reg, left); + reg = set_bits!( + DLA_BUF_PAD_VALUE_OFFSET, + DLA_BUF_PAD_VALUE_BITMASK, + reg, + value + ); + self.write_u32(DLA_BUF_PAD, reg); + } -pub fn dla_init() { - let mut reg = dla_read_reg(DLA_HANDSHAKE); - reg = set_bits!( - DLA_HANDSHAKE_BUFFER_ENABLE_OFFSET, - DLA_HANDSHAKE_BUFFER_ENABLE_BITMASK, - reg, - 1 - ); - reg = set_bits!( - DLA_HANDSHAKE_MAC_ENABLE_OFFSET, - DLA_HANDSHAKE_MAC_ENABLE_BITMASK, - reg, - 1 - ); - reg = set_bits!( - DLA_HANDSHAKE_BYPASS_ENABLE_OFFSET, - DLA_HANDSHAKE_BYPASS_ENABLE_BITMASK, - reg, - 1 - ); - dla_write_reg(DLA_HANDSHAKE, reg); - - dla_set_kernel_size(1, 3, 3); - dla_set_input_size(1, 5, 5); - - dla_set_input_data_bank(0); - dla_set_kernel_data_bank(8); - - dla_enable_relu(true); - dla_enable_bias(true); - - let mut A = [ - 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, - ]; - let mut B = [0, 1, 0, 1, 1, 1, 0, 1, 0]; - dla_write_data_bank(MEMORY_BANK_0_OFFSET, &mut A); - dla_write_data_bank(MEMORY_BANK_8_OFFSET, &mut B); - - dla_kernel_data_ready(true); - dla_input_data_ready(true); + pub fn set_stride(&self, x: usize, y: usize) { + let mut reg = 0; + reg = set_bits!( + DLA_BUF_STRIDE_X_OFFSET, + DLA_BUF_STRIDE_X_BITMASK, + reg, + x - 1 + ); + reg = set_bits!( + DLA_BUF_STRIDE_Y_OFFSET, + DLA_BUF_STRIDE_Y_BITMASK, + reg, + y - 1 + ); + self.write_u32(DLA_BUF_STRIDE, reg); + } + + pub fn set_bias_address(&self, addr: usize) { + let reg = set_bits!( + DLA_PP_AXI_READ_ADDRESS_OFFSET, + DLA_PP_AXI_READ_ADDRESS_BITMASK, + 0, + addr + ); + self.write_u32(DLA_PP_AXI_READ, reg); + } + + pub fn get_status(&self) -> u32 { + return self.read_u32(DLA_STATUS_ADDR); + } + + pub fn set_simd_select(&mut self, mode: SimdBitMode) { + let mut reg = self.read_u32(DLA_MAC_CTRL); + reg = set_bits!( + DLA_SIMD_SELECT_OFFSET, + DLA_SIMD_SELECT_BITMASK, + reg, + mode as usize + ); + self.write_u32(DLA_MAC_CTRL, reg) + } + + pub fn get_simd_mode(&self) -> SimdBitMode { + let mut reg = self.read_u32(DLA_MAC_CTRL); + reg = get_bits!(reg, DLA_SIMD_SELECT_BITMASK); + match reg { + 0 => SimdBitMode::EightBits, + 1 => SimdBitMode::FourBits, + 2 => SimdBitMode::TwoBits, + _ => SimdBitMode::EightBits, + } + } + + pub fn get_input_bank(&self) -> MemoryBank { + let mut reg = self.read_u32(DLA_BUF_DATA_BANK); + reg = get_bits!(reg, DLA_BUF_DATA_BANK_B_BITMASK); + MemoryBank::from_u32(reg) + } + + pub fn get_kernel_bank(&self) -> MemoryBank { + let mut reg = self.read_u32(DLA_BUF_DATA_BANK); + reg = get_bits!(reg, DLA_BUF_DATA_BANK_A_BITMASK); + MemoryBank::from_u32(reg) + } + + pub fn get_output_bank(&self) -> MemoryBank { + let reg = self.read_u32(DLA_PP_AXI_WRITE); + let bank_idx : u32 = (reg - MEMORY_BANK_BASE_ADDR as u32) / MEMORY_BANK_SIZE as u32; + MemoryBank::from_u32(bank_idx) + } + + pub fn set_mac_clip(&self, clip_amount: u32) { + let mut reg = self.read_u32(DLA_MAC_CTRL); + // Cap clipping amount + if clip_amount > 21 { + reg = set_bits!(DLA_MAC_CLIP_OFFSET, DLA_MAC_CLIP_BITMASK, reg, 0x1F); + } else { + reg = set_bits!(DLA_MAC_CLIP_OFFSET, DLA_MAC_CLIP_BITMASK, reg, clip_amount); + } + self.write_u32(DLA_MAC_CTRL, reg) + } + + pub fn set_pp_clip(&self, clip_amount: usize) { + let mut reg = self.read_u32(DLA_PP_CTRL); + // Cap clipping amount + if clip_amount > 0x1F { + reg = set_bits!(DLA_PP_CLIP_OFFSET, DLA_PP_CLIP_BITMASK, reg, 0x1F); + } else { + reg = set_bits!(DLA_PP_CLIP_OFFSET, DLA_PP_CLIP_BITMASK, reg, clip_amount); + } + self.write_u32(DLA_PP_CTRL, reg) + } + + pub fn set_pp_rounding(&self, enable: bool) { + let mut reg = self.read_u32(DLA_PP_CTRL); + reg = set_bits!( + DLA_ROUNDING_OFFSET, + DLA_ROUNDING_BITMASK, + reg, + enable as usize + ); + self.write_u32(DLA_PP_CTRL, reg); + } + + pub fn is_ready(&self) -> bool { + let status = self.read_u32(DLA_STATUS_ADDR); + return !get_bits!(status, DLA_BUF_DONE_BITMASK) != 0; + } + + pub fn set_bias_addr(&self, addr: u32) { + self.write_u32(DLA_PP_AXI_READ, addr); + } + + pub fn is_enabled(&self) -> bool { + let handshake_reg = self.read_u32(DLA_HANDSHAKE); + let buf_enabled = get_bits!(handshake_reg, DLA_HANDSHAKE_BUFFER_ENABLE_BITMASK) != 0; + let mac_enabled = get_bits!(handshake_reg, DLA_HANDSHAKE_MAC_ENABLE_BITMASK) != 0; + let active_enabled = get_bits!(handshake_reg, DLA_HANDSHAKE_ACTIVE_ENABLE_BITMASK) != 0; + buf_enabled & mac_enabled & active_enabled + } + + fn handshake_disable_hw(&self) { + let mut handshake_reg = self.read_u32(DLA_HANDSHAKE); + handshake_reg = set_bits!( + DLA_HANDSHAKE_BUFFER_ENABLE_OFFSET, + DLA_HANDSHAKE_BUFFER_ENABLE_BITMASK, + handshake_reg, + 0 + ); + handshake_reg = set_bits!( + DLA_HANDSHAKE_MAC_ENABLE_OFFSET, + DLA_HANDSHAKE_MAC_ENABLE_BITMASK, + handshake_reg, + 0 + ); + handshake_reg = set_bits!( + DLA_HANDSHAKE_ACTIVE_ENABLE_OFFSET, + DLA_HANDSHAKE_ACTIVE_ENABLE_BITMASK, + handshake_reg, + 0 + ); + handshake_reg = set_bits!( + DLA_HANDSHAKE_BIAS_ENABLE_OFFSET, + DLA_HANDSHAKE_BIAS_ENABLE_BITMASK, + handshake_reg, + 0 + ); + handshake_reg = set_bits!( + DLA_HANDSHAKE_BYPASS_ENABLE_OFFSET, + DLA_HANDSHAKE_BYPASS_ENABLE_BITMASK, + handshake_reg, + 0 + ); + + self.write_u32(DLA_HANDSHAKE, handshake_reg); + } + + pub fn handle_handshake(&self) -> bool { + // Handshake only if dla status is done + if !self.is_ready() { + return false; + } + + if self.is_enabled() { + self.handshake_disable_hw(); + return false; + } + + let mut handshake_reg = self.read_u32(DLA_HANDSHAKE); + handshake_reg = set_bits!( + DLA_HANDSHAKE_BUFFER_VALID_OFFSET, + DLA_HANDSHAKE_BUFFER_VALID_BITMASK, + handshake_reg, + 1 + ); + handshake_reg = set_bits!( + DLA_HANDSHAKE_MAC_VALID_OFFSET, + DLA_HANDSHAKE_MAC_VALID_BITMASK, + handshake_reg, + 1 + ); + handshake_reg = set_bits!( + DLA_HANDSHAKE_ACTIVE_VALID_OFFSET, + DLA_HANDSHAKE_ACTIVE_VALID_BITMASK, + handshake_reg, + 1 + ); + + self.write_u32(DLA_HANDSHAKE, handshake_reg); + return true; + } + + pub fn init_layer(&mut self) { + let mut reg = self.read_u32(DLA_HANDSHAKE); + reg = set_bits!( + DLA_HANDSHAKE_BUFFER_ENABLE_OFFSET, + DLA_HANDSHAKE_BUFFER_ENABLE_BITMASK, + reg, + 1 + ); + reg = set_bits!( + DLA_HANDSHAKE_MAC_ENABLE_OFFSET, + DLA_HANDSHAKE_MAC_ENABLE_BITMASK, + reg, + 1 + ); + reg = set_bits!( + DLA_HANDSHAKE_BYPASS_ENABLE_OFFSET, + DLA_HANDSHAKE_BYPASS_ENABLE_BITMASK, + reg, + 1 + ); + self.write_u32(DLA_HANDSHAKE, reg); + + self.set_input_data_bank(MemoryBank::Bank0); + self.set_kernel_data_bank(MemoryBank::Bank0); + self.set_output_bank(MemoryBank::Bank12); + self.set_simd_select(SimdBitMode::EightBits); + + self.enable_pp(true); + self.enable_relu(true); + self.enable_bias(true); + } } diff --git a/examples/hpc/dla-driver/src/mmap.rs b/examples/hpc/dla-driver/src/mmap.rs index 2c182624..e14c1051 100644 --- a/examples/hpc/dla-driver/src/mmap.rs +++ b/examples/hpc/dla-driver/src/mmap.rs @@ -1,22 +1,23 @@ pub(crate) const UART0_ADDR: usize = 0xFFF00000; -pub(crate) const DLA0_ADDR: usize = 0xFF700000; -pub(crate) const MEMORY_BANK_BASE_ADDR: usize = 0x70000000; -pub(crate) const MEMORY_BANK_0_OFFSET: usize = 0x00000; -pub(crate) const MEMORY_BANK_1_OFFSET: usize = 0x08000; -pub(crate) const MEMORY_BANK_2_OFFSET: usize = 0x10000; -pub(crate) const MEMORY_BANK_3_OFFSET: usize = 0x18000; -pub(crate) const MEMORY_BANK_4_OFFSET: usize = 0x20000; -pub(crate) const MEMORY_BANK_5_OFFSET: usize = 0x28000; -pub(crate) const MEMORY_BANK_6_OFFSET: usize = 0x30000; -pub(crate) const MEMORY_BANK_7_OFFSET: usize = 0x38000; -pub(crate) const MEMORY_BANK_8_OFFSET: usize = 0x40000; -pub(crate) const MEMORY_BANK_9_OFFSET: usize = 0x48000; -pub(crate) const MEMORY_BANK_10_OFFSET: usize = 0x50000; -pub(crate) const MEMORY_BANK_11_OFFSET: usize = 0x58000; -pub(crate) const MEMORY_BANK_12_OFFSET: usize = 0x60000; -pub(crate) const MEMORY_BANK_13_OFFSET: usize = 0x68000; -pub(crate) const MEMORY_BANK_14_OFFSET: usize = 0x70000; -pub(crate) const MEMORY_BANK_15_OFFSET: usize = 0x78000; +pub const DLA0_ADDR: usize = 0xFF700000; +pub const MEMORY_BANK_BASE_ADDR: usize = 0x70000000; +pub const MEMORY_BANK_SIZE: usize = 0x8000; +pub const MEMORY_BANK_0_OFFSET: usize = 0x00000; +pub const MEMORY_BANK_1_OFFSET: usize = 0x08000; +pub const MEMORY_BANK_2_OFFSET: usize = 0x10000; +pub const MEMORY_BANK_3_OFFSET: usize = 0x18000; +pub const MEMORY_BANK_4_OFFSET: usize = 0x20000; +pub const MEMORY_BANK_5_OFFSET: usize = 0x28000; +pub const MEMORY_BANK_6_OFFSET: usize = 0x30000; +pub const MEMORY_BANK_7_OFFSET: usize = 0x38000; +pub const MEMORY_BANK_8_OFFSET: usize = 0x40000; +pub const MEMORY_BANK_9_OFFSET: usize = 0x48000; +pub const MEMORY_BANK_10_OFFSET: usize = 0x50000; +pub const MEMORY_BANK_11_OFFSET: usize = 0x58000; +pub const MEMORY_BANK_12_OFFSET: usize = 0x60000; +pub const MEMORY_BANK_13_OFFSET: usize = 0x68000; +pub const MEMORY_BANK_14_OFFSET: usize = 0x70000; +pub const MEMORY_BANK_15_OFFSET: usize = 0x78000; pub(crate) const DLA_BASE_ADDR: usize = 0x1000; @@ -36,7 +37,7 @@ pub(crate) const DLA_HP_RST_OFFSET: usize = 0x4; pub(crate) const DLA_SW_IRQ_OFFSET: usize = 0x8; pub(crate) const DLA_CPU_FE_BITMASK: usize = 0b1; pub(crate) const DLA_HP_RST_BITMASK: usize = 0b10000; -pub(crate) const DLA_SW_IRQ_BITMASK: usize = 0b100000000; +pub(crate) const DLA_SW_IRQ_BITMASK: usize = 0b1 << 8; pub(crate) const DLA_BUF_CTRL: usize = 0x8; pub(crate) const DLA_CONV_MODE_OFFSET: usize = 0x0; @@ -44,13 +45,13 @@ pub(crate) const DLA_READ_A_VALID_OFFSET: usize = 0x4; pub(crate) const DLA_READ_B_VALID_OFFSET: usize = 0x8; pub(crate) const DLA_CONV_MODE_BITMASK: usize = 0b1111; pub(crate) const DLA_READ_A_VALID_BITMASK: usize = 0b10000; -pub(crate) const DLA_READ_B_VALID_BITMASK: usize = 0b100000000; +pub(crate) const DLA_READ_B_VALID_BITMASK: usize = 0b1 << 8; pub(crate) const DLA_MAC_CTRL: usize = 0xC; pub(crate) const DLA_SIMD_SELECT_OFFSET: usize = 0x1; pub(crate) const DLA_MAC_CLIP_OFFSET: usize = 0x8; pub(crate) const DLA_SIMD_SELECT_BITMASK: usize = 0x11; -pub(crate) const DLA_MAC_CLIP_BITMASK: usize = 0x1111100000000; +pub(crate) const DLA_MAC_CLIP_BITMASK: usize = 0b11111 << 8; pub(crate) const DLA_PP_CTRL: usize = 0x10; pub(crate) const DLA_ACTIVE_MODE_OFFSET: usize = 0x0; @@ -64,27 +65,27 @@ pub(crate) const DLA_PP_CLIP_OFFSET: usize = 0x10; pub(crate) const DLA_ACTIVE_MODE_BITMASK: usize = 0b11; pub(crate) const DLA_RELU_BITMASK_UNUSED: usize = 0b1100; pub(crate) const DLA_MAX_BITMASK_UNUSED: usize = 0b110000; -pub(crate) const DLA_PP_SELECT_BITMASK: usize = 0b1000000; -pub(crate) const DLA_POOL_MODE_BITMASK_UNUSED: usize = 0b110000000; -pub(crate) const DLA_ROUNDING_BITMASK: usize = 0b1000000000; -pub(crate) const DLA_CTRL_VLD_BITMASK_UNUSED: usize = 0b10000000000; -pub(crate) const DLA_PP_CLIP_BITMASK: usize = 0b111110000000000000000; +pub(crate) const DLA_PP_SELECT_BITMASK: usize = 0b1 << 6; +pub(crate) const DLA_POOL_MODE_BITMASK_UNUSED: usize = 0b11 << 7; +pub(crate) const DLA_ROUNDING_BITMASK: usize = 0b1 << 9; +pub(crate) const DLA_CTRL_VLD_BITMASK_UNUSED: usize = 0b1 << 10; +pub(crate) const DLA_PP_CLIP_BITMASK: usize = 0b11111 << 16; pub(crate) const DLA_BUF_INPUT: usize = 0x14; -pub(crate) const DLA_BUF_INPUT_WIDTH_OFFSET: usize = 0x0; -pub(crate) const DLA_BUF_INPUT_HEIGHT_OFFSET: usize = 0x9; -pub(crate) const DLA_BUF_INPUT_CHANNELS_OFFSET: usize = 0x18; +pub(crate) const DLA_BUF_INPUT_WIDTH_OFFSET: usize = 0; +pub(crate) const DLA_BUF_INPUT_HEIGHT_OFFSET: usize = 9; +pub(crate) const DLA_BUF_INPUT_CHANNELS_OFFSET: usize = 18; pub(crate) const DLA_BUF_INPUT_WIDTH_BITMASK: usize = 0b111111111; -pub(crate) const DLA_BUF_INPUT_HEIGHT_BITMASK: usize = 0b111111111000000000; -pub(crate) const DLA_BUF_INPUT_CHANNELS_BITMASK: usize = 0b111111111111000000000000000000; +pub(crate) const DLA_BUF_INPUT_HEIGHT_BITMASK: usize = 0b111111111 << 9; +pub(crate) const DLA_BUF_INPUT_CHANNELS_BITMASK: usize = 0b111111111111 << 18; pub(crate) const DLA_BUF_KERNEL_0: usize = 0x18; -pub(crate) const DLA_BUF_KERNEL_0_WIDTH_OFFSET: usize = 0x0; -pub(crate) const DLA_BUF_KERNEL_0_HEIGHT_OFFSET: usize = 0x4; -pub(crate) const DLA_BUF_KERNEL_0_S_CHANNELS_OFFSET: usize = 0x8; +pub(crate) const DLA_BUF_KERNEL_0_WIDTH_OFFSET: usize = 0; +pub(crate) const DLA_BUF_KERNEL_0_HEIGHT_OFFSET: usize = 4; +pub(crate) const DLA_BUF_KERNEL_0_S_CHANNELS_OFFSET: usize = 8; pub(crate) const DLA_BUF_KERNEL_0_WIDTH_BITMASK: usize = 0b1111; -pub(crate) const DLA_BUF_KERNEL_0_HEIGHT_BITMASK: usize = 0b11110000; -pub(crate) const DLA_BUF_KERNEL_0_S_CHANNELS_BITMASK: usize = 0b11111111111100000000; +pub(crate) const DLA_BUF_KERNEL_0_HEIGHT_BITMASK: usize = 0b1111 << 4; +pub(crate) const DLA_BUF_KERNEL_0_S_CHANNELS_BITMASK: usize = 0b111111111111 << 8; pub(crate) const DLA_BUF_KERNEL_1: usize = 0x1C; pub(crate) const DLA_BUF_KERNEL_1_NUM_OFFSET: usize = 0x0; @@ -97,28 +98,28 @@ pub(crate) const DLA_BUF_PAD_BOTTOM_OFFSET: usize = 0x8; pub(crate) const DLA_BUF_PAD_LEFT_OFFSET: usize = 0xC; pub(crate) const DLA_BUF_PAD_VALUE_OFFSET: usize = 0x10; pub(crate) const DLA_BUF_PAD_TOP_BITMASK: usize = 0b1111; -pub(crate) const DLA_BUF_PAD_RIGHT_BITMASK: usize = 0b11110000; -pub(crate) const DLA_BUF_PAD_BOTTOM_BITMASK: usize = 0b111100000000; -pub(crate) const DLA_BUF_PAD_LEFT_BITMASK: usize = 0b1111000000000000; -pub(crate) const DLA_BUF_PAD_VALUE_BITMASK: usize = 0b11110000000000000000; +pub(crate) const DLA_BUF_PAD_RIGHT_BITMASK: usize = 0b1111 << 4; +pub(crate) const DLA_BUF_PAD_BOTTOM_BITMASK: usize = 0b1111 << 8; +pub(crate) const DLA_BUF_PAD_LEFT_BITMASK: usize = 0b1111 << 12; +pub(crate) const DLA_BUF_PAD_VALUE_BITMASK: usize = 0b1111 << 16; pub(crate) const DLA_BUF_STRIDE: usize = 0x24; pub(crate) const DLA_BUF_STRIDE_X_OFFSET: usize = 0x0; pub(crate) const DLA_BUF_STRIDE_Y_OFFSET: usize = 0x10; pub(crate) const DLA_BUF_STRIDE_X_BITMASK: usize = 0b1111; -pub(crate) const DLA_BUF_STRIDE_Y_BITMASK: usize = 0b11110000000000000000; +pub(crate) const DLA_BUF_STRIDE_Y_BITMASK: usize = 0b1111 << 16; pub(crate) const DLA_PP_INPUT: usize = 0x28; pub(crate) const DLA_PP_INPUT_WIDTH_OFFSET: usize = 0x0; pub(crate) const DLA_PP_INPUT_HEIGHT_OFFSET: usize = 0x10; pub(crate) const DLA_PP_INPUT_WIDTH_BITMASK: usize = 0b111111111; -pub(crate) const DLA_PP_INPUT_HEIGHT_BITMASK: usize = 0b1111111110000000000000000; +pub(crate) const DLA_PP_INPUT_HEIGHT_BITMASK: usize = 0b111111111 << 16; pub(crate) const DLA_BUF_DATA_BANK: usize = 0x2C; pub(crate) const DLA_BUF_DATA_BANK_A_OFFSET: usize = 0x0; -pub(crate) const DLA_BUF_DATA_BANK_B_OFFSET: usize = 0x10; +pub(crate) const DLA_BUF_DATA_BANK_B_OFFSET: usize = 16; pub(crate) const DLA_BUF_DATA_BANK_A_BITMASK: usize = 0b1111; -pub(crate) const DLA_BUF_DATA_BANK_B_BITMASK: usize = 0b11110000; +pub(crate) const DLA_BUF_DATA_BANK_B_BITMASK: usize = 0b1111 << 16; pub(crate) const DLA_BUF_DATA_WAIT_A: usize = 0x30; pub(crate) const DLA_BUF_DATA_WAIT_A_OFFSET: usize = 0x0; @@ -168,6 +169,10 @@ pub(crate) const DLA_MAC_SAT_MIN: usize = 0x58; pub(crate) const DLA_MAC_SAT_MIN_OFFSET: usize = 0x0; pub(crate) const DLA_MAC_SAT_MIN_BITMASK: usize = 0xFFFFFFFF; +pub(crate) const DLA_PP_AXI_WRITE: usize = 0x5c; +pub(crate) const DLA_PP_AXI_WRITE_ADDRESS_OFFSET: usize = 0x00; +pub(crate) const DLA_PP_AXI_WRITE_ADDRESS_BITMASK: usize = 0xFFFFFFFF; + pub(crate) const DLA_PP_AXI_READ: usize = 0x60; pub(crate) const DLA_PP_AXI_READ_ADDRESS_OFFSET: usize = 0x00; pub(crate) const DLA_PP_AXI_READ_ADDRESS_BITMASK: usize = 0xFFFFFFFF; @@ -184,12 +189,12 @@ pub(crate) const DLA_HANDSHAKE_POOL_ENABLE_OFFSET: usize = 0x7; pub(crate) const DLA_HANDSHAKE_BIAS_ENABLE_OFFSET: usize = 0x8; pub(crate) const DLA_HANDSHAKE_BYPASS_ENABLE_OFFSET: usize = 0x9; pub(crate) const DLA_HANDSHAKE_BUFFER_VALID_BITMASK: usize = 0b1; -pub(crate) const DLA_HANDSHAKE_MAC_VALID_BITMASK: usize = 0b10; -pub(crate) const DLA_HANDSHAKE_POOL_VALID_BITMASK: usize = 0b100; -pub(crate) const DLA_HANDSHAKE_ACTIVE_VALID_BITMASK: usize = 0b1000; -pub(crate) const DLA_HANDSHAKE_BUFFER_ENABLE_BITMASK: usize = 0b10000; -pub(crate) const DLA_HANDSHAKE_MAC_ENABLE_BITMASK: usize = 0b100000; -pub(crate) const DLA_HANDSHAKE_ACTIVE_ENABLE_BITMASK: usize = 0b1000000; -pub(crate) const DLA_HANDSHAKE_POOL_ENABLE_BITMASK: usize = 0b10000000; -pub(crate) const DLA_HANDSHAKE_BIAS_ENABLE_BITMASK: usize = 0b100000000; -pub(crate) const DLA_HANDSHAKE_BYPASS_ENABLE_BITMASK: usize = 0b1000000000; +pub(crate) const DLA_HANDSHAKE_MAC_VALID_BITMASK: usize = 0b1 << 1; +pub(crate) const DLA_HANDSHAKE_POOL_VALID_BITMASK: usize = 0b1 << 2; +pub(crate) const DLA_HANDSHAKE_ACTIVE_VALID_BITMASK: usize = 0b1 << 3; +pub(crate) const DLA_HANDSHAKE_BUFFER_ENABLE_BITMASK: usize = 0b1 << 4; +pub(crate) const DLA_HANDSHAKE_MAC_ENABLE_BITMASK: usize = 0b1 << 5; +pub(crate) const DLA_HANDSHAKE_ACTIVE_ENABLE_BITMASK: usize = 0b1 << 6; +pub(crate) const DLA_HANDSHAKE_POOL_ENABLE_BITMASK: usize = 0b1 << 7; +pub(crate) const DLA_HANDSHAKE_BIAS_ENABLE_BITMASK: usize = 0b1 << 8; +pub(crate) const DLA_HANDSHAKE_BYPASS_ENABLE_BITMASK: usize = 0b1 << 9;