Fix linter warnings
vilukissa68 committed Dec 10, 2024
1 parent 949fe5b commit 30bbfda
Showing 5 changed files with 73 additions and 48 deletions.
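The warnings fixed here fall into a few recurring rustc/clippy categories, all visible in the diffs below: unused parameters and locals get a leading underscore, `mut` is dropped from bindings that are never mutated, a `&Vec<i16>` argument becomes a slice (`clippy::ptr_arg`), a hand-rolled ceiling division becomes `div_ceil`, redundant `clone()` calls on `Copy` enums are removed, and the wide FFI signatures opt out of `clippy::too_many_arguments`. A minimal standalone sketch of those patterns (illustrative names only, not code from this repository):

// clippy::too_many_arguments: FFI entry points mirror a fixed C signature,
// so the lint is allowed per function instead of refactoring the ABI.
#[allow(clippy::too_many_arguments)]
fn launch(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8) -> i8 {
    a + b + c + d + e + f + g + h
}

// unused_variables: parameters the ABI requires but the body ignores are
// kept, renamed with a leading underscore.
fn configure(_pad_value: i32, _pp_clip: u32) {}

// clippy::ptr_arg: accept &[i16] instead of &Vec<i16>.
fn abs_max(bias: &[i16]) -> u32 {
    bias.iter().map(|&x| (x as i32).unsigned_abs()).max().unwrap_or(0)
}

// Manual ceiling division, (bytes + bank - 1) / bank, becomes div_ceil
// (stable for unsigned integers since Rust 1.73).
fn banks_needed(bytes: usize, bank: usize) -> usize {
    bytes.div_ceil(bank)
}

fn main() {
    let x = launch(1, 1, 1, 1, 1, 1, 1, 1); // `let`, not `let mut`: x is never reassigned
    configure(0, 7);
    assert_eq!(abs_max(&[3, -1000, 42]), 1000);
    assert_eq!(banks_needed(5000, 4096), 2);
    assert_eq!(x, 8);
}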
28 changes: 17 additions & 11 deletions examples/hpc/dla-driver-ffi/src/lib.rs
@@ -4,18 +4,18 @@
#![no_std]
#![no_main]

#[macro_use]
extern crate alloc;
use alloc::vec::Vec;
use core::ffi::{c_char, CStr};
use core::slice;
use dla_driver::layers::{conv2d, conv2d_bias, conv2d_bias_relu, conv2d_relu, grouped_conv2d};
use dla_driver::tensor3::{rescale, Order3, Tensor3};
use dla_driver::tensor3::{Order3, Tensor3};
use dla_driver::tensor4::{Order4, Tensor4};
use dla_driver::utils::optimal_pp_bias_heuristic;
use dla_driver::{Padding, Stride};

/// Converts C types to DLA Tensors for use with the high-level layer functions
#[allow(clippy::too_many_arguments)]
unsafe fn ffi_data_import(
input_data: *const i8,
input_channels: usize,
@@ -29,7 +29,7 @@ unsafe fn ffi_data_import(
kernel_width: usize,
kernel_order: *const c_char,
) -> (Tensor3<i8>, Tensor4<i8>) {
let mut input_data: Vec<i8> = unsafe {
let input_data: Vec<i8> = unsafe {
slice::from_raw_parts(input_data, input_channels * input_height * input_width).to_vec()
};

@@ -77,6 +77,7 @@ pub unsafe extern "C" fn dla_init() {

/// Executes Conv2D on DLA with given parameters and writes result to output buffer.
#[no_mangle]
#[allow(clippy::too_many_arguments)]
pub unsafe extern "C" fn dla_conv2d(
input_data: *const i8,
kernel_data: *const i8,
@@ -141,6 +142,7 @@ pub unsafe extern "C" fn dla_conv2d(

/// Executes Conv2D + ReLU on DLA with given parameters and writes result to output buffer.
#[no_mangle]
#[allow(clippy::too_many_arguments)]
pub unsafe extern "C" fn dla_conv2d_relu(
input_data: *const i8,
kernel_data: *const i8,
@@ -208,6 +210,7 @@ pub unsafe extern "C" fn dla_conv2d_relu(
///
/// * `bias` - Bias is actually i16 in hardware; i32 is used here for TVM compatibility
#[no_mangle]
#[allow(clippy::too_many_arguments)]
pub unsafe extern "C" fn dla_conv2d_bias(
input_data: *const i8,
kernel_data: *const i8,
@@ -282,6 +285,7 @@ pub unsafe extern "C" fn dla_conv2d_bias(
///
/// * `bias` - Buffer containing bias data. NOTE: Bias is actually i16 in hardware; i32 is used here for TVM compatibility
#[no_mangle]
#[allow(clippy::too_many_arguments)]
pub unsafe extern "C" fn dla_conv2d_bias_relu(
input_data: *const i8,
kernel_data: *const i8,
@@ -366,6 +370,7 @@ pub unsafe extern "C" fn dla_conv2d_bias_relu(
///
/// * `bias` - Buffer containing bias data. NOTE: Bias is actually i16 in hardware; i32 is used here for TVM compatibility
#[no_mangle]
#[allow(clippy::too_many_arguments)]
pub unsafe extern "C" fn dla_tvm_qnn_conv2d_bias(
input_data: *const i8,
kernel_data: *const i8,
@@ -385,11 +390,11 @@ pub unsafe extern "C" fn dla_tvm_qnn_conv2d_bias(
pad_right: u32,
pad_left: u32,
pad_bottom: u32,
pad_value: i32,
_pad_value: i32,
stride_x: u32,
stride_y: u32,
mac_clip: u32,
pp_clip: u32,
_pp_clip: u32,
) {
let (input_tensor, kernels_tensor) = unsafe {
ffi_data_import(
@@ -419,7 +424,7 @@ pub unsafe extern "C" fn dla_tvm_qnn_conv2d_bias(
//let optimized_pp = optimal_pp_bias_heuristic(&bias);
let optimized_pp = 7;

let mut result: Tensor3<i8> = conv2d_bias(
let result: Tensor3<i8> = conv2d_bias(
input_tensor,
kernels_tensor,
bias,
@@ -439,7 +444,7 @@ pub unsafe extern "C" fn dla_tvm_qnn_conv2d_bias(
None,
);

let input_order_string = unsafe { CStr::from_ptr(input_order).to_str().unwrap_unchecked() };
let _input_order_string = unsafe { CStr::from_ptr(input_order).to_str().unwrap_unchecked() };

// TVM requantization and clip
// NOTE:(20240927 [email protected]) on DLA clipping behaviour with TVM.
@@ -460,6 +465,7 @@ pub unsafe extern "C" fn dla_tvm_qnn_conv2d_bias(
///
/// * `bias` - Buffer containing bias data. NOTE: Bias is actually i16 in hardware; i32 is used here for TVM compatibility
#[no_mangle]
#[allow(clippy::too_many_arguments)]
pub unsafe extern "C" fn dla_tvm_qnn_conv2d_grouped_bias(
input_data: *const i8,
kernel_data: *const i8,
@@ -480,11 +486,11 @@ pub unsafe extern "C" fn dla_tvm_qnn_conv2d_grouped_bias(
pad_right: u32,
pad_left: u32,
pad_bottom: u32,
pad_value: i32,
_pad_value: i32,
stride_x: u32,
stride_y: u32,
mac_clip: u32,
pp_clip: u32,
_pp_clip: u32,
) {
let (input_tensor, kernels_tensor) = unsafe {
ffi_data_import(
@@ -513,7 +519,7 @@ pub unsafe extern "C" fn dla_tvm_qnn_conv2d_grouped_bias(

let optimized_pp = optimal_pp_bias_heuristic(&bias);

let mut result: Tensor3<i8> = grouped_conv2d(
let result: Tensor3<i8> = grouped_conv2d(
input_tensor,
kernels_tensor,
bias,
@@ -534,7 +540,7 @@ pub unsafe extern "C" fn dla_tvm_qnn_conv2d_grouped_bias(
groups,
);

let input_order_string = unsafe { CStr::from_ptr(input_order).to_str().unwrap_unchecked() };
let _input_order_string = unsafe { CStr::from_ptr(input_order).to_str().unwrap_unchecked() };

// TVM requantization and clip
// NOTE:(20240927 [email protected]) on DLA clipping behaviour with TVM.
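Most of the churn in lib.rs is underscore-prefixing parameters that the TVM-facing C ABI still has to accept (`_pad_value`, `_pp_clip`, the unused `_input_order_string`) and dropping `mut` from bindings that are no longer modified in place. The pointer import at the top of the file follows the usual raw-slice pattern; here is a minimal sketch of that one step, with a hypothetical name and the driver-specific tensor construction left out:

use core::slice;

/// Sketch of the import step inside ffi_data_import (hypothetical helper).
///
/// Safety: `data` must be non-null, properly aligned, and valid for `len`
/// consecutive reads; the contents are copied into an owned Vec.
unsafe fn import_buffer(data: *const i8, len: usize) -> Vec<i8> {
    unsafe { slice::from_raw_parts(data, len).to_vec() }
}

The real function passes `input_channels * input_height * input_width` as the length; that contract rests entirely on the C caller, since nothing on the Rust side can validate it.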
12 changes: 4 additions & 8 deletions examples/hpc/dla-driver/src/layers.rs
@@ -3,8 +3,6 @@ use crate::tensor4::{Order4, Tensor4};
use crate::{Dla, InputSize, KernelSize, LayerConfig, Padding, SimdBitMode, Stride};
use alloc::vec::Vec;

use headsail_bsp::sprintln;

use crate::utils::{calculate_conv2d_out_param_dim, get_banks_for_layer};

// Define a trait for output handling
@@ -190,19 +188,19 @@ pub fn grouped_conv2d<T: DlaOutput + Clone>(
mac_clip: Option<u32>,
pp_clip: Option<u32>,
simd_mode: Option<SimdBitMode>,
groups: usize
groups: usize,
) -> Tensor3<T> {
let total_in_channels = input.channels();
let total_out_channels = kernels.kernels();
let group_in_channels = total_in_channels / groups;
let group_out_channels = kernels.kernels() / groups;

// Placeholder for the output tensor
let mut output_tensors = Vec::new();

for g in 0..groups {
let input_group = input.slice_channels(g * group_in_channels..(g + 1)*group_in_channels);
let kernels_group = kernels.slice_channels(g * group_in_channels..(g + 1)*group_in_channels);
let input_group = input.slice_channels(g * group_in_channels..(g + 1) * group_in_channels);
let kernels_group =
kernels.slice_channels(g * group_in_channels..(g + 1) * group_in_channels);
let bias_group = bias[g * group_out_channels..(g + 1) * group_out_channels].to_vec();

let output_group = run_layers(
@@ -223,10 +221,8 @@

// Concatenate the output tensors along the channel dimension
Tensor3::concat_interleaved(output_tensors)

}


fn run_layers<T: DlaOutput + Clone>(
input: Tensor3<i8>,
kernels: Tensor4<i8>,
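The substance of grouped_conv2d is channel bookkeeping: the input channels are split into `groups` equal slices, each group is convolved independently, and the per-group outputs are recombined. A small sketch of just the range arithmetic, assuming (as the code above implicitly does) that the channel counts divide evenly by `groups`:

/// Hypothetical helper: per-group input and output channel ranges.
fn group_ranges(
    in_channels: usize,
    out_channels: usize,
    groups: usize,
) -> Vec<(core::ops::Range<usize>, core::ops::Range<usize>)> {
    let gin = in_channels / groups; // input channels seen by each group
    let gout = out_channels / groups; // output channels produced by each group
    (0..groups)
        .map(|g| (g * gin..(g + 1) * gin, g * gout..(g + 1) * gout))
        .collect()
}

fn main() {
    // 8 input channels, 4 output channels, 2 groups: group 0 reads input
    // channels 0..4 and produces outputs 0..2; group 1 reads 4..8, produces 2..4.
    assert_eq!(group_ranges(8, 4, 2), vec![(0..4, 0..2), (4..8, 2..4)]);
}

In the driver itself the input and kernel tensors are sliced with the `group_in_channels` range and the bias with the `group_out_channels` range before each group is dispatched to `run_layers`.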
30 changes: 20 additions & 10 deletions examples/hpc/dla-driver/src/tensor3.rs
@@ -1,6 +1,6 @@
use alloc::vec::*;
use core::ffi::c_char;
use ndarray::{Array, Array3, ArrayView3 , Axis, s, stack, concatenate};
use ndarray::{s, Array, Array3};

#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Order3 {
@@ -185,9 +185,14 @@ impl<T: Clone> Tensor3<T> {

/// Concatenates a Tensor along the least significant axis (axis=2) by interleaving the tensors
pub fn concat_interleaved(tensors: Vec<Tensor3<T>>) -> Tensor3<T> {
let target_order = tensors[0].order();
let (height, width, channels) = (tensors[0].height(), tensors[0].width(), tensors[0].channels());
let mut intermediary_buffer: Vec<T> = Vec::with_capacity(height * width * channels * tensors.len());
let _target_order = tensors[0].order();
let (height, width, channels) = (
tensors[0].height(),
tensors[0].width(),
tensors[0].channels(),
);
let mut intermediary_buffer: Vec<T> =
Vec::with_capacity(height * width * channels * tensors.len());
for h in 0..height {
for w in 0..width {
for c in 0..channels {
@@ -197,7 +202,14 @@
}
}
}
Tensor3::from_data_buffer(channels * tensors.len(), height, width, intermediary_buffer, Order3::HWC).unwrap()
Tensor3::from_data_buffer(
channels * tensors.len(),
height,
width,
intermediary_buffer,
Order3::HWC,
)
.unwrap()
}

/// Slice tensors channel axis with the given range
@@ -217,7 +229,7 @@

Tensor3 {
data: sliced_data,
order: self.order.clone(),
order: self.order,
}
}

@@ -271,7 +283,6 @@ impl<T: Clone> Tensor3<T> {
data.permute(order);
data.to_buffer()
}

}

pub fn rescale(
@@ -298,10 +309,9 @@ pub fn rescale(
};

channel_slice.map_inplace(|x| {
let value = (input_scale / scale) * (*x as f32 * pre_scale - input_zero as f32)
+ output_zero as f32;
let value = (input_scale / scale) * (*x as f32 * pre_scale - input_zero as f32)
+ output_zero as f32;
*x = value.clamp(i8::MIN as f32, i8::MAX as f32) as i8

});
}
}
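concat_interleaved is what stitches the per-group outputs back together. Judging from the visible loop nest (height, then width, then channel, with the per-tensor push in the collapsed innermost lines), the tensors are interleaved channel by channel at every spatial position rather than stacked block-wise. A flat-buffer sketch of that ordering for HWC data, under that assumption and with hypothetical names:

/// Hypothetical flat-buffer version of concat_interleaved for HWC data:
/// at each (h, w, c) the element from every tensor is emitted in turn, so
/// the output channel order is t0c0, t1c0, t0c1, t1c1, ...
fn concat_interleaved_hwc(tensors: &[Vec<i8>], h: usize, w: usize, c: usize) -> Vec<i8> {
    let mut out = Vec::with_capacity(h * w * c * tensors.len());
    for y in 0..h {
        for x in 0..w {
            for ch in 0..c {
                for t in tensors {
                    out.push(t[(y * w + x) * c + ch]); // HWC flat index
                }
            }
        }
    }
    out
}

fn main() {
    // Two 1x1 "images" with 2 channels each.
    let a = vec![1i8, 2];
    let b = vec![10i8, 20];
    assert_eq!(concat_interleaved_hwc(&[a, b], 1, 1, 2), vec![1, 10, 2, 20]);
}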
44 changes: 29 additions & 15 deletions examples/hpc/dla-driver/src/tensor4.rs
@@ -1,7 +1,6 @@
use alloc::vec::*;
use core::ffi::c_char;
#[macro_use]
use ndarray::{Array, Array4, s, concatenate};
use ndarray::{s, Array, Array4};

#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Order4 {
@@ -280,18 +279,33 @@ impl<T: Clone> Tensor4<T> {
pub fn slice_channels(&self, c_range: core::ops::Range<usize>) -> Tensor4<T> {
// Determine the index of the channel dimension based on the tensor order
let kernel_axis = match self.order {
Order4::KCHW | Order4::KCWH | Order4::KHWC |
Order4::KHCW | Order4::KWCH | Order4::KWHC => 0,

Order4::CKHW | Order4::CKWH | Order4::HKCW |
Order4::HKWC | Order4::WKCH | Order4::WKHC => 1,

Order4::CHKW | Order4::CWKH | Order4::HCKW |
Order4::HWKC | Order4::WCKH | Order4::WHKC => 2,

Order4::CHWK | Order4::CWHK | Order4::HWCK |
Order4::HCWK | Order4::WCHK | Order4::WHCK => 3,

Order4::KCHW
| Order4::KCWH
| Order4::KHWC
| Order4::KHCW
| Order4::KWCH
| Order4::KWHC => 0,

Order4::CKHW
| Order4::CKWH
| Order4::HKCW
| Order4::HKWC
| Order4::WKCH
| Order4::WKHC => 1,

Order4::CHKW
| Order4::CWKH
| Order4::HCKW
| Order4::HWKC
| Order4::WCKH
| Order4::WHKC => 2,

Order4::CHWK
| Order4::CWHK
| Order4::HWCK
| Order4::HCWK
| Order4::WCHK
| Order4::WHCK => 3,
};

// Create a slice pattern for `s![]` by slicing only on the channels axis
@@ -307,7 +321,7 @@ impl<T: Clone> Tensor4<T> {
// Return a new Tensor4 with the sliced data and the same order
Tensor4 {
data: sliced_data,
order: self.order.clone(),
order: self.order,
}
}

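The reflowed match in slice_channels is pure formatting; what it computes is unchanged. The Order4 variant names spell out the memory layout as a permutation of K, C, H, and W, and the match simply answers at which axis K sits. A hypothetical string-based equivalent, just to make the mapping explicit:

/// Hypothetical equivalent of the match above: the kernel axis is the
/// position of 'K' in the layout name.
fn kernel_axis(order_name: &str) -> usize {
    order_name.find('K').expect("every Order4 name contains K")
}

fn main() {
    assert_eq!(kernel_axis("KCHW"), 0); // K leads: axis 0
    assert_eq!(kernel_axis("CKWH"), 1);
    assert_eq!(kernel_axis("HWKC"), 2);
    assert_eq!(kernel_axis("WHCK"), 3); // K trails: axis 3
}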
7 changes: 3 additions & 4 deletions examples/hpc/dla-driver/src/utils.rs
@@ -62,7 +62,7 @@ pub fn generate_output_tensor<I: Clone, K: Clone, O: Clone>(
/// * `bytes` - Number of bytes the data contains
pub fn calculate_number_of_banks_needed(bytes: usize) -> usize {
// Take ceil
(bytes + (MEMORY_BANK_SIZE - 1)) / MEMORY_BANK_SIZE
bytes.div_ceil(MEMORY_BANK_SIZE)
}

/// Assigns data banks for layer data
@@ -118,13 +118,12 @@ fn calculate_same_padding(input: (u32, u32), kernel: (u32, u32), stride: Stride)
}
}


/// Calculate optimal amount of PP clip based on bias heuristic for minimal loss in granularity
pub fn optimal_pp_bias_heuristic(bias: &Vec<i16>) -> u32 {
pub fn optimal_pp_bias_heuristic(bias: &[i16]) -> u32 {
let abs_max = bias.iter().map(|&x| x.abs() as i32).max().unwrap_or(0) as u32;
let pp = (abs_max.max(127) / 127).ilog2() + 1;
if pp > 8 {
return 8
return 8;
}
pp
}
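Two of the utils.rs changes deserve a note. `bytes.div_ceil(MEMORY_BANK_SIZE)` is the standard-library replacement for the `(bytes + (MEMORY_BANK_SIZE - 1)) / MEMORY_BANK_SIZE` idiom: same ceiling, but without the intermediate addition that can overflow near `usize::MAX`. The heuristic itself grants one extra bit of post-processing clip for each doubling of the bias magnitude beyond the i8 range, capped at 8. A worked check that mirrors the arithmetic above (sketch only, minus the slice plumbing):

fn pp_for(abs_max: u32) -> u32 {
    let pp = (abs_max.max(127) / 127).ilog2() + 1;
    pp.min(8) // same effect as the `if pp > 8` early return
}

fn main() {
    assert_eq!(pp_for(100), 1); // within i8 range: 127 / 127 = 1, ilog2 = 0, +1 = 1
    assert_eq!(pp_for(1000), 3); // 1000 / 127 = 7, ilog2(7) = 2, +1 = 3
    assert_eq!(pp_for(u32::MAX), 8); // clamped to 8
}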
