-
Notifications
You must be signed in to change notification settings - Fork 534
/
Copy pathkernel.rs
71 lines (59 loc) · 1.94 KB
/
kernel.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
use crate::{
element::JitElement, kernel::reduce::init_reduce_output, tensor::JitTensor, JitRuntime,
};
use cubecl::calculate_cube_count_elemwise;
use cubecl::prelude::*;
use super::base::ReduceDimNaive;
use super::base::ReduceDimNaiveFamily;
/// Launchable entry point of the naive reduction along `dim`.
///
/// The host starts it through `naive_reduce_dim_kernel::launch_unchecked`. The body only
/// resolves the reduction family `RD` to its implementation for the input element type
/// (`RD::Reduce<EI>`) and forwards `input`, `output` and `dim` to `naive_reduce`.
#[cube(launch_unchecked)]
pub(crate) fn naive_reduce_dim_kernel<RD: ReduceDimNaiveFamily, EI: Numeric, EO: Numeric>(
input: &Tensor<EI>,
output: &mut Tensor<EO>,
dim: u32,
) {
// Pick the concrete reduction for `EI` out of the family, then delegate.
naive_reduce::<RD::Reduce<EI>, EI, EO>(input, output, dim)
}
/// Naive reduction of `input` along `dim`; the result is stored through `RD::assign_naive`.
///
/// Positions at or past `output.len()` exit immediately. Every other position:
/// 1. splits `ABSOLUTE_POS` into one coordinate per axis using the strides and shape of
///    `output`, and turns the coordinates of all axes except `dim` into an offset into `input`;
/// 2. walks `input` along `dim` starting from that offset, handing each value together with
///    its index along `dim` to `RD::inner_loop_naive`;
/// 3. passes `output`, the accumulator and the length of the reduced axis to
///    `RD::assign_naive`.
#[cube]
fn naive_reduce<RD: ReduceDimNaive<EI>, EI: Numeric, EO: Numeric>(
    input: &Tensor<EI>,
    output: &mut Tensor<EO>,
    dim: u32,
) {
    // Surplus positions have nothing to compute.
    if ABSOLUTE_POS >= output.len() {
        return;
    }

    // Offset into `input` contributed by every axis other than `dim`.
    let mut base_offset = 0;

    // NOTE(review): axes are counted with `input.rank()` but also used to index `output`, so
    // `output` presumably has the same rank as `input` — confirm against the output allocator.
    for axis in 0..input.rank() {
        let mut coordinate = ABSOLUTE_POS / output.stride(axis);
        coordinate %= output.shape(axis);

        if axis != dim {
            base_offset += coordinate * input.stride(axis);
        }
    }

    let mut state = RD::initialize_naive();

    // Visit every element of the reduced axis that shares the coordinates computed above.
    for step in 0..input.shape(dim) {
        let position = step * input.stride(dim) + base_offset;
        RD::inner_loop_naive(&mut state, input[position], step);
    }

    RD::assign_naive::<EO>(output, state, input.shape(dim));
}
/// Launches the naive reduce-dim kernel on `input`, reducing along `dim`.
///
/// The result tensor is created by `init_reduce_output`; the launch is sized elementwise over
/// that tensor's element count using the default cube dimensions. Every path visible in this
/// function returns `Ok` with the result tensor.
pub fn reduce_dim_naive<RD: ReduceDimNaiveFamily, R: JitRuntime, EI: JitElement, EO: JitElement>(
    input: JitTensor<R>,
    dim: usize,
) -> Result<JitTensor<R>, String> {
    let reduced = init_reduce_output::<R, EI, EO>(&input, dim);
    let output_elements = reduced.shape.num_elements();

    let launch_dim = CubeDim::default();
    let launch_count = calculate_cube_count_elemwise(output_elements, launch_dim);

    // The kernel is started through its unchecked launcher, hence the `unsafe` block.
    // NOTE(review): soundness presumably rests on the kernel's own early return for positions
    // at or past `output.len()` and on `dim` being a valid axis of `input` — confirm.
    // Both tensor arguments are built with `1` (presumably the vectorization factor — confirm).
    unsafe {
        naive_reduce_dim_kernel::launch_unchecked::<RD, EI, EO, R>(
            &input.client,
            launch_count,
            launch_dim,
            input.as_tensor_arg::<EI>(1),
            reduced.as_tensor_arg::<EO>(1),
            ScalarArg::new(dim as u32),
        );
    }

    Ok(reduced)
}