We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fb78f16 commit dd6822a — Copy full SHA for dd6822a
kernels/mat-transpose/mat_transpose.cu
@@ -364,8 +364,8 @@ __global__ void mat_transpose_f32x4_shared_bcf_merge_write_row2col2d_kernel(
364
dim3 block(WARP_SIZE_S, WARP_SIZE_S); \
365
dim3 grid((N + WARP_SIZE_S - 1) / (WARP_SIZE_S * n_element_col), \
366
(M + WARP_SIZE_S - 1) / (WARP_SIZE_S * n_element_row)); \
367
- mat_transpose_##tag##2d_kernel < < < grid, \
368
- block >>> (reinterpret_cast<element_type *>(x.data_ptr()), \
+ mat_transpose_##tag##2d_kernel<<<grid, block>>>( \
+ reinterpret_cast<element_type *>(x.data_ptr()), \
369
reinterpret_cast<element_type *>(y.data_ptr()), M, N); \
370
}
371
0 commit comments