Commit 5f6d10c

[CI/Build] Enforce style for C++ and CUDA code with clang-format (vllm-project#4722)
1 parent 9b9a10d commit 5f6d10c

64 files changed, +6571 -6963 lines

.clang-format

+26
@@ -0,0 +1,26 @@
+BasedOnStyle: Google
+UseTab: Never
+IndentWidth: 2
+ColumnLimit: 80
+
+# Force pointers to the type for C++.
+DerivePointerAlignment: false
+PointerAlignment: Left
+
+# Reordering #include statements can (and currently will) introduce errors
+SortIncludes: false
+
+# Style choices
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+IndentPPDirectives: BeforeHash
+
+IncludeCategories:
+  - Regex: '^<'
+    Priority: 4
+  - Regex: '^"(llvm|llvm-c|clang|clang-c|mlir|mlir-c)/'
+    Priority: 3
+  - Regex: '^"(qoda|\.\.)/'
+    Priority: 2
+  - Regex: '.*'
+    Priority: 1
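To make the effect of these settings concrete, here is a small hypothetical C++ snippet (not taken from the repository; the function name and body are invented for illustration) formatted the way this configuration asks: Google base style, two-space indents, an 80-column limit, pointers attached to the type, and a space after `template`.

// Hypothetical example, formatted to match the configuration above.
namespace vllm {

// PointerAlignment: Left keeps the '*' on the type: "scalar_t* out".
template <typename scalar_t>
void scale_rows(scalar_t* out, const scalar_t* in, int num_rows, int num_cols,
                scalar_t factor) {
  // IndentWidth: 2 and ColumnLimit: 80 govern the indentation and wrapping.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      out[row * num_cols + col] = in[row * num_cols + col] * factor;
    }
  }
}

}  // namespace vllm

Note that with SortIncludes disabled, the IncludeCategories priorities stay dormant; they only take effect if include sorting is turned back on later.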

.github/workflows/clang-format.yml

+42
@@ -0,0 +1,42 @@
+name: clang-format
+
+on:
+  # Trigger the workflow on push or pull request,
+  # but only for the main branch
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  clang-format:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.11"]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install clang-format==18.1.5
+    - name: Running clang-format
+      run: |
+        EXCLUDES=(
+            'csrc/moe/topk_softmax_kernels.cu'
+            'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
+            'csrc/punica/bgmv/bgmv_config.h'
+            'csrc/punica/bgmv/bgmv_impl.cuh'
+            'csrc/punica/bgmv/vec_dtypes.cuh'
+            'csrc/punica/punica_ops.cu'
+            'csrc/punica/type_convert.h'
+        )
+        find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
+            | grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
+            | xargs clang-format --dry-run --Werror
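Contributors can reproduce the same check locally. The sketch below is not part of the commit; it simply reuses the workflow's clang-format pin and exclude list, and assumes a Python environment where pip is available.

#!/usr/bin/env bash
# Local sketch mirroring the CI job above (not part of the commit).
set -euo pipefail

pip install clang-format==18.1.5

# Same files the workflow exempts from enforcement.
EXCLUDES=(
    'csrc/moe/topk_softmax_kernels.cu'
    'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
    'csrc/punica/bgmv/bgmv_config.h'
    'csrc/punica/bgmv/bgmv_impl.cuh'
    'csrc/punica/bgmv/vec_dtypes.cuh'
    'csrc/punica/punica_ops.cu'
    'csrc/punica/type_convert.h'
)

find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
    | grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
    | xargs clang-format --dry-run --Werror

Swapping `--dry-run --Werror` for `-i` rewrites any offending files in place instead of failing the check.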

csrc/activation_kernels.cu

+64 -75
@@ -10,11 +10,11 @@
 namespace vllm {
 
 // Activation and gating kernel template.
-template<typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&)>
+template <typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&)>
 __global__ void act_and_mul_kernel(
-  scalar_t* __restrict__ out,               // [..., d]
-  const scalar_t* __restrict__ input,       // [..., 2, d]
-  const int d) {
+    scalar_t* __restrict__ out,          // [..., d]
+    const scalar_t* __restrict__ input,  // [..., 2, d]
+    const int d) {
   const int64_t token_idx = blockIdx.x;
   for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) {
     const scalar_t x = VLLM_LDG(&input[token_idx * 2 * d + idx]);
@@ -23,139 +23,128 @@ __global__ void act_and_mul_kernel(
   }
 }
 
-template<typename T>
+template <typename T>
 __device__ __forceinline__ T silu_kernel(const T& x) {
   // x * sigmoid(x)
-  return (T) (((float) x) / (1.0f + expf((float) -x)));
+  return (T)(((float)x) / (1.0f + expf((float)-x)));
 }
 
-template<typename T>
+template <typename T>
 __device__ __forceinline__ T gelu_kernel(const T& x) {
   // Equivalent to PyTorch GELU with 'none' approximation.
   // Refer to:
   // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L36-L38
-  const float f = (float) x;
+  const float f = (float)x;
   constexpr float ALPHA = M_SQRT1_2;
-  return (T) (f * 0.5f * (1.0f + ::erf(f * ALPHA)));
+  return (T)(f * 0.5f * (1.0f + ::erf(f * ALPHA)));
 }
 
-template<typename T>
+template <typename T>
 __device__ __forceinline__ T gelu_tanh_kernel(const T& x) {
   // Equivalent to PyTorch GELU with 'tanh' approximation.
   // Refer to:
   // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L25-L30
-  const float f = (float) x;
+  const float f = (float)x;
   constexpr float BETA = M_SQRT2 * M_2_SQRTPI * 0.5f;
   constexpr float KAPPA = 0.044715;
   float x_cube = f * f * f;
   float inner = BETA * (f + KAPPA * x_cube);
-  return (T) (0.5f * f * (1.0f + ::tanhf(inner)));
+  return (T)(0.5f * f * (1.0f + ::tanhf(inner)));
 }
 
-} // namespace vllm
+}  // namespace vllm
 
 // Launch activation and gating kernel.
-#define LAUNCH_ACTIVATION_GATE_KERNEL(KERNEL) \
-  int d = input.size(-1) / 2; \
-  int64_t num_tokens = input.numel() / input.size(-1); \
-  dim3 grid(num_tokens); \
-  dim3 block(std::min(d, 1024)); \
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
-  VLLM_DISPATCH_FLOATING_TYPES( \
-    input.scalar_type(), \
-    "act_and_mul_kernel", \
-    [&] { \
-      vllm::act_and_mul_kernel<scalar_t, KERNEL<scalar_t>><<<grid, block, 0, stream>>>( \
-        out.data_ptr<scalar_t>(), \
-        input.data_ptr<scalar_t>(), \
-        d); \
-    });
-
-void silu_and_mul(
-  torch::Tensor& out,      // [..., d]
-  torch::Tensor& input)    // [..., 2 * d]
+#define LAUNCH_ACTIVATION_GATE_KERNEL(KERNEL)                            \
+  int d = input.size(-1) / 2;                                            \
+  int64_t num_tokens = input.numel() / input.size(-1);                   \
+  dim3 grid(num_tokens);                                                 \
+  dim3 block(std::min(d, 1024));                                         \
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));      \
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();          \
+  VLLM_DISPATCH_FLOATING_TYPES(                                          \
+      input.scalar_type(), "act_and_mul_kernel", [&] {                   \
+        vllm::act_and_mul_kernel<scalar_t, KERNEL<scalar_t>>             \
+            <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(),       \
+                                         input.data_ptr<scalar_t>(), d); \
+      });
+
+void silu_and_mul(torch::Tensor& out,    // [..., d]
+                  torch::Tensor& input)  // [..., 2 * d]
 {
   LAUNCH_ACTIVATION_GATE_KERNEL(vllm::silu_kernel);
 }
 
-void gelu_and_mul(
-  torch::Tensor& out,      // [..., d]
-  torch::Tensor& input)    // [..., 2 * d]
+void gelu_and_mul(torch::Tensor& out,    // [..., d]
+                  torch::Tensor& input)  // [..., 2 * d]
 {
   LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_kernel);
 }
 
-void gelu_tanh_and_mul(
-  torch::Tensor& out,      // [..., d]
-  torch::Tensor& input)    // [..., 2 * d]
+void gelu_tanh_and_mul(torch::Tensor& out,    // [..., d]
+                       torch::Tensor& input)  // [..., 2 * d]
 {
   LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_tanh_kernel);
 }
 
 namespace vllm {
 
 // Element-wise activation kernel template.
-template<typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&)>
+template <typename scalar_t, scalar_t (*ACT_FN)(const scalar_t&)>
 __global__ void activation_kernel(
-  scalar_t* __restrict__ out,               // [..., d]
-  const scalar_t* __restrict__ input,       // [..., d]
-  const int d) {
+    scalar_t* __restrict__ out,          // [..., d]
+    const scalar_t* __restrict__ input,  // [..., d]
+    const int d) {
   const int64_t token_idx = blockIdx.x;
   for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) {
     const scalar_t x = VLLM_LDG(&input[token_idx * d + idx]);
     out[token_idx * d + idx] = ACT_FN(x);
   }
 }
 
-} // namespace vllm
+}  // namespace vllm
 
 // Launch element-wise activation kernel.
-#define LAUNCH_ACTIVATION_KERNEL(KERNEL) \
-  int d = input.size(-1); \
-  int64_t num_tokens = input.numel() / d; \
-  dim3 grid(num_tokens); \
-  dim3 block(std::min(d, 1024)); \
-  const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \
-  const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \
-  VLLM_DISPATCH_FLOATING_TYPES( \
-    input.scalar_type(), \
-    "activation_kernel", \
-    [&] { \
-      vllm::activation_kernel<scalar_t, KERNEL<scalar_t>><<<grid, block, 0, stream>>>( \
-        out.data_ptr<scalar_t>(), \
-        input.data_ptr<scalar_t>(), \
-        d); \
-    });
+#define LAUNCH_ACTIVATION_KERNEL(KERNEL)                                       \
+  int d = input.size(-1);                                                      \
+  int64_t num_tokens = input.numel() / d;                                      \
+  dim3 grid(num_tokens);                                                       \
+  dim3 block(std::min(d, 1024));                                               \
+  const at::cuda::OptionalCUDAGuard device_guard(device_of(input));            \
+  const cudaStream_t stream = at::cuda::getCurrentCUDAStream();                \
+  VLLM_DISPATCH_FLOATING_TYPES(input.scalar_type(), "activation_kernel", [&] { \
+    vllm::activation_kernel<scalar_t, KERNEL<scalar_t>>                        \
+        <<<grid, block, 0, stream>>>(out.data_ptr<scalar_t>(),                 \
+                                     input.data_ptr<scalar_t>(), d);           \
+  });
 
 namespace vllm {
 
-template<typename T>
+template <typename T>
 __device__ __forceinline__ T gelu_new_kernel(const T& x) {
-  const float x3 = (float) (x * x * x);
-  const T t = (T) tanhf((T) (0.79788456f * (float) (x + (T) (0.044715f * x3))));
-  return ((T) 0.5) * x * (((T) 1.0) + t);
+  const float x3 = (float)(x * x * x);
+  const T t = (T)tanhf((T)(0.79788456f * (float)(x + (T)(0.044715f * x3))));
+  return ((T)0.5) * x * (((T)1.0) + t);
 }
 
-template<typename T>
+template <typename T>
 __device__ __forceinline__ T gelu_fast_kernel(const T& x) {
-  const float f = (float) x;
-  const T t = (T) tanhf(((T) (f * 0.79788456f)) * (((T) 1.0) + (T) (0.044715f * f) * x));
-  return ((T) 0.5) * x * (((T) 1.0) + t);
+  const float f = (float)x;
+  const T t =
+      (T)tanhf(((T)(f * 0.79788456f)) * (((T)1.0) + (T)(0.044715f * f) * x));
+  return ((T)0.5) * x * (((T)1.0) + t);
 }
 
-} // namespace vllm
+}  // namespace vllm
 
-void gelu_new(
-  torch::Tensor& out,      // [..., d]
-  torch::Tensor& input)    // [..., d]
+void gelu_new(torch::Tensor& out,    // [..., d]
+              torch::Tensor& input)  // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_new_kernel);
 }
 
-void gelu_fast(
-  torch::Tensor& out,      // [..., d]
-  torch::Tensor& input)    // [..., d]
+void gelu_fast(torch::Tensor& out,    // [..., d]
+               torch::Tensor& input)  // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_fast_kernel);
 }

csrc/attention/attention_generic.cuh

+10 -9
@@ -1,5 +1,6 @@
 /*
- * Adapted from https://github.com/NVIDIA/FasterTransformer/blob/release/v5.3_tag/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h
+ * Adapted from
+ * https://github.com/NVIDIA/FasterTransformer/blob/release/v5.3_tag/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h
  * Copyright (c) 2023, The vLLM team.
  * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
  *
@@ -22,31 +23,31 @@
 namespace vllm {
 
 // A vector type to store Q, K, V elements.
-template<typename T, int VEC_SIZE>
+template <typename T, int VEC_SIZE>
 struct Vec {};
 
 // A vector type to store FP32 accumulators.
-template<typename T>
+template <typename T>
 struct FloatVec {};
 
 // Template vector operations.
-template<typename Acc, typename A, typename B>
+template <typename Acc, typename A, typename B>
 inline __device__ Acc mul(A a, B b);
 
-template<typename T>
+template <typename T>
 inline __device__ float sum(T v);
 
-template<typename T>
+template <typename T>
 inline __device__ float dot(T a, T b) {
   return sum(mul<T, T, T>(a, b));
 }
 
-template<typename A, typename T>
+template <typename A, typename T>
 inline __device__ float dot(T a, T b) {
   return sum(mul<A, T, T>(a, b));
 }
 
-template<typename T>
+template <typename T>
 inline __device__ void zero(T& dst) {
   constexpr int WORDS = sizeof(T) / 4;
   union {
@@ -61,4 +62,4 @@ inline __device__ void zero(T& dst) {
   dst = tmp.raw;
 }
 
-} // namespace vllm
+}  // namespace vllm
