Skip to content

Commit 3f5e3f6

Browse files
authored
Merge branch 'main' into fix_encoder_only_models
2 parents 0c767bd + 1a0b157 commit 3f5e3f6

File tree

217 files changed

+3366
-875
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

217 files changed

+3366
-875
lines changed

.buildkite/test-pipeline.yaml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ steps:
5757
- pytest -v -s -m 'not cpu_test' multimodal
5858
- pytest -v -s utils_
5959

60-
- label: Async Engine, Inputs, Utils, Worker Test (CPU) # 4 mins
60+
- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 4 mins
6161
timeout_in_minutes: 10
6262
source_file_dependencies:
6363
- vllm/
@@ -66,13 +66,15 @@ steps:
6666
- tests/multimodal
6767
- tests/standalone_tests/lazy_imports.py
6868
- tests/transformers_utils
69+
- tests/config
6970
no_gpu: true
7071
commands:
7172
- python3 standalone_tests/lazy_imports.py
7273
- pytest -v -s test_inputs.py
7374
- pytest -v -s test_outputs.py
7475
- pytest -v -s -m 'cpu_test' multimodal
7576
- pytest -v -s transformers_utils
77+
- pytest -v -s config
7678

7779
- label: Python-only Installation Test # 10min
7880
timeout_in_minutes: 20
@@ -872,12 +874,12 @@ steps:
872874
optional: true
873875
commands:
874876
- pip install --upgrade git+https://github.com/huggingface/transformers
875-
- pytest -v -s tests/models/test_initialization.py
877+
- pytest -v -s tests/models/test_initialization.py -k 'not (Gemma3 or ModernBert or Qwen2_5_VL or Qwen2_5vl or Qwen2VL or TransformersMultiModalEmbeddingModel or TransformersMultiModalForSequenceClassification or Ultravox or Phi4Multimodal or LlavaNextVideo or MiniCPMO or Lfm2Moe or PaliGemma or RobertaForSequenceClassification or Ovis2_5 or Fuyu or DeepseekOCR or KimiVL)'
876878
- pytest -v -s tests/models/test_transformers.py
877-
- pytest -v -s tests/models/multimodal/processing/
878-
- pytest -v -s tests/models/multimodal/test_mapping.py
879+
# - pytest -v -s tests/models/multimodal/processing/
880+
- pytest -v -s tests/models/multimodal/test_mapping.py -k 'not (Gemma3 or Qwen2VL or Qwen2_5_VL)'
879881
- python3 examples/offline_inference/basic/chat.py
880-
- python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
882+
# - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl
881883
# Whisper needs spawn method to avoid deadlock
882884
- VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
883885

CMakeLists.txt

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13")
3939
# Supported AMD GPU architectures.
4040
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")
4141

42+
# ROCm installation prefix. Default to /opt/rocm but allow override via
43+
# -DROCM_PATH=/your/rocm/path when invoking cmake.
44+
if(NOT DEFINED ROCM_PATH)
45+
set(ROCM_PATH "/opt/rocm" CACHE PATH "ROCm installation prefix")
46+
else()
47+
set(ROCM_PATH ${ROCM_PATH} CACHE PATH "ROCm installation prefix" FORCE)
48+
endif()
4249
#
4350
# Supported/expected torch versions for CUDA/ROCm.
4451
#
@@ -237,10 +244,27 @@ set_gencode_flags_for_srcs(
237244
SRCS "${VLLM_CUMEM_EXT_SRC}"
238245
CUDA_ARCHS "${CUDA_ARCHS}")
239246

240-
if(VLLM_GPU_LANG STREQUAL "CUDA")
247+
if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
241248
message(STATUS "Enabling cumem allocator extension.")
242-
# link against cuda driver library
243-
list(APPEND CUMEM_LIBS CUDA::cuda_driver)
249+
if(VLLM_GPU_LANG STREQUAL "CUDA")
250+
# link against cuda driver library
251+
list(APPEND CUMEM_LIBS CUDA::cuda_driver)
252+
else()
253+
# link against rocm driver library. Prefer an absolute path to
254+
# libamdhip64.so inside ${ROCM_PATH}/lib if available, otherwise fall
255+
# back to linking by name "amdhip64".
256+
find_library(AMDHIP64_LIB
257+
NAMES amdhip64 libamdhip64.so
258+
PATHS ${ROCM_PATH}/lib
259+
NO_DEFAULT_PATH)
260+
if(AMDHIP64_LIB)
261+
message(STATUS "Found libamdhip64 at ${AMDHIP64_LIB}")
262+
list(APPEND CUMEM_LIBS ${AMDHIP64_LIB})
263+
else()
264+
message(WARNING "libamdhip64 not found in ${ROCM_PATH}/lib; falling back to linking 'amdhip64' by name")
265+
list(APPEND CUMEM_LIBS amdhip64)
266+
endif()
267+
endif()
244268
define_extension_target(
245269
cumem_allocator
246270
DESTINATION vllm
@@ -331,7 +355,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
331355
# Keep building Marlin for 9.0 as there are some group sizes and shapes that
332356
# are not supported by Machete yet.
333357
# 9.0 for latest bf16 atomicAdd PTX
334-
cuda_archs_loose_intersection(MARLIN_ARCHS "8.0;8.7;9.0+PTX" "${CUDA_ARCHS}")
358+
cuda_archs_loose_intersection(MARLIN_ARCHS "8.0+PTX;9.0+PTX" "${CUDA_ARCHS}")
335359
if (MARLIN_ARCHS)
336360

337361
#
@@ -915,7 +939,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
915939

916940
list(APPEND VLLM_MOE_EXT_SRC "${VLLM_MOE_WNA16_SRC}")
917941
# 9.0 for latest bf16 atomicAdd PTX
918-
cuda_archs_loose_intersection(MARLIN_MOE_ARCHS "8.0;8.7;9.0+PTX" "${CUDA_ARCHS}")
942+
cuda_archs_loose_intersection(MARLIN_MOE_ARCHS "8.0+PTX;9.0+PTX" "${CUDA_ARCHS}")
919943
if (MARLIN_MOE_ARCHS)
920944

921945
#

0 commit comments

Comments
 (0)