6
6
7
7
import pytest
8
8
9
+ from vllm .config import LoadFormat
9
10
from vllm .engine .arg_utils import AsyncEngineArgs , EngineArgs
10
11
from vllm .engine .async_llm_engine import AsyncLLMEngine
11
12
from vllm .engine .llm_engine import LLMEngine
12
13
from vllm .executor .uniproc_executor import UniProcExecutor
13
14
from vllm .sampling_params import SamplingParams
14
15
16
+ from ..conftest import MODEL_WEIGHTS_S3_BUCKET
17
+
18
+ RUNAI_STREAMER_LOAD_FORMAT = LoadFormat .RUNAI_STREAMER
19
+
15
20
16
21
class Mock :
17
22
...
@@ -33,10 +38,11 @@ def collective_rpc(self,
33
38
CustomUniExecutorAsync = CustomUniExecutor
34
39
35
40
36
- @pytest .mark .parametrize ("model" , ["facebook/opt-125m " ])
41
+ @pytest .mark .parametrize ("model" , [f" { MODEL_WEIGHTS_S3_BUCKET } /distilgpt2 " ])
37
42
def test_custom_executor_type_checking (model ):
38
43
with pytest .raises (ValueError ):
39
44
engine_args = EngineArgs (model = model ,
45
+ load_format = RUNAI_STREAMER_LOAD_FORMAT ,
40
46
distributed_executor_backend = Mock )
41
47
LLMEngine .from_engine_args (engine_args )
42
48
with pytest .raises (ValueError ):
@@ -45,7 +51,7 @@ def test_custom_executor_type_checking(model):
45
51
AsyncLLMEngine .from_engine_args (engine_args )
46
52
47
53
48
- @pytest .mark .parametrize ("model" , ["facebook/opt-125m " ])
54
+ @pytest .mark .parametrize ("model" , [f" { MODEL_WEIGHTS_S3_BUCKET } /distilgpt2 " ])
49
55
def test_custom_executor (model , tmp_path ):
50
56
cwd = os .path .abspath ("." )
51
57
os .chdir (tmp_path )
@@ -54,6 +60,7 @@ def test_custom_executor(model, tmp_path):
54
60
55
61
engine_args = EngineArgs (
56
62
model = model ,
63
+ load_format = RUNAI_STREAMER_LOAD_FORMAT ,
57
64
distributed_executor_backend = CustomUniExecutor ,
58
65
enforce_eager = True , # reduce test time
59
66
)
@@ -68,7 +75,7 @@ def test_custom_executor(model, tmp_path):
68
75
os .chdir (cwd )
69
76
70
77
71
- @pytest .mark .parametrize ("model" , ["facebook/opt-125m " ])
78
+ @pytest .mark .parametrize ("model" , [f" { MODEL_WEIGHTS_S3_BUCKET } /distilgpt2 " ])
72
79
def test_custom_executor_async (model , tmp_path ):
73
80
cwd = os .path .abspath ("." )
74
81
os .chdir (tmp_path )
@@ -77,6 +84,7 @@ def test_custom_executor_async(model, tmp_path):
77
84
78
85
engine_args = AsyncEngineArgs (
79
86
model = model ,
87
+ load_format = RUNAI_STREAMER_LOAD_FORMAT ,
80
88
distributed_executor_backend = CustomUniExecutorAsync ,
81
89
enforce_eager = True , # reduce test time
82
90
)
@@ -95,7 +103,7 @@ async def t():
95
103
os .chdir (cwd )
96
104
97
105
98
- @pytest .mark .parametrize ("model" , ["facebook/opt-125m " ])
106
+ @pytest .mark .parametrize ("model" , [f" { MODEL_WEIGHTS_S3_BUCKET } /distilgpt2 " ])
99
107
def test_respect_ray (model ):
100
108
# even for TP=1 and PP=1,
101
109
# if users specify ray, we should use ray.
@@ -104,6 +112,7 @@ def test_respect_ray(model):
104
112
engine_args = EngineArgs (
105
113
model = model ,
106
114
distributed_executor_backend = "ray" ,
115
+ load_format = RUNAI_STREAMER_LOAD_FORMAT ,
107
116
enforce_eager = True , # reduce test time
108
117
)
109
118
engine = LLMEngine .from_engine_args (engine_args )
0 commit comments