Commit 7eb0cb4

Revert "[Frontend] Factor out code for running uvicorn" (vllm-project#7012)
Co-authored-by: Robert Shaw <[email protected]>
1 parent a0dce93 commit 7eb0cb4

5 files changed: +75 -117 lines

pyproject.toml (-1)

@@ -60,7 +60,6 @@ files = [
     "vllm/logging",
     "vllm/multimodal",
     "vllm/platforms",
-    "vllm/server",
     "vllm/transformers_utils",
     "vllm/triton_utils",
     "vllm/usage",

vllm/entrypoints/api_server.py (+24 -50)

@@ -5,23 +5,21 @@
 We are also not going to accept PRs modifying this file, please
 change `vllm/entrypoints/openai/api_server.py` instead.
 """
-import asyncio
+
 import json
 import ssl
-from argparse import Namespace
-from typing import Any, AsyncGenerator, Optional
+from typing import AsyncGenerator
 
+import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, Response, StreamingResponse
 
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.logger import init_logger
 from vllm.sampling_params import SamplingParams
-from vllm.server import serve_http
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser, random_uuid
-from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger("vllm.entrypoints.api_server")
 
@@ -83,50 +81,6 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
     return JSONResponse(ret)
 
 
-def build_app(args: Namespace) -> FastAPI:
-    global app
-
-    app.root_path = args.root_path
-    return app
-
-
-async def init_app(
-    args: Namespace,
-    llm_engine: Optional[AsyncLLMEngine] = None,
-) -> FastAPI:
-    app = build_app(args)
-
-    global engine
-
-    engine_args = AsyncEngineArgs.from_cli_args(args)
-    engine = (llm_engine
-              if llm_engine is not None else AsyncLLMEngine.from_engine_args(
-                  engine_args, usage_context=UsageContext.API_SERVER))
-
-    return app
-
-
-async def run_server(args: Namespace,
-                     llm_engine: Optional[AsyncLLMEngine] = None,
-                     **uvicorn_kwargs: Any) -> None:
-    logger.info("vLLM API server version %s", VLLM_VERSION)
-    logger.info("args: %s", args)
-
-    app = await init_app(args, llm_engine)
-    await serve_http(
-        app,
-        host=args.host,
-        port=args.port,
-        log_level=args.log_level,
-        timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
-        ssl_keyfile=args.ssl_keyfile,
-        ssl_certfile=args.ssl_certfile,
-        ssl_ca_certs=args.ssl_ca_certs,
-        ssl_cert_reqs=args.ssl_cert_reqs,
-        **uvicorn_kwargs,
-    )
-
-
 if __name__ == "__main__":
     parser = FlexibleArgumentParser()
     parser.add_argument("--host", type=str, default=None)
@@ -151,5 +105,25 @@ async def run_server(args: Namespace,
     parser.add_argument("--log-level", type=str, default="debug")
     parser = AsyncEngineArgs.add_cli_args(parser)
     args = parser.parse_args()
+    engine_args = AsyncEngineArgs.from_cli_args(args)
+    engine = AsyncLLMEngine.from_engine_args(
+        engine_args, usage_context=UsageContext.API_SERVER)
+
+    app.root_path = args.root_path
 
-    asyncio.run(run_server(args))
+    logger.info("Available routes are:")
+    for route in app.routes:
+        if not hasattr(route, 'methods'):
+            continue
+        methods = ', '.join(route.methods)
+        logger.info("Route: %s, Methods: %s", route.path, methods)
+
+    uvicorn.run(app,
+                host=args.host,
+                port=args.port,
+                log_level=args.log_level,
+                timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
+                ssl_keyfile=args.ssl_keyfile,
+                ssl_certfile=args.ssl_certfile,
+                ssl_ca_certs=args.ssl_ca_certs,
+                ssl_cert_reqs=args.ssl_cert_reqs)

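For context, the diff above restores the blocking launch style in api_server.py: a module-level FastAPI app handed directly to uvicorn.run() under `if __name__ == "__main__"`. The following is a minimal self-contained sketch of that style; the toy /health route and the host/port values are illustrative and not part of vLLM.

```python
# Minimal sketch (toy route, host, and port are illustrative; not vLLM
# code) of the blocking launch style restored in api_server.py above.
import uvicorn
from fastapi import FastAPI

app = FastAPI()


@app.get("/health")
async def health():
    return {"status": "ok"}


if __name__ == "__main__":
    # uvicorn.run() blocks, owns the event loop, and installs its own
    # signal handlers, so no explicit asyncio plumbing is needed here.
    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="debug")
```
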
vllm/entrypoints/openai/api_server.py (+51 -21)

@@ -2,12 +2,14 @@
 import importlib
 import inspect
 import re
-from argparse import Namespace
+import signal
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import Any, Optional, Set
+from typing import Optional, Set
 
-from fastapi import APIRouter, FastAPI, Request
+import fastapi
+import uvicorn
+from fastapi import APIRouter, Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response, StreamingResponse
@@ -36,7 +38,6 @@
 from vllm.entrypoints.openai.serving_tokenization import (
     OpenAIServingTokenization)
 from vllm.logger import init_logger
-from vllm.server import serve_http
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser
 from vllm.version import __version__ as VLLM_VERSION
@@ -56,7 +57,7 @@
 
 
 @asynccontextmanager
-async def lifespan(app: FastAPI):
+async def lifespan(app: fastapi.FastAPI):
 
     async def _force_log():
         while True:
@@ -74,7 +75,7 @@ async def _force_log():
 router = APIRouter()
 
 
-def mount_metrics(app: FastAPI):
+def mount_metrics(app: fastapi.FastAPI):
     # Add prometheus asgi middleware to route /metrics requests
     metrics_route = Mount("/metrics", make_asgi_app())
     # Workaround for 307 Redirect for /metrics
@@ -164,8 +165,8 @@ async def create_embedding(request: EmbeddingRequest, raw_request: Request):
         return JSONResponse(content=generator.model_dump())
 
 
-def build_app(args: Namespace) -> FastAPI:
-    app = FastAPI(lifespan=lifespan)
+def build_app(args):
+    app = fastapi.FastAPI(lifespan=lifespan)
     app.include_router(router)
     app.root_path = args.root_path
 
@@ -213,8 +214,11 @@ async def authentication(request: Request, call_next):
     return app
 
 
-async def init_app(args: Namespace,
-                   llm_engine: Optional[AsyncLLMEngine] = None) -> FastAPI:
+async def build_server(
+    args,
+    llm_engine: Optional[AsyncLLMEngine] = None,
+    **uvicorn_kwargs,
+) -> uvicorn.Server:
     app = build_app(args)
 
     if args.served_model_name is not None:
@@ -277,17 +281,14 @@ async def init_app(args: Namespace,
     )
     app.root_path = args.root_path
 
-    return app
-
-
-async def run_server(args: Namespace,
-                     llm_engine: Optional[AsyncLLMEngine] = None,
-                     **uvicorn_kwargs: Any) -> None:
-    logger.info("vLLM API server version %s", VLLM_VERSION)
-    logger.info("args: %s", args)
+    logger.info("Available routes are:")
+    for route in app.routes:
+        if not hasattr(route, 'methods'):
+            continue
+        methods = ', '.join(route.methods)
+        logger.info("Route: %s, Methods: %s", route.path, methods)
 
-    app = await init_app(args, llm_engine)
-    await serve_http(
+    config = uvicorn.Config(
         app,
         host=args.host,
         port=args.port,
@@ -300,6 +301,36 @@ async def run_server(args: Namespace,
         **uvicorn_kwargs,
     )
 
+    return uvicorn.Server(config)
+
+
+async def run_server(args, llm_engine=None, **uvicorn_kwargs) -> None:
+    logger.info("vLLM API server version %s", VLLM_VERSION)
+    logger.info("args: %s", args)
+
+    server = await build_server(
+        args,
+        llm_engine,
+        **uvicorn_kwargs,
+    )
+
+    loop = asyncio.get_running_loop()
+
+    server_task = loop.create_task(server.serve())
+
+    def signal_handler() -> None:
+        # prevents the uvicorn signal handler to exit early
+        server_task.cancel()
+
+    loop.add_signal_handler(signal.SIGINT, signal_handler)
+    loop.add_signal_handler(signal.SIGTERM, signal_handler)
+
+    try:
+        await server_task
+    except asyncio.CancelledError:
+        print("Gracefully stopping http server")
+        await server.shutdown()
+
 
 if __name__ == "__main__":
     # NOTE(simon):
@@ -308,5 +339,4 @@ async def run_server(args: Namespace,
         description="vLLM OpenAI-Compatible RESTful API server.")
     parser = make_arg_parser(parser)
     args = parser.parse_args()
-
     asyncio.run(run_server(args))

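The run_server logic restored above follows the uvicorn.Server-as-asyncio-task pattern: the server coroutine runs as a task, custom SIGINT/SIGTERM handlers cancel it, and shutdown is awaited explicitly. Below is a minimal self-contained sketch of that pattern with a toy app; the route, host, and port are assumptions for illustration, not vLLM's.

```python
# Minimal, self-contained sketch (assumed toy app, host, and port; not
# vLLM code) of the graceful-shutdown pattern moved back inline into
# run_server() above.
import asyncio
import signal

import uvicorn
from fastapi import FastAPI

app = FastAPI()


@app.get("/ping")
async def ping():
    return {"status": "ok"}


async def main() -> None:
    config = uvicorn.Config(app, host="127.0.0.1", port=8000)
    server = uvicorn.Server(config)

    loop = asyncio.get_running_loop()
    server_task = loop.create_task(server.serve())

    def signal_handler() -> None:
        # Cancel the serving task so cleanup runs under the caller's
        # control instead of the process exiting immediately.
        server_task.cancel()

    loop.add_signal_handler(signal.SIGINT, signal_handler)
    loop.add_signal_handler(signal.SIGTERM, signal_handler)

    try:
        await server_task
    except asyncio.CancelledError:
        print("Gracefully stopping http server")
        await server.shutdown()


if __name__ == "__main__":
    asyncio.run(main())
```

Compared with the blocking uvicorn.run() style used by the demo api_server.py, this keeps the caller's event loop in charge of when and how the HTTP server stops.
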
vllm/server/__init__.py (-3 lines)
This file was deleted.

vllm/server/launch.py (-42 lines)
This file was deleted.

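The contents of the deleted helper are not shown in this view. The sketch below is only a hypothetical reconstruction of its interface, inferred from the serve_http(...) call sites removed in the diffs above; the real vllm/server/launch.py presumably also contained the graceful-shutdown logic now inlined into run_server, which this stand-in omits.

```python
# Hypothetical sketch of the deleted helper's interface, inferred only
# from the removed serve_http(...) call sites; not the deleted
# implementation itself.
from typing import Any

import uvicorn
from fastapi import FastAPI


async def serve_http(app: FastAPI, **uvicorn_kwargs: Any) -> None:
    # The call sites passed host, port, log_level, timeout_keep_alive and
    # the ssl_* options, all of which uvicorn.Config accepts directly.
    config = uvicorn.Config(app, **uvicorn_kwargs)
    await uvicorn.Server(config).serve()
```
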