-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path: docker-compose.yaml
109 lines (103 loc) · 2.63 KB
/
docker-compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Compose stack for building and running TensorRT-LLM with Triton Inference
# Server, plus helper one-shot services for the enc_dec example
# (download weights, build engines) and a Jupyter-based client.
services:
  # Builds the TensorRT-LLM release image on top of the NVIDIA PyTorch base.
  trt-llm-backend:
    build:
      context: TensorRT-LLM
      target: release
      args:
        - BASE_IMAGE=nvcr.io/nvidia/pytorch
        - BASE_TAG=23.10-py3
      dockerfile: docker/Dockerfile.multi
    image: tensorrt_llm:trt-llm-backend

  # Same TensorRT-LLM build, but layered on the Triton Server base image.
  triton-backend:
    build:
      context: TensorRT-LLM
      target: release
      args:
        - BASE_IMAGE=nvcr.io/nvidia/tritonserver
        - BASE_TAG=23.10-pyt-python-py3
      dockerfile: docker/Dockerfile.multi
    image: tensorrt_llm:triton-backend

  # Final Triton + TensorRT-LLM image, built from the local docker/ context
  # on top of the triton-backend image above.
  triton-trt-llm:
    build:
      context: docker
      dockerfile: Dockerfile
      target: release
      args:
        - BASE_IMAGE_TAG=tensorrt_llm:triton-backend
    image: tensorrt_llm:triton-trt-llm

  # One-shot helper: downloads model weights into the bind-mounted
  # enc_dec example directory, so they persist on the host.
  download:
    image: tensorrt_llm:trt-llm-backend
    working_dir: /app/tensorrt_llm/examples/enc_dec
    volumes:
      - type: bind
        source: TensorRT-LLM/examples/enc_dec/
        target: /app/tensorrt_llm/examples/enc_dec/
    entrypoint: ["python3", "download.py"]

  # One-shot helper: compiles the TensorRT engines from the downloaded
  # weights. Requires the NVIDIA runtime (GPU access).
  build:
    image: tensorrt_llm:trt-llm-backend
    working_dir: /app/tensorrt_llm/examples/enc_dec
    runtime: nvidia
    volumes:
      - type: bind
        source: TensorRT-LLM/examples/enc_dec/
        target: /app/tensorrt_llm/examples/enc_dec/
    entrypoint:
      [
        "python3",
        "build.py",
        "--model_dir=./models/",
        "--use_bert_attention_plugin",
        "--use_gpt_attention_plugin",
        "--dtype=float16",
        "--max_beam_width=3",
      ]

  # Triton Inference Server serving the built engines from /models.
  triton-server:
    image: tensorrt_llm:triton-backend
    runtime: nvidia
    environment:
      - PYTHONPATH=/models/t5/1
    # HTTP (8000), gRPC (8001), and metrics (8002) endpoints.
    # Quoted to avoid YAML's digit:digit (sexagesimal) implicit typing.
    ports:
      - "8000:8000"
      - "8001:8001"
      - "8002:8002"
    env_file:
      - .env
    volumes:
      # TRANSFORMERS_CACHE must be set in the environment / .env file.
      - type: bind
        source: cache
        target: ${TRANSFORMERS_CACHE}
      - type: bind
        source: TensorRT-LLM/examples/enc_dec/trt_engines
        target: /trt_engines
      - type: bind
        source: models
        target: /models
    entrypoint:
      ["tritonserver", "--model-repository=/models", "--log-verbose=0"]

  # JupyterLab client container for exercising the Triton server.
  # NOTE(review): token/password are empty — the lab is unauthenticated;
  # intended for local use only.
  triton-client:
    build:
      context: docker
      target: devel
      args:
        - BASE_IMAGE_TAG=nvcr.io/nvidia/tritonserver:23.10-py3-sdk
    image: tensorrt_llm:triton-trt-llm-client
    ports:
      - "8888:8888"
    env_file:
      - .env
    working_dir: /workspace
    volumes:
      - type: bind
        source: workspace
        target: /workspace
    entrypoint:
      [
        "jupyter",
        "lab",
        "--ServerApp.ip=0.0.0.0",
        "--ServerApp.port=8888",
        "--ServerApp.allow_root=True",
        "--ServerApp.token=''",
        "--ServerApp.password=''",
        "--no-browser",
      ]