-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yaml
More file actions
42 lines (38 loc) · 969 Bytes
/
docker-compose.yaml
File metadata and controls
42 lines (38 loc) · 969 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Compose stack: a batching proxy in front of a text-embeddings inference
# backend. Exactly one backend is expected to run, chosen by profile:
#   docker compose --profile cpu up   -> inference-api
#   docker compose --profile gpu up   -> inference-api-gpu
# Both backends publish host port 8080, so they cannot run at the same time.
services:
  # Proxy built from the Dockerfile in this directory. It has no profile, so
  # it starts with every invocation.
  proxy:
    build: .
    ports:
      - "8081:8081"
    environment:
      # Reached over the compose network by hostname; the GPU backend
      # registers the same hostname through a network alias.
      BATCH_PROXY__INFERENCE_API__TARGET_URL: "http://inference-api"
      # Quoted so the value is passed as a string, not a YAML integer.
      BATCH_PROXY__BATCH__MAX_BATCH_SIZE: "8"
      RUST_LOG: "debug"

  # Backend using the image tagged `cpu-1.8` (profile: cpu).
  inference-api:
    image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.8"
    ports:
      # Host 8080 -> container 80.
      - "8080:80"
    volumes:
      # Host directory mounted at /data in the container
      # (presumably the model cache; confirm against the image docs).
      - "./text-embeddings-data:/data"
    environment:
      MODEL_ID: "nomic-ai/nomic-embed-text-v1.5"
    profiles:
      - cpu

  # Backend using the image tagged `turing-1.8` (profile: gpu).
  inference-api-gpu:
    image: "ghcr.io/huggingface/text-embeddings-inference:turing-1.8"
    ports:
      # Host 8080 -> container 80.
      - "8080:80"
    volumes:
      - "./text-embeddings-data:/data"
    environment:
      MODEL_ID: "nomic-ai/nomic-embed-text-v1.5"
      DTYPE: "float16"
      # Quoted so the value is passed as a string, not a YAML integer.
      MAX_BATCH_TOKENS: "8192"
    networks:
      default:
        aliases:
          # The proxy targets http://inference-api; without this alias the
          # GPU service would only be resolvable as `inference-api-gpu`.
          - inference-api
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    profiles:
      - gpu