generated from guardrails-ai/validator-template
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathserve
57 lines (43 loc) · 1.59 KB
/
serve
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
import multiprocessing
import os
import signal
import subprocess
import sys
import spacy
# Gunicorn sizing, resolved once when the module is loaded.
# By default use half of the reported CPUs, but never fewer than one worker.
cpu_count = multiprocessing.cpu_count()
default_worker_count = max(1, cpu_count // 2)

# Both settings can be overridden through the environment. The timeout is kept
# exactly as read (a string from the environment, the int 60 otherwise) and is
# stringified where it is used; the worker count is always coerced to int.
model_server_timeout = os.getenv('MODEL_SERVER_TIMEOUT', 60)
model_server_workers = int(os.getenv('MODEL_SERVER_WORKERS', default_worker_count))
def sigterm_handler(gunicorn_pid):
    """Send SIGTERM to the Gunicorn process, then exit with status 0.

    Args:
        gunicorn_pid: Process id that should receive SIGTERM.

    Raises:
        SystemExit: Always, with exit code 0, once the signal has been
            attempted.
    """
    try:
        os.kill(gunicorn_pid, signal.SIGTERM)
    except OSError:
        # Best effort: a failure to deliver the signal (for example because
        # the process no longer exists) must not prevent this process from
        # exiting.
        pass

    raise SystemExit(0)
def save_and_load_model():
    """Make sure the spaCy ``en_core_web_trf`` package is installed.

    If spaCy already reports the model as an installed package, nothing
    happens. Otherwise a notice is printed, ``spacy.require_gpu()`` is
    called, and the model is downloaded through ``spacy.cli.download``.
    """
    model = "en_core_web_trf"

    # Nothing to do when the model package is already available.
    if spacy.util.is_package(model):
        return

    notice = (
        f"Spacy model {model} not installed. "
        "Download should start now and take a few minutes."
    )
    print(notice)
    spacy.require_gpu()
    spacy.cli.download(model)  # type: ignore
def start_server():
    """Start Gunicorn for ``app:app`` and block until it exits.

    Announces the configured worker count, ensures the spaCy model is
    installed via ``save_and_load_model()``, launches Gunicorn with Uvicorn
    workers bound to ``0.0.0.0:8080``, and installs a SIGTERM handler that
    forwards the signal to the Gunicorn process through ``sigterm_handler``.
    """
    print(f'Starting the inference server with {model_server_workers} workers.')
    save_and_load_model()

    # Start Gunicorn to serve the FastAPI app.
    command = [
        'gunicorn',
        '--timeout', str(model_server_timeout),
        '-k', 'uvicorn.workers.UvicornWorker',
        '-b', '0.0.0.0:8080',
        '-w', str(model_server_workers),
        'app:app',
    ]
    gunicorn = subprocess.Popen(command)

    def forward_sigterm(signum, frame):
        # Relay SIGTERM received by this launcher to the Gunicorn child.
        sigterm_handler(gunicorn.pid)

    signal.signal(signal.SIGTERM, forward_sigterm)

    # Block here until the Gunicorn process terminates.
    gunicorn.wait()
    print('Inference server exiting')
# Script entry point: launch the server only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    start_server()