
Commit 022c413

feat: added LMStudio plugin
1 parent a4d2f92 commit 022c413

6 files changed: +382 additions, -56 deletions


docker-compose.dev.yml

Lines changed: 8 additions & 2 deletions

@@ -31,6 +31,8 @@ services:
     depends_on:
       postgres:
         condition: service_healthy
+      nilauth-credit-server:
+        condition: service_healthy
     environment:
       - POSTGRES_DB=${POSTGRES_DB_NUC}
     volumes:
@@ -104,7 +106,11 @@ services:
     depends_on:
       nilauth-postgres:
         condition: service_healthy
-
-
+    healthcheck:
+      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/health"]
+      interval: 30s
+      retries: 3
+      start_period: 15s
+      timeout: 10s
 volumes:
   postgres_data:

docker/README.md

Lines changed: 14 additions & 0 deletions

@@ -34,3 +34,17 @@ docker run -d --name etcd-server \
   --env ETCD_ADVERTISE_CLIENT_URLS=http://etcd-server:2379 \
   bitnami/etcd:latest
 ```
+
+## Announcing LMStudio Models
+
+LMStudio can run on the host at `localhost:1234` while the stack runs inside Docker. Build the announcer image and bring it up alongside the core services:
+
+```sh
+docker build -t nilai/lmstudio-announcer:latest -f docker/lmstudio-announcer.Dockerfile .
+docker compose -f docker-compose.yml \
+  -f docker-compose.dev.yml \
+  -f docker/compose/docker-compose.lmstudio.yml \
+  up -d lmstudio_announcer
+```
+
+The announcer registers every model returned by `http://host.docker.internal:1234/v1/models` in etcd so that `nilai-api` can route chat requests to LMStudio. Override defaults with environment variables such as `LMSTUDIO_MODEL_IDS`, `LMSTUDIO_TOOL_SUPPORT_MODELS`, or `LMSTUDIO_MULTIMODAL_MODELS` inside the compose override.
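For reference, a minimal sketch of what such an override could look like. The variable names come from the paragraph above and the `lmstudio_announcer` service below; the model IDs and the comma-separated value format are placeholders, not taken from this commit:

```yaml
# Hypothetical local override file; the values below are placeholders, not defaults.
services:
  lmstudio_announcer:
    environment:
      # Assumption: the announcer accepts comma-separated model identifiers here.
      - LMSTUDIO_MODEL_IDS=llama-3.1-8b-instruct,qwen2.5-7b-instruct
      - LMSTUDIO_TOOL_SUPPORT_MODELS=llama-3.1-8b-instruct
      - LMSTUDIO_MULTIMODAL_MODELS=
```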
docker/compose/docker-compose.lmstudio.yml

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+services:
+  lmstudio_announcer:
+    image: nilai/lmstudio-announcer:latest
+    container_name: nilai-lmstudio-announcer
+    restart: unless-stopped
+    depends_on:
+      etcd:
+        condition: service_healthy
+    environment:
+      - SVC_HOST=host.docker.internal
+      - SVC_PORT=1234
+      - ETCD_HOST=etcd
+      - ETCD_PORT=2379
+      - LMSTUDIO_SUPPORTED_FEATURES=chat_completion
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
docker/lmstudio-announcer.Dockerfile

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+FROM python:3.12-slim
+
+ENV PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+
+WORKDIR /app
+
+COPY --link packages/nilai-common /app/packages/nilai-common
+COPY --link nilai-models /app/nilai-models
+
+RUN pip install --upgrade pip && \
+    pip install /app/packages/nilai-common /app/nilai-models
+
+ENTRYPOINT ["python", "-m", "nilai_models.lmstudio_announcer"]
Lines changed: 65 additions & 54 deletions

@@ -1,45 +1,44 @@
 # nilai/models/model.py
 import asyncio
-import signal
 import logging
+import signal
+
 import httpx
 
-from nilai_common import (  # Model service discovery and host settings
-    SETTINGS,
+from nilai_common import (
     MODEL_SETTINGS,
-    ModelServiceDiscovery,
+    SETTINGS,
     ModelEndpoint,
     ModelMetadata,
+    ModelServiceDiscovery,
 )
 
 logger = logging.getLogger(__name__)
 
 
 async def get_metadata():
-    """Fetch model metadata from model
-    service and return as ModelMetadata object"""
+    """Fetch model metadata from model service and return as ModelMetadata object."""
     current_retries = 0
     while True:
         url = None
         try:
             url = f"http://{SETTINGS.host}:{SETTINGS.port}/v1/models"
-            # Request model metadata from localhost:8000/v1/models
             async with httpx.AsyncClient() as client:
                 response = await client.get(url)
                 response.raise_for_status()
                 response_data = response.json()
                 model_name = response_data["data"][0]["id"]
                 return ModelMetadata(
-                    id=model_name,  # Unique identifier
-                    name=model_name,  # Human-readable name
-                    version="1.0",  # Model version
+                    id=model_name,
+                    name=model_name,
+                    version="1.0",
                     description="",
-                    author="",  # Model creators
-                    license="Apache 2.0",  # Usage license
-                    source=f"https://huggingface.co/{model_name}",  # Model source
-                    supported_features=["chat_completion"],  # Capabilities
-                    tool_support=SETTINGS.tool_support,  # Tool support
-                    multimodal_support=SETTINGS.multimodal_support,  # Multimodal support
+                    author="",
+                    license="Apache 2.0",
+                    source=f"https://huggingface.co/{model_name}",
+                    supported_features=["chat_completion"],
+                    tool_support=SETTINGS.tool_support,
+                    multimodal_support=SETTINGS.multimodal_support,
                 )
 
         except Exception as e:
@@ -49,16 +48,16 @@ async def get_metadata():
             logger.warning(f"Failed to fetch model metadata from {url}: {e}")
             current_retries += 1
             if (
-                MODEL_SETTINGS.num_retries
-                != -1  # If num_retries == -1 then we do infinite number of retries
+                MODEL_SETTINGS.num_retries != -1
                 and current_retries >= MODEL_SETTINGS.num_retries
             ):
                 raise e
             await asyncio.sleep(MODEL_SETTINGS.timeout)
 
 
 async def run_service(discovery_service, model_endpoint):
-    """Runs the model service and keeps it alive"""
+    """Register model with discovery service and keep it alive."""
+    lease = None
     try:
         logger.info(f"Registering model: {model_endpoint.metadata.id}")
         lease = await discovery_service.register_model(model_endpoint, prefix="/models")
@@ -73,50 +72,62 @@ async def run_service(discovery_service, model_endpoint):
         logger.error(f"Service error: {e}")
         raise
     finally:
-        try:
-            await discovery_service.unregister_model(model_endpoint.metadata.id)
-            logger.info(f"Model unregistered: {model_endpoint.metadata.id}")
-        except Exception as e:
-            logger.error(f"Error unregistering model: {e}")
+        if lease:
+            try:
+                await discovery_service.unregister_model(model_endpoint.metadata.id)
+                logger.info(f"Model unregistered: {model_endpoint.metadata.id}")
+            except Exception as e:
+                logger.error(f"Error unregistering model: {e}")
 
 
 async def main():
-    discovery_service = None
-    model_endpoint = None
-
-    try:
-        # Initialize discovery service
-        discovery_service = ModelServiceDiscovery(
-            host=SETTINGS.etcd_host, port=SETTINGS.etcd_port
-        )
-
-        metadata = await get_metadata()
-        model_endpoint = ModelEndpoint(
-            url=f"http://{SETTINGS.host}:{SETTINGS.port}", metadata=metadata
-        )
+    """Main entry point for model daemon."""
+    logging.basicConfig(level=logging.INFO)
 
-        # Setup signal handlers
-        loop = asyncio.get_running_loop()
-        for sig in (signal.SIGTERM, signal.SIGINT):
-            loop.add_signal_handler(sig, lambda: asyncio.create_task(shutdown()))
+    # Initialize discovery service
+    discovery_service = ModelServiceDiscovery(
+        host=SETTINGS.etcd_host, port=SETTINGS.etcd_port
+    )
 
-        # Run service
-        await run_service(discovery_service, model_endpoint)
+    # Fetch metadata and create endpoint
+    metadata = await get_metadata()
+    model_endpoint = ModelEndpoint(
+        url=f"http://{SETTINGS.host}:{SETTINGS.port}", metadata=metadata
+    )
 
-    except Exception as e:
-        logger.error(f"Failed to initialize model service: {e}")
-        raise
+    # Create service task
+    service_task = asyncio.create_task(run_service(discovery_service, model_endpoint))
 
-
-async def shutdown():
-    """Cleanup and shutdown"""
-    tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
-    [task.cancel() for task in tasks]
-    await asyncio.gather(*tasks, return_exceptions=True)
+    # Setup signal handling
+    stop_event = asyncio.Event()
     loop = asyncio.get_running_loop()
-    loop.stop()
+    for sig in (signal.SIGTERM, signal.SIGINT):
+        try:
+            loop.add_signal_handler(sig, stop_event.set)
+        except NotImplementedError:
+            # Windows doesn't support add_signal_handler
+            pass
+
+    # Wait for either shutdown signal or service completion
+    wait_task = asyncio.create_task(stop_event.wait())
+
+    done, _ = await asyncio.wait(
+        {wait_task, service_task}, return_when=asyncio.FIRST_COMPLETED
+    )
+
+    # Handle shutdown
+    if wait_task in done:
+        logger.info("Stop signal received; shutting down daemon")
+        service_task.cancel()
+        try:
+            await service_task
+        except asyncio.CancelledError:
+            pass
+    else:
+        # Service completed (possibly with error)
+        wait_task.cancel()
+        await service_task  # Re-raise any exception
 
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
     asyncio.run(main())
