Skip to content

Dev2 #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 32 commits into from
May 24, 2025
Merged

Dev2 #13

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f559beb
enable funcspec update
inferx-net Apr 29, 2025
dbb3af6
add delete model function in dashboard
inferx-net Apr 29, 2025
cf06ec6
add admin func
inferx-net Apr 30, 2025
fc8fea2
minor change:
inferx-net May 1, 2025
a300b8c
clean admin page
inferx-net May 1, 2025
f190684
add links in admin page
inferx-net May 1, 2025
aa053ad
update release container version
inferx-net May 1, 2025
ee7d1f8
enable k8s deployment #1
inferx-net May 3, 2025
1a4f63b
db port update
inferx-net May 3, 2025
f6d24f3
add node config
inferx-net May 3, 2025
e6bd91d
use cluster ip for etcd
inferx-net May 3, 2025
ca076a1
use container network for inferx_one and inferx_dashboard
inferx-net May 4, 2025
10d261f
expose keycloak and inferx_one with ingress
inferx-net May 4, 2025
28cd035
enable blobstore for k3s
inferx-net May 4, 2025
8a7f5ad
add spdk yaml
inferx-net May 4, 2025
b7f6369
add non-blob config
inferx-net May 5, 2025
e7868b4
non-blob inferx-one yaml
inferx-net May 5, 2025
e46771d
enable nodeagent/scheduler/statesvc
inferx-net May 6, 2025
71869c5
add nodeagent.yaml
inferx-net May 7, 2025
83d0ba9
add podip in docker compose
inferx-net May 8, 2025
2f546b8
reenable keycloak localhost access
inferx-net May 8, 2025
0acf1f3
minor fix
inferx-net May 8, 2025
01eb57d
minor fix
inferx-net May 8, 2025
19b2c43
fix func.html openai restapi token handle bug
inferx-net May 10, 2025
f925d12
code clean
inferx-net May 10, 2025
d536c2e
update resource allocation
inferx-net May 10, 2025
63e9d52
fix nodename bug
inferx-net May 13, 2025
5ac5d35
update yaml
inferx-net May 14, 2025
51d9911
enable second nodeagent
inferx-net May 14, 2025
f4fcc1c
update runtime to vllm 7.3
inferx-net May 22, 2025
083451a
move test website to /demo
inferx-net May 23, 2025
e6df608
expose https through port 8443
inferx-net May 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 42 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARCH := ${shell uname -m}

LOCAL_IP=${hostname -I | awk '{print $$1}' | xargs}
VERSION := v0.1.1
NODE_NAME=${shell hostname}

all: ctl dash spdk runmodel

Expand All @@ -14,52 +14,53 @@ dash:
-rm ./target/dashboard/* -rf
cp ./dashboard/* ./target/dashboard -rL
cp ./deployment/dashboard.Dockerfile ./target/dashboard/Dockerfile
-sudo docker image rm inferx/inferx_dashboard:v0.1.0
sudo docker build -t inferx/inferx_dashboard:v0.1.0 ./target/dashboard
# sudo docker push inferx/inferx_dashboard:v0.1.0
-sudo docker image rm inferx/inferx_dashboard:$(VERSION)
sudo docker build -t inferx/inferx_dashboard:$(VERSION) ./target/dashboard

pushdash:
# sudo docker login -u inferx
sudo docker tag inferx/inferx_dashboard:v0.1.0 inferx/inferx_dashboard:v0.1.0
sudo docker push inferx/inferx_dashboard:v0.1.0
sudo docker tag inferx/inferx_dashboard:$(VERSION) inferx/inferx_dashboard:$(VERSION)
sudo docker push inferx/inferx_dashboard:$(VERSION)

runmodel:
mkdir -p ./target/runmodel
cp ./script/run_model.py ./target/runmodel
cp ./script/run_llava.py ./target/runmodel
cp ./script/run_stablediffusion.py ./target/runmodel
cp ./deployment/vllm-opai.Dockerfile ./target/runmodel/Dockerfile
-sudo docker image rm vllm-openai-upgraded:v0.1.0
sudo docker build -t vllm-openai-upgraded:v0.1.0 ./target/runmodel
-sudo docker image rm vllm-openai-upgraded:$(VERSION)
sudo docker build -t vllm-openai-upgraded:$(VERSION) ./target/runmodel

spdk:
mkdir -p ./target/spdk
-rm ./target/spdk/* -rf
cp ./deployment/spdk.Dockerfile ./target/spdk/Dockerfile
-sudo docker image rm inferx/spdk-container:v0.1.0
sudo docker build -t inferx/spdk-container:v0.1.0 ./target/spdk
-sudo docker image rm inferx/spdk-container:$(VERSION)
sudo docker build -t inferx/spdk-container:$(VERSION) ./target/spdk

spdk2:
mkdir -p ./target/spdk
-rm ./target/spdk/* -rf
cp ./deployment/spdk2.Dockerfile ./target/spdk/Dockerfile
cp ./deployment/spdk.script ./target/spdk/entrypoint.sh
-sudo docker image rm inferx/spdk-container2:v0.1.0
sudo docker build -t inferx/spdk-container2:v0.1.0 ./target/spdk
-sudo docker image rm inferx/spdk-container2:$(VERSION)
sudo docker build -t inferx/spdk-container2:$(VERSION) ./target/spdk

pushspdk:
# sudo docker login -u inferx
sudo docker tag inferx/spdk-container:v0.1.0 inferx/spdk-container:v0.1.0
sudo docker push inferx/spdk-container:v0.1.0
sudo docker tag inferx/spdk-container2:v0.1.0 inferx/spdk-container2:v0.1.0
sudo docker push inferx/spdk-container2:v0.1.0
sudo docker tag inferx/spdk-container:$(VERSION) inferx/spdk-container:$(VERSION)
sudo docker push inferx/spdk-container:$(VERSION)
sudo docker tag inferx/spdk-container2:$(VERSION) inferx/spdk-container2:$(VERSION)
sudo docker push inferx/spdk-container2:$(VERSION)
sql:
sudo cp ./dashboard/sql/create_table.sql /opt/inferx/config
sudo cp ./dashboard/sql/secret.sql /opt/inferx/config

run:
-sudo pkill -9 inferx
@echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env
@echo "Version=$(VERSION)" >> .env
@echo "HOSTNAME=$(NODE_NAME)" >> .env
sudo docker compose -f docker-compose.yml build
- sudo rm -f /opt/inferx/log/inferx.log
- sudo rm -f /opt/inferx/log/onenode.log
Expand All @@ -68,11 +69,14 @@ run:

runblob:
-sudo pkill -9 inferx
@echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env
@echo "LOCAL_IP=$$(hostname -I | tr ' ' '\n' | grep -v '^172\.' | head -n 1 | xargs)" > .env
@echo "Version=$(VERSION)" >> .env
@echo "HOSTNAME=$(NODE_NAME)" >> .env
sudo docker compose -f docker-compose_blob.yml build
- sudo rm -f /opt/inferx/log/inferx.log
- sudo rm -f /opt/inferx/log/onenode.log
sudo docker compose -f docker-compose_blob.yml up -d --remove-orphans
cat .env
rm .env

stop:
Expand All @@ -82,7 +86,25 @@ stopblob:
sudo docker compose -f docker-compose_blob.yml down

rundash:
sudo docker run --net=host --name inferx_dashboard -v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:v0.1.0
sudo docker run --net=host --name inferx_dashboard --env "KEYCLOAK_URL=http://192.168.0.22:1260/authn" \
-v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:$(VERSION)

stopdash:
sudo docker stop inferx_dashboard
sudo docker stop inferx_dashboard

runkblob:
sudo kubectl apply -f k8s/spdk.yaml
sudo kubectl apply -f k8s/etcd.yaml
sudo kubectl apply -f k8s/secretdb.yaml
sudo kubectl apply -f k8s/db-deployment.yaml
sudo kubectl apply -f k8s/keycloak_postgres.yaml
sudo kubectl apply -f k8s/keycloak.yaml
sudo kubectl apply -f k8s/statesvc.yaml
sudo kubectl apply -f k8s/scheduler.yaml
sudo kubectl apply -f k8s/nodeagent.yaml
sudo kubectl apply -f k8s/dashboard.yaml
sudo kubectl apply -f k8s/ingress.yaml

stopnodeagent:
sudo kubectl delete DaemonSet nodeagent-blob
sudo kubectl delete DaemonSet nodeagent-file
8 changes: 6 additions & 2 deletions config/Aquila-7B.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Aquila-7B",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"BAAI/Aquila-7B",
Expand All @@ -17,7 +17,7 @@
],
"resources": {
"CPU": 20000,
"Mem": 50000,
"Mem": 60000,
"GPU": {
"Type": "Any",
"Count": 2,
Expand All @@ -28,6 +28,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
6 changes: 5 additions & 1 deletion config/Baichuan-7B.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Baichuan-7B",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"baichuan-inc/Baichuan-7B",
Expand All @@ -28,6 +28,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
10 changes: 7 additions & 3 deletions config/Baichuan2-13B-Chat-4bits.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Baichuan2-13B-Chat-4bits",
"object": {
"spec": {
"image": "vllm-openai-upgraded:v.0.1",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"baichuan-inc/Baichuan2-13B-Chat-4bits",
Expand All @@ -16,17 +16,21 @@
],
"resources": {
"CPU": 12000,
"Mem": 14000,
"Mem": 24000,
"GPU": {
"Type": "Any",
"Count": 1,
"vRam": 8000
"vRam": 13800
}
},
"envs": [
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
6 changes: 5 additions & 1 deletion config/Baichuan2-7B-Chat.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Baichuan2-7B-Chat",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"baichuan-inc/Baichuan2-7B-Chat",
Expand All @@ -28,6 +28,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
2 changes: 1 addition & 1 deletion config/DeciLM-7B-instruct.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "DeciLM-7B-instruct",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"Deci/DeciLM-7B-instruct",
Expand Down
2 changes: 1 addition & 1 deletion config/DeciLM-7B.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "DeciLM-7B",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"Deci/DeciLM-7B",
Expand Down
6 changes: 5 additions & 1 deletion config/DeepSeek-R1-Distill-Llama-8B.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "DeepSeek-R1-Distill-Llama-8B",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"/root/.cache/huggingface/git/DeepSeek-R1-Distill-Llama-8B",
Expand All @@ -30,6 +30,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
8 changes: 6 additions & 2 deletions config/DeepSeek-R1-Distill-Qwen-1.5B.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "DeepSeek-R1-Distill-Qwen-1.5B",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"/root/.cache/huggingface/git/DeepSeek-R1-Distill-Qwen-1.5B",
Expand All @@ -18,7 +18,7 @@
],
"resources": {
"CPU": 20000,
"Mem": 50000,
"Mem": 60000,
"GPU": {
"Type": "Any",
"Count": 1,
Expand All @@ -29,6 +29,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
6 changes: 5 additions & 1 deletion config/DeepSeek-R1-Distill-Qwen-7B.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "DeepSeek-R1-Distill-Qwen-7B",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"/root/.cache/huggingface/git/DeepSeek-R1-Distill-Qwen-7B",
Expand All @@ -30,6 +30,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
2 changes: 1 addition & 1 deletion config/EXAONE-3.0-7.8B-Instruct copy.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"namespace": "ns1",
"name": "gemma-7b",
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"google/gemma-7b",
Expand Down
2 changes: 1 addition & 1 deletion config/EXAONE-3.0-7.8B-Instruct.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"namespace": "ns1",
"name": "EXAONE-3.0-7.8B-Instruct",
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
Expand Down
8 changes: 6 additions & 2 deletions config/Llama-2-13b-hf.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Llama-2-13b-hf",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"meta-llama/Llama-2-13b-hf",
Expand All @@ -18,7 +18,7 @@
],
"resources": {
"CPU": 20000,
"Mem": 50000,
"Mem": 60000,
"GPU": {
"Type": "Any",
"Count": 2,
Expand All @@ -29,6 +29,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
6 changes: 5 additions & 1 deletion config/Llama-3.2-3B-Instruct.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Llama-3.2-3B-Instruct",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"meta-llama/Llama-3.2-3B-Instruct",
Expand All @@ -27,6 +27,10 @@
[
"LD_LIBRARY_PATH",
"/usr/local/lib/python3.12/dist-packages/nvidia/cuda_nvrtc/lib/:$LD_LIBRARY_PATH"
],
[
"VLLM_CUDART_SO_PATH",
"/usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudart.so.12"
]
],
"mounts": [
Expand Down
2 changes: 1 addition & 1 deletion config/Llama-3.2-3B-Instruct_2gpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Llama-3.2-3B-Instruct_2gpu",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"meta-llama/Llama-3.2-3B-Instruct",
Expand Down
2 changes: 1 addition & 1 deletion config/Meta-Llama-3-8B-Instruct.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "Meta-Llama-3-8B-Instruct",
"object": {
"spec": {
"image": "vllm/vllm-openai:v0.6.2",
"image": "vllm/vllm-openai:v0.7.3",
"commands": [
"--model",
"meta-llama/Meta-Llama-3-8B-Instruct",
Expand Down
Loading