Skip to content

Dev2 #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 29 commits into
base: main
Choose a base branch
from
Open

Dev2 #13

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f559beb
enable funcspec update
inferx-net Apr 29, 2025
dbb3af6
add delete model function in dashboard
inferx-net Apr 29, 2025
cf06ec6
add admin func
inferx-net Apr 30, 2025
fc8fea2
minor change:
inferx-net May 1, 2025
a300b8c
clean admin page
inferx-net May 1, 2025
f190684
add links in admin page
inferx-net May 1, 2025
aa053ad
update release container version
inferx-net May 1, 2025
ee7d1f8
enable k8s deployment#1
inferx-net May 3, 2025
1a4f63b
db port update
inferx-net May 3, 2025
f6d24f3
add node config
inferx-net May 3, 2025
e6bd91d
use cluster ip for etcd
inferx-net May 3, 2025
ca076a1
use container network for inferx_one and inferx_dashboard
inferx-net May 4, 2025
10d261f
expose keycloak and inferx_one with ingress
inferx-net May 4, 2025
28cd035
enable blobstore for k3s
inferx-net May 4, 2025
8a7f5ad
add spdk yaml
inferx-net May 4, 2025
b7f6369
add non-blob config
inferx-net May 5, 2025
e7868b4
non-blob inferx-one yaml
inferx-net May 5, 2025
e46771d
enable nodeagent/scheduler/statesvc
inferx-net May 6, 2025
71869c5
add nodeagent.yaml
inferx-net May 7, 2025
83d0ba9
add podip in docker compose
inferx-net May 8, 2025
2f546b8
reenable keycloak localhost access
inferx-net May 8, 2025
0acf1f3
minor fix
inferx-net May 8, 2025
01eb57d
minor fix
inferx-net May 8, 2025
19b2c43
fix func.html openai restapi token handle bug
inferx-net May 10, 2025
f925d12
code clean
inferx-net May 10, 2025
d536c2e
update resource allocation
inferx-net May 10, 2025
63e9d52
fix nodename bug
inferx-net May 13, 2025
5ac5d35
update yaml
inferx-net May 14, 2025
51d9911
enable second nodeagent
inferx-net May 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 41 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARCH := ${shell uname -m}

LOCAL_IP=${hostname -I | awk '{print $$1}' | xargs}
VERSION := v0.1.1
NODE_NAME=${shell hostname}

all: ctl dash spdk runmodel

Expand All @@ -14,52 +14,53 @@ dash:
-rm ./target/dashboard/* -rf
cp ./dashboard/* ./target/dashboard -rL
cp ./deployment/dashboard.Dockerfile ./target/dashboard/Dockerfile
-sudo docker image rm inferx/inferx_dashboard:v0.1.0
sudo docker build -t inferx/inferx_dashboard:v0.1.0 ./target/dashboard
# sudo docker push inferx/inferx_dashboard:v0.1.0
-sudo docker image rm inferx/inferx_dashboard:$(VERSION)
sudo docker build -t inferx/inferx_dashboard:$(VERSION) ./target/dashboard

pushdash:
# sudo docker login -u inferx
sudo docker tag inferx/inferx_dashboard:v0.1.0 inferx/inferx_dashboard:v0.1.0
sudo docker push inferx/inferx_dashboard:v0.1.0
sudo docker tag inferx/inferx_dashboard:$(VERSION) inferx/inferx_dashboard:$(VERSION)
sudo docker push inferx/inferx_dashboard:$(VERSION)

runmodel:
mkdir -p ./target/runmodel
cp ./script/run_model.py ./target/runmodel
cp ./script/run_llava.py ./target/runmodel
cp ./script/run_stablediffusion.py ./target/runmodel
cp ./deployment/vllm-opai.Dockerfile ./target/runmodel/Dockerfile
-sudo docker image rm vllm-openai-upgraded:v0.1.0
sudo docker build -t vllm-openai-upgraded:v0.1.0 ./target/runmodel
-sudo docker image rm vllm-openai-upgraded:$(VERSION)
sudo docker build -t vllm-openai-upgraded:$(VERSION) ./target/runmodel

spdk:
mkdir -p ./target/spdk
-rm ./target/spdk/* -rf
cp ./deployment/spdk.Dockerfile ./target/spdk/Dockerfile
-sudo docker image rm inferx/spdk-container:v0.1.0
sudo docker build -t inferx/spdk-container:v0.1.0 ./target/spdk
-sudo docker image rm inferx/spdk-container:$(VERSION)
sudo docker build -t inferx/spdk-container:$(VERSION) ./target/spdk

spdk2:
mkdir -p ./target/spdk
-rm ./target/spdk/* -rf
cp ./deployment/spdk2.Dockerfile ./target/spdk/Dockerfile
cp ./deployment/spdk.script ./target/spdk/entrypoint.sh
-sudo docker image rm inferx/spdk-container2:v0.1.0
sudo docker build -t inferx/spdk-container2:v0.1.0 ./target/spdk
-sudo docker image rm inferx/spdk-container2:$(VERSION)
sudo docker build -t inferx/spdk-container2:$(VERSION) ./target/spdk

pushspdk:
# sudo docker login -u inferx
sudo docker tag inferx/spdk-container:v0.1.0 inferx/spdk-container:v0.1.0
sudo docker push inferx/spdk-container:v0.1.0
sudo docker tag inferx/spdk-container2:v0.1.0 inferx/spdk-container2:v0.1.0
sudo docker push inferx/spdk-container2:v0.1.0
sudo docker tag inferx/spdk-container:$(VERSION) inferx/spdk-container:$(VERSION)
sudo docker push inferx/spdk-container:$(VERSION)
sudo docker tag inferx/spdk-container2:$(VERSION) inferx/spdk-container2:$(VERSION)
sudo docker push inferx/spdk-container2:$(VERSION)
sql:
sudo cp ./dashboard/sql/create_table.sql /opt/inferx/config
sudo cp ./dashboard/sql/secret.sql /opt/inferx/config

run:
-sudo pkill -9 inferx
@echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env
@echo "Version=$(VERSION)" >> .env
@echo "HOSTNAME=$(NODE_NAME)" >> .env
sudo docker compose -f docker-compose.yml build
- sudo rm -f /opt/inferx/log/inferx.log
- sudo rm -f /opt/inferx/log/onenode.log
Expand All @@ -68,11 +69,14 @@ run:

runblob:
-sudo pkill -9 inferx
@echo "LOCAL_IP=$$(hostname -I | awk '{print $$1}' | xargs)" > .env
@echo "LOCAL_IP=$$(hostname -I | tr ' ' '\n' | grep -v '^172\.' | head -n 1 | xargs)" > .env
@echo "Version=$(VERSION)" >> .env
@echo "HOSTNAME=$(NODE_NAME)" >> .env
sudo docker compose -f docker-compose_blob.yml build
- sudo rm -f /opt/inferx/log/inferx.log
- sudo rm -f /opt/inferx/log/onenode.log
sudo docker compose -f docker-compose_blob.yml up -d --remove-orphans
cat .env
rm .env

stop:
Expand All @@ -82,7 +86,24 @@ stopblob:
sudo docker compose -f docker-compose_blob.yml down

rundash:
sudo docker run --net=host --name inferx_dashboard -v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:v0.1.0
sudo docker run --net=host --name inferx_dashboard -v /etc/letsencrypt/:/etc/letsencrypt/ --rm inferx/inferx_dashboard:$(VERSION)

stopdash:
sudo docker stop inferx_dashboard
sudo docker stop inferx_dashboard

# runkblob: apply the Kubernetes manifests under k8s/ with `kubectl apply`,
# one manifest per recipe line, in the order listed below.
# NOTE(review): the ordering presumably reflects start-up dependencies
# (storage/etcd/databases before services) — confirm before reordering.
runkblob:
sudo kubectl apply -f k8s/spdk.yaml
sudo kubectl apply -f k8s/etcd.yaml
sudo kubectl apply -f k8s/secretdb.yaml
sudo kubectl apply -f k8s/db-deployment.yaml
sudo kubectl apply -f k8s/keycloak_postgres.yaml
sudo kubectl apply -f k8s/keycloak.yaml
sudo kubectl apply -f k8s/statesvc.yaml
sudo kubectl apply -f k8s/scheduler.yaml
sudo kubectl apply -f k8s/nodeagent.yaml
sudo kubectl apply -f k8s/dashboard.yaml
sudo kubectl apply -f k8s/ingress.yaml

# stopnodeagent: delete the two node-agent DaemonSets (nodeagent-blob and
# nodeagent-file) from the cluster via kubectl.
stopnodeagent:
sudo kubectl delete DaemonSet nodeagent-blob
sudo kubectl delete DaemonSet nodeagent-file
117 changes: 104 additions & 13 deletions dashboard/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,34 @@

import logging
import sys
import multiprocessing

from werkzeug.middleware.proxy_fix import ProxyFix



logger = logging.getLogger('gunicorn.error')
sys.stdout = sys.stderr = logger.handlers[0].stream
# logger = logging.getLogger('gunicorn.error')
# sys.stdout = sys.stderr = logger.handlers[0].stream

app = Flask(__name__)
app.secret_key = os.environ.get("FLASK_SECRET", "supersecret")

# Point sys.stdout and sys.stderr at the stream of the first handler of the
# 'gunicorn.error' logger when the current process name contains "gunicorn"
# and that logger has at least one handler; the else branch only logs that no
# redirection is performed.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the `else` pairs with the process-name check or with
# `if logger.handlers` — confirm against the real file.
# NOTE(review): presumably Gunicorn workers keep the default multiprocessing
# process name ("MainProcess"); if so the first check never matches — verify.
def configure_logging():
if "gunicorn" in multiprocessing.current_process().name.lower():
logger = logging.getLogger('gunicorn.error')
if logger.handlers:
sys.stdout = sys.stderr = logger.handlers[0].stream
app.logger.info("Redirecting stdout/stderr to Gunicorn logger.")
else:
app.logger.info("Running standalone Flask — no stdout/stderr redirection.")

configure_logging()

KEYCLOAK_URL = os.getenv('KEYCLOAK_URL', "http://192.168.0.22:81/authn")

KEYCLOAK_URL = os.getenv('KEYCLOAK_URL', "http://192.168.0.22:31260/authn")
KEYCLOAK_REALM_NAME = os.getenv('KEYCLOAK_REALM_NAME', "inferx")
KEYCLOAK_CLIENT_ID = os.getenv('KEYCLOAK_CLIENT_ID', "infer_client")
KEYCLOAK_CLIENT_SECRET = os.getenv('KEYCLOAK_CLIENT_SECRET', "SJvfmGFViBNHsLfhkto4eRE0PnPhpyft")
KEYCLOAK_CLIENT_SECRET = os.getenv('KEYCLOAK_CLIENT_SECRET', "M2Dse5531tdtyipZdGizLEeoOVgziQRX")

server_metadata_url = f"{KEYCLOAK_URL}/realms/{KEYCLOAK_REALM_NAME}/.well-known/openid-configuration"

Expand All @@ -81,7 +93,7 @@

tls = False

apihostaddr = "http://localhost:4000"
apihostaddr = os.getenv('INFERX_APIGW_ADDR', "http://localhost:4000")
# apihostaddr = "https://quarksoft.io:4000"

def is_token_expired():
Expand Down Expand Up @@ -197,7 +209,7 @@ def logout():
f"id_token_hint={id_token}"
)

def getapkkeys():
def getapikeys():
access_token = session.get('token')['access_token']
# Include the access token in the Authorization header
headers = {'Authorization': f'Bearer {access_token}'}
Expand All @@ -208,20 +220,20 @@ def getapkkeys():

return apikeys

@app.route('/apikeys')
@app.route('/admin')
@require_login
def apikeys():
apikeys = getapkkeys()
return render_template(
"apikey.html", apikeys=apikeys
"admin.html"
)

@app.route('/generate_apikeys', methods=['GET'])
@require_login
def generate_apikeys():
apikeys = getapkkeys()
apikeys = getapikeys()
return apikeys


@app.route('/apikeys', methods=['PUT'])
@require_login
def create_apikey():
Expand Down Expand Up @@ -319,6 +331,29 @@ def getnode(name: str):

return func

def listtenants():
    """Fetch the tenant objects from the API gateway.

    Sends a GET to ``{apihostaddr}/objects/tenant/system/system/``. A bearer
    ``Authorization`` header is attached only when the session holds a
    non-empty ``access_token``.

    Returns:
        The decoded JSON body of the gateway response.

    Raises:
        requests.exceptions.RequestException: if the gateway cannot be
            reached or does not answer within the timeout.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    access_token = session.get('access_token', '')
    if access_token == "":
        headers = {}
    else:
        headers = {'Authorization': f'Bearer {access_token}'}

    url = "{}/objects/tenant/system/system/".format(apihostaddr)
    # Bound the wait so a stalled gateway cannot block this worker forever;
    # 60 seconds is the same limit the proxy1 route passes to requests.
    resp = requests.get(url, headers=headers, timeout=60)
    return json.loads(resp.content)

def listnamespaces():
    """Fetch the namespace objects from the API gateway.

    Sends a GET to ``{apihostaddr}/objects/namespace///``. A bearer
    ``Authorization`` header is attached only when the session holds a
    non-empty ``access_token``.

    Returns:
        The decoded JSON body of the gateway response.

    Raises:
        requests.exceptions.RequestException: if the gateway cannot be
            reached or does not answer within the timeout.
        json.JSONDecodeError: if the response body is not valid JSON.
    """
    access_token = session.get('access_token', '')
    if access_token == "":
        headers = {}
    else:
        headers = {'Authorization': f'Bearer {access_token}'}

    url = "{}/objects/namespace///".format(apihostaddr)
    # Bound the wait so a stalled gateway cannot block this worker forever;
    # 60 seconds is the same limit the proxy1 route passes to requests.
    resp = requests.get(url, headers=headers, timeout=60)
    return json.loads(resp.content)

def listpods(tenant: str, namespace: str, funcname: str):
access_token = session.get('access_token', '')
Expand Down Expand Up @@ -460,6 +495,25 @@ def text2img():
headers = [(name, value) for (name, value) in resp.raw.headers.items() if name.lower() not in excluded_headers]
return Response(resp.iter_content(1024000), resp.status_code, headers)

@app.route('/generate_tenants', methods=['GET'])
@require_login
def generate_tenants():
    """Return the result of ``listtenants()`` after echoing it to stdout."""
    result = listtenants()
    print("tenants ", result)
    return result

@app.route('/generate_namespaces', methods=['GET'])
@require_login
def generate_namespaces():
    """Return the result of ``listnamespaces()`` after echoing it to stdout."""
    result = listnamespaces()
    print("namespaces ", result)
    return result

@app.route('/generate_funcs', methods=['GET'])
@require_login
def generate_funcs():
    """Return what ``listfuncs`` yields when called with two empty strings."""
    return listfuncs("", "")

@app.route('/generate', methods=['POST'])
@not_require_login
Expand Down Expand Up @@ -574,19 +628,56 @@ def proxy(path):
data=request.get_data(),
cookies=request.cookies,
allow_redirects=False,
timeout=60,
stream=True
)
except requests.exceptions.RequestException as e:
return Response(f"Error connecting to backend server: {e}", status=502)

# Exclude hop-by-hop headers as per RFC 2616 section 13.5.1
excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
excluded_headers = ['content-encoding', 'transfer-encoding', 'connection']
headers = [(name, value) for name, value in resp.raw.headers.items() if name.lower() not in excluded_headers]

# Create a Flask response object with the backend server's response
response = Response(stream_response(resp), resp.status_code, headers)
return response

@app.route('/proxy1/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'])
@require_login
def proxy1(path):
    """Relay the current request to the API gateway and return its body as plain text.

    The inbound headers (minus ``Host``), query string, body and cookies are
    forwarded to ``{apihostaddr}/{path}`` using the same HTTP method. When the
    session holds a non-empty ``access_token`` it is sent as a bearer
    ``Authorization`` header. The upstream response is read in full (no
    streaming); only its body and status code are passed back, always with a
    ``text/plain`` mimetype — upstream response headers are not copied.

    Args:
        path: Remainder of the URL after ``/proxy1/``.

    Returns:
        A Flask ``Response``; status 502 with an error message if the
        upstream request raises ``requests.exceptions.RequestException``.
    """
    access_token = session.get('access_token', '')

    # Copy every inbound header except Host.
    headers = {}
    for key, value in request.headers:
        if key.lower() == 'host':
            continue
        headers[key] = value
    if access_token != "":
        headers["Authorization"] = f'Bearer {access_token}'

    upstream_call = {
        "method": request.method,
        "url": f"{apihostaddr}/{path}",
        "headers": headers,
        "params": request.args,
        "data": request.get_data(),
        "cookies": request.cookies,
        "allow_redirects": False,
        "timeout": 60,
        "stream": False,
    }

    try:
        resp = requests.request(**upstream_call)
    except requests.exceptions.RequestException as e:
        print("error ....")
        return Response(f"Error connecting to backend server: {e}", status=502, mimetype='text/plain')

    return Response(resp.content, resp.status_code, mimetype='text/plain')



@app.route("/intro")
def md():
name = request.args.get("name")
Expand Down Expand Up @@ -679,14 +770,13 @@ def GetFunc():
sample = func["func"]["object"]["spec"]["sample_query"]
map = sample["body"]
apiType = sample["apiType"]
isAdmin = func["isAdmin"]

version = func["func"]["object"]["spec"]["version"]
fails = GetFailLogs(tenant, namespace, name, version)

# Convert Python dictionary to pretty JSON string
funcspec = json.dumps(func["func"]["object"]["spec"], indent=4)
funcspec = funcspec.replace("\n", "<br>")
funcspec = funcspec.replace(" ", "&emsp;")

return render_template(
"func.html",
Expand All @@ -698,6 +788,7 @@ def GetFunc():
funcspec=funcspec,
apiType=apiType,
map=map,
isAdmin=isAdmin,
path=sample["path"]
)

Expand Down
Loading