Skip to content

Commit 45ae865

Browse files
authored
fix: readme and rbac issue (#78)
* fix: readme and rbac issue * fix: bump chart version
1 parent f7a3afb commit 45ae865

File tree

6 files changed

+52
-34
lines changed

6 files changed

+52
-34
lines changed

README.md

+13-2
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,22 @@ Tensor Fusion is a state-of-the-art **GPU virtualization and pooling solution**
3535
#### 🔄 Battle-tested GPU-over-IP Remote GPU Sharing
3636
#### ⚖️ GPU-first Scheduling and Auto-scaling
3737
#### 📊 Computing Oversubscription and GPU VRAM Expansion
38-
#### 🛫 GPU Live Migration
38+
#### 🛫 GPU Pooling, Monitoring, Live Migration, AI Model Preloading and more
3939

4040
## 🎬 Demo
4141

42-
WIP
42+
### Fractional vGPU & GPU-over-IP & Distributed Allocation
43+
44+
![Fractional vGPU & GPU-over-IP & Distributed Allocation](https://cdn.tensor-fusion.ai//demo/overall-demo.gif)
45+
46+
47+
### AI Infra Console
48+
49+
![AI Infra Console](https://cdn.tensor-fusion.ai//demo/ai-infra-console.gif)
50+
51+
### GPU Live Migration [End-to-end feature WIP]
52+
53+
![Live-migration PoC](https://cdn.tensor-fusion.ai//demo/gpu-migration-poc.gif)
4354

4455
## 🚀 Quick Start
4556

charts/tensor-fusion/Chart.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 1.2.9
18+
version: 1.2.10
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

charts/tensor-fusion/templates/rbac.yaml

+21-18
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,17 @@ kind: ClusterRole
33
metadata:
44
name: {{ include "tensor-fusion.fullname" . }}-role
55
rules:
6+
rules:
7+
- apiGroups:
8+
- ""
9+
resources:
10+
- configmaps
11+
- namespaces
12+
- secrets
13+
verbs:
14+
- get
15+
- list
16+
- watch
617
- apiGroups:
718
- ""
819
resources:
@@ -23,15 +34,6 @@ rules:
2334
- patch
2435
- update
2536
- watch
26-
- apiGroups:
27-
- ""
28-
resources:
29-
- pods/exec
30-
verbs:
31-
- create
32-
- get
33-
- patch
34-
- update
3537
- apiGroups:
3638
- ""
3739
resources:
@@ -51,19 +53,17 @@ rules:
5153
- apiGroups:
5254
- ""
5355
resources:
54-
- secrets
56+
- pods/exec
5557
verbs:
58+
- create
5659
- get
57-
- list
58-
- watch
60+
- patch
61+
- update
5962
- apiGroups:
6063
- apps
6164
resources:
62-
- configmaps
6365
- daemonsets
6466
- deployments
65-
- namespaces
66-
- secrets
6767
verbs:
6868
- create
6969
- delete
@@ -95,6 +95,7 @@ rules:
9595
- apiGroups:
9696
- tensor-fusion.ai
9797
resources:
98+
- clientprofiles
9899
- gpunodeclasses
99100
- gpunodes
100101
- gpupools
@@ -103,7 +104,6 @@ rules:
103104
- tensorfusionclusters
104105
- tensorfusionconnections
105106
- tensorfusionworkloads
106-
- clientprofiles
107107
verbs:
108108
- create
109109
- delete
@@ -115,18 +115,21 @@ rules:
115115
- apiGroups:
116116
- tensor-fusion.ai
117117
resources:
118+
- clientprofiles/finalizers
118119
- gpunodeclasses/finalizers
119120
- gpunodes/finalizers
120121
- gpupools/finalizers
121122
- gpus/finalizers
122123
- schedulingconfigtemplates/finalizers
124+
- tensorfusionclusters/finalizers
123125
- tensorfusionconnections/finalizers
124126
- tensorfusionworkloads/finalizers
125127
verbs:
126128
- update
127129
- apiGroups:
128130
- tensor-fusion.ai
129131
resources:
132+
- clientprofiles/status
130133
- gpunodeclasses/status
131134
- gpunodes/status
132135
- gpupools/status
@@ -135,11 +138,11 @@ rules:
135138
- tensorfusionclusters/status
136139
- tensorfusionconnections/status
137140
- tensorfusionworkloads/status
138-
- clientprofiles/status
139141
verbs:
140142
- get
141143
- patch
142144
- update
145+
143146
---
144147
apiVersion: rbac.authorization.k8s.io/v1
145148
kind: ClusterRoleBinding
@@ -152,4 +155,4 @@ roleRef:
152155
subjects:
153156
- kind: ServiceAccount
154157
name: {{ include "tensor-fusion.serviceAccountName" . }}
155-
namespace: {{ include "tensor-fusion.namespace" . }}
158+
namespace: {{ include "tensor-fusion.namespace" . }}

config/rbac/role.yaml

+13-11
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@ kind: ClusterRole
44
metadata:
55
name: manager-role
66
rules:
7+
- apiGroups:
8+
- ""
9+
resources:
10+
- configmaps
11+
- namespaces
12+
- secrets
13+
verbs:
14+
- get
15+
- list
16+
- watch
717
- apiGroups:
818
- ""
919
resources:
@@ -49,22 +59,11 @@ rules:
4959
- get
5060
- patch
5161
- update
52-
- apiGroups:
53-
- ""
54-
resources:
55-
- secrets
56-
verbs:
57-
- get
58-
- list
59-
- watch
6062
- apiGroups:
6163
- apps
6264
resources:
63-
- configmaps
6465
- daemonsets
6566
- deployments
66-
- namespaces
67-
- secrets
6867
verbs:
6968
- create
7069
- delete
@@ -104,6 +103,7 @@ rules:
104103
- schedulingconfigtemplates
105104
- tensorfusionclusters
106105
- tensorfusionconnections
106+
- tensorfusionworkloads
107107
verbs:
108108
- create
109109
- delete
@@ -123,6 +123,7 @@ rules:
123123
- schedulingconfigtemplates/finalizers
124124
- tensorfusionclusters/finalizers
125125
- tensorfusionconnections/finalizers
126+
- tensorfusionworkloads/finalizers
126127
verbs:
127128
- update
128129
- apiGroups:
@@ -136,6 +137,7 @@ rules:
136137
- schedulingconfigtemplates/status
137138
- tensorfusionclusters/status
138139
- tensorfusionconnections/status
140+
- tensorfusionworkloads/status
139141
verbs:
140142
- get
141143
- patch

internal/controller/tensorfusioncluster_controller.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ type TensorFusionClusterReconciler struct {
5656
// +kubebuilder:rbac:groups=tensor-fusion.ai,resources=tensorfusionclusters/status,verbs=get;update;patch
5757
// +kubebuilder:rbac:groups=tensor-fusion.ai,resources=tensorfusionclusters/finalizers,verbs=update
5858
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
59-
// +kubebuilder:rbac:groups=apps,resources=deployments;namespaces;configmaps;secrets,verbs=get;list;watch;create;update;patch;delete
59+
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
6060
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
6161
// +kubebuilder:rbac:groups=batch,resources=cronjobs,verbs=get;list;watch
62-
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
62+
// +kubebuilder:rbac:groups="",resources=secrets;namespaces;configmaps,verbs=get;list;watch
6363
// +kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch;create;update;patch;delete
6464

6565
// Reconcile a TensorFusionCluster object: create and monitor GPU pools, and manage cluster-level component versions.

internal/controller/tensorfusionworkload_controller.go

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ type TensorFusionWorkloadReconciler struct {
5454
// +kubebuilder:rbac:groups=tensor-fusion.ai,resources=tensorfusionworkloads,verbs=get;list;watch;create;update;patch;delete
5555
// +kubebuilder:rbac:groups=tensor-fusion.ai,resources=tensorfusionworkloads/status,verbs=get;update;patch
5656
// +kubebuilder:rbac:groups=tensor-fusion.ai,resources=tensorfusionworkloads/finalizers,verbs=update
57+
58+
// Reconcile handles reconciliation requests for TensorFusionWorkload objects.
5759
func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
5860
log := log.FromContext(ctx)
5961
log.Info("Reconciling TensorFusionWorkload", "request", req)

0 commit comments

Comments
 (0)