Skip to content

Commit fcfd533

Browse files
authored Mar 6, 2025
feat: tensor-fusion workload Implementation (#59)
1 parent fadaeae commit fcfd533

35 files changed

+2109
-654
lines changed
 

‎.vscode/launch.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@
5050
"program": "${workspaceFolder}/cmd/main.go",
5151
},
5252
{
53-
"name": "Debug Test Code",
53+
"name": "Run Unit Tests",
5454
"type": "go",
5555
"request": "launch",
56-
"mode": "auto",
57-
"console": "integratedTerminal",
58-
"program": "${workspaceFolder}/cmd/tmp/main.go",
56+
"mode": "test",
57+
"program": "${workspaceFolder}",
58+
"console": "integratedTerminal"
5959
}
6060
]
6161
}

‎PROJECT

+8
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,12 @@ resources:
7979
kind: ClientProfile
8080
path: github.com/NexusGPU/tensor-fusion-operator/api/v1
8181
version: v1
82+
- api:
83+
crdVersion: v1
84+
namespaced: true
85+
controller: true
86+
domain: tensor-fusion.ai
87+
kind: TensorFusionWorkload
88+
path: github.com/NexusGPU/tensor-fusion-operator/api/v1
89+
version: v1
8290
version: "3"

‎api/v1/tensorfusionconnection_types.go

+4-19
Original file line numberDiff line numberDiff line change
@@ -33,29 +33,14 @@ type Resources struct {
3333

3434
// TensorFusionConnectionSpec defines the desired state of TensorFusionConnection.
3535
type TensorFusionConnectionSpec struct {
36-
PoolName string `json:"poolName"`
37-
Resources Resources `json:"resources"`
38-
39-
// +optional
40-
// localGpu mode will schedule the GPU in advance
41-
GPUs []string `json:"gpu"`
36+
WorkloadName string `json:"workloadName"`
4237
}
4338

44-
type TensorFusionConnectionPhase string
45-
46-
// These are the valid phases of a GpuConnection.
47-
const (
48-
TensorFusionConnectionPending TensorFusionConnectionPhase = "Pending"
49-
TensorFusionConnectionStarting TensorFusionConnectionPhase = "Starting"
50-
TensorFusionConnectionRunning TensorFusionConnectionPhase = "Running"
51-
)
52-
5339
// TensorFusionConnectionStatus defines the observed state of TensorFusionConnection.
5440
type TensorFusionConnectionStatus struct {
55-
Phase TensorFusionConnectionPhase `json:"phase"`
56-
ConnectionURL string `json:"connectionURL"`
57-
QoS QoSLevel `json:"qos,omitempty"`
58-
GPU string `json:"gpu,omitempty"`
41+
Phase WorkerPhase `json:"phase"`
42+
ConnectionURL string `json:"connectionURL"`
43+
WorkerName string `json:"workerName"`
5944
}
6045

6146
// +kubebuilder:object:root=true

‎api/v1/tensorfusionworkload_types.go

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
)
22+
23+
// TensorFusionWorkloadSpec defines the desired state of TensorFusionWorkload.
// It carries a replica count, a pool name, a Resources value, a QoS level and
// a local-GPU flag. How each value is acted on is decided by the controller,
// which is not shown here.
24+
type TensorFusionWorkloadSpec struct {
25+
// Replicas is a pointer so that an unset value (nil) can be told apart from
// an explicit zero; it is omitted from the JSON form when nil.
Replicas *int32 `json:"replicas,omitempty"`
26+
// PoolName is serialized as "poolName"; the tag has no omitempty, so the key
// is always written.
PoolName string `json:"poolName"`
27+
// Resources is serialized as "resources".
// NOTE(review): this field is marked +optional but its tag has no omitempty,
// so the key is still always written when marshaling — confirm that is intended.
// +optional
28+
Resources Resources `json:"resources"`
29+
// Qos is serialized as "qos" and omitted when empty.
// +optional
30+
Qos QoSLevel `json:"qos,omitempty"`
31+
// IsLocalGPU is serialized as "isLocalGPU" and omitted when false.
// +optional
32+
IsLocalGPU bool `json:"isLocalGPU,omitempty"`
33+
}
34+
35+
// WorkerPhase is a string-typed phase label. In this file it is the type of
// WorkerStatus.WorkerPhase.
type WorkerPhase string
36+
37+
// The values declared for WorkerPhase.
const (
38+
WorkerPending WorkerPhase = "Pending"
39+
WorkerRunning WorkerPhase = "Running"
40+
WorkerFailed WorkerPhase = "Failed"
41+
)
42+
43+
// WorkerStatus is the element type of TensorFusionWorkloadStatus.WorkerStatuses.
// It records a worker's phase and name, plus an optional node selector, IP and
// port.
type WorkerStatus struct {
44+
// WorkerPhase is serialized as "workerPhase"; the key is always written.
WorkerPhase WorkerPhase `json:"workerPhase"`
45+
46+
// WorkerName is serialized as "workerName"; the key is always written.
WorkerName string `json:"workerName"`
47+
// NodeSelector is a string-to-string map, omitted from JSON when empty.
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
48+
// WorkerIp is omitted from JSON when empty.
// NOTE(review): Go initialism style would spell this WorkerIP; renaming is an
// API change for callers, so it is left as is here.
// +optional
49+
WorkerIp string `json:"workerIp,omitempty"`
50+
// WorkerPort is omitted from JSON when zero.
// NOTE(review): Kubernetes API conventions ask for int32/int64 rather than
// int in API types — consider int32 in a follow-up.
// +optional
51+
WorkerPort int `json:"workerPort,omitempty"`
52+
}
53+
54+
// TensorFusionWorkloadStatus defines the observed state of TensorFusionWorkload.
// It holds two replica counters and a list of per-worker status entries.
55+
type TensorFusionWorkloadStatus struct {
56+
// replicas is the number of Pods created by the Workload controller.
// The tag has no omitempty, so a zero count is still written to JSON.
57+
Replicas int32 `json:"replicas"`
58+
59+
// readyReplicas is the number of pods created for this Workload with a Ready Condition.
// Omitted from JSON when zero.
60+
ReadyReplicas int32 `json:"readyReplicas,omitempty"`
61+
62+
// WorkerStatuses is a list of WorkerStatus entries, omitted from JSON when
// empty.
WorkerStatuses []WorkerStatus `json:"workerStatuses,omitempty"`
63+
}
64+
65+
// +kubebuilder:object:root=true
66+
// +kubebuilder:subresource:status
67+
68+
// TensorFusionWorkload is the Schema for the tensorfusionworkloads API.
// It embeds the standard type and object metadata and pairs a
// TensorFusionWorkloadSpec with a TensorFusionWorkloadStatus.
69+
type TensorFusionWorkload struct {
70+
metav1.TypeMeta `json:",inline"`
71+
metav1.ObjectMeta `json:"metadata,omitempty"`
72+
73+
// Spec is serialized as "spec".
Spec TensorFusionWorkloadSpec `json:"spec,omitempty"`
74+
// Status is serialized as "status".
Status TensorFusionWorkloadStatus `json:"status,omitempty"`
75+
}
76+
77+
// +kubebuilder:object:root=true
78+
79+
// TensorFusionWorkloadList contains a list of TensorFusionWorkload.
// It embeds the standard type and list metadata alongside the items.
80+
type TensorFusionWorkloadList struct {
81+
metav1.TypeMeta `json:",inline"`
82+
metav1.ListMeta `json:"metadata,omitempty"`
83+
// Items is serialized as "items"; the tag has no omitempty, so the key is
// always written.
Items []TensorFusionWorkload `json:"items"`
84+
}
85+
86+
// init registers TensorFusionWorkload and TensorFusionWorkloadList with
// SchemeBuilder, which is declared elsewhere in this package.
func init() {
87+
SchemeBuilder.Register(&TensorFusionWorkload{}, &TensorFusionWorkloadList{})
88+
}

‎api/v1/zz_generated.deepcopy.go

+125-7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎charts/tensor-fusion/Chart.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 1.2.5
18+
version: 1.2.6
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

0 commit comments

Comments
 (0)
Please sign in to comment.