Skip to content

Commit b43d3fe

Browse files
committed
[refactor][plugin] RayClusterSpecObject
so we can use it for [this proposed cluster config schema][1]. We change in a backwards-compatible way to support multiple worker groups and other fields in the schema. [1]: https://docs.google.com/document/d/18V5_7SGUzS0LGlaJB7xus04omyr3drKkK4vV6Kz14jY/edit?tab=t.0#heading=h.tuqyq1b6b2x7 Signed-off-by: David Xia <[email protected]>
1 parent e794dc3 commit b43d3fe

File tree

4 files changed

+233
-174
lines changed

4 files changed

+233
-174
lines changed

kubectl-plugin/pkg/cmd/create/create_cluster.go

+24-20
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/generation"
1010
"github.com/spf13/cobra"
1111
"k8s.io/cli-runtime/pkg/genericclioptions"
12+
"k8s.io/utils/ptr"
1213

1314
"github.com/ray-project/kuberay/kubectl-plugin/pkg/util/client"
1415
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -164,29 +165,32 @@ func (options *CreateClusterOptions) Run(ctx context.Context, k8sClient client.C
164165
return fmt.Errorf("the Ray cluster %s in namespace %s already exists", options.clusterName, options.namespace)
165166
}
166167

167-
rayClusterObject := generation.RayClusterYamlObject{
168-
Namespace: options.namespace,
169-
ClusterName: options.clusterName,
170-
RayClusterSpecObject: generation.RayClusterSpecObject{
171-
RayVersion: options.rayVersion,
172-
Image: options.image,
173-
HeadCPU: options.headCPU,
174-
HeadMemory: options.headMemory,
175-
HeadEphemeralStorage: options.headEphemeralStorage,
176-
HeadGPU: options.headGPU,
177-
HeadRayStartParams: options.headRayStartParams,
178-
WorkerReplicas: options.workerReplicas,
179-
WorkerCPU: options.workerCPU,
180-
WorkerMemory: options.workerMemory,
181-
WorkerEphemeralStorage: options.workerEphemeralStorage,
182-
WorkerGPU: options.workerGPU,
183-
WorkerRayStartParams: options.workerRayStartParams,
184-
HeadNodeSelectors: options.headNodeSelectors,
185-
WorkerNodeSelectors: options.workerNodeSelectors,
168+
rayClusterSpecObject := generation.RayClusterSpecObject{
169+
Namespace: &options.namespace,
170+
Name: &options.clusterName,
171+
RayVersion: &options.rayVersion,
172+
Image: &options.image,
173+
HeadCPU: &options.headCPU,
174+
HeadMemory: &options.headMemory,
175+
HeadEphemeralStorage: &options.headEphemeralStorage,
176+
HeadGPU: &options.headGPU,
177+
HeadRayStartParams: options.headRayStartParams,
178+
HeadNodeSelectors: options.headNodeSelectors,
179+
WorkerGroups: []generation.WorkerGroupConfig{
180+
{
181+
Name: ptr.To("default-group"),
182+
WorkerReplicas: &options.workerReplicas,
183+
WorkerCPU: &options.workerCPU,
184+
WorkerMemory: &options.workerMemory,
185+
WorkerEphemeralStorage: &options.workerEphemeralStorage,
186+
WorkerGPU: &options.workerGPU,
187+
WorkerRayStartParams: options.workerRayStartParams,
188+
WorkerNodeSelectors: options.workerNodeSelectors,
189+
},
186190
},
187191
}
188192

189-
rayClusterac := rayClusterObject.GenerateRayClusterApplyConfig()
193+
rayClusterac := rayClusterSpecObject.GenerateRayClusterApplyConfig()
190194

191195
// If dry run is enabled, it will call the YAML converter and print out the YAML
192196
if options.dryRun {

kubectl-plugin/pkg/cmd/job/job_submit.go

+13-9
Original file line numberDiff line numberDiff line change
@@ -281,15 +281,19 @@ func (options *SubmitJobOptions) Run(ctx context.Context, factory cmdutil.Factor
281281
// See https://github.com/ray-project/kuberay/issues/3126.
282282
Entrypoint: options.entryPoint,
283283
RayClusterSpecObject: generation.RayClusterSpecObject{
284-
RayVersion: options.rayVersion,
285-
Image: options.image,
286-
HeadCPU: options.headCPU,
287-
HeadMemory: options.headMemory,
288-
HeadGPU: options.headGPU,
289-
WorkerCPU: options.workerCPU,
290-
WorkerMemory: options.workerMemory,
291-
WorkerGPU: options.workerGPU,
292-
WorkerReplicas: options.workerReplicas,
284+
RayVersion: &options.rayVersion,
285+
Image: &options.image,
286+
HeadCPU: &options.headCPU,
287+
HeadMemory: &options.headMemory,
288+
HeadGPU: &options.headGPU,
289+
WorkerGroups: []generation.WorkerGroupConfig{
290+
{
291+
WorkerCPU: &options.workerCPU,
292+
WorkerMemory: &options.workerMemory,
293+
WorkerGPU: &options.workerGPU,
294+
WorkerReplicas: &options.workerReplicas,
295+
},
296+
},
293297
},
294298
}
295299
rayJobApplyConfig := rayJobObject.GenerateRayJobApplyConfig()

kubectl-plugin/pkg/util/generation/generation.go

+90-52
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,34 @@ import (
1717
)
1818

1919
type RayClusterSpecObject struct {
20-
HeadRayStartParams map[string]string
21-
WorkerRayStartParams map[string]string
22-
HeadNodeSelectors map[string]string
23-
WorkerNodeSelectors map[string]string
24-
RayVersion string
25-
Image string
26-
HeadCPU string
27-
HeadGPU string
28-
HeadMemory string
29-
HeadEphemeralStorage string
30-
WorkerCPU string
31-
WorkerGPU string
32-
WorkerMemory string
33-
WorkerEphemeralStorage string
34-
WorkerReplicas int32
20+
Context *string `yaml:"context,omitempty"`
21+
Namespace *string `yaml:"namespace,omitempty"`
22+
Name *string `yaml:"name,omitempty"`
23+
Labels map[string]string `yaml:"labels,omitempty"`
24+
Annotations map[string]string `yaml:"annotations,omitempty"`
25+
26+
RayVersion *string `yaml:"ray-version,omitempty"`
27+
Image *string `yaml:"image,omitempty"`
28+
29+
HeadCPU *string `yaml:"head-cpu,omitempty"`
30+
HeadGPU *string `yaml:"head-gpu,omitempty"`
31+
HeadMemory *string `yaml:"head-memory,omitempty"`
32+
HeadEphemeralStorage *string `yaml:"head-ephemeral-storage,omitempty"`
33+
HeadRayStartParams map[string]string `yaml:"head-ray-start-params,omitempty"`
34+
HeadNodeSelectors map[string]string `yaml:"head-node-selectors,omitempty"`
35+
36+
WorkerGroups []WorkerGroupConfig `yaml:"worker-groups,omitempty"`
3537
}
3638

37-
type RayClusterYamlObject struct {
38-
ClusterName string
39-
Namespace string
40-
Labels map[string]string
41-
Annotations map[string]string
42-
RayClusterSpecObject
39+
type WorkerGroupConfig struct {
40+
Name *string `yaml:"name,omitempty"`
41+
WorkerCPU *string `yaml:"worker-cpu,omitempty"`
42+
WorkerGPU *string `yaml:"worker-gpu,omitempty"`
43+
WorkerMemory *string `yaml:"worker-memory,omitempty"`
44+
WorkerEphemeralStorage *string `yaml:"worker-ephemeral-storage,omitempty"`
45+
WorkerReplicas *int32 `yaml:"worker-replicas,omitempty"`
46+
WorkerRayStartParams map[string]string `yaml:"worker-ray-start-params,omitempty"`
47+
WorkerNodeSelectors map[string]string `yaml:"worker-node-selectors,omitempty"`
4348
}
4449

4550
type RayJobYamlObject struct {
@@ -50,11 +55,11 @@ type RayJobYamlObject struct {
5055
RayClusterSpecObject
5156
}
5257

53-
func (rayClusterObject *RayClusterYamlObject) GenerateRayClusterApplyConfig() *rayv1ac.RayClusterApplyConfiguration {
54-
rayClusterApplyConfig := rayv1ac.RayCluster(rayClusterObject.ClusterName, rayClusterObject.Namespace).
55-
WithLabels(rayClusterObject.Labels).
56-
WithAnnotations(rayClusterObject.Annotations).
57-
WithSpec(rayClusterObject.generateRayClusterSpec())
58+
func (rayClusterSpecObject *RayClusterSpecObject) GenerateRayClusterApplyConfig() *rayv1ac.RayClusterApplyConfiguration {
59+
rayClusterApplyConfig := rayv1ac.RayCluster(*rayClusterSpecObject.Name, *rayClusterSpecObject.Namespace).
60+
WithLabels(rayClusterSpecObject.Labels).
61+
WithAnnotations(rayClusterSpecObject.Annotations).
62+
WithSpec(rayClusterSpecObject.generateRayClusterSpec())
5863

5964
return rayClusterApplyConfig
6065
}
@@ -70,35 +75,63 @@ func (rayJobObject *RayJobYamlObject) GenerateRayJobApplyConfig() *rayv1ac.RayJo
7075
}
7176

7277
// generateRequestResources returns a corev1.ResourceList with the given CPU, memory, ephemeral storage, and GPU values for only resource requests
73-
func generateRequestResources(cpu, memory, ephemeralStorage, gpu string) corev1.ResourceList {
74-
resources := corev1.ResourceList{
75-
corev1.ResourceCPU: resource.MustParse(cpu),
76-
corev1.ResourceMemory: resource.MustParse(memory),
78+
func generateRequestResources(cpu, memory, ephemeralStorage, gpu *string) corev1.ResourceList {
79+
resources := corev1.ResourceList{}
80+
81+
if cpu != nil && *cpu != "" {
82+
cpuResource := resource.MustParse(*cpu)
83+
if !cpuResource.IsZero() {
84+
resources[corev1.ResourceCPU] = cpuResource
85+
}
86+
}
87+
88+
if memory != nil && *memory != "" {
89+
memoryResource := resource.MustParse(*memory)
90+
if !memoryResource.IsZero() {
91+
resources[corev1.ResourceMemory] = memoryResource
92+
}
7793
}
78-
if ephemeralStorage != "" {
79-
resources[corev1.ResourceEphemeralStorage] = resource.MustParse(ephemeralStorage)
94+
95+
if ephemeralStorage != nil && *ephemeralStorage != "" {
96+
ephemeralStorageResource := resource.MustParse(*ephemeralStorage)
97+
if !ephemeralStorageResource.IsZero() {
98+
resources[corev1.ResourceEphemeralStorage] = ephemeralStorageResource
99+
}
80100
}
81101

82-
gpuResource := resource.MustParse(gpu)
83-
if !gpuResource.IsZero() {
84-
resources[corev1.ResourceName(util.ResourceNvidiaGPU)] = gpuResource
102+
if gpu != nil && *gpu != "" {
103+
gpuResource := resource.MustParse(*gpu)
104+
if !gpuResource.IsZero() {
105+
resources[corev1.ResourceName(util.ResourceNvidiaGPU)] = gpuResource
106+
}
85107
}
86108

87109
return resources
88110
}
89111

90112
// generateLimitResources returns a corev1.ResourceList with the given memory, ephemeral storage, and GPU values for only resource limits
91-
func generateLimitResources(memory, ephemeralStorage, gpu string) corev1.ResourceList {
92-
resources := corev1.ResourceList{
93-
corev1.ResourceMemory: resource.MustParse(memory),
113+
func generateLimitResources(memory, ephemeralStorage, gpu *string) corev1.ResourceList {
114+
resources := corev1.ResourceList{}
115+
116+
if memory != nil && *memory != "" {
117+
memoryResource := resource.MustParse(*memory)
118+
if !memoryResource.IsZero() {
119+
resources[corev1.ResourceMemory] = memoryResource
120+
}
94121
}
95-
if ephemeralStorage != "" {
96-
resources[corev1.ResourceEphemeralStorage] = resource.MustParse(ephemeralStorage)
122+
123+
if ephemeralStorage != nil && *ephemeralStorage != "" {
124+
ephemeralStorageResource := resource.MustParse(*ephemeralStorage)
125+
if !ephemeralStorageResource.IsZero() {
126+
resources[corev1.ResourceEphemeralStorage] = ephemeralStorageResource
127+
}
97128
}
98129

99-
gpuResource := resource.MustParse(gpu)
100-
if !gpuResource.IsZero() {
101-
resources[corev1.ResourceName(util.ResourceNvidiaGPU)] = gpuResource
130+
if gpu != nil && *gpu != "" {
131+
gpuResource := resource.MustParse(*gpu)
132+
if !gpuResource.IsZero() {
133+
resources[corev1.ResourceName(util.ResourceNvidiaGPU)] = gpuResource
134+
}
102135
}
103136

104137
return resources
@@ -115,23 +148,28 @@ func (rayClusterSpecObject *RayClusterSpecObject) generateRayClusterSpec() *rayv
115148
"metrics-export-port": "8080",
116149
}
117150
maps.Copy(headRayStartParams, rayClusterSpecObject.HeadRayStartParams)
118-
maps.Copy(workerRayStartParams, rayClusterSpecObject.WorkerRayStartParams)
151+
maps.Copy(workerRayStartParams, rayClusterSpecObject.WorkerGroups[0].WorkerRayStartParams)
119152

120153
headRequestResources := generateRequestResources(rayClusterSpecObject.HeadCPU, rayClusterSpecObject.HeadMemory, rayClusterSpecObject.HeadEphemeralStorage, rayClusterSpecObject.HeadGPU)
121154
headLimitResources := generateLimitResources(rayClusterSpecObject.HeadMemory, rayClusterSpecObject.HeadEphemeralStorage, rayClusterSpecObject.HeadGPU)
122-
workerRequestResources := generateRequestResources(rayClusterSpecObject.WorkerCPU, rayClusterSpecObject.WorkerMemory, rayClusterSpecObject.WorkerEphemeralStorage, rayClusterSpecObject.WorkerGPU)
123-
workerLimitResources := generateLimitResources(rayClusterSpecObject.WorkerMemory, rayClusterSpecObject.WorkerEphemeralStorage, rayClusterSpecObject.WorkerGPU)
155+
workerRequestResources := generateRequestResources(rayClusterSpecObject.WorkerGroups[0].WorkerCPU, rayClusterSpecObject.WorkerGroups[0].WorkerMemory, rayClusterSpecObject.WorkerGroups[0].WorkerEphemeralStorage, rayClusterSpecObject.WorkerGroups[0].WorkerGPU)
156+
workerLimitResources := generateLimitResources(rayClusterSpecObject.WorkerGroups[0].WorkerMemory, rayClusterSpecObject.WorkerGroups[0].WorkerEphemeralStorage, rayClusterSpecObject.WorkerGroups[0].WorkerGPU)
157+
158+
workerGroupName := "default-group"
159+
if rayClusterSpecObject.WorkerGroups[0].Name != nil {
160+
workerGroupName = *rayClusterSpecObject.WorkerGroups[0].Name
161+
}
124162

125163
rayClusterSpec := rayv1ac.RayClusterSpec().
126-
WithRayVersion(rayClusterSpecObject.RayVersion).
164+
WithRayVersion(*rayClusterSpecObject.RayVersion).
127165
WithHeadGroupSpec(rayv1ac.HeadGroupSpec().
128166
WithRayStartParams(headRayStartParams).
129167
WithTemplate(corev1ac.PodTemplateSpec().
130168
WithSpec(corev1ac.PodSpec().
131169
WithNodeSelector(rayClusterSpecObject.HeadNodeSelectors).
132170
WithContainers(corev1ac.Container().
133171
WithName("ray-head").
134-
WithImage(rayClusterSpecObject.Image).
172+
WithImage(*rayClusterSpecObject.Image).
135173
WithResources(corev1ac.ResourceRequirements().
136174
WithRequests(headRequestResources).
137175
WithLimits(headLimitResources)).
@@ -140,14 +178,14 @@ func (rayClusterSpecObject *RayClusterSpecObject) generateRayClusterSpec() *rayv
140178
corev1ac.ContainerPort().WithContainerPort(10001).WithName("client")))))).
141179
WithWorkerGroupSpecs(rayv1ac.WorkerGroupSpec().
142180
WithRayStartParams(workerRayStartParams).
143-
WithGroupName("default-group").
144-
WithReplicas(rayClusterSpecObject.WorkerReplicas).
181+
WithGroupName(workerGroupName).
182+
WithReplicas(*rayClusterSpecObject.WorkerGroups[0].WorkerReplicas).
145183
WithTemplate(corev1ac.PodTemplateSpec().
146184
WithSpec(corev1ac.PodSpec().
147-
WithNodeSelector(rayClusterSpecObject.WorkerNodeSelectors).
185+
WithNodeSelector(rayClusterSpecObject.WorkerGroups[0].WorkerNodeSelectors).
148186
WithContainers(corev1ac.Container().
149187
WithName("ray-worker").
150-
WithImage(rayClusterSpecObject.Image).
188+
WithImage(*rayClusterSpecObject.Image).
151189
WithResources(corev1ac.ResourceRequirements().
152190
WithRequests(workerRequestResources).
153191
WithLimits(workerLimitResources))))))

0 commit comments

Comments
 (0)