@@ -14,9 +14,9 @@ def test_ray_job_spec_creation():
14
14
entrypoint_memory = 1024 ,
15
15
entrypoint_resources = {"custom.com/resource" : 1.0 },
16
16
cluster_name = "test-cluster" ,
17
- cluster_namespace = "test-ns"
17
+ cluster_namespace = "test-ns" ,
18
18
)
19
-
19
+
20
20
assert spec .entrypoint == "python script.py"
21
21
assert spec .submission_id == "test-123"
22
22
assert spec .runtime_env == {"pip" : ["numpy" , "pandas" ]}
@@ -35,47 +35,56 @@ def test_ray_job_yaml_generation_full():
35
35
spec = RayJobSpec (
36
36
entrypoint = "python -c 'import ray; print(ray.cluster_resources())'" ,
37
37
submission_id = "test-submission-456" ,
38
- runtime_env = {"pip" : ["numpy==1.24.0" , "pandas" ], "env_vars" : {"RAY_LOG_LEVEL" : "DEBUG" }},
38
+ runtime_env = {
39
+ "pip" : ["numpy==1.24.0" , "pandas" ],
40
+ "env_vars" : {"RAY_LOG_LEVEL" : "DEBUG" },
41
+ },
39
42
metadata = {"job_timeout_s" : "1800" , "author" : "test-user" },
40
43
entrypoint_num_cpus = 2.5 ,
41
44
entrypoint_num_gpus = 1.0 ,
42
45
entrypoint_memory = 2048 ,
43
46
entrypoint_resources = {"custom.com/special" : 1.0 },
44
47
cluster_name = "ml-cluster" ,
45
- cluster_namespace = "ml-namespace"
48
+ cluster_namespace = "ml-namespace" ,
46
49
)
47
-
50
+
48
51
job = RayJob (
49
52
metadata = {
50
53
"name" : "comprehensive-test-job" ,
51
54
"namespace" : "test-namespace" ,
52
- "labels" : {"app" : "ml-training" }
55
+ "labels" : {"app" : "ml-training" },
53
56
},
54
- spec = spec
57
+ spec = spec ,
55
58
)
56
-
59
+
57
60
yaml_dict = job .to_dict ()
58
-
61
+
59
62
# Verify top-level structure
60
63
assert yaml_dict ["apiVersion" ] == "ray.io/v1"
61
64
assert yaml_dict ["kind" ] == "RayJob"
62
65
assert yaml_dict ["metadata" ]["name" ] == "comprehensive-test-job"
63
66
assert yaml_dict ["metadata" ]["namespace" ] == "test-namespace"
64
67
assert yaml_dict ["metadata" ]["labels" ] == {"app" : "ml-training" }
65
-
68
+
66
69
# Verify spec section
67
70
spec_dict = yaml_dict ["spec" ]
68
- assert spec_dict ["entrypoint" ] == "python -c 'import ray; print(ray.cluster_resources())'"
71
+ assert (
72
+ spec_dict ["entrypoint" ]
73
+ == "python -c 'import ray; print(ray.cluster_resources())'"
74
+ )
69
75
assert spec_dict ["submission_id" ] == "test-submission-456"
70
- assert spec_dict ["runtime_env" ] == {"pip" : ["numpy==1.24.0" , "pandas" ], "env_vars" : {"RAY_LOG_LEVEL" : "DEBUG" }}
76
+ assert spec_dict ["runtime_env" ] == {
77
+ "pip" : ["numpy==1.24.0" , "pandas" ],
78
+ "env_vars" : {"RAY_LOG_LEVEL" : "DEBUG" },
79
+ }
71
80
assert spec_dict ["metadata" ] == {"job_timeout_s" : "1800" , "author" : "test-user" }
72
81
assert spec_dict ["entrypoint_num_cpus" ] == 2.5
73
82
assert spec_dict ["entrypoint_num_gpus" ] == 1.0
74
83
assert spec_dict ["entrypoint_memory" ] == 2048
75
84
assert spec_dict ["entrypoint_resources" ] == {"custom.com/special" : 1.0 }
76
85
assert spec_dict ["cluster_name" ] == "ml-cluster"
77
86
assert spec_dict ["cluster_namespace" ] == "ml-namespace"
78
-
87
+
79
88
# Verify status section (should use spec status when job.status is None)
80
89
status_dict = yaml_dict ["status" ]
81
90
assert status_dict ["status" ] == RayJobStatus .PENDING
@@ -88,19 +97,16 @@ def test_ray_job_yaml_generation_full():
88
97
def test_ray_job_yaml_generation_minimal ():
89
98
"""Test RayJob YAML generation with minimal required fields."""
90
99
spec = RayJobSpec (entrypoint = "python minimal_job.py" )
91
-
92
- job = RayJob (
93
- metadata = {"name" : "minimal-job" },
94
- spec = spec
95
- )
96
-
100
+
101
+ job = RayJob (metadata = {"name" : "minimal-job" }, spec = spec )
102
+
97
103
yaml_dict = job .to_dict ()
98
-
104
+
99
105
# Verify structure
100
106
assert yaml_dict ["apiVersion" ] == "ray.io/v1"
101
107
assert yaml_dict ["kind" ] == "RayJob"
102
108
assert yaml_dict ["metadata" ]["name" ] == "minimal-job"
103
-
109
+
104
110
# Verify spec has only required field and defaults to None for others
105
111
spec_dict = yaml_dict ["spec" ]
106
112
assert spec_dict ["entrypoint" ] == "python minimal_job.py"
@@ -113,7 +119,7 @@ def test_ray_job_yaml_generation_minimal():
113
119
assert spec_dict ["entrypoint_resources" ] is None
114
120
assert spec_dict ["cluster_name" ] is None
115
121
assert spec_dict ["cluster_namespace" ] is None
116
-
122
+
117
123
# Verify default status
118
124
status_dict = yaml_dict ["status" ]
119
125
assert status_dict ["status" ] == RayJobStatus .PENDING
@@ -123,9 +129,9 @@ def test_ray_job_yaml_with_existing_status():
123
129
"""Test RayJob YAML generation when status is pre-populated."""
124
130
spec = RayJobSpec (
125
131
entrypoint = "python running_job.py" ,
126
- status = RayJobStatus .PENDING # This should be overridden by job.status
132
+ status = RayJobStatus .PENDING , # This should be overridden by job.status
127
133
)
128
-
134
+
129
135
# Simulate status from Kubernetes controller
130
136
existing_status = {
131
137
"status" : "RUNNING" ,
@@ -135,18 +141,18 @@ def test_ray_job_yaml_with_existing_status():
135
141
"driver_info" : {
136
142
"id" : "driver-abc123" ,
137
143
"node_ip_address" : "10.244.1.5" ,
138
- "pid" : "12345"
139
- }
144
+ "pid" : "12345" ,
145
+ },
140
146
}
141
-
147
+
142
148
job = RayJob (
143
149
metadata = {"name" : "status-test-job" , "namespace" : "test-ns" },
144
150
spec = spec ,
145
- status = existing_status
151
+ status = existing_status ,
146
152
)
147
-
153
+
148
154
yaml_dict = job .to_dict ()
149
-
155
+
150
156
# Should use existing status, not spec status
151
157
assert yaml_dict ["status" ] == existing_status
152
158
assert yaml_dict ["status" ]["status" ] == "RUNNING"
@@ -163,36 +169,38 @@ def test_ray_job_yaml_with_complex_runtime_env():
163
169
"env_vars" : {
164
170
"CUDA_VISIBLE_DEVICES" : "0,1" ,
165
171
"PYTHONPATH" : "/opt/ml/code" ,
166
- "HF_HOME" : "/tmp/huggingface"
172
+ "HF_HOME" : "/tmp/huggingface" ,
167
173
},
168
174
"working_dir" : "./training_code" ,
169
- "py_modules" : ["utils" , "models" ]
175
+ "py_modules" : ["utils" , "models" ],
170
176
}
171
-
177
+
172
178
spec = RayJobSpec (
173
179
entrypoint = "python train_model.py --epochs 100" ,
174
180
runtime_env = complex_runtime_env ,
175
181
entrypoint_num_gpus = 2.0 ,
176
- entrypoint_memory = 8192
182
+ entrypoint_memory = 8192 ,
177
183
)
178
-
184
+
179
185
job = RayJob (
180
- metadata = {"name" : "complex-env-job" , "namespace" : "ml-training" },
181
- spec = spec
186
+ metadata = {"name" : "complex-env-job" , "namespace" : "ml-training" }, spec = spec
182
187
)
183
-
188
+
184
189
yaml_dict = job .to_dict ()
185
-
190
+
186
191
# Verify complex runtime_env is preserved exactly
187
192
spec_runtime_env = yaml_dict ["spec" ]["runtime_env" ]
188
193
assert spec_runtime_env ["pip" ] == ["torch==1.13.0" , "transformers" , "datasets" ]
189
- assert spec_runtime_env ["conda" ]["dependencies" ] == ["python=3.9" , "cudatoolkit=11.8" ]
194
+ assert spec_runtime_env ["conda" ]["dependencies" ] == [
195
+ "python=3.9" ,
196
+ "cudatoolkit=11.8" ,
197
+ ]
190
198
assert spec_runtime_env ["env_vars" ]["CUDA_VISIBLE_DEVICES" ] == "0,1"
191
199
assert spec_runtime_env ["env_vars" ]["PYTHONPATH" ] == "/opt/ml/code"
192
200
assert spec_runtime_env ["env_vars" ]["HF_HOME" ] == "/tmp/huggingface"
193
201
assert spec_runtime_env ["working_dir" ] == "./training_code"
194
202
assert spec_runtime_env ["py_modules" ] == ["utils" , "models" ]
195
-
203
+
196
204
# Verify other fields
197
205
assert yaml_dict ["spec" ]["entrypoint" ] == "python train_model.py --epochs 100"
198
206
assert yaml_dict ["spec" ]["entrypoint_num_gpus" ] == 2.0
@@ -203,23 +211,19 @@ def test_ray_job_yaml_different_statuses():
203
211
"""Test RayJob YAML generation with different status values."""
204
212
statuses_to_test = [
205
213
RayJobStatus .PENDING ,
206
- RayJobStatus .RUNNING ,
214
+ RayJobStatus .RUNNING ,
207
215
RayJobStatus .SUCCEEDED ,
208
216
RayJobStatus .FAILED ,
209
- RayJobStatus .STOPPED
217
+ RayJobStatus .STOPPED ,
210
218
]
211
-
219
+
212
220
for status in statuses_to_test :
213
- spec = RayJobSpec (
214
- entrypoint = f"python job_{ status .lower ()} .py" ,
215
- status = status
216
- )
217
-
221
+ spec = RayJobSpec (entrypoint = f"python job_{ status .lower ()} .py" , status = status )
222
+
218
223
job = RayJob (
219
- metadata = {"name" : f"job-{ status .lower ()} " , "namespace" : "test" },
220
- spec = spec
224
+ metadata = {"name" : f"job-{ status .lower ()} " , "namespace" : "test" }, spec = spec
221
225
)
222
-
226
+
223
227
yaml_dict = job .to_dict ()
224
228
assert yaml_dict ["status" ]["status" ] == status
225
- assert yaml_dict ["spec" ]["entrypoint" ] == f"python job_{ status .lower ()} .py"
229
+ assert yaml_dict ["spec" ]["entrypoint" ] == f"python job_{ status .lower ()} .py"
0 commit comments