@@ -271,5 +271,100 @@ var _ = Describe("TensorFusionConnection Controller", func() {
271
271
connection .Status .ConnectionURL == "native+192.168.1.2+8081+test-worker-2-0"
272
272
}, time .Second * 5 , time .Millisecond * 100 ).Should (BeTrue ())
273
273
})
274
+
275
+ It ("should update status to WorkerPending when worker selection fails" , func () {
276
+ By ("Creating a TensorFusionWorkload without worker status" )
277
+
278
+ // Create a workload with no workers (empty WorkerStatuses)
279
+ failWorkloadName := "test-workload-no-workers"
280
+ failWorkloadNamespacedName := types.NamespacedName {
281
+ Name : failWorkloadName ,
282
+ Namespace : "default" ,
283
+ }
284
+
285
+ failWorkload := & tfv1.TensorFusionWorkload {
286
+ ObjectMeta : metav1.ObjectMeta {
287
+ Name : failWorkloadName ,
288
+ Namespace : "default" ,
289
+ },
290
+ Spec : tfv1.TensorFusionWorkloadSpec {
291
+ PoolName : "mock-empty" ,
292
+ Resources : tfv1.Resources {
293
+ Requests : tfv1.Resource {
294
+ Tflops : resource .MustParse ("1" ),
295
+ Vram : resource .MustParse ("1Gi" ),
296
+ },
297
+ Limits : tfv1.Resource {
298
+ Tflops : resource .MustParse ("1" ),
299
+ Vram : resource .MustParse ("1Gi" ),
300
+ },
301
+ },
302
+ },
303
+ Status : tfv1.TensorFusionWorkloadStatus {
304
+ Replicas : 0 ,
305
+ ReadyReplicas : 0 ,
306
+ // Empty WorkerStatuses to force selection failure
307
+ WorkerStatuses : []tfv1.WorkerStatus {},
308
+ },
309
+ }
310
+ Expect (k8sClient .Create (ctx , failWorkload )).To (Succeed ())
311
+ // Update status
312
+ Expect (k8sClient .Status ().Update (ctx , failWorkload )).To (Succeed ())
313
+
314
+ // Verify workload was created properly
315
+ createdWorkload := & tfv1.TensorFusionWorkload {}
316
+ Eventually (func () bool {
317
+ if err := k8sClient .Get (ctx , failWorkloadNamespacedName , createdWorkload ); err != nil {
318
+ return false
319
+ }
320
+ return len (createdWorkload .Status .WorkerStatuses ) == 0
321
+ }, time .Second * 5 , time .Millisecond * 100 ).Should (BeTrue ())
322
+
323
+ By ("Creating a connection to the workload with no workers" )
324
+ failConnectionName := "test-connection-fail"
325
+ failConnectionNamespacedName := types.NamespacedName {
326
+ Name : failConnectionName ,
327
+ Namespace : "default" ,
328
+ }
329
+
330
+ failConnection := & tfv1.TensorFusionConnection {
331
+ ObjectMeta : metav1.ObjectMeta {
332
+ Name : failConnectionName ,
333
+ Namespace : "default" ,
334
+ Labels : map [string ]string {
335
+ constants .WorkloadKey : failWorkloadName ,
336
+ },
337
+ },
338
+ Spec : tfv1.TensorFusionConnectionSpec {
339
+ WorkloadName : failWorkloadName ,
340
+ },
341
+ }
342
+ Expect (k8sClient .Create (ctx , failConnection )).To (Succeed ())
343
+
344
+ By ("Reconciling the connection to trigger worker selection failure" )
345
+ controllerReconciler := & TensorFusionConnectionReconciler {
346
+ Client : k8sClient ,
347
+ Scheme : k8sClient .Scheme (),
348
+ Recorder : record .NewFakeRecorder (10 ),
349
+ }
350
+
351
+ _ , err := controllerReconciler .Reconcile (ctx , reconcile.Request {
352
+ NamespacedName : failConnectionNamespacedName ,
353
+ })
354
+ // We expect an error since worker selection should fail
355
+ Expect (err ).To (HaveOccurred ())
356
+
357
+ By ("Verifying the connection status is updated to WorkerPending" )
358
+ Eventually (func () bool {
359
+ if err := k8sClient .Get (ctx , failConnectionNamespacedName , failConnection ); err != nil {
360
+ return false
361
+ }
362
+ return failConnection .Status .Phase == tfv1 .WorkerPending
363
+ }, time .Second * 5 , time .Millisecond * 100 ).Should (BeTrue ())
364
+
365
+ By ("Cleaning up test resources" )
366
+ Expect (k8sClient .Delete (ctx , failConnection )).To (Succeed ())
367
+ Expect (k8sClient .Delete (ctx , failWorkload )).To (Succeed ())
368
+ })
274
369
})
275
370
})
0 commit comments