@@ -541,7 +541,7 @@ def _marked_select_instance(self, cget_status: _ClusterGetStatus) -> bool:
541
541
if cget_status .marked_running_my_anywhere :
542
542
self .log (
543
543
f"c{ cget_status .instance_num } : tests marked with my mark '{ cget_status .mark } ' "
544
- "already running on other cluster instance, cannot run "
544
+ "already running on other cluster instance, cannot start "
545
545
)
546
546
return False
547
547
@@ -832,9 +832,7 @@ def get_cluster_instance( # noqa: C901
832
832
cget_status .instance_dir .mkdir (exist_ok = True )
833
833
834
834
# Clean up the cluster instance where attempts to start the cluster failed repeatedly
835
- if status_files .get_cluster_dead_file (
836
- instance_num = cget_status .instance_num
837
- ).exists ():
835
+ if status_files .get_cluster_dead_file (instance_num = instance_num ).exists ():
838
836
self ._cleanup_dead_clusters (cget_status )
839
837
continue
840
838
@@ -845,19 +843,30 @@ def get_cluster_instance( # noqa: C901
845
843
846
844
# Are there tests already running on this cluster instance?
847
845
cget_status .started_tests_sfiles = status_files .list_test_running_files (
848
- instance_num = cget_status . instance_num
846
+ instance_num = instance_num
849
847
)
850
848
851
849
# "marked tests" = group of tests marked with my mark
852
850
cget_status .marked_ready_sfiles = status_files .list_curr_mark_files (
853
- instance_num = cget_status . instance_num , mark = mark
851
+ instance_num = instance_num , mark = mark
854
852
)
855
853
856
854
# If marked tests are already running, update their status
857
855
self ._update_marked_tests (
858
856
marked_tests_cache = marked_tests_cache , cget_status = cget_status
859
857
)
860
858
859
+ # If there would be more tests running on this cluster instance than allowed,
860
+ # we need to wait.
861
+ if (
862
+ self .num_of_instances > 1
863
+ and (tnum := len (cget_status .started_tests_sfiles ))
864
+ >= configuration .MAX_TESTS_PER_CLUSTER
865
+ ):
866
+ cget_status .sleep_delay = 2
867
+ self .log (f"c{ instance_num } : { tnum } tests are already running, cannot start" )
868
+ continue
869
+
861
870
# Does the cluster instance need a respin to continue?
862
871
# Cache the result as the check itself can be expensive.
863
872
cget_status .cluster_needs_respin = self ._cluster_needs_respin (instance_num )
0 commit comments