Skip to content

Commit 20288a4

Browse files
authored
Merge pull request #3034 from IntersectMBO/add_MAX_TESTS_PER_CLUSTER
feat(cluster): enforce max tests per cluster instance
2 parents fc2dda9 + c96151c commit 20288a4

File tree

4 files changed

+23
-6
lines changed

4 files changed

+23
-6
lines changed

.github/regression.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ elif [ "$TX_ERA" = "default" ]; then
7070
export TX_ERA=""
7171
fi
7272

73+
# Decrease the number of tests per cluster if we are using the "disk" (LMDB) UTxO backend to avoid
74+
# having too many concurrent readers.
75+
if [ -z "${MAX_TESTS_PER_CLUSTER:-""}" ] && [ "${UTXO_BACKEND:-""}" = "disk" ]; then
76+
export MAX_TESTS_PER_CLUSTER=5
77+
fi
78+
7379
if [ -n "${BOOTSTRAP_DIR:-""}" ]; then
7480
: # don't touch `SCRIPTS_DIRNAME` when running on testnet
7581
elif [ "${CI_BYRON_CLUSTER:-"false"}" != "false" ]; then

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ Test execution can be configured using environment variables.
108108
* `PYTEST_ARGS` – specifies additional arguments for pytest (default: unset).
109109
* `MARKEXPR` – specifies marker expression for pytest (default: unset).
110110
* `TEST_THREADS` – specifies the number of pytest workers (default: 20).
111+
* `MAX_TESTS_PER_CLUSTER` – specifies the maximum number of tests that can run concurrently on a single cluster instance (default: 8).
111112
* `CLUSTERS_COUNT` – number of cluster instances that will be started (default: 9).
112113
* `CLUSTER_ERA` – cluster era for Cardano node – used for selecting the correct cluster start script (default: conway).
113114
* `COMMAND_ERA` – era for cardano-cli commands – can be used for creating Shelley-era (Allegra-era, ...) transactions (default: unset).

cardano_node_tests/cluster_management/cluster_getter.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ def _marked_select_instance(self, cget_status: _ClusterGetStatus) -> bool:
541541
if cget_status.marked_running_my_anywhere:
542542
self.log(
543543
f"c{cget_status.instance_num}: tests marked with my mark '{cget_status.mark}' "
544-
"already running on other cluster instance, cannot run"
544+
"already running on other cluster instance, cannot start"
545545
)
546546
return False
547547

@@ -832,9 +832,7 @@ def get_cluster_instance( # noqa: C901
832832
cget_status.instance_dir.mkdir(exist_ok=True)
833833

834834
# Cleanup cluster instance where attempt to start cluster failed repeatedly
835-
if status_files.get_cluster_dead_file(
836-
instance_num=cget_status.instance_num
837-
).exists():
835+
if status_files.get_cluster_dead_file(instance_num=instance_num).exists():
838836
self._cleanup_dead_clusters(cget_status)
839837
continue
840838

@@ -845,19 +843,30 @@ def get_cluster_instance( # noqa: C901
845843

846844
# Are there tests already running on this cluster instance?
847845
cget_status.started_tests_sfiles = status_files.list_test_running_files(
848-
instance_num=cget_status.instance_num
846+
instance_num=instance_num
849847
)
850848

851849
# "marked tests" = group of tests marked with my mark
852850
cget_status.marked_ready_sfiles = status_files.list_curr_mark_files(
853-
instance_num=cget_status.instance_num, mark=mark
851+
instance_num=instance_num, mark=mark
854852
)
855853

856854
# If marked tests are already running, update their status
857855
self._update_marked_tests(
858856
marked_tests_cache=marked_tests_cache, cget_status=cget_status
859857
)
860858

859+
# If there would be more tests running on this cluster instance than allowed,
860+
# we need to wait.
861+
if (
862+
self.num_of_instances > 1
863+
and (tnum := len(cget_status.started_tests_sfiles))
864+
>= configuration.MAX_TESTS_PER_CLUSTER
865+
):
866+
cget_status.sleep_delay = 2
867+
self.log(f"c{instance_num}: {tnum} tests are already running, cannot start")
868+
continue
869+
861870
# Does the cluster instance need a respin to continue?
862871
# Cache the result as the check itself can be expensive.
863872
cget_status.cluster_needs_respin = self._cluster_needs_respin(instance_num)

cardano_node_tests/utils/configuration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959

6060
CLUSTERS_COUNT = int(os.environ.get("CLUSTERS_COUNT") or 0)
6161
WORKERS_COUNT = int(os.environ.get("PYTEST_XDIST_WORKER_COUNT") or 1)
62+
MAX_TESTS_PER_CLUSTER = int(os.environ.get("MAX_TESTS_PER_CLUSTER") or 8)
6263
CLUSTERS_COUNT = int(CLUSTERS_COUNT or (min(WORKERS_COUNT, 9)))
6364

6465
DEV_CLUSTER_RUNNING = bool(os.environ.get("DEV_CLUSTER_RUNNING"))

0 commit comments

Comments
 (0)