From 1ba76b6a531c1caab67b198a297502b58007bd67 Mon Sep 17 00:00:00 2001 From: Barry Jay Sly-Delgado Date: Wed, 21 Aug 2024 09:11:57 -0700 Subject: [PATCH] vine status merlin --- merlin/common/vine_tasks.py | 5 ++- merlin/examples/taskvine/hello_samples.yaml | 8 +++-- merlin/study/vineadapter.py | 40 +++++++++++++++++---- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/merlin/common/vine_tasks.py b/merlin/common/vine_tasks.py index c3a01c2a..36795457 100644 --- a/merlin/common/vine_tasks.py +++ b/merlin/common/vine_tasks.py @@ -638,7 +638,10 @@ def expand_tasks_with_samples( # pylint: disable=R0913,R0914 next_index.min, ).set_manager(steps[0].get_task_manager()) - # TODO VINE local execution option + # TODO VINE possible local execution option: + # there does not exist the functionality + # to schedule to the manager remotely so a group + # of tasks needs to be returned together. #if self.request.is_eager: # sig.delay() if 1: diff --git a/merlin/examples/taskvine/hello_samples.yaml b/merlin/examples/taskvine/hello_samples.yaml index a2b7c815..c74eed28 100644 --- a/merlin/examples/taskvine/hello_samples.yaml +++ b/merlin/examples/taskvine/hello_samples.yaml @@ -23,16 +23,20 @@ study: cmd: print("Hurrah, we did it!") depends: [step_1_*] shell: /usr/bin/env python3 + + +#batch: +# type: slurm merlin: resources: task_server: taskvine workers: default: - manager: merlin_test_manager + manager: hello_samples_manager steps: [step_1, step_2] managers: - merlin_test_manager: + hello_samples_manager: samples: generate: cmd: python3 $(SPECROOT)/make_samples.py --filepath=$(MERLIN_INFO)/samples.csv --number=$(N_SAMPLES) diff --git a/merlin/study/vineadapter.py b/merlin/study/vineadapter.py index 5c6c189e..9eda8100 100644 --- a/merlin/study/vineadapter.py +++ b/merlin/study/vineadapter.py @@ -140,20 +140,48 @@ def query_taskvine_study(spec: MerlinSpec): :param spec: A MerlinSpec object representing our study """ + study_info = {"waiting":0, + "running":0, + 
"complete":0, + "workers":0, + "cores":0, + "cores_inuse":0, + "memory":0, + "memory_inuse":0, + "gpus":0, + "gpus_inuse":0} result = subprocess.run(["vine_status"], capture_output=True) - # PROJECT HOST PORT WAITING RUNNING COMPLETE WORKERS - - study_info = {"waiting":0, "running":0, "complete":0, "workers":0} + # MANAGER HOST PORT WAITING RUNNING COMPLETE WORKERS lines = result.stdout.splitlines() for line in lines[1:]: line = line.decode("utf-8") - manager, host, port, waiting, running, complete, workers = line.split(maxspit=6) - if manager in spec.merlin["resources"]["managers"]: + manager, host, port, waiting, running, complete, workers = line.split(maxsplit=6) + #if manager in spec.merlin["resources"]["managers"]: + if 1: study_info["waiting"] += int(waiting) study_info["running"] += int(running) study_info["complete"] += int(complete) study_info["workers"] += int(workers) - print(spec.merlin["resources"]["managers"]) + result = subprocess.run(["vine_status", "-R"], capture_output=True) + # MANAGER CORES INUSE MEM(GB) INUSE GPUS INUSE + lines = result.stdout.splitlines() + for line in lines[1:]: + line = line.decode("utf-8") + manager, cores, cores_inuse, memory, memory_inuse, gpus, gpus_inuse = line.split(maxsplit=6) + #if manager in spec.merlin["resources"]["managers"]: + if 1: + study_info["cores"] += int(cores) + study_info["cores_inuse"] += int(cores_inuse) + study_info["memory"] += int(memory) + study_info["memory_inuse"] += int(memory_inuse) + study_info["gpus"] += int(gpus) + study_info["gpus_inuse"] += int(gpus_inuse) + headers = list(study_info.keys()) + values = list(study_info.values()) + print(headers, values) + print(tabulate(values, headers=headers)) + print(study_info) + def get_running_managers(celery_app_name: str, test_mode: bool = False) -> List[str]: