Skip to content

Commit 0e3d33c

Browse files
authored
Merge pull request #694 from adi611/adi611-patch-testpsij-1
Draft: Adding new worker which uses PSI/J to run tasks
2 parents 0245cdc + 2c695d5 commit 0e3d33c

File tree

7 files changed

+308
-1
lines changed

7 files changed

+308
-1
lines changed

.github/workflows/testpsijlocal.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# CI workflow: run the pydra engine test-suite through the PSI/J "local" executor.
name: PSI/J-Local

on:
  push:
    branches:
      - master
  pull_request:

# A newer run for the same workflow/ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  test:
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
        python-version:
          - '3.11'
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.os }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: ${{ github.repository }}

      - name: Setup Python version ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies for PSI/J
        run: |
          pip install -e ".[test, psij]"

      - name: Run tests for PSI/J
        run: |
          pytest --color=yes -vs --psij=local -n auto pydra/engine --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml

      - name: Upload to codecov
        run: codecov -f cov.xml -F unittests -e GITHUB_WORKFLOW

.github/workflows/testpsijslurm.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# CI workflow: run the pydra test-suite through the PSI/J "slurm" executor
# inside a SLURM-enabled docker container.
name: PSI/J-SLURM

on:
  push:
    branches:
      - master
  pull_request:

jobs:
  build:
    strategy:
      matrix:
        python-version: [3.11.5]
      fail-fast: false
    runs-on: ubuntu-latest
    env:
      DOCKER_IMAGE: adi611/docker-centos7-slurm:23.02.1

    steps:
      - name: Disable etelemetry
        run: echo "NO_ET=TRUE" >> $GITHUB_ENV
      - uses: actions/checkout@v4
      - name: Pull docker image
        run: |
          docker pull $DOCKER_IMAGE
          # Have image running in the background
          docker run `bash <(curl -s https://codecov.io/env)` -itd -h slurmctl --cap-add sys_admin --name slurm -v `pwd`:/pydra -e NO_ET=$NO_ET $DOCKER_IMAGE
      - name: Display previous jobs with sacct
        run: |
          echo "Allowing ports/daemons time to start" && sleep 10
          docker exec slurm bash -c "sacctmgr -i add account none,test Cluster=linux Description='none' Organization='none'"
          # The step runs under `bash -e`, so the health check has to be the
          # condition of the `if` itself; a separate `$?` test afterwards would
          # never be reached on failure and the message would never be printed.
          if ! docker exec slurm bash -c "sacct && sinfo && squeue" &> /dev/null; then
            echo "Slurm docker image error"
            exit 1
          fi
      - name: Setup Python
        run: |
          docker exec slurm bash -c "echo $NO_ET"
          docker exec slurm bash -c "ls -la && echo list top level dir"
          docker exec slurm bash -c "ls -la /pydra && echo list pydra dir"
          if [[ "${{ matrix.python-version }}" == "3.11.5" ]]; then
            docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5"
          fi
          docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}"
          docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra; print(pydra.__version__)'"
      - name: Run pytest
        run: |
          docker exec slurm bash -c "pytest --color=yes -vs -n auto --psij=slurm --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'"
      - name: Upload to codecov
        run: |
          docker exec slurm bash -c "pip install urllib3==1.26.6"
          docker exec slurm bash -c "codecov --root /pydra -f /pydra/cov.xml -F unittests"
          docker rm -f slurm

pydra/conftest.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77

88
def pytest_addoption(parser):
    """Register the custom command-line options used by the test-suite.

    ``--dask`` is a boolean flag; ``--psij`` takes the PSI/J executor
    subtype, restricted to ``local`` or ``slurm``.
    """
    register = parser.addoption
    register("--dask", action="store_true", help="run all combinations")

    psij_subtypes = ["local", "slurm"]
    register(
        "--psij",
        action="store",
        help="run with psij subtype plugin",
        choices=psij_subtypes,
    )
1016

1117

1218
def pytest_generate_tests(metafunc):
@@ -21,6 +27,16 @@ def pytest_generate_tests(metafunc):
2127
except ValueError:
2228
# Called as --pyargs, so --dask isn't available
2329
pass
30+
try:
31+
if metafunc.config.getoption("psij"):
32+
Plugins.append("psij-" + metafunc.config.getoption("psij"))
33+
if (
34+
bool(shutil.which("sbatch"))
35+
and metafunc.config.getoption("psij") == "slurm"
36+
):
37+
Plugins.remove("slurm")
38+
except ValueError:
39+
pass
2440
metafunc.parametrize("plugin_dask_opt", Plugins)
2541

2642
if "plugin" in metafunc.fixturenames:
@@ -35,6 +51,16 @@ def pytest_generate_tests(metafunc):
3551
Plugins = ["slurm"]
3652
else:
3753
Plugins = ["cf"]
54+
try:
55+
if metafunc.config.getoption("psij"):
56+
Plugins.append("psij-" + metafunc.config.getoption("psij"))
57+
if (
58+
bool(shutil.which("sbatch"))
59+
and metafunc.config.getoption("psij") == "slurm"
60+
):
61+
Plugins.remove("slurm")
62+
except ValueError:
63+
pass
3864
metafunc.parametrize("plugin", Plugins)
3965

4066

pydra/engine/run_pickled.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import pickle
2+
import sys
3+
from pydra.engine.helpers import load_and_run
4+
5+
6+
def run_pickled(*file_paths, rerun=False):
    """Unpickle the given files, execute what they describe, print the result.

    Parameters
    ----------
    *file_paths
        Paths of pickle files. With one file, the unpickled object is
        called as ``obj(rerun=rerun)``. With two files, the unpickled
        objects are handed, in order, to ``load_and_run``.
    rerun : bool, optional
        Forwarded to the executed callable. Defaults to False.

    Raises
    ------
    ValueError
        If the number of files is neither one nor two.
    """
    payloads = []
    for path in file_paths:
        with open(path, "rb") as handle:
            payloads.append(pickle.load(handle))

    count = len(payloads)
    if count not in (1, 2):
        raise ValueError("Unsupported number of loaded objects")

    if count == 1:
        (runnable,) = payloads
        result = runnable(rerun=rerun)
    else:
        first, second = payloads
        result = load_and_run(first, second, rerun=rerun)

    print(f"Result: {result}")
21+
22+
23+
if __name__ == "__main__":
    # Everything after the script name is a pickle path, except the optional
    # "--rerun" flag, which is stripped (first occurrence) before dispatching.
    cli_args = sys.argv[1:]
    rerun = "--rerun" in cli_args
    if rerun:
        cli_args.remove("--rerun")

    run_pickled(*cli_args, rerun=rerun)

pydra/engine/tests/test_workflow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4092,7 +4092,7 @@ def test_wf_lzoutall_st_2a(plugin, tmpdir):
40924092
wf.plugin = plugin
40934093
wf.cache_dir = tmpdir
40944094

4095-
with Submitter(plugin="cf") as sub:
4095+
with Submitter(plugin=plugin) as sub:
40964096
sub(wf)
40974097

40984098
assert wf.output_dir.exists()

pydra/engine/workers.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,10 +892,158 @@ def close(self):
892892
pass
893893

894894

895+
class PsijWorker(Worker):
    """A worker to execute tasks using PSI/J."""

    def __init__(self, subtype, **kwargs):
        """
        Initialize PsijWorker.

        Parameters
        ----------
        subtype : str
            Scheduler for PSI/J. Must be ``"local"`` or ``"slurm"``.
        **kwargs
            Accepted for call-signature compatibility; not used here.

        Raises
        ------
        ImportError
            If the ``psij`` package cannot be imported.
        ValueError
            If ``subtype`` is not one of the supported schedulers.
        """
        try:
            import psij
        except ImportError:
            logger.critical("Please install psij.")
            raise
        logger.debug("Initialize PsijWorker")
        self.psij = psij

        # Check if the provided subtype is valid
        valid_subtypes = ["local", "slurm"]
        if subtype not in valid_subtypes:
            raise ValueError(
                f"Invalid 'subtype' provided. Available options: {', '.join(valid_subtypes)}"
            )

        self.subtype = subtype

    def run_el(self, interface, rerun=False, **kwargs):
        """Run a task; returns the (un-awaited) ``exec_psij`` coroutine."""
        return self.exec_psij(interface, rerun=rerun)

    def make_spec(self, cmd=None, arg=None):
        """
        Create a PSI/J job specification.

        Parameters
        ----------
        cmd : str, optional
            Executable command. Defaults to None.
        arg : list, optional
            List of arguments. Defaults to None. Each entry is converted
            with ``str`` so that ``pathlib.Path`` objects can be passed.

        Returns
        -------
        psij.JobSpec
            PSI/J job specification.
        """
        spec = self.psij.JobSpec()
        spec.executable = cmd
        # PSI/J documents arguments as a list of strings; normalise here so
        # callers may hand over Path objects.
        spec.arguments = None if arg is None else [str(item) for item in arg]

        return spec

    def make_job(self, spec, attributes):
        """
        Create a PSI/J job.

        Parameters
        ----------
        spec : psij.JobSpec
            PSI/J job specification.
        attributes : any
            Job attributes. When not None they are stored on
            ``spec.attributes`` before the job is created.

        Returns
        -------
        psij.Job
            PSI/J job.
        """
        if attributes is not None:
            spec.attributes = attributes
        job = self.psij.Job()
        job.spec = spec
        return job

    async def exec_psij(self, runnable, rerun=False):
        """
        Run a task (coroutine wrapper).

        The runnable is pickled into its cache directory and executed in a
        separate ``python run_pickled.py ...`` process submitted through the
        PSI/J executor selected by ``self.subtype``.

        Parameters
        ----------
        runnable : TaskBase or tuple
            Either a task, or a 3-tuple ``(ind, task_main_pkl, task_orig)``.
        rerun : bool, optional
            When True, ``--rerun`` is appended to the job arguments.

        Raises
        ------
        Exception
            If the job's stderr is not empty, or if the job ends in a state
            other than COMPLETED.

        Returns
        -------
        None
        """
        import asyncio
        import pickle
        import uuid
        from pathlib import Path

        jex = self.psij.JobExecutor.get_instance(self.subtype)
        func_path = Path(__file__).parent / "run_pickled.py"
        # Jobs may overlap now that waiting no longer blocks the event loop,
        # and they can share a cache_dir; a per-job tag keeps their pickle and
        # log files from overwriting each other.
        job_tag = uuid.uuid4().hex

        if isinstance(runnable, TaskBase):
            cache_dir = runnable.cache_dir
            file_path = cache_dir / f"runnable_function_{job_tag}.pkl"
            with open(file_path, "wb") as file:
                pickle.dump(runnable._run, file)
            spec = self.make_spec("python", [func_path, file_path])
        else:  # it could be tuple that includes pickle files with tasks and inputs
            ind, task_main_pkl, task_orig = runnable
            cache_dir = task_orig.cache_dir
            file_path_1 = cache_dir / f"taskmain_{job_tag}.pkl"
            file_path_2 = cache_dir / f"ind_{job_tag}.pkl"
            with open(file_path_1, "wb") as file:
                pickle.dump(task_main_pkl, file)
            with open(file_path_2, "wb") as file:
                pickle.dump(ind, file)
            spec = self.make_spec("python", [func_path, file_path_1, file_path_2])

        if rerun:
            spec.arguments.append("--rerun")

        stderr_path = cache_dir / f"demo_{job_tag}.stderr"
        spec.stdout_path = cache_dir / f"demo_{job_tag}.stdout"
        spec.stderr_path = stderr_path

        job = self.make_job(spec, None)
        jex.submit(job)
        # job.wait() blocks until the job reaches a final state; run it in the
        # loop's default thread pool so other coroutines keep making progress.
        status = await asyncio.get_running_loop().run_in_executor(None, job.wait)

        stderr_contents = stderr_path.read_text() if stderr_path.exists() else ""
        if stderr_contents:
            raise Exception(
                f"stderr_path '{stderr_path}' is not empty. Contents:\n{stderr_contents}"
            )

        # A job can fail or be cancelled without writing anything to stderr.
        if status is not None and status.state != self.psij.JobState.COMPLETED:
            raise Exception(
                f"PSI/J job did not complete: state={status.state}, "
                f"exit_code={status.exit_code}"
            )

        return

    def close(self):
        """Finalize the internal pool of tasks."""
        pass
1037+
1038+
8951039
# Registry mapping a plugin name to a callable that builds the worker.
WORKERS = {
    "serial": SerialWorker,
    "cf": ConcurrentFuturesWorker,
    "slurm": SlurmWorker,
    "dask": DaskWorker,
    "sge": SGEWorker,
    # One entry per PSI/J scheduler subtype ("psij-local", "psij-slurm").
    # `subtype=subtype` binds the loop variable at definition time, and
    # **kwargs is forwarded so these factories accept keyword arguments like
    # the worker classes above instead of raising TypeError.
    **{
        "psij-" + subtype: (
            lambda subtype=subtype, **kwargs: PsijWorker(subtype=subtype, **kwargs)
        )
        for subtype in ["local", "slurm"]
    },
}

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ classifiers = [
4848
dynamic = ["version"]
4949

5050
[project.optional-dependencies]
51+
psij = [
52+
"psij-python",
53+
]
5154
dask = [
5255
"dask",
5356
"distributed",

0 commit comments

Comments
 (0)