Skip to content

Commit

Permalink
make cp2k interface more robust towards large input files
Browse files Browse the repository at this point in the history
  • Loading branch information
svandenhaute committed Dec 20, 2024
1 parent 2b295e2 commit 4500e23
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 21 deletions.
64 changes: 44 additions & 20 deletions psiflow/reference/_cp2k.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,11 @@ def parse_cp2k_output(
return geometry


# typeguarding for some reason incompatible with WQ
def cp2k_singlepoint_pre(
def _prepare_input(
geometry: Geometry,
cp2k_input_dict: dict = {},
properties: tuple = (),
cp2k_command: str = "",
stdout: str = "",
stderr: str = "",
parsl_resource_specification: Optional[dict] = None,
outputs: list = [],
):
from psiflow.reference._cp2k import (
dict_to_str,
Expand All @@ -160,18 +156,33 @@ def cp2k_singlepoint_pre(
if "forces" in properties:
cp2k_input_dict["force_eval"]["print"] = {"FORCES": {}}
cp2k_input_str = dict_to_str(cp2k_input_dict)
with open(outputs[0], 'w') as f:
f.write(cp2k_input_str)


# Wrap _prepare_input with python_app so it is dispatched as an app on the
# 'default_threads' executor instead of being called directly.
prepare_input = python_app(_prepare_input, executors=['default_threads'])


# typeguarding for some reason incompatible with WQ
def cp2k_singlepoint_pre(
cp2k_command: str = "",
stdout: str = "",
stderr: str = "",
inputs: list = [],
parsl_resource_specification: Optional[dict] = None,
):
tmp_command = 'mytmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t "mytmpdir")'
cd_command = "cd $mytmpdir"
cp_command = "cp {} cp2k.inp".format(inputs[0].filepath)

# see https://unix.stackexchange.com/questions/30091/fix-or-alternative-for-mktemp-in-os-x
tmp_command = 'mytmpdir=$(mktemp -d 2>/dev/null || mktemp -d -t "mytmpdir");'
cd_command = "cd $mytmpdir;"
write_command = 'echo "{}" > cp2k.inp;'.format(cp2k_input_str)
command_list = [
tmp_command,
cd_command,
write_command,
cp2k_command,
cp_command,
cp2k_command
]
return " ".join(command_list)

return ' && '.join(command_list)


@typeguard.typechecked
Expand Down Expand Up @@ -222,13 +233,26 @@ def _create_apps(self):
app_pre = bash_app(cp2k_singlepoint_pre, executors=[self.executor])
app_post = python_app(cp2k_singlepoint_post, executors=["default_threads"])

self.app_pre = partial(
app_pre,
cp2k_input_dict=self.cp2k_input_dict,
properties=tuple(self.outputs),
cp2k_command=cp2k_command,
parsl_resource_specification=wq_resources,
)
# Create a wrapped pre app which first generates the CP2K input file and writes it
# to disk, then calls the actual bash app with that file as a DataFuture dependency.
# This is necessary because, for very large structures, the CP2K input is too long
# to be passed as an argument on a command line.
def wrapped_app_pre(geometry, stdout: str, stderr: str):
    """Write the CP2K input for ``geometry`` to a file and launch the bash app on it.

    The input is produced by ``prepare_input`` into a freshly requested
    ``cp2k_*.inp`` file; the first output of that call is then handed to
    ``app_pre`` through ``inputs`` so the command only has to reference the
    file instead of carrying its contents.

    Args:
        geometry: structure forwarded unchanged to ``prepare_input``.
        stdout: forwarded to ``app_pre`` as its ``stdout`` argument.
        stderr: forwarded to ``app_pre`` as its ``stderr`` argument.

    Returns:
        Whatever ``app_pre`` returns for the submitted command.
    """
    input_dict = self.cp2k_input_dict
    requested = tuple(self.outputs)
    target_file = psiflow.context().new_file('cp2k_', '.inp')
    written = prepare_input(
        geometry,
        cp2k_input_dict=input_dict,
        properties=requested,
        outputs=[target_file],
    )
    # The first (and only requested) output is the input file written above.
    input_file = written.outputs[0]
    return app_pre(
        cp2k_command=cp2k_command,
        stdout=stdout,
        stderr=stderr,
        inputs=[input_file],
        parsl_resource_specification=wq_resources,
    )

self.app_pre = wrapped_app_pre
self.app_post = partial(
app_post,
properties=tuple(self.outputs),
Expand Down
12 changes: 11 additions & 1 deletion psiflow/reference/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,17 @@ class Reference(Computable):
outputs: tuple
batch_size: ClassVar[int] = 1 # not really used

def compute(self, dataset: Dataset, *outputs: Optional[Union[str, tuple]]):
def compute(
self,
arg: Union[Dataset, Geometry, AppFuture, list],
*outputs: Optional[Union[str, tuple]],
):
if isinstance(arg, Dataset):
dataset = arg
elif isinstance(arg, list):
dataset = Dataset(arg)
elif isinstance(arg, AppFuture) or isinstance(arg, Geometry):
dataset = Dataset([arg])
compute_outputs = compute_dataset(dataset, dataset.length(), self)
if len(outputs) == 0:
outputs_ = tuple(self.outputs)
Expand Down
12 changes: 12 additions & 0 deletions tests/test_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,18 @@ def test_cp2k_failure(context, tmp_path):
assert "ABORT" in log # verify error is captured


def test_cp2k_memory(context, simple_cp2k_input):
    """Submit a 4000-atom geometry to CP2K and check that the energy is NaN.

    All atomic numbers are 1, positions are drawn uniformly from [0, 20) and
    the cell is 20 times the identity. The test only asserts that the
    evaluation completes and reports a NaN energy (presumably because the
    calculation itself is expected to fail for this system -- confirm).
    """
    n_atoms = 4000
    box_length = 20
    reference = CP2K(simple_cp2k_input)
    geometry = Geometry.from_data(
        numbers=np.ones(n_atoms),
        positions=np.random.uniform(0, box_length, size=(n_atoms, 3)),
        cell=box_length * np.eye(3),  # box way too large
    )
    energy_future, forces_future = reference.compute(geometry)
    energy = energy_future.result()
    # Forces are not asserted on, but their future is still resolved.
    forces_future.result()
    assert np.all(np.isnan(energy))


@pytest.mark.filterwarnings("ignore:Original input file not found")
def test_cp2k_timeout(context, simple_cp2k_input):
reference = CP2K(simple_cp2k_input)
Expand Down

0 comments on commit 4500e23

Please sign in to comment.