Skip to content

Commit

Permalink
Add --also-remote option to verdi process dump
Browse files Browse the repository at this point in the history
Opens a connection to the remote Computer using the transport of the
CalcJobNode and retrieves (via `Transport.get`) the files and folders
that were not originally dumped, i.e. those that were neither part of the
CalcJobNode's repository nor among its retrieved outputs.
  • Loading branch information
GeigerJ2 committed Oct 3, 2024
1 parent c7c289d commit 469ecfa
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 3 deletions.
22 changes: 20 additions & 2 deletions src/aiida/cmdline/commands/cmd_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,13 @@ def process_repair(manager, broker, dry_run):
show_default=True,
help='Include extras in the `.aiida_node_metadata.yaml` written for every `ProcessNode`.',
)
@click.option(
'--also-remote',
is_flag=True,
default=False,
show_default=True,
help='If true, try to obtain also intermediate files on the remote computer that were not initially retrieved.',
)
@click.option(
'-f',
'--flat',
Expand All @@ -591,19 +598,21 @@ def process_dump(
include_outputs,
include_attributes,
include_extras,
also_remote,
flat,
) -> None:
"""Dump process input and output files to disk.
Child calculations/workflows (also called `CalcJob`s/`CalcFunction`s and `WorkChain`s/`WorkFunction`s in AiiDA
jargon) run by the parent workflow are contained in the directory tree as sub-folders and are sorted by their
creation time. The directory tree thus mirrors the logical execution of the workflow, which can also be queried by
creation time. The directory tree thus mirrors the logical execution of the workflow, which can also be queried by
running `verdi process status <pk>` on the command line.
By default, input and output files of each calculation can be found in the corresponding "inputs" and
"outputs" directories (the former also contains the hidden ".aiida" folder with machine-readable job execution
settings). Additional input and output files (depending on the type of calculation) are placed in the "node_inputs"
and "node_outputs", respectively.
and "node_outputs", respectively. Using the `--also-remote` flag, additional files of the `remote_workdir` on the
Computer where the CalcJobs were run can be retrieved (if they still exist on the remote).
Lastly, every folder also contains a hidden, human-readable `.aiida_node_metadata.yaml` file with the relevant AiiDA
node data for further inspection.
Expand All @@ -618,8 +627,17 @@ def process_dump(
include_extras=include_extras,
overwrite=overwrite,
flat=flat,
also_remote=also_remote,
)

if also_remote:
echo.echo_report(
'`--also-remote` set to True. Will try to retrieve additional files from the `workdir` of the remote Computer.' # noqa: E501
)
echo.echo_report(
'If files are non-existent, they will be skipped silently. Check if the output files are what you expect.'
)

try:
dump_path = process_dumper.dump(process_node=process, output_path=path)
except FileExistsError:
Expand Down
55 changes: 54 additions & 1 deletion src/aiida/tools/dumping/processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from __future__ import annotations

import itertools as it
import logging
from pathlib import Path
from types import SimpleNamespace
Expand All @@ -18,7 +19,7 @@
import yaml

from aiida.common import LinkType
from aiida.common.exceptions import NotExistentAttributeError
from aiida.common.exceptions import ConfigurationError, NotExistent, NotExistentAttributeError
from aiida.orm import (
CalcFunctionNode,
CalcJobNode,
Expand All @@ -30,6 +31,7 @@
WorkFunctionNode,
)
from aiida.orm.utils import LinkTriple
from aiida.orm.utils.remote import get_calcjob_remote_paths

LOGGER = logging.getLogger(__name__)

Expand All @@ -42,13 +44,15 @@ def __init__(
include_attributes: bool = True,
include_extras: bool = True,
overwrite: bool = False,
also_remote: bool = False,
flat: bool = False,
) -> None:
self.include_inputs = include_inputs
self.include_outputs = include_outputs
self.include_attributes = include_attributes
self.include_extras = include_extras
self.overwrite = overwrite
self.also_remote = also_remote
self.flat = flat

@staticmethod
Expand Down Expand Up @@ -285,6 +289,55 @@ def _dump_calculation(
link_triples=output_links,
)

# Additional remote file retrieval should only apply for CalcJobNodes, not CalcFunctionNodes
if self.also_remote and isinstance(calculation_node, CalcJobNode):
self._dump_calculation_remote_files(calcjob_node=calculation_node, output_path=output_path)

def _dump_calculation_remote_files(self, calcjob_node: CalcJobNode, output_path: Path) -> None:
    """Dump files still present in the remote working directory of a ``CalcJobNode``.

    Opens a transport connection to the remote Computer and copies every object in the node's
    ``remote_workdir`` that was neither retrieved (via the ``retrieve_list``) nor is already part
    of the node's file repository into ``<output_path>/remote_files``. Objects that no longer
    exist on the remote are skipped silently (``ignore_nonexisting=True``).

    :param calcjob_node: The ``CalcJobNode`` whose remote working directory should be dumped.
    :param output_path: Local directory under which the ``remote_files`` folder is created.
    :raises NotExistent: If the node has no ``remote_workdir`` set.
    :raises ConfigurationError: If no transport could be obtained for the node's Computer.
    """

    remote_workdir = calcjob_node.get_remote_workdir()
    if remote_workdir is None:
        raise NotExistent(f"CalcJobNode <{calcjob_node.pk}> doesn't have a `remote_workdir`.")

    # Exclude the objects that were already dumped, as they were either originally retrieved via
    # `retrieve_list` or are already part of the file repository of the CalcJobNode, e.g. the
    # `aiida.in` and `_aiidasubmit.sh`
    retrieve_list = list(calcjob_node.get_retrieve_list())  # type: ignore[arg-type]
    repository_list = calcjob_node.base.repository.list_object_names()
    exclude_list = retrieve_list + repository_list

    # Obtain a flattened list of the `RemoteData` objects.
    # The computer UUIDs that are the keys of the dictionary returned by `get_calcjob_remote_paths`
    # aren't needed, as we only run the function for a single CalcJobNode and use its associated
    # transport to get the files.
    calcjob_remote_paths = get_calcjob_remote_paths(pks=[calcjob_node.pk])  # type: ignore[list-item]
    calcjob_remote_datas = list(it.chain.from_iterable(calcjob_remote_paths.values()))  # type: ignore[union-attr]

    # Unlike `retrieve_files_from_list` in `execmanager.py`, we only dump the files to disk,
    # rather than also storing them in the repository via a `FolderData`.
    try:
        with calcjob_node.get_transport() as transport:
            for calcjob_remote_data in calcjob_remote_datas:
                # Obtain all objects from each of the RemoteDatas associated with the CalcJobNode
                # (could this even be more than one?)
                retrieve_objects = [_ for _ in calcjob_remote_data.listdir() if _ not in exclude_list]
                # NOTE(review): `resolve()` runs on the *local* filesystem; for the remote paths it
                # only normalizes since `remote_workdir` is absolute — confirm no local symlink
                # resolution is intended here.
                remote_paths = [(Path(remote_workdir) / _).resolve() for _ in retrieve_objects]
                local_paths = [(output_path / 'remote_files' / _).resolve() for _ in retrieve_objects]

                # Transport.get() works for files and folders, so we don't need to make a distinction
                for rem, loc in zip(remote_paths, local_paths):
                    transport.get(str(rem), str(loc), ignore_nonexisting=True)

    # Getting the transport fails, propagate exception outwards
    # (could just remove the except, but being explicit might make it clearer here)
    except ConfigurationError:
        raise

def _dump_calculation_io(self, parent_path: Path, link_triples: LinkManager | List[LinkTriple]):
"""Small helper function to dump linked input/output nodes of a `CalculationNode`.
Expand Down

0 comments on commit 469ecfa

Please sign in to comment.