diff --git a/src/aiida/engine/daemon/execmanager.py b/src/aiida/engine/daemon/execmanager.py index 00412e874e..f7517a0580 100644 --- a/src/aiida/engine/daemon/execmanager.py +++ b/src/aiida/engine/daemon/execmanager.py @@ -331,44 +331,58 @@ def _copy_remote_files(logger, node, computer, transport, remote_copy_list, remo def _copy_local_files(logger, node, transport, inputs, local_copy_list): - """Perform the copy instrctions of the ``local_copy_list``.""" - with TemporaryDirectory() as tmpdir: - dirpath = pathlib.Path(tmpdir) + """Perform the copy instructions of the ``local_copy_list``.""" - # The transport class can only copy files directly from the file system, so the files in the source node's repo - # have to first be copied to a temporary directory on disk. - for uuid, filename, target in local_copy_list: - logger.debug(f'[submission of calculation {node.uuid}] copying local file/folder to {target}') + for uuid, filename, target in local_copy_list: + logger.debug(f'[submission of calculation {node.uuid}] copying local file/folder to {target}') - try: - data_node = load_node(uuid=uuid) - except exceptions.NotExistent: - data_node = _find_data_node(inputs, uuid) if inputs else None + try: + data_node = load_node(uuid=uuid) + except exceptions.NotExistent: + data_node = _find_data_node(inputs, uuid) if inputs else None - if data_node is None: - logger.warning(f'failed to load Node<{uuid}> specified in the `local_copy_list`') - continue + if data_node is None: + logger.warning(f'failed to load Node<{uuid}> specified in the `local_copy_list`') + continue + + # The transport class can only copy files directly from the file system, so the files in the source node's repo + # have to first be copied to a temporary directory on disk. + with TemporaryDirectory() as tmpdir: + dirpath = pathlib.Path(tmpdir) # If no explicit source filename is defined, we assume the top-level directory filename_source = filename or '.' - filename_target = target or '' + filename_target = target or '.' + + file_type_source = data_node.base.repository.get_object(filename_source).file_type + + # The logic below takes care of an edge case where the source is a file but the target is a directory. In + # this case, the v2.5.1 implementation would raise an `IsADirectoryError` exception, because it would try + # to open the directory in the sandbox folder as a file when writing the contents. + if file_type_source == FileType.FILE and target and transport.isdir(target): + raise IsADirectoryError + + # In case the source filename is specified and it is a directory that already exists in the remote, we + # want to avoid nested directories in the target path to replicate the behavior of v2.5.1. This is done by + # setting the target filename to '.', which means the contents of the node will be copied in the top level + # of the temporary directory, whose contents are then copied into the target directory. + if filename and transport.isdir(filename): + filename_target = '.' - # Make the target filepath absolute and create any intermediate directories if they don't yet exist filepath_target = (dirpath / filename_target).resolve().absolute() filepath_target.parent.mkdir(parents=True, exist_ok=True) - if data_node.base.repository.get_object(filename_source).file_type == FileType.DIRECTORY: + if file_type_source == FileType.DIRECTORY: # If the source object is a directory, we copy its entire contents data_node.base.repository.copy_tree(filepath_target, filename_source) + transport.put(f'{dirpath}/*', target or '.', overwrite=True) else: # Otherwise, simply copy the file with filepath_target.open('wb') as handle: with data_node.base.repository.open(filename_source, 'rb') as source: shutil.copyfileobj(source, handle) - - # Now copy the contents of the temporary folder to the remote working directory using the transport - for filepath in dirpath.iterdir(): - transport.put(str(filepath), filepath.name) + transport.makedirs(str(pathlib.Path(target).parent), ignore_existing=True) + transport.put(str(filepath_target), target) def _copy_sandbox_files(logger, node, transport, folder): diff --git a/tests/engine/daemon/test_execmanager.py b/tests/engine/daemon/test_execmanager.py index 160139e8bb..d5fc8fdbcc 100644 --- a/tests/engine/daemon/test_execmanager.py +++ b/tests/engine/daemon/test_execmanager.py @@ -302,3 +302,286 @@ def test_upload_file_copy_operation_order(node_and_calc_info, tmp_path, order, e filepath = pathlib.Path(node.get_remote_workdir()) / 'file.txt' assert filepath.is_file() assert filepath.read_text() == expected + + +@pytest.mark.parametrize( + 'sandbox_hierarchy, local_copy_list, remote_copy_list, expected_hierarchy, expected_exception', + [ + ## Single `FileCopyOperation` + # Only Sandbox + ({'pseudo': {'Ba.upf': 'Ba pseudo'}}, (), (), {'pseudo': {'Ba.upf': 'Ba pseudo'}}, None), + # Only local copy of a `SinglefileData` to the "pseudo" directory + # -> Makes the parent directory and copies the file to the parent directory + # COUNTER-INTUITIVE: would fail with `cp` since the parent folder doesn't exist + ( + {}, + ((SinglefileData, 'Ba pseudo', 'Ba.upf', 'pseudo/Ba.upf'),), + (), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Only local copy of a single file to the "pseudo" directory + # -> Makes the parent directory and copies the file to the parent directory + # COUNTER-INTUITIVE: would fail with `cp` since the parent folder doesn't exist + ( + {}, + ((FolderData, {'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo/Ba.upf', 'pseudo/Ba.upf'),), + (), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Only local copy of a single directory, specifying the target directory + # -> Copies the contents of the folder to the target directory + ( + {}, + ((FolderData, {'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo', 'target'),), + (), + {'target': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Only local copy of a single directory to the "current directory" + # -> Copies the contents of the folder to the target current directory + # COUNTER-INTUITIVE: emulates the behaviour of `cp` with forward slash: `cp -r pseudo/ .` + ( + {}, + ((FolderData, {'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo', '.'),), + (), + {'Ba.upf': 'Ba pseudo'}, + None, + ), + # Only remote copy of a single file to the "pseudo" directory + # -> Copy fails silently since target directory does not exist: final directory structure is empty + ( + {}, + (), + (({'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo/Ba.upf', 'pseudo/Ba.upf'),), + {}, + None, + ), + # -> Copy fails silently since target directory does not exist: final directory structure is empty + ( + {}, + (), + (({'Ba.upf': 'Ba pseudo'}, 'Ti.upf', 'Ti.upf'),), + {}, + None, + ), + # Only remote copy of a single directory, specifying the target directory + # -> Copies the contents of the folder to the target directory + ( + {}, + (), + (({'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo', 'target'),), + {'target': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Only remote copy of a single directory to the "current directory" + # -> Copies the folder to the target current directory + ( + {}, + (), + (({'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo', '.'),), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + ## Two `FileCopyOperation`s + # Sandbox creates folder; Local copy of a `SinglefileData` to target file in folder + # Note: This is the QE use case for the `PwCalculation` plugin + # -> Copies the file to the target file in the target folder + ( + {'pseudo': {}}, + ((SinglefileData, 'Ba pseudo', 'Ba.upf', 'pseudo/Ba.upf'),), + (), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Sandbox creates folder; Local copy of two `SinglefileData` to target file in folder + # -> Copies both files to the target files in the target folder + ( + {'pseudo': {}}, + ( + (SinglefileData, 'Ba pseudo', 'Ba.upf', 'pseudo/Ba.upf'), + (SinglefileData, 'Ti pseudo', 'Ti.upf', 'pseudo/Ti.upf'), + ), + (), + {'pseudo': {'Ba.upf': 'Ba pseudo', 'Ti.upf': 'Ti pseudo'}}, + None, + ), + # Sandbox creates folder; Local copy of a `SinglefileData` file from to target folder + # -> Fails outright with `IsADirectoryError` since target folder exists + # COUNTER-INTUITIVE: would succeed with the desired hierarchy with `cp` + ( + {'pseudo': {}}, + ((SinglefileData, 'Ba pseudo', 'Ba.upf', 'pseudo'),), + (), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + IsADirectoryError, + ), + # Sandbox creates folder; Local copy of a single file from a `FolderData` to target folder + # -> Fails outright since target folder exists + # COUNTER-INTUITIVE: would succeed with the desired hierarchy with `cp` + ( + {'pseudo': {}}, + ((FolderData, {'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo/Ba.upf', 'pseudo'),), + (), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + IsADirectoryError, + ), + # Sandbox creates folder; Local copy of a folder inside a `FolderData` + # -> Copies _contents_ of folder to target folder + # COUNTER-INTUITIVE: emulates the behaviour of `cp` with forward slash: `cp -r pseudo/ pseudo` + ( + {'pseudo': {}}, + ((FolderData, {'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo', 'pseudo'),), + (), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Sandbox creates folder; Remote copy of a single file to target file in folder + # -> Copies the remote file to the target file in the target folder + ( + {'pseudo': {}}, + (), + (({'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo/Ba.upf', 'pseudo/Ba.upf'),), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Sandbox creates folder; Remote copy of a single file to target folder + # -> Copies the remote file to the target folder + ( + {'pseudo': {}}, + (), + (({'pseudo': {'Ba.upf': 'Ba pseudo'}}, 'pseudo/Ba.upf', 'pseudo'),), + {'pseudo': {'Ba.upf': 'Ba pseudo'}}, + None, + ), + # Sandbox creates folder with nested folder; Local copy of nested folder to target nested folder + # -> Copies contents of nested folder to target nested folder + # COUNTER-INTUITIVE: emulates the behaviour of `cp` with forward slash + ( + {'folder': {'nested_folder': {'file': 'content'}}}, + ( + ( + FolderData, + {'folder': {'nested_folder': {'file': 'new_content'}}}, + 'folder/nested_folder', + 'folder/nested_folder', + ), + ), + (), + {'folder': {'nested_folder': {'file': 'new_content'}}}, + None, + ), + # Sandbox creates folder with nested folder; Local copy of top-level folder to target top-level folder + # -> Copies contents of top-level folder to target top-level folder + # COUNTER-INTUITIVE: emulates the behaviour of `cp` with forward slash + ( + {'folder': {'nested_folder': {'file': 'content'}}}, + ( + ( + FolderData, + {'folder': {'nested_folder': {'file': 'new_content'}}}, + 'folder', + 'folder', + ), + ), + (), + {'folder': {'nested_folder': {'file': 'new_content'}}}, + None, + ), + # Sandbox creates folder with nested folder; Remote copy of nested folder to target nested folder + # -> Copies the remote nested folder _into_ target nested folder + ( + {'folder': {'nested_folder': {'file': 'content'}}}, + (), + ( + ( + {'folder': {'nested_folder': {'file': 'new_content'}}}, + 'folder/nested_folder', + 'folder/nested_folder', + ), + ), + {'folder': {'nested_folder': {'file': 'content', 'nested_folder': {'file': 'new_content'}}}}, + None, + ), + ], +) +def test_upload_combinations( + fixture_sandbox, + node_and_calc_info, + tmp_path, + sandbox_hierarchy, + local_copy_list, + remote_copy_list, + expected_hierarchy, + expected_exception, + create_file_hierarchy, + serialize_file_hierarchy, +): + """Test the ``upload_calculation`` functions for various combinations of sandbox folders and copy lists. + + The `local_copy_list` is formatted as a list of tuples, where each tuple contains the following elements: + + - The class of the data node to be copied. + - The content of the data node to be copied. This can be either a string in case of a file, or a dictionary + representing the file hierarchy in case of a folder. + - The name of the file or directory to be copied. + - The relative path the data should be copied to. + + The `remote_copy_list` is formatted as a list of tuples, where each tuple contains the following elements: + + - A dictionary representing the file hierarchy that should be in the remote directory. + + """ + create_file_hierarchy(sandbox_hierarchy, fixture_sandbox) + + node, calc_info = node_and_calc_info + + calc_info.local_copy_list = [] + + for copy_id, (data_class, content, filename, target_path) in enumerate(local_copy_list): + # Create a sub directroy in the temporary folder for each copy to avoid conflicts + sub_tmp_path_local = tmp_path / f'local_{copy_id}' + + if issubclass(data_class, SinglefileData): + create_file_hierarchy({filename: content}, sub_tmp_path_local) + copy_node = SinglefileData(sub_tmp_path_local / filename).store() + + calc_info.local_copy_list.append((copy_node.uuid, copy_node.filename, target_path)) + + elif issubclass(data_class, FolderData): + create_file_hierarchy(content, sub_tmp_path_local) + serialize_file_hierarchy(sub_tmp_path_local, read_bytes=False) + folder = FolderData() + folder.base.repository.put_object_from_tree(sub_tmp_path_local) + folder.store() + + calc_info.local_copy_list.append((folder.uuid, filename, target_path)) + + calc_info.remote_copy_list = [] + + for copy_id, (hierarchy, source_path, target_path) in enumerate(remote_copy_list): + # Create a sub directroy in the temporary folder for each copy to avoid conflicts + sub_tmp_path_remote = tmp_path / f'remote_{copy_id}' + + create_file_hierarchy(hierarchy, sub_tmp_path_remote) + + calc_info.remote_copy_list.append( + (node.computer.uuid, (sub_tmp_path_remote / source_path).as_posix(), target_path) + ) + + if expected_exception is not None: + with pytest.raises(expected_exception): + with node.computer.get_transport() as transport: + execmanager.upload_calculation(node, transport, calc_info, fixture_sandbox) + + filepath_workdir = pathlib.Path(node.get_remote_workdir()) + + assert serialize_file_hierarchy(filepath_workdir, read_bytes=False) == expected_hierarchy + else: + with node.computer.get_transport() as transport: + execmanager.upload_calculation(node, transport, calc_info, fixture_sandbox) + + filepath_workdir = pathlib.Path(node.get_remote_workdir()) + + assert serialize_file_hierarchy(filepath_workdir, read_bytes=False) == expected_hierarchy