From 7006d3de6bea71c6c57bd41e2328719ff94258cd Mon Sep 17 00:00:00 2001 From: Ali Khosravi Date: Tue, 30 Jul 2024 17:44:30 +0200 Subject: [PATCH] Upgrade to be compatible with pyfirecreset v2.6.0 (#36) feat: Major enhancements to Transport and Scheduler plugins, and test updates This commit includes several updates: Transport plugin: - Refactored `put`, `get`, and `copy` to mimic `aiida-ssh` transport plugin behavior. - Added support for glob patterns in `put`, `get`, and `copy`. - Introduced `recursive` functionality for `listdir`. - Introduced `dereference` option for `put`, `get`, and `copy`. - Added methods for local secret storage, temporary directory allocation, direct transfer info retrieval, FirecREST API version fetching, file integrity checking, and directory transfer as tar files. - Implemented `payoff` function for optimal transfer method determination. Scheduler plugin: - Enhanced `get_job` with pagination support and additional data parsing. - Fixed bug where `get_job` would raise an error if the job could not be found. - Copied `_convert_time` and `_parse_time_string` from `slurm-plugin`. Tests: - Replaced FirecrestMockServer with monkeypatching pyfirecrest to reduce maintenance overhead. Miscellaneous: - Removed `FcPath` class from interface as it has been merged into pyfirecrest. This commit prepares for the v0.2.0 release. --------- Co-authored-by: Alexander Goscinski --- .firecrest-demo-config.json | 17 +- .github/workflows/server-tests.yml | 14 +- .github/workflows/tests.yml | 4 +- .gitignore | 3 + .pre-commit-config.yaml | 4 +- CHANGELOG.md | 30 + README.md | 126 +- aiida_firecrest/remote_path.py | 617 ---------- aiida_firecrest/scheduler.py | 134 ++- aiida_firecrest/transport.py | 1057 +++++++++++++---- aiida_firecrest/utils_test.py | 760 ------------ firecrest_demo.py | 8 + pyproject.toml | 5 +- tests/conftest.py | 573 +++++++-- tests/test_calculation.py | 38 - tests/test_computer.py | 162 ++- tests/test_scheduler.py | 144 ++- .../test_scheduler/test_write_script_full.txt | 20 - .../test_write_script_minimal.txt | 5 - tests/test_transport.py | 903 +++++++++++--- 20 files changed, 2534 insertions(+), 2090 deletions(-) delete mode 100644 aiida_firecrest/remote_path.py delete mode 100644 aiida_firecrest/utils_test.py delete mode 100644 tests/test_scheduler/test_write_script_full.txt delete mode 100644 tests/test_scheduler/test_write_script_minimal.txt diff --git a/.firecrest-demo-config.json b/.firecrest-demo-config.json index 9479e1d..271dd6b 100644 --- a/.firecrest-demo-config.json +++ b/.firecrest-demo-config.json @@ -1,8 +1,11 @@ -{ - "url": "http://localhost:8000/", - "token_uri": "http://localhost:8080/auth/realms/kcrealm/protocol/openid-connect/token", - "client_id": "firecrest-sample", - "client_secret": "b391e177-fa50-4987-beaf-e6d33ca93571", - "machine": "cluster", - "scratch_path": "/home/service-account-firecrest-sample" + { + "url": "", + "token_uri": "", + "client_id": "", + "client_secret": "", + "compute_resource": "", + "temp_directory": "", + "small_file_size_mb": 5.0, + "workdir": "", + "api_version": "1.16.0" } diff --git a/.github/workflows/server-tests.yml b/.github/workflows/server-tests.yml index 8b41a3f..175794e 100644 --- a/.github/workflows/server-tests.yml +++ b/.github/workflows/server-tests.yml @@ -3,16 +3,16 @@ name: Server +# note: there are several bugs with docker image of FirecREST +# that failes this test. 
We skip this test for now; it should be addressed in a separate PR from #36
on:
   push:
-    branches: [main]
-    tags:
-      - 'v*'
+    branches-ignore:
+      - '**'
   pull_request:
-    paths-ignore:
-      - README.md
-      - CHANGELOG.md
-      - "docs/**"
+    branches-ignore:
+      - '**'
+

 jobs:
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index e0ed17a..b543581 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -49,10 +49,10 @@ jobs:
        python -m pip install --upgrade pip
        pip install -e .[dev]
    - name: Test with pytest
-      run: pytest -vv --firecrest-requests --cov=aiida_firecrest --cov-report=xml --cov-report=term
+      run: pytest -vv --cov=aiida_firecrest --cov-report=xml --cov-report=term
    - name: Upload coverage reports to Codecov
-      uses: codecov/codecov-action@v3
+      uses: codecov/codecov-action@v4
      with:
        name: aiida-firecrest-pytests
        flags: pytests
diff --git a/.gitignore b/.gitignore
index bbf1cd9..23708d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,6 @@ dmypy.json
 .vscode/
 .demo-server/
 _archive/
+
+
+.firecrest-config.json
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ce5828c..b20d7ff 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -44,5 +44,5 @@ repos:
      additional_dependencies:
        - "types-PyYAML"
        - "types-requests"
-        - "pyfirecrest~=1.4.0"
-        - "aiida-core~=2.4.0"
+        - "pyfirecrest>=2.6.0"
+        - "aiida-core>=2.6.0" # please change to 2.6.2 once released, see Issue #45
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e552098..1bc7fc1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,35 @@
 # Changelog

+## v0.2.0 - (not released yet)
+
+### Transport plugin
+- Refactored `put`, `get`, and `copy`; they now mimic the behavior of the `aiida-ssh` transport plugin.
+- `put`, `get`, and `copy` now support glob patterns.
+- Added a `dereference` option wherever relevant.
+- Added `recursive` functionality to `listdir`.
+- Added `_create_secret_file` to store the user's secret locally in `~/.firecrest/`.
+- Added `_validate_temp_directory` to allocate a temporary directory on the FirecREST server, used by the `extract` and `compress` methods.
+- Added `_dynamic_info_direct_size`, which queries the direct-transfer size limit from the server rather than asking the user, and raises if it fails to make a connection.
+- Added `_dynamic_info_firecrest_version` to fetch which version of the FirecREST API the plugin is interacting with.
+- Added `_validate_checksum` to check the integrity of downloaded/uploaded files.
+- Added `_gettreetar` & `_puttreetar` to transfer directories internally as tar files.
+- Added a `payoff` function to calculate when it is more efficient to transfer a directory as a single tar archive and when to transfer files individually.
+
+### Scheduler plugin
+- `get_job` now supports pagination when retrieving active jobs.
+- `get_job` parses more data than before: `submission_time`, `wallclock_time_seconds`, `start_time`, `time_left`, `nodelist`; see open issues [39](https://github.com/aiidateam/aiida-firecrest/issues/39) & [40](https://github.com/aiidateam/aiida-firecrest/issues/40).
+- Bug fix: `get_job` no longer raises if the job cannot be found (completed/errored/etc.).
+- `_convert_time` and `_parse_time_string` were copied over from the Slurm plugin; see the [open issue](https://github.com/aiidateam/aiida-firecrest/issues/42).
+
+### Tests
+
+- The testing utils responsible for mocking the FirecREST server (specifically `FirecrestMockServer`) have been replaced with utils that monkeypatch pyfirecrest. The FirecREST mocking utils introduced a maintenance overhead that is not the responsibility of this repository.
We continue to support running against a real FirecREST server and plan to keep running the tests with the [demo docker image](https://github.com/eth-cscs/firecrest/tree/master/deploy/demo) offered by CSCS. The docker image has been disabled for the moment due to some problems (see issue #47).
+
+
+### Miscellaneous
+
+- The `FcPath` class has been removed from this interface, as it has been [merged](https://github.com/eth-cscs/pyfirecrest/pull/43) into pyfirecrest.
+
 ## v0.1.0 (December 2021)

 Initial release.
diff --git a/README.md b/README.md
index ea22933..9eb6ea0 100644
--- a/README.md
+++ b/README.md
@@ -5,11 +5,10 @@
 AiiDA Transport/Scheduler plugins for interfacing with
 [FirecREST](https://products.cscs.ch/firecrest/),
 via [pyfirecrest](https://github.com/eth-cscs/pyfirecrest).

-It is currently tested against [FirecREST v1.13.0](https://github.com/eth-cscs/firecrest/releases/tag/v1.13.0).
+It is currently tested against [pyfirecrest v2.6.0](https://github.com/eth-cscs/pyfirecrest/tree/v2.6.0).

-**NOTE:** This plugin is currently dependent on a fork of `aiida-core` from [PR #6043](https://github.com/aiidateam/aiida-core/pull/6043)

-## Usage
+## Installation

 Install via GitHub or PyPI:

@@ -44,73 +43,59 @@ You can then create a `Computer` in AiiDA:
 $ verdi computer setup
 Report: enter ? for help.
 Report: enter ! to ignore the default and set no value.
-Computer label: firecrest-client
-Hostname: unused
-Description []: My FirecREST client plugin
+Computer label: firecrest-client  # your choice
+Hostname: unused  # your choice, irrelevant
+Description []: My FirecREST client plugin  # your choice
 Transport plugin: firecrest
 Scheduler plugin: firecrest
 Shebang line (first line of each script, starting with #!) [#!/bin/bash]:
 Work directory on the computer [/scratch/{username}/aiida/]:
 Mpirun command [mpirun -np {tot_num_mpiprocs}]:
-Default number of CPUs per machine: 2
-Default amount of memory per machine (kB).: 100
+Default number of CPUs per machine: 2  # depends on your compute resource
+Default amount of memory per machine (kB).: 100  # depends on your compute resource
 Escape CLI arguments in double quotes [y/N]:
 Success: Computer<3> firecrest-client created
 Report: Note: before the computer can be used, it has to be configured with the command:
-Report: verdi -p quicksetup computer configure firecrest firecrest-client
+Report: verdi -p MYPROFILE computer configure firecrest firecrest-client
```

```console
-$ verdi -p quicksetup computer configure firecrest firecrest-client
+$ verdi -p MYPROFILE computer configure firecrest firecrest-client
 Report: enter ? for help.
 Report: enter ! to ignore the default and set no value.
-Server URL: https://firecrest.cscs.ch
-Token URI: https://auth.cscs.ch/auth/realms/cscs/protocol/openid-connect/token
+Server URL: https://firecrest.cscs.ch  # this one is for CSCS
+Token URI: https://auth.cscs.ch/auth/realms/firecrest-clients/protocol/openid-connect/token
 Client ID: username-client
 Client Secret: xyz
-Client Machine: daint
-Maximum file size for direct transfer (MB) [5.0]:
+Compute resource (Machine): daint
+Temp directory on server: /scratch/something/  # a temp directory in the user's space on the server, for temporary files (compression, extraction, etc.)
+FirecREST api version [Enter 0 to get this info from server] [0]: 0
+Maximum file size for direct transfer (MB) [Enter 0 to get this info from server] [0]: 0
 Report: Configuring computer firecrest-client for user chrisj_sewell@hotmail.com.
Success: firecrest-client successfully configured for chrisj_sewell@hotmail.com
```

+You can always check your configuration with:
```console
$ verdi computer show firecrest-client
---------------------------- ------------------------------------
-Label firecrest-client
-PK 3
-UUID 48813c55-1b2b-4afc-a1a1-e0d33a5b6868
-Description My FirecREST client plugin
-Hostname unused
-Transport type firecrest
-Scheduler type firecrest
-Work directory /scratch/{username}/aiida/
-Shebang #!/bin/bash
-Mpirun command mpirun -np {tot_num_mpiprocs}
-Default #procs/machine 2
-Default memory (kB)/machine 100
-Prepend text
-Append text
---------------------------- ------------------------------------
```

See also the [pyfirecrest CLI](https://github.com/eth-cscs/pyfirecrest),
for directly interacting with a FirecREST server.

-See [tests/test_calculation.py](tests/test_calculation.py) for a working example of how to use the plugin, via the AiiDA API.
-
-### Current Issues

-Simple calculations are now running successfully [in the tests](tests/test_calculation.py), however, there are still some critical issues, before this could be production ready:
+After this, everything should function normally through AiiDA.
+See [tests/test_calculation.py](tests/test_calculation.py) for a working example of how to use the plugin via the AiiDA API.

-1. Currently uploading via firecrest changes `_aiidasubmit.sh` to `aiidasubmit.sh` 😱 ([see #191](https://github.com/eth-cscs/firecrest/issues/191)), so `metadata.options.submit_script_filename` should be set to this.
+If you encounter any problems or bugs, please don't hesitate to open an issue on this repository.

-2. Handling of large (>5Mb) file uploads/downloads needs to be improved
+### Current Issues

-3. Handling of the client secret, which should likely not be stored in the database
+Calculations are now running successfully; however, there are still some efficiency issues that could be improved:

-4. Monitoring / management of API request rates could to be improved
+1. Monitoring / management of API request rates could be improved. Currently this is left up to pyfirecrest.
+2. Each transfer request includes 2 seconds of `sleep` time, imposed by pyfirecrest. One could make use of its `async` client, but with the current design of `aiida-core` the gain would be minimal (see the [closing comment of issue #94 on pyfirecrest](https://github.com/eth-cscs/pyfirecrest/issues/94) and [PR #6079 on aiida-core](https://github.com/aiidateam/aiida-core/pull/6079)).

-## Development
+## For developers

```bash
git clone
@@ -134,27 +119,45 @@ It is recommended to run the tests via [tox](https://tox.readthedocs.io/en/lates
 tox
```

-By default, the tests are run using a mock FirecREST server, in a temporary folder
-(see [aiida_fircrest.utils_test.FirecrestConfig](aiida_firecrest/utils_test.py)).
-This allows for quick testing and debugging of the plugin, without needing to connect to a real server,
-but is obviously not guaranteed to be fully representative of the real behaviour.
+By default, the tests are run using a monkeypatched pyfirecrest.
+This allows for quick testing and debugging of the plugin, without needing to connect to a real server, but it is obviously not guaranteed to be fully representative of the real behaviour.
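For orientation, the snippet below is a minimal, hypothetical sketch of what "monkeypatching pyfirecrest" looks like in a test. The real fixtures live in [tests/conftest.py](tests/conftest.py) and are more elaborate; the patched method, its signature, and the shape of the returned entries here are assumptions based on the pyfirecrest calls used elsewhere in this plugin.

```python
# Illustrative sketch only -- not the project's actual fixtures.
# It replaces one pyfirecrest call with a canned response, so that the
# test never talks to a real FirecREST server.
import firecrest
import pytest


@pytest.fixture
def mocked_list_files(monkeypatch):
    """Patch Firecrest.list_files to return a fixed directory listing."""

    def fake_list_files(self, machine, target_path, show_hidden=False, **kwargs):
        # The entry keys mirror the FirecREST /utilities/ls response used by
        # the plugin; a real fixture would also track files created by the test.
        return [{"name": "file.txt", "type": "-", "permissions": "rw-r--r--"}]

    monkeypatch.setattr(firecrest.Firecrest, "list_files", fake_list_files)


def test_listdir_is_mocked(mocked_list_files):
    # The URL and machine name are placeholders; no request is ever sent.
    client = firecrest.Firecrest(firecrest_url="http://unused", authorization=None)
    names = [entry["name"] for entry in client.list_files("cluster", "/remote/dir")]
    assert names == ["file.txt"]
```

Because the patch is applied on the `Firecrest` class itself, any transport created in a test picks up the fake behaviour without changes to the plugin code.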
-You can also provide connections details to a real FirecREST server: +To have a guaranteed proof, you may also provide connections details to a real FirecREST server: ```bash tox -- --firecrest-config=".firecrest-demo-config.json" ``` -The format of the `.firecrest-demo-config.json` file is: + +If a config file is provided, tox sets up a client environment with the information +in the config file and uses pyfirecrest to communicate with the server. +```plaintext +┌─────────────────┐───►┌─────────────┐───►┌──────────────────┐ +│ aiida_firecrest │ │ pyfirecrest │ │ FirecREST server │ +└─────────────────┘◄───└─────────────┘◄───└──────────────────┘ +``` + +if a config file is not provided, it monkeypatches pyfirecrest so we never actually communicate with a server. +```plaintext +┌─────────────────┐───►┌─────────────────────────────┐ +│ aiida_firecrest │ │ pyfirecrest (monkeypatched) │ +└─────────────────┘◄───└─────────────────────────────┘ +``` + +The format of the `.firecrest-demo-config.json` file, for example is like: + ```json -{ - "url": "https://firecrest.cscs.ch", - "token_uri": "https://auth.cscs.ch/auth/realms/cscs/protocol/openid-connect/token", + { + "url": "https://firecrest-tds.cscs.ch", + "token_uri": "https://auth.cscs.ch/auth/realms/firecrest-clients/protocol/openid-connect/token", "client_id": "username-client", - "client_secret": "xyz", - "machine": "daint", - "scratch_path": "/scratch/snx3000/username" + "client_secret": "path-to-secret-file", + "compute_resource": "daint", + "temp_directory": "/scratch/snx3000/username/", + "small_file_size_mb": 5.0, + "workdir": "/scratch/snx3000/username/", + "api_version": "1.16.0" } ``` @@ -164,17 +167,18 @@ In this mode, if you want to inspect the generated files, after a failure, you c tox -- --firecrest-config=".firecrest-demo-config.json" --firecrest-no-clean ``` -See [firecrest_demo.py](firecrest_demo.py) for how to start up a demo server, -and also [server-tests.yml](.github/workflows/server-tests.yml) for how the tests are run against the demo server on GitHub Actions. +**These tests were successful against [FirecREST v1.16.0](https://github.com/eth-cscs/firecrest/releases/tag/v1.16.0), except those who require to list directories in a symlink directory, which fail due to a bug in FirecREST. [An issue](https://github.com/eth-cscs/firecrest/issues/205) is open on FirecREST repo about this.** + +Instead of a real server (which requires an account and credential), tests can also run against a docker image provided by FirecREST. See [firecrest_demo.py](firecrest_demo.py) for how to start up a demo server, and also [server-tests.yml](.github/workflows/server-tests.yml) for how the tests are run against the demo server on GitHub Actions. -If you want to analyse statistics of the API requests made by each test, + -### Notes on using the demo server on MacOS +#### Notes on using the demo server on MacOS A few issues have been noted when using the demo server on MacOS (non-Mx): @@ -195,3 +199,13 @@ although it is of note that you can find these files directly where you your `fi [codecov-link]: https://codecov.io/gh/aiidateam/aiida-firecrest [black-badge]: https://img.shields.io/badge/code%20style-black-000000.svg [black-link]: https://github.com/ambv/black + + +### :bug: Fishing Bugs :bug: + +First, start with running tests locally with no `config` file given, that would monkeypatch pyfirecrest. 
These set of test do not guarantee that the whole firecrest protocol is working, but it's very useful to quickly check if `aiida-firecrest` is behaving as it's expected to do. To run just simply use `pytest` or `tox`. + +If these tests pass and the bug persists, consider providing a `config` file to run the tests on a docker image or directly on a real server. Be aware of versioning, pyfirecrest doesn't check which version of api it's interacting with. (see https://github.com/eth-cscs/pyfirecrest/issues/116) + +If the bug persists and test still passes, then most certainly it's a problem of `aiida-firecrest`. +If not, probably the issue is from FirecREST, you might open an issue to [pyfirecrest](https://github.com/eth-cscs/pyfirecrest) or [`FirecREST`](https://github.com/eth-cscs/firecrest). diff --git a/aiida_firecrest/remote_path.py b/aiida_firecrest/remote_path.py deleted file mode 100644 index 107b0ce..0000000 --- a/aiida_firecrest/remote_path.py +++ /dev/null @@ -1,617 +0,0 @@ -"""A pathlib.Path-like object for accessing the file system via the Firecrest API. - -Note this is independent of AiiDA, -in fact it is awaiting possible inclusion in pyfirecrest: -https://github.com/eth-cscs/pyfirecrest/pull/43 -""" -from __future__ import annotations - -from collections.abc import Iterator -from dataclasses import dataclass -from functools import lru_cache -from io import BytesIO -import os -from pathlib import PurePosixPath -import stat -import tempfile -from typing import Callable, TypeVar - -from firecrest import ClientCredentialsAuth, Firecrest # type: ignore[attr-defined] - -from .utils import convert_header_exceptions - -# Note in python 3.11 could use typing.Self -SelfTv = TypeVar("SelfTv", bound="FcPath") - - -@dataclass -class ModeCache: - """A cache of the path's mode. - - This can be useful for limiting calls to the API, - particularly for paths generated via `/utilities/ls` - which already provides the st_mode for each file - """ - - st_mode: int | None = None - """The st_mode of the path, dereferencing symlinks.""" - lst_mode: int | None = None - """The st_mode of the path, without dereferencing symlinks.""" - - def reset(self) -> None: - """Reset the cache.""" - self.st_mode = None - self.lst_mode = None - - -class FcPath(os.PathLike[str]): - """A pathlib.Path-like object for accessing the file system via the Firecrest API.""" - - __slots__ = ("_client", "_machine", "_path", "_cache_enabled", "_cache") - - def __init__( - self, - client: Firecrest, - machine: str, - path: str | PurePosixPath, - *, - cache_enabled: bool = False, - _cache: None | ModeCache = None, - ) -> None: - """Construct a new FcPath instance. - - :param client: A Firecrest client object - :param machine: The machine name - :param path: The absolute path to the file or directory - :param cache_enabled: Enable caching of path statistics - This enables caching of path statistics, like mode, - which can be useful if you are using multiple methods on the same path, - as it avoids making multiple calls to the API. - You should only use this if you are sure that the file system is not being modified. 
- - """ - self._client = client - self._machine = machine - self._path = PurePosixPath(path) - if not self._path.is_absolute(): - raise ValueError(f"Path must be absolute: {str(self._path)!r}") - self._cache_enabled = cache_enabled - self._cache = _cache or ModeCache() - - @classmethod - def from_env_variables( - cls, machine: str, path: str | PurePosixPath, *, cache_enabled: bool = False - ) -> FcPath: - """Convenience method, to construct a new FcPath using environmental variables. - - The following environment variables are required: - - FIRECREST_URL - - FIRECREST_CLIENT_ID - - FIRECREST_CLIENT_SECRET - - AUTH_TOKEN_URL - """ - auth_obj = ClientCredentialsAuth( - os.environ["FIRECREST_CLIENT_ID"], - os.environ["FIRECREST_CLIENT_SECRET"], - os.environ["AUTH_TOKEN_URL"], - ) - client = Firecrest(os.environ["FIRECREST_URL"], authorization=auth_obj) - return cls(client, machine, path, cache_enabled=cache_enabled) - - @property - def client(self) -> Firecrest: - """The Firecrest client object.""" - return self._client - - @property - def machine(self) -> str: - """The machine name.""" - return self._machine - - @property - def path(self) -> str: - """Return the string representation of the path on the machine.""" - return str(self._path) - - @property - def pure_path(self) -> PurePosixPath: - """Return the pathlib representation of the path on the machine.""" - return self._path - - @property - def cache_enabled(self) -> bool: - """Enable caching of path statistics. - - This enables caching of path statistics, like mode, - which can be useful if you are using multiple methods on the same path, - as it avoids making multiple calls to the API. - - You should only use this if you are sure that the file system is not being modified. - """ - return self._cache_enabled - - @cache_enabled.setter - def cache_enabled(self, value: bool) -> None: - self._cache_enabled = value - - def enable_cache(self: SelfTv) -> SelfTv: - """Enable caching of path statistics.""" - self._cache_enabled = True - return self - - def clear_cache(self: SelfTv) -> SelfTv: - """Clear the cache of path statistics.""" - self._cache.reset() - return self - - def _new_path( - self: SelfTv, path: PurePosixPath, *, _cache: None | ModeCache = None - ) -> SelfTv: - """Construct a new FcPath object from a PurePosixPath object.""" - return self.__class__( - self._client, - self._machine, - path, - cache_enabled=self._cache_enabled, - _cache=_cache, - ) - - def __fspath__(self) -> str: - return str(self._path) - - def __str__(self) -> str: - return self.path - - def __repr__(self) -> str: - variables = [ - repr(self._client._firecrest_url), - repr(self._machine), - repr(self.path), - ] - if self._cache_enabled: - variables.append("CACHED") - return f"{self.__class__.__name__}({', '.join(variables)})" - - def as_posix(self) -> str: - """Return the string representation of the path.""" - return self._path.as_posix() - - @property - def name(self) -> str: - """The final path component, if any.""" - return self._path.name - - @property - def suffix(self) -> str: - """ - The final component's last suffix, if any. - - This includes the leading period. For example: '.txt' - """ - return self._path.suffix - - @property - def suffixes(self) -> list[str]: - """ - A list of the final component's suffixes, if any. - - These include the leading periods. For example: ['.tar', '.gz'] - """ - return self._path.suffixes - - @property - def stem(self) -> str: - """ - The final path component, minus its last suffix. 
- - If the final path component has no suffix, this is the same as name. - """ - return self._path.stem - - def with_name(self: SelfTv, name: str) -> SelfTv: - """Return a new path with the file name changed.""" - return self._new_path(self._path.with_name(name)) - - def with_suffix(self: SelfTv, suffix: str) -> SelfTv: - """Return a new path with the file suffix changed.""" - return self._new_path(self._path.with_suffix(suffix)) - - @property - def parts(self) -> tuple[str, ...]: - """The components of the path.""" - return self._path.parts - - @property - def parent(self: SelfTv) -> SelfTv: - """The path's parent directory.""" - return self._new_path(self._path.parent) - - def is_absolute(self) -> bool: - """Return True if the path is absolute.""" - return self._path.is_absolute() - - def __truediv__(self: SelfTv, other: str) -> SelfTv: - return self._new_path(self._path / other) - - def joinpath(self: SelfTv, *other: str) -> SelfTv: - """Combine this path with one or several arguments, and return a - new path representing either a subpath (if all arguments are relative - paths) or a totally different path (if one of the arguments is - anchored). - """ - return self._new_path(self._path.joinpath(*other)) - - def whoami(self) -> str: - """Return the username of the current user.""" - with convert_header_exceptions({"machine": self._machine}): - # TODO: use self._client.whoami(self._machine) - # requires https://github.com/eth-cscs/pyfirecrest/issues/58 - resp = self._client._get_request( - endpoint="/utilities/whoami", - additional_headers={"X-Machine-Name": self._machine}, - ) - return self._client._json_response([resp], 200)["output"] # type: ignore - - def checksum(self) -> str: - """Return the SHA256 (256-bit) checksum of the file.""" - # this is not part of the pathlib.Path API, but is useful - with convert_header_exceptions({"machine": self._machine, "path": self}): - return self._client.checksum(self._machine, self.path) - - # methods that utilise stat calls - - def _lstat_mode(self) -> int: - """Return the st_mode of the path, not following symlinks.""" - if self._cache_enabled and self._cache.lst_mode is not None: - return self._cache.lst_mode - self._cache.lst_mode = self.lstat().st_mode - if not stat.S_ISLNK(self._cache.lst_mode): - # if the path is not a symlink, - # then we also know the dereferenced mode - self._cache.st_mode = self._cache.lst_mode - return self._cache.lst_mode - - def _stat_mode(self) -> int: - """Return the st_mode of the path, following symlinks.""" - if self._cache_enabled and self._cache.st_mode is not None: - return self._cache.st_mode - self._cache.st_mode = self.stat().st_mode - return self._cache.st_mode - - def stat(self) -> os.stat_result: - """Return stat info for this path. - - If the path is a symbolic link, - stat will examine the file the link points to. - """ - with convert_header_exceptions( - {"machine": self._machine, "path": self}, - # TODO: remove this once issue fixed: https://github.com/eth-cscs/firecrest/issues/193 - {"X-A-Directory": FileNotFoundError}, - ): - stats = self._client.stat(self._machine, self.path, dereference=True) - return os.stat_result( - ( - stats["mode"], - stats["ino"], - stats["dev"], - stats["nlink"], - stats["uid"], - stats["gid"], - stats["size"], - stats["atime"], - stats["mtime"], - stats["ctime"], - ) - ) - - def lstat(self) -> os.stat_result: - """ - Like stat(), except if the path points to a symlink, the symlink's - status information is returned, rather than its target's. 
- """ - with convert_header_exceptions({"machine": self._machine, "path": self}): - stats = self._client.stat(self._machine, self.path, dereference=False) - return os.stat_result( - ( - stats["mode"], - stats["ino"], - stats["dev"], - stats["nlink"], - stats["uid"], - stats["gid"], - stats["size"], - stats["atime"], - stats["mtime"], - stats["ctime"], - ) - ) - - def exists(self) -> bool: - """Whether this path exists (follows symlinks).""" - try: - self.stat() - except FileNotFoundError: - return False - return True - - def is_dir(self) -> bool: - """Whether this path is a directory (follows symlinks).""" - try: - st_mode = self._stat_mode() - except FileNotFoundError: - return False - return stat.S_ISDIR(st_mode) - - def is_file(self) -> bool: - """Whether this path is a regular file (follows symlinks).""" - try: - st_mode = self._stat_mode() - except FileNotFoundError: - return False - return stat.S_ISREG(st_mode) - - def is_symlink(self) -> bool: - """Whether this path is a symbolic link.""" - try: - st_mode = self._lstat_mode() - except FileNotFoundError: - return False - return stat.S_ISLNK(st_mode) - - def is_block_device(self) -> bool: - """Whether this path is a block device (follows symlinks).""" - try: - st_mode = self._stat_mode() - except FileNotFoundError: - return False - return stat.S_ISBLK(st_mode) - - def is_char_device(self) -> bool: - """Whether this path is a character device (follows symlinks).""" - try: - st_mode = self._stat_mode() - except FileNotFoundError: - return False - return stat.S_ISCHR(st_mode) - - def is_fifo(self) -> bool: - """Whether this path is a FIFO (follows symlinks).""" - try: - st_mode = self._stat_mode() - except FileNotFoundError: - return False - return stat.S_ISFIFO(st_mode) - - def is_socket(self) -> bool: - """Whether this path is a socket (follows symlinks).""" - try: - st_mode = self._stat_mode() - except FileNotFoundError: - return False - return stat.S_ISSOCK(st_mode) - - def iterdir(self: SelfTv, hidden: bool = True) -> Iterator[SelfTv]: - """Iterate over the directory entries.""" - if not self.is_dir(): - return - with convert_header_exceptions({"machine": self._machine, "path": self}): - results = self._client.list_files( - self._machine, self.path, show_hidden=hidden - ) - for entry in results: - lst_mode = _ls_to_st_mode(entry["type"], entry["permissions"]) - # if the path is not a symlink, then we also know the dereferenced mode - st_mode = lst_mode if not stat.S_ISLNK(lst_mode) else None - yield self._new_path( - self._path / entry["name"], - _cache=ModeCache(lst_mode=lst_mode, st_mode=st_mode), - ) - - # operations that modify a file - - def chmod(self, mode: int | str) -> None: - """Change the mode of the path to the numeric mode. - - Note, if the path points to a symlink, - the symlink target's permissions are changed. - """ - # note: according to: - # https://www.gnu.org/software/coreutils/manual/html_node/chmod-invocation.html#chmod-invocation - # chmod never changes the permissions of symbolic links, - # i.e. 
this is chmod, not lchmod - if not isinstance(mode, (int, str)): - raise TypeError("mode must be an integer") - with convert_header_exceptions( - {"machine": self._machine, "path": self}, - {"X-Invalid-Mode": lambda p: ValueError(f"invalid mode: {mode}")}, - ): - self._client.chmod(self._machine, self.path, str(mode)) - self._cache.reset() - - def chown(self, uid: int | str, gid: int | str) -> None: - """Change the owner and group id of the path to the numeric uid and gid.""" - if not isinstance(uid, (str, int)): - raise TypeError("uid must be an integer") - if not isinstance(gid, (str, int)): - raise TypeError("gid must be an integer") - with convert_header_exceptions( - {"machine": self._machine, "path": self}, - { - "X-Invalid-Owner": lambda p: PermissionError(f"invalid uid: {uid}"), - "X-Invalid-Group": lambda p: PermissionError(f"invalid gid: {gid}"), - }, - ): - self._client.chown(self._machine, self.path, str(uid), str(gid)) - - def rename(self: SelfTv, target: str | os.PathLike[str]) -> SelfTv: - """Rename this path to the (absolute) target path. - - Returns the new Path instance pointing to the target path. - """ - target_path = self._new_path(PurePosixPath(target)) - with convert_header_exceptions({"machine": self._machine, "path": self}): - self._client.mv(self._machine, self.path, target_path.path) - return target_path - - def symlink_to(self, target: str | os.PathLike[str]) -> None: - """Make this path a symlink pointing to the target path.""" - target_path = PurePosixPath(target) - if not target_path.is_absolute(): - raise ValueError("target must be an absolute path") - with convert_header_exceptions( - {"machine": self._machine, "path": self}, - # TODO this is only here because of this bug: - # https://github.com/eth-cscs/firecrest/issues/190 - {"X-Error": FileExistsError}, - ): - self._client.symlink(self._machine, str(target_path), self.path) - - def copy_to(self: SelfTv, target: str | os.PathLike[str]) -> None: - """Copy this path to the target path - - Works for both files and directories (in which case the whole tree is copied). - """ - target_path = PurePosixPath(target) - if not target_path.is_absolute(): - raise ValueError("target must be an absolute path") - with convert_header_exceptions({"machine": self._machine, "path": self}): - # Note although this endpoint states that it is only for directories, - # it actually uses `cp -r`: - # https://github.com/eth-cscs/firecrest/blob/7f02d11b224e4faee7f4a3b35211acb9c1cc2c6a/src/utilities/utilities.py#L320 - self._client.copy(self._machine, self.path, str(target_path)) - - def mkdir( - self, mode: None = None, parents: bool = False, exist_ok: bool = False - ) -> None: - """Create a new directory at this given path.""" - if mode is not None: - raise NotImplementedError("mode is not supported yet") - try: - with convert_header_exceptions({"machine": self._machine, "path": self}): - self._client.mkdir(self._machine, self.path, p=parents) - except FileExistsError: - if not exist_ok: - raise - - def touch(self, mode: None = None, exist_ok: bool = True) -> None: - """Create a file at this given path. 
- - :param mode: ignored - :param exist_ok: if True, do not raise an exception if the path already exists - """ - if mode is not None: - raise NotImplementedError("mode is not supported yet") - if self.exists(): - if exist_ok: - return - raise FileExistsError(self) - try: - _, source_path = tempfile.mkstemp() - with convert_header_exceptions({"machine": self._machine, "path": self}): - self._client.simple_upload( - self._machine, source_path, self.parent.path, self.name - ) - finally: - os.remove(source_path) - - def read_bytes(self) -> bytes: - """Read the contents of the file as bytes. - - NOTE: This is only intended for small files. - """ - io = BytesIO() - with convert_header_exceptions({"machine": self._machine, "path": self}): - self._client.simple_download(self._machine, self.path, io) - return io.getvalue() - - def read_text(self, encoding: str = "utf-8", errors: str = "strict") -> str: - """Read the contents of the file as text. - - NOTE: This is only intended for small files. - """ - return self.read_bytes().decode(encoding, errors) - - def write_bytes(self, data: bytes) -> None: - """Write bytes to the file. - - NOTE: This is only intended for small files. - """ - buffer = BytesIO(data) - with convert_header_exceptions({"machine": self._machine, "path": self}): - self._client.simple_upload( - self._machine, buffer, self.parent.path, self.name - ) - - def write_text( - self, data: str, encoding: str = "utf-8", errors: str = "strict" - ) -> None: - """Write text to the file. - - NOTE: This is only intended for small files. - """ - self.write_bytes(data.encode(encoding, errors)) - - def unlink(self, missing_ok: bool = False) -> None: - """Remove this file.""" - # note /utilities/rm uses `rm -rf`, - # https://github.com/eth-cscs/firecrest/blob/7f02d11b224e4faee7f4a3b35211acb9c1cc2c6a/src/utilities/utilities.py#L347 - # so we have to be careful to check first what we are deleting - try: - st_mode = self._lstat_mode() - except FileNotFoundError: - if not missing_ok: - raise FileNotFoundError(self) from None - return - if stat.S_ISDIR(st_mode): - raise IsADirectoryError(self) - with convert_header_exceptions({"machine": self._machine, "path": self}): - self._client.simple_delete(self._machine, self.path) - self._cache.reset() - - def rmtree(self) -> None: - """Recursively delete a directory tree.""" - # note /utilities/rm uses `rm -rf`, - # https://github.com/eth-cscs/firecrest/blob/7f02d11b224e4faee7f4a3b35211acb9c1cc2c6a/src/utilities/utilities.py#L347 - # so we have to be careful to check first what we are deleting - try: - st_mode = self._lstat_mode() - except FileNotFoundError: - raise FileNotFoundError(self) from None - if not stat.S_ISDIR(st_mode): - raise NotADirectoryError(self) - with convert_header_exceptions({"machine": self._machine, "path": self}): - self._client.simple_delete(self._machine, self.path) - self._cache.reset() - - -@lru_cache(maxsize=256) -def _ls_to_st_mode(ftype: str, permissions: str) -> int: - """Use the return information from `utilities/ls` to create an st_mode value. - - Note, this does not dereference symlinks, and so is like lstat - - :param ftype: The file type, e.g. "-" for regular file, "d" for directory. - :param permissions: The file permissions, e.g. "rwxr-xr-x". - """ - ftypes = { - "b": "0060", # block device - "c": "0020", # character device - "d": "0040", # directory - "l": "0120", # Symbolic link - "s": "0140", # Socket. 
- "p": "0010", # FIFO - "-": "0100", # Regular file - } - if ftype not in ftypes: - raise ValueError(f"invalid file type: {ftype}") - p = permissions - r: Callable[[str], int] = lambda x: 4 if x == "r" else 0 # noqa: E731 - w: Callable[[str], int] = lambda x: 2 if x == "w" else 0 # noqa: E731 - x: Callable[[str], int] = lambda x: 1 if x == "x" else 0 # noqa: E731 - st_mode = ( - ((r(p[0]) + w(p[1]) + x(p[2])) * 100) - + ((r(p[3]) + w(p[4]) + x(p[5])) * 10) - + ((r(p[6]) + w(p[7]) + x(p[8])) * 1) - ) - return int(ftypes[ftype] + str(st_mode), 8) diff --git a/aiida_firecrest/scheduler.py b/aiida_firecrest/scheduler.py index 9a1f2bc..e18158e 100644 --- a/aiida_firecrest/scheduler.py +++ b/aiida_firecrest/scheduler.py @@ -1,14 +1,17 @@ """Scheduler interface.""" from __future__ import annotations +import datetime +import itertools import re import string +import time from typing import TYPE_CHECKING, Any, ClassVar from aiida.engine.processes.exit_code import ExitCode from aiida.schedulers import Scheduler, SchedulerError from aiida.schedulers.datastructures import JobInfo, JobState, JobTemplate -from aiida.schedulers.plugins.slurm import SlurmJobResource +from aiida.schedulers.plugins.slurm import _TIME_REGEXP, SlurmJobResource from firecrest.FirecrestException import FirecrestException from .utils import convert_header_exceptions @@ -26,9 +29,7 @@ class FirecrestScheduler(Scheduler): "can_query_by_user": False, } _logger = Scheduler._logger.getChild("firecrest") - - # TODO if this is missing is causes plugin info to fail on verdi - is_process_function = False + _DEFAULT_PAGE_SIZE = 25 @classmethod def get_description(cls) -> str: @@ -198,8 +199,7 @@ def submit_job(self, working_directory: str, filename: str) -> str | ExitCode: try: result = transport._client.submit( transport._machine, - transport._get_path(working_directory, filename), - local_file=False, + script_remote_path=transport._get_path(working_directory, filename), ) except FirecrestException as exc: raise SchedulerError(str(exc)) from exc @@ -211,21 +211,35 @@ def get_jobs( user: str | None = None, as_dict: bool = False, ) -> list[JobInfo] | dict[str, JobInfo]: + results = [] transport = self.transport + with convert_header_exceptions({"machine": transport._machine}): # TODO handle pagination (pageSize, pageNumber) if many jobs + # This will do pagination try: - results = transport._client.poll_active(transport._machine, jobs) + for page_iter in itertools.count(): + results += transport._client.poll_active( + transport._machine, jobs, page_number=page_iter + ) + if len(results) < self._DEFAULT_PAGE_SIZE * (page_iter + 1): + break except FirecrestException as exc: - raise SchedulerError(str(exc)) from exc + # firecrest returns error if the job is completed + # TODO: check what type of error is returned and handle it properly + if "Invalid job id specified" not in str(exc): + raise SchedulerError(str(exc)) from exc job_list = [] for raw_result in results: + # TODO: probably the if below is not needed, because recently + # the server should return only the jobs of the current user if user is not None and raw_result["user"] != user: continue - this_job = JobInfo() # type: ignore this_job.job_id = raw_result["jobid"] + # TODO: firecrest does not return the annotation, so set to an empty string. + # To be investigated how important that is. 
this_job.annotation = "" job_state_raw = raw_result["state"] @@ -276,12 +290,14 @@ def get_jobs( ) ) + # See issue https://github.com/aiidateam/aiida-firecrest/issues/39 # try: # this_job.num_mpiprocs = int(thisjob_dict['number_cpus']) # except ValueError: # self.logger.warning( # 'The number of allocated cores is not ' - # 'an integer ({}) for job id {}!'.format(thisjob_dict['number_cpus'], this_job.job_id) + # 'an integer ({}) for job id {}!'.format( + # thisjob_dict['number_cpus'], this_job.job_id) # ) # ALLOCATED NODES HERE @@ -290,34 +306,54 @@ def get_jobs( # therefore it requires some parsing, that is unnecessary now. # I just store is as a raw string for the moment, and I leave # this_job.allocated_machines undefined - # if this_job.job_state == JobState.RUNNING: - # this_job.allocated_machines_raw = thisjob_dict['allocated_machines'] + if this_job.job_state == JobState.RUNNING: + this_job.allocated_machines_raw = raw_result["nodelist"] this_job.queue_name = raw_result["partition"] - # try: - # walltime = (self._convert_time(thisjob_dict['time_limit'])) - # this_job.requested_wallclock_time_seconds = walltime # pylint: disable=invalid-name - # except ValueError: - # self.logger.warning(f'Error parsing the time limit for job id {this_job.job_id}') + try: + time_left = self._convert_time(raw_result["time_left"]) + start_time = self._convert_time(raw_result["start_time"]) + + if time_left is None or start_time is None: + this_job.requested_wallclock_time_seconds = 0 + else: + this_job.requested_wallclock_time_seconds = time_left + start_time + + except ValueError: + self.logger.warning( + f"Couldn't parse the time limit for job id {this_job.job_id}" + ) # Only if it is RUNNING; otherwise it is not meaningful, # and may be not set (in my test, it is set to zero) - # if this_job.job_state == JobState.RUNNING: - # try: - # this_job.wallclock_time_seconds = (self._convert_time(thisjob_dict['time_used'])) - # except ValueError: - # self.logger.warning(f'Error parsing time_used for job id {this_job.job_id}') + if this_job.job_state == JobState.RUNNING: + try: + wallclock_time_seconds = self._convert_time( + raw_result["start_time"] + ) + if wallclock_time_seconds is not None: + this_job.wallclock_time_seconds = wallclock_time_seconds + else: + this_job.wallclock_time_seconds = 0 + except ValueError: + self.logger.warning( + f"Couldn't parse time_used for job id {this_job.job_id}" + ) + # dispatch_time is not returned explicitly by the FirecREST server + # see: https://github.com/aiidateam/aiida-firecrest/issues/40 # try: # this_job.dispatch_time = self._parse_time_string(thisjob_dict['dispatch_time']) # except ValueError: # self.logger.warning(f'Error parsing dispatch_time for job id {this_job.job_id}') - # try: - # this_job.submission_time = self._parse_time_string(thisjob_dict['submission_time']) - # except ValueError: - # self.logger.warning(f'Error parsing submission_time for job id {this_job.job_id}') + try: + this_job.submission_time = self._parse_time_string(raw_result["time"]) + except ValueError: + self.logger.warning( + f"Couldn't parse submission_time for job id {this_job.job_id}" + ) this_job.title = raw_result["name"] @@ -354,6 +390,52 @@ def kill_job(self, jobid: str) -> bool: transport._client.cancel(transport._machine, jobid) return True + def _convert_time(self, string: str) -> int | None: + """ + Note: this function was copied from the Slurm scheduler plugin + Convert a string in the format DD-HH:MM:SS to a number of seconds. 
+ """ + if string == "UNLIMITED": + return 2147483647 # == 2**31 - 1, largest 32-bit signed integer (68 years) + + if string == "NOT_SET" or string == "N/A": + return None + + groups = _TIME_REGEXP.match(string) + if groups is None: + raise ValueError("Unrecognized format for time string.") + + groupdict = groups.groupdict() + # should not raise a ValueError, they all match digits only + days = int(groupdict["days"] if groupdict["days"] is not None else 0) + hours = int(groupdict["hours"] if groupdict["hours"] is not None else 0) + mins = int(groupdict["minutes"] if groupdict["minutes"] is not None else 0) + secs = int(groupdict["seconds"] if groupdict["seconds"] is not None else 0) + + return days * 86400 + hours * 3600 + mins * 60 + secs + + def _parse_time_string( + self, string: str, fmt: str = "%Y-%m-%dT%H:%M:%S" + ) -> datetime.datetime: + """ + Note: this function was copied from the Slurm scheduler plugin + Parse a time string in the format returned from qstat -f and + returns a datetime object. + """ + + try: + time_struct = time.strptime(string, fmt) + except Exception as exc: + self.logger.debug( + f"Unable to parse time string {string}, the message was {exc}" + ) + raise ValueError("Problem parsing the time string.") from exc + + # I convert from a time_struct to a datetime object going through + # the seconds since epoch, as suggested on stackoverflow: + # http://stackoverflow.com/questions/1697815 + return datetime.datetime.fromtimestamp(time.mktime(time_struct)) + # see https://slurm.schedmd.com/squeue.html#lbAG # note firecrest returns full names, not abbreviations diff --git a/aiida_firecrest/transport.py b/aiida_firecrest/transport.py index ae1a35d..31a6743 100644 --- a/aiida_firecrest/transport.py +++ b/aiida_firecrest/transport.py @@ -1,24 +1,25 @@ """Transport interface.""" from __future__ import annotations +from contextlib import suppress import fnmatch +import hashlib import os from pathlib import Path -import platform import posixpath -import shutil -import time +import tarfile from typing import Any, Callable, ClassVar, TypedDict -from urllib import request +import uuid +from aiida.cmdline.params.options.interactive import InteractiveOption from aiida.cmdline.params.options.overridable import OverridableOption from aiida.transports import Transport -from aiida.transports.transport import validate_positive_number from aiida.transports.util import FileAttribute +from click.core import Context from click.types import ParamType from firecrest import ClientCredentialsAuth, Firecrest # type: ignore[attr-defined] - -from .remote_path import FcPath, convert_header_exceptions # type: ignore[attr-defined] +from firecrest.path import FcPath +from packaging.version import Version, parse class ValidAuthOption(TypedDict, total=False): @@ -32,6 +33,202 @@ class ValidAuthOption(TypedDict, total=False): callback: Callable[..., Any] # for validation +def _create_secret_file(ctx: Context, param: InteractiveOption, value: str) -> str: + """Create a secret file if the value is not a path to a secret file. + The path should be absolute, if it is not, the file will be created in ~/.firecrest. 
+ """ + import click + + possible_path = Path(value) + if os.path.isabs(possible_path): + if not possible_path.exists(): + raise click.BadParameter(f"Secret file not found at {value}") + secret_path = possible_path + + else: + Path("~/.firecrest").expanduser().mkdir(parents=True, exist_ok=True) + _ = uuid.uuid4() + secret_path = Path(f"~/.firecrest/secret_{_}").expanduser() + while secret_path.exists(): + # instead of a random number one could use the label or pk of the computer being configured + secret_path = Path(f"~/.firecrest/secret_{_}").expanduser() + secret_path.write_text(value) + click.echo( + click.style("Fireport: ", bold=True, fg="magenta") + + f"Client Secret stored at {secret_path}" + ) + return str(secret_path) + + +def _validate_temp_directory(ctx: Context, param: InteractiveOption, value: str) -> str: + """Validate the temp directory on the server. + If it does not exist, create it. + If it is not empty, get a confirmation from the user to empty it. + """ + + import click + + firecrest_url = ctx.params["url"] + token_uri = ctx.params["token_uri"] + client_id = ctx.params["client_id"] + compute_resource = ctx.params["compute_resource"] + secret = ctx.params["client_secret"] + + transport = FirecrestTransport( + url=firecrest_url, + token_uri=token_uri, + client_id=client_id, + client_secret=secret, + compute_resource=compute_resource, + temp_directory=value, + small_file_size_mb=1.0, # small_file_size_mb is irrelevant here + api_version="100.0.0", # version is irrelevant here + ) + + # Temp directory routine + if transport._cwd.joinpath( + transport._temp_directory + ).is_file(): # self._temp_directory.is_file(): + raise click.BadParameter("Temp directory cannot be a file") + + if transport.path_exists(transport._temp_directory): + if transport.listdir(transport._temp_directory): + # if not configured: + confirm = click.confirm( + f"Temp directory {transport._temp_directory} is not empty. Do you want to flush it?" + ) + if confirm: + for item in transport.listdir(transport._temp_directory): + # TODO: maybe do recursive delete + transport.remove(transport._temp_directory.joinpath(item)) + else: + click.echo("Please provide an empty temp directory on the server.") + raise click.BadParameter( + f"Temp directory {transport._temp_directory} is not empty" + ) + + else: + try: + transport.mkdir(transport._temp_directory, ignore_existing=True) + except Exception as e: + raise click.BadParameter( + f"Could not create temp directory {transport._temp_directory} on server: {e}" + ) from e + click.echo( + click.style("Fireport: ", bold=True, fg="magenta") + + f"Temp directory is set to {value}" + ) + + return value + + +def _dynamic_info_firecrest_version( + ctx: Context, param: InteractiveOption, value: str +) -> str: + """Find the version of the FirecREST server.""" + # note: right now, unfortunately, the version is not exposed in the API. 
+ # See issue https://github.com/eth-cscs/firecrest/issues/204 + # so here we just develope a workaround to get the version from the server + # basically we check if extract/compress endpoint is available + + import click + + if value != "0": + if parse(value) < parse("1.15.0"): + raise click.BadParameter(f"FirecREST api version {value} is not supported") + return value + + firecrest_url = ctx.params["url"] + token_uri = ctx.params["token_uri"] + client_id = ctx.params["client_id"] + compute_resource = ctx.params["compute_resource"] + secret = ctx.params["client_secret"] + temp_directory = ctx.params["temp_directory"] + + transport = FirecrestTransport( + url=firecrest_url, + token_uri=token_uri, + client_id=client_id, + client_secret=secret, + compute_resource=compute_resource, + temp_directory=temp_directory, + small_file_size_mb=0.0, + api_version="100.0.0", # version is irrelevant here + ) + try: + transport.listdir(transport._cwd.joinpath(temp_directory), recursive=True) + _version = "1.16.0" + except Exception: + # all sort of exceptions can be raised here, but we don't care. Since this is just a workaround + _version = "1.15.0" + + click.echo( + click.style("Fireport: ", bold=True, fg="magenta") + + f"Deployed version of FirecREST api: v{_version}" + ) + return _version + + +def _dynamic_info_direct_size( + ctx: Context, param: InteractiveOption, value: float +) -> float: + """Get dynamic information from the server, if the user enters 0 for the small_file_size_mb. + This is done by connecting to the server and getting the value of UTILITIES_MAX_FILE_SIZE. + Below this size, file bytes will be sent in a single API call. Above this size, + the file will be downloaded(uploaded) from(to) the object store and downloaded in chunks. + + :param ctx: the `click.Context` + :param param: the parameter + :param value: the value passed for the parameter + + :return: the value of small_file_size_mb. 
+ + """ + import click + + if value > 0: + return value + + firecrest_url = ctx.params["url"] + token_uri = ctx.params["token_uri"] + client_id = ctx.params["client_id"] + compute_resource = ctx.params["compute_resource"] + secret = ctx.params["client_secret"] + temp_directory = ctx.params["temp_directory"] + + transport = FirecrestTransport( + url=firecrest_url, + token_uri=token_uri, + client_id=client_id, + client_secret=secret, + compute_resource=compute_resource, + temp_directory=temp_directory, + small_file_size_mb=0.0, + api_version="100.0.0", # version is irrelevant here + ) + + parameters = transport._client.parameters() + utilities_max_file_size = next( + ( + item + for item in parameters["utilities"] + if item["name"] == "UTILITIES_MAX_FILE_SIZE" + ), + None, + ) + small_file_size_mb = ( + float(utilities_max_file_size["value"]) + if utilities_max_file_size is not None + else 5.0 + ) + click.echo( + click.style("Fireport: ", bold=True, fg="magenta") + + f"Maximum file size for direct transfer: {small_file_size_mb} MB" + ) + + return small_file_size_mb + + class FirecrestTransport(Transport): """Transport interface for FirecREST.""" @@ -43,7 +240,6 @@ class FirecrestTransport(Transport): # but this would ideally require some "global" rate limiter, # across all transport instances # TODO upstream issue - # TODO also open an issue that the `verdi computer test won't work with a REST-API` _common_auth_options: ClassVar[list[Any]] = [] # type: ignore[misc] _DEFAULT_SAFE_OPEN_INTERVAL = 0.0 @@ -81,50 +277,49 @@ class FirecrestTransport(Transport): "type": str, "non_interactive_default": False, "prompt": "Client Secret", - "help": "FirecREST client secret", + "help": "FirecREST client secret or Absolute path to an existing FirecREST Secret Key", + "callback": _create_secret_file, }, ), - # ( - # # TODO: format of secret file, and lookup secret by default in ~/.firecrest/secrets.json - # "secret_path", - # { - # "type": AbsolutePathOrEmptyParamType(dir_okay=False, exists=True), - # "non_interactive_default": False, - # "prompt": "Secret key file", - # "help": "Absolute path to file containing FirecREST client secret", - # }, - # ), ( - "client_machine", + "compute_resource", { "type": str, "non_interactive_default": False, - "prompt": "Client Machine", - "help": "FirecREST machine secret", + "prompt": "Compute resource (Machine)", + "help": "Compute resources, for example 'daint', 'eiger', etc.", }, ), ( - # TODO you could potentially get this dynamically from server - # (via /status/parameters) - "small_file_size_mb", + "temp_directory", { - "type": float, - "default": 5.0, # limit set on the server is usually this + "type": str, + "non_interactive_default": False, + "prompt": "Temp directory on server", + "help": "A temp directory on server for creating temporary files (compression, extraction, etc.)", + "callback": _validate_temp_directory, + }, + ), + ( + "api_version", + { + "type": str, + "default": "0", "non_interactive_default": True, - "prompt": "Maximum file size for direct transfer (MB)", - "help": "Below this size, file bytes will be sent in a single API call.", - "callback": validate_positive_number, + "prompt": "FirecREST api version [Enter 0 to get this info from server]", + "help": "The version of the FirecREST api deployed on the server", + "callback": _dynamic_info_firecrest_version, }, ), ( - "file_transfer_poll_interval", + "small_file_size_mb", { "type": float, - "default": 0.1, # TODO what default to choose? 
+ "default": 0, "non_interactive_default": True, - "prompt": "File transfer poll interval (s)", - "help": "Poll interval when waiting for large file transfers.", - "callback": validate_positive_number, + "prompt": "Maximum file size for direct transfer (MB) [Enter 0 to get this info from server]", + "help": "Below this size, file bytes will be sent in a single API call.", + "callback": _dynamic_info_direct_size, }, ), ] @@ -135,16 +330,28 @@ def __init__( url: str, token_uri: str, client_id: str, - client_secret: str | Path, - client_machine: str, - small_file_size_mb: float = 5.0, - file_transfer_poll_interval: float = 0.1, + client_secret: str, + compute_resource: str, + temp_directory: str, + small_file_size_mb: float, + api_version: str, # note, machine is provided by default, # for the hostname, but we don't use that # TODO ideally hostname would not be necessary on a computer **kwargs: Any, ): - """Construct a FirecREST transport.""" + """Construct a FirecREST transport object. + + :param url: URL to the FirecREST server + :param token_uri: URI for retrieving FirecREST authentication tokens + :param client_id: FirecREST client ID + :param client_secret: FirecREST client secret or str(Absolute path) to an existing FirecREST Secret Key + :param compute_resource: Compute resources, for example 'daint', 'eiger', etc. + :param small_file_size_mb: Maximum file size for direct transfer (MB) + :param temp_directory: A temp directory on server for creating temporary files (compression, extraction, etc.) + :param kwargs: Additional keyword arguments + """ + # there is no overhead for "opening" a connection to a REST-API, # but still allow the user to set a safe interval if they really want to kwargs.setdefault("safe_interval", 0) @@ -153,40 +360,61 @@ def __init__( assert isinstance(url, str), "url must be a string" assert isinstance(token_uri, str), "token_uri must be a string" assert isinstance(client_id, str), "client_id must be a string" - assert isinstance( - client_secret, (str, Path) - ), "client_secret must be a string or Path" - - assert isinstance(client_machine, str), "client_machine must be a string" + assert isinstance(client_secret, str), "client_secret must be a string" + assert isinstance(compute_resource, str), "compute_resource must be a string" + assert isinstance(temp_directory, str), "temp_directory must be a string" + assert isinstance(api_version, str), "api_version must be a string" assert isinstance( small_file_size_mb, float ), "small_file_size_mb must be a float" - assert isinstance( - file_transfer_poll_interval, float - ), "file_transfer_poll_interval must be a float" - self._machine = client_machine + self._machine = compute_resource self._url = url self._token_uri = token_uri self._client_id = client_id self._small_file_size_bytes = int(small_file_size_mb * 1024 * 1024) - self._file_transfer_poll_interval = file_transfer_poll_interval - secret = ( - client_secret.read_text() - if isinstance(client_secret, Path) - else client_secret - ) + self._payoff_override: bool | None = None - self._client = Firecrest( - firecrest_url=self._url, - authorization=ClientCredentialsAuth(client_id, secret, token_uri), - ) + secret = Path(client_secret).read_text().strip() + try: + self._client = Firecrest( + firecrest_url=self._url, + authorization=ClientCredentialsAuth(client_id, secret, token_uri), + ) + except Exception as e: + raise ValueError(f"Could not connect to FirecREST server: {e}") from e + + self._cwd: FcPath = FcPath(self._client, self._machine, "/", 
cache_enabled=True) + self._temp_directory = self._cwd.joinpath(temp_directory) - self._cwd: FcPath = FcPath(self._client, self._machine, "/") + self._api_version: Version = parse(api_version) - # TODO if this is missing is causes plugin info to fail on verdi - is_process_function = False + if self._api_version < parse("1.16.0"): + self._payoff_override = False + + # this makes no sense for firecrest, but we need to set this to True + # otherwise the aiida-core will complain that the transport is not open: + # aiida-core/src/aiida/orm/utils/remote:clean_remote() + self._is_open = True + + def __str__(self) -> str: + """Return the name of the plugin.""" + return self.__class__.__name__ + + @property + def is_open(self) -> bool: + return self._is_open + + @property + def payoff_override(self) -> bool | None: + return self._payoff_override + + @payoff_override.setter + def payoff_override(self, value: bool) -> None: + if not isinstance(value, bool): + raise ValueError("payoff_override must be a boolean value") + self._payoff_override = value @classmethod def get_description(cls) -> str: @@ -200,36 +428,46 @@ def get_description(cls) -> str: ) def open(self) -> None: # noqa: A003 + """Open the transport. + This is a no-op for the REST-API, as there is no connection to open. + """ pass def close(self) -> None: + """Close the transport. + This is a no-op for the REST-API, as there is no connection to close. + """ pass def getcwd(self) -> str: + """Return the current working directory.""" return str(self._cwd) def _get_path(self, *path: str) -> str: - return posixpath.normpath(self._cwd.joinpath(*path)) + """Return the path as a string.""" + return posixpath.normpath(self._cwd.joinpath(*path)) # type: ignore def chdir(self, path: str) -> None: + """Change the current working directory.""" new_path = self._cwd.joinpath(path) if not new_path.is_dir(): raise OSError(f"'{new_path}' is not a valid directory") self._cwd = new_path - def normalize(self, path: str = ".") -> str: - return posixpath.normpath(path) - def chmod(self, path: str, mode: str) -> None: + """Change the mode of a file.""" self._cwd.joinpath(path).chmod(mode) def chown(self, path: str, uid: str, gid: str) -> None: + """Change the owner of a file.""" self._cwd.joinpath(path).chown(uid, gid) def path_exists(self, path: str) -> bool: - return self._cwd.joinpath(path).exists() + """Check if a path exists on the remote.""" + return self._cwd.joinpath(path).exists() # type: ignore def get_attribute(self, path: str) -> FileAttribute: + """Get the attributes of a file.""" result = self._cwd.joinpath(path).stat() return FileAttribute( # type: ignore { @@ -243,20 +481,58 @@ def get_attribute(self, path: str) -> FileAttribute: ) def isdir(self, path: str) -> bool: - return self._cwd.joinpath(path).is_dir() + """Check if a path is a directory.""" + return self._cwd.joinpath(path).is_dir() # type: ignore def isfile(self, path: str) -> bool: - return self._cwd.joinpath(path).is_file() - - def listdir(self, path: str = ".", pattern: str | None = None) -> list[str]: - names = [p.name for p in self._cwd.joinpath(path).iterdir()] + """Check if a path is a file.""" + return self._cwd.joinpath(path).is_file() # type: ignore + + def listdir( + self, path: str = ".", pattern: str | None = None, recursive: bool = False + ) -> list[str]: + """List the contents of a directory. + + :param path: this could be relative or absolute path. + Note igolb() will usually call this with relative path. 
+ + :param pattern: Unix shell-style wildcards to match the pattern: + - `*` matches everything + - `?` matches any single character + - `[seq]` matches any character in seq + - `[!seq]` matches any character not in seq + :param recursive: If True, list directories recursively + """ + path_abs = self._cwd.joinpath(path) + names = [p.relpath(path_abs) for p in path_abs.iterdir(recursive=recursive)] if pattern is not None: names = fnmatch.filter(names, pattern) return names - # TODO the default implementations of glob / iglob could be overriden + # TODO the default implementations of glob / iglob could be overridden # to be more performant, using cached FcPaths and https://github.com/chrisjsewell/virtual-glob + def makedirs(self, path: str, ignore_existing: bool = False) -> None: + """Make directories on the remote.""" + new_path = self._cwd.joinpath(path) + if not ignore_existing and new_path.exists(): + # Note: FirecREST does not raise an error if the directory already exists, and parent is True. + # which makes sense, but following the Superclass, we should raise an OSError in that case. + # AiiDA expects an OSError, instead of a FileExistsError + raise OSError(f"'{path}' already exists") + self._cwd.joinpath(path).mkdir(parents=True, exist_ok=ignore_existing) + + def mkdir(self, path: str, ignore_existing: bool = False) -> None: + """Make a directory on the remote.""" + try: + self._cwd.joinpath(path).mkdir(exist_ok=ignore_existing) + except FileExistsError as err: + raise OSError(f"'{path}' already exists") from err + + def normalize(self, path: str = ".") -> str: + """Resolve the path.""" + return posixpath.normpath(path) + def write_binary(self, path: str, data: bytes) -> None: """Write bytes to a file on the remote.""" # Note this is not part of the Transport interface, but is useful for testing @@ -267,9 +543,10 @@ def read_binary(self, path: str) -> bytes: """Read bytes from a file on the remote.""" # Note this is not part of the Transport interface, but is useful for testing # TODO will fail for files exceeding small_file_size_mb - return self._cwd.joinpath(path).read_bytes() + return self._cwd.joinpath(path).read_bytes() # type: ignore def symlink(self, remotesource: str, remotedestination: str) -> None: + """Create a symlink on the remote.""" source = self._cwd.joinpath(remotesource) destination = self._cwd.joinpath(remotedestination) destination.symlink_to(source) @@ -277,36 +554,67 @@ def symlink(self, remotesource: str, remotedestination: str) -> None: def copyfile( self, remotesource: str, remotedestination: str, dereference: bool = False ) -> None: - source = self._cwd.joinpath(remotesource).enable_cache() - destination = self._cwd.joinpath(remotedestination).enable_cache() - + """Copy a file on the remote. FirecREST does not support symlink copying. + + :param dereference: If True, copy the target of the symlink instead of the symlink itself. 
+ """ + source = self._cwd.joinpath( + remotesource + ) # .enable_cache() it's removed from from path.py to be investigated + destination = self._cwd.joinpath( + remotedestination + ) # .enable_cache() it's removed from from path.py to be investigated + if dereference: + raise NotImplementedError("copyfile() does not support symlink dereference") if not source.exists(): raise FileNotFoundError(f"Source file does not exist: {source}") if not source.is_file(): - raise FileNotFoundError(f"Source is not a file: {source}") + raise ValueError(f"Source is not a file: {source}") + if not destination.exists() and not source.is_file(): + raise FileNotFoundError(f"Destination file does not exist: {destination}") - if not dereference and source.is_symlink(): - destination.symlink_to(source) - else: - source.copy_to(destination) + self._copy_to(source, destination) + # I removed symlink copy, becasue it's really not a file copy, it's a link copy + # and aiida-ssh have it in buggy manner, prrobably it's not used anyways + + def _copy_to(self, source: FcPath, target: FcPath) -> None: + """Copy source path to the target path. Both paths must be on remote. + + Works for both files and directories (in which case the whole tree is copied). + """ + with self._cwd.convert_header_exceptions(): + # Note although this endpoint states that it is only for directories, + # it actually uses `cp -r`: + # https://github.com/eth-cscs/firecrest/blob/7f02d11b224e4faee7f4a3b35211acb9c1cc2c6a/src/utilities/utilities.py#L320 + self._client.copy(self._machine, str(source), str(target)) def copytree( self, remotesource: str, remotedestination: str, dereference: bool = False ) -> None: - source = self._cwd.joinpath(remotesource).enable_cache().enable_cache() - destination = ( - self._cwd.joinpath(remotedestination).enable_cache().enable_cache() - ) - + """Copy a directory on the remote. FirecREST does not support symlink copying. + + :param dereference: If True, copy the target of the symlink instead of the symlink itself. + """ + # TODO: check if deference is set to False, symlinks will be functional after the copy in Firecrest server. + + source = self._cwd.joinpath( + remotesource + ) # .enable_cache().enable_cache() it's removed from from path.py to be investigated + destination = self._cwd.joinpath( + remotedestination + ) # .enable_cache().enable_cache() it's removed from from path.py to be investigated + if dereference: + raise NotImplementedError( + "Dereferencing not implemented in FirecREST server" + ) if not source.exists(): raise FileNotFoundError(f"Source file does not exist: {source}") if not source.is_dir(): - raise FileNotFoundError(f"Source is not a directory: {source}") + raise ValueError(f"Source is not a directory: {source}") + if not destination.exists(): + raise FileNotFoundError(f"Destination file does not exist: {destination}") - if not dereference and source.is_symlink(): - destination.symlink_to(source) - else: - source.copy_to(destination) + self._copy_to(source, destination) def copy( self, @@ -315,45 +623,88 @@ def copy( dereference: bool = False, recursive: bool = True, ) -> None: + """Copy a file or directory on the remote. FirecREST does not support symlink copying. + + :param recursive: If True, copy directories recursively. + note that the non-recursive option is not implemented in FirecREST server. + And it's not used in upstream, anyways... + + :param dereference: If True, copy the target of the symlink instead of the symlink itself. + """ + # TODO: investigate overwrite (?) 
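+        # For example (illustrative paths), a call like ``copy("calc_dir/*.txt", "backup/")``
+        # is expanded below via ``iglob`` into one ``copy`` call per matching path,
+        # so glob patterns only need to be handled once, at this top level.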
+ if not recursive: # TODO this appears to not actually be used upstream, so just remove there raise NotImplementedError("Non-recursive copy not implemented") - source = self._cwd.joinpath(remotesource).enable_cache() - destination = self._cwd.joinpath(remotedestination).enable_cache() - - if not source.exists(): - raise FileNotFoundError(f"Source file does not exist: {source}") + if dereference: + raise NotImplementedError( + "Dereferencing not implemented in FirecREST server" + ) + + if self.has_magic(str(remotesource)): # type: ignore + for item in self.iglob(remotesource): # type: ignore + # item is of str type, so we need to split it to get the file name + filename = item.split("/")[-1] if self.isfile(item) else "" + self.copy( + item, + remotedestination + filename, + dereference=dereference, + recursive=recursive, + ) + return - if not dereference and source.is_symlink(): - destination.symlink_to(source) - else: - source.copy_to(destination) + source = self._cwd.joinpath( + remotesource + ) # .enable_cache() it's removed from from path.py to be investigated + destination = self._cwd.joinpath( + remotedestination + ) # .enable_cache() it's removed from from path.py to be investigated - def makedirs(self, path: str, ignore_existing: bool = False) -> None: - self._cwd.joinpath(path).mkdir(parents=True, exist_ok=ignore_existing) + if not source.exists(): + raise FileNotFoundError(f"Source does not exist: {source}") + if not destination.exists() and not source.is_file(): + raise FileNotFoundError(f"Destination does not exist: {destination}") - def mkdir(self, path: str, ignore_existing: bool = False) -> None: - self._cwd.joinpath(path).mkdir(exist_ok=ignore_existing) + self._copy_to(source, destination) - # TODO check symlink handling for get methods # TODO do get/put methods need to handle glob patterns? + # Apparently not, but I'm not clear how glob() iglob() are going to behave here. We may need to implement them. def getfile( - self, remotepath: str | FcPath, localpath: str | Path, *args: Any, **kwargs: Any + self, + remotepath: str | FcPath, + localpath: str | Path, + dereference: bool = True, + *args: Any, + **kwargs: Any, ) -> None: + """Get a file from the remote. + + :param dereference: If True, follow symlinks. + note: we don't support downloading symlinks, so dereference should always be True + + """ + if not dereference: + raise NotImplementedError( + "Getting symlinks with `dereference=False` is not supported" + ) + local = Path(localpath) if not local.is_absolute(): raise ValueError("Destination must be an absolute path") remote = ( remotepath if isinstance(remotepath, FcPath) - else self._cwd.joinpath(remotepath).enable_cache() + else self._cwd.joinpath( + remotepath + ) # .enable_cache() it's removed from from path.py to be investigated ) if not remote.is_file(): raise FileNotFoundError(f"Source file does not exist: {remote}") remote_size = remote.lstat().st_size - - with convert_header_exceptions({"machine": self._machine, "path": remote}): + # if not local.exists(): + # local.mkdir(parents=True) + with self._cwd.convert_header_exceptions(): if remote_size < self._small_file_size_bytes: self._client.simple_download(self._machine, str(remote), localpath) else: @@ -363,77 +714,238 @@ def getfile( # to concurrently initiate internal file transfers to the object store (a.k.a. 
"staging area") # and downloading from the object store to the local machine - # this initiates the internal transfer of the file to the "staging area" + # I investigated asyncio, but it's not performant for this use case. + # Becasue in the end, FirecREST server ends up serializing the requests. + # see here: https://github.com/eth-cscs/pyfirecrest/issues/94 down_obj = self._client.external_download(self._machine, str(remote)) + down_obj.finish_download(local) + + self._validate_checksum(local, remote) - # this waits for the file to be moved to the staging area - # TODO handle the transfer stalling (timeout?) and optimise the polling interval - while down_obj.in_progress: - time.sleep(self._file_transfer_poll_interval) - - # this downloads the file from the "staging area" - url = down_obj.object_storage_data - if ( - os.environ.get("FIRECREST_LOCAL_TESTING") - and platform.system() == "Darwin" - ): - # TODO when using the demo server on a Mac, the wrong IP is provided - # and even then a 403 error is returned, due to a signature mismatch - # note you can directly directly download the file from: - # "/path/to/firecrest/deploy/demo/minio" + urlparse(url).path - url = url.replace("192.168.220.19", "localhost") - with request.urlopen(url) as response, local.open("wb") as handle: - shutil.copyfileobj(response, handle) - - # TODO use cwd.checksum to confirm download is not corrupted? + def _validate_checksum( + self, localpath: str | Path, remotepath: str | FcPath + ) -> None: + """Validate the checksum of a file. + Useful for checking if a file was transferred correctly. + it uses sha256 hash to compare the checksum of the local and remote files. + + Raises: ValueError: If the checksums do not match. + """ + + local = Path(localpath) + if not local.is_absolute(): + raise ValueError("Destination must be an absolute path") + remote = ( + remotepath + if isinstance(remotepath, FcPath) + else self._cwd.joinpath( + remotepath + ) # .enable_cache() it's removed from from path.py to be investigated + ) + if not remote.is_file(): + raise FileNotFoundError( + f"Cannot calculate checksum for a directory: {remote}" + ) + + sha256_hash = hashlib.sha256() + with open(local, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + local_hash = sha256_hash.hexdigest() + + remote_hash = self._client.checksum(self._machine, remote) + + try: + assert local_hash == remote_hash + except AssertionError as e: + raise ValueError( + f"Checksum mismatch between local and remote files: {local} and {remote}" + ) from e + + def _gettreetar( + self, + remotepath: str | FcPath, + localpath: str | Path, + dereference: bool = True, + *args: Any, + **kwargs: Any, + ) -> None: + """Get a directory from the remote as a tar file and extract it locally. + This is useful for downloading a directory with many files, + as it might be more efficient than downloading each file individually. + Note that this method is not part of the Transport interface, and is not meant to be used publicly. + + :param dereference: If True, follow symlinks. 
+ """ + + _ = uuid.uuid4() + remote_path_temp = self._temp_directory.joinpath(f"temp_{_}.tar") + + # Compress + self._client.compress( + self._machine, str(remotepath), remote_path_temp, dereference=dereference + ) + + # Download + localpath_temp = Path(localpath).joinpath(f"temp_{_}.tar") + try: + self.getfile(remote_path_temp, localpath_temp) + finally: + self.remove(remote_path_temp) + + # Extract the downloaded file locally + try: + # with tarfile.open(localpath_temp, "r") as tar: + # members = [m for m in tar.getmembers() if m.name.startswith(remotepath.name)] + # for member in members: + # member.name = os.path.relpath(member.name, remotepath.name) + # tar.extract(member, path=localpath) + os.system(f"tar -xf '{localpath_temp}' --strip-components=1 -C {localpath}") + finally: + localpath_temp.unlink() def gettree( - self, remotepath: str | FcPath, localpath: str | Path, *args: Any, **kwargs: Any + self, + remotepath: str | FcPath, + localpath: str | Path, + dereference: bool = True, + *args: Any, + **kwargs: Any, ) -> None: + """Get a directory from the remote. + + :param dereference: If True, follow symlinks. + note: dereference should be always True, otherwise the symlinks will not be functional. + """ + local = Path(localpath) if not local.is_absolute(): raise ValueError("Destination must be an absolute path") if local.is_file(): raise OSError("Cannot copy a directory into a file") - local.mkdir(parents=True, exist_ok=True) remote = ( remotepath if isinstance(remotepath, FcPath) - else self._cwd.joinpath(remotepath).enable_cache() + else self._cwd.joinpath( + remotepath + ) # .enable_cache() it's removed from from path.py to be investigated ) + local = Path(localpath) + if not remote.is_dir(): raise OSError(f"Source is not a directory: {remote}") - for remote_item in remote.iterdir(): - local_item = local.joinpath(remote_item.name) - if remote_item.is_dir(): - self.gettree(remote_item, local_item) - else: - self.getfile(remote_item, local_item) - def get(self, remotepath: str, localpath: str, *args: Any, **kwargs: Any) -> None: - remote = self._cwd.joinpath(remotepath).enable_cache() + # this block is added only to mimick the behavior that aiida expects + if local.exists(): + # Destination directory already exists, create remote directory name inside it + local = local.joinpath(remote.name) + local.mkdir(parents=True, exist_ok=True) + else: + # Destination directory does not exist, create and move content abc inside it + local.mkdir(parents=True, exist_ok=False) + + if self.payoff(remote): + # in this case send a request to the server to tar the files and then download the tar file + # unfortunately, the server does not provide a deferenced tar option, yet. 
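+            # Note: _gettreetar transfers the whole directory with a handful of API calls
+            # (compress, download, remove), regardless of how many files it contains,
+            # which is why it is preferred once payoff() judges the transfer large enough.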
+ self._gettreetar(remote, local, dereference=dereference) + else: + # otherwise download the files one by one + for remote_item in remote.iterdir(recursive=True): + local_item = local.joinpath(remote_item.relpath(remote)) + if dereference and remote_item.is_symlink(): + target_path = remote_item._cache.link_target + if not Path(target_path).is_absolute(): + target_path = remote_item.parent.joinpath(target_path).resolve() + + target_path = self._cwd.joinpath(target_path) + if target_path.is_dir(): + self.gettree(target_path, local_item, dereference=True) + else: + target_path = remote_item + + if not target_path.is_dir(): + self.getfile(target_path, local_item) + else: + local_item.mkdir(parents=True, exist_ok=True) + + def get( + self, + remotepath: str, + localpath: str, + ignore_nonexisting: bool = False, + dereference: bool = True, + *args: Any, + **kwargs: Any, + ) -> None: + """Get a file or directory from the remote. + + :param ignore_nonexisting: If True, do not raise an error if the source file does not exist. + :param dereference: If True, follow symlinks. + note: dereference should be always True, otherwise the symlinks will not be functional. + """ + remote = self._cwd.joinpath( + remotepath + ) # .enable_cache() it's removed from from path.py to be investigated + if remote.is_dir(): self.gettree(remote, localpath) elif remote.is_file(): self.getfile(remote, localpath) - else: + elif self.has_magic(str(remotepath)): # type: ignore + for item in self.iglob(remotepath): # type: ignore + # item is of str type, so we need to split it to get the file name + filename = item.split("/")[-1] if self.isfile(item) else "" + self.get( + item, + localpath + filename, + dereference=dereference, + ignore_nonexisting=ignore_nonexisting, + ) + return + elif not ignore_nonexisting: raise FileNotFoundError(f"Source file does not exist: {remote}") def putfile( - self, localpath: str, remotepath: str, *args: Any, **kwargs: Any + self, + localpath: str | Path, + remotepath: str | FcPath, + dereference: bool = True, + *args: Any, + **kwargs: Any, ) -> None: - if not Path(localpath).is_absolute(): + """Put a file from the remote. + + :param dereference: If True, follow symlinks. 
+        note: we don't support uploading symlinks, so dereference should always be True
+
+        """
+
+        if not dereference:
+            raise NotImplementedError(
+                "Putting symlinks with `dereference=False` is not supported"
+            )
+
+        localpath = Path(localpath)
+        if not localpath.is_absolute():
             raise ValueError("The localpath must be an absolute path")
-        if not Path(localpath).is_file():
-            raise ValueError(f"Input localpath is not a file: {localpath}")
 
-        local_size = Path(localpath).stat().st_size
 
-        remote = self._cwd.joinpath(remotepath).enable_cache()
+        if not localpath.is_file():
+            if not localpath.exists():
+                raise FileNotFoundError(f"Local file does not exist: {localpath}")
+            raise ValueError(f"Input localpath is not a file: {localpath}")
+
+        remote = self._cwd.joinpath(
+            str(remotepath)
+        )  # .enable_cache() was removed from path.py, to be investigated
+
+        if remote.is_dir():
+            raise ValueError(f"Destination is a directory: {remote}")
+
+        local_size = localpath.stat().st_size
 
         # note this allows overwriting of existing files
-        with convert_header_exceptions({"machine": self._machine, "path": remote}):
+        with self._cwd.convert_header_exceptions():
             if local_size < self._small_file_size_bytes:
                 self._client.simple_upload(
-                    self._machine, localpath, str(remote.parent), remote.name
+                    self._machine, str(localpath), str(remote.parent), remote.name
                 )
             else:
                 # TODO the following is a very basic implementation of uploading a large file
@@ -442,108 +954,225 @@ def putfile(
                 # to concurrently upload to the object store (a.k.a. "staging area"),
                 # then wait for all files to finish being transferred to the target location
 
-                # this simply retrieves a location to upload on the "staging area"
+                # I investigated asyncio, but it's not performant for this use case,
+                # because in the end the FirecREST server ends up serializing the requests.
+                # see here: https://github.com/eth-cscs/pyfirecrest/issues/94
                 up_obj = self._client.external_upload(
-                    self._machine, localpath, str(remote)
+                    self._machine, str(localpath), str(remote)
                 )
-                if (
-                    os.environ.get("FIRECREST_LOCAL_TESTING")
-                    and platform.system() == "Darwin"
-                ):
-                    # TODO when using the demo server on a Mac, the wrong IP is provided
-                    up_obj.object_storage_data["command"] = up_obj.object_storage_data[
-                        "command"
-                    ].replace("192.168.220.19", "localhost")
-
-                # this uploads the file to the "staging area"
-                # TODO this calls curl in a subcommand, but you could also use the python requests library
-                # see: https://github.com/chrisjsewell/fireflow/blob/d45d41a0aced6502b7946c5557712a3c3cb1bebb/src/fireflow/process.py#L177
                 up_obj.finish_upload()
-                # this waits for the file in the staging area to be moved to the final location
-                # TODO handle the transfer stalling (timeout?) and optimise the polling interval
-                while up_obj.in_progress:
-                    time.sleep(self._file_transfer_poll_interval)
 
-        # TODO use cwd.checksum to confirm upload is not corrupted?
+        self._validate_checksum(localpath, str(remote))
+
+    def payoff(self, path: str | FcPath | Path) -> bool:
+        """
+        Determine whether it pays off to transfer a directory as a single tar file
+        rather than transferring its files one by one.
+        """
+        # After discussing with the pyfirecrest team, it seems that the server has some sort
+        # of serialization and "penalty" for sending multiple requests asynchronously or in a short time window.
+        # It responds in 1, 1.5, 3, 5, 7 seconds!
+ # So right now, I think if the number of files is more than 3, it pays off to tar everything + + # If payoff_override is set, return its value + if self.payoff_override is not None: + return bool(self.payoff_override) + + if ( + isinstance(path, FcPath) + and len(self.listdir(str(path), recursive=True)) > 3 + ): + return True + if isinstance(path, Path) and len(os.listdir(path)) > 3: + return True + return False + + def _puttreetar( + self, + localpath: str | Path, + remotepath: str | FcPath, + dereference: bool = True, + *args: Any, + **kwargs: Any, + ) -> None: + """Put a directory to the remote by sending as tar file in backend. + This is useful for uploading a directory with many files, + as it might be more efficient than uploading each file individually. + Note that this method is not part of the Transport interface, and is not meant to be used publicly. + + :param dereference: If True, follow symlinks. If False, symlinks are ignored from sending over. + """ + # this function will be used to send a folder as a tar file to the server and extract it on the server + + _ = uuid.uuid4() + + localpath = Path(localpath) + tarpath = localpath.parent.joinpath(f"temp_{_}.tar") + remote_path_temp = self._temp_directory.joinpath(f"temp_{_}.tar") + with tarfile.open(tarpath, "w", dereference=dereference) as tar: + for root, _, files in os.walk(localpath, followlinks=dereference): + for file in files: + full_path = os.path.join(root, file) + relative_path = os.path.relpath(full_path, localpath) + tar.add(full_path, arcname=relative_path) + + # Upload + try: + self.putfile(tarpath, remote_path_temp) + finally: + tarpath.unlink() + + # Attempt extract + try: + self._client.extract(self._machine, remote_path_temp, str(remotepath)) + finally: + self.remove(remote_path_temp) def puttree( - self, localpath: str | Path, remotepath: str, *args: Any, **kwargs: Any + self, + localpath: str | Path, + remotepath: str, + dereference: bool = True, + *args: Any, + **kwargs: Any, ) -> None: + """Put a directory to the remote. + + :param dereference: If True, follow symlinks. + note: dereference should be always True, otherwise the symlinks + will not be functional, therfore not supported. 
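+
+        Mirroring what AiiDA (and the ``aiida-ssh`` transport) expect: if ``remotepath``
+        already exists, a directory named after the local one is created inside it and
+        receives the content; otherwise ``remotepath`` itself is created and filled.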
+        """
+        if not dereference:
+            raise NotImplementedError
+        localpath = Path(localpath)
+        remote = self._cwd.joinpath(remotepath)
 
-        # local checks
         if not localpath.is_absolute():
             raise ValueError("The localpath must be an absolute path")
         if not localpath.exists():
             raise OSError("The localpath does not exists")
         if not localpath.is_dir():
-            raise ValueError(f"Input localpath is not a folder: {localpath}")
+            raise ValueError(f"Input localpath is not a directory: {localpath}")
 
-        for dirpath, _, filenames in os.walk(localpath):
-            # Get the relative path
-            rel_folder = os.path.relpath(path=dirpath, start=localpath)
+        # this block is added only to mimic the behavior that aiida expects
+        if remote.exists():
+            # Destination directory already exists: create a directory named after the local one inside it
+            remote = self._cwd.joinpath(remote, localpath.name)
+            self.mkdir(remote, ignore_existing=False)
+        else:
+            # Destination directory does not exist: create it and move the content inside it
+            self.mkdir(remote, ignore_existing=False)
+
+        if self.payoff(localpath):
+            # in this case send everything as a tar file
+            self._puttreetar(localpath, remote)
+        else:
+            # otherwise send the files one by one
+            for dirpath, _, filenames in os.walk(localpath, followlinks=dereference):
+                rel_folder = os.path.relpath(path=dirpath, start=localpath)
 
-            if not self.path_exists(os.path.join(remotepath, rel_folder)):
-                self.mkdir(os.path.join(remotepath, rel_folder))
+                rm_parent_now = remote.joinpath(rel_folder)
+                self.mkdir(rm_parent_now, ignore_existing=True)
 
-            for filename in filenames:
-                localfile_path = os.path.join(localpath, rel_folder, filename)
-                remotefile_path = os.path.join(remotepath, rel_folder, filename)
-                self.putfile(localfile_path, remotefile_path)
+                for filename in filenames:
+                    localfile_path = os.path.join(localpath, rel_folder, filename)
+                    remotefile_path = rm_parent_now.joinpath(filename)
+                    self.putfile(localfile_path, remotefile_path)
 
-    def put(self, localpath: str, remotepath: str, *args: Any, **kwargs: Any) -> None:
+    def put(
+        self,
+        localpath: str,
+        remotepath: str,
+        ignore_nonexisting: bool = False,
+        dereference: bool = True,
+        *args: Any,
+        **kwargs: Any,
+    ) -> None:
+        """Put a file or directory to the remote.
+
+        :param ignore_nonexisting: If True, do not raise an error if the source file does not exist.
+        :param dereference: If True, follow symlinks.
+            note: dereference should always be True, otherwise the symlinks will not be functional.
+        """
         # TODO ssh does a lot more
-        if os.path.isdir(localpath):
+        # update on the TODO: I made a manual test with ssh.
+ # added some extra care in puttree and gettree and now it's working fine + + if not dereference: + raise NotImplementedError + + local = Path(localpath) + if not local.is_absolute(): + raise ValueError("The localpath must be an absolute path") + + if self.has_magic(str(localpath)): # type: ignore + for item in self.iglob(localpath): # type: ignore + # item is of str type, so we need to split it to get the file name + filename = item.split("/")[-1] if self.isfile(item) else "" + self.put( + item, + remotepath + filename, + dereference=dereference, + ignore_nonexisting=ignore_nonexisting, + ) + return + + if not Path(local).exists() and not ignore_nonexisting: + raise FileNotFoundError(f"Source file does not exist: {localpath}") + + if local.is_dir(): self.puttree(localpath, remotepath) - elif os.path.isfile(localpath): - if self.isdir(remotepath): - remote = os.path.join(remotepath, os.path.split(localpath)[1]) - self.putfile(localpath, remote) - else: - self.putfile(localpath, remotepath) + elif local.is_file(): + self.putfile(localpath, remotepath) - def remove(self, path: str) -> None: - self._cwd.joinpath(path).unlink() + def remove(self, path: str | FcPath) -> None: + """Remove a file or directory on the remote.""" + self._cwd.joinpath(str(path)).unlink() def rename(self, oldpath: str, newpath: str) -> None: + """Rename a file or directory on the remote.""" self._cwd.joinpath(oldpath).rename(self._cwd.joinpath(newpath)) def rmdir(self, path: str) -> None: - # TODO check if empty - self._cwd.joinpath(path).rmtree() + """Remove a directory on the remote. + If the directory is not empty, an OSError is raised.""" - def rmtree(self, path: str) -> None: - self._cwd.joinpath(path).rmtree() + if len(self.listdir(path)) == 0: + self._cwd.joinpath(path).rmtree() + else: + raise OSError(f"Directory not empty: {path}") - def whoami(self) -> str: - return self._cwd.whoami() + def rmtree(self, path: str) -> None: + """Remove a directory on the remote. + If the directory is not empty, it will be removed recursively, equivalent to `rm -rf`. + It does not raise an error if the directory does not exist. + """ + # TODO: suppress is to mimick the behaviour of `aiida-ssh`` transport, TODO: raise an issue on aiida + with suppress(FileNotFoundError): + self._cwd.joinpath(path).rmtree() + + def whoami(self) -> str | None: + """Return the username of the current user. + return None if the username cannot be determined. + """ + return self._client.whoami(machine=self._machine) def gotocomputer_command(self, remotedir: str) -> str: + """Not possible for REST-API. + It's here only because it's an abstract method in the base class.""" # TODO remove from interface raise NotImplementedError("firecrest does not support gotocomputer_command") def _exec_command_internal(self, command: str, **kwargs: Any) -> Any: + """Not possible for REST-API. + It's here only because it's an abstract method in the base class.""" # TODO remove from interface raise NotImplementedError("firecrest does not support command execution") def exec_command_wait_bytes( self, command: str, stdin: Any = None, **kwargs: Any ) -> Any: + """Not possible for REST-API. + It's here only because it's an abstract method in the base class.""" # TODO remove from interface raise NotImplementedError("firecrest does not support command execution") - - -def validate_non_empty_string(ctx, param, value): # type: ignore - """Validate that the number passed to this parameter is a positive number. 
- - :param ctx: the `click.Context` - :param param: the parameter - :param value: the value passed for the parameter - :raises `click.BadParameter`: if the value is not a positive number - """ - if not isinstance(value, str) or not value.strip(): - from click import BadParameter - - raise BadParameter(f"{value} is not string or is empty") - - return value diff --git a/aiida_firecrest/utils_test.py b/aiida_firecrest/utils_test.py deleted file mode 100644 index f5c2801..0000000 --- a/aiida_firecrest/utils_test.py +++ /dev/null @@ -1,760 +0,0 @@ -"""Utilities mainly for testing.""" -from __future__ import annotations - -from dataclasses import dataclass, field -from datetime import datetime -import io -from json import dumps as json_dumps -from pathlib import Path -import shutil -import stat -import subprocess -from typing import Any, BinaryIO -from urllib.parse import urlparse - -import requests -from requests.models import Response - - -@dataclass -class FirecrestConfig: - """Configuration returned from tests fixtures.""" - - url: str - token_uri: str - client_id: str - client_secret: str - machine: str - scratch_path: str - small_file_size_mb: float = 1.0 - - -class FirecrestMockServer: - """A mock server to imitate Firecrest (v1.13.0). - - This minimally mimics accessing the filesystem and submitting jobs, - enough to make tests pass, without having to run a real Firecrest server. - - See also: https://github.com/eth-cscs/pyfirecrest/blob/4dadce90d7fb01949d203a5b1d2c247048a5a3a9/tests/test_storage.py - """ - - def __init__( - self, tmpdir: Path, url: str = "https://test.com", machine: str = "test" - ) -> None: - self._url = url - self._url_parsed = urlparse(url) - self._machine = machine - self._scratch = tmpdir / "scratch" - self._scratch.mkdir() - self._client_id = "test_client_id" - self._client_secret = "test_client_secret" - self._token_url = "https://test.auth.com/token" - self._token_url_parsed = urlparse(self._token_url) - self._username = "test_user" - - self._slurm_job_id_counter = 0 - self._slurm_jobs: dict[str, dict[str, Any]] = {} - - self._task_id_counter = 0 - self._tasks: dict[str, Task] = {} - - self.max_size_bytes = 5 * 1024 * 1024 - - @property - def scratch(self) -> Path: - return self._scratch - - @property - def config(self) -> FirecrestConfig: - return FirecrestConfig( - url=self._url, - token_uri=self._token_url, - client_id=self._client_id, - client_secret=self._client_secret, - machine=self._machine, - scratch_path=str(self._scratch.absolute()), - ) - - def mock_request( - self, - url: str | bytes, - params: dict[str, Any] | None = None, - data: dict[str, Any] | None = None, - files: dict[str, tuple[str, BinaryIO]] | None = None, - **kwargs: Any, - ) -> Response: - """Mock a request to the Firecrest server.""" - response = Response() - response.encoding = "utf-8" - response.url = url if isinstance(url, str) else url.decode("utf-8") - parsed = urlparse(response.url) - endpoint = parsed.path - - if parsed.netloc == self._token_url_parsed.netloc: - if endpoint != "/token": - raise requests.exceptions.InvalidURL(f"Unknown endpoint: {endpoint}") - response.status_code = 200 - response.raw = io.BytesIO( - json_dumps( - { - "access_token": "test_access_token", - "expires_in": 3600, - } - ).encode(response.encoding) - ) - return response - - if parsed.netloc != self._url_parsed.netloc: - raise requests.exceptions.InvalidURL( - f"{parsed.netloc} != {self._url_parsed.netloc}" - ) - - if endpoint == "/utilities/whoami": - add_success_response(response, 200, self._username) 
- elif endpoint == "/utilities/stat": - self.utilities_stat(params or {}, response) - elif endpoint == "/utilities/file": - self.utilities_file(params or {}, response) - elif endpoint == "/utilities/ls": - self.utilities_ls(params or {}, response) - elif endpoint == "/utilities/symlink": - self.utilities_symlink(data or {}, response) - elif endpoint == "/utilities/mkdir": - self.utilities_mkdir(data or {}, response) - elif endpoint == "/utilities/rm": - self.utilities_rm(data or {}, response) - elif endpoint == "/utilities/copy": - self.utilities_copy(data or {}, response) - elif endpoint == "/utilities/chmod": - self.utilities_chmod(data or {}, response) - # elif endpoint == "/utilities/chown": - # utilities_chown(data or {}, response) - elif endpoint == "/utilities/rename": - self.utilities_rename(data or {}, response) - elif endpoint == "/utilities/upload": - self.utilities_upload(data or {}, files or {}, response) - elif endpoint == "/utilities/download": - self.utilities_download(params or {}, response) - elif endpoint == "/compute/jobs/path": - self.compute_jobs_path(data or {}, response) - elif endpoint == "/compute/jobs": - self.compute_jobs(params or {}, response) - elif endpoint.startswith("/tasks/"): - self.handle_task(endpoint[7:], response) - elif endpoint == "/storage/xfer-external/upload": - self.storage_xfer_external_upload(data or {}, response) - elif endpoint == "/storage/xfer-external/download": - self.storage_xfer_external_download(data or {}, response) - else: - raise requests.exceptions.InvalidURL(f"Unknown endpoint: {endpoint}") - - return response - - def new_task_id(self) -> str: - self._task_id_counter += 1 - return f"{self._task_id_counter}" - - def task_url(self, task_id: str) -> str: - return f"TASK_IP/tasks/{task_id}" - - def new_slurm_job_id(self) -> str: - """Generate a new SLURM job ID.""" - self._slurm_job_id_counter += 1 - return f"{self._slurm_job_id_counter}" - - def compute_jobs(self, params: dict[str, Any], response: Response) -> None: - # TODO pageSize pageNumber - jobs: None | list[str] = params["jobs"].split(",") if "jobs" in params else None - task_id = self.new_task_id() - self._tasks[task_id] = ActiveSchedulerJobsTask(task_id=task_id, jobs=jobs) - add_json_response( - response, - 200, - { - "success": "Task created", - "task_id": task_id, - "task_url": self.task_url(task_id), - }, - ) - - def compute_jobs_path(self, data: dict[str, Any], response: Response) -> Response: - script_path = Path(data["targetPath"]) - if not script_path.is_file(): - return add_json_response( - response, - 400, - {"description": "Failed to submit job", "data": "File does not exist"}, - {"X-Invalid-Path": f"{script_path} is an invalid path."}, - ) - - job_id = self.new_slurm_job_id() - - # read the file - script_content = script_path.read_text("utf8") - - # TODO this could be more rigorous - - # check that the first line is a shebang - if not script_content.startswith("#!/bin/bash"): - return add_json_response( - response, - 400, - { - "description": "Finished with errors", - "data": "First line must be a shebang `#!/bin/bash`", - }, - ) - - # get all sbatch options (see https://slurm.schedmd.com/sbatch.html) - sbatch_options: dict[str, Any] = {} - - for line in script_content.splitlines(): - if not line.startswith("#SBATCH"): - continue - arg = line[7:].strip().split("=", 1) - if len(arg) == 1: - assert arg[0].startswith("--"), f"Invalid sbatch option: {arg[0]}" - sbatch_options[arg[0][2:]] = True - elif len(arg) == 2: - assert arg[0].startswith("--"), f"Invalid sbatch 
option: {arg[0]}" - sbatch_options[arg[0][2:]] = arg[1].strip() - - # set stdout and stderror file - out_file = error_file = "slurm-%j.out" - if "output" in sbatch_options: - out_file = sbatch_options["output"] - if "error" in sbatch_options: - error_file = sbatch_options["error"] - out_file = out_file.replace("%j", job_id) - error_file = error_file.replace("%j", job_id) - - # we now just run the job straight away and blocking, no scheduling - # run the script in a subprocess, in the script's directory - # pipe stdout and stderr to the slurm output file - script_path.chmod(0o755) # make sure the script is executable - if out_file == error_file: - with open(script_path.parent / out_file, "w") as out: - subprocess.run( - [str(script_path)], - cwd=script_path.parent, - stdout=out, - stderr=subprocess.STDOUT, - ) - else: - with open(script_path.parent / out_file, "w") as out, open( - script_path.parent / error_file, "w" - ) as err: - subprocess.run( - [str(script_path)], cwd=script_path.parent, stdout=out, stderr=err - ) - - task_id = self.new_task_id() - self._tasks[task_id] = ScheduledJobTask( - task_id=task_id, - job_id=job_id, - script_path=script_path, - stdout_path=script_path.parent / out_file, - stderr_path=script_path.parent / error_file, - ) - self._slurm_jobs[job_id] = {} - return add_json_response( - response, - 201, - { - "success": "Task created", - "task_url": self.task_url(task_id), - "task_id": task_id, - }, - ) - - def storage_xfer_external_download( - self, data: dict[str, Any], response: Response - ) -> None: - source = Path(data["sourcePath"]) - if not source.exists(): - response.status_code = 400 - response.headers["X-Not-Found"] = "" - return - if source.is_dir(): - response.status_code = 400 - response.headers["X-A-Directory"] = "" - return - if not source.is_file(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - task_id = self.new_task_id() - self._tasks[task_id] = StorageXferExternalDownloadTask( - task_id=task_id, source_path=source - ) - add_json_response( - response, - 201, - { - "success": "Task created", - "task_id": task_id, - "task_url": self.task_url(task_id), - }, - ) - - def storage_xfer_external_upload( - self, data: dict[str, Any], response: Response - ) -> None: - source = Path(data["sourcePath"]) - target = Path(data["targetPath"]) - if not target.parent.exists(): - response.status_code = 400 - response.headers["X-Not-Found"] = "" - return - task_id = self.new_task_id() - self._tasks[task_id] = StorageXferExternalUploadTask( - task_id=task_id, source=source, target=target - ) - add_json_response( - response, - 201, - { - "success": "Task created", - "task_id": task_id, - "task_url": self.task_url(task_id), - }, - ) - - def utilities_file(self, params: dict[str, Any], response: Response) -> None: - path = Path(params["targetPath"]) - if not path.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - add_success_response(response, 200, "directory" if path.is_dir() else "text") - - def utilities_symlink(self, data: dict[str, Any], response: Response) -> None: - target = Path(data["targetPath"]) - link = Path(data["linkPath"]) - if not target.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - if link.exists(): - response.status_code = 400 - response.headers["X-Exists"] = "" - return - link.symlink_to(target) - add_success_response(response, 201) - - def utilities_stat(self, params: dict[str, Any], response: Response) -> None: - path = 
Path(params["targetPath"]) - dereference = params.get("dereference", False) - if not path.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - result = path.stat() if dereference else path.lstat() - add_success_response( - response, - 200, - { - "mode": result.st_mode, - "uid": result.st_uid, - "gid": result.st_gid, - "size": result.st_size, - "atime": result.st_atime, - "mtime": result.st_mtime, - "ctime": result.st_ctime, - "nlink": result.st_nlink, - "ino": result.st_ino, - "dev": result.st_dev, - }, - ) - - def utilities_ls(self, params: dict[str, Any], response: Response) -> None: - path = Path(params["targetPath"]) - if not path.is_dir(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - data = [] - # note ls returns the file if the path is a file - for item in path.iterdir() if path.is_dir() else [path]: - _stat = item.lstat() - data.append( - { - "name": item.name, - "permissions": stat.filemode(_stat.st_mode)[1:], - "type": "l" if item.is_symlink() else "d" if item.is_dir() else "-", - "size": _stat.st_size, - } - ) - - add_success_response(response, 200, data) - - def utilities_chmod(self, data: dict[str, Any], response: Response) -> None: - path = Path(data["targetPath"]) - if not path.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - path.chmod(int(data["mode"], 8)) - add_success_response(response, 200) - - def utilities_rename(self, data: dict[str, Any], response: Response) -> None: - source = Path(data["sourcePath"]) - target = Path(data["targetPath"]) - if not source.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - if target.exists(): - response.status_code = 400 - response.headers["X-Exists"] = "" - return - source.rename(target) - add_success_response(response, 200) - - def utilities_mkdir(self, data: dict[str, Any], response: Response) -> None: - path = Path(data["targetPath"]) - if path.exists(): - response.status_code = 400 - response.headers["X-Exists"] = "" - return - path.mkdir(parents=data.get("p", False)) - add_success_response(response, 201) - - def utilities_rm(self, data: dict[str, Any], response: Response) -> None: - path = Path(data["targetPath"]) - if not path.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - if path.is_dir(): - shutil.rmtree(path) - else: - path.unlink() - add_success_response(response, 204) - - def utilities_copy(self, data: dict[str, Any], response: Response) -> None: - source = Path(data["sourcePath"]) - target = Path(data["targetPath"]) - if not source.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - if target.exists(): - response.status_code = 400 - response.headers["X-Exists"] = "" - return - if source.is_dir(): - shutil.copytree(source, target) - else: - shutil.copy2(source, target) - add_success_response(response, 201) - - def utilities_upload( - self, - data: dict[str, Any], - files: dict[str, tuple[str, BinaryIO]], - response: Response, - ) -> None: - # TODO files["file"] can be a tuple (name, stream) or just a stream with a name - fname, fbuffer = files["file"] - path = Path(data["targetPath"]) / fname - if not path.parent.exists(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - - fbytes = fbuffer.read() - if len(fbytes) > self.max_size_bytes: - add_json_response( - response, - 413, - { - "description": f"Failed to upload file. 
The file is over {self.max_size_bytes} bytes" - }, - ) - return - - path.write_bytes(fbytes) - add_success_response(response, 201) - - def utilities_download(self, params: dict[str, Any], response: Response) -> None: - path = Path(params["sourcePath"]) - if not path.is_file(): - response.status_code = 400 - response.headers["X-Invalid-Path"] = "" - return - - if path.lstat().st_size >= self.max_size_bytes: - add_json_response( - response, - 400, - { - "description": f"Failed to download file. The file is over {self.max_size_bytes} bytes" - }, - {"X-Size-Limit": "File size exceeds limit"}, - ) - return - - response.status_code = 200 - response.raw = io.BytesIO(path.read_bytes()) - - def handle_task(self, task_id: str, response: Response) -> Response: - if task_id not in self._tasks: - return add_json_response( - response, 404, {"error": f"Task {task_id} does not exist"} - ) - - task = self._tasks[task_id] - - if isinstance(task, ActiveSchedulerJobsTask): - return self.task_active_scheduler_jobs(task, response) - if isinstance(task, ScheduledJobTask): - return self.task_scheduled_job(task, response) - if isinstance(task, StorageXferExternalUploadTask): - return self.task_storage_xfer_external_upload(task, response) - if isinstance(task, StorageXferExternalDownloadTask): - return self.task_storage_xfer_external_download(task, response) - raise NotImplementedError(f"Unknown task type: {type(task)}") - - def task_active_scheduler_jobs( - self, task: ActiveSchedulerJobsTask, response: Response - ) -> Response: - if task.jobs is not None: - for job_id in task.jobs or []: - if job_id not in self._slurm_jobs: - return add_json_response( - response, - 400, - { - "description": "Failed to retrieve job information", - "error": f"{job_id} is not a valid job ID", - }, - ) - - # Note because we always run jobs straight away (see self.compute_jobs_path), - # then we can assume that there are never any active jobs. 
- # TODO add some basic way to simulate active jobs - job_data: dict[str, Any] = {} - - return add_json_response( - response, - 200, - { - "task": { - "task_id": task.task_id, - "service": "compute", - "status": "200", - "description": "Finished successfully", - "data": job_data, - "user": self._username, - "task_url": self.task_url(task.task_id), - "hash_id": task.task_id, - "created_at": task.created_str, - "last_modify": task.modified_str, - "updated_at": task.modified_str, - } - }, - ) - - def task_scheduled_job( - self, task: ScheduledJobTask, response: Response - ) -> Response: - return add_json_response( - response, - 200, - { - "task": { - "data": { - "job_data_err": "", - "job_data_out": "", - "job_file": str(task.script_path), - "job_file_err": str(str(task.stderr_path)), - "job_file_out": str(str(task.stdout_path)), - "job_info_extra": "Job info returned successfully", - "jobid": task.job_id, - "result": "Job submitted", - }, - "description": "Finished successfully", - "service": "compute", - "status": "200", - "task_id": task.task_id, - "user": self._username, - "task_url": self.task_url(task.task_id), - "hash_id": task.task_id, - "created_at": task.created_str, - "last_modify": task.modified_str, - "updated_at": task.modified_str, - } - }, - ) - - def task_storage_xfer_external_download( - self, task: StorageXferExternalDownloadTask, response: Response - ) -> Response: - # see: https://github.com/eth-cscs/pyfirecrest/blob/5edbe85d11b977ed8f6943ca22e4fdc3d6f5e12d/firecrest/BasicClient.py#L202 - return add_json_response( - response, - 200, - { - "task": { - "data": f"file://{task.source_path}", - "description": "Started upload from filesystem to Object Storage", - "hash_id": task.task_id, - "service": "storage", - "status": "117", - "task_id": task.task_id, - "task_url": self.task_url(task.task_id), - "user": "username", - "last_modify": task.modified_str, - } - }, - ) - - def task_storage_xfer_external_upload( - self, task: StorageXferExternalUploadTask, response: Response - ) -> Response: - # to mock this once the Form URL is retrieved (110), we move straight to the - # "Download from Object Storage to server has finished" (114) for the next request - # this skips statuses 111, 112 and 113, - # see: https://github.com/eth-cscs/pyfirecrest/blob/5edbe85d11b977ed8f6943ca22e4fdc3d6f5e12d/firecrest/BasicClient.py#L143 - # and so we are assuming that the file is uploaded to the server - - if not task.form_retrieved: - task.form_retrieved = True - return add_json_response( - response, - 200, - { - "task": { - "data": { - "msg": { - "command": "echo 'mock'", - # "command": "curl --show-error -s -i -X POST http://192.168.220.19:9000/service-account-firecrest-sample -F 'key=fd690c43e6ee509359b9e2c3237f4cc5/file.txt' -F 'x-amz-algorithm=AWS4-HMAC-SHA256' -F 'x-amz-credential=storage_access_key/202306/us-east-1/s3/aws4_request' -F 'x-amz-date=20230630T155026Z' -F 'policy=xxx' -F 'x-amz-signature=yyy' -F file=@/private/var/folders/t2/xbl15_3n4tsb1vr_ccmmtmbr0000gn/T/pytest-of-chrisjsewell/pytest-340/test_putfile_large0/file.txt", # noqa: E501 - "parameters": { - # "data": { - # "key": "fd690c43e6ee509359b9e2c3237f4cc5/file.txt", - # "policy": "xxx", - # "x-amz-algorithm": "AWS4-HMAC-SHA256", - # "x-amz-credential": "storage_access_key/202306/us-east-1/s3/aws4_request", - # "x-amz-date": "20230630T155026Z", - # "x-amz-signature": "yyy", - # }, - "data": {}, - "files": str(task.target), - "headers": {}, - "json": {}, - "method": "POST", - "params": {}, - # "url": 
"http://192.168.220.19:9000/service-account-firecrest-sample", - }, - }, - "status": "111", - "source": str(task.source), - "target": str(task.target), - "hash_id": task.task_id, - "user": self._username, - "system_addr": "machine_addr", - "system_name": self._machine, - "trace_id": "trace", - }, - "description": "Form URL from Object Storage received", - "service": "storage", - "status": "111", - "user": self._username, - "task_id": task.task_id, - "hash_id": task.task_id, - "task_url": self.task_url(task.task_id), - "created_at": task.created_str, - "last_modify": task.modified_str, - "updated_at": task.modified_str, - }, - }, - ) - else: - if not task.target.exists(): - shutil.copy(task.source, task.target) - return add_json_response( - response, - 200, - { - "task": { - "data": "Download from Object Storage to server has finished", - "description": "Download from Object Storage to server has finished", - "service": "storage", - "status": "114", - "task_id": task.task_id, - "user": self._username, - "hash_id": task.task_id, - "updated_at": task.modified_str, - "last_modify": task.modified_str, - "created_at": task.created_str, - "task_url": self.task_url(task.task_id), - } - }, - ) - - -@dataclass # note in python 3.10 we can use `(kw_only=False)` -class Task: - task_id: str - created_at: datetime = field(default_factory=datetime.now, init=False) - last_modified: datetime = field(default_factory=datetime.now, init=False) - - @property - def created_str(self) -> str: - return self.created_at.strftime("%Y-%m-%dT%H:%M:%S") - - @property - def modified_str(self) -> str: - return self.last_modified.strftime("%Y-%m-%dT%H:%M:%S") - - -@dataclass -class ActiveSchedulerJobsTask(Task): - jobs: list[str] | None - - -@dataclass -class ScheduledJobTask(Task): - job_id: str - script_path: Path - stdout_path: Path - stderr_path: Path - - -@dataclass -class StorageXferExternalUploadTask(Task): - source: Path - target: Path - form_retrieved: bool = False - - -@dataclass -class StorageXferExternalDownloadTask(Task): - source_path: Path - - -def add_json_response( - response: Response, - status_code: int, - json_data: dict[str, Any], - headers: dict[str, str] | None = None, -) -> Response: - response.status_code = status_code - response.raw = io.BytesIO(json_dumps(json_data).encode(response.encoding or "utf8")) - if headers: - response.headers.update(headers) - return response - - -def add_success_response( - response: Response, status_code: int, output: Any = "" -) -> Response: - return add_json_response( - response, - status_code, - { - "description": "success", - "output": output, - }, - ) diff --git a/firecrest_demo.py b/firecrest_demo.py index 743d752..21c5e30 100644 --- a/firecrest_demo.py +++ b/firecrest_demo.py @@ -1,3 +1,11 @@ +""" +This file is a CLI to generate a FirecREST demo server. +It clones FirecREST from a given URL and tag, builds the docker environment, and runs the docker environment. +It's useful for local testing and development. +Currently, it's not used in the CI/CD pipeline, and it's not part of the AiiDA plugin. +This code is not tested, with anything above version "v1.13.0" of FirecREST, and it's not maintained. 
+See also https://github.com/aiidateam/aiida-firecrest/issues/47 +""" from __future__ import annotations from argparse import ArgumentParser diff --git a/pyproject.toml b/pyproject.toml index 97551be..c9eeba4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,11 +24,10 @@ classifiers = [ keywords = ["aiida", "firecrest"] requires-python = ">=3.9" dependencies = [ - "aiida-core@git+https://github.com/chrisjsewell/aiida_core.git@aiida-firecrest#egg=aiida-core", + "aiida-core@git+https://github.com/aiidateam/aiida-core.git@954cbdd", "click", - "pyfirecrest~=1.4.0", + "pyfirecrest@git+https://github.com/eth-cscs/pyfirecrest.git@6cae414", "pyyaml", - "requests", ] [project.urls] diff --git a/tests/conftest.py b/tests/conftest.py index 2481be1..7d13b7f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,126 +1,396 @@ -"""Pytest configuration for the aiida-firecrest tests. - -This sets up the Firecrest server to use, and telemetry for API requests. -""" from __future__ import annotations -from functools import partial -from json import load as json_load +from dataclasses import dataclass +import hashlib +import itertools +import json +import os from pathlib import Path +import shutil +import stat from typing import Any, Callable from urllib.parse import urlparse -from _pytest.terminal import TerminalReporter -import firecrest as f7t +from aiida import orm +import firecrest +import firecrest.path import pytest import requests -import yaml -from aiida_firecrest.utils_test import FirecrestConfig, FirecrestMockServer + +class Values: + _DEFAULT_PAGE_SIZE: int = 25 + + +@pytest.fixture +def firecrest_computer(firecrest_config): + """Create and return a computer configured for Firecrest. + + Note, the computer is not stored in the database. + """ + + # create a temp directory and set it as the workdir + + computer = orm.Computer( + label="test_computer", + description="test computer", + hostname="-", + workdir=firecrest_config.workdir, + transport_type="firecrest", + scheduler_type="firecrest", + ) + computer.set_minimum_job_poll_interval(5) + computer.set_default_mpiprocs_per_machine(1) + computer.configure( + url=firecrest_config.url, + token_uri=firecrest_config.token_uri, + client_id=firecrest_config.client_id, + client_secret=firecrest_config.client_secret, + compute_resource=firecrest_config.compute_resource, + small_file_size_mb=firecrest_config.small_file_size_mb, + temp_directory=firecrest_config.temp_directory, + api_version=firecrest_config.api_version, + ) + return computer -def pytest_report_header(config): - if config.getoption("--firecrest-config"): - header = [ - "Running against FirecREST server: {}".format( - config.getoption("--firecrest-config") +class MockFirecrest: + """Mocks py:class:`pyfirecrest.Firecrest`.""" + + def __init__(self, firecrest_url, *args, **kwargs): + self._firecrest_url = firecrest_url + self.args = args + self.kwargs = kwargs + self.job_id_generator = itertools.cycle(range(1000, 999999)) + + def submit( + self, + machine: str, + script_str: str | None = None, + script_remote_path: str | None = None, + script_local_path: str | None = None, + local_file=False, + ): + if local_file: + raise DeprecationWarning("local_file is not supported") + + if script_remote_path and not Path(script_remote_path).exists(): + raise FileNotFoundError(f"File {script_remote_path} does not exist") + job_id = next(self.job_id_generator) + + # Filter out lines starting with '#SBATCH' + with open(script_remote_path) as file: + lines = file.readlines() + command = 
"".join([line for line in lines if not line.strip().startswith("#")]) + + # Make the dummy files + for line in lines: + if "--error" in line: + error_file = line.split("=")[1].strip() + (Path(script_remote_path).parent / error_file).touch() + elif "--output" in line: + output_file = line.split("=")[1].strip() + (Path(script_remote_path).parent / output_file).touch() + + # Execute the job, this is useful for test_calculation.py + if "aiida.in" in command: + # skip blank command like: '/bin/bash' + os.chdir(Path(script_remote_path).parent) + os.system(command) + + return {"jobid": job_id} + + def poll_active(self, machine: str, jobs: list[str], page_number: int = 0): + response = [] + # 12 satets are defined in firecrest + states = [ + "TIMEOUT", + "SUSPENDED", + "PREEMPTED", + "CANCELLED", + "NODE_FAIL", + "PENDING", + "FAILED", + "RUNNING", + "CONFIGURING", + "QUEUED", + "COMPLETED", + "COMPLETING", + ] + for i in range(len(jobs)): + response.append( + { + "job_data_err": "", + "job_data_out": "", + "job_file": "somefile.sh", + "job_file_err": "somefile-stderr.txt", + "job_file_out": "somefile-stdout.txt", + "job_info_extra": "Job info returned successfully", + "jobid": f"{jobs[i]}", + "name": "aiida-45", + "nodelist": "nid00049", + "nodes": "1", + "partition": "normal", + "start_time": "0:03", + "state": states[i % 12], + "time": "2024-06-21T10:44:42", + "time_left": "29:57", + "user": "Prof. Wang", + } ) + + return response[ + page_number + * Values._DEFAULT_PAGE_SIZE : (page_number + 1) + * Values._DEFAULT_PAGE_SIZE ] - if config.getoption("--firecrest-no-clean"): - header.append("Not cleaning up FirecREST server after tests!") - return header - return ["Running against Mock FirecREST server"] + def whoami(self, machine: str): + assert machine == "MACHINE_NAME" + return "test_user" -def pytest_terminal_summary( - terminalreporter: TerminalReporter, exitstatus: int, config: pytest.Config -): - """Called after all tests have run.""" - data = config.stash.get("firecrest_requests", None) - if data is None: - return - terminalreporter.write( - yaml.dump( - {"Firecrest requests telemetry": data}, - default_flow_style=False, - sort_keys=True, - ) - ) + def list_files( + self, + machine: str, + target_path: str, + recursive: bool = False, + show_hidden: bool = False, + ): + # this is mimiking the expected behaviour from the firecrest code. 
+ content_list = [] + for root, dirs, files in os.walk(target_path): + if not recursive and root != target_path: + continue + for name in dirs + files: + full_path = os.path.join(root, name) + relative_path = ( + Path(os.path.relpath(root, target_path)).joinpath(name).as_posix() + ) + if os.path.islink(full_path): + content_type = "l" + link_target = ( + os.readlink(full_path) if os.path.islink(full_path) else None + ) + elif os.path.isfile(full_path): + content_type = "-" + link_target = None + elif os.path.isdir(full_path): + content_type = "d" + link_target = None + else: + content_type = "NON" + link_target = None + permissions = stat.filemode(Path(full_path).lstat().st_mode)[1:] + if name.startswith(".") and not show_hidden: + continue + content_list.append( + { + "name": relative_path, + "type": content_type, + "link_target": link_target, + "permissions": permissions, + } + ) -@pytest.fixture(scope="function") -def firecrest_server( - pytestconfig: pytest.Config, - request: pytest.FixtureRequest, - monkeypatch, - tmp_path: Path, -): - """A fixture which provides a mock Firecrest server to test against.""" - config_path: str | None = request.config.getoption("--firecrest-config") - no_clean: bool = request.config.getoption("--firecrest-no-clean") - record_requests: bool = request.config.getoption("--firecrest-requests") - telemetry: RequestTelemetry | None = None + return content_list - if config_path is not None: - # if given, use this config - with open(config_path, encoding="utf8") as handle: - config = json_load(handle) - config = FirecrestConfig(**config) - # rather than use the scratch_path directly, we use a subfolder, - # which we can then clean - config.scratch_path = config.scratch_path + "/pytest_tmp" - - # we need to connect to the client here, - # to ensure that the scratch path exists and is empty - client = f7t.Firecrest( - firecrest_url=config.url, - authorization=f7t.ClientCredentialsAuth( - config.client_id, config.client_secret, config.token_uri - ), + def stat(self, machine: str, targetpath: firecrest.path, dereference=True): + stats = os.stat( + targetpath, follow_symlinks=bool(dereference) if dereference else False ) - client.mkdir(config.machine, config.scratch_path, p=True) + return { + "ino": stats.st_ino, + "dev": stats.st_dev, + "nlink": stats.st_nlink, + "uid": stats.st_uid, + "gid": stats.st_gid, + "size": stats.st_size, + "atime": stats.st_atime, + "mtime": stats.st_mtime, + "ctime": stats.st_ctime, + } - if record_requests: - telemetry = RequestTelemetry() - monkeypatch.setattr(requests, "get", partial(telemetry.wrap, requests.get)) - monkeypatch.setattr( - requests, "post", partial(telemetry.wrap, requests.post) - ) - monkeypatch.setattr(requests, "put", partial(telemetry.wrap, requests.put)) - monkeypatch.setattr( - requests, "delete", partial(telemetry.wrap, requests.delete) - ) + def mkdir( + self, + machine: str, + target_path: str, + p: bool = False, + ignore_existing: bool = False, + ): + target = Path(target_path) + target.mkdir(exist_ok=ignore_existing, parents=p) - yield config - # Note this shouldn't really work, for folders but it does :shrug: - # because they use `rm -r`: - # https://github.com/eth-cscs/firecrest/blob/7f02d11b224e4faee7f4a3b35211acb9c1cc2c6a/src/utilities/utilities.py#L347 - if not no_clean: - client.simple_delete(config.machine, config.scratch_path) - else: - # otherwise use mock server - server = FirecrestMockServer(tmp_path) - if record_requests: - telemetry = RequestTelemetry() - mock_request = partial(telemetry.wrap, 
server.mock_request) + def simple_delete(self, machine: str, target_path: str): + if not Path(target_path).exists(): + raise FileNotFoundError(f"File or folder {target_path} does not exist") + if os.path.isdir(target_path): + shutil.rmtree(target_path) else: - mock_request = server.mock_request - monkeypatch.setattr(requests, "get", mock_request) - monkeypatch.setattr(requests, "post", mock_request) - monkeypatch.setattr(requests, "put", mock_request) - monkeypatch.setattr(requests, "delete", mock_request) - yield server.config + os.remove(target_path) + + def symlink(self, machine: str, target_path: str, link_path: str): + # this is how firecrest does it + os.system(f"ln -s {target_path} {link_path}") - # save data on the server - if telemetry is not None: - test_name = request.node.name - pytestconfig.stash.setdefault("firecrest_requests", {})[ - test_name - ] = telemetry.counts + def simple_download(self, machine: str, remote_path: str, local_path: str): + # this procedure is complecated in firecrest, but I am simplifying it here + # we don't care about the details of the download, we just want to make sure + # that the aiida-firecrest code is calling the right functions at right time + if Path(remote_path).is_dir(): + raise IsADirectoryError(f"{remote_path} is a directory") + if not Path(remote_path).exists(): + raise FileNotFoundError(f"{remote_path} does not exist") + os.system(f"cp {remote_path} {local_path}") + + def simple_upload( + self, + machine: str, + local_path: str, + remote_path: str, + file_name: str | None = None, + ): + # this procedure is complecated in firecrest, but I am simplifying it here + # we don't care about the details of the upload, we just want to make sure + # that the aiida-firecrest code is calling the right functions at right time + if Path(local_path).is_dir(): + raise IsADirectoryError(f"{local_path} is a directory") + if not Path(local_path).exists(): + raise FileNotFoundError(f"{local_path} does not exist") + if file_name: + remote_path = os.path.join(remote_path, file_name) + os.system(f"cp {local_path} {remote_path}") + + def copy(self, machine: str, source_path: str, target_path: str): + # this is how firecrest does it + # https://github.com/eth-cscs/firecrest/blob/db6ba4ba273c11a79ecbe940872f19d5cb19ac5e/src/utilities/utilities.py#L451C1-L452C1 + os.system(f"cp --force -dR --preserve=all -- '{source_path}' '{target_path}'") + + def compress( + self, machine: str, source_path: str, target_path: str, dereference: bool = True + ): + # this is how firecrest does it + # https://github.com/eth-cscs/firecrest/blob/db6ba4ba273c11a79ecbe940872f19d5cb19ac5e/src/utilities/utilities.py#L460 + basedir = os.path.dirname(source_path) + file_path = os.path.basename(source_path) + deref = "--dereference" if dereference else "" + os.system(f"tar {deref} -czf '{target_path}' -C '{basedir}' '{file_path}'") + + def extract(self, machine: str, source_path: str, target_path: str): + # this is how firecrest does it + # https://github.com/eth-cscs/firecrest/blob/db6ba4ba273c11a79ecbe940872f19d5cb19ac5e/src/common/cscs_api_common.py#L1110C18-L1110C65 + os.system(f"tar -xf '{source_path}' -C '{target_path}'") + + def checksum(self, machine: str, remote_path: str) -> int: + if not remote_path.exists(): + return False + # Firecrest uses sha256 + sha256_hash = hashlib.sha256() + with open(remote_path, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + + return sha256_hash.hexdigest() + + def parameters( + self, + ): + # note: I 
took this from https://firecrest-tds.cscs.ch/ or https://firecrest.cscs.ch/ + # if code is not working but test passes, it means you need to update this dictionary + # with the latest FirecREST parameters + return { + "compute": [ + { + "description": "Type of resource and workload manager used in compute microservice", + "name": "WORKLOAD_MANAGER", + "unit": "", + "value": "Slurm", + } + ], + "storage": [ + { + "description": "Type of object storage, like `swift`, `s3v2` or `s3v4`.", + "name": "OBJECT_STORAGE", + "unit": "", + "value": "s3v4", + }, + { + "description": "Expiration time for temp URLs.", + "name": "STORAGE_TEMPURL_EXP_TIME", + "unit": "seconds", + "value": "86400", + }, + { + "description": "Maximum file size for temp URLs.", + "name": "STORAGE_MAX_FILE_SIZE", + "unit": "MB", + "value": "5120", + }, + { + "description": "Available filesystems through the API.", + "name": "FILESYSTEMS", + "unit": "", + "value": [ + { + "mounted": ["/project", "/store", "/scratch/snx3000tds"], + "system": "dom", + }, + { + "mounted": ["/project", "/store", "/capstor/scratch/cscs"], + "system": "pilatus", + }, + ], + }, + ], + "utilities": [ + { + "description": "The maximum allowable file size for various operations" + " of the utilities microservice", + "name": "UTILITIES_MAX_FILE_SIZE", + "unit": "MB", + "value": "5", + }, + { + "description": ( + "Maximum time duration for executing the commands " + "in the cluster for the utilities microservice." + ), + "name": "UTILITIES_TIMEOUT", + "unit": "seconds", + "value": "5", + }, + ], + } + + +class MockClientCredentialsAuth: + """Mocks py:class:`pyfirecrest.ClientCredentialsAuth`.""" + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + +@dataclass +class ComputerFirecrestConfig: + """Configuration of a computer using FirecREST as transport plugin.""" + + url: str + token_uri: str + client_id: str + client_secret: str + compute_resource: str + temp_directory: str + workdir: str + api_version: str + small_file_size_mb: float = 1.0 class RequestTelemetry: - """A to gather telemetry on requests.""" + """A to gather telemetry on requests. + This class is stale and not used in the current implementation. + We keep it here for future use, if needed. + """ def __init__(self) -> None: self.counts = {} @@ -136,3 +406,112 @@ def wrap( self.counts.setdefault(endpoint, 0) self.counts[endpoint] += 1 return method(url, **kwargs) + + +@pytest.fixture(scope="function") +def firecrest_config( + request: pytest.FixtureRequest, + monkeypatch, + tmp_path: Path, +): + """ + If a config file is provided it sets up a client environment with the information + in the config file and uses pyfirecrest to communicate with the server. + ┌─────────────────┐───►┌─────────────┐───►┌──────────────────┐ + │ aiida_firecrest │ │ pyfirecrest │ │ FirecREST server │ + └─────────────────┘◄───└─────────────┘◄───└──────────────────┘ + + if a config file is not provided, it monkeypatches pyfirecrest so we never + actually communicate with a server. + ┌─────────────────┐───►┌─────────────────────────────┐ + │ aiida_firecrest │ │ pyfirecrest (monkeypatched) │ + └─────────────────┘◄───└─────────────────────────────┘ + """ + config_path: str | None = request.config.getoption("--firecrest-config") + no_clean: bool = request.config.getoption("--firecrest-no-clean") + # record_requests: bool = request.config.getoption("--firecrest-requests") + # TODO: record_requests is un-maintained after PR#36, and practically not used. 
+ # But let's keep it commented for future use, if needed. + + if config_path is not None: + # telemetry: RequestTelemetry | None = None + # if given, use this config + with open(config_path, encoding="utf8") as handle: + config = json.load(handle) + config = ComputerFirecrestConfig(**config) + # # rather than use the scratch_path directly, we use a subfolder, + # # which we can then clean + config.workdir = config.workdir + "/pytest_tmp" + config.temp_directory = config.temp_directory + "/pytest_tmp" + + # # we need to connect to the client here, + # # to ensure that the scratch path exists and is empty + client = firecrest.Firecrest( + firecrest_url=config.url, + authorization=firecrest.ClientCredentialsAuth( + config.client_id, + Path(config.client_secret).read_text().strip(), + config.token_uri, + ), + ) + client.mkdir(config.compute_resource, config.workdir, p=True) + client.mkdir(config.compute_resource, config.temp_directory, p=True) + + # if record_requests: + # telemetry = RequestTelemetry() + # monkeypatch.setattr(requests, "get", partial(telemetry.wrap, requests.get)) + # monkeypatch.setattr( + # requests, "post", partial(telemetry.wrap, requests.post) + # ) + # monkeypatch.setattr(requests, "put", partial(telemetry.wrap, requests.put)) + # monkeypatch.setattr( + # requests, "delete", partial(telemetry.wrap, requests.delete) + # ) + yield config + # # Note this shouldn't really work, for folders but it does :shrug: + # # because they use `rm -r`: + # # https://github.com/eth-cscs/firecrest/blob/7f02d11b224e4faee7f4a3b35211acb9c1cc2c6a/src/utilities/utilities.py#L347 + if not no_clean: + client.simple_delete(config.compute_resource, config.workdir) + client.simple_delete(config.compute_resource, config.temp_directory) + + # if telemetry is not None: + # test_name = request.node.name + # pytestconfig.stash.setdefault("firecrest_requests", {})[ + # test_name + # ] = telemetry.counts + else: + # if no_clean or record_requests: + if no_clean: + raise ValueError( + "--firecrest-{no-clean,requests} options are only available" + " when a config file is passed using --firecrest-config." + ) + + monkeypatch.setattr(firecrest, "Firecrest", MockFirecrest) + monkeypatch.setattr( + firecrest, "ClientCredentialsAuth", MockClientCredentialsAuth + ) + + # dummy config + _temp_directory = tmp_path / "temp" + _temp_directory.mkdir() + + Path(tmp_path / ".firecrest").mkdir() + _secret_path = Path(tmp_path / ".firecrest/secretabc") + _secret_path.write_text("secret_string") + + workdir = tmp_path / "scratch" + workdir.mkdir() + + yield ComputerFirecrestConfig( + url="https://URI", + token_uri="https://TOKEN_URI", + client_id="CLIENT_ID", + client_secret=str(_secret_path), + compute_resource="MACHINE_NAME", + workdir=str(workdir), + small_file_size_mb=1.0, + temp_directory=str(_temp_directory), + api_version="2", + ) diff --git a/tests/test_calculation.py b/tests/test_calculation.py index d308671..fb34ae6 100644 --- a/tests/test_calculation.py +++ b/tests/test_calculation.py @@ -8,36 +8,6 @@ from aiida.parsers import Parser import pytest -from aiida_firecrest.utils_test import FirecrestConfig - - -@pytest.fixture(name="firecrest_computer") -def _firecrest_computer(firecrest_server: FirecrestConfig): - """Create and return a computer configured for Firecrest. - - Note, the computer is not stored in the database. 
- """ - computer = orm.Computer( - label="test_computer", - description="test computer", - hostname="-", - workdir=firecrest_server.scratch_path, - transport_type="firecrest", - scheduler_type="firecrest", - ) - computer.set_minimum_job_poll_interval(5) - computer.set_default_mpiprocs_per_machine(1) - computer.configure( - url=firecrest_server.url, - token_uri=firecrest_server.token_uri, - client_id=firecrest_server.client_id, - client_secret=firecrest_server.client_secret, - client_machine=firecrest_server.machine, - small_file_size_mb=firecrest_server.small_file_size_mb, - ) - computer.store() - return computer - @pytest.fixture(name="no_retries") def _no_retries(): @@ -64,9 +34,6 @@ def test_calculation_basic(firecrest_computer: orm.Computer): builder = code.get_builder() builder.x = orm.Int(1) builder.y = orm.Int(2) - # TODO currently uploading via firecrest changes _aiidasubmit.sh to aiidasubmit.sh 😱 - # https://github.com/eth-cscs/firecrest/issues/191 - builder.metadata.options.submit_script_filename = "aiidasubmit.sh" _, node = engine.run_get_node(builder) assert node.is_finished_ok @@ -127,11 +94,6 @@ class MultiFileCalcjob(engine.CalcJob): def define(cls, spec): """Define the process specification.""" super().define(spec) - spec.input( - "metadata.options.submit_script_filename", - valid_type=str, - default="aiidasubmit.sh", - ) spec.inputs["metadata"]["options"]["resources"].default = { "num_machines": 1, "num_mpiprocs_per_machine": 1, diff --git a/tests/test_computer.py b/tests/test_computer.py index 69faf82..6a2041a 100644 --- a/tests/test_computer.py +++ b/tests/test_computer.py @@ -1,40 +1,10 @@ -"""Tests for setting up an AiiDA computer for Firecrest, -and basic functionality of the Firecrest transport and scheduler plugins. -""" from pathlib import Path +from unittest.mock import Mock from aiida import orm +from click import BadParameter import pytest -from aiida_firecrest.utils_test import FirecrestConfig - - -@pytest.fixture(name="firecrest_computer") -def _firecrest_computer(firecrest_server: FirecrestConfig): - """Create and return a computer configured for Firecrest. - - Note, the computer is not stored in the database. - """ - computer = orm.Computer( - label="test_computer", - description="test computer", - hostname="-", - workdir=firecrest_server.scratch_path, - transport_type="firecrest", - scheduler_type="firecrest", - ) - computer.set_minimum_job_poll_interval(5) - computer.set_default_mpiprocs_per_machine(1) - computer.configure( - url=firecrest_server.url, - token_uri=firecrest_server.token_uri, - client_id=firecrest_server.client_id, - client_secret=firecrest_server.client_secret, - client_machine=firecrest_server.machine, - small_file_size_mb=firecrest_server.small_file_size_mb, - ) - return computer - @pytest.mark.usefixtures("aiida_profile_clean") def test_whoami(firecrest_computer: orm.Computer): @@ -43,34 +13,120 @@ def test_whoami(firecrest_computer: orm.Computer): assert isinstance(transport.whoami(), str) -@pytest.mark.usefixtures("aiida_profile_clean") -def test_create_temp_file(firecrest_computer: orm.Computer, tmp_path: Path): - """Check if it is possible to create a temporary file - and then delete it in the work directory. 
- """ - transport = firecrest_computer.get_transport() - authinfo = firecrest_computer.get_authinfo(orm.User.collection.get_default()) - workdir = authinfo.get_workdir().format(username=transport.whoami()) - transport.chdir(workdir) +def test_create_secret_file_with_existing_file(tmpdir: Path): + from aiida_firecrest.transport import _create_secret_file + + secret_file = Path(tmpdir / "secret") + secret_file.write_text("topsecret") + result = _create_secret_file(None, None, str(secret_file)) + assert isinstance(result, str) + assert result == str(secret_file) + assert Path(result).read_text() == "topsecret" - tmp_path.joinpath("test.txt").write_text("test") - transport.putfile(str(tmp_path.joinpath("test.txt")), "test.txt") - assert transport.path_exists("test.txt") +def test_create_secret_file_with_nonexistent_file(tmp_path): + from aiida_firecrest.transport import _create_secret_file - transport.getfile("test.txt", str(tmp_path.joinpath("test2.txt"))) + secret_file = tmp_path / "nonexistent" + with pytest.raises(BadParameter): + _create_secret_file(None, None, str(secret_file)) - assert tmp_path.joinpath("test2.txt").read_text() == "test" - transport.remove("test.txt") +def test_create_secret_file_with_secret_value(tmp_path, monkeypatch): + from aiida_firecrest.transport import _create_secret_file - assert not transport.path_exists("test.txt") + secret = "topsecret!~/" + monkeypatch.setattr( + Path, + "expanduser", + lambda x: tmp_path / str(x).lstrip("~/") if str(x).startswith("~/") else x, + ) + result = _create_secret_file(None, None, secret) + assert Path(result).parent.parts[-1] == ".firecrest" + assert Path(result).read_text() == secret @pytest.mark.usefixtures("aiida_profile_clean") -def test_get_jobs(firecrest_computer: orm.Computer): - """check if it is possible to determine the username.""" +def test_validate_temp_directory( + firecrest_computer: orm.Computer, firecrest_config, monkeypatch, tmpdir: Path +): + """ + Test the validation of the temp directory. + Note: this test depends on a functional putfile() method, for consistency with the real server tests. + Before running this test, make sure putfile() is working, which is tested in `test_putfile_getfile`. 
+ """ + from aiida_firecrest.transport import _validate_temp_directory + + monkeypatch.setattr("click.echo", lambda x: None) + ctx = Mock() + ctx.params = { + "url": f"{firecrest_config.url}", + "token_uri": f"{firecrest_config.token_uri}", + "client_id": f"{firecrest_config.client_id}", + "client_secret": f"{firecrest_config.client_secret}", + "compute_resource": f"{firecrest_config.compute_resource}", + "small_file_size_mb": float(5), + "api_version": f"{firecrest_config.api_version}", + } + + # prepare some files and directories for testing transport = firecrest_computer.get_transport() - scheduler = firecrest_computer.get_scheduler() - scheduler.set_transport(transport) - assert isinstance(scheduler.get_jobs(), list) + _remote = transport._temp_directory + _local = tmpdir + Path(tmpdir / "_.txt").touch() + transport.mkdir(_remote / "temp_on_server_directory") + transport.putfile(tmpdir / "_.txt", _remote / "_.txt") + transport.putfile(tmpdir / "_.txt", _remote / "temp_on_server_directory" / "_.txt") + + # should raise if is_file + with pytest.raises(BadParameter): + result = _validate_temp_directory(ctx, None, Path(_remote / "_.txt").as_posix()) + + # should create the directory if it doesn't exist + result = _validate_temp_directory( + ctx, None, Path(_remote / "nonexisting").as_posix() + ) + assert result == Path(_remote / "nonexisting").as_posix() + assert transport._cwd.joinpath(_remote / "nonexisting").exists() + + # should get a confirmation if the directory exists and is not empty + monkeypatch.setattr("click.confirm", lambda x: False) + with pytest.raises(BadParameter): + result = _validate_temp_directory( + ctx, None, Path(_remote / "temp_on_server_directory").as_posix() + ) + + # should delete the content if I confirm + monkeypatch.setattr("click.confirm", lambda x: True) + result = _validate_temp_directory( + ctx, None, Path(_remote / "temp_on_server_directory").as_posix() + ) + assert result == Path(_remote / "temp_on_server_directory").as_posix() + assert not transport._cwd.joinpath( + _remote / "temp_on_server_directory" / "_.txt" + ).exists() + + +def test_dynamic_info(firecrest_config, monkeypatch, tmpdir: Path): + from aiida_firecrest.transport import _dynamic_info_direct_size + + monkeypatch.setattr("click.echo", lambda x: None) + ctx = Mock() + ctx.params = { + "url": f"{firecrest_config.url}", + "token_uri": f"{firecrest_config.token_uri}", + "client_id": f"{firecrest_config.client_id}", + "client_secret": f"{firecrest_config.client_secret}", + "compute_resource": f"{firecrest_config.compute_resource}", + "temp_directory": f"{firecrest_config.temp_directory}", + "api_version": f"{firecrest_config.api_version}", + } + + # should catch UTILITIES_MAX_FILE_SIZE if value is not provided + result = _dynamic_info_direct_size(ctx, None, 0) + assert result == 5 + + # should use the value if provided + # note: user cannot enter negative numbers anyways, click raise as this shoule be float not str + result = _dynamic_info_direct_size(ctx, None, 10) + assert result == 10 diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 4126b61..5939e60 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -1,68 +1,76 @@ -"""Tests isolating only the Scheduler.""" -from aiida.schedulers import SchedulerError +from pathlib import Path + +from aiida import orm from aiida.schedulers.datastructures import CodeRunMode, JobTemplate import pytest from aiida_firecrest.scheduler import FirecrestScheduler -from aiida_firecrest.transport import FirecrestTransport -from 
aiida_firecrest.utils_test import FirecrestConfig - - -@pytest.fixture(name="transport") -def _transport(firecrest_server: FirecrestConfig): - transport = FirecrestTransport( - url=firecrest_server.url, - token_uri=firecrest_server.token_uri, - client_id=firecrest_server.client_id, - client_secret=firecrest_server.client_secret, - client_machine=firecrest_server.machine, - small_file_size_mb=firecrest_server.small_file_size_mb, - ) - transport.chdir(firecrest_server.scratch_path) - yield transport - - -def test_get_jobs_empty(transport: FirecrestTransport): - scheduler = FirecrestScheduler() - scheduler.set_transport(transport) - - with pytest.raises(SchedulerError, match="Failed to retrieve job"): - scheduler.get_jobs(["unknown"]) - - assert isinstance(scheduler.get_jobs(), list) +from conftest import Values -def test_submit_job(transport: FirecrestTransport): +@pytest.mark.usefixtures("aiida_profile_clean") +def test_submit_job(firecrest_computer: orm.Computer, tmp_path: Path): + transport = firecrest_computer.get_transport() scheduler = FirecrestScheduler() scheduler.set_transport(transport) - with pytest.raises(SchedulerError, match="invalid path"): + with pytest.raises(FileNotFoundError): scheduler.submit_job(transport.getcwd(), "unknown.sh") - # create a job script in a folder - transport.mkdir("test_submission") - transport.chdir("test_submission") - transport.write_binary("job.sh", b"#!/bin/bash\n\necho 'hello world'") + _script = Path(tmp_path / "job.sh") + _script.write_text("#!/bin/bash\n\necho 'hello world'") - job_id = scheduler.submit_job(transport.getcwd(), "job.sh") + job_id = scheduler.submit_job(transport.getcwd(), _script) + # this is how aiida expects the job_id to be returned assert isinstance(job_id, str) -def test_write_script_minimal(file_regression): +@pytest.mark.usefixtures("aiida_profile_clean") +def test_get_jobs(firecrest_computer: orm.Computer): + transport = firecrest_computer.get_transport() scheduler = FirecrestScheduler() - template = JobTemplate( - { - "job_resource": scheduler.create_job_resource( - num_machines=1, num_mpiprocs_per_machine=1 - ), - "codes_info": [], - "codes_run_mode": CodeRunMode.SERIAL, - } - ) - file_regression.check(scheduler.get_submit_script(template).rstrip() + "\n") - + scheduler.set_transport(transport) -def test_write_script_full(file_regression): + # test pagaination + scheduler._DEFAULT_PAGE_SIZE = 2 + Values._DEFAULT_PAGE_SIZE = 2 + + joblist = ["111", "222", "333", "444", "555"] + result = scheduler.get_jobs(joblist) + assert len(result) == 5 + for i in range(5): + assert result[i].job_id == str(joblist[i]) + # TODO: one could check states as well + + +def test_write_script_full(): + # to avoid false positive (overwriting on existing file), + # we check the output of the script instead of using `file_regression`` + expectaion = """ + #!/bin/bash + #SBATCH -H + #SBATCH --requeue + #SBATCH --mail-user=True + #SBATCH --mail-type=BEGIN + #SBATCH --mail-type=FAIL + #SBATCH --mail-type=END + #SBATCH --job-name="test_job" + #SBATCH --get-user-env + #SBATCH --output=test.out + #SBATCH --error=test.err + #SBATCH --partition=test_queue + #SBATCH --account=test_account + #SBATCH --qos=test_qos + #SBATCH --nice=100 + #SBATCH --nodes=1 + #SBATCH --ntasks-per-node=1 + #SBATCH --time=01:00:00 + #SBATCH --mem=1 + test_command + """ + expectaion_flat = "\n".join(line.strip() for line in expectaion.splitlines()).strip( + "\n" + ) scheduler = FirecrestScheduler() template = JobTemplate( { @@ -89,4 +97,42 @@ def 
test_write_script_full(file_regression): "custom_scheduler_commands": "test_command", } ) - file_regression.check(scheduler.get_submit_script(template).rstrip() + "\n") + try: + assert scheduler.get_submit_script(template).rstrip() == expectaion_flat + except AssertionError: + print(scheduler.get_submit_script(template).rstrip()) + print(expectaion) + raise + + +def test_write_script_minimal(): + # to avoid false positive (overwriting on existing file), + # we check the output of the script instead of using `file_regression`` + expectaion = """ + #!/bin/bash + #SBATCH --no-requeue + #SBATCH --error=slurm-%j.err + #SBATCH --nodes=1 + #SBATCH --ntasks-per-node=1 + """ + + expectaion_flat = "\n".join(line.strip() for line in expectaion.splitlines()).strip( + "\n" + ) + scheduler = FirecrestScheduler() + template = JobTemplate( + { + "job_resource": scheduler.create_job_resource( + num_machines=1, num_mpiprocs_per_machine=1 + ), + "codes_info": [], + "codes_run_mode": CodeRunMode.SERIAL, + } + ) + + try: + assert scheduler.get_submit_script(template).rstrip() == expectaion_flat + except AssertionError: + print(scheduler.get_submit_script(template).rstrip()) + print(expectaion) + raise diff --git a/tests/test_scheduler/test_write_script_full.txt b/tests/test_scheduler/test_write_script_full.txt deleted file mode 100644 index 6b834d2..0000000 --- a/tests/test_scheduler/test_write_script_full.txt +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -#SBATCH -H -#SBATCH --requeue -#SBATCH --mail-user=True -#SBATCH --mail-type=BEGIN -#SBATCH --mail-type=FAIL -#SBATCH --mail-type=END -#SBATCH --job-name="test_job" -#SBATCH --get-user-env -#SBATCH --output=test.out -#SBATCH --error=test.err -#SBATCH --partition=test_queue -#SBATCH --account=test_account -#SBATCH --qos=test_qos -#SBATCH --nice=100 -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=1 -#SBATCH --time=01:00:00 -#SBATCH --mem=1 -test_command diff --git a/tests/test_scheduler/test_write_script_minimal.txt b/tests/test_scheduler/test_write_script_minimal.txt deleted file mode 100644 index 5bc0101..0000000 --- a/tests/test_scheduler/test_write_script_minimal.txt +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -#SBATCH --no-requeue -#SBATCH --error=slurm-%j.err -#SBATCH --nodes=1 -#SBATCH --ntasks-per-node=1 diff --git a/tests/test_transport.py b/tests/test_transport.py index e30db7e..f9d970d 100644 --- a/tests/test_transport.py +++ b/tests/test_transport.py @@ -1,153 +1,788 @@ -"""Tests isolating only the Transport.""" +""" +Note: order of tests is important, as some tests are dependent on the previous ones. 
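+In particular, `test_putfile_getfile` should pass before the tree-transfer tests
+(`test_puttree`, `test_gettree`, `test_copy`), which rely on putfile()/getfile()
+to stage and verify files.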
+""" + +import os from pathlib import Path -import platform +from unittest.mock import patch +from aiida import orm import pytest -from aiida_firecrest.transport import FirecrestTransport -from aiida_firecrest.utils_test import FirecrestConfig - - -@pytest.fixture(name="transport") -def _transport(firecrest_server: FirecrestConfig): - transport = FirecrestTransport( - url=firecrest_server.url, - token_uri=firecrest_server.token_uri, - client_id=firecrest_server.client_id, - client_secret=firecrest_server.client_secret, - client_machine=firecrest_server.machine, - small_file_size_mb=firecrest_server.small_file_size_mb, - ) - transport.chdir(firecrest_server.scratch_path) - yield transport - - -def test_path_exists(firecrest_server: FirecrestConfig, transport: FirecrestTransport): - assert transport.path_exists(firecrest_server.scratch_path) - assert not transport.path_exists(firecrest_server.scratch_path + "/file.txt") - - -def test_get_attribute( - firecrest_server: FirecrestConfig, transport: FirecrestTransport -): - transport._cwd.joinpath("test.txt").touch() - attrs = transport.get_attribute(firecrest_server.scratch_path + "/test.txt") - assert set(attrs) == { - "st_size", - "st_atime", - "st_mode", - "st_gid", - "st_mtime", - "st_uid", - } - assert isinstance(attrs.st_mode, int) - - -def test_isdir(firecrest_server: FirecrestConfig, transport: FirecrestTransport): - assert transport.isdir(firecrest_server.scratch_path) - assert not transport.isdir(firecrest_server.scratch_path + "/other") - - -def test_mkdir(firecrest_server: FirecrestConfig, transport: FirecrestTransport): - transport.mkdir(firecrest_server.scratch_path + "/test") - assert transport.isdir(firecrest_server.scratch_path + "/test") - - -def test_large_file_transfers( - firecrest_server: FirecrestConfig, transport: FirecrestTransport, tmp_path: Path -): - """Large file transfers (> 5MB by default) have to be downloaded/uploaded via a different pathway.""" - content = "a" * (transport._small_file_size_bytes + 1) - - # upload - remote_path = firecrest_server.scratch_path + "/file.txt" - assert not transport.isfile(remote_path) - file_path = tmp_path.joinpath("file.txt") - file_path.write_text(content) - transport.putfile(str(file_path), remote_path) - assert transport.isfile(remote_path) - # download - if transport._url.startswith("http://localhost") and platform.system() == "Darwin": - pytest.skip("Skipping large file download test on macOS with localhost server.") - # TODO this is a known issue whereby a 403 is returned when trying to download the supplied file url - # due to a signature mismatch - new_path = tmp_path.joinpath("file2.txt") - assert not new_path.is_file() - transport.getfile(remote_path, new_path) - assert new_path.is_file() - assert new_path.read_text() == content +@pytest.mark.usefixtures("aiida_profile_clean") +def test_mkdir(firecrest_computer: orm.Computer): + transport = firecrest_computer.get_transport() + tmpdir = transport._temp_directory + _scratch = tmpdir / "sampledir" + transport.mkdir(_scratch) + assert _scratch.exists() -def test_listdir(firecrest_server: FirecrestConfig, transport: FirecrestTransport): - assert transport.listdir(firecrest_server.scratch_path) == [] + _scratch = tmpdir / "sampledir2" / "subdir" + transport.makedirs(_scratch) + assert _scratch.exists() + # raise if directory already exists + with pytest.raises(OSError): + transport.mkdir(tmpdir / "sampledir") + with pytest.raises(OSError): + transport.makedirs(tmpdir / "sampledir2") -def test_copyfile(firecrest_server: 
FirecrestConfig, transport: FirecrestTransport): - from_path = firecrest_server.scratch_path + "/copy_from.txt" - to_path = firecrest_server.scratch_path + "/copy_to.txt" + # don't raise if directory already exists and ignore_existing is True + transport.mkdir(tmpdir / "sampledir", ignore_existing=True) + transport.makedirs(tmpdir / "sampledir2", ignore_existing=True) - transport.write_binary(from_path, b"test") - assert not transport.path_exists(to_path) - transport.copyfile(from_path, to_path) - assert transport.isfile(to_path) - assert transport.read_binary(to_path) == b"test" +@pytest.mark.usefixtures("aiida_profile_clean") +def test_is_dir(firecrest_computer: orm.Computer): + transport = firecrest_computer.get_transport() + tmpdir = transport._temp_directory + _scratch = tmpdir / "sampledir" + transport.mkdir(_scratch) -def test_copyfile_symlink_noderef( - firecrest_server: FirecrestConfig, transport: FirecrestTransport -): - from_path = firecrest_server.scratch_path + "/copy_from.txt" - from_path_symlink = firecrest_server.scratch_path + "/copy_from_symlink.txt" - to_path = firecrest_server.scratch_path + "/copy_to_symlink.txt" + assert transport.isdir(_scratch) + assert not transport.isdir(_scratch / "does_not_exist") - transport.write_binary(from_path, b"test") - transport.symlink(from_path, from_path_symlink) - - assert not transport.path_exists(to_path) - transport.copyfile(from_path_symlink, to_path, dereference=False) - assert transport.isfile(to_path) - assert transport.read_binary(to_path) == b"test" - - -def test_copyfile_symlink_deref( - firecrest_server: FirecrestConfig, transport: FirecrestTransport -): - from_path = firecrest_server.scratch_path + "/copy_from.txt" - from_path_symlink = firecrest_server.scratch_path + "/copy_from_symlink.txt" - to_path = firecrest_server.scratch_path + "/copy_to_symlink.txt" - - transport.write_binary(from_path, b"test") - transport.symlink(from_path, from_path_symlink) - - assert not transport.path_exists(to_path) - transport.copyfile(from_path_symlink, to_path, dereference=True) - assert transport.isfile(to_path) - assert transport.read_binary(to_path) == b"test" - - -def test_remove(firecrest_server: FirecrestConfig, transport: FirecrestTransport): - transport.write_binary(firecrest_server.scratch_path + "/file.txt", b"test") - assert transport.path_exists(firecrest_server.scratch_path + "/file.txt") - transport.remove(firecrest_server.scratch_path + "/file.txt") - assert not transport.path_exists(firecrest_server.scratch_path + "/file.txt") +@pytest.mark.usefixtures("aiida_profile_clean") +def test_normalize(firecrest_computer: orm.Computer): + transport = firecrest_computer.get_transport() + assert transport.normalize("/path/to/dir") == os.path.normpath("/path/to/dir") + assert transport.normalize("path/to/dir") == os.path.normpath("path/to/dir") + assert transport.normalize("path/to/dir/") == os.path.normpath("path/to/dir/") + assert transport.normalize("path/to/../dir") == os.path.normpath("path/to/../dir") + assert transport.normalize("path/to/../../dir") == os.path.normpath( + "path/to/../../dir" + ) + assert transport.normalize("path/to/../../dir/") == os.path.normpath( + "path/to/../../dir/" + ) + assert transport.normalize("path/to/../../dir/../") == os.path.normpath( + "path/to/../../dir/../" + ) -def test_rmtree(firecrest_server: FirecrestConfig, transport: FirecrestTransport): - transport.mkdir(firecrest_server.scratch_path + "/test") - transport.write_binary(firecrest_server.scratch_path + "/test/file.txt", b"test") - 
assert transport.path_exists(firecrest_server.scratch_path + "/test/file.txt") - transport.rmtree(firecrest_server.scratch_path + "/test") - assert not transport.path_exists(firecrest_server.scratch_path + "/test") +@pytest.mark.usefixtures("aiida_profile_clean") +def test_putfile_getfile(firecrest_computer: orm.Computer, tmpdir: Path): + """ + Note: putfile() and getfile() should be tested together, as they are dependent on each other. + It's written this way to be compatible with the real server testings. + """ + transport = firecrest_computer.get_transport() + tmpdir_remote = transport._temp_directory + + _remote = tmpdir_remote / "remotedir" + _local = tmpdir / "localdir" + _local_download = tmpdir / "download" + _remote.mkdir() + _local.mkdir() + _local_download.mkdir() + + Path(_local / "file1").write_text("file1") + Path(_local / ".hidden").write_text(".hidden") + os.symlink(_local / "file1", _local / "file1_link") + + # raise if file does not exist + with pytest.raises(FileNotFoundError): + transport.putfile(_local / "does_not_exist", _remote / "file1") + transport.getfile(_remote / "does_not_exist", _local) + + # raise if filename is not provided + with pytest.raises(ValueError): + transport.putfile(_local / "file1", _remote) + transport.getfile(_remote / "file1", _local) + + # raise if localpath is relative + with pytest.raises(ValueError): + transport.putfile(Path(_local / "file1").relative_to(tmpdir), _remote / "file1") + transport.getfile(_remote / "file1", Path(_local / "file1").relative_to(tmpdir)) + + # don't mix up directory with file + with pytest.raises(ValueError): + transport.putfile(_local, _remote / "file1") + transport.getfile(_remote, _local / "file1") + + # write where I tell you to + # note: if you change this block, you need to change the the following two blocks as well + transport.putfile(_local / "file1", _remote / "file1") + transport.putfile(_local / "file1", _remote / "file1-prime") + + transport.getfile(_remote / "file1", _local_download / "file1") + transport.getfile(_remote / "file1-prime", _local_download / "differentName") + + assert Path(_local_download / "file1").read_text() == "file1" + assert Path(_local_download / "differentName").read_text() == "file1" + + # always overwrite for putfile + # this block assumes "file1" has already been uploaded with the content "file1". + # for efficiency reasons (when the test is run against a real server). I didn't that repeat here. + Path(_local / "file1").write_text("notfile1") + transport.putfile(_local / "file1", _remote / "file1") + transport.getfile(_remote / "file1", _local_download / "file1_uploaded") + assert Path(_local_download / "file1_uploaded").read_text() == "notfile1" + + # always overwrite for getfile + # this block assumes "file1" has already been downloaded with the content "notfile1". + # for efficiency reasons (when the test is run against a real server). I didn't that repeat here. 
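+    # getfile() is expected to silently overwrite the already-downloaded local "file1"
+    # rather than raise.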
+ transport.getfile(_remote / "file1", _local_download / "file1") + assert Path(_local_download / "file1").read_text() == "notfile1" + + # don't skip hidden files + transport.putfile(_local / ".hidden", _remote / ".hidden") + transport.getfile(_remote / ".hidden", _local_download / ".hidden") + assert Path(_local_download / ".hidden").read_text() == ".hidden" + + # follow links + # for putfile() + Path(_local / "file1").write_text("file1") + transport.putfile(_local / "file1_link", _remote / "file1_link") + assert not transport._cwd.joinpath( + _remote / "file1_link" + ).is_symlink() # should be copied as a file + transport.getfile(_remote / "file1_link", _local_download / "file1_link") + assert Path(_local_download / "file1_link").read_text() == "file1" + # for getfile() + transport.putfile(_local / "file1", _remote / "file1") + transport.symlink(_remote / "file1", _remote / "remote_link") + transport.getfile(_remote / "remote_link", _local_download / "remote_link") + assert not Path(_local_download / "remote_link").is_symlink() + assert Path(_local_download / "remote_link").read_text() == "file1" + + +@pytest.mark.usefixtures("aiida_profile_clean") +def test_remove(firecrest_computer: orm.Computer, tmpdir: Path): + transport = firecrest_computer.get_transport() + tmpdir_remote = transport._temp_directory + + _remote = tmpdir_remote + _local = tmpdir + + Path(_local / "samplefile").touch() + + # remove a non-empty directory with rmtree() + _scratch = transport._cwd.joinpath(_remote / "sampledir") + _scratch.mkdir() + transport.putfile(_local / "samplefile", _remote / "sampledir" / "samplefile") + transport.rmtree(_scratch) + assert not _scratch.exists() + + # remove a non-empty directory should raise with rmdir() + transport.mkdir(_remote / "sampledir") + transport.putfile(_local / "samplefile", _remote / "sampledir" / "samplefile") + with pytest.raises(OSError): + transport.rmdir(_remote / "sampledir") + + # remove a file with remove() + transport.remove(_remote / "sampledir" / "samplefile") + assert not transport._cwd.joinpath(_remote / "sampledir" / "samplefile").exists() + + # remove a empty directory with rmdir + transport.rmdir(_remote / "sampledir") + assert not _scratch.exists() + + +@pytest.mark.usefixtures("aiida_profile_clean") +def test_is_file(firecrest_computer: orm.Computer, tmpdir: Path): + transport = firecrest_computer.get_transport() + tmpdir_remote = transport._temp_directory + + _remote = tmpdir_remote + _local = tmpdir + + Path(_local / "samplefile").touch() + transport.putfile(_local / "samplefile", _remote / "samplefile") + assert transport.isfile(_remote / "samplefile") + assert not transport.isfile(_remote / "does_not_exist") + + +@pytest.mark.usefixtures("aiida_profile_clean") +def test_symlink(firecrest_computer: orm.Computer, tmpdir: Path): + transport = firecrest_computer.get_transport() + tmpdir_remote = transport._temp_directory + + _remote = tmpdir_remote + _local = tmpdir + + Path(_local / "samplefile").touch() + transport.putfile(_local / "samplefile", _remote / "samplefile") + transport.symlink(_remote / "samplefile", _remote / "samplelink") + + _symlink = transport._cwd.joinpath(_remote / "samplelink") + assert _symlink.is_symlink() + # TODO: check if the symlink is pointing to the correct file + # for this we need further development of FcPath.resolve() + # assert _symlink.resolve() == _remote / "samplefile" + + +@pytest.mark.usefixtures("aiida_profile_clean") +def test_listdir(firecrest_computer: orm.Computer, tmpdir: Path): + transport = 
firecrest_computer.get_transport() + tmpdir_remote = transport._temp_directory + + _remote = tmpdir_remote + _local = tmpdir + + # test basic & recursive + Path(_local / "file1").touch() + Path(_local / "dir1").mkdir() + Path(_local / ".hidden").touch() + Path(_local / "dir1" / "file2").touch() + transport.putfile(_local / "file1", _remote / "file1") + transport.mkdir(_remote / "dir1") + transport.putfile(_local / "dir1" / "file2", _remote / "dir1" / "file2") + transport.putfile(_local / ".hidden", _remote / ".hidden") + + assert set(transport.listdir(_remote)) == {"file1", "dir1", ".hidden"} + assert set(transport.listdir(_remote, recursive=True)) == { + "file1", + "dir1", + ".hidden", + "dir1/file2", + } -def test_rename(firecrest_server: FirecrestConfig, transport: FirecrestTransport): - transport.write_binary(firecrest_server.scratch_path + "/file.txt", b"test") - assert transport.path_exists(firecrest_server.scratch_path + "/file.txt") - transport.rename( - firecrest_server.scratch_path + "/file.txt", - firecrest_server.scratch_path + "/file2.txt", + # to test symlink + Path(_local / "dir1" / "dir2").mkdir() + Path(_local / "dir1" / "dir2" / "file3").touch() + transport.mkdir(_remote / "dir1" / "dir2") + transport.putfile( + _local / "dir1" / "dir2" / "file3", _remote / "dir1" / "dir2" / "file3" + ) + transport.symlink(_remote / "dir1" / "dir2", _remote / "dir2_link") + transport.symlink(_remote / "dir1" / "file2", _remote / "file_link") + + assert set(transport.listdir(_remote, recursive=True)) == { + "file1", + "dir1", + ".hidden", + "dir1/file2", + "dir1/dir2", + "dir1/dir2/file3", + "dir2_link", + "file_link", + } + # TODO: The following assert is not working as expected when testing against a real server, + # see the open issue on FirecREST: https://github.com/eth-cscs/firecrest/issues/205 + assert set(transport.listdir(_remote / "dir2_link", recursive=False)) == {"file3"} + + +@pytest.mark.usefixtures("aiida_profile_clean") +def test_put(firecrest_computer: orm.Computer, tmpdir: Path): + """ + This is minimal test is to check if put() is raising errors as expected, + and directing to putfile() and puttree() as expected. + Mainly just checking error handeling and folder creation. + For faster testing, we mock the subfucntions and don't actually do it. 
+ """ + transport = firecrest_computer.get_transport() + tmpdir_remote = transport._temp_directory + + _remote = tmpdir_remote / "remotedir" + _local = tmpdir / "localdir" + transport.mkdir(_remote) + _local.mkdir() + + # check if the code is directing to putfile() or puttree() as expected + with patch.object(transport, "puttree", autospec=True) as mock_puttree: + transport.put(_local, _remote) + mock_puttree.assert_called_once() + + with patch.object(transport, "puttree", autospec=True) as mock_puttree: + os.symlink(_local, tmpdir / "dir_link") + transport.put(tmpdir / "dir_link", _remote) + mock_puttree.assert_called_once() + + with patch.object(transport, "putfile", autospec=True) as mock_putfile: + Path(_local / "file1").write_text("file1") + transport.put(_local / "file1", _remote / "file1") + mock_putfile.assert_called_once() + + with patch.object(transport, "putfile", autospec=True) as mock_putfile: + os.symlink(_local / "file1", _local / "file1_link") + transport.put(_local / "file1_link", _remote / "file1_link") + mock_putfile.assert_called_once() + + # raise if local file/folder does not exist + with pytest.raises(FileNotFoundError): + transport.put(_local / "does_not_exist", _remote) + transport.put(_local / "does_not_exist", _remote, ignore_nonexisting=True) + + # raise if localpath is relative + with pytest.raises(ValueError): + transport.put(Path(_local).relative_to(tmpdir), _remote) + with pytest.raises(ValueError): + transport.put(Path(_local / "file1").relative_to(tmpdir), _remote) + + +@pytest.mark.usefixtures("aiida_profile_clean") +def test_get(firecrest_computer: orm.Computer, tmpdir: Path): + """ + This is minimal test is to check if get() is raising errors as expected, + and directing to getfile() and gettree() as expected. + Mainly just checking error handeling and folder creation. + For faster testing, we mock the subfucntions and don't actually do it. 
+ """ + transport = firecrest_computer.get_transport() + tmpdir_remote = transport._temp_directory + + _remote = tmpdir_remote / "remotedir" + _local = tmpdir / "localdir" + _remote.mkdir() + _local.mkdir() + + # check if the code is directing to getfile() or gettree() as expected + with patch.object(transport, "gettree", autospec=True) as mock_gettree: + transport.get(_remote, _local) + mock_gettree.assert_called_once() + + with patch.object(transport, "gettree", autospec=True) as mock_gettree: + transport.symlink(_remote, tmpdir_remote / "dir_link") + transport.get(tmpdir_remote / "dir_link", _local) + mock_gettree.assert_called_once() + + with patch.object(transport, "getfile", autospec=True) as mock_getfile: + Path(_local / "file1").write_text("file1") + transport.putfile(_local / "file1", _remote / "file1") + transport.get(_remote / "file1", _local / "file1") + mock_getfile.assert_called_once() + + with patch.object(transport, "getfile", autospec=True) as mock_getfile: + transport.symlink(_remote / "file1", _remote / "file1_link") + transport.get(_remote / "file1_link", _local / "file1_link") + mock_getfile.assert_called_once() + + # raise if remote file/folder does not exist + with pytest.raises(FileNotFoundError): + transport.get(_remote / "does_not_exist", _local) + transport.get(_remote / "does_not_exist", _local, ignore_nonexisting=True) + + # raise if localpath is relative + with pytest.raises(ValueError): + transport.get(_remote, Path(_local).relative_to(tmpdir)) + with pytest.raises(ValueError): + transport.get(_remote / "file1", Path(_local).relative_to(tmpdir)) + + +@pytest.mark.timeout(120) +@pytest.mark.parametrize("payoff", [True, False]) +@pytest.mark.usefixtures("aiida_profile_clean") +def test_puttree(firecrest_computer: orm.Computer, tmpdir: Path, payoff: bool): + """ + This test is to check `puttree` through both recursive transfer and tar mode. + payoff= False would put files one by one. + payoff= True would use the tar mode. + Note: this test depends on a functional getfile() method, for consistency with the real server tests. + Before running this test, make sure getfile() is working, which is tested in `test_putfile_getfile`. 
+ """ + transport = firecrest_computer.get_transport() + transport.payoff_override = payoff + + # Note: + # SSH transport behaviour + # transport.put('somepath/abc', 'someremotepath/') == transport.put('somepath/abc', 'someremotepath') + # transport.put('somepath/abc', 'someremotepath/') != transport.put('somepath/abc/', 'someremotepath/') + # transport.put('somepath/abc', 'someremotepath/67') --> if 67 not exist, create and move content abc + # inside it (someremotepath/67) + # transport.put('somepath/abc', 'someremotepath/67') --> if 67 exist, create abc inside it (someremotepath/67/abc) + # transport.put('somepath/abc', 'someremotepath/6889/abc') --> useless Error: OSError + # Weired + # SSH "bug": + # transport.put('somepath/abc', 'someremotepath/') --> assuming someremotepath exists, make abc + # while + # transport.put('somepath/abc/', 'someremotepath/') --> assuming someremotepath exists, OSError: + # cannot make someremotepath + + tmpdir_remote = transport._temp_directory + _remote = tmpdir_remote / "remotedir" + _local = tmpdir / "localdir" + _local_download = tmpdir / "download" + _remote.mkdir() + _local.mkdir() + _local_download.mkdir() + # a typical tree + Path(_local / "dir1").mkdir() + Path(_local / "dir2").mkdir() + Path(_local / "file1").write_text("file1") + Path(_local / ".hidden").write_text(".hidden") + Path(_local / "dir1" / "file2").write_text("file2") + Path(_local / "dir2" / "file3").write_text("file3") + # with symlinks to a file even if pointing to a relative path + os.symlink(_local / "file1", _local / "dir1" / "file1_link") + os.symlink(Path("../file1"), _local / "dir1" / "file10_link") + # with symlinks to a folder even if pointing to a relative path + os.symlink(_local / "dir2", _local / "dir1" / "dir2_link") + os.symlink(Path("../dir2"), _local / "dir1" / "dir20_link") + + # raise if local file does not exist + with pytest.raises(OSError): + transport.puttree(_local / "does_not_exist", _remote) + + # raise if local is a file + with pytest.raises(ValueError): + Path(tmpdir / "isfile").touch() + transport.puttree(tmpdir / "isfile", _remote) + + # raise if localpath is relative + with pytest.raises(ValueError): + transport.puttree(Path(_local).relative_to(tmpdir), _remote) + + # If destination directory does not exists, AiiDA expects transport make the new path as root not using _local.name + transport.puttree(_local, _remote / "newdir") + _root = _remote / "newdir" + + # GET block: to retrieve the files we depend on a functional getfile(), + # this limitation is a price to pay for real server testing. 
+ transport.getfile(_root / "file1", _local_download / "file1") + transport.getfile(_root / ".hidden", _local_download / ".hidden") + Path(_local_download / "dir1").mkdir() + Path(_local_download / "dir2").mkdir() + transport.getfile(_root / "dir1" / "file2", _local_download / "dir1" / "file2") + transport.getfile(_root / "dir2" / "file3", _local_download / "dir2" / "file3") + # note links should have been dereferenced while uploading + transport.getfile( + _root / "dir1" / "file1_link", _local_download / "dir1" / "file1_link" + ) + Path(_local_download / "dir1" / "dir2_link").mkdir() + transport.getfile( + _root / "dir1" / "dir2_link" / "file3", + _local_download / "dir1" / "dir2_link" / "file3", + ) + transport.getfile( + _root / "dir1" / "file10_link", _local_download / "dir1" / "file10_link" + ) + Path(_local_download / "dir1" / "dir20_link").mkdir() + transport.getfile( + _root / "dir1" / "dir20_link" / "file3", + _local_download / "dir1" / "dir20_link" / "file3", + ) + # End of GET block + + # ASSERT block: tree should be copied recursively + assert Path(_local_download / "file1").read_text() == "file1" + assert Path(_local_download / ".hidden").read_text() == ".hidden" + assert Path(_local_download / "dir1" / "file2").read_text() == "file2" + assert Path(_local_download / "dir2" / "file3").read_text() == "file3" + # symlink should be followed + assert Path(_local_download / "dir1" / "file1_link").read_text() == "file1" + assert Path(_local_download / "dir1" / "dir2_link" / "file3").read_text() == "file3" + assert Path(_local_download / "dir1" / "file10_link").read_text() == "file1" + assert ( + Path(_local_download / "dir1" / "dir20_link" / "file3").read_text() == "file3" + ) + assert not transport._cwd.joinpath(_root / "dir1" / "file1_link").is_symlink() + assert not transport._cwd.joinpath( + _root / "dir1" / "dir2_link" / "file3" + ).is_symlink() + assert not transport._cwd.joinpath(_root / "dir1" / "file10_link").is_symlink() + assert not transport._cwd.joinpath( + _root / "dir1" / "dir20_link" / "file3" + ).is_symlink() + # End of ASSERT block + + # If destination directory does exists, AiiDA expects transport make _local.name and write into it + # however this might have changed in the newer versions of AiiDA ~ 2.6.0 (IDK) + transport.puttree(_local, _remote / "newdir") + _root = _remote / "newdir" / Path(_local).name + + # GET block: to retrieve the files we depend on a functional getfile(), + # this limitation is a price to pay for real server testing. 
+ _local_download = tmpdir / "download2" + _local_download.mkdir() + transport.getfile(_root / "file1", _local_download / "file1") + transport.getfile(_root / ".hidden", _local_download / ".hidden") + Path(_local_download / "dir1").mkdir() + Path(_local_download / "dir2").mkdir() + transport.getfile(_root / "dir1" / "file2", _local_download / "dir1" / "file2") + transport.getfile(_root / "dir2" / "file3", _local_download / "dir2" / "file3") + # note links should have been dereferenced while uploading + transport.getfile( + _root / "dir1" / "file1_link", _local_download / "dir1" / "file1_link" + ) + Path(_local_download / "dir1" / "dir2_link").mkdir() + transport.getfile( + _root / "dir1" / "dir2_link" / "file3", + _local_download / "dir1" / "dir2_link" / "file3", + ) + transport.getfile( + _root / "dir1" / "file10_link", _local_download / "dir1" / "file10_link" + ) + Path(_local_download / "dir1" / "dir20_link").mkdir() + transport.getfile( + _root / "dir1" / "dir20_link" / "file3", + _local_download / "dir1" / "dir20_link" / "file3", + ) + # End of GET block + + # ASSERT block: tree should be copied recursively + assert Path(_local_download / "file1").read_text() == "file1" + assert Path(_local_download / ".hidden").read_text() == ".hidden" + assert Path(_local_download / "dir1" / "file2").read_text() == "file2" + assert Path(_local_download / "dir2" / "file3").read_text() == "file3" + # symlink should be followed + assert Path(_local_download / "dir1" / "file1_link").read_text() == "file1" + assert Path(_local_download / "dir1" / "dir2_link" / "file3").read_text() == "file3" + assert Path(_local_download / "dir1" / "file10_link").read_text() == "file1" + assert ( + Path(_local_download / "dir1" / "dir20_link" / "file3").read_text() == "file3" + ) + assert not transport._cwd.joinpath(_root / "dir1" / "file1_link").is_symlink() + assert not transport._cwd.joinpath( + _root / "dir1" / "dir2_link" / "file3" + ).is_symlink() + assert not transport._cwd.joinpath(_root / "dir1" / "file10_link").is_symlink() + assert not transport._cwd.joinpath( + _root / "dir1" / "dir20_link" / "file3" + ).is_symlink() + # End of ASSERT block + + +@pytest.mark.timeout(120) +@pytest.mark.parametrize("payoff", [True, False]) +@pytest.mark.usefixtures("aiida_profile_clean") +def test_gettree(firecrest_computer: orm.Computer, tmpdir: Path, payoff: bool): + """ + This test is to check `gettree` through both recursive transfer and tar mode. + payoff= False would get files one by one. + payoff= True would use the tar mode. + Note: this test depends on a functional putfile() method, for consistency with the real server tests. + Before running this test, make sure putfile() is working, which is tested in `test_putfile_getfile`. 
+ """ + transport = firecrest_computer.get_transport() + transport.payoff_override = payoff + + # Note: + # SSH transport behaviour, abc is a directory + # transport.get('somepath/abc', 'someremotepath/') == transport.get('somepath/abc', 'someremotepath') + # transport.get('somepath/abc', 'someremotepath/') == transport.get('somepath/abc/', 'someremotepath/') + # transport.get('someremotepath/abc', 'somepath/abc')--> if abc exist, create abc inside it ('somepath/abc/abc') + # transport.get('someremotepath/abc', 'somepath/abc')--> if abc noexist,create abc inside it ('somepath/abc') + # transport.get('somepath/abc', 'someremotepath/6889/abc') --> create everything, make_parent = True + tmpdir_remote = transport._temp_directory + _remote = tmpdir_remote / "remotedir" + _remote.mkdir() + _local = tmpdir / "localdir" + _local_for_upload = tmpdir / "forupload" + _local.mkdir() + _local_for_upload.mkdir() + + # a typical tree with symlinks + Path(_local_for_upload / "file1").write_text("file1") + Path(_local_for_upload / ".hidden").write_text(".hidden") + Path(_local_for_upload / "dir1").mkdir() + Path(_local_for_upload / "dir1" / "file2").write_text("file2") + Path(_local_for_upload / "dir2").mkdir() + Path(_local_for_upload / "dir2" / "file3").write_text("file3") + transport.putfile(_local_for_upload / "file1", _remote / "file1") + transport.putfile(_local_for_upload / ".hidden", _remote / ".hidden") + transport.mkdir(_remote / "dir1") + transport.mkdir(_remote / "dir2") + transport.putfile(_local_for_upload / "dir1" / "file2", _remote / "dir1" / "file2") + transport.putfile(_local_for_upload / "dir2" / "file3", _remote / "dir2" / "file3") + + transport.symlink(_remote / "file1", _remote / "dir1" / "file1_link") + transport.symlink(_remote / "dir2", _remote / "dir1" / "dir2_link") + # I cannot create & check relative links, because we don't have access on the server side + # os.symlink(Path("../file1"), _local_for_upload / "dir1" / "file10_link") + # os.symlink(Path("../dir2"), _local_for_upload / "dir1" / "dir20_link") + + # raise if remote file does not exist + with pytest.raises(OSError): + transport.gettree(_remote / "does_not_exist", _local) + + # raise if local is a file + Path(tmpdir / "isfile").touch() + with pytest.raises(OSError): + transport.gettree(_remote, tmpdir / "isfile") + + # raise if localpath is relative + with pytest.raises(ValueError): + transport.gettree(_remote, Path(_local).relative_to(tmpdir)) + + # If destination directory does not exists, AiiDA expects from transport to make a new path as root not _remote.name + transport.gettree(_remote, _local / "newdir") + _root = _local / "newdir" + # tree should be copied recursively + assert Path(_root / "file1").read_text() == "file1" + assert Path(_root / ".hidden").read_text() == ".hidden" + assert Path(_root / "dir1" / "file2").read_text() == "file2" + assert Path(_root / "dir2" / "file3").read_text() == "file3" + # symlink should be followed + assert not Path(_root / "dir1" / "file1_link").is_symlink() + assert not Path(_root / "dir1" / "dir2_link" / "file3").is_symlink() + assert Path(_root / "dir1" / "file1_link").read_text() == "file1" + assert Path(_root / "dir1" / "dir2_link" / "file3").read_text() == "file3" + + # If destination directory does exists, AiiDA expects transport make _remote.name and write into it + # however this might have changed in the newer versions of AiiDA ~ 2.6.0 (IDK) + transport.gettree(_remote, _local / "newdir") + _root = _local / "newdir" / Path(_remote).name + # tree should be copied 
+ assert Path(_root / "file1").read_text() == "file1"
+ assert Path(_root / ".hidden").read_text() == ".hidden"
+ assert Path(_root / "dir1" / "file2").read_text() == "file2"
+ assert Path(_root / "dir2" / "file3").read_text() == "file3"
+ # symlinks should be followed
+ assert not Path(_root / "dir1" / "file1_link").is_symlink()
+ assert not Path(_root / "dir1" / "dir2_link" / "file3").is_symlink()
+ assert Path(_root / "dir1" / "file1_link").read_text() == "file1"
+ assert Path(_root / "dir1" / "dir2_link" / "file3").read_text() == "file3"
+
+
+@pytest.mark.parametrize("to_test", ["copy", "copytree"])
+@pytest.mark.usefixtures("aiida_profile_clean")
+def test_copy(firecrest_computer: orm.Computer, tmpdir: Path, to_test: str):
+ """
+ This test checks `copy` and `copytree`;
+ `copyfile` is only for files and is tested in `test_putfile_getfile`.
+ The errors raised are also somewhat method specific.
+ Note: this test depends on functional getfile() and putfile() methods, for consistency with the real server tests.
+ Before running this test, make sure `test_putfile_getfile` has passed.
+ """
+ transport = firecrest_computer.get_transport()
+ if to_test == "copy":
+ testing = transport.copy
+ elif to_test == "copytree":
+ testing = transport.copytree
+
+ tmpdir_remote = transport._temp_directory
+ _remote_1 = tmpdir_remote / "remotedir_1"
+ _remote_2 = tmpdir_remote / "remotedir_2"
+ _remote_1.mkdir()
+ _remote_2.mkdir()
+ _for_upload = tmpdir
+
+ # raise if source or destination does not exist
+ with pytest.raises(FileNotFoundError):
+ testing(_remote_1 / "does_not_exist", _remote_2)
+ with pytest.raises(FileNotFoundError):
+ testing(_remote_1, _remote_2 / "does_not_exist")
+
+ # a typical tree with symlinks to a file and a folder
+ Path(_for_upload / "file1").write_text("file1")
+ Path(_for_upload / ".hidden").write_text(".hidden")
+ Path(_for_upload / "dir1").mkdir()
+ Path(_for_upload / "dir2").mkdir()
+ Path(_for_upload / "dir1" / "file2").write_text("file2")
+ Path(_for_upload / "dir2" / "file3").write_text("file3")
+
+ transport.putfile(_for_upload / "file1", _remote_1 / "file1")
+ transport.putfile(_for_upload / ".hidden", _remote_1 / ".hidden")
+ transport.mkdir(_remote_1 / "dir1")
+ transport.mkdir(_remote_1 / "dir2")
+ transport.putfile(_for_upload / "dir1" / "file2", _remote_1 / "dir1" / "file2")
+ transport.putfile(_for_upload / "dir2" / "file3", _remote_1 / "dir2" / "file3")
+ transport.symlink(_remote_1 / "file1", _remote_1 / "dir1" / "file1_link")
+ transport.symlink(_remote_1 / "dir2", _remote_1 / "dir1" / "dir2_link")
+ # I cannot create & check relative links, because we don't have access on the server side
+ # os.symlink(Path("../file1"), _remote_1 / "dir1" / "file10_link")
+ # os.symlink(Path("../dir2"), _remote_1 / "dir1" / "dir20_link")
+
+ testing(_remote_1, _remote_2)
+
+ _root_2 = _remote_2 / Path(_remote_1).name
+
+ # GET block: to retrieve the files we depend on a functional getfile();
+ # this limitation is the price to pay for testing against a real server.
+ _local_download = tmpdir / "download1"
+ _local_download.mkdir()
+ transport.getfile(_root_2 / "file1", _local_download / "file1")
+ transport.getfile(_root_2 / ".hidden", _local_download / ".hidden")
+ Path(_local_download / "dir1").mkdir()
+ Path(_local_download / "dir2").mkdir()
+ transport.getfile(_root_2 / "dir1" / "file2", _local_download / "dir1" / "file2")
+ transport.getfile(_root_2 / "dir2" / "file3", _local_download / "dir2" / "file3")
+ # note: the links are preserved on the remote; getfile follows them when downloading
+ transport.getfile(
+ _root_2 / "dir1" / "file1_link", _local_download / "dir1" / "file1_link"
+ )
+ Path(_local_download / "dir1" / "dir2_link").mkdir()
+ # TODO: The following does not work as expected when testing against a real server, see the open issue on FirecREST:
+ # https://github.com/eth-cscs/firecrest/issues/205
+ transport.getfile(
+ _root_2 / "dir1" / "dir2_link" / "file3",
+ _local_download / "dir1" / "dir2_link" / "file3",
 )
- assert not transport.path_exists(firecrest_server.scratch_path + "/file.txt")
- assert transport.path_exists(firecrest_server.scratch_path + "/file2.txt")
+ # End of GET block
+
+ # ASSERT block: tree should be copied recursively; symlinks should be followed
+ assert Path(_local_download / "dir1").exists()
+ assert Path(_local_download / "dir2").exists()
+ assert Path(_local_download / "file1").read_text() == "file1"
+ assert Path(_local_download / ".hidden").read_text() == ".hidden"
+ assert Path(_local_download / "dir1" / "file2").read_text() == "file2"
+ assert Path(_local_download / "dir2" / "file3").read_text() == "file3"
+ assert Path(_local_download / "dir1" / "dir2_link").exists()
+ assert Path(_local_download / "dir1" / "file1_link").read_text() == "file1"
+ assert Path(_local_download / "dir1" / "dir2_link" / "file3").read_text() == "file3"
+ assert transport._cwd.joinpath(_root_2 / "dir1" / "file1_link").is_symlink()
+ assert transport._cwd.joinpath(_root_2 / "dir1" / "dir2_link").is_symlink()
+
+ # End of ASSERT block
+
+ # raise if source is inappropriate
+ if to_test == "copytree":
+ with pytest.raises(ValueError):
+ testing(_remote_1 / "file1", _remote_2)
+
+
+@pytest.mark.usefixtures("aiida_profile_clean")
+def test_copyfile(firecrest_computer: orm.Computer, tmpdir: Path):
+ """
+ Note: this test depends on functional getfile() and putfile() methods, for consistency with the real server tests.
+ Before running this test, make sure `test_putfile_getfile` has passed.
+ """ + transport = firecrest_computer.get_transport() + testing = transport.copyfile + + tmpdir_remote = transport._temp_directory + _remote_1 = tmpdir_remote / "remotedir_1" + _remote_2 = tmpdir_remote / "remotedir_2" + _for_upload = tmpdir / "forUpload" + _for_download = tmpdir / "forDownload" + _for_upload.mkdir() + _for_download.mkdir() + _remote_1.mkdir() + _remote_2.mkdir() + + # raise if source or destination does not exist + with pytest.raises(FileNotFoundError): + testing(_remote_1 / "does_not_exist", _remote_2) + # in this case don't raise and just create the file + Path(_for_upload / "_").touch() + transport.putfile(_for_upload / "_", _remote_1 / "_") + testing(_remote_1 / "_", _remote_2 / "does_not_exist") + + # raise if source is not a file + with pytest.raises(ValueError): + testing(_remote_1, _remote_2) + + # main functionality, including symlinks + Path(_for_upload / "file1").write_text("file1") + Path(_for_upload / ".hidden").write_text(".hidden") + Path(_for_upload / "notfile1").write_text("notfile1") + transport.putfile(_for_upload / "file1", _remote_1 / "file1") + transport.putfile(_for_upload / ".hidden", _remote_1 / ".hidden") + transport.putfile(_for_upload / "notfile1", _remote_1 / "notfile1") + transport.symlink(_remote_1 / "file1", _remote_1 / "file1_link") + + # write where I tell you to + testing(_remote_1 / "file1", _remote_2 / "file1") + transport.getfile(_remote_2 / "file1", _for_download / "file1") + assert Path(_for_download / "file1").read_text() == "file1" + + # always overwrite + testing(_remote_1 / "notfile1", _remote_2 / "file1") + transport.getfile(_remote_2 / "file1", _for_download / "file1-prime") + assert Path(_for_download / "file1-prime").read_text() == "notfile1" + + # don't skip hidden files + testing(_remote_1 / ".hidden", _remote_2 / ".hidden-prime") + transport.getfile(_remote_2 / ".hidden-prime", _for_download / ".hidden-prime") + assert Path(_for_download / ".hidden-prime").read_text() == ".hidden" + + # preserve links and don't follow them + testing(_remote_1 / "file1_link", _remote_2 / "file1_link") + assert transport._cwd.joinpath(_remote_2 / "file1_link").is_symlink() + transport.getfile(_remote_2 / "file1_link", _for_download / "file1_link") + assert Path(_for_download / "file1_link").read_text() == "file1"