From e7bbfa052103ff9dc56e31f3949bed3de1b86884 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 3 Sep 2024 16:57:18 +0200 Subject: [PATCH 01/43] Add audbackend.backend.MinIO --- audbackend/backend/__init__.py | 4 + audbackend/core/backend/minio.py | 321 ++++++++++++++++++++++++++++ docs/api-src/audbackend.backend.rst | 1 + 3 files changed, 326 insertions(+) create mode 100644 audbackend/core/backend/minio.py diff --git a/audbackend/backend/__init__.py b/audbackend/backend/__init__.py index 06be3a6a..ac4d3b51 100644 --- a/audbackend/backend/__init__.py +++ b/audbackend/backend/__init__.py @@ -6,3 +6,7 @@ from audbackend.core.backend.artifactory import Artifactory except ImportError: # pragma: no cover pass +try: + from audbackend.core.backend.minio import MinIO +except ImportError: # pragma: no cover + pass diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py new file mode 100644 index 00000000..46ecd36a --- /dev/null +++ b/audbackend/core/backend/minio.py @@ -0,0 +1,321 @@ +import configparser +import os +import typing + +import minio + +import audeer + +from audbackend.core import utils +from audbackend.core.backend.base import Base + + +class MinIO(Base): + r"""Backend for MinIO. + + Args: + host: host address + repository: repository name + authentication: username, password / API key / access token tuple. + If ``None``, + it requests it by calling :meth:`get_authentication` + + """ # noqa: E501 + + def __init__( + self, + host: str, + repository: str, + *, + authentication: typing.Tuple[str, str] = None, + ): + super().__init__(host, repository, authentication=authentication) + + if authentication is None: + self.authentication = self.get_authentication(host) + + # Open MinIO client + self._client = minio.MinIO( + host, + access_key=self.authentication[0], + secret_key=self.authentication[1], + secure=False, # change later + ) + + @classmethod + def get_authentication(cls, host: str) -> typing.Tuple[str, str]: + """Username and password/access token for given host. + + Returns a authentication for MinIO server + as tuple. + + To get the authentication tuple, + the function looks first + for the two environment variables + ``MINIO_ACCESS_KEY`` and + ``MINIO_SECRET_KEY``. + Otherwise, + it tries to extract missing values + from a config file. + The default path of the config file + (:file:`~/.config/audbackend/minio.cfg`) + can be overwritten with the environment variable + ``MINIO_CONFIG_FILE``. + If no config file exists + or if it does not contain an + entry for ``access_key``, + the access key is set to ``"anonymous"`` + and the secret key to an empty string. + + Args: + host: hostname + + Returns: + access token tuple + + """ + access_key = os.getenv("MINIO_ACCESS_KEY", None) + secret_key = os.getenv("MINIO_SECRET_KEY", None) + config_file = os.getenv("MINIO_CONFIG_FILE", "~/.config/audbackend/minio.cfg") + config_file = audeer.path(config_file) + + if os.path.exists(config_file) and (access_key is None or secret_key is None): + config = configparser.ConfigParser(allow_no_value=True) + config.read(config_file) + if access_key is None: + access_key = config.get("minio", "access_key") + if secret_key is None: + secret_key = config.get("minio", "secret_key") + + return access_key, secret_key + + def _checksum( + self, + path: str, + ) -> str: + r"""MD5 checksum of file on backend.""" + path = self.path(path) + checksum = self._client.stat_object(self.repository, path).etag + return checksum + + def _close( + self, + ): + r"""Close connection to repository. + + An error should be raised, + if the connection to the backend + cannot be closed. + + """ + # At the moment, this is automatically handled. + + def _collapse( + self, + path, + ): + r"""Convert to virtual path. + + + -> + / + + """ + path = "/" + path + path = path.replace("/", self.sep) + return path + + def _copy_file( + self, + src_path: str, + dst_path: str, + verbose: bool, + ): + r"""Copy file on backend.""" + src_path = self.path(src_path) + dst_path = self.path(dst_path) + # This has a maximum size limit of 5GB. + stats = self._client.stat_object(self.repository, src_path) + if stats.size / 1024 / 1024 / 1024 > 5: + # TODO: implement copying with temporary file + pass + else: + self._client.copy_object( + self.repository, + src_path, + minio.commonconfig.CopySource(self.repository, dst_path), + ) + + def _create( + self, + ): + r"""Create repository.""" + if self._client.bucket_exists(self.repository): + utils.raise_file_exists_error(self.repository) + self._client.make_bucket(self.repository) + + def _date( + self, + path: str, + ) -> str: + r"""Get last modification date of file on backend.""" + path = self.path(path) + date = self.client.stat_object(self.repository, path).last_modified + date = utils.date_format(date) + return date + + def _delete( + self, + ): + r"""Delete repository and all its content.""" + self._client.remove_bucket(self.repository) + + def _exists( + self, + path: str, + ) -> bool: + r"""Check if file exists on backend.""" + path = self.path(path) + try: + self._client.stat_object(self.repository, path) + except minio.error.S3Error: + return False + return True + + def _get_file( + self, + src_path: str, + dst_path: str, + verbose: bool, + ): + r"""Get file from backend.""" + src_path = self.path(src_path) + src_size = self._client.stat_object(self.repository, src_path).size + chunk = (4 * 1024,) + with audeer.progress_bar(total=src_size, disable=not verbose) as pbar: + desc = audeer.format_display_message( + "Download {}".format(os.path.basename(str(src_path))), + pbar=True, + ) + pbar.set_description_str(desc) + pbar.refresh() + + dst_size = 0 + try: + response = self.client.get_object(self.repository, src_path) + with open(dst_path, "wb") as dst_fp: + while src_size > dst_size: + data = response.read(chunk) + n_data = len(data) + if n_data > 0: + dst_fp.write(data) + dst_size += n_data + pbar.update(n_data) + finally: + response.close() + response.release_conn() + + def _ls( + self, + path: str, + ) -> typing.List[str]: + r"""List all files under sub-path.""" + if not self._exists(path): + return [] + + path = self.path(path) + objects = self._client.list_objects( + self.repository, + prefix=path, + recursive=True, + ) + paths = [self._collapse(obj.object_name) for obj in objects] + + return paths + + def _move_file( + self, + src_path: str, + dst_path: str, + verbose: bool, + ): + r"""Move file on backend.""" + src_path = self.path(src_path) + dst_path = self.path(dst_path) + if not dst_path.parent.exists(): + dst_path.parent.mkdir() + src_path.move(dst_path) + + def _open( + self, + ): + r"""Open connection to backend.""" + # At the moment, session management is handled automatically. + # If we want to manage this ourselves, + # we need to use the `http_client` argument of `minio.MinIO` + if not self._client.bucket_exists(self.repository): + utils.raise_file_not_found_error(self.repository) + + def _owner( + self, + path: str, + ) -> str: + r"""Get owner of file on backend.""" + path = self.path(path) + # TODO: owner seems to be empty, + # need to check if we have to manage this ourselves? + owner = self._client.stat_object(self.repository, path).owner_name + return owner + + def path( + self, + path: str, + ) -> str: + r"""Convert to backend path. + + This extends the relative ``path`` on the backend + by :attr:`host` and :attr:`repository`, + and returns an :class:`artifactory.ArtifactoryPath` object. + + Args: + path: path on backend + + Returns: + Artifactory path object + + """ + path = path.replace(self.sep, "/") + if path.startswith("/"): + # /path -> path + path = path[1:] + return path + + def _put_file( + self, + src_path: str, + dst_path: str, + checksum: str, + verbose: bool, + ): + r"""Put file to backend.""" + dst_path = self.path(dst_path) + if verbose: # pragma: no cover + desc = audeer.format_display_message( + f"Deploy {src_path}", + pbar=False, + ) + print(desc, end="\r") + + self._client.fput_object(self.repository, dst_path, src_path) + + if verbose: # pragma: no cover + # Clear progress line + print(audeer.format_display_message(" ", pbar=False), end="\r") + + def _remove_file( + self, + path: str, + ): + r"""Remove file from backend.""" + path = self.path(path) + self._client.remove_object(self.repository, path) diff --git a/docs/api-src/audbackend.backend.rst b/docs/api-src/audbackend.backend.rst index 221e81a4..629d51f0 100644 --- a/docs/api-src/audbackend.backend.rst +++ b/docs/api-src/audbackend.backend.rst @@ -14,6 +14,7 @@ backends are supported: Artifactory FileSystem + MinIO Users can implement their own backend by deriving from From cf340710770340d85346a8703787619919d7a76f Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 3 Sep 2024 17:01:09 +0200 Subject: [PATCH 02/43] Add dependency to minio --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 78643310..fd0feea3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,8 +39,12 @@ dynamic = ['version'] artifactory = [ 'dohq-artifactory >=0.10.0', ] +minio = [ + 'minio', +] all = [ 'dohq-artifactory >=0.10.0', + 'minio', ] From dd642d0971772e99a610fa12d8240ef2e0aef74c Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 08:37:51 +0200 Subject: [PATCH 03/43] Rename MinIO to Minio --- audbackend/backend/__init__.py | 2 +- audbackend/core/backend/minio.py | 17 ++++++++--------- docs/api-src/audbackend.backend.rst | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/audbackend/backend/__init__.py b/audbackend/backend/__init__.py index ac4d3b51..01397be9 100644 --- a/audbackend/backend/__init__.py +++ b/audbackend/backend/__init__.py @@ -7,6 +7,6 @@ except ImportError: # pragma: no cover pass try: - from audbackend.core.backend.minio import MinIO + from audbackend.core.backend.minio import Minio except ImportError: # pragma: no cover pass diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 46ecd36a..036ecb91 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -10,7 +10,7 @@ from audbackend.core.backend.base import Base -class MinIO(Base): +class Minio(Base): r"""Backend for MinIO. Args: @@ -35,7 +35,7 @@ def __init__( self.authentication = self.get_authentication(host) # Open MinIO client - self._client = minio.MinIO( + self._client = minio.Minio( host, access_key=self.authentication[0], secret_key=self.authentication[1], @@ -62,10 +62,9 @@ def get_authentication(cls, host: str) -> typing.Tuple[str, str]: can be overwritten with the environment variable ``MINIO_CONFIG_FILE``. If no config file exists - or if it does not contain an - entry for ``access_key``, - the access key is set to ``"anonymous"`` - and the secret key to an empty string. + or if it has missing entries, + ``None`` is returned + for the missing entries. Args: host: hostname @@ -83,9 +82,9 @@ def get_authentication(cls, host: str) -> typing.Tuple[str, str]: config = configparser.ConfigParser(allow_no_value=True) config.read(config_file) if access_key is None: - access_key = config.get("minio", "access_key") + access_key = config.get(host, "access_key", fallback=None) if secret_key is None: - secret_key = config.get("minio", "secret_key") + secret_key = config.get(host, "secret_key", fallback=None) return access_key, secret_key @@ -252,7 +251,7 @@ def _open( r"""Open connection to backend.""" # At the moment, session management is handled automatically. # If we want to manage this ourselves, - # we need to use the `http_client` argument of `minio.MinIO` + # we need to use the `http_client` argument of `minio.Minio` if not self._client.bucket_exists(self.repository): utils.raise_file_not_found_error(self.repository) diff --git a/docs/api-src/audbackend.backend.rst b/docs/api-src/audbackend.backend.rst index 629d51f0..3075e3f8 100644 --- a/docs/api-src/audbackend.backend.rst +++ b/docs/api-src/audbackend.backend.rst @@ -14,7 +14,7 @@ backends are supported: Artifactory FileSystem - MinIO + Minio Users can implement their own backend by deriving from From 40a26543b6978820f0fe85effa95438810595543 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 09:23:42 +0200 Subject: [PATCH 04/43] Fix Minio.delete() to first remove objects --- audbackend/core/backend/minio.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 036ecb91..f3a23e68 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -167,6 +167,11 @@ def _delete( self, ): r"""Delete repository and all its content.""" + # Delete all objects in bucket + objects = self._client.list_objects(self.repository, recursive=True) + for obj in objects: + self._client.remove_object(self.repository, obj.object_name) + # Delete bucket self._client.remove_bucket(self.repository) def _exists( From 64941c5ebd557ebae97a197ef895f592d23c85e9 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 09:24:09 +0200 Subject: [PATCH 05/43] Let Minio.exists() return True for "/" --- audbackend/core/backend/minio.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index f3a23e68..073a5565 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -180,6 +180,9 @@ def _exists( ) -> bool: r"""Check if file exists on backend.""" path = self.path(path) + if path == "": + # Return True for root + return True try: self._client.stat_object(self.repository, path) except minio.error.S3Error: From fe4b88d25203357a87ca2757c02f0198fca564fb Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 09:25:10 +0200 Subject: [PATCH 06/43] Fix Minio.ls() for non-existing paths --- audbackend/core/backend/minio.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 073a5565..bec8fdaa 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -227,9 +227,6 @@ def _ls( path: str, ) -> typing.List[str]: r"""List all files under sub-path.""" - if not self._exists(path): - return [] - path = self.path(path) objects = self._client.list_objects( self.repository, From 054174e4532d54c187cdde1cb20f6944d0bb0d85 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 09:25:39 +0200 Subject: [PATCH 07/43] Add tests using a local MinIO server --- tests/conftest.py | 7 +- tests/test_backend_minio.py | 213 ++++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 tests/test_backend_minio.py diff --git a/tests/conftest.py b/tests/conftest.py index 3ea04592..61a1d44e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,6 +33,7 @@ def hosts(tmpdir_factory): # like audbackend.access() "artifactory": "https://audeering.jfrog.io/artifactory", "file-system": str(tmpdir_factory.mktemp("host")), + "minio": "localhost:9000", "single-folder": str(tmpdir_factory.mktemp("host")), } @@ -76,14 +77,18 @@ def interface(tmpdir_factory, request): """ backend_cls, interface_cls = request.param + artifactory = False if ( hasattr(audbackend.backend, "Artifactory") and backend_cls == audbackend.backend.Artifactory ): artifactory = True host = "https://audeering.jfrog.io/artifactory" + elif ( + hasattr(audbackend.backend, "Minio") and backend_cls == audbackend.backend.Minio + ): + host = "localhost:9000" else: - artifactory = False host = str(tmpdir_factory.mktemp("host")) repository = f"unittest-{pytest.UID}-{audeer.uid()[:8]}" diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py new file mode 100644 index 00000000..8a624f5a --- /dev/null +++ b/tests/test_backend_minio.py @@ -0,0 +1,213 @@ +import os + +import pytest + +import audeer + +import audbackend + + +@pytest.fixture(scope="function", autouse=False) +def hide_credentials(): + defaults = {} + + for key in [ + "MINIO_ACCESS_KEY", + "MINIO_SECRET_KEY", + "MINIO_CONFIG_FILE", + ]: + defaults[key] = os.environ.get(key, None) + + for key, value in defaults.items(): + if value is not None: + del os.environ[key] + + yield + + for key, value in defaults.items(): + if value is not None: + os.environ[key] = value + elif key in os.environ: + del os.environ[key] + + +def test_authentication(tmpdir, hosts, hide_credentials): + host = hosts["minio"] + config_path = audeer.path(tmpdir, "config.cfg") + os.environ["MINIO_CONFIG_FILE"] = config_path + + # config file does not exist + + backend = audbackend.backend.Minio(host, "repository") + assert backend.authentication == (None, None) + + # config file is empty + + audeer.touch(config_path) + backend = audbackend.backend.Minio(host, "repository") + assert backend.authentication == (None, None) + + # config file entry without username and password + + with open(config_path, "w") as fp: + fp.write(f"[{host}]\n") + + backend = audbackend.backend.Minio(host, "repository") + assert backend.authentication == (None, None) + + # config file entry with username and password + + access_key = "bad" + secret_key = "bad" + with open(config_path, "w") as fp: + fp.write(f"[{host}]\n") + fp.write(f"access_key = {access_key}\n") + fp.write(f"secret_key = {secret_key}\n") + + backend = audbackend.backend.Minio(host, "repository") + assert backend.authentication == ("bad", "bad") + with pytest.raises(audbackend.BackendError): + backend.open() + + +@pytest.mark.parametrize("host", ["localhost:9000"]) +@pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) +def test_create_delete_repositories(host, repository): + audbackend.backend.Minio.create(host, repository) + audbackend.backend.Minio.delete(host, repository) + + +@pytest.mark.parametrize("host", ["localhost:9000"]) +@pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) +@pytest.mark.parametrize("authentication", [("bad-access", "bad-secret")]) +def test_errors(host, repository, authentication): + backend = audbackend.backend.Minio(host, repository, authentication=authentication) + with pytest.raises(audbackend.BackendError): + backend.open() + + +@pytest.mark.parametrize( + "interface", + [(audbackend.backend.Minio, audbackend.interface.Maven)], + indirect=True, +) +@pytest.mark.parametrize( + "file, version, extensions, regex, expected", + [ + ( + "/file.tar.gz", + "1.0.0", + [], + False, + "/file.tar/1.0.0/file.tar-1.0.0.gz", + ), + ( + "/file.tar.gz", + "1.0.0", + ["tar.gz"], + False, + "/file/1.0.0/file-1.0.0.tar.gz", + ), + ( + "/.tar.gz", + "1.0.0", + ["tar.gz"], + False, + "/.tar/1.0.0/.tar-1.0.0.gz", + ), + ( + "/tar.gz", + "1.0.0", + ["tar.gz"], + False, + "/tar/1.0.0/tar-1.0.0.gz", + ), + ( + "/.tar.gz", + "1.0.0", + [], + False, + "/.tar/1.0.0/.tar-1.0.0.gz", + ), + ( + "/.tar", + "1.0.0", + [], + False, + "/.tar/1.0.0/.tar-1.0.0", + ), + ( + "/tar", + "1.0.0", + [], + False, + "/tar/1.0.0/tar-1.0.0", + ), + # test regex + ( + "/file.0.tar.gz", + "1.0.0", + [r"\d+.tar.gz"], + False, + "/file.0.tar/1.0.0/file.0.tar-1.0.0.gz", + ), + ( + "/file.0.tar.gz", + "1.0.0", + [r"\d+.tar.gz"], + True, + "/file/1.0.0/file-1.0.0.0.tar.gz", + ), + ( + "/file.99.tar.gz", + "1.0.0", + [r"\d+.tar.gz"], + True, + "/file/1.0.0/file-1.0.0.99.tar.gz", + ), + ( + "/file.prediction.99.tar.gz", + "1.0.0", + [r"prediction.\d+.tar.gz", r"truth.tar.gz"], + True, + "/file/1.0.0/file-1.0.0.prediction.99.tar.gz", + ), + ( + "/file.truth.tar.gz", + "1.0.0", + [r"prediction.\d+.tar.gz", r"truth.tar.gz"], + True, + "/file/1.0.0/file-1.0.0.truth.tar.gz", + ), + ( + "/file.99.tar.gz", + "1.0.0", + [r"(\d+.)?tar.gz"], + True, + "/file/1.0.0/file-1.0.0.99.tar.gz", + ), + ( + "/file.tar.gz", + "1.0.0", + [r"(\d+.)?tar.gz"], + True, + "/file/1.0.0/file-1.0.0.tar.gz", + ), + ], +) +def test_maven_file_structure( + tmpdir, interface, file, version, extensions, regex, expected +): + interface.extensions = extensions + interface.regex = regex + + src_path = audeer.touch(audeer.path(tmpdir, "tmp")) + interface.put_file(src_path, file, version) + + url = str(interface.backend.path(expected)) + url_expected = str( + interface.backend.path(interface._path_with_version(file, version)) + ) + assert url_expected == url + assert interface.ls(file) == [(file, version)] + assert interface.ls() == [(file, version)] From d19d390b1dc5f47929fd7a519e530e3e0c1125cb Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 12:15:08 +0200 Subject: [PATCH 08/43] Expand tests to 100% code coverage --- tests/conftest.py | 5 ++++ tests/test_api.py | 6 +++++ tests/test_backend_minio.py | 38 +++++++++++++++++++++++++++++ tests/test_interface_unversioned.py | 9 +++++++ 4 files changed, 58 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 61a1d44e..ac0d99a6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,6 +47,11 @@ def owner(request): and backend_cls == audbackend.backend.Artifactory ): owner = backend_cls.get_authentication("audeering.jfrog.io/artifactory")[0] + elif ( + hasattr(audbackend.backend, "Minio") and backend_cls == audbackend.backend.Minio + ): + # There seems to be a MinIO bug here + owner = None else: if os.name == "nt": owner = "Administrators" diff --git a/tests/test_api.py b/tests/test_api.py index 1efb1d81..584c3d3d 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -20,6 +20,12 @@ f"unittest-{audeer.uid()[:8]}", audbackend.backend.Artifactory, ), + ( + "minio", + "minio", + f"unittest-{audeer.uid()[:8]}", + audbackend.backend.Minio, + ), pytest.param( # backend does not exist "bad-backend", None, diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index 8a624f5a..fa2e52c6 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -70,6 +70,44 @@ def test_authentication(tmpdir, hosts, hide_credentials): backend.open() +@pytest.mark.parametrize( + "interface", + [(audbackend.backend.Minio, audbackend.interface.Unversioned)], + indirect=True, +) +@pytest.mark.parametrize( + "src_path, dst_path,", + [ + ( + "/big.1.txt", + "/big.2.txt", + ), + ], +) +def test_copy_large_file(tmpdir, interface, src_path, dst_path): + r"""Test copying of large files. + + ``minio.Minio.copy_object()`` has a limit of 5 GB. + We mock the ``audbackend.backend.Minio._size()`` method + to return a value of ``5.01`` + to trigger the fall back copy mechanism for large files, + without having to create a large file. + + Args: + tmpdir: tmpdir fixture + interface: interface fixture + src_path: source path of file on backend + dst_path: destination of copy operation on backend + + """ + tmp_path = audeer.touch(audeer.path(tmpdir, "big.1.txt")) + interface.put_file(tmp_path, src_path) + interface._backend._size = lambda x: 5.01 * 1024 * 1024 * 1024 + interface.copy_file(src_path, dst_path) + assert interface.exists(src_path) + assert interface.exists(dst_path) + + @pytest.mark.parametrize("host", ["localhost:9000"]) @pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) def test_create_delete_repositories(host, repository): diff --git a/tests/test_interface_unversioned.py b/tests/test_interface_unversioned.py index 81a7c496..a5fa791b 100644 --- a/tests/test_interface_unversioned.py +++ b/tests/test_interface_unversioned.py @@ -91,6 +91,7 @@ def tree(tmpdir, request): [ (audbackend.backend.Artifactory, audbackend.interface.Unversioned), (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (audbackend.backend.Minio, audbackend.interface.Unversioned), (SingleFolder, audbackend.interface.Unversioned), ], indirect=True, @@ -172,6 +173,7 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): [ (audbackend.backend.Artifactory, audbackend.interface.Unversioned), (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (audbackend.backend.Minio, audbackend.interface.Unversioned), (SingleFolder, audbackend.interface.Unversioned), ], indirect=True, @@ -557,6 +559,7 @@ def test_errors(tmpdir, interface): [ (audbackend.backend.Artifactory, audbackend.interface.Unversioned), (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (audbackend.backend.Minio, audbackend.interface.Unversioned), (SingleFolder, audbackend.interface.Unversioned), ], indirect=True, @@ -602,6 +605,10 @@ def test_exists(tmpdir, path, interface): (audbackend.backend.FileSystem, audbackend.interface.Unversioned), audbackend.backend.FileSystem, ), + ( + (audbackend.backend.Minio, audbackend.interface.Unversioned), + audbackend.backend.Minio, + ), ( (SingleFolder, audbackend.interface.Unversioned), SingleFolder, @@ -636,6 +643,7 @@ def test_file(tmpdir, src_path, dst_path, owner, interface): [ (audbackend.backend.Artifactory, audbackend.interface.Unversioned), (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (audbackend.backend.Minio, audbackend.interface.Unversioned), (SingleFolder, audbackend.interface.Unversioned), ], indirect=True, @@ -710,6 +718,7 @@ def test_ls(tmpdir, interface): [ (audbackend.backend.Artifactory, audbackend.interface.Unversioned), (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (audbackend.backend.Minio, audbackend.interface.Unversioned), (SingleFolder, audbackend.interface.Unversioned), ], indirect=True, From 07b34df287ba5918a83009bab248242779247507 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 12:15:17 +0200 Subject: [PATCH 09/43] Fix remaining issues --- audbackend/core/api.py | 8 ++++++ audbackend/core/backend/minio.py | 42 ++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/audbackend/core/api.py b/audbackend/core/api.py index 890fa65b..428c8ea5 100644 --- a/audbackend/core/api.py +++ b/audbackend/core/api.py @@ -233,3 +233,11 @@ def register( register("artifactory", Artifactory) except ImportError: # pragma: no cover pass + + # Register optional backends + try: + from audbackend.core.backend.minio import Minio + + register("minio", Minio) + except ImportError: # pragma: no cover + pass diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index bec8fdaa..84459183 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -1,5 +1,6 @@ import configparser import os +import tempfile import typing import minio @@ -133,16 +134,18 @@ def _copy_file( r"""Copy file on backend.""" src_path = self.path(src_path) dst_path = self.path(dst_path) - # This has a maximum size limit of 5GB. - stats = self._client.stat_object(self.repository, src_path) - if stats.size / 1024 / 1024 / 1024 > 5: - # TODO: implement copying with temporary file - pass + # `copy_object()` has a maximum size limit of 5GB. + if self._size(src_path) / 1024 / 1024 / 1024 > 5: + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = audeer.path(tmp_dir, os.path.basename(src_path)) + self._get_file(src_path, tmp_path, verbose) + checksum = self._checksum(src_path) + self._put_file(tmp_path, dst_path, checksum, verbose) else: self._client.copy_object( self.repository, - src_path, - minio.commonconfig.CopySource(self.repository, dst_path), + dst_path, + minio.commonconfig.CopySource(self.repository, src_path), ) def _create( @@ -159,7 +162,7 @@ def _date( ) -> str: r"""Get last modification date of file on backend.""" path = self.path(path) - date = self.client.stat_object(self.repository, path).last_modified + date = self._client.stat_object(self.repository, path).last_modified date = utils.date_format(date) return date @@ -180,9 +183,6 @@ def _exists( ) -> bool: r"""Check if file exists on backend.""" path = self.path(path) - if path == "": - # Return True for root - return True try: self._client.stat_object(self.repository, path) except minio.error.S3Error: @@ -198,7 +198,7 @@ def _get_file( r"""Get file from backend.""" src_path = self.path(src_path) src_size = self._client.stat_object(self.repository, src_path).size - chunk = (4 * 1024,) + chunk = 4 * 1024 with audeer.progress_bar(total=src_size, disable=not verbose) as pbar: desc = audeer.format_display_message( "Download {}".format(os.path.basename(str(src_path))), @@ -209,7 +209,7 @@ def _get_file( dst_size = 0 try: - response = self.client.get_object(self.repository, src_path) + response = self._client.get_object(self.repository, src_path) with open(dst_path, "wb") as dst_fp: while src_size > dst_size: data = response.read(chunk) @@ -244,11 +244,8 @@ def _move_file( verbose: bool, ): r"""Move file on backend.""" - src_path = self.path(src_path) - dst_path = self.path(dst_path) - if not dst_path.parent.exists(): - dst_path.parent.mkdir() - src_path.move(dst_path) + self._copy_file(src_path, dst_path, verbose) + self._remove_file(src_path) def _open( self, @@ -323,3 +320,12 @@ def _remove_file( r"""Remove file from backend.""" path = self.path(path) self._client.remove_object(self.repository, path) + + def _size( + self, + path: str, + ) -> str: + r"""Get size of file on backend.""" + path = self.path(path) + size = self._client.stat_object(self.repository, path).size + return size From 3ed2685777bc1d5d9dcfd3c345233dfa1c9279d5 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 12:18:10 +0200 Subject: [PATCH 10/43] Update docstrings --- audbackend/core/backend/minio.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 84459183..ea7dd684 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -17,7 +17,7 @@ class Minio(Base): Args: host: host address repository: repository name - authentication: username, password / API key / access token tuple. + authentication: username, password / access key, secret key token tuple. If ``None``, it requests it by calling :meth:`get_authentication` @@ -45,7 +45,7 @@ def __init__( @classmethod def get_authentication(cls, host: str) -> typing.Tuple[str, str]: - """Username and password/access token for given host. + """Access and secret tokens for given host. Returns a authentication for MinIO server as tuple. @@ -274,15 +274,11 @@ def path( ) -> str: r"""Convert to backend path. - This extends the relative ``path`` on the backend - by :attr:`host` and :attr:`repository`, - and returns an :class:`artifactory.ArtifactoryPath` object. - Args: path: path on backend Returns: - Artifactory path object + path """ path = path.replace(self.sep, "/") From 77b3a14292ccf92235f674e9e6e3e90803f886fe Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 12:55:12 +0200 Subject: [PATCH 11/43] Define constant hosts values at single position --- tests/conftest.py | 17 ++++++++++++----- tests/test_backend_artifactory.py | 4 ++-- tests/test_backend_minio.py | 4 ++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ac0d99a6..d4678a90 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,12 @@ # unittest-- pytest.UID = audeer.uid()[:8] +# Define static hosts +pytest.HOSTS = { + "artifactory": "https://audeering.jfrog.io/artifactory", + "minio": "localhost:9000", +} + @pytest.fixture(scope="package", autouse=True) def register_single_folder(): @@ -31,9 +37,9 @@ def hosts(tmpdir_factory): return { # For tests based on backend names (deprecated), # like audbackend.access() - "artifactory": "https://audeering.jfrog.io/artifactory", + "artifactory": pytest.HOSTS["artifactory"], "file-system": str(tmpdir_factory.mktemp("host")), - "minio": "localhost:9000", + "minio": pytest.HOSTS["minio"], "single-folder": str(tmpdir_factory.mktemp("host")), } @@ -46,7 +52,8 @@ def owner(request): hasattr(audbackend.backend, "Artifactory") and backend_cls == audbackend.backend.Artifactory ): - owner = backend_cls.get_authentication("audeering.jfrog.io/artifactory")[0] + host_wo_https = pytest.HOSTS["artifactory"][8:] + owner = backend_cls.get_authentication(host_wo_https)[0] elif ( hasattr(audbackend.backend, "Minio") and backend_cls == audbackend.backend.Minio ): @@ -88,11 +95,11 @@ def interface(tmpdir_factory, request): and backend_cls == audbackend.backend.Artifactory ): artifactory = True - host = "https://audeering.jfrog.io/artifactory" + host = pytest.HOSTS["artifactory"] elif ( hasattr(audbackend.backend, "Minio") and backend_cls == audbackend.backend.Minio ): - host = "localhost:9000" + host = pytest.HOSTS["minio"] else: host = str(tmpdir_factory.mktemp("host")) repository = f"unittest-{pytest.UID}-{audeer.uid()[:8]}" diff --git a/tests/test_backend_artifactory.py b/tests/test_backend_artifactory.py index 42108fa3..48d77ffc 100644 --- a/tests/test_backend_artifactory.py +++ b/tests/test_backend_artifactory.py @@ -70,14 +70,14 @@ def test_authentication(tmpdir, hosts, hide_credentials): backend.open() -@pytest.mark.parametrize("host", ["https://audeering.jfrog.io/artifactory"]) +@pytest.mark.parametrize("host", [pytest.HOSTS["artifactory"]]) @pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) def test_create_delete_repositories(host, repository): audbackend.backend.Artifactory.create(host, repository) audbackend.backend.Artifactory.delete(host, repository) -@pytest.mark.parametrize("host", ["https://audeering.jfrog.io/artifactory"]) +@pytest.mark.parametrize("host", [pytest.HOSTS["artifactory"]]) @pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) @pytest.mark.parametrize("authentication", [("non-existing", "non-existing")]) def test_errors(host, repository, authentication): diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index fa2e52c6..fbc36f75 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -108,14 +108,14 @@ def test_copy_large_file(tmpdir, interface, src_path, dst_path): assert interface.exists(dst_path) -@pytest.mark.parametrize("host", ["localhost:9000"]) +@pytest.mark.parametrize("host", [pytest.HOSTS["minio"]]) @pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) def test_create_delete_repositories(host, repository): audbackend.backend.Minio.create(host, repository) audbackend.backend.Minio.delete(host, repository) -@pytest.mark.parametrize("host", ["localhost:9000"]) +@pytest.mark.parametrize("host", [pytest.HOSTS["minio"]]) @pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) @pytest.mark.parametrize("authentication", [("bad-access", "bad-secret")]) def test_errors(host, repository, authentication): From 3a8517d0bc37df5cffac475075b5be04ea53bbd9 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 13:27:54 +0200 Subject: [PATCH 12/43] Run tests on play.min.io --- audbackend/core/backend/minio.py | 5 ++++- tests/conftest.py | 25 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index ea7dd684..4a814366 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -40,7 +40,10 @@ def __init__( host, access_key=self.authentication[0], secret_key=self.authentication[1], - secure=False, # change later + # If you test on a local machine, + # `secure` needs to be set to `False`. + # We might want to read this from a config file? + # secure=False, ) @classmethod diff --git a/tests/conftest.py b/tests/conftest.py index d4678a90..3395d5e2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,10 +18,33 @@ # Define static hosts pytest.HOSTS = { "artifactory": "https://audeering.jfrog.io/artifactory", - "minio": "localhost:9000", + "minio": "play.min.io", } +@pytest.fixture(scope="package", autouse=True) +def authentication(): + """Provide authentication tokens for supported backends.""" + if pytest.HOSTS["minio"] == "play.min.io": + defaults = {} + for key in [ + "MINIO_ACCESS_KEY", + "MINIO_SECRET_KEY", + ]: + defaults[key] = os.environ.get(key, None) + + os.environ["MINIO_ACCESS_KEY"] = "Q3AM3UQ867SPQQA43P2F" + os.environ["MINIO_SECRET_KEY"] = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" + + yield + + for key, value in defaults.items(): + if value is not None: + os.environ[key] = value + elif key in os.environ: + del os.environ[key] + + @pytest.fixture(scope="package", autouse=True) def register_single_folder(): warning = ( From 70730801f584d4d05119000074261f9518282fdb Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 14:07:54 +0200 Subject: [PATCH 13/43] Be more conservative for other copy method --- audbackend/core/backend/minio.py | 2 +- tests/test_backend_minio.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 4a814366..c2893546 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -138,7 +138,7 @@ def _copy_file( src_path = self.path(src_path) dst_path = self.path(dst_path) # `copy_object()` has a maximum size limit of 5GB. - if self._size(src_path) / 1024 / 1024 / 1024 > 5: + if self._size(src_path) / 1024 / 1024 / 1024 >= 5: with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = audeer.path(tmp_dir, os.path.basename(src_path)) self._get_file(src_path, tmp_path, verbose) diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index fbc36f75..bfe85c1e 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -89,7 +89,7 @@ def test_copy_large_file(tmpdir, interface, src_path, dst_path): ``minio.Minio.copy_object()`` has a limit of 5 GB. We mock the ``audbackend.backend.Minio._size()`` method - to return a value of ``5.01`` + to return a value equivalent to 5 GB. to trigger the fall back copy mechanism for large files, without having to create a large file. @@ -102,7 +102,7 @@ def test_copy_large_file(tmpdir, interface, src_path, dst_path): """ tmp_path = audeer.touch(audeer.path(tmpdir, "big.1.txt")) interface.put_file(tmp_path, src_path) - interface._backend._size = lambda x: 5.01 * 1024 * 1024 * 1024 + interface._backend._size = lambda x: 5 * 1024 * 1024 * 1024 interface.copy_file(src_path, dst_path) assert interface.exists(src_path) assert interface.exists(dst_path) From c5e6a27223e9196e9c54def4dd57d6aa32174394 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 15:10:14 +0200 Subject: [PATCH 14/43] Add Minio.get_config() --- audbackend/core/backend/minio.py | 66 ++++++++++++++++++++++++-------- tests/test_backend_minio.py | 41 ++++++++++++++++++++ 2 files changed, 90 insertions(+), 17 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index c2893546..45732893 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -35,15 +35,14 @@ def __init__( if authentication is None: self.authentication = self.get_authentication(host) + config = self.get_config(host) + # Open MinIO client self._client = minio.Minio( host, access_key=self.authentication[0], secret_key=self.authentication[1], - # If you test on a local machine, - # `secure` needs to be set to `False`. - # We might want to read this from a config file? - # secure=False, + secure=config.get("secure", True), ) @classmethod @@ -60,11 +59,8 @@ def get_authentication(cls, host: str) -> typing.Tuple[str, str]: ``MINIO_SECRET_KEY``. Otherwise, it tries to extract missing values - from a config file. - The default path of the config file - (:file:`~/.config/audbackend/minio.cfg`) - can be overwritten with the environment variable - ``MINIO_CONFIG_FILE``. + from a config file, + see :meth:`get_config`. If no config file exists or if it has missing entries, ``None`` is returned @@ -77,20 +73,56 @@ def get_authentication(cls, host: str) -> typing.Tuple[str, str]: access token tuple """ - access_key = os.getenv("MINIO_ACCESS_KEY", None) - secret_key = os.getenv("MINIO_SECRET_KEY", None) + config = cls.get_config(host) + access_key = os.getenv("MINIO_ACCESS_KEY", config.get("access_key", None)) + secret_key = os.getenv("MINIO_SECRET_KEY", config.get("secret_key", None)) + + return access_key, secret_key + + @classmethod + def get_config(cls, host: str) -> typing.Dict: + """Configuration of MinIO server. + + The default path of the config file is + :file:`~/.config/audbackend/minio.cfg`. + It can be overwritten with the environment variable + ``MINIO_CONFIG_FILE``. + + If no config file can be found, + or no entry for the requested host, + an empty dictionary is returned. + + The config file + expects one section per host, + e.g. + + .. code-block:: config + + ["play.min.io"] + access_key = "Q3AM3UQ867SPQQA43P2F" + secret_key = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" + + Args: + host: hostname + + Returns: + config entries as dictionary + + """ config_file = os.getenv("MINIO_CONFIG_FILE", "~/.config/audbackend/minio.cfg") config_file = audeer.path(config_file) - if os.path.exists(config_file) and (access_key is None or secret_key is None): + if os.path.exists(config_file): config = configparser.ConfigParser(allow_no_value=True) config.read(config_file) - if access_key is None: - access_key = config.get(host, "access_key", fallback=None) - if secret_key is None: - secret_key = config.get(host, "secret_key", fallback=None) + try: + config = dict(config.items(host)) + except configparser.NoSectionError: + config = {} + else: + config = {} - return access_key, secret_key + return config def _checksum( self, diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index bfe85c1e..e3935499 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -124,6 +124,47 @@ def test_errors(host, repository, authentication): backend.open() +def test_get_config(tmpdir, hosts, hide_credentials): + host = hosts["minio"] + config_path = audeer.path(tmpdir, "config.cfg") + os.environ["MINIO_CONFIG_FILE"] = config_path + + # config file does not exist + config = audbackend.backend.Minio.get_config(host) + assert config == {} + + # config file is empty + audeer.touch(config_path) + config = audbackend.backend.Minio.get_config(host) + assert config == {} + + # config file has different host + with open(config_path, "w") as fp: + fp.write(f"[{host}.abc]\n") + config = audbackend.backend.Minio.get_config(host) + assert config == {} + + # config file entry without variables + with open(config_path, "w") as fp: + fp.write(f"[{host}]\n") + config = audbackend.backend.Minio.get_config(host) + assert config == {} + + # config file entry with variables + access_key = "user" + secret_key = "pass" + secure = True + with open(config_path, "w") as fp: + fp.write(f"[{host}]\n") + fp.write(f"access_key = {access_key}\n") + fp.write(f"secret_key = {secret_key}\n") + fp.write(f"secure = {secure}\n") + config = audbackend.backend.Minio.get_config(host) + assert config["access_key"] == access_key + assert config["secret_key"] == secret_key + assert config["secure"] + + @pytest.mark.parametrize( "interface", [(audbackend.backend.Minio, audbackend.interface.Maven)], From 393cff76f1a0f217b2568d7170cd5ce76144c3b3 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 15:35:08 +0200 Subject: [PATCH 15/43] Add docstring example --- audbackend/core/backend/minio.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 45732893..54793c1b 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -21,6 +21,21 @@ class Minio(Base): If ``None``, it requests it by calling :meth:`get_authentication` + Examples: + >>> host = "play.min.io" # playground provided by https://min.io + >>> auth = ("Q3AM3UQ867SPQQA43P2F", "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG") + >>> repository = "my-data" + audeer.uid() + >>> Minio.create(host, repository, authentication=auth) + >>> file = audeer.touch("file.txt") + >>> backend = Minio(host, repository, authentication=auth) + >>> try: + ... with backend: + ... backend.put_file(file, "/sub/file.txt") + ... backend.ls() + ... finally: + ... Minio.delete(host, repository, authentication=auth) + ['/sub/file.txt'] + """ # noqa: E501 def __init__( From 2284cfc5041f5434440b90acfdd39ff156d39f2d Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 15:35:17 +0200 Subject: [PATCH 16/43] Correct get_config() docstring --- audbackend/core/backend/minio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 54793c1b..b2b64ed5 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -111,9 +111,9 @@ def get_config(cls, host: str) -> typing.Dict: expects one section per host, e.g. - .. code-block:: config + .. code-block:: ini - ["play.min.io"] + [play.min.io] access_key = "Q3AM3UQ867SPQQA43P2F" secret_key = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" From 9faf954bcb90f76d11a2b508ccc4ca2e6ed0c7ff Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 4 Sep 2024 15:43:38 +0200 Subject: [PATCH 17/43] Add secure argument --- audbackend/core/backend/minio.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index b2b64ed5..06c618ec 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -20,6 +20,17 @@ class Minio(Base): authentication: username, password / access key, secret key token tuple. If ``None``, it requests it by calling :meth:`get_authentication` + secure: if ``None``, + it looks in the config file for it, + compare :meth:`get_config`. + If it cannot find a matching entry, + it defaults to ``True``. + Needs to be ``True`` + when using TLS for the connection, + and ``False`` otherwise, + e.g. when using a `local MinIO server`_. + + .. _local MinIO server: https://min.io/docs/minio/container/index.html Examples: >>> host = "play.min.io" # playground provided by https://min.io @@ -44,20 +55,23 @@ def __init__( repository: str, *, authentication: typing.Tuple[str, str] = None, + secure: bool = None, ): super().__init__(host, repository, authentication=authentication) if authentication is None: self.authentication = self.get_authentication(host) - config = self.get_config(host) + if secure is None: + config = self.get_config(host) + secure = config.get("secure", True) # Open MinIO client self._client = minio.Minio( host, access_key=self.authentication[0], secret_key=self.authentication[1], - secure=config.get("secure", True), + secure=secure, ) @classmethod From 1005c6b1759f0e8ae91abd3c09adc4b042381ce5 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 25 Sep 2024 13:43:03 +0200 Subject: [PATCH 18/43] Add possibility to provide **kwargs --- audbackend/core/backend/minio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 06c618ec..408c92c0 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -56,6 +56,7 @@ def __init__( *, authentication: typing.Tuple[str, str] = None, secure: bool = None, + **kwargs, ): super().__init__(host, repository, authentication=authentication) @@ -72,6 +73,7 @@ def __init__( access_key=self.authentication[0], secret_key=self.authentication[1], secure=secure, + **kwargs, ) @classmethod From 92282f6fda411e743d9b0bd88e78feee8bdd4ff2 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Mon, 30 Sep 2024 15:55:27 +0200 Subject: [PATCH 19/43] Set content-type during upload --- audbackend/core/backend/minio.py | 9 ++++++++- tests/test_backend_minio.py | 27 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 408c92c0..24a2f6f7 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -1,4 +1,5 @@ import configparser +import mimetypes import os import tempfile import typing @@ -369,7 +370,13 @@ def _put_file( ) print(desc, end="\r") - self._client.fput_object(self.repository, dst_path, src_path) + content_type = mimetypes.guess_type(src_path)[0] or "application/octet-stream" + self._client.fput_object( + self.repository, + dst_path, + src_path, + content_type=content_type, + ) if verbose: # pragma: no cover # Clear progress line diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index e3935499..9ccf5265 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -70,6 +70,33 @@ def test_authentication(tmpdir, hosts, hide_credentials): backend.open() +@pytest.mark.parametrize( + "interface", + [(audbackend.backend.Minio, audbackend.interface.Unversioned)], + indirect=True, +) +@pytest.mark.parametrize( + "path, expected,", + [ + ("/text.txt", "text/plain"), + ], +) +def test_content_type(tmpdir, interface, path, expected): + r"""Test setting of content type. + + Args: + tmpdir: tmpdir fixture + interface: interface fixture + path: path of file on backend + expected: expected content type + + """ + tmp_path = audeer.touch(audeer.path(tmpdir, path[1:])) + interface.put_file(tmp_path, path) + stats = interface._backend._client.stat_object(interface.repository, path) + assert stats.content_type == expected + + @pytest.mark.parametrize( "interface", [(audbackend.backend.Minio, audbackend.interface.Unversioned)], From c70ae7e920da260dd75d536b59d695d40bc580d3 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 1 Oct 2024 09:21:51 +0200 Subject: [PATCH 20/43] Add **kwargs to docstring --- audbackend/core/backend/minio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 24a2f6f7..8aaafb79 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -30,8 +30,10 @@ class Minio(Base): when using TLS for the connection, and ``False`` otherwise, e.g. when using a `local MinIO server`_. + **kwargs: keyword arguments passed on to `minio.Minio`_ .. _local MinIO server: https://min.io/docs/minio/container/index.html + .. _minio.Minio: https://min.io/docs/minio/linux/developers/python/API.html Examples: >>> host = "play.min.io" # playground provided by https://min.io From 371cc8cff9293b8bd7f90cd2cfad45a87fd68f27 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Fri, 18 Oct 2024 19:41:40 +0200 Subject: [PATCH 21/43] Add support for owner() (#234) * Add support for owner() * Be more conservative regarding owner --- audbackend/core/backend/minio.py | 23 ++++++++++++++++++++--- tests/conftest.py | 5 ----- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 8aaafb79..b3a206b9 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -1,4 +1,5 @@ import configparser +import getpass import mimetypes import os import tempfile @@ -215,6 +216,7 @@ def _copy_file( self.repository, dst_path, minio.commonconfig.CopySource(self.repository, src_path), + metadata=_metadata(), ) def _create( @@ -332,9 +334,13 @@ def _owner( ) -> str: r"""Get owner of file on backend.""" path = self.path(path) - # TODO: owner seems to be empty, - # need to check if we have to manage this ourselves? - owner = self._client.stat_object(self.repository, path).owner_name + # NOTE: + # we use a custom metadata entry to track the owner + # as stats.owner_name is always empty. + owner = "" + stats = self._client.stat_object(self.repository, path) + if "x-amz-meta-owner" in stats.metadata: + owner = stats.metadata["x-amz-meta-owner"] return owner def path( @@ -378,6 +384,7 @@ def _put_file( dst_path, src_path, content_type=content_type, + metadata=_metadata(), ) if verbose: # pragma: no cover @@ -400,3 +407,13 @@ def _size( path = self.path(path) size = self._client.stat_object(self.repository, path).size return size + + +def _metadata(): + """Dictionary with owner entry. + + When uploaded as metadata to MinIO, + it can be accessed under ``stat_object(...).metadata["x-amz-meta-owner"]``. + + """ + return {"owner": getpass.getuser()} diff --git a/tests/conftest.py b/tests/conftest.py index 3395d5e2..9598f5c4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -77,11 +77,6 @@ def owner(request): ): host_wo_https = pytest.HOSTS["artifactory"][8:] owner = backend_cls.get_authentication(host_wo_https)[0] - elif ( - hasattr(audbackend.backend, "Minio") and backend_cls == audbackend.backend.Minio - ): - # There seems to be a MinIO bug here - owner = None else: if os.name == "nt": owner = "Administrators" From 643c85d6f5b375f4d116d3e899bc96e41fe7e000 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Fri, 18 Oct 2024 19:48:21 +0200 Subject: [PATCH 22/43] Fix owner test under Windows --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9598f5c4..1772808e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -79,7 +79,7 @@ def owner(request): owner = backend_cls.get_authentication(host_wo_https)[0] else: if os.name == "nt": - owner = "Administrators" + owner = "runneradmin" else: owner = getpass.getuser() From 29b5990e7349aa74136b7aa4c928539c9f202c77 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Fri, 18 Oct 2024 19:55:40 +0200 Subject: [PATCH 23/43] Revert "Fix owner test under Windows" This reverts commit 643c85d6f5b375f4d116d3e899bc96e41fe7e000. --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1772808e..9598f5c4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -79,7 +79,7 @@ def owner(request): owner = backend_cls.get_authentication(host_wo_https)[0] else: if os.name == "nt": - owner = "runneradmin" + owner = "Administrators" else: owner = getpass.getuser() From 74f5c8f1eafdec7baaae896110d5c61eb35417ca Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 11:23:30 +0200 Subject: [PATCH 24/43] Try to fix Windows owner test --- tests/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 9598f5c4..0d87dc05 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -77,6 +77,13 @@ def owner(request): ): host_wo_https = pytest.HOSTS["artifactory"][8:] owner = backend_cls.get_authentication(host_wo_https)[0] + elif ( + hasattr(audbackend.backend, "Minio") and backend_cls == audbackend.backend.Minio + ): + if os.name == "nt": + owner = "runneradmin" + else: + owner = getpass.getuser() else: if os.name == "nt": owner = "Administrators" From 792fd5a8b4b45b5ba3545b71a01731921e418861 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 11:33:45 +0200 Subject: [PATCH 25/43] Replace _close() with close() --- audbackend/core/backend/minio.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index b3a206b9..bbca2353 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -159,6 +159,19 @@ def get_config(cls, host: str) -> typing.Dict: return config + def close( + self, + ): + r"""Close connection to backend. + + This will only change the status of + :attr:`audbackend.backend.Minio.opened` + as Minio handles closing the session itself. + + """ + if self.opened: + self.opened = False + def _checksum( self, path: str, @@ -168,18 +181,6 @@ def _checksum( checksum = self._client.stat_object(self.repository, path).etag return checksum - def _close( - self, - ): - r"""Close connection to repository. - - An error should be raised, - if the connection to the backend - cannot be closed. - - """ - # At the moment, this is automatically handled. - def _collapse( self, path, From 69011669b0a6272920b0dc87788f2daad419c373 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 11:36:15 +0200 Subject: [PATCH 26/43] Limit copy size to 4.9 GB --- audbackend/core/backend/minio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index bbca2353..707dcac5 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -206,7 +206,8 @@ def _copy_file( src_path = self.path(src_path) dst_path = self.path(dst_path) # `copy_object()` has a maximum size limit of 5GB. - if self._size(src_path) / 1024 / 1024 / 1024 >= 5: + # We use 4.9GB to have some headroom + if self._size(src_path) / 1024 / 1024 / 1024 >= 4.9: with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = audeer.path(tmp_dir, os.path.basename(src_path)) self._get_file(src_path, tmp_path, verbose) From 520c12916c675f474d4c055a1931fb362f373e80 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 12:10:59 +0200 Subject: [PATCH 27/43] Ensure errors are tested for MinIO --- audbackend/core/backend/minio.py | 2 ++ tests/test_interface_unversioned.py | 1 + 2 files changed, 3 insertions(+) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 707dcac5..9037e698 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -399,6 +399,8 @@ def _remove_file( ): r"""Remove file from backend.""" path = self.path(path) + # Enforce error if path does not exist + self._client.stat_object(self.repository, path) self._client.remove_object(self.repository, path) def _size( diff --git a/tests/test_interface_unversioned.py b/tests/test_interface_unversioned.py index a5fa791b..4e28c800 100644 --- a/tests/test_interface_unversioned.py +++ b/tests/test_interface_unversioned.py @@ -217,6 +217,7 @@ def test_copy(tmpdir, src_path, dst_path, interface): [ (audbackend.backend.Artifactory, audbackend.interface.Unversioned), (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (audbackend.backend.Minio, audbackend.interface.Unversioned), (SingleFolder, audbackend.interface.Unversioned), ], indirect=True, From 86aaf4c20876e398a5aff4c60aba92d738cf67d3 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 12:16:02 +0200 Subject: [PATCH 28/43] Add docstring to test_get_config --- tests/test_backend_minio.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index 9ccf5265..bba72c98 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -152,6 +152,17 @@ def test_errors(host, repository, authentication): def test_get_config(tmpdir, hosts, hide_credentials): + r"""Test parsing of configuration. + + The `get_config()` class method is responsible + for parsing a Minio backend config file. + + Args: + tmpdir: tmpdir fixture + hosts: hosts fixture + hide_credentials: hide_credentials fixture + + """ host = hosts["minio"] config_path = audeer.path(tmpdir, "config.cfg") os.environ["MINIO_CONFIG_FILE"] = config_path From 2a926421ec556a884f9a033beafad0d129541cb6 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 12:19:31 +0200 Subject: [PATCH 29/43] Add docstring to test_maven_file_structure --- tests/test_backend_minio.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index bba72c98..5dd15f3c 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -315,6 +315,21 @@ def test_get_config(tmpdir, hosts, hide_credentials): def test_maven_file_structure( tmpdir, interface, file, version, extensions, regex, expected ): + """Test using the Maven interface with a Minio backend. + + Args: + tmpdir: tmpdir fixture + interface: interface fixture, + which needs to be called with the Minio backend + and the Maven interface + file: file name + version: file version + extensions: extensions considered by the Maven interface + regex: if ``True``, + ``extensions`` are considered as a regex + expected: expected file structure on backend + + """ interface.extensions = extensions interface.regex = regex From 52a95975b43d568c661ed743ddac01dfc34a04b8 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 12:39:59 +0200 Subject: [PATCH 30/43] Ground truth var for interfaces in _unversioned.py --- tests/test_interface_unversioned.py | 69 +++++++---------------------- 1 file changed, 17 insertions(+), 52 deletions(-) diff --git a/tests/test_interface_unversioned.py b/tests/test_interface_unversioned.py index 4e28c800..a4d2efb2 100644 --- a/tests/test_interface_unversioned.py +++ b/tests/test_interface_unversioned.py @@ -15,6 +15,15 @@ from singlefolder import SingleFolder +# Backend-interface combinations to use in all tests +backend_interface_combinations = [ + (audbackend.backend.Artifactory, audbackend.interface.Unversioned), + (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (audbackend.backend.Minio, audbackend.interface.Unversioned), + (SingleFolder, audbackend.interface.Unversioned), +] + + @pytest.fixture(scope="function", autouse=False) def tree(tmpdir, request): r"""Create file tree.""" @@ -88,12 +97,7 @@ def tree(tmpdir, request): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (audbackend.backend.Minio, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], + backend_interface_combinations, indirect=True, ) def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): @@ -170,12 +174,7 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (audbackend.backend.Minio, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], + backend_interface_combinations, indirect=True, ) def test_copy(tmpdir, src_path, dst_path, interface): @@ -214,12 +213,7 @@ def test_copy(tmpdir, src_path, dst_path, interface): @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (audbackend.backend.Minio, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], + backend_interface_combinations, indirect=True, ) def test_errors(tmpdir, interface): @@ -557,12 +551,7 @@ def test_errors(tmpdir, interface): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (audbackend.backend.Minio, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], + backend_interface_combinations, indirect=True, ) def test_exists(tmpdir, path, interface): @@ -598,22 +587,8 @@ def test_exists(tmpdir, path, interface): @pytest.mark.parametrize( "interface, owner", [ - ( - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - audbackend.backend.Artifactory, - ), - ( - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - audbackend.backend.FileSystem, - ), - ( - (audbackend.backend.Minio, audbackend.interface.Unversioned), - audbackend.backend.Minio, - ), - ( - (SingleFolder, audbackend.interface.Unversioned), - SingleFolder, - ), + ((backend, interface), backend) + for backend, interface in backend_interface_combinations ], indirect=True, ) @@ -641,12 +616,7 @@ def test_file(tmpdir, src_path, dst_path, owner, interface): @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (audbackend.backend.Minio, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], + backend_interface_combinations, indirect=True, ) def test_ls(tmpdir, interface): @@ -716,12 +686,7 @@ def test_ls(tmpdir, interface): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (audbackend.backend.Minio, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], + backend_interface_combinations, indirect=True, ) def test_move(tmpdir, src_path, dst_path, interface): From fc86e530df57fe0e039650ca403799b14dbad735 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 12:42:58 +0200 Subject: [PATCH 31/43] Ground truth var for interfaces in _versioned.py --- tests/test_interface_versioned.py | 52 ++++++++++--------------------- 1 file changed, 16 insertions(+), 36 deletions(-) diff --git a/tests/test_interface_versioned.py b/tests/test_interface_versioned.py index f42c0cd6..71253f20 100644 --- a/tests/test_interface_versioned.py +++ b/tests/test_interface_versioned.py @@ -15,6 +15,13 @@ from singlefolder import SingleFolder +# Backend-interface combinations to use in all tests +backend_interface_combinations = [ + (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (SingleFolder, audbackend.interface.Unversioned), +] + + @pytest.fixture(scope="function", autouse=False) def tree(tmpdir, request): r"""Create file tree.""" @@ -88,10 +95,7 @@ def tree(tmpdir, request): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], + backend_interface_combinations, indirect=True, ) def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): @@ -181,10 +185,7 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], + backend_interface_combinations, indirect=True, ) def test_copy(tmpdir, src_path, src_versions, dst_path, version, interface): @@ -237,10 +238,7 @@ def test_copy(tmpdir, src_path, src_versions, dst_path, version, interface): @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], + backend_interface_combinations, indirect=True, ) def test_errors(tmpdir, interface): @@ -678,10 +676,7 @@ def test_errors(tmpdir, interface): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], + backend_interface_combinations, indirect=True, ) def test_exists(tmpdir, path, version, interface): @@ -721,14 +716,8 @@ def test_exists(tmpdir, path, version, interface): @pytest.mark.parametrize( "interface, owner", [ - ( - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - audbackend.backend.FileSystem, - ), - ( - (SingleFolder, audbackend.interface.Versioned), - SingleFolder, - ), + ((backend, interface), backend) + for backend, interface in backend_interface_combinations ], indirect=True, ) @@ -756,10 +745,7 @@ def test_file(tmpdir, src_path, dst_path, version, interface, owner): @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], + backend_interface_combinations, indirect=True, ) @pytest.mark.parametrize( @@ -1035,10 +1021,7 @@ def test_ls(tmpdir, interface, files, path, latest, pattern, expected): ) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], + backend_interface_combinations, indirect=True, ) def test_move(tmpdir, src_path, src_versions, dst_path, version, interface): @@ -1110,10 +1093,7 @@ def test_repr(): @pytest.mark.parametrize("dst_path", ["/file.ext", "/sub/file.ext"]) @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], + backend_interface_combinations, indirect=True, ) def test_versions(tmpdir, dst_path, interface): From cb67cd3376ee6c4842a1dceb94952722aa4bb97f Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 12:43:50 +0200 Subject: [PATCH 32/43] Ground truth var for interfaces in _maven.py --- tests/test_interface_maven.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/test_interface_maven.py b/tests/test_interface_maven.py index 0c2ad182..e4a7b306 100644 --- a/tests/test_interface_maven.py +++ b/tests/test_interface_maven.py @@ -11,12 +11,16 @@ from singlefolder import SingleFolder +# Backend-interface combinations to use in all tests +backend_interface_combinations = [ + (audbackend.backend.FileSystem, audbackend.interface.Unversioned), + (SingleFolder, audbackend.interface.Unversioned), +] + + @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Maven), - (SingleFolder, audbackend.interface.Maven), - ], + backend_interface_combinations, indirect=True, ) def test_errors(tmpdir, interface): @@ -75,10 +79,7 @@ def test_errors(tmpdir, interface): @pytest.mark.parametrize( "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Maven), - (SingleFolder, audbackend.interface.Maven), - ], + backend_interface_combinations, indirect=True, ) @pytest.mark.parametrize( From 3aabdf0a835988d3c36f7a22d580cbd3645271c7 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 13:31:02 +0200 Subject: [PATCH 33/43] Fix typos --- tests/test_interface_maven.py | 4 ++-- tests/test_interface_versioned.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_interface_maven.py b/tests/test_interface_maven.py index e4a7b306..25d6e9f5 100644 --- a/tests/test_interface_maven.py +++ b/tests/test_interface_maven.py @@ -13,8 +13,8 @@ # Backend-interface combinations to use in all tests backend_interface_combinations = [ - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), + (audbackend.backend.FileSystem, audbackend.interface.Maven), + (SingleFolder, audbackend.interface.Maven), ] diff --git a/tests/test_interface_versioned.py b/tests/test_interface_versioned.py index 71253f20..e0c3b063 100644 --- a/tests/test_interface_versioned.py +++ b/tests/test_interface_versioned.py @@ -17,8 +17,8 @@ # Backend-interface combinations to use in all tests backend_interface_combinations = [ - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), + (audbackend.backend.FileSystem, audbackend.interface.Versioned), + (SingleFolder, audbackend.interface.Versioned), ] From a04420509e4b999c134646ef27d9a9aa1370296d Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 13:56:35 +0200 Subject: [PATCH 34/43] Update get_file test --- tests/test_interface_unversioned.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_interface_unversioned.py b/tests/test_interface_unversioned.py index a4d2efb2..b5cf432c 100644 --- a/tests/test_interface_unversioned.py +++ b/tests/test_interface_unversioned.py @@ -603,6 +603,17 @@ def test_file(tmpdir, src_path, dst_path, owner, interface): interface.put_file(src_path, dst_path) assert interface.exists(dst_path) + # Download with already existing src_path + interface.get_file(dst_path, src_path) + assert os.path.exists(src_path) + assert interface.checksum(dst_path) == audeer.md5(src_path) + assert interface.owner(dst_path) == owner + date = datetime.datetime.today().strftime("%Y-%m-%d") + assert interface.date(dst_path) == date + + # Repeat, but remove src_path first + os.remove(src_path) + assert not os.path.exists(src_path) interface.get_file(dst_path, src_path) assert os.path.exists(src_path) assert interface.checksum(dst_path) == audeer.md5(src_path) From 4f65f5f0dd504c355be841e132ba74362058534b Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 13:56:55 +0200 Subject: [PATCH 35/43] Fix typing of _size() --- audbackend/core/backend/minio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 9037e698..9ffe8c28 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -406,7 +406,7 @@ def _remove_file( def _size( self, path: str, - ) -> str: + ) -> int: r"""Get size of file on backend.""" path = self.path(path) size = self._client.stat_object(self.repository, path).size From ccf7d635f26223a0c077545994b4a93d84cfef97 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 14:54:44 +0200 Subject: [PATCH 36/43] Remove default None in dict.get() Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- audbackend/core/backend/minio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 9ffe8c28..820da212 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -109,8 +109,8 @@ def get_authentication(cls, host: str) -> typing.Tuple[str, str]: """ config = cls.get_config(host) - access_key = os.getenv("MINIO_ACCESS_KEY", config.get("access_key", None)) - secret_key = os.getenv("MINIO_SECRET_KEY", config.get("secret_key", None)) + access_key = os.getenv("MINIO_ACCESS_KEY", config.get("access_key")) + secret_key = os.getenv("MINIO_SECRET_KEY", config.get("secret_key")) return access_key, secret_key From e131cba488c2cdfbb220c31f40c9eab295fbeb6f Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 15:02:02 +0200 Subject: [PATCH 37/43] Improve code quality Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- audbackend/core/backend/minio.py | 15 +++++---------- tests/test_backend_minio.py | 17 ++++++++--------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 820da212..560f8a03 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -178,8 +178,7 @@ def _checksum( ) -> str: r"""MD5 checksum of file on backend.""" path = self.path(path) - checksum = self._client.stat_object(self.repository, path).etag - return checksum + return self._client.stat_object(self.repository, path).etag def _collapse( self, @@ -192,9 +191,8 @@ def _collapse( / """ - path = "/" + path - path = path.replace("/", self.sep) - return path + path = f"/{path}" + return path.replace("/", self.sep) def _copy_file( self, @@ -274,8 +272,7 @@ def _get_file( chunk = 4 * 1024 with audeer.progress_bar(total=src_size, disable=not verbose) as pbar: desc = audeer.format_display_message( - "Download {}".format(os.path.basename(str(src_path))), - pbar=True, + f"Download {os.path.basename(str(src_path))}", pbar=True ) pbar.set_description_str(desc) pbar.refresh() @@ -306,9 +303,7 @@ def _ls( prefix=path, recursive=True, ) - paths = [self._collapse(obj.object_name) for obj in objects] - - return paths + return [self._collapse(obj.object_name) for obj in objects] def _move_file( self, diff --git a/tests/test_backend_minio.py b/tests/test_backend_minio.py index 5dd15f3c..6fc71e61 100644 --- a/tests/test_backend_minio.py +++ b/tests/test_backend_minio.py @@ -9,15 +9,14 @@ @pytest.fixture(scope="function", autouse=False) def hide_credentials(): - defaults = {} - - for key in [ - "MINIO_ACCESS_KEY", - "MINIO_SECRET_KEY", - "MINIO_CONFIG_FILE", - ]: - defaults[key] = os.environ.get(key, None) - + defaults = { + key: os.environ.get(key, None) + for key in [ + "MINIO_ACCESS_KEY", + "MINIO_SECRET_KEY", + "MINIO_CONFIG_FILE", + ] + } for key, value in defaults.items(): if value is not None: del os.environ[key] From 6292bfce8d288addcb918c4d1ec44ea690f317b2 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 14:52:45 +0200 Subject: [PATCH 38/43] Add comment for MinIO credentials --- tests/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 0d87dc05..d5c10a65 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,6 +33,9 @@ def authentication(): ]: defaults[key] = os.environ.get(key, None) + # MinIO credentials for the public read/write server + # at play.min.io, see + # https://min.io/docs/minio/linux/developers/python/minio-py.html os.environ["MINIO_ACCESS_KEY"] = "Q3AM3UQ867SPQQA43P2F" os.environ["MINIO_SECRET_KEY"] = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" From 785305aeb714c02aba76dd2bb366c0cec612fe0c Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 15:13:56 +0200 Subject: [PATCH 39/43] Improve code quality --- audbackend/core/backend/minio.py | 7 ++----- tests/conftest.py | 25 ++++++++++++------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 560f8a03..ebb2ad7e 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -334,11 +334,8 @@ def _owner( # NOTE: # we use a custom metadata entry to track the owner # as stats.owner_name is always empty. - owner = "" - stats = self._client.stat_object(self.repository, path) - if "x-amz-meta-owner" in stats.metadata: - owner = stats.metadata["x-amz-meta-owner"] - return owner + meta = self._client.stat_object(self.repository, path).metadata + return meta["x-amz-meta-owner"] if "x-amz-meta-owner" in meta else "" def path( self, diff --git a/tests/conftest.py b/tests/conftest.py index d5c10a65..e0e3d2eb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,26 +26,25 @@ def authentication(): """Provide authentication tokens for supported backends.""" if pytest.HOSTS["minio"] == "play.min.io": - defaults = {} - for key in [ - "MINIO_ACCESS_KEY", - "MINIO_SECRET_KEY", - ]: - defaults[key] = os.environ.get(key, None) - + defaults = { + key: os.environ.get(key, None) + for key in ["MINIO_ACCESS_KEY", "MINIO_SECRET_KEY"] + } # MinIO credentials for the public read/write server # at play.min.io, see # https://min.io/docs/minio/linux/developers/python/minio-py.html os.environ["MINIO_ACCESS_KEY"] = "Q3AM3UQ867SPQQA43P2F" os.environ["MINIO_SECRET_KEY"] = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" + else: + defaults = {} - yield + yield - for key, value in defaults.items(): - if value is not None: - os.environ[key] = value - elif key in os.environ: - del os.environ[key] + for key, value in defaults.items(): + if value is not None: + os.environ[key] = value + elif key in os.environ: + del os.environ[key] @pytest.fixture(scope="package", autouse=True) From 3f0a4b08f65f08b694c1c72f850dd21c62f1fbc1 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 15:20:45 +0200 Subject: [PATCH 40/43] Add exception in _get_file() --- audbackend/core/backend/minio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index ebb2ad7e..9afd2aa4 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -288,6 +288,8 @@ def _get_file( dst_fp.write(data) dst_size += n_data pbar.update(n_data) + except Exception as e: + raise RuntimeError(f"Error downloading file: {e}") finally: response.close() response.release_conn() From 601a8250ff3c0fa6afe7b7d69825f89f0bd4a075 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 15:35:16 +0200 Subject: [PATCH 41/43] Fix coverage --- audbackend/core/backend/minio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 9afd2aa4..6c3088af 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -288,7 +288,7 @@ def _get_file( dst_fp.write(data) dst_size += n_data pbar.update(n_data) - except Exception as e: + except Exception as e: # pragma: nocover raise RuntimeError(f"Error downloading file: {e}") finally: response.close() From 60c9479a4d37a2c51185babfb4b8feff38a63cd0 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 16:10:48 +0200 Subject: [PATCH 42/43] DEBUG: coverage --- audbackend/core/backend/minio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index 6c3088af..c019d609 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -289,7 +289,7 @@ def _get_file( dst_size += n_data pbar.update(n_data) except Exception as e: # pragma: nocover - raise RuntimeError(f"Error downloading file: {e}") + raise RuntimeError(f"Error downloading file: {e}") # pragma: nocover finally: response.close() response.release_conn() From af109fc3211eb4ab86c00b290c1525c724202824 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Tue, 22 Oct 2024 16:11:53 +0200 Subject: [PATCH 43/43] Try to fix coverage --- audbackend/core/backend/minio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audbackend/core/backend/minio.py b/audbackend/core/backend/minio.py index c019d609..18ff74bb 100644 --- a/audbackend/core/backend/minio.py +++ b/audbackend/core/backend/minio.py @@ -288,8 +288,8 @@ def _get_file( dst_fp.write(data) dst_size += n_data pbar.update(n_data) - except Exception as e: # pragma: nocover - raise RuntimeError(f"Error downloading file: {e}") # pragma: nocover + except Exception as e: # pragma: no cover + raise RuntimeError(f"Error downloading file: {e}") finally: response.close() response.release_conn()