diff --git a/audb/core/api.py b/audb/core/api.py index ee11cc0a..94b16176 100644 --- a/audb/core/api.py +++ b/audb/core/api.py @@ -46,6 +46,19 @@ def available( """ # noqa: E501 databases = [] + + def add_database(name: str, version: str, repository: Repository): + """Add database to list of databases.""" + databases.append( + [ + name, + repository.backend, + repository.host, + repository.name, + version, + ] + ) + for repository in config.REPOSITORIES: try: backend_interface = repository.create_backend_interface() @@ -58,32 +71,34 @@ def available( name = p.name try: for version in [str(x).split("/")[-1] for x in p / "db"]: - databases.append( - [ - name, - repository.backend, - repository.host, - repository.name, - version, - ] - ) + add_database(name, version, repository) except FileNotFoundError: # If the `db` folder does not exist, # we do not include the dataset pass + + elif repository.backend in ["minio", "s3"]: + # Avoid `ls(recursive=True)` for S3 and MinIO + # as this is slow for large databases + for obj in backend._client.list_objects(repository.name): + name = obj.object_name + header_file = f"/{name}/{define.HEADER_FILE}" + for _obj in backend._client.list_objects(repository.name, name): + version = _obj.object_name.split("/")[1] + header_file = f"/{name}/{version}/{define.HEADER_FILE}" + if version not in [ + "attachment", + "media", + "meta", + ] and backend.exists(header_file): + add_database(name, version, repository) + else: for path, version in backend_interface.ls("/"): if path.endswith(define.HEADER_FILE): name = path.split("/")[1] - databases.append( - [ - name, - repository.backend, - repository.host, - repository.name, - version, - ] - ) + add_database(name, version, repository) + except audbackend.BackendError: continue diff --git a/tests/conftest.py b/tests/conftest.py index fc7052f9..bd511c15 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -185,11 +185,14 @@ def persistent_repository(tmpdir_factory): @pytest.fixture(scope="module", autouse=False) def private_and_public_repository(): - r"""Private and public repository on Artifactory. + r"""Private and public repositories. Configure the following repositories: + * data-private: repo on public Artifactory without access + * audb-private: repo on public S3 without access * data-public: repo on public Artifactory with anonymous access + * audb-public: repo on public S3 with anonymous access * data-public2: repo on public Artifactory with anonymous access Note, that the order of the repos is important. @@ -197,13 +200,15 @@ def private_and_public_repository(): until it finds the requested database. """ - host = "https://audeering.jfrog.io/artifactory" - backend = "artifactory" current_repositories = audb.config.REPOSITORIES + public_artifactory_host = "https://audeering.jfrog.io/artifactory" + public_s3_host = "s3.dualstack.eu-north-1.amazonaws.com" audb.config.REPOSITORIES = [ - audb.Repository("data-private", host, backend), - audb.Repository("data-public", host, backend), - audb.Repository("data-public2", host, backend), + audb.Repository("data-private", public_artifactory_host, "artifactory"), + audb.Repository("audb-private", public_s3_host, "s3"), + audb.Repository("data-public", public_artifactory_host, "artifactory"), + audb.Repository("audb-public", public_s3_host, "s3"), + audb.Repository("data-public2", public_artifactory_host, "artifactory"), ] yield repository