From 92f9b75147b5f64d749396731a34ff7bd3a09d39 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Thu, 7 Sep 2023 17:29:40 -0400
Subject: [PATCH 01/21] Quick cache manager

PBENCH-1249

On large datasets, our direct tarball extraction method can time out the API call. Unlike on a long intake, there is no persistent artifact so a retry will always time out as well. This applies to any `get_inventory` call, and therefore to the `/inventory`, `/visualize`, and `/compare` APIs; and given the central importance of those APIs for our Server 1.0 story, that's not an acceptable failure mode.

This PR mitigates that problem with a "compromise" partial cache manager, leveraging the existing `unpack` method but adding a file lock to manage shared access. The idea is that any consumer of tarball contents (including the indexer) will unpack the entire tarball, but leave a "last reference" timestamp. A periodic timer service will check the cache unpack timestamps, and delete the unpack directories which aren't currently locked and which haven't been referenced for longer than a set time period.

__NOTE__: I'm posting a draft mostly for coverage data after a lot of drift in the cache manager unit tests, to determine whether more work is necessary. The "last reference" and reclaim mechanism isn't yet implemented, though that should be the "easy part" now that I've got the server code working.
---
.../test/unit/server/test_cache_manager.py | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)

diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index df3e31a45a..6d11d9dc10 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -1,3 +1,4 @@
+from contextlib import contextmanager
import errno
import fcntl
import hashlib
@@ -583,6 +584,16 @@ def test_unpack_find_subprocess_exception(
cache = Path("/mock/.cache")
rmtree_called = False
+ locks: list[tuple[str, str]] = []
+
+ @contextmanager
+ def open(s, m, buffering=-1):
+ yield None
+
+ def locker(fd, mode):
+ nonlocal locks
+ locks.append((fd, mode))
+
def mock_rmtree(path: Path, ignore_errors=False):
nonlocal rmtree_called
rmtree_called = True
@@ -624,6 +635,16 @@ def test_unpack_success(self, make_logger, db_session, monkeypatch):
cache = Path("/mock/.cache")
call = list()
+ locks: list[tuple[str, str]] = []
+
+ @contextmanager
+ def open(s, m, buffering=-1):
+ yield None
+
+ def locker(fd, mode):
+ nonlocal locks
+ locks.append((fd, mode))
+
def mock_run(args, **_kwargs):
call.append(args[0])

From e5969e62820124e2e8d4f7163997f34de81532f9 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Fri, 8 Sep 2023 00:58:48 -0400
Subject: [PATCH 02/21] Implement cache reclaim and timer service.
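The reclaim pass I have in mind is roughly the following sketch (hedged: the helper name, the lifetime units, and the exact Tarball attributes here are assumptions, not final code):

    import fcntl
    import shutil
    import time

    def reclaim_cache(tree, logger, lifetime: float):
        """Delete cache trees which haven't been referenced recently."""
        # Assumption: lifetime is expressed in hours.
        threshold = time.time() - lifetime * 3600.0
        for tarball in tree.datasets.values():
            if not tarball.unpacked:
                continue
            try:
                with tarball.lock.open("wb") as lock:
                    # Take the lock exclusive and non-blocking: if any
                    # consumer holds the shared lock, we fall through to
                    # the except clause and skip this dataset rather than
                    # stalling the sweep.
                    fcntl.lockf(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                    try:
                        if tarball.last_ref.stat().st_mtime < threshold:
                            shutil.rmtree(tarball.unpacked)
                            tarball.unpacked = None
                    finally:
                        fcntl.lockf(lock, fcntl.LOCK_UN)
            except OSError as e:
                # Lock busy (EAGAIN/EACCES) or deletion failure: either
                # way, skip this dataset and move on.
                logger.info("skipping reclaim of %s: %s", tarball.name, e)

The timer service then just invokes a pass like this periodically with the configured cache lifetime.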
---
lib/pbench/server/cache_manager.py | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
index 20d4fd2d02..b27d92730b 100644
--- a/lib/pbench/server/cache_manager.py
+++ b/lib/pbench/server/cache_manager.py
@@ -469,6 +469,16 @@ def __init__(self, path: Path, resource_id: str, controller: "Controller"):
# Record the path of the companion MD5 file
self.md5_path: Path = path.with_suffix(".xz.md5")
+ # Record the lockf file path used to control cache access
+ self.lock: Path = self.cache / "lock"
+
+ # Record a marker file path used to record the last cache access
+ # timestamp
+ self.last_ref: Path = self.cache / "last_ref"
+
# Record the name of the containing controller
self.controller_name: str = controller.name
@@ -1464,6 +1474,16 @@ def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]:
tarball = self.find_dataset(dataset_id)
return tarball.get_inventory(target)
+ def cache_reclaim(self, dataset_id: str):
+ """Remove the unpacked tarball tree.
+
+ Args:
+ dataset_id: Dataset resource ID to "uncache"
+ """
+ tarball = self.find_dataset(dataset_id)
+ tarball.cache_delete()
+ self._clean_empties(tarball.controller.name)
+
def delete(self, dataset_id: str):
"""Delete the dataset as well as unpacked artifacts.

From 9189f3a13906c66e1157cd0049f0b2bc384c1ab3 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Fri, 8 Sep 2023 09:44:52 -0400
Subject: [PATCH 03/21] More tweaks

I've verified that the timer service removes sufficiently old cache data and that the data is unpacked again on request. The reclaim operation is audited. I should probably audit the unpack as well, but haven't done that here.

I'm still hoping for a successful CI run to check cobertura coverage.
---
lib/pbench/cli/server/tree_manage.py | 4 ++++
lib/pbench/test/unit/server/test_cache_manager.py | 2 ++
2 files changed, 6 insertions(+)

diff --git a/lib/pbench/cli/server/tree_manage.py b/lib/pbench/cli/server/tree_manage.py
index e70fb93b97..7eb08f4ed1 100644
--- a/lib/pbench/cli/server/tree_manage.py
+++ b/lib/pbench/cli/server/tree_manage.py
@@ -30,6 +30,10 @@ def reclaim_cache(tree: CacheManager, logger: Logger, lifetime: float = CACHE_LI
+ total_count = 0
+ has_cache = 0
+ reclaimed = 0
+ reclaim_failed = 0
for tarball in tree.datasets.values():
total_count += 1
if tarball.unpacked:
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 6d11d9dc10..2e98b6989b 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -543,6 +543,8 @@ def __init__(self, path: Path, resource_id: str, controller: Controller):
self.isolator = controller.path / resource_id
+ self.lock = self.cache / "lock"
+ self.last_ref = self.cache / "last_ref"
self.unpacked = None
self.controller = controller

From 1e86cc3b14ac2b2250428aa02d9b369e86b357ca Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Fri, 8 Sep 2023 13:22:44 -0400
Subject: [PATCH 04/21] Audit unpack

We probably won't want to audit cache load longer term, but right now it probably makes sense to keep track.
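For the record, the bracketing around the cache load is meant to look roughly like this sketch (the Audit.create keywords other than root, status, and attributes are assumptions here, not verified signatures; the unpack call is a placeholder):

    from pbench.server import OperationCode
    from pbench.server.database.models.audit import Audit, AuditStatus

    # "dataset" is the Dataset being unpacked (assumed to be in scope).
    error = None
    audit = Audit.create(
        name="cache",                    # assumed operation name
        operation=OperationCode.CREATE,
        status=AuditStatus.BEGIN,
        dataset=dataset,                 # assumed keyword
    )
    try:
        unpack(tarball)  # placeholder for the tar extraction
    except Exception as e:
        error = str(e)
        raise
    finally:
        # Record a terminal audit row linked to the BEGIN row.
        Audit.create(
            root=audit,
            status=AuditStatus.FAILURE if error else AuditStatus.SUCCESS,
            attributes={"error": error} if error else {},
        )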
---
lib/pbench/server/cache_manager.py | 2 ++
lib/pbench/test/unit/server/test_cache_manager.py | 1 +
2 files changed, 3 insertions(+)

diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
index b27d92730b..6e8244a0ca 100644
--- a/lib/pbench/server/cache_manager.py
+++ b/lib/pbench/server/cache_manager.py
@@ -13,6 +13,8 @@ from pbench.common import MetadataLog, selinux
+from pbench.server import JSONOBJECT, OperationCode, PathLike, PbenchServerConfig
+from pbench.server.database.models.audit import Audit, AuditStatus, AuditType
from pbench.server.database.models.datasets import Dataset
from pbench.server.utils import get_tarball_md5
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 2e98b6989b..362ed16b2d 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -13,6 +13,7 @@
import pytest

+from pbench.server import JSONOBJECT, OperationCode
from pbench.server.cache_manager import (
BadDirpath,

From fb3c5c0175722826ee18ef7063c23bee2c435e06 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Tue, 12 Sep 2023 20:25:43 -0400
Subject: [PATCH 05/21] Encapsulate and package locking

Allow holding lock from unpack to stream, and conversion between `EX` and `SH` lock modes.
---
lib/pbench/server/cache_manager.py | 24 ++++++++++-------
lib/pbench/server/indexing_tarballs.py | 5 ++++
.../test/unit/server/test_cache_manager.py | 26 +++----------------
3 files changed, 23 insertions(+), 32 deletions(-)

diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
index 6e8244a0ca..981effa6c3 100644
--- a/lib/pbench/server/cache_manager.py
+++ b/lib/pbench/server/cache_manager.py
@@ -23,6 +23,7 @@ class CacheManagerError(Exception):
"""Base class for exceptions raised from this module."""

pass

class BadDirpath(CacheManagerError):
@@ -312,6 +313,7 @@ def __init__(
self,
stream: IO[bytes],
+ lock: Optional[LockRef] = None,
subproc: Optional[subprocess.Popen] = None,
):
"""Construct an instance to track extracted inventory
@@ -323,6 +325,8 @@ def __init__(
stream: the data stream of a specific tarball member
+ lock: a cache lock reference
subproc: a Popen object to clean up on close
"""
self.stream = stream
self.lock = lock
@@ -331,6 +335,8 @@ def close(self):
"""Close the byte stream and clean up the Popen object"""

+ exception = None

if self.subproc:
try:
@@ -368,6 +374,13 @@ def close(self):
+ # NOTE: if both subprocess cleanup and unlock fail with exceptions, we
+ # raise the latter, and the former will be ignored. In practice, that's
+ # not a problem as we only construct an Inventory with a subprocess
+ # reference for extract, which doesn't need to lock a cache directory.
+ if exception:
+ raise exception
+
def getbuffer(self):
"""Return the underlying byte buffer (used by send_file)"""
return self.stream.getbuffer()
@@ -995,6 +1008,7 @@ def get_results(self, lock: LockManager) -> Path:
root=audit,
status=AuditStatus.FAILURE if error else AuditStatus.SUCCESS,
attributes=attributes,
)
lock.downgrade()
self.last_ref.touch(exist_ok=True)
@@ -1476,16 +1490,6 @@ def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]:
tarball = self.find_dataset(dataset_id)
return tarball.get_inventory(target)
- def cache_reclaim(self, dataset_id: str):
- """Remove the unpacked tarball tree.
-
- Args:
- dataset_id: Dataset resource ID to "uncache"
- """
- tarball = self.find_dataset(dataset_id)
- tarball.cache_delete()
- self._clean_empties(tarball.controller.name)
-
def delete(self, dataset_id: str):
"""Delete the dataset as well as unpacked artifacts.

diff --git a/lib/pbench/server/indexing_tarballs.py b/lib/pbench/server/indexing_tarballs.py
index 9f33d1c104..26dec8ec84 100644
--- a/lib/pbench/server/indexing_tarballs.py
+++ b/lib/pbench/server/indexing_tarballs.py
@@ -363,6 +363,7 @@ def sighup_handler(*args):
ptb = None
tarobj: Optional[Tarball] = None
tb_res = error_code["OK"]
+ lock = None
try:
# We need the fully unpacked cache tree to index it
try:
@@ -374,6 +375,8 @@ def sighup_handler(*args):
dataset,
f"Unable to find dataset: {e!s}",
)
+ if lock:
+ lock.release()
continue

with LockManager(tarobj.lock) as lock:
@@ -509,6 +512,8 @@ def sighup_handler(*args):
Audit.create(
root=audit, status=doneness, attributes=attributes
)
+ if lock:
+ lock.release()
try:
ie_len = ie_filepath.stat().st_size
except FileNotFoundError:
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 362ed16b2d..8a757226ba 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -1,6 +1,4 @@
-from contextlib import contextmanager
import errno
-import fcntl
import hashlib
import io
from logging import Logger
@@ -505,6 +503,10 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path:
(sub_dir / "metadata.log").write_text(
"[pbench]\nkey = value\n", encoding="utf-8"
)
for i in range(1, 4):
(sub_dir / "subdir1" / f"subdir1{i}").mkdir(parents=True, exist_ok=True)
(sub_dir / "subdir1" / "f11.txt").write_text(
@@ -587,16 +589,6 @@ def test_unpack_find_subprocess_exception(
cache = Path("/mock/.cache")
rmtree_called = False
- locks: list[tuple[str, str]] = []
-
- @contextmanager
- def open(s, m, buffering=-1):
- yield None
-
- def locker(fd, mode):
- nonlocal locks
- locks.append((fd, mode))
-
def mock_rmtree(path: Path, ignore_errors=False):
nonlocal rmtree_called
rmtree_called = True
@@ -638,16 +630,6 @@ def test_unpack_success(self, make_logger, db_session, monkeypatch):
cache = Path("/mock/.cache")
call = list()
- locks: list[tuple[str, str]] = []
-
- @contextmanager
- def open(s, m, buffering=-1):
- yield None
-
- def locker(fd, mode):
- nonlocal locks
- locks.append((fd, mode))
-
def mock_run(args, **_kwargs):
call.append(args[0])

From 0ef687464ac72d168a4954e2f269e40810ec6a1d Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Wed, 13 Sep 2023 16:18:07 -0400
Subject: [PATCH 06/21] Add some test coverage
---
lib/pbench/test/unit/server/test_cache_manager.py | 2 ++
1 file changed, 2 insertions(+)

diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 8a757226ba..9af06cade9 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -1,4 +1,5 @@
import errno
+import fcntl
import hashlib
import io
from logging import Logger
@@ -1063,6 +1064,7 @@ def fake_results(self, lock: LockRef) -> Path:
stream: Inventory = file_info["stream"]
assert stream.stream.read() == exp_stream
stream.close()
+ assert create_called is not is_unpacked

def test_cm_inventory(self, monkeypatch, server_config, make_logger):
"""Verify the happy path of the high level get_inventory"""

From ee85aac311e2e2abaa03890c1ba97b2646045d70 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Wed, 27 Sep 2023 16:30:37 -0400
Subject: [PATCH 07/21] Separate basic LockRef from context manager
---
lib/pbench/server/cache_manager.py | 1 +
lib/pbench/test/unit/server/test_cache_manager.py | 1 +
2 files changed, 2 insertions(+)

diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
index 981effa6c3..8080487429 100644
--- a/lib/pbench/server/cache_manager.py
+++ b/lib/pbench/server/cache_manager.py
@@ -73,6 +73,7 @@ class MetadataError(CacheManagerError):
def __init__(self, tarball: Path, error: Exception):
super().__init__(
f"A problem occurred processing metadata.log from {tarball}: {str(error)!r}"
)
self.tarball = tarball
self.error = str(error)
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 9af06cade9..62c275c86e 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -24,6 +24,7 @@
DuplicateTarball,
Inventory,
LockManager,
LockRef,
MetadataError,
Tarball,

From 3548bd1c0df350963af328fef3f1dca255dc3f5b Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Fri, 29 Sep 2023 09:41:39 -0400
Subject: [PATCH 08/21] Hack to fix functional tests

I can't figure out why the default `ubi9` container configuration + EPEL is no longer finding `rsyslog-mmjsonparse`. I've found no relevant hits on searches nor any obvious workaround.

For now, try changing `Pipeline.gy` to override the default `BASE_IMAGE` and use `centos:stream9` instead.
---
jenkins/Pipeline.gy | 3 +++
1 file changed, 3 insertions(+)

diff --git a/jenkins/Pipeline.gy b/jenkins/Pipeline.gy
index 43448e1fa8..7eff6fe7fa 100644
--- a/jenkins/Pipeline.gy
+++ b/jenkins/Pipeline.gy
@@ -19,6 +19,9 @@ pipeline {
// otherwise we set it to the branch name (e.g., `main`).
PB_IMAGE_TAG="${env.CHANGE_ID ?: env.BRANCH_NAME}"
PB_SERVER_IMAGE_NAME="pbench-server"
+ // FIXME: ubi9 doesn't seem to have rsyslog-mmjsonparse available, so
+ // temporarily switch to centos:stream9
+ BASE_IMAGE="quay.io/centos/centos:stream9"
}
stages {
stage('Agent Python3.6 Check') {

From e06a7c6d9f77518571a032de48739573bf49a5dd Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Fri, 29 Sep 2023 16:43:18 -0400
Subject: [PATCH 09/21] Revert BASE_IMAGE hack...
---
jenkins/Pipeline.gy | 3 ---
1 file changed, 3 deletions(-)

diff --git a/jenkins/Pipeline.gy b/jenkins/Pipeline.gy
index 7eff6fe7fa..43448e1fa8 100644
--- a/jenkins/Pipeline.gy
+++ b/jenkins/Pipeline.gy
@@ -19,9 +19,6 @@ pipeline {
// otherwise we set it to the branch name (e.g., `main`).
PB_IMAGE_TAG="${env.CHANGE_ID ?: env.BRANCH_NAME}"
PB_SERVER_IMAGE_NAME="pbench-server"
- // FIXME: ubi9 doesn't seem to have rsyslog-mmjsonparse available, so
- // temporarily switch to centos:stream9
- BASE_IMAGE="quay.io/centos/centos:stream9"
}
stages {
stage('Agent Python3.6 Check') {

From 981b6b252b2823b4edd7c3889c2e63941e855a55 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Mon, 2 Oct 2023 16:51:49 -0400
Subject: [PATCH 10/21] Some refactoring

1. Fix Inventory.close() to always close the stream.
2. Make cache load more transparent by upgrading lock if we need to unpack.
---
lib/pbench/server/cache_manager.py | 2 ++
lib/pbench/test/unit/server/test_cache_manager.py | 11 ++++++++++-
lib/pbench/test/unit/server/test_indexing_tarballs.py | 2 +-
3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
index 8080487429..4a63edd885 100644
--- a/lib/pbench/server/cache_manager.py
+++ b/lib/pbench/server/cache_manager.py
@@ -356,11 +356,13 @@ def close(self):
while self.subproc.stderr.read(4096):
pass
self.subproc.wait(60.0)
+ except Exception as e:
# Release our reference to the subprocess.Popen object so that the
# object can be reclaimed.
self.subproc = None
+ exception = e
if self.lock:
try:
self.lock.release()
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 62c275c86e..a9ff0b84bc 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -1065,7 +1065,6 @@ def fake_results(self, lock: LockRef) -> Path:
stream: Inventory = file_info["stream"]
assert stream.stream.read() == exp_stream
stream.close()
- assert create_called is not is_unpacked

def test_cm_inventory(self, monkeypatch, server_config, make_logger):
"""Verify the happy path of the high level get_inventory"""
@@ -1914,14 +1913,22 @@ def test_lockmanager(self, monkeypatch):
assert not lock.exclusive
assert lock.wait
assert FakeLockRef.operations == [("acquire", False, True)]
lock.upgrade()
assert FakeLockRef.operations == [("acquire", False, True), ("upgrade")]
lock.downgrade()
assert FakeLockRef.operations == [
("acquire", False, True),
("upgrade"),
("downgrade"),
]
@@ -1946,6 +1953,8 @@ def test_lockmanager(self, monkeypatch):
assert not lock.wait
assert FakeLockRef.operations == [("acquire", False, False)]
assert FakeLockRef.operations == [("acquire", False, False), ("release")]

# keep option
FakeLockRef.reset()
diff --git a/lib/pbench/test/unit/server/test_indexing_tarballs.py b/lib/pbench/test/unit/server/test_indexing_tarballs.py
index 219f9cd0dc..336e81b18c 100644
--- a/lib/pbench/test/unit/server/test_indexing_tarballs.py
+++ b/lib/pbench/test/unit/server/test_indexing_tarballs.py
@@ -5,7 +5,7 @@
from pathlib import Path
from signal import SIGHUP
import time
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

import pytest

From 
738658b50073c5e6d8d74a76f8a2dd9f5d7be42e Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 5 Oct 2023 16:43:02 -0400 Subject: [PATCH 11/21] Convert TOC to use cache PBENCH-1192 This has been on my wishlist for a while, but was blocked by not actually having a usable cache. PR #3550 introduces a functioning (if minimal) cache manager, and this PR layers on top of that. Note that, despite #3550 introducing a live cache, this PR represents the first actual use of the in-memory cache map, and some adjustments were necessary to make it work outside of the unit test environment. --- lib/pbench/server/api/__init__.py | 4 +- .../server/api/resources/datasets_contents.py | 134 +++++ .../api/resources/datasets_inventory.py | 3 +- .../query_apis/datasets/datasets_contents.py | 222 -------- lib/pbench/server/cache_manager.py | 212 ++++---- .../test/functional/server/test_datasets.py | 31 +- .../query_apis/test_datasets_contents.py | 507 ------------------ .../test/unit/server/test_cache_manager.py | 187 ++++--- .../unit/server/test_datasets_contents.py | 152 ++++++ 9 files changed, 509 insertions(+), 943 deletions(-) create mode 100644 lib/pbench/server/api/resources/datasets_contents.py delete mode 100644 lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py delete mode 100644 lib/pbench/test/unit/server/query_apis/test_datasets_contents.py create mode 100644 lib/pbench/test/unit/server/test_datasets_contents.py diff --git a/lib/pbench/server/api/__init__.py b/lib/pbench/server/api/__init__.py index 6b5a3cbb86..75ffc64237 100644 --- a/lib/pbench/server/api/__init__.py +++ b/lib/pbench/server/api/__init__.py @@ -15,15 +15,13 @@ from pbench.server import PbenchServerConfig from pbench.server.api.resources.api_key import APIKeyManage from pbench.server.api.resources.datasets_compare import DatasetsCompare +from pbench.server.api.resources.datasets_contents import DatasetsContents from pbench.server.api.resources.datasets_inventory import DatasetsInventory from pbench.server.api.resources.datasets_list import DatasetsList from pbench.server.api.resources.datasets_metadata import DatasetsMetadata from pbench.server.api.resources.datasets_visualize import DatasetsVisualize from pbench.server.api.resources.endpoint_configure import EndpointConfig from pbench.server.api.resources.query_apis.dataset import Datasets -from pbench.server.api.resources.query_apis.datasets.datasets_contents import ( - DatasetsContents, -) from pbench.server.api.resources.query_apis.datasets.datasets_detail import ( DatasetsDetail, ) diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py new file mode 100644 index 0000000000..abf8eebe70 --- /dev/null +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -0,0 +1,134 @@ +from http import HTTPStatus +from pathlib import Path + +from flask import current_app, jsonify +from flask.wrappers import Request, Response + +from pbench.server import OperationCode, PbenchServerConfig +from pbench.server.api.resources import ( + APIAbort, + ApiAuthorizationType, + ApiBase, + ApiContext, + APIInternalError, + ApiMethod, + ApiParams, + ApiSchema, + Parameter, + ParamType, + Schema, +) +from pbench.server.cache_manager import ( + BadDirpath, + CacheExtractBadPath, + CacheManager, + CacheObject, + CacheType, + TarballNotFound, +) +from pbench.server.database.models.datasets import Dataset + + +class DatasetsContents(ApiBase): + """ + API class to retrieve inventory files from a dataset + """ + + def 
__init__(self, config: PbenchServerConfig): + super().__init__( + config, + ApiSchema( + ApiMethod.GET, + OperationCode.READ, + uri_schema=Schema( + Parameter("dataset", ParamType.DATASET, required=True), + Parameter("target", ParamType.STRING, required=False), + ), + authorization=ApiAuthorizationType.DATASET, + ), + ) + + def _get( + self, params: ApiParams, request: Request, context: ApiContext + ) -> Response: + """ + Returns metadata about the target file path within a tarball. + + Args: + params: includes the uri parameters, which provide the dataset and target. + request: Original incoming Request object + context: API context dictionary + + Raises: + APIAbort, reporting either "NOT_FOUND" or "UNSUPPORTED_MEDIA_TYPE" + + GET /api/v1/datasets/{dataset}/contents/{target} + """ + + dataset: Dataset = params.uri["dataset"] + target = params.uri.get("target") + path = Path("" if target in ("/", None) else target) + + current_app.logger.info( + "{} CONTENTS {!r}: path {!r}", dataset.name, target, str(path) + ) + + cache_m = CacheManager(self.config, current_app.logger) + try: + info = cache_m.get_info(dataset.resource_id, path) + except (BadDirpath, CacheExtractBadPath, TarballNotFound) as e: + raise APIAbort(HTTPStatus.NOT_FOUND, str(e)) + except Exception as e: + raise APIInternalError(str(e)) + + prefix = current_app.server_config.rest_uri + origin = ( + f"{self._get_uri_base(request).host}{prefix}/datasets/{dataset.resource_id}" + ) + + details: CacheObject = info["details"] + if details.type is CacheType.DIRECTORY: + children = info["children"] if "children" in info else {} + dir_list = [] + file_list = [] + + for c, value in children.items(): + p = path / c + d: CacheObject = value["details"] + if d.type == CacheType.DIRECTORY: + dir_list.append( + { + "name": c, + "type": d.type.name, + "uri": f"{origin}/contents/{p}", + } + ) + elif d.type == CacheType.FILE: + file_list.append( + { + "name": c, + "size": d.size, + "type": d.type.name, + "uri": f"{origin}/inventory/{p}", + } + ) + + dir_list.sort(key=lambda d: d["name"]) + file_list.sort(key=lambda d: d["name"]) + val = { + "name": details.name, + "type": details.type.name, + "directories": dir_list, + "files": file_list, + } + else: + val = { + "name": details.name, + "size": details.size, + "type": details.type.name, + } + + try: + return jsonify(val) + except Exception as e: + raise APIInternalError(str(e)) diff --git a/lib/pbench/server/api/resources/datasets_inventory.py b/lib/pbench/server/api/resources/datasets_inventory.py index 260ad6375a..9e79e9e02e 100644 --- a/lib/pbench/server/api/resources/datasets_inventory.py +++ b/lib/pbench/server/api/resources/datasets_inventory.py @@ -1,9 +1,8 @@ from http import HTTPStatus from pathlib import Path -from urllib.request import Request from flask import current_app, send_file -from flask.wrappers import Response +from flask.wrappers import Request, Response from pbench.server import OperationCode, PbenchServerConfig from pbench.server.api.resources import ( diff --git a/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py b/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py deleted file mode 100644 index c426f1d0bc..0000000000 --- a/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py +++ /dev/null @@ -1,222 +0,0 @@ -from http import HTTPStatus - -from flask import current_app - -from pbench.server import OperationCode, PbenchServerConfig -from pbench.server.api.resources import ( - ApiAuthorizationType, - ApiMethod, - 
ApiParams, - ApiSchema, - JSON, - Parameter, - ParamType, - Schema, -) -from pbench.server.api.resources.query_apis import ApiContext, PostprocessError -from pbench.server.api.resources.query_apis.datasets import IndexMapBase - - -class DatasetsContents(IndexMapBase): - """ - Datasets Contents API returns the list of sub-directories and files - present under a directory. - """ - - MAX_SIZE = 10000 - - def __init__(self, config: PbenchServerConfig): - super().__init__( - config, - ApiSchema( - ApiMethod.GET, - OperationCode.READ, - uri_schema=Schema( - Parameter("dataset", ParamType.DATASET, required=True), - Parameter("target", ParamType.STRING, required=False), - ), - authorization=ApiAuthorizationType.DATASET, - ), - ) - - def assemble(self, params: ApiParams, context: ApiContext) -> JSON: - """ - Construct a pbench Elasticsearch query for getting a list of - documents which contains the user provided parent with files - and its sub-directories with metadata of run-toc index document - that belong to the given run id. - - Args: - params: ApiParams includes the uri parameters, which provide the dataset and target. - context: propagate the dataset and the "target" directory value. - """ - # Copy target directory metadata to CONTEXT for postprocessor - target = "/" + params.uri.get("target", "") - context["target"] = target - - dataset = context["dataset"] - - current_app.logger.info( - "Discover dataset {} Contents, directory {}", - dataset.name, - target, - ) - - # Retrieve the ES indices that belong to this run_id from the metadata - # table - indices = self.get_index(dataset, "run-toc") - - return { - "path": f"/{indices}/_search", - "kwargs": { - "json": { - "size": self.MAX_SIZE, - "query": { - "bool": { - "filter": [ - { - "dis_max": { - "queries": [ - {"term": {"directory": target}}, - {"term": {"parent": target}}, - ] - } - }, - {"term": {"run_data_parent": dataset.resource_id}}, - ], - "must_not": {"regexp": {"directory": f"{target}/[^/]+/.+"}}, - } - }, - } - }, - } - - def postprocess(self, es_json: JSON, context: ApiContext) -> JSON: - """ - Returns a JSON object (keyword/value pairs) whose values are lists of - entries describing individual directories and files. - - Example: These are the contents of es_json parameter. 
The - contents are the result of a request for directory "/1-default" - - { - "took": 6, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 2, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "d4a8cc7c4ecef7vshg4tjhrew174828d", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "1-default", - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - } - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "3bba25b62fhdgfajgsfdty6797ed06a", - "_score": 0.0, - "_source": { - "parent": "/1-default", - "directory": "/1-default/sample1", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "sample1", - "ancestor_path_elements": ["1-default"], - "files": [ - { - "name": "result.txt", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o644", - "type": "reg", - }, - { - "name": "user-benchmark.cmd", - "mtime": "2021-05-01T24:00:00", - "size": 114, - "mode": "0o755", - "type": "reg", - }, - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - ], - }, - } - - Output: - { - "directories": - [ - { - "name": "sample1", - "uri": "https://host/api/v1/datasets/id/contents/1-default/sample1" - } - ], - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - "uri": "https://host/api/v1/datasets/id/inventory/1-default/reference-result" - } - ] - } - """ - request = context["request"] - resource_id = context["dataset"].resource_id - target = context["target"] - if len(es_json["hits"]["hits"]) == 0: - raise PostprocessError( - HTTPStatus.NOT_FOUND, - f"No directory {target!r} in {resource_id!r} contents.", - ) - - prefix = current_app.server_config.rest_uri - origin = f"{self._get_uri_base(request).host}{prefix}/datasets/{resource_id}" - path = "" if target == "/" else target - - dir_list = [] - file_list = [] - for val in es_json["hits"]["hits"]: - if val["_source"]["directory"] == target: - # Retrieve files list if present else add an empty list. 
- for f in val["_source"].get("files", []): - f["uri"] = f"{origin}/inventory{path}/{f['name']}" - file_list.append(f) - elif val["_source"]["parent"] == target: - name = val["_source"]["name"] - dir_list.append( - {"name": name, "uri": f"{origin}/contents{path}/{name}"} - ) - - return {"directories": dir_list, "files": file_list} diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index 4a63edd885..61bef50ae9 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -26,6 +26,14 @@ class CacheManagerError(Exception): pass +class CacheMapMissing(CacheManagerError): + """Cache map hasn't been built yet.""" + + def __init__(self, resource_id: str): + super().__init__(f"Dataset {resource_id} hasn't been processed") + self.id = resource_id + + class BadDirpath(CacheManagerError): """A bad directory path was given.""" @@ -126,6 +134,56 @@ class CacheObject: size: Optional[int] type: CacheType + @classmethod + def create(cls, root: Path, path: Path) -> "CacheObject": + """Collects the file info + + Args: + root: root directory of cache + path: path to a file/directory within cache + + Returns: + CacheObject with file/directory info + """ + resolve_path: Optional[Path] = None + resolve_type: Optional[CacheType] = None + size: Optional[int] = None + + if path.is_symlink(): + ftype = CacheType.SYMLINK + link_path = path.readlink() + try: + if link_path.is_absolute(): + raise ValueError("symlink path is absolute") + r_path = path.resolve(strict=True) + resolve_path = r_path.relative_to(root) + except (FileNotFoundError, ValueError): + resolve_path = link_path + resolve_type = CacheType.OTHER + else: + if r_path.is_dir(): + resolve_type = CacheType.DIRECTORY + elif r_path.is_file(): + resolve_type = CacheType.FILE + else: + resolve_type = CacheType.OTHER + elif path.is_file(): + ftype = CacheType.FILE + size = path.stat().st_size + elif path.is_dir(): + ftype = CacheType.DIRECTORY + else: + ftype = CacheType.OTHER + + return cls( + name="" if path == root else path.name, + location=Path("") if path == root else path.relative_to(root), + resolve_path=resolve_path, + resolve_type=resolve_type, + size=size, + type=ftype, + ) + # Type hint definitions for the cache map. 
# @@ -135,56 +193,6 @@ class CacheObject: CacheMap = dict[str, CacheMapEntry] -def make_cache_object(dir_path: Path, path: Path) -> CacheObject: - """Collects the file info - - Args: - dir_path: root directory parent path - path: path to a file/directory - - Returns: - CacheObject with file/directory info - """ - resolve_path: Optional[Path] = None - resolve_type: Optional[CacheType] = None - size: Optional[int] = None - - if path.is_symlink(): - ftype = CacheType.SYMLINK - link_path = path.readlink() - try: - if link_path.is_absolute(): - raise ValueError("symlink path is absolute") - r_path = path.resolve(strict=True) - resolve_path = r_path.relative_to(dir_path) - except (FileNotFoundError, ValueError): - resolve_path = link_path - resolve_type = CacheType.OTHER - else: - if r_path.is_dir(): - resolve_type = CacheType.DIRECTORY - elif r_path.is_file(): - resolve_type = CacheType.FILE - else: - resolve_type = CacheType.OTHER - elif path.is_file(): - ftype = CacheType.FILE - size = path.stat().st_size - elif path.is_dir(): - ftype = CacheType.DIRECTORY - else: - ftype = CacheType.OTHER - - return CacheObject( - name=path.name, - location=path.relative_to(dir_path), - resolve_path=resolve_path, - resolve_type=resolve_type, - size=size, - type=ftype, - ) - - class LockRef: """Keep track of a cache lock passed off to a caller""" @@ -631,39 +639,34 @@ def create( return cls(destination, resource_id, controller) - def cache_map(self, dir_path: Path): + def build_map(self): """Build hierarchical representation of results tree - NOTE: this structure isn't removed when we release the cache, as the - data remains valid. + This must be called with the cache locked (shared lock is enough) + and unpacked. - Args: - dir_path: root directory + NOTE: this structure isn't removed when we release the cache, as the + data remains valid so long as the dataset exists. 
""" - root_dir_path = dir_path.parent - cmap: CacheMap = { - dir_path.name: {"details": make_cache_object(root_dir_path, dir_path)} + cmap: CacheMapEntry = { + "details": CacheObject.create(self.unpacked, self.unpacked) } - dir_queue = deque(((dir_path, cmap),)) + dir_queue = deque(((self.unpacked, cmap),)) while dir_queue: - dir_path, parent_map = dir_queue.popleft() - tar_n = dir_path.name - + unpacked, parent_map = dir_queue.popleft() curr: CacheMapEntry = {} - for l_path in dir_path.glob("*"): - tar_info = make_cache_object(root_dir_path, l_path) - curr[l_path.name] = {"details": tar_info} - if l_path.is_symlink(): - continue - if l_path.is_dir(): - dir_queue.append((l_path, curr)) - parent_map[tar_n]["children"] = curr + for path in unpacked.glob("*"): + details = CacheObject.create(self.unpacked, path) + curr[path.name] = {"details": details} + if path.is_dir() and not path.is_symlink(): + dir_queue.append((path, curr[path.name])) + parent_map["children"] = curr self.cachemap = cmap - @staticmethod - def traverse_cmap(path: Path, cachemap: CacheMap) -> CacheMapEntry: - """Locate a path in the cache map + def traverse_cmap(self, path: Path) -> CacheMapEntry: + """Sequentially traverses the cachemap to find the leaf of a + relative path reference Args: path: relative path of the subdirectory/file @@ -673,34 +676,35 @@ def traverse_cmap(path: Path, cachemap: CacheMap) -> CacheMapEntry: BadDirpath if the directory/file path is not valid Returns: - Dictionary with directory/file details or children if present + cache map entry if present """ - file_list = path.parts[:-1] - f_entries = cachemap + if self.cachemap is None: + raise CacheMapMissing(self.resource_id) + + if str(path) in (".", ""): + return self.cachemap + + path_parts = path.parts[:-1] + node: CacheMapEntry = self.cachemap["children"] try: - for file_l in file_list: - info: CacheMapEntry = f_entries[file_l] - if info["details"].type == CacheType.DIRECTORY: - f_entries: CacheMap = info["children"] + for dir in path_parts: + info: CacheMapEntry = node[dir] + if info["details"].type is CacheType.DIRECTORY: + node = info["children"] else: raise BadDirpath( - f"Found a file {file_l!r} where a directory was expected in path {str(path)!r}" + f"Found a file {dir!r} where a directory was expected in path {str(path)!r}" ) - return f_entries[path.name] + return node[path.name] except KeyError as exc: raise BadDirpath( - f"directory {str(path)!r} doesn't have a {exc} file/directory." + f"Can't resolve path {str(path)!r}: component {exc} is missing." ) - def get_info(self, path: Path) -> JSONOBJECT: + def get_info(self, path: Path) -> CacheMapEntry: """Returns the details of the given file/directory in dict format - NOTE: If the cache manager doesn't already have a cache map for the - current Tarball, we'll unpack it here; however as the cache map isn't - dependent on the unpacked results tree, we immediately release the - cache lock. 
- Args: path: path of the file/subdirectory @@ -732,24 +736,7 @@ def get_info(self, path: Path) -> JSONOBJECT: with LockManager(self.lock) as lock: self.get_results(lock) - c_map = self.traverse_cmap(path, self.cachemap) - children = c_map["children"] if "children" in c_map else {} - fd_info = c_map["details"].__dict__.copy() - - if fd_info["type"] == CacheType.DIRECTORY: - fd_info["directories"] = [] - fd_info["files"] = [] - - for key, value in children.items(): - if value["details"].type == CacheType.DIRECTORY: - fd_info["directories"].append(key) - elif value["details"].type == CacheType.FILE: - fd_info["files"].append(key) - - fd_info["directories"].sort() - fd_info["files"].sort() - - return fd_info + return self.traverse_cmap(path) @staticmethod def extract(tarball_path: Path, path: str) -> Inventory: @@ -1000,7 +987,6 @@ def get_results(self, lock: LockManager) -> Path: find_command, self.cache, TarballModeChangeError, self.cache ) self.unpacked = self.cache / self.name - self.cache_map(self.unpacked) except Exception as e: error = str(e) raise @@ -1014,6 +1000,13 @@ def get_results(self, lock: LockManager) -> Path: attributes=attributes, ) lock.downgrade() + + # Even if we have an unpacked directory, if it wasn't done under this + # CacheManager instance we may not have a cachemap, so be prepared to + # build one. + if not self.cachemap: + self.build_map() + self.last_ref.touch(exist_ok=True) return self.unpacked @@ -1471,8 +1464,7 @@ def get_info(self, dataset_id: str, path: Path) -> dict[str, Any]: File Metadata """ tarball = self.find_dataset(dataset_id) - tmap = tarball.get_info(path) - return tmap + return tarball.get_info(path) def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]: """Return filestream data for a file within a dataset tarball diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py index 52ca1c88bb..81a50b29e5 100644 --- a/lib/pbench/test/functional/server/test_datasets.py +++ b/lib/pbench/test/functional/server/test_datasets.py @@ -662,46 +662,41 @@ def test_contents(self, server_client: PbenchServerClient, login_user): the datasets must have gotten through indexing. """ datasets = server_client.get_list( - owner="tester", - metadata=["server.archiveonly"], + owner="tester", metadata=["server.archiveonly"] ) - with_toc = False - without_toc = False + datasets_returned = False for dataset in datasets: + datasets_returned = True response = server_client.get( API.DATASETS_CONTENTS, {"dataset": dataset.resource_id, "target": ""}, raise_error=False, ) - archive = dataset.metadata["server.archiveonly"] - if archive: - assert ( - response.status_code == HTTPStatus.CONFLICT - ), f"Unexpected {response.json()['message']}" - assert response.json()["message"] == "Dataset indexing was disabled" - without_toc = True - continue - - with_toc = True assert ( response.ok ), f"CONTENTS {dataset.name} failed {response.status_code}:{response.json()['message']}" json = response.json() + archive_only = dataset.metadata["server.archiveonly"] + # assert that we have directories and/or files: an empty root # directory is technically possible, but not legal unless it's a # trivial "archiveonly" dataset. NOTE: this will also fail if # either the "directories" or "files" JSON keys are missing. 
- assert json["directories"] or json["files"] + assert archive_only or json["directories"] or json["files"] # Even if they're empty, both values must be lists assert isinstance(json["directories"], list) assert isinstance(json["files"], list) - # We need at least a metadata.log - assert "metadata.log" in (f["name"] for f in json["files"]) + # We expect to find at least a metadata.log at the top level of the + # tarball unless the dataset is marked archive-only. + assert archive_only or ( + "metadata.log" in (f["name"] for f in json["files"]) + ) + # All files and directories reported must have a valid API URI for d in json["directories"]: uri = server_client._uri( API.DATASETS_CONTENTS, @@ -715,7 +710,7 @@ def test_contents(self, server_client: PbenchServerClient, login_user): {"dataset": dataset.resource_id, "target": f["name"]}, ) assert f["uri"] == uri, f"{f['name']} uri is incorrect: {f['uri']}" - assert with_toc and without_toc, "expected archiveonly and indexed datasets" + assert datasets_returned # We successfully checked at least one dataset @pytest.mark.dependency(name="visualize", depends=["upload"], scope="session") def test_visualize(self, server_client: PbenchServerClient, login_user): diff --git a/lib/pbench/test/unit/server/query_apis/test_datasets_contents.py b/lib/pbench/test/unit/server/query_apis/test_datasets_contents.py deleted file mode 100644 index 670023eb24..0000000000 --- a/lib/pbench/test/unit/server/query_apis/test_datasets_contents.py +++ /dev/null @@ -1,507 +0,0 @@ -from http import HTTPStatus - -import pytest - -from pbench.server.api.resources import ApiMethod -from pbench.server.api.resources.query_apis.datasets.datasets_contents import ( - DatasetsContents, -) -from pbench.server.database.models.datasets import Dataset -from pbench.test.unit.server.query_apis.commons import Commons - - -class TestDatasetsContents(Commons): - """ - Unit testing for DatasetsContents class. - In a web service context, we access class functions mostly via the - Flask test client rather than trying to directly invoke the class - constructor and `get` service. - """ - - @pytest.fixture(autouse=True) - def _setup(self, client): - super()._setup( - cls_obj=DatasetsContents(client.config), - pbench_endpoint="/datasets/random_md5_string1/contents/1-default", - elastic_endpoint="/_search", - index_from_metadata="run-toc", - ) - - api_method = ApiMethod.GET - - def test_with_no_uri_args(self, client, server_config): - """ - Check the DatasetsContents API when no dataset or path is provided - """ - # remove the last two components of the pbench_endpoint - incorrect_endpoint = "/datasets/contents/" - response = client.get(f"{server_config.rest_uri}{incorrect_endpoint}/") - assert response.status_code == HTTPStatus.NOT_FOUND - - def test_with_incorrect_path(self, client, server_config, pbench_drb_token): - """ - Check the Contents API when an incorrect path is provided. - """ - incorrect_endpoint = ( - "/".join(self.pbench_endpoint.split("/")[:-1]) + "/random_md5_string2" - ) - response = client.get( - f"{server_config.rest_uri}{incorrect_endpoint}", - headers={"Authorization": "Bearer " + pbench_drb_token}, - ) - assert response.status_code == HTTPStatus.NOT_FOUND - - def test_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check behaviour of Contents API when both sub-directories and - the list of files are present in the given payload. 
- """ - response_payload = { - "took": 6, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 2, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "d4a8cc7c4ecef7vshg4tjhrew174828d", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "1-default", - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - } - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "3bba25b62fhdgfajgsfdty6797ed06a", - "_score": 0.0, - "_source": { - "parent": "/1-default", - "directory": "/1-default/sample1", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "sample1", - "ancestor_path_elements": ["1-default"], - "files": [ - { - "name": "result.txt", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o644", - "type": "reg", - }, - { - "name": "user-benchmark.cmd", - "mtime": "2021-05-01T24:00:00", - "size": 114, - "mode": "0o755", - "type": "reg", - }, - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = { - "directories": [ - { - "name": "sample1", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/contents/1-default/sample1", - } - ], - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/inventory/1-default/reference-result", - } - ], - } - assert expected_result == res_json - - def test_subdirectory_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check the API when only sub-directories are present in the - payload and NO files list. 
- """ - response_payload = { - "took": 7, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 2, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "d4a8cc7c4ecef7vshg4tjhrew174828d", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "1-default", - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "3bba25b62fhdgfajgsfdty6797ed06a", - "_score": 0.0, - "_source": { - "parent": "/1-default", - "directory": "/1-default/sample1", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "sample1", - "ancestor_path_elements": ["1-default"], - "files": [ - { - "name": "result.txt", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o644", - "type": "reg", - }, - { - "name": "user-benchmark.cmd", - "mtime": "2021-05-01T24:00:00", - "size": 114, - "mode": "0o755", - "type": "reg", - }, - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = { - "directories": [ - { - "name": "sample1", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/contents/1-default/sample1", - } - ], - "files": [], - } - assert expected_result == res_json - - def test_files_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Checks the API when only list of files are present in a directory. 
- """ - response_payload = { - "took": 7, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 1, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "9e95ccb385b7a7a2d70ededa07c391da", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "files": [ - { - "name": "default.csv", - "mtime": "2021-05-01T24:00:00", - "size": 122, - "mode": "0o644", - "type": "reg", - } - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - } - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = { - "directories": [], - "files": [ - { - "name": "default.csv", - "mtime": "2021-05-01T24:00:00", - "size": 122, - "mode": "0o644", - "type": "reg", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/inventory/1-default/default.csv", - } - ], - } - assert expected_result == res_json - - def test_no_subdirectory_no_files_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check the API when no subdirectory or files are present. - """ - response_payload = { - "took": 7, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 1, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "9e95ccb385b7a7a2d70ededa07c391da", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - } - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. 
- auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = {"directories": [], "files": []} - assert expected_result == res_json - - def test_empty_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check the API when a directory is empty. - """ - response_payload = { - "took": 55, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 0, "relation": "eq"}, - "max_score": None, - "hits": [], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status - if expected_status != HTTPStatus.OK - else HTTPStatus.NOT_FOUND, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.NOT_FOUND: - res_json = response.json - expected_result = { - "message": "No directory '/1-default' in 'drb' contents." - } - assert expected_result == res_json - - def test_get_index(self, attach_dataset, provide_metadata): - drb = Dataset.query(name="drb") - indices = self.cls_obj.get_index(drb, self.root_index) - assert indices == "unit-test.v6.run-toc.2020-05" - - @pytest.mark.parametrize("name", ("wrong", "")) - def test_missing_name(self, client, server_config, pbench_drb_token, name): - expected_status = HTTPStatus.NOT_FOUND - incorrect_endpoint = self.pbench_endpoint.rsplit("/", 1)[0] + "/" + name - response = client.get( - incorrect_endpoint, - headers={"Authorization": "Bearer " + pbench_drb_token}, - ) - assert response.status_code == expected_status diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index a9ff0b84bc..ec97462f28 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -436,7 +436,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: Directory Structure /tmp/ - / + / subdir1/ subdir11/ subdir12/ @@ -453,7 +453,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: f1415_sym -> ./f1412_sym f1416_sym -> ../../subdir12/f122_sym f11.txt - f12_sym -> ../../.. + f12_sym -> .. 
f1.json metadata.log @@ -461,7 +461,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: Generated cache map { - 'dir_name': { + '': { 'details': , 'children': { 'f1.json': {'details': }, @@ -519,7 +519,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: ) (sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1411.txt").touch() sym_file = sub_dir / "subdir1" / "f12_sym" - os.symlink(Path("../../.."), sym_file) + os.symlink(Path(".."), sym_file) sym_file = sub_dir / "subdir1" / "subdir12" / "f121_sym" os.symlink(Path("../..") / "subdir1" / "subdir15", sym_file) sym_file = sub_dir / "subdir1" / "subdir12" / "f122_sym" @@ -551,6 +551,7 @@ def __init__(self, path: Path, resource_id: str, controller: Controller): self.lock = self.cache / "lock" self.last_ref = self.cache / "last_ref" self.unpacked = None + self.cachemap = None self.controller = controller def test_unpack_tar_subprocess_exception( @@ -647,10 +648,12 @@ def mock_resolve(_path, _strict=False): raise AssertionError("Unexpected call to Path.resolve()") with monkeypatch.context() as m: + m.setattr(Audit, "create", lambda **kwargs: None) m.setattr(Path, "mkdir", lambda path, parents=False, exist_ok=False: None) m.setattr(Path, "touch", lambda path, exist_ok=False: None) - m.setattr("pbench.server.cache_manager.subprocess.run", mock_run) m.setattr(Path, "resolve", mock_resolve) + m.setattr(Path, "iterdir", lambda path: []) + m.setattr("pbench.server.cache_manager.subprocess.run", mock_run) m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) tb = Tarball( @@ -675,50 +678,51 @@ def test_cache_map_success(self, make_logger, monkeypatch, tmp_path): tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() - sd1 = tb.cachemap["dir_name"]["children"]["subdir1"] + sd1 = tb.cachemap["children"]["subdir1"] assert sd1["details"].name == "subdir1" sd141 = sd1["children"]["subdir14"]["children"]["subdir141"] - assert sd141["children"]["f1412_sym"]["details"].type == CacheType.SYMLINK + assert sd141["children"]["f1412_sym"]["details"].type is CacheType.SYMLINK @pytest.mark.parametrize( "file_path, expected_msg", [ ( - "/dir_name/subdir1/f11.txt", - "The path '/dir_name/subdir1/f11.txt' is an absolute path, " + "/subdir1/f11.txt", + "The path '/subdir1/f11.txt' is an absolute path, " "we expect relative path to the root directory.", ), ( - "dir_name/subdir1/subdir11/../f11.txt", - "directory 'dir_name/subdir1/subdir11/../f11.txt' doesn't have a '..' file/directory.", + "subdir1/subdir11/../f11.txt", + "Can't resolve path 'subdir1/subdir11/../f11.txt': component '..' 
is missing.", ), ( - "dir_name/subdir1/subdir14/subdir1", - "directory 'dir_name/subdir1/subdir14/subdir1' doesn't have a 'subdir1' file/directory.", + "subdir1/subdir14/subdir1", + "Can't resolve path 'subdir1/subdir14/subdir1': component 'subdir1' is missing.", ), ( - "dir_name/ne_dir", - "directory 'dir_name/ne_dir' doesn't have a 'ne_dir' file/directory.", + "ne_dir", + "Can't resolve path 'ne_dir': component 'ne_dir' is missing.", ), ( - "dir_name/subdir1/ne_file", - "directory 'dir_name/subdir1/ne_file' doesn't have a 'ne_file' file/directory.", + "subdir1/ne_file", + "Can't resolve path 'subdir1/ne_file': component 'ne_file' is missing.", ), ( - "dir_name/ne_dir/ne_file", - "directory 'dir_name/ne_dir/ne_file' doesn't have a 'ne_dir' file/directory.", + "ne_dir/ne_file", + "Can't resolve path 'ne_dir/ne_file': component 'ne_dir' is missing.", ), ( - "dir_name/subdir1/f11.txt/ne_subdir", - "Found a file 'f11.txt' where a directory was expected in path 'dir_name/subdir1/f11.txt/ne_subdir'", + "subdir1/f11.txt/ne_subdir", + "Found a file 'f11.txt' where a directory was expected in path 'subdir1/f11.txt/ne_subdir'", ), ( - "dir_name/subdir1/subdir14/subdir141/f1412_sym/ne_file", + "subdir1/subdir14/subdir141/f1412_sym/ne_file", "Found a file 'f1412_sym' where a directory was expected " - "in path 'dir_name/subdir1/subdir14/subdir141/f1412_sym/ne_file'", + "in path 'subdir1/subdir14/subdir141/f1412_sym/ne_file'", ), ], ) @@ -738,7 +742,8 @@ def test_cache_map_bad_dir_path( tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() with pytest.raises(BadDirpath) as exc: tb.get_info(Path(file_path)) assert str(exc.value) == expected_msg @@ -746,10 +751,10 @@ def test_cache_map_bad_dir_path( @pytest.mark.parametrize( "file_path, location, name, resolve_path, resolve_type, size, file_type", [ - ("dir_name", "dir_name", "dir_name", None, None, None, CacheType.DIRECTORY), + ("", "", "", None, None, None, CacheType.DIRECTORY), ( - "dir_name/f1.json", - "dir_name/f1.json", + "f1.json", + "f1.json", "f1.json", None, None, @@ -757,8 +762,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1", - "dir_name/subdir1", + "subdir1", + "subdir1", "subdir1", None, None, @@ -766,8 +771,8 @@ def test_cache_map_bad_dir_path( CacheType.DIRECTORY, ), ( - "dir_name/subdir1/./f11.txt", - "dir_name/subdir1/f11.txt", + "subdir1/./f11.txt", + "subdir1/f11.txt", "f11.txt", None, None, @@ -775,8 +780,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1//f11.txt", - "dir_name/subdir1/f11.txt", + "subdir1//f11.txt", + "subdir1/f11.txt", "f11.txt", None, None, @@ -784,8 +789,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1/f11.txt", - "dir_name/subdir1/f11.txt", + "subdir1/f11.txt", + "subdir1/f11.txt", "f11.txt", None, None, @@ -793,17 +798,17 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1/f12_sym", - "dir_name/subdir1/f12_sym", + "subdir1/f12_sym", + "subdir1/f12_sym", "f12_sym", - Path("../../.."), - CacheType.OTHER, + Path("."), + CacheType.DIRECTORY, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir12/f121_sym", - "dir_name/subdir1/subdir12/f121_sym", + "subdir1/subdir12/f121_sym", + "subdir1/subdir12/f121_sym", "f121_sym", Path("../../subdir1/subdir15"), CacheType.OTHER, @@ -811,8 +816,8 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir12/f122_sym", - 
"dir_name/subdir1/subdir12/f122_sym", + "subdir1/subdir12/f122_sym", + "subdir1/subdir12/f122_sym", "f122_sym", Path("bad_subdir/nonexistent_file.txt"), CacheType.OTHER, @@ -820,8 +825,8 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir13/f131_sym", - "dir_name/subdir1/subdir13/f131_sym", + "subdir1/subdir13/f131_sym", + "subdir1/subdir13/f131_sym", "f131_sym", Path("/etc/passwd"), CacheType.OTHER, @@ -829,8 +834,8 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14", - "dir_name/subdir1/subdir14", + "subdir1/subdir14", + "subdir1/subdir14", "subdir14", None, None, @@ -838,8 +843,8 @@ def test_cache_map_bad_dir_path( CacheType.DIRECTORY, ), ( - "dir_name/subdir1/subdir14/subdir141/f1411.txt", - "dir_name/subdir1/subdir14/subdir141/f1411.txt", + "subdir1/subdir14/subdir141/f1411.txt", + "subdir1/subdir14/subdir141/f1411.txt", "f1411.txt", None, None, @@ -847,8 +852,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1/subdir14/subdir141/f1412_sym", - "dir_name/subdir1/subdir14/subdir141/f1412_sym", + "subdir1/subdir14/subdir141/f1412_sym", + "subdir1/subdir14/subdir141/f1412_sym", "f1412_sym", Path("/mock_absolute_path/subdir1/f11.txt"), CacheType.OTHER, @@ -856,35 +861,35 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1413_sym", - "dir_name/subdir1/subdir14/subdir141/f1413_sym", + "subdir1/subdir14/subdir141/f1413_sym", + "subdir1/subdir14/subdir141/f1413_sym", "f1413_sym", - Path("dir_name/subdir1/subdir14/subdir141"), + Path("subdir1/subdir14/subdir141"), CacheType.DIRECTORY, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1414_sym", - "dir_name/subdir1/subdir14/subdir141/f1414_sym", + "subdir1/subdir14/subdir141/f1414_sym", + "subdir1/subdir14/subdir141/f1414_sym", "f1414_sym", - Path("dir_name/subdir1/subdir14/subdir141/f1411.txt"), + Path("subdir1/subdir14/subdir141/f1411.txt"), CacheType.FILE, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1415_sym", - "dir_name/subdir1/subdir14/subdir141/f1415_sym", + "subdir1/subdir14/subdir141/f1415_sym", + "subdir1/subdir14/subdir141/f1415_sym", "f1415_sym", - Path("dir_name/subdir1/f11.txt"), + Path("subdir1/f11.txt"), CacheType.FILE, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1416_sym", - "dir_name/subdir1/subdir14/subdir141/f1416_sym", + "subdir1/subdir14/subdir141/f1416_sym", + "subdir1/subdir14/subdir141/f1416_sym", "f1416_sym", Path("../../subdir12/f122_sym"), CacheType.OTHER, @@ -919,7 +924,8 @@ def test_cache_map_traverse_cmap( tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() # Since the result tree is dynamically generated by the test at runtime, # the parametrization for resolve_path cannot provide the correct value @@ -930,21 +936,30 @@ def test_cache_map_traverse_cmap( resolve_path = tar_dir / str(resolve_path).removeprefix(abs_pref) # test traverse with random path - c_map = Tarball.traverse_cmap(Path(file_path), tb.cachemap) + c_map = tb.traverse_cmap(Path(file_path)) + if file_type is CacheType.DIRECTORY: + assert sorted(c_map.keys()) == [ + "children", + "details", + ], "Directory should have children and details" + else: + assert sorted(c_map.keys()) == [ + "details" + ], "Non-directory should have only details" assert c_map["details"].location == Path(location) assert c_map["details"].name == name 
assert c_map["details"].resolve_path == resolve_path assert c_map["details"].resolve_type == resolve_type assert c_map["details"].size == size - assert c_map["details"].type == file_type + assert c_map["details"].type is file_type @pytest.mark.parametrize( "file_path, expected_msg", [ ( - "dir_name/subdir1/f11.txt", + "subdir1/f11.txt", { - "location": Path("dir_name/subdir1/f11.txt"), + "location": Path("subdir1/f11.txt"), "name": "f11.txt", "resolve_path": None, "resolve_type": None, @@ -953,11 +968,17 @@ def test_cache_map_traverse_cmap( }, ), ( - "dir_name/subdir1", + "subdir1", { - "directories": ["subdir11", "subdir12", "subdir13", "subdir14"], - "files": ["f11.txt"], - "location": Path("dir_name/subdir1"), + "children": [ + "f11.txt", + "f12_sym", + "subdir11", + "subdir12", + "subdir13", + "subdir14", + ], + "location": Path("subdir1"), "name": "subdir1", "resolve_path": None, "resolve_type": None, @@ -966,11 +987,10 @@ def test_cache_map_traverse_cmap( }, ), ( - "dir_name/subdir1/subdir11", + "subdir1/subdir11", { - "directories": [], - "files": [], - "location": Path("dir_name/subdir1/subdir11"), + "children": [], + "location": Path("subdir1/subdir11"), "name": "subdir11", "resolve_path": None, "resolve_type": None, @@ -979,11 +999,11 @@ def test_cache_map_traverse_cmap( }, ), ( - "dir_name/subdir1/subdir14/subdir141/f1413_sym", + "subdir1/subdir14/subdir141/f1413_sym", { - "location": Path("dir_name/subdir1/subdir14/subdir141/f1413_sym"), + "location": Path("subdir1/subdir14/subdir141/f1413_sym"), "name": "f1413_sym", - "resolve_path": Path("dir_name/subdir1/subdir14/subdir141"), + "resolve_path": Path("subdir1/subdir14/subdir141"), "resolve_type": CacheType.DIRECTORY, "size": None, "type": CacheType.SYMLINK, @@ -1007,11 +1027,16 @@ def test_cache_map_get_info_cmap( tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() # test get_info with random path file_info = tb.get_info(Path(file_path)) - assert file_info == expected_msg + for k in expected_msg.keys(): + if k == "children": + assert sorted(file_info[k].keys()) == expected_msg[k] + else: + assert getattr(file_info["details"], k) == expected_msg[k] @pytest.mark.parametrize( "file_path,is_unpacked,exp_stream", diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py new file mode 100644 index 0000000000..49bcb4f6c9 --- /dev/null +++ b/lib/pbench/test/unit/server/test_datasets_contents.py @@ -0,0 +1,152 @@ +from http import HTTPStatus +from pathlib import Path +from typing import Optional + +import pytest +import requests + +from pbench.server.cache_manager import BadDirpath, CacheManager, CacheObject, CacheType +from pbench.server.database.models.datasets import Dataset, DatasetNotFound + + +class TestDatasetsAccess: + @pytest.fixture() + def query_get_as(self, client, server_config, more_datasets, pbench_drb_token): + """ + Helper fixture to perform the API query and validate an expected + return status. 
+ + Args: + client: Flask test API client fixture + server_config: Pbench config fixture + more_datasets: Dataset construction fixture + pbench_drb_token: Authenticated user token fixture + """ + + def query_api( + dataset: str, target: str, expected_status: HTTPStatus + ) -> requests.Response: + try: + dataset_id = Dataset.query(name=dataset).resource_id + except DatasetNotFound: + dataset_id = dataset # Allow passing deliberately bad value + headers = {"authorization": f"bearer {pbench_drb_token}"} + response = client.get( + f"{server_config.rest_uri}/datasets/{dataset_id}/contents/{target}", + headers=headers, + ) + assert ( + response.status_code == expected_status + ), f"Unexpected failure '{response.json}'" + return response + + return query_api + + def test_get_no_dataset(self, query_get_as): + response = query_get_as( + "nonexistent-dataset", "metadata.log", HTTPStatus.NOT_FOUND + ) + assert response.json == {"message": "Dataset 'nonexistent-dataset' not found"} + + def test_dataset_not_present(self, query_get_as): + response = query_get_as("fio_2", "metadata.log", HTTPStatus.NOT_FOUND) + assert response.json == { + "message": "The dataset tarball named 'random_md5_string4' is not found" + } + + def test_unauthorized_access(self, query_get_as): + response = query_get_as("test", "metadata.log", HTTPStatus.FORBIDDEN) + assert response.json == { + "message": "User drb is not authorized to READ a resource owned by test with private access" + } + + @pytest.mark.parametrize("key", (None, "", "subdir1")) + def test_path_is_directory(self, query_get_as, monkeypatch, key): + base = Path("/mock/cache/ABC") + + def mock_get_info(_s, _d: str, path: Optional[Path]): + file = base / (path if path else "") + return { + "children": {}, + "details": CacheObject( + file.name, file, None, None, None, CacheType.DIRECTORY + ), + } + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(Path, "is_file", lambda self: False) + monkeypatch.setattr(Path, "exists", lambda self: True) + + response = query_get_as("fio_2", key if key else "", HTTPStatus.OK) + assert response.json == { + "directories": [], + "files": [], + "name": key if key else base.name, + "type": "DIRECTORY", + } + + def test_not_a_file(self, query_get_as, monkeypatch): + def mock_get_info(_s, _d: str, path: Optional[Path]): + raise BadDirpath("Nobody home") + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(Path, "is_file", lambda self: False) + monkeypatch.setattr(Path, "exists", lambda self: False) + + response = query_get_as("fio_2", "subdir1/f1_sym", HTTPStatus.NOT_FOUND) + assert response.json == {"message": "Nobody home"} + + def test_file_info(self, query_get_as, monkeypatch): + name = "f1.json" + base = Path("/mock/cache/ABC") + + def mock_get_info(_s, _d: str, path: Optional[Path]): + file = base / (path if path else "") + return { + "details": CacheObject(file.name, file, None, None, 16, CacheType.FILE) + } + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + response = query_get_as("fio_2", name, HTTPStatus.OK) + assert response.status_code == HTTPStatus.OK + assert response.json == {"name": name, "size": 16, "type": "FILE"} + + def test_dir_info(self, query_get_as, monkeypatch): + name = "sample1" + base = Path("/mock/cache/ABC") + + def mock_get_info(_s, _d: str, path: Optional[Path]): + file = base / (path if path else "") + return { + "children": { + "default": { + "details": CacheObject( + "default", + base / name / "default", + None, + None, + 
None, + CacheType.DIRECTORY, + ) + } + }, + "details": CacheObject( + file.name, file, None, None, 16, CacheType.DIRECTORY + ), + } + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + response = query_get_as("fio_2", "sample1", HTTPStatus.OK) + assert response.status_code == HTTPStatus.OK + assert response.json == { + "directories": [ + { + "name": "default", + "type": "DIRECTORY", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/sample1/default", + } + ], + "files": [], + "name": "sample1", + "type": "DIRECTORY", + } From c41301925738f67513b9d9648a16dc9cf998f9fc Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 6 Oct 2023 09:48:00 -0400 Subject: [PATCH 12/21] Some refactoring, test coverage & cleanup --- .../server/api/resources/datasets_contents.py | 6 +- lib/pbench/server/cache_manager.py | 65 ++------- .../test/unit/server/test_cache_manager.py | 125 +++++------------- .../unit/server/test_datasets_contents.py | 16 +-- 4 files changed, 57 insertions(+), 155 deletions(-) diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py index abf8eebe70..c3503a0ccb 100644 --- a/lib/pbench/server/api/resources/datasets_contents.py +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -69,13 +69,9 @@ def _get( target = params.uri.get("target") path = Path("" if target in ("/", None) else target) - current_app.logger.info( - "{} CONTENTS {!r}: path {!r}", dataset.name, target, str(path) - ) - cache_m = CacheManager(self.config, current_app.logger) try: - info = cache_m.get_info(dataset.resource_id, path) + info = cache_m.find_entry(dataset.resource_id, path) except (BadDirpath, CacheExtractBadPath, TarballNotFound) as e: raise APIAbort(HTTPStatus.NOT_FOUND, str(e)) except Exception as e: diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index 61bef50ae9..722d8606ff 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -26,14 +26,6 @@ class CacheManagerError(Exception): pass -class CacheMapMissing(CacheManagerError): - """Cache map hasn't been built yet.""" - - def __init__(self, resource_id: str): - super().__init__(f"Dataset {resource_id} hasn't been processed") - self.id = resource_id - - class BadDirpath(CacheManagerError): """A bad directory path was given.""" @@ -664,13 +656,11 @@ def build_map(self): self.cachemap = cmap - def traverse_cmap(self, path: Path) -> CacheMapEntry: - """Sequentially traverses the cachemap to find the leaf of a - relative path reference + def find_entry(self, path: Path) -> CacheMapEntry: + """Locate a node in the cache map Args: path: relative path of the subdirectory/file - cachemap: dictionary mapping of the root directory Raises: BadDirpath if the directory/file path is not valid @@ -678,8 +668,15 @@ def traverse_cmap(self, path: Path) -> CacheMapEntry: Returns: cache map entry if present """ - if self.cachemap is None: - raise CacheMapMissing(self.resource_id) + if str(path).startswith("/"): + raise BadDirpath( + f"The path {str(path)!r} is an absolute path," + " we expect relative path to the root directory." + ) + + if not self.cachemap: + with LockManager(self.lock) as lock: + self.get_results(lock) if str(path) in (".", ""): return self.cachemap @@ -702,42 +699,6 @@ def traverse_cmap(self, path: Path) -> CacheMapEntry: f"Can't resolve path {str(path)!r}: component {exc} is missing." 
) - def get_info(self, path: Path) -> CacheMapEntry: - """Returns the details of the given file/directory in dict format - - Args: - path: path of the file/subdirectory - - Raises: - BadDirpath on bad directory path - - Returns: - Dictionary with Details of the file/directory - - format: - { - "directories": list of subdirectories under the given directory - "files": list of files under the given directory - "location": relative path to the given file/directory - "name": name of the file/directory - "resolve_path": resolved path of the file/directory if given path is a symlink - "resolve_type": CacheType describing the type of the symlink target - "size": size of the file - "type": CacheType describing the type of the file/directory - } - """ - if str(path).startswith("/"): - raise BadDirpath( - f"The path {str(path)!r} is an absolute path," - " we expect relative path to the root directory." - ) - - if not self.cachemap: - with LockManager(self.lock) as lock: - self.get_results(lock) - - return self.traverse_cmap(path) - @staticmethod def extract(tarball_path: Path, path: str) -> Inventory: """Returns a file stream for a file within a tarball @@ -1453,7 +1414,7 @@ def create(self, tarfile_path: Path) -> Tarball: self.datasets[tarball.resource_id] = tarball return tarball - def get_info(self, dataset_id: str, path: Path) -> dict[str, Any]: + def find_entry(self, dataset_id: str, path: Path) -> dict[str, Any]: """Get information about dataset files from the cache map Args: @@ -1464,7 +1425,7 @@ def get_info(self, dataset_id: str, path: Path) -> dict[str, Any]: File Metadata """ tarball = self.find_dataset(dataset_id) - return tarball.get_info(path) + return tarball.find_entry(path) def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]: """Return filestream data for a file within a dataset tarball diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index ec97462f28..f55d4777bb 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -726,7 +726,7 @@ def test_cache_map_success(self, make_logger, monkeypatch, tmp_path): ), ], ) - def test_cache_map_bad_dir_path( + def test_find_bad_path( self, make_logger, monkeypatch, tmp_path, file_path, expected_msg ): """Test to check bad directory or file path""" @@ -745,9 +745,39 @@ def test_cache_map_bad_dir_path( tb.unpacked = tar_dir tb.build_map() with pytest.raises(BadDirpath) as exc: - tb.get_info(Path(file_path)) + tb.find_entry(Path(file_path)) assert str(exc.value) == expected_msg + def test_find_builds(self, monkeypatch, tmp_path, make_logger): + """Test that find can dynamically build a cache map""" + + called = False + entered = False + tb: Optional[Tarball] = None + + def fake_get_results(_l): + nonlocal called + called = True + tb.cachemap = {} + + def fake_enter(_s): + nonlocal entered + entered = True + + tar = Path("/mock/dir_name.tar.xz") + cache = Path("/mock/.cache") + with monkeypatch.context() as m: + m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) + m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) + m.setattr(LockManager, "__enter__", fake_enter) + m.setattr(Path, "open", lambda _p, _m: None) + tb = Tarball( + tar, "ABC", Controller(Path("/mock/archive"), cache, make_logger) + ) + m.setattr(tb, "get_results", fake_get_results) + assert tb.find_entry(Path(".")) == {} + assert called and entered + @pytest.mark.parametrize( "file_path, 
location, name, resolve_path, resolve_type, size, file_type", [ @@ -898,7 +928,7 @@ def test_cache_map_bad_dir_path( ), ], ) - def test_cache_map_traverse_cmap( + def test_find_entry( self, make_logger, monkeypatch, @@ -936,7 +966,7 @@ def test_cache_map_traverse_cmap( resolve_path = tar_dir / str(resolve_path).removeprefix(abs_pref) # test traverse with random path - c_map = tb.traverse_cmap(Path(file_path)) + c_map = tb.find_entry(Path(file_path)) if file_type is CacheType.DIRECTORY: assert sorted(c_map.keys()) == [ "children", @@ -949,95 +979,10 @@ def test_cache_map_traverse_cmap( assert c_map["details"].location == Path(location) assert c_map["details"].name == name assert c_map["details"].resolve_path == resolve_path - assert c_map["details"].resolve_type == resolve_type + assert c_map["details"].resolve_type is resolve_type assert c_map["details"].size == size assert c_map["details"].type is file_type - @pytest.mark.parametrize( - "file_path, expected_msg", - [ - ( - "subdir1/f11.txt", - { - "location": Path("subdir1/f11.txt"), - "name": "f11.txt", - "resolve_path": None, - "resolve_type": None, - "size": 14, - "type": CacheType.FILE, - }, - ), - ( - "subdir1", - { - "children": [ - "f11.txt", - "f12_sym", - "subdir11", - "subdir12", - "subdir13", - "subdir14", - ], - "location": Path("subdir1"), - "name": "subdir1", - "resolve_path": None, - "resolve_type": None, - "size": None, - "type": CacheType.DIRECTORY, - }, - ), - ( - "subdir1/subdir11", - { - "children": [], - "location": Path("subdir1/subdir11"), - "name": "subdir11", - "resolve_path": None, - "resolve_type": None, - "size": None, - "type": CacheType.DIRECTORY, - }, - ), - ( - "subdir1/subdir14/subdir141/f1413_sym", - { - "location": Path("subdir1/subdir14/subdir141/f1413_sym"), - "name": "f1413_sym", - "resolve_path": Path("subdir1/subdir14/subdir141"), - "resolve_type": CacheType.DIRECTORY, - "size": None, - "type": CacheType.SYMLINK, - }, - ), - ], - ) - def test_cache_map_get_info_cmap( - self, make_logger, monkeypatch, tmp_path, file_path, expected_msg - ): - """Test to check if the info returned by the cachemap is correct""" - tar = Path("/mock/dir_name.tar.xz") - cache = Path("/mock/.cache") - - with monkeypatch.context() as m: - m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) - m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) - tb = Tarball( - tar, "ABC", Controller(Path("/mock/archive"), cache, make_logger) - ) - tar_dir = TestCacheManager.MockController.generate_test_result_tree( - tmp_path, "dir_name" - ) - tb.unpacked = tar_dir - tb.build_map() - - # test get_info with random path - file_info = tb.get_info(Path(file_path)) - for k in expected_msg.keys(): - if k == "children": - assert sorted(file_info[k].keys()) == expected_msg[k] - else: - assert getattr(file_info["details"], k) == expected_msg[k] - @pytest.mark.parametrize( "file_path,is_unpacked,exp_stream", [ diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py index 49bcb4f6c9..d1981dcbae 100644 --- a/lib/pbench/test/unit/server/test_datasets_contents.py +++ b/lib/pbench/test/unit/server/test_datasets_contents.py @@ -64,7 +64,7 @@ def test_unauthorized_access(self, query_get_as): def test_path_is_directory(self, query_get_as, monkeypatch, key): base = Path("/mock/cache/ABC") - def mock_get_info(_s, _d: str, path: Optional[Path]): + def mock_find_entry(_s, _d: str, path: Optional[Path]): file = base / (path if path else "") return { 
"children": {}, @@ -73,7 +73,7 @@ def mock_get_info(_s, _d: str, path: Optional[Path]): ), } - monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) monkeypatch.setattr(Path, "is_file", lambda self: False) monkeypatch.setattr(Path, "exists", lambda self: True) @@ -86,10 +86,10 @@ def mock_get_info(_s, _d: str, path: Optional[Path]): } def test_not_a_file(self, query_get_as, monkeypatch): - def mock_get_info(_s, _d: str, path: Optional[Path]): + def mock_find_entry(_s, _d: str, path: Optional[Path]): raise BadDirpath("Nobody home") - monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) monkeypatch.setattr(Path, "is_file", lambda self: False) monkeypatch.setattr(Path, "exists", lambda self: False) @@ -100,13 +100,13 @@ def test_file_info(self, query_get_as, monkeypatch): name = "f1.json" base = Path("/mock/cache/ABC") - def mock_get_info(_s, _d: str, path: Optional[Path]): + def mock_find_entry(_s, _d: str, path: Optional[Path]): file = base / (path if path else "") return { "details": CacheObject(file.name, file, None, None, 16, CacheType.FILE) } - monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) response = query_get_as("fio_2", name, HTTPStatus.OK) assert response.status_code == HTTPStatus.OK assert response.json == {"name": name, "size": 16, "type": "FILE"} @@ -115,7 +115,7 @@ def test_dir_info(self, query_get_as, monkeypatch): name = "sample1" base = Path("/mock/cache/ABC") - def mock_get_info(_s, _d: str, path: Optional[Path]): + def mock_find_entry(_s, _d: str, path: Optional[Path]): file = base / (path if path else "") return { "children": { @@ -135,7 +135,7 @@ def mock_get_info(_s, _d: str, path: Optional[Path]): ), } - monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) response = query_get_as("fio_2", "sample1", HTTPStatus.OK) assert response.status_code == HTTPStatus.OK assert response.json == { From 7eb02d243ec0d36614c477bd184fe64ef3cbd6b1 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 6 Oct 2023 12:23:38 -0400 Subject: [PATCH 13/21] More cleanup and testing --- .../server/api/resources/datasets_contents.py | 17 +++++- .../test/functional/server/test_datasets.py | 3 + .../unit/server/test_datasets_contents.py | 56 +++++++++++++------ 3 files changed, 56 insertions(+), 20 deletions(-) diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py index c3503a0ccb..8c7af31201 100644 --- a/lib/pbench/server/api/resources/datasets_contents.py +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -83,20 +83,25 @@ def _get( ) details: CacheObject = info["details"] + current_app.logger.info( + "{!r} -> {{name: {!r}, loc: {!r}}}", + str(path), + details.name, + str(details.location), + ) if details.type is CacheType.DIRECTORY: children = info["children"] if "children" in info else {} dir_list = [] file_list = [] for c, value in children.items(): - p = path / c d: CacheObject = value["details"] if d.type == CacheType.DIRECTORY: dir_list.append( { "name": c, "type": d.type.name, - "uri": f"{origin}/contents/{p}", + "uri": f"{origin}/contents/{d.location}", } ) elif d.type == CacheType.FILE: @@ -105,23 +110,29 @@ def _get( "name": c, "size": d.size, "type": d.type.name, - "uri": f"{origin}/inventory/{p}", + "uri": 
f"{origin}/inventory/{d.location}", } ) dir_list.sort(key=lambda d: d["name"]) file_list.sort(key=lambda d: d["name"]) + + # Normalize because we want the "root" directory to be reported as + # "" rather than as Path's favored "." + loc = "" if str(details.location) == "." else str(details.location) val = { "name": details.name, "type": details.type.name, "directories": dir_list, "files": file_list, + "uri": f"{origin}/contents/{loc}", } else: val = { "name": details.name, "size": details.size, "type": details.type.name, + "uri": f"{origin}/inventory/{details.location}", } try: diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py index 81a50b29e5..c5f701a970 100644 --- a/lib/pbench/test/functional/server/test_datasets.py +++ b/lib/pbench/test/functional/server/test_datasets.py @@ -689,6 +689,9 @@ def test_contents(self, server_client: PbenchServerClient, login_user): # Even if they're empty, both values must be lists assert isinstance(json["directories"], list) assert isinstance(json["files"], list) + assert json["name"] == "" + assert json["type"] == "DIRECTORY" + assert json["uri"] == response.url # We expect to find at least a metadata.log at the top level of the # tarball unless the dataset is marked archive-only. diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py index d1981dcbae..32dfdb163a 100644 --- a/lib/pbench/test/unit/server/test_datasets_contents.py +++ b/lib/pbench/test/unit/server/test_datasets_contents.py @@ -62,14 +62,17 @@ def test_unauthorized_access(self, query_get_as): @pytest.mark.parametrize("key", (None, "", "subdir1")) def test_path_is_directory(self, query_get_as, monkeypatch, key): - base = Path("/mock/cache/ABC") - def mock_find_entry(_s, _d: str, path: Optional[Path]): - file = base / (path if path else "") + file = path if path else Path("") return { "children": {}, "details": CacheObject( - file.name, file, None, None, None, CacheType.DIRECTORY + file.name if key else "", + file, + None, + None, + None, + CacheType.DIRECTORY, ), } @@ -81,8 +84,9 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]): assert response.json == { "directories": [], "files": [], - "name": key if key else base.name, + "name": key if key else "", "type": "DIRECTORY", + "uri": f"https://localhost/api/v1/datasets/random_md5_string4/contents/{key if key else ''}", } def test_not_a_file(self, query_get_as, monkeypatch): @@ -98,40 +102,50 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]): def test_file_info(self, query_get_as, monkeypatch): name = "f1.json" - base = Path("/mock/cache/ABC") def mock_find_entry(_s, _d: str, path: Optional[Path]): - file = base / (path if path else "") return { - "details": CacheObject(file.name, file, None, None, 16, CacheType.FILE) + "details": CacheObject(path.name, path, None, None, 16, CacheType.FILE) } monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) response = query_get_as("fio_2", name, HTTPStatus.OK) assert response.status_code == HTTPStatus.OK - assert response.json == {"name": name, "size": 16, "type": "FILE"} + assert response.json == { + "name": name, + "size": 16, + "type": "FILE", + "uri": f"https://localhost/api/v1/datasets/random_md5_string4/inventory/{name}", + } def test_dir_info(self, query_get_as, monkeypatch): - name = "sample1" - base = Path("/mock/cache/ABC") - def mock_find_entry(_s, _d: str, path: Optional[Path]): - file = base / (path if path else "") + base = path if 
path else Path("") return { "children": { "default": { "details": CacheObject( "default", - base / name / "default", + base / "default", None, None, None, CacheType.DIRECTORY, ) - } + }, + "file.txt": { + "details": CacheObject( + "file.txt", + base / "file.txt", + None, + None, + 42, + CacheType.FILE, + ) + }, }, "details": CacheObject( - file.name, file, None, None, 16, CacheType.DIRECTORY + base.name, base, None, None, None, CacheType.DIRECTORY ), } @@ -146,7 +160,15 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]): "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/sample1/default", } ], - "files": [], + "files": [ + { + "name": "file.txt", + "size": 42, + "type": "FILE", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/file.txt", + } + ], "name": "sample1", "type": "DIRECTORY", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/sample1", } From c0dd74a390f9d04a7311d44770f9b3644fd8746a Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 6 Oct 2023 15:41:13 -0400 Subject: [PATCH 14/21] Add SYMLINK "contents" support & testing --- .../server/api/resources/datasets_contents.py | 39 +++++++--- .../unit/server/test_datasets_contents.py | 71 ++++++++++++++++++- 2 files changed, 99 insertions(+), 11 deletions(-) diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py index 8c7af31201..472c97e807 100644 --- a/lib/pbench/server/api/resources/datasets_contents.py +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -83,12 +83,6 @@ def _get( ) details: CacheObject = info["details"] - current_app.logger.info( - "{!r} -> {{name: {!r}, loc: {!r}}}", - str(path), - details.name, - str(details.location), - ) if details.type is CacheType.DIRECTORY: children = info["children"] if "children" in info else {} dir_list = [] @@ -96,7 +90,7 @@ def _get( for c, value in children.items(): d: CacheObject = value["details"] - if d.type == CacheType.DIRECTORY: + if d.type is CacheType.DIRECTORY: dir_list.append( { "name": c, @@ -104,7 +98,7 @@ def _get( "uri": f"{origin}/contents/{d.location}", } ) - elif d.type == CacheType.FILE: + elif d.type in (CacheType.FILE, CacheType.OTHER): file_list.append( { "name": c, @@ -113,6 +107,20 @@ def _get( "uri": f"{origin}/inventory/{d.location}", } ) + elif d.type is CacheType.SYMLINK: + if d.resolve_type is CacheType.DIRECTORY: + access = "contents" + else: + access = "inventory" + file_list.append( + { + "name": c, + "type": d.type.name, + "link": str(d.resolve_path), + "link_type": d.resolve_type.name, + "uri": f"{origin}/{access}/{d.resolve_path}", + } + ) dir_list.sort(key=lambda d: d["name"]) file_list.sort(key=lambda d: d["name"]) @@ -128,12 +136,23 @@ def _get( "uri": f"{origin}/contents/{loc}", } else: + access = "inventory" + if details.type is CacheType.SYMLINK: + link = str(details.resolve_path) + if details.resolve_type is CacheType.DIRECTORY: + access = "contents" + else: + link = str(details.location) val = { "name": details.name, - "size": details.size, "type": details.type.name, - "uri": f"{origin}/inventory/{details.location}", + "uri": f"{origin}/{access}/{link}", } + if details.type is CacheType.SYMLINK: + val["link"] = str(details.resolve_path) + val["link_type"] = details.resolve_type.name + elif details.type is CacheType.FILE: + val["size"] = details.size try: return jsonify(val) diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py 
b/lib/pbench/test/unit/server/test_datasets_contents.py index 32dfdb163a..45d059cf40 100644 --- a/lib/pbench/test/unit/server/test_datasets_contents.py +++ b/lib/pbench/test/unit/server/test_datasets_contents.py @@ -118,6 +118,25 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]): "uri": f"https://localhost/api/v1/datasets/random_md5_string4/inventory/{name}", } + def test_link_info(self, query_get_as, monkeypatch): + name = "test/slink" + + def mock_find_entry(_s, _d: str, path: Optional[Path]): + return { + "details": CacheObject(path.name, path, Path("test1"), CacheType.DIRECTORY, None, CacheType.SYMLINK) + } + + monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) + response = query_get_as("fio_2", name, HTTPStatus.OK) + assert response.status_code == HTTPStatus.OK + assert response.json == { + "name": "slink", + "type": "SYMLINK", + "link": "test1", + "link_type": "DIRECTORY", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/test1", + } + def test_dir_info(self, query_get_as, monkeypatch): def mock_find_entry(_s, _d: str, path: Optional[Path]): base = path if path else Path("") @@ -143,6 +162,36 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]): CacheType.FILE, ) }, + "symfile": { + "details": CacheObject( + "symfile", + base / "symfile", + Path("metadata.log"), + CacheType.FILE, + None, + CacheType.SYMLINK, + ) + }, + "symdir": { + "details": CacheObject( + "symdir", + base / "symdir", + Path("realdir"), + CacheType.DIRECTORY, + None, + CacheType.SYMLINK, + ) + }, + "mount": { + "details": CacheObject( + "mount", + base / "mount", + None, + None, + 20, + CacheType.OTHER, + ) + }, }, "details": CacheObject( base.name, base, None, None, None, CacheType.DIRECTORY @@ -166,7 +215,27 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]): "size": 42, "type": "FILE", "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/file.txt", - } + }, + { + "name": "mount", + "size": 20, + "type": "OTHER", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/mount", + }, + { + "name": "symdir", + "type": "SYMLINK", + "link": "realdir", + "link_type": "DIRECTORY", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/realdir", + }, + { + "name": "symfile", + "type": "SYMLINK", + "link": "metadata.log", + "link_type": "FILE", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/metadata.log", + }, ], "name": "sample1", "type": "DIRECTORY", From 93d399d998aa6529654d021336f5e2ee00ca0b78 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Fri, 6 Oct 2023 15:53:22 -0400 Subject: [PATCH 15/21] Dang, I scratched the black paint ... 
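
No functional change: the previous commit's `test_link_info` mock built its
`CacheObject` on one long line, which `black` rejects, so this just reflows
the call to one argument per line. For the record, the reflowed form (taken
verbatim from the diff below):

    "details": CacheObject(
        path.name,
        path,
        Path("test1"),
        CacheType.DIRECTORY,
        None,
        CacheType.SYMLINK,
    )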
--- lib/pbench/test/unit/server/test_datasets_contents.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py index 45d059cf40..71deba059f 100644 --- a/lib/pbench/test/unit/server/test_datasets_contents.py +++ b/lib/pbench/test/unit/server/test_datasets_contents.py @@ -123,7 +123,14 @@ def test_link_info(self, query_get_as, monkeypatch): def mock_find_entry(_s, _d: str, path: Optional[Path]): return { - "details": CacheObject(path.name, path, Path("test1"), CacheType.DIRECTORY, None, CacheType.SYMLINK) + "details": CacheObject( + path.name, + path, + Path("test1"), + CacheType.DIRECTORY, + None, + CacheType.SYMLINK, + ) } monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry) From 0a6380830bb854b5d7c8d4d3418e46a372b372a5 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Mon, 9 Oct 2023 07:44:09 -0400 Subject: [PATCH 16/21] small mock cleanup --- lib/pbench/test/unit/server/test_cache_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index f55d4777bb..685acc9c53 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -652,7 +652,7 @@ def mock_resolve(_path, _strict=False): m.setattr(Path, "mkdir", lambda path, parents=False, exist_ok=False: None) m.setattr(Path, "touch", lambda path, exist_ok=False: None) m.setattr(Path, "resolve", mock_resolve) - m.setattr(Path, "iterdir", lambda path: []) + m.setattr(Path, "glob", lambda path, p: []) m.setattr("pbench.server.cache_manager.subprocess.run", mock_run) m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) From a3d671344a65b40f16cf2c9274da15f9dcb27092 Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Wed, 11 Oct 2023 17:15:00 -0400 Subject: [PATCH 17/21] Clean up symlink handling (Plus messy merge with cache manager.) --- .../server/api/resources/datasets_contents.py | 2 +- lib/pbench/server/cache_manager.py | 58 ++++++------ .../test/functional/server/test_datasets.py | 2 +- .../test/unit/server/test_cache_manager.py | 89 +++++++++---------- .../unit/server/test_indexing_tarballs.py | 12 +-- 5 files changed, 84 insertions(+), 79 deletions(-) diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py index 472c97e807..5ba337c315 100644 --- a/lib/pbench/server/api/resources/datasets_contents.py +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -67,7 +67,7 @@ def _get( dataset: Dataset = params.uri["dataset"] target = params.uri.get("target") - path = Path("" if target in ("/", None) else target) + path = Path("." 
if target in ("/", None) else target) cache_m = CacheManager(self.config, current_app.logger) try: diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index 722d8606ff..b55bf674ce 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -13,8 +13,6 @@ from pbench.common import MetadataLog, selinux from pbench.server import JSONOBJECT, OperationCode, PathLike, PbenchServerConfig from pbench.server.database.models.audit import Audit, AuditStatus, AuditType -from pbench.server import JSONOBJECT, OperationCode, PathLike, PbenchServerConfig -from pbench.server.database.models.audit import Audit, AuditStatus, AuditType from pbench.server.database.models.datasets import Dataset from pbench.server.utils import get_tarball_md5 @@ -73,7 +71,6 @@ class MetadataError(CacheManagerError): def __init__(self, tarball: Path, error: Exception): super().__init__( f"A problem occurred processing metadata.log from {tarball}: {str(error)!r}" - f"A problem occurred processing metadata.log from {tarball}: {str(error)!r}" ) self.tarball = tarball self.error = str(error) @@ -100,11 +97,23 @@ def __init__(self, tarball: Path, error: str): class CacheType(Enum): - FILE = auto() + + """A directory""" + DIRECTORY = auto() - SYMLINK = auto() + + """A regular file""" + FILE = auto() + + """An invalid symlink (absolute or outside tarball)""" + BROKEN = auto() + + """A device or mount point or other unsupported inode type""" OTHER = auto() + """A relative symbolic link within the tarball""" + SYMLINK = auto() + @dataclass class CacheObject: @@ -144,21 +153,23 @@ def create(cls, root: Path, path: Path) -> "CacheObject": if path.is_symlink(): ftype = CacheType.SYMLINK link_path = path.readlink() - try: - if link_path.is_absolute(): - raise ValueError("symlink path is absolute") - r_path = path.resolve(strict=True) - resolve_path = r_path.relative_to(root) - except (FileNotFoundError, ValueError): + if link_path.is_absolute(): resolve_path = link_path - resolve_type = CacheType.OTHER + resolve_type = CacheType.BROKEN else: - if r_path.is_dir(): - resolve_type = CacheType.DIRECTORY - elif r_path.is_file(): - resolve_type = CacheType.FILE + try: + r_path = path.resolve(strict=True) + resolve_path = r_path.relative_to(root) + except (FileNotFoundError, ValueError): + resolve_path = link_path + resolve_type = CacheType.BROKEN else: - resolve_type = CacheType.OTHER + if r_path.is_dir(): + resolve_type = CacheType.DIRECTORY + elif r_path.is_file(): + resolve_type = CacheType.FILE + else: + resolve_type = CacheType.OTHER elif path.is_file(): ftype = CacheType.FILE size = path.stat().st_size @@ -169,7 +180,7 @@ def create(cls, root: Path, path: Path) -> "CacheObject": return cls( name="" if path == root else path.name, - location=Path("") if path == root else path.relative_to(root), + location=path.relative_to(root), resolve_path=resolve_path, resolve_type=resolve_type, size=size, @@ -314,7 +325,6 @@ def __init__( self, stream: IO[bytes], lock: Optional[LockRef] = None, - lock: Optional[LockRef] = None, subproc: Optional[subprocess.Popen] = None, ): """Construct an instance to track extracted inventory @@ -326,8 +336,6 @@ def __init__( stream: the data stream of a specific tarball member lock: a cache lock reference subproc: a Popen object to clean up on close - lock: a cache lock reference - subproc: a Popen object to clean up on close """ self.stream = stream self.lock = lock @@ -356,7 +364,6 @@ def close(self): while self.subproc.stderr.read(4096): pass 
self.subproc.wait(60.0) - except Exception as e: except Exception as e: # Release our reference to the subprocess.Popen object so that the # object can be reclaimed. @@ -470,7 +477,7 @@ def __init__(self, path: Path, resource_id: str, controller: "Controller"): self.cache.mkdir(parents=True, exist_ok=True) # Record hierarchy of a Tar ball - self.cachemap: Optional[CacheMap] = None + self.cachemap: Optional[CacheMapEntry] = None # Record the base of the unpacked files for cache management, which # is (self.cache / self.name) and will be None when the cache is @@ -646,7 +653,7 @@ def build_map(self): dir_queue = deque(((self.unpacked, cmap),)) while dir_queue: unpacked, parent_map = dir_queue.popleft() - curr: CacheMapEntry = {} + curr: CacheMap = {} for path in unpacked.glob("*"): details = CacheObject.create(self.unpacked, path) curr[path.name] = {"details": details} @@ -682,7 +689,7 @@ def find_entry(self, path: Path) -> CacheMapEntry: return self.cachemap path_parts = path.parts[:-1] - node: CacheMapEntry = self.cachemap["children"] + node: CacheMap = self.cachemap["children"] try: for dir in path_parts: @@ -958,7 +965,6 @@ def get_results(self, lock: LockManager) -> Path: root=audit, status=AuditStatus.FAILURE if error else AuditStatus.SUCCESS, attributes=attributes, - attributes=attributes, ) lock.downgrade() diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py index c5f701a970..9ff88d23fa 100644 --- a/lib/pbench/test/functional/server/test_datasets.py +++ b/lib/pbench/test/functional/server/test_datasets.py @@ -654,7 +654,7 @@ def test_list_filter_typed(self, server_client: PbenchServerClient, login_user): class TestInventory: """Validate APIs involving tarball inventory""" - @pytest.mark.dependency(name="contents", depends=["index"], scope="session") + @pytest.mark.dependency(name="contents", depends=["upload"], scope="session") def test_contents(self, server_client: PbenchServerClient, login_user): """Check that we can retrieve the root directory TOC diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index 685acc9c53..331b219183 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -12,7 +12,6 @@ import pytest -from pbench.server import JSONOBJECT, OperationCode from pbench.server import JSONOBJECT, OperationCode from pbench.server.cache_manager import ( BadDirpath, @@ -24,7 +23,6 @@ DuplicateTarball, Inventory, LockManager, - LockManager, LockRef, MetadataError, Tarball, @@ -453,7 +451,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: f1415_sym -> ./f1412_sym f1416_sym -> ../../subdir12/f122_sym f11.txt - f12_sym -> .. + f12_sym -> ../../.. 
f1.json metadata.log @@ -518,24 +516,38 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: parents=True, exist_ok=True ) (sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1411.txt").touch() - sym_file = sub_dir / "subdir1" / "f12_sym" - os.symlink(Path(".."), sym_file) - sym_file = sub_dir / "subdir1" / "subdir12" / "f121_sym" - os.symlink(Path("../..") / "subdir1" / "subdir15", sym_file) - sym_file = sub_dir / "subdir1" / "subdir12" / "f122_sym" - os.symlink(Path(".") / "bad_subdir" / "nonexistent_file.txt", sym_file) - sym_file = sub_dir / "subdir1" / "subdir13" / "f131_sym" - os.symlink(Path("/etc") / "passwd", sym_file) - sym_file = sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1412_sym" - os.symlink(sub_dir / "subdir1" / "f11.txt", sym_file) - sym_file = sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1413_sym" - os.symlink(Path("..") / "subdir141", sym_file) - sym_file = sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1414_sym" - os.symlink(Path(".") / "f1411.txt", sym_file) - sym_file = sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1415_sym" - os.symlink(Path(".") / "f1412_sym", sym_file) - sym_file = sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1416_sym" - os.symlink(Path("../..") / "subdir12" / "f122_sym", sym_file) + os.symlink(Path("../../.."), sub_dir / "subdir1" / "f12_sym") + os.symlink( + Path("../..") / "subdir1" / "subdir15", + sub_dir / "subdir1" / "subdir12" / "f121_sym", + ) + os.symlink( + Path(".") / "bad_subdir" / "nonexistent_file.txt", + sub_dir / "subdir1" / "subdir12" / "f122_sym", + ) + os.symlink( + Path("/etc/passwd"), sub_dir / "subdir1" / "subdir13" / "f131_sym" + ) + os.symlink( + sub_dir / "subdir1" / "f11.txt", + sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1412_sym", + ) + os.symlink( + Path("..") / "subdir141", + sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1413_sym", + ) + os.symlink( + Path(".") / "f1411.txt", + sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1414_sym", + ) + os.symlink( + Path(".") / "f1412_sym", + sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1415_sym", + ) + os.symlink( + Path("../..") / "subdir12" / "f122_sym", + sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1416_sym", + ) return sub_dir @@ -642,7 +654,7 @@ def mock_run(args, **_kwargs): args, returncode=0, stdout="Successfully Unpacked!", stderr=None ) - def mock_resolve(_path, _strict=False): + def mock_resolve(_path, strict: bool = False): """In this scenario, there are no symlinks, so resolve() should never be called.""" raise AssertionError("Unexpected call to Path.resolve()") @@ -831,8 +843,8 @@ def fake_enter(_s): "subdir1/f12_sym", "subdir1/f12_sym", "f12_sym", - Path("."), - CacheType.DIRECTORY, + Path("../../.."), + CacheType.BROKEN, None, CacheType.SYMLINK, ), @@ -841,7 +853,7 @@ def fake_enter(_s): "subdir1/subdir12/f121_sym", "f121_sym", Path("../../subdir1/subdir15"), - CacheType.OTHER, + CacheType.BROKEN, None, CacheType.SYMLINK, ), @@ -850,7 +862,7 @@ def fake_enter(_s): "subdir1/subdir12/f122_sym", "f122_sym", Path("bad_subdir/nonexistent_file.txt"), - CacheType.OTHER, + CacheType.BROKEN, None, CacheType.SYMLINK, ), @@ -859,7 +871,7 @@ def fake_enter(_s): "subdir1/subdir13/f131_sym", "f131_sym", Path("/etc/passwd"), - CacheType.OTHER, + CacheType.BROKEN, None, CacheType.SYMLINK, ), @@ -886,7 +898,7 @@ def fake_enter(_s): "subdir1/subdir14/subdir141/f1412_sym", "f1412_sym", Path("/mock_absolute_path/subdir1/f11.txt"), - CacheType.OTHER, + CacheType.BROKEN, None, CacheType.SYMLINK, ), @@ -922,7 
+934,7 @@ def fake_enter(_s): "subdir1/subdir14/subdir141/f1416_sym", "f1416_sym", Path("../../subdir12/f122_sym"), - CacheType.OTHER, + CacheType.BROKEN, None, CacheType.SYMLINK, ), @@ -967,15 +979,8 @@ def test_find_entry( # test traverse with random path c_map = tb.find_entry(Path(file_path)) - if file_type is CacheType.DIRECTORY: - assert sorted(c_map.keys()) == [ - "children", - "details", - ], "Directory should have children and details" - else: - assert sorted(c_map.keys()) == [ - "details" - ], "Non-directory should have only details" + assert "details" in c_map + assert (file_type is not CacheType.DIRECTORY) ^ ("children" in c_map) assert c_map["details"].location == Path(location) assert c_map["details"].name == name assert c_map["details"].resolve_path == resolve_path @@ -1893,12 +1898,6 @@ def test_lockmanager(self, monkeypatch): ("upgrade"), ("downgrade"), ] - assert FakeLockRef.operations == [ - assert FakeLockRef.operations == [ - ("acquire", False, True), - ("upgrade"), - ("downgrade"), - ] assert FakeLockRef.operations == [ ("acquire", False, True), ("upgrade"), @@ -1923,8 +1922,6 @@ def test_lockmanager(self, monkeypatch): assert not lock.wait assert FakeLockRef.operations == [("acquire", False, False)] assert FakeLockRef.operations == [("acquire", False, False), ("release")] - assert FakeLockRef.operations == [("acquire", False, False)] - assert FakeLockRef.operations == [("acquire", False, False), ("release")] # keep option FakeLockRef.reset() diff --git a/lib/pbench/test/unit/server/test_indexing_tarballs.py b/lib/pbench/test/unit/server/test_indexing_tarballs.py index 336e81b18c..adc46a50dd 100644 --- a/lib/pbench/test/unit/server/test_indexing_tarballs.py +++ b/lib/pbench/test/unit/server/test_indexing_tarballs.py @@ -5,7 +5,7 @@ from pathlib import Path from signal import SIGHUP import time -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional import pytest @@ -292,13 +292,15 @@ def __exit__(self, *exc): def upgrade(self): """Upgrade a shared lock to exclusive""" - if not self.exclusive: - self.exclusive = True + self.exclusive = True def downgrade(self): """Downgrade an exclusive lock to shared""" - if self.exclusive: - self.exclusive = False + self.exclusive = False + + def release(self): + """Release a lock kept past its expiration date""" + self.exclusive = False class FakeController: From 540bd2d4de2a1f8afbd56e7d7e4744e0ec6a489d Mon Sep 17 00:00:00 2001 From: David Butenhof Date: Thu, 12 Oct 2023 07:54:28 -0400 Subject: [PATCH 18/21] More cleanup again --- lib/pbench/cli/server/tree_manage.py | 4 ---- .../server/api/resources/datasets_contents.py | 8 +++++-- lib/pbench/server/cache_manager.py | 23 +------------------ lib/pbench/server/indexing_tarballs.py | 5 ---- .../test/unit/server/test_cache_manager.py | 4 ---- .../unit/server/test_datasets_contents.py | 17 +++++--------- 6 files changed, 13 insertions(+), 48 deletions(-) diff --git a/lib/pbench/cli/server/tree_manage.py b/lib/pbench/cli/server/tree_manage.py index 7eb08f4ed1..e70fb93b97 100644 --- a/lib/pbench/cli/server/tree_manage.py +++ b/lib/pbench/cli/server/tree_manage.py @@ -30,10 +30,6 @@ def reclaim_cache(tree: CacheManager, logger: Logger, lifetime: float = CACHE_LI has_cache = 0 reclaimed = 0 reclaim_failed = 0 - total_count = 0 - has_cache = 0 - reclaimed = 0 - reclaim_failed = 0 for tarball in tree.datasets.values(): total_count += 1 if tarball.unpacked: diff --git a/lib/pbench/server/api/resources/datasets_contents.py 
b/lib/pbench/server/api/resources/datasets_contents.py index 5ba337c315..5f89f7e7fa 100644 --- a/lib/pbench/server/api/resources/datasets_contents.py +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -127,9 +127,13 @@ def _get( # Normalize because we want the "root" directory to be reported as # "" rather than as Path's favored "." - loc = "" if str(details.location) == "." else str(details.location) + loc = str(details.location) + name = details.name + if loc == ".": + loc = "" + name = "" val = { - "name": details.name, + "name": name, "type": details.type.name, "directories": dir_list, "files": file_list, diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index b55bf674ce..642a3235d2 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -21,7 +21,6 @@ class CacheManagerError(Exception): """Base class for exceptions raised from this module.""" pass - pass class BadDirpath(CacheManagerError): @@ -344,8 +343,6 @@ def __init__( def close(self): """Close the byte stream and clean up the Popen object""" - exception = None - exception = None if self.subproc: try: @@ -369,7 +366,6 @@ def close(self): # object can be reclaimed. self.subproc = None exception = e - exception = e if self.lock: try: self.lock.release() @@ -384,13 +380,6 @@ def close(self): if exception: raise exception - # NOTE: if both subprocess cleanup and unlock fail with exceptions, we - # raise the latter, and the former will be ignored. In practice, that's - # not a problem as we only construct an Inventory with a subprocess - # reference for extract, which doesn't need to lock a cache directory. - if exception: - raise exception - def getbuffer(self): """Return the underlying byte buffer (used by send_file)""" return self.stream.getbuffer() @@ -494,16 +483,6 @@ def __init__(self, path: Path, resource_id: str, controller: "Controller"): # Record the path of the companion MD5 file self.md5_path: Path = path.with_suffix(".xz.md5") - # Record the lockf file path used to control cache access - self.lock: Path = self.cache / "lock" - - # Record a marker file path used to record the last cache access - # timestamp - self.last_ref: Path = self.cache / "last_ref" - - # Record the path of the companion MD5 file - self.md5_path: Path = path.with_suffix(".xz.md5") - # Record the name of the containing controller self.controller_name: str = controller.name @@ -685,7 +664,7 @@ def find_entry(self, path: Path) -> CacheMapEntry: with LockManager(self.lock) as lock: self.get_results(lock) - if str(path) in (".", ""): + if str(path) == ".": return self.cachemap path_parts = path.parts[:-1] diff --git a/lib/pbench/server/indexing_tarballs.py b/lib/pbench/server/indexing_tarballs.py index 26dec8ec84..9f33d1c104 100644 --- a/lib/pbench/server/indexing_tarballs.py +++ b/lib/pbench/server/indexing_tarballs.py @@ -363,7 +363,6 @@ def sighup_handler(*args): ptb = None tarobj: Optional[Tarball] = None tb_res = error_code["OK"] - lock = None try: # We need the fully unpacked cache tree to index it try: @@ -375,8 +374,6 @@ def sighup_handler(*args): dataset, f"Unable to find dataset: {e!s}", ) - if lock: - lock.release() continue with LockManager(tarobj.lock) as lock: @@ -512,8 +509,6 @@ def sighup_handler(*args): Audit.create( root=audit, status=doneness, attributes=attributes ) - if lock: - lock.release() try: ie_len = ie_filepath.stat().st_size except FileNotFoundError: diff --git a/lib/pbench/test/unit/server/test_cache_manager.py 
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 331b219183..25958b34ca 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -560,8 +560,6 @@ def __init__(self, path: Path, resource_id: str, controller: Controller):
         self.isolator = controller.path / resource_id
         self.lock = self.cache / "lock"
         self.last_ref = self.cache / "last_ref"
-        self.lock = self.cache / "lock"
-        self.last_ref = self.cache / "last_ref"
         self.unpacked = None
         self.cachemap = None
         self.controller = controller
@@ -1888,10 +1886,8 @@ def test_lockmanager(self, monkeypatch):
         assert not lock.exclusive
         assert lock.wait
         assert FakeLockRef.operations == [("acquire", False, True)]
-        assert FakeLockRef.operations == [("acquire", False, True)]
         lock.upgrade()
         assert FakeLockRef.operations == [("acquire", False, True), ("upgrade")]
-        assert FakeLockRef.operations == [("acquire", False, True), ("upgrade")]
         lock.downgrade()
         assert FakeLockRef.operations == [
             ("acquire", False, True),
diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py
index 71deba059f..ff967f3fdf 100644
--- a/lib/pbench/test/unit/server/test_datasets_contents.py
+++ b/lib/pbench/test/unit/server/test_datasets_contents.py
@@ -60,10 +60,10 @@ def test_unauthorized_access(self, query_get_as):
             "message": "User drb is not authorized to READ a resource owned by test with private access"
         }
 
-    @pytest.mark.parametrize("key", (None, "", "subdir1"))
+    @pytest.mark.parametrize("key", ("", ".", "subdir1"))
     def test_path_is_directory(self, query_get_as, monkeypatch, key):
         def mock_find_entry(_s, _d: str, path: Optional[Path]):
-            file = path if path else Path("")
+            file = path if path else Path(".")
             return {
                 "children": {},
                 "details": CacheObject(
@@ -77,16 +77,14 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
             }
 
         monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry)
-        monkeypatch.setattr(Path, "is_file", lambda self: False)
-        monkeypatch.setattr(Path, "exists", lambda self: True)
-
-        response = query_get_as("fio_2", key if key else "", HTTPStatus.OK)
+        response = query_get_as("fio_2", key, HTTPStatus.OK)
+        name = "" if key == "." else key
         assert response.json == {
            "directories": [],
            "files": [],
-            "name": key if key else "",
+            "name": name,
            "type": "DIRECTORY",
-            "uri": f"https://localhost/api/v1/datasets/random_md5_string4/contents/{key if key else ''}",
+            "uri": f"https://localhost/api/v1/datasets/random_md5_string4/contents/{name}",
         }
 
     def test_not_a_file(self, query_get_as, monkeypatch):
@@ -94,9 +92,6 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
             raise BadDirpath("Nobody home")
 
         monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry)
-        monkeypatch.setattr(Path, "is_file", lambda self: False)
-        monkeypatch.setattr(Path, "exists", lambda self: False)
-
         response = query_get_as("fio_2", "subdir1/f1_sym", HTTPStatus.NOT_FOUND)
         assert response.json == {"message": "Nobody home"}
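The parametrize switch from (None, "", "subdir1") to ("", ".", "subdir1")
tracks a pathlib quirk that the server now normalizes explicitly: an empty
path collapses to ".", while the API wants to report the root as "". A
quick standalone illustration (plain Python, no pbench imports):

    from pathlib import Path

    assert str(Path("")) == "."  # pathlib's favored spelling of the root
    loc = str(Path(""))
    name = "" if loc == "." else loc  # the API layer's normalization
    assert name == ""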
From cb312b5766303c56f6df624fc0ce96fe32b6b6b6 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Thu, 12 Oct 2023 15:44:43 -0400
Subject: [PATCH 19/21] Tie down loose backups, and code cleanup

---
 .../server/api/resources/datasets_contents.py | 35 +++++++++++--------
 lib/pbench/server/cache_manager.py            | 21 ++++-------
 lib/pbench/test/unit/server/conftest.py       |  9 +++--
 .../unit/server/test_datasets_contents.py     | 35 ++++++++++++++++++-
 4 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py
index 5f89f7e7fa..166692e3ff 100644
--- a/lib/pbench/server/api/resources/datasets_contents.py
+++ b/lib/pbench/server/api/resources/datasets_contents.py
@@ -75,7 +75,7 @@ def _get(
         except (BadDirpath, CacheExtractBadPath, TarballNotFound) as e:
             raise APIAbort(HTTPStatus.NOT_FOUND, str(e))
         except Exception as e:
-            raise APIInternalError(str(e))
+            raise APIInternalError(f"Cache find error: {str(e)!r}")
 
         prefix = current_app.server_config.rest_uri
         origin = (
@@ -98,29 +98,31 @@ def _get(
                         "uri": f"{origin}/contents/{d.location}",
                     }
                 )
-            elif d.type in (CacheType.FILE, CacheType.OTHER):
-                file_list.append(
-                    {
-                        "name": c,
-                        "size": d.size,
-                        "type": d.type.name,
-                        "uri": f"{origin}/inventory/{d.location}",
-                    }
-                )
             elif d.type is CacheType.SYMLINK:
                 if d.resolve_type is CacheType.DIRECTORY:
-                    access = "contents"
+                    uri = f"{origin}/contents/{d.resolve_path}"
+                elif d.resolve_type is CacheType.FILE:
+                    uri = f"{origin}/inventory/{d.resolve_path}"
                 else:
-                    access = "inventory"
+                    uri = f"{origin}/inventory/{d.location}"
                 file_list.append(
                     {
                         "name": c,
                         "type": d.type.name,
                         "link": str(d.resolve_path),
                         "link_type": d.resolve_type.name,
-                        "uri": f"{origin}/{access}/{d.resolve_path}",
+                        "uri": uri,
                     }
                 )
+            else:
+                r = {
+                    "name": c,
+                    "type": d.type.name,
+                    "uri": f"{origin}/inventory/{d.location}",
+                }
+                if d.type is CacheType.FILE:
+                    r["size"] = d.size
+                file_list.append(r)
 
         dir_list.sort(key=lambda d: d["name"])
         file_list.sort(key=lambda d: d["name"])
@@ -141,10 +143,13 @@ def _get(
             }
         else:
             access = "inventory"
+            link = str(details.location)
             if details.type is CacheType.SYMLINK:
                 link = str(details.resolve_path)
                 if details.resolve_type is CacheType.DIRECTORY:
                     access = "contents"
+            elif details.resolve_type is not CacheType.FILE:
+                link = str(details.location)
             else:
                 link = str(details.location)
             val = {
@@ -151,9 +156,9 @@ def _get(
                 "name": details.name,
                 "type": details.type.name,
                 "uri": f"{origin}/{access}/{link}",
             }
             if details.type is CacheType.SYMLINK:
-                val["link"] = str(details.resolve_path)
+                val["link"] = link
                 val["link_type"] = details.resolve_type.name
             elif details.type is CacheType.FILE:
                 val["size"] = details.size
@@ -161,4 +166,4 @@ def _get(
         try:
             return jsonify(val)
         except Exception as e:
-            raise APIInternalError(str(e))
+            raise APIInternalError(f"JSONIFY {val}: {str(e)!r}")
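The symlink branch in the hunk above routes each link by what it resolves
to: a directory target gets a /contents URI, a file target gets /inventory
on the resolved path, and anything else (broken links, devices) falls back
to /inventory on the link's own location. Condensed into a sketch
(CacheType values as in the patch; "origin" and the "d" fields stand in
for the loop variables):

    def symlink_uri(origin: str, d) -> str:
        # Route by the resolved target's type, not by the link itself.
        if d.resolve_type is CacheType.DIRECTORY:
            return f"{origin}/contents/{d.resolve_path}"
        if d.resolve_type is CacheType.FILE:
            return f"{origin}/inventory/{d.resolve_path}"
        return f"{origin}/inventory/{d.location}"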
diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
index 642a3235d2..a1cfbff1bf 100644
--- a/lib/pbench/server/cache_manager.py
+++ b/lib/pbench/server/cache_manager.py
@@ -96,22 +96,13 @@ def __init__(self, tarball: Path, error: str):
 
 
 class CacheType(Enum):
+    """The type of a file or symlink destination"""
 
-    """A directory"""
-
-    DIRECTORY = auto()
-
-    """A regular file"""
-    FILE = auto()
-
-    """An invalid symlink (absolute or outside tarball)"""
-    BROKEN = auto()
-
-    """A device or mount point or other unsupported inode type"""
-    OTHER = auto()
-
-    """A relative symbolic link within the tarball"""
-    SYMLINK = auto()
+    DIRECTORY = auto()  # A directory
+    FILE = auto()  # A regular file
+    BROKEN = auto()  # An invalid symlink (absolute or outside tarball)
+    OTHER = auto()  # An unsupported file type (mount point, etc.)
+    SYMLINK = auto()  # A symbolic link
 
 
 @dataclass
diff --git a/lib/pbench/test/unit/server/conftest.py b/lib/pbench/test/unit/server/conftest.py
index 887231038e..3c80881ae2 100644
--- a/lib/pbench/test/unit/server/conftest.py
+++ b/lib/pbench/test/unit/server/conftest.py
@@ -14,10 +14,10 @@
 
 from cryptography.hazmat.primitives.asymmetric import rsa
 from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
-from flask.wrappers import Response
 from freezegun import freeze_time
 import jwt
 import pytest
+from requests import Response
 import responses
 
 from pbench.common import MetadataLog
@@ -226,10 +226,9 @@ def compare(message: str, response: Optional[Response]):
             if r.levelno == logging.ERROR:
                 if internal.match(str(r.msg)):
                     return
-        assert False, (
-            f"Expected pattern {internal.pattern!r} not logged "
-            f"at level 'ERROR': {[str(r.msg) for r in caplog.get_records('call')]}"
-        )
+        assert (
+            False
+        ), f"Expected pattern {internal.pattern!r} not logged at level 'ERROR': {[str(r.msg) for r in caplog.get_records('call')]}"
 
     return compare
diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py
index ff967f3fdf..d4bb5e1046 100644
--- a/lib/pbench/test/unit/server/test_datasets_contents.py
+++ b/lib/pbench/test/unit/server/test_datasets_contents.py
@@ -164,6 +164,26 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
                         CacheType.FILE,
                     )
                 },
+                "badlink": {
+                    "details": CacheObject(
+                        "badlink",
+                        base / "badlink",
+                        Path("../../../.."),
+                        CacheType.BROKEN,
+                        None,
+                        CacheType.SYMLINK,
+                    ),
+                },
+                "mountlink": {
+                    "details": CacheObject(
+                        "mountlink",
+                        base / "mountlink",
+                        Path("mount"),
+                        CacheType.OTHER,
+                        None,
+                        CacheType.SYMLINK,
+                    ),
+                },
                 "symfile": {
                     "details": CacheObject(
                         "symfile",
@@ -212,6 +232,13 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
                 }
             ],
             "files": [
+                {
+                    "name": "badlink",
+                    "type": "SYMLINK",
+                    "link": "../../../..",
+                    "link_type": "BROKEN",
+                    "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/badlink",
+                },
                 {
                     "name": "file.txt",
                     "size": 42,
@@ -220,10 +247,16 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
                 },
                 {
                     "name": "mount",
-                    "size": 20,
                     "type": "OTHER",
                     "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/mount",
                 },
+                {
+                    "name": "mountlink",
+                    "type": "SYMLINK",
+                    "link": "mount",
+                    "link_type": "OTHER",
+                    "uri": "https://localhost/api/v1/datasets/random_md5_string4/inventory/sample1/mountlink",
+                },
                 {
                     "name": "symdir",
                     "type": "SYMLINK",
From 447c74b5cec2863c4ff43893787e199faeb13750 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Mon, 16 Oct 2023 10:34:59 -0400
Subject: [PATCH 20/21] Review comments

---
 .../server/api/resources/datasets_contents.py |  7 ++---
 lib/pbench/server/cache_manager.py            | 26 +++++++++----------
 .../test/unit/server/test_cache_manager.py    | 16 +++++-------
 .../unit/server/test_datasets_contents.py     | 15 ++++++++---
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py
index 166692e3ff..9c1b8f12ae 100644
--- a/lib/pbench/server/api/resources/datasets_contents.py
+++ b/lib/pbench/server/api/resources/datasets_contents.py
@@ -145,13 +145,10 @@ def _get(
             access = "inventory"
             link = str(details.location)
             if details.type is CacheType.SYMLINK:
-                link = str(details.resolve_path)
                 if details.resolve_type is CacheType.DIRECTORY:
                     access = "contents"
-            elif details.resolve_type is not CacheType.FILE:
-                link = str(details.location)
-            else:
-                link = str(details.location)
+            if details.resolve_type in (CacheType.FILE, CacheType.DIRECTORY):
+                link = str(details.resolve_path)
             val = {
                 "name": details.name,
                 "type": details.type.name,
diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py
index a1cfbff1bf..baf9834c8b 100644
--- a/lib/pbench/server/cache_manager.py
+++ b/lib/pbench/server/cache_manager.py
@@ -98,9 +98,9 @@ def __init__(self, tarball: Path, error: str):
 
 class CacheType(Enum):
     """The type of a file or symlink destination"""
 
+    BROKEN = auto()  # An invalid symlink (absolute or outside tarball)
     DIRECTORY = auto()  # A directory
     FILE = auto()  # A regular file
-    BROKEN = auto()  # An invalid symlink (absolute or outside tarball)
     OTHER = auto()  # An unsupported file type (mount point, etc.)
     SYMLINK = auto()  # A symbolic link
@@ -143,23 +143,21 @@ def create(cls, root: Path, path: Path) -> "CacheObject":
         if path.is_symlink():
             ftype = CacheType.SYMLINK
             link_path = path.readlink()
-            if link_path.is_absolute():
+            try:
+                if link_path.is_absolute():
+                    raise ValueError("symlink target can't be absolute")
+                r_path = path.resolve(strict=True)
+                resolve_path = r_path.relative_to(root)
+            except (FileNotFoundError, ValueError):
                 resolve_path = link_path
                 resolve_type = CacheType.BROKEN
             else:
-                try:
-                    r_path = path.resolve(strict=True)
-                    resolve_path = r_path.relative_to(root)
-                except (FileNotFoundError, ValueError):
-                    resolve_path = link_path
-                    resolve_type = CacheType.BROKEN
+                if r_path.is_dir():
+                    resolve_type = CacheType.DIRECTORY
+                elif r_path.is_file():
+                    resolve_type = CacheType.FILE
                 else:
-                    if r_path.is_dir():
-                        resolve_type = CacheType.DIRECTORY
-                    elif r_path.is_file():
-                        resolve_type = CacheType.FILE
-                    else:
-                        resolve_type = CacheType.OTHER
+                    resolve_type = CacheType.OTHER
         elif path.is_file():
             ftype = CacheType.FILE
             size = path.stat().st_size
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index 25958b34ca..edaa606cfe 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -503,10 +503,6 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path:
         (sub_dir / "metadata.log").write_text(
             "[pbench]\nkey = value\n", encoding="utf-8"
         )
-        (sub_dir / "f1.json").write_text("{'json': 'value'}", encoding="utf-8")
-        (sub_dir / "metadata.log").write_text(
-            "[pbench]\nkey = value\n", encoding="utf-8"
-        )
         for i in range(1, 4):
             (sub_dir / "subdir1" / f"subdir1{i}").mkdir(parents=True, exist_ok=True)
         (sub_dir / "subdir1" / "f11.txt").write_text(
@@ -518,11 +514,11 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path:
         (sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1411.txt").touch()
         os.symlink(Path("../../.."), sub_dir / "subdir1" / "f12_sym")
         os.symlink(
-            Path("../..") / "subdir1" / "subdir15",
+            Path("../../subdir1/subdir15"),
             sub_dir / "subdir1" / "subdir12" / "f121_sym",
         )
         os.symlink(
-            Path(".") / "bad_subdir" / "nonexistent_file.txt",
+            Path("./bad_subdir/nonexistent_file.txt"),
             sub_dir / "subdir1" / "subdir12" / "f122_sym",
         )
         os.symlink(
@@ -533,7 +529,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path:
             sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1412_sym",
         )
         os.symlink(
-            Path("..") / "subdir141",
+            Path("../subdir141"),
             sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1413_sym",
         )
         os.symlink(
@@ -541,11 +537,11 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path:
             sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1414_sym",
         )
         os.symlink(
-            Path(".") / "f1412_sym",
+            Path("./f1412_sym"),
             sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1415_sym",
         )
         os.symlink(
-            Path("../..") / "subdir12" / "f122_sym",
+            Path("../../subdir12/f122_sym"),
             sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1416_sym",
         )
@@ -652,7 +648,7 @@ def mock_run(args, **_kwargs):
                 args, returncode=0, stdout="Successfully Unpacked!", stderr=None
             )
 
-        def mock_resolve(_path, strict: bool = False):
+        def mock_resolve(_path, **_kwargs):
             """In this scenario, there are no symlinks,
             so resolve() should never be called."""
             raise AssertionError("Unexpected call to Path.resolve()")
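The reworked CacheObject.create above folds symlink validation into a
single try block: an absolute target is forced down the same error path as
an unresolvable one, and only targets that stay inside the tarball root are
classified further. A standalone sketch of the resulting flow (simplified
from the hunk; the real code records a CacheObject rather than returning a
string):

    from pathlib import Path

    def classify_link(root: Path, link: Path) -> str:
        """Classify a symlink found under an unpacked tarball root."""
        target = link.readlink()
        try:
            if target.is_absolute():
                raise ValueError("symlink target can't be absolute")
            resolved = link.resolve(strict=True)  # FileNotFoundError if dangling
            resolved.relative_to(root)  # ValueError if it escapes the root
        except (FileNotFoundError, ValueError):
            return "BROKEN"
        if resolved.is_dir():
            return "DIRECTORY"
        if resolved.is_file():
            return "FILE"
        return "OTHER"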
diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py
index d4bb5e1046..2b69d528c3 100644
--- a/lib/pbench/test/unit/server/test_datasets_contents.py
+++ b/lib/pbench/test/unit/server/test_datasets_contents.py
@@ -43,18 +43,21 @@ def query_api(
         return query_api
 
     def test_get_no_dataset(self, query_get_as):
+        """This fails in Dataset SQL lookup"""
         response = query_get_as(
             "nonexistent-dataset", "metadata.log", HTTPStatus.NOT_FOUND
         )
         assert response.json == {"message": "Dataset 'nonexistent-dataset' not found"}
 
     def test_dataset_not_present(self, query_get_as):
+        """This fails in the cache manager find_dataset as there are none"""
         response = query_get_as("fio_2", "metadata.log", HTTPStatus.NOT_FOUND)
         assert response.json == {
             "message": "The dataset tarball named 'random_md5_string4' is not found"
         }
 
     def test_unauthorized_access(self, query_get_as):
+        """This fails because our default user can't read the 'test' dataset"""
         response = query_get_as("test", "metadata.log", HTTPStatus.FORBIDDEN)
         assert response.json == {
             "message": "User drb is not authorized to READ a resource owned by test with private access"
@@ -62,12 +65,14 @@ def test_unauthorized_access(self, query_get_as):
 
     @pytest.mark.parametrize("key", ("", ".", "subdir1"))
     def test_path_is_directory(self, query_get_as, monkeypatch, key):
+        """Mock a directory cache node to check the returned data"""
+
         def mock_find_entry(_s, _d: str, path: Optional[Path]):
             file = path if path else Path(".")
             return {
                 "children": {},
                 "details": CacheObject(
-                    file.name if key else "",
+                    "" if key == "." else key,
                     file,
                     None,
                     None,
@@ -88,6 +93,8 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
         }
 
     def test_not_a_file(self, query_get_as, monkeypatch):
+        """When 'find_entry' fails with an exception, we report the text"""
+
         def mock_find_entry(_s, _d: str, path: Optional[Path]):
             raise BadDirpath("Nobody home")
 
@@ -96,6 +103,7 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
         assert response.json == {"message": "Nobody home"}
 
     def test_file_info(self, query_get_as, monkeypatch):
+        """Mock a file cache node to check the returned data"""
         name = "f1.json"
 
         def mock_find_entry(_s, _d: str, path: Optional[Path]):
@@ -114,6 +122,7 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
         }
 
     def test_link_info(self, query_get_as, monkeypatch):
+        """Mock a symlink cache node to check the returned data"""
         name = "test/slink"
 
         def mock_find_entry(_s, _d: str, path: Optional[Path]):
@@ -130,7 +139,6 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
 
         monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry)
         response = query_get_as("fio_2", name, HTTPStatus.OK)
-        assert response.status_code == HTTPStatus.OK
         assert response.json == {
             "name": "slink",
             "type": "SYMLINK",
@@ -140,6 +148,8 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
         }
 
     def test_dir_info(self, query_get_as, monkeypatch):
+        """Confirm the returned data for a directory with various children"""
+
        def mock_find_entry(_s, _d: str, path: Optional[Path]):
             base = path if path else Path("")
             return {
@@ -222,7 +232,6 @@ def mock_find_entry(_s, _d: str, path: Optional[Path]):
 
         monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry)
         response = query_get_as("fio_2", "sample1", HTTPStatus.OK)
-        assert response.status_code == HTTPStatus.OK
         assert response.json == {
             "directories": [
                 {
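The docstrings added in this patch spell out the pattern all of these tests
share: monkeypatch CacheManager.find_entry with a local fake, then compare
the API's JSON payload against the canned cache node. In miniature
(pytest's built-in monkeypatch fixture; the canned node and fixture names
mirror the tests above, not new server behavior):

    def test_root_is_directory(query_get_as, monkeypatch):
        def mock_find_entry(_self, _dataset_id, _path):
            # Serve a canned root-directory node for any lookup.
            return {
                "children": {},
                "details": CacheObject(
                    "", Path("."), None, None, None, CacheType.DIRECTORY
                ),
            }

        monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry)
        query_get_as("fio_2", "", HTTPStatus.OK)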
From 5f485ff8b980bf98a603cb655273fc9679ffe718 Mon Sep 17 00:00:00 2001
From: David Butenhof
Date: Mon, 16 Oct 2023 14:29:33 -0400
Subject: [PATCH 21/21] Fix broken rebase, and a few minor things

---
 lib/pbench/test/unit/server/conftest.py               | 9 +++++----
 lib/pbench/test/unit/server/test_cache_manager.py     | 2 +-
 lib/pbench/test/unit/server/test_datasets_contents.py | 9 ++-------
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/lib/pbench/test/unit/server/conftest.py b/lib/pbench/test/unit/server/conftest.py
index 3c80881ae2..887231038e 100644
--- a/lib/pbench/test/unit/server/conftest.py
+++ b/lib/pbench/test/unit/server/conftest.py
@@ -14,10 +14,10 @@
 
 from cryptography.hazmat.primitives.asymmetric import rsa
 from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat
+from flask.wrappers import Response
 from freezegun import freeze_time
 import jwt
 import pytest
-from requests import Response
 import responses
 
 from pbench.common import MetadataLog
@@ -226,9 +226,10 @@ def compare(message: str, response: Optional[Response]):
             if r.levelno == logging.ERROR:
                 if internal.match(str(r.msg)):
                     return
-        assert (
-            False
-        ), f"Expected pattern {internal.pattern!r} not logged at level 'ERROR': {[str(r.msg) for r in caplog.get_records('call')]}"
+        assert False, (
+            f"Expected pattern {internal.pattern!r} not logged "
+            f"at level 'ERROR': {[str(r.msg) for r in caplog.get_records('call')]}"
+        )
 
     return compare
 
diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py
index edaa606cfe..1c31c4ea18 100644
--- a/lib/pbench/test/unit/server/test_cache_manager.py
+++ b/lib/pbench/test/unit/server/test_cache_manager.py
@@ -533,7 +533,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path:
             sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1413_sym",
         )
         os.symlink(
-            Path(".") / "f1411.txt",
+            Path("./f1411.txt"),
             sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1414_sym",
         )
         os.symlink(
diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py
index 2b69d528c3..dc05989ca6 100644
--- a/lib/pbench/test/unit/server/test_datasets_contents.py
+++ b/lib/pbench/test/unit/server/test_datasets_contents.py
@@ -66,24 +66,19 @@ def test_unauthorized_access(self, query_get_as):
     @pytest.mark.parametrize("key", ("", ".", "subdir1"))
     def test_path_is_directory(self, query_get_as, monkeypatch, key):
         """Mock a directory cache node to check the returned data"""
+        name = "" if key == "." else key
 
         def mock_find_entry(_s, _d: str, path: Optional[Path]):
             file = path if path else Path(".")
             return {
                 "children": {},
                 "details": CacheObject(
-                    "" if key == "." else key,
-                    file,
-                    None,
-                    None,
-                    None,
-                    CacheType.DIRECTORY,
+                    name, file, None, None, None, CacheType.DIRECTORY
                 ),
             }
 
         monkeypatch.setattr(CacheManager, "find_entry", mock_find_entry)
         response = query_get_as("fio_2", key, HTTPStatus.OK)
-        name = "" if key == "." else key
         assert response.json == {
             "directories": [],
             "files": [],
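For context on what FakeLockRef's acquire/upgrade/downgrade/release
sequence stands in for: POSIX advisory file locks, where a shared lock can
be converted to exclusive and back on the same open descriptor. A minimal
sketch using the stdlib fcntl module (Linux semantics; the lock file path
here is an assumption mirroring the per-dataset cache layout described
earlier in the series):

    import fcntl

    with open("/srv/pbench/cache/lock", "w") as fp:  # hypothetical path
        fcntl.lockf(fp, fcntl.LOCK_SH)  # acquire shared: many concurrent readers
        fcntl.lockf(fp, fcntl.LOCK_EX)  # upgrade: exclusive, single writer
        fcntl.lockf(fp, fcntl.LOCK_SH)  # downgrade back to shared
        fcntl.lockf(fp, fcntl.LOCK_UN)  # release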