diff --git a/lib/pbench/server/api/__init__.py b/lib/pbench/server/api/__init__.py index 6b5a3cbb86..75ffc64237 100644 --- a/lib/pbench/server/api/__init__.py +++ b/lib/pbench/server/api/__init__.py @@ -15,15 +15,13 @@ from pbench.server import PbenchServerConfig from pbench.server.api.resources.api_key import APIKeyManage from pbench.server.api.resources.datasets_compare import DatasetsCompare +from pbench.server.api.resources.datasets_contents import DatasetsContents from pbench.server.api.resources.datasets_inventory import DatasetsInventory from pbench.server.api.resources.datasets_list import DatasetsList from pbench.server.api.resources.datasets_metadata import DatasetsMetadata from pbench.server.api.resources.datasets_visualize import DatasetsVisualize from pbench.server.api.resources.endpoint_configure import EndpointConfig from pbench.server.api.resources.query_apis.dataset import Datasets -from pbench.server.api.resources.query_apis.datasets.datasets_contents import ( - DatasetsContents, -) from pbench.server.api.resources.query_apis.datasets.datasets_detail import ( DatasetsDetail, ) diff --git a/lib/pbench/server/api/resources/datasets_contents.py b/lib/pbench/server/api/resources/datasets_contents.py new file mode 100644 index 0000000000..abf8eebe70 --- /dev/null +++ b/lib/pbench/server/api/resources/datasets_contents.py @@ -0,0 +1,134 @@ +from http import HTTPStatus +from pathlib import Path + +from flask import current_app, jsonify +from flask.wrappers import Request, Response + +from pbench.server import OperationCode, PbenchServerConfig +from pbench.server.api.resources import ( + APIAbort, + ApiAuthorizationType, + ApiBase, + ApiContext, + APIInternalError, + ApiMethod, + ApiParams, + ApiSchema, + Parameter, + ParamType, + Schema, +) +from pbench.server.cache_manager import ( + BadDirpath, + CacheExtractBadPath, + CacheManager, + CacheObject, + CacheType, + TarballNotFound, +) +from pbench.server.database.models.datasets import Dataset + + +class DatasetsContents(ApiBase): + """ + API class to retrieve inventory files from a dataset + """ + + def __init__(self, config: PbenchServerConfig): + super().__init__( + config, + ApiSchema( + ApiMethod.GET, + OperationCode.READ, + uri_schema=Schema( + Parameter("dataset", ParamType.DATASET, required=True), + Parameter("target", ParamType.STRING, required=False), + ), + authorization=ApiAuthorizationType.DATASET, + ), + ) + + def _get( + self, params: ApiParams, request: Request, context: ApiContext + ) -> Response: + """ + Returns metadata about the target file path within a tarball. + + Args: + params: includes the uri parameters, which provide the dataset and target. 
+ request: Original incoming Request object + context: API context dictionary + + Raises: + APIAbort, reporting either "NOT_FOUND" or "UNSUPPORTED_MEDIA_TYPE" + + GET /api/v1/datasets/{dataset}/contents/{target} + """ + + dataset: Dataset = params.uri["dataset"] + target = params.uri.get("target") + path = Path("" if target in ("/", None) else target) + + current_app.logger.info( + "{} CONTENTS {!r}: path {!r}", dataset.name, target, str(path) + ) + + cache_m = CacheManager(self.config, current_app.logger) + try: + info = cache_m.get_info(dataset.resource_id, path) + except (BadDirpath, CacheExtractBadPath, TarballNotFound) as e: + raise APIAbort(HTTPStatus.NOT_FOUND, str(e)) + except Exception as e: + raise APIInternalError(str(e)) + + prefix = current_app.server_config.rest_uri + origin = ( + f"{self._get_uri_base(request).host}{prefix}/datasets/{dataset.resource_id}" + ) + + details: CacheObject = info["details"] + if details.type is CacheType.DIRECTORY: + children = info["children"] if "children" in info else {} + dir_list = [] + file_list = [] + + for c, value in children.items(): + p = path / c + d: CacheObject = value["details"] + if d.type == CacheType.DIRECTORY: + dir_list.append( + { + "name": c, + "type": d.type.name, + "uri": f"{origin}/contents/{p}", + } + ) + elif d.type == CacheType.FILE: + file_list.append( + { + "name": c, + "size": d.size, + "type": d.type.name, + "uri": f"{origin}/inventory/{p}", + } + ) + + dir_list.sort(key=lambda d: d["name"]) + file_list.sort(key=lambda d: d["name"]) + val = { + "name": details.name, + "type": details.type.name, + "directories": dir_list, + "files": file_list, + } + else: + val = { + "name": details.name, + "size": details.size, + "type": details.type.name, + } + + try: + return jsonify(val) + except Exception as e: + raise APIInternalError(str(e)) diff --git a/lib/pbench/server/api/resources/datasets_inventory.py b/lib/pbench/server/api/resources/datasets_inventory.py index 260ad6375a..9e79e9e02e 100644 --- a/lib/pbench/server/api/resources/datasets_inventory.py +++ b/lib/pbench/server/api/resources/datasets_inventory.py @@ -1,9 +1,8 @@ from http import HTTPStatus from pathlib import Path -from urllib.request import Request from flask import current_app, send_file -from flask.wrappers import Response +from flask.wrappers import Request, Response from pbench.server import OperationCode, PbenchServerConfig from pbench.server.api.resources import ( diff --git a/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py b/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py deleted file mode 100644 index c426f1d0bc..0000000000 --- a/lib/pbench/server/api/resources/query_apis/datasets/datasets_contents.py +++ /dev/null @@ -1,222 +0,0 @@ -from http import HTTPStatus - -from flask import current_app - -from pbench.server import OperationCode, PbenchServerConfig -from pbench.server.api.resources import ( - ApiAuthorizationType, - ApiMethod, - ApiParams, - ApiSchema, - JSON, - Parameter, - ParamType, - Schema, -) -from pbench.server.api.resources.query_apis import ApiContext, PostprocessError -from pbench.server.api.resources.query_apis.datasets import IndexMapBase - - -class DatasetsContents(IndexMapBase): - """ - Datasets Contents API returns the list of sub-directories and files - present under a directory. 
- """ - - MAX_SIZE = 10000 - - def __init__(self, config: PbenchServerConfig): - super().__init__( - config, - ApiSchema( - ApiMethod.GET, - OperationCode.READ, - uri_schema=Schema( - Parameter("dataset", ParamType.DATASET, required=True), - Parameter("target", ParamType.STRING, required=False), - ), - authorization=ApiAuthorizationType.DATASET, - ), - ) - - def assemble(self, params: ApiParams, context: ApiContext) -> JSON: - """ - Construct a pbench Elasticsearch query for getting a list of - documents which contains the user provided parent with files - and its sub-directories with metadata of run-toc index document - that belong to the given run id. - - Args: - params: ApiParams includes the uri parameters, which provide the dataset and target. - context: propagate the dataset and the "target" directory value. - """ - # Copy target directory metadata to CONTEXT for postprocessor - target = "/" + params.uri.get("target", "") - context["target"] = target - - dataset = context["dataset"] - - current_app.logger.info( - "Discover dataset {} Contents, directory {}", - dataset.name, - target, - ) - - # Retrieve the ES indices that belong to this run_id from the metadata - # table - indices = self.get_index(dataset, "run-toc") - - return { - "path": f"/{indices}/_search", - "kwargs": { - "json": { - "size": self.MAX_SIZE, - "query": { - "bool": { - "filter": [ - { - "dis_max": { - "queries": [ - {"term": {"directory": target}}, - {"term": {"parent": target}}, - ] - } - }, - {"term": {"run_data_parent": dataset.resource_id}}, - ], - "must_not": {"regexp": {"directory": f"{target}/[^/]+/.+"}}, - } - }, - } - }, - } - - def postprocess(self, es_json: JSON, context: ApiContext) -> JSON: - """ - Returns a JSON object (keyword/value pairs) whose values are lists of - entries describing individual directories and files. - - Example: These are the contents of es_json parameter. 
The - contents are the result of a request for directory "/1-default" - - { - "took": 6, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 2, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "d4a8cc7c4ecef7vshg4tjhrew174828d", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "1-default", - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - } - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "3bba25b62fhdgfajgsfdty6797ed06a", - "_score": 0.0, - "_source": { - "parent": "/1-default", - "directory": "/1-default/sample1", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "sample1", - "ancestor_path_elements": ["1-default"], - "files": [ - { - "name": "result.txt", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o644", - "type": "reg", - }, - { - "name": "user-benchmark.cmd", - "mtime": "2021-05-01T24:00:00", - "size": 114, - "mode": "0o755", - "type": "reg", - }, - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - ], - }, - } - - Output: - { - "directories": - [ - { - "name": "sample1", - "uri": "https://host/api/v1/datasets/id/contents/1-default/sample1" - } - ], - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - "uri": "https://host/api/v1/datasets/id/inventory/1-default/reference-result" - } - ] - } - """ - request = context["request"] - resource_id = context["dataset"].resource_id - target = context["target"] - if len(es_json["hits"]["hits"]) == 0: - raise PostprocessError( - HTTPStatus.NOT_FOUND, - f"No directory {target!r} in {resource_id!r} contents.", - ) - - prefix = current_app.server_config.rest_uri - origin = f"{self._get_uri_base(request).host}{prefix}/datasets/{resource_id}" - path = "" if target == "/" else target - - dir_list = [] - file_list = [] - for val in es_json["hits"]["hits"]: - if val["_source"]["directory"] == target: - # Retrieve files list if present else add an empty list. 
- for f in val["_source"].get("files", []): - f["uri"] = f"{origin}/inventory{path}/{f['name']}" - file_list.append(f) - elif val["_source"]["parent"] == target: - name = val["_source"]["name"] - dir_list.append( - {"name": name, "uri": f"{origin}/contents{path}/{name}"} - ) - - return {"directories": dir_list, "files": file_list} diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index 4a63edd885..61bef50ae9 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -26,6 +26,14 @@ class CacheManagerError(Exception): pass +class CacheMapMissing(CacheManagerError): + """Cache map hasn't been built yet.""" + + def __init__(self, resource_id: str): + super().__init__(f"Dataset {resource_id} hasn't been processed") + self.id = resource_id + + class BadDirpath(CacheManagerError): """A bad directory path was given.""" @@ -126,6 +134,56 @@ class CacheObject: size: Optional[int] type: CacheType + @classmethod + def create(cls, root: Path, path: Path) -> "CacheObject": + """Collects the file info + + Args: + root: root directory of cache + path: path to a file/directory within cache + + Returns: + CacheObject with file/directory info + """ + resolve_path: Optional[Path] = None + resolve_type: Optional[CacheType] = None + size: Optional[int] = None + + if path.is_symlink(): + ftype = CacheType.SYMLINK + link_path = path.readlink() + try: + if link_path.is_absolute(): + raise ValueError("symlink path is absolute") + r_path = path.resolve(strict=True) + resolve_path = r_path.relative_to(root) + except (FileNotFoundError, ValueError): + resolve_path = link_path + resolve_type = CacheType.OTHER + else: + if r_path.is_dir(): + resolve_type = CacheType.DIRECTORY + elif r_path.is_file(): + resolve_type = CacheType.FILE + else: + resolve_type = CacheType.OTHER + elif path.is_file(): + ftype = CacheType.FILE + size = path.stat().st_size + elif path.is_dir(): + ftype = CacheType.DIRECTORY + else: + ftype = CacheType.OTHER + + return cls( + name="" if path == root else path.name, + location=Path("") if path == root else path.relative_to(root), + resolve_path=resolve_path, + resolve_type=resolve_type, + size=size, + type=ftype, + ) + # Type hint definitions for the cache map. 
# @@ -135,56 +193,6 @@ class CacheObject: CacheMap = dict[str, CacheMapEntry] -def make_cache_object(dir_path: Path, path: Path) -> CacheObject: - """Collects the file info - - Args: - dir_path: root directory parent path - path: path to a file/directory - - Returns: - CacheObject with file/directory info - """ - resolve_path: Optional[Path] = None - resolve_type: Optional[CacheType] = None - size: Optional[int] = None - - if path.is_symlink(): - ftype = CacheType.SYMLINK - link_path = path.readlink() - try: - if link_path.is_absolute(): - raise ValueError("symlink path is absolute") - r_path = path.resolve(strict=True) - resolve_path = r_path.relative_to(dir_path) - except (FileNotFoundError, ValueError): - resolve_path = link_path - resolve_type = CacheType.OTHER - else: - if r_path.is_dir(): - resolve_type = CacheType.DIRECTORY - elif r_path.is_file(): - resolve_type = CacheType.FILE - else: - resolve_type = CacheType.OTHER - elif path.is_file(): - ftype = CacheType.FILE - size = path.stat().st_size - elif path.is_dir(): - ftype = CacheType.DIRECTORY - else: - ftype = CacheType.OTHER - - return CacheObject( - name=path.name, - location=path.relative_to(dir_path), - resolve_path=resolve_path, - resolve_type=resolve_type, - size=size, - type=ftype, - ) - - class LockRef: """Keep track of a cache lock passed off to a caller""" @@ -631,39 +639,34 @@ def create( return cls(destination, resource_id, controller) - def cache_map(self, dir_path: Path): + def build_map(self): """Build hierarchical representation of results tree - NOTE: this structure isn't removed when we release the cache, as the - data remains valid. + This must be called with the cache locked (shared lock is enough) + and unpacked. - Args: - dir_path: root directory + NOTE: this structure isn't removed when we release the cache, as the + data remains valid so long as the dataset exists. 
""" - root_dir_path = dir_path.parent - cmap: CacheMap = { - dir_path.name: {"details": make_cache_object(root_dir_path, dir_path)} + cmap: CacheMapEntry = { + "details": CacheObject.create(self.unpacked, self.unpacked) } - dir_queue = deque(((dir_path, cmap),)) + dir_queue = deque(((self.unpacked, cmap),)) while dir_queue: - dir_path, parent_map = dir_queue.popleft() - tar_n = dir_path.name - + unpacked, parent_map = dir_queue.popleft() curr: CacheMapEntry = {} - for l_path in dir_path.glob("*"): - tar_info = make_cache_object(root_dir_path, l_path) - curr[l_path.name] = {"details": tar_info} - if l_path.is_symlink(): - continue - if l_path.is_dir(): - dir_queue.append((l_path, curr)) - parent_map[tar_n]["children"] = curr + for path in unpacked.glob("*"): + details = CacheObject.create(self.unpacked, path) + curr[path.name] = {"details": details} + if path.is_dir() and not path.is_symlink(): + dir_queue.append((path, curr[path.name])) + parent_map["children"] = curr self.cachemap = cmap - @staticmethod - def traverse_cmap(path: Path, cachemap: CacheMap) -> CacheMapEntry: - """Locate a path in the cache map + def traverse_cmap(self, path: Path) -> CacheMapEntry: + """Sequentially traverses the cachemap to find the leaf of a + relative path reference Args: path: relative path of the subdirectory/file @@ -673,34 +676,35 @@ def traverse_cmap(path: Path, cachemap: CacheMap) -> CacheMapEntry: BadDirpath if the directory/file path is not valid Returns: - Dictionary with directory/file details or children if present + cache map entry if present """ - file_list = path.parts[:-1] - f_entries = cachemap + if self.cachemap is None: + raise CacheMapMissing(self.resource_id) + + if str(path) in (".", ""): + return self.cachemap + + path_parts = path.parts[:-1] + node: CacheMapEntry = self.cachemap["children"] try: - for file_l in file_list: - info: CacheMapEntry = f_entries[file_l] - if info["details"].type == CacheType.DIRECTORY: - f_entries: CacheMap = info["children"] + for dir in path_parts: + info: CacheMapEntry = node[dir] + if info["details"].type is CacheType.DIRECTORY: + node = info["children"] else: raise BadDirpath( - f"Found a file {file_l!r} where a directory was expected in path {str(path)!r}" + f"Found a file {dir!r} where a directory was expected in path {str(path)!r}" ) - return f_entries[path.name] + return node[path.name] except KeyError as exc: raise BadDirpath( - f"directory {str(path)!r} doesn't have a {exc} file/directory." + f"Can't resolve path {str(path)!r}: component {exc} is missing." ) - def get_info(self, path: Path) -> JSONOBJECT: + def get_info(self, path: Path) -> CacheMapEntry: """Returns the details of the given file/directory in dict format - NOTE: If the cache manager doesn't already have a cache map for the - current Tarball, we'll unpack it here; however as the cache map isn't - dependent on the unpacked results tree, we immediately release the - cache lock. 
- Args: path: path of the file/subdirectory @@ -732,24 +736,7 @@ def get_info(self, path: Path) -> JSONOBJECT: with LockManager(self.lock) as lock: self.get_results(lock) - c_map = self.traverse_cmap(path, self.cachemap) - children = c_map["children"] if "children" in c_map else {} - fd_info = c_map["details"].__dict__.copy() - - if fd_info["type"] == CacheType.DIRECTORY: - fd_info["directories"] = [] - fd_info["files"] = [] - - for key, value in children.items(): - if value["details"].type == CacheType.DIRECTORY: - fd_info["directories"].append(key) - elif value["details"].type == CacheType.FILE: - fd_info["files"].append(key) - - fd_info["directories"].sort() - fd_info["files"].sort() - - return fd_info + return self.traverse_cmap(path) @staticmethod def extract(tarball_path: Path, path: str) -> Inventory: @@ -1000,7 +987,6 @@ def get_results(self, lock: LockManager) -> Path: find_command, self.cache, TarballModeChangeError, self.cache ) self.unpacked = self.cache / self.name - self.cache_map(self.unpacked) except Exception as e: error = str(e) raise @@ -1014,6 +1000,13 @@ def get_results(self, lock: LockManager) -> Path: attributes=attributes, ) lock.downgrade() + + # Even if we have an unpacked directory, if it wasn't done under this + # CacheManager instance we may not have a cachemap, so be prepared to + # build one. + if not self.cachemap: + self.build_map() + self.last_ref.touch(exist_ok=True) return self.unpacked @@ -1471,8 +1464,7 @@ def get_info(self, dataset_id: str, path: Path) -> dict[str, Any]: File Metadata """ tarball = self.find_dataset(dataset_id) - tmap = tarball.get_info(path) - return tmap + return tarball.get_info(path) def get_inventory(self, dataset_id: str, target: str) -> Optional[JSONOBJECT]: """Return filestream data for a file within a dataset tarball diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py index 52ca1c88bb..81a50b29e5 100644 --- a/lib/pbench/test/functional/server/test_datasets.py +++ b/lib/pbench/test/functional/server/test_datasets.py @@ -662,46 +662,41 @@ def test_contents(self, server_client: PbenchServerClient, login_user): the datasets must have gotten through indexing. """ datasets = server_client.get_list( - owner="tester", - metadata=["server.archiveonly"], + owner="tester", metadata=["server.archiveonly"] ) - with_toc = False - without_toc = False + datasets_returned = False for dataset in datasets: + datasets_returned = True response = server_client.get( API.DATASETS_CONTENTS, {"dataset": dataset.resource_id, "target": ""}, raise_error=False, ) - archive = dataset.metadata["server.archiveonly"] - if archive: - assert ( - response.status_code == HTTPStatus.CONFLICT - ), f"Unexpected {response.json()['message']}" - assert response.json()["message"] == "Dataset indexing was disabled" - without_toc = True - continue - - with_toc = True assert ( response.ok ), f"CONTENTS {dataset.name} failed {response.status_code}:{response.json()['message']}" json = response.json() + archive_only = dataset.metadata["server.archiveonly"] + # assert that we have directories and/or files: an empty root # directory is technically possible, but not legal unless it's a # trivial "archiveonly" dataset. NOTE: this will also fail if # either the "directories" or "files" JSON keys are missing. 
- assert json["directories"] or json["files"] + assert archive_only or json["directories"] or json["files"] # Even if they're empty, both values must be lists assert isinstance(json["directories"], list) assert isinstance(json["files"], list) - # We need at least a metadata.log - assert "metadata.log" in (f["name"] for f in json["files"]) + # We expect to find at least a metadata.log at the top level of the + # tarball unless the dataset is marked archive-only. + assert archive_only or ( + "metadata.log" in (f["name"] for f in json["files"]) + ) + # All files and directories reported must have a valid API URI for d in json["directories"]: uri = server_client._uri( API.DATASETS_CONTENTS, @@ -715,7 +710,7 @@ def test_contents(self, server_client: PbenchServerClient, login_user): {"dataset": dataset.resource_id, "target": f["name"]}, ) assert f["uri"] == uri, f"{f['name']} uri is incorrect: {f['uri']}" - assert with_toc and without_toc, "expected archiveonly and indexed datasets" + assert datasets_returned # We successfully checked at least one dataset @pytest.mark.dependency(name="visualize", depends=["upload"], scope="session") def test_visualize(self, server_client: PbenchServerClient, login_user): diff --git a/lib/pbench/test/unit/server/query_apis/test_datasets_contents.py b/lib/pbench/test/unit/server/query_apis/test_datasets_contents.py deleted file mode 100644 index 670023eb24..0000000000 --- a/lib/pbench/test/unit/server/query_apis/test_datasets_contents.py +++ /dev/null @@ -1,507 +0,0 @@ -from http import HTTPStatus - -import pytest - -from pbench.server.api.resources import ApiMethod -from pbench.server.api.resources.query_apis.datasets.datasets_contents import ( - DatasetsContents, -) -from pbench.server.database.models.datasets import Dataset -from pbench.test.unit.server.query_apis.commons import Commons - - -class TestDatasetsContents(Commons): - """ - Unit testing for DatasetsContents class. - In a web service context, we access class functions mostly via the - Flask test client rather than trying to directly invoke the class - constructor and `get` service. - """ - - @pytest.fixture(autouse=True) - def _setup(self, client): - super()._setup( - cls_obj=DatasetsContents(client.config), - pbench_endpoint="/datasets/random_md5_string1/contents/1-default", - elastic_endpoint="/_search", - index_from_metadata="run-toc", - ) - - api_method = ApiMethod.GET - - def test_with_no_uri_args(self, client, server_config): - """ - Check the DatasetsContents API when no dataset or path is provided - """ - # remove the last two components of the pbench_endpoint - incorrect_endpoint = "/datasets/contents/" - response = client.get(f"{server_config.rest_uri}{incorrect_endpoint}/") - assert response.status_code == HTTPStatus.NOT_FOUND - - def test_with_incorrect_path(self, client, server_config, pbench_drb_token): - """ - Check the Contents API when an incorrect path is provided. - """ - incorrect_endpoint = ( - "/".join(self.pbench_endpoint.split("/")[:-1]) + "/random_md5_string2" - ) - response = client.get( - f"{server_config.rest_uri}{incorrect_endpoint}", - headers={"Authorization": "Bearer " + pbench_drb_token}, - ) - assert response.status_code == HTTPStatus.NOT_FOUND - - def test_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check behaviour of Contents API when both sub-directories and - the list of files are present in the given payload. 
- """ - response_payload = { - "took": 6, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 2, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "d4a8cc7c4ecef7vshg4tjhrew174828d", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "1-default", - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - } - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "3bba25b62fhdgfajgsfdty6797ed06a", - "_score": 0.0, - "_source": { - "parent": "/1-default", - "directory": "/1-default/sample1", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "sample1", - "ancestor_path_elements": ["1-default"], - "files": [ - { - "name": "result.txt", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o644", - "type": "reg", - }, - { - "name": "user-benchmark.cmd", - "mtime": "2021-05-01T24:00:00", - "size": 114, - "mode": "0o755", - "type": "reg", - }, - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = { - "directories": [ - { - "name": "sample1", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/contents/1-default/sample1", - } - ], - "files": [ - { - "name": "reference-result", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o777", - "type": "sym", - "linkpath": "sample1", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/inventory/1-default/reference-result", - } - ], - } - assert expected_result == res_json - - def test_subdirectory_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check the API when only sub-directories are present in the - payload and NO files list. 
- """ - response_payload = { - "took": 7, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 2, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "d4a8cc7c4ecef7vshg4tjhrew174828d", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "1-default", - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "3bba25b62fhdgfajgsfdty6797ed06a", - "_score": 0.0, - "_source": { - "parent": "/1-default", - "directory": "/1-default/sample1", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "name": "sample1", - "ancestor_path_elements": ["1-default"], - "files": [ - { - "name": "result.txt", - "mtime": "2021-05-01T24:00:00", - "size": 0, - "mode": "0o644", - "type": "reg", - }, - { - "name": "user-benchmark.cmd", - "mtime": "2021-05-01T24:00:00", - "size": 114, - "mode": "0o755", - "type": "reg", - }, - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - }, - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = { - "directories": [ - { - "name": "sample1", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/contents/1-default/sample1", - } - ], - "files": [], - } - assert expected_result == res_json - - def test_files_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Checks the API when only list of files are present in a directory. 
- """ - response_payload = { - "took": 7, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 1, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "9e95ccb385b7a7a2d70ededa07c391da", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "files": [ - { - "name": "default.csv", - "mtime": "2021-05-01T24:00:00", - "size": 122, - "mode": "0o644", - "type": "reg", - } - ], - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - } - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = { - "directories": [], - "files": [ - { - "name": "default.csv", - "mtime": "2021-05-01T24:00:00", - "size": 122, - "mode": "0o644", - "type": "reg", - "uri": "https://localhost/api/v1/datasets/random_md5_string1/inventory/1-default/default.csv", - } - ], - } - assert expected_result == res_json - - def test_no_subdirectory_no_files_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check the API when no subdirectory or files are present. - """ - response_payload = { - "took": 7, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 1, "relation": "eq"}, - "max_score": 0.0, - "hits": [ - { - "_index": "riya-pbench.v6.run-toc.2021-05", - "_type": "_doc", - "_id": "9e95ccb385b7a7a2d70ededa07c391da", - "_score": 0.0, - "_source": { - "parent": "/", - "directory": "/1-default", - "mtime": "2021-05-01T24:00:00", - "mode": "0o755", - "run_data_parent": "ece030bdgfkjasdkf7435e6a7a6be804", - "authorization": {"owner": "1", "access": "private"}, - "@timestamp": "2021-05-01T24:00:00", - }, - } - ], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. 
- auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.OK: - res_json = response.json - expected_result = {"directories": [], "files": []} - assert expected_result == res_json - - def test_empty_query( - self, - server_config, - query_api, - pbench_drb_token, - build_auth_header, - find_template, - provide_metadata, - ): - """ - Check the API when a directory is empty. - """ - response_payload = { - "took": 55, - "timed_out": False, - "_shards": {"total": 3, "successful": 3, "skipped": 0, "failed": 0}, - "hits": { - "total": {"value": 0, "relation": "eq"}, - "max_score": None, - "hits": [], - }, - } - index = self.build_index_from_metadata() - - # get_expected_status() expects to read username and access from the - # JSON client payload, however this API acquires that information - # from the Dataset. Construct a fake payload corresponding to the - # attach_dataset fixture. - auth_json = {"user": "drb", "access": "private"} - expected_status = self.get_expected_status( - auth_json, build_auth_header["header_param"] - ) - - response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, - payload=None, - expected_index=index, - expected_status=expected_status - if expected_status != HTTPStatus.OK - else HTTPStatus.NOT_FOUND, - json=response_payload, - status=HTTPStatus.OK, - headers=build_auth_header["header"], - request_method=self.api_method, - ) - if expected_status == HTTPStatus.NOT_FOUND: - res_json = response.json - expected_result = { - "message": "No directory '/1-default' in 'drb' contents." - } - assert expected_result == res_json - - def test_get_index(self, attach_dataset, provide_metadata): - drb = Dataset.query(name="drb") - indices = self.cls_obj.get_index(drb, self.root_index) - assert indices == "unit-test.v6.run-toc.2020-05" - - @pytest.mark.parametrize("name", ("wrong", "")) - def test_missing_name(self, client, server_config, pbench_drb_token, name): - expected_status = HTTPStatus.NOT_FOUND - incorrect_endpoint = self.pbench_endpoint.rsplit("/", 1)[0] + "/" + name - response = client.get( - incorrect_endpoint, - headers={"Authorization": "Bearer " + pbench_drb_token}, - ) - assert response.status_code == expected_status diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index a9ff0b84bc..ec97462f28 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -436,7 +436,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: Directory Structure /tmp/ - / + / subdir1/ subdir11/ subdir12/ @@ -453,7 +453,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: f1415_sym -> ./f1412_sym f1416_sym -> ../../subdir12/f122_sym f11.txt - f12_sym -> ../../.. + f12_sym -> .. 
f1.json metadata.log @@ -461,7 +461,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: Generated cache map { - 'dir_name': { + '': { 'details': , 'children': { 'f1.json': {'details': }, @@ -519,7 +519,7 @@ def generate_test_result_tree(tmp_path: Path, dir_name: str) -> Path: ) (sub_dir / "subdir1" / "subdir14" / "subdir141" / "f1411.txt").touch() sym_file = sub_dir / "subdir1" / "f12_sym" - os.symlink(Path("../../.."), sym_file) + os.symlink(Path(".."), sym_file) sym_file = sub_dir / "subdir1" / "subdir12" / "f121_sym" os.symlink(Path("../..") / "subdir1" / "subdir15", sym_file) sym_file = sub_dir / "subdir1" / "subdir12" / "f122_sym" @@ -551,6 +551,7 @@ def __init__(self, path: Path, resource_id: str, controller: Controller): self.lock = self.cache / "lock" self.last_ref = self.cache / "last_ref" self.unpacked = None + self.cachemap = None self.controller = controller def test_unpack_tar_subprocess_exception( @@ -647,10 +648,12 @@ def mock_resolve(_path, _strict=False): raise AssertionError("Unexpected call to Path.resolve()") with monkeypatch.context() as m: + m.setattr(Audit, "create", lambda **kwargs: None) m.setattr(Path, "mkdir", lambda path, parents=False, exist_ok=False: None) m.setattr(Path, "touch", lambda path, exist_ok=False: None) - m.setattr("pbench.server.cache_manager.subprocess.run", mock_run) m.setattr(Path, "resolve", mock_resolve) + m.setattr(Path, "iterdir", lambda path: []) + m.setattr("pbench.server.cache_manager.subprocess.run", mock_run) m.setattr(Tarball, "__init__", TestCacheManager.MockTarball.__init__) m.setattr(Controller, "__init__", TestCacheManager.MockController.__init__) tb = Tarball( @@ -675,50 +678,51 @@ def test_cache_map_success(self, make_logger, monkeypatch, tmp_path): tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() - sd1 = tb.cachemap["dir_name"]["children"]["subdir1"] + sd1 = tb.cachemap["children"]["subdir1"] assert sd1["details"].name == "subdir1" sd141 = sd1["children"]["subdir14"]["children"]["subdir141"] - assert sd141["children"]["f1412_sym"]["details"].type == CacheType.SYMLINK + assert sd141["children"]["f1412_sym"]["details"].type is CacheType.SYMLINK @pytest.mark.parametrize( "file_path, expected_msg", [ ( - "/dir_name/subdir1/f11.txt", - "The path '/dir_name/subdir1/f11.txt' is an absolute path, " + "/subdir1/f11.txt", + "The path '/subdir1/f11.txt' is an absolute path, " "we expect relative path to the root directory.", ), ( - "dir_name/subdir1/subdir11/../f11.txt", - "directory 'dir_name/subdir1/subdir11/../f11.txt' doesn't have a '..' file/directory.", + "subdir1/subdir11/../f11.txt", + "Can't resolve path 'subdir1/subdir11/../f11.txt': component '..' 
is missing.", ), ( - "dir_name/subdir1/subdir14/subdir1", - "directory 'dir_name/subdir1/subdir14/subdir1' doesn't have a 'subdir1' file/directory.", + "subdir1/subdir14/subdir1", + "Can't resolve path 'subdir1/subdir14/subdir1': component 'subdir1' is missing.", ), ( - "dir_name/ne_dir", - "directory 'dir_name/ne_dir' doesn't have a 'ne_dir' file/directory.", + "ne_dir", + "Can't resolve path 'ne_dir': component 'ne_dir' is missing.", ), ( - "dir_name/subdir1/ne_file", - "directory 'dir_name/subdir1/ne_file' doesn't have a 'ne_file' file/directory.", + "subdir1/ne_file", + "Can't resolve path 'subdir1/ne_file': component 'ne_file' is missing.", ), ( - "dir_name/ne_dir/ne_file", - "directory 'dir_name/ne_dir/ne_file' doesn't have a 'ne_dir' file/directory.", + "ne_dir/ne_file", + "Can't resolve path 'ne_dir/ne_file': component 'ne_dir' is missing.", ), ( - "dir_name/subdir1/f11.txt/ne_subdir", - "Found a file 'f11.txt' where a directory was expected in path 'dir_name/subdir1/f11.txt/ne_subdir'", + "subdir1/f11.txt/ne_subdir", + "Found a file 'f11.txt' where a directory was expected in path 'subdir1/f11.txt/ne_subdir'", ), ( - "dir_name/subdir1/subdir14/subdir141/f1412_sym/ne_file", + "subdir1/subdir14/subdir141/f1412_sym/ne_file", "Found a file 'f1412_sym' where a directory was expected " - "in path 'dir_name/subdir1/subdir14/subdir141/f1412_sym/ne_file'", + "in path 'subdir1/subdir14/subdir141/f1412_sym/ne_file'", ), ], ) @@ -738,7 +742,8 @@ def test_cache_map_bad_dir_path( tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() with pytest.raises(BadDirpath) as exc: tb.get_info(Path(file_path)) assert str(exc.value) == expected_msg @@ -746,10 +751,10 @@ def test_cache_map_bad_dir_path( @pytest.mark.parametrize( "file_path, location, name, resolve_path, resolve_type, size, file_type", [ - ("dir_name", "dir_name", "dir_name", None, None, None, CacheType.DIRECTORY), + ("", "", "", None, None, None, CacheType.DIRECTORY), ( - "dir_name/f1.json", - "dir_name/f1.json", + "f1.json", + "f1.json", "f1.json", None, None, @@ -757,8 +762,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1", - "dir_name/subdir1", + "subdir1", + "subdir1", "subdir1", None, None, @@ -766,8 +771,8 @@ def test_cache_map_bad_dir_path( CacheType.DIRECTORY, ), ( - "dir_name/subdir1/./f11.txt", - "dir_name/subdir1/f11.txt", + "subdir1/./f11.txt", + "subdir1/f11.txt", "f11.txt", None, None, @@ -775,8 +780,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1//f11.txt", - "dir_name/subdir1/f11.txt", + "subdir1//f11.txt", + "subdir1/f11.txt", "f11.txt", None, None, @@ -784,8 +789,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1/f11.txt", - "dir_name/subdir1/f11.txt", + "subdir1/f11.txt", + "subdir1/f11.txt", "f11.txt", None, None, @@ -793,17 +798,17 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1/f12_sym", - "dir_name/subdir1/f12_sym", + "subdir1/f12_sym", + "subdir1/f12_sym", "f12_sym", - Path("../../.."), - CacheType.OTHER, + Path("."), + CacheType.DIRECTORY, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir12/f121_sym", - "dir_name/subdir1/subdir12/f121_sym", + "subdir1/subdir12/f121_sym", + "subdir1/subdir12/f121_sym", "f121_sym", Path("../../subdir1/subdir15"), CacheType.OTHER, @@ -811,8 +816,8 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir12/f122_sym", - 
"dir_name/subdir1/subdir12/f122_sym", + "subdir1/subdir12/f122_sym", + "subdir1/subdir12/f122_sym", "f122_sym", Path("bad_subdir/nonexistent_file.txt"), CacheType.OTHER, @@ -820,8 +825,8 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir13/f131_sym", - "dir_name/subdir1/subdir13/f131_sym", + "subdir1/subdir13/f131_sym", + "subdir1/subdir13/f131_sym", "f131_sym", Path("/etc/passwd"), CacheType.OTHER, @@ -829,8 +834,8 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14", - "dir_name/subdir1/subdir14", + "subdir1/subdir14", + "subdir1/subdir14", "subdir14", None, None, @@ -838,8 +843,8 @@ def test_cache_map_bad_dir_path( CacheType.DIRECTORY, ), ( - "dir_name/subdir1/subdir14/subdir141/f1411.txt", - "dir_name/subdir1/subdir14/subdir141/f1411.txt", + "subdir1/subdir14/subdir141/f1411.txt", + "subdir1/subdir14/subdir141/f1411.txt", "f1411.txt", None, None, @@ -847,8 +852,8 @@ def test_cache_map_bad_dir_path( CacheType.FILE, ), ( - "dir_name/subdir1/subdir14/subdir141/f1412_sym", - "dir_name/subdir1/subdir14/subdir141/f1412_sym", + "subdir1/subdir14/subdir141/f1412_sym", + "subdir1/subdir14/subdir141/f1412_sym", "f1412_sym", Path("/mock_absolute_path/subdir1/f11.txt"), CacheType.OTHER, @@ -856,35 +861,35 @@ def test_cache_map_bad_dir_path( CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1413_sym", - "dir_name/subdir1/subdir14/subdir141/f1413_sym", + "subdir1/subdir14/subdir141/f1413_sym", + "subdir1/subdir14/subdir141/f1413_sym", "f1413_sym", - Path("dir_name/subdir1/subdir14/subdir141"), + Path("subdir1/subdir14/subdir141"), CacheType.DIRECTORY, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1414_sym", - "dir_name/subdir1/subdir14/subdir141/f1414_sym", + "subdir1/subdir14/subdir141/f1414_sym", + "subdir1/subdir14/subdir141/f1414_sym", "f1414_sym", - Path("dir_name/subdir1/subdir14/subdir141/f1411.txt"), + Path("subdir1/subdir14/subdir141/f1411.txt"), CacheType.FILE, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1415_sym", - "dir_name/subdir1/subdir14/subdir141/f1415_sym", + "subdir1/subdir14/subdir141/f1415_sym", + "subdir1/subdir14/subdir141/f1415_sym", "f1415_sym", - Path("dir_name/subdir1/f11.txt"), + Path("subdir1/f11.txt"), CacheType.FILE, None, CacheType.SYMLINK, ), ( - "dir_name/subdir1/subdir14/subdir141/f1416_sym", - "dir_name/subdir1/subdir14/subdir141/f1416_sym", + "subdir1/subdir14/subdir141/f1416_sym", + "subdir1/subdir14/subdir141/f1416_sym", "f1416_sym", Path("../../subdir12/f122_sym"), CacheType.OTHER, @@ -919,7 +924,8 @@ def test_cache_map_traverse_cmap( tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() # Since the result tree is dynamically generated by the test at runtime, # the parametrization for resolve_path cannot provide the correct value @@ -930,21 +936,30 @@ def test_cache_map_traverse_cmap( resolve_path = tar_dir / str(resolve_path).removeprefix(abs_pref) # test traverse with random path - c_map = Tarball.traverse_cmap(Path(file_path), tb.cachemap) + c_map = tb.traverse_cmap(Path(file_path)) + if file_type is CacheType.DIRECTORY: + assert sorted(c_map.keys()) == [ + "children", + "details", + ], "Directory should have children and details" + else: + assert sorted(c_map.keys()) == [ + "details" + ], "Non-directory should have only details" assert c_map["details"].location == Path(location) assert c_map["details"].name == name 
assert c_map["details"].resolve_path == resolve_path assert c_map["details"].resolve_type == resolve_type assert c_map["details"].size == size - assert c_map["details"].type == file_type + assert c_map["details"].type is file_type @pytest.mark.parametrize( "file_path, expected_msg", [ ( - "dir_name/subdir1/f11.txt", + "subdir1/f11.txt", { - "location": Path("dir_name/subdir1/f11.txt"), + "location": Path("subdir1/f11.txt"), "name": "f11.txt", "resolve_path": None, "resolve_type": None, @@ -953,11 +968,17 @@ def test_cache_map_traverse_cmap( }, ), ( - "dir_name/subdir1", + "subdir1", { - "directories": ["subdir11", "subdir12", "subdir13", "subdir14"], - "files": ["f11.txt"], - "location": Path("dir_name/subdir1"), + "children": [ + "f11.txt", + "f12_sym", + "subdir11", + "subdir12", + "subdir13", + "subdir14", + ], + "location": Path("subdir1"), "name": "subdir1", "resolve_path": None, "resolve_type": None, @@ -966,11 +987,10 @@ def test_cache_map_traverse_cmap( }, ), ( - "dir_name/subdir1/subdir11", + "subdir1/subdir11", { - "directories": [], - "files": [], - "location": Path("dir_name/subdir1/subdir11"), + "children": [], + "location": Path("subdir1/subdir11"), "name": "subdir11", "resolve_path": None, "resolve_type": None, @@ -979,11 +999,11 @@ def test_cache_map_traverse_cmap( }, ), ( - "dir_name/subdir1/subdir14/subdir141/f1413_sym", + "subdir1/subdir14/subdir141/f1413_sym", { - "location": Path("dir_name/subdir1/subdir14/subdir141/f1413_sym"), + "location": Path("subdir1/subdir14/subdir141/f1413_sym"), "name": "f1413_sym", - "resolve_path": Path("dir_name/subdir1/subdir14/subdir141"), + "resolve_path": Path("subdir1/subdir14/subdir141"), "resolve_type": CacheType.DIRECTORY, "size": None, "type": CacheType.SYMLINK, @@ -1007,11 +1027,16 @@ def test_cache_map_get_info_cmap( tar_dir = TestCacheManager.MockController.generate_test_result_tree( tmp_path, "dir_name" ) - tb.cache_map(tar_dir) + tb.unpacked = tar_dir + tb.build_map() # test get_info with random path file_info = tb.get_info(Path(file_path)) - assert file_info == expected_msg + for k in expected_msg.keys(): + if k == "children": + assert sorted(file_info[k].keys()) == expected_msg[k] + else: + assert getattr(file_info["details"], k) == expected_msg[k] @pytest.mark.parametrize( "file_path,is_unpacked,exp_stream", diff --git a/lib/pbench/test/unit/server/test_datasets_contents.py b/lib/pbench/test/unit/server/test_datasets_contents.py new file mode 100644 index 0000000000..49bcb4f6c9 --- /dev/null +++ b/lib/pbench/test/unit/server/test_datasets_contents.py @@ -0,0 +1,152 @@ +from http import HTTPStatus +from pathlib import Path +from typing import Optional + +import pytest +import requests + +from pbench.server.cache_manager import BadDirpath, CacheManager, CacheObject, CacheType +from pbench.server.database.models.datasets import Dataset, DatasetNotFound + + +class TestDatasetsAccess: + @pytest.fixture() + def query_get_as(self, client, server_config, more_datasets, pbench_drb_token): + """ + Helper fixture to perform the API query and validate an expected + return status. 
+ + Args: + client: Flask test API client fixture + server_config: Pbench config fixture + more_datasets: Dataset construction fixture + pbench_drb_token: Authenticated user token fixture + """ + + def query_api( + dataset: str, target: str, expected_status: HTTPStatus + ) -> requests.Response: + try: + dataset_id = Dataset.query(name=dataset).resource_id + except DatasetNotFound: + dataset_id = dataset # Allow passing deliberately bad value + headers = {"authorization": f"bearer {pbench_drb_token}"} + response = client.get( + f"{server_config.rest_uri}/datasets/{dataset_id}/contents/{target}", + headers=headers, + ) + assert ( + response.status_code == expected_status + ), f"Unexpected failure '{response.json}'" + return response + + return query_api + + def test_get_no_dataset(self, query_get_as): + response = query_get_as( + "nonexistent-dataset", "metadata.log", HTTPStatus.NOT_FOUND + ) + assert response.json == {"message": "Dataset 'nonexistent-dataset' not found"} + + def test_dataset_not_present(self, query_get_as): + response = query_get_as("fio_2", "metadata.log", HTTPStatus.NOT_FOUND) + assert response.json == { + "message": "The dataset tarball named 'random_md5_string4' is not found" + } + + def test_unauthorized_access(self, query_get_as): + response = query_get_as("test", "metadata.log", HTTPStatus.FORBIDDEN) + assert response.json == { + "message": "User drb is not authorized to READ a resource owned by test with private access" + } + + @pytest.mark.parametrize("key", (None, "", "subdir1")) + def test_path_is_directory(self, query_get_as, monkeypatch, key): + base = Path("/mock/cache/ABC") + + def mock_get_info(_s, _d: str, path: Optional[Path]): + file = base / (path if path else "") + return { + "children": {}, + "details": CacheObject( + file.name, file, None, None, None, CacheType.DIRECTORY + ), + } + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(Path, "is_file", lambda self: False) + monkeypatch.setattr(Path, "exists", lambda self: True) + + response = query_get_as("fio_2", key if key else "", HTTPStatus.OK) + assert response.json == { + "directories": [], + "files": [], + "name": key if key else base.name, + "type": "DIRECTORY", + } + + def test_not_a_file(self, query_get_as, monkeypatch): + def mock_get_info(_s, _d: str, path: Optional[Path]): + raise BadDirpath("Nobody home") + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + monkeypatch.setattr(Path, "is_file", lambda self: False) + monkeypatch.setattr(Path, "exists", lambda self: False) + + response = query_get_as("fio_2", "subdir1/f1_sym", HTTPStatus.NOT_FOUND) + assert response.json == {"message": "Nobody home"} + + def test_file_info(self, query_get_as, monkeypatch): + name = "f1.json" + base = Path("/mock/cache/ABC") + + def mock_get_info(_s, _d: str, path: Optional[Path]): + file = base / (path if path else "") + return { + "details": CacheObject(file.name, file, None, None, 16, CacheType.FILE) + } + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + response = query_get_as("fio_2", name, HTTPStatus.OK) + assert response.status_code == HTTPStatus.OK + assert response.json == {"name": name, "size": 16, "type": "FILE"} + + def test_dir_info(self, query_get_as, monkeypatch): + name = "sample1" + base = Path("/mock/cache/ABC") + + def mock_get_info(_s, _d: str, path: Optional[Path]): + file = base / (path if path else "") + return { + "children": { + "default": { + "details": CacheObject( + "default", + base / name / "default", + None, + None, + 
None, + CacheType.DIRECTORY, + ) + } + }, + "details": CacheObject( + file.name, file, None, None, 16, CacheType.DIRECTORY + ), + } + + monkeypatch.setattr(CacheManager, "get_info", mock_get_info) + response = query_get_as("fio_2", "sample1", HTTPStatus.OK) + assert response.status_code == HTTPStatus.OK + assert response.json == { + "directories": [ + { + "name": "default", + "type": "DIRECTORY", + "uri": "https://localhost/api/v1/datasets/random_md5_string4/contents/sample1/default", + } + ], + "files": [], + "name": "sample1", + "type": "DIRECTORY", + }