Skip to content

Commit

Permalink
Convert TOC to use cache map (#3555)
Browse files Browse the repository at this point in the history
PBENCH-1192

This has been on my wishlist for a while, but was blocked by not actually having a usable cache. PR #3550 introduces a functioning (if minimal) cache manager, and this PR layers on top of that. The immediate motivation stems from an email exchange regarding Crucible, and the fact that Andrew would like (not surprisingly) to be able to access the contents of an archived tarball. Having TOC code rely on the Pbench-specific run-toc Elasticsearch index is not sustainable.

Note that, despite #3550 introducing a live cache, this PR represents the first actual use of the in-memory cache map, and some adjustments were necessary to make it work outside of the unit test environment.
  • Loading branch information
dbutenhof authored Oct 16, 2023
1 parent 47eddf4 commit f9c43b5
Show file tree
Hide file tree
Showing 10 changed files with 721 additions and 1,072 deletions.
4 changes: 1 addition & 3 deletions lib/pbench/server/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@
from pbench.server import PbenchServerConfig
from pbench.server.api.resources.api_key import APIKeyManage
from pbench.server.api.resources.datasets_compare import DatasetsCompare
from pbench.server.api.resources.datasets_contents import DatasetsContents
from pbench.server.api.resources.datasets_inventory import DatasetsInventory
from pbench.server.api.resources.datasets_list import DatasetsList
from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
from pbench.server.api.resources.datasets_visualize import DatasetsVisualize
from pbench.server.api.resources.endpoint_configure import EndpointConfig
from pbench.server.api.resources.query_apis.dataset import Datasets
from pbench.server.api.resources.query_apis.datasets.datasets_contents import (
DatasetsContents,
)
from pbench.server.api.resources.query_apis.datasets.datasets_detail import (
DatasetsDetail,
)
Expand Down
166 changes: 166 additions & 0 deletions lib/pbench/server/api/resources/datasets_contents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from http import HTTPStatus
from pathlib import Path

from flask import current_app, jsonify
from flask.wrappers import Request, Response

from pbench.server import OperationCode, PbenchServerConfig
from pbench.server.api.resources import (
APIAbort,
ApiAuthorizationType,
ApiBase,
ApiContext,
APIInternalError,
ApiMethod,
ApiParams,
ApiSchema,
Parameter,
ParamType,
Schema,
)
from pbench.server.cache_manager import (
BadDirpath,
CacheExtractBadPath,
CacheManager,
CacheObject,
CacheType,
TarballNotFound,
)
from pbench.server.database.models.datasets import Dataset


class DatasetsContents(ApiBase):
    """API class to retrieve the contents of a dataset tarball.

    Reports metadata about a target path within an archived dataset tarball,
    using the cache manager's in-memory cache map rather than the old
    Elasticsearch run-toc index.
    """

    def __init__(self, config: PbenchServerConfig):
        """Register the GET schema for the contents API.

        Args:
            config: the Pbench server configuration object
        """
        super().__init__(
            config,
            ApiSchema(
                ApiMethod.GET,
                OperationCode.READ,
                uri_schema=Schema(
                    Parameter("dataset", ParamType.DATASET, required=True),
                    Parameter("target", ParamType.STRING, required=False),
                ),
                authorization=ApiAuthorizationType.DATASET,
            ),
        )

    @staticmethod
    def _describe_directory(origin: str, info: dict, details: CacheObject) -> dict:
        """Build the JSON document describing a directory entry.

        Children are partitioned into sorted "directories" and "files" lists;
        symlinks are reported among the files with link metadata and a URI
        that resolves to either the contents or inventory API depending on
        the link target's type.

        Args:
            origin: the common URI prefix for this dataset's API links
            info: the cache map entry for the directory (may lack "children")
            details: the directory's own CacheObject metadata

        Returns:
            a JSON-serializable dict describing the directory
        """
        dir_list = []
        file_list = []

        for child, value in info.get("children", {}).items():
            d: CacheObject = value["details"]
            if d.type is CacheType.DIRECTORY:
                dir_list.append(
                    {
                        "name": child,
                        "type": d.type.name,
                        "uri": f"{origin}/contents/{d.location}",
                    }
                )
            elif d.type is CacheType.SYMLINK:
                # A symlink to a directory links to the contents API; a
                # symlink to a file links to the inventory API. A broken or
                # otherwise unresolvable link falls back to the inventory
                # URI of the link itself.
                if d.resolve_type is CacheType.DIRECTORY:
                    uri = f"{origin}/contents/{d.resolve_path}"
                elif d.resolve_type is CacheType.FILE:
                    uri = f"{origin}/inventory/{d.resolve_path}"
                else:
                    uri = f"{origin}/inventory/{d.location}"
                file_list.append(
                    {
                        "name": child,
                        "type": d.type.name,
                        "link": str(d.resolve_path),
                        "link_type": d.resolve_type.name,
                        "uri": uri,
                    }
                )
            else:
                entry = {
                    "name": child,
                    "type": d.type.name,
                    "uri": f"{origin}/inventory/{d.location}",
                }
                if d.type is CacheType.FILE:
                    entry["size"] = d.size
                file_list.append(entry)

        dir_list.sort(key=lambda e: e["name"])
        file_list.sort(key=lambda e: e["name"])

        # Normalize because we want the "root" directory to be reported as
        # "" rather than as Path's favored "."
        loc = str(details.location)
        name = details.name
        if loc == ".":
            loc = ""
            name = ""
        return {
            "name": name,
            "type": details.type.name,
            "directories": dir_list,
            "files": file_list,
            "uri": f"{origin}/contents/{loc}",
        }

    @staticmethod
    def _describe_file(origin: str, details: CacheObject) -> dict:
        """Build the JSON document describing a non-directory entry.

        Files link to the inventory API; symlinks resolving to a directory
        link to the contents API, and resolved links report the link target
        rather than the link's own location.

        Args:
            origin: the common URI prefix for this dataset's API links
            details: the entry's CacheObject metadata

        Returns:
            a JSON-serializable dict describing the file or symlink
        """
        access = "inventory"
        link = str(details.location)
        if details.type is CacheType.SYMLINK:
            if details.resolve_type is CacheType.DIRECTORY:
                access = "contents"
            if details.resolve_type in (CacheType.FILE, CacheType.DIRECTORY):
                link = str(details.resolve_path)
        val = {
            "name": details.name,
            "type": details.type.name,
            "uri": f"{origin}/{access}/{link}",
        }
        if details.type is CacheType.SYMLINK:
            val["link"] = link
            val["link_type"] = details.resolve_type.name
        elif details.type is CacheType.FILE:
            val["size"] = details.size
        return val

    def _get(
        self, params: ApiParams, request: Request, context: ApiContext
    ) -> Response:
        """Return metadata about the target file path within a tarball.

        GET /api/v1/datasets/{dataset}/contents/{target}

        Args:
            params: includes the uri parameters, which provide the dataset
                and target path.
            request: Original incoming Request object
            context: API context dictionary

        Raises:
            APIAbort: HTTPStatus.NOT_FOUND when the dataset or target path
                can't be found in the cache map
            APIInternalError: on unexpected cache lookup or serialization
                failures
        """
        dataset: Dataset = params.uri["dataset"]
        target = params.uri.get("target")

        # Both "no target" and the root "/" mean the tarball root, which the
        # cache map addresses as the relative path ".".
        path = Path("." if target in ("/", None) else target)

        cache_m = CacheManager(self.config, current_app.logger)
        try:
            info = cache_m.find_entry(dataset.resource_id, path)
        except (BadDirpath, CacheExtractBadPath, TarballNotFound) as e:
            raise APIAbort(HTTPStatus.NOT_FOUND, str(e)) from e
        except Exception as e:
            raise APIInternalError(f"Cache find error: {str(e)!r}") from e

        prefix = current_app.server_config.rest_uri
        origin = (
            f"{self._get_uri_base(request).host}{prefix}/datasets/{dataset.resource_id}"
        )

        details: CacheObject = info["details"]
        if details.type is CacheType.DIRECTORY:
            val = self._describe_directory(origin, info, details)
        else:
            val = self._describe_file(origin, details)

        try:
            return jsonify(val)
        except Exception as e:
            raise APIInternalError(f"JSONIFY {val}: {str(e)!r}") from e
3 changes: 1 addition & 2 deletions lib/pbench/server/api/resources/datasets_inventory.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from http import HTTPStatus
from pathlib import Path
from urllib.request import Request

from flask import current_app, send_file
from flask.wrappers import Response
from flask.wrappers import Request, Response

from pbench.server import OperationCode, PbenchServerConfig
from pbench.server.api.resources import (
Expand Down
Loading

0 comments on commit f9c43b5

Please sign in to comment.