diff --git a/lib/pbench/server/api/resources/__init__.py b/lib/pbench/server/api/resources/__init__.py index d74eb095d4..131e38de1f 100644 --- a/lib/pbench/server/api/resources/__init__.py +++ b/lib/pbench/server/api/resources/__init__.py @@ -29,9 +29,11 @@ MetadataBadKey, MetadataError, OperationName, + OperationState, ) from pbench.server.database.models.server_settings import ServerSetting from pbench.server.database.models.users import User +from pbench.server.sync import Sync class APIAbort(Exception): @@ -1419,6 +1421,51 @@ class UriBase: host_value: str +@dataclass +class AuditContext: + """Manage API audit context""" + + audit: Optional[Audit] = None + finalize: bool = True + status: AuditStatus = AuditStatus.SUCCESS + reason: Optional[AuditReason] = None + attributes: Optional[JSONOBJECT] = None + + def add_attribute(self, key: str, value: Any): + """Add a single audit attribute + + Args: + key: key name + value: key value + """ + if self.attributes is None: + self.attributes = {key: value} + else: + self.attributes[key] = value + + def add_attributes(self, attr: JSONOBJECT): + """Add multiple audit attributes as a JSON dict + + Args: + attr: a JSON dict + """ + if self.attributes is None: + self.attributes = attr + else: + self.attributes.update(attr) + + def set_error(self, error: str, reason: Optional[AuditReason] = None): + """Set an audit error + + Args: + error: error string + reason: audit failure reason + """ + self.add_attribute("error", error) + self.status = AuditStatus.FAILURE + self.reason = reason + + class ApiBase(Resource): """A base class for Pbench queries that provides common parameter handling behavior for specialized subclasses. @@ -2031,43 +2078,29 @@ def _dispatch( # wants to emit a special audit sequence it can disable "finalize" # in the context. It can also pass "attributes" by setting that # field. - auditing = { - "audit": audit, - "finalize": bool(audit), - "status": AuditStatus.SUCCESS, - "reason": None, - "attributes": None, - } - + auditing = AuditContext(audit=audit) context = { "auditing": auditing, "attributes": schema.attributes, "raw_params": raw_params, + "sync": None, } response = None + sync_message = None try: response = execute(params, request, context) except APIInternalError as e: current_app.logger.exception("{} {}", api_name, e.details) + auditing.set_error(str(e), AuditReason.INTERNAL) + sync_message = str(e) abort(e.http_status, message=str(e)) except APIAbort as e: current_app.logger.warning( "{} client error {}: '{}'", api_name, e.http_status, e ) - if auditing["finalize"]: - attr = auditing.get("attributes", {"message": str(e)}) - try: - Audit.create( - root=auditing["audit"], - status=AuditStatus.FAILURE, - reason=auditing["reason"], - attributes=attr, - ) - except Exception: - current_app.logger.error( - "Unexpected exception on audit: {}", auditing - ) + auditing.set_error(str(e)) + sync_message = str(e) abort(e.http_status, message=str(e), **e.kwargs) except Exception as e: x = APIInternalError("Unexpected exception") @@ -2075,23 +2108,24 @@ def _dispatch( current_app.logger.exception( "Exception {} API error: {}: {!r}", api_name, x, auditing ) - if auditing["finalize"]: - attr = auditing.get("attributes", {}) - attr["message"] = str(e) + auditing.set_error(str(e), AuditReason.INTERNAL) + sync_message = str(e) + abort(x.http_status, message=x.message) + finally: + # If the operation created a Sync object, it will have been updated + # and removed unless the operation failed. 
This means we're here + # because of an exception, and one of the handlers has set an + # appropriate message to record in the operations table. + sync: Optional[Sync] = context.get("sync") + if sync: + sync.update(dataset, OperationState.FAILED, message=sync_message) + if auditing.audit and auditing.finalize: Audit.create( - root=auditing["audit"], - status=AuditStatus.FAILURE, - reason=AuditReason.INTERNAL, - attributes=attr, + root=auditing.audit, + status=auditing.status, + reason=auditing.reason, + attributes=auditing.attributes, ) - abort(x.http_status, message=x.message) - if auditing["finalize"]: - Audit.create( - root=auditing["audit"], - status=auditing["status"], - reason=auditing["reason"], - attributes=auditing["attributes"], - ) return response def _get(self, args: ApiParams, req: Request, context: ApiContext) -> Response: diff --git a/lib/pbench/server/api/resources/api_key.py b/lib/pbench/server/api/resources/api_key.py index 93a86853a9..922353785d 100644 --- a/lib/pbench/server/api/resources/api_key.py +++ b/lib/pbench/server/api/resources/api_key.py @@ -129,8 +129,9 @@ def _post(self, params: ApiParams, req: Request, context: ApiContext) -> Respons status = HTTPStatus.OK except Exception as e: raise APIInternalError(str(e)) from e - context["auditing"]["attributes"] = key.as_json() - response = jsonify(key.as_json()) + result = key.as_json() + context["auditing"].add_attributes(result) + response = jsonify(result) response.status_code = status return response @@ -162,7 +163,7 @@ def _delete(self, params: ApiParams, req: Request, context: ApiContext) -> Respo raise APIAbort(HTTPStatus.NOT_FOUND, "Requested key not found") key = keys[0] try: - context["auditing"]["attributes"] = key.as_json() + context["auditing"].add_attributes(key.as_json()) key.delete() return "deleted", HTTPStatus.OK except Exception as e: diff --git a/lib/pbench/server/api/resources/datasets_metadata.py b/lib/pbench/server/api/resources/datasets_metadata.py index 7403dbf938..7b395afd3c 100644 --- a/lib/pbench/server/api/resources/datasets_metadata.py +++ b/lib/pbench/server/api/resources/datasets_metadata.py @@ -131,7 +131,7 @@ def _put(self, params: ApiParams, req: Request, context: ApiContext) -> Response dataset = params.uri["dataset"] metadata = params.body["metadata"] - context["auditing"]["attributes"] = {"updated": metadata} + context["auditing"].add_attribute("updated", metadata) # Validate the authenticated user's authorization for the combination # of "owner" and "access". diff --git a/lib/pbench/server/api/resources/query_apis/datasets/__init__.py b/lib/pbench/server/api/resources/query_apis/datasets/__init__.py index 1595d41362..c1b57f9a5a 100644 --- a/lib/pbench/server/api/resources/query_apis/datasets/__init__.py +++ b/lib/pbench/server/api/resources/query_apis/datasets/__init__.py @@ -11,7 +11,7 @@ ParamType, ) from pbench.server.api.resources.query_apis import ElasticBase -from pbench.server.database.models.datasets import Dataset, Metadata +from pbench.server.database.models.datasets import Dataset from pbench.server.database.models.index_map import IndexMap from pbench.server.database.models.templates import Template @@ -95,40 +95,36 @@ def get_index( ) -> str: """Retrieve ES indices based on a given root_index_name. - Datasets marked "archiveonly" aren't indexed, and can't be referenced - in most APIs that rely on Elasticsearch. Instead, we'll raise a - CONFLICT error. + Datasets without an index can't be referenced in most APIs that rely on + Elasticsearch. 
Instead, we'll raise a NOT_FOUND error. However, the + /api/v1/datasets API will specify ok_no_index as it needs to operate + on the dataset regardless of whether indexing is enabled. + + All indices are returned if root_index_name is omitted. Args: dataset: dataset object root_index_name: A root index name like "run-data" - ok_no_index: Don't fail on an archiveonly dataset + ok_no_index: Don't fail if the dataset has no indices Raises: - APIAbort(CONFLICT) if indexing was disabled on the target dataset. - APIAbort(NOT_FOUND) if the dataset has no matching index data + APIAbort(NOT_FOUND) if an index is required and the dataset has none Returns: A string that joins all selected indices with ",", suitable for use in an Elasticsearch query URI. """ - archive_only = Metadata.getvalue(dataset, Metadata.SERVER_ARCHIVE) - if archive_only: - if ok_no_index: - return "" - raise APIAbort(HTTPStatus.CONFLICT, "Dataset indexing was disabled") - - index_keys = list(IndexMap.indices(dataset, root_index_name)) + index_keys = IndexMap.indices(dataset, root_index_name) + if index_keys: + return ",".join(index_keys) + if ok_no_index: + return "" - if not index_keys: - raise APIAbort( - HTTPStatus.NOT_FOUND, - f"Dataset has no {root_index_name if root_index_name else 'indexed'!r} data", - ) - - indices = ",".join(index_keys) - return indices + raise APIAbort( + HTTPStatus.NOT_FOUND, + f"Dataset has no {root_index_name if root_index_name else 'indexed'!r} data", + ) def get_aggregatable_fields( self, mappings: JSON, prefix: AnyStr = "", result: Union[List, None] = None diff --git a/lib/pbench/server/api/resources/query_apis/datasets/datasets.py b/lib/pbench/server/api/resources/query_apis/datasets/datasets.py index 825ce6a49f..706dafbbcf 100644 --- a/lib/pbench/server/api/resources/query_apis/datasets/datasets.py +++ b/lib/pbench/server/api/resources/query_apis/datasets/datasets.py @@ -12,6 +12,7 @@ ApiMethod, ApiParams, ApiSchema, + AuditContext, MissingParameters, Parameter, ParamType, @@ -121,9 +122,9 @@ def assemble(self, params: ApiParams, context: ApiContext) -> JSONOBJECT: """ dataset = context["dataset"] + auditing: AuditContext = context["auditing"] action = context["attributes"].action context["action"] = action - audit_attributes = {} access = None owner = None elastic_options = {"ignore_unavailable": "true"} @@ -152,7 +153,6 @@ def assemble(self, params: ApiParams, context: ApiContext) -> JSONOBJECT: f"can't set {OperationState.WORKING.name} on {dataset.name}: {str(e)!r} " ) context["sync"] = sync - context["auditing"]["attributes"] = audit_attributes if action == "update": access = params.query.get("access") owner = params.query.get("owner") @@ -168,12 +168,12 @@ def assemble(self, params: ApiParams, context: ApiContext) -> JSONOBJECT: ) if access: - audit_attributes["access"] = access + auditing.add_attribute("access", access) context["access"] = access else: access = dataset.access if owner: - audit_attributes["owner"] = owner + auditing.add_attribute("owner", owner) context["owner"] = owner else: owner = dataset.owner_id @@ -187,7 +187,7 @@ def assemble(self, params: ApiParams, context: ApiContext) -> JSONOBJECT: # # It's important that all context fields required for postprocessing # of unindexed datasets have been set before this! 
- indices = self.get_index(dataset, ok_no_index=(action != "get")) + indices = self.get_index(dataset, ok_no_index=True) context["indices"] = indices if not indices: return {} @@ -234,7 +234,7 @@ def postprocess(self, es_json: JSONOBJECT, context: ApiContext) -> Response: and some other data. We mostly want to determine whether it was 100% successful (before updating or deleting the dataset), but we also summarize the results for the client. - * For get, we directly return the "hit list". + * For get, we return a count of documents for each index name. Args: es_json: the Elasticsearch response document @@ -245,25 +245,21 @@ def postprocess(self, es_json: JSONOBJECT, context: ApiContext) -> Response: """ action = context["action"] dataset = context["dataset"] + auditing: AuditContext = context["auditing"] current_app.logger.info("POSTPROCESS {}: {}", dataset.name, es_json) failures = 0 if action == "get": - count = None hits = [] - try: - count = es_json["hits"]["total"]["value"] - hits = es_json["hits"]["hits"] - if int(count) == 0: + if es_json: + try: + hits = es_json["hits"]["hits"] + except KeyError as e: raise APIInternalError( - f"Elasticsearch returned no matches for {dataset.name}" + f"Can't find search service match data for {dataset.name} ({e}) in {es_json!r}", ) - except KeyError as e: - raise APIInternalError( - f"Can't find Elasticsearch match data for {dataset.name} ({e}) in {es_json!r}", - ) - except ValueError as e: + if not isinstance(hits, list): raise APIInternalError( - f"Elasticsearch bad hit count {count!r} for {dataset.name}: {e}", + f"search service did not return hits list ({type(hits).__name__})" ) results = defaultdict(int) for hit in hits: @@ -283,7 +279,7 @@ def postprocess(self, es_json: JSONOBJECT, context: ApiContext) -> Response: "version_conflicts": 0, "failures": 0, } - context["auditing"]["attributes"]["results"] = results + auditing.add_attribute("results", results) if failures == 0: if action == "update": @@ -318,15 +314,19 @@ def postprocess(self, es_json: JSONOBJECT, context: ApiContext) -> Response: # it's still set then our `sync` local is valid and we want to get # it out of "WORKING" state. sync = context.get("sync") - auditing = context["auditing"] if sync: - state = OperationState.OK if not failures else OperationState.FAILED + if failures: + state = OperationState.FAILED + message = f"Unable to {action} some indexed documents" + else: + state = OperationState.OK + message = None try: - sync.update(dataset=dataset, state=state) + sync.update(dataset=dataset, state=state, message=message) + del context["sync"] except Exception as e: - auditing["attributes"] = {"message": str(e)} - auditing["status"] = AuditStatus.WARNING - auditing["reason"] = AuditReason.INTERNAL + auditing.set_error(str(e), reason=AuditReason.INTERNAL) + auditing.status = AuditStatus.WARNING raise APIInternalError( f"Unexpected sync error {dataset.name} {str(e)!r}" ) from e @@ -336,19 +336,15 @@ def postprocess(self, es_json: JSONOBJECT, context: ApiContext) -> Response: # experienced total failure. Either way, the operation can be retried # if some documents failed to update. 
if results["failures"] and results["failures"] == results["total"]: - auditing["status"] = AuditStatus.WARNING - auditing["reason"] = AuditReason.INTERNAL - auditing["attributes"][ - "message" - ] = f"Unable to {action} some indexed documents" + auditing.status = AuditStatus.WARNING + auditing.reason = AuditReason.INTERNAL + auditing.add_attribute( + "message", f"Unable to {action} some indexed documents" + ) raise APIInternalError( f"Failed to {action} any of {results['total']} " f"Elasticsearch documents: {es_json}" ) - elif sync: - sync.error( - dataset=dataset, - message=f"Unable to {action} some indexed documents", - ) + # construct response object return jsonify(results) diff --git a/lib/pbench/server/api/resources/server_settings.py b/lib/pbench/server/api/resources/server_settings.py index a5ea1cae90..9881c7242c 100644 --- a/lib/pbench/server/api/resources/server_settings.py +++ b/lib/pbench/server/api/resources/server_settings.py @@ -153,7 +153,7 @@ def _put_key(self, params: ApiParams, context: ApiContext) -> Response: f"No value found for server settings key {key!r}", ) - context["auditing"]["attributes"] = {"updated": {key: value}} + context["auditing"].add_attribute("updated", {key: value}) try: ServerSetting.set(key=key, value=value) @@ -186,7 +186,7 @@ def _put_body(self, params: ApiParams, context: ApiContext) -> Response: f"Unrecognized server settings {sorted(badkeys)!r} specified: valid settings are {sorted(ServerSetting.KEYS)!r}", ) - context["auditing"]["attributes"] = {"updated": params.body} + context["auditing"].add_attribute("updated", params.body) failures = [] response = {} diff --git a/lib/pbench/server/database/models/index_map.py b/lib/pbench/server/database/models/index_map.py index b9b188db49..f6c396daeb 100644 --- a/lib/pbench/server/database/models/index_map.py +++ b/lib/pbench/server/database/models/index_map.py @@ -1,4 +1,4 @@ -from typing import Iterator, NewType, Optional +from typing import NewType, Optional from sqlalchemy import Column, ForeignKey, Integer, String from sqlalchemy.exc import SQLAlchemyError @@ -186,7 +186,7 @@ def merge(cls, dataset: Dataset, merge_map: IndexMapType): cls.commit(dataset, "merge") @staticmethod - def indices(dataset: Dataset, root: Optional[str] = None) -> Iterator[str]: + def indices(dataset: Dataset, root: Optional[str] = None) -> list[str]: """Return the indices matching the specified root index name. Args: @@ -207,7 +207,7 @@ def indices(dataset: Dataset, root: Optional[str] = None) -> Iterator[str]: except SQLAlchemyError as e: raise IndexMapSqlError(e, operation="indices", dataset=dataset, name=root) - return (i.index for i in map) + return [str(i.index) for i in map] @staticmethod def exists(dataset: Dataset) -> bool: diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py index acaffb144d..7112bce925 100644 --- a/lib/pbench/test/functional/server/test_datasets.py +++ b/lib/pbench/test/functional/server/test_datasets.py @@ -462,16 +462,23 @@ class TestIndexing: def test_details(self, server_client: PbenchServerClient, login_user): """Check access to indexed data - Perform a GET /datasets/details/{id} to be sure that basic run data - has been indexed and is available. + Perform a GET /datasets/details/{id} to confirm whether indexed run + data is available. """ print(" ... 
checking dataset RUN index ...") datasets = server_client.get_list( - metadata=["dataset.metalog.pbench,server.archiveonly"], owner="tester" + metadata=["dataset.metalog.pbench"], owner="tester" ) for d in datasets: - print(f"\t... checking run index for {d.name}") - indexed = not d.metadata["server.archiveonly"] + + # Query the dataset's indices: if the returned object is empty, + # there are none. + indices = server_client.get(API.DATASETS, {"dataset": d.resource_id}) + indexed = bool(indices.json()) + print( + f"\t... checking run details for {d.name} " + f"({'' if indexed else 'not '}indexed)" + ) response = server_client.get( API.DATASETS_DETAIL, {"dataset": d.resource_id}, raise_error=False ) @@ -486,9 +493,9 @@ def test_details(self, server_client: PbenchServerClient, login_user): ) else: assert ( - response.status_code == HTTPStatus.CONFLICT + response.status_code == HTTPStatus.NOT_FOUND ), f"Unexpected {response.json()['message']}" - print(f"\t\t... {d.name} is archiveonly") + assert response.json()["message"] == "Dataset has no 'run-data' data" class TestList: diff --git a/lib/pbench/test/unit/server/query_apis/conftest.py b/lib/pbench/test/unit/server/query_apis/conftest.py index d3aec57cad..2b5b8a457d 100644 --- a/lib/pbench/test/unit/server/query_apis/conftest.py +++ b/lib/pbench/test/unit/server/query_apis/conftest.py @@ -33,7 +33,7 @@ def query_api( expected_index: str, expected_status: str, headers: Optional[dict] = None, - request_method=ApiMethod.POST, + request_method: ApiMethod = ApiMethod.POST, query_params: Optional[JSONOBJECT] = None, expect_call: Optional[bool] = None, **kwargs, @@ -64,7 +64,8 @@ def query_api( The Pbench API response object """ base_uri = server_config.get("Indexing", "uri") - es_url = f"{base_uri}{expected_index}{es_uri}" + idx = expected_index if expected_index is not None else "/" + es_url = f"{base_uri}{idx}{es_uri}" client_method = getattr(client, request_method.name.lower()) if request_method == ApiMethod.GET: es_method = responses.GET diff --git a/lib/pbench/test/unit/server/query_apis/test_datasets.py b/lib/pbench/test/unit/server/query_apis/test_datasets.py index 34c22dc30c..1765475a4d 100644 --- a/lib/pbench/test/unit/server/query_apis/test_datasets.py +++ b/lib/pbench/test/unit/server/query_apis/test_datasets.py @@ -58,13 +58,16 @@ def _setup(self, client): ) @pytest.mark.parametrize( - "user,ao,expected_status", + "user,ao,idx,expected_status", [ - ("drb", False, HTTPStatus.OK), - ("drb", True, HTTPStatus.OK), - ("test_admin", False, HTTPStatus.OK), - ("test", False, HTTPStatus.FORBIDDEN), - (None, False, HTTPStatus.UNAUTHORIZED), + ("drb", False, True, HTTPStatus.OK), + ("drb", False, False, HTTPStatus.OK), + ("drb", True, True, HTTPStatus.OK), + ("drb", True, False, HTTPStatus.OK), + ("test_admin", False, True, HTTPStatus.OK), + ("test_admin", False, False, HTTPStatus.OK), + ("test", False, False, HTTPStatus.FORBIDDEN), + (None, False, False, HTTPStatus.UNAUTHORIZED), ], ) def test_empty_delete( @@ -73,6 +76,7 @@ def test_empty_delete( query_api, user, ao, + idx, expected_status, get_token_func, ): @@ -88,24 +92,30 @@ def test_empty_delete( else: user_id = None + drb = Dataset.query(name="drb") + index = None if ao: # Set archiveonly flag to disable index-map logic - drb = Dataset.query(name="drb") Metadata.setvalue(drb, Metadata.SERVER_ARCHIVE, True) - index = None - else: + + # If we want an index, build the expected path; otherwise make sure + # the dataset doesn't have one. 
+ if idx: index = self.build_index_from_metadata() + else: + IndexMap.delete(drb) + expect_a_call = expected_status == HTTPStatus.OK and idx response = query_api( - self.pbench_endpoint, - self.elastic_endpoint, + pbench_uri=self.pbench_endpoint, + es_uri=self.elastic_endpoint, payload=None, expected_index=index, expected_status=expected_status, + headers=headers, request_method=self.api_method, + expect_call=expect_a_call, json=EMPTY_DELDATE_RESPONSE, - headers=headers, - expect_call=(expected_status == HTTPStatus.OK and not ao), ) if response.status_code == HTTPStatus.OK: expected = { @@ -148,24 +158,28 @@ def test_empty_delete( Dataset.query(name="drb") else: # On failure, the dataset should still exist - assert Dataset.query(name="drb") + Dataset.query(name="drb") assert response.json["message"].endswith( "is not authorized to DELETE a resource owned by drb with private access" ) + # permission errors should be caught before auditing + assert len(Audit.query()) == 0 + @pytest.mark.parametrize( - "user,ao,owner,access,expected_status", + "user,ao,idx,owner,access,expected_status", [ - ("drb", False, None, "public", HTTPStatus.OK), - ("drb", True, None, "public", HTTPStatus.OK), - ("test_admin", False, "test", None, HTTPStatus.OK), - ("test", False, None, "public", HTTPStatus.FORBIDDEN), - (None, False, None, "public", HTTPStatus.UNAUTHORIZED), - ("drb", False, "test", "public", HTTPStatus.FORBIDDEN), - ("drb", True, "test", None, HTTPStatus.FORBIDDEN), - ("test_admin", False, None, None, HTTPStatus.BAD_REQUEST), - ("test", False, "test", None, HTTPStatus.FORBIDDEN), - (None, False, "drb", None, HTTPStatus.UNAUTHORIZED), + ("drb", False, True, None, "public", HTTPStatus.OK), + ("drb", False, False, None, "public", HTTPStatus.OK), + ("drb", True, True, None, "public", HTTPStatus.OK), + ("test_admin", False, True, "test", None, HTTPStatus.OK), + ("test", False, True, None, "public", HTTPStatus.FORBIDDEN), + (None, False, True, None, "public", HTTPStatus.UNAUTHORIZED), + ("drb", False, True, "test", "public", HTTPStatus.FORBIDDEN), + ("drb", True, True, "test", None, HTTPStatus.FORBIDDEN), + ("test_admin", False, True, None, None, HTTPStatus.BAD_REQUEST), + ("test", False, True, "test", None, HTTPStatus.FORBIDDEN), + (None, False, True, "drb", None, HTTPStatus.UNAUTHORIZED), ], ) def test_update( @@ -175,6 +189,7 @@ def test_update( get_token_func, user, ao, + idx, owner, access, expected_status, @@ -192,12 +207,17 @@ def test_update( user_id = None drb = Dataset.query(name="drb") + index = None if ao: # Set archiveonly flag to disable index-map logic Metadata.setvalue(drb, Metadata.SERVER_ARCHIVE, True) - index = None - else: + + # If we want an index, build the expected path; otherwise make sure + # the dataset doesn't have one. 
+ if idx: index = self.build_index_from_metadata() + else: + IndexMap.delete(drb) expected_owner = drb.owner_id original_owner = drb.owner_id @@ -222,16 +242,17 @@ def test_update( else: query = "" + expect_a_call = expected_status == HTTPStatus.OK and idx response = query_api( - f"/datasets/random_md5_string1{query}", - "/_update_by_query?ignore_unavailable=true&refresh=true", + pbench_uri=f"/datasets/random_md5_string1{query}", + es_uri="/_update_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=expected_status, + headers=headers, request_method=ApiMethod.POST, + expect_call=expect_a_call, json=EMPTY_DELDATE_RESPONSE, - headers=headers, - expect_call=(expected_status == HTTPStatus.OK and not ao), ) # Look up the post-update dataset @@ -326,15 +347,15 @@ def test_update_partial_failure(self, client, query_api, get_token_func): "failures": ["bad"], } response = query_api( - "/datasets/random_md5_string1?access=public", - "/_update_by_query?ignore_unavailable=true&refresh=true", + pbench_uri="/datasets/random_md5_string1?access=public", + es_uri="/_update_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=HTTPStatus.OK, - request_method=ApiMethod.POST, - json=es_json, headers=headers, + request_method=ApiMethod.POST, expect_call=True, + json=es_json, ) expected = { "total": 2, @@ -345,6 +366,31 @@ def test_update_partial_failure(self, client, query_api, get_token_func): } assert expected == response.json + @pytest.mark.parametrize("op", (ApiMethod.POST, ApiMethod.DELETE)) + def test_update_es_failure(self, client, query_api, get_token_func, op): + """Check that update operation status is finalized on failure""" + + token = get_token_func("drb") + assert token + headers = {"authorization": f"bearer {token}"} + index = self.build_index_from_metadata() + drb = Dataset.query(name="drb") + path = "update" if op is ApiMethod.POST else "delete" + query_api( + pbench_uri=f"/datasets/{drb.resource_id}?access=public", + es_uri=f"/_{path}_by_query?ignore_unavailable=true&refresh=true", + payload=None, + expected_index=index, + expected_status=HTTPStatus.INTERNAL_SERVER_ERROR, + headers=headers, + request_method=op, + expect_call=True, + body=Exception("I'm a crumbly search instance"), + ) + ops = Metadata.getvalue(drb, "dataset.operations") + record = ops[path.upper()] + assert "FAILED" == record["state"], f"Unexpected operational state {ops}" + def test_update_total_failure(self, client, query_api, get_token_func): """Check update with all Elasticsearch operations failing""" @@ -370,20 +416,22 @@ def test_update_total_failure(self, client, query_api, get_token_func): "failures": ["bad", "me too"], } query_api( - "/datasets/random_md5_string1?access=public", - "/_update_by_query?ignore_unavailable=true&refresh=true", + pbench_uri="/datasets/random_md5_string1?access=public", + es_uri="/_update_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=HTTPStatus.INTERNAL_SERVER_ERROR, - request_method=ApiMethod.POST, - json=es_json, headers=headers, + request_method=ApiMethod.POST, expect_call=True, + json=es_json, ) - @pytest.mark.parametrize("ao", (True, False)) + @pytest.mark.parametrize( + "ao,idx", ((True, True), (True, False), (False, True), (False, False)) + ) def test_get( - self, monkeypatch, more_datasets, client, query_api, build_auth_header, ao + self, monkeypatch, more_datasets, client, query_api, build_auth_header, ao, idx ): """Check on the GET 
summary behavior @@ -401,46 +449,45 @@ def test_get( ds = Dataset.query(name="drb") json = copy.deepcopy(self.empty_es_response_payload) + index = None if ao: # Set archiveonly flag to disable index-map logic Metadata.setvalue(ds, Metadata.SERVER_ARCHIVE, True) - index = None - if expected_status == HTTPStatus.OK: - expected_status = HTTPStatus.CONFLICT - monkeypatch.setattr(IndexMap, "indices", lambda _d: []) - expected = {} - else: - indices = list(IndexMap.indices(ds)) + + expected = {} + if idx: + indices = IndexMap.indices(ds) index = "/" + ",".join(indices) hits = [] - expected = {} for i, n in enumerate(indices): hits.append({"_index": n, "_id": i, "_source": {"data": f"{n}_{i}"}}) expected[n] = 1 - json["hits"]["total"]["value"] = str(len(hits)) + json["hits"]["total"]["value"] = len(hits) json["hits"]["hits"] = hits + else: + IndexMap.delete(ds) + expect_a_call = expected_status == HTTPStatus.OK and idx response = query_api( - self.pbench_endpoint, - "/_search?ignore_unavailable=true", - self.payload, - index, - expected_status, - request_method=ApiMethod.GET, + pbench_uri=self.pbench_endpoint, + es_uri="/_search?ignore_unavailable=true", + payload=self.payload, + expected_index=index, + expected_status=expected_status, headers=build_auth_header["header"], + request_method=ApiMethod.GET, + expect_call=expect_a_call, json=json, ) if expected_status == HTTPStatus.OK: assert expected == response.json - elif expected_status == HTTPStatus.CONFLICT: - assert {"message": "Dataset indexing was disabled"} == response.json else: assert { "message": "Unauthenticated client is not authorized to READ a resource owned by drb with private access" } == response.json - @pytest.mark.parametrize("value", (None, "not-integer", 0)) - def test_bad_get(self, client, query_api, get_token_func, value): + @pytest.mark.parametrize("hits", (None, 0, "string", {})) + def test_bad_get(self, client, query_api, get_token_func, hits): """Check a GET with bad Elasticsearch hit counts""" token = get_token_func("drb") @@ -458,20 +505,20 @@ def test_bad_get(self, client, query_api, get_token_func, value): "hits": [], }, } - if value is None: - del json["hits"]["total"]["value"] + if hits is None: + del json["hits"]["hits"] else: - json["hits"]["total"]["value"] = value + json["hits"]["hits"] = hits query_api( - self.pbench_endpoint, - "/_search?ignore_unavailable=true", - self.payload, - index, - HTTPStatus.INTERNAL_SERVER_ERROR, - request_method=ApiMethod.GET, + pbench_uri=self.pbench_endpoint, + es_uri="/_search?ignore_unavailable=true", + payload=self.payload, + expected_index=index, + expected_status=HTTPStatus.INTERNAL_SERVER_ERROR, headers=headers, - json=json, + request_method=ApiMethod.GET, expect_call=True, + json=json, ) def test_update_unstable(self, monkeypatch, client, query_api, get_token_func): @@ -491,14 +538,14 @@ def test_update_unstable(self, monkeypatch, client, query_api, get_token_func): ) response = query_api( - "/datasets/random_md5_string1?access=public", - "/_update_by_query?ignore_unavailable=true&refresh=true", + pbench_uri="/datasets/random_md5_string1?access=public", + es_uri="/_update_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=HTTPStatus.CONFLICT, + headers=headers, request_method=ApiMethod.POST, json=EMPTY_DELDATE_RESPONSE, - headers=headers, ) assert {"message": "Dataset is working on INDEX"} == response.json @@ -519,14 +566,14 @@ def fails(_self, _dataset, _state): monkeypatch.setattr(Sync, "update", fails) query_api( - 
"/datasets/random_md5_string1?access=public", - "/_update_by_query?ignore_unavailable=true&refresh=true", + pbench_uri="/datasets/random_md5_string1?access=public", + es_uri="/_update_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=HTTPStatus.INTERNAL_SERVER_ERROR, + headers=headers, request_method=ApiMethod.POST, json=EMPTY_DELDATE_RESPONSE, - headers=headers, ) def test_update_bad_final_sync( @@ -548,15 +595,15 @@ def fails( monkeypatch.setattr(Sync, "update", fails) query_api( - "/datasets/random_md5_string1?access=public", - "/_update_by_query?ignore_unavailable=true&refresh=true", + pbench_uri="/datasets/random_md5_string1?access=public", + es_uri="/_update_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=HTTPStatus.INTERNAL_SERVER_ERROR, - request_method=ApiMethod.POST, - json=EMPTY_DELDATE_RESPONSE, headers=headers, + request_method=ApiMethod.POST, expect_call=True, + json=EMPTY_DELDATE_RESPONSE, ) def test_update_bad_update(self, monkeypatch, client, query_api, get_token_func): @@ -573,15 +620,15 @@ def fails(_s): monkeypatch.setattr(Dataset, "update", fails) query_api( - "/datasets/random_md5_string1?access=public", - "/_update_by_query?ignore_unavailable=true&refresh=true", + pbench_uri="/datasets/random_md5_string1?access=public", + es_uri="/_update_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=HTTPStatus.INTERNAL_SERVER_ERROR, - request_method=ApiMethod.POST, - json=EMPTY_DELDATE_RESPONSE, headers=headers, + request_method=ApiMethod.POST, expect_call=True, + json=EMPTY_DELDATE_RESPONSE, ) def test_update_bad_delete(self, monkeypatch, client, query_api, get_token_func): @@ -598,13 +645,13 @@ def fails(_s): monkeypatch.setattr(Dataset, "delete", fails) query_api( - "/datasets/random_md5_string1", - "/_delete_by_query?ignore_unavailable=true&refresh=true", + pbench_uri="/datasets/random_md5_string1", + es_uri="/_delete_by_query?ignore_unavailable=true&refresh=true", payload=None, expected_index=index, expected_status=HTTPStatus.INTERNAL_SERVER_ERROR, - request_method=ApiMethod.DELETE, - json=EMPTY_DELDATE_RESPONSE, headers=headers, + request_method=ApiMethod.DELETE, expect_call=True, + json=EMPTY_DELDATE_RESPONSE, ) diff --git a/lib/pbench/test/unit/server/test_api_key.py b/lib/pbench/test/unit/server/test_api_key.py index 933f846a56..634ee7d5d1 100644 --- a/lib/pbench/test/unit/server/test_api_key.py +++ b/lib/pbench/test/unit/server/test_api_key.py @@ -67,7 +67,9 @@ def test_unauthorized_access(self, query_post_as, pbench_drb_token_invalid): assert audit[1].user_id is None assert audit[1].user_name is None assert audit[1].reason is None - assert audit[1].attributes is None + assert audit[1].attributes == { + "error": "User provided access_token is invalid or expired" + } def test_successful_api_key_generation_with_name( self, query_post_as, pbench_drb_token @@ -319,7 +321,7 @@ def test_delete_api_key_notfound( assert audit[1].status == AuditStatus.FAILURE assert audit[1].name == "apikey" assert audit[1].object_type == AuditType.API_KEY - assert audit[1].attributes is None + assert audit[1].attributes == {"error": "Requested key not found"} def test_delete_api_key_fail( self, query_delete_as, get_token_func, pbench_drb_api_key, create_user diff --git a/server/pbenchinacan/README.md b/server/pbenchinacan/README.md new file mode 100644 index 0000000000..a78af18430 --- /dev/null +++ b/server/pbenchinacan/README.md @@ -0,0 +1,22 
@@ +# Private CA + +The "pbench in a can" build of the Pbench Server relies on a private +Certificate Authority cert called `pbench_CA.crt`. The cert is valid for +five years (note the `-days 1826` below) and needs to be regenerated +periodically: + +``` +openssl req -x509 -new -nodes \ + -key server/pbenchinacan/etc/pki/tls/private/pbench_CA.key \ + -sha256 -days 1826 \ + -out server/pbenchinacan/etc/pki/tls/certs/pbench_CA.crt \ + -subj '/CN=pbench.redhat.com/C=US/L=Westford, MA' +``` + +Note that the private key file doesn't need to be regenerated. + +You can view the current certificate with: + +``` +openssl x509 -text \ + -in server/pbenchinacan/etc/pki/tls/certs/pbench_CA.crt +``` diff --git a/server/pbenchinacan/etc/pki/tls/certs/pbench_CA.crt b/server/pbenchinacan/etc/pki/tls/certs/pbench_CA.crt index fa44dade03..8c9c312700 100644 --- a/server/pbenchinacan/etc/pki/tls/certs/pbench_CA.crt +++ b/server/pbenchinacan/etc/pki/tls/certs/pbench_CA.crt @@ -1,8 +1,8 @@ -----BEGIN CERTIFICATE----- -MIIDYTCCAkmgAwIBAgIUUtVXi1qMBbg1wLVBmeUM/tMuWsMwDQYJKoZIhvcNAQEL +MIIDYTCCAkmgAwIBAgIUDWClad/f+A7kcX0y0fpB2MaHWHQwDQYJKoZIhvcNAQEL BQAwQDEaMBgGA1UEAwwRcGJlbmNoLnJlZGhhdC5jb20xCzAJBgNVBAYTAlVTMRUw -EwYDVQQHDAxXZXN0Zm9yZCwgTUEwHhcNMjMwNTA1MjEzNzU0WhcNMjQwNDI1MjEz -NzU0WjBAMRowGAYDVQQDDBFwYmVuY2gucmVkaGF0LmNvbTELMAkGA1UEBhMCVVMx +EwYDVQQHDAxXZXN0Zm9yZCwgTUEwHhcNMjQwNTA0MTI1MzM2WhcNMjkwNTA0MTI1 +MzM2WjBAMRowGAYDVQQDDBFwYmVuY2gucmVkaGF0LmNvbTELMAkGA1UEBhMCVVMx FTATBgNVBAcMDFdlc3Rmb3JkLCBNQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC AQoCggEBAJVWR8CY3cvtLu5Ss9XAWBp5PNE/X0zWJtRrph/Xz0qtQsxpqn9fhEjF kLr36AkewbOoW8HmqKzSrS9bgCrdglH1oqefLntt6q9F10SOCXF2jbQA3r63f9Kb @@ -11,11 +11,11 @@ Xxtl6vUi9zoM7b3I1I0Cztg23e86ZsEVd+OZVDQbYLd4A3uBmzcmepHP6mwNc+Gm yhNeQ0ovu03Zz8j9W64Jau8Tpaja90s48pk0VRdfQX//N4mntAo3vYwd3Ab4Pq4o 2c2GGpihLURlOCk9fNGo/s9atP/0+NsCAwEAAaNTMFEwHQYDVR0OBBYEFMz+SX+d JyWELuNukm1Szpz+l7qiMB8GA1UdIwQYMBaAFMz+SX+dJyWELuNukm1Szpz+l7qi -MA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBACxI0EXHjJtPhPkX -1gLDxeID1+HMSmQVfnEd0zcBz/DhACVPnAPHF+lQ0QLfqJobmSKRei9s0pa5XEfo -vIVcBvzKE7tuEM7ZeCKx0PBftp7poMEQyIEPezoaD9j20rXE14KS2fCOnFkahGjp -CeYqHjnnf+LMkYf1nXM3Yhxz4w3uzFQmYO+pRVAE6Vjeftz2d3s2w+1G/bNPKgEu -8NbG/6T25ZNe0T+wE8rxvB1+tDuPbIc83or7SrpiaxbSo1wqAm/ajxW6bdXftP0l -aLLlVemlt3oWE9lkVDtuMJTbt0noCjb3FlWrDVwm5Zm3ilVf8L2JOsG7LYjYUAQs -5VgnCv8= +MA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAG2NFW0pWJuUuFoh +w2GZkoFz1uSJs3O1LCHWa2A+3g4fRWs6OOKHS4joRll0S0ExtrTGt2FKMS+3IUXJ +JFcKfLfmzgIu6rX4G/BEHu1Jr4MkT3HJUkHfGD4aGF99IuhXT6u/6pPzl9ddvgRK +8S2AGOWIQOXO9gzlu9BsfrFkolKdnogG3Kcf5DqiFKEb9OA39Yute4VsrBAbT4ng +TEMI7Duz0hlef+beLHe0YGbR3vH2/e6EvZEa0kF127jdXo1v+h/r5ESlT067dA3M +dxDQ768G0TqUq8lxDDYX2/2u9JtNQSsz/pBO2/abha/tkTwqhOT39Iw/Z81orvLh +M7htpEM= -----END CERTIFICATE----- diff --git a/server/pbenchinacan/load_keycloak.sh b/server/pbenchinacan/load_keycloak.sh index 7c40345a18..81810c66a2 100755 --- a/server/pbenchinacan/load_keycloak.sh +++ b/server/pbenchinacan/load_keycloak.sh @@ -48,7 +48,7 @@ echo "Keycloak redirect URI list is <${keycloak_redirect_uris}>." 
ADMIN_TOKEN="" while true; do - ADMIN_TOKEN=$(curl -s -f -X POST \ + ADMIN_TOKEN=$(curl -sS -f -X POST \ "${KEYCLOAK_HOST_PORT}/realms/master/protocol/openid-connect/token" \ -H "Content-Type: application/x-www-form-urlencoded" \ -d "username=${ADMIN_USERNAME}" \ @@ -70,7 +70,7 @@ echo echo "Keycloak connection successful on : ${KEYCLOAK_HOST_PORT}" echo -status_code=$(curl -f -s -o /dev/null -w "%{http_code}" -X POST \ +status_code=$(curl -f -sS -o /dev/null -w "%{http_code}" -X POST \ "${KEYCLOAK_HOST_PORT}/admin/realms" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ @@ -88,7 +88,7 @@ fi # a token from Keycloak using a . # Having in the aud claim of the token is essential for the token # to be validated. -curl -si -f -X POST \ +curl -sSi -f -X POST \ "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/client-scopes" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ @@ -117,7 +117,7 @@ curl -si -f -X POST \ ] }' -CLIENT_CONF=$(curl -si -f -X POST \ +CLIENT_CONF=$(curl -sSi -f -X POST \ "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/clients" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ @@ -138,7 +138,7 @@ else echo "Created ${CLIENT} client" fi -status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ +status_code=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/clients/${CLIENT_ID}/roles" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ @@ -151,7 +151,7 @@ else echo "Created an 'ADMIN' role under ${CLIENT} client of the ${REALM} realm" fi -ROLE_ID=$(curl -s -f "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/clients/${CLIENT_ID}/roles" \ +ROLE_ID=$(curl -sS -f "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/clients/${CLIENT_ID}/roles" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" | jq -r '.[0].id') if [[ -z "${ROLE_ID}" ]]; then @@ -159,7 +159,7 @@ if [[ -z "${ROLE_ID}" ]]; then exit 1 fi -USER=$(curl -si -f -X POST \ +USER=$(curl -sSi -f -X POST \ "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/users" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ @@ -174,7 +174,7 @@ else echo "Created an 'admin' user inside ${REALM} realm" fi -status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST \ +status_code=$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/users/${USER_ID}/role-mappings/clients/${CLIENT_ID}" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ @@ -188,7 +188,7 @@ else fi # Verify that the user id has an 'ADMIN' role assigned to it -USER_ROLES=$(curl -s "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/users/${USER_ID}/role-mappings/clients/${CLIENT_ID}" \ +USER_ROLES=$(curl -sS "${KEYCLOAK_HOST_PORT}/admin/realms/${REALM}/users/${USER_ID}/role-mappings/clients/${CLIENT_ID}" \ -H "Authorization: Bearer ${ADMIN_TOKEN}" \ -H "Content-Type: application/json" | jq -r '.[].name')