From 542fc4d199f48073a54f944425b6306b8a71dbfe Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 8 Oct 2024 18:17:59 -0700 Subject: [PATCH 01/11] Repurpose `trim_validation_results()` to `polish_validation_results()` Not only trimming but ensuring the `source` field is available for serialization --- .../cli/models.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 69e68eb..8282ec4 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -18,29 +18,32 @@ ) -def trim_validation_results( +def polish_validation_results( errs: list[ValidationResult], ) -> list[TrimmedValidationResult]: """ - Trim the `ValidationResult` objects in a list to exclude their `instance` field. + Polish the `ValidationResult` objects in a list to exclude their `instance` field + and include their `source` field for serialization. - :param errs: The list of `ValidationResult` objects to be trimmed. + :param errs: The list of `ValidationResult` objects to be polished. - :return: The list of `TrimmedValidationResult` objects representing the trimmed + :return: The list of `TrimmedValidationResult` objects representing the polished `ValidationResult` objects. 
""" - trimmed_errs = [] + polished_errs = [] for err in errs: err_as_dict = err.model_dump() del err_as_dict["instance"] - trimmed_errs.append(err_as_dict) - return trimmed_errs + err_as_dict["source"] = err.source + + polished_errs.append(err_as_dict) + return polished_errs DandisetMetadataType = dict[str, Any] PydanticValidationErrsType = list[dict[str, Any]] LinkmlValidationErrsType = Annotated[ - list[ValidationResult], PlainSerializer(trim_validation_results) + list[ValidationResult], PlainSerializer(polish_validation_results) ] dandiset_metadata_adapter = TypeAdapter(DandisetMetadataType) From da2a624cfc2c84a1867d2207dfbd5b1ec8943992 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 8 Oct 2024 18:20:38 -0700 Subject: [PATCH 02/11] Rename `TrimmedValidationResult` to `PolishedValidationResult` --- src/dandisets_linkml_status_tools/cli/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 8282ec4..d03e843 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -8,8 +8,8 @@ # A `TypedDict` that has a key corresponding to each field in `ValidationResult` # except for the `instance` field -TrimmedValidationResult = TypedDict( - "TrimmedValidationResult", +PolishedValidationResult = TypedDict( + "PolishedValidationResult", { name: info.annotation for name, info in ValidationResult.model_fields.items() @@ -20,14 +20,14 @@ def polish_validation_results( errs: list[ValidationResult], -) -> list[TrimmedValidationResult]: +) -> list[PolishedValidationResult]: """ Polish the `ValidationResult` objects in a list to exclude their `instance` field and include their `source` field for serialization. :param errs: The list of `ValidationResult` objects to be polished. 
- :return: The list of `TrimmedValidationResult` objects representing the polished + :return: The list of `PolishedValidationResult` objects representing the polished `ValidationResult` objects. """ polished_errs = [] From 3d9897505bb1810b44d4ec3d436eca648a0e4693 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Tue, 8 Oct 2024 19:01:15 -0700 Subject: [PATCH 03/11] Make `source` field in `PolishedValidationResult` serializable in JSON --- .../cli/models.py | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index d03e843..350f46d 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -1,20 +1,34 @@ +from collections.abc import Sequence from datetime import datetime -from typing import Annotated, Any +from typing import Annotated, Any, Union from dandi.dandiapi import VersionStatus from linkml.validator.report import ValidationResult from pydantic import BaseModel, Json, PlainSerializer, TypeAdapter from typing_extensions import TypedDict # Required for Python < 3.12 by Pydantic -# A `TypedDict` that has a key corresponding to each field in `ValidationResult` -# except for the `instance` field + +class JsonValidationErrorView(BaseModel): + """ + A Pydantic model to represent a `jsonschema.exceptions.ValidationError` object, + by including selective fields or properties of the original object, + for serialization + """ + + absolute_path: Sequence[Union[str, int]] + absolute_schema_path: Sequence[Union[str, int]] + + +# Build a `TypedDict` for representing a polished version of `ValidationResult` +field_annotations = { + name: info.annotation + for name, info in ValidationResult.model_fields.items() + if name not in {"instance", "source"} +} +field_annotations["source"] = JsonValidationErrorView PolishedValidationResult = TypedDict( "PolishedValidationResult", - { - name: 
info.annotation - for name, info in ValidationResult.model_fields.items() - if name != "instance" - }, + field_annotations, ) @@ -34,7 +48,10 @@ def polish_validation_results( for err in errs: err_as_dict = err.model_dump() del err_as_dict["instance"] - err_as_dict["source"] = err.source + err_as_dict["source"] = JsonValidationErrorView( + absolute_path=err.source.absolute_path, + absolute_schema_path=err.source.absolute_schema_path, + ) polished_errs.append(err_as_dict) return polished_errs From 1aab5826173effec36698746392c211102d96d0f Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 9 Oct 2024 10:25:42 -0700 Subject: [PATCH 04/11] Ensure args to `polish_validation_results()` are valid --- .../cli/models.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 350f46d..bf33739 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -3,6 +3,7 @@ from typing import Annotated, Any, Union from dandi.dandiapi import VersionStatus +from jsonschema.exceptions import ValidationError from linkml.validator.report import ValidationResult from pydantic import BaseModel, Json, PlainSerializer, TypeAdapter from typing_extensions import TypedDict # Required for Python < 3.12 by Pydantic @@ -39,18 +40,37 @@ def polish_validation_results( Polish the `ValidationResult` objects in a list to exclude their `instance` field and include their `source` field for serialization. + Note: This function is intended to be used to handle `ValidationResult` objects + produced by `linkml.validator.plugins.JsonschemaValidationPlugin`. The `source` + field of these `ValidationResult` objects is expected to be a + `jsonschema.exceptions.ValidationError` object. + :param errs: The list of `ValidationResult` objects to be polished. 
:return: The list of `PolishedValidationResult` objects representing the polished `ValidationResult` objects. + + :raises ValueError: If the `source` field of a `ValidationResult` object is not a + `jsonschema.exceptions.ValidationError` object. """ polished_errs = [] for err in errs: err_as_dict = err.model_dump() + + # Remove the `instance` field del err_as_dict["instance"] + + # Include the `source` field as a `JsonValidationErrorView` object + result_source = err.source + if not isinstance(result_source, ValidationError): + msg = ( + f"Expected `source` field of a `ValidationResult` object to be " + f"a {ValidationError!r} object, but got {result_source!r}" + ) + raise ValueError(msg) # noqa: TRY004 err_as_dict["source"] = JsonValidationErrorView( - absolute_path=err.source.absolute_path, - absolute_schema_path=err.source.absolute_schema_path, + absolute_path=result_source.absolute_path, + absolute_schema_path=result_source.absolute_schema_path, ) polished_errs.append(err_as_dict) From 302d0f504473e8ccc6f9248d6d95611bbef9bc2b Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 9 Oct 2024 10:29:23 -0700 Subject: [PATCH 05/11] Rename `errs` to `results` in `polish_validation_results()` --- src/dandisets_linkml_status_tools/cli/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index bf33739..1c01b25 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -34,7 +34,7 @@ class JsonValidationErrorView(BaseModel): def polish_validation_results( - errs: list[ValidationResult], + results: list[ValidationResult], ) -> list[PolishedValidationResult]: """ Polish the `ValidationResult` objects in a list to exclude their `instance` field @@ -45,7 +45,7 @@ def polish_validation_results( field of these `ValidationResult` objects is expected to be a `jsonschema.exceptions.ValidationError` 
object. - :param errs: The list of `ValidationResult` objects to be polished. + :param results: The list of `ValidationResult` objects to be polished. :return: The list of `PolishedValidationResult` objects representing the polished `ValidationResult` objects. @@ -54,7 +54,7 @@ def polish_validation_results( `jsonschema.exceptions.ValidationError` object. """ polished_errs = [] - for err in errs: + for err in results: err_as_dict = err.model_dump() # Remove the `instance` field From 6e98e659bdd72fe0f9c3b9adae18cf8398c2a447 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 9 Oct 2024 10:31:42 -0700 Subject: [PATCH 06/11] Rename `err` to `result` in `polish_validation_results()` --- src/dandisets_linkml_status_tools/cli/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 1c01b25..8700255 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -54,14 +54,14 @@ def polish_validation_results( `jsonschema.exceptions.ValidationError` object. 
""" polished_errs = [] - for err in results: - err_as_dict = err.model_dump() + for result in results: + err_as_dict = result.model_dump() # Remove the `instance` field del err_as_dict["instance"] # Include the `source` field as a `JsonValidationErrorView` object - result_source = err.source + result_source = result.source if not isinstance(result_source, ValidationError): msg = ( f"Expected `source` field of a `ValidationResult` object to be " From 6fa423c8106aa4bdcfda5aa86432ca9b793429c2 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 9 Oct 2024 10:32:59 -0700 Subject: [PATCH 07/11] Rename `err_as_dict` to `result_as_dict` in `polish_validation_results()` --- src/dandisets_linkml_status_tools/cli/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 8700255..56991a6 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -55,10 +55,10 @@ def polish_validation_results( """ polished_errs = [] for result in results: - err_as_dict = result.model_dump() + result_as_dict = result.model_dump() # Remove the `instance` field - del err_as_dict["instance"] + del result_as_dict["instance"] # Include the `source` field as a `JsonValidationErrorView` object result_source = result.source @@ -68,12 +68,12 @@ def polish_validation_results( f"a {ValidationError!r} object, but got {result_source!r}" ) raise ValueError(msg) # noqa: TRY004 - err_as_dict["source"] = JsonValidationErrorView( + result_as_dict["source"] = JsonValidationErrorView( absolute_path=result_source.absolute_path, absolute_schema_path=result_source.absolute_schema_path, ) - polished_errs.append(err_as_dict) + polished_errs.append(result_as_dict) return polished_errs From 89b48866d24cd1229b354ca421b28b2d82401c6a Mon Sep 17 00:00:00 2001 From: Isaac To Date: Wed, 9 Oct 2024 10:34:15 -0700 Subject: [PATCH 08/11] Rename 
`polished_errs` to `polished_results` in `polish_validation_results()` --- src/dandisets_linkml_status_tools/cli/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 56991a6..f70a8b7 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -53,7 +53,7 @@ def polish_validation_results( :raises ValueError: If the `source` field of a `ValidationResult` object is not a `jsonschema.exceptions.ValidationError` object. """ - polished_errs = [] + polished_results = [] for result in results: result_as_dict = result.model_dump() @@ -73,8 +73,8 @@ def polish_validation_results( absolute_schema_path=result_source.absolute_schema_path, ) - polished_errs.append(result_as_dict) - return polished_errs + polished_results.append(result_as_dict) + return polished_results DandisetMetadataType = dict[str, Any] From eb718a3a2adc222f18df2fed39ab0a3b55d8ba12 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 14 Oct 2024 09:33:37 -0700 Subject: [PATCH 09/11] feat: Include `validator` and `validator_value` in `JsonValidationErrorView` These two values can be used to categorize the errors --- src/dandisets_linkml_status_tools/cli/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index f70a8b7..2757940 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -18,6 +18,8 @@ class JsonValidationErrorView(BaseModel): absolute_path: Sequence[Union[str, int]] absolute_schema_path: Sequence[Union[str, int]] + validator: str + validator_value: Any # Build a `TypedDict` for representing a polished version of `ValidationResult` @@ -68,9 +70,12 @@ def polish_validation_results( f"a {ValidationError!r} object, but got {result_source!r}" ) 
raise ValueError(msg) # noqa: TRY004 + # noinspection PyTypeChecker result_as_dict["source"] = JsonValidationErrorView( absolute_path=result_source.absolute_path, absolute_schema_path=result_source.absolute_schema_path, + validator=result_source.validator, + validator_value=result_source.validator_value, ) polished_results.append(result_as_dict) From 58952fac05d61253496016502f3f1eab0d2978ff Mon Sep 17 00:00:00 2001 From: Isaac To Date: Mon, 14 Oct 2024 10:06:35 -0700 Subject: [PATCH 10/11] feat: Include `message` in `JsonValidationErrorView` This allows the viewing of the error message from the JSON schema validator --- src/dandisets_linkml_status_tools/cli/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 2757940..57ae9fd 100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -16,6 +16,7 @@ class JsonValidationErrorView(BaseModel): for serialization """ + message: str absolute_path: Sequence[Union[str, int]] absolute_schema_path: Sequence[Union[str, int]] validator: str @@ -72,6 +73,7 @@ def polish_validation_results( raise ValueError(msg) # noqa: TRY004 # noinspection PyTypeChecker result_as_dict["source"] = JsonValidationErrorView( + message=result_source.message, absolute_path=result_source.absolute_path, absolute_schema_path=result_source.absolute_schema_path, validator=result_source.validator, From e8c397aa11680f05fc7f63ed9903946cab0a1b06 Mon Sep 17 00:00:00 2001 From: Isaac To Date: Thu, 17 Oct 2024 18:55:05 -0700 Subject: [PATCH 11/11] Move checking of source field type to validation of `LinkmlValidationErrsType` This will eliminate code duplications --- .../cli/models.py | 38 +++++++++++++++---- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/src/dandisets_linkml_status_tools/cli/models.py b/src/dandisets_linkml_status_tools/cli/models.py index 57ae9fd..c2d68fa 
100644 --- a/src/dandisets_linkml_status_tools/cli/models.py +++ b/src/dandisets_linkml_status_tools/cli/models.py @@ -5,7 +5,7 @@ from dandi.dandiapi import VersionStatus from jsonschema.exceptions import ValidationError from linkml.validator.report import ValidationResult -from pydantic import BaseModel, Json, PlainSerializer, TypeAdapter +from pydantic import AfterValidator, BaseModel, Json, PlainSerializer, TypeAdapter from typing_extensions import TypedDict # Required for Python < 3.12 by Pydantic @@ -36,6 +36,32 @@ class JsonValidationErrorView(BaseModel): ) +def check_source_jsonschema_validation_error( + results: list[ValidationResult], +) -> list[ValidationResult]: + """ + Check if the `source` field of each `ValidationResult` object in a given list is a + `jsonschema.exceptions.ValidationError` object. + + :param results: The list of `ValidationResult` objects to be checked. + + :return: The list of `ValidationResult` objects if all `source` fields are + `jsonschema.exceptions.ValidationError` objects. + + :raises ValueError: If the `source` field of a `ValidationResult` object is not a + `jsonschema.exceptions.ValidationError` object. 
+ """ + for result in results: + result_source = result.source + if not isinstance(result_source, ValidationError): + msg = ( + f"Expected `source` field of a `ValidationResult` object to be " + f"a {ValidationError!r} object, but got {result_source!r}" + ) + raise ValueError(msg) # noqa: TRY004 + return results + + def polish_validation_results( results: list[ValidationResult], ) -> list[PolishedValidationResult]: @@ -65,12 +91,6 @@ def polish_validation_results( # Include the `source` field as a `JsonValidationErrorView` object result_source = result.source - if not isinstance(result_source, ValidationError): - msg = ( - f"Expected `source` field of a `ValidationResult` object to be " - f"a {ValidationError!r} object, but got {result_source!r}" - ) - raise ValueError(msg) # noqa: TRY004 # noinspection PyTypeChecker result_as_dict["source"] = JsonValidationErrorView( message=result_source.message, @@ -87,7 +107,9 @@ def polish_validation_results( DandisetMetadataType = dict[str, Any] PydanticValidationErrsType = list[dict[str, Any]] LinkmlValidationErrsType = Annotated[ - list[ValidationResult], PlainSerializer(polish_validation_results) + list[ValidationResult], + AfterValidator(check_source_jsonschema_validation_error), + PlainSerializer(polish_validation_results), ] dandiset_metadata_adapter = TypeAdapter(DandisetMetadataType)