From 8ddb4cdb057ac54773a50db50fd99b14c437cd16 Mon Sep 17 00:00:00 2001 From: Viicos <65306057+Viicos@users.noreply.github.com> Date: Wed, 21 Feb 2024 16:59:28 +0100 Subject: [PATCH 1/4] [#3688] Add required dependencies for JSON Schema parsing --- requirements/base.in | 3 +++ requirements/base.txt | 31 +++++++++++++++++++++--------- requirements/ci.txt | 38 +++++++++++++++++++++++++------------ requirements/dev.txt | 38 +++++++++++++++++++++++++------------ requirements/extensions.txt | 37 +++++++++++++++++++++++++----------- 5 files changed, 103 insertions(+), 44 deletions(-) diff --git a/requirements/base.in b/requirements/base.in index 95fdd3b567..c3a19c7977 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -7,6 +7,8 @@ defusedxml furl glom maykin-json-logic-py +jsonschema +jsonschema_specifications jq html5lib # see https://github.com/onelogin/python3-saml/issues/292 and @@ -24,6 +26,7 @@ python-magic tablib[xlsx] tinycss2 xmltodict +referencing self-certifi semantic-version tabulate diff --git a/requirements/base.txt b/requirements/base.txt index 751fd8d447..0672bfa7d0 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -18,10 +18,11 @@ asn1crypto==1.5.1 # via webauthn async-timeout==4.0.2 # via redis -attrs==20.3.0 +attrs==23.2.0 # via # glom # jsonschema + # referencing # zeep beautifulsoup4==4.10.0 # via o365 @@ -282,15 +283,19 @@ josepy==1.8.0 # via mozilla-django-oidc jq==1.3.0 # via -r requirements/base.in -jsonschema==4.17.3 +jsonschema==4.21.1 # via + # -r requirements/base.in # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # jsonschema + # openapi-schema-validator kombu==5.2.4 # via celery lazy-object-proxy==1.9.0 @@ -322,11 +327,11 @@ o365==2.0.31 # via -r requirements/base.in oauthlib==3.2.2 # via requests-oauthlib -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via -r requirements/base.in openpyxl==3.0.7 # via tablib @@ -372,8 +377,6 @@ pyphen==0.14.0 # via weasyprint pypng==0.20220715.0 # via qrcode -pyrsistent==0.17.3 - # via jsonschema python-dateutil==2.8.2 # via # django-camunda @@ -406,6 +409,12 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -r requirements/base.in + # jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # ape-pie @@ -430,6 +439,10 @@ requests-toolbelt==1.0.0 # via zeep rfc3339-validator==0.1.4 # via openapi-schema-validator +rpds-py==0.18.0 + # via + # jsonschema + # referencing ruamel-yaml==0.17.32 # via prance ruamel-yaml-clib==0.2.7 diff --git a/requirements/ci.txt b/requirements/ci.txt index 7ae2707358..7ecf377d3b 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -33,13 +33,14 @@ async-timeout==4.0.2 # -c requirements/base.txt # -r requirements/base.txt # redis -attrs==20.3.0 +attrs==23.2.0 # via # -c requirements/base.txt # -r requirements/base.txt # glom # hypothesis # jsonschema + # referencing # zeep babel==2.9.1 # via sphinx @@ -527,20 +528,25 @@ jq==1.3.0 # via # -c requirements/base.txt # -r requirements/base.txt -jsonschema==4.17.3 +jsonschema==4.21.1 # via # -c requirements/base.txt # -r requirements/base.txt # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via # -c requirements/base.txt # -r requirements/base.txt # openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # -c requirements/base.txt + # -r requirements/base.txt + # jsonschema + # openapi-schema-validator kombu==5.2.4 # via # -c requirements/base.txt @@ -612,17 +618,17 @@ oauthlib==3.2.2 # -c requirements/base.txt # -r requirements/base.txt # requests-oauthlib -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via # -c requirements/base.txt # -r requirements/base.txt # openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via # -c requirements/base.txt # -r requirements/base.txt # openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via # -c requirements/base.txt # -r requirements/base.txt @@ -744,11 +750,6 @@ pypng==0.20220715.0 # qrcode pyquery==1.4.1 # via -r requirements/test-tools.in -pyrsistent==0.17.3 - # via - # -c requirements/base.txt - # -r requirements/base.txt - # jsonschema pytest==7.4.0 # via -r requirements/test-tools.in python-dateutil==2.8.2 @@ -809,6 +810,13 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -c requirements/base.txt + # -r requirements/base.txt + # jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # -c requirements/base.txt @@ -853,6 +861,12 @@ rfc3339-validator==0.1.4 # -c requirements/base.txt # -r requirements/base.txt # openapi-schema-validator +rpds-py==0.18.0 + # via + # -c requirements/base.txt + # -r requirements/base.txt + # jsonschema + # referencing ruamel-yaml==0.17.32 # via # -c requirements/base.txt diff --git a/requirements/dev.txt b/requirements/dev.txt index 4ce59ad675..a466416f68 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -36,13 +36,14 @@ async-timeout==4.0.2 # -c requirements/ci.txt # -r requirements/ci.txt # redis -attrs==20.3.0 +attrs==23.2.0 # via # -c requirements/ci.txt # -r requirements/ci.txt # glom # hypothesis # jsonschema + # referencing # zeep autopep8==1.5.7 # via django-silk @@ -599,20 +600,25 @@ jq==1.3.0 # via # -c requirements/ci.txt # -r requirements/ci.txt -jsonschema==4.17.3 +jsonschema==4.21.1 # via # -c requirements/ci.txt # -r requirements/ci.txt # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via # -c requirements/ci.txt # -r requirements/ci.txt # openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt + # jsonschema + # openapi-schema-validator kombu==5.2.4 # via # -c requirements/ci.txt @@ -699,17 +705,17 @@ oauthlib==3.2.2 # -c requirements/ci.txt # -r requirements/ci.txt # requests-oauthlib -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via # -c requirements/ci.txt # -r requirements/ci.txt # openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via # -c requirements/ci.txt # -r requirements/ci.txt # openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via # -c requirements/ci.txt # -r requirements/ci.txt @@ -862,11 +868,6 @@ pyquery==1.4.1 # via # -c requirements/ci.txt # -r requirements/ci.txt -pyrsistent==0.17.3 - # via - # -c requirements/ci.txt - # -r requirements/ci.txt - # jsonschema pytest==7.4.0 # via # -c requirements/ci.txt @@ -931,6 +932,13 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt + # jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # -c requirements/ci.txt @@ -977,6 +985,12 @@ rfc3339-validator==0.1.4 # -c requirements/ci.txt # -r requirements/ci.txt # openapi-schema-validator +rpds-py==0.18.0 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt + # jsonschema + # referencing ruamel-yaml==0.17.32 # via # -c requirements/ci.txt diff --git a/requirements/extensions.txt b/requirements/extensions.txt index fa2a5c1e15..cd955ccaa5 100644 --- a/requirements/extensions.txt +++ b/requirements/extensions.txt @@ -27,11 +27,12 @@ async-timeout==4.0.2 # via # -r requirements/base.txt # redis -attrs==20.3.0 +attrs==23.2.0 # via # -r requirements/base.txt # glom # jsonschema + # referencing # zeep beautifulsoup4==4.10.0 # via @@ -434,18 +435,24 @@ jq==1.3.0 # via # -c requirements/base.in # -r requirements/base.txt -jsonschema==4.17.3 +jsonschema==4.21.1 # via + # -c requirements/base.in # -r requirements/base.txt # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via # -r requirements/base.txt # openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # -c requirements/base.in + # -r requirements/base.txt + # jsonschema + # openapi-schema-validator kombu==5.2.4 # via # -r requirements/base.txt @@ -504,15 +511,15 @@ open-forms-ext-haalcentraal-hr==0.3.0 # via -r requirements/extensions.in open-forms-ext-token-exchange==0.4.0 # via -r requirements/extensions.in -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via # -r requirements/base.txt # openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via # -r requirements/base.txt # openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via # -c requirements/base.in # -r requirements/base.txt @@ -595,10 +602,6 @@ pypng==0.20220715.0 # via # -r requirements/base.txt # qrcode -pyrsistent==0.17.3 - # via - # -r requirements/base.txt - # jsonschema python-dateutil==2.8.2 # via # -r requirements/base.txt @@ -645,6 +648,13 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -c requirements/base.in + # -r requirements/base.txt + # jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # -r requirements/base.txt @@ -678,6 +688,11 @@ rfc3339-validator==0.1.4 # via # -r requirements/base.txt # openapi-schema-validator +rpds-py==0.18.0 + # via + # -r requirements/base.txt + # jsonschema + # referencing ruamel-yaml==0.17.32 # via # -r requirements/base.txt From 7abd9a800ba658fa1037cacb096834f485fc9edf Mon Sep 17 00:00:00 2001 From: Viicos <65306057+Viicos@users.noreply.github.com> Date: Wed, 21 Feb 2024 16:59:52 +0100 Subject: [PATCH 2/4] [#3688] Add utility function to iter over JSON schema --- .../contrib/objects_api/json_schema.py | 70 ++++++ .../objects_api/tests/test_json_schema.py | 211 ++++++++++++++++++ 2 files changed, 281 insertions(+) create mode 100644 src/openforms/registrations/contrib/objects_api/json_schema.py create mode 100644 src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py diff --git a/src/openforms/registrations/contrib/objects_api/json_schema.py b/src/openforms/registrations/contrib/objects_api/json_schema.py new file mode 100644 index 0000000000..0a7cfb56fc --- /dev/null +++ b/src/openforms/registrations/contrib/objects_api/json_schema.py @@ -0,0 +1,70 @@ +from dataclasses import dataclass +from typing import Iterator, Literal, overload + +from jsonschema_specifications import REGISTRY +from referencing import Resource +from referencing.exceptions import Unresolvable +from referencing.jsonschema import ObjectSchema + + +@dataclass +class InvalidReference: + """A class representing an unknown/invalid reference.""" + + uri: str + """The URI of the unknown reference.""" + + +@overload +def iter_json_schema_paths( + json_schema: ObjectSchema, fail_fast: Literal[False] +) -> Iterator[tuple[list[str], ObjectSchema | InvalidReference]]: ... + + +@overload +def iter_json_schema_paths( + json_schema: ObjectSchema, fail_fast: Literal[True] = ... +) -> Iterator[tuple[list[str], ObjectSchema]]: ... + + +def iter_json_schema_paths( + json_schema: ObjectSchema, fail_fast: bool = True +) -> Iterator[tuple[list[str], ObjectSchema | InvalidReference]]: + """Recursively iterate over the JSON Schema paths, resolving references if required. + + Yields a two-tuple containing the current path (as a list of string segments) and the matching (sub) JSON Schema. + + Known to be unsupported: + - Composition (https://json-schema.org/understanding-json-schema/reference/combining) + """ + resource = Resource.from_contents(json_schema) + # Or referencing.jsonschema.EMPTY_REGISTRY? + resolver = REGISTRY.resolver_with_root(resource) + + parent_json_path: list[str] = [] + + def _iter_json_schema( + json_schema: ObjectSchema, parent_json_path: list[str] + ) -> Iterator[tuple[list[str], ObjectSchema | InvalidReference]]: + assert json_schema.get("type") == "object" + + yield parent_json_path, json_schema + + for k, v in json_schema["properties"].items(): + json_path = parent_json_path + [k] + match v: + case {"type": "object"}: + yield from _iter_json_schema(v, json_path) + case {"$ref": str(uri)}: + try: + resolved = resolver.lookup(uri) + except Unresolvable: + if fail_fast: + raise + yield json_path, InvalidReference(uri) + else: + yield from _iter_json_schema(resolved.contents, json_path) + case {}: + yield json_path, v + + yield from _iter_json_schema(json_schema, parent_json_path) diff --git a/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py b/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py new file mode 100644 index 0000000000..d831d22c99 --- /dev/null +++ b/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py @@ -0,0 +1,211 @@ +from django.test import SimpleTestCase + +from referencing.exceptions import Unresolvable + +from ..json_schema import InvalidReference, iter_json_schema + +JSON_SCHEMA_NO_REFS = { + "$id": "noise-complaint.schema", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": False, + "properties": { + "complaintDescription": {"type": "string"}, + "measuredDecibels": {"type": "array", "items": {"type": "number"}}, + "complainant": { + "type": "object", + "properties": { + "first.name": {"type": "string"}, + "last.name": {"type": "string"}, + }, + }, + }, +} + +JSON_SCHEMA_REFS = { + "$id": "noise-complaint.schema", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": False, + "properties": { + "complainant": {"$ref": "#/definitions/person"}, + "noisyAddress": {"$ref": "#/definitions/address"}, + }, + "definitions": { + "person": { + "type": "object", + "properties": { + "first.name": {"type": "string"}, + "last.name": {"type": "string"}, + }, + }, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + }, + }, + }, +} + + +JSON_SCHEMA_NESTED_REFS = { + "$id": "noise-complaint.schema", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Noise complaint example V2", + "type": "object", + "additionalProperties": False, + "properties": { + "complainant": {"$ref": "#/definitions/person"}, + }, + "definitions": { + "person": { + "type": "object", + "properties": { + "residence": {"$ref": "#/definitions/address"}, + }, + }, + "address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + }, + }, + }, +} + +JSON_SCHEMA_UNKOWN_REF = { + "$id": "noise-complaint.schema", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": False, + "properties": { + "invalid": {"$ref": "#/invalidref"}, + }, +} + +JSON_SCHEMA_EXTERNAL_REF = { + "$id": "noise-complaint.schema", + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "additionalProperties": False, + "properties": { + "external": {"$ref": "http://example.com/external-schema.json"}, + }, +} + + +class IterJsonSchemaTests(SimpleTestCase): + + def test_iter_json_schema_no_refs(self): + paths_list = list(iter_json_schema(JSON_SCHEMA_NO_REFS)) + + self.assertListEqual( + paths_list, + [ + ([], JSON_SCHEMA_NO_REFS), + (["complaintDescription"], {"type": "string"}), + (["measuredDecibels"], {"type": "array", "items": {"type": "number"}}), + ( + ["complainant"], + { + "properties": { + "first.name": {"type": "string"}, + "last.name": {"type": "string"}, + }, + "type": "object", + }, + ), + (["complainant", "first.name"], {"type": "string"}), + (["complainant", "last.name"], {"type": "string"}), + ], + ) + + def test_iter_json_schema_refs(self): + paths_list = list(iter_json_schema(JSON_SCHEMA_REFS)) + + self.assertListEqual( + paths_list, + [ + ([], JSON_SCHEMA_REFS), + ( + ["complainant"], + { + "properties": { + "first.name": {"type": "string"}, + "last.name": {"type": "string"}, + }, + "type": "object", + }, + ), + (["complainant", "first.name"], {"type": "string"}), + (["complainant", "last.name"], {"type": "string"}), + ( + ["noisyAddress"], + { + "properties": { + "street": {"type": "string"}, + }, + "type": "object", + }, + ), + (["noisyAddress", "street"], {"type": "string"}), + ], + ) + + def test_iter_json_schema_nested_refs(self): + paths_list = list(iter_json_schema(JSON_SCHEMA_NESTED_REFS)) + + self.assertListEqual( + paths_list, + [ + ([], JSON_SCHEMA_NESTED_REFS), + ( + ["complainant"], + { + "properties": {"residence": {"$ref": "#/definitions/address"}}, + "type": "object", + }, + ), + ( + ["complainant", "residence"], + {"properties": {"street": {"type": "string"}}, "type": "object"}, + ), + (["complainant", "residence", "street"], {"type": "string"}), + ], + ) + + def test_iter_json_schema_unknown_ref(self): + self.assertRaises( + Unresolvable, + lambda: list(iter_json_schema(JSON_SCHEMA_UNKOWN_REF, fail_fast=True)), + ) + + paths_list = list(iter_json_schema(JSON_SCHEMA_UNKOWN_REF, fail_fast=False)) + + self.assertEqual( + paths_list, + [ + ([], JSON_SCHEMA_UNKOWN_REF), + (["invalid"], InvalidReference("#/invalidref")), + ], + ) + + def test_iter_json_schema_external_ref(self): + self.assertRaises( + Unresolvable, + lambda: list(iter_json_schema(JSON_SCHEMA_EXTERNAL_REF, fail_fast=True)), + ) + + paths_list = list(iter_json_schema(JSON_SCHEMA_EXTERNAL_REF, fail_fast=False)) + + self.assertEqual( + paths_list, + [ + ([], JSON_SCHEMA_EXTERNAL_REF), + ( + ["external"], + InvalidReference("http://example.com/external-schema.json"), + ), + ], + ) From 2eff4e7b9d5f217090ea2659b9dd3ff7c9199fbb Mon Sep 17 00:00:00 2001 From: Viicos <65306057+Viicos@users.noreply.github.com> Date: Thu, 22 Feb 2024 12:53:20 +0100 Subject: [PATCH 3/4] [#3688] Take required keys into account --- .../contrib/objects_api/json_schema.py | 95 ++++++++-- .../objects_api/tests/test_json_schema.py | 166 +++++++++++++++--- 2 files changed, 221 insertions(+), 40 deletions(-) diff --git a/src/openforms/registrations/contrib/objects_api/json_schema.py b/src/openforms/registrations/contrib/objects_api/json_schema.py index 0a7cfb56fc..46dbb00d9b 100644 --- a/src/openforms/registrations/contrib/objects_api/json_schema.py +++ b/src/openforms/registrations/contrib/objects_api/json_schema.py @@ -1,35 +1,65 @@ -from dataclasses import dataclass +from __future__ import annotations + +from dataclasses import dataclass, field, replace from typing import Iterator, Literal, overload from jsonschema_specifications import REGISTRY from referencing import Resource from referencing.exceptions import Unresolvable from referencing.jsonschema import ObjectSchema +from typing_extensions import Self @dataclass class InvalidReference: - """A class representing an unknown/invalid reference.""" + """An unknown/invalid reference.""" uri: str """The URI of the unknown reference.""" + exc: Unresolvable + """The ``referencing`` catched exception.""" + + +@dataclass(eq=True) +class JsonSchemaPath: + """A representation of a location in a JSON document, as a list of string segments.""" + + segments: list[str] = field(default_factory=list) + """The segments of the JSON location.""" + + required: bool = False + """Whether this path is marked as ``required`` in the JSON Schema.""" + + def __truediv__(self, key: str) -> Self: + return replace(self, segments=self.segments + [key]) + + def startswith(self, other: JsonSchemaPath | list[str], /) -> bool: + """Return ``True`` if the path starts with the specified path, ``False`` otherwise.""" + + other_segments = other.segments if isinstance(other, JsonSchemaPath) else other + + return ( + len(other_segments) <= len(self.segments) + and self.segments[: len(other_segments)] == other_segments + ) + @overload def iter_json_schema_paths( json_schema: ObjectSchema, fail_fast: Literal[False] -) -> Iterator[tuple[list[str], ObjectSchema | InvalidReference]]: ... +) -> Iterator[tuple[JsonSchemaPath, ObjectSchema | InvalidReference]]: ... @overload def iter_json_schema_paths( json_schema: ObjectSchema, fail_fast: Literal[True] = ... -) -> Iterator[tuple[list[str], ObjectSchema]]: ... +) -> Iterator[tuple[JsonSchemaPath, ObjectSchema]]: ... def iter_json_schema_paths( json_schema: ObjectSchema, fail_fast: bool = True -) -> Iterator[tuple[list[str], ObjectSchema | InvalidReference]]: +) -> Iterator[tuple[JsonSchemaPath, ObjectSchema | InvalidReference]]: """Recursively iterate over the JSON Schema paths, resolving references if required. Yields a two-tuple containing the current path (as a list of string segments) and the matching (sub) JSON Schema. @@ -41,30 +71,73 @@ def iter_json_schema_paths( # Or referencing.jsonschema.EMPTY_REGISTRY? resolver = REGISTRY.resolver_with_root(resource) - parent_json_path: list[str] = [] + parent_json_path = JsonSchemaPath() def _iter_json_schema( - json_schema: ObjectSchema, parent_json_path: list[str] - ) -> Iterator[tuple[list[str], ObjectSchema | InvalidReference]]: + json_schema: ObjectSchema, parent_json_path: JsonSchemaPath + ) -> Iterator[tuple[JsonSchemaPath, ObjectSchema | InvalidReference]]: assert json_schema.get("type") == "object" yield parent_json_path, json_schema + required = json_schema.get("required", []) + + k: str for k, v in json_schema["properties"].items(): - json_path = parent_json_path + [k] + json_path = parent_json_path / k + json_path.required = k in required + match v: case {"type": "object"}: yield from _iter_json_schema(v, json_path) case {"$ref": str(uri)}: try: resolved = resolver.lookup(uri) - except Unresolvable: + except Unresolvable as exc: if fail_fast: raise - yield json_path, InvalidReference(uri) + yield json_path, InvalidReference(uri, exc) else: yield from _iter_json_schema(resolved.contents, json_path) case {}: yield json_path, v yield from _iter_json_schema(json_schema, parent_json_path) + + +def get_missing_required_paths( + json_schema: ObjectSchema, paths: list[list[str]] +) -> list[list[str]]: + """Return a list of required ``JsonSchemaPath`` instances not covered by the provided paths. + + .. code-block:: pycon + + >>> json_schema = { + ... "type": "object", + ... "properties": { + ... "a": {...}, + ... "b": { + ... "type": "object", + ... "properties": { + ... "c": {...}, + ... "d": {...}, + ... }, + ... "required": ["c", "d"], + ... }, + ... "required": ["a", "b"], + ... } + >>> get_missing_required_paths(json_schema, [["a"], ["b", "c"]]) + [JsonSchemaPath(segments=['b', 'd'], required=True)] + """ + + return [ + r_path.segments + for r_path, _ in iter_json_schema_paths(json_schema) + if r_path.required + # If a child key is provided (e.g. "a.b"), any parent required key is dismissed (e.g. "a"): + if not any(JsonSchemaPath(path).startswith(r_path) for path in paths) + # If a parent key is provided (e.g. "a"), any child required key is dismissed (e.g. "a.b") + # (this one assumes the provided "a" value is valid with respect to the required children keys. + # We are dealing with path segments, so we can't really make any assumptions on the provided value): + if not any(r_path.startswith(path) for path in paths) + ] diff --git a/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py b/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py index d831d22c99..6d882ce12a 100644 --- a/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py +++ b/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py @@ -2,7 +2,7 @@ from referencing.exceptions import Unresolvable -from ..json_schema import InvalidReference, iter_json_schema +from ..json_schema import get_missing_required_paths, iter_json_schema_paths JSON_SCHEMA_NO_REFS = { "$id": "noise-complaint.schema", @@ -94,16 +94,47 @@ }, } +JSON_SCHEMA_REQUIRED_PATHS = { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "a": {"type": "string"}, + "b": { + "type": "object", + "properties": { + "c": {"type": "string"}, + "d": { + "type": "object", + "properties": { + "e": {"type": "string"}, + "f": {"type": "string"}, + }, + "required": ["e"], + }, + }, + "required": ["c", "d"], + }, + }, + "required": ["a", "b"], +} + class IterJsonSchemaTests(SimpleTestCase): + """Test cases to assert the JSON Schemas are correctly iterated over. + + The first path element being the root one, it is not included as it could + simply be tested as ``([], CURRENT_SCHEMA_TESTED)``. + """ def test_iter_json_schema_no_refs(self): - paths_list = list(iter_json_schema(JSON_SCHEMA_NO_REFS)) + paths_list = [ + (path.segments, schema) + for path, schema in iter_json_schema_paths(JSON_SCHEMA_NO_REFS) + ] self.assertListEqual( - paths_list, + paths_list[1:], [ - ([], JSON_SCHEMA_NO_REFS), (["complaintDescription"], {"type": "string"}), (["measuredDecibels"], {"type": "array", "items": {"type": "number"}}), ( @@ -122,12 +153,14 @@ def test_iter_json_schema_no_refs(self): ) def test_iter_json_schema_refs(self): - paths_list = list(iter_json_schema(JSON_SCHEMA_REFS)) + paths_list = [ + (path.segments, schema) + for path, schema in iter_json_schema_paths(JSON_SCHEMA_REFS) + ] self.assertListEqual( - paths_list, + paths_list[1:], [ - ([], JSON_SCHEMA_REFS), ( ["complainant"], { @@ -154,12 +187,14 @@ def test_iter_json_schema_refs(self): ) def test_iter_json_schema_nested_refs(self): - paths_list = list(iter_json_schema(JSON_SCHEMA_NESTED_REFS)) + paths_list = [ + (path.segments, schema) + for path, schema in iter_json_schema_paths(JSON_SCHEMA_NESTED_REFS) + ] self.assertListEqual( - paths_list, + paths_list[1:], [ - ([], JSON_SCHEMA_NESTED_REFS), ( ["complainant"], { @@ -178,34 +213,107 @@ def test_iter_json_schema_nested_refs(self): def test_iter_json_schema_unknown_ref(self): self.assertRaises( Unresolvable, - lambda: list(iter_json_schema(JSON_SCHEMA_UNKOWN_REF, fail_fast=True)), + lambda: list( + iter_json_schema_paths(JSON_SCHEMA_UNKOWN_REF, fail_fast=True) + ), ) - paths_list = list(iter_json_schema(JSON_SCHEMA_UNKOWN_REF, fail_fast=False)) + paths_list = [ + (path.segments, schema) + for path, schema in iter_json_schema_paths( + JSON_SCHEMA_UNKOWN_REF, fail_fast=False + ) + ] - self.assertEqual( - paths_list, - [ - ([], JSON_SCHEMA_UNKOWN_REF), - (["invalid"], InvalidReference("#/invalidref")), - ], - ) + self.assertListEqual(paths_list[1][0], ["invalid"]) + self.assertEqual(paths_list[1][1].uri, "#/invalidref") def test_iter_json_schema_external_ref(self): self.assertRaises( Unresolvable, - lambda: list(iter_json_schema(JSON_SCHEMA_EXTERNAL_REF, fail_fast=True)), + lambda: list( + iter_json_schema_paths(JSON_SCHEMA_EXTERNAL_REF, fail_fast=True) + ), ) - paths_list = list(iter_json_schema(JSON_SCHEMA_EXTERNAL_REF, fail_fast=False)) + paths_list = [ + (path.segments, schema) + for path, schema in iter_json_schema_paths( + JSON_SCHEMA_EXTERNAL_REF, fail_fast=False + ) + ] + self.assertListEqual(paths_list[1][0], ["external"]) self.assertEqual( - paths_list, - [ - ([], JSON_SCHEMA_EXTERNAL_REF), - ( - ["external"], - InvalidReference("http://example.com/external-schema.json"), - ), - ], + paths_list[1][1].uri, "http://example.com/external-schema.json" ) + + +class RequiredJsonSchemaPathsTests(SimpleTestCase): + """Test cases to assert required paths are correctly picked up when iterating over JSON Schemas + + (``IterJsonSchemaTests`` only made assertions on the returned paths). + """ + + def test_required_json_schema_paths(self): + required_paths = [ + path.segments + for path, _ in iter_json_schema_paths(JSON_SCHEMA_REQUIRED_PATHS) + if path.required + ] + + self.assertListEqual( + required_paths, [["a"], ["b"], ["b", "c"], ["b", "d"], ["b", "d", "e"]] + ) + + +class MissingRequiredPathsTests(SimpleTestCase): + """Test cases to assert missing required paths are picked up.""" + + def test_no_missing_required_paths(self): + + with self.subTest("top level"): + missing_paths = get_missing_required_paths( + JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b"]] + ) + + self.assertListEqual(missing_paths, []) + + with self.subTest("nested paths"): + missing_paths = get_missing_required_paths( + JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b", "c"], ["b", "d", "e"]] + ) + + self.assertListEqual(missing_paths, []) + + def test_missing_required_paths(self): + + with self.subTest("Missing 'a'"): + missing_paths = get_missing_required_paths( + JSON_SCHEMA_REQUIRED_PATHS, [["b"]] + ) + + self.assertListEqual(missing_paths, [["a"]]) + + with self.subTest("Missing 'b'"): + missing_paths = get_missing_required_paths( + JSON_SCHEMA_REQUIRED_PATHS, [["a"]] + ) + + self.assertListEqual( + missing_paths, [["b"], ["b", "c"], ["b", "d"], ["b", "d", "e"]] + ) + + with self.subTest("Missing 'c'"): + missing_paths = get_missing_required_paths( + JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b", "d"]] + ) + + self.assertListEqual(missing_paths, [["b", "c"]]) + + with self.subTest("Missing 'e'"): + missing_paths = get_missing_required_paths( + JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b", "c"], ["b", "d", "f"]] + ) + + self.assertListEqual(missing_paths, [["b", "d", "e"]]) From 78a9fa5fea8cb93d163ac322e090161ae05325c9 Mon Sep 17 00:00:00 2001 From: Viicos <65306057+Viicos@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:35:08 +0100 Subject: [PATCH 4/4] [#3688] PR feedback --- .../contrib/objects_api/json_schema.py | 36 ++++++++++------- .../objects_api/tests/test_json_schema.py | 40 ++++++++----------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/openforms/registrations/contrib/objects_api/json_schema.py b/src/openforms/registrations/contrib/objects_api/json_schema.py index 46dbb00d9b..2cc04d51b5 100644 --- a/src/openforms/registrations/contrib/objects_api/json_schema.py +++ b/src/openforms/registrations/contrib/objects_api/json_schema.py @@ -18,7 +18,7 @@ class InvalidReference: """The URI of the unknown reference.""" exc: Unresolvable - """The ``referencing`` catched exception.""" + """The ``referencing`` caught exception.""" @dataclass(eq=True) @@ -108,7 +108,7 @@ def _iter_json_schema( def get_missing_required_paths( json_schema: ObjectSchema, paths: list[list[str]] ) -> list[list[str]]: - """Return a list of required ``JsonSchemaPath`` instances not covered by the provided paths. + """Return a list of required path segments from the JSON Schema not covered by the provided paths. .. code-block:: pycon @@ -127,17 +127,25 @@ def get_missing_required_paths( ... "required": ["a", "b"], ... } >>> get_missing_required_paths(json_schema, [["a"], ["b", "c"]]) - [JsonSchemaPath(segments=['b', 'd'], required=True)] + [['b', 'd']] """ + missing_paths: list[list[str]] = [] - return [ - r_path.segments - for r_path, _ in iter_json_schema_paths(json_schema) - if r_path.required - # If a child key is provided (e.g. "a.b"), any parent required key is dismissed (e.g. "a"): - if not any(JsonSchemaPath(path).startswith(r_path) for path in paths) - # If a parent key is provided (e.g. "a"), any child required key is dismissed (e.g. "a.b") - # (this one assumes the provided "a" value is valid with respect to the required children keys. - # We are dealing with path segments, so we can't really make any assumptions on the provided value): - if not any(r_path.startswith(path) for path in paths) - ] + for r_path, _ in iter_json_schema_paths(json_schema): + if not r_path.required: + continue + + # If a child key is provided (e.g. "a.b"), any required parent key is dismissed (e.g. "a"). + if any(JsonSchemaPath(path).startswith(r_path) for path in paths): + continue + + # If a parent key is provided (e.g. "a"), any required child key is dismissed (e.g. "a.b"). + # This one assumes the provided value for "a" is valid with respect to the required children keys. + # The JSON Schema could specify "a" as an object with some required keys, but we are dealing with + # path segments, so we can't really make any assumptions on the provided value. + if any(r_path.startswith(path) for path in paths): + continue + + missing_paths.append(r_path.segments) + + return missing_paths diff --git a/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py b/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py index 6d882ce12a..9fbf192227 100644 --- a/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py +++ b/src/openforms/registrations/contrib/objects_api/tests/test_json_schema.py @@ -132,7 +132,7 @@ def test_iter_json_schema_no_refs(self): for path, schema in iter_json_schema_paths(JSON_SCHEMA_NO_REFS) ] - self.assertListEqual( + self.assertEqual( paths_list[1:], [ (["complaintDescription"], {"type": "string"}), @@ -158,7 +158,7 @@ def test_iter_json_schema_refs(self): for path, schema in iter_json_schema_paths(JSON_SCHEMA_REFS) ] - self.assertListEqual( + self.assertEqual( paths_list[1:], [ ( @@ -192,7 +192,7 @@ def test_iter_json_schema_nested_refs(self): for path, schema in iter_json_schema_paths(JSON_SCHEMA_NESTED_REFS) ] - self.assertListEqual( + self.assertEqual( paths_list[1:], [ ( @@ -211,12 +211,8 @@ def test_iter_json_schema_nested_refs(self): ) def test_iter_json_schema_unknown_ref(self): - self.assertRaises( - Unresolvable, - lambda: list( - iter_json_schema_paths(JSON_SCHEMA_UNKOWN_REF, fail_fast=True) - ), - ) + with self.assertRaises(Unresolvable): + list(iter_json_schema_paths(JSON_SCHEMA_UNKOWN_REF, fail_fast=True)) paths_list = [ (path.segments, schema) @@ -225,16 +221,12 @@ def test_iter_json_schema_unknown_ref(self): ) ] - self.assertListEqual(paths_list[1][0], ["invalid"]) + self.assertEqual(paths_list[1][0], ["invalid"]) self.assertEqual(paths_list[1][1].uri, "#/invalidref") def test_iter_json_schema_external_ref(self): - self.assertRaises( - Unresolvable, - lambda: list( - iter_json_schema_paths(JSON_SCHEMA_EXTERNAL_REF, fail_fast=True) - ), - ) + with self.assertRaises(Unresolvable): + list(iter_json_schema_paths(JSON_SCHEMA_EXTERNAL_REF, fail_fast=True)) paths_list = [ (path.segments, schema) @@ -243,7 +235,7 @@ def test_iter_json_schema_external_ref(self): ) ] - self.assertListEqual(paths_list[1][0], ["external"]) + self.assertEqual(paths_list[1][0], ["external"]) self.assertEqual( paths_list[1][1].uri, "http://example.com/external-schema.json" ) @@ -262,7 +254,7 @@ def test_required_json_schema_paths(self): if path.required ] - self.assertListEqual( + self.assertEqual( required_paths, [["a"], ["b"], ["b", "c"], ["b", "d"], ["b", "d", "e"]] ) @@ -277,14 +269,14 @@ def test_no_missing_required_paths(self): JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b"]] ) - self.assertListEqual(missing_paths, []) + self.assertEqual(missing_paths, []) with self.subTest("nested paths"): missing_paths = get_missing_required_paths( JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b", "c"], ["b", "d", "e"]] ) - self.assertListEqual(missing_paths, []) + self.assertEqual(missing_paths, []) def test_missing_required_paths(self): @@ -293,14 +285,14 @@ def test_missing_required_paths(self): JSON_SCHEMA_REQUIRED_PATHS, [["b"]] ) - self.assertListEqual(missing_paths, [["a"]]) + self.assertEqual(missing_paths, [["a"]]) with self.subTest("Missing 'b'"): missing_paths = get_missing_required_paths( JSON_SCHEMA_REQUIRED_PATHS, [["a"]] ) - self.assertListEqual( + self.assertEqual( missing_paths, [["b"], ["b", "c"], ["b", "d"], ["b", "d", "e"]] ) @@ -309,11 +301,11 @@ def test_missing_required_paths(self): JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b", "d"]] ) - self.assertListEqual(missing_paths, [["b", "c"]]) + self.assertEqual(missing_paths, [["b", "c"]]) with self.subTest("Missing 'e'"): missing_paths = get_missing_required_paths( JSON_SCHEMA_REQUIRED_PATHS, [["a"], ["b", "c"], ["b", "d", "f"]] ) - self.assertListEqual(missing_paths, [["b", "d", "e"]]) + self.assertEqual(missing_paths, [["b", "d", "e"]])