diff --git a/requirements/base.in b/requirements/base.in index 95fdd3b567..c3a19c7977 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -7,6 +7,8 @@ defusedxml furl glom maykin-json-logic-py +jsonschema +jsonschema_specifications jq html5lib # see https://github.com/onelogin/python3-saml/issues/292 and @@ -24,6 +26,7 @@ python-magic tablib[xlsx] tinycss2 xmltodict +referencing self-certifi semantic-version tabulate diff --git a/requirements/base.txt b/requirements/base.txt index 309cfbcea6..66d27dfaab 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -18,10 +18,11 @@ asn1crypto==1.5.1 # via webauthn async-timeout==4.0.2 # via redis -attrs==20.3.0 +attrs==23.2.0 # via # glom # jsonschema + # referencing # zeep beautifulsoup4==4.10.0 # via o365 @@ -284,15 +285,19 @@ josepy==1.8.0 # via mozilla-django-oidc jq==1.3.0 # via -r requirements/base.in -jsonschema==4.17.3 +jsonschema==4.21.1 # via + # -r requirements/base.in # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # jsonschema + # openapi-schema-validator kombu==5.2.4 # via celery lazy-object-proxy==1.9.0 @@ -324,11 +329,11 @@ o365==2.0.31 # via -r requirements/base.in oauthlib==3.2.2 # via requests-oauthlib -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via -r requirements/base.in openpyxl==3.0.7 # via tablib @@ -373,8 +378,6 @@ pyphen==0.14.0 # via weasyprint pypng==0.20220715.0 # via qrcode -pyrsistent==0.17.3 - # via jsonschema python-dateutil==2.8.2 # via # django-camunda @@ -407,6 +410,12 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -r requirements/base.in + # 
jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # ape-pie @@ -431,6 +440,10 @@ requests-toolbelt==1.0.0 # via zeep rfc3339-validator==0.1.4 # via openapi-schema-validator +rpds-py==0.18.0 + # via + # jsonschema + # referencing ruamel-yaml==0.17.32 # via prance ruamel-yaml-clib==0.2.7 diff --git a/requirements/ci.txt b/requirements/ci.txt index 36af549d6d..5f196e7a8f 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -33,13 +33,14 @@ async-timeout==4.0.2 # -c requirements/base.txt # -r requirements/base.txt # redis -attrs==20.3.0 +attrs==23.2.0 # via # -c requirements/base.txt # -r requirements/base.txt # glom # hypothesis # jsonschema + # referencing # zeep babel==2.9.1 # via sphinx @@ -529,20 +530,25 @@ jq==1.3.0 # via # -c requirements/base.txt # -r requirements/base.txt -jsonschema==4.17.3 +jsonschema==4.21.1 # via # -c requirements/base.txt # -r requirements/base.txt # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via # -c requirements/base.txt # -r requirements/base.txt # openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # -c requirements/base.txt + # -r requirements/base.txt + # jsonschema + # openapi-schema-validator kombu==5.2.4 # via # -c requirements/base.txt @@ -614,17 +620,17 @@ oauthlib==3.2.2 # -c requirements/base.txt # -r requirements/base.txt # requests-oauthlib -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via # -c requirements/base.txt # -r requirements/base.txt # openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via # -c requirements/base.txt # -r requirements/base.txt # openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via # -c requirements/base.txt # -r requirements/base.txt @@ -745,11 +751,6 @@ pypng==0.20220715.0 # qrcode pyquery==1.4.1 # via -r requirements/test-tools.in 
-pyrsistent==0.17.3 - # via - # -c requirements/base.txt - # -r requirements/base.txt - # jsonschema pytest==7.4.0 # via -r requirements/test-tools.in python-dateutil==2.8.2 @@ -810,6 +811,13 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -c requirements/base.txt + # -r requirements/base.txt + # jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # -c requirements/base.txt @@ -854,6 +862,12 @@ rfc3339-validator==0.1.4 # -c requirements/base.txt # -r requirements/base.txt # openapi-schema-validator +rpds-py==0.18.0 + # via + # -c requirements/base.txt + # -r requirements/base.txt + # jsonschema + # referencing ruamel-yaml==0.17.32 # via # -c requirements/base.txt diff --git a/requirements/dev.txt b/requirements/dev.txt index a7dc27b70a..53297cc5f2 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -36,13 +36,14 @@ async-timeout==4.0.2 # -c requirements/ci.txt # -r requirements/ci.txt # redis -attrs==20.3.0 +attrs==23.2.0 # via # -c requirements/ci.txt # -r requirements/ci.txt # glom # hypothesis # jsonschema + # referencing # zeep autopep8==1.5.7 # via django-silk @@ -601,20 +602,25 @@ jq==1.3.0 # via # -c requirements/ci.txt # -r requirements/ci.txt -jsonschema==4.17.3 +jsonschema==4.21.1 # via # -c requirements/ci.txt # -r requirements/ci.txt # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via # -c requirements/ci.txt # -r requirements/ci.txt # openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt + # jsonschema + # openapi-schema-validator kombu==5.2.4 # via # -c requirements/ci.txt @@ -701,17 +707,17 @@ oauthlib==3.2.2 # -c requirements/ci.txt # -r requirements/ci.txt # requests-oauthlib -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via # -c requirements/ci.txt # -r 
requirements/ci.txt # openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via # -c requirements/ci.txt # -r requirements/ci.txt # openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via # -c requirements/ci.txt # -r requirements/ci.txt @@ -863,11 +869,6 @@ pyquery==1.4.1 # via # -c requirements/ci.txt # -r requirements/ci.txt -pyrsistent==0.17.3 - # via - # -c requirements/ci.txt - # -r requirements/ci.txt - # jsonschema pytest==7.4.0 # via # -c requirements/ci.txt @@ -932,6 +933,13 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt + # jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # -c requirements/ci.txt @@ -978,6 +986,12 @@ rfc3339-validator==0.1.4 # -c requirements/ci.txt # -r requirements/ci.txt # openapi-schema-validator +rpds-py==0.18.0 + # via + # -c requirements/ci.txt + # -r requirements/ci.txt + # jsonschema + # referencing ruamel-yaml==0.17.32 # via # -c requirements/ci.txt diff --git a/requirements/extensions.txt b/requirements/extensions.txt index c03fd9148c..087f994488 100644 --- a/requirements/extensions.txt +++ b/requirements/extensions.txt @@ -27,11 +27,12 @@ async-timeout==4.0.2 # via # -r requirements/base.txt # redis -attrs==20.3.0 +attrs==23.2.0 # via # -r requirements/base.txt # glom # jsonschema + # referencing # zeep beautifulsoup4==4.10.0 # via @@ -436,18 +437,24 @@ jq==1.3.0 # via # -c requirements/base.in # -r requirements/base.txt -jsonschema==4.17.3 +jsonschema==4.21.1 # via + # -c requirements/base.in # -r requirements/base.txt # drf-jsonschema-serializer # drf-spectacular - # jsonschema-spec # openapi-schema-validator # openapi-spec-validator -jsonschema-spec==0.1.6 +jsonschema-spec==0.2.4 # via # -r requirements/base.txt # openapi-spec-validator +jsonschema-specifications==2023.7.1 + # via + # -c requirements/base.in + # -r requirements/base.txt + # 
jsonschema + # openapi-schema-validator kombu==5.2.4 # via # -r requirements/base.txt @@ -506,15 +513,15 @@ open-forms-ext-haalcentraal-hr==0.3.0 # via -r requirements/extensions.in open-forms-ext-token-exchange==0.4.0 # via -r requirements/extensions.in -openapi-schema-validator==0.4.4 +openapi-schema-validator==0.6.2 # via # -r requirements/base.txt # openapi-spec-validator -openapi-spec-validator==0.5.5 +openapi-spec-validator==0.6.0 # via # -r requirements/base.txt # openapi3-parser -openapi3-parser==1.1.11 +openapi3-parser==1.1.16 # via # -c requirements/base.in # -r requirements/base.txt @@ -596,10 +603,6 @@ pypng==0.20220715.0 # via # -r requirements/base.txt # qrcode -pyrsistent==0.17.3 - # via - # -r requirements/base.txt - # jsonschema python-dateutil==2.8.2 # via # -r requirements/base.txt @@ -646,6 +649,13 @@ redis==4.5.4 # celery-once # django-redis # portalocker +referencing==0.30.2 + # via + # -c requirements/base.in + # -r requirements/base.txt + # jsonschema + # jsonschema-spec + # jsonschema-specifications requests==2.31.0 # via # -r requirements/base.txt @@ -679,6 +689,11 @@ rfc3339-validator==0.1.4 # via # -r requirements/base.txt # openapi-schema-validator +rpds-py==0.18.0 + # via + # -r requirements/base.txt + # jsonschema + # referencing ruamel-yaml==0.17.32 # via # -r requirements/base.txt diff --git a/src/openforms/registrations/contrib/objects_api/json_schema.py b/src/openforms/registrations/contrib/objects_api/json_schema.py new file mode 100644 index 0000000000..2cc04d51b5 --- /dev/null +++ b/src/openforms/registrations/contrib/objects_api/json_schema.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +from dataclasses import dataclass, field, replace +from typing import Iterator, Literal, overload + +from jsonschema_specifications import REGISTRY +from referencing import Resource +from referencing.exceptions import Unresolvable +from referencing.jsonschema import ObjectSchema +from typing_extensions import Self + + +@dataclass 
+class InvalidReference: + """An unknown/invalid reference.""" + + uri: str + """The URI of the unknown reference.""" + + exc: Unresolvable + """The ``referencing`` caught exception.""" + + +@dataclass(eq=True) +class JsonSchemaPath: + """A representation of a location in a JSON document, as a list of string segments.""" + + segments: list[str] = field(default_factory=list) + """The segments of the JSON location.""" + + required: bool = False + """Whether this path is marked as ``required`` in the JSON Schema.""" + + def __truediv__(self, key: str) -> Self: + return replace(self, segments=self.segments + [key]) + + def startswith(self, other: JsonSchemaPath | list[str], /) -> bool: + """Return ``True`` if the path starts with the specified path, ``False`` otherwise.""" + + other_segments = other.segments if isinstance(other, JsonSchemaPath) else other + + return ( + len(other_segments) <= len(self.segments) + and self.segments[: len(other_segments)] == other_segments + ) + + +@overload +def iter_json_schema_paths( + json_schema: ObjectSchema, fail_fast: Literal[False] +) -> Iterator[tuple[JsonSchemaPath, ObjectSchema | InvalidReference]]: ... + + +@overload +def iter_json_schema_paths( + json_schema: ObjectSchema, fail_fast: Literal[True] = ... +) -> Iterator[tuple[JsonSchemaPath, ObjectSchema]]: ... + + +def iter_json_schema_paths( + json_schema: ObjectSchema, fail_fast: bool = True +) -> Iterator[tuple[JsonSchemaPath, ObjectSchema | InvalidReference]]: + """Recursively iterate over the JSON Schema paths, resolving references if required. + + Yields a two-tuple containing the current path (as a list of string segments) and the matching (sub) JSON Schema. + + Known to be unsupported: + - Composition (https://json-schema.org/understanding-json-schema/reference/combining) + """ + resource = Resource.from_contents(json_schema) + # Or referencing.jsonschema.EMPTY_REGISTRY? 
+ resolver = REGISTRY.resolver_with_root(resource) + + parent_json_path = JsonSchemaPath() + + def _iter_json_schema( + json_schema: ObjectSchema, parent_json_path: JsonSchemaPath + ) -> Iterator[tuple[JsonSchemaPath, ObjectSchema | InvalidReference]]: + assert json_schema.get("type") == "object" + + yield parent_json_path, json_schema + + required = json_schema.get("required", []) + + k: str + for k, v in json_schema["properties"].items(): + json_path = parent_json_path / k + json_path.required = k in required + + match v: + case {"type": "object"}: + yield from _iter_json_schema(v, json_path) + case {"$ref": str(uri)}: + try: + resolved = resolver.lookup(uri) + except Unresolvable as exc: + if fail_fast: + raise + yield json_path, InvalidReference(uri, exc) + else: + yield from _iter_json_schema(resolved.contents, json_path) + case {}: + yield json_path, v + + yield from _iter_json_schema(json_schema, parent_json_path) + + +def get_missing_required_paths( + json_schema: ObjectSchema, paths: list[list[str]] +) -> list[list[str]]: + """Return a list of required path segments from the JSON Schema not covered by the provided paths. + + .. code-block:: pycon + + >>> json_schema = { + ... "type": "object", + ... "properties": { + ... "a": {...}, + ... "b": { + ... "type": "object", + ... "properties": { + ... "c": {...}, + ... "d": {...}, + ... }, + ... "required": ["c", "d"], + ... }, + ... "required": ["a", "b"], + ... } + >>> get_missing_required_paths(json_schema, [["a"], ["b", "c"]]) + [['b', 'd']] + """ + missing_paths: list[list[str]] = [] + + for r_path, _ in iter_json_schema_paths(json_schema): + if not r_path.required: + continue + + # If a child key is provided (e.g. "a.b"), any required parent key is dismissed (e.g. "a"). + if any(JsonSchemaPath(path).startswith(r_path) for path in paths): + continue + + # If a parent key is provided (e.g. "a"), any required child key is dismissed (e.g. "a.b"). 
from django.test import SimpleTestCase

from referencing.exceptions import Unresolvable

from ..json_schema import get_missing_required_paths, iter_json_schema_paths

# Schema without any reference, holding a single nested object.
JSON_SCHEMA_NO_REFS = {
    "$id": "noise-complaint.schema",
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "complaintDescription": {"type": "string"},
        "measuredDecibels": {"type": "array", "items": {"type": "number"}},
        "complainant": {
            "type": "object",
            "properties": {
                "first.name": {"type": "string"},
                "last.name": {"type": "string"},
            },
        },
    },
}

# Schema whose properties are all local references into ``definitions``.
JSON_SCHEMA_REFS = {
    "$id": "noise-complaint.schema",
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "complainant": {"$ref": "#/definitions/person"},
        "noisyAddress": {"$ref": "#/definitions/address"},
    },
    "definitions": {
        "person": {
            "type": "object",
            "properties": {
                "first.name": {"type": "string"},
                "last.name": {"type": "string"},
            },
        },
        "address": {
            "type": "object",
            "properties": {
                "street": {"type": "string"},
            },
        },
    },
}


# Schema where a referenced definition itself contains a reference.
JSON_SCHEMA_NESTED_REFS = {
    "$id": "noise-complaint.schema",
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "Noise complaint example V2",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "complainant": {"$ref": "#/definitions/person"},
    },
    "definitions": {
        "person": {
            "type": "object",
            "properties": {
                "residence": {"$ref": "#/definitions/address"},
            },
        },
        "address": {
            "type": "object",
            "properties": {
                "street": {"type": "string"},
            },
        },
    },
}

# Schema with a local reference pointing nowhere.
JSON_SCHEMA_UNKOWN_REF = {
    "$id": "noise-complaint.schema",
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "invalid": {"$ref": "#/invalidref"},
    },
}

# Schema with a reference to a remote document.
JSON_SCHEMA_EXTERNAL_REF = {
    "$id": "noise-complaint.schema",
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "additionalProperties": False,
    "properties": {
        "external": {"$ref": "http://example.com/external-schema.json"},
    },
}

# Schema with ``required`` declared on three nesting levels.
JSON_SCHEMA_REQUIRED_PATHS = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
        "a": {"type": "string"},
        "b": {
            "type": "object",
            "properties": {
                "c": {"type": "string"},
                "d": {
                    "type": "object",
                    "properties": {
                        "e": {"type": "string"},
                        "f": {"type": "string"},
                    },
                    "required": ["e"],
                },
            },
            "required": ["c", "d"],
        },
    },
    "required": ["a", "b"],
}


def _collect_paths(json_schema, **kwargs):
    """Return every ``(segments, sub schema)`` pair yielded for ``json_schema``."""
    return [
        (json_path.segments, sub_schema)
        for json_path, sub_schema in iter_json_schema_paths(json_schema, **kwargs)
    ]


class IterJsonSchemaTests(SimpleTestCase):
    """Check that iterating a JSON Schema yields the expected paths and sub schemas.

    The very first yielded item is the root (``([], CURRENT_SCHEMA_TESTED)``), so
    it is left out of the assertions.
    """

    def test_iter_json_schema_no_refs(self):
        person_schema = {
            "properties": {
                "first.name": {"type": "string"},
                "last.name": {"type": "string"},
            },
            "type": "object",
        }
        expected = [
            (["complaintDescription"], {"type": "string"}),
            (["measuredDecibels"], {"type": "array", "items": {"type": "number"}}),
            (["complainant"], person_schema),
            (["complainant", "first.name"], {"type": "string"}),
            (["complainant", "last.name"], {"type": "string"}),
        ]

        collected = _collect_paths(JSON_SCHEMA_NO_REFS)

        self.assertEqual(collected[1:], expected)

    def test_iter_json_schema_refs(self):
        person_schema = {
            "properties": {
                "first.name": {"type": "string"},
                "last.name": {"type": "string"},
            },
            "type": "object",
        }
        address_schema = {
            "properties": {
                "street": {"type": "string"},
            },
            "type": "object",
        }
        expected = [
            (["complainant"], person_schema),
            (["complainant", "first.name"], {"type": "string"}),
            (["complainant", "last.name"], {"type": "string"}),
            (["noisyAddress"], address_schema),
            (["noisyAddress", "street"], {"type": "string"}),
        ]

        collected = _collect_paths(JSON_SCHEMA_REFS)

        self.assertEqual(collected[1:], expected)

    def test_iter_json_schema_nested_refs(self):
        expected = [
            (
                ["complainant"],
                {
                    "properties": {"residence": {"$ref": "#/definitions/address"}},
                    "type": "object",
                },
            ),
            (
                ["complainant", "residence"],
                {"properties": {"street": {"type": "string"}}, "type": "object"},
            ),
            (["complainant", "residence", "street"], {"type": "string"}),
        ]

        collected = _collect_paths(JSON_SCHEMA_NESTED_REFS)

        self.assertEqual(collected[1:], expected)

    def test_iter_json_schema_unknown_ref(self):
        with self.assertRaises(Unresolvable):
            list(iter_json_schema_paths(JSON_SCHEMA_UNKOWN_REF, fail_fast=True))

        collected = _collect_paths(JSON_SCHEMA_UNKOWN_REF, fail_fast=False)
        segments, invalid_reference = collected[1]

        self.assertEqual(segments, ["invalid"])
        self.assertEqual(invalid_reference.uri, "#/invalidref")

    def test_iter_json_schema_external_ref(self):
        with self.assertRaises(Unresolvable):
            list(iter_json_schema_paths(JSON_SCHEMA_EXTERNAL_REF, fail_fast=True))

        collected = _collect_paths(JSON_SCHEMA_EXTERNAL_REF, fail_fast=False)
        segments, invalid_reference = collected[1]

        self.assertEqual(segments, ["external"])
        self.assertEqual(
            invalid_reference.uri, "http://example.com/external-schema.json"
        )


class RequiredJsonSchemaPathsTests(SimpleTestCase):
    """Check that the ``required`` flag is set on the yielded paths.

    (``IterJsonSchemaTests`` only looks at the segments and the sub schemas.)
    """

    def test_required_json_schema_paths(self):
        required_paths = []
        for json_path, _ in iter_json_schema_paths(JSON_SCHEMA_REQUIRED_PATHS):
            if json_path.required:
                required_paths.append(json_path.segments)

        self.assertEqual(
            required_paths, [["a"], ["b"], ["b", "c"], ["b", "d"], ["b", "d", "e"]]
        )


class MissingRequiredPathsTests(SimpleTestCase):
    """Check that required paths not covered by the provided ones are reported."""

    def test_no_missing_required_paths(self):
        cases = [
            ("top level", [["a"], ["b"]]),
            ("nested paths", [["a"], ["b", "c"], ["b", "d", "e"]]),
        ]

        for label, provided_paths in cases:
            with self.subTest(label):
                missing_paths = get_missing_required_paths(
                    JSON_SCHEMA_REQUIRED_PATHS, provided_paths
                )

                self.assertEqual(missing_paths, [])

    def test_missing_required_paths(self):
        cases = [
            ("Missing 'a'", [["b"]], [["a"]]),
            (
                "Missing 'b'",
                [["a"]],
                [["b"], ["b", "c"], ["b", "d"], ["b", "d", "e"]],
            ),
            ("Missing 'c'", [["a"], ["b", "d"]], [["b", "c"]]),
            (
                "Missing 'e'",
                [["a"], ["b", "c"], ["b", "d", "f"]],
                [["b", "d", "e"]],
            ),
        ]

        for label, provided_paths, expected in cases:
            with self.subTest(label):
                missing_paths = get_missing_required_paths(
                    JSON_SCHEMA_REQUIRED_PATHS, provided_paths
                )

                self.assertEqual(missing_paths, expected)