Skip to content

Commit

Permalink
Use the referencing library
Browse files Browse the repository at this point in the history
This is to avoid the `DeprecationWarning` about
*retrieving remote references being a security vulnerability*.

(We have only used this feature for referencing items in the local file
system, so the software should not have been compromised. Nevertheless
it is best to modify the code so we don't get this warning.)
  • Loading branch information
matthewrmshin committed Nov 29, 2023
1 parent 1acbc7f commit 5b5dcb2
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 31 deletions.
45 changes: 36 additions & 9 deletions src/yamlprocessor/dataprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,24 @@
from contextlib import suppress
from datetime import datetime
from errno import ENOENT
import json
import logging
import logging.config
import os
from pathlib import Path
import re
import sys
from urllib.parse import urlparse

from dateutil.parser import parse as datetimeparse
from dateutil.relativedelta import relativedelta
from dateutil.tz import tzlocal
import jmespath
import jsonschema
try:
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT202012
except ImportError:
pass # python < 3.8
from ruamel.yaml import YAML
from ruamel.yaml.constructor import ConstructorError

Expand Down Expand Up @@ -700,22 +705,44 @@ def validate_data(
"""
if not schema_location:
return
schema = {"$ref": schema_location}
if not urlparse(schema_location).scheme:
schema_path = Path(schema_location)
if schema_path.exists():
schema = {"$ref": schema_path.absolute().as_uri()}
elif self.schema_prefix:
schema = {"$ref": self.schema_prefix + schema_location}
try:
jsonschema.validate(schema=schema, instance=data)
registry = Registry(retrieve=self.get_schema_file)
jsonschema.Draft202012Validator(
{
'$schema': 'https://json-schema.org/draft/2020-12/schema',
'$ref': schema_location,
},
registry=registry,
).validate(data)
except NameError:
ref = 'file://' + str(self._get_schema_file(schema_location))
jsonschema.validate(schema={'$ref': ref}, instance=data)
except jsonschema.exceptions.ValidationError as exc:
logging.error(f'not ok {out_file_name}')
logging.exception(exc)
raise
else:
logging.info(f'ok {out_file_name}')

def get_schema_file(self, schema_location: str):
    """Load a local JSON schema file and wrap it as a ``Resource``.

    Used as the ``retrieve`` callback of the ``Registry`` that is built
    when validating data, so schemas are only ever read from the local
    file system.

    :param schema_location: Location of the schema, as accepted by
        ``self._get_schema_file``.
    :return: A ``Resource`` holding the parsed schema, tagged with the
        ``DRAFT202012`` specification.
    """
    schema_path = self._get_schema_file(schema_location)
    # Give json.loads the raw bytes: it decodes UTF-8/16/32 itself, whereas
    # Path.read_text() would use the platform's locale encoding.
    contents = json.loads(schema_path.read_bytes())
    return Resource(contents=contents, specification=DRAFT202012)

def _get_schema_file(self, schema_location: str):
    """Resolve a schema location to a path on the local file system.

    A leading ``file://`` scheme is removed (and the remainder is
    percent-decoded) from both the location and the schema prefix. If the
    location does not exist as given, it is looked up relative to
    ``self.schema_prefix`` when a prefix is set.

    :param schema_location: Plain path or ``file://`` URI of the schema.
    :return: ``Path`` of the first existing candidate. If none exists, the
        path derived from ``schema_location`` is returned unchanged, so it
        may point to a non-existent file.
    """
    from urllib.parse import unquote

    def local_part(location):
        # Path.as_uri() percent-encodes characters such as spaces, so a
        # file URI has to be decoded to get back the real file name.
        if location.startswith('file://'):
            return unquote(location.replace('file://', '', 1))
        return location

    schema_path = Path(local_part(schema_location))
    if schema_path.exists():
        return schema_path
    for prefix in [self.schema_prefix]:  # FIXME (marker kept from original)
        if not prefix:
            # No prefix configured (None or empty): nothing more to try.
            continue
        candidate = Path(local_part(prefix)) / schema_path
        if candidate.exists():
            return candidate
    return schema_path


def main(argv=None):
configure_basic_logging()
Expand Down
1 change: 1 addition & 0 deletions src/yamlprocessor/tests/test_dataprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ def test_main_15(tmp_path, yaml):
def test_main_validate_1(tmp_path, capsys, yaml):
"""Test main, YAML with JSON schema validation."""
schema = {
'$schema': 'https://json-schema.org/draft/2020-12/schema',
'type': 'object',
'properties': {'hello': {'type': 'string'}},
'required': ['hello'],
Expand Down
67 changes: 45 additions & 22 deletions src/yamlprocessor/tests/test_schemaprocess.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import json
from pathlib import Path

import jsonschema
try:
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT202012
except ImportError:
pass # python < 3.8

from ..schemaprocess import INCLUDE_SCHEMA, INCLUDE_SCHEMA_FILENAME, main

Expand Down Expand Up @@ -75,17 +81,8 @@ def test_main_one(monkeypatch, tmp_path):
('schema-root.json', SAMPLE_MAIN_ONE_INC),
('schema-a.json', SAMPLE_TESTING),
):
try:
jsonschema.validate(
schema={
'$ref': (tmp_path / schema_filename).absolute().as_uri()
},
instance=sample_data,
)
except jsonschema.exceptions.ValidationError as exc:
assert False, f"{schema_filename} does not validate data:\n{exc}"
else:
assert True, f"{schema_filename} works OK with data"
assert_jsonschema_validate(
(tmp_path / schema_filename).absolute().as_uri(), sample_data)


def test_main_three(monkeypatch, tmp_path):
Expand Down Expand Up @@ -189,14 +186,40 @@ def test_main_three(monkeypatch, tmp_path):
('schema-b.json', SAMPLE_EXAMINING),
('schema-c.json', SAMPLE_EXAMINING_NAME_ONE),
):
try:
jsonschema.validate(
schema={
'$ref': (tmp_path / schema_filename).absolute().as_uri()
},
instance=sample_data,
)
except jsonschema.exceptions.ValidationError as exc:
assert False, f"{schema_filename} does not validate data:\n{exc}"
else:
assert True, f"{schema_filename} works OK with data"
assert_jsonschema_validate(
(tmp_path / schema_filename).absolute().as_uri(), sample_data)


def assert_jsonschema_validate(schema_uri, sample_data):
    """Assert that the schema at ``schema_uri`` validates ``sample_data``.

    Uses a ``referencing`` registry that only reads local files. If the
    ``referencing`` names were not imported (the guarded import at the top
    of the module failed), falls back to ``jsonschema.validate`` with a
    ``file://`` reference.

    :param schema_uri: ``file://`` URI (or plain path) of the schema file.
    :param sample_data: Data expected to be valid against the schema.
    :raises AssertionError: If the data does not validate.
    """
    try:
        try:
            registry = Registry(retrieve=get_schema_file)
        except NameError:
            # "referencing" is not available: use the legacy resolver.
            ref = 'file://' + str(_get_schema_file(schema_uri))
            jsonschema.validate(schema={'$ref': ref}, instance=sample_data)
        else:
            jsonschema.Draft202012Validator(
                {
                    '$schema': 'https://json-schema.org/draft/2020-12/schema',
                    '$ref': schema_uri,
                },
                registry=registry,
            ).validate(sample_data)
    except jsonschema.exceptions.ValidationError as exc:
        # The outer handler covers both validation paths. An explicit raise
        # is used because "assert False" is stripped under "python -O".
        raise AssertionError(
            f"{schema_uri} does not validate data:\n{exc}") from exc


def get_schema_file(schema_location: str):
    """Helper to retrieve a local schema file as Resource.

    :param schema_location: ``file://`` URI or plain path of the schema.
    :return: A ``Resource`` holding the parsed schema, tagged with the
        ``DRAFT202012`` specification.
    """
    schema_path = _get_schema_file(schema_location)
    # Give json.loads the raw bytes: it decodes UTF-8/16/32 itself, whereas
    # Path.read_text() would use the platform's locale encoding.
    contents = json.loads(schema_path.read_bytes())
    return Resource(contents=contents, specification=DRAFT202012)


def _get_schema_file(schema_location: str):
    """Helper to retrieve a local schema file.

    :param schema_location: ``file://`` URI or plain path of the schema.
    :return: ``Path`` of the schema on the local file system. A
        ``file://`` URI has its scheme removed and is percent-decoded; a
        plain path is used as given.
    """
    from urllib.parse import unquote

    if schema_location.startswith('file://'):
        # Path.as_uri() percent-encodes characters such as spaces, so
        # decode them to get back the real file name.
        schema_location = unquote(schema_location.replace('file://', '', 1))
    return Path(schema_location)

0 comments on commit 5b5dcb2

Please sign in to comment.