diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2963961bdc..aa65cc1a18 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,19 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+Fixed:
+
+  * `ocrd_utils.generate_range`: `maxsplit` should be 1, not 2, #880
+  * Typos in CHANGELOG, README and code comments, #890
+
+Changed:
+
+  * Consistently use snake_case but continue to support CamelCase for kwargs and CLI options, #874, #862
+  * Update spec to 3.19.0, introducing greater flexibility in describing parameters, #872, #848, OCR-D/spec#206
+  * `ocrd workspace merge`: support mapping `file_id` and `page_id` in addition to `file_grp`, #886, #888
+  * `ocrd workspace merge`: rebase `OcrdFile.url` to target workspace, #887, #888
+  * Replace `resource_filename` et al. from pkg_resources with faster alternatives, #881, #882
+
 ## [2.35.0] - 2022-06-02
 
 Changed:
diff --git a/Makefile b/Makefile
index 6da911a0f2..2d5ebda7e9 100644
--- a/Makefile
+++ b/Makefile
@@ -68,7 +68,7 @@ deps-test:
 
 # (Re)install the tool
 install:
-	$(PIP) install -U pip wheel
+	$(PIP) install -U pip wheel setuptools fastentrypoints
 	for mod in $(BUILD_ORDER);do (cd $$mod ; $(PIP_INSTALL) .);done
 
 # Install with pip install -e
diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py
index 84e2b5f733..92e0292ffd 100644
--- a/ocrd/ocrd/cli/workspace.py
+++ b/ocrd/ocrd/cli/workspace.py
@@ -67,10 +67,10 @@ def workspace_cli(ctx, directory, mets, mets_basename, backup):
 def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency, page_coordinate_consistency):
     """
     Validate a workspace
-    
+
     METS_URL can be a URL, an absolute path or a path relative to $PWD.
     If not given, use --mets accordingly.
-    
+
     Check that the METS and its referenced file contents abide by the OCR-D specifications.
     """
@@ -183,8 +183,8 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_
         except KeyError:
             log.error("Cannot guess mimetype from extension '%s' for '%s'. 
Set --mimetype explicitly" % (Path(fname).suffix, fname)) - kwargs = {'fileGrp': file_grp, 'ID': file_id, 'mimetype': mimetype, 'pageId': page_id, 'force': force, 'ignore': ignore} - log.debug("Adding '%s' (%s)", fname, kwargs) + log.debug("Adding '%s'", fname) + local_filename = None if not (fname.startswith('http://') or fname.startswith('https://')): if not fname.startswith(ctx.directory): if not isabs(fname) and exists(join(ctx.directory, fname)): @@ -202,12 +202,11 @@ def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_ sys.exit(1) if fname.startswith(ctx.directory): fname = relpath(fname, ctx.directory) - kwargs['local_filename'] = fname + local_filename = fname - kwargs['url'] = fname if not page_id: log.warning("You did not provide '--page-id/-g', so the file you added is not linked to a specific page.") - workspace.mets.add_file(**kwargs) + workspace.add_file(file_grp, file_id=file_id, mimetype=mimetype, page_id=page_id, force=force, ignore=ignore, local_filename=local_filename, url=fname) workspace.save_mets() # ---------------------------------------------------------------------- @@ -306,7 +305,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, file_grp file_id_ = file_id or safe_filename(str(file_path)) # set up file info - file_dict = {'url': url, 'mimetype': mimetype, 'ID': file_id_, 'pageId': page_id, 'fileGrp': file_grp} + file_dict = {'url': url, 'mimetype': mimetype, 'file_id': file_id_, 'page_id': page_id, 'file_grp': file_grp} # guess mime type if not file_dict['mimetype']: @@ -350,7 +349,7 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, file_grp destpath.write_bytes(srcpath.read_bytes()) # Add to workspace (or not) - fileGrp = file_dict.pop('fileGrp') + fileGrp = file_dict.pop('file_grp') if dry_run: log.info('workspace.add_file(%s)' % file_dict) else: @@ -372,8 +371,11 @@ def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, file_grp type=click.Choice([ 'url', 'mimetype', + 'page_id', 'pageId', + 'file_id', 'ID', + 'file_grp', 'fileGrp', 'basename', 'basename_without_extension', @@ -389,14 +391,16 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, down (If any ``FILTER`` starts with ``//``, then its remainder will be interpreted as a regular expression.) """ + snake_to_camel = {"file_id": "ID", "page_id": "pageId", "file_grp": "fileGrp"} + output_field = [snake_to_camel.get(x, x) for x in output_field] modified_mets = False ret = list() workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename) - for f in workspace.mets.find_files( - ID=file_id, - fileGrp=file_grp, + for f in workspace.find_files( + file_id=file_id, + file_grp=file_grp, mimetype=mimetype, - pageId=page_id, + page_id=page_id, ): if download and not f.local_filename: workspace.download_file(f) @@ -428,7 +432,7 @@ def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, down def workspace_remove_file(ctx, id, force, keep_file): # pylint: disable=redefined-builtin """ Delete files (given by their ID attribute ``ID``). - + (If any ``ID`` starts with ``//``, then its remainder will be interpreted as a regular expression.) """ @@ -467,7 +471,7 @@ def rename_group(ctx, old, new): def remove_group(ctx, group, recursive, force, keep_files): """ Delete fileGrps (given by their USE attribute ``GROUP``). - + (If any ``GROUP`` starts with ``//``, then its remainder will be interpreted as a regular expression.) 
""" @@ -495,11 +499,11 @@ def prune_files(ctx, file_grp, mimetype, page_id, file_id): """ workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup) with pushd_popd(workspace.directory): - for f in workspace.mets.find_files( - ID=file_id, - fileGrp=file_grp, + for f in workspace.find_files( + file_id=file_id, + file_grp=file_grp, mimetype=mimetype, - pageId=page_id, + page_id=page_id, ): try: if not f.local_filename or not exists(f.local_filename): @@ -573,16 +577,24 @@ def set_id(ctx, id): # pylint: disable=redefined-builtin # ocrd workspace merge # ---------------------------------------------------------------------- +def _handle_json_option(ctx, param, value): + return parse_json_string_or_file(value) if value else None + @workspace_cli.command('merge') @click.argument('METS_PATH') @click.option('--copy-files/--no-copy-files', is_flag=True, help="Copy files as well", default=True, show_default=True) -@click.option('--fileGrp-mapping', help="JSON object mapping src to dest fileGrp") +@click.option('--fileGrp-mapping', help="JSON object mapping src to dest fileGrp", callback=_handle_json_option) +@click.option('--fileId-mapping', help="JSON object mapping src to dest file ID", callback=_handle_json_option) +@click.option('--pageId-mapping', help="JSON object mapping src to dest page ID", callback=_handle_json_option) @mets_find_options @pass_workspace def merge(ctx, copy_files, filegrp_mapping, file_grp, file_id, page_id, mimetype, mets_path): # pylint: disable=redefined-builtin """ Merges this workspace with the workspace that contains ``METS_PATH`` + Pass a JSON string or file to ``--fileGrp-mapping``, ``--fileId-mapping`` or ``--pageId-mapping`` + in order to rename all fileGrp, file ID or page ID values, respectively. + The ``--file-id``, ``--page-id``, ``--mimetype`` and ``--file-grp`` options have the same semantics as in ``ocrd workspace find``, see ``ocrd workspace find --help`` for an explanation. @@ -596,9 +608,11 @@ def merge(ctx, copy_files, filegrp_mapping, file_grp, file_id, page_id, mimetype other_workspace, copy_files=copy_files, fileGrp_mapping=filegrp_mapping, - fileGrp=file_grp, - ID=file_id, - pageId=page_id, + fileId_mapping=fileid_mapping, + pageId_mapping=pageid_mapping, + file_grp=file_grp, + file_id=file_id, + page_id=page_id, mimetype=mimetype, ) workspace.save_mets() diff --git a/ocrd/ocrd/constants.py b/ocrd/ocrd/constants.py index 1d436a7fa9..2e9c17c649 100644 --- a/ocrd/ocrd/constants.py +++ b/ocrd/ocrd/constants.py @@ -1,7 +1,7 @@ """ Constants for ocrd. 
""" -from pkg_resources import resource_filename +from ocrd_utils.package_resources import resource_filename __all__ = [ 'TMP_PREFIX', diff --git a/ocrd/ocrd/processor/builtin/dummy_processor.py b/ocrd/ocrd/processor/builtin/dummy_processor.py index 9a1ad511e7..615652f926 100644 --- a/ocrd/ocrd/processor/builtin/dummy_processor.py +++ b/ocrd/ocrd/processor/builtin/dummy_processor.py @@ -1,6 +1,6 @@ # pylint: disable=missing-module-docstring,invalid-name from os.path import join, basename -from pkg_resources import resource_string +from ocrd_utils.package_resources import resource_string import click @@ -17,7 +17,7 @@ ) from ocrd_modelfactory import page_from_file -OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8')) +OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) class DummyProcessor(Processor): """ @@ -40,9 +40,9 @@ def process(self): if input_file.mimetype == MIMETYPE_PAGE: # Source file is PAGE-XML: Write out in-memory PcGtsType self.workspace.add_file( - ID=file_id, + file_id=file_id, file_grp=self.output_file_grp, - pageId=input_file.pageId, + page_id=input_file.pageId, mimetype=input_file.mimetype, local_filename=local_filename, content=to_xml(pcgts).encode('utf-8')) @@ -51,9 +51,9 @@ def process(self): with open(input_file.local_filename, 'rb') as f: content = f.read() self.workspace.add_file( - ID=file_id, + file_id=file_id, file_grp=self.output_file_grp, - pageId=input_file.pageId, + page_id=input_file.pageId, mimetype=input_file.mimetype, local_filename=local_filename, content=content) @@ -66,9 +66,9 @@ def process(self): LOG.info("Add PAGE-XML %s generated for %s at %s", page_file_id, file_id, page_filename) self.workspace.add_file( - ID=page_file_id, + file_id=page_file_id, file_grp=self.output_file_grp, - pageId=input_file.pageId, + page_id=input_file.pageId, mimetype=MIMETYPE_PAGE, local_filename=page_filename, content=to_xml(pcgts).encode('utf-8')) diff --git a/ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json b/ocrd/ocrd/processor/builtin/ocrd-tool.json similarity index 100% rename from ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json rename to ocrd/ocrd/processor/builtin/ocrd-tool.json diff --git a/ocrd/ocrd/workspace.py b/ocrd/ocrd/workspace.py index 4d4a957c75..66430d46cb 100644 --- a/ocrd/ocrd/workspace.py +++ b/ocrd/ocrd/workspace.py @@ -33,6 +33,8 @@ polygon_from_points, xywh_from_bbox, pushd_popd, + is_local_filename, + deprecated_alias, MIME_TO_EXT, MIME_TO_PIL, MIMETYPE_PAGE, @@ -93,6 +95,10 @@ def reload_mets(self): """ self.mets = OcrdMets(filename=self.mets_target) + @deprecated_alias(pageId="page_id") + @deprecated_alias(ID="file_id") + @deprecated_alias(fileGrp="file_grp") + @deprecated_alias(fileGrp_mapping="filegrp_mapping") def merge(self, other_workspace, copy_files=True, **kwargs): """ Merge ``other_workspace`` into this one @@ -103,7 +109,13 @@ def merge(self, other_workspace, copy_files=True, **kwargs): copy_files (boolean): Whether to copy files from `other_workspace` to this one """ def after_add_cb(f): + """callback to run on merged OcrdFile instances in the destination""" if not copy_files: + fpath_src = Path(other_workspace.directory).resolve() + fpath_dst = Path(self.directory).resolve() + dstprefix = fpath_src.relative_to(fpath_dst) # raises ValueError if not a subpath + if is_local_filename(f.url): + f.url = str(Path(dstprefix, f.url)) return fpath_src = Path(other_workspace.directory, f.url) fpath_dest = Path(self.directory, f.url) 
@@ -114,6 +126,15 @@ def after_add_cb(f): makedirs(str(fpath_dest.parent)) with open(str(fpath_src), 'rb') as fstream_in, open(str(fpath_dest), 'wb') as fstream_out: copyfileobj(fstream_in, fstream_out) + if 'page_id' in kwargs: + kwargs['pageId'] = kwargs.pop('page_id') + if 'file_id' in kwargs: + kwargs['ID'] = kwargs.pop('file_id') + if 'file_grp' in kwargs: + kwargs['fileGrp'] = kwargs.pop('file_grp') + if 'filegrp_mapping' in kwargs: + kwargs['fileGrp_mapping'] = kwargs.pop('filegrp_mapping') + self.mets.merge(other_workspace.mets, after_add_cb=after_add_cb, **kwargs) @@ -161,12 +182,12 @@ def download_file(self, f, _recursion_count=0): f.local_filename = f.url return f - def remove_file(self, ID, force=False, keep_file=False, page_recursive=False, page_same_group=False): + def remove_file(self, file_id, force=False, keep_file=False, page_recursive=False, page_same_group=False): """ Remove a METS `file` from the workspace. Arguments: - ID (string|:py:class:`ocrd_models.ocrd_file.OcrdFile`): `@ID` of the METS `file` + file_id (string|:py:class:`ocrd_models.ocrd_file.OcrdFile`): `@ID` of the METS `file` to delete or the file itself Keyword Args: force (boolean): Continue removing even if file not found in METS @@ -177,19 +198,19 @@ def remove_file(self, ID, force=False, keep_file=False, page_recursive=False, pa Has no effect unless ``page_recursive`` is `True`. """ log = getLogger('ocrd.workspace.remove_file') - log.debug('Deleting mets:file %s', ID) + log.debug('Deleting mets:file %s', file_id) if not force and self.overwrite_mode: force = True - if isinstance(ID, OcrdFile): - ID = ID.ID + if isinstance(file_id, OcrdFile): + file_id = file_id.ID try: try: - ocrd_file = next(self.mets.find_files(ID=ID)) + ocrd_file = next(self.mets.find_files(ID=file_id)) except StopIteration: - if ID.startswith(REGEX_PREFIX): + if file_id.startswith(REGEX_PREFIX): # allow empty results if filter criteria involve a regex return None - raise FileNotFoundError("File %s not found in METS" % ID) + raise FileNotFoundError("File %s not found in METS" % file_id) if page_recursive and ocrd_file.mimetype == MIMETYPE_PAGE: with pushd_popd(self.directory): ocrd_page = parse(self.download_file(ocrd_file).local_filename, silence=True) @@ -209,7 +230,7 @@ def remove_file(self, ID, force=False, keep_file=False, page_recursive=False, pa log.info("rm %s [cwd=%s]", ocrd_file.local_filename, self.directory) unlink(ocrd_file.local_filename) # Remove from METS only after the recursion of AlternativeImages - self.mets.remove_file(ID) + self.mets.remove_file(file_id) return ocrd_file except FileNotFoundError as e: if not force: @@ -326,6 +347,8 @@ def rename_file_group(self, old, new): if Path(old).is_dir() and not listdir(old): Path(old).rmdir() + @deprecated_alias(pageId="page_id") + @deprecated_alias(ID="file_id") def add_file(self, file_grp, content=None, **kwargs): """ Add a file to the :py:class:`ocrd_models.ocrd_mets.OcrdMets` of the workspace. 
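# Editor's note (not part of the patch): the kwarg-aliasing pattern behind the
# @deprecated_alias decorators added above. This is a hedged sketch of the idea,
# not the actual ocrd_utils implementation; names ending in _sketch are made up.
import warnings
from functools import wraps

def deprecated_alias_sketch(**aliases):              # e.g. pageId="page_id"
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            for old, new in aliases.items():
                if old in kwargs:
                    warnings.warn("'%s' is deprecated, use '%s'" % (old, new), DeprecationWarning)
                    kwargs[new] = kwargs.pop(old)
            return fn(*args, **kwargs)
        return wrapper
    return decorator

@deprecated_alias_sketch(ID="file_id", pageId="page_id")
def add_file_sketch(file_grp, file_id=None, page_id=None, **kwargs):
    return file_grp, file_id, page_id

# Both spellings keep working; the CamelCase one emits a DeprecationWarning:
assert add_file_sketch("GRP", ID="f1", pageId="p1") == ("GRP", "f1", "p1")
assert add_file_sketch("GRP", file_id="f1", page_id="p1") == ("GRP", "f1", "p1")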
@@ -345,15 +368,15 @@ def add_file(self, file_grp, content=None, **kwargs): file_grp, kwargs.get('local_filename'), content is not None) - if 'pageId' not in kwargs: - raise ValueError("workspace.add_file must be passed a 'pageId' kwarg, even if it is None.") - if content is not None and 'local_filename' not in kwargs: + if 'page_id' not in kwargs: + raise ValueError("workspace.add_file must be passed a 'page_id' kwarg, even if it is None.") + if content is not None and not kwargs.get('local_filename'): raise Exception("'content' was set but no 'local_filename'") if self.overwrite_mode: kwargs['force'] = True with pushd_popd(self.directory): - if 'local_filename' in kwargs: + if kwargs.get('local_filename'): # If the local filename has folder components, create those folders local_filename_dir = kwargs['local_filename'].rsplit('/', 1)[0] if local_filename_dir != kwargs['local_filename'] and not Path(local_filename_dir).is_dir(): @@ -362,6 +385,10 @@ def add_file(self, file_grp, content=None, **kwargs): kwargs['url'] = kwargs['local_filename'] # print(kwargs) + kwargs["pageId"] = kwargs.pop("page_id") + if "file_id" in kwargs: + kwargs["ID"] = kwargs.pop("file_id") + ret = self.mets.add_file(file_grp, **kwargs) if content is not None: @@ -1000,8 +1027,8 @@ def save_image_file(self, image, file_path = str(Path(file_grp, '%s%s' % (file_id, MIME_TO_EXT[mimetype]))) out = self.add_file( file_grp, - ID=file_id, - pageId=page_id, + file_id=file_id, + page_id=page_id, local_filename=file_path, mimetype=mimetype, content=image_bytes.getvalue(), @@ -1010,6 +1037,28 @@ def save_image_file(self, image, file_id, file_grp, out.local_filename) return file_path + def find_files(self, *args, **kwargs): + """ + Search ``mets:file`` entries in wrapped METS document and yield results. + + Delegator to :py:func:`ocrd_models.ocrd_mets.OcrdMets.find_files` + + Keyword Args: + **kwargs: See :py:func:`ocrd_models.ocrd_mets.OcrdMets.find_files` + Returns: + Generator which yields :py:class:`ocrd_models:ocrd_file:OcrdFile` instantiations + """ + log = getLogger('ocrd.workspace.find_files') + log.debug('find files in mets. 
kwargs=%s' % kwargs) + if "page_id" in kwargs: + kwargs["pageId"] = kwargs.pop("page_id") + if "file_id" in kwargs: + kwargs["ID"] = kwargs.pop("file_id") + if "file_grp" in kwargs: + kwargs["fileGrp"] = kwargs.pop("file_grp") + with pushd_popd(self.directory): + return self.mets.find_files(*args, **kwargs) + def _crop(log, name, segment, parent_image, parent_coords, op='cropped', **kwargs): segment_coords = parent_coords.copy() # get polygon outline of segment relative to parent image: diff --git a/ocrd/ocrd/workspace_bagger.py b/ocrd/ocrd/workspace_bagger.py index 28ae155b9a..ac215fa7e7 100644 --- a/ocrd/ocrd/workspace_bagger.py +++ b/ocrd/ocrd/workspace_bagger.py @@ -6,8 +6,6 @@ import re import tempfile import sys - -from pkg_resources import get_distribution from bagit import Bag, make_manifests # pylint: disable=no-name-in-module from ocrd_utils import ( @@ -22,6 +20,7 @@ from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import to_xml +from ocrd_utils.package_resources import get_distribution from .workspace import Workspace diff --git a/ocrd/requirements.txt b/ocrd/requirements.txt index 2da0163b74..ca62ed9370 100644 --- a/ocrd/requirements.txt +++ b/ocrd/requirements.txt @@ -7,4 +7,4 @@ opencv-python-headless Flask jsonschema pyyaml -Deprecated == 1.2.0 +Deprecated == 1.2.0 \ No newline at end of file diff --git a/ocrd/setup.py b/ocrd/setup.py index 0c8c0fa2ae..0269893e28 100644 --- a/ocrd/setup.py +++ b/ocrd/setup.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import fastentrypoints from setuptools import setup, find_packages from ocrd_utils import VERSION diff --git a/ocrd_models/ocrd_models/constants.py b/ocrd_models/ocrd_models/constants.py index 6c8b0e1017..b3fe89a4c9 100644 --- a/ocrd_models/ocrd_models/constants.py +++ b/ocrd_models/ocrd_models/constants.py @@ -1,7 +1,7 @@ """ Constants for ocrd_models. """ -from pkg_resources import resource_string +from ocrd_utils.package_resources import resource_string import re __all__ = [ diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/ocrd_models/ocrd_models/ocrd_mets.py index 8161684c58..0e4a3e2dda 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/ocrd_models/ocrd_models/ocrd_mets.py @@ -167,7 +167,7 @@ def find_files(self, ID=None, fileGrp=None, pageId=None, mimetype=None, url=None pageIds_expanded = [] for pageId_ in pageIds: if '..' in pageId_: - pageIds_expanded += generate_range(*pageId_.split('..', 2)) + pageIds_expanded += generate_range(*pageId_.split('..', 1)) pageIds += pageIds_expanded for page in self._tree.getroot().xpath( '//mets:div[@TYPE="page"]', namespaces=NS): @@ -485,7 +485,7 @@ def remove_physical_page_fptr(self, fileId): mets_div.remove(mets_fptr) return ret - def merge(self, other_mets, fileGrp_mapping=None, after_add_cb=None, **kwargs): + def merge(self, other_mets, fileGrp_mapping=None, fileId_mapping=None, pageId_mapping=None, after_add_cb=None, **kwargs): """ Add all files from other_mets. 
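# Editor's note (not part of the patch): why the page-range expansion in
# OcrdMets.find_files (see the generate_range hunk above, #880) now uses maxsplit=1.
# generate_range(start, end) expects exactly two endpoints; the page IDs are made up.
spec = 'PHYS_0001..PHYS_0010'
assert spec.split('..', 1) == ['PHYS_0001', 'PHYS_0010']
# With maxsplit=2, an expression containing a second '..' would yield three parts,
# and generate_range(*parts) would receive one positional argument too many:
odd = 'PHYS_0001..PHYS_0005..PHYS_0010'
assert odd.split('..', 2) == ['PHYS_0001', 'PHYS_0005', 'PHYS_0010']
assert odd.split('..', 1) == ['PHYS_0001', 'PHYS_0005..PHYS_0010']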
@@ -493,16 +493,24 @@ def merge(self, other_mets, fileGrp_mapping=None, after_add_cb=None, **kwargs): Keyword Args: fileGrp_mapping (dict): Map :py:attr:`other_mets` fileGrp to fileGrp in this METS + fileId_mapping (dict): Map :py:attr:`other_mets` file ID to file ID in this METS + pageId_mapping (dict): Map :py:attr:`other_mets` page ID to page ID in this METS after_add_cb (function): Callback received after file is added to the METS """ if not fileGrp_mapping: fileGrp_mapping = {} + if not fileId_mapping: + fileId_mapping = {} + if not pageId_mapping: + pageId_mapping = {} for f_src in other_mets.find_files(**kwargs): f_dest = self.add_file( fileGrp_mapping.get(f_src.fileGrp, f_src.fileGrp), mimetype=f_src.mimetype, url=f_src.url, - ID=f_src.ID, - pageId=f_src.pageId) + ID=fileId_mapping.get(f_src.ID, f_src.ID), + pageId=pageId_mapping.get(f_src.pageId, f_src.pageId)) + # FIXME: merge metsHdr, amdSec, dmdSec as well + # FIXME: merge structMap logical and structLink as well if after_add_cb: after_add_cb(f_dest) diff --git a/ocrd_utils/ocrd_utils/constants.py b/ocrd_utils/ocrd_utils/constants.py index 121e5df612..1164025081 100644 --- a/ocrd_utils/ocrd_utils/constants.py +++ b/ocrd_utils/ocrd_utils/constants.py @@ -1,11 +1,12 @@ """ Constants for ocrd_utils. """ -from pkg_resources import get_distribution from re import compile as regex_compile from os import environ from os.path import join, expanduser +from ocrd_utils.package_resources import get_distribution + __all__ = [ 'EXT_TO_MIME', 'LOG_FORMAT', diff --git a/ocrd_utils/ocrd_utils/package_resources.py b/ocrd_utils/ocrd_utils/package_resources.py new file mode 100644 index 0000000000..ee01d046f4 --- /dev/null +++ b/ocrd_utils/ocrd_utils/package_resources.py @@ -0,0 +1,50 @@ +import atexit +from contextlib import ExitStack +from pathlib import Path + +try: + from importlib.resources import path, read_binary +except ImportError: + from importlib_resources import path, read_binary # type: ignore + +try: + from importlib.metadata import distribution as get_distribution +except ImportError: + from importlib_metadata import distribution as get_distribution + +# See https://importlib-resources.readthedocs.io/en/latest/migration.html#pkg-resources-resource-filename +_file_manager = ExitStack() +atexit.register(_file_manager.close) + + +def resource_filename(package: str, resource: str) -> Path: + """ + Reimplementation of the function with the same name from pkg_resources + + Using importlib for better performance + + package : str + The package from where to start looking for resource (often __name__) + resource : str + The resource to look up + """ + parent_package = package.rsplit('.',1)[0] + return _file_manager.enter_context(path(parent_package, resource)) + + +def resource_string(package: str, resource: str) -> bytes: + """ + Reimplementation of the function with the same name from pkg_resources + + Using importlib for better performance + + package : str + The package from where to start looking for resource (often __name__) + resource : str + The resource to look up + """ + parent_package = package.rsplit('.',1)[0] + return read_binary(parent_package, resource) + + +__all__ = ['resource_filename', 'resource_string', 'get_distribution'] diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt index 300ed90949..de4e7adee3 100644 --- a/ocrd_utils/requirements.txt +++ b/ocrd_utils/requirements.txt @@ -3,3 +3,5 @@ Pillow >= 7.2.0 # tensorflow versions might require different versions numpy atomicwrites >= 1.3.0 
+importlib_metadata;python_version<'3.8' +importlib_resources;python_version<'3.8' diff --git a/ocrd_validators/ocrd_validators/constants.py b/ocrd_validators/ocrd_validators/constants.py index 25d2e0e53b..5497102f25 100644 --- a/ocrd_validators/ocrd_validators/constants.py +++ b/ocrd_validators/ocrd_validators/constants.py @@ -2,7 +2,7 @@ Constants for ocrd_validators. """ import yaml -from pkg_resources import resource_string, resource_filename +from ocrd_utils.package_resources import resource_string, resource_filename __all__ = [ 'OCRD_TOOL_SCHEMA', diff --git a/ocrd_validators/ocrd_validators/json_validator.py b/ocrd_validators/ocrd_validators/json_validator.py index 57a0a9a37c..c920fc7c2d 100644 --- a/ocrd_validators/ocrd_validators/json_validator.py +++ b/ocrd_validators/ocrd_validators/json_validator.py @@ -3,7 +3,7 @@ """ import json -from jsonschema import Draft4Validator, validators # pylint: disable=import-error +from jsonschema import Draft6Validator, validators # pylint: disable=import-error from ocrd_models import ValidationReport @@ -28,7 +28,7 @@ def set_defaults(validator, properties, instance, schema): return validators.extend(validator_class, {"properties": set_defaults}) -DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator) +DefaultValidatingDraft6Validator = extend_with_default(Draft6Validator) # # ------------------------------------------------- @@ -52,13 +52,13 @@ def validate(obj, schema): obj = json.loads(obj) return JsonValidator(schema)._validate(obj) # pylint: disable=protected-access - def __init__(self, schema, validator_class=Draft4Validator): + def __init__(self, schema, validator_class=Draft6Validator): """ Construct a JsonValidator. Args: schema (dict): - validator_class (Draft4Validator|DefaultValidatingDraft4Validator): + validator_class (Draft6Validator|DefaultValidatingDraft6Validator): """ self.validator = validator_class(schema) diff --git a/ocrd_validators/ocrd_validators/parameter_validator.py b/ocrd_validators/ocrd_validators/parameter_validator.py index 91cb01fbb4..20dd6ff2b7 100644 --- a/ocrd_validators/ocrd_validators/parameter_validator.py +++ b/ocrd_validators/ocrd_validators/parameter_validator.py @@ -1,7 +1,7 @@ """ Validate parameters against ocrd-tool.json. """ -from .json_validator import JsonValidator, DefaultValidatingDraft4Validator +from .json_validator import JsonValidator, DefaultValidatingDraft6Validator # # ------------------------------------------------- @@ -45,4 +45,4 @@ def __init__(self, ocrd_tool): "required": required, "additionalProperties": False, "properties": p - }, DefaultValidatingDraft4Validator) + }, DefaultValidatingDraft6Validator) diff --git a/ocrd_validators/ocrd_validators/resource_list_validator.py b/ocrd_validators/ocrd_validators/resource_list_validator.py index ab1b53a2f6..72a11c34de 100644 --- a/ocrd_validators/ocrd_validators/resource_list_validator.py +++ b/ocrd_validators/ocrd_validators/resource_list_validator.py @@ -4,7 +4,7 @@ See `specs `_. """ from .constants import RESOURCE_LIST_SCHEMA -from .json_validator import JsonValidator, DefaultValidatingDraft4Validator +from .json_validator import JsonValidator, DefaultValidatingDraft6Validator # # ------------------------------------------------- @@ -20,5 +20,5 @@ def validate(obj, schema=RESOURCE_LIST_SCHEMA): """ Validate against ``resource_list.schema.yml`` schema. 
""" - return JsonValidator(schema, validator_class=DefaultValidatingDraft4Validator)._validate(obj) + return JsonValidator(schema, validator_class=DefaultValidatingDraft6Validator)._validate(obj) diff --git a/tests/cli/test_workspace.py b/tests/cli/test_workspace.py index 1a7462040d..807e07b722 100644 --- a/tests/cli/test_workspace.py +++ b/tests/cli/test_workspace.py @@ -51,9 +51,9 @@ def test_add(self): ws_api = self.resolver.workspace_from_nothing(directory=tempdir) ws_api.add_file( file_grp, - ID=ID, + file_id=ID, content=content, - pageId=page_id, + page_id=page_id, mimetype=mimetype, local_filename=local_filename ) @@ -255,7 +255,6 @@ def test_add_existing_checked(self): f = ws.mets.find_all_files()[0] self.assertEqual(f.url, 'test.tif') - def test_find_all_files(self): with TemporaryDirectory() as tempdir: wsdir = join(tempdir, 'ws') @@ -265,6 +264,18 @@ def test_find_all_files(self): self.assertEqual(result.output, 'OCR-D-IMG-BIN\nOCR-D-IMG-BIN\n') self.assertEqual(result.exit_code, 0) + def test_find_all_files_outputfield(self): + with TemporaryDirectory() as tempdir: + wsdir = join(tempdir, 'ws') + copytree(assets.path_to('SBB0000F29300010000/data'), wsdir) + with pushd_popd(wsdir): + result = self.runner.invoke(workspace_cli, + ['find', '-G', 'OCR-D-IMG-BIN', '-k', + 'file_grp', '-k', 'file_id', '-k', 'page_id']) + self.assertEqual(result.exit_code, 0) + self.assertEqual(result.output, 'OCR-D-IMG-BIN\tFILE_0001_IMAGE_BIN\tPHYS_0001\n' + 'OCR-D-IMG-BIN\tFILE_0002_IMAGE_BIN\tPHYS_0002\n') + def test_prune_files(self): with TemporaryDirectory() as tempdir: copytree(assets.path_to('SBB0000F29300010000/data'), join(tempdir, 'ws')) @@ -452,7 +463,7 @@ def test_bulk_add0(self): def test_bulk_add_missing_param(self): with pushd_popd(tempdir=True) as wsdir: ws = self.resolver.workspace_from_nothing(directory=wsdir) - with pytest.raises(ValueError, match=r"OcrdFile attribute 'pageId' unset"): + with pytest.raises(ValueError, match=r"OcrdFile attribute 'page_id' unset"): _, out, err = self.invoke_cli(workspace_cli, [ 'bulk-add', '-r', r'(?P.*) (?P.*) (?P.*) (?P.*) (?P.*) (?P.*)', diff --git a/tests/processor/test_processor.py b/tests/processor/test_processor.py index 726f48681f..57e09eec20 100644 --- a/tests/processor/test_processor.py +++ b/tests/processor/test_processor.py @@ -105,10 +105,10 @@ def test_zip_input_files(self): class ZipTestProcessor(Processor): pass with pushd_popd(tempdir=True) as tempdir: ws = self.resolver.workspace_from_nothing(directory=tempdir) - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar1', pageId='phys_0001') - ws.add_file('GRP2', mimetype='application/alto+xml', ID='foobar2', pageId='phys_0001') - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar3', pageId='phys_0002') - ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, ID='foobar4', pageId='phys_0002') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar1', page_id='phys_0001') + ws.add_file('GRP2', mimetype='application/alto+xml', file_id='foobar2', page_id='phys_0001') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar3', page_id='phys_0002') + ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, file_id='foobar4', page_id='phys_0002') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) @@ -125,12 +125,12 @@ def test_zip_input_files_multi_mixed(self): class ZipTestProcessor(Processor): pass with pushd_popd(tempdir=True) as tempdir: ws = 
self.resolver.workspace_from_nothing(directory=tempdir) - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar1', pageId='phys_0001') - ws.add_file('GRP1', mimetype='image/png', ID='foobar1img1', pageId='phys_0001') - ws.add_file('GRP1', mimetype='image/png', ID='foobar1img2', pageId='phys_0001') - ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, ID='foobar2', pageId='phys_0001') - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar3', pageId='phys_0002') - ws.add_file('GRP2', mimetype='image/tiff', ID='foobar4', pageId='phys_0002') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar1', page_id='phys_0001') + ws.add_file('GRP1', mimetype='image/png', file_id='foobar1img1', page_id='phys_0001') + ws.add_file('GRP1', mimetype='image/png', file_id='foobar1img2', page_id='phys_0001') + ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, file_id='foobar2', page_id='phys_0001') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar3', page_id='phys_0002') + ws.add_file('GRP2', mimetype='image/tiff', file_id='foobar4', page_id='phys_0002') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) @@ -141,7 +141,7 @@ class ZipTestProcessor(Processor): pass print("PAGE-filtered") tuples = [(one.ID, two) for one, two in proc.zip_input_files(mimetype=MIMETYPE_PAGE)] assert ('foobar3', None) in tuples - ws.add_file('GRP2', mimetype='image/tiff', ID='foobar4dup', pageId='phys_0002') + ws.add_file('GRP2', mimetype='image/tiff', file_id='foobar4dup', page_id='phys_0002') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) @@ -152,7 +152,7 @@ class ZipTestProcessor(Processor): pass assert ('foobar3', None) in tuples with self.assertRaisesRegex(Exception, "No PAGE-XML for page .* in fileGrp .* but multiple matches."): tuples = proc.zip_input_files(on_error='abort') - ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, ID='foobar2dup', pageId='phys_0001') + ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, file_id='foobar2dup', page_id='phys_0001') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) @@ -164,8 +164,8 @@ class ZipTestProcessor(Processor): pass self.capture_out_err() with pushd_popd(tempdir=True) as tempdir: ws = self.resolver.workspace_from_nothing(directory=tempdir) - ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, ID='foobar1', pageId=None) - ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, ID='foobar2', pageId='phys_0001') + ws.add_file('GRP1', mimetype=MIMETYPE_PAGE, file_id='foobar1', page_id=None) + ws.add_file('GRP2', mimetype=MIMETYPE_PAGE, file_id='foobar2', page_id='phys_0001') for page_id in [None, 'phys_0001,phys_0002']: with self.subTest(page_id=page_id): proc = ZipTestProcessor(workspace=ws, input_file_grp='GRP1,GRP2', page_id=page_id) diff --git a/tests/test_decorators.py b/tests/test_decorators.py index c1debf5bce..ae2cec57ca 100644 --- a/tests/test_decorators.py +++ b/tests/test_decorators.py @@ -119,10 +119,10 @@ def _sample_ws_for_overwrite(self): resolver = Resolver() with TemporaryDirectory() as tempdir: ws = resolver.workspace_from_nothing(directory=tempdir) - ws.add_file('IN-GRP', pageId='pID1', ID='fID1', mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID1.tif')) - ws.add_file('OUT-GRP', pageId='pID2', ID='fID2', 
mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID2.tif')) - ws.add_file('OUT-GRP', pageId='pID3', ID='fID3', mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID3.tif')) - ws.add_file('OUT-GRP', pageId='pID4', ID='fID4', mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID4.tif')) + ws.add_file('IN-GRP', page_id='pID1', file_id='fID1', mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID1.tif')) + ws.add_file('OUT-GRP', page_id='pID2', file_id='fID2', mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID2.tif')) + ws.add_file('OUT-GRP', page_id='pID3', file_id='fID3', mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID3.tif')) + ws.add_file('OUT-GRP', page_id='pID4', file_id='fID4', mimetype='image/tiff', content='CONTENT', local_filename=join(tempdir, 'ID4.tif')) ws.save_mets() yield ws diff --git a/tests/test_task_sequence.py b/tests/test_task_sequence.py index e33da7c5f7..23300712bf 100644 --- a/tests/test_task_sequence.py +++ b/tests/test_task_sequence.py @@ -138,7 +138,7 @@ def test_task_run(self): with copy_of_directory(assets.path_to('kant_aufklaerung_1784/data')) as wsdir: with pushd_popd(wsdir): ws = resolver.workspace_from_url('mets.xml') - ws.add_file('GRP0', content='', local_filename='GRP0/foo', ID='file0', mimetype=MIMETYPE_PAGE, pageId=None) + ws.add_file('GRP0', content='', local_filename='GRP0/foo', file_id='file0', mimetype=MIMETYPE_PAGE, page_id=None) ws.save_mets() files_before = len(ws.mets.find_all_files()) run_tasks('mets.xml', 'DEBUG', None, [ diff --git a/tests/test_workspace.py b/tests/test_workspace.py index de2c0ef833..1b9841a82e 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -63,10 +63,10 @@ def test_workspace_add_file(plain_workspace): # act plain_workspace.add_file( 'GRP', - ID='ID1', + file_id='ID1', mimetype='image/tiff', content='CONTENT', - pageId=None, + page_id=None, local_filename=fpath ) f = plain_workspace.mets.find_all_files()[0] @@ -80,7 +80,7 @@ def test_workspace_add_file(plain_workspace): def test_workspace_add_file_basename_no_content(plain_workspace): - plain_workspace.add_file('GRP', ID='ID1', mimetype='image/tiff', pageId=None) + plain_workspace.add_file('GRP', file_id='ID1', mimetype='image/tiff', page_id=None) f = next(plain_workspace.mets.find_files()) # assert @@ -89,7 +89,7 @@ def test_workspace_add_file_basename_no_content(plain_workspace): def test_workspace_add_file_binary_content(plain_workspace): fpath = join(plain_workspace.directory, 'subdir', 'ID1.tif') - plain_workspace.add_file('GRP', ID='ID1', content=b'CONTENT', local_filename=fpath, url='http://foo/bar', pageId=None) + plain_workspace.add_file('GRP', file_id='ID1', content=b'CONTENT', local_filename=fpath, url='http://foo/bar', page_id=None) # assert assert exists(fpath) @@ -98,7 +98,7 @@ def test_workspace_add_file_binary_content(plain_workspace): def test_workspacec_add_file_content_wo_local_filename(plain_workspace): # act with pytest.raises(Exception) as fn_exc: - plain_workspace.add_file('GRP', ID='ID1', content=b'CONTENT', pageId='foo1234') + plain_workspace.add_file('GRP', file_id='ID1', content=b'CONTENT', page_id='foo1234') assert "'content' was set but no 'local_filename'" in str(fn_exc.value) @@ -106,9 +106,9 @@ def test_workspacec_add_file_content_wo_local_filename(plain_workspace): def test_workspacec_add_file_content_wo_pageid(plain_workspace): # act with pytest.raises(ValueError) as val_err: - 
plain_workspace.add_file('GRP', ID='ID1', content=b'CONTENT', local_filename='foo') + plain_workspace.add_file('GRP', file_id='ID1', content=b'CONTENT', local_filename='foo') - assert "workspace.add_file must be passed a 'pageId' kwarg, even if it is None." in str(val_err.value) + assert "workspace.add_file must be passed a 'page_id' kwarg, even if it is None." in str(val_err.value) def test_workspace_str(plain_workspace): @@ -260,7 +260,7 @@ def test_remove_file_force(sbb_data_workspace): def test_remove_file_remote_not_available_raises_exception(plain_workspace): - plain_workspace.add_file('IMG', ID='page1_img', mimetype='image/tiff', url='http://remote', pageId=None) + plain_workspace.add_file('IMG', file_id='page1_img', mimetype='image/tiff', url='http://remote', page_id=None) with pytest.raises(Exception) as not_avail_exc: plain_workspace.remove_file('page1_img') @@ -270,7 +270,7 @@ def test_remove_file_remote_not_available_raises_exception(plain_workspace): def test_remove_file_remote(plain_workspace): # act - plain_workspace.add_file('IMG', ID='page1_img', mimetype='image/tiff', url='http://remote', pageId=None) + plain_workspace.add_file('IMG', file_id='page1_img', mimetype='image/tiff', url='http://remote', page_id=None) # must succeed because removal is enforced assert plain_workspace.remove_file('page1_img', force=True) @@ -342,7 +342,7 @@ def test_remove_file_group_flat(plain_workspace): """ # act - added_res = plain_workspace.add_file('FOO', ID='foo', mimetype='foo/bar', local_filename='file.ext', content='foo', pageId=None).url + added_res = plain_workspace.add_file('FOO', file_id='foo', mimetype='foo/bar', local_filename='file.ext', content='foo', page_id=None).url # requires additional prepending of current path because not pushd_popd-magic at work added_path = Path(join(plain_workspace.directory, added_res)) @@ -382,8 +382,8 @@ def test_download_to_directory_from_workspace_download_file(plain_workspace): """ https://github.com/OCR-D/core/issues/342 """ - f1 = plain_workspace.add_file('IMG', ID='page1_img', mimetype='image/tiff', local_filename='test.tif', content='', pageId=None) - f2 = plain_workspace.add_file('GT', ID='page1_gt', mimetype='text/xml', local_filename='test.xml', content='', pageId=None) + f1 = plain_workspace.add_file('IMG', file_id='page1_img', mimetype='image/tiff', local_filename='test.tif', content='', page_id=None) + f2 = plain_workspace.add_file('GT', file_id='page1_gt', mimetype='text/xml', local_filename='test.xml', content='', page_id=None) assert f1.url == 'test.tif' assert f2.url == 'test.xml' @@ -577,7 +577,7 @@ def test_downsample_16bit_image(plain_workspace): tif_out.write(gzip_in.read()) # act - plain_workspace.add_file('IMG', ID='foo', url=img_path, mimetype='image/tiff', pageId=None) + plain_workspace.add_file('IMG', file_id='foo', url=img_path, mimetype='image/tiff', page_id=None) # assert pil_before = Image.open(img_path) @@ -620,6 +620,65 @@ def test_merge(tmp_path): assert len(ws1.mets.find_all_files()) == 41 assert exists(join(dst_path1, 'OCR-D-IMG/FILE_0001_IMAGE.tif')) +def test_merge_no_copy_files(tmp_path): + + # arrange + dst_path1 = tmp_path / 'ws1' + dst_path1.mkdir() + dst_path2 = dst_path1 / 'ws2' + dst_path2.mkdir() + + ws1 = Resolver().workspace_from_nothing(directory=dst_path1) + ws2 = Resolver().workspace_from_nothing(directory=dst_path2) + + ws2.add_file('GRP2', pageId='p01', mimetype='text/plain', ID='f1', local_filename='GRP2/f1', content='ws2') + + ws1.merge(ws2, copy_files=False, fileId_mapping={'f1': 
'f1_copy_files'}) + assert next(ws1.mets.find_files(ID='f1_copy_files')).url == 'ws2/GRP2/f1' + ws1.merge(ws2, copy_files=True, fileId_mapping={'f1': 'f1_no_copy_files'}) + assert next(ws1.mets.find_files(ID='f1_no_copy_files')).url == 'GRP2/f1' + +def test_merge_overwrite(tmp_path): + # arrange + dst_path1 = tmp_path / 'ws1' + dst_path1.mkdir() + dst_path2 = dst_path1 / 'ws2' + dst_path2.mkdir() + + ws1 = Resolver().workspace_from_nothing(directory=dst_path1) + ws2 = Resolver().workspace_from_nothing(directory=dst_path2) + + with pytest.raises(Exception) as exc: + ws1.add_file('X', pageId='X', mimetype='X', ID='id123', local_filename='X/X', content='ws1') + ws2.add_file('X', pageId='X', mimetype='X', ID='id456', local_filename='X/X', content='ws2') + ws1.merge(ws2) + assert "would overwrite" == str(exc.value) + +def test_merge_with_filter(plain_workspace, tmp_path): + # arrange + page_id1, file_id1, file_grp1 = 'page1', 'ID1', 'GRP1' + plain_workspace.add_file(file_grp1, file_id='ID1', mimetype='image/tiff', page_id='page1') + + dst_path2 = tmp_path / 'foo' + resolver = Resolver() + ws2 = resolver.workspace_from_nothing(directory=dst_path2) + page_id2, file_id2, file_grp2 = 'page2', 'ID2', 'GRP2' + ws2.add_file('GRP2', file_id=file_id2, mimetype='image/tiff', page_id=page_id2, url='bar') + ws2.add_file('GRP2', file_id='ID2-2', mimetype='image/tiff', page_id='page3', url='bar') + + # act + plain_workspace.merge(ws2, copy_files=False, page_id=page_id2, file_id=file_id2, + file_grp=file_grp2, filegrp_mapping={file_grp2: file_grp1}) + + # assert: + files = list(plain_workspace.find_files()) + assert len(files) == 2 + + for f in files: + assert f.fileGrp == file_grp1 + assert f.pageId in [page_id1, page_id2] + assert f.ID in [file_id1, file_id2] + if __name__ == '__main__': main(__file__) diff --git a/tests/validator/test_json_validator.py b/tests/validator/test_json_validator.py index 546195326e..8a8387d4b6 100644 --- a/tests/validator/test_json_validator.py +++ b/tests/validator/test_json_validator.py @@ -1,5 +1,5 @@ from tests.base import TestCase, main -from ocrd_validators.json_validator import JsonValidator, DefaultValidatingDraft4Validator +from ocrd_validators.json_validator import JsonValidator, DefaultValidatingDraft6Validator class TestParameterValidator(TestCase): @@ -15,7 +15,7 @@ def setUp(self): } } } - self.defaults_validator = JsonValidator(self.schema, DefaultValidatingDraft4Validator) + self.defaults_validator = JsonValidator(self.schema, DefaultValidatingDraft6Validator) super().setUp() def test_validate_string(self): diff --git a/tests/validator/test_parameter_validator.py b/tests/validator/test_parameter_validator.py index f18937779a..f0d9d41d2c 100644 --- a/tests/validator/test_parameter_validator.py +++ b/tests/validator/test_parameter_validator.py @@ -45,6 +45,28 @@ def test_default_assignment(self): self.assertTrue(report.is_valid) self.assertEqual(obj, {'baz': '23', "num-param": 1}) +def test_min_max(): + validator = ParameterValidator({ + "parameters": { + "num-param": { + "type": "number", + "exclusiveMinimum": 10, + "maximum": 100, + "multipleOf": 2 + } + } + }) + report = validator.validate({'num-param': 23}) + assert not report.is_valid + assert 'is not a multiple of 2' in report.errors[0] + report = validator.validate({'num-param': 102}) + assert not report.is_valid + assert 'is greater than the maximum of' in report.errors[0] + report = validator.validate({'num-param': 8}) + assert not report.is_valid + assert 'is less than or equal to the minimum of' in 
report.errors[0]
+
+
 if __name__ == '__main__':
-    main()
+    main(__file__)
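For orientation, a hedged end-to-end sketch of the snake_case API and the new merge mappings that the tests above exercise. The directories and the fileGrp/file/page IDs are hypothetical (the tests use pytest tmp_path fixtures rather than fixed paths); the expected URL mirrors test_merge_no_copy_files.

from ocrd import Resolver

resolver = Resolver()
ws1 = resolver.workspace_from_nothing(directory='/tmp/ws1')        # merge target
ws2 = resolver.workspace_from_nothing(directory='/tmp/ws1/ws2')    # nested source workspace
ws2.add_file('GRP2', file_id='f1', page_id='p01', mimetype='text/plain',
             local_filename='GRP2/f1', content='ws2')
# Merge without copying; fileGrp, file IDs and page IDs can all be remapped now:
ws1.merge(ws2, copy_files=False,
          filegrp_mapping={'GRP2': 'GRP1'},
          fileId_mapping={'f1': 'ws2_f1'},
          pageId_mapping={'p01': 'ws2_p01'})
print(next(ws1.find_files(file_id='ws2_f1')).url)  # expected: 'ws2/GRP2/f1'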