diff --git a/ocrd/cli/__init__.py b/ocrd/cli/__init__.py index e69de29bb..1056e6f04 100644 --- a/ocrd/cli/__init__.py +++ b/ocrd/cli/__init__.py @@ -0,0 +1,19 @@ +import click + +from ocrd.cli.validate_ocrd_tool import validate_ocrd_tool_cli +from ocrd.cli.workspace import workspace_cli +from ocrd.cli.generate_swagger import generate_swagger_cli +from ocrd.cli.process import process_cli +from ocrd.cli.server import server_cli + +@click.group() +def cli(): + """ + CLI to OCR-D + """ + +cli.add_command(validate_ocrd_tool_cli) +cli.add_command(workspace_cli) +cli.add_command(generate_swagger_cli) +cli.add_command(process_cli) +cli.add_command(server_cli) diff --git a/ocrd/cli/generate_swagger.py b/ocrd/cli/generate_swagger.py new file mode 100644 index 000000000..7fe507f37 --- /dev/null +++ b/ocrd/cli/generate_swagger.py @@ -0,0 +1,21 @@ +import json +import yaml + +import click + +from ocrd import OcrdSwagger + +# ---------------------------------------------------------------------- +# ocrd generate-swagger +# ---------------------------------------------------------------------- + +@click.command('generate-swagger', help="Generate Swagger schema from ocrd-tool.json files") +@click.option('-S', '--swagger-template', help="Swagger template to add operations to. Use builtin if not specified.") +@click.option('-T', '--ocrd-tool', multiple=True, help="ocrd-tool.json file to generate from. Repeatable") +@click.option('-f', '--format', help="Format to generate, JSON or YAML", type=click.Choice(['JSON', 'YAML']), default='JSON') +def generate_swagger_cli(swagger_template, ocrd_tool, **kwargs): + swagger = OcrdSwagger.from_ocrd_tools(swagger_template, *ocrd_tool) + if kwargs['format'] == 'YAML': + print(yaml.dump(swagger)) + else: + print(json.dumps(swagger, indent=2)) diff --git a/ocrd/cli/process.py b/ocrd/cli/process.py new file mode 100644 index 000000000..61def415f --- /dev/null +++ b/ocrd/cli/process.py @@ -0,0 +1,41 @@ +import json +import codecs + +import click + +from ocrd import run_cli, Resolver +from ocrd.decorators import ocrd_cli_options + +# ---------------------------------------------------------------------- +# ocrd process +# ---------------------------------------------------------------------- + +@click.command('process') +@ocrd_cli_options +@click.option('-T', '--ocrd-tool', multiple=True) +@click.argument('steps', nargs=-1) +def process_cli(mets_url, **kwargs): + """ + Execute OCR-D processors for a METS file directly. + """ + resolver = Resolver(cache_enabled=True) + workspace = resolver.workspace_from_url(mets_url) + + cmds = [] + for ocrd_tool_file in kwargs['ocrd_tool']: + with codecs.open(ocrd_tool_file, encoding='utf-8') as f: + obj = json.loads(f.read()) + for tool in obj['tools']: + cmds.append(tool['binary']) + + for cmd in kwargs['steps']: + if cmd not in cmds: + raise Exception("Tool not registered: '%s'" % cmd) + + for cmd in kwargs['steps']: + run_cli(cmd, mets_url, resolver, workspace) + + workspace.reload_mets() + + # print('\n'.join(k + '=' + str(kwargs[k]) for k in kwargs)) + print(workspace) diff --git a/ocrd/cli/run.py b/ocrd/cli/run.py deleted file mode 100644 index 5ad121fb8..000000000 --- a/ocrd/cli/run.py +++ /dev/null @@ -1,150 +0,0 @@ -import codecs -import json - -import click -import yaml - -from ocrd import run_cli, OcrdSwagger, Resolver, WorkspaceValidator, OcrdToolValidator, Workspace -from ocrd.decorators import ocrd_cli_options - -from ocrd.webservice.processor import create as create_processor_ws -from ocrd.webservice.repository import create as create_repository_ws - -@click.group() -def cli(): - """ - CLI to OCR-D - """ -# ---------------------------------------------------------------------- -# ocrd validate-ocrd-tool -# ---------------------------------------------------------------------- - -@cli.command('validate-ocrd-tool', help='Validate an ocrd-tool.json') -@click.argument('json_file', "ocrd-tool.json to validate") -def validate_ocrd_tool(json_file): - with codecs.open(json_file, encoding='utf-8') as f: - report = OcrdToolValidator.validate_json(f.read()) - print(report.to_xml()) - if not report.is_valid: - return 128 - -# ---------------------------------------------------------------------- -# ocrd workspace -# ---------------------------------------------------------------------- - -@cli.group("workspace", help="Working with workspace") -def workspace_cli(): - pass - -# ---------------------------------------------------------------------- -# ocrd workspace validate -# ---------------------------------------------------------------------- - -@workspace_cli.command('validate', help='Validate a workspace') -@click.option('-m', '--mets-url', help="METS URL to validate", required=True) -def validate_workspace(mets_url): - resolver = Resolver(cache_enabled=True) - report = WorkspaceValidator.validate_url(resolver, mets_url) - print(report.to_xml()) - if not report.is_valid: - return 128 - -@workspace_cli.command('create-from-url', help="Create a workspace from a METS URL and return the directory") -@click.option('-m', '--mets-url', help="METS URL to create workspace for", required=True) -@click.option('-a', '--download-all', is_flag=True, default=False, help="Whether to download all files into the workspace") -def workspace_create(mets_url, download_all): - resolver = Resolver(cache_enabled=True) - workspace = resolver.workspace_from_url(mets_url) - if download_all: - for fileGrp in workspace.mets.file_groups: - for f in workspace.mets.find_files(fileGrp=fileGrp): - workspace.download_file(f, subdir=fileGrp, basename=f.ID) - workspace.save_mets() - print(workspace.directory) - -@workspace_cli.command('add-file', help="Add a file to METS in a workspace") -@click.option('-w', '--working-dir', help="Directory of the workspace", required=True) -@click.option('-G', '--filegrp', help="fileGrp USE", required=True) -@click.option('-i', '--fileid', help="ID for the file") -@click.option('-g', '--groupid', help="GROUPID") -@click.argument('local_filename') -def workspace_add_file(working_dir, filegrp, local_filename, fileid, groupid): - resolver = Resolver(cache_enabled=True) - workspace = Workspace(resolver, working_dir) - workspace.mets.add_file(filegrp, local_filename=local_filename) - workspace.save_mets() - -# ---------------------------------------------------------------------- -# ocrd generate-swagger -# ---------------------------------------------------------------------- - -@cli.command('generate-swagger', help="Generate Swagger schema from ocrd-tool.json files") -@click.option('-S', '--swagger-template', help="Swagger template to add operations to. Use builtin if not specified.") -@click.option('-T', '--ocrd-tool', multiple=True, help="ocrd-tool.json file to generate from. Repeatable") -@click.option('-f', '--format', help="Format to generate, JSON or YAML", type=click.Choice(['JSON', 'YAML']), default='JSON') -def generate_swagger(swagger_template, ocrd_tool, **kwargs): - swagger = OcrdSwagger.from_ocrd_tools(swagger_template, *ocrd_tool) - if kwargs['format'] == 'YAML': - print(yaml.dump(swagger)) - else: - print(json.dumps(swagger, indent=2)) - -# ---------------------------------------------------------------------- -# ocrd process -# ---------------------------------------------------------------------- - -@cli.command('process') -@ocrd_cli_options -@click.option('-T', '--ocrd-tool', multiple=True) -@click.argument('steps', nargs=-1) -def process_cli(mets_url, **kwargs): - """ - Execute OCR-D processors for a METS file directly. - """ - resolver = Resolver(cache_enabled=True) - workspace = resolver.workspace_from_url(mets_url) - - cmds = [] - for ocrd_tool_file in kwargs['ocrd_tool']: - with codecs.open(ocrd_tool_file, encoding='utf-8') as f: - obj = json.loads(f.read()) - for tool in obj['tools']: - cmds.append(tool['binary']) - - for cmd in kwargs['steps']: - if cmd not in cmds: - raise Exception("Tool not registered: '%s'" % cmd) - - for cmd in kwargs['steps']: - run_cli(cmd, mets_url, resolver, workspace) - - workspace.reload_mets() - - # print('\n'.join(k + '=' + str(kwargs[k]) for k in kwargs)) - print(workspace) - -# ---------------------------------------------------------------------- -# ocrd server -# ---------------------------------------------------------------------- - -@cli.group('server') -def server_cli(): - """ - Start OCR-D web services - """ - -@server_cli.command('process') -@click.option('-p', '--port', help="Port to run processor webservice on", default=5010) -def _start_processor(port): - """ - Start a server exposing the processors as webservices - """ - create_processor_ws().run(port=port) - -@server_cli.command('repository') -@click.option('-p', '--port', help="Port to run repository webservice on", default=5000) -def _start_repository(port): - """ - Start a minimal repository. - """ - create_repository_ws().run(port=port) diff --git a/ocrd/cli/server.py b/ocrd/cli/server.py new file mode 100644 index 000000000..239d19b79 --- /dev/null +++ b/ocrd/cli/server.py @@ -0,0 +1,30 @@ +import click + +from ocrd.webservice.processor import create as create_processor_ws +from ocrd.webservice.repository import create as create_repository_ws + +# ---------------------------------------------------------------------- +# ocrd server +# ---------------------------------------------------------------------- + +@click.group('server') +def server_cli(): + """ + Start OCR-D web services + """ + +@server_cli.command('process') +@click.option('-p', '--port', help="Port to run processor webservice on", default=5010) +def _start_processor(port): + """ + Start a server exposing the processors as webservices + """ + create_processor_ws().run(port=port) + +@server_cli.command('repository') +@click.option('-p', '--port', help="Port to run repository webservice on", default=5000) +def _start_repository(port): + """ + Start a minimal repository. + """ + create_repository_ws().run(port=port) diff --git a/ocrd/cli/validate_ocrd_tool.py b/ocrd/cli/validate_ocrd_tool.py new file mode 100644 index 000000000..ee027c6d9 --- /dev/null +++ b/ocrd/cli/validate_ocrd_tool.py @@ -0,0 +1,18 @@ +import codecs + +import click + +from ocrd import OcrdToolValidator + +# ---------------------------------------------------------------------- +# ocrd validate-ocrd-tool +# ---------------------------------------------------------------------- + +@click.command('validate-ocrd-tool', help='Validate an ocrd-tool.json') +@click.argument('json_file', "ocrd-tool.json to validate") +def validate_ocrd_tool_cli(json_file): + with codecs.open(json_file, encoding='utf-8') as f: + report = OcrdToolValidator.validate_json(f.read()) + print(report.to_xml()) + if not report.is_valid: + return 128 diff --git a/ocrd/cli/workspace.py b/ocrd/cli/workspace.py new file mode 100644 index 000000000..2c93afa8b --- /dev/null +++ b/ocrd/cli/workspace.py @@ -0,0 +1,147 @@ +import os +import sys + +import click + +from ocrd import Resolver, WorkspaceValidator, Workspace + +class WorkspaceCtx(object): + + def __init__(self, directory): + self.directory = directory + self.resolver = Resolver(cache_enabled=True) + self.config = {} + self.verbose = False + +pass_workspace = click.make_pass_decorator(WorkspaceCtx) + +# ---------------------------------------------------------------------- +# ocrd workspace +# ---------------------------------------------------------------------- + +@click.group("workspace", help="Working with workspace") +@click.option( + '-d', + '--directory', + envvar='WORKSPACE_DIR', + default=os.path.abspath('.'), + type=click.Path(file_okay=False), + metavar='PATH', + help='Changes the repository folder location.' +) +@click.option( + '-c', + '--config', + nargs=2, + multiple=True, + metavar='KEY VALUE', + help='Overrides a config key/value pair.' +) +@click.option( + '-v', + '--verbose', + is_flag=True, + help='Enables verbose mode.' +) +@click.pass_context +def workspace_cli(ctx, directory, config, verbose): + ctx.obj = WorkspaceCtx(os.path.abspath(directory)) + ctx.obj.verbose = verbose + for key, value in config: + ctx.obj.config[key] = value + +# ---------------------------------------------------------------------- +# ocrd workspace validate +# ---------------------------------------------------------------------- + +@workspace_cli.command('validate', help=''' + + Validate a workspace + +''') +@click.option('-m', '--mets-url', help="METS URL to validate") +@pass_workspace +def validate_workspace(ctx, mets_url=None): + if mets_url is None: + mets_url = 'file://%s/mets.xml' % ctx.directory + report = WorkspaceValidator.validate_url(ctx.resolver, mets_url) + print(report.to_xml()) + if not report.is_valid: + sys.exit(128) + +# ---------------------------------------------------------------------- +# ocrd workspace clone +# ---------------------------------------------------------------------- + +@workspace_cli.command('clone', help=""" + + Create a workspace from a METS URL and return the directory + +""") +@click.option('-m', '--mets-url', help="METS URL to create workspace for", required=True) +@click.option('-a', '--download-all', is_flag=True, default=False, help="Whether to download all files into the workspace") +@pass_workspace +def workspace_create(ctx, mets_url, download_all): + workspace = ctx.resolver.workspace_from_url(mets_url) + if download_all: + for fileGrp in workspace.mets.file_groups: + for f in workspace.mets.find_files(fileGrp=fileGrp): + workspace.download_file(f, subdir=fileGrp, basename=f.ID) + workspace.save_mets() + print(workspace.directory) + +# ---------------------------------------------------------------------- +# ocrd workspace add +# ---------------------------------------------------------------------- + +@workspace_cli.command('add', help=""" + + Add a file to METS in a workspace. + +""") +@click.option('-G', '--file-grp', help="fileGrp USE", required=True) +@click.option('-i', '--file-id', help="ID for the file", required=True) +@click.option('-m', '--mimetype', help="Media type of the file", required=True) +@click.option('-g', '--group-id', help="GROUPID") +@click.argument('local_filename', type=click.Path(dir_okay=False, readable=True, resolve_path=True)) +@pass_workspace +def workspace_add_file(ctx, file_grp, file_id, mimetype, group_id, local_filename): + workspace = Workspace(ctx.resolver, directory=ctx.directory) + workspace.mets.add_file( + file_grp=file_grp, + file_id=file_id, + mimetype=mimetype, + group_id=group_id, + local_filename=local_filename + ) + workspace.save_mets() + +# ---------------------------------------------------------------------- +# ocrd workspace pack +# ---------------------------------------------------------------------- + +@workspace_cli.command('pack', help=""" + + Pack workspace as ZIP + +""") +@click.argument('output_filename', type=click.Path(dir_okay=False, writable=True, readable=False, resolve_path=True)) +@pass_workspace +def pack(ctx, output_filename): + workspace = Workspace(ctx.resolver, directory=ctx.directory) + ctx.resolver.pack_workspace(workspace, output_filename) + +# ---------------------------------------------------------------------- +# ocrd workspace unpack +# ---------------------------------------------------------------------- + +@workspace_cli.command('unpack', help=""" + + Unpack ZIP as workspace + +""") +@click.argument('input_filename', type=click.Path(dir_okay=False, readable=True, resolve_path=True)) +@pass_workspace +def unpack(ctx, input_filename): + workspace = ctx.resolver.unpack_workspace_from_filename(input_filename) + print(workspace) diff --git a/ocrd/resolver.py b/ocrd/resolver.py index bd3bb535a..4c5d6b0b4 100644 --- a/ocrd/resolver.py +++ b/ocrd/resolver.py @@ -100,7 +100,7 @@ def pack_workspace(self, workspace, zpath=None): return zpath - def unpack_workspace_from_filename(self, zip_filename): + def unpack_workspace_from_filename(self, zip_filename, directory=None): """ :TODO: @@ -113,7 +113,8 @@ def unpack_workspace_from_filename(self, zip_filename): Args: zip_filename (string) : Path to OCRD-ZIP file """ - directory = tempfile.mkdtemp(prefix=TMP_PREFIX) + if directory is None: + directory = tempfile.mkdtemp(prefix=TMP_PREFIX) log.debug("Unpacking to %s", directory) with ZipFile(zip_filename, 'r') as z: z.extractall(path=directory) diff --git a/ocrd/workspace.py b/ocrd/workspace.py index 7772d7af7..56afb76cb 100644 --- a/ocrd/workspace.py +++ b/ocrd/workspace.py @@ -73,22 +73,28 @@ def download_file(self, f, **kwargs): f.local_filename = self.download_url(f.url, **kwargs) return f - def download_files_in_group(self, use): + def download_files_in_group(self, file_grp): """ Download all the :py:mod:`ocrd.model.ocrd_file.OcrdFile` in the file group given. """ - for input_file in self.mets.find_files(fileGrp=use): - self.download_file(input_file, subdir=use) + for input_file in self.mets.find_files(fileGrp=file_grp): + self.download_file(input_file, subdir=file_grp) - def add_file(self, use, basename=None, content=None, local_filename=None, **kwargs): + def add_file(self, file_grp, basename=None, content=None, local_filename=None, **kwargs): """ Add an output file. Creates an :class:`OcrdFile` to pass around and adds that to the OcrdMets OUTPUT section. """ - log.debug('outputfile use=%s basename=%s local_filename=%s content=%s', use, basename, local_filename, content is not None) + log.debug( + 'outputfile file_grp=%s basename=%s local_filename=%s content=%s', + file_grp, + basename, + local_filename, + content is not None + ) if basename is not None: - if use is not None: - basename = os.path.join(use, basename) + if file_grp is not None: + basename = os.path.join(file_grp, basename) local_filename = os.path.join(self.directory, basename) local_filename_dir = local_filename.rsplit('/', 1)[0] @@ -98,7 +104,7 @@ def add_file(self, use, basename=None, content=None, local_filename=None, **kwar if 'url' not in kwargs: kwargs['url'] = 'file://' + local_filename - self.mets.add_file(use, local_filename=local_filename, **kwargs) + self.mets.add_file(file_grp, local_filename=local_filename, **kwargs) if content is not None: with open(local_filename, 'wb') as f: diff --git a/setup.py b/setup.py index 9a254d4c7..e1b859619 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ }, entry_points={ 'console_scripts': [ - 'ocrd=ocrd.cli.run:cli', + 'ocrd=ocrd.cli:cli', ] }, )