Skip to content

Commit

Permalink
enable bashlib test in test_modules_process_helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Apr 8, 2024
1 parent 5a1f501 commit cfb26f1
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 142 deletions.
62 changes: 34 additions & 28 deletions tests/cli/test_bashlib.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from contextlib import contextmanager
from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory

import os, sys
from os import environ
import traceback
import subprocess
import tempfile
Expand Down Expand Up @@ -110,34 +112,38 @@ def test_bashlib_minversion(self):
assert "ERROR: ocrd/core is too old" in err

def test_bashlib_cp_processor(self):
# NOTE(review): this listing is a rendered diff with indentation and +/- markers
# stripped, so two revisions of the method body follow one another below.
# The first body (up to the first 'OCR-D-IMG2' / '*.tif' check) appears to be the
# revision removed by this commit -- confirm against the repository.
# It loads the script and tool JSON from data/bashlib_cp_processor.{sh,ocrd-tool.json}.
script = (Path(__file__).parent.parent / 'data/bashlib_cp_processor.sh').read_text()
ocrd_tool = json.loads((Path(__file__).parent.parent / 'data/bashlib_cp_processor.ocrd-tool.json').read_text())
with copy_of_directory(assets.path_to('kant_aufklaerung_1784/data')) as wsdir:
with pushd_popd(wsdir):
with open('ocrd-tool.json', 'w') as f:
f.write(json.dumps(ocrd_tool))
# run on 1 input
exit_code, out, err = self.invoke_bash(
script, '-I', 'OCR-D-GT-PAGE', '-O', 'OCR-D-GT-PAGE2', '-P', 'message', 'hello world',
executable='ocrd-cp')
print({'exit_code': exit_code, 'out': out, 'err': err})
assert 'single input fileGrp' in err
assert 'processing PAGE-XML' in err
assert exit_code == 0
assert 'hello world' in out
path = pathlib.Path('OCR-D-GT-PAGE2')
assert path.is_dir()
assert next(path.glob('*.xml'), None)
# run on 2 inputs
exit_code, out, err = self.invoke_bash(
script, '-I', 'OCR-D-IMG,OCR-D-GT-PAGE', '-O', 'OCR-D-IMG2',
executable='ocrd-cp')
assert 'multiple input fileGrps' in err
assert exit_code == 0
assert 'ignoring application/vnd.prima.page+xml' in err
path = pathlib.Path('OCR-D-IMG2')
assert path.is_dir()
assert next(path.glob('*.tif'), None)
# NOTE(review): from here on is presumably the revision added by this commit.
# It reads the script from data/ocrd-cp and copies data/ocrd-cp.ocrd-tool.json
# verbatim into the workspace copy as ocrd-tool.json; the assertions are unchanged.
# script = (Path(__file__).parent.parent / 'data/bashlib_cp_processor.sh').read_text()
# ocrd_tool = json.loads((Path(__file__).parent.parent / 'data/bashlib_cp_processor.ocrd-tool.json').read_text())
scriptdir = Path(__file__).parent.parent / 'data'

with copy_of_directory(assets.path_to('kant_aufklaerung_1784/data')) as wsdir, pushd_popd(wsdir):
with open(f'{scriptdir}/ocrd-cp', 'r', encoding='utf-8') as script_f:
script = script_f.read()
with open(f'{scriptdir}/ocrd-cp.ocrd-tool.json', 'r', encoding='utf-8') as tool_in, \
open(f'{wsdir}/ocrd-tool.json', 'w', encoding='utf-8') as tool_out:
tool_out.write(tool_in.read())
# run on 1 input
exit_code, out, err = self.invoke_bash(
script, '-I', 'OCR-D-GT-PAGE', '-O', 'OCR-D-GT-PAGE2', '-P', 'message', 'hello world',
executable='ocrd-cp')
print({'exit_code': exit_code, 'out': out, 'err': err})
assert 'single input fileGrp' in err
assert 'processing PAGE-XML' in err
assert exit_code == 0
assert 'hello world' in out
path = pathlib.Path('OCR-D-GT-PAGE2')
assert path.is_dir()
assert next(path.glob('*.xml'), None)
# run on 2 inputs
exit_code, out, err = self.invoke_bash(
script, '-I', 'OCR-D-IMG,OCR-D-GT-PAGE', '-O', 'OCR-D-IMG2',
executable='ocrd-cp')
assert 'multiple input fileGrps' in err
assert exit_code == 0
assert 'ignoring application/vnd.prima.page+xml' in err
path = pathlib.Path('OCR-D-IMG2')
assert path.is_dir()
assert next(path.glob('*.tif'), None)

# When executed as a script, hand this file to the `main` helper imported from tests.base.
if __name__ == "__main__":
main(__file__)
Expand Down
18 changes: 0 additions & 18 deletions tests/data/bashlib_cp_processor.ocrd-tool.json

This file was deleted.

65 changes: 0 additions & 65 deletions tests/data/bashlib_cp_processor.sh

This file was deleted.

1 change: 0 additions & 1 deletion tests/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-

from datetime import datetime

from os.path import join
Expand Down
76 changes: 46 additions & 30 deletions tests/network/test_modules_process_helpers.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,54 @@
from contextlib import contextmanager
from os import environ
from pathlib import Path
from src.ocrd.processor.builtin.dummy_processor import DummyProcessor
from src.ocrd_network.constants import NetworkLoggingDirs
from src.ocrd_network.logging_utils import get_root_logging_dir
from src.ocrd_network.process_helpers import invoke_processor
from src.ocrd_network.utils import generate_id

from ocrd.processor.builtin.dummy_processor import DummyProcessor
from ocrd_network.constants import NetworkLoggingDirs
from ocrd_network.logging_utils import get_root_logging_dir
from ocrd_network.process_helpers import invoke_processor
from ocrd_network.utils import generate_id

from tests.base import assets

@contextmanager
def temp_env_var(k, v):
    """Temporarily set environment variable ``k`` to ``v``.

    On leaving the ``with`` block the variable is restored to its previous
    value, or removed if it was not set before.  The restoration runs in a
    ``finally`` clause so that it also happens when the body raises; without
    it a failing test would leak the modified value into later tests.

    Args:
        k: Name of the environment variable.
        v: Value to assign for the duration of the ``with`` block.
    """
    v_before = environ.get(k, None)
    environ[k] = v
    try:
        yield
    finally:
        if v_before is not None:
            environ[k] = v_before
        else:
            # pop() instead of del: the body may already have removed the key
            environ.pop(k, None)

# TODO: Fix this
def _test_invoke_processor_bash():
    """Invoke the bashlib copy processor script via ``invoke_processor``.

    The leading underscore keeps the function from being collected under the
    usual ``test_*`` naming convention.  It checks that the script and the
    METS file exist, runs the processor on two pages of the ``OCR-D-IMG``
    group, and then asserts that the output group path and the log file exist.
    """
    script_path = "tests/data/bashlib_cp_processor.sh"
    assert Path(script_path).exists(), f"Bash lib test processor not found in: {script_path}"

    ws_root = "kant_aufklaerung_1784/data"
    mets_path = assets.path_to(f"{ws_root}/mets.xml")
    assert Path(mets_path).exists()

    logging_root = get_root_logging_dir(module_name=NetworkLoggingDirs.PROCESSING_JOBS)
    job_id = generate_id()
    log_file = Path(logging_root, job_id)
    # The job id makes the output group name unique per run.
    out_grp = f"OCR-D-BASH-TEST-{job_id}"

    call_kwargs = {
        "processor_class": None,  # required only for pythonic processors
        "executable": script_path,
        "abs_path_to_mets": mets_path,
        "input_file_grps": ["OCR-D-IMG"],
        "output_file_grps": [out_grp],
        "page_id": "PHYS_0017,PHYS_0020",
        "parameters": {},
        "log_filename": log_file,
        "log_level": "DEBUG",
    }
    invoke_processor(**call_kwargs)

    produced = Path(assets.path_to(f"{ws_root}/{out_grp}"))
    assert produced.exists()
    assert Path(log_file).exists()
def test_invoke_processor_bash():
    """Run the ``ocrd-cp`` executable through ``invoke_processor``.

    The ``data`` directory two levels above this file is prepended to ``PATH``
    for the duration of the test, presumably so that the ``ocrd-cp`` script
    stored there can be resolved by name (confirm against the repository).
    The test passes when both the output file group path and the job log
    file exist afterwards.
    """
    scriptdir = Path(__file__).parent.parent / 'data'
    with temp_env_var('PATH', f'{scriptdir}:{environ["PATH"]}'):
        workspace_root = "kant_aufklaerung_1784/data"
        path_to_mets = assets.path_to(f"{workspace_root}/mets.xml")
        assert Path(path_to_mets).exists()
        log_dir_root = get_root_logging_dir(module_name=NetworkLoggingDirs.PROCESSING_JOBS)
        job_id = generate_id()
        path_to_log_file = Path(log_dir_root, job_id)
        input_file_grp = "OCR-D-IMG"
        output_file_grp = f"OCR-D-BASH-TEST-{job_id}"
        try:
            invoke_processor(
                processor_class=None,  # required only for pythonic processors
                executable='ocrd-cp',
                abs_path_to_mets=path_to_mets,
                input_file_grps=[input_file_grp],
                output_file_grps=[output_file_grp],
                page_id="PHYS_0017,PHYS_0020",
                parameters={},
                log_filename=path_to_log_file,
                log_level="DEBUG"
            )
        except Exception:
            # Dump the job log for diagnosis, then re-raise so the test fails
            # with the real error instead of a later, less informative assert.
            # The log may not exist yet if the failure happened early.
            if path_to_log_file.exists():
                print(path_to_log_file.read_text(encoding='utf-8'))
            raise
        assert Path(assets.path_to(f"{workspace_root}/{output_file_grp}")).exists()
        assert Path(path_to_log_file).exists()


def test_invoke_processor_pythonic():
Expand Down

0 comments on commit cfb26f1

Please sign in to comment.