diff --git a/.circleci/config.yml b/.circleci/config.yml index 4e282a6..51c9e7b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,56 +1,96 @@ version: 2.1 -jobs: + +commands: + dockerhub-store: + description: "store an image on Dockerhub" + parameters: + tag: + description: "image tag (full variant name) to be saved" + type: + string + steps: + - run: + name: Login to Docker Hub + command: echo "$DOCKERHUB_PASS" | docker login --username "$DOCKERHUB_USERNAME" --password-stdin + - run: + name: save image + command: | + docker tag ocrd/all:<< parameters.tag >> ocrd/all:<< parameters.tag >>-${CIRCLE_SHA1} + docker push ocrd/all:<< parameters.tag >>-${CIRCLE_SHA1} + no_output_timeout: 30m + dockerhub-load: + description: "load an image from Dockerhub" + parameters: + tag: + description: "image tag (full variant name) to be pulled" + type: + string + steps: + - run: + name: load previous image + command: | + docker pull ocrd/all:<< parameters.tag >>-${CIRCLE_SHA1} + docker tag ocrd/all:<< parameters.tag >>-${CIRCLE_SHA1} ocrd/all:<< parameters.tag >> + dockerhub-remove: + description: "remove an image on Dockerhub" + parameters: + tag: + description: "image tag (full variant name) to be removed" + type: + string + steps: + - run: + name: use Dockerhub API to remove temporary image + command: | + HUB_TOKEN=$(curl -s -H "Content-Type: application/json" -X POST -d "{\"username\": \"$DOCKERHUB_USERNAME\", \"password\": \"$DOCKERHUB_PASS\"}" https://hub.docker.com/v2/users/login/ | jq -r .token) + set -x + curl -i -X DELETE -H "Accept: application/json" -H "Authorization: JWT $HUB_TOKEN" https://hub.docker.com/v2/namespaces/ocrd/repositories/all/tags/<< parameters.tag >>-${CIRCLE_SHA1}/ + # must be unconditional (both on_error and on_success) + when: always build: - docker: - - image: cimg/base:current-22.04 - resource_class: large + description: "template for all docker-* build jobs" + parameters: + tag: + description: "image tag (full variant name) to be built" + type: + string + dep: + description: "image tag (full variant name) this depends on (should _not_ be rebuilt)" + type: + string + default: "core" steps: - checkout - - setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/ + - setup_remote_docker - run: - name: build image - command: make docker-maximum-cuda GIT_DEPTH=--single-branch DOCKER_PARALLEL=-j4 + name: Build Docker image + command: make docker-<< parameters.tag >> -o docker-<< parameters.dep >> GIT_DEPTH=--single-branch no_output_timeout: 30m - run: name: test image command: | mkdir test-results - docker run --rm -v $PWD:/data ocrd/all:maximum-cuda make -C /build/core deps-test test PYTEST_ARGS=--junitxml=/data/test-results/core.xml + # cannot use docker run -v because the docker executor does not support volumes + # docker run --rm -v $PWD:/data ocrd/all:<< parameters.tag >> make -C /build/core deps-test test PYTEST_ARGS=--junitxml=/data/test-results/core.xml + id=`docker create ocrd/all:<< parameters.tag >> make -C /build/core deps-test test PYTEST_ARGS=--junitxml=core.xml` + docker start -a $id + docker cp $id:/build/core/core.xml test-results/ + docker rm $id - store_test_results: - path: test-results - - when: - # takes too long for 1h1m CircleCI timeout overall - # also, storage is limited... - condition: false - steps: - - run: - name: persist image - command: | - sudo apt install pigz - docker image save ocrd/all:maximum-cuda | pigz --fast > ocrd-all-maximum.tar.gz - no_output_timeout: 30m - # can be downloaded from CircleCI.com and imported via "docker image load" - - store_artifacts: - path: ocrd-all-maximum.tar.gz - destination: artifacts + path: test-results/core.xml deploy: - docker: - - image: cimg/base:current-22.04 + description: "template for all docker-* deploy jobs" parameters: - variant: + tag: + description: "image tag (full variant name) to be pushed" type: string steps: - checkout - - setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/ + - setup_remote_docker - run: - name: Build Docker image - command: make docker-<< parameters.variant >> GIT_DEPTH=--single-branch - # fails due to pip races: DOCKER_PARALLEL=-j3 - no_output_timeout: 30m - - run: - name: Alias Docker images - command: docker tag ocrd/all:<< parameters.variant >> ocrd/all:<< parameters.variant >>-git + name: Alias Docker image + command: docker tag ocrd/all:<< parameters.tag >> ocrd/all:<< parameters.tag >>-git - run: name: Login to Docker Hub command: echo "$DOCKERHUB_PASS" | docker login --username "$DOCKERHUB_USERNAME" --password-stdin @@ -58,21 +98,12 @@ jobs: name: Push images to Docker Hub no_output_timeout: 2.5h command: | - docker push ocrd/all:<< parameters.variant >> - docker push ocrd/all:<< parameters.variant >>-git + docker push ocrd/all:<< parameters.tag >> + docker push ocrd/all:<< parameters.tag >>-git - when: condition: - equal: [ maximum, << parameters.variant >> ] + equal: [ maximum, << parameters.tag >> ] steps: - - run: - name: Alias and push intermediate variants - command: | - docker tag ocrd/all:medium ocrd/all:medium-git - docker tag ocrd/all:minimum ocrd/all:minimum-git - docker push ocrd/all:minimum - docker push ocrd/all:minimum-git - docker push ocrd/all:medium - docker push ocrd/all:medium-git - run: name: store ocrd-all-tool.json command: | @@ -91,7 +122,7 @@ jobs: destination: ocrd-all-module-dir.json - when: condition: - equal: [ maximum-cuda, << parameters.variant >> ] + equal: [ maximum-cuda, << parameters.tag >> ] steps: - run: name: Create a date-versioned mirror of ocrd/all:maximum-cuda @@ -99,20 +130,178 @@ jobs: - run: name: Update badge command: curl -X POST "$MICROBADGER_WEBHOOK" || true + - when: + # takes too long for 1h1m CircleCI timeout overall + # also, storage is limited... + condition: false + steps: + - run: + name: persist image + command: | + sudo apt install pigz + docker image save ocrd/all:<< parameters.tag >> | pigz --fast > ocrd-all-maximum.tar.gz + no_output_timeout: 30m + # can be downloaded from CircleCI.com and imported via "docker image load" + - store_artifacts: + path: ocrd-all-maximum.tar.gz + destination: artifacts + +jobs: + build-mini: + docker: + - image: cimg/base:current-22.04 + parameters: + variant: + type: + string + steps: + - build: + tag: minim<< parameters.variant >> + - dockerhub-store: + tag: minim<< parameters.variant >> + build-medi: + docker: + - image: cimg/base:current-22.04 + parameters: + variant: + type: + string + steps: + - dockerhub-load: + tag: minim<< parameters.variant >> + - unless: + condition: + equal: [ master, << pipeline.git.branch >>] + # only PR testing โ€“ not needed anymore + steps: + - dockerhub-remove: + tag: minim<< parameters.variant >> + - build: + dep: minim<< parameters.variant >> + tag: medi<< parameters.variant >> + - dockerhub-store: + tag: medi<< parameters.variant >> + build-maxi: + docker: + - image: cimg/base:current-22.04 + parameters: + variant: + type: + string + steps: + - dockerhub-load: + tag: medi<< parameters.variant >> + - unless: + condition: + equal: [ master, << pipeline.git.branch >>] + # only PR testing โ€“ not needed anymore + steps: + - dockerhub-remove: + tag: medi<< parameters.variant >> + - build: + dep: medi<< parameters.variant >> + tag: maxim<< parameters.variant >> + - when: + condition: + equal: [ master, << pipeline.git.branch >>] + # only PR testing โ€“ not needed + steps: + - dockerhub-store: + tag: maxim<< parameters.variant >> + deploy-mini: + docker: + - image: cimg/base:current-22.04 + parameters: + variant: + type: + string + steps: + - dockerhub-load: + tag: minim<< parameters.variant >> + - deploy: + tag: minim<< parameters.variant >> + - dockerhub-remove: + tag: minim<< parameters.variant >> + deploy-medi: + docker: + - image: cimg/base:current-22.04 + parameters: + variant: + type: + string + steps: + - dockerhub-load: + tag: medi<< parameters.variant >> + - deploy: + tag: medi<< parameters.variant >> + - dockerhub-remove: + tag: medi<< parameters.variant >> + deploy-maxi: + docker: + - image: cimg/base:current-22.04 + parameters: + variant: + type: + string + steps: + - dockerhub-load: + tag: maxim<< parameters.variant >> + - deploy: + tag: maxim<< parameters.variant >> + - dockerhub-remove: + tag: maxim<< parameters.variant >> + workflows: version: 2 - build-master: + build: jobs: - - deploy: + - build-mini: + name: build-minim<< matrix.variant >> + matrix: + parameters: + variant: [um, um-cuda] + - build-medi: + name: build-medi<< matrix.variant >> + matrix: + parameters: + variant: [um, um-cuda] + requires: + - build-minim<< matrix.variant >> + - build-maxi: + name: build-maxim<< matrix.variant >> matrix: parameters: - variant: [maximum, maximum-cuda] + variant: [um, um-cuda] + requires: + - build-medi<< matrix.variant >> + - deploy-mini: + name: deploy-minim<< matrix.variant >> + matrix: + parameters: + variant: [um, um-cuda] + requires: + - build-minim<< matrix.variant >> filters: branches: only: master - build-pull-request: - jobs: - - build: + - deploy-medi: + name: deploy-medi<< matrix.variant >> + matrix: + parameters: + variant: [um, um-cuda] + requires: + - build-medi<< matrix.variant >> + filters: + branches: + only: master + - deploy-maxi: + name: deploy-maxim<< matrix.variant >> + matrix: + parameters: + variant: [um, um-cuda] + requires: + - build-maxim<< matrix.variant >> filters: branches: - ignore: master + only: master + diff --git a/.gitmodules b/.gitmodules index e63bf02..62d1779 100644 --- a/.gitmodules +++ b/.gitmodules @@ -94,3 +94,6 @@ [submodule "ocrd_froc"] path = ocrd_froc url = https://github.com/OCR-D/ocrd_froc +[submodule "ocrd_page2alto"] + path = ocrd_page2alto + url = https://github.com/OCR-D/page-to-alto diff --git a/CHANGELOG.md b/CHANGELOG.md index 1095763..170b52a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,353 @@ ## Unreleased +## [v2024-10-15](https://github.com/OCR-D/ocrd_all/releases/v2024-10-15) + +Changes: + + - added new module ocrd_page2alto (also in ocrd_fileformat, now with standalone processor) + - new fixup recipes for shared `venv` without dependency conflicts + - protect `venv` creation by semaphore as well + - docker: update OCRD_MODULES (default selection for custom `make docker`) + - docker: fix `minimum` and `medium` module lists + - docker: do not rm `venv` created by previous stage + - CI/CD: rewrite CircleCI config to split up mini/medi/maxi into interdependent incremental jobs + - CI/CD: fix storing test results + +### [core](https://github.com/OCR-D/core) [92b217e](https://github.com/OCR-D/core/commits/92b217e)..[85bde15](https://github.com/OCR-D/core/commits/85bde15) + +> Release: [v2.70.0](https://github.com/OCR-D/core/releases/v2.70.0) + + > * PyPI: do not upload deprecated distribution aliases anymore + > * deps-cuda: retry micro.mamba.pm even more + > * :package: v2.70.0 + > * :memo: changelog + > * create PyPI CD + > * :memo: changelog + > * Merge remote-tracking branch 'github/cli-decorator-import-network' + > * deps-cuda: retry if micromamba is unresponsive + > * Merge branch 'master' of https://github.com/OCR-D/core + > * :memo: changelog + > * Merge remote-tracking branch 'github/fix_mets_server_zombies' + > * :memo: changelog + > * Merge remote-tracking branch 'github/deps-torch-torchvision' + > * :memo: changelog + > * Merge branch 'network_client_block_prints' + > * Merge pull request #1280 from OCR-D/fix-docker-cuda-torch + > * :package: v2.69.0 + > * :memo: changelog + > * Merge branch 'mexthecat-master' + > * :memo: update changelog again + > * :memo: changelog: remove spurious entries + > * :memo: changelog + > * disableLogging: clearer comment + > * ocrd.cli.workspace: use physical_pages if possible, fix default output_field + > * OcrdMets.get_physical_pages: cover return_divs w/o for_fileIds for_pageIds + > * update OcrdPage from generateds + > * OcrdPage: add PageType.get_ReadingOrderGroups() + > * ocrd.cli.workspace: assert non-server in cmds mutating METS + > * tests: make sure ocrd_utils.config gets reset whenever changing it globally + > * lib.bash: fix errexit + > * run_processor: be robust if ocrd_tool is missing steps + > * ocrd.cli.validate tasks: pass on --mets-server-url, too + > * ocrd.cli.workspace server: add 'reload' and 'save' + > * ocrd.cli.workspace: consistently pass on --mets-server-url and --backup (also, simplify) + > * METS Server: also export+delegate physical_pages + > * PcGts.Page.id / make_xml_id: replace '/' with '_' + > * test_mets_server: add test for force (overwrite) + > * OcrdMetsServer.add_file: pass on 'force' kwarg, too + > * Workspace.reload_mets: fix for METS server case + > * add test for OcrdEnvConfig.reset_defaults() + > * ocrd_utils.config: add reset_defaults() + > * bashlib: re-add --log-filename, implement as stderr redirect + > * test-logging: also remove ocrd.log from tempdir + > * disableLogging: re-instate root logger, to + > * test_mets_server: use tmpdir to avoid side effects between suites + > * ClientSideOcrdMets: use same logger name prefix as server + > * pylint: try ignoring generateds (again) + > * update pylintrc + > * OcrdMets.add_agent: does not have positional args + > * cli.workspace: pass fileGrp as well, improve description + > * adapt to PIL.Image moved constants + > * fix exception + > * fix --log-filename (6fc606027a): apply in ocrd_cli_wrap_processor + > * tests report.is_valid: improve output on failure + > * Processor.zip_input_files: more verbose log msg + > * Processor.zip_input_files: warning instead of exception for missing input files + > * fix imports + > * ocrd_utils: forgot to export scale_coordinates at toplvl + > * allow "from ocrd_models import OcrdPage + > * improve output in case of assertion failures + > * hide/test expected deprecation warnings + > * use up-to-date kwargs (avoiding old deprecations) + > * mets_server: ClientSideOcrdMets needs OcrdMets-like kwargs (without deprecation) + > * test_mets_server: fix arg vs kwarg + > * processor CLI: delegate --resolve-resource, too + > * :package: v2.68.0 + > * :memo: changelog + > * refactor client cli: process -> run + > * Merge branch 'master' into extend-network-client + > * :memo: changelog + > * Merge pull request #1270 from OCR-D/fix-parsing + > * fix: exception handling + > * add: check processing job log file + > * add: discovery cli, processors and processor + > * add sort to network agents + > * add: parameter_override + > * fix: the annoying string dict + > * fix: check report validation outside try block + > * fix: set ps address if None in constructor + > * Fix: server_utils.py > 404 to 400 + > * Fix: rename to block + > * add docstring to cli commands + > * fix: required job id + > * add: help section to the cli + > * add cli job status check + > * add help for new env + > * refine status check methods + > * Update src/ocrd_network/client_utils.py + > * add timeout and wait to configs + > * add: client workflow run + > * fix: client processing request + > * fix test + > * refactor status checks + > * remove the client server + > * try docker host ip + > * Fix flag typo + > * integration test for client + > * update network client + > * fix the test dir path in docker + > * add integration test for client + > * Merge branch 'resolve-1257' + > * :memo: changelog + > * revert, and just use < v43.0.0 + > * set paramiko logging to INFO + > * fix: supress paramiko warnings + > * set: propagate 0, logging config + > * set: paramiko logging to ERROR + > * remove downloading tool json + > * add: default ocrd-all-tool.json + > * download tool json if missing + > * Merge branch 'master' into resolve-1257 + > * load tool json locally + +### [dinglehopper](https://github.com/qurator-spk/dinglehopper) [129e6eb](https://github.com/qurator-spk/dinglehopper/commits/129e6eb)..[071e6a8](https://github.com/qurator-spk/dinglehopper/commits/071e6a8) + +> Release: [v0.9.7](https://github.com/qurator-spk/dinglehopper/releases/v0.9.7) + + > * Merge pull request #120 from joschrew/dockerfile + > * Merge pull request #113 from qurator-spk/python-3.13 + > * โœ” pre-commit: Add license check + > * ๐Ÿ› Fix --version option in OCR-D CLI + > * โœจ Support --version option in CLI + > * โš™ pyproject.toml: Add license.file + > * โš™ pre-commit: Update hooks + +### [docstruct](https://github.com/bertsky/docstruct) [a7ffdda](https://github.com/bertsky/docstruct/commits/a7ffdda)..[004e6ec](https://github.com/bertsky/docstruct/commits/004e6ec) + + > * add GHA CD via Dockerhub + +Submodule eynollah 032a99e...51f6ef6: + > * Merge pull request #137 from qurator-spk/dockerfile + > * Merge pull request #132 from qurator-spk/extracting_images_only + > * Merge pull request #133 from qurator-spk/src-layout + > * :package: v0.3.1 + > * :memo: changelog + > * Merge pull request #129 from qurator-spk/resolving_issue_106 + > * update Makefile model location + > * update pyproject.toml for v0.3.1 + > * update pyproject.toml + > * Update README.md + > * rename GH action + > * create draft pyproject.toml + > * format options table + > * Update README.md + > * improve huggingface url + > * remove CircleCI + > * Update model download url + > * Merge pull request #127 from bertsky/new-namespace-pkg + > * update GitHub actions + > * Update README.md + > * update supported Python+Tensorflow version combinations + > * pin tf2 version to 2.12.1 + > * use tf1 compatibility for keras backend + < adapt to OcrdFile.local_filename now :Path + < adapt to ocrd>=2.54 url vs local_filename + > * comment unnecessary print commands + > * add supported OS to readme + > * filtering separators in a correct way without missing them + > * Merge pull request #117 from qurator-spk/tf-2.12-or-greater + > * apply missed commit #a56988a back + > * Merge pull request #116 from qurator-spk/fix-typos + > * Merge pull request #113 from qurator-spk/tf_<2.12.0 + > * Update citation + > * Update bibtex entry + > * format citation info as bibtex + > * add HIP'23 paper reference + > * Merge pull request #109 from bertsky/patch-3 + > * Merge pull request #105 from bertsky/fix-model-archive-path + < Revert "Merge pull request #97 from qurator-spk/420-namespace-package" + > * Merge pull request #104 from bertsky/reinstate-namespace-pkg + > * Merge pull request #102 from qurator-spk/right2left_reading_order + > * delete printing resized image shape + > * issue #67 solved + > * improve links to GT guidelines + > * Update README.md + > * Update CHANGELOG.md + > * Update ocrd-tool.json + > * Merge pull request #86 from qurator-spk/eynollah_light + +### [nmalign](https://github.com/bertsky/nmalign) [7832c90](https://github.com/bertsky/nmalign/commits/7832c90)..[1426dbc](https://github.com/bertsky/nmalign/commits/1426dbc) + +> Release: [v0.0.3](https://github.com/bertsky/nmalign/releases/v0.0.3) + + > * fix dockerfile + > * add GHA CD via Dockerhub + +### [ocrd_calamari](https://github.com/OCR-D/ocrd_calamari) [caac953](https://github.com/OCR-D/ocrd_calamari/commits/caac953)..[d9cde1f](https://github.com/OCR-D/ocrd_calamari/commits/d9cde1f) + +> Release: [v1.0.6](https://github.com/OCR-D/ocrd_calamari/releases/v1.0.6) + + > * Merge pull request #116 from bertsky/limit-batch-size + > * Merge pull request #115 from OCR-D/pyproject_toml + > * โœ” CircleCI: Don't test on Python 3.7 anymore + > * โš™ pre-commit: Update hooks + > * ๐Ÿงน ruff: Update config + > * ๐ŸŽจ Reformat (Black) + > * ๐Ÿงน Update OCR-D API usage w.r.t. workspace.add_file() + > * โš™ pre-commit: Update hooks + +### [ocrd_cis](https://github.com/cisocrgroup/ocrd_cis) [38ce45b](https://github.com/cisocrgroup/ocrd_cis/commits/38ce45b)..[db65d7f](https://github.com/cisocrgroup/ocrd_cis/commits/db65d7f) + +> Release: [v0.1.5](https://github.com/cisocrgroup/ocrd_cis/releases/v0.1.5) + + > * Merge pull request #6 from MehmedGIT/fix-align-import-levenshtein + > * segment: adapt to numpy deprecation + > * Updated config.yml + > * CI: try testing in parallel + > * test_lib.bash: update GT Github URL + > * fix+update dockerfile + > * recognize: replace python-levenshtein with rapidfuzz + > * fix 53ae7d69 (already str not bytes) + > * use importlib instead of pkg_resources via ocrd_utils + > * docker: adapt to core using /build already + > * ocrd-tool.json: add Ocropy default model resources + > * resegment: fix 2 edge cases + +### [ocrd_detectron2](https://github.com/bertsky/ocrd_detectron2) [1f56273](https://github.com/bertsky/ocrd_detectron2/commits/1f56273)..[218e0b5](https://github.com/bertsky/ocrd_detectron2/commits/218e0b5) + +> Release: [v0.1.8](https://github.com/bertsky/ocrd_detectron2/releases/v0.1.8) + + > * CI: out py3.7, in py3.10 + > * CD: publish to ocrd/detectron2 instead of bertsky/ocrd_detectron2 + > * Docker: rebase to ocrd/core-cuda-torch + > * update from pillow deprecations + > * make deps: defer to detectron2 master (and always build from src) + +### [ocrd_doxa](https://github.com/bertsky/ocrd_doxa) [a95f8e7](https://github.com/bertsky/ocrd_doxa/commits/a95f8e7)..[15e8423](https://github.com/bertsky/ocrd_doxa/commits/15e8423) + +> Release: [v0.0.2](https://github.com/bertsky/ocrd_doxa/releases/v0.0.2) + + > * add DH badge + > * fix GHA username + > * fix GHA syntax + > * add Dockerhub CD + > * Merge pull request #5 from joschrew/dockerfile + +### [ocrd_fileformat](https://github.com/OCR-D/ocrd_fileformat) [ff46bd7](https://github.com/OCR-D/ocrd_fileformat/commits/ff46bd7)..[8ab078d](https://github.com/OCR-D/ocrd_fileformat/commits/8ab078d) + +> Release: [v0.11.1](https://github.com/OCR-D/ocrd_fileformat/releases/v0.11.1) + + > * update ocr-fileformat to include UB-Mannheim/ocr-fileformat#188 + > * test_convert: fix choice of assets (w/ wf. METS) + > * Merge pull request #60 from bertsky/patch-4 + > * Merge pull request #57 from OCR-D/license + +### [ocrd_im6convert](https://github.com/OCR-D/ocrd_im6convert) [be794f6](https://github.com/OCR-D/ocrd_im6convert/commits/be794f6)..[82bd491](https://github.com/OCR-D/ocrd_im6convert/commits/82bd491) + +> Release: [v0.1.1](https://github.com/OCR-D/ocrd_im6convert/releases/v0.1.1) + + > * CD: fix GHCR image tag + > * dockerfile: oops, confused im6convert with fileformat + > * Merge pull request #14 from OCR-D/docker-build-args + +### [ocrd_keraslm](https://github.com/OCR-D/ocrd_keraslm) [ea79b2a](https://github.com/OCR-D/ocrd_keraslm/commits/ea79b2a)..[2c466bd](https://github.com/OCR-D/ocrd_keraslm/commits/2c466bd) + +> Release: [v0.4.3](https://github.com/OCR-D/ocrd_keraslm/releases/v0.4.3) + + > * update assets + > * Revert "test: replace buggy workspace clone with cp -r" + > * test: replace buggy workspace clone with cp -r + > * docker: base on ocrd/core-cuda:2.69 (via env var) + +### [ocrd_olahd_client](https://github.com/OCR-D/ocrd_olahd_client) [6bcbb4b](https://github.com/OCR-D/ocrd_olahd_client/commits/6bcbb4b)..[56c9272](https://github.com/OCR-D/ocrd_olahd_client/commits/56c9272) + +> Release: [v0.0.2](https://github.com/OCR-D/ocrd_olahd_client/releases/v0.0.2) + + > * Merge branch 'dockerfile' + +### [ocrd_segment](https://github.com/OCR-D/ocrd_segment) [de824e9](https://github.com/OCR-D/ocrd_segment/commits/de824e9)..[064b7a8](https://github.com/OCR-D/ocrd_segment/commits/064b7a8) + +> Release: [v0.1.24](https://github.com/OCR-D/ocrd_segment/releases/v0.1.24) + + > * Merge pull request #68 from OCR-D/update-docker-2.67 + +### [ocrd_tesserocr](https://github.com/OCR-D/ocrd_tesserocr) [d23992b](https://github.com/OCR-D/ocrd_tesserocr/commits/d23992b)..[dcbd522](https://github.com/OCR-D/ocrd_tesserocr/commits/dcbd522) + +> Release: [v0.19.1](https://github.com/OCR-D/ocrd_tesserocr/releases/v0.19.1) + + > * Merge pull request #219 from OCR-D/fix-docker + > * GHA CD: forgot ghcr.io prefix + > * GHA CD: lowercase repo name + > * Merge pull request #215 from bertsky/circleci-cd-add-pypi + > * dockerfile: build Tesseract in parallel + > * dockerfile: adapt to weird dockerfile COPY syntax + > * docker: unblock+copy .gitmodules + > * dockerignore: unblock .git + > * dockerfile: copy .git repo as well (so submodule update works) during build + > * makefile: ensure submodules are checked out + > * makefile: fix clean-tesseract + > * Merge pull request #218 from OCR-D/update-docker-2.67 + +### [ocrd_wrap](https://github.com/bertsky/ocrd_wrap) [2cd800d](https://github.com/bertsky/ocrd_wrap/commits/2cd800d)..[fd4a2bc](https://github.com/bertsky/ocrd_wrap/commits/fd4a2bc) + +> Release: [v0.1.8](https://github.com/bertsky/ocrd_wrap/releases/v0.1.8) + + > * forgot Dockerfile + > * add GHA CD via Dockerhub + +### [opencv-python](https://github.com/skvark/opencv-python) [cce7c99](https://github.com/skvark/opencv-python/commits/cce7c99)..[6a181ce](https://github.com/skvark/opencv-python/commits/6a181ce) + +> Release: [84](https://github.com/skvark/opencv-python/releases/84) + + > * Merge pull request #1015 from Gornoka:patch-1 + +### [sbb_binarization](https://github.com/qurator-spk/sbb_binarization) [978f425](https://github.com/qurator-spk/sbb_binarization/commits/978f425)..[d259795](https://github.com/qurator-spk/sbb_binarization/commits/d259795) + +> Release: [v0.1.0](https://github.com/qurator-spk/sbb_binarization/releases/v0.1.0) + + > * docker: rebase on core-cuda stage + > * forgot to include package data + > * dockerfile: switch to pyproject.toml + > * relax TF requirement + > * CI: remove py37 from matrix + > * remove setup.py + > * add pyproject.toml + > * make docker: fix docker tag + > * remove shebang from setup.py (somehow breaking py39) + > * CI: increase memory on VM + > * make install: update setuptools, too + > * add GHA CD via Dockerhub + +### [workflow-configuration](https://github.com/bertsky/workflow-configuration) [eeea260](https://github.com/bertsky/workflow-configuration/commits/eeea260)..[63e9969](https://github.com/bertsky/workflow-configuration/commits/63e9969) + +> Release: [0.1.3](https://github.com/bertsky/workflow-configuration/releases/0.1.3) + + > * Adapt dockerimage to ocrd-d-core changes + > * :memo: update readme + > * new pair of XSLTs: un/flatten text regions in arbitrary regions + > * use mkdir -p when creating SHAREDIR + ## [v2024-07-17](https://github.com/OCR-D/ocrd_all/releases/v2024-07-17) Changes: diff --git a/Dockerfile b/Dockerfile index c4b6e91..73df1c2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -59,7 +59,7 @@ ENV TF_FORCE_GPU_ALLOW_GROWTH=true # allow passing build-time parameter for list of tools to be installed # (defaults to medium, which also requires those modules to be present) -ARG OCRD_MODULES="core dinglehopper format-converters ocrd_calamari ocrd_cis ocrd_im6convert ocrd_keraslm ocrd_olena ocrd_segment ocrd_tesserocr tesseract tesserocr cor-asv-ann workflow-configuration" +ARG OCRD_MODULES="core cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration" # persist that variable for build-time make to pick it up and run-time users to know what to expect ENV OCRD_MODULES="${OCRD_MODULES}" @@ -83,9 +83,6 @@ RUN echo "Acquire::http::Timeout \"3000\";" >> /etc/apt/apt.conf.d/99network && WORKDIR /build -# create virtual environment -RUN rm $VIRTUAL_ENV/bin/pip* && apt-get purge -y python3-pip && python3 -m venv $VIRTUAL_ENV && python3 -m pip install --force pip - # from-stage already contains a clone clashing with build context RUN rm -rf /build/core/.git diff --git a/Makefile b/Makefile index bd109da..ed8b3d6 100644 --- a/Makefile +++ b/Makefile @@ -210,8 +210,8 @@ $(BIN)/pip: $(ACTIVATE_VENV) . $(ACTIVATE_VENV) && $(SEMPIP) pip install --upgrade pip setuptools %/bin/activate: - $(PYTHON) -m venv $(subst /bin/activate,,$@) - . $@ && pip install --upgrade pip setuptools wheel + $(SEMPIP) $(PYTHON) -m venv $(subst /bin/activate,,$@) + . $@ && $(SEMPIP) pip install --upgrade pip setuptools wheel .PHONY: wheel wheel: $(BIN)/wheel @@ -288,13 +288,20 @@ endif endif ifneq ($(filter ocrd_detectron2, $(OCRD_MODULES)),) -# ocrd_detectron patches detectron2 until there is a new detectron2 release. -# See https://github.com/facebookresearch/detectron2/pull/5011 for details. -CUSTOM_DEPS += patch OCRD_EXECUTABLES += $(OCRD_DETECTRON2) OCRD_DETECTRON2 := $(BIN)/ocrd-detectron2-segment -$(call multirule,$(OCRD_DETECTRON2)): ocrd_detectron2 $(BIN)/ocrd | $(OCRD_KRAKEN) +$(OCRD_DETECTRON2): ocrd_detectron2 $(BIN)/ocrd | $(OCRD_KRAKEN) . $(ACTIVATE_VENV) && $(MAKE) -C $< deps + # pre-empt conflict around typing-extensions + . $(ACTIVATE_VENV) && $(SEMPIP) pip install -i https://download.pytorch.org/whl/cpu torchvision==0.16.2 torch==2.1.2 + $(pip_install) +endif + +ifneq ($(filter ocrd_page2alto, $(OCRD_MODULES)),) +OCRD_EXECUTABLES += $(OCRD_PAGE_TO_ALTO) +OCRD_PAGE_TO_ALTO := $(BIN)/ocrd-page2alto-transform +OCRD_PAGE_TO_ALTO += $(BIN)/page-to-alto +$(call multirule,$(OCRD_PAGE_TO_ALTO)): ocrd_page2alto $(BIN)/ocrd $(pip_install) endif @@ -539,8 +546,6 @@ install-models-calamari: $(BIN)/ocrd OCRD_EXECUTABLES += $(OCRD_CALAMARI) OCRD_CALAMARI := $(BIN)/ocrd-calamari-recognize $(OCRD_CALAMARI): ocrd_calamari $(BIN)/ocrd - @# workaround for Calamari#337: - . $(ACTIVATE_VENV) && $(SEMPIP) pip install "protobuf<4" $(pip_install) endif @@ -597,6 +602,8 @@ SBB_BINARIZATION := $(BIN)/ocrd-sbb-binarize SBB_BINARIZATION += $(BIN)/sbb_binarize $(call multirule,$(SBB_BINARIZATION)): sbb_binarization $(BIN)/ocrd $(pip_install) + # work around #67 - switch to version pinned by eynollah: + . $(ACTIVATE_VENV) && $(SEMPIP) pip install "tensorflow==2.12.1" endif ifneq ($(filter eynollah, $(OCRD_MODULES)),) @@ -608,6 +615,8 @@ OCRD_EXECUTABLES += $(EYNOLLAH_SEGMENT) EYNOLLAH_SEGMENT := $(BIN)/ocrd-eynollah-segment $(EYNOLLAH_SEGMENT): eynollah $(BIN)/ocrd $(pip_install) + # solve conflict with ocrd_calamari: + . $(ACTIVATE_VENV) && $(SEMPIP) pip install "protobuf<4" endif ifneq ($(filter ocrd_repair_inconsistencies, $(OCRD_MODULES)),) @@ -815,7 +824,8 @@ deps-cuda: core $(ACTIVATE_VENV) tf1nvidia: $(ACTIVATE_VENV) $(pip_install_tf1nvidia) -# post-fix workaround for clash between cuDNN of Tensorflow 2.12 (โ†’8.6) and Pytorch 1.13 (โ†’8.5) +# post-fix workaround for clash between cuDNN of Tensorflow 2.12 (โ†’8.6) and Pytorch 1.13 (โ†’8.5) / 2.1 (8.7) +# (which also involves conflict around typing-extensions version) # the latter is explicit (but unnecessary), the former is implicit (and causes "DNN library not found" crashes at runtime) # so we have three potential options: # 1. revert to the version required by TF after pip overruled our choice via Torch dependency @@ -824,9 +834,13 @@ tf1nvidia: $(ACTIVATE_VENV) # pip3 install "tensorflow<2.12" # 3. upgrade Torch so there is no overt conflict # pip install "torch>=2.0" -# Since ATM we don't know whether Torch 2.x will work everywhere, we opt for 2: +# Since ATM we already need TF 2.12, we choose for (modified) option 3: fix-cuda: $(ACTIVATE_VENV) - . $(ACTIVATE_VENV) && $(SEMPIP) pip install "tensorflow<2.12" + . $(ACTIVATE_VENV) && $(SEMPIP) pip install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118 +# displace CUDA 12 libs pulled via Pytorch from PyPI + if test -d $(SUB_VENV_TF1); then \ + . $(SUB_VENV_TF1)/bin/activate && pip install -i https://download.pytorch.org/whl/cu118 torchvision==0.16.2+cu118 torch==2.1.2+cu118; \ + fi .PHONY: deps-cuda tf1nvidia fix-cuda @@ -847,10 +861,12 @@ dockers: docker-minimum docker-minimum-cuda docker-medium docker-medium-cuda doc docker-%: PIP_OPTIONS = -e # Minimum-size selection: use Ocropy binarization, use Tesseract from git -docker-mini%: DOCKER_MODULES := core ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration ocrd_olahd_client +DOCKER_MODULES_MINI := core ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_olahd_client ocrd_page2alto ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration +docker-mini%: DOCKER_MODULES := $(DOCKER_MODULES_MINI) # Medium-size selection: add Olena binarization and Calamari, add evaluation -docker-medi%: DOCKER_MODULES := core cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration -# Maximum-size selection: use all modules +DOCKER_MODULES_MEDI := $(DOCKER_MODULES_MINI) cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_keraslm ocrd_olena ocrd_segment +docker-medi%: DOCKER_MODULES := $(DOCKER_MODULES_MEDI) +# Maximum-size selection: use all enabled modules docker-maxi%: DOCKER_MODULES := $(OCRD_MODULES) # DOCKER_BASE_IMAGE diff --git a/core b/core index 92b217e..85bde15 160000 --- a/core +++ b/core @@ -1 +1 @@ -Subproject commit 92b217e36b68891724efe030542378ca995e4178 +Subproject commit 85bde1574293ea8b7ba29255fbb8e07312c28eb1 diff --git a/dinglehopper b/dinglehopper index 129e6eb..071e6a8 160000 --- a/dinglehopper +++ b/dinglehopper @@ -1 +1 @@ -Subproject commit 129e6eb427b0d5d306f76c7f443ee7cd08e83495 +Subproject commit 071e6a8bd14b47dd65c21a6acaa278677b0f352b diff --git a/docstruct b/docstruct index a7ffdda..004e6ec 160000 --- a/docstruct +++ b/docstruct @@ -1 +1 @@ -Subproject commit a7ffdda68a4c9c4e0b0494e7b0f865d92297ac30 +Subproject commit 004e6ecf67860ae06c174bb1b349843632147aa2 diff --git a/eynollah b/eynollah index 032a99e..51f6ef6 160000 --- a/eynollah +++ b/eynollah @@ -1 +1 @@ -Subproject commit 032a99ef11b19d8cb97566a2ce086878544991a0 +Subproject commit 51f6ef63f5734ccb62447ad4c74f1336c0371bac diff --git a/nmalign b/nmalign index 7832c90..1426dbc 160000 --- a/nmalign +++ b/nmalign @@ -1 +1 @@ -Subproject commit 7832c9032a8ee5bee8104b80a8fa423e331ae728 +Subproject commit 1426dbc00c100118228bf6718fc746022845e029 diff --git a/ocrd_calamari b/ocrd_calamari index caac953..d9cde1f 160000 --- a/ocrd_calamari +++ b/ocrd_calamari @@ -1 +1 @@ -Subproject commit caac9532213f4685f5c4b738965c7b43fef04ccf +Subproject commit d9cde1f5c8b2957dae4fddb0df9b7188ebff1037 diff --git a/ocrd_cis b/ocrd_cis index 38ce45b..db65d7f 160000 --- a/ocrd_cis +++ b/ocrd_cis @@ -1 +1 @@ -Subproject commit 38ce45bf016546b748cce65031cad3fe24a35c0d +Subproject commit db65d7fbea89d9fe8055eddb302eeb6c7294159e diff --git a/ocrd_detectron2 b/ocrd_detectron2 index 1f56273..218e0b5 160000 --- a/ocrd_detectron2 +++ b/ocrd_detectron2 @@ -1 +1 @@ -Subproject commit 1f56273d08fe098ac8b3f606c0a19927f8425225 +Subproject commit 218e0b5db7bf50880a22e1b0835f107a1c26c182 diff --git a/ocrd_doxa b/ocrd_doxa index a95f8e7..15e8423 160000 --- a/ocrd_doxa +++ b/ocrd_doxa @@ -1 +1 @@ -Subproject commit a95f8e77886c9860101392d088742ca0af277945 +Subproject commit 15e84239eb67ea89e827de3ec20ab64646b577c3 diff --git a/ocrd_fileformat b/ocrd_fileformat index ff46bd7..8ab078d 160000 --- a/ocrd_fileformat +++ b/ocrd_fileformat @@ -1 +1 @@ -Subproject commit ff46bd7862096904c97cd612301a299d727ffd3f +Subproject commit 8ab078dee246b0715db40a15ef7d288668febdf2 diff --git a/ocrd_im6convert b/ocrd_im6convert index be794f6..82bd491 160000 --- a/ocrd_im6convert +++ b/ocrd_im6convert @@ -1 +1 @@ -Subproject commit be794f67a2117ac3f8bcf42ce9eb496afbe33573 +Subproject commit 82bd49189e7c3c512059e4222ad7062e5a34f075 diff --git a/ocrd_keraslm b/ocrd_keraslm index ea79b2a..2c466bd 160000 --- a/ocrd_keraslm +++ b/ocrd_keraslm @@ -1 +1 @@ -Subproject commit ea79b2ab495c1d7ab3db678be27c89965b76e3b0 +Subproject commit 2c466bd6c07df5e6883672fdcbe5f48c0c3881e2 diff --git a/ocrd_olahd_client b/ocrd_olahd_client index 6bcbb4b..56c9272 160000 --- a/ocrd_olahd_client +++ b/ocrd_olahd_client @@ -1 +1 @@ -Subproject commit 6bcbb4bbb6847e581bdb84aa1c2c32b632d083f2 +Subproject commit 56c927282b3217e0eae2a41695974a9e94f43be8 diff --git a/ocrd_page2alto b/ocrd_page2alto new file mode 160000 index 0000000..8877e8f --- /dev/null +++ b/ocrd_page2alto @@ -0,0 +1 @@ +Subproject commit 8877e8f050df76448ff99bf27f43994b30f894ba diff --git a/ocrd_segment b/ocrd_segment index de824e9..064b7a8 160000 --- a/ocrd_segment +++ b/ocrd_segment @@ -1 +1 @@ -Subproject commit de824e9d5bb9a56ac253b7c6dd7d7c012cdddc64 +Subproject commit 064b7a86aff30677655c00af31c43b5b68c95fcd diff --git a/ocrd_tesserocr b/ocrd_tesserocr index d23992b..dcbd522 160000 --- a/ocrd_tesserocr +++ b/ocrd_tesserocr @@ -1 +1 @@ -Subproject commit d23992b0068e6fa9612cddcfe106628461538442 +Subproject commit dcbd5227f834c219e36d5dc41606a08e85e67a15 diff --git a/ocrd_wrap b/ocrd_wrap index 2cd800d..fd4a2bc 160000 --- a/ocrd_wrap +++ b/ocrd_wrap @@ -1 +1 @@ -Subproject commit 2cd800d9eccbc084751558a87972ac22ee60e87a +Subproject commit fd4a2bcab2dfd18f831b0e101763b3438afe1c8b diff --git a/opencv-python b/opencv-python index cce7c99..6a181ce 160000 --- a/opencv-python +++ b/opencv-python @@ -1 +1 @@ -Subproject commit cce7c994d46406205eb39300bb7ca9c48d80185a +Subproject commit 6a181cedae5f23e332ae87d2e90d36635932855e diff --git a/sbb_binarization b/sbb_binarization index 978f425..d259795 160000 --- a/sbb_binarization +++ b/sbb_binarization @@ -1 +1 @@ -Subproject commit 978f425bd154458e92888d2a974fe759bb3a5c06 +Subproject commit d2597959bab3e1c6910775b506327be1c5a8f256 diff --git a/workflow-configuration b/workflow-configuration index eeea260..63e9969 160000 --- a/workflow-configuration +++ b/workflow-configuration @@ -1 +1 @@ -Subproject commit eeea2609a8d5415537a5e26d4d662e1f24d7f3c2 +Subproject commit 63e996995238a4e1f60204b0039ba4dfcdc56880