From 4317224318b6b78678f3a2fe68d547fb1cf79c2a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:41:48 +0000 Subject: [PATCH 01/14] docker: build maxi/medi/mini/core on top of each other (staged) --- Makefile | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 9f2cbaa9..1fea97e9 100644 --- a/Makefile +++ b/Makefile @@ -832,6 +832,7 @@ fix-cuda: $(ACTIVATE_VENV) # Docker builds. DOCKER_TAG ?= ocrd/all +DOCKER_BASE_IMAGE ?= ocrd/core:$(CORE_VERSION) # Several predefined selections # (note: to arrive at smallest possible image size individually, @@ -845,16 +846,25 @@ dockers: docker-minimum docker-minimum-cuda docker-medium docker-medium-cuda doc docker-%: PIP_OPTIONS = -e # Minimum-size selection: use Ocropy binarization, use Tesseract from git -docker-mini%: DOCKER_MODULES := core ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration ocrd_olahd_client +docker-mini%: DOCKER_MODULES := ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration ocrd_olahd_client # Medium-size selection: add Olena binarization and Calamari, add evaluation -docker-medi%: DOCKER_MODULES := core cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration +docker-medi%: DOCKER_MODULES := cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration # Maximum-size selection: use all modules docker-maxi%: DOCKER_MODULES := $(OCRD_MODULES) # DOCKER_BASE_IMAGE -docker-%um: DOCKER_BASE_IMAGE = docker.io/ocrd/core:$(CORE_VERSION) +docker-minimum: DOCKER_BASE_IMAGE = ocrd/core:$(CORE_VERSION) +docker-medium: DOCKER_BASE_IMAGE = $(DOCKER_TAG):minimum +docker-maximum: DOCKER_BASE_IMAGE = $(DOCKER_TAG):medium # CUDA variants -docker-%-cuda: DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda:$(CORE_VERSION) +docker-minimum-cuda: DOCKER_BASE_IMAGE = ocrd/core-cuda:$(CORE_VERSION) +docker-medium-cuda: DOCKER_BASE_IMAGE = $(DOCKER_TAG):minimum-cuda +docker-maximum-cuda: DOCKER_BASE_IMAGE = $(DOCKER_TAG):medium-cuda +# explicit interdependencies +docker-medium: docker-minimum +docker-maximum: docker-medium +docker-medium-cuda: docker-minimum-cuda +docker-maximum-cuda: docker-medium-cuda # Build rule for all selections # FIXME: $(DOCKER_MODULES) ref does not work at phase 1; workaround: all modules From 7d1f25f3f462c2a84ed03d39caeb6726e6a583d4 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:42:17 +0000 Subject: [PATCH 02/14] ocrd_segment: move to top-level venv --- Makefile | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Makefile b/Makefile index 1fea97e9..1a1c0031 100644 --- a/Makefile +++ b/Makefile @@ -441,16 +441,8 @@ OCRD_SEGMENT += $(BIN)/ocrd-segment-replace-text OCRD_SEGMENT += $(BIN)/ocrd-segment-repair OCRD_SEGMENT += $(BIN)/ocrd-segment-project $(call multirule,$(OCRD_SEGMENT)): ocrd_segment $(BIN)/ocrd -ifeq (0,$(MAKELEVEL)) - $(MAKE) -o $< $(notdir $(OCRD_SEGMENT)) VIRTUAL_ENV=$(SUB_VENV_TF1) - $(call delegate_venv,$(OCRD_SEGMENT),$(SUB_VENV_TF1)) -ocrd_segment-check: - $(MAKE) check OCRD_MODULES=ocrd_segment VIRTUAL_ENV=$(SUB_VENV_TF1) -else - $(pip_install_tf1nvidia) $(pip_install) endif -endif ifneq ($(filter ocrd_tesserocr, $(OCRD_MODULES)),) ocrd_tesserocr: GIT_RECURSIVE = --recursive From 36462ca5ae68638318b6a46644d730dbfa43b823 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:42:52 +0000 Subject: [PATCH 03/14] ocrd_kraken: move to sub-venv (shapely v1) --- Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile b/Makefile index 1a1c0031..e4453bad 100644 --- a/Makefile +++ b/Makefile @@ -276,8 +276,16 @@ OCRD_KRAKEN := $(BIN)/ocrd-kraken-binarize OCRD_KRAKEN += $(BIN)/ocrd-kraken-segment OCRD_KRAKEN += $(BIN)/ocrd-kraken-recognize $(call multirule,$(OCRD_KRAKEN)): ocrd_kraken $(BIN)/ocrd +# now needs to be in sub-venv because shapely<2 clashes with shapely>=2 in other modules +ifeq (0,$(MAKELEVEL)) + $(MAKE) -o $< $(notdir $(OCRD_KRAKEN)) VIRTUAL_ENV=$(SUB_VENV_TF1) + $(call delegate_venv,$(OCRD_KRAKEN),$(SUB_VENV_TF1)) +ocrd_kraken-check: + $(MAKE) check OCRD_MODULES=ocrd_kraken VIRTUAL_ENV=$(SUB_VENV_TF1) +else $(pip_install) endif +endif ifneq ($(filter ocrd_detectron2, $(OCRD_MODULES)),) # ocrd_detectron patches detectron2 until there is a new detectron2 release. From bebc2756e65b32e28f95199e4feb8a28fe0d9366 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:43:18 +0000 Subject: [PATCH 04/14] ocrd_detectron2: disable in py311 onwards --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index e4453bad..b848046c 100644 --- a/Makefile +++ b/Makefile @@ -76,11 +76,17 @@ ifneq ($(PYTHON_VERSION),3.8) DEFAULT_DISABLED_MODULES += cor-asv-ann ocrd_keraslm endif endif +ifeq ($(PYTHON_VERSION),3.11) +# Detectron2 relies on Pytorch 1 which still uses pkg_resources +DEFAULT_DISABLED_MODULES += ocrd_detectron2 +endif ifeq ($(PYTHON_VERSION),3.12) # The required tensorflow is not available for Python 3.12. DEFAULT_DISABLED_MODULES += eynollah ocrd_anybaseocr ocrd_calamari sbb_binarization # The required coremltools does not support Python 3.12. DEFAULT_DISABLED_MODULES += ocrd_kraken +# Detectron2 relies on Pytorch 1 which still uses pkg_resources +DEFAULT_DISABLED_MODULES += ocrd_detectron2 endif ifeq ($(shell uname -s),Darwin) # Disable ocrd_olena for macOS because build is broken. From 528a5ea3e8f368ac99cacd271bc97c2afcfaa152 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:44:06 +0000 Subject: [PATCH 05/14] update core (for editable docker build) --- core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core b/core index c5b5580b..cc6ea575 160000 --- a/core +++ b/core @@ -1 +1 @@ -Subproject commit c5b5580ba5c517e6ee151a5b3a1d8fe1b3ba0d88 +Subproject commit cc6ea575f18cb89732d9a19ab12544ffe539fde3 From 32ebd37ae327548e8f1b052f1e81294d3c44b036 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:53:07 +0000 Subject: [PATCH 06/14] update ocrd_anybaseocr#101 --- ocrd_anybaseocr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_anybaseocr b/ocrd_anybaseocr index eb6ea8ea..3459b419 160000 --- a/ocrd_anybaseocr +++ b/ocrd_anybaseocr @@ -1 +1 @@ -Subproject commit eb6ea8ea62c6577a1a126413e09bf1bbbec2a833 +Subproject commit 3459b41989589f4767c958ca354d4c2cba7604ad From 7a58f78f51640603cfdf38ee084df11832546216 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:53:26 +0000 Subject: [PATCH 07/14] update ocrd_fileformat#52 --- ocrd_fileformat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_fileformat b/ocrd_fileformat index ba79de91..899a7cf2 160000 --- a/ocrd_fileformat +++ b/ocrd_fileformat @@ -1 +1 @@ -Subproject commit ba79de915577107c0a2d1a89f74b5f65f730dc70 +Subproject commit 899a7cf2f857696a99ebc0a70816ca1132af6520 From 58f1f322a0c77347f70df52fd23602428a37f2fc Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 12:55:53 +0000 Subject: [PATCH 08/14] update submodules --- dinglehopper | 2 +- ocrd_cis | 2 +- ocrd_detectron2 | 2 +- ocrd_froc | 2 +- ocrd_keraslm | 2 +- ocrd_segment | 2 +- ocrd_tesserocr | 2 +- sbb_binarization | 2 +- workflow-configuration | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dinglehopper b/dinglehopper index f8e31089..bc5818da 160000 --- a/dinglehopper +++ b/dinglehopper @@ -1 +1 @@ -Subproject commit f8e31089b3e7f8db7d5ef8648aa99015b3fba45c +Subproject commit bc5818da9f9d0ae44fcc7580ed458eb8a900be89 diff --git a/ocrd_cis b/ocrd_cis index 1d2e858d..1abc3b7b 160000 --- a/ocrd_cis +++ b/ocrd_cis @@ -1 +1 @@ -Subproject commit 1d2e858d2bb2a5f1d64bf761b81b4173946781b9 +Subproject commit 1abc3b7b617b1c342908e6b69f6e706a14fc666f diff --git a/ocrd_detectron2 b/ocrd_detectron2 index a8402d8e..1f56273d 160000 --- a/ocrd_detectron2 +++ b/ocrd_detectron2 @@ -1 +1 @@ -Subproject commit a8402d8e58570fdefb80bdf44cfc4aea8f914d4c +Subproject commit 1f56273d08fe098ac8b3f606c0a19927f8425225 diff --git a/ocrd_froc b/ocrd_froc index 45d5dcde..42f1ce0e 160000 --- a/ocrd_froc +++ b/ocrd_froc @@ -1 +1 @@ -Subproject commit 45d5dcdefe156becb74c100faa7f722966936d3a +Subproject commit 42f1ce0e369d22948f330148035ff02f9866b806 diff --git a/ocrd_keraslm b/ocrd_keraslm index 472197f8..ea79b2ab 160000 --- a/ocrd_keraslm +++ b/ocrd_keraslm @@ -1 +1 @@ -Subproject commit 472197f8f6bbbba0c55d14da5786657cd2e98322 +Subproject commit ea79b2ab495c1d7ab3db678be27c89965b76e3b0 diff --git a/ocrd_segment b/ocrd_segment index 39931391..de824e9d 160000 --- a/ocrd_segment +++ b/ocrd_segment @@ -1 +1 @@ -Subproject commit 3993139175ed986aa4c3f4e914d028340c6004d3 +Subproject commit de824e9d5bb9a56ac253b7c6dd7d7c012cdddc64 diff --git a/ocrd_tesserocr b/ocrd_tesserocr index ed73d966..75a782da 160000 --- a/ocrd_tesserocr +++ b/ocrd_tesserocr @@ -1 +1 @@ -Subproject commit ed73d9665258e2a2a05c9589d27697bc1beaa065 +Subproject commit 75a782dacb3195313a5628ccb9eb024d43240bfc diff --git a/sbb_binarization b/sbb_binarization index b89ec490..978f425b 160000 --- a/sbb_binarization +++ b/sbb_binarization @@ -1 +1 @@ -Subproject commit b89ec490befc56f7b471307561c84dc56a609054 +Subproject commit 978f425bd154458e92888d2a974fe759bb3a5c06 diff --git a/workflow-configuration b/workflow-configuration index bd149f83..e39fdafd 160000 --- a/workflow-configuration +++ b/workflow-configuration @@ -1 +1 @@ -Subproject commit bd149f831a6bc5227e2b17f623c33221f307ff78 +Subproject commit e39fdafda3cb5923e2c892390b7b345dd554ec36 From ec3402090fdcc060b3333f0619a4991c092f659e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 14:11:31 +0000 Subject: [PATCH 09/14] CircleCI: adapt to staged dockers --- .circleci/config.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index fb273a20..1a5beec3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -52,6 +52,19 @@ jobs: command: | docker push ocrd/all:<< parameters.variant >> docker push ocrd/all:<< parameters.variant >>-git + - when: + condition: + equal: [ maximum, << parameters.variant >> ] + steps: + - run: + name: Alias and push intermediate variants + command: | + docker tag ocrd/all:medium ocrd/all:medium-git + docker tag ocrd/all:minimum ocrd/all:minimum-git + docker push ocrd/all:minimum + docker push ocrd/all:minimum-git + docker push ocrd/all:medium + docker push ocrd/all:medium-git - when: condition: equal: [ maximum-cuda, << parameters.variant >> ] @@ -69,7 +82,7 @@ workflows: - deploy: matrix: parameters: - variant: [minimum, medium, maximum, maximum-cuda] + variant: [maximum, maximum-cuda] filters: branches: only: master From 6cb25e557b94538148a7a9b45e09dd361a77e62d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 14:11:44 +0000 Subject: [PATCH 10/14] CircleCI: also upload ocrd-all-tool.json --- .circleci/config.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1a5beec3..75cd0db6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,6 +10,22 @@ jobs: name: build image command: make docker-maximum-cuda GIT_DEPTH=--single-branch no_output_timeout: 30m + - run: + name: store ocrd-all-tool.json + command: | + id=`docker create ocrd/all:maximum-cuda` + docker cp $id:/build/ocrd-all-tool.json . + - store_artifacts: + path: ocrd-all-tool.json + destination: artifacts + - run: + name: store ocrd-all-module-dir.json + command: | + id=`docker create ocrd/all:maximum-cuda` + docker cp $id:/build/ocrd-all-module-dir.json . + - store_artifacts: + path: ocrd-all-module-dir.json + destination: artifacts - when: # takes too long for 1h1m CircleCI timeout overall # also, storage is limited... From c5683cb37a3169ac2ee425e59be231a2b1171873 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Fri, 24 May 2024 18:22:06 +0200 Subject: [PATCH 11/14] Github makedocker: adapt to newest image --- .github/workflows/makedocker.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/makedocker.yml b/.github/workflows/makedocker.yml index 0d6b3e86..3a94b8ae 100644 --- a/.github/workflows/makedocker.yml +++ b/.github/workflows/makedocker.yml @@ -46,7 +46,6 @@ jobs: df -h docker images docker rmi $(docker images --filter=reference="alpine:*" -q) - docker rmi $(docker images --filter=reference="buildpack-deps:*" -q) docker rmi $(docker images --filter=reference="debian:*" -q) docker rmi $(docker images --filter=reference="node:*" -q) df -h / From edd4e16d590c8ee4efb559578221c6cfe1ebb456 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 22:45:57 +0000 Subject: [PATCH 12/14] update ocrd_cis --- ocrd_cis | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_cis b/ocrd_cis index 1abc3b7b..6e95b384 160000 --- a/ocrd_cis +++ b/ocrd_cis @@ -1 +1 @@ -Subproject commit 1abc3b7b617b1c342908e6b69f6e706a14fc666f +Subproject commit 6e95b3847ec5532c039062062129ce3c1c1a6bf7 From 512230daa72de653c44eda64bcef623d072004be Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 24 May 2024 23:49:34 +0000 Subject: [PATCH 13/14] CircleCI: replace tab with spaces --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 75cd0db6..82b16f71 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,7 +14,7 @@ jobs: name: store ocrd-all-tool.json command: | id=`docker create ocrd/all:maximum-cuda` - docker cp $id:/build/ocrd-all-tool.json . + docker cp $id:/build/ocrd-all-tool.json . - store_artifacts: path: ocrd-all-tool.json destination: artifacts From 21e0f88b60a3b38c835d1c027a3421b67fb82eef Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 25 May 2024 06:35:43 +0000 Subject: [PATCH 14/14] CircleCI: store JSON artifacts in deploy, not in build --- .circleci/config.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 82b16f71..f1fb701f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,22 +10,6 @@ jobs: name: build image command: make docker-maximum-cuda GIT_DEPTH=--single-branch no_output_timeout: 30m - - run: - name: store ocrd-all-tool.json - command: | - id=`docker create ocrd/all:maximum-cuda` - docker cp $id:/build/ocrd-all-tool.json . - - store_artifacts: - path: ocrd-all-tool.json - destination: artifacts - - run: - name: store ocrd-all-module-dir.json - command: | - id=`docker create ocrd/all:maximum-cuda` - docker cp $id:/build/ocrd-all-module-dir.json . - - store_artifacts: - path: ocrd-all-module-dir.json - destination: artifacts - when: # takes too long for 1h1m CircleCI timeout overall # also, storage is limited... @@ -81,6 +65,22 @@ jobs: docker push ocrd/all:minimum-git docker push ocrd/all:medium docker push ocrd/all:medium-git + - run: + name: store ocrd-all-tool.json + command: | + id=`docker create ocrd/all:maximum` + docker cp $id:/build/ocrd-all-tool.json . + - store_artifacts: + path: ocrd-all-tool.json + destination: ocrd-all-tool.json + - run: + name: store ocrd-all-module-dir.json + command: | + id=`docker create ocrd/all:maximum` + docker cp $id:/build/ocrd-all-module-dir.json . + - store_artifacts: + path: ocrd-all-module-dir.json + destination: ocrd-all-module-dir.json - when: condition: equal: [ maximum-cuda, << parameters.variant >> ]