Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker build staged #436

Merged
merged 14 commits into from
May 25, 2024
31 changes: 30 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,22 @@ jobs:
name: build image
command: make docker-maximum-cuda GIT_DEPTH=--single-branch
no_output_timeout: 30m
- run:
name: store ocrd-all-tool.json
command: |
id=`docker create ocrd/all:maximum-cuda`
docker cp $id:/build/ocrd-all-tool.json .
- store_artifacts:
path: ocrd-all-tool.json
destination: artifacts
- run:
name: store ocrd-all-module-dir.json
command: |
id=`docker create ocrd/all:maximum-cuda`
docker cp $id:/build/ocrd-all-module-dir.json .
- store_artifacts:
path: ocrd-all-module-dir.json
destination: artifacts
- when:
# takes too long for 1h1m CircleCI timeout overall
# also, storage is limited...
Expand Down Expand Up @@ -52,6 +68,19 @@ jobs:
command: |
docker push ocrd/all:<< parameters.variant >>
docker push ocrd/all:<< parameters.variant >>-git
- when:
condition:
equal: [ maximum, << parameters.variant >> ]
steps:
- run:
name: Alias and push intermediate variants
command: |
docker tag ocrd/all:medium ocrd/all:medium-git
docker tag ocrd/all:minimum ocrd/all:minimum-git
docker push ocrd/all:minimum
docker push ocrd/all:minimum-git
docker push ocrd/all:medium
docker push ocrd/all:medium-git
- when:
condition:
equal: [ maximum-cuda, << parameters.variant >> ]
Expand All @@ -69,7 +98,7 @@ workflows:
- deploy:
matrix:
parameters:
variant: [minimum, medium, maximum, maximum-cuda]
variant: [maximum, maximum-cuda]
filters:
branches:
only: master
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/makedocker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ jobs:
df -h
docker images
docker rmi $(docker images --filter=reference="alpine:*" -q)
docker rmi $(docker images --filter=reference="buildpack-deps:*" -q)
docker rmi $(docker images --filter=reference="debian:*" -q)
docker rmi $(docker images --filter=reference="node:*" -q)
df -h /
Expand Down
40 changes: 28 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,17 @@ ifneq ($(PYTHON_VERSION),3.8)
DEFAULT_DISABLED_MODULES += cor-asv-ann ocrd_keraslm
endif
endif
ifeq ($(PYTHON_VERSION),3.11)
# Detectron2 relies on PyTorch 1, which still uses pkg_resources
DEFAULT_DISABLED_MODULES += ocrd_detectron2
endif
ifeq ($(PYTHON_VERSION),3.12)
# The required tensorflow is not available for Python 3.12.
DEFAULT_DISABLED_MODULES += eynollah ocrd_anybaseocr ocrd_calamari sbb_binarization
# The required coremltools does not support Python 3.12.
DEFAULT_DISABLED_MODULES += ocrd_kraken
# Detectron2 relies on PyTorch 1, which still uses pkg_resources
DEFAULT_DISABLED_MODULES += ocrd_detectron2
endif
ifeq ($(shell uname -s),Darwin)
# Disable ocrd_olena for macOS because build is broken.
Expand Down Expand Up @@ -276,8 +282,16 @@ OCRD_KRAKEN := $(BIN)/ocrd-kraken-binarize
OCRD_KRAKEN += $(BIN)/ocrd-kraken-segment
OCRD_KRAKEN += $(BIN)/ocrd-kraken-recognize
$(call multirule,$(OCRD_KRAKEN)): ocrd_kraken $(BIN)/ocrd
# ocrd_kraken now needs to be in a sub-venv because shapely<2 clashes with shapely>=2 in other modules
ifeq (0,$(MAKELEVEL))
$(MAKE) -o $< $(notdir $(OCRD_KRAKEN)) VIRTUAL_ENV=$(SUB_VENV_TF1)
$(call delegate_venv,$(OCRD_KRAKEN),$(SUB_VENV_TF1))
ocrd_kraken-check:
$(MAKE) check OCRD_MODULES=ocrd_kraken VIRTUAL_ENV=$(SUB_VENV_TF1)
else
$(pip_install)
endif
endif

ifneq ($(filter ocrd_detectron2, $(OCRD_MODULES)),)
# ocrd_detectron2 patches detectron2 until there is a new detectron2 release.
Expand Down Expand Up @@ -441,16 +455,8 @@ OCRD_SEGMENT += $(BIN)/ocrd-segment-replace-text
OCRD_SEGMENT += $(BIN)/ocrd-segment-repair
OCRD_SEGMENT += $(BIN)/ocrd-segment-project
$(call multirule,$(OCRD_SEGMENT)): ocrd_segment $(BIN)/ocrd
ifeq (0,$(MAKELEVEL))
$(MAKE) -o $< $(notdir $(OCRD_SEGMENT)) VIRTUAL_ENV=$(SUB_VENV_TF1)
$(call delegate_venv,$(OCRD_SEGMENT),$(SUB_VENV_TF1))
ocrd_segment-check:
$(MAKE) check OCRD_MODULES=ocrd_segment VIRTUAL_ENV=$(SUB_VENV_TF1)
else
$(pip_install_tf1nvidia)
$(pip_install)
endif
endif

ifneq ($(filter ocrd_tesserocr, $(OCRD_MODULES)),)
ocrd_tesserocr: GIT_RECURSIVE = --recursive
Expand Down Expand Up @@ -832,6 +838,7 @@ fix-cuda: $(ACTIVATE_VENV)

# Docker builds.
DOCKER_TAG ?= ocrd/all
DOCKER_BASE_IMAGE ?= ocrd/core:$(CORE_VERSION)

# Several predefined selections
# (note: to arrive at smallest possible image size individually,
Expand All @@ -845,16 +852,25 @@ dockers: docker-minimum docker-minimum-cuda docker-medium docker-medium-cuda doc
docker-%: PIP_OPTIONS = -e

# Minimum-size selection: use Ocropy binarization, use Tesseract from git
docker-mini%: DOCKER_MODULES := core ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration ocrd_olahd_client
docker-mini%: DOCKER_MODULES := ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_tesserocr ocrd_wrap workflow-configuration ocrd_olahd_client
# Medium-size selection: add Olena binarization and Calamari, add evaluation
docker-medi%: DOCKER_MODULES := core cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration
docker-medi%: DOCKER_MODULES := cor-asv-ann dinglehopper docstruct format-converters nmalign ocrd_calamari ocrd_cis ocrd_fileformat ocrd_im6convert ocrd_keraslm ocrd_olahd_client ocrd_olena ocrd_pagetopdf ocrd_repair_inconsistencies ocrd_segment ocrd_tesserocr ocrd_wrap workflow-configuration
# Maximum-size selection: use all modules
docker-maxi%: DOCKER_MODULES := $(OCRD_MODULES)

# DOCKER_BASE_IMAGE
docker-%um: DOCKER_BASE_IMAGE = docker.io/ocrd/core:$(CORE_VERSION)
docker-minimum: DOCKER_BASE_IMAGE = ocrd/core:$(CORE_VERSION)
docker-medium: DOCKER_BASE_IMAGE = $(DOCKER_TAG):minimum
docker-maximum: DOCKER_BASE_IMAGE = $(DOCKER_TAG):medium
# CUDA variants
docker-%-cuda: DOCKER_BASE_IMAGE = docker.io/ocrd/core-cuda:$(CORE_VERSION)
docker-minimum-cuda: DOCKER_BASE_IMAGE = ocrd/core-cuda:$(CORE_VERSION)
docker-medium-cuda: DOCKER_BASE_IMAGE = $(DOCKER_TAG):minimum-cuda
docker-maximum-cuda: DOCKER_BASE_IMAGE = $(DOCKER_TAG):medium-cuda
# explicit interdependencies
docker-medium: docker-minimum
docker-maximum: docker-medium
docker-medium-cuda: docker-minimum-cuda
docker-maximum-cuda: docker-medium-cuda

# Build rule for all selections
# FIXME: referencing $(DOCKER_MODULES) does not work during make's read-in phase (phase 1); workaround: all modules
Expand Down
2 changes: 1 addition & 1 deletion core
Submodule core updated 97 files
+0 −13 .circleci/config.yml
+4 −4 .github/workflows/docker-image.yml
+10 −7 .github/workflows/network-testing.yml
+3 −3 .github/workflows/unit-test.yml
+4 −0 .scrutinizer.yml
+42 −8 CHANGELOG.md
+6 −4 Dockerfile
+29 −6 Makefile
+3 −3 README.md
+1 −1 VERSION
+1 −1 pyproject.toml
+1 −1 requirements.txt
+1 −2 requirements_test.txt
+1 −3 src/ocrd/cli/__init__.py
+4 −0 src/ocrd/cli/bashlib.py
+0 −1 src/ocrd/cli/network.py
+1 −1 src/ocrd/cli/resmgr.py
+8 −6 src/ocrd/decorators/__init__.py
+2 −2 src/ocrd/decorators/ocrd_cli_options.py
+7 −4 src/ocrd/mets_server.py
+1 −1 src/ocrd/processor/base.py
+1 −2 src/ocrd/task_sequence.py
+3 −4 src/ocrd/workspace_bagger.py
+1 −1 src/ocrd_models/ocrd_exif.py
+2 −2 src/ocrd_models/ocrd_mets.py
+1 −1 src/ocrd_models/ocrd_page_generateds.py
+3 −6 src/ocrd_network/__init__.py
+2 −6 src/ocrd_network/cli/client.py
+1 −4 src/ocrd_network/cli/processing_server.py
+1 −6 src/ocrd_network/cli/processing_worker.py
+3 −7 src/ocrd_network/cli/processor_server.py
+17 −18 src/ocrd_network/client.py
+60 −2 src/ocrd_network/constants.py
+34 −16 src/ocrd_network/database.py
+0 −568 src/ocrd_network/deployer.py
+0 −48 src/ocrd_network/logging.py
+52 −0 src/ocrd_network/logging_utils.py
+2 −10 src/ocrd_network/models/__init__.py
+13 −32 src/ocrd_network/models/job.py
+7 −7 src/ocrd_network/models/messages.py
+13 −12 src/ocrd_network/param_validators.py
+20 −23 src/ocrd_network/process_helpers.py
+482 −662 src/ocrd_network/processing_server.py
+104 −145 src/ocrd_network/processing_worker.py
+67 −78 src/ocrd_network/processor_server.py
+20 −9 src/ocrd_network/rabbitmq_utils/__init__.py
+75 −71 src/ocrd_network/rabbitmq_utils/connector.py
+24 −19 src/ocrd_network/rabbitmq_utils/constants.py
+11 −42 src/ocrd_network/rabbitmq_utils/consumer.py
+106 −0 src/ocrd_network/rabbitmq_utils/helpers.py
+39 −51 src/ocrd_network/rabbitmq_utils/ocrd_messages.py
+12 −64 src/ocrd_network/rabbitmq_utils/publisher.py
+0 −142 src/ocrd_network/runtime_data.py
+14 −0 src/ocrd_network/runtime_data/__init__.py
+53 −0 src/ocrd_network/runtime_data/config_parser.py
+28 −73 src/ocrd_network/runtime_data/connection_clients.py
+174 −0 src/ocrd_network/runtime_data/deployer.py
+225 −0 src/ocrd_network/runtime_data/hosts.py
+110 −0 src/ocrd_network/runtime_data/network_agents.py
+160 −0 src/ocrd_network/runtime_data/network_services.py
+80 −89 src/ocrd_network/server_cache.py
+209 −62 src/ocrd_network/server_utils.py
+64 −95 src/ocrd_network/utils.py
+4 −4 src/ocrd_utils/config.py
+1 −1 src/ocrd_utils/introspect.py
+1 −1 src/ocrd_utils/logging.py
+5 −5 src/ocrd_utils/os.py
+6 −0 src/ocrd_validators/message_processing.schema.yml
+7 −8 src/ocrd_validators/message_result.schema.yml
+1 −4 src/ocrd_validators/ocrd_network_message_validator.py
+34 −45 tests/cli/test_bashlib.py
+1 −0 tests/conftest.py
+8 −1 tests/data/ocrd-cp
+18 −0 tests/data/ocrd-cp.ocrd-tool.json
+1 −1 tests/data/wf_testcase.py
+1 −2 tests/model/test_ocrd_mets.py
+3 −3 tests/model/test_ocrd_page.py
+5 −5 tests/network/config.py
+1 −2 tests/network/fixtures_mongodb.py
+17 −0 tests/network/fixtures_processing_requests.py
+12 −24 tests/network/fixtures_rabbitmq.py
+50 −46 tests/network/test_integration_1_db.py
+22 −38 tests/network/test_integration_2_rabbitmq.py
+167 −0 tests/network/test_integration_3_server_cache_requests.py
+118 −0 tests/network/test_integration_4_processing_worker.py
+70 −0 tests/network/test_integration_5_processing_server.py
+19 −0 tests/network/test_integration_ocrd_all.py
+34 −0 tests/network/test_modules_logging_utils.py
+68 −0 tests/network/test_modules_param_validators.py
+75 −0 tests/network/test_modules_process_helpers.py
+135 −0 tests/network/test_modules_server_cache_pages.py
+0 −97 tests/network/test_processing_server.py
+47 −0 tests/network/utils.py
+1 −1 tests/test_logging_conf.py
+3 −3 tests/test_resolver.py
+1 −1 tests/test_workspace.py
+1 −1 tests/utils/test_deprecate.py
2 changes: 1 addition & 1 deletion ocrd_detectron2
Submodule ocrd_detectron2 updated 1 files
+1 −1 Makefile
2 changes: 1 addition & 1 deletion ocrd_fileformat
2 changes: 1 addition & 1 deletion sbb_binarization
2 changes: 1 addition & 1 deletion workflow-configuration