Merge pull request #408 from DigitalSlideArchive/limit-ocr-concurrency

The default thread pool concurrency is too much for ocr
DigitalSlideArchive · May 23, 2024 · 73a1b13 · 73a1b13
2 parents 7404f19 + b7ce51b
commit 73a1b13
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 5 deletions.
diff --git a/.trivyignore b/.trivyignore
@@ -1,5 +1,12 @@
 # Accept these; revisit as needed
-CVE-2024-26597
-CVE-2024-26865
+CVE-2023-52433
+CVE-2024-26642
+CVE-2024-26643
+CVE-2024-26800
 CVE-2024-26828
-CVE-2024-26585
+CVE-2024-26865
+CVE-2024-26921
+CVE-2024-26923
+CVE-2024-26924
+CVE-2024-26925
+CVE-2024-27397
diff --git a/docs/INSTALL.rst b/docs/INSTALL.rst
@@ -36,7 +36,7 @@ Hardware Requirements
 
 The recommended hardware is 32 GBytes of memory on a system with at least 4 cores.  If you are using OCR, it is highly recommended to have an NVidia Cuda-capable GPU.
 
-The minimum hardware is 4 GBytes of memory on a system with at least 2 cores (12 GBytes if using iSyntax files).
+The minimum hardware is 4 GBytes of memory on a system with at least 2 cores, 8 GBytes if using OCR, and 12 GBytes if using iSyntax files.
 
 The speed of your storage greatly affects the speed of the system.  SSD drives for import, local storage, and export are recommended.
 

diff --git a/wsi_deid/jobs.py b/wsi_deid/jobs.py
@@ -1,4 +1,5 @@
 import concurrent.futures
+import os
 
 from girder import logger
 from girder.models.item import Item
@@ -227,7 +228,8 @@ def associate_unfiled_images(job):  # noqa
         # for itemId in itemIds:
         #     label_text = get_label_text_for_item(itemId, job)
         label_text_list = []
-        with concurrent.futures.ThreadPoolExecutor() as executor:
+        with concurrent.futures.ThreadPoolExecutor(
+                max_workers=max(1, os.cpu_count() // 2)) as executor:
             futures = []
             for itemId in itemIds:
                 futures.append(executor.submit(get_label_text_for_item, itemId, job))