From 043e488a78c45df88d8a532d6f36fb720cd769cc Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Sun, 3 Nov 2024 16:01:49 -0500 Subject: [PATCH 01/12] wip add docling and deepsearch-glm --- recipes/deepsearch-glm/recipe.yaml | 74 ++++++++++++++++++++++++++++++ recipes/docling/recipe.yaml | 68 +++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 recipes/deepsearch-glm/recipe.yaml create mode 100644 recipes/docling/recipe.yaml diff --git a/recipes/deepsearch-glm/recipe.yaml b/recipes/deepsearch-glm/recipe.yaml new file mode 100644 index 0000000000000..477b0b2241d17 --- /dev/null +++ b/recipes/deepsearch-glm/recipe.yaml @@ -0,0 +1,74 @@ +context: + name: deepsearch-glm + version: 0.26.1 + +package: + name: ${{ name|lower }} + version: ${{ version }} + +source: + url: https://pypi.org/packages/source/${{ name[0] }}/${{ name }}/deepsearch_glm-${{ version }}.tar.gz + sha256: c2938e99c4f9f48a8686d3c357778645ec76a78781c89d955720ef78502da830 + +build: + number: 0 + script: + content: python -m pip install . -vv --no-deps --no-build-isolation + env: + USE_SYSTEM_DEPS: "on" + +requirements: + build: + - if: build_platform != target_platform + then: + - python + - cross-python_${{ target_platform }} + + - ${{ compiler('cxx') }} + - ${{ compiler('c') }} + - ${{ stdlib("c") }} + - cmake + - ${{ "make" if unix else "ninja" }} + host: + - python + - poetry-core + - pybind11 >=2.13.1 + - pip + - fmt + - cxxopts + - nlohmann_json + - loguru-cpp + - utfcpp + - fasttext + - json_schema_validator + - pcre2 + - sentencepiece + - pkg-config + run: + - python + - docling-core >=2.0 + - tabulate >=0.8.9 + - numpy + - pandas + - python-dotenv >=1.0.0 + - tqdm >=4.64.0 + - rich >=13.7.0 + - docutils !=0.21 + - requests + - ${{ "pywin32 >=305" if win }} + +tests: + - python: + imports: + - deepsearch_glm + pip_check: true + +about: + summary: Create fast graph language models from converted PDF documents for knowledge extraction and Q&A. + license: MIT + license_file: LICENSE + homepage: https://github.com/DS4SD/deepsearch-glm/ + +extra: + recipe-maintainers: + - hadim diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml new file mode 100644 index 0000000000000..3724e53b91b9a --- /dev/null +++ b/recipes/docling/recipe.yaml @@ -0,0 +1,68 @@ +context: + name: docling + version: 2.3.1 + +package: + name: ${{ name|lower }} + version: ${{ version }} + +source: + url: https://pypi.org/packages/source/${{ name[0] }}/${{ name }}/docling-${{ version }}.tar.gz + sha256: f68a0f8a97e9f566b4a9140d854886577135e76ccfae2e899c318e57367ab12a + +build: + number: 0 + noarch: python + script: python -m pip install . -vv --no-deps --no-build-isolation + python: + entry_points: + - docling = docling.cli.main:app + +requirements: + host: + - python >=3.10 + - poetry-core + - pip + run: + - python >=3.10 + - pydantic >=2.0.0 + - docling-core >=2.3.0 + - docling-ibm-models >=2.0.3 + - deepsearch-glm >=0.26.1 + - filetype >=1.2.0 + - pypdfium2 >=4.30.0 + - pydantic-settings >=2.3.0 + - huggingface_hub >=0.23 + - requests >=2.32.3 + - easyocr >=1.7.0 + - docling-parse >=2.0.2 + - certifi >=2024.7.4 + - rtree >=1.3.0 + - scipy >=1.14.1 + - pyarrow >=16.1.0 + - typer >=0.12.5 + - python-docx >=1.1.2 + - python-pptx >=1.0.2 + - beautifulsoup4 >=4.12.3 + - pandas >=2.1.4 + - marko >=2.1.2 + run_constraints: + - tesserocr >=2.7.1 + +tests: + - python: + imports: + - docling + pip_check: true + - script: + - docling --help + +about: + summary: Docling PDF conversion package + license: MIT + license_file: LICENSE + homepage: https://github.com/DS4SD/docling + +extra: + recipe-maintainers: + - hadim From 63359b4384b6e3d4116a5d24aca88ea8dfbb9c67 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 4 Nov 2024 15:04:25 -0500 Subject: [PATCH 02/12] use fasttext-for-deepsearch-glm instead of fasttext --- recipes/deepsearch-glm/recipe.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/deepsearch-glm/recipe.yaml b/recipes/deepsearch-glm/recipe.yaml index 477b0b2241d17..a71786f78422a 100644 --- a/recipes/deepsearch-glm/recipe.yaml +++ b/recipes/deepsearch-glm/recipe.yaml @@ -39,7 +39,7 @@ requirements: - nlohmann_json - loguru-cpp - utfcpp - - fasttext + - fasttext-for-deepsearch-glm - json_schema_validator - pcre2 - sentencepiece From f018e80b028508f23d24d27bb62ca76d6656e650 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 4 Nov 2024 19:02:56 -0500 Subject: [PATCH 03/12] noarch new syntax --- recipes/docling/recipe.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index 3724e53b91b9a..bd80ecb90a3f7 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -20,11 +20,11 @@ build: requirements: host: - - python >=3.10 + - python {{ python_min }}.* - poetry-core - pip run: - - python >=3.10 + - python >={{ python_min }} - pydantic >=2.0.0 - docling-core >=2.3.0 - docling-ibm-models >=2.0.3 From 8facd519e5c2aa6165d4eb3c8b6effcc3a56c6f5 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 5 Nov 2024 07:37:30 -0500 Subject: [PATCH 04/12] fix syntax --- recipes/docling/recipe.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index bd80ecb90a3f7..db45cf0b49199 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -20,11 +20,11 @@ build: requirements: host: - - python {{ python_min }}.* + - python ${{ python_min }}.* - poetry-core - pip run: - - python >={{ python_min }} + - python >=${{ python_min }} - pydantic >=2.0.0 - docling-core >=2.3.0 - docling-ibm-models >=2.0.3 From 0cd6d8b1ae249da677aa9326e9bd640de40105f3 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 5 Nov 2024 10:50:41 -0500 Subject: [PATCH 05/12] python_min fix --- recipes/docling/recipe.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index db45cf0b49199..2f21b19a32d66 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -1,6 +1,7 @@ context: name: docling version: 2.3.1 + python_min: "3.9" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available package: name: ${{ name|lower }} From abf05c2f7abf9de4bde492c9e67b587102e42f34 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Wed, 6 Nov 2024 21:02:02 -0500 Subject: [PATCH 06/12] fix builds --- recipes/deepsearch-glm/fix-utfcpp.patch | 16 ++++++++++++++++ recipes/deepsearch-glm/recipe.yaml | 5 ++++- recipes/docling/recipe.yaml | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 recipes/deepsearch-glm/fix-utfcpp.patch diff --git a/recipes/deepsearch-glm/fix-utfcpp.patch b/recipes/deepsearch-glm/fix-utfcpp.patch new file mode 100644 index 0000000000000..43098f0c09c39 --- /dev/null +++ b/recipes/deepsearch-glm/fix-utfcpp.patch @@ -0,0 +1,16 @@ +diff --git a/cmake/extlib_utf8.git.cmake b/cmake/extlib_utf8.git.cmake +index f35e1e9..26c4d9c 100644 +--- a/cmake/extlib_utf8.git.cmake ++++ b/cmake/extlib_utf8.git.cmake +@@ -4,9 +4,9 @@ message(STATUS "entering in extlib_utf8.cmake") + set(ext_name "utf8") + + if(USE_SYSTEM_DEPS) +- find_package(utf8cpp REQUIRED) ++ # find_package(utf8cpp REQUIRED) + add_library(${ext_name} INTERFACE IMPORTED) +- add_dependencies(${ext_name} utf8cpp) ++ add_dependencies(${ext_name} utfcpp) + + else() + diff --git a/recipes/deepsearch-glm/recipe.yaml b/recipes/deepsearch-glm/recipe.yaml index a71786f78422a..f0547760389aa 100644 --- a/recipes/deepsearch-glm/recipe.yaml +++ b/recipes/deepsearch-glm/recipe.yaml @@ -9,6 +9,8 @@ package: source: url: https://pypi.org/packages/source/${{ name[0] }}/${{ name }}/deepsearch_glm-${{ version }}.tar.gz sha256: c2938e99c4f9f48a8686d3c357778645ec76a78781c89d955720ef78502da830 + patches: + - fix-utfcpp.patch build: number: 0 @@ -44,6 +46,7 @@ requirements: - pcre2 - sentencepiece - pkg-config + - zlib run: - python - docling-core >=2.0 @@ -61,7 +64,7 @@ tests: - python: imports: - deepsearch_glm - pip_check: true + pip_check: false about: summary: Create fast graph language models from converted PDF documents for knowledge extraction and Q&A. diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index 2f21b19a32d66..1677b02774a92 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -54,7 +54,7 @@ tests: - python: imports: - docling - pip_check: true + pip_check: false - script: - docling --help From f94d0a619378715103b6b7e3eb862f00c93c16e7 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Wed, 6 Nov 2024 21:04:30 -0500 Subject: [PATCH 07/12] py310 for docling as min version --- recipes/docling/recipe.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index 1677b02774a92..98bc7f5c0fcfc 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -1,7 +1,7 @@ context: name: docling version: 2.3.1 - python_min: "3.9" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available + python_min: "3.10" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available package: name: ${{ name|lower }} From a08d2ad3a4f7a68fb4cd8c89b7b86afcaf4e1821 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 11 Nov 2024 10:50:21 -0500 Subject: [PATCH 08/12] python_min --- recipes/docling/recipe.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index 98bc7f5c0fcfc..4185d2947b30f 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -21,7 +21,7 @@ build: requirements: host: - - python ${{ python_min }}.* + - python ${{ python_min }} - poetry-core - pip run: From 1d2c503945c945345c7080eaf8be9a5f2d5aaa4d Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 11 Nov 2024 10:59:25 -0500 Subject: [PATCH 09/12] disable windows for deepsearch-glm --- recipes/deepsearch-glm/recipe.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/deepsearch-glm/recipe.yaml b/recipes/deepsearch-glm/recipe.yaml index f0547760389aa..186f7cf09734e 100644 --- a/recipes/deepsearch-glm/recipe.yaml +++ b/recipes/deepsearch-glm/recipe.yaml @@ -14,6 +14,7 @@ source: build: number: 0 + skip: win script: content: python -m pip install . -vv --no-deps --no-build-isolation env: From c1e19a389d4ecc9cc7d9db11f83add144cdbb527 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 11 Nov 2024 11:03:37 -0500 Subject: [PATCH 10/12] python_min --- recipes/docling/recipe.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index 4185d2947b30f..0f9167bba29e2 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -1,7 +1,7 @@ context: name: docling version: 2.3.1 - python_min: "3.10" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available + python_min: "3.9" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available package: name: ${{ name|lower }} From e6232e4d763a66a3f8fe7be0a4e64eaded0feb0a Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Mon, 11 Nov 2024 11:14:19 -0500 Subject: [PATCH 11/12] python_min=3.10 for docling --- recipes/docling/recipe.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index 0f9167bba29e2..4185d2947b30f 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -1,7 +1,7 @@ context: name: docling version: 2.3.1 - python_min: "3.9" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available + python_min: "3.10" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available package: name: ${{ name|lower }} From bbf4944b8eacdc81b1386fde915aad0a8cfca838 Mon Sep 17 00:00:00 2001 From: Hadrien Mary Date: Tue, 12 Nov 2024 14:35:23 -0500 Subject: [PATCH 12/12] Update recipe.yaml --- recipes/docling/recipe.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/docling/recipe.yaml b/recipes/docling/recipe.yaml index 4185d2947b30f..17d84cb6a5d68 100644 --- a/recipes/docling/recipe.yaml +++ b/recipes/docling/recipe.yaml @@ -1,7 +1,7 @@ context: name: docling version: 2.3.1 - python_min: "3.10" # remove this line once https://github.com/prefix-dev/rattler-build/pull/1122 is available + python_min: "3.10" package: name: ${{ name|lower }}