diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d82eb80..6a6902b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ -## 0.0.45-dev0 +## 0.0.45 * Drop `detection_class_prob` from the element metadata. This broke backwards compatibility when library users called `partition_via_api`. +* Bump unstructured to 0.10.15 ## 0.0.44 diff --git a/requirements/base.txt b/requirements/base.txt index 203a5079..7acce535 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -31,7 +31,7 @@ click==8.1.3 # uvicorn coloredlogs==15.0.1 # via onnxruntime -contourpy==1.1.0 +contourpy==1.1.1 # via matplotlib cryptography==41.0.3 # via pdfminer-six @@ -51,7 +51,7 @@ exceptiongroup==1.1.3 # via anyio fastapi==0.103.1 # via -r requirements/base.in -filelock==3.12.3 +filelock==3.12.4 # via # huggingface-hub # torch @@ -62,11 +62,11 @@ flatbuffers==23.5.26 # via onnxruntime fonttools==4.42.1 # via matplotlib -fsspec==2023.9.0 +fsspec==2023.9.1 # via huggingface-hub h11==0.14.0 # via uvicorn -huggingface-hub==0.17.1 +huggingface-hub==0.17.2 # via # timm # transformers @@ -99,7 +99,7 @@ markupsafe==2.1.3 # via jinja2 marshmallow==3.20.1 # via dataclasses-json -matplotlib==3.7.3 +matplotlib==3.8.0 # via pycocotools mpmath==1.3.0 # via sympy @@ -111,7 +111,7 @@ networkx==3.1 # via torch nltk==3.8.1 # via unstructured -numpy==1.25.2 +numpy==1.26.0 # via # contourpy # layoutparser @@ -146,6 +146,7 @@ packaging==23.1 # onnxruntime # pytesseract # transformers + # unstructured-pytesseract pandas==2.1.0 # via # layoutparser @@ -160,7 +161,7 @@ pdfminer-six==20221105 # unstructured pdfplumber==0.10.2 # via layoutparser -pillow==10.0.0 +pillow==10.0.1 # via # layoutparser # matplotlib @@ -169,7 +170,8 @@ pillow==10.0.0 # pytesseract # python-pptx # torchvision -portalocker==2.7.0 + # unstructured-pytesseract +portalocker==2.8.2 # via iopath protobuf==4.24.3 # via @@ -181,7 +183,7 @@ pycocotools==2.0.7 # via effdet pycparser==2.21 # via cffi -pycryptodome==3.18.0 +pycryptodome==3.19.0 # via -r requirements/base.in pydantic==1.10.12 # via @@ -191,7 +193,7 @@ pypandoc==1.11 # via unstructured pyparsing==3.1.1 # via matplotlib -pypdf==3.16.0 +pypdf==3.16.1 # via -r requirements/base.in pypdfium2==4.20.0 # via pdfplumber @@ -275,12 +277,11 @@ tqdm==4.66.1 # iopath # nltk # transformers -transformers==4.33.1 +transformers==4.33.2 # via unstructured-inference -typing-extensions==4.7.1 +typing-extensions==4.8.0 # via # fastapi - # filelock # huggingface-hub # iopath # onnx @@ -293,9 +294,11 @@ typing-inspect==0.9.0 # via dataclasses-json tzdata==2023.3 # via pandas -unstructured[local-inference]==0.10.14 +unstructured[local-inference]==0.10.15 # via -r requirements/base.in -unstructured-inference==0.5.25 +unstructured-inference==0.5.28 + # via unstructured +unstructured-pytesseract==0.3.12 # via unstructured urllib3==2.0.4 # via requests @@ -303,5 +306,5 @@ uvicorn==0.23.2 # via -r requirements/base.in xlrd==2.0.1 # via unstructured -xlsxwriter==3.1.3 +xlsxwriter==3.1.4 # via python-pptx diff --git a/requirements/test.txt b/requirements/test.txt index a3b0b937..2dfdd243 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -87,7 +87,7 @@ comm==0.1.4 # via # ipykernel # ipywidgets -contourpy==1.1.0 +contourpy==1.1.1 # via # -r requirements/base.txt # matplotlib @@ -105,7 +105,7 @@ dataclasses-json==0.6.0 # via # -r requirements/base.txt # unstructured -debugpy==1.7.0 +debugpy==1.8.0 # via ipykernel decorator==5.1.1 # via ipython @@ -146,7 +146,7 @@ fastcore==1.5.29 # nbdev fastjsonschema==2.18.0 # via nbformat -filelock==3.12.3 +filelock==3.12.4 # via # -r requirements/base.txt # huggingface-hub @@ -168,7 +168,7 @@ fonttools==4.42.1 # matplotlib fqdn==1.5.1 # via jsonschema -fsspec==2023.9.0 +fsspec==2023.9.1 # via # -r requirements/base.txt # huggingface-hub @@ -183,7 +183,7 @@ httpcore==0.18.0 # via httpx httpx==0.25.0 # via -r requirements/test.in -huggingface-hub==0.17.1 +huggingface-hub==0.17.2 # via # -r requirements/base.txt # timm @@ -220,7 +220,7 @@ ipython==8.15.0 # jupyter-console ipython-genutils==0.2.0 # via qtconsole -ipywidgets==8.1.0 +ipywidgets==8.1.1 # via jupyter isoduration==20.11.0 # via jsonschema @@ -284,7 +284,7 @@ jupyter-server==2.7.3 # notebook-shim jupyter-server-terminals==0.4.4 # via jupyter-server -jupyterlab==4.0.5 +jupyterlab==4.0.6 # via notebook jupyterlab-pygments==0.2.2 # via nbconvert @@ -292,7 +292,7 @@ jupyterlab-server==2.25.0 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.8 +jupyterlab-widgets==3.0.9 # via ipywidgets kiwisolver==1.4.5 # via @@ -322,7 +322,7 @@ marshmallow==3.20.1 # via # -r requirements/base.txt # dataclasses-json -matplotlib==3.7.3 +matplotlib==3.8.0 # via # -r requirements/base.txt # pycocotools @@ -363,7 +363,7 @@ nbformat==5.9.2 # jupyter-server # nbclient # nbconvert -nest-asyncio==1.5.7 +nest-asyncio==1.5.8 # via ipykernel networkx==3.1 # via @@ -379,7 +379,7 @@ notebook-shim==0.2.3 # via # jupyterlab # notebook -numpy==1.25.2 +numpy==1.26.0 # via # -r requirements/base.txt # contourpy @@ -440,6 +440,7 @@ packaging==23.1 # qtconsole # qtpy # transformers + # unstructured-pytesseract pandas==2.1.0 # via # -r requirements/base.txt @@ -469,7 +470,7 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pillow==10.0.0 +pillow==10.0.1 # via # -r requirements/base.txt # layoutparser @@ -479,13 +480,14 @@ pillow==10.0.0 # pytesseract # python-pptx # torchvision + # unstructured-pytesseract platformdirs==3.10.0 # via # black # jupyter-core pluggy==1.3.0 # via pytest -portalocker==2.7.0 +portalocker==2.8.2 # via # -r requirements/base.txt # iopath @@ -520,7 +522,7 @@ pycparser==2.21 # via # -r requirements/base.txt # cffi -pycryptodome==3.18.0 +pycryptodome==3.19.0 # via -r requirements/base.txt pydantic==1.10.12 # via @@ -542,7 +544,7 @@ pyparsing==3.1.1 # via # -r requirements/base.txt # matplotlib -pypdf==3.16.0 +pypdf==3.16.1 # via -r requirements/base.txt pypdfium2==4.20.0 # via @@ -638,7 +640,7 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rpds-py==0.10.2 +rpds-py==0.10.3 # via # jsonschema # referencing @@ -737,7 +739,7 @@ tqdm==4.66.1 # iopath # nltk # transformers -traitlets==5.9.0 +traitlets==5.10.0 # via # comm # ipykernel @@ -754,17 +756,16 @@ traitlets==5.9.0 # nbconvert # nbformat # qtconsole -transformers==4.33.1 +transformers==4.33.2 # via # -r requirements/base.txt # unstructured-inference -typing-extensions==4.7.1 +typing-extensions==4.8.0 # via # -r requirements/base.txt # async-lru # black # fastapi - # filelock # huggingface-hub # iopath # mypy @@ -781,9 +782,13 @@ tzdata==2023.3 # via # -r requirements/base.txt # pandas -unstructured[local-inference]==0.10.14 +unstructured[local-inference]==0.10.15 # via -r requirements/base.txt -unstructured-inference==0.5.25 +unstructured-inference==0.5.28 + # via + # -r requirements/base.txt + # unstructured +unstructured-pytesseract==0.3.12 # via # -r requirements/base.txt # unstructured @@ -809,13 +814,13 @@ websocket-client==1.6.3 # via jupyter-server wheel==0.41.2 # via astunparse -widgetsnbextension==4.0.8 +widgetsnbextension==4.0.9 # via ipywidgets xlrd==2.0.1 # via # -r requirements/base.txt # unstructured -xlsxwriter==3.1.3 +xlsxwriter==3.1.4 # via # -r requirements/base.txt # python-pptx diff --git a/scripts/parallel-mode-test.sh b/scripts/parallel-mode-test.sh index 5b212bcf..d2e34ea0 100755 --- a/scripts/parallel-mode-test.sh +++ b/scripts/parallel-mode-test.sh @@ -27,7 +27,9 @@ do echo Testing: "$curl_command" # Run in single mode - $curl_command 2> /dev/null | jq -S > output.json + # Note(austin): Parallel mode screws up hierarchy! While we deal with that, + # let's ignore parent_id fields in the results + $curl_command 2> /dev/null | jq -S 'del(..|.parent_id?)' > output.json # Stop if curl didn't work if [ ! -s output.json ]; then @@ -38,7 +40,7 @@ do # Run in parallel mode curl_command="curl $base_url_2/general/v0/general $params" - $curl_command 2> /dev/null | jq -S > parallel_output.json + $curl_command 2> /dev/null | jq -S 'del(..|.parent_id?)' > parallel_output.json # Stop if curl didn't work if [ ! -s parallel_output.json ]; then