Skip to content

Commit

Permalink
fix: convert to pixels (#74)
Browse files Browse the repository at this point in the history
Fixed failure to convert points to pixels when loading embedded elements in PDF. Also added paddleocr dependency for x86_64 machines, and corrected an incompatibility between `LayoutElement` and its `unstructured` equivalent.
  • Loading branch information
qued authored Mar 29, 2023
1 parent 43887e6 commit db173d0
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 27 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 0.3.0

* Fix for text block detection
* Add paddleocr dependency to setup for x86_64 machines

## 0.2.14

* Suppressed processing progress bars
Expand Down
18 changes: 9 additions & 9 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,22 @@ coloredlogs==15.0.1
# via onnxruntime
contourpy==1.0.7
# via matplotlib
cryptography==39.0.2
cryptography==40.0.1
# via pdfminer-six
cycler==0.11.0
# via matplotlib
effdet==0.3.0
# via layoutparser
fastapi==0.95.0
# via unstructured-inference (setup.py)
filelock==3.10.0
filelock==3.10.7
# via
# huggingface-hub
# torch
# transformers
flatbuffers==23.3.3
# via onnxruntime
fonttools==4.39.2
fonttools==4.39.3
# via matplotlib
h11==0.14.0
# via uvicorn
Expand Down Expand Up @@ -86,7 +86,7 @@ omegaconf==2.3.0
# via effdet
onnxruntime==1.14.1
# via unstructured-inference (setup.py)
opencv-python==4.7.0.72
opencv-python==4.6.0.66
# via
# layoutparser
# unstructured-inference (setup.py)
Expand Down Expand Up @@ -121,7 +121,7 @@ pycocotools==2.0.6
# via effdet
pycparser==2.21
# via cffi
pydantic==1.10.6
pydantic==1.10.7
# via fastapi
pyparsing==3.0.9
# via matplotlib
Expand All @@ -133,7 +133,7 @@ python-dateutil==2.8.2
# pandas
python-multipart==0.0.6
# via unstructured-inference (setup.py)
pytz==2022.7.1
pytz==2023.3
# via pandas
pyyaml==6.0
# via
Expand All @@ -142,7 +142,7 @@ pyyaml==6.0
# omegaconf
# timm
# transformers
regex==2022.10.31
regex==2023.3.23
# via transformers
requests==2.28.2
# via
Expand All @@ -161,7 +161,7 @@ sympy==1.11.1
# via
# onnxruntime
# torch
timm==0.6.12
timm==0.6.13
# via effdet
tokenizers==0.13.2
# via transformers
Expand All @@ -181,7 +181,7 @@ tqdm==4.65.0
# huggingface-hub
# iopath
# transformers
transformers==4.27.2
transformers==4.27.4
# via unstructured-inference (setup.py)
typing-extensions==4.5.0
# via
Expand Down
25 changes: 17 additions & 8 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#
anyio==3.6.2
# via jupyter-server
appnope==0.1.3
# via
# ipykernel
# ipython
argon2-cffi==21.3.0
# via
# jupyter-server
Expand All @@ -31,7 +35,7 @@ cffi==1.15.1
# via argon2-cffi-bindings
click==8.1.3
# via pip-tools
comm==0.1.2
comm==0.1.3
# via ipykernel
debugpy==1.6.6
# via ipykernel
Expand All @@ -57,14 +61,15 @@ importlib-resources==5.12.0
# via jsonschema
ipykernel==6.22.0
# via
# ipywidgets
# jupyter
# jupyter-console
# nbclassic
# notebook
# qtconsole
ipython==8.11.0
# via
# -r dev.in
# -r requirements/dev.in
# ipykernel
# ipywidgets
# jupyter-console
Expand All @@ -73,7 +78,7 @@ ipython-genutils==0.2.0
# nbclassic
# notebook
# qtconsole
ipywidgets==8.0.5
ipywidgets==8.0.6
# via jupyter
isoduration==20.11.0
# via jsonschema
Expand Down Expand Up @@ -126,7 +131,7 @@ jupyter-server-terminals==0.4.4
# via jupyter-server
jupyterlab-pygments==0.2.2
# via nbconvert
jupyterlab-widgets==3.0.6
jupyterlab-widgets==3.0.7
# via ipywidgets
markupsafe==2.1.2
# via
Expand Down Expand Up @@ -184,7 +189,7 @@ pip-tools==6.12.3
# via -r requirements/dev.in
pkgutil-resolve-name==1.3.10
# via jsonschema
platformdirs==3.1.1
platformdirs==3.2.0
# via jupyter-core
prometheus-client==0.16.0
# via
Expand Down Expand Up @@ -234,7 +239,7 @@ pyzmq==25.0.2
# qtconsole
qtconsole==5.4.1
# via jupyter
qtpy==2.3.0
qtpy==2.3.1
# via qtconsole
rfc3339-validator==0.1.4
# via
Expand Down Expand Up @@ -269,6 +274,10 @@ terminado==0.17.1
# notebook
tinycss2==1.2.1
# via nbconvert
tomli==2.0.1
# via
# build
# pyproject-hooks
tornado==6.2
# via
# ipykernel
Expand Down Expand Up @@ -299,7 +308,7 @@ uri-template==1.2.0
# via jsonschema
wcwidth==0.2.6
# via prompt-toolkit
webcolors==1.12
webcolors==1.13
# via jsonschema
webencodings==0.5.1
# via
Expand All @@ -309,7 +318,7 @@ websocket-client==1.5.1
# via jupyter-server
wheel==0.40.0
# via pip-tools
widgetsnbextension==4.0.6
widgetsnbextension==4.0.7
# via ipywidgets
zipp==3.15.0
# via
Expand Down
8 changes: 4 additions & 4 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ appdirs==1.4.4
# via label-studio-tools
attrs==22.2.0
# via pytest
black==23.1.0
black==23.3.0
# via -r requirements/test.in
certifi==2022.12.7
# via
Expand All @@ -29,7 +29,7 @@ coverage[toml]==7.2.2
# pytest-cov
exceptiongroup==1.1.1
# via pytest
filelock==3.10.0
filelock==3.10.7
# via huggingface-hub
flake8==6.0.0
# via
Expand Down Expand Up @@ -82,13 +82,13 @@ pdf2image==1.16.3
# via -r requirements/test.in
pillow==9.4.0
# via pdf2image
platformdirs==3.1.1
platformdirs==3.2.0
# via black
pluggy==1.0.0
# via pytest
pycodestyle==2.10.0
# via flake8
pydantic==1.10.6
pydantic==1.10.7
# via label-studio-sdk
pydocstyle==6.3.0
# via flake8-docstrings
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
limitations under the License.
"""
from setuptools import setup, find_packages
from platform import machine

from unstructured_inference.__version__ import __version__

Expand Down Expand Up @@ -60,6 +59,6 @@
"opencv-python==4.6.0.66",
"onnxruntime",
"transformers",
'unstructured.PaddleOCR ; platform_machine=="x86_64"',
],
extras_require={"paddle-ocr": "unstructured.PaddleOCR"},
)
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.14" # pragma: no cover
__version__ = "0.3.0" # pragma: no cover
12 changes: 11 additions & 1 deletion unstructured_inference/inference/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ def is_in(self, other: Rectangle, error_margin: Optional[int] = None):
]
)

@property
def coordinates(self):
"""Gets coordinates of the rectangle"""
return ((self.x1, self.y1), (self.x1, self.y2), (self.x2, self.y2), (self.x2, self.y1))


@dataclass
class TextRegion(Rectangle):
Expand All @@ -77,7 +82,12 @@ class LayoutElement(TextRegion):

def to_dict(self) -> dict:
"""Converts the class instance to dictionary form."""
return self.__dict__
out_dict = {
"coordinates": self.coordinates,
"text": self.text,
"type": self.type,
}
return out_dict

@classmethod
def from_region(cls, region: Rectangle):
Expand Down
13 changes: 11 additions & 2 deletions unstructured_inference/inference/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,12 +363,21 @@ def load_pdf(
)
word_objs = [
TextRegion(
x1=word["x0"], y1=word["top"], x2=word["x1"], y2=word["bottom"], text=word["text"]
x1=word["x0"] * dpi / 72,
y1=word["top"] * dpi / 72,
x2=word["x1"] * dpi / 72,
y2=word["bottom"] * dpi / 72,
text=word["text"],
)
for word in plumber_words
]
image_objs = [
ImageTextRegion(x1=image["x0"], y1=image["y0"], x2=image["x1"], y2=image["y1"])
ImageTextRegion(
x1=image["x0"] * dpi / 72,
y1=image["y0"] * dpi / 72,
x2=image["x1"] * dpi / 72,
y2=image["y1"] * dpi / 72,
)
for image in page.images
]
layout = word_objs + image_objs
Expand Down

0 comments on commit db173d0

Please sign in to comment.