Ruff fixes (#40)

* Reformatted codebase with new Ruff preview
* Ruff fixes for utils
* Ruff: more fixes
* Ruff: remaining utils issues
* Fix Path usages in utils
* 2 more Ruff ignores
useblocks · Feb 22, 2024 · 518ead3
1 parent 29fe5a3
commit 518ead3
Show file tree

Hide file tree

Showing 32 changed files with 271 additions and 185 deletions.
diff --git a/libpdf/__init__.py b/libpdf/__init__.py
@@ -22,7 +22,7 @@
 from libpdf.core import main_cli
 
 # define importable objects
-__all__ = ["load", "__version__", "__summary__"]
+__all__ = ["__summary__", "__version__", "load"]
 
 # Enable running
 #   python -m libpdf.__init__

diff --git a/libpdf/_import_forks.py b/libpdf/_import_forks.py
@@ -9,6 +9,7 @@
 
 These 2 methods take time, so below solution is a short-term workaround.
 """
+
 import os
 import sys
 

diff --git a/libpdf/catalog.py b/libpdf/catalog.py
@@ -1,4 +1,5 @@
 """PDF catalog extraction."""
+
 import logging
 import re
 from typing import Any, Dict, List, Union
@@ -211,14 +212,12 @@ def chapter_number_giver(
 
         if chapter_number:
             #  The assumption is that only one match is found
-            chapters_in_outline[idx_chapter].update({"number": chapter_number[0]})
-            chapters_in_outline[idx_chapter].update(
-                {"title": chapter_title.replace(chapter_number[0], "", 1).strip()}
-            )
+            chapter.update({"number": chapter_number[0]})
+            chapter.update({
+                "title": chapter_title.replace(chapter_number[0], "", 1).strip()
+            })
         else:
-            chapters_in_outline[idx_chapter].update(
-                {"number": f"virt.{new_hierarchical_level}"}
-            )
+            chapter.update({"number": f"virt.{new_hierarchical_level}"})
 
         if chapter["content"]:
             # next deeper level
@@ -653,9 +652,9 @@ def _resolve_pdf_obj_refs(
                         )
                         resolved_dict[key] = ret_list
                     else:
-                        resolved_dict[
-                            key
-                        ] = resolved  # add resolved element to dictionary
+                        resolved_dict[key] = (
+                            resolved  # add resolved element to dictionary
+                        )
             else:
                 # leave other types as they are
                 resolved_dict[key] = value

diff --git a/libpdf/core.py b/libpdf/core.py
@@ -10,7 +10,7 @@
 # not importing load(), so no circular import when importing from root __init__.py
 from libpdf import __summary__, __version__, parameters  # pylint: disable=cyclic-import
 from libpdf.apiobjects import ApiObjects
-from libpdf.extract import LibpdfException, extract
+from libpdf.extract import LibpdfError, extract
 from libpdf.log import config_logger, get_level_name, set_log_level
 from libpdf.parameters import RENDER_ELEMENTS
 from libpdf.process import output_dump
@@ -139,7 +139,7 @@ def main(  # pylint: disable=too-many-arguments,too-many-locals  # no reasonable
                 no_rects,
                 overall_pbar,
             )
-        except LibpdfException:
+        except LibpdfError:
             if cli_usage:
                 LOG.critical("Exiting with code 1")
                 sys.exit(1)

diff --git a/libpdf/exceptions.py b/libpdf/exceptions.py
@@ -1,5 +1,12 @@
 """Libpdf exceptions."""
 
 
-class LibpdfException(Exception):
+class LibpdfError(Exception):
     """Generic libpdf exception class."""
+
+
+class TextContainsNewlineError(ValueError):
+    """Text cannot contain newline character."""
+
+    def __init__(self, text: str):
+        super().__init__(f'Input text "{text}" contains a new line character.')
diff --git a/libpdf/extract.py b/libpdf/extract.py
@@ -1,4 +1,5 @@
 """Core routines for PDF extraction."""
+
 import itertools
 import logging
 import os
@@ -14,7 +15,7 @@
 from libpdf import process as pro
 from libpdf.apiobjects import ApiObjects
 from libpdf.catalog import catalog, extract_catalog
-from libpdf.exceptions import LibpdfException
+from libpdf.exceptions import LibpdfError
 from libpdf.log import logging_needed
 from libpdf.models.figure import Figure
 from libpdf.models.file import File
@@ -86,7 +87,7 @@ def extract(  # pylint: disable=too-many-locals, too-many-branches, too-many-sta
     :param no_rects: flag triggering the exclusion of rects
     :param overall_pbar: total progress bar for whole libpdf run
     :return: instance of Objects class
-    :raise LibpdfException: PDF contains no pages
+    :raise LibpdfError: PDF contains no pages
     """
     LOG.info("PDF extraction started ...")
 
@@ -116,7 +117,7 @@ def extract(  # pylint: disable=too-many-locals, too-many-branches, too-many-sta
             if len(pdf.pages) == 0:
                 message = "Page range selection: no pages left in the PDF to analyze."
                 LOG.critical(message)
-                raise LibpdfException(message)
+                raise LibpdfError(message)
 
         overall_pbar.update(5)
         pdf = delete_page_ann(pdf)
@@ -131,7 +132,7 @@ def extract(  # pylint: disable=too-many-locals, too-many-branches, too-many-sta
         pages_list = extract_page_metadata(pdf)
 
         if not pages_list:
-            raise LibpdfException("PDF contains no pages")
+            raise LibpdfError("PDF contains no pages")
 
         overall_pbar.update(1)
 
@@ -530,15 +531,15 @@ def _get_datetime_format(date: str):
     if "CreationDate" in pdf.metadata:
         preprocessed_date = _time_preprocess(pdf.metadata["CreationDate"])
         time_format = _get_datetime_format(preprocessed_date)
-        file_meta_params.update(
-            {"creation_date": datetime.strptime(preprocessed_date, time_format)}
-        )
+        file_meta_params.update({
+            "creation_date": datetime.strptime(preprocessed_date, time_format)
+        })
     if "ModDate" in pdf.metadata:
         preprocessed_date = _time_preprocess(pdf.metadata["ModDate"])
         time_format = _get_datetime_format(preprocessed_date)
-        file_meta_params.update(
-            {"modified_date": datetime.strptime(preprocessed_date, time_format)}
-        )
+        file_meta_params.update({
+            "modified_date": datetime.strptime(preprocessed_date, time_format)
+        })
     if "Trapped" in pdf.metadata:
         file_meta_params.update({"trapped": pdf.metadata["Trapped"]})
 
@@ -705,7 +706,7 @@ def extract_rects(
                 )
 
                 LOG.info(
-                    f"found rect at {rect_bbox} at page {idx_page+1}: color {non_stroking_color}"
+                    f"found rect at {rect_bbox} at page {idx_page + 1}: color {non_stroking_color}"
                 )
                 lt_textbox = lt_textbox_crop(
                     rect_bbox,
@@ -722,7 +723,9 @@ def extract_rects(
                 rect_list.append(rect)
 
         else:
-            LOG.info(f"found no rects on page {idx_page+1}: {page_crop.objects.keys()}")
+            LOG.info(
+                f"found no rects on page {idx_page + 1}: {page_crop.objects.keys()}"
+            )
 
     # return figure_list
     return rect_list

diff --git a/libpdf/models/element.py b/libpdf/models/element.py
@@ -1,4 +1,5 @@
 """Definition for PDF elements."""
+
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING
 

diff --git a/libpdf/models/figure.py b/libpdf/models/figure.py
@@ -1,4 +1,5 @@
 """Definition for PDF figures."""
+
 from typing import TYPE_CHECKING, List
 
 from libpdf.models.element import Element

diff --git a/libpdf/models/file.py b/libpdf/models/file.py
@@ -1,4 +1,5 @@
 """Definition for PDF file."""
+
 from typing import TYPE_CHECKING
 
 from libpdf.models.file_meta import FileMeta

diff --git a/libpdf/models/file_meta.py b/libpdf/models/file_meta.py
@@ -1,4 +1,5 @@
 """Definition for PDF file meta data."""
+
 from datetime import datetime
 from typing import TYPE_CHECKING
 

diff --git a/libpdf/models/horizontal_box.py b/libpdf/models/horizontal_box.py
@@ -1,6 +1,6 @@
 """Definition of HorizontalBox to contain text in the PDF."""
 
-from typing import List
+from __future__ import annotations
 
 
 class Char:  # pylint: disable=too-few-public-methods # simplicity is good.
@@ -11,30 +11,32 @@ class Char:  # pylint: disable=too-few-public-methods # simplicity is good.
     :vartype text: str
     :ivar x0: distance from the left of the page to the left edge of the character
     :vartype x0: float
-    :ivar y0: distance from the bottom of the page to the lower edge of the character (less than y1)
+    :ivar y0: distance from the bottom of the page to the lower edge of the character
+        (less than y1)
     :vartype y0: float
     :ivar x1: distance from the left of the page to the right edge of the character
     :vartype x1: float
-    :ivar y1: distance from the bottom of the page to the upper edge of the character (greater than y0)
+    :ivar y1: distance from the bottom of the page to the upper edge of the character
+        (greater than y0)
     :vartype y1: float
     """
 
     def __init__(
         self,
         text: str,
-        x0: float = None,
-        y0: float = None,
-        x1: float = None,
-        y1: float = None,
+        x0: float | None = None,
+        y0: float | None = None,
+        x1: float | None = None,
+        y1: float | None = None,
     ):
-        """Init the class with plain char of a character and its rectangular coordinates."""
+        """Init with plain char of a character and its rectangular coordinates."""
         self.x0 = x0
         self.y0 = y0
         self.x1 = x1
         self.y1 = y1
         self.text = text
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         """Make the text part of the repr for better debugging."""
         return f"{type(self).__name__}({self.text})"
 
@@ -51,11 +53,11 @@ class Word:
 
     def __init__(
         self,
-        chars: List[Char],
-        x0: float = None,
-        y0: float = None,
-        x1: float = None,
-        y1: float = None,
+        chars: list[Char],
+        x0: float | None = None,
+        y0: float | None = None,
+        x1: float | None = None,
+        y1: float | None = None,
     ):
         """Init the class with plain text of a word and its rectangular coordinates."""
         self.x0 = x0
@@ -71,11 +73,11 @@ def __init__(
             self.y1 = max(text_obj.y1 for text_obj in self.chars)
 
     @property
-    def text(self):
+    def text(self) -> str:
         """Return plain text."""
         return "".join([x.text for x in self.chars])
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         """Make the text part of the repr for better debugging."""
         return f"{type(self).__name__}({self.text})"
 
@@ -92,13 +94,13 @@ class HorizontalLine:
 
     def __init__(
         self,
-        words: List[Word],
-        x0: float = None,
-        y0: float = None,
-        x1: float = None,
-        y1: float = None,
+        words: list[Word],
+        x0: float | None = None,
+        y0: float | None = None,
+        x1: float | None = None,
+        y1: float | None = None,
     ):
-        """Init the class with plain text of a horizontal line and its rectangular coordinates."""
+        """Init with plain text of a horizontal line and its rectangular coordinates."""
         self.x0 = x0
         self.y0 = y0
         self.x1 = x1
@@ -112,11 +114,11 @@ def __init__(
             self.y1 = max(text_obj.y1 for text_obj in self.words)
 
     @property
-    def text(self):
+    def text(self) -> str:
         """Return plain text."""
         return " ".join([x.text for x in self.words])
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         """Make the text part of the repr for better debugging."""
         return f"{type(self).__name__}({self.text})"
 
@@ -133,13 +135,13 @@ class HorizontalBox:
 
     def __init__(
         self,
-        lines: List[HorizontalLine],
-        x0: float = None,
-        y0: float = None,
-        x1: float = None,
-        y1: float = None,
+        lines: list[HorizontalLine],
+        x0: float | None = None,
+        y0: float | None = None,
+        x1: float | None = None,
+        y1: float | None = None,
     ):
-        """Init the class with plain text of a horizontal box and its rectangular coordinates."""
+        """Init with plain text of a horizontal box and its rectangular coordinates."""
         self.x0 = x0
         self.y0 = y0
         self.x1 = x1
@@ -153,11 +155,11 @@ def __init__(
             self.y1 = max(text_obj.y1 for text_obj in self.lines)
 
     @property
-    def text(self):
+    def text(self) -> str:
         """Return plain text."""
         return "\n".join([x.text for x in self.lines])
 
-    def __repr__(self):
+    def __repr__(self) -> str | None:
         """Make the text part of the repr for better debugging."""
         if self.lines:
             return f"{type(self).__name__}({self.text})"

diff --git a/libpdf/models/link.py b/libpdf/models/link.py
@@ -1,4 +1,5 @@
 """Definition for PDF linked text."""
+
 from typing import TYPE_CHECKING, Dict, Union
 
 from libpdf.models.model_base import ModelBase

diff --git a/libpdf/models/model_base.py b/libpdf/models/model_base.py
@@ -1,4 +1,5 @@
 """Base class for all PDF model classes."""
+
 import logging
 
 LOG = logging.getLogger(__name__)

diff --git a/libpdf/models/page.py b/libpdf/models/page.py
@@ -1,4 +1,5 @@
 """Definition for PDF pages."""
+
 from typing import TYPE_CHECKING, List, Union
 
 from libpdf.models.model_base import ModelBase

diff --git a/libpdf/models/paragraph.py b/libpdf/models/paragraph.py
@@ -1,4 +1,5 @@
 """Definition for PDF textblocks."""
+
 from typing import TYPE_CHECKING, List
 
 from libpdf.models.element import Element

diff --git a/libpdf/models/position.py b/libpdf/models/position.py
@@ -1,4 +1,5 @@
 """Definition of positions in the PDF."""
+
 from typing import TYPE_CHECKING
 
 from libpdf.parameters import TARGET_COOR_TOLERANCE

diff --git a/libpdf/models/rect.py b/libpdf/models/rect.py
@@ -1,4 +1,5 @@
 """Definition for PDF rects."""
+
 from __future__ import annotations
 
 from typing import TYPE_CHECKING

diff --git a/libpdf/models/root.py b/libpdf/models/root.py
@@ -1,4 +1,5 @@
 """Definition for PDF root element."""
+
 from typing import List, Union
 
 from libpdf.models.chapter import Chapter

diff --git a/libpdf/models/table.py b/libpdf/models/table.py
@@ -1,4 +1,5 @@
 """Definition for PDF tables."""
+
 from operator import attrgetter
 from typing import List
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ @@
     These 2 methods take time, so below solution is a short-term workaround.
     """
     import os
     import sys
@@ Expand Down @@