Skip to content

Commit

Permalink
Merge branch 'main' into hf-kklein-patch-4
Browse files Browse the repository at this point in the history
  • Loading branch information
hf-kklein authored Oct 30, 2024
2 parents 75da354 + f6dd8aa commit 4106cb5
Show file tree
Hide file tree
Showing 12 changed files with 2,461 additions and 21 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,7 @@ dmypy.json

# vscode settings
.vscode/

# version file for ebdamame; it is auto-generated when running the command
# python -m build
src/ebdamame/version.py
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# to update all repo revisions just run: pre-commit autoupdate
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v5.0.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 24.10.0
hooks:
- id: black
language_version: python3
- repo: https://github.com/pycqa/isort
rev: 5.10.1
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# ebdamame

[![License: GPL](https://img.shields.io/badge/License-GPL-yellow.svg)](LICENSE)
![Python Versions (officially) supported](https://img.shields.io/pypi/pyversions/ebdamame.svg)
![Unittests status badge](https://github.com/Hochfrequenz/ebdamame/workflows/Unittests/badge.svg)
![Coverage status badge](https://github.com/Hochfrequenz/ebdamame/workflows/Coverage/badge.svg)
![Linting status badge](https://github.com/Hochfrequenz/ebdamame/workflows/Linting/badge.svg)
Expand Down
11 changes: 8 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dynamic = ["readme", "version"]

[project.optional-dependencies]
coverage = [
"coverage==7.6.3"
"coverage==7.6.4"
]
formatting = [
"black==24.10.0",
Expand All @@ -47,10 +47,11 @@ test_packaging = [
tests = [
"pytest==8.3.3",
"pytest-datafiles==3.0.0",
"pytest-subtests==0.13.1"
"pytest-subtests==0.13.1",
"syrupy==4.7.2"
]
type_check = [
"mypy==1.11.2"
"mypy==1.13.0"
]

[project.urls]
Expand Down Expand Up @@ -96,3 +97,7 @@ exclude = ["/unittests"]
[tool.hatch.build.targets.wheel]
only-include = ["src"]
sources = ["src"]

[tool.pytest.ini_options]
pythonpath = ["."]
markers = ["snapshot: mark a test as a snapshot test"]
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ charset-normalizer==3.3.2
# via requests
click==8.1.7
# via -r requirements.in
colorama==0.4.6
# via click
idna==3.7
# via requests
lxml==5.2.1
Expand All @@ -30,7 +32,7 @@ networkx==3.3
# via rebdhuhn
python-docx==1.1.2
# via -r requirements.in
rebdhuhn==0.3.0
rebdhuhn==0.3.1
# via -r requirements.in
requests==2.32.0
# via rebdhuhn
Expand Down
5 changes: 4 additions & 1 deletion src/ebdamame/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def _get_tables_and_paragraphs(document: DocumentType) -> Generator[Union[Table,


_ebd_key_pattern = re.compile(r"^E_\d{4}$")
_ebd_key_with_heading_pattern = re.compile(r"^(?P<key>E_\d{4})_(?P<title>.*)\s*$")
_ebd_key_with_heading_pattern = re.compile(r"^(?P<key>E_\d{4})_?(?P<title>.*)\s*$")


class TableNotFoundError(Exception):
Expand Down Expand Up @@ -255,6 +255,9 @@ def get_all_ebd_keys(docx_file_path: Path) -> Dict[str, Tuple[str, EbdChapterInf
for paragraph, ebd_kapitel in _enrich_paragraphs_with_sections(document.paragraphs):
match = _ebd_key_with_heading_pattern.match(paragraph.text)
if match is None:
contains_ebd_number = paragraph.text.lstrip().startswith("E_")
if contains_ebd_number:
_logger.warning("Found EBD number but could not match: '%s'", paragraph.text)
continue
ebd_key = match.groupdict()["key"]
title = match.groupdict()["title"]
Expand Down
8 changes: 6 additions & 2 deletions src/ebdamame/docxtableconverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ def __init__(self, docx_tables: List[Table], ebd_key: str, chapter: str, sub_cha
# remove duplicates. Although there are usually only 5 columns visible, technically there might be even 8.
# In these cases (e.g. for E_0453) columns like 'Prüfergebnis' simply occur twice in the docx table header.
distinct_cell_texts: List[str] = [
x[0] for x in groupby(first(docx_tables).row_cells(row_index), lambda cell: cell.text)
x[0]
for x in groupby(
first(docx_tables).rows[row_index].cells, lambda cell: cell.text
) # row_cells() is deprecated and returns false rows
]
for column_index, table_cell_text in enumerate(distinct_cell_texts):
if row_index == 0 and _is_pruefende_rolle_cell_text(table_cell_text):
Expand All @@ -188,7 +191,8 @@ def __init__(self, docx_tables: List[Table], ebd_key: str, chapter: str, sub_cha
self._column_index_result_code = column_index
elif table_cell_text == "Hinweis":
self._column_index_note = column_index

# if not self._column_index_step_number:
# self._column_index_step_number = 0
self._metadata = EbdTableMetaData(ebd_code=ebd_key, sub_chapter=sub_chapter, chapter=chapter, role=role)

@staticmethod
Expand Down
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ deps =
setenv = PYTHONPATH = {toxinidir}/src
commands = python -m pytest --basetemp={envtmpdir} {posargs}

[testenv:snapshots]
# the snapshots environment runs only the tests marked as "snapshot" and updates the stored snapshots
deps =
-r requirements.txt
.[tests]
setenv = PYTHONPATH = {toxinidir}/src
commands = python -m pytest -m snapshot --basetemp={envtmpdir} {posargs} --snapshot-update

[testenv:linting]
# the linting environment is called by the GitHub Action that runs the linter
deps =
Expand Down
Loading

0 comments on commit 4106cb5

Please sign in to comment.