Skip to content

Commit

Permalink
Merge branch 'release/0.5.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
stumpylog committed Nov 7, 2023
2 parents f65968a + feac0d8 commit 8ba7709
Show file tree
Hide file tree
Showing 11 changed files with 428 additions and 248 deletions.
2 changes: 1 addition & 1 deletion .docker/docker-compose.ci-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
version: "3"
services:
tika:
image: ghcr.io/paperless-ngx/tika:latest
image: docker.io/apache/tika:latest
hostname: tika
container_name: tika
network_mode: host
Expand Down
22 changes: 13 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
contents: read
steps:
-
uses: actions/checkout@v3
uses: actions/checkout@v4
-
name: Set up Python 3.10
uses: actions/setup-python@v4
Expand Down Expand Up @@ -52,11 +52,11 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12-dev', 'pypy3.8', 'pypy3.9']
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy3.8', 'pypy3.9', 'pypy3.10']

steps:
-
uses: actions/checkout@v3
uses: actions/checkout@v4
-
name: Start containers
run: |
Expand All @@ -74,6 +74,10 @@ jobs:
name: Install Hatch
run: |
pip3 --quiet install --upgrade hatch
-
name: List installed dependencies
run: |
hatch run pip-list
-
name: Run tests
run: |
Expand Down Expand Up @@ -101,7 +105,7 @@ jobs:
- lint
steps:
-
uses: actions/checkout@v3
uses: actions/checkout@v4
-
name: Set up Python 3.10
uses: actions/setup-python@v4
Expand All @@ -119,7 +123,7 @@ jobs:
-
uses: actions/upload-artifact@v3
with:
name: artifacts
name: build-artifacts
path: dist/*
if-no-files-found: error
retention-days: 7
Expand All @@ -135,11 +139,11 @@ jobs:
- test
steps:
-
uses: actions/checkout@v3
uses: actions/checkout@v4
-
uses: actions/download-artifact@v3
with:
name: artifacts
name: build-artifacts
path: dist
-
name: Get latest release info
Expand Down Expand Up @@ -173,8 +177,8 @@ jobs:
-
uses: actions/download-artifact@v3
with:
name: artifacts
name: build-artifacts
path: dist
-
name: Publish build to PyPI
uses: pypa/gh-action-pypi-publish@v1.8.8
uses: pypa/gh-action-pypi-publish@v1.8.10
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
Expand Down
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
repos:
# General hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: check-docstring-first
- id: check-json
Expand All @@ -27,7 +27,7 @@ repos:
- id: check-case-conflict
- id: detect-private-key
- repo: https://github.com/pre-commit/mirrors-prettier
rev: 'v3.0.0'
rev: 'v3.0.3'
hooks:
- id: prettier
types_or:
Expand All @@ -36,11 +36,11 @@ repos:
- markdown
exclude: "(^Pipfile\\.lock$)"
# Python hooks
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: 'v0.0.280'
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.1.4'
hooks:
- id: ruff
- repo: https://github.com/psf/black
rev: 23.7.0
rev: 23.10.1
hooks:
- id: black
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.5.0] - 2023-11-07

### Added

- Testing on PyPy 3.10
- Testing on released Python 3.12

### Changed

- `.github` and `.docker` folders are no longer included in the source distribution
- Changed the license to Mozilla Public License Version 2.0
- `pypa/gh-action-pypi-publish` updated to v1.8.10
- CI testing now uses the official Apache Tika image (minimal) instead of the paperless-ngx image

## [0.4.0] - 2023-07-27

### Added
Expand Down
549 changes: 345 additions & 204 deletions LICENSE.txt

Large diffs are not rendered by default.

66 changes: 45 additions & 21 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ build-backend = "hatchling.build"
[project]
name = "tika-client"
dynamic = ["version"]
description = 'A modern REST client for Apache Tika server'
description = "A modern REST client for Apache Tika server"
readme = "README.md"
requires-python = ">=3.8"
license = "GPL-3.0-only"
license = "MPL-2.0"
keywords = []
authors = [
{ name = "Trenton H", email = "[email protected].com" },
{ name = "Trenton H", email = "rda0128ou@mozmail.com" },
]
classifiers = [
"Development Status :: 4 - Beta",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Environment :: Web Environment",
Expand All @@ -30,7 +30,10 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy"
]
dependencies = ["httpx ~= 0.24"]
dependencies = [
"httpx ~= 0.25; python_version >= '3.9'",
"httpx ~= 0.24; python_version < '3.9'"
]

[project.urls]
Documentation = "https://github.com/stumpylog/tika-rest-client#readme"
Expand All @@ -40,14 +43,21 @@ Source = "https://github.com/stumpylog/tika-rest-client"
[tool.hatch.version]
path = "src/tika_client/__about__.py"

[tool.hatch.build.targets.sdist]
exclude = [
".github",
".docker"
]

[tool.hatch.envs.default]
dependencies = [
"coverage[toml]>=6.5",
"pytest",
"coverage[toml] >= 7.3",
"pytest >= 7.4",
"pytest-sugar",
"pytest-cov",
"pytest-xdist",
"pytest-httpx",
"pytest-httpx ~= 0.26; python_version >= '3.9'",
"pytest-httpx ~= 0.22; python_version < '3.9'",
"python-magic",
]

Expand All @@ -69,16 +79,17 @@ cov = [
"cov-report",
"cov-json"
]
pip-list = "pip list"

[[tool.hatch.envs.all.matrix]]
python = ["3.8", "3.9", "3.10", "3.11"]

[tool.hatch.envs.lint]
detached = true
dependencies = [
"black>=23.1.0",
"mypy>=1.0.0",
"ruff>=0.0.243",
"black>=23.10.0",
"mypy>=1.5.0",
"ruff>=0.1.3",
"httpx",
]

Expand All @@ -103,43 +114,56 @@ target-version = ["py38"]
line-length = 120

[tool.ruff]
# https://beta.ruff.rs/docs/settings/
# https://docs.astral.sh/ruff/settings/
fix = true
format = "grouped"
output-format = "grouped"
target-version = "py38"
line-length = 120
# https://beta.ruff.rs/docs/rules/
# https://docs.astral.sh/ruff/rules/
extend-select = [
"A",
"ARG",
"B",
"C",
"COM",
"C4",
"COM",
"DTZ",
"E",
"EM",
"EXE",
"ERA",
"EXE",
"F",
"FBT",
"FLY",
"I",
"ICN",
"INP",
"INP",
"INT",
"ISC",
"N",
"PIE",
"PTH",
"PERF",
"PIE",
"PGH",
"PTH",
"PL",
"PLC",
"PLE",
"PLR",
"PLW",
"PT",
"Q",
"RSE",
"RSE",
"RUF",
"S",
"SIM",
"SIM",
"SLF",
"T",
"T10",
"T20",
"TCH",
"TD",
"TID",
"TRY",
"UP",
"W",
"YTT",
Expand Down
2 changes: 1 addition & 1 deletion src/tika_client/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.0"
__version__ = "0.5.0"
5 changes: 1 addition & 4 deletions src/tika_client/_resource_recursive.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ def _common_call(
"""
Given a specific endpoint and a file, do a multipart put to the endpoint
"""
documents: List[TikaResponse] = []
for item in self._put_multipart(endpoint, filepath, mime_type):
documents.append(self._decoded_response(item))
return documents
return [self._decoded_response(item) for item in self._put_multipart(endpoint, filepath, mime_type)]


class _RecursiveMetaHtml(_TikaRmetaBase):
Expand Down
2 changes: 1 addition & 1 deletion src/tika_client/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _put_multipart(self, endpoint: str, filepath: Path, mime_type: MimeType = No
if mime_type is not None:
files = {"upload-file": (filepath.name, handle, mime_type)}
else:
files = {"upload-file": (filepath.name, handle)} # type: ignore
files = {"upload-file": (filepath.name, handle)} # type: ignore [dict-item]
try:
# Filename is valid ASCII, use it
filepath.name.encode("ascii")
Expand Down
2 changes: 1 addition & 1 deletion src/tika_client/data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# Based on https://cwiki.apache.org/confluence/display/TIKA/Metadata+Overview

logger = logging.getLogger("tika-client.data")

_TIME_RE = re.compile(
r"(?P<year>\d{4})-"
r"(?P<month>\d{2})-"
Expand Down Expand Up @@ -87,7 +88,6 @@ def __init__(self, data: Dict) -> None:
self.title = self.get_optional_string(DublinCoreKey.Title)

# Xmp keys
# TODO: Implement more of these
self.xmp_created = self.get_optional_datetime(XmpKey.Created)
self.page_count = self.get_optional_int(XmpKey.NumPages)

Expand Down

0 comments on commit 8ba7709

Please sign in to comment.