Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utility function to compute/check ZIM descriptions #111

Merged
merged 7 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ All notable changes to this project are documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0).

## [3.1.1]
## [Unreleased]

### Added
- Add utility function to compute/check ZIM descriptions #110

## [3.1.1] - 2023-07-18

### Changed

Expand Down
1 change: 1 addition & 0 deletions src/zimscraperlib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
),
}

# Recommended upper bound for the ZIM "Title" metadata, compared against len(value)
RECOMMENDED_MAX_TITLE_LENGTH = 30
# Maximum length accepted for the ZIM "Description" metadata
MAXIMUM_DESCRIPTION_METADATA_LENGTH = 80
# Maximum length accepted for the ZIM "LongDescription" metadata
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH = 4000

Expand Down
59 changes: 57 additions & 2 deletions src/zimscraperlib/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@
import pathlib
import shutil
import tempfile
from typing import Optional, Union
from typing import Optional, Tuple, Union

from . import logger
from .constants import MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH
from .constants import MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH
from .download import stream_file


def handle_user_provided_file(
source: Optional[Union[pathlib.Path, str]] = None,
dest: Optional[pathlib.Path] = None,
in_dir: pathlib.Path = None,
in_dir: Optional[pathlib.Path] = None,
nocopy: bool = False,
) -> Union[pathlib.Path, None]:
"""path to downloaded or copied a user provided file (URL or path)
Expand Down Expand Up @@ -49,3 +51,56 @@ def handle_user_provided_file(
shutil.copy(source, dest)

return dest


def compute_descriptions(
    default_description: str,
    user_description: Optional[str],
    user_long_description: Optional[str],
) -> Tuple[str, Optional[str]]:
    """Build the short and long descriptions to store in a ZIM.

    Lengths are measured with len() and compared to MAX_DESC_LENGTH (short
    description) and MAX_LONG_DESC_LENGTH (long description).

    Rules, applied in this order:

    - a user-provided value longer than its limit is rejected with ValueError;
    - when no long description is provided and the default description does not
      fit in the short description, the long description is taken from the
      default description (cut to the long limit if needed);
    - when no short description is provided, it is taken from the default
      description (cut to the short limit if needed);
    - user-provided values which pass the length check are returned unchanged.

    Whenever the default description has to be cut, the last kept character is
    replaced by "…" so that the result is exactly at the limit.

    args:
        default_description: fallback text used when a user value is missing
                             (typically fetched online)
        user_description: short description chosen by the user (typically a
                          CLI argument), or None
        user_long_description: long description chosen by the user (typically a
                               CLI argument), or None

    Returns a tuple of (description, long_description); long_description is left
    as received (e.g. None) when it is neither provided nor derived.

    Raises ValueError when a user-provided description exceeds its limit.
    """

    def _fit_default(limit: int) -> str:
        # default description cut down to `limit` characters, last one replaced
        # by an ellipsis when something had to be dropped
        head = default_description[0:limit]
        if len(default_description) > limit:
            return head[:-1] + "…"
        return head

    # user-provided values are never truncated: too long is an error
    if user_description and MAX_DESC_LENGTH < len(user_description):
        raise ValueError(
            f"Description too long ({len(user_description)}>{MAX_DESC_LENGTH})"
        )
    if user_long_description and MAX_LONG_DESC_LENGTH < len(user_long_description):
        raise ValueError(
            f"LongDescription too long ({len(user_long_description)}"
            f">{MAX_LONG_DESC_LENGTH})"
        )

    # default description is only looked at when a user value is missing
    long_description = user_long_description
    if not long_description and len(default_description) > MAX_DESC_LENGTH:
        long_description = _fit_default(MAX_LONG_DESC_LENGTH)

    description = user_description
    if not description:
        description = _fit_default(MAX_DESC_LENGTH)

    return (description, long_description)
3 changes: 2 additions & 1 deletion src/zimscraperlib/zim/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
MANDATORY_ZIM_METADATA_KEYS,
MAXIMUM_DESCRIPTION_METADATA_LENGTH,
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH,
RECOMMENDED_MAX_TITLE_LENGTH,
)
from ..i18n import is_valid_iso_639_3
from ..image.probing import is_valid_image
Expand Down Expand Up @@ -40,7 +41,7 @@ def validate_standard_str_types(name: str, value: str):

def validate_title(name: str, value: str):
    """ensures Title metadata is within recommended length

    Only the "Title" metadata is checked; any other name is accepted as-is.

    Raises ValueError when value is longer than RECOMMENDED_MAX_TITLE_LENGTH."""
    # single check against the shared constant (no hard-coded duplicate of 30)
    if name == "Title" and len(value) > RECOMMENDED_MAX_TITLE_LENGTH:
        raise ValueError(f"{name} is too long.")


Expand Down
175 changes: 174 additions & 1 deletion tests/inputs/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@
# vim: ai ts=4 sts=4 et sw=4 nu

import pathlib
from typing import Optional

import pytest

from zimscraperlib.inputs import handle_user_provided_file
from zimscraperlib.constants import (
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
)
from zimscraperlib.constants import (
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
)
from zimscraperlib.inputs import compute_descriptions, handle_user_provided_file


def test_with_none():
Expand Down Expand Up @@ -72,3 +79,169 @@ def test_remote_indir(tmp_path, valid_http_url):
assert fpath is not None
assert fpath.exists()
assert fpath.parent == tmp_path


# Placeholder passed as default description in the cases where it must not
# influence the result (user-provided values are expected to take precedence).
TEXT_NOT_USED = "text not used"

# Filler text sliced by the test cases to build descriptions of an exact length.
# NOTE(review): the "too long" cases slice up to MAX_LONG_DESC_LENGTH + 1
# characters, so this text presumably has to stay longer than that limit;
# confirm before shortening it.
LONG_TEXT = (
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor "
"incididunt ut labore et dolore magna aliqua. At erat pellentesque adipiscing "
"commodo elit at imperdiet. Rutrum tellus pellentesque eu tincidunt tortor aliquam"
" nulla facilisi. Eget lorem dolor sed viverra ipsum nunc. Ipsum nunc aliquet "
"bibendum enim facilisis gravida neque convallis. Aliquam malesuada bibendum arcu "
"vitae elementum curabitur. Platea dictumst quisque sagittis purus sit amet "
"volutpat. Blandit libero volutpat sed cras ornare. In eu mi bibendum neque "
"egestas. Egestas dui id ornare arcu odio. Pulvinar neque laoreet suspendisse "
"interdum. Fames ac turpis egestas integer eget aliquet nibh praesent tristique. Et"
" egestas quis ipsum suspendisse ultrices gravida dictum fusce. Malesuada fames ac "
"turpis egestas. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada "
"proin libero. In arcu cursus euismod quis viverra. Faucibus in ornare quam viverra"
". Curabitur vitae nunc sed velit dignissim sodales ut eu sem. Velit scelerisque in"
" dictum non consectetur a erat nam. Proin fermentum leo vel orci porta non. Fames"
" ac turpis egestas sed tempus. Vitae justo eget magna fermentum iaculis eu non. "
"Imperdiet massa tincidunt nunc pulvinar sapien et ligula. Laoreet sit amet cursus "
"sit amet dictum sit amet. Quis hendrerit dolor magna eget. Orci ac auctor augue "
"mauris augue. Consequat interdum varius sit amet mattis. At ultrices mi tempus "
"imperdiet nulla malesuada pellentesque elit. Volutpat est velit egestas dui. "
"Potenti nullam ac tortor vitae. At tempor commodo ullamcorper a lacus vestibulum "
"sed arcu non. Duis ut diam quam nulla. Vestibulum mattis ullamcorper velit sed "
"ullamcorper. Sit amet commodo nulla facilisi nullam vehicula. Faucibus purus in "
"massa tempor nec feugiat. Sem fringilla ut morbi tincidunt augue interdum velit. "
"Etiam dignissim diam quis enim lobortis scelerisque fermentum dui. Nunc vel risus "
"commodo viverra maecenas accumsan. Aenean sed adipiscing diam donec adipiscing "
"tristique. Maecenas accumsan lacus vel facilisis volutpat est velit egestas. Nulla"
" aliquet porttitor lacus luctus accumsan tortor posuere ac. Habitant morbi "
"tristique senectus et netus et. Eget mi proin sed libero enim sed faucibus turpis "
"in. Vulputate enim nulla aliquet porttitor lacus. Dui ut ornare lectus sit amet "
"est. Quam lacus suspendisse faucibus interdum posuere. Sagittis orci a scelerisque"
" purus semper eget duis at tellus. Tellus molestie nunc non blandit massa. Feugiat"
" vivamus at augue eget arcu dictum varius duis at. Varius morbi enim nunc faucibus"
" a pellentesque sit. Id aliquet lectus proin nibh nisl condimentum id venenatis a."
" Tortor dignissim convallis aenean et tortor at risus viverra adipiscing. Aliquam "
"malesuada bibendum arcu vitae elementum curabitur vitae nunc sed. Habitasse platea"
" dictumst quisque sagittis purus sit amet volutpat. Vitae auctor eu augue ut "
"lectus. At varius vel pharetra vel turpis nunc eget. Dictum at tempor commodo "
"ullamcorper a lacus vestibulum sed arcu. Pellentesque massa placerat duis "
"ultricies. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. "
"Volutpat blandit aliquam etiam erat velit scelerisque in. Amet mattis vulputate "
"enim nulla aliquet porttitor. Egestas maecenas pharetra convallis posuere morbi "
"leo urna molestie. Duis ut diam quam nulla porttitor massa id. In fermentum "
"posuere urna nec tincidunt praesent. Turpis egestas sed tempus urna et pharetra "
"pharetra massa. Tellus molestie nunc non blandit massa. Diam phasellus vestibulum "
"lorem sed risus ultricies. Egestas erat imperdiet sed euismod nisi porta lorem. "
"Quam viverra orci sagittis eu volutpat odio facilisis mauris sit. Ornare aenean "
"euismod elementum nisi quis. Laoreet non curabitur gravida arcu ac tortor "
"dignissim convallis aenean. Sagittis aliquam malesuada bibendum arcu vitae "
"elementum. Sed blandit libero volutpat sed cras ornare. Sagittis eu volutpat odio "
"facilisis mauris. Facilisis volutpat est velit egestas dui id ornare arcu odio. "
"Eu feugiat pretium nibh."
)


@pytest.mark.parametrize(
    "user_description, user_long_description, default_description, raises, "
    "expected_description, expected_long_description",
    [
        # user description set and is short, user long description not set, default
        # description does not matter
        (
            LONG_TEXT[0:MAX_DESC_LENGTH],
            None,
            TEXT_NOT_USED,
            False,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            None,
        ),
        # user description set and is too long, default description does not matter
        (LONG_TEXT[0 : MAX_DESC_LENGTH + 1], None, TEXT_NOT_USED, True, None, None),
        # user description not set and default description is short enough
        (
            None,
            None,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            False,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            None,
        ),
        # user description not set and default description is too long for description
        # but ok for long description (one character above the description limit)
        (
            None,
            None,
            LONG_TEXT[0 : MAX_DESC_LENGTH + 1],
            False,
            LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
            LONG_TEXT[0 : MAX_DESC_LENGTH + 1],
        ),
        # same, with a default description exactly at the long description limit
        (
            None,
            None,
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
            False,
            LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
        ),
        # user description not set and default description is too long for description
        # and long description
        (
            None,
            None,
            LONG_TEXT[0 : MAX_LONG_DESC_LENGTH + 1],
            False,
            LONG_TEXT[0 : MAX_DESC_LENGTH - 1] + "…",
            LONG_TEXT[0 : MAX_LONG_DESC_LENGTH - 1] + "…",
        ),
        # user description set and is short, user long description set and is short,
        # default description does not matter
        (
            LONG_TEXT[0:MAX_DESC_LENGTH],
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
            TEXT_NOT_USED,
            False,
            LONG_TEXT[0:MAX_DESC_LENGTH],
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
        ),
        # user description set and is short, user long description set and is too
        # long, default description does not matter
        (
            LONG_TEXT[0:MAX_DESC_LENGTH],
            LONG_TEXT[0 : MAX_LONG_DESC_LENGTH + 1],
            TEXT_NOT_USED,
            True,
            None,
            None,
        ),
        # user description not set, user long description set and is short,
        # default description set to something different than long desc
        (
            None,
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
            LONG_TEXT[10:MAX_LONG_DESC_LENGTH],
            False,
            LONG_TEXT[10 : MAX_DESC_LENGTH + 9] + "…",
            LONG_TEXT[0:MAX_LONG_DESC_LENGTH],
        ),
    ],
)
def test_description(
    user_description: Optional[str],
    user_long_description: Optional[str],
    default_description: str,
    *,
    raises: bool,
    expected_description: Optional[str],
    expected_long_description: Optional[str],
):
    """Check compute_descriptions on each combination of user/default inputs.

    When `raises` is set, the call is expected to fail with ValueError and the
    expected_* values are unused (None); otherwise both returned descriptions
    are compared with the expected ones.
    """
    if raises:
        with pytest.raises(ValueError):
            compute_descriptions(
                default_description, user_description, user_long_description
            )
        return

    (description, long_description) = compute_descriptions(
        default_description, user_description, user_long_description
    )

    assert description == expected_description
    assert long_description == expected_long_description
3 changes: 2 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@ deps =
pytest-cov
-r{toxinidir}/requirements.txt
commands =
pytest --runslow --runinstalled --cov=zimscraperlib --cov-report=term --cov-report term-missing
pytest --runslow --runinstalled --cov=zimscraperlib --cov-report=term --cov-report term-missing {posargs}

[testenv:black]
deps =
black>=23.1.0,<24
commands = black --check .

[testenv:isort]
base_python=py38
deps =
isort>=5.12.0,<5.13
commands = isort --profile black --check src tests
Expand Down