Skip to content

Commit

Permalink
Merge pull request #292 from IMayBeABitShy/issue_290
Browse files Browse the repository at this point in the history
Upgrade python-scraperlib to 3.X (implement #290)
  • Loading branch information
benoit74 authored Mar 27, 2024
2 parents a8a7f32 + 677d51e commit 14fdc87
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 34 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
kiwixstorage>=0.8.1,<1.0
pif>=0.8.2,<0.9
-zimscraperlib>=2.1,<3.0
+zimscraperlib>=3.3.0,<4.0
xml_to_dict>=0.1.6,<0.2
cli-formatter>=1.2.0,<1.3
py7zr>=0.20.4,<0.21
Expand Down
6 changes: 4 additions & 2 deletions src/sotoki/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,12 @@ class Sotoconf:
name: str
title: Optional[str] = ""
description: Optional[str] = ""
long_description: Optional[str] = ""
author: Optional[str] = ""
publisher: Optional[str] = ""
fname: Optional[str] = ""
tag: List[str] = field(default_factory=list)
flavour: Optional[str] = ""
iso_langs_1: List[str] = field(default_factory=list) # ISO-639-1
iso_langs_3: List[str] = field(default_factory=list) # ISO-639-3

Expand Down Expand Up @@ -193,9 +195,9 @@ def __post_init__(self):
self.dump_domain = self.domain # dumps are named after unfixed domains
self.domain = FIXED_DOMAINS.get(self.domain, self.domain)
self.iso_langs_1, self.iso_langs_3 = langs_for_domain(self.domain)
variant = "nopic" if self.without_images else "all"
self.flavour = "nopic" if self.without_images else "all"
lang_in_name = self.iso_langs_1[0] if len(self.iso_langs_1) == 1 else "mul"
self.name = self.name or f"{self.domain}_{lang_in_name}_{variant}"
self.name = self.name or f"{self.domain}_{lang_in_name}_{self.flavour}"
self.output_dir = pathlib.Path(self._output_dir).expanduser().resolve()
self.output_dir.mkdir(parents=True, exist_ok=True)
self.tmp_dir = pathlib.Path(self._tmp_dir).expanduser().resolve()
Expand Down
5 changes: 5 additions & 0 deletions src/sotoki/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ def main():
"--description",
help="Custom description for your ZIM. Site tagline otherwise",
)
metadata.add_argument(
"--long-description",
help="Custom long description for your ZIM, defaults to description if description is too long",
required=False,
)

metadata.add_argument(
"--favicon",
Expand Down
28 changes: 7 additions & 21 deletions src/sotoki/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import datetime

from zimscraperlib.zim.items import URLItem
from zimscraperlib.inputs import handle_user_provided_file
from zimscraperlib.inputs import handle_user_provided_file, compute_descriptions
from zimscraperlib.image.convertion import convert_image
from zimscraperlib.image.transformation import resize_image

Expand Down Expand Up @@ -77,9 +77,12 @@ def sanitize_inputs(self):
self.conf.title = Global.site["LongName"]
self.conf.title = self.conf.title.strip()

if not self.conf.description:
self.conf.description = Global.site["Tagline"]
self.conf.description = self.conf.description.strip()
default_description = Global.site["Tagline"].strip()
if self.conf.description:
user_description = self.conf.description.strip()
else:
user_description = None
self.conf.description, self.conf.long_description = compute_descriptions(default_description, user_description, self.conf.long_description)

if not self.conf.author:
self.conf.author = "Stack Exchange"
Expand All @@ -90,23 +93,6 @@ def sanitize_inputs(self):
self.conf.publisher = self.conf.publisher.strip()

def add_illustrations(self):
src_illus_fpath = self.build_dir / "illustration"

# if user provided a custom favicon, retrieve that
if not self.conf.favicon:
self.conf.favicon = Global.site["BadgeIconUrl"]
handle_user_provided_file(source=self.conf.favicon, dest=src_illus_fpath)

# convert to PNG (might already be PNG but it's OK)
illus_fpath = src_illus_fpath.with_suffix(".png")
convert_image(src_illus_fpath, illus_fpath)

# resize to appropriate size (ZIM uses 48x48 so we double for retina)
for size in (96, 48):
resize_image(illus_fpath, width=size, height=size, method="thumbnail")
with open(illus_fpath, "rb") as fh:
Global.creator.add_illustration(size, fh.read())

# download and add actual favicon (ICO file)
favicon_fpath = self.build_dir / "favicon.ico"
handle_user_provided_file(source=Global.site["IconUrl"], dest=favicon_fpath)
Expand Down
46 changes: 36 additions & 10 deletions src/sotoki/utils/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@

from zimscraperlib.zim.creator import Creator
from zimscraperlib.logging import getLogger as lib_getLogger
from zimscraperlib.inputs import handle_user_provided_file
from zimscraperlib.image.convertion import convert_image
from zimscraperlib.image.transformation import resize_image

from ..constants import NAME
from ..constants import NAME, SCRAPER


class Global:
Expand Down Expand Up @@ -100,18 +103,41 @@ def setup():

Global.renderer = Renderer()

# load illustration data, required for creator metadata setup
# the following code section is taken from sotoki.scraper.add_illustrations()
src_illus_fpath = Global.conf.build_dir / "illustration"
if not Global.conf.favicon:
Global.conf.favicon = Global.site["BadgeIconUrl"]
handle_user_provided_file(source=Global.conf.favicon, dest=src_illus_fpath)

# convert to PNG (might already be PNG but it's OK)
illus_fpath = src_illus_fpath.with_suffix(".png")
convert_image(src_illus_fpath, illus_fpath)

# resize to appropriate size
resize_image(illus_fpath, width=48, height=48, method="thumbnail")
with open(illus_fpath, "rb") as fh:
illustration_data = fh.read()

Global.creator = Creator(
filename=Global.conf.output_dir.joinpath(Global.conf.fname),
main_path="questions",
favicon_path="illustration",
language=Global.conf.iso_langs_3,
title=Global.conf.title,
description=Global.conf.description,
creator=Global.conf.author,
publisher=Global.conf.publisher,
name=Global.conf.name,
tags=";".join(Global.conf.tags),
date=datetime.date.today(),
).config_metadata(
Name=Global.conf.name,
Language=",".join(Global.conf.iso_langs_3), # python-scraperlib needs language list as a single string
Title=Global.conf.title,
Description=Global.conf.description,
LongDescription=Global.conf.long_description,
Creator=Global.conf.author,
Publisher=Global.conf.publisher,
Date=datetime.date.today(),
Illustration_48x48_at_1=illustration_data,
Tags=Global.conf.tags,
Scraper=SCRAPER,
Flavour=Global.conf.flavour,
# Source=,
License="CC-BY-SA", # as per stack exchange ToS, see about page in ZIM
# Relation=,
).config_verbose(True)


Expand Down

0 comments on commit 14fdc87

Please sign in to comment.