diff --git a/poetry.lock b/poetry.lock index be722e5..8c37a31 100644 --- a/poetry.lock +++ b/poetry.lock @@ -174,13 +174,13 @@ files = [ [[package]] name = "kfsmedia" -version = "2.0.3" +version = "2.1.0" description = "" optional = false python-versions = ">=3.11.0,<4.0.0" files = [ - {file = "kfsmedia-2.0.3-py3-none-any.whl", hash = "sha256:cfe628e721778faa3544822d24aa2a02c72d8fb0175c0e2b8aed5e3b71e5e36f"}, - {file = "kfsmedia-2.0.3.tar.gz", hash = "sha256:ac3ff2830686d87b16d57bfa399715ecc892da606d6623d8b859f53e0708d865"}, + {file = "kfsmedia-2.1.0-py3-none-any.whl", hash = "sha256:7140c2d69de750a8b9ff974c293ed57995250364c3975a06269c24a6c4911440"}, + {file = "kfsmedia-2.1.0.tar.gz", hash = "sha256:4ed77987765a4bea3cf1a59cff6366a0eded0e549c2e33814a1ab3c6b2cea750"}, ] [package.dependencies] @@ -297,4 +297,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.11.0" -content-hash = "c2068853ded30292e9a22e717981594d39976b0cca6689a518628ff2b84d86e4" +content-hash = "64f4f39eee8a97ef816eedae53c24e5b8d8a4d68f8270f3de7dd98556cedd133" diff --git a/pyproject.toml b/pyproject.toml index 0fd82b3..0f4aee8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,13 +5,13 @@ license = "MIT" name = "x" # can't leave empty because of bug with `poetry install` from poetry.lock file readme = "readme.md" repository = "https://github.com/9-FS/2021-11-15-nHentai-to-PDF" -version = "1.0.1" +version = "1.0.3" [tool.poetry.dependencies] kfsconfig = "^1.0.0" kfsfstr = "^1.0.0" kfslog = "^1.0.0" -kfsmedia = "^2.0.0" +kfsmedia = "^2.1.0" python = "^3.11.0" [build-system] diff --git a/requirements.txt b/requirements.txt index 3133207..e24790f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ kfsconfig==1.0.2 ; python_full_version >= "3.11.0" and python_full_version < "4. 
kfsfstr==1.0.2 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" kfslog==1.0.1 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" kfsmath==1.0.1 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" -kfsmedia==2.0.3 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" +kfsmedia==2.1.0 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" pillow==10.0.1 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" requests==2.31.0 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" urllib3==2.0.5 ; python_full_version >= "3.11.0" and python_full_version < "4.0.0" diff --git a/src/Hentai.py b/src/Hentai.py index 6a96bab..f9cda44 100644 --- a/src/Hentai.py +++ b/src/Hentai.py @@ -95,27 +95,24 @@ def _increment_fails(self, image_list: list[str]) -> None: Takes list of filepaths that could not be downloaded or converted and increments appropiate failure counter. """ - # re_match_1: re.Match|None - re_match_2: re.Match|None - - - for image in image_list: - # re_match_1=re.match("https://nhentai.net/g/[0-9]+/(?P<page_no>[0-9]+)/", image) # parse page number in URL - re_match_2=re.match("[0-9]+-(?P<page_no>[0-9]+)", os.path.splitext(os.path.basename(image))[0]) # parse page number in filepath - - # if re_match_1!=None: - # self._fails[int(re_match_1.groupdict()["page_no"])-1]+=1 # increment appropiate counter - # if 10<=self._fails[int(re_match_1.groupdict()["page_no"])-1]: # if any counter 10 or above: give hentai up - # self._give_up=True - - if re_match_2!=None: - self._fails[int(re_match_2.groupdict()["page_no"])-1]+=1 # increment appropiate counter - if 10<=self._fails[int(re_match_2.groupdict()["page_no"])-1]: # if any counter 10 or above: give hentai up - self._give_up=True - - else: # if naming unexpected: skip - logging.error(f"Incrementing fails counter of \"{image}\" failed.") - continue + PATTERNS: list[str]=[ + r"^((?P<page_no>[0-9]+)\.(jpg|png))$", # page URL pattern 
r"^([0-9]+-(?P[0-9]+)\.(jpg|png))$", # image filepath pattern + ] + re_match: re.Match|None + + + for image in image_list: # for each image: + for pattern in PATTERNS: # with each pattern: + re_match=re.search(pattern, image.split("/")[-1]) # try to parse page number, use only filename not path + if re_match!=None: # if page number could be parsed: + self._fails[int(re_match.groupdict()["page_no"])-1]+=1 # increment appropiate fails counter + if 10<=self._fails[int(re_match.groupdict()["page_no"])-1]: # if any counter 10 or above: + self._give_up=True # give hentai up + break + else: # if page number can't be parsed: + logging.critical(f"Incrementing fails counter of \"{image}\" failed.") # don't know which counter to increment, critical error because should not happen + raise RuntimeError(f"Error in {self._increment_fails.__name__}{inspect.signature(self._increment_fails)}: Incrementing fails counter of \"{image}\" failed.") return @@ -156,23 +153,27 @@ def download(self) -> None: raise FileExistsError(f"File \"{PDF_filepath}\" already exists. Skipped download.") # raise exception to skip upload in main if os.path.isdir(PDF_filepath)==True: # if PDF already exists as directory: skip download, append to failures logging.error(f"\"{PDF_filepath}\" already exists as directory. Skipped download.") - raise self.DownloadError(f"Error in {self.download.__name__}{inspect.signature(self.download)}: \"{PDF_filepath}\" already exists as directory. Skipped download.") + raise KFSmedia.DownloadError(f"Error in {self.download.__name__}{inspect.signature(self.download)}: \"{PDF_filepath}\" already exists as directory. 
Skipped download.") - while self._give_up==False: # while not giving up: try to download and convert - KFSmedia.download_medias(pages_URL, images_filepath) # download images # type:ignore - + while self._give_up==False: # while not giving up: try to download and convert + try: + KFSmedia.download_medias(pages_URL, images_filepath) # download images # type:ignore + except KFSmedia.DownloadError as e: + self._increment_fails(e.args[0]) # increment fails, may trigger giving up + continue + try: KFSmedia.convert_images_to_PDF(images_filepath, PDF_filepath) # convert images to PDF except KFSmedia.ConversionError as e: - self._increment_fails(e.args[0]) # increment conversion fails, may trigger giving up + self._increment_fails(e.args[0]) # increment fails, may trigger giving up continue else: # if conversion successful: self.PDF_filepath=PDF_filepath # save PDF filepath break # break out else: # if giving up: - logging.error(f"Tried to convert hentai \"{self}\" several times, but failed. Giving up.") - raise self.DownloadError(f"Error in {self.download.__name__}{inspect.signature(self.download)}: Tried to convert hentai \"{self}\" several times, but failed. Giving up.") + logging.error(f"Tried to download and convert hentai \"{self}\" several times, but failed. Giving up.") + raise KFSmedia.DownloadError(f"Error in {self.download.__name__}{inspect.signature(self.download)}: Tried to download and convert hentai \"{self}\" several times, but failed. Giving up.") if os.path.isdir(f"./hentai/{self.ID}") and len(os.listdir(f"./hentai/{self.ID}"))==0: # if cache folder still exists and is empty: @@ -181,12 +182,4 @@ def download(self) -> None: except PermissionError: # may fail if another process is still using directory like dropbox pass # don't warn because will be retried in main - return - - - class DownloadError(Exception): - """ - Raised when self.download(...) fails. 
- """ - - pass \ No newline at end of file + return \ No newline at end of file diff --git a/src/main.py b/src/main.py index 499e9e7..44d85f3 100644 --- a/src/main.py +++ b/src/main.py @@ -3,6 +3,7 @@ from KFSconfig import KFSconfig from KFSfstr import KFSfstr from KFSlog import KFSlog +from KFSmedia import KFSmedia import logging import os from get_hentai_ID_list import get_hentai_ID_list @@ -48,7 +49,7 @@ def main(): hentai.download() # download hentai except FileExistsError: # if hentai already exists: continue # skip to next hentai - except Hentai.DownloadError: + except KFSmedia.DownloadError: with open("FAILURES.txt", "at") as fails_file: # append in failure file fails_file.write(f"{hentai.ID}\n") logging.info("--------------------------------------------------")