Skip to content

Commit

Permalink
Fix disk-usage; allow overriding the default of skipping deleted files
Browse files (browse the repository at this point in the history)
  • Loading branch information
chapmanjacobd committed Nov 11, 2024
1 parent b07cd8c commit 9acb9db
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 24 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ requires-python = ">=3.11"

[project.optional-dependencies]
dev = ["black", "isort", "ssort"]
test = ["ruff", "pytest", "pytest-regressions", "freezegun", "pyfakefs"]
test = ["ruff", "pytest", "pytest-regressions", "freezegun", "pandas", "pyfakefs"]
deluxe = [
"aiohttp",
"annoy",
Expand Down
3 changes: 2 additions & 1 deletion xklb/fsdb/disk_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ def get_data(args) -> list[dict]:
if args.database:
media = list(args.db.query(*sqlgroups.fs_sql(args, limit=None)))
else:
args.paths = [p for p in args.paths if os.path.exists(p)]
if args.hide_deleted:
args.paths = [p for p in args.paths if os.path.exists(p)]
media = arg_utils.gen_d(args)
media = [d if "size" in d else file_utils.get_filesize(d) for d in media]

Expand Down
2 changes: 1 addition & 1 deletion xklb/mediadb/download_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def download_status() -> None:
extractor_stats[extractor_key]["never_attempted"] += 1

media = [{"extractor_key": extractor_key, **d} for extractor_key, d in extractor_stats.items()]
media = sorted(media, key=lambda x: (-x["never_attempted"], -x["retry_queued"], x["extractor_key"]))
media = sorted(media, key=lambda x: (-x["never_attempted"], -x["retry_queued"], x["extractor_key"] or 0))

media_printer.media_printer(args, media, units="extractors")

Expand Down
2 changes: 1 addition & 1 deletion xklb/mediafiles/images_to_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
def parse_args() -> argparse.Namespace:
parser = argparse_utils.ArgumentParser(usage=usage.images_to_pdf)
parser.add_argument(
"--delete-original", action=argparse.BooleanOptionalAction, default=False, help="Delete source files"
"--delete-original", action=argparse.BooleanOptionalAction, default=False, help="Delete source images"
)

parser.add_argument("--output-path", "-o", help="Output PDF file (optional)")
Expand Down
14 changes: 7 additions & 7 deletions xklb/mediafiles/process_ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,20 +313,20 @@ def process_path(args, path, **kwargs):
output_path.name.replace(".%03d", ".000")
) # TODO: support / return multiple paths...

delete_original = args.delete_original
delete_larger = args.delete_larger
delete_transcode = False

if not output_path.exists():
return path if path.exists() else None

output_stats = output_path.stat()

# Never set delete_original to True. That setting comes from args and it is default True
# Never set delete_larger to True. That setting comes from args and it is default True
transcode_invalid = False
if output_stats.st_size == 0:
transcode_invalid = True
elif output_stats.st_size > original_stats.st_size:
delete_original = False
elif delete_larger and output_stats.st_size > original_stats.st_size:
delete_larger = False
delete_transcode = True
else:
try:
Expand All @@ -344,15 +344,15 @@ def process_path(args, path, **kwargs):
if args.delete_unplayable:
delete_transcode = False
else:
delete_original = False
delete_larger = False
delete_transcode = True
if video_stream and args.audio_only and not args.no_preserve_video:
delete_original = False
delete_larger = False

if delete_transcode:
output_path.unlink()
return path
elif delete_original:
elif delete_larger:
path.unlink()

os.utime(output_path, (original_stats.st_atime, original_stats.st_mtime))
Expand Down
6 changes: 3 additions & 3 deletions xklb/mediafiles/process_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--max-image-height", type=int, default=2400)
parser.add_argument("--max-image-width", type=int, default=2400)
parser.add_argument(
"--delete-original", action=argparse.BooleanOptionalAction, default=True, help="Delete source files"
"--delete-larger", "--delete-original", action=argparse.BooleanOptionalAction, default=True, help="Delete larger of transcode or original files"
)
parser.add_argument("--clean-path", action=argparse.BooleanOptionalAction, default=True, help="Clean output path")
arggroups.clobber(parser)
Expand Down Expand Up @@ -96,11 +96,11 @@ def process_path(args, path):
if not output_path.exists():
return path if path.exists else None

if original_stats.st_size > 0 and output_path.stat().st_size > original_stats.st_size:
if original_stats.st_size > 0 and args.delete_larger and output_path.stat().st_size > original_stats.st_size:
output_path.unlink() # Remove transcode
return path
else:
if args.delete_original:
if args.delete_larger:
path.unlink() # Remove original
os.utime(output_path, (original_stats.st_atime, original_stats.st_mtime))

Expand Down
10 changes: 5 additions & 5 deletions xklb/mediafiles/process_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--max-image-height", type=int, default=2400)
parser.add_argument("--max-image-width", type=int, default=2400)
parser.add_argument(
"--delete-original", action=argparse.BooleanOptionalAction, default=True, help="Delete source files"
"--delete-larger", "--delete-original", action=argparse.BooleanOptionalAction, default=True, help="Delete larger of transcode or original files"
)
parser.add_argument("--clean-path", action=argparse.BooleanOptionalAction, default=True, help="Clean output path")
arggroups.clobber(parser)
Expand Down Expand Up @@ -94,7 +94,7 @@ def convert_to_text_pdf(args, path):
log.warning("[%s]: Could not run OCR. %s", path, e)
else:
if os.path.exists(pdf_path):
if args.delete_original and not os.path.samefile(path, pdf_path):
if args.delete_larger and not os.path.samefile(path, pdf_path):
os.unlink(path)
path = pdf_path

Expand Down Expand Up @@ -201,7 +201,7 @@ def process_path(args, path):
image_paths = file_utils.rglob(str(output_path), consts.IMAGE_EXTENSIONS, quiet=True)[0]

mp_image_args = argparse.Namespace(
**{k: v for k, v in args.__dict__.items() if k not in {"db"}} | {"delete_original": True}
**{k: v for k, v in args.__dict__.items() if k not in {"db"}} | {"delete_larger": True}
)
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
Expand All @@ -218,11 +218,11 @@ def process_path(args, path):
update_references(text_path, replacements)

# compare final output size
if path_utils.folder_size(output_path) > original_stats.st_size:
if args.delete_larger and path_utils.folder_size(output_path) > original_stats.st_size:
devices.rmtree(args, output_path) # Remove transcode
return path

if args.delete_original:
if args.delete_larger:
path.unlink() # Remove original
path_utils.folder_utime(output_path, (original_stats.st_atime, original_stats.st_mtime))

Expand Down
30 changes: 26 additions & 4 deletions xklb/utils/arg_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import argparse, operator, random
import argparse, operator, os.path, random
from collections import defaultdict
from copy import copy
from pathlib import Path
from typing import Generator

from xklb.utils import consts, file_utils, iterables, nums, processes, strings
from xklb.utils.consts import SC
from xklb.utils.log_utils import log


def gen_paths(args, default_exts=None):
Expand All @@ -15,7 +17,8 @@ def gen_paths(args, default_exts=None):
for path in args.paths:
json_data = strings.safe_json_loads(path)
if isinstance(json_data, list):
yield from (d["path"] for d in json_data)
for d in json_data:
yield d["path"]
elif isinstance(json_data, dict):
yield json_data["path"]
else:
Expand All @@ -26,6 +29,11 @@ def gen_paths(args, default_exts=None):
p = Path(path)
if p.is_dir():
yield from file_utils.rglob(str(p), args.ext or default_exts, getattr(args, "exclude", None))[0]
elif args.hide_deleted:
if os.path.exists(p):
yield path
else:
log.info("Skipping non-existent file %s", path)
else:
yield path

Expand All @@ -38,9 +46,18 @@ def gen_d(args, default_exts=None):
for path in args.paths:
json_data = strings.safe_json_loads(path)
if isinstance(json_data, list):
yield from json_data
for json_item in json_data:
if args.hide_deleted:
if os.path.exists(json_item["path"]):
yield json_item
else:
yield json_item
elif isinstance(json_data, dict):
yield json_data
if args.hide_deleted:
if os.path.exists(json_data["path"]):
yield json_data
else:
yield json_data
else:
raise TypeError
else:
Expand All @@ -50,6 +67,11 @@ def gen_d(args, default_exts=None):
if p.is_dir():
for sp in file_utils.rglob(str(p), args.ext or default_exts, getattr(args, "exclude", None))[0]:
yield {"path": sp}
elif args.hide_deleted:
if os.path.exists(p):
yield {"path": path}
else:
log.info("Skipping non-existent file %s", path)
else:
yield {"path": path}

Expand Down
2 changes: 1 addition & 1 deletion xklb/utils/arggroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@ def process_ffmpeg(parent_parser):
"--delete-no-audio", action="store_true", help="Delete files with no audio instead of transcoding video"
)
parser.add_argument(
"--delete-original", action=argparse.BooleanOptionalAction, default=True, help="Delete source files"
"--delete-larger", "--delete-original", action=argparse.BooleanOptionalAction, default=True, help="Delete larger of transcode or original files"
)
parser.add_argument("--clean-path", action=argparse.BooleanOptionalAction, default=True, help="Clean output path")

Expand Down

0 comments on commit 9acb9db

Please sign in to comment.