From 6e8097d7e2fd1a7ec4cb94cddd7607fbda8c1f39 Mon Sep 17 00:00:00 2001
From: Siddhanth Rathod
Date: Fri, 22 Sep 2023 01:50:15 +0530
Subject: [PATCH] Enhancement: eclean-dist handle git checkouts

A new feature for eclean-dist to clean git3-src.
Optionally, cleaning the git3-src can be skipped with --skip-git.

Bug: https://bugs.gentoo.org/622938
Signed-off-by: Siddhanth Rathod
---
 pym/gentoolkit/eclean/clean.py  | 44 ++++++++++++++++++++++++++++++--
 pym/gentoolkit/eclean/cli.py    | 42 +++++++++++++++++++++---------
 pym/gentoolkit/eclean/output.py |  4 ++-
 pym/gentoolkit/eclean/search.py | 41 ++++++++++++++++++++++++++---
 4 files changed, 112 insertions(+), 19 deletions(-)

diff --git a/pym/gentoolkit/eclean/clean.py b/pym/gentoolkit/eclean/clean.py
index 87d7aaca..1d4f4c20 100644
--- a/pym/gentoolkit/eclean/clean.py
+++ b/pym/gentoolkit/eclean/clean.py
@@ -5,6 +5,7 @@
 
 
 import os
+import shutil
 import sys
 
 import gentoolkit.pprinter as pp
@@ -23,7 +24,7 @@ def __init__(self, controller, quiet):
         self.controller = controller
         self.quiet = quiet
 
-    def clean_dist(self, clean_dict):
+    def clean_dist(self, clean_dict, git=()):
         """Calculate size of each entry for display, prompt user if needed,
         delete files if approved and return the total size of files that
         have been deleted.
@@ -39,6 +40,7 @@ def clean_dist(self, clean_dict):
         for key in sorted(clean_dict):
             clean_size += self._clean_files(clean_dict[key], key, file_type)
         # return total size of deleted or to delete files
+        clean_size += self._clean_git_src(git)
         return clean_size
 
     def clean_pkgs(self, clean_dict, pkgdir):
@@ -69,7 +71,7 @@ def clean_pkgs(self, clean_dict, pkgdir):
         # return total size of deleted or to delete files
         return clean_size
 
-    def pretend_clean(self, clean_dict):
+    def pretend_clean(self, clean_dict, git=()):
         """Shortcut function that calculates total space savings
         for the files in clean_dict.
 
@@ -80,6 +82,8 @@ def pretend_clean(self, clean_dict):
         file_type = "file"
         clean_size = 0
         # tally all entries one by one; sorting helps reading
+        if git:
+            clean_size += self._clean_git_src(git, pretend=True)
         for key in sorted(clean_dict):
             key_size = self._get_size(clean_dict[key])
             self.controller(key_size, key, clean_dict[key], file_type)
@@ -148,3 +152,39 @@ def _clean_files(self, files, key, file_type):
                     print(pp.error("Could not delete " + file_), file=sys.stderr)
                     print(pp.error("Error: %s" % str(er)), file=sys.stderr)
         return clean_size
+
+    def _clean_git_src(self, deprecated_git, pretend=False):
+        """Tally and, unless pretending, delete unused git checkouts.
+
+        deprecated_git is an iterable of checkout directories; the return
+        value is the summed size in bytes of the files found in them.
+        """
+        clean_size = 0
+        for checkout in deprecated_git:
+            csize = 0
+            for path, dirs, files in os.walk(checkout):
+                for f in files:
+                    fp = os.path.join(path, f)
+                    try:
+                        statinfo = os.stat(fp)
+                    except OSError as er:
+                        print(
+                            pp.error("Could not get stat info for:" + fp),
+                            file=sys.stderr,
+                        )
+                        print(pp.error("Error: %s" % str(er)), file=sys.stderr)
+                        # nothing to add for this file; statinfo is unset or stale
+                        continue
+                    clean_size += statinfo.st_size
+                    csize += statinfo.st_size
+
+            try:
+                # NOTE(review): the controller's return value is ignored, so
+                # confirm an interactive "no" is really meant to still delete.
+                self.controller(csize, checkout, checkout, "checkout")
+                if not pretend:
+                    shutil.rmtree(checkout)
+            except OSError as er:
+                print(pp.error("Could not delete " + checkout), file=sys.stderr)
+                print(pp.error("Error: %s" % str(er)), file=sys.stderr)
+        return clean_size
diff --git a/pym/gentoolkit/eclean/cli.py b/pym/gentoolkit/eclean/cli.py
index b180641b..a3120f58 100644
--- a/pym/gentoolkit/eclean/cli.py
+++ b/pym/gentoolkit/eclean/cli.py
@@ -13,25 +13,25 @@
 __productname__ = "eclean"
 __description__ = "A cleaning tool for Gentoo distfiles and binaries."
 
+import getopt
 import os
-import sys
 import re
+import sys
 import time
-import getopt
 
 import portage
-from portage.output import white, yellow, turquoise, green, red
+from portage.output import green, red, turquoise, white, yellow
 
 import gentoolkit.pprinter as pp
+from gentoolkit.eclean.clean import CleanUp
+from gentoolkit.eclean.exclude import ParseExcludeFileException, parseExcludeFile
+from gentoolkit.eclean.output import OutputControl
 from gentoolkit.eclean.search import (
     DistfilesSearch,
     findPackages,
-    port_settings,
     pkgdir,
+    port_settings,
 )
-from gentoolkit.eclean.exclude import parseExcludeFile, ParseExcludeFileException
-from gentoolkit.eclean.clean import CleanUp
-from gentoolkit.eclean.output import OutputControl
 
 # from gentoolkit.eclean.dbapi import Dbapi
 from gentoolkit.eprefix import EPREFIX
@@ -309,6 +309,10 @@ def printUsage(_error=None, help=None, unresolved_invalids=None):
             " " + '"two hundreds kilobytes", etc. Units are: ' + "G, M, K and B.",
             file=out,
         )
+        print(
+            yellow(" --skip-git") + " - skip cleaning of git3-src",
+            file=out,
+        )
         print(file=out)
     print(
         "More detailed instruction can be found in",
@@ -420,6 +424,8 @@ def optionSwitch(option, opts, action=None):
                 options["unique-use"] = True
             elif o in ("--no-clean-invalid"):
                 options["no-clean-invalid"] = True
+            elif o == "--skip-git":
+                options["skip-git"] = True
             else:
                 return_code = False
         # sanity check of --deep only options:
@@ -457,7 +463,11 @@ def optionSwitch(option, opts, action=None):
         "verbose",
     ]
     getopt_options["short"]["distfiles"] = "fs:"
-    getopt_options["long"]["distfiles"] = ["fetch-restricted", "size-limit="]
+    getopt_options["long"]["distfiles"] = [
+        "fetch-restricted",
+        "size-limit=",
+        "skip-git",
+    ]
     getopt_options["short"]["packages"] = "iu"
     getopt_options["long"]["packages"] = [
         "ignore-failure",
@@ -481,6 +491,7 @@ def optionSwitch(option, opts, action=None):
     options["ignore-failure"] = False
     options["no-clean-invalid"] = False
     options["unique-use"] = False
+    options["skip-git"] = False
     # if called by a well-named symlink, set the action accordingly:
     action = None
     # temp print line to ensure it is the svn/branch code running, etc..
@@ -546,6 +557,7 @@ def doAction(action, options, exclude={}, output=None):
         files_type = "distfiles"
     saved = {}
     deprecated = {}
+    git = []
     # find files to delete, depending on the action
     if not options["quiet"]:
         output.einfo("Building file list for " + action + " cleaning...")
@@ -567,7 +579,7 @@ def doAction(action, options, exclude={}, output=None):
             # portdb=Dbapi(portage.db[portage.root]["porttree"].dbapi),
             # var_dbapi=Dbapi(portage.db[portage.root]["vartree"].dbapi),
         )
-        clean_me, saved, deprecated = engine.findDistfiles(
+        clean_me, saved, deprecated, git = engine.findDistfiles(
             exclude=exclude,
             destructive=options["destructive"],
             fetch_restricted=options["fetch-restricted"],
@@ -581,7 +593,7 @@ def doAction(action, options, exclude={}, output=None):
     cleaner = CleanUp(output.progress_controller, options["quiet"])
 
     # actually clean files if something was found
-    if clean_me:
+    if clean_me or git:
         # verbose pretend message
         if options["pretend"] and not options["quiet"]:
             output.einfo("Here are the " + files_type + " that would be deleted:")
@@ -590,9 +602,13 @@ def doAction(action, options, exclude={}, output=None):
             output.einfo("Cleaning " + files_type + "...")
         # do the cleanup, and get size of deleted files
         if options["pretend"]:
-            clean_size = cleaner.pretend_clean(clean_me)
+            if options["skip-git"]:
+                git = set()
+            clean_size = cleaner.pretend_clean(clean_me, git)
         elif action in ["distfiles"]:
-            clean_size = cleaner.clean_dist(clean_me)
+            if options["skip-git"]:
+                git = set()
+            clean_size = cleaner.clean_dist(clean_me, git)
         elif action in ["packages"]:
             clean_size = cleaner.clean_pkgs(clean_me, pkgdir)
         # vocabulary for final message
@@ -602,7 +618,7 @@ def doAction(action, options, exclude={}, output=None):
             verb = "were"
         # display freed space
         if not options["quiet"]:
-            output.total("normal", clean_size, len(clean_me), verb, action)
+            output.total("normal", clean_size, len(clean_me) + len(git), verb, action)
     # nothing was found
     elif not options["quiet"]:
         output.einfo("Your " + action + " directory was already clean.")
diff --git a/pym/gentoolkit/eclean/output.py b/pym/gentoolkit/eclean/output.py
index ebba499c..67387380 100644
--- a/pym/gentoolkit/eclean/output.py
+++ b/pym/gentoolkit/eclean/output.py
@@ -5,7 +5,9 @@
 
 
 import sys
-from portage.output import blue, yellow, teal, green, red
+
+from portage.output import blue, green, red, teal, yellow
+
 from gentoolkit.pprinter import cpv, number
 
 
diff --git a/pym/gentoolkit/eclean/search.py b/pym/gentoolkit/eclean/search.py
index a2ac0ce4..c01a6ba4 100644
--- a/pym/gentoolkit/eclean/search.py
+++ b/pym/gentoolkit/eclean/search.py
@@ -7,6 +7,7 @@
 import os
 import stat
 import sys
+import shlex
 from functools import partial
 from typing import Optional
 
@@ -16,13 +17,12 @@
 
 import gentoolkit.pprinter as pp
 from gentoolkit.eclean.exclude import (
-    exclDictMatchCP,
     exclDictExpand,
     exclDictExpandPkgname,
+    exclDictMatchCP,
     exclMatchFilename,
 )
 
-
 # Misc. shortcuts to some portage stuff:
 port_settings = portage.settings
 pkgdir = port_settings["PKGDIR"]
@@ -134,6 +134,7 @@ def findDistfiles(
 
         # gather the files to be cleaned
         self.output("...checking limits for %d ebuild sources" % len(pkgs))
+        git = self.git_check(_distdir)
         checks = self._get_default_checks(size_limit, time_limit, exclude, destructive)
         checks.extend(extra_checks)
         clean_me = self._check_limits(_distdir, checks, clean_me)
@@ -148,7 +149,7 @@ def findDistfiles(
                 + "%s remaining candidates to clean" % len(clean_me)
             )
         clean_me, saved = self._check_excludes(exclude, clean_me)
-        return clean_me, saved, deprecated
+        return clean_me, saved, deprecated, git
 
     # begin _check_limits code block
 
@@ -332,6 +333,40 @@ def _non_destructive(
             deprecated.update(_deprecated)
         return pkgs, deprecated
 
+    def git_check(self, distdir):
+        """Checks $DISTDIR/git3-src for checkouts which are not in the vardb
+
+        Returns the set of directories under $DISTDIR/git3-src that no
+        installed live package lists in its EVCS_STORE_DIRS; the set is
+        empty when $DISTDIR/git3-src does not exist.
+        """
+        git_src = os.path.join(distdir, "git3-src")
+        if not os.path.isdir(git_src):
+            # nothing was ever cloned; os.listdir() below would raise
+            return set()
+        expected_dirs = set()
+        for cpv in set(self.vardb.cpv_all()):
+            # PROPERTIES is a whitespace separated list of tokens
+            if "live" in self.vardb.aux_get(cpv, ["PROPERTIES"])[0].split():
+                try:
+                    # try to get the dir names of the cloned
+                    # repos from the environment file.
+                    vcs_dir = {
+                        i.split("=")[-1].strip('"')
+                        for i in (
+                            shlex.split(
+                                self.vardb._aux_env_search(cpv, ["EVCS_STORE_DIRS"])[
+                                    "EVCS_STORE_DIRS"
+                                ].strip("()")
+                            )
+                        )
+                    }
+                    expected_dirs.update(vcs_dir)
+                except KeyError:
+                    pass
+        actual_dirs = {os.path.join(git_src, i) for i in os.listdir(git_src)}
+        return actual_dirs.difference(expected_dirs)
+
     def _fetch_restricted(self, pkgs_, cpvs):
         """perform fetch restricted non-destructive source
         filename lookups