Skip to content

Commit

Permalink
Added trigger file option
Browse files Browse the repository at this point in the history
Can now specify `--dir-trigger` to choose a top-level file within the
directories in the target. If the file exists, its timestamp will be
used as the folder's timestamp and calculations done that way.
  • Loading branch information
pdarragh committed Apr 12, 2016
1 parent e3a1e06 commit dbe079d
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 43 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ $ cleanup_manager.py [-hvnV] [-l log] [-k date] [-f format] target
| `-k date`, `--keep-after date` | The date to keep items after. Default is seven days prior to invocation. |
| `-d format`, `--date-format format` | Format of the given date. Useful if you have that one particular way of formatting your dates and you don't want to change. |
| `-f size`, `--freeup size` | The amount of space to attempt to free up. |
| `-t trigger`, `--dir-trigger trigger` | Name of a file to look for at the top level of each directory in the target. If it exists, its timestamp is used as that directory's timestamp. |
| `--delete-oldest-first` | When deleting by size, older items are deleted first. This is the default. |
| `--delete-largest-first` | When deleting by size, larger items are deleted first. |
| `--overflow` | Allows the script to delete more than just the size specified to hit target. |
Expand Down Expand Up @@ -94,6 +95,7 @@ This is a short, reverse-chronological summary of the updates to this project.

| Date | Version | Update Description |
|------------|:---------:|------------------------------------------------------------------------------|
| 2016-04-12 | 1.5.0 | Added `--dir-trigger` option for folder-level trigger files. |
| 2015-07-01 | 1.4.0 | Updated logging to change outputs slightly (for better readability). |
| 2015-05-19 | 1.3.0 | New `--overflow` flag ensures specified disk space will be cleared. |
| 2015-05-18 | 1.2.0 | Added increased verbosity availability via more `-V` flags. |
Expand Down
2 changes: 1 addition & 1 deletion cleanup_management/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import analysis
import cleanup

__version__ = '1.4.0'
__version__ = '1.5.0'
__all__ = ['analysis', 'cleanup']

if __name__ == "__main__":
Expand Down
108 changes: 69 additions & 39 deletions cleanup_management/analysis.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import os


def get_date_based_deletable_inventory(keep_after, logger, target=None, folders=None, files=None, links=None):
def get_date_based_deletable_inventory(keep_after, logger, target=None, folders=None, files=None, links=None, trigger=None):
"""
Finds all of the items within an inventory that can be deleted based on
their last modification date.
:param keep_after: a unix timestamp; any files or directories with a last
modification time before this will be removed
:param logger: a Management Tools logger object
Expand All @@ -26,18 +26,48 @@ def get_date_based_deletable_inventory(keep_after, logger, target=None, folders=
folders = list(folders)
files = list(files)
links = list(links)

logger.verbose("Getting date-based deletable inventory:")


# Check if any of the folders contain the trigger file.
# If they do, and if that trigger file's last modified timestamp is within
# the specified range, set that folder to be removed.
delete_folders = []
if trigger is not None:
pop_indices = []
# Iterate over the folders with their index in the list.
for index, folder in enumerate(folders):
triggerpath = os.path.join(folder[0], trigger)
# Test if the trigger file exists in the folder.
try:
if os.path.getmtime(triggerpath) < keep_after:
# If the file exists and its timestamp is old enough, then
# the folder should be deleted.
pop_indices.append(index)
except OSError:
# The file does not exist in the folder. That's fine; we'll just
# continue on and let it be deleted naturally if it ought to be.
continue
# Go through the marked indices and add the appropriate folders to the
# deletion list.
for index in pop_indices:
delete_folders.append(folders[index])
# Then go through and remove those folders from the original list.
for index in pop_indices:
folders.pop(index)
# Note that these steps must be separated. If the folders were popped at
# the same time they're added to the delete_folders list, the indices
# would change and you would be adding the wrong folders.

# Find the folders and files that need to be deleted.
# If the item's score is above the threshold value, it will be deleted.
# Folder and file lists are assumed to contain tuples as:
# (path, age, size)
# Effectively, for each folder/file: if that item has a timestamp that is
# less than the 'keep_after' value, it gets added to the list.
delete_folders = [folder[0] for folder in folders if folder[1] < keep_after]
delete_files = [file[0] for file in files if file[1] < keep_after]
delete_folders += [folder[0] for folder in folders if folder[1] < keep_after]
delete_files = [file[0] for file in files if file[1] < keep_after]

# Now handle links. This is a bit trickier.
# Link array is assumed to contain tuples as:
# (link location, target location, inside)
Expand All @@ -54,15 +84,15 @@ def get_date_based_deletable_inventory(keep_after, logger, target=None, folders=
if link[0].startswith(folder) or link[1].startswith(folder):
delete_links.append(link[0])
break

# Print out lots of fun information if it's warranted.
for folder in delete_folders:
logger.debug(" Set to remove folder: {}".format(folder))
for file in delete_files:
logger.debug(" Set to remove file: {}".format(file))
for link in delete_links:
logger.debug(" Set to remove link: {}".format(link))

# Return the deletable inventory.
return delete_folders, delete_files, delete_links

Expand All @@ -71,7 +101,7 @@ def get_size_based_deletable_inventory(target_space, logger, target=None, oldest
"""
Finds all of the items within an inventory that can be deleted based on a
given target amount of space to attempt to free up.
:param target_space: the amount of space to attempt to clean up
:param logger: a Management Tools logger object
:type target_space: int
Expand All @@ -95,14 +125,14 @@ def get_size_based_deletable_inventory(target_space, logger, target=None, oldest
folders = list(folders)
files = list(files)
links = list(links)

logger.verbose("Getting size-based deletable inventory:")

# Initialize lists to be returned.
delete_folders = []
delete_files = []
delete_links = []

# # Set the index key based on oldest/largest preference.
# if oldest_first:
# key = 1
Expand All @@ -111,7 +141,7 @@ def get_size_based_deletable_inventory(target_space, logger, target=None, oldest
# Initialize an accumulated_size counter to keep track of how much stuff is
# going to be deleted.
accumulated_size = 0

# Build up the deletion lists.
while accumulated_size < target_space:
logger.verbose(" target_space = {}".format(target_space))
Expand All @@ -135,7 +165,7 @@ def get_size_based_deletable_inventory(target_space, logger, target=None, oldest
# This way we can keep iterating over the list and not get stuck on
# one value.
folders.remove(folder)

# Files but no folders.
elif files and not folders:
if oldest_first:
Expand All @@ -153,7 +183,7 @@ def get_size_based_deletable_inventory(target_space, logger, target=None, oldest
# Even if the file is too big to be deleted, remove it from the list
# of files so we don't have to see it again.
files.remove(file)

# Maybe we have both! That's kind of tricky.
elif files and folders:
# Take the maximum value from each of 'folders' and 'files'.
Expand Down Expand Up @@ -187,13 +217,13 @@ def get_size_based_deletable_inventory(target_space, logger, target=None, oldest
logger.verbose(" deleting file")
# Remove the file from the list of files.
files.remove(file)

# We don't have any folders or files left, so quit the loop.
# If this gets triggered, it means that there weren't enough items in
# the target directory to fill up the 'target_space' allotment.
else:
break

# Now handle links. This is a bit trickier.
# Link array is assumed to contain tuples as:
# (link location, target location, inside)
Expand All @@ -209,15 +239,15 @@ def get_size_based_deletable_inventory(target_space, logger, target=None, oldest
if link[0].startswith(folder) or link[1].startswith(folder):
delete_links.append(link[0])
break

# Print out lots of fun information if it's warranted.
for folder in delete_folders:
logger.debug(" Set to remove folder: {}".format(folder))
for file in delete_files:
logger.debug(" Set to remove file: {}".format(file))
for link in delete_links:
logger.debug(" Set to remove link: {}".format(link))

# Return the deletable inventory and accumulated size.
return delete_folders, delete_files, delete_links, accumulated_size

Expand All @@ -226,7 +256,7 @@ def get_inventory(target, logger):
"""
Given a target directory, finds all subitems within that directory and
stores them in separate lists, ie folders, files, and links.
Folder and file lists are full of tuples as:
(folder/file path, modification timestamp, size)
where:
Expand All @@ -236,33 +266,33 @@ def get_inventory(target, logger):
Folder sizes are just the sum of their content, and folder modification
times are considered to be the most-recent timestamp among all objects
within that folder.
Link list is full of tuples as:
(link path, target path, internal)
where:
link path: the path to the link object
target path: the target that the link points to
internal: whether the target is in this inventory
:param target: directory to search for inventory
:param logger: a Management Tools logger object
:return: a tuple containing lists containing tuples describing the contents
as (folders, files, links)
"""
if not os.path.isdir(target):
raise ValueError("The target must be a valid, existing directory.")

logger.verbose("Getting top-level inventory:")

##--------------------------------------------------------------------------
## Get top-level directory listings.
##--------------------------------------------------------------------------

# Initialize lists to hold tuples.
folders = []
files = []
links = []

# Walk through everything in just the top directory.
for path, subdirs, subfiles in os.walk(target):
for folder in subdirs:
Expand All @@ -273,7 +303,7 @@ def get_inventory(target, logger):
else:
folders.append(folder)
logger.verbose(" Found folder: {}".format(folder))

for file in subfiles:
file = os.path.join(path, file)
if os.path.islink(file):
Expand All @@ -282,27 +312,27 @@ def get_inventory(target, logger):
else:
files.append(file)
logger.verbose(" Found file: {}".format(file))

# Prevent recursion to reduce time (we don't need everything indexed).
break

##--------------------------------------------------------------------------
## Get file information.
##--------------------------------------------------------------------------

# Get the age and size of each file.
files = [(file, os.path.getmtime(file), os.path.getsize(file)) for file in files]

##--------------------------------------------------------------------------
## Get folder information.
##--------------------------------------------------------------------------

# Get the age of each folder.
for i in range(len(folders)):
folder = folders[i]
age = os.path.getmtime(folder)
size = 0

for path, subdirs, subfiles in os.walk(folder):
for directory in subdirs:
directory = os.path.join(path, directory)
Expand All @@ -314,7 +344,7 @@ def get_inventory(target, logger):
# Is the directory a link?
if os.path.islink(directory):
links.append(directory)

for file in subfiles:
file = os.path.join(path, file)
file_age = 0
Expand All @@ -328,14 +358,14 @@ def get_inventory(target, logger):
# directory, overwrite the directory's age with the file's.
if file_age > age:
age = file_age

folders[i] = (folder, age, size)

##--------------------------------------------------------------------------
## Get link information.
##--------------------------------------------------------------------------

# Determine whether each link connects to a point within the top directory.
links = [(link, os.path.realpath(link), os.path.realpath(link).startswith(target)) for link in links]

return folders, files, links
15 changes: 12 additions & 3 deletions cleanup_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
raise e


def main(target, keep_after, free_space, oldest_first, skip_prompt, overflow, logger):
def main(target, keep_after, free_space, oldest_first, skip_prompt, overflow, dir_trigger, logger):
# Get an absolute reference to the target path.
target = os.path.abspath(os.path.expanduser(target))

Expand All @@ -31,7 +31,7 @@ def main(target, keep_after, free_space, oldest_first, skip_prompt, overflow, lo

# Build the appropriate deletion inventory.
if keep_after is not None:
delete_folders, delete_files, delete_links = cleanup_management.analysis.get_date_based_deletable_inventory(keep_after=keep_after, logger=logger, folders=folders, files=files, links=links)
delete_folders, delete_files, delete_links = cleanup_management.analysis.get_date_based_deletable_inventory(keep_after=keep_after, logger=logger, folders=folders, files=files, links=links, trigger=dir_trigger)
elif free_space is not None and oldest_first is not None:
delete_folders, delete_files, delete_links, deleted_space = cleanup_management.analysis.get_size_based_deletable_inventory(target_space=free_space, logger=logger, oldest_first=oldest_first, overflow=overflow, folders=folders, files=files, links=links)
else:
Expand Down Expand Up @@ -158,6 +158,13 @@ def usage():
default: '%Y-%m-%d'
-f size, --freeup size
The amount of space to attempt to free up.
-t trigger, --dir-trigger trigger
Sets a specific file to look for in the top-level of directories inside
the specified target directory. If this file exists, its timestamp will
be used in place of the directory's timestamp to determine removal. If
the file does not exist, the timestamp for the directory will be found
through the usual method.
(Only has an effect with date-based deletion.)
--delete-oldest-first
When deleting by size, older items are deleted first to free up the
designated `--freeup` space.
Expand Down Expand Up @@ -445,6 +452,7 @@ def volume_size_target(size, target, logger=None):
parser.add_argument('-k', '--keep-after', default=None)
parser.add_argument('-d', '--date-format', default='%Y-%m-%d')
parser.add_argument('-f', '--freeup', default=None)
parser.add_argument('-t', '--dir-trigger', default=None)
parser.add_argument('--delete-oldest-first', action='store_true', default=True)
parser.add_argument('--delete-largest-first', action='store_false', dest='delete_oldest_first')
parser.add_argument('--overflow', action='store_true')
Expand Down Expand Up @@ -507,9 +515,10 @@ def volume_size_target(size, target, logger=None):
oldest_first = args.delete_oldest_first,
skip_prompt = args.skip_prompt,
overflow = args.overflow,
dir_trigger = args.dir_trigger,
logger = logger,
)
except:
# Output the exception with the error name and its message. Suppresses the stack trace.
logger.error("{errname}: {error}".format(errname=sys.exc_info()[0].__name__, error=' '.join(sys.exc_info()[1])))
logger.error("{errname}: {error}".format(errname=sys.exc_info()[0].__name__, error=' '.join([str(x) for x in sys.exc_info()[1]])))
raise
Binary file not shown.

0 comments on commit dbe079d

Please sign in to comment.