diff --git a/disk_objectstore/backup_utils.py b/disk_objectstore/backup_utils.py index 4eb17a9..1444619 100644 --- a/disk_objectstore/backup_utils.py +++ b/disk_objectstore/backup_utils.py @@ -15,13 +15,10 @@ from disk_objectstore.container import Container +logging.basicConfig() logger = logging.getLogger(__name__) -def _log(msg, end="\n"): - print(msg, end=end) - - def split_remote_and_path(dest: str): """extract remote and path from :""" split_dest = dest.split(":") @@ -30,338 +27,342 @@ def split_remote_and_path(dest: str): if len(split_dest) == 2: return split_dest[0], Path(split_dest[1]) # more than 1 colon: - raise ValueError + raise ValueError("Invalid destination format: :") def is_exe_found(exe: str) -> bool: return shutil.which(exe) is not None -def run_cmd(args: list, remote: Optional[str] = None): - """ - Run a command locally or remotely. - """ - all_args = args[:] - if remote: - all_args = ["ssh", remote] + all_args +class BackupUtilities: + """Utilities to make a backup of the disk-objectstore container""" + + def __init__( + self, dest: str, keep: int, rsync_exe: str, logger_: logging.Logger + ) -> None: + self.dest = dest + self.keep = keep + self.rsync_exe = rsync_exe + self.logger = logger_ + self.remote, self.path = split_remote_and_path(dest) + + def run_cmd(self, args: list): + """ + Run a command locally or remotely. + """ + all_args = args[:] + if self.remote: + all_args = ["ssh", self.remote] + all_args + + res = subprocess.run(all_args, capture_output=True, text=True, check=False) + exit_code = res.returncode + + self.logger.debug( + f"Command: {all_args}\n" + f" Exit Code: {exit_code}\n" + f" stdout/stderr: {res.stdout}\n{res.stderr}" + ) - res = subprocess.run(all_args, capture_output=True, text=True, check=False) - exit_code = res.returncode + success = exit_code == 0 - _log( - f"Command: {all_args}\n" - f" Exit Code: {exit_code}\n" - f" stdout/stderr: {res.stdout}\n{res.stderr}" - ) + return success, res.stdout - success = exit_code == 0 + def check_if_remote_accessible(self) -> bool: + """Check if remote host is accessible via ssh""" + self.logger.info(f"Checking if '{self.remote}' is accessible...") + success = self.run_cmd(["exit"])[0] + if not success: + self.logger.error(f"Remote '{self.remote}' is not accessible!") + return False + self.logger.info("Success! '%s' is accessible!", self.remote) + return True + + def check_path_exists(self, path: Path) -> bool: + cmd = ["[", "-e", str(path), "]"] + return self.run_cmd(cmd)[0] + + def validate_inputs(self) -> bool: + """Validate inputs to the backup cli command + + :return: + True if validation passes, False otherwise. + """ + if self.keep < 0: + self.logger.error("keep variable can't be negative!") + return False - return success, res.stdout + if self.remote: + if not self.check_if_remote_accessible(): + return False + if not is_exe_found(self.rsync_exe): + self.logger.error(f"{self.rsync_exe} not accessible.") + return False -def check_if_remote_accessible(remote: str) -> bool: - """Check if remote host is accessible via ssh""" - _log(f"Checking if '{remote}' is accessible...") - success = run_cmd(["exit"], remote=remote)[0] - if not success: - _log(f"Error: Remote '{remote}' is not accessible!") - return False - _log(f"Success! '{remote}' is accessible!") - return True + path_exists = self.check_path_exists(self.path) + + if not path_exists: + success = self.run_cmd(["mkdir", str(self.path)])[0] + if not success: + self.logger.error(f"Couldn't access/create '{str(self.path)}'!") + return False + + return True + + def call_rsync( # pylint: disable=too-many-arguments + self, + args: list, + src: Path, + dest: Path, + link_dest: Optional[Path] = None, + src_trailing_slash: bool = False, + dest_trailing_slash: bool = False, + ) -> bool: + """Call rsync with specified arguments and handle possible errors & stdout/stderr + + :param link_dest: + Path to the hardlinked files location (previous backup). + + :param src_trailing_slash: + Add a trailing slash to the source path. This makes rsync copy the contents + of the folder instead of the folder itself. + + :param dest_trailing_slash: + Add a trailing slash to the destination path. This makes rsync interpret the + destination as a folder and create it if it doesn't exists. + + :return: + True if successful and False if unsuccessful. + """ + + all_args = args[:] + if link_dest: + if not self.remote: + # for local paths, use resolve() to get absolute path + link_dest_str = str(link_dest.resolve()) + else: + # for remote paths, we require absolute paths anyways + link_dest_str = str(link_dest) + all_args += [f"--link-dest={link_dest_str}"] + + if src_trailing_slash: + all_args += [str(src) + "/"] + else: + all_args += [str(src)] + dest_str = str(dest) + if dest_trailing_slash: + dest_str += "/" -def check_path_exists(path: Path, remote: Optional[str] = None) -> bool: - cmd = ["[", "-e", str(path), "]"] - return run_cmd(cmd, remote=remote)[0] + if not self.remote: + all_args += [dest_str] + else: + all_args += [f"{self.remote}:{dest_str}"] + try: + res = subprocess.run(all_args, capture_output=True, text=True, check=True) + except subprocess.CalledProcessError as exc: + self.logger.error(f"{exc}") + return False + exit_code = res.returncode + + self.logger.debug( + "Command: %s\n Exit Code: %s\n stdout/stderr: %s\n%s", + str(all_args), + exit_code, + res.stdout, + res.stderr, + ) -def call_rsync( # pylint: disable=too-many-arguments - args: list, - src: Path, - dest: Path, - link_dest: Optional[Path] = None, - remote: Optional[str] = None, - src_trailing_slash: bool = False, - dest_trailing_slash: bool = False, -) -> bool: - """Call rsync with specified arguments and handle possible errors & stdout/stderr + success = exit_code == 0 - :param link_dest: - Path to the hardlinked files location (previous backup). + return success - :param src_trailing_slash: - Add a trailing slash to the source path. This makes rsync copy the contents - of the folder instead of the folder itself. + def backup_container( # pylint: disable=too-many-return-statements, too-many-branches + self, + container: Container, + path: Path, + prev_backup: Optional[Path] = None, + ) -> bool: + """Create a backup of the disk-objectstore container - :param dest_trailing_slash: - Add a trailing slash to the destination path. This makes rsync interpret the - destination as a folder and create it if it doesn't exists. + This is safe to perform when the container is being used. - :return: - True if successful and False if unsuccessful. - """ + It should be done in the following order: + 1) loose files; + 2) sqlite database; + 3) packed files. - all_args = args[:] - if link_dest: - if not remote: - # for local paths, use resolve() to get absolute path - link_dest_str = str(link_dest.resolve()) - else: - # for remote paths, we require absolute paths anyways - link_dest_str = str(link_dest) - all_args += [f"--link-dest={link_dest_str}"] - - if src_trailing_slash: - all_args += [str(src) + "/"] - else: - all_args += [str(src)] - - dest_str = str(dest) - if dest_trailing_slash: - dest_str += "/" - - if not remote: - all_args += [dest_str] - else: - all_args += [f"{remote}:{dest_str}"] - - try: - res = subprocess.run(all_args, capture_output=True, text=True, check=True) - except subprocess.CalledProcessError as exc: - _log(f"Error: {exc}") - return False - exit_code = res.returncode - _log( - f"Command: {all_args}\n" - f" Exit Code: {exit_code}\n" - f" stdout/stderr: {res.stdout}\n{res.stderr}" - ) - - success = exit_code == 0 - - return success - - -def validate_inputs( - path: Path, - remote: Optional[str], - keep: int, - rsync_exe: str = "rsync", -) -> bool: - """Validate inputs to the backup cli command - - :return: - True if validation passes, False otherwise. - """ - if keep < 0: - _log("Error: keep variable can't be negative!") - return False - - if remote: - if not check_if_remote_accessible(remote): - return False + :return: + True if successful and False if unsuccessful. + """ - if not is_exe_found(rsync_exe): - _log(f"Error: {rsync_exe} not accessible.") - return False + # subprocess arguments shared by all rsync calls: + rsync_args = [self.rsync_exe, "-azh", "-vv", "--no-whole-file"] - path_exists = check_path_exists(path, remote) + container_root_path = container.get_folder() + loose_path = container._get_loose_folder() # pylint: disable=protected-access + packs_path = container._get_pack_folder() # pylint: disable=protected-access + sqlite_path = ( + container._get_pack_index_path() # pylint: disable=protected-access + ) - if not path_exists: - success = run_cmd(["mkdir", str(path)], remote=remote)[0] + # step 1: back up loose files + loose_path_rel = loose_path.relative_to(container_root_path) + prev_backup_loose = prev_backup / loose_path_rel if prev_backup else None + success = self.call_rsync( + rsync_args, loose_path, path, link_dest=prev_backup_loose + ) + if not success: + return False + self.logger.info(f"Transferred {str(loose_path)} to {str(path)}") + + # step 2: back up sqlite db + + # make a temporary directory to dump sqlite db locally + with tempfile.TemporaryDirectory() as temp_dir_name: + sqlite_temp_loc = Path(temp_dir_name) / "packs.idx" + + # Safe way to make a backup of the sqlite db, while it might potentially be accessed + # https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup + src = sqlite3.connect(str(sqlite_path)) + dst = sqlite3.connect(str(sqlite_temp_loc)) + with dst: + src.backup(dst) + dst.close() + src.close() + + if sqlite_temp_loc.is_file(): + self.logger.info( + f"Dumped the SQLite database to {str(sqlite_temp_loc)}" + ) + else: + self.logger.error("'%s' was not created.", str(sqlite_temp_loc)) + return False + + # step 3: transfer the SQLITE database file + success = self.call_rsync( + rsync_args, sqlite_temp_loc, path, link_dest=prev_backup + ) + if not success: + return False + self.logger.info(f"Transferred SQLite database to {str(path)}") + + # step 4: transfer the packed files + packs_path_rel = packs_path.relative_to(container_root_path) + success = self.call_rsync(rsync_args, packs_path, path, link_dest=prev_backup) + if not success: + return False + self.logger.info(f"Transferred {str(packs_path)} to {str(path)}") + + # step 5: transfer anything else in the container folder + success = self.call_rsync( + rsync_args + + [ + "--exclude", + str(loose_path_rel), + "--exclude", + "packs.idx", + "--exclude", + str(packs_path_rel), + ], + container_root_path, + path, + link_dest=prev_backup, + src_trailing_slash=True, + ) if not success: - _log(f"Error: Couldn't access/create '{str(path)}'!") return False - return True - - -def backup_container( # pylint: disable=too-many-return-statements, too-many-branches - container: Container, - path: Path, - remote: Optional[str] = None, - prev_backup: Optional[Path] = None, - rsync_exe: str = "rsync", -) -> bool: - """Create a backup of the disk-objectstore container - - This is safe to perform when the container is being used. - - It should be done in the following order: - 1) loose files; - 2) sqlite database; - 3) packed files. - - :return: - True if successful and False if unsuccessful. - """ - - # subprocess arguments shared by all rsync calls: - rsync_args = [rsync_exe, "-azh", "-vv", "--no-whole-file"] - - container_root_path = container.get_folder() - loose_path = container._get_loose_folder() # pylint: disable=protected-access - packs_path = container._get_pack_folder() # pylint: disable=protected-access - sqlite_path = container._get_pack_index_path() # pylint: disable=protected-access - - # step 1: back up loose files - loose_path_rel = loose_path.relative_to(container_root_path) - prev_backup_loose = prev_backup / loose_path_rel if prev_backup else None - success = call_rsync( - rsync_args, loose_path, path, remote=remote, link_dest=prev_backup_loose - ) - if not success: - return False - - # step 2: back up sqlite db - - # make a temporary directory to dump sqlite db locally - with tempfile.TemporaryDirectory() as temp_dir_name: - sqlite_temp_loc = Path(temp_dir_name) / "packs.idx" - - # Safe way to make a backup of the sqlite db, while it might potentially be accessed - # https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup - src = sqlite3.connect(str(sqlite_path)) - dst = sqlite3.connect(str(sqlite_temp_loc)) - with dst: - src.backup(dst) - dst.close() - src.close() - - if sqlite_temp_loc.is_file(): - _log(f"Dumped the SQLite database to {str(sqlite_temp_loc)}") - else: - _log(f"Error: '{str(sqlite_temp_loc)}' was not created.") + return True + + def delete_old_backups(self) -> bool: + """Get all folders matching the backup pattern, and delete oldest ones.""" + success, stdout = self.run_cmd( + [ + "find", + str(self.path), + "-maxdepth", + "1", + "-type", + "d", + "-name", + "backup_*_*", + "-print", + ], + ) + if not success: return False - # step 3: transfer the SQLITE database file - success = call_rsync( - rsync_args, sqlite_temp_loc, path, remote=remote, link_dest=prev_backup + sorted_folders = sorted(stdout.splitlines()) + to_delete = sorted_folders[: -(self.keep + 1)] + for folder in to_delete: + success = self.run_cmd(["rm", "-rf", folder])[0] + if success: + self.logger.info(f"Deleted old backup: {folder}") + else: + self.logger.warning("Warning: couldn't delete old backup: %s", folder) + return True + + def backup_auto_folders( + self, + container: Container, + ): + """Create a backup, managing live and previous backup folders automatically + + The running backup is done to `/live-backup`. When it completes, it is moved to + the final path: `/backup__` and the symlink `/last-backup will + be set to point to it. Rsync `link-dest` is used between live-backup and last-backup + to keep the backups incremental and performant. + + :param path: + Path to where the backup will be created. If 'remote' is specified, must be an absolute path, + otherwise can be relative. + + :param remote: + Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote + hosts configured for it are supported (e.g. via .ssh/config file). + + :return: + True is successful and False if unsuccessful. + """ + + live_folder = self.path / "live-backup" + last_symlink = self.path / "last-backup" + + prev_exists = self.check_path_exists(last_symlink) + if prev_exists: + self.logger.info( + f"'{str(last_symlink)}' exists, using it for rsync --link-dest." + ) + + success = self.backup_container( + container, + live_folder, + prev_backup=last_symlink if prev_exists else None, ) if not success: return False - # step 4: transfer the packed files - packs_path_rel = packs_path.relative_to(container_root_path) - success = call_rsync( - rsync_args, packs_path, path, remote=remote, link_dest=prev_backup - ) - if not success: - return False - - # step 5: transfer anything else in the container folder - success = call_rsync( - rsync_args - + [ - "--exclude", - str(loose_path_rel), - "--exclude", - "packs.idx", - "--exclude", - str(packs_path_rel), - ], - container_root_path, - path, - link_dest=prev_backup, - remote=remote, - src_trailing_slash=True, - ) - if not success: - return False - - return True - - -def delete_old_backups(path: Path, remote: Optional[str] = None, keep: int = 1) -> bool: - """Get all folders matching the backup pattern, and delete oldest ones.""" - success, stdout = run_cmd( - [ - "find", - str(path), - "-maxdepth", - "1", - "-type", - "d", - "-name", - "backup_*_*", - "-print", - ], - remote=remote, - ) - if not success: - return False - - sorted_folders = sorted(stdout.splitlines()) - to_delete = sorted_folders[: -(keep + 1)] - for folder in to_delete: - success = run_cmd(["rm", "-rf", folder], remote=remote)[0] - if success: - _log(f"Deleted old backup: {folder}") - else: - _log(f"Warning: couldn't delete old backup: {folder}") - return True - - -def backup_auto_folders( - container: Container, - path: Path, - remote: Optional[str] = None, - keep: int = 1, - rsync_exe: str = "rsync", -): - """Create a backup, managing live and previous backup folders automatically - - The running backup is done to `/live-backup`. When it completes, it is moved to - the final path: `/backup__` and the symlink `/last-backup will - be set to point to it. Rsync `link-dest` is used between live-backup and last-backup - to keep the backups incremental and performant. - - :param path: - Path to where the backup will be created. If 'remote' is specified, must be an absolute path, - otherwise can be relative. - - :param remote: - Remote host of the backup location. 'ssh' executable is called via subprocess and therefore remote - hosts configured for it are supported (e.g. via .ssh/config file). - - :return: - True is successful and False if unsuccessful. - """ - - live_folder = path / "live-backup" - last_symlink = path / "last-backup" - - prev_exists = check_path_exists(last_symlink, remote) - - success = backup_container( - container, - live_folder, - remote=remote, - prev_backup=last_symlink if prev_exists else None, - rsync_exe=rsync_exe, - ) - if not success: - return False - - # move live-backup -> backup__ - timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") - randstr = "".join(random.choices(string.ascii_lowercase + string.digits, k=4)) - folder_name = f"backup_{timestamp}_{randstr}" - - success = run_cmd(["mv", str(live_folder), str(path / folder_name)], remote=remote)[ - 0 - ] - if not success: - return False - - # update last-backup symlink - success = run_cmd( - ["ln", "-sfn", str(folder_name), str(last_symlink)], remote=remote - )[0] - if not success: - return False - _log(f"Backup moved from '{str(live_folder)}' to '{str(path / folder_name)}'.") - - delete_old_backups(path, remote=remote, keep=keep) - - return True + # move live-backup -> backup__ + timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + randstr = "".join(random.choices(string.ascii_lowercase + string.digits, k=4)) + folder_name = f"backup_{timestamp}_{randstr}" + + success = self.run_cmd(["mv", str(live_folder), str(self.path / folder_name)])[ + 0 + ] + if not success: + return False + + # update last-backup symlink + success = self.run_cmd(["ln", "-sfn", str(folder_name), str(last_symlink)])[0] + if not success: + return False + self.logger.info( + f"Backup moved from '{str(live_folder)}' to '{str(self.path / folder_name)}'." + ) + + self.delete_old_backups() + + return True diff --git a/disk_objectstore/cli.py b/disk_objectstore/cli.py index 6465ef0..b41e95b 100644 --- a/disk_objectstore/cli.py +++ b/disk_objectstore/cli.py @@ -1,6 +1,7 @@ """A small CLI tool for managing stores.""" import dataclasses import json +import logging import os import sys from pathlib import Path @@ -197,12 +198,14 @@ def optimize( default="rsync", help="Specify the 'rsync' executable, if not in PATH. Used for both local and remote destinations.", ) +@click.option( + "--verbosity", + default="info", + help="Set verbosity [silent|info|debug], default is 'info'.", +) @pass_dostore def backup( - dostore: ContainerContext, - dest: str, - keep: int, - rsync_exe: str, + dostore: ContainerContext, dest: str, keep: int, rsync_exe: str, verbosity: str ): """Create a backup of the container to destination location DEST, in a subfolder backup__ and point a symlink called `last-backup` to it. @@ -221,22 +224,31 @@ def backup( non-UNIX environments. """ + if verbosity == "silent": + backup_utils.logger.setLevel(logging.ERROR) + elif verbosity == "info": + backup_utils.logger.setLevel(logging.INFO) + elif verbosity == "debug": + backup_utils.logger.setLevel(logging.DEBUG) + else: + click.echo("Unsupported verbosity.") + return + try: - remote, path = backup_utils.split_remote_and_path(dest) - except ValueError: - click.echo("Unsupported destination.") - return False + backup_utils_instance = backup_utils.BackupUtilities( + dest, keep, rsync_exe, backup_utils.logger + ) + except ValueError as e: + click.echo(f"Error: {e}") + return - success = backup_utils.validate_inputs(path, remote, keep, rsync_exe=rsync_exe) + success = backup_utils_instance.validate_inputs() if not success: click.echo("Input validation failed.") - return False + return with dostore.container as container: - return backup_utils.backup_auto_folders( - container, - path, - remote=remote, - keep=keep, - rsync_exe=rsync_exe, - ) + success = backup_utils_instance.backup_auto_folders(container) + if not success: + click.echo("Error: backup failed.") + return