def check_path(
    path: str,
    check_exists: bool = False,
) -> str:
    """Check the given path and return its absolute form.

    Parameters
    ----------
    path :
        The path to be checked. A leading "~" (or "~user") component is
        expanded to the corresponding user's home directory.

    check_exists :
        If True, check if the path exists, and raise an AssertionError if the path does not exist.

    Returns
    -------
    checked_path:
        The absolute path of the given path.

    """
    # os.path.expanduser expands only a LEADING "~"/"~user" component.
    # The previous str.replace("~", ...) approach corrupted any path that
    # contained "~" elsewhere (e.g. "~/back~up" gained a second home prefix).
    checked_path = os.path.abspath(os.path.expanduser(path))

    if check_exists:
        # assert (rather than raise) is kept deliberately: the documented
        # contract is AssertionError, which callers may already catch.
        # NOTE: stripped under `python -O`.
        assert os.path.exists(
            checked_path
        ), f"The given path {checked_path} does not exist"

    return checked_path
+ # check the given path, no need to check if the path exists because ignore_errors is set + path = check_path(path) try: if os.path.isdir(path): @@ -98,18 +138,19 @@ def purge_path(path: str, ignore_errors: bool = True) -> None: def determine_data_home(): + # default path + default_path = check_path("~/.pypots/tsdb") + # read data_home from the config file + # data_home may be changed by users, hence not necessarily equal to the default path config = read_configs() data_home_path = config.get("path", "data_home") - # replace '~' with the absolute path if existing in the path - data_home_path = data_home_path.replace("~", os.path.expanduser("~")) + data_home_path = check_path(data_home_path) # old cached dataset dir path used in TSDB v0.2 - old_cached_dataset_dir_02 = os.path.join( - os.path.expanduser("~"), ".tsdb_cached_datasets" - ) + old_cached_dataset_dir_02 = check_path("~/.tsdb_cached_datasets") # old cached dataset dir path used in TSDB v0.4 - old_cached_dataset_dir_04 = os.path.join(os.path.expanduser("~"), ".tsdb") + old_cached_dataset_dir_04 = check_path("~/.tsdb") if os.path.exists(old_cached_dataset_dir_02) or os.path.exists( old_cached_dataset_dir_04 @@ -127,15 +168,18 @@ def determine_data_home(): # use the path directly, may be in a portable disk cached_dataset_dir = data_home_path else: - # use the default path for initialization, - # e.g. `data_home_path` in a portable disk but the disk is not connected - default_path = os.path.join(os.path.expanduser("~"), ".pypots", "tsdb") - cached_dataset_dir = default_path - if os.path.abspath(data_home_path) != os.path.abspath(default_path): + # if the preset data_home path does not exist, + # e.g. `data_home_path` is in a portable disk that is not connected + # then use the default path + if check_path(data_home_path) != check_path(default_path): logger.warning( - f"‼️ The preset data_home path '{data_home_path}' doesn't exist. 
" - f"Using the default path '{default_path}'" + f"❗️ The preset data_home {data_home_path} doesn't exist. " + f"This may be caused by the portable disk not connected." ) + logger.warning(f"‼️ Using the default path {default_path} for now") + + cached_dataset_dir = default_path + return cached_dataset_dir @@ -151,20 +195,20 @@ def migrate(old_path: str, new_path: str) -> None: The new path of the dataset. """ - if not os.path.exists(old_path): - raise FileNotFoundError(f"Given old_path {old_path} does not exist.") + # check both old_path and new_path + old_path = check_path(old_path, check_exists=True) + new_path = check_path(new_path) + # create new_path if not exists if not os.path.exists(new_path): - # if new_path does not exist, just rename the old_path into it - new_parent_dir = os.path.abspath(os.path.join(new_path, "..")) - if not os.path.exists(new_parent_dir): - os.makedirs(new_parent_dir, exist_ok=True) + os.makedirs(new_path, exist_ok=True) + else: + logger.warning(f"‼️ Note that new_path {new_path} already exists.") - logger.warning(f"‼️ Please note that new_path {new_path} already exists.") - # if new_path exists, we have to move everything from old_path into it all_old_files = os.listdir(old_path) for f in all_old_files: old_f_path = os.path.join(old_path, f) + if os.path.isdir(old_f_path): new_f_path = os.path.join(new_path, f) shutil.copytree(old_f_path, new_f_path) @@ -172,9 +216,8 @@ def migrate(old_path: str, new_path: str) -> None: shutil.move(old_f_path, new_path) shutil.rmtree(old_path, ignore_errors=True) - logger.info( - f"Successfully migrated {old_path} to {new_path}, and deleted {old_path}" - ) + logger.info(f"Successfully migrated {old_path} to {new_path}") + logger.info(f"Purged the old path {old_path}") def migrate_cache(target_path: str) -> None: @@ -186,6 +229,9 @@ def migrate_cache(target_path: str) -> None: The new path for TSDB to store cached datasets. 
""" + # check the target path + target_path = check_path(target_path) + cached_dataset_dir = determine_data_home() migrate(cached_dataset_dir, target_path) config_parser = read_configs()