From 1f1ff0e6300c16941237a1b01e02ec6074aec651 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 8 Feb 2023 08:34:31 -0800 Subject: [PATCH 1/3] WIP --- cloudpathlib/__init__.py | 7 +++ cloudpathlib/patches.py | 112 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 cloudpathlib/patches.py diff --git a/cloudpathlib/__init__.py b/cloudpathlib/__init__.py index da4fe28e..03cf245b 100644 --- a/cloudpathlib/__init__.py +++ b/cloudpathlib/__init__.py @@ -1,9 +1,11 @@ +import os import sys from .anypath import AnyPath from .azure.azblobclient import AzureBlobClient from .azure.azblobpath import AzureBlobPath from .cloudpath import CloudPath, implementation_registry +from .patches import patch_open from .s3.s3client import S3Client from .gs.gspath import GSPath from .gs.gsclient import GSClient @@ -27,6 +29,11 @@ "implementation_registry", "GSClient", "GSPath", + "patch_open" "S3Client", "S3Path", ] + + +if bool(os.environ.get("CLOUDPATHLIB_PATCH_OPEN", "")): + patch_open() diff --git a/cloudpathlib/patches.py b/cloudpathlib/patches.py new file mode 100644 index 00000000..a1a2b645 --- /dev/null +++ b/cloudpathlib/patches.py @@ -0,0 +1,112 @@ +import os + +from .cloudpath import CloudPath + + +def _cloudpath_open(*args, **kwargs): + if isinstance(args[0], CloudPath): + return args[0].open(*args[1:], **kwargs) + else: + return open(*args, **kwargs) + + +def patch_open(): + open = _cloudpath_open + + +def _dispatch_to_pathlib(path, pathlib_func, os_func, pathlib_args=None, pathlib_kwargs=None, *args, **kwargs): + if pathlib_args is None: + pathlib_args = args + + if pathlib_kwargs is None: + pathlib_kwargs = kwargs + + if isinstance(path, CloudPath): + return pathlib_func(path, *pathlib_args, **pathlib_kwargs) + else: + return os_func(*args, **kwargs) + + +def _cloudpath_os_listdir(path="."): + return _dispatch_to_pathlib(path, lambda path: list(path.iterdir()), os.listdir, path=path) + + +def _cloudpath_os_lstat(path, *, dir_fd=None): + return _dispatch_to_pathlib(path, CloudPath.stat, os.lstat, path, dir_fd=dir_fd) + +def _cloudpath_os_mkdir(path, mode=0o777, *, dir_fd=None): + return _dispatch_to_pathlib(path, CloudPath.mkdir, os.mkdir, path, dir_fd=dir_fd) + +def _cloudpath_os_makedirs(name, mode=0o777, exist_ok=False): + pass + +def _cloudpath_os_remove(path, *, dir_fd=None): + pass + +def _cloudpath_os_removedirs(name): + pass + +def _cloudpath_os_rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None): + pass + +def _cloudpath_os_renames(old, new): + pass + +def _cloudpath_os_replace(src, dst, *, src_dir_fd=None, dst_dir_fd=None): + pass + +def _cloudpath_os_rmdir(path, *, dir_fd=None): + pass + +def _cloudpath_os_scandir(path='.'): + pass + +def _cloudpath_os_stat(path, *, dir_fd=None, follow_symlinks=True): + if isinstance(path, CloudPath): + return path.stat() + else: + return os.stat(path, dir_fd=dir_fd, follow_symlinks=follow_symlinks) + +def _cloudpath_os_unlink(path, *, dir_fd=None): + pass + +def _cloudpath_os_walk(top, topdown=True, onerror=None, followlinks=False): + pass + +def _cloudpath_os_path_basename(path): + pass + +def _cloudpath_os_path_exists(path): + pass + +def _cloudpath_os_path_getatime(path): + pass + +def _cloudpath_os_path_getmtime(path): + pass + +def _cloudpath_os_path_getctime(path): + pass + +def _cloudpath_os_path_getsize(path): + pass + +def _cloudpath_os_path_isfile(path): + pass + +def _cloudpath_os_path_isdir(path): + pass + +def _cloudpath_os_path_join(path, *paths): + pass + +def _cloudpath_os_path_split(path): + pass + +def _cloudpath_os_path_splitext(path): + pass + + +def patch_os_function(): + os.listdir = _cloudpath_os_listdir + From 0cd2397e751c8eebcbb2ac2342d65ca089f34aa4 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Sat, 11 Feb 2023 11:36:23 -0800 Subject: [PATCH 2/3] Working implementation --- cloudpathlib/__init__.py | 5 +- cloudpathlib/cloudpath.py | 8 ++ cloudpathlib/patches.py | 209 ++++++++++++++++++++++++++++---------- 3 files changed, 164 insertions(+), 58 deletions(-) diff --git a/cloudpathlib/__init__.py b/cloudpathlib/__init__.py index 03cf245b..4bff1b32 100644 --- a/cloudpathlib/__init__.py +++ b/cloudpathlib/__init__.py @@ -5,7 +5,7 @@ from .azure.azblobclient import AzureBlobClient from .azure.azblobpath import AzureBlobPath from .cloudpath import CloudPath, implementation_registry -from .patches import patch_open +from .patches import patch_open, patch_os_functions from .s3.s3client import S3Client from .gs.gspath import GSPath from .gs.gsclient import GSClient @@ -29,7 +29,8 @@ "implementation_registry", "GSClient", "GSPath", - "patch_open" + "patch_open", + "patch_os_functions", "S3Client", "S3Path", ] diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index a81237cf..0bcfa985 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -177,12 +177,20 @@ class CloudPath(metaclass=CloudPathMeta): def __init__( self: DerivedCloudPath, cloud_path: Union[str, DerivedCloudPath], + *parts: str, client: Optional["Client"] = None, ) -> None: # handle if local file gets opened. must be set at the top of the method in case any code # below raises an exception, this prevents __del__ from raising an AttributeError self._handle: Optional[IO] = None + if parts: + # ensure first part ends in "/"; (sometimes it is just prefix, sometimes a longer path) + if not str(cloud_path).endswith("/"): + cloud_path = str(cloud_path) + "/" + + cloud_path = str(cloud_path) + "/".join(p.strip("/") for p in parts) + self.is_valid_cloudpath(cloud_path, raise_on_error=True) # versions of the raw string that provide useful methods diff --git a/cloudpathlib/patches.py b/cloudpathlib/patches.py index a1a2b645..6dc90db0 100644 --- a/cloudpathlib/patches.py +++ b/cloudpathlib/patches.py @@ -1,112 +1,209 @@ +import builtins import os +import os.path from .cloudpath import CloudPath -def _cloudpath_open(*args, **kwargs): - if isinstance(args[0], CloudPath): - return args[0].open(*args[1:], **kwargs) - else: - return open(*args, **kwargs) +def _check_first_arg(*args, **kwargs): + return isinstance(args[0], CloudPath) -def patch_open(): - open = _cloudpath_open +def _check_first_arg_first_index(*args, **kwargs): + return isinstance(args[0][0], CloudPath) + +def _patch_factory(original_version, cpl_version, cpl_check=_check_first_arg): + _original = original_version -def _dispatch_to_pathlib(path, pathlib_func, os_func, pathlib_args=None, pathlib_kwargs=None, *args, **kwargs): - if pathlib_args is None: - pathlib_args = args + def _patched_version(*args, **kwargs): + if cpl_check(*args, **kwargs): + return cpl_version(*args, **kwargs) + else: + return _original(*args, **kwargs) - if pathlib_kwargs is None: - pathlib_kwargs = kwargs + original_version = _patched_version + return _patched_version - if isinstance(path, CloudPath): - return pathlib_func(path, *pathlib_args, **pathlib_kwargs) - else: - return os_func(*args, **kwargs) + +def patch_open(): + patched = _patch_factory( + builtins.open, + CloudPath.open, + ) + builtins.open = patched + return patched def _cloudpath_os_listdir(path="."): - return _dispatch_to_pathlib(path, lambda path: list(path.iterdir()), os.listdir, path=path) + return list(path.iterdir()) + +def _cloudpath_lstat(path, *, dir_fd=None): + return path.stat() -def _cloudpath_os_lstat(path, *, dir_fd=None): - return _dispatch_to_pathlib(path, CloudPath.stat, os.lstat, path, dir_fd=dir_fd) -def _cloudpath_os_mkdir(path, mode=0o777, *, dir_fd=None): - return _dispatch_to_pathlib(path, CloudPath.mkdir, os.mkdir, path, dir_fd=dir_fd) +def _cloudpath_mkdir(path, *, dir_fd=None): + return path.mkdir() + def _cloudpath_os_makedirs(name, mode=0o777, exist_ok=False): - pass + return CloudPath.mkdir(name, parents=True, exist_ok=exist_ok) + def _cloudpath_os_remove(path, *, dir_fd=None): - pass + return path.unlink() + def _cloudpath_os_removedirs(name): - pass + for d in name.parents: + d.rmdir() + def _cloudpath_os_rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None): - pass + return src.rename(dst) + def _cloudpath_os_renames(old, new): - pass + old.rename(new) # move file + _cloudpath_os_removedirs(old) # remove previous directories if empty + def _cloudpath_os_replace(src, dst, *, src_dir_fd=None, dst_dir_fd=None): - pass + return src.rename(dst) + def _cloudpath_os_rmdir(path, *, dir_fd=None): - pass + return path.rmdir() + + +def _cloudpath_os_scandir(path="."): + return path.iterdir() -def _cloudpath_os_scandir(path='.'): - pass def _cloudpath_os_stat(path, *, dir_fd=None, follow_symlinks=True): - if isinstance(path, CloudPath): - return path.stat() - else: - return os.stat(path, dir_fd=dir_fd, follow_symlinks=follow_symlinks) + return path.stat() + def _cloudpath_os_unlink(path, *, dir_fd=None): - pass + return path.unlink() + def _cloudpath_os_walk(top, topdown=True, onerror=None, followlinks=False): - pass + try: + dirs, files = [], [] + for p in top.iterdir(): + dirs.append(p) if p.is_dir() else files.append(p) + + if topdown: + yield (top, files, dirs) + + for d in dirs: + yield from _cloudpath_os_walk(d, topdown=topdown, onerror=onerror) + + if not topdown: + yield (top, files, dirs) + + except Exception as e: + if onerror is not None: + onerror(e) + else: + raise + def _cloudpath_os_path_basename(path): - pass + return path.name + + +def __common(parts): + i = 0 + + try: + while all(item[i] == parts[0][i] for item in parts[1:]): + i += 1 + except IndexError: + pass + + return parts[0][:i] + + +def _cloudpath_os_path_commonpath(paths): + common = __common([p.parts for p in paths]) + return paths[0].client.CloudPath(*common) + + +def _cloudpath_os_path_commonprefix(list): + common = __common([str(p) for p in list]) + return common + + +def _cloudpath_os_path_dirname(path): + return path.parent -def _cloudpath_os_path_exists(path): - pass def _cloudpath_os_path_getatime(path): - pass + return (path.stat().st_atime,) + def _cloudpath_os_path_getmtime(path): - pass + return (path.stat().st_mtime,) + def _cloudpath_os_path_getctime(path): - pass + return (path.stat().st_ctime,) -def _cloudpath_os_path_getsize(path): - pass -def _cloudpath_os_path_isfile(path): - pass +def _cloudpath_os_path_getsize(path): + return (path.stat().st_size,) -def _cloudpath_os_path_isdir(path): - pass def _cloudpath_os_path_join(path, *paths): - pass + for p in paths: + path /= p + return path -def _cloudpath_os_path_split(path): - pass - -def _cloudpath_os_path_splitext(path): - pass +def _cloudpath_os_path_split(path): + return path.parent, path.name -def patch_os_function(): - os.listdir = _cloudpath_os_listdir +def _cloudpath_os_path_splitext(path): + return str(path)[: -len(path.suffix)], path.suffix + + +def patch_os_functions(): + os.listdir = _patch_factory(os.listdir, _cloudpath_os_listdir) + os.lstat = _patch_factory(os.lstat, _cloudpath_lstat) + os.mkdir = _patch_factory(os.mkdir, _cloudpath_mkdir) + os.makedirs = _patch_factory(os.makedirs, _cloudpath_os_makedirs) + os.remove = _patch_factory(os.remove, _cloudpath_os_remove) + os.removedirs = _patch_factory(os.removedirs, _cloudpath_os_removedirs) + os.rename = _patch_factory(os.rename, _cloudpath_os_rename) + os.renames = _patch_factory(os.renames, _cloudpath_os_renames) + os.replace = _patch_factory(os.replace, _cloudpath_os_replace) + os.rmdir = _patch_factory(os.rmdir, _cloudpath_os_rmdir) + os.scandir = _patch_factory(os.scandir, _cloudpath_os_scandir) + os.stat = _patch_factory(os.stat, _cloudpath_os_stat) + os.unlink = _patch_factory(os.unlink, _cloudpath_os_unlink) + os.walk = _patch_factory(os.walk, _cloudpath_os_walk) + + os.path.basename = _patch_factory(os.path.basename, _cloudpath_os_path_basename) + os.path.commonpath = _patch_factory( + os.path.commonpath, _cloudpath_os_path_commonpath, cpl_check=_check_first_arg_first_index + ) + os.path.commonprefix = _patch_factory( + os.path.commonprefix, + _cloudpath_os_path_commonprefix, + cpl_check=_check_first_arg_first_index, + ) + os.path.dirname = _patch_factory(os.path.dirname, _cloudpath_os_path_dirname) + os.path.exists = _patch_factory(os.path.exists, CloudPath.exists) + os.path.getatime = _patch_factory(os.path.getatime, _cloudpath_os_path_getatime) + os.path.getmtime = _patch_factory(os.path.getmtime, _cloudpath_os_path_getmtime) + os.path.getctime = _patch_factory(os.path.getctime, _cloudpath_os_path_getctime) + os.path.getsize = _patch_factory(os.path.getsize, _cloudpath_os_path_getsize) + os.path.isfile = _patch_factory(os.path.isfile, CloudPath.is_file) + os.path.isdir = _patch_factory(os.path.isdir, CloudPath.is_dir) + os.path.join = _patch_factory(os.path.join, _cloudpath_os_path_join) + os.path.split = _patch_factory(os.path.split, _cloudpath_os_path_split) + os.path.splitext = _patch_factory(os.path.splitext, _cloudpath_os_path_splitext) From 53084253d32e896f718dcbe317e177eef83352ed Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 15 Feb 2023 15:54:47 -0800 Subject: [PATCH 3/3] more WIP --- cloudpathlib/patches.py | 1 + test-open.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 test-open.py diff --git a/cloudpathlib/patches.py b/cloudpathlib/patches.py index 6dc90db0..5d222cf5 100644 --- a/cloudpathlib/patches.py +++ b/cloudpathlib/patches.py @@ -32,6 +32,7 @@ def patch_open(): CloudPath.open, ) builtins.open = patched + CloudPath.__fspath__ = lambda x: x # turn off `fspath` return patched diff --git a/test-open.py b/test-open.py new file mode 100644 index 00000000..47fa12a0 --- /dev/null +++ b/test-open.py @@ -0,0 +1,25 @@ +import os +from cloudpathlib import CloudPath, patch_open, patch_os_functions + + +def hello(cp): + with open(cp, "a") as f: + f.write(" written") + + +if __name__ == "__main__": + patch_open() + + cp = CloudPath("s3://cloudpathlib-test-bucket/manual/text_file.txt") + cp.write_text("yah") + + hello(cp) + + print(cp.read_text()) + cp.unlink() + + patch_os_functions() + + print(list(os.walk("."))) + print(list(cp.parent.client._list_dir(cp.parent, recursive=True))) + print(list(os.walk(cp.parent)))