diff --git a/HISTORY.md b/HISTORY.md index 121ee4cb59..0f987baf83 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -4,7 +4,6 @@ ## Improvements -* Add wrapping for ls_recursive by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1933 * Migrate away from deprecated TileDB C++ APIs by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1958 * Pybind11 Config should honor prefix for iter by @Shelnutt2 in https://github.com/TileDB-Inc/TileDB-Py/pull/1962 * Fix test skipping by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1957 diff --git a/tiledb/cc/vfs.cc b/tiledb/cc/vfs.cc index debf87df1d..c4028d61d0 100644 --- a/tiledb/cc/vfs.cc +++ b/tiledb/cc/vfs.cc @@ -1,7 +1,5 @@ #include -#include -#include #include #include #include @@ -43,34 +41,6 @@ void init_vfs(py::module &m) { .def("_copy_file", &VFS::copy_file) .def("_ls", &VFS::ls) - .def( - "_ls_recursive", - // 1. the user provides a callback, the user callback is passed to the - // C++ function. Return an empty vector. - // 2. no callback is passed and a default callback is used. The - // default callback just appends each path gathered. Return the paths. - [](VFS &vfs, std::string uri, py::object callback) { - tiledb::Context ctx; - if (callback.is_none()) { - std::vector paths; - tiledb::VFSExperimental::ls_recursive( - ctx, vfs, uri, - [&](const std::string_view path, - uint64_t object_size) -> bool { - paths.push_back(std::string(path)); - return true; - }); - return paths; - } else { - tiledb::VFSExperimental::ls_recursive( - ctx, vfs, uri, - [&](const std::string_view path, - uint64_t object_size) -> bool { - return callback(path, object_size).cast(); - }); - return std::vector(); - } - }) .def("_touch", &VFS::touch); } diff --git a/tiledb/libtiledb.pxd b/tiledb/libtiledb.pxd index 1a3d0d9aed..d05f4ae7a5 100644 --- a/tiledb/libtiledb.pxd +++ b/tiledb/libtiledb.pxd @@ -1053,7 +1053,8 @@ cdef extern from "tiledb/tiledb.h": tiledb_ctx_t * ctx, tiledb_vfs_t * vfs, const char * path, - void * data) nogil + int (*callback)(const char *, void *) noexcept, + void * data) int tiledb_vfs_move_file( tiledb_ctx_t* ctx, diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py index 5559cb3f8b..a6f1862cbc 100644 --- a/tiledb/tests/test_vfs.py +++ b/tiledb/tests/test_vfs.py @@ -270,7 +270,7 @@ def test_ls(self): basepath = self.path("test_vfs_ls") self.vfs.create_dir(basepath) for id in (1, 2, 3): - dir = os.path.join(basepath, f"dir{id}") + dir = os.path.join(basepath, "dir" + str(id)) self.vfs.create_dir(dir) fname = os.path.join(basepath, "file_" + str(id)) with tiledb.FileIO(self.vfs, fname, "wb") as fio: @@ -291,106 +291,6 @@ def test_ls(self): ), ) - @pytest.mark.skipif( - pytest.tiledb_vfs not in ["s3", "file"], reason="Only test on S3 and local" - ) - def test_ls_recursive(self): - # Create a nested directory structure to test recursive listing - basepath = self.path("test_vfs_ls_recursive") - self.vfs.create_dir(basepath) - - dir = os.path.join(basepath, "dir1") - self.vfs.create_dir(dir) - - fname = os.path.join(dir, "file_1") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - fname = os.path.join(dir, "file_2") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - dir = os.path.join(basepath, "dir2") - self.vfs.create_dir(dir) - - dir2 = os.path.join(dir, "dir2_1") - self.vfs.create_dir(dir2) - - fname = os.path.join(dir2, "file_1") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - fname = os.path.join(dir2, "file_2") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - dir2 = os.path.join(dir, "dir2_2") - self.vfs.create_dir(dir2) - - fname = os.path.join(dir2, "file_1") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - expected = [ - "dir1", - "dir1/file_1", - "dir1/file_2", - "dir2", - "dir2/dir2_1", - "dir2/dir2_1/file_1", - "dir2/dir2_1/file_2", - "dir2/dir2_2", - "dir2/dir2_2/file_1", - ] - - self.assertSetEqual( - set(expected), - set( - map( - # # Keep only the paths after the basepath and normalize them to work on all platforms - lambda x: os.path.normpath( - x.split("test_vfs_ls_recursive/")[1] - ).replace("\\", "/"), - self.vfs.ls_recursive(basepath), - ) - ), - ) - - # Check with user provided callback - callback_results = [] - - def callback(uri, _): # we don't use the second argument 'is_dir' - callback_results.append(uri) - return True - - self.vfs.ls_recursive(basepath, callback) - - self.assertSetEqual( - set(expected), - set( - map( - # Keep only the paths after the basepath and normalize them to work on all platforms - lambda x: os.path.normpath( - x.split("test_vfs_ls_recursive/")[1] - ).replace("\\", "/"), - callback_results, - ) - ), - ) - - # Can also be called by calling ls with recursive=True - self.assertSetEqual( - set(expected), - set( - map( - # Keep only the paths after the basepath and normalize them to work on all platforms - lambda x: os.path.normpath( - x.split("test_vfs_ls_recursive/")[1] - ).replace("\\", "/"), - self.vfs.ls(basepath, recursive=True), - ) - ), - ) - def test_dir_size(self): vfs = tiledb.VFS() diff --git a/tiledb/vfs.py b/tiledb/vfs.py index 456232c782..dc5f0f5ebf 100644 --- a/tiledb/vfs.py +++ b/tiledb/vfs.py @@ -1,7 +1,7 @@ import io import os from types import TracebackType -from typing import Callable, List, Optional, Type, Union +from typing import List, Optional, Type, Union import numpy as np @@ -287,39 +287,17 @@ def copy_file(self, old_uri: _AnyPath, new_uri: _AnyPath): """ return self._copy_file(_to_path_str(old_uri), _to_path_str(new_uri)) - def ls(self, uri: _AnyPath, recursive: bool = False) -> List[str]: + def ls(self, uri: _AnyPath) -> List[str]: """Retrieves the children in directory `uri`. This function is non-recursive, i.e., it focuses in one level below `uri`. :param str uri: Input URI of the directory - :param bool recursive: If True, recursively list all children in the directory :rtype: List[str] :return: The children in directory `uri` """ - if recursive: - return VFS._ls_recursive(self, _to_path_str(uri), None) - return self._ls(_to_path_str(uri)) - def ls_recursive( - self, uri: _AnyPath, callback: Optional[Callable[[str, int], bool]] = None - ): - """Recursively lists objects at the input URI, invoking the provided callback - on each entry gathered. The callback is passed the data pointer provided - on each invocation and is responsible for writing the collected results - into this structure. If the callback returns True, the walk will continue. - If False, the walk will stop. If an error is thrown, the walk will stop and - the error will be propagated to the caller using std::throw_with_nested. - - Currently only S3 is supported, and the `path` must be a valid S3 URI. - - :param str uri: Input URI of the directory - :param callback: Callback function to invoke on each entry - - """ - return VFS._ls_recursive(self, _to_path_str(uri), callback) - def touch(self, uri: _AnyPath): """Touches a file with the input URI, i.e., creates a new empty file.