Skip to content

Commit

Permalink
Revert "Add wrapping for ls_recursive (#1933)"
Browse files Browse the repository at this point in the history
This reverts commit 4cb1d37.
  • Loading branch information
kounelisagis authored May 9, 2024
1 parent 228a027 commit a820b7a
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 156 deletions.
30 changes: 0 additions & 30 deletions tiledb/cc/vfs.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include <tiledb/tiledb>
#include <tiledb/tiledb_experimental>

#include <pybind11/functional.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/pytypes.h>
Expand Down Expand Up @@ -43,34 +41,6 @@ void init_vfs(py::module &m) {
.def("_copy_file", &VFS::copy_file)

.def("_ls", &VFS::ls)
.def(
"_ls_recursive",
// 1. the user provides a callback, the user callback is passed to the
// C++ function. Return an empty vector.
// 2. no callback is passed and a default callback is used. The
// default callback just appends each path gathered. Return the paths.
[](VFS &vfs, std::string uri, py::object callback) {
tiledb::Context ctx;
if (callback.is_none()) {
std::vector<std::string> paths;
tiledb::VFSExperimental::ls_recursive(
ctx, vfs, uri,
[&](const std::string_view path,
uint64_t object_size) -> bool {
paths.push_back(std::string(path));
return true;
});
return paths;
} else {
tiledb::VFSExperimental::ls_recursive(
ctx, vfs, uri,
[&](const std::string_view path,
uint64_t object_size) -> bool {
return callback(path, object_size).cast<bool>();
});
return std::vector<std::string>();
}
})
.def("_touch", &VFS::touch);
}

Expand Down
3 changes: 2 additions & 1 deletion tiledb/libtiledb.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,8 @@ cdef extern from "tiledb/tiledb.h":
tiledb_ctx_t * ctx,
tiledb_vfs_t * vfs,
const char * path,
void * data) nogil
int (*callback)(const char *, void *) noexcept,
void * data)

int tiledb_vfs_move_file(
tiledb_ctx_t* ctx,
Expand Down
102 changes: 1 addition & 101 deletions tiledb/tests/test_vfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def test_ls(self):
basepath = self.path("test_vfs_ls")
self.vfs.create_dir(basepath)
for id in (1, 2, 3):
dir = os.path.join(basepath, f"dir{id}")
dir = os.path.join(basepath, "dir" + str(id))
self.vfs.create_dir(dir)
fname = os.path.join(basepath, "file_" + str(id))
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
Expand All @@ -291,106 +291,6 @@ def test_ls(self):
),
)

@pytest.mark.skipif(
pytest.tiledb_vfs not in ["s3", "file"], reason="Only test on S3 and local"
)
def test_ls_recursive(self):
# Create a nested directory structure to test recursive listing
basepath = self.path("test_vfs_ls_recursive")
self.vfs.create_dir(basepath)

dir = os.path.join(basepath, "dir1")
self.vfs.create_dir(dir)

fname = os.path.join(dir, "file_1")
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
fio.write(b"")

fname = os.path.join(dir, "file_2")
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
fio.write(b"")

dir = os.path.join(basepath, "dir2")
self.vfs.create_dir(dir)

dir2 = os.path.join(dir, "dir2_1")
self.vfs.create_dir(dir2)

fname = os.path.join(dir2, "file_1")
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
fio.write(b"")
fname = os.path.join(dir2, "file_2")
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
fio.write(b"")

dir2 = os.path.join(dir, "dir2_2")
self.vfs.create_dir(dir2)

fname = os.path.join(dir2, "file_1")
with tiledb.FileIO(self.vfs, fname, "wb") as fio:
fio.write(b"")

expected = [
"dir1",
"dir1/file_1",
"dir1/file_2",
"dir2",
"dir2/dir2_1",
"dir2/dir2_1/file_1",
"dir2/dir2_1/file_2",
"dir2/dir2_2",
"dir2/dir2_2/file_1",
]

self.assertSetEqual(
set(expected),
set(
map(
# # Keep only the paths after the basepath and normalize them to work on all platforms
lambda x: os.path.normpath(
x.split("test_vfs_ls_recursive/")[1]
).replace("\\", "/"),
self.vfs.ls_recursive(basepath),
)
),
)

# Check with user provided callback
callback_results = []

def callback(uri, _): # we don't use the second argument 'is_dir'
callback_results.append(uri)
return True

self.vfs.ls_recursive(basepath, callback)

self.assertSetEqual(
set(expected),
set(
map(
# Keep only the paths after the basepath and normalize them to work on all platforms
lambda x: os.path.normpath(
x.split("test_vfs_ls_recursive/")[1]
).replace("\\", "/"),
callback_results,
)
),
)

# Can also be called by calling ls with recursive=True
self.assertSetEqual(
set(expected),
set(
map(
# Keep only the paths after the basepath and normalize them to work on all platforms
lambda x: os.path.normpath(
x.split("test_vfs_ls_recursive/")[1]
).replace("\\", "/"),
self.vfs.ls(basepath, recursive=True),
)
),
)

def test_dir_size(self):
vfs = tiledb.VFS()

Expand Down
26 changes: 2 additions & 24 deletions tiledb/vfs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import io
import os
from types import TracebackType
from typing import Callable, List, Optional, Type, Union
from typing import List, Optional, Type, Union

import numpy as np

Expand Down Expand Up @@ -287,39 +287,17 @@ def copy_file(self, old_uri: _AnyPath, new_uri: _AnyPath):
"""
return self._copy_file(_to_path_str(old_uri), _to_path_str(new_uri))

def ls(self, uri: _AnyPath, recursive: bool = False) -> List[str]:
def ls(self, uri: _AnyPath) -> List[str]:
"""Retrieves the children in directory `uri`. This function is
non-recursive, i.e., it focuses in one level below `uri`.
:param str uri: Input URI of the directory
:param bool recursive: If True, recursively list all children in the directory
:rtype: List[str]
:return: The children in directory `uri`
"""
if recursive:
return VFS._ls_recursive(self, _to_path_str(uri), None)

return self._ls(_to_path_str(uri))

def ls_recursive(
self, uri: _AnyPath, callback: Optional[Callable[[str, int], bool]] = None
):
"""Recursively lists objects at the input URI, invoking the provided callback
on each entry gathered. The callback is passed the data pointer provided
on each invocation and is responsible for writing the collected results
into this structure. If the callback returns True, the walk will continue.
If False, the walk will stop. If an error is thrown, the walk will stop and
the error will be propagated to the caller using std::throw_with_nested.
Currently only S3 is supported, and the `path` must be a valid S3 URI.
:param str uri: Input URI of the directory
:param callback: Callback function to invoke on each entry
"""
return VFS._ls_recursive(self, _to_path_str(uri), callback)

def touch(self, uri: _AnyPath):
"""Touches a file with the input URI, i.e., creates a new empty file.
Expand Down

0 comments on commit a820b7a

Please sign in to comment.