From 806d1aed01e85adb1173fedbab8777c2c076b536 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Mon, 25 Nov 2024 08:14:10 +0200 Subject: [PATCH 1/6] Implement serialization functionality for `Config` and `VFS` objects (#2110) * Implement serialization functionality for VFS objects * Implement serialization for Config * Remove PatchedConfig and PatchedCtx --------- Co-authored-by: Theodore Tsirpanis --- tiledb/ctx.py | 14 ++++++++ tiledb/tests/conftest.py | 48 ------------------------- tiledb/tests/test_context_and_config.py | 19 ++++++++++ tiledb/tests/test_vfs.py | 16 +++++++++ tiledb/vfs.py | 25 ++++++++++--- 5 files changed, 69 insertions(+), 53 deletions(-) diff --git a/tiledb/ctx.py b/tiledb/ctx.py index b4898f3767..cc1a6314d6 100644 --- a/tiledb/ctx.py +++ b/tiledb/ctx.py @@ -293,6 +293,20 @@ def save(self, uri: str): """ self.save_to_file(uri) + def __reduce__(self): + """ + Customize the pickling process by defining how to serialize + and reconstruct the Config object. + """ + state = self.dict() + return (self.__class__, (), state) + + def __setstate__(self, state): + """ + Customize how the Config object is restored from its serialized state. + """ + self.__init__(state) + class ConfigKeys: """ diff --git a/tiledb/tests/conftest.py b/tiledb/tests/conftest.py index 6565dd89e9..ecce429e50 100644 --- a/tiledb/tests/conftest.py +++ b/tiledb/tests/conftest.py @@ -52,54 +52,6 @@ def pytest_configure(config): # default must be set here rather than globally pytest.tiledb_vfs = "file" - vfs_config(config) - - -def vfs_config(pytestconfig): - vfs_config_override = {} - - vfs = pytestconfig.getoption("vfs") - if vfs == "s3": - pytest.tiledb_vfs = "s3" - - vfs_config_override.update( - { - "vfs.s3.endpoint_override": "localhost:9999", - "vfs.s3.aws_access_key_id": "minio", - "vfs.s3.aws_secret_access_key": "miniosecretkey", - "vfs.s3.scheme": "https", - "vfs.s3.verify_ssl": False, - "vfs.s3.use_virtual_addressing": False, - } - ) - - vfs_config_arg = pytestconfig.getoption("vfs-config", None) - if vfs_config_arg: - pass - - tiledb._orig_ctx = tiledb.Ctx - - def get_config(config): - final_config = {} - if isinstance(config, tiledb.Config): - final_config = config.dict() - elif config: - final_config = config - - final_config.update(vfs_config_override) - return final_config - - class PatchedCtx(tiledb.Ctx): - def __init__(self, config=None): - super().__init__(get_config(config)) - - class PatchedConfig(tiledb.Config): - def __init__(self, params=None): - super().__init__(get_config(params)) - - tiledb.Ctx = PatchedCtx - tiledb.Config = PatchedConfig - @pytest.fixture(scope="function", autouse=True) def isolate_os_fork(original_os_fork): diff --git a/tiledb/tests/test_context_and_config.py b/tiledb/tests/test_context_and_config.py index 054338991c..13ce7b72fa 100644 --- a/tiledb/tests/test_context_and_config.py +++ b/tiledb/tests/test_context_and_config.py @@ -1,4 +1,6 @@ +import io import os +import pickle import subprocess import sys import xml @@ -261,3 +263,20 @@ def test_config_repr_html(self): pytest.fail( f"Could not parse config._repr_html_(). Saw {config._repr_html_()}" ) + + def test_config_pickle(self): + # test that Config can be pickled and unpickled + config = tiledb.Config( + { + "rest.use_refactored_array_open": "false", + "rest.use_refactored_array_open_and_query_submit": "true", + "vfs.azure.storage_account_name": "myaccount", + } + ) + with io.BytesIO() as buf: + pickle.dump(config, buf) + buf.seek(0) + config2 = pickle.load(buf) + + self.assertIsInstance(config2, tiledb.Config) + self.assertEqual(config2.dict(), config.dict()) diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py index 8219539a02..da71057741 100644 --- a/tiledb/tests/test_vfs.py +++ b/tiledb/tests/test_vfs.py @@ -1,6 +1,7 @@ import io import os import pathlib +import pickle import random import sys @@ -239,6 +240,21 @@ def test_io(self): txtio = io.TextIOWrapper(f2, encoding="utf-8") self.assertEqual(txtio.readlines(), lines) + def test_pickle(self): + # test that vfs can be pickled and unpickled with config options + config = tiledb.Config( + {"vfs.s3.region": "eu-west-1", "vfs.max_parallel_ops": "1"} + ) + vfs = tiledb.VFS(config) + with io.BytesIO() as buf: + pickle.dump(vfs, buf) + buf.seek(0) + vfs2 = pickle.load(buf) + + self.assertIsInstance(vfs2, tiledb.VFS) + self.assertEqual(vfs2.config()["vfs.s3.region"], "eu-west-1") + self.assertEqual(vfs2.config()["vfs.max_parallel_ops"], "1") + def test_sc42569_vfs_memoryview(self): # This test is to ensure that giving np.ndarray buffer to readinto works # when trying to write bytes that cannot be converted to float32 or int32 diff --git a/tiledb/vfs.py b/tiledb/vfs.py index 7a703fae06..5b27131915 100644 --- a/tiledb/vfs.py +++ b/tiledb/vfs.py @@ -25,7 +25,7 @@ class VFS(lt.VFS): """ def __init__(self, config: Union[Config, dict] = None, ctx: Optional[Ctx] = None): - ctx = ctx or default_ctx() + self.ctx = ctx or default_ctx() if config: from .libtiledb import Config @@ -39,12 +39,12 @@ def __init__(self, config: Union[Config, dict] = None, ctx: Optional[Ctx] = None raise ValueError("`config` argument must be of type Config or dict") # Convert all values to strings - config = {k: str(v) for k, v in config.items()} + self.config_dict = {k: str(v) for k, v in config.items()} - ccfg = tiledb.Config(config) - super().__init__(ctx, ccfg) + ccfg = tiledb.Config(self.config_dict) + super().__init__(self.ctx, ccfg) else: - super().__init__(ctx) + super().__init__(self.ctx) def ctx(self) -> Ctx: """ @@ -329,6 +329,21 @@ def touch(self, uri: _AnyPath): isfile = is_file size = file_size + # pickling support + def __getstate__(self): + # self.config_dict might not exist. In that case use the config from ctx. + if hasattr(self, "config_dict"): + config_dict = self.config_dict + else: + config_dict = self.config().dict() + return (config_dict,) + + def __setstate__(self, state): + config_dict = state[0] + config = Config(params=config_dict) + ctx = Ctx(config) + self.__init__(config=config, ctx=ctx) + class FileIO(io.RawIOBase): """TileDB FileIO class that encapsulates files opened by tiledb.VFS. The file From cc0a5f3fcb154b1eda15824bac54c378dc9cd227 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Mon, 2 Dec 2024 22:51:15 +0700 Subject: [PATCH 2/6] Change minimum libtiledb version for `CurrentDomain` to 2.26 to fix daily tests - this PR cannot be included in a 0.31.* release (#2117) --- tiledb/__init__.py | 2 +- tiledb/array_schema.py | 4 ++-- tiledb/cc/current_domain.cc | 2 +- tiledb/cc/enum.cc | 2 +- tiledb/cc/schema.cc | 2 +- tiledb/schema_evolution.cc | 2 +- tiledb/schema_evolution.py | 2 +- tiledb/tests/test_current_domain.py | 4 ++-- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tiledb/__init__.py b/tiledb/__init__.py index c51250bb27..e307f5b9fe 100644 --- a/tiledb/__init__.py +++ b/tiledb/__init__.py @@ -21,7 +21,7 @@ from tiledb.libtiledb import version as libtiledb_version -if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 25: +if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 26: from .current_domain import CurrentDomain from .ndrectangle import NDRectangle diff --git a/tiledb/array_schema.py b/tiledb/array_schema.py index d44d4b5138..7bdddc909b 100644 --- a/tiledb/array_schema.py +++ b/tiledb/array_schema.py @@ -14,7 +14,7 @@ from .domain import Domain from .filter import Filter, FilterList -if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 25: +if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 26: from .current_domain import CurrentDomain _tiledb_order_to_string = { @@ -388,7 +388,7 @@ def has_dim_label(self, name: str) -> bool: """ return self._has_dim_label(self._ctx, name) - if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 25: + if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 26: @property def current_domain(self) -> CurrentDomain: diff --git a/tiledb/cc/current_domain.cc b/tiledb/cc/current_domain.cc index 2436fbf730..713abf7476 100644 --- a/tiledb/cc/current_domain.cc +++ b/tiledb/cc/current_domain.cc @@ -16,7 +16,7 @@ using namespace tiledbpy::common; namespace py = pybind11; void init_current_domain(py::module &m) { -#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 25 +#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 26 py::class_(m, "NDRectangle") .def(py::init()) diff --git a/tiledb/cc/enum.cc b/tiledb/cc/enum.cc index 0aea25ce44..5b3b8f8c7c 100644 --- a/tiledb/cc/enum.cc +++ b/tiledb/cc/enum.cc @@ -175,7 +175,7 @@ void init_enums(py::module &m) { .value("TIFF", TILEDB_MIME_TIFF) .value("PDF", TILEDB_MIME_PDF); -#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 25 +#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 26 py::enum_(m, "CurrentDomainType") .value("NDRECTANGLE", TILEDB_NDRECTANGLE); #endif diff --git a/tiledb/cc/schema.cc b/tiledb/cc/schema.cc index 0069c8c83a..0b8e47a99b 100644 --- a/tiledb/cc/schema.cc +++ b/tiledb/cc/schema.cc @@ -278,7 +278,7 @@ void init_schema(py::module &m) { ArraySchemaExperimental::add_enumeration(ctx, schema, enmr); }) -#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 25 +#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 26 .def("_current_domain", [](const ArraySchema &schema, const Context &ctx) { return ArraySchemaExperimental::current_domain(ctx, schema); diff --git a/tiledb/schema_evolution.cc b/tiledb/schema_evolution.cc index 89889e0adc..3d68796a83 100644 --- a/tiledb/schema_evolution.cc +++ b/tiledb/schema_evolution.cc @@ -100,7 +100,7 @@ void init_schema_evolution(py::module &m) { } }) -#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 25 +#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 26 .def("expand_current_domain", [](ArraySchemaEvolution &inst, py::object current_domain_py) { tiledb_current_domain_t *current_domain_c = diff --git a/tiledb/schema_evolution.py b/tiledb/schema_evolution.py index ead06858c6..16fae5a541 100644 --- a/tiledb/schema_evolution.py +++ b/tiledb/schema_evolution.py @@ -52,7 +52,7 @@ def extend_enumeration(self, enmr: Enumeration): self.ase.extend_enumeration(enmr) - if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 25: + if libtiledb_version()[0] == 2 and libtiledb_version()[1] >= 26: from .current_domain import CurrentDomain def expand_current_domain(self, current_domain: CurrentDomain): diff --git a/tiledb/tests/test_current_domain.py b/tiledb/tests/test_current_domain.py index 25a39f80be..402d407b0e 100644 --- a/tiledb/tests/test_current_domain.py +++ b/tiledb/tests/test_current_domain.py @@ -7,9 +7,9 @@ import tiledb import tiledb.cc as lt -if not (tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] >= 25): +if not (tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] >= 26): pytest.skip( - "CurrentDomain is only available in TileDB 2.25 and later", + "CurrentDomain is only available in TileDB 2.26 and later", allow_module_level=True, ) From f72e5280547c05cf79babad99f177cfd7772f0ed Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:03:38 +0700 Subject: [PATCH 3/6] Fix TypeError: 'FragmentsInfoIterator' object is not iterable (#2119) --- tiledb/fragment.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tiledb/fragment.py b/tiledb/fragment.py index 04be4021ad..09c96c9954 100644 --- a/tiledb/fragment.py +++ b/tiledb/fragment.py @@ -188,6 +188,9 @@ def __init__(self, fragments): self._fragments = fragments self._index = 0 + def __iter__(self): + return self + def __next__(self): if self._index < len(self._fragments): fi = FragmentInfo(self._fragments, self._index) From c8c687a6f16852bf35faf3d0b044b0daa113ec14 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Wed, 11 Dec 2024 17:10:23 +0700 Subject: [PATCH 4/6] Eliminate hacky use of timestamps and `time.sleep` in tests (#2120) --- tiledb/tests/cc/test_group.py | 1 - tiledb/tests/test_fixes.py | 21 ++++---------- tiledb/tests/test_group.py | 33 +++++++++------------ tiledb/tests/test_libtiledb.py | 9 +----- tiledb/tests/test_metadata.py | 52 +++++++--------------------------- 5 files changed, 31 insertions(+), 85 deletions(-) diff --git a/tiledb/tests/cc/test_group.py b/tiledb/tests/cc/test_group.py index b9513990db..f2245758e7 100644 --- a/tiledb/tests/cc/test_group.py +++ b/tiledb/tests/cc/test_group.py @@ -28,7 +28,6 @@ def test_group_metadata(tmp_path): assert_array_equal(grp._get_metadata("flt", False)[0], flt_data) grp._close() - time.sleep(0.001) grp._open(lt.QueryType.WRITE) grp._delete_metadata("int") grp._close() diff --git a/tiledb/tests/test_fixes.py b/tiledb/tests/test_fixes.py index a4ab4f65f0..93f7bcd9eb 100644 --- a/tiledb/tests/test_fixes.py +++ b/tiledb/tests/test_fixes.py @@ -364,7 +364,7 @@ class SOMA919Test(DiskTestCase): We've distilled @atolopko-czi's gist example using the TileDB-Py API directly. """ - def run_test(self, use_timestamps): + def run_test(self): import tempfile import numpy as np @@ -372,18 +372,8 @@ def run_test(self, use_timestamps): import tiledb root_uri = tempfile.mkdtemp() - - if use_timestamps: - group_ctx100 = tiledb.Ctx( - { - "sm.group.timestamp_start": 100, - "sm.group.timestamp_end": 100, - } - ) - timestamp = 100 - else: - group_ctx100 = tiledb.Ctx() - timestamp = None + group_ctx100 = tiledb.Ctx() + timestamp = None # create the group and add a dummy subgroup "causes_bug" tiledb.Group.create(root_uri, ctx=group_ctx100) @@ -411,13 +401,12 @@ def run_test(self, use_timestamps): tiledb.libtiledb.version() < (2, 15, 0), reason="SOMA919 fix implemented in libtiledb 2.15", ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_soma919(self, use_timestamps): + def test_soma919(self): N = 100 fails = 0 for i in range(N): try: - self.run_test(use_timestamps) + self.run_test() except AssertionError: fails += 1 if fails > 0: diff --git a/tiledb/tests/test_group.py b/tiledb/tests/test_group.py index 000120d879..c347843a68 100644 --- a/tiledb/tests/test_group.py +++ b/tiledb/tests/test_group.py @@ -120,10 +120,7 @@ def test_move_group(self): ), ), ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_group_metadata( - self, int_data, flt_data, str_data, str_type, capfd, use_timestamps - ): + def test_group_metadata(self, int_data, flt_data, str_data, str_type, capfd): def values_equal(lhs, rhs): if isinstance(lhs, np.ndarray): if not isinstance(rhs, np.ndarray): @@ -139,13 +136,13 @@ def values_equal(lhs, rhs): grp_path = self.path("test_group_metadata") tiledb.Group.create(grp_path) - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "w", cfg) as grp: grp.meta["int"] = int_data grp.meta["flt"] = flt_data grp.meta["str"] = str_data - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "r", cfg) as grp: assert len(grp.meta) == 3 assert "int" in grp.meta @@ -162,11 +159,11 @@ def values_equal(lhs, rhs): assert "Type: DataType.INT" in metadata_dump assert f"Type: DataType.{str_type}" in metadata_dump - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "w", cfg) as grp: del grp.meta["int"] - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "r", cfg) as grp: assert len(grp.meta) == 2 assert "int" not in grp.meta @@ -373,8 +370,7 @@ class GroupMetadataTest(GroupTestCase): (np.array([1, 2, 3]), np.array([1.5, 2.5, 3.5]), np.array(["x"])), ), ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_group_metadata(self, int_data, flt_data, str_data, use_timestamps): + def test_group_metadata(self, int_data, flt_data, str_data): def values_equal(lhs, rhs): if isinstance(lhs, np.ndarray): if not isinstance(rhs, np.ndarray): @@ -390,13 +386,13 @@ def values_equal(lhs, rhs): grp_path = self.path("test_group_metadata") tiledb.Group.create(grp_path) - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "w", cfg) as grp: grp.meta["int"] = int_data grp.meta["flt"] = flt_data grp.meta["str"] = str_data - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "r", cfg) as grp: assert grp.meta.keys() == {"int", "flt", "str"} assert len(grp.meta) == 3 @@ -407,11 +403,11 @@ def values_equal(lhs, rhs): assert "str" in grp.meta assert values_equal(grp.meta["str"], str_data) - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "w", cfg) as grp: del grp.meta["int"] - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(grp_path, "r", cfg) as grp: assert len(grp.meta) == 2 assert "int" not in grp.meta @@ -641,21 +637,20 @@ def test_numpy(self, test_vals, ndarray): self.assert_metadata_roundtrip(grp.meta, test_vals) grp.close() - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_consolidation_and_vac(self, use_timestamps): + def test_consolidation_and_vac(self): vfs = tiledb.VFS() path = self.path("test_consolidation_and_vac") tiledb.Group.create(path) - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(path, "w", cfg) as grp: grp.meta["meta"] = 1 - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(path, "w", cfg) as grp: grp.meta["meta"] = 2 - cfg = tiledb.Config({"sm.group.timestamp_end": 3} if use_timestamps else {}) + cfg = tiledb.Config() with tiledb.Group(path, "w", cfg) as grp: grp.meta["meta"] = 3 diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index bddefd68c9..02f46a5551 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -823,8 +823,7 @@ def test_ncell_int(self): assert_array_equal(T, R) assert_array_equal(T, R.multi_index[0:2][""]) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_open_with_timestamp(self, use_timestamps): + def test_open_with_timestamp(self): A = np.zeros(3) dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3, dtype=np.int64)) @@ -841,15 +840,9 @@ def test_open_with_timestamp(self, use_timestamps): self.assertEqual(T[1], 0) self.assertEqual(T[2], 0) - if use_timestamps: - # sleep 200ms and write - time.sleep(0.2) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[0:1] = 1 - if use_timestamps: - # sleep 200ms and write - time.sleep(0.2) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[1:2] = 2 diff --git a/tiledb/tests/test_metadata.py b/tiledb/tests/test_metadata.py index 0b13073abe..1b2a0bca22 100644 --- a/tiledb/tests/test_metadata.py +++ b/tiledb/tests/test_metadata.py @@ -189,8 +189,7 @@ def test_basic(self, test_vals): @given(st_metadata, st_ndarray) @settings(deadline=None) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_numpy(self, use_timestamps, test_vals, ndarray): + def test_numpy(self, test_vals, ndarray): test_vals["ndarray"] = ndarray path = self.path() @@ -203,9 +202,6 @@ def test_numpy(self, use_timestamps, test_vals, ndarray): with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) - if use_timestamps: - # test resetting a key with a ndarray value to a non-ndarray value - time.sleep(0.001) with tiledb.Array(path, "w") as A: A.meta["ndarray"] = 42 test_vals["ndarray"] = 42 @@ -221,9 +217,6 @@ def test_numpy(self, use_timestamps, test_vals, ndarray): with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) - if use_timestamps: - # test del ndarray key - time.sleep(0.001) with tiledb.Array(path, "w") as A: del A.meta["ndarray"] del test_vals["ndarray"] @@ -231,9 +224,6 @@ def test_numpy(self, use_timestamps, test_vals, ndarray): with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) - if use_timestamps: - # test update - time.sleep(0.001) with tiledb.Array(path, mode="w") as A: test_vals.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) A.meta.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) @@ -245,8 +235,7 @@ def test_numpy(self, use_timestamps, test_vals, ndarray): @tiledb.scope_ctx( {"sm.vacuum.mode": "array_meta", "sm.consolidation.mode": "array_meta"} ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_consecutive(self, use_timestamps): + def test_consecutive(self): vfs = tiledb.VFS() path = self.path("test_md_consecutive") @@ -259,17 +248,10 @@ def test_consecutive(self, use_timestamps): randutf8s = [rand_utf8(i) for i in np.random.randint(1, 30, size=write_count)] # write 100 times, then consolidate - if use_timestamps: - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta["randint"] = int(randints[i]) - A.meta["randutf8"] = randutf8s[i] - time.sleep(0.001) - else: - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta["randint"] = int(randints[i]) - A.meta["randutf8"] = randutf8s[i] + for i in range(write_count): + with tiledb.Array(path, mode="w") as A: + A.meta["randint"] = int(randints[i]) + A.meta["randutf8"] = randutf8s[i] self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 100) @@ -296,23 +278,11 @@ def test_consecutive(self, use_timestamps): self.assertEqual(A.meta["randutf8"], randutf8s[-1]) # use randutf8s as keys, then consolidate - if use_timestamps: - for _ in range(2): - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta[randutf8s[i] + "{}".format(randints[i])] = int( - randints[i] - ) - A.meta[randutf8s[i]] = randutf8s[i] - time.sleep(0.001) - else: - for _ in range(2): - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta[randutf8s[i] + "{}".format(randints[i])] = int( - randints[i] - ) - A.meta[randutf8s[i]] = randutf8s[i] + for _ in range(2): + for i in range(write_count): + with tiledb.Array(path, mode="w") as A: + A.meta[randutf8s[i] + "{}".format(randints[i])] = int(randints[i]) + A.meta[randutf8s[i]] = randutf8s[i] # test data with tiledb.Array(path) as A: From 07bb76af986e02dd6717939a014ead35920b69ad Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis Date: Thu, 12 Dec 2024 13:13:26 +0200 Subject: [PATCH 5/6] Make assertion generic --- tiledb/tests/test_schema_evolution.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tiledb/tests/test_schema_evolution.py b/tiledb/tests/test_schema_evolution.py index efadd0a741..877332d034 100644 --- a/tiledb/tests/test_schema_evolution.py +++ b/tiledb/tests/test_schema_evolution.py @@ -234,7 +234,10 @@ def test_schema_evolution_extend_check_bad_type(): reason="Dropping a fixed-sized attribute and adding it back" "as a var-sized attribute is not supported in TileDB < 2.27", ) -def test_schema_evolution_drop_fixed_attribute_and_add_back_as_var_sized(tmp_path): +@pytest.mark.parametrize("dtype_str", ["S", "U"]) +def test_schema_evolution_drop_fixed_attribute_and_add_back_as_var_sized( + tmp_path, dtype_str +): ctx = tiledb.default_ctx() uri = str(tmp_path) attrs = [ @@ -260,21 +263,23 @@ def test_schema_evolution_drop_fixed_attribute_and_add_back_as_var_sized(tmp_pat assert A.schema.attr("b").dtype == np.int32 se = tiledb.ArraySchemaEvolution(ctx) - newattr = tiledb.Attr("a", dtype="S", var=True) + newattr = tiledb.Attr("a", dtype=dtype_str, var=True) se.add_attribute(newattr) se.array_evolve(uri) # check schema and data after adding attribute back as a var-sized attribute with tiledb.open(uri) as A: assert A.schema.has_attr("a") - assert A.schema.attr("a").dtype == "S" + assert A.schema.attr("a").dtype == dtype_str assert A.schema.attr("b").dtype == np.int32 - # check that each value == b'\x80' (empty byte) - assert_array_equal(A[:]["a"], np.array([b"\x80" for _ in range(10)])) + # check that each value equals to the fill value of "a" attribute + assert_array_equal(A[:]["a"], np.array([newattr.fill] * 10, dtype=dtype_str)) + # check that nothing has changed for the "b" attribute + assert_array_equal(A[:]["b"], original_data) # add new data to the array new_data = np.array( - ["tiledb-string-n.{}".format(i) for i in range(1, 11)], dtype="S" + ["tiledb-string-n.{}".format(i) for i in range(1, 11)], dtype=dtype_str ) with tiledb.open(uri, "w") as A: A[:] = {"a": new_data, "b": original_data} From cabc3e8c75260a0580fd0b0a53f65208eebf241d Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis Date: Thu, 12 Dec 2024 13:23:47 +0200 Subject: [PATCH 6/6] Need 2.27 to avoid test-skipping --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dfd75c62dc..6d8ed37ed9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,8 +46,8 @@ if (NOT TileDB_FOUND) message(STATUS "Downloading TileDB default version ...") # Download latest release fetch_prebuilt_tiledb( - VERSION 2.26.2 - RELLIST_HASH SHA256=86c19d7c5246cb18e370a4272cead63ea84bd651789842e618de4d57d4510522 + VERSION 2.27.0 + RELLIST_HASH SHA256=8056514b1949cdab19405376e32e299578491a6d3e953321d12d761f94dc19b9 ) endif() find_package(TileDB REQUIRED)