From e6ab05dabff026e826ca5bf542c603c1e6cf3b0e Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Thu, 28 Mar 2024 14:06:42 +0200 Subject: [PATCH 01/10] Add pytest input parameter to only set the ordering timestamp for one variation --- tiledb/tests/test_fixes.py | 27 +-- tiledb/tests/test_fragments.py | 242 +++++++++++++++++---------- tiledb/tests/test_group.py | 33 ++-- tiledb/tests/test_libtiledb.py | 141 ++++++++++------ tiledb/tests/test_metadata.py | 60 ++++--- tiledb/tests/test_query_condition.py | 62 ++++--- 6 files changed, 363 insertions(+), 202 deletions(-) diff --git a/tiledb/tests/test_fixes.py b/tiledb/tests/test_fixes.py index f27f615720..52bb70fdde 100644 --- a/tiledb/tests/test_fixes.py +++ b/tiledb/tests/test_fixes.py @@ -258,7 +258,7 @@ class SOMA919Test(DiskTestCase): We've distilled @atolopko-czi's gist example using the TileDB-Py API directly. """ - def run_test(self): + def run_test(self, use_timestamps): import tempfile import numpy as np @@ -267,13 +267,17 @@ def run_test(self): root_uri = tempfile.mkdtemp() - # this tiledb.Ctx is how we set the write timestamps for tiledb.Group - group_ctx100 = tiledb.Ctx( - { - "sm.group.timestamp_start": 100, - "sm.group.timestamp_end": 100, - } - ) + if use_timestamps: + group_ctx100 = tiledb.Ctx( + { + "sm.group.timestamp_start": 100, + "sm.group.timestamp_end": 100, + } + ) + timestamp = 100 + else: + group_ctx100 = tiledb.Ctx() + timestamp = None # create the group and add a dummy subgroup "causes_bug" tiledb.Group.create(root_uri, ctx=group_ctx100) @@ -284,7 +288,7 @@ def run_test(self): # add an array to the group (in a separate write operation) with tiledb.Group(root_uri, mode="w", ctx=group_ctx100) as expt: df_path = os.path.join(root_uri, "df") - tiledb.from_numpy(df_path, np.ones((100, 100)), timestamp=100) + tiledb.from_numpy(df_path, np.ones((100, 100)), timestamp=timestamp) expt.add(name="df", uri=df_path) # check our view of the group at current time; @@ -301,12 +305,13 @@ def 
run_test(self): tiledb.libtiledb.version() < (2, 15, 0), reason="SOMA919 fix implemented in libtiledb 2.15", ) - def test_soma919(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_soma919(self, use_timestamps): N = 100 fails = 0 for i in range(N): try: - self.run_test() + self.run_test(use_timestamps) except AssertionError: fails += 1 if fails > 0: diff --git a/tiledb/tests/test_fragments.py b/tiledb/tests/test_fragments.py index a3c3c25035..05439d7824 100644 --- a/tiledb/tests/test_fragments.py +++ b/tiledb/tests/test_fragments.py @@ -22,7 +22,8 @@ def test_uri_dne(self): with self.assertRaises(tiledb.TileDBError): tiledb.array_fragments("does_not_exist") - def test_array_fragments(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_array_fragments(self, use_timestamps): fragments = 3 A = np.zeros(fragments) @@ -34,10 +35,15 @@ def test_array_fragments(self): tiledb.DenseArray.create(uri, schema) - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 - with tiledb.DenseArray(uri, mode="w", timestamp=timestamp) as T: - T[fragment_idx : fragment_idx + 1] = fragment_idx + if use_timestamps: + for fragment_idx in range(fragments): + timestamp = fragment_idx + 1 + with tiledb.DenseArray(uri, mode="w", timestamp=timestamp) as T: + T[fragment_idx : fragment_idx + 1] = fragment_idx + else: + for fragment_idx in range(fragments): + with tiledb.DenseArray(uri, mode="w") as T: + T[fragment_idx : fragment_idx + 1] = fragment_idx fi = tiledb.array_fragments(uri) @@ -47,7 +53,8 @@ def test_array_fragments(self): assert fi.has_consolidated_metadata == (False, False, False) assert fi.nonempty_domain == (((0, 0),), ((1, 1),), ((2, 2),)) assert fi.sparse == (False, False, False) - assert fi.timestamp_range == ((1, 1), (2, 2), (3, 3)) + if use_timestamps: # timestamps cannot be predicted if not used on write + assert fi.timestamp_range == ((1, 1), (2, 2), (3, 3)) assert fi.to_vacuum == () assert hasattr(fi, 
"version") # don't pin to a specific version @@ -56,7 +63,8 @@ def test_array_fragments(self): assert frag.has_consolidated_metadata is False assert frag.nonempty_domain == ((idx, idx),) assert frag.sparse is False - assert frag.timestamp_range == (idx + 1, idx + 1) + if use_timestamps: # timestamps cannot be predicted if not used on write + assert frag.timestamp_range == (idx + 1, idx + 1) assert hasattr(frag, "version") # don't pin to a specific version try: assert xml.etree.ElementTree.fromstring(frag._repr_html_()) is not None @@ -70,7 +78,8 @@ def test_array_fragments(self): except: pytest.fail(f"Could not parse fi._repr_html_(). Saw {fi._repr_html_()}") - def test_array_fragments_var(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_array_fragments_var(self, use_timestamps): fragments = 3 uri = self.path("test_array_fragments_var") @@ -86,18 +95,28 @@ def test_array_fragments_var(self): tiledb.SparseArray.create(uri, schema) for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 data = np.array( [ - np.array([timestamp] * 1, dtype=np.int32), - np.array([timestamp] * 2, dtype=np.int32), - np.array([timestamp] * 3, dtype=np.int32), + np.array( + [fragment_idx + 1] * 1, + dtype=np.int32, + ), + np.array( + [fragment_idx + 1] * 2, + dtype=np.int32, + ), + np.array( + [fragment_idx + 1] * 3, + dtype=np.int32, + ), ], dtype="O", ) - with tiledb.SparseArray(uri, mode="w", timestamp=timestamp) as T: + with tiledb.SparseArray( + uri, mode="w", timestamp=fragment_idx + 1 if use_timestamps else None + ) as T: T[["zero", "one", "two"]] = data fragments_info = tiledb.array_fragments(uri) @@ -110,7 +129,8 @@ def test_array_fragments_var(self): for frag in fragments_info: self.assertEqual(frag.nonempty_domain, (("one", "zero"),)) - def test_dense_fragments(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_dense_fragments(self, use_timestamps): fragments = 3 A = np.zeros(fragments) @@ -123,45 +143,48 @@ 
def test_dense_fragments(self): tiledb.DenseArray.create(uri, schema) for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 + timestamp = fragment_idx + 1 if use_timestamps else None with tiledb.DenseArray(uri, mode="w", timestamp=timestamp) as T: T[fragment_idx : fragment_idx + 1] = fragment_idx fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual(fragment_info.get_num_fragments(), fragment_idx + 1) - all_expected_uris = [] - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 + if use_timestamps: # asserts are not predictable without timestamps + all_expected_uris = [] + for fragment_idx in range(fragments): + timestamp = fragment_idx + 1 - self.assertEqual( - fragment_info.get_timestamp_range()[fragment_idx], - (timestamp, timestamp), - ) + self.assertEqual( + fragment_info.get_timestamp_range()[fragment_idx], + (timestamp, timestamp), + ) - expected_uri = f"__{timestamp}_{timestamp}" - actual_uri = fragment_info.get_uri()[fragment_idx] + expected_uri = f"__{timestamp}_{timestamp}" + actual_uri = fragment_info.get_uri()[fragment_idx] - all_expected_uris.append(expected_uri) + all_expected_uris.append(expected_uri) - # use .contains because the protocol can vary - self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) - self.assertFalse(fragment_info.get_sparse()[fragment_idx]) + self.assertTrue(expected_uri in actual_uri) + self.assertTrue( + actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) + ) + self.assertFalse(fragment_info.get_sparse()[fragment_idx]) - all_actual_uris = fragment_info.get_uri() - for actual_uri, expected_uri in zip(all_actual_uris, all_expected_uris): - self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) + all_actual_uris = fragment_info.get_uri() + for actual_uri, expected_uri in 
zip(all_actual_uris, all_expected_uris): + self.assertTrue(expected_uri in actual_uri) + self.assertTrue( + actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) + ) - self.assertEqual(fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3))) - self.assertEqual(fragment_info.get_sparse(), (False, False, False)) + self.assertEqual( + fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3)) + ) + self.assertEqual(fragment_info.get_sparse(), (False, False, False)) - def test_sparse_fragments(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_sparse_fragments(self, use_timestamps): fragments = 3 A = np.zeros(fragments) @@ -174,45 +197,48 @@ def test_sparse_fragments(self): tiledb.SparseArray.create(uri, schema) for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 + timestamp = fragment_idx + 1 if use_timestamps else None with tiledb.SparseArray(uri, mode="w", timestamp=timestamp) as T: T[fragment_idx] = fragment_idx fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) self.assertEqual(fragment_info.get_num_fragments(), fragment_idx + 1) - all_expected_uris = [] - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 + if use_timestamps: # asserts are not predictable without timestamps + all_expected_uris = [] + for fragment_idx in range(fragments): + timestamp = fragment_idx + 1 - self.assertEqual( - fragment_info.get_timestamp_range()[fragment_idx], - (timestamp, timestamp), - ) + self.assertEqual( + fragment_info.get_timestamp_range()[fragment_idx], + (timestamp, timestamp), + ) - if uri[0] != "/": - uri = "/" + uri.replace("\\", "/") + if uri[0] != "/": + uri = "/" + uri.replace("\\", "/") - expected_uri = f"/__{timestamp}_{timestamp}" - actual_uri = fragment_info.get_uri()[fragment_idx] + expected_uri = f"/__{timestamp}_{timestamp}" + actual_uri = fragment_info.get_uri()[fragment_idx] - all_expected_uris.append(expected_uri) + all_expected_uris.append(expected_uri) 
- self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) - self.assertTrue(fragment_info.get_sparse()[fragment_idx]) + self.assertTrue(expected_uri in actual_uri) + self.assertTrue( + actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) + ) + self.assertTrue(fragment_info.get_sparse()[fragment_idx]) - all_actual_uris = fragment_info.get_uri() - for actual_uri, expected_uri in zip(all_actual_uris, all_expected_uris): - self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) + all_actual_uris = fragment_info.get_uri() + for actual_uri, expected_uri in zip(all_actual_uris, all_expected_uris): + self.assertTrue(expected_uri in actual_uri) + self.assertTrue( + actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) + ) - self.assertEqual(fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3))) - self.assertEqual(fragment_info.get_sparse(), (True, True, True)) + self.assertEqual( + fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3)) + ) + self.assertEqual(fragment_info.get_sparse(), (True, True, True)) def test_nonempty_domain(self): uri = self.path("test_nonempty_domain") @@ -408,7 +434,8 @@ def test_fragments_to_vacuum(self): "tiledb.libtiledb.version() < (2, 5, 0)" ), ) - def test_get_mbr(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_get_mbr(self, use_timestamps): fragments = 3 uri = self.path("test_get_mbr") @@ -419,7 +446,9 @@ def test_get_mbr(self): for fragi in range(fragments): timestamp = fragi + 1 - with tiledb.open(uri, mode="w", timestamp=timestamp) as T: + with tiledb.open( + uri, mode="w", timestamp=timestamp if use_timestamps else None + ) as T: T[np.array(range(0, fragi + 1))] = [fragi] * (fragi + 1) expected_mbrs = ((((0, 0),),), (((0, 1),),), (((0, 2),),)) @@ -453,7 +482,8 @@ def test_get_mbr(self): 
"tiledb.libtiledb.version() < (2, 5, 0)" ), ) - def test_get_var_sized_dim_mbrs(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_get_var_sized_dim_mbrs(self, use_timestamps): fragments = 3 uri = self.path("test_get_var_sized_dim_mbrs") @@ -464,7 +494,9 @@ def test_get_var_sized_dim_mbrs(self): for fragi in range(fragments): timestamp = fragi + 1 - with tiledb.open(uri, mode="w", timestamp=timestamp) as T: + with tiledb.open( + uri, mode="w", timestamp=timestamp if use_timestamps else None + ) as T: coords = [chr(i) * (fragi + 1) for i in range(97, fragi + 98)] T[np.array(coords)] = [fragi] * (fragi + 1) @@ -497,7 +529,8 @@ class CreateArrayFromFragmentsTest(DiskTestCase): @pytest.mark.skipif( sys.platform == "win32", reason="VFS.copy() does not run on windows" ) - def test_create_array_from_fragments(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_create_array_from_fragments(self, use_timestamps): dshape = (1, 3) num_frags = 10 @@ -509,7 +542,9 @@ def create_array(target_path, dshape): def write_fragments(target_path, dshape, num_frags): for i in range(1, num_frags + 1): - with tiledb.open(target_path, "w", timestamp=i) as A: + with tiledb.open( + target_path, "w", timestamp=i if use_timestamps else None + ) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) src_path = self.path("test_create_array_from_fragments_src") @@ -521,13 +556,22 @@ def write_fragments(target_path, dshape, num_frags): write_fragments(src_path, dshape, num_frags) frags = tiledb.FragmentInfoList(src_path) assert len(frags) == 10 - assert frags.timestamp_range == ts - - tiledb.create_array_from_fragments(src_path, dst_path, (3, 6)) + if use_timestamps: + assert frags.timestamp_range == ts + + if use_timestamps: + tiledb.create_array_from_fragments(src_path, dst_path, (3, 6)) + else: + tiledb.create_array_from_fragments( + src_path, + dst_path, + (frags.timestamp_range[2][0], frags.timestamp_range[5][1]), + ) frags = 
tiledb.FragmentInfoList(dst_path) assert len(frags) == 4 - assert frags.timestamp_range == ts[2:6] + if use_timestamps: + assert frags.timestamp_range == ts[2:6] class CopyFragmentsToExistingArrayTest(DiskTestCase): @@ -631,7 +675,8 @@ def write_fragments(target_path): class DeleteFragmentsTest(DiskTestCase): - def test_delete_fragments(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_delete_fragments(self, use_timestamps): dshape = (1, 3) num_writes = 10 @@ -643,7 +688,9 @@ def create_array(target_path, dshape): def write_fragments(target_path, dshape, num_writes): for i in range(1, num_writes + 1): - with tiledb.open(target_path, "w", timestamp=i) as A: + with tiledb.open( + target_path, "w", timestamp=i if use_timestamps else None + ) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) path = self.path("test_delete_fragments") @@ -654,16 +701,24 @@ def write_fragments(target_path, dshape, num_writes): write_fragments(path, dshape, num_writes) frags = tiledb.array_fragments(path) assert len(frags) == 10 - assert frags.timestamp_range == ts + if use_timestamps: + assert frags.timestamp_range == ts with tiledb.open(path, "m") as A: - A.delete_fragments(3, 6) + if use_timestamps: + A.delete_fragments(3, 6) + else: + A.delete_fragments( + frags.timestamp_range[2][0], frags.timestamp_range[5][1] + ) frags = tiledb.array_fragments(path) assert len(frags) == 6 - assert frags.timestamp_range == ts[:2] + ts[6:] + if use_timestamps: + assert frags.timestamp_range == ts[:2] + ts[6:] - def test_delete_fragments_with_schema_evolution(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_delete_fragments_with_schema_evolution(self, use_timestamps): path = self.path("test_delete_fragments_with_schema_evolution") dshape = (1, 3) @@ -673,8 +728,12 @@ def test_delete_fragments_with_schema_evolution(self): tiledb.libtiledb.Array.create(path, schema) ts1_data = np.random.rand(3) - with tiledb.open(path, "w", timestamp=1) as A: - A[[1, 
2, 3]] = ts1_data + if use_timestamps: + with tiledb.open(path, "w", timestamp=1) as A: + A[[1, 2, 3]] = ts1_data + else: + with tiledb.open(path, "w") as A: + A[[1, 2, 3]] = ts1_data ctx = tiledb.default_ctx() se = tiledb.ArraySchemaEvolution(ctx) @@ -682,8 +741,12 @@ def test_delete_fragments_with_schema_evolution(self): se.array_evolve(path) ts2_data = np.random.rand(3) - with tiledb.open(path, "w", timestamp=2) as A: - A[[1, 2, 3]] = {"a1": ts2_data, "a2": ts2_data} + if use_timestamps: + with tiledb.open(path, "w", timestamp=2) as A: + A[[1, 2, 3]] = {"a1": ts2_data, "a2": ts2_data} + else: + with tiledb.open(path, "w") as A: + A[[1, 2, 3]] = {"a1": ts2_data, "a2": ts2_data} frags = tiledb.array_fragments(path) assert len(frags) == 2 @@ -693,7 +756,12 @@ def test_delete_fragments_with_schema_evolution(self): assert_array_equal(A[:]["a2"], ts2_data) with tiledb.open(path, "m") as A: - A.delete_fragments(2, 2) + if use_timestamps: + A.delete_fragments(2, 2) + else: + A.delete_fragments( + frags.timestamp_range[1][0], frags.timestamp_range[1][1] + ) frags = tiledb.array_fragments(path) assert len(frags) == 1 diff --git a/tiledb/tests/test_group.py b/tiledb/tests/test_group.py index 7466c1d461..9c06a11354 100644 --- a/tiledb/tests/test_group.py +++ b/tiledb/tests/test_group.py @@ -117,7 +117,10 @@ def test_move_group(self): ), ), ) - def test_group_metadata(self, int_data, flt_data, str_data, str_type, capfd): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_group_metadata( + self, int_data, flt_data, str_data, str_type, capfd, use_timestamps + ): def values_equal(lhs, rhs): if isinstance(lhs, np.ndarray): if not isinstance(rhs, np.ndarray): @@ -133,13 +136,13 @@ def values_equal(lhs, rhs): grp_path = self.path("test_group_metadata") tiledb.Group.create(grp_path) - cfg = tiledb.Config({"sm.group.timestamp_end": 1}) + cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) with tiledb.Group(grp_path, "w", cfg) as grp: 
grp.meta["int"] = int_data grp.meta["flt"] = flt_data grp.meta["str"] = str_data - cfg = tiledb.Config({"sm.group.timestamp_end": 1}) + cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) with tiledb.Group(grp_path, "r", cfg) as grp: assert len(grp.meta) == 3 assert "int" in grp.meta @@ -156,11 +159,11 @@ def values_equal(lhs, rhs): assert "Type: DataType.INT" in metadata_dump assert f"Type: DataType.{str_type}" in metadata_dump - cfg = tiledb.Config({"sm.group.timestamp_end": 2}) + cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) with tiledb.Group(grp_path, "w", cfg) as grp: del grp.meta["int"] - cfg = tiledb.Config({"sm.group.timestamp_end": 2}) + cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) with tiledb.Group(grp_path, "r", cfg) as grp: assert len(grp.meta) == 2 assert "int" not in grp.meta @@ -356,7 +359,8 @@ class GroupMetadataTest(GroupTestCase): (np.array([1, 2, 3]), np.array([1.5, 2.5, 3.5]), np.array(["x"])), ), ) - def test_group_metadata(self, int_data, flt_data, str_data): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_group_metadata(self, int_data, flt_data, str_data, use_timestamps): def values_equal(lhs, rhs): if isinstance(lhs, np.ndarray): if not isinstance(rhs, np.ndarray): @@ -372,13 +376,13 @@ def values_equal(lhs, rhs): grp_path = self.path("test_group_metadata") tiledb.Group.create(grp_path) - cfg = tiledb.Config({"sm.group.timestamp_end": 1}) + cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) with tiledb.Group(grp_path, "w", cfg) as grp: grp.meta["int"] = int_data grp.meta["flt"] = flt_data grp.meta["str"] = str_data - cfg = tiledb.Config({"sm.group.timestamp_end": 1}) + cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) with tiledb.Group(grp_path, "r", cfg) as grp: assert grp.meta.keys() == {"int", "flt", "str"} assert len(grp.meta) == 3 @@ -389,11 +393,11 @@ def values_equal(lhs, 
rhs): assert "str" in grp.meta assert values_equal(grp.meta["str"], str_data) - cfg = tiledb.Config({"sm.group.timestamp_end": 2}) + cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) with tiledb.Group(grp_path, "w", cfg) as grp: del grp.meta["int"] - cfg = tiledb.Config({"sm.group.timestamp_end": 2}) + cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) with tiledb.Group(grp_path, "r", cfg) as grp: assert len(grp.meta) == 2 assert "int" not in grp.meta @@ -570,20 +574,21 @@ def test_basic(self, test_vals): self.assert_metadata_roundtrip(grp.meta, test_vals) grp.close() - def test_consolidation_and_vac(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_consolidation_and_vac(self, use_timestamps): vfs = tiledb.VFS() path = self.path("test_consolidation_and_vac") tiledb.Group.create(path) - cfg = tiledb.Config({"sm.group.timestamp_end": 1}) + cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) with tiledb.Group(path, "w", cfg) as grp: grp.meta["meta"] = 1 - cfg = tiledb.Config({"sm.group.timestamp_end": 2}) + cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) with tiledb.Group(path, "w", cfg) as grp: grp.meta["meta"] = 2 - cfg = tiledb.Config({"sm.group.timestamp_end": 3}) + cfg = tiledb.Config({"sm.group.timestamp_end": 3} if use_timestamps else {}) with tiledb.Group(path, "w", cfg) as grp: grp.meta["meta"] = 3 diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index 97ebc3e667..b9797aadaa 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -337,7 +337,8 @@ def test_upgrade_version(self): with tiledb.open(path) as A: assert A.schema.version >= 15 - def test_array_delete_fragments(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_array_delete_fragments(self, use_timestamps): dshape = (1, 3) num_writes = 10 @@ -349,7 +350,9 @@ def create_array(target_path, 
dshape): def write_fragments(target_path, dshape, num_writes): for i in range(1, num_writes + 1): - with tiledb.open(target_path, "w", timestamp=i) as A: + with tiledb.open( + target_path, "w", timestamp=i if use_timestamps else None + ) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) path = self.path("test_array_delete_fragments") @@ -360,14 +363,21 @@ def write_fragments(target_path, dshape, num_writes): write_fragments(path, dshape, num_writes) frags = tiledb.array_fragments(path) assert len(frags) == 10 - assert frags.timestamp_range == ts + if use_timestamps: + assert frags.timestamp_range == ts - with tiledb.open(path, "m") as arr: - arr.delete_fragments(3, 6) + if use_timestamps: + with tiledb.open(path, "m") as arr: + arr.delete_fragments(3, 6) + else: + timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] + with tiledb.open(path, "m") as arr: + arr.delete_fragments(timestamps[2], timestamps[5]) frags = tiledb.array_fragments(path) assert len(frags) == 6 - assert frags.timestamp_range == ts[:2] + ts[6:] + if use_timestamps: + assert frags.timestamp_range == ts[:2] + ts[6:] def test_array_delete(self): uri = self.path("test_array_delete") @@ -802,7 +812,8 @@ def test_ncell_int(self): assert_array_equal(T, R) assert_array_equal(T, R.multi_index[0:2][""]) - def test_open_with_timestamp(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_open_with_timestamp(self, use_timestamps): A = np.zeros(3) dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3, dtype=np.int64)) @@ -821,8 +832,9 @@ def test_open_with_timestamp(self): self.assertEqual(T[1], 0) self.assertEqual(T[2], 0) - # sleep 200ms and write - time.sleep(0.2) + if use_timestamps: + # sleep 200ms and write + time.sleep(0.2) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[0:1] = 1 @@ -831,8 +843,9 @@ def test_open_with_timestamp(self): read2_timestamp = T.timestamp_range self.assertTrue(read2_timestamp > read1_timestamp) - # sleep 200ms and write - 
time.sleep(0.2) + if use_timestamps: + # sleep 200ms and write + time.sleep(0.2) with tiledb.DenseArray(self.path("foo"), mode="w") as T: T[1:2] = 2 @@ -865,7 +878,8 @@ def test_open_with_timestamp(self): self.assertEqual(T[1], 2) self.assertEqual(T[2], 0) - def test_open_timestamp_range(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_open_timestamp_range(self, use_timestamps): A = np.zeros(3) path = self.path("open_timestamp_range") @@ -875,33 +889,42 @@ def test_open_timestamp_range(self): tiledb.DenseArray.create(path, schema) # write - with tiledb.DenseArray(path, timestamp=1, mode="w") as T: - T[:] = A * 1 - with tiledb.DenseArray(path, timestamp=2, mode="w") as T: - T[:] = A * 2 - with tiledb.DenseArray(path, timestamp=3, mode="w") as T: - T[:] = A * 3 - with tiledb.DenseArray(path, timestamp=4, mode="w") as T: - T[:] = A * 4 + if use_timestamps: + with tiledb.DenseArray(path, mode="w", timestamp=1) as T: + T[:] = A * 1 + with tiledb.DenseArray(path, mode="w", timestamp=2) as T: + T[:] = A * 2 + with tiledb.DenseArray(path, mode="w", timestamp=3) as T: + T[:] = A * 3 + with tiledb.DenseArray(path, mode="w", timestamp=4) as T: + T[:] = A * 4 + else: + with tiledb.DenseArray(path, mode="w") as T: + T[:] = A * 1 + T[:] = A * 2 + T[:] = A * 3 + T[:] = A * 4 def assert_ts(timestamp, result): with tiledb.DenseArray(path, mode="r", timestamp=timestamp) as T: assert_array_equal(T, result) + timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] + assert_ts(0, A * np.nan) - assert_ts(1, A * 1) - assert_ts(2, A * 2) - assert_ts(3, A * 3) - assert_ts((1, 2), A * 2) - assert_ts((0, 3), A * 3) - assert_ts((1, 3), A * 3) - assert_ts((2, 3), A * 3) - assert_ts((2, 4), A * 3) - assert_ts((None, 2), A * 2) - assert_ts((None, 3), A * 3) - assert_ts((2, None), A * 3) - assert_ts((3, None), A * 3) - assert_ts((3, None), A * 3) + assert_ts(timestamps[0], A * 1) + assert_ts(timestamps[1], A * 2) + assert_ts(timestamps[2], A * 3) + 
assert_ts((timestamps[0], timestamps[1]), A * 2) + assert_ts((0, timestamps[2]), A * 3) + assert_ts((timestamps[0], timestamps[2]), A * 3) + assert_ts((timestamps[1], timestamps[2]), A * 3) + assert_ts((timestamps[1], timestamps[3]), A * 3) + assert_ts((None, timestamps[1]), A * 2) + assert_ts((None, timestamps[2]), A * 3) + assert_ts((timestamps[1], None), A * 3) + assert_ts((timestamps[2], None), A * 3) + assert_ts((timestamps[2], None), A * 3) def test_open_attr(self): uri = self.path("test_open_attr") @@ -1174,7 +1197,8 @@ def __setitem__(self, s, v): ): D[np.array([1, 2]), np.array([0, 0])] = np.array([0, 2]) - def test_reopen_dense_array(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_reopen_dense_array(self, use_timestamps): uri = self.path("test_reopen_dense_array") dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=np.int64)) @@ -1184,13 +1208,18 @@ def test_reopen_dense_array(self): data = np.arange(0, 10, dtype=np.int64) - with tiledb.DenseArray(uri, mode="w", timestamp=1) as T: - T[:] = data - - with tiledb.DenseArray(uri, mode="w", timestamp=2) as T: - T[:] = data * 2 + if use_timestamps: + with tiledb.DenseArray(uri, mode="w", timestamp=1) as T: + T[:] = data + with tiledb.DenseArray(uri, mode="w", timestamp=2) as T: + T[:] = data * 2 + else: + with tiledb.DenseArray(uri, mode="w") as T: + T[:] = data + T[:] = data * 2 - T = tiledb.DenseArray(uri, mode="r", timestamp=1) + timestamps = [t[0] for t in tiledb.array_fragments(uri).timestamp_range] + T = tiledb.DenseArray(uri, mode="r", timestamp=timestamps[0]) assert_array_equal(T[:], data) T.reopen() @@ -2762,7 +2791,8 @@ def test_pickle_with_config(self): T2.close() @pytest.mark.parametrize("sparse", [True, False]) - def test_pickle_with_tuple_timestamps(self, sparse): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_pickle_with_tuple_timestamps(self, sparse, use_timestamps): A = np.random.randint(10, size=3) path = 
self.path("test_pickle_with_tuple_timestamps") @@ -2772,13 +2802,16 @@ def test_pickle_with_tuple_timestamps(self, sparse): tiledb.libtiledb.Array.create(path, schema) for ts in range(1, 5): - with tiledb.open(path, timestamp=ts, mode="w") as T: + with tiledb.open( + path, timestamp=ts if use_timestamps else None, mode="w" + ) as T: if sparse: T[[0, 1, 2]] = A * ts else: T[:] = A * ts - with tiledb.open(path, timestamp=(2, 3), mode="r") as T: + timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] + with tiledb.open(path, timestamp=(timestamps[1], timestamps[2]), mode="r") as T: with io.BytesIO() as buf: pickle.dump(T, buf) buf.seek(0) @@ -2787,9 +2820,11 @@ def test_pickle_with_tuple_timestamps(self, sparse): assert_array_equal(T[:][""], T2[:][""]) else: assert_array_equal(T[:], T2[:]) - assert T2.timestamp_range == (2, 3) + assert T2.timestamp_range == (timestamps[1], timestamps[2]) - with io.BytesIO() as buf, tiledb.open(path, timestamp=(2, 3)) as V: + with io.BytesIO() as buf, tiledb.open( + path, timestamp=(timestamps[1], timestamps[2]) + ) as V: pickle.dump(V, buf) buf.seek(0) with pickle.load(buf) as V2: @@ -2798,7 +2833,7 @@ def test_pickle_with_tuple_timestamps(self, sparse): assert_array_equal(V[:][""], V2[:][""]) else: assert_array_equal(V[:], V2[:]) - assert V2.timestamp_range == (2, 3) + assert V2.timestamp_range == (timestamps[1], timestamps[2]) class ArrayViewTest(DiskTestCase): @@ -3208,7 +3243,8 @@ def write_fragments(target_path, dshape, num_writes): tiledb.vacuum(path) assert len(tiledb.array_fragments(path)) == 3 - def test_array_consolidate_with_uris(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_array_consolidate_with_uris(self, use_timestamps): dshape = (1, 3) num_writes = 10 @@ -3220,7 +3256,9 @@ def create_array(target_path, dshape): def write_fragments(target_path, dshape, num_writes): for i in range(1, num_writes + 1): - with tiledb.open(target_path, "w", timestamp=i) as A: + with 
tiledb.open( + target_path, "w", timestamp=i if use_timestamps else None + ) as A: A[[1, 2, 3]] = np.random.rand(dshape[1]) path = self.path("test_array_consolidate_with_uris") @@ -3243,7 +3281,12 @@ def write_fragments(target_path, dshape, num_writes): "passed to `fragment_uris` will be consolidate" ), ): - tiledb.consolidate(path, fragment_uris=frag_names[4:8], timestamp=(9, 10)) + timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] + tiledb.consolidate( + path, + fragment_uris=frag_names[4:8], + timestamp=(timestamps[5], timestamps[6]), + ) assert len(tiledb.array_fragments(path)) == 4 diff --git a/tiledb/tests/test_metadata.py b/tiledb/tests/test_metadata.py index 4f704045dc..0b13073abe 100644 --- a/tiledb/tests/test_metadata.py +++ b/tiledb/tests/test_metadata.py @@ -189,7 +189,8 @@ def test_basic(self, test_vals): @given(st_metadata, st_ndarray) @settings(deadline=None) - def test_numpy(self, test_vals, ndarray): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_numpy(self, use_timestamps, test_vals, ndarray): test_vals["ndarray"] = ndarray path = self.path() @@ -202,8 +203,9 @@ def test_numpy(self, test_vals, ndarray): with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) - # test resetting a key with a ndarray value to a non-ndarray value - time.sleep(0.001) + if use_timestamps: + # test resetting a key with a ndarray value to a non-ndarray value + time.sleep(0.001) with tiledb.Array(path, "w") as A: A.meta["ndarray"] = 42 test_vals["ndarray"] = 42 @@ -219,8 +221,9 @@ def test_numpy(self, test_vals, ndarray): with tiledb.Array(path) as A: self.assert_metadata_roundtrip(A.meta, test_vals) - # test del ndarray key - time.sleep(0.001) + if use_timestamps: + # test del ndarray key + time.sleep(0.001) with tiledb.Array(path, "w") as A: del A.meta["ndarray"] del test_vals["ndarray"] @@ -228,8 +231,9 @@ def test_numpy(self, test_vals, ndarray): with tiledb.Array(path) as A: 
self.assert_metadata_roundtrip(A.meta, test_vals) - # test update - time.sleep(0.001) + if use_timestamps: + # test update + time.sleep(0.001) with tiledb.Array(path, mode="w") as A: test_vals.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) A.meta.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) @@ -241,7 +245,8 @@ def test_numpy(self, test_vals, ndarray): @tiledb.scope_ctx( {"sm.vacuum.mode": "array_meta", "sm.consolidation.mode": "array_meta"} ) - def test_consecutive(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_consecutive(self, use_timestamps): vfs = tiledb.VFS() path = self.path("test_md_consecutive") @@ -254,11 +259,17 @@ def test_consecutive(self): randutf8s = [rand_utf8(i) for i in np.random.randint(1, 30, size=write_count)] # write 100 times, then consolidate - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta["randint"] = int(randints[i]) - A.meta["randutf8"] = randutf8s[i] - time.sleep(0.001) + if use_timestamps: + for i in range(write_count): + with tiledb.Array(path, mode="w") as A: + A.meta["randint"] = int(randints[i]) + A.meta["randutf8"] = randutf8s[i] + time.sleep(0.001) + else: + for i in range(write_count): + with tiledb.Array(path, mode="w") as A: + A.meta["randint"] = int(randints[i]) + A.meta["randutf8"] = randutf8s[i] self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 100) @@ -285,12 +296,23 @@ def test_consecutive(self): self.assertEqual(A.meta["randutf8"], randutf8s[-1]) # use randutf8s as keys, then consolidate - for _ in range(2): - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta[randutf8s[i] + "{}".format(randints[i])] = int(randints[i]) - A.meta[randutf8s[i]] = randutf8s[i] - time.sleep(0.001) + if use_timestamps: + for _ in range(2): + for i in range(write_count): + with tiledb.Array(path, mode="w") as A: + A.meta[randutf8s[i] + "{}".format(randints[i])] = int( + randints[i] + ) + A.meta[randutf8s[i]] = 
randutf8s[i] + time.sleep(0.001) + else: + for _ in range(2): + for i in range(write_count): + with tiledb.Array(path, mode="w") as A: + A.meta[randutf8s[i] + "{}".format(randints[i])] = int( + randints[i] + ) + A.meta[randutf8s[i]] = randutf8s[i] # test data with tiledb.Array(path) as A: diff --git a/tiledb/tests/test_query_condition.py b/tiledb/tests/test_query_condition.py index 93468d8d5b..f8222fa4b4 100644 --- a/tiledb/tests/test_query_condition.py +++ b/tiledb/tests/test_query_condition.py @@ -1019,7 +1019,8 @@ def test_basic_dense(self): ): A.query() - def test_with_fragments(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_with_fragments(self, use_timestamps): path = self.path("test_with_fragments") dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=1)) @@ -1027,28 +1028,36 @@ def test_with_fragments(self): schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) tiledb.Array.create(path, schema) - with tiledb.open(path, "w", timestamp=1) as A: - A[1] = 1 + if use_timestamps: + with tiledb.open(path, "w", timestamp=1) as A: + A[1] = 1 - with tiledb.open(path, "w", timestamp=2) as A: - A[2] = 2 + with tiledb.open(path, "w", timestamp=2) as A: + A[2] = 2 - with tiledb.open(path, "w", timestamp=3) as A: - A[3] = 3 + with tiledb.open(path, "w", timestamp=3) as A: + A[3] = 3 + else: + with tiledb.open(path, "w") as A: + A[1] = 1 + A[2] = 2 + A[3] = 3 with tiledb.open(path, "r") as A: assert_array_equal([1, 2, 3], A[:]["ints"]) - with tiledb.open(path, "d", timestamp=3) as A: + timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] + + with tiledb.open(path, "d", timestamp=timestamps[2]) as A: A.query(cond="ints == 1").submit() - with tiledb.open(path, "r", timestamp=1) as A: + with tiledb.open(path, "r", timestamp=timestamps[0]) as A: assert_array_equal([1], A[:]["ints"]) - with tiledb.open(path, "r", timestamp=2) as A: + with tiledb.open(path, "r", timestamp=timestamps[1]) as A: assert_array_equal([1, 2], 
A[:]["ints"]) - with tiledb.open(path, "r", timestamp=3) as A: + with tiledb.open(path, "r", timestamp=timestamps[2]) as A: assert_array_equal([2, 3], A[:]["ints"]) assert len(tiledb.array_fragments(path)) == 3 @@ -1062,7 +1071,8 @@ def test_with_fragments(self): assert A.nonempty_domain() == ((1, 3),) assert_array_equal([2, 3], A[:]["ints"]) - def test_purge_deleted_cells(self): + @pytest.mark.parametrize("use_timestamps", [True, False]) + def test_purge_deleted_cells(self, use_timestamps): path = self.path("test_with_fragments") dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=1)) @@ -1070,28 +1080,36 @@ def test_purge_deleted_cells(self): schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) tiledb.Array.create(path, schema) - with tiledb.open(path, "w", timestamp=1) as A: - A[1] = 1 + if use_timestamps: + with tiledb.open(path, "w", timestamp=1) as A: + A[1] = 1 - with tiledb.open(path, "w", timestamp=2) as A: - A[2] = 2 + with tiledb.open(path, "w", timestamp=2) as A: + A[2] = 2 - with tiledb.open(path, "w", timestamp=3) as A: - A[3] = 3 + with tiledb.open(path, "w", timestamp=3) as A: + A[3] = 3 + else: + with tiledb.open(path, "w") as A: + A[1] = 1 + A[2] = 2 + A[3] = 3 with tiledb.open(path, "r") as A: assert_array_equal([1, 2, 3], A[:]["ints"]) - with tiledb.open(path, "d", timestamp=3) as A: + timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] + + with tiledb.open(path, "d", timestamp=timestamps[2]) as A: A.query(cond="ints == 1").submit() - with tiledb.open(path, "r", timestamp=1) as A: + with tiledb.open(path, "r", timestamp=timestamps[0]) as A: assert_array_equal([1], A[:]["ints"]) - with tiledb.open(path, "r", timestamp=2) as A: + with tiledb.open(path, "r", timestamp=timestamps[1]) as A: assert_array_equal([1, 2], A[:]["ints"]) - with tiledb.open(path, "r", timestamp=3) as A: + with tiledb.open(path, "r", timestamp=timestamps[2]) as A: assert_array_equal([2, 3], A[:]["ints"]) cfg = 
tiledb.Config({"sm.consolidation.purge_deleted_cells": "true"}) From 74f1fd3dba7fd2e90c6a1702226db0e574bb3fdc Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Thu, 28 Mar 2024 14:08:57 +0200 Subject: [PATCH 02/10] change libtiledb version to 'dev' --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bc2c1190a4..cb98a34dc1 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ # - this is for builds-from-source # - release builds are controlled by `misc/azure-release.yml` # - this should be set to the current core release, not `dev` -TILEDB_VERSION = "2.21.1" +TILEDB_VERSION = "dev" # allow overriding w/ environment variable TILEDB_VERSION = ( From 2ec6a472a6608ac10ddf7209b5675e12d6467ba7 Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Mon, 15 Apr 2024 21:49:49 +0300 Subject: [PATCH 03/10] Add test --- tiledb/tests/test_timestamp_overrides.py | 77 ++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tiledb/tests/test_timestamp_overrides.py diff --git a/tiledb/tests/test_timestamp_overrides.py b/tiledb/tests/test_timestamp_overrides.py new file mode 100644 index 0000000000..2e74c133fc --- /dev/null +++ b/tiledb/tests/test_timestamp_overrides.py @@ -0,0 +1,77 @@ +import datetime +import os +import subprocess +import sys + +import numpy as np +import pytest + +import tiledb +from tiledb.main import PyFragmentInfo + +from .common import DiskTestCase + +# def has_libfaketime(): +# find a way to check if libfaketime is installed + + +class TimestampOverridesTest(DiskTestCase): + @pytest.mark.skipif( + sys.platform == "win32", + reason="libfaketime is not supported on Windows", + ) + # @pytest.mark.skipif( + # not has_libfaketime(), + # reason="libfaketime not installed", + # ) + def test_timestamp_overrides(self): + uri = self.path("time_test") + + python_exe = sys.executable + cmd = ( + f"from tiledb.tests.test_timestamp_overrides import TimestampOverridesTest; " + 
f"TimestampOverridesTest().helper('{uri}')" + ) + test_path = os.path.dirname(os.path.abspath(__file__)) + + try: + # "+x0" is the time multiplier, which makes the time freeze during the test + subprocess.run( + ["faketime", "-f", "+x0", python_exe, "-c", cmd], cwd=test_path + ) + except subprocess.CalledProcessError as e: + raise e + + def helper(self, uri): + start_datetime = datetime.datetime.now() + + fragments = 25 + A = np.zeros(fragments) + + dom = tiledb.Domain(tiledb.Dim(domain=(0, 24), tile=fragments, dtype=np.int64)) + att = tiledb.Attr(dtype=A.dtype) + schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) + + tiledb.DenseArray.create(uri, schema) + + uris_seen = set() + chronological_order = [] + + for fragment_idx in range(fragments): + with tiledb.DenseArray(uri, mode="w") as T: + T[fragment_idx : fragment_idx + 1] = fragment_idx + + fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) + uris = fragment_info.get_uri() + new_uris = set(uris) - uris_seen + uris_seen.update(uris) + chronological_order.extend(new_uris) + + end_datetime = datetime.datetime.now() + self.assertTrue(start_datetime == end_datetime) + + # check if fragment_info.get_uri() returns the uris in chronological order + fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) + final_uris = fragment_info.get_uri() + for uri1, uri2 in zip(chronological_order, final_uris): + assert uri1 == uri2 From 76cc80171e3c0140fe053ae28da09ae118b80289 Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Tue, 16 Apr 2024 02:06:17 +0300 Subject: [PATCH 04/10] Add more assertions and handle assertion exits --- tiledb/tests/test_timestamp_overrides.py | 36 ++++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/tiledb/tests/test_timestamp_overrides.py b/tiledb/tests/test_timestamp_overrides.py index 2e74c133fc..f876526118 100644 --- a/tiledb/tests/test_timestamp_overrides.py +++ b/tiledb/tests/test_timestamp_overrides.py @@ -36,7 +36,7 @@ 
def test_timestamp_overrides(self): try: # "+x0" is the time multiplier, which makes the time freeze during the test - subprocess.run( + subprocess.check_output( ["faketime", "-f", "+x0", python_exe, "-c", cmd], cwd=test_path ) except subprocess.CalledProcessError as e: @@ -61,6 +61,11 @@ def helper(self, uri): with tiledb.DenseArray(uri, mode="w") as T: T[fragment_idx : fragment_idx + 1] = fragment_idx + # Read the data back immediately after writing to ensure it is correct + with tiledb.DenseArray(uri, mode="r") as T: + read_data = T[fragment_idx : fragment_idx + 1] + self.assertEqual(read_data, np.array([fragment_idx])) + fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) uris = fragment_info.get_uri() new_uris = set(uris) - uris_seen @@ -68,10 +73,31 @@ def helper(self, uri): chronological_order.extend(new_uris) end_datetime = datetime.datetime.now() - self.assertTrue(start_datetime == end_datetime) + self.assertEqual(start_datetime, end_datetime) - # check if fragment_info.get_uri() returns the uris in chronological order + # Check if fragment_info.get_uri() returns the uris in chronological order fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) final_uris = fragment_info.get_uri() - for uri1, uri2 in zip(chronological_order, final_uris): - assert uri1 == uri2 + + # Keep only the last part of the uris + final_uris = [os.path.basename(uri) for uri in final_uris] + chronological_order = [os.path.basename(uri) for uri in chronological_order] + + # Check that timestamps are the same (faketime is working) + timestamps = set() + for uri in final_uris: + parts = uri.split("_") + timestamps.add((parts[2], parts[3])) + + self.assertEqual(len(timestamps), 1) + + # Check that UUIDs are unique + uuids = set() + for uri in final_uris: + parts = uri.split("_") + uuids.add(parts[4]) + + self.assertEqual(len(uuids), fragments) + + # Sort order for the fragment info matches the write order + self.assertEqual(final_uris, 
chronological_order) From 79f8cd9f849cff0cfff7c3f717690acd94cd6eef Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Tue, 16 Apr 2024 02:33:42 +0300 Subject: [PATCH 05/10] Add group metadata test --- tiledb/tests/test_timestamp_overrides.py | 62 ++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 5 deletions(-) diff --git a/tiledb/tests/test_timestamp_overrides.py b/tiledb/tests/test_timestamp_overrides.py index f876526118..831b0847d0 100644 --- a/tiledb/tests/test_timestamp_overrides.py +++ b/tiledb/tests/test_timestamp_overrides.py @@ -30,7 +30,8 @@ def test_timestamp_overrides(self): python_exe = sys.executable cmd = ( f"from tiledb.tests.test_timestamp_overrides import TimestampOverridesTest; " - f"TimestampOverridesTest().helper('{uri}')" + f"TimestampOverridesTest().helper_fragments('{uri}'); " + f"TimestampOverridesTest().helper_group_metadata('{uri}')" ) test_path = os.path.dirname(os.path.abspath(__file__)) @@ -42,7 +43,7 @@ def test_timestamp_overrides(self): except subprocess.CalledProcessError as e: raise e - def helper(self, uri): + def helper_fragments(self, uri): start_datetime = datetime.datetime.now() fragments = 25 @@ -68,14 +69,13 @@ def helper(self, uri): fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) uris = fragment_info.get_uri() - new_uris = set(uris) - uris_seen + new_uri = set(uris) - uris_seen uris_seen.update(uris) - chronological_order.extend(new_uris) + chronological_order.extend(new_uri) end_datetime = datetime.datetime.now() self.assertEqual(start_datetime, end_datetime) - # Check if fragment_info.get_uri() returns the uris in chronological order fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) final_uris = fragment_info.get_uri() @@ -101,3 +101,55 @@ def helper(self, uri): # Sort order for the fragment info matches the write order self.assertEqual(final_uris, chronological_order) + + def helper_group_metadata(self, uri): + vfs = tiledb.VFS() + + start_datetime = 
datetime.datetime.now() + + tiledb.Group.create(uri) + loop_count = 30 + uris_seen = set() + chronological_order = [] + meta_path = f"{uri}/__meta" + + for i in range(loop_count): + with tiledb.Group(uri, "w") as grp: + grp.meta["meta"] = i + + # Read the data back immediately after writing to ensure it is correct + with tiledb.Group(uri, "r") as grp: + self.assertEqual(grp.meta["meta"], i) + + uris = vfs.ls(meta_path) + new_uri = set(uris) - uris_seen + uris_seen.update(uris) + chronological_order.extend(new_uri) + + end_datetime = datetime.datetime.now() + self.assertEqual(start_datetime, end_datetime) + + final_uris = vfs.ls(meta_path) + + # Keep only the last part of the uris + final_uris = [os.path.basename(uri) for uri in final_uris] + chronological_order = [os.path.basename(uri) for uri in chronological_order] + + # Check that timestamps are the same (faketime is working) + timestamps = set() + for uri in final_uris: + parts = uri.split("_") + timestamps.add((parts[2], parts[3])) + + self.assertEqual(len(timestamps), 1) + + # Check that UUIDs are unique + uuids = set() + for uri in final_uris: + parts = uri.split("_") + uuids.add(parts[4]) + + self.assertEqual(len(uuids), loop_count) + + # Sort order for the fragment info matches the write order + self.assertEqual(final_uris, chronological_order) From 397200b7e8ff82c8c2509f54e192063bf3edeca8 Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Tue, 16 Apr 2024 14:45:49 +0300 Subject: [PATCH 06/10] Fix test for linux --- tiledb/tests/test_timestamp_overrides.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tiledb/tests/test_timestamp_overrides.py b/tiledb/tests/test_timestamp_overrides.py index 831b0847d0..ea88525718 100644 --- a/tiledb/tests/test_timestamp_overrides.py +++ b/tiledb/tests/test_timestamp_overrides.py @@ -25,13 +25,14 @@ class TimestampOverridesTest(DiskTestCase): # reason="libfaketime not installed", # ) def test_timestamp_overrides(self): - uri = 
self.path("time_test") + uri_fragments = self.path("time_test_fragments") + uri_group_metadata = self.path("time_test_group_metadata") python_exe = sys.executable cmd = ( f"from tiledb.tests.test_timestamp_overrides import TimestampOverridesTest; " - f"TimestampOverridesTest().helper_fragments('{uri}'); " - f"TimestampOverridesTest().helper_group_metadata('{uri}')" + f"TimestampOverridesTest().helper_fragments('{uri_fragments}'); " + f"TimestampOverridesTest().helper_group_metadata('{uri_group_metadata}')" ) test_path = os.path.dirname(os.path.abspath(__file__)) @@ -99,8 +100,8 @@ def helper_fragments(self, uri): self.assertEqual(len(uuids), fragments) - # Sort order for the fragment info matches the write order - self.assertEqual(final_uris, chronological_order) + # Ensure that write order is correct + self.assertEqual(chronological_order, sorted(final_uris)) def helper_group_metadata(self, uri): vfs = tiledb.VFS() @@ -108,7 +109,7 @@ def helper_group_metadata(self, uri): start_datetime = datetime.datetime.now() tiledb.Group.create(uri) - loop_count = 30 + loop_count = 10 uris_seen = set() chronological_order = [] meta_path = f"{uri}/__meta" @@ -151,5 +152,5 @@ def helper_group_metadata(self, uri): self.assertEqual(len(uuids), loop_count) - # Sort order for the fragment info matches the write order - self.assertEqual(final_uris, chronological_order) + # Ensure that write order is correct + self.assertEqual(chronological_order, sorted(final_uris)) From 71f70d556eae9aa8fc39919997d5d8740f2d87b8 Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Tue, 16 Apr 2024 14:46:16 +0300 Subject: [PATCH 07/10] Set core to 2.22.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cb98a34dc1..28e68bccd3 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ # - this is for builds-from-source # - release builds are controlled by `misc/azure-release.yml` # - this should be set to the current core release, not `dev` 
-TILEDB_VERSION = "dev" +TILEDB_VERSION = "2.22.0" # allow overriding w/ environment variable TILEDB_VERSION = ( From 1a14e28f841a6b92c232ca9ba6c1b0e69d09579a Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Tue, 16 Apr 2024 14:47:23 +0300 Subject: [PATCH 08/10] Add faketime CI requirement for tests --- .github/workflows/ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e5053f27f..b23a7ee62b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -124,6 +124,14 @@ jobs: - name: "Check build directory" run: ls -Rl + - name: "Install libfaketime (linux and macOS)" + if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-12' + run: | + git clone https://github.com/wolfcw/libfaketime/ + cd libfaketime + sudo make install + cd .. + - name: "Run tests" run: pytest -vv --showlocals From 944723bc597bb7e4cf249c3073bc9ecd1f121356 Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Wed, 17 Apr 2024 19:49:28 +0300 Subject: [PATCH 09/10] Skip tests if libfaketime is not installed --- tiledb/tests/test_timestamp_overrides.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tiledb/tests/test_timestamp_overrides.py b/tiledb/tests/test_timestamp_overrides.py index ea88525718..ebc2559623 100644 --- a/tiledb/tests/test_timestamp_overrides.py +++ b/tiledb/tests/test_timestamp_overrides.py @@ -11,8 +11,13 @@ from .common import DiskTestCase -# def has_libfaketime(): -# find a way to check if libfaketime is installed + +def has_libfaketime(): + try: + subprocess.check_output(["which", "faketime"]) + return True + except subprocess.CalledProcessError: + return False class TimestampOverridesTest(DiskTestCase): @@ -20,10 +25,10 @@ class TimestampOverridesTest(DiskTestCase): sys.platform == "win32", reason="libfaketime is not supported on Windows", ) - # @pytest.mark.skipif( - # not has_libfaketime(), - # reason="libfaketime not installed", - # ) + 
@pytest.mark.skipif( + not has_libfaketime(), + reason="libfaketime not installed.", + ) def test_timestamp_overrides(self): uri_fragments = self.path("time_test_fragments") uri_group_metadata = self.path("time_test_group_metadata") From 8dc87281469881584cf52ee6fb4891e7d5431094 Mon Sep 17 00:00:00 2001 From: Agis Kounelis Date: Thu, 18 Apr 2024 11:53:17 +0300 Subject: [PATCH 10/10] Fixes --- tiledb/tests/test_timestamp_overrides.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tiledb/tests/test_timestamp_overrides.py b/tiledb/tests/test_timestamp_overrides.py index ebc2559623..c964a2157a 100644 --- a/tiledb/tests/test_timestamp_overrides.py +++ b/tiledb/tests/test_timestamp_overrides.py @@ -8,8 +8,7 @@ import tiledb from tiledb.main import PyFragmentInfo - -from .common import DiskTestCase +from tiledb.tests.common import DiskTestCase def has_libfaketime(): @@ -20,7 +19,7 @@ def has_libfaketime(): return False -class TimestampOverridesTest(DiskTestCase): +class TestTimestampOverrides(DiskTestCase): @pytest.mark.skipif( sys.platform == "win32", reason="libfaketime is not supported on Windows", @@ -35,9 +34,9 @@ def test_timestamp_overrides(self): python_exe = sys.executable cmd = ( - f"from tiledb.tests.test_timestamp_overrides import TimestampOverridesTest; " - f"TimestampOverridesTest().helper_fragments('{uri_fragments}'); " - f"TimestampOverridesTest().helper_group_metadata('{uri_group_metadata}')" + f"from tiledb.tests.test_timestamp_overrides import TestTimestampOverrides; " + f"TestTimestampOverrides().helper_fragments('{uri_fragments}'); " + f"TestTimestampOverrides().helper_group_metadata('{uri_group_metadata}')" ) test_path = os.path.dirname(os.path.abspath(__file__))