From 004d54439febe265b8f340bfa598b101b659d9a1 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis Date: Tue, 10 Dec 2024 17:18:25 +0200 Subject: [PATCH] delete other tests --- tiledb/tests/test_aggregates.py | 444 --- tiledb/tests/test_array_schema.py | 311 -- tiledb/tests/test_attribute.py | 313 -- tiledb/tests/test_basic_import.py | 47 - tiledb/tests/test_cloud.py | 74 - tiledb/tests/test_compat.py | 185 - tiledb/tests/test_consolidation_plan.py | 53 - tiledb/tests/test_context_and_config.py | 263 -- tiledb/tests/test_core.py | 156 - tiledb/tests/test_current_domain.py | 215 -- tiledb/tests/test_dask.py | 206 -- tiledb/tests/test_dimension.py | 125 - tiledb/tests/test_dimension_label.py | 485 --- tiledb/tests/test_domain.py | 84 - tiledb/tests/test_domain_index.py | 159 - tiledb/tests/test_enumeration.py | 223 -- tiledb/tests/test_examples.py | 83 - tiledb/tests/test_filestore.py | 91 - tiledb/tests/test_filters.py | 258 -- tiledb/tests/test_fixes.py | 424 --- tiledb/tests/test_fork_ctx.py | 104 - tiledb/tests/test_fragments.py | 769 ----- tiledb/tests/test_group.py | 896 ----- tiledb/tests/test_hypothesis.py | 62 - tiledb/tests/test_libtiledb.py | 3894 ---------------------- tiledb/tests/test_metadata.cc | 45 - tiledb/tests/test_metadata.py | 398 --- tiledb/tests/test_multi_index-hp.py | 170 - tiledb/tests/test_multi_index.py | 1038 ------ tiledb/tests/test_pandas_dataframe.py | 1800 ---------- tiledb/tests/test_query.py | 40 - tiledb/tests/test_query_condition.py | 1187 ------- tiledb/tests/test_read_subarray.py | 488 --- tiledb/tests/test_repr.py | 111 - tiledb/tests/test_serialization.cc | 95 - tiledb/tests/test_serialization.py | 30 - tiledb/tests/test_stats.py | 70 - tiledb/tests/test_subarray.py | 127 - tiledb/tests/test_timestamp_overrides.py | 156 - tiledb/tests/test_util.py | 182 - tiledb/tests/test_vfs.py | 445 --- tiledb/tests/test_webp.cc | 29 - tiledb/tests/test_webp.py | 186 -- tiledb/tests/test_write_subarray.py | 293 -- 44 files changed, 16814 deletions(-) delete mode 100644 tiledb/tests/test_aggregates.py delete mode 100644 tiledb/tests/test_array_schema.py delete mode 100644 tiledb/tests/test_attribute.py delete mode 100644 tiledb/tests/test_basic_import.py delete mode 100644 tiledb/tests/test_cloud.py delete mode 100644 tiledb/tests/test_compat.py delete mode 100644 tiledb/tests/test_consolidation_plan.py delete mode 100644 tiledb/tests/test_context_and_config.py delete mode 100644 tiledb/tests/test_core.py delete mode 100644 tiledb/tests/test_current_domain.py delete mode 100644 tiledb/tests/test_dask.py delete mode 100644 tiledb/tests/test_dimension.py delete mode 100644 tiledb/tests/test_dimension_label.py delete mode 100644 tiledb/tests/test_domain.py delete mode 100644 tiledb/tests/test_domain_index.py delete mode 100644 tiledb/tests/test_enumeration.py delete mode 100644 tiledb/tests/test_examples.py delete mode 100644 tiledb/tests/test_filestore.py delete mode 100644 tiledb/tests/test_filters.py delete mode 100644 tiledb/tests/test_fixes.py delete mode 100644 tiledb/tests/test_fork_ctx.py delete mode 100644 tiledb/tests/test_fragments.py delete mode 100644 tiledb/tests/test_group.py delete mode 100644 tiledb/tests/test_hypothesis.py delete mode 100644 tiledb/tests/test_libtiledb.py delete mode 100644 tiledb/tests/test_metadata.cc delete mode 100644 tiledb/tests/test_metadata.py delete mode 100644 tiledb/tests/test_multi_index-hp.py delete mode 100644 tiledb/tests/test_multi_index.py delete mode 100644 tiledb/tests/test_pandas_dataframe.py delete mode 100644 tiledb/tests/test_query.py delete mode 100644 tiledb/tests/test_query_condition.py delete mode 100644 tiledb/tests/test_read_subarray.py delete mode 100644 tiledb/tests/test_repr.py delete mode 100644 tiledb/tests/test_serialization.cc delete mode 100644 tiledb/tests/test_serialization.py delete mode 100644 tiledb/tests/test_stats.py delete mode 100644 tiledb/tests/test_subarray.py delete mode 100644 tiledb/tests/test_timestamp_overrides.py delete mode 100644 tiledb/tests/test_util.py delete mode 100644 tiledb/tests/test_vfs.py delete mode 100644 tiledb/tests/test_webp.cc delete mode 100644 tiledb/tests/test_webp.py delete mode 100644 tiledb/tests/test_write_subarray.py diff --git a/tiledb/tests/test_aggregates.py b/tiledb/tests/test_aggregates.py deleted file mode 100644 index 5f402eedd6..0000000000 --- a/tiledb/tests/test_aggregates.py +++ /dev/null @@ -1,444 +0,0 @@ -import numpy as np -import pytest - -import tiledb - -from .common import DiskTestCase - - -class AggregateTest(DiskTestCase): - @pytest.mark.parametrize("sparse", [True, False]) - @pytest.mark.parametrize( - "dtype", - [ - np.uint8, - np.int8, - np.uint16, - np.int16, - np.uint32, - np.int32, - np.uint64, - np.int64, - np.float32, - np.float64, - ], - ) - def test_basic(self, sparse, dtype): - path = self.path("test_basic") - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 9), dtype=np.int32)) - attrs = [tiledb.Attr(name="a", dtype=dtype)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=sparse) - tiledb.Array.create(path, schema) - - data = np.random.randint(1, 10, size=10) - - with tiledb.open(path, "w") as A: - if sparse: - A[np.arange(0, 10)] = data - else: - A[:] = data - - all_aggregates = ("count", "sum", "min", "max", "mean") - - with tiledb.open(path, "r") as A: - # entire column - q = A.query() - expected = q[:]["a"] - - with pytest.raises(tiledb.TileDBError): - q.agg("bad")[:] - - with pytest.raises(tiledb.TileDBError): - q.agg("null_count")[:] - - with pytest.raises(NotImplementedError): - q.agg("count").df[:] - - assert q.agg("sum")[:] == sum(expected) - assert q.agg("min")[:] == min(expected) - assert q.agg("max")[:] == max(expected) - assert q.agg("mean")[:] == sum(expected) / len(expected) - assert q.agg("count")[:] == len(expected) - - assert q.agg({"a": "sum"})[:] == sum(expected) - assert q.agg({"a": "min"})[:] == min(expected) - assert q.agg({"a": "max"})[:] == max(expected) - assert q.agg({"a": "mean"})[:] == sum(expected) / len(expected) - assert q.agg({"a": "count"})[:] == len(expected) - - actual = q.agg(all_aggregates)[:] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - actual = q.agg({"a": all_aggregates})[:] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - # subarray - expected = A[4:7]["a"] - - assert q.agg("sum")[4:7] == sum(expected) - assert q.agg("min")[4:7] == min(expected) - assert q.agg("max")[4:7] == max(expected) - assert q.agg("mean")[4:7] == sum(expected) / len(expected) - assert q.agg("count")[4:7] == len(expected) - - assert q.agg({"a": "sum"})[4:7] == sum(expected) - assert q.agg({"a": "min"})[4:7] == min(expected) - assert q.agg({"a": "max"})[4:7] == max(expected) - assert q.agg({"a": "mean"})[4:7] == sum(expected) / len(expected) - assert q.agg({"a": "count"})[4:7] == len(expected) - - actual = q.agg(all_aggregates)[4:7] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - actual = q.agg({"a": all_aggregates})[4:7] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - @pytest.mark.parametrize("sparse", [True, False]) - @pytest.mark.parametrize( - "dtype", - [ - np.uint8, - np.int8, - np.uint16, - np.int16, - np.uint32, - np.int32, - np.uint64, - np.int64, - np.float32, - np.float64, - ], - ) - def test_multi_index(self, sparse, dtype): - path = self.path("test_multi_index") - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 9), dtype=np.int32)) - attrs = [tiledb.Attr(name="a", dtype=dtype)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=sparse) - tiledb.Array.create(path, schema) - - data = np.random.randint(1, 10, size=10) - - with tiledb.open(path, "w") as A: - if sparse: - A[np.arange(0, 10)] = data - else: - A[:] = data - - all_aggregates = ("count", "sum", "min", "max", "mean") - - with tiledb.open(path, "r") as A: - # entire column - q = A.query() - expected = q.multi_index[:]["a"] - - with pytest.raises(tiledb.TileDBError): - q.agg("bad")[:] - - with pytest.raises(tiledb.TileDBError): - q.agg("null_count")[:] - - assert q.agg("sum").multi_index[:] == sum(expected) - assert q.agg("min").multi_index[:] == min(expected) - assert q.agg("max").multi_index[:] == max(expected) - assert q.agg("mean").multi_index[:] == sum(expected) / len(expected) - assert q.agg("count").multi_index[:] == len(expected) - - actual = q.agg(all_aggregates).multi_index[:] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - actual = q.agg({"a": all_aggregates}).multi_index[:] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - # subarray - expected = A.multi_index[4:7]["a"] - - assert q.agg("sum").multi_index[4:7] == sum(expected) - assert q.agg("min").multi_index[4:7] == min(expected) - assert q.agg("max").multi_index[4:7] == max(expected) - assert q.agg("mean").multi_index[4:7] == sum(expected) / len(expected) - assert q.agg("count").multi_index[4:7] == len(expected) - - actual = q.agg(all_aggregates).multi_index[4:7] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - @pytest.mark.parametrize( - "dtype", - [ - np.uint8, - np.int8, - np.uint16, - np.int16, - np.uint32, - np.int32, - np.uint64, - np.int64, - np.float32, - np.float64, - ], - ) - def test_with_query_condition(self, dtype): - path = self.path("test_with_query_condition") - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 9), dtype=np.int32)) - attrs = [tiledb.Attr(name="a", dtype=dtype)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as A: - # hardcode the first value to be 1 to ensure that the a < 5 - # query condition always returns a non-empty result - data = np.random.randint(1, 10, size=10) - data[0] = 1 - - A[np.arange(0, 10)] = data - - all_aggregates = ("count", "sum", "min", "max", "mean") - - with tiledb.open(path, "r") as A: - q = A.query(cond="a < 5") - - expected = q[:]["a"] - actual = q.agg(all_aggregates)[:] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - expected = q.multi_index[:]["a"] - actual = q.agg(all_aggregates).multi_index[:] - assert actual["sum"] == sum(expected) - assert actual["min"] == min(expected) - assert actual["max"] == max(expected) - assert actual["mean"] == sum(expected) / len(expected) - assert actual["count"] == len(expected) - - # no value matches query condition - q = A.query(cond="a > 10") - - expected = q[:] - actual = q.agg(all_aggregates)[:] - assert actual["sum"] == 0 - if dtype in (np.float32, np.float64): - assert np.isnan(actual["min"]) - assert np.isnan(actual["max"]) - else: - assert actual["min"] is None - assert actual["max"] is None - assert np.isnan(actual["mean"]) - assert actual["count"] == 0 - - expected = q.multi_index[:] - actual = q.agg(all_aggregates).multi_index[:] - assert actual["sum"] == 0 - if dtype in (np.float32, np.float64): - assert np.isnan(actual["min"]) - assert np.isnan(actual["max"]) - else: - assert actual["min"] is None - assert actual["max"] is None - assert np.isnan(actual["mean"]) - assert actual["count"] == 0 - - @pytest.mark.parametrize("sparse", [True, False]) - def test_nullable(self, sparse): - path = self.path("test_nullable") - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 9), dtype=np.int32)) - attrs = [ - tiledb.Attr(name="integer", nullable=True, dtype=int), - tiledb.Attr(name="float", nullable=True, dtype=float), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=sparse) - tiledb.Array.create(path, schema) - - # set index 5 and 7 to be null - data = np.random.rand(10) - data[5], data[7] = np.nan, np.nan - - # write data - with tiledb.open(path, "w") as A: - if sparse: - A[np.arange(0, 10)] = {"integer": data, "float": data} - else: - A[:] = {"integer": data, "float": data} - - with tiledb.open(path, "r") as A: - agg = A.query().agg - - result = agg("null_count") - assert result[0]["integer"]["null_count"] == 0 - assert result[:6]["integer"]["null_count"] == 1 - assert result[5:8]["integer"]["null_count"] == 2 - assert result[5]["integer"]["null_count"] == 1 - assert result[6:]["integer"]["null_count"] == 1 - assert result[7]["integer"]["null_count"] == 1 - assert result[:]["integer"]["null_count"] == 2 - - assert result[0]["float"]["null_count"] == 0 - assert result[:6]["float"]["null_count"] == 1 - assert result[5:8]["float"]["null_count"] == 2 - assert result[5]["float"]["null_count"] == 1 - assert result[6:]["float"]["null_count"] == 1 - assert result[7]["float"]["null_count"] == 1 - assert result[:]["float"]["null_count"] == 2 - - all_aggregates = ("count", "sum", "min", "max", "mean", "null_count") - - actual = agg({"integer": all_aggregates, "float": all_aggregates})[:] - - expected = A[:]["integer"] - expected_no_null = A[:]["integer"].compressed() - assert actual["integer"]["sum"] == sum(expected_no_null) - assert actual["integer"]["min"] == min(expected_no_null) - assert actual["integer"]["max"] == max(expected_no_null) - assert actual["integer"]["mean"] == sum(expected_no_null) / len( - expected_no_null - ) - assert actual["integer"]["count"] == len(expected) - assert actual["integer"]["null_count"] == np.count_nonzero(expected.mask) - - expected = A[:]["float"] - expected_no_null = A[:]["float"].compressed() - assert actual["float"]["sum"] == sum(expected_no_null) - assert actual["float"]["min"] == min(expected_no_null) - assert actual["float"]["max"] == max(expected_no_null) - assert actual["float"]["mean"] == sum(expected_no_null) / len( - expected_no_null - ) - assert actual["float"]["count"] == len(expected) - assert actual["float"]["null_count"] == np.count_nonzero(expected.mask) - - # no valid values - actual = agg({"integer": all_aggregates, "float": all_aggregates})[5] - - assert actual["integer"]["sum"] is None - assert actual["integer"]["min"] is None - assert actual["integer"]["max"] is None - assert actual["integer"]["mean"] is None - assert actual["integer"]["count"] == 1 - assert actual["integer"]["null_count"] == 1 - - assert np.isnan(actual["float"]["sum"]) - assert np.isnan(actual["float"]["min"]) - assert np.isnan(actual["float"]["max"]) - assert np.isnan(actual["float"]["mean"]) - assert actual["float"]["count"] == 1 - assert actual["float"]["null_count"] == 1 - - @pytest.mark.parametrize("sparse", [True, False]) - def test_empty(self, sparse): - path = self.path("test_empty_sparse") - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 9), dtype=np.int32)) - attrs = [ - tiledb.Attr(name="integer", nullable=True, dtype=int), - tiledb.Attr(name="float", nullable=True, dtype=float), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=sparse) - tiledb.Array.create(path, schema) - - data = np.random.rand(5) - - # write data - with tiledb.open(path, "w") as A: - if sparse: - A[np.arange(0, 5)] = {"integer": data, "float": data} - else: - A[:5] = {"integer": data, "float": data} - - with tiledb.open(path, "r") as A: - invalid_aggregates = ("sum", "min", "max", "mean") - actual = A.query().agg(invalid_aggregates)[6:] - - assert actual["integer"]["sum"] is None - assert actual["integer"]["min"] is None - assert actual["integer"]["max"] is None - assert actual["integer"]["mean"] is None - - assert np.isnan(actual["float"]["sum"]) - assert np.isnan(actual["float"]["min"]) - assert np.isnan(actual["float"]["max"]) - assert np.isnan(actual["float"]["mean"]) - - def test_multiple_attrs(self): - path = self.path("test_multiple_attrs") - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 9), dtype=np.int32)) - attrs = [ - tiledb.Attr(name="integer", dtype=int), - tiledb.Attr(name="float", dtype=float), - tiledb.Attr(name="string", dtype=str), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as A: - A[np.arange(0, 10)] = { - "integer": np.random.randint(1, 10, size=10), - "float": np.random.randint(1, 10, size=10), - "string": np.random.randint(1, 10, size=10).astype(str), - } - - with tiledb.open(path, "r") as A: - actual = A.query()[:] - agg = A.query().agg - - assert agg({"string": "count"})[:] == len(actual["string"]) - invalid_aggregates = ("sum", "min", "max", "mean") - for invalid_agg in invalid_aggregates: - with pytest.raises(tiledb.TileDBError): - agg({"string": invalid_agg})[:] - - result = agg("count")[:] - assert result["integer"]["count"] == len(actual["integer"]) - assert result["float"]["count"] == len(actual["float"]) - assert result["string"]["count"] == len(actual["string"]) - - with pytest.raises(tiledb.TileDBError): - agg("sum")[:] - - result = agg({"integer": "sum", "float": "sum"})[:] - assert "string" not in result - assert result["integer"]["sum"] == sum(actual["integer"]) - assert result["float"]["sum"] == sum(actual["float"]) - - result = agg( - { - "string": ("count",), - "integer": "sum", - "float": ["max", "min", "sum", "mean"], - } - )[:] - assert result["string"]["count"] == len(actual["string"]) - assert result["integer"]["sum"] == sum(actual["integer"]) - assert result["float"]["max"] == max(actual["float"]) - assert result["float"]["min"] == min(actual["float"]) - assert result["float"]["sum"] == sum(actual["float"]) - assert result["float"]["mean"] == sum(actual["float"]) / len( - actual["float"] - ) diff --git a/tiledb/tests/test_array_schema.py b/tiledb/tests/test_array_schema.py deleted file mode 100644 index 99aa567576..0000000000 --- a/tiledb/tests/test_array_schema.py +++ /dev/null @@ -1,311 +0,0 @@ -import xml.etree.ElementTree - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import tiledb -from tiledb.tests.common import DiskTestCase, assert_captured - - -class ArraySchemaTest(DiskTestCase): - def test_schema_basic(self): - dom = tiledb.Domain( - tiledb.Dim("d1", (1, 4), 2, dtype="u8"), - tiledb.Dim("d2", (1, 4), 2, dtype="u8"), - ) - - attr1 = tiledb.Attr("foo", dtype=float) - attr2 = tiledb.Attr("foo", dtype=int) - - # test unique attributes - with self.assertRaises(tiledb.TileDBError): - tiledb.ArraySchema(domain=dom, attrs=(attr1, attr2)) - - # test schema.check - schema = tiledb.ArraySchema(domain=dom, attrs=(attr1,)) - # valid schema does not raise - schema.check() - - try: - assert xml.etree.ElementTree.fromstring(schema._repr_html_()) is not None - except: - pytest.fail( - f"Could not parse schema._repr_html_(). Saw {schema._repr_html_()}" - ) - - def test_dense_array_schema(self): - domain = tiledb.Domain( - tiledb.Dim(domain=(1, 8), tile=2), tiledb.Dim(domain=(1, 8), tile=2) - ) - a1 = tiledb.Attr("val", dtype="f8") - schema = tiledb.ArraySchema(domain=domain, attrs=(a1,)) - assert schema.sparse is False - assert schema.cell_order == "row-major" - assert schema.tile_order == "row-major" - assert schema.domain == domain - assert schema.ndim == 2 - assert schema.shape == (8, 8) - assert schema.nattr == 1 - assert schema.domain.homogeneous is True - assert hasattr(schema, "version") # don't pin to a specific version - assert schema.attr(0) == a1 - assert schema.has_attr("val") is True - assert schema.has_attr("nononoattr") is False - assert schema == tiledb.ArraySchema(domain=domain, attrs=(a1,)) - assert schema != tiledb.ArraySchema(domain=domain, attrs=(a1,), sparse=True) - - with self.assertRaises(tiledb.TileDBError): - schema.allows_duplicates - # test iteration over attributes - assert list(schema) == [a1] - - with self.assertRaisesRegex( - tiledb.TileDBError, - "Cannot set cell order; Hilbert order is only applicable to sparse arrays", - ): - tiledb.ArraySchema( - domain=domain, attrs=(a1,), sparse=False, cell_order="hilbert" - ) - - def test_dense_array_schema_fp_domain_error(self): - dom = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2, dtype=np.float64)) - att = tiledb.Attr("val", dtype=np.float64) - - with self.assertRaises(tiledb.TileDBError): - tiledb.ArraySchema(domain=dom, attrs=(att,)) - - def test_dense_array_schema_invalid_cell_and_tile_order(self): - dom = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2, dtype=np.float64)) - att = tiledb.Attr("val", dtype=np.float64) - - with self.assertRaises(ValueError): - tiledb.ArraySchema(domain=dom, attrs=(att,), cell_order="invalid") - - with self.assertRaises(ValueError): - tiledb.ArraySchema(domain=dom, attrs=(att,), tile_order="invalid") - - def test_sparse_schema(self): - # create dimensions - d1 = tiledb.Dim("d1", domain=(1, 1000), tile=10, dtype="uint64") - d2 = tiledb.Dim("d2", domain=(101, 10000), tile=100, dtype="uint64") - - # create domain - domain = tiledb.Domain(d1, d2) - - # create attributes - a1 = tiledb.Attr("a1", dtype="int32,int32,int32") - a2 = tiledb.Attr( - "a2", filters=tiledb.FilterList([tiledb.GzipFilter(-1)]), dtype="int32" - ) - - # create sparse array with schema - coords_filters = tiledb.FilterList([tiledb.ZstdFilter(4)]) - offsets_filters = tiledb.FilterList([tiledb.LZ4Filter(5)]) - validity_filters = tiledb.FilterList([tiledb.GzipFilter(9)]) - - with pytest.warns( - DeprecationWarning, - match="coords_filters is deprecated; set the FilterList for each dimension", - ): - schema = tiledb.ArraySchema( - domain=domain, - attrs=(a1, a2), - capacity=10, - cell_order="col-major", - tile_order="row-major", - allows_duplicates=True, - sparse=True, - coords_filters=coords_filters, - offsets_filters=offsets_filters, - validity_filters=validity_filters, - ) - - # schema.dump() - # assert_captured(capfd, "Array type: sparse") - - assert schema.sparse is True - assert schema.capacity == 10 - assert schema.cell_order, "co == major" - assert schema.tile_order, "ro == major" - - # - # assert schema.coords_compressor, ('zstd' == 4) - # assert schema.offsets_compressor, ('lz4' == 5) - assert len(schema.coords_filters) == 0 - assert len(schema.offsets_filters) == 1 - assert len(schema.validity_filters) == 1 - - assert schema.domain == domain - assert schema.ndim == 2 - assert schema.shape, 1000 == 9900 - assert schema.nattr == 2 - assert schema.attr(0) == a1 - assert schema.attr("a2") == a2 - assert schema.allows_duplicates is True - - assert schema.domain.dim("d1").filters == coords_filters - assert schema.domain.dim("d2").filters == coords_filters - - with pytest.warns( - DeprecationWarning, - match="coords_filters is deprecated; set the FilterList for each dimension", - ): - schema2 = tiledb.ArraySchema( - domain=domain, - attrs=(a1, a2), - capacity=10, - cell_order="col-major", - tile_order="row-major", - allows_duplicates=True, - sparse=True, - coords_filters=coords_filters, - offsets_filters=offsets_filters, - validity_filters=validity_filters, - ) - assert schema == schema2 - - # test iteration over attributes - assert list(schema) == [a1, a2] - - with self.assertRaisesRegex( - tiledb.TileDBError, - "Cannot set tile order; Hilbert order is not applicable to tiles", - ): - tiledb.ArraySchema( - domain=domain, attrs=(a1,), sparse=True, tile_order="hilbert" - ) - - def test_sparse_schema_filter_list(self, capfd): - # create dimensions - d1 = tiledb.Dim("d1", domain=(1, 1000), tile=10, dtype="uint64") - d2 = tiledb.Dim("d2", domain=(101, 10000), tile=100, dtype="uint64") - - # create domain - domain = tiledb.Domain(d1, d2) - - # create attributes - a1 = tiledb.Attr("a1", dtype="int32,int32,int32") - filter_list = tiledb.FilterList([tiledb.GzipFilter()]) - a2 = tiledb.Attr("a2", filters=filter_list, dtype="float32") - - off_filters_pylist = [tiledb.ZstdFilter(level=10)] - off_filters = tiledb.FilterList(filters=off_filters_pylist, chunksize=2048) - - coords_filters_pylist = [tiledb.Bzip2Filter(level=5)] - coords_filters = tiledb.FilterList( - filters=coords_filters_pylist, chunksize=4096 - ) - - validity_filters_pylist = [tiledb.GzipFilter(level=9)] - validity_filters = tiledb.FilterList( - filters=validity_filters_pylist, chunksize=1024 - ) - - # create sparse array with schema - with pytest.warns( - DeprecationWarning, - match="coords_filters is deprecated; set the FilterList for each dimension", - ): - schema = tiledb.ArraySchema( - domain=domain, - attrs=(a1, a2), - capacity=10, - cell_order="col-major", - tile_order="row-major", - coords_filters=coords_filters, - offsets_filters=off_filters, - validity_filters=validity_filters, - sparse=True, - ) - self.assertTrue(schema.sparse) - - assert len(schema.coords_filters) == 0 - - assert len(schema.domain.dim("d1").filters) == 1 - assert schema.domain.dim("d1").filters[0] == tiledb.Bzip2Filter(level=5) - assert schema.domain.dim("d2").filters[0] == tiledb.Bzip2Filter(level=5) - - assert len(schema.offsets_filters) == 1 - assert schema.offsets_filters[0] == tiledb.ZstdFilter(level=10) - - assert len(schema.validity_filters) == 1 - assert schema.validity_filters[0] == tiledb.GzipFilter(level=9) - - schema.dump() - assert_captured(capfd, "Array type: sparse") - - # make sure we can construct ArraySchema with python lists of filters - with pytest.warns( - DeprecationWarning, - match="coords_filters is deprecated; set the FilterList for each dimension", - ): - schema2 = tiledb.ArraySchema( - domain=domain, - attrs=(a1, a2), - capacity=10, - cell_order="col-major", - tile_order="row-major", - coords_filters=coords_filters_pylist, - offsets_filters=off_filters, - validity_filters=validity_filters, - sparse=True, - ) - assert len(schema2.coords_filters) == 0 - - assert schema.domain.dim("d1").filters == coords_filters_pylist - assert schema.domain.dim("d2").filters == coords_filters_pylist - - assert len(schema2.domain.dim("d1").filters) == 1 - assert schema2.domain.dim("d1").filters[0] == tiledb.Bzip2Filter(level=5) - assert schema2.domain.dim("d2").filters[0] == tiledb.Bzip2Filter(level=5) - - assert len(schema2.offsets_filters) == 1 - assert schema2.offsets_filters[0] == tiledb.ZstdFilter(level=10) - - assert len(schema2.validity_filters) == 1 - assert schema2.validity_filters[0] == tiledb.GzipFilter(level=9) - - def test_none_filter_list(self): - with self.assertRaises(ValueError): - tiledb.FilterList([None]) - - with self.assertRaises(ValueError): - fl = tiledb.FilterList() - fl.append(None) - - def test_mixed_string_schema(self): - path = self.path("test_mixed_string_schema") - - dims = [ - tiledb.Dim(name="dpos", domain=(-100.0, 100.0), tile=10, dtype=np.float64), - tiledb.Dim(name="str_index", tile=None, dtype=np.bytes_), - ] - dom = tiledb.Domain(*dims) - attrs = [tiledb.Attr(name="val", dtype=np.float64)] - - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - - self.assertTrue(schema.domain.has_dim("str_index")) - self.assertFalse(schema.domain.has_dim("nonono_str_index")) - self.assertTrue(schema.domain.dim("str_index").isvar) - self.assertFalse(schema.domain.dim("dpos").isvar) - self.assertEqual(schema.domain.dim("dpos").dtype, np.double) - self.assertEqual(schema.domain.dim("str_index").dtype, np.bytes_) - self.assertFalse(schema.domain.homogeneous) - - tiledb.Array.create(path, schema) - with tiledb.open(path, "r") as arr: - assert_array_equal(arr[:]["str_index"], np.array([], dtype="|S1")) - - def test_schema_dump(self, capfd): - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(0, 99), tile=100, dtype=np.int64) - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=str)] - ) - - schema.dump() - - assert_captured(capfd, "Array type: sparse") diff --git a/tiledb/tests/test_attribute.py b/tiledb/tests/test_attribute.py deleted file mode 100644 index 63965fa461..0000000000 --- a/tiledb/tests/test_attribute.py +++ /dev/null @@ -1,313 +0,0 @@ -import sys -import xml.etree.ElementTree - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import tiledb - -from .common import DiskTestCase, assert_captured, has_pandas - - -class AttributeTest(DiskTestCase): - def test_minimal_attribute(self): - attr = tiledb.Attr() - self.assertEqual(attr, attr) - self.assertTrue(attr.isanon) - self.assertEqual(attr.name, "") - self.assertEqual(attr.dtype, np.float64) - self.assertFalse(attr.isvar) - self.assertFalse(attr.isnullable) - - try: - assert xml.etree.ElementTree.fromstring(attr._repr_html_()) is not None - except: - pytest.fail(f"Could not parse attr._repr_html_(). Saw {attr._repr_html_()}") - - def test_attribute_name_only(self, capfd): - attr = tiledb.Attr("foo") - - attr.dump() - assert_captured(capfd, "Name: foo") - - assert attr == attr - assert attr.name == "foo" - assert attr.dtype == np.float64, "default attribute type is float64" - - @pytest.mark.parametrize( - "dtype, fill", - [ - (np.dtype(bytes), b"abc"), - (str, "defg"), - (np.float32, np.float32(0.4023573667780681)), - (np.float64, np.float64(0.0560602549760851)), - (np.dtype("M8[ns]"), np.timedelta64(11, "ns")), - (np.dtype([("f0", "@\x00\x00\x00\x00\x00\x00$@", - # representation of POLYGON ((3 1, 4 5, 2 2, 1 2, 3 1)) - ( - b"\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08@" - b"\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x10@\x00\x00\x00\x00\x00\x00\x14@" - b"\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\xf0?" - b"\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x08@\x00\x00\x00\x00\x00\x00\xf0?" - ), - ], - ) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 1), tile=2)) - att = tiledb.Attr(dtype="wkb", var=True) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - # read back the data - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - for i in range(2): - assert_array_equal(T[:][i].tobytes(), A[i]) diff --git a/tiledb/tests/test_basic_import.py b/tiledb/tests/test_basic_import.py deleted file mode 100644 index 201c52d7c7..0000000000 --- a/tiledb/tests/test_basic_import.py +++ /dev/null @@ -1,47 +0,0 @@ -import ast -import subprocess -import sys - -from packaging.version import Version - - -def tiledb_cloud_eagerly_imports_pandas() -> bool: - try: - import pandas - - import tiledb.cloud - except ImportError: - # Can't import something that's not installed. - return False - if Version(tiledb.cloud.__version__) < Version("0.10.21"): - # Old versions of tiledb-cloud will import Pandas eagerly. - return True - if Version(pandas.__version__) < Version("1.5"): - # If an old version of Pandas is installed, tiledb-cloud needs to - # import it eagerly to patch it. - return True - return False - - -def test_dont_import_pandas() -> None: - """Verifies that when we import TileDB, we don't import Pandas eagerly.""" - - # We import tiledb.cloud within tiledb-py, if available, in order to hook - # Array.apply and other functionality. If the version of tiledb-cloud - # we have installed would import Pandas eagerly on its own, we need to - # suppress its importation. - suppress_cloud = ( - "sys.modules['tiledb.cloud'] = None;" - if tiledb_cloud_eagerly_imports_pandas() - else "" - ) - # Get a list of all modules from a completely fresh interpreter. - all_mods_str = subprocess.check_output( - ( - sys.executable, - "-c", - f"import sys; {suppress_cloud} import tiledb; print(list(sys.modules))", - ) - ) - all_mods = ast.literal_eval(all_mods_str.decode()) - assert "pandas" not in all_mods diff --git a/tiledb/tests/test_cloud.py b/tiledb/tests/test_cloud.py deleted file mode 100644 index 8b75d0d833..0000000000 --- a/tiledb/tests/test_cloud.py +++ /dev/null @@ -1,74 +0,0 @@ -import datetime -import os -import random -import string - -import numpy as np -import pytest - -import tiledb -from tiledb.tests.common import DiskTestCase - -tiledb_token = os.getenv("TILEDB_TOKEN") -tiledb_namespace = os.getenv("TILEDB_NAMESPACE") -s3_bucket = os.getenv("S3_BUCKET") - - -@pytest.mark.skipif( - os.getenv("CI") == None - or tiledb_token == None - or tiledb_namespace == None - or s3_bucket == None, - reason="No token was provided in a non-CI environment. Please set the TILEDB_TOKEN environment variable to run this test.", -) -class CloudTest(DiskTestCase): - def test_save_and_open_array_from_cloud(self): - config = tiledb.Config({"rest.token": tiledb_token}) - ctx = tiledb.Ctx(config=config) - - # Useful to include the datetime in the array name to handle multiple consecutive runs of the test. - # Random letters are added to the end to ensure that conflicts are avoided, especially in CI environments where multiple tests may run in parallel. - array_name = ( - datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") - + "-" - + "".join(random.choice(string.ascii_letters) for _ in range(5)) - ) - uri = f"tiledb://{tiledb_namespace}/s3://{s3_bucket}/{array_name}" - - with tiledb.from_numpy(uri, np.random.rand(3, 2), ctx=ctx) as T: - self.assertTrue(tiledb.array_exists(uri, ctx=ctx)) - self.assertTrue( - T.schema - == tiledb.ArraySchema( - domain=tiledb.Domain( - tiledb.Dim( - name="__dim_0", - domain=(0, 2), - tile=3, - dtype="uint64", - filters=tiledb.FilterList([tiledb.ZstdFilter(level=-1)]), - ), - tiledb.Dim( - name="__dim_1", - domain=(0, 1), - tile=2, - dtype="uint64", - filters=tiledb.FilterList([tiledb.ZstdFilter(level=-1)]), - ), - ), - attrs=[ - tiledb.Attr( - name="", - dtype="float64", - var=False, - nullable=False, - enum_label=None, - ), - ], - cell_order="row-major", - tile_order="row-major", - sparse=False, - ) - ) - - tiledb.Array.delete_array(uri, ctx=ctx) diff --git a/tiledb/tests/test_compat.py b/tiledb/tests/test_compat.py deleted file mode 100644 index 1474a7c3fc..0000000000 --- a/tiledb/tests/test_compat.py +++ /dev/null @@ -1,185 +0,0 @@ -import base64 -import io -import tarfile - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import tiledb - -from .common import DiskTestCase - - -# This test writes to local filesystem, skip -# TODO: unskip if we support transparent file ops on a VFS -@pytest.mark.skipif( - pytest.tiledb_vfs != "file", reason="Do not run compat test against non-file VFS" -) -class TestBackwardCompatibility(DiskTestCase): - def test_compat_tiledb_py_0_5_anon_attr_dense(self): - # array written with the following script: - """ - import tiledb, numpy as np - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 0), tile=1, dtype=np.uint8)) - attrs = (tiledb.Attr(name="_attr_", dtype=np.uint8),) - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) - path = "py0.5.9-test" - tiledb.DenseArray.create(path, schema) - with tiledb.open(path, "w") as A: - A[0] = 1 - """ - # save and print tgz of array directory: - # f = open("/tmp/py0.5.9-testa2.tgz",'rb').read() - # s = base64.encodebytes(f) - # print(f"{s.decode():>32}") - - array_tgz = b"""H4sIADjvS2AAA+2YzW4TMRCA7fIX0SJVFdz9AAg8XtubvbR9AF6gEpLjJg4FmgRttwJuReKAuFFe - oUcO9A165NJ7jxWPwBOwXq3RZgnNtmkiBPNJ2bEnY89uRjMZrzGgQal2ArFUXNZm0sa8D7GL2tpJ - SKIk6XIFTiVxlIg4UY9JEzjnMeeskFoVkpfzAAPJhYh1LLVmXIDgQJhqtPuM7O9lNs1v5flwlGaj - 4R/tXu84t3vBPuMPxa79PueEmS3+xvRT+2zghpkZuMz2bGYfZb3tcR9T4g8AuhZ/paOYML6IH+A/ - j//N/KPL8b2go+HbteJKiVfQW/5SjCr23mK1nNOK7g3t9jqd86Vtzfr59JCseU+hXoQVTT15++Wa - p6DznjbzFYwsoYtLuPi1Y2X8gFzMi1KelpKXCz/TSdbI38/M9d9mWfp7yR9j6v+/ULX6H4GUWP8X - Aa1IWtMh/z55AqepfWv2ujtuMKF3uw6m5b+AWv6DiiTH/F8EvhPYKsdPg65hs+Ht/Rmt2mwEXd5s - WHKD7rdOT05a71dWnnxh3zdWOx+/vrt/8Oruh9twdtBeXz8+Omo9vPPJdQj58W15Y47PiUzGmN1R - 9+V88j5w6fM/RFoIzP9FYIpze7P3OFflCvGHSOL7HwRBEARBEARBEARBEARBkFn4CRFQSoEAKAAA""" - - path = self.path("tiledb_py_0_6_anon_attr") - with tarfile.open(fileobj=io.BytesIO(base64.b64decode(array_tgz))) as tf: - try: - tf.extractall(path, filter="fully_trusted") - except TypeError: - tf.extractall(path) - - with tiledb.open(path) as A: - self.assertEqual(A.schema.attr(0).name, "") - self.assertEqual(A.schema.attr(0)._internal_name, "__attr") - self.assertEqual(A[0], 1) - mres = A.multi_index[0] - self.assertEqual(mres[""], 1) - - qres = A.query(coords=True).multi_index[0] - self.assertEqual(qres["d"], 0) - - def test_compat_py_0_5_anon_attr_sparse(self): - # This array was written with TileDB-Py 0.5.9: - # - using the invocation below, followed by - """ - tiledb.Array.create("path", tiledb.ArraySchema( - domain=tiledb.Domain(*[ - tiledb.Dim(name='d', domain=(0, 2), tile=2, dtype='uint64'),]), - attrs=[tiledb.Attr(name='', dtype='int64'),], sparse=True,)) - with tiledb.open("path", 'w') as A: - A[[0,1,2]] = np.array([1.0,2.0,5.0]) - """ - # - followed by `tar czf array.tgz -C path` - # - followed by `base64.encodebytes(open("sp6.tgz", 'rb').read())` - test_array = b"""H4sIANDnmV8AA+2Xz2vUQBTHJ6mLlnpYBGkRD0EQBGV3ZpLJdBFk9bBnj3pKJpvESrsbmo2otyoI - Pe/JSy9ePXnwruJBPPYv0P4VRRDNhAxm07o/dBN6eJ9lMpmXSd6Eb96bt602qhyMMcfYyHqbZT3O - xwqDmNyyzfRnWwYmFDOCDFb90hB6MkpEnC7l8TCKk2j413lPt4JgZ8pzJl/KWPo6K6LVdpxBkIgq - P4OF9Gck1d+kHPSvBan/TtTfbiW+V5WPf9CfM44MXNWCioD+johj8dwZ9beCgajiO5ilP6V2SX9m - cdC/Fs6lTQm+q2yaunopO2pIGrSGPGRnhfl30tbMx1rB9kzrC9d1fbd5//yh++HCEcXvXu7/6qJx - 7/J3fffuZmP/497qgTYOVo6Ojz+Px9d6zfU3r15o6O322r0q3xgoIuOf2NjsULJppVHHSiOPh6Hn - 9ZnAWFicsk4YspCEOOAd7jFO56kbFq7/KCXEhv2/Dv5bf8cJY/FoEAyTrI70RXJiD5mhPyEWKelv - M0Yh/9eBzP+38/PryjZn/pfz19Fk/le2NP/7rvtNFz1D+/Rlb/WrhvQf6Ip0p1KGum1ed3L+Wsmd - skl33fQOA+ngYgEXf9ALkyUreX8r77vodKK8P8x7lj/gtXbabOCMsYT8L5Iknvq3Yeb+z6xS/rew - bUL+rwMVpRt5K9pUSmjUuiKgTpYQ//0oiv3RlAwwK/7JifrfMjnUf7VQjP+raLJmULYb79s/jY0D - hB6kdpUUdHTz4cWspAAAAAAAAAAAAAAA4IzzG7vsp0oAKAAA""" - - path = self.path("test_tiledb_py_0_5_anon_attr_sparse") - with tarfile.open(fileobj=io.BytesIO(base64.b64decode(test_array))) as tf: - try: - tf.extractall(path, filter="fully_trusted") - except TypeError: - tf.extractall(path) - - with tiledb.open(path) as A: - assert_array_equal(A[:][""], np.array([1.0, 2.0, 5.0])) - - def test_tiledb_py_0_6_anon_attr(self): - # same creation steps as above for 0.5 - tgz_sparse = b"""H4sIAJKNpWAAA+2aPW/TQBjHz2nTFlGJClUoAxIuA0ICpXf2vdhbGWBiYEIgihI7MRT1JVKairKh - qgNfgA2kDnwFVga+ABtfgE8AEwsS5/ROTUzBjWpbKv3/JPexLxc/l/59zz3PJc1lUjqUUiWEO7Ty - 0GqsPbxgnArmUymk71LmUc6JK8ofGiE724Oor4fyYqvXH/S2/trv5VqSbPzjPuMfyi18nCXRXG61 - NpNBVOZjMIH+XEip9fc9xaB/FaT6b/Q6681BNy7Lh/5/SM4n0l8JPf9pWQMaBfq3on4/etXa7qwl - m1EZz0Ge/p6X1V9wKaF/FdT1sWrOXxs77dhXLw//OiRtcNKuzvBspH+gjwVz7Yy07TqdhNTuzcw4 - OwtT0407qzM3Hi58vzZH7678cN99rl9f2ji40JZ77T0Wzb+JD/rdp8SZnfta2gcFx5LOfyY9xqXn - ByoIVeYqDJMu44GOyGHCeRIGKuHCF1HsRRGLaacl8jOHifM/z2M+8r9KOL3+zd56jo8J1n+rPxcC - 8b8KjvRnvlSh8rJXcRJ2Euor7gne8XgsJdVPhAoSFXZFogrWX6//aqg/p9C/Ck6vf6Hx3+rPmEL8 - r4IC9G+1nvWj55vJ1mC4k9CNBpkqImf+a7VFRn8phI/5XwVpUh+Yc9fYk+b/af9FMp7/27Zd51vc - brf3Y7c+e//BFeJ8IJfSG9hoYd9zUl9p/4sZX7ZN1xrdlXrquwYXcAEXx7s4ojbSOWXK2NtknBVy - Mmxc/GKsZ2781tifxj4xjj8Zu2Qc79sBgKopYP3v5u0Z5uX/7I/8z6ce9n8rwYaAhj6ukvE4Yttu - flz+5TbeE/JIt9vYUSO3Hs8Pwww4wxQw/3O/Msit/wXP1n9Sof6vhNH538i02ak+njyA/4kC9v+L - rP/N/q8UmP/VgPofLuDiXLg4AvU/MBSw/hdZ/5v1XxcCDOt/FaD+P98UMP+LrP/t7z8Uxe8/KgH1 - PwAAAAAAAAAAAAAAAAAAAAAAAHD2+Q18oX51AFAAAA==""" - - path = self.path("0_6_anon_sparse") - with tarfile.open(fileobj=io.BytesIO(base64.b64decode(tgz_sparse))) as tf: - try: - tf.extractall(path, filter="fully_trusted") - except TypeError: - tf.extractall(path) - - with tiledb.open(path) as A: - if A.schema.sparse: - assert_array_equal(A[:][""], np.array([1.0, 2.0, 5.0])) - - ########################################################################################### - # This test checks that anonymous attributes internally stored as "__attr" are presented - # as "". - # The following steps were run under TileDB-Py 0.6 - # Normally, we can't actually write an attribute named "__attr" anymore, so - # restored a schema written by a patched libtiledb, and rename the attr file. - - # schema_data = b"\x05\x00\x00\x00]\x00\x00\x00\x00\x00\x00\x00q\x00\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x00\x00\x00\x00\x00\x00\x12\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\x01\x05\x00\x00\x00\x01\x01\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00q\x00\x00\x009\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00q\x00\x00\x009\x00\x00\x00x\x01ce\x80\x00\x01u(\x83\x81\x11\x08\x19\x18\x98XA\xc4\x7f `\xc0\x10\x01\xc9\x83p\n\x1b\x88\x84\xb0\x81\x8a\xc1l\x88\x00H\x9c\r\x88\xe3\xe3\x13KJ\x8aP\x94\x01\x00\xa2c\x0bD" - - # path = self.path("tiledb_py_0_6_anon_attr") - # ctx = tiledb.default_ctx() - # dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 0), tile=1, dtype=np.uint8)) - # attrs = (tiledb.Attr(name="_attr_", dtype=np.uint8, ctx=ctx),) - - # schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False, ctx=ctx) - # tiledb.DenseArray.create(path, schema, ctx=ctx) - - # with tiledb.open(path, "w") as A: - # A[0] = 1 - - # fragment_name = os.path.split(list(A.last_write_info.keys())[0])[-1] - # fragment_path = os.path.join(path, fragment_name) - - ## fix up the array the override schema - # with open(os.path.join(path, "__array_schema.tdb"), "wb") as f: - # f.write(schema_data) - - # shutil.move( - # os.path.join(fragment_path, "_attr_.tdb"), - # os.path.join(fragment_path, "__attr.tdb"), - # ) - - tgz_dense = b"""H4sIAL6RpWAAA+2YPW/TQBjH71qQKiKkAEIqYvEIS3p3uRd5A4kB0QUxdUHm/AJFzQu4rlrUoa3K - EFWMDB2Y+AQs7CAhJD5HPgBfgXNyRq4pdVNyHtDzk5z/3fni55y/L8+TdFaQcwghSghvonKqhkKn - HcqJoF1KpOx6hDLCCfKE+6UhtLWZ6dQs5eVgmGbDwV/nba8nSe+M65y8KW/u63REZyUI+kmmXT4G - s/vfZZKD/02Q+98bRhudLA5dxTCfh+R8Jv8VV8gjrhZUBvwPdJrqN8FmtJ70tYvnoM5/xmjFf8El - A/+b4LI5ntr2a6uXcHH2+uQVo3wA51PxpFWa75ujbfu4NLaDo2Qf4a07hwfXlm4tH6/d/7bnPfvS - xj8OX125PXr76eDoa2+EHn64OhqPb6w+Onr8HqOPUeuBy5sF/iDf/1QyymWXK6GYqvS4r3gcR2Gi - lc9JSLTvKxVqbRK6r0jsB6Iz3KiJMfP3P2OCwf5vhH/3v75ynLn+Y4wRCvVfE8zB/yB4nuoX/WSQ - TX5JxDqrVBE1+59RKSv+S8lh/zdCntSLHbxk9bz5P5/fQifzfzG2g8fhvtE11CqHKKaeN0T7lBDF - mCkx4nvmHR5agBAQAkKcHuL3FUvtm+hiRFa/W71rL/jO6k+rTxam+tnq8uJUdxcvGBhwxFzyv86y - 9Iw/DmrrfyYq+Z9TTiH/NwEuKa6MAQAAAAAAAAAAAADwf/ALzPk2VwAoAAA=""" - - path = self.path("0_6_anon_dense") - with tarfile.open(fileobj=io.BytesIO(base64.b64decode(tgz_dense))) as tf: - try: - tf.extractall(path, filter="fully_trusted") - except TypeError: - tf.extractall(path) - - with tiledb.open(path) as A: - self.assertEqual(A.schema.attr(0).name, "") - self.assertEqual(A.schema.attr(0)._internal_name, "__attr") - self.assertEqual(A[0], 1) - mres = A.multi_index[0] - self.assertEqual(mres[""], 1) - - qres = A.query(coords=True).multi_index[0] - self.assertEqual(qres["d"], 0) diff --git a/tiledb/tests/test_consolidation_plan.py b/tiledb/tests/test_consolidation_plan.py deleted file mode 100644 index b82190024f..0000000000 --- a/tiledb/tests/test_consolidation_plan.py +++ /dev/null @@ -1,53 +0,0 @@ -import json -import xml - -import numpy as np -import pytest - -import tiledb -from tiledb.tests.common import DiskTestCase - - -class ConsolidationPlanTest(DiskTestCase): - def test_consolidation_plan(self): - path = self.path("test_consolidation_plan") - - array = np.random.rand(4) - tiledb.from_numpy(path, array) - - with tiledb.open(path, "r") as A: - cons_plan = tiledb.ConsolidationPlan(tiledb.default_ctx(), A, 2) - assert cons_plan.num_nodes == 1 - assert cons_plan.num_nodes == len(cons_plan) - assert cons_plan.num_fragments(0) == 1 - # check that it has a nodes key - assert "nodes" in json.loads(cons_plan.dump()) - # check that it has a list of nodes - assert isinstance(json.loads(cons_plan.dump())["nodes"], list) - # check that each node has a uri key - for node in json.loads(cons_plan.dump())["nodes"]: - assert "uri" in node["uris"][0] - # test __repr__ - try: - assert ( - xml.etree.ElementTree.fromstring(cons_plan._repr_html_()) - is not None - ) - except: - pytest.fail( - f"Could not parse cons_plan._repr_html_(). Saw {cons_plan._repr_html_()}" - ) - # test __getitem__ - assert cons_plan[0] == { - "num_fragments": 1, - "fragment_uris": [cons_plan.fragment_uri(0, 0)], - } - - # write a second fragment to the array and check the new consolidation plan - with tiledb.open(path, "w") as A: - A[:] = np.random.rand(4) - - with tiledb.open(path, "r") as A: - cons_plan = tiledb.ConsolidationPlan(tiledb.default_ctx(), A, 4) - assert cons_plan.num_nodes == 1 - assert cons_plan.num_fragments(0) == 2 diff --git a/tiledb/tests/test_context_and_config.py b/tiledb/tests/test_context_and_config.py deleted file mode 100644 index 054338991c..0000000000 --- a/tiledb/tests/test_context_and_config.py +++ /dev/null @@ -1,263 +0,0 @@ -import os -import subprocess -import sys -import xml - -import pytest - -import tiledb - -from .common import DiskTestCase - - -# Wrapper to execute specific code in subprocess so that we can ensure the thread count -# init is correct. Necessary because multiprocess.get_context is only available in Python 3.4+, -# and the multiprocessing method may be set to fork by other tests (e.g. dask). -def init_test_wrapper(cfg=None): - python_exe = sys.executable - cmd = ( - f"from tiledb.tests.test_context_and_config import init_test_helper; " - f"init_test_helper({cfg})" - ) - test_path = os.path.dirname(os.path.abspath(__file__)) - - sp_output = subprocess.check_output([python_exe, "-c", cmd], cwd=test_path) - return int(sp_output.decode("UTF-8").strip()) - - -def init_test_helper(cfg=None): - tiledb.default_ctx(cfg) - concurrency_level = tiledb.default_ctx().config()["sm.io_concurrency_level"] - print(int(concurrency_level)) - - -class ContextTest(DiskTestCase): - def test_default_ctx(self): - ctx = tiledb.default_ctx() - self.assertIsInstance(ctx, tiledb.Ctx) - assert isinstance(ctx.config(), tiledb.libtiledb.Config) - - def test_default_ctx_errors(self): - config = tiledb.Config() - ctx = tiledb.Ctx(config=config) - - with pytest.raises(ValueError) as excinfo: - tiledb.default_ctx(ctx) - assert ( - "default_ctx takes in `tiledb.Config` object or dictionary with " - "config parameters." - ) == str(excinfo.value) - - def test_scope_ctx(self): - key = "sm.memory_budget" - ctx0 = tiledb.default_ctx() - new_config_dict = {key: 42} - new_config = tiledb.Config({key: 78}) - new_ctx = tiledb.Ctx({key: 61}) - - assert tiledb.default_ctx() is ctx0 - assert tiledb.default_ctx().config()[key] == "5368709120" - - with tiledb.scope_ctx(new_config_dict) as ctx1: - assert tiledb.default_ctx() is ctx1 - assert tiledb.default_ctx().config()[key] == "42" - with tiledb.scope_ctx(new_config) as ctx2: - assert tiledb.default_ctx() is ctx2 - assert tiledb.default_ctx().config()[key] == "78" - with tiledb.scope_ctx(new_ctx) as ctx3: - assert tiledb.default_ctx() is ctx3 is new_ctx - assert tiledb.default_ctx().config()[key] == "61" - assert tiledb.default_ctx() is ctx2 - assert tiledb.default_ctx().config()[key] == "78" - assert tiledb.default_ctx() is ctx1 - assert tiledb.default_ctx().config()[key] == "42" - - assert tiledb.default_ctx() is ctx0 - assert tiledb.default_ctx().config()[key] == "5368709120" - - def test_scope_ctx_error(self): - with pytest.raises(ValueError) as excinfo: - with tiledb.scope_ctx([]): - pass - assert ( - "scope_ctx takes in `tiledb.Ctx` object, `tiledb.Config` object, " - "or dictionary with config parameters." - ) == str(excinfo.value) - - @pytest.mark.skipif( - "pytest.tiledb_vfs == 's3'", reason="Test not yet supported with S3" - ) - @pytest.mark.filterwarnings( - # As of 0.17.0, a warning is emitted for the aarch64 conda builds with - # the messsage: - # : MADV_DONTNEED does not work (memset will be used instead) - # : (This is the expected behaviour if you are running under QEMU) - # This can be ignored as this is being run in a Docker image / QEMU and - # is therefore expected behavior - "ignore:This is the expected behaviour if you are running under QEMU" - ) - def test_init_config(self): - self.assertEqual( - int(tiledb.default_ctx().config()["sm.io_concurrency_level"]), - init_test_wrapper(), - ) - - self.assertEqual(3, init_test_wrapper({"sm.io_concurrency_level": 3})) - - -@pytest.mark.skipif( - "pytest.tiledb_vfs == 's3'", reason="Test not yet supported with S3" -) -class TestConfig(DiskTestCase): - def test_config(self): - config = tiledb.Config() - config["sm.memory_budget"] = 103 - assert repr(config) is not None - tiledb.Ctx(config) - - def test_ctx_config(self): - ctx = tiledb.Ctx({"sm.memory_budget": 103}) - config = ctx.config() - self.assertEqual(config["sm.memory_budget"], "103") - - def test_vfs_config(self): - config = tiledb.Config() - config["vfs.min_parallel_size"] = 1 - ctx = tiledb.Ctx() - self.assertEqual(ctx.config()["vfs.min_parallel_size"], "10485760") - vfs = tiledb.VFS(config, ctx=ctx) - self.assertEqual(vfs.config()["vfs.min_parallel_size"], "1") - - def test_config_iter(self): - config = tiledb.Config() - k, v = [], [] - for p in config.items(): - k.append(p[0]) - v.append(p[1]) - self.assertTrue(len(k) > 0) - - k, v = [], [] - for p in config.items("vfs.s3."): - k.append(p[0]) - v.append(p[1]) - self.assertTrue(len(k) > 0) - # Validate the prefix is not included - self.assertTrue("vfs.s3." not in k[0]) - - def test_config_bad_param(self): - config = tiledb.Config() - config["sm.foo"] = "bar" - ctx = tiledb.Ctx(config) - self.assertEqual(ctx.config()["sm.foo"], "bar") - - def test_config_unset(self): - config = tiledb.Config() - config["sm.memory_budget"] = 103 - del config["sm.memory_budget"] - # check that config parameter is default - self.assertEqual( - config["sm.memory_budget"], tiledb.Config()["sm.memory_budget"] - ) - - def test_config_from_file(self): - # skip: beacuse Config.load doesn't support VFS-supported URIs? - if pytest.tiledb_vfs == "s3": - pytest.skip( - "TODO need more plumbing to make pandas use TileDB VFS to read CSV files" - ) - - config_path = self.path("config") - with tiledb.FileIO(self.vfs, config_path, "wb") as fh: - fh.write("sm.memory_budget 100") - config = tiledb.Config.load(config_path) - self.assertEqual(config["sm.memory_budget"], "100") - - def test_ctx_config_from_file(self): - config_path = self.path("config") - vfs = tiledb.VFS() - with tiledb.FileIO(vfs, config_path, "wb") as fh: - fh.write("sm.memory_budget 100") - ctx = tiledb.Ctx(config=tiledb.Config.load(config_path)) - config = ctx.config() - self.assertEqual(config["sm.memory_budget"], "100") - - def test_ctx_config_dict(self): - ctx = tiledb.Ctx(config={"sm.memory_budget": "100"}) - config = ctx.config() - assert issubclass(type(config), tiledb.libtiledb.Config) - self.assertEqual(config["sm.memory_budget"], "100") - - def test_config_repr_sensitive_params_hidden(self): - # checks that the sensitive parameters set are not printed, - # sensitive parameters not set are printed as '', - # non-sensitive parameters set are printed as is, and - # non-sensitive parameters not set are not hidden (some are empty, some are default) - - unserialized_params_ = { - "vfs.azure.storage_account_name", - "vfs.azure.storage_account_key", - "vfs.azure.storage_sas_token", - "vfs.s3.proxy_username", - "vfs.s3.proxy_password", - "vfs.s3.aws_access_key_id", - "vfs.s3.aws_secret_access_key", - "vfs.s3.aws_session_token", - "vfs.s3.aws_role_arn", - "vfs.s3.aws_external_id", - "vfs.s3.aws_load_frequency", - "vfs.s3.aws_session_name", - "vfs.gcs.service_account_key", - "vfs.gcs.workload_identity_configuration", - "vfs.gcs.impersonate_service_account", - "rest.username", - "rest.password", - "rest.token", - } - - random_sensitive_params = { - "vfs.azure.storage_account_name": "myaccount", - "vfs.s3.aws_access_key_id": "myaccesskey", - "vfs.gcs.service_account_key": "myserviceaccountkey", - "rest.username": "myusername", - } - - random_non_sensitive_params = { - "rest.use_refactored_array_open": "false", - "rest.use_refactored_array_open_and_query_submit": "true", - "sm.allow_separate_attribute_writes": "true", - "sm.allow_updates_experimental": "false", - } - - config = tiledb.Config() - - for param, value in random_sensitive_params.items(): - config[param] = value - - for param, value in random_non_sensitive_params.items(): - config[param] = value - - # skip first two lines - for line in repr(config).split("\n")[2:]: - param, value = line.split("|") - # remove leading and trailing spaces - param = param.strip() - value = value.strip() - if param in unserialized_params_: - if param in random_sensitive_params: - self.assertEqual(value, "*" * 10) - else: - self.assertEqual(value, "''") - else: - if param in random_non_sensitive_params: - self.assertEqual(value, f"'{random_non_sensitive_params[param]}'") - else: - self.assertNotEqual(value, "*" * 10) - - def test_config_repr_html(self): - config = tiledb.Config() - try: - assert xml.etree.ElementTree.fromstring(config._repr_html_()) is not None - except: - pytest.fail( - f"Could not parse config._repr_html_(). Saw {config._repr_html_()}" - ) diff --git a/tiledb/tests/test_core.py b/tiledb/tests/test_core.py deleted file mode 100644 index 119687cf36..0000000000 --- a/tiledb/tests/test_core.py +++ /dev/null @@ -1,156 +0,0 @@ -import copy -import random - -import numpy as np -from numpy.testing import assert_array_equal - -import tiledb -import tiledb.main as core - -from .common import DiskTestCase, rand_ascii - - -class CoreCCTest(DiskTestCase): - def test_pyquery_basic(self): - ctx = tiledb.Ctx() - uri = self.path("test_pyquery_basic") - with tiledb.from_numpy(uri, np.random.rand(4)): - pass - - with tiledb.open(uri) as a: - with tiledb.scope_ctx({"py.init_buffer_bytes": "abcd"}) as testctx: - with self.assertRaises(ValueError): - core.PyQuery(testctx, a, ("",), (), 0, False) - - q = core.PyQuery(ctx, a, ("",), (), 0, False) - - try: - q._test_err("bad foo happened") - except Exception as exc: - assert isinstance(exc, tiledb.TileDBError) - assert str(exc) == "bad foo happened" - - with tiledb.open(uri) as a: - q2 = core.PyQuery(ctx, a, ("",), (), 0, False) - subarray = tiledb.Subarray(a) - subarray.add_ranges([[(0, 3)]]) - q2.set_subarray(subarray) - q2.submit() - res = q2.results()[""][0] - res.dtype = np.double - assert_array_equal(res, a[:]) - - def test_pyquery_init(self): - uri = self.path("test_pyquery_init") - intmax = np.iinfo(np.int64).max - config_dict = { - "sm.tile_cache_size": "100", - "py.init_buffer_bytes": str(intmax), - "py.alloc_max_bytes": str(intmax), - } - with tiledb.scope_ctx(config_dict) as ctx: - with tiledb.from_numpy(uri, np.random.rand(4)): - pass - - with tiledb.open(uri) as a: - q = core.PyQuery(ctx, a, ("",), (), 0, False) - self.assertEqual(q._test_init_buffer_bytes, intmax) - self.assertEqual(q._test_alloc_max_bytes, intmax) - - with self.assertRaisesRegex( - ValueError, - "Invalid parameter: 'py.alloc_max_bytes' must be >= 1 MB ", - ), tiledb.scope_ctx({"py.alloc_max_bytes": 10}) as ctx2: - q = core.PyQuery(ctx2, a, ("",), (), 0, False) - - def test_import_buffer(self): - uri = self.path("test_import_buffer") - - def_tile = 1 - if tiledb.libtiledb.version() < (2, 2): - def_tile = 2 - - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 3), tile=def_tile, dtype=np.int64), - tiledb.Dim(domain=(0, 3), tile=def_tile, dtype=np.int64), - ) - attrs = [ - tiledb.Attr(name="", dtype=np.float64), - tiledb.Attr(name="foo", dtype=np.int32), - tiledb.Attr(name="str", dtype=str), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) - tiledb.DenseArray.create(uri, schema) - - data_orig = { - "": 2.5 * np.identity(4, dtype=np.float64), - "foo": 8 * np.identity(4, dtype=np.int32), - "str": np.array( - [rand_ascii(random.randint(0, 5)) for _ in range(16)], dtype="U0" - ).reshape(4, 4), - } - - with tiledb.open(uri, "w") as A: - A[:] = data_orig - - with tiledb.open(uri) as B: - assert_array_equal(B[:][""], data_orig[""]), - assert_array_equal(B[:]["foo"], data_orig["foo"]) - - data_mod = { - "": 5 * np.identity(4, dtype=np.float64), - "foo": 32 * np.identity(4, dtype=np.int32), - "str": np.array( - [rand_ascii(random.randint(1, 7)) for _ in range(16)], dtype="U0" - ).reshape(4, 4), - } - - str_offsets = np.array( - [0] + [len(x) for x in data_mod["str"].flatten()[:-1]], dtype=np.uint64 - ) - str_offsets = np.cumsum(str_offsets) - - str_raw = np.array( - [ord(c) for c in "".join([x for x in data_mod["str"].flatten()])], - dtype=np.uint8, - ) - - data_mod_bfr = { - "": (data_mod[""].flatten().view(np.uint8), np.array([], dtype=np.uint64)), - "foo": ( - data_mod["foo"].flatten().view(np.uint8), - np.array([], dtype=np.uint64), - ), - "str": (str_raw.flatten().view(np.uint8), str_offsets), - } - - with tiledb.open(uri) as C: - res = C.multi_index[0:3, 0:3] - assert_array_equal(res[""], data_orig[""]) - assert_array_equal(res["foo"], data_orig["foo"]) - assert_array_equal(res["str"], data_orig["str"]) - - C._set_buffers(copy.deepcopy(data_mod_bfr)) - res = C.multi_index[0:3, 0:3] - assert_array_equal(res[""], data_mod[""]) - assert_array_equal(res["foo"], data_mod["foo"]) - assert_array_equal(res["str"], data_mod["str"]) - - with tiledb.open(uri) as D: - D._set_buffers(copy.deepcopy(data_mod_bfr)) - res = D[:, :] - assert_array_equal(res[""], data_mod[""]) - assert_array_equal(res["foo"], data_mod["foo"]) - assert_array_equal(res["str"], data_mod["str"]) - - with tiledb.DenseArray(uri, mode="r") as E, tiledb.scope_ctx() as ctx: - # Ensure that query only returns specified attributes - q = core.PyQuery(ctx, E, ("foo",), (), 0, False) - subarray = tiledb.Subarray(E, ctx) - subarray.add_ranges([[(0, 1)]]) - q.set_subarray(subarray) - q.submit() - r = q.results() - self.assertTrue("foo" in r) - self.assertTrue("str" not in r) - del q diff --git a/tiledb/tests/test_current_domain.py b/tiledb/tests/test_current_domain.py deleted file mode 100644 index 25a39f80be..0000000000 --- a/tiledb/tests/test_current_domain.py +++ /dev/null @@ -1,215 +0,0 @@ -import tempfile -import unittest - -import numpy as np -import pytest - -import tiledb -import tiledb.cc as lt - -if not (tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] >= 25): - pytest.skip( - "CurrentDomain is only available in TileDB 2.25 and later", - allow_module_level=True, - ) - - -class NDRectangleTest(unittest.TestCase): - def test_ndrectagle_standalone_string(self): - ctx = tiledb.Ctx() - dom = tiledb.Domain( - tiledb.Dim(name="d1", dtype="S0"), - tiledb.Dim(name="d2", dtype="S0"), - ) - ndrect = tiledb.NDRectangle(ctx, dom) - - range_one = ("a", "c") - range_two = ("b", "db") - - ndrect.set_range(0, range_one[0], range_one[1]) - ndrect.set_range(1, range_two[0], range_two[1]) - - self.assertEqual(ndrect.range(0), range_one) - self.assertEqual(ndrect.range(1), range_two) - - # should be the same - self.assertEqual(ndrect.range("d1"), range_one) - self.assertEqual(ndrect.range("d2"), range_two) - - def test_ndrectagle_standalone_integer(self): - ctx = tiledb.Ctx() - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(0, 100), tile=10, dtype=np.int64), - tiledb.Dim(name="y", domain=(0, 100), tile=10, dtype=np.int64), - ) - ndrect = tiledb.NDRectangle(ctx, dom) - - range_one = (10, 20) - range_two = (30, 40) - - ndrect.set_range(0, range_one[0], range_one[1]) - ndrect.set_range(1, range_two[0], range_two[1]) - - self.assertEqual(ndrect.range(0), range_one) - self.assertEqual(ndrect.range(1), range_two) - - # should be the same - self.assertEqual(ndrect.range("x"), range_one) - self.assertEqual(ndrect.range("y"), range_two) - - -class CurrentDomainTest(unittest.TestCase): - def test_current_domain_with_ndrectangle_integer(self): - ctx = tiledb.Ctx() - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(0, 100), tile=10, dtype=np.int64), - tiledb.Dim(name="y", domain=(0, 100), tile=10, dtype=np.int64), - ) - ndrect = tiledb.NDRectangle(ctx, dom) - - range_one = (10, 20) - range_two = (30, 40) - - ndrect.set_range(0, range_one[0], range_one[1]) - ndrect.set_range(1, range_two[0], range_two[1]) - - self.assertEqual(ndrect.range(0), range_one) - self.assertEqual(ndrect.range(1), range_two) - - current_domain = tiledb.CurrentDomain(ctx) - - self.assertTrue(current_domain.is_empty) - current_domain.set_ndrectangle(ndrect) - self.assertFalse(current_domain.is_empty) - - # let's try to get the NDRectangle back from the current domain object - rect = current_domain.ndrectangle - - range1 = rect.range(0) - range2 = rect.range(1) - - self.assertEqual(range1, range_one) - self.assertEqual(range2, range_two) - - range1 = rect.range("x") - range2 = rect.range("y") - - # should be the same - self.assertEqual(range1, range_one) - self.assertEqual(range2, range_two) - - def test_current_domain_with_ndrectangle_string(self): - ctx = tiledb.Ctx() - dom = tiledb.Domain( - tiledb.Dim(name="d1", dtype="S0"), - tiledb.Dim(name="d2", dtype="S0"), - ) - ndrect = tiledb.NDRectangle(ctx, dom) - - range_one = ("a", "c") - range_two = ("b", "db") - - ndrect.set_range(0, range_one[0], range_one[1]) - ndrect.set_range(1, range_two[0], range_two[1]) - - self.assertEqual(ndrect.range(0), range_one) - self.assertEqual(ndrect.range(1), range_two) - - current_domain = tiledb.CurrentDomain(ctx) - - self.assertTrue(current_domain.is_empty) - current_domain.set_ndrectangle(ndrect) - self.assertFalse(current_domain.is_empty) - - # let's try to get the NDRectangle back from the current domain object - rect = current_domain.ndrectangle - - range1 = rect.range(0) - range2 = rect.range(1) - - self.assertEqual(range1, range_one) - self.assertEqual(range2, range_two) - - range1 = rect.range("d1") - range2 = rect.range("d2") - - # should be the same - self.assertEqual(range1, range_one) - self.assertEqual(range2, range_two) - - def test_array_schema_with_current_domain_with_ndrectangle(self): - uri = tempfile.mkdtemp() - ctx = tiledb.Ctx() - dom = tiledb.Domain( - tiledb.Dim(name="d", domain=(1, 999), tile=2, dtype=np.int64), - tiledb.Dim(name="d2", domain=(1, 999), tile=2, dtype=np.int64), - ) - att = tiledb.Attr(name="a", dtype=np.int64) - schema = tiledb.ArraySchema(sparse=True, ctx=ctx, domain=dom, attrs=(att,)) - - ndrect = tiledb.NDRectangle(ctx, dom) - range_one = (10, 20) - range_two = (30, 40) - ndrect.set_range(0, range_one[0], range_one[1]) - ndrect.set_range(1, range_two[0], range_two[1]) - - current_domain = tiledb.CurrentDomain(ctx) - current_domain.set_ndrectangle(ndrect) - schema.set_current_domain(current_domain) - - # create the array - tiledb.Array.create(uri, schema) - - # open the array and check the current domain and the NDRectangle - A = tiledb.Array(uri, mode="r") - - cd = A.schema.current_domain - self.assertFalse(cd.is_empty) - self.assertEqual(cd.type, lt.CurrentDomainType.NDRECTANGLE) - - ndr = cd.ndrectangle - self.assertEqual(ndr.range(0), range_one) - self.assertEqual(ndr.range(1), range_two) - - # a 3rd dimension should raise an error - with self.assertRaises(tiledb.TileDBError): - ndr.range(2) - - A.close() - - def test_current_domain_evolve(self): - uri = tempfile.mkdtemp() - ctx = tiledb.Ctx() - dom = tiledb.Domain( - tiledb.Dim(name="d", domain=(1, 999), tile=2, dtype=np.int64), - ) - - att = tiledb.Attr(name="a", dtype=np.int64) - schema = tiledb.ArraySchema(sparse=True, ctx=ctx, domain=dom, attrs=(att,)) - - ndrect = tiledb.NDRectangle(ctx, dom) - range_one = (10, 20) - ndrect.set_range(0, range_one[0], range_one[1]) - - current_domain = tiledb.CurrentDomain(ctx) - current_domain.set_ndrectangle(ndrect) - schema.set_current_domain(current_domain) - - tiledb.Array.create(uri, schema) - - new_range = (5, 30) - new_ndrect = tiledb.NDRectangle(ctx, dom) - new_ndrect.set_range(0, new_range[0], new_range[1]) - new_current_domain = tiledb.CurrentDomain(ctx) - new_current_domain.set_ndrectangle(new_ndrect) - - se = tiledb.ArraySchemaEvolution(ctx) - se.expand_current_domain(new_current_domain) - se.array_evolve(uri) - - A = tiledb.Array(uri, mode="r") - s = A.schema - cd = s.current_domain - n = cd.ndrectangle - self.assertEqual(n.range(0), new_range) - A.close() diff --git a/tiledb/tests/test_dask.py b/tiledb/tests/test_dask.py deleted file mode 100644 index 4d27519023..0000000000 --- a/tiledb/tests/test_dask.py +++ /dev/null @@ -1,206 +0,0 @@ -import sys -import warnings -from datetime import datetime - -import numpy as np -import pytest - -import tiledb - -from .common import DiskTestCase - -# Skip this test if dask is unavailable -da_array = pytest.importorskip("dask.array") -da_distributed = pytest.importorskip("dask.distributed") - - -class TestDaskSupport(DiskTestCase): - def test_dask_from_numpy_1d(self): - uri = self.path("np_1attr") - A = np.random.randn(50, 50) - T = tiledb.from_numpy(uri, A, tile=50) - T.close() - - with tiledb.open(uri) as T: - D = da_array.from_tiledb(T) - np.testing.assert_array_equal(D, A) - - D2 = da_array.from_tiledb(uri) - np.testing.assert_array_equal(D2, A) - self.assertAlmostEqual( - np.mean(A), D2.mean().compute(scheduler="single-threaded") - ) - - def _make_multiattr_2d(self, uri, shape=(0, 100), tile=10): - dom = tiledb.Domain( - tiledb.Dim("x", (0, 10), dtype=np.uint64, tile=tile), - tiledb.Dim("y", (0, 50), dtype=np.uint64, tile=tile), - ) - schema = tiledb.ArraySchema( - attrs=(tiledb.Attr("attr1"), tiledb.Attr("attr2")), domain=dom - ) - - tiledb.DenseArray.create(uri, schema) - - @pytest.mark.filterwarnings("ignore:There is no current event loop") - def test_dask_multiattr_2d(self): - uri = self.path("multiattr") - - self._make_multiattr_2d(uri) - - with tiledb.DenseArray(uri, "w") as T: - ar1 = np.random.randn(*T.schema.shape) - ar2 = np.random.randn(*T.schema.shape) - T[:] = {"attr1": ar1, "attr2": ar2} - with tiledb.DenseArray(uri, mode="r", attr="attr2") as T: - # basic round-trip from dask.array - D = da_array.from_tiledb(T, attribute="attr2") - np.testing.assert_array_equal(ar2, np.array(D)) - - # smoke-test computation - # note: re-init from_tiledb each time, or else dask just uses the cached materialization - D = da_array.from_tiledb(uri, attribute="attr2") - self.assertAlmostEqual(np.mean(ar2), D.mean().compute(scheduler="threads")) - D = da_array.from_tiledb(uri, attribute="attr2") - self.assertAlmostEqual( - np.mean(ar2), D.mean().compute(scheduler="single-threaded") - ) - D = da_array.from_tiledb(uri, attribute="attr2") - self.assertAlmostEqual(np.mean(ar2), D.mean().compute(scheduler="processes")) - - # test dask.distributed - D = da_array.from_tiledb(uri, attribute="attr2") - with da_distributed.Client(): - np.testing.assert_approx_equal(D.mean().compute(), np.mean(ar2)) - - def test_dask_write(self): - uri = self.path("dask_w") - D = da_array.random.random(10, 10) - D.to_tiledb(uri) - DT = da_array.from_tiledb(uri) - np.testing.assert_array_equal(D, DT) - - def test_dask_overlap_blocks(self): - uri = self.path("np_overlap_blocks") - A = np.ones((2, 50, 50)) - T = tiledb.from_numpy(uri, A, tile=(1, 5, 5)) - T.close() - - with tiledb.open(uri) as T: - D = da_array.from_tiledb(T) - np.testing.assert_array_equal(D, A) - - D2 = da_array.from_tiledb(uri) - np.testing.assert_array_equal(D2, A) - - D3 = D2.map_overlap( - lambda x: x + 1, depth={0: 0, 1: 1, 2: 1}, dtype=A.dtype, boundary="none" - ).compute() - np.testing.assert_array_equal(D2 * 2, D3) - - def test_labeled_dask_overlap_blocks(self): - uri = self.path("np_labeled_overlap_blocks") - A = np.ones((2, 50, 50)) - - dom = tiledb.Domain( - tiledb.Dim(name="BANDS", domain=(0, 1), tile=1), - tiledb.Dim(name="Y", domain=(0, 49), tile=5, dtype=np.uint64), - tiledb.Dim(name="X", domain=(0, 49), tile=5, dtype=np.uint64), - ) - - schema = tiledb.ArraySchema( - domain=dom, - sparse=False, - attrs=[tiledb.Attr(name="TDB_VALUES", dtype=A.dtype)], - ) - - tiledb.DenseArray.create(uri, schema) - - with tiledb.open(uri, "w", attr="TDB_VALUES") as T: - T[:] = A - - D2 = da_array.from_tiledb(uri, attribute="TDB_VALUES") - - D3 = D2.map_overlap( - lambda x: x + 1, depth={0: 0, 1: 1, 2: 1}, dtype=D2.dtype, boundary="none" - ).compute() - np.testing.assert_array_equal(D2 + 1, D3) - - def test_labeled_dask_blocks(self): - uri = self.path("np_labeled_map_blocks") - A = np.ones((2, 50, 50)) - - dom = tiledb.Domain( - tiledb.Dim(name="BANDS", domain=(0, 1), tile=1), - tiledb.Dim(name="Y", domain=(0, 49), tile=5, dtype=np.uint64), - tiledb.Dim(name="X", domain=(0, 49), tile=5, dtype=np.uint64), - ) - - schema = tiledb.ArraySchema( - domain=dom, - sparse=False, - attrs=[tiledb.Attr(name="TDB_VALUES", dtype=A.dtype)], - ) - - tiledb.DenseArray.create(uri, schema) - with tiledb.open(uri, "w", attr="TDB_VALUES") as D1: - D1[:] = A - - D2 = da_array.from_tiledb(uri, attribute="TDB_VALUES") - - D3 = D2.map_blocks(lambda x: x + 1, dtype=D2.dtype).compute( - scheduler="processes" - ) - np.testing.assert_array_equal(D2 + 1, D3) - - -def test_sc33742_dask_array_object_dtype_conversion(): - # This test verifies that an array can be converted to buffer after serialization - # through several dask.distributed compute steps. The original source of the issue - # was that a `dtype == dtype("O")` check was returning false, presumably because the - # dtype object was not === after serialization. - import random - - import dask - import numpy as np - - @dask.delayed - def get_data(): - dd = dask.delayed( - lambda x=0: { - "Z": np.array( - [ - np.zeros((random.randint(60, 100),), np.dtype("float64")), - np.zeros((random.randint(1, 50),), np.dtype("float64")), - ], - dtype=np.dtype("O"), - ) - } - )() - return dask.delayed([dd]) - - @dask.delayed - def use_data(data): - f = dask.compute(data, traverse=True)[0][0] - - from tiledb import main - - main.array_to_buffer(f["Z"], True, False) - - # Various warnings are raised by dask.distributed in different Python versions and - # package combinations (eg Python 3.7 and older tornado), but they are not relevant to - # this test. - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - global client - client = da_distributed.Client( - da_distributed.LocalCluster(scheduler_port=9786, dashboard_address=9787) - ) - - w = [] - - data = dask.delayed(get_data)() - w.append(use_data(data)) - - futures = client.compute(w) - client.gather(futures) diff --git a/tiledb/tests/test_dimension.py b/tiledb/tests/test_dimension.py deleted file mode 100644 index e4701ff839..0000000000 --- a/tiledb/tests/test_dimension.py +++ /dev/null @@ -1,125 +0,0 @@ -import unittest -import xml.etree.ElementTree - -import numpy as np -import pytest - -import tiledb - - -class DimensionTest(unittest.TestCase): - def test_minimal_dimension(self): - dim = tiledb.Dim(domain=(0, 4), tile=5) - self.assertEqual(dim.name, "__dim_0", "automatic dimension name is incorrect") - self.assertEqual(dim.shape, (5,)) - self.assertEqual(dim.tile, 5) - self.assertEqual(dim, dim) - - def test_dimension(self): - dim = tiledb.Dim(name="d1", domain=(0, 3), tile=2) - self.assertEqual(dim.name, "d1") - self.assertEqual(dim.shape, (4,)) - self.assertEqual(dim.tile, 2) - self.assertEqual(dim, dim) - try: - assert xml.etree.ElementTree.fromstring(dim._repr_html_()) is not None - except: - pytest.fail(f"Could not parse dim._repr_html_(). Saw {dim._repr_html_()}") - - def test_dimension_filter(self): - filters = [tiledb.GzipFilter(2)] - dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filters) - self.assertEqual(dim.filters, filters) - self.assertEqual(dim, dim) - - filter_list = tiledb.FilterList(filters) - dim = tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=filter_list) - self.assertEqual(dim.filters, filter_list) - self.assertEqual(dim, dim) - - with self.assertRaises(TypeError): - tiledb.Dim(name="df", domain=(0, 2), tile=1, filters=1) - - def test_datetime_dimension(self): - # Regular usage - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - tile=np.timedelta64(20, "D"), - dtype=np.datetime64("", "D"), - ) - self.assertEqual(dim, dim) - self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) - self.assertEqual(dim.tile, np.timedelta64(20, "D")) - self.assertNotEqual(dim.tile, np.timedelta64(21, "D")) - self.assertNotEqual(dim.tile, np.timedelta64(20, "W")) # Sanity check unit - self.assertTupleEqual( - dim.domain, (np.datetime64("2010-01-01"), np.datetime64("2020-01-01")) - ) - self.assertEqual(dim.shape, (3653,)) - - # No tile extent specified: this is not an error in 2.2 - if tiledb.libtiledb.version() < (2, 2): - with self.assertRaises(tiledb.TileDBError): - tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - dtype=np.datetime64("", "D"), - ) - - # Integer tile extent is ok - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - tile=20, - dtype=np.datetime64("", "D"), - ) - self.assertEqual(dim, dim) - self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "D"))) - self.assertEqual(dim.tile, np.timedelta64(20, "D")) - - # Year resolution - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010"), np.datetime64("2020")), - tile=5, - dtype=np.datetime64("", "Y"), - ) - self.assertEqual(dim, dim) - self.assertEqual(dim.dtype, np.dtype(np.datetime64("", "Y"))) - self.assertEqual(dim.tile, np.timedelta64(5, "Y")) - self.assertTupleEqual( - dim.domain, (np.datetime64("2010", "Y"), np.datetime64("2020", "Y")) - ) - - # End domain promoted to day resolution - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020")), - tile=2, - dtype=np.datetime64("", "D"), - ) - self.assertEqual(dim, dim) - self.assertEqual(dim.tile, np.timedelta64(2, "D")) - self.assertTupleEqual( - dim.domain, - (np.datetime64("2010-01-01", "D"), np.datetime64("2020-01-01", "D")), - ) - - # Domain values can't be integral - with self.assertRaises(TypeError): - dim = tiledb.Dim( - name="d1", domain=(-10, 10), tile=2, dtype=np.datetime64("", "D") - ) - - def test_shape(self): - dim = tiledb.Dim(name="", dtype="|S0", var=True) - with self.assertRaisesRegex( - TypeError, - "shape only valid for integer and datetime dimension domains", - ): - dim.shape - - @pytest.mark.xfail - def test_fail_on_0_extent(self): - tiledb.Dim(domain=(0, 10), tile=0) diff --git a/tiledb/tests/test_dimension_label.py b/tiledb/tests/test_dimension_label.py deleted file mode 100644 index bb7c98e5bd..0000000000 --- a/tiledb/tests/test_dimension_label.py +++ /dev/null @@ -1,485 +0,0 @@ -from collections import OrderedDict - -import numpy as np -import pytest - -import tiledb -from tiledb.tests.common import DiskTestCase - - -class DimensionLabelTestCase(DiskTestCase): - def test_dim_label_schema(self): - dim_label_schema1 = tiledb.DimLabelSchema( - "decreasing", label_dtype=np.float64, dim_dtype=np.int32 - ) - filter = tiledb.FilterList() - dim_label_schema2 = tiledb.DimLabelSchema( - "increasing", - label_dtype=np.float32, - dim_dtype=np.int64, - dim_tile=20, - label_filters=filter, - ) - - assert dim_label_schema1.label_order == "decreasing" - assert dim_label_schema1.label_dtype == np.float64 - assert dim_label_schema1.dim_dtype == np.int32 - assert dim_label_schema1.dim_tile is None - assert dim_label_schema1.label_filters is None - - assert dim_label_schema2.label_order == "increasing" - assert dim_label_schema2.label_dtype == np.float32 - assert dim_label_schema2.dim_dtype == np.int64 - assert dim_label_schema2.dim_tile == 20 - assert dim_label_schema2.label_filters == filter - - def test_dim_label_schema_from_dim(self): - dim = tiledb.Dim("dim", domain=(1, 10), dtype=np.int32, tile=10) - dim_label_schema3 = dim.create_label_schema("decreasing", np.int32, tile=2) - filter = tiledb.FilterList() - dim_label_schema2 = dim.create_label_schema( - order="increasing", dtype=np.float32, tile=5, filters=filter - ) - dim_label_schema1 = dim.create_label_schema("decreasing", np.float64, tile=None) - - assert dim_label_schema1.label_order == "decreasing" - assert dim_label_schema1.label_dtype == np.float64 - assert dim_label_schema1.dim_dtype == np.int32 - assert dim_label_schema1.dim_tile == 10 - assert dim_label_schema1.label_filters is None - - assert dim_label_schema3.label_order == "decreasing" - assert dim_label_schema3.label_dtype == np.int32 - assert dim_label_schema3.dim_dtype == np.int32 - assert dim_label_schema3.dim_tile == 2 - assert dim_label_schema3.label_filters is None - - assert dim_label_schema2.label_order == "increasing" - assert dim_label_schema2.label_dtype == np.float32 - assert dim_label_schema2.dim_dtype == np.int32 - assert dim_label_schema2.dim_tile == 5 - assert dim_label_schema2.label_filters == filter - assert dim.tile == 10 - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - def test_add_to_array_schema(self): - dim = tiledb.Dim("dim", domain=(1, 10)) - dom = tiledb.Domain(dim) - att = tiledb.Attr("val", dtype=np.uint64) - filters = tiledb.FilterList([tiledb.ZstdFilter(10)]) - dim_labels = { - 0: { - "l1": tiledb.DimLabelSchema( - "increasing", - label_dtype=np.float64, - dim_dtype=dim.dtype, - dim_tile=10, - label_filters=filters, - ) - } - } - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) - assert schema.has_dim_label("l1") - assert not schema.has_dim_label("fake_name") - - # Check the dimension label properties - dim_label = schema.dim_label("l1") - assert dim_label.dtype == np.float64 - assert not dim_label.isvar - assert not dim_label.isascii - - # Create array check values in dimension label schema - uri = self.path("array_with_label") - tiledb.Array.create(uri, schema) - - # Load the array schema for the dimension label - base_array_schema = tiledb.ArraySchema.load(uri) - dim_label = base_array_schema.dim_label("l1") - label_array_schema = tiledb.ArraySchema.load(dim_label.uri) - - # Chack the array schema for the dimension label - label_dim = label_array_schema.domain.dim(0) - assert label_dim.tile == 10 - assert label_dim.dtype == np.uint64 - label_attr = label_array_schema.attr(dim_label.label_attr_name) - assert label_attr.dtype == np.float64 - assert label_attr.filters == filters - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - def test_add_to_array_schema_out_of_bounds(self): - dim = tiledb.Dim("label", domain=(1, 10)) - dom = tiledb.Domain(dim) - att = tiledb.Attr("val", dtype=np.uint64) - dim_labels = { - 2: { - "l1": tiledb.DimLabelSchema( - "increasing", label_dtype=dim.dtype, dim_dtype=dim.dtype - ) - } - } - - with pytest.raises(tiledb.TileDBError): - tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - def test_add_to_array_schema_dim_dtype_mismatch(self): - dim = tiledb.Dim("label", domain=(1, 10)) - dom = tiledb.Domain(dim) - att = tiledb.Attr("val", dtype=np.uint64) - dim_labels = { - 2: { - "label": tiledb.DimLabelSchema( - "increasing", label_dtype=dim.dtype, dim_dtype=np.int32 - ) - } - } - - with pytest.raises(tiledb.TileDBError): - tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - @pytest.mark.parametrize("var", [True, False]) - def test_dimension_label_round_trip_dense_array(self, var): - # Create array schema with dimension labels - dim = tiledb.Dim("d1", domain=(1, 10)) - dom = tiledb.Domain(dim) - att = tiledb.Attr("a1", dtype=np.int64) - dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.int64)}} - if var: - dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.bytes_)}} - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) - - # Create array - uri = self.path("dense_array_with_label") - tiledb.Array.create(uri, schema) - - # Write data to the array and the label - attr_data = np.arange(1, 11) - label_data = np.arange(-9, 10, 2) - if var: - label_data = np.array( - [str(chr(ord("a") + c) * (10 - c)).encode("utf-8") for c in range(10)] - ) - with tiledb.open(uri, "w") as array: - array[:] = {"a1": attr_data, "l1": label_data} - - # Load the array schema and get the URI of the dimension label - schema = tiledb.ArraySchema.load(uri) - dim_label = schema.dim_label("l1") - - # Read and check the data directly from the dimension label - with tiledb.open(dim_label.uri, "r") as L1: - output_data = L1[:] - output_label_data = output_data[dim_label.label_attr_name] - np.testing.assert_array_equal(output_label_data, label_data) - - # Read and check the data using label indexer on parent array - with tiledb.open(uri, "r") as array: - indexer = array.label_index(["l1"]) - - # Read full array - result = indexer[label_data[0] : label_data[-1]] - - np.testing.assert_array_equal(result["a1"], attr_data) - np.testing.assert_array_equal(result["l1"], label_data) - - # Read each individual index - for index in range(10): - label_index = label_data[index] - result = indexer[label_index:label_index] - assert result["a1"][0] == attr_data[index] - assert result["l1"][0] == label_index - - for index in range(10): - label_index = label_data[index:] - result = indexer[label_index[0] : label_index[-1]] - np.testing.assert_array_equal(result["a1"], attr_data[index:]) - np.testing.assert_array_equal(result["l1"], label_index) - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - @pytest.mark.parametrize("var", [True, False]) - def test_dimension_label_round_trip_multidim_dense_array(self, var): - # Create array schema with dimension labels - dim1 = tiledb.Dim("x_index", domain=(1, 8)) - dim2 = tiledb.Dim("y_index", domain=(1, 8)) - dom = tiledb.Domain(dim1, dim2) - att = tiledb.Attr("value", dtype=np.int64) - dim_labels = { - 0: { - "x1": dim1.create_label_schema( - "increasing", np.float64 if not var else "U" - ), - "x2": dim1.create_label_schema("decreasing", np.int64), - }, - 1: { - "y1": dim2.create_label_schema("increasing", np.int64), - }, - } - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) - - # Create array - uri = self.path("dense_array_with_label") - tiledb.Array.create(uri, schema) - - # Write data to the array and the label - attr_data = np.reshape(np.arange(1, 65), (8, 8)) - x1_data = np.linspace(-1.0, 1.0, 8) - if var: - x1_data = np.array( - [str(chr(ord("a") + c - 1) * c).encode("utf-8") for c in range(1, 9)] - ) - x2_data = np.arange(8, 0, -1) - y1_data = np.arange(9, 17) - with tiledb.open(uri, "w") as array: - array[:, :] = { - "value": attr_data, - "x1": x1_data, - "y1": y1_data, - "x2": x2_data, - } - - # Test querying by label - with tiledb.open(uri, "r") as array: - # Read full array: labels on both ranges - result = array.label_index(["x1", "y1"])[x1_data[0] : x1_data[-1], 9:17] - np.testing.assert_array_equal(result["value"], attr_data) - np.testing.assert_array_equal(result["x1"], x1_data) - np.testing.assert_array_equal(result["y1"], y1_data) - assert "x2" not in result - - # Read full array: label only on first range - result = array.label_index(["x2"])[0:8] - np.testing.assert_array_equal(result["value"], attr_data) - np.testing.assert_array_equal(result["x2"], x2_data) - assert "x1" not in result - assert "y1" not in result - - # Read full array: Label only on second range - result = array.label_index(["y1"])[:, 9:17] - np.testing.assert_array_equal(result["value"], attr_data) - np.testing.assert_array_equal(result["y1"], y1_data) - assert "x1" not in result - assert "x2" not in result - - # Check conflicting labels are not allowed - with pytest.raises(tiledb.TileDBError): - array.label_index(["x1", "x2"]) - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - @pytest.mark.parametrize("var", [True, False]) - def test_dimension_label_round_trip_sparse_array(self, var): - # Create array schema with dimension labels - dim = tiledb.Dim("index", domain=(1, 10)) - dom = tiledb.Domain(dim) - att = tiledb.Attr("value", dtype=np.int64) - dim_labels = { - 0: { - "l1": dim.create_label_schema( - "increasing", np.int64 if not var else "ascii" - ) - } - } - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), dim_labels=dim_labels, sparse=True - ) - - # Create array - uri = self.path("sparse_array_with_label") - tiledb.Array.create(uri, schema) - - # Write data to the array and the label - index_data = np.arange(1, 11) - attr_data = np.arange(11, 21) - label_data = np.arange(-10, 0) - if var: - label_data = np.array( - [str(chr(ord("a") + c) * (10 - c)).encode("utf-8") for c in range(10)] - ) - with tiledb.open(uri, "w") as array: - array[index_data] = {"value": attr_data, "l1": label_data} - - # Load the array schema and get the URI of the dimension label - schema = tiledb.ArraySchema.load(uri) - dim_label = schema.dim_label("l1") - - # Read and check the data directly from the dimension label - with tiledb.open(dim_label.uri, "r") as label1: - output_label_data = label1[:][dim_label.label_attr_name] - np.testing.assert_array_equal(output_label_data, label_data) - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - def test_dimension_label_round_trip_dense_var(self): - # Create array schema with dimension labels - dims = [ - tiledb.Dim("d1", domain=(1, 10), dtype=np.int64), - tiledb.Dim("d2", domain=(1, 10), dtype=np.int64), - ] - dom = tiledb.Domain(*dims) - att = tiledb.Attr("value", var=True, dtype="S") - dim_labels = { - 0: { - "l1": dims[0].create_label_schema("increasing", np.float32), - }, - 1: { - "l2": dims[1].create_label_schema("decreasing", np.int32), - "l3": dims[1].create_label_schema("increasing", np.bytes_), - }, - } - - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), dim_labels=dim_labels, sparse=False - ) - - # Create array - uri = self.path("dense_array_with_var_label2") - tiledb.Array.create(uri, schema) - - # Write data to the array and the label - attr_data = np.array( - [ - [str(chr(ord("z") - c) * (10 - c)).encode("utf-8") for c in range(10)] - for i in range(10) - ] - ) - l1_data = np.arange(10, dtype=np.float32) - l2_data = np.arange(10, 0, -1, dtype=np.int32) - l3_data = np.array( - [str(chr(ord("a") + c) * (c + 1)).encode("utf-8") for c in range(10)] - ) - - with tiledb.open(uri, "w") as array: - array[:, :] = { - "value": attr_data, - "l1": l1_data, - "l2": l2_data, - "l3": l3_data, - } - - # Load the array schema and get the URI of the dimension label - schema = tiledb.ArraySchema.load(uri) - for label_name, label_data in { - "l1": l1_data, - "l2": l2_data, - "l3": l3_data, - }.items(): - dim_label = schema.dim_label(label_name) - # Read and check the data directly from the dimension label - with tiledb.open(dim_label.uri, "r") as label: - output_label_data = label[:][dim_label.label_attr_name] - np.testing.assert_array_equal(output_label_data, label_data) - - with tiledb.open(uri, "r") as array: - indexer = array.label_index([label_name]) - lower = min(label_data[0], label_data[-1]) - upper = max(label_data[0], label_data[-1]) - if label_name == "l1": - all_data = indexer[lower:upper] - else: - all_data = indexer[:, lower:upper] - np.testing.assert_array_equal(all_data[label_name], label_data) - np.testing.assert_array_equal(all_data["value"], attr_data) - - # Slice array with varying sizes. - for index in range(10): - label_index = label_data[index:] - lower = min(label_index[0], label_index[-1]) - upper = max(label_index[0], label_index[-1]) - if label_name == "l1": - result = indexer[lower:upper] - # Check against dim1 - np.testing.assert_array_equal( - result["value"], attr_data[index:, :] - ) - else: - result = indexer[:, lower:upper] - # Check against dim2 - np.testing.assert_array_equal( - result["value"], attr_data[:, index:] - ) - np.testing.assert_array_equal(result[label_name], label_index) - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - def test_dimension_label_on_query(self): - uri = self.path("query_label_index") - - dim1 = tiledb.Dim("d1", domain=(1, 4)) - dim2 = tiledb.Dim("d2", domain=(1, 3)) - dom = tiledb.Domain(dim1, dim2) - att = tiledb.Attr("a1", dtype=np.int64) - dim_labels = { - 0: {"l1": dim1.create_label_schema("decreasing", np.int64)}, - 1: { - "l2": dim2.create_label_schema("increasing", np.int64), - "l3": dim2.create_label_schema("increasing", np.float64), - }, - } - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) - tiledb.Array.create(uri, schema) - - a1_data = np.reshape(np.arange(1, 13), (4, 3)) - l1_data = np.arange(4, 0, -1) - l2_data = np.arange(-1, 2) - l3_data = np.linspace(0, 1.0, 3) - - with tiledb.open(uri, "w") as A: - A[:] = {"a1": a1_data, "l1": l1_data, "l2": l2_data, "l3": l3_data} - - with tiledb.open(uri, "r") as A: - np.testing.assert_equal( - A.query().label_index(["l1"])[3:4], - OrderedDict( - {"l1": np.array([4, 3]), "a1": np.array([[1, 2, 3], [4, 5, 6]])} - ), - ) - np.testing.assert_equal( - A.query().label_index(["l1", "l3"])[2, 0.5:1.0], - OrderedDict( - { - "l3": np.array([0.5, 1.0]), - "l1": np.array([2]), - "a1": np.array([[8, 9]]), - } - ), - ) - np.testing.assert_equal( - A.query().label_index(["l2"])[:, -1:0], - OrderedDict( - { - "l2": np.array([-1, 0]), - "a1": np.array([[1, 2], [4, 5], [7, 8], [10, 11]]), - }, - ), - ) - np.testing.assert_equal( - A.query().label_index(["l3"])[:, 0.5:1.0], - OrderedDict( - { - "l3": np.array([0.5, 1.0]), - "a1": np.array([[2, 3], [5, 6], [8, 9], [11, 12]]), - }, - ), - ) diff --git a/tiledb/tests/test_domain.py b/tiledb/tests/test_domain.py deleted file mode 100644 index 777b336508..0000000000 --- a/tiledb/tests/test_domain.py +++ /dev/null @@ -1,84 +0,0 @@ -import xml.etree.ElementTree - -import numpy as np -import pytest - -import tiledb - -from .common import DiskTestCase, assert_captured - - -class DomainTest(DiskTestCase): - def test_domain(self, capfd): - dims = [ - tiledb.Dim("d1", (1, 4), 2, dtype="u8"), - tiledb.Dim("d2", (1, 4), 2, dtype="u8"), - ] - dom = tiledb.Domain(*dims) - - # check that dumping works - dom.dump() - assert_captured(capfd, "Name: d1") - - self.assertEqual(dom.ndim, 2) - self.assertEqual(dom.dtype, np.dtype("uint64")) - self.assertEqual(dom.shape, (4, 4)) - - # check that we can iterate over the dimensions - dim_names = [dim.name for dim in dom] - self.assertEqual(["d1", "d2"], dim_names) - - # check that we can access dim by name - dim_d1 = dom.dim("d1") - self.assertEqual(dim_d1, dom.dim(0)) - - # check that we can construct directly from a List[Dim] - dom2 = tiledb.Domain(dims) - self.assertEqual(dom, dom2) - - try: - assert xml.etree.ElementTree.fromstring(dom._repr_html_()) is not None - except: - pytest.fail(f"Could not parse dom._repr_html_(). Saw {dom._repr_html_()}") - - def test_datetime_domain(self): - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020-01-01")), - tile=np.timedelta64(20, "D"), - dtype=np.datetime64("", "D"), - ) - dom = tiledb.Domain(dim) - self.assertEqual(dom, dom) - self.assertEqual(dom.dtype, np.datetime64("", "D")) - - def test_domain_mixed_names_error(self): - with self.assertRaises(tiledb.TileDBError): - tiledb.Domain( - tiledb.Dim("d1", (1, 4), 2, dtype="u8"), - tiledb.Dim("__dim_0", (1, 4), 2, dtype="u8"), - ) - - def test_ascii_domain(self, capfd): - path = self.path("test_ascii_domain") - - dim = tiledb.Dim(name="d", dtype="ascii") - assert dim.dtype == np.bytes_ - - dom = tiledb.Domain(dim) - self.assertEqual(dom, dom) - dom.dump() - assert_captured(capfd, "Type: STRING_ASCII") - - att = tiledb.Attr(name="a", dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.SparseArray.create(path, schema) - - ascii_coords = ["a", "b", "c", "ABC"] - unicode_coords = ["±", "×", "÷", "√"] - data = [1, 2, 3, 4] - - with tiledb.open(path, "w") as A: - with self.assertRaises(tiledb.TileDBError): - A[unicode_coords] = data - A[ascii_coords] = data diff --git a/tiledb/tests/test_domain_index.py b/tiledb/tests/test_domain_index.py deleted file mode 100644 index daa0f47133..0000000000 --- a/tiledb/tests/test_domain_index.py +++ /dev/null @@ -1,159 +0,0 @@ -# %% - -import numpy as np - -import tiledb - -from .common import DiskTestCase, assert_array_equal, assert_equal - - -class DomainIndexingSparseTest(DiskTestCase): - def test_int_domain_indexing(self): - path = self.path("int_domain_indexing") - - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(-10, 10), tile=1, dtype=np.int64) - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.float64)] - ) - - tiledb.SparseArray.create(path, schema) - - X = np.arange(-10, 11, step=1) - val = np.random.rand(len(X)) - - with tiledb.SparseArray(path, mode="w") as A: - A[X] = val - - with tiledb.SparseArray(path) as A: - assert_array_equal(A.domain_index[X[0]]["a"], val[0]) - assert_array_equal(A.domain_index[X[-1]]["a"], val[-1]) - assert_array_equal(A.domain_index[X[0] : X[-1]]["a"], val[:]) - # sanity check - assert_array_equal(A.domain_index[X[0] : X[-1]]["x"], X[:]) - - def test_fp_domain_indexing(self): - array_path = self.path("test_domain_idx") - - # test case from https://github.com/TileDB-Inc/TileDB-Py/issues/201 - tile = 1 - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(-89.75, 89.75), tile=tile, dtype=np.float64), - tiledb.Dim(name="y", domain=(-179.75, 179.75), tile=tile, dtype=np.float64), - tiledb.Dim(name="z", domain=(157498, 157863), tile=tile, dtype=np.float64), - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=True, attrs=[tiledb.Attr(name="data", dtype=np.float64)] - ) - - tiledb.SparseArray.create(array_path, schema) - - # fake data - X = np.linspace(-89.75, 89.75, 359) - Y = np.linspace(-179.75, 179.75, 359) - Z = np.linspace(157498, 157857, 359) - - # data = np.random.rand(*map(lambda x: x[0], (X.shape, Y.shape, Z.shape))) - data = np.random.rand(X.shape[0]) - - with tiledb.SparseArray(array_path, mode="w") as A: - A[X, Y, Z] = data - - with tiledb.SparseArray(array_path) as A: - # check direct slicing - assert_array_equal(A.domain_index[X[0], Y[0], Z[0]]["data"], data[0]) - - # check small slice ranges - tmp = A.domain_index[ - X[0] : np.nextafter(X[0], 0), - Y[0] : np.nextafter(Y[0], 0), - Z[0] : np.nextafter(Z[0], Z[0] + 1), - ] - assert_array_equal(tmp["data"], data[0]) - - # check slicing last element - tmp = A.domain_index[X[-1], Y[-1], Z[-1]] - assert_array_equal(tmp["data"], data[-1]) - - # check slice range multiple components - tmp = A.domain_index[X[1] : X[2], Y[1] : Y[2], Z[1] : Z[2]] - assert_array_equal(tmp["data"], data[1:3]) - - # check an interior point - coords = X[145], Y[145], Z[145] - tmp = A.domain_index[coords] - assert_array_equal(tmp["x"], X[145]) - assert_array_equal(tmp["data"], data[145]) - - # check entire domain - tmp = A.domain_index[X[0] : X[-1], Y[0] : Y[-1], Z[0] : Z[-1]] - assert_array_equal(tmp["data"], data[:]) - - # check entire domain - # TODO uncomment if vectorized indexing is available - # coords = np.array([X,Y,Z]).transpose().flatten() - # tmp = A.domain_index[X,Y,Z] - # assert_array_equal( - # tmp['data'], - # data[:] - # ) - - def test_fp_domain_count(self): - array_path = self.path("test_domain_count") - tile = 1 - - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(0.0, 2.0), tile=tile, dtype=np.float64), - tiledb.Dim(name="y", domain=(0.0, 2.0), tile=tile, dtype=np.float64), - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=True, attrs=[tiledb.Attr(name="data", dtype=np.float64)] - ) - - tiledb.SparseArray.create(array_path, schema) - - # fake data - X = [1.0] - Y = [1.0] - data = [1.0] - - with tiledb.SparseArray(array_path, mode="w") as A: - A[X, Y] = data - - with tiledb.SparseArray(array_path) as A: - # check direct slicing - assert_array_equal(A.domain_index[X[0], Y[0]]["data"], data[0]) - - # check counting by slice - assert_equal(A.domain_index[0:2.0, 0:1.0]["x"].shape[0], 1) - assert_equal(A.domain_index[0:2.0, 0:1.0]["y"].shape[0], 1) - assert_equal(A.domain_index[0:2.0, np.nextafter(1.0, 2.0)]["x"].shape[0], 0) - assert_equal(A.domain_index[0:2.0, np.nextafter(1.0, 2.0)]["y"].shape[0], 0) - - -class DomainIndexingDenseTest(DiskTestCase): - def test_int_domain_indexing(self): - path = self.path("dense_int_domain_indexing") - - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(0, 10), tile=1, dtype=np.int64) - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype=np.float64)] - ) - - tiledb.DenseArray.create(path, schema) - - X = np.arange(0, 11, step=1) - val = np.random.rand(len(X)) - - with tiledb.DenseArray(path, mode="w") as A: - A[:] = val - - with tiledb.DenseArray(path) as A: - assert_array_equal(A.domain_index[X[0]]["a"], val[0]) - assert_array_equal(A.domain_index[X[-1]]["a"], val[-1]) - assert_array_equal(A.domain_index[X[0] : X[-1]]["a"], val[:]) - # sanity check - assert_array_equal(A.domain_index[X[0] : X[-1]]["x"], X[:]) diff --git a/tiledb/tests/test_enumeration.py b/tiledb/tests/test_enumeration.py deleted file mode 100644 index f6af612b2c..0000000000 --- a/tiledb/tests/test_enumeration.py +++ /dev/null @@ -1,223 +0,0 @@ -import re - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import tiledb - -from .common import DiskTestCase, has_pandas, has_pyarrow - - -class EnumerationTest(DiskTestCase): - @pytest.mark.parametrize( - "name,data", - ( - ("int", np.array([0])), - ("float", np.array([1.0, 2.2, 5.8234, 94.23])), - ("str", np.array(["abc", "defghi", "jk"])), - ("utf8", np.array(["abc", "defghi", "jk"], dtype=np.str_)), - ("ascii", np.array([b"abc", b"defghi", b"jk"], dtype=np.bytes_)), - ), - ) - @pytest.mark.parametrize("ordered", [True, False]) - def test_enumeration_basic(self, name, ordered, data): - enmr = tiledb.Enumeration(name, ordered, data) - - assert enmr.name == name - assert enmr.ordered == ordered - assert_array_equal(enmr.values(), data) - if name in ("str", "utf8", "ascii"): - assert enmr.cell_val_num == tiledb.cc.TILEDB_VAR_NUM() - assert enmr.dtype.kind == data.dtype.kind - else: - assert enmr.cell_val_num == 1 - assert enmr.dtype.kind == data.dtype.kind - - def test_attribute_enumeration(self): - attr = tiledb.Attr() - attr.enum = "enum" - assert attr.enum == "enum" - - def test_enumeration_repr(self): - """Doesn't check exact string, just makes sure each component is matched, in case order is changed in the future.""" - enmr = tiledb.Enumeration("e", False, [1, 2, 3]) - # Get its string representation - repr_str = repr(enmr) - - # Define patterns to match each component in the representation - patterns = { - "Enumeration": r"Enumeration", - "name": r"name='e'", - # use regex because it is depending on platform - "dtype": r"dtype=int\d+", - "dtype_name": r"dtype_name='int\d+'", - "cell_val_num": r"cell_val_num=1", - "ordered": r"ordered=False", - "values": r"values=\[1, 2, 3\]", - } - - # Check that each pattern is found in the representation string - for key, pattern in patterns.items(): - assert re.search(pattern, repr_str), f"{key} not found or incorrect in repr" - - def test_array_schema_enumeration(self): - uri = self.path("test_array_schema_enumeration") - dom = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=1)) - enum1 = tiledb.Enumeration("enmr1", False, np.arange(3) * 10) - enum2 = tiledb.Enumeration("enmr2", False, ["a", "bb", "ccc"]) - attr1 = tiledb.Attr("attr1", dtype=np.int32, enum_label="enmr1") - attr2 = tiledb.Attr("attr2", dtype=np.int32, enum_label="enmr2") - attr3 = tiledb.Attr("attr3", dtype=np.int32) - schema = tiledb.ArraySchema( - domain=dom, attrs=(attr1, attr2, attr3), enums=(enum1, enum2) - ) - tiledb.Array.create(uri, schema) - - data1 = np.random.randint(0, 3, 8) - data2 = np.random.randint(0, 3, 8) - data3 = np.random.randint(0, 3, 8) - - with tiledb.open(uri, "w") as A: - A[:] = {"attr1": data1, "attr2": data2, "attr3": data3} - - with tiledb.open(uri, "r") as A: - assert A.enum("enmr1") == enum1 - assert attr1.enum_label == "enmr1" - assert A.attr("attr1").enum_label == "enmr1" - - assert A.enum("enmr2") == enum2 - assert attr2.enum_label == "enmr2" - assert A.attr("attr2").enum_label == "enmr2" - - with self.assertRaises(tiledb.TileDBError) as excinfo: - assert A.enum("enmr3") == [] - assert ( - "ArraySchema: Unable to check if unknown enumeration is loaded. No enumeration named 'enmr3'." - == str(excinfo.value) - ) - assert attr3.enum_label is None - assert A.attr("attr3").enum_label is None - - if has_pandas(): - assert_array_equal(A.df[:]["attr1"].cat.codes, data1) - assert_array_equal(A.df[:]["attr2"].cat.codes, data2) - - assert_array_equal(A.df[:]["attr1"], A.multi_index[:]["attr1"]) - assert_array_equal(A.df[:]["attr2"], A.multi_index[:]["attr2"]) - - assert_array_equal(A.df[:]["attr1"], A[:]["attr1"]) - assert_array_equal(A.df[:]["attr2"], A[:]["attr2"]) - - @pytest.mark.skipif( - not has_pyarrow() or not has_pandas(), - reason="pyarrow>=1.0 and/or pandas>=1.0,<3.0 not installed", - ) - @pytest.mark.parametrize("sparse", [True, False]) - @pytest.mark.parametrize("pass_df", [True, False]) - def test_array_schema_enumeration_nullable(self, sparse, pass_df): - import pyarrow as pa - - uri = self.path("test_array_schema_enumeration_nullable") - enmr = tiledb.Enumeration("e", False, ["alpha", "beta", "gamma"]) - dom = tiledb.Domain(tiledb.Dim("d", domain=(1, 5), dtype="int64")) - att = tiledb.Attr("a", dtype="int8", nullable=True, enum_label="e") - schema = tiledb.ArraySchema( - domain=dom, attrs=[att], enums=[enmr], sparse=sparse - ) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "w") as A: - dims = pa.array([1, 2, 3, 4, 5]) - data = pa.array([1.0, 2.0, None, 0, 1.0]) - if pass_df: - dims = dims.to_pandas() - data = data.to_pandas() - - if sparse: - A[dims] = data - else: - A[:] = data - - with tiledb.open(uri, "r") as A: - expected_validity = [False, False, True, False, False] - assert_array_equal(A[:]["a"].mask, expected_validity) - assert_array_equal(A.df[:]["a"].isna(), expected_validity) - assert_array_equal(A.query(attrs=["a"])[:]["a"].mask, expected_validity) - - @pytest.mark.parametrize( - "dtype, values", - [ - (np.int8, np.array([1, 2, 3], np.int8)), - (np.uint8, np.array([1, 2, 3], np.uint8)), - (np.int16, np.array([1, 2, 3], np.int16)), - (np.uint16, np.array([1, 2, 3], np.uint16)), - (np.int32, np.array([1, 2, 3], np.int32)), - (np.uint32, np.array([1, 2, 3], np.uint32)), - (np.int64, np.array([1, 2, 3], np.int64)), - (np.uint64, np.array([1, 2, 3], np.uint64)), - (np.dtype("S"), np.array(["a", "b", "c"], np.dtype("S"))), - (np.dtype("U"), np.array(["a", "b", "c"], np.dtype("U"))), - ], - ) - def test_enum_dtypes(self, dtype, values): - # create empty - enmr = tiledb.Enumeration("e", False, dtype=dtype) - if dtype in (np.dtype("S"), np.dtype("U")): - assert enmr.dtype.kind == enmr.values().dtype.kind == dtype.kind - else: - assert enmr.dtype == enmr.values().dtype == dtype - assert_array_equal(enmr.values(), []) - - # then extend with values - enmr = enmr.extend(values) - if dtype in (np.dtype("S"), np.dtype("U")): - assert enmr.dtype.kind == enmr.values().dtype.kind == dtype.kind - else: - assert enmr.dtype == enmr.values().dtype == dtype - assert_array_equal(enmr.values(), values) - - # create with values - enmr = tiledb.Enumeration("e", False, values=values) - if dtype in (np.dtype("S"), np.dtype("U")): - assert enmr.dtype.kind == enmr.values().dtype.kind == dtype.kind - else: - assert enmr.dtype == enmr.values().dtype == dtype - assert_array_equal(enmr.values(), values) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_from_pandas_dtype_mismatch(self): - import pandas as pd - - schema = tiledb.ArraySchema( - enums=[ - tiledb.Enumeration(name="enum1", values=["a", "b", "c"], ordered=False) - ], - domain=tiledb.Domain( - tiledb.Dim(name="dim1", dtype=np.int32, domain=(0, 1)) - ), - attrs=[tiledb.Attr(name="attr1", dtype=np.int32, enum_label="enum1")], - sparse=True, - ) - - # Pandas category's categories matches the TileDB enumeration's values - df1 = pd.DataFrame(data={"dim1": [0, 1], "attr1": ["b", "c"]}) - df1["attr1"] = pd.Categorical(values=df1.attr1, categories=["a", "b", "c"]) - - array_path = self.path("arr1") - tiledb.Array.create(array_path, schema) - tiledb.from_pandas(array_path, df1, schema=schema, mode="append") - - actual_values = tiledb.open(array_path).df[:]["attr1"].values.tolist() - assert actual_values == ["b", "c"] - - # Pandas category's categories does not match the TileDB enumeration's values - df2 = pd.DataFrame(data={"dim1": [0, 1], "attr1": ["b", "c"]}) - df2["attr1"] = df2["attr1"].astype("category") - - array_path = self.path("arr2") - tiledb.Array.create(array_path, schema) - tiledb.from_pandas(array_path, df2, schema=schema, mode="append") - - actual_values = tiledb.open(array_path).df[:]["attr1"].values.tolist() - assert actual_values == ["b", "c"] diff --git a/tiledb/tests/test_examples.py b/tiledb/tests/test_examples.py deleted file mode 100644 index 6dcd20a612..0000000000 --- a/tiledb/tests/test_examples.py +++ /dev/null @@ -1,83 +0,0 @@ -import doctest -import glob -import os -import subprocess -import sys -import tempfile - -import pytest - -from .common import has_pandas - - -# override locally to avoid conflict with capsys used below -@pytest.fixture(scope="function", autouse=True) -def no_output(): - pass - - -class ExamplesTest: - """Test runnability of scripts in examples/""" - - PROJECT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")) - - @pytest.mark.parametrize( - "path", glob.glob(os.path.join(PROJECT_DIR, "examples", "*.py")) - ) - def test_examples(self, path): - # run example script - # - in a separate process - # - in tmpdir so we don't pollute the source tree - # - with exit status checking (should fail tests if example fails) - requires_pd = [ - os.path.join(self.PROJECT_DIR, "examples", f"{fn}.py") - for fn in [ - "incomplete_iteration", - "parallel_csv_ingestion", - "query_condition_datetime", - ] - ] - if not has_pandas() and path in requires_pd: - pytest.mark.skip("pandas>=1.0,<3.0 not installed") - else: - with tempfile.TemporaryDirectory() as tmpdir: - try: - subprocess.run( - [sys.executable, path], - cwd=tmpdir, - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding="utf8", - ) - except subprocess.CalledProcessError as ex: - pytest.fail(ex.stderr, pytrace=False) - - @pytest.mark.skipif( - sys.platform == "win32", - reason="Some doctests are missing a clean-up step on windows", - ) - @pytest.mark.parametrize( - "path", - [ - os.path.join(PROJECT_DIR, "tiledb", "libtiledb.pyx"), - os.path.join(PROJECT_DIR, "tiledb", "fragment.py"), - ], - ) - def test_docs(self, path, capsys): - failures, _ = doctest.testfile( - path, - module_relative=False, - verbose=False, - optionflags=doctest.NORMALIZE_WHITESPACE, - ) - if failures: - stderr = capsys.readouterr().out - if "No module named 'pandas'" in stderr or ( - "Pandas version >= 1.0 and < 3.0 required for dataframe functionality" - in stderr - and not has_pandas() - ): - pytest.skip("pandas>=1.0,<3.0 not installed") - else: - pytest.fail(stderr) diff --git a/tiledb/tests/test_filestore.py b/tiledb/tests/test_filestore.py deleted file mode 100644 index 9d352ff27f..0000000000 --- a/tiledb/tests/test_filestore.py +++ /dev/null @@ -1,91 +0,0 @@ -import numpy as np -import pytest - -import tiledb - -from .common import DiskTestCase, assert_captured - - -class FilestoreTest(DiskTestCase): - @pytest.fixture - def text_fname(self): - path = self.path("text_fname") - vfs = tiledb.VFS() - vfs.touch(path) - with vfs.open(path, "wb") as fio: - fio.write(b"Simple text file.\n") - fio.write(b"With two lines.") - return path - - def test_buffer(self, capfd): - path = self.path("test_buffer") - data = b"buffer" - - fs = tiledb.Filestore(path) - - with self.assertRaises(tiledb.TileDBError): - fs.write(data) - - schema = tiledb.ArraySchema.from_file() - tiledb.Array.create(path, schema) - - assert schema.attr(0).name == "contents" - assert schema.attr(0).dtype == np.bytes_ - - schema.attr(0).dump() - assert_captured(capfd, "Type: BLOB") - - fs = tiledb.Filestore(path) - fs.write(data) - assert bytes(data) == fs.read() - - def test_small_buffer(self, capfd): - path = self.path("test_small_buffer") - # create a 4 byte array - data = b"abcd" - - fs = tiledb.Filestore(path) - - with self.assertRaises(tiledb.TileDBError): - fs.write(data) - - schema = tiledb.ArraySchema.from_file() - tiledb.Array.create(path, schema) - - assert schema.attr(0).name == "contents" - assert schema.attr(0).dtype == np.bytes_ - - schema.attr(0).dump() - assert_captured(capfd, "Type: BLOB") - - fs = tiledb.Filestore(path) - fs.write(data) - assert data[3:4] == fs.read(offset=3, size=1) - - def test_uri(self, text_fname): - path = self.path("test_uri") - schema = tiledb.ArraySchema.from_file(text_fname) - tiledb.Array.create(path, schema) - - fs = tiledb.Filestore(path) - tiledb.Filestore.copy_from(path, text_fname) - with open(text_fname, "rb") as text: - data = text.read() - assert data == fs.read(0, len(data)) - assert len(fs) == len(data) - - def test_multiple_writes(self): - path = self.path("test_buffer") - schema = tiledb.ArraySchema.from_file() - tiledb.Array.create(path, schema) - - fs = tiledb.Filestore(path) - for i in range(1, 4): - fs.write(("x" * i).encode()) - - assert fs.read() == ("x" * i).encode() - - timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] - for i, ts in enumerate(timestamps, start=1): - with tiledb.open(path, timestamp=ts) as A: - assert A.meta["file_size"] == i diff --git a/tiledb/tests/test_filters.py b/tiledb/tests/test_filters.py deleted file mode 100644 index fedf706918..0000000000 --- a/tiledb/tests/test_filters.py +++ /dev/null @@ -1,258 +0,0 @@ -import warnings - -import numpy as np -import pytest -from numpy.testing import assert_allclose, assert_array_equal - -import tiledb - -from .common import DiskTestCase - -all_filter_types = [ - tiledb.NoOpFilter, - tiledb.GzipFilter, - tiledb.ZstdFilter, - tiledb.LZ4Filter, - tiledb.RleFilter, - tiledb.Bzip2Filter, - tiledb.DeltaFilter, - tiledb.DoubleDeltaFilter, - tiledb.DictionaryFilter, - tiledb.BitWidthReductionFilter, - tiledb.BitShuffleFilter, - tiledb.ByteShuffleFilter, - tiledb.PositiveDeltaFilter, - tiledb.ChecksumSHA256Filter, - tiledb.ChecksumMD5Filter, - tiledb.FloatScaleFilter, -] - - -def filter_applicable(filter_type, attr_type) -> bool: - """Return bool indicating filter applicability to a given attribute type.""" - if not isinstance(attr_type, type): - # guard issubclass below: first argument must be a type - return True - elif issubclass(attr_type, np.floating) and filter_type in [ - tiledb.DoubleDeltaFilter - ]: - return False - - return True - - -class TestFilterTest(DiskTestCase): - def test_filter(self): - gzip_filter = tiledb.GzipFilter(level=10) - self.assertIsInstance(gzip_filter, tiledb.Filter) - self.assertEqual(gzip_filter.level, 10) - - bw_filter = tiledb.BitWidthReductionFilter(window=10) - self.assertIsInstance(bw_filter, tiledb.Filter) - self.assertEqual(bw_filter.window, 10) - - filter_list = tiledb.FilterList([gzip_filter, bw_filter], chunksize=1024) - self.assertEqual(filter_list.chunksize, 1024) - self.assertEqual(len(filter_list), 2) - self.assertEqual(filter_list[0].level, gzip_filter.level) - self.assertEqual(filter_list[1].window, bw_filter.window) - - # test filter list iteration - self.assertEqual(len(list(filter_list)), 2) - - # test `filters` kwarg accepts python list of filters - tiledb.Attr("foo", dtype=np.int64, filters=[gzip_filter]) - tiledb.Attr("foo", dtype=np.int64, filters=(gzip_filter,)) - - attr = tiledb.Attr("foo", dtype=np.int64, filters=filter_list) - - self.assertEqual(len(attr.filters), 2) - self.assertEqual(attr.filters.chunksize, filter_list.chunksize) - - @pytest.mark.parametrize("attr_type", [np.int64]) - @pytest.mark.parametrize("filter_type", all_filter_types) - def test_filter_list(self, attr_type, filter_type): - if not filter_applicable(filter_type, attr_type): - pytest.mark.skip("Filter not supported for attribute type '{attr_type}'") - - # should be constructible without a `filters` keyword arg set - filter_list1 = tiledb.FilterList() - filter_list1.append(filter_type()) - self.assertEqual(len(filter_list1), 1) - repr(filter_list1) - - filter_list2 = [x for x in filter_list1] - attr = tiledb.Attr(filters=filter_list2, dtype=attr_type) - self.assertEqual(len(attr.filters), 1) - - @pytest.mark.parametrize( - "filter_type,name", - [ - (tiledb.NoOpFilter, "NONE"), - (tiledb.GzipFilter, "GZIP"), - (tiledb.ZstdFilter, "ZSTD"), - (tiledb.LZ4Filter, "LZ4"), - (tiledb.RleFilter, "RLE"), - (tiledb.Bzip2Filter, "BZIP2"), - (tiledb.DeltaFilter, "DELTA"), - (tiledb.DoubleDeltaFilter, "DOUBLE_DELTA"), - (tiledb.DictionaryFilter, "DICTIONARY"), - (tiledb.BitWidthReductionFilter, "BIT_WIDTH_REDUCTION"), - (tiledb.BitShuffleFilter, "BITSHUFFLE"), - (tiledb.ByteShuffleFilter, "BYTESHUFFLE"), - (tiledb.PositiveDeltaFilter, "POSITIVE_DELTA"), - (tiledb.ChecksumSHA256Filter, "CHECKSUM_SHA256"), - (tiledb.ChecksumMD5Filter, "CHECKSUM_MD5"), - (tiledb.FloatScaleFilter, "SCALE_FLOAT"), - ], - ) - def test_filter_name(self, filter_type, name): - assert filter_type().filter_name == name - - @pytest.mark.parametrize("filter", all_filter_types) - def test_all_filters(self, filter): - # test initialization - - # make sure that repr works and round-trips correctly - # some of these have attributes, so we just check the class name here - self.assertTrue(filter.__name__ in repr(filter)) - - tmp_globals = dict() - setup = "from tiledb import *" - exec(setup, tmp_globals) - - filter_repr = repr(filter()) - new_filter = None - try: - new_filter = eval(filter_repr, tmp_globals) - except Exception: - warn_str = ( - """Exception during FilterTest filter repr eval""" - + """, filter repr string was:\n""" - + """'''""" - + """\n{}\n'''""".format(filter_repr) - ) - warnings.warn(warn_str) - raise - - self.assertEqual(new_filter, filter()) - - def test_dictionary_encoding(self): - path = self.path("test_dictionary_encoding") - dom = tiledb.Domain(tiledb.Dim(name="row", domain=(0, 9), dtype=np.uint64)) - attr = tiledb.Attr( - dtype="ascii", - var=True, - filters=tiledb.FilterList([tiledb.DictionaryFilter()]), - ) - schema = tiledb.ArraySchema(domain=dom, attrs=[attr], sparse=True) - tiledb.Array.create(path, schema) - - data = [b"x" * i for i in np.random.randint(1, 10, size=10)] - - with tiledb.open(path, "w") as A: - A[np.arange(10)] = data - - with tiledb.open(path, "r") as A: - assert_array_equal(A[:][""], data) - - @pytest.mark.parametrize("factor", [1, 0.5, 2]) - @pytest.mark.parametrize("offset", [0]) - @pytest.mark.parametrize("bytewidth", [1, 8]) - def test_float_scaling_filter(self, factor, offset, bytewidth): - path = self.path("test_float_scaling_filter") - dom = tiledb.Domain(tiledb.Dim(name="row", domain=(0, 9), dtype=np.uint64)) - - filter = tiledb.FloatScaleFilter(factor, offset, bytewidth) - - attr = tiledb.Attr(dtype=np.float64, filters=tiledb.FilterList([filter])) - schema = tiledb.ArraySchema(domain=dom, attrs=[attr], sparse=True) - tiledb.Array.create(path, schema) - - data = np.random.rand(10) - - with tiledb.open(path, "w") as A: - A[np.arange(10)] = data - - with tiledb.open(path, "r") as A: - filter = A.schema.attr("").filters[0] - assert filter.factor == factor - assert filter.offset == offset - assert filter.bytewidth == bytewidth - - # TODO compute the correct tolerance here - assert_allclose(data, A[:][""], rtol=1, atol=1) - - @pytest.mark.parametrize( - "attr_dtype,reinterp_dtype,expected_reinterp_dtype", - [ - (np.uint64, None, None), - (np.float64, np.uint64, np.uint64), - (np.float64, tiledb.cc.DataType.UINT64, np.uint64), - ], - ) - def test_delta_filter(self, attr_dtype, reinterp_dtype, expected_reinterp_dtype): - path = self.path("test_delta_filter") - - dom = tiledb.Domain(tiledb.Dim(name="row", domain=(0, 9), dtype=np.uint64)) - - if reinterp_dtype is None: - filter = tiledb.DeltaFilter() - else: - filter = tiledb.DeltaFilter(reinterp_dtype=reinterp_dtype) - assert filter.reinterp_dtype == expected_reinterp_dtype - - attr = tiledb.Attr(dtype=attr_dtype, filters=tiledb.FilterList([filter])) - - assert attr.filters[0].reinterp_dtype == expected_reinterp_dtype - - schema = tiledb.ArraySchema(domain=dom, attrs=[attr], sparse=False) - tiledb.Array.create(path, schema) - - data = np.random.randint(0, 10_000_000, size=10) - if attr_dtype == np.float64: - data = data.astype(np.float64) - - with tiledb.open(path, "w") as A: - A[:] = data - - with tiledb.open(path) as A: - res = A[:] - assert_array_equal(res, data) - - @pytest.mark.parametrize( - "attr_dtype,reinterp_dtype,expected_reinterp_dtype", - [ - (np.uint64, None, None), - (np.float64, np.uint64, np.uint64), - (np.float64, tiledb.cc.DataType.UINT64, np.uint64), - ], - ) - def test_double_delta_filter( - self, attr_dtype, reinterp_dtype, expected_reinterp_dtype - ): - path = self.path("test_delta_filter") - - dom = tiledb.Domain(tiledb.Dim(name="row", domain=(0, 9), dtype=np.uint64)) - - if reinterp_dtype is None: - filter = tiledb.DoubleDeltaFilter() - else: - filter = tiledb.DoubleDeltaFilter(reinterp_dtype=reinterp_dtype) - assert filter.reinterp_dtype == expected_reinterp_dtype - - attr = tiledb.Attr(dtype=attr_dtype, filters=tiledb.FilterList([filter])) - assert attr.filters[0].reinterp_dtype == expected_reinterp_dtype - schema = tiledb.ArraySchema(domain=dom, attrs=[attr], sparse=False) - tiledb.Array.create(path, schema) - - data = np.random.randint(0, 10_000_000, size=10) - if attr_dtype == np.float64: - data = data.astype(np.float64) - - with tiledb.open(path, "w") as A: - A[:] = data - - with tiledb.open(path) as A: - res = A[:] - assert_array_equal(res, data) diff --git a/tiledb/tests/test_fixes.py b/tiledb/tests/test_fixes.py deleted file mode 100644 index a4ab4f65f0..0000000000 --- a/tiledb/tests/test_fixes.py +++ /dev/null @@ -1,424 +0,0 @@ -import concurrent -import concurrent.futures -import json -import os -import subprocess -import sys - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import tiledb - -from .common import DiskTestCase, has_pandas, has_pyarrow - -pd = pytest.importorskip("pandas") -tm = pd._testing - - -class FixesTest(DiskTestCase): - def test_sc50378_overflowerror_python_int_too_large_to_convert_to_c_long(self): - uri = self.path( - "test_sc50378_overflowerror_python_int_too_large_to_convert_to_c_long" - ) - MAX_UINT64 = np.iinfo(np.uint64).max - dim = tiledb.Dim( - name="id", - domain=(0, MAX_UINT64 - 1), - dtype=np.dtype(np.uint64), - ) - dom = tiledb.Domain(dim) - text_attr = tiledb.Attr(name="text", dtype=np.dtype("U1"), var=True) - attrs = [text_attr] - schema = tiledb.ArraySchema( - domain=dom, - sparse=True, - allows_duplicates=False, - attrs=attrs, - ) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "w") as A: - external_ids = np.array([0, 100, MAX_UINT64 - 1], dtype=np.dtype(np.uint64)) - data = {"text": np.array(["foo", "bar", "baz"], dtype="= (2, 27): - assert """"Context.Query.Reader.loop_num": 1""" in stats_dump_str - else: - assert ( - """"Context.StorageManager.Query.Reader.loop_num": 1""" - in stats_dump_str - ) - tiledb.stats_disable() - - def test_sc58286_fix_stats_dump_return_value_broken(self): - uri = self.path("test_sc58286_fix_stats_dump_return_value_broken") - dim1 = tiledb.Dim(name="d1", dtype="int64", domain=(1, 3)) - att = tiledb.Attr(name="a1", dtype="= (2, 27, 0): - assert get_config_with_env({}, "vfs.s3.region") == "" - else: - assert get_config_with_env({}, "vfs.s3.region") == "us-east-1" - assert get_config_with_env({"AWS_DEFAULT_REGION": ""}, "vfs.s3.region") == "" - assert get_config_with_env({"AWS_REGION": ""}, "vfs.s3.region") == "" - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - @pytest.mark.parametrize("is_sparse", [True, False]) - def test_sc1430_nonexisting_timestamp(self, is_sparse): - path = self.path("nonexisting_timestamp") - - if is_sparse: - tiledb.from_pandas( - path, pd.DataFrame({"a": np.random.rand(4)}), sparse=True - ) - - with tiledb.open(path, timestamp=1) as A: - assert pd.DataFrame.equals( - A.df[:]["a"], pd.Series([], dtype=np.float64) - ) - else: - with tiledb.from_numpy(path, np.random.rand(4)) as A: - pass - - with tiledb.open(path, timestamp=1) as A: - assert_array_equal(A[:], np.ones(4) * np.nan) - - def test_sc27374_hilbert_default_tile_order(self): - import os - import shutil - - import tiledb - - uri = "repro" - if os.path.exists(uri): - shutil.rmtree(uri) - - dom = tiledb.Domain( - tiledb.Dim( - name="var_id", - domain=(None, None), - dtype="ascii", - filters=[tiledb.ZstdFilter(level=1)], - ), - ) - - attrs = [] - - sch = tiledb.ArraySchema( - domain=dom, - attrs=attrs, - sparse=True, - allows_duplicates=False, - offsets_filters=[ - tiledb.DoubleDeltaFilter(), - tiledb.BitWidthReductionFilter(), - tiledb.ZstdFilter(), - ], - capacity=1000, - cell_order="hilbert", - tile_order=None, # <-------------------- note - ) - - tiledb.Array.create(uri, sch) - - with tiledb.open(uri) as A: - assert A.schema.cell_order == "hilbert" - assert A.schema.tile_order is None - - def test_sc43221(self): - # GroupMeta object did not have a representation test; repr failed due to non-existent attribute access in check. - tiledb.Group.create("mem://tmp1") - a = tiledb.Group("mem://tmp1") - repr(a.meta) - - def test_sc56611(self): - # test from_numpy with sparse argument set to True - uri = self.path("test_sc56611") - data = np.random.rand(10, 10) - with pytest.raises(tiledb.cc.TileDBError) as exc_info: - tiledb.from_numpy(uri, data, sparse=True) - assert str(exc_info.value) == "from_numpy only supports dense arrays" - - -class SOMA919Test(DiskTestCase): - """ - ORIGINAL CONTEXT: - https://github.com/single-cell-data/TileDB-SOMA/issues/919 - https://gist.github.com/atolopko-czi/26683305258a9f77a57ccc364916338f - - We've distilled @atolopko-czi's gist example using the TileDB-Py API directly. - """ - - def run_test(self, use_timestamps): - import tempfile - - import numpy as np - - import tiledb - - root_uri = tempfile.mkdtemp() - - if use_timestamps: - group_ctx100 = tiledb.Ctx( - { - "sm.group.timestamp_start": 100, - "sm.group.timestamp_end": 100, - } - ) - timestamp = 100 - else: - group_ctx100 = tiledb.Ctx() - timestamp = None - - # create the group and add a dummy subgroup "causes_bug" - tiledb.Group.create(root_uri, ctx=group_ctx100) - with tiledb.Group(root_uri, "w", ctx=group_ctx100) as expt: - tiledb.Group.create(root_uri + "/causes_bug", ctx=group_ctx100) - expt.add(name="causes_bug", uri=root_uri + "/causes_bug") - - # add an array to the group (in a separate write operation) - with tiledb.Group(root_uri, mode="w", ctx=group_ctx100) as expt: - df_path = os.path.join(root_uri, "df") - tiledb.from_numpy(df_path, np.ones((100, 100)), timestamp=timestamp) - expt.add(name="df", uri=df_path) - - # check our view of the group at current time; - # (previously, "df" is sometimes missing (non-deterministic) - with tiledb.Group(root_uri) as expt: - assert "df" in expt - - # IMPORTANT: commenting out either line 29 or 32 (individually) makes df always visible. - # That is, to invite the bug we must BOTH add the causes_bug sibling element AND then reopen - # the group write handle to add df. The separate reopen (line 32) simulates - # tiledbsoma.tdb_handles.Wrapper._flush_hack(). - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 15, 0), - reason="SOMA919 fix implemented in libtiledb 2.15", - ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_soma919(self, use_timestamps): - N = 100 - fails = 0 - for i in range(N): - try: - self.run_test(use_timestamps) - except AssertionError: - fails += 1 - if fails > 0: - pytest.fail(f"SOMA919 test, failure rate {100*fails/N}%") diff --git a/tiledb/tests/test_fork_ctx.py b/tiledb/tests/test_fork_ctx.py deleted file mode 100644 index 81034cfb2d..0000000000 --- a/tiledb/tests/test_fork_ctx.py +++ /dev/null @@ -1,104 +0,0 @@ -"""Tests combining fork with tiledb context threads. - -Background: the core tiledb library uses threads and it's easy to -experience deadlocks when forking a process that is using tiledb. The -project doesn't have a solution for this at the moment other than to -avoid using fork(), which is the same recommendation that Python makes. -Python 3.12 warns if you fork() when multiple threads are detected and -Python 3.14 will make it so you never accidentally fork(): -multiprocessing will default to "spawn" on Linux. -""" - -import multiprocessing -import os -import sys -import warnings - -import pytest - -import tiledb - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -def test_no_warning_fork_without_ctx(): - """Get no warning if no tiledb context exists.""" - with warnings.catch_warnings(): - warnings.simplefilter("error") - pid = os.fork() - if pid == 0: - os._exit(0) - else: - os.wait() - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_warning_fork_with_ctx(): - """Get a warning if we fork after creating a tiledb context.""" - _ = tiledb.Ctx() - with pytest.warns(UserWarning, match="TileDB is a multithreading library"): - pid = os.fork() - if pid == 0: - os._exit(0) - else: - os.wait() - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_warning_fork_with_default_ctx(): - """Get a warning if we fork after creating a default context.""" - _ = tiledb.default_ctx() - with pytest.warns(UserWarning, match="TileDB is a multithreading library"): - pid = os.fork() - if pid == 0: - os._exit(0) - else: - os.wait() - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -def test_no_warning_multiprocessing_without_ctx(): - """Get no warning if no tiledb context exists.""" - with warnings.catch_warnings(): - warnings.simplefilter("error") - mp = multiprocessing.get_context("fork") - p = mp.Process() - p.start() - p.join() - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_warning_multiprocessing_with_ctx(): - """Get a warning if we fork after creating a tiledb context.""" - _ = tiledb.Ctx() - mp = multiprocessing.get_context("fork") - p = mp.Process() - with pytest.warns(UserWarning, match="TileDB is a multithreading library"): - p.start() - p.join() - - -@pytest.mark.skipif( - sys.platform == "win32", reason="fork() is not available on Windows" -) -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_warning_multiprocessing_with_default_ctx(): - """Get a warning if we fork after creating a default context.""" - _ = tiledb.default_ctx() - mp = multiprocessing.get_context("fork") - p = mp.Process() - with pytest.warns(UserWarning, match="TileDB is a multithreading library"): - p.start() - p.join() diff --git a/tiledb/tests/test_fragments.py b/tiledb/tests/test_fragments.py deleted file mode 100644 index 87dccb7524..0000000000 --- a/tiledb/tests/test_fragments.py +++ /dev/null @@ -1,769 +0,0 @@ -import itertools -import sys -import xml.etree.ElementTree - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import tiledb -from tiledb.main import PyFragmentInfo - -from .common import DiskTestCase - - -class FragmentInfoTest(DiskTestCase): - def setUp(self): - super().setUp() - if not tiledb.libtiledb.version() >= (2, 2): - pytest.skip("Only run FragmentInfo test with TileDB>=2.2") - - def test_uri_dne(self): - with self.assertRaises(tiledb.TileDBError): - tiledb.array_fragments("does_not_exist") - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_array_fragments(self, use_timestamps): - fragments = 3 - - A = np.zeros(fragments) - - uri = self.path("test_dense_fragments") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - - tiledb.DenseArray.create(uri, schema) - - if use_timestamps: - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 - with tiledb.DenseArray(uri, mode="w", timestamp=timestamp) as T: - T[fragment_idx : fragment_idx + 1] = fragment_idx - else: - for fragment_idx in range(fragments): - with tiledb.DenseArray(uri, mode="w") as T: - T[fragment_idx : fragment_idx + 1] = fragment_idx - - fi = tiledb.array_fragments(uri) - - assert len(fi) == 3 - assert fi.unconsolidated_metadata_num == 3 - assert fi.cell_num == (3, 3, 3) - assert fi.has_consolidated_metadata == (False, False, False) - assert fi.nonempty_domain == (((0, 0),), ((1, 1),), ((2, 2),)) - assert fi.sparse == (False, False, False) - if use_timestamps: # timestamps cannot be predicted if not used on write - assert fi.timestamp_range == ((1, 1), (2, 2), (3, 3)) - assert fi.to_vacuum == () - assert hasattr(fi, "version") # don't pin to a specific version - - for idx, frag in enumerate(fi): - assert frag.cell_num == 3 - assert frag.has_consolidated_metadata is False - assert frag.nonempty_domain == ((idx, idx),) - assert frag.sparse is False - if use_timestamps: # timestamps cannot be predicted if not used on write - assert frag.timestamp_range == (idx + 1, idx + 1) - assert hasattr(frag, "version") # don't pin to a specific version - try: - assert xml.etree.ElementTree.fromstring(frag._repr_html_()) is not None - except: - pytest.fail( - f"Could not parse frag._repr_html_(). Saw {frag._repr_html_()}" - ) - - try: - assert xml.etree.ElementTree.fromstring(fi._repr_html_()) is not None - except: - pytest.fail(f"Could not parse fi._repr_html_(). Saw {fi._repr_html_()}") - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_array_fragments_var(self, use_timestamps): - fragments = 3 - - uri = self.path("test_array_fragments_var") - dom = tiledb.Domain( - tiledb.Dim(name="dim", domain=(None, None), tile=None, dtype=np.bytes_) - ) - schema = tiledb.ArraySchema( - domain=dom, - sparse=True, - attrs=[tiledb.Attr(name="1s", dtype=np.int32, var=True)], - ) - - tiledb.SparseArray.create(uri, schema) - - for fragment_idx in range(fragments): - - data = np.array( - [ - np.array( - [fragment_idx + 1] * 1, - dtype=np.int32, - ), - np.array( - [fragment_idx + 1] * 2, - dtype=np.int32, - ), - np.array( - [fragment_idx + 1] * 3, - dtype=np.int32, - ), - ], - dtype="O", - ) - - with tiledb.SparseArray( - uri, mode="w", timestamp=fragment_idx + 1 if use_timestamps else None - ) as T: - T[["zero", "one", "two"]] = data - - fragments_info = tiledb.array_fragments(uri) - - self.assertEqual( - fragments_info.nonempty_domain, - ((("one", "zero"),), (("one", "zero"),), (("one", "zero"),)), - ) - - for frag in fragments_info: - self.assertEqual(frag.nonempty_domain, (("one", "zero"),)) - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_dense_fragments(self, use_timestamps): - fragments = 3 - - A = np.zeros(fragments) - - uri = self.path("test_dense_fragments") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - - tiledb.DenseArray.create(uri, schema) - - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 if use_timestamps else None - with tiledb.DenseArray(uri, mode="w", timestamp=timestamp) as T: - T[fragment_idx : fragment_idx + 1] = fragment_idx - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - self.assertEqual(fragment_info.get_num_fragments(), fragment_idx + 1) - - if use_timestamps: # asserts are not predictable without timestamps - all_expected_uris = [] - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 - - self.assertEqual( - fragment_info.get_timestamp_range()[fragment_idx], - (timestamp, timestamp), - ) - - expected_uri = f"__{timestamp}_{timestamp}" - actual_uri = fragment_info.get_uri()[fragment_idx] - - all_expected_uris.append(expected_uri) - - self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) - self.assertFalse(fragment_info.get_sparse()[fragment_idx]) - - all_actual_uris = fragment_info.get_uri() - for actual_uri, expected_uri in zip(all_actual_uris, all_expected_uris): - self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) - - self.assertEqual( - fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3)) - ) - self.assertEqual(fragment_info.get_sparse(), (False, False, False)) - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_sparse_fragments(self, use_timestamps): - fragments = 3 - - A = np.zeros(fragments) - - uri = self.path("test_sparse_fragments") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) - - tiledb.SparseArray.create(uri, schema) - - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 if use_timestamps else None - with tiledb.SparseArray(uri, mode="w", timestamp=timestamp) as T: - T[fragment_idx] = fragment_idx - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - self.assertEqual(fragment_info.get_num_fragments(), fragment_idx + 1) - - if use_timestamps: # asserts are not predictable without timestamps - all_expected_uris = [] - for fragment_idx in range(fragments): - timestamp = fragment_idx + 1 - - self.assertEqual( - fragment_info.get_timestamp_range()[fragment_idx], - (timestamp, timestamp), - ) - - if uri[0] != "/": - uri = "/" + uri.replace("\\", "/") - - expected_uri = f"/__{timestamp}_{timestamp}" - actual_uri = fragment_info.get_uri()[fragment_idx] - - all_expected_uris.append(expected_uri) - - self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) - self.assertTrue(fragment_info.get_sparse()[fragment_idx]) - - all_actual_uris = fragment_info.get_uri() - for actual_uri, expected_uri in zip(all_actual_uris, all_expected_uris): - self.assertTrue(expected_uri in actual_uri) - self.assertTrue( - actual_uri.endswith(str(fragment_info.get_version()[fragment_idx])) - ) - - self.assertEqual( - fragment_info.get_timestamp_range(), ((1, 1), (2, 2), (3, 3)) - ) - self.assertEqual(fragment_info.get_sparse(), (True, True, True)) - - def test_nonempty_domain(self): - uri = self.path("test_nonempty_domain") - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(1, 4)), - tiledb.Dim(name="y", domain=(-2.0, 2.0), dtype=np.float32), - ) - att = tiledb.Attr() - schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) - - tiledb.SparseArray.create(uri, schema) - - with tiledb.SparseArray(uri, mode="w") as T: - coords = np.array( - list(itertools.product(np.arange(1, 5), np.arange(-1, 3))) - ) - x = coords[:, 0] - y = coords[:, 1] - T[x, y] = np.array(range(16)) - - with tiledb.SparseArray(uri, mode="w") as T: - x = [1, 3] - y = [-1.5, -1.25] - T[x, y] = np.array(range(2)) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - self.assertEqual( - fragment_info.get_nonempty_domain(), - (((1, 4), (-1.0, 2.0)), ((1, 3), (-1.5, -1.25))), - ) - - def test_nonempty_domain_date(self): - uri = self.path("test_nonempty_domain") - dom = tiledb.Domain( - tiledb.Dim( - name="day", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020")), - dtype="datetime64[D]", - ) - ) - att = tiledb.Attr() - schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) - - tiledb.SparseArray.create(uri, schema) - - with tiledb.SparseArray(uri, mode="w") as T: - dates = np.array( - ["2017-04-01", "2019-10-02", "2019-10-03", "2019-12-04"], - dtype="datetime64[D]", - ) - T[dates] = np.array(range(4)) - - with tiledb.SparseArray(uri, mode="w") as T: - dates = np.array( - ["2010-01-01", "2013-10-02", "2014-10-03"], dtype="datetime64[D]" - ) - T[dates] = np.array(range(3)) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - self.assertEqual( - fragment_info.get_nonempty_domain(), - ( - ((np.datetime64("2017-04-01"), np.datetime64("2019-12-04")),), - ((np.datetime64("2010-01-01"), np.datetime64("2014-10-03")),), - ), - ) - - def test_nonempty_domain_strings(self): - uri = self.path("test_nonempty_domain_strings") - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(None, None), dtype=np.bytes_), - tiledb.Dim(name="y", domain=(None, None), dtype=np.bytes_), - ) - att = tiledb.Attr() - schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) - - tiledb.SparseArray.create(uri, schema) - - with tiledb.SparseArray(uri, mode="w") as T: - x_dims = [b"a", b"b", b"c", b"d"] - y_dims = [b"e", b"f", b"g", b"h"] - T[x_dims, y_dims] = np.array([1, 2, 3, 4]) - - with tiledb.SparseArray(uri, mode="w") as T: - x_dims = [b"a", b"b"] - y_dims = [b"e", b"f"] - T[x_dims, y_dims] = np.array([1, 2]) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - self.assertEqual( - fragment_info.get_nonempty_domain(), - ((("a", "d"), ("e", "h")), (("a", "b"), ("e", "f"))), - ) - - def test_cell_num(self): - uri = self.path("test_cell_num") - dom = tiledb.Domain(tiledb.Dim(domain=(1, 4))) - att = tiledb.Attr() - schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) - - tiledb.SparseArray.create(uri, schema) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - with tiledb.SparseArray(uri, mode="w") as T: - a = np.array([1, 2, 3, 4]) - T[a] = a - - with tiledb.SparseArray(uri, mode="w") as T: - b = np.array([1, 2]) - T[b] = b - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - self.assertEqual(fragment_info.get_cell_num(), (len(a), len(b))) - - def test_consolidated_fragment_metadata(self): - fragments = 3 - - A = np.zeros(fragments) - - uri = self.path("test_consolidated_fragment_metadata") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - - tiledb.DenseArray.create(uri, schema) - - for fragment_idx in range(fragments): - with tiledb.DenseArray(uri, mode="w") as T: - T[fragment_idx : fragment_idx + 1] = fragment_idx - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - self.assertEqual(fragment_info.get_unconsolidated_metadata_num(), 3) - self.assertEqual( - fragment_info.get_has_consolidated_metadata(), (False, False, False) - ) - - tiledb.consolidate( - uri, config=tiledb.Config(params={"sm.consolidation.mode": "fragment_meta"}) - ) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - self.assertEqual(fragment_info.get_unconsolidated_metadata_num(), 0) - self.assertEqual( - fragment_info.get_has_consolidated_metadata(), (True, True, True) - ) - - def test_fragments_to_vacuum(self): - fragments = 3 - - A = np.zeros(fragments) - - uri = self.path("test_fragments_to_vacuum") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - - tiledb.DenseArray.create(uri, schema) - - for fragment_idx in range(fragments): - with tiledb.DenseArray(uri, mode="w") as T: - T[fragment_idx : fragment_idx + 1] = fragment_idx - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - expected_vacuum_uri = fragment_info.get_uri()[0] - - tiledb.consolidate( - uri, config=tiledb.Config(params={"sm.vacuum.mode": "fragments"}) - ) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - assert len(fragment_info.get_to_vacuum()) == 3 - assert fragment_info.get_to_vacuum()[0] == expected_vacuum_uri - - tiledb.vacuum(uri) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - - assert len(fragment_info.get_to_vacuum()) == 0 - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 5, 0), - reason=( - "MBRs in FragmentInfo only available in " - "tiledb.libtiledb.version() < (2, 5, 0)" - ), - ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_get_mbr(self, use_timestamps): - fragments = 3 - - uri = self.path("test_get_mbr") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=fragments, dtype=np.int64)) - att = tiledb.Attr(dtype=np.uint64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.Array.create(uri, schema) - - for fragi in range(fragments): - timestamp = fragi + 1 - with tiledb.open( - uri, mode="w", timestamp=timestamp if use_timestamps else None - ) as T: - T[np.array(range(0, fragi + 1))] = [fragi] * (fragi + 1) - - expected_mbrs = ((((0, 0),),), (((0, 1),),), (((0, 2),),)) - - py_fragment_info = PyFragmentInfo(uri, schema, True, tiledb.default_ctx()) - assert py_fragment_info.get_mbrs() == expected_mbrs - - array_fragments = tiledb.array_fragments(uri) - with pytest.raises(AttributeError) as excinfo: - array_fragments.mbrs - assert "retrieving minimum bounding rectangles is disabled" in str( - excinfo.value - ) - - with self.assertRaises(AttributeError): - array_fragments[0].mbrs - assert "retrieving minimum bounding rectangles is disabled" in str( - excinfo.value - ) - - array_fragments = tiledb.array_fragments(uri, include_mbrs=True) - assert array_fragments.mbrs == expected_mbrs - assert array_fragments[0].mbrs == expected_mbrs[0] - assert array_fragments[1].mbrs == expected_mbrs[1] - assert array_fragments[2].mbrs == expected_mbrs[2] - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 5, 0), - reason=( - "MBRs in FragmentInfo only available in " - "tiledb.libtiledb.version() < (2, 5, 0)" - ), - ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_get_var_sized_dim_mbrs(self, use_timestamps): - fragments = 3 - - uri = self.path("test_get_var_sized_dim_mbrs") - dom = tiledb.Domain(tiledb.Dim(dtype="ascii")) - att = tiledb.Attr(dtype=np.uint64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.Array.create(uri, schema) - - for fragi in range(fragments): - timestamp = fragi + 1 - with tiledb.open( - uri, mode="w", timestamp=timestamp if use_timestamps else None - ) as T: - coords = [chr(i) * (fragi + 1) for i in range(97, fragi + 98)] - T[np.array(coords)] = [fragi] * (fragi + 1) - - expected_mbrs = (((("a", "a"),),), ((("aa", "bb"),),), ((("aaa", "ccc"),),)) - - py_fragment_info = PyFragmentInfo(uri, schema, True, tiledb.default_ctx()) - assert py_fragment_info.get_mbrs() == expected_mbrs - - array_fragments = tiledb.array_fragments(uri) - with pytest.raises(AttributeError) as excinfo: - array_fragments.mbrs - assert "retrieving minimum bounding rectangles is disabled" in str( - excinfo.value - ) - - with self.assertRaises(AttributeError): - array_fragments[0].mbrs - assert "retrieving minimum bounding rectangles is disabled" in str( - excinfo.value - ) - - array_fragments = tiledb.array_fragments(uri, include_mbrs=True) - assert array_fragments.mbrs == expected_mbrs - assert array_fragments[0].mbrs == expected_mbrs[0] - assert array_fragments[1].mbrs == expected_mbrs[1] - assert array_fragments[2].mbrs == expected_mbrs[2] - - -class CreateArrayFromFragmentsTest(DiskTestCase): - @pytest.mark.skipif( - sys.platform == "win32", reason="VFS.copy() does not run on windows" - ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_create_array_from_fragments(self, use_timestamps): - dshape = (1, 3) - num_frags = 10 - - def create_array(target_path, dshape): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path, dshape, num_frags): - for i in range(1, num_frags + 1): - with tiledb.open( - target_path, "w", timestamp=i if use_timestamps else None - ) as A: - A[[1, 2, 3]] = np.random.rand(dshape[1]) - - src_path = self.path("test_create_array_from_fragments_src") - dst_path = self.path("test_create_array_from_fragments_dst") - - ts = tuple((t, t) for t in range(1, 11)) - - create_array(src_path, dshape) - write_fragments(src_path, dshape, num_frags) - frags = tiledb.FragmentInfoList(src_path) - assert len(frags) == 10 - if use_timestamps: - assert frags.timestamp_range == ts - - if use_timestamps: - tiledb.create_array_from_fragments(src_path, dst_path, (3, 6)) - else: - tiledb.create_array_from_fragments( - src_path, - dst_path, - (frags.timestamp_range[2][0], frags.timestamp_range[5][1]), - ) - - frags = tiledb.FragmentInfoList(dst_path) - assert len(frags) == 4 - if use_timestamps: - assert frags.timestamp_range == ts[2:6] - - -class CopyFragmentsToExistingArrayTest(DiskTestCase): - @pytest.mark.skipif( - sys.platform == "win32", reason="VFS.copy() does not run on windows" - ) - def test_copy_fragments_to_existing_array(self): - def create_array(target_path, dshape): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path, dshape, num_frags, ts_start=1): - for i in range(ts_start, ts_start + num_frags): - with tiledb.open(target_path, "w", timestamp=i) as A: - A[[1, 2, 3]] = np.random.rand(dshape[1]) - - tiledb.VFS() - - src_dshape = (1, 3) - src_num_frags = 10 - src_path = self.path("test_copy_fragments_to_existing_array_src") - create_array(src_path, src_dshape) - write_fragments(src_path, src_dshape, src_num_frags) - - dst_dshape = (1, 3) - dst_num_frags = 10 - dst_path = self.path("test_copy_fragments_to_existing_array_dst") - create_array(dst_path, dst_dshape) - write_fragments(dst_path, dst_dshape, dst_num_frags, 11) - - ts = tuple((t, t) for t in range(1, 21)) - - frags = tiledb.array_fragments(dst_path) - assert len(frags) == 10 - assert frags.timestamp_range == ts[10:] - - tiledb.copy_fragments_to_existing_array(src_path, dst_path, (3, 6)) - - frags = tiledb.FragmentInfoList(dst_path) - assert len(frags) == 14 - assert frags.timestamp_range == ts[2:6] + ts[10:] - - @pytest.mark.skipif( - sys.platform == "win32", reason="VFS.copy() does not run on windows" - ) - def test_copy_fragments_to_existing_array_mismatch(self): - def create_array(target_path, attr_type): - dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=3)) - att = tiledb.Attr(dtype=attr_type) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path): - for i in range(10): - with tiledb.open(target_path, "w") as A: - A[[1, 2, 3]] = np.random.rand(3) - - src_path = self.path("test_copy_fragments_to_existing_array_evolved_src") - create_array(src_path, "int64") - write_fragments(src_path) - - dst_path = self.path("test_copy_fragments_to_existing_array_evolved_dst") - create_array(dst_path, "int32") - write_fragments(dst_path) - - with self.assertRaises(tiledb.TileDBError): - tiledb.copy_fragments_to_existing_array(src_path, dst_path, (3, 6)) - - @pytest.mark.skipif( - sys.platform == "win32", reason="VFS.copy() does not run on windows" - ) - def test_copy_fragments_to_existing_array_evolved(self): - def create_array(target_path): - dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=3)) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path): - for i in range(10): - with tiledb.open(target_path, "w") as A: - A[[1, 2, 3]] = np.random.rand(3) - - src_path = self.path("test_copy_fragments_to_existing_array_evolved_src") - create_array(src_path) - write_fragments(src_path) - - dst_path = self.path("test_copy_fragments_to_existing_array_evolved_dst") - create_array(dst_path) - write_fragments(dst_path) - - ctx = tiledb.default_ctx() - se = tiledb.ArraySchemaEvolution(ctx) - se.add_attribute(tiledb.Attr("a2", dtype=np.float64)) - se.array_evolve(src_path) - - with self.assertRaises(tiledb.TileDBError): - tiledb.copy_fragments_to_existing_array(src_path, dst_path, (3, 6)) - - -class DeleteFragmentsTest(DiskTestCase): - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_delete_fragments(self, use_timestamps): - dshape = (1, 3) - num_writes = 10 - - def create_array(target_path, dshape): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path, dshape, num_writes): - for i in range(1, num_writes + 1): - with tiledb.open( - target_path, "w", timestamp=i if use_timestamps else None - ) as A: - A[[1, 2, 3]] = np.random.rand(dshape[1]) - - path = self.path("test_delete_fragments") - - ts = tuple((t, t) for t in range(1, 11)) - - create_array(path, dshape) - write_fragments(path, dshape, num_writes) - frags = tiledb.array_fragments(path) - assert len(frags) == 10 - if use_timestamps: - assert frags.timestamp_range == ts - - if use_timestamps: - tiledb.Array.delete_fragments(path, 3, 6) - else: - tiledb.Array.delete_fragments( - path, frags.timestamp_range[2][0], frags.timestamp_range[5][1] - ) - - frags = tiledb.array_fragments(path) - assert len(frags) == 6 - if use_timestamps: - assert frags.timestamp_range == ts[:2] + ts[6:] - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_delete_fragments_with_schema_evolution(self, use_timestamps): - path = self.path("test_delete_fragments_with_schema_evolution") - dshape = (1, 3) - - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(name="a1", dtype=np.float64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(path, schema) - - ts1_data = np.random.rand(3) - if use_timestamps: - with tiledb.open(path, "w", timestamp=1) as A: - A[[1, 2, 3]] = ts1_data - else: - with tiledb.open(path, "w") as A: - A[[1, 2, 3]] = ts1_data - - ctx = tiledb.default_ctx() - se = tiledb.ArraySchemaEvolution(ctx) - se.add_attribute(tiledb.Attr("a2", dtype=np.float64)) - se.array_evolve(path) - - ts2_data = np.random.rand(3) - if use_timestamps: - with tiledb.open(path, "w", timestamp=2) as A: - A[[1, 2, 3]] = {"a1": ts2_data, "a2": ts2_data} - else: - with tiledb.open(path, "w") as A: - A[[1, 2, 3]] = {"a1": ts2_data, "a2": ts2_data} - - frags = tiledb.array_fragments(path) - assert len(frags) == 2 - - with tiledb.open(path, "r") as A: - assert_array_equal(A[:]["a1"], ts2_data) - assert_array_equal(A[:]["a2"], ts2_data) - - if use_timestamps: - tiledb.Array.delete_fragments(path, 2, 2) - else: - tiledb.Array.delete_fragments( - path, frags.timestamp_range[1][0], frags.timestamp_range[1][1] - ) - - frags = tiledb.array_fragments(path) - assert len(frags) == 1 - - with tiledb.open(path, "r") as A: - assert_array_equal(A[:]["a1"], ts1_data) - assert_array_equal(A[:]["a2"], [np.nan, np.nan, np.nan]) diff --git a/tiledb/tests/test_group.py b/tiledb/tests/test_group.py deleted file mode 100644 index 000120d879..0000000000 --- a/tiledb/tests/test_group.py +++ /dev/null @@ -1,896 +0,0 @@ -import base64 -import io -import os -import pathlib -import tarfile - -import numpy as np -import pytest -from hypothesis import given, settings -from hypothesis import strategies as st -from hypothesis.extra import numpy as st_np - -import tiledb - -from .common import DiskTestCase, assert_captured - -MIN_INT = np.iinfo(np.int64).min -MAX_INT = np.iinfo(np.int64).max -st_int = st.integers(min_value=MIN_INT, max_value=MAX_INT) -st_float = st.floats(allow_nan=False) -st_metadata = st.fixed_dictionaries( - { - "int": st_int, - "double": st_float, - "bytes": st.binary(), - "str": st.text(), - "list_int": st.lists(st_int), - "tuple_int": st.lists(st_int).map(tuple), - "list_float": st.lists(st_float), - "tuple_float": st.lists(st_float).map(tuple), - } -) -st_ndarray = st_np.arrays( - dtype=st.one_of( - st_np.integer_dtypes(endianness="<"), - st_np.unsigned_integer_dtypes(endianness="<"), - st_np.floating_dtypes(endianness="<", sizes=(32, 64)), - st_np.byte_string_dtypes(max_len=1), - st_np.unicode_string_dtypes(endianness="<", max_len=1), - st_np.datetime64_dtypes(endianness="<"), - ), - shape=st_np.array_shapes(min_dims=0, max_dims=3, min_side=0, max_side=10), -) - - -class GroupTestCase(DiskTestCase): - def setup_method(self): - super().setup_method() - - self.group1 = self.path("group1") - self.group2 = self.path("group1/group2") - self.group3 = self.path("group1/group3") - self.group4 = self.path("group1/group3/group4") - - tiledb.group_create(self.group1) - tiledb.group_create(self.group2) - tiledb.group_create(self.group3) - tiledb.group_create(self.group4) - - def is_group(self, uri): - return tiledb.object_type(uri) == "group" - - -class GroupTest(GroupTestCase): - def test_is_group(self): - self.assertTrue(self.is_group(self.group1)) - self.assertTrue(self.is_group(self.group2)) - self.assertTrue(self.is_group(self.group3)) - self.assertTrue(self.is_group(self.group4)) - - def test_walk_group(self): - if pytest.tiledb_vfs == "s3": - pytest.skip("S3 does not have empty directories") - - groups = [] - - def append_to_groups(path, obj): - groups.append((os.path.normpath(path), obj)) - - tiledb.walk(self.path(""), append_to_groups, order="preorder") - - groups.sort() - - self.assertTrue(groups[0][0].endswith(self.group1) and groups[0][1] == "group") - self.assertTrue(groups[1][0].endswith(self.group2) and groups[1][1] == "group") - self.assertTrue(groups[2][0].endswith(self.group3) and groups[2][1] == "group") - self.assertTrue(groups[3][0].endswith(self.group4) and groups[3][1] == "group") - - groups = [] - - tiledb.walk(self.path(""), append_to_groups, order="postorder") - - self.assertTrue(groups[0][0].endswith(self.group2) and groups[0][1] == "group") - self.assertTrue(groups[1][0].endswith(self.group4) and groups[1][1] == "group") - self.assertTrue(groups[2][0].endswith(self.group3) and groups[2][1] == "group") - self.assertTrue(groups[3][0].endswith(self.group1) and groups[3][1] == "group") - - def test_remove_group(self): - tiledb.remove(self.group3) - - self.assertFalse(self.is_group(self.group3)) - self.assertFalse(self.is_group(self.group4)) - - def test_move_group(self): - self.assertTrue(self.is_group(self.group2)) - tiledb.move(self.group2, self.group2 + "_moved") - self.assertFalse(self.is_group(self.group2)) - self.assertTrue(self.is_group(self.group2 + "_moved")) - - @pytest.mark.parametrize( - "int_data,flt_data,str_data,str_type", - ( - (-1, -1.5, "asdf", "STRING_UTF8"), - ([1, 2, 3], [1.5, 2.5, 3.5], b"asdf", "BLOB"), - ( - np.array([1, 2, 3]), - np.array([1.5, 2.5, 3.5]), - np.array(["x"]), - "STRING_UTF8", - ), - ), - ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_group_metadata( - self, int_data, flt_data, str_data, str_type, capfd, use_timestamps - ): - def values_equal(lhs, rhs): - if isinstance(lhs, np.ndarray): - if not isinstance(rhs, np.ndarray): - return False - return np.array_equal(lhs, rhs) - elif isinstance(lhs, (list, tuple)): - if not isinstance(rhs, (list, tuple)): - return False - return tuple(lhs) == tuple(rhs) - else: - return lhs == rhs - - grp_path = self.path("test_group_metadata") - tiledb.Group.create(grp_path) - - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) - with tiledb.Group(grp_path, "w", cfg) as grp: - grp.meta["int"] = int_data - grp.meta["flt"] = flt_data - grp.meta["str"] = str_data - - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) - with tiledb.Group(grp_path, "r", cfg) as grp: - assert len(grp.meta) == 3 - assert "int" in grp.meta - assert values_equal(grp.meta["int"], int_data) - assert "flt" in grp.meta - assert values_equal(grp.meta["flt"], flt_data) - assert "str" in grp.meta - assert values_equal(grp.meta["str"], str_data) - - grp.meta.dump() - metadata_dump = capfd.readouterr().out - - assert "Type: DataType.FLOAT" in metadata_dump - assert "Type: DataType.INT" in metadata_dump - assert f"Type: DataType.{str_type}" in metadata_dump - - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) - with tiledb.Group(grp_path, "w", cfg) as grp: - del grp.meta["int"] - - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) - with tiledb.Group(grp_path, "r", cfg) as grp: - assert len(grp.meta) == 2 - assert "int" not in grp.meta - - def test_group_members(self): - grp_path = self.path("test_group_members") - tiledb.Group.create(grp_path) - - grp = tiledb.Group(grp_path, "w") - assert os.path.basename(grp.uri) == os.path.basename(grp_path) - array_path = self.path("test_group_members_array") - domain = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2)) - a1 = tiledb.Attr("val", dtype="f8") - schema = tiledb.ArraySchema(domain=domain, attrs=(a1,)) - tiledb.Array.create(array_path, schema) - - grp0_path = self.path("test_group_0") - tiledb.Group.create(grp0_path) - grp.add(grp0_path) - grp.add(array_path) - grp.close() - assert not grp.isopen - - grp.open("r") - assert grp.mode == "r" - assert grp.isopen - assert len(grp) == 2 - - type_to_basename = { - tiledb.Array: os.path.basename(array_path), - tiledb.Group: os.path.basename(grp0_path), - } - - assert grp[0].type in type_to_basename - assert type_to_basename[grp[0].type] == os.path.basename(grp[0].uri) - assert grp[0].name is None - - assert grp[1].type in type_to_basename - assert type_to_basename[grp[1].type] == os.path.basename(grp[1].uri) - assert grp[1].name is None - - assert "test_group_members GROUP" in repr(grp) - assert "|-- test_group_members_array ARRAY" in repr(grp) - assert "|-- test_group_0 GROUP" in repr(grp) - - grp.close() - - grp.open("w") - assert grp.mode == "w" - grp.remove(grp0_path) - grp.close() - - grp = tiledb.Group(grp_path, "r") - assert len(grp) == 1 - for mbr in grp: - assert os.path.basename(mbr.uri) == os.path.basename(array_path) - assert mbr.type == tiledb.Array - grp.close() - - def test_group_named_members(self): - grp_path = self.path("test_group_named_members") - tiledb.Group.create(grp_path) - - subgrp_path = self.path("subgroup") - tiledb.Group.create(subgrp_path) - - array_path = self.path("subarray") - domain = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2)) - a1 = tiledb.Attr("val", dtype="f8") - schema = tiledb.ArraySchema(domain=domain, attrs=(a1,)) - tiledb.Array.create(array_path, schema) - - grp = tiledb.Group(grp_path, "w") - grp.add(subgrp_path, "subgroup") - grp.add(array_path, "subarray") - grp.close() - - grp.open("r") - assert os.path.basename(grp["subarray"].uri) == os.path.basename(array_path) - assert os.path.basename(grp["subgroup"].uri) == os.path.basename(subgrp_path) - - assert "dne" not in grp - - assert "subarray" in grp - assert grp["subarray"].type == tiledb.Array - - assert "subgroup" in grp - assert grp["subgroup"].type == tiledb.Group - - for mbr in grp: - if "subarray" in mbr.uri: - assert mbr.name == "subarray" - elif "subgroup" in mbr.uri: - assert mbr.name == "subgroup" - - grp.close() - - with tiledb.Group(grp_path, "w") as grp: # test __enter__ and __exit__ - del grp["subarray"] - grp.remove("subgroup") - - grp.open("r") - assert len(grp) == 0 - grp.close() - - def test_pass_context(self): - foo = self.path("foo") - bar = self.path("foo/bar") - - tiledb.group_create(foo) - tiledb.group_create(bar) - - ctx = tiledb.Ctx() - with tiledb.Group(foo, mode="w", ctx=ctx) as G: - G.add(bar, name="bar") - - with tiledb.Group(foo, mode="r", ctx=ctx) as G: - assert "bar" in G - - def test_relative(self): - group1 = self.path("group1") - group2_1 = self.path("group1/group2_1") - group2_2 = self.path("group1/group2_2") - - tiledb.group_create(group2_1) - tiledb.group_create(group2_2) - - with tiledb.Group(group1, mode="w") as G: - G.add(group2_1, name="group2_1", relative=False) - G.add("group2_2", name="group2_2", relative=True) - - with tiledb.Group(group1, mode="r") as G: - assert G.is_relative("group2_1") is False - assert G.is_relative("group2_2") is True - - def test_set_config(self): - group_uri = self.path("foo") - array_uri_1 = self.path("foo/a") - array_uri_2 = self.path("foo/b") - - tiledb.group_create(group_uri) - - dom = tiledb.Domain(tiledb.Dim("id", dtype="ascii")) - attr = tiledb.Attr("value", dtype=np.int64) - sch = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True) - - tiledb.Array.create(array_uri_1, sch) - tiledb.Array.create(array_uri_2, sch) - - cfg = tiledb.Config({"sm.group.timestamp_end": 2000}) - with tiledb.Group(group_uri, "w", cfg) as G: - G.add(name="a", uri="a", relative=True) - - cfg = tiledb.Config({"sm.group.timestamp_end": 3000}) - with tiledb.Group(group_uri, "w", cfg) as G: - G.add(name="b", uri="b", relative=True) - - ms = np.arange(1000, 4000, 1000, dtype=np.int64) - - for sz, m in enumerate(ms): - cfg = tiledb.Config({"sm.group.timestamp_end": m}) - - G = tiledb.Group(group_uri) - - # Cannot set config on open group - with self.assertRaises(ValueError): - G.set_config(cfg) - - G.close() - G.set_config(cfg) - - G.open() - assert len(G) == sz - G.close() - - for sz, m in enumerate(ms): - cfg = tiledb.Config({"sm.group.timestamp_end": m}) - - with tiledb.Group(group_uri, config=cfg) as G: - assert len(G) == sz - - def test_invalid_object_type(self): - path = self.path() - schema = tiledb.ArraySchema( - domain=tiledb.Domain(tiledb.Dim("id", dtype="ascii")), - attrs=(tiledb.Attr("value", dtype=np.int64),), - sparse=True, - ) - tiledb.Array.create(path, schema) - with self.assertRaises(tiledb.TileDBError): - tiledb.Group(uri=path, mode="w") - - def test_group_does_not_exist(self): - with self.assertRaises(tiledb.TileDBError): - tiledb.Group("does-not-exist") - - -class GroupMetadataTest(GroupTestCase): - @pytest.mark.parametrize( - "int_data,flt_data,str_data", - ( - (-1, -1.5, "asdf"), - ([1, 2, 3], [1.5, 2.5, 3.5], b"asdf"), - (np.array([1, 2, 3]), np.array([1.5, 2.5, 3.5]), np.array(["x"])), - ), - ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_group_metadata(self, int_data, flt_data, str_data, use_timestamps): - def values_equal(lhs, rhs): - if isinstance(lhs, np.ndarray): - if not isinstance(rhs, np.ndarray): - return False - return np.array_equal(lhs, rhs) - elif isinstance(lhs, (list, tuple)): - if not isinstance(rhs, (list, tuple)): - return False - return tuple(lhs) == tuple(rhs) - else: - return lhs == rhs - - grp_path = self.path("test_group_metadata") - tiledb.Group.create(grp_path) - - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) - with tiledb.Group(grp_path, "w", cfg) as grp: - grp.meta["int"] = int_data - grp.meta["flt"] = flt_data - grp.meta["str"] = str_data - - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) - with tiledb.Group(grp_path, "r", cfg) as grp: - assert grp.meta.keys() == {"int", "flt", "str"} - assert len(grp.meta) == 3 - assert "int" in grp.meta - assert values_equal(grp.meta["int"], int_data) - assert "flt" in grp.meta - assert values_equal(grp.meta["flt"], flt_data) - assert "str" in grp.meta - assert values_equal(grp.meta["str"], str_data) - - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) - with tiledb.Group(grp_path, "w", cfg) as grp: - del grp.meta["int"] - - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) - with tiledb.Group(grp_path, "r", cfg) as grp: - assert len(grp.meta) == 2 - assert "int" not in grp.meta - - def assert_equal_md_values(self, written_value, read_value): - if isinstance(written_value, np.ndarray): - self.assertIsInstance(read_value, np.ndarray) - self.assertEqual(read_value.dtype, written_value.dtype) - np.testing.assert_array_equal(read_value, written_value) - elif not isinstance(written_value, (list, tuple)): - assert read_value == written_value - # we don't round-trip perfectly sequences - elif len(written_value) == 1: - # sequences of length 1 are read as a single scalar element - self.assertEqual(read_value, written_value[0]) - else: - # sequences of length != 1 are read as tuples - self.assertEqual(read_value, tuple(written_value)) - - def assert_metadata_roundtrip(self, tdb_meta, dict_meta): - for k, v in dict_meta.items(): - # test __contains__ - self.assertTrue(k in tdb_meta) - # test __getitem__ - self.assert_equal_md_values(v, tdb_meta[k]) - # test get - self.assert_equal_md_values(v, tdb_meta.get(k)) - - # test __contains__, __getitem__, get for non-key - non_key = str(object()) - self.assertFalse(non_key in tdb_meta) - with self.assertRaises(KeyError): - tdb_meta[non_key] - self.assertIsNone(tdb_meta.get(non_key)) - self.assertEqual(tdb_meta.get(non_key, 42), 42) - - # test __len__ - self.assertEqual(len(tdb_meta), len(dict_meta)) - - # test __iter__() - self.assertEqual(set(tdb_meta), set(tdb_meta.keys())) - - # test keys() - self.assertSetEqual(set(tdb_meta.keys()), set(dict_meta.keys())) - - # test values() and items() - read_values = list(tdb_meta.values()) - read_items = list(tdb_meta.items()) - self.assertEqual(len(read_values), len(read_items)) - for (item_key, item_value), value in zip(read_items, read_values): - self.assertTrue(item_key in dict_meta) - self.assert_equal_md_values(dict_meta[item_key], item_value) - self.assert_equal_md_values(dict_meta[item_key], value) - - def assert_not_implemented_methods(self, tdb_meta): - with self.assertRaises(NotImplementedError): - tdb_meta.setdefault("nokey", "hello!") - with self.assertRaises(NotImplementedError): - tdb_meta.pop("nokey", "hello!") - with self.assertRaises(NotImplementedError): - tdb_meta.popitem() - with self.assertRaises(NotImplementedError): - tdb_meta.clear() - - def test_errors(self): - path = self.path("test_errors") - - tiledb.Group.create(path) - - grp = tiledb.Group(path, "w") - grp.close() - - # can't read from a closed array - grp.open("r") - grp.close() - with self.assertRaises(tiledb.TileDBError): - grp.meta["x"] - - grp.open("r") - # can't write to a mode='r' array - with self.assertRaises(tiledb.TileDBError): - grp.meta["invalid_write"] = 1 - - # missing key raises KeyError - with self.assertRaises(KeyError): - grp.meta["xyz123nokey"] - - self.assert_not_implemented_methods(grp.meta) - grp.close() - - # test invalid input - grp.open("w") - # keys must be strings - with self.assertRaises(TypeError): - grp.meta[123] = 1 - - # # can't write an int > typemax(Int64) - with self.assertRaises(OverflowError): - grp.meta["bigint"] = MAX_INT + 1 - - # can't write str list - with self.assertRaises(TypeError): - grp.meta["str_list"] = ["1", "2.1"] - - # can't write str tuple - with self.assertRaises(TypeError): - grp.meta["mixed_list"] = ("1", "2.1") - - # can't write objects - with self.assertRaises(TypeError): - grp.meta["object"] = object() - - self.assert_not_implemented_methods(grp.meta) - grp.close() - - def test_null(self): - path = self.path() - tiledb.Group.create(path) - - grp = tiledb.Group(path, "w") - grp.meta["empty_byte"] = b"" - grp.meta["null_byte"] = b"\x00" - grp.meta["zero"] = "xxx" - grp.close() - - grp = tiledb.Group(path, "r") - assert grp.meta["empty_byte"] == b"" - assert grp.meta["null_byte"] == b"\x00" - assert grp.meta["zero"] == "xxx" - grp.close() - - @given(st_metadata) - @settings(deadline=None) - def test_basic(self, test_vals): - path = self.path() - tiledb.Group.create(path) - - grp = tiledb.Group(path, "w") - grp.meta.update(test_vals) - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - # test a 1 MB blob - blob = np.random.rand(int((1024**2) / 8)).tobytes() - grp = tiledb.Group(path, "w") - test_vals["bigblob"] = blob - grp.meta["bigblob"] = blob - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - # test del key - grp = tiledb.Group(path, "w") - del test_vals["bigblob"] - del grp.meta["bigblob"] - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - # test update - grp = tiledb.Group(path, "w") - test_vals.update(foo="bar", double=3.14) - grp.meta.update(foo="bar", double=3.14) - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - @given(st_metadata, st_ndarray) - @settings(deadline=None) - def test_numpy(self, test_vals, ndarray): - test_vals["ndarray"] = ndarray - - path = self.path() - tiledb.Group.create(path) - - grp = tiledb.Group(path, "w") - grp.meta.update(test_vals) - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - grp = tiledb.Group(path, "w") - grp.meta["ndarray"] = 42 - test_vals["ndarray"] = 42 - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - # test resetting a key with a non-ndarray value to a ndarray value - grp = tiledb.Group(path, "w") - grp.meta["bytes"] = ndarray - test_vals["bytes"] = ndarray - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - grp = tiledb.Group(path, "w") - del grp.meta["ndarray"] - del test_vals["ndarray"] - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - grp = tiledb.Group(path, "w") - test_vals.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) - grp.meta.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) - grp.close() - - grp = tiledb.Group(path, "r") - self.assert_metadata_roundtrip(grp.meta, test_vals) - grp.close() - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_consolidation_and_vac(self, use_timestamps): - vfs = tiledb.VFS() - path = self.path("test_consolidation_and_vac") - tiledb.Group.create(path) - - cfg = tiledb.Config({"sm.group.timestamp_end": 1} if use_timestamps else {}) - with tiledb.Group(path, "w", cfg) as grp: - grp.meta["meta"] = 1 - - cfg = tiledb.Config({"sm.group.timestamp_end": 2} if use_timestamps else {}) - with tiledb.Group(path, "w", cfg) as grp: - grp.meta["meta"] = 2 - - cfg = tiledb.Config({"sm.group.timestamp_end": 3} if use_timestamps else {}) - with tiledb.Group(path, "w", cfg) as grp: - grp.meta["meta"] = 3 - - meta_path = pathlib.Path(path) / "__meta" - assert len(vfs.ls(meta_path)) == 3 - - tiledb.Group.consolidate_metadata(path, cfg) - tiledb.Group.vacuum_metadata(path, cfg) - - assert len(vfs.ls(meta_path)) == 1 - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 18, 0), - reason="Group consolidation and vacuuming not available < 2.18", - ) - def test_consolidation_and_vac_no_config(self): - vfs = tiledb.VFS() - path = self.path("test_consolidation_and_vac") - tiledb.Group.create(path) - - with tiledb.Group(path, "w") as grp: - grp.meta["meta"] = 1 - - with tiledb.Group(path, "w") as grp: - grp.meta["meta"] = 2 - - with tiledb.Group(path, "w") as grp: - grp.meta["meta"] = 3 - - meta_path = pathlib.Path(path) / "__meta" - assert len(vfs.ls(meta_path)) == 3 - - tiledb.Group.consolidate_metadata(path) - tiledb.Group.vacuum_metadata(path) - - assert len(vfs.ls(meta_path)) == 1 - - def test_string_metadata(self, capfd): - # this test ensures that string metadata is correctly stored and - # retrieved from the metadata store. It also tests that the metadata - # dump method works correctly for string metadata. - uri = self.path("test_ascii_metadata") - tiledb.Group.create(uri) - - grp = tiledb.Group(uri, "w") - grp.meta["abc"] = "xyz" - grp.close() - - grp = tiledb.Group(uri, "r") - assert grp.meta["abc"] == "xyz" - grp.meta.dump() - assert_captured(capfd, "Type: DataType.STRING_UTF8") - grp.close() - - def test_array_or_list_of_strings_metadata_error(self): - # this test ensures that an error is raised when trying to store - # an array or list of strings as metadata in a group. - # numpy arrays of single characters are supported since we don't need - # any extra offset information to retrieve them. - uri = self.path("test_ascii_metadata") - tiledb.Group.create(uri) - - grp = tiledb.Group(uri, "w") - with pytest.raises(TypeError) as exc: - grp.meta["abc"] = ["x", "1"] - assert "Unsupported item type" in str(exc.value) - - with pytest.raises(TypeError) as exc: - grp.meta["abc"] = ["foo", "foofoo"] - - with pytest.raises(TypeError) as exc: - grp.meta["abc"] = np.array(["foo", "12345"]) - - grp.meta["abc"] = np.array(["1", "2", "3", "f", "o", "o"], dtype="U1") - grp.close() - - grp = tiledb.Group(uri, "r") - self.assert_metadata_roundtrip( - grp.meta, {"abc": np.array(["1", "2", "3", "f", "o", "o"], dtype="U1")} - ) - grp.close() - - grp = tiledb.Group(uri, "w") - grp.meta["abc"] = np.array(["T", "i", "l", "e", "D", "B", "!"], dtype="S1") - grp.close() - - grp = tiledb.Group(uri, "r") - self.assert_metadata_roundtrip( - grp.meta, - {"abc": np.array([b"T", b"i", b"l", b"e", b"D", b"B", b"!"], dtype="S1")}, - ) - grp.close() - - def test_bytes_metadata(self, capfd): - # this test ensures that bytes metadata is correctly stored and - # retrieved from the metadata store. It also tests that the metadata - # dump method works correctly for bytes metadata. - path = self.path() - tiledb.Group.create(path) - - grp = tiledb.Group(path, "w") - grp.meta["bytes"] = b"blob" - grp.close() - - grp = tiledb.Group(path, "r") - assert grp.meta["bytes"] == b"blob" - grp.meta.dump() - assert_captured(capfd, "Type: DataType.BLOB") - grp.close() - - def test_group_metadata_backwards_compat(self): - # This test ensures that metadata written with the TileDB-Py 0.32.3 - # will be read correctly in the future versions. - - # === The following code creates a group with metadata using the current version of TileDB-Py === - path_new = self.path("new_group") - tiledb.Group.create(path_new) - group = tiledb.Group(path_new, "w") - - # python primitive types - group.meta["python_int"] = -1234 - group.meta["python_float"] = 3.14 - group.meta["python_str"] = "hello" - group.meta["python_bytes"] = b"hello" - group.meta["python_bool"] = False - - # numpy primitive types - group.meta["numpy_int"] = np.int64(-93) - group.meta["numpy_uint"] = np.uint64(42) - group.meta["numpy_float64"] = np.float64(3.14) - group.meta["numpy_bytes"] = np.bytes_("hello") - group.meta["numpy_str"] = np.str_("hello") - group.meta["numpy_bool"] = np.bool_(False) - - # lists/tuples - group.meta["list_int"] = [7] - group.meta["tuple_int"] = (7,) - group.meta["list_ints"] = [1, -2, 3] - group.meta["tuple_ints"] = (1, 2, 3) - group.meta["list_float"] = [1.1] - group.meta["tuple_float"] = (1.1,) - group.meta["list_floats"] = [1.1, 2.2, 3.3] - group.meta["tuple_floats"] = (1.1, 2.2, 3.3) - group.meta["list_empty"] = [] - group.meta["tuple_empty"] = () - - # numpy arrays - group.meta["numpy_int"] = np.array([-11], dtype=np.int64) - group.meta["numpy_ints"] = np.array([1, -2, 3], dtype=np.int64) - group.meta["numpy_uint"] = np.array([22], dtype=np.uint64) - group.meta["numpy_uints"] = np.array([1, 2, 3], dtype=np.uint64) - group.meta["numpy_float"] = np.array([3.14], dtype=np.float64) - group.meta["numpy_floats"] = np.array([1.1, 2.2, 3.3], dtype=np.float64) - group.meta["numpy_byte"] = np.array([b"hello"], dtype="S5") - group.meta["numpy_str"] = np.array(["hello"], dtype="U5") - group.meta["numpy_bool"] = np.array([True, False, True]) - - group.close() - # === End of the code that creates the group with metadata === - - # The following commented out code was used to generate the base64 encoded string of the group - # from the TileDB-Py 0.32.3 after creating the group with metadata in the exact same way as above. - ''' - # Compress the contents of the group folder to tgz - with tarfile.open("test.tar.gz", "w:gz") as tar: - with os.scandir(path_new) as entries: - for entry in entries: - tar.add(entry.path, arcname=entry.name) - - # Read the .tgz file and encode it to base64 - with open("test.tar.gz", 'rb') as f: - s = base64.encodebytes(f.read()) - - # Print the base64 encoded string - group_tgz = f"""{s.decode():>32}""" - print(group_tgz) - ''' - - # The following base64 encoded string is the contents of the group folder compressed - # to a tgz file using TileDB-Py 0.32.3. - group_tgz = b"""H4sICO/+G2cC/3Rlc3QudGFyANPT19N3CEis8EhNTEktYqAJMIAAXLSBgbEJgg0SNzQwMjRiUKhg - oAMoLS5JLAJazzAygZGFQm5JZm6qraG5kaWFhbmlhbGekaGphbGlJRfDKBj2ID4+N7UkUZ+mdoAy - tbmpKYQ2g9AGRqh53tDE3MDM3Nzc2NQcmP8NDc3NGRRM6Zn/E9Mzi/GpAypLSxt+8a83KMp/Y8zy - 33C0/KdL+W+Otfy3NBot/kdS+R8fj4h/YPSj8UxTktOSjQxMjNPMzS0MDCxTjVLNTUwS01IMzMxM - zJMTicj/ZiYmuMp/QwNjM9Ty38jQAFhdKBjQM/+P0PJfDIhfMULYV1khNAsjTFYITDIygAQYQbKM - YBYDQv0xIEcAymdEEqtgbA1x9DtsIBATrJgRpRfwgC18R8GqqqXxD1gDJwZtnTTb5YbtE0YbprhD - 8y0KH7SwVJTnps9d9sorMOX8Met7M8+yMHzas+bz0rgbMet7z3b75kqb3mSdtisqonQnu8GrGvHI - 6WGxX/Jm+7UW7V45+8/OVSZ3+O+Ic/0Sloo+8OKG6hqutaun9NgfXjqDz9ftBZNBwLvXt6+fX94/ - ++EfK0X1S2nBpVv5jQ0cut7nS8T3/wn7rOpq5q9/Jn2XW8OhQ/frZTLrkycxHt1evlKvrtbsXeIX - 2dw33D0fd0yt5vqe8T/k3d3wtO4UI5Vm8yMvspXTJE+ozFY+13ZA7e+avDertDwP+b1mcjq0JPar - QLS26mvFLQH6D97dDbyZlx1b8X/ZHYmHWpqMjTP6QiVvrZX/3nsqxv3WwofHjtgmbk+YGnhC/U1D - v5+z0SvXZ5YfmXhYiw4Ynmi727rZteXvpZULJ/jvNikQV1/tuiM73XDytc2ZVu6PRcy4NN3Cuze9 - 0GJc1KHr+mXOAxexJaUFAv/kVgi/K+FaI+2wZfqOxoYWocQPGzNeG9h9edh+3DfBJMYzOKL2l+em - ezc0Hyq98xaQ8eT40PDoxpYX60KKnogs7Ht2d+cf9lm5m9pGy8fhDvRG+/+j/X+M9p+JqYGJ+WgD - cES0/0oyc1JTkuLTi/JLC/RKUpJok//xtP+w9P+NTUD9v9H232j5P1r+D0j5b2ZoYDZa/o+I8h9c - 8NN0AJiM8V8TA9PR8d9RMApGwSgYBaNgFIyCUTAKRsEooCYAAP1+F2wAKAAA""" - - # Ceate a new group by extracting the contents of the tgz file - path_original = self.path("original_group") - with tarfile.open(fileobj=io.BytesIO(base64.b64decode(group_tgz))) as tf: - try: - tf.extractall(path_original, filter="fully_trusted") - except TypeError: - tf.extractall(path_original) - - # Open both the original and the new group and compare the metadata both in values and types - group_original = tiledb.Group(path_original, "r") - group_new = tiledb.Group(path_new, "r") - - self.assert_metadata_roundtrip(group_new.meta, group_original.meta) - - group_original.close() - group_new.close() - - def test_group_metadata_new_types(self): - # This kind of data was not supported for TileDB-Py <= 0.32.3 - path_new = self.path("new_group") - - tiledb.Group.create(path_new) - group = tiledb.Group(path_new, "w") - test_vals = { - "int64": np.array(-1111, dtype=np.int64), - "uint64": np.array(2, dtype=np.uint64), - "float64": np.array(3.14, dtype=np.float64), - "bool": np.array(True, dtype=bool), - "str": np.array(["a", "b", "c"], dtype="S"), - "unicode": np.array(["a", "b", "c"], dtype="U"), - "bytes": np.array([b"a", b"b", b"c"]), - "datetime": np.array( - [np.datetime64("2021-01-01"), np.datetime64("2021-01-02")] - ), - } - group.meta.update(test_vals) - group.close() - - group = tiledb.Group(path_new, "r") - self.assert_metadata_roundtrip(group.meta, test_vals) - group.close() diff --git a/tiledb/tests/test_hypothesis.py b/tiledb/tests/test_hypothesis.py deleted file mode 100644 index 6494f420da..0000000000 --- a/tiledb/tests/test_hypothesis.py +++ /dev/null @@ -1,62 +0,0 @@ -import time - -import hypothesis as hp -import hypothesis.strategies as st -import numpy as np -import pytest - -import tiledb - -from .common import has_pandas - -pd = pytest.importorskip("pandas") -tm = pd._testing - - -@pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") -@pytest.mark.parametrize("mode", ["np", "df"]) -@hp.settings(deadline=None, verbosity=hp.Verbosity.verbose) -@hp.given(st.binary()) -def test_bytes_npdf(checked_path, mode, data): - start = time.time() - - uri = "mem://" + checked_path.path() - hp.note(f"!!! path '{uri}' time: {time.time() - start}") - - array = np.array([data], dtype="S0") - - start_ingest = time.time() - if mode == "np": - with tiledb.from_numpy(uri, array) as A: - pass - else: - series = pd.Series(array) - df = pd.DataFrame({"": series}) - # NOTE: ctx required here for mem:// - tiledb.from_pandas(uri, df, sparse=False, ctx=tiledb.default_ctx()) - - hp.note(f"{mode} ingest time: {time.time() - start_ingest}") - - # DEBUG - tiledb.stats_enable() - tiledb.stats_reset() - # END DEBUG - - with tiledb.open(uri) as A: - if mode == "np": - np.testing.assert_array_equal(A.multi_index[:][""], array) - else: - tm.assert_frame_equal(A.df[:], df) - - hp.note(tiledb.stats_dump(print_out=False)) - - # DEBUG - tiledb.stats_disable() - - duration = time.time() - start - hp.note(f"!!! test_bytes_{mode} duration: {duration}") - if duration > 2: - # Hypothesis setup is (maybe) causing deadline exceeded errors - # https://github.com/TileDB-Inc/TileDB-Py/issues/1194 - # Set deadline=None and use internal timing instead. - pytest.fail(f"!!! {mode} function body duration exceeded 2s: {duration}") diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py deleted file mode 100644 index bddefd68c9..0000000000 --- a/tiledb/tests/test_libtiledb.py +++ /dev/null @@ -1,3894 +0,0 @@ -# ruff: noqa: F811 - -import base64 -import gc -import io -import os -import pickle -import random -import sys -import tarfile -import time -import warnings -from collections import OrderedDict - -import numpy as np -import psutil -import pytest -from numpy.testing import assert_array_equal - -import tiledb -from tiledb.datatypes import DataType - -from .common import ( - DiskTestCase, - assert_captured, - assert_subarrays_equal, - assert_unordered_equal, - fx_sparse_cell_order, # noqa: F401 - has_pandas, - has_pyarrow, - rand_ascii, - rand_ascii_bytes, - rand_utf8, -) - - -@pytest.fixture(scope="class") -def test_incomplete_return_array(tmpdir_factory, request): - tmp_path = str(tmpdir_factory.mktemp("array")) - ncells = 20 - nvals = 10 - - data = np.array([rand_utf8(nvals - i % 2) for i in range(ncells)], dtype="O") - - dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) - 1), tile=len(data))) - att = tiledb.Attr(dtype=str, var=True) - - allows_duplicates = request.param - schema = tiledb.ArraySchema( - dom, (att,), sparse=True, allows_duplicates=allows_duplicates - ) - - coords = np.arange(ncells) - - tiledb.SparseArray.create(tmp_path, schema) - with tiledb.SparseArray(tmp_path, mode="w") as T: - T[coords] = data - - with tiledb.SparseArray(tmp_path, mode="r") as T: - assert_subarrays_equal(data, T[:][""]) - - return tmp_path - - -class VersionTest(DiskTestCase): - def test_libtiledb_version(self): - v = tiledb.libtiledb.version() - self.assertIsInstance(v, tuple) - self.assertTrue(len(v) == 3) - self.assertTrue(v[0] >= 1, "TileDB major version must be >= 1") - - def test_tiledbpy_version(self): - v = tiledb.version.version - self.assertIsInstance(v, str) - - v = tiledb.version() - self.assertIsInstance(v, tuple) - self.assertTrue(3 <= len(v) <= 5) - - -class ArrayTest(DiskTestCase): - def create_array_schema(self): - filters = tiledb.FilterList([tiledb.ZstdFilter(level=5)]) - domain = tiledb.Domain( - tiledb.Dim(domain=(1, 8), tile=2, filters=filters), - tiledb.Dim(domain=(1, 8), tile=2, filters=filters), - ) - a1 = tiledb.Attr("val", dtype="f8") - return tiledb.ArraySchema(domain=domain, attrs=(a1,)) - - def test_array_create(self): - config = tiledb.Config() - config["sm.consolidation.step_min_frag"] = 0 - config["sm.consolidation.steps"] = 1 - schema = self.create_array_schema() - - # persist array schema - tiledb.libtiledb.Array.create(self.path("foo"), schema) - - # these should be no-ops - # full signature - tiledb.consolidate(self.path("foo"), config=config) - # kw signature - tiledb.consolidate(uri=self.path("foo")) - - # load array in readonly mode - array = tiledb.libtiledb.Array(self.path("foo"), mode="r") - self.assertTrue(array.isopen) - self.assertEqual(array.schema, schema) - self.assertEqual(array.mode, "r") - self.assertEqual(array.uri, self.path("foo")) - - # test that we cannot consolidate an array in readonly mode - with self.assertRaises(tiledb.TileDBError): - array.consolidate(config=config) - - # we have not written anything, so the array is empty - self.assertIsNone(array.nonempty_domain()) - - array.reopen() - self.assertTrue(array.isopen) - - array.close() - self.assertEqual(array.isopen, False) - - with self.assertRaises(tiledb.TileDBError): - # cannot get schema from closed array - array.schema - - with self.assertRaises(tiledb.TileDBError): - # cannot re-open a closed array - array.reopen() - - def test_array_create_with_ctx(self): - schema = self.create_array_schema() - - with self.assertRaises(TypeError): - tiledb.libtiledb.Array.create(self.path("foo"), schema, ctx="foo") - - # persist array schema - tiledb.libtiledb.Array.create(self.path("foo"), schema, ctx=tiledb.Ctx()) - - @pytest.mark.skipif( - not (sys.platform == "win32" and tiledb.libtiledb.version() >= (2, 3, 0)), - reason="Shared network drive only on Win32", - ) - def test_array_create_on_shared_drive(self): - schema = self.create_array_schema() - uri = self.path(basename="foo", shared=True) - - tiledb.libtiledb.Array.create(uri, schema) - - # load array in readonly mode - array = tiledb.libtiledb.Array(uri, mode="r") - self.assertTrue(array.isopen) - self.assertEqual(array.schema, schema) - self.assertEqual(array.mode, "r") - self.assertEqual(array.uri, uri) - - # we have not written anything, so the array is empty - self.assertIsNone(array.nonempty_domain()) - - array.reopen() - self.assertTrue(array.isopen) - - array.close() - self.assertEqual(array.isopen, False) - - with self.assertRaises(tiledb.TileDBError): - # cannot get schema from closed array - array.schema - - with self.assertRaises(tiledb.TileDBError): - # cannot re-open a closed array - array.reopen() - - def test_array_create_encrypted(self): - config = tiledb.Config() - config["sm.consolidation.step_min_frags"] = 0 - config["sm.consolidation.steps"] = 1 - schema = self.create_array_schema() - key = "0123456789abcdeF0123456789abcdeF" - # persist array schema - tiledb.libtiledb.Array.create(self.path("foo"), schema, key=key) - - # check that we can open the array sucessfully - config = tiledb.Config() - config["sm.encryption_key"] = key - config["sm.encryption_type"] = "AES_256_GCM" - ctx = tiledb.Ctx(config=config) - - with tiledb.libtiledb.Array(self.path("foo"), mode="r", ctx=ctx) as array: - self.assertTrue(array.isopen) - self.assertEqual(array.schema, schema) - self.assertEqual(array.mode, "r") - with tiledb.open(self.path("foo"), mode="r", key=key, ctx=ctx) as array: - self.assertTrue(array.isopen) - self.assertEqual(array.schema, schema) - self.assertEqual(array.mode, "r") - - tiledb.consolidate(uri=self.path("foo"), ctx=tiledb.Ctx(config)) - - config = tiledb.Config() - config["sm.encryption_key"] = "0123456789abcdeF0123456789abcdeX" - config["sm.encryption_type"] = "AES_256_GCM" - ctx = tiledb.Ctx(config=config) - # check that opening the array with the wrong key fails: - with self.assertRaises(tiledb.TileDBError): - tiledb.libtiledb.Array(self.path("foo"), mode="r", ctx=ctx) - - config = tiledb.Config() - config["sm.encryption_key"] = "0123456789abcdeF0123456789abcde" - config["sm.encryption_type"] = "AES_256_GCM" - ctx = tiledb.Ctx(config=config) - # check that opening the array with the wrong key length fails: - with self.assertRaises(tiledb.TileDBError): - tiledb.libtiledb.Array(self.path("foo"), mode="r", ctx=ctx) - - config = tiledb.Config() - config["sm.encryption_key"] = "0123456789abcdeF0123456789abcde" - config["sm.encryption_type"] = "AES_256_GCM" - ctx = tiledb.Ctx(config=config) - # check that consolidating the array with the wrong key fails: - with self.assertRaises(tiledb.TileDBError): - tiledb.consolidate(self.path("foo"), config=config, ctx=ctx) - - # needs core fix in 2.2.4 - @pytest.mark.skipif( - (sys.platform == "win32" and tiledb.libtiledb.version() == (2, 2, 3)), - reason="Skip array_doesnt_exist test on Win32 / libtiledb 2.2.3", - ) - def test_array_doesnt_exist(self): - with self.assertRaises(tiledb.TileDBError): - tiledb.libtiledb.Array(self.path("foo"), mode="r") - - def test_create_schema_matches(self): - dims = (tiledb.Dim(domain=(0, 6), tile=2),) - dom = tiledb.Domain(*dims) - att = tiledb.Attr(dtype=np.byte) - - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - uri = self.path("s1") - with self.assertRaises(ValueError): - tiledb.DenseArray.create(uri, schema) - - dense_schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - uri = self.path("d1") - with self.assertRaises(ValueError): - tiledb.SparseArray.create(uri, dense_schema) - - class MySparseArray(tiledb.SparseArray): - pass - - with self.assertRaises(ValueError): - MySparseArray.create(uri, dense_schema) - - def test_nonempty_domain_scalar(self): - uri = self.path("test_nonempty_domain_scalar") - dims = tiledb.Dim(domain=(-10, 10), dtype=np.int64, tile=1) - schema = tiledb.ArraySchema( - tiledb.Domain(dims), attrs=[tiledb.Attr(dtype=np.int32)], sparse=True - ) - - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "w") as A: - A[-1] = 10 - A[1] = 11 - - with tiledb.open(uri, "r") as A: - ned = A.nonempty_domain() - assert_array_equal(ned, ((-1, 1),)) - assert isinstance(ned[0][0], int) - assert isinstance(ned[0][1], int) - - def test_nonempty_domain_empty_string(self): - uri = self.path("test_nonempty_domain_empty_string") - dims = [tiledb.Dim(dtype="ascii")] - schema = tiledb.ArraySchema(tiledb.Domain(dims), sparse=True) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "r") as A: - assert_array_equal(A.nonempty_domain(), ((None, None),)) - - with tiledb.open(uri, "w") as A: - A[""] = None - - with tiledb.open(uri, "r") as A: - assert_array_equal(A.nonempty_domain(), ((b"", b""),)) - - def test_create_array_overwrite(self): - uri = self.path("test_create_array_overwrite") - dims = tiledb.Dim(domain=(0, 10), dtype=np.int64) - schema = tiledb.ArraySchema( - tiledb.Domain(dims), attrs=[tiledb.Attr(dtype=np.int32)], sparse=True - ) - - with pytest.warns(UserWarning, match="Overwrite set, but array does not exist"): - tiledb.Array.create(uri, schema, overwrite=True) - - with tiledb.open(uri, "w") as A: - A[0] = 1 - - with tiledb.open(uri, "r") as A: - assert A.nonempty_domain() == ((0, 0),) - - # cannot overwrite the array by default - with self.assertRaises(tiledb.TileDBError): - tiledb.Array.create(uri, schema) - - tiledb.Array.create(uri, schema, overwrite=True) - - # make the old array has been deleted and replaced - with tiledb.open(uri, "r") as A: - assert A.nonempty_domain() is None - - def test_upgrade_version(self): - tgz_sparse = b"""H4sIAJKNpWAAA+2aPW/TQBjHz2nTFlGJClUoAxIuA0ICpXf2vdhbGWBiYEIgihI7MRT1JVKairKh - qgNfgA2kDnwFVga+ABtfgE8AEwsS5/ROTUzBjWpbKv3/JPexLxc/l/59zz3PJc1lUjqUUiWEO7Ty - 0GqsPbxgnArmUymk71LmUc6JK8ofGiE724Oor4fyYqvXH/S2/trv5VqSbPzjPuMfyi18nCXRXG61 - NpNBVOZjMIH+XEip9fc9xaB/FaT6b/Q6681BNy7Lh/5/SM4n0l8JPf9pWQMaBfq3on4/etXa7qwl - m1EZz0Ge/p6X1V9wKaF/FdT1sWrOXxs77dhXLw//OiRtcNKuzvBspH+gjwVz7Yy07TqdhNTuzcw4 - OwtT0407qzM3Hi58vzZH7678cN99rl9f2ji40JZ77T0Wzb+JD/rdp8SZnfta2gcFx5LOfyY9xqXn - ByoIVeYqDJMu44GOyGHCeRIGKuHCF1HsRRGLaacl8jOHifM/z2M+8r9KOL3+zd56jo8J1n+rPxcC - 8b8KjvRnvlSh8rJXcRJ2Euor7gne8XgsJdVPhAoSFXZFogrWX6//aqg/p9C/Ck6vf6Hx3+rPmEL8 - r4IC9G+1nvWj55vJ1mC4k9CNBpkqImf+a7VFRn8phI/5XwVpUh+Yc9fYk+b/af9FMp7/27Zd51vc - brf3Y7c+e//BFeJ8IJfSG9hoYd9zUl9p/4sZX7ZN1xrdlXrquwYXcAEXx7s4ojbSOWXK2NtknBVy - Mmxc/GKsZ2781tifxj4xjj8Zu2Qc79sBgKopYP3v5u0Z5uX/7I/8z6ce9n8rwYaAhj6ukvE4Yttu - flz+5TbeE/JIt9vYUSO3Hs8Pwww4wxQw/3O/Msit/wXP1n9Sof6vhNH538i02ak+njyA/4kC9v+L - rP/N/q8UmP/VgPofLuDiXLg4AvU/MBSw/hdZ/5v1XxcCDOt/FaD+P98UMP+LrP/t7z8Uxe8/KgH1 - PwAAAAAAAAAAAAAAAAAAAAAAAHD2+Q18oX51AFAAAA==""" - - path = self.path("test_upgrade_version") - - with tarfile.open(fileobj=io.BytesIO(base64.b64decode(tgz_sparse))) as tf: - try: - tf.extractall(path, filter="fully_trusted") - except TypeError: - tf.extractall(path) - - with tiledb.open(path) as A: - assert A.schema.version == 5 - - A.upgrade_version() - - with tiledb.open(path) as A: - assert A.schema.version >= 15 - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_array_delete_fragments(self, use_timestamps): - dshape = (1, 3) - num_writes = 10 - - def create_array(target_path, dshape): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path, dshape, num_writes): - for i in range(1, num_writes + 1): - with tiledb.open( - target_path, "w", timestamp=i if use_timestamps else None - ) as A: - A[[1, 2, 3]] = np.random.rand(dshape[1]) - - path = self.path("test_array_delete_fragments") - - ts = tuple((t, t) for t in range(1, 11)) - - create_array(path, dshape) - write_fragments(path, dshape, num_writes) - frags = tiledb.array_fragments(path) - assert len(frags) == 10 - if use_timestamps: - assert frags.timestamp_range == ts - - if use_timestamps: - tiledb.Array.delete_fragments(path, 3, 6) - else: - timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] - tiledb.Array.delete_fragments(path, timestamps[2], timestamps[5]) - - frags = tiledb.array_fragments(path) - assert len(frags) == 6 - if use_timestamps: - assert frags.timestamp_range == ts[:2] + ts[6:] - - def test_array_delete(self): - uri = self.path("test_array_delete") - data = np.random.rand(10) - - tiledb.from_numpy(uri, data) - - with tiledb.open(uri) as A: - assert_array_equal(A[:], data) - - assert tiledb.array_exists(uri) is True - - tiledb.Array.delete_array(uri) - - assert tiledb.array_exists(uri) is False - - @pytest.mark.skipif( - not has_pyarrow() or not has_pandas(), - reason="pyarrow>=1.0 and/or pandas>=1.0,<3.0 not installed", - ) - @pytest.mark.parametrize("sparse", [True, False]) - @pytest.mark.parametrize("pass_df", [True, False]) - def test_array_write_nullable(self, sparse, pass_df): - import pyarrow as pa - - uri = self.path("test_array_write_nullable") - dom = tiledb.Domain(tiledb.Dim("d", domain=(1, 5), dtype="int64")) - att1 = tiledb.Attr("a1", dtype="int8", nullable=True) - att2 = tiledb.Attr("a2", dtype="str", nullable=True) - schema = tiledb.ArraySchema(domain=dom, attrs=[att1, att2], sparse=sparse) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "w") as A: - dims = pa.array([1, 2, 3, 4, 5]) - data1 = pa.array([1.0, 2.0, None, 0, 1.0]) - data2 = pa.array(["a", "b", None, None, "c"]) - if pass_df: - dims = dims.to_pandas() - data1 = data1.to_pandas() - data2 = data2.to_pandas() - - if sparse: - A[dims] = {"a1": data1, "a2": data2} - else: - A[:] = {"a1": data1, "a2": data2} - - with tiledb.open(uri, "r") as A: - expected_validity1 = [False, False, True, False, False] - assert_array_equal(A[:]["a1"].mask, expected_validity1) - assert_array_equal(A.df[:]["a1"].isna(), expected_validity1) - assert_array_equal(A.query(attrs=["a1"])[:]["a1"].mask, expected_validity1) - - expected_validity2 = [False, False, True, True, False] - assert_array_equal(A[:]["a2"].mask, expected_validity2) - assert_array_equal(A.df[:]["a2"].isna(), expected_validity2) - assert_array_equal(A.query(attrs=["a2"])[:]["a2"].mask, expected_validity2) - - with tiledb.open(uri, "w") as A: - dims = pa.array([1, 2, 3, 4, 5]) - data1 = pa.array([None, None, None, None, None]) - data2 = pa.array([None, None, None, None, None]) - if pass_df: - dims = dims.to_pandas() - data1 = data1.to_pandas() - data2 = data2.to_pandas() - - if sparse: - A[dims] = {"a1": data1, "a2": data2} - else: - A[:] = {"a1": data1, "a2": data2} - - with tiledb.open(uri, "r") as A: - expected_validity1 = [True, True, True, True, True] - assert_array_equal(A[:]["a1"].mask, expected_validity1) - assert_array_equal(A.df[:]["a1"].isna(), expected_validity1) - assert_array_equal(A.query(attrs=["a1"])[:]["a1"].mask, expected_validity1) - - expected_validity2 = [True, True, True, True, True] - assert_array_equal(A[:]["a2"].mask, expected_validity2) - assert_array_equal(A.df[:]["a2"].isna(), expected_validity2) - assert_array_equal(A.query(attrs=["a2"])[:]["a2"].mask, expected_validity2) - - -class DenseArrayTest(DiskTestCase): - def test_array_1d(self): - A = np.arange(1050) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 1049), tile=100, dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.DenseArray.create(self.path("foo"), schema) - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - self.assertEqual(len(A), len(T)) - self.assertEqual(A.ndim, T.ndim) - self.assertEqual(A.shape, T.shape) - - self.assertEqual(1, T.nattr) - self.assertEqual(A.dtype, T.attr(0).dtype) - self.assertEqual(T.dim(T.schema.domain.dim(0).name), T.dim(0)) - with self.assertRaises(ValueError): - T.dim(1.0) - - self.assertIsInstance(T.timestamp_range, tuple) - self.assertTrue(T.timestamp_range[1] > 0) - - # check empty array - B = T[:] - - self.assertEqual(A.shape, B.shape) - self.assertEqual(A.dtype, B.dtype) - self.assertIsNone(T.nonempty_domain()) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - # check set array - T[:] = A - - read1_timestamp = -1 - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - self.assertEqual(((0, 1049),), T.nonempty_domain()) - - # check timestamp - read1_timestamp = T.timestamp_range - self.assertTrue(read1_timestamp[1] > 0) - - # check slicing - assert_array_equal(A, np.array(T)) - assert_array_equal(A, T[:]) - assert_array_equal(A, T[...]) - assert_array_equal(A, T[slice(None)]) - assert_array_equal(A[:10], T[:10]) - assert_array_equal(A[10:20], T[10:20]) - assert_array_equal(A[-10:], T[-10:]) - - # ellipsis - assert_array_equal(A[:10, ...], T[:10, ...]) - assert_array_equal(A[10:50, ...], T[10:50, ...]) - assert_array_equal(A[-50:, ...], T[-50:, ...]) - assert_array_equal(A[..., :10], T[..., :10]) - assert_array_equal(A[..., 10:20], T[..., 10:20]) - assert_array_equal(A[..., -50:], T[..., -50:]) - - # across tiles - assert_array_equal(A[:150], T[:150]) - assert_array_equal(A[-250:], T[-250:]) - - # point index - self.assertEqual(A[0], T[0]) - self.assertEqual(A[-1], T[-1]) - - # point index with all index types - self.assertEqual(A[123], T[np.int8(123)]) - self.assertEqual(A[123], T[np.uint8(123)]) - self.assertEqual(A[123], T[np.int16(123)]) - self.assertEqual(A[123], T[np.uint16(123)]) - self.assertEqual(A[123], T[np.int64(123)]) - self.assertEqual(A[123], T[np.uint64(123)]) - self.assertEqual(A[123], T[np.int32(123)]) - self.assertEqual(A[123], T[np.uint32(123)]) - - # mixed-type slicing - # https://github.com/TileDB-Inc/TileDB-Py/issues/140 - self.assertEqual(A[0:1], T[0 : np.uint16(1)]) - self.assertEqual(A[0:1], T[np.int64(0) : 1]) - with self.assertRaises(IndexError): - # this is a consequence of NumPy promotion rules - self.assertEqual(A[0:1], T[np.uint64(0) : 1]) - - # basic step - assert_array_equal(A[:50:2], T[:50:2]) - assert_array_equal(A[:2:50], T[:2:50]) - assert_array_equal(A[10:-1:50], T[10:-1:50]) - - # indexing errors - with self.assertRaises(IndexError): - T[:, :] - with self.assertRaises(IndexError): - T[:, 50] - with self.assertRaises(IndexError): - T[50, :] - with self.assertRaises(IndexError): - T[0, 0] - - # check single ellipsis - with self.assertRaises(IndexError): - T[..., 1:5, ...] - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - # check partial assignment - B = np.arange(1e5, 2e5).astype(A.dtype) - T[190:310] = B[190:310] - - read2_timestamp = -1 - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A[:190], T[:190]) - assert_array_equal(B[190:310], T[190:310]) - assert_array_equal(A[310:], T[310:]) - - # test timestamps are updated - read2_timestamp = T.timestamp_range - self.assertTrue(read2_timestamp > read1_timestamp) - - def test_array_1d_set_scalar(self): - A = np.zeros(50) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 49), tile=50)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A, T[:]) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - value = -1, 3, 10 - A[0], A[1], A[3] = value - T[0], T[1], T[3] = value - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A, T[:]) - - for value in (-1, 3, 10): - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - A[5:25] = value - T[5:25] = value - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A, T[:]) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - A[:] = value - T[:] = value - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A, T[:]) - - def test_array_id_point_queries(self): - # TODO: handle queries like T[[2, 5, 10]] = ? - pass - - @pytest.mark.parametrize("dtype", ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8"]) - def test_dense_index_dtypes(self, dtype): - path = self.path() - data = np.arange(0, 3).astype(dtype) - with tiledb.from_numpy(path, data): - pass - with tiledb.open(path) as B: - assert_array_equal(B[:], data) - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 10), - reason="TILEDB_BOOL introduced in libtiledb 2.10", - ) - def test_dense_index_bool(self): - path = self.path() - data = np.random.randint(0, 1, 10, dtype=bool) - with tiledb.from_numpy(path, data): - pass - with tiledb.open(path) as B: - assert_array_equal(B[:], data) - - def test_array_2d(self): - A = np.arange(10000).reshape((1000, 10)) - - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 999), tile=100), tiledb.Dim(domain=(0, 9), tile=2) - ) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - self.assertEqual(len(A), len(T)) - self.assertEqual(A.ndim, T.ndim) - self.assertEqual(A.shape, T.shape) - - self.assertEqual(1, T.nattr) - self.assertEqual(A.dtype, T.attr(0).dtype) - - # check that the non-empty domain is None - self.assertIsNone(T.nonempty_domain()) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - # Set data - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A, T[:]) - - # check the non-empty domain spans the whole domain - self.assertEqual(((0, 999), (0, 9)), T.nonempty_domain()) - - # check array-like - assert_array_equal(A, np.array(T)) - - # slicing - assert_array_equal(A, T[:]) - assert_array_equal(A, T[...]) - assert_array_equal(A, T[slice(None)]) - - # slice first dimension - assert_array_equal(A[:10], T[:10]) - assert_array_equal(A[:10], T[:10]) - assert_array_equal(A[10:20], T[10:20]) - assert_array_equal(A[-10:], T[-10:]) - assert_array_equal(A[:10, :], T[:10, :]) - assert_array_equal(A[10:20, :], T[10:20, :]) - assert_array_equal(A[-10:, :], T[-10:, :]) - assert_array_equal(A[:10, ...], T[:10, ...]) - assert_array_equal(A[10:20, ...], T[10:20, ...]) - assert_array_equal(A[-10:, ...], T[-10:, ...]) - assert_array_equal(A[:10, :, ...], T[:10, :, ...]) - assert_array_equal(A[10:20, :, ...], T[10:20, :, ...]) - assert_array_equal(A[-10:, :, ...], T[-10:, :, ...]) - - # slice second dimension - assert_array_equal(A[:, :2], T[:, :2]) - assert_array_equal(A[:, 2:4], T[:, 2:4]) - assert_array_equal(A[:, -2:], T[:, -2:]) - assert_array_equal(A[..., :2], T[..., :2]) - assert_array_equal(A[..., 2:4], T[..., 2:4]) - assert_array_equal(A[..., -2:], T[..., -2:]) - assert_array_equal(A[:, ..., :2], T[:, ..., :2]) - assert_array_equal(A[:, ..., 2:4], T[:, ..., 2:4]) - assert_array_equal(A[:, ..., -2:], T[:, ..., -2:]) - - # slice both dimensions - assert_array_equal(A[:10, :2], T[:10, :2]) - assert_array_equal(A[10:20, 2:4], T[10:20, 2:4]) - assert_array_equal(A[-10:, -2:], T[-10:, -2:]) - - # slice across tile boundries - assert_array_equal(A[:110], T[:110]) - assert_array_equal(A[190:310], T[190:310]) - assert_array_equal(A[-110:], T[-110:]) - assert_array_equal(A[:110, :], T[:110, :]) - assert_array_equal(A[190:310, :], T[190:310, :]) - assert_array_equal(A[-110:, :], T[-110:, :]) - assert_array_equal(A[:, :3], T[:, :3]) - assert_array_equal(A[:, 3:7], T[:, 3:7]) - assert_array_equal(A[:, -3:], T[:, -3:]) - assert_array_equal(A[:110, :3], T[:110, :3]) - assert_array_equal(A[190:310, 3:7], T[190:310, 3:7]) - assert_array_equal(A[-110:, -3:], T[-110:, -3:]) - - # single row/col/item - assert_array_equal(A[0], T[0]) - assert_array_equal(A[-1], T[-1]) - assert_array_equal(A[:, 0], T[:, 0]) - assert_array_equal(A[:, -1], T[:, -1]) - self.assertEqual(A[0, 0], T[0, 0]) - self.assertEqual(A[-1, -1], T[-1, -1]) - - # too many indices - with self.assertRaises(IndexError): - T[:, :, :] - with self.assertRaises(IndexError): - T[0, :, :] - with self.assertRaises(IndexError): - T[:, 0, :] - with self.assertRaises(IndexError): - T[:, :, 0] - with self.assertRaises(IndexError): - T[0, 0, 0] - - # only single ellipsis allowed - with self.assertRaises(IndexError): - T[..., ...] - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - # check partial assignment - B = np.arange(10000, 20000).reshape((1000, 10)) - T[190:310, 3:7] = B[190:310, 3:7] - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A[:190], T[:190]) - assert_array_equal(A[:, :3], T[:, :3]) - assert_array_equal(B[190:310, 3:7], T[190:310, 3:7]) - assert_array_equal(A[310:], T[310:]) - assert_array_equal(A[:, 7:], T[:, 7:]) - - @pytest.mark.skipif( - not (sys.platform == "win32" and tiledb.libtiledb.version() >= (2, 3, 0)), - reason="Shared network drive only on Win32", - ) - def test_array_1d_shared_drive(self): - A = np.zeros(50) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 49), tile=50)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(dom, (att,)) - uri = self.path("foo", shared=True) - - tiledb.DenseArray.create(uri, schema) - - with tiledb.DenseArray(uri, mode="w") as T: - T[:] = A - - with tiledb.DenseArray(uri, mode="r") as T: - assert_array_equal(A, T[:]) - - with tiledb.DenseArray(uri, mode="w") as T: - value = -1, 3, 10 - A[0], A[1], A[3] = value - T[0], T[1], T[3] = value - with tiledb.DenseArray(uri, mode="r") as T: - assert_array_equal(A, T[:]) - - for value in (-1, 3, 10): - with tiledb.DenseArray(uri, mode="w") as T: - A[5:25] = value - T[5:25] = value - with tiledb.DenseArray(uri, mode="r") as T: - assert_array_equal(A, T[:]) - with tiledb.DenseArray(uri, mode="w") as T: - A[:] = value - T[:] = value - with tiledb.DenseArray(uri, mode="r") as T: - assert_array_equal(A, T[:]) - - def test_fixed_string(self): - a = np.array(["ab", "cd", "ef", "gh", "ij", "kl", "", "op"], dtype="|S2") - with tiledb.from_numpy(self.path("fixed_string"), a) as T: - with tiledb.open(self.path("fixed_string")) as R: - self.assertEqual(T.dtype, R.dtype) - self.assertEqual(R.attr(0).ncells, 2) - assert_array_equal(T, R) - - def test_ncell_int(self): - a = np.array([(1, 2), (3, 4), (5, 6)], dtype=[("", np.int16), ("", np.int16)]) - with tiledb.from_numpy(self.path("ncell_int16"), a) as T: - with tiledb.open(self.path("ncell_int16")) as R: - self.assertEqual(T.dtype, R.dtype) - self.assertEqual(R.attr(0).ncells, 2) - assert_array_equal(T, R) - assert_array_equal(T, R.multi_index[0:2][""]) - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_open_with_timestamp(self, use_timestamps): - A = np.zeros(3) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3, dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.DenseArray.create(self.path("foo"), schema) - - # write - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - self.assertEqual(T[0], 0) - self.assertEqual(T[1], 0) - self.assertEqual(T[2], 0) - - if use_timestamps: - # sleep 200ms and write - time.sleep(0.2) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[0:1] = 1 - - if use_timestamps: - # sleep 200ms and write - time.sleep(0.2) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[1:2] = 2 - - frags = tiledb.array_fragments(self.path("foo")) - # timestamps are in the form of (start, end) for each fragment, with start == end, - # as we are not dealing with consolidated fragments. Let's simply read from 0 to the end timestamp. - read_timestamps = [(0, frag.timestamp_range[1]) for frag in frags] - - # read at first timestamp - with tiledb.DenseArray( - self.path("foo"), timestamp=read_timestamps[0], mode="r" - ) as T: - self.assertEqual(T[0], 0) - self.assertEqual(T[1], 0) - self.assertEqual(T[2], 0) - - # read at second timestamp - with tiledb.DenseArray( - self.path("foo"), timestamp=read_timestamps[1], mode="r" - ) as T: - self.assertEqual(T[0], 1) - self.assertEqual(T[1], 0) - self.assertEqual(T[2], 0) - - # read at third timestamp - with tiledb.DenseArray( - self.path("foo"), timestamp=read_timestamps[2], mode="r" - ) as T: - self.assertEqual(T[0], 1) - self.assertEqual(T[1], 2) - self.assertEqual(T[2], 0) - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_open_timestamp_range(self, use_timestamps): - A = np.zeros(3) - path = self.path("open_timestamp_range") - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3, dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.DenseArray.create(path, schema) - - # write - if use_timestamps: - with tiledb.DenseArray(path, mode="w", timestamp=1) as T: - T[:] = A * 1 - with tiledb.DenseArray(path, mode="w", timestamp=2) as T: - T[:] = A * 2 - with tiledb.DenseArray(path, mode="w", timestamp=3) as T: - T[:] = A * 3 - with tiledb.DenseArray(path, mode="w", timestamp=4) as T: - T[:] = A * 4 - else: - with tiledb.DenseArray(path, mode="w") as T: - T[:] = A * 1 - T[:] = A * 2 - T[:] = A * 3 - T[:] = A * 4 - - def assert_ts(timestamp, result): - with tiledb.DenseArray(path, mode="r", timestamp=timestamp) as T: - assert_array_equal(T, result) - - timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] - - assert_ts(0, A * np.nan) - assert_ts(timestamps[0], A * 1) - assert_ts(timestamps[1], A * 2) - assert_ts(timestamps[2], A * 3) - assert_ts((timestamps[0], timestamps[1]), A * 2) - assert_ts((0, timestamps[2]), A * 3) - assert_ts((timestamps[0], timestamps[2]), A * 3) - assert_ts((timestamps[1], timestamps[2]), A * 3) - assert_ts((timestamps[1], timestamps[3]), A * 3) - assert_ts((None, timestamps[1]), A * 2) - assert_ts((None, timestamps[2]), A * 3) - assert_ts((timestamps[1], None), A * 3) - assert_ts((timestamps[2], None), A * 3) - assert_ts((timestamps[2], None), A * 3) - - def test_open_attr(self): - uri = self.path("test_open_attr") - schema = tiledb.ArraySchema( - domain=tiledb.Domain( - tiledb.Dim(name="dim0", dtype=np.uint32, domain=(1, 4)) - ), - attrs=( - tiledb.Attr(name="x", dtype=np.int32), - tiledb.Attr(name="y", dtype=np.int32), - ), - ) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, mode="w") as A: - A[:] = {"x": np.array((1, 2, 3, 4)), "y": np.array((5, 6, 7, 8))} - - with self.assertRaises(KeyError): - tiledb.open(uri, attr="z") - - with self.assertRaises(KeyError): - tiledb.open(uri, attr="dim0") - - with tiledb.open(uri, attr="x") as A: - assert_array_equal(A[:], np.array((1, 2, 3, 4))) - assert list(A.multi_index[:].keys()) == ["x"] - - def test_ncell_attributes(self): - dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=int)) - attr = tiledb.Attr(dtype=[("", np.int32), ("", np.int32), ("", np.int32)]) - schema = tiledb.ArraySchema(domain=dom, attrs=(attr,)) - tiledb.DenseArray.create(self.path("foo"), schema) - - A = np.ones((10,), dtype=[("", np.int32), ("", np.int32), ("", np.int32)]) - self.assertEqual(A.dtype, attr.dtype) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A, T[:]) - assert_array_equal(A[:5], T[:5]) - - def test_complex_attributes(self): - dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=int)) - attr = tiledb.Attr(dtype=np.complex64) - schema = tiledb.ArraySchema(domain=dom, attrs=(attr,)) - tiledb.DenseArray.create(self.path("foo"), schema) - - A = np.random.rand(20).astype(np.float32).view(dtype=np.complex64) - - self.assertEqual(schema, tiledb.schema_like(A, dim_dtype=int)) - self.assertEqual(A.dtype, attr.dtype) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A, T[:]) - assert_array_equal(A[:5], T[:5]) - - def test_multiple_attributes(self): - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 1), tile=1, dtype=np.int64), - tiledb.Dim(domain=(0, 3), tile=4, dtype=np.int64), - ) - attr_int = tiledb.Attr("ints", dtype=int) - attr_float = tiledb.Attr("floats", dtype=float) - schema = tiledb.ArraySchema(domain=dom, attrs=(attr_int, attr_float)) - tiledb.DenseArray.create(self.path("foo"), schema) - - V_ints = np.array([[0, 1, 2, 3], [4, 6, 7, 5]]) - V_floats = np.array([[0.0, 1.0, 2.0, 3.0], [4.0, 6.0, 7.0, 5.0]]) - - V = {"ints": V_ints, "floats": V_floats} - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = V - - # check setting attribute in different order from Attr definition - # https://github.com/TileDB-Inc/TileDB-Py/issues/299 - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = V - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - R = T[:] - assert_array_equal(V["ints"], R["ints"]) - assert_array_equal(V["floats"], R["floats"]) - - R = T.query(attrs=("ints",))[1:3] - assert_array_equal(V["ints"][1:3], R["ints"]) - - R = T.query(attrs=("floats",), order="F")[:] - self.assertTrue(R["floats"].flags.f_contiguous) - - R = T.query(attrs=("ints",), coords=True)[0, 0:3] - self.assertTrue("__dim_0" in R) - self.assertTrue("__dim_1" in R) - assert_array_equal(R["__dim_0"], np.array([0, 0, 0])) - assert_array_equal(R["__dim_1"], np.array([0, 1, 2])) - - # Global order returns results as a linear buffer - R = T.query(attrs=("ints",), order="G")[:] - self.assertEqual(R["ints"].shape, (8,)) - - with self.assertRaises(tiledb.TileDBError): - T.query(attrs=("unknown",))[:] - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - # check error ncells length - V["ints"] = V["ints"][1:2].copy() - with self.assertRaises(tiledb.TileDBError): - T[:] = V - - # check error attribute does not exist - V["foo"] = V["ints"].astype(np.int8) - with self.assertRaises(tiledb.TileDBError): - T[:] = V - - def test_array_2d_s1(self): - # This array is currently read back with dtype object - A = np.array([["A", "B"], ["C", ""]], dtype="S") - - uri = self.path() - dom = tiledb.Domain( - tiledb.Dim(name="rows", domain=(0, 1), tile=2, dtype=np.int64), - tiledb.Dim(name="cols", domain=(0, 1), tile=2, dtype=np.int64), - ) - - schema = tiledb.ArraySchema( - domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype="S")] - ) - - tiledb.DenseArray.create(uri, schema) - - with tiledb.DenseArray(uri, mode="w") as T: - T[...] = A - - with tiledb.DenseArray(uri) as T: - assert_array_equal(A, T) - - res = T.multi_index[(0, 1), (0, 1)]["a"] - assert_array_equal(A, res) - - def test_nd_roundtrip(self): - dim_set = np.int64([3 + x % 2 for x in range(2, 12)]) - for i, last in enumerate(range(2, len(dim_set))): - dims = dim_set[:last] - data = np.random.rand(*dims).astype("int32") - with tiledb.from_numpy(self.path(f"nd_roundtrip{i}"), data) as A: - assert_array_equal(data, A[:]) - - def test_array_2d_s3_mixed(self): - # This array is currently read back with dtype object - A = np.array([["AAA", "B"], ["AB", "C"]], dtype="S3") - - uri = self.path() - dom = tiledb.Domain( - tiledb.Dim(name="rows", domain=(0, 1), tile=2, dtype=np.int64), - tiledb.Dim(name="cols", domain=(0, 1), tile=2, dtype=np.int64), - ) - - schema = tiledb.ArraySchema( - domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype="S3")] - ) - - tiledb.DenseArray.create(uri, schema) - - with tiledb.DenseArray(uri, mode="w") as T: - T[...] = A - - with tiledb.DenseArray(uri) as T: - assert_array_equal(A, T) - - res = T.multi_index[(0, 1), (0, 1)]["a"] - assert_array_equal(A, res) - - def test_incomplete_dense(self): - path = self.path("incomplete_dense") - # create 10 MB array - data = np.arange(1310720, dtype=np.int64) - # if `tile` is not set, it defaults to the full array and we - # only read 8 bytes at a time. - use_tile = 131072 - # use_tile = None - with tiledb.from_numpy(path, data, tile=use_tile) as A: - pass - - # create context with 1 MB memory budget (2 MB total, 1 MB usable) - config = tiledb.Config( - {"sm.memory_budget": 2 * 1024**2, "py.init_buffer_bytes": 1024**2} - ) - self.assertEqual(config["py.init_buffer_bytes"], str(1024**2)) - # TODO would be good to check repeat count here. Not currently exposed by retry loop. - with tiledb.DenseArray(path, ctx=tiledb.Ctx(config)) as A: - res_mr = A.multi_index[slice(0, len(data) - 1)] - assert_array_equal(res_mr[""], data) - res_idx = A[:] - assert_array_equal(res_idx, data) - - if has_pandas(): - df = A.df[:] - assert_array_equal(df[""], data) - - def test_written_fragment_info(self): - uri = self.path("test_written_fragment_info") - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.DenseArray.create(uri, schema) - - with tiledb.DenseArray(uri, mode="w") as T: - T[:] = np.arange(0, 10, dtype=np.int64) - - self.assertTrue(T.last_write_info is not None) - self.assertTrue(len(T.last_write_info.keys()) == 1) - t_w1, t_w2 = list(T.last_write_info.values())[0] - self.assertTrue(t_w1 > 0) - self.assertTrue(t_w2 > 0) - - def test_missing_schema_error(self): - uri = self.path("test_missing_schema_error") - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.DenseArray.create(uri, schema) - - with tiledb.DenseArray(uri, mode="w") as T: - T[:] = np.arange(0, 10, dtype=np.int64) - - if tiledb.libtiledb.version() < (2, 4): - tiledb.VFS().remove_file(os.path.join(uri, "__array_schema.tdb")) - else: - tiledb.VFS().remove_dir(os.path.join(uri, "__schema")) - - # new ctx is required running against S3 because otherwise the schema - # will simply be read from the cache. - with tiledb.scope_ctx(): - with self.assertRaises(tiledb.TileDBError): - tiledb.DenseArray(uri) - - @pytest.mark.xfail( - tiledb.libtiledb.version() >= (2, 5), - reason="Skip sparse_write_to_dense with libtiledb 2.5+", - ) - def test_sparse_write_to_dense(self): - class AssignAndCheck: - def __init__(self, outer, *shape): - self.outer = outer - self.shape = shape - - def __setitem__(self, s, v): - A = np.random.rand(*self.shape) - - uri = self.outer.path( - f"sparse_write_to_dense{random.randint(0,np.uint64(-1))}" - ) - - tiledb.from_numpy(uri, A).close() - with tiledb.open(uri, "w") as B: - B[s] = v - - A[s] = v - with tiledb.open(uri) as B: - assert_array_equal(A, B[:]) - - D = AssignAndCheck(self, 5, 5) - with pytest.warns( - DeprecationWarning, match="Sparse writes to dense arrays is deprecated" - ): - D[np.array([1, 2]), np.array([0, 0])] = np.array([0, 2]) - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_reopen_dense_array(self, use_timestamps): - uri = self.path("test_reopen_dense_array") - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=10, dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.DenseArray.create(uri, schema) - - data = np.arange(0, 10, dtype=np.int64) - - if use_timestamps: - with tiledb.DenseArray(uri, mode="w", timestamp=1) as T: - T[:] = data - with tiledb.DenseArray(uri, mode="w", timestamp=2) as T: - T[:] = data * 2 - else: - with tiledb.DenseArray(uri, mode="w") as T: - T[:] = data - T[:] = data * 2 - - timestamps = [t[0] for t in tiledb.array_fragments(uri).timestamp_range] - T = tiledb.DenseArray(uri, mode="r", timestamp=timestamps[0]) - assert_array_equal(T[:], data) - - T.reopen() - assert_array_equal(T[:], data * 2) - - T.close() - - def test_data_begins_with_null_chars(self): - path = self.path("test_data_begins_with_null_chars") - data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.str_) - - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data))) - att = tiledb.Attr(dtype=np.str_, var=True) - schema = tiledb.ArraySchema(dom, (att,)) - tiledb.Array.create(path, schema) - - with tiledb.open(path, mode="w") as T: - T[:] = data - - with tiledb.open(path, mode="r") as T: - assert_array_equal(data, T[:]) - - def test_match_numpy_schema_dimensions(self): - path = self.path("test_match_numpy_schema_dimensions") - - dom = tiledb.Domain( - tiledb.Dim(name="dim_0", domain=(1, 5), dtype=np.int64), - tiledb.Dim(name="dim_1", domain=(1, 10), dtype=np.int64), - tiledb.Dim(name="dim_2", domain=(1, 20), dtype=np.int64), - ) - att = tiledb.Attr(name="a", dtype=np.int64) - schema = tiledb.ArraySchema(dom, (att,)) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as A: - with self.assertRaises(ValueError): - A[:] = np.zeros((10, 20, 5)) - A[:] = np.zeros((5, 10, 20)) - - -class TestVarlen(DiskTestCase): - def test_varlen_write_bytes(self): - A = np.array( - [ - "aa", - "bbb", - "ccccc", - "ddddddddddddddddddddd", - "ee", - "ffffff", - "g", - "hhhhhhhhhh", - ], - dtype=bytes, - ) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.bytes_) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A[:], T[:]) - - assert_array_equal(A, T.multi_index[1 : len(A)][""]) - - def test_varlen_sparse_all_empty_strings(self): - A = np.array(["", "", "", "", ""], dtype=object) - dim_len = len(A) - uri = self.path("varlen_all_empty_strings") - - dom = tiledb.Domain(tiledb.Dim(domain=(1, dim_len), tile=dim_len)) - att = tiledb.Attr(name="a1", dtype=np.str_, var=True) - - schema = tiledb.ArraySchema(dom, (att,), sparse=True) - - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, mode="w") as T: - T[np.arange(1, dim_len + 1)] = {"a1": A} - - with tiledb.open(uri, mode="r") as T: - # check interior range - assert_array_equal(A[1:-1], T[2:-1]["a1"]) - assert_array_equal(A[1:-1], T.multi_index[2 : dim_len - 1]["a1"]) - - def test_varlen_write_unicode(self): - A = np.array( - [ - "aa", - "bbb", - "ccccc", - "ddddddddddddddddddddd", - "ee", - "ffffff", - "g", - "", - "hhhhhhhhhh", - ], - dtype=np.str_, - ) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.str_, var=True) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A[:], T[:]) - - def test_varlen_write_floats(self): - # Generates 8 variable-length float64 subarrays (subarray len and content are randomized) - A = np.array( - [np.random.rand(x) for x in np.random.randint(1, 12, 8)], dtype=object - ) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.float64, var=True) - - schema = tiledb.ArraySchema(dom, (att,)) - tiledb.DenseArray.create(self.path("foo"), schema) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - T_ = T[:] - # TODO/note: the return is a 0-element array. - assert_array_equal(A[0], T[1][()]) - assert_array_equal(A[-1], T[-1][()]) - self.assertEqual(len(A), len(T_)) - # can't use assert_array_equal w/ object array - self.assertTrue(all(np.array_equal(x, A[i]) for i, x in enumerate(T_))) - - def test_varlen_write_floats_2d(self): - A = np.array( - [np.random.rand(x) for x in np.arange(1, 10)], dtype=object - ).reshape(3, 3) - - # basic write - dom = tiledb.Domain( - tiledb.Dim(domain=(1, 3), tile=len(A)), - tiledb.Dim(domain=(1, 3), tile=len(A)), - ) - att = tiledb.Attr(dtype=np.float64, var=True) - - schema = tiledb.ArraySchema(dom, (att,)) - tiledb.DenseArray.create(self.path("foo"), schema) - - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - T_ = T[:] - self.assertEqual(len(A), len(T_)) - # can't use assert_array_equal w/ object array - self.assertTrue( - np.all( - [np.array_equal(A.flat[i], T[:].flat[i]) for i in np.arange(0, 9)] - ) - ) - - def test_varlen_write_int_subarray(self): - A = np.array( - list( - map( - lambda x: np.array(x, dtype=np.uint64), - [np.arange(i, 2 * i + 1) for i in np.arange(0, 16)], - ) - ), - dtype="O", - ).reshape(4, 4) - - uri = self.path("test_varlen_write_int_subarray") - - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 3), tile=len(A)), - tiledb.Dim(domain=(0, 3), tile=len(A)), - ) - att = tiledb.Attr(dtype=np.uint64, var=True) - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(uri, schema) - - # NumPy forces single-element object arrays into a contiguous layout - # so we alternate the size to get a consistent baseline array. - A_onestwos = np.array( - list( - map( - lambda x: np.array(x, dtype=np.uint64), - list([(1,) if x % 2 == 0 else (1, 2) for x in range(16)]), - ) - ), - dtype=np.dtype("O"), - ).reshape(4, 4) - - with tiledb.open(uri, "w") as T: - T[:] = A_onestwos - - with tiledb.open(uri, "w") as T: - T[1:3, 1:3] = A[1:3, 1:3] - - A_assigned = A_onestwos.copy() - A_assigned[1:3, 1:3] = A[1:3, 1:3] - - with tiledb.open(uri) as T: - assert_subarrays_equal(A_assigned, T[:]) - - def test_varlen_write_fixedbytes(self): - # The actual dtype of this array is 'S21' - A = np.array( - [ - "aa", - "bbb", - "ccccc", - "ddddddddddddddddddddd", - "ee", - "ffffff", - "g", - "hhhhhhhhhh", - ], - dtype=np.dtype("S"), - ) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.bytes_) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A[:], T[:]) - - def test_varlen_write_fixedunicode(self): - A = np.array( - [ - "aa", - "bbb", - "ccccc", - "ddddddddddddddddddddd", - "ee", - "ffffff", - "", - "g", - "hhhhhhhhhh", - ], - dtype=np.dtype("U"), - ) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.str_) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - assert_array_equal(A[:], T[:]) - - def test_varlen_write_ints(self): - A = np.array( - [ - np.uint64(np.random.randint(0, pow(10, 6), x)) - for x in np.random.randint(1, 12, 8) - ], - dtype=object, - ) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.int64, var=True) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - T[:] = A - - with tiledb.DenseArray(self.path("foo"), mode="r") as T: - T_ = T[:] - self.assertEqual(len(A), len(T)) - # can't use assert_array_equal w/ object array - self.assertTrue(all(np.array_equal(x, A[i]) for i, x in enumerate(T_))) - - def test_varlen_wrong_domain(self): - A = np.array( - [ - "aa", - "bbb", - "ccccc", - "ddddddddddddddddddddd", - "ee", - "ffffff", - "g", - "hhhhhhhhhh", - ] - ) - dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=3)) - att = tiledb.Attr(dtype=np.bytes_) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - with self.assertRaises(ValueError): - T[:] = A - - def test_array_varlen_mismatched(self): - # Test that we raise a TypeError when passing a heterogeneous object array. - A = np.array([b"aa", b"bbb", b"cccc", np.uint64([1, 3, 4])], dtype=object) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 3), tile=4)) - att = tiledb.Attr(dtype=np.bytes_, var=True) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(self.path("foo"), schema) - with tiledb.DenseArray(self.path("foo"), mode="w") as T: - with self.assertRaises(TypeError): - T[:] = A - - def test_array_varlen_2d_s_fixed(self): - A = np.array( - [["AAAAAAAAAa", "BBB"], ["ACCC", "BBBCBCBCBCCCBBCBCBCCBC"]], dtype="S" - ) - - uri = self.path("varlen_2d_s_fixed") - dom = tiledb.Domain( - tiledb.Dim(name="rows", domain=(0, 1), tile=2, dtype=np.int64), - tiledb.Dim(name="cols", domain=(0, 1), tile=2, dtype=np.int64), - ) - - schema = tiledb.ArraySchema( - domain=dom, sparse=False, attrs=[tiledb.Attr(name="a", dtype="S", var=True)] - ) - - tiledb.DenseArray.create(uri, schema) - - with tiledb.DenseArray(uri, mode="w") as T: - T[...] = A - - with tiledb.DenseArray(uri) as T: - assert_array_equal(A, T) - - -class TestSparseArray(DiskTestCase): - @pytest.mark.xfail - def test_simple_1d_sparse_vector(self): - dom = tiledb.Domain(tiledb.Dim(domain=(0, 3), tile=4, dtype=int)) - att = tiledb.Attr(dtype=int) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.SparseArray.create(self.path("foo"), schema) - - values = np.array([3, 4]) - with tiledb.SparseArray(self.path("foo"), mode="w") as T: - T[[1, 2]] = values - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - assert_array_equal(T[[1, 2]], values) - - @pytest.mark.xfail - def test_simple_2d_sparse_vector(self): - attr = tiledb.Attr(dtype=float) - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 3), tile=4, dtype=int), - tiledb.Dim(domain=(0, 3), tile=4, dtype=int), - ) - schema = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True) - tiledb.SparseArray.create(self.path("foo"), schema) - - values = np.array([3, 4], dtype=float) - with tiledb.SparseArray(self.path("foo"), mode="w") as T: - T[[1, 2], [1, 2]] = values - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - assert_array_equal(T[[1, 2], [1, 2]], values) - - def test_simple3d_sparse_vector(self): - uri = self.path("simple3d_sparse_vector") - dom = tiledb.Domain( - tiledb.Dim("x", domain=(0, 3), tile=4, dtype=int), - tiledb.Dim("y", domain=(0, 3), tile=4, dtype=int), - tiledb.Dim("z", domain=(0, 3), tile=4, dtype=int), - ) - attr = tiledb.Attr(dtype=float) - schema = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True) - tiledb.SparseArray.create(uri, schema) - - values = np.array([3, 4], dtype=float) - coords = (1, 2), (1, 2), (1, 2) - with tiledb.SparseArray(uri, mode="w") as T: - T[coords] = values - - with tiledb.SparseArray(uri, mode="r") as T: - res = T.multi_index[coords] - assert_array_equal(res[""], values) - assert_array_equal(res["x"], coords[0]) - assert_array_equal(res["y"], coords[1]) - assert_array_equal(res["z"], coords[2]) - - @pytest.mark.xfail - def test_sparse_ordered_fp_domain(self): - dom = tiledb.Domain(tiledb.Dim("x", domain=(0.0, 10.0), tile=2.0, dtype=float)) - attr = tiledb.Attr(dtype=float) - attr = tiledb.Attr(dtype=float) - schema = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True) - tiledb.SparseArray.create(self.path("foo"), schema) - - values = np.array([3.3, 2.7]) - with tiledb.SparseArray(self.path("foo"), mode="w") as T: - T[[2.5, 4.2]] = values - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - assert_array_equal(T[[2.5, 4.2]], values) - - @pytest.mark.xfail - def test_sparse_unordered_fp_domain(self): - dom = tiledb.Domain(tiledb.Dim("x", domain=(0.0, 10.0), tile=2.0, dtype=float)) - attr = tiledb.Attr(dtype=float) - schema = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True) - tiledb.SparseArray.create(self.path("foo"), schema) - values = np.array([3.3, 2.7]) - with tiledb.SparseArray(self.path("foo"), mode="w") as T: - T[[4.2, 2.5]] = values - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - assert_array_equal(T[[2.5, 4.2]], values[::-1]) - - @pytest.mark.xfail - def test_multiple_attributes(self): - uri = self.path() - - dom = tiledb.Domain( - tiledb.Dim(domain=(1, 10), tile=10, dtype=int), - tiledb.Dim(domain=(1, 10), tile=10, dtype=int), - ) - attr_int = tiledb.Attr("ints", dtype=int) - attr_float = tiledb.Attr("floats", dtype="float") - schema = tiledb.ArraySchema( - domain=dom, attrs=(attr_int, attr_float), sparse=True - ) - tiledb.SparseArray.create(self.path("foo"), schema) - - IJ = (np.array([1, 1, 1, 2, 3, 3, 3, 4]), np.array([1, 2, 4, 3, 1, 6, 7, 5])) - - V_ints = np.array([0, 1, 2, 3, 4, 6, 7, 5]) - V_floats = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 5.0]) - - V = {"ints": V_ints, "floats": V_floats} - with tiledb.SparseArray(uri, mode="w") as T: - T[IJ] = V - with tiledb.SparseArray(uri, mode="r") as T: - R = T[IJ] - assert_array_equal(V["ints"], R["ints"]) - assert_array_equal(V["floats"], R["floats"]) - - # check error attribute does not exist - # TODO: should this be an attribute error? - with tiledb.SparseArray(uri, mode="w") as T: - V["foo"] = V["ints"].astype(np.int8) - with self.assertRaises(tiledb.TileDBError): - T[IJ] = V - - # check error ncells length - V["ints"] = V["ints"][1:2].copy() - with self.assertRaises(AttributeError): - T[IJ] = V - - def test_query_real_multi_index(self, fx_sparse_cell_order): - uri = self.path("query_real_multi_index") - - dom = tiledb.Domain( - tiledb.Dim("x", domain=(-10.0, 10.0), tile=2.0, dtype=float) - ) - attr = tiledb.Attr("a", dtype=np.float32) - schema = tiledb.ArraySchema( - domain=dom, attrs=(attr,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(uri, schema) - - values = np.array([3.3, 2.7]) - with tiledb.SparseArray(uri, mode="w") as T: - T[[2.5, 4.2]] = values - with tiledb.SparseArray(uri, mode="r") as T: - assert_array_equal( - T.query(coords=True).multi_index[-10.0 : np.nextafter(4.2, 0)]["a"], - np.float32(3.3), - ) - assert_array_equal( - T.query(coords=True).multi_index[-10.0 : np.nextafter(4.2, 0)]["x"], - np.float32([2.5]), - ) - assert_array_equal( - T.query(coords=False).multi_index[-10.0:5.0]["a"], - np.float32([3.3, 2.7]), - ) - self.assertTrue( - "coords" not in T.query(coords=False).multi_index[-10.0:5.0] - ) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - @pytest.mark.parametrize("dtype", ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8"]) - def test_sparse_index_dtypes(self, dtype): - path = self.path() - - dtype_min, dtype_max = DataType.from_numpy(dtype).domain - dim = tiledb.Dim("d0", domain=(dtype_min, dtype_max - 1), dtype=dtype, tile=1) - attr = tiledb.Attr("attr", dtype=dtype) - schema = tiledb.ArraySchema(tiledb.Domain(dim), [attr], sparse=True) - tiledb.SparseArray.create(path, schema) - - data = np.arange(0, 3).astype(dtype) - with tiledb.open(path, "w") as A: - A[data] = data - - with tiledb.open(path) as B: - assert_array_equal(B[:]["attr"], data) - assert B[data[0]]["attr"] == data[0] - assert B[data[1]]["attr"] == data[1] - assert B.multi_index[data[0]]["attr"] == data[0] - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 10), - reason="TILEDB_BOOL introduced in libtiledb 2.10", - ) - def test_sparse_index_bool(self): - path = self.path() - data = np.random.randint(0, 1, 10, dtype=bool) - - dom = tiledb.Domain(tiledb.Dim("d0", domain=(1, 10), dtype=int)) - attr = tiledb.Attr("attr", dtype=np.bool_) - schema = tiledb.ArraySchema( - domain=dom, attrs=(attr,), sparse=True, allows_duplicates=True - ) - tiledb.SparseArray.create(path, schema) - - with tiledb.open(path, "w") as A: - A[np.arange(1, 11)] = data - - with tiledb.open(path) as B: - assert_array_equal(B[:]["attr"], data) - assert_array_equal(B.multi_index[:]["attr"], data) - - def test_query_real_exact(self, fx_sparse_cell_order): - """ - Test and demo of querying at floating point representable boundaries - - Concise representation of expected behavior: - - c0,c1,c2 = [3.0100000000000002, 3.0100000000000007, 3.010000000000001] - values = [1,2,3] - - [c0:c0] -> [1] - [c1:c1] -> [2] - [c2:c2] -> [3] - - [c0:c1] -> [1,2] - [c0:c2] -> [1,2,3] - - [c0 - nextafter(c0,0) : c0] -> [1] - [c0 - nextafter(c0,0) : c0 - nextafter(c0,0)] -> [] - - [c2:c2+nextafter(c2)] -> [3] - [c2+nextafter(c2) : c2+nextafter(c2)] -> [] - - """ - uri = self.path() - - dom = tiledb.Domain( - tiledb.Dim("x", domain=(-10.0, 10.0), tile=2.0, dtype=float) - ) - attr = tiledb.Attr("", dtype=np.float32) - schema = tiledb.ArraySchema( - domain=dom, attrs=(attr,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(uri, schema) - - c0 = np.nextafter(3.01, 4) # smaller - c1 = np.nextafter(c0, 4) - c2 = np.nextafter(c1, 4) # larger - - # for debugging use: - # np.set_printoptions(precision=16, floatmode='maxprec') - # print(c0,c1,c2) - - values = np.array([1, 2, 3]) - with tiledb.SparseArray(uri, mode="w") as T: - T[[c0, c1, c2]] = values - - with tiledb.SparseArray(uri, mode="r") as T: - for i, c in enumerate([c0, c1, c2]): - assert_array_equal(T.query(coords=True).multi_index[c:c][""], values[i]) - # test (coord, coord + nextafter) - c0_prev = np.nextafter(c0, 0) - c2_next = np.nextafter(c2, 4) - assert_array_equal(T.query(coords=True).multi_index[c0:c1][""], [1, 2]) - assert_array_equal(T.query(coords=True).multi_index[c0:c2][""], [1, 2, 3]) - assert_array_equal(T.query(coords=True).multi_index[c2:c2_next][""], 3) - assert_array_equal(T.query(coords=True).multi_index[c0_prev:c0][""], 1) - assert_array_equal( - T.query(coords=True).multi_index[c0_prev:c0_prev][""], [] - ) - # test (coord + nextafter, coord + nextafter) - assert_array_equal( - T.query(coords=True).multi_index[c2_next:c2_next][""], np.array([]) - ) - # test (coord - nextafter, coord) - assert_array_equal( - T.query(coords=True).multi_index[c0:c1][""], values[[0, 1]] - ) - # test (coord - nextafter, coord + nextafter) - assert_array_equal( - T.query(coords=True).multi_index[c0:c2][""], values[[0, 1, 2]] - ) - - def test_sparse_query_specified_dim_coords(self, fx_sparse_cell_order): - uri = self.path("sparse_query_specified_dim_coords") - - dom = tiledb.Domain( - tiledb.Dim("i", domain=(1, 10), tile=1, dtype=int), - tiledb.Dim("j", domain=(11, 20), tile=1, dtype=int), - ) - att = tiledb.Attr("", dtype=int) - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(uri, schema) - - i = np.array([1, 1, 1, 2, 3, 3, 3, 4]) - j = np.array([11, 12, 14, 13, 11, 16, 17, 15]) - - with tiledb.SparseArray(uri, mode="w") as A: - A[i, j] = np.array([0, 1, 2, 3, 4, 6, 7, 5]) - - # data is returned in Hilbert order, so we need to check sorted - with tiledb.SparseArray(uri, mode="r") as A: - Ai = A.query(dims=["i"])[:] - self.assertTrue("i" in Ai) - self.assertFalse("j" in Ai) - assert_unordered_equal(Ai["i"], i, fx_sparse_cell_order == "hilbert") - - Aj = A.query(dims=["j"])[:] - self.assertFalse("i" in Aj) - self.assertTrue("j" in Aj) - assert_unordered_equal(Aj["j"], j, fx_sparse_cell_order == "hilbert") - - Aij = A.query(dims=["i", "j"])[:] - self.assertTrue("i" in Aij) - self.assertTrue("j" in Aij) - assert_unordered_equal(Aij["i"], i, fx_sparse_cell_order == "hilbert") - assert_unordered_equal(Aij["j"], j, fx_sparse_cell_order == "hilbert") - - def test_dense_query_specified_dim_coords(self): - uri = self.path("dense_query_specified_dim_coords") - - dom = tiledb.Domain( - tiledb.Dim("i", domain=(1, 3), tile=1, dtype=int), - tiledb.Dim("j", domain=(4, 6), tile=1, dtype=int), - ) - att = tiledb.Attr("", dtype=int) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, mode="w") as A: - A[:, :] = np.arange(9).reshape(3, 3) - - with tiledb.open(uri, mode="r") as A: - i = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]) - j = np.array([[4, 5, 6], [4, 5, 6], [4, 5, 6]]) - - Ai = A.query(dims=["i"])[:] - self.assertTrue("i" in Ai) - self.assertFalse("j" in Ai) - assert_array_equal(Ai["i"], i) - - Aj = A.query(dims=["j"])[:] - self.assertFalse("i" in Aj) - self.assertTrue("j" in Aj) - assert_array_equal(Aj["j"], j) - - Aij = A.query(dims=["i", "j"])[:] - self.assertTrue("i" in Aij) - self.assertTrue("j" in Aij) - assert_array_equal(Aij["i"], i) - assert_array_equal(Aij["j"], j) - - def test_subarray(self, fx_sparse_cell_order): - dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) - att = tiledb.Attr("", dtype=float) - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(self.path("foo"), schema) - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - self.assertIsNone(T.nonempty_domain()) - - with tiledb.SparseArray(self.path("foo"), mode="w") as T: - T[[50, 60, 100]] = [1.0, 2.0, 3.0] - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - self.assertEqual(((50, 100),), T.nonempty_domain()) - - # stepped ranges are not supported - with self.assertRaises(IndexError) as idxerr: - T[40:61:5] - assert str(idxerr.value) == "steps are not supported for sparse arrays" - - # retrieve just valid coordinates in subarray T[40:60] - assert_array_equal(T[40:61]["x"], [50, 60]) - - # TODO: dropping coords with one anon value returns just an array - res = T.query(coords=False)[40:61] - assert_array_equal(res[""], [1.0, 2.0]) - self.assertEqual(("coords" in res), False) - - def test_sparse_bytes(self, fx_sparse_cell_order): - dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) - att = tiledb.Attr("", var=True, dtype=np.bytes_) - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(self.path("foo"), schema) - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - self.assertIsNone(T.nonempty_domain()) - A = np.array( - [b"aaa", b"bbbbbbbbbbbbbbbbbbbb", b"ccccccccccccccccccccccccc"], - dtype=np.bytes_, - ) - - with tiledb.SparseArray(self.path("foo"), mode="w") as T: - T[[50, 60, 100]] = A - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - self.assertEqual(((50, 100),), T.nonempty_domain()) - - # retrieve just valid coordinates in subarray T[40:60] - assert_array_equal(T[40:61]["x"], [50, 60]) - - # TODO: dropping coords with one anon value returns just an array - res = T.query(coords=False)[40:61] - assert_array_equal(res[""], A[0:2]) - self.assertEqual(("coords" in res), False) - - # empty sparse varlen result - res = T[1000] - assert_array_equal(res[""], np.array("", dtype="S1")) - assert_array_equal(res["x"], np.array([], dtype=np.int64)) - - def test_sparse_unicode(self, fx_sparse_cell_order): - dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) - att = tiledb.Attr("", var=True, dtype=np.str_) - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(self.path("foo"), schema) - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - self.assertIsNone(T.nonempty_domain()) - - A = np.array( - [ - "1234545lkjalsdfj", - "mnopqrs", - "ijkl", - "gh", - "abcdef", - "aαbββcγγγdδδδδ", - "aαbββc", - "", - "γγγdδδδδ", - ], - dtype=object, - ) - - with tiledb.SparseArray(self.path("foo"), mode="w") as T: - T[[3, 4, 5, 6, 7, 50, 60, 70, 100]] = A - - with tiledb.SparseArray(self.path("foo"), mode="r") as T: - self.assertEqual(((3, 100),), T.nonempty_domain()) - - # retrieve just valid coordinates in subarray T[40:60] - assert_array_equal(T[40:61]["x"], [50, 60]) - - # TODO: dropping coords with one anon value returns just an array - res = T.query(coords=False)[40:61] - assert_array_equal(res[""], A[5:7]) - self.assertEqual(("coords" in res), False) - - # empty sparse varlen result - res = T[1000] - assert_array_equal(res[""], np.array("", dtype="U1")) - assert_array_equal(res["x"], np.array([], dtype=np.int64)) - - def test_sparse_query(self, fx_sparse_cell_order): - uri = self.path("test_sparse_query") - dom = tiledb.Domain( - tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=np.float64) - ) - - att = tiledb.Attr("", dtype=float) - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(uri, schema) - - coords = np.random.uniform(low=1, high=10000, size=100) - data = np.random.rand(100) - - with tiledb.SparseArray(uri, mode="w") as T: - T[coords] = data - - # Test that TILEDB_UNORDERED works correctly - with tiledb.SparseArray(uri, mode="r") as A: - res = A[1:10001][""] # index past the end here to ensure inclusive result - res = A.multi_index[1:10000][""] - assert_array_equal(np.sort(res), np.sort(data)) - res = A.query(order="U").multi_index[1:10000][""] - assert_array_equal(np.sort(res), np.sort(data)) - - def test_sparse_fixes(self, fx_sparse_cell_order): - uri = self.path("test_sparse_fixes") - # indexing a 1 element item in a sparse array - # (issue directly reported) - # the test here is that the indexing does not raise - dims = ( - tiledb.Dim("foo", domain=(0, 6), tile=2), - tiledb.Dim("bar", domain=(0, 6), tile=1), - tiledb.Dim("baz", domain=(0, 100), tile=1), - ) - dom = tiledb.Domain(*dims) - att = tiledb.Attr(name="strattr", dtype="S1") - schema = tiledb.ArraySchema( - domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(uri, schema) - with tiledb.SparseArray(uri) as T: - T[:] - - # - test that assigning incompatible value to fixed-len str raises error - # - test that value-conversion error raises exception w/ attr name context - c = np.vstack( - list((x, y, z) for x in range(7) for y in range(7) for z in range(101)) - ) - with tiledb.SparseArray(uri, "w") as T: - with self.assertRaises(ValueError): - T[c[:, 0], c[:, 1], c[:, 2]] = {"strattr": np.random.rand(7, 7, 101)} - save_exc = list() - try: - T[c[:, 0], c[:, 1], c[:, 2]] = {"strattr": np.random.rand(7, 7, 101)} - except ValueError as e: - save_exc.append(e) - exc = save_exc.pop() - self.assertEqual( - str(exc.__context__), - "Cannot write a string value to non-string typed attribute 'strattr'!", - ) - - @tiledb.scope_ctx({"sm.check_coord_dups": False}) - def test_sparse_fixes_ch1560(self, fx_sparse_cell_order): - uri = self.path("sparse_fixes_ch1560") - schema = tiledb.ArraySchema( - domain=tiledb.Domain( - *[tiledb.Dim(name="id", domain=(1, 5000), tile=25, dtype="int32")] - ), - attrs=[ - tiledb.Attr(name="a1", dtype="datetime64[s]"), - tiledb.Attr(name="a2", dtype="|S0"), - tiledb.Attr(name="a3", dtype="|S0"), - tiledb.Attr(name="a4", dtype="int32"), - tiledb.Attr(name="a5", dtype="int8"), - tiledb.Attr(name="a6", dtype="int32"), - ], - cell_order=fx_sparse_cell_order, - tile_order="row-major", - sparse=True, - ) - - tiledb.SparseArray.create(uri, schema) - - data = OrderedDict( - [ - ( - "a1", - np.array( - [ - "2017-04-01T04:00:00", - "2019-10-01T00:00:00", - "2019-10-01T00:00:00", - "2019-10-01T00:00:00", - ], - dtype="datetime64[s]", - ), - ), - ("a2", [b"Bus", b"The RIDE", b"The RIDE", b"The RIDE"]), - ("a3", [b"Bus", b"The RIDE", b"The RIDE", b"The RIDE"]), - ("a4", np.array([6911721, 138048, 138048, 138048], dtype="int32")), - ("a5", np.array([20, 23, 23, 23], dtype="int8")), - ("a6", np.array([345586, 6002, 6002, 6002], dtype="int32")), - ] - ) - - with tiledb.open(uri, "w") as A: - A[[1, 462, 462, 462]] = data - - with tiledb.open(uri) as A: - res = A[:] - res.pop("id") - for k, v in res.items(): - if isinstance(data[k], (np.ndarray, list)): - assert_array_equal(res[k], data[k]) - else: - self.assertEqual(res[k], data[k]) - - def test_sparse_2d_varlen_int(self, fx_sparse_cell_order): - path = self.path("test_sparse_2d_varlen_int") - dtype = np.int32 - dom = tiledb.Domain( - tiledb.Dim(domain=(1, 4), tile=2), tiledb.Dim(domain=(1, 4), tile=2) - ) - att = tiledb.Attr(dtype=dtype, var=True) - schema = tiledb.ArraySchema( - dom, (att,), sparse=True, cell_order=fx_sparse_cell_order - ) - - tiledb.SparseArray.create(path, schema) - - if tiledb.libtiledb.version() >= (2, 3) and fx_sparse_cell_order == "hilbert": - c1 = np.array([2, 1, 3, 4]) - c2 = np.array([1, 2, 3, 4]) - else: - c1 = np.array([1, 2, 3, 4]) - c2 = np.array([2, 1, 3, 4]) - - data = np.array( - [ - np.array([1, 1], dtype=np.int32), - np.array([2], dtype=np.int32), - np.array([3, 3, 3], dtype=np.int32), - np.array([4], dtype=np.int32), - ], - dtype="O", - ) - - with tiledb.SparseArray(path, "w") as A: - A[c1, c2] = data - - with tiledb.SparseArray(path) as A: - res = A[:] - assert_subarrays_equal(res[""], data) - assert_unordered_equal(res["__dim_0"], c1) - assert_unordered_equal(res["__dim_1"], c2) - - def test_sparse_mixed_domain_uint_float64(self, fx_sparse_cell_order): - path = self.path("mixed_domain_uint_float64") - dims = [ - tiledb.Dim(name="index", domain=(0, 51), tile=11, dtype=np.uint64), - tiledb.Dim(name="dpos", domain=(-100.0, 100.0), tile=10, dtype=np.float64), - ] - dom = tiledb.Domain(*dims) - attrs = [tiledb.Attr(name="val", dtype=np.float64)] - - schema = tiledb.ArraySchema( - domain=dom, attrs=attrs, sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(path, schema) - - data = np.random.rand(50, 63) - coords1 = np.repeat(np.arange(0, 50), 63) - coords2 = np.linspace(-100.0, 100.0, num=3150) - - with tiledb.open(path, "w") as A: - A[coords1, coords2] = data - - # tiledb returns coordinates in sorted order, so we need to check the output - # sorted by the first dim coordinates - sidx = np.argsort(coords1, kind="stable") - coords2_idx = np.tile(np.arange(0, 63), 50)[sidx] - - with tiledb.open(path) as A: - res = A[:] - assert_subarrays_equal( - data[coords1[sidx], coords2_idx[sidx]], - res["val"], - fx_sparse_cell_order != "hilbert", - ) - a_nonempty = A.nonempty_domain() - self.assertEqual(a_nonempty[0], (0, 49)) - self.assertEqual(a_nonempty[1], (-100.0, 100.0)) - - def test_sparse_string_domain(self, fx_sparse_cell_order): - path = self.path("sparse_string_domain") - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(None, None), dtype=np.bytes_)) - att = tiledb.Attr(name="a", dtype=np.int64) - schema = tiledb.ArraySchema( - domain=dom, - attrs=(att,), - sparse=True, - cell_order=fx_sparse_cell_order, - capacity=10000, - ) - tiledb.SparseArray.create(path, schema) - - data = [1, 2, 3, 4] - coords = [b"aa", b"bbb", b"c", b"dddd"] - - with tiledb.open(path, "w") as A: - A[coords] = data - - with tiledb.open(path) as A: - ned = A.nonempty_domain()[0] - res = A[ned[0] : ned[1]] - assert_array_equal(res["a"], data) - self.assertEqual(set(res["d"]), set(coords)) - self.assertEqual(A.nonempty_domain(), ((b"aa", b"dddd"),)) - - def test_sparse_string_domain2(self, fx_sparse_cell_order): - path = self.path("sparse_string_domain2") - with self.assertRaises(ValueError): - dims = [ - tiledb.Dim( - name="str", domain=(None, None, None), tile=None, dtype=np.bytes_ - ) - ] - dims = [tiledb.Dim(name="str", domain=(None, None), tile=None, dtype=np.bytes_)] - dom = tiledb.Domain(*dims) - attrs = [tiledb.Attr(name="val", dtype=np.float64)] - - schema = tiledb.ArraySchema( - domain=dom, attrs=attrs, sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(path, schema) - - data = np.random.rand(10) - coords = [rand_ascii_bytes(random.randint(5, 50)) for _ in range(10)] - - with tiledb.open(path, "w") as A: - A[coords] = data - - with tiledb.open(path) as A: - ned = A.nonempty_domain()[0] - res = A[ned[0] : ned[1]] - self.assertTrue(set(res["str"]) == set(coords)) - # must check data ordered by coords - assert_array_equal(res["val"], data[np.argsort(coords, kind="stable")]) - - def test_sparse_mixed_domain(self, fx_sparse_cell_order): - uri = self.path("sparse_mixed_domain") - dims = [ - tiledb.Dim(name="p", domain=(-100.0, 100.0), tile=10, dtype=np.float64), - tiledb.Dim(name="str", domain=(None, None), tile=None, dtype=np.bytes_), - ] - dom = tiledb.Domain(*dims) - attrs = [tiledb.Attr(name="val", dtype=np.float64)] - - schema = tiledb.ArraySchema( - domain=dom, attrs=attrs, sparse=True, cell_order=fx_sparse_cell_order - ) - tiledb.SparseArray.create(uri, schema) - - nrows = 5 - idx_f64 = np.random.rand(nrows) - idx_str = [rand_ascii(5).encode("utf-8") for _ in range(nrows)] - data = np.random.rand(nrows) - - with tiledb.SparseArray(uri, "w") as A: - A[idx_f64, idx_str] = {"val": data} - - # test heterogeneous dim nonempty_domain - ned_f64 = (np.array(np.min(idx_f64)), np.array(np.max(idx_f64))) - idx_str.sort() - ned_str = idx_str[0], idx_str[-1] - - with tiledb.SparseArray(uri, "r") as A: - self.assertEqual(A.nonempty_domain(), (ned_f64, ned_str)) - - def test_sparse_get_unique_dim_values(self, fx_sparse_cell_order): - uri = self.path("get_non_empty_coords") - dim1 = tiledb.Dim(name="dim1", domain=(None, None), tile=None, dtype=np.bytes_) - dim2 = tiledb.Dim(name="dim2", domain=(0, 1), tile=1, dtype=np.float64) - attr = tiledb.Attr(name="attr", dtype=np.float32) - dom = tiledb.Domain(dim1, dim2) - schema = tiledb.ArraySchema( - domain=dom, sparse=True, cell_order=fx_sparse_cell_order, attrs=[attr] - ) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "w") as A: - A["a1", 0] = 1 - A["a1", 0.25] = 2 - A["a2", 0.5] = 3 - A["a3", 0.25] = 4 - - with tiledb.open(uri, "r") as A: - self.assertEqual( - A.unique_dim_values(), - OrderedDict( - [("dim1", (b"a1", b"a2", b"a3")), ("dim2", (0.0, 0.25, 0.5))] - ), - ) - - self.assertEqual(A.unique_dim_values("dim1"), (b"a1", b"a2", b"a3")) - self.assertEqual(A.unique_dim_values("dim2"), (0, 0.25, 0.5)) - - with self.assertRaises(ValueError): - A.unique_dim_values(0) - - with self.assertRaises(ValueError): - A.unique_dim_values("dim3") - - def test_sparse_write_for_zero_attrs(self): - uri = self.path("test_sparse_write_to_zero_attrs") - dim = tiledb.Dim(name="dim", domain=(0, 9), dtype=np.float64) - schema = tiledb.ArraySchema(domain=tiledb.Domain(dim), sparse=True) - tiledb.Array.create(uri, schema) - - coords = [1, 2.0, 3.5] - - with tiledb.open(uri, "w") as A: - A[coords] = None - - with tiledb.open(uri, "r") as A: - output = A.query()[:] - assert list(output.keys()) == ["dim"] - assert_array_equal(output["dim"][:], coords) - - def test_sparse_write_nullable_default(self): - uri = self.path("test_sparse_write_nullable_default") - - dim1 = tiledb.Dim(name="d1", dtype="|S0", var=True) - att = tiledb.Attr(name="a1", dtype=" 1 - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=3)) - foo = tiledb.Attr("foo", dtype="i8") - bar = tiledb.Attr("bar", dtype="i8") - schema = tiledb.ArraySchema(domain=dom, attrs=(foo, bar)) - tiledb.DenseArray.create(self.path("arr2"), schema) - with self.assertRaises(ValueError): - with tiledb.DenseArray(self.path("arr2"), mode="r") as arr2: - np.array(arr2) - - def test_array_getindex(self): - # Tests that __getindex__ interface works - np_array = np.arange(1, 10) - with tiledb.from_numpy(self.path("foo"), np_array) as arr: - assert_array_equal(arr[5:10], np_array[5:10]) - - def test_to_array1d_attr_name(self): - np_array = np.array([1.0, 2.0, 3.0]) - with tiledb.from_numpy(self.path("foo"), np_array, attr_name="a") as arr: - assert_array_equal(arr[:]["a"], np_array) - - def test_from_numpy_timestamp(self): - path = self.path() - with tiledb.from_numpy(path, np.array([1, 2, 3]), timestamp=10) as A: - pass - with tiledb.open(path, timestamp=(0, 9)) as A: - assert A.nonempty_domain() is None - with tiledb.open(path, timestamp=(10, 10)) as A: - assert A.nonempty_domain() == ((0, 2),) - - def test_from_numpy_schema_only(self): - uri = self.path("test_from_numpy_schema_only") - - arr1 = np.array([1.0, 2.0, 3.0]) - with tiledb.from_numpy(uri, arr1, mode="schema_only") as arr: - assert arr.nonempty_domain() is None - - def test_from_numpy_append(self): - uri = self.path("test_from_numpy_append") - - arr1 = np.array([1.0, 2.0, 3.0]) - - with tiledb.from_numpy(uri, arr1, full_domain=True) as A: - assert A.nonempty_domain() == ((0, 2),) - assert_array_equal(A[0:3], arr1) - - arr2 = np.array([4.0, 5.0, 6.0]) - - with tiledb.from_numpy(uri, arr2, mode="append") as A: - assert A.nonempty_domain() == ((0, 5),) - assert_array_equal(A[0:6], np.append(arr1, arr2)) - - def test_from_numpy_start_idx(self): - uri = self.path("test_from_numpy_start_idx") - - arr1 = np.array([1.0, 2.0, 3.0]) - - with tiledb.from_numpy(uri, arr1) as A: - assert A.nonempty_domain() == ((0, 2),) - assert_array_equal(A[0:3], arr1) - - arr2 = np.array([4.0, 5.0, 6.0]) - - with tiledb.from_numpy(uri, arr2, mode="append", start_idx=0) as A: - assert A.nonempty_domain() == ((0, 2),) - assert_array_equal(A[0:3], arr2) - - def test_from_numpy_append_array2d(self): - uri = self.path("test_from_numpy_append_array2d") - - arr1 = np.random.rand(10, 5) - - with tiledb.from_numpy(uri, arr1, full_domain=True) as A: - assert A.nonempty_domain() == ((0, 9), (0, 4)) - assert_array_equal(A[0:10, 0:5], arr1) - - # error out if number of dimensions do not match - with self.assertRaises(ValueError): - arr2 = np.random.rand(5) - tiledb.from_numpy(uri, arr2, mode="append") - - # error out if number of dimensions do not match - with self.assertRaises(ValueError): - arr2 = np.random.rand(4, 4) - tiledb.from_numpy(uri, arr2, mode="append") - - arr2 = np.random.rand(5, 5) - - with tiledb.from_numpy(uri, arr2, mode="append") as A: - assert A.nonempty_domain() == ((0, 14), (0, 4)) - assert_array_equal(A[0:15, 0:5], np.append(arr1, arr2, axis=0)) - - @pytest.mark.parametrize("append_dim", (0, 1, 2, 3)) - def test_from_numpy_append_array3d(self, append_dim): - uri = self.path("test_from_numpy_append_array3d") - - arr1 = np.random.rand(2, 2, 2) - - with tiledb.from_numpy(uri, arr1, full_domain=True) as A: - assert A.nonempty_domain() == ((0, 1), (0, 1), (0, 1)) - assert_array_equal(A[0:2, 0:2, 0:2], arr1) - - arr2 = np.random.rand(2, 2, 2) - - # error out if index is out of bounds - if append_dim == 3: - with self.assertRaises(IndexError): - tiledb.from_numpy(uri, arr2, mode="append", append_dim=append_dim) - return - - with tiledb.from_numpy(uri, arr2, mode="append", append_dim=append_dim) as A: - if append_dim == 0: - assert A.nonempty_domain() == ((0, 3), (0, 1), (0, 1)) - result = A[0:4, 0:2, 0:2] - elif append_dim == 1: - assert A.nonempty_domain() == ((0, 1), (0, 3), (0, 1)) - result = A[0:2, 0:4, 0:2] - elif append_dim == 2: - assert A.nonempty_domain() == ((0, 1), (0, 1), (0, 3)) - result = A[0:2, 0:2, 0:4] - - assert_array_equal(result, np.append(arr1, arr2, axis=append_dim)) - - @pytest.mark.parametrize("append_dim", (0, 1, 2, 3)) - def test_from_numpy_append_array3d_overwrite(self, append_dim): - uri = self.path("test_from_numpy_append_array3d") - - arr1 = np.random.rand(2, 2, 2) - - with tiledb.from_numpy(uri, arr1) as A: - assert A.nonempty_domain() == ((0, 1), (0, 1), (0, 1)) - assert_array_equal(A[0:2, 0:2, 0:2], arr1) - - arr2 = np.random.rand(2, 2, 2) - - # error out if index is out of bounds - if append_dim == 3: - with self.assertRaises(IndexError): - tiledb.from_numpy(uri, arr2, mode="append", append_dim=append_dim) - return - - with tiledb.from_numpy( - uri, arr2, mode="append", append_dim=append_dim, start_idx=0 - ) as A: - assert A.nonempty_domain() == ((0, 1), (0, 1), (0, 1)) - assert_array_equal(A[0:2, 0:2, 0:2], arr2) - - @pytest.mark.parametrize("empty_str", ["", b""]) - @pytest.mark.parametrize("num_strs", [1, 1000]) - def test_from_numpy_empty_str(self, empty_str, num_strs): - uri = self.path("test_from_numpy_empty_str") - np_array = np.asarray([empty_str] * num_strs, dtype="O") - tiledb.from_numpy(uri, np_array) - - with tiledb.open(uri, "r") as A: - assert_array_equal(A[:], np_array) - if has_pandas(): - assert_array_equal(A.query(use_arrow=True).df[:][""], np_array) - assert_array_equal(A.query(use_arrow=False).df[:][""], np_array) - - -class ConsolidationTest(DiskTestCase): - def test_array_vacuum(self): - dshape = (0, 19) - num_writes = 10 - - def create_array(target_path): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=3)) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path): - for i in range(num_writes): - with tiledb.open(target_path, "w") as A: - A[i : dshape[1]] = np.random.rand(dshape[1] - i) - - # array #1 - path = self.path("test_array_vacuum") - create_array(path) - write_fragments(path) - - fi = tiledb.array_fragments(path) - self.assertEqual(len(fi), num_writes) - - tiledb.consolidate(path) - tiledb.vacuum(path) - - fi = tiledb.array_fragments(path) - self.assertEqual(len(fi), 1) - - # array #2 - path2 = self.path("test_array_vacuum_fragment_meta") - create_array(path2) - write_fragments(path2) - - fi = tiledb.array_fragments(path2) - self.assertEqual(fi.unconsolidated_metadata_num, num_writes) - - tiledb.consolidate( - path2, config=tiledb.Config({"sm.consolidation.mode": "fragment_meta"}) - ) - tiledb.vacuum(path2, config=tiledb.Config({"sm.vacuum.mode": "fragment_meta"})) - - fi = tiledb.array_fragments(path2) - self.assertEqual(fi.unconsolidated_metadata_num, 0) - - # array #3 - path3 = self.path("test_array_vacuum2") - create_array(path3) - write_fragments(path3) - - fi = tiledb.array_fragments(path3) - self.assertEqual(fi.unconsolidated_metadata_num, num_writes) - - conf = tiledb.Config({"sm.consolidation.mode": "fragment_meta"}) - tiledb.consolidate(uri=path3, config=conf) - - fi = tiledb.array_fragments(path3) - self.assertEqual(fi.unconsolidated_metadata_num, 0) - - def test_array_consolidate_with_timestamp(self): - dshape = (1, 3) - num_writes = 10 - - def create_array(target_path, dshape): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path, dshape, num_writes): - for i in range(1, num_writes + 1): - with tiledb.open(target_path, "w", timestamp=i) as A: - A[[1, 2, 3]] = np.random.rand(dshape[1]) - - path = self.path("test_array_consolidate_with_timestamp") - - create_array(path, dshape) - write_fragments(path, dshape, num_writes) - frags = tiledb.array_fragments(path) - assert len(frags) == 10 - - tiledb.consolidate(path, timestamp=(1, 4)) - - frags = tiledb.array_fragments(path) - assert len(frags) == 7 - assert len(frags.to_vacuum) == 4 - - with pytest.warns( - DeprecationWarning, - match=( - "Partial vacuuming via timestamp will be deprecrated in " - "a future release and replaced by passing in fragment URIs." - ), - ): - tiledb.vacuum(path, timestamp=(1, 2)) - - tiledb.vacuum(path) - frags = tiledb.array_fragments(path) - assert len(frags.to_vacuum) == 0 - - conf = tiledb.Config( - {"sm.consolidation.timestamp_start": 5, "sm.consolidation.timestamp_end": 9} - ) - tiledb.consolidate(path, config=conf) - tiledb.vacuum(path) - assert len(tiledb.array_fragments(path)) == 3 - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_array_consolidate_with_uris(self, use_timestamps): - dshape = (1, 3) - num_writes = 10 - - def create_array(target_path, dshape): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema) - - def write_fragments(target_path, dshape, num_writes): - for i in range(1, num_writes + 1): - with tiledb.open( - target_path, "w", timestamp=i if use_timestamps else None - ) as A: - A[[1, 2, 3]] = np.random.rand(dshape[1]) - - path = self.path("test_array_consolidate_with_uris") - - create_array(path, dshape) - write_fragments(path, dshape, num_writes) - frags = tiledb.array_fragments(path) - assert len(frags) == 10 - - frag_names = [os.path.basename(f) for f in frags.uri] - - tiledb.consolidate(path, fragment_uris=frag_names[:4]) - - assert len(tiledb.array_fragments(path)) == 7 - - with pytest.warns( - DeprecationWarning, - match=( - "The `timestamp` argument will be ignored and only fragments " - "passed to `fragment_uris` will be consolidate" - ), - ): - timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] - tiledb.consolidate( - path, - fragment_uris=frag_names[4:8], - timestamp=(timestamps[5], timestamps[6]), - ) - - assert len(tiledb.array_fragments(path)) == 4 - - def test_array_consolidate_with_key(self): - dshape = (1, 3) - num_writes = 10 - - path = self.path("test_array_consolidate_with_key") - key = "0123456789abcdeF0123456789abcdeF" - - config = tiledb.Config() - config["sm.encryption_key"] = key - config["sm.encryption_type"] = "AES_256_GCM" - ctx = tiledb.Ctx(config=config) - - def create_array(target_path, dshape): - dom = tiledb.Domain(tiledb.Dim(domain=dshape, tile=len(dshape))) - att = tiledb.Attr(dtype="int64") - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.libtiledb.Array.create(target_path, schema, ctx=ctx) - - def write_fragments(target_path, dshape, num_writes): - for i in range(1, num_writes + 1): - with tiledb.open(target_path, "w", timestamp=i, ctx=ctx) as A: - A[[1, 2, 3]] = np.random.rand(dshape[1]) - - create_array(path, dshape) - write_fragments(path, dshape, num_writes) - frags = tiledb.array_fragments(path, ctx=ctx) - assert len(frags) == 10 - - frag_names = [os.path.basename(f) for f in frags.uri] - - tiledb.consolidate(path, ctx=ctx, config=config, fragment_uris=frag_names[:4]) - - assert len(tiledb.array_fragments(path, ctx=ctx)) == 7 - - -@pytest.mark.skipif(sys.platform == "win32", reason="Only run MemoryTest on linux") -class MemoryTest(DiskTestCase): - # sanity check that memory usage doesn't increase more than 2x when reading 40MB 100x - # https://github.com/TileDB-Inc/TileDB-Py/issues/150 - @staticmethod - def use_many_buffers(path): - # https://stackoverflow.com/questions/938733/total-memory-used-by-python-process - process = psutil.Process(os.getpid()) - - x = np.ones(10000000, dtype=np.float32) - d1 = tiledb.Dim( - "test_domain", domain=(0, x.shape[0] - 1), tile=10000, dtype="uint32" - ) - domain = tiledb.Domain(d1) - v = tiledb.Attr("test_value", dtype="float32") - - schema = tiledb.ArraySchema( - domain=domain, attrs=(v,), cell_order="row-major", tile_order="row-major" - ) - - A = tiledb.DenseArray.create(path, schema) - - with tiledb.DenseArray(path, mode="w") as A: - A[:] = {"test_value": x} - - with tiledb.DenseArray(path, mode="r") as data: - data[:] - initial = process.memory_info().rss - print(" initial RSS: {}".format(round(initial / 1e6, 2))) - for i in range(100): - # read but don't store: this memory should be freed - data[:] - - if i % 10 == 0: - print( - " read iter {}, RSS (MB): {}".format( - i, round(process.memory_info().rss / 1e6, 2) - ) - ) - - return initial - - def test_memory_cleanup(self, capfd): - # run function which reads 100x from a 40MB test array - # TODO: RSS is too loose to do this end-to-end, so should use instrumentation. - print("Starting TileDB-Py memory test:") - initial = self.use_many_buffers(self.path("test_memory_cleanup")) - - process = psutil.Process(os.getpid()) - final = process.memory_info().rss - print(" final RSS: {}".format(round(final / 1e6, 2))) - - gc.collect() - - final_gc = process.memory_info().rss - print(" final RSS after forced GC: {}".format(round(final_gc / 1e6, 2))) - - assert_captured(capfd, "final RSS") - self.assertTrue(final < (2 * initial)) - - -class TestHighlevel(DiskTestCase): - def test_open(self): - uri = self.path("test_open") - array = np.random.rand(10) - schema = tiledb.schema_like(array) - tiledb.Array.create(uri, schema) - with tiledb.open(uri, "w") as A: - A[:] = array * 10 - A[:] = array - last_fragment_ts = list(A.last_write_info.items())[0][1][0] - - ctx = tiledb.Ctx() - with tiledb.DenseArray(uri, ctx=ctx) as A: - self.assertEqual(A._ctx_(), ctx) - - # test `open` with timestamp - with tiledb.open(uri, timestamp=last_fragment_ts) as A: - assert_array_equal(A[:], array) - - with tiledb.open(uri, ctx=ctx) as A: - self.assertEqual(A._ctx_(), ctx) - - config = tiledb.Config() - with tiledb.open(uri, config=config) as A: - self.assertEqual(A._ctx_().config(), config) - - with self.assertRaises(KeyError): - # This path must test `tiledb.open` specifically - # https://github.com/TileDB-Inc/TileDB-Py/issues/277 - tiledb.open(uri, "r", attr="the-missing-attr") - - def test_ctx_thread_cleanup(self): - # This test checks that contexts are destroyed correctly. - # It creates new contexts repeatedly, in-process, and - # checks that the total number of threads stays stable. - threads = ( - "sm.num_reader_threads" - if tiledb.libtiledb.version() < (2, 10) - else "sm.compute_concurrency_level" - ) - config = {threads: 128} - uri = self.path("test_ctx_thread_cleanup") - with tiledb.from_numpy(uri, np.random.rand(100)) as A: - pass - - thisproc = psutil.Process(os.getpid()) - - with tiledb.DenseArray(uri, ctx=tiledb.Ctx(config)) as A: - A[:] - start_threads = len(thisproc.threads()) - - for n in range(1, 10): - retry = 0 - while retry < 3: - try: - # checking exact thread count is unreliable, so - # make sure we are holding < 2x per run. - self.assertTrue(len(thisproc.threads()) < 2 * start_threads) - break - except RuntimeError as rterr: - retry += 1 - if retry > 2: - raise rterr - warnings.warn( - f"Thread cleanup test RuntimeError: {rterr} \n on iteration: {n}" - ) - - with tiledb.DenseArray(uri, ctx=tiledb.Ctx(config)) as A: - A[:] - - -class GetStatsTest(DiskTestCase): - def test_ctx(self): - tiledb.stats_enable() - ctx = tiledb.default_ctx() - uri = self.path("test_ctx") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, mode="w", ctx=ctx) as T: - T[:] = np.random.randint(10, size=3) - - stats = ctx.get_stats(print_out=False) - # check that the stats are non-empty - assert stats - - def test_query(self): - tiledb.stats_enable() - uri = self.path("test_ctx") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, mode="w") as T: - T[:] = np.random.randint(10, size=3) - - with tiledb.open(uri, mode="r") as T: - q = T.query() - assert "" == q.get_stats() - - q[:] - - stats = q.get_stats(print_out=False) - # check that the stats are non-empty - assert stats - - -class NullableIOTest(DiskTestCase): - def test_nullable_write(self): - uri = self.path("nullable_write_test") - - schema = tiledb.ArraySchema( - domain=tiledb.Domain( - *[tiledb.Dim(name="__dim_0", domain=(0, 3), tile=4, dtype="uint64")] - ), - attrs=[tiledb.Attr(name="", dtype="int64", var=False, nullable=True)], - ) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "w") as A: - A._setitem_impl( - slice(0, 4), np.ones(4), {"": np.array([0, 1, 0, 1], dtype=np.uint8)} - ) - - -class IncompleteTest(DiskTestCase): - @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) - def test_incomplete_dense_varlen(self, non_overlapping_ranges): - ncells = 10 - path = self.path("incomplete_dense_varlen") - str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] - data = np.array(str_data, dtype=np.str_) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data))) - att = tiledb.Attr(dtype=np.str_, var=True) - - schema = tiledb.ArraySchema(dom, (att,)) - - tiledb.DenseArray.create(path, schema) - with tiledb.DenseArray(path, mode="w") as T: - T[:] = data - - with tiledb.DenseArray(path, mode="r") as T: - assert_array_equal(data, T[:]) - - # set the memory to the max length of a cell - # these settings force ~100 retries - # TODO would be good to check repeat count here; not yet exposed - # Also would be useful to have max cell config in libtiledb. - init_buffer_bytes = 1024**2 - config = tiledb.Config( - { - "sm.memory_budget": ncells, - "sm.memory_budget_var": ncells, - "py.init_buffer_bytes": init_buffer_bytes, - "sm.query.sparse_unordered_with_dups.non_overlapping_ranges": non_overlapping_ranges, - "sm.skip_unary_partitioning_budget_check": True, - } - ) - self.assertEqual(config["py.init_buffer_bytes"], str(init_buffer_bytes)) - - with tiledb.DenseArray(path, mode="r", ctx=tiledb.Ctx(config)) as T2: - result = T2.query(attrs=[""])[:] - assert_array_equal(result, data) - - @pytest.mark.parametrize("allows_duplicates", [True, False]) - @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) - def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_ranges): - ncells = 100 - - path = self.path("incomplete_sparse_varlen") - str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] - data = np.array(str_data, dtype=np.str_) - coords = np.arange(ncells) - - # basic write - dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) + 100), tile=len(data))) - att = tiledb.Attr(dtype=np.str_, var=True) - - schema = tiledb.ArraySchema( - dom, (att,), sparse=True, allows_duplicates=allows_duplicates - ) - - tiledb.SparseArray.create(path, schema) - with tiledb.SparseArray(path, mode="w") as T: - T[coords] = data - - with tiledb.SparseArray(path, mode="r") as T: - assert_array_equal(data, T[:][""]) - - # set the memory to the max length of a cell - # these settings force ~100 retries - # TODO would be good to check repeat count here; not yet exposed - # Also would be useful to have max cell config in libtiledb. - init_buffer_bytes = 1024**2 - config = tiledb.Config( - { - "sm.memory_budget": ncells, - "sm.memory_budget_var": ncells, - "py.init_buffer_bytes": init_buffer_bytes, - } - ) - self.assertEqual(config["py.init_buffer_bytes"], str(init_buffer_bytes)) - - with tiledb.SparseArray(path, mode="r", ctx=tiledb.Ctx(config)) as T2: - assert_array_equal(data, T2[:][""]) - - assert_array_equal(data, T2.multi_index[0:ncells][""]) - - # ensure that empty results are handled correctly - assert_array_equal( - T2.multi_index[101:105][""], np.array([], dtype=np.dtype(" 0 - assert est_results[""].offsets_bytes > 0 - assert est_results[""].data_bytes > 0 - - for result in iterable: - if return_arrow: - assert isinstance(result, pa.Table) - df = result.to_pandas() - else: - if indexer == "df": - assert isinstance(result, pd.DataFrame) - df = result - else: - assert isinstance(result, OrderedDict) - df = pd.DataFrame(result) - - to_slice = slice(idx, idx + len(df)) - chunk = full_data[to_slice] - - assert np.all(chunk == df[""].values) - assert np.all(df["__dim_0"] == np.arange(idx, idx + len(df))) - # update the current read count - idx += len(df) - - assert idx == len(full_data) - - @pytest.mark.parametrize("cell_order", ["col-major", "row-major", "hilbert"]) - @pytest.mark.parametrize("tile_order", ["col-major", "row-major"]) - @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) - def test_incomplete_global_order( - self, cell_order, tile_order, non_overlapping_ranges - ): - uri = self.path("test_incomplete_global_order") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 30), tile=10, dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64) - schema = tiledb.ArraySchema( - domain=dom, - attrs=(att,), - sparse=True, - allows_duplicates=True, - cell_order=cell_order, - tile_order=tile_order, - ) - tiledb.Array.create(uri, schema) - - expected_data = np.random.randint(0, 10, 30) - - with tiledb.open(uri, mode="w") as T: - T[np.arange(30)] = expected_data - - init_buffer_bytes = 200 - cfg = tiledb.Config( - { - "py.init_buffer_bytes": init_buffer_bytes, - "py.exact_init_buffer_bytes": "true", - "sm.query.sparse_unordered_with_dups.non_overlapping_ranges": non_overlapping_ranges, - } - ) - - with tiledb.open(uri, mode="r", ctx=tiledb.Ctx(cfg)) as T: - actual_data = T.query(order="G")[:][""] - assert_array_equal(actual_data, expected_data) - - @pytest.mark.parametrize("exact_init_buffer_bytes", ["true", "false"]) - @pytest.mark.parametrize("non_overlapping_ranges", [True, False]) - def test_offset_can_fit_data_var_size_cannot( - self, exact_init_buffer_bytes, non_overlapping_ranges - ): - """ - One condition that would be nice to get more coverage on is when the offset buffer can fit X cells, but the var size data of those cells cannot fit the buffer. In this case, the reader does adjust the results back. - @Luc Rancourt so would we test this by having really large var-size content in each cell? - Isaiah 4 days ago - eg something like: we set buffers that can hold 100kb, but each var-len cell has 20kb, so we can read at most 5 cells into the data buffer, but theoretically the offsets buffer could hold many more? - """ - tiledb.stats_enable() - uri = self.path("test_incomplete_global_order") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 4), tile=1, dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64, var=True) - schema = tiledb.ArraySchema( - domain=dom, - attrs=(att,), - sparse=True, - allows_duplicates=True, - ) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, mode="w") as T: - T[np.arange(5)] = np.array( - [ - np.random.randint(0, 10, 10000, dtype=np.int64), - np.random.randint(0, 10, 10000, dtype=np.int64), - np.random.randint(0, 10, 10000, dtype=np.int64), - np.random.randint(0, 10, 10000, dtype=np.int64), - np.random.randint(0, 10, 101, dtype=np.int64), - ], - dtype="O", - ) - - init_buffer_bytes = 160000 - cfg = tiledb.Config( - { - "py.init_buffer_bytes": init_buffer_bytes, - "py.exact_init_buffer_bytes": exact_init_buffer_bytes, - "sm.query.sparse_unordered_with_dups.non_overlapping_ranges": non_overlapping_ranges, - } - ) - - with tiledb.open(uri, mode="r", ctx=tiledb.Ctx(cfg)) as T: - qry = T.query() - qry[:][""] - # assert_array_equal(actual_data, expected_data) - - tiledb.stats_disable() - - -class TestPath(DiskTestCase): - def test_path(self, pytestconfig): - path = self.path("foo") - if pytestconfig.getoption("vfs") == "s3": - assert path.startswith("s3://") - - @pytest.mark.skipif( - sys.platform == "win32", reason="no_output fixture disabled on Windows" - ) - @pytest.mark.xfail( - True, reason="This test prints, and should fail because of no_output fixture!" - ) - def test_no_output(self): - print("this test should fail") - - -class TestAsBuilt(DiskTestCase): - def test_as_built(self): - dump = tiledb.as_built(return_json_string=True) - assert isinstance(dump, str) - # ensure we get a non-empty string - assert len(dump) > 0 - dump_dict = tiledb.as_built() - assert isinstance(dump_dict, dict) - # ensure we get a non-empty dict - assert len(dump_dict) > 0 - - # validate top-level key - assert "as_built" in dump_dict - assert isinstance(dump_dict["as_built"], dict) - assert len(dump_dict["as_built"]) > 0 - - # validate parameters key - assert "parameters" in dump_dict["as_built"] - assert isinstance(dump_dict["as_built"]["parameters"], dict) - assert len(dump_dict["as_built"]["parameters"]) > 0 - - # validate storage_backends key - assert "storage_backends" in dump_dict["as_built"]["parameters"] - assert isinstance(dump_dict["as_built"]["parameters"]["storage_backends"], dict) - assert len(dump_dict["as_built"]["parameters"]["storage_backends"]) > 0 - - x = dump_dict["as_built"]["parameters"]["storage_backends"] - - # validate storage_backends attributes - vfs = tiledb.VFS() - if vfs.supports("azure"): - assert x["azure"]["enabled"] == True - else: - assert x["azure"]["enabled"] == False - - if vfs.supports("gcs"): - assert x["gcs"]["enabled"] == True - else: - assert x["gcs"]["enabled"] == False - - if vfs.supports("hdfs"): - assert x["hdfs"]["enabled"] == True - else: - assert x["hdfs"]["enabled"] == False - - if vfs.supports("s3"): - assert x["s3"]["enabled"] == True - else: - assert x["s3"]["enabled"] == False - - # validate support key - assert "support" in dump_dict["as_built"]["parameters"] - assert isinstance(dump_dict["as_built"]["parameters"]["support"], dict) - assert len(dump_dict["as_built"]["parameters"]["support"]) > 0 - - # validate support attributes - check only if boolean - assert dump_dict["as_built"]["parameters"]["support"]["serialization"][ - "enabled" - ] in [True, False] diff --git a/tiledb/tests/test_metadata.cc b/tiledb/tests/test_metadata.cc deleted file mode 100644 index f93c3d2d96..0000000000 --- a/tiledb/tests/test_metadata.cc +++ /dev/null @@ -1,45 +0,0 @@ - -#include -#include -#include -#include - -#include - -#define TILEDB_DEPRECATED -#define TILEDB_DEPRECATED_EXPORT - -#include "../util.h" -#include // C++ - -#if !defined(NDEBUG) -// #include "debug.cc" -#endif - -namespace tiledbpy { - -using namespace std; -using namespace tiledb; -namespace py = pybind11; -using namespace pybind11::literals; - -class PyASCIIMetadataTest { - -public: - static void write_ascii(py::str uri) { - Context ctx; - Array array(ctx, uri, TILEDB_WRITE); - - std::string st = "xyz"; - array.put_metadata("abc", TILEDB_STRING_ASCII, st.length(), st.c_str()); - - array.close(); - } -}; - -void init_test_metadata(py::module &m) { - py::class_(m, "metadata_test_aux") - .def_static("write_ascii", &PyASCIIMetadataTest::write_ascii); -} - -}; // namespace tiledbpy diff --git a/tiledb/tests/test_metadata.py b/tiledb/tests/test_metadata.py deleted file mode 100644 index 0b13073abe..0000000000 --- a/tiledb/tests/test_metadata.py +++ /dev/null @@ -1,398 +0,0 @@ -import os -import time -import warnings - -import numpy as np -import pytest -from hypothesis import given, settings -from hypothesis import strategies as st -from hypothesis.extra import numpy as st_np - -import tiledb -from tiledb.main import metadata_test_aux - -from .common import DiskTestCase, assert_captured, rand_utf8 - -MIN_INT = np.iinfo(np.int64).min -MAX_INT = np.iinfo(np.int64).max -st_int = st.integers(min_value=MIN_INT, max_value=MAX_INT) -st_float = st.floats(allow_nan=False) -st_metadata = st.fixed_dictionaries( - { - "int": st_int, - "double": st_float, - "bytes": st.binary(), - "str": st.text(), - "list_int": st.lists(st_int), - "tuple_int": st.lists(st_int).map(tuple), - "list_float": st.lists(st_float), - "tuple_float": st.lists(st_float).map(tuple), - } -) -st_ndarray = st_np.arrays( - dtype=st.one_of( - st_np.integer_dtypes(endianness="<"), - st_np.unsigned_integer_dtypes(endianness="<"), - st_np.floating_dtypes(endianness="<", sizes=(32, 64)), - st_np.byte_string_dtypes(max_len=1), - st_np.unicode_string_dtypes(endianness="<", max_len=1), - st_np.datetime64_dtypes(endianness="<"), - ), - shape=st_np.array_shapes(min_dims=0, max_dims=3, min_side=0, max_side=10), -) - - -class MetadataTest(DiskTestCase): - def assert_equal_md_values(self, written_value, read_value): - if isinstance(written_value, np.ndarray): - self.assertIsInstance(read_value, np.ndarray) - self.assertEqual(read_value.dtype, written_value.dtype) - np.testing.assert_array_equal(read_value, written_value) - elif not isinstance(written_value, (list, tuple)): - self.assertEqual(read_value, written_value) - # we don't round-trip perfectly sequences - elif len(written_value) == 1: - # sequences of length 1 are read as a single scalar element - self.assertEqual(read_value, written_value[0]) - else: - # sequences of length != 1 are read as tuples - self.assertEqual(read_value, tuple(written_value)) - - def assert_metadata_roundtrip(self, tdb_meta, dict_meta): - for k, v in dict_meta.items(): - # test __contains__ - self.assertTrue(k in tdb_meta) - # test __getitem__ - self.assert_equal_md_values(v, tdb_meta[k]) - # test get - self.assert_equal_md_values(v, tdb_meta.get(k)) - - # test __contains__, __getitem__, get for non-key - non_key = str(object()) - self.assertFalse(non_key in tdb_meta) - with self.assertRaises(KeyError): - tdb_meta[non_key] - self.assertIsNone(tdb_meta.get(non_key)) - self.assertEqual(tdb_meta.get(non_key, 42), 42) - - # test __len__ - self.assertEqual(len(tdb_meta), len(dict_meta)) - - # test __iter__() is consistent with keys() - self.assertEqual(list(tdb_meta), tdb_meta.keys()) - - # test keys() - self.assertSetEqual(set(tdb_meta.keys()), set(dict_meta.keys())) - - # test values() and items() - read_values = tdb_meta.values() - read_items = tdb_meta.items() - self.assertEqual(len(read_values), len(read_items)) - for (item_key, item_value), value in zip(read_items, read_values): - self.assertTrue(item_key in dict_meta) - self.assert_equal_md_values(dict_meta[item_key], item_value) - self.assert_equal_md_values(dict_meta[item_key], value) - - def assert_not_implemented_methods(self, tdb_meta): - with self.assertRaises(NotImplementedError): - tdb_meta.setdefault("nokey", "hello!") - with self.assertRaises(NotImplementedError): - tdb_meta.pop("nokey", "hello!") - with self.assertRaises(NotImplementedError): - tdb_meta.popitem() - with self.assertRaises(NotImplementedError): - tdb_meta.clear() - - def test_errors(self): - path = self.path("test_md_errors") - with tiledb.from_numpy(path, np.ones((5,), np.float64)): - pass - - # can't read from a closed array - A = tiledb.open(path) - A.close() - with self.assertRaises(tiledb.TileDBError): - A.meta["x"] - - with tiledb.Array(path) as A: - # can't write to a mode='r' array - with self.assertRaises(tiledb.TileDBError): - A.meta["invalid_write"] = 1 - - # missing key raises KeyError - with self.assertRaises(KeyError): - A.meta["xyz123nokey"] - - self.assert_not_implemented_methods(A.meta) - - # test invalid input - with tiledb.Array(path, "w") as A: - # keys must be strings - with self.assertRaises(TypeError): - A.meta[123] = 1 - - # can't write an int > typemax(Int64) - with self.assertRaises(OverflowError): - A.meta["bigint"] = MAX_INT + 1 - - # can't write mixed-type list - with self.assertRaises(TypeError): - A.meta["mixed_list"] = [1, 2.1] - - # can't write mixed-type tuple - with self.assertRaises(TypeError): - A.meta["mixed_list"] = (0, 3.1) - - # can't write objects - with self.assertRaises(TypeError): - A.meta["object"] = object() - - self.assert_not_implemented_methods(A.meta) - - @given(st_metadata) - @settings(deadline=None) - def test_basic(self, test_vals): - path = self.path() - with tiledb.from_numpy(path, np.ones((5,), np.float64)): - pass - - with tiledb.Array(path, mode="w") as A: - A.meta.update(test_vals) - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - # test a 1 MB blob - blob = np.random.rand(int((1024**2) / 8)).tobytes() - with tiledb.Array(path, "w") as A: - test_vals["bigblob"] = blob - A.meta["bigblob"] = blob - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - # test del key - with tiledb.Array(path, "w") as A: - del test_vals["bigblob"] - del A.meta["bigblob"] - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - # test update - with tiledb.Array(path, mode="w") as A: - test_vals.update(foo="bar", double=3.14) - A.meta.update(foo="bar", double=3.14) - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - @given(st_metadata, st_ndarray) - @settings(deadline=None) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_numpy(self, use_timestamps, test_vals, ndarray): - test_vals["ndarray"] = ndarray - - path = self.path() - with tiledb.from_numpy(path, np.ones((5,), np.float64)): - pass - - with tiledb.Array(path, mode="w") as A: - A.meta.update(test_vals) - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - if use_timestamps: - # test resetting a key with a ndarray value to a non-ndarray value - time.sleep(0.001) - with tiledb.Array(path, "w") as A: - A.meta["ndarray"] = 42 - test_vals["ndarray"] = 42 - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - # test resetting a key with a non-ndarray value to a ndarray value - with tiledb.Array(path, "w") as A: - A.meta["bytes"] = ndarray - test_vals["bytes"] = ndarray - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - if use_timestamps: - # test del ndarray key - time.sleep(0.001) - with tiledb.Array(path, "w") as A: - del A.meta["ndarray"] - del test_vals["ndarray"] - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - if use_timestamps: - # test update - time.sleep(0.001) - with tiledb.Array(path, mode="w") as A: - test_vals.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) - A.meta.update(ndarray=np.stack([ndarray, ndarray]), transp=ndarray.T) - - with tiledb.Array(path) as A: - self.assert_metadata_roundtrip(A.meta, test_vals) - - @pytest.mark.filterwarnings("ignore::UserWarning") - @tiledb.scope_ctx( - {"sm.vacuum.mode": "array_meta", "sm.consolidation.mode": "array_meta"} - ) - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_consecutive(self, use_timestamps): - vfs = tiledb.VFS() - path = self.path("test_md_consecutive") - - write_count = 100 - - with tiledb.from_numpy(path, np.ones((5,), np.float64)): - pass - - randints = np.random.randint(0, MAX_INT - 1, size=write_count, dtype=np.int64) - randutf8s = [rand_utf8(i) for i in np.random.randint(1, 30, size=write_count)] - - # write 100 times, then consolidate - if use_timestamps: - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta["randint"] = int(randints[i]) - A.meta["randutf8"] = randutf8s[i] - time.sleep(0.001) - else: - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta["randint"] = int(randints[i]) - A.meta["randutf8"] = randutf8s[i] - - self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 100) - - with tiledb.Array(path) as A: - self.assertEqual(A.meta["randint"], randints[-1]) - self.assertEqual(A.meta["randutf8"], randutf8s[-1]) - - with tiledb.Array(path, mode="w") as aw: - aw.meta.consolidate() - - try: - self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 102) - except AssertionError: - # this test is broken under libtiledb 2.3, see ch 7449 - if tiledb.libtiledb.version() >= (2, 3): - warnings.warn( - "Suppressed assertion error with libtiledb 2.3! see ch 7449" - ) - else: - raise - - with tiledb.Array(path) as A: - self.assertEqual(A.meta["randint"], randints[-1]) - self.assertEqual(A.meta["randutf8"], randutf8s[-1]) - - # use randutf8s as keys, then consolidate - if use_timestamps: - for _ in range(2): - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta[randutf8s[i] + "{}".format(randints[i])] = int( - randints[i] - ) - A.meta[randutf8s[i]] = randutf8s[i] - time.sleep(0.001) - else: - for _ in range(2): - for i in range(write_count): - with tiledb.Array(path, mode="w") as A: - A.meta[randutf8s[i] + "{}".format(randints[i])] = int( - randints[i] - ) - A.meta[randutf8s[i]] = randutf8s[i] - - # test data - with tiledb.Array(path) as A: - for i in range(write_count): - key_int = randutf8s[i] + "{}".format(randints[i]) - self.assertEqual(A.meta[key_int], randints[i]) - self.assertEqual(A.meta[randutf8s[i]], randutf8s[i]) - - # test expected number of fragments before consolidating - try: - self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 302) - except AssertionError: - # this test is broken under libtiledb 2.3, see ch 7449 - if tiledb.libtiledb.version() >= (2, 3): - warnings.warn( - "Suppressed assertion error with libtiledb 2.3! see ch 7449" - ) - else: - raise - - with tiledb.Array(path, mode="w") as A: - A.meta.consolidate() - - # test expected number of fragments before vacuuming - try: - self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 304) - except AssertionError: - # this test is broken under libtiledb 2.3, see ch 7449 - if tiledb.libtiledb.version() >= (2, 3): - warnings.warn( - "Suppressed assertion error with libtiledb 2.3! see ch 7449" - ) - else: - raise - - tiledb.vacuum(path) - - # should only have one fragment+'.ok' after vacuuming - try: - self.assertEqual(len(vfs.ls(os.path.join(path, "__meta"))), 1) - except AssertionError: - # this test is broken under libtiledb 2.3, see ch 7449 - if tiledb.libtiledb.version() >= (2, 3): - warnings.warn( - "Suppressed assertion error with libtiledb 2.3! see ch 7449" - ) - else: - raise - - # test data again after consolidation - with tiledb.Array(path) as A: - for i in range(write_count): - key_int = randutf8s[i] + "{}".format(randints[i]) - self.assertEqual(A.meta[key_int], randints[i]) - self.assertEqual(A.meta[randutf8s[i]], randutf8s[i]) - - def test_ascii_metadata(self, capfd): - uri = self.path("test_ascii_metadata") - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), tile=1, dtype=np.int64)) - att = tiledb.Attr(dtype=np.int64) - schema = tiledb.ArraySchema(sparse=True, domain=dom, attrs=(att,)) - tiledb.Array.create(uri, schema) - - metadata_test_aux.write_ascii(uri) - - with tiledb.open(uri) as A: - assert A.meta["abc"] == b"xyz" - A.meta.dump() - assert_captured(capfd, "Type: STRING_ASCII") - - def test_bytes_metadata(self, capfd): - path = self.path() - with tiledb.from_numpy(path, np.ones((5,), np.float64)): - pass - - with tiledb.Array(path, mode="w") as A: - A.meta["bytes"] = b"blob" - - with tiledb.Array(path, mode="r") as A: - assert A.meta["bytes"] == b"blob" - A.meta.dump() - assert_captured(capfd, "Type: BLOB") diff --git a/tiledb/tests/test_multi_index-hp.py b/tiledb/tests/test_multi_index-hp.py deleted file mode 100644 index 08e6f1192b..0000000000 --- a/tiledb/tests/test_multi_index-hp.py +++ /dev/null @@ -1,170 +0,0 @@ -# -# Property-based tests for Array.multi_index using Hypothesis -# - -import warnings - -import hypothesis as hp -import numpy as np -import pytest -from hypothesis import assume, given -from hypothesis import strategies as st -from numpy.testing import assert_array_equal - -import tiledb -from tiledb import SparseArray - -from .strategies import bounded_ntuple, ranged_slices - - -def is_boundserror(exc: Exception): - assert str(exc) != "" - - vals = [ - "out of domain bounds", - "Cannot add range to dimension", - "cannot be larger than the higher bound", - ] - - return any(x in str(exc) for x in vals) - - -def _direct_query_ranges(array: SparseArray, ranges, order): - order_map = {"C": 0, "F": 1, "U": 3} - layout = order_map[order] - with tiledb.scope_ctx() as ctx: - q = tiledb.main.PyQuery(ctx, array, ("a",), (), layout, False) - subarray = tiledb.Subarray(array) - subarray.add_ranges(ranges) - q.set_subarray(subarray) - - q.submit() - - if ranges == [[]]: - # empty range should give empty result - return {k: [] for k in q.results()} - else: - return {k: v[0].view(array.attr(0).dtype) for k, v in q.results().items()} - - -# Compound strategies to build valid inputs for multi_index -subindex_obj = st.one_of(st.integers(), ranged_slices()) - -index_obj = st.one_of(subindex_obj, st.tuples(st.lists(subindex_obj))) - - -class TestMultiIndexPropertySparse: - dmin, dmax = -100, 100 - - @classmethod - @pytest.fixture(scope="class") - def sparse_array_1d(cls, checked_path): - def write_sparse_contig(uri): - data = np.arange(cls.dmin, cls.dmax, dtype=np.int64) - with tiledb.open(uri, "w") as A: - A[data] = data - - def create_array(uri): - schema = tiledb.ArraySchema( - tiledb.Domain( - [tiledb.Dim(dtype=np.int64, domain=(cls.dmin, cls.dmax))] - ), - attrs=[ - tiledb.Attr(name="a", dtype="float64", var=False, nullable=False) - ], - cell_order="row-major", - tile_order="row-major", - capacity=10000, - sparse=True, - ) - - tiledb.Array.create(uri, schema) - - uri = checked_path.path() - - create_array(uri) - write_sparse_contig(uri) - - return uri - - @given( - order=st.sampled_from(["C", "F", "U"]), - ranges=st.lists(bounded_ntuple(length=2, min_value=-100, max_value=100)), - ) - @hp.settings(deadline=None) - def test_multi_index_two_way_query(self, order, ranges, sparse_array_1d): - """This test checks the result of "direct" range queries using PyQuery - against the result of `multi_index` on the same ranges.""" - uri = sparse_array_1d - - assert isinstance(uri, str) - assume(v[0] <= v[1] for v in ranges) - - try: - with tiledb.open(uri) as A: - r1 = A.query(order=order).multi_index[ranges]["a"] - r2 = _direct_query_ranges(A, [ranges], order)["a"] - - assert_array_equal(r1, r2) - except tiledb.TileDBError as exc: - if is_boundserror(exc): - # out of bounds, this is ok so we tell hypothesis to ignore - # TODO these should all be IndexError - assume(False) - raise - - @given(index_obj) - @hp.settings(deadline=None) - def test_multi_index_inputs(self, sparse_array_1d, ind): - # TODO - # currently we don't have a comparison target/mockup to check - # as there is no direct numpy equivalent for this indexing mode - # but we could still assert more details about the result - # - coordinates are inbounds - # - values are within known attribute range from write - # another option for indirect testing - # - densify slices and ranges and compare to numpy - # numpy vectorized indexing result - - uri = sparse_array_1d - - try: - with tiledb.open(uri) as A: - r1 = A.multi_index[ind] - r1_array = r1["a"] - r1_coords = r1["__dim_0"] - - assert isinstance(r1_array, np.ndarray) - assert isinstance(r1_coords, np.ndarray) - - # some results may be empty - if len(r1_array): - # assertions based on input data - assert r1_array.min() >= self.dmin - assert r1_array.max() <= self.dmax - assert r1_coords.min() >= self.dmin - assert r1_coords.max() <= self.dmax - except tiledb.TileDBError as exc: - # bounds errors are not failures - if is_boundserror(exc): - assume(False) - elif "Failed to cast dim range" in str(exc): - # TODO this should be IndexError - assume(False) - else: - raise - except ValueError as exc: - if "Stepped slice ranges are not supported" in str(exc): - # stepped slice errors are ok - assume(False) - elif "Cannot convert to scalar" in str(exc): - assume(False) - else: - raise - except TypeError as exc: - if "Unsupported selection" in str(exc): - # mostly ok but warn for cross-check - warnings.warn(str(exc)) - assume(False) - else: - raise diff --git a/tiledb/tests/test_multi_index.py b/tiledb/tests/test_multi_index.py deleted file mode 100644 index 115ef4370e..0000000000 --- a/tiledb/tests/test_multi_index.py +++ /dev/null @@ -1,1038 +0,0 @@ -""" -TODO -- # implement mock of expected behavior in pure numpy w/ test function - - -- implement read function and tests (single [x], multi-attribute [ ]) -- implement custom indexer -- implement oindex... -""" - -import random - -import numpy as np -import pytest -from numpy.testing import assert_array_equal - -import tiledb -from tiledb.multirange_indexing import getitem_ranges, mr_dense_result_shape - -from .common import ( - SUPPORTED_DATETIME64_DTYPES, - DiskTestCase, - assert_dict_arrays_equal, - assert_tail_equal, - has_pandas, - has_pyarrow, - intspace, - rand_datetime64_array, -) - - -def make_1d_dense(path, attr_name="", attr_dtype=np.int64, dim_dtype=np.uint64): - a_orig = np.arange(36) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 35), tile=35, dtype=dim_dtype)) - att = tiledb.Attr(name=attr_name, dtype=attr_dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) - tiledb.DenseArray.create(path, schema) - - with tiledb.DenseArray(path, "w") as A: - A[:] = a_orig - - -def make_2d_dense(path, attr_name="", attr_dtype=np.int64): - a_orig = np.arange(1, 37).reshape(9, 4) - - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 8), tile=9, dtype=np.uint64), - tiledb.Dim(domain=(0, 3), tile=4, dtype=np.uint64), - ) - att = tiledb.Attr(name=attr_name, dtype=attr_dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) - tiledb.DenseArray.create(path, schema) - - with tiledb.DenseArray(path, "w") as A: - A[:] = a_orig - - -class TestMultiRangeAuxiliary(DiskTestCase): - def test_shape_funcs(self): - range1el = (((1, 1),),) - self.assertEqual(mr_dense_result_shape(range1el), (1,)) - - range1d = tuple([((1, 2), (4, 4))]) - self.assertEqual(mr_dense_result_shape(range1d), (3,)) - - range2d1 = (((3, 6), (7, 7), (10, 12)), ((5, 7),)) - self.assertEqual(mr_dense_result_shape(range2d1), (8, 3)) - - # range2d2 = ([(3, 6), (7, 7), (10, 12)], [(5, 7), (10, 10)]) - - # def test_3d(self): - # range3d1 = (((2, 4),), ((3, 6),), ((1, 4), (5, 9))) - # - # # self.assertEqual() - - def test_sel_to_ranges(self): - class Obj(object): - pass - - class IBI(object): - def __getitem__(self, idx): - return idx - - def make_arr(ndim): - arr = Obj() - arr.schema = Obj() - arr.schema.domain = Obj() - arr.schema.domain.ndim = ndim - arr.schema.sparse = False - arr.array = Obj() - # place-holder for attribute that is not used in these tests - arr.nonempty_domain = lambda: [()] * ndim - return arr - - ibi = IBI() - # ndim = 1 - arr = make_arr(1) - self.assertEqual(getitem_ranges(arr, ibi[[1]]), (((1, 1),),)) - self.assertEqual(getitem_ranges(arr, ibi[[1, 2]]), (((1, 1), (2, 2)),)) - self.assertEqual(getitem_ranges(arr, ibi[slice(1, 2)]), (((1, 2),),)) - self.assertEqual(getitem_ranges(arr, ibi[1:2]), (((1, 2),),)) - - # ndim = 2 - arr2 = make_arr(2) - self.assertEqual(getitem_ranges(arr2, ibi[[1]]), (((1, 1),), ())) - self.assertEqual(getitem_ranges(arr2, ibi[slice(1, 33)]), (((1, 33),), ())) - self.assertEqual( - getitem_ranges(arr2, ibi[[1, 2], [[1], slice(1, 3)]]), - (((1, 1), (2, 2)), ((1, 1), (1, 3))), - ) - - # ndim = 3 - arr3 = make_arr(3) - self.assertEqual( - getitem_ranges(arr3, ibi[1, 2, 3]), (((1, 1),), ((2, 2),), ((3, 3),)) - ) - self.assertEqual(getitem_ranges(arr3, ibi[1, 2]), ((((1, 1),), ((2, 2),), ()))) - self.assertEqual( - getitem_ranges(arr3, ibi[1:2, 3:4]), (((1, 2),), ((3, 4),), ()) - ) - self.assertEqual( - getitem_ranges(arr3, ibi[1:2, 3:4, 5:6]), (((1, 2),), ((3, 4),), ((5, 6),)) - ) - self.assertEqual( - getitem_ranges(arr3, ibi[[1], [2], [5, 6]]), - (((1, 1),), ((2, 2),), ((5, 5), (6, 6))), - ) - self.assertEqual( - getitem_ranges(arr3, ibi[1, [slice(3, 6), 8], slice(4, 6)]), - (((1, 1),), ((3, 6), (8, 8)), ((4, 6),)), - ) - self.assertEqual(getitem_ranges(arr3, ibi[(1, 2)]), (((1, 1),), ((2, 2),), ())) - self.assertEqual(getitem_ranges(arr3, ibi[[(1, 2)]]), (((1, 2),), (), ())) - self.assertEqual( - getitem_ranges(arr3, ibi[[(1, 2), 4], [slice(1, 4)]]), - (((1, 2), (4, 4)), ((1, 4),), ()), - ) - - -class TestMultiRange(DiskTestCase): - @pytest.mark.skipif( - not has_pyarrow() or not has_pandas(), - reason="pyarrow>=1.0 and/or pandas>=1.0,<3.0 not installed", - ) - def test_return_arrow_indexers(self): - uri = self.path("multirange_behavior_sparse") - - schema = tiledb.ArraySchema( - domain=tiledb.Domain( - tiledb.Dim(name="idx", domain=(-5, 5), dtype=np.int64) - ), - attrs=[tiledb.Attr(name="data", dtype=np.int64)], - ) - tiledb.Array.create(uri, schema) - data = np.random.randint(-10, 10, size=11) - - with tiledb.open(uri, "w") as A: - A[:] = data - - with tiledb.open(uri, "r") as A: - with self.assertRaisesRegex( - tiledb.TileDBError, - "Cannot initialize return_arrow with use_arrow=False", - ): - q = A.query(return_arrow=True, use_arrow=False) - - q = A.query(return_arrow=True) - - with self.assertRaisesRegex( - tiledb.TileDBError, - "`return_arrow=True` requires .df indexer", - ): - q[:] - - with self.assertRaisesRegex( - tiledb.TileDBError, - "`return_arrow=True` requires .df indexer", - ): - q.multi_index[:] - - assert_array_equal(q.df[:]["data"], data) - - @pytest.mark.skipif( - not has_pyarrow() or not has_pandas(), - reason="pyarrow>=1.0 and/or pandas>=1.0,<3.0 not installed", - ) - @pytest.mark.parametrize("sparse", [True, False]) - def test_return_large_arrow_table(self, sparse): - num = 2**16 - 1 - uri = self.path("test_return_large_arrow_table") - dom = tiledb.Domain(tiledb.Dim(domain=(0, num - 1), dtype=np.uint16)) - att = tiledb.Attr(dtype=np.uint64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=sparse) - tiledb.Array.create(uri, schema) - - expected_data = np.arange(num) - - with tiledb.open(uri, "w") as A: - if sparse: - A[np.arange(num)] = expected_data - else: - A[:] = expected_data - - with tiledb.open(uri, "r") as arr: - actual_data = arr.query(return_arrow=True).df[:] - assert_array_equal(actual_data[:][""], expected_data) - - def test_multirange_behavior(self): - uri = self.path("multirange_behavior_sparse") - - schema = tiledb.ArraySchema( - domain=tiledb.Domain( - *[ - tiledb.Dim( - name="idx", - domain=(-1.0, 0.7999999999999996), - tile=2.0, - dtype="float64", - ) - ] - ), - attrs=[tiledb.Attr(name="data", dtype="float64", var=False)], - cell_order="row-major", - tile_order="row-major", - capacity=10000, - sparse=True, - allows_duplicates=True, - ) - tiledb.SparseArray.create(uri, schema) - data = np.random.rand(10) - idx = np.arange(-1, 1, 0.2) - - with tiledb.open(uri, "w") as A: - A[idx] = {"data": data} - - with tiledb.open(uri) as A: - res = A.multi_index[:] - # always return data - self.assertTrue("data" in res) - # return coordinates for sparse - self.assertTrue("idx" in res) - assert_array_equal(res["data"], data) - assert_array_equal(res["idx"], idx) - - uri = self.path("multirange_behavior_dense") - with tiledb.from_numpy(uri, data): - pass - - with tiledb.open(uri) as B: - res = B.multi_index[0:9] # TODO: this should accept [:] - # always return data - self.assertTrue("" in res) - # don't return coordinates for dense - self.assertTrue("idx" not in res) - - def test_multirange_empty(self): - path1 = self.path("test_multirange_empty_1d") - make_1d_dense(path1, attr_dtype=np.uint16) - with tiledb.open(path1) as A: - res = A.multi_index[tiledb.EmptyRange] - assert res[""].dtype == np.uint16 - assert res[""].shape == (0,) - - path2 = self.path("test_multirange_empty_2d") - make_2d_dense(path2, attr_dtype=np.float32) - with tiledb.open(path2) as A: - res = A.multi_index[tiledb.EmptyRange] - assert res[""].dtype == np.float32 - assert res[""].shape == (0,) - - def test_multirange_1d_1dim_ranges(self): - path = self.path("test_multirange_1d_1dim_ranges") - attr_name = "a" - - make_1d_dense(path, attr_name=attr_name) - - with tiledb.DenseArray(path) as A: - ranges = (((0, 0),),) - expected = np.array([0], dtype=np.int64) - res = tiledb.libtiledb.multi_index(A, (attr_name,), ranges) - a = res[attr_name] - assert_array_equal(a, expected) - self.assertEqual(a.dtype, expected.dtype) - self.assertEqual(len(res.keys()), 2) - - ranges2 = (((1, 1), (5, 8)),) - expected2 = np.array([1, 5, 6, 7, 8], dtype=np.int64) - a2 = tiledb.libtiledb.multi_index(A, (attr_name,), ranges2)[attr_name] - assert_array_equal(a2, expected2) - self.assertEqual(a2.dtype, expected2.dtype) - - def test_multirange_2d_1dim_ranges(self): - path = self.path("test_multirange_1dim_ranges") - attr_name = "a" - - make_2d_dense(path, attr_name=attr_name) - - expected = np.array( - [ - 1, - 2, - 3, - 4, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, - 36, - ], - dtype=np.uint64, - ) - - ranges = (((0, 0), (5, 8)),) - - with tiledb.DenseArray(path) as A: - a = tiledb.libtiledb.multi_index(A, (attr_name,), ranges)[attr_name] - - assert_array_equal(a, expected) - - def test_multirange_2d_2dim_ranges(self): - path = self.path("test_multirange_2dim_ranges") - attr_name = "a" - - make_2d_dense(path, attr_name=attr_name) - - expected = np.arange(1, 21) - - ranges = (((0, 4),), ((0, 3),)) - - with tiledb.DenseArray(path) as A: - a = tiledb.libtiledb.multi_index(A, (attr_name,), ranges)[attr_name] - assert_array_equal(a, expected) - - # test slicing start=end on 1st dim at 0 (bug fix) - assert_tail_equal( - np.array([[1, 2, 3, 4]]), - A.multi_index[:0][attr_name], - A.multi_index[0:0][attr_name], - ) - - # test slicing start=end on 2nd dim at 0 (bug fix) - assert_tail_equal( - np.arange(1, 34, 4).reshape((9, 1)), - A.multi_index[:, :0][attr_name], - A.multi_index[:, 0:0][attr_name], - ) - - # test slicing start=end on 1st dim at 1 - assert_array_equal(np.array([[5, 6, 7, 8]]), A.multi_index[1:1][attr_name]) - - # test slicing start=end on 2nd dim at 1 - assert_array_equal( - np.arange(2, 35, 4).reshape((9, 1)), A.multi_index[:, 1:1][attr_name] - ) - - # test slicing start=end on 1st dim at max range - assert_array_equal( - np.array([[33, 34, 35, 36]]), A.multi_index[8:8][attr_name] - ) - - # test slicing start=end on 2nd dim at max range - assert_tail_equal( - np.arange(4, 37, 4).reshape((9, 1)), A.multi_index[:, 3:3][attr_name] - ) - - def test_multirange_1d_dense_int64(self): - attr_name = "" - path = self.path("multi_index_1d") - - dom = tiledb.Domain( - tiledb.Dim(name="coords", domain=(-10, 10), tile=9, dtype=np.int64) - ) - att = tiledb.Attr(name=attr_name, dtype=np.float32) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - tiledb.DenseArray.create(path, schema) - - orig_array = np.random.rand(schema.domain.dim(0).size).astype(np.float32) - with tiledb.open(path, "w") as A: - A[:] = orig_array - - with tiledb.open(path) as A: - # stepped ranges are not supported - with self.assertRaises(ValueError): - A.query(coords=True).multi_index[1::2] - - assert_array_equal(orig_array[[0, -1]], A.multi_index[[-10, 10]][attr_name]) - self.assertEqual(orig_array[0], A.multi_index[-10][attr_name]) - self.assertEqual( - -10, A.query(coords=True).multi_index[-10]["coords"].view("i8") - ) - assert_array_equal(orig_array[0:], A.multi_index[[(-10, 10)]][attr_name]) - assert_array_equal( - orig_array[0:], A.multi_index[[slice(-10, 10)]][attr_name] - ) - assert_array_equal( - orig_array[0:10], A.multi_index[-10 : np.int64(-1)][attr_name] - ) - assert_array_equal(orig_array, A.multi_index[:][attr_name]) - ned = A.nonempty_domain()[0] - assert_array_equal( - A.multi_index[ned[0] : ned[1]][attr_name], A.multi_index[:][attr_name] - ) - - def test_multirange_1d_sparse_double(self): - attr_name = "" - path = self.path("mr_1d_sparse_double") - - dom = tiledb.Domain( - tiledb.Dim(name="coords", domain=(0, 30), tile=10, dtype=np.float64) - ) - att = tiledb.Attr(name=attr_name, dtype=np.float64) - schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=(att,)) - tiledb.SparseArray.create(path, schema) - - coords = np.linspace(0, 30, num=31) - orig_array = np.random.rand(coords.size) - - with tiledb.open(path, "w") as A: - A[coords] = orig_array - - with tiledb.open(path) as A: - assert_array_equal(orig_array[[0]], A.multi_index[[0]][attr_name]) - assert_array_equal(orig_array[-1], A.multi_index[30][attr_name]) - assert_array_equal(orig_array[-1], A.multi_index[30.0][attr_name]) - assert_array_equal( - orig_array[coords.size - 3 : coords.size], - A.multi_index[ - (28.0, 30.0), - ][attr_name], - ) - - res = A.multi_index[slice(0, 5)] - assert_array_equal(orig_array[0:6], res[attr_name]) - assert_array_equal(coords[0:6], res["coords"].astype(np.float64)) - - # test slice range indexing - ned = A.nonempty_domain() - res = A.multi_index[: ned[0][1]] - assert_array_equal(coords, res["coords"].astype(np.float64)) - - res = A.multi_index[ned[0][0] : coords[15]] - assert_array_equal(coords[:16], res["coords"].astype(np.float64)) - - def test_multirange_2d_sparse_domain_utypes(self): - attr_name = "foo" - - types = (np.uint8, np.uint16, np.uint32, np.uint64) - - for dtype in types: - min = 0 - max = int(np.iinfo(dtype).max) - 1 - path = self.path("multi_index_2d_sparse_" + str(dtype.__name__)) - - dom = tiledb.Domain(tiledb.Dim(domain=(min, max), tile=1, dtype=dtype)) - - att = tiledb.Attr(name=attr_name, dtype=dtype) - schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=(att,)) - tiledb.SparseArray.create(path, schema) - - coords = intspace(min, max, num=100, dtype=dtype) - - with tiledb.open(path, "w") as A: - A[coords] = coords - - with tiledb.open(path) as A: - res = A.multi_index[slice(coords[0], coords[-1])] - assert_array_equal(res[attr_name], coords) - assert_array_equal(res["__dim_0"].astype(dtype), coords) - - res = A.multi_index[coords[0]] - assert_array_equal(res[attr_name], coords[0]) - assert_array_equal(res["__dim_0"].astype(dtype), coords[0]) - - res = A.multi_index[coords[-1]] - assert_array_equal(res[attr_name], coords[-1]) - assert_array_equal(res["__dim_0"].astype(dtype), coords[-1]) - - midpoint = len(coords) // 2 - start = midpoint - 20 - stop = midpoint + 20 - srange = slice(coords[start], coords[stop]) - res = A.multi_index[srange] - assert_array_equal(res[attr_name], coords[start : stop + 1]) - assert_array_equal( - res["__dim_0"].astype(dtype), coords[start : stop + 1] - ) - - def test_multirange_2d_sparse_float(self): - attr_name = "" - path = self.path("mr_2d_sparse_float") - - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 10), tile=1, dtype=np.float32), - tiledb.Dim(domain=(0, 10), tile=1, dtype=np.float32), - ) - att = tiledb.Attr(name=attr_name, dtype=np.float64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.SparseArray.create(path, schema) - - orig_array = np.random.rand(11, 11) - d1 = np.linspace(0, 10, num=11, dtype=np.float32) - d2 = np.linspace(0, 10, num=11, dtype=np.float32) - coords_d1, coords_d2 = np.meshgrid(d1, d2, indexing="ij") - - with tiledb.open(path, "w") as A: - A[coords_d1.flatten(), coords_d2.flatten()] = orig_array - - with tiledb.open(path) as A: - res = A.multi_index[[0], :] - assert_array_equal(orig_array[[0], :].squeeze(), res[attr_name]) - assert_array_equal(coords_d1[0, :], res["__dim_0"]) - - # === - res = A.multi_index[10, :] - assert_array_equal(orig_array[[-1], :].squeeze(), res[attr_name]) - assert_array_equal(coords_d2[[-1], :].squeeze(), res["__dim_1"]) - - # === - res = A.multi_index[[slice(0, 2), [5]]] - assert_array_equal( - np.vstack([orig_array[0:3, :], orig_array[5, :]]).flatten(), - res[attr_name], - ) - assert_array_equal( - np.vstack((coords_d1[0:3], coords_d1[5])).flatten(), res["__dim_0"] - ) - - # === - res = A.multi_index[slice(0.0, 2.0), slice(2.0, 5.0)] - assert_array_equal(orig_array[0:3, 2:6].flatten(), res[attr_name]) - assert_array_equal(coords_d1[0:3, 2:6].flatten(), res["__dim_0"]) - assert_array_equal(coords_d2[0:3, 2:6].flatten(), res["__dim_1"]) - res = A.multi_index[ - slice(np.float32(0.0), np.float32(2.0)), - slice(np.float32(2.0), np.float32(5.0)), - ] - assert_array_equal(orig_array[0:3, 2:6].flatten(), res[attr_name]) - assert_array_equal(coords_d1[0:3, 2:6].flatten(), res["__dim_0"]) - assert_array_equal(coords_d2[0:3, 2:6].flatten(), res["__dim_1"]) - - def test_multirange_1d_sparse_query(self): - path = self.path("mr_1d_sparse_query") - - dom = tiledb.Domain( - tiledb.Dim(name="coords", domain=(-100, 100), tile=1, dtype=np.float32) - ) - attrs = [ - tiledb.Attr(name="U", dtype=np.float64), - tiledb.Attr(name="V", dtype=np.uint32), - ] - - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.SparseArray.create(path, schema) - - U = np.random.rand(11) - V = np.random.randint(0, np.iinfo(np.uint32).max, 11, dtype=np.uint32) - - coords = np.linspace(-10, 10, num=11, dtype=np.float32) - data = {"U": U, "V": V} - - with tiledb.open(path, "w") as A: - A[coords] = data - - with tiledb.open(path) as A: - for k, d in data.items(): - Q = A.query(attrs=k) - - res = Q.multi_index[[-10]] - assert_array_equal(d[[0]], res[k]) - - assert_array_equal(coords[[0]], res["coords"].view("f4")) - - res = A.multi_index[10] - assert_array_equal(d[[-1]].squeeze(), res[k]) - - assert_array_equal(coords[[-1]], res["coords"].view("f4")) - - res = A.multi_index[[slice(coords[0], coords[2]), [coords[-1]]]] - assert_array_equal(np.hstack([d[0:3], d[-1]]), res[k]) - - # make sure full slice indexing works on query - res = Q.multi_index[:] - assert_array_equal(coords, res["coords"]) - - # TODO: this should be an error - # res = A.multi_index[10, :] - # assert_array_equal( - # d[[-1]].squeeze(), - # res[k] - # ) - - with tiledb.open(path) as A: - Q = A.query(coords=False, attrs=["U"]) - res = Q.multi_index[:] - self.assertTrue("U" in res) - self.assertTrue("V" not in res) - self.assertTrue("coords" not in res) - assert_array_equal(res["U"], data["U"]) - - def test_multirange_1d_dense_vectorized(self): - path = self.path("mr_1d_dense_vectorized") - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 999), tile=1000, dtype=np.uint32)) - attrs = tiledb.Attr(name="", dtype=np.float64) - - schema = tiledb.ArraySchema(domain=dom, attrs=(attrs,), sparse=False) - tiledb.DenseArray.create(path, schema) - - data = np.random.rand(1000) - with tiledb.DenseArray(path, "w") as A: - A[0] = data[0] - A[-1] = data[-1] - A[:] = data - - for _ in range(0, 50): - with tiledb.DenseArray(path) as A: - idxs = random.sample(range(0, 999), k=100) - res = A.multi_index[idxs] - assert_array_equal(data[idxs], res[""]) - - def test_multirange_2d_dense_float(self): - attr_name = "" - path = self.path("multirange_2d_dense_float") - - dom = tiledb.Domain( - tiledb.Dim(domain=(0, 10), tile=1, dtype=np.int64), - tiledb.Dim(domain=(0, 10), tile=1, dtype=np.int64), - ) - att = tiledb.Attr(name=attr_name, dtype=np.float64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=False) - tiledb.DenseArray.create(path, schema) - - orig_array = np.random.rand(11, 11) - - with tiledb.open(path, "w") as A: - A[:] = orig_array - - with tiledb.open(path) as A: - assert_array_equal(orig_array[[0], :], A.multi_index[[0], :][attr_name]) - assert_array_equal( - orig_array[[-1, -1], :], A.multi_index[[10, 10], :][attr_name] - ) - assert_array_equal( - orig_array[0:4, 7:10], A.multi_index[[(0, 3)], slice(7, 9)][attr_name] - ) - assert_array_equal(orig_array[:, :], A.multi_index[:, :][attr_name]) - # TODO this should be an error to match NumPy 1.12 semantics - # assert_array_equal( - # orig_array[0:4,7:10], - # A.multi_index[[(np.float64(0),np.float64(3.0))], slice(7,9)][attr_name] - # ) - - @pytest.mark.parametrize("dtype", SUPPORTED_DATETIME64_DTYPES) - def test_multirange_1d_sparse_datetime64(self, dtype): - path = self.path("multirange_1d_sparse_datetime64") - - dates = rand_datetime64_array(10, dtype=dtype) - dom = tiledb.Domain( - tiledb.Dim(domain=(dates.min(), dates.max()), dtype=dtype, tile=1) - ) - - attr_name = "" - att = tiledb.Attr(name=attr_name, dtype=dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.SparseArray.create(path, schema) - - with tiledb.SparseArray(path, mode="w") as T: - T[dates] = dates - - with tiledb.open(path) as A: - res = A.multi_index[:] - # check full range - assert_tail_equal(dates, res[""], res["__dim_0"]) - - # check range pairs - for i in range(len(dates) - 1): - start, stop = dates[i : i + 2] - assert_tail_equal( - dates[i : i + 2], - A.multi_index[start:stop][""], - A.multi_index[start:stop]["__dim_0"], - ) - - def test_fix_473_sparse_index_bug(self): - # test of fix for issue raised in - # https://github.com/TileDB-Inc/TileDB-Py/pull/473#issuecomment-784675012 - - uri = self.path("test_fix_473_sparse_index_bug") - dom = tiledb.Domain( - tiledb.Dim(name="x", domain=(0, 2**64 - 2), tile=1, dtype=np.uint64) - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=True, attrs=[tiledb.Attr(name="a", dtype=np.uint64)] - ) - - tiledb.SparseArray.create(uri, schema) - - slice_index = slice(0, 4, None) - - with tiledb.SparseArray(uri, mode="r") as A: - data = A.multi_index[slice_index] - - assert_array_equal(data["a"], np.array([], dtype=np.uint64)) - assert_array_equal(A.multi_index[:], []) - - with tiledb.open(uri, mode="w") as A: - A[[10]] = {"a": [10]} - - with tiledb.open(uri) as A: - assert_tail_equal( - A.multi_index[slice_index]["a"], - A.multi_index[:], - A.multi_index[0:], - A.multi_index[1:], - A.multi_index[:10], - A.multi_index[:11], - np.array([], dtype=np.uint64), - ) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_fixed_multi_attr_df(self): - uri = self.path("test_fixed_multi_attr_df") - dom = tiledb.Domain( - tiledb.Dim(name="dim", domain=(0, 0), tile=None, dtype=np.int32) - ) - schema = tiledb.ArraySchema( - domain=dom, - sparse=True, - attrs=[ - tiledb.Attr( - name="111", dtype=[("", np.int32), ("", np.int32), ("", np.int32)] - ) - ], - ) - - tiledb.SparseArray.create(uri, schema) - - data_111 = np.array( - [(1, 1, 1)], dtype=[("", np.int32), ("", np.int32), ("", np.int32)] - ) - with tiledb.SparseArray(uri, mode="w") as A: - A[0] = data_111 - - with tiledb.SparseArray(uri, mode="r") as A: - result = A.query(attrs=["111"])[0] - assert_array_equal(result["111"], data_111) - - with self.assertRaises(tiledb.TileDBError): - result = A.query(attrs=["111"]).df[0] - - result = A.query(attrs=["111"], use_arrow=False) - assert_array_equal(result.df[0]["111"], data_111) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_var_multi_attr_df(self): - uri = self.path("test_var_multi_attr_df") - dom = tiledb.Domain( - tiledb.Dim(name="dim", domain=(0, 2), tile=None, dtype=np.int32) - ) - schema = tiledb.ArraySchema( - domain=dom, - sparse=True, - attrs=[tiledb.Attr(name="1s", dtype=np.int32, var=True)], - ) - - tiledb.SparseArray.create(uri, schema) - - data = np.array( - [ - np.array([1], dtype=np.int32), - np.array([1, 1], dtype=np.int32), - np.array([1, 1, 1], dtype=np.int32), - ], - dtype="O", - ) - with tiledb.SparseArray(uri, mode="w") as A: - A[[0, 1, 2]] = data - - with tiledb.SparseArray(uri, mode="r") as A: - result = A.query(attrs=["1s"]) - assert_array_equal(result[0]["1s"][0], data[0]) - assert_array_equal(result[1]["1s"][0], data[1]) - assert_array_equal(result[2]["1s"][0], data[2]) - - with self.assertRaises(tiledb.TileDBError): - result = A.query(attrs=["1s"]).df[0] - - result = A.query(attrs=["1s"], use_arrow=False) - assert_array_equal(result.df[0]["1s"][0], data[0]) - assert_array_equal(result.df[1]["1s"][0], data[1]) - assert_array_equal(result.df[2]["1s"][0], data[2]) - - def test_multi_index_with_implicit_full_string_range(self): - uri = self.path("test_multi_index_with_implicit_full_string_range") - dom = tiledb.Domain( - tiledb.Dim(name="dint", domain=(0, 4), tile=5, dtype=np.int32), - tiledb.Dim(name="dstr", domain=(None, None), tile=None, dtype=np.bytes_), - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=True, attrs=[tiledb.Attr(name="", dtype=np.int32)] - ) - - tiledb.Array.create(uri, schema) - with tiledb.open(uri, mode="w") as A: - d1 = np.concatenate((np.arange(5), np.arange(5))) - d2 = np.asarray( - ["a", "b", "ab", "ab", "c", "c", "c", "c", "d", "e"], dtype=np.bytes_ - ) - A[d1, d2] = np.array(np.random.randint(10, size=10), dtype=np.int32) - - with tiledb.open(uri, mode="r") as A: - assert_array_equal(A[:][""], A.multi_index[:][""]) - assert_array_equal(A.multi_index[:][""], A.multi_index[:, :][""]) - - assert_array_equal(A[1:4][""], A.multi_index[1:3][""]) - assert_array_equal(A.multi_index[1:3][""], A.multi_index[1:3, :][""]) - - assert_array_equal(A[0][""], A.multi_index[0][""]) - assert_array_equal(A.multi_index[0][""], A.multi_index[0, :][""]) - - def test_multi_index_open_timestamp_with_empty_nonempty_domain(self): - uri = self.path("test_multi_index_open_timestamp_with_empty_nonempty_domain") - dom = tiledb.Domain(tiledb.Dim(domain=(1, 3))) - attr = tiledb.Attr(name="", dtype=np.int32) - schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[attr]) - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, mode="w", timestamp=2) as A: - d1 = np.array(np.random.randint(1, 11, size=3, dtype=np.int32)) - A[np.arange(1, 4)] = d1 - - with tiledb.open(uri, mode="r", timestamp=1) as A: - assert A.nonempty_domain() is None - assert_array_equal(A.multi_index[:][""], A[:][""]) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_multi_index_query_args(self): - uri = self.path("test_multi_index_query_args") - schema = tiledb.ArraySchema( - domain=tiledb.Domain(tiledb.Dim(name="dim", domain=(0, 9), dtype=np.uint8)), - sparse=True, - attrs=[ - tiledb.Attr(name="a", dtype=np.uint8), - tiledb.Attr(name="b", dtype=np.uint8), - ], - ) - tiledb.Array.create(uri, schema) - - a = np.array(np.random.randint(10, size=10), dtype=np.int8) - b = np.array(np.random.randint(10, size=10), dtype=np.int8) - - with tiledb.open(uri, mode="w") as A: - A[np.arange(10)] = {"a": a, "b": b} - - with tiledb.open(uri, mode="r") as A: - q = A.query(cond="a >= 5", attrs=["a"]) - assert {"a", "dim"} == q.multi_index[:].keys() == q[:].keys() - assert_array_equal(q.multi_index[:]["a"], q[:]["a"]) - assert_array_equal(q.multi_index[:]["a"], q.df[:]["a"]) - assert all(q[:]["a"] >= 5) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_multi_index_timing(self): - path = self.path("test_multi_index_timing") - attr_name = "a" - - make_1d_dense(path, attr_name=attr_name) - tiledb.stats_enable() - with tiledb.open(path) as A: - assert_array_equal(A.df[:][attr_name], np.arange(36)) - internal_stats = tiledb.main.python_internal_stats() - assert "py.getitem_time :" in internal_stats - assert "py.getitem_time.buffer_conversion_time :" in internal_stats - assert "py.getitem_time.pandas_index_update_time :" in internal_stats - tiledb.stats_disable() - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_fixed_width_char(self): - uri = self.path("test_fixed_width_char") - schema = tiledb.ArraySchema( - domain=tiledb.Domain(tiledb.Dim(name="dim", domain=(0, 2), dtype=np.uint8)), - attrs=[tiledb.Attr(dtype="|S3")], - ) - tiledb.Array.create(uri, schema) - - data = np.array(["cat", "dog", "hog"], dtype="|S3") - - with tiledb.open(uri, mode="w") as A: - A[:] = data - - with tiledb.open(uri, mode="r") as A: - assert all(A.query(use_arrow=True).df[:][""] == data) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_empty_idx(self): - uri = self.path("test_empty_idx") - - schema = tiledb.ArraySchema( - domain=tiledb.Domain(tiledb.Dim(name="dim", domain=(0, 9), dtype=np.uint8)), - sparse=True, - attrs=[tiledb.Attr(name="a", dtype=np.float64)], - ) - tiledb.Array.create(uri, schema) - - data = np.array(np.random.randint(10, size=10), dtype=np.float64) - - with tiledb.open(uri, mode="w") as A: - A[np.arange(10)] = data - - with tiledb.open(uri, mode="r") as A: - assert_array_equal(A.df[tiledb.EmptyRange]["a"], []) - assert_array_equal(A.multi_index[tiledb.EmptyRange]["a"], []) - assert_array_equal(A.df[[]]["a"], []) - assert_array_equal(A.multi_index[[]]["a"], []) - assert_array_equal(A.df[()]["a"], []) - assert_array_equal(A.multi_index[()]["a"], []) - - -# parametrize dtype and sparse -@pytest.mark.parametrize( - "dim_dtype", - [ - np.int64, - np.uint64, - np.int32, - np.uint32, - np.int16, - np.uint16, - np.int8, - np.uint8, - ], -) -class TestMultiIndexND(DiskTestCase): - def test_multi_index_ndarray(self, dim_dtype): - # TODO support for dense? - sparse = True # ndarray indexing currently only supported for sparse - - path = self.path("test_multi_index_ndarray") - - ncells = 10 - data = np.arange(ncells - 1) - coords = np.arange(ncells - 1) - - # use negative range for sparse - if sparse and np.issubdtype(dim_dtype, np.signedinteger): - coords -= 4 - - dom = tiledb.Domain( - tiledb.Dim(domain=(coords.min(), coords.max()), dtype=dim_dtype) - ) - att = tiledb.Attr(dtype=np.int8) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=sparse) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as A: - if sparse: - A[coords] = data - else: - A[:] = data - - with tiledb.open(path) as A: - assert_dict_arrays_equal( - A.multi_index[coords.tolist()], A.multi_index[coords] - ) - assert_dict_arrays_equal( - A.multi_index[coords.tolist()], A.multi_index[coords] - ) - - def test_multi_index_ndarray_2d(self, dim_dtype): - sparse = False - - path = self.path("test_multi_index_ndarray_2d") - - ncells = 10 - ext = ncells - 1 - if sparse: - data = np.arange(ext) - else: - data = np.arange(ext**2).reshape(ext, ext) - d1_coords = np.arange(ext) - d2_coords = np.arange(ext, 0, -1) - - # use negative range for sparse - if sparse and np.issubdtype(dim_dtype, np.signedinteger): - d1_coords -= 4 - - d1 = tiledb.Dim( - name="d1", domain=(d1_coords.min(), d1_coords.max()), dtype=dim_dtype - ) - d2 = tiledb.Dim( - name="d2", domain=(d2_coords.min(), d2_coords.max()), dtype=dim_dtype - ) - dom = tiledb.Domain([d1, d2]) - - att = tiledb.Attr(dtype=np.int8) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=sparse) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as A: - if sparse: - A[d1_coords.tolist(), d2_coords.tolist()] = {"": data} - else: - A[:] = data - # raise ValueError("Test only support sparse") - - with tiledb.open(path) as A: - assert_dict_arrays_equal( - A.multi_index[d1_coords.tolist(), d2_coords.tolist()], - A.multi_index[d1_coords, d2_coords], - ) - - # note: np.flip below because coords are in reverse order, which is how - # tiledb will return the results for the first query, but not second - assert_dict_arrays_equal( - A.multi_index[d1_coords.tolist(), np.flip(d2_coords.tolist())], - A.multi_index[d1_coords, :], - ) - - slc = slice(0, ncells - 1, 2) - assert_dict_arrays_equal( - A.multi_index[d1_coords[slc].tolist(), :], - A.multi_index[d1_coords[slc], :], - ) - assert_dict_arrays_equal( - A.multi_index[:, d2_coords[slc]], A.multi_index[:, d2_coords[slc]] - ) diff --git a/tiledb/tests/test_pandas_dataframe.py b/tiledb/tests/test_pandas_dataframe.py deleted file mode 100644 index f7374252c0..0000000000 --- a/tiledb/tests/test_pandas_dataframe.py +++ /dev/null @@ -1,1800 +0,0 @@ -import copy -import glob -import os -import random -import string -import sys -import uuid -from collections import OrderedDict - -import numpy as np -import pyarrow -import pytest -from numpy.testing import assert_array_equal - -import tiledb -from tiledb.dataframe_ import ColumnInfo - -from .common import ( - DiskTestCase, - assert_dict_arrays_equal, - dtype_max, - dtype_min, - has_pandas, - rand_ascii, - rand_ascii_bytes, - rand_datetime64_array, - rand_utf8, -) -from .datatypes import RaggedDtype - -if not has_pandas(): - pytest.skip("pandas>=1.0,<3.0 not installed", allow_module_level=True) -else: - import pandas as pd - - tm = pd._testing - - -def make_dataframe_basic1(col_size=10): - # ensure no duplicates when using as string dim - chars = list() - for _ in range(col_size): - next = rand_ascii_bytes(2) - while next in chars: - next = rand_ascii_bytes(2) - chars.append(next) - - data_dict = { - "time": rand_datetime64_array(col_size, include_extremes=False), - "x": np.array([rand_ascii(4).encode("UTF-8") for _ in range(col_size)]), - "chars": np.array(chars), - "cccc": np.arange(0, col_size), - "q": np.array([rand_utf8(np.random.randint(1, 100)) for _ in range(col_size)]), - "t": np.array([rand_utf8(4) for _ in range(col_size)]), - "r": np.array( - [rand_ascii_bytes(np.random.randint(1, 100)) for _ in range(col_size)] - ), - "s": np.array([rand_ascii() for _ in range(col_size)]), - "u": np.array([rand_ascii_bytes().decode() for _ in range(col_size)]), - "v": np.array([rand_ascii_bytes() for _ in range(col_size)]), - "vals_int64": np.random.randint( - dtype_max(np.int64), size=col_size, dtype=np.int64 - ), - "vals_float64": np.random.rand(col_size), - } - - # TODO: dump this dataframe to pickle/base64 so that it can be reconstructed if - # there are weird failures on CI? - - df = pd.DataFrame.from_dict(data_dict) - return df - - -def make_dataframe_basic2(): - # This code is from Pandas feather i/o tests "test_basic" function: - # https://github.com/pandas-dev/pandas/blob/master/pandas/tests/io/test_feather.py - # (available under BSD 3-clause license - # https://github.com/pandas-dev/pandas/blob/master/LICENSE - df = pd.DataFrame( - { - "string": list("abc"), - "int": list(range(1, 4)), - "uint": np.arange(3, 6).astype("u1"), - "float": np.arange(4.0, 7.0, dtype="float64"), - # TODO "float_with_null": [1.0, np.nan, 3], - "bool": [True, False, True], - # TODO "bool_with_null": [True, np.nan, False], - # "cat": pd.Categorical(list("abc")), - "dt": pd.date_range("20130101", periods=3), - # "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"), - # "dt_with_null": [ - # pd.Timestamp("20130101"), - # pd.NaT, - # pd.Timestamp("20130103"), - # ], - "dtns": pd.date_range("20130101", periods=3, freq="ns"), - } - ) - - return df - - -def make_dataframe_basic3(col_size=10, time_range=(None, None)): - df_dict = { - "time": rand_datetime64_array( - col_size, start=time_range[0], stop=time_range[1], include_extremes=False - ), - "double_range": np.linspace(-1000, 1000, col_size), - "int_vals": np.random.randint( - dtype_max(np.int64), size=col_size, dtype=np.int64 - ), - } - df = pd.DataFrame(df_dict) - return df - - -def make_dataframe_categorical(): - df = pd.DataFrame( - { - "int": [0, 1, 2, 3], - "categorical_string": pd.Series(["A", "B", "A", "B"], dtype="category"), - "categorical_int": pd.Series( - np.array([1, 2, 3, 4], dtype=np.int64), dtype="category" - ), - # 'categorical_bool': pd.Series([True, False, True, False], dtype="category"), - } - ) - - return df - - -class TestColumnInfo(DiskTestCase): - def assertColumnInfo(self, info, info_dtype, info_repr=None, info_nullable=False): - assert isinstance(info.dtype, np.dtype) - assert info.dtype == info_dtype - - assert info.repr is None or isinstance(info.repr, str) - assert info.repr == info_repr - - assert isinstance(info.nullable, bool) - assert info.nullable == info_nullable - - @pytest.mark.parametrize( - "type_specs, info_dtype, info_repr, info_nullable", - [ - # bool types - ( - [bool, "b1"], - np.dtype("uint8" if tiledb.libtiledb.version() < (2, 10) else "bool"), - "bool", - False, - ), - ( - [pd.BooleanDtype()], - np.dtype("uint8" if tiledb.libtiledb.version() < (2, 10) else "bool"), - "boolean", - True, - ), - # numeric types - ([np.uint8, "u1"], np.dtype("uint8"), None, False), - ([np.uint16, "u2"], np.dtype("uint16"), None, False), - ([np.uint32, "u4"], np.dtype("uint32"), None, False), - ([np.uint64, "u8"], np.dtype("uint64"), None, False), - ([np.int8, "i1"], np.dtype("int8"), None, False), - ([np.int16, "i2"], np.dtype("int16"), None, False), - ([np.int32, "i4"], np.dtype("int32"), None, False), - ([np.int64, "i8"], np.dtype("int64"), None, False), - ([np.float32, "f4"], np.dtype("float32"), None, False), - ([np.float64, "f8", float], np.dtype("float64"), None, False), - # nullable int types - ([pd.UInt8Dtype(), "UInt8"], np.dtype("uint8"), "UInt8", True), - ([pd.UInt16Dtype(), "UInt16"], np.dtype("uint16"), "UInt16", True), - ([pd.UInt32Dtype(), "UInt32"], np.dtype("uint32"), "UInt32", True), - ([pd.UInt64Dtype(), "UInt64"], np.dtype("uint64"), "UInt64", True), - ([pd.Int8Dtype(), "Int8"], np.dtype("int8"), "Int8", True), - ([pd.Int16Dtype(), "Int16"], np.dtype("int16"), "Int16", True), - ([pd.Int32Dtype(), "Int32"], np.dtype("int32"), "Int32", True), - ([pd.Int64Dtype(), "Int64"], np.dtype("int64"), "Int64", True), - # datetime types - (["datetime64[ns]"], np.dtype("= (2, 2, 3): - pytest.skip("Only run QueryCondition test with TileDB>=2.2.3") - - @pytest.mark.parametrize("sparse", [True, False]) - def test_errors(self, sparse): - uri = self.create_input_array_UIDSA(sparse) - - with self.assertRaises(tiledb.TileDBError): - with tiledb.open(uri) as A: - A.query(cond="1.324 < 1")[:] - - with self.assertRaises(tiledb.TileDBError): - with tiledb.open(uri) as A: - A.query(cond="foo >= bar")[:] - - with self.assertRaises(tiledb.TileDBError): - with tiledb.open(uri) as A: - A.query(cond="'foo' == 'bar'")[:] - - with self.assertRaises(tiledb.TileDBError): - with tiledb.open(uri) as A: - A.query(cond="U < 10000000000000000000000.0", attrs=["U"])[:] - - with self.assertRaises(tiledb.TileDBError): - with tiledb.open(uri) as A: - A.query(cond="D", attrs=["D"])[:] - - with self.assertRaises(tiledb.TileDBError): - with tiledb.open(uri) as A: - A.query(cond="D,", attrs=["D"])[:] - - with self.assertRaises(tiledb.TileDBError): - with tiledb.open(uri) as A: - A.query(cond="D > ", attrs=["D"])[:] - - def test_qc_dense(self): - path = self.path("test_qc_dense") - - dom = tiledb.Domain( - tiledb.Dim(name="d", domain=(1, 10), tile=1, dtype=np.uint8) - ) - attrs = [tiledb.Attr(name="a", dtype=np.uint8)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) - tiledb.Array.create(path, schema) - - with tiledb.open(path) as A: - A.query(cond="a < 5") - - def test_unsigned_sparse(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - A.query(cond="U < 5", attrs=["U"])[:] - - result = A.query(cond="U < 5", attrs=["U"])[:] - assert all(result["U"] < 5) - - def test_unsigned_dense(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - mask = A.attr("U").fill - - A.query(cond="U < 5", attrs=["U"])[:] - - result = A.query(cond="U < 5", attrs=["U"])[:] - assert all(self.filter_dense(result["U"], mask) < 5) - - def test_signed_sparse(self): - uri = self.create_input_array_UIDSA(sparse=True) - - with tiledb.open(uri) as A: - result = A.query(cond="I < 1", attrs=["I"])[:] - assert all(result["I"] < 1) - - result = A.query(cond="I < +1", attrs=["I"])[:] - assert all(result["I"] < +1) - - result = A.query(cond="I < ---1", attrs=["I"])[:] - assert all(result["I"] < ---1) - - result = A.query(cond="-5 < I < 5", attrs=["I"])[:] - assert all(-5 < result["I"]) - assert all(result["I"] < 5) - - def test_signed_dense(self): - uri = self.create_input_array_UIDSA(sparse=False) - - with tiledb.open(uri) as A: - mask = A.attr("I").fill - - result = A.query(cond="I < 1", attrs=["I"])[:] - assert all(self.filter_dense(result["I"], mask) < 1) - - result = A.query(cond="I < +1", attrs=["I"])[:] - assert all(self.filter_dense(result["I"], mask) < +1) - - result = A.query(cond="I < ---1", attrs=["I"])[:] - assert all(self.filter_dense(result["I"], mask) < ---1) - - result = A.query(cond="-5 < I < 5", attrs=["I"])[:] - assert all(-5 < self.filter_dense(result["I"], mask)) - assert all(self.filter_dense(result["I"], mask) < 5) - - def test_floats_sparse(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - result = A.query(cond="D > 5.0", attrs=["D"])[:] - assert all(result["D"] > 5.0) - - result = A.query(cond="(D > 0.7) & (D < 3.5)", attrs=["D"])[:] - assert all((result["D"] > 0.7) & (result["D"] < 3.5)) - - result = A.query(cond="0.2 < D < 0.75", attrs=["D", "D"])[:] - assert all(0.2 < result["D"]) - assert all(result["D"] < 0.75) - - def test_floats_dense(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - mask = A.attr("D").fill - - result = A.query(cond="D > 5.0", attrs=["D"])[:] - assert all(self.filter_dense(result["D"], mask) > 5.0) - - result = A.query(cond="(D > 0.7) & (D < 3.5)", attrs=["D"])[:] - assert all(self.filter_dense(result["D"], mask) > 0.7) - assert all(self.filter_dense(result["D"], mask) < 3.5) - - result = A.query(cond="0.2 < D < 0.75", attrs=["D", "D"])[:] - assert all(0.2 < self.filter_dense(result["D"], mask)) - assert all(self.filter_dense(result["D"], mask) < 0.75) - - def test_string_sparse(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - with self.assertRaises(tiledb.TileDBError) as exc_info: - A.query(cond="S == c", attrs=["S"])[:] - assert ( - "right-hand sides must be constant expressions, not variables -- did you mean to quote the right-hand side as a string?" - in str(exc_info.value) - ) - - result = A.query(cond="S == 'c'", attrs=["S"])[:] - assert len(result["S"]) == 1 - assert result["S"][0] == b"c" - - result = A.query(cond="A == 'a'", attrs=["A"])[:] - assert len(result["A"]) == 1 - assert result["A"][0] == b"a" - - if tiledb.libtiledb.version() > (2, 14): - for t in A.query(attrs=["UTF"])[:]["UTF"]: - cond = f"""UTF == '{t}'""" - result = A.query(cond=cond, attrs=["UTF"])[:] - assert result["UTF"] == t - - def test_string_dense(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - with self.assertRaises(tiledb.TileDBError) as exc_info: - A.query(cond="S == ccc", attrs=["S"])[:] - assert ( - "right-hand sides must be constant expressions, not variables -- did you mean to quote the right-hand side as a string?" - in str(exc_info.value) - ) - - result = A.query(cond="S == 'ccc'", attrs=["S"])[:] - assert all(self.filter_dense(result["S"], A.attr("S").fill) == b"c") - - result = A.query(cond="A == 'ccc'", attrs=["A"])[:] - assert all(self.filter_dense(result["A"], A.attr("A").fill) == b"ccc") - - if tiledb.libtiledb.version() > (2, 14): - for t in A.query(attrs=["UTF"])[:]["UTF"]: - cond = f"""UTF == '{t}'""" - result = A.query(cond=cond, attrs=["UTF"])[:] - assert all( - self.filter_dense(result["UTF"], A.attr("UTF").fill) == t - ) - - def test_combined_types_sparse(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - qc = "(I > 0) & ((-3 < D) & (D < 3.0))" - result = A.query(cond=qc, attrs=["I", "D"])[:] - assert all((result["I"] > 0) & ((-3 < result["D"]) & (result["D"] < 3.0))) - - qc = "U >= 3 and 0.7 < D" - result = A.query(cond=qc, attrs=["U", "D"])[:] - assert all(result["U"] >= 3) & all(0.7 < result["D"]) - - qc = "(0.2 < D and D < 0.75) and (-5 < I < 5)" - result = A.query(cond=qc, attrs=["D", "I"])[:] - assert all((0.2 < result["D"]) & (result["D"] < 0.75)) - assert all((-5 < result["I"]) & (result["I"] < 5)) - - qc = "(-5 < I <= -1) and (0.2 < D < 0.75)" - result = A.query(cond=qc, attrs=["D", "I"])[:] - assert all((0.2 < result["D"]) & (result["D"] < 0.75)) - assert all((-5 < result["I"]) & (result["I"] <= -1)) - - qc = "(0.2 < D < 0.75) and (-5 < I < 5)" - result = A.query(cond=qc, attrs=["D", "I"])[:] - assert all((0.2 < result["D"]) & (result["D"] < 0.75)) - assert all((-5 < result["I"]) & (result["I"] < 5)) - - def test_combined_types_dense(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - mask_U = A.attr("U").fill - mask_I = A.attr("I").fill - mask_D = A.attr("D").fill - - qc = "(I > 0) & ((-3 < D) & (D < 3.0))" - result = A.query(cond=qc, attrs=["I", "D"])[:] - res_I = self.filter_dense(result["I"], mask_I) - res_D = self.filter_dense(result["D"], mask_D) - assert all(res_I > 0) & all(-3 < res_D) & all(res_D < 3.0) - - qc = "U >= 3 and 0.7 < D" - result = A.query(cond=qc, attrs=["U", "D"])[:] - res_U = self.filter_dense(result["U"], mask_U) - res_D = self.filter_dense(result["D"], mask_D) - assert all(res_U >= 3) & all(0.7 < res_D) - - qc = "(0.2 < D and D < 0.75) and (-5 < I < 5)" - result = A.query(cond=qc, attrs=["D", "I"])[:] - res_D = self.filter_dense(result["D"], mask_D) - res_I = self.filter_dense(result["I"], mask_I) - assert all((0.2 < res_D) & (res_D < 0.75)) - assert all((-5 < res_I) & (res_I < 5)) - - qc = "(-5 < I <= -1) and (0.2 < D < 0.75)" - result = A.query(cond=qc, attrs=["D", "I"])[:] - res_D = self.filter_dense(result["D"], mask_D) - res_I = self.filter_dense(result["I"], mask_I) - assert all((0.2 < res_D) & (res_D < 0.75)) - assert all((-5 < res_I) & (res_I <= -1)) - - qc = "(0.2 < D < 0.75) and (-5 < I < 5)" - result = A.query(cond=qc, attrs=["D", "I"])[:] - res_D = self.filter_dense(result["D"], mask_D) - res_I = self.filter_dense(result["I"], mask_I) - assert all((0.2 < res_D) & (res_D < 0.75)) - assert all((-5 < res_I) & (res_I < 5)) - - def test_check_attrs_sparse(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - result = A.query(cond="U < 0.1", attrs=["U"])[:] - assert all(result["U"] < 0.1) - - result = A.query(cond="U < 1.0", attrs=["U"])[:] - assert all(result["U"] < 1.0) - - with self.assertRaises(tiledb.TileDBError): - A.query(cond="U < '1'", attrs=["U"])[:] - - with self.assertRaises(tiledb.TileDBError): - A.query(cond="U < 'one'", attrs=["U"])[:] - - def test_check_attrs_dense(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - mask = A.attr("U").fill - - result = A.query(cond="U < 0.1", attrs=["U"])[:] - assert all(self.filter_dense(result["U"], mask) < 0.1) - - result = A.query(cond="U < 1.0", attrs=["U"])[:] - assert all(self.filter_dense(result["U"], mask) < 1.0) - - with self.assertRaises(tiledb.TileDBError): - A.query(cond="U < '1'", attrs=["U"])[:] - - with self.assertRaises(tiledb.TileDBError): - A.query(cond="U < 'one'", attrs=["U"])[:] - - def test_attr_and_val_casting_num(self): - path = self.path("test_attr_and_val_casting_num") - - dom = tiledb.Domain( - tiledb.Dim(name="dim", domain=(1, 10), tile=1, dtype=np.uint32) - ) - attrs = [ - tiledb.Attr(name="64-bit integer", dtype=np.int64), - tiledb.Attr(name="double", dtype=np.float64), - ] - - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as arr: - arr[np.arange(1, 11)] = { - "64-bit integer": np.random.randint(-5, 5, 10), - "double": np.random.rand(10), - } - - with tiledb.open(path) as arr: - result = arr.query(cond="attr('64-bit integer') <= val(0)")[:] - assert all(result["64-bit integer"] <= 0) - - result = arr.query(cond="attr('64-bit integer') <= 0")[:] - assert all(result["64-bit integer"] <= 0) - - result = arr.query(cond="double <= 0.5")[:] - assert all(result["double"] <= 0.5) - - result = arr.query(cond="attr('double') <= 0.5")[:] - assert all(result["double"] <= 0.5) - - result = arr.query(cond="double <= val(0.5)")[:] - assert all(result["double"] <= 0.5) - - result = arr.query(cond="attr('double') <= val(0.5)")[:] - assert all(result["double"] <= 0.5) - - def test_casting_str(self): - path = self.path("test_attr_and_val_casting_str") - - dom = tiledb.Domain(tiledb.Dim(name="dim with spaces", dtype="ascii")) - attrs = [tiledb.Attr(name="attr with spaces", dtype="ascii", var=True)] - - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - A = np.array( - [ - "value with spaces", - "nospaces", - "value with spaces", - "another value", - "", - ] - ) - - with tiledb.open(path, "w") as arr: - arr[["a", "b", "c", "d", "e"]] = {"attr with spaces": A} - - with tiledb.open(path) as arr: - qc = "attr('attr with spaces') == 'value with spaces'" - result = arr.query(cond=qc)[:] - assert list(result["dim with spaces"]) == [b"a", b"c"] - - with pytest.raises(tiledb.TileDBError) as exc_info: - result = arr.query(cond="dim('attr with spaces') == 'd'")[:] - assert "is not a dimension" in str(exc_info.value) - - qc = "attr('attr with spaces') == val('value with spaces')" - result = arr.query(cond=qc)[:] - assert list(result["dim with spaces"]) == [b"a", b"c"] - - with pytest.raises(tiledb.TileDBError) as exc_info: - result = arr.query(cond="attr('dim with spaces') == 'd'")[:] - assert "is not an attribute" in str(exc_info.value) - - result = arr.query(cond="dim('dim with spaces') == 'd'")[:] - assert list(result["dim with spaces"]) == [b"d"] - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 7, 0), - reason="var-length np.bytes_ query condition support introduced in 2.7.0", - ) - def test_var_length_str(self): - path = self.path("test_var_length_str") - - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(0, 4))) - attrs = [ - tiledb.Attr(name="ascii", dtype="ascii", var=True), - tiledb.Attr(name="bytes", dtype=np.bytes_, var=True), - ] - - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - def create_array(func): - return np.array([func[i - 1] * i for i in range(1, 6)], dtype=np.bytes_) - - ascii_data = create_array(string.ascii_lowercase) - bytes_data = create_array(string.ascii_uppercase) - - with tiledb.open(path, "w") as arr: - arr[np.arange(5)] = {"ascii": ascii_data, "bytes": bytes_data} - - with tiledb.open(path, "r") as arr: - for s in ascii_data: - result = arr.query(cond=f"ascii == '{s.decode()}'")[:] - assert result["ascii"][0] == s - - for s in bytes_data: - result = arr.query(cond=f"bytes == '{s.decode()}'")[:] - assert result["bytes"][0] == s - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 10, 0), - reason="OR query condition operator introduced in libtiledb 2.10", - ) - def test_or_sparse(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - result = A.query(cond="(D < 0.25) | (D > 0.75)", attrs=["D"])[:] - assert all((result["D"] < 0.25) | (result["D"] > 0.75)) - - result = A.query(cond="(D < 0.25) or (D > 0.75)", attrs=["D"])[:] - assert all((result["D"] < 0.25) | (result["D"] > 0.75)) - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 10, 0), - reason="OR query condition operator introduced in libtiledb 2.10", - ) - def test_or_dense(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - mask = A.attr("D").fill - - result = A.query(cond="(D < 0.25) | (D > 0.75)", attrs=["D"])[:] - res = self.filter_dense(result["D"], mask) - assert all((res < 0.25) | (res > 0.75)) - - result = A.query(cond="(D < 0.25) or (D > 0.75)", attrs=["D"])[:] - res = self.filter_dense(result["D"], mask) - assert all((res < 0.25) | (res > 0.75)) - - @pytest.mark.skipif( - tiledb.libtiledb.version() < (2, 10, 0), - reason="OR query condition operator and bool type introduced in libtiledb 2.10", - ) - def test_01(self): - path = self.path("test_01") - - dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), tile=1, dtype=np.uint32)) - attrs = [ - tiledb.Attr(name="a", dtype=np.uint8), - tiledb.Attr(name="b", dtype=np.uint8), - tiledb.Attr(name="c", dtype=np.uint8), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as arr: - arr[np.arange(1, 11)] = { - "a": np.random.randint(0, high=2, size=10), - "b": np.random.randint(0, high=2, size=10), - "c": np.random.randint(0, high=2, size=10), - } - - with tiledb.open(path) as A: - result = A.query(cond="a == 1 and b == 1 and c == 1")[:] - assert all(result["a"] & result["b"] & result["c"]) - - result = A.query(cond="a == 1 and b == 1 or c == 1")[:] - assert all(result["a"] & result["b"] | result["c"]) - - result = A.query(cond="a == 1 or b == 1 and c == 1")[:] - assert all(result["a"] | result["b"] & result["c"]) - - result = A.query(cond="a == 1 or b == 1 or c == 1")[:] - assert all(result["a"] | result["b"] | result["c"]) - - result = A.query(cond="(a == 1 and b == 1) or c == 1")[:] - assert all(result["a"] & result["b"] | result["c"]) - - result = A.query(cond="a == 1 and (b == 1 or c == 1)")[:] - assert all(result["a"] & (result["b"] | result["c"])) - - result = A.query(cond="(a == 1 or b == 1) and c == 1")[:] - assert all((result["a"] | result["b"]) & result["c"]) - - result = A.query(cond="a == 1 or (b == 1 and c == 1)")[:] - assert all(result["a"] | result["b"] & result["c"]) - - def test_in_operator_sparse(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - result = A.query(cond="U in [1, 2, 3]", attrs=["U"])[:] - for val in result["U"]: - assert val in [1, 2, 3] - - result = A.query(cond="S in ['a', 'e', 'i', 'o', 'u']", attrs=["S"])[:] - for val in result["S"]: - assert val in [b"a", b"e", b"i", b"o", b"u"] - - qc = "S in ['a', 'e', 'i', 'o', 'u'] and U in [5, 6, 7]" - result = A.query(cond=qc)[:] - for val in result["U"]: - assert val in [5, 6, 7] - for val in result["S"]: - assert val in [b"a", b"e", b"i", b"o", b"u"] - - result = A.query(cond="U in [8]")[:] - for val in result["U"]: - assert val == 8 - - result = A.query(cond="S in ['8']")[:] - assert len(result["S"]) == 0 - - result = A.query(cond="U not in [5, 6, 7]")[:] - for val in result["U"]: - assert val not in [5, 6, 7] - - with pytest.raises(tiledb.TileDBError) as exc_info: - A.query(cond="U not in []")[:] - assert "At least one value must be provided to the set membership" in str( - exc_info.value - ) - - def test_in_operator_dense(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - U_mask = A.attr("U").fill - S_mask = A.attr("S").fill - - result = A.query(cond="U in [1, 2, 3]", attrs=["U"])[:] - for val in self.filter_dense(result["U"], U_mask): - assert val in [1, 2, 3] - - result = A.query(cond="S in ['a', 'e', 'i', 'o', 'u']", attrs=["S"])[:] - for val in self.filter_dense(result["S"], S_mask): - assert val in [b"a", b"e", b"i", b"o", b"u"] - - qc = "S in ['a', 'e', 'i', 'o', 'u'] and U in [5, 6, 7]" - result = A.query(cond=qc)[:] - for val in self.filter_dense(result["U"], U_mask): - assert val in [5, 6, 7] - for val in self.filter_dense(result["S"], S_mask): - assert val in [b"a", b"e", b"i", b"o", b"u"] - - result = A.query(cond="U in [8]")[:] - for val in self.filter_dense(result["U"], U_mask): - assert val == 8 - - result = A.query(cond="S in ['8']")[:] - assert len(self.filter_dense(result["S"], S_mask)) == 0 - - result = A.query(cond="U not in [5, 6, 7]")[:] - for val in self.filter_dense(result["U"], U_mask): - assert val not in [5, 6, 7] - - with pytest.raises(tiledb.TileDBError) as exc_info: - A.query(cond="U not in []")[:] - assert "At least one value must be provided to the set membership" in str( - exc_info.value - ) - - @pytest.mark.parametrize( - "expression_and_message", - [ - ["foo is True", "the `is` operator is not supported"], - ["foo is not True", "the `is not` operator is not supported"], - [ - "foo &&& bar", - "Could not parse the given QueryCondition statement: foo &&& bar", - ], - ], - ) - @pytest.mark.parametrize("sparse", [True, False]) - def test_not_supported_operators(self, expression_and_message, sparse): - with tiledb.open(self.create_input_array_UIDSA(sparse=sparse)) as A: - expression, message = expression_and_message - with self.assertRaisesRegex(tiledb.TileDBError, message): - A.query(cond=expression)[:] - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_dense_datetime(self): - import pandas as pd - - uri = self.path("query-filter-dense-datetime.tdb") - - data = pd.DataFrame( - np.random.randint(438923600, 243892360000, 20, dtype=np.int64), - columns=["dates"], - ) - - tiledb.from_pandas( - uri, - data, - column_types={"dates": "datetime64[ns]"}, - ) - - with tiledb.open(uri) as A: - idx = 5 - - dt_mask = A.attr("dates").fill - search_date = data["dates"][idx] - - result = A.query(cond=f"dates == {search_date}").df[:] - - assert all(self.filter_dense(result["dates"], dt_mask) == A[idx]["dates"]) - - def test_array_with_bool_but_unused(self): - path = self.path("test_array_with_bool_but_unused") - - dom = tiledb.Domain( - tiledb.Dim(name="d", domain=(1, 3), tile=1, dtype=np.uint32) - ) - attrs = [ - tiledb.Attr(name="myint", dtype=int), - tiledb.Attr(name="mystr", dtype=str), - tiledb.Attr(name="mybool", dtype=bool), - ] - - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - data = { - "myint": np.asarray([10, 20, 30]), - "mystr": np.asarray(["apple", "ball", "cat"]), - "mybool": np.asarray([True, False, True]), - } - - with tiledb.open(path, "w") as A: - A[np.arange(1, 4)] = data - - with tiledb.open(path) as A: - result = A.query(cond="myint > 10", attrs=["myint"])[:] - assert all(result["myint"] > 10) - - def test_do_not_return_queried_attr(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - qc = "U < 3" - - i_result = A.query(cond=qc, attrs=["I", "U"])[:] - assert "I" in i_result.keys() - assert "U" in i_result.keys() - assert all(i_result["U"] < 5) - - u_result = A.query(cond=qc, attrs=["I"])[:] - assert "I" in u_result.keys() - assert "U" not in u_result.keys() - assert_array_equal(i_result["I"], u_result["I"]) - - def test_deprecate_attr_cond(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - qc = "U < 3" - - A.query(cond=qc) - A.query(cond=qc).cond - A.subarray(1, cond=qc) - - def test_on_dense_dimensions(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=False)) as A: - with pytest.raises(tiledb.TileDBError) as excinfo: - A.query(cond="2 <= d < 6")[:] - assert ( - "Cannot apply query condition to dimensions on dense arrays" - ) in str(excinfo.value) - - def test_on_sparse_dimensions(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - result = A.query(cond="2 <= d < 6")[:] - assert_array_equal(result["d"], A[2:6]["d"]) - - def test_overlapping(self): - path = self.path("test_overlapping") - - dom = tiledb.Domain(tiledb.Dim(name="dim", domain=(0, 10), dtype=np.uint32)) - attrs = [tiledb.Attr(name="data", dtype=np.uint32)] - - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as A: - A[np.arange(11)] = np.arange(11) - - with tiledb.open(path, "r") as A: - result = A.query(cond="2 <= dim < 7 and 5 <= dim < 9")[:] - assert_array_equal(result["dim"], A[5:7]["dim"]) - - result = A.query(cond="2 <= dim < 6 or 5 <= dim < 9")[:] - assert_array_equal(result["dim"], A[2:9]["dim"]) - - result = A.query(cond="2 <= data < 7 and 5 <= data < 9")[:] - assert_array_equal(result["data"], A[5:7]["data"]) - - result = A.query(cond="2 <= data < 6 or 5 <= data < 9")[:] - assert_array_equal(result["data"], A[2:9]["data"]) - - def test_with_whitespace(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - result = A.query(cond=" d < 6")[:] - assert_array_equal(result["d"], A[:6]["d"]) - - result = A.query(cond=" ( d < 6) ")[:] - assert_array_equal(result["d"], A[:6]["d"]) - - result = A.query(cond=" ( \n d \n\t< 6) ")[:] - assert_array_equal(result["d"], A[:6]["d"]) - - qc = """ - U < 5 - or - I >= 5 - """ - result = A.query(cond=qc)[:] - assert all((result["U"] < 5) | (result["U"] > 5)) - - qc = """ - - A == ' a' - - """ - result = A.query(cond=qc)[:] - # ensures that ' a' does not match 'a' - assert len(result["A"]) == 0 - - def test_attribute_with_dot(self): - path = self.path("test_with_dot") - dom = tiledb.Domain(tiledb.Dim(name="dim", domain=(0, 10), dtype=np.uint32)) - attrs = [ - tiledb.Attr(name="attr.one", dtype=np.uint32), - tiledb.Attr(name="attr.two", dtype=np.uint32), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - with tiledb.open(path, "w") as A: - A[np.arange(11)] = {"attr.one": np.arange(11), "attr.two": np.arange(11)} - with tiledb.open(path, "r") as A: - with pytest.raises(tiledb.TileDBError) as exc_info: - A.query(cond="attr.one < 6")[:] - assert ( - "TileDBError: Unhandled dot operator in Attribute(value=Name(id='attr', ctx=Load()), attr='one', ctx=Load()) -- if your attribute name has a dot in it, e.g. `orig.ident`, please wrap it with `attr(\"...\")`, e.g. `attr(\"orig.ident\")`" - in str(exc_info.value) - ) - with pytest.raises(tiledb.TileDBError) as exc_info: - A.query(cond="attr.two >= 6")[:] - assert ( - "TileDBError: Unhandled dot operator in Attribute(value=Name(id='attr', ctx=Load()), attr='two', ctx=Load()) -- if your attribute name has a dot in it, e.g. `orig.ident`, please wrap it with `attr(\"...\")`, e.g. `attr(\"orig.ident\")`" - in str(exc_info.value) - ) - - # now test with the correct syntax - result = A.query(cond='attr("attr.one") < 6')[:] - assert_array_equal(result["attr.one"], A[:6]["attr.one"]) - result = A.query(cond='attr("attr.two") >= 6')[:] - assert_array_equal(result["attr.two"], A[6:]["attr.two"]) - - @pytest.mark.skipif(not has_pandas(), reason="pandas>=1.0,<3.0 not installed") - def test_do_not_return_attrs(self): - with tiledb.open(self.create_input_array_UIDSA(sparse=True)) as A: - cond = None - assert "D" in A.query(cond=cond, attrs=None)[:] - assert "D" not in A.query(cond=cond, attrs=[])[:] - assert "D" in A.query(cond=cond, attrs=None).df[:] - assert "D" not in A.query(cond=cond, attrs=[]).df[:] - assert "D" in A.query(cond=cond, attrs=None).multi_index[:] - assert "D" not in A.query(cond=cond, attrs=[]).multi_index[:] - - cond = "D > 100" - assert "D" in A.query(cond=cond, attrs=None)[:] - assert "D" not in A.query(cond=cond, attrs=[])[:] - assert "D" in A.query(cond=cond, attrs=None).df[:] - assert "D" not in A.query(cond=cond, attrs=[]).df[:] - assert "D" in A.query(cond=cond, attrs=None).multi_index[:] - assert "D" not in A.query(cond=cond, attrs=[]).multi_index[:] - - def test_boolean_sparse(self): - path = self.path("test_boolean_sparse") - - dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), tile=1, dtype=np.uint32)) - attrs = [ - tiledb.Attr(name="a", dtype=np.bool_), - tiledb.Attr(name="b", dtype=np.bool_), - tiledb.Attr(name="c", dtype=np.bool_), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as arr: - arr[np.arange(1, 11)] = { - "a": np.random.randint(0, high=2, size=10), - "b": np.random.randint(0, high=2, size=10), - "c": np.random.randint(0, high=2, size=10), - } - - with tiledb.open(path) as A: - result = A.query(cond="a == True")[:] - assert all(result["a"]) - - result = A.query(cond="a == False")[:] - assert all(~result["a"]) - - result = A.query(cond="a == True and b == True")[:] - assert all(result["a"]) - assert all(result["b"]) - - result = A.query(cond="a == False and c == True")[:] - assert all(~result["a"]) - assert all(result["c"]) - - def test_boolean_dense(self): - path = self.path("test_boolean_dense") - - dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), tile=1, dtype=np.uint32)) - attrs = [ - tiledb.Attr(name="a", dtype=np.bool_), - tiledb.Attr(name="b", dtype=np.bool_), - tiledb.Attr(name="c", dtype=np.bool_), - ] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as arr: - arr[:] = { - "a": np.random.randint(0, high=2, size=10), - "b": np.random.randint(0, high=2, size=10), - "c": np.random.randint(0, high=2, size=10), - } - - with tiledb.open(path) as A: - mask = A.attr("a").fill - - result = A.query(cond="a == True")[:] - assert all(self.filter_dense(result["a"], mask)) - - result = A.query(cond="a == True and b == True")[:] - assert all(self.filter_dense(result["a"], mask)) - assert all(self.filter_dense(result["b"], mask)) - - def test_qc_enumeration(self): - uri = self.path("test_qc_enumeration") - dom = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=1)) - enum1 = tiledb.Enumeration("enmr1", True, [0, 1, 2]) - enum2 = tiledb.Enumeration("enmr2", True, ["a", "bb", "ccc"]) - attr1 = tiledb.Attr("attr1", dtype=np.int32, enum_label="enmr1") - attr2 = tiledb.Attr("attr2", dtype=np.int32, enum_label="enmr2") - schema = tiledb.ArraySchema( - domain=dom, attrs=(attr1, attr2), enums=(enum1, enum2) - ) - tiledb.Array.create(uri, schema) - - data1 = np.random.randint(0, 3, 8) - data2 = np.random.randint(0, 3, 8) - - with tiledb.open(uri, "w") as A: - A[:] = {"attr1": data1, "attr2": data2} - - with tiledb.open(uri, "r") as A: - mask = A.attr("attr1").fill - result = A.query(cond="attr1 < 2", attrs=["attr1"])[:] - assert all(self.filter_dense(result["attr1"], mask) < 2) - - result = A.query(cond="attr1 <= 2", attrs=["attr1"])[:] - assert all(self.filter_dense(result["attr1"], mask) <= 2) - - result = A.query(cond="attr1 > 0", attrs=["attr1"])[:] - assert all(self.filter_dense(result["attr1"], mask) > 0) - - result = A.query(cond="attr1 != 1", attrs=["attr1"])[:] - assert all(self.filter_dense(result["attr1"], mask) != 1) - - mask = A.attr("attr2").fill - result = A.query(cond="attr2 == 'bb'", attrs=["attr2"])[:] - assert all( - self.filter_dense(result["attr2"], mask) - == list(enum2.values()).index("bb") - ) - - mask = A.attr("attr2").fill - result = A.query(cond="attr2 < 'ccc'", attrs=["attr2"])[:] - assert list(enum2.values()).index("ccc") not in self.filter_dense( - result["attr2"], mask - ) - - result = A.query(cond="attr2 == 'b'", attrs=["attr2"])[:] - assert all(self.filter_dense(result["attr2"], mask) == []) - - result = A.query(cond="attr2 in ['b']", attrs=["attr2"])[:] - assert all(self.filter_dense(result["attr2"], mask) == []) - - result = A.query(cond="attr2 not in ['b']", attrs=["attr2"])[:] - assert len(result["attr2"]) == len(data2) - - result = A.query(cond="attr2 not in ['b', 'ccc']", attrs=["attr2"])[:] - assert list(enum2.values()).index("ccc") not in self.filter_dense( - result["attr2"], mask - ) - - result = A.query( - cond="attr1 < 2 and attr2 == 'bb'", attrs=["attr1", "attr2"] - )[:] - assert all(self.filter_dense(result["attr1"], mask) < 2) and all( - self.filter_dense(result["attr2"], mask) - == list(enum2.values()).index("bb") - ) - - result = A.query(cond="attr1 == 2", attrs=["attr1"])[:] - assert all(self.filter_dense(result["attr1"], mask) == 2) - - result = A.query( - cond="attr1 == 0 or attr2 == 'ccc'", attrs=["attr1", "attr2"] - )[:] - assert any(self.filter_dense(result["attr1"], mask) == 0) or any( - self.filter_dense(result["attr2"], mask) - == list(enum2.values()).index("ccc") - ) - - def test_boolean_insert(self): - path = self.path("test_boolean_insert") - attr = tiledb.Attr("a", dtype=np.bool_, var=False) - dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), tile=1, dtype=np.uint32)) - schema = tiledb.ArraySchema(domain=dom, sparse=True, attrs=[attr]) - tiledb.Array.create(path, schema) - a = np.array( - list( - [ - np.array([True], dtype=np.bool_), - np.array([True], dtype=np.bool_), - np.array([True], dtype=np.bool_), - np.array([True], dtype=np.bool_), - ] - ), - dtype=object, - ) - with tiledb.open(path, "w") as A: - A[range(1, len(a) + 1)] = {"a": a} - - with tiledb.open(path, "r") as A: - for k in A[:]["a"]: - assert k == True # noqa: E712 - - def test_qc_dense_empty(self): - path = self.path("test_qc_dense_empty") - - dom = tiledb.Domain(tiledb.Dim(name="d", domain=(1, 1), tile=1, dtype=np.uint8)) - attrs = [tiledb.Attr(name="a", dtype=np.uint8)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) - tiledb.Array.create(path, schema) - - with tiledb.open(path, mode="w") as A: - A[:] = np.arange(1) - - with tiledb.open(path) as A: - assert_array_equal(A.query(cond="")[:]["a"], [0]) - - def test_qc_sparse_empty(self): - path = self.path("test_qc_sparse_empty") - - dom = tiledb.Domain( - tiledb.Dim(name="d", domain=(1, 10), tile=1, dtype=np.uint8) - ) - attrs = [tiledb.Attr(name="a", dtype=np.uint8)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - with tiledb.open(path, mode="w") as A: - A[1] = {"a": np.arange(1)} - - with tiledb.open(path) as A: - assert_array_equal(A.query(cond="")[:]["a"], [0]) - - -class QueryDeleteTest(DiskTestCase): - def test_basic_sparse(self): - path = self.path("test_basic_sparse") - dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), tile=1, dtype=np.uint32)) - attrs = [tiledb.Attr("ints", dtype=np.uint32)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - data = np.random.randint(1, 10, 10) - - qc = "ints < 5" - - with tiledb.open(path, "w") as A: - A[np.arange(1, 11)] = data - - with pytest.raises( - tiledb.TileDBError, - match="SparseArray must be opened in read or delete mode", - ): - A.query(cond=qc).submit() - - with tiledb.open(path, "r") as A: - assert_array_equal(data, A[:]["ints"]) - - with tiledb.open(path, "d") as A: - with pytest.raises( - tiledb.TileDBError, - match="Cannot initialize deletes; One condition is needed", - ): - A.query().submit() - - A.query(cond=qc).submit() - - with tiledb.open(path, "r") as A: - assert all(A[:]["ints"] >= 5) - - def test_basic_dense(self): - path = self.path("test_basic_dense") - - dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), tile=1)) - attrs = [tiledb.Attr("ints", dtype=np.uint8)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=False) - tiledb.Array.create(path, schema) - - with tiledb.open(path, "d") as A: - with pytest.raises( - tiledb.TileDBError, - match="DenseArray must be opened in read mode", - ): - A.query() - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_with_fragments(self, use_timestamps): - path = self.path("test_with_fragments") - - dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=1)) - attrs = [tiledb.Attr("ints", dtype=np.uint8)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - if use_timestamps: - with tiledb.open(path, "w", timestamp=1) as A: - A[1] = 1 - - with tiledb.open(path, "w", timestamp=2) as A: - A[2] = 2 - - with tiledb.open(path, "w", timestamp=3) as A: - A[3] = 3 - else: - with tiledb.open(path, "w") as A: - A[1] = 1 - A[2] = 2 - A[3] = 3 - - with tiledb.open(path, "r") as A: - assert_array_equal([1, 2, 3], A[:]["ints"]) - - timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] - - with tiledb.open(path, "d", timestamp=timestamps[2]) as A: - A.query(cond="ints == 1").submit() - - with tiledb.open(path, "r", timestamp=timestamps[0]) as A: - assert_array_equal([1], A[:]["ints"]) - - with tiledb.open(path, "r", timestamp=timestamps[1]) as A: - assert_array_equal([1, 2], A[:]["ints"]) - - with tiledb.open(path, "r", timestamp=timestamps[2]) as A: - assert_array_equal([2, 3], A[:]["ints"]) - - assert len(tiledb.array_fragments(path)) == 3 - - tiledb.consolidate(path) - tiledb.vacuum(path) - - assert len(tiledb.array_fragments(path)) == 1 - - with tiledb.open(path, "r") as A: - assert A.nonempty_domain() == ((1, 3),) - assert_array_equal([2, 3], A[:]["ints"]) - - @pytest.mark.parametrize("use_timestamps", [True, False]) - def test_purge_deleted_cells(self, use_timestamps): - path = self.path("test_with_fragments") - - dom = tiledb.Domain(tiledb.Dim(domain=(1, 3), tile=1)) - attrs = [tiledb.Attr("ints", dtype=np.uint8)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - if use_timestamps: - with tiledb.open(path, "w", timestamp=1) as A: - A[1] = 1 - - with tiledb.open(path, "w", timestamp=2) as A: - A[2] = 2 - - with tiledb.open(path, "w", timestamp=3) as A: - A[3] = 3 - else: - with tiledb.open(path, "w") as A: - A[1] = 1 - A[2] = 2 - A[3] = 3 - - with tiledb.open(path, "r") as A: - assert_array_equal([1, 2, 3], A[:]["ints"]) - - timestamps = [t[0] for t in tiledb.array_fragments(path).timestamp_range] - - with tiledb.open(path, "d", timestamp=timestamps[2]) as A: - A.query(cond="ints == 1").submit() - - with tiledb.open(path, "r", timestamp=timestamps[0]) as A: - assert_array_equal([1], A[:]["ints"]) - - with tiledb.open(path, "r", timestamp=timestamps[1]) as A: - assert_array_equal([1, 2], A[:]["ints"]) - - with tiledb.open(path, "r", timestamp=timestamps[2]) as A: - assert_array_equal([2, 3], A[:]["ints"]) - - cfg = tiledb.Config({"sm.consolidation.purge_deleted_cells": "true"}) - with tiledb.scope_ctx(cfg): - tiledb.consolidate(path) - tiledb.vacuum(path) - - with tiledb.open(path, "r") as A: - assert A.nonempty_domain() == ((2, 3),) - assert_array_equal([2, 3], A[:]["ints"]) - - def test_delete_with_string_dimension(self): - path = self.path("test_delete_with_string_dimension") - - schema = tiledb.ArraySchema( - domain=tiledb.Domain(tiledb.Dim(name="d", dtype="|S0", var=True)), - attrs=[tiledb.Attr(name="a", dtype="uint32")], - sparse=True, - ) - - tiledb.Array.create(path, schema) - - with tiledb.open(path, "w") as A: - A[["a", "b", "c"]] = [10, 20, 30] - - with tiledb.open(path, "d") as A: - A.query(cond="a == 20").submit() - - with tiledb.open(path, "r") as A: - assert_array_equal(A[:]["d"], [b"a", b"c"]) - assert_array_equal(A[:]["a"], [10, 30]) - - with tiledb.open(path, "d") as A: - A.query(cond="d == 'a'").submit() - - with tiledb.open(path, "r") as A: - assert_array_equal(A[:]["d"], [b"c"]) - assert_array_equal(A[:]["a"], [30]) diff --git a/tiledb/tests/test_read_subarray.py b/tiledb/tests/test_read_subarray.py deleted file mode 100644 index 2268bb9dd7..0000000000 --- a/tiledb/tests/test_read_subarray.py +++ /dev/null @@ -1,488 +0,0 @@ -from collections import OrderedDict - -import numpy as np -import pytest - -import tiledb - -from .common import ( - DiskTestCase, - assert_array_equal, - assert_dict_arrays_equal, -) - -SUPPORTED_INTEGER_DTYPES = ( - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, -) - -SUPPORTED_DATETIME64_RESOLUTION = ("Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns") - - -@pytest.mark.parametrize("sparse", (True, False)) -class TestReadSubarray1D(DiskTestCase): - data1 = np.random.rand(101) - data2 = np.random.randint(-1000, 1000, (101,), dtype=np.int16) - label_data = np.linspace(-1.0, 1.0, 101) - - @pytest.fixture - def array_uri(self, sparse): - """Create TileDB array, write data, and return the URI.""" - suffix = "1d_label_sparse" if sparse else "1d_label_dense" - uri = self.path(f"read_subarray_{suffix}") - dim1 = tiledb.Dim(name="d1", domain=(0, 100), tile=101, dtype=np.int32) - schema = tiledb.ArraySchema( - domain=tiledb.Domain(dim1), - attrs=[ - tiledb.Attr(name="a1", dtype=np.float64), - tiledb.Attr(name="a2", dtype=np.int16), - ], - dim_labels={ - 0: { - "l1": dim1.create_label_schema("increasing", np.float64), - "l2": dim1.create_label_schema("decreasing", np.float64), - } - }, - sparse=sparse, - ) - tiledb.Array.create(uri, schema) - data_buffers = { - "a1": self.data1, - "a2": self.data2, - "l1": self.label_data, - "l2": np.flip(self.label_data), - } - with tiledb.open(uri, "w") as array: - if sparse: - array[np.arange(101, dtype=np.int32)] = data_buffers - else: - array[...] = data_buffers - return uri - - def test_read_full_array(self, array_uri): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (0, 100)) - result = array.read_subarray(subarray) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange(101, dtype=np.int32) - expected["a1"] = self.data1 - expected["a2"] = self.data2 - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_partial(self, array_uri): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (10, 20)) - result = array.read_subarray(subarray) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange(10, 21, dtype=np.int32) - expected["a1"] = self.data1[10:21] - expected["a2"] = self.data2[10:21] - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_multiple_ranges(self, array_uri): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (3, 3)) - subarray.add_dim_range(0, (1, 2)) - subarray.add_dim_range(0, (5, 10)) - result = array.read_subarray(subarray) - - expected = OrderedDict() - d1_expected = np.array([3, 1, 2, 5, 6, 7, 8, 9, 10], dtype=np.int32) - if sparse: - expected["d1"] = d1_expected - expected["a1"] = self.data1[d1_expected] - expected["a2"] = self.data2[d1_expected] - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_single_attr(self, array_uri): - with tiledb.open(array_uri, attr="a1", mode="r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (10, 20)) - result = array.read_subarray(subarray) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange(10, 21, dtype=np.int32) - expected["a1"] = self.data1[10:21] - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_full_array_by_increasing_label(self, array_uri): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_label_range("l1", (-1.0, 1.0)) - result = array.read_subarray(subarray) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange(101, dtype=np.int32) - expected["a1"] = self.data1 - expected["a2"] = self.data2 - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_partial_by_increasing_label(self, array_uri): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_label_range("l1", (0.0, 1.0)) - result = array.read_subarray(subarray) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange(50, 101, dtype=np.int32) - expected["a1"] = self.data1[50:] - expected["a2"] = self.data2[50:] - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_partial_by_decreasing_label(self, array_uri): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_label_range("l2", (0.0, 1.0)) - result = array.read_subarray(subarray) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange(51, dtype=np.int32) - expected["a1"] = self.data1[:51] - expected["a2"] = self.data2[:51] - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_by_label_no_data(self, array_uri): - with tiledb.open(array_uri, "r") as array: - subarray = tiledb.Subarray(array) - subarray.add_label_range("l1", (0.01, 0.012)) - with pytest.raises(tiledb.TileDBError): - array.read_subarray(subarray) - - -@pytest.mark.parametrize("dim_res", SUPPORTED_DATETIME64_RESOLUTION) -@pytest.mark.parametrize("sparse", (True, False)) -class TestReadSubarrayDenseDatetime1D(DiskTestCase): - data = np.random.rand(100) - - @pytest.fixture - def array_uri(self, dim_res, sparse): - """Create TileDB array, write data, and return the URI.""" - suffix = f"datetime_{dim_res}_sparse" if sparse else f"datetime_{dim_res}_dense" - uri = self.path(f"read_subarray_1d_datetime_{suffix}") - start_time = np.datetime64("2000-01-01", dim_res) - domain = (start_time, start_time + np.timedelta64(99, dim_res)) - schema = tiledb.ArraySchema( - tiledb.Domain( - tiledb.Dim( - name="d1", - domain=domain, - tile=100, - dtype=np.dtype(f"M8[{dim_res}]"), - ) - ), - [tiledb.Attr(name="a1", dtype=np.float64)], - sparse=sparse, - ) - tiledb.Array.create(uri, schema) - with tiledb.open(uri, "w") as array: - if sparse: - array[ - np.arange( - domain[0], - domain[1] + np.timedelta64(1, dim_res), - np.timedelta64(1, dim_res), - ) - ] = self.data - else: - array[...] = self.data - return uri - - def test_read_full_array(self, array_uri, dim_res): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - start_time = np.datetime64("2000-01-01", dim_res) - domain = (start_time, start_time + np.timedelta64(99, dim_res)) - subarray.add_dim_range(0, domain) - result = array.read_subarray(subarray) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange( - start_time, - start_time + np.timedelta64(100, dim_res), - np.timedelta64(1, dim_res), - ) - expected["a1"] = self.data - - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_partial(self, array_uri, dim_res): - with tiledb.open(array_uri, "r") as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - start_time = np.datetime64("2000-01-01", dim_res) - dim_range = ( - start_time + np.timedelta64(10, dim_res), - start_time + np.timedelta64(20, dim_res), - ) - subarray.add_dim_range(0, dim_range) - result = array.read_subarray(subarray) - assert_array_equal(result["a1"], self.data[10:21]) - - expected = OrderedDict() - if sparse: - expected["d1"] = np.arange( - start_time + np.timedelta64(10, dim_res), - start_time + np.timedelta64(21, dim_res), - np.timedelta64(1, dim_res), - ) - expected["a1"] = self.data[10:21] - - assert_dict_arrays_equal(result, expected, not sparse) - - -@pytest.mark.parametrize("sparse", (True, False)) -class TestReadSubarray2D(DiskTestCase): - data_a1 = np.random.rand(16).reshape(4, 4) - data_a2 = np.random.randint(-1000, 1000, (4, 4), dtype=np.int16) - data_l1 = np.arange(-2, 2) - data_l2 = np.arange(1, -3, -1) - - @pytest.fixture - def array_uri(self, sparse): - """Create TileDB array, write data, and return the URI.""" - suffix = "2d_sparse" if sparse else "2d_dense" - uri = self.path(f"read_subarray_{suffix}") - dim1 = tiledb.Dim(name="d1", domain=(0, 3), tile=4, dtype=np.int32) - dim2 = tiledb.Dim(name="d2", domain=(0, 3), tile=4, dtype=np.int32) - schema = tiledb.ArraySchema( - domain=tiledb.Domain(dim1, dim2), - attrs=[ - tiledb.Attr(name="a1", dtype=np.float64), - tiledb.Attr(name="a2", dtype=np.int16), - ], - dim_labels={ - 0: {"l1": dim1.create_label_schema("increasing", np.int32)}, - 1: {"l2": dim1.create_label_schema("decreasing", np.int32)}, - }, - sparse=sparse, - ) - tiledb.Array.create(uri, schema) - if sparse: - _schema = tiledb.ArraySchema.load(uri) - with tiledb.open(_schema.dim_label("l1").uri, mode="w") as label1: - label1[:] = self.data_l1 - with tiledb.open(_schema.dim_label("l2").uri, mode="w") as label2: - label2[:] = self.data_l2 - data_d1, data_d2 = np.meshgrid(np.arange(4), np.arange(4), indexing="ij") - with tiledb.open(uri, "w") as array: - array[data_d1.flatten(), data_d2.flatten()] = { - "a1": self.data_a1, - "a2": self.data_a2, - } - else: - with tiledb.open(uri, "w") as array: - array[...] = { - "a1": self.data_a1, - "a2": self.data_a2, - "l1": self.data_l1, - "l2": self.data_l2, - } - - return uri - - def test_read_full_array(self, array_uri): - with tiledb.open(array_uri) as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (0, 3)) - subarray.add_dim_range(1, (0, 3)) - result = array.read_subarray(subarray) - if sparse: - # Construct the expected result - data_d1, data_d2 = np.meshgrid( - np.arange(4, dtype=np.int32), - np.arange(4, dtype=np.int32), - indexing="ij", - ) - expected = { - "d1": data_d1.flatten(), - "d2": data_d2.flatten(), - "a1": self.data_a1.flatten(), - "a2": self.data_a2.flatten(), - } - else: - expected = {"a1": self.data_a1, "a2": self.data_a2} - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_mixed_ranges(self, array_uri): - with tiledb.open(array_uri) as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (0, 1)) - result = array.read_subarray(subarray) - if sparse: - data_d1, data_d2 = np.meshgrid( - np.arange(2, dtype=np.int32), - np.arange(4, dtype=np.int32), - indexing="ij", - ) - expected = { - "d1": data_d1.flatten(), - "d2": data_d2.flatten(), - "a1": self.data_a1[0:2, :].flatten(), - "a2": self.data_a2[0:2, :].flatten(), - } - else: - expected = {"a1": self.data_a1[0:2, :], "a2": self.data_a2[0:2, :]} - assert_dict_arrays_equal(result, expected, not sparse) - - -@pytest.mark.parametrize("sparse", (True, False)) -class TestReadSubarrayNegativeDomain2D(DiskTestCase): - data_a1 = np.random.rand(121).reshape(11, 11) - data_a2 = np.random.randint(-1000, 1000, (11, 11), dtype=np.int16) - - @pytest.fixture - def array_uri(self, sparse): - """Create TileDB array, write data, and return the URI.""" - suffix = "_sparse" if sparse else "_dense" - uri = self.path(f"read_subarray_{suffix}") - dim1 = tiledb.Dim(name="d1", domain=(-5, 5), tile=4, dtype=np.int32) - dim2 = tiledb.Dim(name="d2", domain=(-5, 5), tile=4, dtype=np.int32) - schema = tiledb.ArraySchema( - domain=tiledb.Domain(dim1, dim2), - attrs=[ - tiledb.Attr(name="a1", dtype=np.float64), - tiledb.Attr(name="a2", dtype=np.int16), - ], - sparse=sparse, - ) - tiledb.Array.create(uri, schema) - if sparse: - data_d1, data_d2 = np.meshgrid( - np.arange(-5, 6), np.arange(-5, 6), indexing="ij" - ) - with tiledb.open(uri, "w") as array: - array[data_d1.flatten(), data_d2.flatten()] = { - "a1": self.data_a1, - "a2": self.data_a2, - } - else: - with tiledb.open(uri, "w") as array: - array[...] = {"a1": self.data_a1, "a2": self.data_a2} - - return uri - - def test_read_full_array(self, array_uri): - with tiledb.open(array_uri) as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (-5, 5)) - subarray.add_dim_range(1, (-5, 5)) - result = array.read_subarray(subarray) - if sparse: - # Construct the expected result - data_d1, data_d2 = np.meshgrid( - np.arange(-5, 6, dtype=np.int32), - np.arange(-5, 6, dtype=np.int32), - indexing="ij", - ) - expected = { - "d1": data_d1.flatten(), - "d2": data_d2.flatten(), - "a1": self.data_a1.flatten(), - "a2": self.data_a2.flatten(), - } - else: - expected = {"a1": self.data_a1, "a2": self.data_a2} - assert_dict_arrays_equal(result, expected, not sparse) - - def test_read_mixed_ranges(self, array_uri): - with tiledb.open(array_uri) as array: - sparse = array.schema.sparse - subarray = tiledb.Subarray(array) - subarray.add_dim_range(1, (-1, 2)) - result = array.read_subarray(subarray) - - if sparse: - data_d1, data_d2 = np.meshgrid( - np.arange(-5, 6, dtype=np.int32), - np.arange(-1, 3, dtype=np.int32), - indexing="ij", - ) - expected = { - "d1": data_d1.flatten(), - "d2": data_d2.flatten(), - "a1": self.data_a1[:, 4:8].flatten(), - "a2": self.data_a2[:, 4:8].flatten(), - } - else: - expected = {"a1": self.data_a1[:, 4:8], "a2": self.data_a2[:, 4:8]} - assert_dict_arrays_equal(result, expected, not sparse) - - -class TestReadSubarraySparseArray1D(DiskTestCase): - data_dim1 = np.linspace(-1.0, 1.0, 5) - data_attr1 = np.arange(5, dtype=np.uint32) - - @pytest.fixture - def array_uri(self): - uri = self.path("test_read_subarray_array_1d") - schema = tiledb.ArraySchema( - domain=tiledb.Domain( - tiledb.Dim(name="d1", domain=(-1.0, 1.0), tile=2.0, dtype=np.float64) - ), - attrs=[tiledb.Attr(name="a1", dtype=np.uint32)], - sparse=True, - ) - tiledb.Array.create(uri, schema) - with tiledb.open(uri, "w") as array: - array[self.data_dim1] = self.data_attr1 - return uri - - def test_read_full_array(self, array_uri): - with tiledb.open(array_uri, "r") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (-1.0, 1.0)) - result = array.read_subarray(subarray) - - expected = OrderedDict([("d1", self.data_dim1), ("a1", self.data_attr1)]) - - assert_dict_arrays_equal(result, expected, False) - - def test_empty_result(self, array_uri): - with tiledb.open(array_uri, "r") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (-0.9, -0.89)) - result = array.read_subarray(subarray) - - expected = OrderedDict( - [ - ("d1", np.array([], dtype=np.float64)), - ("a1", np.array([], dtype=np.uint32)), - ] - ) - assert_dict_arrays_equal(result, expected, True) diff --git a/tiledb/tests/test_repr.py b/tiledb/tests/test_repr.py deleted file mode 100644 index 1132c9a6ae..0000000000 --- a/tiledb/tests/test_repr.py +++ /dev/null @@ -1,111 +0,0 @@ -import itertools -import re -import textwrap -import warnings - -import numpy as np - -import tiledb - -from .common import ( - DiskTestCase, - fx_sparse_cell_order, # noqa: F401 -) - - -class ReprTest(DiskTestCase): - def test_attr_repr(self): - attr = tiledb.Attr(name="itsanattr", dtype=np.float64) - self.assertTrue( - re.match( - r"Attr\(name=[u]?'itsanattr', dtype='float64', var=False, nullable=False, enum_label=None\)", - repr(attr), - ) - ) - - g = dict() - exec("from tiledb import Attr; from numpy import float64", g) - self.assertEqual(eval(repr(attr), g), attr) - - def test_dim_repr(self): - dtype_set = [bytes, np.bytes_] - opts = { - None: None, - "var": True, - "domain": (None, None), - "filters": [tiledb.GzipFilter()], - } - - dim_test_imports = textwrap.dedent( - """ - from tiledb import Dim, FilterList, GzipFilter - import numpy - from numpy import float64 - """ - ) - - for dtype in dtype_set: - opt_choices = [ - itertools.combinations(opts.keys(), r=n) - for n in range(1, len(opts) + 1) - ] - for opt_set in itertools.chain(*opt_choices): - opt_kwarg = {k: opts[k] for k in opt_set if k} - g = dict() - exec(dim_test_imports, g) - - dim = tiledb.Dim(name="d1", dtype=dtype, **opt_kwarg) - self.assertEqual(eval(repr(dim), g), dim) - - # test datetime - g = dict() - exec(dim_test_imports, g) - dim = tiledb.Dim( - name="d1", - domain=(np.datetime64("2010-01-01"), np.datetime64("2020")), - tile=2, - dtype=np.datetime64("", "D"), - ) - self.assertEqual(eval(repr(dim), g), dim) - - def test_arrayschema_repr(self, fx_sparse_cell_order): # noqa: F811 - filters = tiledb.FilterList([tiledb.ZstdFilter(-1)]) - for sparse in [False, True]: - cell_order = fx_sparse_cell_order if sparse else None - domain = tiledb.Domain( - tiledb.Dim(domain=(1, 8), tile=2), tiledb.Dim(domain=(1, 8), tile=2) - ) - a1 = tiledb.Attr("val", dtype="f8", filters=filters) - orig_schema = tiledb.ArraySchema( - domain=domain, attrs=(a1,), sparse=sparse, cell_order=cell_order - ) - - schema_repr = repr(orig_schema) - g = dict() - setup = "from tiledb import *\n" "import numpy as np\n" - - exec(setup, g) - new_schema = None - try: - new_schema = eval(schema_repr, g) - except Exception: - warn_str = ( - """Exception during ReprTest schema eval""" - + """, schema string was:\n""" - + """'''""" - + """\n{}\n'''""".format(schema_repr) - ) - warnings.warn(warn_str) - raise - - self.assertEqual(new_schema, orig_schema) - - def test_arrayschema_repr_hilbert(self): - domain = tiledb.Domain(tiledb.Dim(domain=(1, 8), tile=2)) - a = tiledb.Attr("a", dtype="f8") - schema = tiledb.ArraySchema( - domain=domain, attrs=(a,), cell_order="hilbert", sparse=True - ) - - assert schema.cell_order == "hilbert" - assert schema.tile_order is None diff --git a/tiledb/tests/test_serialization.cc b/tiledb/tests/test_serialization.cc deleted file mode 100644 index 31df7663a0..0000000000 --- a/tiledb/tests/test_serialization.cc +++ /dev/null @@ -1,95 +0,0 @@ - -#include -#include -#include -#include - -#include - -#define TILEDB_DEPRECATED -#define TILEDB_DEPRECATED_EXPORT - -#include "../util.h" -#include // C++ -#include // C - -#if !defined(NDEBUG) -// #include "debug.cc" -#endif - -namespace tiledbpy { - -using namespace std; -using namespace tiledb; -namespace py = pybind11; -using namespace pybind11::literals; - -class PySerializationTest { - -public: - static py::bytes create_serialized_test_query(py::object pyctx, - py::object pyarray) { - int rc; - - tiledb_ctx_t *ctx; - tiledb_array_t *array; - - ctx = (py::capsule)pyctx.attr("__capsule__")(); - if (ctx == nullptr) - TPY_ERROR_LOC("Invalid context pointer."); - - tiledb_ctx_alloc(NULL, &ctx); - array = (py::capsule)pyarray.attr("__capsule__")(); - if (array == nullptr) - TPY_ERROR_LOC("Invalid array pointer."); - - uint32_t subarray_v[] = {3, 7}; - int64_t data[5]; - uint64_t data_size = sizeof(data); - - tiledb_subarray_t *subarray; - tiledb_subarray_alloc(ctx, array, &subarray); - tiledb_subarray_set_subarray(ctx, subarray, &subarray_v); - - tiledb_query_t *query; - tiledb_query_alloc(ctx, array, TILEDB_READ, &query); - tiledb_query_set_subarray_t(ctx, query, subarray); - tiledb_query_set_layout(ctx, query, TILEDB_UNORDERED); - tiledb_query_set_data_buffer(ctx, query, "", data, &data_size); - - tiledb_buffer_list_t *buff_list; - tiledb_buffer_t *buff; - - rc = tiledb_serialize_query(ctx, query, TILEDB_CAPNP, 1, &buff_list); - if (rc == TILEDB_ERR) - TPY_ERROR_LOC("Could not serialize the query."); - - rc = tiledb_buffer_list_flatten(ctx, buff_list, &buff); - if (rc == TILEDB_ERR) - TPY_ERROR_LOC("Could not flatten the buffer list."); - - void *buff_data; - uint64_t buff_num_bytes; - - rc = tiledb_buffer_get_data(ctx, buff, &buff_data, &buff_num_bytes); - if (rc == TILEDB_ERR) - TPY_ERROR_LOC("Could not get the data from the buffer."); - - py::bytes output((char *)buff_data, buff_num_bytes); - - tiledb_buffer_free(&buff); - tiledb_buffer_list_free(&buff_list); - tiledb_subarray_free(&subarray); - tiledb_query_free(&query); - - return output; - } -}; - -void init_test_serialization(py::module &m) { - py::class_(m, "test_serialization") - .def_static("create_serialized_test_query", - &PySerializationTest::create_serialized_test_query); -} - -}; // namespace tiledbpy diff --git a/tiledb/tests/test_serialization.py b/tiledb/tests/test_serialization.py deleted file mode 100644 index 75529904db..0000000000 --- a/tiledb/tests/test_serialization.py +++ /dev/null @@ -1,30 +0,0 @@ -import numpy as np -import pytest - -import tiledb - -from .common import DiskTestCase - -try: - from tiledb.main import test_serialization as ser_test -except ImportError: - pytest.skip("Serialization not enabled.", allow_module_level=True) - - -class SerializationTest(DiskTestCase): - def test_query_deserialization(self): - path = self.path("test_query_deserialization") - dom = tiledb.Domain(tiledb.Dim(domain=(1, 10), dtype=np.uint32)) - attrs = [tiledb.Attr(dtype=np.int64)] - schema = tiledb.ArraySchema(domain=dom, attrs=attrs, sparse=True) - tiledb.Array.create(path, schema) - - data = np.random.randint(-5, 5, 10) - - with tiledb.open(path, "w") as A: - A[np.arange(1, 11)] = data - - with tiledb.open(path, "r") as A: - ctx = tiledb.default_ctx() - ser_qry = ser_test.create_serialized_test_query(ctx, A) - np.testing.assert_array_equal(A.query()[3:8][""], A.set_query(ser_qry)[""]) diff --git a/tiledb/tests/test_stats.py b/tiledb/tests/test_stats.py deleted file mode 100644 index b94dac8097..0000000000 --- a/tiledb/tests/test_stats.py +++ /dev/null @@ -1,70 +0,0 @@ -import numpy as np -from numpy.testing import assert_array_equal - -import tiledb - -from .common import ( - DiskTestCase, - assert_captured, -) - - -class StatsTest(DiskTestCase): - def test_stats(self, capfd): - tiledb.stats_enable() - tiledb.stats_reset() - tiledb.stats_disable() - - tiledb.stats_enable() - - path = self.path("test_stats") - - with tiledb.from_numpy(path, np.arange(10)) as T: - pass - - # basic output check for read stats - tiledb.stats_reset() - with tiledb.open(path) as T: - tiledb.stats_enable() - assert_array_equal(T, np.arange(10)) - - # test stdout version - tiledb.stats_dump() - assert_captured(capfd, "TileDB Embedded Version:") - - # test string version - stats_v = tiledb.stats_dump(print_out=False) - if tiledb.libtiledb.version() < (2, 3): - self.assertTrue("==== READ ====" in stats_v) - else: - self.assertTrue('"timers": {' in stats_v) - self.assertTrue("==== Python Stats ====" in stats_v) - - stats_quiet = tiledb.stats_dump(print_out=False, verbose=False) - if tiledb.libtiledb.version() < (2, 3): - self.assertTrue("Time to load array schema" not in stats_quiet) - - # TODO seems to be a regression, no JSON - stats_json = tiledb.stats_dump(json=True) - self.assertTrue(isinstance(stats_json, dict)) - self.assertTrue("CONSOLIDATE_COPY_ARRAY" in stats_json) - else: - self.assertTrue("==== READ ====" in stats_quiet) - - def test_stats_include_python_json(self): - tiledb.stats_enable() - - path = self.path("test_stats") - - with tiledb.from_numpy(path, np.arange(10)) as T: - pass - - tiledb.stats_reset() - with tiledb.open(path) as T: - tiledb.stats_enable() - assert_array_equal(T, np.arange(10)) - json_stats = tiledb.stats_dump(print_out=False, json=True) - assert isinstance(json_stats, str) - assert "python" in json_stats - assert "timers" in json_stats - assert "counters" in json_stats diff --git a/tiledb/tests/test_subarray.py b/tiledb/tests/test_subarray.py deleted file mode 100644 index 2f2d11b580..0000000000 --- a/tiledb/tests/test_subarray.py +++ /dev/null @@ -1,127 +0,0 @@ -import numpy as np -import pytest - -import tiledb -from tiledb import TileDBError -from tiledb.tests.common import DiskTestCase - - -class SubarrayTest(DiskTestCase): - def test_add_range(self): - dim1 = tiledb.Dim("row", domain=(1, 10)) - dim2 = tiledb.Dim("col", domain=(1, 10)) - dom = tiledb.Domain(dim1, dim2) - att = tiledb.Attr("val", dtype=np.uint64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - uri = self.path("dense_array") - tiledb.Array.create(uri, schema) - - with tiledb.open(uri, "w") as array: - array[1:5, 1:5] = np.reshape(np.arange(1, 17, dtype=np.float64), (4, 4)) - - with tiledb.open(uri, "r") as array: - subarray1 = tiledb.Subarray(array) - - # Check number of ranges: each dimension should have the default range. - assert subarray1.num_dim_ranges(0) == 1 - assert subarray1.num_dim_ranges(1) == 1 - assert subarray1.shape() == (10, 10) - - # Add range to first dim and check still only 1 range (replace default). - subarray1.add_dim_range(0, (1, 2)) - assert subarray1.num_dim_ranges(0) == 1 - assert subarray1.shape() == (2, 10) - - # Add additional range to first dim and check 2 ranges. - subarray1.add_dim_range(0, (4, 4)) - assert subarray1.num_dim_ranges(0) == 2 - assert subarray1.shape() == (3, 10) - - def test_add_ranges_basic(self): - uri = self.path("test_pyquery_basic") - with tiledb.from_numpy(uri, np.random.rand(4)): - pass - - with tiledb.open(uri) as array: - subarray = tiledb.Subarray(array) - - subarray.add_ranges([[(0, 3)]]) - - with self.assertRaises(TileDBError): - subarray.add_ranges([[(0, 3.0)]]) - - subarray.add_ranges([[(0, np.int32(3))]]) - - with self.assertRaises(TileDBError): - subarray.add_ranges([[(3, "a")]]) - - with self.assertRaisesRegex( - TileDBError, - "Failed to cast dim range '\\(1.2344, 5.6789\\)' to dim type UINT64.*$", - ): - subarray.add_ranges([[(1.2344, 5.6789)]]) - - with self.assertRaisesRegex( - TileDBError, - "Failed to cast dim range '\\('aa', 'bbbb'\\)' to dim type UINT64.*$", - ): - subarray.add_ranges([[("aa", "bbbb")]]) - - @pytest.mark.skipif( - tiledb.libtiledb.version()[0] == 2 and tiledb.libtiledb.version()[1] < 15, - reason="dimension labels requires libtiledb version 2.15 or greater", - ) - def test_add_label_ranges_1d(self): - # Create array schema with dimension labels - dim = tiledb.Dim("d1", domain=(1, 10), dtype=np.uint32) - dom = tiledb.Domain(dim) - att = tiledb.Attr("a1", dtype=np.int64) - dim_labels = {0: {"l1": dim.create_label_schema("increasing", np.int64)}} - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), dim_labels=dim_labels) - - # Create array - uri = self.path("dense_array_with_label") - tiledb.Array.create(uri, schema) - - # Add dimension label ranges - with tiledb.open(uri, "r") as array: - subarray1 = tiledb.Subarray(array) - assert subarray1.num_dim_ranges(0) == 1 - - subarray1.add_label_range("l1", (-1, 1)) - assert subarray1.num_dim_ranges(0) == 0 - assert subarray1.num_label_ranges("l1") == 1 - - def test_copy_ranges(self): - # Create array schema with dimension labels - d1 = tiledb.Dim("d1", domain=(1, 10), dtype=np.uint32) - d2 = tiledb.Dim("d2", domain=(1, 10), dtype=np.uint32) - d3 = tiledb.Dim("d3", domain=(1, 10), dtype=np.uint32) - d4 = tiledb.Dim("d4", domain=(1, 10), dtype=np.uint32) - dom = tiledb.Domain(d1, d2, d3, d4) - att = tiledb.Attr("a1", dtype=np.int64) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - - # Create array - uri = self.path("array4d") - tiledb.Array.create(uri, schema) - - # Add ranges (1, 1) and (3, 3) to each dimension of the subarray. - with tiledb.open(uri, "r") as array: - subarray1 = tiledb.Subarray(array) - subarray1.add_ranges( - (((1, 1), (3, 3)), ((1, 1), (3, 3)), ((1, 1), (3, 3)), ((1, 1), (3, 3))) - ) - assert subarray1.num_dim_ranges(0) == 2 - assert subarray1.num_dim_ranges(1) == 2 - assert subarray1.num_dim_ranges(2) == 2 - assert subarray1.num_dim_ranges(3) == 2 - - # Should copy ranges from d1 and d3. - # All other dimensions should only have default range. - subarray2 = tiledb.Subarray(array) - subarray2.copy_ranges(subarray1, [0, 2]) - assert subarray2.num_dim_ranges(0) == 2 - assert subarray2.num_dim_ranges(1) == 1 - assert subarray2.num_dim_ranges(2) == 2 - assert subarray2.num_dim_ranges(3) == 1 diff --git a/tiledb/tests/test_timestamp_overrides.py b/tiledb/tests/test_timestamp_overrides.py deleted file mode 100644 index f0a114ea3e..0000000000 --- a/tiledb/tests/test_timestamp_overrides.py +++ /dev/null @@ -1,156 +0,0 @@ -import datetime -import os -import subprocess -import sys - -import numpy as np -import pytest - -import tiledb -from tiledb.main import PyFragmentInfo -from tiledb.tests.common import DiskTestCase - - -def has_libfaketime(): - try: - subprocess.check_output(["which", "faketime"]) - return True - except subprocess.CalledProcessError: - return False - - -@pytest.mark.skipif( - sys.platform == "win32" or not has_libfaketime(), - reason=f"libfaketime not installed. {'Not supported on Windows.' if sys.platform == 'win32' else ''}", -) -class TestTimestampOverrides(DiskTestCase): - def test_timestamp_overrides(self): - uri_fragments = self.path("time_test_fragments") - uri_group_metadata = self.path("time_test_group_metadata") - - python_exe = sys.executable - cmd = ( - f"from tiledb.tests.test_timestamp_overrides import TestTimestampOverrides; " - f"TestTimestampOverrides().helper_fragments('{uri_fragments}'); " - f"TestTimestampOverrides().helper_group_metadata('{uri_group_metadata}')" - ) - test_path = os.path.dirname(os.path.abspath(__file__)) - - try: - # "+x0" is the time multiplier, which makes the time freeze during the test - subprocess.check_output( - ["faketime", "-f", "+x0", python_exe, "-c", cmd], cwd=test_path - ) - except subprocess.CalledProcessError as e: - raise e - - def helper_fragments(self, uri): - start_datetime = datetime.datetime.now() - - fragments = 5 - A = np.zeros(fragments) - - dom = tiledb.Domain(tiledb.Dim(domain=(0, 4), tile=fragments, dtype=np.int64)) - att = tiledb.Attr(dtype=A.dtype) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) - - tiledb.DenseArray.create(uri, schema) - - uris_seen = set() - chronological_order = [] - - for fragment_idx in range(fragments): - with tiledb.DenseArray(uri, mode="w") as T: - T[fragment_idx : fragment_idx + 1] = fragment_idx - - # Read the data back immediately after writing to ensure it is correct - with tiledb.DenseArray(uri, mode="r") as T: - read_data = T[fragment_idx : fragment_idx + 1] - self.assertEqual(read_data, np.array([fragment_idx])) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - uris = fragment_info.get_uri() - new_uri = set(uris) - uris_seen - uris_seen.update(uris) - chronological_order.extend(new_uri) - - end_datetime = datetime.datetime.now() - self.assertEqual(start_datetime, end_datetime) - - fragment_info = PyFragmentInfo(uri, schema, False, tiledb.default_ctx()) - final_uris = fragment_info.get_uri() - - # Keep only the last part of the uris - final_uris = [os.path.basename(uri) for uri in final_uris] - chronological_order = [os.path.basename(uri) for uri in chronological_order] - - # Check that timestamps are the same (faketime is working) - timestamps = set() - for uri in final_uris: - parts = uri.split("_") - timestamps.add((parts[2], parts[3])) - - self.assertEqual(len(timestamps), 1) - - # Check that UUIDs are unique - uuids = set() - for uri in final_uris: - parts = uri.split("_") - uuids.add(parts[4]) - - self.assertEqual(len(uuids), fragments) - - # Ensure that write order is correct - self.assertEqual(chronological_order, sorted(final_uris)) - - def helper_group_metadata(self, uri): - vfs = tiledb.VFS() - - start_datetime = datetime.datetime.now() - - tiledb.Group.create(uri) - loop_count = 10 - uris_seen = set() - chronological_order = [] - meta_path = f"{uri}/__meta" - - for i in range(loop_count): - with tiledb.Group(uri, "w") as grp: - grp.meta["meta"] = i - - # Read the data back immediately after writing to ensure it is correct - with tiledb.Group(uri, "r") as grp: - self.assertEqual(grp.meta["meta"], i) - - uris = vfs.ls(meta_path) - new_uri = set(uris) - uris_seen - uris_seen.update(uris) - chronological_order.extend(new_uri) - - end_datetime = datetime.datetime.now() - self.assertEqual(start_datetime, end_datetime) - - final_uris = vfs.ls(meta_path) - - # Keep only the last part of the uris - final_uris = [os.path.basename(uri) for uri in final_uris] - chronological_order = [os.path.basename(uri) for uri in chronological_order] - - # Check that timestamps are the same (faketime is working) - timestamps = set() - for uri in final_uris: - parts = uri.split("_") - timestamps.add((parts[2], parts[3])) - - self.assertEqual(len(timestamps), 1) - - # Check that UUIDs are unique - uuids = set() - for uri in final_uris: - parts = uri.split("_") - uuids.add(parts[4]) - - self.assertEqual(len(uuids), loop_count) - - # Ensure that write order is correct - self.assertEqual(chronological_order, sorted(final_uris)) diff --git a/tiledb/tests/test_util.py b/tiledb/tests/test_util.py deleted file mode 100644 index 297e18800f..0000000000 --- a/tiledb/tests/test_util.py +++ /dev/null @@ -1,182 +0,0 @@ -import tempfile -from pathlib import Path - -import numpy as np -from numpy.testing import assert_array_equal - -import tiledb - -from .common import DiskTestCase - - -class UtilTest(DiskTestCase): - def test_empty_like(self): - arr = np.zeros((10, 10), dtype=np.float32) - - def check_schema(self, s): - self.assertEqual(s.attr(0).dtype, np.float32) - self.assertEqual(s.shape, (10, 10)) - self.assertEqual(s.domain.dim(0).shape, (10,)) - self.assertEqual(s.domain.dim(1).shape, (10,)) - - with self.assertRaises(ValueError): - tiledb.schema_like("", None) - - schema = tiledb.schema_like(arr, tile=1) - self.assertIsInstance(schema, tiledb.ArraySchema) - check_schema(self, schema) - - uri = self.path("empty_like") - T = tiledb.empty_like(uri, arr) - check_schema(self, T.schema) - self.assertEqual(T.shape, arr.shape) - self.assertEqual(T.dtype, arr.dtype) - - uri = self.path("empty_like_shape") - T = tiledb.empty_like(uri, arr.shape, dtype=arr.dtype) - check_schema(self, T.schema) - self.assertEqual(T.shape, arr.shape) - self.assertEqual(T.dtype, arr.dtype) - - # test a fake object with .shape, .ndim, .dtype - class FakeArray(object): - def __init__(self, shape, dtype): - self.shape = shape - self.ndim = len(shape) - self.dtype = dtype - - fake = FakeArray((3, 3), np.int16) - schema2 = tiledb.empty_like(self.path("fake_like"), fake) - self.assertIsInstance(schema2, tiledb.Array) - self.assertEqual(schema2.shape, fake.shape) - self.assertEqual(schema2.dtype, fake.dtype) - self.assertEqual(schema2.ndim, fake.ndim) - - # test passing shape and dtype directly - schema3 = tiledb.schema_like(shape=(4, 4), dtype=np.float32) - self.assertIsInstance(schema3, tiledb.ArraySchema) - self.assertEqual(schema3.attr(0).dtype, np.float32) - self.assertEqual(schema3.domain.dim(0).tile, 4) - schema3 = tiledb.schema_like(shape=(4, 4), dtype=np.float32, tile=1) - self.assertEqual(schema3.domain.dim(0).tile, 1) - - def test_open(self): - uri = self.path("load") - with tiledb.from_numpy(uri, np.array(np.arange(3))) as T: - with tiledb.open(uri) as T2: - self.assertEqual(T.schema, T2.schema) - assert_array_equal(T, T2) - - def test_save(self): - uri = self.path("test_save") - arr = np.array(np.arange(3)) - with tiledb.save(uri, arr): - with tiledb.open(uri) as T: - assert_array_equal(arr, T) - - def test_array_exists(self): - with tempfile.NamedTemporaryFile() as tmpfn: - self.assertFalse(tiledb.array_exists(tmpfn.name)) - - uri = self.path("test_array_exists_dense") - with tiledb.from_numpy(uri, np.arange(0, 5)) as T: - self.assertTrue(tiledb.array_exists(uri)) - self.assertTrue(tiledb.array_exists(uri, isdense=True)) - self.assertFalse(tiledb.array_exists(uri, issparse=True)) - - uri = self.path("test_array_exists_sparse") - dom = tiledb.Domain(tiledb.Dim(domain=(0, 3), tile=4, dtype=int)) - att = tiledb.Attr(dtype=int) - schema = tiledb.ArraySchema(domain=dom, attrs=(att,), sparse=True) - tiledb.Array.create(uri, schema) - - with tiledb.SparseArray(uri, mode="w") as T: - T[[0, 1]] = np.array([0, 1]) - - self.assertTrue(tiledb.array_exists(uri)) - self.assertTrue(tiledb.array_exists(uri, issparse=True)) - self.assertFalse(tiledb.array_exists(uri, isdense=True)) - - uri3 = self.path("test_array_exists_deleted") - with tiledb.from_numpy(uri3, np.arange(0, 5)) as T: - self.assertTrue(tiledb.array_exists(uri3)) - tiledb.Array.delete_array(uri3) - self.assertFalse(tiledb.array_exists(uri3)) - - # test with context - ctx = tiledb.Ctx() - self.assertFalse(tiledb.array_exists(uri3, ctx=ctx)) - with tiledb.from_numpy(uri3, np.arange(0, 5), ctx=ctx) as T: - self.assertTrue(tiledb.array_exists(uri3, ctx=ctx)) - - def test_ls(self): - def create_array(array_name, sparse): - dom = tiledb.Domain( - tiledb.Dim(name="rows", domain=(1, 4), tile=4, dtype=np.int32), - tiledb.Dim(name="cols", domain=(1, 4), tile=4, dtype=np.int32), - ) - schema = tiledb.ArraySchema( - domain=dom, sparse=sparse, attrs=[tiledb.Attr(name="a", dtype=np.int32)] - ) - tiledb.Array.create(array_name, schema) - - uri = tempfile.mkdtemp() - - tiledb.group_create(str(Path(uri) / "my_group")) - tiledb.group_create(str(Path(uri) / "my_group" / "dense_arrays")) - tiledb.group_create(str(Path(uri) / "my_group" / "sparse_arrays")) - - create_array(str(Path(uri) / "my_group" / "dense_arrays" / "array_A"), False) - create_array(str(Path(uri) / "my_group" / "dense_arrays" / "array_B"), False) - create_array(str(Path(uri) / "my_group" / "sparse_arrays" / "array_C"), True) - create_array(str(Path(uri) / "my_group" / "sparse_arrays" / "array_D"), True) - - group_uri = "{}/my_group".format(uri) - # List children - results = [] - tiledb.ls( - group_uri, - lambda obj_path, obj_type: results.append((Path(obj_path).name, obj_type)), - ) - self.assertEqual( - results, - [ - ("dense_arrays", "group"), - ("sparse_arrays", "group"), - ], - ) - - # List children of a group - dense_arrays_uri = "{}/my_group/dense_arrays".format(uri) - results = [] - tiledb.ls( - dense_arrays_uri, - lambda obj_path, obj_type: results.append((Path(obj_path).name, obj_type)), - ) - self.assertEqual( - results, - [ - ("array_A", "array"), - ("array_B", "array"), - ], - ) - - # test with a callback that always throws an exception to see if it is propagated - with self.assertRaises(tiledb.TileDBError) as excinfo: - tiledb.ls(dense_arrays_uri, lambda x, y: 1 / 0) - assert "ZeroDivisionError: division by zero" in str(excinfo.value) - - def test_object_type(self): - uri = self.path("test_object_type") - - # None case - self.assertIsNone(tiledb.object_type(uri)) - - # Array case - with tiledb.from_numpy(uri, np.arange(0, 5)) as T: - self.assertEqual(tiledb.object_type(uri), "array") - tiledb.Array.delete_array(uri) - - # Group case - tiledb.group_create(uri) - self.assertEqual(tiledb.object_type(uri), "group") diff --git a/tiledb/tests/test_vfs.py b/tiledb/tests/test_vfs.py deleted file mode 100644 index 8219539a02..0000000000 --- a/tiledb/tests/test_vfs.py +++ /dev/null @@ -1,445 +0,0 @@ -import io -import os -import pathlib -import random -import sys - -import numpy as np -import pytest - -import tiledb - -from .common import DiskTestCase, rand_utf8 - - -class TestVFS(DiskTestCase): - def test_supports(self): - vfs = tiledb.VFS() - - self.assertTrue(vfs.supports("file")) - self.assertIsInstance(vfs.supports("s3"), bool) - self.assertIsInstance(vfs.supports("hdfs"), bool) - self.assertIsInstance(vfs.supports("gcs"), bool) - self.assertIsInstance(vfs.supports("azure"), bool) - - with self.assertRaises(ValueError): - vfs.supports("invalid") - - def test_vfs_config(self): - opt = {"region": "us-west-x1234"} - params = [opt, tiledb.Config(opt)] - for param in params: - vfs = tiledb.VFS(param) - assert vfs.config()["region"] == opt["region"] - - def test_dir(self): - vfs = tiledb.VFS() - - dir = self.path("foo") - self.assertFalse(vfs.is_dir(dir)) - - # create - vfs.create_dir(dir) - if pytest.tiledb_vfs != "s3": - self.assertTrue(vfs.is_dir(dir)) - - # remove - vfs.remove_dir(dir) - self.assertFalse(vfs.is_dir(dir)) - - # create nested path - dir = self.path("foo/bar") - if pytest.tiledb_vfs != "s3": - # this fails locally because "foo" base path does not exist - # this will not fail on s3 because there is no concept of directory - with self.assertRaises(tiledb.TileDBError): - vfs.create_dir(dir) - - vfs.create_dir(self.path("foo")) - vfs.create_dir(self.path("foo/bar")) - if pytest.tiledb_vfs != "s3": - self.assertTrue(vfs.is_dir(dir)) - - def test_file(self): - vfs = tiledb.VFS() - - file = self.path("foo") - self.assertFalse(vfs.is_file(file)) - - # create - vfs.touch(file) - self.assertTrue(vfs.is_file(file)) - - # remove - vfs.remove_file(file) - self.assertFalse(vfs.is_file(file)) - - # check nested path - file = self.path("foo/bar") - if pytest.tiledb_vfs != "s3": - # this fails locally because "foo" base path does not exist - # this will not fail on s3 because there is no concept of directory - with self.assertRaises(tiledb.TileDBError): - vfs.touch(file) - - def test_move(self): - vfs = tiledb.VFS() - - vfs.create_dir(self.path("foo")) - vfs.create_dir(self.path("bar")) - vfs.touch(self.path("bar/baz")) - - self.assertTrue(vfs.is_file(self.path("bar/baz"))) - - vfs.move_file(self.path("bar/baz"), self.path("foo/baz")) - - self.assertFalse(vfs.is_file(self.path("bar/baz"))) - self.assertTrue(vfs.is_file(self.path("foo/baz"))) - - # moving to invalid dir should raise an error - if pytest.tiledb_vfs != "s3": - # this fails locally because "foo" base path does not exist - # this will not fail on s3 because there is no concept of directory - with self.assertRaises(tiledb.TileDBError): - vfs.move_dir(self.path("foo/baz"), self.path("do_not_exist/baz")) - - @pytest.mark.skipif( - sys.platform == "win32", - reason="VFS copy commands from core are not supported on Windows", - ) - def test_copy(self): - vfs = tiledb.VFS() - - vfs.create_dir(self.path("foo")) - vfs.create_dir(self.path("bar")) - vfs.touch(self.path("foo/baz")) - - self.assertTrue(vfs.is_file(self.path("foo/baz"))) - - vfs.copy_file(self.path("foo/baz"), self.path("bar/baz")) - - self.assertTrue(vfs.is_file(self.path("foo/baz"))) - self.assertTrue(vfs.is_file(self.path("bar/baz"))) - - vfs.copy_dir(self.path("foo"), self.path("baz")) - - self.assertTrue(vfs.is_file(self.path("baz/baz"))) - - # copying to invalid dir should raise an error - if pytest.tiledb_vfs != "s3": - # this fails locally because "foo" base path does not exist - # this will not fail on s3 because there is no concept of directory - with self.assertRaises(tiledb.TileDBError): - vfs.copy_dir(self.path("foo/baz"), self.path("do_not_exist/baz")) - - def test_write_read(self): - vfs = tiledb.VFS() - - buffer = b"bar" - fio = vfs.open(pathlib.Path(self.path("foo")), "wb") - fio.write(buffer) - self.assertEqual(vfs.file_size(self.path("foo")), 3) - - fio = vfs.open(self.path("foo").encode("utf-8"), "rb") - self.assertEqual(fio.read(3), buffer) - # test read with numpy integers - fio.seek(np.int64(0)) - self.assertEqual(fio.read(np.int32(3)), buffer) - fio.seek(np.int64(0)) - self.assertEqual(fio.read(np.uint64(3)), buffer) - fio.close() - - # write / read empty input - fio = vfs.open(self.path("baz"), "wb") - fio.write(b"") - fio.close() - self.assertEqual(vfs.file_size(self.path("baz")), 0) - - fio = vfs.open(self.path("baz"), "rb") - self.assertEqual(fio.read(0), b"") - fio.close() - - # read from file that does not exist - with self.assertRaises(tiledb.TileDBError): - vfs.open(self.path("do_not_exist"), "rb") - - def test_io(self): - vfs = tiledb.VFS() - - buffer = b"0123456789" - with tiledb.FileIO(vfs, self.path("foo"), mode="wb") as fio: - fio.write(buffer) - fio.flush() - self.assertEqual(fio.tell(), len(buffer)) - - with tiledb.FileIO(vfs, self.path("foo"), mode="rb") as fio: - with self.assertRaises(IOError): - fio.write(b"foo") - - self.assertEqual(vfs.file_size(self.path("foo")), len(buffer)) - - fio = tiledb.FileIO(vfs, self.path("foo"), mode="rb") - self.assertEqual(fio.read(3), b"012") - self.assertEqual(fio.tell(), 3) - self.assertEqual(fio.read(3), b"345") - self.assertEqual(fio.tell(), 6) - self.assertEqual(fio.read(10), b"6789") - self.assertEqual(fio.tell(), 10) - - # seek from beginning - fio.seek(0) - self.assertEqual(fio.tell(), 0) - self.assertEqual(fio.read(), buffer) - - # seek must be positive when SEEK_SET - with self.assertRaises(ValueError): - fio.seek(-1, 0) - - # seek from current positfion - fio.seek(5) - self.assertEqual(fio.tell(), 5) - fio.seek(3, 1) - self.assertEqual(fio.tell(), 8) - fio.seek(-3, 1) - self.assertEqual(fio.tell(), 5) - - # seek from end - fio.seek(-4, 2) - self.assertEqual(fio.tell(), 6) - - # Test readall - fio.seek(0) - self.assertEqual(fio.readall(), buffer) - self.assertEqual(fio.tell(), 10) - - fio.seek(5) - self.assertEqual(fio.readall(), buffer[5:]) - self.assertEqual(fio.readall(), b"") - - # Test readinto - fio.seek(0) - test_bytes = bytearray(10) - self.assertEqual(fio.readinto(test_bytes), 10) - self.assertEqual(test_bytes, buffer) - - # Reading from the end should return empty - fio.seek(0) - fio.read() - self.assertEqual(fio.read(), b"") - - # Test writing and reading lines with TextIOWrapper - lines = [rand_utf8(random.randint(0, 50)) + "\n" for _ in range(10)] - rand_uri = self.path("test_fio.rand") - with tiledb.FileIO(vfs, rand_uri, "wb") as f: - txtio = io.TextIOWrapper(f, encoding="utf-8") - txtio.writelines(lines) - txtio.flush() - - with tiledb.FileIO(vfs, rand_uri, "rb") as f2: - txtio = io.TextIOWrapper(f2, encoding="utf-8") - self.assertEqual(txtio.readlines(), lines) - - def test_sc42569_vfs_memoryview(self): - # This test is to ensure that giving np.ndarray buffer to readinto works - # when trying to write bytes that cannot be converted to float32 or int32 - vfs = tiledb.VFS() - - buffer = b"012\x00\x01" - with tiledb.FileIO(vfs, self.path("foo"), mode="wb") as fio: - fio.write(buffer) - fio.flush() - self.assertEqual(fio.tell(), len(buffer)) - - fio = tiledb.FileIO(vfs, self.path("foo"), mode="rb") - - # Test readinto with np.float32 - fio.seek(0) - test_np_array = np.empty(5, dtype=np.float32) - n_bytes = fio.readinto(test_np_array) - self.assertEqual(n_bytes, 5) - self.assertEqual(test_np_array.tobytes()[:n_bytes], buffer) - - # Test readinto with np.int32 - fio.seek(0) - test_np_array = np.empty(5, dtype=np.int32) - n_bytes = fio.readinto(test_np_array) - self.assertEqual(n_bytes, 5) - self.assertEqual(test_np_array.tobytes()[:n_bytes], buffer) - - def test_ls(self): - basepath = self.path("test_vfs_ls") - self.vfs.create_dir(basepath) - for id in (1, 2, 3): - dir = os.path.join(basepath, f"dir{id}") - self.vfs.create_dir(dir) - fname = os.path.join(basepath, "file_" + str(id)) - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - expected = ("file_1", "file_2", "file_3") - # empty directories do not "exist" on s3 - if pytest.tiledb_vfs != "s3": - expected = expected + ("dir1", "dir2", "dir3") - - self.assertSetEqual( - set(expected), - set( - map( - lambda x: os.path.basename(x.split("test_vfs_ls")[1]), - self.vfs.ls(basepath), - ) - ), - ) - - @pytest.mark.skipif( - pytest.tiledb_vfs not in ["file", "s3", "azure", "gcs"], - reason="Only test on local, S3, Azure, and GCS", - ) - def test_ls_recursive(self): - # Create a nested directory structure to test recursive listing - basepath = self.path("test_vfs_ls_recursive") - self.vfs.create_dir(basepath) - - dir = os.path.join(basepath, "dir1") - self.vfs.create_dir(dir) - - fname = os.path.join(dir, "file_1") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - fname = os.path.join(dir, "file_2") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - dir = os.path.join(basepath, "dir2") - self.vfs.create_dir(dir) - - dir2 = os.path.join(dir, "dir2_1") - self.vfs.create_dir(dir2) - - fname = os.path.join(dir2, "file_1") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - fname = os.path.join(dir2, "file_2") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - dir2 = os.path.join(dir, "dir2_2") - self.vfs.create_dir(dir2) - - fname = os.path.join(dir2, "file_1") - with tiledb.FileIO(self.vfs, fname, "wb") as fio: - fio.write(b"") - - expected = [ - "dir1", - "dir1/file_1", - "dir1/file_2", - "dir2", - "dir2/dir2_1", - "dir2/dir2_1/file_1", - "dir2/dir2_1/file_2", - "dir2/dir2_2", - "dir2/dir2_2/file_1", - ] - - self.assertSetEqual( - set(expected), - set( - map( - # # Keep only the paths after the basepath and normalize them to work on all platforms - lambda x: os.path.normpath( - x.split("test_vfs_ls_recursive/")[1] - ).replace("\\", "/"), - self.vfs.ls_recursive(basepath), - ) - ), - ) - - # Check with user provided callback - callback_results = [] - - def callback(uri, _): # we don't use the second argument 'is_dir' - callback_results.append(uri) - return True - - self.vfs.ls_recursive(basepath, callback) - - self.assertSetEqual( - set(expected), - set( - map( - # Keep only the paths after the basepath and normalize them to work on all platforms - lambda x: os.path.normpath( - x.split("test_vfs_ls_recursive/")[1] - ).replace("\\", "/"), - callback_results, - ) - ), - ) - - # Can also be called by calling ls with recursive=True - self.assertSetEqual( - set(expected), - set( - map( - # Keep only the paths after the basepath and normalize them to work on all platforms - lambda x: os.path.normpath( - x.split("test_vfs_ls_recursive/")[1] - ).replace("\\", "/"), - self.vfs.ls(basepath, recursive=True), - ) - ), - ) - - def test_dir_size(self): - vfs = tiledb.VFS() - - path = self.path("test_vfs_dir_size") - vfs.create_dir(path) - rand_sizes = np.random.choice(100, size=4, replace=False) - for size in rand_sizes: - file_path = os.path.join(path, "f_" + str(size)) - with tiledb.FileIO(vfs, file_path, "wb") as f: - data = os.urandom(size) - f.write(data) - - self.assertEqual(vfs.dir_size(path), sum(rand_sizes)) - - def test_open_with(self): - uri = self.path("test_open_with") - vfs = tiledb.VFS() - buffer = b"0123456789" - - with vfs.open(uri, mode="wb") as fio: - fio.write(buffer) - fio.flush() - self.assertEqual(fio.tell(), len(buffer)) - - with vfs.open(uri, mode="rb") as fio: - with self.assertRaises(IOError): - fio.write(b"foo") - self.assertEqual(fio.read(len(buffer)), buffer) - - -def test_vfs_isdir(tmp_path): - """isdir is an alias for is_dir.""" - fs = tiledb.VFS() - assert fs.isdir(tmp_path.as_posix()) - - -def test_vfs_isfile(tmp_path): - """isfile is an alias for is_file.""" - tmp_file = tmp_path.joinpath("foo") - tmp_file.touch() - fs = tiledb.VFS() - assert fs.isfile(tmp_file.as_posix()) - - -def test_vfs_size(tmp_path): - """size is an alias for file_size.""" - tmp_file = tmp_path.joinpath("foo") - buffer = b"0123456789" - tmp_file.write_bytes(buffer) - fs = tiledb.VFS() - assert fs.size(tmp_file.as_posix()) == len(buffer) diff --git a/tiledb/tests/test_webp.cc b/tiledb/tests/test_webp.cc deleted file mode 100644 index 68c0ef43b1..0000000000 --- a/tiledb/tests/test_webp.cc +++ /dev/null @@ -1,29 +0,0 @@ -#include -#include - -#include - -namespace tiledbpy { -using namespace tiledb; -namespace py = pybind11; - -class WebpFilter { -public: - static bool webp_filter_exists() { - Context ctx; - try { - auto f = Filter(ctx, TILEDB_FILTER_WEBP); - } catch (TileDBError &) { - // Can't create WebP filter; built with TILEDB_WEBP=OFF - return false; - } - return true; - } -}; - -void init_test_webp_filter(py::module &m) { - py::class_(m, "test_webp_filter") - .def_static("webp_filter_exists", &WebpFilter::webp_filter_exists); -} - -}; // namespace tiledbpy \ No newline at end of file diff --git a/tiledb/tests/test_webp.py b/tiledb/tests/test_webp.py deleted file mode 100644 index a7c9eb0eb1..0000000000 --- a/tiledb/tests/test_webp.py +++ /dev/null @@ -1,186 +0,0 @@ -import shutil -import tempfile - -import numpy as np -import pytest -from numpy.testing import assert_allclose, assert_array_equal - -import tiledb -import tiledb.main as main - - -@pytest.mark.skipif( - not main.test_webp_filter.webp_filter_exists(), - reason="Can't create WebP filter; built with TILEDB_WEBP=OFF", -) -@pytest.mark.parametrize( - "format, quality, lossless", - [ - ( - tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGB, - 100.0, - False, - ), # Test setting format with enum values - (tiledb.filter.WebpFilter.WebpInputFormat.WEBP_BGR, 50.0, True), - (tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGBA, 25.5, False), - (4, 0.0, True), # Test setting format with integral type - ], -) -def test_webp_ctor(format, quality, lossless): - webp_filter = tiledb.WebpFilter( - input_format=format, quality=quality, lossless=lossless - ) - np.testing.assert_equal( - webp_filter.input_format, tiledb.filter.WebpFilter.WebpInputFormat(format) - ) - np.testing.assert_equal(webp_filter.quality, quality) - np.testing.assert_equal(webp_filter.lossless, lossless) - - -@pytest.mark.skipif( - not main.test_webp_filter.webp_filter_exists(), - reason="Can't create WebP filter; built with TILEDB_WEBP=OFF", -) -@pytest.mark.parametrize( - "attr_dtype, dim_dtype, var, sparse", - [ - (np.int64, np.int64, None, True), # Sparse arrays are not supported - (np.int64, np.int64, True, False), # Variable attributes are not supported - ], -) -def test_webp_init(attr_dtype, dim_dtype, var, sparse): - with pytest.raises(tiledb.TileDBError): - tiledb.ArraySchema( - domain=tiledb.Domain( - [ - tiledb.Dim("y", domain=(1, 100), dtype=dim_dtype), - tiledb.Dim("x", domain=(1, 300), dtype=dim_dtype), - ] - ), - attrs=[ - tiledb.Attr( - "rgb", dtype=attr_dtype, var=var, filters=[tiledb.WebpFilter()] - ) - ], - sparse=sparse, - ) - - -def make_image_data(width, height, pixel_depth): - center_x = width / 2 - center_y = height / 2 - - colors = { - "red": [255, 0, 0], - "green": [0, 255, 0], - "blue": [0, 0, 255], - "white": [255, 255, 255], - "black": [0, 0, 0], - } - if pixel_depth > 3: - for color in colors.values(): - color.append(255) - - rgb = [] - for row in range(0, height): - r = [] - for col in range(0, width): - if row < center_y and col < center_x: - r.append(colors["red"]) - elif row < center_y and col > center_x: - r.append(colors["green"]) - elif row > center_y and col < center_x: - r.append(colors["blue"]) - elif row > center_y and col > center_x: - r.append(colors["white"]) - elif row == center_y or col == center_x: - r.append(colors["black"]) - rgb.append(r) - return rgb - - -@pytest.mark.skipif( - not main.test_webp_filter.webp_filter_exists(), - reason="Can't create WebP filter; built with TILEDB_WEBP=OFF", -) -@pytest.mark.parametrize( - "width, height", - [ - (3, 7), - (20, 20), - (40, 40), - (479, 149), - (1213, 1357), - (1111, 3333), - ], -) -@pytest.mark.parametrize( - "colorspace", - [ - tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGB, - tiledb.filter.WebpFilter.WebpInputFormat.WEBP_BGR, - tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGBA, - tiledb.filter.WebpFilter.WebpInputFormat.WEBP_BGRA, - ], -) -@pytest.mark.parametrize("lossless", [True, False]) -def test_webp_filter(width, height, colorspace, lossless): - pixel_depth = ( - 3 - if int(colorspace) < int(tiledb.filter.WebpFilter.WebpInputFormat.WEBP_RGBA) - else 4 - ) - data = make_image_data(width, height, pixel_depth) - data = np.array(data, dtype=np.uint8).reshape(height, width * pixel_depth) - - y_tile = round(height / 2) - x_tile = round(width / 2) * pixel_depth - - dim_dtype = np.min_scalar_type(data.size) - dims = ( - tiledb.Dim( - name="Y", - domain=(1, height), - dtype=dim_dtype, - tile=y_tile, - ), - tiledb.Dim( - name="X", - domain=(1, width * pixel_depth), - dtype=dim_dtype, - tile=x_tile, - ), - ) - schema = tiledb.ArraySchema( - domain=tiledb.Domain(*dims), - attrs=[ - tiledb.Attr( - name="rgb", - dtype=np.uint8, - filters=[ - tiledb.WebpFilter( - input_format=colorspace, quality=100.0, lossless=lossless - ) - ], - ) - ], - ) - - uri = tempfile.mkdtemp() - tiledb.Array.create(uri, schema) - with tiledb.open(uri, "w") as A: - A[:] = data - with tiledb.open(uri, "r") as A: - read_image = A[:] - - if lossless: - assert_array_equal( - data.reshape(np.array(read_image["rgb"]).shape), read_image["rgb"] - ) - else: - assert_allclose( - data.reshape(np.array(read_image["rgb"]).shape), read_image["rgb"], 125 - ) - - # Cleanup. - shutil.rmtree(uri) diff --git a/tiledb/tests/test_write_subarray.py b/tiledb/tests/test_write_subarray.py deleted file mode 100644 index 20a6caaa27..0000000000 --- a/tiledb/tests/test_write_subarray.py +++ /dev/null @@ -1,293 +0,0 @@ -from collections import OrderedDict - -import numpy as np -import pytest - -import tiledb - -from .common import DiskTestCase, assert_array_equal, assert_dict_arrays_equal - -SUPPORTED_INTEGER_DTYPES = ( - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, -) - - -SUPPORTED_DATETIME64_RESOLUTION = ("Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns") - - -class TestWriteSubarrayDense(DiskTestCase): - @pytest.mark.parametrize("dim_res", SUPPORTED_DATETIME64_RESOLUTION) - def test_1d_datetime_full_write(self, dim_res): - """Create TileDB array, write data, and return the URI.""" - # Create array. - uri = self.path(f"write_subarray_1d_datetime_{dim_res}") - start_time = np.datetime64("2000-01-01", dim_res) - domain = (start_time, start_time + np.timedelta64(99, dim_res)) - schema = tiledb.ArraySchema( - tiledb.Domain( - tiledb.Dim( - name="d1", - domain=domain, - tile=100, - dtype=np.dtype(f"M8[{dim_res}]"), - ) - ), - [tiledb.Attr(name="", dtype=np.float64)], - ) - tiledb.Array.create(uri, schema) - - # Write data. - data = np.random.rand(100) - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, domain) - array.write_subarray(subarray, data) - - # Check results. - with tiledb.open(uri, "r") as array: - result = array[...] - assert_array_equal(result, data) - - def test_1d_full_write(self): - # Create array. - uri = self.path("dense_write_subarray_1d_full_write") - schema = tiledb.ArraySchema( - tiledb.Domain(tiledb.Dim(name="d1", domain=(0, 999), tile=1000)), - [tiledb.Attr(name="", dtype=np.float64)], - ) - tiledb.Array.create(uri, schema) - - # Write data. - data = np.random.rand(1000) - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (0, 999)) - array.write_subarray(subarray, data) - - # Check results. - with tiledb.open(uri, "r") as array: - result = array[...] - assert_array_equal(result, data) - - def test_1d_partial_write(self): - # Create array. - uri = self.path("dense_write_subarray_1d_multiple_partial_writes") - schema = tiledb.ArraySchema( - tiledb.Domain(tiledb.Dim(name="d1", domain=(0, 99), tile=100)), - [tiledb.Attr(name="", dtype=np.float32)], - ) - tiledb.Array.create(uri, schema) - - # Write data. - data = np.random.rand(10).astype(np.float32) - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (10, 19)) - array.write_subarray(subarray, data) - - # Check results. - with tiledb.open(uri, "r") as array: - result = array[10:20] - assert_array_equal(result, data) - - def test_multidim_set_all_ranges(self): - # Create array. - uri = self.path("dense_write_subarray_multidim_set_all_ranges") - schema = tiledb.ArraySchema( - tiledb.Domain( - tiledb.Dim(name="d1", domain=(0, 99), tile=100), - tiledb.Dim(name="d2", domain=(0, 99), tile=100), - tiledb.Dim(name="d3", domain=(0, 99), tile=100), - ), - [ - tiledb.Attr(name="a1", dtype=np.float64), - tiledb.Attr(name="a2", dtype=np.float64), - ], - ) - tiledb.Array.create(uri, schema) - - # Write data. - data = OrderedDict( - [ - ("a1", np.random.rand(1000).reshape((10, 10, 10))), - ("a2", np.random.rand(1000).reshape((10, 10, 10))), - ] - ) - - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (0, 9)) - subarray.add_dim_range(1, (10, 19)) - subarray.add_dim_range(2, (20, 29)) - array.write_subarray(subarray, data) - - # Check results. - with tiledb.open(uri, "r") as array: - nonempty = array.nonempty_domain() - assert nonempty == ((0, 9), (10, 19), (20, 29)) - result = array[0:10, 10:20, 20:30] - assert_dict_arrays_equal(result, data) - - def test_multidim_set_some_ranges(self): - # Create array. - uri = self.path("dense_write_subarray_multidim_set_some_ranges") - schema = tiledb.ArraySchema( - tiledb.Domain( - tiledb.Dim(name="d1", domain=(0, 99), tile=100), - tiledb.Dim(name="d2", domain=(0, 99), tile=100), - ), - [tiledb.Attr(name="a1", dtype=np.float64)], - ) - tiledb.Array.create(uri, schema) - - # Write data. - data = np.random.rand(1000) - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(1, (11, 20)) - array.write_subarray(subarray, {"a1": data}) - - # Check results. - with tiledb.open(uri, "r") as array: - nonempty = array.nonempty_domain() - assert nonempty[0] == (0, 99) - assert nonempty[1] == (11, 20) - result = array[:, 11:21] - assert_dict_arrays_equal(result, {"a1": data.reshape(100, 10)}) - - def test_with_negative_domain(self): - # Create array. - uri = self.path("dense_write_subarray_by_labels") - schema = tiledb.ArraySchema( - tiledb.Domain( - tiledb.Dim(name="d1", domain=(-100, 100), tile=201, dtype=np.int32) - ), - [tiledb.Attr(name="a1", dtype=np.float64)], - ) - tiledb.Array.create(uri, schema) - - # Define the data. - data = OrderedDict([("a1", np.random.rand(5))]) - - # Write full data and label data - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (-2, 2)) - array.write_subarray(subarray, data["a1"]) - - # Check results - with tiledb.open(uri, "r") as array: - nonempty = array.nonempty_domain() - assert nonempty[0] == (-2, 2) - result = array.multi_index[-2:2] - - assert_dict_arrays_equal(result, data) - - def test_with_labels(self): - # Create array. - uri = self.path("dense_write_subarray_with_labels") - dim1 = tiledb.Dim(name="d1", domain=(0, 10), tile=11) - dim2 = tiledb.Dim(name="d2", domain=(0, 10), tile=11) - schema = tiledb.ArraySchema( - tiledb.Domain(dim1, dim2), - [tiledb.Attr(name="a1", dtype=np.float64)], - dim_labels={ - 0: {"l1": dim1.create_label_schema("increasing", np.int32)}, - 1: {"l2": dim1.create_label_schema("decreasing", np.int32)}, - }, - ) - tiledb.Array.create(uri, schema) - - data = OrderedDict( - [ - ("a1", np.random.rand(121).reshape(11, 11)), - ("l1", np.arange(-5, 6)), - ("l2", np.arange(5, -6, -1)), - ] - ) - - # Write full data and label data - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (0, 10)) - subarray.add_dim_range(1, (0, 10)) - array.write_subarray(subarray, data) - - # Check results - with tiledb.open(uri, "r") as array: - result = array.label_index(["l1", "l2"])[-5:5, -5:5] - assert_dict_arrays_equal(result, data) - - def test_by_labels(self): - # Create array. - uri = self.path("dense_write_subarray_by_labels") - dim1 = tiledb.Dim(name="d1", domain=(0, 10), tile=11) - schema = tiledb.ArraySchema( - tiledb.Domain(dim1), - [tiledb.Attr(name="a1", dtype=np.float64)], - dim_labels={0: {"l1": dim1.create_label_schema("increasing", np.int32)}}, - ) - tiledb.Array.create(uri, schema) - - # Define the data. - data = OrderedDict( - [("a1", np.random.rand(5)), ("l1", np.arange(-5, 6, dtype=np.int32))] - ) - - # Reload to get the label uris and write the labels. - schema = tiledb.ArraySchema.load(uri) - with tiledb.open(schema.dim_label("l1").uri, mode="w") as array: - array[:] = data["l1"] - - # Write full data and label data - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_label_range("l1", (-2, 2)) - with pytest.raises(tiledb.TileDBError): - array.write_subarray(subarray, data["a1"]) - - def test_with_var_label(self): - # Create array. - uri = self.path("dense_write_subarray_by_var_label") - dim1 = tiledb.Dim(name="d1", domain=(0, 10), tile=11) - schema = tiledb.ArraySchema( - tiledb.Domain(dim1), - [tiledb.Attr(name="a1", dtype=np.float64)], - dim_labels={ - 0: {"l1": dim1.create_label_schema("increasing", "U")}, - }, - ) - tiledb.Array.create(uri, schema) - - # Write array. - data = OrderedDict( - [ - ("a1", np.random.rand(5)), - ( - "l1", - np.array( - ["alpha", "beta", "gamma", "kappa", "sigma"], dtype=object - ), - ), - ] - ) - with tiledb.open(uri, "w") as array: - subarray = tiledb.Subarray(array) - subarray.add_dim_range(0, (3, 7)) - array.write_subarray(subarray, data) - - # Check results. - with tiledb.open(uri, "r") as array: - nonempty = array.nonempty_domain() - assert nonempty[0] == (3, 7) - with tiledb.open(array.schema.dim_label("l1").uri, "r") as label_array: - nonempty_label = label_array.nonempty_domain() - assert nonempty_label[0] == (3, 7) - array.label_index(["l1"])["alpha":"sigma"]