diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5239d37dc3..f3fd8e17ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,6 @@ repos: hooks: - id: black - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.284 + rev: v0.4.4 hooks: - id: ruff diff --git a/HISTORY.md b/HISTORY.md index 121ee4cb59..518f5909f3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,20 @@ +# Release 0.29.1 + +## Improvements + +* Expose WebP enums by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1974 +* Add Array.query in docs and improve docs in general by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1965 +* Add support for creating WKB/WKT attributes by @jp-dark in https://github.com/TileDB-Inc/TileDB-Py/pull/1912 +* Add wrapping for ls recursive by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1968 +* Fix compatibility for delete_fragments by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1966 + +## Build system changes + +* Fix ModuleNotFoundError: No module named 'numpy' on build by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1979 +* Add support for numpy2 by @kounelisagis in https://github.com/TileDB-Inc/TileDB-Py/pull/1969 +* Fix syntax error in nightly build workflow by @ihnorton in https://github.com/TileDB-Inc/TileDB-Py/pull/1970 +* Set an upper bound for numpy to dodge 2.0 by @sgillies in https://github.com/TileDB-Inc/TileDB-Py/pull/1963 + # Release 0.29.0 * TileDB-Py 0.29.0 includes TileDB Embedded [2.23.0](https://github.com/TileDB-Inc/TileDB/releases/tag/2.23.0) diff --git a/doc/source/conf.py b/doc/source/conf.py index dc89f4d73e..a13dc2032b 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -75,7 +75,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/doc/source/python-api.rst b/doc/source/python-api.rst index c2dc44e379..994e368f9c 100644 --- a/doc/source/python-api.rst +++ b/doc/source/python-api.rst @@ -68,7 +68,7 @@ Filters .. automethod:: __getitem__(idx) .. automethod:: __len__ -.. autoclass:: tiledb.libtiledb.CompressionFilter +.. autoclass:: tiledb.CompressionFilter :members: .. autoclass:: tiledb.GzipFilter :members: @@ -116,22 +116,21 @@ Dense Array ----------- .. autoclass:: tiledb.DenseArray - :members: - - .. automethod:: __getitem__(selection) - .. automethod:: __setitem__(selection, value) - .. automethod:: query - .. automethod:: from_numpy(uri, array, ctx=None, **kwargs) + :members: query + :special-members: __getitem__, __setitem__ Sparse Array ------------ .. autoclass:: tiledb.SparseArray - :members: + :members: query + :special-members: __getitem__, __setitem__ - .. automethod:: __getitem__(selection) - .. automethod:: __setitem__(selection, value) - .. automethod:: query +Query +--------------- + +.. 
autoclass:: tiledb.libtiledb.Query + :members: Query Condition --------------- diff --git a/pyproject.toml b/pyproject.toml index 70fd186d52..82378e357f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,9 @@ extend-select = ["I001"] extend-exclude = ["doc"] fix = true +[tool.ruff.lint] +select = ["NPY201"] + [tool.ruff.per-file-ignores] "tiledb/__init__.py" = ["F401"] diff --git a/tiledb/__init__.py b/tiledb/__init__.py index d0e9852611..ca9ae687c0 100644 --- a/tiledb/__init__.py +++ b/tiledb/__init__.py @@ -39,6 +39,7 @@ Bzip2Filter, ChecksumMD5Filter, ChecksumSHA256Filter, + CompressionFilter, DeltaFilter, DictionaryFilter, DoubleDeltaFilter, @@ -73,6 +74,8 @@ ) from .libtiledb import ( Array, + DenseArrayImpl, + SparseArrayImpl, consolidate, ls, move, @@ -85,8 +88,6 @@ vacuum, walk, ) -from .libtiledb import DenseArrayImpl as DenseArray -from .libtiledb import SparseArrayImpl as SparseArray from .multirange_indexing import EmptyRange from .object import Object from .parquet_ import from_parquet @@ -116,13 +117,19 @@ try: from tiledb.cloud.cloudarray import CloudArray except ImportError: - pass + + class DenseArray(DenseArrayImpl): + pass + + class SparseArray(SparseArrayImpl): + pass + else: - class DenseArray(DenseArray, CloudArray): + class DenseArray(DenseArrayImpl, CloudArray): pass - class SparseArray(SparseArray, CloudArray): + class SparseArray(SparseArrayImpl, CloudArray): pass del CloudArray diff --git a/tiledb/array_schema.py b/tiledb/array_schema.py index 260a0f337f..4e399c81b6 100644 --- a/tiledb/array_schema.py +++ b/tiledb/array_schema.py @@ -322,7 +322,7 @@ def version(self) -> int: """The array's schema (storage) version. :rtype: int - :raises :py:exc:`tiledb.TileDBError` + :raises: :py:exc:`tiledb.TileDBError` """ return self._version diff --git a/tiledb/ctx.py b/tiledb/ctx.py index 441f054e2a..05d2b85b5e 100644 --- a/tiledb/ctx.py +++ b/tiledb/ctx.py @@ -209,7 +209,7 @@ def dict(self, prefix: str = ""): :param str prefix: return only parameters with a given prefix :rtype: dict - :return: Config parameter / values as a a Python dict + :return: Config parameter / values as a Python dict """ return dict(ConfigItems(self, prefix=prefix)) diff --git a/tiledb/filter.py b/tiledb/filter.py index 871a8cf0a5..5bf646f393 100644 --- a/tiledb/filter.py +++ b/tiledb/filter.py @@ -79,7 +79,7 @@ class CompressionFilter(Filter): >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain(tiledb.Dim(domain=(0, 9), tile=2, dtype=np.uint64)) ... a1 = tiledb.Attr(name="a1", dtype=np.int64, - ... filters=tiledb.FilterList([tiledb.GzipFilter(level=10)])) + ... filters=tiledb.FilterList([tiledb.CompressionFilter(level=10)])) ... schema = tiledb.ArraySchema(domain=dom, attrs=(a1,)) ... tiledb.DenseArray.create(tmp + "/array", schema) @@ -326,8 +326,7 @@ class DoubleDeltaFilter(CompressionFilter): :param level: -1 (default) sets the compressor level to the default level as specified in TileDB core. Otherwise, sets the compressor level to the given value. :type level: int - :param reinterp_dtype: (optional) sets the compressor to compress the data treating - as the new datatype. + :param reinterp_dtype: (optional) sets the compressor to compress the data treating as the new datatype. **Example:** @@ -501,7 +500,8 @@ class PositiveDeltaFilter(Filter): :param ctx: A TileDB Context :type ctx: tiledb.Ctx :param window: -1 (default) sets the max window size for the filter to the default window size as specified in TileDB core. 
Otherwise, sets the compressor level to the given value. - :type window: int + :type window: int + **Example:** >>> import tiledb, numpy as np, tempfile @@ -754,6 +754,9 @@ class WebpFilter(Filter): lt.FilterOption.WEBP_LOSSLESS, ) + # Expose WebP enums at the top level + WebpInputFormat = lt.WebpInputFormat + def __init__( self, input_format: lt.WebpInputFormat = None, diff --git a/tiledb/highlevel.py b/tiledb/highlevel.py index 6bc4ab1cea..5a1b11b6b3 100644 --- a/tiledb/highlevel.py +++ b/tiledb/highlevel.py @@ -224,7 +224,7 @@ def is_ndarray_like(arr): elif shape and dtype: if np.issubdtype(np.bytes_, dtype): dtype = np.dtype("S") - elif np.issubdtype(dtype, np.unicode_): + elif np.issubdtype(dtype, np.str_): dtype = np.dtype("U") ndim = len(shape) diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx index 29a33de467..02373596b4 100644 --- a/tiledb/libtiledb.pyx +++ b/tiledb/libtiledb.pyx @@ -12,8 +12,7 @@ import io import warnings import collections.abc from collections import OrderedDict -from json import dumps as json_dumps -from json import loads as json_loads +from json import dumps as json_dumps, loads as json_loads from ._generated_version import version_tuple as tiledbpy_version from .array_schema import ArraySchema @@ -35,10 +34,7 @@ np.import_array() # Integer types supported by Python / System _inttypes = (int, np.integer) - -# Numpy initialization code (critical) -# https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array -np.import_array() +np.set_printoptions(legacy='1.21') # use unified numpy printing cdef tiledb_ctx_t* safe_ctx_ptr(object ctx): @@ -145,8 +141,7 @@ cdef _write_array( if attr.isvar: try: if attr.isnullable: - if(np.issubdtype(attr.dtype, np.unicode_) - or np.issubdtype(attr.dtype, np.string_) + if(np.issubdtype(attr.dtype, np.str_) or np.issubdtype(attr.dtype, np.bytes_)): attr_val = np.array(["" if v is None else v for v in values[i]]) else: @@ -601,7 +596,7 @@ def index_domain_subarray(array: Array, dom, idx: tuple): dim = dom.dim(r) dim_dtype = dim.dtype - if array.mode == 'r' and (np.issubdtype(dim_dtype, np.unicode_) or np.issubdtype(dim_dtype, np.bytes_)): + if array.mode == 'r' and (np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)): # NED can only be retrieved in read mode ned = array.nonempty_domain() (dim_lb, dim_ub) = ned[r] if ned else (None, None) @@ -612,7 +607,11 @@ def index_domain_subarray(array: Array, dom, idx: tuple): if not isinstance(dim_slice, slice): raise IndexError("invalid index type: {!r}".format(type(dim_slice))) + # numpy2 doesn't allow addition beween int and np.int64 - NEP 50 start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step + start = np.int64(start) if isinstance(start, int) else start + stop = np.int64(stop) if isinstance(stop, int) else stop + step = np.int64(step) if isinstance(step, int) else step if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_): if start is None or stop is None: @@ -1503,7 +1502,7 @@ cdef class Array(object): cdef _ndarray_is_varlen(self, np.ndarray array): return (np.issubdtype(array.dtype, np.bytes_) or - np.issubdtype(array.dtype, np.unicode_) or + np.issubdtype(array.dtype, np.str_) or array.dtype == object) @property @@ -1525,8 +1524,8 @@ cdef class Array(object): ** Example ** - >>> import tiledb, numpy as np - >>> + >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> dim1 = tiledb.Dim("d1", domain=(1, 4)) >>> dim2 = tiledb.Dim("d2", domain=(1, 3)) >>> dom = 
tiledb.Domain(dim1, dim2) @@ -1551,21 +1550,30 @@ cdef class Array(object): ... A[:] = {"a1": a1_data, "l1": l1_data, "l2": l2_data, "l3": l3_data} ... ... with tiledb.open(tmp, "r") as A: - ... A.label_index(["l1"])[3:4] # doctest: +ELLIPSIS - ... A.label_index(["l1", "l3"])[2, 0.5:1.0] # doctest: +ELLIPSIS - ... A.label_index(["l2"])[:, -1:0] # doctest: +ELLIPSIS - ... A.label_index(["l3"])[:, 0.5:1.0] # doctest: +ELLIPSIS - OrderedDict(...'l1'... array([4, 3])..., ...'a1'... array([[1, 2, 3], - [4, 5, 6]])...) - OrderedDict(...'l3'... array([0.5, 1. ])..., ...'l1'... array([2])..., ...'a1'... array([[8, 9]])...) - OrderedDict(...'l2'... array([-1, 0])..., ...'a1'... array([[ 1, 2], - [ 4, 5], - [ 7, 8], - [10, 11]])...) - OrderedDict(...'l3'... array([0.5, 1. ])..., ...'a1'... array([[ 2, 3], - [ 5, 6], - [ 8, 9], - [11, 12]])...) + ... np.testing.assert_equal( + ... A.label_index(["l1"])[3:4], + ... OrderedDict({"l1": [4, 3], "a1": [[1, 2, 3], [4, 5, 6]]}), + ... ) + ... np.testing.assert_equal( + ... A.label_index(["l1", "l3"])[2, 0.5:1.0], + ... OrderedDict( + ... {"l3": [0.5, 1.0], "l1": [2], "a1": [[8, 9]]} + ... ), + ... ) + ... np.testing.assert_equal( + ... A.label_index(["l2"])[:, -1:0], + ... OrderedDict( + ... {"l2": [-1, 0], + ... "a1": [[1, 2], [4, 5], [7, 8], [10, 11]]}, + ... ), + ... ) + ... np.testing.assert_equal( + ... A.label_index(["l3"])[:, 0.5:1.0], + ... OrderedDict( + ... {"l3": [0.5, 1.], + ... "a1": [[2, 3], [5, 6], [8, 9], [11, 12]]}, + ... ), + ... ) :param labels: List of labels to use when querying. Can only use at most one label per dimension. @@ -1574,6 +1582,7 @@ cdef class Array(object): query the array on the corresponding dimension. :returns: dict of {'label/attribute': result}. :raises: :py:exc:`tiledb.TileDBError` + """ # Delayed to avoid circular import from .multirange_indexing import LabelIndexer @@ -1886,6 +1895,10 @@ cdef class Query(object): if not use_arrow: raise TileDBError("Cannot initialize return_arrow with use_arrow=False") self.use_arrow = use_arrow + + if return_incomplete and not array.schema.sparse: + raise TileDBError("Incomplete queries are only supported for sparse arrays at this time") + self.return_incomplete = return_incomplete self.domain_index = DomainIndexer(array, query=self) @@ -2158,8 +2171,7 @@ cdef class DenseArrayImpl(Array): def query(self, attrs=None, attr_cond=None, cond=None, dims=None, coords=False, order='C', use_arrow=None, return_arrow=False, return_incomplete=False): - """ - Construct a proxy Query object for easy subarray queries of cells + """Construct a proxy Query object for easy subarray queries of cells for an item or region of the array across one or more attributes. Optionally subselect over attributes, return dense result coordinate values, @@ -2202,8 +2214,8 @@ cdef class DenseArrayImpl(Array): ... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))} ... with tiledb.DenseArray(tmp + "/array", mode='r') as A: ... # Access specific attributes individually. - ... A.query(attrs=("a1",))[0:5] # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([0, 0, 0, 0, 0])...) + ... np.testing.assert_equal(A.query(attrs=("a1",))[0:5], + ... {"a1": np.zeros(5)}) """ if not self.isopen or self.mode != 'r': @@ -2257,8 +2269,8 @@ cdef class DenseArrayImpl(Array): ... A[0:10] = {"a1": np.zeros((10)), "a2": np.ones((10))} ... with tiledb.DenseArray(tmp + "/array", mode='r') as A: ... # A[0:5], attribute a1, row-major without coordinates - ... 
A.subarray((slice(0, 5),), attrs=("a1",), coords=False, order='C') # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([0, 0, 0, 0, 0])...) + ... np.testing.assert_equal(A.subarray((slice(0, 5),), attrs=("a1",), coords=False, order='C'), + ... OrderedDict({'a1': np.zeros(5)})) """ from .subarray import Subarray @@ -2517,8 +2529,8 @@ cdef class DenseArrayImpl(Array): dtype=np.uint8 ) else: - if (np.issubdtype(attr.dtype, np.string_) and not - (np.issubdtype(attr_val.dtype, np.string_) or attr_val.dtype == np.dtype('O'))): + if (np.issubdtype(attr.dtype, np.bytes_) and not + (np.issubdtype(attr_val.dtype, np.bytes_) or attr_val.dtype == np.dtype('O'))): raise ValueError("Cannot write a string value to non-string " "typed attribute '{}'!".format(name)) @@ -2532,7 +2544,7 @@ cdef class DenseArrayImpl(Array): dtype=np.uint8 ) - if np.issubdtype(attr.dtype, np.string_): + if np.issubdtype(attr.dtype, np.bytes_): attr_val = np.array( ["" if v is None else v for v in attr_val]) else: @@ -2566,8 +2578,8 @@ cdef class DenseArrayImpl(Array): if attr.isnullable and name not in nullmaps: nullmaps[name] = np.array([int(v is None) for v in val], dtype=np.uint8) else: - if (np.issubdtype(attr.dtype, np.string_) and not - (np.issubdtype(val.dtype, np.string_) or val.dtype == np.dtype('O'))): + if (np.issubdtype(attr.dtype, np.bytes_) and not + (np.issubdtype(val.dtype, np.bytes_) or val.dtype == np.dtype('O'))): raise ValueError("Cannot write a string value to non-string " "typed attribute '{}'!".format(name)) @@ -3054,8 +3066,8 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps): nullmaps[name] = np.array( [int(v is not None) for v in attr_val], dtype=np.uint8) else: - if (np.issubdtype(attr.dtype, np.string_) - and not (np.issubdtype(attr_val.dtype, np.string_) + if (np.issubdtype(attr.dtype, np.bytes_) + and not (np.issubdtype(attr_val.dtype, np.bytes_) or attr_val.dtype == np.dtype('O'))): raise ValueError("Cannot write a string value to non-string " "typed attribute '{}'!".format(name)) @@ -3067,7 +3079,7 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps): nullmaps[name] = np.array( [int(v is not None) for v in attr_val], dtype=np.uint8) - if np.issubdtype(attr.dtype, np.string_): + if np.issubdtype(attr.dtype, np.bytes_): attr_val = np.array(["" if v is None else v for v in attr_val]) else: attr_val = np.nan_to_num(attr_val) @@ -3178,6 +3190,7 @@ cdef class SparseArrayImpl(Array): **Example:** >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( @@ -3195,10 +3208,12 @@ cdef class SparseArrayImpl(Array): ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: ... # Return an OrderedDict with values and coordinates - ... A[0:3, 0:10] # doctest: +ELLIPSIS + ... np.testing.assert_equal(A[0:3, 0:10], OrderedDict({'a1': np.array([1, 2]), + ... 'a2': np.array([3, 4]), 'y': np.array([0, 2], dtype=np.uint64), + ... 'x': np.array([0, 3], dtype=np.uint64)})) ... # Return just the "x" coordinates values - ... A[0:3, 0:10]["x"] # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([1, 2])..., ...'a2'... array([3, 4])..., ...'y'... array([0, 2], dtype=uint64)..., ...'x'... array([0, 3], dtype=uint64)...) + ... 
A[0:3, 0:10]["x"] + array([0, 3], dtype=uint64) With a floating-point array domain, index bounds are inclusive, e.g.: @@ -3255,6 +3270,7 @@ cdef class SparseArrayImpl(Array): **Example:** >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( @@ -3271,8 +3287,8 @@ cdef class SparseArrayImpl(Array): ... A[I, J] = {"a1": np.array([1, 2]), ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: - ... A.query(attrs=("a1",), coords=False, order='G')[0:3, 0:10] # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([1, 2])...) + ... np.testing.assert_equal(A.query(attrs=("a1",), coords=False, order='G')[0:3, 0:10], + ... OrderedDict({'a1': np.array([1, 2])})) """ if not self.isopen or self.mode not in ('r', 'd'): @@ -3364,6 +3380,7 @@ cdef class SparseArrayImpl(Array): **Example:** >>> import tiledb, numpy as np, tempfile + >>> from collections import OrderedDict >>> # Write to multi-attribute 2D array >>> with tempfile.TemporaryDirectory() as tmp: ... dom = tiledb.Domain( @@ -3381,8 +3398,10 @@ cdef class SparseArrayImpl(Array): ... "a2": np.array([3, 4])} ... with tiledb.SparseArray(tmp + "/array", mode='r') as A: ... # A[0:3, 0:10], attribute a1, row-major without coordinates - ... A.subarray((slice(0, 3), slice(0, 10)), attrs=("a1",), coords=False, order='G') # doctest: +ELLIPSIS - OrderedDict(...'a1'... array([1, 2])...) + ... np.testing.assert_equal( + ... A.subarray((slice(0, 3), slice(0, 10)), attrs=("a1",), coords=False, order='G'), + ... OrderedDict({'a1': np.array([1, 2])}) + ... ) """ from .subarray import Subarray diff --git a/tiledb/multirange_indexing.py b/tiledb/multirange_indexing.py index 010836f18f..5509435bad 100644 --- a/tiledb/multirange_indexing.py +++ b/tiledb/multirange_indexing.py @@ -422,7 +422,7 @@ def __init__( # Until list attributes are supported in core, error with a clear message. if use_arrow and any( (attr.isvar or len(attr.dtype) > 1) - and attr.dtype not in (np.unicode_, np.bytes_) + and attr.dtype not in (np.str_, np.bytes_) for attr in map(array.attr, query.attrs or ()) ): raise TileDBError( diff --git a/tiledb/tests/cc/test_cc.py b/tiledb/tests/cc/test_cc.py index 2aed7e104b..0f441907e0 100644 --- a/tiledb/tests/cc/test_cc.py +++ b/tiledb/tests/cc/test_cc.py @@ -275,8 +275,11 @@ def test_schema(): with pytest.raises(lt.TileDBError): schema._tile_order = lt.LayoutType.HILBERT - schema._tile_order = lt.LayoutType.UNORDERED - assert schema._tile_order == lt.LayoutType.UNORDERED + if tiledb.libtiledb.version() >= (2, 24, 0): + with pytest.raises(lt.TileDBError): + schema._tile_order = lt.LayoutType.UNORDERED + schema._tile_order = lt.LayoutType.ROW_MAJOR + assert schema._tile_order == lt.LayoutType.ROW_MAJOR # TODO schema._set_coords_filter_list(...) 
# TODO assert schema._coords_filter_list() == lt.FilterListType.NONE diff --git a/tiledb/tests/test_attribute.py b/tiledb/tests/test_attribute.py index d79c8c683a..4f91211589 100644 --- a/tiledb/tests/test_attribute.py +++ b/tiledb/tests/test_attribute.py @@ -16,7 +16,7 @@ def test_minimal_attribute(self): self.assertEqual(attr, attr) self.assertTrue(attr.isanon) self.assertEqual(attr.name, "") - self.assertEqual(attr.dtype, np.float_) + self.assertEqual(attr.dtype, np.float64) self.assertFalse(attr.isvar) self.assertFalse(attr.isnullable) diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index cd13ad9cde..e8736b1ca7 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -1230,10 +1230,10 @@ def test_reopen_dense_array(self, use_timestamps): def test_data_begins_with_null_chars(self): path = self.path("test_data_begins_with_null_chars") - data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.unicode_) + data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.str_) dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema(dom, (att,)) tiledb.Array.create(path, schema) @@ -1325,12 +1325,12 @@ def test_varlen_write_unicode(self): "", "hhhhhhhhhh", ], - dtype=np.unicode_, + dtype=np.str_, ) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema(dom, (att,)) @@ -1487,7 +1487,7 @@ def test_varlen_write_fixedunicode(self): # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A))) - att = tiledb.Attr(dtype=np.unicode_) + att = tiledb.Attr(dtype=np.str_) schema = tiledb.ArraySchema(dom, (att,)) @@ -1991,7 +1991,7 @@ def test_sparse_bytes(self, fx_sparse_cell_order): def test_sparse_unicode(self, fx_sparse_cell_order): dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int)) - att = tiledb.Attr("", var=True, dtype=np.unicode_) + att = tiledb.Attr("", var=True, dtype=np.str_) schema = tiledb.ArraySchema( domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order ) @@ -3514,11 +3514,11 @@ def test_incomplete_dense_varlen(self, non_overlapping_ranges): ncells = 10 path = self.path("incomplete_dense_varlen") str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] - data = np.array(str_data, dtype=np.unicode_) + data = np.array(str_data, dtype=np.str_) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema(dom, (att,)) @@ -3556,12 +3556,12 @@ def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_range path = self.path("incomplete_sparse_varlen") str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)] - data = np.array(str_data, dtype=np.unicode_) + data = np.array(str_data, dtype=np.str_) coords = np.arange(ncells) # basic write dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) + 100), tile=len(data))) - att = tiledb.Attr(dtype=np.unicode_, var=True) + att = tiledb.Attr(dtype=np.str_, var=True) schema = tiledb.ArraySchema( dom, (att,), sparse=True, allows_duplicates=allows_duplicates @@ -3598,6 +3598,22 @@ def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_range 
T2.multi_index[101:105][""], np.array([], dtype=np.dtype(" bytes: """ if isinstance(file, FileIO): raise lt.TileDBError( - "`tiledb.VFS().open` now returns a a FileIO object. Use " + "`tiledb.VFS().open` now returns a FileIO object. Use " "`FileIO.seek` and `FileIO.read`. This message will be removed " "in 0.21.0." ) @@ -436,9 +436,7 @@ def flush(self): def seek(self, offset: int, whence: int = 0): """ :param int offset: Byte position to set the file pointer - :param int whence: Reference point. A whence value of 0 measures from the - beginning of the file, 1 uses the current file position, and 2 uses the - end of the file as the reference point. whence can be omitted and defaults to 0. + :param int whence: Reference point. A whence value of 0 measures from the beginning of the file, 1 uses the current file position, and 2 uses the end of the file as the reference point. whence can be omitted and defaults to 0. """ if not np.issubdtype(type(offset), np.integer): raise TypeError( @@ -475,8 +473,7 @@ def read(self, size: int = -1) -> bytes: """ Read the file from the current pointer position. - :param int size: Number of bytes to read. By default, size is set to -1 - which will read until the end of the file. + :param int size: Number of bytes to read. By default, size is set to -1 which will read until the end of the file. :rtype: bytes :return: The bytes in the file
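
A minimal sketch, not part of the patch, of the NumPy 2 compatibility pattern this diff applies across `tiledb/highlevel.py`, `tiledb/libtiledb.pyx`, and the tests: aliases removed in NumPy 2.0 (`np.unicode_`, `np.string_`, `np.float_`) are replaced by their canonical names (`np.str_`, `np.bytes_`, `np.float64`, which is what ruff's newly enabled `NPY201` rule flags), and Python `int` slice bounds are coerced to `np.int64` before arithmetic with `np.int64` domain values, per the NEP 50 note added in `index_domain_subarray`. The helper names below are illustrative only and do not exist in TileDB-Py.

```python
import numpy as np


def is_string_dtype(dtype) -> bool:
    """True for fixed- or variable-length unicode/bytes dtypes.
    np.str_ / np.bytes_ replace the np.unicode_ / np.string_ aliases
    removed in NumPy 2.0."""
    return np.issubdtype(dtype, np.str_) or np.issubdtype(dtype, np.bytes_)


def normalize_slice_bounds(dim_slice: slice):
    """Coerce Python int slice bounds to np.int64 up front, mirroring the
    index_domain_subarray change, so later arithmetic against np.int64
    domain values stays within a single integer dtype under NEP 50."""
    start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step
    start = np.int64(start) if isinstance(start, int) else start
    stop = np.int64(stop) if isinstance(stop, int) else stop
    step = np.int64(step) if isinstance(step, int) else step
    return start, stop, step


assert is_string_dtype(np.dtype("U"))            # previously spelled np.unicode_
assert is_string_dtype(np.dtype("S"))            # previously spelled np.string_
assert np.dtype(np.float64) == np.dtype(float)   # previously spelled np.float_
print(normalize_slice_bounds(slice(1, 10, None)))
```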
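
A hedged usage sketch for the re-exported WebP enums: the patch attaches `WebpInputFormat = lt.WebpInputFormat` to `tiledb.WebpFilter`, so the input format can be selected through the class attribute when building a filter list. The `WEBP_RGB` member and the `quality`/`lossless` keyword names are assumptions (only `input_format` and `lt.FilterOption.WEBP_LOSSLESS` appear in the hunk); check `tiledb.WebpFilter.WebpInputFormat` in your build for the exact members.

```python
import tiledb

# Assumed enum member and keyword names -- verify against the installed build.
webp = tiledb.WebpFilter(
    input_format=tiledb.WebpFilter.WebpInputFormat.WEBP_RGB,  # enum now exposed on the class
    quality=85.0,     # assumed keyword
    lossless=False,   # corresponds to lt.FilterOption.WEBP_LOSSLESS in the hunk
)
filters = tiledb.FilterList([webp])
```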