From ce083fbbca245b9d97accd2f39d809542588f79c Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Fri, 8 Sep 2023 12:54:07 +0530 Subject: [PATCH 1/8] init --- conftest.py | 1 + deeplake/api/tests/test_api.py | 4 +- deeplake/core/tensor.py | 30 ++++++- deeplake/core/transform/transform.py | 2 +- deeplake/enterprise/dataloader.py | 6 +- deeplake/enterprise/test_pytorch.py | 37 ++++++--- deeplake/enterprise/test_tensorflow.py | 71 +++++++--------- deeplake/htype.py | 90 ++++++++++++++++++++- deeplake/integrations/tests/test_pytorch.py | 4 +- deeplake/util/exceptions.py | 8 +- 10 files changed, 188 insertions(+), 65 deletions(-) diff --git a/conftest.py b/conftest.py index bef2e2d0b2..3cb3db25f3 100644 --- a/conftest.py +++ b/conftest.py @@ -33,6 +33,7 @@ tqdm.monitor_interval = 0 + def pytest_configure(config): config.addinivalue_line( "markers", diff --git a/deeplake/api/tests/test_api.py b/deeplake/api/tests/test_api.py index 95d28f0913..f5e8791cc4 100644 --- a/deeplake/api/tests/test_api.py +++ b/deeplake/api/tests/test_api.py @@ -1922,7 +1922,9 @@ def test_dataset_copy( [ ("local_ds_generator", "local_path", "hub_cloud_dev_token"), pytest.param( - "s3_ds_generator", "s3_path", "hub_cloud_dev_token", + "s3_ds_generator", + "s3_path", + "hub_cloud_dev_token", marks=pytest.mark.slow, ), pytest.param( diff --git a/deeplake/core/tensor.py b/deeplake/core/tensor.py index 63f4ab5247..34f2385a39 100644 --- a/deeplake/core/tensor.py +++ b/deeplake/core/tensor.py @@ -10,7 +10,7 @@ from typing import Dict, List, Sequence, Union, Optional, Tuple, Any, Callable from functools import reduce, partial from deeplake.core.index import Index, IndexEntry, replace_ellipsis_with_slices -from deeplake.core.meta.tensor_meta import TensorMeta +from deeplake.core.meta.tensor_meta import TensorMeta, _validate_htype_exists from deeplake.core.storage import StorageProvider from deeplake.core.chunk_engine import ChunkEngine from deeplake.core.compression import _read_timestamps @@ -63,6 +63,7 @@ parse_mesh_to_dict, get_mesh_vertices, ) +from deeplake.htype import HTYPE_CONVERSION_LHS, HTYPE_CONSTRAINTS import warnings import webbrowser @@ -552,6 +553,13 @@ def htype(self): htype = f"link[{htype}]" return htype + @htype.setter + def htype(self, value): + self._check_compatibility_with_htype(value) + self.meta.htype = value + self.meta.is_dirty = True + self.dataset.maybe_flush() + @property def hidden(self) -> bool: """Whether this tensor is a hidden tensor.""" @@ -1376,3 +1384,23 @@ def creds_key(self): def invalidate_libdeeplake_dataset(self): """Invalidates the libdeeplake dataset object.""" self.dataset.libdeeplake_dataset = None + + def _check_compatibility_with_htype(self, htype): + """Checks if the tensor is compatible with the given htype. + Raises an error if not compatible. + """ + _validate_htype_exists(htype) + if self.htype not in HTYPE_CONVERSION_LHS: + raise NotImplementedError( + f"Changing the htype of a tensor of htype {self.htype} is not supported." + ) + if htype not in HTYPE_CONSTRAINTS: + raise NotImplementedError( + f"Changing the htype to {htype} is not supported." + ) + if self.meta.sample_compression or self.meta.chunk_compression: + raise NotImplementedError( + "Changing the htype of a compressed tensor is not supported." + ) + constraints = HTYPE_CONSTRAINTS[htype] + constraints(self.shape, self.dtype) diff --git a/deeplake/core/transform/transform.py b/deeplake/core/transform/transform.py index bd863077fa..81423b19d6 100644 --- a/deeplake/core/transform/transform.py +++ b/deeplake/core/transform/transform.py @@ -323,7 +323,7 @@ def my_fn(sample_in: Any, samples_out, my_arg0, my_arg1=0): index=index, sample=sample, samples_processed=samples_processed, - suggest=suggest, + suggest=suggest, ) from e finally: reload_and_rechunk( diff --git a/deeplake/enterprise/dataloader.py b/deeplake/enterprise/dataloader.py index 8692cb74c6..57c59e58ad 100644 --- a/deeplake/enterprise/dataloader.py +++ b/deeplake/enterprise/dataloader.py @@ -199,7 +199,11 @@ def sampler(self): @property def batch_sampler(self): - return BatchSampler(self.sampler, self.batch_size, self.drop_last) if BatchSampler else None + return ( + BatchSampler(self.sampler, self.batch_size, self.drop_last) + if BatchSampler + else None + ) @property def generator(self): diff --git a/deeplake/enterprise/test_pytorch.py b/deeplake/enterprise/test_pytorch.py index 63598e4634..b7888fb316 100644 --- a/deeplake/enterprise/test_pytorch.py +++ b/deeplake/enterprise/test_pytorch.py @@ -81,8 +81,11 @@ def test_setting_woker_init_function(local_auth_ds): @pytest.mark.parametrize( "ds", [ - pytest.param("hub_cloud_ds", marks=[pytest.mark.slow, pytest.mark.skip("Causing lockups")]), - "local_auth_ds" + pytest.param( + "hub_cloud_ds", + marks=[pytest.mark.slow, pytest.mark.skip("Causing lockups")], + ), + "local_auth_ds", ], indirect=True, ) @@ -554,10 +557,13 @@ def test_rename(local_auth_ds): @requires_torch @requires_libdeeplake -@pytest.mark.parametrize("num_workers", [ - 0, - pytest.param(2, marks=pytest.mark.skip(reason="causing lockups")), -]) +@pytest.mark.parametrize( + "num_workers", + [ + 0, + pytest.param(2, marks=pytest.mark.skip(reason="causing lockups")), + ], +) @pytest.mark.slow @pytest.mark.flaky def test_indexes(local_auth_ds, num_workers): @@ -580,10 +586,13 @@ def test_indexes(local_auth_ds, num_workers): @requires_torch @requires_libdeeplake @pytest.mark.slow -@pytest.mark.parametrize("num_workers", [ - 0, - pytest.param(2, marks=pytest.mark.skip("causing lockups")), -]) +@pytest.mark.parametrize( + "num_workers", + [ + 0, + pytest.param(2, marks=pytest.mark.skip("causing lockups")), + ], +) @pytest.mark.flaky def test_indexes_transform(local_auth_ds, num_workers): shuffle = False @@ -611,7 +620,9 @@ def test_indexes_transform(local_auth_ds, num_workers): @requires_torch @requires_libdeeplake -@pytest.mark.parametrize("num_workers", [0, pytest.param(2, marks=pytest.mark.skip("causing lockups"))]) +@pytest.mark.parametrize( + "num_workers", [0, pytest.param(2, marks=pytest.mark.skip("causing lockups"))] +) @pytest.mark.slow @pytest.mark.flaky def test_indexes_transform_dict(local_auth_ds, num_workers): @@ -650,7 +661,9 @@ def test_indexes_transform_dict(local_auth_ds, num_workers): @requires_torch @requires_libdeeplake -@pytest.mark.parametrize("num_workers", [0, pytest.param(2, marks=pytest.mark.skip("causing lockups"))]) +@pytest.mark.parametrize( + "num_workers", [0, pytest.param(2, marks=pytest.mark.skip("causing lockups"))] +) @pytest.mark.slow @pytest.mark.flaky def test_indexes_tensors(local_auth_ds, num_workers): diff --git a/deeplake/enterprise/test_tensorflow.py b/deeplake/enterprise/test_tensorflow.py index 168dac178a..2becb9f9dd 100644 --- a/deeplake/enterprise/test_tensorflow.py +++ b/deeplake/enterprise/test_tensorflow.py @@ -62,9 +62,7 @@ def test_tensorflow_small(local_auth_ds): ds.create_tensor("image2", max_chunk_size=TF_TESTS_MAX_CHUNK_SIZE) ds.image2.extend(np.array([i * np.ones((12, 12)) for i in range(16)])) - if isinstance( - get_base_storage(ds.storage), (MemoryProvider, GCSProvider) - ): + if isinstance(get_base_storage(ds.storage), (MemoryProvider, GCSProvider)): with pytest.raises(ValueError): dl = ds.dataloader() return @@ -130,9 +128,7 @@ def test_tensorflow_transform(local_auth_ds): ds.create_tensor("image2", max_chunk_size=TF_TESTS_MAX_CHUNK_SIZE) ds.image2.extend(np.array([i * np.ones((12, 12)) for i in range(16)])) - if isinstance( - get_base_storage(ds.storage), (MemoryProvider, GCSProvider) - ): + if isinstance(get_base_storage(ds.storage), (MemoryProvider, GCSProvider)): with pytest.raises(ValueError): dl = ds.dataloader() return @@ -167,18 +163,12 @@ def test_tensorflow_transform_dict(local_auth_ds): ds.create_tensor("image3", max_chunk_size=TF_TESTS_MAX_CHUNK_SIZE) ds.image3.extend(np.array([i * np.ones((12, 12)) for i in range(16)])) - if isinstance( - get_base_storage(ds.storage), (MemoryProvider, GCSProvider) - ): + if isinstance(get_base_storage(ds.storage), (MemoryProvider, GCSProvider)): with pytest.raises(ValueError): dl = ds.dataloader() return - dl = ( - ds.dataloader() - .transform({"image": double, "image2": None}) - .tensorflow() - ) + dl = ds.dataloader().transform({"image": double, "image2": None}).tensorflow() assert len(dl.dataset) == 16 @@ -222,9 +212,7 @@ def test_tensorflow_with_compression(local_auth_ds: Dataset): images.extend(np.ones((16, 12, 12, 3), dtype="uint8")) labels.extend(np.ones((16, 1), dtype="uint32")) - if isinstance( - get_base_storage(ds.storage), (MemoryProvider, GCSProvider) - ): + if isinstance(get_base_storage(ds.storage), (MemoryProvider, GCSProvider)): with pytest.raises(ValueError): dl = ds.dataloader() return @@ -250,9 +238,7 @@ def test_custom_tensor_order(local_auth_ds): ds.create_tensor(t, max_chunk_size=TF_TESTS_MAX_CHUNK_SIZE) ds[t].extend(np.random.random((3, 4, 5))) - if isinstance( - get_base_storage(ds.storage), (MemoryProvider, GCSProvider) - ): + if isinstance(get_base_storage(ds.storage), (MemoryProvider, GCSProvider)): with pytest.raises(ValueError): dl = ds.dataloader() return @@ -260,9 +246,7 @@ def test_custom_tensor_order(local_auth_ds): with pytest.raises(TensorDoesNotExistError): dl = ds.dataloader().tensorflow(tensors=["c", "d", "e"]) - dl = ds.dataloader().tensorflow( - tensors=["c", "d", "a"], return_index=False - ) + dl = ds.dataloader().tensorflow(tensors=["c", "d", "a"], return_index=False) for i, batch in enumerate(dl): c1, d1, a1 = batch @@ -334,12 +318,8 @@ def test_groups(local_auth_ds, compressed_image_paths): img1 = deeplake.read(compressed_image_paths["jpeg"][0]) img2 = deeplake.read(compressed_image_paths["png"][0]) with local_auth_ds as ds: - ds.create_tensor( - "images/jpegs/cats", htype="image", sample_compression="jpeg" - ) - ds.create_tensor( - "images/pngs/flowers", htype="image", sample_compression="png" - ) + ds.create_tensor("images/jpegs/cats", htype="image", sample_compression="jpeg") + ds.create_tensor("images/pngs/flowers", htype="image", sample_compression="png") for _ in range(10): ds.images.jpegs.cats.append(img1) ds.images.pngs.flowers.append(img2) @@ -559,10 +539,13 @@ def test_rename(local_auth_ds): @requires_tensorflow @requires_libdeeplake -@pytest.mark.parametrize("num_workers", [ - 0, - pytest.param(2, marks=pytest.mark.skip("causing lockups")), -]) +@pytest.mark.parametrize( + "num_workers", + [ + 0, + pytest.param(2, marks=pytest.mark.skip("causing lockups")), + ], +) @pytest.mark.slow @pytest.mark.flaky def test_indexes(local_auth_ds, num_workers): @@ -573,9 +556,7 @@ def test_indexes(local_auth_ds, num_workers): ds.xyz.append(i * np.ones((2, 2))) ptds = ( - ds.dataloader() - .batch(4) - .tensorflow(num_workers=num_workers, return_index=True) + ds.dataloader().batch(4).tensorflow(num_workers=num_workers, return_index=True) ) if shuffle: ptds = ptds.shuffle() @@ -588,10 +569,13 @@ def test_indexes(local_auth_ds, num_workers): @requires_tensorflow @requires_libdeeplake -@pytest.mark.parametrize("num_workers", [ - 0, - pytest.param(2, marks=pytest.mark.skip("causing lockups")), -]) +@pytest.mark.parametrize( + "num_workers", + [ + 0, + pytest.param(2, marks=pytest.mark.skip("causing lockups")), + ], +) @pytest.mark.slow @pytest.mark.flaky def test_indexes_transform(local_auth_ds, num_workers): @@ -660,10 +644,9 @@ def test_indexes_transform_dict(local_auth_ds, num_workers): @requires_tensorflow @requires_libdeeplake -@pytest.mark.parametrize("num_workers", [ - 0, - pytest.param(2, marks=pytest.mark.skip("causing lockups")) -]) +@pytest.mark.parametrize( + "num_workers", [0, pytest.param(2, marks=pytest.mark.skip("causing lockups"))] +) @pytest.mark.slow @pytest.mark.flaky def test_indexes_tensors(local_auth_ds, num_workers): diff --git a/deeplake/htype.py b/deeplake/htype.py index 4c9abbc854..6c5b40f7e7 100644 --- a/deeplake/htype.py +++ b/deeplake/htype.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Callable, Dict from deeplake.compression import ( IMAGE_COMPRESSIONS, VIDEO_COMPRESSIONS, @@ -8,6 +8,7 @@ POINT_CLOUD_COMPRESSIONS, MESH_COMPRESSIONS, ) +from deeplake.util.exceptions import IncompatibleHtypeError class htype: @@ -98,7 +99,92 @@ class htype: htype.INTRINSICS: {"dtype": "float32"}, htype.POLYGON: {"dtype": "float32"}, htype.MESH: {"sample_compression": "ply"}, - htype.EMBEDDING: {}, +} + +HTYPE_CONVERSION_LHS = {htype.DEFAULT, htype.IMAGE} + + +class constraints: + """Constraints for converting a tensor to a htype""" + + ndim_error = ( + lambda htype, ndim: f"Incompatible number of dimensions for htype {htype}: {ndim}" + ) + shape_error = ( + lambda htype, shape: f"Incompatible shape of tensor for htype {htype}: {shape}" + ) + + @staticmethod + def IMAGE(shape, dtype): + if len(shape) not in (3, 4): + raise IncompatibleHtypeError(constraints.ndim_error("image", len(shape))) + if len(shape) == 4 and shape[-1] not in (1, 3, 4): + raise IncompatibleHtypeError(constraints.shape_error("image", shape)) + + @staticmethod + def CLASS_LABEL(shape, dtype): + if len(shape) != 2: + raise IncompatibleHtypeError( + constraints.ndim_error("class_label", len(shape)) + ) + + @staticmethod + def BBOX(shape, dtype): + if len(shape) not in (2, 3): + raise IncompatibleHtypeError(constraints.ndim_error("bbox", len(shape))) + if shape[-1] != 4: + raise IncompatibleHtypeError(constraints.shape_error("bbox", shape)) + + @staticmethod + def BBOX_3D(shape, dtype): + if len(shape) not in (2, 3): + raise IncompatibleHtypeError(constraints.ndim_error("bbox.3d", len(shape))) + if shape[-1] != 8: + raise IncompatibleHtypeError(constraints.shape_error("bbox.3d", shape)) + + EMBEDDING = lambda shape, dtype: True + + @staticmethod + def BINARY_MASK(shape, dtype): + if len(shape) not in (3, 4): + raise IncompatibleHtypeError( + constraints.ndim_error("binary_mask", len(shape)) + ) + + SEGMENT_MASK = BINARY_MASK + + @staticmethod + def KEYPOINTS_COCO(shape, dtype): + if len(shape) != 3: + raise IncompatibleHtypeError( + constraints.ndim_error("keypoints_coco", len(shape)) + ) + if shape[1] % 3 != 0: + raise IncompatibleHtypeError( + constraints.shape_error("keypoints_coco", shape) + ) + + INSTANCE_LABEL = lambda shape, dtype: True + + @staticmethod + def POINT(shape, dtype): + if len(shape) != 3: + raise IncompatibleHtypeError(constraints.ndim_error("point", len(shape))) + if shape[-1] not in (2, 3): + raise IncompatibleHtypeError(constraints.shape_error("point", shape)) + + +HTYPE_CONSTRAINTS: Dict[str, Callable] = { + htype.IMAGE: constraints.IMAGE, + htype.CLASS_LABEL: constraints.CLASS_LABEL, + htype.BBOX: constraints.BBOX, + htype.BBOX_3D: constraints.BBOX_3D, + htype.EMBEDDING: constraints.EMBEDDING, + htype.BINARY_MASK: constraints.BINARY_MASK, + htype.SEGMENT_MASK: constraints.SEGMENT_MASK, + htype.INSTANCE_LABEL: constraints.INSTANCE_LABEL, + htype.KEYPOINTS_COCO: constraints.KEYPOINTS_COCO, + htype.POINT: constraints.POINT, } HTYPE_VERIFICATIONS: Dict[str, Dict] = { diff --git a/deeplake/integrations/tests/test_pytorch.py b/deeplake/integrations/tests/test_pytorch.py index 196ea511eb..bc74930f52 100644 --- a/deeplake/integrations/tests/test_pytorch.py +++ b/deeplake/integrations/tests/test_pytorch.py @@ -602,7 +602,9 @@ def test_pytorch_collate(local_ds, shuffle, buffer_size): @pytest.mark.slow @requires_torch -@pytest.mark.parametrize("shuffle", [True, pytest.param(False, marks=pytest.mark.skip("causing lockups"))]) +@pytest.mark.parametrize( + "shuffle", [True, pytest.param(False, marks=pytest.mark.skip("causing lockups"))] +) @pytest.mark.flaky def test_pytorch_transform_collate(local_ds, shuffle): local_ds.create_tensor("a") diff --git a/deeplake/util/exceptions.py b/deeplake/util/exceptions.py index 5fa8c92403..b501c9f658 100644 --- a/deeplake/util/exceptions.py +++ b/deeplake/util/exceptions.py @@ -1,6 +1,5 @@ import numpy as np import deeplake -from deeplake.htype import HTYPE_CONFIGURATIONS from typing import Any, List, Sequence, Tuple, Optional, Union @@ -483,7 +482,7 @@ def __init__(self, expected: Union[np.dtype, str], actual: str, htype: str): # TODO: we may want to raise this error at the API level to determine if the user explicitly overwrote the `dtype` or not. (to make this error message more precise) # TODO: because if the user uses `dtype=np.uint8`, but the `htype` the tensor is created with has it's default dtype set as `uint8` also, then this message is ambiguous - htype_dtype = HTYPE_CONFIGURATIONS[htype].get("dtype", None) + htype_dtype = deeplake.HTYPE_CONFIGURATIONS[htype].get("dtype", None) if htype_dtype is not None and htype_dtype == expected: msg += f" Htype '{htype}' expects samples to have dtype='{htype_dtype}'." super().__init__("") @@ -1088,3 +1087,8 @@ def __init__(self): "Please either use different embedding function or exclude invalid " "files that are not supported by the embedding function. " ) + + +class IncompatibleHtypeError(Exception): + def __init__(self, msg): + super().__init__(msg) From fd3cafa62b838416b9f79a4a4b83b51accbf21cc Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Fri, 8 Sep 2023 13:27:39 +0530 Subject: [PATCH 2/8] update --- deeplake/core/tensor.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/deeplake/core/tensor.py b/deeplake/core/tensor.py index 2c0c210350..b9327c429f 100644 --- a/deeplake/core/tensor.py +++ b/deeplake/core/tensor.py @@ -42,6 +42,7 @@ TensorDoesNotExistError, InvalidKeyTypeError, TensorAlreadyExistsError, + UnsupportedCompressionError, ) from deeplake.util.iteration_warning import check_if_iteration from deeplake.hooks import dataset_read, dataset_written @@ -63,7 +64,12 @@ parse_mesh_to_dict, get_mesh_vertices, ) -from deeplake.htype import HTYPE_CONVERSION_LHS, HTYPE_CONSTRAINTS +from deeplake.util.htype import parse_complex_htype +from deeplake.htype import ( + HTYPE_CONVERSION_LHS, + HTYPE_CONSTRAINTS, + HTYPE_SUPPORTED_COMPRESSIONS, +) import warnings import webbrowser @@ -1392,6 +1398,9 @@ def _check_compatibility_with_htype(self, htype): """Checks if the tensor is compatible with the given htype. Raises an error if not compatible. """ + is_sequence, is_link, htype = parse_complex_htype(htype) + if is_sequence or is_link: + raise ValueError(f"Cannot change htype to a sequence or link.") _validate_htype_exists(htype) if self.htype not in HTYPE_CONVERSION_LHS: raise NotImplementedError( @@ -1401,9 +1410,10 @@ def _check_compatibility_with_htype(self, htype): raise NotImplementedError( f"Changing the htype to {htype} is not supported." ) - if self.meta.sample_compression or self.meta.chunk_compression: - raise NotImplementedError( - "Changing the htype of a compressed tensor is not supported." - ) + compression = self.meta.sample_compression or self.meta.chunk_compression + if compression: + supported_compressions = HTYPE_SUPPORTED_COMPRESSIONS.get(htype) + if supported_compressions and compression not in supported_compressions: + raise UnsupportedCompressionError(compression, htype) constraints = HTYPE_CONSTRAINTS[htype] constraints(self.shape, self.dtype) From 7e0b7870a346f03dceb0618653b5290b304e04f4 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Fri, 8 Sep 2023 14:06:53 +0530 Subject: [PATCH 3/8] add dtype constraint for binary mask --- deeplake/htype.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/deeplake/htype.py b/deeplake/htype.py index 6c5b40f7e7..af7840500b 100644 --- a/deeplake/htype.py +++ b/deeplake/htype.py @@ -1,4 +1,6 @@ from typing import Callable, Dict + +import numpy as np from deeplake.compression import ( IMAGE_COMPRESSIONS, VIDEO_COMPRESSIONS, @@ -113,6 +115,9 @@ class constraints: shape_error = ( lambda htype, shape: f"Incompatible shape of tensor for htype {htype}: {shape}" ) + dtype_error = ( + lambda htype, dtype: f"Incompatible dtype of tensor for htype {htype}: {dtype}" + ) @staticmethod def IMAGE(shape, dtype): @@ -150,6 +155,8 @@ def BINARY_MASK(shape, dtype): raise IncompatibleHtypeError( constraints.ndim_error("binary_mask", len(shape)) ) + if dtype != np.dtype("bool"): + raise IncompatibleHtypeError(constraints.dtype_error("binary_mask", dtype)) SEGMENT_MASK = BINARY_MASK From 7ec75ff3da4ee34034b7d49f9f45ead3c9bc5428 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Fri, 8 Sep 2023 15:20:42 +0530 Subject: [PATCH 4/8] rollback binary mask --- deeplake/core/tensor.py | 2 ++ deeplake/htype.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deeplake/core/tensor.py b/deeplake/core/tensor.py index b9327c429f..94a5fe9023 100644 --- a/deeplake/core/tensor.py +++ b/deeplake/core/tensor.py @@ -566,6 +566,8 @@ def htype(self): def htype(self, value): self._check_compatibility_with_htype(value) self.meta.htype = value + if value == "class_label": + self.meta._disable_temp_transform = False self.meta.is_dirty = True self.dataset.maybe_flush() diff --git a/deeplake/htype.py b/deeplake/htype.py index af7840500b..a33150b8fa 100644 --- a/deeplake/htype.py +++ b/deeplake/htype.py @@ -155,8 +155,6 @@ def BINARY_MASK(shape, dtype): raise IncompatibleHtypeError( constraints.ndim_error("binary_mask", len(shape)) ) - if dtype != np.dtype("bool"): - raise IncompatibleHtypeError(constraints.dtype_error("binary_mask", dtype)) SEGMENT_MASK = BINARY_MASK From 9fd6885d39dd28d0c9b3f3763f871418a4cd96cc Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Fri, 8 Sep 2023 16:23:44 +0530 Subject: [PATCH 5/8] test --- deeplake/api/tests/test_api.py | 124 +++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/deeplake/api/tests/test_api.py b/deeplake/api/tests/test_api.py index b97e31cc0b..8acba53bb1 100644 --- a/deeplake/api/tests/test_api.py +++ b/deeplake/api/tests/test_api.py @@ -17,6 +17,7 @@ from deeplake.core.storage import GCSProvider from deeplake.util.exceptions import ( GroupInfoNotSupportedError, + IncompatibleHtypeError, InvalidOperationError, SampleAppendError, TensorDoesNotExistError, @@ -2907,3 +2908,126 @@ def test_tensor_extend_ignore(local_ds, lfpw_links, compression_args): # Commit should work ds.commit() + + +def test_change_htype(local_ds_generator): + with local_ds_generator() as ds: + ds.create_tensor("images", sample_compression="jpg") + ds.images.extend(np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8)) + + ds.create_tensor("labels") + ds.labels.extend([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + + ds.create_tensor("boxes") + ds.boxes.extend(np.random.randn(10, 5, 4)) + + ds.create_tensor("boxes_3d") + ds.boxes_3d.extend(np.random.randn(10, 5, 8)) + + ds.create_tensor("embeddings") + ds.embeddings.extend(np.random.randn(10, 1536)) + + mask = np.zeros((10, 100, 100, 5), dtype=bool) + mask[:, :, :512, 1] = 1 + ds.create_tensor("masks") + ds.masks.extend(mask) + ds.create_tensor("image_masks", htype="image", sample_compression=None) + ds.image_masks.extend(mask) + + ds.create_tensor("keypoints") + ds.keypoints.extend(np.zeros((10, 9, 5))) + + ds.create_tensor("points") + ds.points.extend(np.zeros((10, 5, 3))) + + ds.images.htype = "image" + ds.labels.htype = "class_label" + ds.boxes.htype = "bbox" + ds.boxes_3d.htype = "bbox.3d" + ds.embeddings.htype = "embedding" + ds.masks.htype = "binary_mask" + ds.image_masks.htype = "binary_mask" + ds.keypoints.htype = "keypoints_coco" + ds.points.htype = "point" + + with local_ds_generator() as ds: + assert ds.images.htype == "image" + assert ds.labels.htype == "class_label" + assert ds.boxes.htype == "bbox" + assert ds.boxes_3d.htype == "bbox.3d" + assert ds.embeddings.htype == "embedding" + assert ds.masks.htype == "binary_mask" + assert ds.image_masks.htype == "binary_mask" + assert ds.keypoints.htype == "keypoints_coco" + assert ds.points.htype == "point" + + +def test_change_htype_fail(local_ds_generator): + with local_ds_generator() as ds: + ds.create_tensor("images") + ds.images.extend(np.zeros((10, 5, 5, 5, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.images.htype = "image" + + ds.create_tensor("images2") + ds.images2.extend(np.zeros((10, 5, 5, 6))) + with pytest.raises(IncompatibleHtypeError): + ds.images2.htype = "image" + + ds.create_tensor("labels") + ds.labels.extend(np.ones((10, 5, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.labels.htype = "class_label" + + ds.create_tensor("boxes") + ds.boxes.extend(np.zeros((10, 5, 5, 2))) + with pytest.raises(IncompatibleHtypeError): + ds.boxes.htype = "bbox" + with pytest.raises(IncompatibleHtypeError): + ds.boxes.htype = "bbox.3d" + + ds.create_tensor("boxes2") + ds.boxes2.extend(np.zeros((10, 5, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.boxes2.htype = "bbox" + with pytest.raises(IncompatibleHtypeError): + ds.boxes2.htype = "bbox.3d" + + ds.create_tensor("masks") + ds.masks.extend(np.zeros((10, 5, 5, 5, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.masks.htype = "binary_mask" + + ds.create_tensor("keypoints") + ds.keypoints.extend(np.zeros((10, 5, 5, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.keypoints.htype = "keypoints_coco" + + ds.create_tensor("keypoints2") + ds.keypoints2.extend(np.zeros((10, 10, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.keypoints2.htype = "keypoints_coco" + + ds.create_tensor("points") + ds.points.extend(np.zeros((10, 5, 5, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.points.htype = "point" + + ds.create_tensor("points2") + ds.points2.extend(np.zeros((10, 5, 5))) + with pytest.raises(IncompatibleHtypeError): + ds.points2.htype = "point" + + with pytest.raises(ValueError): + ds.images.htype = "link[image]" + + with pytest.raises(ValueError): + ds.images.htype = "sequence[image]" + + ds.create_tensor("boxes3", htype="bbox") + ds.boxes3.extend(np.zeros((10, 5, 4), dtype=np.float32)) + with pytest.raises(NotImplementedError): + ds.boxes3.htype = "embedding" + + with pytest.raises(NotImplementedError): + ds.images.htype = "text" From d3f0d3604436026b8a9c805159c2b0469412ac16 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Mon, 11 Sep 2023 13:30:54 +0530 Subject: [PATCH 6/8] rollback --- deeplake/htype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeplake/htype.py b/deeplake/htype.py index a33150b8fa..23e7d4c880 100644 --- a/deeplake/htype.py +++ b/deeplake/htype.py @@ -69,7 +69,7 @@ class htype: htype.BBOX: {"dtype": "float32", "coords": {}, "_info": ["coords"]}, htype.BBOX_3D: {"dtype": "float32", "coords": {}, "_info": ["coords"]}, htype.AUDIO: {"dtype": "float64"}, - htype.EMBEDDING: {"dtype": "float32"}, + htype.EMBEDDING: {}, htype.VIDEO: {"dtype": "uint8"}, htype.BINARY_MASK: { "dtype": "bool" From c7162ce13de58d270cce7abc0206eb6f0c27d3f4 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Mon, 11 Sep 2023 16:47:52 +0530 Subject: [PATCH 7/8] lint --- deeplake/core/dataset/deeplake_query_tensor.py | 4 ++++ deeplake/util/htype.py | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/deeplake/core/dataset/deeplake_query_tensor.py b/deeplake/core/dataset/deeplake_query_tensor.py index 422e8c57bf..2ffa695df3 100644 --- a/deeplake/core/dataset/deeplake_query_tensor.py +++ b/deeplake/core/dataset/deeplake_query_tensor.py @@ -106,6 +106,10 @@ def htype(self): htype = f"link[{htype}]" return htype + @htype.setter + def htype(self, value): + raise NotImplementedError("htype of a query tensor cannot be set.") + @property def sample_compression(self): return self.indra_tensor.sample_compression diff --git a/deeplake/util/htype.py b/deeplake/util/htype.py index 5c9b14981b..3aba8f713b 100644 --- a/deeplake/util/htype.py +++ b/deeplake/util/htype.py @@ -1,11 +1,9 @@ -# type: ignore - from typing import Tuple, Optional from deeplake.htype import htype as HTYPE, HTYPE_CONFIGURATIONS from deeplake.util.exceptions import TensorMetaInvalidHtype -def parse_complex_htype(htype: Optional[str]) -> Tuple[bool, bool, str]: +def parse_complex_htype(htype: Optional[str]) -> Tuple[bool, bool, Optional[str]]: is_sequence = False is_link = False From 35dd6cc1c7c50aeb32b2ce73f44332f8f4b77050 Mon Sep 17 00:00:00 2001 From: FayazRahman Date: Thu, 14 Sep 2023 14:08:48 +0530 Subject: [PATCH 8/8] tests --- deeplake/api/tests/test_api.py | 4 ++++ deeplake/htype.py | 7 +++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/deeplake/api/tests/test_api.py b/deeplake/api/tests/test_api.py index 8acba53bb1..91e882870b 100644 --- a/deeplake/api/tests/test_api.py +++ b/deeplake/api/tests/test_api.py @@ -3031,3 +3031,7 @@ def test_change_htype_fail(local_ds_generator): with pytest.raises(NotImplementedError): ds.images.htype = "text" + + ds.create_tensor("images3", htype="image", sample_compression="jpg") + with pytest.raises(UnsupportedCompressionError): + ds.images3.htype = "embedding" diff --git a/deeplake/htype.py b/deeplake/htype.py index 23e7d4c880..fa0fb490f2 100644 --- a/deeplake/htype.py +++ b/deeplake/htype.py @@ -119,6 +119,9 @@ class constraints: lambda htype, dtype: f"Incompatible dtype of tensor for htype {htype}: {dtype}" ) + EMBEDDING = lambda shape, dtype: True + INSTANCE_LABEL = lambda shape, dtype: True + @staticmethod def IMAGE(shape, dtype): if len(shape) not in (3, 4): @@ -147,8 +150,6 @@ def BBOX_3D(shape, dtype): if shape[-1] != 8: raise IncompatibleHtypeError(constraints.shape_error("bbox.3d", shape)) - EMBEDDING = lambda shape, dtype: True - @staticmethod def BINARY_MASK(shape, dtype): if len(shape) not in (3, 4): @@ -169,8 +170,6 @@ def KEYPOINTS_COCO(shape, dtype): constraints.shape_error("keypoints_coco", shape) ) - INSTANCE_LABEL = lambda shape, dtype: True - @staticmethod def POINT(shape, dtype): if len(shape) != 3: