[AL-2405] Add ability to change htype #11057

GitHub Actions / JUnit Test Report failed Sep 8, 2023 in 0s

4431 tests run, 2154 passed, 2270 skipped, 7 failed.

Annotations

Check failure on line 685 in deeplake/api/tests/test_api.py

test_api.test_htype

deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7fcf9d79a230>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
       2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
       9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False

    def extend(
        self,
        samples,
        progressbar: bool = False,
        link_callback: Optional[Callable] = None,
        pg_callback=None,
        ignore_errors: bool = False,
    ):
        try:
            assert not (progressbar and pg_callback)
            self.check_link_ready()
            if not self.write_initialization_done:
                self._write_initialization()
                self.write_initialization_done = True
    
            initial_autoflush = self.cache.autoflush
            self.cache.autoflush = False
            num_samples = self.tensor_length
    
            if self.is_sequence:
                self._extend_sequence(
                    samples, progressbar, link_callback, ignore_errors
                )
            else:
>               verified_samples = self._extend(
                    samples,
                    progressbar,
                    pg_callback=pg_callback,
                    ignore_errors=ignore_errors,
                )

deeplake/core/chunk_engine.py:1157: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/chunk_engine.py:1039: in _extend
    samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
    num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/uncompressed_chunk.py:36: in extend_if_has_space
    return self._extend_if_has_space_list(
deeplake/core/chunk/uncompressed_chunk.py:162: in _extend_if_has_space_list
    serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake/core/chunk/base_chunk.py:388: in serialize_sample
    incoming_sample, shape = serialize_numpy_and_base_types(
deeplake/core/serialize.py:538: in serialize_numpy_and_base_types
    out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

sample = array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
       2.79171742e-01, 7.97253927e-01, 9.044397...9.02978856e-01, 9.22093883e-01, 6.37840299e-01,
       9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])
dtype = 'float32', htype = 'embedding'

    def intelligent_cast(
        sample: Any, dtype: Union[np.dtype, str], htype: str
    ) -> np.ndarray:
        # TODO: docstring (note: sample can be a scalar)/statictyping
        # TODO: implement better casting here
        if isinstance(sample, Sample):
            sample = sample.array
    
        if hasattr(sample, "dtype") and sample.dtype == dtype:
            return sample
    
        err_dtype = get_incompatible_dtype(sample, dtype)
        if err_dtype:
>           raise TensorDtypeMismatchError(
                dtype,
                err_dtype,
                htype,
            )
E           deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.

deeplake/util/casting.py:103: TensorDtypeMismatchError

The above exception was the direct cause of the following exception:

memory_ds = Dataset(path='mem://hub_pytest/test_api/test_htype', tensors=['image', 'bbox', 'label', 'video', 'bin_mask', 'segment_mask', 'keypoints_coco', 'point', 'point_cloud', 'intrinsics', 'embedding'])

    def test_htype(memory_ds: Dataset):
        image = memory_ds.create_tensor("image", htype="image", sample_compression="png")
        bbox = memory_ds.create_tensor("bbox", htype="bbox")
        label = memory_ds.create_tensor(
            "label", htype="class_label", class_names=["a", "b", "c", "d", "e", "f"]
        )
        video = memory_ds.create_tensor("video", htype="video", sample_compression="mkv")
        bin_mask = memory_ds.create_tensor("bin_mask", htype="binary_mask")
        segment_mask = memory_ds.create_tensor(
            "segment_mask", htype="segment_mask", class_names=["a", "b", "c"]
        )
        keypoints_coco = memory_ds.create_tensor(
            "keypoints_coco",
            htype="keypoints_coco",
            keypoints=["arm", "leg", "torso"],
            connections=[[0, 2], [1, 2]],
        )
        point = memory_ds.create_tensor("point", htype="point")
        point_cloud = memory_ds.create_tensor(
            "point_cloud", htype="point_cloud", sample_compression="las"
        )
        intrinsics = memory_ds.create_tensor("intrinsics", htype="intrinsics")
        embedding = memory_ds.create_tensor("embedding", htype="embedding")
    
        image.append(np.ones((28, 28, 3), dtype=np.uint8))
        bbox.append(np.array([1.0, 1.0, 0.0, 0.5], dtype=np.float32))
        # label.append(5)
        label.append(np.array(5, dtype=np.uint32))
        with pytest.raises(SampleAppendError):
            video.append(np.ones((10, 28, 28, 3), dtype=np.uint8))
        bin_mask.append(np.zeros((28, 28), dtype=bool))
        segment_mask.append(np.ones((28, 28), dtype=np.uint32))
        keypoints_coco.append(np.ones((51, 2), dtype=np.int32))
        point.append(np.ones((11, 2), dtype=np.int32))
    
        point_cloud.append(
            deeplake.read(
                os.path.join(get_dummy_data_path("point_cloud"), "point_cloud.las")
            )
        )
        point_cloud_dummy_data_path = pathlib.Path(get_dummy_data_path("point_cloud"))
        point_cloud.append(deeplake.read(point_cloud_dummy_data_path / "point_cloud.las"))
        # Along the first direction three matrices are concatenated, the first matrix is P,
        # the second one is Tr and the third one is R
        intrinsics.append(np.zeros((3, 4, 4), dtype=np.float32))
>       embedding.append(np.random.rand((100)))

deeplake/api/tests/test_api.py:685: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake/core/tensor.py:414: in append
    self.extend([sample], progressbar=False)
deeplake/util/invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
    self.chunk_engine.extend(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7fcf9d79a230>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
       2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
       9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False

    def extend(
        self,
        samples,
        progressbar: bool = False,
        link_callback: Optional[Callable] = None,
        pg_callback=None,
        ignore_errors: bool = False,
    ):
        try:
            assert not (progressbar and pg_callback)
            self.check_link_ready()
            if not self.write_initialization_done:
                self._write_initialization()
                self.write_initialization_done = True
    
            initial_autoflush = self.cache.autoflush
            self.cache.autoflush = False
            num_samples = self.tensor_length
    
            if self.is_sequence:
                self._extend_sequence(
                    samples, progressbar, link_callback, ignore_errors
                )
            else:
                verified_samples = self._extend(
                    samples,
                    progressbar,
                    pg_callback=pg_callback,
                    ignore_errors=ignore_errors,
                )
                if link_callback:
                    verified_samples = self._prepare_samples_for_link_callback(
                        verified_samples
                    )
                    self._extend_link_callback(
                        link_callback,
                        verified_samples,
                        None,
                        progressbar,
                        ignore_errors,
                    )
    
            self.cache.autoflush = initial_autoflush
            self.cache.maybe_flush()
        except Exception as e:
            num_samples_added = self.tensor_length - num_samples
            for _ in range(num_samples_added):
                self.pop()
>           raise SampleAppendError(self.name) from e
E           deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.

deeplake/core/chunk_engine.py:1181: SampleAppendError
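
The failure above is a dtype mismatch: `np.random.rand(100)` returns a float64 array, while the 'embedding' htype expects float32 samples, so `intelligent_cast` raises TensorDtypeMismatchError and the append is rolled back as SampleAppendError. A minimal sketch of how the sample could be made to match, assuming the fix is on the test side (casting the sample, or declaring a dtype at tensor creation as the error message suggests); this is not necessarily the change made in the PR:

    import numpy as np

    # Sketch only: np.random.rand produces float64, but htype="embedding"
    # defaults to float32 unless a dtype is passed to create_tensor.
    sample = np.random.rand(100)                  # float64 -> TensorDtypeMismatchError
    embedding.append(sample.astype(np.float32))   # dtype matches, append succeeds

    # Alternative: declare the dtype when creating the tensor, e.g.
    # embedding = memory_ds.create_tensor("embedding", htype="embedding", dtype=np.float64)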

Check failure on line 2904 in deeplake/api/tests/test_api.py

test_api.test_tensor_extend_ignore[compression_args0]

Failed: Timeout >60.0s
Raw output
local_ds = Dataset(path='./hub_pytest/test_api/test_tensor_extend_ignore-compression_args0-', tensors=['images', 'tiled_images', 'seq_images', 'link_images'])
lfpw_links = ['https://cm1.theinsider.com/media/0/428/93/spl41194_011.0.0.0x0.636x912.jpeg', 'https://cm1.theinsider.com/media/0/42...ws/FILM_Queen_6.jpg', 'https://img2.timeinc.net/people/i/2008/features/theysaid/080331/kimora_lee_simmons400.jpg', ...]
compression_args = {'sample_compression': None}

    @pytest.mark.parametrize(
        "compression_args",
        [
            {"sample_compression": None},
            {"sample_compression": "jpg"},
            {"chunk_compression": "jpg"},
        ],
    )
    def test_tensor_extend_ignore(local_ds, lfpw_links, compression_args):
        with local_ds as ds:
            ds.create_tensor("images", htype="image", **compression_args)
            ds.create_tensor(
                "tiled_images",
                htype="image",
                tiling_threshold=1 * KB,
                max_chunk_size=1 * KB,
                **compression_args,
            )
            ds.create_tensor("seq_images", htype="sequence[image]", **compression_args)
            ds.create_tensor("link_images", htype="link[image]", **compression_args)
    
        images = [deeplake.read(link) for link in lfpw_links]
        ds.images.extend(images, ignore_errors=True)
        ds.tiled_images.extend(images, ignore_errors=True)
    
        seqs = [
            list(map(deeplake.read, lfpw_links[i : i + 2]))
            for i in range(0, len(lfpw_links), 2)
        ]
>       ds.seq_images.extend(seqs, ignore_errors=True)

deeplake/api/tests/test_api.py:2904: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
    self.chunk_engine.extend(
deeplake/core/chunk_engine.py:1153: in extend
    self._extend_sequence(
deeplake/core/chunk_engine.py:1079: in _extend_sequence
    verified_sample = self._extend(
deeplake/core/chunk_engine.py:1039: in _extend
    samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
    num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/uncompressed_chunk.py:36: in extend_if_has_space
    return self._extend_if_has_space_list(
deeplake/core/chunk/uncompressed_chunk.py:162: in _extend_if_has_space_list
    serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake/core/chunk/base_chunk.py:354: in serialize_sample
    incoming_sample, shape = serialize_sample_object(  # type: ignore
deeplake/core/serialize.py:610: in serialize_sample_object
    shape = incoming_sample.shape
deeplake/core/sample.py:161: in shape
    self._read_meta()
deeplake/core/sample.py:196: in _read_meta
    f = self._read_from_path()
deeplake/core/sample.py:450: in _read_from_path
    self._buffer = self._read_from_http()
deeplake/core/sample.py:514: in _read_from_http
    result = requests.get(self.path, headers=headers)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:73: in get
    return request("get", url, params=params, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:59: in request
    return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:589: in request
    resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:703: in send
    r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/adapters.py:486: in send
    resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:714: in urlopen
    httplib_response = self._make_request(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:403: in _make_request
    self._validate_conn(conn)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:1053: in _validate_conn
    conn.connect()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:363: in connect
    self.sock = conn = self._new_conn()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:174: in _new_conn
    conn = connection.create_connection(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

address = ('cache.thephoenix.com', 443), timeout = None, source_address = None
socket_options = [(6, 1, 1)]

    def create_connection(
        address,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        source_address=None,
        socket_options=None,
    ):
        """Connect to *address* and return the socket object.
    
        Convenience function.  Connect to *address* (a 2-tuple ``(host,
        port)``) and return the socket object.  Passing the optional
        *timeout* parameter will set the timeout on the socket instance
        before attempting to connect.  If no *timeout* is supplied, the
        global default timeout setting returned by :func:`socket.getdefaulttimeout`
        is used.  If *source_address* is set it must be a tuple of (host, port)
        for the socket to bind as a source address before making the connection.
        An host of '' or port 0 tells the OS to use the default.
        """
    
        host, port = address
        if host.startswith("["):
            host = host.strip("[]")
        err = None
    
        # Using the value from allowed_gai_family() in the context of getaddrinfo lets
        # us select whether to work with IPv4 DNS records, IPv6 records, or both.
        # The original create_connection function always returns all records.
        family = allowed_gai_family()
    
        try:
            host.encode("idna")
        except UnicodeError:
            return six.raise_from(
                LocationParseError(u"'%s', label empty or too long" % host), None
            )
    
        for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
    
                # If provided, set socket level options before connecting.
                _set_socket_options(sock, socket_options)
    
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
>               sock.connect(sa)
E               Failed: Timeout >60.0s

/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/util/connection.py:85: Failed
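
This failure is not a dtype issue: the test hangs while downloading external lfpw images and pytest-timeout kills it after 60s, because `requests.get(self.path, headers=headers)` in deeplake/core/sample.py is called without a timeout and the host (cache.thephoenix.com) never completes the TCP connect. A minimal sketch of bounding the read so a dead link fails fast and can be skipped under `ignore_errors=True`; the helper name and timeout values are assumptions, not code from the repository:

    import requests

    # Sketch only: give the HTTP read explicit connect/read timeouts so a dead
    # host raises requests.ConnectTimeout / requests.ReadTimeout quickly
    # instead of hanging until the 60s pytest timeout.
    def read_from_http(url, headers=None):
        result = requests.get(url, headers=headers, timeout=(10, 30))
        result.raise_for_status()
        return result.content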

Check failure on line 2907 in deeplake/api/tests/test_api.py

test_api.test_tensor_extend_ignore[compression_args1]

Failed: Timeout >60.0s
Raw output
local_ds = Dataset(path='./hub_pytest/test_api/test_tensor_extend_ignore-compression_args1-', tensors=['images', 'tiled_images', 'seq_images', 'link_images'])
lfpw_links = ['https://cm1.theinsider.com/media/0/428/93/spl41194_011.0.0.0x0.636x912.jpeg', 'https://cm1.theinsider.com/media/0/42...ws/FILM_Queen_6.jpg', 'https://img2.timeinc.net/people/i/2008/features/theysaid/080331/kimora_lee_simmons400.jpg', ...]
compression_args = {'sample_compression': 'jpg'}

    @pytest.mark.parametrize(
        "compression_args",
        [
            {"sample_compression": None},
            {"sample_compression": "jpg"},
            {"chunk_compression": "jpg"},
        ],
    )
    def test_tensor_extend_ignore(local_ds, lfpw_links, compression_args):
        with local_ds as ds:
            ds.create_tensor("images", htype="image", **compression_args)
            ds.create_tensor(
                "tiled_images",
                htype="image",
                tiling_threshold=1 * KB,
                max_chunk_size=1 * KB,
                **compression_args,
            )
            ds.create_tensor("seq_images", htype="sequence[image]", **compression_args)
            ds.create_tensor("link_images", htype="link[image]", **compression_args)
    
        images = [deeplake.read(link) for link in lfpw_links]
        ds.images.extend(images, ignore_errors=True)
        ds.tiled_images.extend(images, ignore_errors=True)
    
        seqs = [
            list(map(deeplake.read, lfpw_links[i : i + 2]))
            for i in range(0, len(lfpw_links), 2)
        ]
        ds.seq_images.extend(seqs, ignore_errors=True)
    
        links = [deeplake.link(link) for link in lfpw_links]
>       ds.link_images.extend(links, ignore_errors=True)

deeplake/api/tests/test_api.py:2907: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
    self.chunk_engine.extend(
deeplake/core/chunk_engine.py:1157: in extend
    verified_samples = self._extend(
deeplake/core/chunk_engine.py:1036: in _extend
    samples, verified_samples = self._sanitize_samples(
deeplake/core/chunk_engine.py:687: in _sanitize_samples
    verified_samples = self.check_each_sample(
deeplake/core/linked_chunk_engine.py:273: in check_each_sample
    read_linked_sample(
deeplake/core/linked_sample.py:29: in read_linked_sample
    return _read_http_linked_sample(
deeplake/core/linked_sample.py:43: in wrapper
    return f(linked_creds, sample_creds_key, *args, **kwargs)
deeplake/core/linked_sample.py:72: in _read_http_linked_sample
    return deeplake.read(sample_path, verify=verify, creds=creds)
deeplake/api/read.py:61: in read
    return Sample(
deeplake/core/sample.py:101: in __init__
    compressed_bytes = self._read_from_path()
deeplake/core/sample.py:450: in _read_from_path
    self._buffer = self._read_from_http()
deeplake/core/sample.py:514: in _read_from_http
    result = requests.get(self.path, headers=headers)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:73: in get
    return request("get", url, params=params, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:59: in request
    return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:589: in request
    resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:703: in send
    r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/adapters.py:486: in send
    resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:714: in urlopen
    httplib_response = self._make_request(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:403: in _make_request
    self._validate_conn(conn)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:1053: in _validate_conn
    conn.connect()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:363: in connect
    self.sock = conn = self._new_conn()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:174: in _new_conn
    conn = connection.create_connection(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

address = ('cache.thephoenix.com', 443), timeout = None, source_address = None
socket_options = [(6, 1, 1)]

    def create_connection(
        address,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        source_address=None,
        socket_options=None,
    ):
        """Connect to *address* and return the socket object.
    
        Convenience function.  Connect to *address* (a 2-tuple ``(host,
        port)``) and return the socket object.  Passing the optional
        *timeout* parameter will set the timeout on the socket instance
        before attempting to connect.  If no *timeout* is supplied, the
        global default timeout setting returned by :func:`socket.getdefaulttimeout`
        is used.  If *source_address* is set it must be a tuple of (host, port)
        for the socket to bind as a source address before making the connection.
        An host of '' or port 0 tells the OS to use the default.
        """
    
        host, port = address
        if host.startswith("["):
            host = host.strip("[]")
        err = None
    
        # Using the value from allowed_gai_family() in the context of getaddrinfo lets
        # us select whether to work with IPv4 DNS records, IPv6 records, or both.
        # The original create_connection function always returns all records.
        family = allowed_gai_family()
    
        try:
            host.encode("idna")
        except UnicodeError:
            return six.raise_from(
                LocationParseError(u"'%s', label empty or too long" % host), None
            )
    
        for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
    
                # If provided, set socket level options before connecting.
                _set_socket_options(sock, socket_options)
    
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
>               sock.connect(sa)
E               Failed: Timeout >60.0s

/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/util/connection.py:85: Failed

Check failure on line 2904 in deeplake/api/tests/test_api.py

test_api.test_tensor_extend_ignore[compression_args2]

Failed: Timeout >60.0s
Raw output
local_ds = Dataset(path='./hub_pytest/test_api/test_tensor_extend_ignore-compression_args2-', tensors=['images', 'tiled_images', 'seq_images', 'link_images'])
lfpw_links = ['https://cm1.theinsider.com/media/0/428/93/spl41194_011.0.0.0x0.636x912.jpeg', 'https://cm1.theinsider.com/media/0/42...ws/FILM_Queen_6.jpg', 'https://img2.timeinc.net/people/i/2008/features/theysaid/080331/kimora_lee_simmons400.jpg', ...]
compression_args = {'chunk_compression': 'jpg'}

    @pytest.mark.parametrize(
        "compression_args",
        [
            {"sample_compression": None},
            {"sample_compression": "jpg"},
            {"chunk_compression": "jpg"},
        ],
    )
    def test_tensor_extend_ignore(local_ds, lfpw_links, compression_args):
        with local_ds as ds:
            ds.create_tensor("images", htype="image", **compression_args)
            ds.create_tensor(
                "tiled_images",
                htype="image",
                tiling_threshold=1 * KB,
                max_chunk_size=1 * KB,
                **compression_args,
            )
            ds.create_tensor("seq_images", htype="sequence[image]", **compression_args)
            ds.create_tensor("link_images", htype="link[image]", **compression_args)
    
        images = [deeplake.read(link) for link in lfpw_links]
        ds.images.extend(images, ignore_errors=True)
        ds.tiled_images.extend(images, ignore_errors=True)
    
        seqs = [
            list(map(deeplake.read, lfpw_links[i : i + 2]))
            for i in range(0, len(lfpw_links), 2)
        ]
>       ds.seq_images.extend(seqs, ignore_errors=True)

deeplake/api/tests/test_api.py:2904: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/util/invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
    self.chunk_engine.extend(
deeplake/core/chunk_engine.py:1153: in extend
    self._extend_sequence(
deeplake/core/chunk_engine.py:1079: in _extend_sequence
    verified_sample = self._extend(
deeplake/core/chunk_engine.py:1039: in _extend
    samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
    num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/chunk_compressed_chunk.py:55: in extend_if_has_space
    return self.extend_if_has_space_image_compression(
deeplake/core/chunk/chunk_compressed_chunk.py:319: in extend_if_has_space_image_compression
    incoming_sample, shape = self.process_sample_img_compr(incoming_sample)
deeplake/core/chunk/chunk_compressed_chunk.py:524: in process_sample_img_compr
    sample = intelligent_cast(sample, self.dtype, self.htype)
deeplake/util/casting.py:96: in intelligent_cast
    sample = sample.array
deeplake/core/sample.py:395: in array
    self._decompress()
deeplake/core/sample.py:339: in _decompress
    compression = self.compression
deeplake/core/sample.py:167: in compression
    self._read_meta()
deeplake/core/sample.py:196: in _read_meta
    f = self._read_from_path()
deeplake/core/sample.py:450: in _read_from_path
    self._buffer = self._read_from_http()
deeplake/core/sample.py:514: in _read_from_http
    result = requests.get(self.path, headers=headers)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:73: in get
    return request("get", url, params=params, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:59: in request
    return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:589: in request
    resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:703: in send
    r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/adapters.py:486: in send
    resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:714: in urlopen
    httplib_response = self._make_request(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:403: in _make_request
    self._validate_conn(conn)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:1053: in _validate_conn
    conn.connect()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:363: in connect
    self.sock = conn = self._new_conn()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:174: in _new_conn
    conn = connection.create_connection(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

address = ('cache.thephoenix.com', 443), timeout = None, source_address = None
socket_options = [(6, 1, 1)]

    def create_connection(
        address,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        source_address=None,
        socket_options=None,
    ):
        """Connect to *address* and return the socket object.
    
        Convenience function.  Connect to *address* (a 2-tuple ``(host,
        port)``) and return the socket object.  Passing the optional
        *timeout* parameter will set the timeout on the socket instance
        before attempting to connect.  If no *timeout* is supplied, the
        global default timeout setting returned by :func:`socket.getdefaulttimeout`
        is used.  If *source_address* is set it must be a tuple of (host, port)
        for the socket to bind as a source address before making the connection.
        An host of '' or port 0 tells the OS to use the default.
        """
    
        host, port = address
        if host.startswith("["):
            host = host.strip("[]")
        err = None
    
        # Using the value from allowed_gai_family() in the context of getaddrinfo lets
        # us select whether to work with IPv4 DNS records, IPv6 records, or both.
        # The original create_connection function always returns all records.
        family = allowed_gai_family()
    
        try:
            host.encode("idna")
        except UnicodeError:
            return six.raise_from(
                LocationParseError(u"'%s', label empty or too long" % host), None
            )
    
        for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
    
                # If provided, set socket level options before connecting.
                _set_socket_options(sock, socket_options)
    
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                if source_address:
                    sock.bind(source_address)
>               sock.connect(sa)
E               Failed: Timeout >60.0s

/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/util/connection.py:85: Failed
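
All three test_tensor_extend_ignore failures share the same shape: the test depends on live third-party image hosts, so any dead link becomes a 60-second hang rather than a skippable error. One way to make the test hermetic, sketched here with names and wiring that are assumptions rather than existing fixtures, is to stub `requests.get` with an in-memory JPEG so `deeplake.read` never touches the network:

    import io
    from unittest import mock

    import numpy as np
    from PIL import Image

    def fake_get(url, headers=None, **kwargs):
        # Serve a tiny valid JPEG for every link instead of hitting the network.
        buf = io.BytesIO()
        Image.fromarray(np.zeros((8, 8, 3), dtype=np.uint8)).save(buf, format="JPEG")
        return mock.Mock(status_code=200, content=buf.getvalue())

    with mock.patch("requests.get", side_effect=fake_get):
        ds.images.extend([deeplake.read(link) for link in lfpw_links], ignore_errors=True)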

Check failure on line 97 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

test_deeplake_vectorstore.test_id_backward_compatibility

deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7fd046018d00>
samples = [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.,...., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False

    def extend(
        self,
        samples,
        progressbar: bool = False,
        link_callback: Optional[Callable] = None,
        pg_callback=None,
        ignore_errors: bool = False,
    ):
        try:
            assert not (progressbar and pg_callback)
            self.check_link_ready()
            if not self.write_initialization_done:
                self._write_initialization()
                self.write_initialization_done = True
    
            initial_autoflush = self.cache.autoflush
            self.cache.autoflush = False
            num_samples = self.tensor_length
    
            if self.is_sequence:
                self._extend_sequence(
                    samples, progressbar, link_callback, ignore_errors
                )
            else:
>               verified_samples = self._extend(
                    samples,
                    progressbar,
                    pg_callback=pg_callback,
                    ignore_errors=ignore_errors,
                )

deeplake/core/chunk_engine.py:1157: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
deeplake/core/chunk_engine.py:1039: in _extend
    samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
    num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/uncompressed_chunk.py:36: in extend_if_has_space
    return self._extend_if_has_space_list(
deeplake/core/chunk/uncompressed_chunk.py:162: in _extend_if_has_space_list
    serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake/core/chunk/base_chunk.py:388: in serialize_sample
    incoming_sample, shape = serialize_numpy_and_base_types(
deeplake/core/serialize.py:538: in serialize_numpy_and_base_types
    out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

sample = array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., ...0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
dtype = 'float32', htype = 'embedding'

    def intelligent_cast(
        sample: Any, dtype: Union[np.dtype, str], htype: str
    ) -> np.ndarray:
        # TODO: docstring (note: sample can be a scalar)/statictyping
        # TODO: implement better casting here
        if isinstance(sample, Sample):
            sample = sample.array
    
        if hasattr(sample, "dtype") and sample.dtype == dtype:
            return sample
    
        err_dtype = get_incompatible_dtype(sample, dtype)
        if err_dtype:
>           raise TensorDtypeMismatchError(
                dtype,
                err_dtype,
                htype,
            )
E           deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.

deeplake/util/casting.py:103: TensorDtypeMismatchError

The above exception was the direct cause of the following exception:

local_path = './hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility'

    def test_id_backward_compatibility(local_path):
        num_of_items = 10
        embedding_dim = 100
    
        ids = [f"{i}" for i in range(num_of_items)]
        embedding = [np.zeros(embedding_dim) for i in range(num_of_items)]
        text = ["aadfv" for i in range(num_of_items)]
        metadata = [{"key": i} for i in range(num_of_items)]
    
        ds = deeplake.empty(local_path, overwrite=True)
        ds.create_tensor("ids", htype="text")
        ds.create_tensor("embedding", htype="embedding")
        ds.create_tensor("text", htype="text")
        ds.create_tensor("metadata", htype="json")
    
>       ds.extend(
            {
                "ids": ids,
                "embedding": embedding,
                "text": text,
                "metadata": metadata,
            }
        )

deeplake/core/vectorstore/test_deeplake_vectorstore.py:97: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility', tensors=['ids', 'embedding', 'text', 'metadata'])
samples = {'embedding': [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0...ey': 2}, {'key': 3}, {'key': 4}, {'key': 5}, ...], 'text': ['aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', ...]}
skip_ok = False, append_empty = False, ignore_errors = False
progressbar = False

    def extend(
        self,
        samples: Dict[str, Any],
        skip_ok: bool = False,
        append_empty: bool = False,
        ignore_errors: bool = False,
        progressbar: bool = False,
    ):
        """Appends multiple rows of samples to mutliple tensors at once. This method expects all tensors being updated to be of the same length.
    
        Args:
            samples (Dict[str, Any]): Dictionary with tensor names as keys and samples as values.
            skip_ok (bool): Skip tensors not in ``samples`` if set to True.
            append_empty (bool): Append empty samples to tensors not specified in ``sample`` if set to ``True``. If True, ``skip_ok`` is ignored.
            ignore_errors (bool): Skip samples that cause errors while extending, if set to ``True``.
            progressbar (bool): Displays a progress bar if set to ``True``.
    
        Raises:
            KeyError: If any tensor in the dataset is not a key in ``samples`` and ``skip_ok`` is ``False``.
            TensorDoesNotExistError: If tensor in ``samples`` does not exist.
            ValueError: If all tensors being updated are not of the same length.
            NotImplementedError: If an error occurs while writing tiles.
            SampleExtendError: If the extend failed while appending a sample.
            Exception: Error while attempting to rollback appends.
        """
        extend = False
        if isinstance(samples, Dataset):
            samples = samples.tensors
            extend = True
        elif set(map(type, samples.values())) == {np.ndarray}:
            extend = True
        if not samples:
            return
        n = len(samples[next(iter(samples.keys()))])
        for v in samples.values():
            if len(v) != n:
                sizes = {k: len(v) for (k, v) in samples.items()}
                raise ValueError(
                    f"Incoming samples are not of equal lengths. Incoming sample sizes: {sizes}"
                )
        [f() for f in list(self._update_hooks.values())]
        if extend:
            if ignore_errors:
                warnings.warn(
                    "`ignore_errors` argument will be ignored while extending with numpy arrays or tensors."
                )
            return self._append_or_extend(
                samples, extend=True, skip_ok=skip_ok, append_empty=append_empty
            )
        with self:
            if progressbar:
                indices = tqdm(range(n))
            else:
                indices = range(n)
            for i in indices:
                try:
                    self.append(
                        {k: v[i] for k, v in samples.items()},
                        skip_ok=skip_ok,
                        append_empty=append_empty,
                    )
                except Exception as e:
                    if ignore_errors:
                        continue
                    else:
                        if isinstance(e, SampleAppendError):
>                           raise SampleExtendError(str(e)) from e.__cause__
E                           deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.

deeplake/core/dataset/dataset.py:3142: SampleExtendError
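
The underlying error here is the same float32/float64 mismatch as in test_htype: `np.zeros(embedding_dim)` defaults to float64, the 'embedding' htype expects float32, and Dataset.extend re-raises the per-sample failure as SampleExtendError. A minimal sketch of the test-side fix, assuming the tensor keeps its default float32 dtype:

    import numpy as np

    num_of_items = 10
    embedding_dim = 100

    # Sketch only: build the embeddings with an explicit float32 dtype so they
    # match the 'embedding' htype default instead of numpy's float64 default.
    embedding = [np.zeros(embedding_dim, dtype=np.float32) for _ in range(num_of_items)]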

Check failure on line 685 in deeplake/api/tests/test_api.py

test_api.test_htype

deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x0000024F75589960>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
       2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
       9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False

    def extend(
        self,
        samples,
        progressbar: bool = False,
        link_callback: Optional[Callable] = None,
        pg_callback=None,
        ignore_errors: bool = False,
    ):
        try:
            assert not (progressbar and pg_callback)
            self.check_link_ready()
            if not self.write_initialization_done:
                self._write_initialization()
                self.write_initialization_done = True
    
            initial_autoflush = self.cache.autoflush
            self.cache.autoflush = False
            num_samples = self.tensor_length
    
            if self.is_sequence:
                self._extend_sequence(
                    samples, progressbar, link_callback, ignore_errors
                )
            else:
>               verified_samples = self._extend(
                    samples,
                    progressbar,
                    pg_callback=pg_callback,
                    ignore_errors=ignore_errors,
                )

deeplake\core\chunk_engine.py:1157: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\core\chunk_engine.py:1039: in _extend
    samples = self._samples_to_chunks(
deeplake\core\chunk_engine.py:843: in _samples_to_chunks
    num_samples_added = current_chunk.extend_if_has_space(
deeplake\core\chunk\uncompressed_chunk.py:36: in extend_if_has_space
    return self._extend_if_has_space_list(
deeplake\core\chunk\uncompressed_chunk.py:162: in _extend_if_has_space_list
    serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake\core\chunk\base_chunk.py:388: in serialize_sample
    incoming_sample, shape = serialize_numpy_and_base_types(
deeplake\core\serialize.py:538: in serialize_numpy_and_base_types
    out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

sample = array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
       2.79171742e-01, 7.97253927e-01, 9.044397...9.02978856e-01, 9.22093883e-01, 6.37840299e-01,
       9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])
dtype = 'float32', htype = 'embedding'

    def intelligent_cast(
        sample: Any, dtype: Union[np.dtype, str], htype: str
    ) -> np.ndarray:
        # TODO: docstring (note: sample can be a scalar)/statictyping
        # TODO: implement better casting here
        if isinstance(sample, Sample):
            sample = sample.array
    
        if hasattr(sample, "dtype") and sample.dtype == dtype:
            return sample
    
        err_dtype = get_incompatible_dtype(sample, dtype)
        if err_dtype:
>           raise TensorDtypeMismatchError(
                dtype,
                err_dtype,
                htype,
            )
E           deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.

deeplake\util\casting.py:103: TensorDtypeMismatchError

The above exception was the direct cause of the following exception:

memory_ds = Dataset(path='mem://hub_pytest/test_api/test_htype', tensors=['image', 'bbox', 'label', 'video', 'bin_mask', 'segment_mask', 'keypoints_coco', 'point', 'point_cloud', 'intrinsics', 'embedding'])

    def test_htype(memory_ds: Dataset):
        image = memory_ds.create_tensor("image", htype="image", sample_compression="png")
        bbox = memory_ds.create_tensor("bbox", htype="bbox")
        label = memory_ds.create_tensor(
            "label", htype="class_label", class_names=["a", "b", "c", "d", "e", "f"]
        )
        video = memory_ds.create_tensor("video", htype="video", sample_compression="mkv")
        bin_mask = memory_ds.create_tensor("bin_mask", htype="binary_mask")
        segment_mask = memory_ds.create_tensor(
            "segment_mask", htype="segment_mask", class_names=["a", "b", "c"]
        )
        keypoints_coco = memory_ds.create_tensor(
            "keypoints_coco",
            htype="keypoints_coco",
            keypoints=["arm", "leg", "torso"],
            connections=[[0, 2], [1, 2]],
        )
        point = memory_ds.create_tensor("point", htype="point")
        point_cloud = memory_ds.create_tensor(
            "point_cloud", htype="point_cloud", sample_compression="las"
        )
        intrinsics = memory_ds.create_tensor("intrinsics", htype="intrinsics")
        embedding = memory_ds.create_tensor("embedding", htype="embedding")
    
        image.append(np.ones((28, 28, 3), dtype=np.uint8))
        bbox.append(np.array([1.0, 1.0, 0.0, 0.5], dtype=np.float32))
        # label.append(5)
        label.append(np.array(5, dtype=np.uint32))
        with pytest.raises(SampleAppendError):
            video.append(np.ones((10, 28, 28, 3), dtype=np.uint8))
        bin_mask.append(np.zeros((28, 28), dtype=bool))
        segment_mask.append(np.ones((28, 28), dtype=np.uint32))
        keypoints_coco.append(np.ones((51, 2), dtype=np.int32))
        point.append(np.ones((11, 2), dtype=np.int32))
    
        point_cloud.append(
            deeplake.read(
                os.path.join(get_dummy_data_path("point_cloud"), "point_cloud.las")
            )
        )
        point_cloud_dummy_data_path = pathlib.Path(get_dummy_data_path("point_cloud"))
        point_cloud.append(deeplake.read(point_cloud_dummy_data_path / "point_cloud.las"))
        # Along the first direction three matrices are concatenated, the first matrix is P,
        # the second one is Tr and the third one is R
        intrinsics.append(np.zeros((3, 4, 4), dtype=np.float32))
>       embedding.append(np.random.rand((100)))

deeplake\api\tests\test_api.py:685: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\util\invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake\core\tensor.py:414: in append
    self.extend([sample], progressbar=False)
deeplake\util\invalid_view_op.py:22: in inner
    return callable(x, *args, **kwargs)
deeplake\core\tensor.py:325: in extend
    self.chunk_engine.extend(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <deeplake.core.chunk_engine.ChunkEngine object at 0x0000024F75589960>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
       2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
       9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False

    def extend(
        self,
        samples,
        progressbar: bool = False,
        link_callback: Optional[Callable] = None,
        pg_callback=None,
        ignore_errors: bool = False,
    ):
        try:
            assert not (progressbar and pg_callback)
            self.check_link_ready()
            if not self.write_initialization_done:
                self._write_initialization()
                self.write_initialization_done = True
    
            initial_autoflush = self.cache.autoflush
            self.cache.autoflush = False
            num_samples = self.tensor_length
    
            if self.is_sequence:
                self._extend_sequence(
                    samples, progressbar, link_callback, ignore_errors
                )
            else:
                verified_samples = self._extend(
                    samples,
                    progressbar,
                    pg_callback=pg_callback,
                    ignore_errors=ignore_errors,
                )
                if link_callback:
                    verified_samples = self._prepare_samples_for_link_callback(
                        verified_samples
                    )
                    self._extend_link_callback(
                        link_callback,
                        verified_samples,
                        None,
                        progressbar,
                        ignore_errors,
                    )
    
            self.cache.autoflush = initial_autoflush
            self.cache.maybe_flush()
        except Exception as e:
            num_samples_added = self.tensor_length - num_samples
            for _ in range(num_samples_added):
                self.pop()
>           raise SampleAppendError(self.name) from e
E           deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.

deeplake\core\chunk_engine.py:1181: SampleAppendError

Check failure on line 97 in deeplake/core/vectorstore/test_deeplake_vectorstore.py

test_deeplake_vectorstore.test_id_backward_compatibility

deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x0000024F75BF2080>
samples = [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.,...., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False

    def extend(
        self,
        samples,
        progressbar: bool = False,
        link_callback: Optional[Callable] = None,
        pg_callback=None,
        ignore_errors: bool = False,
    ):
        try:
            assert not (progressbar and pg_callback)
            self.check_link_ready()
            if not self.write_initialization_done:
                self._write_initialization()
                self.write_initialization_done = True
    
            initial_autoflush = self.cache.autoflush
            self.cache.autoflush = False
            num_samples = self.tensor_length
    
            if self.is_sequence:
                self._extend_sequence(
                    samples, progressbar, link_callback, ignore_errors
                )
            else:
>               verified_samples = self._extend(
                    samples,
                    progressbar,
                    pg_callback=pg_callback,
                    ignore_errors=ignore_errors,
                )

deeplake\core\chunk_engine.py:1157: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\core\chunk_engine.py:1039: in _extend
    samples = self._samples_to_chunks(
deeplake\core\chunk_engine.py:843: in _samples_to_chunks
    num_samples_added = current_chunk.extend_if_has_space(
deeplake\core\chunk\uncompressed_chunk.py:36: in extend_if_has_space
    return self._extend_if_has_space_list(
deeplake\core\chunk\uncompressed_chunk.py:162: in _extend_if_has_space_list
    serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake\core\chunk\base_chunk.py:388: in serialize_sample
    incoming_sample, shape = serialize_numpy_and_base_types(
deeplake\core\serialize.py:538: in serialize_numpy_and_base_types
    out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

sample = array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., ...0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
dtype = 'float32', htype = 'embedding'

    def intelligent_cast(
        sample: Any, dtype: Union[np.dtype, str], htype: str
    ) -> np.ndarray:
        # TODO: docstring (note: sample can be a scalar)/statictyping
        # TODO: implement better casting here
        if isinstance(sample, Sample):
            sample = sample.array
    
        if hasattr(sample, "dtype") and sample.dtype == dtype:
            return sample
    
        err_dtype = get_incompatible_dtype(sample, dtype)
        if err_dtype:
>           raise TensorDtypeMismatchError(
                dtype,
                err_dtype,
                htype,
            )
E           deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.

deeplake\util\casting.py:103: TensorDtypeMismatchError

The above exception was the direct cause of the following exception:

local_path = './hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility'

    def test_id_backward_compatibility(local_path):
        num_of_items = 10
        embedding_dim = 100
    
        ids = [f"{i}" for i in range(num_of_items)]
        embedding = [np.zeros(embedding_dim) for i in range(num_of_items)]
        text = ["aadfv" for i in range(num_of_items)]
        metadata = [{"key": i} for i in range(num_of_items)]
    
        ds = deeplake.empty(local_path, overwrite=True)
        ds.create_tensor("ids", htype="text")
        ds.create_tensor("embedding", htype="embedding")
        ds.create_tensor("text", htype="text")
        ds.create_tensor("metadata", htype="json")
    
>       ds.extend(
            {
                "ids": ids,
                "embedding": embedding,
                "text": text,
                "metadata": metadata,
            }
        )

deeplake\core\vectorstore\test_deeplake_vectorstore.py:97: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility', tensors=['ids', 'embedding', 'text', 'metadata'])
samples = {'embedding': [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0...ey': 2}, {'key': 3}, {'key': 4}, {'key': 5}, ...], 'text': ['aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', ...]}
skip_ok = False, append_empty = False, ignore_errors = False
progressbar = False

    def extend(
        self,
        samples: Dict[str, Any],
        skip_ok: bool = False,
        append_empty: bool = False,
        ignore_errors: bool = False,
        progressbar: bool = False,
    ):
        """Appends multiple rows of samples to mutliple tensors at once. This method expects all tensors being updated to be of the same length.
    
        Args:
            samples (Dict[str, Any]): Dictionary with tensor names as keys and samples as values.
            skip_ok (bool): Skip tensors not in ``samples`` if set to True.
            append_empty (bool): Append empty samples to tensors not specified in ``sample`` if set to ``True``. If True, ``skip_ok`` is ignored.
            ignore_errors (bool): Skip samples that cause errors while extending, if set to ``True``.
            progressbar (bool): Displays a progress bar if set to ``True``.
    
        Raises:
            KeyError: If any tensor in the dataset is not a key in ``samples`` and ``skip_ok`` is ``False``.
            TensorDoesNotExistError: If tensor in ``samples`` does not exist.
            ValueError: If all tensors being updated are not of the same length.
            NotImplementedError: If an error occurs while writing tiles.
            SampleExtendError: If the extend failed while appending a sample.
            Exception: Error while attempting to rollback appends.
        """
        extend = False
        if isinstance(samples, Dataset):
            samples = samples.tensors
            extend = True
        elif set(map(type, samples.values())) == {np.ndarray}:
            extend = True
        if not samples:
            return
        n = len(samples[next(iter(samples.keys()))])
        for v in samples.values():
            if len(v) != n:
                sizes = {k: len(v) for (k, v) in samples.items()}
                raise ValueError(
                    f"Incoming samples are not of equal lengths. Incoming sample sizes: {sizes}"
                )
        [f() for f in list(self._update_hooks.values())]
        if extend:
            if ignore_errors:
                warnings.warn(
                    "`ignore_errors` argument will be ignored while extending with numpy arrays or tensors."
                )
            return self._append_or_extend(
                samples, extend=True, skip_ok=skip_ok, append_empty=append_empty
            )
        with self:
            if progressbar:
                indices = tqdm(range(n))
            else:
                indices = range(n)
            for i in indices:
                try:
                    self.append(
                        {k: v[i] for k, v in samples.items()},
                        skip_ok=skip_ok,
                        append_empty=append_empty,
                    )
                except Exception as e:
                    if ignore_errors:
                        continue
                    else:
                        if isinstance(e, SampleAppendError):
>                           raise SampleExtendError(str(e)) from e.__cause__
E                           deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.

deeplake\core\dataset\dataset.py:3142: SampleExtendError