[AL-2405] Add ability to change htype #11057
4431 tests run, 2154 passed, 2270 skipped, 7 failed.
Annotations
Check failure on line 685 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_htype
deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7fcf9d79a230>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False
def extend(
self,
samples,
progressbar: bool = False,
link_callback: Optional[Callable] = None,
pg_callback=None,
ignore_errors: bool = False,
):
try:
assert not (progressbar and pg_callback)
self.check_link_ready()
if not self.write_initialization_done:
self._write_initialization()
self.write_initialization_done = True
initial_autoflush = self.cache.autoflush
self.cache.autoflush = False
num_samples = self.tensor_length
if self.is_sequence:
self._extend_sequence(
samples, progressbar, link_callback, ignore_errors
)
else:
> verified_samples = self._extend(
samples,
progressbar,
pg_callback=pg_callback,
ignore_errors=ignore_errors,
)
deeplake/core/chunk_engine.py:1157:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1039: in _extend
samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/uncompressed_chunk.py:36: in extend_if_has_space
return self._extend_if_has_space_list(
deeplake/core/chunk/uncompressed_chunk.py:162: in _extend_if_has_space_list
serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake/core/chunk/base_chunk.py:388: in serialize_sample
incoming_sample, shape = serialize_numpy_and_base_types(
deeplake/core/serialize.py:538: in serialize_numpy_and_base_types
out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sample = array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
2.79171742e-01, 7.97253927e-01, 9.044397...9.02978856e-01, 9.22093883e-01, 6.37840299e-01,
9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])
dtype = 'float32', htype = 'embedding'
def intelligent_cast(
sample: Any, dtype: Union[np.dtype, str], htype: str
) -> np.ndarray:
# TODO: docstring (note: sample can be a scalar)/statictyping
# TODO: implement better casting here
if isinstance(sample, Sample):
sample = sample.array
if hasattr(sample, "dtype") and sample.dtype == dtype:
return sample
err_dtype = get_incompatible_dtype(sample, dtype)
if err_dtype:
> raise TensorDtypeMismatchError(
dtype,
err_dtype,
htype,
)
E deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.
deeplake/util/casting.py:103: TensorDtypeMismatchError
The above exception was the direct cause of the following exception:
memory_ds = Dataset(path='mem://hub_pytest/test_api/test_htype', tensors=['image', 'bbox', 'label', 'video', 'bin_mask', 'segment_mask', 'keypoints_coco', 'point', 'point_cloud', 'intrinsics', 'embedding'])
def test_htype(memory_ds: Dataset):
image = memory_ds.create_tensor("image", htype="image", sample_compression="png")
bbox = memory_ds.create_tensor("bbox", htype="bbox")
label = memory_ds.create_tensor(
"label", htype="class_label", class_names=["a", "b", "c", "d", "e", "f"]
)
video = memory_ds.create_tensor("video", htype="video", sample_compression="mkv")
bin_mask = memory_ds.create_tensor("bin_mask", htype="binary_mask")
segment_mask = memory_ds.create_tensor(
"segment_mask", htype="segment_mask", class_names=["a", "b", "c"]
)
keypoints_coco = memory_ds.create_tensor(
"keypoints_coco",
htype="keypoints_coco",
keypoints=["arm", "leg", "torso"],
connections=[[0, 2], [1, 2]],
)
point = memory_ds.create_tensor("point", htype="point")
point_cloud = memory_ds.create_tensor(
"point_cloud", htype="point_cloud", sample_compression="las"
)
intrinsics = memory_ds.create_tensor("intrinsics", htype="intrinsics")
embedding = memory_ds.create_tensor("embedding", htype="embedding")
image.append(np.ones((28, 28, 3), dtype=np.uint8))
bbox.append(np.array([1.0, 1.0, 0.0, 0.5], dtype=np.float32))
# label.append(5)
label.append(np.array(5, dtype=np.uint32))
with pytest.raises(SampleAppendError):
video.append(np.ones((10, 28, 28, 3), dtype=np.uint8))
bin_mask.append(np.zeros((28, 28), dtype=bool))
segment_mask.append(np.ones((28, 28), dtype=np.uint32))
keypoints_coco.append(np.ones((51, 2), dtype=np.int32))
point.append(np.ones((11, 2), dtype=np.int32))
point_cloud.append(
deeplake.read(
os.path.join(get_dummy_data_path("point_cloud"), "point_cloud.las")
)
)
point_cloud_dummy_data_path = pathlib.Path(get_dummy_data_path("point_cloud"))
point_cloud.append(deeplake.read(point_cloud_dummy_data_path / "point_cloud.las"))
# Along the first direction three matrices are concatenated, the first matrix is P,
# the second one is Tr and the third one is R
intrinsics.append(np.zeros((3, 4, 4), dtype=np.float32))
> embedding.append(np.random.rand((100)))
deeplake/api/tests/test_api.py:685:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/tensor.py:414: in append
self.extend([sample], progressbar=False)
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
self.chunk_engine.extend(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7fcf9d79a230>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False
def extend(
self,
samples,
progressbar: bool = False,
link_callback: Optional[Callable] = None,
pg_callback=None,
ignore_errors: bool = False,
):
try:
assert not (progressbar and pg_callback)
self.check_link_ready()
if not self.write_initialization_done:
self._write_initialization()
self.write_initialization_done = True
initial_autoflush = self.cache.autoflush
self.cache.autoflush = False
num_samples = self.tensor_length
if self.is_sequence:
self._extend_sequence(
samples, progressbar, link_callback, ignore_errors
)
else:
verified_samples = self._extend(
samples,
progressbar,
pg_callback=pg_callback,
ignore_errors=ignore_errors,
)
if link_callback:
verified_samples = self._prepare_samples_for_link_callback(
verified_samples
)
self._extend_link_callback(
link_callback,
verified_samples,
None,
progressbar,
ignore_errors,
)
self.cache.autoflush = initial_autoflush
self.cache.maybe_flush()
except Exception as e:
num_samples_added = self.tensor_length - num_samples
for _ in range(num_samples_added):
self.pop()
> raise SampleAppendError(self.name) from e
E deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.
deeplake/core/chunk_engine.py:1181: SampleAppendError
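The root cause in this run is the sample dtype: np.random.rand(100) returns a float64 array, while the 'embedding' htype defaults to dtype='float32', so intelligent_cast raises TensorDtypeMismatchError and the chunk engine wraps it in SampleAppendError. A minimal sketch of the cast the test sample would need (the in-memory dataset path below is illustrative, not the fixture's):

    import numpy as np
    import deeplake

    ds = deeplake.empty("mem://htype_repro")  # illustrative path
    embedding = ds.create_tensor("embedding", htype="embedding")  # embedding htype expects float32

    sample = np.random.rand(100)                 # float64 -> TensorDtypeMismatchError on append
    embedding.append(sample.astype(np.float32))  # explicit cast satisfies the expected dtype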
Check failure on line 2904 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_tensor_extend_ignore[compression_args0]
Failed: Timeout >60.0s
Raw output
local_ds = Dataset(path='./hub_pytest/test_api/test_tensor_extend_ignore-compression_args0-', tensors=['images', 'tiled_images', 'seq_images', 'link_images'])
lfpw_links = ['https://cm1.theinsider.com/media/0/428/93/spl41194_011.0.0.0x0.636x912.jpeg', 'https://cm1.theinsider.com/media/0/42...ws/FILM_Queen_6.jpg', 'https://img2.timeinc.net/people/i/2008/features/theysaid/080331/kimora_lee_simmons400.jpg', ...]
compression_args = {'sample_compression': None}
@pytest.mark.parametrize(
"compression_args",
[
{"sample_compression": None},
{"sample_compression": "jpg"},
{"chunk_compression": "jpg"},
],
)
def test_tensor_extend_ignore(local_ds, lfpw_links, compression_args):
with local_ds as ds:
ds.create_tensor("images", htype="image", **compression_args)
ds.create_tensor(
"tiled_images",
htype="image",
tiling_threshold=1 * KB,
max_chunk_size=1 * KB,
**compression_args,
)
ds.create_tensor("seq_images", htype="sequence[image]", **compression_args)
ds.create_tensor("link_images", htype="link[image]", **compression_args)
images = [deeplake.read(link) for link in lfpw_links]
ds.images.extend(images, ignore_errors=True)
ds.tiled_images.extend(images, ignore_errors=True)
seqs = [
list(map(deeplake.read, lfpw_links[i : i + 2]))
for i in range(0, len(lfpw_links), 2)
]
> ds.seq_images.extend(seqs, ignore_errors=True)
deeplake/api/tests/test_api.py:2904:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
self.chunk_engine.extend(
deeplake/core/chunk_engine.py:1153: in extend
self._extend_sequence(
deeplake/core/chunk_engine.py:1079: in _extend_sequence
verified_sample = self._extend(
deeplake/core/chunk_engine.py:1039: in _extend
samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/uncompressed_chunk.py:36: in extend_if_has_space
return self._extend_if_has_space_list(
deeplake/core/chunk/uncompressed_chunk.py:162: in _extend_if_has_space_list
serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake/core/chunk/base_chunk.py:354: in serialize_sample
incoming_sample, shape = serialize_sample_object( # type: ignore
deeplake/core/serialize.py:610: in serialize_sample_object
shape = incoming_sample.shape
deeplake/core/sample.py:161: in shape
self._read_meta()
deeplake/core/sample.py:196: in _read_meta
f = self._read_from_path()
deeplake/core/sample.py:450: in _read_from_path
self._buffer = self._read_from_http()
deeplake/core/sample.py:514: in _read_from_http
result = requests.get(self.path, headers=headers)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:73: in get
return request("get", url, params=params, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:59: in request
return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:589: in request
resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:703: in send
r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/adapters.py:486: in send
resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:714: in urlopen
httplib_response = self._make_request(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:403: in _make_request
self._validate_conn(conn)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:1053: in _validate_conn
conn.connect()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:363: in connect
self.sock = conn = self._new_conn()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:174: in _new_conn
conn = connection.create_connection(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
address = ('cache.thephoenix.com', 443), timeout = None, source_address = None
socket_options = [(6, 1, 1)]
def create_connection(
address,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None,
socket_options=None,
):
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`socket.getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
if host.startswith("["):
host = host.strip("[]")
err = None
# Using the value from allowed_gai_family() in the context of getaddrinfo lets
# us select whether to work with IPv4 DNS records, IPv6 records, or both.
# The original create_connection function always returns all records.
family = allowed_gai_family()
try:
host.encode("idna")
except UnicodeError:
return six.raise_from(
LocationParseError(u"'%s', label empty or too long" % host), None
)
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
> sock.connect(sa)
E Failed: Timeout >60.0s
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/util/connection.py:85: Failed
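All three test_tensor_extend_ignore parametrizations fail the same way: Sample._read_from_http calls requests.get(self.path, headers=headers) with no timeout, and one of the lfpw_links hosts (cache.thephoenix.com) never completes the TCP connect, so the test trips pytest's 60 s limit. A hedged sketch of a bounded fetch (the timeout values and skip-on-error behavior are assumptions for illustration, not the repository's actual fix):

    import requests

    def read_from_http_with_timeout(url, headers=None, timeout=(10, 30)):
        # (connect, read) timeouts keep an unresponsive host from hanging the run;
        # the values here are illustrative.
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response.content
        except requests.RequestException:
            # Under ignore_errors=True semantics, a dead link would be skipped
            # instead of aborting the whole extend.
            return None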
Check failure on line 2907 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_tensor_extend_ignore[compression_args1]
Failed: Timeout >60.0s
Raw output
local_ds = Dataset(path='./hub_pytest/test_api/test_tensor_extend_ignore-compression_args1-', tensors=['images', 'tiled_images', 'seq_images', 'link_images'])
lfpw_links = ['https://cm1.theinsider.com/media/0/428/93/spl41194_011.0.0.0x0.636x912.jpeg', 'https://cm1.theinsider.com/media/0/42...ws/FILM_Queen_6.jpg', 'https://img2.timeinc.net/people/i/2008/features/theysaid/080331/kimora_lee_simmons400.jpg', ...]
compression_args = {'sample_compression': 'jpg'}
@pytest.mark.parametrize(
"compression_args",
[
{"sample_compression": None},
{"sample_compression": "jpg"},
{"chunk_compression": "jpg"},
],
)
def test_tensor_extend_ignore(local_ds, lfpw_links, compression_args):
with local_ds as ds:
ds.create_tensor("images", htype="image", **compression_args)
ds.create_tensor(
"tiled_images",
htype="image",
tiling_threshold=1 * KB,
max_chunk_size=1 * KB,
**compression_args,
)
ds.create_tensor("seq_images", htype="sequence[image]", **compression_args)
ds.create_tensor("link_images", htype="link[image]", **compression_args)
images = [deeplake.read(link) for link in lfpw_links]
ds.images.extend(images, ignore_errors=True)
ds.tiled_images.extend(images, ignore_errors=True)
seqs = [
list(map(deeplake.read, lfpw_links[i : i + 2]))
for i in range(0, len(lfpw_links), 2)
]
ds.seq_images.extend(seqs, ignore_errors=True)
links = [deeplake.link(link) for link in lfpw_links]
> ds.link_images.extend(links, ignore_errors=True)
deeplake/api/tests/test_api.py:2907:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
self.chunk_engine.extend(
deeplake/core/chunk_engine.py:1157: in extend
verified_samples = self._extend(
deeplake/core/chunk_engine.py:1036: in _extend
samples, verified_samples = self._sanitize_samples(
deeplake/core/chunk_engine.py:687: in _sanitize_samples
verified_samples = self.check_each_sample(
deeplake/core/linked_chunk_engine.py:273: in check_each_sample
read_linked_sample(
deeplake/core/linked_sample.py:29: in read_linked_sample
return _read_http_linked_sample(
deeplake/core/linked_sample.py:43: in wrapper
return f(linked_creds, sample_creds_key, *args, **kwargs)
deeplake/core/linked_sample.py:72: in _read_http_linked_sample
return deeplake.read(sample_path, verify=verify, creds=creds)
deeplake/api/read.py:61: in read
return Sample(
deeplake/core/sample.py:101: in __init__
compressed_bytes = self._read_from_path()
deeplake/core/sample.py:450: in _read_from_path
self._buffer = self._read_from_http()
deeplake/core/sample.py:514: in _read_from_http
result = requests.get(self.path, headers=headers)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:73: in get
return request("get", url, params=params, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:59: in request
return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:589: in request
resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:703: in send
r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/adapters.py:486: in send
resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:714: in urlopen
httplib_response = self._make_request(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:403: in _make_request
self._validate_conn(conn)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:1053: in _validate_conn
conn.connect()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:363: in connect
self.sock = conn = self._new_conn()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:174: in _new_conn
conn = connection.create_connection(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
address = ('cache.thephoenix.com', 443), timeout = None, source_address = None
socket_options = [(6, 1, 1)]
def create_connection(
address,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None,
socket_options=None,
):
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`socket.getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
if host.startswith("["):
host = host.strip("[]")
err = None
# Using the value from allowed_gai_family() in the context of getaddrinfo lets
# us select whether to work with IPv4 DNS records, IPv6 records, or both.
# The original create_connection function always returns all records.
family = allowed_gai_family()
try:
host.encode("idna")
except UnicodeError:
return six.raise_from(
LocationParseError(u"'%s', label empty or too long" % host), None
)
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
> sock.connect(sa)
E Failed: Timeout >60.0s
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/util/connection.py:85: Failed
Check failure on line 2904 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_tensor_extend_ignore[compression_args2]
Failed: Timeout >60.0s
Raw output
local_ds = Dataset(path='./hub_pytest/test_api/test_tensor_extend_ignore-compression_args2-', tensors=['images', 'tiled_images', 'seq_images', 'link_images'])
lfpw_links = ['https://cm1.theinsider.com/media/0/428/93/spl41194_011.0.0.0x0.636x912.jpeg', 'https://cm1.theinsider.com/media/0/42...ws/FILM_Queen_6.jpg', 'https://img2.timeinc.net/people/i/2008/features/theysaid/080331/kimora_lee_simmons400.jpg', ...]
compression_args = {'chunk_compression': 'jpg'}
@pytest.mark.parametrize(
"compression_args",
[
{"sample_compression": None},
{"sample_compression": "jpg"},
{"chunk_compression": "jpg"},
],
)
def test_tensor_extend_ignore(local_ds, lfpw_links, compression_args):
with local_ds as ds:
ds.create_tensor("images", htype="image", **compression_args)
ds.create_tensor(
"tiled_images",
htype="image",
tiling_threshold=1 * KB,
max_chunk_size=1 * KB,
**compression_args,
)
ds.create_tensor("seq_images", htype="sequence[image]", **compression_args)
ds.create_tensor("link_images", htype="link[image]", **compression_args)
images = [deeplake.read(link) for link in lfpw_links]
ds.images.extend(images, ignore_errors=True)
ds.tiled_images.extend(images, ignore_errors=True)
seqs = [
list(map(deeplake.read, lfpw_links[i : i + 2]))
for i in range(0, len(lfpw_links), 2)
]
> ds.seq_images.extend(seqs, ignore_errors=True)
deeplake/api/tests/test_api.py:2904:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/util/invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake/core/tensor.py:325: in extend
self.chunk_engine.extend(
deeplake/core/chunk_engine.py:1153: in extend
self._extend_sequence(
deeplake/core/chunk_engine.py:1079: in _extend_sequence
verified_sample = self._extend(
deeplake/core/chunk_engine.py:1039: in _extend
samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/chunk_compressed_chunk.py:55: in extend_if_has_space
return self.extend_if_has_space_image_compression(
deeplake/core/chunk/chunk_compressed_chunk.py:319: in extend_if_has_space_image_compression
incoming_sample, shape = self.process_sample_img_compr(incoming_sample)
deeplake/core/chunk/chunk_compressed_chunk.py:524: in process_sample_img_compr
sample = intelligent_cast(sample, self.dtype, self.htype)
deeplake/util/casting.py:96: in intelligent_cast
sample = sample.array
deeplake/core/sample.py:395: in array
self._decompress()
deeplake/core/sample.py:339: in _decompress
compression = self.compression
deeplake/core/sample.py:167: in compression
self._read_meta()
deeplake/core/sample.py:196: in _read_meta
f = self._read_from_path()
deeplake/core/sample.py:450: in _read_from_path
self._buffer = self._read_from_http()
deeplake/core/sample.py:514: in _read_from_http
result = requests.get(self.path, headers=headers)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:73: in get
return request("get", url, params=params, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/api.py:59: in request
return session.request(method=method, url=url, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:589: in request
resp = self.send(prep, **send_kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/sessions.py:703: in send
r = adapter.send(request, **kwargs)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/requests/adapters.py:486: in send
resp = conn.urlopen(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:714: in urlopen
httplib_response = self._make_request(
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:403: in _make_request
self._validate_conn(conn)
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connectionpool.py:1053: in _validate_conn
conn.connect()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:363: in connect
self.sock = conn = self._new_conn()
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/connection.py:174: in _new_conn
conn = connection.create_connection(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
address = ('cache.thephoenix.com', 443), timeout = None, source_address = None
socket_options = [(6, 1, 1)]
def create_connection(
address,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None,
socket_options=None,
):
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`socket.getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
if host.startswith("["):
host = host.strip("[]")
err = None
# Using the value from allowed_gai_family() in the context of getaddrinfo lets
# us select whether to work with IPv4 DNS records, IPv6 records, or both.
# The original create_connection function always returns all records.
family = allowed_gai_family()
try:
host.encode("idna")
except UnicodeError:
return six.raise_from(
LocationParseError(u"'%s', label empty or too long" % host), None
)
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
> sock.connect(sa)
E Failed: Timeout >60.0s
/opt/hostedtoolcache/Python/3.10.13/x64/lib/python3.10/site-packages/urllib3/util/connection.py:85: Failed
Check failure on line 97 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_id_backward_compatibility
deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x7fd046018d00>
samples = [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0.,...., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False
def extend(
self,
samples,
progressbar: bool = False,
link_callback: Optional[Callable] = None,
pg_callback=None,
ignore_errors: bool = False,
):
try:
assert not (progressbar and pg_callback)
self.check_link_ready()
if not self.write_initialization_done:
self._write_initialization()
self.write_initialization_done = True
initial_autoflush = self.cache.autoflush
self.cache.autoflush = False
num_samples = self.tensor_length
if self.is_sequence:
self._extend_sequence(
samples, progressbar, link_callback, ignore_errors
)
else:
> verified_samples = self._extend(
samples,
progressbar,
pg_callback=pg_callback,
ignore_errors=ignore_errors,
)
deeplake/core/chunk_engine.py:1157:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake/core/chunk_engine.py:1039: in _extend
samples = self._samples_to_chunks(
deeplake/core/chunk_engine.py:843: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake/core/chunk/uncompressed_chunk.py:36: in extend_if_has_space
return self._extend_if_has_space_list(
deeplake/core/chunk/uncompressed_chunk.py:162: in _extend_if_has_space_list
serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake/core/chunk/base_chunk.py:388: in serialize_sample
incoming_sample, shape = serialize_numpy_and_base_types(
deeplake/core/serialize.py:538: in serialize_numpy_and_base_types
out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sample = array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., ...0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
dtype = 'float32', htype = 'embedding'
def intelligent_cast(
sample: Any, dtype: Union[np.dtype, str], htype: str
) -> np.ndarray:
# TODO: docstring (note: sample can be a scalar)/statictyping
# TODO: implement better casting here
if isinstance(sample, Sample):
sample = sample.array
if hasattr(sample, "dtype") and sample.dtype == dtype:
return sample
err_dtype = get_incompatible_dtype(sample, dtype)
if err_dtype:
> raise TensorDtypeMismatchError(
dtype,
err_dtype,
htype,
)
E deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.
deeplake/util/casting.py:103: TensorDtypeMismatchError
The above exception was the direct cause of the following exception:
local_path = './hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility'
def test_id_backward_compatibility(local_path):
num_of_items = 10
embedding_dim = 100
ids = [f"{i}" for i in range(num_of_items)]
embedding = [np.zeros(embedding_dim) for i in range(num_of_items)]
text = ["aadfv" for i in range(num_of_items)]
metadata = [{"key": i} for i in range(num_of_items)]
ds = deeplake.empty(local_path, overwrite=True)
ds.create_tensor("ids", htype="text")
ds.create_tensor("embedding", htype="embedding")
ds.create_tensor("text", htype="text")
ds.create_tensor("metadata", htype="json")
> ds.extend(
{
"ids": ids,
"embedding": embedding,
"text": text,
"metadata": metadata,
}
)
deeplake/core/vectorstore/test_deeplake_vectorstore.py:97:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility', tensors=['ids', 'embedding', 'text', 'metadata'])
samples = {'embedding': [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0...ey': 2}, {'key': 3}, {'key': 4}, {'key': 5}, ...], 'text': ['aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', ...]}
skip_ok = False, append_empty = False, ignore_errors = False
progressbar = False
def extend(
self,
samples: Dict[str, Any],
skip_ok: bool = False,
append_empty: bool = False,
ignore_errors: bool = False,
progressbar: bool = False,
):
"""Appends multiple rows of samples to mutliple tensors at once. This method expects all tensors being updated to be of the same length.
Args:
samples (Dict[str, Any]): Dictionary with tensor names as keys and samples as values.
skip_ok (bool): Skip tensors not in ``samples`` if set to True.
append_empty (bool): Append empty samples to tensors not specified in ``sample`` if set to ``True``. If True, ``skip_ok`` is ignored.
ignore_errors (bool): Skip samples that cause errors while extending, if set to ``True``.
progressbar (bool): Displays a progress bar if set to ``True``.
Raises:
KeyError: If any tensor in the dataset is not a key in ``samples`` and ``skip_ok`` is ``False``.
TensorDoesNotExistError: If tensor in ``samples`` does not exist.
ValueError: If all tensors being updated are not of the same length.
NotImplementedError: If an error occurs while writing tiles.
SampleExtendError: If the extend failed while appending a sample.
Exception: Error while attempting to rollback appends.
"""
extend = False
if isinstance(samples, Dataset):
samples = samples.tensors
extend = True
elif set(map(type, samples.values())) == {np.ndarray}:
extend = True
if not samples:
return
n = len(samples[next(iter(samples.keys()))])
for v in samples.values():
if len(v) != n:
sizes = {k: len(v) for (k, v) in samples.items()}
raise ValueError(
f"Incoming samples are not of equal lengths. Incoming sample sizes: {sizes}"
)
[f() for f in list(self._update_hooks.values())]
if extend:
if ignore_errors:
warnings.warn(
"`ignore_errors` argument will be ignored while extending with numpy arrays or tensors."
)
return self._append_or_extend(
samples, extend=True, skip_ok=skip_ok, append_empty=append_empty
)
with self:
if progressbar:
indices = tqdm(range(n))
else:
indices = range(n)
for i in indices:
try:
self.append(
{k: v[i] for k, v in samples.items()},
skip_ok=skip_ok,
append_empty=append_empty,
)
except Exception as e:
if ignore_errors:
continue
else:
if isinstance(e, SampleAppendError):
> raise SampleExtendError(str(e)) from e.__cause__
E deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.
deeplake/core/dataset/dataset.py:3142: SampleExtendError
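Here the mismatch comes from the sample construction: np.zeros(embedding_dim) defaults to float64, while the 'embedding' tensor created with htype="embedding" expects float32, so Dataset.extend surfaces the underlying TensorDtypeMismatchError as SampleExtendError. A minimal sketch of the float32 construction (names reuse the test's own variables):

    import numpy as np

    num_of_items = 10
    embedding_dim = 100

    # np.zeros defaults to float64; an explicit dtype avoids the htype's
    # float32 mismatch when the rows are appended.
    embedding = [np.zeros(embedding_dim, dtype=np.float32) for _ in range(num_of_items)]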
Check failure on line 685 in deeplake/api/tests/test_api.py
github-actions / JUnit Test Report
test_api.test_htype
deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x0000024F75589960>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False
def extend(
self,
samples,
progressbar: bool = False,
link_callback: Optional[Callable] = None,
pg_callback=None,
ignore_errors: bool = False,
):
try:
assert not (progressbar and pg_callback)
self.check_link_ready()
if not self.write_initialization_done:
self._write_initialization()
self.write_initialization_done = True
initial_autoflush = self.cache.autoflush
self.cache.autoflush = False
num_samples = self.tensor_length
if self.is_sequence:
self._extend_sequence(
samples, progressbar, link_callback, ignore_errors
)
else:
> verified_samples = self._extend(
samples,
progressbar,
pg_callback=pg_callback,
ignore_errors=ignore_errors,
)
deeplake\core\chunk_engine.py:1157:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\core\chunk_engine.py:1039: in _extend
samples = self._samples_to_chunks(
deeplake\core\chunk_engine.py:843: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake\core\chunk\uncompressed_chunk.py:36: in extend_if_has_space
return self._extend_if_has_space_list(
deeplake\core\chunk\uncompressed_chunk.py:162: in _extend_if_has_space_list
serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake\core\chunk\base_chunk.py:388: in serialize_sample
incoming_sample, shape = serialize_numpy_and_base_types(
deeplake\core\serialize.py:538: in serialize_numpy_and_base_types
out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sample = array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
2.79171742e-01, 7.97253927e-01, 9.044397...9.02978856e-01, 9.22093883e-01, 6.37840299e-01,
9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])
dtype = 'float32', htype = 'embedding'
def intelligent_cast(
sample: Any, dtype: Union[np.dtype, str], htype: str
) -> np.ndarray:
# TODO: docstring (note: sample can be a scalar)/statictyping
# TODO: implement better casting here
if isinstance(sample, Sample):
sample = sample.array
if hasattr(sample, "dtype") and sample.dtype == dtype:
return sample
err_dtype = get_incompatible_dtype(sample, dtype)
if err_dtype:
> raise TensorDtypeMismatchError(
dtype,
err_dtype,
htype,
)
E deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.
deeplake\util\casting.py:103: TensorDtypeMismatchError
The above exception was the direct cause of the following exception:
memory_ds = Dataset(path='mem://hub_pytest/test_api/test_htype', tensors=['image', 'bbox', 'label', 'video', 'bin_mask', 'segment_mask', 'keypoints_coco', 'point', 'point_cloud', 'intrinsics', 'embedding'])
def test_htype(memory_ds: Dataset):
image = memory_ds.create_tensor("image", htype="image", sample_compression="png")
bbox = memory_ds.create_tensor("bbox", htype="bbox")
label = memory_ds.create_tensor(
"label", htype="class_label", class_names=["a", "b", "c", "d", "e", "f"]
)
video = memory_ds.create_tensor("video", htype="video", sample_compression="mkv")
bin_mask = memory_ds.create_tensor("bin_mask", htype="binary_mask")
segment_mask = memory_ds.create_tensor(
"segment_mask", htype="segment_mask", class_names=["a", "b", "c"]
)
keypoints_coco = memory_ds.create_tensor(
"keypoints_coco",
htype="keypoints_coco",
keypoints=["arm", "leg", "torso"],
connections=[[0, 2], [1, 2]],
)
point = memory_ds.create_tensor("point", htype="point")
point_cloud = memory_ds.create_tensor(
"point_cloud", htype="point_cloud", sample_compression="las"
)
intrinsics = memory_ds.create_tensor("intrinsics", htype="intrinsics")
embedding = memory_ds.create_tensor("embedding", htype="embedding")
image.append(np.ones((28, 28, 3), dtype=np.uint8))
bbox.append(np.array([1.0, 1.0, 0.0, 0.5], dtype=np.float32))
# label.append(5)
label.append(np.array(5, dtype=np.uint32))
with pytest.raises(SampleAppendError):
video.append(np.ones((10, 28, 28, 3), dtype=np.uint8))
bin_mask.append(np.zeros((28, 28), dtype=bool))
segment_mask.append(np.ones((28, 28), dtype=np.uint32))
keypoints_coco.append(np.ones((51, 2), dtype=np.int32))
point.append(np.ones((11, 2), dtype=np.int32))
point_cloud.append(
deeplake.read(
os.path.join(get_dummy_data_path("point_cloud"), "point_cloud.las")
)
)
point_cloud_dummy_data_path = pathlib.Path(get_dummy_data_path("point_cloud"))
point_cloud.append(deeplake.read(point_cloud_dummy_data_path / "point_cloud.las"))
# Along the first direction three matrices are concatenated, the first matrix is P,
# the second one is Tr and the third one is R
intrinsics.append(np.zeros((3, 4, 4), dtype=np.float32))
> embedding.append(np.random.rand((100)))
deeplake\api\tests\test_api.py:685:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\util\invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake\core\tensor.py:414: in append
self.extend([sample], progressbar=False)
deeplake\util\invalid_view_op.py:22: in inner
return callable(x, *args, **kwargs)
deeplake\core\tensor.py:325: in extend
self.chunk_engine.extend(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x0000024F75589960>
samples = [array([6.01713555e-01, 5.95197424e-01, 7.36985237e-01, 7.25783711e-01,
2.79171742e-01, 7.97253927e-01, 9.04439....02978856e-01, 9.22093883e-01, 6.37840299e-01,
9.78959985e-02, 2.60911053e-01, 1.77929683e-01, 6.20053580e-01])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False
def extend(
self,
samples,
progressbar: bool = False,
link_callback: Optional[Callable] = None,
pg_callback=None,
ignore_errors: bool = False,
):
try:
assert not (progressbar and pg_callback)
self.check_link_ready()
if not self.write_initialization_done:
self._write_initialization()
self.write_initialization_done = True
initial_autoflush = self.cache.autoflush
self.cache.autoflush = False
num_samples = self.tensor_length
if self.is_sequence:
self._extend_sequence(
samples, progressbar, link_callback, ignore_errors
)
else:
verified_samples = self._extend(
samples,
progressbar,
pg_callback=pg_callback,
ignore_errors=ignore_errors,
)
if link_callback:
verified_samples = self._prepare_samples_for_link_callback(
verified_samples
)
self._extend_link_callback(
link_callback,
verified_samples,
None,
progressbar,
ignore_errors,
)
self.cache.autoflush = initial_autoflush
self.cache.maybe_flush()
except Exception as e:
num_samples_added = self.tensor_length - num_samples
for _ in range(num_samples_added):
self.pop()
> raise SampleAppendError(self.name) from e
E deeplake.util.exceptions.SampleAppendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback.
deeplake\core\chunk_engine.py:1181: SampleAppendError
Check failure on line 97 in deeplake/core/vectorstore/test_deeplake_vectorstore.py
github-actions / JUnit Test Report
test_deeplake_vectorstore.test_id_backward_compatibility
deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.
Raw output
self = <deeplake.core.chunk_engine.ChunkEngine object at 0x0000024F75BF2080>
samples = [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0.,...., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]
progressbar = False
link_callback = <bound method Tensor._extend_links of Tensor(key='embedding')>
pg_callback = None, ignore_errors = False
def extend(
self,
samples,
progressbar: bool = False,
link_callback: Optional[Callable] = None,
pg_callback=None,
ignore_errors: bool = False,
):
try:
assert not (progressbar and pg_callback)
self.check_link_ready()
if not self.write_initialization_done:
self._write_initialization()
self.write_initialization_done = True
initial_autoflush = self.cache.autoflush
self.cache.autoflush = False
num_samples = self.tensor_length
if self.is_sequence:
self._extend_sequence(
samples, progressbar, link_callback, ignore_errors
)
else:
> verified_samples = self._extend(
samples,
progressbar,
pg_callback=pg_callback,
ignore_errors=ignore_errors,
)
deeplake\core\chunk_engine.py:1157:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
deeplake\core\chunk_engine.py:1039: in _extend
samples = self._samples_to_chunks(
deeplake\core\chunk_engine.py:843: in _samples_to_chunks
num_samples_added = current_chunk.extend_if_has_space(
deeplake\core\chunk\uncompressed_chunk.py:36: in extend_if_has_space
return self._extend_if_has_space_list(
deeplake\core\chunk\uncompressed_chunk.py:162: in _extend_if_has_space_list
serialized_sample, shape = self.serialize_sample(incoming_sample)
deeplake\core\chunk\base_chunk.py:388: in serialize_sample
incoming_sample, shape = serialize_numpy_and_base_types(
deeplake\core\serialize.py:538: in serialize_numpy_and_base_types
out = intelligent_cast(incoming_sample, dtype, htype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sample = array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., ...0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
dtype = 'float32', htype = 'embedding'
def intelligent_cast(
sample: Any, dtype: Union[np.dtype, str], htype: str
) -> np.ndarray:
# TODO: docstring (note: sample can be a scalar)/statictyping
# TODO: implement better casting here
if isinstance(sample, Sample):
sample = sample.array
if hasattr(sample, "dtype") and sample.dtype == dtype:
return sample
err_dtype = get_incompatible_dtype(sample, dtype)
if err_dtype:
> raise TensorDtypeMismatchError(
dtype,
err_dtype,
htype,
)
E deeplake.util.exceptions.TensorDtypeMismatchError: Dtype was expected to be 'float32' instead it was 'float64'. If you called `create_tensor` explicitly with `dtype`, your samples should also be of that dtype. Htype 'embedding' expects samples to have dtype='float32'.
deeplake\util\casting.py:103: TensorDtypeMismatchError
The above exception was the direct cause of the following exception:
local_path = './hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility'
def test_id_backward_compatibility(local_path):
num_of_items = 10
embedding_dim = 100
ids = [f"{i}" for i in range(num_of_items)]
embedding = [np.zeros(embedding_dim) for i in range(num_of_items)]
text = ["aadfv" for i in range(num_of_items)]
metadata = [{"key": i} for i in range(num_of_items)]
ds = deeplake.empty(local_path, overwrite=True)
ds.create_tensor("ids", htype="text")
ds.create_tensor("embedding", htype="embedding")
ds.create_tensor("text", htype="text")
ds.create_tensor("metadata", htype="json")
> ds.extend(
{
"ids": ids,
"embedding": embedding,
"text": text,
"metadata": metadata,
}
)
deeplake\core\vectorstore\test_deeplake_vectorstore.py:97:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = Dataset(path='./hub_pytest/test_deeplake_vectorstore/test_id_backward_compatibility', tensors=['ids', 'embedding', 'text', 'metadata'])
samples = {'embedding': [array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0...ey': 2}, {'key': 3}, {'key': 4}, {'key': 5}, ...], 'text': ['aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', 'aadfv', ...]}
skip_ok = False, append_empty = False, ignore_errors = False
progressbar = False
def extend(
self,
samples: Dict[str, Any],
skip_ok: bool = False,
append_empty: bool = False,
ignore_errors: bool = False,
progressbar: bool = False,
):
"""Appends multiple rows of samples to mutliple tensors at once. This method expects all tensors being updated to be of the same length.
Args:
samples (Dict[str, Any]): Dictionary with tensor names as keys and samples as values.
skip_ok (bool): Skip tensors not in ``samples`` if set to True.
append_empty (bool): Append empty samples to tensors not specified in ``sample`` if set to ``True``. If True, ``skip_ok`` is ignored.
ignore_errors (bool): Skip samples that cause errors while extending, if set to ``True``.
progressbar (bool): Displays a progress bar if set to ``True``.
Raises:
KeyError: If any tensor in the dataset is not a key in ``samples`` and ``skip_ok`` is ``False``.
TensorDoesNotExistError: If tensor in ``samples`` does not exist.
ValueError: If all tensors being updated are not of the same length.
NotImplementedError: If an error occurs while writing tiles.
SampleExtendError: If the extend failed while appending a sample.
Exception: Error while attempting to rollback appends.
"""
extend = False
if isinstance(samples, Dataset):
samples = samples.tensors
extend = True
elif set(map(type, samples.values())) == {np.ndarray}:
extend = True
if not samples:
return
n = len(samples[next(iter(samples.keys()))])
for v in samples.values():
if len(v) != n:
sizes = {k: len(v) for (k, v) in samples.items()}
raise ValueError(
f"Incoming samples are not of equal lengths. Incoming sample sizes: {sizes}"
)
[f() for f in list(self._update_hooks.values())]
if extend:
if ignore_errors:
warnings.warn(
"`ignore_errors` argument will be ignored while extending with numpy arrays or tensors."
)
return self._append_or_extend(
samples, extend=True, skip_ok=skip_ok, append_empty=append_empty
)
with self:
if progressbar:
indices = tqdm(range(n))
else:
indices = range(n)
for i in indices:
try:
self.append(
{k: v[i] for k, v in samples.items()},
skip_ok=skip_ok,
append_empty=append_empty,
)
except Exception as e:
if ignore_errors:
continue
else:
if isinstance(e, SampleAppendError):
> raise SampleExtendError(str(e)) from e.__cause__
E deeplake.util.exceptions.SampleExtendError: Failed to append a sample to the tensor 'embedding'. See more details in the traceback. If you wish to skip the samples that cause errors, please specify `ignore_errors=True`.
deeplake\core\dataset\dataset.py:3142: SampleExtendError