Skip to content

Commit

Permalink
[AL-1775] Point Cloud htype (#1685)
Browse files Browse the repository at this point in the history
* Rebased on main

* Removed unnecessary changes after rebase

* Added point cloud decompression test

* Added bbox.3d, segment_mask.3d and binary_mask.3d

* Fixed python linting

* Added instance segmentation dtype

* Removed unnecessary htype

* Changed tests.txt

* Fixed error

* Changed the code to support downgraded version of numpy

* Fixed python linting

* Small test fixes

* Removed pickle parser

* Fixed python linting

* Increased code coverage rate

* Fixed python linting

* Added corrupted sample

* Fixed a typo in test_point_cloud.py

* Few more tests

* Changed according to comments

* Removed get_full_point_cloud_numpy_array method from tensor.py

* Removed unnecessary code block

* Fixed typo

* Changed according to comments
  • Loading branch information
adolkhan authored Aug 23, 2022
1 parent 73f8aa2 commit 416c334
Show file tree
Hide file tree
Showing 23 changed files with 482 additions and 23 deletions.
7 changes: 5 additions & 2 deletions hub/api/read.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import pathlib
from hub.core.sample import Sample # type: ignore
from typing import Optional, Dict
from typing import Optional, Dict, Union

from hub.core.storage.provider import StorageProvider
from hub.util.path import convert_pathlib_to_string_if_needed


def read(
path: str,
path: Union[str, pathlib.Path],
verify: bool = False,
creds: Optional[Dict] = None,
compression: Optional[str] = None,
Expand Down Expand Up @@ -53,6 +55,7 @@ def read(
Returns:
Sample: Sample object. Call `sample.array` to get the `np.ndarray`.
"""
path = convert_pathlib_to_string_if_needed(path)
return Sample(
path, verify=verify, compression=compression, creds=creds, storage=storage
)
24 changes: 23 additions & 1 deletion hub/api/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from hub.core.dataset import Dataset
from hub.core.tensor import Tensor

from hub.tests.common import assert_array_lists_equal, is_opt_true
from hub.tests.common import assert_array_lists_equal, is_opt_true, get_dummy_data_path
from hub.tests.storage_fixtures import enabled_remote_storages
from hub.core.storage import GCSProvider
from hub.util.exceptions import (
Expand Down Expand Up @@ -560,6 +560,12 @@ def test_htype(memory_ds: Dataset):
segment_mask = memory_ds.create_tensor("segment_mask", htype="segment_mask")
keypoints_coco = memory_ds.create_tensor("keypoints_coco", htype="keypoints_coco")
point = memory_ds.create_tensor("point", htype="point")
point_cloud = memory_ds.create_tensor(
"point_cloud", htype="point_cloud", sample_compression="las"
)
memory_ds.create_tensor(
"point_cloud_calibration_matrix", htype="point_cloud.calibration_matrix"
)

image.append(np.ones((28, 28, 3), dtype=np.uint8))
bbox.append(np.array([1.0, 1.0, 0.0, 0.5], dtype=np.float32))
Expand All @@ -572,6 +578,17 @@ def test_htype(memory_ds: Dataset):
keypoints_coco.append(np.ones((51, 2), dtype=np.int32))
point.append(np.ones((11, 2), dtype=np.int32))

point_cloud.append(
hub.read(os.path.join(get_dummy_data_path("point_cloud"), "point_cloud.las"))
)
point_cloud_dummy_data_path = pathlib.Path(get_dummy_data_path("point_cloud"))
point_cloud.append(hub.read(point_cloud_dummy_data_path / "point_cloud.las"))
# Along the first direction three matrices are concatenated: the first matrix is P,
# the second one is Tr and the third one is R
memory_ds.point_cloud_calibration_matrix.append(
np.zeros((3, 4, 4), dtype=np.float32)
)


def test_dtype(memory_ds: Dataset):
tensor = memory_ds.create_tensor("tensor")
Expand Down Expand Up @@ -969,6 +986,7 @@ def test_compressions_list():
"ico",
"jpeg",
"jpeg2000",
"las",
"lz4",
"mkv",
"mp3",
Expand All @@ -991,17 +1009,21 @@ def test_htypes_list():
assert hub.htypes == [
"audio",
"bbox",
"bbox.3d",
"binary_mask",
"class_label",
"dicom",
"generic",
"image",
"image.gray",
"image.rgb",
"instance_label",
"json",
"keypoints_coco",
"list",
"point",
"point_cloud",
"point_cloud.calibration_matrix",
"segment_mask",
"text",
"video",
Expand Down
140 changes: 140 additions & 0 deletions hub/api/tests/test_point_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import pytest

import hub
from hub.core.dataset import Dataset
from hub.core.compression import compress_multiple
from hub.tests.common import get_dummy_data_path
from hub.util.exceptions import CorruptedSampleError

import numpy as np


def test_point_cloud(local_ds, point_cloud_paths):
    """End-to-end checks for the ``point_cloud`` htype.

    Covers: reading a ``.las`` sample and its metadata, appending/extending,
    reported tensor shapes, uncompressed vs. las-compressed tensors,
    corrupted-sample detection, and the container types returned by
    ``Tensor.data()``.

    Args:
        local_ds: local dataset fixture.
        point_cloud_paths: fixture mapping compression name -> sample path.
    """
    for i, (compression, path) in enumerate(point_cloud_paths.items()):
        if compression == "las":
            tensor = local_ds.create_tensor(
                f"point_cloud_{i}", htype="point_cloud", sample_compression=compression
            )
            sample = hub.read(path)

            if "point_cloud" in path:  # check shape only for internal test point_clouds
                assert sample.shape[0] == 20153

            assert len(sample.meta) == 6
            assert len(sample.meta["dimension_names"]) == 18
            assert len(sample.meta["las_header"]) == 19

            assert sample.meta["las_header"]["DEFAULT_VERSION"] == {
                "major": 1,
                "minor": 2,
            }
            assert sample.meta["las_header"]["creation_date"] == {
                "year": 2022,
                "month": 5,
                "day": 24,
            }
            assert sample.meta["las_header"]["version"] == {"major": 1, "minor": 2}
            assert (
                sample.meta["las_header"]["uuid"]
                == "00000000-0000-0000-0000-000000000000"
            )

            tensor.append(sample)
            tensor.append(sample)
            tensor.append(sample)
            assert tensor.shape == (3, 20153, 18)

            shape_tester(local_ds, path, sample, tensor, feature_size=18)

            # Batch compression is not implemented for point clouds.
            with pytest.raises(NotImplementedError):
                arrays = np.zeros((5, 1000, 3))
                compress_multiple(arrays, compression)

    # NOTE(review): the code below relies on `path` and `sample` leaking out of
    # the loop above (the last "las" entry) — confirm the fixture always
    # contains a "las" path.
    local_ds.create_tensor(
        "point_cloud_without_sample_compression",
        htype="point_cloud",
        sample_compression=None,
    )
    local_ds.point_cloud_without_sample_compression.append(
        np.zeros((1000, 3), dtype=np.float32)
    )
    np.testing.assert_array_equal(
        local_ds.point_cloud_without_sample_compression[0].numpy(),
        np.zeros((1000, 3), dtype=np.float32),
    )
    # A raw-array sample carries no .las metadata, so data() is empty.
    # (Removed a redundant standalone data() call whose result was discarded.)
    assert len(local_ds.point_cloud_without_sample_compression.data()) == 0

    local_ds.point_cloud_without_sample_compression.append(hub.read(path))
    assert local_ds.point_cloud_without_sample_compression[1].numpy().shape == (
        20153,
        3,
    )
    assert len(local_ds.point_cloud_without_sample_compression.numpy(aslist=True)) == 2
    assert len(local_ds.point_cloud_without_sample_compression.data(aslist=True)) == 2

    local_ds.create_tensor(
        "point_cloud_with_sample_compression",
        htype="point_cloud",
        sample_compression="las",
    )
    # Raw numpy arrays cannot be las-compressed on append.
    with pytest.raises(NotImplementedError):
        local_ds.point_cloud_with_sample_compression.append(
            np.zeros((1000, 3), dtype=np.float32)
        )

    with pytest.raises(CorruptedSampleError):
        local_ds.point_cloud_with_sample_compression.append(
            hub.read(get_dummy_data_path("point_cloud/corrupted_point_cloud.las"))
        )

    local_ds.point_cloud_with_sample_compression.append(hub.read(path, verify=True))
    assert local_ds.point_cloud_with_sample_compression.shape == (1, 20153, 18)

    local_ds.create_tensor(
        "point_cloud_data_method_type_tester",
        htype="point_cloud",
        sample_compression="las",
    )
    local_ds.point_cloud_data_method_type_tester.append(sample)
    assert isinstance(local_ds.point_cloud_data_method_type_tester.data(), dict)

    local_ds.point_cloud_data_method_type_tester.append(sample)
    assert isinstance(
        local_ds.point_cloud_data_method_type_tester.data(aslist=True), list
    )


def shape_tester(local_ds, path, sample, tensor, feature_size):
    """Append ten more copies of the sample at *path*, then verify each stored
    sample keeps the original point count and exposes *feature_size* fields."""
    with local_ds:
        # Five single appends followed by a batched extend of five more.
        for _ in range(5):
            tensor.append(hub.read(path))  # type: ignore
        tensor.extend([hub.read(path) for _ in range(5)])  # type: ignore

    expected_points = sample.shape[0]  # type: ignore
    for idx in range(10):
        stored = tensor[idx]
        assert stored.numpy().shape[0] == expected_points  # type: ignore
        assert len(stored.data()) == feature_size


def test_point_cloud_slicing(local_ds: Dataset, point_cloud_paths):
    """Verify that indexing/slicing a stored point-cloud sample mirrors numpy
    slicing semantics; only the first ``las`` fixture entry is exercised."""
    for fmt, las_path in point_cloud_paths.items():
        if fmt != "las":
            continue
        reference = np.zeros((20153, 3))
        local_ds.create_tensor(
            "point_cloud", htype="point_cloud", sample_compression=fmt
        )
        local_ds.point_cloud.append(hub.read(las_path))
        stored = local_ds.point_cloud[0]

        # Same index expressions as applied to a plain numpy array, in order.
        index_cases = [
            slice(0, 5),
            slice(100, 120),
            120,
            -1,
            slice(10, 5, -2),
            slice(-3, -10, -1),
            slice(-25, 100, -2),
            slice(None, None, -1),
            slice(None, 5, -1),
            -1,
        ]
        for idx in index_cases:
            assert stored[idx].numpy().shape == reference[idx].shape
        return
10 changes: 7 additions & 3 deletions hub/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,22 +80,23 @@

AUDIO_COMPRESSIONS = ["mp3", "flac", "wav"]

POINT_CLOUD_COMPRESSIONS = ["las"]


# Just constants
BYTE_COMPRESSION = "byte"
IMAGE_COMPRESSION = "image"
VIDEO_COMPRESSION = "video"
AUDIO_COMPRESSION = "audio"


COMPRESSION_TYPES = [BYTE_COMPRESSION, IMAGE_COMPRESSION, AUDIO_COMPRESSION]
POINT_CLOUD_COMPRESSION = "point_cloud"


COMPRESSION_TYPES = [
BYTE_COMPRESSION,
IMAGE_COMPRESSION,
AUDIO_COMPRESSION,
VIDEO_COMPRESSION,
POINT_CLOUD_COMPRESSION,
]

# Pillow plugins for some formats might not be installed:
Expand All @@ -113,6 +114,7 @@
*IMAGE_COMPRESSIONS,
*AUDIO_COMPRESSIONS,
*VIDEO_COMPRESSIONS,
*POINT_CLOUD_COMPRESSIONS,
]
SUPPORTED_COMPRESSIONS = list(sorted(set(SUPPORTED_COMPRESSIONS))) # type: ignore
SUPPORTED_COMPRESSIONS.append(None) # type: ignore
Expand All @@ -133,6 +135,8 @@
_compression_types[c] = VIDEO_COMPRESSION
for c in AUDIO_COMPRESSIONS:
_compression_types[c] = AUDIO_COMPRESSION
for c in POINT_CLOUD_COMPRESSIONS:
_compression_types[c] = POINT_CLOUD_COMPRESSION


def get_compression_type(c):
Expand Down
3 changes: 2 additions & 1 deletion hub/core/chunk_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,7 +1479,8 @@ def get_basic_sample(self, global_sample_index, index, fetch_chunks=False):
chunk_id, partial_chunk_bytes=worst_case_header_size
)
return chunk.read_sample(
local_sample_index, cast=self.tensor_meta.htype != "dicom"
local_sample_index,
cast=self.tensor_meta.htype != "dicom",
)[tuple(entry.value for entry in index.values[1:])]

def get_non_tiled_sample(self, global_sample_index, index, fetch_chunks=False):
Expand Down
Loading

0 comments on commit 416c334

Please sign in to comment.