diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dd51a13..d85d60d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,15 +11,26 @@ This project uses a [CalVer](https://calver.org/) versioning scheme with monthly - internal dependencies are now listed as regular dependencies in the `setup.py` file to overcome issues and make the installation process less complicated. This implies you need to install packages according to their dependencies and can no longer use the `external` tag as in `pip install airo-typing[external]`. see [issue #91](https://github.com/airo-ugent/airo-mono/issues/91) and [PR](https://github.com/airo-ugent/airo-mono/pull/108) for more details. + - `PointCloud` dataclass replaces the `ColoredPointCloudType` to support point cloud attritubes + ### Added +- `PointCloud` dataclass as the main data structure for point clouds in airo-mono +- Notebooks to get started with point clouds, checking performance and logging to rerun +- Functions to crop point clouds and filter points with a mask (e.g. low-confidence points)) +- Functions to convert from our numpy-based dataclass to and from open3d point clouds +- `BoundingBox3DType` + + ### Changed - dropped support for python 3.8 and added 3.11 to the testing matrix [#103](https://github.com/airo-ugent/airo-mono/issues/103) ### Fixed +- Fixed bug in `get_colored_point_cloud()` that removed some points see issue #25. ### Removed +- `ColoredPointCloudType` ## 2024.1.0 diff --git a/README.md b/README.md index 5c557749..065a8880 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Below is a short overview of the packages: | Package | Description| owner | |-------|-------|--------| -| `airo-camera-toolkit`|code for working with RGB(D) cameras, images and pointclouds |@tlpss| +| `airo-camera-toolkit`|code for working with RGB(D) cameras, images and point clouds |@tlpss| |`airo-dataset-tools`| code for creating, loading and working with datasets| @Victorlouisdg| | `airo-robots`| minimal interfaces for interacting with the controllers of robot arms and grippers| @tlpss| | `airo-spatial-algebra`|code for working with SE3 poses |@tlpss| diff --git a/airo-camera-toolkit/.gitignore b/airo-camera-toolkit/.gitignore index 5449f3dc..2fa335b4 100644 --- a/airo-camera-toolkit/.gitignore +++ b/airo-camera-toolkit/.gitignore @@ -1,3 +1,4 @@ airo_camera_toolkit/image_transforms/tutorial_image.jpg airo_camera_toolkit/calibration/saved_calibrations **/calibration_20**/ +notebooks/data \ No newline at end of file diff --git a/airo-camera-toolkit/README.md b/airo-camera-toolkit/README.md index 755fa0a7..e577b8b8 100644 --- a/airo-camera-toolkit/README.md +++ b/airo-camera-toolkit/README.md @@ -1,5 +1,5 @@ # airo-camera-toolkit -This package contains code for working with RGB(D) cameras, images and pointclouds. +This package contains code for working with RGB(D) cameras, images and point clouds. Overview of the functionality and the structure: @@ -9,6 +9,7 @@ airo-camera-toolkit/ ├── cameras/ # actual camera drivers ├── image_transformations/ # reversible geometric 2D transforms ├── pinhole_operations/ # 2D-3D operations +├── point_clouds/ # conversions and operations ├── utils/ # a.o. annotation tool and converter ├── interfaces.py └── cli.py @@ -87,9 +88,12 @@ See [annotation_tool.md](./airo_camera_toolkit/annotation_tool.md) for usage ins See the [README](./airo_camera_toolkit/image_transforms/README.md) in the `image_transforms` folder for more details. 
## Real-time visualisation -For realtime visualisation of robotics data we strongly encourage using [rerun.io](https://www.rerun.io/) instead of manually hacking something together with opencv/pyqt/... No wrappers are needed here, just pip install the SDK. An example notebook to get to know this tool and its potential can be found [here](docs/rerun-zed-example.ipynb). +For realtime visualisation of robotics data we strongly encourage using [rerun.io](https://www.rerun.io/) instead of manually hacking something together with opencv/pyqt/... No wrappers are needed here, just pip install the SDK. An example notebook to get to know this tool and its potential can be found [here](notebooks/rerun-zed-tutorial.ipynb). See this [README](./docs/rerun.md) for more details. +## Point clouds +See the tutorial notebook [here](notebooks/point_clouds_tutorial.ipynb) for an introduction. + ## Multiprocessing Camera processing can be computationally expensive. If this is a problem for your application, see [multiprocess/README.md](./airo_camera_toolkit/cameras/multiprocess/README.md). diff --git a/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/zed2i.py b/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/zed2i.py index 2ac4d790..28f0b02a 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/zed2i.py +++ b/airo-camera-toolkit/airo_camera_toolkit/cameras/zed/zed2i.py @@ -20,18 +20,19 @@ import time +import cv2 import numpy as np from airo_camera_toolkit.interfaces import StereoRGBDCamera from airo_camera_toolkit.utils.image_converter import ImageConverter from airo_typing import ( CameraIntrinsicsMatrixType, CameraResolutionType, - ColoredPointCloudType, HomogeneousMatrixType, NumpyDepthMapType, NumpyFloatImageType, NumpyIntImageType, OpenCVIntImageType, + PointCloud, ) @@ -149,7 +150,10 @@ def __init__( # type: ignore[no-any-unimported] self.image_matrix = sl.Mat() self.depth_image_matrix = sl.Mat() self.depth_matrix = sl.Mat() - self.pointcloud_matrix = sl.Mat() + self.point_cloud_matrix = sl.Mat() + + self.confidence_matrix = sl.Mat() + self.confidence_map = None @property def resolution(self) -> CameraResolutionType: @@ -188,7 +192,7 @@ def pose_of_right_view_in_left_view(self) -> HomogeneousMatrixType: @property def depth_enabled(self) -> bool: - """Runtime parameter to enable/disable the depth & pointcloud computation. This speeds up the RGB image capture.""" + """Runtime parameter to enable/disable the depth & point_cloud computation. 
This speeds up the RGB image capture.""" return self.runtime_params.enable_depth @depth_enabled.setter @@ -218,9 +222,10 @@ def _retrieve_rgb_image_as_int(self, view: str = StereoRGBDCamera.LEFT_RGB) -> N else: view = sl.VIEW.LEFT self.camera.retrieve_image(self.image_matrix, view) - image: OpenCVIntImageType = self.image_matrix.get_data() - image = image[..., :3] # remove alpha channel - image = image[..., ::-1] # convert from BGR to RGB + image_bgra: OpenCVIntImageType = self.image_matrix.get_data() + # image = image[..., :3] # remove alpha channel + # image = image[..., ::-1] # convert from BGR to RGB + image = cv2.cvtColor(image_bgra, cv2.COLOR_BGRA2RGB) return image def _retrieve_depth_map(self) -> NumpyDepthMapType: @@ -238,31 +243,31 @@ def _retrieve_depth_image(self) -> NumpyIntImageType: image = image[..., :3] return image - def get_colored_point_cloud(self) -> ColoredPointCloudType: + def _retrieve_colored_point_cloud(self) -> PointCloud: assert self.depth_mode != self.NONE_DEPTH_MODE, "Cannot retrieve depth data if depth mode is NONE" assert self.depth_enabled, "Cannot retrieve depth data if depth is disabled" - - self._grab_images() - self.camera.retrieve_measure(self.pointcloud_matrix, sl.MEASURE.XYZRGBA) + self.camera.retrieve_measure(self.point_cloud_matrix, sl.MEASURE.XYZ) # shape (width, height, 4) with the 4th dim being x,y,z,(rgba packed into float) - # can be nan,nan,nan, nan (no point in the pointcloud on this pixel) + # can be nan,nan,nan, nan (no point in the point_cloud on this pixel) # or x,y,z, nan (no color information on this pixel??) # or x,y,z, value (color information on this pixel) - # filter out all that have nan in any of the positions of the 3th dim - # and reshape to (width*height, 4) - point_cloud = self.pointcloud_matrix.get_data() - point_cloud = point_cloud[~np.isnan(point_cloud).any(axis=2), :] + point_cloud = self.point_cloud_matrix.get_data() + points = point_cloud[:, :, :3].reshape(-1, 3) + colors = self._retrieve_rgb_image_as_int().reshape(-1, 3) + + return PointCloud(points, colors) - # unpack the colors, drop alpha channel and convert to 0-1 range - points = point_cloud[:, :3] - colors = point_cloud[:, 3] - rgba = np.ravel(colors).view(np.uint8).reshape(-1, 4) - rgb = rgba[:, :3] - rgb_float = rgb.astype(np.float32) / 255.0 # convert to 0-1 range + def _retrieve_confidence_map(self) -> NumpyFloatImageType: + self.camera.retrieve_measure(self.confidence_matrix, sl.MEASURE.CONFIDENCE) + return self.confidence_matrix.get_data() # single channel float32 image - colored_pointcloud = np.concatenate((points, rgb_float), axis=1) - return colored_pointcloud + def get_colored_point_cloud(self) -> PointCloud: + assert self.depth_mode != self.NONE_DEPTH_MODE, "Cannot retrieve depth data if depth mode is NONE" + assert self.depth_enabled, "Cannot retrieve depth data if depth is disabled" + + self._grab_images() + return self._retrieve_colored_point_cloud() @staticmethod def list_camera_serial_numbers() -> List[str]: @@ -297,7 +302,7 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: # test rgbd stereo camera with Zed2i(Zed2i.RESOLUTION_2K, fps=15, depth_mode=Zed2i.PERFORMANCE_DEPTH_MODE) as zed: - print(zed.get_colored_point_cloud()[0]) # TODO: test the pointcloud more explicity? + print(zed.get_colored_point_cloud().points) # TODO: test the point_cloud more explicity? manual_test_stereo_rgbd_camera(zed) # profile rgb throughput, should be at 60FPS, i.e. 
0.017s diff --git a/airo-camera-toolkit/airo_camera_toolkit/interfaces.py b/airo-camera-toolkit/airo_camera_toolkit/interfaces.py index e47e5574..f8dc29b7 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/interfaces.py +++ b/airo-camera-toolkit/airo_camera_toolkit/interfaces.py @@ -3,11 +3,11 @@ from airo_typing import ( CameraIntrinsicsMatrixType, CameraResolutionType, - ColoredPointCloudType, HomogeneousMatrixType, NumpyDepthMapType, NumpyFloatImageType, NumpyIntImageType, + PointCloud, ) @@ -103,13 +103,13 @@ def get_depth_image(self) -> NumpyIntImageType: self._grab_images() return self._retrieve_depth_image() - def get_colored_point_cloud(self) -> ColoredPointCloudType: + def get_colored_point_cloud(self) -> PointCloud: """Get the latest point cloud of the camera. - The point cloud contains 6D arrays of floats, that provide the estimated position in the camera frame - of points on the image plane (pixels). The last 3 floats are the corresponding RGB color (in the range [0, 1]). + The point cloud contains the estimated position in the camera frame of points on the image plane (pixels). + Each point also has a color associated with it, which is the color of the corresponding pixel in the RGB image. Returns: - np.ndarray: Nx6 array containing PointCloud with color information. Each entry is (x,y,z,r,g,b) + PointCloud: the points (= positions) and colors """ # TODO: offer a base implementation that uses the depth map and the rgb image to construct this pointcloud? raise NotImplementedError diff --git a/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/unprojection.py b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/unprojection.py index e5a87683..dc4432f0 100644 --- a/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/unprojection.py +++ b/airo-camera-toolkit/airo_camera_toolkit/pinhole_operations/unprojection.py @@ -128,7 +128,7 @@ def extract_depth_from_depthmap_heuristic( This function takes the percentile of a region around the specified point and assumes we are interested in the nearest object present. This is not always true (think about the backside of a box looking under a 45 degree angle) but it serves as a good proxy. The more confident you are of your keypoints and the better the heatmaps are, the lower you could set the mask size and percentile. If you are very, very confident - you could directly take the pointcloud as well instead of manually querying the heatmap, but I find that they are more noisy. + you could directly take the point cloud as well instead of manually querying the heatmap, but I find that they are more noisy. Also note that this function assumes there are no negative infinity values (no objects closer than 30cm!) diff --git a/airo-camera-toolkit/airo_camera_toolkit/point_clouds/__init__.py b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/airo-camera-toolkit/airo_camera_toolkit/point_clouds/conversions.py b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/conversions.py new file mode 100644 index 00000000..15da8914 --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/conversions.py @@ -0,0 +1,52 @@ +from typing import Any + +import open3d as o3d +import open3d.core as o3c +from airo_typing import PointCloud + + +def point_cloud_to_open3d(point_cloud: PointCloud) -> Any: # TODO: change Any back to o3d.t.geometry.PointCloud + """Converts a PointCloud dataclass object to an open3d tensor point cloud. 
+ Note that the memory buffers of the underlying numpy arrays are shared between the two. + + Args: + point_cloud: the point cloud to convert + + Returns: + pcd: the open3d tensor point cloud + """ + positions = o3c.Tensor.from_numpy(point_cloud.points) + + map_to_tensors = { + "positions": positions, + } + + if point_cloud.colors is not None: + colors = o3c.Tensor.from_numpy(point_cloud.colors) + map_to_tensors["colors"] = colors + + if point_cloud.attributes is not None: + for attribute_name, array in point_cloud.attributes.items(): + map_to_tensors[attribute_name] = o3c.Tensor.from_numpy(array) + + pcd = o3d.t.geometry.PointCloud(map_to_tensors) + return pcd + + +def open3d_to_point_cloud(pcd: Any) -> PointCloud: # TODO: change Any back to o3d.t.geometry.PointCloud + """Converts an open3d point cloud to a PointCloud dataclass object. + Note that the memory buffers of the underlying numpy arrays are shared between the two. + + Args: + pcd: the open3d tensor point cloud + """ + points = pcd.point.positions.numpy() + colors = pcd.point.colors.numpy() if "colors" in pcd.point else None + + attributes = {} + for attribute_name, array in pcd.point.items(): + if attribute_name in ["positions", "colors"]: + continue + attributes[attribute_name] = array.numpy() + + return PointCloud(points, colors) diff --git a/airo-camera-toolkit/airo_camera_toolkit/point_clouds/operations.py b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/operations.py new file mode 100644 index 00000000..d9c0891e --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/operations.py @@ -0,0 +1,63 @@ +from typing import Any + +import numpy as np +from airo_typing import BoundingBox3DType, PointCloud + + +def filter_point_cloud(point_cloud: PointCloud, mask: Any) -> PointCloud: + """Creates a new point cloud that is filtered by the given mask. + Will also filter the colors and attributes if they are present. + + Args: + point_cloud: the point cloud to filter + mask: the mask to filter the point cloud by, used to index the attribute arrays, can be boolean or indices + + Returns: + the new filtered point cloud + """ + points = point_cloud.points[mask] + colors = None if point_cloud.colors is None else point_cloud.colors[mask] + + attributes = None + if point_cloud.attributes is not None: + attributes = {} + for key, value in point_cloud.attributes.items(): + attributes[key] = value[mask] + + point_cloud_filtered = PointCloud(points, colors, attributes) + return point_cloud_filtered + + +def generate_point_cloud_crop_mask(point_cloud: PointCloud, bounding_box: BoundingBox3DType) -> np.ndarray: + """Creates a mask that can be used to filter a point cloud to the given bounding box. + + Args: + bounding_box: the bounding box that surrounds the points to keep + point_cloud: the point cloud to crop + + Returns: + the mask that can be used to filter the point cloud + """ + points = point_cloud.points + x, y, z = points[:, 0], points[:, 1], points[:, 2] + (x_min, y_min, z_min), (x_max, y_max, z_max) = bounding_box + crop_mask = (x >= x_min) & (x <= x_max) & (y >= y_min) & (y <= y_max) & (z >= z_min) & (z <= z_max) + return crop_mask + + +def crop_point_cloud( + point_cloud: PointCloud, + bounding_box: BoundingBox3DType, +) -> PointCloud: + """Creates a new point cloud that is cropped to the given bounding box. + Will also crop the colors and attributes if they are present. 
+ + Args: + bounding_box: the bounding box that surrounds the points to keep + point_cloud: the point cloud to crop + + Returns: + the new cropped point cloud + """ + crop_mask = generate_point_cloud_crop_mask(point_cloud, bounding_box) + return filter_point_cloud(point_cloud, crop_mask.nonzero()) diff --git a/airo-camera-toolkit/airo_camera_toolkit/point_clouds/visualization.py b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/visualization.py new file mode 100644 index 00000000..a9c1e35d --- /dev/null +++ b/airo-camera-toolkit/airo_camera_toolkit/point_clouds/visualization.py @@ -0,0 +1,24 @@ +from typing import Any, Tuple + +import open3d as o3d +from airo_typing import Vector3DType + + +def open3d_point( + position: Vector3DType, color: Tuple[float, float, float], radius: float = 0.01 +) -> Any: # Change Any back to o3d.geometry.TriangleMesh + """Creates a small sphere mesh for visualization in open3d. + + Args: + position: 3D position of the point + color: RGB color of the point as 0-1 floats + radius: radius of the sphere + + Returns: + sphere: an open3d mesh + """ + sphere = o3d.geometry.TriangleMesh.create_sphere(radius=radius) + sphere.translate(position) + sphere.paint_uniform_color(color) + sphere.compute_vertex_normals() + return sphere diff --git a/airo-camera-toolkit/docs/rerun.md b/airo-camera-toolkit/docs/rerun.md index c3a70ce2..509b306a 100644 --- a/airo-camera-toolkit/docs/rerun.md +++ b/airo-camera-toolkit/docs/rerun.md @@ -11,7 +11,7 @@ rerun.log_image("zed_top", image) rerun.log_scalar("force_z", force[2]) ... ``` -See the [example notebook](./rerun-zed-example.ipynb) for more. +See the [example notebook](../notebooks/rerun-zed-example.ipynb) for more. > :information_source: A note on starting the Rerun viewer: you can start it by calling `rerun.spawn()` from Python. However when starting Rerun like that, [there is no way to specify a memory limit](https://www.rerun.io/docs/howto/limit-ram). This quickly becomes a problem when logging images, so we recommend starting Rerun from a terminal: >``` diff --git a/airo-camera-toolkit/notebooks/point_cloud_performance.ipynb b/airo-camera-toolkit/notebooks/point_cloud_performance.ipynb new file mode 100644 index 00000000..37de4f9f --- /dev/null +++ b/airo-camera-toolkit/notebooks/point_cloud_performance.ipynb @@ -0,0 +1,627 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Point Cloud Performance\n", + "\n", + "> Download this data from [here](https://ugentbe-my.sharepoint.com/:f:/g/personal/victorlouis_degusseme_ugent_be/EkIZoyySsnZBg56hRq1BqdkBuGlvhAwPWT9HDuqaUB-psA?e=iSehj6) and save the folder in a folder called `data` relative to this notebook.\n", + "\n", + "Main takeaways:\n", + "* \"Conversion\" is ~instant because array memory is shared.\n", + "* Filtering time scales with the amount of `True` values in the mask.\n", + "* Using `&` on mask is fast, so it's better to `&` lots of masks together than to filter sequentially.\n", + "* Filtering with our dataclass/in numpy is faster than Open3D's `select_by_index` or `select_by_mask`.\n", + "* Using `.nonzero()` on a boolean array before indexing is faster than using a boolean array directly." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "data_dir = os.path.join(\"data\", \"competition_sample_0000\")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "calibration\t confidence_map.tiff image_left.png\tintrinsics.json\n", + "camera_pose.json depth_map.tiff image_right.png\tpointcloud.ply\n" + ] + } + ], + "source": [ + "!ls $data_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "import open3d as o3d\n", + "\n", + "from airo_typing import NumpyDepthMapType\n", + "from airo_dataset_tools.data_parsers.camera_intrinsics import CameraIntrinsics\n", + "from airo_dataset_tools.data_parsers.pose import Pose\n", + "from airo_camera_toolkit.point_clouds.conversions import open3d_to_point_cloud, point_cloud_to_open3d\n", + "from airo_camera_toolkit.point_clouds.operations import filter_point_cloud, crop_point_cloud\n", + "from airo_camera_toolkit.point_clouds.operations import generate_point_cloud_crop_mask\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "intrinsics_path = os.path.join(data_dir, \"intrinsics.json\")\n", + "image_left_path = os.path.join(data_dir, \"image_left.png\")\n", + "image_right_path = os.path.join(data_dir, \"image_right.png\")\n", + "depth_map_path = os.path.join(data_dir, \"depth_map.tiff\")\n", + "confidence_map_path = os.path.join(data_dir, \"confidence_map.tiff\")\n", + "point_cloud_path = os.path.join(data_dir, \"pointcloud.ply\")\n", + "camera_pose_path = os.path.join(data_dir, \"camera_pose.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resolution: (2208, 1242)\n", + "Intrinsics: \n", + " [[1056.831 0. 1103.95 ]\n", + " [ 0. 1056.831 621.699]\n", + " [ 0. 0. 1. ]]\n", + "Extrinsics: \n", + " [[ 1. -0.001 -0.004 0.475]\n", + " [ 0.003 -0.365 0.931 -1.318]\n", + " [-0.002 -0.931 -0.365 0.9 ]\n", + " [ 0. 0. 0. 1. ]]\n" + ] + } + ], + "source": [ + "with open(intrinsics_path, \"r\") as f:\n", + " intrinsics_model = CameraIntrinsics.model_validate_json(f.read())\n", + " intrinsics = intrinsics_model.as_matrix()\n", + " resolution = intrinsics_model.image_resolution.as_tuple()\n", + "\n", + "with open(camera_pose_path, \"r\") as f:\n", + " camera_pose = Pose.model_validate_json(f.read()).as_homogeneous_matrix()\n", + "\n", + "with np.printoptions(precision=3, suppress=True):\n", + " print(\"Resolution:\", resolution)\n", + " print(\"Intrinsics: \\n\", intrinsics)\n", + " print(\"Extrinsics: \\n\", camera_pose)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PointCloud on CPU:0 [2742336 points (Float32)].\n", + "Attributes: colors (dtype = UInt8, shape = {2742336, 3})." 
+ ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def binarize_confidence(image: NumpyDepthMapType, threshold=50.0):\n", + " confident = image <= threshold\n", + " return confident\n", + "\n", + "depth_map = cv2.imread(depth_map_path, cv2.IMREAD_ANYDEPTH)\n", + "confidence_map = cv2.imread(confidence_map_path, cv2.IMREAD_ANYDEPTH)\n", + "\n", + "# Confidence mask\n", + "threshold = 1.0 # a value of 1.0 means only the most confidence points will be kept\n", + "confidence_binarized = binarize_confidence(confidence_map, threshold)\n", + "confidence_mask = confidence_binarized.reshape(-1)\n", + "\n", + "# Bounding box\n", + "bbox = (0.35, -0.3, 0.1), (0.7, 0.1, 0.95)\n", + "bbox_o3d = o3d.t.geometry.AxisAlignedBoundingBox(*bbox)\n", + "\n", + "# Open3D point clouds\n", + "pcd_in_camera = o3d.t.io.read_point_cloud(point_cloud_path)\n", + "\n", + "# This conversion to float32 can be removed once the data is saved as float32\n", + "pcd_in_camera.point.positions = o3d.core.Tensor(pcd_in_camera.point.positions.numpy().astype(np.float32))\n", + "\n", + "pcd = pcd_in_camera.transform(camera_pose) # transform to world frame (= base frame of left robot)\n", + "pcd_filtered = pcd.select_by_mask(confidence_mask)\n", + "pcd_cropped = pcd_filtered.crop(bbox_o3d)\n", + "\n", + "# Airo-mono point clouds\n", + "point_cloud = open3d_to_point_cloud(pcd)\n", + "point_cloud_filtered = filter_point_cloud(point_cloud, confidence_mask)\n", + "point_cloud_cropped = crop_point_cloud(point_cloud_filtered, bbox)\n", + "\n", + "pcd" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confidence mask: 85.61% is True\n" + ] + } + ], + "source": [ + "print(f\"Confidence mask: {100.0 * confidence_mask.mean():.2f}% is True\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Performance comparison\n", + "\n", + "#### Conversion" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13.7 µs ± 320 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "point_cloud_to_open3d(point_cloud)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "17.3 µs ± 54.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "open3d_to_point_cloud(pcd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Filtering" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "59.3 ms ± 160 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "filter_point_cloud(point_cloud, confidence_mask.nonzero()) # adding nonzero() is faster" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "67.5 ms ± 204 µs per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "filter_point_cloud(point_cloud, confidence_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.all(point_cloud.points[confidence_mask] == point_cloud.points[confidence_mask.nonzero()])" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "confidence_mask_95_false = confidence_mask.copy()\n", + "confidence_mask_95_false[:int(0.95 * len(confidence_mask))] = False" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.95 ms ± 7.01 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "filter_point_cloud(point_cloud, confidence_mask_95_false.nonzero())" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "140 ms ± 608 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "pcd.select_by_index(np.where(confidence_mask)[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "238 ms ± 3.37 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "pcd.select_by_mask(confidence_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "pcd_legacy = pcd.to_legacy()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "388 ms ± 3.16 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "pcd_legacy.select_by_index(np.where(confidence_mask)[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Cropping" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16.9 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "crop_point_cloud(point_cloud_filtered, bbox)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "31.1 ms ± 903 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "pcd_filtered.crop(bbox_o3d)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Downsampling" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.04 ms ± 37.9 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "pcd_cropped.voxel_down_sample(voxel_size=0.01)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Removing low confidence points and cropping as fast as possible" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "crop_mask = generate_point_cloud_crop_mask(point_cloud, bbox)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "139 µs ± 3.25 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "crop_mask & confidence_mask" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21.1 ms ± 252 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "crop_mask = generate_point_cloud_crop_mask(point_cloud, bbox)\n", + "mask = crop_mask & confidence_mask\n", + "filter_point_cloud(point_cloud, mask.nonzero())" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "74.9 ms ± 231 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "point_cloud_filtered = filter_point_cloud(point_cloud, confidence_mask.nonzero())\n", + "point_cloud_cropped = crop_point_cloud(point_cloud_filtered, bbox)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "25 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "crop_mask = generate_point_cloud_crop_mask(point_cloud, bbox)\n", + "mask = crop_mask & confidence_mask\n", + "pcd.select_by_index(mask.nonzero())" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "pcd2 = o3d.t.geometry.PointCloud(pcd)\n", + "pcd2.point.confidence = o3d.core.Tensor(confidence_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "49.8 ms ± 160 µs per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "pcd2_cropped = pcd2.crop(bbox_o3d)\n", + "pcd2_cropped.select_by_index(pcd2_cropped.point.confidence.numpy().nonzero())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "execution": { + "allow_errors": true, + "timeout": 300 + }, + "kernelspec": { + "display_name": "airo-mono", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/airo-camera-toolkit/notebooks/point_cloud_tutorial.ipynb b/airo-camera-toolkit/notebooks/point_cloud_tutorial.ipynb new file mode 100644 index 00000000..49f2c952 --- /dev/null +++ b/airo-camera-toolkit/notebooks/point_cloud_tutorial.ipynb @@ -0,0 +1,602 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Point Cloud Tutorial\n", + "\n", + "In this notebook we will cover:\n", + "1. Loading RGBD data and creating an [Open3D](http://www.open3d.org/) point cloud\n", + "2. Removing points with inaccurate depth\n", + "3. Cropping and downsampling a point cloud\n", + "4. Getting the highest, lowest and random points.\n", + "5. Project points from 3D to 2D\n", + "\n", + "We will use data of a robot holding a shirt in the air, prerecorded with a ZED 2i.\n", + "\n", + "> Download this data from [here](https://ugentbe-my.sharepoint.com/:f:/g/personal/victorlouis_degusseme_ugent_be/EkIZoyySsnZBg56hRq1BqdkBuGlvhAwPWT9HDuqaUB-psA?e=iSehj6) and save the folder in a folder called `data` relative to this notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "data_dir = os.path.join(\"data\", \"competition_sample_0000\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!ls $data_dir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import open3d as o3d\n", + "from ipywidgets import interact\n", + "\n", + "from airo_typing import NumpyDepthMapType\n", + "from airo_dataset_tools.data_parsers.camera_intrinsics import CameraIntrinsics\n", + "from airo_dataset_tools.data_parsers.pose import Pose\n", + "from airo_camera_toolkit.point_clouds.conversions import open3d_to_point_cloud, point_cloud_to_open3d\n", + "from airo_camera_toolkit.point_clouds.operations import filter_point_cloud, crop_point_cloud\n", + "from airo_camera_toolkit.point_clouds.visualization import open3d_point\n", + "from airo_camera_toolkit.pinhole_operations.projection import project_points_to_image_plane\n", + "from airo_typing import PointCloudPositionsType" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "intrinsics_path = os.path.join(data_dir, \"intrinsics.json\")\n", + "image_left_path = os.path.join(data_dir, \"image_left.png\")\n", + "image_right_path = 
os.path.join(data_dir, \"image_right.png\")\n", + "depth_map_path = os.path.join(data_dir, \"depth_map.tiff\")\n", + "confidence_map_path = os.path.join(data_dir, \"confidence_map.tiff\")\n", + "point_cloud_path = os.path.join(data_dir, \"pointcloud.ply\")\n", + "camera_pose_path = os.path.join(data_dir, \"camera_pose.json\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Loading the data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1 Loading the camera parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(intrinsics_path, \"r\") as f:\n", + " intrinsics_model = CameraIntrinsics.model_validate_json(f.read())\n", + " intrinsics = intrinsics_model.as_matrix()\n", + " resolution = intrinsics_model.image_resolution.as_tuple()\n", + "\n", + "with open(camera_pose_path, \"r\") as f:\n", + " camera_pose = Pose.model_validate_json(f.read()).as_homogeneous_matrix()\n", + "\n", + "with np.printoptions(precision=3, suppress=True):\n", + " print(\"Resolution:\", resolution)\n", + " print(\"Intrinsics: \\n\", intrinsics)\n", + " print(\"Extrinsics: \\n\", camera_pose)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.2 Loading the color images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image_left = plt.imread(image_left_path) # you can also use cv2.imread but then you get BGR instead of RGB\n", + "image_right = plt.imread(image_right_path)\n", + "\n", + "plt.figure(figsize=(20, 10))\n", + "plt.subplot(1, 2, 1)\n", + "plt.imshow(image_left)\n", + "plt.title(\"Left image\")\n", + "plt.subplot(1, 2, 2)\n", + "plt.imshow(image_right)\n", + "plt.title(\"Right image\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.3 Loading the depth and confidence map\n", + "\n", + "> Note: the confidence map has range [0.0, 100.0] where 0.0 is the **most confident**." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "depth_map = cv2.imread(depth_map_path, cv2.IMREAD_ANYDEPTH)\n", + "confidence_map = cv2.imread(confidence_map_path, cv2.IMREAD_ANYDEPTH)\n", + "\n", + "print(\"depth_map.dtype:\", depth_map.dtype)\n", + "print(\"confidence_map.dtype:\", confidence_map.dtype)\n", + "\n", + "plt.figure(figsize=(20, 10))\n", + "plt.subplot(1, 2, 1)\n", + "plt.imshow(depth_map)\n", + "plt.title(\"Depth map\")\n", + "plt.colorbar(fraction=0.025, pad=0.04)\n", + "plt.subplot(1, 2, 2)\n", + "plt.imshow(confidence_map)\n", + "plt.title(\"Confidence map\")\n", + "plt.colorbar(fraction=0.025, pad=0.04)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.4 Loading the point cloud\n", + "\n", + "Open3D uses the abbreviation `pcd` for their [PointCloud](http://www.open3d.org/docs/release/python_api/open3d.geometry.PointCloud.html#open3d.geometry.PointCloud) object, we will use this too to distinguish between them and numpy point_clouds." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pcd_in_camera = o3d.t.io.read_point_cloud(point_cloud_path)\n", + "\n", + "# This conversion to float32 can be removed once the data is saved as float32\n", + "pcd_in_camera.point.positions = o3d.core.Tensor(pcd_in_camera.point.positions.numpy().astype(np.float32))\n", + "\n", + "pcd = pcd_in_camera.transform(camera_pose) # transform to world frame (= base frame of left robot)\n", + "pcd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "o3d.visualization.draw_geometries([pcd.to_legacy()])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.5 Numpy point clouds\n", + "\n", + "Open3D provides a lot functionality for point clouds, see their [tutorial](http://www.open3d.org/docs/release/tutorial/geometry/pointcloud.html).\n", + "However sometimes you need something custom, (e.g. getting the lowest and highest points).\n", + "This can be done easily by converting the Open3D point cloud to numpy arrays.\n", + "We've also found that some operations such as filtering with a boolean mask are faster in numpy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "point_cloud = open3d_to_point_cloud(pcd)\n", + "points, colors = point_cloud.points, point_cloud.colors\n", + "\n", + "print(\"points:\", points.shape, points.dtype)\n", + "if colors is not None:\n", + " print(\"colors:\", colors.shape, colors.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Removing low confidence points" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1 Binarizing the confidence map" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def binarize_confidence(image: NumpyDepthMapType, threshold=50.0):\n", + " confident = image <= threshold\n", + " return confident\n", + "\n", + "\n", + "@interact(threshold=(0.0, 100.0, 1.0))\n", + "def show_confidence_binarized(threshold=50.0):\n", + " confidence_binarized = binarize_confidence(confidence_map, threshold)\n", + " confidence_image = confidence_binarized.astype(np.uint8) * 255\n", + " plt.figure(figsize=(10, 5))\n", + " plt.imshow(confidence_image, vmin=0, vmax=255)\n", + " plt.colorbar(fraction=0.025, pad=0.04)\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "threshold = 1.0 # a value of 1.0 means only the most confidence points will be kept\n", + "confidence_binarized = binarize_confidence(confidence_map, threshold)\n", + "confidence_mask = confidence_binarized.reshape(-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "confidence_mask.shape, confidence_mask.dtype, confidence_mask[:5], " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.2 Filtering with a binary map" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "point_cloud_filtered = filter_point_cloud(point_cloud, confidence_mask)\n", + "\n", + "n = len(point_cloud.points)\n", + "n_filtered = len(point_cloud_filtered.points)\n", + "\n", + "print(f\"Number of points: {n} -> {n_filtered} ({n_filtered / n * 100:.1f}%)\")" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "o3d.visualization.draw_geometries([point_cloud_to_open3d(point_cloud_filtered).to_legacy()])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Cropping and downsampling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 Cropping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bbox = (0.35, -0.3, 0.1), (0.7, 0.1, 0.95)\n", + "\n", + "point_cloud_cropped = crop_point_cloud(point_cloud_filtered, bbox)\n", + "\n", + "n = point_cloud.points.shape[0]\n", + "n_cropped = point_cloud_cropped.points.shape[0]\n", + "\n", + "print(f\"Number of points: {n} -> {n_cropped} ({n_cropped / n * 100:.1f}%)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bbox_o3d = o3d.geometry.AxisAlignedBoundingBox(*bbox)\n", + "bbox_o3d.color = (1.0, 0.0, 1.0)\n", + "\n", + "o3d.visualization.draw_geometries([point_cloud_to_open3d(point_cloud_cropped).to_legacy(), bbox_o3d])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.2 Downsampling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pcd_cropped = point_cloud_to_open3d(point_cloud_cropped)\n", + "pcd_downsampled = pcd_cropped.voxel_down_sample(voxel_size=0.01)\n", + "\n", + "n_cropped = len(pcd_cropped.point.positions)\n", + "n_downsampled = len(pcd_downsampled.point.positions)\n", + "\n", + "\n", + "print(f\"Number of points: {n_cropped} -> {n_downsampled} ({n_downsampled / n_cropped * 100:.2f}%)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "o3d.visualization.draw_geometries([pcd_downsampled.to_legacy()])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Getting the highest, lowest and random points\n", + "\n", + "### 4.1 Highest and lowest points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def highest_point(points: PointCloudPositionsType) -> np.ndarray:\n", + " return points[np.argmax(points[:, 2])]\n", + "\n", + "def lowest_point(points: PointCloudPositionsType) -> np.ndarray:\n", + " return points[np.argmin(points[:, 2])]\n", + "\n", + "highest = highest_point(point_cloud_cropped.points)\n", + "lowest = lowest_point(point_cloud_cropped.points)\n", + "\n", + "with np.printoptions(precision=3, suppress=True):\n", + " print(\"Highest point:\", highest)\n", + " print(\"Lowest point:\", lowest)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "o3d.visualization.draw_geometries(\n", + " [pcd_cropped.to_legacy(), open3d_point(highest, (0.0, 1.0, 0.0)), open3d_point(lowest, (1.0, 0.0, 0.0))]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4.2 Random points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# set random seed for reproducibility\n", + "np.random.seed(0)\n", + "\n", + "random_indices = np.random.choice(len(point_cloud_cropped.points), size=10, replace=False)\n", + "random_indices" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "random_points = point_cloud_cropped.points[random_indices]\n", + "\n", + "open3d_points = [open3d_point(p, (0.0, 0.0, 1.0)) for p in random_points]\n", + "\n", + "o3d.visualization.draw_geometries([pcd_cropped.to_legacy(), *open3d_points])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 
Projection\n", + "\n", + "### 5.1 Projecting points from 3D to 2D" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from airo_spatial_algebra import transform_points\n", + "from airo_spatial_algebra.operations import _HomogeneousPoints\n", + "\n", + "# the points are currentlty expressed in the world frame p_W, we need p_C\n", + "# p_C = X_C_W @ p_W\n", + "X_C_W = np.linalg.inv(camera_pose)\n", + "\n", + "# lowest\n", + "lowest_2d = project_points_to_image_plane(\n", + " transform_points(X_C_W, lowest),\n", + " intrinsics,\n", + ").squeeze()\n", + "\n", + "lowest_2d_int = np.rint(lowest_2d).astype(int)\n", + "lowest_2d, lowest_2d_int" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "highest_2d = project_points_to_image_plane(\n", + " transform_points(X_C_W, highest),\n", + " intrinsics,\n", + ").squeeze()\n", + "\n", + "random_2d = project_points_to_image_plane(\n", + " transform_points(X_C_W, random_points),\n", + " intrinsics,\n", + ")\n", + "\n", + "highest_2d_int = np.rint(highest_2d).astype(int)\n", + "random_2d_int = np.rint(random_2d).astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image_annotated = image_left.copy()\n", + "cv2.circle(image_annotated, tuple(lowest_2d_int), 10, (0, 1.0, 0), thickness=2)\n", + "cv2.circle(image_annotated, tuple(highest_2d_int), 10, (1.0, 0, 0), thickness=2)\n", + "for p in random_2d_int:\n", + " cv2.circle(image_annotated, tuple(p), 10, (0, 0, 1.0), thickness=2)\n", + "\n", + "plt.figure(figsize=(10, 5))\n", + "plt.imshow(image_annotated)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5.2 Point cloud to segmented image (advanced)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rgbd_image = pcd_cropped.project_to_rgbd_image(*resolution, intrinsics, extrinsics=np.linalg.inv(camera_pose))\n", + "\n", + "image_rgb_float = np.asarray(rgbd_image.color)\n", + "depth_map_float = np.asarray(rgbd_image.depth).squeeze()\n", + "\n", + "print(depth_map_float.shape)\n", + "\n", + "# make background white where depth is 0.0\n", + "image_rgb_float[depth_map_float == 0.0] = 1.0\n", + "\n", + "plt.figure(figsize=(10, 5))\n", + "plt.imshow(image_rgb_float)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "execution": { + "allow_errors": true, + "timeout": 300 + }, + "kernelspec": { + "display_name": "airo-mono", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/airo-camera-toolkit/docs/rerun-zed-example.ipynb b/airo-camera-toolkit/notebooks/rerun_zed_tutorial.ipynb similarity index 84% rename from airo-camera-toolkit/docs/rerun-zed-example.ipynb rename to airo-camera-toolkit/notebooks/rerun_zed_tutorial.ipynb index 723da881..ea170ca0 100644 --- a/airo-camera-toolkit/docs/rerun-zed-example.ipynb +++ b/airo-camera-toolkit/notebooks/rerun_zed_tutorial.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "source": [ "# Rerun RGBD camera 
example\n", - "This example shows how to use [rerun](https://www.rerun.io/) for logging images, image annotations, pointclouds, transforms between elements in the world, time-series in a way that you might want to do for a robotic manipulation setup. Make sure you have a **ZED2I camera connected**.\n", + "This example shows how to use [rerun](https://www.rerun.io/) for logging images, image annotations, point clouds, transforms between elements in the world, time-series in a way that you might want to do for a robotic manipulation setup. Make sure you have a **ZED2I camera connected**.\n", "\n", "\n", "Rerun has more features such as logging meshes, logging 3D bboxes, URDFs (in process). Check the docs to learn more. \n", @@ -26,7 +26,7 @@ }, "outputs": [], "source": [ - "from airo_camera_toolkit.cameras.zed2i import Zed2i\n", + "from airo_camera_toolkit.cameras.zed.zed2i import Zed2i\n", "import rerun as rr\n", "#autoreload\n", "%load_ext autoreload\n", @@ -59,7 +59,7 @@ "metadata": {}, "outputs": [], "source": [ - "pointcloud = zed.get_colored_point_cloud()\n", + "point_cloud = zed.get_colored_point_cloud()\n", "rgb = zed.get_rgb_image()\n", "depth = zed.get_depth_image()" ] @@ -70,8 +70,8 @@ "metadata": {}, "outputs": [], "source": [ - "# log the colored pointcloud to the UI\n", - "rr.log(\"world/camera1/pointcloud\", rr.Points3D(positions=pointcloud[:,:3],colors=pointcloud[:,3:6]))\n" + "# log the colored point_cloud to the UI\n", + "rr.log(\"world/camera1/point_cloud\", rr.Points3D(positions=point_cloud.points,colors=point_cloud.colors))\n" ] }, { @@ -80,17 +80,7 @@ "metadata": {}, "outputs": [], "source": [ - "pointcloud[3156][3:6]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# log the non-colored pointcloud to the UI in the same world/camera1 space\n", - "rr.log(\"world/camera1/non_color_pointcloud\", rr.Points3D(positions=pointcloud[:,:3],colors=[0.5,0.5,0.5]))" + "point_cloud.colors[3156]" ] }, { @@ -168,8 +158,8 @@ "source": [ "# log some more data\n", "for _ in range(5):\n", - " pointcloud = zed.get_colored_point_cloud()\n", - " rr.log(\"world/camera1/pointcloud\", rr.Points3D(positions=pointcloud[:,:3],colors=pointcloud[:,3:6]))" + " point_cloud = zed.get_colored_point_cloud()\n", + " rr.log(\"world/camera1/point_cloud\", rr.Points3D(positions=point_cloud.points,colors=point_cloud.colors))" ] }, { @@ -220,7 +210,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.13" }, "vscode": { "interpreter": { diff --git a/airo-camera-toolkit/docs/live_charuco_pose.py b/airo-camera-toolkit/scripts/live_charuco_pose.py similarity index 100% rename from airo-camera-toolkit/docs/live_charuco_pose.py rename to airo-camera-toolkit/scripts/live_charuco_pose.py diff --git a/airo-camera-toolkit/setup.py b/airo-camera-toolkit/setup.py index 337a5b5d..23e2b7f1 100644 --- a/airo-camera-toolkit/setup.py +++ b/airo-camera-toolkit/setup.py @@ -16,6 +16,7 @@ "matplotlib", "rerun-sdk>=0.11.0", "click", + "open3d", "loguru", "airo-typing", "airo-spatial-algebra", diff --git a/airo-camera-toolkit/test/test_point_clouds.py b/airo-camera-toolkit/test/test_point_clouds.py new file mode 100644 index 00000000..8a58e9f8 --- /dev/null +++ b/airo-camera-toolkit/test/test_point_clouds.py @@ -0,0 +1,49 @@ +import numpy as np +from airo_camera_toolkit.point_clouds.conversions import point_cloud_to_open3d +from airo_camera_toolkit.point_clouds.operations import 
crop_point_cloud +from airo_typing import PointCloud + + +def test_crop_known(): + n = 11 + points_x = np.arange(0, n) # 11 points, 0 to 10 + points = np.zeros((n, 3)) + points[:, 0] = points_x + + point_cloud = PointCloud(points) + + bbox = (2.5, -1, -1), (7.5, 1, 1) # should include points (3, 0, 0) up to (7, 0, 0) + + point_cloud_cropped = crop_point_cloud(point_cloud, bbox) + + assert np.all(point_cloud_cropped.points == np.array([[3, 0, 0], [4, 0, 0], [5, 0, 0], [6, 0, 0], [7, 0, 0]])) + + +def test_crop_is_inside_bbox(): + n = 100 + points = np.random.rand(n, 3) # 100 3D points in the unit cube + bbox = (0.25, 0.25, 0.25), (0.75, 0.75, 0.75) + + point_cloud = PointCloud(points) + point_cloud_cropped = crop_point_cloud(point_cloud, bbox) + + assert np.all(point_cloud_cropped.points >= np.array(bbox[0])) + assert np.all(point_cloud_cropped.points <= np.array(bbox[1])) + + +def test_crop_equal_to_open3d(): + import open3d as o3d + + n = 100 + points = np.random.rand(n, 3) # 100 3D points in the unit cube + + bbox = (0.25, 0.25, 0.25), (0.75, 0.75, 0.75) + bbox_o3d = o3d.t.geometry.AxisAlignedBoundingBox(*bbox) + + point_cloud = PointCloud(points) + pcd = point_cloud_to_open3d(point_cloud) + + point_cloud_cropped = crop_point_cloud(point_cloud, bbox) + pcd_cropped = pcd.crop(bbox_o3d) + + assert np.all(np.asarray(pcd_cropped.point.positions.numpy()) == point_cloud_cropped.points) diff --git a/airo-spatial-algebra/airo_spatial_algebra/operations.py b/airo-spatial-algebra/airo_spatial_algebra/operations.py index eaa629eb..24282ca5 100644 --- a/airo-spatial-algebra/airo_spatial_algebra/operations.py +++ b/airo-spatial-algebra/airo_spatial_algebra/operations.py @@ -9,22 +9,21 @@ """ import numpy as np -from airo_typing import HomogeneousMatrixType, Vectors3DType +from airo_typing import HomogeneousMatrixType, Vector3DArrayType, Vectors3DType class _HomogeneousPoints: + """Helper class to facilitate multiplicating 4x4 matrices with one or more 3D points. + This class internally handles the addition / removal of a dimension to the points. + """ + # TODO: extend to generic dimensions (1D,2D,3D). - # TODO: more apropaite name? 
this class mainly serves as 'input validation' and conversion to homogeneous coordinates def __init__(self, points: Vectors3DType): if not self.is_valid_points_type(points): raise ValueError(f"Invalid argument for {_HomogeneousPoints.__name__}.__init__ ") - if self.is_single_point(points): - self._homogeneous_points = np.concatenate([points, np.ones(1, dtype=np.float32)]) - self._homogeneous_points = self._homogeneous_points[np.newaxis, :] - else: - self._homogeneous_points = np.concatenate( - [points, np.ones((points.shape[0], 1), dtype=np.float32)], axis=1 - ) + + points = _HomogeneousPoints.ensure_array_2d(points) + self._homogeneous_points = np.concatenate([points, np.ones((points.shape[0], 1), dtype=np.float32)], axis=1) @staticmethod def is_valid_points_type(points: Vectors3DType) -> bool: @@ -37,8 +36,13 @@ def is_valid_points_type(points: Vectors3DType) -> bool: return False @staticmethod - def is_single_point(points: Vectors3DType) -> bool: - return len(points.shape) == 1 + def ensure_array_2d(points: Vectors3DType) -> Vector3DArrayType: + """If points is a single shape (3,) point, then it is reshaped to (1,3).""" + if len(points.shape) == 1: + if len(points) != 3: + raise ValueError("points has only one dimension, but it's length is not 3") + points = points.reshape((1, 3)) + return points @property def homogeneous_points(self) -> np.ndarray: @@ -51,7 +55,6 @@ def points(self) -> Vectors3DType: # normalize points (for safety, should never be necessary with affine transforms) # but we've had bugs of this type with projection operations, so better safe than sorry? scalars = self._homogeneous_points[:, 3][:, np.newaxis] - print(scalars) points = self.homogeneous_points[:, :3] / scalars # TODO: if the original poitns was (1,3) matrix, then the resulting points would be a (3,) vector. # Is this desirable? and if not, how to avoid it? diff --git a/airo-typing/airo_typing/__init__.py b/airo-typing/airo_typing/__init__.py index e83f23ab..3add0c87 100644 --- a/airo-typing/airo_typing/__init__.py +++ b/airo-typing/airo_typing/__init__.py @@ -1,4 +1,5 @@ -from typing import Tuple, Union +from dataclasses import dataclass +from typing import Dict, Optional, Tuple, Union import numpy as np @@ -68,6 +69,11 @@ shorthand notation is ^C T^B_A, where C is the frame the velocity is measured in, B is the frame the velocity is expressed in. """ +# manipulator types + +JointConfigurationType = np.ndarray +"""an (N,) numpy array that represents the joint angles for a robot""" + ###################### # camera related types ###################### @@ -99,13 +105,25 @@ """4x4 camera extrinsic matrix, this is the homogeneous matrix that describes the camera pose in the world frame""" +########################## +# 3D and point cloud types +########################## -PointCloudType = Vector3DArrayType -""" a (N,3) numpy array that represents a point cloud""" +BoundingBox3DType = Tuple[Tuple[float, float, float], Tuple[float, float, float]] +""" a tuple of two tuples that represent the min and max corners of a 3D bounding box""" -ColoredPointCloudType = np.ndarray -" an (N,6) numpy array that represents a point cloud with color information. Color is in RGB, float (0-1) format." 
-# manipulator types +PointCloudPositionsType = Vector3DArrayType +""" a (N,3) float32 numpy array that represents a point cloud""" -JointConfigurationType = np.ndarray -"""an (N,) numpy array that represents the joint angles for a robot""" +PointCloudColorsType = np.ndarray +""" a (N,3) uint8 numpy array that represents the RGB colors of a point cloud""" + +PointCloudAttributesType = Dict[str, np.ndarray] +""" a dictionary of numpy arrays that represent additional attributes of a point cloud, e.g. normals, confidence, etc. """ + + +@dataclass +class PointCloud: + points: PointCloudPositionsType + colors: Optional[PointCloudColorsType] = None + attributes: Optional[PointCloudAttributesType] = None diff --git a/mypy.ini b/mypy.ini index f891e6cd..0b631243 100644 --- a/mypy.ini +++ b/mypy.ini @@ -62,3 +62,5 @@ ignore_missing_imports=True ignore_missing_imports=True [mypy-albumentations.*] ignore_missing_imports=True +[mypy-open3d.*] +ignore_missing_imports=True
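
Putting the pieces of this diff together, below is a minimal usage sketch of the new point cloud API: the `PointCloud` dataclass from `airo-typing` plus the `point_clouds` operations and conversions added to `airo-camera-toolkit`. This is an illustrative sketch, not part of the patch: the random arrays, the `confidence` attribute name and the threshold value are placeholders, and in practice the data would come from e.g. `Zed2i.get_colored_point_cloud()` or a saved `.ply` file.

```python
import numpy as np

from airo_camera_toolkit.point_clouds.conversions import open3d_to_point_cloud, point_cloud_to_open3d
from airo_camera_toolkit.point_clouds.operations import (
    crop_point_cloud,
    filter_point_cloud,
    generate_point_cloud_crop_mask,
)
from airo_typing import PointCloud

# Synthetic stand-in for a camera point cloud, e.g. from Zed2i.get_colored_point_cloud().
n = 100_000
points = np.random.rand(n, 3).astype(np.float32)  # (N, 3) float32 positions
colors = np.random.randint(0, 256, (n, 3), dtype=np.uint8)  # (N, 3) uint8 RGB colors
confidence = np.random.rand(n, 1).astype(np.float32)  # hypothetical per-point attribute

point_cloud = PointCloud(points, colors, attributes={"confidence": confidence})

# Simple crop to a 3D bounding box: ((x_min, y_min, z_min), (x_max, y_max, z_max)).
bbox = (0.25, 0.25, 0.25), (0.75, 0.75, 0.75)
point_cloud_cropped = crop_point_cloud(point_cloud, bbox)

# Faster variant when several filters are combined: AND the boolean masks first and
# index once via .nonzero(), as measured in the point_cloud_performance notebook.
crop_mask = generate_point_cloud_crop_mask(point_cloud, bbox)
confidence_mask = confidence[:, 0] <= 0.5  # placeholder threshold for illustration
mask = crop_mask & confidence_mask
point_cloud_kept = filter_point_cloud(point_cloud, mask.nonzero())

# Conversion to an Open3D tensor point cloud and back.
pcd = point_cloud_to_open3d(point_cloud_kept)
point_cloud_roundtrip = open3d_to_point_cloud(pcd)

print(len(point_cloud.points), len(point_cloud_cropped.points), len(point_cloud_kept.points))
```

The conversions share the underlying numpy buffers with the Open3D tensors, so converting back and forth is essentially free; the timings in the performance notebook reflect this.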