Merge branch 'main' into multiprocess
Victorlouisdg committed Jul 30, 2024
2 parents 1653808 + f78efa3 commit 8c12d87
Showing 15 changed files with 469 additions and 132 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yaml
@@ -32,7 +32,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install wheel setuptools
pip install pytest
pip install "pytest<8.0.0"
pip install airo-typing/ airo-spatial-algebra/ airo-camera-toolkit/ airo-robots/ airo-teleop/ airo-dataset-tools/
- name: Run Tests
run: pytest ${{matrix.package}}/
10 changes: 7 additions & 3 deletions CHANGELOG.md
@@ -15,16 +15,18 @@ This project uses a [CalVer](https://calver.org/) versioning scheme with monthly


### Added
- Added method `as_single_polygon` to the `Mask` class, which combines disconnected parts of a binary mask into a single polygon; useful for data formats that only allow a single polygon, such as YOLO.
- `PointCloud` dataclass as the main data structure for point clouds in airo-mono
- Notebooks to get started with point clouds, checking performance and logging to rerun
- Functions to crop point clouds and filter points with a mask (e.g. low-confidence points)
- Functions to crop point clouds, filter points with a mask (e.g. low-confidence points), and transform point clouds
- Functions to convert between our numpy-based dataclass and Open3D point clouds
- `BoundingBox3DType`
- `Zed2i.ULTRA_DEPTH_MODE` to enable the ultra depth setting for the Zed2i cameras


- `OpenCVVideoCapture` implementation of `RGBCamera` for working with arbitrary cameras
- `MultiprocessRGBRerunLogger` and `MultiprocessRGBDRerunLogger` now allow you to pass an `entity_path` value that determines where the RGB and depth images are logged (see the sketch below)
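
A minimal usage sketch of the new argument (the import path is an assumption, as this diff does not show the module name; the constructor arguments match the `__init__` further down in this commit):

```python
# Assumed import path; the diff does not show where the logger class lives.
from airo_camera_toolkit.cameras.multiprocess.multiprocess_rgb_camera import MultiprocessRGBRerunLogger

# Log frames from the "camera" shared-memory namespace under a custom Rerun
# entity path. Omitting entity_path falls back to the namespace itself,
# which preserves the old behavior.
logger = MultiprocessRGBRerunLogger(
    shared_memory_namespace="camera",
    entity_path="world/camera/rgb",
)
logger.start()  # the logger is a daemon multiprocessing.Process
```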

### Changed
- `coco-to-yolo` conversion now creates a single polygon from all disconnected parts of the mask instead of simply taking the first polygon of the list (see the sketch below).
- Dropped support for python 3.8 and added 3.11 to the testing matrix [#103](https://github.com/airo-ugent/airo-mono/issues/103).
- Set python version to 3.10 because of an issue with the `ur_rtde` wheels [#121](https://github.com/airo-ugent/airo-mono/issues/121). Updated README.md to reflect this change.
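
A minimal sketch of the new single-polygon behavior (names are taken from the conversion code further down in this diff; `segmentation`, `width`, `height`, and `yolo_id` come from the surrounding conversion loop):

```python
# Combine all disconnected parts of the mask into one polygon.
mask = BinarySegmentationMask.from_coco_segmentation_mask(segmentation, width, height)
polygon = mask.as_single_polygon  # flat [x0, y0, x1, y1, ...] list, or None

if polygon is not None:
    # YOLO segmentation format: class id followed by normalized x, y pairs.
    line = f"{yolo_id}" + "".join(f" {x / width} {y / height}" for x, y in zip(polygon[0::2], polygon[1::2]))
```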

@@ -35,6 +37,8 @@ This project uses a [CalVer](https://calver.org/) versioning scheme with monthly
- Added `__init__.py` to `realsense` and `utils` in `airo_camera_toolkit.cameras`, fixing installs with pip and issue #113.
- Fixed bug that returned a transposed resolution in `MultiprocessRGBReceiver`.
- Using `Zed2i.PERFORMANCE_DEPTH_MODE` will now correctly use the performance mode instead of the quality mode.
- Shared memory files that were not properly cleaned up are now unlinked and then recreated.
- The wait interval for shared memory files has been reduced from 5 seconds to 0.5 seconds, to speed up application start times.

### Removed
- `ColoredPointCloudType`
282 changes: 177 additions & 105 deletions README.md

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions airo-camera-toolkit/README.md
@@ -26,6 +26,11 @@ Instructions can be found in the following files:
* [ZED Installation](airo_camera_toolkit/cameras/zed_installation.md)
* [RealSense Installation](airo_camera_toolkit/cameras/realsense_installation.md)

Additionally, to ensure you have `airo-robots` installed for the hand-eye calibration, install the extra dependencies:
```
pip install .[hand-eye-calibration]
```

## Getting started with cameras
A camera can be accessed by instantiating the corresponding class, e.g. for a ZED camera:
```python
2 changes: 2 additions & 0 deletions airo-camera-toolkit/airo_camera_toolkit/cameras/README.md
@@ -7,6 +7,8 @@ This subpackage contains implementations of the camera interface for the cameras

It also contains code to enable multiprocessed use of the camera streams: [multiprocessed camera](./multiprocess/)

There is also an implementation for generic RGB cameras using OpenCV `VideoCapture`: [OpenCV VideoCapture](./opencv_videocapture/)

## 1. Installation
Implementations usually require the installation of SDKs, drivers etc. to communicate with the camera.
This information can be found in `READMEs` for each camera:
@@ -18,13 +18,17 @@ def __init__(
shared_memory_namespace: str,
rerun_application_id: str = "rerun",
image_transform: Optional[ImageTransform] = None,
entity_path: Optional[str] = None,
):
super().__init__(daemon=True)
self._shared_memory_namespace = shared_memory_namespace
self.shutdown_event = multiprocessing.Event()
self._rerun_application_id = rerun_application_id
self._image_transform = image_transform

# If the entity path is not given, we use the `_shared_memory_namespace` value as entity path (maintaining backwards compatibility).
self._entity_path = entity_path if entity_path is not None else shared_memory_namespace

def _log_rgb_image(self) -> None:
import rerun as rr

@@ -39,7 +43,7 @@ def _log_rgb_image(self) -> None:
if self._image_transform is not None:
image_rgb = self._image_transform.transform_image(image_rgb)

rr.log(self._shared_memory_namespace, rr.Image(image_rgb).compress(jpeg_quality=90))
rr.log(self._entity_path, rr.Image(image_rgb).compress(jpeg_quality=90))

def run(self) -> None:
"""main loop of the process, runs until the process is terminated"""
@@ -63,13 +67,18 @@ def __init__(
shared_memory_namespace: str,
rerun_application_id: str = "rerun",
image_transform: Optional[ImageTransform] = None,
entity_path: Optional[str] = None,
entity_path_depth: Optional[str] = None,
):
super().__init__(
shared_memory_namespace,
rerun_application_id,
image_transform,
entity_path,
)

self._entity_path_depth = entity_path_depth if entity_path_depth is not None else f"{self._entity_path}_depth"

def _log_depth_image(self) -> None:
import rerun as rr

Expand All @@ -78,7 +87,7 @@ def _log_depth_image(self) -> None:
depth_image = self._receiver.get_depth_image()
if self._image_transform is not None:
depth_image = self._image_transform.transform_image(depth_image)
rr.log(f"{self._shared_memory_namespace}_depth", rr.Image(depth_image).compress(jpeg_quality=90))
rr.log(self._entity_path_depth, rr.Image(depth_image).compress(jpeg_quality=90))

def run(self) -> None:
"""main loop of the process, runs until the process is terminated"""
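As a sketch of the depth variant: when `entity_path_depth` is omitted, it defaults to `f"{entity_path}_depth"` per the constructor above (import path assumed, as in the RGB example):

```python
# Assumed import path; the diff does not show the module name.
from airo_camera_toolkit.cameras.multiprocess.multiprocess_rgbd_camera import MultiprocessRGBDRerunLogger

# RGB images are logged to "world/camera/rgb"; depth images default to
# "world/camera/rgb_depth" because entity_path_depth is not given.
logger = MultiprocessRGBDRerunLogger(
    shared_memory_namespace="camera",
    entity_path="world/camera/rgb",
)
logger.start()
```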
@@ -36,11 +36,13 @@ def shared_memory_block_like(array: np.ndarray, name: str) -> Tuple[shared_memory.SharedMemory, np.ndarray]:
try:
shm = shared_memory.SharedMemory(create=True, size=array.nbytes, name=name)
except FileExistsError:
logger.warning(f"Shared memory block with name {name} already exists, reusing it.")
# If we close() and unlink() here, receivers that are already accessing it freeze.
# So we try to reuse the existing shared memory block.
# However, if it is too small, the array creation will throw an error.
shm = shared_memory.SharedMemory(create=False, size=array.nbytes, name=name)
logger.warning(f"Shared memory file {name} exists. Will unlink and re-create it.")

shm_old = shared_memory.SharedMemory(create=False, size=array.nbytes, name=name)
shm_old.unlink()

shm = shared_memory.SharedMemory(create=True, size=array.nbytes, name=name)

shm_array: np.ndarray = np.ndarray(array.shape, dtype=array.dtype, buffer=shm.buf)
shm_array[:] = array[:]
return shm, shm_array
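For context, a sketch of how `shared_memory_block_like` now behaves when a stale block exists (the stale block is simulated by hand; cleanup uses the standard `multiprocessing.shared_memory` API):

```python
import numpy as np
from multiprocessing import shared_memory

# shared_memory_block_like is the function shown in the diff above.
frame = np.zeros((480, 640, 3), dtype=np.uint8)

# Simulate a block left behind by a crashed process.
stale = shared_memory.SharedMemory(create=True, size=frame.nbytes, name="camera_rgb")
stale.close()

# The fixed function unlinks the stale block and re-creates it, instead of
# reusing a block that might be too small for the new array.
shm, shm_array = shared_memory_block_like(frame, "camera_rgb")
shm_array[:] = 255  # writes go directly into the shared buffer

shm.close()
shm.unlink()
```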
Empty file.
@@ -0,0 +1,115 @@
from __future__ import annotations

import math
import os
from typing import Any, Optional, Tuple

import cv2
from airo_camera_toolkit.interfaces import RGBCamera
from airo_camera_toolkit.utils.image_converter import ImageConverter
from airo_typing import CameraIntrinsicsMatrixType, CameraResolutionType, NumpyFloatImageType, NumpyIntImageType


class OpenCVVideoCapture(RGBCamera):
"""Wrapper around OpenCV's VideoCapture so we can test the camera interface without external cameras."""

# Some standard resolutions that are likely to be supported by webcams.
# 16:9
RESOLUTION_1080 = (1920, 1080)
RESOLUTION_720 = (1280, 720)
# 4:3
RESOLUTION_768 = (1024, 768)
RESOLUTION_480 = (640, 480)

def __init__(
self,
video_capture_args: Tuple[Any, ...] = (0,),
intrinsics_matrix: Optional[CameraIntrinsicsMatrixType] = None,
resolution: CameraResolutionType = RESOLUTION_480,
fps: int = 30,
) -> None:
self.video_capture = cv2.VideoCapture(*video_capture_args)

# If passing a video file, we want to check if it exists. Then, we can throw a more meaningful
# error if it does not.
if len(video_capture_args) > 0 and isinstance(video_capture_args[0], str):
if not os.path.isfile(video_capture_args[0]):
raise FileNotFoundError(f"Could not find video file {video_capture_args[0]}")
if not self.video_capture.isOpened():
raise RuntimeError(f"Cannot open camera {video_capture_args[0]}. Is it connected?")

# Note that the following will not forcibly set the resolution. If the user's webcam
# does not support the desired resolution, OpenCV will silently select a close match.
self.video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, resolution[0])
self.video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, resolution[1])
self.video_capture.set(cv2.CAP_PROP_FPS, fps)

self._intrinsics_matrix = intrinsics_matrix

self.fps = self.video_capture.get(cv2.CAP_PROP_FPS)
self._resolution = (
math.floor(self.video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
math.floor(self.video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)

@property
def resolution(self) -> CameraResolutionType:
return self._resolution

def __enter__(self) -> RGBCamera:
return self

def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
self.video_capture.release()

def intrinsics_matrix(self) -> CameraIntrinsicsMatrixType:
"""Obtain the intrinsics matrix of the camera.
Raises:
RuntimeError: You must explicitly pass an intrinsics object to the constructor.
Returns:
CameraIntrinsicsMatrixType: The intrinsics matrix.
"""
if self._intrinsics_matrix is None:
raise RuntimeError(
"OpenCVVideoCapture does not have a preset intrinsics matrix. Pass it to the constructor if you know it."
)
return self._intrinsics_matrix

def _grab_images(self) -> None:
ret, image = self.video_capture.read()
if not ret: # When streaming a video, we will at some point reach the end.
raise EOFError("Can't receive frame (stream end?). Exiting...")

self._frame = image

def _retrieve_rgb_image(self) -> NumpyFloatImageType:
return ImageConverter.from_opencv_format(self._frame).image_in_numpy_format

def _retrieve_rgb_image_as_int(self) -> NumpyIntImageType:
return ImageConverter.from_opencv_format(self._frame).image_in_numpy_int_format


if __name__ == "__main__":
import airo_camera_toolkit.cameras.manual_test_hw as test
import numpy as np

camera = OpenCVVideoCapture(intrinsics_matrix=np.eye(3))

# Perform tests
test.manual_test_camera(camera)
test.manual_test_rgb_camera(camera)
test.profile_rgb_throughput(camera)

# Live viewer
cv2.namedWindow("OpenCV Webcam RGB", cv2.WINDOW_NORMAL)

while True:
color_image = camera.get_rgb_image_as_int()
color_image = ImageConverter.from_numpy_int_format(color_image).image_in_opencv_format

cv2.imshow("OpenCV Webcam RGB", color_image)
key = cv2.waitKey(1)
if key == ord("q"):
break
@@ -0,0 +1,6 @@
# Generic OpenCV camera

This `RGBCamera` implementation allows testing arbitrary cameras through the OpenCV `VideoCapture` interface.

We currently do not support intrinsics calibration in airo-camera-toolkit. You can find the intrinsics of your camera
using [these instructions](https://docs.opencv.org/4.x/dc/dbb/tutorial_py_calibration.html).
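
Once you know the intrinsics, a minimal sketch of passing them in (the import path is assumed and the matrix values are placeholders; the constructor arguments follow the implementation in this commit):

```python
import numpy as np

# Assumed import path for the class added in this commit.
from airo_camera_toolkit.cameras.opencv_videocapture.opencv_videocapture import OpenCVVideoCapture

# Placeholder intrinsics; substitute the values from your own calibration.
intrinsics = np.array(
    [
        [600.0, 0.0, 320.0],
        [0.0, 600.0, 240.0],
        [0.0, 0.0, 1.0],
    ]
)

camera = OpenCVVideoCapture(
    video_capture_args=(0,),  # device index 0, as in cv2.VideoCapture(0)
    intrinsics_matrix=intrinsics,
    resolution=OpenCVVideoCapture.RESOLUTION_720,
)
image = camera.get_rgb_image_as_int()
```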
@@ -1,7 +1,8 @@
from typing import Any

import numpy as np
from airo_typing import BoundingBox3DType, PointCloud
from airo_spatial_algebra.operations import transform_points
from airo_typing import BoundingBox3DType, HomogeneousMatrixType, PointCloud


def filter_point_cloud(point_cloud: PointCloud, mask: Any) -> PointCloud:
@@ -61,3 +62,23 @@ def crop_point_cloud(
"""
crop_mask = generate_point_cloud_crop_mask(point_cloud, bounding_box)
return filter_point_cloud(point_cloud, crop_mask.nonzero())


def transform_point_cloud(point_cloud: PointCloud, frame_transformation: HomogeneousMatrixType) -> PointCloud:
"""Creates a new point cloud for which the points are transformed to the desired frame.
Will keep colors and attributes if they are present.
The `frame_transformation` is a homogeneous matrix expressing the current point cloud frame in the target point cloud frame.
For example, if you capture a point cloud from a camera with the extrinsics matrix `X_W_C`, expressing the camera's pose in
the world frame, then you can express the point cloud in the world frame with:
`point_cloud_in_world = transform_point_cloud(point_cloud, X_W_C)`
Args:
point_cloud: The point cloud to transform.
frame_transformation: The transformation matrix from the current point cloud frame to the new desired frame.
Returns:
The new transformed point cloud."""
new_points = transform_points(frame_transformation, point_cloud.points)
return PointCloud(new_points, point_cloud.colors, point_cloud.attributes)
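
A short usage sketch of the new function (the extrinsics values are illustrative; `X_W_C` follows the naming convention from the docstring above):

```python
import numpy as np

# Illustrative extrinsics: the camera pose expressed in the world frame.
X_W_C = np.eye(4)
X_W_C[:3, 3] = [0.0, 0.0, 1.0]  # camera 1 m above the world origin

point_cloud_in_world = transform_point_cloud(point_cloud_in_camera, X_W_C)
```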
2 changes: 1 addition & 1 deletion airo-camera-toolkit/setup.py
@@ -20,9 +20,9 @@
"loguru",
"airo-typing",
"airo-spatial-algebra",
"airo-robots",
"airo-dataset-tools",
],
extras_require={"hand-eye-calibration": ["airo-robots"]},
packages=setuptools.find_packages(exclude=["test"]),
entry_points={
"console_scripts": [
@@ -92,18 +92,16 @@ def create_yolo_dataset_from_coco_instances_dataset(
yolo_id = yolo_category_index.index(category)
if use_segmentation:
segmentation = annotation.segmentation
# convert to polygon if required
# convert to **single** polygon
segmentation = BinarySegmentationMask.from_coco_segmentation_mask(segmentation, width, height)
segmentation = segmentation.as_polygon
segmentation = segmentation.as_single_polygon

if segmentation is None:
# should actually never happen as each annotation is assumed to have a segmentation if you pass use_segmentation=True
# but we filter it for convenience to deal with edge cases
print(f"skipping annotation for image {image_path}, as it has no segmentation")
continue
segmentation = segmentation[
0
] # only use first polygon, since coco does not support multiple polygons?

file.write(f"{yolo_id}")
for (x, y) in zip(segmentation[0::2], segmentation[1::2]):
file.write(f" {x/width} {y/height}")
@@ -3,9 +3,9 @@
from typing import Any, Callable, List, Optional

import albumentations as A
import cv2
import numpy as np
import tqdm
from airo_dataset_tools.coco_tools.transforms import PillowResize
from airo_dataset_tools.data_parsers.coco import (
CocoImage,
CocoInstanceAnnotation,
@@ -90,7 +90,15 @@ def apply_transform_to_coco_dataset(  # type: ignore # noqa: C901
# convert coco keypoints to list of (x,y) keypoints

if transform_bbox:
all_bboxes.append(annotation.bbox)
bbox = annotation.bbox
# replace degenerate bboxes (width or height < 1) with a minimal valid box
if bbox[3] < 1 or bbox[2] < 1:
# x_min must be < x_max for albumentations check
bbox = [0, 0, 1, 1]
print(
f"Invalid bbox for image {coco_image.file_name} and annotation {annotation.id}. Setting to [0,0,1,1]"
)
all_bboxes.append(bbox)

if transform_segmentation:
# convert segmentation to binary mask
@@ -123,7 +131,10 @@
if not os.path.exists(transformed_image_dir):
os.makedirs(transformed_image_dir)
# specify quality to use for JPEG (format is determined by the file extension)
transformed_image.save(os.path.join(target_image_path, coco_image.file_name), quality=95)
# convert to BGR and save with OpenCV instead
transformed_image_cv2 = cv2.cvtColor(np.array(transformed_image), cv2.COLOR_RGB2BGR)
cv2.imwrite(os.path.join(target_image_path, coco_image.file_name), transformed_image_cv2)

# change the metadata of the image coco object
coco_image.width = transformed_image.width
@@ -184,7 +195,7 @@ def resize_coco_dataset(
transformed_dataset_dir = target_dataset_dir
os.makedirs(transformed_dataset_dir, exist_ok=True)

transforms = [PillowResize(height, width)]
transforms = [A.Resize(height, width)]
coco_json = json.load(open(annotations_json_path, "r"))
coco_dataset = CocoInstancesDataset(**coco_json)
transformed_dataset = apply_transform_to_coco_dataset(
