diff --git a/README.md b/README.md new file mode 100644 index 0000000..4be9267 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# 基于设备上解码的 Yolo 检测 + +![Yolo-on-device](https://user-images.githubusercontent.com/56075061/144863222-a52be87e-b1f0-4a0a-b39b-f865bbb6e4a4.png) + +该存储库 (修改自 [device-decoding](https://github.com/luxonis/depthai-experiments/tree/master/gen2-yolo/device-decoding)) +包含直接使用 DepthAI SDK (`main_sdk.py`) 或 DepthAI API (`main_api.py`) 在设备上解码运行 Yolo 目标检测的代码。目前,支持的版本有: + +* `YoloV3` & `YoloV3-tiny`, +* `YoloV4` & `YoloV4-tiny`, +* `YoloV5`, +* `YoloV6`, +* `YoloV7`. + +我们在 `main_sdk.py` 和 `main_api.py` 中使用相同样式的 JSON 解析,但您也可以在代码中手动设置这两种情况下的值。 + +> `models` 目录下文件可使用 [git lfs](https://support.huaweicloud.com/usermanual-codehub/devcloud_hlp_0960.html#devcloud_hlp_0960__section286116283444) 下载 + +### 导出模型 + +由于模型必须以某种方式导出转换到 OpenVINO IR,我们提供了关于训练和导出的教程: + +* `YoloV3`, `YoloV4`, 和它们的 `tiny` 版本: + * 训练: + * [YoloV3_V4_tiny_training.ipynb](https://github.com/luxonis/depthai-ml-training/blob/master/colab-notebooks/YoloV3_V4_tiny_training.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/luxonis/depthai-ml-training/blob/master/colab-notebooks/YoloV3_V4_tiny_training.ipynb) + * [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) + * 导出转换: + * [https://github.com/luxonis/yolo2openvino](https://github.com/luxonis/yolo2openvino) + +* `YoloV5`, `YoloV6`, 和 `YoloV7` : + * 训练可参考原始仓库: + * [YoloV5](https://github.com/ultralytics/yolov5), + * [YoloV6](https://github.com/meituan/YOLOv6), + * [YoloV7](https://github.com/WongKinYiu/yolov7) + * 导出转换: + * 可使用 [https://tools.luxonis.com/](https://tools.luxonis.com/) + 网页在线转换, + * 或参考 [https://github.com/luxonis/tools/tree/master/yolo](https://github.com/luxonis/tools/tree/master/yolo) + 和 [https://github.com/luxonis/tools/tree/master/yolov7](https://github.com/luxonis/tools/tree/master/yolov7) + 进行本地转换 + +## 用法 + +1. 
安装依赖 + ```shell + python3 -m pip install -r requirements.txt + ``` +2. 运行脚本 + ```shell + python3 main_sdk.py -m <model_name> -c <config_json> + ``` + 或者 + ```shell + python3 main_api.py -m <model_name> -c <config_json> + ``` + Tips: + + * `<model_name>` 是来自 DepthAI 模型库 (https://zoo.luxonis.com) 的模型名称或 blob 文件的相对路径。请查看我们的模型库以查看可用的预训练模型。 + * `<config_json>` 是带有 Yolo 模型元数据(输入形状、锚点、标签等)的 JSON 的相对路径。 + +## JSONs + +我们已经为常见的 Yolo 版本提供了一些 JSON。您可以编辑它们并为您的模型设置它们,如上述教程中的后续步骤部分所述。如果您要更改教程中的某些参数,则应编辑相应的参数。一般来说,JSON +中的设置应该遵循模型的 CFG 中的设置。对于 YoloV5,默认设置应与 YoloV3 相同。 + +**Note**:值必须与训练期间在 CFG 中设置的值相匹配。如果您使用不同的输入宽度,您还应该将 `side32` 更改为 `sideX` +并将 `side16` 更改为 `sideY`,其中 `X = width/16` 和 `Y = width/32`。如果您使用的是非微型模型,则这些值为 `width/8`、`width/16` +和 `width/32`。 + +您还可以更改 IOU 和置信度阈值。如果多次检测到同一个目标,则增加 IOU +阈值。如果没有检测到足够的目标,则降低置信度阈值。请注意,这不会神奇地改善您的目标检测器,但如果某些目标由于阈值太高而被过滤掉,则可能会有所帮助。 + +## Depth 信息 + +DepthAI 使您能够利用深度信息并获取检测到的对象的 `x`、`y` 和 `z` 坐标。 + +```shell +python3 main_sdk.py -m <model_name> -c <config_json> --spatial +``` + +或者 + +```shell +python3 main_api.py -m <model_name> -c <config_json> --spatial +``` + +如果您对使用 Yolo 检测器的深度信息感兴趣, +请查看我们的 [文档](https://docs.oakchina.cn/projects/api/samples/SpatialDetection/spatial_tiny_yolo.html)。 +![SpatialObjectDetection](https://user-images.githubusercontent.com/56075061/144864639-4519699e-d3da-4172-b66b-0495ea11317e.png) \ No newline at end of file diff --git a/json/yolov3-tiny.json b/json/yolov3-tiny.json new file mode 100644 index 0000000..447d6f7 --- /dev/null +++ b/json/yolov3-tiny.json @@ -0,0 +1,107 @@ +{ + "nn_config": + { + "output_format" : "detection", + "NN_family" : "YOLO", + "input_size": "416x416", + "NN_specific_metadata" : + { + "classes" : 80, + "coordinates" : 4, + "anchors" : [10,14, 23,27, 37,58, 81,82, 135,169, 344,319], + "anchor_masks" : + { + "side26" : [0,1,2], + "side13" : [3,4,5] + }, + "iou_threshold" : 0.8, + "confidence_threshold" : 0.5 + } + }, + "mappings": + { + "labels": + [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter",
"bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + } +} diff --git a/json/yolov3.json b/json/yolov3.json new file mode 100644 index 0000000..fc39500 --- /dev/null +++ b/json/yolov3.json @@ -0,0 +1,109 @@ +{ + "nn_config": + { + "output_format" : "detection", + "NN_family" : "YOLO", + "input_size": "416x416", + "NN_specific_metadata" : + { + "classes" : 80, + "coordinates" : 4, + "anchors" : [10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0,116.0, 90.0, 156.0,198.0,373.0, 326.0], + "anchor_masks" : + { + "side52" : [0,1,2], + "side26" : [3,4,5], + "side13" : [6,7,8] + }, + "iou_threshold" : 0.5, + "confidence_threshold" : 0.5 + } + }, + "mappings": + { + "labels": + [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + 
"tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + } +} + diff --git a/json/yolov4-tiny.json b/json/yolov4-tiny.json new file mode 100644 index 0000000..8c33f67 --- /dev/null +++ b/json/yolov4-tiny.json @@ -0,0 +1,107 @@ +{ + "nn_config": + { + "output_format" : "detection", + "NN_family" : "YOLO", + "input_size": "416x416", + "NN_specific_metadata" : + { + "classes" : 80, + "coordinates" : 4, + "anchors" : [10,14, 23,27, 37,58, 81,82, 135,169, 344,319], + "anchor_masks" : + { + "side26" : [1,2,3], + "side13" : [3,4,5] + }, + "iou_threshold" : 0.5, + "confidence_threshold" : 0.5 + } + }, + "mappings": + { + "labels": + [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + 
"oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + } +} diff --git a/json/yolov4.json b/json/yolov4.json new file mode 100644 index 0000000..550daf9 --- /dev/null +++ b/json/yolov4.json @@ -0,0 +1,109 @@ +{ + "nn_config": + { + "output_format" : "detection", + "NN_family" : "YOLO", + "input_size": "608x608", + "NN_specific_metadata" : + { + "classes" : 80, + "coordinates" : 4, + "anchors" : [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401], + "anchor_masks" : + { + "side76" : [0,1,2], + "side38" : [3,4,5], + "side19" : [6,7,8] + }, + "iou_threshold" : 0.5, + "confidence_threshold" : 0.5 + } + }, + "mappings": + { + "labels": + [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + } +} + diff --git a/json/yolov5n.json b/json/yolov5n.json new file mode 100644 index 0000000..88daa80 --- /dev/null +++ b/json/yolov5n.json @@ -0,0 +1,139 @@ +{ + "model": { + "xml": "yolov5n.xml", + "bin": 
"yolov5n.bin" + }, + "nn_config": { + "output_format": "detection", + "NN_family": "YOLO", + "input_size": "416x416", + "NN_specific_metadata": { + "classes": 80, + "coordinates": 4, + "anchors": [ + 10.0, + 13.0, + 16.0, + 30.0, + 33.0, + 23.0, + 30.0, + 61.0, + 62.0, + 45.0, + 59.0, + 119.0, + 116.0, + 90.0, + 156.0, + 198.0, + 373.0, + 326.0 + ], + "anchor_masks": { + "side52": [ + 0, + 1, + 2 + ], + "side26": [ + 3, + 4, + 5 + ], + "side13": [ + 6, + 7, + 8 + ] + }, + "iou_threshold": 0.5, + "confidence_threshold": 0.5 + } + }, + "mappings": { + "labels": [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + "version": 1 +} \ No newline at end of file diff --git a/json/yolov6n.json b/json/yolov6n.json new file mode 100644 index 0000000..425e545 --- /dev/null +++ b/json/yolov6n.json @@ -0,0 +1,104 @@ +{ + "model": { + "xml": "yolov6n.xml", + "bin": "yolov6n.bin" + }, + "nn_config": { + "output_format": "detection", + "NN_family": "YOLO", + "input_size": "416x416", + "NN_specific_metadata": { + 
"classes": 80, + "coordinates": 4, + "anchors": [], + "anchor_masks": {}, + "iou_threshold": 0.5, + "confidence_threshold": 0.5 + } + }, + "mappings": { + "labels": [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + "version": 1 +} \ No newline at end of file diff --git a/json/yolov6t.json b/json/yolov6t.json new file mode 100644 index 0000000..32765cb --- /dev/null +++ b/json/yolov6t.json @@ -0,0 +1,104 @@ +{ + "model": { + "xml": "yolov6t.xml", + "bin": "yolov6t.bin" + }, + "nn_config": { + "output_format": "detection", + "NN_family": "YOLO", + "input_size": "416x416", + "NN_specific_metadata": { + "classes": 80, + "coordinates": 4, + "anchors": [], + "anchor_masks": {}, + "iou_threshold": 0.5, + "confidence_threshold": 0.5 + } + }, + "mappings": { + "labels": [ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + 
"cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + "version": 1 +} \ No newline at end of file diff --git a/json/yolov7.json b/json/yolov7.json new file mode 100644 index 0000000..1395248 --- /dev/null +++ b/json/yolov7.json @@ -0,0 +1,139 @@ +{ + "model": { + "xml": "yolov7.xml", + "bin": "yolov7.bin" + }, + "nn_config": { + "output_format": "detection", + "NN_family": "YOLO", + "input_size": "416x416", + "NN_specific_metadata": { + "classes": 80, + "coordinates": 4, + "anchors": [ + 12.0, + 16.0, + 19.0, + 36.0, + 40.0, + 28.0, + 36.0, + 75.0, + 76.0, + 55.0, + 72.0, + 146.0, + 142.0, + 110.0, + 192.0, + 243.0, + 459.0, + 401.0 + ], + "anchor_masks": { + "side52": [ + 0, + 1, + 2 + ], + "side26": [ + 3, + 4, + 5 + ], + "side13": [ + 6, + 7, + 8 + ] + }, + "iou_threshold": 0.5, + "confidence_threshold": 0.5 + } + }, + "mappings": { + "labels": [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", 
+ "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + "version": 1 +} \ No newline at end of file diff --git a/json/yolov7tiny.json b/json/yolov7tiny.json new file mode 100644 index 0000000..d656681 --- /dev/null +++ b/json/yolov7tiny.json @@ -0,0 +1,139 @@ +{ + "model": { + "xml": "yolov7tiny.xml", + "bin": "yolov7tiny.bin" + }, + "nn_config": { + "output_format": "detection", + "NN_family": "YOLO", + "input_size": "416x416", + "NN_specific_metadata": { + "classes": 80, + "coordinates": 4, + "anchors": [ + 12.0, + 16.0, + 19.0, + 36.0, + 40.0, + 28.0, + 36.0, + 75.0, + 76.0, + 55.0, + 72.0, + 146.0, + 142.0, + 110.0, + 192.0, + 243.0, + 459.0, + 401.0 + ], + "anchor_masks": { + "side52": [ + 0, + 1, + 2 + ], + "side26": [ + 3, + 4, + 5 + ], + "side13": [ + 6, + 7, + 8 + ] + }, + "iou_threshold": 0.5, + "confidence_threshold": 0.5 + } + }, + "mappings": { + "labels": [ + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine 
glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush" + ] + }, + "version": 1 +} \ No newline at end of file diff --git a/main_api.py b/main_api.py new file mode 100644 index 0000000..0f5be51 --- /dev/null +++ b/main_api.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 +""" +The code is edited from docs (https://docs.luxonis.com/projects/api/en/latest/samples/Yolo/tiny_yolo/) +We add parsing from JSON files that contain configuration +""" + +from pathlib import Path +import sys +import cv2 +import depthai as dai +import numpy as np +import time +import argparse +import json +import blobconverter + +# parse arguments +parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument( + "-m", + "--model", + help="Provide model name or model path for inference", + default="yolov4_tiny_coco_416x416", + type=str, +) +parser.add_argument( + "-c", + "--config", + help="Provide config path for inference", + default="json/yolov4-tiny.json", + type=str, +) +parser.add_argument( + "-s", + "--spatial", + help="Display spatial information", + action="store_true", + default=False, +) +parser.add_argument( + "-F", + "--fullFov", + help="If to :code:`False`, " + "it will first center crop the frame to meet the NN aspect ratio and then scale down the image", + default=True, + type=lambda v: str(v).strip().lower() not in ("", "0", "false", "no", "n", "off"), # type=bool would turn the text "False" into True +) +parser.add_argument( + "--syncNN", + help="Show synced frame", + action="store_true", + default=False, +) +args = parser.parse_args() +print("args: {}".format(args)) + +if args.spatial: + lr = True # Better handling for
occlusions + extended = False # Closer-in minimum depth, disparity range is doubled + subpixel = ( + False # Better accuracy for longer distance, fractional disparity 32-levels + ) + +# parse config +configPath = Path(args.config) +if not configPath.exists(): + raise ValueError("Path {} does not exist!".format(configPath)) + +with configPath.open() as f: + config = json.load(f) +nnConfig = config.get("nn_config", {}) + +# parse input shape +if "input_size" in nnConfig: + W, H = tuple(map(int, nnConfig.get("input_size").split("x"))) + +# extract metadata +metadata = nnConfig.get("NN_specific_metadata", {}) +classes = metadata.get("classes", {}) +coordinates = metadata.get("coordinates", {}) +anchors = metadata.get("anchors", {}) +anchorMasks = metadata.get("anchor_masks", {}) +iouThreshold = metadata.get("iou_threshold", {}) +confidenceThreshold = metadata.get("confidence_threshold", {}) + +print("config: {}".format(metadata)) + +# parse labels +nnMappings = config.get("mappings", {}) +labels = nnMappings.get("labels", {}) + +# get model path +nnPath = args.model +if not Path(nnPath).exists(): + print("No blob found at {}. 
Looking into DepthAI model zoo.".format(nnPath)) + nnPath = str( + blobconverter.from_zoo( + args.model, + shaves=6, + zoo_type="depthai", + use_cache=True, + output_dir="models", + ) + ) + + +def drawText( + frame, + text, + org, + color=(255, 255, 255), + bg_color=(128, 128, 128), + fontScale=0.5, + thickness=1, +): + cv2.putText( + frame, + text, + org, + cv2.FONT_HERSHEY_SIMPLEX, + fontScale, + bg_color, + thickness + 3, + cv2.LINE_AA, + ) + cv2.putText( + frame, + text, + org, + cv2.FONT_HERSHEY_SIMPLEX, + fontScale, + color, + thickness, + cv2.LINE_AA, + ) + + +def drawRect( + frame, p1, p2, color=(255, 255, 255), bg_color=(128, 128, 128), thickness=1 +): + cv2.rectangle(frame, pt1=p1, pt2=p2, color=bg_color, thickness=thickness + 3) + cv2.rectangle(frame, pt1=p1, pt2=p2, color=color, thickness=thickness) + + +def getDeviceInfo(deviceId=None, debug=False) -> dai.DeviceInfo: + """ + Find a correct :obj:`depthai.DeviceInfo` object, either matching provided :code:`deviceId` or selected by the user (if multiple devices available) + Useful for almost every app where there is a possibility of multiple devices being connected simultaneously + + Args: + deviceId (str, optional): Specifies device MX ID, for which the device info will be collected + + Returns: + depthai.DeviceInfo: Object representing selected device info + + Raises: + RuntimeError: if no DepthAI device was found or, if :code:`deviceId` was specified, no device with matching MX ID was found + ValueError: if value supplied by the user when choosing the DepthAI device was incorrect + """ + deviceInfos = [] + if debug: + deviceInfos = dai.XLinkConnection.getAllConnectedDevices() + else: + deviceInfos = dai.Device.getAllAvailableDevices() + + if len(deviceInfos) == 0: + raise RuntimeError("No DepthAI device found!") + else: + print("Available devices:") + for i, deviceInfo in enumerate(deviceInfos): + print( + f"[{i}] {deviceInfo.name} {deviceInfo.getMxId()} [{deviceInfo.state.name}]" + ) + + if deviceId 
== "list": + raise SystemExit(0) + elif deviceId is not None: + matchingDevice = next( + filter(lambda info: info.getMxId() == deviceId, deviceInfos), None + ) + if matchingDevice is None: + raise RuntimeError( + f"No DepthAI device found with id matching {deviceId} !" + ) + return matchingDevice + elif len(deviceInfos) == 1: + return deviceInfos[0] + else: + val = input("Which DepthAI Device you want to use: ") + try: + return deviceInfos[int(val)] + except: + raise ValueError("Incorrect value supplied: {}".format(val)) + + +def create_pipeline(): + # Create pipeline + pipeline = dai.Pipeline() + + # Define sources and outputs + camRgb = pipeline.create(dai.node.ColorCamera) + if args.spatial: + monoLeft = pipeline.createMonoCamera() + monoLeft.setBoardSocket(dai.CameraBoardSocket.LEFT) + monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) + monoLeft.setFps(60) + + monoRight = pipeline.createMonoCamera() + monoRight.setBoardSocket(dai.CameraBoardSocket.RIGHT) + monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) + monoRight.setFps(60) + + stereo = pipeline.createStereoDepth() + stereo.initialConfig.setConfidenceThreshold(245) + stereo.setLeftRightCheck(lr) + stereo.setExtendedDisparity(extended) + stereo.setSubpixel(subpixel) + stereo.setDepthAlign(dai.CameraBoardSocket.RGB) + if args.syncNN: + stereo.setOutputSize(*camRgb.getPreviewSize()) + else: + stereo.setOutputSize(*camRgb.getVideoSize()) + monoLeft.out.link(stereo.left) + monoRight.out.link(stereo.right) + + detectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork) + + stereo.depth.link(detectionNetwork.inputDepth) + detectionNetwork.setDepthLowerThreshold(100) + detectionNetwork.setDepthUpperThreshold(10000) + detectionNetwork.setBoundingBoxScaleFactor(0.3) + else: + detectionNetwork = pipeline.create(dai.node.YoloDetectionNetwork) + xoutRgb = pipeline.create(dai.node.XLinkOut) + nnOut = pipeline.create(dai.node.XLinkOut) + + 
xoutRgb.setStreamName("rgb") + nnOut.setStreamName("nn") + + # Properties + camRgb.setPreviewSize(W, H) + + camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) + camRgb.setInterleaved(False) + camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR) + camRgb.setFps(60) + camRgb.setPreviewKeepAspectRatio(not args.fullFov) + + # Network specific settings + detectionNetwork.setConfidenceThreshold(confidenceThreshold) + detectionNetwork.setNumClasses(classes) + detectionNetwork.setCoordinateSize(coordinates) + detectionNetwork.setAnchors(anchors) + detectionNetwork.setAnchorMasks(anchorMasks) + detectionNetwork.setIouThreshold(iouThreshold) + detectionNetwork.setBlobPath(nnPath) + # detectionNetwork.setNumInferenceThreads(2) + detectionNetwork.input.setBlocking(False) + detectionNetwork.input.setQueueSize(1) + + # Linking + camRgb.preview.link(detectionNetwork.input) + if args.syncNN: + detectionNetwork.passthrough.link(xoutRgb.input) + else: + camRgb.video.link(xoutRgb.input) + detectionNetwork.out.link(nnOut.input) + + return pipeline + + +def main(): + # Connect to device and start pipeline + with dai.Device(create_pipeline(), getDeviceInfo()) as device: + # Output queues will be used to get the rgb frames and nn data from the outputs defined above + qRgb = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) + qDet = device.getOutputQueue(name="nn", maxSize=4, blocking=False) + + frame = None + detections = [] + startTime = time.monotonic() + counter = 0 + bboxColors = ( + np.random.random(size=(256, 3)) * 256 + ) # Random Colors for bounding boxes + + # nn data, being the bounding box locations, are in <0..1> range - they need to be normalized with frame width/height + def frameNorm(frame, bbox, NN_SIZE=None): + if NN_SIZE is not None: + # Check difference in aspect ratio and apply correction to BBs + ar_diff = NN_SIZE[0] / NN_SIZE[1] - frame.shape[0] / frame.shape[1] + sel = 0 if 0 < ar_diff else 1 + bbox[sel::2] *= 1 - 
abs(ar_diff) + bbox[sel::2] += abs(ar_diff) / 2 + # Normalize bounding boxes + normVals = np.full(len(bbox), frame.shape[0]) + normVals[::2] = frame.shape[1] + return (np.clip(bbox, 0, 1) * normVals).astype(int) + + def displayFrame(name, frame, detections): + for detection in detections: + bbox = frameNorm( + frame, + np.array((detection.xmin, detection.ymin, detection.xmax, detection.ymax)), # ndarray: frameNorm scales bbox[sel::2] in place, which a tuple cannot do + None if args.fullFov else [W, H] + ) + drawText( + frame, + labels[detection.label], + (bbox[0] + 10, bbox[1] + 20), + bboxColors[detection.label], + ) + drawText( + frame, + f"{int(detection.confidence * 100)}%", + (bbox[0] + 10, bbox[1] + 40), + bboxColors[detection.label], + ) + drawRect( + frame, + (bbox[0], bbox[1]), + (bbox[2], bbox[3]), + bboxColors[detection.label], + ) + if hasattr( + detection, "spatialCoordinates" + ): # Display spatial coordinates as well + xMeters = detection.spatialCoordinates.x / 1000 + yMeters = detection.spatialCoordinates.y / 1000 + zMeters = detection.spatialCoordinates.z / 1000 + drawText( + frame, + "X: {:.2f} m".format(xMeters), + (bbox[0] + 10, bbox[1] + 60), + ) + drawText( + frame, + "Y: {:.2f} m".format(yMeters), + (bbox[0] + 10, bbox[1] + 75), + ) + drawText( + frame, + "Z: {:.2f} m".format(zMeters), + (bbox[0] + 10, bbox[1] + 90), + ) + # Show the frame + cv2.imshow( + name, + frame if args.syncNN else cv2.resize(frame, (0, 0), fx=0.5, fy=0.5), + ) + + while True: + inRgb = qRgb.get() + inDet = qDet.get() + + if inRgb is not None: + frame = inRgb.getCvFrame() + drawText( + frame, + "NN fps: {:.2f}".format(counter / (time.monotonic() - startTime)), + (2, frame.shape[0] - 4), + fontScale=1, + ) + + if inDet is not None: + detections = inDet.detections + counter += 1 + + if frame is not None: + displayFrame("rgb", frame, detections) + + if cv2.waitKey(1) == ord("q"): + break + + +if __name__ == "__main__": + main() diff --git a/main_sdk.py b/main_sdk.py new file mode 100644 index 0000000..373b6ff --- /dev/null +++ b/main_sdk.py @@ -0,0 +1,147
@@ +from depthai_sdk import Previews, FPSHandler, getDeviceInfo +from depthai_sdk.managers import ( + PipelineManager, + PreviewManager, + BlobManager, + NNetManager, +) +import depthai as dai +import cv2 +import argparse +from pathlib import Path + +# parse arguments +parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument( + "-m", + "--model", + help="Provide model path for inference", + default="yolov4_tiny_coco_416x416", + type=str, +) +parser.add_argument( + "-c", + "--config", + help="Provide config path for inference", + default="json/yolov4-tiny.json", + type=str, +) +parser.add_argument( + "-s", + "--spatial", + help="Display spatial information", + action="store_true", + default=False, +) +parser.add_argument( + "-F", + "--fullFov", + help="If to :code:`False`, " + "it will first center crop the frame to meet the NN aspect ratio and then scale down the image", + default=True, + type=lambda v: str(v).strip().lower() not in ("", "0", "false", "no", "n", "off"), # type=bool would turn the text "False" into True +) +args = parser.parse_args() +CONFIG_PATH = args.config + +# create blob, NN, and preview managers +if Path(args.model).exists(): + # initialize blob manager with path to the blob + bm = BlobManager(blobPath=args.model) +else: + # initialize blob manager with the name of the model otherwise + bm = BlobManager(zooName=args.model) + +nm = NNetManager(nnFamily="YOLO", inputSize=4) +nm.readConfig(CONFIG_PATH) # this will also parse the correct input size + +pm = PipelineManager() +pm.createColorCam( + previewSize=nm.inputSize, + res=dai.ColorCameraProperties.SensorResolution.THE_1080_P, + fps=60, + fullFov=args.fullFov, + orientation=None, + colorOrder=dai.ColorCameraProperties.ColorOrder.BGR, + xout=True, + xoutVideo=False, + xoutStill=False, + control=False, +) +if args.spatial: + pm.createLeftCam( + res=dai.MonoCameraProperties.SensorResolution.THE_400_P, + fps=60, + orientation=None, + xout=False, + control=False, + ) + pm.createRightCam( + res=dai.MonoCameraProperties.SensorResolution.THE_400_P, + fps=60, +
orientation=None, + xout=False, + control=False, + ) + pm.createDepth( + dct=245, + median=None, + sigma=0, + lr=True, + lrcThreshold=5, + extended=False, + subpixel=False, + useDisparity=False, + useDepth=False, + useRectifiedLeft=False, + useRectifiedRight=False, + runtimeSwitch=False, + alignment=dai.CameraBoardSocket.RGB, + control=False, + ) +# create preview manager +fpsHandler = FPSHandler() +pv = PreviewManager(display=[Previews.color.name], + fpsHandler=fpsHandler) + +# create NN with managers +nn = nm.createNN( + pipeline=pm.pipeline, + nodes=pm.nodes, + blobPath=bm.getBlob( + shaves=6, openvinoVersion=pm.pipeline.getOpenVINOVersion(), zooType="depthai" + ), + source=Previews.color.name, + useDepth=args.spatial, + minDepth=100, + maxDepth=10000, + sbbScaleFactor=0.3, + fullFov=args.fullFov, + useImageManip=False, +) +pm.addNn(nn) + +# initialize pipeline +with dai.Device(pm.pipeline, getDeviceInfo()) as device: + # create outputs + pv.createQueues(device) + nm.createQueues(device) + + nnData = [] + + while True: + + # parse outputs + pv.prepareFrames() + inNn = nm.outputQueue.tryGet() + + if inNn is not None: + nnData = nm.decode(inNn) + # count FPS + fpsHandler.tick("color") + + nm.draw(pv, nnData) + pv.showFrames() + + if cv2.waitKey(1) == ord("q"): + break diff --git a/models/.gitattributes b/models/.gitattributes new file mode 100644 index 0000000..e3a5ed8 --- /dev/null +++ b/models/.gitattributes @@ -0,0 +1 @@ +*.blob filter=lfs diff=lfs merge=lfs -text diff --git a/models/yolo-v3-tf_openvino_2021.4_6shave.blob b/models/yolo-v3-tf_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..6ee6256 --- /dev/null +++ b/models/yolo-v3-tf_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4f2c551286dc5308dabc09b2eb5e52b25227967aafc5743a61f738030701ff +size 124266688 diff --git a/models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob b/models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob new file mode 
100644 index 0000000..e7447e0 --- /dev/null +++ b/models/yolo-v3-tiny-tf_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2729a42e277e994ba68f0e914b66fe52ce665175a80a5596b8d50d18b220343 +size 17750208 diff --git a/models/yolo-v4-tf_openvino_2021.4_6shave.blob b/models/yolo-v4-tf_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..cbfb206 --- /dev/null +++ b/models/yolo-v4-tf_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f39fdd55ee093293977482e5ff5d2b641ed0ea21a7835db02ee75a690753df +size 129907840 diff --git a/models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob b/models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..f068fc8 --- /dev/null +++ b/models/yolo-v4-tiny-tf_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0229c068ff220631affd323b84eece865bfe28d5fe8950ab602825a8444b86f4 +size 12168064 diff --git a/models/yolov4_tiny_coco_416x416_openvino_2021.4_6shave.blob b/models/yolov4_tiny_coco_416x416_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..86e4a15 --- /dev/null +++ b/models/yolov4_tiny_coco_416x416_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c6c2ca020bb7b17d199058c098d770224d19ebf703d7559234cdc2177cac242 +size 12169344 diff --git a/models/yolov5n_openvino_2021.4_6shave.blob b/models/yolov5n_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..5f62f8b --- /dev/null +++ b/models/yolov5n_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9772ab406cdf0bc33248677027f213847117a95c951817c458187512f1fe377e +size 3786368 diff --git a/models/yolov6n_openvino_2021.4_6shave.blob b/models/yolov6n_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..7d74879 --- /dev/null +++ b/models/yolov6n_openvino_2021.4_6shave.blob @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f532fc66a4504c7ccefe35aefac70de4a5ce467296219e02ace10ab493a7688 +size 8690816 diff --git a/models/yolov6t_openvino_2021.4_6shave.blob b/models/yolov6t_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..e30f45c --- /dev/null +++ b/models/yolov6t_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac1428170efc90211c679573bb32077990d745979a9ac852498f0ded6fee0439 +size 30049728 diff --git a/models/yolov7_openvino_2021.4_6shave.blob b/models/yolov7_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..96df574 --- /dev/null +++ b/models/yolov7_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e51e52f58304690c3788caaf3665f60c1781cd4de135ab3489879741435b23f +size 74211072 diff --git a/models/yolov7tiny_openvino_2021.4_6shave.blob b/models/yolov7tiny_openvino_2021.4_6shave.blob new file mode 100644 index 0000000..e32a4c9 --- /dev/null +++ b/models/yolov7tiny_openvino_2021.4_6shave.blob @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a80c16788ac133aa300e996b2c937cfebea92342b20521263bfc2699cc085f +size 12511936 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f7b4723 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +--extra-index-url https://www.piwheels.org/simple +opencv-python==4.4.0.46 ; platform_machine == "armv6l" or platform_machine == "armv7l" +opencv-python==4.5.1.48 ; platform_machine != "armv6l" and platform_machine != "armv7l" and python_version >= "3.7" and python_version < "3.10" +opencv-python==4.5.4.58 ; platform_machine != "armv6l" and platform_machine != "armv7l" and python_version == "3.10" +opencv-contrib-python==4.4.0.46 ; platform_machine == "armv6l" or platform_machine == "armv7l" +opencv-contrib-python==4.5.1.48 ; platform_machine != "armv6l" and platform_machine != "armv7l" and python_version >= "3.7" and 
python_version < "3.10" +opencv-contrib-python==4.5.4.58 ; platform_machine != "armv6l" and platform_machine != "armv7l" and python_version == "3.10" +depthai>=2.16.0.0 +depthai-sdk>=1.2.0 +numpy>=1.18.5