Skip to content

Commit

Permalink
refactor: optimize preprocess (#227)
Browse files Browse the repository at this point in the history
* build(deps-dev): bump vitepress from 1.0.0-rc.40 to 1.0.0-rc.44

Bumps [vitepress](https://github.com/vuejs/vitepress) from 1.0.0-rc.40 to 1.0.0-rc.44.
- [Release notes](https://github.com/vuejs/vitepress/releases)
- [Changelog](https://github.com/vuejs/vitepress/blob/main/CHANGELOG.md)
- [Commits](vuejs/vitepress@v1.0.0-rc.40...v1.0.0-rc.44)

---
updated-dependencies:
- dependency-name: vitepress
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

* feat: add support for sensor data classification

* build(deps-dev): bump vitepress from 1.0.0-rc.44 to 1.0.1

Bumps [vitepress](https://github.com/vuejs/vitepress) from 1.0.0-rc.44 to 1.0.1.
- [Release notes](https://github.com/vuejs/vitepress/releases)
- [Changelog](https://github.com/vuejs/vitepress/blob/main/CHANGELOG.md)
- [Commits](vuejs/vitepress@v1.0.0-rc.44...v1.0.1)

---
updated-dependencies:
- dependency-name: vitepress
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

* build(deps-dev): bump vitepress from 1.0.1 to 1.1.4

Bumps [vitepress](https://github.com/vuejs/vitepress) from 1.0.1 to 1.1.4.
- [Release notes](https://github.com/vuejs/vitepress/releases)
- [Changelog](https://github.com/vuejs/vitepress/blob/main/CHANGELOG.md)
- [Commits](vuejs/vitepress@v1.0.1...v1.1.4)

---
updated-dependencies:
- dependency-name: vitepress
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

* refactor: async copy between device and cpu

* refactor: async copy in data preprocess

* refactor: cached mosaic

* refactor: replace transform modules from mmyolo

* chore: register modified modules

* chore: update configs

* fix: epochs

---------

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: LynnL4 <[email protected]>
Co-authored-by: nullptr <nullptr@localhost>
Co-authored-by: Hongtai Liu <[email protected]>
  • Loading branch information
5 people authored Jun 13, 2024
1 parent f67a9a8 commit 9fdfde3
Show file tree
Hide file tree
Showing 24 changed files with 2,587 additions and 561 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) Seeed Technology Co.,Ltd. All rights reserved.

_base_ = './base.py'

default_scope = 'sscma'
Expand Down
1 change: 1 addition & 0 deletions configs/accelerometer/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) Seeed Technology Co.,Ltd. All rights reserved.

_base_ = '../_base_/default_runtime_cls.py'

# defaults input type image
Expand Down
76 changes: 57 additions & 19 deletions configs/swift_yolo/base_arch.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@
# Number of input data per iteration in the model training phase
batch = 16
# Number of threads used to load data during training, this value should be adjusted accordingly to the training batch
workers = 4
workers = 16
# Whether to use cached data when performing data augmentation
use_cached = True
# The maximum number of cached images
max_cached_images = 4096
# Optimizer weight decay value
weight_decay = 0.0005
# SGD momentum/Adam beta1
Expand All @@ -49,6 +53,8 @@
lr_factor = 0.01
# persistent_workers must be False if num_workers is 0
persistent_workers = True
# Disable mosaic augmentation for the final 15 epochs (stage 2)
close_mosaic_epochs = 15

# VAL
# Batch size of a single GPU during validation
Expand Down Expand Up @@ -116,7 +122,7 @@
model = dict(
type='sscma.YOLODetector',
data_preprocessor=dict(
type='mmdet.DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True
type='sscma.DetDataPreprocessor', mean=[0.0, 0.0, 0.0], std=[255.0, 255.0, 255.0], bgr_to_rgb=True
),
backbone=dict(
type='YOLOv5CSPDarknet',
Expand Down Expand Up @@ -183,32 +189,58 @@

pre_transform = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='sscma.YOLOLoadAnnotations', with_bbox=True),
]

last_transform = [
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='sscma.YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
),
]

train_pipeline = [
*pre_transform,
dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
dict(
type='YOLOv5RandomAffine',
type='sscma.Mosaic',
img_scale=imgsz,
pad_val=114.0,
use_cached=use_cached,
max_cached_images=max_cached_images,
),
dict(
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_pipeline_stage2 = [
*pre_transform,
dict(type='sscma.YOLOv5KeepRatioResize', scale=imgsz),
dict(type='sscma.LetterResize', scale=imgsz, allow_scale_up=True, pad_val=dict(img=114.0)),
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_dataloader = dict(
Expand All @@ -227,11 +259,12 @@
),
)


test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='YOLOv5KeepRatioResize', scale=imgsz),
dict(type='LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)),
dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
dict(type='sscma.LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='sscma.YOLOv5KeepRatioResize', scale=imgsz),
dict(type='sscma.LetterResize', scale=imgsz, allow_scale_up=False, pad_val=dict(img=114)),
dict(type='sscma.LoadAnnotations', with_bbox=True, _scope_='mmdet'),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param'),
Expand Down Expand Up @@ -277,7 +310,12 @@
custom_hooks = [
dict(
type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49
)
),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=epochs - close_mosaic_epochs,
switch_pipeline=train_pipeline_stage2,
),
]

val_evaluator = dict(type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=data_root + val_ann, metric='bbox')
Expand Down
54 changes: 41 additions & 13 deletions configs/swift_yolo/swift_yolo_large_1xb16_300e_coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
height = 640
width = 640
batch = 16
workers = 2
workers = 16
use_cached = True
max_cached_images = 4096
val_batch = batch
val_workers = workers
imgsz = (width, height)
Expand Down Expand Up @@ -83,32 +85,58 @@

pre_transform = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='sscma.YOLOLoadAnnotations', with_bbox=True),
]

last_transform = [
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='sscma.YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
),
]

train_pipeline = [
*pre_transform,
dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
dict(
type='YOLOv5RandomAffine',
type='sscma.Mosaic',
img_scale=imgsz,
pad_val=114.0,
use_cached=use_cached,
max_cached_images=max_cached_images,
),
dict(
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_pipeline_stage2 = [
*pre_transform,
dict(type='sscma.YOLOv5KeepRatioResize', scale=imgsz),
dict(type='sscma.LetterResize', scale=imgsz, allow_scale_up=True, pad_val=dict(img=114.0)),
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_dataloader = dict(
Expand Down
54 changes: 41 additions & 13 deletions configs/swift_yolo/swift_yolo_mb2_1xb16_300e_coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
# batch
batch = 8
# workers
workers = 2
workers = 8
use_cached = True
max_cached_images = 4096
# Batch size of a single GPU during validation
val_batch = 1
# Worker to pre-fetch data for each single GPU during validation
Expand Down Expand Up @@ -181,32 +183,58 @@

pre_transform = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='sscma.YOLOLoadAnnotations', with_bbox=True),
]

last_transform = [
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='sscma.YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
),
]

train_pipeline = [
*pre_transform,
dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
dict(
type='YOLOv5RandomAffine',
type='sscma.Mosaic',
img_scale=imgsz,
pad_val=114.0,
use_cached=use_cached,
max_cached_images=max_cached_images,
),
dict(
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_pipeline_stage2 = [
*pre_transform,
dict(type='sscma.YOLOv5KeepRatioResize', scale=imgsz),
dict(type='sscma.LetterResize', scale=imgsz, allow_scale_up=True, pad_val=dict(img=114.0)),
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_dataloader = dict(
Expand Down
54 changes: 41 additions & 13 deletions configs/swift_yolo/swift_yolo_medium_1xb16_300e_coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
height = 640
width = 640
batch = 16
workers = 2
workers = 16
use_cached = True
max_cached_images = 4096
val_batch = batch
val_workers = workers
imgsz = (width, height)
Expand Down Expand Up @@ -83,32 +85,58 @@

pre_transform = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='sscma.YOLOLoadAnnotations', with_bbox=True),
]

last_transform = [
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='sscma.YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
),
]

train_pipeline = [
*pre_transform,
dict(type='Mosaic', img_scale=imgsz, pad_val=114.0, pre_transform=pre_transform),
dict(
type='YOLOv5RandomAffine',
type='sscma.Mosaic',
img_scale=imgsz,
pad_val=114.0,
use_cached=use_cached,
max_cached_images=max_cached_images,
),
dict(
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_pipeline_stage2 = [
*pre_transform,
dict(type='sscma.YOLOv5KeepRatioResize', scale=imgsz),
dict(type='sscma.LetterResize', scale=imgsz, allow_scale_up=True, pad_val=dict(img=114.0)),
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={'img': 'image', 'gt_bboxes': 'bboxes'},
),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')
type='sscma.YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
# imgsz is (width, height)
border=(-imgsz[0] // 2, -imgsz[1] // 2),
border_val=(114, 114, 114),
),
*last_transform,
]

train_dataloader = dict(
Expand Down
Loading

0 comments on commit 9fdfde3

Please sign in to comment.