From 68676c38f29420af10c74bdcdfdbdea1719afc33 Mon Sep 17 00:00:00 2001 From: MikasaLee <770486267@qq.com> Date: Mon, 5 Aug 2024 09:16:19 +0000 Subject: [PATCH] robust_scanner_0805 --- .../datasets/CASIA_HWDB_official_1x_data.py | 14 + .../config/satrn/satrn_shallow_5e_st_HWCR.py | 11 +- .../datasets/CASIA_HWDB_chineseocr_data.py | 14 + .../datasets/CASIA_HWDB_official_1x_data.py | 14 + .../datasets/CASIA_HWDB_official_2x_data.py | 21 + .../_base_/datasets/bnu_EnsExam_ppocrlabel.py | 14 + .../configs/_base_/datasets/coco_text_v1.py | 8 + .../configs/_base_/datasets/cute80.py | 8 + .../configs/_base_/datasets/icdar2011.py | 8 + .../configs/_base_/datasets/icdar2013.py | 21 + .../configs/_base_/datasets/icdar2015.py | 21 + .../configs/_base_/datasets/iiit5k.py | 14 + .../configs/_base_/datasets/mjsynth.py | 13 + .../configs/_base_/datasets/scut_hccdoc.py | 24 + .../configs/_base_/datasets/svt.py | 14 + .../configs/_base_/datasets/svtp.py | 14 + .../configs/_base_/datasets/synthtext.py | 19 + .../configs/_base_/datasets/synthtext_add.py | 8 + .../configs/_base_/datasets/totaltext.py | 15 + .../configs/_base_/datasets/toy_data.py | 17 + .../configs/_base_/default_runtime.py | 52 ++ .../_base_/schedules/schedule_adadelta_5e.py | 9 + .../_base_/schedules/schedule_adam_base.py | 13 + .../_base_/schedules/schedule_adam_step_5e.py | 9 + .../_base_/schedules/schedule_adamw_cos_6e.py | 21 + .../configs/robust_scanner/README.md | 62 ++ .../_base_robustscanner_resnet31.py | 117 ++++ .../_base_robustscanner_resnet31_HWCR.py | 132 +++++ .../configs/robust_scanner/metafile.yml | 59 ++ ...anner_resnet31_5e_st-sub_mj-sub_sa_real.py | 70 +++ .../robustscanner_resnet31_5e_toy.py | 37 ++ .../robustscanner_resnet31_HWCR.py | 111 ++++ .../show_results/vis_data/config.py | 561 ++++++++++++++++++ mmocr/engine/hooks/visualization_hook.py | 1 + nohup_train_script.sh | 3 +- 35 files changed, 1545 insertions(+), 4 deletions(-) create mode 100644 lrr_ocr/lrr_SATRN/config/_base_/datasets/CASIA_HWDB_official_1x_data.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_chineseocr_data.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_1x_data.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_2x_data.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/bnu_EnsExam_ppocrlabel.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/coco_text_v1.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/cute80.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2011.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2013.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2015.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/iiit5k.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/mjsynth.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/scut_hccdoc.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svt.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svtp.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext_add.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/totaltext.py create mode 100755 lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/toy_data.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/default_runtime.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adadelta_5e.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_base.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_step_5e.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adamw_cos_6e.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/robust_scanner/README.md create mode 100644 lrr_ocr/lrr_robust_scanner/configs/robust_scanner/_base_robustscanner_resnet31.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/robust_scanner/_base_robustscanner_resnet31_HWCR.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/robust_scanner/metafile.yml create mode 100644 lrr_ocr/lrr_robust_scanner/configs/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/robust_scanner/robustscanner_resnet31_5e_toy.py create mode 100644 lrr_ocr/lrr_robust_scanner/configs/robust_scanner/robustscanner_resnet31_HWCR.py create mode 100644 lrr_ocr/lrr_robust_scanner/show_results/vis_data/config.py diff --git a/lrr_ocr/lrr_SATRN/config/_base_/datasets/CASIA_HWDB_official_1x_data.py b/lrr_ocr/lrr_SATRN/config/_base_/datasets/CASIA_HWDB_official_1x_data.py new file mode 100644 index 0000000..3a3f124 --- /dev/null +++ b/lrr_ocr/lrr_SATRN/config/_base_/datasets/CASIA_HWDB_official_1x_data.py @@ -0,0 +1,14 @@ +CASIA_HWDB_official_1x_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_official_1x_dataset' + +CASIA_HWDB_official_1x_data_textrecog_train = dict( + type='OCRDataset', + data_root=CASIA_HWDB_official_1x_data_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +CASIA_HWDB_official_1x_data_textrecog_test = dict( + type='OCRDataset', + data_root=CASIA_HWDB_official_1x_data_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py b/lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py index c5a8151..b702824 100644 --- a/lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py +++ b/lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py @@ -1,6 +1,7 @@ _base_ = [ '../_base_/datasets/scut_hccdoc.py', '../_base_/datasets/CASIA_HWDB_chineseocr_data.py', + '../_base_/datasets/CASIA_HWDB_official_1x_data.py', '../_base_/datasets/CASIA_HWDB_official_2x_data.py', '../_base_/datasets/bnu_EnsExam_ppocrlabel.py', '../_base_/default_runtime.py', @@ -14,12 +15,16 @@ _base_.CASIA_HWDB_official_2x_data_textrecog_train, _base_.CASIA_HWDB_official_2x_data_textrecog_val, _base_.scut_hccdoc_textrecog_train, + # new add CASIA_HWDB_official_1x + _base_.CASIA_HWDB_official_1x_data_textrecog_train, ] test_list = [ _base_.CASIA_HWDB_official_2x_data_textrecog_test, _base_.scut_hccdoc_textrecog_test, _base_.bnu_EnsExam_ppocrlabel_textrecog_test, + # new add CASIA_HWDB_official_1x + _base_.CASIA_HWDB_official_1x_data_textrecog_test, ] train_dataset = dict( @@ -28,7 +33,7 @@ type='ConcatDataset', datasets=test_list, pipeline=_base_.test_pipeline) default_hooks = dict( - logger=dict(type='LoggerHook', interval=1000), # 原本是100,但是batch_size设小的话那这个就输出的太多了。 + logger=dict(type='LoggerHook', interval=2000), # 原本是100,但是batch_size设小的话那这个就输出的太多了。 checkpoint=dict(type='CheckpointHook', interval=1, by_epoch=True, @@ -39,7 +44,7 @@ ) train_dataloader = dict( - batch_size=32, # 原本是64,小一点。 + batch_size=24, # 原本是64,小一点。 num_workers=32, persistent_workers=True, pin_memory=True, @@ -67,7 +72,7 @@ dict(type='CharMetric'), dict(type='CRandARMetric') ], - dataset_prefixes=['Casia_Hwdb_2x','SCUT_HccDoc','bnu_EnsExam_ppocrlabel']) # 这个顺序要和 test_list 一致,要不然就乱了 + dataset_prefixes=['Casia_Hwdb_2x','SCUT_HccDoc','bnu_EnsExam_ppocrlabel','Casia_Hwdb_1x']) # 这个顺序要和 test_list 一致,要不然就乱了 test_evaluator = val_evaluator diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_chineseocr_data.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_chineseocr_data.py new file mode 100644 index 0000000..4b17b66 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_chineseocr_data.py @@ -0,0 +1,14 @@ +CASIA_HWDB_chineseocr_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_chinese_ocr_dataset' + +CASIA_HWDB_chineseocr_data_textrecog_train = dict( + type='OCRDataset', + data_root=CASIA_HWDB_chineseocr_data_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +CASIA_HWDB_chineseocr_data_textrecog_test = dict( + type='OCRDataset', + data_root=CASIA_HWDB_chineseocr_data_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_1x_data.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_1x_data.py new file mode 100644 index 0000000..3a3f124 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_1x_data.py @@ -0,0 +1,14 @@ +CASIA_HWDB_official_1x_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_official_1x_dataset' + +CASIA_HWDB_official_1x_data_textrecog_train = dict( + type='OCRDataset', + data_root=CASIA_HWDB_official_1x_data_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +CASIA_HWDB_official_1x_data_textrecog_test = dict( + type='OCRDataset', + data_root=CASIA_HWDB_official_1x_data_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_2x_data.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_2x_data.py new file mode 100644 index 0000000..578133e --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/CASIA_HWDB_official_2x_data.py @@ -0,0 +1,21 @@ +CASIA_HWDB_official_2x_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_official_2x_dataset' + +CASIA_HWDB_official_2x_data_textrecog_train = dict( + type='OCRDataset', + data_root=CASIA_HWDB_official_2x_data_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +CASIA_HWDB_official_2x_data_textrecog_val = dict( + type='OCRDataset', + data_root=CASIA_HWDB_official_2x_data_textrecog_data_root, + ann_file='textrecog_val.json', + # test_mode=True, # 这玩意我也当做训练集扔进去 + pipeline=None) + +CASIA_HWDB_official_2x_data_textrecog_test = dict( + type='OCRDataset', + data_root=CASIA_HWDB_official_2x_data_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/bnu_EnsExam_ppocrlabel.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/bnu_EnsExam_ppocrlabel.py new file mode 100644 index 0000000..b362c1e --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/bnu_EnsExam_ppocrlabel.py @@ -0,0 +1,14 @@ +bnu_EnsExam_ppocrlabel_textrecog_data_root = '/lirunrui/datasets/mmocr_bnu_EnsExam_PPOCRLabel' + +bnu_EnsExam_ppocrlabel_textrecog_train = dict( + type='OCRDataset', + data_root=bnu_EnsExam_ppocrlabel_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +bnu_EnsExam_ppocrlabel_textrecog_test = dict( + type='OCRDataset', + data_root=bnu_EnsExam_ppocrlabel_textrecog_data_root, + ann_file='textrecog_train.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/coco_text_v1.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/coco_text_v1.py new file mode 100644 index 0000000..b88bcd3 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/coco_text_v1.py @@ -0,0 +1,8 @@ +cocotextv1_textrecog_data_root = 'data/rec/coco_text_v1' + +cocotextv1_textrecog_train = dict( + type='OCRDataset', + data_root=cocotextv1_textrecog_data_root, + ann_file='train_labels.json', + test_mode=False, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/cute80.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/cute80.py new file mode 100644 index 0000000..9d96e36 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/cute80.py @@ -0,0 +1,8 @@ +cute80_textrecog_data_root = 'data/cute80' + +cute80_textrecog_test = dict( + type='OCRDataset', + data_root=cute80_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2011.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2011.py new file mode 100644 index 0000000..6071c25 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2011.py @@ -0,0 +1,8 @@ +icdar2011_textrecog_data_root = 'data/rec/icdar_2011/' + +icdar2011_textrecog_train = dict( + type='OCRDataset', + data_root=icdar2011_textrecog_data_root, + ann_file='train_labels.json', + test_mode=False, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2013.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2013.py new file mode 100644 index 0000000..e002b32 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2013.py @@ -0,0 +1,21 @@ +icdar2013_textrecog_data_root = 'data/icdar2013' + +icdar2013_textrecog_train = dict( + type='OCRDataset', + data_root=icdar2013_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +icdar2013_textrecog_test = dict( + type='OCRDataset', + data_root=icdar2013_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) + +icdar2013_857_textrecog_test = dict( + type='OCRDataset', + data_root=icdar2013_textrecog_data_root, + ann_file='textrecog_test_857.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2015.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2015.py new file mode 100644 index 0000000..d1268ce --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2015.py @@ -0,0 +1,21 @@ +icdar2015_textrecog_data_root = 'data/icdar2015' + +icdar2015_textrecog_train = dict( + type='OCRDataset', + data_root=icdar2015_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +icdar2015_textrecog_test = dict( + type='OCRDataset', + data_root=icdar2015_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) + +icdar2015_1811_textrecog_test = dict( + type='OCRDataset', + data_root=icdar2015_textrecog_data_root, + ann_file='textrecog_test_1811.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/iiit5k.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/iiit5k.py new file mode 100644 index 0000000..f1ef085 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/iiit5k.py @@ -0,0 +1,14 @@ +iiit5k_textrecog_data_root = 'data/iiit5k' + +iiit5k_textrecog_train = dict( + type='OCRDataset', + data_root=iiit5k_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +iiit5k_textrecog_test = dict( + type='OCRDataset', + data_root=iiit5k_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/mjsynth.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/mjsynth.py new file mode 100644 index 0000000..defe84a --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/mjsynth.py @@ -0,0 +1,13 @@ +mjsynth_textrecog_data_root = 'data/mjsynth' + +mjsynth_textrecog_train = dict( + type='OCRDataset', + data_root=mjsynth_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +mjsynth_sub_textrecog_train = dict( + type='OCRDataset', + data_root=mjsynth_textrecog_data_root, + ann_file='subset_textrecog_train.json', + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/scut_hccdoc.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/scut_hccdoc.py new file mode 100644 index 0000000..578abe0 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/scut_hccdoc.py @@ -0,0 +1,24 @@ +scut_hccdoc_textrecog_data_root = '/lirunrui/datasets/mmocr_SCUT_HCCDoc_Dataset' +default_hooks = dict( + + visualization=dict( + type='VisualizationHook', + interval=1, + enable=False, + show=False, + draw_gt=False, + draw_pred=False, + font_properties='/usr/share/fonts/fonts_library/simsun.ttc'), +) +scut_hccdoc_textrecog_train = dict( + type='OCRDataset', + data_root=scut_hccdoc_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +scut_hccdoc_textrecog_test = dict( + type='OCRDataset', + data_root=scut_hccdoc_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svt.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svt.py new file mode 100644 index 0000000..259b9ed --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svt.py @@ -0,0 +1,14 @@ +svt_textrecog_data_root = 'data/svt' + +svt_textrecog_train = dict( + type='OCRDataset', + data_root=svt_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +svt_textrecog_test = dict( + type='OCRDataset', + data_root=svt_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svtp.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svtp.py new file mode 100644 index 0000000..a917bea --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svtp.py @@ -0,0 +1,14 @@ +svtp_textrecog_data_root = 'data/svtp' + +svtp_textrecog_train = dict( + type='OCRDataset', + data_root=svtp_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +svtp_textrecog_test = dict( + type='OCRDataset', + data_root=svtp_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext.py new file mode 100644 index 0000000..6d3aa88 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext.py @@ -0,0 +1,19 @@ +synthtext_textrecog_data_root = '/lirunrui/datasets/synthtext' + +synthtext_textrecog_train = dict( + type='OCRDataset', + data_root=synthtext_textrecog_data_root, + ann_file='textrecog_train.json', + pipeline=None) + +synthtext_sub_textrecog_train = dict( + type='OCRDataset', + data_root=synthtext_textrecog_data_root, + ann_file='subset_textrecog_train.json', + pipeline=None) + +synthtext_an_textrecog_train = dict( + type='OCRDataset', + data_root=synthtext_textrecog_data_root, + ann_file='alphanumeric_textrecog_train.json', + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext_add.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext_add.py new file mode 100644 index 0000000..f31e41f --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext_add.py @@ -0,0 +1,8 @@ +synthtext_add_textrecog_data_root = 'data/rec/synthtext_add/' + +synthtext_add_textrecog_train = dict( + type='OCRDataset', + data_root=synthtext_add_textrecog_data_root, + ann_file='train_labels.json', + test_mode=False, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/totaltext.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/totaltext.py new file mode 100644 index 0000000..0774343 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/totaltext.py @@ -0,0 +1,15 @@ +totaltext_textrecog_data_root = 'data/totaltext/' + +totaltext_textrecog_train = dict( + type='OCRDataset', + data_root=totaltext_textrecog_data_root, + ann_file='textrecog_train.json', + test_mode=False, + pipeline=None) + +totaltext_textrecog_test = dict( + type='OCRDataset', + data_root=totaltext_textrecog_data_root, + ann_file='textrecog_test.json', + test_mode=True, + pipeline=None) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/toy_data.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/toy_data.py new file mode 100755 index 0000000..ca73d19 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/toy_data.py @@ -0,0 +1,17 @@ +toy_data_root = 'tests/data/rec_toy_dataset/' + +toy_rec_train = dict( + type='OCRDataset', + data_root=toy_data_root, + data_prefix=dict(img_path='imgs/'), + ann_file='labels.json', + pipeline=None, + test_mode=False) + +toy_rec_test = dict( + type='OCRDataset', + data_root=toy_data_root, + data_prefix=dict(img_path='imgs/'), + ann_file='labels.json', + pipeline=None, + test_mode=True) diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/default_runtime.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/default_runtime.py new file mode 100644 index 0000000..a3c72b9 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/default_runtime.py @@ -0,0 +1,52 @@ +default_scope = 'mmocr' +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) +randomness = dict(seed=None) + +# default_hooks = dict( +# timer=dict(type='IterTimerHook'), +# logger=dict(type='LoggerHook', interval=100), +# param_scheduler=dict(type='ParamSchedulerHook'), +# checkpoint=dict(type='CheckpointHook', interval=1), +# sampler_seed=dict(type='DistSamplerSeedHook'), +# sync_buffer=dict(type='SyncBuffersHook'), +# visualization=dict( +# type='VisualizationHook', +# interval=1, +# enable=False, +# show=False, +# draw_gt=False, +# draw_pred=False), +# ) + + +# Logging +log_level = 'INFO' +log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True) + +load_from = None +resume = False + +# Evaluation +val_evaluator = dict( + type='MultiDatasetsEvaluator', + metrics=[ + dict( + type='WordMetric', + mode=['exact', 'ignore_case', 'ignore_case_symbol']), + dict(type='CharMetric') + ], + dataset_prefixes=None) +test_evaluator = val_evaluator + +# Visualization +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='TextRecogLocalVisualizer', + name='visualizer', + vis_backends=vis_backends) + +tta_model = dict(type='EncoderDecoderRecognizerTTAModel') diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adadelta_5e.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adadelta_5e.py new file mode 100644 index 0000000..465072e --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adadelta_5e.py @@ -0,0 +1,9 @@ +optim_wrapper = dict( + type='OptimWrapper', optimizer=dict(type='Adadelta', lr=1.0)) +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +# learning rate +param_scheduler = [ + dict(type='ConstantLR', factor=1.0), +] diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_base.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_base.py new file mode 100644 index 0000000..744f328 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_base.py @@ -0,0 +1,13 @@ +# Note: This schedule config serves as a base config for other schedules. +# Users would have to at least fill in "max_epochs" and "val_interval" +# in order to use this config in their experiments. + +# optimizer +optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=3e-4)) +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=None, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +# learning policy +param_scheduler = [ + dict(type='ConstantLR', factor=1.0), +] diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_step_5e.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_step_5e.py new file mode 100644 index 0000000..73aad76 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adam_step_5e.py @@ -0,0 +1,9 @@ +# optimizer +optim_wrapper = dict(type='OptimWrapper', optimizer=dict(type='Adam', lr=1e-3)) +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +# learning policy +param_scheduler = [ + dict(type='MultiStepLR', milestones=[3, 4], end=5), +] diff --git a/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adamw_cos_6e.py b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adamw_cos_6e.py new file mode 100644 index 0000000..cd9d293 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/_base_/schedules/schedule_adamw_cos_6e.py @@ -0,0 +1,21 @@ +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict( + type='AdamW', + lr=4e-4, + betas=(0.9, 0.999), + eps=1e-08, + weight_decay=0.05)) +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=6, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning policy +param_scheduler = [ + dict( + type='CosineAnnealingLR', + T_max=6, + eta_min=4e-6, + convert_to_iter_based=True) +] diff --git a/lrr_ocr/lrr_robust_scanner/configs/robust_scanner/README.md b/lrr_ocr/lrr_robust_scanner/configs/robust_scanner/README.md new file mode 100644 index 0000000..bc7403e --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/robust_scanner/README.md @@ -0,0 +1,62 @@ +# RobustScanner + +> [RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition](https://arxiv.org/abs/2007.07542) + + + +## Abstract + +The attention-based encoder-decoder framework has recently achieved impressive results for scene text recognition, and many variants have emerged with improvements in recognition quality. However, it performs poorly on contextless texts (e.g., random character sequences) which is unacceptable in most of real application scenarios. In this paper, we first deeply investigate the decoding process of the decoder. We empirically find that a representative character-level sequence decoder utilizes not only context information but also positional information. Contextual information, which the existing approaches heavily rely on, causes the problem of attention drift. To suppress such side-effect, we propose a novel position enhancement branch, and dynamically fuse its outputs with those of the decoder attention module for scene text recognition. Specifically, it contains a position aware module to enable the encoder to output feature vectors encoding their own spatial positions, and an attention module to estimate glimpses using the positional clue (i.e., the current decoding time step) only. The dynamic fusion is conducted for more robust feature via an element-wise gate mechanism. Theoretically, our proposed method, dubbed \\emph{RobustScanner}, decodes individual characters with dynamic ratio between context and positional clues, and utilizes more positional ones when the decoding sequences with scarce context, and thus is robust and practical. Empirically, it has achieved new state-of-the-art results on popular regular and irregular text recognition benchmarks while without much performance drop on contextless benchmarks, validating its robustness in both contextual and contextless application scenarios. + +
+ +
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | source | +| :--------: | :----------: | :--------: | :------------------------: | +| icdar_2011 | 3567 | 20 | real | +| icdar_2013 | 848 | 20 | real | +| icdar2015 | 4468 | 20 | real | +| coco_text | 42142 | 20 | real | +| IIIT5K | 2000 | 20 | real | +| SynthText | 2400000 | 1 | synth | +| SynthAdd | 1216889 | 1 | synth, 1.6m in [\[1\]](#1) | +| Syn90k | 2400000 | 1 | synth | + +### Test Dataset + +| testset | instance_num | type | +| :-----: | :----------: | :---------------------------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular, 639 in [\[1\]](#1) | +| CT80 | 288 | irregular | + +## Results and Models + +| Methods | GPUs | | Regular Text | | | | Irregular Text | | download | +| :------------------------------------------------------------------: | :--: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :-------------------------------------------------------------------: | +| | | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | | +| [RobustScanner](/configs/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py) | 4 | 0.9510 | 0.9011 | 0.9320 | | 0.7578 | 0.8078 | 0.8750 | [model](https://download.openmmlab.com/mmocr/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real_20220915_152447-7fc35929.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real/20220915_152447.log) | +| [RobustScanner-TTA](/configs/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py) | 4 | 0.9487 | 0.9011 | 0.9261 | | 0.7805 | 0.8124 | 0.8819 | | + +## References + +\[1\] Li, Hui and Wang, Peng and Shen, Chunhua and Zhang, Guyu. Show, attend and read: A simple and strong baseline for irregular text recognition. In AAAI 2019. + +## Citation + +```bibtex +@inproceedings{yue2020robustscanner, + title={RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition}, + author={Yue, Xiaoyu and Kuang, Zhanghui and Lin, Chenhao and Sun, Hongbin and Zhang, Wayne}, + booktitle={European Conference on Computer Vision}, + year={2020} +} +``` diff --git a/lrr_ocr/lrr_robust_scanner/configs/robust_scanner/_base_robustscanner_resnet31.py b/lrr_ocr/lrr_robust_scanner/configs/robust_scanner/_base_robustscanner_resnet31.py new file mode 100644 index 0000000..3577940 --- /dev/null +++ b/lrr_ocr/lrr_robust_scanner/configs/robust_scanner/_base_robustscanner_resnet31.py @@ -0,0 +1,117 @@ +dictionary = dict( + type='Dictionary', + dict_file='{{ fileDirname }}/../../../dicts/english_digits_symbols.txt', + with_start=True, + with_end=True, + same_start_end=True, + with_padding=True, + with_unknown=True) + +model = dict( + type='RobustScanner', + data_preprocessor=dict( + type='TextRecogDataPreprocessor', + mean=[127, 127, 127], + std=[127, 127, 127]), + backbone=dict(type='ResNet31OCR'), + encoder=dict( + type='ChannelReductionEncoder', in_channels=512, out_channels=128), + decoder=dict( + type='RobustScannerFuser', + hybrid_decoder=dict( + type='SequenceAttentionDecoder', dim_input=512, dim_model=128), + position_decoder=dict( + type='PositionAttentionDecoder', dim_input=512, dim_model=128), + in_channels=[512, 512], + postprocessor=dict(type='AttentionPostprocessor'), + module_loss=dict( + type='CEModuleLoss', ignore_first_char=True, reduction='mean'), + dictionary=dictionary, + max_seq_len=30)) + +train_pipeline = [ + dict(type='LoadImageFromFile', ignore_empty=True, min_size=2), + dict(type='LoadOCRAnnotations', with_text=True), + dict( + type='RescaleToHeight', + height=48, + min_width=48, + max_width=160, + width_divisor=4), + dict(type='PadToWidth', width=160), + dict( + type='PackTextRecogInputs', + meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio')) +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RescaleToHeight', + height=48, + min_width=48, + max_width=160, + width_divisor=4), + dict(type='PadToWidth', width=160), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadOCRAnnotations', with_text=True), + dict( + type='PackTextRecogInputs', + meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio')) +] + +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + [ + dict( + type='ConditionApply', + true_transforms=[ + dict( + type='ImgAugWrapper', + args=[dict(cls='Rot90', k=0, keep_size=False)]) + ], + condition="results['img_shape'][1] None: self._visualizer: Visualizer = Visualizer.get_current_instance() self.interval = interval diff --git a/nohup_train_script.sh b/nohup_train_script.sh index 6c0cf21..7b52895 100755 --- a/nohup_train_script.sh +++ b/nohup_train_script.sh @@ -8,4 +8,5 @@ # CUDA_VISIBLE_DEVICES=0,1,2,3,4,6 PORT=29501 tools/dist_train.sh lrr_ocr/lrr_maskrcnn/config/maskrcnn/mask-rcnn_resnet50_fpn_160e_ScutHccdoc.py 6 #CUDA_VISIBLE_DEVICES=0,1 PORT=29503 tools/dist_train.sh lrr_ocr/lrr_SAR/config/sar/sar_resnet31_parallel-decoder_5e_st_handwritting_chinese_recog.py 2 --work-dir work_dirs/sar_resnet31_HWCR_20240427/ --resume #CUDA_VISIBLE_DEVICES=0,1 PORT=29503 tools/dist_train.sh lrr_ocr/lrr_SAR/config/sar/sar_resnet31_parallel-decoder_5e_st_handwritting_chinese_recog.py 2 --work-dir work_dirs/sar_resnet31_HWCR_withNewSize_20240429/ --resume -CUDA_VISIBLE_DEVICES=0,1,2 PORT=29503 tools/dist_train.sh lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py 3 --work-dir work_dirs/SATRN_HWCR_0525/ --resume +#CUDA_VISIBLE_DEVICES=0,1,2 PORT=29503 tools/dist_train.sh lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py 3 --work-dir work_dirs/SATRN_HWCR_0525/ --resume +CUDA_VISIBLE_DEVICES=0,1 PORT=29503 tools/dist_train.sh lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py 2 --work-dir work_dirs/satrn_HWCR_3rd_20240704 --resume