robust_scanner_0805
MikasaLee committed Aug 5, 2024
1 parent eaf4d70 commit 68676c3
Showing 35 changed files with 1,545 additions and 4 deletions.
@@ -0,0 +1,14 @@
CASIA_HWDB_official_1x_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_official_1x_dataset'

CASIA_HWDB_official_1x_data_textrecog_train = dict(
type='OCRDataset',
data_root=CASIA_HWDB_official_1x_data_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

CASIA_HWDB_official_1x_data_textrecog_test = dict(
type='OCRDataset',
data_root=CASIA_HWDB_official_1x_data_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
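
All of the dataset base files in this commit follow the same OCRDataset pattern: a data_root, an annotation JSON, and a pipeline=None placeholder that the downstream config fills in. Below is a minimal illustrative pre-flight check for such an entry; the helper function is an assumption for illustration only (not part of this commit) and assumes the dicts defined above are in scope.

# Illustrative helper (an assumption, not part of this commit): verify that a
# base dataset entry points at an existing annotation file before training.
import os

def check_ocr_dataset(cfg: dict) -> None:
    ann_path = os.path.join(cfg['data_root'], cfg['ann_file'])
    if not os.path.isfile(ann_path):
        raise FileNotFoundError(f'Annotation file not found: {ann_path}')

check_ocr_dataset(CASIA_HWDB_official_1x_data_textrecog_train)
check_ocr_dataset(CASIA_HWDB_official_1x_data_textrecog_test)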
11 changes: 8 additions & 3 deletions lrr_ocr/lrr_SATRN/config/satrn/satrn_shallow_5e_st_HWCR.py
@@ -1,6 +1,7 @@
_base_ = [
'../_base_/datasets/scut_hccdoc.py',
'../_base_/datasets/CASIA_HWDB_chineseocr_data.py',
'../_base_/datasets/CASIA_HWDB_official_1x_data.py',
'../_base_/datasets/CASIA_HWDB_official_2x_data.py',
'../_base_/datasets/bnu_EnsExam_ppocrlabel.py',
'../_base_/default_runtime.py',
@@ -14,12 +15,16 @@
_base_.CASIA_HWDB_official_2x_data_textrecog_train,
_base_.CASIA_HWDB_official_2x_data_textrecog_val,
_base_.scut_hccdoc_textrecog_train,
# newly added: CASIA_HWDB_official_1x
_base_.CASIA_HWDB_official_1x_data_textrecog_train,
]

test_list = [
_base_.CASIA_HWDB_official_2x_data_textrecog_test,
_base_.scut_hccdoc_textrecog_test,
_base_.bnu_EnsExam_ppocrlabel_textrecog_test,
# newly added: CASIA_HWDB_official_1x
_base_.CASIA_HWDB_official_1x_data_textrecog_test,
]

train_dataset = dict(
@@ -28,7 +33,7 @@
type='ConcatDataset', datasets=test_list, pipeline=_base_.test_pipeline)

default_hooks = dict(
logger=dict(type='LoggerHook', interval=1000), # originally 100, but with a smaller batch_size that would log far too often
logger=dict(type='LoggerHook', interval=2000), # originally 100, but with a smaller batch_size that would log far too often
checkpoint=dict(type='CheckpointHook',
interval=1,
by_epoch=True,
@@ -39,7 +44,7 @@
)

train_dataloader = dict(
batch_size=32, # originally 64; reduced
batch_size=24, # originally 64; reduced
num_workers=32,
persistent_workers=True,
pin_memory=True,
@@ -67,7 +72,7 @@
dict(type='CharMetric'),
dict(type='CRandARMetric')
],
dataset_prefixes=['Casia_Hwdb_2x','SCUT_HccDoc','bnu_EnsExam_ppocrlabel']) # this order must match test_list, otherwise the per-dataset results get mixed up
dataset_prefixes=['Casia_Hwdb_2x','SCUT_HccDoc','bnu_EnsExam_ppocrlabel','Casia_Hwdb_1x']) # this order must match test_list, otherwise the per-dataset results get mixed up

test_evaluator = val_evaluator

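The translated comment above is worth spelling out: the evaluator (presumably MMOCR's MultiDatasetsEvaluator, whose type line sits in the collapsed part of the diff) assigns metric prefixes to test datasets purely by position, so every addition to test_list needs a matching entry at the same index in dataset_prefixes. The following sanity check is an illustrative assumption, not part of the commit.

# Illustrative sanity check (an assumption, not part of this commit): the
# prefixes are matched to the test datasets by position only.
test_list_names = [
    'CASIA_HWDB_official_2x_data_textrecog_test',
    'scut_hccdoc_textrecog_test',
    'bnu_EnsExam_ppocrlabel_textrecog_test',
    'CASIA_HWDB_official_1x_data_textrecog_test',  # newly added in this commit
]
dataset_prefixes = ['Casia_Hwdb_2x', 'SCUT_HccDoc', 'bnu_EnsExam_ppocrlabel', 'Casia_Hwdb_1x']

assert len(test_list_names) == len(dataset_prefixes), \
    'Every test dataset needs a prefix at the same index.'
for name, prefix in zip(test_list_names, dataset_prefixes):
    print(f'{prefix:>24} -> {name}')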
@@ -0,0 +1,14 @@
CASIA_HWDB_chineseocr_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_chinese_ocr_dataset'

CASIA_HWDB_chineseocr_data_textrecog_train = dict(
type='OCRDataset',
data_root=CASIA_HWDB_chineseocr_data_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

CASIA_HWDB_chineseocr_data_textrecog_test = dict(
type='OCRDataset',
data_root=CASIA_HWDB_chineseocr_data_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
@@ -0,0 +1,14 @@
CASIA_HWDB_official_1x_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_official_1x_dataset'

CASIA_HWDB_official_1x_data_textrecog_train = dict(
type='OCRDataset',
data_root=CASIA_HWDB_official_1x_data_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

CASIA_HWDB_official_1x_data_textrecog_test = dict(
type='OCRDataset',
data_root=CASIA_HWDB_official_1x_data_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
@@ -0,0 +1,21 @@
CASIA_HWDB_official_2x_data_textrecog_data_root = '/lirunrui/datasets/mmocr_CASIA_HWDB_official_2x_dataset'

CASIA_HWDB_official_2x_data_textrecog_train = dict(
type='OCRDataset',
data_root=CASIA_HWDB_official_2x_data_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

CASIA_HWDB_official_2x_data_textrecog_val = dict(
type='OCRDataset',
data_root=CASIA_HWDB_official_2x_data_textrecog_data_root,
ann_file='textrecog_val.json',
# test_mode=True, # this split is also thrown into training
pipeline=None)

CASIA_HWDB_official_2x_data_textrecog_test = dict(
type='OCRDataset',
data_root=CASIA_HWDB_official_2x_data_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
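
Because test_mode=True stays commented out on the 2.x val split, it behaves like a training split, and the SATRN config above concatenates it with the train split. The sketch below mirrors that usage; the _base_ path and the train_pipeline name are assumptions taken from the usual MMOCR base-file layout, not part of this commit.

# Illustrative sketch (assumptions: _base_ path and train_pipeline name):
# fold the 2.x val split into training next to the train split, mirroring
# train_list in the SATRN config above.
_base_ = ['../_base_/datasets/CASIA_HWDB_official_2x_data.py']

train_dataset = dict(
    type='ConcatDataset',
    datasets=[
        _base_.CASIA_HWDB_official_2x_data_textrecog_train,
        _base_.CASIA_HWDB_official_2x_data_textrecog_val,  # val reused as extra training data
    ],
    pipeline=_base_.train_pipeline)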
@@ -0,0 +1,14 @@
bnu_EnsExam_ppocrlabel_textrecog_data_root = '/lirunrui/datasets/mmocr_bnu_EnsExam_PPOCRLabel'

bnu_EnsExam_ppocrlabel_textrecog_train = dict(
type='OCRDataset',
data_root=bnu_EnsExam_ppocrlabel_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

bnu_EnsExam_ppocrlabel_textrecog_test = dict(
type='OCRDataset',
data_root=bnu_EnsExam_ppocrlabel_textrecog_data_root,
ann_file='textrecog_train.json',
test_mode=True,
pipeline=None)
@@ -0,0 +1,8 @@
cocotextv1_textrecog_data_root = 'data/rec/coco_text_v1'

cocotextv1_textrecog_train = dict(
type='OCRDataset',
data_root=cocotextv1_textrecog_data_root,
ann_file='train_labels.json',
test_mode=False,
pipeline=None)
8 changes: 8 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/cute80.py
@@ -0,0 +1,8 @@
cute80_textrecog_data_root = 'data/cute80'

cute80_textrecog_test = dict(
type='OCRDataset',
data_root=cute80_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
@@ -0,0 +1,8 @@
icdar2011_textrecog_data_root = 'data/rec/icdar_2011/'

icdar2011_textrecog_train = dict(
type='OCRDataset',
data_root=icdar2011_textrecog_data_root,
ann_file='train_labels.json',
test_mode=False,
pipeline=None)
21 changes: 21 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2013.py
@@ -0,0 +1,21 @@
icdar2013_textrecog_data_root = 'data/icdar2013'

icdar2013_textrecog_train = dict(
type='OCRDataset',
data_root=icdar2013_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

icdar2013_textrecog_test = dict(
type='OCRDataset',
data_root=icdar2013_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)

icdar2013_857_textrecog_test = dict(
type='OCRDataset',
data_root=icdar2013_textrecog_data_root,
ann_file='textrecog_test_857.json',
test_mode=True,
pipeline=None)
21 changes: 21 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/icdar2015.py
@@ -0,0 +1,21 @@
icdar2015_textrecog_data_root = 'data/icdar2015'

icdar2015_textrecog_train = dict(
type='OCRDataset',
data_root=icdar2015_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

icdar2015_textrecog_test = dict(
type='OCRDataset',
data_root=icdar2015_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)

icdar2015_1811_textrecog_test = dict(
type='OCRDataset',
data_root=icdar2015_textrecog_data_root,
ann_file='textrecog_test_1811.json',
test_mode=True,
pipeline=None)
14 changes: 14 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/iiit5k.py
@@ -0,0 +1,14 @@
iiit5k_textrecog_data_root = 'data/iiit5k'

iiit5k_textrecog_train = dict(
type='OCRDataset',
data_root=iiit5k_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

iiit5k_textrecog_test = dict(
type='OCRDataset',
data_root=iiit5k_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
13 changes: 13 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/mjsynth.py
@@ -0,0 +1,13 @@
mjsynth_textrecog_data_root = 'data/mjsynth'

mjsynth_textrecog_train = dict(
type='OCRDataset',
data_root=mjsynth_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

mjsynth_sub_textrecog_train = dict(
type='OCRDataset',
data_root=mjsynth_textrecog_data_root,
ann_file='subset_textrecog_train.json',
pipeline=None)
24 changes: 24 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/scut_hccdoc.py
@@ -0,0 +1,24 @@
scut_hccdoc_textrecog_data_root = '/lirunrui/datasets/mmocr_SCUT_HCCDoc_Dataset'
default_hooks = dict(

visualization=dict(
type='VisualizationHook',
interval=1,
enable=False,
show=False,
draw_gt=False,
draw_pred=False,
font_properties='/usr/share/fonts/fonts_library/simsun.ttc'),
)
scut_hccdoc_textrecog_train = dict(
type='OCRDataset',
data_root=scut_hccdoc_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

scut_hccdoc_textrecog_test = dict(
type='OCRDataset',
data_root=scut_hccdoc_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
14 changes: 14 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svt.py
@@ -0,0 +1,14 @@
svt_textrecog_data_root = 'data/svt'

svt_textrecog_train = dict(
type='OCRDataset',
data_root=svt_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

svt_textrecog_test = dict(
type='OCRDataset',
data_root=svt_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
14 changes: 14 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/svtp.py
@@ -0,0 +1,14 @@
svtp_textrecog_data_root = 'data/svtp'

svtp_textrecog_train = dict(
type='OCRDataset',
data_root=svtp_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

svtp_textrecog_test = dict(
type='OCRDataset',
data_root=svtp_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
19 changes: 19 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/synthtext.py
@@ -0,0 +1,19 @@
synthtext_textrecog_data_root = '/lirunrui/datasets/synthtext'

synthtext_textrecog_train = dict(
type='OCRDataset',
data_root=synthtext_textrecog_data_root,
ann_file='textrecog_train.json',
pipeline=None)

synthtext_sub_textrecog_train = dict(
type='OCRDataset',
data_root=synthtext_textrecog_data_root,
ann_file='subset_textrecog_train.json',
pipeline=None)

synthtext_an_textrecog_train = dict(
type='OCRDataset',
data_root=synthtext_textrecog_data_root,
ann_file='alphanumeric_textrecog_train.json',
pipeline=None)
@@ -0,0 +1,8 @@
synthtext_add_textrecog_data_root = 'data/rec/synthtext_add/'

synthtext_add_textrecog_train = dict(
type='OCRDataset',
data_root=synthtext_add_textrecog_data_root,
ann_file='train_labels.json',
test_mode=False,
pipeline=None)
15 changes: 15 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/totaltext.py
@@ -0,0 +1,15 @@
totaltext_textrecog_data_root = 'data/totaltext/'

totaltext_textrecog_train = dict(
type='OCRDataset',
data_root=totaltext_textrecog_data_root,
ann_file='textrecog_train.json',
test_mode=False,
pipeline=None)

totaltext_textrecog_test = dict(
type='OCRDataset',
data_root=totaltext_textrecog_data_root,
ann_file='textrecog_test.json',
test_mode=True,
pipeline=None)
17 changes: 17 additions & 0 deletions lrr_ocr/lrr_robust_scanner/configs/_base_/datasets/toy_data.py
@@ -0,0 +1,17 @@
toy_data_root = 'tests/data/rec_toy_dataset/'

toy_rec_train = dict(
type='OCRDataset',
data_root=toy_data_root,
data_prefix=dict(img_path='imgs/'),
ann_file='labels.json',
pipeline=None,
test_mode=False)

toy_rec_test = dict(
type='OCRDataset',
data_root=toy_data_root,
data_prefix=dict(img_path='imgs/'),
ann_file='labels.json',
pipeline=None,
test_mode=True)
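
The toy recognition set is a convenient way to smoke-test the new dataset plumbing without touching the large corpora. The sketch below wires it into a test dataloader; the batch/sampler settings and the test_pipeline name are illustrative assumptions (test_pipeline would come from a model base config), not part of this commit.

# Illustrative sketch (assumptions: sampler/batch settings and test_pipeline):
# run the toy split through a test dataloader as a quick smoke test.
_base_ = ['../_base_/datasets/toy_data.py']

test_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='ConcatDataset',
        datasets=[_base_.toy_rec_test],
        pipeline=_base_.test_pipeline))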
[Diff truncated: the remaining changed files were not loaded on this page.]