# svtr_ppocrv3_ch.yaml
# Global runtime switches. Every key below belongs to the `system` mapping.
system:
  mode: 0  # 0 for graph mode, 1 for pynative mode in MindSpore
  distribute: True
  amp_level: O2
  amp_level_infer: O2  # running inference in O2 mode
  seed: 42
  log_interval: 10
  val_while_train: True
  drop_overflow_update: True
  ckpt_max_keep: 5
# Shared values. Each entry defines an anchor (&name) so other sections of
# this file can reuse the value through an alias (*name).
common:
  character_dict_path: &character_dict_path mindocr/utils/dict/ch_dict.txt
  num_classes: &num_classes 6624  # num_chars_in_dict + 1
  max_text_len: &max_text_len 25
  use_space_char: &use_space_char True
  batch_size: &batch_size 128
# Network definition: a backbone plus a multi-branch head.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm it against the schema expected by the model builder.
model:
  type: rec
  transform: null
  backbone:
    name: mobilenet_v1_enhance
    scale: 0.5
    last_conv_stride: [1, 2]
    last_pool_type: avg
    last_pool_kernel_size: [2, 2]
    # Backbone-level flag; distinct from the model-level `pretrained` URL below.
    pretrained: False
  head:
    name: MultiHead
    out_channels_list:
      - CTCLabelDecode: 6625
      - SARLabelDecode: 6627
    head_list:
      - CTCHead:
          Neck:
            name: svtr
          # NOTE(review): placed as a sibling of `Neck`; it may instead belong
          # inside `Neck` - verify. Also note this alias resolves to 6624
          # while `out_channels_list` gives 6625 for the CTC branch - confirm
          # which value the head actually consumes.
          out_channels: *num_classes
      - SARHead:
          enc_dim: 512
          max_text_length: *max_text_len
  # Model-level checkpoint location.
  pretrained: https://download-mindspore.osinfra.cn/toolkits/mindocr/svtr/svtr_lcnet_ppocrv3-6c1d0085.ckpt
# Decoder applied to network output; reuses the shared dictionary settings.
postprocess:
  name: CTCLabelDecode
  character_dict_path: *character_dict_path
  use_space_char: *use_space_char
# Evaluation metric settings; `main_indicator` names the headline score.
metric:
  name: RecMetric
  main_indicator: acc
  lower: False
  character_dict_path: *character_dict_path
  ignore_space: True
  print_flag: False
# Composite loss: one list entry per component loss.
loss:
  name: MultiLoss
  loss_config_list:
    # Neither component carries options here; the empty values are written
    # as explicit nulls, which parse the same as a bare `key:`.
    - CTCLossForSVTR: null
    - SARLoss: null
# Learning-rate schedule. The inner `scheduler` key selects the schedule type.
scheduler:
  scheduler: warmup_cosine_decay
  min_lr: 0.000001
  lr: 0.0001
  num_epochs: 500
  warmup_epochs: 0
  decay_epochs: 500
# Optimizer selection and its hyper-parameters.
optimizer:
  opt: Adam
  beta1: 0.9
  beta2: 0.999
  weight_decay: 3.0e-05
# Loss-scaling settings for mixed-precision training.
loss_scaler:
  type: dynamic
  loss_scale: 512
  scale_factor: 2.0
  scale_window: 1000
# Training run: trainer options, the training dataset and its loader.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm that the column keys belong under `dataset` and that `loader` is a
# sibling of `dataset`.
train:
  ema: True
  ckpt_save_dir: ./tmp_rec
  dataset_sink_mode: False
  clip_grad: True
  clip_norm: 0.1
  dataset:
    type: RecDataset
    dataset_root: dir/to/data/
    data_dir: training/
    label_file: gt_training.txt
    sample_ratio: 1.0
    shuffle: True
    extra_data_num: 2
    filter_max_len: True
    max_text_len: *max_text_len
    # Transforms are listed in sequence; each item maps a transform name to
    # its options (null when it has none).
    transform_pipeline:
      - DecodeImage:
          img_mode: BGR
          to_float32: False
      - RecAug: null
      - MultiLabelEncode:
          character_dict_path: *character_dict_path
          max_text_length: *max_text_len
          use_space_char: *use_space_char
      - RecResizeImgForSVTR:
          image_shape: [3, 48, 320]
    output_columns:
      - image
      - label_ctc
      - label_sar
      - text_length
      - valid_ratio
      - valid_width_mask
    # The two index lists below presumably refer to positions in
    # `output_columns` - verify against the data loader.
    net_input_column_index: [0, 1, 2, 3, 4, 5]
    label_column_index: [1, 2, 3, 4]
  loader:
    shuffle: True
    batch_size: *batch_size
    drop_remainder: False
    max_rowsize: 12
    num_workers: 1
# Evaluation run: checkpoint to load, the validation dataset and its loader.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm that `width_downsample_ratio` is an option of `RecResizeImgForSVTR`
# and that the column keys belong under `dataset`.
eval:
  ckpt_load_path: ./tmp_rec/best.ckpt
  dataset_sink_mode: False
  dataset:
    type: RecDataset
    dataset_root: dir/to/data/
    data_dir: validation/
    label_file: gt_validation.txt
    sample_ratio: 1.0
    shuffle: False
    transform_pipeline:
      - DecodeImage:
          img_mode: BGR
          to_float32: False
      - MultiLabelEncode:
          character_dict_path: *character_dict_path
          max_text_length: *max_text_len
          use_space_char: *use_space_char
      - RecResizeImgForSVTR:
          image_shape: [3, 48, 320]
          width_downsample_ratio: 0.125
    output_columns: ["image", "text_padded", "text_length"]
    net_input_column_index: [0]
    label_column_index: [1, 2]
  loader:
    shuffle: False
    # Literal value, not an alias, so evaluation batch size can be changed
    # independently of training.
    batch_size: 128
    drop_remainder: False
    max_rowsize: 12
    num_workers: 1