-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.cfg
372 lines (289 loc) · 14.4 KB
/
config.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
#########################################################################################################################################
# This is the configuration file used for performing luigi bound tasks with the LSTree processing tools. #
# It is based on the tools present in https://github.com/fmi-basel/LSTree created by Raphael Ortiz and Gustavo Q. G. de Medeiros, #
# and is part of "Multiscale light-sheet organoid imaging framework", de Medeiros et al., (BioRxiv 2021) publication. #
# This file is updated by demand while issues are being solved and functions improved. #
# For any issues, please contact [email protected] #
#########################################################################################################################################
### This configuration file is quite extensive, and so before you go ahead here are some important general points that may help you in saving time:
# 0) Note that some tasks use multiprocessing when internal operations do not release the global interpreter lock (if you are curious and want to know more: https://wiki.python.org/moin/GlobalInterpreterLock).
# 1) Although the point above might be a bit cryptic, pragmatically here it is: When processing a single dataset use 'pool_workers ~= number of cores'. This way the computer can be used at its max.
# 2) For all instances of 'movie_dirs' please provide a list of full paths to the corresponding channel folders containing the datasets, following the folder structure described in the main Readme of the Repository (https://github.com/fmi-basel/LSTree).
# 3) During training, an initial folder with the name of the model is created, and after the training finishes a second folder with the same name plus an '_inference' suffix is created. While the first one holds the parameters 'weights_best.h5' and 'weights_latest.h5', the latter holds the fully trained model, which should be used for prediction. For convenience, we have added the two intermediate weights files into the '_inference' folders for all trained models in order to facilitate retraining if needed.
########################################################################
# General ##############################################################
# General luigi configuration parameters - should usually not be changed for an initial run unless you want to tweak things at your own risk:
[DEFAULT]
# Values in this section are inherited as fallbacks by every other section (configparser [DEFAULT] semantics).
#pattern: expected folder structure for input/output. Follows the folder structure present in the repository.
pattern={subdir}/{fname}-T{time:04d}.{ext}
[resources]
#gpu: needs to be set for luigi. LSTree has not been tested with multiple GPUs (as of yet!), so it is better to leave it as 1.
#pool_workers: sets the maximum number of threads for parallel processing (e.g. while saving compressed .tiffs). When processing a single dataset, use roughly the number of CPU cores.
#memory: sets the maximum RAM availability. NOTE(review): the unit is not stated in this file (64000 looks like MB) - confirm.
gpu=1
pool_workers=16
memory=64000
[core]
#workers: sets the maximum number of parallel tasks to run
#log_level: logging level; for more information please see: https://luigi.readthedocs.io/en/stable/logging.html
#outdir: path where the logs are saved
workers=16
log_level=INFO
outdir=./
########################################################################
# Denoising/Deconvolution ##############################################
# Performs first pre-processing steps. Denoising and deconvolution can minimally aid in curating predicted trees via e.g. Mastodon, as images in the BigDataViewer windows are then of higher contrast.
# IMPORTANT: this is NOT a requirement! All other steps can currently be run if the raw data is copied into a folder with the same name as the original, with the suffix '-Deconv' added.
[BuildDenoiseTrainingRecordTask]
#Creates all necessary tensorflow record files (.tfrec) containing the annotation/raw image pairs with the correct patch size, used later for training. This step is also present in all other tasks that can perform training.
#training_base_dir: base directory where corresponding models can be found / saved
#base_dir: base directory for the image data
#n_images: NOTE(review): not documented in this file; presumably the number of images used to build the records - confirm
#train_fraction: fraction of images used for model training
#valid_fraction: fraction of images used for model validation
#min_patch_size: minimum image size used for training
training_base_dir=models/denoise
base_dir=example/data
n_images=30
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(512,512)
[DenoiseTrainingTask]
# Sets all network parameters for the actual training. This is also present in all other tasks that can perform training.
training_base_dir=models/denoise
#images_dirs: directories, relative to base_dir, where the data for training can be found
base_dir=example/data
images_dirs=["*/Channel0", "*/Channel1", "*/Channel2"]
# Below are the network parameters, which reflect the base parameters for an RDCNet network. For more detailed information please refer to the RDCNet publication: https://arxiv.org/abs/2010.00991 . These parameters are also present in all other tasks that can perform training.
#downsampling_factor: refers to the downsampling in (Z,Y,X) to be performed on the image. It is directly related to the size of the receptive field.
# NOTE(review): only a single factor is given here, whereas the other training sections give three - presumably because denoising works on 2D patches (see patch_size below); confirm.
downsampling_factor=(4,)
n_downsampling_channels=16
n_groups=8
dilation_rates=(1, 2, 4)
channels_per_group=32
n_steps=5
dropout=0.1
# Training-specific parameters. These parameters are also present in all other tasks that can perform training.
#n_restarts: at each restart all of the weights are offset to higher values in order to keep the network from getting stuck in local loss minima.
#suffix: suffix that is appended to the model folder both during training and after training finishes. If a model with the same name exists, no training takes place, to avoid overwriting the model '.pb' file.
train_batch_size=16
valid_batch_size=32
epochs=200
n_restarts=5
learning_rate=0.0001
patch_size=(128,128,1)
suffix=20200311
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, for the default denoise training from the repository, retraining from the same baseline can be done by uncommenting:
#resume_weights = models/denoise/out/RDCNet-F4-DC16-OC1-G8-DR1-2-4-GC32-S5-D0.1_20200311_inference/weights_latest.h5
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)
[ChannelBoundsTask]
# Empty section: no task-specific parameters are set here.
[DenoiseTask]
#out_suffix: NOTE(review): presumably the suffix added to the folder name that receives the denoised images - confirm
out_suffix=-Denoised
[DeconvolutionTask]
#psf_dir: directory where the point-spread-functions are located
#out_suffix: suffix of the output folder; the other steps expect the data in a folder carrying the '-Deconv' suffix
#niter: number of iterations during deconvolution process
#max_patch_size: NOTE(review): not documented in this file; the very large values presumably mean images are not split into patches - confirm
psf_dir=models/deconv/20190830
out_suffix=-Deconv
niter=128
max_patch_size=(9999,9999,9999)
[MultiDeconvolutionTask]
# Runs denoising and deconvolution on all datasets specified below.
#movie_dirs: paths where the images to process are located. Each of these folders should contain the channel subdirectories.
#ch_subdirs: names of the subdirectories which are being processed
movie_dirs=["example/data/*-*"]
ch_subdirs=["Channel0", "Channel1", "Channel2"]
########################################################################
# Nuclei segmentation ##################################################
# Nuclei Segmentation is divided into training and prediction tasks
[BuildNucleiTrainingRecordTask]
# Creates all necessary tensorflow record files (.tfrec) containing the annotation/raw image pairs with the correct patch size, used later for training.
#spacing: tfrecord image spacing (Z,Y,X). Should reflect the spacing of the images used during training, as well as the model spacing.
training_base_dir=models/nuclei_seg
ch_subdir=Channel1
annot_subdir=nuclei_annot
spacing=(2,0.26,0.26)
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(32,256,256)
patch_margins=(6,40,40)
[BuildWeakNucleiTrainingRecordTask]
# Same kind of record-building parameters as the other Build*TrainingRecordTask sections.
# train_fraction=1. / valid_fraction=0.: all images are used for training and none for validation.
training_base_dir=models/nuclei_seg
ch_subdir=Channel1
spacing=(2,0.26,0.26)
train_fraction=1.
valid_fraction=0.
min_patch_size=(32,256,256)
patch_margins=(6,40,40)
[NucleiWeakAnnotTask]
#ch_subdir: name of the channel subdirectory being processed
#out_subdir: NOTE(review): presumably the subdirectory that receives the weak nuclei annotations - confirm
ch_subdir=Channel1
out_subdir=nuclei_weak_annot
[NucleiSegmentationTrainingTask]
# Training parameters for the nuclei segmentation model.
#training_base_dir: base directory where corresponding models can be found / saved
#movie_dirs: dataset folders used for training; each should contain the channel subdirectories
training_base_dir=models/nuclei_seg
movie_dirs=["example/data/*-*"]
# RDCNet network parameters (see https://arxiv.org/abs/2010.00991).
#downsampling_factor: downsampling in (Z,Y,X) to be performed on the image
downsampling_factor=(1,8,8)
n_downsampling_channels=32
n_groups=4
dilation_rates=(1, 2, 4, 8)
channels_per_group=32
n_steps=5
dropout=0.1
n_classes=2
#spacing: image spacing (Z,Y,X); should reflect the spacing of the images used during training
spacing=(2,0.26,0.26)
# Training-specific parameters.
#n_restarts: at each restart all of the weights are offset to higher values in order to keep the network from getting stuck in local loss minima.
#suffix: suffix that is appended to the model folder both during training and after training finishes
train_batch_size=4
train_batches_per_epoch=200
valid_batch_size=8
epochs=300
n_restarts=5
learning_rate=0.0001
patch_size=(24,192,192,1)
suffix=20210227
intra_margin=2.0
inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=0.1
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, for the default nuclei prediction model from the repository, retraining from the same baseline can be done by uncommenting:
#resume_weights = models/nuclei_seg/out/RDCNet-F1-8-8-DC32-OC5-G4-DR1-2-4-8-GC32-S5-D0.1_20210227_inference/weights_latest.h5
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)
[NucleiSegmentationTask]
#ch_subdir: name of the channel subdirectory being processed
#out_subdir: NOTE(review): presumably the subdirectory that receives the nuclei segmentation - confirm
ch_subdir=Channel1
out_subdir=nuclei_segmentation
[MultiNucleiSegmentationTask]
#movie_dirs: dataset folders to process; each should contain the channel subdirectories
movie_dirs=["example/data/*-*"]
########################################################################
# Cell segmentation ####################################################
# Cell and lumen segmentation are divided into training and prediction tasks
[BuildLumenTrainingRecordTask]
#training_base_dir: base directory where corresponding models can be found / saved
#ch_subdir: name of the channel subdirectory being processed
#annot_subdir: NOTE(review): presumably the subdirectory holding the lumen annotations - confirm
#spacing: image spacing (Z,Y,X); should reflect the spacing of the images used during training
#train_fraction / valid_fraction: fractions of images used for model training / validation
#min_patch_size: minimum image size used for training
training_base_dir=models/cell_seg
ch_subdir=Channel0
annot_subdir=lumen_annot
spacing=(2,0.26,0.26)
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(32,256,256)
patch_margins=(12,80,80)
[BuildWeakCellTrainingRecordTask]
#training_base_dir: base directory where corresponding models can be found / saved
#ch_subdir: name of the channel subdirectory being processed
#spacing: image spacing (Z,Y,X); should reflect the spacing of the images used during training
#train_fraction / valid_fraction: fractions of images used for model training / validation
#min_patch_size: minimum image size used for training
training_base_dir=models/cell_seg
ch_subdir=Channel0
spacing=(2,0.26,0.26)
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(32,256,256)
patch_margins=(6,40,40)
[CellSegmentationTrainingTask]
# Training parameters for the cell and lumen segmentation model.
#training_base_dir: base directory where corresponding models can be found / saved
#movie_dirs_lumen / movie_dirs_cell: NOTE(review): presumably the dataset folders providing lumen and cell training data respectively - confirm
training_base_dir=models/cell_seg
movie_dirs_lumen=["example/data/*-*"]
movie_dirs_cell=["example/data/*-*"]
# RDCNet network parameters (see https://arxiv.org/abs/2010.00991).
#downsampling_factor: downsampling in (Z,Y,X) to be performed on the image
downsampling_factor=(1,8,8)
n_downsampling_channels=32
n_groups=4
dilation_rates=(1, 2, 4, 8)
channels_per_group=32
n_steps=5
dropout=0.1
n_classes=3
#spacing: image spacing (Z,Y,X); should reflect the spacing of the images used during training
spacing=(2,0.26,0.26)
# Training-specific parameters.
#n_restarts: at each restart all of the weights are offset to higher values in order to keep the network from getting stuck in local loss minima.
#suffix: suffix that is appended to the model folder both during training and after training finishes
train_batch_size=8
train_batches_per_epoch=200
valid_batch_size=32
epochs=200
n_restarts=5
learning_rate=0.0001
patch_size=(24,192,192,1)
suffix=20210227
intra_margin=2.0
inter_margin=6.0
jaccard_hinge=0.3
# NOTE(review): jaccard_eps is 1.0 here but 0.1 in the nuclei segmentation and tracking training sections - confirm this difference is intended.
jaccard_eps=1.0
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, for the default cell prediction model from the repository, retraining from the same baseline can be done by uncommenting:
#resume_weights = models/cell_seg/out/RDCNet-F1-8-8-DC32-OC6-G4-DR1-2-4-8-GC32-S5-D0.1_20210227_inference/weights_latest.h5
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)
[CellSegmentationTask]
#ch_subdir: name of the channel subdirectory being processed
#out_subdir_lumen / out_subdir_cell: NOTE(review): presumably the subdirectories that receive the lumen and cell segmentations - confirm
ch_subdir=Channel0
out_subdir_lumen=lumen_segmentation
out_subdir_cell=cell_segmentation
[MultiCellSegmentationTask]
#movie_dirs: dataset folders to process; each should contain the channel subdirectories
movie_dirs=["example/data/*-*"]
########################################################################
# Lineage ##############################################################
# calculates all basic properties from the existing lineage tree. Included properties are: distance to parent, time since last division, etc.
[TreePropsTask]
#out_subdir: NOTE(review): presumably the subdirectory that receives the computed tree properties - confirm
#xml_tree: NOTE(review): presumably the file name of the MaMuT .xml lineage tree inside each dataset folder - confirm
out_subdir=tree_props
xml_tree=mamut_deconv.xml
########################################################################
# Features #############################################################
# Extracts all features from segmentation results
[ExtractFeaturesTask]
#out_subdir: NOTE(review): presumably the subdirectory that receives the extracted features - confirm
#nuclei_subdir: NOTE(review): presumably the channel subdirectory holding the nuclei images - confirm
out_subdir=features
nuclei_subdir=Channel1
[MultiAggregateFeaturesTask]
# Datasets to process that are tracked.
#movie_dirs: dataset folders to process; each should contain the channel subdirectories
movie_dirs=["example/data/*-*"]
[MultiAggregateOrganoidFeaturesTask]
# Datasets to process that are not tracked (lumen/organoid volume only).
# The list is empty here, i.e. no untracked datasets are configured.
movie_dirs=[]
########################################################################
# Meshes ###############################################################
[SegmentationMeshTask]
# Empty section: no task-specific parameters are set here.
[VolumeGridTask]
#raw_channel_subdirs / colormaps: two channels and two colormaps are listed. NOTE(review): presumably paired by position (Channel1 -> gray, Channel0 -> red) - confirm
#ref_mesh_subdir, blending_mode, out_subdir: not documented in this file; see the LSTree repository for their meaning
raw_channel_subdirs=["Channel1", "Channel0"]
ref_mesh_subdir=cell_mesh
colormaps=["gray", "red"]
blending_mode=max
out_subdir=rgb_grid
[ViewerTask]
#movie_dirs: dataset folders to process; each should contain the channel subdirectories
#nuclei_seg_subdir / cell_seg_subdir: these match the out_subdir values of [NucleiSegmentationTask] and [CellSegmentationTask] (out_subdir_cell). NOTE(review): presumably they must be kept in sync - confirm
movie_dirs=["example/data/*-*"]
nuclei_seg_subdir=nuclei_segmentation
cell_seg_subdir=cell_segmentation
########################################################################
########################################################################
# Tracking #############################################################
# Tracking requires that a model is already in place. If not, training from scratch / fine-tuning existing models works by adding the path to each new lineage tree (in MaMuT .xml format) as an entry to the movie_dirs list in [TrackingTrainingTask] below.
[BuildTrackingTrainingRecordTask]
#training_base_dir: base directory where corresponding models can be found / saved
#ch_subdir: name of the channel subdirectory being processed
#spacing: image spacing (Z,Y,X); should reflect the spacing of the images used during training
#train_fraction / valid_fraction: fractions of images used for model training / validation
#min_patch_size / patch_margins: four values here, versus three in the segmentation record tasks. NOTE(review): the meaning of the extra last dimension is not documented in this file - confirm
#xml_tree: NOTE(review): presumably the file name of the MaMuT .xml lineage tree inside each dataset folder - confirm
training_base_dir=models/tracking
ch_subdir=Channel1
spacing=(2,0.26,0.26)
train_fraction=0.95
valid_fraction=0.05
min_patch_size=(32,256,256,2)
patch_margins=(3,24,24,0)
xml_tree=mamut_deconv.xml
[TrackingTrainingTask]
# Training parameters for the tracking model.
#plot_dataset: not documented in this file; see the LSTree repository for its meaning
#training_base_dir: base directory where corresponding models can be found / saved
#movie_dirs: datasets used for training; add an entry for each new lineage tree to train from scratch or fine-tune
plot_dataset=true
training_base_dir=models/tracking
movie_dirs=["example/data/*-*"]
# RDCNet network parameters (see https://arxiv.org/abs/2010.00991).
#downsampling_factor: downsampling in (Z,Y,X) to be performed on the image
downsampling_factor=(1,8,8)
n_downsampling_channels=64
n_groups=4
dilation_rates=(1, 2, 4, 8)
channels_per_group=64
n_steps=6
dropout=0.1
n_classes=2
#spacing: image spacing (Z,Y,X); should reflect the spacing of the images used during training
spacing=(2,0.26,0.26)
# Training-specific parameters.
#n_restarts: at each restart all of the weights are offset to higher values in order to keep the network from getting stuck in local loss minima.
#suffix: suffix that is appended to the model folder both during training and after training finishes
#patch_size: NOTE(review): the meaning of the trailing -1 is not documented in this file - confirm
train_batch_size=1
train_batches_per_epoch=200
valid_batch_size=8
epochs=3000
n_restarts=5
learning_rate=0.0001
patch_size=(24,192,192,-1)
suffix=20210306
intra_margin=2.0
inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=0.1
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, for the default tracking prediction model from the repository, retraining from the same baseline can be done by uncommenting:
#resume_weights=models/tracking/out/RDCNet-F1-8-8-DC64-OC10-G4-DR1-2-4-8-GC64-S6-D0.1_20210306_inference/weights_latest.h5
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)
[NucleiTrackingTask]
#ch_subdir: name of the channel subdirectory being processed
#out_subdir_nuclei / out_subdir_link / out_subdir_score: NOTE(review): presumably the subdirectories that receive the three tracking outputs - confirm
ch_subdir=Channel1
out_subdir_nuclei=track_nuclei
out_subdir_link=track_link
out_subdir_score=track_score
[ExtractTrackingFeaturesTask]
#out_subdir: NOTE(review): presumably the subdirectory that receives the extracted tracking features - confirm
out_subdir=track_props
[BuildTreeTask]
#xml_bdv: NOTE(review): presumably the file name of the BigDataViewer dataset .xml - confirm
#max_n_nuclei / min_track_length: not documented in this file; see the LSTree repository for their meaning
xml_bdv=dataset_deconv.xml
max_n_nuclei=256
min_track_length=0
[MultiBuildTreeTask]
#movie_dirs: dataset folders to process; each should contain the channel subdirectories
movie_dirs=["example/data/*-*"]