# config.cfg

#########################################################################################################################################
# This is the configuration file used for performing luigi bound tasks with the LSTree processing tools.                                #
# It is based on the tools present in https://github.com/fmi-basel/LSTree created by Raphael Ortiz and Gustavo Q. G. de Medeiros,       #
# and is part of "Multiscale light-sheet organoid imaging framework", de Medeiros et al., (BioRxiv 2021) publication.                   #
# This file is updated by demand while issues are being solved and functions improved.                                                  #
# For any issues, please contact gustavo.medeiros@fmi.ch                                                                                #
#########################################################################################################################################


### This configuration file is quite extensive, and so before you go ahead here are some important general points that may help you in saving time:

# 0) Note that some tasks use multiprocessing when internal operations do not release the global interpreter lock (if you are curious and want to know more: https://wiki.python.org/moin/GlobalInterpreterLock).
# 1) Although the point above might be a bit cryptic, pragmatically here it is: When processing a single dataset use 'pool_workers ~= number of cores'. This way the computer can be used at its max.
# 2) For all instances of 'movie_dirs' please provide a list of full paths to the corresponding channel folders containing the datasets, following the folder structure described in the main Readme of the Repository (https://github.com/fmi-basel/LSTree).
# 3) During training, an initial folder with the name of the model is created, and after the training finishes a new folder with the same name plus an '_inference' suffix is created. While the first one holds the weights files 'weights_best.h5' and 'weights_latest.h5', the latter holds the fully trained model which should be used for prediction. For convenience, we have added the two intermediate weights files into the '_inference' folders for all trained models in order to facilitate retraining if needed.


########################################################################
# General ##############################################################
# General luigi configuration parameters - should usually not be changed for an initial run unless you want to tweak things at your own risk: 

# Expected folder structure for input/output. Follows the folder structure present in the repository.
# NOTE: with Python's configparser, keys defined in [DEFAULT] act as fallbacks for every other section.
[DEFAULT]
# pattern: file-name template with the fields sub-directory, file name, 4-digit zero-padded time point and extension.
pattern={subdir}/{fname}-T{time:04d}.{ext}

[resources]
# gpu: needs to be set for luigi. LSTree has not been tested with multiple GPUs yet, so it is best left at 1.
# pool_workers: maximum number of threads for parallel processing (e.g. while saving compressed .tiffs).
#               When processing a single dataset, use roughly the number of CPU cores.
# memory: maximum RAM availability (NOTE(review): unit is not stated in this file - presumably MB; confirm).

gpu=1
pool_workers=16 
memory=64000

[core]
# workers: maximum number of tasks to run in parallel.
# log_level: logging level; for more information please see: https://luigi.readthedocs.io/en/stable/logging.html
# outdir: path where the logs are saved.

workers=16
log_level=INFO
outdir = ./

########################################################################
# Denoising/Deconvolution ##############################################
# Performs first pre-processing steps. Denoising and deconvolution can minimally aid in curating predicted trees via e.g. Mastodon, as images in the BigDataViewer windows are then of higher contrast.  
# IMPORTANT: this is NOT a requirement! All other steps can currently be run if the raw data is copied into a folder with the same name as the original one, with the suffix '-Deconv' added.

[BuildDenoiseTrainingRecordTask]    
# Creates all necessary TensorFlow record (.tfrec) files containing the annotation/raw image pairs with the correct patch size, used later for training. An equivalent record-building task exists for all other tasks that can perform training.
# training_base_dir: base directory where the corresponding models can be found / saved
# base_dir: base directory for the image data
# n_images: NOTE(review): not documented in this file - presumably the number of images used to build the records; confirm
# train_fraction: fraction of images used for model training
# valid_fraction: fraction of images used for model validation
# min_patch_size: minimum image size used for training

training_base_dir=models/denoise
base_dir=example/data
n_images=30
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(512,512)

# Sets all network parameters for the actual training. Equivalent parameters are also present in all other tasks that can perform training.
[DenoiseTrainingTask]
# training_base_dir: base directory where the corresponding models can be found / saved
# base_dir: base directory for the image data
# images_dirs: directories, relative to base_dir, where the data for training can be found

training_base_dir=models/denoise
base_dir=example/data
images_dirs=["*/Channel0", "*/Channel1", "*/Channel2"]

# Below are the network parameters, which reflect the base parameters of an RDCNet network. For more detailed information please refer to the RDCNet publication: https://arxiv.org/abs/2010.00991 . These parameters are also present in all other tasks that can perform training.
# downsampling_factor: downsampling to be performed on the image. It is directly related to the size of the receptive field.
#                      Other tasks give one factor per axis (Z,Y,X); here a single factor is given (NOTE(review): presumably applied to every axis of the 2D patches; confirm).

downsampling_factor=(4,)
n_downsampling_channels=16
n_groups=8
dilation_rates=(1, 2, 4)
channels_per_group=32
n_steps=5
dropout=0.1

# Training-specific parameters. These parameters are also present in all other tasks that can perform training.
# n_restarts: at each restart all of the weights are offset to higher values in order to prevent the network from getting stuck in local loss minima.
# suffix: suffix that is appended to the model folder name both during training and after training finishes. If a model with the same name exists, there will be no training, to avoid overwriting the model '.pb' file.

train_batch_size=16
valid_batch_size=32
epochs=200
n_restarts=5
learning_rate=0.0001
patch_size=(128,128,1)
suffix=20200311

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, retraining the default denoise model from the repository with the same baseline can be done by uncommenting:

#resume_weights = models/denoise/out/RDCNet-F4-DC16-OC1-G8-DR1-2-4-GC32-S5-D0.1_20200311_inference/weights_latest.h5

# Intensity augmentation parameters (NOTE(review): exact semantics are not documented in this file; confirm against the training code).
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)

[ChannelBoundsTask]
# No task-specific parameters are overridden in this section.


[DenoiseTask]
# out_suffix: suffix used for the denoised output (NOTE(review): presumably appended to the input folder name; confirm).
out_suffix=-Denoised

[DeconvolutionTask]
# psf_dir: directory where the point-spread functions (PSFs) are located
# out_suffix: suffix used for the deconvolved output (NOTE(review): presumably appended to the input folder name; confirm)
# niter: number of iterations during the deconvolution process
# max_patch_size: NOTE(review): not documented in this file - presumably the maximum patch size processed at once; confirm
  
psf_dir=models/deconv/20190830
out_suffix=-Deconv
niter=128
max_patch_size=(9999,9999,9999)


# Runs denoising and deconvolution on all datasets specified below.
[MultiDeconvolutionTask]
# movie_dirs: paths where the images to process are located. Each of these folders should contain the channel subdirectories.
# ch_subdirs: names of the channel subdirectories to be processed

movie_dirs=["example/data/*-*"]
ch_subdirs=["Channel0", "Channel1", "Channel2"]


########################################################################
# Nuclei segmentation ##################################################
# Nuclei Segmentation is divided into training and prediction tasks

# Creates all necessary TensorFlow record (.tfrec) files containing the annotation/raw image pairs with the correct patch size, used later for training.
[BuildNucleiTrainingRecordTask]
# training_base_dir: base directory where the corresponding models can be found / saved
# ch_subdir: channel subdirectory used by this task (NOTE(review): presumably the raw images of the nuclei channel; confirm)
# annot_subdir: subdirectory used for the annotations (NOTE(review): presumably located next to the channel subdirectories; confirm)
# spacing: tfrecord image spacing (Z,Y,X). Should reflect the spacing of the images used during training, as well as the model spacing.
# train_fraction: fraction of images used for model training
# valid_fraction: fraction of images used for model validation
# min_patch_size: minimum image size used for training
# patch_margins: NOTE(review): not documented in this file - presumably a per-axis margin (Z,Y,X) around each patch; confirm

training_base_dir=models/nuclei_seg
ch_subdir=Channel1
annot_subdir=nuclei_annot
spacing=(2,0.26,0.26)
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(32,256,256)
patch_margins=(6,40,40)

[BuildWeakNucleiTrainingRecordTask]
# Same kind of parameters as the other record-building tasks: training_base_dir (where models are found / saved),
# spacing (Z,Y,X), train/valid fractions and minimum patch size.
# Note that train_fraction=1. and valid_fraction=0. here, i.e. all images go to training and none to validation.
training_base_dir=models/nuclei_seg
ch_subdir=Channel1
spacing=(2,0.26,0.26)
train_fraction=1.
valid_fraction=0.
min_patch_size=(32,256,256)
patch_margins=(6,40,40)

[NucleiWeakAnnotTask]
# ch_subdir: channel subdirectory used as input (NOTE(review): presumably the nuclei channel; confirm)
# out_subdir: subdirectory used for the output of this task
ch_subdir=Channel1
out_subdir=nuclei_weak_annot

[NucleiSegmentationTrainingTask]
# training_base_dir: base directory where the corresponding models can be found / saved
# movie_dirs: list of paths to the datasets used by this task
training_base_dir=models/nuclei_seg
movie_dirs=["example/data/*-*"]

# RDCNet network parameters (see https://arxiv.org/abs/2010.00991).
# downsampling_factor: downsampling in (Z,Y,X) to be performed on the image; directly related to the size of the receptive field
# spacing: image spacing (Z,Y,X); should match the spacing used when building the training records
downsampling_factor=(1,8,8)
n_downsampling_channels=32
n_groups=4
dilation_rates=(1, 2, 4, 8)
channels_per_group=32
n_steps=5
dropout=0.1
n_classes=2
spacing=(2,0.26,0.26)

# Training-specific parameters.
# suffix: suffix appended to the model folder name; if a model with the same name exists, no training takes place
# patch_size: NOTE(review): presumably (Z,Y,X,channels); confirm
train_batch_size=4
train_batches_per_epoch=200
valid_batch_size=8
epochs=300
n_restarts=5
learning_rate=0.0001
patch_size=(24,192,192,1)
suffix=20210227

# Loss-related parameters (NOTE(review): semantics are not documented in this file; confirm against the training code).
intra_margin=2.0
inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=0.1

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, retraining the default nuclei prediction model from the repository with the same baseline can be done by uncommenting:

#resume_weights = models/nuclei_seg/out/RDCNet-F1-8-8-DC32-OC5-G4-DR1-2-4-8-GC32-S5-D0.1_20210227_inference/weights_latest.h5

# Intensity augmentation parameters (NOTE(review): exact semantics are not documented in this file; confirm).
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)

[NucleiSegmentationTask]
# ch_subdir: channel subdirectory used as input for the nuclei segmentation
# out_subdir: subdirectory used for the segmentation output
ch_subdir=Channel1
out_subdir=nuclei_segmentation

[MultiNucleiSegmentationTask]
# movie_dirs: list of paths to the datasets to process
movie_dirs=["example/data/*-*"]

########################################################################
# Cell segmentation ####################################################
# Cell and lumen segmentation are divided into training and prediction tasks

[BuildLumenTrainingRecordTask]
# training_base_dir: base directory where the corresponding models can be found / saved
# ch_subdir: channel subdirectory used by this task
# annot_subdir: subdirectory used for the lumen annotations
# spacing: tfrecord image spacing (Z,Y,X)
# train_fraction / valid_fraction: fractions of images used for model training / validation
# min_patch_size: minimum image size used for training
# patch_margins: NOTE(review): not documented in this file - presumably a per-axis margin (Z,Y,X); confirm
training_base_dir=models/cell_seg
ch_subdir=Channel0
annot_subdir=lumen_annot
spacing=(2,0.26,0.26)
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(32,256,256)
patch_margins=(12,80,80)

[BuildWeakCellTrainingRecordTask]
# training_base_dir: base directory where the corresponding models can be found / saved
# ch_subdir: channel subdirectory used by this task
# spacing: tfrecord image spacing (Z,Y,X)
# train_fraction / valid_fraction: fractions of images used for model training / validation
# min_patch_size: minimum image size used for training
training_base_dir=models/cell_seg
ch_subdir=Channel0
spacing=(2,0.26,0.26)
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(32,256,256)
patch_margins=(6,40,40)

[CellSegmentationTrainingTask]
# training_base_dir: base directory where the corresponding models can be found / saved
# movie_dirs_lumen / movie_dirs_cell: lists of dataset paths (NOTE(review): presumably used for the lumen and the cell training data respectively; confirm)
training_base_dir=models/cell_seg
movie_dirs_lumen=["example/data/*-*"]
movie_dirs_cell=["example/data/*-*"]

# RDCNet network parameters (see https://arxiv.org/abs/2010.00991).
# downsampling_factor: downsampling in (Z,Y,X) to be performed on the image; directly related to the size of the receptive field
# spacing: image spacing (Z,Y,X); should match the spacing used when building the training records
downsampling_factor=(1,8,8)
n_downsampling_channels=32
n_groups=4
dilation_rates=(1, 2, 4, 8)
channels_per_group=32
n_steps=5
dropout=0.1
n_classes=3
spacing=(2,0.26,0.26)

# Training-specific parameters.
# suffix: suffix appended to the model folder name; if a model with the same name exists, no training takes place
# patch_size: NOTE(review): presumably (Z,Y,X,channels); confirm
train_batch_size=8
train_batches_per_epoch=200
valid_batch_size=32
epochs=200
n_restarts=5
learning_rate=0.0001
patch_size=(24,192,192,1)
suffix=20210227

# Loss-related parameters (NOTE(review): semantics are not documented in this file; confirm against the training code).
intra_margin=2.0
inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=1.0

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, retraining the default cell prediction model from the repository with the same baseline can be done by uncommenting:

#resume_weights = models/cell_seg/out/RDCNet-F1-8-8-DC32-OC6-G4-DR1-2-4-8-GC32-S5-D0.1_20210227_inference/weights_latest.h5

# Intensity augmentation parameters (NOTE(review): exact semantics are not documented in this file; confirm).
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)

[CellSegmentationTask]
# ch_subdir: channel subdirectory used as input for the cell/lumen segmentation
# out_subdir_lumen: subdirectory used for the lumen segmentation output
# out_subdir_cell: subdirectory used for the cell segmentation output
ch_subdir=Channel0
out_subdir_lumen=lumen_segmentation
out_subdir_cell=cell_segmentation

[MultiCellSegmentationTask]
# movie_dirs: list of paths to the datasets to process
movie_dirs=["example/data/*-*"]

########################################################################
# Lineage ##############################################################
# calculates all basic properties from the existing lineage tree. Included properties are: distance to parent, time since last division, etc.

[TreePropsTask]
# out_subdir: subdirectory used for the computed lineage-tree properties
# xml_tree: file name of the lineage tree (NOTE(review): presumably a MaMuT .xml file inside each dataset folder; confirm)
out_subdir=tree_props
xml_tree=mamut_deconv.xml

########################################################################
# Features #############################################################
# Extracts all features from segmentation results

[ExtractFeaturesTask]
# out_subdir: subdirectory used for the extracted features
# nuclei_subdir: channel subdirectory of the nuclei images (NOTE(review): exact use is not documented in this file; confirm)
out_subdir=features
nuclei_subdir=Channel1

[MultiAggregateFeaturesTask]
# movie_dirs: list of paths to the datasets to process that are tracked
movie_dirs=["example/data/*-*"]


[MultiAggregateOrganoidFeaturesTask]
# movie_dirs: list of paths to the datasets to process that are not tracked (lumen/organoid volume only).
# The list is empty here, i.e. no untracked datasets are configured.
movie_dirs=[]

########################################################################
# Meshes ###############################################################
[SegmentationMeshTask]
# No task-specific parameters are overridden in this section.

[VolumeGridTask]
# raw_channel_subdirs: channel subdirectories of the raw images used by this task
# ref_mesh_subdir: subdirectory of the reference mesh
# colormaps: one colormap name per entry (NOTE(review): presumably colormaps[i] is applied to raw_channel_subdirs[i]; confirm)
# blending_mode: how the channels are combined (NOTE(review): 'max' presumably means maximum blending; confirm)
# out_subdir: subdirectory used for the output of this task
raw_channel_subdirs=["Channel1", "Channel0"]
ref_mesh_subdir=cell_mesh
colormaps=["gray", "red"]
blending_mode=max

out_subdir=rgb_grid

[ViewerTask]
# movie_dirs: list of paths to the datasets to process
# nuclei_seg_subdir: subdirectory holding the nuclei segmentation (matches out_subdir of [NucleiSegmentationTask])
# cell_seg_subdir: subdirectory holding the cell segmentation (matches out_subdir_cell of [CellSegmentationTask])
movie_dirs=["example/data/*-*"]
nuclei_seg_subdir=nuclei_segmentation
cell_seg_subdir=cell_segmentation

########################################################################
########################################################################
# Tracking #############################################################
# Tracking requires that a model is already in place. If not, training from scratch / fine-tuning existing models works by adding the path to each new lineage tree (in MaMuT .xml format) as an entry to the movie_dirs list in [TrackingTrainingTask] below.

[BuildTrackingTrainingRecordTask]
# training_base_dir: base directory where the corresponding models can be found / saved
# ch_subdir: channel subdirectory used by this task
# spacing: tfrecord image spacing (Z,Y,X)
# train_fraction / valid_fraction: fractions of images used for model training / validation
# min_patch_size, patch_margins: NOTE(review): these have a fourth entry here, unlike the segmentation record tasks - presumably an extra time/frame axis; confirm
# xml_tree: file name of the lineage tree in MaMuT .xml format
training_base_dir=models/tracking
ch_subdir=Channel1
spacing=(2,0.26,0.26)
train_fraction=0.95
valid_fraction=0.05
min_patch_size=(32,256,256,2)
patch_margins=(3,24,24,0)
xml_tree=mamut_deconv.xml

[TrackingTrainingTask]
# plot_dataset: NOTE(review): not documented in this file - presumably enables plotting of the training dataset; confirm
# training_base_dir: base directory where the corresponding models can be found / saved
# movie_dirs: list of paths to the datasets (with lineage trees in MaMuT .xml format) used for training
plot_dataset=true
training_base_dir=models/tracking
movie_dirs=["example/data/*-*"]


# RDCNet network parameters (see https://arxiv.org/abs/2010.00991).
# downsampling_factor: downsampling in (Z,Y,X) to be performed on the image; directly related to the size of the receptive field
# spacing: image spacing (Z,Y,X); should match the spacing used when building the training records
downsampling_factor=(1,8,8)
n_downsampling_channels=64
n_groups=4
dilation_rates=(1, 2, 4, 8)
channels_per_group=64
n_steps=6
dropout=0.1
n_classes=2
spacing=(2,0.26,0.26)

# Training-specific parameters.
# suffix: suffix appended to the model folder name; if a model with the same name exists, no training takes place
# patch_size: NOTE(review): the last entry is -1 here - presumably "use the full size of that axis"; confirm
train_batch_size=1
train_batches_per_epoch=200
valid_batch_size=8
epochs=3000
n_restarts=5
learning_rate=0.0001
patch_size=(24,192,192,-1)
suffix=20210306

# Loss-related parameters (NOTE(review): semantics are not documented in this file; confirm against the training code).
intra_margin=2.0
inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=0.1

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume from the weights in "weights_latest.h5" or "weights_best.h5" of the previous model by uncommenting the line below. For example, retraining the default tracking prediction model from the repository with the same baseline can be done by uncommenting:

#resume_weights=models/tracking/out/RDCNet-F1-8-8-DC64-OC10-G4-DR1-2-4-8-GC64-S6-D0.1_20210306_inference/weights_latest.h5

# Intensity augmentation parameters (NOTE(review): exact semantics are not documented in this file; confirm).
intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)

[NucleiTrackingTask]
# ch_subdir: channel subdirectory used as input for the tracking prediction
# out_subdir_nuclei / out_subdir_link / out_subdir_score: subdirectories used for the three outputs of this task
# (NOTE(review): presumably nuclei predictions, link predictions and scores respectively; confirm)
ch_subdir=Channel1
out_subdir_nuclei=track_nuclei
out_subdir_link=track_link
out_subdir_score=track_score

[ExtractTrackingFeaturesTask]
# out_subdir: subdirectory used for the extracted tracking features
out_subdir=track_props

[BuildTreeTask]
# xml_bdv: file name of the dataset .xml (NOTE(review): presumably the BigDataViewer dataset definition; confirm)
# max_n_nuclei: NOTE(review): not documented in this file - presumably an upper bound on the number of nuclei handled; confirm
# min_track_length: NOTE(review): not documented in this file - presumably the minimum length of a track to be kept; confirm
xml_bdv=dataset_deconv.xml
max_n_nuclei = 256
min_track_length = 0

[MultiBuildTreeTask]
# movie_dirs: list of paths to the datasets to process
movie_dirs=["example/data/*-*"]