-
Notifications
You must be signed in to change notification settings - Fork 1
/
Snakefile
executable file
·120 lines (98 loc) · 3.21 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import pathlib
import pandas as pd
# Load the workflow configuration; everything below reads from `config`.
configfile: "files/config.yaml"


OUTDIR = config["output"]["dir"]
DATA_DIR = pathlib.Path(config["input"]["datadir"])

# Sample table: one row per image. The "file" column holds the image path
# relative to DATA_DIR and is the only column required at parse time.
sample_log = config["input"]["logfile"]
samples = pd.read_csv(sample_log)
if "file" not in samples.columns:
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    raise ValueError(f"{sample_log} must contain a 'file' column")

# Assumed-unique row identifier linking each row to an embryo name: the file
# path with "/" flattened to "_" and everything from the first "." dropped.
sample_ids = [name.replace("/", "_").split(".")[0] for name in samples["file"]]
samples = samples.set_index(pd.Index(sample_ids))

# Duplicate ids would make `.at`/`.loc` lookups return a Series instead of a
# scalar and fail later with an obscure error, so reject them up front.
if not samples.index.is_unique:
    repeated = sorted(set(samples.index[samples.index.duplicated()]))
    raise ValueError(
        "sample ids derived from the 'file' column are not unique: "
        + ", ".join(repeated)
    )

# Best-effort warning only: missing images are reported, not fatal, at parse time.
for emb in samples["file"]:
    emb_path = DATA_DIR / emb
    if not emb_path.exists():
        print(f"{emb_path} not found")

print("Samples:\n\t" + "\n\t".join(samples.index) + "\n")
rule all:
    """Default target: request the combined counts table in OUTDIR/final."""
    input:
        os.path.join(OUTDIR, "final", "counts.csv")
def get_embryo_param(wc, col):
    """Look up one cell of the sample table for the current embryo.

    Args:
        wc: Wildcards object; its ``embryo`` attribute selects the row.
        col: Name of the column to read.

    Returns:
        The value stored at row ``wc.embryo``, column ``col`` of ``samples``.
    """
    embryo_id = wc.embryo
    return samples.at[embryo_id, col]
def get_image(wc):
    """Resolve the image path for the current embryo.

    Args:
        wc: Wildcards object; its ``embryo`` attribute selects the row.

    Returns:
        ``DATA_DIR`` joined with that row's "file" entry, as a ``pathlib`` path.
    """
    relative_name = samples.loc[wc.embryo, "file"]
    return DATA_DIR / relative_name
rule normalize_pmc_stains:
    """Run scripts/normalize_pmc_stain.py on one embryo's source image.

    Channel order and z-range are read per embryo from the sample table; the
    channel name is fixed to "pmc". The HDF5 result is flagged temp().
    """
    input:
        image=get_image,
    output:
        h5=temp(os.path.join(OUTDIR, "pmc_norm", "{embryo}.h5")),
    params:
        channel_name="pmc",
        channels=lambda wildcards: get_embryo_param(wildcards, "channel_order"),
        z_start=lambda wildcards: get_embryo_param(wildcards, "z_start"),
        z_end=lambda wildcards: get_embryo_param(wildcards, "z_end"),
    conda:
        "envs/hcr_quant.yaml"
    script:
        "scripts/normalize_pmc_stain.py"
rule predict_pmcs:
    """Run ilastik in headless mode on one normalized PMC image.

    Uses the ilastik executable and project file named in the config, writes
    HDF5 output to OUTDIR/pmc_probs (flagged temp()), and redirects the
    command's stderr to the rule's log file.
    """
    input:
        image=os.path.join(OUTDIR, "pmc_norm", "{embryo}.h5"),
        model=config["ilastik"]["model"],
    params:
        ilastik_loc=config["ilastik"]["loc"],
    output:
        temp(os.path.join(OUTDIR, "pmc_probs", "{embryo}.h5")),
    log:
        os.path.join(OUTDIR, "logs", "prediction", "{embryo}.log"),
    # Paths use the ":q" format spec so that file names containing spaces or
    # shell metacharacters reach ilastik as a single argument. The executable
    # itself is left unquoted in case the configured value carries arguments.
    shell:
        "({params.ilastik_loc} --headless "
        "--project={input.model:q} "
        "--output_format=hdf5 "
        "--output_filename_format={output:q} "
        "{input.image:q}) 2> {log:q}"
rule label_pmcs:
    """Run scripts/label_pmcs.py on one embryo.

    Consumes the normalized stain and the ilastik output for the embryo and
    writes OUTDIR/labels/{embryo}_pmc_labels.h5.
    """
    input:
        stain=os.path.join(OUTDIR, "pmc_norm", "{embryo}.h5"),
        probs=os.path.join(OUTDIR, "pmc_probs", "{embryo}.h5"),
    # NOTE(review): this log path is relative to the working directory, not
    # OUTDIR, unlike predict_pmcs - confirm which location is intended.
    log:
        log=os.path.join("logs", "labels", "{embryo}.log"),
    output:
        labels=os.path.join(OUTDIR, "labels", "{embryo}_pmc_labels.h5"),
    conda:
        "envs/segmentation.yaml"
    script:
        "scripts/label_pmcs.py"
rule quantify_expression:
    """Run scripts/count_spots.py on one embryo's image and PMC labels.

    Gene settings come from config["quant"]["genes"]; channel order and
    z-range are read per embryo from the sample table. Produces a NetCDF file
    under OUTDIR/expression and a CSV under OUTDIR/counts.
    """
    input:
        image=get_image,
        labels=os.path.join(OUTDIR, "labels", "{embryo}_pmc_labels.h5"),
    output:
        image=os.path.join(OUTDIR, "expression", "{embryo}.nc"),
        csv=os.path.join(OUTDIR, "counts", "{embryo}.csv"),
    params:
        gene_params=config["quant"]["genes"],
        channels=lambda wildcards: get_embryo_param(wildcards, "channel_order"),
        z_start=lambda wildcards: get_embryo_param(wildcards, "z_start"),
        z_end=lambda wildcards: get_embryo_param(wildcards, "z_end"),
        crop_image=True,
        quant_method="both",
    log:
        "logs/quant/{embryo}.log",
    conda:
        "envs/hcr_quant.yaml"
    script:
        "scripts/count_spots.py"
rule combine_counts:
    """Run scripts/combine_counts.py over the per-embryo count CSVs.

    Takes one OUTDIR/counts/{embryo}.csv per id in the sample-table index and
    writes OUTDIR/final/counts.csv.
    """
    output:
        final=os.path.join(OUTDIR, "final", "counts.csv"),
    input:
        counts=expand(
            os.path.join(OUTDIR, "counts", "{embryo}.csv"),
            embryo=samples.index,
        ),
    script:
        "scripts/combine_counts.py"