forked from hippolyte456/Hackathon_NGS_2022
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReproHackathon.py
186 lines (136 loc) · 5.45 KB
/
ReproHackathon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import os
import sys
#######################
# Define Workflow #
#######################
#--------------------#
#   GetFastq files   #
#--------------------#
# Settings for the GetFastq step: container path, working directory and the
# two FASTQ path templates, all keyed on the {ncbi_id} wildcard.

# ~~ input
# Wildcard placeholder (presumably consumed as a param by rules/GetFastq.py,
# which is not visible from this file).
__params_GF_NCBI_id = "{ncbi_id}"

# ~~ parameters depending on inputs
# Container path is made relative to the current directory with "./".
__container_GF = "./" + config["GetFastq"]["container"]
__repertory_GF = config["GetFastq"]["Repertory"]

# ~~ output
# <Repertory>{ncbi_id}/{ncbi_id}_1.fastq and <Repertory>{ncbi_id}/{ncbi_id}_2.fastq
# (plain concatenation: the configured directory must carry its own trailing slash).
__output_GF_1 = __repertory_GF + "{ncbi_id}/{ncbi_id}_1.fastq"
__output_GF_2 = __repertory_GF + "{ncbi_id}/{ncbi_id}_2.fastq"
#---------------#
#   GetGenome   #
#---------------#
# Settings for the GetGenome step: one genome file per {chromosom} wildcard.
# NOTE(review): the original comment here read "convert SRA files into
# compressed FASTQ files", which looks like it belongs to GetFastq — confirm.

# ~~ parameters depending on inputs
__params_GG_Chromosom = "{chromosom}"
__container_GG = "./" + config["GetGenome"]["container"]
__repertory_GG = config["GetGenome"]["Repertory"]

# ~~ output
# <Repertory>{chromosom}<Extension>
__output_GG_Genome = __repertory_GG + __params_GG_Chromosom + config["GetGenome"]["Extension"]
#--------------#
#   GetAnnot   #
#--------------#
# Settings for the GetAnnot step, all read from the "GetAnnot" config section.

# ~~ parameters depending on inputs
__genome_ref_GA = config["GetAnnot"]["genome_ref"]
__file_name_GA = config["GetAnnot"]["Filename"]
__container_GA = "./" + config["GetAnnot"]["container"]
__repertory_GA = config["GetAnnot"]["Repertory"]

# ~~ output
# <Repertory><Filename>.gtf — despite the "_Genome" suffix in the variable
# name, this is the path of a .gtf file.
__output_GA_Genome = __repertory_GA + __file_name_GA + ".gtf"
#-----------------#
#   IndexGenome   #
#-----------------#
# Indexing of the human genome from the file holding the concatenated
# chromosome sequences.

# ~~ input
# One genome path per entry of config["Chromosom"], built with the GetGenome
# directory and extension.
__input_IG_Genome = expand(
    "{repertory}{filename}{extension}",
    repertory=config["GetGenome"]["Repertory"],
    filename=config["Chromosom"],
    extension=config["GetGenome"]["Extension"]
)

# ~~ parameters depending on inputs
__container_IG = "./" + config["IndexGenome"]["container"]
__repertory_IG = config["IndexGenome"]["Repertory"]
# The annotation consumed here is the GetAnnot output path.
__input_IG_annot = __output_GA_Genome

# ~~ output
# Files expected inside the IndexGenome directory (presumably the set written
# by the indexing command in rules/IndexGenome.py — not visible from here).
__output_CI_ChrL = __repertory_IG + "chrLength.txt"
__output_CI_ChrNL = __repertory_IG + "chrNameLength.txt"
__output_CI_ChrN = __repertory_IG + "chrName.txt"
__output_CI_ChrS = __repertory_IG + "chrStart.txt"
__output_CI_Genome = __repertory_IG + "Genome"
__output_CI_GenomeP = __repertory_IG + "genomeParameters.txt"
__output_CI_SA = __repertory_IG + "SA"
__output_CI_SAi = __repertory_IG + "SAindex"
#------------#
#   FASTQC   #
#------------#
# Quality check settings; the inputs are the two GetFastq output templates.

# ~~ input
__input_QC_1 = __output_GF_1
__input_QC_2 = __output_GF_2
__container_QC = "./" + config["Fastqc"]["container"]
__repertory_QC = config["Fastqc"]["Repertory"]

# ~~ parameters depending on inputs
__params_QC_NCBI_id = "{ncbi_id}"

# ~~ output
# <Repertory>{ncbi_id}<Extension1>
__output_QC = __repertory_QC + __params_QC_NCBI_id + config["Fastqc"]["Extension1"]
#-----------------#
#   MappingSTAR   #
#-----------------#
# Mapping of the compressed FASTQ files against the indexed genome
# (alignment of the reads on the human genome).

# ~~ input
# Read files come from GetFastq; the index dependency is the "SAindex" path
# declared in the IndexGenome section.
__input_MS_1 = __output_GF_1
__input_MS_2 = __output_GF_2
__input_MS_index = __output_CI_SAi
__container_MS = "./" + config["MappingSTAR"]["container"]
__repertory_MS = config["MappingSTAR"]["Repertory"]

# ~~ parameters depending on inputs
__params_MS_NCBI_id = "{ncbi_id}"

# ~~ output
# <Repertory>{ncbi_id}<Extension>
__output_MSTAR_bam = __repertory_MS + __params_MS_NCBI_id + config["MappingSTAR"]["Extension"]
#----------------#
#   CountReads   #
#----------------#
# Settings for the read-counting step, which gathers the per-sample files
# listed under config["NCBI_id"].

# ~~ inputs
# One MappingSTAR output path per sample id.
__input_CR_counts = expand(
    "{repertory}{filename}{extension}",
    repertory=config["MappingSTAR"]["Repertory"],
    filename=config["NCBI_id"],
    extension=config["MappingSTAR"]["Extension"]
)
__gtf_CR = __output_GA_Genome
__mapping_CR = __output_MSTAR_bam
# One Fastqc output path per sample id.
__fastqc_CR = expand(
    "{repertory}{filename}{extension}",
    repertory=config["Fastqc"]["Repertory"],
    filename=config["NCBI_id"],
    extension=config["Fastqc"]["Extension1"]
)

# ~~ parameters depending on inputs
__file_name_CR = config["CountReads"]["Filename"]
__container_CR = "./" + config["CountReads"]["container"]
__repertory_CR = config["CountReads"]["Repertory"]

# ~~ output
# <Repertory><Filename>.counts
__output_CR = __repertory_CR + __file_name_CR + ".counts"
#-----------#
#   Deseq   #
#-----------#
# Settings for the Deseq step; its input is the CountReads output file.

# ~~ input
__input_DES_count = __output_CR

# ~~ parameters depending on inputs
__container_DES = "./" + config["Deseq"]["container"]
__repertory_DES = config["Deseq"]["Repertory"]

# ~~ output
# <Repertory>pca<Extension> and <Repertory>plotMA_res<Extension>
__output_PCA = __repertory_DES + "pca" + config["Deseq"]["Extension"]
__output_PlotMA = __repertory_DES + "plotMA_res" + config["Deseq"]["Extension"]
#####################
# Include Rules #
#####################
# Pull in the rule definitions, one file per workflow step, from the rules/
# directory. The included files can see the module-level variables defined
# above in this file.
# NOTE(review): every path is built from os.getcwd(), so the includes only
# resolve when the workflow is launched from the directory that contains
# rules/. Snakemake also accepts include paths relative to the Snakefile
# itself — confirm whether the dependency on the working directory is intended.
include : os.getcwd() + "/rules/GetFastq.py"
include : os.getcwd() + "/rules/GetGenome.py"
include : os.getcwd() + "/rules/IndexGenome.py"
include : os.getcwd() + "/rules/MappingStar.py"
include : os.getcwd() + "/rules/GetAnnot.py"
include : os.getcwd() + "/rules/CountReads.py"
include : os.getcwd() + "/rules/Fastqc.py"
include : os.getcwd() + "/rules/Deseq.py"
###################
# Rule Target #
###################
rule targets :
input :
__output_PCA,
__output_PlotMA
message :
"All the workflow is done !!"