-
Notifications
You must be signed in to change notification settings - Fork 4
/
Model_building_timeseries_Recon2.m
191 lines (157 loc) · 7.06 KB
/
Model_building_timeseries_Recon2.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
%(c) Ali Kishk, Maria Piress Pacheco & Thomas Sauter
% 01 June 2020
% Luxembourg University
% Environment setup: put the working folder, the COBRA Toolbox and the
% FASTCORMICS RNAseq code on the MATLAB path and select IBM CPLEX as solver.
% The toolbox locations are hard-coded relative to the user's home folder.
addpath(genpath(pwd)); %add all files and folders in the working folder to matlab
addpath(genpath('~/cobratoolbox/'));
% First solver selection, issued before the FASTCORMICS path is added.
changeCobraSolver('ibm_cplex');
addpath(genpath('~/FASTCORMICS RNAseq/'));
% Second solver selection, this time with the 'all' argument. The returned
% flag is stored in solverOK but is not read again in the visible script.
solverOK=changeCobraSolver('ibm_cplex','all');
% List every FPKM table (*.csv) in the time-series folder. fileList holds the
% bare file names; fileList_col holds the same names with the '.csv' text
% removed and is used later to build the output file names.
fileList = dir('./data/time_series_study/FPKM_divided/*.csv');
fileList = {fileList.name};
fileList_col = strrep(fileList, '.csv', '');
% Load the Recon2 model. The MAT-file is expected to define a variable named
% `model`, which is read on the next statement.
load('consistRecon2_4.mat')
% Remove the gene version from the model: keep only the text before the first
% '.' of every gene identifier.
Recon2 = model;
genes = Recon2.genes;
for i=1:numel(genes)
% NOTE(review): table2array implies genes(i,1) is a table row here - confirm
% the type of model.genes stored in consistRecon2_4.mat.
x = strsplit(table2array(genes(i,1)),'.');
genes(i,1) = cellstr(x(1,1));
end
Recon2.genes = genes;
% Set reversible rxns: rev is 1 where the lower bound is negative, 0 elsewhere.
Recon2.rev= zeros(numel(Recon2.rxns),1);
Recon2.rev(Recon2.lb<0)=1;
% Model reconstruction parameters. already_mapped_tag and consensus_proportion
% are consumed by the fastcormics_RNAseq calls in the per-file loop; epsilon is
% the tolerance handed to fastcc_4_fastcormics further down in this section.
already_mapped_tag = 0;
consensus_proportion = 0.9;
epsilon = 1e-4;

% In-house dictionary for the Recon model.
% For other models and data, the user has to create a dictionary using, for
% instance, biomart or db2db.
if exist('data/dico_201911.mat.gz', 'file') == 2 % extract the compressed dictionary
    gunzip('data/dico_201911.mat.gz')
end
% NOTE(review): gunzip is called without an output folder while the load below
% names ./dico_201911.mat - confirm that the extracted file is reachable from
% the working folder (or through the addpath(genpath(pwd)) done at start-up).
load('./dico_201911.mat');

% Add the viral biomass equation from https://www.ebi.ac.uk/biomodels/MODEL2003020001
% The metabolites of the equation were renamed according to the metabolite
% nomenclature in Recon2.
equation = importfile("./viral_biomass_rxn_recon.txt", [1, Inf]);
equation = table2array(equation);
Recon2_inf = addReaction( Recon2,'biomass_virus',cell2mat(equation));
Recon2_inf.rev = Recon2_inf.lb < 0; % the rev field will be a logical array

% A is used as the set of reaction indices to keep; every other reaction is
% removed from the infected template (presumably A is the flux-consistent set
% returned by fastcc - confirm against fastcc_4_fastcormics).
A = fastcc_4_fastcormics(Recon2_inf, epsilon, 0);
models_keep = zeros(numel(Recon2_inf.rxns), 1);
models_keep(A,1) = 1;
Recon2_inf = removeRxns(Recon2_inf,Recon2_inf.rxns(setdiff(1:numel(Recon2_inf.rxns),find(models_keep(:,1)))));

% Reaction lists handed to fastcormics_RNAseq through the `func` field:
% optional_settings is used for the mock model, optional_settings_ (which also
% names the viral biomass reaction) for the infected model.
optional_settings.func = {'DM_atp_c_','biomass_reaction'};
optional_settings_.func = {'DM_atp_c_','biomass_reaction','biomass_virus'};
% For every FPKM table: reconstruct an infected model (and a mock model when at
% least two mock samples are present), run single-gene knock-outs on them and
% save the complete workspace so that the double-knock-out loop can reload it.
for i=1:numel(fileList)
    % Drop the per-file results of the previous iteration (all variables whose
    % names start with genes, model or gr) before building the next models.
    clear -regexp ^genes ^model ^gr
    rpkm = readtable('./data/time_series_study/FPKM_divided/'+string(fileList(i)));
    % The first column is used as the gene identifiers of the table.
    rownames = rpkm(:,1);
    rownames = table2array(rownames);
    %rpkm.Properties.RowNames = table2array(rownames);
    % remove gene names from the 1st column
    rpkm = rpkm(:,2:end);
    % select healthy and infected samples by the 'mock' string in the column name
    colnames = rpkm.Properties.VariableNames;
    colnames_ctl = colnames(contains(colnames,'mock'));
    colnames_cov = colnames(~contains(colnames,'mock'));
    rpkm = table2array(rpkm); %transform table to array
    rpkm_ctl = rpkm(:,contains(colnames,'mock'));
    rpkm_cov = rpkm(:,~contains(colnames,'mock'));

    % Reconstruct infected model. epsilon is the tolerance defined with the
    % other reconstruction parameters before this loop.
    discretized = discretize_FPKM(rpkm_cov, colnames_cov);
    [~, A] = fastcormics_RNAseq(Recon2_inf, discretized, rownames, dico, already_mapped_tag, consensus_proportion, epsilon, optional_settings_);
    % Keep only the reactions indexed by A; remove all others from the template.
    models_keep = zeros(numel(Recon2_inf.rxns), 1);
    models_keep(A,1) = 1;
    model_cov = removeRxns(Recon2_inf,Recon2_inf.rxns(setdiff(1:numel(Recon2_inf.rxns),find(models_keep(:,1)))));
    % Remove unused genes
    model_cov = removeUnusedGenes(model_cov);
    % Check consistency: the model counts as consistent when the check returns
    % as many entries as the model has reactions.
    sanity= fastcc_4_fastcormics(model_cov,epsilon,0);
    if numel(sanity)==numel(model_cov.rxns)
        disp('Consistent Infected Model')
    else
        disp('Inconsistent Infected Model')
    end

    % Adjust objective function for the infected model: first optimise host
    % biomass alone, then weight host biomass 100 and viral biomass 1 and cap
    % host biomass at 10% of its optimum.
    model_cov = changeObjective(model_cov,'biomass_reaction',1);
    sol=optimizeCbModel(model_cov);
    if isempty(sol.f) || isnan(sol.f)
        % Without an objective value the upper bound below would be invalid.
        error('Model_building_timeseries:noObjectiveValue', ...
            'FBA on the infected model built from "%s" returned no objective value.', ...
            fileList{i});
    end
    idx_biomass = find(ismember(model_cov.rxns,'biomass_reaction'));
    idx_biomass_virus = find(ismember(model_cov.rxns,'biomass_virus'));
    model_cov.c(idx_biomass)=100;
    model_cov.c(idx_biomass_virus)=1;
    model_cov.ub(idx_biomass)=0.1 *sol.f;
    %Save the reconstructed model
    save('./models/timeseries/'+string(fileList_col(i))+'SARS_CoV2_model_2.mat','model_cov');

    % Single gene KO on the infected model; genes whose growth ratio is at or
    % below the threshold are treated as essential.
    [grRatio_cov, grRateKO_cov, grRateWT_cov, ~, ~, ~, geneList]= singleGeneDeletion_MISB(...
        model_cov, 'FBA', [], 0, 1);
    threshold = 0.2;
    genes_cov = geneList(grRatio_cov<= threshold);

    if numel(colnames_ctl)>=2
        % Reconstruct the mock model
        discretized = discretize_FPKM(rpkm_ctl, colnames_ctl);
        [~, A] = fastcormics_RNAseq(Recon2, discretized, rownames, dico, already_mapped_tag, consensus_proportion, epsilon, optional_settings);
        % Keep only the reactions indexed by A; remove all others from Recon2.
        models_keep = zeros(numel(Recon2.rxns), 1);
        models_keep(A,1) = 1;
        model_ctl = removeRxns(Recon2,Recon2.rxns(setdiff(1:numel(Recon2.rxns),find(models_keep(:,1)))));
        % Remove unused genes
        model_ctl = removeUnusedGenes(model_ctl);
        % check consistency
        sanity= fastcc_4_fastcormics(model_ctl,epsilon,0);
        if numel(sanity)==numel(model_ctl.rxns)
            disp('Consistent Control Model')
        else
            disp('Inconsistent Control Model')
        end
        % Set the objective function of the mock model
        model_ctl = changeObjective(model_ctl,'biomass_reaction',1);
        save('./models/timeseries/'+string(fileList_col(i))+'_Mock_model_2.mat','model_ctl');
        % essential genes of the infected model that exist in the reconstructed
        % mock model
        essential_genes_in_ctl = intersect(genes_cov,model_ctl.genes);
        [grRatio_ctl, grRateKO_ctl, grRateWT_ctl, ~, ~, ~, geneList2]= singleGeneDeletion_MISB(...
            model_ctl, 'FBA', essential_genes_in_ctl, 0, 1);
        % NOTE(review): this indexes the requested gene list, not the returned
        % geneList2 - it assumes the ratios come back in the requested order.
        genes_ctl = essential_genes_in_ctl(grRatio_ctl<= threshold);
        % define essential genes in the infected model that do not exist in
        % the reconstructed mock model as unknown safety
        SKO_unk = setdiff(genes_cov,model_ctl.genes);
    else
        % Fewer than two mock samples: no mock model is built and placeholder
        % values are stored instead.
        essential_genes_in_ctl = {'0'};
        grRatio_ctl = {'0'};
        model_ctl = {'0'};
        genes_ctl = {'0'};
        % define essential genes in the infected model, where there is no
        % mock model, as unknown safety.
        SKO_unk = [];
    end
    %% Find SKO that is safe on healthy models
    SKO_safe = setdiff(essential_genes_in_ctl,genes_ctl);
    SKO_toxic = intersect(essential_genes_in_ctl,genes_ctl);
    % Save SKO outputs (no variable list: the entire workspace is written)
    save('./KO_data/timeseries/SKO_'+string(fileList_col(i))+'_2.mat');
end
% Double-gene knock-outs: reload each per-file workspace written by the
% single-knock-out loop, run the double deletion on the infected model and,
% when a mock model was built, classify the resulting pairs on that model.
for i=1:numel(fileList)
    % Double gene KO. The load restores the full workspace saved for this file
    % (model_cov, genes_cov, colnames_ctl, model_ctl, ...).
    load('./KO_data/timeseries/SKO_'+string(fileList_col(i))+'_2.mat');
    [grRatio_cov_, grRateKO_cov_, grRateWT_cov_]= doubleGeneDeletion(...
        model_cov, 'FBA');
    % Extracting Infected DKO outputs
    [DKO_all,DKO_non_ess,DKO_syn,DKO_both] = Find_Double_KO_Outputs(...
        grRatio_cov_,model_cov.genes,genes_cov);
    % A mock model only exists when the file had at least two mock samples;
    % otherwise model_ctl is the placeholder {'0'} and must not be passed on.
    if numel(colnames_ctl)>=2
        %% find DKO that is safe on healthy model
        [DKO_safe,DKO_toxic, grRateWT_ctl] = Find_Safe_DKO(...
            DKO_both,model_ctl,0.1);
    else
        DKO_toxic = DKO_both;
        % NOTE(review): square brackets concatenate the two char vectors into
        % 'NaNNaN'; confirm whether a two-element cell was intended.
        DKO_safe = ['NaN','NaN'];
    end
    % Save DKO outputs (no variable list: the entire workspace is written)
    save('./KO_data/timeseries/DKO_'+string(fileList_col(i))+'_2.mat');
end