-
Notifications
You must be signed in to change notification settings - Fork 8
/
example_protein_cofactor_network_analysis.sh
156 lines (131 loc) · 6.36 KB
/
example_protein_cofactor_network_analysis.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# This file contains all the necessary steps for current flow analysis of a protein-cofactor
# trajectory from command-line arguments.
#
# For more information about the steps, functions and inputs, see the jupyter notebook tutorial.
#
# A. Decide what to do.
# The variables below indicate whether or not a calculation/step should be done.
# If set to 1, the calculation will be carried out, otherwise not.
# The steps follow a logical order (more details are found in the jupyter notebook):
# 1.a Compute the protein contact map.
# 1.b Compute the cofactor contribution to the contact map.
# 2.a Compute cofactor fluctuations.
# 2.b Compute the mutual information (MI) matrix (using the contact map as input).
# 3. Compute the current flow using the contact map and MI matrix as input.
# 4. Plotting the current flow on the beta column in the .PDB (using current flow as input).
#
compute_prot_cmap=1
compute_cofactor_prot_cmap=1
compute_interactor_node_fluctuations=1
compute_MI=1
current_flow=1
project_on_structure=1
# B. Set general input parameters
top_file=input_data/my_system.pdb
trajs=(input_data/system_traj1.dcd input_data/system_traj2.dcd)
n_cores=4
n_bootstraps=10
out_dir=Results_data/
out_dir_MI=${out_dir}MI_data/
file_end_name=my_system
dt=1
n_chains=4 # number of chains in a homomeric protein.
cofactor_domain_sel=input_data/cofactor_domain_selection.txt # Input file with selections for
# cofactor domains (defining interactors
# - see juputer notebook for more info)
source_inds=input_data/inds_sources.txt # Residue indices of sources used in the current flow
sink_inds=input_data/inds_sinks.txt # Residue indices of sinks used in the current flow
aux_inds=input_data/auxiliary_prot_inds.txt # Residue indices of auxiliary subunits.
# Used when symmeterizing current flow over
# homomeric subunits (chains).
# This assumes one auxiliary protein per chain, ie.
# n_chains identical auxiliary proteins.
# If there is no auxiliary subunit, just remove
# the "-iaux ${aux_inds}"
# input to the current flow script.
# C. Call the python scripts
if [ $compute_prot_cmap -eq 1 ]
then
# 1.a Compute the protein contact map.
python run_contact_map.py -top ${top_file} -trj ${trajs[@]} -fe ${file_end_name} -od ${out_dir} -dt ${dt}
fi
if [ ${compute_cofactor_prot_cmap} -eq 1 ]
then
# 1.b Compute the cofactor contribution to the contact map.
python run_cofactor_interactors.py -top ${top_file} -trj ${trajs[@]} -fe ${file_end_name} \
-od ${out_dir} -dt ${dt} -cofactor_domain_sel ${cofactor_domain_sel} -dt ${dt}
fi
if [ ${compute_interactor_node_fluctuations} -eq 1 ]
then
# 2.a Compute cofactor fluctuations.
#
# By adding the flag "-fluctuations" we will compute the node fluctuations.
# We also supply the cofactor coordinates and permutation indices computed in the previous step
# to avoid redoing this.
python run_cofactor_interactors.py -top ${top_file} -trj ${trajs[@]} \
-fe ${file_end_name} -od ${out_dir} -dt ${dt} \
-cofactor_coords ${out_dir}cofactor_interactor_coords_${file_end_name}.npy \
-cofactor_inds ${out_dir}cofactor_interactor_indices_${file_end_name}.npy \
-fluctuations
fi
if [ $compute_MI -eq 1 ]
then
# 2.b Compute the mutual information (MI) matrix (using the contact map as input).
#
# Computing MI with 4 blocks per matrix column => 4*(4-1)/2 + 4 = 10 blocks in total.
#
# The density is by default estimated with 1 to 5 number of Gaussian components.
# To change the range of allowed components, use the flag "-GMM_range ${min_comp} ${max_comp}",
# e.g. "-GMM_range 1 5" corresponds to the default setting.
for((i_block=1; i_block<=10; i_block++))
do
echo $i_block
python run_mutual_information.py -top ${top_file} -trj ${trajs[@]} -fe ${file_end_name} \
-od $out_dir_MI -dt ${dt} -n ${n_cores} -i_block $i_block -n_blocks_col 4 \
-cmap ${out_dir}distance_matrix_semi_bin_${file_end_name}.txt \
-aif ${out_dir}interactor_centroid_fluctuations_${file_end_name}.npy \
-aipc ${out_dir}cofactor_protein_residue_semi_binary_cmap_${file_end_name}.npy \
-n_splits $n_bootstraps
done
# When the contact map is used as input, we might need to compute the MI the matrix diagonal.
# This should only be done if the MI on the diagonal is used to normalize the MIs prior to the current flow analysis.
python run_mutual_information.py -top ${top_file} -trj ${trajs[@]} -od ${out_dir_MI} \
-fe ${file_end_name} -n ${n_cores} -i_block 0 -n_blocks_col 1 \
-aif ${out_dir}interactor_centroid_fluctuations_${file_end_name}.npy \
-aipc ${out_dir}cofactor_protein_residue_semi_binary_cmap_${file_end_name}.npy \
-n_splits ${n_bootstraps} -MI_map_diag -dt ${dt}
# Build a complete MI matrix from the blocks (written to compressed format)
python allopath/from_matrix_blocks.py -f ${out_dir_MI}res_res_MI_part_ \
-fe ${file_end_name} -od ${out_dir_MI} -n_blocks 10
fi
if [ $current_flow -eq 1 ]
then
# 3. Compute the current flow using the contact map and MI matrix as input.
#
# Current flow betweenness is computed by default. To compute current flow closeness,
# we need to add the flag "-CFC"
#
# Current flow analysis is averaged over subunits (ie. chains) if n_chains > 1
#
# Note: Adding the flag "-norm" and input "-f_map_diag ${out_dir_MI}diagonal_MI_${file_end_name}.npy"
# would result in symmetric uncertainty normalization of the MI (Witter & Frank, 2005).
python run_current_flow_analysis.py -f_map ${out_dir_MI}res_res_MI_compressed_${file_end_name}.npy \
-cmap ${out_dir}distance_matrix_semi_bin_${file_end_name}.txt \
-od ${out_dir} -fe ${file_end_name} \
-f_source ${source_inds} \
-f_sink ${sink_inds} \
-iaux ${aux_inds} \
-aipc ${out_dir}cofactor_protein_residue_semi_binary_cmap_${file_end_name}.npy \
-n_cores ${n_cores} -n_chains ${n_chains}
fi
if [ $project_on_structure -eq 1 ]
then
# 4. Plotting the current flow on the beta column in the .PDB (using current flow as input).
#
# Plotting the current flow on the beta-column in the PDB file. Note: It has to be .pdb
# Note: Adding the flag "-norm" would scale the current flow values between 0 and 1 in
# the generated .pdb.
python allopath/make_pdb.py -top ${top_file} -f ${out_dir}average_current_flow_${file_end_name}.npy \
-od ${out_dir}PDBs/ -fe ${file_end_name} \
-int_atom_inds ${out_dir}cofactor_interactor_atom_indices_${file_end_name}.npy \
fi