-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path12.picrust_stratified.sh
79 lines (59 loc) · 2.74 KB
/
12.picrust_stratified.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/bin/bash
# The purpose of this script is to run the stratified module of picrust on the qiagen samples - takes a few hours total, especially the pathway abundance inference step
# Note: All files within the KEGG directory are not provided as they are not publicly accessible.
#SBATCH --time=12:00:00
#SBATCH --nodes=2
#SBATCH --mem=10gb
#SBATCH -o picrust_stratified_qiagen.txt
#SBATCH -A exd44
d1=$(date +%s)
BIOM="output/picrust_seq_tab_qiagen"
SEQ="output/picrust_seq_qiagen"
OUT_DIR="output/picrust2_qiagen_output"
KEGG_MAP="KEGG/ko_to_path"
KEGG_NAME="KEGG/ko_names"
pwd
echo "Job started"
# load conda environment
module load anaconda3
source activate picrust2
# make directory for picrust2 output
mkdir $OUT_DIR
# format sequence file
grep -v '^x' ${SEQ}.txt | awk '{print ">seq"$1"\n"$2}' > ${SEQ}.fna
# sequence placement
place_seqs.py -s ${SEQ}.fna -o ${OUT_DIR}/out.tre -p 1 --intermediate placement_working
# hidden state prediction
hsp.py -i 16S -t ${OUT_DIR}/out.tre -o ${OUT_DIR}/marker_nsti_predicted.tsv.gz -p 1 -n
hsp.py -i EC -t ${OUT_DIR}/out.tre -o ${OUT_DIR}/EC_predicted.tsv.gz -p 1
hsp.py -i KO -t ${OUT_DIR}/out.tre -o ${OUT_DIR}/KO_predicted.tsv.gz -p 1
# metagenome prediction
metagenome_pipeline.py -i ${BIOM}.biom \
-m ${OUT_DIR}/marker_nsti_predicted.tsv.gz \
-f ${OUT_DIR}/KO_predicted.tsv.gz \
-o ${OUT_DIR}/KO_metagenome_out_stratified \
--strat_out
# infer pathway abundances
pathway_pipeline.py -i ${OUT_DIR}/KO_metagenome_out_stratified/pred_metagenome_contrib.tsv.gz \
-o ${OUT_DIR}/KEGG_pathways_out_stratified \
--no_regroup \
--map ${KEGG_MAP}.tsv \
--per_sequence_contrib \
--per_sequence_abun ${OUT_DIR}/KO_metagenome_out_stratified/seqtab_norm.tsv.gz \
--per_sequence_function ${OUT_DIR}/KO_predicted.tsv.gz
# add descriptions
add_descriptions.py -i ${OUT_DIR}/KEGG_pathways_out_stratified/path_abun_unstrat.tsv \
-o ${OUT_DIR}/KEGG_pathways_out_stratified/path_abun_unstrat_descrip.tsv \
--custom_map_table ${KEGG_NAME}.tsv
add_descriptions.py -i ${OUT_DIR}/KEGG_pathways_out_stratified/path_abun_unstrat_per_seq.tsv \
-o ${OUT_DIR}/KEGG_pathways_out_stratified/path_abun_unstrat_per_seq_descrip.tsv \
--custom_map_table ${KEGG_NAME}.tsv
# unzip output contents for downstream analysis
gunzip ${OUT_DIR}/KEGG_pathways_out_stratified/*.gz
gunzip ${OUT_DIR}/KO_metagenome_out_stratified/*.gz
sleep 60
echo "Job ended"
d2=$(date +%s)
sec=$(( ( $d2 - $d1 ) ))
hour=$(echo - | awk '{ print '$sec'/3600}')
echo Runtime: $hour hours \($sec\s\)