-
Notifications
You must be signed in to change notification settings - Fork 1
/
script.sh
134 lines (103 loc) · 5.21 KB
/
script.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Project_pbfb_Edisa_Xiu
# Pipeline to analyze 16S rDNA sequences combining USEARCH and QIIME and community diversity analysis using R.
####################################################################################################################
##1
##Obtain sequences from Dini-Andreote et al. 2014 The ISME Journal (30 samples in total: 2 months * 5 stages of succession * 3 replicates)
##The sequences are already merged and quality filtered in QIIME. (Sequences files : seqs.fna) (Information of the samples in MappingMJ1.txt)
##IMPORTANT: To check the sequence files, just look at the first lines with head on the cluster or in a terminal. The files are very large and may crash your text editor.
####################################################################################################################
##2
##The original fna file has the following header: >1FMay_1 H23S4DR01B9UER orig_bc=CGTGTCTCTA new_bc=CGTGTCTCTA bc_diffs=0
##and we need to change it into: >1FMay_1
# Keep only the sequence ID on every FASTA header: delete everything from the
# first whitespace character to the end of the line. Lines that contain no
# whitespace are copied through unchanged.
# [[:space:]] is the POSIX spelling of GNU sed's \s, so this also works with
# sed implementations that do not support the \s extension.
sed 's/[[:space:]].*$//' seqs.fna > seqs.fa
####################################################################################################################
##3
##Use USEARCH to pick OTUs and make the OTU table
#You need to have the USEARCH software installed.
#Write a script1.sh and run it on the Peregrine cluster
sbatch script1.sh
#!/bin/bash
#SBATCH --job-name=project_EX
#SBATCH --time=00:10:00
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --cpus-per-task=1
#SBATCH --mem=2G
#SBATCH --partition=short
# script1.sh: dereplicate the reads, cluster them into OTUs and build the OTU
# table with USEARCH. All input/output files live in the submission directory.

# Single place to change the location/version of the USEARCH binary.
USEARCH=/data/miceco/software/usearch9.2.64_i86linux32

# Record the working directory in the job log.
pwd
# Dereplicate: write unique sequences relabelled "Uniq<n>" with size annotations (-sizeout)
"$USEARCH" -fastx_uniques seqs.fa -fastaout uniques.fa -sizeout -relabel Uniq
# Inspect the file that was just written (uniques.fa, not uniques.fasta).
head uniques.fa
# Pick OTUs; -minsize 2 ignores unique sequences with abundance below 2 (singletons)
"$USEARCH" -cluster_otus uniques.fa -otus otus.fa -relabel OTU -minsize 2
head otus.fa
# Number of OTU sequences that were written
grep -c "^>" otus.fa
# Make OTU table by mapping reads to the OTUs at 97% identity
# NOTE(review): this maps the original seqs.fna, not the header-trimmed seqs.fa
# used above - confirm that this is intended.
"$USEARCH" -usearch_global seqs.fna -db otus.fa -strand plus -id 0.97 -otutabout otu_table.txt
# -S (plain ASCII hyphen) chops long lines instead of wrapping them. The original
# used an en-dash, which less would interpret as a file name.
less -S otu_table.txt
####################################################################################################################
##4
# Convert the USEARCH OTU table "otu_table.txt" into the QIIME format with reorder_file.py,
# so that it can be used in the "pick representative OTU sequences" step.
# reorder_file.py reads "otu_table.txt" and writes "otu_table_reformatted.txt"; both names are
# hard-coded as relative paths in the script, so run it from the directory that holds "otu_table.txt".
## Run the Python script:
python reorder_file.py
## Content of reorder_file.py:
#!/usr/bin/env python
"""Rewrite a tab-separated OTU table as one line of column names per OTU.

The first line of the input is the header. For every following row the output
gets one line: the row's first field, followed by the header name of each
column whose value is exactly "1", all separated by tabs.
"""

filename = "otu_table.txt"
outfile = "otu_table_reformatted.txt"


def reformat_otu_table(in_path, out_path):
    """Convert the table at ``in_path`` and write the result to ``out_path``.

    in_path  -- tab-separated table; first line is the header, first column
                is the row (OTU) identifier.
    out_path -- file to create or overwrite with the reformatted rows.

    Raises IndexError if a row has more fields than the header.
    """
    # Text mode and open() work on both Python 2.7 and Python 3; the "with"
    # statement closes both files even if an error occurs part-way through.
    with open(in_path, "r") as src, open(out_path, "w") as out:
        # Strip the line terminator before splitting. Otherwise the last
        # field carries a trailing "\n": the last header name would contain a
        # newline and a "1" in the last column would never match.
        header = src.readline().rstrip("\r\n").split("\t")
        for line in src:
            fields = line.rstrip("\r\n").split("\t")
            out.write(fields[0])
            for i in range(1, len(fields)):
                # Only cells equal to "1" are reported; any other value is
                # skipped. NOTE(review): confirm the table only holds 0/1.
                if fields[i] == "1":
                    out.write("\t" + header[i])
            out.write("\n")


if __name__ == "__main__":
    reformat_otu_table(filename, outfile)
# Rename the OTU identifiers (first column) of "otu_table_reformatted.txt" from "OTU" to "denovo".
# The pattern is anchored to the start of the line so that an "OTU" occurring
# inside an ID in a later column is left untouched.
cp otu_table_reformatted.txt seqs_otus.txt; sed -i -e 's/^OTU/denovo/' seqs_otus.txt
##5
# Rename the FASTA headers of the output file from the pick otus step in the same way.
# NOTE(review): otus_edited.fa is not referenced by the later steps shown in this file.
cp otus.fa otus_edited.fa; sed -i -e 's/^>OTU/>denovo/' otus_edited.fa
####################################################################################################################
##6
#Use QIIME to assign taxonomy and make an OTU table with taxonomy
#Write script2.sh and run it on the Peregrine cluster
sbatch script2.sh
#!/bin/bash
#SBATCH --job-name=project_EX
#SBATCH --time=00:30:00
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --cpus-per-task=1
#SBATCH --mem=2G
#SBATCH --partition=short
# script2.sh: pick representative sequences, assign taxonomy and build the
# final OTU table with QIIME. Expects seqs_otus.txt, seqs.fa, 94_otus.fasta and
# 94_otu_taxonomy.txt in the submission directory.
module load QIIME/1.9.1-foss-2016a-Python-2.7.11-tmp
# Record the loaded modules in the job log.
module list
# Pick representative OTU sequences (written to rep.fna)
pick_rep_set.py -i seqs_otus.txt -f seqs.fa -o rep.fna
# Assign_taxonomy
# The input is the rep.fna produced above: no step creates "rep_edited.fa", and
# make_otu_table.py below reads rep_tax_assignments.txt, the name derived from rep.fna.
assign_taxonomy.py -i rep.fna -m uclust -o uclust_assigned_taxonomy -r 94_otus.fasta -t 94_otu_taxonomy.txt
####### Make_otu_table with taxonomy######
make_otu_table.py -i seqs_otus.txt -t uclust_assigned_taxonomy/rep_tax_assignments.txt -o otu_table1.biom
# Remove singletons (Alternate step): keep only OTUs with a total count of at least 2
filter_otus_from_otu_table.py -i otu_table1.biom -o otu_table2.biom -n 2
# Remove Unassigned
filter_taxa_from_otu_table.py -i otu_table2.biom -n Unassigned -o otu_table.biom
# convert to txt file with taxonomy:
biom convert -i otu_table.biom -o OTUtable.from_biom_w_taxonomy.txt --header-key taxonomy --to-tsv
####################################################################################################################
##7
# Analyze microbial community diversity
# The following steps run on R version 3.3.2 (2016-10-31) using RStudio interface
# Install the libraries using Rpackages.R
# Load the OTU table (OTUtable.from_biom_w_taxonomy.txt) to R and change the OTU table format using OTUtableformat.R
# Change the header of OTUtable_nontaxon.txt using Header_mapping.csv in Microsoft Excel 2010
####################################################################################################################
##8
# Use OTUtable_nontaxon.txt to calculate alpha diversity (richness, Shannon and Simpson) and
# draw the bar plots (Richness.png, Shannon.png and Simpson.png) using alpha_diversity.R
####################################################################################################################
##9
# Use OTUtable_nontaxon.txt to calculate beta diversity (Bray-Curtis) and make an ordination plot (NMDS.png) using NMDS.R