From 203d396500357da8956fb83766287ef577d3ec4b Mon Sep 17 00:00:00 2001 From: rjstevick Date: Wed, 1 Jul 2020 21:09:14 +0200 Subject: [PATCH] add new supp figure 1 - control bar plots --- MetatranscriptomeAnalysis/README.md | 2 +- README.md | 10 +- .../{Figure3_S1.R => Figure3_S1_S2.R} | 141 ++++++++++++------ ...{Figure4_S1_S2_S4.R => Figure4_S2_S3_S5.R} | 113 ++++++-------- .../{FigureS3_lefse.R => FigureS4_lefse.R} | 10 +- .../{FigureS5-S8.R => FigureS6-S9.R} | 46 ++---- TNCpaper_ScriptsData/README.md | 8 +- 7 files changed, 176 insertions(+), 154 deletions(-) rename TNCpaper_ScriptsData/{Figure3_S1.R => Figure3_S1_S2.R} (76%) rename TNCpaper_ScriptsData/{Figure4_S1_S2_S4.R => Figure4_S2_S3_S5.R} (90%) rename TNCpaper_ScriptsData/{FigureS3_lefse.R => FigureS4_lefse.R} (98%) rename TNCpaper_ScriptsData/{FigureS5-S8.R => FigureS6-S9.R} (90%) diff --git a/MetatranscriptomeAnalysis/README.md b/MetatranscriptomeAnalysis/README.md index 42bcfee..9ec6a4a 100644 --- a/MetatranscriptomeAnalysis/README.md +++ b/MetatranscriptomeAnalysis/README.md @@ -5,7 +5,7 @@ This folder contains scripts used to perform modified SAMSA2 analysis on the met - Main modified SAMSA2 script (masterscript*.sh) - Scripts to run Rscripts on the command line (deSEQ*.sh) - Script to compute DIAMOND annotations against refSeq dbs (newannot.sh) -- Raw sequence counts and file inputs +- Raw sequence counts and file inputs (rawcounts.txt and samples_transcriptomes.txt) #### 2. R_scripts - DeSeq2 files to run states for the susbsytems based on deSEQ_groups.sh or deSEQ_subsys.sh diff --git a/README.md b/README.md index 67f1ac9..0cf9200 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ This folder contains scripts used to perform modified SAMSA2 analysis on the met - Main modified SAMSA2 script (masterscript*.sh) - Scripts to run Rscripts on the command line (deSEQ*.sh) - Script to compute DIAMOND annotations against refSeq dbs (newannot.sh) -- Raw sequence counts and file inputs +- Raw sequence counts and file inputs (rawcounts.txt and samples_transcriptomes.txt) #### 2. R_scripts - DeSeq2 files to run states for the susbsytems based on deSEQ_groups.sh or deSEQ_subsys.sh @@ -53,8 +53,8 @@ This folder contains all the processed data files from the analysis described in - Metatranscriptomic annotation taxonomy results with RefSeq #### Scripts - **Figure1_2.R** - makes the map of sites and PCA of environmental data. -- **Figure3_S1.R** - 16S rRNA alpha- and beta-diversity and rarefaction plots. -- **Figure4_S1_S2_S4.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S2). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S1 and S4). +- **Figure3_S1_S2.R** - 16S rRNA percent abundance barplots (including controls), rarefaction plots, and alpha- and beta-diversity. +- **Figure4_S2_S3_S5.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S3). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S2 and S5). - **Figure5_6.R** - Metaranscriptomic functional data at pathway levels, subset for stress response (N vs S), nitrogen and phosphorus metabolism (each site vs mean) with barplots. -- **FigureS3_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx) -- **FigureS5-S8.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps) +- **FigureS4_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx) +- **FigureS6-S9.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps) diff --git a/TNCpaper_ScriptsData/Figure3_S1.R b/TNCpaper_ScriptsData/Figure3_S1_S2.R similarity index 76% rename from TNCpaper_ScriptsData/Figure3_S1.R rename to TNCpaper_ScriptsData/Figure3_S1_S2.R index 8697c15..6a69943 100644 --- a/TNCpaper_ScriptsData/Figure3_S1.R +++ b/TNCpaper_ScriptsData/Figure3_S1_S2.R @@ -1,11 +1,10 @@ # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary -# 16S diversity and rarefaction plots -# Figures 3 and S1 -# updated 4/16/2020 for resubmission +# 16S controls, rarefaction, and diversity plots +# Figures 3, S1, and S2 +# RJS updated 6/30/2020 for resubmission # 16S Amplicon data at ASV level - library(tidyr) library(dplyr) library(ggplot2) @@ -18,7 +17,7 @@ library(ggbiplot) library(vegan) library(ggpubr) library(stringr) - +library(scales) #import data - ASV counts, **not normalized** data<-read_xlsx("Taxonomy/16SallData_SILVAtaxa.xlsx", sheet="ASVcounts") @@ -28,7 +27,7 @@ metadata<-read_xlsx("Taxonomy/16SallData_SILVAtaxa.xlsx", sheet="Metadata") #add in all taxa hierarchy to ASV counts datafull<-full_join(data, taxakey) #convert to long form -datafulllong<-gather(datafull, SampleID, count, "TNC01":"TNC64") +datafulllong<-gather(datafull, SampleID, count, "TNC01":"TNC64") #### @@ -45,10 +44,9 @@ SampleReadCounts<- # add read counts in to main table metadata$SampleTotalReads<-SampleReadCounts$SampleTotalReads datafulllong<- - full_join(datafulllong, SampleReadCounts, by="SampleID") -# calculate percent abundances -datafulllong$percent<- - datafulllong$count/datafulllong$SampleTotalReads + full_join(datafulllong, SampleReadCounts, by="SampleID") %>% + # calculate percent abundances + mutate(percent=count/SampleTotalReads) #add in the metadata datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID", copy=FALSE, suffix=c(".x",".y")) @@ -56,19 +54,19 @@ datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID", #select only gut, water metadatags<-filter(metadata, SampleType=="gut" | SampleType=="water") -# Number of reads per sample +# Number of QCd sequencing reads per sample ggplot(metadatags,aes(SampleName,SampleTotalReads,fill=Station))+geom_bar(stat="identity")+ facet_grid(.~SampleType+Station, scales="free",space="free")+ theme(legend.position = "none", axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.ticks.length = unit(0.6, 'lines'))+ labs(y="Number of reads \nper sample",x=NULL)+ - scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"), + scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"), labels=c("1. Providence River", "2. Greenwich Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond")) #### -### 16S Rarefaction curve and coverage - Figure S1 ---------------------------------------------- +### 16S Rarefaction curve and coverage - Figure S2 ---------------------------------------------- #### datamat<-data[1:61] # remove controls @@ -78,7 +76,6 @@ raremax <- min(rowSums(datamatt)) S <- specnumber(datamatt) # observed number of ASVs Srare <- rarefy(datamatt, raremax) - #define colors based on gut/site or water colors<-c("#253494","#253494","#253494","#253494","#253494","#253494","#253494","#253494","#253494","#253494", "#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac", @@ -86,10 +83,9 @@ colors<-c("#253494","#253494","#253494","#253494","#253494","#253494","#253494", "#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4", "#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc", "darkred","darkred","darkred","darkred","darkred","darkred","darkred","darkred","darkred","darkred") -#put plotting parameters into pars +#put plotting colors into pars pars <- expand.grid(col = colors, stringsAsFactors = FALSE) - #plot number of ASVs vs. rarefied #550x400 plot(S, Srare, pch=21, cex=2, col="black", bg=colors, @@ -102,11 +98,11 @@ legend(700,250, c("1.PVD gut","2.GB gut","3.BIS gut","4.NAR gut","5.NIN gut", "A #generate rarefaction curves #NOTE: THIS STEP TAKES A LITTLE WHILE TO RUN. -outw <- with(pars, rarecurve(datamatt, step = 20, +outw <- with(pars, rarecurve(datamatt, step = 20, sample = raremax, col = colors, label = FALSE)) #determine limits of the graph Smax <- sapply(outw, max) -#plot the rarefaction curves +#plot the rarefaction curves - Figure S2A #550x400 plot(c(1, 80000), c(1, max(Smax)), xlab = "Number of Reads", ylab = "Observed Number of ASVs", type = "n") @@ -119,12 +115,14 @@ for (i in seq_along(outw)) { # check slopes of curves at the end, make sure they're ~0 end<-raremax-1 slopes<-rareslope(datamatt, end) +# Figure S2B plot(slopes, type="p", pch=21, bg = colors, cex=2, xlab = "16S Amplicon Samples", ylim=c(0,0.05)) # coverage proxy: coverage<-100-100*rareslope(datamatt, end) +# Figure S2C plot(coverage, type="p", pch=21, bg = colors, - cex=2, xlab = "16S Amplicon Samples", + cex=2, xlab = "16S Amplicon Samples", ylab="Coverage (%)", ylim=c(95,100)) mean(coverage) @@ -140,7 +138,7 @@ sd(coverage) # remove chloroplast reads datafulllong <- datafulllong %>% - filter(str_detect(Taxon, "Chloroplast", negate=TRUE)) + filter(str_detect(Taxon, "Chloroplast", negate=TRUE)) # calculate new number of reads per sample SampleReadCounts<- @@ -150,10 +148,10 @@ SampleReadCounts<- # add read counts in to main table metadata$SampleTotalReads<-SampleReadCounts$SampleTotalReads datafulllong<- - full_join(datafulllong, SampleReadCounts, by="SampleID") -# calculate percent abundances -datafulllong$percent<- - datafulllong$count/datafulllong$SampleTotalReads + full_join(datafulllong, SampleReadCounts, by="SampleID") %>% + # calculate percent abundances + mutate(percent=count/SampleTotalReads) + #add in the metadata datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID", copy=FALSE, suffix=c(".x",".y")) @@ -161,6 +159,69 @@ datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID", dataASVmetagw<-filter(datafulllongmeta, SampleType=="gut" | SampleType=="water") +#### +### Bar plots with Negative & Positive Controls - Figure S1 ----------------------------------------------------- +#### + +# sum ASV percentages by phylum +phylumperc<- datafulllongmeta %>% + group_by(Phylum,SampleID, SampleName, SampleType, Station, TypeStationGroup) %>% + dplyr::summarise(physum=sum(percent)) + +# show only the top 10 phyla, put the rest in "Other" +phylumperc$PhylumOther<-forcats::fct_lump(f=phylumperc$Phylum, w=phylumperc$physum, other_level="Others", n=10) + +# define palette +palette<-c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C", + "#FDBF6F", "#FF7F00", "#CAB2D6","gray48","gray30") + +barp<-ggplot(phylumperc, + (aes(x=SampleName, y=physum, + fill=factor(PhylumOther, levels=c("Actinobacteria", "Bacteroidetes","Cyanobacteria","Firmicutes","Fusobacteria","Planctomycetes","Proteobacteria", "Tenericutes","Verrucomicrobia","Unknown","Others")))))+ + facet_grid(.~TypeStationGroup, scales="free", space="free")+ + geom_col(position="fill", alpha=0.8)+theme_minimal()+ + theme(legend.text = element_text(size=12, colour="gray20"), + legend.position = "right", + axis.text.x = element_text(angle=60, hjust=1, vjust=1), + axis.ticks = element_line(inherit.blank=FALSE, color="grey30"))+ + facet_grid(.~SampleType+Station, scales="free",space="free")+ + scale_fill_manual(values=c(palette))+ + labs(y="Percent abundance",x=NULL,fill="Phylum")+ + scale_y_continuous(labels = scales::percent, expand=c(0,0)) + + +# control samples ASV percentages +controlASVperc<- datafulllongmeta %>% + filter(SampleType=="control") %>% + group_by(ASVID, Taxon,SampleID, SampleName, SampleType, Station, TypeStationGroup) %>% + dplyr::summarise(Taxonsum=sum(percent)) %>% + mutate(TaxonOther=forcats::fct_lump(f=Taxon, w=Taxonsum, other_level="Others", n=20)) + +palette2<-c("#7c342b","#7a3261","#d24359","#d48c3b","#538636","#47bd8c","#c64ecc","#48477d", + "#84ac84","#c58bb9","#633dbb","#375731","#d74d2a","#b4aa44","#d44a91","#56a1be", + "#7a81d7","#7e622e","#d28978","#6dc041",'gray30') + +# plot controls at ASV level +ctrlp<-ggplot(dplyr::arrange(controlASVperc,TaxonOther), (aes(x=SampleName, y=Taxonsum, fill=TaxonOther)))+ + geom_col(position="fill", alpha=0.8)+theme_minimal()+ + coord_flip()+ + theme(legend.text = element_text(size=9, colour="gray20"), + legend.position = "bottom", + axis.ticks = element_line(inherit.blank=FALSE, color="grey30"))+ + scale_fill_manual(values=c(palette2))+ + labs(y="Percent abundance",x=NULL,fill=NULL)+ + scale_y_continuous(labels = scales::label_percent(accuracy=1), breaks=breaks_pretty(n=10), expand=c(0,0))+ + guides(fill = guide_legend(ncol = 2)) + + +# Figure S1 +cowplot::plot_grid(barp, ctrlp, nrow=2, rel_heights = c(60,40), labels=c("A","B")) +ggsave("FigureS1.png", width = 15, height = 10, dpi=400) +ggsave("FigureS1.pdf", width = 15, height = 10) +ggsave("FigureS1.svg", width = 15, height = 10) + + + #### @@ -170,7 +231,7 @@ dataASVmetagw<-filter(datafulllongmeta, SampleType=="gut" | SampleType=="water") #convert normalized data (with chloros removed) back to wide dataASVwide<-select(dataASVmetagw, SampleID, ASVID, percent) dataASVtable<-spread(dataASVwide, ASVID, percent) %>% - tibble::column_to_rownames("SampleID") + tibble::column_to_rownames("SampleID") # calculate diversity diversitytotal<-diversity(dataASVtable, index="simpson") @@ -187,11 +248,10 @@ dataag_water<-filter(metadatags, SampleType=="water") divplot<-ggplot(metadatags,aes(x=Station,y=Simpsons, fill=Station))+ geom_boxplot()+ geom_point(size=4, shape=23)+ - facet_grid(~SampleType, labeller=as_labeller(c(gut="Gut samples (n=10)", water="Water samples (n=2)"))) + + facet_grid(~SampleType, labeller=as_labeller(c(gut="Gut samples (n=10)", water="Water samples (n=2)"))) + labs(x=NULL,y="Simpson's Index of Diversity",fill="Site")+ scale_color_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + - scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + - # labels=c("1. Providence River", "2. Greenwich Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+ + scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + theme_bw()+scale_y_continuous(limits=c(0,1.0))+ theme(legend.position = "none", strip.background = element_rect("grey90")) @@ -201,9 +261,9 @@ divlegend<-ggplot(metadatags,aes(x=Station,y=Simpsons, fill=Station))+ geom_point() + facet_grid(~SampleType) + theme_bw()+ labs(x=NULL,y="Simpson's Index of Diversity",fill="Site")+ scale_color_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + - scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + + scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + theme(legend.position = c(.86,.33), - legend.text = element_text(size=14, colour="gray20"), + legend.text = element_text(size=14, colour="gray20"), strip.background = element_rect("grey90"), legend.background = element_rect(color="grey40")) @@ -243,15 +303,13 @@ compare_means(data=metadatags, Simpsons~Station, group.by="SampleType", # ellipse function theme_set(theme_bw()) -veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100) +veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100) { theta <- (0:npoints) * 2 * pi/npoints Circle <- cbind(cos(theta), sin(theta)) t(center + scale * t(Circle %*% chol(cov))) } - - # Get spread of gut points based on Station abund_table<-dataASVtable[c(1:50),] meta_table<-dataag_gut @@ -274,11 +332,11 @@ for(g in levels(NMDS$Station)){ head(df_ell) NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Station),mean) #Now do the actual plotting -gutNMDS<-ggplot(data=NMDS,aes(x,y,colour=Station,fill=Station))+theme_bw() + - geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Station), size=1) + +gutNMDS<-ggplot(data=NMDS,aes(x,y,colour=Station,fill=Station))+theme_bw() + + geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Station), size=1) + geom_point(size=4, alpha=0.9,aes(shape=Station))+scale_shape_manual(values = c(21,22,23,24,25))+ - annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + - # annotate("text",x=NMDS$x+0.2,y=NMDS$y+0.05,label=paste(NMDS$Station,NMDS$OysterNum),size=4, color="gray40") + # to determine which point is which + annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + + # annotate("text",x=NMDS$x+0.2,y=NMDS$y+0.05,label=paste(NMDS$Station,NMDS$OysterNum),size=4, color="gray40") + # to determine which point is which scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ scale_colour_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ scale_linetype_manual(values=c("solid","dotted","twodash","longdash", "solid"), labels=c("1. Providence River", "2. Greenwich Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+ @@ -288,7 +346,6 @@ gutNMDS<-ggplot(data=NMDS,aes(x,y,colour=Station,fill=Station))+theme_bw() + adonis2(abund_table~Station, data=NMDS, by=NULL,method="bray", k=2) - # Get spread of points based on Type abund_table<-dataASVtable[c(1:60),] meta_table<-metadatags @@ -313,10 +370,10 @@ head(df_ell) NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Type),mean) head(NMDS.mean) #Now do the actual plotting -typeNMDS<-ggplot(data=NMDS,aes(x,y,colour=Type,fill=Type))+theme_bw() + - geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Type), size=1) + +typeNMDS<-ggplot(data=NMDS,aes(x,y,colour=Type,fill=Type))+theme_bw() + + geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Type), size=1) + geom_point(size=4, alpha=0.9,aes(shape=Station))+ - annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=6, color="gray10") + + annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=6, color="gray10") + scale_fill_manual(values=c("orange","darkred"), labels=c("Gut","Water"))+ scale_colour_manual(values=c("orange","darkred"), labels=c("Gut","Water"))+ scale_linetype_manual(values=c("solid","twodash"), labels=c("Gut","Water"))+ @@ -341,7 +398,7 @@ nmds<-cowplot::plot_grid(gutNMDS+theme(legend.position = "none"), typeNMDS+theme(legend.position = "none"), align="hv", axis="t",nrow=1) -plots<-cowplot::plot_grid(divplot+theme(legend.position = "none"),nmds, nrow=2, +plots<-cowplot::plot_grid(divplot+theme(legend.position = "none"),nmds, nrow=2, align="v", axis="l",rel_heights = c(40,60)) legends<-cowplot::plot_grid(divleg,typeleg,gutleg, ncol = 1,rel_heights = c(50,20,50)) diff --git a/TNCpaper_ScriptsData/Figure4_S1_S2_S4.R b/TNCpaper_ScriptsData/Figure4_S2_S3_S5.R similarity index 90% rename from TNCpaper_ScriptsData/Figure4_S1_S2_S4.R rename to TNCpaper_ScriptsData/Figure4_S2_S3_S5.R index 9122d66..8b1df66 100644 --- a/TNCpaper_ScriptsData/Figure4_S1_S2_S4.R +++ b/TNCpaper_ScriptsData/Figure4_S2_S3_S5.R @@ -1,12 +1,11 @@ # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary # Compare taxonomy generated from RefSeq/metatranscriptomes and 16S data -# Figures 4, S1 S2, and S4 in the publication +# Figures 4, S2, S3, and S5 in the publication # updated 4/16/2020 for resubmission # Metaranscriptomic taxonomy data # 16S Amplicon data at order level - library(ggplot2) library(dplyr) library(tidyr) @@ -15,7 +14,6 @@ library(ggpubr) library(readxl) library(vegan) - data<-read_xlsx("Taxonomy/SAMSA_metatranscriptomes_RefSeqtaxa_edit.xlsx", sheet=2) taxakey<- read_xlsx("Taxonomy/SAMSA_metatranscriptomes_RefSeqtaxa_edit.xlsx", sheet=3) data$Sample<-as.character(data$Sample) @@ -40,7 +38,7 @@ condenseddataord<-datatax %>% select(Sample, Order, Percent, SampleType) %>% dataordtab<-spread(condenseddataord, Order, PercentSum) %>% column_to_rownames("Sample") - +# make a ginormous palette palette<-c(brewer.pal(12, "Set3"),brewer.pal(8, "Set2"),brewer.pal(12, "Set3"), brewer.pal(12, "Set3"),brewer.pal(8, "Set2"),brewer.pal(12, "Set3"), brewer.pal(12, "Set3"),brewer.pal(8, "Set2"),brewer.pal(12, "Set3"), @@ -138,15 +136,15 @@ alldatatax_norm<-ddply(alldatatax,.(Sample),transform,rescale=sqrt(Percent)) # remove lowest abundance taxa alldatatax_normg<- alldatatax_norm %>% dplyr::group_by(Order) # group taxsums<-dplyr::summarise(alldatatax_normg, sums=sum(Percent)) # calculate sums -taxsums<-taxsums[order(-taxsums$sums),] # reorder +taxsums<-taxsums[order(-taxsums$sums),] # reorder toptax<-taxsums[1:30,] # extract top 30 Orders topdatatax_norm <- alldatatax_norm[alldatatax_norm$Order %in% toptax$Order,] ## FIGURE 4B ############################################## # 1030x550 -ggplot(topdatatax_norm, aes(Sample, Order_Name)) + - geom_tile(aes(fill = rescale),colour = "white") + ggpubr::theme_transparent() + +ggplot(topdatatax_norm, aes(Sample, Order_Name)) + + geom_tile(aes(fill = rescale),colour = "white") + ggpubr::theme_transparent() + facet_grid(factor(Phylum, levels=c("Actinobacteria","Bacteroidetes","Cyanobacteria","Firmicutes","Fusobacteria", "Proteobacteria","Tenericutes","Verrucomicrobia","Unknown"))~ factor(SampleType, levels = c("water","gut"))+Method+Station, @@ -154,7 +152,7 @@ ggplot(topdatatax_norm, aes(Sample, Order_Name)) + scale_fill_gradientn(na.value = "salmon",labels = scales::percent,limits=c(0,1), colours=c("white","#fecc5c","#fd8d3c","#f03b20","#bd0026","darkred"))+ scale_x_discrete(expand = c(0, 0)) + - scale_y_discrete(expand = c(0, 0)) + + scale_y_discrete(expand = c(0, 0)) + theme(legend.position = "bottom",axis.ticks = element_blank(), axis.text.y = element_text(size=10, colour="grey40"), axis.text.x = element_blank(), @@ -195,10 +193,10 @@ guttrans<-guttrans[2:68] # simple venn diagrams venn(list(gut16S=gut16S, water16S=water16S)) -siteshey<-venn(list("1.PVD Gut 16S"=gut16S1, - "2.GB Gut 16S"=gut16S2, - "3.BIS Gut 16S"=gut16S3, - "4.NAR Gut 16S"=gut16S4, +siteshey<-venn(list("1.PVD Gut 16S"=gut16S1, + "2.GB Gut 16S"=gut16S2, + "3.BIS Gut 16S"=gut16S3, + "4.NAR Gut 16S"=gut16S4, "5.NIN Gut 16S"=gut16S5)) hey<- venn(list("gut 16S"=gut16S, "water 16S"=water16S, "gut metatranscriptome"=guttrans)) summaryvenn<-attr(hey, "intersections") @@ -206,7 +204,7 @@ summaryvennsites<-attr(siteshey, "intersections") -## UPSETR -------------------- +## UPSETR plots -------------------- library(UpSetR) library(reshape2) @@ -233,34 +231,31 @@ UpSet(t(m), set_order = order(c("water16S","gutRNA","gut16S")), ## FIGURE 4A ############################################## #1000x155 -UpSet(m, set_order = order(c("water16S","gutRNA","gut16S")), +UpSet(m, set_order = order(c("water16S","gutRNA","gut16S")), pt_size = unit(.35, "cm"),lwd=3, - left_annotation = rowAnnotation(" " = anno_barplot(set_size(m), bar_width=0.7, + left_annotation = rowAnnotation(" " = anno_barplot(set_size(m), bar_width=0.7, axis_param = list(direction = "reverse", side = "top",labels_rot = 0), border = FALSE, annotation_name_side = "top", - gp = gpar(fill = "black"), - width = unit(4, "cm"))), + gp = gpar(fill = "black"), + width = unit(4, "cm"))), right_annotation = NULL,row_names_side = "left", top_annotation = upset_top_annotation(m,bar_width = 0.9)) - - - # -------------------------------------------------------- -read_sets = list("1.PVD Gut 16S"=gut16S1, - "2.GB Gut 16S"=gut16S2, - "3.BIS Gut 16S"=gut16S3, - "4.NAR Gut 16S"=gut16S4, +read_sets = list("1.PVD Gut 16S"=gut16S1, + "2.GB Gut 16S"=gut16S2, + "3.BIS Gut 16S"=gut16S3, + "4.NAR Gut 16S"=gut16S4, "5.NIN Gut 16S"=gut16S5, "All water 16S"=water16S) mgutw = make_comb_mat(read_sets) -## FIGURE S2 ############################################## - +## FIGURE S3 ############################################## #900x400 + UpSet(mgutw,comb_col = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey40", "black","black","black","grey40","black","black", "black","grey40","black","black","grey40","black", @@ -268,41 +263,33 @@ UpSet(mgutw,comb_col = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey "grey40","black","grey40","black","black","grey40", "black","grey40","black","grey40","grey40","black", "black","grey40","black","grey40","black","grey40", - "grey40","black","grey40", "black", "grey40", "grey40", + "grey40","black","grey40", "black", "grey40", "grey40", "grey40", "grey40", "grey40", "grey40"), set_order = order(c("2.GB Gut 16S","3.BIS Gut 16S", - "4.NAR Gut 16S", "5.NIN Gut 16S","All water 16S","1.PVD Gut 16S")), + "4.NAR Gut 16S", "5.NIN Gut 16S","All water 16S","1.PVD Gut 16S")), pt_size = unit(.5, "cm"),lwd=3, - right_annotation = rowAnnotation(" " = anno_barplot(set_size(mgutw), bar_width=0.7, + right_annotation = rowAnnotation(" " = anno_barplot(set_size(mgutw), bar_width=0.7, axis_param = list(side = "top",labels_rot = 0), border = FALSE, annotation_name_side = "top", - gp = gpar(fill = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey40")), - width = unit(4, "cm"))), + gp = gpar(fill = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey40")), + width = unit(4, "cm"))), row_names_side = "left", top_annotation = upset_top_annotation(mgutw,bar_width = 0.9, height = unit(6, "cm"))) - - - - - - - - ### NMDS of Metatranscriptome taxonomy ----------------- ################################################################################### -Site<-c("1.PVD","1.PVD","1.PVD","1.PVD","1.PVD", +Site<-c("1.PVD","1.PVD","1.PVD","1.PVD","1.PVD", "2.GB","2.GB","2.GB","2.GB","2.GB", "3.BIS","3.BIS","3.BIS","3.BIS","3.BIS", - "4.NAR","4.NAR","4.NAR","4.NAR","4.NAR", + "4.NAR","4.NAR","4.NAR","4.NAR","4.NAR", "5.NIN","5.NIN","5.NIN","5.NIN","5.NIN") library(vegan) theme_set(theme_bw()) -veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100) +veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100) { theta <- (0:npoints) * 2 * pi/npoints Circle <- cbind(cos(theta), sin(theta)) @@ -336,16 +323,16 @@ for(g in levels(NMDS$Site)){ head(df_ell) NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Site),mean) -## FIGURE S4A ############################################## -metatranssp<- - ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() + - geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) + +## FIGURE S5A ############################################## +metatranssp<- + ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() + + geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) + geom_point(size=4, alpha=0.9,aes(shape=Site))+scale_shape_manual(values = c(21,22,23,24,25))+ - annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + + annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ scale_colour_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ scale_linetype_manual(values=c("solid","dotted","twodash","longdash", "solid"), labels=c("1. Providence River", "2. Greenwich Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+ - theme(legend.text = element_text(size=14, colour="gray20"), + theme(legend.text = element_text(size=14, colour="gray20"), legend.position = "none", legend.title = element_blank(),legend.box="horizontal") + ggtitle("Species level annotation") @@ -380,16 +367,16 @@ for(g in levels(NMDS$Site)){ head(df_ell) NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Site),mean) -## FIGURE S4B ############################################## -metatransord<- - ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() + - geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) + +## FIGURE S5B ############################################## +metatransord<- + ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() + + geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) + geom_point(size=4, alpha=0.9,aes(shape=Site))+scale_shape_manual(values = c(21,22,23,24,25))+ - annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + + annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ scale_colour_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ scale_linetype_manual(values=c("solid","dotted","twodash","longdash", "solid"), labels=c("1. Providence River", "2. Greenwich Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+ - theme(legend.text = element_text(size=14, colour="gray20"), + theme(legend.text = element_text(size=14, colour="gray20"), legend.position = "right", legend.title = element_blank(),legend.box="horizontal")+ ggtitle("Order level annotation") @@ -397,19 +384,14 @@ metatransord<- adonis2(abund_tableord~Site, data=NMDS, by=NULL,method="bray", k=2) +## FIGURE S5 TOTAL ############################################## #1000x800 cowplot::plot_grid(metatranssp, metatransord+theme(legend.position="none"), get_legend(metatransord)) - - - - - - #### -### Metatranscriptome Rarefaction curve - Figure S1 ---------------------------------------------- +### Metatranscriptome Rarefaction curve - Figure S2 ---------------------------------------------- #### # Load in the raw annotation data @@ -448,11 +430,11 @@ legend(700,250, c("1.PVD gut","2.GB gut","3.BIS gut","4.NAR gut","5.NIN gut", "A #generate rarefaction curves #NOTE: THIS STEP TAKES A WHILE TO RUN. # Step size has been increased so it doesn't take a day or so -outw <- with(pars, rarecurve(datamatt, step = 800, +outw <- with(pars, rarecurve(datamatt, step = 800, sample = raremax, col = colors, label = FALSE)) #determine limits of the graph Smax <- sapply(outw, max) -#plot the rarefaction curves +#plot the rarefaction curves - Figure S2A #550x400 plot(c(1, 1103294), c(1, max(Smax)), xlab = "Number of Reads", ylab = "Observed Number of Annotated Species", type = "n") @@ -465,14 +447,15 @@ for (i in seq_along(outw)) { # check slopes of curves at the end, make sure they're ~0 end<-raremax-1 slopes<-rareslope(datamatt, end) +# Figure S2B plot(slopes, type="p", pch=23, bg = colors, cex=2, xlab = "Metatranscriptome Samples", ylim=c(0,0.05)) # coverage proxy: coverage<-100-100*rareslope(datamatt, end) +# Figure S2C plot(coverage, type="p", pch=23, bg = colors, - cex=2, xlab = "Metatranscriptome Samples", + cex=2, xlab = "Metatranscriptome Samples", ylab="Coverage (%)", ylim=c(95,100)) mean(coverage) sd(coverage) - \ No newline at end of file diff --git a/TNCpaper_ScriptsData/FigureS3_lefse.R b/TNCpaper_ScriptsData/FigureS4_lefse.R similarity index 98% rename from TNCpaper_ScriptsData/FigureS3_lefse.R rename to TNCpaper_ScriptsData/FigureS4_lefse.R index 10e7e8f..61ab19f 100644 --- a/TNCpaper_ScriptsData/FigureS3_lefse.R +++ b/TNCpaper_ScriptsData/FigureS4_lefse.R @@ -1,6 +1,6 @@ # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary # 16S abundances using LefSe -# Figure S3 +# Figure S4 # updated 4/16/2020 for resubmission # 16S Amplicon data at order level analyzed with LefSe @@ -41,16 +41,18 @@ splot<- datasites %>% scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) -# Plot together +# Plot together - FIGURE S4 cowplot::plot_grid(tplot,splot, ncol=2, align="hv", axis="tbr") #1300x800 + + # Just top 30 taxa -------------------- # show only if they are significantly different # get this variable from script for Figure 4 -toptax +toptax # Extact data for orders included in toptax topdatatype <- datatype[datatype$Order %in% toptax$Order,] @@ -78,5 +80,3 @@ splot<- topdatasites %>% labs(y="LDA Score (log 10)",x=NULL)+ ggtitle("Sites")+ scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) - - diff --git a/TNCpaper_ScriptsData/FigureS5-S8.R b/TNCpaper_ScriptsData/FigureS6-S9.R similarity index 90% rename from TNCpaper_ScriptsData/FigureS5-S8.R rename to TNCpaper_ScriptsData/FigureS6-S9.R index daaf349..722c912 100644 --- a/TNCpaper_ScriptsData/FigureS5-S8.R +++ b/TNCpaper_ScriptsData/FigureS6-S9.R @@ -1,20 +1,19 @@ # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary # Script to analyze gene-level functional expression from SAMSA -# Figures S5, S6, S7, S8 +# Figures S6, S7, S8, S9 # updated 4/16/2020 for resubmission # Metaranscriptomic functional data at gene level - library("ggplot2") library("tidyr") library("dplyr") library("readxl") library("gridExtra") library("leaflet") -library(RColorBrewer) -library(viridis) -library(pheatmap) +library("RColorBrewer") +library("viridis") +library("pheatmap") data<-read_excel("Function/DESeq_subsys_results_level4_summary.xlsx", sheet=1) hierarchy<-read_excel("Function/DESeq_subsys_results_level4_summary.xlsx", sheet=2) @@ -25,7 +24,6 @@ data$padj_BIS<-as.numeric(data$padj_BIS) data$padj_NAR<-as.numeric(data$padj_NAR) data$padj_NIN<-as.numeric(data$padj_NIN) - datah<-merge(data, hierarchy) # NITROGEN ------------------------------------ @@ -39,11 +37,11 @@ datahdenitgp<-gather(datahdenit, padj, padjvalue, "padj_PVD","padj_GB","padj_BIS datahdenitg$padj<-datahdenitgp$padj datahdenitg$padjvalue<-datahdenitgp$padjvalue -datahdenitg$signif<-ifelse(datahdenitgp$padjvalue <=0.01, "**", +datahdenitg$signif<-ifelse(datahdenitgp$padjvalue <=0.01, "**", ifelse(datahdenitgp$padjvalue <=0.05, "*", ifelse("ns"))) -datahdenitg$foldchange = factor(datahdenitg$foldchange, +datahdenitg$foldchange = factor(datahdenitg$foldchange, levels = c("log2FoldChange_PVD", "log2FoldChange_GB","log2FoldChange_BIS", "log2FoldChange_NAR","log2FoldChange_NIN")) @@ -55,7 +53,7 @@ ggplot(datahdenitg, aes(Level4, foldvalue,fill=foldchange))+ scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ geom_text(aes(label=signif), position = position_dodge(width = 1), size=6) -datahdenitg$Level3f<-factor(datahdenitg$Level3, +datahdenitg$Level3f<-factor(datahdenitg$Level3, levels=c("Nitric_oxide_synthase","Allantoin_Utilization", "Amidase_clustered_with_urea_and_nitrile_hydratase_functions", "Denitrification", @@ -66,7 +64,7 @@ datahdenitg$Level3f<-factor(datahdenitg$Level3, -ggplot(datahdenitg, aes(Level4, foldchange, label=signif, +ggplot(datahdenitg, aes(Level4, foldchange, label=signif, fill=foldvalue,color=signif))+theme_minimal()+ geom_tile(size=0.7)+facet_grid(Level3f~., scales="free", space="free")+ scale_fill_viridis()+coord_flip()+ @@ -80,7 +78,6 @@ ggplot(datahdenitg, aes(Level4, foldchange, label=signif, ### PHOSPHOROUS ----------------------- - datahphos<-filter(datah, Level2=="Phosphorus Metabolism") datahphosg<-gather(datahphos, foldchange, foldvalue, "log2FoldChange_PVD", "log2FoldChange_GB","log2FoldChange_BIS", @@ -90,11 +87,11 @@ datahphosgp<-gather(datahphos, padj, padjvalue, "padj_PVD","padj_GB","padj_BIS", datahphosg$padj<-datahphosgp$padj datahphosg$padjvalue<-datahphosgp$padjvalue -datahphosg$signif<-ifelse(datahphosgp$padjvalue <=0.01, "**", +datahphosg$signif<-ifelse(datahphosgp$padjvalue <=0.01, "**", ifelse(datahphosgp$padjvalue <=0.05, "*", ifelse("ns"))) -datahphosg$foldchange = factor(datahphosg$foldchange, +datahphosg$foldchange = factor(datahphosg$foldchange, levels = c("log2FoldChange_PVD", "log2FoldChange_GB","log2FoldChange_BIS", "log2FoldChange_NAR","log2FoldChange_NIN")) @@ -102,7 +99,6 @@ datahphosg$foldchange = factor(datahphosg$foldchange, ggplot(datahphosg, aes(Level4, foldvalue,fill=foldchange))+ geom_col(position = "dodge")+coord_flip()+ facet_grid(Level3~., scales="free",space="free")+ -# theme(strip.text.y = element_text(angle=0))+ scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ geom_text(aes(label=signif), position = position_dodge(width = 1), size=6) @@ -123,8 +119,7 @@ ggplot(datahphosg,aes(Level4, foldchange, label=signif, ### STRESS RESPONSE ----------------------- - -datahstress<-filter(datah,Level2=="Osmotic stress" | Level2=="Periplasmic Stress" | +datahstress<-filter(datah,Level2=="Osmotic stress" | Level2=="Periplasmic Stress" | Level2=="Oxidative stress") datahstressg<-gather(datahstress, foldchange, foldvalue, "log2FoldChange_PVD", "log2FoldChange_GB","log2FoldChange_BIS", @@ -134,33 +129,28 @@ datahstressgp<-gather(datahstress, padj, padjvalue, "padj_PVD","padj_GB","padj_B datahstressg$padj<-datahstressgp$padj datahstressg$padjvalue<-datahstressgp$padjvalue -datahstressg$signif<-ifelse(datahstressgp$padjvalue <=0.01, "**", +datahstressg$signif<-ifelse(datahstressgp$padjvalue <=0.01, "**", ifelse(datahstressgp$padjvalue <=0.05, "*", ifelse("ns"))) -datahstressg$foldchange = factor(datahstressg$foldchange, +datahstressg$foldchange = factor(datahstressg$foldchange, levels = c("log2FoldChange_PVD", "log2FoldChange_GB","log2FoldChange_BIS", "log2FoldChange_NAR","log2FoldChange_NIN")) - data3gpfpe<-gather(datahstress, folderror, errorvalue, "lfcSE_PVD","lfcSE_GB", "lfcSE_BIS","lfcSE_NAR","lfcSE_NIN") datahstressg$folderror<-data3gpfpe$folderror datahstressg$errorvalue<-data3gpfpe$errorvalue - ggplot(datahstressg, aes(Level4, foldvalue,fill=foldchange))+ geom_col(position = "dodge")+coord_flip()+ facet_grid(Level2~., scales="free", space="free")+ - geom_errorbar(aes(ymin=foldvalue-errorvalue, ymax=foldvalue+errorvalue), + geom_errorbar(aes(ymin=foldvalue-errorvalue, ymax=foldvalue+errorvalue), size=0.8,position = position_dodge(width=0.9), width = 0, color="grey60")+ - # theme(strip.text.y = element_text(angle=0))+ scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+ geom_text(aes(label=signif), position = position_dodge(width = 1), size=6) - - ggplot(datahstressg, aes(Level4, foldchange, label=signif, fill=foldvalue,color=signif))+theme_minimal()+ geom_tile(size=0.7)+facet_grid(Level2~., scales="free", space="free")+ @@ -170,11 +160,3 @@ ggplot(datahstressg, aes(Level4, foldchange, label=signif, theme(legend.position="bottom", plot.background = element_rect(fill = "transparent", color = NA),rect = element_rect(fill = "transparent"))+ scale_color_manual(values=c("darkred","red","black")) - - - - - -#write.csv(datahdenit,"nitrogen.csv") -#write.csv(datahphos,"phosphorus.csv") -#write.csv(datahstress,"stress.csv") diff --git a/TNCpaper_ScriptsData/README.md b/TNCpaper_ScriptsData/README.md index 9ad1d58..2053eb3 100644 --- a/TNCpaper_ScriptsData/README.md +++ b/TNCpaper_ScriptsData/README.md @@ -5,11 +5,11 @@ This folder contains all the processed data files from the analysis described in #### Scripts - **Figure1_2.R** - makes the map of sites and PCA of environmental data. -- **Figure3_S1.R** - 16S rRNA alpha- and beta-diversity and rarefaction plots. -- **Figure4_S1_S2_S4.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S2). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S1 and S4). +- **Figure3_S1_S2.R** - 16S rRNA percent abundance barplots (including controls), rarefaction plots, and alpha- and beta-diversity. +- **Figure4_S2_S3_S5.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S3). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S2 and S5). - **Figure5_6.R** - Metaranscriptomic functional data at pathway levels, subset for stress response (N vs S), nitrogen and phosphorus metabolism (each site vs mean) with barplots. -- **FigureS3_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx) -- **FigureS5-S8.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps) +- **FigureS4_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx) +- **FigureS6-S9.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps) #### 1. Function/ (from [MetatranscriptomeAnalysis](/MetatranscriptomeAnalysis)) - DeSeq2 results for level-2 SEED functional annnotation of metatranscriptomes (each station vs. mean)