From 203d396500357da8956fb83766287ef577d3ec4b Mon Sep 17 00:00:00 2001
From: rjstevick <rstevick@my.uri.edu>
Date: Wed, 1 Jul 2020 21:09:14 +0200
Subject: [PATCH] add new supp figure 1 - control bar plots

---
 MetatranscriptomeAnalysis/README.md           |   2 +-
 README.md                                     |  10 +-
 .../{Figure3_S1.R => Figure3_S1_S2.R}         | 141 ++++++++++++------
 ...{Figure4_S1_S2_S4.R => Figure4_S2_S3_S5.R} | 113 ++++++--------
 .../{FigureS3_lefse.R => FigureS4_lefse.R}    |  10 +-
 .../{FigureS5-S8.R => FigureS6-S9.R}          |  46 ++----
 TNCpaper_ScriptsData/README.md                |   8 +-
 7 files changed, 176 insertions(+), 154 deletions(-)
 rename TNCpaper_ScriptsData/{Figure3_S1.R => Figure3_S1_S2.R} (76%)
 rename TNCpaper_ScriptsData/{Figure4_S1_S2_S4.R => Figure4_S2_S3_S5.R} (90%)
 rename TNCpaper_ScriptsData/{FigureS3_lefse.R => FigureS4_lefse.R} (98%)
 rename TNCpaper_ScriptsData/{FigureS5-S8.R => FigureS6-S9.R} (90%)

diff --git a/MetatranscriptomeAnalysis/README.md b/MetatranscriptomeAnalysis/README.md
index 42bcfee..9ec6a4a 100644
--- a/MetatranscriptomeAnalysis/README.md
+++ b/MetatranscriptomeAnalysis/README.md
@@ -5,7 +5,7 @@ This folder contains scripts used to perform modified SAMSA2 analysis on the met
 - Main modified SAMSA2 script (masterscript*.sh)
 - Scripts to run Rscripts on the command line (deSEQ*.sh)
 - Script to compute DIAMOND annotations against refSeq dbs (newannot.sh)
-- Raw sequence counts and file inputs
+- Raw sequence counts and file inputs (rawcounts.txt and samples_transcriptomes.txt)
 
 #### 2. R_scripts
 - DeSeq2 files to run states for the susbsytems based on deSEQ_groups.sh or deSEQ_subsys.sh
diff --git a/README.md b/README.md
index 67f1ac9..0cf9200 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ This folder contains scripts used to perform modified SAMSA2 analysis on the met
 - Main modified SAMSA2 script (masterscript*.sh)
 - Scripts to run Rscripts on the command line (deSEQ*.sh)
 - Script to compute DIAMOND annotations against refSeq dbs (newannot.sh)
-- Raw sequence counts and file inputs
+- Raw sequence counts and file inputs (rawcounts.txt and samples_transcriptomes.txt)
 #### 2. R_scripts
 - DeSeq2 files to run states for the susbsytems based on deSEQ_groups.sh or deSEQ_subsys.sh
 
@@ -53,8 +53,8 @@ This folder contains all the processed data files from the analysis described in
 - Metatranscriptomic annotation taxonomy results with RefSeq
 #### Scripts
 - **Figure1_2.R** - makes the map of sites and PCA of environmental data.
-- **Figure3_S1.R** - 16S rRNA alpha- and beta-diversity and rarefaction plots.
-- **Figure4_S1_S2_S4.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S2). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S1 and S4).
+- **Figure3_S1_S2.R** - 16S rRNA percent abundance barplots (including controls), rarefaction plots, and alpha- and beta-diversity.
+- **Figure4_S2_S3_S5.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S3). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S2 and S5).
 - **Figure5_6.R** - Metaranscriptomic functional data at pathway levels, subset for stress response (N vs S), nitrogen and phosphorus metabolism (each site vs mean) with barplots.
-- **FigureS3_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx)
-- **FigureS5-S8.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps)
+- **FigureS4_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx)
+- **FigureS6-S9.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps)
diff --git a/TNCpaper_ScriptsData/Figure3_S1.R b/TNCpaper_ScriptsData/Figure3_S1_S2.R
similarity index 76%
rename from TNCpaper_ScriptsData/Figure3_S1.R
rename to TNCpaper_ScriptsData/Figure3_S1_S2.R
index 8697c15..6a69943 100644
--- a/TNCpaper_ScriptsData/Figure3_S1.R
+++ b/TNCpaper_ScriptsData/Figure3_S1_S2.R
@@ -1,11 +1,10 @@
 # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary
-# 16S diversity and rarefaction plots
-# Figures 3 and S1
-# updated 4/16/2020 for resubmission
+# 16S controls, rarefaction, and diversity plots
+# Figures 3, S1, and S2
+# RJS updated 6/30/2020 for resubmission
 
 # 16S Amplicon data at ASV level
 
-
 library(tidyr)
 library(dplyr)
 library(ggplot2)
@@ -18,7 +17,7 @@ library(ggbiplot)
 library(vegan)
 library(ggpubr)
 library(stringr)
-
+library(scales)
 
 #import data - ASV counts, **not normalized**
 data<-read_xlsx("Taxonomy/16SallData_SILVAtaxa.xlsx", sheet="ASVcounts")
@@ -28,7 +27,7 @@ metadata<-read_xlsx("Taxonomy/16SallData_SILVAtaxa.xlsx", sheet="Metadata")
 #add in all taxa hierarchy to ASV counts
 datafull<-full_join(data, taxakey)
 #convert to long form
-datafulllong<-gather(datafull, SampleID, count, "TNC01":"TNC64") 
+datafulllong<-gather(datafull, SampleID, count, "TNC01":"TNC64")
 
 
 ####
@@ -45,10 +44,9 @@ SampleReadCounts<-
 # add read counts in to main table
 metadata$SampleTotalReads<-SampleReadCounts$SampleTotalReads
 datafulllong<-
-  full_join(datafulllong, SampleReadCounts, by="SampleID")
-# calculate percent abundances
-datafulllong$percent<-
-  datafulllong$count/datafulllong$SampleTotalReads
+  full_join(datafulllong, SampleReadCounts, by="SampleID") %>%
+  # calculate percent abundances
+  mutate(percent=count/SampleTotalReads)
 #add in the metadata
 datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID",
                             copy=FALSE, suffix=c(".x",".y"))
@@ -56,19 +54,19 @@ datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID",
 #select only gut, water
 metadatags<-filter(metadata, SampleType=="gut" | SampleType=="water")
 
-# Number of reads per sample
+# Number of QCd sequencing reads per sample
 ggplot(metadatags,aes(SampleName,SampleTotalReads,fill=Station))+geom_bar(stat="identity")+
   facet_grid(.~SampleType+Station, scales="free",space="free")+
   theme(legend.position = "none", axis.text.x = element_blank(),
         axis.ticks.x = element_blank(), axis.ticks.length = unit(0.6, 'lines'))+
   labs(y="Number of reads \nper sample",x=NULL)+
-  scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"), 
+  scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"),
                     labels=c("1. Providence River", "2. Greenwich Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))
 
 
 
 ####
-### 16S Rarefaction curve and coverage - Figure S1 ----------------------------------------------
+### 16S Rarefaction curve and coverage - Figure S2 ----------------------------------------------
 ####
 
 datamat<-data[1:61] # remove controls
@@ -78,7 +76,6 @@ raremax <- min(rowSums(datamatt))
 S <- specnumber(datamatt) # observed number of ASVs
 Srare <- rarefy(datamatt, raremax)
 
-
 #define colors based on gut/site or water
 colors<-c("#253494","#253494","#253494","#253494","#253494","#253494","#253494","#253494","#253494","#253494",
           "#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac","#0868ac",
@@ -86,10 +83,9 @@ colors<-c("#253494","#253494","#253494","#253494","#253494","#253494","#253494",
           "#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4","#7bccc4",
           "#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc","#bae4bc",
           "darkred","darkred","darkred","darkred","darkred","darkred","darkred","darkred","darkred","darkred")
-#put plotting parameters into pars
+#put plotting colors into pars
 pars <- expand.grid(col = colors, stringsAsFactors = FALSE)
 
-
 #plot number of ASVs vs. rarefied
 #550x400
 plot(S, Srare, pch=21, cex=2, col="black", bg=colors,
@@ -102,11 +98,11 @@ legend(700,250, c("1.PVD gut","2.GB gut","3.BIS gut","4.NAR gut","5.NIN gut", "A
 
 #generate rarefaction curves
 #NOTE: THIS STEP TAKES A LITTLE WHILE TO RUN.
-outw <- with(pars, rarecurve(datamatt, step = 20, 
+outw <- with(pars, rarecurve(datamatt, step = 20,
                              sample = raremax, col = colors, label = FALSE))
 #determine limits of the graph
 Smax <- sapply(outw, max)
-#plot the rarefaction curves
+#plot the rarefaction curves - Figure S2A
 #550x400
 plot(c(1, 80000), c(1, max(Smax)), xlab = "Number of Reads",
      ylab = "Observed Number of ASVs", type = "n")
@@ -119,12 +115,14 @@ for (i in seq_along(outw)) {
 # check slopes of curves at the end, make sure they're ~0
 end<-raremax-1
 slopes<-rareslope(datamatt, end)
+# Figure S2B
 plot(slopes, type="p", pch=21, bg = colors,
      cex=2, xlab = "16S Amplicon Samples", ylim=c(0,0.05))
 # coverage proxy:
 coverage<-100-100*rareslope(datamatt, end)
+# Figure S2C
 plot(coverage, type="p", pch=21, bg = colors,
-     cex=2, xlab = "16S Amplicon Samples", 
+     cex=2, xlab = "16S Amplicon Samples",
      ylab="Coverage (%)", ylim=c(95,100))
 
 mean(coverage)
@@ -140,7 +138,7 @@ sd(coverage)
 
 # remove chloroplast reads
 datafulllong <- datafulllong %>%
-  filter(str_detect(Taxon, "Chloroplast", negate=TRUE)) 
+  filter(str_detect(Taxon, "Chloroplast", negate=TRUE))
 
 # calculate new number of reads per sample
 SampleReadCounts<-
@@ -150,10 +148,10 @@ SampleReadCounts<-
 # add read counts in to main table
 metadata$SampleTotalReads<-SampleReadCounts$SampleTotalReads
 datafulllong<-
-  full_join(datafulllong, SampleReadCounts, by="SampleID")
-# calculate percent abundances
-datafulllong$percent<-
-  datafulllong$count/datafulllong$SampleTotalReads
+  full_join(datafulllong, SampleReadCounts, by="SampleID") %>%
+  # calculate percent abundances
+  mutate(percent=count/SampleTotalReads)
+
 #add in the metadata
 datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID",
                             copy=FALSE, suffix=c(".x",".y"))
@@ -161,6 +159,69 @@ datafulllongmeta<-full_join(datafulllong, metadata, by = "SampleID",
 dataASVmetagw<-filter(datafulllongmeta, SampleType=="gut" | SampleType=="water")
 
 
+####
+### Bar plots with Negative & Positive Controls - Figure S1 -----------------------------------------------------
+####
+
+# sum ASV percentages by phylum
+phylumperc<- datafulllongmeta %>%
+  group_by(Phylum,SampleID, SampleName, SampleType, Station, TypeStationGroup) %>%
+  dplyr::summarise(physum=sum(percent))
+
+# show only the top 10 phyla, put the rest in "Other"
+phylumperc$PhylumOther<-forcats::fct_lump(f=phylumperc$Phylum, w=phylumperc$physum, other_level="Others", n=10)
+
+# define palette
+palette<-c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", "#E31A1C",
+           "#FDBF6F", "#FF7F00", "#CAB2D6","gray48","gray30")
+
+barp<-ggplot(phylumperc,
+       (aes(x=SampleName, y=physum,
+            fill=factor(PhylumOther, levels=c("Actinobacteria",  "Bacteroidetes","Cyanobacteria","Firmicutes","Fusobacteria","Planctomycetes","Proteobacteria", "Tenericutes","Verrucomicrobia","Unknown","Others")))))+
+  facet_grid(.~TypeStationGroup, scales="free", space="free")+
+  geom_col(position="fill", alpha=0.8)+theme_minimal()+
+  theme(legend.text = element_text(size=12, colour="gray20"),
+        legend.position = "right",
+        axis.text.x = element_text(angle=60, hjust=1, vjust=1),
+        axis.ticks = element_line(inherit.blank=FALSE, color="grey30"))+
+  facet_grid(.~SampleType+Station, scales="free",space="free")+
+  scale_fill_manual(values=c(palette))+
+  labs(y="Percent abundance",x=NULL,fill="Phylum")+
+  scale_y_continuous(labels = scales::percent, expand=c(0,0))
+
+
+# control samples ASV percentages
+controlASVperc<- datafulllongmeta %>%
+  filter(SampleType=="control") %>%
+  group_by(ASVID, Taxon,SampleID, SampleName, SampleType, Station, TypeStationGroup) %>%
+  dplyr::summarise(Taxonsum=sum(percent)) %>%
+  mutate(TaxonOther=forcats::fct_lump(f=Taxon, w=Taxonsum, other_level="Others", n=20))
+
+palette2<-c("#7c342b","#7a3261","#d24359","#d48c3b","#538636","#47bd8c","#c64ecc","#48477d",
+            "#84ac84","#c58bb9","#633dbb","#375731","#d74d2a","#b4aa44","#d44a91","#56a1be",
+            "#7a81d7","#7e622e","#d28978","#6dc041",'gray30')
+
+# plot controls at ASV level
+ctrlp<-ggplot(dplyr::arrange(controlASVperc,TaxonOther), (aes(x=SampleName, y=Taxonsum, fill=TaxonOther)))+
+  geom_col(position="fill", alpha=0.8)+theme_minimal()+
+  coord_flip()+
+  theme(legend.text = element_text(size=9, colour="gray20"),
+        legend.position = "bottom",
+        axis.ticks = element_line(inherit.blank=FALSE, color="grey30"))+
+  scale_fill_manual(values=c(palette2))+
+  labs(y="Percent abundance",x=NULL,fill=NULL)+
+  scale_y_continuous(labels = scales::label_percent(accuracy=1), breaks=breaks_pretty(n=10), expand=c(0,0))+
+  guides(fill = guide_legend(ncol = 2))
+
+
+# Figure S1
+cowplot::plot_grid(barp, ctrlp, nrow=2, rel_heights = c(60,40), labels=c("A","B"))
+ggsave("FigureS1.png", width = 15, height = 10, dpi=400)
+ggsave("FigureS1.pdf", width = 15, height = 10)
+ggsave("FigureS1.svg", width = 15, height = 10)
+
+
+
 
 
 ####
@@ -170,7 +231,7 @@ dataASVmetagw<-filter(datafulllongmeta, SampleType=="gut" | SampleType=="water")
 #convert normalized data (with chloros removed) back to wide
 dataASVwide<-select(dataASVmetagw, SampleID, ASVID, percent)
 dataASVtable<-spread(dataASVwide, ASVID, percent) %>%
-  tibble::column_to_rownames("SampleID") 
+  tibble::column_to_rownames("SampleID")
 
 # calculate diversity
 diversitytotal<-diversity(dataASVtable, index="simpson")
@@ -187,11 +248,10 @@ dataag_water<-filter(metadatags, SampleType=="water")
 divplot<-ggplot(metadatags,aes(x=Station,y=Simpsons, fill=Station))+
   geom_boxplot()+
   geom_point(size=4, shape=23)+
-  facet_grid(~SampleType, labeller=as_labeller(c(gut="Gut samples (n=10)", water="Water samples (n=2)"))) + 
+  facet_grid(~SampleType, labeller=as_labeller(c(gut="Gut samples (n=10)", water="Water samples (n=2)"))) +
   labs(x=NULL,y="Simpson's Index of Diversity",fill="Site")+
   scale_color_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) +
-  scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + 
-  #                    labels=c("1. Providence River", "2. Greenwich Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+
+  scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) +
   theme_bw()+scale_y_continuous(limits=c(0,1.0))+
   theme(legend.position = "none",
         strip.background = element_rect("grey90"))
@@ -201,9 +261,9 @@ divlegend<-ggplot(metadatags,aes(x=Station,y=Simpsons, fill=Station))+
   geom_point() + facet_grid(~SampleType) + theme_bw()+
   labs(x=NULL,y="Simpson's Index of Diversity",fill="Site")+
   scale_color_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) +
-  scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) + 
+  scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc")) +
   theme(legend.position = c(.86,.33),
-        legend.text = element_text(size=14, colour="gray20"), 
+        legend.text = element_text(size=14, colour="gray20"),
         strip.background = element_rect("grey90"),
         legend.background = element_rect(color="grey40"))
 
@@ -243,15 +303,13 @@ compare_means(data=metadatags, Simpsons~Station, group.by="SampleType",
 
 # ellipse function
 theme_set(theme_bw())
-veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100) 
+veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100)
 {
   theta <- (0:npoints) * 2 * pi/npoints
   Circle <- cbind(cos(theta), sin(theta))
   t(center + scale * t(Circle %*% chol(cov)))
 }
 
-
-
 # Get spread of gut points based on Station
 abund_table<-dataASVtable[c(1:50),]
 meta_table<-dataag_gut
@@ -274,11 +332,11 @@ for(g in levels(NMDS$Station)){
 head(df_ell)
 NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Station),mean)
 #Now do the actual plotting
-gutNMDS<-ggplot(data=NMDS,aes(x,y,colour=Station,fill=Station))+theme_bw() + 
-  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Station), size=1) + 
+gutNMDS<-ggplot(data=NMDS,aes(x,y,colour=Station,fill=Station))+theme_bw() +
+  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Station), size=1) +
   geom_point(size=4, alpha=0.9,aes(shape=Station))+scale_shape_manual(values = c(21,22,23,24,25))+
-  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + 
-  #  annotate("text",x=NMDS$x+0.2,y=NMDS$y+0.05,label=paste(NMDS$Station,NMDS$OysterNum),size=4, color="gray40") + # to determine which point is which 
+  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") +
+  #  annotate("text",x=NMDS$x+0.2,y=NMDS$y+0.05,label=paste(NMDS$Station,NMDS$OysterNum),size=4, color="gray40") + # to determine which point is which
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   scale_colour_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   scale_linetype_manual(values=c("solid","dotted","twodash","longdash", "solid"), labels=c("1. Providence River", "2. Greenwich  Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+
@@ -288,7 +346,6 @@ gutNMDS<-ggplot(data=NMDS,aes(x,y,colour=Station,fill=Station))+theme_bw() +
 adonis2(abund_table~Station, data=NMDS, by=NULL,method="bray", k=2)
 
 
-
 # Get spread of points based on Type
 abund_table<-dataASVtable[c(1:60),]
 meta_table<-metadatags
@@ -313,10 +370,10 @@ head(df_ell)
 NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Type),mean)
 head(NMDS.mean)
 #Now do the actual plotting
-typeNMDS<-ggplot(data=NMDS,aes(x,y,colour=Type,fill=Type))+theme_bw() + 
-  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Type), size=1) + 
+typeNMDS<-ggplot(data=NMDS,aes(x,y,colour=Type,fill=Type))+theme_bw() +
+  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Type), size=1) +
   geom_point(size=4, alpha=0.9,aes(shape=Station))+
-  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=6, color="gray10") + 
+  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=6, color="gray10") +
   scale_fill_manual(values=c("orange","darkred"), labels=c("Gut","Water"))+
   scale_colour_manual(values=c("orange","darkred"), labels=c("Gut","Water"))+
   scale_linetype_manual(values=c("solid","twodash"), labels=c("Gut","Water"))+
@@ -341,7 +398,7 @@ nmds<-cowplot::plot_grid(gutNMDS+theme(legend.position = "none"),
                          typeNMDS+theme(legend.position = "none"),
                          align="hv", axis="t",nrow=1)
 
-plots<-cowplot::plot_grid(divplot+theme(legend.position = "none"),nmds, nrow=2, 
+plots<-cowplot::plot_grid(divplot+theme(legend.position = "none"),nmds, nrow=2,
                           align="v", axis="l",rel_heights = c(40,60))
 legends<-cowplot::plot_grid(divleg,typeleg,gutleg, ncol = 1,rel_heights = c(50,20,50))
 
diff --git a/TNCpaper_ScriptsData/Figure4_S1_S2_S4.R b/TNCpaper_ScriptsData/Figure4_S2_S3_S5.R
similarity index 90%
rename from TNCpaper_ScriptsData/Figure4_S1_S2_S4.R
rename to TNCpaper_ScriptsData/Figure4_S2_S3_S5.R
index 9122d66..8b1df66 100644
--- a/TNCpaper_ScriptsData/Figure4_S1_S2_S4.R
+++ b/TNCpaper_ScriptsData/Figure4_S2_S3_S5.R
@@ -1,12 +1,11 @@
 # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary
 # Compare taxonomy generated from RefSeq/metatranscriptomes and 16S data
-# Figures 4, S1 S2, and S4 in the publication
+# Figures 4, S2, S3, and S5 in the publication
 # updated 4/16/2020 for resubmission
 
 # Metaranscriptomic taxonomy data
 # 16S Amplicon data at order level
 
-
 library(ggplot2)
 library(dplyr)
 library(tidyr)
@@ -15,7 +14,6 @@ library(ggpubr)
 library(readxl)
 library(vegan)
 
-
 data<-read_xlsx("Taxonomy/SAMSA_metatranscriptomes_RefSeqtaxa_edit.xlsx", sheet=2)
 taxakey<- read_xlsx("Taxonomy/SAMSA_metatranscriptomes_RefSeqtaxa_edit.xlsx", sheet=3)
 data$Sample<-as.character(data$Sample)
@@ -40,7 +38,7 @@ condenseddataord<-datatax %>% select(Sample, Order, Percent, SampleType) %>%
 dataordtab<-spread(condenseddataord, Order, PercentSum) %>%
   column_to_rownames("Sample")
 
-
+# make a ginormous palette
 palette<-c(brewer.pal(12, "Set3"),brewer.pal(8, "Set2"),brewer.pal(12, "Set3"),
            brewer.pal(12, "Set3"),brewer.pal(8, "Set2"),brewer.pal(12, "Set3"),
            brewer.pal(12, "Set3"),brewer.pal(8, "Set2"),brewer.pal(12, "Set3"),
@@ -138,15 +136,15 @@ alldatatax_norm<-ddply(alldatatax,.(Sample),transform,rescale=sqrt(Percent))
 # remove lowest abundance taxa
 alldatatax_normg<- alldatatax_norm %>% dplyr::group_by(Order) # group
 taxsums<-dplyr::summarise(alldatatax_normg, sums=sum(Percent)) # calculate sums
-taxsums<-taxsums[order(-taxsums$sums),] # reorder 
+taxsums<-taxsums[order(-taxsums$sums),] # reorder
 toptax<-taxsums[1:30,] # extract top 30 Orders
 topdatatax_norm <- alldatatax_norm[alldatatax_norm$Order %in% toptax$Order,]
 
 
 ## FIGURE 4B ##############################################
 # 1030x550
-ggplot(topdatatax_norm, aes(Sample, Order_Name)) + 
-  geom_tile(aes(fill = rescale),colour = "white") + ggpubr::theme_transparent() + 
+ggplot(topdatatax_norm, aes(Sample, Order_Name)) +
+  geom_tile(aes(fill = rescale),colour = "white") + ggpubr::theme_transparent() +
   facet_grid(factor(Phylum, levels=c("Actinobacteria","Bacteroidetes","Cyanobacteria","Firmicutes","Fusobacteria",
                                      "Proteobacteria","Tenericutes","Verrucomicrobia","Unknown"))~
                factor(SampleType, levels = c("water","gut"))+Method+Station,
@@ -154,7 +152,7 @@ ggplot(topdatatax_norm, aes(Sample, Order_Name)) +
   scale_fill_gradientn(na.value = "salmon",labels = scales::percent,limits=c(0,1),
                        colours=c("white","#fecc5c","#fd8d3c","#f03b20","#bd0026","darkred"))+
   scale_x_discrete(expand = c(0, 0)) +
-  scale_y_discrete(expand = c(0, 0)) + 
+  scale_y_discrete(expand = c(0, 0)) +
   theme(legend.position = "bottom",axis.ticks = element_blank(),
         axis.text.y = element_text(size=10, colour="grey40"),
         axis.text.x = element_blank(),
@@ -195,10 +193,10 @@ guttrans<-guttrans[2:68]
 
 # simple venn diagrams
 venn(list(gut16S=gut16S, water16S=water16S))
-siteshey<-venn(list("1.PVD Gut 16S"=gut16S1, 
-          "2.GB Gut 16S"=gut16S2, 
-          "3.BIS Gut 16S"=gut16S3, 
-          "4.NAR Gut 16S"=gut16S4, 
+siteshey<-venn(list("1.PVD Gut 16S"=gut16S1,
+          "2.GB Gut 16S"=gut16S2,
+          "3.BIS Gut 16S"=gut16S3,
+          "4.NAR Gut 16S"=gut16S4,
           "5.NIN Gut 16S"=gut16S5))
 hey<- venn(list("gut 16S"=gut16S, "water 16S"=water16S, "gut metatranscriptome"=guttrans))
 summaryvenn<-attr(hey, "intersections")
@@ -206,7 +204,7 @@ summaryvennsites<-attr(siteshey, "intersections")
 
 
 
-## UPSETR --------------------
+## UPSETR plots --------------------
 
 library(UpSetR)
 library(reshape2)
@@ -233,34 +231,31 @@ UpSet(t(m), set_order = order(c("water16S","gutRNA","gut16S")),
 
 ## FIGURE 4A ##############################################
 #1000x155
-UpSet(m, set_order = order(c("water16S","gutRNA","gut16S")), 
+UpSet(m, set_order = order(c("water16S","gutRNA","gut16S")),
       pt_size = unit(.35, "cm"),lwd=3,
-      left_annotation = rowAnnotation(" " = anno_barplot(set_size(m), bar_width=0.7, 
+      left_annotation = rowAnnotation(" " = anno_barplot(set_size(m), bar_width=0.7,
                             axis_param = list(direction = "reverse", side = "top",labels_rot = 0),
                             border = FALSE, annotation_name_side = "top",
-                            gp = gpar(fill = "black"), 
-                            width = unit(4, "cm"))), 
+                            gp = gpar(fill = "black"),
+                            width = unit(4, "cm"))),
       right_annotation = NULL,row_names_side = "left",
       top_annotation = upset_top_annotation(m,bar_width = 0.9))
 
 
-
-
-
 # --------------------------------------------------------
 
-read_sets = list("1.PVD Gut 16S"=gut16S1, 
-                 "2.GB Gut 16S"=gut16S2, 
-                 "3.BIS Gut 16S"=gut16S3, 
-                 "4.NAR Gut 16S"=gut16S4, 
+read_sets = list("1.PVD Gut 16S"=gut16S1,
+                 "2.GB Gut 16S"=gut16S2,
+                 "3.BIS Gut 16S"=gut16S3,
+                 "4.NAR Gut 16S"=gut16S4,
                  "5.NIN Gut 16S"=gut16S5,
                  "All water 16S"=water16S)
 mgutw = make_comb_mat(read_sets)
 
 
-## FIGURE S2 ##############################################
-
+## FIGURE S3 ##############################################
 #900x400
+
 UpSet(mgutw,comb_col = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey40",
                          "black","black","black","grey40","black","black",
                          "black","grey40","black","black","grey40","black",
@@ -268,41 +263,33 @@ UpSet(mgutw,comb_col = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey
                          "grey40","black","grey40","black","black","grey40",
                          "black","grey40","black","grey40","grey40","black",
                          "black","grey40","black","grey40","black","grey40",
-                         "grey40","black","grey40", "black", "grey40", "grey40", 
+                         "grey40","black","grey40", "black", "grey40", "grey40",
                          "grey40", "grey40", "grey40", "grey40"),
       set_order = order(c("2.GB Gut 16S","3.BIS Gut 16S",
-                          "4.NAR Gut 16S", "5.NIN Gut 16S","All water 16S","1.PVD Gut 16S")), 
+                          "4.NAR Gut 16S", "5.NIN Gut 16S","All water 16S","1.PVD Gut 16S")),
       pt_size = unit(.5, "cm"),lwd=3,
-      right_annotation = rowAnnotation(" " = anno_barplot(set_size(mgutw), bar_width=0.7, 
+      right_annotation = rowAnnotation(" " = anno_barplot(set_size(mgutw), bar_width=0.7,
                                                          axis_param = list(side = "top",labels_rot = 0),
                                                          border = FALSE, annotation_name_side = "top",
-                                                         gp = gpar(fill = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey40")), 
-                                                         width = unit(4, "cm"))), 
+                                                         gp = gpar(fill = c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc","grey40")),
+                                                         width = unit(4, "cm"))),
       row_names_side = "left",
       top_annotation = upset_top_annotation(mgutw,bar_width = 0.9, height = unit(6, "cm")))
 
 
 
-
-
-
-
-
-
-
-
 ### NMDS of Metatranscriptome taxonomy -----------------
 ###################################################################################
 
-Site<-c("1.PVD","1.PVD","1.PVD","1.PVD","1.PVD", 
+Site<-c("1.PVD","1.PVD","1.PVD","1.PVD","1.PVD",
           "2.GB","2.GB","2.GB","2.GB","2.GB",
           "3.BIS","3.BIS","3.BIS","3.BIS","3.BIS",
-          "4.NAR","4.NAR","4.NAR","4.NAR","4.NAR", 
+          "4.NAR","4.NAR","4.NAR","4.NAR","4.NAR",
           "5.NIN","5.NIN","5.NIN","5.NIN","5.NIN")
 
 library(vegan)
 theme_set(theme_bw())
-veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100) 
+veganCovEllipse<-function (cov, center = c(0, 0), scale = 1, npoints = 100)
 {
   theta <- (0:npoints) * 2 * pi/npoints
   Circle <- cbind(cos(theta), sin(theta))
@@ -336,16 +323,16 @@ for(g in levels(NMDS$Site)){
 head(df_ell)
 NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Site),mean)
 
-## FIGURE S4A ##############################################
-metatranssp<- 
-  ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() + 
-  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) + 
+## FIGURE S5A ##############################################
+metatranssp<-
+  ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() +
+  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) +
   geom_point(size=4, alpha=0.9,aes(shape=Site))+scale_shape_manual(values = c(21,22,23,24,25))+
-  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + 
+  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") +
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   scale_colour_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   scale_linetype_manual(values=c("solid","dotted","twodash","longdash", "solid"), labels=c("1. Providence River", "2. Greenwich  Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+
-  theme(legend.text = element_text(size=14, colour="gray20"), 
+  theme(legend.text = element_text(size=14, colour="gray20"),
         legend.position = "none",
         legend.title = element_blank(),legend.box="horizontal") +
   ggtitle("Species level annotation")
@@ -380,16 +367,16 @@ for(g in levels(NMDS$Site)){
 head(df_ell)
 NMDS.mean=aggregate(NMDS[,1:2],list(group=NMDS$Site),mean)
 
-## FIGURE S4B ##############################################
-metatransord<- 
-  ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() + 
-  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) + 
+## FIGURE S5B ##############################################
+metatransord<-
+  ggplot(data=NMDS,aes(x,y,colour=Site,fill=Site))+theme_bw() +
+  geom_path(data=df_ell, aes(x=NMDS1, y=NMDS2, lty=Site), size=1) +
   geom_point(size=4, alpha=0.9,aes(shape=Site))+scale_shape_manual(values = c(21,22,23,24,25))+
-  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") + 
+  annotate("text",x=NMDS.mean$x,y=NMDS.mean$y,label=NMDS.mean$group,size=5, color="gray40") +
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   scale_colour_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   scale_linetype_manual(values=c("solid","dotted","twodash","longdash", "solid"), labels=c("1. Providence River", "2. Greenwich  Bay", "3. Bissel Cove", "4. Narrow River", "5. Ninigret Pond"))+
-  theme(legend.text = element_text(size=14, colour="gray20"), 
+  theme(legend.text = element_text(size=14, colour="gray20"),
         legend.position = "right",
         legend.title = element_blank(),legend.box="horizontal")+
   ggtitle("Order level annotation")
@@ -397,19 +384,14 @@ metatransord<-
 adonis2(abund_tableord~Site, data=NMDS, by=NULL,method="bray", k=2)
 
 
+## FIGURE S5 TOTAL ##############################################
 #1000x800
 cowplot::plot_grid(metatranssp, metatransord+theme(legend.position="none"),
                    get_legend(metatransord))
 
 
-
-
-
-
-
-
 ####
-### Metatranscriptome Rarefaction curve - Figure S1 ----------------------------------------------
+### Metatranscriptome Rarefaction curve - Figure S2 ----------------------------------------------
 ####
 
 # Load in the raw annotation data
@@ -448,11 +430,11 @@ legend(700,250, c("1.PVD gut","2.GB gut","3.BIS gut","4.NAR gut","5.NIN gut", "A
 #generate rarefaction curves
 #NOTE: THIS STEP TAKES A WHILE TO RUN.
 # Step size has been increased so it doesn't take a day or so
-outw <- with(pars, rarecurve(datamatt, step = 800, 
+outw <- with(pars, rarecurve(datamatt, step = 800,
                              sample = raremax, col = colors, label = FALSE))
 #determine limits of the graph
 Smax <- sapply(outw, max)
-#plot the rarefaction curves
+#plot the rarefaction curves - Figure S2A
 #550x400
 plot(c(1, 1103294), c(1, max(Smax)), xlab = "Number of Reads",
      ylab = "Observed Number of Annotated Species", type = "n")
@@ -465,14 +447,15 @@ for (i in seq_along(outw)) {
 # check slopes of curves at the end, make sure they're ~0
 end<-raremax-1
 slopes<-rareslope(datamatt, end)
+# Figure S2B
 plot(slopes, type="p", pch=23, bg = colors,
      cex=2, xlab = "Metatranscriptome Samples", ylim=c(0,0.05))
 # coverage proxy:
 coverage<-100-100*rareslope(datamatt, end)
+# Figure S2C
 plot(coverage, type="p", pch=23, bg = colors,
-     cex=2, xlab = "Metatranscriptome Samples", 
+     cex=2, xlab = "Metatranscriptome Samples",
      ylab="Coverage (%)", ylim=c(95,100))
 
 mean(coverage)
 sd(coverage)
-        
\ No newline at end of file
diff --git a/TNCpaper_ScriptsData/FigureS3_lefse.R b/TNCpaper_ScriptsData/FigureS4_lefse.R
similarity index 98%
rename from TNCpaper_ScriptsData/FigureS3_lefse.R
rename to TNCpaper_ScriptsData/FigureS4_lefse.R
index 10e7e8f..61ab19f 100644
--- a/TNCpaper_ScriptsData/FigureS3_lefse.R
+++ b/TNCpaper_ScriptsData/FigureS4_lefse.R
@@ -1,6 +1,6 @@
 # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary
 # 16S abundances using LefSe
-# Figure S3
+# Figure S4
 # updated 4/16/2020 for resubmission
 
 # 16S Amplicon data at order level analyzed with LefSe
@@ -41,16 +41,18 @@ splot<- datasites %>%
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))
 
 
-# Plot together
+# Plot together - FIGURE S4
 cowplot::plot_grid(tplot,splot, ncol=2, align="hv", axis="tbr")
 #1300x800
 
 
+
+
 # Just top 30 taxa --------------------
 # show only if they are significantly different
 
 # get this variable from script for Figure 4
-toptax 
+toptax
 
 # Extact data for orders included in toptax
 topdatatype <- datatype[datatype$Order %in% toptax$Order,]
@@ -78,5 +80,3 @@ splot<- topdatasites %>%
   labs(y="LDA Score (log 10)",x=NULL)+
   ggtitle("Sites")+
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))
-
-
diff --git a/TNCpaper_ScriptsData/FigureS5-S8.R b/TNCpaper_ScriptsData/FigureS6-S9.R
similarity index 90%
rename from TNCpaper_ScriptsData/FigureS5-S8.R
rename to TNCpaper_ScriptsData/FigureS6-S9.R
index daaf349..722c912 100644
--- a/TNCpaper_ScriptsData/FigureS5-S8.R
+++ b/TNCpaper_ScriptsData/FigureS6-S9.R
@@ -1,20 +1,19 @@
 # Stevick et al 2020 Oyster Gut Microbiome Function in an Estuary
 # Script to analyze gene-level functional expression from SAMSA
-# Figures S5, S6, S7, S8
+# Figures S6, S7, S8, S9
 # updated 4/16/2020 for resubmission
 
 # Metaranscriptomic functional data at gene level
 
-
 library("ggplot2")
 library("tidyr")
 library("dplyr")
 library("readxl")
 library("gridExtra")
 library("leaflet")
-library(RColorBrewer)
-library(viridis)
-library(pheatmap)
+library("RColorBrewer")
+library("viridis")
+library("pheatmap")
 
 data<-read_excel("Function/DESeq_subsys_results_level4_summary.xlsx", sheet=1)
 hierarchy<-read_excel("Function/DESeq_subsys_results_level4_summary.xlsx", sheet=2)
@@ -25,7 +24,6 @@ data$padj_BIS<-as.numeric(data$padj_BIS)
 data$padj_NAR<-as.numeric(data$padj_NAR)
 data$padj_NIN<-as.numeric(data$padj_NIN)
 
-
 datah<-merge(data, hierarchy)
 
 # NITROGEN ------------------------------------
@@ -39,11 +37,11 @@ datahdenitgp<-gather(datahdenit, padj, padjvalue, "padj_PVD","padj_GB","padj_BIS
 datahdenitg$padj<-datahdenitgp$padj
 datahdenitg$padjvalue<-datahdenitgp$padjvalue
 
-datahdenitg$signif<-ifelse(datahdenitgp$padjvalue <=0.01, "**", 
+datahdenitg$signif<-ifelse(datahdenitgp$padjvalue <=0.01, "**",
                         ifelse(datahdenitgp$padjvalue <=0.05, "*",
                                ifelse("ns")))
 
-datahdenitg$foldchange = factor(datahdenitg$foldchange, 
+datahdenitg$foldchange = factor(datahdenitg$foldchange,
                              levels = c("log2FoldChange_PVD",
                     "log2FoldChange_GB","log2FoldChange_BIS",
                     "log2FoldChange_NAR","log2FoldChange_NIN"))
@@ -55,7 +53,7 @@ ggplot(datahdenitg, aes(Level4, foldvalue,fill=foldchange))+
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
    geom_text(aes(label=signif), position = position_dodge(width = 1), size=6)
 
-datahdenitg$Level3f<-factor(datahdenitg$Level3, 
+datahdenitg$Level3f<-factor(datahdenitg$Level3,
                            levels=c("Nitric_oxide_synthase","Allantoin_Utilization",
                                     "Amidase_clustered_with_urea_and_nitrile_hydratase_functions",
                                     "Denitrification",
@@ -66,7 +64,7 @@ datahdenitg$Level3f<-factor(datahdenitg$Level3,
 
 
 
-ggplot(datahdenitg, aes(Level4, foldchange, label=signif, 
+ggplot(datahdenitg, aes(Level4, foldchange, label=signif,
                          fill=foldvalue,color=signif))+theme_minimal()+
   geom_tile(size=0.7)+facet_grid(Level3f~., scales="free", space="free")+
   scale_fill_viridis()+coord_flip()+
@@ -80,7 +78,6 @@ ggplot(datahdenitg, aes(Level4, foldchange, label=signif,
 
 ### PHOSPHOROUS -----------------------
 
-
 datahphos<-filter(datah, Level2=="Phosphorus Metabolism")
 datahphosg<-gather(datahphos, foldchange, foldvalue, "log2FoldChange_PVD",
                     "log2FoldChange_GB","log2FoldChange_BIS",
@@ -90,11 +87,11 @@ datahphosgp<-gather(datahphos, padj, padjvalue, "padj_PVD","padj_GB","padj_BIS",
 datahphosg$padj<-datahphosgp$padj
 datahphosg$padjvalue<-datahphosgp$padjvalue
 
-datahphosg$signif<-ifelse(datahphosgp$padjvalue <=0.01, "**", 
+datahphosg$signif<-ifelse(datahphosgp$padjvalue <=0.01, "**",
                            ifelse(datahphosgp$padjvalue <=0.05, "*",
                                   ifelse("ns")))
 
-datahphosg$foldchange = factor(datahphosg$foldchange, 
+datahphosg$foldchange = factor(datahphosg$foldchange,
                                 levels = c("log2FoldChange_PVD",
                                            "log2FoldChange_GB","log2FoldChange_BIS",
                                            "log2FoldChange_NAR","log2FoldChange_NIN"))
@@ -102,7 +99,6 @@ datahphosg$foldchange = factor(datahphosg$foldchange,
 ggplot(datahphosg, aes(Level4, foldvalue,fill=foldchange))+
   geom_col(position = "dodge")+coord_flip()+
   facet_grid(Level3~., scales="free",space="free")+
-#  theme(strip.text.y = element_text(angle=0))+
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   geom_text(aes(label=signif), position = position_dodge(width = 1), size=6)
 
@@ -123,8 +119,7 @@ ggplot(datahphosg,aes(Level4, foldchange, label=signif,
 
 ### STRESS RESPONSE -----------------------
 
-
-datahstress<-filter(datah,Level2=="Osmotic stress" | Level2=="Periplasmic Stress" | 
+datahstress<-filter(datah,Level2=="Osmotic stress" | Level2=="Periplasmic Stress" |
                     Level2=="Oxidative stress")
 datahstressg<-gather(datahstress, foldchange, foldvalue, "log2FoldChange_PVD",
                    "log2FoldChange_GB","log2FoldChange_BIS",
@@ -134,33 +129,28 @@ datahstressgp<-gather(datahstress, padj, padjvalue, "padj_PVD","padj_GB","padj_B
 datahstressg$padj<-datahstressgp$padj
 datahstressg$padjvalue<-datahstressgp$padjvalue
 
-datahstressg$signif<-ifelse(datahstressgp$padjvalue <=0.01, "**", 
+datahstressg$signif<-ifelse(datahstressgp$padjvalue <=0.01, "**",
                           ifelse(datahstressgp$padjvalue <=0.05, "*",
                                  ifelse("ns")))
 
-datahstressg$foldchange = factor(datahstressg$foldchange, 
+datahstressg$foldchange = factor(datahstressg$foldchange,
                                levels = c("log2FoldChange_PVD",
                                           "log2FoldChange_GB","log2FoldChange_BIS",
                                           "log2FoldChange_NAR","log2FoldChange_NIN"))
 
-
 data3gpfpe<-gather(datahstress, folderror, errorvalue, "lfcSE_PVD","lfcSE_GB",
                    "lfcSE_BIS","lfcSE_NAR","lfcSE_NIN")
 datahstressg$folderror<-data3gpfpe$folderror
 datahstressg$errorvalue<-data3gpfpe$errorvalue
 
-
 ggplot(datahstressg, aes(Level4, foldvalue,fill=foldchange))+
   geom_col(position = "dodge")+coord_flip()+
   facet_grid(Level2~., scales="free", space="free")+
-  geom_errorbar(aes(ymin=foldvalue-errorvalue, ymax=foldvalue+errorvalue), 
+  geom_errorbar(aes(ymin=foldvalue-errorvalue, ymax=foldvalue+errorvalue),
                 size=0.8,position = position_dodge(width=0.9), width = 0, color="grey60")+
- #   theme(strip.text.y = element_text(angle=0))+
   scale_fill_manual(values=c("#253494","#0868ac","#43a2ca","#7bccc4","#bae4bc"))+
   geom_text(aes(label=signif), position = position_dodge(width = 1), size=6)
 
-
-
 ggplot(datahstressg, aes(Level4, foldchange, label=signif,
                          fill=foldvalue,color=signif))+theme_minimal()+
   geom_tile(size=0.7)+facet_grid(Level2~., scales="free", space="free")+
@@ -170,11 +160,3 @@ ggplot(datahstressg, aes(Level4, foldchange, label=signif,
   theme(legend.position="bottom",
         plot.background = element_rect(fill = "transparent", color = NA),rect = element_rect(fill = "transparent"))+
   scale_color_manual(values=c("darkred","red","black"))
-
-
-
-
-
-#write.csv(datahdenit,"nitrogen.csv")
-#write.csv(datahphos,"phosphorus.csv")
-#write.csv(datahstress,"stress.csv")
diff --git a/TNCpaper_ScriptsData/README.md b/TNCpaper_ScriptsData/README.md
index 9ad1d58..2053eb3 100644
--- a/TNCpaper_ScriptsData/README.md
+++ b/TNCpaper_ScriptsData/README.md
@@ -5,11 +5,11 @@ This folder contains all the processed data files from the analysis described in
 
 #### Scripts
 - **Figure1_2.R** - makes the map of sites and PCA of environmental data.
-- **Figure3_S1.R** - 16S rRNA alpha- and beta-diversity and rarefaction plots.
-- **Figure4_S1_S2_S4.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S2). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S1 and S4).
+- **Figure3_S1_S2.R** - 16S rRNA percent abundance barplots (including controls), rarefaction plots, and alpha- and beta-diversity.
+- **Figure4_S2_S3_S5.R** - compares taxonomy generated from RefSeq/metatranscriptomes and 16S rRNA data with heatmaps and UpSetR plots (4 and S3). Metatranscriptome taxonomy rarefaction and beta-diversity plots (S2 and S5).
 - **Figure5_6.R** - Metaranscriptomic functional data at pathway levels, subset for stress response (N vs S), nitrogen and phosphorus metabolism (each site vs mean) with barplots.
-- **FigureS3_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx)
-- **FigureS5-S8.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps)
+- **FigureS4_lefse.R** - 16S rRNA amplicon data at order level output from LefSe (Taxonomy/TNC_summaryLefSeResults.xlsx)
+- **FigureS6-S9.R** - Metaranscriptomic functional data at gene level for stress response, nitrogen metabolism, phosphorus metabolism (heatmaps)
 
 #### 1. Function/ (from [MetatranscriptomeAnalysis](/MetatranscriptomeAnalysis))
 - DeSeq2 results for level-2 SEED functional annnotation of metatranscriptomes (each station vs. mean)