Franction.Addembly.Processes_rna_dna.Rmd

---
title: "Quantify the contribution of each assembly process for the RNA/DNA datasets"
author: "Jia Xiu"
email: "x.jia@rug.nl; xibeihenai@gmail.com"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```


### Initiate libraries
```{r load_packages_01, message=FALSE}

rm(list=ls())

library(vegan)
library(ggplot2)
library(RColorBrewer)
library(reshape2) 
library(scales) 
library(ggforce)
library(ggpubr)
library(dplyr)

# Shared ggplot2 theme for the stacked-bar figures: black-and-white base,
# bold facet strips drawn outside the panel, a framed legend box and no
# grid lines.
mytheme <- theme_bw() +
  theme(
    # base text size
    text = element_text(size = 12),
    # facet strips
    strip.text = element_text(face = "bold"),
    strip.placement = "outside",
    strip.background = element_blank(),
    # legend
    legend.title = element_text(face = "bold"),
    legend.justification = c(1, 0.8),
    legend.box.background = element_rect(),
    # legend.box.margin = margin(1, 1, 1, 1),
    # panel grid
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
```


### Both datasets (DNA and RNA), all sample pairs
```{r}

# Names of the two datasets; each one has a weighted beta-NTI matrix and an
# RC-bray matrix stored as CSV in the working directory.
data.set.names <- c("DNA", "RNA")

# Number of unordered sample pairs the sanity checks below expect
# (strict lower triangle of a 60 x 60 matrix).
expected.pairs <- (60 * 60 - 60) / 2

# Matrix storing, per dataset, the number of sample pairs assigned to each
# assembly process.
df <- matrix(NA, nrow = 2, ncol = 5)
colnames(df) <- c("Variable.selection", "Homogeneous.selection",
                  "Dispersal.limitation", "Homogenizing.dispersal",
                  "Undominated.processes")
rownames(df) <- data.set.names
df

# Number of sample pairs per dataset, used as the denominator of the
# percentages (kept per dataset instead of reusing the last loop iteration).
n.pairs <- setNames(rep(NA_real_, length(data.set.names)), data.set.names)

# Expand a square sample-by-sample table into one row per unordered sample
# pair (strict lower triangle). `prefix` is removed from the row and column
# names first (first match only, as sub() does). The result has the columns
# Var1, Var2 and Freq and the row names "<Var1>_<Var2>".
pairs_without_prefix <- function(tab, prefix) {
  row.names(tab) <- sub(prefix, "", row.names(tab))
  colnames(tab) <- sub(prefix, "", colnames(tab))
  mat <- as.matrix(tab)
  long <- data.frame(as.table(mat))[lower.tri(mat, diag = FALSE), ]
  row.names(long) <- paste(long$Var1, long$Var2, sep = "_")
  long
}

# Loop over the datasets and count the pairs governed by each process.
for (data.set.name in data.set.names) {

  cat("\nFor dataset:", data.set.name, "\n")

  # load datasets
  nti <- read.csv(paste0("weighted_bNTI_", data.set.name, ".csv"),
                  header = TRUE, row.names = 1, check.names = FALSE)
  rc <- read.csv(paste("RC-bray", data.set.name, "999.csv", sep = "_"),
                 header = TRUE, row.names = 1, check.names = FALSE)

  # "DNA_" is stripped from the sample names of the DNA dataset,
  # "cDNA_" from those of the other dataset
  prefix <- if (data.set.name == "DNA") "DNA_" else "cDNA_"

  # expand weighted beta-NTI
  nti <- pairs_without_prefix(nti, prefix)
  cat("For no. of NTI pairs: Observed = Predict",
      expected.pairs == length(nti$Freq), "\n")
  cat("the mean beta-NTI is:", round(mean(nti$Freq, na.rm = TRUE), 2), "\n")
  str(nti)

  # expand RC-bray
  rc <- pairs_without_prefix(rc, prefix)
  cat("For no. of RC pairs: Observed = Predict",
      expected.pairs == length(rc$Freq), "\n")
  cat("the mean RC-bray is:", round(mean(rc$Freq, na.rm = TRUE), 2), "\n")
  str(rc)

  # Combine the beta-NTI values with RC-bray, matching pairs by row name
  nti.rc <- merge(nti, rc, by = "row.names", all = TRUE)
  nti.rc <- data.frame(nti = nti.rc$Freq.x, rc = nti.rc$Freq.y,
                       row.names = nti.rc$Row.names)

  # Invalidate RC-bray wherever |beta-NTI| > 2. Vectorised and NA-safe: a
  # missing beta-NTI leaves RC-bray untouched instead of raising an error.
  under.selection <- !is.na(nti.rc$nti) & abs(nti.rc$nti) > 2
  nti.rc$rc[under.selection] <- NA
  str(nti.rc)

  # Count the pairs per process; sum(..., na.rm = TRUE) ignores missing
  # values and yields 0 (not NA) when no pair qualifies.
  counts <- c(
    Variable.selection     = sum(nti.rc$nti > 2, na.rm = TRUE),
    Homogeneous.selection  = sum(nti.rc$nti < -2, na.rm = TRUE),
    Dispersal.limitation   = sum(nti.rc$rc > 0.95, na.rm = TRUE),
    Homogenizing.dispersal = sum(nti.rc$rc < -0.95, na.rm = TRUE),
    Undominated.processes  = sum(nti.rc$rc <= 0.95 & nti.rc$rc >= -0.95,
                                 na.rm = TRUE)
  )

  report.labels <- c("variable  selection:", "homogenous selection:",
                     "dispersal limitation:", "homogenizing dispersal:",
                     "Undominated processes:")
  for (k in seq_along(counts)) {
    cat(report.labels[k], counts[[k]], "within", nrow(nti.rc),
        "pairwise samples\n")
  }

  # store by dataset and process name rather than by numeric position
  df[data.set.name, names(counts)] <- counts
  n.pairs[data.set.name] <- nrow(nti.rc)
}

# Relative impact of each process (%), each dataset divided by its own
# number of sample pairs.
df1 <- melt(round(sweep(df * 100, 1, n.pairs[rownames(df)], "/"), 2))

df <- data.frame(Datasets = df1$Var1,
                 Processes = df1$Var2,
                 Value = df1$value,
                 row.names = row.names(df1))

# drop processes that do not occur in a dataset
df <- df[!is.na(df$Value) & df$Value != 0, ]

df$Processes <- factor(df$Processes,
                       levels = c("Variable.selection", "Homogeneous.selection",
                                  "Dispersal.limitation",
                                  "Homogenizing.dispersal",
                                  "Undominated.processes"),
                       labels = c("Variable selection", "Homogeneous selection",
                                  "Dispersal limitation",
                                  "Homogenizing dispersal",
                                  "Undominated processes"))
df$Datasets <- factor(df$Datasets, levels = c("RNA", "DNA"),
                      labels = c("RNA", "DNA"))
head(df)
str(df)

# Pie charts (one per dataset), slices drawn clockwise.

# Fixed colour per process. The vector is named so that every process keeps
# its colour even when another process is absent from `df` (rows with a zero
# share were removed above, and unused factor levels are dropped by the scale).
process.colours <- c("Variable selection"     = "#00A087B2",
                     "Homogeneous selection"  = "#FFDB6D",
                     "Dispersal limitation"   = "#DC0000B2",
                     "Homogenizing dispersal" = "#4DBBD5B2",
                     "Undominated processes"  = "#7570B3")

# Start, end and middle angle of every slice, computed within each dataset.
# A grouped mutate replaces the former self-join on an implicit key, and the
# result is ungrouped so no grouping leaks into later code.
dat_pies <- df %>%
  group_by(Datasets) %>%
  mutate(value_total = sum(Value),
         end_angle = 2 * pi * cumsum(Value) / value_total,  # end of the slice
         start_angle = dplyr::lag(end_angle, default = 0),  # start of the slice
         mid_angle = 0.5 * (start_angle + end_angle)) %>%   # label position
  ungroup()

rpie <- 1             # pie radius
rlabel <- 0.6 * rpie  # label radius; 0.5 would put the label exactly halfway
                      # along the radius, slightly more reads better

# draw the pies
(pie <- ggplot(dat_pies) +
  geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = 0, r = rpie,
                   start = start_angle, end = end_angle, fill = Processes)) +
  geom_text(aes(x = rlabel * sin(mid_angle), y = rlabel * cos(mid_angle),
                label = paste(round(Value, 2), "%")),
            hjust = 0.5, vjust = 0.5, size = 4) +
  coord_fixed() +
  scale_x_continuous(limits = c(-1, 1), name = "", breaks = NULL, labels = NULL) +
  scale_y_continuous(limits = c(-1, 1), name = "", breaks = NULL, labels = NULL) +
  facet_wrap(. ~ Datasets, ncol = 4) +
  scale_fill_manual(values = process.colours) +
  theme_minimal() +
  theme(text = element_text(size = 12),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.box.background = element_rect(),
        legend.box.margin = margin(1, 1, 1, 1),
        legend.title = element_text(face = "bold", size = 12),
        panel.border = element_blank(),
        panel.grid = element_blank(),
        axis.ticks = element_blank()))

# ggsave("Fraction_assembly_processes_DNA_RNA_raw.pdf", width = 9, height = 3.5, units = "cm", pie, scale = 2)

```


## Plotting based on successional year (5 groups)
```{r}
# names of datasets
data.set.names <- c("DNA", "RNA")

# list storing the per-year count matrix of each dataset
datalist <- list()

# Number of unordered sample pairs the sanity checks below expect
# (strict lower triangle of a 60 x 60 matrix).
expected.pairs <- (60 * 60 - 60) / 2

# Levels of the successional-year factor, in plotting order
year.levels <- c("0", "10", "40", "70", "110")

# Expand a square sample-by-sample table into one row per unordered sample
# pair (strict lower triangle), with the columns Var1, Var2 and Freq and the
# row names "<Var1>_<Var2>".
year_pairs_long <- function(tab) {
  mat <- as.matrix(tab)
  long <- data.frame(as.table(mat))[lower.tri(mat, diag = FALSE), ]
  row.names(long) <- paste(long$Var1, long$Var2, sep = "_")
  long
}

# Keep only the pairs whose "_"-separated row name has identical 2nd and 6th
# fields (the fields this chunk uses as the successional year of the two
# samples). Returns the columns X2, X6 and Freq.
same_year_pairs <- function(pairs) {
  fields <- do.call(rbind, strsplit(row.names(pairs), "_", fixed = TRUE))
  keep <- which(fields[, 2] == fields[, 6])
  data.frame(row.names = row.names(pairs)[keep],
             X2 = fields[keep, 2], X6 = fields[keep, 6],
             Freq = pairs$Freq[keep])
}

# Loop over the datasets and count, per year, the pairs governed by each process.
for (data.set.name in data.set.names) {

  cat("\n\nFor dataset:", data.set.name, "\n")

  # load datasets
  nti <- read.csv(paste0("weighted_bNTI_", data.set.name, ".csv"),
                  header = TRUE, row.names = 1, check.names = FALSE)
  rc <- read.csv(paste("RC-bray", data.set.name, "999.csv", sep = "_"),
                 header = TRUE, row.names = 1, check.names = FALSE)

  # expand weighted beta-NTI; the checks are reported on all pairs, before
  # the within-year filter
  nti <- year_pairs_long(nti)
  cat("NO. of NTI pairs, Is Observed = Predict?",
      expected.pairs == length(nti$Freq), "\n")
  cat("the mean beta-NTI is:", round(mean(nti$Freq, na.rm = TRUE), 2), "\n")
  nti <- same_year_pairs(nti)
  str(nti)

  # expand RC-bray
  rc <- year_pairs_long(rc)
  cat("NO. of RC pairs, Is Observed = Predict?",
      expected.pairs == length(rc$Freq), "\n")
  cat("the mean RC-bray is:", round(mean(rc$Freq, na.rm = TRUE), 2), "\n")
  rc <- same_year_pairs(rc)
  str(rc)

  # Combine the beta-NTI values with RC-bray, matching pairs by row name
  nti.rc <- merge(nti, rc, by = "row.names", all = TRUE)
  nti.rc <- data.frame(Year = nti.rc$X2.x, nti = nti.rc$Freq.x,
                       rc = nti.rc$Freq.y, row.names = nti.rc$Row.names)

  # Invalidate RC-bray wherever |beta-NTI| > 2. Vectorised and NA-safe: a
  # missing beta-NTI leaves RC-bray untouched instead of raising an error.
  nti.rc$rc[!is.na(nti.rc$nti) & abs(nti.rc$nti) > 2] <- NA

  nti.rc$Year <- factor(nti.rc$Year, levels = year.levels)
  str(nti.rc)

  # Count matrix: one row per year, first column the year label. Because of
  # that label column the matrix is character; the counts are converted back
  # to numbers after the datasets are combined.
  df <- matrix(NA, nrow = 5, ncol = 6)
  df[, 1] <- year.levels
  colnames(df) <- c("Year", "Variable.selection", "Homogeneous.selection",
                    "Dispersal.limitation", "Homogenizing.dispersal",
                    "Undominated.processes")

  for (year in levels(nti.rc$Year)) {

    cat("\nfor", year, "years :\n")

    # which() drops rows whose Year is NA instead of adding all-NA rows
    stage <- nti.rc[which(nti.rc$Year == year), ]
    n.stage <- nrow(stage)

    # sum(..., na.rm = TRUE) ignores missing values and yields 0 (not NA)
    # when no pair qualifies
    counts <- c(sum(stage$nti > 2, na.rm = TRUE),
                sum(stage$nti < -2, na.rm = TRUE),
                sum(stage$rc > 0.95, na.rm = TRUE),
                sum(stage$rc < -0.95, na.rm = TRUE),
                sum(stage$rc <= 0.95 & stage$rc >= -0.95, na.rm = TRUE))

    cat("Number of variable  selection :", counts[1],
        "within", n.stage, "pairwise samples\n")
    cat("Number of Homogeneous selection is:", counts[2],
        "within", n.stage, "pairwise samples\n")
    cat("Number of dispersal limitation is:", counts[3],
        "within", n.stage, "pairwise samples\n")
    cat("Number of homogenizing dispersal is:", counts[4],
        "within", n.stage, "pairwise samples\n")
    cat("Number of Undominated processes is:", counts[5],
        "within", n.stage, "pairwise samples\n")

    df[df[, 1] == year, 2:6] <- counts
  }
  datalist[[data.set.name]] <- df
}


# Relative impact of each process per dataset and successional year
str(datalist)
df <- do.call(rbind.data.frame, datalist)

# Label every row with the dataset it came from. The labels are taken from
# the list names and block sizes rather than by trimming the generated row
# names, so they do not depend on the row-name format.
df$Datasets <- factor(rep(names(datalist),
                          times = vapply(datalist, nrow, integer(1))))

df1 <- melt(df, id = c("Datasets", "Year"))
df1[is.na(df1)] <- 0
df1$value <- as.numeric(df1$value)

# Within-year sample pairs expected for one dataset and year:
# 66 = choose(12, 2), presumably 60 samples spread evenly over 5 years.
pairs.per.year <- 66

# `df` has one row per dataset-by-year combination
cat("should get", pairs.per.year * nrow(df),
    "pairwise comparision index,\nactually got", sum(df1$value),
    "pairwise comparision index")

# counts -> percentages; processes that do not occur are dropped
df1$value <- round(df1$value * 100 / pairs.per.year, 2)
df1 <- df1[!is.na(df1$value) & df1$value != 0, ]

df1$Datasets <- factor(df1$Datasets, levels = c("RNA", "DNA"),
                       labels = c("RNA", "DNA"))

df1$Processes <- factor(df1$variable,
                        levels = c("Variable.selection", "Homogeneous.selection",
                                   "Dispersal.limitation",
                                   "Homogenizing.dispersal",
                                   "Undominated.processes"),
                        labels = c("Variable selection", "Homogeneous selection",
                                   "Dispersal limitation",
                                   "Homogenizing dispersal",
                                   "Undominated processes"))

df1$Year <- factor(df1$Year, levels = c("0", "10", "40", "70", "110"))

head(df1)
str(df1)

# Stacked-bar plot of the relative influence per successional year.

# Fixed colour per process. The vector is named so that the mapping does not
# depend on which processes happen to occur in `df1`: an unnamed four-colour
# vector fails as soon as all five processes are present and shifts colours
# when a different process is the missing one.
process.colours <- c("Variable selection"     = "#00A087B2",
                     "Homogeneous selection"  = "#FFDB6D",
                     "Dispersal limitation"   = "#DC0000B2",
                     "Homogenizing dispersal" = "#4DBBD5B2",
                     "Undominated processes"  = "#7570B3")

(f1 <- ggplot(df1, aes(x = Year, y = value, fill = Processes)) + #, label=round(value,2)
    facet_grid(~Datasets) +
    geom_bar(stat = "identity", width = 0.8, colour = "black") +
    scale_y_continuous(expand = c(0, 0), limits = c(0, 105)) +
    scale_fill_manual(values = process.colours) +
    labs(x = "Stage of succession (Years)", y = "Relative Influence (%)", title = " ") +
    mytheme)

# ggsave("Assembly_processes_Year.png", width = 12, height = 6, units = "cm", f1, scale = 1.5, dpi = 300)

# Pie charts (a) above the per-year bars (b), sharing one legend
(p <- ggarrange(pie, f1, labels = c("(a)", "(b)"),
                common.legend = TRUE, legend = "right", ncol = 1, nrow = 2))

# ggsave("Assembly_processes_pie_year_raw.pdf", width = 10, height = 12, units = "cm", p, scale = 1.5)


```

## Plotting based on sampling month (4 groups)
```{r}

# dataset names
data.set.names <- c("DNA", "RNA")

# list storing the per-month count matrix of each dataset
datalist <- list()

# Number of unordered sample pairs the sanity checks below expect
# (strict lower triangle of a 60 x 60 matrix).
expected.pairs <- (60 * 60 - 60) / 2

# Levels of the sampling-month factor, in plotting order
month.levels <- c("5", "7", "9", "11")

# Expand a square sample-by-sample table into one row per unordered sample
# pair (strict lower triangle), with the columns Var1, Var2 and Freq and the
# row names "<Var1>_<Var2>".
month_pairs_long <- function(tab) {
  mat <- as.matrix(tab)
  long <- data.frame(as.table(mat))[lower.tri(mat, diag = FALSE), ]
  row.names(long) <- paste(long$Var1, long$Var2, sep = "_")
  long
}

# Keep only the pairs whose "_"-separated row name has identical 3rd and 7th
# fields (the fields this chunk uses as the sampling month of the two
# samples). Returns the columns X3, X7 and Freq.
same_month_pairs <- function(pairs) {
  fields <- do.call(rbind, strsplit(row.names(pairs), "_", fixed = TRUE))
  keep <- which(fields[, 3] == fields[, 7])
  data.frame(row.names = row.names(pairs)[keep],
             X3 = fields[keep, 3], X7 = fields[keep, 7],
             Freq = pairs$Freq[keep])
}

# Loop over the datasets and count, per month, the pairs governed by each process.
for (data.set.name in data.set.names) {

  cat("\n\nFor dataset:", data.set.name, "\n")

  # load datasets
  nti <- read.csv(paste0("weighted_bNTI_", data.set.name, ".csv"),
                  header = TRUE, row.names = 1, check.names = FALSE)
  rc <- read.csv(paste("RC-bray", data.set.name, "999.csv", sep = "_"),
                 header = TRUE, row.names = 1, check.names = FALSE)

  # expand weighted beta-NTI; the checks are reported on all pairs, before
  # the within-month filter
  nti <- month_pairs_long(nti)
  cat("NO. of NTI pairs, Is Observed = Predict?",
      expected.pairs == length(nti$Freq), "\n")
  cat("the mean beta-NTI is:", round(mean(nti$Freq, na.rm = TRUE), 2), "\n")
  # keep pairs sampled in the same month
  nti <- same_month_pairs(nti)
  str(nti)

  # expand RC-bray
  rc <- month_pairs_long(rc)
  cat("NO. of RC pairs, Is Observed = Predict?",
      expected.pairs == length(rc$Freq), "\n")
  cat("the mean RC-bray is:", round(mean(rc$Freq, na.rm = TRUE), 2), "\n")
  # keep pairs sampled in the same month
  rc <- same_month_pairs(rc)
  str(rc)

  # Combine the beta-NTI values with RC-bray, matching pairs by row name
  nti.rc <- merge(nti, rc, by = "row.names", all = TRUE)
  nti.rc <- data.frame(Month = nti.rc$X3.x, nti = nti.rc$Freq.x,
                       rc = nti.rc$Freq.y, row.names = nti.rc$Row.names)

  # Invalidate RC-bray wherever |beta-NTI| > 2. Vectorised and NA-safe: a
  # missing beta-NTI leaves RC-bray untouched instead of raising an error.
  nti.rc$rc[!is.na(nti.rc$nti) & abs(nti.rc$nti) > 2] <- NA

  nti.rc$Month <- factor(nti.rc$Month, levels = month.levels)
  str(nti.rc)

  # Count matrix: one row per month, first column the month label. Because of
  # that label column the matrix is character; the counts are converted back
  # to numbers after the datasets are combined.
  df <- matrix(NA, nrow = 4, ncol = 6)
  df[, 1] <- month.levels
  colnames(df) <- c("Month", "Variable.selection", "Homogeneous.selection",
                    "Dispersal.limitation", "Homogenizing.dispersal",
                    "Undominated.processes")

  for (month in levels(nti.rc$Month)) {

    cat("\nfor", month, "Months :\n")

    # which() drops rows whose Month is NA instead of adding all-NA rows
    in.month <- nti.rc[which(nti.rc$Month == month), ]
    n.month <- nrow(in.month)

    # sum(..., na.rm = TRUE) ignores missing values and yields 0 (not NA)
    # when no pair qualifies
    counts <- c(sum(in.month$nti > 2, na.rm = TRUE),
                sum(in.month$nti < -2, na.rm = TRUE),
                sum(in.month$rc > 0.95, na.rm = TRUE),
                sum(in.month$rc < -0.95, na.rm = TRUE),
                sum(in.month$rc <= 0.95 & in.month$rc >= -0.95, na.rm = TRUE))

    cat("Number of variable  selection :", counts[1],
        "within", n.month, "pairwise samples\n")
    cat("Number of Homogeneous selection is:", counts[2],
        "within", n.month, "pairwise samples\n")
    cat("Number of dispersal limitation is:", counts[3],
        "within", n.month, "pairwise samples\n")
    cat("Number of homogenizing dispersal is:", counts[4],
        "within", n.month, "pairwise samples\n")
    cat("Number of Undominated processes is:", counts[5],
        "within", n.month, "pairwise samples\n")

    df[df[, 1] == month, 2:6] <- counts
  }
  datalist[[data.set.name]] <- df
}


# Relative impact of each process per dataset and sampling month
str(datalist)
df <- do.call(rbind.data.frame, datalist)

# Label every row with the dataset it came from. The labels are taken from
# the list names and block sizes rather than by trimming the generated row
# names, so they do not depend on the row-name format.
df$Datasets <- factor(rep(names(datalist),
                          times = vapply(datalist, nrow, integer(1))))

df1 <- melt(df, id = c("Datasets", "Month"))
df1[is.na(df1)] <- 0
df1$value <- as.numeric(df1$value)

# Within-month sample pairs expected for one dataset and month:
# 105 = choose(15, 2), presumably 60 samples spread evenly over 4 months.
pairs.per.month <- 105

# `df` has one row per dataset-by-month combination
cat("should get", pairs.per.month * nrow(df),
    "pairwise comparision index,\nactually got", sum(df1$value),
    "pairwise comparision index")

# counts -> percentages; processes that do not occur are dropped
df1$value <- round(df1$value * 100 / pairs.per.month, 2)
df1 <- df1[!is.na(df1$value) & df1$value != 0, ]

df1$Datasets <- factor(df1$Datasets, levels = c("RNA", "DNA"),
                       labels = c("RNA", "DNA"))

df1$Processes <- factor(df1$variable,
                        levels = c("Variable.selection", "Homogeneous.selection",
                                   "Dispersal.limitation",
                                   "Homogenizing.dispersal",
                                   "Undominated.processes"),
                        labels = c("Variable selection", "Homogeneous selection",
                                   "Dispersal limitation",
                                   "Homogenizing dispersal",
                                   "Undominated processes"))

df1$Month <- factor(df1$Month, levels = c("5", "7", "9", "11"),
                    labels = c("May", "Jul", "Sep", "Nov"))

# preview the long table that is plotted below (not the wide `df`)
head(df1)
str(df1)

# Stacked-bar plot of the relative influence per sampling month.

# Fixed colour per process. The vector is named so that every process keeps
# its colour when another process is absent from `df1`; with an unnamed
# vector the colours shift and no longer match the shared legend, which
# ggarrange() takes from `f1`.
process.colours <- c("Variable selection"     = "#00A087B2",
                     "Homogeneous selection"  = "#FFDB6D",
                     "Dispersal limitation"   = "#DC0000B2",
                     "Homogenizing dispersal" = "#4DBBD5B2",
                     "Undominated processes"  = "#7570B3")

(f2 <- ggplot(df1, aes(x = Month, y = value, fill = Processes)) +
    facet_grid(~Datasets) +
    geom_bar(stat = "identity", width = 0.64, colour = "black") +
    scale_y_continuous(expand = c(0, 0), limits = c(0, 105)) +
    scale_fill_manual(values = process.colours) +
    labs(x = "Sampling Month", y = "Relative Influence (%)", title = "") +
    mytheme)

# ggsave("Assembly_processes_Month.jpg", width = 10, height = 6, units = "cm", f2, scale = 1.5, dpi = 300)
# ggsave("Assembly_processes_pie_year_raw.pdf", width = 10, height = 12, units = "cm", p, scale = 1.5)

# Combine the per-year (A) and per-month (B) bar plots with one shared legend
(fp <- ggarrange(f1, f2, labels = c("A", "B"), common.legend = TRUE,
                 legend = "right", ncol = 1, nrow = 2))

# ggsave("Assembly_processes_Year_Month_raw.pdf", width = 12, height = 12, units = "cm", fp, scale = 1.5)


```

## Session Info

```{r}
# Record the R version and the attached/loaded packages used for this run
sessionInfo()
```