true.Rmd

---
output: html_document
---

```{r}
library(readxl)
library(tidyverse)
library(openxlsx)
library(ggpubr)
library(maps)
library(broom)
library(reshape2)
library(pheatmap)
```

# World map

```{r, message=FALSE}
# Read the first sheet of the workbook; `df` is the copy handed to the plot.
data <- read.xlsx("./Verrucomicrobia全球分布数据.xlsx", sheet = 1)
df <- data
# Eight fill colours passed to scale_fill_manual() below.
color_scheme <- c(
  "#AFD888", "#63ADD0", "#FF4540", "#D836C4",
  "#EE6B9E", "#FF8F40", "#FFFA73", "#00A779"
)

# Draw the map: country polygons underneath, one semi-transparent point per
# row of `df` on top.
world_map <- map_data("world")
ggplot() +
  geom_polygon(
    aes(x = long, y = lat, group = group),
    data = world_map,
    fill = "lightgray",
    color = "gray"
  ) +
  geom_point(
    aes(
      x = longitude_deg,
      y = latitude_deg,
      size = `cell%`,
      fill = interaction(empo_1, empo_2)
    ),
    data = df,
    shape = 21, # filled circle: both `fill` and the outline `color` apply
    alpha = 0.5,
    color = "grey40",
    stroke = 0.5
  ) +
  scale_size_continuous(
    name = "Node size for relative abundance (%)",
    guide = guide_legend(override.aes = list(color = "black", fill = "black"))
  ) +
  scale_fill_manual(
    values = color_scheme,
    name = "Node color for sample environmental type",
    guide = guide_legend(override.aes = list(size = 6))
  ) +
  labs(x = "longitude", y = "latitude") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 18, face = "bold"), # title size and face
    axis.title = element_text(size = 14), # axis title size
    axis.text = element_text(size = 12), # axis tick label size
    legend.title = element_text(size = 14), # legend title size
    legend.text = element_text(size = 12), # legend label size
    legend.key.size = unit(1.5, "lines") # legend key size
  )
# ggsave("world.pdf",units = "in",width = 17.27, height = 6.8)
```

# P-cell

```{r}
# Read one sheet of the workbook and return it transposed.
#
# The sheet is read without a header row, transposed, and the first row of the
# transposed table is promoted to column names. The remaining rows are
# numbered 1, 2, ... as row names.
#
# Arguments:
#   n     Sheet index or name, passed to read.xlsx() as `sheet`.
#   path  Path of the workbook; defaults to the file used throughout this
#         notebook.
#
# Returns a data.frame whose columns are the rows of the original sheet. The
# values come from a transposed table, so callers convert columns to numeric
# themselves where needed.
read_data <- function(n, path = "./Verrucomicrobia全球分布数据.xlsx") {
  raw <- read.xlsx(path, sheet = n, colNames = FALSE, rowNames = FALSE)

  # NOTE: data.frame() has no `header` argument. Passing `header = FALSE`
  # appended a constant column (later named "FALSE") to the result, so it is
  # intentionally not passed here.
  k <- data.frame(t(raw))

  # Promote the first row to column names, then drop it from the data.
  colnames(k) <- as.character(unlist(k[1, ], use.names = FALSE))
  k <- k[-1, , drop = FALSE]
  rownames(k) <- seq_len(nrow(k))

  k
}

```


```{r}
# Load sheet 2 through read_data().
p_cell <- read_data(2)
# From column 4 onward, turn every character column into numeric.
p_cell[, 4:ncol(p_cell)] <- p_cell[, 4:ncol(p_cell)] %>%
  mutate(across(where(is.character), as.numeric))
p_cell
```


```{r}
# Verrucomicrobia column of `p_cell`, coerced to integer.
ver <- as.integer(p_cell$`d:Bacteria, p:"Verrucomicrobia"`)
# total <- as.integer(p_cell$`p-all`) - as.integer(p_cell$Un)
# pre <- ver / total
# Put `ver` in front of the p_cell columns and keep only the rows whose
# empo_1 is not 'ALL'.
new <- filter(cbind(ver, p_cell), empo_1 != "ALL")
```


```{r}
# Preview the first rows of `new` (the `ver` column followed by the p_cell columns).
head(new)
```

```{r}
# Keep the first ten columns of `new` as `df`.
df <- new[, 1:10]
```

```{r}
library(ggplot2)
library(dplyr)
library(purrr)
library(broom)
```


```{r}
# Mean of `ver` for every empo_1 / empo_2 / empo_3 combination.
# `.groups = "drop"` returns an ungrouped tibble and avoids dplyr's
# "grouped output by ..." message.
grouped_data <- new %>%
  group_by(empo_1, empo_2, empo_3) %>%
  summarise(mean_ver = mean(ver, na.rm = TRUE), .groups = "drop")

# Stacked bar chart: a single bar (x is a constant factor) whose segments are
# the per-combination means.
ggplot(
  grouped_data,
  aes(x = factor(""), y = mean_ver, fill = interaction(empo_1, empo_2, empo_3))
) +
  geom_col(width = 1) +
  theme_minimal() +
  xlab("Categories") +
  ylab("Average of ver") +
  theme(legend.position = "right")
```

```{r}
# Load the necessary libraries
library(ggplot2)
library(readxl)

# Load the data
data <- read_excel("Verrucomicrobia全球分布数据.xlsx", sheet = "p-cell")

# Clean the data: drop the first three rows, then coerce every column to
# numeric. Cells that are not numbers become NA (with a coercion warning).
data <- data[-c(1, 2, 3), ]
data[] <- lapply(data, as.numeric)
# Always a numeric matrix, whatever the number of remaining rows.
data <- as.matrix(data)

# Per-column mean and standard deviation, ignoring NA
mean_values <- colMeans(data, na.rm = TRUE)
std_values <- apply(data, 2, sd, na.rm = TRUE)

# Plot the bar graph with error bars.
# The error bars span Mean +/- SD on the same scale as the bars. Previously
# only the error-bar limits were divided by 100, which detached them from the
# bar tops.
df <- data.frame(Habitat = names(mean_values), Mean = mean_values, SD = std_values)
ggplot(df, aes(x = Habitat, y = Mean)) +
  geom_col(fill = "skyblue", width = 0.7) +
  geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.2) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Mean Values of Verrucomicrobia Across Different Habitats with Error Bars",
    x = "Habitat",
    y = "Mean Value"
  )

# Pairwise Pearson correlation between columns
correlation_matrix <- cor(data, use = "pairwise.complete.obs", method = "pearson")
print(correlation_matrix)

```

```{r}
# Read the "p-cell" sheet as-is and display it.
data <- read_excel(path = "Verrucomicrobia全球分布数据.xlsx", sheet = "p-cell")
data
```

# f-cell

```{r}
# Load the data
# df <- read_excel("f-cell.xlsx")
df <- read_excel("Verrucomicrobia全球分布数据.xlsx", sheet = "f-cell")

# Keep the rows whose empo_1 label contains 'p:"Verrucomicrobia"', drop the
# 'ALL' column and convert every column except 'empo_1' to numeric.
verrucomicrobia_df <- df %>%
  filter(grepl('p:"Verrucomicrobia"', empo_1)) %>%
  select(-ALL) %>%
  mutate(across(-empo_1, as.numeric))

# Long format: one row per (empo_1, habitat). Anything from the first "." in
# a column name onward is stripped from the habitat label.
# `percentage` is each row's share of its habitat total; the result is
# ungrouped so later steps are not affected by leftover grouping.
verrucomicrobia_melted_df <- verrucomicrobia_df %>%
  pivot_longer(cols = -empo_1, names_to = "habitat", values_to = "count") %>%
  mutate(habitat = gsub("\\..*$", "", habitat)) %>%
  group_by(habitat) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()


# ggplot(verrucomicrobia_melted_df, aes(x = habitat, y = percentage, fill = empo_1)) +
#   geom_bar(stat = "identity", position = "dodge") +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
#   labs(x = "Habitat", y = "Percentage (%)", fill = "Bacteria", title = "Distribution of 'p:Verrucomicrobia' in different habitats") +
#   guides(fill = guide_legend(ncol = 1, override.aes = list(size = 4)))

# Define a vector of colors
my_colors <- c("#AFD888", "#63ADD0", "#FF4540", "#D836C4")

# Define a vector of labels
my_labels <- c("f:Opitutaceae", "f:Puniceicoccaceae", "f:Rubritaleaceae", "f:Verrucomicrobiaceae")


# Define a dodge width
my_width <- 0.8

# Define a bar width
my_bar_width <- 0.7

# Dodged bars per habitat. The dodge width and the bar width differ on purpose.
ggplot(verrucomicrobia_melted_df, aes(x = habitat, y = percentage, fill = empo_1)) +
  geom_col(position = position_dodge(width = my_width), color = "black", width = my_bar_width) +
  labs(x = "Habitat", y = "Percentage (%)", fill = "Bacteria", title = "Distribution of 'p:Verrucomicrobia' in different habitats") +
  guides(fill = guide_legend(ncol = 1, override.aes = list(size = 4))) +
  scale_fill_manual(values = my_colors, labels = my_labels) +
  # `percentage` is numeric, so the y axis needs a continuous scale; the
  # expand setting removes the padding so the bars start at the axis.
  scale_y_continuous(expand = c(0, 0)) +
  theme_classic() +
  # Must come after theme_classic(): a complete theme replaces earlier
  # theme() tweaks, which would discard the label rotation.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

# f-otu


```{r}
# Read the "f-otu" sheet, skipping its first two rows.
df <- read_excel("Verrucomicrobia全球分布数据.xlsx", sheet = "f-otu", skip = 2)

# Keep the rows whose empo_3 label contains 'p:"Verrucomicrobia"', drop the
# 'ALL' column and convert every column except 'empo_3' to numeric.
# (A stray `df=` used to precede this statement and silently overwrote `df`
# with the filtered table.)
verrucomicrobia_df <- df %>%
  filter(grepl('p:"Verrucomicrobia"', empo_3)) %>%
  select(-ALL) %>%
  mutate(across(-empo_3, as.numeric))

# Long format: one row per (empo_3, habitat). Anything from the first "." in
# a column name onward is stripped from the habitat label.
# `percentage` is each row's share of its habitat total.
verrucomicrobia_melted_df <- verrucomicrobia_df %>%
  pivot_longer(cols = -empo_3, names_to = "habitat", values_to = "count") %>%
  mutate(habitat = gsub("\\..*$", "", habitat)) %>%
  group_by(habitat) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()

# Define a vector of colors
my_colors <- c("#AFD888", "#63ADD0", "#FF4540", "#D836C4")

# Define a vector of labels
my_labels <- c("f:Opitutaceae", "f:Puniceicoccaceae", "f:Rubritaleaceae", "f:Verrucomicrobiaceae")


# Define a dodge width
my_width <- 0.8

# Define a bar width
my_bar_width <- 0.7

f_otu_plot <- ggplot(verrucomicrobia_melted_df, aes(x = habitat, y = percentage, fill = empo_3)) +
  geom_col(position = position_dodge(width = my_width), color = "black", width = my_bar_width) +
  labs(x = "Habitat", y = "Percentage (%)", fill = "Bacteria", title = "Distribution of 'p:Verrucomicrobia' in different habitats") +
  guides(fill = guide_legend(ncol = 1, override.aes = list(size = 4))) +
  scale_fill_manual(values = my_colors, labels = my_labels) +
  # `percentage` is numeric, so the y axis needs a continuous scale.
  scale_y_continuous(expand = c(0, 0)) +
  theme_classic() +
  # Applied after theme_classic() so the rotation is not overridden.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Show the plot in the rendered document
f_otu_plot

# Save exactly this plot rather than whatever ggplot2 considers the last one
ggsave("f-otu.pdf", plot = f_otu_plot, units = "in", width = 12, height = 5)


```

# Heat-maps f-cell

```{r}
# Load the data, skipping the first two rows of the sheet.
# NOTE(review): this section is headed "f-cell" but reads the "f-otu" sheet;
# confirm which one is intended.
data <- read_excel("Verrucomicrobia全球分布数据.xlsx", sheet = "f-otu", skip = 2)

# Remove the meta-data row and the 'ALL' column
# data <- data[-c(1:3), ]
data <- data[-1, ]
data <- data %>% select(-ALL)

# Convert every column except the first (the label column) to numeric
data[-1] <- lapply(data[-1], as.numeric)

# Row labels used for the heatmap, in the order the rows are expected to appear
family_labels <- c(
  "f:Opitutaceae", "f:Puniceicoccaceae",
  "f:Rubritaleaceae", "f:Verrucomicrobiaceae"
)

# Filter rows where empo_3 contains 'Verrucomicrobia'
verrucomicrobia_data <- data[grep("Verrucomicrobia", data$empo_3), ]
# Fail with a clear message if the sheet does not yield one row per label.
stopifnot(nrow(verrucomicrobia_data) == length(family_labels))
verrucomicrobia_data[1] <- family_labels

# Proportion of each family within every environment (each column is divided
# by its column sum). prop.table() on a matrix always returns a matrix.
proportions <- prop.table(as.matrix(verrucomicrobia_data[-1]), margin = 2)
rownames(proportions) <- family_labels

# Create the heatmap (drawn on the current device)
heatmap <- pheatmap(proportions)


# Draw it again into a PDF. print() is called explicitly so the file is also
# written when this code is not evaluated at top level (e.g. via source()).
pdf("heatmap.pdf")
print(heatmap)
dev.off()


```

## g-cell

```{r}
# Read the "g-cell" sheet, skipping its first two rows.
data <- read_excel("Verrucomicrobia全球分布数据.xlsx", sheet = "g-cell", skip = 2)

# Remove the meta-data row and the 'ALL' column
data <- data[-1, ]
data <- data %>% select(-ALL)

# Row labels used for the heatmap, in the order the rows are expected to appear
sample <- c(
  "g:Alterococcus", "g:Opitutus", "g:Cerasicoccus", "g:Coraliomargarita",
  "g:Pelagicoccus", "g:Puniceicoccus",
  "g:Spartobacteria_genera_incertae_sedis", "g:Terrimicrobium",
  "g:Xiphinematobacter", "g:Limisphaera",
  "g:Subdivision3_genera_incertae_sedis",
  "g:Subdivision5_genera_incertae_sedis", "g:Rubritalea", "g:Akkermansia",
  "g:Haloferula", "g:Luteolibacter", "g:Persicirhabdus", "g:Prosthecobacter",
  "g:Roseibacillus", "g:Roseimicrobium", "g:Verrucomicrobium"
)

# Convert every column except the first (the label column) to numeric
data[-1] <- lapply(data[-1], as.numeric)

# Filter rows where empo_3 contains 'p:"Verrucomicrobia"'
verrucomicrobia_data <- data[grep('p:"Verrucomicrobia"', data$empo_3), ]
# Fail with a clear message if the sheet does not yield one row per label.
stopifnot(nrow(verrucomicrobia_data) == length(sample))
verrucomicrobia_data[1] <- sample

# Proportion of each genus within every environment (each column is divided
# by its column sum). prop.table() on a matrix always returns a matrix.
proportions <- prop.table(as.matrix(verrucomicrobia_data[-1]), margin = 2)
rownames(proportions) <- sample

# Create the heatmap (drawn on the current device)
heatmap <- pheatmap(proportions)

# Draw it again into a PDF. print() is called explicitly so the file is also
# written when this code is not evaluated at top level (e.g. via source()).
pdf("g-cell-heatmap.pdf")
print(heatmap)
dev.off()
```
```{r}
# Read the "g-otu" sheet, skipping its first two rows.
data <- read_excel("Verrucomicrobia全球分布数据.xlsx", sheet = "g-otu", skip = 2)

# Remove the meta-data row and the 'ALL' column
data <- data[-1, ]
data <- data %>% select(-ALL)

# Row labels used for the heatmap, in the order the rows are expected to appear
sample <- c(
  "g:Alterococcus", "g:Opitutus", "g:Cerasicoccus", "g:Coraliomargarita",
  "g:Pelagicoccus", "g:Puniceicoccus",
  "g:Spartobacteria_genera_incertae_sedis", "g:Terrimicrobium",
  "g:Xiphinematobacter", "g:Limisphaera",
  "g:Subdivision3_genera_incertae_sedis",
  "g:Subdivision5_genera_incertae_sedis", "g:Rubritalea", "g:Akkermansia",
  "g:Haloferula", "g:Luteolibacter", "g:Persicirhabdus", "g:Prosthecobacter",
  "g:Roseibacillus", "g:Roseimicrobium", "g:Verrucomicrobium"
)

# Convert every column except the first (the label column) to numeric
data[-1] <- lapply(data[-1], as.numeric)

# Filter rows where empo_3 contains 'p:"Verrucomicrobia"'
verrucomicrobia_data <- data[grep('p:"Verrucomicrobia"', data$empo_3), ]
# Fail with a clear message if the sheet does not yield one row per label.
stopifnot(nrow(verrucomicrobia_data) == length(sample))
verrucomicrobia_data[1] <- sample

# Proportion of each genus within every environment (each column is divided
# by its column sum). prop.table() on a matrix always returns a matrix.
proportions <- prop.table(as.matrix(verrucomicrobia_data[-1]), margin = 2)
rownames(proportions) <- sample

# Create the heatmap (drawn on the current device)
heatmap <- pheatmap(proportions)

# Draw it again into a PDF. print() is called explicitly so the file is also
# written when this code is not evaluated at top level (e.g. via source()).
pdf("g-otu-heatmap.pdf")
print(heatmap)
dev.off()
```


```{r}

```