This script is for microbial co-ocurrence networks in 3 different enviroments.
library(phyloseq)
library(ggplot2)
library(scales)
library(RColorBrewer)
library(ggpubr)
library(readr)
library(patchwork)
library(plyr)
library(tidyverse)
library(cluster)
library(factoextra)
library(ggcorrplot)
library(corrplot)raw_biom <- import_biom("~/dc_workshop/pocean_metagenome/data/pacific_ocean.biom")
#Give name to taxonomic categories
raw_biom@tax_table@.Data<-substring(raw_biom@tax_table@.Data,4)
colnames(raw_biom@tax_table@.Data)<- c("Kingdom","Phylum", "Class", "Order","Family", "Genus", "Species")
# Read metadata
metadatos<-read.csv("~/dc_workshop/pocean_metagenome/data/metadata_sampling.csv") 
row.names(metadatos) <- metadatos$Samples
metadatos$Zone<-factor(metadatos$Zone, levels = c("Epipelagic", "Mesopelagic", "Abyssopelagic"))
# Generate otu_table
otu<-otu_table(raw_biom@otu_table@.Data, taxa_are_rows = TRUE) 
colnames(otu) <-metadatos$Samples
# Generate taxonomy table
tax<-tax_table(raw_biom@tax_table@.Data)
# Generate metadata table
metad<-sample_data(metadatos)
# It is the complete phyloseq object with the 3 previous tables
global<-phyloseq(otu, tax, metad)
# Put the complete species names in the column for Species (instead of the epithet only)
global@tax_table@.Data[,7]<-paste(global@tax_table@.Data[,6], global@tax_table@.Data[,7], sep= "_")Make the object
# Agglomerate at Phylum level
phylum_global<- tax_glom(global, "Phylum")
#Relative abundance
relative_phylum_global<- transform_sample_counts(phylum_global, function(x) x / sum(x) ) 
rel_phyl_global_df<-psmelt(relative_phylum_global) # convert to data frame
rel_phyl_global_df$Phylum<- as.character(rel_phyl_global_df$Phylum) # convert to character type
# Agglomerate phylum with less than 0.01 abundance
rel_phyl_global_df$Phylum[rel_phyl_global_df$Abundance < 0.01] <- "Fila < 1% de abundancia" 
rel_phyl_global_df$Phylum<- as.factor(rel_phyl_global_df$Phylum) # convert to factor type
rel_phyl_global_df$Samples<- factor(rel_phyl_global_df$Samples, levels = c("SRR5788415", "SRR5788416", "SRR5788417", "SRR5788420", "SRR5788421", "SRR5788422"))  # Put Samples names in levelsMake the abundance plot by samples.
phylum_colors<- brewer.pal(length(levels(rel_phyl_global_df$Phylum)),"Dark2") 
global_rel_phyl_plot<-ggplot(rel_phyl_global_df, aes(x=Samples, y=Abundance, fill=Phylum)) +
  geom_bar(aes(), stat="identity", position="stack") +
  scale_fill_manual(values=phylum_colors) + 
  labs(x= "Samples", y = "Relative abundance") +
  facet_grid(~Zone, scales = "free_x")+
  ggtitle("Relative abundance by Zones")+
  theme(axis.title = element_text(size = 14), axis.text.x = element_text(size = 8), plot.title = element_text(size = 20))
ggsave("~/dc_workshop/pocean_metagenome/images/abund_rel_phyl_global.svg", plot = global_rel_phyl_plot, width = 6.5, height = 3.65, units = "in", dpi = 400)
global_rel_phyl_plotMake the objects
# Absolute abundance
abs_phyl_global_df<-psmelt(phylum_global) # convert to data frame
# Filter OTU table
abs_phyl_global_df_E <- abs_phyl_global_df %>%
  filter(abs_phyl_global_df$Zone == "Epipelagic") %>%
  select(OTU, Sample, Abundance) %>%
  pivot_wider(names_from = OTU, values_from = Abundance)
abs_phyl_global_df_M <- abs_phyl_global_df %>%
  filter(abs_phyl_global_df$Zone == "Mesopelagic")%>%
  select(OTU, Sample, Abundance) %>%
  pivot_wider(names_from = OTU, values_from = Abundance)
abs_phyl_global_df_A <- abs_phyl_global_df %>%
  filter(abs_phyl_global_df$Zone == "Abyssopelagic")%>%
  select(OTU, Sample, Abundance) %>%
  pivot_wider(names_from = OTU, values_from = Abundance)Create the otutable directory
mkdir otutableSave otu tables without row names.
#write.csv(abs_phyl_global_df_E,paste("~/dc_workshop/pocean_metagenome/Overrepresentation_analysis/otutable/","Epipelagic",".csv",sep=""), row.names= FALSE)
#write.csv(abs_phyl_global_df_M,paste("~/dc_workshop/pocean_metagenome/Overrepresentation_analysis/otutable/","Mesopelagic",".csv",sep=""), row.names= FALSE)
#write.csv(abs_phyl_global_df_A,paste("~/dc_workshop/pocean_metagenome/Overrepresentation_analysis/otutable/","Abyssopelagic",".csv",sep=""), row.names= FALSE)Calculate the ordinate objet with Jaccard distance.
meta.ord <- ordinate(physeq = relative_phylum_global, method = "NMDS", 
                     distance = "jaccard")## Run 0 stress 0 
## Run 1 stress 0 
## ... Procrustes: rmse 0.1116033  max resid 0.1575744 
## Run 2 stress 0 
## ... Procrustes: rmse 0.09833171  max resid 0.1393525 
## Run 3 stress 9.802441e-05 
## ... Procrustes: rmse 0.1344138  max resid 0.2384963 
## Run 4 stress 9.966834e-05 
## ... Procrustes: rmse 0.1466619  max resid 0.249883 
## Run 5 stress 0.1557467 
## Run 6 stress 0 
## ... Procrustes: rmse 0.1343425  max resid 0.1768282 
## Run 7 stress 0.0002896358 
## ... Procrustes: rmse 0.1709236  max resid 0.3062635 
## Run 8 stress 0.1557467 
## Run 9 stress 0 
## ... Procrustes: rmse 0.05797811  max resid 0.09812407 
## Run 10 stress 0.1557467 
## Run 11 stress 0 
## ... Procrustes: rmse 0.09930643  max resid 0.1464909 
## Run 12 stress 0 
## ... Procrustes: rmse 0.1150026  max resid 0.1439802 
## Run 13 stress 9.84357e-05 
## ... Procrustes: rmse 0.1054029  max resid 0.2141803 
## Run 14 stress 0 
## ... Procrustes: rmse 0.1734049  max resid 0.227205 
## Run 15 stress 0.2761342 
## Run 16 stress 8.980857e-05 
## ... Procrustes: rmse 0.1599885  max resid 0.2219469 
## Run 17 stress 0 
## ... Procrustes: rmse 0.1090451  max resid 0.1326152 
## Run 18 stress 9.992838e-05 
## ... Procrustes: rmse 0.1050076  max resid 0.210506 
## Run 19 stress 0 
## ... Procrustes: rmse 0.1207826  max resid 0.1558563 
## Run 20 stress 9.986879e-05 
## ... Procrustes: rmse 0.1111995  max resid 0.2149204 
## *** No convergence -- monoMDS stopping criteria:
##      1: no. of iterations >= maxit
##     15: stress < smin
##      1: stress ratio > sratmax
##      3: scale factor of the gradient < sfgrminPlot the clusters.
rel_phylum_cluster_Jaccard <- plot_ordination(physeq = relative_phylum_global, ordination = meta.ord, color="Zone")+
  geom_text(aes(label=Samples), size=3)
ggsave("cluster_jaccard.svg", plot = rel_phylum_cluster_Jaccard, width = 6.5, height = 3.65, units = "in", dpi = 400)
rel_phylum_cluster_JaccardCompare clusters with different methods.
dist = "jaccard" # especify distance
ord_meths = c("DCA", "CCA", "RDA", "NMDS", "MDS", "PCoA") #vector of methods names
plist = llply(as.list(ord_meths), function(i, physeq, dist){
  ordi = ordinate(physeq, method=i, distance=dist)
  plot_ordination(physeq, ordi, "Samples", color="Zone")
}, physeq=relative_phylum_global, dist)## Run 0 stress 0 
## Run 1 stress 0 
## ... Procrustes: rmse 0.09862521  max resid 0.1270772 
## Run 2 stress 0 
## ... Procrustes: rmse 0.09227392  max resid 0.1138774 
## Run 3 stress 0 
## ... Procrustes: rmse 0.1242844  max resid 0.197133 
## Run 4 stress 9.443781e-05 
## ... Procrustes: rmse 0.1147886  max resid 0.2212059 
## Run 5 stress 0 
## ... Procrustes: rmse 0.1249011  max resid 0.172647 
## Run 6 stress 0.2761337 
## Run 7 stress 0 
## ... Procrustes: rmse 0.1212303  max resid 0.1632644 
## Run 8 stress 0 
## ... Procrustes: rmse 0.1539816  max resid 0.2022742 
## Run 9 stress 0.2269841 
## Run 10 stress 0.001590993 
## Run 11 stress 1.992066e-05 
## ... Procrustes: rmse 0.1999354  max resid 0.2476482 
## Run 12 stress 9.915231e-05 
## ... Procrustes: rmse 0.1371223  max resid 0.23079 
## Run 13 stress 0 
## ... Procrustes: rmse 0.08179261  max resid 0.09947291 
## Run 14 stress 0.0002141331 
## ... Procrustes: rmse 0.1832949  max resid 0.2824196 
## Run 15 stress 0.1557467 
## Run 16 stress 0 
## ... Procrustes: rmse 0.09299676  max resid 0.1300814 
## Run 17 stress 1.506566e-05 
## ... Procrustes: rmse 0.1659642  max resid 0.2325868 
## Run 18 stress 9.762141e-05 
## ... Procrustes: rmse 0.1572659  max resid 0.2592858 
## Run 19 stress 1.029677e-05 
## ... Procrustes: rmse 0.1104634  max resid 0.2153393 
## Run 20 stress 0.2181459 
## *** No convergence -- monoMDS stopping criteria:
##      2: no. of iterations >= maxit
##     14: stress < smin
##      2: stress ratio > sratmax
##      2: scale factor of the gradient < sfgrminnames(plist) <- ord_meths
#Dataframe for plots
pdataframe = ldply(plist, function(x){
  df = x$data[, 1:2]
  colnames(df) = c("Axis_1", "Axis_2")
  return(cbind(df, x$data))
})
names(pdataframe)[1] = "method"
#Plots with all methods
p = ggplot(pdataframe, aes(Axis_1, Axis_2, color=Zone, shape=Zone))
p = p + geom_point(size=2) + geom_text(aes(label=Samples), size=2,vjust = "inward", hjust = "inward")
p = p + facet_wrap(~method, scales="free")
p = p + scale_fill_brewer(type="qual", palette="Set1")
p = p + scale_colour_brewer(type="qual", palette="Set1") 
p= p + labs(x="", y="")
pClusters with all methods
cluster and
factoExtra packages.Filter the data set by samples and Phylum.
abs_filter <- daply(
  .data = rel_phyl_global_df,
  .variables = c("Samples", "Phylum"),
  .fun = function(x) sum(x$Abundance)
)
abs_filter <- as.data.frame(abs_filter)
abs_filter[is.na(abs_filter)] = 0Compute the distance matrix with pearson distance.
#Computing correlation based distance: pearson, spearman or kendall
dist_corr <- get_dist(abs_filter, method = "pearson")Plot distance matrix.
Heatmap of distance matrix
Clusters with different methods
set.seed(12345)
hc_single <- hclust(d=dist_corr, method = "single")
hc_average <- hclust(d=dist_corr, method = "average")
hc_complete <- hclust(d=dist_corr, method = "complete")
hc_ward <- hclust(d=dist_corr, method = "ward.D2")
hc_median <- hclust(d=dist_corr, method = "median")
hc_centroid <- hclust(d=dist_corr, method = "centroid")Plot clusters with different methods.
Cluster with average method
Cluster with ward method
Cluster with single method
Cluster with complete method
Cluster with median method
Cluster with centroid method
Compare methods.
##   average    single  complete   ward.D2    median  centroid 
## 0.9876304 0.9876253 0.9876278 0.9876183 0.9876304 0.9876304PCA proyection.
Clusters and PCA
Groups obtained.
grupos_average <-sort(cutree(hc_average, 3))
grupos_average## SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422 
##          1          1          1          2          2          3cor_base <- cor(abs_filter, use="complete.obs", method = "spearman")
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat<- matrix(NA, n, n)
  diag(p.mat) <- 0
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], ...)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
# matrix of the p-value of the correlation
p.mat <- cor.mtest(cor_base)
corr_plot <- corrplot(cor_base, type = "upper", order = "hclust", tl.col = "black", tl.srt = 45,
         p.mat = p.mat, sig.level = 0.05)[1] Ma, B., Wang, Y., Ye, S. et al. Earth microbial co-occurrence network reveals interconnection pattern across microbiomes. Microbiome 8, 82 (2020). https://doi.org/10.1186/s40168-020-00857-2
[2] C. Zirion. Zamia-bacterial-BGCs. https://czirion.github.io/Zamia-bacterial-BGCs/
[3] Data processing and visualization for metagenomics. https://carpentries-incubator.github.io/metagenomics/