Co-ocurrence analysis

Set working directory and load libraries

setwd("C:/Users/Orlando Camargo/Desktop/")
#BiocManager::install("rhdf5")
#BiocManager::install("phyloseq")
library("phyloseq")
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.1.3
library("readr")
library("patchwork")

Import BIOM file

merged_metagenome_op<- import_biom("pacific_ocean.biom")
mydata<-merged_metagenome_op@otu_table
mydata<-as.data.frame(mydata)
head(mydata)
##         SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1890424        553        521       1152         81         54        629
## 1218         45458      44051     127642        848        672      10672
## 1219       1064361    1138750    3109010       9773       3609      55610
## 1501268      42846      43832     132412        410        203        985
## 1501269       7961      12166      21462        276        174      10743
## 1924285        152        164        484          2          1          0
data<-mydata
rownames(data)<-1:nrow(data)
head(data)
##   SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1        553        521       1152         81         54        629
## 2      45458      44051     127642        848        672      10672
## 3    1064361    1138750    3109010       9773       3609      55610
## 4      42846      43832     132412        410        203        985
## 5       7961      12166      21462        276        174      10743
## 6        152        164        484          2          1          0

Presence and absence of species

data$first<-rep(1, each=nrow(data))
data<- data[ , c("first",names(data)[names(data) != "first"])]
data$last<-rep(1, each=nrow(data))
y=as.single(c())
df_n<-data.frame(row.names = rownames(data))
for (i in colnames(data)){
  print(paste("running:",i,"..."))
  df_n[,i]<-y
  for(z in rownames(df_n)){
    if(data[z,i]>=5){ #este número es arbitrario podemos usar porcentajes de abudancia
      y[z]=1
    }else{
      y[z]=0
    }
  }
}
## [1] "running: first ..."
## [1] "running: SRR5788415 ..."
## [1] "running: SRR5788416 ..."
## [1] "running: SRR5788417 ..."
## [1] "running: SRR5788420 ..."
## [1] "running: SRR5788421 ..."
## [1] "running: SRR5788422 ..."
## [1] "running: last ..."
df_n<-df_n[,-c(1:2)]
colnames(df_n)<-colnames(mydata)
head(df_n)
##   SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1          1          1          1          1          1          1
## 2          1          1          1          1          1          1
## 3          1          1          1          1          1          1
## 4          1          1          1          1          1          1
## 5          1          1          1          1          1          1
## 6          1          1          1          0          0          0

Co-ocurrence analysis by C-score and Spearman correlation

Permutation of 10 OTU’s like test

#install.packages("combinat")
library("combinat")
## 
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
## 
##     combn
#sam<-sample(1:400, 10, replace = TRUE )
df_n1<-df_n[c(716:726),]
#df_n1<-df_n
comn<- as.data.frame(combn(1:nrow(df_n1), 2))
data1<-mydata
rownames(data1)<-1:nrow(data1)
head(data1)
##   SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1        553        521       1152         81         54        629
## 2      45458      44051     127642        848        672      10672
## 3    1064361    1138750    3109010       9773       3609      55610
## 4      42846      43832     132412        410        203        985
## 5       7961      12166      21462        276        174      10743
## 6        152        164        484          2          1          0

Selection of OTUs by C-score and Spearman correlation

list_oc <-list()
for (i in colnames(comn)){
  print(comn[,i])
  perm=comn[,i]
  ocdf=df_n1[c(min(perm), max(perm)),]
  names=rownames(data1[c(min(perm), max(perm)),])
  cij<-(length(ocdf[1,])-sum(ocdf[1,]))*(length(ocdf[2,])-sum(ocdf[2,]))
  spearm=data1[c(min(perm),max(perm)),]
  cort<-cor.test(as.numeric(spearm[1,]),as.numeric(spearm[2,]), method = "spearman", conf.level = 0.95, exact=FALSE)
  if (cij>0 & cort$p.value<1){
    list_oc[[i]]<-c(names,ocdf,cij,cort$p.value)
  }else{
    print(paste("drop..."))
  }
}
## [1] 1 2
## [1] "drop..."
## [1] 1 3
## [1] "drop..."
## [1] 1 4
## [1] "drop..."
## [1] 1 5
## [1] "drop..."
## [1] 1 6
## [1] "drop..."
## [1] 1 7
## [1] "drop..."
## [1] 1 8
## [1] "drop..."
## [1] 1 9
## [1] "drop..."
## [1]  1 10
## [1] "drop..."
## [1]  1 11
## [1] "drop..."
## [1] 2 3
## [1] 2 4
## [1] "drop..."
## [1] 2 5
## [1] "drop..."
## [1] 2 6
## [1] "drop..."
## [1] 2 7
## [1] "drop..."
## [1] 2 8
## [1] 2 9
## [1]  2 10
## [1] "drop..."
## [1]  2 11
## [1] "drop..."
## [1] 3 4
## [1] "drop..."
## [1] 3 5
## [1] "drop..."
## [1] 3 6
## [1] "drop..."
## [1] 3 7
## [1] "drop..."
## [1] 3 8
## [1] 3 9
## [1]  3 10
## [1] "drop..."
## [1]  3 11
## [1] "drop..."
## [1] 4 5
## [1] "drop..."
## [1] 4 6
## [1] "drop..."
## [1] 4 7
## [1] "drop..."
## [1] 4 8
## [1] "drop..."
## [1] 4 9
## [1] "drop..."
## [1]  4 10
## [1] "drop..."
## [1]  4 11
## [1] "drop..."
## [1] 5 6
## [1] "drop..."
## [1] 5 7
## [1] "drop..."
## [1] 5 8
## [1] "drop..."
## [1] 5 9
## [1] "drop..."
## [1]  5 10
## [1] "drop..."
## [1]  5 11
## [1] "drop..."
## [1] 6 7
## [1] "drop..."
## [1] 6 8
## [1] "drop..."
## [1] 6 9
## [1] "drop..."
## [1]  6 10
## [1] "drop..."
## [1]  6 11
## [1] "drop..."
## [1] 7 8
## [1] "drop..."
## [1] 7 9
## [1] "drop..."
## [1]  7 10
## [1] "drop..."
## [1]  7 11
## [1] "drop..."
## [1] 8 9
## [1]  8 10
## [1] "drop..."
## [1]  8 11
## [1] "drop..."
## [1]  9 10
## [1] "drop..."
## [1]  9 11
## [1] "drop..."
## [1] 10 11
## [1] "drop..."
head(list_oc,1)
## $V11
## $V11[[1]]
## [1] "2"
## 
## $V11[[2]]
## [1] "3"
## 
## $V11$SRR5788415
## [1] 0 0
## 
## $V11$SRR5788416
## [1] 0 0
## 
## $V11$SRR5788417
## [1] 0 1
## 
## $V11$SRR5788420
## [1] 0 0
## 
## $V11$SRR5788421
## [1] 1 0
## 
## $V11$SRR5788422
## [1] 1 0
## 
## $V11[[9]]
## [1] 20
## 
## $V11[[10]]
## [1] 0.004804665