Set working directory and load libraries
setwd("C:/Users/Orlando Camargo/Desktop/")
#BiocManager::install("rhdf5")
#BiocManager::install("phyloseq")
library("phyloseq")
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.1.3
library("readr")
library("patchwork")
merged_metagenome_op<- import_biom("pacific_ocean.biom")
mydata<-merged_metagenome_op@otu_table
mydata<-as.data.frame(mydata)
head(mydata)
## SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1890424 553 521 1152 81 54 629
## 1218 45458 44051 127642 848 672 10672
## 1219 1064361 1138750 3109010 9773 3609 55610
## 1501268 42846 43832 132412 410 203 985
## 1501269 7961 12166 21462 276 174 10743
## 1924285 152 164 484 2 1 0
data<-mydata
rownames(data)<-1:nrow(data)
head(data)
## SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1 553 521 1152 81 54 629
## 2 45458 44051 127642 848 672 10672
## 3 1064361 1138750 3109010 9773 3609 55610
## 4 42846 43832 132412 410 203 985
## 5 7961 12166 21462 276 174 10743
## 6 152 164 484 2 1 0
data$first<-rep(1, each=nrow(data))
data<- data[ , c("first",names(data)[names(data) != "first"])]
data$last<-rep(1, each=nrow(data))
y=as.single(c())
df_n<-data.frame(row.names = rownames(data))
for (i in colnames(data)){
print(paste("running:",i,"..."))
df_n[,i]<-y
for(z in rownames(df_n)){
if(data[z,i]>=5){ #este número es arbitrario podemos usar porcentajes de abudancia
y[z]=1
}else{
y[z]=0
}
}
}
## [1] "running: first ..."
## [1] "running: SRR5788415 ..."
## [1] "running: SRR5788416 ..."
## [1] "running: SRR5788417 ..."
## [1] "running: SRR5788420 ..."
## [1] "running: SRR5788421 ..."
## [1] "running: SRR5788422 ..."
## [1] "running: last ..."
df_n<-df_n[,-c(1:2)]
colnames(df_n)<-colnames(mydata)
head(df_n)
## SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1
## 3 1 1 1 1 1 1
## 4 1 1 1 1 1 1
## 5 1 1 1 1 1 1
## 6 1 1 1 0 0 0
Permutation of 10 OTU’s like test
#install.packages("combinat")
library("combinat")
##
## Attaching package: 'combinat'
## The following object is masked from 'package:utils':
##
## combn
#sam<-sample(1:400, 10, replace = TRUE )
df_n1<-df_n[c(716:726),]
#df_n1<-df_n
comn<- as.data.frame(combn(1:nrow(df_n1), 2))
data1<-mydata
rownames(data1)<-1:nrow(data1)
head(data1)
## SRR5788415 SRR5788416 SRR5788417 SRR5788420 SRR5788421 SRR5788422
## 1 553 521 1152 81 54 629
## 2 45458 44051 127642 848 672 10672
## 3 1064361 1138750 3109010 9773 3609 55610
## 4 42846 43832 132412 410 203 985
## 5 7961 12166 21462 276 174 10743
## 6 152 164 484 2 1 0
Selection of OTUs by C-score and Spearman correlation
list_oc <-list()
for (i in colnames(comn)){
print(comn[,i])
perm=comn[,i]
ocdf=df_n1[c(min(perm), max(perm)),]
names=rownames(data1[c(min(perm), max(perm)),])
cij<-(length(ocdf[1,])-sum(ocdf[1,]))*(length(ocdf[2,])-sum(ocdf[2,]))
spearm=data1[c(min(perm),max(perm)),]
cort<-cor.test(as.numeric(spearm[1,]),as.numeric(spearm[2,]), method = "spearman", conf.level = 0.95, exact=FALSE)
if (cij>0 & cort$p.value<1){
list_oc[[i]]<-c(names,ocdf,cij,cort$p.value)
}else{
print(paste("drop..."))
}
}
## [1] 1 2
## [1] "drop..."
## [1] 1 3
## [1] "drop..."
## [1] 1 4
## [1] "drop..."
## [1] 1 5
## [1] "drop..."
## [1] 1 6
## [1] "drop..."
## [1] 1 7
## [1] "drop..."
## [1] 1 8
## [1] "drop..."
## [1] 1 9
## [1] "drop..."
## [1] 1 10
## [1] "drop..."
## [1] 1 11
## [1] "drop..."
## [1] 2 3
## [1] 2 4
## [1] "drop..."
## [1] 2 5
## [1] "drop..."
## [1] 2 6
## [1] "drop..."
## [1] 2 7
## [1] "drop..."
## [1] 2 8
## [1] 2 9
## [1] 2 10
## [1] "drop..."
## [1] 2 11
## [1] "drop..."
## [1] 3 4
## [1] "drop..."
## [1] 3 5
## [1] "drop..."
## [1] 3 6
## [1] "drop..."
## [1] 3 7
## [1] "drop..."
## [1] 3 8
## [1] 3 9
## [1] 3 10
## [1] "drop..."
## [1] 3 11
## [1] "drop..."
## [1] 4 5
## [1] "drop..."
## [1] 4 6
## [1] "drop..."
## [1] 4 7
## [1] "drop..."
## [1] 4 8
## [1] "drop..."
## [1] 4 9
## [1] "drop..."
## [1] 4 10
## [1] "drop..."
## [1] 4 11
## [1] "drop..."
## [1] 5 6
## [1] "drop..."
## [1] 5 7
## [1] "drop..."
## [1] 5 8
## [1] "drop..."
## [1] 5 9
## [1] "drop..."
## [1] 5 10
## [1] "drop..."
## [1] 5 11
## [1] "drop..."
## [1] 6 7
## [1] "drop..."
## [1] 6 8
## [1] "drop..."
## [1] 6 9
## [1] "drop..."
## [1] 6 10
## [1] "drop..."
## [1] 6 11
## [1] "drop..."
## [1] 7 8
## [1] "drop..."
## [1] 7 9
## [1] "drop..."
## [1] 7 10
## [1] "drop..."
## [1] 7 11
## [1] "drop..."
## [1] 8 9
## [1] 8 10
## [1] "drop..."
## [1] 8 11
## [1] "drop..."
## [1] 9 10
## [1] "drop..."
## [1] 9 11
## [1] "drop..."
## [1] 10 11
## [1] "drop..."
head(list_oc,1)
## $V11
## $V11[[1]]
## [1] "2"
##
## $V11[[2]]
## [1] "3"
##
## $V11$SRR5788415
## [1] 0 0
##
## $V11$SRR5788416
## [1] 0 0
##
## $V11$SRR5788417
## [1] 0 1
##
## $V11$SRR5788420
## [1] 0 0
##
## $V11$SRR5788421
## [1] 1 0
##
## $V11$SRR5788422
## [1] 1 0
##
## $V11[[9]]
## [1] 20
##
## $V11[[10]]
## [1] 0.004804665