宏基因组下游分析
2023-01-27 本文已影响0人
皮尔洛_dys
###################part1-species_analysis###################################
setwd("D:/metagenomics/upstream")
library("vegan")
library(ggsignif)
library("xlsx")
#样本显著性检
diversity <- read.table( "metadata.txt", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F) #View(diversity)
diversity
mytable <- xtabs(~gender+Group,data=diversity);mytable
ages.p <- wilcox.test(ages ~ Group, data = diversity, var.equal = TRUE)$p.value
gender.p <- fisher.test(xtabs(~gender+Group,data=diversity))$p.value
bmi.p <- wilcox.test(bmi ~ Group, data = diversity, var.equal = TRUE)$p.value
education.p <- fisher.test(xtabs(~education+Group,data=diversity),simulate.p.value=TRUE)$p.value
rowname=c("ages","gender","bmi","education")
div=data.frame(c(ages.p,gender.p,bmi.p,education.p),row.names = rowname)
colnames(div)="P value"
if (!file.exists("./species_analysis/phenotype_diff"))
{
dir.create("./species_analysis/phenotype_diff",recursive = TRUE)
}
write.xlsx(div,file="./species_analysis/phenotype_diff/meta_diff.xlsx")
#置换多元方差分析
otu <- read.table("merged_abundance_table_species.txt",header = T,sep = "\t",row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
otu = otu[,rownames(meta)]
otu = t(otu)
otu.adonis = adonis2(otu ~ Group+ages+bmi+education+gender,data = meta,permutations = 9999,by="margin")
write.xlsx(otu.adonis,file="./species_analysis/phenotype_diff/permanova_diff.xlsx")
#alpha多样性
library("vegan")
library("ggsignif")
library("xlsx")
library("ggplot2")
library("ggsci")
#清空变量
rm(list = ls())
#将当前工作目录下文件名为.txt的文件找
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
#使用循环嵌套分别对class,family,order,phylum,genus和species进行alpha多样性分析(shanno,simpson,invsimpson)
for (i in file)
{
#读取otu列表
name=sub(".txt","",i)###将文件名.txt前面的名字提出来,也为了后面批量生成文件名准
otu <- read.table(i,header = T,sep = "\t")
row.names(otu) <- otu$clade_name
otu <- subset(otu, select = -clade_name )
otu<-t(otu)
#读取样本分组信息
metadata <- read.table("metadata.txt",header = T,sep = "\t")
metadata <- metadata[,c("ID","Group")]
#将otu列表中的样本的排列顺序与metadata中样本的顺序保持一�?
otu=otu[metadata$ID,]
#分别计算shannon,simpson,invsimpson多样�?
shannon <- diversity(otu ,"shannon")
simpson <- diversity(otu,"simpson")
invsimpson <- diversity(otu,"invsimpson")
#将三种alpha多样性写入excel(append=T,设为追加模式,防止之后的循环覆盖之前的写入文件,同时将不同分类水平的alpha多样性写入不同的sheet)
alpha_div = cbind(shannon,simpson,invsimpson)
if (!file.exists("./species_analysis/alpha"))
{
dir.create("./species_analysis/alpha",recursive = TRUE)
}
write.xlsx(alpha_div, file="./species_analysis/alpha/alpha_div.xlsx", sheetName=sub("merged_abundance_table_","",name), append=TRUE)
#将计算得到的alpha多样性结果与分组信息合并(!!!若未对metadata与otu列表的样本顺序统一处理会造成分析结果错误)
Group <- metadata$Group
shannon=data.frame(shannon,Group)
simpson=data.frame(simpson,Group)
invsimpson=data.frame(invsimpson,Group)
#分别对三种多样性绘制Boxplot
for (j in list(shannon,simpson,invsimpson))
{
p = ggplot(j, aes(x=Group, y=j[,1], color=Group))+
# 添加数据、xy值�? 颜色参数给画图函数ggplot
geom_boxplot(alpha=1,size=1.2,width=0.5)+
# 盒图
labs(title="Alpha diversity", x="Group",y = paste(str(j)," index"))+
# 标题
theme(plot.title=element_text(hjust=0.5),legend.position="top",panel.background = element_rect(fill = "transparent",colour = NA),axis.line = element_line(colour = "black"))+
geom_jitter(width = 0.2)+
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
#step_increase = 10,
#y_position = 5,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level = T)+
ylab(paste0(colnames(j)[1]," Index In ",sub("merged_abundance_table_","",name)," Level"))+
#scale_fill_manual(values=c(experience = "red", control = "blue"))
scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
pdf_name=""
pdf_name=paste(pdf_name,name,sep = "")
pdf_name=paste(pdf_name,colnames(j)[1],sep = "_")
pdf_name=paste(pdf_name,".png",sep = "")
pdf_name = sub("merged_abundance_table_","",pdf_name)
pdf_name = paste("Alpha plot of ",pdf_name,sep="")
ggsave(p,file =paste("./species_analysis/alpha/ ",pdf_name,sep = "") ,width=5,height=6)##ggsave批量生成name变量为名字的pdf文件
}
}
#置换多元方差分析(不同分类水平)
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
for (i in file)
{
name = sub("merged_abundance_table_","",i)
name = sub(".txt","",name)
name = paste(name,".xlsx",sep = "_permanova")
print(name)
otu <- read.table(i,header = T,sep = "\t",row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
otu = otu[,rownames(meta)]
otu = t(otu)
otu.adonis = adonis2(otu ~ Group,data = meta,permutations = 9999,by="margin")
if (!file.exists("./species_analysis/permanova"))
{
dir.create("./species_analysis/permanova",recursive = TRUE)
}
write.xlsx(otu.adonis,file=paste("./species_analysis/permanova/",name,sep = ""))
}
#beta多样性分析
#PCA
library(ggsci)
library("ggpubr")
library("factoextra")
library("vegan")
library("ggtext")
library(ggsignif)
rm(list = ls())
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
for (i in file)
{
name=sub(".txt","",i)
tax = sub("merged_abundance_table_","",name)
otu =read.table(i, row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
otu = otu[,row.names(meta)]
tmpotu= otu
tmpmeta = meta
tmpotu = data.frame(t(tmpotu))
group = meta[,"Group"]
group = data.frame(group)
result = adonis2(tmpotu~group,group,distance="bray",permutations = 10000)
otu.pca <- prcomp(t(otu), scale. =F)
p <- fviz_eig(otu.pca, addlabels = TRUE)
p = p+theme(panel.background = element_rect(fill = "white"),plot.background=element_rect(fill = "white"))
p$labels$title=paste("Scree plot of ",tax,sep = "")
if (!file.exists("./species_analysis/beta/PCA"))
{
dir.create("./species_analysis/beta/PCA",recursive = TRUE)
}
ggsave(paste("./species_analysis/beta/PCA/scree plot of ",".pdf",sep = tax), p, width=180, height=150, units="mm")
ggsave(paste("./species_analysis/beta/PCA/scree plot of ",".png",sep = tax), p, width=180, height=150, units="mm")
p1 <- fviz_pca_ind(otu.pca,
geom.ind = "point", # show points only ( not "text")
col.ind = meta$Group, # color by groups
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups")
p1=p1+theme(axis.line = element_line(colour = "black"))+theme_bw()
p1$labels$x=sub("Dim","PCA",p1$labels$x)
p1$labels$y=sub("Dim","PCA",p1$labels$y)
p1$labels$title=paste("PCA of ",tax,sep = "")
p1 = p1 + scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((result$`Pr(>F)`[1]),digits = 4),sep = ""))
#p1 = p1 + annotate("label",x=Inf,y=Inf,label=text_annotation,vjust=1.1,hjust=1.05)
p1 =p1 + ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
PC1 <- otu.pca$x[,1]
PC2 <- otu.pca$x[,2]
otu.pca.data <- data.frame(rownames(otu.pca$x),PC1,PC2,meta$Group)
colnames(otu.pca.data) <- c("sample", "PC1", "PC2", "group")
my_comparisons = list(c('control','experience'))
p2 <- ggplot(otu.pca.data, aes(x = group, y= PC2, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.y = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
xlab("") + ylab("")+
scale_color_aaas(a=0.55)
p3 <- ggplot(otu.pca.data, aes(x = group, y= PC1, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
xlab("") + ylab("") +
coord_flip() + # coord_flip()函数翻转坐标�?
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
#step_increase = 10,
#y_position = 5,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
scale_color_aaas(a=0.55)
p4 <- ggarrange(p3, NULL, p1, p2, widths = c(5,2), heights = c(2,4), align = "hv")+theme(plot.background=element_rect(fill = "white"))
ggsave(paste("./species_analysis/beta/PCA/PCA of ",".pdf",sep = tax), p4, width=180, height=150, units="mm")
ggsave(paste("./species_analysis/beta/PCA/PCA of ",".png",sep = tax), p4, width=180, height=150, units="mm")
}
#PCoA
#########################################################
library("amplicon")
library(xlsx)
library(ggsci)
library(ggsignif)
library("ggpubr")
rm(list = ls())
rm(list = ls())
#setwd("D:/metagenomic")
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
for (i in file){
name=sub(".txt","",i)
tax = sub("merged_abundance_table_","",name)
otu =read.table(i, row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
tmpotu= otu
tmpmeta = meta
tmpotu = data.frame(t(tmpotu))
group = meta[,"Group"]
group = data.frame(group)
result = adonis2(tmpotu~group,group,distance="bray",permutations = 10000)
otu=otu[,row.names(meta)]
bray_dis = vegdist(data.frame(t(otu)),method = "bray")
bray_dis = as.matrix(bray_dis)
bray_dis = as.data.frame(bray_dis)
if (!file.exists("./species_analysis/beta/PCoA"))
{
dir.create("./species_analysis/beta/PCoA",recursive = TRUE)
}
write.xlsx(bray_dis,file=paste("./species_analysis/beta/PCoA/bray ",".xlsx",sep = tax))
p = beta_pcoa(bray_dis,metadata = meta)
p = p+scale_color_aaas(a=0.55)+theme_bw()
text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((result$`Pr(>F)`[1]),digits = 4),sep = ""))
p = p + ggtext::geom_richtext (aes (x= Inf, y=Inf,label=text_annotation ),color="black",label.colour = 'black',vjust = 1.1,hjust = 1.05)
p = p + labs(title = paste("PCoA of",tax,sep = " "))
p2 <- ggplot(p$data, aes(x = group, y= y, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.y = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
xlab("") + ylab("")+
scale_color_aaas(a=0.55)
p3 <- ggplot(p$data, aes(x = group, y=x, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
xlab("") + ylab("") +
coord_flip() +
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
scale_color_aaas(a=0.55)
if (!file.exists("./species_analysis/beta/PCoA/picture1"))
{
dir.create("./species_analysis/beta/PCoA/picture1",recursive = TRUE)
}
p4 <- ggarrange(p3, NULL, p, p2, widths = c(4,1), heights = c(1,3), align = "hv")+theme(plot.background=element_rect(fill = "white"))
ggsave(paste("./species_analysis/beta/PCoA/picture1/PCoA of ",".pdf",sep = tax), p4, width=180, height=150, units="mm")
ggsave(paste("./species_analysis/beta/PCoA/picture1/PCoA of ",".png",sep = tax), p4, width=180, height=150, units="mm")
}
#NMDS
library(phyloseq)
library(ggsci)
library("ggpubr")
setwd("D:")
#setwd("D:/metagenomic")
nmds_my=function(otu,group){
library("vegan")
library("ggplot2")
library("ggsci")
otu <- t(otu)
otu = otu[row.names(group),]
bray_dis <- vegdist(otu, method = 'bray')
otu_nmds <- metaMDS(bray_dis,distance = 'bray')
otu_nmds
otu_nmds.stress <- otu_nmds$stress
otu_nmds.point <- data.frame(otu_nmds$point)
otu_nmds.species <- data.frame(otu_nmds$species)
sample_site <- otu_nmds.point[1:2]
sample_site$names <- rownames(sample_site)
colnames(sample_site)[1:2] <- c('NMDS1', 'NMDS2')
#合并分组数据
sample_site <- cbind(sample_site,Group=group$Group)
#sample_site = sample_site[,1:4]
#sample_site$NMDS2 = -sample_site$NMDS2
p <- ggplot(data=sample_site,aes(NMDS1,NMDS2)) +
#geom_hline(aes(yintercept=0),colour="#d8d6d6",linetype=5)+
#geom_vline(aes(xintercept=0),colour="#d8d6d6",linetype=5)+
geom_point(aes(color =Group),shape = 19,size = 3.5)+
#scale_color_aaas(alpha=0.7)+
stat_ellipse(aes(color = Group),geom="polygon",level=0.90,alpha=0)+labs(x= "NMDS1", y = "NMDS2")+
scale_color_aaas(alpha=0.7)+
theme(plot.title = element_text(hjust = 0.5))+
theme_bw()+
#theme(panel.background = element_blank(),axis.line = element_line(color = "black"))+
geom_text(aes(x = -0.4 , y =0.5,label = paste("stress = ",round(otu_nmds.stress,2))),size=5)+
xlim(-0.5, 0.5)+ylim(-0.5,0.5)
return(p)
}
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
for (i in file){
name=sub(".txt","",i)
tax = sub("merged_abundance_table_","",name)
otu = read.table(i,header = T,row.names = 1,sep = "\t")
group = read.table("metadata.txt",header = T,row.names = 1,sep = "\t")
p = nmds_my(otu = otu,group = group)
if (!file.exists("./species_analysis/beta/NMDS/picture1"))
{
dir.create("./species_analysis/beta/NMDS/picture1",recursive = TRUE)
}
ggsave(paste("./species_analysis/beta/NMDS/picture1/NMDS of ",".pdf",sep = tax), p, width=180, height=150, units="mm")
ggsave(paste("./species_analysis/beta/NMDS/picture1/NMDS of ",".png",sep = tax), p, width=180, height=150, units="mm")
}
#CCA
my_cca <- function(meta = meta ,otu =otu,env = env,tax=tax)
{
library("ggplot2")
library("vegan")
otu = otu[,rownames(groups)]
env = env[rownames(groups),]
decorana(otu)
cca <- cca(X = t(otu), Y = env)
x.sig = anova(cca)
x.p = x.sig$Pr[1]
x.p
x.F = x.sig$F[1]
x.F
F1 <- paste("anova F: ", round(x.F, 2), sep = "")
pp1 = paste("p: ", round(x.p, 2), sep = "")
# 对CCA的结果进行summary, 提取对应信息;
cca.sum <- summary(cca)
str(cca.sum)
cca.sum$sites
cca.sum$biplot
sites <- data.frame(cca.sum$sites[, c(1, 2)], groups, check.names = F)
biplot <- as.data.frame(cca.sum$biplot, check.names = F)
species <- as.data.frame(cca.sum$species, check.names = F)
eig <- as.data.frame(cca.sum$concont$importance, check.names = F)
labs <- sprintf("%s(%.2f%%)", colnames(sites)[1:2], eig[2,1:2]*100)
library("ggplot2")
library(ggrepel)
p = ggplot() + geom_point(data = sites, aes(x = CCA1, y = CCA2, fill = Group), pch = 21, colour = "black", size = 3,alpha=0.7) +
geom_segment(data =as.data.frame(cca.sum$biplot) , aes(x = 0, y = 0, xend = CCA1, yend = CCA2), arrow = arrow(angle = 22.5,length = unit(0.2, "cm")), linetype = 1, size = 1,colour = "black") +
geom_label_repel(data = as.data.frame(cca.sum$biplot) ,aes(CCA1*1.1, CCA2*1.1,label=row.names(as.data.frame(cca.sum$biplot) )),fontface="bold",color="black",alpha=0.6,
box.padding=unit(1, "lines"), point.padding=unit(0.3, "lines"),
segment.colour = "grey50",segment.size=0.75,segment.alpha=0.9,
label.r=0.15,label.size=0.5, max.overlaps = 50,size=3)+
labs(x = labs[1],
y = labs[2])
p = p + theme_bw() + geom_hline(aes(yintercept = 0), colour = "black", linetype = 2) +
geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
plot.title = element_text(vjust = -0.5, hjust = 0.1,size = 20),
axis.title.y = element_text(size = 8, face = "bold", colour = "black"),
axis.title.x = element_text(size = 8, face = "bold", colour = "black"),
axis.text = element_text(size = 8, face = "bold"),
axis.text.x = element_text(colour = "black",size = 8),
axis.text.y = element_text(colour = "black", size = 8),
legend.text = element_text(size = 8,face = "bold"))+
ggtext::geom_richtext(
aes(
x = Inf , y = Inf,
vjust = 1.1,hjust = 1.03,
label = paste(F1,' <p>\n p-value = ',pp1, sep = "")),
size =5, family = "sans",colour = 'black',label.colour="black")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
# 添加箭头,标记为环境因子;
#p <- p+scale_x_continuous(limits = c(min(sites$CCA1)*0.6,max(sites$CCA1)*0.6))
#p <- p+scale_y_continuous(limits = c(min(sites$CCA2)*0.6,max(sites$CCA2)*0.6))
p = p+labs(title = paste("CCA of ",tax,sep = ""))
return(p)
}
#RDA
my_rda <- function(meta = meta ,otu =otu,env = env,tax=tax)
{
library("ggplot2")
library("vegan")
otu = otu[,rownames(groups)]
env = env[rownames(groups),]
decorana(otu)
rda <- rda(X = t(otu), Y = env)
x.sig = anova(rda)
x.p = x.sig$Pr[1]
x.p
x.F = x.sig$F[1]
x.F
F1 <- paste("anova F: ", round(x.F, 2), sep = "")
pp1 = paste("p: ", round(x.p, 2), sep = "")
# 对CCA的结果进行summary, 提取对应信息;
rda.sum <- summary(rda)
str(rda.sum)
rda.sum$sites
rda.sum$biplot
sites <- data.frame(rda.sum$sites[, c(1, 2)], groups, check.names = F)
biplot <- as.data.frame(rda.sum$biplot, check.names = F)
species <- as.data.frame(rda.sum$species, check.names = F)
eig <- as.data.frame(rda.sum$concont$importance, check.names = F)
labs <- sprintf("%s(%.2f%%)", colnames(sites)[1:2], eig[2,1:2]*100)
library("ggplot2")
library(ggrepel)
p = ggplot() + geom_point(data = sites, aes(x = RDA1, y = RDA2, fill = Group), pch = 21, colour = "black", size = 3,alpha=0.7) +
geom_segment(data =as.data.frame(rda.sum$biplot) , aes(x = 0, y = 0, xend = RDA1, yend = RDA2), arrow = arrow(angle = 22.5,length = unit(0.2, "cm")), linetype = 1, size = 1,colour = "black") +
geom_label_repel(data = as.data.frame(rda.sum$biplot) ,aes(RDA1*1.1, RDA2*1.1,label=row.names(as.data.frame(rda.sum$biplot) )),fontface="bold",color="black",alpha=0.6,
box.padding=unit(1, "lines"), point.padding=unit(0.3, "lines"),
segment.colour = "grey50",segment.size=0.75,segment.alpha=0.9,
label.r=0.15,label.size=0.5, max.overlaps = 50,size=3)+
labs(x = labs[1],
y = labs[2])
p = p + theme_bw() + geom_hline(aes(yintercept = 0), colour = "black", linetype = 2) +
geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
plot.title = element_text(vjust = -0.5, hjust = 0.1,size = 20),
axis.title.y = element_text(size = 8, face = "bold", colour = "black"),
axis.title.x = element_text(size = 8, face = "bold", colour = "black"),
axis.text = element_text(size = 8, face = "bold"),
axis.text.x = element_text(colour = "black",size = 8),
axis.text.y = element_text(colour = "black", size = 8),
legend.text = element_text(size = 8,face = "bold"))+
ggtext::geom_richtext(
aes(
x = Inf , y = Inf,
vjust = 1.1,hjust = 1.03,
label = paste(F1,' <p>\n p-value = ',pp1, sep = "")),
size =5, family = "sans",colour = 'black',label.colour="black")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
# 添加箭头,标记为环境因子;
#p <- p+scale_x_continuous(limits = c(min(sites$RDA1)*0.6,max(sites$RDA1)*0.6))
# p <- p+scale_y_continuous(limits = c(min(sites$RDA2)*0.6,max(sites$RDA2)*0.6))
p = p+labs(title = paste("RDA of ",tax,sep = ""))
return(p)
}
#p = my_rda(meta = groups,env=env,otu=otu,tax="species")
library(devtools)
library(amplicon)
library("ggsci")
file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
for (i in file){
name=sub(".txt","",i)
tax = sub("merged_abundance_table_","",name)
otu =read.table(i, row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
map=read.table("metadata.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
env=read.delim("env.txt",row.names=1)
p = my_rda (otu=otu,meta = map,env=env,tax = tax)
if (!file.exists("./species_analysis/beta/RDA/picture1"))
{
dir.create("./species_analysis/beta/RDA/picture1",recursive = TRUE)
}
ggsave(paste("./species_analysis/beta/RDA/picture1/RDA plot of ",".pdf",sep = tax), p, width=180, height=150, units="mm")
ggsave(paste("./species_analysis/beta/RDA/picture1/RDA plot of ",".png",sep = tax), p, width=180, height=150, units="mm")
}
#优势物种分析
#堆叠图
dominant_species <- function(n=n,otutab = otutab,metadata=metadata,tax= tax)
{
library(tidyverse)
library("ggsci")
library("ggplot2")
library("RColorBrewer")
getpalette =colorRampPalette(brewer.pal(12,"Set3"))
otu = otutab
map = metadata
#将实验组和对照组的样本分开
experience = list()
control = list()
for (i in row.names(map))
{
if (map[i,"Group"] == "experience")
{
experience = append(experience,i)
}
if (map[i,"Group"] == "control")
{
control = append(control,i)
}
}
#将otu列表区分为实验组otu和对照组otu
exp.otu = otu[which(colnames(otu) %in% experience)]
ctr.otu = otu[which(colnames(otu) %in% control)]
#对实验组和对照组中不同物种的数量进行求和
exp.otu.sum = rowSums(exp.otu)
ctr.otu.sum = rowSums(ctr.otu)
#将实验组与对照组求和数据进行合并
df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
colnames(df) = c("experience","control")
#直接计算相对丰度
df$experience = df$experience/sum(df$experience)
df$control = df$control/sum(df$control)
#计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
df$average = (df$experience+df$control)/2
df = df[order(df$average,decreasing = T),]
#找出top n 的物种,其余物种并入other并计算other的总和
df.top10 = df[1:n-1,]
df.res = df[n:length(row.names(df)),]
other = colSums(df.res)
#将other加入top n数据中并更改名字
a = rbind(df.top10,other)
rownames(a)[length(rownames(a))]="Other"
a$Phylum = row.names(a)
df = a
df = df[,which(colnames(df)!="average")]
#设置线段内容
link_dat= data.frame()
sum.experience =0
sum.control=0
for (i in length(rownames(df)):1)
{
sum.control = sum.control + df$control[i]
link_dat[rownames(df)[i],"control"] = sum.control
sum.experience = sum.experience + df$experience[i]
link_dat[rownames(df)[i],"experience"] = sum.experience
}
link_dat$Phylum = row.names(link_dat)
df.long <- reshape2::melt(df, value.name='abundance', variable.name='group')
#绘图
df.long$Phylum = factor(df.long$Phylum,levels = rev(link_dat$Phylum))
#a = df.long$Phylum
df.long$group = factor(df.long$group,levels = c("control","experience"))
p = ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) +
geom_bar(stat = "identity", width=0.5, col='black',size=1) +
geom_segment(data=link_dat, aes(x=1.25, xend=1.75, y=control, yend=experience),size=1)+
theme_bw() +
#scale_color_aaas(a=0.10)+scale_fill_aaas(a=0.10)+
ylab("Relative abundance(%)")+
theme(axis.title.x=element_text(vjust=2, size=20,face = "bold"),
axis.title.y =element_text(vjust=2, size=20,face = "bold"),
legend.title = element_text(size = 20,face = "bold")
)
name = paste("Top ",n,sep = "")
name = paste(name,tax,sep = " abundant ")
p = p+scale_fill_manual(values = getpalette(n))+labs(fill=name)+labs(x="Groups")
return(p)
}
otu =read.table("out_ARGcategory.txt", row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
map=read.table("metadata.2.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
p= dominant_species(n=10,metadata = map,otutab = otu,tax = "ARGcategory")
p
#多组箱线图
setwd("D://metagenomics//upstream")
otu =read.table("merged_abundance_table_species.txt", row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
map=read.table("metadata.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
multi_boxplot <- function(otu = otu ,meta = meta,n =n,tax=tax)
{
get_top_abundant <- function(n=n,otutab = otutab,metadata=metadata)
{
library(tidyverse)
library("ggsci")
library("ggplot2")
library("RColorBrewer")
library(ggsignif)
getpalette = brewer.pal(12,"Set3")
otu = otutab
map = metadata
#将实验组和对照组的样本分开
experience = list()
control = list()
for (i in row.names(map))
{
# print(i)
if (map[i,"Group"] == "experience")
{
experience = append(experience,i)
}
if (map[i,"Group"] == "control")
{
control = append(control,i)
}
}
#将otu列表区分为实验组otu和对照组otu
exp.otu = otu[which(colnames(otu) %in% experience)]
ctr.otu = otu[which(colnames(otu) %in% control)]
#对实验组和对照组中不同物种的数量进行求和
exp.otu.sum = rowSums(exp.otu)
ctr.otu.sum = rowSums(ctr.otu)
#将实验组与对照组求和数据进行合并
df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
colnames(df) = c("experience","control")
#计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
df$average = (df$experience+df$control)/2
df = df[order(df$average,decreasing = T),]
#找出top n 的物种,其余物种并入other并计算other的总和
p = rownames(df)[1:n]
q = rownames(df)[n+1:length(row.names(df))]
#将other加入top n数据中并更改名字
return(p)
}
#获取丰度前十的物种名称
result= get_top_abundant(n=n,otutab = otu,metadata = map)
name_list = result
name_list = as.list(name_list)
#调整otu顺序与meta信息保持一致
otu =as.data.frame( t(otu))
otu = otu[row.names(map),]
#根据返回的top abundant信息提取对应的otu信息
otu =as.data.frame( otu[which(colnames(otu) %in% name_list)])
#根据name_list的顺序调整otu的物种(列)顺序
name_list = as.character(name_list)
otu = otu[,name_list]
#给矩阵的每一个元素取对数
otu = log10(otu)
#给每一个样本添加分组信息
otu$Group = NULL
for (i in rownames(otu))
{
otu[i,"Group"]=map[i,"Group"]
}
library(simplevis)
library(ggplot2)
library(tidyverse)
library("ggsci")
library(ggsignif)
data.long <- pivot_longer(otu, cols = 1:length(colnames(otu))-1, names_to ="index", values_to = "num")
data.long$Group <- as.factor(data.long$Group)
data.long$index <- factor(data.long$index,levels =rev(colnames(otu)) )
data.long <- data.long[!is.infinite(data.long$num),]
str(data.long)
p = ggplot(data.long, aes(x=index, y=num,fill=Group))+theme_bw()+
geom_boxplot(alpha=1,size=0.8,width=0.5,outlier.color = "gray",position=position_dodge(width=0.8))+#调整control和experience之间的距离
theme(axis.text.x = element_text(angle = 90),
plot.title = element_text(hjust = 0.5),#强行标题居中
#axis.text.y = element_text(angle = 90),
axis.title.x=element_text(vjust=2, size=10,face = "bold"),
axis.title.y =element_text(vjust=2, size=10,face = "bold"),
legend.title = element_text(hjust = 0.5, size = 10,face = "bold"))+coord_flip()+
stat_compare_means(aes(group=Group),
method = "wilcox.test",
label="p.signif",
hide.ns = T,
vjust = 0.7,)+
scale_fill_aaas(a=0.55)+scale_color_aaas(a=0.55)+labs(y="Relative Abundance(log10)",x=NULL,title =paste("Top abundant ",tax,sep = "") ,hjust=0.5)
return(p)
}
p = multi_boxplot(otu =otu,meta = map,n=10,tax="species")
p
#分面堆叠图
otu =read.table("merged_abundance_table_species.txt", row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
map=read.table("metadata.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
facet_stackplot <- function(otu =otu ,meta=meta)
{
get_other_abundant <- function(n=n,otutab = otutab,metadata=metadata)
{
library(tidyverse)
library("ggsci")
library("ggplot2")
library("RColorBrewer")
library(ggsignif)
#getpalette = brewer.pal(12,"Set3")
otu = otutab
map = metadata
#将实验组和对照组的样本分开
experience = list()
control = list()
for (i in row.names(map))
{
# print(i)
if (map[i,"Group"] == "experience")
{
experience = append(experience,i)
}
if (map[i,"Group"] == "control")
{
control = append(control,i)
}
}
#将otu列表区分为实验组otu和对照组otu
exp.otu = otu[which(colnames(otu) %in% experience)]
ctr.otu = otu[which(colnames(otu) %in% control)]
#对实验组和对照组中不同物种的数量进行求和
exp.otu.sum = rowSums(exp.otu)
ctr.otu.sum = rowSums(ctr.otu)
#将实验组与对照组求和数据进行合并
df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
colnames(df) = c("experience","control")
#计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
df$average = (df$experience+df$control)/2
df = df[order(df$average,decreasing = T),]
#找出top n 的物种,其余物种并入other并计算other的总和
p = rownames(df)[1:n]
q = rownames(df)[n+1:length(row.names(df))]
#将other加入top n数据中并更改名字
return(q)
}
get_top_abundant <- function(n=n,otutab = otutab,metadata=metadata)
{
library(tidyverse)
library("ggsci")
library("ggplot2")
library("RColorBrewer")
library(ggsignif)
#getpalette = brewer.pal(12,"Set3")
otu = otutab
map = metadata
#将实验组和对照组的样本分开
experience = list()
control = list()
for (i in row.names(map))
{
# print(i)
if (map[i,"Group"] == "experience")
{
experience = append(experience,i)
}
if (map[i,"Group"] == "control")
{
control = append(control,i)
}
}
#将otu列表区分为实验组otu和对照组otu
exp.otu = otu[which(colnames(otu) %in% experience)]
ctr.otu = otu[which(colnames(otu) %in% control)]
#对实验组和对照组中不同物种的数量进行求和
exp.otu.sum = rowSums(exp.otu)
ctr.otu.sum = rowSums(ctr.otu)
#将实验组与对照组求和数据进行合并
df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
colnames(df) = c("experience","control")
#计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
df$average = (df$experience+df$control)/2
df = df[order(df$average,decreasing = T),]
#找出top n 的物种,其余物种并入other并计算other的总和
p = rownames(df)[1:n]
q = rownames(df)[n+1:length(row.names(df))]
#将other加入top n数据中并更改名字
return(p)
}
name_list = get_top_abundant(n=10,otutab = otu,metadata = map)
name_list = as.list(name_list)
other_list = get_other_abundant(n=10,otutab = otu,metadata = map)
other_list = as.list(other_list)
otu.top = otu[which(row.names(otu) %in% name_list),]
otu.other = otu[which(row.names(otu) %in% other_list),]
t= colSums(otu.other)
df=as.data.frame( rbind(otu.top,t))
rownames(df)[length(rownames(df))]="Other"
otu = df
otu = otu/100
otu = as.data.frame(otu)
otu$Taxonomy = row.names(otu)
otu
data_frame=melt(otu, id='Taxonomy')
names(data_frame)[2]='sample_id'#更改列名
map$sample_id = row.names(map)
data_group = map[,c("sample_id","Group")]
data_frame=merge(data_frame, data_group, by='sample_id')
ylim =c(0,100)
xlim =c(0,100)
library(ggplot2)
library("RColorBrewer")
library(ggsignif)
library("ggsci")
getpalette =colorRampPalette(brewer.pal(12,"Set3"))
name_list = append(name_list,"Other")
name_list = as.character(name_list)
data_frame$Taxonomy = factor(data_frame$Taxonomy,levels = name_list )
stack_plot=ggplot(data_frame, aes(x=sample_id, fill=Taxonomy, y=value*100))+
# 数据输入:样本、物种、丰度
geom_col(position='stack') +
# stack:堆叠图
labs(x='Samples', y='Relative Abundance (%)')+
# 给xy轴取名
scale_y_continuous(expand=c(0, 0))+
# 调整y轴属性
theme(axis.text.x=element_text(angle=90, hjust=1,size = 5),
axis.title.x=element_text(vjust=2, size=10,face = "bold"),
axis.title.y =element_text(vjust=2, size=10,face = "bold"),
#legend.title = element_text(hjust = 0.5, size = 10,face = "bold"),
legend.title = element_blank()
)+
facet_wrap(~Group, scales = 'free_x', ncol = 2)+ # 按group组,X轴,分2面
scale_fill_manual(values = getpalette(20))+labs(x=NULL)
return(stack_plot)
}
p = facet_stackplot(otu = otu,meta =map)
p
# Maaslin2
library(Maaslin2)
otu = read.table(file = "merged_abundance_table_species.txt", header = T,row.names = 1,sep = "\t")
metadata = read.table(file = "metadata.txt", header = T,row.names = 1,sep = "\t")
fit_data = Maaslin2(
t(otu),
metadata,
output = "demo_output",
fixed_effects = c("Group","ages","gender","education","bmi"),
#random_effects = c("ages","gender","education","bmi"),
plot_heatmap=TRUE,
heatmap_first_n=10)
###############################PART2-function_analysis######################################
#####my funcation################
setwd("D:\\metagenomics\\upstream")
#####my funcation################
setwd("D:\\metagenomic")
#####PCA##########################
my_pca <- function(otu = otu,meta = metadata ,title = title)
{
library(ggsci)
library("ggpubr")
library("factoextra")
library("vegan")
library("ggtext")
library(ggsignif)
#otu =t(read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t"))
#meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
otu = t(otu)
otu = otu[row.names(meta),]
permanova = adonis2(otu ~ Group, data = meta, permutations = 9999)
#PCA
otu.pca <- prcomp(otu, scale. =F)
p <- fviz_eig(otu.pca, addlabels = TRUE)
p = p+theme(panel.background = element_rect(fill = "white"),plot.background=element_rect(fill = "white"))
p1 <- fviz_pca_ind(otu.pca,
geom.ind = "point", # show points only ( not "text")
col.ind = meta$Group, # color by groups
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups")
p1=p1+theme(axis.line = element_line(colour = "black"))+theme_bw()
p1$labels$x=sub("Dim","PCA",p1$labels$x)
p1$labels$y=sub("Dim","PCA",p1$labels$y)
p1=p1+labs(title = "PCA of path.function")+scale_color_aaas(a=0.55)+scale_fill_aaas(a=0.55)
text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((permanova$`Pr(>F)`[1]),digits = 4),sep = ""))
#p1 = p1 + annotate("label",x=Inf,y=Inf,label=text_annotation,vjust=1.1,hjust=1.05)
p1 =p1 + ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
#p1 =p1 +geom_point( alpha = 0.3)
PC1 <- otu.pca$x[,1]
PC2 <- otu.pca$x[,2]
df = as.data.frame(cbind(PC1,PC2))
df$groups = meta$Group
pd = ggplot(data = df, aes(x=PC1,y=PC2))+geom_point(aes(color=groups,shape=groups,color = groups,fill=groups),size=2,alpha=0.7)+
ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, label.colour = 'black',text.colour = "black"))
#pd
#pd = pd +theme_bw()+stat_ellipse(aes(fill=groups),type="norm",geom="polygon",alpha=0.2,color=NA)
pd = pd+stat_ellipse(aes(
color=groups,
fill=groups),
level = 0.99,
geom = "polygon",
alpha=0.1)+scale_fill_d3(a=0.5)+theme_bw()+scale_color_d3(a=0.5)
#+geom_point(shape=c(1,2))
#pd=pd+theme(panel.grid.major = element_blank())
pd=pd+labs(x=paste("PCA1(","%)",sep = as.character(round(p$data$eig[1],2))),y=paste("PCA2(","%)",sep = as.character(round(p$data$eig[2],2))))
#pd = pd+ ggtext::geom_richtext (aes (vjust = 1.1,hjust = 1.05,x= Inf, y=Inf,label=text_annotation, family = "sans", label.colour = 'black'))
pd = pd+labs(title = paste("PCA of ",title,sep = ""))
otu.pca.data <- data.frame(rownames(otu.pca$x),PC1,PC2,meta$Group)
colnames(otu.pca.data) <- c("sample", "PC1", "PC2", "group")
my_comparisons = list(c('control','experience'))
p2 <- ggplot(otu.pca.data, aes(x = group, y= PC2, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.y = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
xlab("") + ylab("")+
scale_color_d3(a=0.5)
p3 <- ggplot(otu.pca.data, aes(x = group, y= PC1, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
xlab("") + ylab("") +
coord_flip() + # coord_flip()函数翻转坐标�?
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
#step_increase = 10,
#y_position = 5,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
scale_color_d3(a=0.5)
p4 <- ggarrange(p3, NULL, pd, p2, widths = c(5,2), heights = c(2,4), align = "hv")+theme(plot.background=element_rect(fill = "white"))
return(p4)
}
#测试
#otu =read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")
#meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
#pca=my_pca(otu = otu ,meta = meta ,title = "GBM")
###############PcoA####################
my_pcoa <- function(otu=otu,meta=meta,title=title)
{
library("amplicon")
library(xlsx)
library(ggsci)
library(ggsignif)
library("ggpubr")
Beta_PCOA <- function (dis_mat, metadata, groupID = "Group", ellipse = T,
label = F, PCo = 12)
{
p_list = c("ggplot2", "vegan", "ggrepel")
for (p in p_list) {
if (!requireNamespace(p)) {
install.packages(p)
}
suppressWarnings(suppressMessages(library(p, character.only = T)))
}
idx = rownames(metadata) %in% rownames(dis_mat)
metadata = metadata[idx, , drop = F]
dis_mat = dis_mat[rownames(metadata), rownames(metadata)]
sampFile = as.data.frame(metadata[, groupID], row.names = row.names(metadata))
pcoa = cmdscale(dis_mat, k = 3, eig = T)
points = as.data.frame(pcoa$points)
eig = pcoa$eig
points = cbind(points, sampFile[rownames(points), ])
colnames(points) = c("x", "y", "z", "group")
if (PCo == 12) {
p = ggplot(points, aes(x = x, y = y, color = group)) +
labs(x = paste("PCo 1 (", format(100 * eig[1]/sum(eig),
digits = 4), "%)", sep = ""), y = paste("PCo 2 (",
format(100 * eig[2]/sum(eig), digits = 4), "%)",
sep = ""), color = groupID)
}
if (PCo == 13) {
p = ggplot(points, aes(x = x, y = z, color = group)) +
labs(x = paste("PCo 1 (", format(100 * eig[1]/sum(eig),
digits = 4), "%)", sep = ""), y = paste("PCo 3 (",
format(100 * eig[2]/sum(eig), digits = 4), "%)",
sep = ""), color = groupID)
}
if (PCo == 23) {
p = ggplot(points, aes(x = y, y = z, color = group)) +
labs(x = paste("PCo 2 (", format(100 * eig[1]/sum(eig),
digits = 4), "%)", sep = ""), y = paste("PCo 3 (",
format(100 * eig[2]/sum(eig), digits = 4), "%)",
sep = ""), color = groupID)
}
p = p + geom_point(alpha = 0.6, size = 3) + theme_classic() +
theme(text = element_text(family = "sans", size = 7))
if (ellipse == T) {
p = p + stat_ellipse(level = 0.68)
}
if (label == T) {
p = p + geom_text_repel(label = paste(rownames(points)),
colour = "black", size = 3)
}
p
}
#setwd("D:/metagenomic")
#file=list.files(getwd(),pattern = "merged_abundance_table_.*[^1].txt")
#otu = t(read.table("out_path.function.txt",sep = "\t",header = T,row.names = 1))
#abun = t(read.table("out_pathabundance.txt",sep = "\t",header = T,row.names = 1))
# meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
otu = t(otu)
otu = otu[row.names(meta),]
#otu = otu[,-c(1,2)]
permanova = adonis2(otu~Group,data = meta,permutations = 9999)
bray_dis = vegdist(data.frame(otu),method = "bray")
bray_dis = as.matrix(bray_dis)
bray_dis = as.data.frame(bray_dis)
#write.xlsx(bray_dis,file=paste("./function/humann3/bray_dis ",".xlsx",sep =""))
p = Beta_PCOA(bray_dis,metadata = meta)
p = p+scale_color_d3(a=0.5)+theme_bw()
text_annotation=paste("PERMANOVA:<p>\nexperience vs control<p>\np-value = ",as.character(round((permanova$`Pr(>F)`[1]),digits = 4),sep = ""))
p = p + ggtext::geom_richtext (aes (x= Inf, y=Inf,label=text_annotation ),color="black",label.colour = 'black',vjust = 1.1,hjust = 1.05)
p = p + labs(title = paste("PCoA of",title,sep = " "))
p2 <- ggplot(p$data, aes(x = group, y= y, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.y = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
xlab("") + ylab("")+
scale_color_d3(a=0.5)
p3 <- ggplot(p$data, aes(x = group, y=x, colour = group)) +
geom_boxplot() +
theme(panel.background = element_rect(fill = "white",colour = "white"),
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks=element_blank(),
legend.position="none") +
xlab("") + ylab("") +
coord_flip() +
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))+
scale_color_d3(a=0.5)
p4 <- ggarrange(p3, NULL, p, p2, widths = c(4,1), heights = c(1,3), align = "hv")+theme(plot.background=element_rect(fill = "white"))
return(p4)
}
#pcoa= my_pcoa(otu = otu,meta = meta ,title = "BGM")
###############棒棒糖图##################
library(xlsx)
data = read.xlsx("D:\\metagenomics\\upstream\\function\\GBM\\table_result.xlsx",sheetName = "sheet0")
mydotchart <- function(data=data)
{
library(ggplot2)
library(ggpubr)
data = data[,c("anno","P-value","effect_size")]#"anno" "P.value" "effect_size"
data = data[which(data$`P-value` < 0.05),]#挑选显著性
data = data[order(data$`P-value`),]
if(length(data$`P-value`) > 20)
{
data = data[1:20,]
}
p = ggdotchart(data, x = "anno", y = "effect_size",
add = "segments",
color = "P-value",
sorting = "descending",
add.params = list(color = "lightgray", size = 2),#改变线的粗细和颜色
dot.size = 9,
label = round(data$effect_size,1),
font.label = list(color = "white", size = 9,
vjust = 0.5),
) + geom_hline(yintercept = 0, linetype = 2, color = "lightgray")+
theme(legend.position = "right",
axis.text.x = element_text(angle = 40,vjust = 1,hjust = 1,size = 5)
)+
labs(x="",y="effect size")+ scale_color_continuous(low="#FF7F0EFF",high="#1F77B4FF")
return(p)
}
#data = read.xlsx("table_result.xlsx",sheetName = "sheet0")
p = mydotchart(data = data)
p
##################差异分析####################
my_diff <- function(otu = otu,meta = meta,anno = F,anno_file=anno_file)
{
options(scipen = 200)
otu = t(otu)
otu = otu[row.names(meta),]
groups = meta$Group
df = data.frame(cbind(otu,groups))
name = anno_file
table_result = data.frame()#创建储存结果的输出数据框
q.value=list()#储存p-value计算q-value
#计算实验组和对照组的个数
control.num = as.numeric(table(df$groups)["control"])
experience.num = as.numeric(table(df$groups)["experience"])
#
for (i in 1:(ncol(df)-1))
{
if (anno == F)
{
}else
{
table_result[colnames(df)[i],"anno"] = name[colnames(df)[i],"anno"]
}
table_result[colnames(df)[i],"P-value"] = wilcox.test(as.numeric(df[which(df$groups == "control"),i]) ,as.numeric(df[which(df$groups== "experience"),i]) )$p.value
pvalue = table_result[colnames(df)[i],"P-value"]
q.value = append(q.value, pvalue)
diffusion = data.frame(
number = c(as.numeric(df[which(df$groups== "control"),i]),as.numeric(df[which(df$groups == "experience"),i])),
Group = factor(c(groups[which(groups=="control")],groups[which(groups=="experience")]))
#Group = factor(rep(c()))
)
effect.z = coin::wilcox_test(number~Group,diffusion)@statistic@standardizedlinearstatistic
effect_size = effect.z/sqrt(nrow(df))
table_result[colnames(df)[i],"effect_size"] = effect_size
mean.abun = tapply(as.numeric(df[,i]) , df$groups , mean)
#print(mean.abun[1])
table_result[colnames(df)[i],"mean_abun_control"] = mean.abun[1]
table_result[colnames(df)[i],"mean_abun_experience"] = mean.abun[2]
#####
rk = rank(as.numeric(df[,i]))
mean.rank = tapply(rk,df$groups,mean)
#print(mean.rank[1])
table_result[colnames(df)[i],"mean_rank_control"] = mean.rank[1]
table_result[colnames(df)[i],"mean_rank_experience"] = mean.rank[2]
mean.occ = tapply(df[,i]>0, df$groups, mean)
table_result[colnames(df)[i],"mean_occ_control"] = mean.occ[1]
table_result[colnames(df)[i],"mean_occ_experience"] = mean.occ[2]
if (mean.abun[1] > mean.abun[2])
{
table_result[colnames(df)[i],"enrichment"] = "control > experience"
}
if(mean.abun[1] < mean.abun[2])
{
table_result[colnames(df)[i],"enrichment"] = "control < experience"
}
table_result[colnames(df)[i],"mean_abunt"] = (mean.abun[1]*control.num+mean.abun[2]*experience.num)/(control.num+experience.num)
table_result[colnames(df)[i],"mean_occ"] = (mean.occ[1]*control.num+mean.occ[2]*experience.num)/(control.num+experience.num)
}
table_result$Q_value = p.adjust(table_result$`P-value`,method = "BH")
table_result = table_result[order(table_result$`P-value`),]
return(table_result)
}
#测试GBM
otu = read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
name = read.table("./GBM.anno.txt",header = T,row.names = 1,sep = "\t")
result = my_diff(meta = meta,otu = otu,anno = T,anno_file = name)
#测试GMM
otu = read.table("./out_GMM.txt",header = T,row.names = 1,sep = "\t")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
name = read.table("./GMM.anno.txt",header = T,row.names = 1,sep = "\t")
if (!file.exists("./function/GMM"))
{
dir.create("./function/GMM",recursive = TRUE)
}
result = my_diff(meta = meta,otu = otu,anno = T,anno_file = name)
write.xlsx(result, "./table_result.xlsx",sheetName = "sheet0")
data = read.xlsx("table_result.xlsx",sheetName = "sheet0")
dotchar = mydotchart(data = data)
pca = my_pca(meta = meta,otu = otu ,title = "GMM")
pcoa = my_pcoa(meta = meta,otu = otu ,title = "GMM")
permanova = adonis2(t(otu)~Group,data = meta,permutations = 9999)
###Pathway 富集条形图
otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
data = data.frame()
for (i in 1:length(rownames(anno)))
{
#print(anno[i,"anno"])
unlist(strsplit(anno[1,"anno"],split = ";"))[1]
data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
rownames(data)[i] = rownames(anno)[i]
#print(rownames(anno[i,]))
}
anno = data
#顺序
otu = t(otu)
otu = otu[rownames(meta),]
otu_meta = cbind(otu,Group = meta$Group)
otu_meta = as.data.frame(otu_meta)
#秩和检验
#wilcox.test(bmi ~ Group, data = otu_meta, var.equal = TRUE)$p.value
#改变value的类型
for (i in 1:length(colnames(otu_meta))-1)
{
#print(colnames(otu_meta)[i])
#print(otu_meta[,colnames(otu_meta)[i]])
otu_meta[,colnames(otu_meta)[i]] = as.numeric(otu_meta[,colnames(otu_meta)[i]])
#print(wilcox.test(map05223 ~ Group, data = otu_meta, var.equal = TRUE)$p.value)
# wilcox.test(colnames(otu_meta)[i] ~ Group, data = otu_meta, var.equal = TRUE)$p.value
}
#将有显著差异的通路挑选出来
select_list = data.frame()
count = 1
for (i in 1:(length(colnames(otu_meta))-1))
{
pv=wilcox.test(otu_meta[which(otu_meta$Group == "control"),colnames(otu_meta)[i]],otu_meta[which(otu_meta$Group == "experience"),colnames(otu_meta)[i]])$p.value
#print(pv)
if(!is.nan(pv))
{
print(pv)
if(pv < 0.05)
{
select_list[count,"pathway"] = colnames(otu_meta)[i]
select_list[count,"P.value"] = pv
count = count +1
#select_list = append(select_list,colnames(otu_meta)[i])
}
}
}
#将显著差异的通路的otu表格提取
for (i in 1:length(row.names(select_list)))
{
#print(select_list[i,"pathway"])
if (select_list[i,"pathway"] %in% rownames(anno))
{
select_list[i,"label1"] = anno[select_list[i,"pathway"],"1"]
select_list[i,"label2"] = anno[select_list[i,"pathway"],"3"]
}
}
select_list=select_list[which(select_list$label1 != "Microbial metabolism in diverse environments"),]
#绘制条形图
select_list$P.value = -log10(select_list$P.value)
select_list = select_list[order(select_list$label1,-select_list$P.value,decreasing = T),]
select_list$label2 = factor(select_list$label2,levels = select_list$label2)
p = ggplot(select_list, aes(x=label2, y=P.value,fill = label1)) +
geom_bar(stat = "identity", width=0.7,size=2,position=position_dodge(2))+
labs(y="-log10(P value)",title = "Pathway",x="")
p = p+theme(axis.text.x = element_text(angle = 90),legend.title = element_blank(),plot.title = element_text(hjust = 0))+coord_flip()+scale_fill_d3(a= 0.5)+theme_bw()
p = p+guides(fill=guide_legend(title=NULL))
p
if (!file.exists("./function/pathway"))
{
dir.create("./function/pathway",recursive = TRUE)
}
ggsave("./function/pathway/pathway.pdf", p, width=280, height=700, units="mm")
###########################permanova
otu <- read.table("merged_abundance_table_species.txt",header = T,sep = "\t",row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
otu = otu[,rownames(meta)]
otu = t(otu)
otu.adonis = adonis2(otu ~ Group+ages+bmi+education,data = meta,permutations = 9999,by="margin")
#####################boxplot-module
function_boxplot <- function(otu=otu,meta=meta,save_path=save_path)
{
otu = t(otu)
otu = otu[rownames(meta),]
otu_meta = cbind(otu,Group = meta$Group)
otu_meta = as.data.frame(otu_meta)
#numeric数据
for (i in 1:length(colnames(otu_meta))-1)
{
otu_meta[,colnames(otu_meta)[i]] = as.numeric(otu_meta[,colnames(otu_meta)[i]])
}
#挑选p-value小于0.05的module
select_list=data.frame()
count=1
for (i in 1:(length(colnames(otu_meta))-1))
{
pv = wilcox.test(otu_meta[which(otu_meta$Group == "control"),colnames(otu_meta)[i]],otu_meta[which(otu_meta$Group == "experience"),colnames(otu_meta)[i]])$p.value
if(!is.nan(pv))
{
print(pv)
if(pv < 0.05)
{
select_list[count,"pathway"] = colnames(otu_meta)[i]
select_list[count,"P.value"] = pv
count = count +1
#select_list = append(select_list,colnames(otu_meta)[i])
}
}
}
for (i in 1:length(select_list$pathway))
{
#print(select_list$pathway[i])
tmp = data.frame()
tmp =as.data.frame(cbind(pathway = otu_meta[,select_list$pathway[i]],Group = otu_meta[,"Group"]))
tmp$pathway =as.numeric(tmp$pathway)
tmp$Name= select_list$pathway[i]
p = ggplot(data = tmp,aes(x=Group,y=pathway,color=Group))+
geom_boxplot(alpha=1,size=1.2,width=0.5)+theme_bw()
p=p + geom_jitter(width = 0.2)+
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
#step_increase = 10,
#y_position = 5,
position = "identity",
tip_length = 0,
size=0.9,color = "black",
map_signif_level = T)+scale_color_d3(a=0.5)+
geom_signif(comparisons = list(c("control","experience")),
test= wilcox.test,
#step_increase = 10,
#y_position = 5,
position = "identity",
tip_length = 0,
vjust=2,
size=0.9,color = "black",
map_signif_level = F)+scale_color_d3(a=0.5)+
facet_grid(. ~Name ) + theme(legend.position = "bo",#设置分类标签位置
#y轴刻度线去掉
axis.ticks.y= element_blank(),
#y轴字去掉
axis.text.y = element_blank(),
#设置图例字体样式和大小
text = element_text(family = "sans",size = 10),
#设置顶部分面背景
strip.background = element_rect(fill="grey"),
strip.switch.pad.grid =unit(0.2, "cm"),
strip.text = element_text(size = 25, colour="black",#face="italic",
margin = margin(0.5,0,0.5,0, "cm")))+
labs(x="",y="")
#pictures = append(pictures,p)
####
if (!file.exists(save_path))
{
dir.create(save_path,recursive = TRUE)
}
#修改保存的路径部分
ggsave(paste( save_path,".png",sep = select_list$pathway[i]), p, width=180, height=150, units="mm")
}
}
otu = read.table("./out_GBM.txt",header = T,row.names = 1,sep = "\t")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
#name = read.table("./GBM.anno.txt",header = T,row.names = 1,sep = "\t")
#绘制boxplot
#pictures = list()
p
otu_meta2 = otu_meta[,which()]
pictures[1]
############################################
####################
otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
data = data.frame()
for (i in 1:length(rownames(anno)))
{
#print(anno[i,"anno"])
unlist(strsplit(anno[1,"anno"],split = ";"))[1]
data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
rownames(data)[i] = rownames(anno)[i]
#print(rownames(anno[i,]))
}
#找出属于代谢途径的pathway
select_name =rownames(data)[which(data$`1`=="Metabolism")]
#找出相应的otu
otu_metabolism = which(rownames(otu) %in% select_name)
otu2= otu[otu_metabolism,]
#转置后拼接分组信息
otu2 = t(otu2)
otu2 = otu2[row.names(meta),]
otu2 =as.data.frame(cbind(otu2 ,Group = meta$Group))
otu =otu2
for (i in 1:(length(colnames(otu))-1))
{
#print(colnames(otu)[i])
colnames(otu)[i] = data[colnames(otu)[i],3]
}
#
library(simplevis)
library(ggplot2)
library(tidyverse)
library("ggsci")
library(ggsignif)
data.long <- pivot_longer(otu, cols = 1:length(colnames(otu))-1, names_to ="index", values_to = "num")
data.long$Group <- as.factor(data.long$Group)
data.long$index <- factor(data.long$index,levels =rev(colnames(otu)) )
data.long <- data.long[!is.infinite(data.long$num),]
data.long$num = as.numeric(data.long$num)
data.long = as.data.frame(data.long)
data.long[i,"index"]=as.character(data.long[i,"index"])
data.long$num = log10(data.long$num)
##############改index名字
#str(data.long)
p = ggplot(data.long, aes(x=index, y=num,fill=Group))+theme_bw()+
geom_boxplot(alpha=1,size=0.5,width=0.5,outlier.color = "gray",position=position_dodge(width=0.8),outlier.size = 0.5)+#调整control和experience之间的距离
theme(axis.text.x = element_text(angle = 90),
plot.title = element_text(hjust = 0.5),#强行标题居中
#axis.text.y = element_text(angle = 90),
axis.title.x=element_text(vjust=2, size=10,face = "bold"),
axis.title.y =element_text(vjust=2, size=10,face = "bold"),
legend.title = element_text(hjust = 0.5, size = 10,face = "bold"))+coord_flip()+
scale_fill_aaas(a=0.55)+scale_color_aaas(a=0.55)+labs(y="Relative Abundance",x=NULL,title = "Metabolism",hjust=0.5)+
stat_compare_means(aes(group=Group),
method = "wilcox.test",
label="p.signif",
hide.ns = T,
vjust = 0.8,
position = "identity",
map_signif_level=c("***"=0.001,"**"=0.01, "*"=0.05, " "=2))
# geom_signif(comparisons = list(c("control","experience")),
# test= wilcox.test,
# position = "identity",
# tip_length = 0,
# size=0.9,color = "black",
# map_signif_level = T)
#p
ggsave("test.pdf", p, width=280, height=700, units="mm")
#------------------------------------------
#humann3
library(xlsx)
#1.diff
otu = read.table("out_path.function.txt",sep = "\t",header = T,row.names = 1)
otu = otu[3:length(rownames(otu)),]
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
table_result=my_diff(otu=otu,meta = meta,anno = F,anno_file = F)
if (!file.exists("./function/humann"))
{
dir.create("./function/humann",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/humann/diff.xlsx")
#func = func[rownames(meta),]
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "path.function")
if (!file.exists("./function/humann"))
{
dir.create("./function/humann",recursive = TRUE)
}
ggsave("./function/humann/PCA of humann.png",p, width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "path.function")
if (!file.exists("./function/humann"))
{
dir.create("./function/humann",recursive = TRUE)
}
ggsave("./function/humann/PCoA of humann.png",p, width=180, height=150, units="mm")
#GBM
otu = read.table("out_GBM.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("GBM.anno.txt",sep = "\t",header = T,row.names = 1)
#func = func[rownames(meta),]
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/GBM"))
{
dir.create("./function/GBM",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/GBM/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GBM/PCA of GBM.png",p, width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GBM/PCoA of GBM.png",p, width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/GBM/dotchart of GBM.png",p, width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/GBM/")
#GMM
otu = read.table("out_GMM.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("GMM.anno.txt",sep = "\t",header = T,row.names = 1)
#diff
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/GMM"))
{
dir.create("./function/GMM",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/GMM/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GMM/PCA of GMM.png",p, width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "path.function")
ggsave("./function/GMM/PCoA of GMM.png",p, width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/GMM/dotchart of GMM.png",p, width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/GMM/")
#KO
otu = read.table("out_KO.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("KO.ann.txt",sep = "\t",header = T,row.names = 1)
#diff
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/KO"))
{
dir.create("./function/KO",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/KO/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "KO")
ggsave("./function/KO/PCA of KO.png",p, width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "KO")
ggsave("./function/KO/PCoA of KO.png",p, width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/KO/dotchart of KO.png",p, width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/KO/")
#module
otu = read.table("out_Module.txt",sep = "\t",header = T,row.names = 1)
meta = read.table("metadata.txt",sep = "\t",header = T,row.names = 1)
anno = read.table("79_Module_ano.txt",sep = "\t",header = T,row.names = 1)
#diff
table_result=my_diff(otu=otu,meta = meta,anno = T,anno_file = anno)
if (!file.exists("./function/Module"))
{
dir.create("./function/Module",recursive = TRUE)
}
write.xlsx(table_result,sheetName = "sheet0",file = "./function/Module/diff.xlsx")
#2.PCA
p = my_pca(otu = otu,meta = meta,title = "Module")
ggsave("./function/Module/PCA of Module.png",p, width=180, height=150, units="mm")
#3.PCoa
p = my_pcoa(otu = otu,meta = meta,title = "Module")
ggsave("./function/Module/PCoA of Module.png",p, width=180, height=150, units="mm")
#4.bangbangtang
#data = table_result[,2:length(colnames(table_result))]
p = mydotchart(data = table_result)
ggsave("./function/Module/dotchart of Module.png",p, width=180, height=150, units="mm")
#5箱线图
function_boxplot(meta = meta,otu = otu,save_path = "./function/Module/")
###########################################6pathwaysPCA
otu = read.table("./out_Pathway.txt",header = T,row.names = 1,sep = "\t")
anno = read.table("./79_Pathway_ano.txt",header = T,row.names = 1,sep = "\t",quote = "")
meta = read.table("./metadata.txt",header = T,row.names = 1,sep = "\t")
data = data.frame()
for (i in 1:length(rownames(anno)))
{
#print(anno[i,"anno"])
unlist(strsplit(anno[1,"anno"],split = ";"))[1]
data[i,"1"]=unlist(strsplit(anno[i,"anno"],split = ";"))[1]
data[i,"2"]=unlist(strsplit(anno[i,"anno"],split = ";"))[2]
data[i,"3"]=unlist(strsplit(anno[i,"anno"],split = ";"))[3]
rownames(data)[i] = rownames(anno)[i]
#print(rownames(anno[i,]))
}
data = data[which(data$`1`=="Cellular Processes"),]
path_list = rownames(data)
otu = otu[path_list,]
p= my_pca(otu=otu,meta = meta ,title = "Cellular Processes")
###########################
otu = read.table("./out_ARGcategory.txt",header = T,row.names = 1,sep = "\t")
dominant_species <- function(n=n,otutab = otutab,metadata=metadata,tax= tax)
{
library(tidyverse)
library("ggsci")
library("ggplot2")
library("RColorBrewer")
getpalette = brewer.pal(12,"Set3")
otu = otutab
map = metadata
#将实验组和对照组的样本分开
experience = list()
control = list()
for (i in row.names(map))
{
# print(i)
if (map[i,"Group"] == "experience")
{
experience = append(experience,i)
}
if (map[i,"Group"] == "control")
{
control = append(control,i)
}
}
#将otu列表区分为实验组otu和对照组otu
exp.otu = otu[which(colnames(otu) %in% experience)]
ctr.otu = otu[which(colnames(otu) %in% control)]
#对实验组和对照组中不同物种的数量进行求和
exp.otu.sum = rowSums(exp.otu)
ctr.otu.sum = rowSums(ctr.otu)
#将实验组与对照组求和数据进行合并
df = as.data.frame(cbind(exp.otu.sum,ctr.otu.sum))
colnames(df) = c("experience","control")
#计算不同物种在对照组以及实验组之间的平均值并根据平均值排序
df$average = (df$experience+df$control)/2
df = df[order(df$average,decreasing = T),]
#找出top n 的物种,其余物种并入other并计算other的总和
df.top10 = df[1:n-1,]
df.res = df[n:length(row.names(df)),]
other = colSums(df.res)
#将other加入top n数据中并更改名字
a = rbind(df.top10,other)
rownames(a)[length(rownames(a))]="Other"
a$Phylum = row.names(a)
a = a[order(a$average,decreasing = T),]
#将物种数量转化为相对丰富
df = select(a,-average)
df$experience = df$experience/sum(df$experience)
df$control = df$control/sum(df$control)
#设置线段内容
link_dat <- df %>%
arrange(by=experience) %>%
mutate(experience=cumsum(experience), control=cumsum(control))
#
df.long <- reshape2::melt(df, value.name='abundance', variable.name='group')
#绘图
df.long$Phylum = factor(df.long$Phylum,levels = rev(link_dat$Phylum))
#a = df.long$Phylum
p = ggplot(df.long, aes(x=group, y=abundance, fill=Phylum)) +
geom_bar(stat = "identity", width=0.5, col='black',size=2) +
geom_segment(data=link_dat, aes(x=1.25, xend=1.75, y=experience, yend=control),size=2)+
theme_bw() +
#scale_color_aaas(a=0.10)+scale_fill_aaas(a=0.10)+
ylab("Relative abundance(%)")+
theme(axis.title.x=element_text(vjust=2, size=20,face = "bold"),
axis.title.y =element_text(vjust=2, size=20,face = "bold"),
legend.title = element_text(size = 20,face = "bold")
)
name = paste("Top ",n,sep = "")
name = paste(name,tax,sep = " abundant ")
p = p+scale_fill_manual(values = getpalette)+labs(fill=name)
return(p)
}
otu =read.table("merged_abundance_table_class.txt", row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
map=read.table("metadata.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
p= dominant_species(n=10,metadata = map,otutab = otu,tax = "class")
p
#######抗性基因
otu =read.table("out_KO.txt", row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
meta=read.table("metadata.txt", row.names = 1, header=T, sep="\t", comment.char="", stringsAsFactors = F)
anno = read.table("KO.ann.txt", row.names= 1, header=T, sep="\t", comment.char="", stringsAsFactors = F,quote = "")
meta[which(meta$Group==1),"Group"] = "control"
meta[which(meta$Group==2),"Group"] = "experience"
p = my_pca(otu = otu ,meta = meta,title = "ARGcategory")
p = my_pcoa(otu = otu ,meta = meta,title = "ARGcategory")
result_table = my_diff(otu = otu,meta = meta,anno = T,anno_file = anno)