生信分析

微生物多样性qiime2分析流程(11) 数据可视化分析(中)

2020-11-16  本文已影响0人  R语言数据分析指南

轻轻松松的完成数据分析,高高兴兴的发论文,一个完美的PCA分析流程呈现给各位,喜欢请关注,点赞;后面的内容更精彩!!!

pacman::p_load(tidyverse,ggrepel,FactoMineR,magrittr)

da <- function(da) {
  (da / (da %>% colSums() %>% as.numeric() %>% rep(each = nrow(da)) %>%
    matrix(ncol = ncol(da), nrow = nrow(da)))) %>% t() %>% return()
}

da <- "data.xls" %>% 
read.delim(header = T, check.names = F, row.names = 1, sep = "\t") %>% da()
groups <- "group2.txt" %>% read.delim(header = T, sep = "\t")

pca <- da %>% 
PCA(scale.unit = F, graph = F)
pca %$% write.csv(ind$coord, file = "pca.xls")
group <- NULL

for (i in seq_along(groups$Sample)) {
  group[i] <- groups$Group[rownames(pca$ind$coord)[i] %>% 
                             grep(groups$Sample)]
}

if (ncol(groups) == 2) {
  pcadata <- data.frame(rownames(pca$ind$coord),
                        pca$ind$coord[, 1],pca$ind$coord[, 2], group) %>%
    set_colnames(c("sample", "PC1", "PC2", "group"))
}

ggplot(pcadata, aes(PC1,PC2)) +
  geom_point(aes(fill = group, color = group), size = 4) +
  geom_vline(aes(xintercept = 0), linetype = "dotted") +
  geom_hline(aes(yintercept = 0), linetype = "dotted") +
  labs(title = "PCA Plot",
       x = (floor(pca$eig[1,2] * 100) / 100) %>% 
         paste0("PC1 ( ", ., "%", " )"),
       y = (floor(pca$eig[2,2] * 100) / 100) %>% 
         paste0("PC2 ( ", ., "%", " )")) +
  theme(text = element_text(family = "Times", size = 12)) +
  theme(panel.background = element_rect(fill = 'white', colour = 'black'),
        axis.title.x = element_text(colour="black",size = 14),
        axis.title.y = element_text(colour="black",size = 14),
        axis.text = element_text(colour="black", size = 12),
        legend.title = element_text(colour="black",size = 12,
                                    face = "bold"),
        legend.key = element_blank(),
        plot.title = element_text(size = 12, colour = "black",
                                  hjust = 0.5,face = "bold")) +
  theme(legend.position = c(0.9, 0.9)) +
  theme(legend.title = element_blank())
PCA.png

PCOA分析

rm(list=ls())
pacman::p_load(tidyverse,ggrepel,vegan,ape)
data <- read.csv("data.xls", header = T,check.names = F,
                 sep="\t",row.names = 1) %>% t()
data[is.na(data)] <- 0

pcoa <- vegdist(data,method = "bray") %>% pcoa(correction = "none", rn = NULL)

groups <- read.table("group2.txt",sep = "\t",header = T) %>% as.list()
PC1 = pcoa$vectors[,1]
PC2 = pcoa$vectors[,2]

pcoadata <- data.frame(rownames(pcoa$vectors),PC1,PC2,groups$Group)
colnames(pcoadata) <-c("sample","PC1","PC2","group")

ggplot(pcoadata, aes(PC1, PC2)) +
  geom_point(aes(colour=group,fill=group),size=4)+
  labs(title="PCoA",
       x=(floor(pcoa$values$Relative_eig[1]*100)) %>% 
         paste0("PC1 ( ", ., "%", " )"),
       y=(floor(pcoa$values$Relative_eig[2]*100)) %>% 
         paste0("PC2 ( ", ., "%", " )")) +
  theme(text=element_text(size=10))+
  geom_vline(aes(xintercept = 0),linetype="dotted")+
  geom_hline(aes(yintercept = 0),linetype="dotted")+
  theme(panel.background = element_rect(fill='white', colour='black'),
        axis.title.x=element_text(colour='black', size=12),
        axis.title.y=element_text(colour='black', size=12),
        axis.text=element_text(colour='black',size=12),
        legend.title=element_blank(),
        legend.key.height=unit(0.6,"cm"))

可以通过以下代码添加置信区间与标签

  stat_ellipse(aes(fill = group),geom = "polygon",
               level = 0.95,alpha = 0.3)+
  geom_label_repel(aes(PC1,PC2,label = sample),
                   fill = "white",color = "black",
                   box.padding = unit(0.6,"lines"),segment.colour = "grey50",
                   label.padding = unit(0.2,"lines"))
pcoa.jpeg

NMDS分析

Stress小于0.2时,表示NMDS分析具有一定的可靠性;
小于0.05则认为结果很好;
小于0.01认为结果极好。
pacman::p_load(tidyverse,vegan)

nmds <- read.delim("data.xls",header=T,row.names=1,
                   check.names = F,sep="\t") %>% 
  t() %>% metaMDS(distance="bray")
#capture.output(nmds,file = "Stress.txt")
scores <- scores(nmds, choices = c(1,2))
write.table(scores,file="NMDS_scores.txt")
scores <-  read.table("NMDS_scores.txt",header=T)

groups <- read.table("group2.txt", header=T,sep = "\t")
plotdata <- data.frame(rownames(scores),scores$NMDS1,
                       scores$NMDS2,groups$Group)
colnames(plotdata)=c("sample","MDS1","MDS2","group")
plotdata$sample <- factor(plotdata$sample)
plotdata$MDS1 <- as.numeric(as.vector(plotdata$MDS1))
plotdata$MDS2 <- as.numeric(as.vector(plotdata$MDS2))

ggplot(plotdata, aes(MDS1, MDS2)) +
  geom_point(aes(colour=group,fill=group),size=3)+
  labs(title="NMDS Plot") + xlab(paste("MDS1")) +
  ylab(paste("MDS2"))+
  theme(text=element_text(size=14))+
  geom_vline(aes(xintercept = 0),linetype="dotted")+
  geom_hline(aes(yintercept = 0),linetype="dotted")+
  geom_text(aes(x=max(MDS1),y=max(MDS2)),hjust=3,vjust=0.5,
            size=5,label=paste("Stress = ",round(nmds$stress,3),sep = ""),
            colour="black")+
  theme(panel.background = element_rect(fill='white', colour='black'),
        panel.grid=element_blank(), 
        axis.title = element_text(color='black',size=14),
        axis.ticks.length = unit(0.4,"lines"), 
        axis.ticks = element_line(color='black'),
        axis.line = element_line(colour = "black"), 
        axis.title.x=element_text(colour='black', size=14),
        axis.title.y=element_text(colour='black', size=14),
        axis.text=element_text(colour='black',size=14),
        legend.title=element_blank(),
        legend.text=element_text(family="Times", size=14),
        legend.key=element_blank())+
  theme(plot.title = element_text(size=14,colour = "black",face = "plain"))
NMDS.png

示例文件:
链接: https://pan.baidu.com/s/1UatWUnVUGoIvxzWAKKwA9w 提取码: x485

上一篇 下一篇

猜你喜欢

热点阅读