把显著性添加给玩明白

2023-08-16  本文已影响0人  小洁忘了怎么分身

1.编示例数据

set.seed(10086)
exp = matrix(rnorm(480,mean = 6),ncol = 120)
exp = round(exp,2)
rownames(exp) = paste0("gene",1:4)
colnames(exp) = paste0("test",1:120)
exp[,50:80] = exp[,50:80]+2
exp[,90:120] = exp[,90:120]+4
exp[1:4,1:4]
##       test1 test2 test3 test4
## gene1  6.55  5.37  6.32  4.18
## gene2  3.26  6.25  5.63  5.78
## gene3  6.57  6.80  8.11  6.58
## gene4  6.49  7.08  8.49  4.38

到这一步,拿到了一个编的表达矩阵。

2.格式转换

表达矩阵要画ggplot2箱线图,肯定是要先转变格式的,否则ggplot2代码无法下手写。

library(tidyr)
library(tibble)
library(dplyr)
dat = t(exp) %>% 
  as.data.frame() %>% 
  rownames_to_column() %>% 
  mutate(group = rep(c("control","treat1","treat2"),each = 40))

pdat = dat%>% 
  pivot_longer(cols = starts_with("gene"),
               names_to = "gene",
               values_to = "value")
head(pdat)
## # A tibble: 6 × 4
##   rowname group   gene  value
##   <chr>   <chr>   <chr> <dbl>
## 1 test1   control gene1  6.55
## 2 test1   control gene2  3.26
## 3 test1   control gene3  6.57
## 4 test1   control gene4  6.49
## 5 test2   control gene1  5.37
## 6 test2   control gene2  6.25

3.画箱线图

library(ggplot2)
p1 = ggplot(pdat, aes(y=value, x=gene))+
  geom_boxplot(aes(fill=group),outlier.shape = NA)+
  theme_bw()+
  scale_fill_manual(values = c("#2fa1dd", "#f87669", "#e6b707"))
p1

[图片上传失败...(image-382a55-1692258856792)]

p2 = ggplot(pdat, aes(y=value, x=gene))+
  geom_bar(aes(fill=group),position = position_dodge(0.9),stat = "summary")+
  theme_bw()+
  scale_fill_manual(values = c("#2fa1dd", "#f87669", "#e6b707"))+
  stat_summary(aes(group = group), fun.data = 'mean_se', 
               geom = "errorbar", colour = "black",
               width = 0.2,position = position_dodge(0.9))

p2

[图片上传失败...(image-297a28-1692258856792)]

4.加显著性标记

4.1习以为常版

library(ggpubr)
p1 + 
  stat_compare_means(aes(group = group, label = after_stat(p.signif)),method = "wilcox.test")

[图片上传失败...(image-ee95bd-1692258856792)]

p2 + stat_compare_means(aes(group = group, label = after_stat(p.signif)),method = "wilcox.test",label.y = 10.5)

[图片上传失败...(image-2a7112-1692258856792)]

这个超级简单,但显著性统一在头顶上,然后也不支持每个基因在两两分组中的显著性单独计算。

4.2 错落有致版

用rstatix里的函数计算出显著性和添加显著性的横纵坐标位置,再用stat_pvalue_manual添加上去。

library(rstatix)
stat.test <- pdat %>% 
  group_by(gene) %>% 
  wilcox_test(value ~ group) %>%
  adjust_pvalue() %>% 
  add_significance("p.adj") %>% 
  add_xy_position(x="gene")
p1 + stat_pvalue_manual(stat.test,label = "p.adj.signif",hide.ns = F,
                   tip.length = 0.01)

[图片上传失败...(image-241141-1692258856792)]

p2 + stat_pvalue_manual(stat.test,label = "p.adj.signif",hide.ns = F,
                   tip.length = 0.01)

[图片上传失败...(image-16df19-1692258856792)]

4.3 手动添加版

例如上次画的AUC值柱状图,它的纵坐标和误差棒不是画图时计算出来的,而是拿了现成的数值来画,就无法用上面的方法计算。如果能自行用其他方法计算出组间比较的显著性如何,可以手动添加。

rm(list = ls())
load("dats.Rdata")
p = ggplot(dats, aes(x = category, y = y, fill = x)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(aes(label = round(y,2)), vjust = 4, size = 3,
            position = position_dodge(width = 0.9)) +
  geom_errorbar(aes(ymin = min, ymax = max), 
                width = 0.2,
                position = position_dodge(width = 0.9)) +
  scale_fill_manual(values = c("#2fa1dd", "#f87669", "#e6b707"))+
  theme_bw()
p

[图片上传失败...(image-c65f9d-1692258856792)]

p +   geom_signif(data = dats,y_position=c(1,0.97,0.83,0.89,0.89,0.92), 
              xmin=c(0.7,1,1.7,2,2.7,3), 
              xmax=c(1,1.3,2,2.3,3,3.3), 
              annotation=c("*","**","***","ns","*","**"), 
              tip_length=0.01, 
              vjust = 0.3)

[图片上传失败...(image-75aa38-1692258856791)]

上一篇 下一篇

猜你喜欢

热点阅读