小明数据分析R语言gwas学习

跟着Nature Genetics学作图:R语言ggplot2曼

2022-07-08  本文已影响0人  小明的数据分析笔记本

论文

Plasma proteome analyses in individuals of European and African ancestry identify cis-pQTLs and models for proteome-wide association studies

https://www.nature.com/articles/s41588-022-01051-w

本地pdf s41588-022-01051-w.pdf

代码链接

https://zenodo.org/record/6332981#.YroV0nZBzic

https://github.com/Jingning-Zhang/PlasmaProtein/tree/v1.2

今天的推文重复一下论文中的Figure4中的一个小图

部分示例数据截图

image.png

读取数据

dat01<-read.delim("data/20220627/Fig4.tsv",
                  header=TRUE,
                  sep="\t")

dim(dat01)
head(dat01)

根据disease列对数据进行筛选

dat_all<-dat01[dat01$disease=="Urate",]

统计染色体数

nCHR<-length(unique(dat_all$CHR))
nCHR

计算每个染色体中心位置坐标

这个是用来添加横坐标文本标签的

library(tidyverse)

axis.set<-dat_all %>% 
  group_by(CHR) %>% 
  summarise(center=(max(BPcum)+min(BPcum))/2)
axis.set

根据tissue列对数据进行筛选

dat <- dat_all[dat_all$tissue=="Plasma",]

作图代码

ggplot(data = dat,
       aes(x=BPcum,y=-log10(P),
           color=as.factor(CHR),
           size=-log10(P)))+
  geom_point()+
  scale_x_continuous(labels = axis.set$CHR,
                     breaks = axis.set$center,
                     limits = c(min(dat_all$BPcum),max(dat_all$BPcum)))+
  scale_y_continuous(expand = c(0,0), limits = c(0, 32 )) +
  scale_color_manual(values = rep(c("#4292c6", "#08306b"), nCHR)) +
  scale_size_continuous(range = c(0.5,3))+
  geom_hline(yintercept = -log10(3.7*10^(-5)),
             lty="dashed")+
  guides(color = "none") + 
  labs(x = NULL, 
       title = NULL) + 
  ylab( TeX("$-log_{10}(p)$") )+
  theme_minimal() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 0, size = 5, vjust = 0.5),
    axis.text.y = element_text(angle = 0, size = 6, vjust = 0.5),
    axis.title = element_text(size=7),
    plot.title = element_text(size = 7, face = "bold"),
    plot.subtitle = element_text(size = 7),
    axis.line = element_line(),
    axis.ticks = element_line()
  ) -> p

p
image.png

给选定的基因添加文本标签

label <- c("INHBB","ITIH1","BTN3A3","INHBA","C11orf68","B3GAT3","INHBC(7.95e-63)","SNUPN","NEO1","FASN")

labels_df.pwas <- data.frame(label=label,
                             logP=-log10(dat$P[match(label,dat$ID)]),
                             BPcum=dat$BPcum[match(label,dat$ID)],
                             CHR=dat$CHR[match(label,dat$ID)])
labels_df.pwas <- labels_df.pwas[order(labels_df.pwas$BPcum),]

p +
  ggrepel::geom_label_repel(data = labels_df.pwas[1:4,],
                            aes(x = .data$BPcum,
                                y = .data$logP,
                                label = .data$label), col="black",
                            size = 1.5, segment.size = 0.2,
                            point.padding = 0.3, 
                            ylim = c(5, 30),
                            nudge_y=0.2,
                            direction = "y",
                            min.segment.length = 0, force = 2,
                            box.padding = 0.5) +
  ggrepel::geom_label_repel(data = labels_df.pwas[7,],
                            aes(x = .data$BPcum,
                                y = .data$logP,
                                label = .data$label), col="black",
                            size = 1.5, segment.size = 0.2,
                            point.padding = 0.3, 
                            direction = "y",
                            ylim = c(20, 30),
                            min.segment.length = 0, force = 2,
                            box.padding = 0.5)+
  ggrepel::geom_label_repel(data = labels_df.pwas[c(5,6,8:10),],
                            aes(x = .data$BPcum,
                                y = .data$logP,
                                label = .data$label), col="black",
                            size = 1.5, segment.size = 0.2,
                            point.padding = 0.3, 
                            ylim = c(6, 25),
                            min.segment.length = 0, force = 2,
                            box.padding = 0.5)
image.png

拼图

p+
  scale_color_manual(values = rep(c("red", "darkgreen"), nCHR))+
  theme(legend.direction = "horizontal")+
  p2 + theme(legend.direction = "horizontal")+
  plot_layout(guides="collect")+
  plot_annotation(theme = theme(legend.position = "top"))
image.png

示例数据和代码可以自己到论文中获取,或者给本篇推文点赞,点击在看,然后留言获取

欢迎大家关注我的公众号

小明的数据分析笔记本

小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!

上一篇 下一篇

猜你喜欢

热点阅读