researchIMP researchR for statistics

跟着Nature学作图:R语言ggplot2柱形图添加误差线和频

2022-12-11  本文已影响0人  小明的数据分析笔记本

论文

A saturated map of common genetic variants associated with human height

https://www.nature.com/articles/s41586-022-05275-y

s41586-022-05275-y.pdf

代码没有公开,但是作图数据基本都公开了,争取把每个图都重复一遍

今天的推文重复论文中的extended Figure5 频率分布直方图和柱形图添加误差线

image.png

其中图b的数据没有找到,我们只重复其他5个小图

首先是两个频率分布直方图

这两个作图代码是一样的

library(readxl)

dat01<-read_excel("data/20221014/extendFig5.xlsx",
                  sheet = "Source Data for Panels a - c")
head(dat01)

library(ggplot2)
library(ggh4x)
p1<-ggplot(data=dat01,aes(x=`Effect Size`))+
  geom_histogram(bins = 70,color="black",fill="grey")+
  geom_vline(xintercept = 0,lty="dashed",color="green")+
  scale_x_continuous(breaks = seq(-1.5,1.5,by=0.5))+
  scale_y_continuous(breaks = seq(0,1000,by=200))+
  theme_classic()+
  guides(x=guide_axis_truncated(trunc_lower = -1.5,
                                trunc_upper = 1.5),
         y=guide_axis_truncated(trunc_lower = 0,
                                trunc_upper = 1000))+
  labs(x="Estimated effect of minor haplotype",
       y="Frequency")
p1

p3<-ggplot(data=dat01,aes(x=`Variance Explained`*100))+
  geom_histogram(bins = 30,color="black",fill="grey")+
  scale_x_continuous(breaks = seq(0,0.006,by=0.001))+
  scale_y_continuous(breaks = seq(0,6000,by=2000))+
  theme_classic()+
  guides(x=guide_axis_truncated(trunc_lower = 0,
                                trunc_upper = 0.006),
         y=guide_axis_truncated(trunc_lower = 0,
                                trunc_upper = 6000))+
  labs(x="Variance explained by each haplotype (in %)",
       y="Frequency")
p3

图d柱形图误差线叠加散点图

dat02<-read_excel("data/20221014/extendFig5.xlsx",
                  sheet = "Panel d")
dat02 %>% colnames()

library(tidyverse)
dat02 %>% 
  mutate(x=paste0(`Variance Explained by underlying Causal variants (q2)`*100,"%")) %>% 
  group_by(x) %>% 
  summarise(mean_value=mean(`Signal Density Detected`),
            sd_value=sd(`Signal Density Detected`)) %>% 
  ungroup() -> dat02.1

dat02 %>% 
  mutate(x=paste0(`Variance Explained by underlying Causal variants (q2)`*100,"%")) -> dat02.2
p4<-ggplot()+
  geom_errorbar(data=dat02.1,
                aes(x=x,
                    ymin=mean_value-0.1,
                    ymax=mean_value+sd_value),
                width=0.3,
                color="#e27765")+
  geom_col(data=dat02.1,
           aes(x=x,y=mean_value),
           fill="#daa421")+
  geom_point(data=dat02.2,
             aes(x=x,y=`Signal Density Detected`),
             color="gray",size=3)+
  theme_classic()+
  scale_y_continuous(breaks = seq(0,10,by=2),
                     expand=expansion(mult = c(0,0.1)))+
  scale_x_discrete(labels=c("0.5%\n(1.5)","1%\n(2.0)",
                            "2%\n(2.8)","5%\n(4.5)"))+
  theme(axis.line.x = element_blank(),
        axis.ticks.x = element_blank())+
  guides(y=guide_axis_truncated(trunc_lower = 0,
                                trunc_upper = 10))+
  coord_cartesian(clip = "off")+
  labs(x="Variabce explained by causal variant\n(median allelic effect across simulation replicates - in SD)",
       y="Mean density (+S.E.) of genome-wide\nsignigicant SNPs within 100kb")

p4

普通柱形图添加误差线

dat03<-read_excel("data/20221014/extendFig5.xlsx",
                  sheet = "Panel e")
dat03 %>% colnames()
p5<-dat03 %>% 
  mutate(Ancestries=factor(Ancestries,
                           levels = Ancestries)) %>% 
  ggplot(aes(x=Ancestries,y=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`))+
  geom_col(fill="#fe7357")+
  geom_errorbar(aes(ymin=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`-`Standard Error`,
                    ymax=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`+`Standard Error`),
                width=0.3,
                color="#fe7357")+
  theme_classic()+
  theme(axis.line.x = element_blank(),
        axis.ticks.x = element_blank())+
  scale_y_continuous(breaks = seq(0,0.8,by=0.2),
                     limits = c(0,0.8),
                     expand = expansion(mult=c(0,0)))+
  scale_x_discrete(labels=c("SAS\n(N=9,219)","EUR\n(N=414,429)",
                            "AFR\n(N=7,543)","EAS(N=1,496)"))

p5

水平柱形图添加误差线

dat04<-read_excel("data/20221014/extendFig5.xlsx",
                  sheet = "Panel f")

dat04 %>% colnames()
p6<-dat04 %>% 
  mutate(`Statistical Model`=factor(`Statistical Model`,
                                    levels = `Statistical Model`)) %>% 
  ggplot(aes(x=`Variance explained`,y=`Statistical Model`))+
  geom_col(fill="#006403")+
  geom_errorbarh(aes(xmin=`Variance explained`-`Standard-Error`,
                     xmax=`Variance explained`+`Standard-Error`),
                 height=0.2,
                 color="#d19f84")+
  theme_classic()+
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank())+
  scale_y_discrete(labels=scales::label_wrap(30))+
  scale_x_continuous(limits = c(0,0.0055),
                     breaks = seq(0,0.005,by=0.001))+
  guides(x=guide_axis_truncated(trunc_lower = 0,
                                trunc_upper = 0.005))+
  labs(y=NULL)
p6

最后是所有图组合到一起

library(patchwork)

(p1+theme(axis.title = element_text(size=10))+
            plot_spacer()+
            p3+
            theme(axis.title = element_text(size=10)))/(p4+
                         theme(axis.title = element_text(size=10))+
                         p5+
                         theme(axis.title = element_text(size=10),
                               axis.text.x = element_text(size=10),)+
                         p6+
                         theme(axis.text.y = element_text(size=10)))+
  plot_annotation(tag_levels = "a")
image.png

示例数据和代码可以给公众号推文点赞,点击在看,最后留言获取

欢迎大家关注我的公众号

小明的数据分析笔记本

小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!

上一篇下一篇

猜你喜欢

热点阅读