桑基图绘制(sankey diagram)
2020-07-26 本文已影响0人
落寞的橙子
R常用的方法有:
ggalluvial
sankeyNetwork
主要是文件准备比较麻烦,代码如下
sankey_make<-function(exp_deg_dir,trans_deg_dir,prefix,FC,p_value){
suppressMessages(library(tidyverse))
exp_deg<-read.csv(exp_deg_dir)
exp_deg$"Gene_id"<-unlist(lapply(as.character(exp_deg[,1]), FUN = function(x) {return(strsplit(x, split = ".",fixed = T)[[1]][1])}))
trans_deg<-read.csv(trans_deg_dir)
trans_deg$"Gene_id"<-unlist(lapply(as.character(trans_deg[,1]), FUN = function(x) {return(strsplit(x, split = "_",fixed = T)[[1]][2])}))
trans_deg$"Gene_id"<-unlist(lapply(as.character(trans_deg[,"Gene_id"]), FUN = function(x) {return(strsplit(x, split = ".",fixed = T)[[1]][1])}))
total_list=intersect(exp_deg$Gene_id,trans_deg$Gene_id)
#gene expression
exp_deg_sig_up<-exp_deg %>% filter(pvalue<p_value & log2FoldChange>FC)
exp_deg_sig_up_list<-as.character(exp_deg_sig_up$Gene_id)
exp_deg_sig_up_list<-intersect(exp_deg_sig_up_list,total_list)
exp_deg_sig_down<-exp_deg %>% filter(pvalue<p_value & log2FoldChange<(-FC))
exp_deg_sig_down_list<-as.character(exp_deg_sig_down$Gene_id)
exp_deg_sig_down_list<-intersect(exp_deg_sig_down_list,total_list)
exp_nochange_list<-setdiff(total_list,c(exp_deg_sig_up_list,exp_deg_sig_down_list))
#transcripts
get_list<-function(a,b){b[is.element(b,a)]}
trans_deg_sig_up<-trans_deg %>% filter(pvalue<p_value & log2FoldChange>FC)
trans_deg_sig_up_list<-as.character(trans_deg_sig_up$Gene_id)
trans_deg_sig_up_list<-get_list(a=total_list,b=trans_deg_sig_up_list)
trans_deg_sig_down<-trans_deg %>% filter(pvalue<p_value & log2FoldChange<(-FC))
trans_deg_sig_down_list<-as.character(trans_deg_sig_down$Gene_id)
trans_deg_sig_down_list<-get_list(a=total_list,b=trans_deg_sig_down_list)
trans_nochange<-trans_deg %>% filter(pvalue>p_value | abs(log2FoldChange)<FC)
trans_nochange_list_rt<-as.character(trans_nochange$Gene_id)
trans_nochange_list<-get_list(a=total_list,b=trans_nochange_list_rt)
#caculate number
#first intersection
total_number<-length(intersect(exp_deg$Gene_id,trans_deg$Gene_id))
gene_up_number=length(exp_deg_sig_up_list)
gene_down_number=length(exp_deg_sig_down_list)
gene_nochange_number=length(exp_nochange_list)
##second intersection
# up_vs_up_number<-length(na.omit(intersect(exp_deg_sig_up_list,trans_deg_sig_up_list)))
# up_vs_down_number<-length(na.omit(intersect(exp_deg_sig_up_list,trans_deg_sig_down_list)))
# up_vs_no_number<-length(na.omit(intersect(exp_deg_sig_up_list,trans_nochange_list)))
# up_vs_up_number+up_vs_down_number+up_vs_no_number
#
# down_vs_up_number<-length(na.omit(intersect(exp_deg_sig_down_list,trans_deg_sig_up_list)))
# down_vs_down_number<-length(na.omit(intersect(exp_deg_sig_down_list,trans_deg_sig_down_list)))
# down_vs_no_number<-length(na.omit(intersect(exp_deg_sig_down_list,trans_nochange_list)))
# down_vs_up_number+down_vs_down_number+down_vs_no_number
#
# no_vs_up_number<-length(na.omit(intersect(exp_nochange_list,trans_deg_sig_up_list)))
# no_vs_down_number<-length(na.omit(intersect(exp_nochange_list,trans_deg_sig_down_list)))
# no_vs_no_number<-length(na.omit(intersect(exp_nochange_list,trans_nochange_list)))
# no_vs_up_number+no_vs_down_number+no_vs_no_number
# up_vs_up_number+up_vs_down_number+up_vs_no_number+down_vs_up_number+down_vs_down_number+down_vs_no_number+no_vs_up_number+no_vs_down_number+no_vs_no_number
cat_number<-function(a,b){length(b[is.element(b,a)])}
up_vs_up_number<-cat_number(a=exp_deg_sig_up_list,b=trans_deg_sig_up_list)
up_vs_down_number<-cat_number(a=exp_deg_sig_up_list,b=trans_deg_sig_down_list)
up_vs_no_number<-cat_number(a=exp_deg_sig_up_list,b=trans_nochange_list)
down_vs_up_number<-cat_number(a=exp_deg_sig_down_list,b=trans_deg_sig_down_list)
down_vs_down_number<-cat_number(a=exp_deg_sig_down_list,b=trans_deg_sig_down_list)
down_vs_no_number<-cat_number(a=exp_deg_sig_down_list,b=trans_nochange_list)
no_vs_up_number<-cat_number(a=exp_nochange_list,b=trans_deg_sig_down_list)
no_vs_down_number<-cat_number(a=exp_nochange_list,b=trans_deg_sig_down_list)
no_vs_no_number<-cat_number(a=exp_nochange_list,b=trans_nochange_list)
up_vs_up_number+up_vs_down_number+up_vs_no_number+down_vs_up_number+down_vs_down_number+down_vs_no_number+no_vs_up_number+no_vs_down_number+no_vs_no_number
value=c(gene_up_number,gene_down_number,gene_nochange_number,
up_vs_up_number,up_vs_down_number,up_vs_no_number,
down_vs_up_number,down_vs_down_number,down_vs_no_number,
no_vs_up_number,no_vs_down_number,no_vs_no_number
)
source<-c(rep(prefix,3),
rep("Gene Up-regulated",3),
rep("Gene Down-regulated",3),
rep("Gene Not Changed",3)
)
target<-c("Gene Up-regulated","Gene Down-regulated","Gene Not Changed",
rep(c("Transcript Up-regulated","Transcript Down-regulated","Transcript Not Changed"),3))
data<-data.frame(source,target,value)
return(data)
}
FC=0.5
p_value=0.05
human_exp_deg_dir="~/AllGenes_DESeq2.csv"
human_trans_deg_dir="~/diffExp/human_transcript_degs.csv"
human_sankey<-sankey_make(exp_deg_dir=human_exp_deg_dir,trans_deg_dir=human_trans_deg_dir,prefix="Human",FC=FC,p_value=p_value)
mouse_exp_deg_dir="~/AllGenes_DESeq2.csv"
mouse_trans_deg_dir="~/diffExp/mouse_transcript_degs.csv"
mouse_sankey<-sankey_make(exp_deg_dir=mouse_exp_deg_dir,trans_deg_dir=mouse_trans_deg_dir,prefix="Mouse",FC=FC,p_value=p_value)
snakey_df<-rbind(human_sankey,mouse_sankey)
snakey_df$"Species"<-c(rep("Human",nrow(human_sankey)),
rep("Mouse",nrow(mouse_sankey))
)
write.csv(snakey_df,"~/snakey.csv",row.names = F)
###
rm(list = ls())
library(tidyverse)
library(viridis)
library(patchwork)
library(networkD3)
library(RColorBrewer)
snakey_df<-read.csv("~/Snakey/snakey.csv")
# is_alluvia_form(as.data.frame(snakey_df), axes = 1:3, silent = TRUE)
# snakey_df$target <- paste(snakey_df$target, " ", sep="")
set.seed(2020)
nodes <- data.frame(name=c(as.character(snakey_df$source), as.character(snakey_df$target)) %>% unique())
snakey_df$IDsource=match(snakey_df$source, nodes$name)-1
snakey_df$IDtarget=match(snakey_df$target, nodes$name)-1
sankeyNetwork(Links = snakey_df, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "value", NodeID = "name",
sinksRight=F,nodeWidth=20, fontSize=14, nodePadding=20,
LinkGroup = 'Species')
图形格式
数据格式