生物信息学R语言源码R语言 生信

一时兴起写的一个函数,备份

2021-03-31  本文已影响0人  一只烟酒僧
#从gene_info中的到不同指标、不同步长的染色体上的SNP分布
get_distrubution_from_geneinfo<-function(gene_info,filter_key=list("Sample1wt.index==F","Sample2wt.index==F","Sample3wt.index==F"),step=2e6,plot_dir="./"){
  require(patchwork)
  require(stringr)
  require(reshape2)
  require(ggplot2)
  require(Hmisc)
  require(patchwork)
  data("human_karyotype")
  step=step
  test<-lapply(filter_key,function(x){
    eval(parse(text = paste0("subset(gene_info,",x,")")))
  })
  test<-lapply(test,function(x){
    data.frame(SNPid=paste("SNP",1:nrow(x),sep = "_"),
               Chr=str_split(x$loc,"_",simplify = T)[,1],
               position=str_split(x$loc,"_",simplify = T)[,2])
  })
  
  #使用ggplot绘图
  #1、对染色体进行分区
  
  chrom_region<-apply(human_karyotype,1,function(x){
    a=seq(from=1,
          to=x[3],
          by=step)
    a[length(a)+1]=x[3]
    return(a)
  })
  names(chrom_region)<-paste("chr",human_karyotype$Chr,sep = "")
  #2、统计分区内的snp数目
  test<-lapply(test,function(x){
    x<-split(x,x$Chr)
    x<-x[names(chrom_region)]
  })
  
  sampleinfo_stat<-lapply(test,function(x){
    m=lapply(1:length(x),function(y){
      a=as.data.frame(table(cut(as.numeric(as.character(x[[y]]$position)),breaks = as.numeric(chrom_region[[y]]))))
      a[,1]<-chrom_region[[y]][-which(chrom_region[[y]]==max(chrom_region[[y]]))]
      
      return(a)
    })
    names(m)=names(chrom_region)
    return(m)
  })
  #3、使用ggplot可视化
  for (chr_id in names(chrom_region)) {
    
    if(T){
      SNP_samples_stat<-do.call(rbind,lapply(sampleinfo_stat,function(x){x[[chr_id]]}))
      SNP_samples_stat$group=paste("Condition",rep(c(1:length(filter_key)),each=nrow(SNP_samples_stat)/length(filter_key)),sep = "_")
      SNP_samples_stat$Var1<-factor(SNP_samples_stat$Var1,levels = unique(as.numeric(SNP_samples_stat$Var1)))
      p1<-ggplot(SNP_samples_stat,aes(x=Var1,y=Freq,group=group,color=group))+
        geom_point()+geom_line()+
        theme(axis.text.x = element_text(angle = 90))+
        labs(title=capitalize(chr_id))
      p2<-ggplot(SNP_samples_stat,aes(x=Var1,y=group))+
        geom_tile(aes(fill=Freq),color="white")+
        scale_fill_gradient(low = "white",high = "red")+
        theme(axis.text.x = element_text(angle = 90))
      p3=p1/p2
      ggsave(paste(plot_dir,chr_id,".png",sep = ""),plot =p3,width = 15,height = 10 )
    }
    
  }
  
}


get_distrubution_from_geneinfo(gene_info)
image.png
上一篇下一篇

猜你喜欢

热点阅读