生物信息学

G矩阵-基因组关系矩阵

2021-08-11  本文已影响0人  宗肃書
cd /public/jychu/chicken_body_size/project/chr_hardfilter/chicken477/Autosomes
plink --bfile chicken477.prunein --chr-set 33 --recode vcf --out chicken477.prunein
cat chicken477.prunein.vcf | grep "^##" -v |  sed 's/ /\t/g' |awk '{for(i=10;i<NF;i++)printf("%s ",$i);print $NF}'  |sed -e "s/0\/0/0/g;s/0\/1/1/g;s/1\/0/1/g;s/1\/1/2/g" >bodyGT
grep "BC" bodyGT |tr " " "\n"|tr "_" "\t"|cut -f1|tr "\n" " ">head1.tmp
sed  "1d"  bodyGT>body.tmp
cat head1.tmp body.tmp>body
rm -f *.tmp bodyGT
cat body |tr " " "\t" >body.txt
#转置数据
awk '{for (i=1;i<=NF;i++){ if (NR==1){res[i]=$i} else{res[i]=res[i]" "$i} }}END{for(j=1;j<=NF;j++){print res[j]}}'  body >body.trs  #速度太慢
#用R语言
conda activate dna
R
data=read.table(file="body",sep=" ",header=T)
after<-t(data)  #转置
as.data.frame(after)
after[1:5,1:5]
M1=after                                 #Efficient methods to compute genomic predictions中的方法,参考https://luansheng.netlify.app/2020/12/03/how-to-construct-g-matrix/
M= M1[,1:ncol(M1)]-1
p1=round((apply(M,2,sum)+nrow(M))/(nrow(M)*2),3)
p=2*(p1-.5)
P = matrix(p,byrow=T,nrow=nrow(M),ncol=ncol(M))
Z = as.matrix(M-P)
b=1-p1
c=p1*b
d=2*(sum(c))
ZZt = Z %*% t(Z)
G = (ZZt/d)
library(ggcorrplot)
library(ggthemes)
library(ggplot2)
p<-ggcorrplot(G,hc.order=TRUE,hc.method="ward.D",ggtheme=theme_bw())
ggsave("G.tiff",width=48,height=48)
p<-ggcorrplot(G,hc.order=TRUE,hc.method="ward.D",ggtheme=theme_bw(),colors = c("#18bd83", "white", "#9d148d"),tl.cex=0)+theme(legend.title=element_text(size=28),legend.text = element_text(size=18,face="bold"))+theme(legend.key.size=unit(2,'cm')) +guides(fill=guide_legend(title="Relationship")) #tl.cex=0不显示标签
ggsave("G2.png",width=25,height=25)
write.table(G,file="G.matrix")
#绘制基因组关系矩阵热图
library(pheatmap)
pheatmap(G,treeheight_col = 90,color =c("#18bd83", "#f5f7be","white", "#9d148d","red"),filename = "G-hot.png", width = 40, height = 40)
# 基因组关系矩阵转换成亲缘系数矩阵   来源: https://cloud.tencent.com/developer/article/1550322
n = dim(G)[1] #1
id = row.names(G) #2
mat = matrix(0,n,n) #3
for(i in 1:n){ #4
  for(j in i:n){
    mat[i,j] = G[i,j]/(sqrt(G[i,i]*G[j,j]))
    mat[j,i] = mat[i,j]
  }
}
rownames(mat)<-id #5
colnames(mat)<-id
re = data.frame(ID1= rep(id,n),ID2=rep(id,each = n),y = round(as.vector(mat))) #6
p<-ggcorrplot(mat,hc.order=TRUE,hc.method="ward.D",ggtheme=theme_bw(),colors = c("#18bd83", "white", "#9d148d"))
ggsave("GR.tiff",width=48,height=48)
write.table(mat,file="GR.matrix")
p<-ggcorrplot(mat,hc.order=TRUE,hc.method="ward.D",ggtheme=theme_bw(),colors = c("#18bd83", "white", "#9d148d"),tl.cex=0)+theme(legend.title=element_text(size=26),legend.text = element_text(size=18,face="bold"))+theme(legend.key.size=unit(2,'cm'))
ggsave("GR2.png",width=25,height=25)
p<-ggcorrplot(mat,hc.order=TRUE,hc.method="ward.D",ggtheme=theme_bw(),colors = c("#1fa214", "#f5f7be", "#9d148d"),tl.cex=0)+theme(legend.title=element_text(size=28),legend.text = element_text(size=18,face="bold"))+theme(legend.key.size=unit(2,'cm'))
ggsave("GR3.png",width=25,height=25)
上一篇下一篇

猜你喜欢

热点阅读