SNP

加性关系矩阵

2019-11-08  本文已影响0人  董八七

GS中的GBLUP需要估计分子关系矩阵。所有的估计方法都是参考[@VanRaden, 2008]中的公式G=\frac{ZZ^{\prime}}{2\sum p_i(1-p_i)}及几种变型进行计算,但是这些估计方法给出的结果略有不同,可能是对缺失值的估计方法不同。另外,也有函数可以将得到的G矩阵转成ASRreml可用的格式。

数据

library(tidyverse)
library(magrittr)
library(synbreed)
library(SNPassoc)
# 准备数据
data("SNPs")
pheno <- SNPs %>% select(id, blood.pre, protein)
genos <- SNPs[,-c(1:5)] %>% `rownames<-`(pheno$id)
pheno$id <- factor(pheno$id)

G_syn <- create.gpData(geno = as.matrix(genos)) %>% 
  codeGeno(impute=T, impute.type="random", verbose = T) %>% 
  extract2("geno") %>% 
  `rownames<-`(as.character(pheno$id))
  1. 手动计算
## M matrix
M <- G_syn %>% as.matrix() %>% -1
## P matrix
p1 <- round((apply(M,2,sum)+nrow(M))/(nrow(M)*2),3)
P <- 2*(p1-.5) %>% matrix(byrow=T,nrow=nrow(M),ncol=ncol(M))
## Z matrix
Z <- as.matrix(M-P)
## G matrix
b <-(1-p1)*p1
c <-2*(sum(b))
G_manual  <- tcrossprod(Z)/c
G_manual[1:6,1:6]
# 1           2           3          4           5
# 1 1.24659132  0.01689597  0.95224675  0.3202860  0.13448711
# 2 0.01689597  0.37627016 -0.03297637 -0.2982287  0.24938906
# 3 0.95224675 -0.03297637  1.63579113  0.2704137  0.08461478
# 4 0.32028601 -0.29822875  0.27041368  0.4941058 -0.18063760
# 5 0.13448711  0.24938906  0.08461478 -0.1806376  0.36698021
  1. synbreed::kin
G_synbreed <- create.gpData(geno = as.matrix(genos)) %>% 
  codeGeno(impute=T, impute.type="random", verbose = T) %>% 
  kin(ret = "realized")
G_synbreed[1:5,1:5]
# 1           2           3          4           5
# 1 1.25030317  0.01839622  0.95906853  0.3228688  0.13598026
# 2 0.01839622  0.37582055 -0.02832591 -0.2977569  0.24889209
# 3 0.95906853 -0.02832591  1.64588391  0.2761467  0.08925812
# 4 0.32286880 -0.29775689  0.27614666  0.4957407 -0.18017285
# 5 0.13598026  0.24889209  0.08925812 -0.1801729  0.36647612
# attr(,"class")
# [1] "relationshipMatrix" "matrix" 
  1. sommer::A.mat 【Calculates the realized additive relationship matrix. Is a wrapper of the A.mat function from the rrBLUP package published by Endelman (2011).】
G_sommer <- sommer::A.mat(M) #【注意这里是M矩阵,即-1,0,1矩阵】
G_sommer[1:5,1:5]
# 1            2           3          4           5
# 1 1.232542816  0.009838941  0.94631188  0.3131815  0.12806476
# 2 0.009838941  0.374627820 -0.03216234 -0.2989482  0.24862399
# 3 0.946311884 -0.032162338  1.63699957  0.2711802  0.08606348
# 4 0.313181505 -0.298948234  0.27118023  0.4928536 -0.18072241
# 5 0.128064761  0.248623985  0.08606348 -0.1807224  0.36684981

转asreml格式

# 该函数来自[@Borgognone2016]附录
mat2sparse <- function (X, rowNames = dimnames(X)[[1]]){
  which <- (X != 0 & lower.tri(X, diag = TRUE))
  df <- data.frame(row = t(row(X))[t(which)], col = t(col(X))[t(which)],
                   val = t(X)[t(which)])
  if (is.null(rowNames))
    rowNames <- as.character(1:nrow(X))
  attr(df, "rowNames") <- rowNames
  df
}
write.table(mat2sparse(G_sommer), "Gmat.grm",row.names=FALSE) #【注意:这个不是逆矩阵,只能用于asreml单机版。用`synbreed`中的`write.relationshipMatrix`也是一样的】
write.relationshipMatrix(G_sommer, sorting = "ASReml", type = "none")
# 逆矩阵
write.relationshipMatrix(G_sommer, sorting = "ASReml", type = "ginv")
# 逆矩阵是用`MASS`中的`ginv`完成的,参见这个函数的源代码。

VanRaden PM (2008) Efficient methods to compute genomic predictions. J Dairy Sci 91:4414–4423. doi: 10.3168/jds.2007-0980
Borgognone, M.G., Butler, D.G., Ogbonnaya, F.C., Dreccer, M.F., 2016. Molecular marker information in the analysis of multi-environment trials helps differentiate superior genotypes from promising parents. Crop Sci 56, 2612–2628. https://doi.org/10.2135/cropsci2016.03.0151

上一篇下一篇

猜你喜欢

热点阅读