加性关系矩阵
2019-11-08 本文已影响0人
董八七
GS中的GBLUP需要估计分子关系矩阵。所有的估计方法都是参考[@VanRaden, 2008]中的公式及几种变型进行计算,但是这些估计方法给出的结果略有不同,可能是对缺失值的估计方法不同。另外,也有函数可以将得到的G矩阵转成ASRreml可用的格式。
数据
library(tidyverse)
library(magrittr)
library(synbreed)
library(SNPassoc)
# 准备数据
data("SNPs")
pheno <- SNPs %>% select(id, blood.pre, protein)
genos <- SNPs[,-c(1:5)] %>% `rownames<-`(pheno$id)
pheno$id <- factor(pheno$id)
G_syn <- create.gpData(geno = as.matrix(genos)) %>%
codeGeno(impute=T, impute.type="random", verbose = T) %>%
extract2("geno") %>%
`rownames<-`(as.character(pheno$id))
- 手动计算
## M matrix
M <- G_syn %>% as.matrix() %>% -1
## P matrix
p1 <- round((apply(M,2,sum)+nrow(M))/(nrow(M)*2),3)
P <- 2*(p1-.5) %>% matrix(byrow=T,nrow=nrow(M),ncol=ncol(M))
## Z matrix
Z <- as.matrix(M-P)
## G matrix
b <-(1-p1)*p1
c <-2*(sum(b))
G_manual <- tcrossprod(Z)/c
G_manual[1:6,1:6]
# 1 2 3 4 5
# 1 1.24659132 0.01689597 0.95224675 0.3202860 0.13448711
# 2 0.01689597 0.37627016 -0.03297637 -0.2982287 0.24938906
# 3 0.95224675 -0.03297637 1.63579113 0.2704137 0.08461478
# 4 0.32028601 -0.29822875 0.27041368 0.4941058 -0.18063760
# 5 0.13448711 0.24938906 0.08461478 -0.1806376 0.36698021
- synbreed::kin
G_synbreed <- create.gpData(geno = as.matrix(genos)) %>%
codeGeno(impute=T, impute.type="random", verbose = T) %>%
kin(ret = "realized")
G_synbreed[1:5,1:5]
# 1 2 3 4 5
# 1 1.25030317 0.01839622 0.95906853 0.3228688 0.13598026
# 2 0.01839622 0.37582055 -0.02832591 -0.2977569 0.24889209
# 3 0.95906853 -0.02832591 1.64588391 0.2761467 0.08925812
# 4 0.32286880 -0.29775689 0.27614666 0.4957407 -0.18017285
# 5 0.13598026 0.24889209 0.08925812 -0.1801729 0.36647612
# attr(,"class")
# [1] "relationshipMatrix" "matrix"
- sommer::A.mat 【Calculates the realized additive relationship matrix. Is a wrapper of the A.mat function from the rrBLUP package published by Endelman (2011).】
G_sommer <- sommer::A.mat(M) #【注意这里是M矩阵,即-1,0,1矩阵】
G_sommer[1:5,1:5]
# 1 2 3 4 5
# 1 1.232542816 0.009838941 0.94631188 0.3131815 0.12806476
# 2 0.009838941 0.374627820 -0.03216234 -0.2989482 0.24862399
# 3 0.946311884 -0.032162338 1.63699957 0.2711802 0.08606348
# 4 0.313181505 -0.298948234 0.27118023 0.4928536 -0.18072241
# 5 0.128064761 0.248623985 0.08606348 -0.1807224 0.36684981
转asreml格式
# 该函数来自[@Borgognone2016]附录
mat2sparse <- function (X, rowNames = dimnames(X)[[1]]){
which <- (X != 0 & lower.tri(X, diag = TRUE))
df <- data.frame(row = t(row(X))[t(which)], col = t(col(X))[t(which)],
val = t(X)[t(which)])
if (is.null(rowNames))
rowNames <- as.character(1:nrow(X))
attr(df, "rowNames") <- rowNames
df
}
write.table(mat2sparse(G_sommer), "Gmat.grm",row.names=FALSE) #【注意:这个不是逆矩阵,只能用于asreml单机版。用`synbreed`中的`write.relationshipMatrix`也是一样的】
write.relationshipMatrix(G_sommer, sorting = "ASReml", type = "none")
# 逆矩阵
write.relationshipMatrix(G_sommer, sorting = "ASReml", type = "ginv")
# 逆矩阵是用`MASS`中的`ginv`完成的,参见这个函数的源代码。
VanRaden PM (2008) Efficient methods to compute genomic predictions. J Dairy Sci 91:4414–4423. doi: 10.3168/jds.2007-0980
Borgognone, M.G., Butler, D.G., Ogbonnaya, F.C., Dreccer, M.F., 2016. Molecular marker information in the analysis of multi-environment trials helps differentiate superior genotypes from promising parents. Crop Sci 56, 2612–2628. https://doi.org/10.2135/cropsci2016.03.0151