随机生存森林

2023-04-17  本文已影响0人  小洁忘了怎么分身

1.示例数据

# Attach the packages used below: randomForestSRC ships the vdv example data
# and the forest / variable-selection routines; survival provides coxph().
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects when the script is sourced and still does not
# reset attached packages or options. Restart R for a truly clean session.
library(randomForestSRC)
library(survival)

# Load the vdv example data set from randomForestSRC and check its size.
data(vdv, package = "randomForestSRC")
dim(vdv)
## [1]   78 4707

# Peek at the first five rows and columns: Time and Censoring come first,
# followed by the predictor columns.
vdv[1:5, 1:5]
##    Time Censoring AA555029_RC AA598803_RC     AB002301
## 1 12.53         0  -0.5049331  -0.2425008 -0.199315682
## 2  6.44         0  -0.5879813   0.4384945 -0.621200562
## 3 10.66         0  -0.3521244  -0.2258911  0.006643856
## 4 13.00         0  -0.4750357   0.5016111 -0.671029449
## 5 11.98         0  -0.1660964   0.1361991  0.989934564

# Box plots of columns 3 to 10 (the first eight predictors) to eyeball their
# distributions.
boxplot(vdv[, 3:10])

2.代码

这里使用的是官方文档里的例子 https://cran.r-project.org/web/packages/randomForestSRC/randomForestSRC.pdf

var.select的示例。

与cox回归结合:按照单因素cox回归的p值为每个变量设置权重(p值越小,权重越大)。

#' Univariate Cox p-value weights for the predictors of a survival formula
#'
#' Fits one single-predictor Cox model per predictor column and turns the
#' p-value of that predictor into a weight of 1 / p, so that smaller p-values
#' yield larger weights. A tiny constant (1e-100) guards against division by
#' zero, and a predictor whose p-value is NA gets the neutral weight 1.
#'
#' @param rfsrc.f Survival formula whose first two variables are the time and
#'   status columns, e.g. `Surv(Time, Censoring) ~ .`.
#' @param rfsrc.data Data frame containing the two response columns named in
#'   `rfsrc.f` plus the predictor columns.
#' @return Unnamed numeric vector with one weight per predictor column, in
#'   column order; `numeric(0)` when `rfsrc.data` has no predictor columns.
cox.weights <- function(rfsrc.f, rfsrc.data) {
  event.names <- all.vars(rfsrc.f)[1:2]
  event.pt <- match(event.names, names(rfsrc.data))
  if (anyNA(event.pt)) {
    stop("response variable(s) not found in data: ",
         paste(event.names[is.na(event.pt)], collapse = ", "),
         call. = FALSE)
  }
  # Every column that is not part of the response is treated as a predictor.
  xvar.pt <- setdiff(seq_len(ncol(rfsrc.data)), event.pt)
  # Iterating over the predictor positions themselves (instead of 1:p) makes
  # the zero-predictor case return an empty vector rather than fail.
  vapply(xvar.pt, function(pt) {
    cox.out <- coxph(rfsrc.f, rfsrc.data[, c(event.pt, pt), drop = FALSE])
    # Select the p-value by column name: no reliance on `$` partial matching
    # or on the p-value happening to be the fifth entry of the table.
    pvalue <- summary(cox.out)$coefficients[1L, "Pr(>|z|)"]
    if (is.na(pvalue)) 1.0 else 1 / (pvalue + 1e-100)
  }, numeric(1))
}
# Survival formula: the (Time, Censoring) outcome against every other column.
rfsrc.f <- Surv(Time, Censoring) ~ .
rfsrc.f
## Surv(Time, Censoring) ~ .

cox.weights函数根据单因素cox回归的p值计算每个变量的权重;rfsrc.f是生存模型的公式。

# One weight per predictor column of vdv, derived from univariate Cox p-values.
cox.wts <- cox.weights(rfsrc.f = rfsrc.f, rfsrc.data = vdv)
# Disabled example: a forest fit that would use the same weights via xvar.wt.
#vh.model <- rfsrc(rfsrc.f, vdv , nsplit = 10, 
#                  xvar.wt = cox.wts,importance = "random",
#                  na.action ="na.impute",ntree = 1000)

变量选择

# Variable selection with method "vh" (Variable Hunting), repeated nrep = 10
# times, using the Cox-derived weights as xvar.wt.
vh.breast.cox <- var.select(
  formula = rfsrc.f,
  data = vdv,
  method = "vh",
  nstep = 5,
  nrep = 10,
  xvar.wt = cox.wts
)

## ---------------------  Iteration: 1   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 7   joint-vimp: 0.093 
     iteration:  2   # vars: 12   joint-vimp: 0.152 
     iteration:  3   # vars: 17   joint-vimp: 0.176 
     iteration:  4   # vars: 22   joint-vimp: 0.193 
     iteration:  5   # vars: 27   joint-vimp: 0.192 

     PE: 23.1884      dim: 22 
## ---------------------  Iteration: 2   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 41   joint-vimp: 0.263 
     iteration:  2   # vars: 44   joint-vimp: 0.264 
     iteration:  3   # vars: 48   joint-vimp: 0.257 

     PE: 27.381      dim: 44 
## ---------------------  Iteration: 3   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 36   joint-vimp: 0.255 
     iteration:  2   # vars: 39   joint-vimp: 0.251 

     PE: 29.7619      dim: 36 
## ---------------------  Iteration: 4   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 11   joint-vimp: 0.256 
     iteration:  2   # vars: 16   joint-vimp: 0.286 
     iteration:  3   # vars: 20   joint-vimp: 0.291 
     iteration:  4   # vars: 25   joint-vimp: 0.294 
     iteration:  5   # vars: 30   joint-vimp: 0.302 
     iteration:  6   # vars: 35   joint-vimp: 0.3 

     PE: 38.6905      dim: 30 
## ---------------------  Iteration: 5   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 39   joint-vimp: 0.186 
     iteration:  2   # vars: 42   joint-vimp: 0.186 
     iteration:  3   # vars: 46   joint-vimp: 0.19 
     iteration:  4   # vars: 49   joint-vimp: 0.196 
     iteration:  5   # vars: 52   joint-vimp: 0.196 
     iteration:  6   # vars: 56   joint-vimp: 0.192 

     PE: 14.4928      dim: 52 
## ---------------------  Iteration: 6   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 38   joint-vimp: 0.27 
     iteration:  2   # vars: 41   joint-vimp: 0.277 
     iteration:  3   # vars: 45   joint-vimp: 0.301 
     iteration:  4   # vars: 48   joint-vimp: 0.302 
     iteration:  5   # vars: 52   joint-vimp: 0.302 
     iteration:  6   # vars: 55   joint-vimp: 0.3 

     PE: 46.4286      dim: 52 
## ---------------------  Iteration: 7   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 10   joint-vimp: 0.139 
     iteration:  2   # vars: 15   joint-vimp: 0.166 
     iteration:  3   # vars: 20   joint-vimp: 0.178 
     iteration:  4   # vars: 24   joint-vimp: 0.178 
     iteration:  5   # vars: 29   joint-vimp: 0.191 
     iteration:  6   # vars: 34   joint-vimp: 0.199 
     iteration:  7   # vars: 38   joint-vimp: 0.192 

     PE: 25      dim: 34 
## ---------------------  Iteration: 8   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 47   joint-vimp: 0.233 
     iteration:  2   # vars: 50   joint-vimp: 0.236 
     iteration:  3   # vars: 53   joint-vimp: 0.235 

     PE: 15.873      dim: 50 
## ---------------------  Iteration: 9   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 9   joint-vimp: 0.193 
     iteration:  2   # vars: 14   joint-vimp: 0.219 
     iteration:  3   # vars: 19   joint-vimp: 0.233 
     iteration:  4   # vars: 24   joint-vimp: 0.231 

     PE: 22.619      dim: 19 
## ---------------------  Iteration: 10   ---------------------
##   selecting variables using Variable Hunting ...
##   iteration:  1   # vars: 8   joint-vimp: 0.177 
     iteration:  2   # vars: 13   joint-vimp: 0.219 
     iteration:  3   # vars: 18   joint-vimp: 0.225 
     iteration:  4   # vars: 23   joint-vimp: 0.238 
     iteration:  5   # vars: 28   joint-vimp: 0.241 
     iteration:  6   # vars: 33   joint-vimp: 0.25 
     iteration:  7   # vars: 37   joint-vimp: 0.249 

     PE: 34.5238      dim: 33 
## fitting forests to final selected variables ...
## 
## 
## -----------------------------------------------------------
## family             : surv 
## var. selection     : Variable Hunting 
## conservativeness   : medium 
## dimension          : 4705 
## sample size        : 78 
## K-fold             : 5 
## no. reps           : 10 
## nstep              : 5 
## ntree              : 500 
## nsplit             : 10 
## mvars              : 942 
## nodesize           : 2 
## refitted forest    : TRUE 
## depth ratio        : 3.1277 
## model size         : 37.2 +/- 11.9796 
## PE (K-fold)        : 27.7959 +/- 9.9451 
## 
## 
## Top variables:
##                depth rel.freq
## AL080059       7.653      100
## NM_005915      7.548      100
## NM_016448      7.668      100
## AA555029_RC    7.827       90
## Contig35251_RC 7.712       90
## NM_006201      7.786       90
## NM_006681      7.890       90
## NM_015239      7.983       90
## Contig43983_RC 7.667       70
## Contig46223_RC 7.916       70
## Contig51464_RC 7.670       70
## NM_001216      8.008       70
## NM_006931      8.091       70
## NM_016577      8.087       70
## Contig44409    7.872       60
## Contig47405_RC 8.154       60
## NM_000436      8.181       60
## NM_020974      8.209       60
## Contig20217_RC 8.109       50
## Contig32185_RC 8.169       50
## Contig48328_RC 8.258       50
## Contig54742_RC 8.208       50
## NM_014246      7.750       50
## NM_018265      8.225       50
## AB020689       8.251       40
## AF052162       8.246       40
## Contig55377_RC 7.957       40
## NM_000507      8.274       40
## NM_004504      8.219       40
## NM_018354      8.284       40
## NM_020142      7.918       40
## AL137718       8.330       30
## Contig25343_RC 8.429       30
## Contig38726_RC 8.354       30
## Contig53268_RC 8.047       30
## Contig63102_RC 7.432       30
## NM_001673      8.368       30
## NM_014968      8.421       30
## -----------------------------------------------------------
# nrep sets how many times the variable-selection procedure is repeated; in
# real use it can be raised (e.g. to 1000) at the cost of longer run time.
# Names of the selected variables:
vh.breast.cox[["topvars"]]
##  [1] "AL080059"       "NM_005915"      "NM_016448"      "AA555029_RC"   
##  [5] "Contig35251_RC" "NM_006201"      "NM_006681"      "NM_015239"     
##  [9] "Contig43983_RC" "Contig46223_RC" "Contig51464_RC" "NM_001216"     
## [13] "NM_006931"      "NM_016577"      "Contig44409"    "Contig47405_RC"
## [17] "NM_000436"      "NM_020974"      "Contig20217_RC" "Contig32185_RC"
## [21] "Contig48328_RC" "Contig54742_RC" "NM_014246"      "NM_018265"     
## [25] "AB020689"       "AF052162"       "Contig55377_RC" "NM_000507"     
## [29] "NM_004504"      "NM_018354"      "NM_020142"      "AL137718"      
## [33] "Contig25343_RC" "Contig38726_RC" "Contig53268_RC" "Contig63102_RC"
## [37] "NM_001673"      "NM_014968"

随机生存森林没有常规意义上的系数,也没有cox模型那样的公式;它可以像lasso回归一样用于变量筛选,缩小一下范围,但不用作最后一步。

上一篇 下一篇

猜你喜欢

热点阅读