R语言做生信特征变量选择生物信息学R语言源码

pROC包学习

2019-04-04  本文已影响41人  drlee_fc74

ROC曲线 > 我们在做诊断性实验的时候,最常用的选择最佳cutoff的方法是使用ROC曲线。本次主要介绍的是pROC包对于 ROC曲线的的绘制和分析

本次数据使用的是pROC内置的数据aSAH

##install.packages("pROC")
library(pROC)    
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
data("aSAH")
head(aSAH)
##    gos6 outcome gender age wfns s100b  ndka
## 29    5    Good Female  42    1  0.13  3.01
## 30    5    Good Female  37    1  0.14  8.54
## 31    5    Good Female  42    1  0.10  8.09
## 32    5    Good Female  27    1  0.04 10.42
## 33    1    Poor Female  42    3  0.13 17.40
## 34    1    Poor   Male  48    2  0.10 12.75

roc roc是这个包的主要函数。通过roc函数我们可以构建一个ROC曲线,并返回一个ROC的列表。这个列表可以被“打印”, 可以被“plot”,同时可以使用auc, ci, smooth.roc and coords等辅助函数调出结果。还可以通过 roc.test #####roc当中的几种表达方式

##以下两种方式是等价的
roc(outcome ~ s100b, aSAH)
## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH)
## 
## Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
## Area under the curve: 0.7314
roc(aSAH$outcome ~ aSAH$s100b)
## 
## Call:
## roc.formula(formula = aSAH$outcome ~ aSAH$s100b)
## 
## Data: aSAH$s100b in 72 controls (aSAH$outcome Good) < 41 cases (aSAH$outcome Poor).
## Area under the curve: 0.7314
在亚组中进行roc分析
roc(outcome ~ s100b, data=aSAH, subset=(gender == "Male"))
## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH, subset = (gender ==     "Male"))
## 
## Data: s100b in 22 controls (outcome Good) < 20 cases (outcome Poor).
## Area under the curve: 0.7727
roc(outcome ~ s100b, data=aSAH, subset=(gender == "Female"))
## 
## Call:
## roc.formula(formula = outcome ~ s100b, data = aSAH, subset = (gender ==     "Female"))
## 
## Data: s100b in 50 controls (outcome Good) < 21 cases (outcome Poor).
## Area under the curve: 0.72
改变case的因子水平
roc(aSAH$outcome, aSAH$s100b,
  levels=c("Poor", "Good"))
## 
## Call:
## roc.default(response = aSAH$outcome, predictor = aSAH$s100b,     levels = c("Poor", "Good"))
## 
## Data: aSAH$s100b in 41 controls (aSAH$outcome Poor) > 72 cases (aSAH$outcome Good).
## Area under the curve: 0.7314
改变结果的输出
##不显示结果
roc(aSAH$outcome, aSAH$s100b, quiet = T)
## 
## Call:
## roc.default(response = aSAH$outcome, predictor = aSAH$s100b,     quiet = T)
## 
## Data: aSAH$s100b in 72 controls (aSAH$outcome Good) < 41 cases (aSAH$outcome Poor).
## Area under the curve: 0.7314
##计算AUC。默认的为T
roc(aSAH$outcome, aSAH$s100b, auc = T)
## 
## Call:
## roc.default(response = aSAH$outcome, predictor = aSAH$s100b,     auc = T)
## 
## Data: aSAH$s100b in 72 controls (aSAH$outcome Good) < 41 cases (aSAH$outcome Poor).
## Area under the curve: 0.7314
##AUC变成百分比
roc(aSAH$outcome, aSAH$s100b, percent = T)
## 
## Call:
## roc.default(response = aSAH$outcome, predictor = aSAH$s100b,     percent = T)
## 
## Data: aSAH$s100b in 72 controls (aSAH$outcome Good) < 41 cases (aSAH$outcome Poor).
## Area under the curve: 73.14%
##计算95%CI
roc(aSAH$outcome, aSAH$s100b, ci = T)
## 
## Call:
## roc.default(response = aSAH$outcome, predictor = aSAH$s100b,     ci = T)
## 
## Data: aSAH$s100b in 72 controls (aSAH$outcome Good) < 41 cases (aSAH$outcome Poor).
## Area under the curve: 0.7314
## 95% CI: 0.6301-0.8326 (DeLong)

roc.test roc.test用来比较两个或者多个roc曲线是否有差别。

###通过roc函数来比较两个roc的区别
roc1 <- roc(aSAH$outcome, aSAH$s100b)
roc2 <- roc(aSAH$outcome, aSAH$wfns)
roc.test(roc1, roc2)
## 
##  DeLong's test for two correlated ROC curves
## 
## data:  roc1 and roc2
## Z = -2.209, p-value = 0.02718
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
###如果是一个数据框可以在一个公式中比较
roc.test(aSAH$outcome, aSAH$s100b, aSAH$wfns)
## 
##  DeLong's test for two correlated ROC curves
## 
## data:  aSAH$s100b and aSAH$wfns by aSAH$outcome (Good, Poor)
## Z = -2.209, p-value = 0.02718
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
###改变比较的方法默认的的是delong法。可选bootstrap
roc.test(roc1, roc2, method = "bootstrap")
## 
##  Bootstrap test for two correlated ROC curves
## 
## data:  roc1 and roc2
## D = -2.2935, boot.n = 2000, boot.stratified = 1, p-value = 0.02182
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
###比较两个ROC的灵敏度和特异度
roc.test(roc1, roc2, method="specificity", specificity=0.9)
## 
##  Specificity test for two correlated ROC curves
## 
## data:  roc1 and roc2
## D = -1.3119, boot.n = 2000, boot.stratified = 1, p-value = 0.1895
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
roc.test(roc1, roc2, method="sensitivity", sensitivity=0.9)
## 
##  Sensitivity test for two correlated ROC curves
## 
## data:  roc1 and roc2
## D = -3.1677, boot.n = 2000, boot.stratified = 1, p-value =
## 0.001537
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
###如果两个roc的样本量不同则需要进行配置
roc7 <- roc(aSAH$outcome, aSAH$s100b)
roc8 <- roc(aSAH$outcome[1:100], aSAH$s100b[1:100])
roc.test(roc7, roc8, paired=FALSE, method="delong")
## 
##  DeLong's test for two ROC curves
## 
## data:  roc7 and roc8
## D = 0.16859, df = 205.38, p-value = 0.8663
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.7183601

auc 通过AUC来计算ROC曲线下面积

rocobj <- roc(aSAH$outcome, aSAH$s100b)
auc(rocobj)
## Area under the curve: 0.7314
###同样的可以使用roc函数计算
roc(aSAH$outcome, aSAH$s100b)$auc
## Area under the curve: 0.7314

ci.auc 计算一个roc曲线的曲线下面积以及95%CI

rocobj <- roc(aSAH$outcome, aSAH$s100b)
result <- ci.auc(rocobj)
###结果其实包括三个变量分别是auc, lower, upper
result[1]; result[2]; result[3]
## [1] 0.6301182
## [1] 0.7313686
## [1] 0.8326189

coords 返回ROC曲线计算过程中的变量值,

其中包括阈值、灵敏度、特异度、准确率、真阴性数、真阳性数、假阴性数、假阳性数、阳性预测值、阴性预测值…… coords(roc, x, input, ret,as.list, drop, best.method) 参数当中x可以是数字代表input的值,all代表ROC所有点的值。 best代表灵敏度和特异度和最大的点。local maximasROC曲线局部最大的点 input如果X是数字的需要制定input。参数时“threshold”, “specificity” or “sensitivity”这三个的一个。同时可以简写为“t”,“sp”和“se” ret返回值。可以是“threshold”, “specificity”, “sensitivity”, “accuracy”, “tn” (true negative count), “tp” (true positive count), “fn” (false negative count), “fp” (false positive count), “npv” (negative predictive value), “ppv” (positive predictive value), “precision”, “recall”. “1-specificity”, “1-sensitivity”, “1-accuracy”, “1-npv” and “1-ppv” drop以最简单的形式返回结果 as.list以列表的形式输出结果 best.method逻辑值。通过这个参数可以决定最佳的阈值

##制定X值
coords(roc=rocobj, x=0.5, input="sensitivity", ret="sensitivity")
## [1] 0.5
##得到所有灵敏度的值
sensitivities <- coords(rocobj, "all", ret="se")
##得到最佳的阈值的cutoff
coords(rocobj, "b", ret="t")
## [1] 0.205
##得到作用结果
coords(rocobj, "best", ret=c("threshold", "specificity", "sensitivity", "accuracy",
"tn", "tp", "fn", "fp", "npv", "ppv", "1-specificity",
"1-sensitivity", "1-accuracy", "1-npv", "1-ppv",
"precision", "recall"), drop = T)
##     threshold   specificity   sensitivity      accuracy            tn 
##     0.2050000     0.8055556     0.6341463     0.7433628    58.0000000 
##            tp            fn            fp           npv           ppv 
##    26.0000000    15.0000000    14.0000000     0.7945205     0.6500000 
## 1-specificity 1-sensitivity    1-accuracy         1-npv         1-ppv 
##     0.1944444     0.3658537     0.2566372     0.2054795     0.3500000 
##     precision        recall 
##     0.6500000     0.6341463

plot.roc 用来绘制ROC曲线的最主要的函数。

add如果是绘制两个或者多个图的话,使用这个可以绘制到同一个图上。 smooth逻辑值,曲线是否平滑 type选择线条的类型:“l”只是线条、“p”只是点、“b”既有线条又有点、“n”啥都没有 legacy.axes X轴的参数。如果为F则为sp,如果为T则为1-sp。一般而言是1-sp xlim,ylim,xlab,ylab,asp,mar,mgp基本画图参数。 col, lty, lwd线条的颜色、类型和宽度 print.thres 标注ROC曲线当中最佳的位置。 print.thres.pch, print.thres.adj, print.thres.col, print.thres.cex 修改thres位置点的类型、字符位置的横向调整、颜色、字体的大小。 print.thres.pattern自定义阈值的内容 print.auc打印曲线下面积。 print.auc.pattern自定义AUC的内容 print.auc.x/print.auc.y定义auc的位置 print.auc.adj/cex/col定义auc的横向位置、字体大小以及颜色

##基本图形
plot.roc(roc1)
image.png
plot.roc(roc1, type = "b")
image.png
##在原来的图形上添加
plot.roc(roc1)
plot.roc(smooth(roc1), add=TRUE, col="blue")
legend("bottomright", legend=c("Empirical", "Smoothed"),
col=c(par("fg"), "blue"), lwd=2)
image.png
##对于线条的调整
plot.roc(roc1, xlim = c(1,0), ylim = c(0,1), col = "blue", lwd = 2)
image.png
##对于阈值的调整
plot.roc(roc1, print.thres = T)
image.png
plot.roc(roc1, print.thres = T, print.thres.pch = 12, print.thres.adj = 1.1, print.thres.col = "red", print.thres.cex = 1)
image.png
##对于auc的调整
plot.roc(roc1, print.auc = T)
image.png
plot.roc(roc1, print.auc = T, print.auc.x = 0.5, print.auc.y = 0.6, print.auc.col = "red")
image.png
上一篇下一篇

猜你喜欢

热点阅读