R包学习-pROC

2020-03-26  本文已影响0人  Thinkando

我们在做诊断性实验的时候,最常用的选择最佳cutoff的方法是使用ROC曲线。本次主要介绍的是pROC包对于 ROC曲线的的绘制和分析

https://www.jianshu.com/p/04e3bb3bc990

> library(pROC)
> data("aSAH")
> head(aSAH)
   gos6 outcome gender age wfns s100b  ndka
29    5    Good Female  42    1  0.13  3.01
30    5    Good Female  37    1  0.14  8.54
31    5    Good Female  42    1  0.10  8.09
32    5    Good Female  27    1  0.04 10.42
33    1    Poor Female  42    3  0.13 17.40
34    1    Poor   Male  48    2  0.10 12.75
> ##以下两种方式是等价的
> roc(aSAH$outcome ~ aSAH$s100b)
> roc(outcome ~ s100b, aSAH)
>roc(aSAH$outcome, aSAH$s100b,direction = ">") # Area under the curve: 0.2686
Setting levels: control = Good, case = Poor
Setting direction: controls < cases

Call:
roc.formula(formula = outcome ~ s100b, data = aSAH)

Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
Area under the curve: 0.7314

在亚组中进行roc分析

roc(outcome ~ s100b, data=aSAH, subset=(gender == "Male"))
roc(outcome ~ s100b, data=aSAH, subset=(gender == "Female"))

改变case的因子水平

> roc(aSAH$outcome, aSAH$s100b,
+     levels=c("Poor", "Good"))
Setting direction: controls > cases

Call:
roc.default(response = aSAH$outcome, predictor = aSAH$s100b,     levels = c("Poor", "Good"))

Data: aSAH$s100b in 41 controls (aSAH$outcome Poor) > 72 cases (aSAH$outcome Good).
Area under the curve: 0.7314

##AUC变成百分比
roc(aSAH$outcome, aSAH$s100b, percent = T)
> ##计算95%CI
> roc(aSAH$outcome, aSAH$s100b, ci = T)
Setting levels: control = Good, case = Poor
Setting direction: controls < cases

Call:
roc.default(response = aSAH$outcome, predictor = aSAH$s100b,     ci = T)

Data: aSAH$s100b in 72 controls (aSAH$outcome Good) < 41 cases (aSAH$outcome Poor).
Area under the curve: 0.7314
95% CI: 0.6301-0.8326 (DeLong)

roc.test roc.test用来比较两个或者多个roc曲线是否有差别。

###通过roc函数来比较两个roc的区别
roc1 <- roc(aSAH$outcome, aSAH$s100b)
roc2 <- roc(aSAH$outcome, aSAH$wfns)
roc.test(roc1, roc2)
## 
##  DeLong's test for two correlated ROC curves
## 
## data:  roc1 and roc2
## Z = -2.209, p-value = 0.02718
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
###如果是一个数据框可以在一个公式中比较
roc.test(aSAH$outcome, aSAH$s100b, aSAH$wfns)
## 
##  DeLong's test for two correlated ROC curves
## 
## data:  aSAH$s100b and aSAH$wfns by aSAH$outcome (Good, Poor)
## Z = -2.209, p-value = 0.02718
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
###改变比较的方法默认的的是delong法。可选bootstrap
roc.test(roc1, roc2, method = "bootstrap")
###比较两个ROC的灵敏度和特异度
roc.test(roc1, roc2, method="specificity", specificity=0.9)
## 
##  Specificity test for two correlated ROC curves
## 
## data:  roc1 and roc2
## D = -1.3119, boot.n = 2000, boot.stratified = 1, p-value = 0.1895
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
roc.test(roc1, roc2, method="sensitivity", sensitivity=0.9)
## 
##  Sensitivity test for two correlated ROC curves
## 
## data:  roc1 and roc2
## D = -3.1677, boot.n = 2000, boot.stratified = 1, p-value =
## 0.001537
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.8236789
###如果两个roc的样本量不同则需要进行配置
roc7 <- roc(aSAH$outcome, aSAH$s100b)
roc8 <- roc(aSAH$outcome[1:100], aSAH$s100b[1:100])
roc.test(roc7, roc8, paired=FALSE, method="delong")
## 
##  DeLong's test for two ROC curves
## 
## data:  roc7 and roc8
## D = 0.16859, df = 205.38, p-value = 0.8663
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2 
##   0.7313686   0.7183601

auc 通过AUC来计算ROC曲线下面积

rocobj <- roc(aSAH$outcome, aSAH$s100b)
auc(rocobj)
###同样的可以使用roc函数计算
roc(aSAH$outcome, aSAH$s100b)$auc
## Area under the curve: 0.7314

ci.auc 计算一个roc曲线的曲线下面积以及95%CI

rocobj <- roc(aSAH$outcome, aSAH$s100b)
result <- ci.auc(rocobj)
###结果其实包括三个变量分别是auc, lower, upper
result[1]; result[2]; result[3]
## [1] 0.6301182
## [1] 0.7313686
## [1] 0.8326189

coords 返回ROC曲线计算过程中的变量值

##制定X值
coords(roc=rocobj, x=0.5, input="sensitivity", ret="sensitivity")
## [1] 0.5
##得到所有灵敏度的值
sensitivities <- coords(rocobj, "all", ret="se")
##得到最佳的阈值的cutoff
coords(rocobj, "b", ret="t")
## [1] 0.205
##得到作用结果
coords(rocobj, "best", ret=c("threshold", "specificity", "sensitivity", "accuracy",
"tn", "tp", "fn", "fp", "npv", "ppv", "1-specificity",
"1-sensitivity", "1-accuracy", "1-npv", "1-ppv",
"precision", "recall"), drop = T)
##     threshold   specificity   sensitivity      accuracy            tn 
##     0.2050000     0.8055556     0.6341463     0.7433628    58.0000000 
##            tp            fn            fp           npv           ppv 
##    26.0000000    15.0000000    14.0000000     0.7945205     0.6500000 
## 1-specificity 1-sensitivity    1-accuracy         1-npv         1-ppv 
##     0.1944444     0.3658537     0.2566372     0.2054795     0.3500000 
##     precision        recall 
##     0.6500000     0.6341463

plot.roc 用来绘制ROC曲线的最主要的函数。

##基本图形
plot.roc(roc1)
image.png

<meta charset="utf-8">

image

可以进行ROC分析和ROC 曲线展示的R包。


#1. 安装

> install.packages("pROC")

#2. 数据导入

> library(pROC)
> data(aSAH)
> head(aSAH)
   gos6 outcome gender age wfns s100b  ndka
29    5    Good Female  42    1  0.13  3.01
30    5    Good Female  37    1  0.14  8.54
31    5    Good Female  42    1  0.10  8.09
32    5    Good Female  27    1  0.04 10.42
33    1    Poor Female  42    3  0.13 17.40
34    1    Poor   Male  48    2  0.10 12.75

#3. ROC分析

##3.1 使用roc()进行ROC分析

> roc(aSAH$outcome, aSAH$s100b,plot = T)
> roc(outcome ~ s100b, aSAH,plot=T,levels=c("Good", "Poor"))
> roc(controls=aSAH$s100b[aSAH$outcome=="Good"], cases=aSAH$s100b[aSAH$outcome=="Poor"])
Call:
roc.formula(formula = outcome ~ s100b, data = aSAH, plot = T)

Data: s100b in 72 controls (outcome Good) < 41 cases (outcome Poor).
Area under the curve: 0.7314

image

##3.2 roc()参数详细解释

roc(...)
# S3 method for formula
roc(formula, data, ...)
# S3 method for default
roc(response, predictor, controls, cases,
density.controls, density.cases,
levels=base::levels(as.factor(response)), percent=FALSE, na.rm=TRUE,
direction=c("auto", "<", "="">"), algorithm = 5, quiet = TRUE, 
smooth=FALSE, auc=TRUE, ci=FALSE, plot=FALSE, smooth.method="binormal",
ci.method=NULL, density=NULL, ...)

roc1 <- roc(aSAH$outcome,
            aSAH$s100b, percent=TRUE,
            # 设置auc参数
            partial.auc=c(100, 90), partial.auc.correct=TRUE,
            partial.auc.focus="sens",
            # 设置ci参数
            ci=TRUE, boot.n=100, ci.alpha=0.9, stratified=FALSE,
            # 设置画图参数
            plot=TRUE, auc.polygon=TRUE, max.auc.polygon=TRUE, grid=TRUE,
            show.thres=TRUE
            )

#在原有的图上加ROC曲线
roc2 <- roc(aSAH$outcome, aSAH$wfns,
            plot=TRUE, add=TRUE, percent=roc1$percent)   

##3.3 返回ROC计算对象

coords(roc1, "best", ret=c("threshold", "specificity", "1-npv"))
coords(roc2, "local maximas", ret=c("threshold", "sens", "spec", "ppv", "npv"))

##3.4 置信区间

# Of the AUC
ci(roc2)

# Of the curve
sens.ci <- ci.se(roc1, specificities=seq(0, 100, 5))
plot(sens.ci, type="shape", col="lightblue")
plot(sens.ci, type="bars")

# need to re-add roc2 over the shape
plot(roc2, add=TRUE)

# CI of thresholds
plot(ci.thresholds(roc2))

##3.5 roc.test()对ROC进行统计检验

# Test on the whole AUC
> roc.test(roc1, roc2, reuse.auc=FALSE)
DeLong's test for two correlated ROC curves

data:  roc1 and roc2
Z = -2.209, p-value = 0.02718
alternative hypothesis: true difference in AUC is not equal to 0
sample estimates:
AUC of roc1 AUC of roc2 
   73.13686    82.36789 

# Test on a portion of the whole AUC
> roc.test(roc1, roc2, reuse.auc=FALSE, partial.auc=c(100, 90),
         partial.auc.focus="se", partial.auc.correct=TRUE)

    # With modified bootstrap parameters
> roc.test(roc1, roc2, reuse.auc=FALSE, partial.auc=c(100, 90),
         partial.auc.correct=TRUE, boot.n=1000, boot.stratified=FALSE)

##3.6 roc.test()参数

alternative:“two.sided”, “less” ,“greater”。对于method="venkatraman",只能使用 “two.sided”
paired: 是否时配对,会自动检测。
boot.n:指定method="bootstrap"中自检举重复次数和 method="venkatraman"中置换次数;默认,2000。
boot.stratified:每次自检举过程中,cases/controls 比例与原始样本中比例一致。
method

##3.7 roc.test()中统计学方法

  1. 自检举抽样,通过boot.n指定自检举次数。

  2. 计算AUC。

  3. 计算标准偏差。
    [图片上传失败...(image-eabcc6-1596545977177)]

  4. D与正态分布进行比较

##3.7 样本量

# Two ROC curves
power.roc.test(roc1, roc2, reuse.auc=FALSE)
power.roc.test(roc1, roc2, power=0.9, reuse.auc=FALSE)

# One ROC curve
power.roc.test(auc=0.8, ncases=41, ncontrols=72)
power.roc.test(auc=0.8, power=0.9)
power.roc.test(auc=0.8, ncases=41, ncontrols=72, sig.level=0.01)
power.roc.test(ncases=41, ncontrols=72, power=0.9)

#4 pROC使用更多实例

EXPASY基于R代码上给出了pROC的6个示例,见pROC: Screenshots,下面看一个例子:

library(pROC)
data(aSAH)
rocobj <- plot.roc(aSAH$outcome, aSAH$s100b, percent = TRUE, main="Smoothing")
lines(smooth(rocobj), # smoothing (default: binormal)
col = "#1c61b6")
lines(smooth(rocobj, method = "density"), # density smoothing
col = "#008600")
lines(smooth(rocobj, method = "fitdistr", # fit a distribution
density = "lognormal"), # let the distribution be log-normal
col = "#840000")
legend("bottomright", legend = c("Empirical", "Binormal", "Density", "Fitdistr\n(Log-normal)"), col = c("black", "#1c61b6", "#008600", "#840000"),lwd = 2)

image
上一篇下一篇

猜你喜欢

热点阅读