[R|script] 批量计算OR/HR
2019-09-30 本文已影响0人
drlee_fc74
写在前面
我们在做logistic回归经常要求回归分析当中的其OR值。同样的我们在做COX回归的时候也需要求HR值。在R基础包当中没有之间返回这些值的结果。我们需要用数据自己来求。同样的我们也可以使用tableone
包当中的ShowRegTable
来计算。但是这个包。返回的结果格式,不是自己想要的,所以就自己重新写一下函数。
tableone::ShowRegTable
这个包当中的ShowRegTable
使用方法是
library(tableone)
library(survival)
data(pbc)
head(pbc)
## id time status trt age sex ascites hepato spiders edema bili chol
## 1 1 400 2 1 58.76523 f 1 1 1 1.0 14.5 261
## 2 2 4500 0 1 56.44627 f 0 1 1 0.0 1.1 302
## 3 3 1012 2 1 70.07255 m 0 0 0 0.5 1.4 176
## 4 4 1925 2 1 54.74059 f 0 1 1 0.5 1.8 244
## 5 5 1504 1 2 38.10541 f 0 1 1 0.0 3.4 279
## 6 6 2503 2 2 66.25873 f 0 1 0 0.0 0.8 248
## albumin copper alk.phos ast trig platelet protime stage
## 1 2.60 156 1718.0 137.95 172 190 12.2 4
## 2 4.14 54 7394.8 113.52 88 221 10.6 3
## 3 3.48 210 516.0 96.10 55 151 12.0 4
## 4 2.54 64 6121.8 60.63 92 183 10.3 4
## 5 3.53 143 671.0 113.15 72 136 10.9 3
## 6 3.98 50 944.0 93.00 63 NA 11.0 3
fit <- coxph(formula = Surv(time, status == 2) ~ trt + sex + albumin + ascites,
data = pbc)
ShowRegTable(fit)
## exp(coef) [confint] p
## trt 0.93 [0.65, 1.33] 0.698
## sexf 0.64 [0.40, 1.03] 0.064
## albumin 0.25 [0.16, 0.40] <0.001
## ascites 3.28 [1.86, 5.76] <0.001
自定义函数
单因素回归结果
逻辑回归
单因素的logit回归
#单因素的logit回归
logitUniVar <- function(dat, group, var, digit = 3){
formu <- as.formula(paste0(group, " ~ ", var))
dat[[group]] <- as.factor(dat[[group]])
subgroup <- levels(as.factor(dat[[group]]))
subgroup1 <- paste0(subgroup[2], " vs ", subgroup[1])
fit <- glm(formu, data = dat, family = binomial())
unisum <- summary(fit)
OR <- exp(coef(fit))[2]
OR <- round(OR, digit)
ci <- exp(confint(fit))[2,]
ci <- round(ci, digit)
cito <- paste0(ci[1], " - ", ci[2])
p <- unisum$coefficients[2, "Pr(>|z|)"]
p <- ifelse(p < 0.001, "< 0.001", round(p, 3))
var1 <- names(exp(coef(fit)))[2]
result <- c(var1, group,subgroup1, OR, cito, p)
names(result) <- c("var", "group","subgroup", "OR", "95%CI", "p.val")
return(result)
}
# 多因素logit回归
logitMultiVar <- function(dat, group, var, adjvar,digit = 3){
if(length(adjvar) == 1){
formu <- as.formula(paste0(group, " ~ ", var, "+", adjvar))
}else{
formu <- as.formula(paste0(group, " ~ ", var, "+", paste(adjvar, collapse = "+")))
}
dat[[group]] <- as.factor(dat[[group]])
subgroup <- levels(as.factor(dat[[group]]))
subgroup1 <- paste0(subgroup[2], " vs ", subgroup[1])
fit <- glm(formu, data = dat, family = binomial())
unisum <- summary(fit)
OR <- exp(coef(fit))[2]
OR <- round(OR, digit)
ci <- exp(confint(fit))[2,]
ci <- round(ci, digit)
cito <- paste0(ci[1], " - ", ci[2])
p <- unisum$coefficients[2, "Pr(>|z|)"]
p <- ifelse(p < 0.001, "< 0.001", round(p, 3))
var1 <- names(exp(coef(fit)))[2]
result <- c(var1, group,subgroup1, OR, cito, p)
names(result) <- c("var", "group", "subgroup","OR", "95%CI", "p.val")
return(result)
}
我们构建了两个函数,一个用于单因素的逻辑回归的结果分析,一个用于多因素的结果分析。两个函数的参数包括
- dat: 我们需要分析的数据集
- group: 分析的结局变量
- var: 分析的主要变量。
- adjvar: 多因素分析的时候用来矫正的变量
- digit: 结果的保留小数点的位数
head(pbc)
## id time status trt age sex ascites hepato spiders edema bili chol
## 1 1 400 2 1 58.76523 f 1 1 1 1.0 14.5 261
## 2 2 4500 0 1 56.44627 f 0 1 1 0.0 1.1 302
## 3 3 1012 2 1 70.07255 m 0 0 0 0.5 1.4 176
## 4 4 1925 2 1 54.74059 f 0 1 1 0.5 1.8 244
## 5 5 1504 1 2 38.10541 f 0 1 1 0.0 3.4 279
## 6 6 2503 2 2 66.25873 f 0 1 0 0.0 0.8 248
## albumin copper alk.phos ast trig platelet protime stage
## 1 2.60 156 1718.0 137.95 172 190 12.2 4
## 2 4.14 54 7394.8 113.52 88 221 10.6 3
## 3 3.48 210 516.0 96.10 55 151 12.0 4
## 4 2.54 64 6121.8 60.63 92 183 10.3 4
## 5 3.53 143 671.0 113.15 72 136 10.9 3
## 6 3.98 50 944.0 93.00 63 NA 11.0 3
logitUniVar(pbc, group = "trt", var = "sex")
## Waiting for profiling to be done...
## var group subgroup OR
## "sexf" "trt" "2 vs 1" "1.42"
## 95%CI p.val
## "0.707 - 2.917" "0.328"
logitMultiVar(dat = pbc, group = "trt", var = "sex",
adjvar = c("age", "bili"))
## Waiting for profiling to be done...
## var group subgroup OR
## "sexf" "trt" "2 vs 1" "1.176"
## 95%CI p.val
## "0.571 - 2.461" "0.662"
如果我们要进行批量的分析
multivar <- c("bili", "chol", "albumin", "copper")
logitRes <- lapply(multivar, function(x) logitUniVar(pbc, group = "trt", var = x))
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
## Waiting for profiling to be done...
logitResDat <- do.call(rbind, logitRes)
logitResDat
## var group subgroup OR 95%CI p.val
## [1,] "bili" "trt" "2 vs 1" "1.04" "0.989 - 1.097" "0.136"
## [2,] "chol" "trt" "2 vs 1" "1" "0.999 - 1.001" "0.747"
## [3,] "albumin" "trt" "2 vs 1" "1.044" "0.614 - 1.778" "0.873"
## [4,] "copper" "trt" "2 vs 1" "1" "0.997 - 1.003" "0.999"
cox回归
#单因素的cox回归
library(survival)
CoxUniVar <- function(dat, status, times, var, digit = 3){
dat[[status]] <- as.factor(dat[[status]])
subgroup <- levels(as.factor(dat[[status]]))
subgroup1 <- paste0(subgroup[2], " vs ", subgroup[1])
dat[[status]] <- as.numeric(dat[[status]])
formu <- as.formula(paste0("Surv(",times,",",status,") ~", var))
fit <- coxph(formu,data= dat)
unisum <- summary(fit)
HR <- exp(coef(fit))[1]
HR <- round(HR, digit)
ci <- exp(confint(fit))[1,]
ci <- round(ci, digit)
cito <- paste0(ci[1], " - ", ci[2])
p <- unisum$coefficients[1, "Pr(>|z|)"]
p <- ifelse(p < 0.001, "< 0.001", round(p, 3))
var1 <- names(exp(coef(fit)))[1]
result <- c(var1, status,subgroup1, HR, cito, p)
names(result) <- c("var", "group", "subgroup","HR", "95%CI", "p.val")
return(result)
}
# 多因素cox回归
CoxMultiVar <- function(dat, status, times,var, adjvar,digit = 3){
if(length(adjvar) == 1){
formu <- as.formula(paste0("Surv(",times,",",status,") ~", var, "+", adjvar))
}else{
formu <- as.formula(paste0("Surv(",times,",",status,") ~", var, "+", paste(adjvar, collapse = "+")))
}
dat[[status]] <- as.factor(dat[[status]])
subgroup <- levels(as.factor(dat[[status]]))
subgroup1 <- paste0(subgroup[2], " vs ", subgroup[1])
dat[[status]] <- as.numeric(dat[[status]])
fit <- coxph(formu,data= dat)
unisum <- summary(fit)
HR <- exp(coef(fit))[1]
HR <- round(HR, digit)
ci <- exp(confint(fit))[1,]
ci <- round(ci, digit)
cito <- paste0(ci[1], " - ", ci[2])
p <- unisum$coefficients[1, "Pr(>|z|)"]
p <- ifelse(p < 0.001, "< 0.001", round(p, 3))
var1 <- names(exp(coef(fit)))[1]
result <- c(var1, status,subgroup1, HR, cito, p)
names(result) <- c("var", "group", "subgroup","HR", "95%CI", "p.val")
return(result)
}
我们构建了两个函数,一个用于单因素的逻辑回归的结果分析,一个用于多因素的结果分析。两个函数的参数包括
- dat: 我们需要分析的数据集
- status: 分析的结局变量
- times: 分析的时间变量
- var: 分析的主要变量。
- adjvar: 多因素分析的时候用来矫正的变量
- digit: 结果的保留小数点的位数
# 单因素分析
CoxUniVar(pbc, status = "trt", times = "time", var = "age")
## var group subgroup HR
## "age" "trt" "2 vs 1" "0.995"
## 95%CI p.val
## "0.978 - 1.011" "0.522"
# 多因素分析
CoxMultiVar(pbc, status = "trt", times = "time", var = "sex", adjvar = c("age", "status"))
## var group subgroup HR
## "sexf" "trt" "2 vs 1" "1.529"
## 95%CI p.val
## "0.887 - 2.635" "0.127"
如果我们要进行批量的分析
multivar <- c("bili", "chol", "albumin", "copper")
CoxRes <- lapply(multivar, function(x) CoxUniVar(pbc, status = "trt", times = "time",var = x))
CoxResDat <- do.call(rbind, CoxRes)
CoxResDat
## var group subgroup HR 95%CI p.val
## [1,] "bili" "trt" "2 vs 1" "1.149" "1.114 - 1.185" "< 0.001"
## [2,] "chol" "trt" "2 vs 1" "1.001" "1 - 1.002" "0.011"
## [3,] "albumin" "trt" "2 vs 1" "0.328" "0.211 - 0.509" "< 0.001"
## [4,] "copper" "trt" "2 vs 1" "1.004" "1.002 - 1.006" "< 0.001"
结果解读
函数返回的结果当中包括6个结果:
- var: 回归当中的变量.
如果是分类变量的话,返回的是变量名+对照分组。如果是连续性变量的话,则是返回变量名 - group: 回归当中的结局变量.
- subgroup: 结局变量的主要分组,方便进行多个变量之间的比较.
- HR/OR: 回归的HR/OR.
- 95%CI: HR/OR的95%可信区间.
- p.val: 结果的p值。