临床基线表与COX回归
2020-01-14 本文已影响0人
米开朗巨魔
在临床研究统计分析中,临床基线表对于展示研究数据的信息和结构尤为重要。
本文以R语言中自带临床数据集lung为例,展示基本的COX回归与临床基线表的制作。
首先是数据的生存分析
#获取R自带数据集lung
lung <- lung
#查看数据的基本结构
head(lung)
# 如下所示:
# inst time status age sex ph.ecog ph.karno pat.karno
# 1 3 306 2 74 1 1 90 100
# 2 3 455 2 68 1 0 90 90
# 3 3 1010 1 56 1 0 90 90
# 4 5 210 2 57 1 1 90 60
# 5 1 883 2 60 1 0 100 90
# 6 12 1022 1 74 1 1 50 80
# meal.cal wt.loss
# 1 1175 NA
# 2 1225 15
# 3 NA 15
# 4 1150 11
# 5 NA 0
# 6 513 0
#----------------------生存分析(Kaplan-Meier曲线)----------------------#
# 基本流程:
# 1)Surv()提取生存时间
# 2)如果不分组,就用survfit()拟合模型
# 3)如果有分组,就用survdiff()分析差异
# 1)Surv()提取生存时间
Lusurv <- Surv(time = lung$time,event = lung$status)
# 2)不分组,就用survfit()拟合模型并作图
Lufit <- survfit(Lusurv~1)
plot(Lufit)
# 3)有分组,就用survdiff()分析分组差异再作图
Lufit <- survfit(Lusurv~lung$sex)
plot(Lufit,conf.int = "none",
col = c("red","blue"),
lwd = 2,xlab = "Time(month)",ylab = "Survival Probability",
mark.time = T)
abline(h=0.5,lty=3) #中位生存时间线
legend("bottomleft",c("Male","Female"),
col = c("red","blue"),lwd = 2)
# 查看不同性别生存时间是否差异显著
survdiff(Lusurv~lung$sex)
#如下:
# Call:
# survdiff(formula = Lusurv ~ lung$sex)
#
# N Observed Expected (O-E)^2/E (O-E)^2/V
# lung$sex=1 138 112 91.6 4.55 10.3
# lung$sex=2 90 53 73.4 5.68 10.3
#
# Chisq= 10.3 on 1 degrees of freedom, p= 0.001
#可以看出p= 0.001 ,不同性别生存时间有显著差异
然后是单因素COX回归
#-----------------单因素COX回归-----------------#
# 提取生存时间
Lusurv <- Surv(time = lung$time,event = lung$status)
# 将生存时间加入原先的lung数据(dataframe)
lung$Lusurv <- with(lung,Lusurv)
#单因素cox回归(性别)
cox_sex <- coxph(Lusurv~sex,data = lung)
# 我们需要的是风险比,95%置信区间和pvalue
cox_sexSum <- summary(cox_sex)
cox_sexSum$coefficients # exp(coef) 即风险比
# coef exp(coef) se(coef) z Pr(>|z|)
# sex -0.531 0.588 0.167 -3.18 0.00149
cox_sexSum$conf.int # lower .95 upper .95 即置信区间
# exp(coef) exp(-coef) lower .95 upper .95
# sex 0.588 1.7 0.424 0.816
# paste0d的collapse参数,可以把这些字符串拼成一个长字符串,而不是放在一个向量中
HR <- round(cox_sexSum$coefficients[,2],2)
PValue <- round(cox_sexSum$coefficients[,5],3) #保留3位
CI <- paste0(round(cox_sexSum$conf.int[,3:4],2),collapse = '-')
Unicox <- data.frame("Characteristics" = "sex",
"Hazard Ratio" = HR,
"CI95" = CI,
"P Value" = PValue)
#----------自定义制作表格函数Unicoxtable()---------#
Unicoxtable <- function(x){
FML <- as.formula(paste0("Lusurv~",x))
cox_sex <- coxph(FML,data = lung)
cox_sexSum <- summary(cox_sex)
HR <- round(cox_sexSum$coefficients[,2],2)
PValue <- round(cox_sexSum$coefficients[,5],3) #保留3位
CI <- paste0(round(cox_sexSum$conf.int[,3:4],2),collapse = '-')
Unicox <- data.frame("Characteristics" = x,
"Hazard Ratio" = HR,
"CI95" = CI,
"P Value" = PValue)
return(Unicox)
}
#将变量名输入即可,例如age
Unicoxtable("age")
#结果如下:
# Characteristics Hazard.Ratio CI95 P.Value
# 1 age 1.02 1-1.04 0.042
#---------多个变量同时处理--------#
library(plyr)
library(xlsx)
Varname <- colnames(lung)[-c(2:3,11)] #提取部分变量名
Univar <- lapply(Varname, Unicoxtable)
Univar <- ldply(Univar,data.frame)
# 将pvalue小于0.001的特殊显示
Univar$P.Value[Univar$P.Value == 0] <- "<0.001"
#最后输出文件
write.xlsx(Univar,"Unicoxtable.xlsx",
col.names = T,row.names = F,
showNA = F)
接下来是多因素COX回归
#----------------------多因素COX回归---------------------#
# 筛选pvalue小于0.05的变量用于后续多因素分析
Univar$Characteristics[Univar$P.Value<0.05]
fml <- as.formula(paste0("Lusurv~",paste0(Univar$Characteristics[Univar$P.Value<0.05],collapse = "+")))
MultiCox <- coxph(fml,data = lung)
MultiSum <- summary(MultiCox)
MultiName <- as.character(Univar$Characteristics[Univar$P.Value<0.05])
MHR <- round(MultiSum$coefficients[,2],2)
MPValue <- round(MultiSum$coefficients[,5],3)
MCILow <- round(MultiSum$conf.int[,3],2)
MCIUp <- round(MultiSum$conf.int[,4],2)
MCI <- paste0(MCILow,"-",MCIUp)
MulCox <- data.frame("Characteristics" = MultiName,
"Hazard Ratio" = MHR,
"CI95" = MCI,
"P Value" = MPValue)
MulCox$P.Value[MulCox$P.Value == 0] <- "<0.001"
#输出结果
write.xlsx(MulCox,"MulCoxtable.xlsx",
col.names = T,row.names = F,
showNA = F)
#---------------------将单因素和多因素的数据框合并---------------------#
# 两个数据库行不一样,按照变量名匹配
outPut <- merge.data.frame(Univar,MulCox,
by="Characteristics",
all = T,sort = T)
#输出结果
write.xlsx(outPut,"COXtable.xlsx",
col.names = T,row.names = F,
showNA = F)
至此,就完成了生存分析、单/多因素COX回归以及基线表的制作
附诗一首
晚风吻尽荷花叶
任我醉倒在池边
夜梦美人来相伴
乍醒犹思残酒甜