rr

临床基线表与COX回归

2020-01-14  本文已影响0人  米开朗巨魔

在临床研究统计分析中,临床基线表对于展示研究数据的信息和结构尤为重要。
本文以R语言中自带临床数据集lung为例,展示基本的COX回归与临床基线表的制作。

首先是数据的生存分析

#获取R自带数据集lung
lung <- lung
#查看数据的基本结构
head(lung)
# 如下所示:
# inst time status age sex ph.ecog ph.karno pat.karno
# 1    3  306      2  74   1       1       90       100
# 2    3  455      2  68   1       0       90        90
# 3    3 1010      1  56   1       0       90        90
# 4    5  210      2  57   1       1       90        60
# 5    1  883      2  60   1       0      100        90
# 6   12 1022      1  74   1       1       50        80
# meal.cal wt.loss
# 1     1175      NA
# 2     1225      15
# 3       NA      15
# 4     1150      11
# 5       NA       0
# 6      513       0

#----------------------生存分析(Kaplan-Meier曲线)----------------------#
#    基本流程:
#    1)Surv()提取生存时间
#    2)如果不分组,就用survfit()拟合模型
#    3)如果有分组,就用survdiff()分析差异

#    1)Surv()提取生存时间
Lusurv <- Surv(time = lung$time,event = lung$status)

#    2)不分组,就用survfit()拟合模型并作图
Lufit <- survfit(Lusurv~1)
plot(Lufit)

#    3)有分组,就用survdiff()分析分组差异再作图
Lufit <- survfit(Lusurv~lung$sex)
plot(Lufit,conf.int = "none",
     col = c("red","blue"),
     lwd = 2,xlab = "Time(month)",ylab = "Survival Probability",
     mark.time = T)
abline(h=0.5,lty=3)   #中位生存时间线
legend("bottomleft",c("Male","Female"),
       col = c("red","blue"),lwd = 2)

# 查看不同性别生存时间是否差异显著
survdiff(Lusurv~lung$sex)
#如下:
# Call:
#   survdiff(formula = Lusurv ~ lung$sex)
# 
# N Observed Expected (O-E)^2/E (O-E)^2/V
# lung$sex=1 138      112     91.6      4.55      10.3
# lung$sex=2  90       53     73.4      5.68      10.3
# 
# Chisq= 10.3  on 1 degrees of freedom, p= 0.001 
#可以看出p= 0.001 ,不同性别生存时间有显著差异

然后是单因素COX回归

#-----------------单因素COX回归-----------------#
# 提取生存时间
Lusurv <- Surv(time = lung$time,event = lung$status)
# 将生存时间加入原先的lung数据(dataframe)
lung$Lusurv <- with(lung,Lusurv)  

#单因素cox回归(性别)
cox_sex <- coxph(Lusurv~sex,data = lung)
# 我们需要的是风险比,95%置信区间和pvalue

cox_sexSum <- summary(cox_sex)

cox_sexSum$coefficients  # exp(coef) 即风险比
# coef exp(coef) se(coef)     z Pr(>|z|)
# sex -0.531     0.588    0.167 -3.18  0.00149

cox_sexSum$conf.int      # lower .95 upper .95 即置信区间
# exp(coef) exp(-coef) lower .95 upper .95
# sex     0.588        1.7     0.424     0.816

# paste0d的collapse参数,可以把这些字符串拼成一个长字符串,而不是放在一个向量中
HR <- round(cox_sexSum$coefficients[,2],2)
PValue <- round(cox_sexSum$coefficients[,5],3) #保留3位
CI <- paste0(round(cox_sexSum$conf.int[,3:4],2),collapse = '-') 
Unicox <- data.frame("Characteristics" = "sex",
                     "Hazard Ratio" = HR,
                     "CI95" = CI,
                     "P Value" = PValue)



#----------自定义制作表格函数Unicoxtable()---------#
Unicoxtable <- function(x){
  FML <- as.formula(paste0("Lusurv~",x))
  cox_sex <- coxph(FML,data = lung)
  cox_sexSum <- summary(cox_sex)
  HR <- round(cox_sexSum$coefficients[,2],2)
  PValue <- round(cox_sexSum$coefficients[,5],3) #保留3位
  CI <- paste0(round(cox_sexSum$conf.int[,3:4],2),collapse = '-') 
  Unicox <- data.frame("Characteristics" = x,
                       "Hazard Ratio" = HR,
                       "CI95" = CI,
                       "P Value" = PValue)
  return(Unicox)
}
#将变量名输入即可,例如age
Unicoxtable("age")     
#结果如下:
# Characteristics Hazard.Ratio   CI95 P.Value
# 1             age         1.02 1-1.04   0.042

#---------多个变量同时处理--------#
library(plyr)
library(xlsx)
Varname <- colnames(lung)[-c(2:3,11)]     #提取部分变量名
Univar <- lapply(Varname, Unicoxtable)
Univar <- ldply(Univar,data.frame)

# 将pvalue小于0.001的特殊显示
Univar$P.Value[Univar$P.Value == 0] <- "<0.001"
#最后输出文件
write.xlsx(Univar,"Unicoxtable.xlsx",
           col.names = T,row.names = F,
           showNA = F)

接下来是多因素COX回归

#----------------------多因素COX回归---------------------#
# 筛选pvalue小于0.05的变量用于后续多因素分析
Univar$Characteristics[Univar$P.Value<0.05]

fml <- as.formula(paste0("Lusurv~",paste0(Univar$Characteristics[Univar$P.Value<0.05],collapse = "+")))
MultiCox <- coxph(fml,data = lung)
MultiSum <- summary(MultiCox)

MultiName <- as.character(Univar$Characteristics[Univar$P.Value<0.05])
MHR <- round(MultiSum$coefficients[,2],2)
MPValue <- round(MultiSum$coefficients[,5],3)
MCILow <- round(MultiSum$conf.int[,3],2)
MCIUp <- round(MultiSum$conf.int[,4],2)
MCI <- paste0(MCILow,"-",MCIUp)
MulCox <- data.frame("Characteristics" = MultiName,
                     "Hazard Ratio" = MHR,
                     "CI95" = MCI,
                     "P Value" = MPValue)
MulCox$P.Value[MulCox$P.Value == 0] <- "<0.001"
#输出结果
write.xlsx(MulCox,"MulCoxtable.xlsx",
           col.names = T,row.names = F,
           showNA = F)

#---------------------将单因素和多因素的数据框合并---------------------#
# 两个数据库行不一样,按照变量名匹配
outPut <- merge.data.frame(Univar,MulCox,
                           by="Characteristics",
                           all = T,sort = T)
#输出结果
write.xlsx(outPut,"COXtable.xlsx",
           col.names = T,row.names = F,
           showNA = F)

至此,就完成了生存分析、单/多因素COX回归以及基线表的制作


附诗一首

晚风吻尽荷花叶
任我醉倒在池边
夜梦美人来相伴
乍醒犹思残酒甜

上一篇下一篇

猜你喜欢

热点阅读