生物信息学R语言源码特征变量选择

单因素Cox回归循环function

2019-09-29  本文已影响0人  陈宇乔
Uni_cox<- function(gene){
  phe$group=ifelse(exprSet[gene,]>quantile(exprSet[gene,],0.5),'high','low')
  survival_dat <- data.frame(group=phe$group,grade=phe$grade,T_stage=phe$T_stage,N_stage=phe$N_stage,stringsAsFactors = F)
  mySurv=with(phe,Surv(time, event))
  # m=coxph(mySurv ~ grade + T_stage +N_stage +group, data =  survival_dat)
  m=coxph(mySurv ~ group, data =  survival_dat)
  beta <- coef(m)
  se <- sqrt(diag(vcov(m)))
  HR <- exp(beta)
  HRse <- HR * se

  #summary(m)
  tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1),
                   HR = HR, HRse = HRse,
                   HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1),
                   HRCILL = exp(beta - qnorm(.975, 0, 1) * se),
                   HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 7)
  # rownames(tmp)<- gene
  return(tmp['grouplow',])
}

Uni_cox('GPD1L')
VarNames<- rownames(exprSet)[1:10]
UniVar<- lapply(VarNames,Uni_cox)
library(plyr)
Uri_cox_results<- ldply(UniVar)

麦子版本

# ########################## 麦子函数版本
# mySurv=with(phe,Surv(time, event)) 
# survival_dat[1:10,1:6]
# survival_dat<- phe
# UniCox<- function(x){
#   FML<- as.formula(paste0('mySurv~',x))
#   Gcox<- coxph(FML,data = survival_dat)
#   GSum<- summary(Gcox)
#   HR<- round(GSum$coefficients[,2],2)
#   Pvalue<- round(GSum$coefficients[,5],3)
#   CI<- paste0(round(GSum$conf.int[,3:4],2),collapse = '-')
#   Unicox<- data.frame('characterisiics'=x,
#                       'Hazard Ratio'=HR,
#                       'Pval'=Pvalue)
#   return(Unicox)
# }

有时候因为基因表达量太低,导致分组时出现错误,解决如下。

cox_res<- as.data.frame(do.call('rbind', setNames(t(cox_results), names(cox_results)))) ##### 这个太关键了,终于把rownames 搞出来了

# ## 批量生存分析 使用 coxph 回归方法
rm(list = ls())
load()
exprSet<- expr_TCGA
phe<- pheno
library(survival)
library(survminer)
phe$event=as.numeric(phe$event)

colnames(phe)
mySurv=with(phe,Surv(time, event))
cox_results <-apply(exprSet , 1 , function(gene){
  phe$group=ifelse(gene>median(gene),'high','low')
  if(table(table(phe$group))!=1){
  # survival_dat <- data.frame(group=phe$group,grade=phe$grade,size=phe$size,stringsAsFactors = F)
  # m=coxph(mySurv ~ grade + size + group, data =  survival_dat)
  survival_dat <- data.frame(group=phe$group,T_stage=phe$pathologic_T,N_stage=phe$pathologic_N,M_stage=phe$pathologic_M,stringsAsFactors = F)
  m=coxph(mySurv ~T_stage+N_stage+M_stage + group, data =  survival_dat)
  # m=coxph(mySurv ~group, data =  survival_dat)
  beta <- coef(m)
  se <- sqrt(diag(vcov(m)))
  HR <- exp(beta)
  HRse <- HR * se

  #summary(m)
  tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1),
                     HR = HR, HRse = HRse,
                     HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1),
                     HRCILL = exp(beta - qnorm(.975, 0, 1) * se),
                     HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 7)
  return(tmp['grouplow',])}
  if(table(table(phe$group))!=1){
    return(NA)}
})

gene_name<- names(cox_results)

cox_res<- as.data.frame(do.call('rbind', setNames(t(cox_results), names(cox_results)))) ##### 这个太关键了,终于把rownames 搞出来了

save(cox_res,exprSet,phe,file = './Rdata/surviva_uni_coxph.Rdata')
上一篇下一篇

猜你喜欢

热点阅读