TCGA

TCGA预后基因筛选(单因素和多因素cox分析)

2021-08-10  本文已影响0人  萍智医信

①单因素cox分析

输入文件.png
library(survival)      #引用包
pFilter=0.05           #显著性过滤条件
setwd("E:\\research")     #设置工作目录
rt=read.table("pairTime.txt", header=T, sep="\t", check.names=F, row.names=1)     #读取输入文件
rt$futime=rt$futime/365      #生存单位改成年

#单因素过滤条件
outTab=data.frame()
sigGenes=c("futime","fustat")
for(gene in colnames(rt[,3:ncol(rt)])){
    cox=coxph(Surv(futime, fustat) ~ rt[,gene], data = rt)
    coxSummary = summary(cox)
    coxP=coxSummary$coefficients[,"Pr(>|z|)"]
        
    if(coxP<pFilter){
        sigGenes=c(sigGenes,gene)
        outTab=rbind(outTab,
                   cbind(gene=gene,
                   HR=coxSummary$conf.int[,"exp(coef)"],
                   HR.95L=coxSummary$conf.int[,"lower .95"],
                   HR.95H=coxSummary$conf.int[,"upper .95"],
                   pvalue=coxP) )
    }
}

#输出单因素结果
write.table(outTab,file="uniCox.txt",sep="\t",row.names=F,quote=F)
surSigExp=rt[,sigGenes]
surSigExp=cbind(id=row.names(surSigExp),surSigExp)
write.table(surSigExp,file="uniSigExp.txt",sep="\t",row.names=F,quote=F)

单因素cox分析的输出文件,也为多因素cox分析的输入文件


uniCox.txt.png
uniSigExp.png

②多因素cox分析

#引用包
library(survival)
library(survminer)
library(glmnet)
setwd("E:\\Master research")         #设置工作目录
rt=read.table("uniSigExp.txt", header=T, sep="\t", check.names=F, row.names=1)     #读取输入文件

###lasso筛选免疫lncRNA对
#rt$futime[rt$futime<=0]=0.003
x=as.matrix(rt[,c(3:ncol(rt))])
y=data.matrix(Surv(rt$futime,rt$fustat))
fit <- glmnet(x, y, family = "cox", maxit = 1000)
pdf("lasso.lambda.pdf")
plot(fit, xvar = "lambda", label = TRUE)
dev.off()
cvfit <- cv.glmnet(x, y, family="cox", maxit = 1000)
pdf("lasso.cvfit.pdf")
plot(cvfit)
abline(v=log(c(cvfit$lambda.min,cvfit$lambda.1se)),lty="dashed")
dev.off()
coef <- coef(fit, s=cvfit$lambda.min)
index <- which(coef != 0)
actCoef <- coef[index]
lassoGene=row.names(coef)[index]
lassoGene=c("futime","fustat",lassoGene)
lassoSigExp=rt[,lassoGene]
lassoSigExp=cbind(id=row.names(lassoSigExp),lassoSigExp)
write.table(lassoSigExp,file="lasso.SigExp.txt",sep="\t",row.names=F,quote=F)

#COX模型构建
rt=read.table("lasso.SigExp.txt",header=T,sep="\t",check.names=F,row.names=1)
multiCox=coxph(Surv(futime, fustat) ~ ., data = rt)
multiCox=step(multiCox, direction="both")
multiCoxSum=summary(multiCox)

#输出模型参数
outTab=data.frame()
outTab=cbind(
             coef=multiCoxSum$coefficients[,"coef"],
             HR=multiCoxSum$conf.int[,"exp(coef)"],
             HR.95L=multiCoxSum$conf.int[,"lower .95"],
             HR.95H=multiCoxSum$conf.int[,"upper .95"],
             pvalue=multiCoxSum$coefficients[,"Pr(>|z|)"])
outTab=cbind(id=row.names(outTab),outTab)
outTab=gsub("`","",outTab)
write.table(outTab,file="multi.Cox.txt",sep="\t",row.names=F,quote=F)

#输出病人风险值
riskScore=predict(multiCox, type="risk", newdata=rt)
coxGene=rownames(multiCoxSum$coefficients)
coxGene=gsub("`", "", coxGene)
outCol=c("futime", "fustat", coxGene)
riskOut=cbind(rt[,outCol], riskScore)
riskOut=cbind(id=rownames(riskOut), riskOut)
write.table(riskOut, file="riskScore.txt", sep="\t", quote=F, row.names=F)


############绘制森林图函数############
bioForest=function(coxFile=null, forestFile=null, forestCol=null){
    #读取输入文件
    rt <- read.table(coxFile,header=T,sep="\t",row.names=1,check.names=F)
    gene <- rownames(rt)
    hr <- sprintf("%.3f",rt$"HR")
    hrLow  <- sprintf("%.3f",rt$"HR.95L")
    hrHigh <- sprintf("%.3f",rt$"HR.95H")
    Hazard.ratio <- paste0(hr,"(",hrLow,"-",hrHigh,")")
    pVal <- ifelse(rt$pvalue<0.001, "<0.001", sprintf("%.3f", rt$pvalue))
        
    #输出图形
    pdf(file=forestFile, width=9, height=7)
    n <- nrow(rt)
    nRow <- n+1
    ylim <- c(1,nRow)
    layout(matrix(c(1,2),nc=2),width=c(3,2.5))
        
    #绘制森林图左边的临床信息
    xlim = c(0,3)
    par(mar=c(4,2.5,2,1))
    plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,xlab="",ylab="")
    text.cex=0.8
    text(0,n:1,gene,adj=0,cex=text.cex)
    text(2.08-0.5*0.2,n:1,pVal,adj=1,cex=text.cex);text(2.08-0.5*0.2,n+1,'pvalue',cex=text.cex,font=2,adj=1)
    text(3.12,n:1,Hazard.ratio,adj=1,cex=text.cex);text(3.12,n+1,'Hazard ratio',cex=text.cex,font=2,adj=1,)
        
    #绘制森林图
    par(mar=c(4,1,2,1),mgp=c(2,0.5,0))
    xlim = c(0,max(as.numeric(hrLow),as.numeric(hrHigh)))
    plot(1,xlim=xlim,ylim=ylim,type="n",axes=F,ylab="",xaxs="i",xlab="Hazard ratio")
    arrows(as.numeric(hrLow),n:1,as.numeric(hrHigh),n:1,angle=90,code=3,length=0.05,col="darkblue",lwd=2.5)
    abline(v=1,col="black",lty=2,lwd=2)
    boxcolor = ifelse(as.numeric(hr) > 1, forestCol[1], forestCol[2])
    points(as.numeric(hr), n:1, pch = 15, col = boxcolor, cex=1.6)
    axis(1)
    dev.off()
}
#绘制模型森林图
bioForest(coxFile="multi.Cox.txt", forestFile="model.multiForest.pdf", forestCol=c("red","green"))

#绘制模型中基因单因素森林图
uniRT=read.table("uniCox.txt",header=T,sep="\t",row.names=1,check.names=F)
uniRT=uniRT[coxGene,]
uniRT=cbind(id=row.names(uniRT), uniRT)
write.table(uniRT, file="unicox.forest.txt", sep="\t", row.names=F, quote=F)
bioForest(coxFile="unicox.forest.txt", forestFile="model.uniForest.pdf", forestCol=c("red","green"))
上一篇 下一篇

猜你喜欢

热点阅读