2020-11-15
和多个临床特征相关联的作图,以均值作为比较项
library(limma)
library(ggpubr)
expFile="TCGA_rpkm.txt" #表达数据文件
cliFile="clinical.txt" #临床数据文件
geneFile="hubGene.txt" #基因列表文件
读取表达文件,并对输入文件整理
rt=read.table(expFile,sep="\t",header=T,row.names = 1,check.names=F)
data=rt
data=avereps(data)
data=data[rowMeans(data)>0,]
读取基因列表
gene=read.table(geneFile,header=F,sep="\t",check.names=F)
data=data[as.vector(gene[,1]),]
删掉正常样品
group=sapply(strsplit(colnames(data),"\-"),"[",4)
group=sapply(strsplit(group,""),"[",1)
group=gsub("2","1",group)
data=data[,group==0]
colnames(data)=gsub("(.?)\-(.?)\-(.?)\-(.?)\-.*","\1\-\2\-\3",colnames(data))
data=as.data.frame(data)
data2=rbind(colnames(data),data)
write.csv(data2,file="TCGA-3-RPKM.csv")
读取临床数据
cli=read.table(cliFile,sep="\t",check.names=F,header=T,row.names=1) #读取临床文件
xlabel=colnames(cli)[1]
数据合并并输出结果
sameSample=intersect(colnames(data),rownames(cli))
data=t(data[,sameSample])
cli=cli[sameSample,]
rt=cbind(as.data.frame(data),cli)
rt1=cbind(as.data.frame(data),cli)
for(gene in colnames(rt)[1:(ncol(rt)-1)]){
data=rt[,c(gene,"cli")]
data=data[datacli))
datacli,levels=group)
comp=combn(group,2)
my_comparisons=list()
for(i in 1:ncol(comp)) {my_comparisons[[i]]=comp[,i]}
b=ggboxplot(data,x="cli",y="gene",color = "cli",xlab=xlabel,
ylab=paste(gene,"expression"),legend.title=xlabel,
add = "jitter")+
stat_compare_means(comparisons = my_comparisons)
pdf("gene.pdf",height=10,width=10)
print(b)
dev.off ()
}