TCGA找差异表达
firebrowse下载TCGA数据
##用R处理数据
dat = read.table("mirna.txt",header=T,sep="\t")
View(head(dat))
dim(dat)
dat1 = dat[-1,c(1,seq(3,ncol(dat)-1,3))]
View(head(dat1))
dat2 = dat1[,-1]
rownames(dat2) = dat1[,1]
View(head(dat2))
library(stringr)
dat.tumor = dat2[,str_detect(colnames(dat2),'.01')]
dim(dat.tumor)
dat.normal = dat2[,str_detect(colnames(dat2),'.11A')]
dim(dat.normal)
dat.final = cbind(dat.tumor,dat.normal)
write.table(dat.final,"datExp.txt",col.names = T,row.names = T,quote=F,sep="\t")
dat.exp = read.table("datExp.txt",header=T,sep="\t")
View(head(dat.exp))
dat.exp[dat.exp == 0] = 1
dat.exp = log(dat.exp,2)
View(dat.exp)
#####-----
pheno = NULL
pheno$sample = colnames(dat.exp)
pheno$group = c(rep('tumor',ncol(dat.tumor)),rep('normal',ncol(dat.normal)))
pheno = as.data.frame(pheno)
head(pheno)
library(limma)
Group = factor(pheno$group,levels=c('tumor','normal'))
design = model.matrix(~0+Group)
colnames(design) <- c('tumor','normal')
design
fit <- lmFit(dat.exp, design)
#构建比对模型,比较两个条件下的表达数据
contrast.matrix <- makeContrasts(tumor-normal,#1
levels=design)
fit2 <- contrasts.fit(fit, contrast.matrix)
#贝叶斯检验
fit2 <- eBayes(fit2)
#找出差异基因检验结果并输出符合条件的结果
all.deg = NULL
diff = topTable(fit2,adjust.method="fdr",coef=1,p.value=0.05,
lfc=log(2,2),number=50000,sort.by = 'logFC')
write.table(diff,'DEG.txt',col.names=T,row.names=T,quote=F,sep="\t")
##火山图
diff0 = diff
P.value = diff0$adj.P.Val
FC = diff0$logFC
df <- data.frame(P.value,FC)
df$threshold = as.factor(abs(df$FC) > log(1.5,2) & df$P.value < 0.05)
levels(df$threshold) = c('grey','red')
df$logp = -log10(df$P.value)
#using base
plot(x = df[,2],y = df[,4], pch=16, col=df$threshold,
xlab="Fold change", ylab="-Log10(Pvalue)",
cex=0.5, main="Volcano Plot")
abline(v=c(-log(1.5,2),log(1.5,2)), h=-log10(0.05),col="green")
#using ggplot2
library(ggplot2)
ggplot(data=df,aes(x=FC,y=-log10(P.value)))+
geom_point(color=df$threshold)