GEOquery
2019-06-20 本文已影响0人
ctomclancy
下载GSE
rm(list = ls())
options(stringsAsFactors = F)
library(GEOquery)
library(limma)
library(dplyr)
gset <- getGEO('GSE112790',destdir = '.',AnnotGPL = T,getGPL = T)
exprSet <- exprs(gset[[1]])
dim(exprSet)
pData <- pData(gset[[1]])
group_list=as.character(pData$geo_accession)
过滤表达矩阵
library(hgu133plus2.db)
ids <- toTable(hgu133plus2SYMBOL)
length(unique(ids$symbol))
exprSet <- exprSet[rownames(exprSet) %in% ids$probe_id, ]
删除表达量低的探针,取最大表达量
ids <- ids[match(rownames(exprSet),ids$probe_id),]
head(ids)
exprSet[1:5,1:5]
tmp <- by(exprSet,ids$symbol,function(x) rownames(x)[which.max(rowMeans(x))] )
probes <- as.character(tmp)
exprSet <- exprSet[rownames(exprSet) %in% probes,]
dim(exprSet)
head(exprSet)
探针名转为基因名
rownames(exprSet) <- ids[match(rownames(exprSet),ids$probe_id),2]
exprSet[1:5,1:5]
write.csv(exprSet,"GSE112790_matrix.csv")
制作分组矩阵
sample <- pData$geo_accession
tissue <- rep(c("normal","tumor"),c(15,183))
design_df <- data.frame(sample,tissue)
制作差异比较矩阵
cont.matrix <- makeContrasts(
vs = TStumor-TSnormal,levels = design)
lmFit
TS <- tissue
TS <- factor(TS, levels = unique(TS))
design <- model.matrix(~0+TS)
fit <- lmFit(exprSet, design)
eBayes
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
topTable
tempOutput = topTable(fit2, coef=1, n=Inf)
nrDEG = na.omit(tempOutput)
head(nrDEG)
write.csv(nrDEG,"limma_notrend.results.csv",quote = F)
results <- decideTests(fit2)
vennDiagram(results)