GEO数据下载
2019-03-26 本文已影响0人
白云梦_7
1.R包GEOquery进行下载整理GEO数据
根据文章找到所需的芯片信息GSE29250
# Load GEOquery (provides getGEO) and Biobase (ExpressionSet accessors).
library(GEOquery)
library(Biobase)
# Download the series matrix for GSE29250 into the current directory
# (destdir = "."). With getGPL and AnnotGPL both TRUE, the platform
# annotation file is downloaded as well, so feature annotation is available.
gse <- getGEO(
  "GSE29250",
  GSEMatrix = TRUE,
  destdir = ".",
  getGPL = TRUE,
  AnnotGPL = TRUE
)
得到文件
GEO files
GPL file
GSE-GPL1file
GPLanno file
GSE GPL2 file
表达数据
# Expression values of the first element returned by getGEO().
# NOTE(review): this variable has the same name as the Biobase accessor
# exprs(); calls like exprs(x) still resolve to the function, but the
# shared name is easy to misread.
exprs <- Biobase::exprs(gse[[1L]])
样品处理分组等信息
# Sample-level information (title, characteristics, protocols, contact, ...)
# for the first element returned by getGEO(); rows are GSM accessions.
pdata <- Biobase::pData(gse[[1L]])
> head(pdata)
title geo_accession status submission_date last_update_date type
GSM723159 NSCLC 1 GSM723159 Public on Sep 16 2012 May 12 2011 Sep 16 2012 RNA
GSM723160 adjacent normal tissue 1 GSM723160 Public on Sep 16 2012 May 12 2011 Sep 16 2012 RNA
GSM723161 NSCLC 2 GSM723161 Public on Sep 16 2012 May 12 2011 Sep 16 2012 RNA
GSM723162 adjacent normal tissue 2 GSM723162 Public on Sep 16 2012 May 12 2011 Sep 16 2012 RNA
GSM723163 NSCLC 3 GSM723163 Public on Sep 16 2012 May 12 2011 Sep 16 2012 RNA
GSM723164 adjacent normal tissue 3 GSM723164 Public on Sep 16 2012 May 12 2011 Sep 16 2012 RNA
channel_count source_name_ch1 organism_ch1 characteristics_ch1 characteristics_ch1.1
GSM723159 1 lung Homo sapiens gender: male disease state: NSCLC
GSM723160 1 lung Homo sapiens gender: male disease state: NSCLC
GSM723161 1 lung Homo sapiens gender: male disease state: NSCLC
GSM723162 1 lung Homo sapiens gender: male disease state: NSCLC
GSM723163 1 lung Homo sapiens gender: male disease state: NSCLC
GSM723164 1 lung Homo sapiens gender: male disease state: NSCLC
characteristics_ch1.2 characteristics_ch1.3 characteristics_ch1.4 treatment_protocol_ch1
GSM723159 nsclc type: squamous tissue: cancer tnm: /4/1/0/ none
GSM723160 nsclc type: squamous tissue: adjacent normal tnm: /4/1/0/ none
GSM723161 nsclc type: squamous tissue: cancer tnm: /4/2/0/ none
GSM723162 nsclc type: squamous tissue: adjacent normal tnm: /4/2/0/ none
GSM723163 nsclc type: squamous tissue: cancer tnm: /4/0/0/ none
GSM723164 nsclc type: squamous tissue: adjacent normal tnm: /4/0/0/ none
growth_protocol_ch1 molecule_ch1
GSM723159 none total RNA
GSM723160 none total RNA
GSM723161 none total RNA
GSM723162 none total RNA
GSM723163 none total RNA
GSM723164 none total RNA
extract_protocol_ch1
GSM723159 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis .
GSM723160 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis .
GSM723161 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis .
GSM723162 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis .
GSM723163 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis .
GSM723164 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis .
label_ch1 label_protocol_ch1
GSM723159 biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723160 biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723161 biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723162 biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723163 biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723164 biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
taxid_ch1 hyb_protocol scan_protocol description
GSM723159 9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol SAMPLE 1
GSM723160 9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol SAMPLE 2
GSM723161 9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol SAMPLE 3
GSM723162 9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol SAMPLE 4
GSM723163 9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol SAMPLE 5
GSM723164 9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol SAMPLE 6
description.1
GSM723159 replicate 1
GSM723160 replicate 1
GSM723161 replicate 1
GSM723162 replicate 1
GSM723163 replicate 1
GSM723164 replicate 1
data_processing platform_id
GSM723159 The data were normalised using average normalisation with Illumina Genomestudio software GPL10558
GSM723160 The data were normalised using average normalisation with Illumina Genomestudio software GPL10558
GSM723161 The data were normalised using average normalisation with Illumina Genomestudio software GPL10558
GSM723162 The data were normalised using average normalisation with Illumina Genomestudio software GPL10558
GSM723163 The data were normalised using average normalisation with Illumina Genomestudio software GPL10558
GSM723164 The data were normalised using average normalisation with Illumina Genomestudio software GPL10558
contact_name contact_email contact_institute contact_address contact_city
GSM723159 lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238 Zhoushan
GSM723160 lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238 Zhoushan
GSM723161 lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238 Zhoushan
GSM723162 lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238 Zhoushan
GSM723163 lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238 Zhoushan
GSM723164 lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238 Zhoushan
contact_zip/postal_code contact_country supplementary_file data_row_count disease state:ch1
GSM723159 316004 China NONE 47225 NSCLC
GSM723160 316004 China NONE 47225 NSCLC
GSM723161 316004 China NONE 47225 NSCLC
GSM723162 316004 China NONE 47225 NSCLC
GSM723163 316004 China NONE 47225 NSCLC
GSM723164 316004 China NONE 47225 NSCLC
gender:ch1 nsclc type:ch1 tissue:ch1 tnm:ch1
GSM723159 male squamous cancer /4/1/0/
GSM723160 male squamous adjacent normal /4/1/0/
GSM723161 male squamous cancer /4/2/0/
GSM723162 male squamous adjacent normal /4/2/0/
GSM723163 male squamous cancer /4/0/0/
GSM723164 male squamous adjacent normal /4/0/0/
芯片平台的设计注释信息
> fdata<-fData(gse[[1]])
> head(fdata)
ID Gene title Gene symbol Gene ID UniGene title
ILMN_1343291 ILMN_1343291 eukaryotic translation elongation factor 1 alpha 1 EEF1A1 1915
ILMN_1343295 ILMN_1343295 glyceraldehyde-3-phosphate dehydrogenase GAPDH 2597
ILMN_1651199 ILMN_1651199 NA
ILMN_1651209 ILMN_1651209 solute carrier family 35 member E2 SLC35E2 9906
ILMN_1651210 ILMN_1651210 dual specificity phosphatase 22 DUSP22 56940
ILMN_1651221 ILMN_1651221 NA
UniGene symbol UniGene ID
ILMN_1343291
ILMN_1343295
ILMN_1651199
ILMN_1651209
ILMN_1651210
ILMN_1651221
Nucleotide Title
ILMN_1343291 Homo sapiens eukaryotic translation elongation factor 1 alpha 1 (EEF1A1), mRNA
ILMN_1343295 Homo sapiens glyceraldehyde-3-phosphate dehydrogenase (GAPDH), transcript variant 1, mRNA
ILMN_1651199
ILMN_1651209 Homo sapiens solute carrier family 35 member E2 (SLC35E2), transcript variant 1, mRNA
ILMN_1651210 Homo sapiens dual specificity phosphatase 22 (DUSP22), transcript variant 2, mRNA
ILMN_1651221
GI GenBank Accession Platform_CLONEID Platform_ORF Platform_SPOTID Chromosome location
ILMN_1343291 83367078 NM_001402 6q14.1
ILMN_1343295 576583510 NM_002046 12p13
ILMN_1651199 NA
ILMN_1651209 315139027 NM_182838 1p36.33
ILMN_1651210 557440873 NM_020185 6p25.3
ILMN_1651221 NA
Chromosome annotation
ILMN_1343291 Chromosome 6, NC_000006.12 (73515750..73521032, complement)
ILMN_1343295 Chromosome 12, NC_000012.12 (6534405..6538375)
ILMN_1651199
ILMN_1651209 Chromosome 1, NC_000001.11 (1724838..1745999, complement)
ILMN_1651210 Chromosome 6, NC_000006.12 (292057..351355)
ILMN_1651221
GO:Function
ILMN_1343291 GTP binding///GTPase activity///poly(A) RNA binding///protein binding///protein kinase binding///tRNA binding///translation elongation factor activity
ILMN_1343295 NAD binding///NADP binding///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///identical protein binding///microtubule binding///peptidyl-cysteine S-nitrosylase activity///protein binding
ILMN_1651199
ILMN_1651209
ILMN_1651210 protein tyrosine phosphatase activity///protein tyrosine/serine/threonine phosphatase activity
ILMN_1651221
GO:Process
ILMN_1343291 cellular response to epidermal growth factor stimulus///regulation of chaperone-mediated autophagy///regulation of transcription, DNA-templated///transcription, DNA-templated///translational elongation
ILMN_1343295 canonical glycolysis///cellular response to interferon-gamma///gluconeogenesis///microtubule cytoskeleton organization///negative regulation of translation///negative regulation of translation///neuron apoptotic process///peptidyl-cysteine S-trans-nitrosylation///protein stabilization///regulation of macroautophagy
ILMN_1651199
ILMN_1651209
ILMN_1651210 apoptotic process///cell proliferation///inactivation of MAPK activity///multicellular organism development///negative regulation of T cell activation///negative regulation of T cell mediated immunity///negative regulation of T cell receptor signaling pathway///negative regulation of transcription from RNA polymerase II promoter///peptidyl-tyrosine dephosphorylation///positive regulation of JNK cascade///protein dephosphorylation///regulation of cell proliferation///transforming growth factor beta receptor signaling pathway
ILMN_1651221
GO:Component
ILMN_1343291 cortical actin cytoskeleton///cytoplasm///cytoplasm///cytoplasm///cytoplasmic side of lysosomal membrane///cytosol///cytosol///eukaryotic translation elongation factor 1 complex///extracellular exosome///extracellular space///membrane///nucleolus///nucleus///ruffle membrane
ILMN_1343295 GAIT complex///cytoplasm///cytoplasm///cytosol///cytosol///cytosol///extracellular exosome///extracellular matrix///intracellular membrane-bounded organelle///intracellular ribonucleoprotein complex///lipid particle///membrane///microtubule cytoskeleton///nuclear membrane///nucleus///nucleus///perinuclear region of cytoplasm///plasma membrane///vesicle
ILMN_1651199
ILMN_1651209 integral component of membrane
ILMN_1651210 cytoplasm///nucleus
ILMN_1651221
GO:Function ID
ILMN_1343291 GO:0005525///GO:0003924///GO:0044822///GO:0005515///GO:0019901///GO:0000049///GO:0003746
ILMN_1343295 GO:0051287///GO:0050661///GO:0004365///GO:0004365///GO:0004365///GO:0042802///GO:0008017///GO:0035605///GO:0005515
ILMN_1651199
ILMN_1651209
ILMN_1651210 GO:0004725///GO:0008138
ILMN_1651221
GO:Process ID
ILMN_1343291 GO:0071364///GO:1904714///GO:0006355///GO:0006351///GO:0006414
ILMN_1343295 GO:0061621///GO:0071346///GO:0006094///GO:0000226///GO:0017148///GO:0017148///GO:0051402///GO:0035606///GO:0050821///GO:0016241
ILMN_1651199
ILMN_1651209
ILMN_1651210 GO:0006915///GO:0008283///GO:0000188///GO:0007275///GO:0050868///GO:0002710///GO:0050860///GO:0000122///GO:0035335///GO:0046330///GO:0006470///GO:0042127///GO:0007179
ILMN_1651221
GO:Component ID
ILMN_1343291 GO:0030864///GO:0005737///GO:0005737///GO:0005737///GO:0098574///GO:0005829///GO:0005829///GO:0005853///GO:0070062///GO:0005615///GO:0016020///GO:0005730///GO:0005634///GO:0032587
ILMN_1343295 GO:0097452///GO:0005737///GO:0005737///GO:0005829///GO:0005829///GO:0005829///GO:0070062///GO:0031012///GO:0043231///GO:0030529///GO:0005811///GO:0016020///GO:0015630///GO:0031965///GO:0005634///GO:0005634///GO:0048471///GO:0005886///GO:0031982
ILMN_1651199
ILMN_1651209 GO:0016021
ILMN_1651210 GO:0005737///GO:0005634
ILMN_1651221
Platform_SEQUENCE
ILMN_1343291 TGTGTTGAGAGCTTCTCAGACTATCCACCTTTGGGTCGCTTTGCTGTTCG
ILMN_1343295 CTTCAACAGCGACACCCACTCCTCCACCTTTGACGCTGGGGCTGGCATTG
ILMN_1651199 ATGCGAGGCCCCAGGGTTCGGCCCCGCAGCGCCGCTGAGTCCAAGGACCG
ILMN_1651209 TCACGGCGTACGCCCTCATGGGGAAAATCTCCCCGGTGACTTTCAGGTCC
ILMN_1651210 TGTGGACATGAGAGTTAGTTCTGTTTTGCCTGCACGGTGGGAGCGGCGTA
ILMN_1651221 GCCGCCCCCTGCTTCACGGAGCCTGGTCCCATCAACCGCCGAAGGGCTGA
result2
2.直接下载
RAWdata
GEO自带差异分析:GEO2R
GEO2R-1点击
GEO2R-2选择平台,设置分组
GEO2R-3Top250/see all
result1:第一列是有差异的ID,点击可看到具体表达情况
result1
# Version info: R 3.2.3, Biobase 2.30.0, GEOquery 2.40.0, limma 3.26.8
# R scripts generated Sat Sep 29 03:49:34 EDT 2018
################################################################
# Differential expression analysis with limma
#
# Downloads GSE29250, selects the GPL8179 element when several are returned,
# log2-transforms the values if they do not look log-scaled, fits a two-group
# linear model (G1 vs G0) and prints the top 250 rows to stdout.
library(Biobase)
library(GEOquery)
library(limma)

# load series and platform data from GEO
gset <- getGEO("GSE29250", GSEMatrix = TRUE, AnnotGPL = FALSE)
# when getGEO() returns more than one element, take the one whose name
# contains "GPL8179"; otherwise take the only element
if (length(gset) > 1) idx <- grep("GPL8179", attr(gset, "names")) else idx <- 1
gset <- gset[[idx]]

# make proper column names to match toptable
fvarLabels(gset) <- make.names(fvarLabels(gset))

# group names for all samples: one character per sample ("0" or "1"),
# split into a character vector in one step instead of an index loop
gsms <- "010101010101"
sml <- strsplit(gsms, "")[[1]]

# log2 transform
# The quantiles (min, 25%, 50%, 75%, 99%, max) feed a heuristic that decides
# whether the values still need a log2 transform.
ex <- exprs(gset)
qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))
LogC <- (qx[5] > 100) ||
  (qx[6] - qx[1] > 50 && qx[2] > 0) ||
  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
if (LogC) {
  # non-positive values have no log; mark them NaN before transforming
  ex[which(ex <= 0)] <- NaN
  exprs(gset) <- log2(ex)
}

# set up the data and proceed with analysis
sml <- paste0("G", sml) # set group names
fl <- as.factor(sml)
gset$description <- fl
# no-intercept design: one column per group level
design <- model.matrix(~ description + 0, gset)
colnames(design) <- levels(fl)
fit <- lmFit(gset, design)
cont.matrix <- makeContrasts(G1-G0, levels = design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2, proportion = 0.01)
# spell out adjust.method rather than relying on partial matching of "adjust"
tT <- topTable(fit2, adjust.method = "fdr", sort.by = "B", number = 250)
tT <- subset(
  tT,
  select = c(
    "ID", "adj.P.Val", "P.Value", "t", "B", "logFC",
    "SEQUENCE", "miRNA_ID", "SPOT_ID"
  )
)
write.table(tT, file = stdout(), row.names = FALSE, sep = "\t")
################################################################
# Boxplot for selected GEO samples
#
# Downloads GSE29250 (without platform data), selects the GPL8179 element
# when several are returned, orders the samples by group and draws one box
# per sample, coloured by group.
library(Biobase)
library(GEOquery)

# load series and platform data from GEO
gset <- getGEO("GSE29250", GSEMatrix = TRUE, getGPL = FALSE)
if (length(gset) > 1) idx <- grep("GPL8179", attr(gset, "names")) else idx <- 1
gset <- gset[[idx]]

# group names for all samples in a series: one character per sample
gsms <- "010101010101"
sml <- strsplit(gsms, "")[[1]]
# The original line had lost the "#" before its trailing comment, which made
# the script a syntax error.
sml <- paste0("G", sml) # set group names

# order samples by group
group_order <- order(sml)
ex <- exprs(gset)[, group_order]
sml <- sml[group_order]
fl <- as.factor(sml)
labels <- c("test", "control")

# set parameters and draw the plot
palette(c("#f4dfdf", "#dfeaf4", "#AABBCC"))
# device width grows with the number of samples
dev.new(width = 4 + dim(gset)[[2]] / 5, height = 6)
# bottom margin grows with the longest sample name so rotated labels fit
par(mar = c(2 + round(max(nchar(sampleNames(gset))) / 2), 4, 2, 1))
title <- paste0("GSE29250", "/", annotation(gset), " selected samples")
boxplot(
  ex,
  boxwex = 0.6, notch = TRUE, main = title,
  outline = FALSE, las = 2, col = fl
)
legend("topleft", labels, fill = palette(), bty = "n")