生物信息分析

GEO数据下载

2019-03-26  本文已影响0人  白云梦_7

1. 使用R包GEOquery下载并整理GEO数据

根据文章找到所需的芯片信息GSE29250

library(GEOquery)
library(Biobase)
# Download series GSE29250 with GEOquery.
# destdir = "." stores the downloaded files in the current directory;
# getGPL = TRUE together with AnnotGPL = TRUE also downloads the platform
# annotation file so that it is available on the returned object.
# TRUE is spelled out because T is an ordinary, reassignable variable.
gse <- getGEO(
  "GSE29250",
  GSEMatrix = TRUE,
  destdir = ".",
  getGPL = TRUE,
  AnnotGPL = TRUE
)

得到文件


GEO files
GPL file
GSE-GPL1file
GPLanno file
GSE GPL2 file

表达数据
# Expression values of the first element returned by getGEO().
# The result keeps the name `exprs`, the same name as the accessor; R still
# resolves later `exprs(...)` calls to the function because call lookup
# skips non-function objects.
exprs <- Biobase::exprs(gse[[1]])

expr
样品处理分组等信息
# Per-sample metadata of the first element returned by getGEO()
# (title, geo_accession, characteristics, platform_id, ... one row per GSM).
pdata <- Biobase::pData(gse[[1]])
> head(pdata)
                             title geo_accession                status submission_date last_update_date type
GSM723159                  NSCLC 1     GSM723159 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
GSM723160 adjacent normal tissue 1     GSM723160 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
GSM723161                  NSCLC 2     GSM723161 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
GSM723162 adjacent normal tissue 2     GSM723162 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
GSM723163                  NSCLC 3     GSM723163 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
GSM723164 adjacent normal tissue 3     GSM723164 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
          channel_count source_name_ch1 organism_ch1 characteristics_ch1 characteristics_ch1.1
GSM723159             1            lung Homo sapiens        gender: male  disease state: NSCLC
GSM723160             1            lung Homo sapiens        gender: male  disease state: NSCLC
GSM723161             1            lung Homo sapiens        gender: male  disease state: NSCLC
GSM723162             1            lung Homo sapiens        gender: male  disease state: NSCLC
GSM723163             1            lung Homo sapiens        gender: male  disease state: NSCLC
GSM723164             1            lung Homo sapiens        gender: male  disease state: NSCLC
          characteristics_ch1.2   characteristics_ch1.3 characteristics_ch1.4 treatment_protocol_ch1
GSM723159  nsclc type: squamous          tissue: cancer          tnm: /4/1/0/                   none
GSM723160  nsclc type: squamous tissue: adjacent normal          tnm: /4/1/0/                   none
GSM723161  nsclc type: squamous          tissue: cancer          tnm: /4/2/0/                   none
GSM723162  nsclc type: squamous tissue: adjacent normal          tnm: /4/2/0/                   none
GSM723163  nsclc type: squamous          tissue: cancer          tnm: /4/0/0/                   none
GSM723164  nsclc type: squamous tissue: adjacent normal          tnm: /4/0/0/                   none
          growth_protocol_ch1 molecule_ch1
GSM723159                none    total RNA
GSM723160                none    total RNA
GSM723161                none    total RNA
GSM723162                none    total RNA
GSM723163                none    total RNA
GSM723164                none    total RNA
                                                                                                                                                                                                  extract_protocol_ch1
GSM723159 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
GSM723160 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
GSM723161 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
GSM723162 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
GSM723163 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
GSM723164 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
          label_ch1                                                                 label_protocol_ch1
GSM723159    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723160    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723161    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723162    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723163    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
GSM723164    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
          taxid_ch1                             hyb_protocol                       scan_protocol description
GSM723159      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 1
GSM723160      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 2
GSM723161      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 3
GSM723162      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 4
GSM723163      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 5
GSM723164      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 6
          description.1
GSM723159   replicate 1
GSM723160   replicate 1
GSM723161   replicate 1
GSM723162   replicate 1
GSM723163   replicate 1
GSM723164   replicate 1
                                                                                   data_processing platform_id
GSM723159 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
GSM723160 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
GSM723161 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
GSM723162 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
GSM723163 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
GSM723164 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
          contact_name     contact_email contact_institute          contact_address contact_city
GSM723159     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
GSM723160     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
GSM723161     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
GSM723162     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
GSM723163     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
GSM723164     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
          contact_zip/postal_code contact_country supplementary_file data_row_count disease state:ch1
GSM723159                  316004           China               NONE          47225             NSCLC
GSM723160                  316004           China               NONE          47225             NSCLC
GSM723161                  316004           China               NONE          47225             NSCLC
GSM723162                  316004           China               NONE          47225             NSCLC
GSM723163                  316004           China               NONE          47225             NSCLC
GSM723164                  316004           China               NONE          47225             NSCLC
          gender:ch1 nsclc type:ch1      tissue:ch1 tnm:ch1
GSM723159       male       squamous          cancer /4/1/0/
GSM723160       male       squamous adjacent normal /4/1/0/
GSM723161       male       squamous          cancer /4/2/0/
GSM723162       male       squamous adjacent normal /4/2/0/
GSM723163       male       squamous          cancer /4/0/0/
GSM723164       male       squamous adjacent normal /4/0/0/

芯片平台的设计注释信息

> fdata<-fData(gse[[1]])
> head(fdata)
                       ID                                         Gene title Gene symbol Gene ID UniGene title
ILMN_1343291 ILMN_1343291 eukaryotic translation elongation factor 1 alpha 1      EEF1A1    1915              
ILMN_1343295 ILMN_1343295           glyceraldehyde-3-phosphate dehydrogenase       GAPDH    2597              
ILMN_1651199 ILMN_1651199                                                                     NA              
ILMN_1651209 ILMN_1651209                 solute carrier family 35 member E2     SLC35E2    9906              
ILMN_1651210 ILMN_1651210                    dual specificity phosphatase 22      DUSP22   56940              
ILMN_1651221 ILMN_1651221                                                                     NA              
             UniGene symbol UniGene ID
ILMN_1343291                          
ILMN_1343295                          
ILMN_1651199                          
ILMN_1651209                          
ILMN_1651210                          
ILMN_1651221                          
                                                                                      Nucleotide Title
ILMN_1343291            Homo sapiens eukaryotic translation elongation factor 1 alpha 1 (EEF1A1), mRNA
ILMN_1343295 Homo sapiens glyceraldehyde-3-phosphate dehydrogenase (GAPDH), transcript variant 1, mRNA
ILMN_1651199                                                                                          
ILMN_1651209     Homo sapiens solute carrier family 35 member E2 (SLC35E2), transcript variant 1, mRNA
ILMN_1651210         Homo sapiens dual specificity phosphatase 22 (DUSP22), transcript variant 2, mRNA
ILMN_1651221                                                                                          
                    GI GenBank Accession Platform_CLONEID Platform_ORF Platform_SPOTID Chromosome location
ILMN_1343291  83367078         NM_001402                                                            6q14.1
ILMN_1343295 576583510         NM_002046                                                             12p13
ILMN_1651199        NA                                                                                    
ILMN_1651209 315139027         NM_182838                                                           1p36.33
ILMN_1651210 557440873         NM_020185                                                            6p25.3
ILMN_1651221        NA                                                                                    
                                                   Chromosome annotation
ILMN_1343291 Chromosome 6, NC_000006.12 (73515750..73521032, complement)
ILMN_1343295              Chromosome 12, NC_000012.12 (6534405..6538375)
ILMN_1651199                                                            
ILMN_1651209   Chromosome 1, NC_000001.11 (1724838..1745999, complement)
ILMN_1651210                 Chromosome 6, NC_000006.12 (292057..351355)
ILMN_1651221                                                            
                                                                                                                                                                                                                                                                                                                                                                                  GO:Function
ILMN_1343291                                                                                                                                                                                                                           GTP binding///GTPase activity///poly(A) RNA binding///protein binding///protein kinase binding///tRNA binding///translation elongation factor activity
ILMN_1343295 NAD binding///NADP binding///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///identical protein binding///microtubule binding///peptidyl-cysteine S-nitrosylase activity///protein binding
ILMN_1651199                                                                                                                                                                                                                                                                                                                                                                                 
ILMN_1651209                                                                                                                                                                                                                                                                                                                                                                                 
ILMN_1651210                                                                                                                                                                                                                                                                                   protein tyrosine phosphatase activity///protein tyrosine/serine/threonine phosphatase activity
ILMN_1651221                                                                                                                                                                                                                                                                                                                                                                                 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    GO:Process
ILMN_1343291                                                                                                                                                                                                                                                                                                                                         cellular response to epidermal growth factor stimulus///regulation of chaperone-mediated autophagy///regulation of transcription, DNA-templated///transcription, DNA-templated///translational elongation
ILMN_1343295                                                                                                                                                                                                                      canonical glycolysis///cellular response to interferon-gamma///gluconeogenesis///microtubule cytoskeleton organization///negative regulation of translation///negative regulation of translation///neuron apoptotic process///peptidyl-cysteine S-trans-nitrosylation///protein stabilization///regulation of macroautophagy
ILMN_1651199                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
ILMN_1651209                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
ILMN_1651210 apoptotic process///cell proliferation///inactivation of MAPK activity///multicellular organism development///negative regulation of T cell activation///negative regulation of T cell mediated immunity///negative regulation of T cell receptor signaling pathway///negative regulation of transcription from RNA polymerase II promoter///peptidyl-tyrosine dephosphorylation///positive regulation of JNK cascade///protein dephosphorylation///regulation of cell proliferation///transforming growth factor beta receptor signaling pathway
ILMN_1651221                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                   GO:Component
ILMN_1343291                                                                                 cortical actin cytoskeleton///cytoplasm///cytoplasm///cytoplasm///cytoplasmic side of lysosomal membrane///cytosol///cytosol///eukaryotic translation elongation factor 1 complex///extracellular exosome///extracellular space///membrane///nucleolus///nucleus///ruffle membrane
ILMN_1343295 GAIT complex///cytoplasm///cytoplasm///cytosol///cytosol///cytosol///extracellular exosome///extracellular matrix///intracellular membrane-bounded organelle///intracellular ribonucleoprotein complex///lipid particle///membrane///microtubule cytoskeleton///nuclear membrane///nucleus///nucleus///perinuclear region of cytoplasm///plasma membrane///vesicle
ILMN_1651199                                                                                                                                                                                                                                                                                                                                                                   
ILMN_1651209                                                                                                                                                                                                                                                                                                                                     integral component of membrane
ILMN_1651210                                                                                                                                                                                                                                                                                                                                                cytoplasm///nucleus
ILMN_1651221                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                 GO:Function ID
ILMN_1343291                           GO:0005525///GO:0003924///GO:0044822///GO:0005515///GO:0019901///GO:0000049///GO:0003746
ILMN_1343295 GO:0051287///GO:0050661///GO:0004365///GO:0004365///GO:0004365///GO:0042802///GO:0008017///GO:0035605///GO:0005515
ILMN_1651199                                                                                                                   
ILMN_1651209                                                                                                                   
ILMN_1651210                                                                                            GO:0004725///GO:0008138
ILMN_1651221                                                                                                                   
                                                                                                                                                                      GO:Process ID
ILMN_1343291                                                                                                         GO:0071364///GO:1904714///GO:0006355///GO:0006351///GO:0006414
ILMN_1343295                                        GO:0061621///GO:0071346///GO:0006094///GO:0000226///GO:0017148///GO:0017148///GO:0051402///GO:0035606///GO:0050821///GO:0016241
ILMN_1651199                                                                                                                                                                       
ILMN_1651209                                                                                                                                                                       
ILMN_1651210 GO:0006915///GO:0008283///GO:0000188///GO:0007275///GO:0050868///GO:0002710///GO:0050860///GO:0000122///GO:0035335///GO:0046330///GO:0006470///GO:0042127///GO:0007179
ILMN_1651221                                                                                                                                                                       
                                                                                                                                                                                                                                                  GO:Component ID
ILMN_1343291                                                                  GO:0030864///GO:0005737///GO:0005737///GO:0005737///GO:0098574///GO:0005829///GO:0005829///GO:0005853///GO:0070062///GO:0005615///GO:0016020///GO:0005730///GO:0005634///GO:0032587
ILMN_1343295 GO:0097452///GO:0005737///GO:0005737///GO:0005829///GO:0005829///GO:0005829///GO:0070062///GO:0031012///GO:0043231///GO:0030529///GO:0005811///GO:0016020///GO:0015630///GO:0031965///GO:0005634///GO:0005634///GO:0048471///GO:0005886///GO:0031982
ILMN_1651199                                                                                                                                                                                                                                                     
ILMN_1651209                                                                                                                                                                                                                                           GO:0016021
ILMN_1651210                                                                                                                                                                                                                              GO:0005737///GO:0005634
ILMN_1651221                                                                                                                                                                                                                                                     
                                              Platform_SEQUENCE
ILMN_1343291 TGTGTTGAGAGCTTCTCAGACTATCCACCTTTGGGTCGCTTTGCTGTTCG
ILMN_1343295 CTTCAACAGCGACACCCACTCCTCCACCTTTGACGCTGGGGCTGGCATTG
ILMN_1651199 ATGCGAGGCCCCAGGGTTCGGCCCCGCAGCGCCGCTGAGTCCAAGGACCG
ILMN_1651209 TCACGGCGTACGCCCTCATGGGGAAAATCTCCCCGGTGACTTTCAGGTCC
ILMN_1651210 TGTGGACATGAGAGTTAGTTCTGTTTTGCCTGCACGGTGGGAGCGGCGTA
ILMN_1651221 GCCGCCCCCTGCTTCACGGAGCCTGGTCCCATCAACCGCCGAAGGGCTGA

2.直接下载


RAWdata

GEO自带差异分析:GEO2R


GEO2R-1点击
GEO2R-2选择平台,设置分组
GEO2R-3Top250/see all
result1:第一列是有差异的ID,点击可看到具体表达情况
result1
result2
# Version info: R 3.2.3, Biobase 2.30.0, GEOquery 2.40.0, limma 3.26.8
# R scripts generated  Sat Sep 29 03:49:34 EDT 2018

################################################################
#   Differential expression analysis with limma
library(Biobase)
library(GEOquery)
library(limma)

# load series and platform data from GEO

gset <- getGEO("GSE29250", GSEMatrix = TRUE, AnnotGPL = FALSE)
# When getGEO() returns more than one element, keep the one whose name
# contains the platform id GPL8179; otherwise keep the only element.
idx <- if (length(gset) > 1) grep("GPL8179", attr(gset, "names")) else 1
gset <- gset[[idx]]

# make proper column names to match toptable
fvarLabels(gset) <- make.names(fvarLabels(gset))

# group names for all samples: one character per sample, in sample order
gsms <- "010101010101"
sml <- strsplit(gsms, "")[[1]]

# log2 transform
# Decide from the quantiles of the expression values whether the data still
# need a log2 transform; if so, values <= 0 become NaN before taking log2.
ex <- exprs(gset)
qx <- as.numeric(
  quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm = TRUE)
)
LogC <- (qx[5] > 100) ||
  (qx[6] - qx[1] > 50 && qx[2] > 0) ||
  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
if (LogC) {
  ex[which(ex <= 0)] <- NaN
  exprs(gset) <- log2(ex)
}

# set up the data and proceed with analysis
sml <- paste0("G", sml)    # set group names ("G0" / "G1")
fl <- as.factor(sml)
# Store the grouping on the ExpressionSet so the model formula can use it.
gset$description <- fl
design <- model.matrix(~ description + 0, gset)
colnames(design) <- levels(fl)
fit <- lmFit(gset, design)
# Contrast of interest: group G1 minus group G0.
cont.matrix <- makeContrasts(G1-G0, levels = design)
fit2 <- contrasts.fit(fit, cont.matrix)
# Arguments are named in full to avoid positional / partial matching.
fit2 <- eBayes(fit2, proportion = 0.01)
tT <- topTable(fit2, adjust.method = "fdr", sort.by = "B", number = 250)

# Keep only the reported columns and print the table tab-separated to stdout.
tT <- subset(
  tT,
  select = c(
    "ID", "adj.P.Val", "P.Value", "t", "B", "logFC",
    "SEQUENCE", "miRNA_ID", "SPOT_ID"
  )
)
write.table(tT, file = stdout(), row.names = FALSE, sep = "\t")


################################################################
#   Boxplot for selected GEO samples
library(Biobase)
library(GEOquery)

# load series and platform data from GEO

gset <- getGEO("GSE29250", GSEMatrix = TRUE, getGPL = FALSE)
# When getGEO() returns more than one element, keep the one whose name
# contains the platform id GPL8179; otherwise keep the only element.
idx <- if (length(gset) > 1) grep("GPL8179", attr(gset, "names")) else 1
gset <- gset[[idx]]

# group names for all samples in a series: one character per sample
gsms <- "010101010101"
# The trailing note below must be a comment; without the leading "#" the
# line is a syntax error and the script does not parse.
sml <- paste0("G", strsplit(gsms, "")[[1]])  # set group names

# order samples by group
sample_order <- order(sml)
ex <- exprs(gset)[, sample_order]
sml <- sml[sample_order]
fl <- as.factor(sml)
labels <- c("test", "control")

# set parameters and draw the plot
palette(c("#f4dfdf", "#dfeaf4", "#AABBCC"))
# Device width grows with the number of samples.
dev.new(width = 4 + dim(gset)[[2]] / 5, height = 6)
# Bottom margin grows with the longest sample name (labels are drawn with
# las = 2 below).
par(mar = c(2 + round(max(nchar(sampleNames(gset))) / 2), 4, 2, 1))
title <- paste0("GSE29250", "/", annotation(gset), " selected samples")
# col = fl: the group factor's integer codes index into the palette set above.
boxplot(
  ex,
  boxwex = 0.6, notch = TRUE, main = title,
  outline = FALSE, las = 2, col = fl
)
legend("topleft", labels, fill = palette(), bty = "n")
上一篇下一篇

猜你喜欢

热点阅读