鸡易呕

2018-12-20日总结

2018-12-23  本文已影响37人  小梦游仙境

GEO数据库基础知识

三篇老大参考链接

1.https://mp.weixin.qq.com/s?__biz=MzAxMDkxODM1Ng==&mid=2247486063&idx=1&sn=156bee5397e979722b36b78284188538&chksm=9b484ad4ac3fc3c2d025b9e4bb1c3c8392839c08d84697754d7d95d041b539479a45f19cf5d5&scene=21#wechat_redirect

2.http://www.bio-info-trainee.com/bioconductor_China/software/GEOquery.html

3.http://www.bio-info-trainee.com/1085.html

GEOquery包的下载与安装:

# Install GEOquery from Bioconductor if it is not already available.
# The old `source(".../biocLite.R"); biocLite()` installer has been retired by
# Bioconductor in favour of BiocManager, so use BiocManager::install() instead.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("GEOquery", quietly = TRUE)) {
  BiocManager::install("GEOquery")
}

# Attach GEOquery once, hiding only its start-up messages. Warnings are left
# enabled (no global `options(warn = -1)`) so real problems stay visible.
suppressMessages(library(GEOquery))

# Fetch the GDS858 record, caching the downloaded file in the working directory.
gds858 <- getGEO('GDS858', destdir = ".")

# Names of the metadata fields attached to the record.
names(Meta(gds858))

# First 5 rows and 5 columns of the record's data table.
Table(gds858)[1:5, 1:5]
image
library(GEOquery)

# Local cache file for the downloaded object. The original script tested and
# saved to an undefined variable `f`; it is defined here with the same file
# name that is loaded below.
f <- 'GSE76275_eSet.Rdata'
if (!file.exists(f)) {
  gset <- getGEO('GSE76275', destdir = ".",
                 AnnotGPL = FALSE,   # annotation file: not requested
                 getGPL = FALSE)     # platform file: not requested
  save(gset, file = f)               # save to local disk
}
load(f)                              # load the data (restores `gset`)
class(gset)
length(gset)
class(gset[[1]])
a <- gset[[1]]    # step down one level: take the first element
dat <- exprs(a)   # get the expression matrix
dim(dat)
dat[1:4, 1:4]
# See ?pData: retrieves the per-sample clinical information
# (e.g. sex, age, tumour stage).
pd <- pData(a)

# Keep columns 51-53 of the sample table.
# NOTE(review): the column positions are hard-coded; presumably they are the
# age / TNM / BMI columns for this dataset - confirm with colnames(pd).
trait <- pd[, 51:53]
head(trait)

# Split the second column into single characters at positions 2, 4 and 6.
# NOTE(review): assumes values shaped like "T1N0M0" - confirm against the data.
trait$T <- substring(trait[, 2], 2, 2)
trait$N <- substring(trait[, 2], 4, 4)
trait$M <- substring(trait[, 2], 6, 6)

# Columns were appended in T, N, M order, so the names must follow that order
# (the original listed 'T','M','N', which swapped the N and M labels).
colnames(trait) <- c('age', 'tmn', 'bmi', 'T', 'N', 'M')
head(trait)
save(trait, file = 'trait.Rdata')

# Two-level grouping: samples whose characteristics_ch1.1 field equals the
# "not TN" string become 'noTNBC', every other sample becomes 'TNBC'.
group_list <- ifelse(pd$characteristics_ch1.1 == 'triple-negative status: not TN',
                     'noTNBC', 'TNBC')
table(group_list)
save(dat, group_list, file = 'step1-output.Rdata')

dat

image-20181221092415595

dat[1:4,1:4]

image

trait=pd[,51:53]

image

head(trait)

image
# Keep columns 51-53 of the sample table `pd`.
# NOTE(review): the column positions are hard-coded; presumably they are the
# age / TNM / BMI columns for this dataset - confirm with colnames(pd).
trait <- pd[, 51:53]
head(trait)

# Split the second column into single characters at positions 2, 4 and 6.
# NOTE(review): assumes values shaped like "T1N0M0" - confirm against the data.
trait$T <- substring(trait[, 2], 2, 2)
trait$N <- substring(trait[, 2], 4, 4)
trait$M <- substring(trait[, 2], 6, 6)

# Columns were appended in T, N, M order, so the names must follow that order
# (the original listed 'T','M','N', which swapped the N and M labels).
colnames(trait) <- c('age', 'tmn', 'bmi', 'T', 'N', 'M')
head(trait)
save(trait, file = 'trait.Rdata')
image
# Assign each sample to one of two groups: 'noTNBC' when its
# characteristics_ch1.1 field equals the "not TN" string, 'TNBC' otherwise.
group_list <- ifelse(
  test = pd$characteristics_ch1.1 == 'triple-negative status: not TN',
  yes = 'noTNBC',
  no = 'TNBC'
)
# Count the samples in each group.
table(group_list)
image
# Write the expression matrix and the group labels to disk for later steps.
save(list = c("dat", "group_list"), file = 'step1-output.Rdata')

以上第一步结束了,生成了“step1-output.Rdata”文件。

上一篇下一篇

猜你喜欢

热点阅读