生物信息学

Q:ArrayExpress包常用函数及分析?

2021-04-12  本文已影响0人  高大石头

GEO数据库对应的R包是GEOquery,强大的ArrayExpress数据库也有对应的R包——ArrayExpress
可以通过其访问ArrayExpress数据库,并构建ExpressonSet、AffyBatch和NChannelSet数据结构。

核心函数:

  • getAE(accession, type="full")

  • ae2bioc(rawset)

对于处理过的数据:
cnames <- getcolproc(mexp1422)
mexp1422proc <- procset(mexp1422,cnames[2])

1.数据下载

rm(list = ls())
library(ArrayExpress)
library(affy)
acs="E-MEXP-1422"
mexp1422 <- getAE(acs,type = "full", local =T ) 
#type = "raw" 下载raw data
#type = "processed" 下载processed data
#type = "full" 两者都下载
# local = T 当前目录下有,不用再次下载

mexp1422raw <- ae2bioc(mexp1422) #从raw data构建ExpressionSet对象
## Reading in : E:/panCancer/27-GIlnc/AF15.CEL
## Reading in : E:/panCancer/27-GIlnc/AF16.CEL
## Reading in : E:/panCancer/27-GIlnc/AF6.CEL
## Reading in : E:/panCancer/27-GIlnc/AF14.CEL
## Reading in : E:/panCancer/27-GIlnc/AF7.CEL
## Reading in : E:/panCancer/27-GIlnc/AF8.CEL
cnames <- getcolproc(mexp1422)
mexp1422proc <- procset(mexp1422,cnames[2]) #从processed data构建ExpressionSet对象
## Reading processed data matrix file,  E:/panCancer/27-GIlnc/expression_and_calls.txt.magetab

2.数据预处理

mexp1422norm <- rma(mexp1422raw) #背景校正归一化
## Background correcting
## Normalizing
## Calculating Expression
boxplot(mexp1422norm)
image.png

可以看出校正后效果挺好

mexp1422norm1 <- exprs(mexp1422proc)
boxplot(mexp1422norm1) #可以看出processed data是归一化过的数据
image.png

3.差异分析

pd <- pData(mexp1422raw)
pd <- pd %>% 
  select("Factor.Value..RNAi.")
colnames(pd) <- "group"
pd$group <- case_when(pd$group=="GFP siRNA"~"ctrl",
                      pd$group=="PROX1 siRNA #1"~"siRNA1",
                      pd$group=="PROX1 siRNA #2"~"siRNA2")
group_list <- factor(pd$group,levels = c("ctrl","siRNA1","siRNA2"))
names(group_list) <- rownames(group_list)
design <- model.matrix(~0+group_list)
colnames(design) <- levels(group_list)
library(limma)
cont.matrix <- makeContrasts(si1.normal="siRNA1-ctrl",
                             si2.normal="siRNA2-ctrl",
                             si2.si1="siRNA2-siRNA1",
                             levels = design)
fit <- lmFit(mexp1422norm,design)
fit2 <- contrasts.fit(fit,cont.matrix)
fit2 <- eBayes(fit2)
deg <- topTable(fit2,adjust.method = "BH",sort.by = "F",n=Inf) %>% 
  rownames_to_column("probe_id")

# 探针注释
library(AnnoProbe)
probe2id <- idmap("GPL570")

volcanoInput <- probe2id %>% 
  inner_join(deg,by = "probe_id") %>% 
  select(-probe_id)

4. 火山图

volcanoInput$type <- case_when(volcanoInput$adj.P.Val<0.05&volcanoInput$si2.normal>1 ~ "up",
                               volcanoInput$adj.P.Val<0.05&volcanoInput$si2.normal< -1 ~ "down",
                               T~"stable")
ggplot(volcanoInput,aes(x=si2.normal,y=-log10(adj.P.Val),color=type))+
  geom_point(alpha=0.4,size=3.5)+
  scale_color_manual(values=c("#546de5", "#d2dae2","#ff4757"))+
  geom_vline(xintercept=c(-1,1),lty=4,col="black",lwd=0.8) +
  geom_hline(yintercept = -log10(0.05),lty=4,col="black",lwd=0.8) +
  labs(x="log2(fold change)",
       y="-log10 (p-value)")+
  theme_bw()+
  theme(plot.title = element_text(hjust = 0.5), 
        legend.position="right", 
        legend.title = element_blank())
image.png

参考链接:

ArrayExpress

R语言limma包差异基因分析(两组或两组以上)

R绘图:ggplot2绘制火山图

上一篇 下一篇

猜你喜欢

热点阅读