【细胞通讯】PlantPhoneDB(2)
前面一个帖子主要介绍了PlantPhoneDB怎么做的和做了啥,今天这个帖子主要分享相关R包的使用和测试情况。
====安装====
首先需要安装相关的依赖包:
install.packages(c("devtools", "Seurat", "tidyverse", "ggplot2", "ggsci", "pheatmap", "ggpubr", "RColorBrewer", "patchwork", "lsa", "viridis", "hrbrthemes", "circlize", "chorddiag", "ggplotify", "data.table", "parmigene", "infotheo", "igraph", "cowplot", "grid", "dplyr"))
library(devtools)
install_github("Jasonxu0109/PlantPhoneDB")
=====测试例子(一)=======
我们用这篇文章的:Dynamics of Gene Expression in Single Root Cells of Arabidopsis thaliana数据,主要包含两组 control和heat处理的,我们先用cellranger分析了一下。
library(Seurat)
library(tidyverse)
library(ggplot2)
library(ggsci)
library(ggpubr)
library(pheatmap)
library(RColorBrewer)
library(patchwork)
library(lsa)
library(viridis)
library(hrbrthemes)
library(circlize)
library(chorddiag)
library(ggplotify)
library(data.table)
library(parmigene)
library(readxl)
library(infotheo)
library(igraph)
library(muxViz)
library(rgl)
library(tidyverse)
library(dplyr)
//参数选择和作者paper中用的参数一样
pbmc <- readRDS("pbmc3k_final.rds")
pbmc@meta.data$labels <- Idents(pbmc)
control.data <- Read10X(data.dir = "control/filtered_feature_bc_matrix/")
control<- CreateSeuratObject(counts = control.data, project = "control", min.cells = 3, min.features = 200)
control <- subset(control, subset = nFeature_RNA > 200 & nCount_RNA > 1000)
control <- SCTransform(control, verbose = FALSE)
heat.data <- Read10X(data.dir = "heat/filtered_feature_bc_matrix/")
heat<- CreateSeuratObject(counts = heat.data, project = "heat", min.cells = 3, min.features = 200)
heat <- subset(heat, subset = nFeature_RNA > 200 & nCount_RNA > 1000)
heat <- SCTransform(heat, verbose = FALSE)
datasets <- c(control,heat)
features <- SelectIntegrationFeatures(object.list = datasets, nfeatures = 8000)
datasets <- PrepSCTIntegration(object.list = datasets, anchor.features = features, verbose = TRUE)
datasets <- lapply(X = datasets, FUN = RunPCA, verbose = FALSE, features = features)
anchors <- FindIntegrationAnchors(object.list = datasets, normalization.method = "SCT",anchor.features = features, verbose = TRUE, reference=1,reduction = "cca")
objs <- IntegrateData(anchorset = anchors, normalization.method = "SCT", verbose = TRUE)
objs <- RunPCA(objs, verbose = FALSE, approx = FALSE, npcs = 50)
objs <- RunUMAP(objs, reduction = "pca", dims = 1:50, umap.method = "umap-learn", metric = "correlation")
objs <- RunTSNE(objs, reduction = "pca",dims = 1:50,tsne.method = "Rtsne")
objs <- FindNeighbors(objs, reduction = "pca",dims = 1:50)
objs <- FindClusters(objs, resolution = 0.5, algorithm = 2)
DefaultAssay(objs) <- 'SCT'
objs<- PrepSCTFindMarkers(objs,assay = "SCT", verbose = TRUE)
DEG <- FindAllMarkers(objs,
logfc.threshold=0.25,
min.diff.pct = 0.25,
max.cells.per.ident = 10000,
only.pos=T)
mark_gene <- DEG %>% mutate(avg_logFC=avg_log2FC) %>% filter(p_val_adj<0.05)
signature <- readxl::read_excel('../ath_doi_202104.xlsx')
sig_gene <- signature %>% as.data.frame() %>% filter(Tissue=="Root") %>% mutate(V1=`Cell Type`,V2=Cell_Marker) %>% unique(.) %>% select(V1,V2)
注:注释我自己根据这篇文章的marker基因简单做了一下注释,没有用作者文章用的那个包。做的比较粗糙,自己的paper的话 这一步要多花点时间
new.cluster.ids <- c("Columella","Vasculature","Cortex","Endodermis","Stele","Trichoblast","Non-hair cell","Non-hair cell","Xylem pole pericycle","Phloem pole pericycle","Trichoblast","Xylem","Lateral root cap","Non-hair cell","Phloem companion cell","Root cap","Protoxylem")
names(new.cluster.ids) <- levels(objs)
objs <- RenameIdents(objs, new.cluster.ids)
saveRDS(objs,"objs.rds")
下面开始按paper的教程画paper中类似的图。
最基本的聚类图:
pdf("pic1.pdf")
options(repr.plot.width=6, repr.plot.height=5)
pic1 <- DimPlot(objs,group.by = 'labels', label=TRUE, label.size = 6, reduction='umap',cols=mycolor)+NoLegend()+ggtitle("")
pic1
也可以按project的来源,分开画
objs@meta.data$treatment <- objs@meta.data$orig.ident
pdf("pics1.pdf")
options(repr.plot.width=6, repr.plot.height=5)
pics1 <- DimPlot(objs,group.by = 'labels', split.by= 'treatment', label=TRUE, label.size = 6,cols=mycolor,ncol =1)+NoLegend()+ggtitle("")
pics1
计算各cell类型的比例:
pdf("pics2.pdf")
options(repr.plot.width=10, repr.plot.height=4)
pics2 <- objs@meta.data %>%
ggplot(aes(treatment,fill=labels,color=I('white')))+
geom_bar(position = "fill")+
coord_flip()+
theme_bw()+
ylab("")+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border=element_blank(),
axis.title=element_text(size=7.82,face="bold"),
axis.text=element_text(size=7.82,color='black'),
legend.text=element_text(size=7.82),
plot.title = element_text(size = 7.82, face = "bold"),
axis.line=element_line(color='black'),
legend.title = element_text(size = 7.82))+
scale_y_continuous(position = "right",expand = c(0,0))+
scale_fill_manual(values=mycolor)
pics2
运用fisher检验热处理下细胞响应胁迫的偏好性:
tbl <- table(objs@meta.data$labels,objs@meta.data$treatment)
res = chisq.test(tbl)
expected = res$expected
roe = tbl/expected
# Preference of each cell type under heat shock stress. RO/E above 1 indicates enrichment. 大于1的说明在heat处理下富集
marker gene热图展示
meta <- objs@meta.data %>% select(seurat_clusters,labels) %>% unique()
mark_gene <- DEG %>% mutate(avg_logFC=avg_log2FC) %>%
filter(p_val_adj<0.05 & avg_log2FC>1.5 & gene %in% sig_gene$V2) %>%
inner_join(meta,c("cluster" = "seurat_clusters")) %>%
arrange(p_val)
expr <- AverageExpression(objs,assays ='SCT')
expr <- expr$SCT
expr <- expr[mark_gene$gene,] //取出自己想展示的mark gene
//expr <- expr[,c('Trichoblast','Lateral root','Cortex','Atrichoblast','Endodermis','Xylem','Meristem','Phloem','Pericycle')]
pdf("pic3.pdf")
options(repr.plot.width=8, repr.plot.height=5)
pic3 <- pheatmap(t(expr), scale="column",angle_col=90,cluster_rows = F,cluster_cols = T,show_colnames=F)
pic3
加载PlantPhoneDB相关的包:
source("PlantPhoneDB-main/R/CCI_circle.r")
source("PlantPhoneDB-main/R/CCI_network.r")
source("PlantPhoneDB-main/R/heatmap_count.r")
source("PlantPhoneDB-main/R/LR_pathway.r")
source("PlantPhoneDB-main/R/LRscore.r")
因为这个数据集是拟南芥的,所以加载拟南芥的配体-受体对:
load("PlantPhoneDB-main/LR_pair_ath.RDa")
LR_pair <- LR_pair %>% filter(source!="orthologs") %>% select(Ligands, Receptors) %>% unique()
计算配体-受体互作的分数:
objs_heat <- subset(objs, treatment!="Control")
Heat <- LRscore(objs_heat@assays$SCT@data, LRdb=LR_pair, cluster = Idents(objs_heat), min.pct = 0.1,iterations=100, method='Average')
获得每个细胞类型在control和heat处理之间的差异基因:
DE_test <- NULL
for(i in unique(objs$labels))
{
i
objs_flt <- subset(objs,labels==i)
Idents(objs_flt) <- objs_flt$treatment
objs_flt <- PrepSCTFindMarkers(objs_flt ,assay = "SCT", verbose = TRUE)
degs <- FindAllMarkers(objs_flt,logfc.threshold=0.25,min.diff.pct = 0.25,max.cells.per.ident = 10000)
if(length(degs)!=0)
{
degs <- degs %>% mutate(avg_logFC=avg_log2FC, labels=i) %>% filter(p_val_adj<0.05) %>% mutate(label_gene=paste0(labels,"_",gene))
DE_test <- rbind(DE_test,degs)
}
}
识别热胁迫下显著差异的配体-受体互作对:
Heat_sig <- Heat %>% filter(Pvalue<0.05) %>%
mutate(Ligands_cell_gene=paste0(Ligands_cell,"_",Ligands), Receptors_cell_gene=paste0(Receptors_cell,"_",Receptors)) %>%
filter(Ligands_cell_gene %in% DE_test$label_gene | Receptors_cell_gene %in% DE_test$label_gene) %>%
select(-Ligands_cell_gene,-Receptors_cell_gene)
interaction_count <- Heat_sig %>% group_by(Ligands_cell,Receptors_cell) %>% summarise(Number=n(),.groups = 'drop')
sum(interaction_count$Number)
interaction_count %>% mutate(Type=ifelse(Ligands_cell==Receptors_cell,"Autocrine","Paracrine")) %>% group_by(Type) %>% summarise(Number=sum(Number))
Autocrine <- interaction_count[interaction_count$Ligands_cell==interaction_count$Receptors_cell,]
Paracrine <- interaction_count[interaction_count$Ligands_cell!=interaction_count$Receptors_cell,]
旁分泌相互作用的细胞间通信的circos图:
CCI_circle(Paracrine, mycolor2) //color和细胞类型的数目一致
自分泌相互作用的细胞间通信的circos图:
CCI_circle(Autocrine, mycolor3) //color和细胞类型的数目一致
不同细胞类型之间互作的配体-受体对的数目:
top10的配体-受体对在不同细胞互作之间的heatmap:
Top10 <- Heat_sig %>%
arrange(desc(Score)) %>%
select(LR_pair) %>%
unique() %>%
head(10) %>%
inner_join(Heat) %>%
select(LR_pair,Cell_pair,Score) %>%
spread(.,Cell_pair,Score) %>%
replace(is.na(.), 0)
rownames(Top10) <- Top10$LR_pair
Top10 <- Top10[,-1]
Top10 <- t(Top10)
Top10 <- apply(Top10,2,function(x){x/max(x)})
pic6 <- pheatmap(Top10, scale="none",angle_col=45,fontsize_row=4,cluster_rows = T,cluster_cols = F,show_colnames=T)
构建细胞之间的信号通路:
geneSet <- fread('../plantGSAD/Ara_ALL.KEGG.txt')
geneSet$Gene <- toupper(geneSet$Gene)
CellA <- "Trichoblast"
CellB <- "Cortex"
lr <- subset(Heat_sig,Ligands_cell==CellA & Receptors_cell==CellB )
pathway_result2 <- LR_pathway(lr, objs_heat, CellA, CellB, neighbor=2, geneSet)
pathway_result2$FDR <- p.adjust(pathway_result2$Pvalue,method='BH')
pathway_result2 <- pathway_result2[order(pathway_result2$Pvalue),]
本文使用 文章同步助手 同步