SCS【18】细胞交互：受体-配体及其相互作用的细胞通讯数据库

肿瘤微环境(TME)中肿瘤细胞与其他细胞之间的串扰在肿瘤微环境(TME)中起着重要作用。在肿瘤进展、转移和治疗耐药性中起关键作用。iTALK描述和说明细胞间通信信号的计算方法。多细胞肿瘤生态系统使用单细胞RNA测序数据。原则上可以用于分析细胞间通信的复杂性、多样性和动态性广泛的细胞过程。TME已成为肿瘤进展、免疫逃避和免疫应答的关键调节因子抗肿瘤治疗耐药机制的出现。该TME包括的多样性细胞类型如肿瘤细胞、异质免疫细胞和非免疫细胞间质成分。肿瘤细胞与这些非肿瘤细胞动态协调和相互作用元件，以及它们之间的串扰被认为提供了关键信号，可以指导和促进肿瘤细胞生长和迁移。通过这种细胞间的交流，肿瘤细胞能引起其他TME细胞的深刻表型变化，如肿瘤相关成纤维细胞，巨噬细胞和T细胞，并重新编程TME，以逃避免疫监视为了生存。因此，更好地了解细胞与细胞之间的通信信号可能有助于确定新的调节治疗策略，以更好地为患者带来好处。然而,这一直受到缺乏有效的数据分析和生物信息学工具的阻碍可视化。

软件包安装

这里安装的时候需要注意一下有很多依赖包需要单独安装，或者有些软件包的版本不对，也需要删除重新安装。

if(!require(devtools)) install.packages("devtools")
  devtools::install_github("Coolgenome/iTALK", build_vignettes = TRUE)

实例操作

软件包中例子

数据输入文件为单细胞转录组数据整理后的count矩阵。首先找到高表达配体-受体对，可以是找到前50%的高表达基因，从高表达基因中找到配体-受体对。

library(iTALK)
library(Seurat)
library(Matrix)
library(dplyr)

# read the data
data <- read.table("./iTALK-master/example/example_data.txt", sep = "\t", header = T,
    stringsAsFactors = F)

## highly expressed ligand-receptor pairs

# find top 50 percent highly expressed genes
highly_exprs_genes <- rawParse(data, top_genes = 50, stats = "mean")
# find the ligand-receptor pairs from highly expressed genes
comm_list <- c("growth factor", "other", "cytokine", "checkpoint")
cell_col <- structure(c("#4a84ad", "#4a1dc6", "#e874bf", "#b79eed", "#ff636b", "#52c63b",
    "#9ef49a"), names = unique(data$cell_type))
par(mfrow = c(1, 2))
res <- NULL
for (comm_type in comm_list) {
    res_cat <- FindLR(highly_exprs_genes, datatype = "mean count", comm_type = comm_type)
    res_cat <- res_cat[order(res_cat$cell_from_mean_exprs * res_cat$cell_to_mean_exprs,
        decreasing = T), ]
    # plot by ligand category overall network plot
    NetView(res_cat, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
    # top 20 ligand-receptor pairs
    LRPlot(res_cat[1:20, ], datatype = "mean count", cell_col = cell_col, link.arr.lwd = res_cat$cell_from_mean_exprs[1:20],
        link.arr.width = res_cat$cell_to_mean_exprs[1:20])
    title(comm_type)
    res <- rbind(res, res_cat)
}

绘制网络图：

res <- res[order(res$cell_from_mean_exprs * res$cell_to_mean_exprs, decreasing = T),
    ][1:20, ]
NetView(res, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)

## IGRAPH 5aedb9f DN-- 6 20 -- 
## + attr: name (v/c), size (v/n), color (v/c), label.color (v/c),
## | label.cex (v/n), n (e/n), label (e/n), width (e/n), arrow.width
## | (e/n), label.color (e/c), label.cex (e/n), color (e/c), loop.angle
## | (e/n)
## + edges from 5aedb9f (vertex names):
##  [1] b_cells    ->memory_t     cd56_nk    ->cytotoxic_t 
##  [3] cd56_nk    ->memory_t     cd56_nk    ->naive_t     
##  [5] cd56_nk    ->regulatory_t cytotoxic_t->cytotoxic_t 
##  [7] cytotoxic_t->memory_t     cytotoxic_t->naive_t     
##  [9] cytotoxic_t->regulatory_t memory_t   ->cytotoxic_t 
## + ... omitted several edges

绘制圈图：

LRPlot(res[1:20, ], datatype = "mean count", cell_col = cell_col, link.arr.lwd = res$cell_from_mean_exprs[1:20],
    link.arr.width = res$cell_to_mean_exprs[1:20])

比较组间配体-受体对显著

每个样本随机分配比较组，发现两组间调节性T细胞和NK细胞的DEGenes

## significant ligand-receptor pairs between compare groups

# randomly assign the compare group to each sample
data <- data %>%
    mutate(compare_group = sample(2, nrow(data), replace = TRUE))
# find DEGenes of regulatory T cells and NK cells between these 2 groups
deg_t <- DEG(data %>%
    filter(cell_type == "regulatory_t"), method = "Wilcox", contrast = c(2, 1))
deg_nk <- DEG(data %>%
    filter(cell_type == "cd56_nk"), method = "Wilcox", contrast = c(2, 1))
# find significant ligand-receptor pairs and do the plotting
par(mfrow = c(1, 2))
res <- NULL
for (comm_type in comm_list) {
    res_cat <- FindLR(deg_t, deg_nk, datatype = "DEG", comm_type = comm_type)
    res_cat <- res_cat[order(res_cat$cell_from_logFC * res_cat$cell_to_logFC, decreasing = T),
        ]
    # plot by ligand category
    if (nrow(res_cat) == 0) {
        next
    } else if (nrow(res_cat >= 20)) {
        LRPlot(res_cat[1:20, ], datatype = "DEG", cell_col = cell_col, link.arr.lwd = res_cat$cell_from_logFC[1:20],
            link.arr.width = res_cat$cell_to_logFC[1:20])
    } else {
        LRPlot(res_cat, datatype = "DEG", cell_col = cell_col, link.arr.lwd = res_cat$cell_from_logFC,
            link.arr.width = res_cat$cell_to_logFC)
    }
    NetView(res_cat, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
    title(comm_type)
    res <- rbind(res, res_cat)
}

没有发现重要的配对

我只是随机地将比较组分配给没有生物学差异的样本，以展示如何使用包装。所以应该没有明显的基因可以期待。

if (is.null(res)) {
    print("No significant pairs found")
} else if (nrow(res) >= 20) {
    res <- res[order(res$cell_from_logFC * res$cell_to_logFC, decreasing = T), ][1:20,
        ]
    NetView(res, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
    LRPlot(res[1:20, ], datatype = "DEG", cell_col = cell_col, link.arr.lwd = res$cell_from_logFC[1:20],
        link.arr.width = res$cell_to_logFC[1:20])
} else {
    NetView(res, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)
    LRPlot(res, datatype = "DEG", cell_col = cell_col, link.arr.lwd = res$cell_from_logFC,
        link.arr.width = res$cell_to_logFC)
}
## [1] "No significant pairs found"
# I just randomly assigned the compare group to samples which has no biological
# difference for showing how to use the package.  So there should be no
# significant genes to be expected.

pbmc3k实操

我们利用SeuratData 软件包里面的pbmc3k数据集进行实操，数据筛选及标准化使用软件包 Seurat 进行处理，具体使用方法可以参考SCS【4】单细胞转录组数据可视化分析 (Seurat 4.0)

将QC指标可视化绘制小提琴图

library(SeuratData)
# AvailableData()
data("pbmc3k")
pbmc3k[["percent.mt"]] <- PercentageFeatureSet(pbmc3k, pattern = "^MT-")
pbmc3k <- subset(pbmc3k, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt <
    5)
VlnPlot(pbmc3k, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)

标准化分析并进行高频变异基因识别：

pbmc3k <- subset(pbmc3k, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt <
    5)
pbmc3k <- NormalizeData(pbmc3k, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc3k <- FindVariableFeatures(pbmc3k, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc3k)
pbmc3k <- ScaleData(pbmc3k, features = all.genes)
pbmc3k <- ScaleData(pbmc3k, vars.to.regress = "percent.mt")
pbmc3k <- RunPCA(pbmc3k, features = VariableFeatures(object = pbmc3k))

线性降维

VizDimLoadings(pbmc3k, dims = 1:2, reduction = "pca")

维度聚类图：

pbmc3k <- FindNeighbors(pbmc3k, dims = 1:10)
pbmc3k <- FindClusters(pbmc3k, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2638
## Number of edges: 95893
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8735
## Number of communities: 9
## Elapsed time: 0 seconds
head(Idents(pbmc3k), 5)
## AAACATACAACCAC AAACATTGAGCTAC AAACATTGATCAGC AAACCGTGCTTCCG AAACCGTGTATGCG 
##              0              3              2              1              6 
## Levels: 0 1 2 3 4 5 6 7 8
pbmc3k <- RunTSNE(pbmc3k, dims = 1:10)
DimPlot(pbmc3k, reduction = "tsne", label = TRUE, pt.size = 1.5)

差异表达基因(聚类生物标志物)：

pbmc.markers <- FindAllMarkers(pbmc3k, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
pbmc.markers %>%
    group_by(cluster) %>%
    top_n(n = 2, wt = avg_log2FC)
## # A tibble: 18 × 7
## # Groups:   cluster [9]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene    
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>   
##  1 1.88e-117       1.08 0.913 0.588 2.57e-113 0       LDHB    
##  2 5.01e- 85       1.34 0.437 0.108 6.88e- 81 0       CCR7    
##  3 0               5.57 0.996 0.215 0         1       S100A9  
##  4 0               5.48 0.975 0.121 0         1       S100A8  
##  5 1.93e- 80       1.27 0.981 0.65  2.65e- 76 2       LTB     
##  6 2.91e- 58       1.27 0.667 0.251 3.98e- 54 2       CD2     
##  7 0               4.31 0.939 0.042 0         3       CD79A   
##  8 1.06e-269       3.59 0.623 0.022 1.45e-265 3       TCL1A   
##  9 3.60e-221       3.21 0.984 0.226 4.93e-217 4       CCL5    
## 10 4.27e-176       3.01 0.573 0.05  5.85e-172 4       GZMK    
## 11 3.51e-184       3.31 0.975 0.134 4.82e-180 5       FCGR3A  
## 12 2.03e-125       3.09 1     0.315 2.78e-121 5       LST1    
## 13 3.17e-267       4.83 0.961 0.068 4.35e-263 6       GZMB    
## 14 1.04e-189       5.28 0.961 0.132 1.43e-185 6       GNLY    
## 15 1.48e-220       3.87 0.812 0.011 2.03e-216 7       FCER1A  
## 16 1.67e- 21       2.87 1     0.513 2.28e- 17 7       HLA-DPB1
## 17 7.73e-200       7.24 1     0.01  1.06e-195 8       PF4     
## 18 3.68e-110       8.58 1     0.024 5.05e-106 8       PPBP
cell.types <- vector("logical", length = ncol(pbmc3k))
names(cell.types) <- colnames(pbmc3k)
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "0"] <- "Naive CD4 T"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "1"] <- "Memory CD4 T"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "2"] <- "CD14+ Mono"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "3"] <- "B"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "4"] <- "CD8 T"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "5"] <- "FCGR3A+ Mono"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "6"] <- "NK"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "7"] <- "DC"
cell.types[pbmc3k@meta.data$RNA_snn_res.0.5 == "8"] <- "Platelet"

读入iTALK，并给pbmc3k加入两列数据：cell.types， group，将 pbmc3k 的cell.type和group 两列数据加入到 iTALK_data中，检查数据是否导入成功：

iTALK_data <- as.data.frame(t(pbmc3k@assays$RNA@counts))
pbmc3k[["cell.types"]] <- cell.types
pbmc3k[["group"]] <- pbmc3k$orig.ident
iTALK_data$cell_type <- pbmc3k@meta.data$cell.types
iTALK_data$compare_group <- pbmc3k@meta.data$group
unique(iTALK_data$cell_type)
## [1] "Naive CD4 T"  "B"            "CD14+ Mono"   "Memory CD4 T" "NK"          
## [6] "CD8 T"        "FCGR3A+ Mono" "DC"           "Platelet"
unique(iTALK_data$compare_group)
## [1] pbmc3k
## Levels: pbmc3k

绘图网络图：

my10colors <- my36colors <- c("#E5D2DD", "#53A85F", "#F1BB72", "#F3B1A0", "#D6E7A3",
    "#57C3F3", "#476D87", "#E95C59", "#E59CC4", "#AB3282")
head(my10colors)
## [1] "#E5D2DD" "#53A85F" "#F1BB72" "#F3B1A0" "#D6E7A3" "#57C3F3"
highly_exprs_genes <- rawParse(iTALK_data, top_genes = 50, stats = "mean")
head(highly_exprs_genes, 5)
##     gene    exprs   cell_type
## 1 MALAT1 78.11001 Naive CD4 T
## 2    B2M 37.58251 Naive CD4 T
## 3  RPL10 36.86037 Naive CD4 T
## 4  RPL13 35.64739 Naive CD4 T
## 5 TMSB4X 35.20592 Naive CD4 T
comm_list <- c("growth factor", "other", "cytokine", "checkpoint")
cell_types <- unique(iTALK_data$cell_type)
head(cell_types, 5)
## [1] "Naive CD4 T"  "B"            "CD14+ Mono"   "Memory CD4 T" "NK"
cell_col <- structure(my10colors[1:length(cell_types)], names = cell_types)
head(cell_col, 5)
##  Naive CD4 T            B   CD14+ Mono Memory CD4 T           NK 
##    "#E5D2DD"    "#53A85F"    "#F1BB72"    "#F3B1A0"    "#D6E7A3"
iTalk_res <- NULL
for (comm_type in comm_list) {
    res_cat <- FindLR(highly_exprs_genes, datatype = "mean count", comm_type = comm_type)
    iTalk_res <- rbind(iTalk_res, res_cat)
}
iTalk_res <- iTalk_res[order(iTalk_res$cell_from_mean_exprs * iTalk_res$cell_to_mean_exprs,
    decreasing = T), ][1:20, ]

NetView(iTalk_res, col = cell_col, vertex.label.cex = 1, arrow.width = 1, edge.max.width = 5)

## IGRAPH bb79d6d DN-- 6 20 -- 
## + attr: name (v/c), size (v/n), color (v/c), label.color (v/c),
## | label.cex (v/n), n (e/n), label (e/n), width (e/n), arrow.width
## | (e/n), label.color (e/c), label.cex (e/n), color (e/c), loop.angle
## | (e/n)
## + edges from bb79d6d (vertex names):
##  [1] CD14+ Mono  ->CD14+ Mono  CD14+ Mono  ->CD8 T      
##  [3] CD14+ Mono  ->Naive CD4 T CD14+ Mono  ->NK         
##  [5] CD8 T       ->CD14+ Mono  CD8 T       ->CD8 T      
##  [7] CD8 T       ->Naive CD4 T CD8 T       ->NK         
##  [9] DC          ->CD14+ Mono  DC          ->CD8 T      
## + ... omitted several edges

绘制圈图：

LRPlot(iTalk_res[1:20, ], datatype = "mean count", cell_col = cell_col, link.arr.lwd = iTalk_res$cell_from_mean_exprs[1:20],
    link.arr.width = iTalk_res$cell_to_mean_exprs[1:20])

References:

1. Wang, Yuanxin et al. “iTALK: an R Package to Characterize and Illustrate Intercellular Communication.” bioRxiv (2019): n. pag.

这个细胞互作软件包代码量还是很多的，需要具有一定 R 语言编程基础，并不是看起来那么简单，所有好多老师想直接自己学习教程来分析，但是实质上没有基础还是很难实现，每步报错都不知道该怎样处理，是最崩溃的！！！

本文使用文章同步助手同步

SCS【18】细胞交互：受体-配体及其相互作用的细胞通讯数据库

单细胞生信分析教程

简介

软件包安装

实例操作

软件包中例子

比较组间配体-受体对显著

没有发现重要的配对

pbmc3k实操

猜你喜欢

热点阅读

SCS【18】细胞交互：受体-配体及其相互作用的细胞通讯数据库

单细胞生信分析教程

简 介

软件包安装

实例操作

软件包中例子

比较组间配体-受体对显著

没有发现重要的配对

pbmc3k实操

猜你喜欢

热点阅读

简介