下游数据分析

R语言分析2-1:免疫浸润分析(CIBERSORT)

2023-06-09  本文已影响0人  cc的生信随记

免疫浸润分析就是为了明确免疫细胞在人体微环境内的组成情况,进而明确究竟是哪种免疫细胞在疾病的发生发展中产生了重要作用。目前,计算分析免疫浸润的方法主要有两类:1)基于cellMarker的,得到的是每个样本某个细胞的富集分数
                            2) 去卷积,得到的是每个样本免疫细胞的比例

1. 使用\color{green}{CIBERSORT}反/去卷积

CIBERSORT是基于线性支持向量回归(linear support vector regression)原理对人类免疫细胞亚型的表达矩阵进行去卷积的工具。用于芯片表达矩阵测序表达矩阵,对未知混合物和含有相近的细胞类型的表达矩阵的去卷积分析优于其他方法 (LLSR,LLSR,PERT,RLR,MMAD,DSA) 。
★ 该方法基于已知参考数据集,默认提供22种免疫细胞亚型的基因表达特征集:LM22

1.1 准备免疫细胞特征基因文件和自己需要分析的表达矩阵

# 安装并加载所需的R包
# install.packages('devtools')
# library(devtools)
# devtools::install_github('shenorrlab/bseqsc')

library(bseqsc) #携带大量CIBERSORT的依赖
library(CIBERSORT)

library(ggplot2)
library(pheatmap)
library(ComplexHeatmap)

data(LM22) # 自带LM22文件
data(LM22)
##       B cells naive B cells memory Plasma cells T cells CD8 T cells CD4 naive
## ABCB4        555.71          10.74        7.226       4.311             4.606
## ABCB9         15.60          22.09      653.392      24.224            35.672
## ACAP1        215.31         321.62       38.617    1055.613          1790.097
## ACHE          15.12          16.65       22.124      13.428            27.188
## ACP5         605.90        1935.20     1120.105     306.313           744.657

# 同上篇《生信数据库2: TCGA(RNA-Seq)》一致,但此处提取fpkm数据
matrix = data.frame(matrix(nrow = 60660, ncol = 0))
for (i in 1:length(count_file)){
  path = paste0('counts/', count_file[i])
  data <- read.delim(path, fill = TRUE, header = FALSE, row.names = 1)
  colnames(data) <- data[2, ]
  data <- data[-c(1:6), ]
  data <- data[7]   #取出fpkm_unstranded列(第7列),即fpkm数据
  colnames(data) <- file_sample$sample_id[which(file_sample$file_name == count_file_name[i])]
  matrix <- cbind(matrix, data)
}

# 将行名转换为Gene Symbol
path = paste0('counts/', count_file[1])
data <- as.matrix(read.delim(path, fill = TRUE, header = FALSE, row.names = 1))
gene_name <- data[-c(1:6), 1] 
matrix0 <- cbind(gene_name, matrix)

matrix0 <- aggregate( . ~ gene_name, data = matrix0, max) 

rownames(matrix0) <- matrix0[, 1]
matrix0 <- matrix0[, -1]

FPKM <- as.data.frame(lapply(matrix0, as.numeric))
row.names(FPKM) <- row.names(matrix0)
colnames(FPKM) <- colnames(matrix0)

# 过滤,并转换为矩阵
FPKM <- FPKM[which(rowSums(FPKM)/ncol(FPKM) >= 1),] 
FPKM <- as.matrix(FPKM)

FPKM[1:5, 1:5]
##        TCGA-35-5375-01A-01R-1628-07 TCGA-55-A4DF-01A-11R-A24H-07 TCGA-95-8039-01A-11R-2241-07 TCGA-MP-A4T4-01A-11R-A262-07 TCGA-62-A471-01A-12R-A24H-07
## A2M                         40.9806                      45.5385                     226.9787                     125.4271                      23.3876
## A4GALT                       6.3743                       7.8681                       2.7125                       9.7885                      20.6624
## AAAS                         6.3150                       8.2626                       8.2346                       8.3154                      12.2582
## AACS                         2.6022                       3.5811                       1.5165                       2.0158                       5.4609
## AADAC                       28.5316                       1.8770                      10.4736                       0.0450                       4.0560

1.2 运行CIBERSORT

# perm置换次数 = 1000,QN分位数归一化 = TRUE 
results <- cibersort(sig_matrix = LM22, mixture_file = FPKM)

results[1:6,1:5]
##                              B cells naive B cells memory Plasma cells T cells CD8 T cells CD4 naive
## TCGA-35-5375-01A-01R-1628-07    0.00000000     0.09647333    0.1754904  0.14560600                 0
## TCGA-55-A4DF-01A-11R-A24H-07    0.06919370     0.00000000    0.1453946  0.09952461                 0
## TCGA-95-8039-01A-11R-2241-07    0.06389702     0.00000000    0.0280821  0.06016787                 0
## TCGA-MP-A4T4-01A-11R-A262-07    0.03455365     0.00000000    0.2690141  0.17314611                 0
## TCGA-62-A471-01A-12R-A24H-07    0.13974877     0.00000000    0.3551352  0.12827269                 0
## TCGA-L9-A5IP-01A-21R-A39D-07    0.11928189     0.00000000    0.2968442  0.11819394                 0

library(reshape)

# 构造注释文件
group_list <- ifelse(as.numeric(substring(rownames(results), 14, 15)) < 10, "Tumor","Normal") %>% factor(., levels = c("Normal", "Tumor"))

# 取前22列细胞亚群组
LUAD_data <- as.data.frame(results[, 1:22])

LUAD_data$group <- group_list
LUAD_data$sample <- row.names(results)

# 融合数据
LUAD_New = melt(LUAD_data)
colnames(LUAD_New) = c("Group", "Sample", "Celltype", "Composition")  #设置行名
head(LUAD_New)
##   Group                       Sample      Celltype Composition
##  1 Tumor TCGA-35-5375-01A-01R-1628-07 B cells naive  0.00000000
##  2 Tumor TCGA-55-A4DF-01A-11R-A24H-07 B cells naive  0.06919370
##  3 Tumor TCGA-95-8039-01A-11R-2241-07 B cells naive  0.06389702
##  4 Tumor TCGA-MP-A4T4-01A-11R-A262-07 B cells naive  0.03455365
##  5 Tumor TCGA-62-A471-01A-12R-A24H-07 B cells naive  0.13974877
##  6 Tumor TCGA-L9-A5IP-01A-21R-A39D-07 B cells naive  0.11928189

★ results的结果:
    1) 第一列为样本,第一行为细胞类群
    2) 除细胞类群外,还有P-value、Correlation及RMSE三列
         ꔷ P-value:去卷积的结果在所有细胞类群中是否具有差异,置换检验(蒙特卡罗方法),越小越可信
         ꔷ Correlation:参考矩阵与输入矩阵的特征基因相关性
         ꔷ RMSE:Root mean squared error,参考矩阵与输入矩阵的特征基因标准差,越小效果越好

1.3 可视化

1.3.1 直方图

library(tidyr)
library(RColorBrewer)

mypalette <- colorRampPalette(brewer.pal(8,"Set1"))

ggplot(LUAD_New, aes(Sample, Composition, fill = Celltype)) + 
  geom_bar(stat = "identity") +
  labs(fill = "Celltype",x = "", y = "Estiamted Proportion") + 
  theme_bw() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom") + 
  scale_y_continuous(expand = c(0.01,0)) +
  scale_fill_manual(values = mypalette(22))
CIBERSORT-1

1.3.2 箱式图

ggplot(LUAD_New, aes(x = Celltype, y = Composition))+ 
  labs(y="Cell Proportion", x =  NULL, title = "LUAD Cell Proportion")+  
  geom_boxplot(aes(fill = Group), position = position_dodge(0.5), width = 0.5, outlier.alpha = 0)+ 
  scale_fill_manual(values = c("#096EA9", "#B33D27")) +
  theme_bw() + 
  theme(plot.title = element_text(size = 12,color="black",hjust = 0.5), 
        axis.text.x = element_text(angle = 45, hjust = 1 ),
        panel.grid = element_blank(),
        legend.position = "top",
        legend.text = element_text(size= 12),
        legend.title= element_text(size= 12)) + 
  stat_compare_means(aes(group =  Group),
                     label = "p.signif",
                     method = "wilcox.test",
                     hide.ns = T)
CIBERSORT-2
上一篇 下一篇

猜你喜欢

热点阅读