hdWGCNA系列推文（二）：分析单细胞转录组数据

2023-06-03 本文已影响0人生信宝库

前言

Immugent在hdWGCNA系列推文第一篇：hdWGCNA：将单细胞和空间转录组的WGCNA分析变成现实中,介绍了hdWGCNA包的主要功能框架，并在上一期推文:hdWGCNA系列推文（一）：分析流程的搭建中给大家说明了如何安装hdWGCNA。那么从本期推文开始，正式开始讲解如何在实际分析数据的过程中使用hdWGCNA。

在正式开始之前呢，我们先来回忆一下WGCNA分析到底是个啥？其实简单总结一下就是一句话：关联表型和基因。WGCNA通过将基因进行分组（module），把基因模块和表型进行关联，实现了快速锁定核心基因的目的。整体来讲，WGCNA的分析流程是很繁琐的，一个全套的分析可能会涉及8-9个步骤，但是，这里面有很多步骤其实无关紧要，跟分析的主线，也就是“筛选与表型相关的核心基因”是脱离的。

还有一点最主要的是以往的WGCNA分析都是针对bulk测序数据，而hdWGCNA包的开发专门用于单细胞测序数据的使用，而且作者极大的简化了WGCNA分析部分的流程，而添加了很多可视化的流程。废话不多说，下面开始展示。

代码流程

首先就是导入模拟数据，大家也可以用自己的单细胞测序数据。

# single-cell analysis package
library(Seurat)

# plotting and data science packages
library(tidyverse)
library(cowplot)
library(patchwork)

# co-expression network analysis packages:
library(WGCNA)
library(hdWGCNA)

# using the cowplot theme for ggplot
theme_set(theme_cowplot())

# set random seed for reproducibility
set.seed(12345)

# optionally enable multithreading
enableWGCNAThreads(nThreads = 8)

# load the Zhou et al snRNA-seq dataset
seurat_obj <- readRDS('Zhou_2020.rds')

p <- DimPlot(seurat_obj, group.by='cell_type', label=TRUE) +   umap_theme() + ggtitle('Zhou et al Control Cortex') + NoLegend()
p

image.png

Set up Seurat object for WGCNA

seurat_obj <- SetupForWGCNA(
  seurat_obj,
  gene_select = "fraction", # the gene selection approach
  fraction = 0.05, # fraction of cells that a gene needs to be expressed in order to be included
  wgcna_name = "tutorial" # the name of the hdWGCNA experiment
)

# construct metacells  in each group
seurat_obj <- MetacellsByGroups(
  seurat_obj = seurat_obj,
  group.by = c("cell_type", "Sample"), # specify the columns in seurat_obj@meta.data to group by
  reduction = 'harmony', # select the dimensionality reduction to perform KNN on
  k = 25, # nearest-neighbors parameter
  max_shared = 10, # maximum number of shared cells between two metacells
  ident.group = 'cell_type' # set the Idents of the metacell seurat object
)

# normalize metacell expression matrix:
seurat_obj <- NormalizeMetacells(seurat_obj)

Co-expression network analysis

seurat_obj <- SetDatExpr(
  seurat_obj,
  group_name = "INH", # the name of the group of interest in the group.by column
  group.by='cell_type', # the metadata column containing the cell type info. This same column should have also been used in MetacellsByGroups
  assay = 'RNA', # using RNA assay
  slot = 'data' # using normalized data
)

# Test different soft powers:
seurat_obj <- TestSoftPowers(
  seurat_obj,
  networkType = 'signed' # you can also use "unsigned" or "signed hybrid"
)

# plot the results:
plot_list <- PlotSoftPowers(seurat_obj)

# assemble with patchwork
wrap_plots(plot_list, ncol=2)

image.png

Construct co-expression network

power_table <- GetPowerTable(seurat_obj)
head(power_table)

# construct co-expression network:
seurat_obj <- ConstructNetwork(
  seurat_obj, soft_power=9,
  setDatExpr=FALSE,
  tom_name = 'INH' # name of the topoligical overlap matrix written to disk
)

PlotDendrogram(seurat_obj, main='INH hdWGCNA Dendrogram')

image.png

Module Eigengenes and Connectivity

# need to run ScaleData first or else harmony throws an error:
seurat_obj <- ScaleData(seurat_obj, features=VariableFeatures(seurat_obj))

# compute all MEs in the full single-cell dataset
seurat_obj <- ModuleEigengenes(
 seurat_obj,
 group.by.vars="Sample"
)

# harmonized module eigengenes:
hMEs <- GetMEs(seurat_obj)

# module eigengenes:
MEs <- GetMEs(seurat_obj, harmonized=FALSE)

# compute eigengene-based connectivity (kME):
seurat_obj <- ModuleConnectivity(
  seurat_obj,
  group.by = 'cell_type', group_name = 'INH'
)

# rename the modules
seurat_obj <- ResetModuleNames(
  seurat_obj,
  new_name = "INH-M"
)

# plot genes ranked by kME for each module
p <- PlotKMEs(seurat_obj, ncol=5)

p

image.png

# get the module assignment table:
modules <- GetModules(seurat_obj)

# show the first 6 columns:
head(modules[,1:6])

# get hub genes
hub_df <- GetHubGenes(seurat_obj, n_hubs = 10)

head(hub_df)

# compute gene scoring for the top 25 hub genes by kME for each module
# with Seurat method
seurat_obj <- ModuleExprScore(
  seurat_obj,
  n_genes = 25,
  method='Seurat'
)

# compute gene scoring for the top 25 hub genes by kME for each module
# with UCell method
library(UCell)
seurat_obj <- ModuleExprScore(
  seurat_obj,
  n_genes = 25,
  method='UCell'
)

Basic Visualization

# make a featureplot of hMEs for each module
plot_list <- ModuleFeaturePlot(
  seurat_obj,
  features='hMEs', # plot the hMEs
  order=TRUE # order so the points with highest hMEs are on top
)

# stitch together with patchwork
wrap_plots(plot_list, ncol=6)

image.png

Module Correlations

# plot module correlagram
ModuleCorrelogram(seurat_obj)

image.png

# get hMEs from seurat object
MEs <- GetMEs(seurat_obj, harmonized=TRUE)
mods <- colnames(MEs); mods <- mods[mods != 'grey']

# add hMEs to Seurat meta-data:
seurat_obj@meta.data <- cbind(seurat_obj@meta.data, MEs)

# plot with Seurat's DotPlot function
p <- DotPlot(seurat_obj, features=mods, group.by = 'cell_type')

# flip the x/y axes, rotate the axis labels, and change color scheme:
p <- p +
  coord_flip() +
  RotatedAxis() +
  scale_color_gradient2(high='red', mid='grey95', low='blue')

# plot output
p

image.png

# Plot INH-M4 hME using Seurat VlnPlot function
p <- VlnPlot(
  seurat_obj,
  features = 'INH-M12',
  group.by = 'cell_type',
  pt.size = 0 # don't show actual data points
)

# add box-and-whisker plots on top:
p <- p + geom_boxplot(width=.25, fill='white')

# change axis labels and remove legend:
p <- p + xlab('') + ylab('hME') + NoLegend()

# plot output
p

image.png

说在最后

整体来说，hdWGCNA分析的这个过程并不复杂，就是把已经构建好的Seurat对象中每群细胞的特征模块提取出来。从上面的结果可以看出，hdWGCNA对一些细胞群的特征提取还是很特异的，但是其中对某几群细胞的特征提取并没有成功。导致这种结果的原因有很多，比如这群细胞数量多少的问题，以及测序质量的问题，亦或者其中有几群细胞本身就很相似。但是我们也不能太挑剔，所有的优质算法都是一步步不断完善的，希望hdWGCNA能出下一个版本，并且能更好的解决这些短板。

好啦，本期分享到这里就结束了，我们下期再会~~

hdWGCNA系列推文（二）：分析单细胞转录组数据

前言

代码流程

说在最后

猜你喜欢

热点阅读