10X单细胞(10X空间转录组)分析回顾之一些细节绘图操作
2022-01-26 本文已影响0人
单细胞空间交响乐
临近年底,我们要总结,马上回家了,大家今天到底挣了点什么??反省一下,今天的自己,有没有更优秀,这一篇我给大家分享一些单细胞分析图片的细节操作,图片是我们分析结果的展示,要美观大方,就算我们过年要相亲了,自己也要打扮的精精神神的,😄
图片.png第一张图,Cluster之间的相关性
sce <- as.SingleCellExperiment(seurat)
reducedDim(sce, 'PCA_sub') <- reducedDim(sce, 'PCA')[,1:15, drop = FALSE]
ass_prob <- bootstrapCluster(sce, FUN = function(x) {
g <- buildSNNGraph(x, use.dimred = 'PCA_sub')
igraph::cluster_walktrap(g)$membership
},
clusters = sce$seurat_clusters
)
p <- ass_prob %>%
as_tibble() %>%
rownames_to_column(var = 'cluster_1') %>%
pivot_longer(
cols = 2:ncol(.),
names_to = 'cluster_2',
values_to = 'probability'
) %>%
mutate(
cluster_1 = as.character(as.numeric(cluster_1) - 1),
cluster_1 = factor(cluster_1, levels = rev(unique(cluster_1))),
cluster_2 = factor(cluster_2, levels = unique(cluster_2))
) %>%
ggplot(aes(cluster_2, cluster_1, fill = probability)) +
geom_tile(color = 'white') +
geom_text(aes(label = round(probability, digits = 2)), size = 2.5) +
scale_x_discrete(name = 'Cluster', position = 'top') +
scale_y_discrete(name = 'Cluster') +
scale_fill_gradient(
name = 'Probability', low = 'white', high = '#c0392b', na.value = '#bdc3c7',
limits = c(0,1),
guide = guide_colorbar(
frame.colour = 'black', ticks.colour = 'black', title.position = 'left',
title.theme = element_text(hjust = 1, angle = 90),
barwidth = 0.75, barheight = 10
)
) +
coord_fixed() +
theme_bw() +
theme(
legend.position = 'right',
panel.grid.major = element_blank()
)
ggsave('plots/cluster_stability.png', p, height = 6, width = 7)
图片.png
Silhouette plot
library(cluster)
distance_matrix <- dist(Embeddings(seurat[['pca']])[, 1:15])
clusters <- seurat@meta.data$seurat_clusters
silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
seurat@meta.data$silhouette_score <- silhouette[,3]
mean_silhouette_score <- mean(seurat@meta.data$silhouette_score)
p <- seurat@meta.data %>%
mutate(barcode = rownames(.)) %>%
arrange(seurat_clusters,-silhouette_score) %>%
mutate(barcode = factor(barcode, levels = barcode)) %>%
ggplot() +
geom_col(aes(barcode, silhouette_score, fill = seurat_clusters), show.legend = FALSE) +
geom_hline(yintercept = mean_silhouette_score, color = 'red', linetype = 'dashed') +
scale_x_discrete(name = 'Cells') +
scale_y_continuous(name = 'Silhouette score') +
scale_fill_manual(values = custom_colors$discrete) +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
ggsave('plots/silhouette_plot.png', p, height = 4, width = 8)
图片.png
Cluster tree
seurat <- BuildClusterTree(
seurat,
dims = 1:15,
reorder = FALSE,
reorder.numeric = FALSE
)
tree <- seurat@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)
p <- ggtree::ggtree(tree, aes(x, y)) +
scale_y_reverse() +
ggtree::geom_tree() +
ggtree::theme_tree() +
ggtree::geom_tiplab(offset = 1) +
ggtree::geom_tippoint(color = custom_colors$discrete[1:length(tree$tip.label)], shape = 16, size = 5) +
coord_cartesian(clip = 'off') +
theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
ggsave('plots/cluster_tree.png', p, height = 4, width = 6)
图片.png
比例图
temp_labels <- seurat@meta.data %>%
group_by(sample) %>%
tally()
p1 <- table_samples_by_clusters %>%
select(-c('total_cell_count')) %>%
reshape2::melt(id.vars = 'sample') %>%
mutate(sample = factor(sample, levels = levels(seurat@meta.data$sample))) %>%
ggplot(aes(sample, value)) +
geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
geom_text(
data = temp_labels,
aes(x = sample, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
color = 'black', size = 2.8
) +
scale_fill_manual(name = 'Cluster', values = custom_colors$discrete) +
scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
coord_cartesian(clip = 'off') +
theme_bw() +
theme(
legend.position = 'left',
plot.title = element_text(hjust = 0.5),
text = element_text(size = 16),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
plot.margin = margin(t = 20, r = 0, b = 0, l = 0, unit = 'pt')
)
temp_labels <- seurat@meta.data %>%
group_by(seurat_clusters) %>%
tally() %>%
dplyr::rename('cluster' = seurat_clusters)
p2 <- table_clusters_by_samples %>%
select(-c('total_cell_count')) %>%
reshape2::melt(id.vars = 'cluster') %>%
mutate(cluster = factor(cluster, levels = levels(seurat@meta.data$seurat_clusters))) %>%
ggplot(aes(cluster, value)) +
geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
geom_text(
data = temp_labels, aes(x = cluster, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
color = 'black', size = 2.8
) +
scale_fill_manual(name = 'Sample', values = custom_colors$discrete) +
scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
coord_cartesian(clip = 'off') +
theme_bw() +
theme(
legend.position = 'right',
plot.title = element_text(hjust = 0.5),
text = element_text(size = 16),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title = element_blank(),
plot.margin = margin(t = 20, r = 0, b = 0, l = 10, unit = 'pt')
)
ggsave(
'plots/composition_samples_clusters_by_percent.png',
p1 + p2 +
plot_layout(ncol = 2, widths = c(
seurat@meta.data$sample %>% unique() %>% length(),
seurat@meta.data$seurat_clusters %>% unique() %>% length()
)),
width = 18, height = 8
)
图片.png
temp_labels <- seurat@meta.data %>%
group_by(sample) %>%
tally()
p1 <- table_samples_by_cell_cycle %>%
select(-c('total_cell_count')) %>%
reshape2::melt(id.vars = 'sample') %>%
mutate(sample = factor(sample, levels = levels(seurat@meta.data$sample))) %>%
ggplot(aes(sample, value)) +
geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
geom_text(
data = temp_labels,
aes(x = sample, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
color = 'black', size = 2.8
) +
scale_fill_manual(name = 'Cell cycle', values = custom_colors$cell_cycle) +
scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
coord_cartesian(clip = 'off') +
theme_bw() +
theme(
legend.position = 'none',
plot.title = element_text(hjust = 0.5),
text = element_text(size = 16),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
plot.margin = margin(t = 20, r = 0, b = 0, l = 0, unit = 'pt')
)
temp_labels <- seurat@meta.data %>%
group_by(seurat_clusters) %>%
tally() %>%
dplyr::rename('cluster' = seurat_clusters)
p2 <- table_clusters_by_cell_cycle %>%
select(-c('total_cell_count')) %>%
reshape2::melt(id.vars = 'cluster') %>%
mutate(cluster = factor(cluster, levels = levels(seurat@meta.data$seurat_clusters))) %>%
ggplot(aes(cluster, value)) +
geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
geom_text(
data = temp_labels,
aes(x = cluster, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
color = 'black', size = 2.8
) +
scale_fill_manual(name = 'Cell cycle', values = custom_colors$cell_cycle) +
scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
coord_cartesian(clip = 'off') +
theme_bw() +
theme(
legend.position = 'right',
plot.title = element_text(hjust = 0.5),
text = element_text(size = 16),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.title = element_blank(),
plot.margin = margin(t = 20, r = 0, b = 0, l = 10, unit = 'pt')
)
ggsave(
'plots/composition_samples_clusters_by_cell_cycle_by_percent.png',
p1 + p2 +
plot_layout(ncol = 2, widths = c(
seurat@meta.data$sample %>% unique() %>% length(),
seurat@meta.data$seurat_clusters %>% unique() %>% length()
)),
width = 18, height = 8
)
图片.png
SNN graph
library(ggnetwork)
SCT_snn <- seurat@graphs$SCT_snn %>%
as.matrix() %>%
ggnetwork() %>%
left_join(seurat@meta.data %>% mutate(vertex.names = rownames(.)), by = 'vertex.names')
p1 <- ggplot(SCT_snn, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(color = 'grey50', alpha = 0.05) +
geom_nodes(aes(color = sample), size = 0.5) +
scale_color_manual(
name = 'Sample', values = custom_colors$discrete,
guide = guide_legend(ncol = 1, override.aes = list(size = 2))
) +
theme_blank() +
theme(legend.position = 'left') +
annotate(
geom = 'text', x = Inf, y = -Inf,
label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
)
p2 <- ggplot(SCT_snn, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(color = 'grey50', alpha = 0.05) +
geom_nodes(aes(color = seurat_clusters), size = 0.5) +
scale_color_manual(
name = 'Cluster', values = custom_colors$discrete,
guide = guide_legend(ncol = 1, override.aes = list(size = 2))
) +
theme_blank() +
theme(legend.position = 'right') +
annotate(
geom = 'text', x = Inf, y = -Inf,
label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
)
ggsave(
'plots/snn_graph_by_sample_cluster.png',
p1 + p2 + plot_layout(ncol = 2),
height = 5, width = 11
)
图片.png
UMAP图
plot_umap_by_nCount <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
ggplot(aes(UMAP_1, UMAP_2, color = nCount_RNA)) +
geom_point(size = 0.2) +
theme_bw() +
scale_color_viridis(
guide = guide_colorbar(frame.colour = 'black', ticks.colour = 'black'),
labels = scales::comma,
) +
labs(color = 'Number of\ntranscripts') +
theme(legend.position = 'left') +
coord_fixed() +
annotate(
geom = 'text', x = Inf, y = -Inf,
label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
)
plot_umap_by_sample <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
ggplot(aes(UMAP_1, UMAP_2, color = sample)) +
geom_point(size = 0.2) +
theme_bw() +
scale_color_manual(values = custom_colors$discrete) +
labs(color = 'Sample') +
guides(colour = guide_legend(override.aes = list(size = 2))) +
theme(legend.position = 'right') +
coord_fixed() +
annotate(
geom = 'text', x = Inf, y = -Inf,
label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
)
plot_umap_by_cluster <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
ggplot(aes(UMAP_1, UMAP_2, color = seurat_clusters)) +
geom_point(size = 0.2) +
theme_bw() +
scale_color_manual(
name = 'Cluster', values = custom_colors$discrete,
guide = guide_legend(ncol = 2, override.aes = list(size = 2))
) +
theme(legend.position = 'left') +
coord_fixed() +
annotate(
geom = 'text', x = Inf, y = -Inf,
label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
)
plot_umap_by_cell_cycle <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
ggplot(aes(UMAP_1, UMAP_2, color = cell_cycle_seurat)) +
geom_point(size = 0.2) +
theme_bw() +
scale_color_manual(values = custom_colors$cell_cycle) +
labs(color = 'Cell cycle') +
guides(colour = guide_legend(override.aes = list(size = 2))) +
theme(legend.position = 'right') +
coord_fixed() +
annotate(
geom = 'text', x = Inf, y = -Inf,
label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
)
ggsave(
'plots/umap.png',
plot_umap_by_nCount + plot_umap_by_sample +
plot_umap_by_cluster + plot_umap_by_cell_cycle +
plot_layout(ncol = 2),
height = 6,
width = 8.5
)
图片.png
Alluvial plots
## get sample and cluster names
samples <- levels(seurat@meta.data$sample)
clusters <- levels(seurat@meta.data$seurat_clusters)
## create named vector holding the color assignments for both samples and
## clusters
color_assignments <- setNames(
c(custom_colors$discrete[1:length(samples)], custom_colors$discrete[1:length(clusters)]),
c(samples,clusters)
)
## prepare data for the plot; factor() calls are necessary for the right order
## of columns (first samples then clusters) and boxes within each column (
## cluster 1, 2, 3, ..., not 1, 10, 11, ...)
data <- seurat@meta.data %>%
group_by(sample,seurat_clusters) %>%
tally() %>%
ungroup() %>%
gather_set_data(1:2) %>%
dplyr::mutate(
x = factor(x, levels = unique(x)),
y = factor(y, levels = unique(y))
)
DataFrame(data)
# DataFrame with 114 rows and 6 columns
# sample seurat_clusters n id x y
# <factor> <factor> <integer> <integer> <factor> <factor>
# 1 A 0 301 1 sample A
# 2 A 1 137 2 sample A
# 3 A 2 121 3 sample A
# 4 A 3 223 4 sample A
# 5 A 4 78 5 sample A
# ... ... ... ... ... ... ...
# 110 E 14 19 53 seurat_clusters 14
# 111 E 15 18 54 seurat_clusters 15
# 112 E 16 10 55 seurat_clusters 16
# 113 E 17 9 56 seurat_clusters 17
# 114 E 18 15 57 seurat_clusters 18
## create sample and cluster labels; hjust defines whether a label will be
## aligned to the right (1) or to the left (0); the nudge_x parameter is used
## to move the label outside of the boxes
data_labels <- tibble(
group = c(
rep('sample', length(samples)),
rep('seurat_clusters', length(clusters))
)
) %>%
mutate(
hjust = ifelse(group == 'sample', 1, 0),
nudge_x = ifelse(group == 'sample', -0.1, 0.1)
)
DataFrame(data_labels)
# DataFrame with 22 rows and 3 columns
# group hjust nudge_x
# <character> <numeric> <numeric>
# 1 sample 1 -0.1
# 2 sample 1 -0.1
# 3 sample 1 -0.1
# 4 seurat_clusters 0 0.1
# 5 seurat_clusters 0 0.1
# ... ... ... ...
# 18 seurat_clusters 0 0.1
# 19 seurat_clusters 0 0.1
# 20 seurat_clusters 0 0.1
# 21 seurat_clusters 0 0.1
# 22 seurat_clusters 0 0.1
## create plot
p1 <- ggplot(data, aes(x, id = id, split = y, value = n)) +
geom_parallel_sets(aes(fill = seurat_clusters), alpha = 0.75, axis.width = 0.15) +
geom_parallel_sets_axes(aes(fill = y), color = 'black', axis.width = 0.1) +
geom_text(
aes(y = n, split = y), stat = 'parallel_sets_axes', fontface = 'bold',
hjust = data_labels$hjust, nudge_x = data_labels$nudge_x
) +
scale_x_discrete(labels = c('Sample','Cluster')) +
scale_fill_manual(values = color_assignments) +
theme_bw() +
theme(
legend.position = 'none',
axis.title = element_blank(),
axis.text.x = element_text(face = 'bold', colour = 'black', size = 15),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank()
)
clusters <- levels(seurat@meta.data$seurat_clusters)
cell_types <- sort(unique(seurat@meta.data$cell_type_singler_blueprintencode_main))
color_assignments <- setNames(
c(custom_colors$discrete[1:length(clusters)], custom_colors$discrete[1:length(cell_types)]),
c(clusters,cell_types)
)
data <- seurat@meta.data %>%
dplyr::rename(cell_type = cell_type_singler_blueprintencode_main) %>%
dplyr::mutate(cell_type = factor(cell_type, levels = cell_types)) %>%
group_by(seurat_clusters, cell_type) %>%
tally() %>%
ungroup() %>%
gather_set_data(1:2) %>%
dplyr::mutate(
x = factor(x, levels = unique(x)),
y = factor(y, levels = c(clusters,cell_types))
)
data_labels <- tibble(
group = c(
rep('seurat_clusters', length(clusters)),
rep('cell_type', length(cell_types))
)
) %>%
mutate(
hjust = ifelse(group == 'seurat_clusters', 1, 0),
nudge_x = ifelse(group == 'seurat_clusters', -0.1, 0.1)
)
p2 <- ggplot(data, aes(x, id = id, split = y, value = n)) +
geom_parallel_sets(aes(fill = seurat_clusters), alpha = 0.75, axis.width = 0.15) +
geom_parallel_sets_axes(aes(fill = y), color = 'black', axis.width = 0.1) +
geom_text(
aes(y = n, split = y), stat = 'parallel_sets_axes', fontface = 'bold',
hjust = data_labels$hjust, nudge_x = data_labels$nudge_x
) +
scale_x_discrete(labels = c('Cluster','Cell type')) +
scale_fill_manual(values = color_assignments) +
theme_bw() +
theme(
legend.position = 'none',
axis.title = element_blank(),
axis.text.x = element_text(face = 'bold', colour = 'black', size = 15),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank()
)
ggsave(
'plots/samples_clusters_cell_types_alluvial.png',
p1 + p2 + plot_layout(ncol = 2),
height = 6, width = 8
)
图片.png
UMAP by cell type
UMAP_centers_cell_type <- tibble(
UMAP_1 = as.data.frame(seurat@reductions$UMAP@cell.embeddings)$UMAP_1,
UMAP_2 = as.data.frame(seurat@reductions$UMAP@cell.embeddings)$UMAP_2,
cell_type = seurat@meta.data$cell_type_singler_blueprintencode_main
) %>%
group_by(cell_type) %>%
summarize(x = median(UMAP_1), y = median(UMAP_2))
p <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
ggplot(aes(UMAP_1, UMAP_2, color = cell_type_singler_blueprintencode_main)) +
geom_point(size = 0.2) +
geom_label(
data = UMAP_centers_cell_type,
mapping = aes(x, y, label = cell_type),
size = 3.5,
fill = 'white',
color = 'black',
fontface = 'bold',
alpha = 0.5,
label.size = 0,
show.legend = FALSE
) +
theme_bw() +
expand_limits(x = c(-22,15)) +
scale_color_manual(values = custom_colors$discrete) +
labs(color = 'Cell type') +
guides(colour = guide_legend(override.aes = list(size = 2))) +
theme(legend.position = 'right') +
coord_fixed() +
annotate(
geom = 'text', x = Inf, y = -Inf,
label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
)
ggsave(
'plots/umap_by_cell_type_singler_blueprintencode_main.png',
p,
height = 4,
width = 6
)
图片.png
Dot plot (multiple genes)
# cells will be grouped by samples that they have been assigned to
group_ids <- unique(seurat@meta.data$cell_type_singler_blueprintencode_main)
# select a set of genes for which we want to show expression
genes_to_show <- seurat@misc$marker_genes$cerebro_seurat$cell_type_singler_blueprintencode_main %>%
group_by(cell_type_singler_blueprintencode_main) %>%
arrange(p_val_adj) %>%
filter(row_number() == 1) %>%
arrange(cell_type_singler_blueprintencode_main) %>%
pull(gene)
# for every sample-gene combination, calculate the average expression across
# all cells and then transform the data into a data frame
expression_levels_per_group <- vapply(
group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
cells_in_current_group <- which(seurat@meta.data$cell_type_singler_blueprintencode_main == x)
Matrix::rowMeans(seurat@assays$SCT@data[genes_to_show,cells_in_current_group])
}
) %>%
t() %>%
as.data.frame() %>%
mutate(cell_type_singler_blueprintencode_main = rownames(.)) %>%
select(cell_type_singler_blueprintencode_main, everything()) %>%
pivot_longer(
cols = c(2:ncol(.)),
names_to = 'gene'
) %>%
dplyr::rename(expression = value) %>%
mutate(id_to_merge = paste0(cell_type_singler_blueprintencode_main, '_', gene))
# for every sample-gene combination, calculate the percentage of cells in the
# respective group that has at least 1 transcript (this means we consider it
# as expressing the gene) and then transform the data into a data frame
percentage_of_cells_expressing_gene <- vapply(
group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
cells_in_current_group <- which(seurat@meta.data$cell_type_singler_blueprintencode_main == x)
Matrix::rowSums(seurat@assays$SCT@data[genes_to_show,cells_in_current_group] != 0)
}
) %>%
t() %>%
as.data.frame() %>%
mutate(cell_type_singler_blueprintencode_main = rownames(.)) %>%
select(cell_type_singler_blueprintencode_main, everything()) %>%
pivot_longer(
cols = c(2:ncol(.)),
names_to = 'gene'
) %>%
dplyr::rename(cell_count = value) %>%
left_join(
.,
seurat@meta.data %>%
group_by(cell_type_singler_blueprintencode_main) %>%
tally(),
by = 'cell_type_singler_blueprintencode_main') %>%
mutate(
id_to_merge = paste0(cell_type_singler_blueprintencode_main, '_', gene),
percent_cells = cell_count / n
)
# merge the two data frames created before and plot the data
p <- left_join(
expression_levels_per_group,
percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
by = 'id_to_merge'
) %>%
mutate(
cell_type_singler_blueprintencode_main = factor(cell_type_singler_blueprintencode_main, levels = rev(group_ids)),
gene = factor(gene, levels = genes_to_show)
) %>%
arrange(gene) %>%
ggplot(aes(gene, cell_type_singler_blueprintencode_main)) +
geom_point(aes(color = expression, size = percent_cells)) +
scale_color_distiller(
palette = 'Reds',
direction = 1,
name = 'Log-normalised\nexpression',
guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
) +
scale_size(name = 'Percent\nof cells', labels = scales::percent) +
labs(y = 'Cell type', color = 'Expression') +
coord_fixed() +
theme_bw() +
theme(
axis.title = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
ggsave('plots/marker_genes_by_cell_type_as_dot_plot.png', p, height = 5, width = 6)
图片.png
group_ids <- levels(seurat@meta.data$seurat_clusters)
genes_to_show <- seurat@misc$marker_genes$cerebro_seurat$seurat_clusters %>%
group_by(seurat_clusters) %>%
arrange(p_val_adj) %>%
filter(row_number() == 1) %>%
arrange(seurat_clusters) %>%
pull(gene)
expression_levels_per_group <- vapply(
group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
cells_in_current_group <- which(seurat@meta.data$seurat_clusters == x)
Matrix::rowMeans(seurat@assays$SCT@data[genes_to_show,cells_in_current_group])
}
) %>%
t() %>%
as.data.frame() %>%
mutate(cluster = rownames(.)) %>%
select(cluster, everything()) %>%
pivot_longer(
cols = c(2:ncol(.)),
names_to = 'gene'
) %>%
dplyr::rename(expression = value) %>%
mutate(id_to_merge = paste0(cluster, '_', gene))
percentage_of_cells_expressing_gene <- vapply(
group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
cells_in_current_group <- which(seurat@meta.data$seurat_cluster == x)
Matrix::rowSums(seurat@assays$SCT@data[genes_to_show,cells_in_current_group] != 0)
}
) %>%
t() %>%
as.data.frame() %>%
mutate(cluster = rownames(.)) %>%
select(cluster, everything()) %>%
pivot_longer(
cols = c(2:ncol(.)),
names_to = 'gene'
) %>%
dplyr::rename(cell_count = value) %>%
left_join(
.,
seurat@meta.data %>%
group_by(seurat_clusters) %>%
tally() %>%
dplyr::rename(cluster = seurat_clusters),
by = 'cluster') %>%
mutate(
id_to_merge = paste0(cluster, '_', gene),
percent_cells = cell_count / n
)
p <- left_join(
expression_levels_per_group,
percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
by = 'id_to_merge'
) %>%
mutate(
cluster = factor(cluster, levels = rev(group_ids)),
gene = factor(gene, levels = genes_to_show)
) %>%
arrange(gene) %>%
ggplot(aes(gene, cluster)) +
geom_point(aes(color = expression, size = percent_cells)) +
scale_color_distiller(
palette = 'Reds',
direction = 1,
name = 'Log-normalised\nexpression',
guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
) +
scale_size(name = 'Percent\nof cells', labels = scales::percent) +
labs(y = 'Cluster', color = 'Expression') +
coord_fixed() +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
ggsave('plots/marker_genes_by_cluster_as_dot_plot.png', p, height = 7, width = 8)
图片.png
Gene set enrichment analysis
# function to read GMT file
read_GMT_file <- function(file) {
gmt <- readr::read_delim(
file,
delim = ';',
col_names = c('X1'),
col_types = readr::cols()
)
gene_set_genes <- list()
for ( i in seq_len(nrow(gmt)) )
{
temp_genes <- strsplit(gmt$X1[i], split = '\t')[[1]] %>% unlist()
temp_genes <- temp_genes[3:length(temp_genes)]
gene_set_genes[[i]] <- temp_genes
}
gene_set_loaded <- list(
genesets = gene_set_genes,
geneset.names = lapply(strsplit(gmt$X1, split = '\t'), '[', 1) %>% unlist(),
geneset.description = lapply(
strsplit(gmt$X1, split = '\t'), '[', 2
) %>% unlist()
)
return(gene_set_loaded)
}
# load gene sets from GMT file
gene_sets <- read_GMT_file('h.all.v7.2.symbols.gmt')
# set gene set names
names(gene_sets$genesets) <- gene_sets$geneset.names
# get indices of cells which are either HSC or monocytes
cells_to_analyze <- seurat@meta.data %>%
mutate(row_number = row_number()) %>%
filter(grepl(cell_type_singler_blueprintencode_main, pattern = 'HSC|Monocytes')) %>%
arrange(cell_type_singler_blueprintencode_main) %>%
pull(row_number)
# get list of genes unique genes across all gene sets
genes_to_analyze <- gene_sets$genesets %>% unlist() %>% unique()
# filter gene list for those which are present in the data set
genes_to_analyze <- genes_to_analyze[which(genes_to_analyze %in% rownames(seurat@assays$SCT@counts))]
# get expression matrix and reduce it to cells and genes of interest
expression_matrix <- seurat@assays$SCT@counts[ genes_to_analyze , cells_to_analyze] %>% as.matrix()
# perform GSVA
gsva <- GSVA::gsva(
expression_matrix,
gset.idx.list = gene_sets$genesets,
parallel.sz = 1
)
# load limma library for statistical testing
library(limma)
# generate design matrix
design_matrix <- tibble(
control = 1,
test = c(
rep(0, seurat@meta.data %>% filter(cell_type_singler_blueprintencode_main == 'HSC') %>% nrow()),
rep(1, seurat@meta.data %>% filter(cell_type_singler_blueprintencode_main == 'Monocytes') %>% nrow())
)
)
head(design_matrix)
# A tibble: 6 x 2
# control test
# <dbl> <dbl>
# 1 1 0
# 2 1 0
# 3 1 0
# 4 1 0
# 5 1 0
# 6 1 0
# fit linear model, followed by empirical Bayes statistics for differential
# enrichment analysis
fit <- lmFit(gsva, design_matrix)
fit <- eBayes(fit)
# prepare data for plotting
data <- topTable(fit, coef = 'test', number = 50) %>%
mutate(gene_set = rownames(fit$t)) %>%
arrange(t) %>%
mutate(
gene_set = factor(gene_set, levels = gene_set),
just = ifelse(t < 0, 0, 1),
nudge_y = ifelse(t < 0, 1, -1),
color = ifelse(t < -5 | t > 5, 'black', 'grey')
)
DataFrame(data)
# DataFrame with 50 rows and 10 columns
# logFC AveExpr t P.Value adj.P.Val B gene_set just nudge_y color
# <numeric> <numeric> <numeric> <numeric> <numeric> <numeric> <factor> <numeric> <numeric> <character>
# 1 -0.369443 -0.1569690 -35.1619 1.26066e-186 1.05055e-185 415.980 HALLMARK_TGF_BETA_SIGNALING 0 1 black
# 2 -0.251413 -0.0820012 -27.2261 1.96152e-127 8.91598e-127 279.761 HALLMARK_NOTCH_SIGNALING 0 1 black
# 3 -0.288169 -0.1202293 -26.1464 1.40712e-119 5.41201e-119 261.689 HALLMARK_ESTROGEN_RESPONSE_EARLY 0 1 black
# 4 -0.135034 -0.1511146 -22.3565 8.50194e-93 2.36165e-92 200.094 HALLMARK_INTERFERON_ALPHA_RESPONSE 0 1 black
# 5 -0.203651 -0.1049498 -22.0728 7.44006e-91 1.95791e-90 195.628 HALLMARK_INTERFERON_GAMMA_RESPONSE 0 1 black
# ... ... ... ... ... ... ... ... ... ... ...
# 46 0.200259 0.25341594 35.9250 2.31901e-192 2.31901e-191 429.182 HALLMARK_WNT_BETA_CATENIN_SIGNALING 1 -1 black
# 47 0.293113 -0.08115610 36.1995 2.00784e-194 2.50980e-193 433.929 HALLMARK_MITOTIC_SPINDLE 1 -1 black
# 48 0.338449 0.11583774 36.2560 7.56196e-195 1.26033e-193 434.906 HALLMARK_CHOLESTEROL_HOMEOSTASIS 1 -1 black
# 49 0.251678 0.00262136 37.5117 2.86772e-204 7.16930e-203 456.592 HALLMARK_HYPOXIA 1 -1 black
# 50 0.282907 -0.05021404 48.5971 1.72523e-285 8.62616e-284 643.571 HALLMARK_TNFA_SIGNALING_VIA_NFKB 1 -1 black
# plot t-value
p <- ggplot(data = data, aes(x = gene_set, y = t, fill = t)) +
geom_col() +
geom_hline(yintercept = c(-5,5), linetype = 'dashed', color = 'grey80') +
geom_text(
aes(
x = gene_set,
y = 0,
label = gene_set,
hjust = just,
color = color
),
nudge_y = data$nudge_y, size = 3
) +
scale_x_discrete(name = '', labels = NULL) +
scale_y_continuous(name = 't-value', limits = c(-55,55)) +
scale_fill_distiller(palette = 'Spectral', limits = c(-max(data$t), max(data$t))) +
scale_color_manual(values = c('black' = 'black', 'grey' = 'grey')) +
coord_flip() +
theme_bw() +
theme(
panel.grid = element_blank(),
axis.ticks.y = element_blank(),
legend.position = 'none'
)
ggsave('plots/gsva_hallmark_gene_sets_monocytes_vs_hsc.png', height = 7.5, width = 7.5)
图片.png
通讯热图
library(pheatmap)
library(argparse)
heatmap <- function (Matrix,labels){
plot_heatmap <- pheatmap(Matrix,scale='none',cluster_cols = F,cluster_rows = F,cellwidth = 20,cellheight = 20,color = colorRampPalette(colors = c("white","red"))(100),display_numbers=labels)
return(plot_heatmap)
}
getSig <- function(dc) {
if(dc > pvalue){sc <- ""}
if(dc < pvalue){sc <- "*"}}
parser = ArgumentParser()
parser$add_argument("--LR_score", help="the file of sample.LR.score.xls",required =T)
parser$add_argument("--pvalue", help="the file of sample.LR.score.xls",default = '0.1')
parser$add_argument("--outdir", help="the outdir",default='.')
args <- parser$parse_args()
str(args)
LR_score = args$LR_score
pvalue = args$pvalue
outdir = args$outdir
pvalue <- as.numeric(pvalue)
if(!dir.exists(outdir)){
dir.create(outdir)
}
setwd(outdir)
file <- read.table(LR_score,sep='\t',header=T,row.names=1,check.names=F)
len <- as.numeric(length(colnames(file)))
Max <- max(as.numeric(sapply(strsplit(colnames(file),'_'),"[",2)[1:len/2]))
for (i in 1:Max){
index_score <- as.array(which(sapply(strsplit(colnames(file),'_'),"[",1) == paste0('cluster',as.character(i))))
index_pvalue <- as.array(which(sapply(strsplit(colnames(file),'_'),"[",2) == paste0('pvalue',as.character(i))))
index <- c(index_score,index_pvalue)
Matrix <- file[,index]
LR <- rownames(as.matrix(sort(apply(Matrix,1,sum),decreasing=T)[1:30]))
Matrix_LR <- Matrix[LR,]
label <- as.matrix(sapply(Matrix_LR[,as.numeric(Max+1):as.numeric(length(colnames(Matrix_LR)))],function(x){ifelse(x>pvalue,sc<-"",sc<-"*")}),ncol=Max)
pdf(paste0('cluster',i,'_others_LR.pdf'),width=6,height=14)
heatmap(Matrix_LR[,1:Max],label)
dev.off()
png(paste0('cluster',i,'_others_LR.png'),width=6*200,height=14*200,res=200,type = 'cario-png')
dev.off()
}
图片.png
生活很好,有你更好