单细胞空间数据分析之velocity
2024-05-16 本文已影响0人
单细胞空间交响乐
作者,Evil Genius
今天我们要完成另外一项工程,单细胞和空间的velocity,拿到如下的结果
单细胞 空间在生物的世界里,velocyto衡量恶性细胞的进化方向非常常见。
这也是轨迹分析和CNV分析相结合的分析点。
调试了半天,大晚上的,夜景远没有上海漂亮~~~
现在真的是两极分化,要么从源头上写代码分析数据,要么就用别人现成的内容,可调整的空间大大缩小。
from IPython.display import display, HTML, Image
display(HTML("<style>.container { width:95% !important; }</style>"))
%load_ext autoreload
%autoreload 2
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.cbook
warnings.simplefilter(action='ignore', category=matplotlib.cbook.MatplotlibDeprecationWarning)
import os
import numpy as np
import pandas as pd
import scanpy as sc
from scipy.sparse import csr_matrix
import scvelo as scv
import matplotlib.pyplot as plt
import sys
sys.path.append("../../lib")
from stpalette import palette_WM4007_rna, palette_WM4237_rna
from pathways import genesDicts
from plots import plotSpatialAll
from plots import plotPhiHeatmap, plotFiForAd
from utils import computFullGridVelocity, getGridPhi, getGridVelocityForSourceSink, getFiOfEmb, loadAddLayersVelocytoData
需要加载的stpalette、pathways、plots、utils放在了百度网盘,接受服务,我也学一学别人接一下分析项目。
model = 'WM4007'
palette_rna = palette_WM4007_rna if model=='WM4007' else palette_WM4237_rna
initialLibrarySizes = {'WM4007_T0_S1_ST': 30967.0,
'WM4007_T0_S2_ST': 31210.0,
'WM4007_T1_S1_ST': 27910.0,
'WM4007_T1_S2_ST': 28700.0,
'WM4007_T2_S1_ST': 20318.0,
'WM4007_T2_S2_ST': 14094.0,
'WM4007_T3_S1_ST': 25289.0,
'WM4007_T3_S2_ST': 24304.0,
'WM4007_T4_S1_ST': 26625.0,
'WM4007_T4_S2_ST': 30744.0,
'WM4007_TC_S1_ST': 26802.0,
'WM4007_TC_S2_ST': 37152.0}
dataPath = '../../data/'
preprocessedStDataPath = 'd:/from HPCC 11 28 2022/results_NF1-nod-t2t-k35/%s/' % model
preprocessedVelDataPath = 'd:/from HPCC 11 28 2022/results_NF3-nod-t2t-k35/%s/' % model
ids = sorted(np.loadtxt(dataPath + 'ids_%s_ST.txt' % model, dtype=str))
print(ids, '\n')
ids = pd.Series(ids)
cond1 = ids.str.contains('E_S')
cond2 = ids.str.contains('0_S')
cond3 = ids.str.contains('C_S')
#ids = ids[cond1].values.tolist() + ids[cond2].values.tolist() + ids[cond3].values.tolist() + ids[~cond1 & ~cond2 & ~cond3].values.tolist()
ids = ids[cond2].values.tolist() + ids[cond3].values.tolist() + ids[~cond1 & ~cond2 & ~cond3].values.tolist()
# ids = ids.values.tolist()
print(ids)
加载数据
ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
ad_all = ad_all[ad_all.obs['human_ratio'] > 0.25]
ad_all.shape
sc.pl.umap(ad_all, color='cluster', palette=palette_rna)
ads = {id: ad_all[ad_all.obs['sample']==id, :] for id in ids}
images = {id: sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id).uns['spatial'] for id in ids}
for sample in ids[:]:
ads[sample] = ad_all[ad_all.obs['sample']==sample, :].copy()
ads[sample].uns['spatial'] = images[sample]
plotSpatialAll(ads, identity='cluster', palette=palette_rna, f=0.65)
loadAddLayersVelocytoData(ids=ids[:], ads=ads, dataPath=preprocessedVelDataPath, identity='cluster', initialLibrarySizes=initialLibrarySizes)
def getLayersAll():
df_var = pd.concat([ads[id].var for id in ids])
ad_all_temp = sc.concat({id:ads[id] for id in ids[:]}, label='sample', join='outer')
ad_all_temp.var = df_var.loc[~df_var.index.duplicated()].reindex(ad_all_temp.var.index)
ad_all_temp = ad_all_temp[:, ad_all_temp.var['is_human_gene']]
return ad_all_temp.layers['spliced'], ad_all_temp.layers['unspliced'], ad_all_temp.obs.index, ad_all_temp.var.index
layer_spliced, layer_unspliced, spots, genes = getLayersAll()
ad_all_temp = ad_all[spots, genes].copy()
ad_all_temp.layers['spliced'] = layer_spliced
ad_all_temp.layers['unspliced'] = layer_unspliced
del layer_spliced, layer_unspliced
Calculate velocity for the entire model
scv.tl.velocity_graph(ad_all_temp, basis='umap')
plt.rcParams["figure.figsize"] = (10,10)
identity = 'cluster'
scv.pl.velocity_embedding_stream(ad_all_temp, basis='X_umap', arrow_size=1.2, color=identity, palette=palette_rna, title='', size=75, alpha=0.75, legend_loc='none')
plt.rcParams["figure.figsize"] = (5,5)
plotFiForAd(ad_all_temp, size=20, identity='cluster', palette=palette_rna)
plt.rcParams["figure.figsize"] = (7,7)
scv.pl.velocity(ad_all_temp, ['CREB3', 'DCT', 'RAF1', 'TYRP1'], ncols=2)
plt.rcParams["figure.figsize"] = (4,4)
scv.pl.scatter(ad_all_temp, 'CDKN2A', color=['cluster', 'velocity'], size=5, palette=palette_rna, wspace=0.35, add_outline='4')
# This step needs plenty of memory
scv.tl.rank_velocity_genes(ad_all_temp, groupby='cluster', min_corr=0.3)
df = scv.DataFrame(ad_all_temp.uns['rank_velocity_genes']['names'])
kwargs = dict()
scv.pl.scatter(ad_all_temp, df['4'][:5], ylabel='4', frameon=False, size=5, linewidth=1.5, add_outline='4')
scv.tl.velocity_confidence(ad_all_temp)
keys = 'velocity_length', 'velocity_confidence'
scv.pl.scatter(ad_all_temp, c=keys, cmap='coolwarm', perc=[5, 95], title=['Differentiation rate', 'Coherence'])
df = ad_all_temp.obs.groupby('cluster')[keys].mean().T
df.style.background_gradient(cmap='coolwarm', axis=1)
scv.tl.paga(ad_all_temp, groups='cluster')
scv.pl.paga(ad_all_temp, basis='umap', size=50, alpha=0.1, min_edge_width=2, node_size_scale=1.5)
Calculate velocity on UMAP per timepoint
ad_all_temp_T = ad_all_temp[ad_all_temp.obs['T'].isin(['TC']), :]
sc.pp.neighbors(ad_all_temp_T, use_rep='X_pca_harmony')
sc.tl.umap(ad_all_temp_T)
scv.tl.velocity(ad_all_temp_T, mode='stochastic')
scv.tl.velocity_graph(ad_all_temp_T, n_jobs=4, basis='umap')
plt.rcParams["figure.figsize"] = (5, 5)
scv.pl.velocity_embedding_stream(ad_all_temp_T, basis='X_umap', arrow_size=1.2, color=identity, palette=palette_rna, title='', size=75, alpha=0.75, legend_loc='none')
plotFiForAd(ad_all_temp_T)
f = 1.0
ncols = 4
nrows = 3
fig, axs = plt.subplots(nrows, ncols, figsize=(f*4.5*ncols, f*4.5*nrows))
for isample, sample in enumerate(ids):
try:
ax = axs[int((isample - isample%ncols)/nrows), isample%ncols]
identity = 'cluster'
ad = ads[sample]
scv.pl.velocity_embedding_stream(ad, basis='X_spatial', arrow_size=0.6, color=identity, palette=palette_rna,
title=sample, size=75, alpha=0.75, show=False, ax=ax, legend_loc='none') # 250 for 12x12
(xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)
except Exception as exception:
print(exception)
plt.subplots_adjust(wspace=0.15, hspace=0.2)
# fig.savefig('spatial rna velocity.png', dpi=450)
for id in ids:
ad = ads[id]
try:
X_grid_full, V_grid_full, X_emb = computFullGridVelocity(ad, emb='spatial')
gridPhi = getGridPhi(X_grid_full, V_grid_full, R=2)
plotPhiHeatmap(gridPhi, X_grid_full, emb='spatial');
if True:
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
params = dict(density=1, arrow_size=100, arrow_length=2000, color=identity, palette=palette_rna, size=45, alpha=0.75)
ax = axs[0]
X_source, V_source = getGridVelocityForSourceSink(X_grid_full, V_grid_full, gridPhi, isSource=True)
scv.pl.velocity_embedding_grid(ad, basis='X_spatial', X_grid=X_source, V_grid=V_source, legend_loc='none', title='Source', ax=axs[0], show=False, **params)
(xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)
ax = axs[1]
X_sink, V_sink = getGridVelocityForSourceSink(X_grid_full, V_grid_full, gridPhi, isSource=False)
scv.pl.velocity_embedding_grid(ad, basis='X_spatial', X_grid=X_sink, V_grid=V_sink, legend_loc='none', title='Sink', ax=axs[1], show=False, **params)
(xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)
Fi_emb = getFiOfEmb(X_grid_full, gridPhi, X_emb)
ad.obs['Fi_spatial'] = Fi_emb
sc.pl.spatial(ad, color='Fi_spatial', cmap='bwr', vmin=-1, vmax=1)
sc.pl.violin(ad, keys='Fi_spatial', groupby='cluster')
except Exception as exception:
print(exception)