单细胞空间数据分析之velocity

2024-05-16 本文已影响0人单细胞空间交响乐

作者，Evil Genius

今天我们要完成另外一项工程，单细胞和空间的velocity，拿到如下的结果

单细胞

空间

在生物的世界里，velocyto衡量恶性细胞的进化方向非常常见。

这也是轨迹分析和CNV分析相结合的分析点。

调试了半天，大晚上的，夜景远没有上海漂亮~~~

现在真的是两极分化，要么从源头上写代码分析数据，要么就用别人现成的内容，可调整的空间大大缩小。

from IPython.display import display, HTML, Image
display(HTML("<style>.container { width:95% !important; }</style>"))

%load_ext autoreload
%autoreload 2

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.cbook
warnings.simplefilter(action='ignore', category=matplotlib.cbook.MatplotlibDeprecationWarning)

import os
import numpy as np
import pandas as pd
import scanpy as sc
from scipy.sparse import csr_matrix
import scvelo as scv
import matplotlib.pyplot as plt

import sys
sys.path.append("../../lib")

from stpalette import palette_WM4007_rna, palette_WM4237_rna
from pathways import genesDicts
from plots import plotSpatialAll
from plots import plotPhiHeatmap, plotFiForAd
from utils import computFullGridVelocity, getGridPhi, getGridVelocityForSourceSink, getFiOfEmb, loadAddLayersVelocytoData

需要加载的stpalette、pathways、plots、utils放在了百度网盘，接受服务，我也学一学别人接一下分析项目。

model = 'WM4007'

palette_rna = palette_WM4007_rna if model=='WM4007' else palette_WM4237_rna

initialLibrarySizes = {'WM4007_T0_S1_ST': 30967.0,
                    'WM4007_T0_S2_ST': 31210.0,
                    'WM4007_T1_S1_ST': 27910.0,
                    'WM4007_T1_S2_ST': 28700.0,
                    'WM4007_T2_S1_ST': 20318.0,
                    'WM4007_T2_S2_ST': 14094.0,
                    'WM4007_T3_S1_ST': 25289.0,
                    'WM4007_T3_S2_ST': 24304.0,
                    'WM4007_T4_S1_ST': 26625.0,
                    'WM4007_T4_S2_ST': 30744.0,
                    'WM4007_TC_S1_ST': 26802.0,
                    'WM4007_TC_S2_ST': 37152.0}

dataPath = '../../data/'
preprocessedStDataPath = 'd:/from HPCC 11 28 2022/results_NF1-nod-t2t-k35/%s/' % model
preprocessedVelDataPath = 'd:/from HPCC 11 28 2022/results_NF3-nod-t2t-k35/%s/' % model

ids = sorted(np.loadtxt(dataPath + 'ids_%s_ST.txt' % model, dtype=str))
print(ids, '\n')

ids = pd.Series(ids)
cond1 = ids.str.contains('E_S')
cond2 = ids.str.contains('0_S')
cond3 = ids.str.contains('C_S')
#ids = ids[cond1].values.tolist() + ids[cond2].values.tolist() + ids[cond3].values.tolist() + ids[~cond1 & ~cond2 & ~cond3].values.tolist()
ids = ids[cond2].values.tolist() + ids[cond3].values.tolist() + ids[~cond1 & ~cond2 & ~cond3].values.tolist()
# ids = ids.values.tolist()
print(ids)

加载数据

ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
ad_all = ad_all[ad_all.obs['human_ratio'] > 0.25]
ad_all.shape

sc.pl.umap(ad_all, color='cluster', palette=palette_rna)

ads = {id: ad_all[ad_all.obs['sample']==id, :] for id in ids}

images = {id: sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id).uns['spatial'] for id in ids}

for sample in ids[:]:
    ads[sample] = ad_all[ad_all.obs['sample']==sample, :].copy()    
    ads[sample].uns['spatial'] = images[sample]

plotSpatialAll(ads, identity='cluster', palette=palette_rna, f=0.65)

loadAddLayersVelocytoData(ids=ids[:], ads=ads, dataPath=preprocessedVelDataPath, identity='cluster', initialLibrarySizes=initialLibrarySizes)

def getLayersAll():   
    df_var = pd.concat([ads[id].var for id in ids])
    ad_all_temp = sc.concat({id:ads[id] for id in ids[:]}, label='sample', join='outer')
    ad_all_temp.var = df_var.loc[~df_var.index.duplicated()].reindex(ad_all_temp.var.index)
    ad_all_temp = ad_all_temp[:, ad_all_temp.var['is_human_gene']]
    return ad_all_temp.layers['spliced'], ad_all_temp.layers['unspliced'], ad_all_temp.obs.index, ad_all_temp.var.index

layer_spliced, layer_unspliced, spots, genes = getLayersAll()

ad_all_temp = ad_all[spots, genes].copy()
ad_all_temp.layers['spliced'] = layer_spliced
ad_all_temp.layers['unspliced'] = layer_unspliced
del layer_spliced, layer_unspliced

Calculate velocity for the entire model

scv.tl.velocity_graph(ad_all_temp, basis='umap')

plt.rcParams["figure.figsize"] = (10,10)
identity = 'cluster'
scv.pl.velocity_embedding_stream(ad_all_temp, basis='X_umap', arrow_size=1.2, color=identity, palette=palette_rna, title='', size=75, alpha=0.75, legend_loc='none')

plt.rcParams["figure.figsize"] = (5,5)
plotFiForAd(ad_all_temp, size=20, identity='cluster', palette=palette_rna)

plt.rcParams["figure.figsize"] = (7,7)
scv.pl.velocity(ad_all_temp, ['CREB3', 'DCT', 'RAF1', 'TYRP1'], ncols=2)

plt.rcParams["figure.figsize"] = (4,4)
scv.pl.scatter(ad_all_temp, 'CDKN2A', color=['cluster', 'velocity'], size=5, palette=palette_rna, wspace=0.35, add_outline='4')

# This step needs plenty of memory
scv.tl.rank_velocity_genes(ad_all_temp, groupby='cluster', min_corr=0.3)

df = scv.DataFrame(ad_all_temp.uns['rank_velocity_genes']['names'])

kwargs = dict()
scv.pl.scatter(ad_all_temp, df['4'][:5], ylabel='4', frameon=False, size=5, linewidth=1.5, add_outline='4')

scv.tl.velocity_confidence(ad_all_temp)

keys = 'velocity_length', 'velocity_confidence'
scv.pl.scatter(ad_all_temp, c=keys, cmap='coolwarm', perc=[5, 95], title=['Differentiation rate', 'Coherence'])

df = ad_all_temp.obs.groupby('cluster')[keys].mean().T
df.style.background_gradient(cmap='coolwarm', axis=1)

scv.tl.paga(ad_all_temp, groups='cluster')

scv.pl.paga(ad_all_temp, basis='umap', size=50, alpha=0.1, min_edge_width=2, node_size_scale=1.5)

Calculate velocity on UMAP per timepoint

ad_all_temp_T = ad_all_temp[ad_all_temp.obs['T'].isin(['TC']), :]
sc.pp.neighbors(ad_all_temp_T, use_rep='X_pca_harmony')
sc.tl.umap(ad_all_temp_T)
scv.tl.velocity(ad_all_temp_T, mode='stochastic')
scv.tl.velocity_graph(ad_all_temp_T, n_jobs=4, basis='umap')
plt.rcParams["figure.figsize"] = (5, 5)
scv.pl.velocity_embedding_stream(ad_all_temp_T, basis='X_umap', arrow_size=1.2, color=identity, palette=palette_rna, title='', size=75, alpha=0.75, legend_loc='none')
plotFiForAd(ad_all_temp_T)



f = 1.0
ncols = 4
nrows = 3
fig, axs = plt.subplots(nrows, ncols, figsize=(f*4.5*ncols, f*4.5*nrows))
for isample, sample in enumerate(ids):
    try:
        ax = axs[int((isample - isample%ncols)/nrows), isample%ncols]
        identity = 'cluster'
        ad = ads[sample]

        scv.pl.velocity_embedding_stream(ad, basis='X_spatial', arrow_size=0.6, color=identity, palette=palette_rna, 
                                         title=sample, size=75, alpha=0.75, show=False, ax=ax, legend_loc='none') # 250 for 12x12

        (xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
        sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
        image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
        ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)

        ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)
    except Exception as exception:
        print(exception)

plt.subplots_adjust(wspace=0.15, hspace=0.2)
# fig.savefig('spatial rna velocity.png', dpi=450)

for id in ids:
    ad = ads[id]

    try:
        X_grid_full, V_grid_full, X_emb = computFullGridVelocity(ad, emb='spatial')
        gridPhi = getGridPhi(X_grid_full, V_grid_full, R=2)
        plotPhiHeatmap(gridPhi, X_grid_full, emb='spatial');

        if True:
            fig, axs = plt.subplots(1, 2, figsize=(10, 5))

            params = dict(density=1, arrow_size=100, arrow_length=2000, color=identity, palette=palette_rna, size=45, alpha=0.75) 

            ax = axs[0]
            X_source, V_source = getGridVelocityForSourceSink(X_grid_full, V_grid_full, gridPhi, isSource=True)
            scv.pl.velocity_embedding_grid(ad, basis='X_spatial', X_grid=X_source, V_grid=V_source, legend_loc='none', title='Source', ax=axs[0], show=False, **params)

            (xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
            sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
            image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
            ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
            ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)

            ax = axs[1]
            X_sink, V_sink = getGridVelocityForSourceSink(X_grid_full, V_grid_full, gridPhi, isSource=False)
            scv.pl.velocity_embedding_grid(ad, basis='X_spatial', X_grid=X_sink, V_grid=V_sink, legend_loc='none', title='Sink', ax=axs[1], show=False, **params)

            (xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
            sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
            image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
            ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
            ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)

        Fi_emb = getFiOfEmb(X_grid_full, gridPhi, X_emb)
        ad.obs['Fi_spatial'] = Fi_emb
        sc.pl.spatial(ad, color='Fi_spatial', cmap='bwr', vmin=-1, vmax=1)
        sc.pl.violin(ad, keys='Fi_spatial', groupby='cluster')
    except Exception as exception:
        print(exception)

单细胞空间数据分析之velocity

作者，Evil Genius

今天我们要完成另外一项工程，单细胞和空间的velocity，拿到如下的结果

在生物的世界里，velocyto衡量恶性细胞的进化方向非常常见。

这也是轨迹分析和CNV分析相结合的分析点。

调试了半天，大晚上的，夜景远没有上海漂亮~~~

现在真的是两极分化，要么从源头上写代码分析数据，要么就用别人现成的内容，可调整的空间大大缩小。

需要加载的stpalette、pathways、plots、utils放在了百度网盘，接受服务，我也学一学别人接一下分析项目。

加载数据

Calculate velocity for the entire model

Calculate velocity on UMAP per timepoint

还有下面这样的

生活很好，有你更好

猜你喜欢

热点阅读