scVelo计算RNA速率
2020-11-25 本文已影响0人
生信编程日常
一.导入需要的文件
需要的文件:由R里面对Seurat对象的数据导出的
1.velocyto pipeline 跑出来的loom文件
2.细胞名字文件
3.细胞属于的类群
4.细胞坐标文件
#RNA剪切信息的loom文件
sample_one = anndata.read_loom("NC.loom")
#包含细胞名的data.frame
sample_obs = pd.read_csv("NC_cellID_obs.csv")
#包含细胞类群注释信息的数据框
cell_clusters = pd.read_csv("/NC_clusters.csv")
二.更改文件细胞名字,使其一致:
1.对于loom文件
sample_one.obs

#去除NC和x只留barcode名字
sample_one.obs=sample_one.obs.rename(index = lambda x: x.replace('NC:', ''))
sample_one.obs=sample_one.obs.rename(index = lambda x: x.replace('x', ''))
sample_one.obs.head()

2.对于细胞名
sample_obs.head()

#去除barcode的_1
sample_obs.x=sample_obs.x.replace({"_1":""},regex=True)
sample_obs.head()

3.umap坐标文件的细胞名字更改
umap.head()

umap[["Unnamed: 0"]]=umap[["Unnamed: 0"]].replace({"_1":""},regex=True)
umap = umap.rename(columns = {"Unnamed: 0":'Cell ID'})
umap.head()

4.对于细胞类群文件
cell_clusters.head()

cell_clusters[["Unnamed: 0"]]=cell_clusters[["Unnamed: 0"]].replace({"_1":""},regex=True)
cell_clusters.head()

三.对细胞进行过滤并排序
#对细胞文件和RNA剪切速率文件取交集
sample_one = sample_one[np.isin(sample_one.obs.index,sample_obs["x"])]
sample_one.obs.head()
#对UMAP坐标文件取交集
sample_one_index = pd.DataFrame(sample_one.obs.index)
sample_one_index = sample_one_index.rename(columns = {0:'Cell ID'})
umap_ordered = sample_one_index.merge(umap,on="Cell ID")
umap_ordered.head()
#对 细胞类群文件细胞取交集
cell_clusters[["Unnamed: 0"]]=cell_clusters[["Unnamed: 0"]].replace({"_1":""},regex=True)
cell_clusters = cell_clusters.rename(columns = {"Unnamed: 0":'Cell ID'})
#order
cell_clusters = sample_one_index.merge(cell_clusters,on="Cell ID")
cell_clusters.head()
四.将umap坐标与cluster信息加入sample_one
#将umap信息加入sample_one
umap_ordered = umap_ordered.iloc[:,1:]
sample_one.obsm['X_umap'] = umap_ordered.values
#将cell_clusters加入sample_one
cell_clusters_ordered=cell_clusters.iloc[:,2]
sample_one.obs['cell_clusters']=cell_clusters_ordered.values
五.运行RNA Velocity
#Running RNA Velocity
scv.pp.filter_and_normalize(sample_one,min_shared_counts=30, n_top_genes=2000)
scv.pp.moments(sample_one, n_pcs=30, n_neighbors=30)
scv.tl.velocity(sample_one, mode = "stochastic")
scv.tl.velocity_graph(sample_one)
scv.pl.velocity_embedding(sample_one, basis='X_umap',arrow_size=5)
ident_colours = ["#F8766D","#7CAE00","#00BFC4","#C77CFF"]
scv.pl.velocity_embedding_stream(sample_one, basis='X_umap',color = "cell_clusters",palette = ident_colours)