转录组scRNA

10X单细胞转录组下游流程-2-分群及可视化

2019-11-14  本文已影响0人  大吉岭猹

1. R实现进度条和并行运算

1.1. 进度条

1.2. 并行运算

library(parallel)
cl.cores <- detectCores() #检查当前电脑可用核数。
cl <- makeCluster(cl.cores) #使用刚才检测的核并行运算

2. 准备工作

2.1. 下载数据

rm(list = ls())
options(warn=-1)
suppressMessages(library(Seurat))

# 读取表达矩阵
start_time <- Sys.time()
raw_dataPBMC <- read.csv('./GSE117988_raw.expMatrix_PBMC.csv.gz', header = TRUE, row.names = 1)
end_time <- Sys.time()
end_time - start_time
# Time difference of 53.70495 secs

dim(raw_dataPBMC)
# [1] 17712 12874
# 17712个基因,12874个细胞

2.2. 归一化

dataPBMC <- log2(1 + sweep(raw_dataPBMC, 2,
                           median(colSums(raw_dataPBMC))/colSums(raw_dataPBMC), '*'))

2.3. 提取时间点信息

# 提取时间点这一分组信息
head(colnames(dataPBMC))
timePoints <- sapply(head(colnames(dataPBMC)),
                     function(x) unlist(strsplit(x, "[.]"))[2])
timePoints <- sapply(colnames(dataPBMC), function(x) unlist(strsplit(x, "[.]"))[2])
table(timePoints)

timePoints <-ifelse(timePoints == '1', 'PBMC_Pre',
                    ifelse(timePoints == '2', 'PBMC_EarlyD27',
                           ifelse(timePoints == '3', 'PBMC_RespD376', 'PBMC_ARD614')))
table(timePoints)

2.4. 质控

# 关注两点
# 第一点:基因在多少细胞表达
fivenum(apply(dataPBMC,1,function(x) sum(x>0) ))
# RP4-669L17.10         LAMB3         NAT10    AC093673.5         RPL21
# 1             6            50           207         12102
boxplot(apply(dataPBMC,1,function(x) sum(x>0) ))

# 第二点:细胞中有多少表达的基因
fivenum(apply(dataPBMC,2,function(x) sum(x>0) ))
# CAAGAAATCGATCCCT.2 GGCCGATTCCAGAGGA.3 TCAACGAAGAGCTGGT.3
# 10                321                395
# TGCCAAAGTCTGCGGT.4 TCTGGAATCTATCGCC.3
# 481               2865
hist(apply(dataPBMC,2,function(x) sum(x>0) ))

3. 分群、可视化

3.1. 创建Seurat对象

# Seurat V3
PBMC <- CreateSeuratObject(dataPBMC,
                           min.cells = 1,
                           min.features = 0,
                           project = '10x_PBMC')

PBMC
# An object of class Seurat
# 17712 features across 12874 samples within 1 assay
# Active assay: RNA (17712 features)

3.2. 添加metadata

PBMC <- AddMetaData(object = PBMC,
                    metadata = apply(raw_dataPBMC, 2, sum),
                    col.name = 'nUMI_raw')
PBMC <- AddMetaData(object = PBMC, metadata = timePoints, col.name = 'TimePoints')
head(PBMC@meta.data)

#                    orig.ident nCount_RNA nFeature_RNA # nUMI_raw TimePoints
# AAACCTGAGCGAAGGG.1   10x_PBMC   561.5564          517     # 1896   PBMC_Pre
# AAACCTGAGGTCATCT.1   10x_PBMC   553.9634          328      # 751   PBMC_Pre
# AAACCTGAGTCCTCCT.1   10x_PBMC   522.4279          380     # 1228   PBMC_Pre
# AAACCTGCACCAGCAC.1   10x_PBMC   590.6022          573     # 1919   PBMC_Pre
# AAACCTGGTAACGTTC.1   10x_PBMC   479.7199          263      # 698   PBMC_Pre
# AAACCTGGTAAGGATT.1   10x_PBMC   634.6747          433      # 763   PBMC_Pre

3.3. 聚类标准流程

##step1 标准化
PBMC <- ScaleData(object = PBMC, vars.to.regress = c('nUMI_raw'), model.use = 'linear', use.umi = FALSE)

##step2 找差异基因
PBMC <- FindVariableFeatures(object = PBMC, mean.function = ExpMean, dispersion.function = LogVMR, mean.cutoff = c(0.0125,3), dispersion.cutoff = c(0.5,Inf))

##step3 根据这些基因进行PCA降维
PBMC <- RunPCA(object = PBMC, pc.genes = PBMC@var.genes)

##step4 根据PCA结果确定分群
PBMC <- FindNeighbors(PBMC, reduction = "pca", dims = 1:10,
                      k.param = 35)
PBMC <- FindClusters(object = PBMC,
                     resolution = 1, verbose=F)

##step5 t-SNE降维
PBMC <- RunTSNE(object = PBMC, dims.use = 1:10)

##step6 可视化
DimPlot(PBMC)

3.4. 保存对象

save(PBMC,raw_dataPBMC,file = 'patient1.PBMC.output.Rdata')
上一篇 下一篇

猜你喜欢

热点阅读