runGenie3-临时0717
2023-07-16 本文已影响0人
oceanandshore
############################### runGenie3 ################################
integrated = readRDS( file = "./integrated_0708.rds")
# 增加一列seurat_clusters,下一步改成celltype
integrated@meta.data$seurat_clusters_1 = integrated@meta.data$seurat_clusters
# 把metadata里面的seurat_clusters_1改成celltype
current.cluster.ids <- c(0:20)
new.cluster.ids <- c("Naive CD8+ ",
"CD4 TCM",
"CD8 TCM",
"Naive CD4+",
"NK",
"CD8 TEM",
"CD4 TEM",
"Naive B cells",
"CD4 CTL ",
"Tregs",
"Other T ",
"γδ T ",
"Unknown",
"CD16 Mono",
"Memory B ",
"MAIT",
"CD14 Mono ",
"Platelet ",
"Dc",
"Eryth",
"Double-negative T ")
integrated@meta.data$celltype = plyr::mapvalues(x = integrated@meta.data[,"seurat_clusters_1"], from = current.cluster.ids, to = new.cluster.ids)
dim(integrated)
#21249 18578
### Seurat 对象读取
exprMat = as.matrix(integrated@assays$RNA@counts) #取出表达矩阵
#准备细胞meta信息
cellInfo = as.data.frame(integrated@meta.data) #取出注释信息
colnames(cellInfo)[which(colnames(cellInfo)=="orig.ident")] <- "sample"
colnames(cellInfo)[which(colnames(cellInfo)=="seurat_clusters")] <- "cluster"
colnames(cellInfo)[which(colnames(cellInfo)=="celltype")] <- "celltype"
cellInfo <- cellInfo[,c("sample","cluster","celltype")]
saveRDS(cellInfo, file="int/cellInfo.Rds")
###设置分析环境
org <- "hgnc"
dbDir <- "./cisTarget_databases"
myDatasetTitle <- "R_SCENIC_integrated"
data(defaultDbNames)
dbs <- defaultDbNames[[org]]
### 初始化
scenicOptions <- initializeScenic(org=org, dbDir=dbDir, dbs=dbs,
datasetTitle="HNSCC", nCores=10)
scenicOptions@inputDatasetInfo$cellInfo <- "int/cellInfo.Rds"
saveRDS(scenicOptions, file= "int/scenicOption.rds")
##基因过滤
#过滤标准是基因表达量之和>细胞数*3%,且在1%的细胞中表达
genesKept <- geneFiltering(exprMat, scenicOptions,
minCountsPerGene = 3 * 0.01 * ncol(exprMat),
minSamples = ncol(exprMat) * 0.01)
exprMat_filtered <- exprMat[genesKept, ]
dim(exprMat_filtered)
# 7320 18578
#移除exprMat等无关紧要的
rm(exprMat)
rm(integrated)
rm()
rm()
rm()
rm()
rm()
#计算相关性矩阵
runCorrelation(exprMat_filtered, scenicOptions) # 7320基因,18578细胞,12核,这一步10mins
#TF-Targets相关性回归分析
exprMat_filtered_log <- log2(exprMat_filtered+1)
#library(future)` `
#plan()
#plan("multicore", workers = 10)
#options(future.globals.maxSize=1000*1024*1024)##default有最大可使用内存限制,可更改
#detach("package:future")
memory.limit(1000000)
set.seed(1001)
runGenie3(exprMat_filtered_log, scenicOptions,nParts=1000) # 7.8日 20:00开始跑
#需要注意nParts参数,它的作用是把表达矩阵分成n份分开计算,目的是防止数据量大时内存不够。