Single-cellscRNA

GEO单细胞测序数据生信分析复现-1

2021-06-19  本文已影响0人  18b79e7933ad

复现文章:Discovery and Validation of a Metastasis-Related Prognostic and Diagnostic Biomarker for Melanoma Based on Single Cell and Gene Expression Datasets
数据来源:GSE115978

图1: image.png
一:数据下载、准备
image.png

下载前两个文件


image.png
1.筛选肿瘤细胞 2.区分primary tumor 和 metastasis
library(Seurat)
library(dplyr)
library(patchwork)
library(mindr)
library(Matrix)

a <- read.csv("GSE115978_counts.csv",header=TRUE,row.names = 1)
b<-read.csv("GSE115978_cell.annotations.csv",header = T,row.names = 1)   #样本信息
table(b$cell.type)

#筛选肿瘤细胞
index<-which(b$cell.types=="Mal")
group<-b[index,]         #筛选的是行
a.filt<-a[,index]        #筛选的是列
dim(a.filt)
identical(colnames(a.filt),row.names(group))

#构建Seurat对象
pbmc<- CreateSeuratObject(counts = a.filt,min.cells = 5, min.features = 2000)
head(pbmc@meta.data)

#添加primary tumour/metastasis 标签
lesiontype <- pbmc@meta.data$orig.ident
table(lesiontype)
lesiontype <-ifelse(lesiontype == 'cy84', 'primary tumor', 
                    ifelse(lesiontype == 'cy129PA', 'primary tumor',
                           ifelse(lesiontype=='cy129','primary tumor',
                                  ifelse(lesiontype == 'cy105', 'primary tumor',
                                         ifelse(lesiontype == 'CY84', 'primary tumor','metastasis')))))
#这一步我用的是傻办法,请大神赐教怎么更简单
table(lesiontype)
pbmc <- AddMetaData(object = pbmc, metadata = lesiontype, col.name = 'LesionType')

pbmc
An object of class Seurat
20557 features across 1958 samples within 1 assay
Active assay: RNA (20557 features, 0 variable features)

Figure1A

VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA"), group.by="LesionType",pt.size = 1.5)
image.png

Figure1B

pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(pbmc), 10)
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10,repel = TRUE)
plot1+plot2
image.png

Figure1C

all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)

pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(object = pbmc, dims = 1:4, reduction = "pca",nfeatures = 20)  #4个PC 20个基因

### 确定PCA维度数量
#根据JackStrawPlot来确定下一步降维作用的PC数量,P值越小说明PC越重要,应该纳入下一步分析
pbmc <- JackStraw(object = pbmc, num.replicate = 100)
pbmc <- ScoreJackStraw(object = pbmc, dims = 1:20)
JackStrawPlot(object = pbmc, dims = 1:15,reduction = "pca")
ElbowPlot(pbmc,reduction="pca")  #根据ElbowPlot确定PC数量
image.png
image.png

Figure1D

##TSNE聚类分析
pbmc <- FindNeighbors(pbmc, dims = 1:15)   
pbmc <- FindClusters(pbmc, resolution = 0.5)  
#这里的resolution参数可以修改细胞类型的多少。resolution越大细胞的类型越多
head(pbmc@meta.data)

#UMAP降维
pbmc <- RunUMAP(pbmc, dims = 1:15)
DimPlot(pbmc, reduction = "umap",group.by = "LesionType",pt.size = 1)    

#tSNE降维
pbmc <- RunTSNE(object = pbmc, dims = 1:15)   
DimPlot(pbmc, reduction = "tsne",pt.size = 1,label = TRUE,group.by = "LesionType")
image.png
image.png

Figure1 F

s.genes=Seurat::cc.genes.updated.2019$s.genes
g2m.genes=Seurat::cc.genes.updated.2019$g2m.genes
pbmc=CellCycleScoring(object = pbmc, 
                      s.features = s.genes, 
                      g2m.features = g2m.genes, 
                      set.ident = TRUE)
p4=VlnPlot(pbmc, features = c("S.Score", "G2M.Score"), group.by = "LesionType", 
           ncol = 2, pt.size = 0.1)
p4

pbmc@meta.data  %>% ggplot(aes(S.Score,G2M.Score))+geom_point(aes(color=Phase))+
  theme_minimal()
### S.Score较高的为S期,G2M.Score较高的为G2M期,都比较低的为G1期
DimPlot(pbmc, reduction = "tsne",pt.size = 1,label = TRUE,group.by = "Phase")
image.png
image.png
上一篇 下一篇

猜你喜欢

热点阅读