GEO单细胞测序数据生信分析复现-1
2021-06-19 本文已影响0人
18b79e7933ad
复现文章:Discovery and Validation of a Metastasis-Related Prognostic and Diagnostic Biomarker for Melanoma Based on Single Cell and Gene Expression Datasets
数据来源:GSE115978
![](https://img.haomeiwen.com/i18343199/c2ad653cfaafb112.png)
一:数据下载、准备
![](https://img.haomeiwen.com/i18343199/4b7c0501797dc58f.png)
下载前两个文件
![](https://img.haomeiwen.com/i18343199/9e5455f935961cba.png)
1.筛选肿瘤细胞 2.区分primary tumor 和 metastasis
library(Seurat)
library(dplyr)
library(patchwork)
library(mindr)
library(Matrix)
a <- read.csv("GSE115978_counts.csv",header=TRUE,row.names = 1)
b<-read.csv("GSE115978_cell.annotations.csv",header = T,row.names = 1) #样本信息
table(b$cell.type)
#筛选肿瘤细胞
index<-which(b$cell.types=="Mal")
group<-b[index,] #筛选的是行
a.filt<-a[,index] #筛选的是列
dim(a.filt)
identical(colnames(a.filt),row.names(group))
#构建Seurat对象
pbmc<- CreateSeuratObject(counts = a.filt,min.cells = 5, min.features = 2000)
head(pbmc@meta.data)
#添加primary tumour/metastasis 标签
lesiontype <- pbmc@meta.data$orig.ident
table(lesiontype)
lesiontype <-ifelse(lesiontype == 'cy84', 'primary tumor',
ifelse(lesiontype == 'cy129PA', 'primary tumor',
ifelse(lesiontype=='cy129','primary tumor',
ifelse(lesiontype == 'cy105', 'primary tumor',
ifelse(lesiontype == 'CY84', 'primary tumor','metastasis')))))
#这一步我用的是傻办法,请大神赐教怎么更简单
table(lesiontype)
pbmc <- AddMetaData(object = pbmc, metadata = lesiontype, col.name = 'LesionType')
pbmc
An object of class Seurat
20557 features across 1958 samples within 1 assay
Active assay: RNA (20557 features, 0 variable features)
Figure1A
VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA"), group.by="LesionType",pt.size = 1.5)
![](https://img.haomeiwen.com/i18343199/98a5026c819b6636.png)
Figure1B
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(pbmc), 10)
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10,repel = TRUE)
plot1+plot2
![](https://img.haomeiwen.com/i18343199/44c5b3dbdc11f584.png)
Figure1C
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(object = pbmc, dims = 1:4, reduction = "pca",nfeatures = 20) #4个PC 20个基因
### 确定PCA维度数量
#根据JackStrawPlot来确定下一步降维作用的PC数量,P值越小说明PC越重要,应该纳入下一步分析
pbmc <- JackStraw(object = pbmc, num.replicate = 100)
pbmc <- ScoreJackStraw(object = pbmc, dims = 1:20)
JackStrawPlot(object = pbmc, dims = 1:15,reduction = "pca")
ElbowPlot(pbmc,reduction="pca") #根据ElbowPlot确定PC数量
![](https://img.haomeiwen.com/i18343199/ee57159d8bd62275.png)
![](https://img.haomeiwen.com/i18343199/8b2fa0bb8e733ef1.png)
Figure1D
##TSNE聚类分析
pbmc <- FindNeighbors(pbmc, dims = 1:15)
pbmc <- FindClusters(pbmc, resolution = 0.5)
#这里的resolution参数可以修改细胞类型的多少。resolution越大细胞的类型越多
head(pbmc@meta.data)
#UMAP降维
pbmc <- RunUMAP(pbmc, dims = 1:15)
DimPlot(pbmc, reduction = "umap",group.by = "LesionType",pt.size = 1)
#tSNE降维
pbmc <- RunTSNE(object = pbmc, dims = 1:15)
DimPlot(pbmc, reduction = "tsne",pt.size = 1,label = TRUE,group.by = "LesionType")
![](https://img.haomeiwen.com/i18343199/e998eb45e0c12da4.png)
![](https://img.haomeiwen.com/i18343199/2d918091b696d9b8.png)
Figure1 F
s.genes=Seurat::cc.genes.updated.2019$s.genes
g2m.genes=Seurat::cc.genes.updated.2019$g2m.genes
pbmc=CellCycleScoring(object = pbmc,
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = TRUE)
p4=VlnPlot(pbmc, features = c("S.Score", "G2M.Score"), group.by = "LesionType",
ncol = 2, pt.size = 0.1)
p4
pbmc@meta.data %>% ggplot(aes(S.Score,G2M.Score))+geom_point(aes(color=Phase))+
theme_minimal()
### S.Score较高的为S期,G2M.Score较高的为G2M期,都比较低的为G1期
DimPlot(pbmc, reduction = "tsne",pt.size = 1,label = TRUE,group.by = "Phase")
![](https://img.haomeiwen.com/i18343199/677a7eebc75a564a.png)
![](https://img.haomeiwen.com/i18343199/a5b0423307a4f67e.png)