单细胞GSEA分析运行记录2
2021-03-27 本文已影响0人
Seurat_Satija
> #加载R包
> library(tidyverse)
-- Attaching packages ------------------------------------------------------------------------- tidyverse 1.3.0 --
√ ggplot2 3.3.3 √ purrr 0.3.4
√ tibble 3.0.4 √ dplyr 1.0.2
√ tidyr 1.1.2 √ stringr 1.4.0
√ readr 1.4.0 √ forcats 0.5.0
-- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
> library(Seurat)
Registered S3 method overwritten by 'spatstat':
method from
print.boxx cli
Attaching SeuratObject
Warning message:
程辑包‘Seurat’是用R版本4.0.4 来建造的
> workflow
Error: object 'workflow' not found
> # Load the PBMC dataset
> pbmc.data <- Read10X(data.dir = "pbmc5k_v3_filtered_feature_bc_matrix/")
> #workflow
> # Load the PBMC dataset
> pbmc.data <- Read10X(data.dir = "pbmc5k_v3_filtered_feature_bc_matrix/")
> # Initialize the Seurat object with the raw (non-normalized data).
> pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc5k", min.cells = 3, min.features = 200)
> pbmc
An object of class Seurat
18791 features across 4962 samples within 1 assay
Active assay: RNA (18791 features, 0 variable features)
> #QC
> pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
> # Visualize QC metrics as a violin plot
> VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
> pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 25)
> #Normalization
> pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
> #高变基因选择
> pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
> #标准化
> all.genes <- rownames(pbmc)
> pbmc <- ScaleData(pbmc, features = all.genes)
Centering and scaling data matrix
|========================================================================================================| 100%
> #去除MT,重新进行标准化
> pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt")
Regressing out percent.mt
|========================================================================================================| 100%
Centering and scaling data matrix
|========================================================================================================| 100%
Warning message:
In strsplit(conditionMessage(e), "\n") :
input string 1 is invalid in this locale
> #PCA
> pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc), verbose = FALSE)
> #聚类
> pbmc <- FindNeighbors(pbmc, dims = 1:20)
Computing nearest neighbor graph
Computing SNN
> pbmc <- FindClusters(pbmc, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 4595
Number of edges: 165660
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9001
Number of communities: 13
Elapsed time: 0 seconds
> #可视化
> pbmc <- RunUMAP(pbmc, dims = 1:20)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
13:05:18 UMAP embedding parameters a = 0.9922 b = 1.112
13:05:18 Read 4595 rows and found 20 numeric columns
13:05:18 Using Annoy for neighbor search, n_neighbors = 30
13:05:18 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:05:19 Writing NN index file to temp file C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\file2d3c3a9c70be
13:05:19 Searching Annoy index using 1 thread, search_k = 3000
13:05:21 Annoy recall = 100%
13:05:21 Commencing smooth kNN distance calibration using 1 thread
13:05:22 Initializing from normalized Laplacian + noise
13:05:23 Commencing optimization for 500 epochs, with 192042 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:05:44 Optimization finished
> pbmc<- RunTSNE(pbmc, dims = 1:20)
> ## after we run UMAP and TSNE, there are more entries in the reduction slot
> str(pbmc@reductions)
List of 3
$ pca :Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
.. ..@ cell.embeddings : num [1:4595, 1:50] -31.75 -27.91 4.87 11.28 -30.22 ...
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:4595] "AAACCCAAGCGTATGG-1" "AAACCCAGTCCTACAA-1" "AAACGCTAGGGCATGT-1" "AAACGCTGTAGGTACG-1" ...
.. .. .. ..$ : chr [1:50] "PC_1" "PC_2" "PC_3" "PC_4" ...
.. ..@ feature.loadings : num [1:2000, 1:50] -0.014365 -0.000161 0.005058 -0.013235 -0.058571 ...
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:2000] "FCER1A" "PPBP" "IGLC2" "C1QA" ...
.. .. .. ..$ : chr [1:50] "PC_1" "PC_2" "PC_3" "PC_4" ...
.. ..@ feature.loadings.projected: num[0 , 0 ]
.. ..@ assay.used : chr "RNA"
.. ..@ global : logi FALSE
.. ..@ stdev : num [1:50] 15.65 7.92 6.7 5.5 4.53 ...
.. ..@ key : chr "PC_"
.. ..@ jackstraw :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
.. .. .. ..@ empirical.p.values : num[0 , 0 ]
.. .. .. ..@ fake.reduction.scores : num[0 , 0 ]
.. .. .. ..@ empirical.p.values.full: num[0 , 0 ]
.. .. .. ..@ overall.p.values : num[0 , 0 ]
.. ..@ misc :List of 1
.. .. ..$ total.variance: num 1563
$ umap:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
.. ..@ cell.embeddings : num [1:4595, 1:2] -7.7 -9.575 -5.893 0.395 -7.803 ...
.. .. ..- attr(*, "scaled:center")= num [1:2] -1.1672 0.0318
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:4595] "AAACCCAAGCGTATGG-1" "AAACCCAGTCCTACAA-1" "AAACGCTAGGGCATGT-1" "AAACGCTGTAGGTACG-1" ...
.. .. .. ..$ : chr [1:2] "UMAP_1" "UMAP_2"
.. ..@ feature.loadings : num[0 , 0 ]
.. ..@ feature.loadings.projected: num[0 , 0 ]
.. ..@ assay.used : chr "RNA"
.. ..@ global : logi TRUE
.. ..@ stdev : num(0)
.. ..@ key : chr "UMAP_"
.. ..@ jackstraw :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
.. .. .. ..@ empirical.p.values : num[0 , 0 ]
.. .. .. ..@ fake.reduction.scores : num[0 , 0 ]
.. .. .. ..@ empirical.p.values.full: num[0 , 0 ]
.. .. .. ..@ overall.p.values : num[0 , 0 ]
.. ..@ misc : list()
$ tsne:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
.. ..@ cell.embeddings : num [1:4595, 1:2] -7.65 10.12 -32.29 -22.12 1.52 ...
.. .. ..- attr(*, "dimnames")=List of 2
.. .. .. ..$ : chr [1:4595] "AAACCCAAGCGTATGG-1" "AAACCCAGTCCTACAA-1" "AAACGCTAGGGCATGT-1" "AAACGCTGTAGGTACG-1" ...
.. .. .. ..$ : chr [1:2] "tSNE_1" "tSNE_2"
.. ..@ feature.loadings : num[0 , 0 ]
.. ..@ feature.loadings.projected: num[0 , 0 ]
.. ..@ assay.used : chr "RNA"
.. ..@ global : logi TRUE
.. ..@ stdev : num(0)
.. ..@ key : chr "tSNE_"
.. ..@ jackstraw :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
.. .. .. ..@ empirical.p.values : num[0 , 0 ]
.. .. .. ..@ fake.reduction.scores : num[0 , 0 ]
.. .. .. ..@ empirical.p.values.full: num[0 , 0 ]
.. .. .. ..@ overall.p.values : num[0 , 0 ]
.. ..@ misc : list()
> DimPlot(pbmc, reduction = "umap", label = TRUE)
> #查找marker基因
> pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
Calculating cluster 0
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s
Calculating cluster 1
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=28s
Calculating cluster 2
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s
Calculating cluster 3
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=22s
Calculating cluster 4
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s
Calculating cluster 5
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s
Calculating cluster 6
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06s
Calculating cluster 7
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=23s
Calculating cluster 8
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s
Calculating cluster 9
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=04s
Calculating cluster 10
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=16s
Calculating cluster 11
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=09s
Calculating cluster 12
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=30s
> saveRDS(pbmc, "pbmc_5k_v3.rds")
> saveRDS(pbmc, "pbmc_5k_v3.rds",compress = TRUE)
> saveRDS(pbmc, "pbmc_5k_v3.rds",compress = F)
> #如图所示在处理以后,进行细胞分群后可以分为14个细胞亚群,分别为cluster 0-13。
> write.csv(pbmc.markers,file = 'pbmc.markers.csv')
> library(Seurat)
> library(patchwork)
> p1 <- DimPlot(pbmc, reduction = "umap", label = TRUE)
> p1 <- DimPlot(pbmc, reduction = "umap", label = TRUE)
> ggsave("DimPlot1.pdf", plot=p1, width=8, height=6)
> ggsave("DimPlot1.png", plot=p1, width=8, height=6)
> library(Seurat)
> pbmc<- readRDS("pbmc_5k_v3.rds")
> #library(devtools)
> #install_github('immunogenomics/presto')
> library(presto)
Error in library(presto) : 不存在叫‘presto’这个名字的程辑包
> install_github('immunogenomics/presto')
Error in install_github("immunogenomics/presto") :
could not find function "install_github"
> library(devtools)
载入需要的程辑包:usethis
Warning message:
程辑包‘devtools’是用R版本4.0.4 来建造的
> library(devtools)
> install_github('immunogenomics/presto')
Downloading GitHub repo immunogenomics/presto@HEAD
immunogenomics-presto-052085d/data/object_sce.rda: truncated gzip input
tar.exe: Error exit delayed from previous errors.
Error: Failed to install 'presto' from GitHub:
Does not appear to be an R package (no DESCRIPTION)
In addition: Warning messages:
1: In utils::untar(tarfile, ...) :
‘tar.exe -xf "C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\file2d3c26343827.tar.gz" -C "C:/Users/Nano/AppData/Local/Temp/RtmpMfsJdO/remotes2d3c40465af0"’ returned error code 1
2: In system(cmd, intern = TRUE) :
running command 'tar.exe -tf "C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\file2d3c26343827.tar.gz"' had status 1
> library(devtools)
> install_github('immunogenomics/presto')
Downloading GitHub repo immunogenomics/presto@HEAD
These packages have more recent versions available.
It is recommended to update all of them.
Which would you like to update?
1: All
2: CRAN packages only
3: None
4: Rcpp (1.0.5 -> 1.0.6 ) [CRAN]
5: utf8 (1.1.4 -> 1.2.1 ) [CRAN]
6: crayon (1.3.4 -> 1.4.1 ) [CRAN]
7: cli (2.2.0 -> 2.3.1 ) [CRAN]
8: pillar (1.4.7 -> 1.5.1 ) [CRAN]
9: fansi (0.4.1 -> 0.4.2 ) [CRAN]
10: lifecycle (0.2.0 -> 1.0.0 ) [CRAN]
11: farver (2.0.3 -> 2.1.0 ) [CRAN]
12: withr (2.3.0 -> 2.4.1 ) [CRAN]
13: tibble (3.0.4 -> 3.1.0 ) [CRAN]
14: rlang (0.4.9 -> 0.4.10 ) [CRAN]
15: isoband (0.2.3 -> 0.2.4 ) [CRAN]
16: fastmap (1.0.1 -> 1.1.0 ) [CRAN]
17: cachem (1.0.1 -> 1.0.4 ) [CRAN]
18: memoise (1.1.0 -> 2.0.0 ) [CRAN]
19: mime (0.9 -> 0.10 ) [CRAN]
20: RSQLite (2.2.2 -> 2.2.4 ) [CRAN]
21: DBI (1.1.0 -> 1.1.1 ) [CRAN]
22: XML (3.99-0.5 -> 3.99-0.6 ) [CRAN]
23: formatR (1.7 -> 1.8 ) [CRAN]
24: matrixStats (0.57.0 -> 0.58.0 ) [CRAN]
25: RCurl (1.98-1.2 -> 1.98-1.3 ) [CRAN]
26: DelayedArray (0.16.0 -> 0.16.3 ) [CRAN]
27: GenomeInfoDb (1.26.2 -> 1.26.4 ) [CRAN]
28: MatrixGen... (1.2.0 -> 1.2.1 ) [CRAN]
29: RcppArmad... (0.10.1.2.0 -> 0.10.2.2.0) [CRAN]
30: cpp11 (0.2.4 -> 0.2.6 ) [CRAN]
31: dplyr (1.0.2 -> 1.0.5 ) [CRAN]
32: DESeq2 (1.30.0 -> 1.30.1 ) [CRAN]
33: tidyr (1.1.2 -> 1.1.3 ) [CRAN]
34: data.table (1.13.6 -> 1.14.0 ) [CRAN]
Enter one or more numbers, or an empty line to skip updates:3
√ checking for file 'C:\Users\Nano\AppData\Local\Temp\RtmpMfsJdO\remotes2d3c24d31e5c\immunogenomics-presto-052085d/DESCRIPTION' ...
- preparing 'presto':
√ checking DESCRIPTION meta-information ...
- cleaning src
- checking for LF line-endings in source and make files and shell scripts
- checking for empty or unneeded directories
- looking to see if a 'data/datalist' file should be added
- building 'presto_1.0.0.tar.gz'
Installing package into ‘C:/Users/Nano/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
* installing *source* package 'presto' ...
** using staged installation
** libs
"C:/rtools40/mingw64/bin/"g++ -std=gnu++11 -I"C:/PROGRA~1/R/R-40~1.3/include" -DNDEBUG -I'C:/Users/Nano/Documents/R/win-library/4.0/Rcpp/include' -I'C:/Users/Nano/Documents/R/win-library/4.0/RcppArmadillo/include' -O2 -Wall -mfpmath=sse -msse2 -mstackrealign -c RcppExports.cpp -o RcppExports.o
"C:/rtools40/mingw64/bin/"g++ -std=gnu++11 -I"C:/PROGRA~1/R/R-40~1.3/include" -DNDEBUG -I'C:/Users/Nano/Documents/R/win-library/4.0/Rcpp/include' -I'C:/Users/Nano/Documents/R/win-library/4.0/RcppArmadillo/include' -O2 -Wall -mfpmath=sse -msse2 -mstackrealign -c fast_wilcox.cpp -o fast_wilcox.o
fast_wilcox.cpp: In function 'std::__cxx11::list<float> cpp_in_place_rank_mean(arma::vec&, int, int)':
fast_wilcox.cpp:116:34: warning: comparison of integer expressions of different signedness: 'size_t' {aka 'long long unsigned int'} and 'int' [-Wsign-compare]
for (size_t i = idx_begin; i <= idx_end; i++) {
~~^~~~~~~~~~
C:/rtools40/mingw64/bin/g++ -shared -s -static-libgcc -o presto.dll tmp.def RcppExports.o fast_wilcox.o -LC:/PROGRA~1/R/R-40~1.3/bin/x64 -lR
installing to C:/Users/Nano/Documents/R/win-library/4.0/00LOCK-presto/00new/presto/libs/x64
** R
** data
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
converting help for package 'presto'
finding HTML links ... 好了
exprs html
nnzeroGroups html
object_sce html
object_seurat html
pipe html
presto html
rank_matrix html
sumGroups html
top_markers html
wilcoxauc html
y html
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (presto)
> #install_github('immunogenomics/presto')
> library(presto)
载入需要的程辑包:Rcpp
载入需要的程辑包:data.table
data.table 1.13.6 using 2 threads (see ?getDTthreads). Latest news: r-datatable.com
**********
用中文运行data.table。软件包只提供英语支持。当在在线搜索帮助时,也要确保检查英语错误信息。这个可以通过查看软件包源文件中的po/R-zh_CN.po和po/zh_CN.po文件获得,这个文件可以并排找到母语和英语错误信息。
**********
载入程辑包:‘data.table’
The following objects are masked from ‘package:dplyr’:
between, first, last
The following object is masked from ‘package:purrr’:
transpose
> #Loading required package: Rcpp
> pbmc.genes <- wilcoxauc(pbmc, 'seurat_clusters')
> head(pbmc.genes)
feature group avgExpr logFC statistic auc pval padj pct_in
1 AL627309.1 0 0.004801016 -0.0048337160 2113594 0.4966881 0.0451711892 0.0817186691 0.5443235
2 AL627309.3 0 0.000000000 -0.0004923881 2126401 0.4996978 0.3781096732 0.4754137751 0.0000000
3 AL669831.5 0 0.058225088 -0.0101451241 2081447 0.4891337 0.0182067030 0.0364774662 6.8429238
4 FAM87B 0 0.000000000 -0.0017590952 2121900 0.4986401 0.0612494406 0.1063124181 0.0000000
5 LINC00115 0 0.033568380 -0.0054911605 2090280 0.4912094 0.0148011270 0.0301102065 3.7325039
6 FAM41C 0 0.015927696 -0.0162813395 2077756 0.4882664 0.0001622764 0.0004571717 2.0217729
pct_out
1 1.20882442
2 0.06044122
3 9.21728619
4 0.27198549
5 5.59081293
6 4.38198852
> # 我们拥有每个cluster的所有基因
> dplyr::count(pbmc.genes, group)
group n
1 0 18791
2 1 18791
3 10 18791
4 11 18791
5 12 18791
6 2 18791
7 3 18791
8 4 18791
9 5 18791
10 6 18791
11 7 18791
12 8 18791
13 9 18791
> #使用fgsea进行基因集富集
> library(msigdbr)
Warning message:
程辑包‘msigdbr’是用R版本4.0.4 来建造的
> library(fgsea)
> library(dplyr)
> library(ggplot2)
> msigdbr_show_species()#我们看看都有神马物种的数据
[1] "Bos taurus" "Caenorhabditis elegans" "Canis lupus familiaris"
[4] "Danio rerio" "Drosophila melanogaster" "Gallus gallus"
[7] "Homo sapiens" "Mus musculus" "Rattus norvegicus"
[10] "Saccharomyces cerevisiae" "Sus scrofa"
Warning message:
'msigdbr_show_species' is deprecated.
Use 'msigdbr_species' instead.
See help("Deprecated")
> m_df<- msigdbr(species = "Homo sapiens", category = "C7")#我们使用C7免疫基因集
> head(m_df)
# A tibble: 6 x 17
gs_cat gs_subcat gs_name entrez_gene gene_symbol human_entrez_ge~ human_gene_symb~ gs_id gs_pmid
<chr> <chr> <chr> <int> <chr> <int> <chr> <chr> <chr>
1 C7 "" GOLDRA~ 20 ABCA2 20 ABCA2 M3044 164927~
2 C7 "" GOLDRA~ 10057 ABCC5 10057 ABCC5 M3044 164927~
3 C7 "" GOLDRA~ 25864 ABHD14A 25864 ABHD14A M3044 164927~
4 C7 "" GOLDRA~ 34 ACADM 34 ACADM M3044 164927~
5 C7 "" GOLDRA~ 54 ACP5 54 ACP5 M3044 164927~
6 C7 "" GOLDRA~ 51205 ACP6 51205 ACP6 M3044 164927~
# ... with 8 more variables: gs_geoid <chr>, gs_exact_source <chr>, gs_url <chr>,
# gs_description <chr>, species_name <chr>, species_common_name <chr>, ortholog_sources <chr>,
# num_ortholog_sources <dbl>
> fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)
> fgsea_sets$GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP
[1] "ABLIM1" "ACER1" "ADGRA3" "ADGRL1" "AEBP1" "AGRN"
[7] "AIF1" "ALG10B" "AMN1" "ANKRD36BP2" "APBA2" "APBB1"
[13] "ARMCX2" "BACH2" "BEND5" "BNIP3L" "BTBD3" "CA6"
[19] "CADPS2" "CAMK4" "CD248" "CD55" "CENPV" "CEP41"
[25] "CHI3L2" "CHML" "CHMP7" "CIAPIN1" "CLCN5" "COL5A2"
[31] "CRLF3" "CYHR1" "DDR1" "DNHD1" "DNTT" "DSC1"
[37] "EDAR" "EEA1" "EFNA1" "ENGASE" "EXPH5" "FCGRT"
[43] "GAL3ST4" "GNAI1" "GP5" "GPR160" "GPRASP1" "GPRASP2"
[49] "GPRC5B" "HEMGN" "HIPK2" "HSF2" "IGF1R" "IGIP"
[55] "ITGA6" "KCNQ5" "KCTD3" "KLF3-AS1" "KLHDC1" "KLHL13"
[61] "KLHL24" "KRT18" "KRT2" "KRT72" "KRT73" "LAGE3P1"
[67] "LEF1-AS1" "LINC02175" "LINC02604" "LMLN" "LRRN3" "MALL"
[73] "MAML2" "MANSC1" "ME3" "MEF2A" "MEST" "METAP1D"
[79] "MIR101-1" "MIR600HG" "MLXIP" "MPP1" "MPP7" "MRPL45P2"
[85] "MYB" "MZF1" "NAA16" "NBEA" "NDC1" "NDFIP1"
[91] "NET1" "NPAS2" "NPM3" "NSUN5" "NUCB2" "NUDT12"
[97] "NUDT17" "OBSCN-AS1" "PADI4" "PCED1B" "PCSK5" "PDCD4-AS1"
[103] "PDE3B" "PDE7A" "PDE7B" "PDE9A" "PDK1" "PECAM1"
[109] "PHGDH" "PIGL" "PIK3CD" "PIK3IP1" "PITPNM2" "PJVK"
[115] "PKIG" "PLA2G12A" "PLAG1" "PLLP" "PRKCQ-AS1" "PRRT1"
[121] "PRXL2A" "PTPRK" "PXYLP1" "RAPGEF6" "REG4" "RETREG1"
[127] "RGS10" "RHPN2" "RIN3" "RNF157-AS1" "RNF175" "RNF227"
[133] "ROBO3" "SATB1" "SCAI" "SCARB1" "SCML1" "SCML2"
[139] "SERPINE2" "SERTAD2" "SERTM1" "SETD1B" "SFMBT2" "SFXN2"
[145] "SH3RF3" "SIAH1" "SLC11A2" "SLC25A25-AS1" "SLC25A37" "SLC29A2"
[151] "SLC2A11" "SMPD1" "SNHG32" "SNORD104" "SNPH" "SNRPN"
[157] "SNX9" "SORCS3" "SPPL2B" "SREBF1" "STAP1" "STK17A"
[163] "TAF4B" "TARBP1" "TBC1D32" "TGFBR2" "TIMP2" "TLE2"
[169] "TMEM170B" "TMEM198B" "TMEM220" "TMEM263" "TMEM272" "TMEM41B"
[175] "TMIGD2" "TOM1L2" "TSPAN3" "TTC28" "TUG1" "UBE2E2"
[181] "USP44" "VPS52" "ZBTB18" "ZC4H2" "ZMYND8" "ZNF182"
[187] "ZNF229" "ZNF496" "ZNF506" "ZNF516" "ZNF546" "ZNF563"
[193] "ZNF662" "ZNF780B" "ZNRD1ASP"
> # [161] "SMPD1" "SNORD104" "SNPH" "SNRPN"
> # [165] "SNX9" "SORCS3" "SPPL2B" "SREBF1"
> # [169] "STAP1" "STK17A" "TAF4B" "TARBP1"
> # [173] "TGFBR2" "TIMP2" "TLE2" "TMEM170B"
> # [177] "TMEM220" "TMEM41B" "TMEM48" "TMIGD2"
> # [181] "TOM1L2" "TSPAN3" "TTC28" "TUG1"
> # [185] "UBE2E2" "USP44" "VPS52" "ZC4H2"
> # [189] "ZMYND8" "ZNF182" "ZNF229" "ZNF238"
> # [193] "ZNF496" "ZNF506" "ZNF516" "ZNF546"
> # [197] "ZNF563" "ZNF662" "ZNF780B" "ZNRD1-AS1"
> GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP#代表神马呢?
Error: object 'GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP' not found
> # Naive CD4+ T cells
> pbmc.genes %>%
+ dplyr::filter(group == "0") %>%
+ arrange(desc(logFC), desc(auc)) %>%
+ head(n = 10) #进行降序排序
feature group avgExpr logFC statistic auc pval padj pct_in pct_out
1 LDHB 0 2.236842 1.1323561 3729084 0.8763234 0.000000e+00 0.000000e+00 98.75583 74.94711
2 TRAC 0 2.001595 1.1146542 3292895 0.7738204 9.776911e-196 1.716990e-193 97.43390 45.21003
3 CD3E 0 1.875531 1.0540950 3318700 0.7798844 1.776334e-204 3.441143e-202 97.97823 44.90783
4 TCF7 0 1.587248 1.0267936 3595559 0.8449454 0.000000e+00 0.000000e+00 94.71229 43.94077
5 LTB 0 2.393454 1.0051991 3036392 0.7135429 5.121301e-114 4.130230e-112 98.75583 65.30674
6 CD3D 0 1.732485 0.9454245 3220600 0.7568311 3.387398e-174 5.092207e-172 96.81182 42.85283
7 CCR7 0 1.178436 0.9284103 3607895 0.8478444 0.000000e+00 0.000000e+00 85.38103 22.69568
8 IL7R 0 1.648002 0.8906921 3109680 0.7307653 2.634379e-148 3.113372e-146 88.88025 35.75098
9 SARAF 0 2.226737 0.8493958 3722765 0.8748385 0.000000e+00 0.000000e+00 99.14463 89.36235
10 TRABD2A 0 1.041894 0.8438752 3554334 0.8352577 0.000000e+00 0.000000e+00 80.09331 19.49229
> # 仅选择fgsea的feature和auc列
> cluster0.genes<- pbmc.genes %>%
+ dplyr::filter(group == "0") %>%
+ arrange(desc(auc)) %>%
+ dplyr::select(feature, auc)
> ranks<- deframe(cluster0.genes)
> head(ranks)
RPL30 RPS3A RPS13 RPL35A RPL32 RPL34
0.9376551 0.9358956 0.9250745 0.9196189 0.9185743 0.9132980
> fgseaRes<- fgsea(fgsea_sets, stats = ranks, nperm = 1000)
Warning messages:
1: In fgsea(fgsea_sets, stats = ranks, nperm = 1000) :
You are trying to run fgseaSimple. It is recommended to use fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument in the fgsea function call.
2: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, :
There are ties in the preranked stats (20.92% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
3: In preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, :
All values in the stats vector are greater than zero and scoreType is "std", maybe you should switch to scoreType = "pos".
> fgseaResTidy <- fgseaRes %>%
+ as_tibble() %>%
+ arrange(desc(NES))
> fgseaResTidy %>%
+ dplyr::select(-leadingEdge, -ES, -nMoreExtreme) %>%
+ arrange(padj) %>%
+ head()
# A tibble: 6 x 5
pathway pval padj NES size
<chr> <dbl> <dbl> <dbl> <int>
1 GSE10325_CD4_TCELL_VS_MYELOID_UP 0.00122 0.00688 9.87 196
2 GSE10325_LUPUS_CD4_TCELL_VS_LUPUS_MYELOID_UP 0.00122 0.00688 9.81 193
3 GSE22886_NAIVE_TCELL_VS_MONOCYTE_UP 0.00122 0.00688 9.44 196
4 GSE22886_NAIVE_CD4_TCELL_VS_MONOCYTE_UP 0.00122 0.00688 9.30 195
5 GSE22886_NAIVE_TCELL_VS_DC_UP 0.00122 0.00688 8.90 195
6 GSE10325_LUPUS_CD4_TCELL_VS_LUPUS_BCELL_UP 0.00122 0.00688 8.29 196
> #应用标准化富集分数绘制barplot
> # 显示top20信号通路
> ggplot(fgseaResTidy %>% filter(padj < 0.008) %>% head(n= 20), aes(reorder(pathway, NES), NES)) +
+ geom_col(aes(fill= NES < 7.5)) +
+ coord_flip() +
+ labs(x="Pathway", y="Normalized Enrichment Score",
+ title="Hallmark pathways NES from GSEA") +
+ theme_minimal() ####以7.5进行绘图填色
> #GSEA style plot
> plotEnrichment(fgsea_sets[["GSE10325_CD4_TCELL_VS_MYELOID_UP"]],
+ ranks) + labs(title="GSE10325 CD4 TCELL VS MYELOID UP")
> #应用标准化富集分数绘制barplot
> # 显示top20信号通路
> p2 <- ggplot(fgseaResTidy %>% filter(padj < 0.008) %>% head(n= 20), aes(reorder(pathway, NES), NES)) +
+ geom_col(aes(fill= NES < 7.5)) +
+ coord_flip() +
+ labs(x="Pathway", y="Normalized Enrichment Score",
+ title="Hallmark pathways NES from GSEA") +
+ theme_minimal() ####以7.5进行绘图填色
> p2
> ggsave("top20.pdf", plot=p2, width=8, height=6)
> ggsave("top20.png", plot=p2, width=8, height=6)
> #GSEA style plot
> p3 <- plotEnrichment(fgsea_sets[["GSE10325_CD4_TCELL_VS_MYELOID_UP"]],
+ ranks) + labs(title="GSE10325 CD4 TCELL VS MYELOID UP")
> p3
> ggsave("plotEnrichment.pdf", plot=p3, width=8, height=6)
> ggsave("plotEnrichment.png", plot=p3, width=8, height=6)
> head(ranks)
RPL30 RPS3A RPS13 RPL35A RPL32 RPL34
0.9376551 0.9358956 0.9250745 0.9196189 0.9185743 0.9132980
> ranks<- deframe(cluster0.genes)
> head(ranks)
RPL30 RPS3A RPS13 RPL35A RPL32 RPL34
0.9376551 0.9358956 0.9250745 0.9196189 0.9185743 0.9132980