How to calculate clinical parame
2021-01-07 本文已影响0人
生信学习者2
library(dplyr)
library(tibble)
library(ggplot2)
library(survival)
library(survminer)
library(data.table)
library(stringr)
load phenotype
survival_data <- fread("../../Result/phenotype/common_survival_data.tsv")
clinical parameters
get_clinical <- function(x){
dat_clin <- x
# pathologic_T
dat_clin$Te1 <- str_extract(dat_clin$TNM, "T\\d[a-z]+")
dat_clin$Te2 <- str_extract(dat_clin$TNM, "T\\d")
dat_clin$Te1 <- with(dat_clin, ifelse(is.na(Te1), Te2, Te1))
# table(dat_clin$Te1)
# table(dat_clin$Te2)
dat_clin$Te2[dat_clin$Te2 == "T1"] <- "T1-T2"
dat_clin$Te2[dat_clin$Te2 == "T2"] <- "T1-T2"
dat_clin$Te2[dat_clin$Te2 == "T3"] <- "T1-T2"
dat_clin$Te2[dat_clin$Te2 == "T4"] <- "T3-T4"
Te2 <- table(dat_clin$Te2)
# pathologic_N
dat_clin$N <- str_extract(dat_clin$TNM, "N\\d")
dat_clin$N <- str_extract(dat_clin$N, "\\d")
# table(dat_clin$N)
dat_clin$N[dat_clin$N == "0"] <- "NO"
dat_clin$N[dat_clin$N == "1"] <- "Yes"
dat_clin$N[dat_clin$N == "2"] <- "Yes"
dat_clin$N[dat_clin$N == "3"] <- "Yes"
N <- table(dat_clin$N)
# pathologic_M
dat_clin$M <- str_extract(dat_clin$TNM, "M\\d")
dat_clin$M <- str_extract(dat_clin$M, "\\d")
# table(dat_clin$M)
dat_clin$M[dat_clin$M == "0"] <- "NO"
dat_clin$M[dat_clin$M == "1"] <- "Yes"
M <- table(dat_clin$M)
# Age
dat_clin$Age <- ifelse(dat_clin$Age > 60, "old", "young")
Age <- table(dat_clin$Age)
# Gender
dat_clin$Gender <- ifelse(dat_clin$Gender == "MALE", "Male", "Female")
Gender <- table(dat_clin$Gender)
# stage
dat_clin$Stage1 <- str_trim(str_extract(dat_clin$Stage, "\\s[H-Z]+"),
side = c("both", "left", "right"))
# table(dat_clin$Stage1)
dat_clin$Stage1[dat_clin$Stage1 == "I"] <- "Stage I-II"
dat_clin$Stage1[dat_clin$Stage1 == "II"] <- "Stage I-II"
dat_clin$Stage1[dat_clin$Stage1 == "III"] <- "Stage III-IV"
dat_clin$Stage1[dat_clin$Stage1 == "IV"] <- "Stage III-IV"
Stage <- table(dat_clin$Stage1)
dat_cal <- data.frame(Clinicopathological=c(
rep("Age", length(Age)),
rep("Gender", length(Gender)),
rep("Stage", length(Stage)),
rep("T", length(Te2)),
rep("N", length(N)),
rep("M", length(M))),
Name = c(names(Age), names(Gender),
names(Stage), names(Te2),
names(N), names(M)),
Number = c(as.numeric(Age), as.numeric(Gender),
as.numeric(Stage), as.numeric(Te2),
as.numeric(N), as.numeric(M))
)
return(dat_cal)
}
DT::datatable(get_clinical(survival_data))
curation
get_plot <- function(dat=survival_data, tag="Gender"){
# dat=survival_data
# tag="Gender"
# pvalue
colnames(dat)[which(colnames(dat) == tag)] <- "group_info"
dat$group_info <- factor(dat$group_info)
factors <- unique(as.character(dat$group_info))
cox <- coxph(Surv(OS.Time, OS) ~ group_info, data = dat)
tmp <- summary(cox)
tmp.wald <- data.frame(t(tmp$waldtest)) %>%
setNames(c("Wald_test", "Wald_df", "Wald_pvlaue"))
tmp.lg <- data.frame(t(tmp$logtest)) %>%
setNames(c("lg_rank", "lg_rank_df", "lg_rank_pvlaue"))
tmp.total <- cbind(tmp.wald, tmp.lg)
pvalue <- paste(paste0("Log-Rank P=", signif(tmp.lg$lg_rank_pvlaue, 3)),
paste0("Cox P=", signif(tmp.wald$Wald_pvlaue, 3)), sep = "\n")
# plot
fit <- survfit(Surv(OS.Time, OS) ~ group_info, data = dat)
info <- data.frame(time = fit$time,
n.risk = fit$n.risk,
n.event = fit$n.event,
n.censor = fit$n.censor,
surv = fit$surv,
upper = fit$upper,
lower = fit$lower)
pl <- ggsurvplot(fit,
data = dat,
surv.median.line = "hv",
add.all = TRUE,
palette = "aaas",
risk.table = TRUE,
xlab = "Follow up time(Years)",
legend = c(0.8, 0.2),
legend.title = "",
legend.labs = c("all", factors),
break.x.by = 2,
font.legend = c(10, "italic"),
ggtheme = theme_bw())
pl$plot <- pl$plot +
annotate("text", x=3, y=0.2, label=pvalue)
res <- list(info=info, pl=pl)
return(res)
}
plot
# gender
gender_plot <- get_plot(tag = "Gender")
gender_plot$pl
# stage
stage_plot <- get_plot(tag = "Stage")
stage_plot$pl
version
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
system code page: 936
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] stringr_1.4.0 impute_1.62.0 data.table_1.13.2 survminer_0.4.8 ggpubr_0.4.0
[6] survival_3.2-7 ggplot2_3.3.2 tibble_3.0.3 dplyr_1.0.2
loaded via a namespace (and not attached):
[1] Rcpp_1.0.5 lattice_0.20-41 tidyr_1.1.2 zoo_1.8-8 digest_0.6.25
[6] R6_2.5.0 cellranger_1.1.0 plyr_1.8.6 backports_1.1.10 evaluate_0.14
[11] pillar_1.4.6 rlang_0.4.7 curl_4.3 readxl_1.3.1 rstudioapi_0.11
[16] car_3.0-10 Matrix_1.2-18 DT_0.16 rmarkdown_2.5 splines_4.0.2
[21] foreign_0.8-80 htmlwidgets_1.5.2 munsell_0.5.0 broom_0.7.2 compiler_4.0.2
[26] xfun_0.19 pkgconfig_2.0.3 htmltools_0.5.0 tidyselect_1.1.0 gridExtra_2.3
[31] km.ci_0.5-2 rio_0.5.16 reshape_0.8.8 crayon_1.3.4 withr_2.3.0
[36] grid_4.0.2 jsonlite_1.7.1 xtable_1.8-4 gtable_0.3.0 lifecycle_0.2.0
[41] magrittr_1.5 KMsurv_0.1-5 scales_1.1.1 zip_2.1.1 stringi_1.5.3
[46] carData_3.0-4 ggsignif_0.6.0 ellipsis_0.3.1 survMisc_0.5.5 generics_0.1.0
[51] vctrs_0.3.4 openxlsx_4.2.2 tools_4.0.2 forcats_0.5.0 glue_1.4.2
[56] purrr_0.3.4 hms_0.5.3 crosstalk_1.1.0.1 rsconnect_0.8.16 abind_1.4-5
[61] yaml_2.2.1 colorspace_1.4-1 rstatix_0.6.0 knitr_1.30 haven_2.3.1