How to calculate clinical parame

2021-01-07  本文已影响0人  生信学习者2
library(dplyr)
library(tibble)
library(ggplot2)
library(survival)
library(survminer)
library(data.table)
library(stringr)

load phenotype

survival_data <- fread("../../Result/phenotype/common_survival_data.tsv")

clinical parameters

get_clinical <- function(x){
  
  dat_clin <- x
  # pathologic_T 
  dat_clin$Te1 <- str_extract(dat_clin$TNM, "T\\d[a-z]+")
  dat_clin$Te2 <- str_extract(dat_clin$TNM, "T\\d")
  dat_clin$Te1 <- with(dat_clin, ifelse(is.na(Te1), Te2, Te1))
  # table(dat_clin$Te1)
  # table(dat_clin$Te2)
  dat_clin$Te2[dat_clin$Te2 == "T1"] <- "T1-T2"
  dat_clin$Te2[dat_clin$Te2 == "T2"] <- "T1-T2"
  dat_clin$Te2[dat_clin$Te2 == "T3"] <- "T1-T2"
  dat_clin$Te2[dat_clin$Te2 == "T4"] <- "T3-T4"
  Te2 <- table(dat_clin$Te2)
  
  # pathologic_N
  dat_clin$N <- str_extract(dat_clin$TNM, "N\\d")
  dat_clin$N <- str_extract(dat_clin$N, "\\d")
  # table(dat_clin$N)
  
  dat_clin$N[dat_clin$N == "0"] <- "NO"
  dat_clin$N[dat_clin$N == "1"] <- "Yes"
  dat_clin$N[dat_clin$N == "2"] <- "Yes"
  dat_clin$N[dat_clin$N == "3"] <- "Yes"
  N <- table(dat_clin$N)
  
  # pathologic_M
  dat_clin$M <- str_extract(dat_clin$TNM, "M\\d")
  dat_clin$M <- str_extract(dat_clin$M, "\\d")
  # table(dat_clin$M)
  
  dat_clin$M[dat_clin$M == "0"] <- "NO"
  dat_clin$M[dat_clin$M == "1"] <- "Yes"
  M <- table(dat_clin$M)
  
  # Age
  dat_clin$Age <- ifelse(dat_clin$Age > 60, "old", "young")
  Age <- table(dat_clin$Age)
  
  # Gender
  dat_clin$Gender <- ifelse(dat_clin$Gender == "MALE", "Male", "Female")
  Gender <- table(dat_clin$Gender)
  
  # stage
  dat_clin$Stage1 <- str_trim(str_extract(dat_clin$Stage, "\\s[H-Z]+"),
                              side = c("both", "left", "right"))
  # table(dat_clin$Stage1)
  dat_clin$Stage1[dat_clin$Stage1 == "I"] <- "Stage I-II"
  dat_clin$Stage1[dat_clin$Stage1 == "II"] <- "Stage I-II"
  dat_clin$Stage1[dat_clin$Stage1 == "III"] <- "Stage III-IV"
  dat_clin$Stage1[dat_clin$Stage1 == "IV"] <- "Stage III-IV"
  Stage <- table(dat_clin$Stage1)
  
  dat_cal <- data.frame(Clinicopathological=c(
                          rep("Age", length(Age)),
                          rep("Gender", length(Gender)),
                          rep("Stage", length(Stage)),
                          rep("T", length(Te2)),
                          rep("N", length(N)),
                          rep("M", length(M))), 
                        Name = c(names(Age), names(Gender), 
                                 names(Stage), names(Te2),
                                 names(N), names(M)),
                        Number = c(as.numeric(Age), as.numeric(Gender), 
                                 as.numeric(Stage), as.numeric(Te2),
                                 as.numeric(N), as.numeric(M))
                        )
  return(dat_cal)  
}

DT::datatable(get_clinical(survival_data))

curation

get_plot <- function(dat=survival_data, tag="Gender"){
  
  # dat=survival_data
  # tag="Gender"
  
  # pvalue
  colnames(dat)[which(colnames(dat) == tag)] <- "group_info"
  dat$group_info <- factor(dat$group_info)
  factors <- unique(as.character(dat$group_info))
  
  cox <- coxph(Surv(OS.Time, OS) ~ group_info, data = dat)
  tmp <- summary(cox)
  tmp.wald <- data.frame(t(tmp$waldtest)) %>%
        setNames(c("Wald_test", "Wald_df", "Wald_pvlaue"))
  tmp.lg <- data.frame(t(tmp$logtest)) %>%
        setNames(c("lg_rank", "lg_rank_df", "lg_rank_pvlaue"))
  tmp.total <- cbind(tmp.wald, tmp.lg) 
  
  pvalue <- paste(paste0("Log-Rank P=", signif(tmp.lg$lg_rank_pvlaue, 3)), 
                  paste0("Cox P=", signif(tmp.wald$Wald_pvlaue, 3)), sep = "\n")
  
  # plot
  fit <- survfit(Surv(OS.Time, OS) ~ group_info, data = dat)
  info <- data.frame(time = fit$time,
                  n.risk = fit$n.risk,
                  n.event = fit$n.event,
                  n.censor = fit$n.censor,
                  surv = fit$surv,
                  upper = fit$upper,
                  lower = fit$lower)
  
  pl <- ggsurvplot(fit,
             data = dat,  
             surv.median.line = "hv",
             add.all = TRUE,
             palette = "aaas",
             risk.table = TRUE, 
             xlab = "Follow up time(Years)", 
             legend = c(0.8, 0.2), 
             legend.title = "",
             legend.labs = c("all", factors), 
             break.x.by = 2,
             font.legend = c(10, "italic"),
             ggtheme = theme_bw())
  pl$plot <- pl$plot + 
             annotate("text", x=3, y=0.2, label=pvalue)
  
  res <- list(info=info, pl=pl)
  return(res)
}

plot

# gender 
gender_plot <- get_plot(tag = "Gender")
gender_plot$pl

# stage 
stage_plot <- get_plot(tag = "Stage")
stage_plot$pl

version

sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19042)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    
system code page: 936

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] stringr_1.4.0     impute_1.62.0     data.table_1.13.2 survminer_0.4.8   ggpubr_0.4.0     
[6] survival_3.2-7    ggplot2_3.3.2     tibble_3.0.3      dplyr_1.0.2      

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.5        lattice_0.20-41   tidyr_1.1.2       zoo_1.8-8         digest_0.6.25    
 [6] R6_2.5.0          cellranger_1.1.0  plyr_1.8.6        backports_1.1.10  evaluate_0.14    
[11] pillar_1.4.6      rlang_0.4.7       curl_4.3          readxl_1.3.1      rstudioapi_0.11  
[16] car_3.0-10        Matrix_1.2-18     DT_0.16           rmarkdown_2.5     splines_4.0.2    
[21] foreign_0.8-80    htmlwidgets_1.5.2 munsell_0.5.0     broom_0.7.2       compiler_4.0.2   
[26] xfun_0.19         pkgconfig_2.0.3   htmltools_0.5.0   tidyselect_1.1.0  gridExtra_2.3    
[31] km.ci_0.5-2       rio_0.5.16        reshape_0.8.8     crayon_1.3.4      withr_2.3.0      
[36] grid_4.0.2        jsonlite_1.7.1    xtable_1.8-4      gtable_0.3.0      lifecycle_0.2.0  
[41] magrittr_1.5      KMsurv_0.1-5      scales_1.1.1      zip_2.1.1         stringi_1.5.3    
[46] carData_3.0-4     ggsignif_0.6.0    ellipsis_0.3.1    survMisc_0.5.5    generics_0.1.0   
[51] vctrs_0.3.4       openxlsx_4.2.2    tools_4.0.2       forcats_0.5.0     glue_1.4.2       
[56] purrr_0.3.4       hms_0.5.3         crosstalk_1.1.0.1 rsconnect_0.8.16  abind_1.4-5      
[61] yaml_2.2.1        colorspace_1.4-1  rstatix_0.6.0     knitr_1.30        haven_2.3.1

Reference

  1. 解决生存分析和临床参数相关分析
上一篇下一篇

猜你喜欢

热点阅读