TCGAR语言收藏

TCGA临床数据提取

2020-04-12  本文已影响0人  养猪场小老板

1、读入clinical.json文件(fromJSON 来自 rjson 包,需先执行 library(rjson))

# Parse the clinical JSON export into an R list.
# NOTE(review): the `file =` argument matches rjson::fromJSON -- confirm that
# rjson is the package attached earlier in the session.
clinical_traits <- fromJSON(file = "clinical.cart.2019-09-28.json")

# Number of top-level entries in the parsed list (348 for this export).
n <- length(clinical_traits)

2、初始化变量

# Preallocate one length-n vector of numeric zeros per clinical field.
# Each vector is filled position by position by the extraction loop; assigning
# a character value into one of them coerces that whole vector to character.
id <- numeric(n)
classfication_of_tumor <- numeric(n)
tumor_stage <- numeric(n)
gender <- numeric(n)
year_to_birth <- numeric(n)
year_to_death <- numeric(n)
year_to_diagnosis <- numeric(n)
days_to_death <- numeric(n)
age <- numeric(n)
deadORlive <- numeric(n)
race <- numeric(n)
alcohol <- numeric(n)
smoked <- numeric(n)

3、利用一个for循环由json文件中提取信息

# Return the first element of `entries`, or NULL when `entries` is NULL or
# empty, so that a later field lookup yields NULL instead of failing with
# "subscript out of bounds".
first_entry <- function(entries) {
  if (length(entries) > 0) entries[[1]] else NULL
}

# Return the first element of `value`, or `default` when `value` is NULL or
# zero-length (a field that is absent from the record).
field_or_default <- function(value, default = "notReport") {
  if (length(value) == 0) default else value[[1]]
}

# Copy the clinical fields of every record into the preallocated vectors.
# seq_len(n) iterates zero times when n == 0, whereas 1:n would visit the
# indices 1 and 0.
for (i in seq_len(n)) {
  record <- clinical_traits[[i]]
  # Only the first diagnosis / exposure entry of each record is used.
  diagnosis <- first_entry(record[["diagnoses"]])
  demographic <- record[["demographic"]]
  exposure <- first_entry(record[["exposures"]])

  # `[[` is used for field access because, unlike `$`, it never partially
  # matches a longer field name. Every field goes through the same guard, so
  # a missing value becomes "notReport" rather than aborting the loop with
  # "replacement has length zero".
  id[i] <- field_or_default(diagnosis[["submitter_id"]])
  classfication_of_tumor[i] <-
    field_or_default(diagnosis[["classification_of_tumor"]])
  tumor_stage[i] <- field_or_default(diagnosis[["tumor_stage"]])
  year_to_diagnosis[i] <- field_or_default(diagnosis[["year_of_diagnosis"]])

  gender[i] <- field_or_default(demographic[["gender"]])
  year_to_birth[i] <- field_or_default(demographic[["year_of_birth"]])
  year_to_death[i] <- field_or_default(demographic[["year_of_death"]])
  days_to_death[i] <- field_or_default(demographic[["days_to_death"]])
  age[i] <- field_or_default(demographic[["age_at_index"]])
  deadORlive[i] <- field_or_default(demographic[["vital_status"]])
  race[i] <- field_or_default(demographic[["race"]])

  # The alcohol sentinel is spelled "notReport" like every other field, so a
  # single value identifies missing data across all columns.
  alcohol[i] <- field_or_default(exposure[["alcohol_history"]])
  smoked[i] <- field_or_default(exposure[["years_smoked"]])
}

4、将提取的信息整理成一个data.frame

# Combine the extracted per-record vectors into one table, one row per record.
# Column names are spelled out explicitly and equal the vector names.
gastric_clinic <- data.frame(
  id = id,
  classfication_of_tumor = classfication_of_tumor,
  tumor_stage = tumor_stage,
  gender = gender,
  year_to_birth = year_to_birth,
  year_to_death = year_to_death,
  year_to_diagnosis = year_to_diagnosis,
  days_to_death = days_to_death,
  age = age,
  deadORlive = deadORlive,
  race = race,
  alcohol = alcohol,
  smoked = smoked
)

转自

上一篇下一篇

猜你喜欢

热点阅读