01NHANES数据提取

2023-05-10  本文已影响0人  Jachin111

数据准备:所有数据按cycle分模块保存(目录结构形如 data/<cycle>/<模块>/*.xpt,cycle目录名形如 1999-2000),MORT_data目录存放生存数据(*.dat)

image.png
image.png
数据提取函数
常规数据提取
# Read every XPT file matching a glob under "../data/*/*/" and stack the
# results into a single table.
#
# names: file-name glob appended to "../data/*/*/", e.g. "*rhq*.xpt".
#
# Returns the row-bound contents of all matching files, with an extra `cycle`
# column holding characters 9-17 of each file's path (the path component that
# directly follows "../data/").
#
# Stops with an explicit error when no file matches, instead of failing
# inside read_xpt() on a missing path.
merge_files <- function(names){  #"*rhq*.xpt"
  glob <- paste0("../data/*/*/", names)
  dir_name <- Sys.glob(glob)
  if (length(dir_name) == 0) {
    stop("No files match glob pattern: ", glob, call. = FALSE)
  }

  tables <- lapply(dir_name, function(path) {
    one_file <- read_xpt(path)
    # Tag each row with the slice of the path that identifies its cycle.
    one_file$cycle <- substr(path, 9, 17)
    one_file
  })

  # Left fold keeps the pairwise bind order; a single match is returned as read.
  Reduce(dplyr::bind_rows, tables)
}

如提取人口模块数据使用:

# Stack every file whose name matches "*demo*.xpt" into one table.
dir.demo <- merge_files(names = "*demo*.xpt")

备注:查看对应路径下所有数据文件名称,无法一次性匹配的应单独进行提取

# List the paths the glob expands to, to check which file names it catches.
Sys.glob(paste0("../data/*/*/", "*demo*.xpt"))

生存数据提取函数:

# Read every fixed-width file matching a glob under "../../data/*/" and stack
# the results into a single table.
#
# names: file-name glob appended to "../../data/*/", e.g. "*MORT*.dat".
#
# Returns the row-bound contents of all matching files. Each file contributes
# eight integer columns (SEQN, ELIGSTAT, MORTSTAT, UCOD_LEADING, DIABETES,
# HYPERTEN, PERMTH_INT, PERMTH_EXM) read from the character positions given
# below, with "" and "." treated as missing. An extra `cycle` column holds
# characters 29-37 of the file's path with "_" replaced by "-".
#
# Stops with an explicit error when no file matches, instead of failing
# inside read_fwf() on a missing path.
mergeSurveys <- function(names){  #*MORT*.dat
  glob <- paste0("../../data/*/", names)
  dir_name <- Sys.glob(glob)
  if (length(dir_name) == 0) {
    stop("No files match glob pattern: ", glob, call. = FALSE)
  }

  # The column layout is the same for every file, so build it once.
  layout <- fwf_cols(SEQN=c(1,6),
                     ELIGSTAT=c(15,15),
                     MORTSTAT=c(16,16),
                     UCOD_LEADING=c(17,19),
                     DIABETES=c(20,20),
                     HYPERTEN=c(21,21),
                     PERMTH_INT=c(43,45),
                     PERMTH_EXM=c(46,48))

  tables <- lapply(dir_name, function(path) {
    one_file <- read_fwf(file = path,
                         col_positions = layout,
                         col_types = "iiiiiiii",
                         na = c("", "."))
    # Path slice uses "_" as separator; convert it to the "-" form.
    one_file$cycle <- gsub("_", "-", substr(path, 29, 37))
    one_file
  })

  # Left fold keeps the pairwise bind order; a single match is returned as read.
  Reduce(dplyr::bind_rows, tables)
}

生存数据各项含义查看:

# Frequency tables for the coded mortality variables, each followed by its
# code list.
# NOTE(review): `dsn` is not defined in this section, and the columns are
# referenced in lower case here while mergeSurveys() names them in upper case
# (ELIGSTAT, MORTSTAT, ...) - confirm how `dsn` is built before running.

#ELIGSTAT: Eligibility Status for Mortality Follow-up
# (no useNA argument here, so missing values are not counted in this table)
table(dsn$eligstat)
#1 = "Eligible"
#2 = "Under age 18, not available for public release"
#3 = "Ineligible"

#MORTSTAT: Final Mortality Status
table(dsn$mortstat, useNA="ifany")
# 0 = Assumed alive
# 1 = Assumed deceased
# <NA> = Ineligible or under age 18

#UCOD_LEADING: Underlying Cause of Death: Recode
table(dsn$ucod_leading, useNA="ifany")
# 1 = Diseases of heart (I00-I09, I11, I13, I20-I51)
# 2 = Malignant neoplasms (C00-C97)
# 3 = Chronic lower respiratory diseases (J40-J47)
# 4 = Accidents (unintentional injuries) (V01-X59, Y85-Y86)
# 5 = Cerebrovascular diseases (I60-I69)
# 6 = Alzheimer's disease (G30)
# 7 = Diabetes mellitus (E10-E14)
# 8 = Influenza and pneumonia (J09-J18)
# 9 = Nephritis, nephrotic syndrome and nephrosis (N00-N07, N17-N19, N25-N27)
# 10 = All other causes (residual)
# <NA> = Ineligible, under age 18, assumed alive, or no cause of death data available

#DIABETES: Diabetes Flag from Multiple Cause of Death (MCOD)
table(dsn$diabetes, useNA="ifany")
# 0 = No - Condition not listed as a multiple cause of death
# 1 = Yes - Condition listed as a multiple cause of death
# <NA> = Assumed alive, under age 18, ineligible for mortality follow-up, or MCOD not available

#HYPERTEN: Hypertension Flag from Multiple Cause of Death (MCOD)
table(dsn$hyperten, useNA="ifany")
# 0 = No - Condition not listed as a multiple cause of death
# 1 = Yes - Condition listed as a multiple cause of death
# <NA> = Assumed alive, under age 18, ineligible for mortality follow-up, or MCOD not available
上一篇 下一篇

猜你喜欢

热点阅读