NHANES数据库NHANES数据分析教程

NHANES数据库介绍及使用(二)

2020-08-26  本文已影响0人  不学临床的医学生

1. NHANES研究设计

2. 样本权重

3. 多周期合并时样本权重计算

4. 使用与不使用样本权重时的结果差异(以频数、百分比计算结果为例)

/* Race and Hispanic origin distribution in the raw (unweighted) interview sample. */
proc freq data=demo order=formatted;
  /* Presentation-only statements: titles, footnote, variable label and the
     r3ordf format (the format is defined in the setup code at the end). */
  title "Percent of 2015-2016 sample, by race and Hispanic origin";
  title2 "Unweighted interview sample";
  footnote "Non-Hispanic other includes non-Hispanic persons who reported a race other than white, black, or Asian or who reported multiple races.";
  label ridreth3 = "Race and Hispanic origin";
  format ridreth3 r3ordf.;

  /* One-way table of ridreth3; NOCUM suppresses the cumulative columns. */
  tables ridreth3 / nocum;
run;
/* Same race and Hispanic origin table, now weighted by the interview weight wtint2yr. */
proc freq data=demo order=formatted;
  /* Presentation-only statements: titles, footnote, variable label and the
     r3ordf format (the format is defined in the setup code at the end). */
  title "Percent of 2015-2016 sample, by race and Hispanic origin";
  title2 "Weighted with interview weight";
  footnote "Non-Hispanic other includes non-Hispanic persons who reported a race other than white, black, or Asian or who reported multiple races.";
  label ridreth3 = "Race and Hispanic origin";
  format ridreth3 r3ordf.;

  /* Each record contributes its wtint2yr value instead of a count of 1. */
  weight wtint2yr;

  /* One-way table of ridreth3; NOCUM suppresses the cumulative columns. */
  tables ridreth3 / nocum;
run;

5. 参考内容

https://wwwn.cdc.gov/nchs/nhanes/tutorials/module3.aspx
https://wwwn.cdc.gov/nchs/data/tutorials/module3_examples_SAS_Survey.sas

6. 整体格式设置代码

/*-------------------------------------------------------------------------
  Download data: filerefs and XPORT librefs for the 2015-2016 NHANES files.

  Each fileref points at a SAS transport (.xpt) file on the NHANES website.
  Each libref is declared with the XPORT engine and no path; it carries the
  same name as its fileref, which is presumably how the library is bound to
  the URL (confirm against the SAS XPORT engine documentation).

  NOTE(review): these URLs are the ones published with the tutorial; verify
  that they still resolve, as the hosting paths may have changed since.
-------------------------------------------------------------------------*/

/* Remote transport files */
filename demo_i url 'https://wwwn.cdc.gov/nchs/nhanes/2015-2016/demo_i.xpt';  /* DEMO: demographics                 */
filename bpx_i  url 'https://wwwn.cdc.gov/nchs/nhanes/2015-2016/bpx_i.xpt';   /* BPX:  blood pressure exam          */
filename bpq_i  url 'https://wwwn.cdc.gov/nchs/nhanes/2015-2016/bpq_i.xpt';   /* BPQ:  blood pressure questionnaire */

/* Matching XPORT libraries, one per fileref */
libname demo_i xport;
libname bpx_i  xport;
libname bpq_i  xport;

/* Read the SAS transport files and materialise them as temporary (WORK) datasets. */

/* Demographics: only the identifiers, design variables, weights and
   covariates listed in the KEEP statement are written to WORK.demo. */
data demo;
  set demo_i.demo_i;
  keep seqn riagendr ridageyr ridreth3 sdmvstra sdmvpsu wtmec2yr wtint2yr ridexprg;
run;

/* Blood pressure examination file, copied in full. */
data bpx_i;
  set bpx_i.bpx_i;
run;

/* Blood pressure questionnaire file, copied in full. */
data bpq_i;
  set bpq_i.bpq_i;
run;

/*-------------------------------------------------------------------------
  Build WORK.bpdata for the hypertension example.

  Merges demographics, blood pressure exam readings and the blood pressure
  questionnaire item bpq050a by seqn, then derives:
    n_sbp, n_dbp        number of non-missing systolic / diastolic readings
    mean_sbp, mean_dbp  mean of the readings (diastolic zeros excluded)
    HTN_old, HTN_new    hypertension indicators coded 100 / 0 / missing
    raceEthCat          recoded race and Hispanic origin
    ageCat_18           age group for adults aged 18 and over
    inAnalysis          1/0 flag for the subpopulation of interest
  The raw readings are dropped from the output.
-------------------------------------------------------------------------*/
data bpdata;
  merge demo
        bpx_i (keep = seqn bpxsy1-bpxsy4 bpxdi1-bpxdi4)
        bpq_i (keep = seqn bpq050a);
  by seqn;

  /* Count non-missing readings. This happens BEFORE diastolic zeros are
     blanked, so a diastolic reading of 0 still counts towards n_dbp. */
  n_sbp = n(of bpxsy1-bpxsy4);
  n_dbp = n(of bpxdi1-bpxdi4);

  /* Turn diastolic readings of 0 into missing so they do not enter the mean. */
  array dbp_reading {4} bpxdi1-bpxdi4;
  do _k = 1 to dim(dbp_reading);
    if dbp_reading{_k} = 0 then dbp_reading{_k} = .;
  end;

  /* Average systolic and diastolic pressure over the available readings. */
  mean_sbp = mean(of bpxsy1-bpxsy4);
  mean_dbp = mean(of bpxdi1-bpxdi4);

  /* "Old" definition (as used in NCHS Data Brief No. 289): now taking
     prescribed medicine for hypertension (bpq050a = 1) or mean BP at or
     above 140/90. Coded 100 so that a mean reads as a percentage; 0 only
     when both systolic and diastolic readings exist; otherwise missing. */
  if (bpq050a = 1 or mean_sbp >= 140 or mean_dbp >= 90) then HTN_old = 100;
  else if (n_sbp > 0 and n_dbp > 0) then HTN_old = 0;

  /* "New" definition (2017 ACC/AHA guidelines): medication or mean BP at
     or above 130/80. Not used in Data Brief No. 289 - kept for reference. */
  if (bpq050a = 1 or mean_sbp >= 130 or mean_dbp >= 80) then HTN_new = 100;
  else if (n_sbp > 0 and n_dbp > 0) then HTN_new = 0;

  /* Race and Hispanic origin categories for the hypertension analysis.
     Any other ridreth3 value leaves raceEthCat missing. */
  if ridreth3 = 3 then raceEthCat = 1;               /* Non-Hispanic white */
  else if ridreth3 = 4 then raceEthCat = 2;          /* Non-Hispanic black */
  else if ridreth3 = 6 then raceEthCat = 3;          /* Non-Hispanic Asian */
  else if ridreth3 in (1, 2) then raceEthCat = 4;    /* Hispanic */
  else if ridreth3 = 7 then raceEthCat = 5;          /* Non-Hispanic other race or multiple races */

  /* Age categories for adults aged 18 and over; under 18 or missing age
     leaves ageCat_18 missing. */
  if ridageyr >= 60 then ageCat_18 = 3;
  else if ridageyr >= 40 then ageCat_18 = 2;
  else if ridageyr >= 18 then ageCat_18 = 1;

  /* Subpopulation of interest: aged 18 and over, ridexprg not equal to 1
     (not pregnant), and at least one systolic OR diastolic reading. */
  inAnalysis = (ridageyr >= 18 and ridexprg ne 1 and not (n_sbp = 0 and n_dbp = 0));

  /* Remove the raw readings and the loop index from the output dataset. */
  drop bpxsy1-bpxsy4 bpxdi1-bpxdi4 _k;
run;


/*-------------------------------------------------------------------------
  Estimates for graph - Distribution of race and Hispanic origin,
  NHANES 2015-2016.
  Module 3, Examples Demonstrating the Importance of Using Weights in Your
  Analyses, section "Adjusting for oversampling".
-------------------------------------------------------------------------*/

/* r3ordf combines and reorders the levels of ridreth3. The digit at the
   start of each label fixes the display order when a procedure sorts by
   formatted value (the PROC FREQ steps use order=formatted). */
proc format;
  value r3ordf
    4    = "1 Non-Hispanic black"
    6    = "2 Non-Hispanic Asian"
    1, 2 = "3 Hispanic"
    3, 7 = "4 Non-Hispanic white and other"
  ;
run;
上一篇 下一篇

猜你喜欢

热点阅读