大数据SAS统计软件学习

Proc Freq 置信区间

2019-06-13  本文已影响0人  不连续小姐

SAS Day 41

Background Story:

Most of the time we use the Proc Freq binomial option to generate confidence intervals for a 2-level categorical variable, such as Success or Failure. We also use the CI (Lower Limit, Upper Limit) to describe the precision of the proportion estimated from the dataset.

Once, instead of directly applying the Upper Limit and Lower Limit, my boss asked me to take the complement: new lower limit = 1 - upper limit, and new upper limit = 1 - lower limit. I forgot the reason behind it, because I was so into Chinese Rap.

I was working with CIs last week, and I started to wonder about that special case again. I decided to write a summary of Proc Freq so I can watch This is Hip Hop.

[caption id="attachment_2426" align="alignnone" width="500"] image

moritz320 / Pixabay[/caption]

Proc Freq Case:

I generalized Proc Freq Binomial into 2 big cases(One Group, Two Groups) with sublevel cases:

Note: For Cases 1-4 we can extract the 95% CI directly from the Proc Freq output dataset. Case 5 (a group that is missing the desired category) needs the complement instead:
new upper limit = 1 - lower limit, and new lower limit = 1 - upper limit.

image

Proc Freq CI Basic Syntax:

  1. one group categorical variable with the desired test variable Level
/* Case 1: one group, CI for the default level of ORR.                  */
/* BINOMIAL(EXACT) requests exact limits; ALPHA=0.05 gives a 95% CI.    */
proc freq data=resp2;
    tables orr / binomial(exact) alpha=0.05;
    output out=_ci95 binomial;
run;

/* Build the display string "(low% , high%)" from the exact limits      */
/* (XL_BIN / XU_BIN) written to the OUTPUT dataset, scaled to percent.  */
data CI_orr;
    set _ci95;
    length C1 $100 lbl $150;
    C2 = '(' || put(100*(xl_bin), 4.1) || '% , ' || put(100*(xu_bin), 4.1) || '%)';
run;
  2. One group categorical variable with undesired test variable level
/* Case 2: one group, CI for an explicitly chosen level of ORR.         */
/* LEVEL="1" selects the ORR value "1" instead of the default level.    */
proc freq data=resp2;
    tables orr / binomial(exact level="1") alpha=0.05;   /* 95% CI */
    output out=_ci95 binomial;
run;

/* Build the display string "(low% , high%)" from the exact limits      */
/* (XL_BIN / XU_BIN), scaled to percent.                                */
data CI_orr;
    set _ci95;
    length C1 $100 lbl $150;
    C2 = '(' || put(100*(xl_bin), 4.1) || '% , ' || put(100*(xu_bin), 4.1) || '%)';
run;
  3. Two groups data with the desired categorical test variable Level
/* Case 3: two groups, CI for the default level of ORR.                 */
/* BY TRT produces one set of binomial statistics per TRT value.        */
proc freq data=resp2;
    by trt;
    tables orr / binomial(exact) alpha=0.05;   /* 95% CI */
    output out=_ci95 binomial;
run;

/* Build the display string "(low% , high%)" from the exact limits      */
/* (XL_BIN / XU_BIN), scaled to percent; one row per BY group.          */
data CI_orr;
    set _ci95;
    length C1 $100 lbl $150;
    C2 = '(' || put(100*(xl_bin), 4.1) || '% , ' || put(100*(xu_bin), 4.1) || '%)';
run;

4. Two groups data with the undesired categorical test variable Level

/* Case 4: two groups, CI for an explicitly chosen level of ORR.        */
/* BY TRT gives one result per TRT value; LEVEL="1" selects ORR "1".    */
proc freq data=resp2;
    by trt;
    tables orr / binomial(exact level="1") alpha=0.05;   /* 95% CI */
    output out=_ci95 binomial;
run;

/* Build the display string "(low% , high%)" from the exact limits      */
/* (XL_BIN / XU_BIN), scaled to percent; one row per BY group.          */
data CI_orr;
    set _ci95;
    length C1 $100 lbl $150;
    C2 = '(' || put(100*(xl_bin), 4.1) || '% , ' || put(100*(xu_bin), 4.1) || '%)';
run;

5. Two group data with one group missing the required categorical test variable Level

/* Case 5: two groups where one group lacks the required ORR level.     */
/* Capture the confidence-limit table through ODS rather than the       */
/* OUTPUT statement, so LowerCL / UpperCL are available per TRT.        */
ods output BinomialCLs=exactci;
proc freq data=pop2;
    tables orr / binomial(exact level="1") alpha=0.05 out=freqnum1;
    by trt;
run;
ods output close;

/* Report the complemented interval: the displayed lower limit is       */
/* 1 - UpperCL and the displayed upper limit is 1 - LowerCL, both       */
/* expressed as percentages with one decimal place.                     */
data exactci1;
    set exactci;
    length ci $20.;
    ci = '(' || compress(put((1-UpperCL)*100, 8.1))
         || ' , '
         || compress(put((1-LowerCL)*100, 8.1)) || ')';
    label = " 95% Exact CI of ORR (LCL, UCL)";
    keep label trt ci;
run;

Key options:

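By: computes the CI separately for each group (for example, each treatment arm).
Exact: requests exact confidence limits (Clopper-Pearson method).
Level: the lowest response level is used by default. If we need the proportion for a different level, we must specify it explicitly with level="x".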

/* General Proc Freq syntax for an exact binomial CI.                   */
/* Fix: the original leading "*" comment had no terminating semicolon,  */
/* so it absorbed the PROC FREQ statement and the remaining statements  */
/* ran outside any procedure.                                           */
proc freq data=resp2; 
by trtpn;  /*by: For two groups*/
tables orr/ binomial (exact ) /* level="1"  for un-ordered data*/ alpha=0.05 /*95 ci*/; 
output out=_ci95 binomial; 
run;

Proc Freq Sample Macro:

/*----------------------------------------------------------------------
  %orr : one summary row for ORR (count-based percentage + exact 95% CI)

  Parameters
    cond=   WHERE condition applied to RESP2 in both the count and the
            PROC FREQ step
    ord=    value stored in the ORD column of the result
    n=      denominator for the percentage; also shown in the row label
    label=  text used to build the row label LBL: "<label> (N=<n>) "
    out=    name of the output dataset

  Output dataset &out: merge of CNT_ORR (n, ord, lbl, C1, oc1) and
  CI_ORR (ord, lbl, c2, clow, chigh) by LBL.

  Fix: the assignment to ORD in the CI_ORR step had been garbled into
  "ord=" followed by a stray symbol (the macro reference &ord; decoded
  as an HTML entity); it is restored to ord=&ord;.
----------------------------------------------------------------------*/
%macro orr(cond= , ord=, n=, label=, out= );
/* Responder count (distinct PT with orr=1) and percentage of &n */
proc sql noprint;
  create table cnt_orr as select count(distinct pt) as n, &ord as ord, "&label (N=%trim(&n)) " as lbl length=150,
                                       strip(put(calculated n*100/ &n,5.1))||"%" as C1 length=100,
                                       calculated n*100/&n as oc1
  from resp2
  where orr=1 and &cond ;
quit;

/*** 95% CI ***/
proc freq data=resp2(where=(&cond ));
/*  by trtpn;*/
  tables orr/ binomial (exact level="1" ) alpha=0.05;
  output out=_ci95 binomial; 
run;

/* Format the exact limits as text (C2) and keep them as numeric percentages */
data CI_orr;  
set _ci95;
  length C1 $100 lbl $150;
  C2='('||put(100*(xl_bin),4.1)||'% , '||put(100*(xu_bin),4.1)||'%)';
  clow=100*(xl_bin);
  chigh=100*(xu_bin);
  ord=&ord;
  lbl="&label (N=%trim(&n)) ";
run;

/* Combine count and CI; both inputs carry the same LBL value */
data &out;
  merge cnt_orr CI_orr(keep=ord lbl c2 clow chigh);
  by lbl;
run;
%mend;
*1.overall;
%orr(cond= pt^=" ", ord=1, n=&n1, label=Overall  , out=o1 );

Happy SAS Practicing ! 👌

上一篇下一篇

猜你喜欢

热点阅读