Thursday, October 06, 2011

Bayesian Computation (3)

In Chapter 3 of "Bayesian Computation with R", Jim Albert discussed how to carry out the two fundamental tasks of statistics, namely estimation and hypothesis testing, in a single-parameter framework.

The structure of this chapter is organized as follows:

                  +--------Non-informative Prior: estimate variance parameter in a normal model
                  |
                  |
+------Estimation +--------Informative Prior : estimate a gamma model
|                 |
|                 |
|                 +--------Sensitivity to the choice of Prior : Robustness
|
|
+-----  Hypothesis Testing : A binomial test


/*** 3.2: read the tab-delimited football scores file into WORK.FOOTBALL ***/
proc import out=football
            datafile="c:\footballscores.txt"
            dbms=tab
            replace;
run;

/* For each game compute d = (actual margin) - (point spread) and its
   square d2; on the last record, store the row count in macro var &n. */
data football;
     set  football  end=eof;
     /* d measures by how much the favorite beat the spread */
     d=favorite - underdog -spread;
     d2=d**2;
     /* eof flag is set on the final observation, so _n_ is the sample size */
     if eof then call symput('n', _n_);
run;

/* Sum of squared deviations v = sum(d2), needed for the scaled
   chi-square posterior of the variance. */
proc means data=football  noprint;
     var  d2;
     output  out=sum  sum(d2)=v;
run;
/* lift the single summary value into macro variable &v */
data _null_;
     set sum;
     call  symput('v', v);
run;


/* echo the two macro variables to the log for a sanity check */
%put &n;
%put &v;


/* Simulate 1000 draws from the posterior of the precision p
   (chi-square(n) scaled by 1/v) and of the std deviation s = sqrt(1/p). */
data P;
     call streaminit(100000);
     drop sim;                              /* loop index is not kept      */
     do sim=1 to 1000;
        p = rand('CHISQUARE', &n) / &v;     /* posterior precision draw    */
        s = sqrt(1/p);                      /* implied standard deviation  */
        output;
     end;
run;


/* Build the histogram of the simulated s values.  ODS output is
   suppressed; we only want the binned counts written to WORK.HIST. */
ods select none;
proc univariate data=p  ;
     var s;
     histogram /midpoints=12.5 to 15.5 by 0.25
                cbarline=blue  cfill=white
                outhistogram=hist;
     title "Histogram of ps";
run;
title;
ods select all;

/* redraw the histogram as a bar chart from the binned counts */
axis1 label=(angle=90 "Frequency");
proc gbarline data=hist;
     bar _midpt_/discrete  sumvar=_count_  space=0  axis=axis1 ;
     title " ";
run;
title;

/* 3.3 */
/* 3.3: prior predictive density of y via the identity
   f(y) = f(y|lambda) * g(lambda) / g(lambda|y),
   evaluated at lambda = alpha/beta (the prior mean). */
data y;
     retain alpha 16
            beta 15174
            yobs 1
            ex 66
     ;
     keep y py;
     lam   = alpha/beta;                 /* evaluation point for lambda */
     scale = 1/beta;                     /* gamma scale parameter       */
     do y=0 to 10;
        like  = pdf('POIS',  y,   lam * ex);              /* f(y|lambda) */
        prior = pdf('GAMMA', lam, alpha,     scale);      /* g(lambda)   */
        post  = pdf('GAMMA', lam, alpha + y, 1/(beta+ex)); /* g(lambda|y) */
        py = round(like*prior/post, 0.001);
        output;
     end;
run;


/* Draw 1000 values from the gamma posterior of lambda:
   shape = alpha + yobs, scale = 1/(beta + ex). */
data lambdaA;
     retain alpha  16
            yobs   1
            beta   15174
            ex     66
            seed   89878765
     ;
     keep rannum;
     shape = alpha + yobs;
     scale = 1/(beta + ex);
     do draw=1 to 1000;
        rannum = rangam(seed, shape) * scale;
        output;
     end;
run;


/* Test H0: p = 0.5 given y = 5 heads in n = 20 tosses,
   with a Beta(a,a) prior on p under the alternative. */
%let n=20;
%let y=5;
%let a=10;
%let p=0.5;

/* m1 = marginal density of y under the Beta(a,a) alternative, obtained
   from f(y) = f(y|p) g(p) / g(p|y) evaluated at p = 0.5;
   lambda = posterior probability that the coin is fair. */
data _null_;
     m1 = pdf('binomial', &y, &p, &n)
        * pdf('beta', &p, &a, &a)
        / pdf('beta', &p, &a+&y, &a+&n-&y);
  put m1=;
     lambda = pdf('binomial', &y, &p, &n)
            / (pdf('binomial', &y, &p, &n)+m1);
     put lambda=;
run;


/* Sensitivity analysis: recompute lambda over a grid of log(a) values
   to see how the conclusion depends on the Beta(a,a) prior. */
data lambda;
     interval=(5-(-4))/100;
     do loga=-4 to 5 by interval;
        a=exp(loga);
        /* marginal density of y under the Beta(a,a) alternative */
        m1 = pdf('binomial', &y, &p, &n)
           * pdf('beta', &p, a, a)
           / pdf('beta', &p, a+&y, a+&n-&y);
       /* posterior probability that the coin is fair */
       lambda = pdf('binomial', &y, &p, &n)
              / (pdf('binomial', &y, &p, &n)+m1);
        output; 
     end;
run;

/* Join the grid points into a curve of Prob(coin is fair) vs log(a). */
symbol interpol=j;
axis2  label=("log(a)");
axis1  label=(angle=90  "Prob(coin is fair)");
proc gplot data=lambda;
     plot lambda*loga / vaxis=axis1  haxis=axis2;
run;
quit;
goptions reset=all;



/* Robustness of Bayesian Method 
   In this example, Jim tries to estimate the IQ of Joe based on two 
   different prior distributions, Normal and T. The posterior distribution 
   of the estimate will be compared to demonstrate the robustness of 
   Bayesian method.
*/

/* Normal prior x normal likelihood: closed-form posterior of Joe's IQ.
   mu, tau = prior mean/sd; sigma = known sampling sd; n = sample size.
   For each observed ybar, output posterior mean mu1 and sd tau1. */
data summ1;
      retain mu 100 tau 12.16  sigma  15  n 4;
   input ybar;
      se=sigma/sqrt(n);
   /* posterior precision is the sum of data and prior precisions */
   tau1=1/sqrt(1/se**2 + 1/tau**2);
   /* posterior mean is the precision-weighted average of ybar and mu */
   mu1= (ybar/se**2 + mu/tau**2)*(tau1**2);
      keep ybar mu1  tau1;
cards;
110
125
140
;
run;

/* Evaluate the two priors on a theta grid: a t(2) prior scaled so that
   P(theta < 120) = 0.95, and a normal prior matched to the same quantile.
   BUG FIX: mu and tau were never defined in this step (they exist only in
   the summ1 step above), so (theta-mu) was missing for every grid point;
   the normal density was also scaled by 1/10 instead of 1/tau, which is
   inconsistent with the 1/tscale scaling of the t density. */
data theta;
      retain mu 100 tau 12.16;             /* prior location and normal sd */
      interval=(140-60)/200;
   tscale=20/quantile('T', 0.95, 2);       /* scale of the t(2) prior      */
   do theta=60 to 140 by interval;
         tdensity=1/tscale*pdf('T', (theta-mu)/tscale, 2);
   ndensity=1/tau*pdf('NORMAL', (theta-mu)/tau);
   output;
   keep theta tdensity  ndensity;
   end;
run;

/* Posterior on a theta grid under the t(2) prior, one group per ybar.
   The product prior*like is unnormalized; PROC STDIZE below rescales it.
   BUG FIX: mu was never defined in this step, so the prior (and hence the
   posterior) evaluated to missing for every grid point. */
data summ2;
      retain mu 100;                       /* prior location               */
      tscale=20/quantile('T', 0.95, 2);
      input ybar;
   do theta=60 to 180 by (180-60)/500;
         like=pdf('NORMAL', (theta-ybar)/7.5);  /* se = 15/sqrt(4) = 7.5   */
   prior=pdf('T', (theta-mu)/tscale, 2);
   post=prior*like;                        /* unnormalized posterior       */
         output;
   end;
cards;
110
125
140
;
run;

/* Normalize each posterior so it sums to 1 within its ybar group. */
proc stdize data=summ2  method=sum  out=summ2;
      by ybar;
      var post;
run;

/* Posterior mean and sd under the t prior, by numerical summation over
   the grid: m = sum(theta*post), s = sqrt(sum(theta^2*post) - m^2). */
data summ2v;
      set summ2;
      m=theta*post;
   s=theta**2*post;
run;
/* accumulate the sums within each ybar group (overwrites summ2) */
proc means data=summ2v  noprint;
      by ybar;
   var m s;
   output out=summ2(keep=ybar  m  s)  sum(m)=m  sum(s)=s;
run;
/* convert the raw second moment into a standard deviation */
data summ2;
      set summ2;
   s=sqrt(s-m**2);
run;

/* Combine the normal-prior (summ1) and t-prior (summ2) posterior
   summaries side by side.  Match rows explicitly on ybar instead of
   relying on a fragile positional one-to-one merge; both inputs are
   already in ybar order (110, 125, 140). */
data summ;
      merge summ1  summ2;
      by ybar;
run;


/* Hypothesis Testing 
   In this example
*/
/* Frequentist two-sided p-value for y = 5 heads in 20 tosses, p0 = 0.5. */
data _null_;
     lower_tail = cdf('BINOMIAL', 5, 20, 0.5);
     pvalue = 2*lower_tail;
     put pvalue= ;
run;

/* Bayes test of H0: p = 0.5 against a Beta(a,a) alternative.
   m1 is the marginal density of y under the alternative; lambda is the
   posterior probability that the coin is fair.
   BUG FIX: the binomial pdf was called as (y, n, p), but SAS expects
   PDF('BINOMIAL', m, p, n) -- success probability BEFORE trial count --
   so p=20 was invalid and every result was missing.  The macro version
   above and the grid step below already use the correct order. */
data _null_;
     retain n 20  y 5  a 10  p 0.5;
     m1 = pdf('BINOMIAL', y, p, n)
        * pdf('BETA', p, a, a)
        / pdf('BETA', p, a+y, a+n-y);
     lambda = pdf('BINOMIAL', y, p, n)
            / (pdf('BINOMIAL', y, p, n) + m1);
     put lambda= best.;
run;


/* SAS version of Albert's pbetat(): Bayes factor and posterior
   probability for H0: p = p0 versus a Beta(a,b) alternative,
   given s successes out of n trials with prior P(H0) = prob. */
data pbetat_out;
     length _stat_ $ 12;
     input p0  prob  a  b  s  n;
     keep _stat_ nvalue;
     failures = n - s;
     /* log Bayes factor in favor of H0, via log-beta functions */
     log_bf = s*log(p0) + failures*log(1-p0)
            + logbeta(a, b) - logbeta(a+s, b+failures);
     bf   = exp(log_bf);
     post = prob*bf / (prob*bf + 1 - prob);
     _stat_='BayesFactor'; nvalue=bf;   output;
     _stat_='Posterior';   nvalue=post; output;
cards;
0.5  0.5 10 10 5 20
;
run;
 
     
/* Sensitivity of Prob(coin is fair) to the Beta(a,a) prior, evaluated on
   a log(a) grid.  FIX: removed the retained initializer a=10, which was
   dead code -- a is recomputed from loga before every use. */
data lambda;
      retain n 20  y 5  p 0.5;
      keep loga m2 lambda;
      do loga=-4 to 5 by (5+4)/100;
         a = exp(loga);                 /* prior parameter at this grid point */
         _x = pdf('BINOMIAL', y, p, n);
         /* marginal density of y under the Beta(a,a) alternative */
         m2 = _x*pdf('BETA', p, a, a)/pdf('BETA', p, a+y, a+n-y);
         lambda = _x/(_x + m2);         /* posterior prob the coin is fair   */
         output;
      end;
run;

/* Curve of the posterior probability that the coin is fair vs log(a). */
proc sgplot data=lambda;
      format lambda 7.1  loga 7.0;
      label lambda='Prob(coin is fair)'
            loga='log(a)';
      series x=loga  y=lambda;
run;

0 comments: