Tuesday, December 01, 2009

Applying Delta Method in SAS

Delta method is a method in statistics for deriving an approximate probability distribution for a function of an asymptotically normally distributed random variable from knowledge of the limiting variance of that random variable.
See "http://en.wikipedia.org/wiki/Delta_method".

In order to apply delta method, it is necessary to obtain the first order derivatives of the function with respect to the randome variables, which may require either symbolic or finite difference approximation computing power. SAS doesn’t explicitly have symbolic computing power nor does it ouptut intermediate result from finite approximation computations and we have to turn to PROC MODEL in ETS (or PROC NLIN in SAS/STAT) to obtain the symbolic derivatives of a given function F(.). PROC MODEL output three pieces of related information:

1. Internal machine code for computing each part of the derivatives, say g(b). This is the table called CodeList;
2. Summary symbolic result for first order derivatives of f(b), i.e. the complete symbolic expression of g(b); This is the table called ProgList;
3. Indicator of partial derivatives, this is the table called Derivatives

The code list is the human-readable code for internal computing and I use this to guide further computing on the derivatives. I also use the ProgList in the first step to obtain the expression of first order derivatives of f(b), i.e. expression of g(b).

Suppose you have a translog model, called F(p1, p2, p3; b1, b2, b3), where p1, p2, p3 are input prices and b1, b2, b3 are parameter vectors (parameters should be more in real translog model). The elasticity of substitution of input 1 and input2 is defined as [@F()/@p1]/[@F()/@p2]=g(b1, b2). *NOTE: @ indicates derivative operator;

In my first step, I use PROC MODEL on F(p1, p2, p3; b1, b2, b3) to estimate parameter vector {b1, b2, b3}; In this step, we obtain tables CodeList_0 and ProgList_0;

In the second step, I tweak PROC MODEL to obtain the partial derivetives: @F()/@p1, and @F()/@p2. Then, I read in ProgList table to form the explicit AUES expression g(p1, p2| b1, b2, b3)= [@F()/@p1]/[@F()/@p2]; This step generates tables CodeList, ProgList;


Since delta method based approach to estimate V(g) relies on the first order derivative of function g() w.r.t parameter vector (b1, b2, b3), we also need to obtain the partial derivatives of g() with respect to each of the parameters:, ie. @g()/@b1, @g()/@b2, @g()/@b3. Therefore, I have to call PROC MODEL once again to obtain these formula. Tables CodeList_g and ProgList_g were generated in this step.

Later, I parse the instructions in CodeList_g to calculate @g()/@b1, @g()/@b2, @g()/@b3 at every data point of the sample. Once we obtain these values, we can use delta method to get the asymptotical mean and variance of g() function

V[g()]=[@g()/@b1, @g()/@b2, @g()/@b3]’ %*% V(b1, b2, b3) %*% [@g()/@b1, @g()/@b2, @g()/@b3]


In summary, my program only ask user to input the original model for estimation as well as the two input prices for cross elasticity calculation, users don’t need to specify the derivative functions themselves. If we use IML, we have to specify explicit formula for vector [@g()/@b1, @g()/@b2, @g()/@b3], which may very tedious to calculate.

Below is sample code using data from Econometrics Analysis of W.Greene, 6th Ed.


data Green;
input Year   Cost    K        L      E       M        Pk      Pl      Pe     Pm;
datalines;
1947  182.373 0.05107 0.24727 0.04253 0.65913 1.00000 1.00000 1.00000 1.00000
1948  183.161 0.05817 0.27716 0.05127 0.61340 1.00270 1.15457 1.30258 1.05525
1949  186.533 0.04602 0.25911 0.05075 0.64411 0.74371 1.15584 1.19663 1.06625
1950  221.710 0.04991 0.24794 0.04606 0.65609 0.92497 1.23535 1.12442 1.12430
1951  255.945 0.05039 0.25487 0.04482 0.64992 1.04877 1.33784 1.25179 1.21694
1952  264.699 0.04916 0.26655 0.04460 0.63969 0.99744 1.37949 1.27919 1.19961
1953  291.160 0.04728 0.26832 0.04369 0.64071 1.00653 1.43458 1.27505 1.19044
1954  274.457 0.05635 0.27167 0.04787 0.62411 1.08757 1.45362 1.30356 1.20612
1955  308.908 0.05258 0.26465 0.04517 0.63760 1.10315 1.51120 1.34277 1.23835
1956  328.286 0.04604 0.26880 0.04576 0.63940 0.99606 1.58186 1.37154 1.29336
1957  338.633 0.05033 0.27184 0.04820 0.62962 1.06321 1.64641 1.38010 1.30703
1958  323.318 0.06015 0.27283 0.04836 0.61886 1.15619 1.67389 1.39338 1.32699
1959  358.435 0.06185 0.27303 0.04563 0.61948 1.30758 1.73430 1.36756 1.30774
1960  366.251 0.05788 0.27738 0.04585 0.61889 1.25413 1.78280 1.38025 1.33946
1961  366.162 0.05903 0.27839 0.04640 0.61617 1.26328 1.81977 1.37630 1.34319
1962  390.668 0.05578 0.28280 0.04530 0.61613 1.26525 1.88531 1.37689 1.34745
1963  412.188 0.05601 0.27968 0.04470 0.61962 1.32294 1.93379 1.34737 1.33143
1964  433.768 0.05452 0.28343 0.04392 0.61814 1.32798 2.00998 1.38969 1.35197
1965  474.969 0.05467 0.27996 0.04114 0.62423 1.40659 2.05539 1.38635 1.37542
1966  521.291 0.05460 0.28363 0.04014 0.62163 1.45100 2.13441 1.40102 1.41878
1967  540.941 0.05443 0.28646 0.04074 0.61837 1.38617 2.20616 1.39197 1.42428
1968  585.447 0.05758 0.28883 0.03971 0.61388 1.49901 2.33869 1.43388 1.43481
1969  630.450 0.05410 0.29031 0.03963 0.61597 1.44957 2.46412 1.46481 1.53356
1970  623.466 0.05255 0.29755 0.04348 0.60642 1.32464 2.60532 1.45907 1.54758
1971  658.235 0.04675 0.28905 0.04479 0.61940 1.20177 2.76025 1.64689 1.54978
;
run;

data translog;
     set green;
  l_C=log(cost);
     lpk = log(pk); 
     lpl = log(pl); 
     lpe = log(pe); 
  lpm = log(pm);
run;

/* Specify the prices used in elasticity measurements */
%let covar1=lpk;
%let covar2=lpl;
/* Specify the translog cost function. It can be a nonlinear function, too */
%let model = %bquote(l_c=b0 + bk*lpk + bl*lpl + be*lpe + bm*lpm +
            bkk*0.5*lpk*lpk + bll*0.5*lpl*lpl +
      bee*0.5*lpe*lpe + bmm*0.5*lpm*lpm +
      bkl*lpk*lpl + bke*lpk*lpe + bkm*lpk*lpm +
      ble*lpl*lpe + blm*lpl*lpm + bem*lpe*lpm + exp(lpk););

/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
/*                          do not change code below                    */
/*@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@*/
/* Estimate original model */
ods select none ;
ods output FirstDerivatives=Derivatives_0;
ods output CodeList=CodeList_0;
ods output ProgList=ProgList_0;
ods output ParameterEstimates=Est;
ods output CovB=CovB;
proc model data=Translog ListCode ListDer  covb ;
     *endogenous K L E; 
     *parms  b0 bk bl be bm
               bkk bll bee bmm
               bkl bke bkm
               ble blm
               bem;     
      &model;
      *K = bk + bkk*lpk + bkl*lpl + bke*lpe + bkm*lpm; 
      *L = bl + bll*lpk + bkl*lpk + ble*lpe + blm*lpm;
   *E = be + bee*lpe + bke*lpk + ble*lpl + bem*lpm;
      fit   l_c / outsused = smatrix  ols  outcov  outest=beta ; 
   *estimate '&PRED.K/@bkk' exp(lpk*bkk);  
   run;
quit;     
ods select all;


proc sql noprint;
     select wrt into :para_names separated by ' '
  from   derivatives_0
  ;
quit;
%put ¶_names;

data covb;
        length _TYPE_ $ 5;
        set covb;  
  _TYPE_='PARMS';
  rename Parameter=_NAME_;
run;
 

proc transpose data=Est out=Est_t name=Parameter;
     var Estimate;
  id Parameter;
run;

%let elastic_covars=lpk lpl;
data _new;
     set Translog(drop=&elastic_covars); set Est_t;  
run;


/* Obtain elasticity measurements */
ods select none ;
options error=0;
ods output FirstDerivatives=Derivatives;
ods output CodeList=CodeList;
ods output ProgList=ProgList;
proc model data=_new ListCode ListDer  maxiter=0;
      *endogenous Sk Sl Se; 
      &model;
      fit  l_c /  ols;    
   run;
quit;     
ods select all;

/* obtain analytical derivatives w.r.t input prices */

data _null_;
     length _temp_ $ 1024;
     set ProgList;
  if indexc(source, '@')>0 then do;
     _temp_='';
  x1=indexc(source, '/')+2; x2=indexc(source, '=')-1;
  name=substr(source, x1, x2-x1); put name=;
     _temp_=cats(_temp_, compress(substr(source, find(source, '=')+1), '; '));  
  j=_n_+1;  
     do while( indexc(source, ';')=0);
           set ProgList point=j ;
           _temp_=cats(_temp_, compress(substr(source, find(source, '=')+1), '; '));  
     j+1;
  end;
  call symput(compress('foc_'||name), _temp_);
      end;
   put _temp_=;
run;

%put %bquote((&&foc_&covar1)/(&&foc_&covar2));


/* Obtain derivatives of elasticity measurements w.r.t parameters */
data _new;
     set Translog;
     if _n_>1 then stop; 
run;
ods select none;
ods output FirstDerivatives=Derivatives_g;
ods output CodeList=CodeList_g;
ods output ProgList=ProgList_g;
proc model data=_new ListCode ListDer  maxiter=0;     
      *parms  b0 bk bl be bm
                bkk bll bee bmm
                bkl bke bkm
                ble blm
                bem;     
   nom=&&foc_&covar1;
   denom=&&foc_&covar2;
      l_c= nom / denom;
      fit  l_c / ols;    
   run;
quit;     
ods select all;
/*
proc sql noprint;
     select name into :para_names separated by ' '
  from   sashelp.vcolumn
  where  varnum>=2 
         &  libname='WORK' 
         &  memname='EST_T'
  ;
quit;
%put ¶_names;
*/

data _null_;
     set CodeList_g end=eof;
  if _n_=1 then do;
     call execute('data compute;');
  call execute('if _n_=1 then set Est_T;');
  call execute('set translog;');
  call execute('retain ¶_names ;');
  /*call execute('lpk = log(pk); lpl = log(pl); lpe = log(pe);' );*/

  declare hash h();
  length word $ 32 wordtrans $ 32;
  h.defineKey('word');
  h.defineData('word', 'wordtrans');
  h.defineDone();
  call missing(word, wordtrans);

  declare hiter hi('h');
  end;
  /* check if it is a Operation item */
  where index(col1, 'Oper')>0;
  if index(col1, 'eeocf')>0 then col1=tranwrd(col1, '=', 'eeocf');
     j0=index(col3, '@'); j1=indexc(col3, ':')+1; j2=indexc(col3, '<')+2;     
  s1=trim(substr(col3, 1, j1-2));
  s2=compress(substr(col3, j1, j2-j1-2));
  s3=trim(substr(col3, j2));

  s2B=translate(s2, '___', '/@.');
  *put s2=  s2B=;
  if indexc(s2, '@./')>0 then do;
     rc=h.find(key:s2);
     if rc=0 then h.replace(key:s2, data:s2, data:s2B);
     else h.add(key:s2, data:s2, data:s2B);
  end;

  j=1;
  do until (scan(trim(col3), j, ' ')='');
     w=subpad(scan(col3, j, ' '), 1, 32);  
        if h.find(key:w)=0 then do;
     word2=wordtrans;
     s3=tranwrd(s3, compress(w), compress(wordtrans));
     *put s3=  wordtrans=;
     *put '^^^^^^^^^';
  end;

  j+1;
  end;

  if s1 in ('*', '-', '+', '/', '**') then  s3=translate(trim(left(s3)), s1, ' '); 
  else if s1 not in ('eeocf', '=') then s3=compress(cats(s1, '(', s3, ')'));
  rc=h.find(key: subpad(s2, 1, 32));
  if rc=0 then  s3=cats(compress(wordtrans), '=', s3);   
  else s3=cats(s2, '=', s3);

  s3=tranwrd(s3, 'ACTUAL.', ' ');
  *put s3;  
  *put '________________';
  if index(s3, 'ERROR')=0 | index(s3, 'RESID')=0 then
     call execute(s3||';');
  if eof then do;
     call execute ('drop ¶_names;');
        call execute ('run;');
  h.output(dataset:'_test');
  end;
run;



data  Mean(drop=_PRED:)   FirstDerivatives(drop=PRED:);
        set compute(keep=Year PRED: _PRED:);  
run;

proc sql noprint;
        select cats(a.name, ' = ', substr(a.name, index(a.name, '__')+2)) into:renamestmt separated by ' '
  from  sashelp.vcolumn as a
  join  _test as b
    on  compress(a.name)=compress(b.wordtrans)
  where  a.libname='WORK' and a.memname='FIRSTDERIVATIVES'
  ;
quit;
%put &renamestmt;

proc sort data=derivatives_0; by wrt; run;
proc sort data=derivatives_g; by wrt; run;
data _dev;
     merge derivatives_0(keep=wrt in=_0) 
        derivatives_g(keep=wrt in=_g)  end=eof
  ;
  by wrt;
  length _temp_ $ 1024;
  retain _temp_ ;
  diff=(_0-_g); 
  if _n_=1 then _temp_=';';
  if diff=1 then _temp_=cats(_temp_, ';', wrt, '=0');
  put _temp_=;
  if eof then call symput ('impute', _temp_);
run;
%put %bquote(&impute);


data FirstDerivatives;
        set FirstDerivatives;
  rename &renamestmt;
  &impute;
  ;
run;

proc score data=firstderivatives  score=covb  type=parms  out=_temp1;        
        var ¶_names;
run;

data _temp1;
     set _temp1(drop=¶_names);  
run;
proc sql noprint;
     select cats(name, '=', translate(name, '', '2')) into :renamestmt separated by ' '
  from   sashelp.vcolumn
  where  libname='WORK' 
      &  memname='_TEMP1'
   &  varnum>1
   ;
quit;
%put &renamestmt;

data _temp1_score;
     set _temp1;
  rename &renamestmt;
  _TYPE_='parms';
  _NAME_=compress('v'||year);
run;

proc score data=firstderivatives  score=_temp1_score  type=parms  out=v_G(keep=Year v:);        
     by year;
     var ¶_names;
run;

data v_G;
     set v_G;
  _var_=sum(of v:, 0);
  drop v:;
run;

proc datasets library=work nolist;
     delete _temp1 _temp1_score  _new _test;
quit;

No comments: