Tuesday, June 01, 2010

Boost to tackle nonlinearity


/* Simulate a binary response y whose success probability is a sine wave in x.
   For every integer x in 1..627 the step writes 100 rows with
   x1 = x, x + 0.01, ..., x + 0.99. The probability is computed from x
   (not x1), so it is constant across the 100 rows that share the same x.
   Output variables: x, x1, y. */
data nonlinear;
    do x = 1 to 627;
        prob = (sin(x / 100) + 1) * 0.45;   /* ranges over [0, 0.9] */
        do rep = 0 to 99;
            x1 = x + rep / 100;
            /* A comparison evaluates to 1 (true) or 0 (false). */
            y = (ranuni(8655645) <= prob);
            output;
        end;
    end;
    drop prob rep;   /* helper variables are not written to the dataset */
run;

/* Bin x1 into 20 rank groups. The group number is stored in the new
   variable rank1 of the output dataset nonlinearrank. */
proc rank data=nonlinear groups=20 out=nonlinearrank;
    ranks rank1;
    var x1;
run;

/* Per-bin averages: for each rank1 group, the mean of y and the mean of x1
   are written to dataset _mean under the names y and x1. */
proc means data=nonlinearrank noprint nway;   /* nway: keep only the per-class rows (_type_=1) */
    class rank1;
    var y x1;
    output out=_mean
           mean(y)=y
           mean(x1)=x1;
run;

/* Pull in the external macro source files used below.
   Their contents are not part of this listing. */
%include "C:\Documents and Settings\lxie\Desktop\SAS Prog and Docs\Boost macro2 ver3.0.sas";
%include "C:\Documents and Settings\lxie\Desktop\SAS Prog and Docs\predict macro.sas";

/* Run the boosting macro on the simulated data.
   NOTE(review): the positional arguments are defined by %boost2 itself;
   presumably 100 is the number of iterations - confirm against the macro source. */
%boost2(nonlinear, 1, outputds, outwts, 100, 3);

/* %pred(niter)
   Calls %predict once for every i in 1..niter, passing sumpred<i> as a
   dataset name and i as the last argument, then combines the datasets
   sumpred1..sumpred<niter> into sumpred_all with a MERGE that has no BY
   statement (row-by-row), renaming sum_pred to sum_pred<i> in each input.
   NOTE(review): %predict is defined elsewhere; it is assumed to create
   sumpred<i> with a variable named sum_pred - confirm against its source. */
%macro pred(niter);
    /* Keep the loop index local so that an existing global macro variable
       named i is not overwritten by this macro. */
    %local i;

    %do i = 1 %to &niter;
        %predict(nonlinear, 1, outputds, outlogds, out_pred, sumpred&i, 3, &i);
    %end;

    data sumpred_all;
        merge
        %do i = 1 %to &niter;
            sumpred&i.(rename=(sum_pred=sum_pred&i))
        %end;
        ;   /* terminates the MERGE statement generated by the loop above */
    run;
%mend pred;

/* Run %pred for 100 iterations; the result is dataset sumpred_all. */
%pred(100);

/* Delete the per-iteration datasets sumpred1..sumpred100 from WORK. */
proc datasets lib=work nolist;
    delete sumpred1-sumpred100;
quit;

/* Write sumpred_all to CSV. Source echo in the log is switched off for
   the export and switched back on afterwards. */
options nosource;
proc export data=sumpred_all
            outfile="c:\sumpred.csv"
            dbms=csv
            replace;
run;
options source;

In R, use the following code to recreate the animation:

# Build an animated GIF with one frame per column of the exported score
# table, then read it back and display each frame.
# NOTE(review): each column of the CSV is assumed to hold the scores after one
# boosting iteration (100 columns in total) - confirm against the SAS export.
library(caTools)   # provides write.gif / read.gif

test <- read.csv('c:/sumpred.csv', header=T)

# Global minimum and maximum over all scores, so every frame shares one scale.
minmax <- quantile(as.matrix(test), c(0,1))
lo <- minmax[[1]]
hi <- minmax[[2]]

sz = 314; p = 100   # frame size: sz sampled rows (x axis) by p levels (y axis)
n_iter = 100        # number of frames / score columns drawn

# Random subset of rows to draw, kept in ascending row order.
y <- sort(sample(seq_len(nrow(test)), size=sz))

# NOTE: this array is named `image`, like the graphics function called below;
# R still resolves the call image(...) to the function because it skips
# non-function objects when looking up a function name.
image = array(0, c(sz, p, n_iter))
for (i in 1:n_iter) {
  # Rescale the scores to [0, 1], then map them onto pixel levels 1..p.
  # Mapping onto 0..p instead would yield index 0 for the minimum score, and
  # R silently ignores a zero index on assignment, so that point would vanish.
  scaled = (test[y, i] - lo) / (hi - lo)
  trace = round(scaled * (p - 1)) + 1
  # Matrix indexing sets one pixel (row j, level trace[j], frame i) per row j.
  image[cbind(1:sz, trace, i)] = 1
}
write.gif(image, "c:/boost.gif", col=gray(1:2/2))

im = read.gif("c:/boost.gif")
for (i in 1:n_iter) {
   image(x=1:sz, y=1:p, z=im$image[,,i], col=(im$col),
         main=paste('Iter', i),
         xlab="Index", ylab="Percentage of 1")
}

# Change in score between consecutive columns of `test`: columns 2..100 minus
# columns 1..99. The first column of `test` is then prepended unchanged, so
# test2 has the same number of columns as test.
# NOTE(review): assumes `test` has exactly 100 columns - confirm.
test2 <- test[, -1] - test[, -100]
test2 <- cbind(test[,1], test2)

# Two stacked panels per iteration: the delta on top, the score below.
par(mfrow = c(2, 1))
for (iter in seq_len(100)) {
    delta_title <- paste('Iteration', iter, 'Delta')
    score_title <- paste('Iteration', iter, 'Score')

    plot(test2[, iter], type = 'l', ylim = c(-0.5, 1),
         main = delta_title, ylab = 'Delta')
    plot(test[, iter], type = 'l', ylim = minmax,
         main = score_title, ylab = 'Score')

    Sys.sleep(0.1)   # short pause so the sequence plays as an animation
}



0 comments: