Data defines the model by dint of genetic programming, producing the best decile table.


SAS Code for Bootstrapped Decile Analysis
Bruce Ratner, PhD

bs-dec

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/* Session options: echo source statements to the log, but suppress notes
   and all macro tracing output. */
options source nonotes nomprint nomlogic nosymbolgen;

/* Analysis inputs, referenced further down as macro variables. */
%let data_in = IN;             /* data set passed to DATA= in the procs below */
%let depvar = Y;               /* left-hand side of the MODEL statement       */
%let indvars = X1 X2 X3 X4;    /* right-hand side of the MODEL statement      */


/* data_in */
/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
/* Bootstrap design: rows drawn per replicate, and number of replicates. */
%let samsize_bs = 30940;
%let n_sampl_bs = 50;

/* Draw the replicates from the input data with METHOD=URS.  OUTHITS asks
   for one output row per selection, and REP= produces the Replicate
   variable that the macro below filters on. */
proc surveyselect
     data=&data_in
     out=sample
     method=urs
     n=&samsize_bs
     rep=&n_sampl_bs
     outhits;
run;

%macro loop;
  /*-------------------------------------------------------------------
    LOOP - bootstrap estimate of cumulative lift by decile.

    Reads (global macro variables):
      n_sampl_bs  number of bootstrap replicates held in WORK.SAMPLE
      data_in     full input data set
      depvar      dependent variable of the logistic model
      indvars     independent variables of the logistic model

    Reads (data sets):
      sample      output of PROC SURVEYSELECT, with a Replicate variable
      data_in     must contain the variable wt, which is summed as the
                  row weight in every decile calculation

    Writes (data sets):
      scoresam1 ... scoresamN   per-replicate cumulative lift by decile
      combine                   the per-replicate lifts merged by dec
      bs_lift_SE                bootstrap mean lift and 1.28 x std
      boot                      bias-adjusted lift per decile:
                                2 x full-sample lift minus bootstrap mean

    Fixes relative to the earlier version:
      - removed a trailing %END that had no matching %DO
      - the final merge that builds BOOT now matches rows BY dec
        instead of relying on row position
      - loop indices are declared %LOCAL
      - dropped an unused macro variable assignment
  -------------------------------------------------------------------*/
  %local rep i dsn;

  %do rep=1 %to &n_sampl_bs;

    /* Isolate the rows belonging to the current replicate. */
    data Replicate&Rep;
      set sample;
      if Replicate=&Rep;
    run;

    %let dsn=Replicate&Rep;

    /* Refit the logistic model on this replicate only. */
    ods exclude ODDSRATIOS;
    proc logistic data=&dsn nosimple noprint des outest=coef;
      model &depvar = &indvars;
    run;

    /* Apply the fitted coefficients to the replicate. */
    proc score data=&dsn predict type=parms score=coef
               out=score;
      var &indvars;
    run;

    /* The score is read from the variable named depvar followed by 2
       (presumably the name PROC SCORE assigns because depvar already
       exists in the data - TODO confirm). */
    data score;
      set score;
      estimate=&depvar.2;
    run;

    /* Total weight of the rows that received a score. */
    data notdot;
      set score;
      if estimate ne .;
    run;

    proc means data=notdot sum noprint; var wt;
      output out=samsize (keep=samsize) sum=samsize;
    run;

    /* Carry the total weight n onto every scored row; the first row
       read is the one-row SAMSIZE data set and is dropped after use. */
    data scoresam (drop=samsize);
      set samsize score;
      retain n;
      if _n_=1 then n=samsize;
      if _n_=1 then delete;
    run;

    proc sort data=scoresam; by descending estimate;
    run;

    /* Assign deciles 0 (highest scores) to 9 from the running weight.
       Because cum_n never exceeds n, the floor expression stays below 10. */
    data score;
      set scoresam;
      if estimate ne . then cum_n+wt;
      if estimate = . then dec=.;
      else dec=floor(cum_n*10/(n+1));
    run;

    /* Sum the dependent variable and the weight within each decile. */
    proc summary data=score missing; class dec;
      var &depvar wt;
      output out=sum_dec sum=sum_can sum_wt;
    run;

    data sum_dec;
      set sum_dec;
      avg_can=sum_can/sum_wt;
    run;

    /* Overall rate from the dec=. rows; this data set is overwritten
       a few steps below before it is used. */
    data avg_rr;
      set sum_dec;
      if dec=.;
      keep avg_can;
    run;

    /* Cumulative counts and cumulative response rate (in percent). */
    data sum_dec1;
      set sum_dec;
      if dec=. or dec=10 then delete;
      cum_n +sum_wt;
      r =sum_can;
      cum_r +sum_can;
      cum_rr=(cum_r/cum_n)*100;
      avg_cann=avg_can*100;
    run;

    /* The cumulative rate at the last decile is the overall rate. */
    data avg_rr;
      set sum_dec1;
      if dec=9;
      keep avg_can;
      avg_can=cum_rr/100;
    run;

    /* Cumulative lift for this replicate: cumulative rate divided by
       the overall rate.  The first row read (from AVG_RR) only supplies
       the divisor and is then discarded. */
    data scoresam&Rep ;
      set avg_rr sum_dec1 ;
      retain n;
      if _n_=1 then n=avg_can;
      if _n_=1 then delete;
      lift&Rep = (cum_rr/n);
      if dec ne .;
      keep dec lift&Rep;
    run;

    proc sort data=scoresam&Rep; by dec;
    run;

  %end;

  /* Put the lift columns of all replicates side by side, one row per
     decile. */
  data combine;
    merge
    %do i=1 %to &n_sampl_bs;
      scoresam&i
    %end;
    ;
    by dec;
  run;

  /* Bootstrap mean and 1.28 x standard deviation across replicates. */
  data bs_lift_SE;
    set combine;
    bs_est=mean(of lift:);
    bs_std=std(of lift:);
    bs_SE=1.28*bs_std;
    keep dec bs_est bs_SE;
  run;

  /* ---- Same decile analysis on the full input data ---- */

  /* NOTE(review): this model has no FREQ statement, whereas the model
     fitted after the macro call uses FREQ wt - confirm which is meant. */
  ods exclude ODDSRATIOS;
  proc logistic data=&data_in nosimple noprint des outest=coef;
    model &depvar = &indvars;
  run;

  proc score data=&data_in predict type=parms score=coef
             out=score;
    var &indvars;
  run;

  data score;
    set score;
    estimate=&depvar.2;
  run;

  data notdot;
    set score;
    if estimate ne .;
  run;

  proc means data=notdot sum noprint; var wt;
    output out=samsize (keep=samsize) sum=samsize;
  run;

  data scoresam (drop=samsize);
    set samsize score;
    retain n;
    if _n_=1 then n=samsize;
    if _n_=1 then delete;
  run;

  proc sort data=scoresam; by descending estimate;
  run;

  data score;
    set scoresam;
    if estimate ne . then cum_n+wt;
    if estimate = . then dec=.;
    else dec=floor(cum_n*10/(n+1));
  run;

  proc summary data=score missing; class dec;
    var &depvar wt;
    output out=sum_dec sum=sum_can sum_wt;
  run;

  data sum_dec;
    set sum_dec;
    avg_can=sum_can/sum_wt;
  run;

  data avg_rr;
    set sum_dec;
    if dec=.;
    keep avg_can;
  run;

  data sum_dec1;
    set sum_dec;
    if dec=. or dec=10 then delete;
    cum_n +sum_wt;
    r =sum_can;
    cum_r +sum_can;
    cum_rr=(cum_r/cum_n)*100;
    avg_cann=avg_can*100;
  run;

  data avg_rr;
    set sum_dec1;
    if dec=9;
    keep avg_can;
    avg_can=cum_rr/100;
  run;

  /* Full-sample lift, plus twice that lift for the bias adjustment. */
  data scoresam ;
    set avg_rr sum_dec1 ;
    retain n;
    if _n_=1 then n=avg_can;
    if _n_=1 then delete;
    lift=(cum_rr/n);
    if dec ne .;
    _2SAM_EST=2*lift;
    keep dec _2SAM_EST lift;
  run;

  /* Bias-adjusted bootstrap lift.  Both inputs are in dec order
     (COMBINE was merged BY dec; SCORESAM derives from the CLASS dec
     summary), so the BY statement matches rows on the decile itself. */
  data boot;
    merge
    bs_lift_SE scoresam;
    by dec;
    lift_bs=_2SAM_EST-bs_est;
    keep dec _2SAM_EST bs_est lift_bs bs_SE;
  run;

%mend;
/* Clear the log window, then run the bootstrap macro, which leaves the
   data set BOOT (one row per decile) in WORK. */
dm 'clear log';

%loop

/* ---- Single-sample decile table on the full input data ---- */

/* Fit the logistic model, using wt as the frequency variable. */
ods exclude ODDSRATIOS;
proc logistic data=&data_in nosimple noprint des outest=coef;
  model &depvar = &indvars;
  freq wt;
run;

/* Score the input data with the fitted coefficients. */
proc score data=&data_in predict type=parms score=coef
           out=score;
  var &indvars;
run;

/* The score is read from the variable named depvar followed by 2
   (presumably the name PROC SCORE assigns because depvar already exists
   in the data - TODO confirm). */
data score;
  set score;
  estimate=&depvar.2;
run;

/* Total weight of the rows that received a score. */
data notdot;
  set score ;
  if estimate ne .;
run;

proc means data=notdot sum noprint; var wt;
  output out=samsize (keep=samsize) sum=samsize;
run;

/* Carry the total weight n onto every row; the first row read is the
   one-row SAMSIZE data set and is dropped after use. */
data scoresam (drop=samsize);
  set samsize score;
  retain n;
  if _n_=1 then n=samsize;
  if _n_=1 then delete;
run;

proc sort data=scoresam; by descending estimate;
run;

/* Assign deciles 0 (highest scores) to 9 from the running weight. */
data score;
  set scoresam;
  if estimate ne . then cum_n+wt;
  if estimate = . then dec=.;
  else dec=floor(cum_n*10/(n+1));
run;

/* Sum the dependent variable and the weight within each decile. */
proc summary data=score missing; class dec;
  var &depvar wt;
  output out=sum_dec sum=sum_can sum_wt;
run;

data sum_dec;
  set sum_dec;
  avg_can=sum_can/sum_wt;
run;

/* Overwritten two steps below before it is used. */
data avg_rr;
  set sum_dec;
  if dec=.;
  keep avg_can;
run;

/* Cumulative counts and cumulative response rate (in percent). */
data sum_dec1;
  set sum_dec;
  if dec=. or dec=10 then delete;
  cum_n +sum_wt;
  r =sum_can;
  cum_r +sum_can;
  cum_rr=(cum_r/cum_n)*100;
  avg_cann=avg_can*100;
run;

/* The cumulative rate at the last decile is the overall rate. */
data avg_rr;
  set sum_dec1;
  if dec=9;
  keep avg_can;
  avg_can=cum_rr/100;
run;

/* Cumulative lift and a printable decile label. */
data scoresam ;
  set avg_rr sum_dec1;
  retain n;
  if _n_=1 then n=avg_can;
  if _n_=1 then delete;
  lift=(cum_rr/n);
  /* Without an explicit length, decc would take its length (5) from the
     first assignment below and 'bottom' would be stored as 'botto'. */
  length decc $6;
  if dec=0 then decc=' top ';
  if dec=1 then decc=' 2 ';
  if dec=2 then decc=' 3 ';
  if dec=3 then decc=' 4 ';
  if dec=4 then decc=' 5 ';
  if dec=5 then decc=' 6 ';
  if dec=6 then decc=' 7 ';
  if dec=7 then decc=' 8 ';
  if dec=8 then decc=' 9 ';
  if dec=9 then decc='bottom';
  if dec ne .;
run;

/* Join the single-sample table with the bootstrap results by decile. */
proc sort data= scoresam; by dec; run;
proc sort data= boot ; by dec; run;

data scoresam_bs;
  merge
  scoresam boot; by dec;
run;

/* Print the bootstrapped decile table from SCORESAM_BS.
   Fixes: the closing RUN now has its semicolon, and the count columns
   use COMMA10.0 because decile counts in the thousands do not fit in a
   width of 3. */
options label;
title1' ';
title2" samsize_bs=&samsize_bs, n_sampl_bs=&n_sampl_bs ";

proc print data=scoresam_bs d split='*' noobs;
  var decc sum_wt r avg_cann cum_rr lift lift_bs bs_SE;
  /* '*' is the SPLIT= character, so it marks line breaks in headings. */
  label decc='DECILE'
        sum_wt ='NUMBER OF*INDIVIDUALS'
        r ='NUMBER OF*RESPONDERS'
        cum_r ='CUM No. CUSTOMERS w/* RESPONDERS'
        avg_cann ='RESPONSE *RATE (%)'
        cum_rr ='CUM RESPONSE * RATE (%)'
        lift ='C U M*Single-Sample*LIFT (%)'
        lift_bs ='C U M*BOOTSTRAP*LIFT (%)'
        bs_SE='BOOTSTRAP*MARGIN of*ERROR (80%)'
  ;
  sum sum_wt r;
  format sum_wt r cum_n cum_r comma10.0;
  format avg_cann cum_rr 5.2;
  format lift lift_bs 3.0;
  format bs_SE 5.1;
run;



For more information about this article, call Bruce Ratner at 516.791.3544 or 1 800 DM STAT-1; or e-mail at br@dmstat1.com.
Sign up for a free GenIQ webcast: Click here.