/* Theil-Sen Multivariate - SAS */
/*
 * Connect to WRDS through SAS/CONNECT and download the raw inputs:
 *   crspa.dsp500list, crspa.msp500, crspa.CCMXPF_LINKTABLE (copied as-is)
 *   compm.funda (filtered and reduced to the listed variables)
 *
 * NOTE(review): only the tail "edu 4016;" of the first statement survived in
 * the source. REMOTE=WRDS requires a macro variable named WRDS holding the
 * host and port, so the statement is restored with the host from the WRDS
 * SAS/CONNECT instructions - confirm the host for your site.
 */
%let wrds = wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;

  /* Copy each remote table to the remote WORK library, then download it. */
  data dsp500list; set crspa.dsp500list; run;
  proc download data=dsp500list; run;

  data msp500; set crspa.msp500; run;
  proc download data=msp500; run;

  data funda;
    set compm.funda;
    /* Keep observations whose DATADATE falls in calendar years 1998-2013. */
    if 1998 <= year(datadate) <= 2013;
    /* Keep only rows with these format/source/consolidation codes. */
    if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
    keep gvkey conm datadate fyear fyr cik cusip tic
         at ib lt seq csho prcc_f sich au ajex;
  run;
  proc download data=funda; run;

  data CCMXPF_LINKTABLE; set crspa.CCMXPF_LINKTABLE; run;
  proc download data=CCMXPF_LINKTABLE; run;

endrsubmit;
/* The following generates all possible combinations of GVKEYs of size 2 for each FYEAR. */
/*
 * %combination - exhaustive version, pairs.
 *
 * For fiscal year &I (set by the calling %DO loop) builds COMB_5: one row per
 * (replicate, gvkey), where each replicate is one unordered pair of GVKEYs
 * from FUNDA_6, with that firm-year's regression variables attached.
 *
 * Reads:  funda_6, macro variable I
 * Writes: comb_1, comb_2, comb_3, comb_4, comb_5
 *
 * This definition only builds COMB_5. It is closed with %MEND so that the
 * %MACRO statements that follow in the file are not nested inside it.
 */
%MACRO combination;

  /* Firm identifiers for fiscal year &I only. */
  data comb_1;
    set funda_6;
    if fyear = &I;
    keep fyear gvkey;
  run;

  /* The strict inequality yields each unordered pair exactly once. An inner
     join drops unmatched rows directly, so no missing-value delete step is
     needed afterwards. */
  proc sql;
    create table comb_2 as
      select a.fyear, a.gvkey as gvkey1, b.gvkey as gvkey2
      from comb_1 as a inner join comb_1 as b
        on a.gvkey < b.gvkey;
  quit;

  /* Number the pairs; REPLICATE identifies one two-firm sample. */
  data comb_3;
    set comb_2;
    replicate = _n_;
  run;

  /* Wide (gvkey1, gvkey2) to long: one row per firm within each replicate. */
  proc transpose data=comb_3 out=comb_4 (drop=_NAME_ rename=(COL1=gvkey));
    by replicate fyear;
    var gvkey1-gvkey2;
  run;

  /* Attach the firm-year variables to every member of every pair. */
  proc sql;
    create table comb_5 as
      select a.*, b.leadib, b.mve, b.at, b.lt, b.ib, b.bv,
             b.leadeps, b.price, b.eps, b.lagprice
      from comb_4 as a left join funda_6 as b
        on a.fyear = b.fyear and a.gvkey = b.gvkey
      order by fyear, replicate, gvkey;
  quit;

%MEND combination;
/*
The following is an alternative, quicker but dirtier way to generate 100,000
combinations of GVKEYs of size 2 for each FYEAR.
The same combination may enter the sample multiple times.
However, this does not affect the results materially in this example.
*/
/*
 * %combination - random-sample version, pairs.
 *
 * For fiscal year &I (set by the calling %DO loop):
 *   1. draws 100,000 replicates of 2 observations from that year of FUNDA_6;
 *   2. fits a no-intercept regression on AT and LT within each replicate,
 *      once with LEADIB and once with MVE as the dependent variable;
 *   3. takes the median of each coefficient across replicates.
 *
 * Reads:  funda_6, macro variable I
 * Writes: funda_6_&I, comb_5, ts_1_est_&I, ts_1_&I (ts_1a, ts_1b),
 *         ts_2_est_&I, ts_2_&I (ts_2a, ts_2b)
 */
%MACRO combination;

  /* Restrict the panel to fiscal year &I. */
  data funda_6_&I;
    set funda_6;
    if fyear = &I;
  run;

  /* Fixed seed, so the same draws are reproduced on every run. */
  proc surveyselect data=funda_6_&I method=srs n=2 seed=3965
                    out=comb_5 rep=100000 noprint;
  run;

  /* Send the log to a dummy fileref while the BY-group regressions run. */
  filename templog dummy;
  proc printto log=templog; run;

  /* ---- Model 1: leadib = at lt (no intercept) ---- */
  proc reg data=comb_5 noprint rsquare
           outest=ts_1_est_&I (keep=fyear replicate at lt _rsq_);
    by fyear replicate;
    model leadib = at lt / noint noprint;
  quit;

  /* In the OUTEST= data set AT and LT hold the estimated coefficients. */
  data ts_1_est_&I;
    set ts_1_est_&I;
    tse_1a = at;
    tse_1b = lt;
  run;

  proc means data=ts_1_est_&I noprint;
    by fyear;
    var tse_1a tse_1b;
    output out=ts_1_&I (drop=_TYPE_ _FREQ_)
           median(tse_1a)=ts_1a median(tse_1b)=ts_1b;
  run;

  /* ---- Model 2: mve = at lt (no intercept) ---- */
  proc reg data=comb_5 noprint rsquare
           outest=ts_2_est_&I (keep=fyear replicate at lt _rsq_);
    by fyear replicate;
    model mve = at lt / noint noprint;
  quit;

  data ts_2_est_&I;
    set ts_2_est_&I;
    tse_2a = at;
    tse_2b = lt;
  run;

  proc means data=ts_2_est_&I noprint;
    by fyear;
    var tse_2a tse_2b;
    output out=ts_2_&I (drop=_TYPE_ _FREQ_)
           median(tse_2a)=ts_2a median(tse_2b)=ts_2b;
  run;

  /* Restore the default log destination. */
  proc printto; run;

%MEND combination;
/* Driver: run %combination once for each fiscal year 2001 through 2010.
   The loop index I is the year that %combination reads as &I. */
%MACRO LOOP;
  %DO I = 2001 %TO 2010;
    %combination;
  %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly median estimates into one data set per model. */
data ts_est_1;
  set ts_1_2001-ts_1_2010;
run;

data ts_est_2;
  set ts_2_2001-ts_2_2010;
run;
/* The following generates all possible combinations of GVKEYs of size 3 for each FYEAR. */
/*
 * %combination - exhaustive version, triples.
 *
 * For fiscal year &I (set by the calling %DO loop):
 *   1. enumerates every unordered triple of GVKEYs in that year of FUNDA_6;
 *   2. fits a no-intercept regression on AT, LT and IB within each triple,
 *      once with LEADIB (model 7) and once with MVE (model 8) as the
 *      dependent variable;
 *   3. takes the median (50th percentile) of each coefficient across triples.
 *
 * Reads:  funda_6, macro variable I
 * Writes: comb_1 .. comb_5, funda_6_&I,
 *         ts_7_est_&I, ts_7a_&I, ts_7b_&I, ts_7c_&I, ts_7_&I (ts_7a ts_7b ts_7c),
 *         ts_8_est_&I, ts_8a_&I, ts_8b_&I, ts_8c_&I, ts_8_&I (ts_8a ts_8b ts_8c)
 */
%MACRO combination;

  /* Firm identifiers for fiscal year &I only. */
  data comb_1;
    set funda_6;
    if fyear = &I;
    keep fyear gvkey;
  run;

  /* Regression variables for fiscal year &I only. */
  data funda_6_&I;
    set funda_6;
    if fyear = &I;
    keep fyear gvkey leadib mve at lt ib;
  run;

  /* gvkey1 < gvkey2 < gvkey3 yields each unordered triple exactly once.
     Inner joins drop unmatched rows directly, so no missing-value delete
     step is needed afterwards. */
  proc sql;
    create table comb_2 as
      select a.fyear, a.gvkey as gvkey1, b.gvkey as gvkey2
      from comb_1 as a inner join comb_1 as b
        on a.gvkey < b.gvkey;

    create table comb_3 as
      select a.*, b.gvkey as gvkey3
      from comb_2 as a inner join comb_1 as b
        on a.gvkey2 < b.gvkey;
  quit;

  /* Number the triples; REPLICATE identifies one three-firm sample. */
  data comb_3;
    set comb_3;
    replicate = _n_;
  run;

  /* Wide (gvkey1..gvkey3) to long: one row per firm within each replicate. */
  proc transpose data=comb_3 out=comb_4 (drop=_NAME_ rename=(COL1=gvkey));
    by fyear replicate;
    var gvkey1-gvkey3;
  run;

  /* Attach the firm-year variables to every member of every triple. */
  proc sql;
    create table comb_5 as
      select a.*, b.leadib, b.mve, b.at, b.lt, b.ib
      from comb_4 as a left join funda_6_&I as b
        on a.fyear = b.fyear and a.gvkey = b.gvkey
      order by fyear, replicate;
  quit;

  /* Send the log to a dummy fileref while the BY-group regressions run. */
  filename templog dummy;
  proc printto log=templog; run;

  /* ---- Model 7: leadib = at lt ib (no intercept) ---- */
  proc reg data=comb_5 noprint rsquare
           outest=ts_7_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model leadib = at lt ib / noint noprint;
  quit;

  /* In the OUTEST= data set AT, LT and IB hold the estimated coefficients. */
  data ts_7_est_&I;
    set ts_7_est_&I;
    tse_7a = at;
    tse_7b = lt;
    tse_7c = ib;
    drop at lt ib _RSQ_;
  run;

  proc univariate data=ts_7_est_&I noprint;
    var tse_7a;
    output out=ts_7a_&I pctlpts=50 pctlpre=ts_7a_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7b;
    output out=ts_7b_&I pctlpts=50 pctlpre=ts_7b_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7c;
    output out=ts_7c_&I pctlpts=50 pctlpre=ts_7c_;
  run;

  /* Combine the three one-row median data sets side by side. */
  data ts_7_&I;
    merge ts_7a_&I ts_7b_&I ts_7c_&I;
    fyear = &I;
    ts_7a = ts_7a_50;
    ts_7b = ts_7b_50;
    ts_7c = ts_7c_50;
    drop ts_7a_50 ts_7b_50 ts_7c_50;
  run;

  /* ---- Model 8: mve = at lt ib (no intercept) ---- */
  proc reg data=comb_5 noprint rsquare
           outest=ts_8_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model mve = at lt ib / noint noprint;
  quit;

  data ts_8_est_&I;
    set ts_8_est_&I;
    tse_8a = at;
    tse_8b = lt;
    /* Third coefficient is on IB; the OUTEST= data set has no NI variable. */
    tse_8c = ib;
    drop at lt ib _RSQ_;
  run;

  proc univariate data=ts_8_est_&I noprint;
    var tse_8a;
    output out=ts_8a_&I pctlpts=50 pctlpre=ts_8a_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8b;
    output out=ts_8b_&I pctlpts=50 pctlpre=ts_8b_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8c;
    output out=ts_8c_&I pctlpts=50 pctlpre=ts_8c_;
  run;

  data ts_8_&I;
    merge ts_8a_&I ts_8b_&I ts_8c_&I;
    fyear = &I;
    ts_8a = ts_8a_50;
    ts_8b = ts_8b_50;
    ts_8c = ts_8c_50;
    drop ts_8a_50 ts_8b_50 ts_8c_50;
  run;

  /* Restore the default log destination. */
  proc printto; run;

%MEND combination;
/* Driver: run %combination once for each fiscal year 2001 through 2010.
   The loop index I is the year that %combination reads as &I. */
%MACRO LOOP;
  %DO I = 2001 %TO 2010;
    %combination;
  %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly median estimates into one data set per model. */
data ts_est_7;
  set ts_7_2001-ts_7_2010;
run;

data ts_est_8;
  set ts_8_2001-ts_8_2010;
run;
/*
The following is an alternative, quicker but dirtier way to generate 100,000
combinations of GVKEYs of size 3 for each FYEAR.
*/
/*
 * %combination - random-sample version, triples.
 *
 * For fiscal year &I (set by the calling %DO loop):
 *   1. draws 100,000 replicates of 3 observations from that year of FUNDA_6;
 *   2. fits a no-intercept regression on AT, LT and IB within each replicate,
 *      once with LEADIB (model 7) and once with MVE (model 8) as the
 *      dependent variable;
 *   3. takes the median (50th percentile) of each coefficient across
 *      replicates.
 *
 * Reads:  funda_6, macro variable I
 * Writes: funda_6_&I, comb_5,
 *         ts_7_est_&I, ts_7a_&I, ts_7b_&I, ts_7c_&I, ts_7_&I (ts_7a ts_7b ts_7c),
 *         ts_8_est_&I, ts_8a_&I, ts_8b_&I, ts_8c_&I, ts_8_&I (ts_8a ts_8b ts_8c)
 */
%MACRO combination;

  /* Restrict the panel to fiscal year &I. */
  data funda_6_&I;
    set funda_6;
    if fyear = &I;
  run;

  /* Fixed seed, so the same draws are reproduced on every run. */
  proc surveyselect data=funda_6_&I method=srs n=3 seed=3965
                    out=comb_5 rep=100000 noprint;
  run;

  /* Send the log to a dummy fileref while the BY-group regressions run. */
  filename templog dummy;
  proc printto log=templog; run;

  /* ---- Model 7: leadib = at lt ib (no intercept) ---- */
  proc reg data=comb_5 noprint rsquare
           outest=ts_7_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model leadib = at lt ib / noint noprint;
  quit;

  /* In the OUTEST= data set AT, LT and IB hold the estimated coefficients. */
  data ts_7_est_&I;
    set ts_7_est_&I;
    tse_7a = at;
    tse_7b = lt;
    tse_7c = ib;
    drop at lt ib _RSQ_;
  run;

  proc univariate data=ts_7_est_&I noprint;
    var tse_7a;
    output out=ts_7a_&I pctlpts=50 pctlpre=ts_7a_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7b;
    output out=ts_7b_&I pctlpts=50 pctlpre=ts_7b_;
  run;
  proc univariate data=ts_7_est_&I noprint;
    var tse_7c;
    output out=ts_7c_&I pctlpts=50 pctlpre=ts_7c_;
  run;

  /* Combine the three one-row median data sets side by side. */
  data ts_7_&I;
    merge ts_7a_&I ts_7b_&I ts_7c_&I;
    fyear = &I;
    ts_7a = ts_7a_50;
    ts_7b = ts_7b_50;
    ts_7c = ts_7c_50;
    drop ts_7a_50 ts_7b_50 ts_7c_50;
  run;

  /* ---- Model 8: mve = at lt ib (no intercept) ---- */
  proc reg data=comb_5 noprint rsquare
           outest=ts_8_est_&I (keep=fyear replicate at lt ib _rsq_);
    by fyear replicate;
    model mve = at lt ib / noint noprint;
  quit;

  data ts_8_est_&I;
    set ts_8_est_&I;
    tse_8a = at;
    tse_8b = lt;
    /* Third coefficient is on IB; the OUTEST= data set has no NI variable. */
    tse_8c = ib;
    drop at lt ib _RSQ_;
  run;

  proc univariate data=ts_8_est_&I noprint;
    var tse_8a;
    output out=ts_8a_&I pctlpts=50 pctlpre=ts_8a_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8b;
    output out=ts_8b_&I pctlpts=50 pctlpre=ts_8b_;
  run;
  proc univariate data=ts_8_est_&I noprint;
    var tse_8c;
    output out=ts_8c_&I pctlpts=50 pctlpre=ts_8c_;
  run;

  data ts_8_&I;
    merge ts_8a_&I ts_8b_&I ts_8c_&I;
    fyear = &I;
    ts_8a = ts_8a_50;
    ts_8b = ts_8b_50;
    ts_8c = ts_8c_50;
    drop ts_8a_50 ts_8b_50 ts_8c_50;
  run;

  /* Restore the default log destination. */
  proc printto; run;

%MEND combination;
/* Driver: run %combination once for each fiscal year 2001 through 2010.
   The loop index I is the year that %combination reads as &I. */
%MACRO LOOP;
  %DO I = 2001 %TO 2010;
    %combination;
  %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly median estimates into one data set per model. */
data ts_est_7;
  set ts_7_2001-ts_7_2010;
run;

data ts_est_8;
  set ts_8_2001-ts_8_2010;
run;