Theil - Sen Multivariate - SAS

You might also like

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 6

/* ------------------------------------------------------------------------- */
/* Connect to the WRDS server and copy the raw inputs into the local WORK    */
/* library: DSP500LIST, MSP500, a subset of FUNDA, and CCMXPF_LINKTABLE.     */
/* ------------------------------------------------------------------------- */

/* Host name and port of the remote session. The value must be one unbroken  */
/* host name followed by the port; a line break inside the host name would   */
/* put whitespace into the macro value.                                      */
%let wrds=wrds.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;
    /* Each table is first copied into the remote WORK library and then
       downloaded to the local WORK library. */
    data dsp500list; set crspa.dsp500list; run;
    proc download data=dsp500list; run;

    data msp500; set crspa.msp500; run;
    proc download data=msp500; run;

    data funda;
        set compm.funda;
        /* Keep records whose DATADATE falls in calendar years 1998-2013. */
        if 1998<=year(datadate)<=2013;
        /* Keep only records with these INDFMT/DATAFMT/POPSRC/CONSOL values. */
        if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C';
        keep gvkey conm datadate fyear fyr cik cusip tic
             at ib lt seq csho prcc_f sich au ajex;
    run;
    proc download data=funda; run;

    data CCMXPF_LINKTABLE; set crspa.CCMXPF_LINKTABLE; run;
    proc download data=CCMXPF_LINKTABLE; run;
endrsubmit;

/* Sort newest-first within each GVKEY so that LAG() on this ordering returns */
/* the values of the FOLLOWING fiscal year.                                   */
proc sort data=funda; by gvkey descending fyear; run;

/* FUNDA_1 = FUNDA plus:                                                      */
/*   sdate  - first day of the month 11 months before DATADATE                */
/*   leadib - IB of the next fiscal year (same GVKEY, FYEAR+1), else missing  */
data funda_1;
    set funda;
    format sdate datadate yymmddn8.;
    sdate = intnx("month", datadate, -11, "beg");

    /* LAG only updates its queue on the rows where it is actually executed.
       Call each LAG unconditionally, once per row, instead of inside a
       compound IF condition, so every queue always holds the previous row. */
    _next_gvkey = lag1(gvkey);
    _next_fyear = lag1(fyear);
    _next_ib    = lag1(ib);

    /* With the descending sort, the previous row is the next fiscal year. */
    if gvkey = _next_gvkey and fyear = _next_fyear - 1 then leadib = _next_ib;
    else leadib = .;

    drop _next_gvkey _next_fyear _next_ib;
run;
/* Back to chronological order so that LAG() returns the PRIOR fiscal year. */
proc sort data=funda_1; by gvkey fyear; run;

/* FUNDA_2: derive scaled and per-share variables and drop incomplete rows.  */
/*   *_s variables scaled by AT        : leadib_s mve_s bv_s ib_s lt_s       */
/*   *_s variables scaled by LAGPRICE  : leadeps_s eps_s price_s             */
data funda_2;
    set funda_1;

    /* LAG only updates its queue when executed, so take the lags in
       unconditional assignments rather than inside the IF condition. */
    _prev_gvkey = lag(gvkey);
    _prev_fyear = lag(fyear);
    _prev_ajex  = lag(ajex);
    _prev_prcc  = lag(prcc_f);

    /* lag1 = 1 when the previous row is the same firm one fiscal year
       earlier, otherwise missing; multiplying by it blanks out invalid lags. */
    if gvkey = _prev_gvkey and fyear = _prev_fyear + 1 then lag1 = 1;
    else lag1 = .;

    mve      = prcc_f*csho;
    leadib_s = leadib/at;
    mve_s    = mve/at;
    bv       = seq;
    bv_s     = bv/at;
    ib_s     = ib/at;
    lt_s     = lt/at;
    leadeps  = leadib/csho;
    price    = prcc_f;
    adj      = ajex/(_prev_ajex*lag1);   /* adjustment factor for stock splits */
    lagprice = (_prev_prcc*lag1)*adj;
    eps      = ib/csho;
    leadeps_s = leadeps/lagprice;
    eps_s     = eps/lagprice;
    price_s   = price/lagprice;

    /* Drop any row in which one of the analysis variables is missing. */
    if at=. or lt=. or bv=. or bv_s=. or lt_s=. or ib_s=.
       or price=. or mve=. or lagprice=.
       or eps=. or leadeps=. or leadeps_s=. or eps_s=. or price_s=.
    then delete;

    drop _prev_gvkey _prev_fyear _prev_ajex _prev_prcc;
run;
/* Attach a PERMNO to every FUNDA_2 row through CCMXPF_LINKTABLE.            */
/* A link qualifies when its type, primacy and used-flag match the lists     */
/* below and its [LINKDT, LINKENDDT] window covers SDATE through DATADATE;   */
/* the special missing values .B and .E are accepted as open window ends.    */
proc sql;
    create table funda_3 as
    select fa.*,
           lnk.lpermno as permno
    from funda_2 as fa
         left join
         ccmxpf_linktable as lnk
      on fa.gvkey = lnk.gvkey
     and lnk.LINKTYPE in ("LU","LC","LD","LN","LS","LX")
     and lnk.LINKPRIM in ("C","P")
     and lnk.USEDFLAG = 1
     and (lnk.LINKDT = .B or lnk.LINKDT <= fa.sdate)
     and (lnk.LINKENDDT = .E or fa.datadate <= lnk.LINKENDDT);
quit;

/* Keep only the rows for which a PERMNO was found. */
data funda_4;
    set funda_3;
    if permno ne .;
run;
/* Attach the START/ENDING dates from DSP500LIST to each firm-year by PERMNO. */
/* Unmatched firm-years get missing dates and are removed in the next step.   */
proc sql;
    create table funda_5 as
    select a.*, b.start, b.ending
    from funda_4 as a left join dsp500list as b
      on a.permno = b.permno;
quit;

/* FUNDA_6: firm-years whose DATADATE lies inside a [START, ENDING] window,   */
/* restricted to fiscal years 2001-2010 and to the analysis variables.        */
data funda_6;
    set funda_5;
    *if start <= sdate <= datadate <= ending;
    if start <= datadate <= ending;
    if 2001<=fyear<=2010;
    keep gvkey fyear at lt ib ib_s bv bv_s lt_s mve mve_s price lagprice
         eps leadeps leadeps_s eps_s price_s leadib leadib_s;
run;

/* Remove all variable labels from FUNDA_6. */
proc datasets lib=work memtype=data;
    modify funda_6;
    attrib _all_ label=' ';
quit;

proc sort data=funda_6; by fyear gvkey; run;

/* The following generates all possible combinations of GVKEYs of size 2 for
   each FYEAR.

   %combination reads the macro variable I (the fiscal year) from the calling
   scope; it is invoked from the %LOOP macro's %DO loop.
   For year &I it writes:
     ts_1_est_&I / ts_2_est_&I : per-pair coefficients on AT and LT from
                                 no-intercept regressions of LEADIB and MVE
     ts_1_&I     / ts_2_&I     : the medians of those coefficients */
%MACRO combination;
    data comb_1; set funda_6; keep fyear gvkey; if fyear=&I; run;

    /* Self-join on gvkey1 < gvkey2 so each unordered pair appears once. */
    proc sql;
        create table comb_2 as
        select a.fyear, a.gvkey as gvkey1, b.gvkey as gvkey2
        from comb_1 as a left join comb_1 as b on a.gvkey < b.gvkey;
    quit;

    /* MISSING() works whether GVKEY is character or numeric. */
    data comb_3; set comb_2; if missing(gvkey2) then delete; run;
    /* Number the surviving pairs 1..N; REPLICATE identifies the pair. */
    data comb_3; set comb_3; replicate=_n_; run;

    /* Wide (gvkey1, gvkey2) to long: one row per pair member. */
    proc transpose data=comb_3 out=comb_4 (drop=_NAME_ rename=(COL1=gvkey));
        var gvkey1-gvkey2;
        by replicate fyear;
    run;

    proc sql;
        create table comb_5 as
        select a.*, b.leadib, b.mve, b.at, b.lt, b.ib, b.bv,
               b.leadeps, b.price, b.eps, b.lagprice
        from comb_4 as a left join funda_6 as b
          on a.fyear = b.fyear and a.gvkey = b.gvkey
        order by fyear, replicate, gvkey;
    quit;

    /* Send the log to a dummy device while the per-pair regressions run. */
    filename templog dummy;
    proc printto log=templog; run;

    /* Model 1: LEADIB on AT and LT, no intercept, one fit per pair. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_1_est_&I (keep=fyear replicate at lt _rsq_);
        by fyear replicate;
        model leadib = at lt / noint noprint;
    quit;
    data ts_1_est_&I; set ts_1_est_&I; tse_1a=at; tse_1b=lt; run;
    proc means data=ts_1_est_&I noprint;
        by fyear;
        var tse_1a tse_1b;
        output out=ts_1_&I (drop= _TYPE_ _FREQ_)
               median(tse_1a)=ts_1a median(tse_1b)=ts_1b;
    run;

    /* Model 2: MVE on AT and LT, no intercept, one fit per pair. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_2_est_&I (keep=fyear replicate at lt _rsq_);
        by fyear replicate;
        model mve = at lt / noint noprint;
    quit;
    data ts_2_est_&I; set ts_2_est_&I; tse_2a=at; tse_2b=lt; run;
    proc means data=ts_2_est_&I noprint;
        by fyear;
        var tse_2a tse_2b;
        output out=ts_2_&I (drop= _TYPE_ _FREQ_)
               median(tse_2a)=ts_2a median(tse_2b)=ts_2b;
    run;

    /* Restore the default log destination. */
    proc printto; run;
%MEND combination;
/* Run %combination once for every fiscal year 2001-2010. The %DO index I is
   the macro variable that %combination resolves as &I. */
%MACRO LOOP;
    %DO I=2001 %TO 2010;
        %combination;
    %END;
%MEND LOOP;
%LOOP;

/* Stack the yearly median tables into one dataset per model. */
data ts_est_1;
    set ts_1_2001-ts_1_2010;
run;
data ts_est_2;
    set ts_2_2001-ts_2_2010;
run;

/*
The following is an alternative, quicker but dirtier, way to generate 100,000
combinations of GVKEYs of size 2 for each FYEAR.
The same combination may enter the sample more than once.
However, this does not affect the results materially in this example.

%combination reads the macro variable I (the fiscal year) from the calling
scope; it is invoked from the %LOOP macro's %DO loop.
*/
%MACRO combination;
    data funda_6_&I; set funda_6; if fyear=&I; run;

    /* Draw 100,000 simple random samples of 2 firms each; the fixed seed
       makes the draw reproducible. */
    proc surveyselect data=funda_6_&I method=srs n=2 seed=3965
                      out=comb_5 rep=100000 noprint;
    run;

    /* Send the log to a dummy device while the per-sample regressions run. */
    filename templog dummy;
    proc printto log=templog; run;

    /* Model 1: LEADIB on AT and LT, no intercept, one fit per replicate. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_1_est_&I (keep=fyear replicate at lt _rsq_);
        by fyear replicate;
        model leadib = at lt / noint noprint;
    quit;
    data ts_1_est_&I; set ts_1_est_&I; tse_1a=at; tse_1b=lt; run;
    proc means data=ts_1_est_&I noprint;
        by fyear;
        var tse_1a tse_1b;
        output out=ts_1_&I (drop= _TYPE_ _FREQ_)
               median(tse_1a)=ts_1a median(tse_1b)=ts_1b;
    run;

    /* Model 2: MVE on AT and LT, no intercept, one fit per replicate. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_2_est_&I (keep=fyear replicate at lt _rsq_);
        by fyear replicate;
        model mve = at lt / noint noprint;
    quit;
    data ts_2_est_&I; set ts_2_est_&I; tse_2a=at; tse_2b=lt; run;
    proc means data=ts_2_est_&I noprint;
        by fyear;
        var tse_2a tse_2b;
        output out=ts_2_&I (drop= _TYPE_ _FREQ_)
               median(tse_2a)=ts_2a median(tse_2b)=ts_2b;
    run;

    /* Restore the default log destination. */
    proc printto; run;
%MEND combination;
/* Invoke %combination for each fiscal year from 2001 through 2010; the %DO
   index I is what %combination resolves as &I. */
%MACRO LOOP;
    %DO I=2001 %TO 2010;
        %combination;
    %END;
%MEND LOOP;
%LOOP;

/* Concatenate the ten yearly result tables for each model. */
data ts_est_1;
    set ts_1_2001-ts_1_2010;
run;
data ts_est_2;
    set ts_2_2001-ts_2_2010;
run;

/* The following generates all possible combinations of GVKEYs of size 3 for
   each FYEAR.

   %combination reads the macro variable I (the fiscal year) from the calling
   scope; it is invoked from the %LOOP macro's %DO loop.
   For year &I it writes:
     ts_7_est_&I / ts_8_est_&I : per-triple coefficients on AT, LT and IB from
                                 no-intercept regressions of LEADIB and MVE
     ts_7_&I     / ts_8_&I     : one row with FYEAR and the median coefficients */
%MACRO combination;
    data comb_1; set funda_6; keep fyear gvkey; if fyear=&I; run;
    data funda_6_&I;
        set funda_6;
        keep fyear gvkey leadib mve at lt ib;
        if fyear=&I;
    run;

    /* Two strict-inequality self-joins (gvkey1 < gvkey2 < gvkey3) list each
       unordered triple once. */
    proc sql;
        create table comb_2 as
        select a.fyear, a.gvkey as gvkey1, b.gvkey as gvkey2
        from comb_1 as a left join comb_1 as b on a.gvkey < b.gvkey;
    quit;
    proc sql;
        create table comb_3 as
        select a.*, b.gvkey as gvkey3
        from comb_2 as a left join comb_1 as b on a.gvkey2 < b.gvkey;
    quit;

    /* MISSING() works whether GVKEY is character or numeric. */
    data comb_3;
        set comb_3;
        if missing(gvkey2) then delete;
        if missing(gvkey3) then delete;
    run;
    /* Number the surviving triples 1..N; REPLICATE identifies the triple. */
    data comb_3; set comb_3; replicate=_n_; run;

    /* Wide (gvkey1..gvkey3) to long: one row per triple member. */
    proc transpose data=comb_3 out=comb_4 (drop=_NAME_ rename=(COL1=gvkey));
        var gvkey1-gvkey3;
        by fyear replicate;
    run;

    proc sql;
        create table comb_5 as
        select a.*, b.leadib, b.mve, b.at, b.lt, b.ib
        from comb_4 as a left join funda_6_&I as b
          on a.fyear = b.fyear and a.gvkey = b.gvkey
        order by fyear, replicate;
    quit;

    /* Send the log to a dummy device while the per-triple regressions run. */
    filename templog dummy;
    proc printto log=templog; run;

    /* Model 7: LEADIB on AT, LT and IB, no intercept, one fit per triple. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_7_est_&I (keep=fyear replicate at lt ib _rsq_);
        by fyear replicate;
        model leadib = at lt ib / noint noprint;
    quit;
    data ts_7_est_&I;
        set ts_7_est_&I;
        tse_7a=at; tse_7b=lt; tse_7c=ib;
        drop at lt ib _RSQ_;
    run;
    /* Median (50th percentile) of each coefficient; output var is <prefix>50. */
    proc univariate data=ts_7_est_&I noprint;
        var tse_7a; output out=ts_7a_&I pctlpts=50 pctlpre=ts_7a_;
    run;
    proc univariate data=ts_7_est_&I noprint;
        var tse_7b; output out=ts_7b_&I pctlpts=50 pctlpre=ts_7b_;
    run;
    proc univariate data=ts_7_est_&I noprint;
        var tse_7c; output out=ts_7c_&I pctlpts=50 pctlpre=ts_7c_;
    run;
    data ts_7_&I;
        merge ts_7a_&I ts_7b_&I ts_7c_&I;
        fyear=&I;
        ts_7a=ts_7a_50; ts_7b=ts_7b_50; ts_7c=ts_7c_50;
        drop ts_7a_50 ts_7b_50 ts_7c_50;
    run;

    /* Model 8: MVE on AT, LT and IB, no intercept, one fit per triple. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_8_est_&I (keep=fyear replicate at lt ib _rsq_);
        by fyear replicate;
        model mve = at lt ib / noint noprint;
    quit;
    data ts_8_est_&I;
        set ts_8_est_&I;
        /* The third regressor is IB. The OUTEST= dataset has no variable NI,
           so reading NI would leave tse_8c missing on every row. */
        tse_8a=at; tse_8b=lt; tse_8c=ib;
        drop at lt ib _RSQ_;
    run;
    proc univariate data=ts_8_est_&I noprint;
        var tse_8a; output out=ts_8a_&I pctlpts=50 pctlpre=ts_8a_;
    run;
    proc univariate data=ts_8_est_&I noprint;
        var tse_8b; output out=ts_8b_&I pctlpts=50 pctlpre=ts_8b_;
    run;
    proc univariate data=ts_8_est_&I noprint;
        var tse_8c; output out=ts_8c_&I pctlpts=50 pctlpre=ts_8c_;
    run;
    data ts_8_&I;
        merge ts_8a_&I ts_8b_&I ts_8c_&I;
        fyear=&I;
        ts_8a=ts_8a_50; ts_8b=ts_8b_50; ts_8c=ts_8c_50;
        drop ts_8a_50 ts_8b_50 ts_8c_50;
    run;

    /* Restore the default log destination. */
    proc printto; run;
%MEND combination;
/* Call %combination for each fiscal year 2001-2010; %combination picks up
   the %DO index as &I. */
%MACRO LOOP;
    %DO I=2001 %TO 2010;
        %combination;
    %END;
%MEND LOOP;
%LOOP;

/* Append the yearly median rows into one table per model. */
data ts_est_7;
    set ts_7_2001-ts_7_2010;
run;
data ts_est_8;
    set ts_8_2001-ts_8_2010;
run;
/*
The following is an alternative, quicker but dirtier, way to generate 100,000
combinations of GVKEYs of size 3 for each FYEAR.

%combination reads the macro variable I (the fiscal year) from the calling
scope; it is invoked from the %LOOP macro's %DO loop.
*/
%MACRO combination;
    data funda_6_&I; set funda_6; if fyear=&I; run;

    /* Draw 100,000 simple random samples of 3 firms each; the fixed seed
       makes the draw reproducible. */
    proc surveyselect data=funda_6_&I method=srs n=3 seed=3965
                      out=comb_5 rep=100000 noprint;
    run;

    /* Send the log to a dummy device while the per-sample regressions run. */
    filename templog dummy;
    proc printto log=templog; run;

    /* Model 7: LEADIB on AT, LT and IB, no intercept, one fit per replicate. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_7_est_&I (keep=fyear replicate at lt ib _rsq_);
        by fyear replicate;
        model leadib = at lt ib / noint noprint;
    quit;
    data ts_7_est_&I;
        set ts_7_est_&I;
        tse_7a=at; tse_7b=lt; tse_7c=ib;
        drop at lt ib _RSQ_;
    run;
    /* Median (50th percentile) of each coefficient; output var is <prefix>50. */
    proc univariate data=ts_7_est_&I noprint;
        var tse_7a; output out=ts_7a_&I pctlpts=50 pctlpre=ts_7a_;
    run;
    proc univariate data=ts_7_est_&I noprint;
        var tse_7b; output out=ts_7b_&I pctlpts=50 pctlpre=ts_7b_;
    run;
    proc univariate data=ts_7_est_&I noprint;
        var tse_7c; output out=ts_7c_&I pctlpts=50 pctlpre=ts_7c_;
    run;
    data ts_7_&I;
        merge ts_7a_&I ts_7b_&I ts_7c_&I;
        fyear=&I;
        ts_7a=ts_7a_50; ts_7b=ts_7b_50; ts_7c=ts_7c_50;
        drop ts_7a_50 ts_7b_50 ts_7c_50;
    run;

    /* Model 8: MVE on AT, LT and IB, no intercept, one fit per replicate. */
    proc reg data=comb_5 noprint rsquare
             outest=ts_8_est_&I (keep=fyear replicate at lt ib _rsq_);
        by fyear replicate;
        model mve = at lt ib / noint noprint;
    quit;
    data ts_8_est_&I;
        set ts_8_est_&I;
        /* The third regressor is IB. The OUTEST= dataset has no variable NI,
           so reading NI would leave tse_8c missing on every row. */
        tse_8a=at; tse_8b=lt; tse_8c=ib;
        drop at lt ib _RSQ_;
    run;
    proc univariate data=ts_8_est_&I noprint;
        var tse_8a; output out=ts_8a_&I pctlpts=50 pctlpre=ts_8a_;
    run;
    proc univariate data=ts_8_est_&I noprint;
        var tse_8b; output out=ts_8b_&I pctlpts=50 pctlpre=ts_8b_;
    run;
    proc univariate data=ts_8_est_&I noprint;
        var tse_8c; output out=ts_8c_&I pctlpts=50 pctlpre=ts_8c_;
    run;
    data ts_8_&I;
        merge ts_8a_&I ts_8b_&I ts_8c_&I;
        fyear=&I;
        ts_8a=ts_8a_50; ts_8b=ts_8b_50; ts_8c=ts_8c_50;
        drop ts_8a_50 ts_8b_50 ts_8c_50;
    run;

    /* Restore the default log destination. */
    proc printto; run;
%MEND combination;
/* Execute %combination for fiscal years 2001 through 2010; the %DO index I
   is the value %combination resolves as &I. */
%MACRO LOOP;
    %DO I=2001 %TO 2010;
        %combination;
    %END;
%MEND LOOP;
%LOOP;

/* Collect the yearly median rows into one table per model. */
data ts_est_7;
    set ts_7_2001-ts_7_2010;
run;
data ts_est_8;
    set ts_8_2001-ts_8_2010;
run;

You might also like