forked from edwinhu/sas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CRSP_MERGE.sas
165 lines (142 loc) · 5.49 KB
/
CRSP_MERGE.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
/*
Author: Edwin Hu
Date: 2013-05-24
# CRSP_MERGE #
## Summary ##
Basic ETL script for CRSP data.
Merges CRSP daily or monthly with corresponding events and names files.
Also computes market equity and delisting adjusted returns following Shumway (1997).
Adapted from Rabih Moussawi, Luis Palacios WRDS.
## Variables ##
- s: frequency (d,m)
- start: 31DEC1925
- end: 31DEC2013
- sfvars: variables to grab from stock files
- sevars: variables to grab from event files
- filters: additional filters `(shrcd in (10,11) and exchcd in (1,2,3))`
- outlib: output library, default `user`
- final_ds: output dataset name, if not set defaults to `&outlib..crsp_&s.`
- debug: debug mode (keep or delete temporary files)
## Usage ##
```
%IMPORT "~/git/sas/CRSP_MERGE.sas";
%CRSP_MERGE(s=m,
start=31DEC1925,end=31DEC2013,
sfvars=vol,sevars=,
filters=shrcd in (10,11) and exchcd in (1,2,3),
outlib=user,final_ds=crsp_m,
debug=n);
```
*/
%macro CRSP_MERGE(s=m,
start=31DEC1925,end=31DEC2013,
sfvars=,sevars=,
filters=,
outlib=user,final_ds=,
debug=n);
/* Check Series: Daily or Monthly and define datasets - Default is Monthly */
%if &s=D %then %let s=d; %else %if &s ne d %then %let s=m;
%let sf = crsp.&s.sf ;
%let se = crsp.&s.seall ;
%let senames = crsp.&s.senames ;
%put ; %put ; %put ; %put ; %put ;
%put #### ## # Merging CRSP Stock File (&s.sf) and Event File (&s.se) # ## #### ;
options nonotes;
%let sdate = %sysfunc(putn("&start"d,5.)) ;
%let edate = %sysfunc(putn("&end"d,5.)) ;
%let sfvars = ret prc shrout &sfvars.;
%let sfvars = %sysfunc(compbl(%sysfunc(lowcase(&sfvars))));
%put;%put ### sfvars=(&sfvars.);
%let sevars = &sevars. dlret ticker ncusip exchcd shrcd;
%let sevars = %sysfunc(compbl(%sysfunc(lowcase(&sevars))));
%let nsevars = %eval(%sysfunc(length(&sevars))-%sysfunc(length(%sysfunc(compress(&sevars))))+1);
%put;%put ### sevars=(&sevars.);
%* create lag event variable names to be used in the RETAIN statement ;
%let sevars_l = lag_%sysfunc(tranwrd(&sevars,%str( ),%str( lag_)));
%if %length(&filters) > 2 %then %let filters = and &filters;
%else %let filters = %str( );
%if &final_ds = %str() %then %let final_ds = &outlib..crsp_&s.;
%put #;
/* Get stock data */
proc sql;
create table _sfdata
as select *
from &sf (keep= permco permno date &sfvars)
where date between &sdate and &edate and permno in
(select distinct permno from
&senames(WHERE=(&edate>=NAMEDT and &sdate<=NAMEENDT)
keep=permno namedt nameendt) )
order by permno, date;
quit;
%put ##;
/* Get event data */
proc sql;
create table _sedata
as select a.*
from &se (keep= permco permno date &sevars) as a,
(select distinct permno, min(namedt) as minnamedt from
&senames(WHERE=(&edate>=NAMEDT and &sdate<=NAMEENDT)
keep=permno namedt nameendt) group by permno) as b
where a.date >= b.minnamedt and a.date <= &edate and a.permno =b.permno
order by a.permno, a.date;
quit;
%put ###;
/* Merge stock and event data */
%let eventvars = ticker comnam ncusip shrout siccd exchcd shrcls shrcd shrflg trtscd nmsind mmcnt nsdinx;
* variables whose values need to be retain to fill the blanks;
data _merge (keep=permco permno date &sfvars &sevars);
merge _sedata (in=eventdata) _sfdata (in=stockdata);
by permno date; retain &sevars_l;
%do i = 1 %to &nsevars;
%let var = %scan(&sevars,&i,%str( ));
%let var_l = %scan(&sevars_l,&i,%str( ));
%if %sysfunc(index(&eventvars,&var))>0 %then
%do;
if eventdata or first.permno then &var_l = &var. ;
else if not eventdata then &var = &var_l. ;
%end;
%end;
if eventdata and not stockdata then delete;
drop &sevars_l ;
format date yymmdd10.;
run;
%put ####;
/* ------------------------------------------------------------------------------ */
/* The following sort is included to handle duplicate observations when a company */
/* has more than one distribution on a given date. For example, a stock and cash */
/* distribution on the same date will generate two records, identical except for */
/* different DISTCD and DISTAMT (and possibly other) values. The NODUPLICATES */
/* option only deletes a record if all values for all variables are the same as */
/* those in another record. So, in the above example, if DISTCD is included in */
/* &sevars a record will not be deleted, but a redundant record will be deleted */
/* if DISTCD and DISTAMT are not included in &sevars. */
/* ------------------------------------------------------------------------------ */
proc sort data=_merge noduplicates;
/* the "exchcd" condition below removes rows with empty stock price data created */
/* because CRSP event file track some event information even before the stock */
/* is trading in major stock exchange */
where 1 &filters;
by date permno;
run;
%put #####;
proc sql;
create table &final_ds.(drop=dlret) as
select a.*,
abs(prc*shrout) as ME label='Market Equity',
(1+ret)*sum(1,dlret)-1 as RET_ADJ label='Returns adjusted for delisting',
b.CDATE label='CRSP Date (int)'
from _merge a,
(select caldt, monotonic() as CDATE from crsp.&s.siy) b
where a.date = b.caldt
;
quit;
%put ######;
%if %SUBSTR(%LOWCASE(&debug),1,1) = n %then %do;
proc sql;
drop table _sedata, _sfdata, _merge;
quit;
%end;
options notes;
%put ####### Done: Dataset &final_ds Created! #######;
%put ;
%mend CRSP_MERGE;