-
Notifications
You must be signed in to change notification settings - Fork 8
/
randomset.sas
150 lines (113 loc) · 4.25 KB
/
randomset.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
%macro RandomSet(ds,recs,type=1,out=RANDOM,multi=100000) / des='Obtain random data';

    /********************************************************************************
       BEGIN MACRO HEADER
     ********************************************************************************

        Name:       RandomSet
        Author:     Chris Swenson
        Created:    2009-06-03

        Purpose:    Obtain random data using one of two methods

        Arguments:  ds     - input data set to obtain random data from
                    recs   - number of random records to obtain; for type 2 this
                             must be a non-negative integer
                    type   - run type of macro, either 1 for using PROC SORT on the
                             input data set (good for small data sets) or 2 for only
                             selecting the random observations from the data set
                             (good for large data sets), defaulted to 1; a blank
                             value is treated as 1 and any other value is rejected
                    out=   - name of output data set, defaulted to RANDOM
                    multi= - multiplier applied to RAND('UNIFORM') function,
                             defaulted to 100,000

        Notes:      - Calls the macros droptable, Time and nobs, which are not
                      defined in this file and must be available to the session.
                    - The output data set is dropped before processing starts.
                    - If recs exceeds the number of records in the input, every
                      input record is returned (in random order) for both types.
                    - Type 1 adds and then drops a variable named RANDOM, so an
                      input variable with that name is overwritten and lost.
                    - Type 2 uses a work data set named _TEMP_ and holds the list of
                      selected record numbers in a single macro variable, so very
                      large values of recs may exceed the macro variable length
                      limit.

        Revisions
        ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
        Date        Author  Comments
        ¯¯¯¯¯¯¯¯¯¯  ¯¯¯¯¯¯  ¯¯¯¯¯¯¯¯
        YYYY-MM-DD  III     Please use this format and insert new entries above

     ********************************************************************************
       END MACRO HEADER
     ********************************************************************************/

    %local i dsnobs keeprecs keepn recnum;

    /* Check arguments */
    %if "&ds"="" %then %do;
        %put %str(E)RROR: No data set specified.;
        %return;
    %end;
    %if %sysfunc(exist(&DS))=0 %then %do;
        %put %str(E)RROR: The specified data set does not exist.;
        %return;
    %end;
    %if "&recs"="" %then %do;
        %put %str(E)RROR: No record count specified.;
        %return;
    %end;

    /* A blank type means the default method. Anything other than 1 or 2 is  */
    /* rejected here, before the output table is dropped, so a bad call does */
    /* not destroy an existing result and then produce nothing.              */
    %if %length(&type)=0 %then %let type=1;
    %if &type ne 1 and &type ne 2 %then %do;
        %put %str(E)RROR: Invalid type specified. Use 1 or 2.;
        %return;
    %end;

    /* Type 2 drives a macro loop with the record count, so it has to be a   */
    /* plain integer. Type 1 only passes it to OBS=, which accepts more      */
    /* forms, so it is deliberately not restricted there.                    */
    %if &type=2 %then %do;
        %if %sysfunc(notdigit(%superq(recs))) > 0 %then %do;
            %put %str(E)RROR: The record count must be a non-negative integer when TYPE=2.;
            %return;
        %end;
    %end;

    /* Drop the table if it already exists */
    %droptable(&OUT);

    /* Type 1: tag every record with a random number, sort, keep the first N */
    %if &type=1 %then %do;

        %Time(B);

        data &OUT;
            set &ds;
            random=RAND('UNIFORM')*&multi;
        run;

        proc sort data=&OUT;
            by random;
        run;

        data &OUT(drop=random);
            set &OUT(obs=&recs);
        run;

        %Time(E);

    %end;

    /* Type 2: randomise the record numbers only, then pull those records */
    %else %do;

        %Time(B);

        /* Count the observations in the data set. COUNT(*) returns 0 for an */
        /* empty input (SUM(1) would return missing), and the explicit       */
        /* format keeps large counts out of E-notation.                      */
        proc sql noprint;
            select count(*) format=20.
            into :dsnobs
            from &ds
            ;
        quit;

        /* Reassign to strip the padding added by INTO */
        %let dsnobs=&dsnobs;
        %put NOTE: Number of records in &ds: &dsnobs;

        /* Never try to pull more records than the input actually holds */
        %let keepn=%sysfunc(min(&recs,&dsnobs));

        /* Pair every record number with a random number */
        data &OUT;
            format i random 12.;
            do i=1 to &dsnobs;
                random=RAND('UNIFORM')*&multi;
                output;
            end;
        run;

        proc sort data=&OUT;
            by random;
        run;

        /* Keep the number of desired records */
        /* Note: These are the record numbers of the desired records */
        data &OUT(drop=random);
            set &OUT(obs=&keepn);
        run;

        /* Put the record numbers into one macro variable */
        proc sql noprint;
            select i
            into :keeprecs separated by ' '
            from &OUT
            ;
        quit;

        /* Replace the record-number table with an empty copy of the input   */
        /* structure, so the output exists even when no records are selected */
        data &OUT;
            set &ds(obs=0);
        run;

        /* Pull each record out one at a time */
        %do i=1 %to &keepn;

            %let recnum=%scan(&keeprecs,&i);
            %put NOTE: Record - &recnum;

            /* Note: The set statement with obs= and firstobs= set to the same
               number only pulls that one record out of the data set */
            data _temp_;
                set &ds(obs=&recnum firstobs=&recnum);
            run;

            /* Append to final */
            proc append base=&OUT data=_temp_;
            run;

        %end;

        /* The work table only exists if at least one record was pulled */
        %if &keepn > 0 %then %do;
            proc sql; drop table _temp_; quit;
        %end;

        %Time(E);

    %end;

    %put ;
    %put NOTE: %nobs(&OUT) randomly-selected records output to &OUT..;

%mend RandomSet;