-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpmt_eligible_train_test.ado
134 lines (105 loc) · 7.53 KB
/
pmt_eligible_train_test.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
program define pmt_eligible_train_test, rclass
// July 26 2010
// July 31 2010: Modified pmt.ado so that it does not need a regression model - the eligible variable is passed as a parameter.
// keep in mind, need to fix quantile - it should now be a vector/variable that is passed as an option
// August 03 2010 : I want to replace the sum command with one that uses survey settings. I will use svy : mean var1, the ereturn value of which is
// stored in e(b) as _coeff[var1]. This works well. Next one will add option to use betas from
// a subsample to predict consumption for the rest of the sample (that change actually applies only to pmt2.ado, not pmt_eligible (this program)
// Also removing reduncancy for pmt_eligible in having to specifiy the Quantiles variable AND the number of Quantiles.
// November 01, 2011 : Modified to calculate eligibility compliance
// January 30, 2013 : Want to calculate inclusion error rate (% of beneficiaries that are outside the target group) and exclusion error rate (% of target group that are not beneficiaries)
// May 9, 2016: Modified to produce measures on training and testing datasets
syntax varname [if], Poor(varname numeric) QUantilec(varname numeric) evaluationSubset(varname numeric) [Graphme(real -1)]
version 9.1
marksample touse
tempvar eligible eligible_notpoorest20 ineligible_poorest20 errors_incl errors_excl poor_eligible nonpoor_ineligible Quantilec nonpoor_eligible poor_ineligible Quantilec correctly_classified
qui gen `Quantilec' = `quantilec' // seems pointless I know. I just wanted to change the capitalization of this variable to match code that I had already written.
// need to get the number of quantiles
qui svy: tab `quantilec'
local quantiles = e(r)
qui gen `eligible' = `1' // This is the group that is deemed eligible for the program
qui gen `nonpoor_eligible' = .
qui gen `poor_ineligible' = .
qui replace `nonpoor_eligible' = 1 if `eligible' == 1 & `poor' ~= 1 & `poor' ~= . & `touse'
qui replace `nonpoor_eligible' = 0 if (`eligible' ~= 1 | `poor' == 1) & `touse'
qui replace `poor_ineligible' = 1 if `eligible' == 0 & `poor' == 1 & `poor' ~= . & `touse'
qui replace `poor_ineligible' = 0 if (`eligible' == 1 | `poor' == 0) & `poor' ~= . & `touse'
qui gen `nonpoor_ineligible' =.
replace `nonpoor_ineligible' = 1 if `poor'==0 & `eligible'==0 & `touse'
replace `nonpoor_ineligible' = 0 if (`poor'==1 | `eligible'==1) & `touse'
qui gen `poor_eligible' =.
replace `poor_eligible' = 1 if `poor'==1 & `eligible'==1 & `touse'
replace `poor_eligible' = 0 if (`poor'==0 | `eligible'==0) & `touse'
qui gen `correctly_classified' =.
replace `correctly_classified' = 1 if (`poor_eligible'==1 | `nonpoor_ineligible'==1)
replace `correctly_classified' = 0 if (`poor_eligible'==0 & `nonpoor_ineligible'==0)
performance_measurement `nonpoor_eligible', groupOfInterest(`eligible') dataSubsetIndicator(`evaluationSubset')
local leakage = r(performanceMeasure)
local inclusion_error_rate = `leakage'
performance_measurement `poor_ineligible', groupOfInterest(`poor') dataSubsetIndicator(`evaluationSubset')
local undercoverage = r(performanceMeasure)
local exclusion_error_rate = `undercoverage'
performance_measurement `correctly_classified', dataSubsetIndicator(`evaluationSubset')
local elig_compliance = r(performanceMeasure)
// Here is where we calculate the coverage for each decile
// First generate quantile indicators
forv q = 1/`quantiles' {
// While the quintile is classified according to the entire sample, "coverage" is defined based on the subsample only. This is what makes most sense.
// For e.g., if there is only 1 observation in the first quantile in this subsample, and 10,000 in the rest, and this one is covered, then the
// coverage will be 100% for quantile1 for this subsample.
qui count if `Quantilec' == `q' & `touse' & `evaluationSubset'
if (r(N) != 0) {
cap qui svy : mean `eligible' if `Quantilec' == `q' & `touse' & `evaluationSubset'
if (_rc ~= 0) {
return scalar coverage_cutoff_quantile`q' = .
}
else {
return scalar coverage_cutoff_quantile`q' = _coef[`eligible']
}
}
else {
return scalar coverage_cutoff_quantile`q' = .
}
}
// the fraction of the total population covered of the evaluationSubset (e.g. training set, testing set, validation set), i.e. overall coverage rate
performance_measurement `eligible', dataSubsetIndicator(`evaluationSubset')
local fraction_covered = r(performanceMeasure)
performance_measurement `poor', dataSubsetIndicator(`evaluationSubset')
local fractionoftotal_in_targetgroup = r(performanceMeasure)
// coverage rate of target group
performance_measurement `poor_eligible', groupOfInterest(`poor') dataSubsetIndicator(`evaluationSubset')
local coverage_rate_targetgroup = r(performanceMeasure)
// *****************************************************************
// targeting accuracy is: coverage rate of the target group * fraction of the total population that is in the target group / overall coverage rate
// alternatively, use the sum of poor_eligible over all eligible, and take the mean
performance_measurement `poor_eligible', groupOfInterest(`eligible') dataSubsetIndicator(`evaluationSubset')
local targeting_accuracy = r(performanceMeasure)
if (`graphme'~= -1) {
if ("`filter'"=="filter") {
twoway (scatter `logpccd_predicted' `1' if `poor_ineligible'==1, xline(`logpline', lcolor(0)) yline(`logcutoff') mc(red) m(x) ) /*
*/ (scatter `logpccd_predicted' `1' if `nonpoor_ineligible' == 1, mc(green) m(x)) /*
*/ (scatter `logpccd_predicted' `1' if `nonpoor_eligible'==1, mc(black) m(x)) /* (scatter `logpccd_predicted' `1' if `Quantilec'==10 & logpccd_predicted < `logcutoff' & `1' ~= . & `logpccd_predicted' ~= ., mc(blue) m(Oh))
*/ (scatter `logpccd_predicted' `1' if `poor_eligible' == 1, mc(blue) m(x) xlabel(8(1)12) ylabel(8(1)12) ysc(r(8 12)) xsc(r(8 12)) ) /*
*/ (scatter `logpccd_predicted' `1' if `usesubsamplebetas' == 0, mc(purple) m(oh)) /*
*/ , title("Cutoff `x' pctile (TJK 2009)") legend(lab(1 "Errors of Exclusion") lab(3 "Errors of Inclusion") lab(2 "Nonpoor, Ineligible") lab(4 "Poor Eligible") lab(5 "Filtered Out") )
}
else {
twoway (scatter `logpccd_predicted' `1' if `poor_ineligible'==1, xline(`logpline', lcolor(0)) yline(`logcutoff') mc(red) m(x) ) /*
*/ (scatter `logpccd_predicted' `1' if `nonpoor_ineligible' == 1, mc(green) m(x)) /*
*/ (scatter `logpccd_predicted' `1' if `nonpoor_eligible'==1, mc(black) m(x)) /* (scatter `logpccd_predicted' `1' if `Quantilec'==10 & logpccd_predicted < `logcutoff' & `1' ~= . & `logpccd_predicted' ~= ., mc(blue) m(Oh))
*/ /*
*/ (scatter `logpccd_predicted' `1' if `poor_eligible' == 1, mc(blue) m(x) xlabel(``xlimlower''(1)``xlimupper'') ylabel(``ylimlower''(1)``ylimupper'') ysc(r(``ylimlower'' ``ylimupper'')) xsc(r(``xlimlower'' ``xlimupper'')) ) /*
*/ , title("Cutoff `x'") legend(lab(1 "Errors of Exclusion") lab(3 "Errors of Inclusion") lab(2 "Nonpoor, Ineligible") lab(4 "Poor Eligible") )
}
}
// Need to store the results
return scalar leakage = `leakage'
return scalar undercoverage = `undercoverage'
return scalar targeting_accuracy = `targeting_accuracy'
return scalar coverage_rate_targetgroup = `coverage_rate_targetgroup'
return scalar fractionoftotal_in_targetgroup = `fractionoftotal_in_targetgroup'
return scalar fraction_covered = `fraction_covered'
return scalar elig_compliance =`elig_compliance'
return scalar inclusion_error_rate = `inclusion_error_rate'
return scalar exclusion_error_rate = `exclusion_error_rate'
end program