infanalysis.py
"""
This module contains the code for the regression analysis of the bug reporting
processes under consideration.
"""
import time
import logging
import statsmodels.api as sm
import pandas as pd
import matplotlib.pyplot as plt
import gtconfig
import penaltyexp
import simdata
import syseval
if gtconfig.is_windows:
    import winsound

logger = gtconfig.get_logger("regression_analysis", "regression_analysis.txt", level=logging.INFO)
INDEPENDENT_VARIABLE = 'normalized_value'
DEPENDENT_VARIABLES = ['severe_time_ratio', 'severe_completed', 'severe_fixed_ratio', 'severe_fixed_ratio_active']
DEV_TEAM_RATIO = 1
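# Note on the constants above: INDEPENDENT_VARIABLE is the dataframe column used as the regressor,
# DEPENDENT_VARIABLES are the severe-priority performance metrics regressed against it, and
# DEV_TEAM_RATIO scales the dev team size analysed in main().
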
def apply_gatekeeper_error(independent_variable_value, input_params, empirical_profile, original_team_size,
                           simulation_configuration):
    # Maps the independent variable (0-100) to the gatekeeper's success rate.
    normalized_success_rate = independent_variable_value / 100.0
    simulation_configuration["SUCCESS_RATE"] = normalized_success_rate
    input_params.catcher_generator.configure(values=[True, False],
                                             probabilities=[normalized_success_rate, (1 - normalized_success_rate)])
    return normalized_success_rate

def apply_inflation_factor(independent_variable_value, input_params, empirical_profile, original_team_size,
                           simulation_configuration):
    # Applies the given inflation rate to the empirical strategy profile and returns the number of offenders.
    normalized_rate = independent_variable_value / 100.0
    profile_after_inflation, offender_number = syseval.generate_inflated_profile(normalized_rate, empirical_profile)
    syseval.apply_strategy_profile(input_params.player_configuration, profile_after_inflation)
    return offender_number

def apply_team_reduction(independent_variable_value, input_params, empirical_profile, original_team_size,
                         simulation_configuration):
    # Scales the development team to the given percentage of its original size.
    normalized_rate = independent_variable_value / 100.0
    new_team_size = int(original_team_size * normalized_rate)
    input_params.dev_team_size = new_team_size
    return new_team_size

def configure_simulation(independent_variable_value, input_params, empirical_profile, original_team_size,
                         configuration_function, simulation_configuration):
    """
    Adjusts the simulation configuration to the current independent variable value by delegating to the
    provided configuration function.
    :param independent_variable_value: Current value of the independent variable (0-100).
    :param input_params: Simulation inputs.
    :param empirical_profile: Empirical strategy profile.
    :param original_team_size: Baseline development team size.
    :param configuration_function: Function that applies the scenario-specific configuration.
    :param simulation_configuration: Simulation configuration.
    :return: The normalized value produced by the configuration function.
    """
    return configuration_function(independent_variable_value=independent_variable_value, input_params=input_params,
                                  empirical_profile=empirical_profile, original_team_size=original_team_size,
                                  simulation_configuration=simulation_configuration)

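# Illustrative call (the value 20 is a hypothetical inflation percentage, not one taken from the experiments):
#
#   offender_number = configure_simulation(20, input_params, empirical_profile, original_team_size,
#                                          configuration_function=apply_inflation_factor,
#                                          simulation_configuration=simulation_configuration)
#
# With apply_inflation_factor, this applies an inflation rate of 0.2 to the empirical strategy profile and
# returns the number of offending reporters.
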
def get_performance_dataframe(input_params, simfunction, simulation_configuration, empirical_profile,
                              original_team_size, step, desc, configuration_function=apply_inflation_factor):
    """
    Produces a dataframe containing performance measure values for several values of the independent variable.
    :param input_params: Simulation inputs.
    :param simfunction: Simulation function.
    :param simulation_configuration: Simulation configuration.
    :param empirical_profile: Empirical strategy profile.
    :param original_team_size: Baseline development team size.
    :param step: Offset between consecutive independent variable values.
    :param desc: Description of the scenario, used for logging and output file names.
    :param configuration_function: Function that applies the scenario-specific configuration.
    :return: Dataframe instance.
    """
    regression_data = []
    logger.info("Reporters in population: " + str(len(empirical_profile.keys())))
    independent_variable_values = range(0, 100, step)
    for independent_variable_value in independent_variable_values:
        logger.info(
            "Simulating " + desc + " with an independent variable of " + str(independent_variable_value))
        normalized_value = configure_simulation(independent_variable_value, input_params, empirical_profile,
                                                original_team_size,
                                                configuration_function=configuration_function,
                                                simulation_configuration=simulation_configuration)
        simulation_output = syseval.run_scenario(simfunction, input_params, simulation_configuration)
        simulation_output_file = "csv/" + desc + "_simulaton_results.csv"
        pd.DataFrame(simulation_output.get_consolidated_output(input_params.player_configuration)).to_csv(
            simulation_output_file)
        logger.info("The simulation output was stored at: " + simulation_output_file)
        performance_metrics = zip(simulation_output.get_time_ratio_per_priority(simdata.SEVERE_PRIORITY),
                                  simulation_output.get_completed_per_real_priority(simdata.SEVERE_PRIORITY),
                                  simulation_output.get_fixed_ratio_per_priority(simdata.SEVERE_PRIORITY,
                                                                                 exclude_open=False),
                                  simulation_output.get_fixed_ratio_per_priority(simdata.SEVERE_PRIORITY,
                                                                                 exclude_open=True))
        regression_data += [{'independent_variable_value': independent_variable_value,
                             'normalized_value': normalized_value,
                             'severe_time_ratio': severe_time_ratio,
                             'severe_completed': severe_completed,
                             'severe_fixed_ratio': severe_fixed_ratio,
                             'severe_fixed_ratio_active': severe_fixed_ratio_active
                             } for severe_time_ratio, severe_completed, severe_fixed_ratio, severe_fixed_ratio_active
                            in performance_metrics]
    return pd.DataFrame(regression_data)

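# Note: the returned dataframe has one row per zipped performance-metric sample, with the columns
# independent_variable_value, normalized_value, severe_time_ratio, severe_completed, severe_fixed_ratio
# and severe_fixed_ratio_active.
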
def perform_regression_analysis(desc, dataframe):
    """
    Performs the regression analysis, logging the output and generating a plot per dependent variable.
    :param desc: Description of the scenario.
    :param dataframe: Dataframe with performance values.
    :return: Dictionary containing the dataframe and an OLS instance per dependent variable.
    """
    regression_results = {}
    regression_results['dataframe'] = dataframe
    for dependent_variable in DEPENDENT_VARIABLES:
        detailed_desc = desc + '_' + dependent_variable
        dependent_values = dataframe[dependent_variable]
        independent_values = sm.add_constant(dataframe[INDEPENDENT_VARIABLE])
        ols_instance = sm.OLS(dependent_values, independent_values)
        regression_result = ols_instance.fit()
        logger.info(detailed_desc + " -> regression_result.summary(): " + str(regression_result.summary()))
        plt.clf()
        axis = dataframe.plot(x=INDEPENDENT_VARIABLE, y=dependent_variable, style='o')
        plt.xlabel(INDEPENDENT_VARIABLE)
        plt.ylabel(dependent_variable)
        plt.title(detailed_desc)
        sm.graphics.abline_plot(model_results=regression_result, ax=axis)
        file_name = "img/" + detailed_desc + '_regression_analysis.png'
        plt.savefig(file_name)
        logger.info("Image stored at " + file_name)
        # Store the unfitted OLS instance so that callers can refit it on demand.
        regression_results[dependent_variable] = ols_instance
    return regression_results

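# Usage sketch (the "DEMO" description is purely illustrative): the returned dictionary maps 'dataframe'
# to the input data and each dependent variable to its unfitted statsmodels OLS instance, e.g.:
#
#   results = perform_regression_analysis(desc="DEMO", dataframe=dataframe)
#   fitted = results['severe_completed'].fit()
#   logger.info("Intercept and slope: " + str(fitted.params))
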
def do_unsupervised_prioritization(simulation_configuration, input_params, simfunction, empirical_profile,
                                   original_team_size, step):
    simulation_configuration["THROTTLING_ENABLED"] = False
    simulation_configuration["GATEKEEPER_CONFIG"] = None
    desc = "UNSUPERVISED_PRIORITIZATION"
    logger.info("Starting " + desc + " analysis ...")
    dataframe = get_performance_dataframe(input_params=input_params, simfunction=simfunction,
                                          simulation_configuration=simulation_configuration,
                                          empirical_profile=empirical_profile, original_team_size=original_team_size,
                                          step=step, desc=desc)
    return perform_regression_analysis(desc=desc, dataframe=dataframe)

def do_gatekeeper(simulation_configuration, input_params, simfunction, empirical_profile, original_team_size, step):
    simulation_configuration["THROTTLING_ENABLED"] = False
    simulation_configuration['GATEKEEPER_CONFIG'] = penaltyexp.DEFAULT_GATEKEEPER_CONFIG
    queue_configurations = [True, False]
    dev_team_factors = [0.5, 1.0]
    gatekeeper_results = {}
    original_team_size = input_params.dev_team_size
    logger.info("Original team size: " + str(original_team_size))
    for queue_configuration in queue_configurations:
        for dev_team_factor in dev_team_factors:
            logger.info("Using dev team factor " + str(dev_team_factor))
            input_params.dev_team_size = int(original_team_size * dev_team_factor)
            desc = "GATEKEEPER_PRIQUEUE_" + str(queue_configuration) + "_DEV_FACTOR_" + str(dev_team_factor)
            logger.info("Starting " + desc + " analysis ...")
            simulation_configuration["PRIORITY_QUEUE"] = queue_configuration
            logger.info("Using Priority Queue? " + str(queue_configuration))
            dataframe = get_performance_dataframe(input_params=input_params, simfunction=simfunction,
                                                  simulation_configuration=simulation_configuration,
                                                  empirical_profile=empirical_profile,
                                                  original_team_size=original_team_size, step=step, desc=desc,
                                                  configuration_function=apply_gatekeeper_error)
            gatekeeper_results[desc] = perform_regression_analysis(desc=desc, dataframe=dataframe)
    return gatekeeper_results

def do_throttling(simulation_configuration, input_params, simfunction, empirical_profile, original_team_size, step):
    simulation_configuration["THROTTLING_ENABLED"] = True
    penalty_values = [1, 3, 5]
    # TODO(cgavidia): Remove later
    penalty_values = [3]
    throttling_results = {}
    for penalty in penalty_values:
        simulation_configuration["INFLATION_FACTOR"] = penalty / 100.0
        desc = "THROTTLING_INF00" + str(penalty)
        logger.info("Starting " + desc + " analysis ...")
        dataframe = get_performance_dataframe(input_params=input_params, simfunction=simfunction,
                                              simulation_configuration=simulation_configuration,
                                              empirical_profile=empirical_profile,
                                              original_team_size=original_team_size, step=step, desc=desc,
                                              configuration_function=apply_gatekeeper_error)
        throttling_results[desc] = perform_regression_analysis(desc=desc, dataframe=dataframe)
    return throttling_results

def plot_comparison(plot_configs, y_min, y_max, desc):
    """
    Plots several regression lines for the sake of comparison.
    :param plot_configs: List of dicts with the parameters of each line (x values, fitted values, color and legend).
    :param y_min: Y axis minimum value.
    :param y_max: Y axis maximum value.
    :param desc: Variable under analysis.
    :return: None.
    """
    plt.clf()
    for plot_config in plot_configs:
        plt.plot(plot_config['x_values'], plot_config['fitted_values'], plot_config['color'],
                 label=plot_config['legend'])
    plt.legend()
    plt.ylim(y_min, y_max)
    plt.xlim(0, 175)
    plt.xlabel(INDEPENDENT_VARIABLE)
    plt.ylabel(desc)
    plt.title('Performance Comparison: ' + desc)
    file_name = "img/" + desc + "_performance_comparison.png"
    plt.savefig(file_name)
    logger.info("Performance comparison plot was stored in " + file_name)

def compare_regression_results(uo_regression_results, throt_regression_results, gate_regression_results):
    for performance_metric in DEPENDENT_VARIABLES:
        y_min = 0.0
        y_max = 1.0
        if performance_metric == "severe_completed":
            y_min = 800
            y_max = 1200
        elif performance_metric == "severe_time_ratio":
            y_min = 0.05
            y_max = 0.1
        plot_comparison(plot_configs=[{"x_values": uo_regression_results['dataframe'][INDEPENDENT_VARIABLE],
                                       "fitted_values": uo_regression_results[performance_metric].fit().fittedvalues,
                                       "color": "red",
                                       "legend": "Unsupervised Prioritization"},
                                      {"x_values": throt_regression_results['THROTTLING_INF005']['dataframe'][
                                          INDEPENDENT_VARIABLE],
                                       "fitted_values": throt_regression_results['THROTTLING_INF005'][
                                           performance_metric].fit().fittedvalues,
                                       "color": "blue",
                                       "legend": "Throttling with 0.05 penalty"},
                                      {"x_values": gate_regression_results['GATEKEEPER_SUCC90']['dataframe'][
                                          INDEPENDENT_VARIABLE],
                                       "fitted_values": gate_regression_results['GATEKEEPER_SUCC90'][
                                           performance_metric].fit().fittedvalues,
                                       "color": "green",
                                       "legend": "Gatekeeper with 10% error rate"}], desc=performance_metric,
                        y_min=y_min, y_max=y_max)

def main():
    replications_per_rate = 120
    step = 20
    logger.info("Experiment configuration: Replications per Inflation Rate " + str(
        replications_per_rate) + " Offset between rates " + str(step))
    simulation_configuration, simfunction, input_params, empirical_profile = syseval.gather_experiment_inputs()
    original_team_size = input_params.dev_team_size
    input_params.dev_team_size = int(input_params.dev_team_size * DEV_TEAM_RATIO)
    logger.info("The original dev team size is " + str(original_team_size) + ". The size under analysis is " + str(
        input_params.dev_team_size))
    simulation_configuration['REPLICATIONS_PER_PROFILE'] = replications_per_rate
    gate_regression_results = do_gatekeeper(simulation_configuration=simulation_configuration, simfunction=simfunction,
                                            input_params=input_params, empirical_profile=empirical_profile,
                                            original_team_size=original_team_size, step=step)
    # So far, we are only concerned with the regression analysis for the Gatekeeper process.
    # uo_regression_results = do_unsupervised_prioritization(simulation_configuration=simulation_configuration,
    #                                                        simfunction=simfunction,
    #                                                        input_params=input_params,
    #                                                        empirical_profile=empirical_profile,
    #                                                        original_team_size=original_team_size, step=step)
    #
    # throt_regression_results = do_throttling(simulation_configuration=simulation_configuration, simfunction=simfunction,
    #                                          input_params=input_params, empirical_profile=empirical_profile,
    #                                          original_team_size=original_team_size, step=step)
    #
    # compare_regression_results(uo_regression_results=uo_regression_results,
    #                            throt_regression_results=throt_regression_results,
    #                            gate_regression_results=gate_regression_results)

if __name__ == "__main__":
    start_time = time.time()
    try:
        main()
    finally:
        if gtconfig.is_windows:
            winsound.Beep(2500, 1000)
        logger.info("Execution time in seconds: " + str(time.time() - start_time))