forked from rspeer/dominionstats
-
Notifications
You must be signed in to change notification settings - Fork 17
/
stats.py
123 lines (94 loc) · 3.27 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/python
# -*- coding: utf-8 -*-
""" The stats module contains two objects for tracking distributions.
The MeanVarStat keeps running totals (count, sum, and sum of squares) from
which the frequency, mean, and variance of a random variable are derived.
DiffStat supports finding the difference between two MeanVarStat objects.
"""
import math
import primitive_util
import mergeable
class MeanVarStat(primitive_util.ListSlotPrimitiveConversion,
                  mergeable.MergeableObject):
    """Running summary of the observed outcomes of a random variable.

    Only three accumulators are stored: the number of outcomes (``freq``),
    their sum (``sum``) and the sum of their squares (``sum_sq``).  The mean,
    variance and standard deviations are derived from them on demand, so
    instances can be combined or subtracted by simple field-wise arithmetic.
    """

    __slots__ = ('freq', 'sum', 'sum_sq')

    def __init__(self, prior_freq=0, prior_sum=0, prior_sum_sq=0):
        """Start from the given accumulator values (all zero by default)."""
        self.freq = prior_freq
        self.sum = prior_sum
        self.sum_sq = prior_sum_sq

    def add_outcome(self, val):
        """Record a single observation of ``val``."""
        self.freq += 1
        self.sum += val
        self.sum_sq += val * val

    def add_many_outcomes(self, val, freq):
        """Record ``freq`` observations that all have the value ``val``."""
        self.freq += freq
        self.sum += val * freq
        self.sum_sq += val * val * freq

    def frequency(self):
        """Return the number of recorded outcomes."""
        return self.freq

    def mean(self):
        """Return the arithmetic mean of the recorded outcomes.

        Raises ZeroDivisionError when no outcome has been recorded.
        """
        # float() forces true division; with integer accumulators a bare `/`
        # would truncate under Python 2.
        return float(self.sum) / self.freq

    def variance(self):
        """Return the unbiased (n - 1) sample variance.

        With fewer than two outcomes the variance is undefined and the large
        sentinel 1e10 is returned instead; render_interval() relies on that
        sentinel to print a dash.
        """
        if self.freq <= 1:
            return 1e10
        count = float(self.freq)  # true division even for integer totals
        raw = (self.sum_sq - self.sum ** 2 / count) / (count - 1)
        # Floating-point cancellation can leave a tiny negative value for
        # (near-)constant data; clamp so that std_dev() stays a real number.
        return max(0.0, raw)

    def std_dev(self):
        """Return the square root of variance()."""
        return self.variance() ** .5

    def sample_std_dev(self):
        """Return sqrt(variance / freq), i.e. the standard error of the mean.

        The divisor is floored at 1 so that an empty object, or one whose
        count went negative through subtraction, never divides by zero or
        takes the root of a negative number.
        """
        return (self.variance() / max(self.freq, 1)) ** .5

    def __add__(self, o):
        """Return a new MeanVarStat holding the field-wise sum with ``o``."""
        ret = MeanVarStat()
        ret.freq = self.freq + o.freq
        ret.sum = self.sum + o.sum
        ret.sum_sq = self.sum_sq + o.sum_sq
        return ret

    def __sub__(self, o):
        """Return a new MeanVarStat holding the field-wise difference."""
        ret = MeanVarStat()
        ret.freq = self.freq - o.freq
        ret.sum = self.sum - o.sum
        ret.sum_sq = self.sum_sq - o.sum_sq
        return ret

    def mean_diff(self, o):
        """Return a DiffStat comparing this object's mean with ``o``'s."""
        return DiffStat(self, o)

    def render_interval(self, factor=2, sig_digits=2):
        """Render ``mean ± factor * sample_std_dev`` as a unicode string.

        ``sig_digits`` is the number of digits printed after the decimal
        point.  A dash is returned when the standard error is 10000 or more,
        which is the case whenever variance() returned its sentinel.
        """
        std_err = self.sample_std_dev()
        if std_err >= 10000:
            return u'-'
        # Built with %-formatting rather than the Python-2-only unicode().
        number = u'%%.%sf' % (sig_digits,)
        fmt = number + u' ± ' + number
        return fmt % (self.mean(), factor * std_err)

    def __eq__(self, o):
        """Compare the three accumulators field by field.

        Returns NotImplemented for objects that are not MeanVarStat so that
        comparisons such as ``stat == None`` evaluate to False rather than
        failing an assertion.
        """
        if not isinstance(o, MeanVarStat):
            return NotImplemented
        return (self.freq == o.freq and
                self.sum == o.sum and
                self.sum_sq == o.sum_sq)

    def __ne__(self, o):
        """Inverse of __eq__; Python 2 does not derive it automatically."""
        result = self.__eq__(o)
        if result is NotImplemented:
            return result
        return not result

    def merge(self, obj):
        """Add ``obj``'s accumulators into this object in place."""
        self.freq += obj.freq
        self.sum += obj.sum
        self.sum_sq += obj.sum_sq

    def __str__(self):
        """Return the raw accumulators as ``'freq, sum, sum_sq'``."""
        return '%s, %s, %s' % (self.freq, self.sum, self.sum_sq)
class DiffStat(object):
    """
    Statistics about the difference in means of two distributions.

    Both operands only need to expose ``freq``, ``mean()`` and
    ``sample_std_dev()``; a DiffStat provides all three itself, so
    differences can be nested through mean_diff().
    """

    def __init__(self, mvs1, mvs2):
        """Keep references to the two stat objects being compared."""
        self.mvs1 = mvs1
        self.mvs2 = mvs2

    @property
    def freq(self):
        """Frequency of the first operand only; ``mvs2`` is not consulted."""
        return self.mvs1.freq

    def render_interval(self, factor=2, sig_digits=2):
        """Render ``mean ± factor * sample_std_dev`` as a unicode string.

        ``sig_digits`` is the number of digits printed after the decimal
        point (default 2).  A dash is returned when the combined standard
        error is 10000 or more.
        """
        std_err = self.sample_std_dev()
        if std_err >= 10000:
            return u'-'
        # Honour sig_digits instead of a hard-coded '%.2f'.
        number = u'%%.%sf' % (sig_digits,)
        fmt = number + u' ± ' + number
        return fmt % (self.mean(), factor * std_err)

    def render_std_devs(self):
        """Render the mean difference in units of its standard error.

        Returns a dash when the first operand has no outcomes, or when the
        combined standard error is exactly zero and the ratio is undefined.
        """
        if not self.freq:
            return u'-'
        std_err = self.sample_std_dev()
        if not std_err:
            return u'-'
        return u'%.2f' % (self.mean() / std_err)

    def mean(self):
        """Return ``mvs1.mean() - mvs2.mean()``."""
        return self.mvs1.mean() - self.mvs2.mean()

    def sample_std_dev(self):
        """Return the two operands' standard errors combined in quadrature."""
        return math.hypot(self.mvs1.sample_std_dev(),
                          self.mvs2.sample_std_dev())

    def mean_diff(self, o):
        """Return a DiffStat comparing this difference with ``o``."""
        return DiffStat(self, o)