forked from zurda/thinkStats2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfirst.py
executable file
·160 lines (115 loc) · 4.07 KB
/
first.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function
import math
import numpy as np
import nsfg
import thinkstats2
import thinkplot
def MakeFrames():
    """Loads the pregnancy table and splits the live births by birth order.

    Rows with outcome == 1 are kept as live births; those are then divided
    into rows with birthord == 1 (first babies) and all remaining rows.
    The row counts of the three tables are checked against the values
    expected for this dataset.

    returns: DataFrames (all live births, first babies, others)
    """
    pregnancies = nsfg.ReadFemPreg()

    live = pregnancies[pregnancies.outcome == 1]
    birth_order = live.birthord
    firsts = live[birth_order == 1]
    others = live[birth_order != 1]

    # Sanity check: a different row count means the data file (or the
    # cleaning done while reading it) is not the one this code expects.
    expected_sizes = ((live, 9148), (firsts, 4413), (others, 4735))
    for frame, size in expected_sizes:
        assert len(frame) == size

    return live, firsts, others
def Summarize(live, firsts, others):
    """Prints summary statistics of pregnancy length (prglngth).

    Reports the mean, variance and standard deviation for all live births,
    the mean and variance for first babies and for others, the difference
    between the two means (in weeks, in hours, and as a percentage of
    39 weeks), and the value returned by thinkstats2.CohenEffectSize for
    the two groups.

    live: DataFrame of all live births
    firsts: DataFrame of first babies
    others: DataFrame of the remaining live births
    """
    live_lengths = live.prglngth
    live_mean = live_lengths.mean()
    live_var = live_lengths.var()
    live_std = live_lengths.std()

    for caption, value in (('Live mean', live_mean),
                           ('Live variance', live_var),
                           ('Live std', live_std)):
        print(caption, value)

    first_lengths = firsts.prglngth
    other_lengths = others.prglngth
    first_mean = first_lengths.mean()
    other_mean = other_lengths.mean()
    first_var = first_lengths.var()
    other_var = other_lengths.var()

    # Each section is a heading followed by the per-group values.
    sections = (('Mean', first_mean, other_mean),
                ('Variance', first_var, other_var))
    for heading, first_value, other_value in sections:
        print(heading)
        print('First babies', first_value)
        print('Others', other_value)

    gap = first_mean - other_mean
    print('Difference in weeks', gap)
    print('Difference in hours', gap * 7 * 24)
    print('Difference relative to 39 weeks', gap / 39 * 100)

    print('Cohen d', thinkstats2.CohenEffectSize(first_lengths, other_lengths))
def PrintExtremes(live):
    """Plots the pregnancy-length histogram and lists both of its tails.

    The histogram of prglngth is drawn and passed to thinkplot.Save under
    the root 'first_nsfg_hist_live'; then the pairs returned by
    Hist.Smallest(10) and Hist.Largest(10) are printed, one per line.

    live: DataFrame of live births
    """
    length_hist = thinkstats2.Hist(live.prglngth)
    thinkplot.Hist(length_hist, label='live births')

    save_options = dict(root='first_nsfg_hist_live',
                        title='Histogram',
                        xlabel='weeks',
                        ylabel='frequency')
    thinkplot.Save(**save_options)

    # Each listing is a heading followed by (weeks, frequency) pairs.
    listings = (('Shortest lengths:', length_hist.Smallest),
                ('Longest lengths:', length_hist.Largest))
    for heading, select in listings:
        print(heading)
        for weeks, count in select(10):
            print(weeks, count)
def MakeHists(live):
    """Plots histograms of four columns of the live-birth table.

    One figure each is drawn for birthwgt_lb, birthwgt_oz, agepreg
    (rounded down with np.floor) and prglngth, and each is passed to
    thinkplot.Save under its own root name.

    live: DataFrame
    """
    def plot_and_save(values, label, root, xlabel, **save_options):
        # Build the Hist, draw it, then hand the figure to thinkplot.Save;
        # save_options carries the optional axis limits.
        thinkplot.Hist(thinkstats2.Hist(values, label=label))
        thinkplot.Save(root=root,
                       xlabel=xlabel,
                       ylabel='frequency',
                       **save_options)

    plot_and_save(live.birthwgt_lb, 'birthwgt_lb', 'first_wgt_lb_hist',
                  'pounds', axis=[-1, 14, 0, 3200])

    plot_and_save(live.birthwgt_oz, 'birthwgt_oz', 'first_wgt_oz_hist',
                  'ounces', axis=[-1, 16, 0, 1200])

    # Ages are fractional; floor them so each bin is a whole year.
    plot_and_save(np.floor(live.agepreg), 'agepreg', 'first_agepreg_hist',
                  'years')

    plot_and_save(live.prglngth, 'prglngth', 'first_prglngth_hist',
                  'weeks', axis=[-1, 53, 0, 5000])
def MakeComparison(firsts, others):
    """Draws the pregnancy-length histograms of both groups on one figure.

    The 'first' bars are drawn with align='right' and the 'other' bars with
    align='left', both with the same width, and the figure is passed to
    thinkplot.Save under the root 'first_nsfg_hist'.

    firsts: DataFrame
    others: DataFrame
    """
    bar_width = 0.45

    # Pair each histogram with the side its bars are aligned to.
    series = (
        (thinkstats2.Hist(firsts.prglngth, label='first'), 'right'),
        (thinkstats2.Hist(others.prglngth, label='other'), 'left'),
    )

    thinkplot.PrePlot(2)
    for hist, side in series:
        thinkplot.Hist(hist, align=side, width=bar_width)

    thinkplot.Save(root='first_nsfg_hist',
                   title='Histogram',
                   xlabel='weeks',
                   ylabel='frequency',
                   axis=[27, 46, 0, 2700])
def main(script, *args):
    """Runs the full analysis: histograms, extremes, comparison, summary.

    script: name of the running script (first element of sys.argv); unused
    args: any further command-line arguments; accepted and ignored, because
          the entry point unpacks all of sys.argv into this call and a
          stray argument would otherwise raise TypeError before any work
          is done
    """
    live, firsts, others = MakeFrames()

    MakeHists(live)
    PrintExtremes(live)
    MakeComparison(firsts, others)
    Summarize(live, firsts, others)
if __name__ == '__main__':
    # Run the analysis only when executed as a script, forwarding the
    # command line (script name first) to main as positional arguments.
    import sys

    main(*sys.argv)