# linearmodels_graphs.py
#
# NOTE: the repository this file comes from was archived by its owner on
# Mar 1, 2022 and is now read-only.
import re
import argparse
import pandas as pd
from scipy.stats import zscore
import numpy as np
from plot import plt, sns
from matplotlib.ticker import MultipleLocator
def parse_args():
    """Parse the command line of the ``linearmodel_graphs`` script.

    Returns:
        argparse.Namespace: namespace holding the single positional
        argument ``linearmodel`` taken from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(
        prog='linearmodel_graphs',
        usage='%(prog)s [options]',
    )
    cli.add_argument('linearmodel', help="linear relations")
    namespace = cli.parse_args()
    return namespace
def get_diff(df):
    """Pair each scale-1 relation with the same relation at other scales.

    Rows with ``scale == 1`` are inner-joined with all remaining rows on the
    four ``service-a``/``service-b``/``metric-a``/``metric-b`` columns.
    Every other shared column appears twice in the result: with suffix
    ``-x`` for the scale-1 side and ``-y`` for the other-scale side.

    Args:
        df: frame providing at least the join columns plus ``scale``,
            ``slope`` and ``quotient``.

    Returns:
        The joined frame with two extra columns, ``slope-diff``
        (``slope-y / slope-x``) and ``quotient-diff``
        (``quotient-y / quotient-x``).
    """
    join_keys = ["service-a", "service-b", "metric-a", "metric-b"]
    baseline = df.loc[df["scale"] == 1]
    other_scales = df.loc[df["scale"] != 1]

    paired = pd.merge(
        baseline,
        other_scales,
        on=join_keys,
        how='inner',
        suffixes=('-x', '-y'),
    )

    # Ratio of the other-scale value to the scale-1 value, per measure.
    for measure in ("slope", "quotient"):
        paired[measure + "-diff"] = paired[measure + "-y"] / paired[measure + "-x"]
    return paired
def write_summary(target, df):
    """Write per-scale descriptive statistics of *df* as HTML to *target*.

    For every distinct value of the ``scale-y`` column, in the order
    returned by ``unique()``, the file receives an ``<h1>`` heading naming
    the scale followed by the HTML table of ``describe()`` for the rows
    with that scale.

    Args:
        target: path of the file to create or overwrite.
        df: frame with a ``scale-y`` column.
    """
    with open(target, "w+") as out:
        for level in df["scale-y"].unique():
            out.write("<h1>scale-y: %d</h1>\n" % level)
            rows_at_level = df.loc[df["scale-y"] == level]
            stats_table = rows_at_level.describe().to_html()
            out.write(stats_table)
            out.write("\n")
def _save_relative_violin(diff, column, xlabel, title, target):
    """Save violin plots of *column*, one facet per ``scale-y``, to *target*.

    Only rows with ``-15 < diff[column] < 15`` are plotted. The figure title
    is set before saving so that it is part of the written image.
    """
    in_range = diff[(diff[column] < 15) & (diff[column] > -15)]
    # NOTE(review): factorplot was renamed catplot in seaborn 0.9; the old
    # name is kept because the seaborn version behind `plot.sns` is not
    # visible from this file.
    plot = sns.factorplot(x=column, col="scale-y", data=in_range, kind="violin")
    plot.set(xlabel=xlabel)
    plot.fig.suptitle(title)
    plot.savefig(target, dpi=300)


def main():
    """Read the linear-model table and write all plots and HTML summaries.

    The tab-separated file named on the command line is loaded and
    underscores in its column names are replaced by dashes. The following
    files are then written to the current working directory:

    * ``relative_slope_increasing_scale.png`` / ``.html``
    * ``slope-distribution.png`` and ``quotient-distribution.png``
    * ``common_relations_across_scale.png``
    * ``common_relations_same_scale.png``
    * ``common_relations_across_scale_without_1.png``
    * ``relative_quotient_increasing_scale.png`` / ``.html``
    """
    args = parse_args()
    df = pd.read_csv(args.linearmodel, sep="\t")
    df.columns = df.columns.str.replace("_", "-")

    relation_keys = ["service-a", "service-b", "metric-a", "metric-b"]

    significant = df[df["p-value"] < 0.10]
    # nth(1) takes the row at 0-based position 1 of every scale group; all
    # significant rows sharing a name with one of those rows are kept.
    # NOTE(review): the variable name suggests the *first* row per scale;
    # confirm that nth(0) was not intended.
    picked_names = significant.groupby(["scale"]).nth(1).name.unique()
    first_of_each_scale = significant[significant["name"].isin(picked_names)]

    diff = get_diff(df)

    # NOTE(review): these two calls act on pyplot's current figure at this
    # point, while the violin grid below is built afterwards; confirm
    # whether they were meant for that grid.
    plt.axes().yaxis.set_minor_locator(MultipleLocator(1))
    plt.subplots_adjust(top=0.85)

    _save_relative_violin(
        diff,
        "slope-diff",
        r'$\displaystyle -15 < \frac{slope_{scale_y}}{slope_{scale_1}} < 15$',
        "Relative Slope difference of linear models with increasing scale",
        "relative_slope_increasing_scale.png",
    )

    # Histograms of slope and quotient, one facet per scale, on [-1, 1].
    bins = np.arange(-1, 1 + 0.1, 0.1)
    for column, target in (
        ("slope", "slope-distribution.png"),
        ("quotient", "quotient-distribution.png"),
    ):
        g = sns.FacetGrid(first_of_each_scale, col="scale")
        g = g.map(plt.hist, column, bins=bins)
        g.savefig(target)

    # Rows per relation across scales; the count lands in the "name" column.
    plt.clf()
    per_relation = (
        first_of_each_scale.groupby(relation_keys)["name"].count().reset_index()
    )
    g = sns.countplot(x="name", data=per_relation)
    g.set(ylabel="number of common relationships across scale")
    g.get_figure().savefig("common_relations_across_scale.png")

    # Same count, but grouped per relation *and* scale, one facet per scale.
    per_relation_and_scale = (
        significant.groupby(relation_keys + ["scale"]).count().reset_index()
    )
    g = sns.FacetGrid(per_relation_and_scale, col="scale")
    g.map(plt.hist, "name")
    g.set(ylabel="Common relations count per metric")
    g.fig.savefig("common_relations_same_scale.png")

    # Rows per relation across scales, leaving out scale 1.
    plt.clf()
    without_scale_1 = first_of_each_scale[first_of_each_scale.scale != 1]
    per_relation = (
        without_scale_1.groupby(relation_keys)["name"].count().reset_index()
    )
    g = sns.countplot(x="name", data=per_relation)
    g.set(ylabel="number of common relationships excluding 1")
    g.get_figure().savefig("common_relations_across_scale_without_1.png")

    write_summary("relative_slope_increasing_scale.html", diff)

    _save_relative_violin(
        diff,
        "quotient-diff",
        r'$\displaystyle -15 < \frac{quotient_{scale_y}}{quotient_{scale_1}} < 15$',
        "Relative Quotient difference of linear with increasing scale",
        "relative_quotient_increasing_scale.png",
    )
    # Both HTML reports are generated from the same `diff` frame.
    write_summary("relative_quotient_increasing_scale.html", diff)
# Run the plotting pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()