-
Notifications
You must be signed in to change notification settings - Fork 0
/
visualization_methods.py
223 lines (194 loc) · 7.62 KB
/
visualization_methods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# visualization_methods.py
"""A set of functions that enable visualization
of dataframes or series of spending and income
information
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
# Function for generating a pie chart of expenses
def visualize_expenses_by_group(year, year_data, colors, out_file="", spending=True):
"""Build a pie chart with a legend that shows spending
by spending group for a year.
year - year (str) that grouped spending totals are for
year_data - a series with spending totals fby spending group
colors - a dict of assigned matplotlib colormap vals for each group
out_file - an optional output file to write the visualization to
spending - optional. Set to false if visualizing income transactions
"""
# Build a legend for a Pie Chart that includes the Spending Category and the Amount
legend_label = []
for idx in year_data.index:
if year_data.loc[idx] <= 0:
# Remove categories with "negative spending"
if year_data.loc[idx] < 0:
print(
"Spending on "
+ idx
+ ": ${:,.2f}".format(year_data.loc[idx])
+ " was really income."
)
year_data.drop([idx], inplace=True)
else:
# Otherwise add the group name and amount to the legend
legend_label.append(idx + ": ${:,.2f}".format(year_data.loc[idx]))
expenses_for_year = year_data.sum()
# Plot the chart
if spending:
title = str(year) + " Spending: " + "${:,.2f}".format(expenses_for_year)
else:
title = str(year) + " Income: " + "${:,.2f}".format(expenses_for_year)
year_data.plot(
kind="pie",
figsize=(8, 8),
title=title,
colors=[colors[key] for key in year_data.index],
)
plt.legend(legend_label, loc="center right", bbox_to_anchor=(1.9, 0.5))
# Write chart to a file or stdout
if out_file:
plt.savefig(out_file, bbox_inches="tight")
else:
plt.show()
plt.close()
def visualize_average_spending_by_group(sbg, title, colors, out_file=""):
"""Build a pie chart with a legend that shows spending
the average spending by group for previous years.
sbg - a dataframe with Spending Groups as index and a column called "Average"
title - the title for the visualization
colors - a dictionary of assigned matplotlib colormap values for each spending group
out_file - an optional output file to write the visualization to
"""
# Build a legend for a Pie Chart that includes the Spending Category and the Amount
legend_label = []
average_total = sbg["Average"].sum()
for idx in sbg.index:
if sbg.loc[idx].Average < 0:
print(
"Average spending on "
+ idx
+ ": ${:,.2f}".format(sbg.loc[idx].Average)
+ " is negative. Removing from analysis."
)
average_total -= sbg.loc[idx].Average
sbg.drop([idx], inplace=True)
else:
legend_label.append(idx + ": ${:,.2f}".format(sbg.loc[idx].Average))
sbg.plot(
kind="pie",
y="Average",
figsize=(8, 8),
colors=[colors[key] for key in sbg.index],
title=title + ": " + "${:,.2f}".format(sbg.Average.sum()),
)
plt.legend(legend_label, loc="center left", bbox_to_anchor=(1.5, 0.5))
if out_file:
plt.savefig(out_file, bbox_inches="tight")
else:
plt.show()
plt.close()
#
# Create an assigned color for each category so the colors are consistent
def assign_colors_to_groups(df):
"""Create a dictionary to map each spending category in a
dataframe to a specific color so that they are consistent
across different visaulizations
df - a dataframe indexed by a spending group
"""
# Tab20 is a matplotlib colormap
# Others can be found here:
# https://matplotlib.org/stable/tutorials/colors/colormaps.html
tab20_colors = list(plt.cm.tab20.colors)
color_list = tab20_colors
# If we have more than 20 Spending Groups grow
# the color list so that each Group gets a specific color
# Repeats should not be close to each other
while len(color_list) < len(df.index):
color_list += tab20_colors
color_list = tuple(color_list)
# Assign each Spending Group a specific color
return dict(zip(df.index, color_list[: len(df.index)]))
def read_structured_transactions(
structured_transactions,
raw_transactions,
index,
structured_data_description="structured transaction data",
):
"""Create a dataframe from a csv file that has contains a subset of
transactions which have been structured to include only spending or income,
with spending groups, etc
structured_stransactions - a CSV file with the structured transaction data
raw_transactions - the raw mint transaction data used to create the structured data
index - the column that should be used for the index in the returned dataframe
structured_data_description - description for error messages
"""
# Make sure structured is newer than raw mint data
try:
f1 = os.path.getmtime(structured_transactions)
f2 = os.path.getmtime(raw_transactions)
if f1 < f2:
print(
"Raw mint transactions data: "
+ raw_transactions
+ " is newer than the "
+ structured_data_description
+ ": "
+ structured_transactions
+ ".\n"
+ 'Please run "python extract_spending_and_income.py" first.'
)
quit()
except BaseException as e:
print(
"Failed to read "
+ structured_data_description
+ ": "
+ structured_transactions
+ "\n{}".format(e)
)
print('Please run "python extract_spending_and_income.py" first.')
quit()
# Read the summarized annual expenses by spending group
try:
print(
"Reading "
+ structured_data_description
+ " from "
+ structured_transactions
)
df = pd.read_csv(structured_transactions)
df.set_index(index, inplace=True)
except BaseException as e:
print("Failed to read " + structured_data_description + ": {}".format(e))
quit()
return df
def build_summary_table(df, ret_df=None):
"""Creata dataframe suitable for output as a table
to provide a "big picture" view of year over year
spending/income by category
If included add predicted retirment spending in the summary
"""
# Calculate the Annual Totals for all Groups
sum_row = {col: df[col].sum() for col in df}
# Calculate the Annual Total of Retirement expenses
if ret_df is not None:
sum_ret_row = {col: ret_df[col].sum() for col in ret_df}
# Add these new rows to the table for display
df.loc[" "] = pd.Series(" ")
df.loc["Total"] = sum_row
if ret_df is not None:
df.loc["Retirement Total"] = sum_ret_row
# Dispaly nueric values as currency, NaN as an empty row
pd.options.display.float_format = "${:,.2f}".format
df.replace(np.nan, "", inplace=True)
return df
def build_category_details(df, category=""):
if category != "":
category_costs = df[df["Spending Group"] == category]
else:
category_costs = df
category_expenses = category_costs.groupby(["Category"]).sum()
category_expenses = category_expenses.sort_index(axis=1)
category_expenses.loc["Total"] = category_expenses.sum()
return category_expenses