-
Notifications
You must be signed in to change notification settings - Fork 0
/
scratch.py
executable file
·85 lines (60 loc) · 1.98 KB
/
scratch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!python3
import glob
import os
import pickle
from typing import Dict, List
from icecream import ic
import life
from life import GetPychiatristReport
tmp = os.path.expanduser("~/tmp")
def load_all_reports():
# load from pickle file
return pickle.load(open(f"{tmp}/reports.pkl", "rb"))
def load_raw_reports():
reports: List[life.GetPychiatristReport] = []
report_path = os.path.expanduser("~/tmp/journal_report")
# loop through the globbed json files
for file in glob.glob(report_path + "/*.json"):
try:
text = open(file).read()
report = life.GetPychiatristReport.model_validate_json(text)
reports.append(report)
except Exception as e:
ic("failed to load file: ", file)
ic(e)
return reports
# pickle.dump(load_raw_reports(), open(f"{tmp}/reports.pkl", "wb"))
reports = load_all_reports()
print(len(reports))
import pandas as pd # noqa: E402, performance
# df = pd.DataFrame([to_people_sentiment_dict(r) for r in reports]).set_index("date")
# df["tori"].value_counts()
# df["clive"].value_counts()
# group df by month, and then do the valueconts for Tori
def t():
m = df.groupby(pd.Grouper(freq="Y"))["tori"].value_counts()
for t in m:
print(t)
t()
def report_to_positive(r: GetPychiatristReport):
row: Dict = {"date": r.Date}
row["Positive"] = [c.reason for c in r.PostiveEmotionCause]
return row
def report_to_negative(r: GetPychiatristReport):
row: Dict = {"date": r.Date}
row["Negative"] = [c.reason for c in r.NegativeEmotionCause]
return row
def report_to_summary(r: GetPychiatristReport):
row: Dict = {"date": r.Date}
row["Summary"] = r.PointFormSummaryOfEntry
return row
df = (
pd.DataFrame([report_to_summary(r) for r in reports])
.set_index("date")
.sort_index()
.explode("Summary")
)
df
# output df.posiive to tmp/positive
# filter df to oonly yeras 2012-2013
df["2021-01-1":"2033/1/1"].to_csv(f"{tmp}/summary.csv")