# temporal_shifts.py
#
# Classifies each input sentence into an emotional category with a zero-shot
# model, converts the result to a numeric score, and then plots and reports
# daily, weekly and monthly trends.
from transformers import pipeline
from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt
import sys
# Zero-shot classification pipeline (facebook/bart-large-mnli), built once
# and reused for every sentence.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Candidate labels offered to the classifier.
categories = [
    'Insomnia',
    'Anxiety',
    'Depression',
    'Career Confusion',
    'Positive Outlook',
    'Stress',
    'Health Anxiety',
    'Eating Disorder',
]

# Numeric score attached to each category: 'Positive Outlook' is the
# highest (8) and 'Eating Disorder' the lowest (1).
category_scores = {
    'Positive Outlook': 8,
    'Career Confusion': 7,
    'Health Anxiety': 6,
    'Anxiety': 5,
    'Stress': 4,
    'Depression': 3,
    'Insomnia': 2,
    'Eating Disorder': 1,
}

# Reverse lookup: score -> category name (every score above is unique).
score_categories = dict(zip(category_scores.values(), category_scores.keys()))
# Classify one sentence and return the score of every candidate label
def predict_category(concern):
    """Run the zero-shot classifier on *concern*.

    The module-level ``classifier`` is called in single-label mode
    (``multi_label=False``) with ``categories`` as candidate labels.

    Returns:
        A dict mapping each entry of the result's ``'labels'`` to the
        matching entry of its ``'scores'``.
    """
    prediction = classifier(concern, candidate_labels=categories, multi_label=False)
    return dict(zip(prediction['labels'], prediction['scores']))
# Input CSV (sentences are read from its 'Sentence' column) and report path
input_file = "input_sentences.csv"  # Replace with your input file path
output_file = "output_analysis.txt"
df_input = pd.read_csv(input_file)

# Per-sentence results, kept in the order the sentences appear in the CSV
daily_categories = []
daily_scores = []

for text in df_input['Sentence']:
    label_probs = predict_category(text)

    # Every label whose probability matches the best one once both are
    # rounded to 2 decimal places counts as a tied winner.
    best_rounded = round(max(label_probs.values()), 2)
    winners = [
        label
        for label, prob in label_probs.items()
        if round(prob, 2) == best_rounded
    ]

    # The entry's score is the mean of the winners' category scores.
    mean_score = sum(category_scores[label] for label in winners) / len(winners)

    daily_categories.append(winners)
    daily_scores.append(mean_score)
# One row per processed sentence: 1-based Day number, winning categories, score
day_numbers = range(1, len(daily_scores) + 1)
df = pd.DataFrame({
    'Day': day_numbers,
    'Category': daily_categories,
    'Score': daily_scores,
})

# Bucket days into 1-based weeks (7 days) and months (28 days) for plotting
zero_based_day = df['Day'] - 1
df['Week'] = zero_based_day // 7 + 1
df['Month'] = zero_based_day // 28 + 1
# Line chart of the daily score for the rows that fall in week 1
first_week_df = df.loc[df['Week'] == 1]
week_one_days = first_week_df['Day']

plt.figure(figsize=(10, 4))
plt.plot(
    week_one_days,
    first_week_df['Score'],
    color='purple',
    linestyle='-',
    marker='o',
)
plt.title('Daily Emotional Score (First Week)')
plt.xlabel('Day')
plt.ylabel('Emotional Score')
plt.xticks(week_one_days)  # one tick per plotted day
plt.grid(True)
plt.savefig("daily_emotional_score_first_week.png")  # Save the plot as an image file
# Weekly chart: the most frequent score(s) of each week.
# Each entry is the array of modal scores for one week (several values on a tie).
weekly_emotional_score = [
    df.loc[df['Week'] == week_no, 'Score'].mode().values
    for week_no in df['Week'].unique()
]

plt.figure(figsize=(12, 4))
for position, modal_scores in enumerate(weekly_emotional_score, start=1):
    # One blue marker per modal score; tied modes share the same x position
    for value in modal_scores:
        plt.plot(position, value, 'bo')
plt.title('Weekly Emotional Score')
plt.xlabel('Week')
plt.ylabel('Most Frequent Emotional Score')
plt.xticks(range(1, len(weekly_emotional_score) + 1))
plt.grid(True)
plt.savefig("weekly_emotional_score.png")  # Save the plot as an image file
# Monthly chart: the most frequent score(s) of each month.
# Each entry is the array of modal scores for one month (several values on a tie).
monthly_emotional_score = [
    df.loc[df['Month'] == month_no, 'Score'].mode().values
    for month_no in df['Month'].unique()
]

plt.figure(figsize=(12, 4))
for position, modal_scores in enumerate(monthly_emotional_score, start=1):
    # One green marker per modal score; tied modes share the same x position
    for value in modal_scores:
        plt.plot(position, value, 'go')
plt.title('Monthly Emotional Score')
plt.xlabel('Month')
plt.ylabel('Most Frequent Emotional Score')
plt.xticks(range(1, len(monthly_emotional_score) + 1))
plt.grid(True)
plt.savefig("monthly_emotional_score.png")  # Save the plot as an image file
# Write the text report to output_file: daily analysis and transitions for
# the first week, then the most frequent category of every week.


def _categories_for_scores(frame, scores):
    """Return the category names behind *scores* within *frame*.

    For each score, in the order given, the 'Category' lists of the rows of
    *frame* whose 'Score' equals it are scanned and every name is collected
    once, in first-seen order.

    Names are read from the 'Category' column rather than reverse-mapped
    through ``score_categories`` because a tie day carries the *average* of
    its tied categories' scores. That average may have no entry in the
    mapping (e.g. 7.5, which raised KeyError) or may coincide with the score
    of an unrelated category (e.g. 7 and 5 average to 6).
    """
    names = []
    for score in scores:
        for day_categories in frame.loc[frame['Score'] == score, 'Category']:
            for name in day_categories:
                if name not in names:
                    names.append(name)
    return names


# Every report line is printed with an explicit file= target instead of
# reassigning sys.stdout, so console output is never left pointing at a
# closed file if something below raises.
with open(output_file, "w") as f:
    print("**Daily Analysis (First Week):**", file=f)
    # (day, categories, score) triples, gathered in one pass over the frame
    first_week_days = list(zip(
        first_week_df['Day'],
        first_week_df['Category'],
        first_week_df['Score'],
    ))
    for day, day_categories, score in first_week_days:
        category = ', '.join(day_categories)
        print(f"Day {day}: Predominant emotional category is '{category}' with a score of {score}.", file=f)

    # Print transitions within the first week
    print("\n**Transitions within the First Week:**", file=f)
    for previous, current in zip(first_week_days, first_week_days[1:]):
        prev_day, prev_categories, _ = previous
        curr_day, curr_categories, _ = current
        # Compare as sets so the same tie listed in another order is not a transition
        if set(prev_categories) != set(curr_categories):
            prev_category = ', '.join(prev_categories)
            curr_category = ', '.join(curr_categories)
            print(f"Transition from '{prev_category}' on Day {prev_day} to '{curr_category}' on Day {curr_day}.", file=f)

    # Weekly Analysis
    print("\n**Weekly Analysis (All Weeks):**", file=f)
    for week in df['Week'].unique():
        week_df = df[df['Week'] == week]
        week_mode_categories = _categories_for_scores(week_df, week_df['Score'].mode().values)
        print(f"Week {week}: Most frequent category is {', '.join(week_mode_categories)}.", file=f)
        if week > 1:
            prev_week_df = df[df['Week'] == week - 1]
            prev_week_mode_categories = _categories_for_scores(
                prev_week_df, prev_week_df['Score'].mode().values
            )
            if set(prev_week_mode_categories) != set(week_mode_categories):
                print(f"Transition from Week {week - 1} to Week {week}: Predominant category shifted from {', '.join(prev_week_mode_categories)} to {', '.join(week_mode_categories)}.", file=f)

print(f"Analysis complete. Results saved to '{output_file}', and graphs saved as PNG files.")