data_collection.py
import argparse
import os
from collections import defaultdict

from utils import load_from_file, save_to_file
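# `load_from_file` and `save_to_file` are this repo's csv helpers defined in utils.py.
# Based only on how they are called below, a minimal sketch of compatible
# implementations (an assumption, not the repo's actual code) would be:
#
#   import csv
#
#   def load_from_file(fpath, noheader=True):
#       """Read a csv into a list of rows; drop the header row if noheader."""
#       with open(fpath, newline="") as f:
#           rows = list(csv.reader(f))
#       return rows[1:] if noheader else rows
#
#   def save_to_file(rows, fpath):
#       """Write a list of rows out as a csv."""
#       with open(fpath, "w", newline="") as f:
#           csv.writer(f).writerows(rows)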


def aggregate_responses(raw_fpath, result_fpath):
    """
    :param raw_fpath: file path to raw csv containing Google Form responses
    :param result_fpath: file path to aggregated results

    Assume the fields of the csv file are Timestamp, Username, LTL Template Type,
    Number of Propositions, Utterance.
    The raw responses from the Google Form have many empty columns in each row.
    To aggregate the responses and save them in a csv file,
    1. Fix incorrect responses in Google Sheets if needed.
    2. Download the responses to the Google Form from Google Sheets as a csv file, then place it in the data folder.
    3. Run python data_collection.py.
    """
    raw_data = load_from_file(raw_fpath, noheader=False)
    fields = raw_data.pop(0)  # header row of the raw csv
    result_fields = fields[:4] + ["Utterance"]
    results = [result_fields]
    for row in raw_data:
        result_row = [None] * len(result_fields)
        for col_idx, (field, col) in enumerate(zip(fields, row)):
            if "Number" in field or "number" in field:
                if col:
                    result_row[3] = col
            elif "Utterance" in field:
                if col:
                    if result_row[-1]:  # more than one utterance recorded from this participant
                        results.append(result_row)
                        result_row = result_row[:]  # start a new row; every column same as the last row except the utterance
                        result_row[-1] = col
                    else:  # 1st utterance recorded from this participant
                        result_row[-1] = col
            else:  # Timestamp, Username, LTL Template Type
                result_row[col_idx] = col
        results.append(result_row)
    save_to_file(results, result_fpath)


def analyze_responses(result_fpath, analysis_fpath):
    """
    :param result_fpath: file path to aggregated results
    :param analysis_fpath: file path to the analysis of the results
    """
    results = load_from_file(result_fpath, noheader=True)
    counter = defaultdict(lambda: defaultdict(int))  # ltl_type -> nprops -> number of utterances
    for _, _, ltl_type, nprops, _ in results:
        counter[ltl_type][nprops] += 1
    analysis = [["LTL Template Type", "Number of Propositions", "Number of Utterances"]]
    for ltl_type, nprops2count in counter.items():
        for nprops, count in nprops2count.items():
            analysis.append([ltl_type, nprops, count])
    save_to_file(analysis, analysis_fpath)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--batch", type=int, default=2, help="batch id")
    args = parser.parse_args()

    raw_fpath = os.path.join("data", f"raw_responses_batch{args.batch}.csv")
    result_fpath = os.path.join("data", f"aggregated_responses_batch{args.batch}.csv")
    analysis_fpath = os.path.join("data", f"analysis_batch{args.batch}.csv")

    aggregate_responses(raw_fpath, result_fpath)
    analyze_responses(result_fpath, analysis_fpath)
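
# Example invocation (hypothetical batch id; assumes data/raw_responses_batch3.csv exists):
#   python data_collection.py --batch 3
# This writes data/aggregated_responses_batch3.csv and data/analysis_batch3.csv.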