-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathELANtoCSV.py
86 lines (76 loc) Β· 3.59 KB
/
ELANtoCSV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from xml.dom import minidom
import os
from pathlib import Path
import pandas as pd
from globals import GROUP_IDs
#os.chdir("../")
# 0: bocca chiusa
# 1: parla
# 2: porta la forchetta alla bocca
# 3: mastica
# 4: sorride / ride
# 5: mastica e parla
COLUMNS = ["id", "classe", "inizio", "fine"]
def getTimeSlot(slots, id):
for s in slots:
if s.getAttribute("TIME_SLOT_ID") == id:
return s.getAttribute("TIME_VALUE")
return None
def removeOverlappingSignals(signals):
result = [signals[0]]
for i in range(1, len(signals) - 1):
signal = signals[i].copy()
prev = signals[i - 1]
next = signals[i + 1]
if signal[2] > prev[3] and signal[3] < next[2]:
result.append(signal)
else:
if signal[2] < prev[3] and signal[3] > prev[3]:
signal[2] = prev[3]
if signal[3] > next[2] and signal[2] < next[2]:
signal[3] = next[2]
result.append(signal)
return result
if __name__ == "__main__":
Path("csv_from_elan").mkdir(parents=True, exist_ok=True)
files = os.scandir("elan")
for file in files:
if os.path.isfile(file):
print(f'converting file {os.path.join("elan", file.name)}')
result = {"Left": [], "Right": []}
tree = minidom.parse(os.path.join("elan", file.name))
video_id = file.name[file.name.find("_") + 1:-4]
# openposefile = os.path.join("0_data/csv", "out" + participant_id + ".csv")
slots = tree.getElementsByTagName("TIME_SLOT")
tiers = tree.getElementsByTagName("TIER")
for tier in tiers:
behavior_class = None
if tier.getAttribute("LINGUISTIC_TYPE_REF") == "Speaking":
behavior_class = 1
elif tier.getAttribute("LINGUISTIC_TYPE_REF") == "Eating":
behavior_class = 3
elif tier.getAttribute("LINGUISTIC_TYPE_REF") == "Smiling":
behavior_class = 4
elif tier.getAttribute("LINGUISTIC_TYPE_REF") == "Gaze":
behavior_class = None
elif tier.getAttribute("LINGUISTIC_TYPE_REF") == "Actions":
behavior_class = 2
elif tier.getAttribute("LINGUISTIC_TYPE_REF") == "Errors":
behavior_class = None
if behavior_class is not None:
side = tier.getAttribute("PARTICIPANT")
participant_id = GROUP_IDs[int(video_id)][side]
signals = tier.getElementsByTagName("ALIGNABLE_ANNOTATION")
for signal in signals:
start = getTimeSlot(slots, signal.getAttribute("TIME_SLOT_REF1"))
end = getTimeSlot(slots, signal.getAttribute("TIME_SLOT_REF2"))
if start is not None and end is not None:
start = int(int(start) / 1000 * 25)
end = int(int(end) / 1000 * 25)
result[side].append([participant_id, behavior_class, start, end])
result["Left"].sort(key=lambda x: x[2])
#result["Left"] = removeOverlappingSignals(result["Left"])
pd.DataFrame(result["Left"], columns=COLUMNS).to_csv(os.path.join(f'csv_from_elan/{GROUP_IDs[int(video_id)]["Left"]}.csv'), index=False)
result["Right"].sort(key=lambda x: x[2])
#result["Right"] = removeOverlappingSignals(result["Right"])
pd.DataFrame(result["Right"], columns=COLUMNS).to_csv(os.path.join(f'csv_from_elan/{GROUP_IDs[int(video_id)]["Right"]}.csv'), index=False)