forked from eriksargent/nn-diarization
-
Notifications
You must be signed in to change notification settings - Fork 0
/
read_csv.py
36 lines (31 loc) · 1.31 KB
/
read_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import numpy as np
import math
def read_csv(filename, time_step=0.05, time_delta=0.5):
    """Rasterise a labelled-interval CSV into fixed-length frames of labels.

    The CSV must have a header row with at least the columns ``tmax`` (end
    time of each labelled interval) and ``text`` (numeric label of that
    interval). Rows are assumed to be ordered by increasing ``tmax`` --
    NOTE(review): the code never sorts them; confirm against the data source.

    The time axis starts at 0 and is sampled every ``time_step``. Consecutive
    samples are grouped into frames spanning ``time_delta``. Each sample takes
    the label of the first interval whose ``tmax`` is strictly greater than
    the sample time.

    Args:
        filename: Anything ``numpy.genfromtxt`` accepts as input (path,
            file object, list of lines, ...).
        time_step: Spacing between samples, in the same unit as ``tmax``
            (presumably seconds -- confirm against the CSV producer).
        time_delta: Length of one frame, in the same unit.

    Returns:
        An integer array of shape ``(num_frames, samples_per_frame)``.
        Samples past the last interval, and samples whose label could not be
        converted to an integer, are left at 0. If no data rows were parsed,
        the array has zero rows.
    """
    # Tolerance for binary floating-point error in the two ratios below:
    # e.g. 0.3 / 0.1 == 2.9999999999999996, which plain int() turns into 2.
    eps = 1e-9

    # genfromtxt collapses a single data row to a 0-d array; force 1-D so
    # len() and indexing behave the same for any number of rows.
    input_data = np.atleast_1d(np.genfromtxt(
        filename, dtype=float, delimiter=',', names=True))

    # number of samples per data point
    samples_per_point = int(math.floor(time_delta / time_step + eps))

    if input_data.size == 0:
        return np.zeros((0, samples_per_point), dtype=int)

    # Pull the columns out once instead of on every loop iteration.
    interval_ends = input_data['tmax']
    labels = input_data['text']
    num_intervals = len(labels)

    start_time = 0
    end_time = np.max(interval_ends)
    duration = end_time - start_time

    # number of data points of length(time_delta)
    num_data_points = math.ceil(duration / time_delta - eps)

    # Resulting data, built flat (padded with zeros) and reshaped on return;
    # flat sample k corresponds to frame k // samples_per_point, slot
    # k % samples_per_point.
    flat_labels = np.zeros(num_data_points * samples_per_point, dtype=int)

    # index of active measured result
    active_index = 0
    for sample_index in range(flat_labels.size):
        sample_time = sample_index * time_step - start_time

        # Skip every interval that has already ended. A loop (not a single
        # step) is required so intervals shorter than time_step do not leave
        # the pointer lagging behind the current time.
        while (active_index < num_intervals
               and sample_time >= interval_ends[active_index]):
            active_index += 1

        # Past the last labelled interval: the remaining samples stay 0.
        if active_index >= num_intervals:
            break

        try:
            flat_labels[sample_index] = labels[active_index]
        except ValueError:
            # Raised when the label is NaN (e.g. a non-numeric ``text``
            # cell); the sample keeps its zero padding.
            print("Bad data in CSV file: {}".format(filename))

    return flat_labels.reshape(num_data_points, samples_per_point)