-
Notifications
You must be signed in to change notification settings - Fork 1
/
DatasetParsers.jl
43 lines (40 loc) · 1.37 KB
/
DatasetParsers.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Copyright (c) 2021 Idiap Research Institute, http://www.idiap.ch/
# Niccolò Antonello <[email protected]>
export get_uttID2file
"""
    get_uttID2file(dataset_path, folder::String)

Recursively scan `joinpath(dataset_path, folder)` (following symlinks) and return a
`Dict{String,String}` mapping each utterance ID to the path of its `.wav` file.

The utterance ID has the form `<spkID>-<type>-<sentenceID>`, where `spkID` is the name
of the directory containing the file, `type` is the name of that directory's parent and
`sentenceID` is the file name up to its first `.`.

Only files whose name ends in `.wav` are indexed. Directories without such files are
skipped.

# Throws
- `ArgumentError` if a `.wav` file sits in a directory whose path has fewer than two
  components, so that no `type` can be derived from it.
"""
function get_uttID2file(dataset_path, folder::String)
    uttID2file = Dict{String,String}()
    top = joinpath(dataset_path, folder)
    for (root, _, files) in walkdir(top; follow_symlinks=true)
        # Match on the extension only, so that e.g. "x.wav.bak" is not picked up.
        wavs = filter(f -> endswith(f, ".wav"), files)
        isempty(wavs) && continue
        # `splitpath` is separator-agnostic and ignores a trailing separator.
        folders = splitpath(root)
        length(folders) >= 2 || throw(
            ArgumentError("cannot derive speaker and type from directory \"$root\""),
        )
        spkID = folders[end]
        type = folders[end - 1]
        for f in wavs
            sentenceID = split(f, "."; limit=2)[1]
            uttID = "$(spkID)-$(type)-$(sentenceID)"
            uttID2file[uttID] = joinpath(root, f)
        end
    end
    return uttID2file
end
export get_uttID2text
"""
    get_uttID2text(uttID2file::Dict)

Return a `Dict{String,String}` mapping every key of `uttID2file` to its transcription.

The transcription is derived from the third `-`-separated field of the utterance ID
(the sentence ID): each character is replaced by its spoken word (`z` is "ZERO", `o` is
"OH", `1` to `9` are "ONE" to "NINE") and the words are joined by single spaces. The
characters `a` and `b` map to an empty word and therefore do not contribute to the
transcription.

# Throws
- `ErrorException` if an utterance ID has fewer than three `-`-separated fields or if
  its sentence ID contains a character that has no mapping.
"""
function get_uttID2text(uttID2file::Dict)
    d = Dict(
        'z' => "ZERO", '3' => "THREE", '7' => "SEVEN",
        'o' => "OH", '4' => "FOUR", '8' => "EIGHT",
        '1' => "ONE", '5' => "FIVE", '9' => "NINE",
        '2' => "TWO", '6' => "SIX", 'a' => "", 'b' => "",
    )
    uttID2text = Dict{String,String}()
    for uttID in keys(uttID2file)
        fields = split(uttID, "-")
        length(fields) >= 3 ||
            error("$uttID is not of the form spkID-type-sentenceID, invalid dataset!")
        text = fields[3]
        words = String[]
        for t in text
            # Explicit lookup instead of try/catch so unrelated errors are not masked.
            word = get(d, t, nothing)
            word === nothing && error("$text is an invalid filename, invalid dataset!")
            # Drop silent markers so they never produce consecutive spaces.
            isempty(word) || push!(words, word)
        end
        uttID2text[uttID] = join(words, " ")
    end
    return uttID2text
end
"""
    get_uttID2wav(uttID2file::Dict; T=Float32)

Return a `Dict` mapping every key of `uttID2file` to the samples of the associated file.

For each entry, the file path is passed to `load`, the `data` field of the result is
converted element-wise with `T`, and the converted array is flattened with `[:]`.

# Keywords
- `T=Float32`: applied element-wise to the loaded data.

NOTE(review): `load` is not defined in this block; presumably it comes from an audio
I/O package imported elsewhere. Confirm against the enclosing module.
"""
function get_uttID2wav(uttID2file::Dict; T=Float32)
    return Dict(
        uttID => T.(load(path).data)[:] for (uttID, path) in uttID2file
    )
end