Skip to content
Snippets Groups Projects
Commit 0d04b5f9 authored by simon devauchelle's avatar simon devauchelle
Browse files

Adding TIMIT phonetic alignments.

parent 991ec3b9
No related branches found
No related tags found
No related merge requests found
...@@ -10,6 +10,7 @@ export ...@@ -10,6 +10,7 @@ export
# ManifestItem # ManifestItem
Recording, Recording,
Supervision, Supervision,
Alignment,
load, load,
# Manifest interface # Manifest interface
......
...@@ -250,6 +250,14 @@ function timit_prepare(timitdir, dir; audio_fmt="SPHERE") ...@@ -250,6 +250,14 @@ function timit_prepare(timitdir, dir; audio_fmt="SPHERE")
open(manifestpath, "w") do f open(manifestpath, "w") do f
writemanifest(f, supervisions) writemanifest(f, supervisions)
end end
# Alignments
manifestpath = joinpath(dir, "alignments-$(subset).jsonl")
@debug "Preparing $manifestpath"
alignments = timit_alignments(sdir)
open(manifestpath, "w") do f
writemanifest(f, alignments)
end
end end
end end
...@@ -310,7 +318,7 @@ function timit_supervisions(dir) ...@@ -310,7 +318,7 @@ function timit_supervisions(dir)
"text" => join(words, " "), "text" => join(words, " "),
"dialect" => dialect, "dialect" => dialect,
"speaker" => spk, "speaker" => spk,
"sex" => string(first(spk)) "sex" => string(first(spk)),
) )
) )
end end
...@@ -319,11 +327,60 @@ function timit_supervisions(dir) ...@@ -319,11 +327,60 @@ function timit_supervisions(dir)
end end
"""
    timit_alignments(dir)

Walk `dir` recursively and build one `Alignment` per `.phn` file found.

Each line of a `.phn` file is split on whitespace into a start sample, an
end sample and a phone label; the label is mapped through
`TIMIT_PHONE_MAP48`. The dialect and the speaker are taken from the last
two components of the directory containing the file.

Return a dictionary mapping the identifier `"timit_<speaker>_<name>"` to
its `Alignment`. The `data` of each alignment holds the keys `"phones"`
(vector of `(start, end, phone)` tuples), `"dialect"`, `"speaker"` and
`"sex"` (first character of the speaker name).

Throw an `ArgumentError` if `dir` is not a directory or if a `.phn` file
contains no line.
"""
function timit_alignments(dir)
    isdir(dir) || throw(ArgumentError("expected directory $dir"))

    alignments = Dict()
    for (root, _, files) in walkdir(dir)
        for file in files
            name, ext = splitext(file)
            ext != ".phn" && continue

            # The two trailing components of `root` are read as
            # `<dialect>/<speaker>`.
            _, dialect, spk = rsplit(root, "/", limit=3)
            path = joinpath(root, file)
            id = "timit_$(spk)_$(name)"

            # Parse each line once instead of splatting all the lines of the
            # file into `zip`.
            palign = map(eachline(path)) do line
                s, e, p = rsplit(line, limit=3)
                (parse(Int, s), parse(Int, e), TIMIT_PHONE_MAP48[p])
            end
            isempty(palign) && throw(ArgumentError("empty alignment file $path"))

            start_sample = palign[1][1]
            end_sample = palign[end][2]
            dur = end_sample - start_sample

            # NOTE(review): `start_sample` and `dur` are sample indices read
            # from the file, whereas the `Alignment` docstring describes
            # `start` and `duration` in seconds -- confirm the intended unit.
            alignments[id] = Alignment(
                id,
                id,
                start_sample,
                dur,
                Dict(
                    "phones" => palign,
                    "dialect" => dialect,
                    "speaker" => spk,
                    "sex" => string(first(spk))
                )
            )
        end
    end
    return alignments
end
function TIMIT(timitdir, dir, subset) function TIMIT(timitdir, dir, subset)
if ! (isfile(joinpath(dir, "recordings.jsonl")) && if ! (isfile(joinpath(dir, "recordings.jsonl")) &&
isfile(joinpath(dir, "supervisions-train.jsonl")) && isfile(joinpath(dir, "supervisions-train.jsonl")) &&
isfile(joinpath(dir, "supervisions-dev.jsonl")) && isfile(joinpath(dir, "supervisions-dev.jsonl")) &&
isfile(joinpath(dir, "supervisions-test.jsonl"))) isfile(joinpath(dir, "supervisions-test.jsonl")) &&
isfile(joinpath(dir, "alignments-train.jsonl")) &&
isfile(joinpath(dir, "alignments-dev.jsonl")) &&
isfile(joinpath(dir, "alignments-test.jsonl")))
timit_prepare(timitdir, dir) timit_prepare(timitdir, dir)
end end
dataset(dir, subset) dataset(dir, subset)
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
struct SpeechDataset <: MLUtils.AbstractDataContainer struct SpeechDataset <: MLUtils.AbstractDataContainer
idxs::Vector{AbstractString} idxs::Vector{AbstractString}
supervisions::Dict{AbstractString, Supervision} supervisions::Dict{AbstractString, Supervision}
alignments::Dict{AbstractString, Alignment}
recordings::Dict{AbstractString, Recording} recordings::Dict{AbstractString, Recording}
partition::Symbol partition::Symbol
end end
...@@ -34,15 +35,17 @@ julia> ds[1] ...@@ -34,15 +35,17 @@ julia> ds[1]
""" """
function dataset(manifestroot, partition) function dataset(manifestroot, partition)
sup_path = joinpath(manifestroot, "supervisions-$(partition).jsonl") sup_path = joinpath(manifestroot, "supervisions-$(partition).jsonl")
alg_path = joinpath(manifestroot, "alignments-$(partition).jsonl")
rec_path = joinpath(manifestroot, "recordings.jsonl") rec_path = joinpath(manifestroot, "recordings.jsonl")
supervisions = load(Supervision, sup_path) supervisions = load(Supervision, sup_path)
alignments = load(Alignment, alg_path)
recordings = load(Recording, rec_path) recordings = load(Recording, rec_path)
dataset(supervisions, recordings, partition) dataset(supervisions, alignments, recordings, partition)
end end
function dataset(supervisions, recordings, partition) function dataset(supervisions, alignments, recordings, partition)
idxs = collect(keys(supervisions)) idxs = collect(keys(supervisions))
SpeechDataset(idxs, supervisions, recordings, Symbol(partition)) SpeechDataset(idxs, supervisions, alignments, recordings, Symbol(partition))
end end
function Base.getindex(d::SpeechDataset, key::AbstractString) function Base.getindex(d::SpeechDataset, key::AbstractString)
......
...@@ -105,6 +105,14 @@ Supervision(d::Dict) = Supervision( ...@@ -105,6 +105,14 @@ Supervision(d::Dict) = Supervision(
d["data"] d["data"]
) )
# Build an `Alignment` from a dictionary holding the keys "id",
# "recording_id", "start", "duration" and "data".
function Alignment(d::Dict)
    id, recid = d["id"], d["recording_id"]
    start, duration = d["start"], d["duration"]
    return Alignment(id, recid, start, duration, d["data"])
end
#=====================================================================# #=====================================================================#
# Writing / reading manifest from file. # Writing / reading manifest from file.
...@@ -127,12 +135,14 @@ end ...@@ -127,12 +135,14 @@ end
# Some utilities # Some utilities
manifestname(::Type{<:Recording}, name) = "recordings.jsonl" manifestname(::Type{<:Recording}, name) = "recordings.jsonl"
manifestname(::Type{<:Supervision}, name) = "supervisions-$name.jsonl" manifestname(::Type{<:Supervision}, name) = "supervisions-$name.jsonl"
# File name of the alignment manifest for the subset `name`.
function manifestname(::Type{<:Alignment}, name)
    return "alignments-$name.jsonl"
end
""" """
load(Supervision, path) load(Supervision, path)
load(Alignment, path)
load(Recording, path) load(Recording, path)
Load Recording/Supervision manifest from `path`. Load Recording/Supervisions/Alignments manifest from `path`.
""" """
load(T::Type{<:Union{Recording,Supervision}}, path) = open(f -> readmanifest(f, T), path, "r") load(T::Type{<:Union{Recording, Supervision, Alignment}}, path) = open(f -> readmanifest(f, T), path, "r")
...@@ -81,6 +81,40 @@ end ...@@ -81,6 +81,40 @@ end
Supervision(id, recid; channels = missing, start = -1, duration = -1, data = missing) = Supervision(id, recid; channels = missing, start = -1, duration = -1, data = missing) =
Supervision(id, recid, start, duration, channels, data) Supervision(id, recid, start, duration, channels, data)
"""
struct Alignments <: ManifestItem
id::AbstractString
recording_id::AbstractString
start::Float64
duration::Float64
data::Dict
end
An "alignment" defines a segment of a recording on a single channel.
The `data` field is an arbitrary dictionary holdin the nature of the
alignments. `start` and `duration` (in seconds) defines,
where the segment is locatated within the recoding `recording_id`.
# Constructor
Alignment(id, recording_id, start, duration, data)
Alignment(id, recording_id[;start = -1, duration = -1, data = missing)
If `start` and/or `duration` are negative, the segment is considered to
be the whole sequence length of the recording.
"""
struct Alignment <: ManifestItem
id::AbstractString
recording_id::AbstractString
start::Float64
duration::Float64
data::Dict
end
Alignment(id, recid; start = -1, duration = -1, data = missing) =
Alignment(id, recid, start, duration, data)
""" """
load(recording[; start = -1, duration = -1, channels = recording.channels]) load(recording[; start = -1, duration = -1, channels = recording.channels])
load(recording, supervision) load(recording, supervision)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment