Skip to content
Snippets Groups Projects
Commit b26cc7ad authored by Nicolas Denier's avatar Nicolas Denier
Browse files

Merge branch 'formants_TIMIT' into 'main'

Loading of new TIMIT data added : formant values and bandwidth annotations

See merge request ptal/speechdatasets.jl!2
parents 9d82afed 241d1e10
No related branches found
No related tags found
1 merge request: !2 Loading of new TIMIT data added : formant values and bandwidth annotations
......@@ -224,9 +224,12 @@ TIMIT_PHONE_MAP39 = Dict(
#######################################################################
function timit_prepare(timitdir, dir; audio_fmt="SPHERE")
function timit_prepare(timitdir, dir, formantsdir=nothing; audio_fmt="SPHERE")
# Validate the data directory
! isdir(timitdir) && throw(ArgumentError("invalid path $(timitdir)"))
if formantsdir !== nothing
! isdir(formantsdir) && throw(ArgumentError("invalid path $(formantsdir)"))
end
# Create the output directory.
dir = mkpath(dir)
......@@ -256,9 +259,9 @@ function timit_prepare(timitdir, dir; audio_fmt="SPHERE")
# Annotations
@info "Extracting annotations from $timitdir/train"
train_annotations = timit_annotations(joinpath(timitdir, "train"))
train_annotations = timit_annotations(joinpath(timitdir, "train"), formantsdir)
@info "Extracting annotations from $timitdir/test"
test_annotations = timit_annotations(joinpath(timitdir, "test"))
test_annotations = timit_annotations(joinpath(timitdir, "test"), formantsdir)
annotations = merge(train_annotations, test_annotations)
......@@ -332,8 +335,15 @@ function timit_recordings(dir::AbstractString; fmt="SPHERE")
end
function timit_annotations(dir)
function timit_annotations(dir, formantsdir=nothing)
! isdir(dir) && throw(ArgumentError("expected directory $dir"))
if formantsdir !== nothing
ddir = last(splitdir(dir))
formantsdir = joinpath(formantsdir, ddir)
! isdir(formantsdir) && throw(ArgumentError("expected directory $formantsdir"))
end
splitline(line) = rsplit(line, limit=3)
annotations = Dict()
......@@ -355,11 +365,32 @@ function timit_annotations(dir)
# Phones
ppath = joinpath(root, name * ".phn")
palign = Tuple{Int,Int,String}[]
for line in eachline(ppath)
t0, t1, p = split(line)
push!(palign, (parse(Int, t0), parse(Int, t1), String(p)))
if formantsdir !== nothing
forpath = joinpath(formantsdir, dialect, spk, name * ".ft")
else
forpath = ""
end
if isfile(forpath)
# Read available formant values
palign = Tuple{Int,Int,String,NTuple{8, Float32}}[]
for line in eachline(forpath)
t0, t1, p, f1, f2, f3, f4, b1, b2, b3, b4 = split(line)
push!(palign,
(
parse(Int, t0), parse(Int, t1), String(p),
(parse(Float32, f1), parse(Float32, f2), parse(Float32, f3), parse(Float32, f4),
parse(Float32, b1), parse(Float32, b2), parse(Float32, b3), parse(Float32, b4)))
)
end
else
palign = Tuple{Int,Int,String}[]
for line in eachline(ppath)
t0, t1, p = split(line)
push!(palign, (parse(Int, t0), parse(Int, t1), String(p)))
end
end
sentence_type = if startswith(name, "sa")
"dialect"
......@@ -391,12 +422,12 @@ function timit_annotations(dir)
end
# TIMIT(timitdir, dir, subset[, formantsdir]): return `dataset(dir, subset)`, first
# running `timit_prepare` when any of the four manifest files checked below is
# missing from `dir`. `formantsdir` defaults to `nothing` and is passed through
# unchanged to `timit_prepare`.
# NOTE(review): this span is a rendered diff, so the signature and the
# `timit_prepare` call each appear twice; the shorter form looks like the
# pre-change line and the one with `formantsdir` the post-change line. Confirm
# against the actual file.
function TIMIT(timitdir, dir, subset)
function TIMIT(timitdir, dir, subset, formantsdir=nothing)
# Preparation is skipped only when all four manifest files already exist in `dir`.
# In that case `formantsdir` is not used by this function.
if ! (isfile(joinpath(dir, "recordings.jsonl")) &&
isfile(joinpath(dir, "annotations-train.jsonl")) &&
isfile(joinpath(dir, "annotations-dev.jsonl")) &&
isfile(joinpath(dir, "annotations-test.jsonl")))
timit_prepare(timitdir, dir)
timit_prepare(timitdir, dir, formantsdir)
end
# Load the requested subset from the prepared directory.
dataset(dir, subset)
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment