diff --git a/Project.toml b/Project.toml index ed6c029fc1f31d933f81fc25205acbe15c40ed71..71b423d92d3fd26ed477a4fa3c8250bba8a0b72d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SpeechDatasets" uuid = "ae813453-fab8-46d9-ab8f-a64c05464021" authors = ["Lucas ONDEL YANG <lucas.ondel@cnrs.fr>", "Simon DEVAUCHELLE <simon.devauchelle@universite-paris-saclay.fr>", "Nicolas DENIER <nicolas.denier@cnrs.fr>", "Remi URO <uro@lisn.fr>"] -version = "0.21.0" +version = "0.21.1" [deps] AudioSources = "09fc2aa8-47ce-428a-ad90-e701fa7ea67f" diff --git a/src/corpora/synthetic_vowel_dataset.jl b/src/corpora/synthetic_vowel_dataset.jl index fe19e7afa3cf5bc0c32584442d55205657b7fdff..90c7c5c62f4cffb86cb5c9b240cec94780cf63d9 100644 --- a/src/corpora/synthetic_vowel_dataset.jl +++ b/src/corpora/synthetic_vowel_dataset.jl @@ -44,12 +44,12 @@ function synset_recordings(dir::AbstractString) ext != ".wav" && continue - spk = join(sname[1:3], "_") + spkid = sname[3] gender, vowel, sigid = sname[4], sname[5], sname[6] path = joinpath(root, file) - id = "$(spk)_$(gender)_$(vowel)_$(sigid)" + id = "spk_$(spkid)_$(gender)_$(vowel)_$(sigid)" audio_src = AudioSources.FileAudioSource(path) @@ -79,7 +79,8 @@ function synset_metadata(dir) for row in eachrow(df) # Get metadata - spk = row["fname"] + spk = split(row["fname"], "_") + spkid = spk[3] gender = row["gender"] vowel = row["vowel"] sigid = split(row["signal"], "_")[end] @@ -90,7 +91,7 @@ function synset_metadata(dir) angles = Dict(["θ$i" => row["θ$i"] for i in 1:Int(countfilter/2)]) magnitudes = Dict(["r$i" => row["r$i"] for i in 1:Int(countfilter/2)]) - id = "$(spk)_$(gender)_$(vowel)_$(sigid)" + id = "spk_$(spkid)_$(gender)_$(vowel)_$(sigid)" metadata[id] = Annotation( id, # recording id and annotation id are the same since we have id, # a one-to-one mapping @@ -98,7 +99,7 @@ function synset_metadata(dir) -1, # recording [1], # only 1 channel (mono recording) Dict( - "spk" => spk, + "spk" => spkid, "gender" => gender, "vowel" => vowel, "sigid" => sigid,