Skip to content
Snippets Groups Projects
Verified Commit 64b3a647 authored by Lucas Ondel Yang's avatar Lucas Ondel Yang
Browse files

updated tests

parent 716edf54
No related branches found
No related tags found
1 merge request: !14 Adding "Synthetic Dataset" in the available set of corpora
Pipeline #3595 passed
......@@ -15,6 +15,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[compat]
CSV = "0.10.15"
DataFrames = "1.7.0"
SyntheticVowelDataset = "0.1.1"
CSV = "0.10"
DataFrames = "1.7"
SyntheticVowelDataset = "0.1"
......@@ -5,7 +5,7 @@
function prepare(::Val{:SYNTHETIC_VOWEL_DATASET}, synsetdir, odir)
# Validate the data directory
! isdir(synsetdir) && throw(ArgumentError("invalid path $(synsetdir)"))
# Create the output directory.
dir = mkpath(odir)
rm(joinpath(odir, "recordings.jsonl"), force=true)
......@@ -41,7 +41,7 @@ function synset_recordings(dir::AbstractString)
for file in files
lname, ext = splitext(file)
sname = split(lname, "_")
ext != ".wav" && continue
spk = join(sname[1:3], "_")
......@@ -70,14 +70,14 @@ function synset_metadata(dir)
metadata = Dict()
fpath = "$dir/$(basename(dir))_detailed.csv"
fpath = joinpath("$dir", "$(basename(dir))_detailed.csv")
df = DataFrame(CSV.File(fpath))
# Get number of filter coefficients
countfilter = count(col -> occursin(r"^a_\d+$", col), names(df))
for row in eachrow(df)
# Get metadata
spk = row["fname"]
gender = row["gender"]
......@@ -86,10 +86,10 @@ function synset_metadata(dir)
f₀ = row["f0"]
ϕ = row["ϕ"]
vtl = row["vtl"]
filter = Dict(["a_$i" => row["a_$i"] for i in 1:countfilter])
filter = Dict(["a_$i" => row["a_$i"] for i in 1:countfilter])
angles = Dict([$i" => row[$i"] for i in 1:Int(countfilter/2)])
magnitudes = Dict(["r$i" => row["r$i"] for i in 1:Int(countfilter/2)])
magnitudes = Dict(["r$i" => row["r$i"] for i in 1:Int(countfilter/2)])
id = "$(spk)_$(gender)_$(vowel)_$(sigid)"
metadata[id] = Annotation(
id, # recording id and annotation id are the same since we have
......
using JSON
using SpeechDatasets
using SyntheticVowelDataset
using Test
## The following tests do not work on the GitLab CI because the volumes are not mounted on the runner. TODO: find better tests.
using JSON
PATHS = JSON.parsefile(
joinpath(@__DIR__, "src/corpora", "corpora.json")
)
#PATHS = JSON.parsefile(
# joinpath(@__DIR__, "../src/corpora", "corpora.json")
#)
# rm("/tmp/pfc/recordings.jsonl", force=true)
......@@ -45,11 +45,14 @@ PATHS = JSON.parsefile(
# @test isfile("/tmp/pfc/recordings.jsonl")
# @test isfile("/tmp/pfc/annotations.jsonl")
using SyntheticVowelDataset
println("Testing SYNTHETIC_VOWEL_DATASET loading")
SyntheticVowelDataset.generate("/tmp/synthetic_vowel_dataset_dir", "calliope"; classes_number=1, signals_number=1)
ds = SpeechDatasets.dataset(:SYNTHETIC_VOWEL_DATASET, "/tmp/synthetic_vowel_dataset_dir", "/tmp/synset")
@testset "SYNTHETIC_VOWEL_DATASET" begin
# Use fresh temporary directories (kept after the run: `cleanup = false`)
# for the generated corpus and for the manifests written by SpeechDatasets.
corpusdir = mktempdir(; cleanup = false)
outdir = mktempdir(; cleanup = false)
# Generate the smallest possible synthetic corpus: one class, one signal.
SyntheticVowelDataset.generate(corpusdir, "calliope"; classes_number=1, signals_number=1)
# Loading the dataset is expected to write the manifest files into `outdir`.
ds = SpeechDatasets.dataset(:SYNTHETIC_VOWEL_DATASET, corpusdir, outdir)
# Both manifests must exist once the dataset has been prepared.
for manifest in ("recordings.jsonl", "annotations.jsonl")
@test isfile(joinpath(outdir, manifest))
end
end
@test isfile("/tmp/synset/recordings.jsonl")
@test isfile("/tmp/synset/annotations.jsonl")
  • 🤖 CI Bot @project_1247_bot_1b5f29a72d826746f0de20d4c092a6ca

    mentioned in commit f6266ec7

    ·

    mentioned in commit f6266ec7

    Toggle commit list
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment