using JSON
using SpeechDatasets
using SyntheticVowelDataset
using Test


## The following tests are disabled (commented out) because they do not work on the GitLab CI: the volumes are not mounted on the runner. TODO: find better tests.

#PATHS = JSON.parsefile(
#    joinpath(@__DIR__, "../src/corpora", "corpora.json")
#)


# rm("/tmp/pfc/recordings.jsonl", force=true)
# rm("/tmp/pfc/annotations.jsonl", force=true)

# rm("/tmp/timit/recordings.jsonl", force=true)
# rm("/tmp/timit/annotations.jsonl", force=true)

# rm("/tmp/FAETAR_ASR_CHALLENGE_2025/recordings.jsonl", force=true)
# rm("/tmp/FAETAR_ASR_CHALLENGE_2025/annotations.jsonl", force=true)

# rm("/tmp/synset/recordings.jsonl", force=true)
# rm("/tmp/synset/annotations.jsonl", force=true)


# println("Testing FAETAR_ASR_CHALLENGE_2025 loading")
# ds = SpeechDatasets.dataset(:FAETAR_ASR_CHALLENGE_2025, PATHS["FAETAR_ASR_CHALLENGE_2025"]["path"], "/tmp/FAETAR_ASR_CHALLENGE_2025")

# @test isfile("/tmp/FAETAR_ASR_CHALLENGE_2025/recordings.jsonl")
# @test isfile("/tmp/FAETAR_ASR_CHALLENGE_2025/annotations.jsonl")


# println("Testing TIMIT loading")
# ds = SpeechDatasets.dataset(:TIMIT, PATHS["TIMIT"]["path"], "/tmp/timit")

# @test isfile("/tmp/timit/recordings.jsonl")
# @test isfile("/tmp/timit/annotations.jsonl")


# println("Testing PFC_LISN loading")
# ds = SpeechDatasets.dataset(:PFC_LISN, PATHS["PFC_LISN"]["path"], "/tmp/pfc")

# @test isfile("/tmp/pfc/recordings.jsonl")
# @test isfile("/tmp/pfc/annotations.jsonl")


# End-to-end check on a small generated corpus: generates a synthetic vowel
# dataset (`classes_number=1`, `signals_number=1`) into a scratch directory, loads
# it through `SpeechDatasets.dataset`, and verifies that both manifest files exist
# in the (initially empty) manifest directory afterwards.
@testset "SYNTHETIC_VOWEL_DATASET" begin
    # Rely on the default `cleanup = true` so the scratch directories are removed
    # when the Julia process exits instead of accumulating in the temp directory
    # after every test run.
    datadir = mktempdir()
    manifestdir = mktempdir()
    SyntheticVowelDataset.generate(datadir, "calliope"; classes_number=1, signals_number=1)

    # The returned dataset object is not needed here; the assertions below only
    # inspect the manifest files found in `manifestdir` after this call.
    SpeechDatasets.dataset(:SYNTHETIC_VOWEL_DATASET, datadir, manifestdir)

    @test isfile(joinpath(manifestdir, "recordings.jsonl"))
    @test isfile(joinpath(manifestdir, "annotations.jsonl"))
end