# timit.jl: preparation of the TIMIT corpus (recording and annotation manifests).
# SPDX-License-Identifier: CECILL-C

#######################################################################

# Names of the available subsets. Every subset name maps to itself.
const TIMIT_SUBSETS = Dict(subset => subset for subset in ("train", "dev", "test"))


# Identifiers of the 50 speakers that make up the "dev" subset.
const TIMIT_DEV_SPK_LIST = Set{String}([
    "faks0", "fdac1", "fjem0", "mgwt0", "mjar0",
    "mmdb1", "mmdm2", "mpdf0", "fcmh0", "fkms0",
    "mbdg0", "mbwm0", "mcsh0", "fadg0", "fdms0",
    "fedw0", "mgjf0", "mglb0", "mrtk0", "mtaa0",
    "mtdt0", "mthc0", "mwjg0", "fnmr0", "frew0",
    "fsem0", "mbns0", "mmjr0", "mdls0", "mdlf0",
    "mdvc0", "mers0", "fmah0", "fdrw0", "mrcs0",
    "mrjm4", "fcal1", "mmwh0", "fjsj0", "majc0",
    "mjsw0", "mreb0", "fgjd0", "fjmg0", "mroa0",
    "mteb0", "mjfc0", "mrjr0", "fmml0", "mrws1",
])

# Identifiers of the 24 speakers that make up the "test" subset.
const TIMIT_TEST_SPK_LIST = Set{String}([
    "mdab0", "mwbt0", "felc0",
    "mtas1", "mwew0", "fpas0",
    "mjmp0", "mlnt0", "fpkt0",
    "mlll0", "mtls0", "fjlm0",
    "mbpm0", "mklt0", "fnlp0",
    "mcmj0", "mjdh0", "fmgd0",
    "mgrt0", "mnjm0", "fdhc0",
    "mjln0", "mpam0", "fmld0",
])

# Mapping from the 61 phone labels of the TIMIT transcriptions to a reduced
# set of 48 labels.
#
# The label "q" maps to the empty string.
# NOTE(review): presumably an empty target means "drop this phone" — confirm
# against the code consuming this table.
#
# Declared `const` (like the other tables of this file) so that the global
# has a concrete, inferable type.
const TIMIT_PHONE_MAP48 = Dict(
    "aa"    => "aa",
    "ae"    => "ae",
    "ah"    => "ah",
    "ao"    => "ao",
    "aw"    => "aw",
    "ax"    => "ax",
    "ax-h"  => "ax",
    "axr"   => "er",
    "ay"    => "ay",
    "b"     => "b",
    "bcl"   => "vcl",
    "ch"    => "ch",
    "d"     => "d",
    "dcl"   => "vcl",
    "dh"    => "dh",
    "dx"    => "dx",
    "eh"    => "eh",
    "el"    => "el",
    "em"    => "m",
    "en"    => "en",
    "eng"   => "ng",
    "epi"   => "epi",
    "er"    => "er",
    "ey"    => "ey",
    "f"     => "f",
    "g"     => "g",
    "gcl"   => "vcl",
    "h#"    => "sil",
    "hh"    => "hh",
    "hv"    => "hh",
    "ih"    => "ih",
    "ix"    => "ix",
    "iy"    => "iy",
    "jh"    => "jh",
    "k"     => "k",
    "kcl"   => "cl",
    "l"     => "l",
    "m"     => "m",
    "n"     => "n",
    "ng"    => "ng",
    "nx"    => "n",
    "ow"    => "ow",
    "oy"    => "oy",
    "p"     => "p",
    "pau"   => "sil",
    "pcl"   => "cl",
    "q"     => "",
    "r"     => "r",
    "s"     => "s",
    "sh"    => "sh",
    "t"     => "t",
    "tcl"   => "cl",
    "th"    => "th",
    "uh"    => "uh",
    "uw"    => "uw",
    "ux"    => "uw",
    "v"     => "v",
    "w"     => "w",
    "y"     => "y",
    "z"     => "z",
    "zh"    => "zh"
)

# Mapping from the 61 phone labels of the TIMIT transcriptions to a reduced
# set of 39 labels.
#
# The label "q" maps to the empty string.
# NOTE(review): presumably an empty target means "drop this phone" — confirm
# against the code consuming this table.
#
# Declared `const` (like the other tables of this file) so that the global
# has a concrete, inferable type.
const TIMIT_PHONE_MAP39 = Dict(
    "aa"    => "aa",
    "ae"    => "ae",
    "ah"    => "ah",
    "ao"    => "aa",
    "aw"    => "aw",
    "ax"    => "ah",
    "ax-h"  => "ah",
    "axr"   => "er",
    "ay"    => "ay",
    "b"     => "b",
    "bcl"   => "sil",
    "ch"    => "ch",
    "d"     => "d",
    "dcl"   => "sil",
    "dh"    => "dh",
    "dx"    => "dx",
    "eh"    => "eh",
    "el"    => "l",
    "em"    => "m",
    "en"    => "n",
    "eng"   => "ng",
    "epi"   => "sil",
    "er"    => "er",
    "ey"    => "ey",
    "f"     => "f",
    "g"     => "g",
    "gcl"   => "sil",
    "h#"    => "sil",
    "hh"    => "hh",
    "hv"    => "hh",
    "ih"    => "ih",
    "ix"    => "ih",
    "iy"    => "iy",
    "jh"    => "jh",
    "k"     => "k",
    "kcl"   => "sil",
    "l"     => "l",
    "m"     => "m",
    "n"     => "n",
    "ng"    => "ng",
    "nx"    => "n",
    "ow"    => "ow",
    "oy"    => "oy",
    "p"     => "p",
    "pau"   => "sil",
    "pcl"   => "sil",
    "q"     => "",
    "r"     => "r",
    "s"     => "s",
    "sh"    => "sh",
    "t"     => "t",
    "tcl"   => "sil",
    "th"    => "th",
    "uh"    => "uh",
    "uw"    => "uw",
    "ux"    => "uw",
    "v"     => "v",
    "w"     => "w",
    "y"     => "y",
    "z"     => "z",
    "zh"    => "sh"
)

#######################################################################


"""
    timit_prepare(timitdir, dir, formantsdir=nothing; audio_fmt="SPHERE")

Extract the recordings and annotations of the TIMIT corpus stored in `timitdir`
and write the corresponding manifests into `dir`.

The following files are written in `dir` (which is created if needed):
- `recordings.jsonl`: the recordings of `timitdir/train` and `timitdir/test`;
- `annotations-train.jsonl`, `annotations-dev.jsonl`, `annotations-test.jsonl`:
  the annotations of each subset.

Only the annotations whose sentence type is `"compact"` or `"diverse"` are kept.
The "dev" and "test" subsets are selected with `TIMIT_DEV_SPK_LIST` and
`TIMIT_TEST_SPK_LIST`; the "train" subset contains the speakers found in
`timitdir/train` that are not part of `TIMIT_DEV_SPK_LIST`.

# Arguments
- `timitdir`: root directory of the corpus (must contain `train` and `test`).
- `dir`: output directory for the manifests.
- `formantsdir`: optional directory forwarded to `timit_annotations`; `nothing`
  (the default) disables it.

# Keywords
- `audio_fmt`: forwarded to `timit_recordings` as `fmt`.

# Throws
- `ArgumentError` if `timitdir`, or a `formantsdir` different from `nothing`,
  is not an existing directory.
"""
function timit_prepare(timitdir, dir, formantsdir=nothing; audio_fmt="SPHERE")
    # Validate the data directories. `formantsdir` is optional: only check it
    # when it was provided (`isdir(nothing)` would raise a `MethodError`).
    isdir(timitdir) || throw(ArgumentError("invalid path $(timitdir)"))
    if formantsdir !== nothing
        isdir(formantsdir) || throw(ArgumentError("invalid path $(formantsdir)"))
    end

    # Create the output directory and discard any previous recordings manifest.
    dir = mkpath(dir)
    recordings_manifest = joinpath(dir, "recordings.jsonl")
    rm(recordings_manifest, force=true)

    ## Recordings
    @info "Extracting recordings from $timitdir/train"
    train_recordings = timit_recordings(joinpath(timitdir, "train"); fmt=audio_fmt)

    # We extract the name of the speakers that are not in the dev set.
    # Recording ids have the form "timit_<speaker>_<utterance>".
    train_speakers = Set{String}()
    for id in keys(train_recordings)
        _, spk, _ = split(id, "_")
        if !(spk in TIMIT_DEV_SPK_LIST)
            push!(train_speakers, spk)
        end
    end

    @info "Extracting recordings from $timitdir/test"
    test_recordings = timit_recordings(joinpath(timitdir, "test"); fmt=audio_fmt)
    recordings = merge(train_recordings, test_recordings)

    open(recordings_manifest, "a") do f
        writemanifest(f, recordings)
    end

    ## Annotations
    @info "Extracting annotations from $timitdir/train"
    train_annotations = timit_annotations(joinpath(timitdir, "train"), formantsdir)
    @info "Extracting annotations from $timitdir/test"
    test_annotations = timit_annotations(joinpath(timitdir, "test"), formantsdir)
    annotations = merge(train_annotations, test_annotations)

    # Annotations of the "compact" / "diverse" sentences uttered by `speakers`.
    function select(speakers)
        return filter(annotations) do (_, annotation)
            stype = annotation.data["sentence type"]
            (stype == "compact" || stype == "diverse") &&
                annotation.data["speaker"] in speakers
        end
    end

    for (subset, subset_annotations) in (
        "train" => select(train_speakers),
        "dev" => select(TIMIT_DEV_SPK_LIST),
        "test" => select(TIMIT_TEST_SPK_LIST),
    )
        annotations_manifest = joinpath(dir, "annotations-$(subset).jsonl")
        @info "Creating $annotations_manifest"

        open(annotations_manifest, "w") do f
            writemanifest(f, subset_annotations)
        end
    end

    return nothing
end

"""
    timit_recordings(dir::AbstractString; fmt="SPHERE")

Walk `dir` recursively and return a dictionary mapping a recording id to the
`Recording` built for each `.wav` file found.

The id of a recording is `"timit_<speaker>_<name>"` where `<speaker>` is the
name of the directory containing the file and `<name>` is the file name without
its extension. Files with any other extension are ignored.

# Keywords
- `fmt`: when equal to `"SPHERE"` the audio is read through the external
  command `sph2pipe -f wav <path>`; otherwise the file is used directly.

# Throws
- `ArgumentError` if `dir` is not an existing directory.
"""
function timit_recordings(dir::AbstractString; fmt="SPHERE")
    isdir(dir) || throw(ArgumentError("expected directory $dir"))

    recordings = Dict()
    for (root, _, files) in walkdir(dir)
        # The speaker id is the name of the directory holding the audio files.
        spk = basename(root)

        for file in files
            name, ext = splitext(file)
            ext == ".wav" || continue

            path = joinpath(root, file)
            id = "timit_$(spk)_$(name)"

            audio_src = if fmt == "SPHERE"
                CmdAudioSource(`sph2pipe -f wav $path`)
            else
                FileAudioSource(path)
            end

            recordings[id] = Recording(
                id,
                audio_src;
                channels = [1],
                samplerate = 16000
            )
        end
    end

    return recordings
end

# Read a phone alignment file: each line holds "<start> <end> <phone>".
function _timit_phone_alignment(path)
    palign = Tuple{Int,Int,String}[]
    for line in eachline(path)
        t0, t1, p = split(line)
        push!(palign, (parse(Int, t0), parse(Int, t1), String(p)))
    end
    return palign
end

# Read a formant file: each line holds
# "<start> <end> <phone> <f1> <f2> <f3> <f4> <b1> <b2> <b3> <b4>".
# Each entry stores the pairs (f1, b1) ... (f4, b4).
# NOTE(review): presumably f* are formant frequencies and b* their
# bandwidths — confirm with the producer of the ".ft" files.
function _timit_formant_alignment(path)
    palign = Tuple{
        Int,Int,String,
        NTuple{2,Float32},NTuple{2,Float32},NTuple{2,Float32},NTuple{2,Float32}
    }[]
    for line in eachline(path)
        t0, t1, p, f1, f2, f3, f4, b1, b2, b3, b4 = split(line)
        push!(palign, (
            parse(Int, t0), parse(Int, t1), String(p),
            (parse(Float32, f1), parse(Float32, b1)),
            (parse(Float32, f2), parse(Float32, b2)),
            (parse(Float32, f3), parse(Float32, b3)),
            (parse(Float32, f4), parse(Float32, b4))
        ))
    end
    return palign
end

"""
    timit_annotations(dir, formantsdir=nothing)

Walk `dir` recursively and return a dictionary mapping an annotation id to the
annotation of each utterance found.

Utterances are expected under `<dir>/.../<dialect>/<speaker>/`; every file stem
`<name>` of such a directory is processed once and must come with a
`<name>.wrd` file (words) and, unless a formant file is used, a `<name>.phn`
file (phone alignment). The id of an annotation is `"timit_<speaker>_<name>"`.

The sentence type is derived from the file stem: `"dialect"` when it starts
with `sa`, `"compact"` when it starts with `sx` and `"diverse"` otherwise.

# Arguments
- `dir`: directory to scan.
- `formantsdir`: optional directory; when given, the file
  `<formantsdir>/<last component of dir>/<dialect>/<speaker>/<name>.ft` is read
  instead of `<name>.phn` whenever it exists.

# Throws
- `ArgumentError` if `dir`, or the sub-directory of `formantsdir` named after
  the last component of `dir`, is not an existing directory.
"""
function timit_annotations(dir, formantsdir=nothing)
    isdir(dir) || throw(ArgumentError("expected directory $dir"))

    if formantsdir !== nothing
        # The formant files mirror the layout of `dir` under a sub-directory
        # named after its last path component.
        ddir = last(splitdir(dir))
        formantsdir = joinpath(formantsdir, ddir)
        isdir(formantsdir) || throw(ArgumentError("expected directory $formantsdir"))
    end

    annotations = Dict()
    for (root, _, files) in walkdir(dir)
        # `root` ends with "<dialect>/<speaker>".
        spk = basename(root)
        dialect = basename(dirname(root))

        # Several files share the same stem (".wrd", ".phn", ...): handle each
        # stem only once per directory.
        processed = Set{String}()
        for file in files
            # NOTE(review): the line defining `name` was missing from the
            # reviewed copy of this file; reconstructed from `timit_recordings`.
            name, _ = splitext(file)
            name in processed && continue
            push!(processed, name)

            # Words
            wpath = joinpath(root, name * ".wrd")
            words = [last(split(line)) for line in eachline(wpath)]

            # Phones: use the formant file when it is available, otherwise
            # fall back to the plain phone alignment.
            forpath = if formantsdir !== nothing
                joinpath(formantsdir, dialect, spk, name * ".ft")
            else
                ""
            end
            palign = if isfile(forpath)
                _timit_formant_alignment(forpath)
            else
                _timit_phone_alignment(joinpath(root, name * ".phn"))
            end

            sentence_type = if startswith(name, "sa")
                "dialect"
            elseif startswith(name, "sx")
                "compact"
            else # startswith(name, "si")
                "diverse"
            end

            id = "timit_$(spk)_$(name)"
            # NOTE(review): the `annotations[id] = Annotation(` line, the
            # "alignment" entry and the closing lines of this function were
            # missing from the reviewed copy of this file and have been
            # reconstructed — confirm against version control.
            annotations[id] = Annotation(
                id,  # recording id and annotation id are the same since we have
                id,  # a one-to-one mapping
                -1,  # start and duration is -1 means that we take the whole
                -1,  # recording
                [1], # only 1 channel (mono recording)
                Dict(
                     "text" => join(words, " "),
                     "sentence type" => sentence_type,
                     "alignment" => palign,
                     "dialect" => dialect,
                     "speaker" => spk,
                     "sex" => string(first(spk)),
                )
            )
        end
    end

    return annotations
end

"""
    TIMIT(timitdir, dir, subset, formantsdir=nothing; audio_fmt="SPHERE")

Return `dataset(dir, subset)`, first generating the manifests in `dir` with
`timit_prepare` when any of `recordings.jsonl`, `annotations-train.jsonl`,
`annotations-dev.jsonl` or `annotations-test.jsonl` is missing from `dir`.

# Arguments
- `timitdir`: root directory of the corpus, forwarded to `timit_prepare`.
- `dir`: directory holding (or receiving) the manifests.
- `subset`: subset to load, forwarded to `dataset`.
- `formantsdir`: optional directory forwarded to `timit_prepare`.

# Keywords
- `audio_fmt`: forwarded to `timit_prepare`; only used when the manifests have
  to be generated.
"""
function TIMIT(timitdir, dir, subset, formantsdir=nothing; audio_fmt="SPHERE")
    manifests = (
        "recordings.jsonl",
        "annotations-train.jsonl",
        "annotations-dev.jsonl",
        "annotations-test.jsonl",
    )

    # Prepare the corpus only when at least one manifest is missing.
    if !all(name -> isfile(joinpath(dir, name)), manifests)
        timit_prepare(timitdir, dir, formantsdir; audio_fmt=audio_fmt)
    end

    return dataset(dir, subset)
end