# SPDX-License-Identifier: CECILL-2.1

#######################################################################
# Constants

# Subset name => subset name. Not referenced by the code in this file.
const TIMIT_SUBSETS = Dict(
    "train" => "train",
    "dev" => "dev",
    "test" => "test"
)

# Values returned by `lang(::TIMIT)` and `name(::TIMIT)`.
const TIMIT_LANG = "eng"
const TIMIT_NAME = "timit"

# Presumably the speakers making up the "dev" subset.
# NOTE(review): not used by the code in this file — confirm where the
# speaker-based split is (or should be) applied.
const TIMIT_DEV_SPK_LIST = Set([
    "faks0", "fdac1", "fjem0", "mgwt0", "mjar0", "mmdb1", "mmdm2", "mpdf0",
    "fcmh0", "fkms0", "mbdg0", "mbwm0", "mcsh0", "fadg0", "fdms0", "fedw0",
    "mgjf0", "mglb0", "mrtk0", "mtaa0", "mtdt0", "mthc0", "mwjg0", "fnmr0",
    "frew0", "fsem0", "mbns0", "mmjr0", "mdls0", "mdlf0", "mdvc0", "mers0",
    "fmah0", "fdrw0", "mrcs0", "mrjm4", "fcal1", "mmwh0", "fjsj0", "majc0",
    "mjsw0", "mreb0", "fgjd0", "fjmg0", "mroa0", "mteb0", "mjfc0", "mrjr0",
    "fmml0", "mrws1"
])

# Presumably the speakers making up the "test" subset.
# NOTE(review): not used by the code in this file — confirm where the
# speaker-based split is (or should be) applied.
const TIMIT_TEST_SPK_LIST = Set([
    "mdab0", "mwbt0", "felc0", "mtas1", "mwew0", "fpas0", "mjmp0", "mlnt0",
    "fpkt0", "mlll0", "mtls0", "fjlm0", "mbpm0", "mklt0", "fnlp0", "mcmj0",
    "mjdh0", "fmgd0", "mgrt0", "mnjm0", "fdhc0", "mjln0", "mpam0", "fmld0"
])

# Phone label => reduced phone label (48-symbol set, judging by the name).
# The label "q" maps to the empty string. Declared `const` like the other
# tables of this file so that it is not an untyped mutable global binding.
const TIMIT_PHONE_MAP48 = Dict(
    "aa" => "aa",
    "ae" => "ae",
    "ah" => "ah",
    "ao" => "ao",
    "aw" => "aw",
    "ax" => "ax",
    "ax-h" => "ax",
    "axr" => "er",
    "ay" => "ay",
    "b" => "b",
    "bcl" => "vcl",
    "ch" => "ch",
    "d" => "d",
    "dcl" => "vcl",
    "dh" => "dh",
    "dx" => "dx",
    "eh" => "eh",
    "el" => "el",
    "em" => "m",
    "en" => "en",
    "eng" => "ng",
    "epi" => "epi",
    "er" => "er",
    "ey" => "ey",
    "f" => "f",
    "g" => "g",
    "gcl" => "vcl",
    "h#" => "sil",
    "hh" => "hh",
    "hv" => "hh",
    "ih" => "ih",
    "ix" => "ix",
    "iy" => "iy",
    "jh" => "jh",
    "k" => "k",
    "kcl" => "cl",
    "l" => "l",
    "m" => "m",
    "n" => "n",
    "ng" => "ng",
    "nx" => "n",
    "ow" => "ow",
    "oy" => "oy",
    "p" => "p",
    "pau" => "sil",
    "pcl" => "cl",
    "q" => "",
    "r" => "r",
    "s" => "s",
    "sh" => "sh",
    "t" => "t",
    "tcl" => "cl",
    "th" => "th",
    "uh" => "uh",
    "uw" => "uw",
    "ux" => "uw",
    "v" => "v",
    "w" => "w",
    "y" => "y",
    "z" => "z",
    "zh" => "zh"
)

# Phone label => reduced phone label (39-symbol set, judging by the name).
# The label "q" maps to the empty string.
const TIMIT_PHONE_MAP39 = Dict(
    "aa" => "aa",
    "ae" => "ae",
    "ah" => "ah",
    "ao" => "aa",
    "aw" => "aw",
    "ax" => "ah",
    "ax-h" => "ah",
    "axr" => "er",
    "ay" => "ay",
    "b" => "b",
    "bcl" => "sil",
    "ch" => "ch",
    "d" => "d",
    "dcl" => "sil",
    "dh" => "dh",
    "dx" => "dx",
    "eh" => "eh",
    "el" => "l",
    "em" => "m",
    "en" => "n",
    "eng" => "ng",
    "epi" => "sil",
    "er" => "er",
    "ey" => "ey",
    "f" => "f",
    "g" => "g",
    "gcl" => "sil",
    "h#" => "sil",
    "hh" => "hh",
    "hv" => "hh",
    "ih" => "ih",
    "ix" => "ih",
    "iy" => "iy",
    "jh" => "jh",
    "k" => "k",
    "kcl" => "sil",
    "l" => "l",
    "m" => "m",
    "n" => "n",
    "ng" => "ng",
    "nx" => "n",
    "ow" => "ow",
    "oy" => "oy",
    "p" => "p",
    "pau" => "sil",
    "pcl" => "sil",
    "q" => "",
    "r" => "r",
    "s" => "s",
    "sh" => "sh",
    "t" => "t",
    "tcl" => "sil",
    "th" => "th",
    "uh" => "uh",
    "uw" => "uw",
    "ux" => "uw",
    "v" => "v",
    "w" => "w",
    "y" => "y",
    "z" => "z",
    "zh" => "sh"
)

#######################################################################

"""
    TIMIT(datapath)

Handle on a TIMIT corpus stored under the directory `datapath`.
"""
struct TIMIT <: SpeechCorpus
    datapath::AbstractString
end

# Reuse the module constants rather than repeating the literals.
lang(::TIMIT) = TIMIT_LANG
name(::TIMIT) = TIMIT_NAME

"""
    prepare(timit::TIMIT, dir; audio_fmt="SPHERE")

Write the recording and supervision manifests of the "train", "dev" and
"test" subsets into `dir` (created if it does not exist).

`audio_fmt` is forwarded to `timit_recordings` as `fmt`.

Throws an `ArgumentError` if `timit.datapath` is not a directory.
"""
function prepare(timit::TIMIT, dir; audio_fmt="SPHERE")
    # Validate the data directory
    isdir(timit.datapath) || throw(ArgumentError("invalid path $(timit.datapath)"))

    # Create the output directory.
    dir = mkpath(dir)

    # NOTE(review): "train" and "dev" both read the `train` sub-directory and
    # no speaker filtering is applied, so the two sets of manifests have the
    # same content. `TIMIT_DEV_SPK_LIST` / `TIMIT_TEST_SPK_LIST` look like they
    # were meant to drive the split — confirm the intended behavior.
    for (subset, subdir) in [("train", "train"), ("dev", "train"), ("test", "test")]
        sdir = joinpath(timit.datapath, subdir)

        # Recordings
        manifestpath = joinpath(dir, manifestname(Recording, subset))
        @debug "preparing $manifestpath"
        recordings = timit_recordings(sdir; fmt=audio_fmt)
        open(manifestpath, "w") do f
            writemanifest(f, recordings)
        end

        # Supervision
        manifestpath = joinpath(dir, manifestname(Supervision, subset))
        @debug "Preparing $manifestpath"
        supervisions = timit_supervisions(sdir)
        open(manifestpath, "w") do f
            writemanifest(f, supervisions)
        end
    end
end

"""
    timit_recordings(dir; fmt="SPHERE")

Walk `dir` recursively and return a dictionary `id => Recording` with one
entry per file whose extension is `.wav`. The name of the directory
containing the file is used as the speaker name in the identifier.

When `fmt` is "SPHERE" the audio source is the command
`sph2pipe -f wav <path>`; otherwise the file itself is wrapped in a
`FileAudioSource`.

Throws an `ArgumentError` if `dir` is not a directory.
"""
function timit_recordings(dir::AbstractString; fmt="SPHERE")
    isdir(dir) || throw(ArgumentError("expected directory $dir"))

    recordings = Dict()
    for (root, subdirs, files) in walkdir(dir)
        for file in files
            name, ext = splitext(file)
            ext != ".wav" && continue

            spk = basename(root)
            path = joinpath(root, file)
            id = "timit_$(spk)_$(name)"

            audio_src = if fmt == "SPHERE"
                CmdAudioSource(`sph2pipe -f wav $path`)
            else
                FileAudioSource(path)
            end

            recordings[id] = Recording(
                id,
                audio_src;
                channels = [1],
                samplerate = 16000
            )
        end
    end
    recordings
end

"""
    timit_supervisions(dir)

Walk `dir` recursively and return a dictionary `id => Supervision` with one
entry per file whose extension is `.wrd`.

Each non-blank line of such a file is read as
`<start sample> <end sample> <word>`. The supervision starts at the first
start sample and ends at the last end sample (both divided by 16000), and
its text is the words joined by a single space. The speaker and the dialect
are the names of the directory containing the file and of its parent.

Throws an `ArgumentError` if `dir` is not a directory or if a transcript is
empty or contains a line with fewer than three fields.
"""
function timit_supervisions(dir)
    isdir(dir) || throw(ArgumentError("expected directory $dir"))

    supervisions = Dict()
    for (root, subdirs, files) in walkdir(dir)
        for file in files
            name, ext = splitext(file)
            ext != ".wrd" && continue

            # Use the path functions rather than splitting on "/" so that this
            # works with any path separator and any directory depth, and stays
            # consistent with `timit_recordings`.
            spk = basename(root)
            dialect = basename(dirname(root))

            path = joinpath(root, file)
            id = "timit_$(spk)_$(name)"

            # Blank lines are ignored; surrounding whitespace is removed before
            # splitting so that the three fields are extracted reliably.
            fields = [
                split(strip(line), limit=3)
                for line in eachline(path) if !isempty(strip(line))
            ]
            isempty(fields) && throw(ArgumentError("empty transcript $path"))
            all(f -> length(f) == 3, fields) ||
                throw(ArgumentError("malformed transcript $path"))

            start_ts = parse(Int, first(fields)[1]) / 16000
            end_ts = parse(Int, last(fields)[2]) / 16000
            dur = end_ts - start_ts
            words = [f[3] for f in fields]

            supervisions[id] = Supervision(
                id, id, start_ts, dur, [1],
                Dict(
                    "text" => join(words, " "),
                    "dialect" => dialect,
                    "speaker" => spk,
                    # First character of the speaker name.
                    "sex" => string(first(spk))
                )
            )
        end
    end
    supervisions
end

"""
    timit_lexicon(t::TIMIT)
    timit_lexicon(dir)

Read `doc/timitdic.txt` under `dir` (or under `t.datapath`) and return a
vector of `word => pronunciation` pairs, in file order.

Blank lines and lines starting with ';' are skipped. The pronunciation is
stripped of surrounding '/', tabs and spaces and upper-cased; anything from
the first '~' onward is removed from the word.
"""
timit_lexicon(t::TIMIT) = timit_lexicon(t.datapath)

function timit_lexicon(dir)
    dictfile = joinpath(dir, "doc", "timitdic.txt")

    lexicon = Pair{String, String}[]
    for line in eachline(dictfile)
        # `first(line)` would throw on an empty line, so blank lines are
        # skipped explicitly and the comment test uses `startswith`.
        isempty(strip(line)) && continue
        startswith(line, ";") && continue

        wrd, pron = split(line, limit=2)
        pron = strip(pron, ['/', '\t', ' '])
        wrd = '~' in wrd ? split(wrd, "~", limit=2)[1] : wrd

        push!(lexicon, wrd => uppercase(pron))
    end
    lexicon
end