Skip to content
Snippets Groups Projects
Verified Commit 8dfd49d2 authored by Lucas Ondel Yang's avatar Lucas Ondel Yang
Browse files

preparing recipe timit

parent 7cf16bf1
Branches
Tags
No related merge requests found
......@@ -23,11 +23,15 @@ export
# Corpora interface
download,
lang,
name,
path,
prepare,
# Corpora
MultilingualLibriSpeech,
MiniLibriSpeech
MiniLibriSpeech,
TIMIT
......@@ -46,5 +50,6 @@ include("manifest_item.jl")
include("manifest_io.jl")
include("corpora/multilingual_librispeech.jl")
include("corpora/mini_librispeech.jl")
include("corpora/timit.jl")
end
# SPDX-License-Identifier: CECILL-2.1
#######################################################################
"""
    TIMIT_SUBSETS

Table of the three subset names (`"train"`, `"dev"`, `"test"`); every key
currently maps to an identical value.
NOTE(review): presumably key = subset name requested by the user and
value = name used for the generated files — confirm against `prepare`.
"""
const TIMIT_SUBSETS = Dict{String,String}(
    "train" => "train",
    "dev" => "dev",
    "test" => "test",
)

"""
    TIMIT_LANG

Language code string associated with the TIMIT corpus.
"""
const TIMIT_LANG = "eng"

"""
    TIMIT_NAME

Name identifier string associated with the TIMIT corpus.
"""
const TIMIT_NAME = "timit"
"""
    TIMIT_DEV_SPK_LIST

Set of the 50 lowercase speaker identifiers belonging to the development
split. Being a `Set`, it supports constant-time `in` membership tests.
NOTE(review): presumably consumed by `prepare` to route utterances to the
`"dev"` subset — confirm once `prepare` is implemented.
"""
const TIMIT_DEV_SPK_LIST = Set{String}([
    "faks0", "fdac1", "fjem0", "mgwt0", "mjar0",
    "mmdb1", "mmdm2", "mpdf0", "fcmh0", "fkms0",
    "mbdg0", "mbwm0", "mcsh0", "fadg0", "fdms0",
    "fedw0", "mgjf0", "mglb0", "mrtk0", "mtaa0",
    "mtdt0", "mthc0", "mwjg0", "fnmr0", "frew0",
    "fsem0", "mbns0", "mmjr0", "mdls0", "mdlf0",
    "mdvc0", "mers0", "fmah0", "fdrw0", "mrcs0",
    "mrjm4", "fcal1", "mmwh0", "fjsj0", "majc0",
    "mjsw0", "mreb0", "fgjd0", "fjmg0", "mroa0",
    "mteb0", "mjfc0", "mrjr0", "fmml0", "mrws1",
])
"""
    TIMIT_TEST_SPK_LIST

Set of the 24 lowercase speaker identifiers belonging to the test split.
Being a `Set`, it supports constant-time `in` membership tests.
NOTE(review): presumably consumed by `prepare` to route utterances to the
`"test"` subset — confirm once `prepare` is implemented.
"""
const TIMIT_TEST_SPK_LIST = Set{String}([
    "mdab0", "mwbt0", "felc0", "mtas1", "mwew0", "fpas0",
    "mjmp0", "mlnt0", "fpkt0", "mlll0", "mtls0", "fjlm0",
    "mbpm0", "mklt0", "fnlp0", "mcmj0", "mjdh0", "fmgd0",
    "mgrt0", "mnjm0", "fdhc0", "mjln0", "mpam0", "fmld0",
])
"""
    TIMIT_PHONE_MAP48

Lookup table folding the 61 phone labels used as keys into a reduced set of
48 labels. Labels that are kept map to themselves; closures map to `"cl"` /
`"vcl"`, and `"h#"` / `"pau"` map to `"sil"`.

`"q"` maps to the empty string.
NOTE(review): the empty string presumably means "drop this symbol" — confirm
with the code that consumes the table.

Declared `const` (like the other module-level tables) so the binding has a
fixed type and cannot be rebound by accident.
"""
const TIMIT_PHONE_MAP48 = Dict{String,String}(
    "aa" => "aa",
    "ae" => "ae",
    "ah" => "ah",
    "ao" => "ao",
    "aw" => "aw",
    "ax" => "ax",
    "ax-h" => "ax",
    "axr" => "er",
    "ay" => "ay",
    "b" => "b",
    "bcl" => "vcl",
    "ch" => "ch",
    "d" => "d",
    "dcl" => "vcl",
    "dh" => "dh",
    "dx" => "dx",
    "eh" => "eh",
    "el" => "el",
    "em" => "m",
    "en" => "en",
    "eng" => "ng",
    "epi" => "epi",
    "er" => "er",
    "ey" => "ey",
    "f" => "f",
    "g" => "g",
    "gcl" => "vcl",
    "h#" => "sil",
    "hh" => "hh",
    "hv" => "hh",
    "ih" => "ih",
    "ix" => "ix",
    "iy" => "iy",
    "jh" => "jh",
    "k" => "k",
    "kcl" => "cl",
    "l" => "l",
    "m" => "m",
    "n" => "n",
    "ng" => "ng",
    "nx" => "n",
    "ow" => "ow",
    "oy" => "oy",
    "p" => "p",
    "pau" => "sil",
    "pcl" => "cl",
    "q" => "",
    "r" => "r",
    "s" => "s",
    "sh" => "sh",
    "t" => "t",
    "tcl" => "cl",
    "th" => "th",
    "uh" => "uh",
    "uw" => "uw",
    "ux" => "uw",
    "v" => "v",
    "w" => "w",
    "y" => "y",
    "z" => "z",
    "zh" => "zh",
)
#######################################################################
"""
    TIMIT(datapath)

Concrete `SpeechCorpus` representing the TIMIT corpus.

# Fields
- `datapath`: path string supplied at construction.
  NOTE(review): presumably the root directory of a local copy of the
  corpus — confirm against `prepare` once it is implemented.
"""
struct TIMIT <: SpeechCorpus
    datapath::AbstractString
end
# Return the language code of TIMIT. Reads the module-level `TIMIT_LANG`
# constant instead of repeating the literal, so the value lives in one place.
lang(::TIMIT) = TIMIT_LANG
# Return the name identifier of TIMIT. Reads the module-level `TIMIT_NAME`
# constant instead of repeating the literal, so the value lives in one place.
name(::TIMIT) = TIMIT_NAME
"""
    prepare(timit::TIMIT)

Placeholder for the TIMIT preparation step. The body is empty: calling it
performs no work and returns `nothing`.
"""
function prepare(timit::TIMIT)
    # Not implemented yet; intentionally a no-op.
    return nothing
end
......@@ -7,24 +7,38 @@ Abstract type for all speech corpora.
"""
abstract type SpeechCorpus end
"""
lang(corpus)
Return the ISO 639-3 code of the language of the corpus.
"""
lang
"""
name(corpus)
Return the name identifier of the corpus.
"""
name
"""
path(corpus)
Path to the directory where the corpus' data is stored.
Path to the directory where the manifests of the corpus are stored.
"""
path(corpus::SpeechCorpus, dir) = joinpath(dir, lang(corpus), name(corpus))
# Both methods go through the `lang`/`name` accessors rather than
# `corpus.lang`/`corpus.name`: a corpus type is not required to carry such
# fields (`TIMIT`, for instance, only stores `datapath`).
# The one-argument method used to reference an undefined `dir`; it now
# delegates to the two-argument method.
# NOTE(review): assumes `SPEECH_CORPORA_ROOTDIR` is the intended default
# root directory — confirm.
path(corpus::SpeechCorpus) = path(corpus, SPEECH_CORPORA_ROOTDIR)
"""
download(corpus[, dir = homedir()])
download(corpus)
Download the data of the corpus to `dir`.
"""
function Base.download(corpus::SpeechCorpus)
    # Forward to the two-argument method, passing `SPEECH_CORPORA_ROOTDIR`
    # as the second argument.
    return download(corpus, SPEECH_CORPORA_ROOTDIR)
end
Base.download(corpus::SpeechCorpus)
"""
prepare(corpus[, dir = homedir()])
prepare(corpus)
Prepare the manifests of corpus to `dir`.
Prepare the manifests of corpus.
"""
function prepare(corpus::SpeechCorpus)
    # Forward to the two-argument method, passing `SPEECH_CORPORA_ROOTDIR`
    # as the second argument.
    return prepare(corpus, SPEECH_CORPORA_ROOTDIR)
end
prepare(corpus::SpeechCorpus)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment