Skip to content
Snippets Groups Projects
Verified Commit 3f0ec788 authored by Lucas Ondel Yang
Browse files

updated readme and changelog

parent 386df4df
No related branches found
No related tags found
No related merge requests found
name = "SpeechCorpora" name = "SpeechCorpora"
uuid = "3225a15e-d855-4a07-9546-2418058331ae" uuid = "3225a15e-d855-4a07-9546-2418058331ae"
authors = ["Lucas ONDEL YANG <lucas.ondel@cnrs.fr>"] authors = ["Lucas ONDEL YANG <lucas.ondel@cnrs.fr>"]
version = "0.1.1" version = "0.2.0"
[deps] [deps]
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
......
...@@ -4,7 +4,7 @@ A Julia package to download and prepare speech corpora. ...@@ -4,7 +4,7 @@ A Julia package to download and prepare speech corpora.
## Installation ## Installation
Make sure to add the [FAST registry](https://src.koda.cnrs.fr/fast/registry) Make sure to add the [FAST registry](https://gitlab.lisn.upsaclay.fr/fast/registry)
to your julia installation. Then, install the package as usual: to your julia installation. Then, install the package as usual:
``` ```
pkg> add SpeechCorpora pkg> add SpeechCorpora
......
...@@ -23,11 +23,13 @@ export ...@@ -23,11 +23,13 @@ export
prepare, prepare,
# Corpora # Corpora
MultilingualLibriSpeech MultilingualLibriSpeech,
MiniLibriSpeech
include("manifest_item.jl") include("manifest_item.jl")
include("manifest_io.jl") include("manifest_io.jl")
include("corpora/multilingual_librispeech.jl") include("corpora/multilingual_librispeech.jl")
include("corpora/mini_librispeech.jl")
end end
...@@ -59,7 +59,7 @@ function Base.download(::MultilingualLibriSpeechCorpus, lang, outdir) ...@@ -59,7 +59,7 @@ function Base.download(::MultilingualLibriSpeechCorpus, lang, outdir)
@info "dataset in $outdir" @info "dataset in $outdir"
end end
function recordings(lang, dir, subset) function recordings(::MultilingualLibriSpeechCorpus, lang, dir, subset)
subsetdir = joinpath(dir, "mls_$(MLS_LANG_CODE[lang])", subset, "audio") subsetdir = joinpath(dir, "mls_$(MLS_LANG_CODE[lang])", subset, "audio")
recs = Dict() recs = Dict()
...@@ -80,7 +80,7 @@ function recordings(lang, dir, subset) ...@@ -80,7 +80,7 @@ function recordings(lang, dir, subset)
recs recs
end end
function supervisions(lang, dir, subset) function supervisions(::MultilingualLibriSpeechCorpus, lang, dir, subset)
trans = joinpath(dir, "mls_$(MLS_LANG_CODE[lang])", subset, "transcripts.txt") trans = joinpath(dir, "mls_$(MLS_LANG_CODE[lang])", subset, "transcripts.txt")
sups = Dict() sups = Dict()
open(trans, "r") do f open(trans, "r") do f
...@@ -94,13 +94,13 @@ function supervisions(lang, dir, subset) ...@@ -94,13 +94,13 @@ function supervisions(lang, dir, subset)
sups sups
end end
function prepare(::MultilingualLibriSpeechCorpus, lang, dir) function prepare(multils::MultilingualLibriSpeechCorpus, lang, dir)
# 1. Recording manifests. # 1. Recording manifests.
for subset in ["train", "dev", "test"] for subset in ["train", "dev", "test"]
out = joinpath(dir, "recording-manifest-$subset.jsonl") out = joinpath(dir, "recording-manifest-$subset.jsonl")
@info "preparing recording manifest ($subset) $out" @info "preparing recording manifest ($subset) $out"
if ! isfile(out) if ! isfile(out)
recs = recordings(lang, dir, subset) recs = recordings(multils, lang, dir, subset)
open(out, "w") do f open(out, "w") do f
writemanifest(f, recs) writemanifest(f, recs)
end end
...@@ -112,7 +112,7 @@ function prepare(::MultilingualLibriSpeechCorpus, lang, dir) ...@@ -112,7 +112,7 @@ function prepare(::MultilingualLibriSpeechCorpus, lang, dir)
out = joinpath(dir, "supervision-manifest-$subset.jsonl") out = joinpath(dir, "supervision-manifest-$subset.jsonl")
@info "preparing supervision manifest ($subset) $out" @info "preparing supervision manifest ($subset) $out"
if ! isfile(out) if ! isfile(out)
sups = supervisions(lang, dir, subset) sups = supervisions(multils, lang, dir, subset)
open(out, "w") do f open(out, "w") do f
writemanifest(f, sups) writemanifest(f, sups)
end end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment