Skip to content
Snippets Groups Projects
Commit 38b05991 authored by Lucas Ondel Yang
Browse files

Merge branch 'test' into 'main'

rename Supervision calls to Annotation

See merge request fast/speechdatasets.jl!14
parents d1a0cc49 e5aeb52d
No related branches found
No related tags found
No related merge requests found
......@@ -42,7 +42,7 @@ function minils_recordings(dir, subset)
recs
end
function minils_supervisions(dir, subset)
function minils_annotations(dir, subset)
subsetdir = joinpath(dir, "LibriSpeech", MINILS_SUBSETS[subset])
sups = Dict()
for d1 in readdir(subsetdir; join = true)
......@@ -52,8 +52,8 @@ function minils_supervisions(dir, subset)
open(joinpath(d2, "$(k1)-$(k2).trans.txt"), "r") do f
for line in eachline(f)
tokens = split(line)
s = Supervision(
tokens[1], # supervision id
s = Annotation(
tokens[1], # annotation id
tokens[1]; # recording id
channels = [1],
data = Dict("text" => join(tokens[2:end], " "))
......@@ -98,12 +98,12 @@ function minils_prepare(dir)
end
end
# 2. Supervision manifests.
# 2. Annotation manifests.
for (subset, name) in [("train", "train"), ("dev", "dev"), ("dev", "test")]
out = joinpath(dir, "supervisions-$name.jsonl")
out = joinpath(dir, "annotations-$name.jsonl")
if ! isfile(out)
@debug "preparing supervision manifest ($subset) $out"
sups = minils_supervisions(dir, subset)
@debug "preparing annotation manifest ($subset) $out"
sups = minils_annotations(dir, subset)
open(out, "w") do f
writemanifest(f, sups)
end
......
......@@ -89,13 +89,13 @@ function recordings(corpus::MultilingualLibriSpeech, dir, subset)
recs
end
function supervisions(corpus::MultilingualLibriSpeech, dir, subset)
function annotations(corpus::MultilingualLibriSpeech, dir, subset)
trans = joinpath(dir, "mls_$(MLS_LANG_CODE[corpus.lang])", subset, "transcripts.txt")
sups = Dict()
open(trans, "r") do f
for line in eachline(f)
tokens = split(line)
s = Supervision(tokens[1], tokens[1]; channel = 1,
s = Annotation(tokens[1], tokens[1]; channel = 1,
data = Dict("text" => join(tokens[2:end], " ")))
sups[s.id] = s
end
......@@ -118,12 +118,12 @@ function prepare(corpus::MultilingualLibriSpeech, outdir)
end
end
# 2. Supervision manifests.
# 2. Annotation manifests.
for subset in ["train", "dev", "test"]
out = joinpath(dir, "supervision-manifest-$subset.jsonl")
@info "preparing supervision manifest ($subset) $out"
out = joinpath(dir, "annotation-manifest-$subset.jsonl")
@info "preparing annotation manifest ($subset) $out"
if ! isfile(out)
sups = supervisions(corpus, dir, subset)
sups = annotations(corpus, dir, subset)
open(out, "w") do f
writemanifest(f, sups)
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment