Skip to content
Snippets Groups Projects
Commit 1da44ae0 authored by Martin Kocour's avatar Martin Kocour Committed by Lucas Ondel Yang
Browse files

Fix channels

parent 955e185b
No related branches found
No related tags found
No related merge requests found
# SPDX-License-Identifier: CECILL-2.1 # SPDX-License-Identifier: CECILL-2.1
"""
FastDataset(supervisions, recordings, partition)
Constructor for dataset represented as JSONL files (a.k.a. manifests).
"""
struct SpeechDataset <: MLUtils.AbstractDataContainer struct SpeechDataset <: MLUtils.AbstractDataContainer
idxs::Vector{AbstractString} idxs::Vector{AbstractString}
supervisions::Dict{AbstractString, Supervision} supervisions::Dict{AbstractString, Supervision}
...@@ -14,10 +8,10 @@ struct SpeechDataset <: MLUtils.AbstractDataContainer ...@@ -14,10 +8,10 @@ struct SpeechDataset <: MLUtils.AbstractDataContainer
end end
""" """
dataset(manifestroot, subset) dataset(manifestroot, partition)
Load `SpeechDataset` from manifest files stored in `manifestroot`. Load `SpeechDataset` from manifest files stored in `manifestroot`.
Partition is specified by `subset`, e.g. `:train`, `:test`. Partition is specified by `partition`, e.g. `:train`, `:test`.
Each item of the dataset is a nested tuple `((samples, sampling_rate), Supervision.data)`. Each item of the dataset is a nested tuple `((samples, sampling_rate), Supervision.data)`.
...@@ -38,14 +32,17 @@ julia> ds[1] ...@@ -38,14 +32,17 @@ julia> ds[1]
) )
``` ```
""" """
function dataset(manifestroot::AbstractString, partition)
    # The supervision manifest is selected by `partition`; the recordings
    # manifest is a single file shared by every partition.
    sup_path = joinpath(manifestroot, "supervisions-$(partition).jsonl")
    rec_path = joinpath(manifestroot, "recordings.jsonl")
    supervisions = load(Supervision, sup_path)
    recordings = load(Recording, rec_path)
    # Forward `partition` so the three-argument method can record it.
    return dataset(supervisions, recordings, partition)
end

"""
    dataset(supervisions, recordings, partition)

Build a `SpeechDataset` from already-loaded manifests.

The dataset indices are the keys of `supervisions`; `partition` is converted
to a `Symbol` before being stored.
"""
function dataset(supervisions, recordings, partition)
    idxs = collect(keys(supervisions))
    return SpeechDataset(idxs, supervisions, recordings, Symbol(partition))
end
function Base.getindex(d::SpeechDataset, key::AbstractString) function Base.getindex(d::SpeechDataset, key::AbstractString)
......
...@@ -52,7 +52,7 @@ end ...@@ -52,7 +52,7 @@ end
recording_id::AbstractString recording_id::AbstractString
start::Float64 start::Float64
duration::Float64 duration::Float64
channel::Int channel::Union{Vector, Colon}
data::Dict data::Dict
end end
...@@ -74,7 +74,7 @@ struct Supervision <: ManifestItem ...@@ -74,7 +74,7 @@ struct Supervision <: ManifestItem
recording_id::AbstractString recording_id::AbstractString
start::Float64 start::Float64
duration::Float64 duration::Float64
channels::Vector{Integer} channels::Union{Vector{Integer}, Colon}
data::Dict data::Dict
end end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment