# SPDX-License-Identifier: CECILL-2.1
"""
    abstract type ManifestItem end

Base type for all manifest items. Every manifest item should have an
`id` attribute.
"""
abstract type ManifestItem end
"""
    struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
        id::AbstractString
        source::Ts
        channels::Vector{Int}
        samplerate::Int
    end

A recording is an audio source associated with an id.

# Constructors

    Recording(id, source, channels, samplerate)
    Recording(id, source[; channels = missing, samplerate = missing])

If the channels or the sample rate are not provided then they will be
read from `source`.

When preparing a large corpus, not providing the channels and/or the
sample rate can drastically reduce the speed as it forces reading the
source.
"""
struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
    id::AbstractString
    source::Ts
    channels::Vector{Int}
    samplerate::Int
end
"""
    Recording(uttid, s::AbstractAudioSource; channels = missing, samplerate = missing)

Create a `Recording` with identifier `uttid` for the audio source `s`.

Any of `channels` / `samplerate` left `missing` is inferred by reading the
source: the sample rate is the one reported by the source (converted to
`Int`) and the channels default to `1:C`, where `C` is the number of columns
of the loaded signal. Providing both keywords avoids reading the source.
"""
function Recording(uttid, s::AbstractAudioSource; channels = missing, samplerate = missing)
    if ismissing(channels) || ismissing(samplerate)
        # The original body used `x` and `sr` without ever assigning them.
        # NOTE(review): `loadsource(source, subrange) -> (x, sr)` is assumed to
        # be the package's reader for `AbstractAudioSource`; it is not defined
        # in this part of the file -- confirm the name and signature.
        x, sr = loadsource(s, :)
        if ismissing(samplerate)
            samplerate = Int(sr)
        end
        if ismissing(channels)
            channels = collect(1:size(x, 2))
        end
    end
    return Recording(uttid, s, channels, samplerate)
end
"""
    struct Supervision <: ManifestItem
        id::AbstractString
        recording_id::AbstractString
        start::Float64
        duration::Float64
        channel::Int
        data::Dict
    end

A "supervision" defines a segment of a recording on a single channel.
The `data` field is an arbitrary dictionary holding the nature of the
supervision. `start` and `duration` (in seconds) define where the
segment is located within the recording `recording_id`.

# Constructors

    Supervision(id, recording_id, start, duration, channel, data)
    Supervision(id, recording_id[; channels = missing, start = -1, duration = -1, data = missing])

If `start` and/or `duration` are negative, the segment is considered to
be the whole sequence length of the recording.
"""
struct Supervision <: ManifestItem
    id::AbstractString
    recording_id::AbstractString
    start::Float64
    duration::Float64
    channel::Int
    data::Dict
end

# Keyword convenience constructor; the keyword is spelled `channels` (kept for
# backward compatibility) and fills the `channel` field.
# NOTE(review): the `missing` defaults for `channels` and `data` cannot be
# converted to the `Int` / `Dict` fields, so both must be supplied in practice
# -- confirm the intended defaults.
Supervision(id, recid; channels = missing, start = -1, duration = -1, data = missing) =
    Supervision(id, recid, start, duration, channels, data)
"""
    load(recording[; start = -1, duration = -1, channels = recording.channels])
    load(recording, supervision)

Load the signal from a recording. `start` and `duration` (in seconds) can
be used to load only a segment; the segment is used only when both are
non-negative, otherwise the whole signal is loaded. If a `supervision` is
given, the function returns only the portion of the signal corresponding to
the supervision segment.

The function returns a tuple `(x, sr)` where `x` is a ``NxC`` array
(``N`` is the length of the signal and ``C`` is the number of channels)
and `sr` is the sampling rate of the signal.
"""
function load(r::Recording; start = -1, duration = -1, channels = r.channels)
    subrange = if start >= 0 && duration >= 0
        # 1-based index of the first sample of the segment.
        first_sample = floor(Int, start * r.samplerate + 1)
        # The segment ends `duration` seconds after `start`; the original
        # computed the end from `duration` alone, which truncated (or emptied)
        # every segment that does not begin at 0.
        last_sample = ceil(Int, (start + duration) * r.samplerate)
        first_sample:last_sample
    else
        (:)
    end

    # The original body used `x` and `sr` without ever assigning them.
    # NOTE(review): `loadsource(source, subrange) -> (x, sr)` is assumed to be
    # the package's reader for audio sources; it is not defined in this part of
    # the file -- confirm the name and signature.
    x, sr = loadsource(r.source, subrange)
    return x[:, channels], sr
end
# Load only the segment of `r` described by the supervision `s`.
# `Supervision` is documented with a single `channel` field (the original read
# a non-existent `s.channels`); it is wrapped in a vector so the result keeps
# the two-dimensional samples-by-channels layout of the keyword method.
load(r::Recording, s::Supervision) =
    load(r; start = s.start, duration = s.duration, channels = [s.channel])