Skip to content
Snippets Groups Projects
manifest_item.jl 3.37 KiB
Newer Older
# SPDX-License-Identifier: CECILL-2.1

"""
    abstract type ManifestItem end

Abstract supertype of all manifest items. Every manifest item should
have an `id` attribute.
"""
abstract type ManifestItem end

"""
    struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
        id::AbstractString
        source::Ts
        channels::Vector{Int}
        samplerate::Int
    end

A recording is an audio source associated with an id.

# Constructors
    Recording(id, source, channels, samplerate)
    Recording(id, source[; channels = missing, samplerate = missing])

If the channels or the sample rate are not provided then they will be
read from `source`.

!!! warning
    When preparing a large corpus, not providing the channels and/or
    the sample rate can drastically reduce the speed, as it forces the
    source to be read.
"""
struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
    id::AbstractString
    source::Ts
    channels::Vector{Int}
    samplerate::Int
end

# Keyword constructor for `Recording`. Any of `channels` / `samplerate` left
# as `missing` is recovered from the audio source itself, which requires
# loading it with `loadaudio`; pass both explicitly to avoid that cost.
function Recording(uttid, s::AbstractAudioSource; channels = missing, samplerate = missing)
    if ismissing(channels) || ismissing(samplerate)
        # The source is loaded once, even when both values are missing.
        x, sr = loadaudio(s)
        # Only convert/derive the values that were not supplied by the caller,
        # so an explicitly given value is never touched.
        samplerate = ismissing(samplerate) ? Int(sr) : samplerate
        # One channel index per column of the loaded signal.
        channels = ismissing(channels) ? collect(1:size(x, 2)) : channels
    end
    return Recording(uttid, s, channels, samplerate)
end

"""
    struct Supervision <: ManifestItem
        id::AbstractString
        recording_id::AbstractString
        start::Float64
        duration::Float64
        channels::Union{Vector, Colon}
        data::Dict
    end

A "supervision" defines a segment of a recording on one or more channels.
The `data` field is an arbitrary dictionary holding the nature of the
supervision. `start` and `duration` (in seconds) define where the
segment is located within the recording `recording_id`.

# Constructor

    Supervision(id, recording_id, start, duration, channels, data)
    Supervision(id, recording_id[; channels = missing, start = -1, duration = -1, data = missing])

If `start` and/or `duration` are negative, the segment is considered to
be the whole sequence length of the recording.
"""
struct Supervision <: ManifestItem
    id::AbstractString
    recording_id::AbstractString
    start::Float64
    duration::Float64
Lucas Ondel Yang's avatar
Lucas Ondel Yang committed
    channels::Union{Vector, Colon}
# Keyword-based convenience constructor: forwards its arguments to the
# six-argument positional constructor in the order
# (id, recording id, start, duration, channels, data).
function Supervision(
    id, recid; channels = missing, start = -1, duration = -1, data = missing
)
    return Supervision(id, recid, start, duration, channels, data)
end

"""
    load(recording[; start = -1, duration = -1, channels = recording.channels])
    load(recording, supervision)

Load the signal from a recording. `start` and `duration` (in seconds) can
be used to load only a segment. If a `supervision` is given, the function
returns only the portion of the signal corresponding to the
supervision segment.

The function returns a tuple `(x, sr)` where `x` is an ``NxC`` array
(``N`` is the length of the signal and ``C`` is the number of channels)
and `sr` is the sampling rate of the signal.
"""
function load(r::Recording; start = -1, duration = -1, channels = r.channels)
    if start >= 0 && duration >= 0
        s = Int(floor(start * r.samplerate + 1))
        e = Int(ceil(duration * r.samplerate))
        subrange = (s:e)
    else
        subrange = (:)
    end

Martin Kocour's avatar
Martin Kocour committed
    x, sr = loadaudio(r.source, subrange)
# Load the part of recording `r` described by supervision `s` by delegating
# to the keyword method with the supervision's start, duration and channels.
function load(r::Recording, s::Supervision)
    return load(r; start = s.start, duration = s.duration, channels = s.channels)
end