    # SPDX-License-Identifier: CECILL-2.1
    
    """
        abstract type ManifestItem end
    
    Base class for all manifest item. Every manifest item should have an
    `id` attribute.
    """
    abstract type ManifestItem end
    
    """
        struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
            id::AbstractString
            source::Ts
            channels::Vector{Int}
            samplerate::Int
        end
    
    A recording is an audio source associated with and id.
    
    # Constructors
        Recording(id, source, channels, samplerate)
    
        Recording(id, source[; channels = missing, samplerate = missing])
    
    
    If the channels or the sample rate are not provided then they will be
    read from `source`.
    
    
    !!! warning
    
        When preparing large corpus, not providing the channes and/or the
        sample rate can drastically reduce the speed as it forces to read
        source.
    """
    struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
        id::AbstractString
        source::Ts
        channels::Vector{Int}
        samplerate::Int
    end
    
    # Keyword constructor: any of `channels` / `samplerate` left as `missing`
    # is filled in from the audio returned by `loadaudio(s)`.
    function Recording(uttid, s::AbstractAudioSource; channels = missing, samplerate = missing)
        # Only touch the source when at least one value has to be inferred;
        # when both are given the audio is never loaded.
        if ismissing(channels) || ismissing(samplerate)
            x, sr = loadaudio(s)
            samplerate = ismissing(samplerate) ? Int(sr) : samplerate
            # Default to every channel, taken as the size of the second
            # dimension of `x` (presumably samples × channels — confirm
            # against `loadaudio`).
            channels = ismissing(channels) ? collect(1:size(x, 2)) : channels
        end
        return Recording(uttid, s, channels, samplerate)
    end
    
    """
        struct Supervision <: ManifestItem
            id::AbstractString
            recording_id::AbstractString
            start::Float64
            duration::Float64
            channel::Int
            data::Dict
        end
    
    A "supervision" defines a segment of a recording on a single channel.
    The `data` field is an arbitrary dictionary holdin the nature of the
    
    Martin Kocour's avatar
    Martin Kocour committed
    supervision. `start` and `duration` (in seconds) defines,
    where the segment is locatated within the recoding `recording_id`.
    
    
    # Constructor
    
        Supervision(id, recording_id, start, duration, channel, data)
        Supervision(id, recording_id[; channel = missing, start = -1, duration = -1, data = missing)
    
    If `start` and/or `duration` are negative, the segment is considered to
    be the whole sequence length of the recording.
    """
    struct Supervision <: ManifestItem
        id::AbstractString
        recording_id::AbstractString
        start::Float64
        duration::Float64
    
        channels::Vector{Integer}
    
    # Keyword-based convenience constructor: forwards its arguments to the
    # positional constructor in field order.
    # NOTE(review): the default `channels = missing` cannot be converted to
    # the `Vector{Integer}` field declared on the struct, so callers
    # presumably have to pass `channels` explicitly — confirm.
    function Supervision(
        id, recid; channels = missing, start = -1, duration = -1, data = missing
    )
        return Supervision(id, recid, start, duration, channels, data)
    end
    
    
    """
        load(recording[; start = -1, duration = -1, channels = recording.channels])
        load(recording, supervision)
    
    Load the signal from a recording. `start`, `duration` (in seconds) can
    be used to load only a segment. If a `supervision` is given, function
    will return on the portion of the signal corresponding to the
    supervision segment.
    
    The function returns a tuple `(x, sr)` where `x` is a ``NxC`` array
    - ``N`` is the length of the signal and ``C`` is the number of channels
    - and `sr` is the sampling rate of the signal.
    """
    function load(r::Recording; start = -1, duration = -1, channels = r.channels)
        if start >= 0 && duration >= 0
            s = Int(floor(start * r.samplerate + 1))
            e = Int(ceil(duration * r.samplerate))
            subrange = (s:e)
        else
            subrange = (:)
        end
    
    
    Martin Kocour's avatar
    Martin Kocour committed
        x, sr = loadaudio(r.source, subrange)
    
        x[:,channels], sr
    end
    
    # Load the part of recording `r` described by supervision `s`, using the
    # supervision's `start`, `duration` and `channels`.
    function load(r::Recording, s::Supervision)
        return load(r; start = s.start, duration = s.duration, channels = s.channels)
    end