Skip to content
Snippets Groups Projects
manifest_item.jl 4.62 KiB
Newer Older
  • Learn to ignore specific revisions
  • # SPDX-License-Identifier: CECILL-2.1
    
    """
        abstract type AbstractAudioSource end
    
    Base class for all audio sources. Possible audio sources are:
    * `FileAudioSource`
    * `URLAudioSource`
    * `CmdAudioSource`
    
    You can load the data of an audio source with the internal function
    
        loadsource(s::AbstractAudioSource, subrange)
    
    """
    abstract type AbstractAudioSource end
    
    """
        struct FileAudioSource <: AbstractAudioSource
            path::AbstractString
        end
    
    Audio source whose data is read from the file located at `path`.
    """
    struct FileAudioSource <: AbstractAudioSource
        path::AbstractString
    end
    
    """
        struct URLAudioSource <: AbstractAudioSource
            url::AbstractString
        end
    
    Audio source whose data is fetched from the address `url`.
    """
    struct URLAudioSource <: AbstractAudioSource
        url::AbstractString
    end
    
    """
        struct CmdAudioSource <: AbstractAudioSource
            cmd
        end
    
    Audio source whose data is the standard output of the external command
    `cmd`.
    
    # Constructors
        CmdAudioSource(cmd)
        CmdAudioSource(c::AbstractString)
    
    When a string is given, it is split on whitespace and turned into a
    `Cmd`. No shell-style quoting is interpreted: an argument containing
    spaces has to be passed through an explicitly built `Cmd` instead.
    """
    struct CmdAudioSource <: AbstractAudioSource
        # Left untyped so that any command-like object accepted by `pipeline`
        # can be stored.
        cmd
    end
    
    # Accept any string type (e.g. a `SubString` obtained while parsing a
    # manifest), not only `String`; otherwise the raw string would be stored in
    # `cmd` by the default constructor instead of being converted to a `Cmd`.
    function CmdAudioSource(c::AbstractString)
        return CmdAudioSource(Cmd(String.(split(c))))
    end
    
    # Read the requested `subrange` of samples directly from the file on disk.
    function loadsource(s::FileAudioSource, subrange)
        return wavread(s.path; subrange = subrange)
    end
    
    # Download the resource, then decode the received bytes from memory.
    function loadsource(s::URLAudioSource, subrange)
        response = HTTP.get(s.url)
        return wavread(IOBuffer(response.body); subrange = subrange)
    end
    
    # Run the command, capture its standard output and decode it from memory.
    function loadsource(s::CmdAudioSource, subrange)
        bytes = read(pipeline(s.cmd))
        return wavread(IOBuffer(bytes); subrange = subrange)
    end
    
    """
        abstract type ManifestItem end
    
    Base class for all manifest items. Every manifest item should have an
    `id` attribute.
    """
    abstract type ManifestItem end
    
    """
        struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
            id::AbstractString
            source::Ts
            channels::Vector{Int}
            samplerate::Int
        end
    
    A recording is an audio source associated with an id.
    
    # Constructors
        Recording(id, source, channels, samplerate)
        Recording(id, source[; channels = missing, samplerate = missing])
    
    If the channels or the sample rate are not provided then they will be
    read from `source`.
    
    !!! warning
        When preparing a large corpus, not providing the channels and/or the
        sample rate can drastically reduce the speed as it forces reading
        the source.
    """
    struct Recording{Ts<:AbstractAudioSource} <: ManifestItem
        id::AbstractString
        source::Ts
        channels::Vector{Int}
        samplerate::Int
    end
    
    # Convenience constructor: `channels` and/or `samplerate` left as `missing`
    # are inferred by reading the audio source `s`. Supplying both avoids
    # touching the source at all.
    function Recording(uttid, s::AbstractAudioSource; channels = missing, samplerate = missing)
        if ismissing(channels) || ismissing(samplerate)
            # `load` only has methods for `Recording`; a bare audio source must
            # be read with `loadsource`, here over its whole length (`:`).
            x, sr = loadsource(s, :)
            if ismissing(samplerate)
                samplerate = Int(sr)
            end
            if ismissing(channels)
                # One entry per column of the signal matrix.
                channels = collect(1:size(x, 2))
            end
        end
        return Recording(uttid, s, channels, samplerate)
    end
    
    """
        struct Supervision <: ManifestItem
            id::AbstractString
            recording_id::AbstractString
            start::Float64
            duration::Float64
            channel::Int
            data::Dict
        end
    
    A "supervision" defines a segment of a recording on a single channel.
    The `data` field is an arbitrary dictionary holding the nature of the
    supervision.
    
    # Constructor
    
        Supervision(id, recording_id, start, duration, channel, data)
        Supervision(id, recording_id[; channel = missing, start = -1, duration = -1, data = missing])
    
    If `start` and/or `duration` are negative, the segment is considered to
    be the whole sequence length of the recording.
    """
    struct Supervision <: ManifestItem
        id::AbstractString
        recording_id::AbstractString
        start::Float64
        duration::Float64
        channel::Int
        data::Dict
    end
    
    # Keyword constructor. `start` and `duration` default to -1 (whole
    # recording). The `channel` and `data` fields are typed `Int` and `Dict`
    # and cannot store `missing`, so a `missing` value is replaced with a usable
    # default (first channel, empty dictionary) instead of failing on
    # conversion.
    function Supervision(id, recid; channel = missing, start = -1, duration = -1, data = missing)
        channel = coalesce(channel, 1)
        data = coalesce(data, Dict())
        return Supervision(id, recid, start, duration, channel, data)
    end
    
    """
        load(recording[; start = -1, duration = -1, channels = recording.channels])
        load(recording, supervision)
    
    Load the signal from a recording. `start` and `duration` (in seconds)
    can be used to load only a segment; if either of them is negative the
    whole recording is loaded. If a `supervision` is given, the function
    will return only the portion of the signal corresponding to the
    supervision segment.
    
    The function returns a tuple `(x, sr)` where `x` is a ``N \\times C``
    array (``N`` is the length of the signal and ``C`` is the number of
    channels) and `sr` is the sampling rate of the signal.
    """
    function load(r::Recording; start = -1, duration = -1, channels = r.channels)
        if start >= 0 && duration >= 0
            # Sample indices are 1-based: the segment begins at the sample
            # following `start` seconds and ends `duration` seconds later. The
            # end index must therefore be measured from `start`, not from the
            # beginning of the recording.
            s = floor(Int, start * r.samplerate) + 1
            e = ceil(Int, (start + duration) * r.samplerate)
            subrange = s:e
        else
            subrange = (:)
        end
    
        # Only the samples and the sampling rate are needed from the source.
        x, sr, _, _ = loadsource(r.source, subrange)
        return x[:, channels], sr
    end
    
    # Load only the segment and the single channel described by supervision `s`.
    function load(r::Recording, s::Supervision)
        return load(r; start = s.start, duration = s.duration, channels = [s.channel])
    end
    
    """
        play(recording[; start = -1, duration = -1, channels = recording.channels])
        play(recording, supervision)
    
    Send the signal of a recording to the default audio device. The
    arguments have the same meaning as for [`load`](@ref).
    """
    function play(r::Recording; start = -1, duration = -1, channels = r.channels)
        signal, _ = load(r; start = start, duration = duration, channels = channels)
        noutputs = length(channels)
        # No input channel, one output channel per selected channel.
        PortAudioStream(0, noutputs; samplerate = r.samplerate) do stream
            write(stream, signal)
        end
    end
    # Play only the segment and the single channel described by supervision `s`.
    function play(r::Recording, s::Supervision)
        return play(r; start = s.start, duration = s.duration, channels = [s.channel])
    end