Skip to content
Snippets Groups Projects
manifest_io.jl 4.95 KiB
Newer Older
# SPDX-License-Identifier: CECILL-2.1

#=====================================================================#
# HTML pretty display

"""
    show(io::IO, ::MIME"text/html", r::AbstractAudioSource)

Write an HTML `<audio controls>` element to `io` whose `src` attribute is
a `data:audio/wav;base64,` URI. The samples and sampling rate are obtained
from `load(r)` and written as 8-bit PCM WAV data.
"""
function Base.show(io::IO, ::MIME"text/html", r::AbstractAudioSource)
    # Everything up to the start of the base64 payload.
    print(io, "<audio controls ", "src=\"data:audio/wav;base64,")

    samples, rate = load(r)

    # The WAV bytes are base64-encoded on the fly into `io`. Closing the
    # pipe completes the encoding; it does not close `io` itself.
    encoder = Base64EncodePipe(io)
    wavwrite(samples, encoder; Fs = rate, nbits = 8, compression = WAV.WAVE_FORMAT_PCM)
    close(encoder)

    # Terminate the attribute and the tag.
    print(io, "\" />", '\n')
    return nothing
end

#=====================================================================#
# JSON serialization of a manifest item

"""
    show(io::IO, ::MIME"application/json", s::FileAudioSource)

Write `s` to `io` as a JSON object with two members: `"type"`, always
`"path"`, and `"data"`, the textual form of `s.path` as a JSON string.

IO properties read from `io`:
- `:compact` (default `false`): when true everything is written on one
  line, otherwise each member is written on its own line.
- `:indent` (default `0`): number of spaces written before the closing
  brace; in non-compact mode members are indented two spaces further.

The opening brace is written without indentation.
"""
function Base.show(io::IO, m::MIME"application/json", s::FileAudioSource)
    compact = get(io, :compact, false)
    indent = get(io, :indent, 0)
    printfn = compact ? print : println
    pad = repeat(" ", indent + (compact ? 0 : 2))

    printfn(io, "{")
    printfn(io, pad, "\"type\": \"path\", ")
    # `JSON.json` adds the surrounding quotes and escapes characters such
    # as `"` and `\` that would otherwise produce invalid JSON (for
    # instance in Windows-style paths).
    printfn(io, pad, "\"data\": ", JSON.json(string(s.path)))
    print(io, repeat(" ", indent), "}")
    return nothing
end

"""
    show(io::IO, ::MIME"application/json", s::URLAudioSource)

Write `s` to `io` as a JSON object with two members: `"type"`, always
`"url"`, and `"data"`, the textual form of `s.url` as a JSON string.

IO properties read from `io`:
- `:compact` (default `false`): when true everything is written on one
  line, otherwise each member is written on its own line.
- `:indent` (default `0`): number of spaces written before the closing
  brace; in non-compact mode members are indented two spaces further.

The opening brace is written without indentation.
"""
function Base.show(io::IO, m::MIME"application/json", s::URLAudioSource)
    compact = get(io, :compact, false)
    indent = get(io, :indent, 0)
    printfn = compact ? print : println
    pad = repeat(" ", indent + (compact ? 0 : 2))

    # No padding before the opening brace: the object may be emitted right
    # after a key on the same line, and padding here would only add stray
    # whitespace between the key and the brace.
    printfn(io, "{")
    printfn(io, pad, "\"type\": \"url\", ")
    # `JSON.json` adds the surrounding quotes and escapes characters such
    # as `"` and `\` that would otherwise produce invalid JSON.
    printfn(io, pad, "\"data\": ", JSON.json(string(s.url)))
    print(io, repeat(" ", indent), "}")
    return nothing
end

"""
    show(io::IO, ::MIME"application/json", s::CmdAudioSource)

Write `s` to `io` as a JSON object with two members: `"type"`, always
`"cmd"`, and `"data"`, the textual form of `s.cmd` with every backtick
character removed, as a JSON string.

IO properties read from `io`:
- `:compact` (default `false`): when true everything is written on one
  line, otherwise each member is written on its own line.
- `:indent` (default `0`): number of spaces written before the closing
  brace; in non-compact mode members are indented two spaces further.

The opening brace is written without indentation.
"""
function Base.show(io::IO, m::MIME"application/json", s::CmdAudioSource)
    compact = get(io, :compact, false)
    indent = get(io, :indent, 0)
    printfn = compact ? print : println
    pad = repeat(" ", indent + (compact ? 0 : 2))

    # Textual form of the command without the backtick delimiters.
    strcmd = replace(string(s.cmd), "`" => "")

    printfn(io, "{")
    printfn(io, pad, "\"type\": \"cmd\", ")
    # `JSON.json` adds the surrounding quotes and escapes characters such
    # as `"` and `\`, which can appear in command arguments and would
    # otherwise produce invalid JSON.
    printfn(io, pad, "\"data\": ", JSON.json(strcmd))
    print(io, repeat(" ", indent), "}")
    return nothing
end

"""
    show(io::IO, ::MIME"application/json", r::Recording)

Write `r` to `io` as a JSON object with the members `"id"`, `"src"`,
`"channels"` and `"samplerate"`, in that order.

`"id"` is the textual form of `r.id` as a JSON string, `"src"` is
`r.source` rendered through its own `application/json` `show` method,
`"channels"` is an array of the elements of `r.channels`, and
`"samplerate"` is `r.samplerate` printed as is.

When the `:compact` IO property is true (default `false`) the object is
written on a single line; otherwise members are indented by two spaces and
separated by newlines.
"""
function Base.show(io::IO, m::MIME"application/json", r::Recording)
    compact = get(io, :compact, false)
    indent = compact ? 0 : 2
    printfn = compact ? print : println
    pad = repeat(" ", indent)

    printfn(io, "{")
    # `JSON.json` quotes the identifier and escapes characters such as `"`
    # and `\` that would otherwise produce invalid JSON.
    printfn(io, pad, "\"id\": ", JSON.json(string(r.id)), ", ")

    # The nested source object starts on the same line as its key; the
    # `:indent` property tells it how far to indent its own lines.
    print(io, pad, "\"src\": ")
    show(IOContext(io, :indent => indent), m, r.source)
    printfn(io, ", ")

    print(io, pad, "\"channels\": [")
    join(io, r.channels, ",")
    printfn(io, "], ")

    printfn(io, pad, "\"samplerate\": ", r.samplerate)
    print(io, "}")
    return nothing
end

"""
    show(io::IO, ::MIME"application/json", s::Supervision)

Write `s` to `io` as a JSON object with the members `"id"`,
`"recording_id"`, `"start"`, `"duration"`, `"channel"` and `"data"`, in
that order.

`"id"` and `"recording_id"` are the textual forms of the corresponding
fields as JSON strings; `"start"`, `"duration"` and `"channel"` are
printed as is; `"data"` is `s.data` serialized with `json`.

When the `:compact` IO property is true (default `false`) the object is
written on a single line; otherwise members are indented by two spaces and
separated by newlines.
"""
function Base.show(io::IO, m::MIME"application/json", s::Supervision)
    compact = get(io, :compact, false)
    indent = compact ? 0 : 2
    printfn = compact ? print : println
    pad = repeat(" ", indent)

    printfn(io, "{")
    # `json` quotes the identifiers and escapes characters such as `"` and
    # `\` that would otherwise produce invalid JSON.
    printfn(io, pad, "\"id\": ", json(string(s.id)), ", ")
    printfn(io, pad, "\"recording_id\": ", json(string(s.recording_id)), ", ")
    printfn(io, pad, "\"start\": ", s.start, ", ")
    printfn(io, pad, "\"duration\": ", s.duration, ", ")
    printfn(io, pad, "\"channel\": ", s.channel, ", ")
    printfn(io, pad, "\"data\": ", json(s.data))
    print(io, "}")
    return nothing
end

"""
    JSON.json(r::Union{Recording, Supervision}; compact = true)

Return the `application/json` rendering of `r` as a `String`. The value of
`compact` is forwarded to `show` through the `:compact` IO property.
"""
function JSON.json(r::Union{Recording, Supervision}; compact = true)
    # `sprint` renders into a temporary buffer wrapped in an `IOContext`
    # carrying the `:compact` property, and returns the buffer as a String.
    return sprint(show, MIME("application/json"), r; context = (:compact => compact))
end

#=====================================================================#
# Converting a dictionary to a manifest item.

"""
    AudioSource(d::Dict)

Build an audio source from a dictionary. `d["type"]` selects the concrete
type (`"path"` gives `FileAudioSource`, `"url"` gives `URLAudioSource`,
`"cmd"` gives `CmdAudioSource`), and `d["data"]` is passed as the single
argument to that type.

Throws an `ArgumentError` when `d["type"]` is none of the values above.
"""
function AudioSource(d::Dict)
    kind = d["type"]
    kind == "path" && return FileAudioSource(d["data"])
    kind == "url" && return URLAudioSource(d["data"])
    kind == "cmd" && return CmdAudioSource(d["data"])
    throw(ArgumentError("invalid type: $(kind)"))
end

"""
    Recording(d::Dict)

Build a `Recording` from a dictionary with the keys `"id"`, `"src"`,
`"channels"` and `"samplerate"`. The `"src"` entry is converted with
`AudioSource` and the `"channels"` entry is converted to a `Vector{Int}`.
"""
function Recording(d::Dict)
    id = d["id"]
    source = AudioSource(d["src"])
    channels = convert(Vector{Int}, d["channels"])
    samplerate = d["samplerate"]
    return Recording(id, source, channels, samplerate)
end

"""
    Supervision(d::Dict)

Build a `Supervision` from a dictionary with the keys `"id"`,
`"recording_id"`, `"start"`, `"duration"`, `"channel"` and `"data"`. The
values are passed to the positional constructor in that order, unchanged.
"""
function Supervision(d::Dict)
    id = d["id"]
    recording_id = d["recording_id"]
    start = d["start"]
    duration = d["duration"]
    channel = d["channel"]
    data = d["data"]
    return Supervision(id, recording_id, start, duration, channel, data)
end

#=====================================================================#
# Writing / reading manifest from file.

"""
    writemanifest(io::IO, manifest::Dict)

Write every value of `manifest` to `io`, one per line, each serialized
with `json`. The keys of `manifest` are not written.
"""
function writemanifest(io::IO, manifest::Dict)
    foreach(values(manifest)) do item
        println(io, json(item))
    end
    return nothing
end

"""
    readmanifest(io::IO, T)

Read a manifest from `io`, one JSON document per line. Each line is parsed
with `JSON.parse` and the result is passed to `T`; the resulting item is
stored in the returned `Dict` under its `id` property. If several items
share the same `id`, the last one read is kept.

Lines that are empty or contain only whitespace are skipped.
"""
function readmanifest(io::IO, T)
    manifest = Dict()
    for line in eachline(io)
        # Tolerate blank lines (e.g. a trailing empty line at the end of
        # the file) instead of failing to parse them as JSON.
        isempty(strip(line)) && continue
        item = T(JSON.parse(line))
        manifest[item.id] = item
    end
    return manifest
end

# File name of the JSON-lines manifest holding the recordings of `subset`.
function manifestname(::Type{<:Recording}, subset)
    return string("recording-manifest-", subset, ".jsonl")
end

# File name of the JSON-lines manifest holding the supervisions of `subset`.
function manifestname(::Type{<:Supervision}, subset)
    return string("supervision-manifest-", subset, ".jsonl")
end

# Read the manifest stored in the file `path`, building items of type `T`
# with `readmanifest`.
function load(T::Type{<:Union{Recording,Supervision}}, path::AbstractString)
    return open(path, "r") do f
        readmanifest(f, T)
    end
end

# Read the manifest of type `T` for `subset` from the file named by
# `manifestname(T, subset)` inside `path(corpus, dir)`.
function load(corpus::SpeechCorpus, dir, T, subset)
    manifestfile = joinpath(path(corpus, dir), manifestname(T, subset))
    return load(T, manifestfile)
end

# Same as above, using `corporadir` as the directory.
function load(corpus::SpeechCorpus, T, subset)
    return load(corpus, corporadir, T, subset)
end