# SPDX-License-Identifier: CECILL-2.1

#=====================================================================#
# HTML pretty display

# Render an audio source as an HTML `<audio controls>` element whose `src`
# is an inline base64 `data:audio/wav` URI.
#
# The samples and sampling rate come from `loadsource(r, :)` (defined
# elsewhere; the third returned value is ignored) and are written with
# `wavwrite` as 8-bit PCM.
function Base.show(io::IO, ::MIME"text/html", r::AbstractAudioSource)
    print(io, "<audio controls ")
    print(io, "src=\"data:audio/wav;base64,")
    x, s, _ = loadsource(r, :)
    iob64_encode = Base64EncodePipe(io)
    wavwrite(x, iob64_encode, Fs = s, nbits = 8, compression = WAV.WAVE_FORMAT_PCM)
    # Closing the pipe completes the base64 encoding; it does not close `io`.
    close(iob64_encode)
    println(io, "\" />")
end

#=====================================================================#
# JSON serialization of a manifest item

# Write the two-field JSON object shared by every audio source:
# `{"type": <kind>, "data": <data>}`.
#
# Reads `:compact` (default `false`) and `:indent` (default `0`) from the
# IO context. In compact mode everything is printed on one line; otherwise
# each field goes on its own line, indented by `indent + 2` spaces, and the
# closing brace is indented by `indent` spaces. The opening brace is printed
# without padding because callers emit it right after a `"key": ` prefix.
#
# `data` is encoded with `json(string(data))` so that quotes, backslashes and
# control characters are escaped instead of producing invalid JSON.
function _showsourcejson(io::IO, kind::AbstractString, data)
    compact = get(io, :compact, false)
    indent = get(io, :indent, 0)
    printfn = compact ? print : println
    pad = repeat(" ", indent + (compact ? 0 : 2))
    printfn(io, "{")
    printfn(io, pad, "\"type\": \"", kind, "\", ")
    printfn(io, pad, "\"data\": ", json(string(data)))
    print(io, repeat(" ", indent), "}")
end

# JSON form of a file-backed source: type "path", data is `s.path`.
Base.show(io::IO, m::MIME"application/json", s::FileAudioSource) =
    _showsourcejson(io, "path", s.path)

# JSON form of a URL-backed source: type "url", data is `s.url`.
Base.show(io::IO, m::MIME"application/json", s::URLAudioSource) =
    _showsourcejson(io, "url", s.url)

# JSON form of a command-backed source: type "cmd", data is the printed
# form of `s.cmd` with every backtick removed.
function Base.show(io::IO, m::MIME"application/json", s::CmdAudioSource)
    strcmd = replace("$(s.cmd)", "`" => "")
    _showsourcejson(io, "cmd", strcmd)
end

# JSON form of a recording: an object with the fields `id`, `src`,
# `channels` and `samplerate`, in that order.
#
# With `:compact => true` in the IO context the object is printed on a
# single line; otherwise one field per line, indented by two spaces. The
# nested source is shown through an IO context carrying the same indent.
function Base.show(io::IO, m::MIME"application/json", r::Recording)
    compact = get(io, :compact, false)
    indent = compact ? 0 : 2
    printfn = compact ? print : println
    pad = repeat(" ", indent)
    printfn(io, "{")
    # `json(string(...))` quotes and escapes the identifier.
    printfn(io, pad, "\"id\": ", json(string(r.id)), ", ")
    print(io, pad, "\"src\": ")
    show(IOContext(io, :indent => indent), m, r.source)
    printfn(io, ", ")
    print(io, pad, "\"channels\": [")
    join(io, r.channels, ",")
    printfn(io, "], ")
    printfn(io, pad, "\"samplerate\": ", r.samplerate)
    print(io, "}")
end

# JSON form of a supervision: an object with the fields `id`,
# `recording_id`, `start`, `duration`, `channel` and `data`, in that order.
#
# With `:compact => true` in the IO context the object is printed on a
# single line; otherwise one field per line, indented by two spaces.
# `s.data` is encoded with `json`.
function Base.show(io::IO, m::MIME"application/json", s::Supervision)
    compact = get(io, :compact, false)
    indent = compact ? 0 : 2
    printfn = compact ? print : println
    pad = repeat(" ", indent)
    printfn(io, "{")
    # String-valued fields are escaped through `json(string(...))`.
    printfn(io, pad, "\"id\": ", json(string(s.id)), ", ")
    printfn(io, pad, "\"recording_id\": ", json(string(s.recording_id)), ", ")
    printfn(io, pad, "\"start\": ", s.start, ", ")
    printfn(io, pad, "\"duration\": ", s.duration, ", ")
    printfn(io, pad, "\"channel\": ", s.channel, ", ")
    printfn(io, pad, "\"data\": ", s.data |> json)
    print(io, "}")
end

"""
    JSON.json(r::Union{Recording, Supervision}; compact = true)

Return the `application/json` representation of `r` as a `String`.
`compact` is forwarded to the `show` method through the `:compact` IO
context key.
"""
function JSON.json(r::Union{Recording, Supervision}; compact = true)
    out = IOBuffer()
    show(IOContext(out, :compact => compact), MIME("application/json"), r)
    return String(take!(out))
end

#=====================================================================#
# Converting a dictionary to a manifest item.

"""
    AudioSource(d::Dict)

Build an audio source from `d`. `d["type"]` selects the concrete type
(`"path"`, `"url"` or `"cmd"`), whose constructor is called with `d["data"]`.
Throws an `ArgumentError` for any other type value.
"""
function AudioSource(d::Dict)
    kind = d["type"]
    T = if kind == "path"
        FileAudioSource
    elseif kind == "url"
        URLAudioSource
    elseif kind == "cmd"
        CmdAudioSource
    else
        throw(ArgumentError("invalid type: $(kind)"))
    end
    return T(d["data"])
end

"""
    Recording(d::Dict)

Build a `Recording` from the keys `"id"`, `"src"`, `"channels"` and
`"samplerate"` of `d`. The channels are converted to a `Vector{Int}`.
"""
Recording(d::Dict) = Recording(
    d["id"],
    AudioSource(d["src"]),
    convert(Vector{Int}, d["channels"]),
    d["samplerate"]
)

"""
    Supervision(d::Dict)

Build a `Supervision` from the keys `"id"`, `"recording_id"`, `"start"`,
`"duration"`, `"channel"` and `"data"` of `d`.
"""
Supervision(d::Dict) = Supervision(
    d["id"],
    d["recording_id"],
    d["start"],
    d["duration"],
    d["channel"],
    d["data"]
)

#=====================================================================#
# Writing / reading manifest from file.
"""
    writemanifest(io::IO, manifest::Dict)

Write each value of `manifest` to `io` as the string returned by `json`,
one item per line.
"""
function writemanifest(io::IO, manifest::Dict)
    for item in values(manifest)
        println(io, json(item))
    end
end

"""
    readmanifest(io::IO, T)

Read a manifest from `io`, one JSON document per line. Each line is parsed
with `JSON.parse`, passed to `T`, and stored under its `id` field. Blank
(empty or whitespace-only) lines are skipped. Returns the resulting `Dict`.
"""
function readmanifest(io::IO, T)
    manifest = Dict()
    for line in eachline(io)
        # A blank line holds no item; skip it rather than hand it to the parser.
        isempty(strip(line)) && continue
        item = T(JSON.parse(line))
        manifest[item.id] = item
    end
    return manifest
end

# File name of the manifest of a given item type for the subset `subset`:
# "recording-manifest-<subset>.jsonl" or "supervision-manifest-<subset>.jsonl".
manifestname(T::Type{<:Recording}, subset) = "recording-manifest-$(subset).jsonl"
manifestname(T::Type{<:Supervision}, subset) = "supervision-manifest-$(subset).jsonl"

"""
    load(T, path::AbstractString)
    load(corpus::SpeechCorpus, dir, T, subset)
    load(corpus::SpeechCorpus, T, subset)

Read a manifest of items of type `T` (`Recording` or `Supervision`).

The first form opens `path` for reading and calls `readmanifest`. The second
form builds the file path by joining `path(corpus, dir)` with
`manifestname(T, subset)`. The third form uses `corporadir` (defined
elsewhere) as `dir`.
"""
function load(T::Type{<:Union{Recording,Supervision}}, path::AbstractString)
    return open(path, "r") do f
        readmanifest(f, T)
    end
end

load(corpus::SpeechCorpus, dir, T, subset) =
    load(T, joinpath(path(corpus, dir), manifestname(T, subset)))

load(corpus::SpeechCorpus, T, subset) = load(corpus, corporadir, T, subset)