Skip to content
Snippets Groups Projects
manifest_io.jl 4.51 KiB
Newer Older
  • Learn to ignore specific revisions
  • # SPDX-License-Identifier: CECILL-2.1
    
    #=====================================================================#
    # JSON serialization of a manifest item
    
    """
        show(io::IO, ::MIME"application/json", s::FileAudioSource)

    Write `s` to `io` as a JSON object whose `type` member is `path` and whose `data`
    member is the JSON-encoded text of `s.path`.

    Two `IOContext` properties of `io` are read:
    - `:compact` (default `false`): when `true` everything is written on one line.
    - `:indent` (default `0`): number of spaces written before the closing brace; the
      members get two more spaces unless `:compact` is set.

    The opening brace is written without indentation, so the object can directly
    follow a key already written by the caller.
    """
    function Base.show(io::IO, m::MIME"application/json", s::FileAudioSource)
        compact = get(io, :compact, false)
        indent = get(io, :indent, 0)
        emit = compact ? print : println
        pad = repeat(" ", indent + (compact ? 0 : 2))
        emit(io, "{")
        emit(io, pad, "\"type\": \"path\", ")
        # Encode the value with the JSON library so that quotes, backslashes and
        # control characters are escaped. Writing the raw text between quotes
        # produced invalid JSON for such paths.
        emit(io, pad, "\"data\": ", JSON.json(string(s.path)))
        print(io, repeat(" ", indent), "}")
    end
    
    """
        show(io::IO, ::MIME"application/json", s::URLAudioSource)

    Write `s` to `io` as a JSON object whose `type` member is `url` and whose `data`
    member is the JSON-encoded text of `s.url`.

    Two `IOContext` properties of `io` are read:
    - `:compact` (default `false`): when `true` everything is written on one line.
    - `:indent` (default `0`): number of spaces written before the closing brace; the
      members get two more spaces unless `:compact` is set.

    The opening brace is written without indentation, matching the methods for
    `FileAudioSource` and `CmdAudioSource`, so the object can directly follow a key
    already written by the caller.
    """
    function Base.show(io::IO, m::MIME"application/json", s::URLAudioSource)
        compact = get(io, :compact, false)
        indent = get(io, :indent, 0)
        emit = compact ? print : println
        pad = repeat(" ", indent + (compact ? 0 : 2))
        # No padding before the brace: the sibling source types do not add any, and
        # the extra spaces ended up between the caller's key and the brace.
        emit(io, "{")
        emit(io, pad, "\"type\": \"url\", ")
        # Encode the value with the JSON library so that quotes, backslashes and
        # control characters are escaped instead of breaking the document.
        emit(io, pad, "\"data\": ", JSON.json(string(s.url)))
        print(io, repeat(" ", indent), "}")
    end
    
    """
        show(io::IO, ::MIME"application/json", s::CmdAudioSource)

    Write `s` to `io` as a JSON object whose `type` member is `cmd` and whose `data`
    member is the JSON-encoded printed form of `s.cmd` with every backtick character
    removed.

    Two `IOContext` properties of `io` are read:
    - `:compact` (default `false`): when `true` everything is written on one line.
    - `:indent` (default `0`): number of spaces written before the closing brace; the
      members get two more spaces unless `:compact` is set.

    The opening brace is written without indentation, so the object can directly
    follow a key already written by the caller.
    """
    function Base.show(io::IO, m::MIME"application/json", s::CmdAudioSource)
        compact = get(io, :compact, false)
        indent = get(io, :indent, 0)
        emit = compact ? print : println
        pad = repeat(" ", indent + (compact ? 0 : 2))
        emit(io, "{")
        emit(io, pad, "\"type\": \"cmd\", ")
        # Drop the backticks from the printed form of the command.
        strcmd = replace("$(s.cmd)", "`" => "")
        # Encode the value with the JSON library: command lines may contain double
        # quotes or backslashes, which must be escaped to keep the output valid.
        emit(io, pad, "\"data\": ", JSON.json(strcmd))
        print(io, repeat(" ", indent), "}")
    end
    
    """
        show(io::IO, ::MIME"application/json", r::Recording)

    Write `r` to `io` as a JSON object with the members `id`, `src`, `channels` and
    `samplerate`, in that order.

    The `:compact` property of `io` (default `false`) selects between a single-line
    form and a multi-line form whose members are indented by two spaces. The `src`
    member is produced by the `application/json` `show` method of `r.source`, which
    receives the member indentation through the `:indent` property.
    """
    function Base.show(io::IO, m::MIME"application/json", r::Recording)
        compact = get(io, :compact, false)
        indent = compact ? 0 : 2
        emit = compact ? print : println
        pad = repeat(" ", indent)
        emit(io, "{")
        # Encode the identifier with the JSON library so that quotes and backslashes
        # are escaped instead of corrupting the document.
        emit(io, pad, "\"id\": ", JSON.json(string(r.id)), ", ")
        print(io, pad, "\"src\": ")
        show(IOContext(io, :indent => indent), m, r.source)
        emit(io, ", ")
        print(io, pad, "\"channels\": [")
        join(io, r.channels, ",")
        emit(io, "], ")
        emit(io, pad, "\"samplerate\": ", r.samplerate)
        print(io, "}")
    end
    
    """
        show(io::IO, ::MIME"application/json", s::Supervision)

    Write `s` to `io` as a JSON object with the members `id`, `recording_id`, `start`,
    `duration`, `channel` and `data`, in that order.

    The `:compact` property of `io` (default `false`) selects between a single-line
    form and a multi-line form whose members are indented by two spaces. The `data`
    member is encoded with `JSON.json`; `start`, `duration` and `channel` are written
    with `print`.
    """
    function Base.show(io::IO, m::MIME"application/json", s::Supervision)
        compact = get(io, :compact, false)
        indent = compact ? 0 : 2
        emit = compact ? print : println
        pad = repeat(" ", indent)
        emit(io, "{")
        # Encode both identifiers with the JSON library so that quotes and
        # backslashes are escaped instead of corrupting the document.
        emit(io, pad, "\"id\": ", JSON.json(string(s.id)), ", ")
        emit(io, pad, "\"recording_id\": ", JSON.json(string(s.recording_id)), ", ")
        emit(io, pad, "\"start\": ", s.start, ", ")
        emit(io, pad, "\"duration\": ", s.duration, ", ")
        emit(io, pad, "\"channel\": ", s.channel, ", ")
        emit(io, pad, "\"data\": ", JSON.json(s.data))
        print(io, "}")
    end
    
    """
        JSON.json(r::Union{Recording, Supervision}; compact = true)

    Return the `application/json` representation of `r` as a `String`.

    `compact` is forwarded to the `show` method as the `:compact` IO property.
    """
    function JSON.json(r::Union{Recording, Supervision}; compact = true)
        return sprint(show, MIME("application/json"), r; context = :compact => compact)
    end
    
    #=====================================================================#
    # Converting a dictionary to a manifest item.
    
    """
        AudioSource(d::Dict)

    Build an audio source from the dictionary `d`.

    `d["type"]` selects the concrete type (`path` gives `FileAudioSource`, `url` gives
    `URLAudioSource`, `cmd` gives `CmdAudioSource`), and `d["data"]` is passed to its
    constructor. Any other `type` value throws an `ArgumentError`.
    """
    function AudioSource(d::Dict)
        d["type"] == "path" && return FileAudioSource(d["data"])
        d["type"] == "url" && return URLAudioSource(d["data"])
        d["type"] == "cmd" && return CmdAudioSource(d["data"])
        throw(ArgumentError("invalid type: $(d["type"])"))
    end
    
    """
        Recording(d::Dict)

    Build a `Recording` from the dictionary `d`, reading the keys `id`, `src`,
    `channels` and `samplerate`. The `src` entry is turned into an audio source with
    `AudioSource`, and `channels` is converted to a `Vector{Int}`.
    """
    function Recording(d::Dict)
        id = d["id"]
        source = AudioSource(d["src"])
        channels = convert(Vector{Int}, d["channels"])
        samplerate = d["samplerate"]
        return Recording(id, source, channels, samplerate)
    end
    
    """
        Supervision(d::Dict)

    Build a `Supervision` from the dictionary `d`. The values stored under `id`,
    `recording_id`, `start`, `duration`, `channel` and `data` are passed, in that
    order, to the positional constructor.
    """
    function Supervision(d::Dict)
        fields = ("id", "recording_id", "start", "duration", "channel", "data")
        return Supervision(map(key -> d[key], fields)...)
    end
    
    #=====================================================================#
    # Writing / reading manifest from file.
    
    """
        writemanifest(io::IO, manifest::Dict)

    Write every value of `manifest` to `io`, one per line, each encoded with `json`.
    The keys of `manifest` are not written.
    """
    function writemanifest(io::IO, manifest::Dict)
        for entry in values(manifest)
            println(io, json(entry))
        end
    end
    
    """
        readmanifest(io::IO, T)

    Read a manifest from `io`, one JSON document per line.

    Each non-blank line is parsed with `JSON.parse` and the result is passed to `T`.
    The returned `Dict` maps `item.id` to the item, so a later line with the same id
    replaces an earlier one. Blank and whitespace-only lines are skipped.
    """
    function readmanifest(io::IO, T)
        manifest = Dict()
        for line in eachline(io)
            # A blank line (for instance a trailing newline added by an editor) is not
            # a JSON document; skip it rather than letting the parser abort the load.
            isempty(strip(line)) && continue
            item = T(JSON.parse(line))
            manifest[item.id] = item
        end
        return manifest
    end
    
    
    """
        manifestname(T, subset)

    Return the manifest file name for item type `T` and `subset`: the prefix is
    `recording-manifest-` for `Recording` types and `supervision-manifest-` for
    `Supervision` types, followed by `subset` and the `.jsonl` extension.
    """
    function manifestname(::Type{<:Recording}, subset)
        return "recording-manifest-$(subset).jsonl"
    end

    function manifestname(::Type{<:Supervision}, subset)
        return "supervision-manifest-$(subset).jsonl"
    end
    
    """
        load(T, path::AbstractString)
        load(corpus::SpeechCorpus, dir, T, subset)
        load(corpus::SpeechCorpus, T, subset)

    Load a manifest of items of type `T`.

    The first form opens the file at `path` for reading and returns the result of
    `readmanifest`. The second form builds the file location from
    `path(corpus, dir)` and `manifestname(T, subset)`. The third form does the same
    with `corporadir` as the directory.
    """
    function load(T::Type{<:Union{Recording,Supervision}}, path::AbstractString)
        return open(path, "r") do stream
            readmanifest(stream, T)
        end
    end

    function load(corpus::SpeechCorpus, dir, T, subset)
        manifestfile = joinpath(path(corpus, dir), manifestname(T, subset))
        return load(T, manifestfile)
    end

    function load(corpus::SpeechCorpus, T, subset)
        return load(corpus, corporadir, T, subset)
    end