# SPDX-License-Identifier: CECILL-2.1

module SpeechRecognitionFSTs

using ..TensorFSTs
using ..TensorFSTs.LinearFSTs

export EmissionMapping, lexiconfst, tokenfst
"""
struct EmissionMapping <: AbstractMatrix{Int}
numstates::Int
numtokens::Int
end
Default emission mapping which assumes the same number of emissions per token.
"""
struct EmissionMapping <: AbstractMatrix{Int}
Base.size(x::EmissionMapping) = (x.numpdfs, x.numtokens)
Base.getindex(x::EmissionMapping, i, j) = (j - 1) * x.numpdfs + i
Base.maximum(x::EmissionMapping) = (x.numpdfs * x.numtokens)
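
# Illustrative sketch (kept as a comment, not executed at load time): how the
# default mapping enumerates emission ids, following the `getindex` definition
# above. The concrete sizes (3 pdfs per token, 2 tokens) are made up.
#
#   m = EmissionMapping(3, 2)
#   m[1, 1]      # == 1, first pdf of the first token
#   m[3, 1]      # == 3, last pdf of the first token
#   m[1, 2]      # == 4, first pdf of the second token
#   maximum(m)   # == 6, total number of distinct emission ids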
"""
tokenfst(S, topo, initweights, finalweights, tokens, mapping)
Create a FST composed of a set of smaller FSTs. Each token share the same FST
topology specified with `topo = [(src, dest, weight), ...]`. `S` is a
`Semiring` of output FST, `tokens` is a list of token IDs, and
`mapping[tokens[i]]` is the output symbol .
"""
function tokenfst(
    S,
    topo,
    initweights,
    finalweights,
    tokens,
    mapping = nothing
)
    states = Set(Int[])
    arcs, init, final = [], [], []

    # States of the shared topology. `st` is also needed further down to detect
    # the last state of each token's sub-FST, so it is built unconditionally.
    st = Set(Int[])
    for top in topo
        push!(st, top[1])
    end

    if isnothing(mapping)
        mapping = EmissionMapping(length(st), length(tokens))
    end

    # Extra state (infinite loop)
    init_state = length(mapping) + 1
    push!(states, init_state)

    # One sub-FST per token. NOTE: the loop header and offset were reconstructed;
    # each token's topology is assumed to occupy `length(st)` consecutive states.
    for (i, token) in enumerate(tokens)
        offset = (i - 1) * length(st)

        for topo_arc in topo
            src, dest, weight = offset + topo_arc[1], offset + topo_arc[2], topo_arc[3]
            arc = Arc(
                src = src,
                # NOTE: the isym argument was reconstructed; it is assumed to be
                # the emission id of this (state, token) pair.
                isym = mapping[topo_arc[1], i],
                osym = 0,
                dest = dest,
                weight = S(weight)
            )

            # Init arc to sources
            if topo_arc[1] == 1
                init_arc = Arc(
                    src = init_state,
                    isym = 0,
                    osym = token,
                    dest = src,
                    weight = S(weight)
                )
                push!(arcs, init_arc)
            end

            # Final arcs to destinations
            if topo_arc[1] == length(st)
                final_arc = Arc(
                    src = src,
                    isym = 0,
                    osym = 0,
                    dest = init_state,
                    weight = S(weight)
                )
                push!(arcs, final_arc)
            end

            push!(states, src)
            push!(states, dest)
            push!(arcs, arc)
        end

        # for (state, weight) in initweights
        #     print(state)
        #     state = offset + state
        #     push!(states, state)
        #     push!(init, state => S(weight))
        # end

        for (state, weight) in finalweights
            state = offset + state
            push!(states, state)
            push!(final, state => S(weight))
        end
    end

    # Actually, there is just one init state
    for (_, weight) in initweights
        push!(init, init_state => S(weight))
    end

    TensorFST(arcs, init, final)
end
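
# Illustrative usage sketch (kept as a comment; the topology, weights, and the
# semiring constructor below are assumptions, not defined in this file):
#
#   # Shared 3-state topology with self loops: (src, dest, weight) triples.
#   topo = [(1, 1, 0.5), (1, 2, 0.5), (2, 2, 0.5), (2, 3, 0.5), (3, 3, 0.5)]
#
#   fst = tokenfst(
#       TropicalSemiring{Float32},   # any semiring type accepted as `S`
#       topo,
#       [(1, 1.0)],                  # initweights: (state, weight) pairs
#       [(3, 1.0)],                  # finalweights: (state, weight) pairs
#       [1, 2, 3],                   # token IDs
#   )                                # `mapping` omitted: default EmissionMapping is built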
include("trie.jl")
include("lexicon.jl")