Skip to content
Snippets Groups Projects
Commit 755c6d24 authored by Lucas Ondel Yang
Browse files

Merge branch '44-wrong-emission-ids' into 'main'

Resolve "Wrong emission IDs"

Closes #44

See merge request !58
parents 57d8d47e 69bbde22
Branches 27-read-iarpa-format-lm-2
No related tags found
1 merge request: !58 Resolve "Wrong emission IDs"
......@@ -46,6 +46,9 @@ function tokenfst(
states = Set(Int[])
arcs, init, final = [], [], []
initdict = Dict(initweights...)
finaldict = Dict(finalweights...)
if isnothing(mapping)
st = Set(Int[])
for _ in tokens
......@@ -57,9 +60,9 @@ function tokenfst(
mapping = EmissionMapping(length(st), length(tokens))
end
# Extra state (infinite loop)
init_state = length(mapping) + 1
push!(states, init_state)
# Extra state
extra_state = length(mapping) + 1
push!(states, extra_state)
for (j, token) in enumerate(tokens)
offset = length(states) - 1
......@@ -76,55 +79,60 @@ function tokenfst(
weight = S(weight)
)
# Init arc to sources
if topo_arc[1] == 1
# Initialization arcs
if topo_arc[1] in keys(initdict)
init_arc = Arc(
src = init_state,
src = extra_state,
isym = 0,
osym = token,
dest = src,
weight = S(weight)
weight = S(initdict[topo_arc[1]])
)
push!(arcs, init_arc)
end
if topo_arc[2] in keys(initdict)
init_arc = Arc(
src = extra_state,
isym = 0,
osym = token,
dest = dest,
weight = S(initdict[topo_arc[2]])
)
push!(arcs, init_arc)
end
# Final arcs to destinations
if topo_arc[2] == size(mapping, 1)
# Final arcs
if topo_arc[1] in keys(finaldict)
final_arc = Arc(
src = src,
isym = 0,
osym = 0,
dest = extra_state,
weight = S(finaldict[topo_arc[1]])
)
push!(arcs, final_arc)
end
if topo_arc[2] in keys(finaldict)
final_arc = Arc(
src = dest,
isym = 0,
osym = 0,
dest = init_state,
weight = S(weight)
)
dest = extra_state,
weight = S(finaldict[topo_arc[2]])
)
push!(arcs, final_arc)
end
push!(states, src)
push!(states, dest)
push!(arcs, arc)
end
# for (state, weight) in initweights
# print(state)
# state = offset + state
# push!(states, state)
# push!(init, state => S(weight))
# end
for (state, weight) in finalweights
state = offset + state
push!(states, state)
push!(final, state => S(weight))
end
end
# Actually, there is just one init state
for (_, weight) in initweights
push!(init, init_state => S(weight))
end
end
push!(init, extra_state => one(S))
push!(final, extra_state => one(S))
TensorFST(arcs, init, final)
TensorFST(collect(Set(arcs)), init, final)
end
include("trie.jl")
......
......@@ -44,7 +44,7 @@ function SymbolTable(syms)
end
"""
syms(t [, exclude_ϵ])
syms(t [, exclude_ϵ=true])
Return all symbols from symbol table `t`.
"""
......@@ -57,7 +57,7 @@ function syms(t::SymbolTable; exclude_ϵ=true)
end
"""
ids(t [, exclude_ϵ])
ids(t [, exclude_ϵ=true])
Return all IDs from symbol table `t`.
"""
......
......@@ -31,11 +31,11 @@ S = BoolSemiring
# # Extra node
# Source
(src = (7,), isym = 0, osym = 4, weight = one(S), dest = (5,)),
(src = (7,), isym = 0, osym = 3, weight = one(S), dest = (3, )),
(src = (7,), isym = 0, osym = 3, weight = one(S), dest = (3,)),
(src = (7,), isym = 0, osym = 2, weight = one(S), dest = (1,)),
# Destination
(src = (6,), isym = 0, osym = 0, weight = one(S), dest = (7,)),
(src = (4,), isym = 0, osym = 0, weight = one(S), dest = (7, )),
(src = (4,), isym = 0, osym = 0, weight = one(S), dest = (7,)),
(src = (2,), isym = 0, osym = 0, weight = one(S), dest = (7,)),
......@@ -49,7 +49,7 @@ S = BoolSemiring
],
[(7,) => one(S)],
[(4,) => one(S), (6,) => one(S), (2,) => one(S)],
[(7,) => one(S)]
)
@test issame(U, U2)
end
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment