Skip to content
Snippets Groups Projects
Commit c31c1938 authored by Simon Devauchelle's avatar Simon Devauchelle
Browse files

MFA_FR: a list of out-of-vocabulary (OOV) words generated by G2P models can be added to the lexicon

parent a82df3ee
No related branches found
No related tags found
No related merge requests found
...@@ -80,7 +80,7 @@ Return the french dictionary of pronunciation as provided by MFA (french_mfa v2. ...@@ -80,7 +80,7 @@ Return the french dictionary of pronunciation as provided by MFA (french_mfa v2.
generated by G2P model (french_mfa) on INA Diachronic Corpus. generated by G2P model (french_mfa) on INA Diachronic Corpus.
""" """
function MFAFRDICT(path) function MFAFRDICT(path; oovs="")
if ! isfile(path) if ! isfile(path)
mkpath(dirname(path)) mkpath(dirname(path))
dir = mktempdir() dir = mktempdir()
...@@ -96,5 +96,17 @@ function MFAFRDICT(path) ...@@ -96,5 +96,17 @@ function MFAFRDICT(path)
lexicon[word] = prononciations lexicon[word] = prononciations
end end
end end
# Add out-of-vocabulary words
if isfile(oovs)
open(oovs, "r") do o
for line in eachline(o)
word, pron... = split(line)
prononciations = get(lexicon, word, [])
push!(prononciations, pron)
lexicon[word] = prononciations
end
end
end
lexicon lexicon
end end
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment