Newer
Older
using DataFrames
using Semirings
using TensorFSTs
using CSV
using BenchmarkTools
using OpenFst
using Glob
using IterTools
include("../../TensorFSTs.jl/lib/OpenFstConvert.jl")
pairs = [("dense","topology"),
("dense","charlm"),
("dense","random"),
("topology","dense")]
function compbench(compfunc, machineA, machineB, seconds)
b = @benchmarkable $compfunc($machineA, $machineB)
# tune!(b)
t = run(b, samples=100, seconds=seconds, evals=1)
t.times
end
function compbench2(compfunc, machineA, machineB, seconds)
AM, Aa, Aw = tensorFST2SparseArray(machineA)
BM, Ba, Bw = tensorFST2SparseArray(machineB)
min_cd = min(AM.dims[4], BM.dims[3])
AM = AM[:,:,:,1:min_cd]
BM = BM[:,:,1:min_cd,:]
b = @benchmarkable $compfunc($AM, $BM, $Aa, $Aw, $Ba, $Bw)
# tune!(b)
t = run(b, samples=100, seconds=seconds, evals=1)
t.times
end
function renamepath(path)
type = splitpath(path)[2]
name = replace(splitpath(path)[3],".fst"=>"")
type * "-" * name
end
function main()
machinezoo_path = "../../MachineZoo.jl/"
if !isdir(joinpath(machinezoo_path, "machines", "composition"))
mkdir(joinpath(machinezoo_path, "machines", "composition"))
end
dfs = []
for path in glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
df = DataFrame(CSV.File(path));
push!(dfs, df)
end
df = vcat(dfs...)
df[!,"type"] = map(x -> splitpath(x)[2] ,df[!,"file"])
if size(df)[1] == 0
println("No machines found")
exit()
end
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# filter
# df = df[ ((df[!,"# of arcs"].<=1000) .& (df[!,"type"].=="dense")) .| (df[!,"type"].=="charlm") ,:]
df = df[ .!((df[!,"# of arcs"].>1000) .& (df[!,"type"].=="dense")),:]
# println(df)
# exit()
results = []
num = 0
for pair in pairs
dfx = df[df.type.==pair[1],:]
dfy = df[df.type.==pair[2],:]
for (p,types) in zip(product(dfx.file, dfy.file),product(dfx[!,"arc type"], dfy[!,"arc type"]) )
if types[1] != types[2]
continue
end
ofstA = OF.read(joinpath(machinezoo_path, p[1]))
ofstB = OF.read(joinpath(machinezoo_path, p[2]))
println(p)
ofstC = OF.compose(ofstA, ofstB)
if OF.numstates(ofstC) == 0
println("skipping")
continue
end
p1 = renamepath(p[1])
p2 = renamepath(p[2])
num=num+1
fileC = joinpath( "machines", "composition", "$(num)-$(p1)-x-$(p2).fst")
OF.write(ofstC, joinpath(machinezoo_path,fileC))
# otimes = compbench(OF.compose, ofstA, ofstB, oseconds)
# println(" of: ",mean(otimes))
times = Dict()
times["ofst"] = compbench(OF.compose, ofstA, ofstB, oseconds)
try
times["tfst"] = compbench(TF.fsmcompose, TF.SparseTensorFSM(ofstA), TF.SparseTensorFSM(ofstB), oseconds)
catch
println("tf failed")
times["tfst"] = [NaN]
end
stats = Dict()
stats[Symbol("fileA")] = p[1]
stats[Symbol("fileB")] = p[2]
stats[Symbol("fileC")] = fileC
for (k,v) in times
stats[Symbol("$(k)_min")] = nanminimum(v)
stats[Symbol("$(k)_max")] = nanmaximum(v)
stats[Symbol("$(k)_mean")] = nanmean(v)
stats[Symbol("$(k)_std")] = nanstd(v)
stats[Symbol("$(k)_len")] = length(filter(!isnan, v))
end
push!(results, NamedTuple(stats))
if length(results) != 0
CSV.write("composition_benchmark.csv", DataFrame(results))
else
println("No results")