Skip to content
Snippets Groups Projects
Commit 8604acfc authored by Pablo Riera
Browse files

sd, compose benchmark

parent c038cdbc
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Debugger = "31a5f54b-26ea-5ae9-a837-f05ce5417438"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
......
......@@ -40,84 +40,92 @@ function renamepath(path)
type * "-" * name
end
machinezoo_path = "../../MachineZoo.jl/"
function main()
machinezoo_path = "../../MachineZoo.jl/"
if !isdir(joinpath(machinezoo_path, "machines", "composition"))
mkdir(joinpath(machinezoo_path, "machines", "composition"))
end
tseconds = 4
oseconds = 1
dfs = []
for path in glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
df = DataFrame(CSV.File(path));
push!(dfs, df)
end
df = vcat(dfs...)
df[!,"type"] = map(x -> splitpath(x)[2] ,df[!,"file"])
if size(df)[1] == 0
println("No machines found")
exit()
end
# filter
df = df[ ((df[!,"# of arcs"].<=1000) .& (df[!,"type"].=="dense")) .| (df[!,"type"].=="charlm") ,:]
# println(df)
# exit()
if !isdir(joinpath(machinezoo_path, "machines", "composition"))
mkdir(joinpath(machinezoo_path, "machines", "composition"))
end
results = []
for pair in pairs
dfx = df[df.type.==pair[1],:]
dfy = df[df.type.==pair[2],:]
for (p,types) in zip(product(dfx.file, dfy.file),product(dfx[!,"arc type"], dfy[!,"arc type"]) )
println(p)
if types[1] != types[2]
continue
end
ofstA = OF.read(joinpath(machinezoo_path, p[1]))
ofstB = OF.read(joinpath(machinezoo_path, p[2]))
println(p)
ofstC = OF.compose(ofstA, ofstB)
if OF.numstates(ofstC) == 0
println("skipping")
continue
end
p1 = renamepath(p[1])
p2 = renamepath(p[2])
OF.write(ofstC, joinpath(machinezoo_path, "machines", "composition", "$(p1)-x-$(p2).fst"))
tseconds = 4
oseconds = 1
# otimes = compbench(OF.compose, ofstA, ofstB, oseconds)
# println(" of: ",mean(otimes))
times = Dict()
dfs = []
for path in glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
df = DataFrame(CSV.File(path));
push!(dfs, df)
end
df = vcat(dfs...)
df[!,"type"] = map(x -> splitpath(x)[2] ,df[!,"file"])
times["ofst"] = compbench(OF.compose, ofstA, ofstB, oseconds)
if size(df)[1] == 0
println("No machines found")
exit()
end
try
times["tfst"] = compbench(TF.fsmcompose, TF.SparseTensorFSM(ofstA), TF.SparseTensorFSM(ofstB), oseconds)
catch
println("tf failed")
times["tfst"] = [NaN]
# filter
# df = df[ ((df[!,"# of arcs"].<=1000) .& (df[!,"type"].=="dense")) .| (df[!,"type"].=="charlm") ,:]
df = df[ .!((df[!,"# of arcs"].>1000) .& (df[!,"type"].=="dense")),:]
# println(df)
# exit()
results = []
num = 0
# Benchmark composition for every requested combination of machine types.
# NOTE(review): `pairs` is not defined in the visible part of the file; it is
# presumably a collection of (typeA, typeB) tuples -- confirm where it is set.
for pair in pairs
# Rows of the machine table whose "type" column matches each side of the pair.
dfx = df[df.type.==pair[1],:]
dfy = df[df.type.==pair[2],:]
# Two products are zipped so they advance in lockstep: `p` is a
# (fileA, fileB) tuple and `types` holds the "arc type" of those same two rows.
for (p,types) in zip(product(dfx.file, dfy.file),product(dfx[!,"arc type"], dfy[!,"arc type"]) )
# Only machines with the same arc type are composed; other combinations are skipped.
if types[1] != types[2]
continue
end
# File paths in the table are relative to `machinezoo_path`.
ofstA = OF.read(joinpath(machinezoo_path, p[1]))
ofstB = OF.read(joinpath(machinezoo_path, p[2]))
println(p)
# Compose once up front: the result is checked for emptiness and saved to disk.
ofstC = OF.compose(ofstA, ofstB)
# A composition with no states is not written and not benchmarked.
if OF.numstates(ofstC) == 0
println("skipping")
continue
end
p1 = renamepath(p[1])
p2 = renamepath(p[2])
# Running counter of written compositions; it is prefixed to the output file name.
num=num+1
# `fileC` is kept relative (without `machinezoo_path`) because it is also
# stored in the results row below.
fileC = joinpath( "machines", "composition", "$(num)-$(p1)-x-$(p2).fst")
OF.write(ofstC, joinpath(machinezoo_path,fileC))
# otimes = compbench(OF.compose, ofstA, ofstB, oseconds)
# println(" of: ",mean(otimes))
# Timings per implementation, keyed by a short name ("ofst", "tfst").
# NOTE(review): `compbench` is defined elsewhere; presumably it returns a
# vector of timings collected over roughly `oseconds` seconds -- confirm.
times = Dict()
times["ofst"] = compbench(OF.compose, ofstA, ofstB, oseconds)
# The bare `catch` intercepts any exception raised while converting to
# SparseTensorFSM or while benchmarking; the failure is recorded as a single
# NaN so the row is still emitted. The exception itself is not printed.
try
times["tfst"] = compbench(TF.fsmcompose, TF.SparseTensorFSM(ofstA), TF.SparseTensorFSM(ofstB), oseconds)
catch
println("tf failed")
times["tfst"] = [NaN]
end
# One results row: the three file names plus summary statistics per implementation.
stats = Dict()
stats[Symbol("fileA")] = p[1]
stats[Symbol("fileB")] = p[2]
stats[Symbol("fileC")] = fileC
for (k,v) in times
# NaN-aware reductions, so a failed run ([NaN]) does not raise here.
stats[Symbol("$(k)_min")] = nanminimum(v)
stats[Symbol("$(k)_max")] = nanmaximum(v)
stats[Symbol("$(k)_mean")] = nanmean(v)
stats[Symbol("$(k)_std")] = nanstd(v)
# Number of non-NaN samples; 0 for a failed run.
stats[Symbol("$(k)_len")] = length(filter(!isnan, v))
end
# Symbol keys allow the Dict to be converted into a NamedTuple row.
push!(results, NamedTuple(stats))
end
end
stats = Dict()
stats[Symbol("fileA")] = p[1]
stats[Symbol("fileB")] = p[2]
for (k,v) in times
stats[Symbol("$(k)_min")] = nanminimum(v)
stats[Symbol("$(k)_max")] = nanmaximum(v)
stats[Symbol("$(k)_mean")] = nanmean(v)
stats[Symbol("$(k)_std")] = nanstd(v)
stats[Symbol("$(k)_len")] = length(filter(!isnan, v))
end
push!(results, NamedTuple(stats))
if length(results) != 0
CSV.write("composition_benchmark.csv", DataFrame(results))
else
println("No results")
end
end
if length(results) != 0
CSV.write("composition_benchmark.csv", DataFrame(results))
else
println("No results")
end
\ No newline at end of file
main()
\ No newline at end of file
# Launcher for the benchmark scripts; all paths are relative to the current directory.
# Julia project environment passed to --project below (the current directory).
JULIA_ENV=./
# Shared-library search path pointing into the sibling OpenFst.jl checkout.
# This replaces any LD_LIBRARY_PATH value already set in the calling shell.
export LD_LIBRARY_PATH=../../OpenFst.jl/src/:../../OpenFst.jl/openfst-1.8.3/src/lib
# The shortest-distance benchmark is currently disabled:
# julia --project=$JULIA_ENV shortest_distance.jl
# Run the composition benchmark.
julia --project=$JULIA_ENV composition.jl
\ No newline at end of file
......@@ -8,6 +8,8 @@ using Glob
using SparseArrays
using CUDA
using NaNStatistics
using Debugger
include("../../TensorFSTs.jl/lib/OpenFstConvert.jl")
include("utils.jl")
......@@ -30,14 +32,24 @@ for path in glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
end
df = vcat(dfs...)
if size(df)[1] == 0
println("No machines found")
exit()
end
results = []
for r in eachrow(df)
# check if file exists
if !isfile(joinpath(machinezoo_path, r["file"]))
continue
end
ofst = OF.read(joinpath(machinezoo_path, r["file"]))
if OF.numstates(ofst) == 0
continue
end
if r["# of arcs"] > 100000
if r["# of arcs"] > 1000
continue
end
......@@ -46,49 +58,49 @@ for r in eachrow(df)
end
println(r["file"])
tfst = TF.TensorFST(ofst)
A_cpu, A_gpu = machine2matrices(tfst)
tfst = TF.SparseTensorFSM(ofst)
times = Dict()
#check results
sd0 = OF.shortestdistance(ofst)
sd1 = TF.shortestdistance(tfst)
sd2 = cpu_shortest_distance(A_cpu)
sd3 = cu_shortest_distance(A_gpu)
sd0 = OF.shortestdistance(ofst).+1
sd1 = TF.fsmshortestdistance(tfst)
# sd2 = cpu_shortest_distance(A_cpu)
# sd3 = cu_shortest_distance(A_gpu)
times["ofst"] = sdbench(OF.shortestdistance,ofst, tseconds)
times["ofst"] = sdbench(OF.shortestdistance, ofst, tseconds)
if isapprox(sd0,val.(sd1[:]))
times["tfst"] = sdbench(TF.shortestdistance,tfst, tseconds)
times["tfst"] = sdbench(TF.fsmshortestdistance, tfst, tseconds)
else
times["tfst"] = [NaN]
end
if isapprox(sd0, val.(sd2[:]))
times["cpufst"] = sdbench(cpu_shortest_distance, A_cpu, tseconds)
else
times["cpufst"] = [NaN]
end
if isapprox(sd0, val.(Array(sd3)))
times["gpufst"] = sdbench(cu_shortest_distance, A_gpu, tseconds)
else
times["gpufst"] = [NaN]
end
# if isapprox(sd0, val.(sd2[:]))
# times["cpufst"] = sdbench(cpu_shortest_distance, A_cpu, tseconds)
# else
# times["cpufst"] = [NaN]
# end
# if isapprox(sd0, val.(Array(sd3)))
# times["gpufst"] = sdbench(cu_shortest_distance, A_gpu, tseconds)
# else
# times["gpufst"] = [NaN]
# end
if r["cyclic"]=="n"
sd4 = cpu_acyclic_shortest_distance(A_cpu)
sd5 = cu_acyclic_shortest_distance(A_gpu)
if isapprox(sd0, val.(sd4[:]))
times["cpufst_acyclic"] = sdbench(cpu_acyclic_shortest_distance, A_cpu, tseconds)
else
times["cpufst_acyclic"] = [NaN]
end
if isapprox(sd0, val.(Array(sd5)))
times["gpufst_acyclic"] = sdbench(cu_acyclic_shortest_distance, A_gpu, tseconds)
else
times["gpufst_acyclic"] = [NaN]
end
end
# if r["cyclic"]=="n"
# sd4 = cpu_acyclic_shortest_distance(A_cpu)
# sd5 = cu_acyclic_shortest_distance(A_gpu)
# if isapprox(sd0, val.(sd4[:]))
# times["cpufst_acyclic"] = sdbench(cpu_acyclic_shortest_distance, A_cpu, tseconds)
# else
# times["cpufst_acyclic"] = [NaN]
# end
# if isapprox(sd0, val.(Array(sd5)))
# times["gpufst_acyclic"] = sdbench(cu_acyclic_shortest_distance, A_gpu, tseconds)
# else
# times["gpufst_acyclic"] = [NaN]
# end
# end
stats = Dict()
stats[Symbol("file")] = r["file"]
......@@ -101,6 +113,9 @@ for r in eachrow(df)
end
push!(results, NamedTuple(stats))
end
joined = innerjoin(df, DataFrame(results), on = :file)
CSV.write("shortest_distance_benchmark.csv", joined)
if length(results) != 0
joined = innerjoin(df, DataFrame(results), on = :file)
CSV.write("shortest_distance_benchmark.csv", joined)
else
println("No results")
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment