Skip to content
Snippets Groups Projects
shortest_distance.jl 2.45 KiB
Newer Older
Pablo Riera's avatar
Pablo Riera committed
using DataFrames
using Semirings
using TensorFSTs
using CSV
using BenchmarkTools
using OpenFst
using Glob
using SparseArrays
using CUDA
using NaNStatistics

include("../../TensorFSTs.jl/lib/OpenFstConvert.jl")
include("utils.jl")

"""
    sdbench(sdfunc, machine, seconds)

Benchmark the call `sdfunc(machine)` with BenchmarkTools and return the vector of
per-sample times stored in the resulting trial.

# Arguments
- `sdfunc`: callable under test; it is invoked with `machine` as its only argument.
- `machine`: input handed to `sdfunc` (interpolated into the benchmark expression).
- `seconds`: time budget passed to `run` for the whole benchmark.

At most 100 samples are requested, each consisting of a single evaluation.
"""
function sdbench(sdfunc, machine, seconds)
    bench = @benchmarkable $sdfunc($machine)
    # `tune!` is intentionally not called: `evals` is pinned to 1 below.
    trial = run(bench; samples=100, seconds=seconds, evals=1)
    return trial.times
end

# Location of the MachineZoo.jl checkout, relative to the working directory.
machinezoo_path = "../../MachineZoo.jl/"
# Time budget (seconds) handed to `sdbench` for every benchmark in this script.
tseconds = 4
# Not referenced by the code visible in this script; kept because included code
# may read it — TODO confirm before removing.
oseconds = 1

# Collect every per-machine metadata table (`fstinfo.csv`) and stack them into `df`.
fstinfo_paths = glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
# Fail early with a clear message instead of a confusing error at the final join.
isempty(fstinfo_paths) &&
    error("no fstinfo.csv files found under $(machinezoo_path)")

dfs = DataFrame[DataFrame(CSV.File(path)) for path in fstinfo_paths]
# `reduce(vcat, ...)` avoids splatting a collection of unknown length into varargs.
df = reduce(vcat, dfs)

"""
    verified_times(label, sdfunc, machine, reference, candidate, seconds)

Return the benchmark times of `sdfunc(machine)` (via `sdbench`) when `candidate`
is approximately equal to `reference`; otherwise emit a warning tagged with `label`
and return `[NaN]` so that the implementation is reported as "no valid timing".
"""
function verified_times(label, sdfunc, machine, reference, candidate, seconds)
    if isapprox(reference, candidate)
        return sdbench(sdfunc, machine, seconds)
    end
    @warn "shortest distance differs from the OpenFst reference; not benchmarked" implementation = label
    return [NaN]
end

# Fixed order of the benchmarked implementations. Every result row carries all of
# them, so all rows share one schema and the output column order is deterministic.
benchmark_keys = ["ofst", "tfst", "cpufst", "gpufst", "cpufst_acyclic", "gpufst_acyclic"]

# Left untyped: the concrete NamedTuple row type is only known once a row is built.
results = []
for r in eachrow(df)
    # Cheap metadata filters first, so skipped machines are never read from disk.
    if r["# of arcs"] > 100000
        continue
    end
    if r["cyclic"] == "y" && r["arc type"] == "log"
        continue
    end

    ofst = OF.read(joinpath(machinezoo_path, r["file"]))
    if OF.numstates(ofst) == 0
        continue
    end

    println(r["file"])
    tfst = TF.TensorFST(ofst)
    A_cpu, A_gpu = machine2matrices(tfst)
    times = Dict{String,Vector{Float64}}()

    # Compute every result once up front; `sd0` (OpenFst) is the reference that
    # the other implementations are checked against before being timed.
    sd0 = OF.shortestdistance(ofst)
    sd1 = TF.shortestdistance(tfst)
    sd2 = cpu_shortest_distance(A_cpu)
    sd3 = cu_shortest_distance(A_gpu)

    times["ofst"] = sdbench(OF.shortestdistance, ofst, tseconds)
    times["tfst"] = verified_times(
        "tfst", TF.shortestdistance, tfst, sd0, val.(sd1[:]), tseconds
    )
    times["cpufst"] = verified_times(
        "cpufst", cpu_shortest_distance, A_cpu, sd0, val.(sd2[:]), tseconds
    )
    times["gpufst"] = verified_times(
        "gpufst", cu_shortest_distance, A_gpu, sd0, val.(Array(sd3)), tseconds
    )

    if r["cyclic"] == "n"
        sd4 = cpu_acyclic_shortest_distance(A_cpu)
        sd5 = cu_acyclic_shortest_distance(A_gpu)
        times["cpufst_acyclic"] = verified_times(
            "cpufst_acyclic", cpu_acyclic_shortest_distance, A_cpu,
            sd0, val.(sd4[:]), tseconds,
        )
        times["gpufst_acyclic"] = verified_times(
            "gpufst_acyclic", cu_acyclic_shortest_distance, A_gpu,
            sd0, val.(Array(sd5)), tseconds,
        )
    else
        # The acyclic variants are not run on cyclic machines. Record the same
        # "no valid timing" marker used for failed checks so that every row has
        # identical columns when the rows are later turned into a table.
        times["cpufst_acyclic"] = [NaN]
        times["gpufst_acyclic"] = [NaN]
    end

    # Summarise each timing vector, ignoring NaN entries.
    stats = Pair{Symbol,Any}[:file => r["file"]]
    for k in benchmark_keys
        v = times[k]
        push!(stats, Symbol("$(k)_min") => nanminimum(v))
        push!(stats, Symbol("$(k)_max") => nanmaximum(v))
        push!(stats, Symbol("$(k)_mean") => nanmean(v))
        push!(stats, Symbol("$(k)_std") => nanstd(v))
        push!(stats, Symbol("$(k)_len") => count(!isnan, v))
    end
    push!(results, NamedTuple(stats))
end

# Attach the timing statistics to the machine metadata by matching on the `file`
# column, then write the combined table to disk as CSV.
timing_table = DataFrame(results)
joined = innerjoin(df, timing_table; on=:file)
CSV.write("shortest_distance_benchmark.csv", joined)