sd, compose benchmark

8604acfc · Pablo Riera · c038cdbc · 8604acfc · 8604acfc · 8604acfc
Commit 8604acfc authored 11 months ago by Pablo Riera
--- a/benchmark/Project.toml
+++ b/benchmark/Project.toml
@@ -3,6 +3,7 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Debugger = "31a5f54b-26ea-5ae9-a837-f05ce5417438"
 Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
 IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"

--- a/benchmark/composition.jl
+++ b/benchmark/composition.jl
@@ -40,84 +40,92 @@ function renamepath(path)
    type * "-" * name
 end

-machinezoo_path = "../../MachineZoo.jl/"
+function main()
+    machinezoo_path = "../../MachineZoo.jl/"

-if !isdir(joinpath(machinezoo_path, "machines", "composition"))
-    mkdir(joinpath(machinezoo_path, "machines", "composition"))
-end
-
-tseconds = 4
-oseconds = 1
-
-dfs = []
-for path in glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
-    df = DataFrame(CSV.File(path));
-    push!(dfs, df)
-end
-df = vcat(dfs...)
-df[!,"type"] = map(x -> splitpath(x)[2] ,df[!,"file"])
-
-if size(df)[1] == 0
-	println("No machines found")
-	exit()
-end
-
-# filter
-df = df[  ((df[!,"# of arcs"].<=1000) .& (df[!,"type"].=="dense")) .| (df[!,"type"].=="charlm") ,:]
-
-# println(df)
-# exit()
+    if !isdir(joinpath(machinezoo_path, "machines", "composition"))
+        mkdir(joinpath(machinezoo_path, "machines", "composition"))
+    end

-results = []
-for pair in pairs
-    dfx = df[df.type.==pair[1],:]
-    dfy = df[df.type.==pair[2],:]
-    for (p,types) in zip(product(dfx.file, dfy.file),product(dfx[!,"arc type"], dfy[!,"arc type"]) )
-        println(p)
-        if types[1] != types[2]
-            continue
-        end
-        ofstA = OF.read(joinpath(machinezoo_path, p[1]))
-        ofstB = OF.read(joinpath(machinezoo_path, p[2]))
-        println(p)
-        ofstC = OF.compose(ofstA, ofstB)
-        if OF.numstates(ofstC) == 0  
-            println("skipping")
-            continue
-        end
-        p1 = renamepath(p[1])
-        p2 = renamepath(p[2])
-        OF.write(ofstC, joinpath(machinezoo_path, "machines", "composition", "$(p1)-x-$(p2).fst"))
+    tseconds = 4
+    oseconds = 1

-        # otimes = compbench(OF.compose, ofstA, ofstB, oseconds)
-        # println(" of: ",mean(otimes))
-        times = Dict()
+    dfs = []
+    for path in glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
+        df = DataFrame(CSV.File(path));
+        push!(dfs, df)
+    end
+    df = vcat(dfs...)
+    df[!,"type"] = map(x -> splitpath(x)[2] ,df[!,"file"])

-        times["ofst"] = compbench(OF.compose, ofstA, ofstB, oseconds)
+    if size(df)[1] == 0
+        println("No machines found")
+        exit()
+    end

-        try        
-            times["tfst"] = compbench(TF.fsmcompose, TF.SparseTensorFSM(ofstA), TF.SparseTensorFSM(ofstB), oseconds) 
-        catch
-            println("tf failed")
-            times["tfst"]  = [NaN]
+    # filter
+    # df = df[  ((df[!,"# of arcs"].<=1000) .& (df[!,"type"].=="dense")) .| (df[!,"type"].=="charlm") ,:]
+    df = df[  .!((df[!,"# of arcs"].>1000) .& (df[!,"type"].=="dense")),:]
+
+    # println(df)
+    # exit()
+
+    results = []
+    num = 0
+    for pair in pairs
+        dfx = df[df.type.==pair[1],:]
+        dfy = df[df.type.==pair[2],:]
+        for (p,types) in zip(product(dfx.file, dfy.file),product(dfx[!,"arc type"], dfy[!,"arc type"]) )
+            if types[1] != types[2]
+                continue
+            end
+            ofstA = OF.read(joinpath(machinezoo_path, p[1]))
+            ofstB = OF.read(joinpath(machinezoo_path, p[2]))
+            println(p)
+            ofstC = OF.compose(ofstA, ofstB)
+            if OF.numstates(ofstC) == 0  
+                println("skipping")
+                continue
+            end
+            p1 = renamepath(p[1])
+            p2 = renamepath(p[2])
+            num=num+1
+            fileC = joinpath( "machines", "composition", "$(num)-$(p1)-x-$(p2).fst")
+            OF.write(ofstC, joinpath(machinezoo_path,fileC))
+
+            # otimes = compbench(OF.compose, ofstA, ofstB, oseconds)
+            # println(" of: ",mean(otimes))
+            times = Dict()
+
+            times["ofst"] = compbench(OF.compose, ofstA, ofstB, oseconds)
+
+            try        
+                times["tfst"] = compbench(TF.fsmcompose, TF.SparseTensorFSM(ofstA), TF.SparseTensorFSM(ofstB), oseconds) 
+            catch
+                println("tf failed")
+                times["tfst"]  = [NaN]
+            end
+
+            stats = Dict()
+            stats[Symbol("fileA")] = p[1]
+            stats[Symbol("fileB")] = p[2]
+            stats[Symbol("fileC")] = fileC
+            for (k,v) in times
+                stats[Symbol("$(k)_min")] = nanminimum(v)
+                stats[Symbol("$(k)_max")] = nanmaximum(v)
+                stats[Symbol("$(k)_mean")] = nanmean(v)
+                stats[Symbol("$(k)_std")] = nanstd(v)
+                stats[Symbol("$(k)_len")] = length(filter(!isnan, v))
+            end
+            push!(results, NamedTuple(stats))
        end
+    end

-        stats = Dict()
-        stats[Symbol("fileA")] = p[1]
-        stats[Symbol("fileB")] = p[2]
-        for (k,v) in times
-            stats[Symbol("$(k)_min")] = nanminimum(v)
-            stats[Symbol("$(k)_max")] = nanmaximum(v)
-            stats[Symbol("$(k)_mean")] = nanmean(v)
-            stats[Symbol("$(k)_std")] = nanstd(v)
-            stats[Symbol("$(k)_len")] = length(filter(!isnan, v))
-        end
-        push!(results, NamedTuple(stats))
+    if length(results) != 0	
+        CSV.write("composition_benchmark.csv",  DataFrame(results))
+    else
+        println("No results")
    end
 end

-if length(results) != 0	
-	CSV.write("composition_benchmark.csv",  DataFrame(results))
-else
-	println("No results")
-end
\ No newline at end of file
+main()
\ No newline at end of file
--- a/benchmark/run_benchmarks.sh
+++ b/benchmark/run_benchmarks.sh
 JULIA_ENV=./
 export LD_LIBRARY_PATH=../../OpenFst.jl/src/:../../OpenFst.jl/openfst-1.8.3/src/lib
+
 # julia --project=$JULIA_ENV shortest_distance.jl
 julia --project=$JULIA_ENV composition.jl
\ No newline at end of file
--- a/benchmark/shortest_distance.jl
+++ b/benchmark/shortest_distance.jl
@@ -8,6 +8,8 @@ using Glob
 using SparseArrays
 using CUDA
 using NaNStatistics
+using Debugger
+

 include("../../TensorFSTs.jl/lib/OpenFstConvert.jl")
 include("utils.jl")
@@ -30,14 +32,24 @@ for path in glob(joinpath(machinezoo_path,"machines/*/*/fstinfo.csv"))
 end
 df = vcat(dfs...)

+if size(df)[1] == 0
+	println("No machines found")
+	exit()
+end
+
 results = []
 for r in eachrow(df)
+	# check if file exists
+	if !isfile(joinpath(machinezoo_path, r["file"]))
+		continue
+	end
+
 	ofst = OF.read(joinpath(machinezoo_path, r["file"]))

 	if OF.numstates(ofst) == 0
 		continue
 	end
-	if r["# of arcs"] > 100000
+	if r["# of arcs"] > 1000
 		continue
 	end

@@ -46,49 +58,49 @@ for r in eachrow(df)
 	end

    println(r["file"])
-	tfst = TF.TensorFST(ofst)	
-	A_cpu, A_gpu = machine2matrices(tfst)
+	tfst = TF.SparseTensorFSM(ofst)	
 	times = Dict()

 	#check results

-	sd0 = OF.shortestdistance(ofst)
-	sd1 = TF.shortestdistance(tfst)
-	sd2 = cpu_shortest_distance(A_cpu)
-	sd3 = cu_shortest_distance(A_gpu)
+	sd0 = OF.shortestdistance(ofst).+1
+	sd1 = TF.fsmshortestdistance(tfst)
+	# sd2 = cpu_shortest_distance(A_cpu)
+	# sd3 = cu_shortest_distance(A_gpu)
+

-	times["ofst"] = sdbench(OF.shortestdistance,ofst, tseconds)
+	times["ofst"] = sdbench(OF.shortestdistance, ofst, tseconds)

 	if isapprox(sd0,val.(sd1[:]))
-		times["tfst"] = sdbench(TF.shortestdistance,tfst, tseconds)
+		times["tfst"] = sdbench(TF.fsmshortestdistance, tfst, tseconds)
 	else
 		times["tfst"] = [NaN]
 	end
-	if isapprox(sd0, val.(sd2[:]))
-		times["cpufst"] = sdbench(cpu_shortest_distance, A_cpu, tseconds)
-	else
-		times["cpufst"] = [NaN]
-	end
-	if isapprox(sd0, val.(Array(sd3)))
-		times["gpufst"] = sdbench(cu_shortest_distance, A_gpu, tseconds)
-	else
-		times["gpufst"] = [NaN]
-	end
+	# if isapprox(sd0, val.(sd2[:]))
+	# 	times["cpufst"] = sdbench(cpu_shortest_distance, A_cpu, tseconds)
+	# else
+	# 	times["cpufst"] = [NaN]
+	# end
+	# if isapprox(sd0, val.(Array(sd3)))
+	# 	times["gpufst"] = sdbench(cu_shortest_distance, A_gpu, tseconds)
+	# else
+	# 	times["gpufst"] = [NaN]
+	# end
 		
-	if r["cyclic"]=="n"
-		sd4 = cpu_acyclic_shortest_distance(A_cpu)
-		sd5 = cu_acyclic_shortest_distance(A_gpu)
-		if isapprox(sd0, val.(sd4[:]))
-			times["cpufst_acyclic"] = sdbench(cpu_acyclic_shortest_distance, A_cpu, tseconds)
-		else
-			times["cpufst_acyclic"] = [NaN]
-		end
-		if isapprox(sd0, val.(Array(sd5)))
-			times["gpufst_acyclic"] = sdbench(cu_acyclic_shortest_distance, A_gpu, tseconds)
-		else
-			times["gpufst_acyclic"] = [NaN]
-		end
-	end
+	# if r["cyclic"]=="n"
+	# 	sd4 = cpu_acyclic_shortest_distance(A_cpu)
+	# 	sd5 = cu_acyclic_shortest_distance(A_gpu)
+	# 	if isapprox(sd0, val.(sd4[:]))
+	# 		times["cpufst_acyclic"] = sdbench(cpu_acyclic_shortest_distance, A_cpu, tseconds)
+	# 	else
+	# 		times["cpufst_acyclic"] = [NaN]
+	# 	end
+	# 	if isapprox(sd0, val.(Array(sd5)))
+	# 		times["gpufst_acyclic"] = sdbench(cu_acyclic_shortest_distance, A_gpu, tseconds)
+	# 	else
+	# 		times["gpufst_acyclic"] = [NaN]
+	# 	end
+	# end

 	stats = Dict()
 	stats[Symbol("file")] = r["file"]
@@ -101,6 +113,9 @@ for r in eachrow(df)
 	end
 	push!(results, NamedTuple(stats))
 end
-
-joined = innerjoin(df, DataFrame(results), on = :file)
-CSV.write("shortest_distance_benchmark.csv", joined)
+if length(results) != 0	
+	joined = innerjoin(df, DataFrame(results), on = :file)
+	CSV.write("shortest_distance_benchmark.csv", joined)
+else
+	println("No results")
+end