# composition.jl
# Benchmark FST composition with OpenFst and TensorFSTs on pairs of MachineZoo machines.
using DataFrames
using Semirings
using TensorFSTs
using CSV
using BenchmarkTools
using OpenFst
using Glob
using IterTools
# NaN-aware reductions (nanminimum, nanmaximum, nanmean, nanstd) for the timing summary.
using NaNStatistics
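
# Helper file from a sibling TensorFSTs.jl checkout (presumably the OpenFst <-> TensorFSTs
# conversions; confirm). `include` resolves this path relative to this file's directory.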
include("../../TensorFSTs.jl/lib/OpenFstConvert.jl")

# Machine-type combinations to benchmark, as (left operand type, right operand type).
# `main` matches these strings against the `type` column it derives from each machine's
# file path. NOTE: this global shadows `Base.pairs` inside this script's module.
pairs = [
    ("dense", "topology"),
    ("dense", "charlm"),
    ("dense", "random"),
    ("topology", "dense"),
]

"""
    compbench(compfunc, machineA, machineB, seconds)

Benchmark the call `compfunc(machineA, machineB)` and return the recorded sample times.

The benchmark is run with at most 100 samples, one evaluation per sample, and a time
budget of `seconds`. No tuning step is performed; the number of evaluations is fixed
explicitly instead.
"""
function compbench(compfunc, machineA, machineB, seconds)
    bench = @benchmarkable $compfunc($machineA, $machineB)
    trial = run(bench; samples=100, seconds=seconds, evals=1)
    return trial.times
end

"""
    compbench2(compfunc, machineA, machineB, seconds)

Benchmark `compfunc` on the array form of two machines and return the sample times.

Both machines are converted with `tensorFST2SparseArray`, which yields three values
each. The first value of each is cropped so that the 4th dimension of A's array and the
3rd dimension of B's array have the same (smaller) extent, and `compfunc` is then timed
on `(A array, B array, A's 2nd and 3rd values, B's 2nd and 3rd values)`.

The benchmark uses at most 100 samples, one evaluation per sample, within `seconds`.
"""
function compbench2(compfunc, machineA, machineB, seconds)
    arrA, aA, wA = tensorFST2SparseArray(machineA)
    arrB, aB, wB = tensorFST2SparseArray(machineB)

    # Crop both operands to the extent they have in common along the joined dimension.
    shared = min(arrA.dims[4], arrB.dims[3])
    croppedA = arrA[:, :, :, 1:shared]
    croppedB = arrB[:, :, 1:shared, :]

    bench = @benchmarkable $compfunc($croppedA, $croppedB, $aA, $wA, $aB, $wB)
    trial = run(bench; samples=100, seconds=seconds, evals=1)
    return trial.times
end

"""
    renamepath(path)

Build a short `"<type>-<name>"` label from a machine path.

The label joins the second path component with the third path component, after every
occurrence of `".fst"` has been removed from the latter. Throws a `BoundsError` when
`path` has fewer than three components.
"""
function renamepath(path)
    parts = splitpath(path)
    kind = parts[2]
    stem = replace(parts[3], ".fst" => "")
    return string(kind, "-", stem)
end

# ---- Benchmark driver ----
"""
    main()

Compose pairs of MachineZoo machines with OpenFst, store each non-empty composition, and
time the composition with both OpenFst and TensorFSTs.

Machines are discovered through the `fstinfo.csv` files matching
`../../MachineZoo.jl/machines/*/*/fstinfo.csv`. That path, like the output file, is
resolved against the current working directory. For every `(typeA, typeB)` entry of the
global `pairs`, each machine of `typeA` is combined with each machine of `typeB` that has
the same `"arc type"`. Empty compositions are skipped; the others are written to
`machines/composition/` inside the zoo.

For each benchmarked pair one row is collected with the input/output file names and, per
backend (`ofst`, `tfst`), the NaN-aware min/max/mean/std of the sample times plus the
number of non-NaN samples. Times are whatever BenchmarkTools records (nanoseconds per its
documentation). The rows are written to `composition_benchmark.csv`.

Prints `"No machines found"` and returns early when no machine metadata is available, and
prints `"No results"` when no pair produced a non-empty composition.
"""
function main()
    machinezoo_path = "../../MachineZoo.jl/"

    # Time budget (seconds) handed to every benchmark run.
    # NOTE(review): the original also defined an unused `tseconds = 4`, possibly intended
    # as a separate budget for the TensorFSTs run; both backends use this value. Confirm.
    oseconds = 1

    dfs = DataFrame[]
    for path in glob(joinpath(machinezoo_path, "machines/*/*/fstinfo.csv"))
        push!(dfs, DataFrame(CSV.File(path)))
    end

    # Guard BEFORE touching any column: with no CSV files `vcat()` would not even return
    # a DataFrame, so the emptiness check has to come first.
    df = isempty(dfs) ? DataFrame() : vcat(dfs...)
    if nrow(df) == 0
        println("No machines found")
        # `return` instead of `exit()` so including this file does not kill the session.
        return nothing
    end

    # Output directory for the composed machines; created only once we know the zoo
    # exists. `mkpath` is a no-op when the directory is already there.
    mkpath(joinpath(machinezoo_path, "machines", "composition"))

    # The machine type is the second component of the recorded file path.
    df[!, "type"] = map(x -> splitpath(x)[2], df[!, "file"])

    # Drop dense machines with more than 1000 arcs; all other machines are kept.
    oversized_dense = (df[!, "# of arcs"] .> 1000) .& (df[!, "type"] .== "dense")
    df = df[.!oversized_dense, :]

    # Left untyped on purpose: this is the container the original passed to `DataFrame`.
    results = []
    num = 0
    for pair in pairs
        dfx = df[df.type .== pair[1], :]
        dfy = df[df.type .== pair[2], :]
        candidates = zip(
            product(dfx.file, dfy.file),
            product(dfx[!, "arc type"], dfy[!, "arc type"]),
        )
        for (p, types) in candidates
            # Only machines over the same arc type are composed.
            if types[1] != types[2]
                continue
            end
            fileA, fileB = p

            ofstA = OF.read(joinpath(machinezoo_path, fileA))
            ofstB = OF.read(joinpath(machinezoo_path, fileB))
            println(p)
            ofstC = OF.compose(ofstA, ofstB)
            if OF.numstates(ofstC) == 0
                println("skipping")
                continue
            end

            labelA = renamepath(fileA)
            labelB = renamepath(fileB)
            num += 1
            fileC = joinpath("machines", "composition", "$(num)-$(labelA)-x-$(labelB).fst")
            OF.write(ofstC, joinpath(machinezoo_path, fileC))

            ofst_times = compbench(OF.compose, ofstA, ofstB, oseconds)

            # A TensorFSTs failure must not abort the whole sweep: record NaN instead,
            # but report the cause and never swallow a user interrupt.
            tfst_times = try
                compbench(
                    TF.fsmcompose,
                    TF.SparseTensorFSM(ofstA),
                    TF.SparseTensorFSM(ofstB),
                    oseconds,
                )
            catch err
                err isa InterruptException && rethrow()
                println("tf failed")
                showerror(stderr, err)
                println(stderr)
                [NaN]
            end

            # Ordered pairs (rather than a Dict) keep the CSV column order deterministic.
            row = Pair{Symbol,Any}[:fileA => fileA, :fileB => fileB, :fileC => fileC]
            for (label, samples) in (("ofst", ofst_times), ("tfst", tfst_times))
                push!(row, Symbol(label, "_min") => nanminimum(samples))
                push!(row, Symbol(label, "_max") => nanmaximum(samples))
                push!(row, Symbol(label, "_mean") => nanmean(samples))
                push!(row, Symbol(label, "_std") => nanstd(samples))
                push!(row, Symbol(label, "_len") => count(!isnan, samples))
            end
            push!(results, NamedTuple(row))
        end
    end

    if isempty(results)
        println("No results")
    else
        CSV.write("composition_benchmark.csv", DataFrame(results))
    end
end

# Run the benchmark when this file is executed.
main()