# Source code for tnetwork.DCD.benchmarking


def _get_return(function, x, y, return_var):
    return_var.append(function(x, elapsed_time=y))

from tnetwork.DCD.analytics.dynamic_partition import *
import sklearn
import pandas as pd
import numpy as np
import tnetwork as tn





def _compute_all_stats(all_infos, detailed=True):
    """

    :param all_infos:
    :param detailed:
    :return:
    """
    names = []
    times = []

    LAMI = []
    LARI = []

    #LNMI = []
    #LF1 = []

    SM_N = []
    # entropies = []
    SM_L = []
    SM_P = []

    modularities = []

    nmis = []
    ARIs = []

    #F1s = []

    nb_nodes = []
    nb_steps = []
    IDs = {}

    for id,an_experiment in all_infos.items():
        GT_as_sn = an_experiment["GT"]
        dyn_graph_sn=an_experiment["graph"]
        if "result" not in an_experiment:
            results={}
        else:
            results = an_experiment["result"]
        iteration = an_experiment["ID"]

        for name, (result, time) in results.items():
            for k, v in iteration.items():
                IDs.setdefault(k,[])
                IDs[k].append(v)
            names.append(name)
            times.append(time["total"])
            nb_steps.append(len(dyn_graph_sn.snapshots()))
            nb_nodes.append(len(dyn_graph_sn.snapshots(dyn_graph_sn.snapshots_timesteps()[0]).nodes))
            if detailed:
                LAMI.append(longitudinal_similarity(GT_as_sn, result))
                # def nf1go(x, y):
                #     a = NF1(y, x)
                #     score = a.get_f1()[0]
                #     return score


                #LF1.append(longitudinal_similarity(GT_as_sn,result,score=f1_score(),convert_coms_sklearn_format=False))
                #LAMI.append(longitudinal_similarity(GT_as_sn, result))
                LARI.append(longitudinal_similarity(GT_as_sn, result, score=sklearn.metrics.adjusted_rand_score))

                SM_N.append(tn.SM_N(result))

                #entropies.append(entropy(result))
                SM_L.append(tn.SM_L(result)) #####Slow
                SM_P.append(tn.SM_P(result))

                mods = quality_at_each_step(result, dyn_graph_sn)
                modularities.append(np.average(mods[0], weights=mods[1]))



                sim = similarity_at_each_step(GT_as_sn,result)
                nmis.append(np.average(sim[0],weights=sim[1]))

                rand = similarity_at_each_step(GT_as_sn,result,score=sklearn.metrics.adjusted_rand_score)
                ARIs.append(np.average(rand[0],weights=rand[1]))

                #sim = similarity_at_each_step(GT_as_sn,result,score=f1_score())
                #F1s.append(np.average(sim[0],weights=sim[1]))


    df = pd.DataFrame()
    df["algorithm"] = names
    df["running time"] = times
    if detailed:
        df["LAMI"] = LAMI
        #df["LF1"] = LF1

        #df["LNMI"] = LNMI
        df["LARI"] = LARI


        df["SM-N"] = SM_N
        #df["I_old"] = entropies
        df["SM-L"] = SM_L
        df["SM-P"] = SM_P


        df["Q"] = modularities
        df["AMI"] = nmis
        df["ARI"] = ARIs

        #df["F1"] = F1s


    df["# nodes"] = nb_nodes
    df["# steps"] = nb_steps

    for k,l in IDs.items():
        df[k]=l

    return df




def run_algos_on_graph(methods_to_test, dyn_graph_sn):
    """
    Run every method on the same dynamic graph and collect what each one returns.

    :param methods_to_test: dictionary {method_name: method}; each method is called as
        ``method(dyn_graph_sn, elapsed_time=True)``
    :param dyn_graph_sn: the dynamic graph handed to every method
    :return: dictionary {method_name: value returned by the method}, in the order of ``methods_to_test``
    """
    # Work on a snapshot of the mapping, so the iteration is not affected by later changes to it.
    named_methods = list(methods_to_test.items())
    return {
        label: method(dyn_graph_sn, elapsed_time=True)
        for label, method in named_methods
    }


def _subset(graph, com, length):
    """
    Keep only the first ``length`` snapshots of a graph, with the matching communities.

    :param graph: dynamic graph whose ``snapshots()`` returns a mapping; its values are sliced with ``[:length]``
    :param com: communities queried with ``snapshot_communities(t)`` for each timestep of the shortened graph
    :param length: number of leading snapshots to keep
    :return: tuple (shortened graph as ``tn.DynGraphSN``, its communities as ``tn.DynCommunitiesSN``)
    """
    leading_snapshots = list(graph.snapshots().values())[:length]
    shortened_graph = tn.DynGraphSN(leading_snapshots)

    # NOTE(review): communities are looked up with the timesteps of the new graph; this presumably
    # assumes they coincide with the timesteps of the original graph -- confirm.
    shortened_coms = tn.DynCommunitiesSN()
    for timestep in shortened_graph.snapshots_timesteps():
        communities = com.snapshot_communities(timestep)
        shortened_coms.set_communities(timestep, communities)

    return shortened_graph, shortened_coms


def DCD_benchmark(methods_to_test, mus, nb_coms=None, subsets=None, iterations=2, min_size=5, max_size=15,
                  operations=20, only_time_statistics=False):
    """
    Compute stats and running time for methods

    For every combination of mu, iteration and number of communities, a random dynamic graph is generated
    with ``tn.generate_simple_random_graph`` (``mu_noise`` is fixed to 0.01) and converted to snapshots with
    ``slices=1``. Every method is then run on the graph, or on each requested subset of it, and the
    statistics of all runs are returned.

    Due to some occasional crashes with some methods, it is safer to call the method several times with subsets of parameters and combine the results
    later. When a method raises an exception, the error is printed and the whole combination of parameters
    is left without results: it contributes no row to the statistics.

    For scalability tests, don't forget to set only_time_statistics=True

    :param methods_to_test: dictionary {method_name: method}
    :param mus: list of mu values (float)
    :param nb_coms: list of number of communities, [10] if None
    :param subsets: list of subset sizes to test (number of leading snapshots to keep); if None, only the whole graph is used
    :param iterations: number of iteration for each combination of parameters
    :param min_size: min size of communities
    :param max_size: max size of communities
    :param operations: number of events in the random graph
    :param only_time_statistics: if True, do not compute statistics such as average modularity, smoothness etc., which are very time consuming.
    :return: the statistics of all runs, as the pandas DataFrame built by ``_compute_all_stats``
    """
    if nb_coms is None:
        nb_coms = [10]
    if subsets is None:
        # A single pseudo-subset meaning "the whole graph".
        subsets = [None]

    saved_coms = {}
    for mu in mus:
        print("mu: ", mu)

        for iteration in range(iterations):
            print("iteration: ", iteration)

            for nb_com in nb_coms:
                (dyn_graph, GT) = tn.generate_simple_random_graph(nb_com, min_size, max_size, operations,
                                                                  mu_noise=0.01, mu=mu)

                dyn_graph_sn = dyn_graph.to_DynGraphSN(slices=1)
                GT_as_sn = GT.to_DynCommunitiesSN(slices=1)

                for length in subsets:
                    print("subset length:",length)
                    if length is None:
                        subgraph = dyn_graph_sn
                        subcomsGT = GT_as_sn
                    else:
                        subgraph, subcomsGT = _subset(dyn_graph_sn, GT_as_sn, length)

                    # The subset length is part of the key but is not reported in the "ID" columns.
                    ID = (mu, iteration, nb_com, length)
                    experiment = {
                        "ID": {"mu": mu, "iteration": iteration, "#coms": nb_com},
                        "graph": subgraph,
                        "GT": subcomsGT,
                    }
                    saved_coms[ID] = experiment

                    try:
                        experiment["result"] = run_algos_on_graph(methods_to_test, subgraph)
                    except Exception as error:
                        # Best effort: report the failure and go on with the other combinations.
                        # KeyboardInterrupt and SystemExit are deliberately not caught.
                        print("error computing algos:", repr(error))

    print("Compute stats")
    stats = _compute_all_stats(saved_coms, detailed=not only_time_statistics)
    return stats



from os import listdir
from os.path import isfile, join
def _load_all_files(keyword, dir_or):
    """
    Function to load all files with a given keyword and concatenate their csvs"""
    mypath = dir_or
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    all_pds = []
    for fil in onlyfiles:
        if keyword in fil:
            a_pd = pd.read_csv(mypath+"/"+fil,index_col=0)
            a_pd["file"]=fil
            all_pds.append(a_pd)
            print(len(a_pd))
    df_stats = pd.concat(all_pds)
    return df_stats