Source code for tnetwork.readwrite.generic_io

from tnetwork.dyn_graph.encodings import code_length_SN_M,code_length_SN_E,code_length_LS,code_length_IG
import tnetwork as tn
import pandas as pd
import networkx as nx

# Names exported by ``from <module> import *``.
# NOTE(review): the underscore-prefixed ``_encoding_efficiency`` is exported here,
# while ``two_columns2unidrected_edge`` (defined in this module) is not listed.
__all__ = ["from_pandas_interaction_list", "_encoding_efficiency", "read_interactions"]

def _encoding_efficiency(interactions:pd.DataFrame, frequency):
    """
    Compute the code length of an interaction table under each encoding.

    Summary counts and the resulting code lengths are printed along the way.

    :param interactions: table with the columns "n1", "n2", "time" and "e"
    :param frequency: difference between two timestamps of the same edge that are
        treated as consecutive (i.e., merged into the same interval)
    :return: dict with the keys "ls", "sn_m", "sn_e" and "ig" (in that order), each
        mapped to the value returned by the corresponding ``code_length_*`` function
    """
    n_rows = len(interactions)
    n_edges = len(set(interactions["e"]))
    n_timestamps = len(set(interactions["time"]))
    n_nodes = len(set(interactions["n1"]).union(interactions["n2"]))
    print("nb_interactions:", n_rows, "nb_unique_Edges:", n_edges, "nb_time:", n_timestamps, "nb_nodes:", n_nodes)

    # Node and edge counts are shared by every encoding; only the interaction
    # and time counts differ for the interval-based one.
    graph_counts = dict(nb_nodes=n_nodes, nb_unique_edges=n_edges)
    raw_counts = dict(graph_counts, nb_interactions=n_rows, nb_time=n_timestamps)

    lengths = {}
    lengths["ls"] = code_length_LS(None, **raw_counts)
    lengths["sn_m"] = code_length_SN_M(None, **raw_counts)
    lengths["sn_e"] = code_length_SN_E(None, **raw_counts)

    # Walk the rows edge by edge in chronological order: a row opens a new
    # interval unless it follows the previous row of the same edge by exactly
    # `frequency`.
    ordered = interactions.sort_values(["e","time"], ascending=(True,True))
    last_edge, last_time = -1, -1
    interval_starts = set()
    n_intervals = 0
    for edge, t in zip(ordered["e"], ordered["time"]):
        extends_previous = edge == last_edge and t == last_time + frequency
        if not extends_previous:
            n_intervals += 1
            interval_starts.add(t)
        last_edge, last_time = edge, t
    print("nb intervals: ", n_intervals)

    lengths["ig"] = code_length_IG(
        None, nb_interactions=n_intervals, nb_time=len(interval_starts), **graph_counts
    )

    # Report the encodings from shortest to longest (ties broken by name).
    for name, length in sorted(lengths.items(), key=lambda item: (item[1], item[0])):
        print(name, ":", length)
    return lengths


def from_pandas_interaction_list(interactions,format,frequency=1,source="n1",target="n2",time="time"):
    """
    Build a dynamic graph from a pandas table of interactions.

    The table is first passed through ``two_columns2unidrected_edge``, which
    removes self-loops and (re)computes the "e" column of undirected edges.

    :param interactions: pandas DataFrame with one row per interaction
    :param format: class of the graph to build: ``tn.DynGraphSN``,
        ``tn.DynGraphLS`` or ``tn.DynGraphIG``
    :param frequency: frequency of data collection, forwarded to the graph
        constructor (SN, LS) or used to merge timestamps into intervals (IG)
    :param source: name of the column holding the first node of each interaction
    :param target: name of the column holding the second node of each interaction
    :param time: name of the column holding the timestamp of each interaction
    :return: the dynamic graph in the requested format, or None if ``format``
        is none of the three supported classes
    """
    # Honour the caller's column names (they used to be hard-coded to "n1"/"n2").
    interactions = two_columns2unidrected_edge(interactions,source=source,target=target)

    if format==tn.DynGraphSN:
        all_graphs = {}
        for t in set(interactions[time]):
            # Filter on the caller-supplied time column, not the literal "time".
            this_t = interactions[interactions[time]==t]
            all_graphs[t]=nx.from_pandas_edgelist(this_t,source=source,target=target)
        return tn.DynGraphSN(all_graphs,frequency=frequency)

    if format==tn.DynGraphLS or format==tn.DynGraphIG:
        # Group the timestamps by edge in a single pass, in row order.
        edges_time = {}
        for e,t in zip(interactions["e"], interactions[time]):
            edges_time.setdefault(e,[]).append(t)

        if format==tn.DynGraphLS:
            return tn.DynGraphLS(edges=edges_time,frequency=frequency)

        edges_intervals = {e: tn.Intervals.from_time_list(times,frequency) for e,times in edges_time.items()}
        return tn.DynGraphIG(edges_intervals)

    # Unsupported format: kept as an implicit "no result" for backward compatibility.
    return None
def two_columns2unidrected_edge(interactions,source="n1",target="n2"):
    """
    Drop self-loops and add an "e" column describing each row as an undirected edge.

    :param interactions: pandas DataFrame with one row per interaction; it is not modified
    :param source: name of the column holding the first node
    :param target: name of the column holding the second node
    :return: a new DataFrame without the rows where source equals target, with an
        extra column "e" containing the sorted tuple of the two nodes
    """
    # Explicit copy: adding a column to a boolean-mask selection would otherwise
    # raise pandas' SettingWithCopyWarning.
    to_return = interactions[interactions[source] != interactions[target]].copy()
    # Sorting the pair gives (a, b) and (b, a) the same edge identifier.
    to_return["e"] = [tuple(sorted(pair)) for pair in zip(to_return[source], to_return[target])]
    return to_return
def read_interactions(file,frequency=1,format=None,time_first_column=False,sep="\t",columns=None):
    """
    Read link stream data

    :param file: file to read
    :param frequency: frequency of data collection, i.e., smallest possible difference between successive timestamps
    :param format: by default, the most efficient format is selected automatically based on encoding length.
    :param time_first_column: If there are only 3 columns, you can use True if time is on the first column and False if it is on the last
    :param sep: column separator
    :param columns: if there are more than 3 columns, give column names, the used one being "n1", "n2" and "time"
    :return: the dynamic graph returned by ``tn.from_pandas_interaction_list`` for the chosen format
    """
    # `is None` rather than `== None`: the latter is ambiguous when an
    # array-like is passed as `columns`.
    if columns is None:
        columns = ["time","n1","n2"] if time_first_column else ["n1","n2","time"]

    interactions = pd.read_csv(file,names=columns,sep=sep)
    interactions = two_columns2unidrected_edge(interactions)

    if format is None:
        # Pick the representation with the shortest encoding; on ties the
        # first key of the efficiency dict wins.
        efficiency = _encoding_efficiency(interactions, frequency)
        best_key = min(efficiency, key=efficiency.get)
        format_by_encoding = {
            "ls": tn.DynGraphLS,
            "sn_m": tn.DynGraphSN,
            "sn_e": tn.DynGraphSN,
            "ig": tn.DynGraphIG,
        }
        format = format_by_encoding[best_key]
        print("graph will be loaded as: ",format)

    return tn.from_pandas_interaction_list(interactions,format,frequency=frequency,source="n1",target="n2")