From 36c4a380493cf29bc1b83635e86383a97cae7dd9 Mon Sep 17 00:00:00 2001 From: KatolaZ Date: Thu, 23 Apr 2015 15:29:46 +0100 Subject: Version 0.1 --- README.md | 41 +++- python/README.md | 83 +++++++ python/multired.py | 487 ++++++++++++++++++++++++++++++++++++++ python/test/simple_test.py | 22 ++ python/test/simple_test_approx.py | 16 ++ python/test/test.py | 34 +++ python/test/test_approx.py | 34 +++ 7 files changed, 711 insertions(+), 6 deletions(-) create mode 100644 python/README.md create mode 100644 python/multired.py create mode 100644 python/test/simple_test.py create mode 100644 python/test/simple_test_approx.py create mode 100644 python/test/test.py create mode 100644 python/test/test_approx.py diff --git a/README.md b/README.md index 8c87722..6c22793 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,37 @@ -# multired -Algorithm for structural reduction of multi-layer networks +#multired +* +* Copyright (C) 2015 Vincenzo (Enzo) Nicosia +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* long with this program. If not, see . +* + +---------------------- +This is multired-0.1. + + +This is a Python implmementation of the algorithm for structural +reduction of multi-layer networks based on the Von Neumann and on the +Quantum Jensen-Shannon divergence of graphs, as explained in: + + M. De Domenico. V. Nicosia, A. Arenas, V. Latora + "Structural reducibility of multilayer networks", + Nat. Commun. 6, 6864 (2015) doi:10.1038/ncomms7864 + +If you happen to find any use of this code please do not forget to +cite that paper ;-) + +My plan is to provide also a C version in the future, but I cannot +guarantee that I will do so anytime soon. -Please cite: -M. De Domenico. V. Nicosia, A. Arenas, V. Latora -"Structural reducibility of multilayer networks", -Nat. Commun. 7864 (2015) doi:10.1038/ncomms7864 diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..b63b36c --- /dev/null +++ b/python/README.md @@ -0,0 +1,83 @@ +# multired +/** +* +* Copyright (C) 2015 Vincenzo (Enzo) Nicosia +* +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* long with this program. If not, see . +* +*/ + +This is multired-0.1. + + +This is a Python implmementation of the algorithm for structural +reduction of multi-layer networks based on the Von Neumann and on the +Quantum Jensen-Shannon divergence of graphs, as explained in: + + M. De Domenico. V. Nicosia, A. Arenas, V. Latora + "Structural reducibility of multilayer networks", + Nat. Commun. 6, 6864 (2015) doi:10.1038/ncomms7864 + +If you happen to find any use of this code please do not forget to +cite that paper ;-) + + +-------------------- + INFO +-------------------- + +The module "multired.py" provides the class "multiplex_red", which +implements the algorithm to reduce a multilayer network described in +the paper cited above. + +In order to use it, you just need to + + import multired as mr + +in your python script and create a multiplex_red object. Please make +sure that "multired.py" is in PYTHONPATH. The constructor requires as +its first argument the path of a file which in turn contains a list of +files (one for each line) where the graph of each layer is to be +found. + +The class provides one set of methods which perform the exact +evaluation of the Von Neumann entropy, and another set of methods +(those whose name end with the suffix "_approx") which rely on a +polynomial approximation of the Von Neumann entropy. By default the +approximation is based on a 10th order polynomial fit of x log(x) in +[0,1], but the order of the polynomial can be set through the +parameter "fit_degree" of the constructor. + +Several sample scripts can be found in the "test/" directory. + +-------------------- + DEPENDENCIES +-------------------- + +The only strict dependencies are a recent version of Python, Numpy and +SciPy. The methods "draw_dendrogram" and "draw_dendrogram_approx" will +work only if matplotlib is installed. + +The module has been tested on a Debian GNU/Linux system, using: + + - Python 2.7.8, + - SciPy 0.13.3 + - Numpy 1.8.2 + - matplotlib 1.3.1 + +but it will almost surely work on other platforms and/or with other +versions of those packages. If you would like to report a working +configuration, just email me (the address is at the beginning of this +file). diff --git a/python/multired.py b/python/multired.py new file mode 100644 index 0000000..7482877 --- /dev/null +++ b/python/multired.py @@ -0,0 +1,487 @@ +import sys +import math +import numpy as np +from scipy.sparse import csr_matrix, eye +from scipy.linalg import eigh, eig +import copy +from scipy.cluster.hierarchy import linkage, dendrogram + +has_matplotlib = False + +try: + import matplotlib + has_matplotlib = True + +except ImportError: + has_matplotlib = False + + +class XLogx_fit: + def __init__(self, degree, npoints= 100, xmax=1): + if xmax > 1: + xmax = 1 + self.degree = degree + x = np.linspace(0, xmax, npoints) + y = [i * math.log(i) for i in x[1:]] + y.insert(0, 0) + self.fit = np.polyfit(x, y, degree) + + def __getitem__ (self, index): + if index <= self.degree: + return self.fit[index] + else: + print "Error!!! Index %d is larger than the degree of the fitting polynomial (%d)" \ + % (index, degree) + sys.exit(-1) + + +class layer: + def __init__ (self, layerfile= None, matrix=None): + self.N = 0 + self.num_layer = -1 + self.fname = layerfile + self.adj_matr = None + self.laplacian = None + self.resc_laplacian = None + self.entropy = None + self.entropy_approx = None + self._ii = [] + self._jj = [] + self._ww = [] + self._matrix_called = False + if layerfile != None: + try: + min_N = 10e10 + with open(layerfile, "r") as lines: + for l in lines: + if l[0] == '#': + continue + elems = l.strip(" \n").split(" ") + s = int(elems[0]) + d = int(elems[1]) + self._ii.append(s) + self._jj.append(d) + if s > self.N: + self.N = s + if d > self.N: + self.N = d + if s < min_N: + min_N = s + if d < min_N: + min_N = d + if len(elems) >2 : ## A weight is specified + val = [float(x) if "e" in x or "." in x else int(x) for x in [elems[2]]][0] + self._ww.append(float(val)) + else: + self._ww.append(int(1)) + + except (IOError): + print "Unable to find/open file %s -- Exiting!!!" % layerfile + exit(-2) + elif matrix != None: + self.adj_matr = copy.copy(matrix) + self.N, _x = matrix.shape + K = np.multiply(self.adj_matr.sum(0), np.ones((self.N,self.N))) + D = np.diag(np.diag(K)) + self.laplacian = csr_matrix(D - self.adj_matr) + K = self.laplacian.diagonal().sum() + self.resc_laplacian = csr_matrix(self.laplacian / K) + self._matrix_called = True + else: + print "The given matrix is BLANK" + def make_matrices(self, N): + self.N = N + self.adj_matr = csr_matrix((self._ww, (self._ii, self._jj)), shape=(self.N, self.N)) + self.adj_matr = self.adj_matr + self.adj_matr.transpose() + K = np.multiply(self.adj_matr.sum(0), np.ones((self.N,self.N))) + D = np.diag(np.diag(K)) + self.laplacian = csr_matrix(D - self.adj_matr) + K = self.laplacian.diagonal().sum() + self.resc_laplacian = csr_matrix(self.laplacian / K) + self._matrix_called = True + + def dump_info(self): + N, M = self.adj_matr.shape + K = self.adj_matr.nnz + sys.stderr.write("Layer File: %s\nNodes: %d Edges: %d\nEntropy: %g Approx. Entropy: %g\n" % \ + (self.fname, N, K, self.entropy, self.entropy_approx) ) + + def compute_VN_entropy(self): + eigvals = eigh(self.resc_laplacian.todense()) + + self.entropy = 0 + for l_i in eigvals[0]: + if (l_i > 10e-20): + self.entropy -= l_i * math.log (l_i) + + + def compute_VN_entropy_approx(self, poly): + p = poly.degree + h = - poly[p] * self.N + M = csr_matrix(np.eye(self.N)) + for i in range(p-1, -1, -1): + M = M * self.resc_laplacian + h += - poly[i] * sum(M.diagonal()) + self.entropy_approx = h + + def aggregate(self, other_layer): + if self.adj_matr != None: + self.adj_matr = self.adj_matr + other_layer.adj_matr + else: + self.adj_matr = copy.copy(other_layer.adj_matr) + K = np.multiply(self.adj_matr.sum(0), np.ones((self.N,self.N))) + D = np.diag(np.diag(K)) + self.laplacian = csr_matrix(D - self.adj_matr) + K = self.laplacian.diagonal().sum() + self.resc_laplacian = csr_matrix(self.laplacian / K) + self._matrix_called = True + + + +class multiplex_red: + + def __init__ (self, multiplexfile, directed = None, fit_degree=10, verbose=False): + self.layers = [] + self.N = 0 + self.M = 0 + self.entropy = 0 + self.entropy_approx = 0 + self.JSD = None + self.JSD_approx = None + self.Z = None + self.Z_approx = None + self.aggr = None + self.q_vals = None + self.q_vals_approx = None + self.fit_degree = fit_degree + self.poly = XLogx_fit(self.fit_degree) + self.verb = verbose + self.cuts = None + self.cuts_approx = None + try: + with open(multiplexfile, "r") as lines: + for l in lines: + if (self.verb): + sys.stderr.write("Loading layer %d from file %s" % (len(self.layers), l)) + A = layer(l.strip(" \n")) + if A.N > self.N: + self.N = A.N+1 + self.layers.append(A) + n = 0 + for l in self.layers: + l.make_matrices(self.N) + l.num_layer = n + n += 1 + self.M = len(self.layers) + except ( IOError): + print "Unable to find/open file %s -- Exiting!!!" % layer_file + exit(-2) + + def dump_info(self): + i = 0 + for l in self.layers: + sys.stderr.write("--------\nLayer: %d\n" % i) + l.dump_info() + i += 1 + + + def compute_aggregated(self): + self.aggr = copy.copy(self.layers[0]) + self.aggr.entropy = 0 + self.aggr.entropy_approx = 0 + for l in self.layers[1:]: + self.aggr.aggregate(l) + + def compute_layer_entropies(self): + for l in self.layers: + l.compute_VN_entropy() + + def compute_layer_entropies_approx(self): + for l in self.layers: + l.compute_VN_entropy_approx(self.poly) + + + def compute_multiplex_entropy(self, force_compute=False): + ### The entropy of a multiplex is defined as the sum of the entropies of its layers + for l in self.layers: + if l.entropy == None: + l.compute_VN_entropy() + self.entropy += l.entropy + + def compute_multiplex_entropy_approx(self, force_compute=False): + ### The entropy of a multiplex is defined as the sum of the entropies of its layers + for l in self.layers: + if l.entropy_approx == None: + l.compute_VN_entropy_approx(self.poly) + self.entropy_approx += l.entropy_approx + + def compute_JSD_matrix(self): + if (self.verb): + sys.stderr.write("Computing JSD matrix\n") + self.JSD = np.zeros((self.M, self.M)) + for i in range(len(self.layers)): + for j in range(i+1, len(self.layers)): + li = self.layers[i] + lj = self.layers[j] + if not li.entropy: + li.compute_VN_entropy() + if not lj.entropy: + lj.compute_VN_entropy() + # m_sigma = (li.resc_laplacian + lj.resc_laplacian)/2.0 + # m_sigma_entropy = mr.compute_VN_entropy_LR(m_sigma) + m_sigma_matr = (li.adj_matr + lj.adj_matr)/2.0 + m_sigma = layer(matrix=m_sigma_matr) + m_sigma.compute_VN_entropy() + d = m_sigma.entropy - 0.5 * (li.entropy + lj.entropy) + d = math.sqrt(d) + self.JSD[i][j] = d + self.JSD[j][i] = d + pass + + def compute_JSD_matrix_approx(self): + if (self.verb): + sys.stderr.write("Computing JSD matrix (approx)\n") + self.JSD_approx = np.zeros((self.M, self.M)) + for i in range(len(self.layers)): + for j in range(i+1, len(self.layers)): + li = self.layers[i] + lj = self.layers[j] + if not li.entropy_approx: + li.compute_VN_entropy_approx(self.poly) + if not lj.entropy_approx: + lj.compute_VN_entropy_approx(self.poly) + m_sigma_matr = (li.adj_matr + lj.adj_matr)/2.0 + m_sigma = layer(matrix=m_sigma_matr) + m_sigma.compute_VN_entropy_approx(self.poly) + d = m_sigma.entropy_approx - 0.5 * (li.entropy_approx + lj.entropy_approx) + d = math.sqrt(d) + self.JSD_approx[i][j] = d + self.JSD_approx[j][i] = d + + def dump_JSD(self, force_compute=False): + if self.JSD == None: + if force_compute: + self.compute_JSD_matrix() + else: + print "Error!!! call to dump_JSD but JSD matrix has not been computed!!!" + sys.exit(1) + idx = 0 + for i in range(self.len): + for j in range(i+1, self.len): + print i, j, self.JSD[idx] + idx += 1 + + def dump_JSD_approx(self, force_compute=False): + if self.JSD_approx == None: + if force_compute: + self.compute_JSD_matrix_approx() + else: + print "Error!!! call to dump_JSD_approx but JSD approximate matrix has not been computed!!!" + sys.exit(1) + idx = 0 + for i in range(self.M): + for j in range(i+1, self.M): + print i, j, self.JSD_approx[idx] + idx += 1 + + + def reduce(self, method="ward"): + if (self.verb): + sys.stderr.write("Performing '%s' reduction\n" % method) + if self.JSD == None: + self.compute_JSD_matrix() + self.Z = linkage(self.JSD, method=method) + return self.Z + + def reduce_approx(self, method="ward"): + if (self.verb): + sys.stderr.write("Performing '%s' reduction (approx)\n" % method) + if self.JSD_approx == None: + self.compute_JSD_matrix_approx() + self.Z_approx = linkage(self.JSD_approx, method=method) + return self.Z_approx + + def get_linkage(self): + return self.Z + + def get_linkage_approx(self): + return self.Z_approx + + def __compute_q(self, layers): + H_avg = 0 + if not self.aggr: + self.compute_aggregated() + self.aggr.compute_VN_entropy() + for l in layers: + if not l.entropy: + l.compute_VN_entropy() + H_avg += l.entropy + H_avg /= len(layers) + q = 1.0 - H_avg / self.aggr.entropy + return q + + def get_q_profile(self): + mylayers = copy.copy(self.layers) + rem_layers = copy.copy(self.layers) + q_vals = [] + if self.Z == None: + self.reduce() + q = self.__compute_q(rem_layers) + q_vals.append(q) + n = len(self.layers) + for l1, l2, _d, _x in self.Z: + l_new = layer(matrix=mylayers[int(l1)].adj_matr) + l_new.num_layer = n + n += 1 + l_new.aggregate(mylayers[int(l2)]) + rem_layers.remove(mylayers[int(l1)]) + rem_layers.remove(mylayers[int(l2)]) + rem_layers.append(l_new) + mylayers.append(l_new) + q = self.__compute_q(rem_layers) + q_vals.append(q) + self.q_vals = q_vals + return q_vals + pass + + + def __compute_q_approx(self, layers): + H_avg = 0 + if not self.aggr: + self.compute_aggregated() + self.aggr.compute_VN_entropy_approx(self.poly) + for l in layers: + if not l.entropy_approx: + l.compute_VN_entropy_approx(self.poly) + H_avg += l.entropy_approx + H_avg /= len(layers) + q = 1.0 - H_avg / self.aggr.entropy_approx + return q + + def get_q_profile_approx(self): + mylayers = copy.copy(self.layers) + rem_layers = copy.copy(self.layers) + q_vals = [] + if self.Z_approx == None: + self.reduce_approx() + q = self.__compute_q_approx(rem_layers) + q_vals.append(q) + n = len(self.layers) + for l1, l2, _d, _x in self.Z_approx: + l_new = layer(matrix=mylayers[int(l1)].adj_matr) + l_new.num_layer = n + n += 1 + l_new.aggregate(mylayers[int(l2)]) + rem_layers.remove(mylayers[int(l1)]) + rem_layers.remove(mylayers[int(l2)]) + rem_layers.append(l_new) + mylayers.append(l_new) + q = self.__compute_q_approx(rem_layers) + q_vals.append(q) + self.q_vals_approx = q_vals + return q_vals + + def compute_partitions(self): + if (self.verb): + sys.stderr.write("Getting partitions...\n") + if self.Z == None: + self.reduce() + if self.q_vals == None: + self.get_q_profile() + sets = {} + M = len(self.layers) + for i in range(len(self.layers)): + sets[i] = [i] + best_pos = self.q_vals.index(max(self.q_vals)) + j = 0 + cur_part = sets.values() + self.cuts = [copy.deepcopy(cur_part)] + while j < M-1: + l1, l2, _x, _y = self.Z[j] + l1 = int(l1) + l2 = int(l2) + val = sets[l1] + val.extend(sets[l2]) + sets[M+j] = val + r1 = cur_part.index(sets[l1]) + cur_part.pop(r1) + r2 = cur_part.index(sets[l2]) + cur_part.pop(r2) + cur_part.append(val) + j += 1 + self.cuts.append(copy.deepcopy(cur_part)) + self.cuts.append(copy.deepcopy(cur_part)) + return zip(self.q_vals, self.cuts) + + + def compute_partitions_approx(self): + if (self.verb): + sys.stderr.write("Getting partitions (approx)...\n") + if self.Z_approx == None: + self.reduce_approx() + if self.q_vals_approx == None: + self.get_q_profile_approx() + sets = {} + M = len(self.layers) + for i in range(len(self.layers)): + sets[i] = [i] + best_pos = self.q_vals_approx.index(max(self.q_vals_approx)) + j = 0 + cur_part = sets.values() + self.cuts_approx = [copy.deepcopy(cur_part)] + while j < M-1: + l1, l2, _x, _y = self.Z_approx[j] + l1 = int(l1) + l2 = int(l2) + val = sets[l1] + val.extend(sets[l2]) + sets[M+j] = val + r1 = cur_part.index(sets[l1]) + cur_part.pop(r1) + r2 = cur_part.index(sets[l2]) + cur_part.pop(r2) + cur_part.append(val) + j += 1 + self.cuts_approx.append(copy.deepcopy(cur_part)) + self.cuts_approx.append(copy.deepcopy(cur_part)) + return zip(self.q_vals_approx, self.cuts_approx) + + def draw_dendrogram(self, force = False): + if not has_matplotlib: + sys.stderr.write("No matplotlib module found in draw_dendrogram...Exiting!!!\n") + sys.exit(3) + if self.Z == None: + if not force: + sys.stderr.write("Please call reduce() first or specify 'force=True'") + else: + self.reduce() + dendrogram(self.Z, no_plot=False) + matplotlib.pyplot.draw() + matplotlib.pyplot.show() + + def draw_dendrogram_approx(self, force = False): + if not has_matplotlib: + sys.stderr.write("No matplotlib module found in draw_dendrogram_approx...Exiting!!!\n") + sys.exit(3) + if self.Z_approx == None: + if not force: + sys.stderr.write("Please call reduce_approx() first or specify 'force=True'") + else: + self.reduce_approx() + dendrogram(self.Z_approx, no_plot=False) + matplotlib.pyplot.draw() + matplotlib.pyplot.show() + + def dump_partitions(self): + part = zip(self.q_vals, self.cuts) + for q, p in part: + print q, "->", p + + def dump_partitions_approx(self): + part = zip(self.q_vals_approx, self.cuts_approx) + for q, p in part: + print q, "->", p + + + + diff --git a/python/test/simple_test.py b/python/test/simple_test.py new file mode 100644 index 0000000..e2919c6 --- /dev/null +++ b/python/test/simple_test.py @@ -0,0 +1,22 @@ +import multired as mr +import sys + + +if len(sys.argv) < 2: + print "Usage: %s " % sys.argv[0] + sys.exit(1) + +print "Loading layers...", +m = mr.multiplex_red(sys.argv[1], verbose=True) +print "[DONE]" + +print "Getting partitons...", +part = m.compute_partitions() +print "[DONE]" + +print "Partitions:..." +m.dump_partitions() + +m.draw_dendrogram() + + diff --git a/python/test/simple_test_approx.py b/python/test/simple_test_approx.py new file mode 100644 index 0000000..fe2ab30 --- /dev/null +++ b/python/test/simple_test_approx.py @@ -0,0 +1,16 @@ +import multired as mr +import sys + + +if len(sys.argv) < 2: + print "Usage: %s " % sys.argv[0] + sys.exit(1) + +m = mr.multiplex_red(sys.argv[1], verbose=True, fit_degree=20) +part = m.compute_partitions_approx() + +print "Partitions:..." +m.dump_partitions_approx() + +m.draw_dendrogram_approx() + diff --git a/python/test/test.py b/python/test/test.py new file mode 100644 index 0000000..5ccb490 --- /dev/null +++ b/python/test/test.py @@ -0,0 +1,34 @@ +import multired as mr +import sys + + +if len(sys.argv) < 2: + print "Usage: %s " % sys.argv[0] + sys.exit(1) + +print "Loading layers...", +m = mr.multiplex_red(sys.argv[1]) +print "[DONE]" + +print "Computing layer entropies...", +m.compute_layer_entropies() +print "[DONE]" + +print "Computing JSD matrix...", +m.compute_JSD_matrix() +print "[DONE]" + +print "Performing reduction...", +m.reduce() +print "[DONE]" + +print "Getting partitons...", +part = m.compute_partitions() +print "[DONE]" + +print "Partitions:...", +m.dump_partitions() +print "[DONE]" + + + diff --git a/python/test/test_approx.py b/python/test/test_approx.py new file mode 100644 index 0000000..353436c --- /dev/null +++ b/python/test/test_approx.py @@ -0,0 +1,34 @@ +import multired as mr +import sys + + +if len(sys.argv) < 2: + print "Usage: %s " % sys.argv[0] + sys.exit(1) + +print "Loading layers...", +m = mr.multiplex_red(sys.argv[1]) +print "[DONE]" + +print "Computing layer entropies (approx)...", +m.compute_layer_entropies_approx() +print "[DONE]" + +print "Computing JSD matrix (approx)...", +m.compute_JSD_matrix_approx() +print "[DONE]" + +print "Performing reduction (approx)...", +m.reduce_approx() +print "[DONE]" + + +print "Getting partitons...", +part = m.compute_partitions_approx() +print "[DONE]" + +print "Partitions:..." +m.dump_partitions_approx() +print "[DONE]" + + -- cgit v1.2.3