summaryrefslogtreecommitdiff
path: root/structure
diff options
context:
space:
mode:
authorKatolaZ <katolaz@yahoo.it>2015-10-19 16:23:00 +0100
committerKatolaZ <katolaz@yahoo.it>2015-10-19 16:23:00 +0100
commitdf8386f75b0538075d72d52693836bb8878f505b (patch)
tree704c2a0836f8b9fd9f470c12b6ae05637c431468 /structure
parent363274e79eade464247089c105260bc34940da07 (diff)
First commit of MAMMULT code
Diffstat (limited to 'structure')
-rw-r--r--structure/activity/degs_to_activity_overlap.py36
-rw-r--r--structure/activity/degs_to_binary.py45
-rw-r--r--structure/activity/hamming_dist.py41
-rw-r--r--structure/activity/layer_activity.py27
-rw-r--r--structure/activity/layer_activity_vectors.py44
-rw-r--r--structure/activity/multiplexity.py41
-rw-r--r--structure/activity/node_activity.py51
-rw-r--r--structure/activity/node_activity_vectors.py68
-rw-r--r--structure/activity/node_degree_vectors.py68
-rw-r--r--structure/correlations/Makefile15
-rw-r--r--structure/correlations/compute_pearson.py33
-rw-r--r--structure/correlations/compute_rho.py118
-rw-r--r--structure/correlations/compute_tau.py133
-rw-r--r--structure/correlations/dump_k_q.c50
-rw-r--r--structure/correlations/fit_knn.c59
-rw-r--r--structure/correlations/fit_utils.c382
-rw-r--r--structure/correlations/fit_utils.h25
-rw-r--r--structure/correlations/knn_q_from_degrees.py49
-rw-r--r--structure/correlations/knn_q_from_layers.py98
-rw-r--r--structure/correlations/rank_nodes.py74
-rw-r--r--structure/correlations/rank_nodes_thresh.py87
-rw-r--r--structure/correlations/rank_occurrence.py45
-rw-r--r--structure/correlations/rank_utils.c217
-rw-r--r--structure/correlations/rank_utils.h44
-rw-r--r--structure/metrics/aggregate_layers_w.py37
-rw-r--r--structure/metrics/avg_edge_overlap.py47
-rw-r--r--structure/metrics/cartography_from_columns.py44
-rw-r--r--structure/metrics/cartography_from_deg_vectors.py37
-rw-r--r--structure/metrics/cartography_from_layers.py54
-rw-r--r--structure/metrics/edge_overlap.py43
-rw-r--r--structure/metrics/intersect_layers.py47
-rw-r--r--structure/metrics/overlap_degree.py47
-rw-r--r--structure/reinforcement/reinforcement.py61
33 files changed, 2267 insertions, 0 deletions
diff --git a/structure/activity/degs_to_activity_overlap.py b/structure/activity/degs_to_activity_overlap.py
new file mode 100644
index 0000000..c96959b
--- /dev/null
+++ b/structure/activity/degs_to_activity_overlap.py
@@ -0,0 +1,36 @@
+####
+##
+## Take a file which contains, on the n-th line, the degrees at each
+## layer of the n-th node, and print on output the corresponding node
+## multi-activity (i.e., the number of layers in which the node is
+## active) and the overlapping degree (i.e., the total number of edges
+## incident on the node)
+##
+##
+
+
+import sys
+
+def to_binary(l):
+ s = 0
+ e = 0
+ for v in l:
+ s += v * pow(2,e)
+ e +=1
+ return s
+
+if len(sys.argv) < 2:
+ print "Usage: %s <filein>" % sys.argv[0]
+ sys.exit(1)
+
+distr = {}
+
+with open(sys.argv[1]) as f:
+ for l in f:
+ elems = [int(x) for x in l.strip(" \n").split(" ")]
+ ov = sum(elems)
+ new_list = [1 if x>0 else 0 for x in elems]
+ multi_act = sum(new_list)
+ if multi_act and ov:
+ print ov, multi_act
+
diff --git a/structure/activity/degs_to_binary.py b/structure/activity/degs_to_binary.py
new file mode 100644
index 0000000..cb7aef5
--- /dev/null
+++ b/structure/activity/degs_to_binary.py
@@ -0,0 +1,45 @@
+####
+##
+## Take a file which contains, on the n-th line, the degrees at each
+## layer of the n-th node, and print on output the corresponding node
+## participation bit-strings, i.e. the string which contains "1" if on
+## that layer the node is connected, and zero otherwise
+##
+## on the stderr, we also dump the distribution of each bit-string
+##
+
+
+import sys
+
+def to_binary(l):
+ s = 0
+ e = 0
+ for v in l:
+ s += v * pow(2,e)
+ e +=1
+ return s
+
+if len(sys.argv) < 2:
+ print "Usage: %s <filein>" % sys.argv[0]
+ sys.exit(1)
+
+distr = {}
+
+with open(sys.argv[1]) as f:
+ for l in f:
+ elems = [int(x) for x in l.strip(" \n").split(" ")]
+ new_list = [1 if x>0 else 0 for x in elems]
+ val = to_binary(new_list)
+ if val in distr:
+ distr[val] += 1
+ else:
+ distr[val] = 1
+ for i in new_list:
+ print i,
+ print
+
+for k in distr:
+ bin_list = bin(k)
+ bin_num = sum([int(x) if x=='1' else 0 for x in bin_list[2:]])
+ sys.stderr.write("%d %028s %d \n" % (bin_num, bin_list[2:], distr[k]))
+
diff --git a/structure/activity/hamming_dist.py b/structure/activity/hamming_dist.py
new file mode 100644
index 0000000..2a98e64
--- /dev/null
+++ b/structure/activity/hamming_dist.py
@@ -0,0 +1,41 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity of the n-th
+## layer.
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 3:
+ print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0]
+ sys.exit(1)
+
+max_N = -1
+
+layers = []
+
+
+for layer in sys.argv[1:]:
+ active = []
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ if s > max_N:
+ max_N = s
+ if d > max_N:
+ max_N = d
+ active.extend([s,d])
+ active = set(active)
+ layers.append(active)
+
+for i in range(len(layers)):
+ for j in range(i+1, len(layers)):
+ s = layer[i] ^ layer[j]
+ print i, j, len(s)*1.0 / min(len(layer[i]) + len(layer[j]), max_N+1)
+
diff --git a/structure/activity/layer_activity.py b/structure/activity/layer_activity.py
new file mode 100644
index 0000000..bea0416
--- /dev/null
+++ b/structure/activity/layer_activity.py
@@ -0,0 +1,27 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity of the n-th
+## layer.
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+ sys.exit(1)
+
+
+for layer in sys.argv[1:]:
+ active = []
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ active.extend([s,d])
+ active = set(active)
+ print len(active)
diff --git a/structure/activity/layer_activity_vectors.py b/structure/activity/layer_activity_vectors.py
new file mode 100644
index 0000000..f32418d
--- /dev/null
+++ b/structure/activity/layer_activity_vectors.py
@@ -0,0 +1,44 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity of the n-th
+## layer.
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+ sys.exit(1)
+
+max_N = -1
+
+layers = []
+
+
+for layer in sys.argv[1:]:
+ active = []
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ if s > max_N:
+ max_N = s
+ if d > max_N:
+ max_N = d
+ active.extend([s,d])
+ active = set(active)
+ layers.append(active)
+
+for l in layers:
+ for n in range(max_N+1):
+ if n in l:
+ print 1,
+ else:
+ print 0,
+ print
+
diff --git a/structure/activity/multiplexity.py b/structure/activity/multiplexity.py
new file mode 100644
index 0000000..9e038c9
--- /dev/null
+++ b/structure/activity/multiplexity.py
@@ -0,0 +1,41 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity of the n-th
+## layer.
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 3:
+ print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0]
+ sys.exit(1)
+
+max_N = -1
+
+layers = []
+
+
+for layer in sys.argv[1:]:
+ active = []
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ if s > max_N:
+ max_N = s
+ if d > max_N:
+ max_N = d
+ active.extend([s,d])
+ active = set(active)
+ layers.append(active)
+
+for i in range(len(layers)):
+ for j in range(i+1, len(layers)):
+ s = layer[i] & layer[j]
+ print i, j, len(s)*1.0 / (max_N+1)
+
diff --git a/structure/activity/node_activity.py b/structure/activity/node_activity.py
new file mode 100644
index 0000000..6410a68
--- /dev/null
+++ b/structure/activity/node_activity.py
@@ -0,0 +1,51 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the activity of the n-th node. We
+## assume that nodes are numbered sequentially, starting from 0, with
+## no gaps (i.e., missing nodes are treated as isolated nodes)
+##
+##
+##
+
+
+import sys
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+ sys.exit(1)
+
+node_activity = {}
+
+max_N = -1
+
+for layer in sys.argv[1:]:
+ active = []
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ active.extend([s,d])
+ if s > max_N:
+ max_N = s
+ if d > max_N:
+ max_N = d
+ active = set(active)
+ for n in active:
+ if n in node_activity:
+ node_activity[n] += 1
+ else:
+ node_activity[n] = 1
+
+
+for n in range(max_N+1):
+ if n in node_activity:
+ print node_activity[n]
+ else:
+ print 0
+
+
+
+
diff --git a/structure/activity/node_activity_vectors.py b/structure/activity/node_activity_vectors.py
new file mode 100644
index 0000000..9839334
--- /dev/null
+++ b/structure/activity/node_activity_vectors.py
@@ -0,0 +1,68 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the degrees of the n-th node at
+## each layer, separated by a space, in the format:
+##
+## node1_deglay1 node1_deglay2 .... node1_deglayM
+## node2_deglay1 node2_deglay2 .... node2_deglayM
+## ..............................................
+## nodeN_deglay1 nodeN_deglay2 .... nodeN_deglayM
+##
+##
+
+import sys
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+ sys.exit(1)
+
+node_degrees = {}
+
+max_N = -1
+
+num_layer = 0
+
+for layer in sys.argv[1:]:
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+
+ if s > max_N:
+ max_N = s
+ if d > max_N:
+ max_N = d
+
+ if s in node_degrees:
+ if num_layer in node_degrees[s]:
+ node_degrees[s][num_layer] += 1
+ else:
+ node_degrees[s][num_layer] = 1
+ else:
+ node_degrees[s] = {}
+ node_degrees[s][num_layer] = 1
+
+ if d in node_degrees:
+ if num_layer in node_degrees[d]:
+ node_degrees[d][num_layer] += 1
+ else:
+ node_degrees[d][num_layer] = 1
+ else:
+ node_degrees[d] = {}
+ node_degrees[d][num_layer] = 1
+ num_layer += 1
+
+
+for n in range(max_N+1):
+ for i in range(num_layer):
+ if n in node_degrees:
+ if i in node_degrees[n]:
+ print 1,
+ else:
+ print 0,
+ else:
+ print 0,
+ print
diff --git a/structure/activity/node_degree_vectors.py b/structure/activity/node_degree_vectors.py
new file mode 100644
index 0000000..8863daa
--- /dev/null
+++ b/structure/activity/node_degree_vectors.py
@@ -0,0 +1,68 @@
+####
+##
+## Take as input the layers of a multiplex, and provide as output a
+## file where the n-th line contains the degrees of the n-th node at
+## each layer, separated by a space, in the format:
+##
+## node1_deglay1 node1_deglay2 .... node1_deglayM
+## node2_deglay1 node2_deglay2 .... node2_deglayM
+## ..............................................
+## nodeN_deglay1 nodeN_deglay2 .... nodeN_deglayM
+##
+##
+
+import sys
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+ sys.exit(1)
+
+node_degrees = {}
+
+max_N = -1
+
+num_layer = 0
+
+for layer in sys.argv[1:]:
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+
+ if s > max_N:
+ max_N = s
+ if d > max_N:
+ max_N = d
+
+ if s in node_degrees:
+ if num_layer in node_degrees[s]:
+ node_degrees[s][num_layer] += 1
+ else:
+ node_degrees[s][num_layer] = 1
+ else:
+ node_degrees[s] = {}
+ node_degrees[s][num_layer] = 1
+
+ if d in node_degrees:
+ if num_layer in node_degrees[d]:
+ node_degrees[d][num_layer] += 1
+ else:
+ node_degrees[d][num_layer] = 1
+ else:
+ node_degrees[d] = {}
+ node_degrees[d][num_layer] = 1
+ num_layer += 1
+
+
+for n in range(max_N+1):
+ for i in range(num_layer):
+ if n in node_degrees:
+ if i in node_degrees[n]:
+ print node_degrees[n][i],
+ else:
+ print 0,
+ else:
+ print 0,
+ print
diff --git a/structure/correlations/Makefile b/structure/correlations/Makefile
new file mode 100644
index 0000000..58301cb
--- /dev/null
+++ b/structure/correlations/Makefile
@@ -0,0 +1,15 @@
+CFLAGS="-O3"
+CC="gcc"
+GSL_FLAGS=-lgsl -lgslcblas -lm
+MFLAG=-lm
+
+all: dump_k_q fit_knn
+
+fit_knn: fit_knn.c fit_utils.c
+ $(CC) $(CFLAGS) -o fit_knn fit_knn.c fit_utils.c $(GSL_FLAGS)
+
+dump_k_q: dump_k_q.c rank_utils.c
+ $(CC) $(CFLAGS) -o dump_k_q dump_k_q.c rank_utils.c $(MFLAG)
+
+clean:
+ rm dump_k_q fit_knn
diff --git a/structure/correlations/compute_pearson.py b/structure/correlations/compute_pearson.py
new file mode 100644
index 0000000..e2c9055
--- /dev/null
+++ b/structure/correlations/compute_pearson.py
@@ -0,0 +1,33 @@
+####
+##
+## Compute the pearson correlation coefficient between the values of
+## node properties included in the two files provided as input.
+##
+
+import sys
+import numpy
+import scipy.stats
+import math
+
+if len(sys.argv) < 3:
+ print "Usage %s <file1> <file2>" % sys.argv[0]
+ sys.exit(1)
+
+
+x1 = []
+
+with open(sys.argv[1], "r") as lines:
+ for l in lines:
+ elem = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split()][0]
+ x1.append(elem)
+
+x2 = []
+
+with open(sys.argv[2], "r") as lines:
+ for l in lines:
+ elem = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split()][0]
+ x2.append(elem)
+
+
+r2 =numpy.corrcoef(x1,x2)
+print r2[0][1]
diff --git a/structure/correlations/compute_rho.py b/structure/correlations/compute_rho.py
new file mode 100644
index 0000000..8fa4174
--- /dev/null
+++ b/structure/correlations/compute_rho.py
@@ -0,0 +1,118 @@
+####
+##
+## Get two rankings and a pairing, and compute the corresponding
+## Spearman's rho rank correlation coefficient
+##
+##
+##
+##
+
+import sys
+import random
+import math
+#import scipy.stats
+
+##
+## Compute the constant C of the Spearman's rho correlation coefficient in the draft
+##
+def compute_C(rank1, rank2):
+ N = len(rank1)
+ [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]]
+ [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]]
+ C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2
+
+ #print mu1, mu2, sum1, sum2, C
+
+ return C
+
+
+##
+## Compute the constant D of the Spearman's rho correlation coefficient in the draft
+##
+def compute_D(rank1, rank2):
+ [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]]
+ s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()])
+ s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()])
+
+ D = math.sqrt(s1 * s2)
+ return D
+
+def compute_rho(rank1, rank2, pairing, C, D):
+
+ rho = 0
+ for s,t in pairing:
+ rho += rank1[s] * rank2[t]
+ rho = (rho + C) / D
+ return rho
+
+if len(sys.argv) < 3:
+ print "Usage: %s <rank1> <rank2> [<pairing>]" % sys.argv[0]
+ sys.exit(1)
+
+rank1 = {}
+rank2 = {}
+
+lines = open(sys.argv[1], "r").readlines()
+
+i = 0
+for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines
+ continue
+ r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0]
+ rank1[i] = r
+ i += 1
+
+
+lines = open(sys.argv[2], "r").readlines()
+
+i = 0
+for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines
+ continue
+ r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0]
+ rank2[i] = r
+ i += 1
+
+
+N1 = len(rank1)
+N2 = len(rank2)
+
+if (N1 != N2):
+ print "The two files must have the same number of nodes!!!!!"
+ sys.exit(2)
+
+
+
+C = compute_C(rank1, rank2)
+D = compute_D(rank1, rank2)
+
+
+## We start from a random pairing, and compute the corresponding value of rho
+
+pairing = []
+
+if len(sys.argv) > 3:
+ lines = open(sys.argv[3], "r").readlines()
+
+
+
+ for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []:
+ continue
+ p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2]
+ pairing.append((p1, p2))
+
+else:
+ for i in range (N1):
+ pairing.append((i,i))
+
+
+if len(pairing) != N1:
+ print "ERROR !!! The pairing should be of the same length of the ranking files!!!"
+ sys.exit(1)
+
+rho = compute_rho(rank1, rank2, pairing, C, D)
+
+print rho
+
+
diff --git a/structure/correlations/compute_tau.py b/structure/correlations/compute_tau.py
new file mode 100644
index 0000000..3d1f804
--- /dev/null
+++ b/structure/correlations/compute_tau.py
@@ -0,0 +1,133 @@
+####
+##
+## Take as input two files, whose n^th line contains the ranking of
+## element n, and compute the Kendall's \tau_b rank correlation
+## coefficient
+##
+##
+
+import sys
+from numpy import *
+
+
+def kendalltau(x,y):
+ initial_sort_with_lexsort = True # if True, ~30% slower (but faster under profiler!) but with better worst case (O(n log(n)) than (quick)sort (O(n^2))
+ n = len(x)
+ temp = range(n) # support structure used by mergesort
+ # this closure recursively sorts sections of perm[] by comparing
+ # elements of y[perm[]] using temp[] as support
+ # returns the number of swaps required by an equivalent bubble sort
+ def mergesort(offs, length):
+ exchcnt = 0
+ if length == 1:
+ return 0
+ if length == 2:
+ if y[perm[offs]] <= y[perm[offs+1]]:
+ return 0
+ t = perm[offs]
+ perm[offs] = perm[offs+1]
+ perm[offs+1] = t
+ return 1
+ length0 = length / 2
+ length1 = length - length0
+ middle = offs + length0
+ exchcnt += mergesort(offs, length0)
+ exchcnt += mergesort(middle, length1)
+ if y[perm[middle - 1]] < y[perm[middle]]:
+ return exchcnt
+ # merging
+ i = j = k = 0
+ while j < length0 or k < length1:
+ if k >= length1 or (j < length0 and y[perm[offs + j]] <= y[perm[middle + k]]):
+ temp[i] = perm[offs + j]
+ d = i - j
+ j += 1
+ else:
+ temp[i] = perm[middle + k]
+ d = (offs + i) - (middle + k)
+ k += 1
+ if d > 0:
+ exchcnt += d;
+ i += 1
+ perm[offs:offs+length] = temp[0:length]
+ return exchcnt
+
+ # initial sort on values of x and, if tied, on values of y
+ if initial_sort_with_lexsort:
+ # sort implemented as mergesort, worst case: O(n log(n))
+ perm = lexsort((y, x))
+ else:
+ # sort implemented as quicksort, 30% faster but with worst case: O(n^2)
+ perm = range(n)
+ perm.sort(lambda a,b: cmp(x[a],x[b]) or cmp(y[a],y[b]))
+
+ # compute joint ties
+ first = 0
+ t = 0
+ for i in xrange(1,n):
+ if x[perm[first]] != x[perm[i]] or y[perm[first]] != y[perm[i]]:
+ t += ((i - first) * (i - first - 1)) / 2
+ first = i
+ t += ((n - first) * (n - first - 1)) / 2
+
+ # compute ties in x
+ first = 0
+ u = 0
+ for i in xrange(1,n):
+ if x[perm[first]] != x[perm[i]]:
+ u += ((i - first) * (i - first - 1)) / 2
+ first = i
+ u += ((n - first) * (n - first - 1)) / 2
+
+ # count exchanges
+ exchanges = mergesort(0, n)
+ # compute ties in y after mergesort with counting
+ first = 0
+ v = 0
+ for i in xrange(1,n):
+ if y[perm[first]] != y[perm[i]]:
+ v += ((i - first) * (i - first - 1)) / 2
+ first = i
+ v += ((n - first) * (n - first - 1)) / 2
+
+ tot = (n * (n - 1)) / 2
+ if tot == u and tot == v:
+ return 1 # Special case for all ties in both ranks
+
+ tau = ((tot-(v+u-t)) - 2.0 * exchanges) / (sqrt(float(( tot - u )) * float( tot - v )))
+
+ # what follows reproduces ending of Gary Strangman's original stats.kendalltau() in SciPy
+ svar = (4.0*n+10.0) / (9.0*n*(n-1))
+ z = tau / sqrt(svar)
+ ##prob = erfc(abs(z)/1.4142136)
+ ##return tau, prob
+ return tau
+
+def main():
+
+ if len(sys.argv) < 3:
+ print "Usage: %s <file1> <file2>" % sys.argv[0]
+ sys.exit(1)
+
+ x1 = []
+ x2= []
+
+ lines = open(sys.argv[1]).readlines()
+
+ for l in lines:
+ elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0]
+ x1.append(elem)
+
+ lines = open(sys.argv[2]).readlines()
+
+ for l in lines:
+ elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0]
+ x2.append(elem)
+
+
+ tau = kendalltau(x1,x2)
+ print tau
+
+
+if __name__ == "__main__":
+ main()
diff --git a/structure/correlations/dump_k_q.c b/structure/correlations/dump_k_q.c
new file mode 100644
index 0000000..2b6ba79
--- /dev/null
+++ b/structure/correlations/dump_k_q.c
@@ -0,0 +1,50 @@
+/**
+ *
+ * Get the degree distributions of two layers and a pairing, and dump
+ * on output the pairs k-q
+ *
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+void dump_k_q(double *R1, double *R2, int N, int *pairing){
+
+ int i;
+
+ for (i=0; i<N; i++){
+ printf("%g %g\n", R1[i], R2[pairing[i]]);
+ }
+}
+
+
+int main(int argc, char *argv[]){
+
+ int N1, N2;
+ double *R1 = NULL;
+ double *R2 = NULL;
+
+ int *pairing = NULL;
+
+
+ if (argc < 4){
+ printf("Usage: %s <degs1> <degs2> <pairing>\n", argv[0]);
+ exit(1);
+ }
+
+ load_ranking(argv[1], &N1, &R1);
+ load_ranking(argv[2], &N2, &R2);
+
+ if (N1 != N2){
+ printf("Error!!!! The two files must have the same number of lines!!!! Exiting...\n");
+ exit(1);
+ }
+
+ pairing = malloc(N1 * sizeof(int));
+
+ load_pairing(&pairing, N1, argv[3]);
+
+ dump_k_q(R1, R2, N1, pairing);
+
+}
diff --git a/structure/correlations/fit_knn.c b/structure/correlations/fit_knn.c
new file mode 100644
index 0000000..64fc4c3
--- /dev/null
+++ b/structure/correlations/fit_knn.c
@@ -0,0 +1,59 @@
+/**
+ *
+ * Take a file which contains pairs in the form:
+ *
+ * k q
+ *
+ * compute qnn(k) and fit this function with a power-law
+ *
+ * a*x^{b}
+ *
+ * The fit is actually performed as a linear fit on the
+ * exponential-binned log-log distribution
+ *
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <gsl/gsl_fit.h>
+#include "fit_utils.h"
+
+
+
+int main(int argc, char *argv[]){
+
+ double *k, *q, *x, *y;
+ int N, num;
+ double alpha;
+
+ double c0, c1, cov00, cov01, cov11, sqsum;
+
+ k=q=x=y=NULL;
+
+ if (argc < 3){
+ printf("%s <filein> <alpha>\n", argv[0]);
+ exit(1);
+ }
+
+ alpha=atof(argv[2]);
+
+ fprintf(stderr,"Alpha: %g\n", alpha);
+
+ load_sequence_col(argv[1], &k, &N, 0);
+ load_sequence_col(argv[1], &q, &N, 1);
+ //exit(1);
+ exp_bin_avg(k, q, N, alpha, &x, &y, &num);
+ //normalize_distr(x, y, num);
+ //dump_distr(x, y, num);
+ compact_distr(x, y, &num);
+ // dump_distr(x,y,num);
+ map_vec(x, num, log);
+ map_vec(y, num, log);
+ //dump_distr(x,y,num);
+
+ gsl_fit_linear(x, 1, y, 1, num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum);
+ fprintf(stderr, "a: %g b: %g\n", exp(c0), c1);
+}
diff --git a/structure/correlations/fit_utils.c b/structure/correlations/fit_utils.c
new file mode 100644
index 0000000..e9e3954
--- /dev/null
+++ b/structure/correlations/fit_utils.c
@@ -0,0 +1,382 @@
+/**
+ *
+ * Fit a given sequence with a power-law function
+ *
+ * a*x^{b}
+ *
+ * The fit is actually performed as a linear fit on the
+ * exponential-binned log-log distribution
+ *
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gsl/gsl_fit.h>
+
+/**
+ *
+ * Load a sequence from a file, which contains one element on each line
+ *
+ */
+
+void load_sequence(char *fname, double **v, int *N){
+
+ int size;
+ char buff[256];
+
+ FILE *f;
+
+ f = fopen(fname, "r");
+ if (!f){
+ fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
+ exit(1);
+ }
+
+ *N =0;
+ size = 10;
+ if (*v == NULL)
+ *v = malloc(size * sizeof(double));
+ else{
+ *v = realloc(*v, size * sizeof(double));
+ }
+
+ while (fgets(buff, 255, f)){
+ (*v)[*N] = atof(buff);
+ *N += 1;
+ if (*N == size){
+ size += 10;
+ *v = realloc(*v, size * sizeof(double));
+ }
+ }
+ *v = realloc(*v, (*N) * sizeof(double));
+ fclose(f);
+}
+
+
+/**
+ *
+ * Load a sequence, getting the "col"-th column of the input file
+ *
+ */
+
+void load_sequence_col(char *fname, double **v, int *N, int col){
+
+ int size, n;
+ char buff[256];
+ char *ptr;
+
+ FILE *f;
+
+ f = fopen(fname, "r");
+ if (!f){
+ fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
+ exit(1);
+ }
+
+ *N =0;
+ size = 10;
+ if (*v == NULL)
+ *v = malloc(size * sizeof(double));
+ else{
+ *v = realloc(*v, size * sizeof(double));
+ }
+
+ while (fgets(buff, 255, f)){
+ ptr = strtok(buff, " ");
+ if (col > 0){
+ n = 0;
+ while (n<col){
+ ptr = strtok(NULL, " ");
+ n += 1;
+ }
+ }
+ (*v)[*N] = atof(ptr);
+ *N += 1;
+
+ if (*N == size){
+ size += 10;
+ *v = realloc(*v, size * sizeof(double));
+ }
+ }
+ *v = realloc(*v, (*N) * sizeof(double));
+ fclose(f);
+}
+
+
+/**
+ *
+ * Make the exponential binning of a distribution, with a giving
+ * exponent alpha. The value of y[i] is the number of elements of v
+ * whose value lies between x[i-1] and x[i]...
+ *
+ */
+
+void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){
+
+ double min_v, max_v, val, last_val;
+ int i, j, size, num_x;
+ double last_size, new_size;
+
+ min_v = max_v = v[0];
+
+ for (i=1; i<N; i ++){
+ if (v[i] > max_v)
+ max_v = v[i];
+ else if (v[i] > 0 && v[i] < min_v)
+ min_v = v[i];
+ }
+
+ size = 10;
+ if (*x == NULL){
+ *x = malloc(size * sizeof(double));
+ }
+ else{
+ *x = realloc(*x, size * sizeof(double));
+ }
+
+ val = min_v;
+ last_size = min_v;
+ (*x)[0] = min_v;
+ num_x = 1;
+
+ while(val < max_v){
+ new_size = last_size * alpha;
+ val = last_size + new_size;
+ last_size = new_size;
+ last_val = val;
+ (*x)[num_x] = val;
+ num_x += 1;
+ if (num_x == size){
+ size += 10;
+ *x = realloc(*x, size * sizeof(double));
+ }
+ }
+
+ if (*y == NULL){
+ *y = malloc(num_x * sizeof(double));
+ }
+ else{
+ *y = realloc(*y, num_x * sizeof(double));
+ }
+ for (i=0; i < num_x; i++){
+ (*y)[i] = 0;
+ }
+
+
+
+ for(i=0; i <N; i ++){
+ j = 0;
+ while(v[i] > (*x)[j]){
+ j ++;
+ }
+ (*y)[j] += 1;
+ }
+ *num = num_x;
+}
+
+/**
+ *
+ * Make the exponential binning of a distribution, with a giving
+ * exponent alpha. The value of y[i] is the average of the values in
+ * the vector "w" whose corresponding v lies between x[i-1] and x[i]...
+ *
+ */
+
+
+void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){
+
+ double min_v, max_v, val, last_val;
+ int i, j, size, num_x;
+ double last_size, new_size;
+ int *cnt;
+
+
+ min_v = max_v = v[0];
+
+ for (i=1; i<N; i ++){
+ if (v[i] > max_v)
+ max_v = v[i];
+ else if (v[i] > 0 && v[i] < min_v)
+ min_v = v[i];
+ }
+
+ size = 10;
+ if (*x == NULL){
+ *x = malloc(size * sizeof(double));
+ }
+ else{
+ *x = realloc(*x, size * sizeof(double));
+ }
+
+ val = min_v;
+ last_size = min_v;
+ (*x)[0] = min_v;
+ num_x = 1;
+
+ while(val < max_v){
+ new_size = last_size * alpha;
+ val = last_size + new_size;
+ last_size = new_size;
+ last_val = val;
+ (*x)[num_x] = val;
+ num_x += 1;
+ if (num_x == size){
+ size += 10;
+ *x = realloc(*x, size * sizeof(double));
+ }
+ }
+
+
+ cnt = malloc(num_x * sizeof(int));
+
+ if (*y == NULL){
+ *y = malloc(num_x * sizeof(double));
+ }
+ else{
+ *y = realloc(*y, num_x * sizeof(double));
+ }
+ for (i=0; i < num_x; i++){
+ (*y)[i] = 0;
+ cnt[i] = 0;
+ }
+
+ for(i=0; i <N; i ++){
+ j = 0;
+ while(j < num_x && v[i] > (*x)[j]){
+ j ++;
+ }
+ if(j == num_x){
+ printf("Error!!!!! Trying to assing a non-existing bin!!! -- fit_utils.c:exp_bin_avg!!!\n");
+ exit(37);
+ }
+ (*y)[j] += w[i];
+ cnt[j] += 1;
+ }
+ *num = num_x;
+
+ for(i=0; i< num_x; i++){
+ if (cnt[i] > 0){
+ (*y)[i] = (*y)[i] / cnt[i];
+ }
+ }
+ free(cnt);
+}
+
+
+/**
+ *
+ * Print a distribution on stdout
+ *
+ */
+
+void dump_distr(double *x, double *y, int N){
+ int i;
+
+ for(i=0; i<N; i ++){
+ printf("%g %g\n", x[i], y[i]);
+ }
+
+}
+
+
+/**
+ * Compact a distribution, leaving only the pairs (x_i, y_i) for which
+ * y_i > 0
+ *
+ */
+
+void compact_distr(double *x, double *y, int *num){
+
+ int i, j;
+
+ i = j = 0;
+ while(j < *num){
+ while(j < *num && y[j] == 0){
+ j ++;
+ }
+ if (j==*num){
+ break;
+ }
+ x[i] = x[j];
+ y[i] = y[j];
+ i ++;
+ j ++;
+ }
+ *num = i;
+}
+
+
+/**
+ *
+ * Apply the function "f" on all the elemnts of a vector, in-place
+ *
+ */
+
+void map_vec(double *v, int N, double (*f)(double)){
+ int i;
+
+ for (i=0; i<N; i ++){
+ v[i] = f(v[i]);
+ }
+}
+
+
+/**
+ *
+ * Normalize a distribution, dividing each y[i] for the width of the
+ * corresponding bin (i.e., x[i] - x[i-1])
+ *
+ */
+void normalize_distr(double *x, double *y, int num){
+
+ int i;
+
+ for(i=1; i<num; i ++){
+ y[i] /= (x[i] - x[i-1]);
+ }
+}
+
+/**
+ *
+ * take two vectors (k and q) and a pairing, and compute the best
+ * power-law fit "a*k^{mu}" of qnn(k)
+ *
+ */
+
+void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a,
+ double *corr){
+
+ static int *num;
+ static double *q_pair, *x, *y;
+
+ int i;
+ int fit_num;
+ double c0, c1, cov00, cov01, cov11, sqsum;
+
+ if (q_pair == NULL){
+ q_pair = malloc(N * sizeof(double));
+ }
+
+ for(i=0; i<N; i++){
+ q_pair[i] = q[pairing[i]];
+ }
+
+ exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num);
+ //normalize_distr(x,y, fit_num);
+ compact_distr(x, y, &fit_num);
+
+ map_vec(x, fit_num, log);
+ map_vec(y, fit_num, log);
+ gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum);
+
+ //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11);
+ //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11));
+ *a = c0;
+ *mu = c1;
+ *corr = cov01/sqrt(cov00 * cov11);
+}
+
diff --git a/structure/correlations/fit_utils.h b/structure/correlations/fit_utils.h
new file mode 100644
index 0000000..183f47c
--- /dev/null
+++ b/structure/correlations/fit_utils.h
@@ -0,0 +1,25 @@
+#ifndef __FIT_UTILS_H__
+#define __FIT_UTILS_H__
+
+
+void load_sequence(char *fname, double **v, int *N);
+
+void load_sequence_col(char *fname, double **v, int *N, int col);
+
+void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num);
+
+void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num);
+
+void dump_distr(double *x, double *y, int N);
+
+void compact_distr(double *x, double *y, int *num);
+
+void map_vec(double *v, int N, double (*f)(double));
+
+void normalize_distr(double *x, double *y, int num);
+
+void fit_current_trend(double *k, double *q, int N, int *pairing,
+ double *mu, double *a, double *corr);
+
+
+#endif /* __FIT_UTILS_H__ */
diff --git a/structure/correlations/knn_q_from_degrees.py b/structure/correlations/knn_q_from_degrees.py
new file mode 100644
index 0000000..5e10bfa
--- /dev/null
+++ b/structure/correlations/knn_q_from_degrees.py
@@ -0,0 +1,49 @@
+####
+##
+##
+## Take a file with the degree sequence of nodes which are active on
+## both layers, and compute \overline{k}(q) and \overline{q}(k),
+## i.e. the two inter-layer correlation functions, where we call "k"
+## the degree on the first column, and "q" the degree on the second
+## column
+##
+##
+
+import sys
+
+if len(sys.argv) < 2:
+ print "Usage: %s <filein>" % sys.argv[0]
+ sys.exit(1)
+
+qnn_k = {}
+knn_q = {}
+
+with open(sys.argv[1]) as f:
+ for l in f:
+ k, q = [int(x) for x in l.strip(" \n").split(" ")]
+ if qnn_k.has_key(k):
+ qnn_k[k].append(q)
+ else:
+ qnn_k[k] = [q]
+ if knn_q.has_key(q):
+ knn_q[q].append(k)
+ else:
+ knn_q[q] = [k]
+
+
+
+
+keys= qnn_k.keys()
+keys.sort()
+for k in keys:
+ print k, 1.0 * sum(qnn_k[k])/len(qnn_k[k])
+
+
+
+
+keys= knn_q.keys()
+keys.sort()
+for q in keys:
+ sys.stderr.write("%d %f\n" % (q, 1.0 * sum(knn_q[q])/len(knn_q[q])))
+
+
diff --git a/structure/correlations/knn_q_from_layers.py b/structure/correlations/knn_q_from_layers.py
new file mode 100644
index 0000000..c108c18
--- /dev/null
+++ b/structure/correlations/knn_q_from_layers.py
@@ -0,0 +1,98 @@
+####
+##
+##
+## Compute the degree-degree correlations of a multiplex graph, namely:
+##
+## <k_1>(k_2) and <k_2>(k_1)
+##
+## Takes as input the two lists of edges corresponding to each layer
+##
+
+import sys
+import numpy as np
+import networkx as net
+
+
+def knn(G, n):
+ neigh = G.neighbors(n)
+ l = G.degree(neigh).values()
+ return 1.0 * sum(l) / len(l)
+
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> <layer2>" % sys.argv[0]
+ sys.exit(1)
+
+
+G1 = net.read_edgelist(sys.argv[1])
+G2 = net.read_edgelist(sys.argv[2])
+
+k1_k1 = {} ## Intraleyer knn (k1)
+k2_k2 = {} ## Intralayer knn (k2)
+k1_k2 = {} ## Interlayer average degree at layer 1 of a node having degree k_2 in layer 2
+k2_k1 = {} ## Interlayer average degree at layer 2 of a node having degree k_1 in layer 1
+
+
+for n in G1.nodes():
+ k1 = G1.degree(n)
+
+
+ ##print k1,k2
+
+ knn1 = knn(G1, n)
+ if n in G2.nodes():
+ k2 = G2.degree(n)
+ knn2 = knn(G2, n)
+ else:
+ k2 = 0
+ knn2 = 0
+
+ if k1_k1.has_key(k1):
+ k1_k1[k1].append(knn1)
+ else:
+ k1_k1[k1] = [knn1]
+
+ if k2_k2.has_key(k2):
+ k2_k2[k2].append(knn2)
+ else:
+ k2_k2[k2] = [knn2]
+
+
+ if k1_k2.has_key(k2):
+ k1_k2[k2].append(k1)
+ else:
+ k1_k2[k2] = [k1]
+
+ if k2_k1.has_key(k1):
+ k2_k1[k1].append(k2)
+ else:
+ k2_k1[k1] = [k2]
+
+
+k1_keys = k1_k1.keys()
+k1_keys.sort()
+k2_keys = k2_k2.keys()
+k2_keys.sort()
+
+
+f = open("%s_%s_k1" % (sys.argv[1], sys.argv[2]), "w+")
+
+for n in k1_keys:
+ avg_knn = np.mean(k1_k1[n])
+ std_knn = np.std(k1_k1[n])
+ avg_k2 = np.mean(k2_k1[n])
+ std_k2 = np.std(k2_k1[n])
+ f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k2, std_k2))
+
+f.close()
+
+f = open("%s_%s_k2" % (sys.argv[1], sys.argv[2]), "w+")
+
+for n in k2_keys:
+ avg_knn = np.mean(k2_k2[n])
+ std_knn = np.std(k2_k2[n])
+ avg_k1 = np.mean(k1_k2[n])
+ std_k1 = np.std(k1_k2[n])
+ f.write("%d %f %f %f %f\n" % (n, avg_knn, std_knn, avg_k1, std_k1))
+f.close()
+
diff --git a/structure/correlations/rank_nodes.py b/structure/correlations/rank_nodes.py
new file mode 100644
index 0000000..6985e88
--- /dev/null
+++ b/structure/correlations/rank_nodes.py
@@ -0,0 +1,74 @@
+####
+##
+##
+## Get a file as input, whose n-th line corresponds to the value of a
+## certain property of node n, and rank nodes according to their
+## properties, taking into account ranking ties properly.
+##
+## The output is a file whose n-th line is the "ranking" of the n-th
+## node according to the given property. (notice that rankings could
+## be fractional, due to the tie removal algorithm)
+##
+##
+
+
+import sys
+import math
+
+
+if len(sys.argv) < 2:
+ print "Usage: %s <filein>" % sys.argv[0]
+ sys.exit(1)
+
+
+
+lines = open(sys.argv[1], "r").readlines()
+
+ranking = []
+
+n=0
+for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []:
+ continue
+ v = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0]
+ ranking.append((v,n))
+ n +=1
+
+ranking.sort(reverse=True)
+#print ranking
+new_ranking = {}
+
+v0, n0 = ranking[0]
+
+old_value = v0
+tot_rankings = 1
+
+stack = [n0]
+l=1.0
+for v,n in ranking[1:]:
+ l += 1
+ ##print stack, tot_rankings
+ if v != old_value: ### There is a new rank
+ # We first compute the rank for all the nodes in the stack and then we set it
+ new_rank_value = 1.0 * tot_rankings / len(stack)
+ ##print new_rank_value
+ for j in stack:
+ new_ranking[j] = new_rank_value
+ old_value = v
+ tot_rankings = l
+ stack = [n]
+ else: # One more value with the same rank, keep it for the future
+ stack.append(n)
+ tot_rankings += l
+
+new_rank_value = 1.0 * tot_rankings / len(stack)
+#print new_rank_value
+for j in stack:
+ new_ranking[j] = new_rank_value
+
+#print new_ranking
+
+keys = new_ranking.keys()
+keys.sort()
+for k in keys:
+ print new_ranking[k]
diff --git a/structure/correlations/rank_nodes_thresh.py b/structure/correlations/rank_nodes_thresh.py
new file mode 100644
index 0000000..44b565a
--- /dev/null
+++ b/structure/correlations/rank_nodes_thresh.py
@@ -0,0 +1,87 @@
+####
+##
+##
+## Get a file as input, whose n-th line corresponds to the value of a
+## certain property of node n, and rank nodes according to their
+## properties, taking into account ranking ties properly.
+##
+## The output is a file whose n-th line is the "ranking" of the n-th
+## node according to the given property. (notice that rankings could
+## be fractional, due to the tie removal algorithm)
+##
+## The rank of a node is set to "0" (ZERO) if the corresponding
+## property is smaller than a value given as second parameter
+##
+
+
+import sys
+import math
+
+
+if len(sys.argv) < 3:
+ print "Usage: %s <filein> <thresh>" % sys.argv[0]
+ sys.exit(1)
+
+
+thresh = float(sys.argv[2])
+
+lines = open(sys.argv[1], "r").readlines()
+
+ranking = []
+
+n=0
+for l in lines:
+ if l[0] == "#" or l.strip(" \n").split(" ") == []:
+ continue
+ v = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0]
+ if v >= thresh:
+ ranking.append((v,n))
+ else:
+ ranking.append((0,n))
+ n +=1
+
+ranking.sort(reverse=True)
+#print ranking
+new_ranking = {}
+
+v0, n0 = ranking[0]
+
+
+old_value = v0
+tot_rankings = 1
+
+stack = [n0]
+l=1.0
+for v,n in ranking[1:]:
+ l += 1
+ ##print stack, tot_rankings
+ if v != old_value: ### There is a new rank
+ # We first compute the rank for all the nodes in the stack and then we set it
+ if old_value == 0:
+ new_rank_value = 0
+ else:
+ new_rank_value = 1.0 * tot_rankings / len(stack)
+ ##print new_rank_value
+ for j in stack:
+ new_ranking[j] = new_rank_value
+ old_value = v
+ tot_rankings = l
+ stack = [n]
+ else: # One more value with the same rank, keep it for the future
+ stack.append(n)
+ tot_rankings += l
+
+if v == 0 :
+ new_rank_value = 0
+else:
+ new_rank_value = 1.0 * tot_rankings / len(stack)
+#print new_rank_value
+for j in stack:
+ new_ranking[j] = new_rank_value
+
+#print new_ranking
+
+keys = new_ranking.keys()
+keys.sort()
+for k in keys:
+ print new_ranking[k]
diff --git a/structure/correlations/rank_occurrence.py b/structure/correlations/rank_occurrence.py
new file mode 100644
index 0000000..6339d5d
--- /dev/null
+++ b/structure/correlations/rank_occurrence.py
@@ -0,0 +1,45 @@
+####
+##
+## Get two rankings and compute the size of the k-intersection,
+## i.e. the number of elements which are present in the first k
+## positions of both rankings, as a function of k
+##
+##
+
+
+import sys
+
+
+if len(sys.argv)< 4:
+ print "Usage: %s <file1> <file2> <increment>" % sys.argv[0]
+ sys.exit(1)
+
+incr = int(sys.argv[3])
+
+rank1 = []
+rank2 = []
+
+lines = open(sys.argv[1], "r").readlines()
+
+for l in lines:
+ n= l.strip(" \n").split(" ")[0]
+ rank1.append(n)
+
+lines = open(sys.argv[2], "r").readlines()
+
+for l in lines:
+ n= l.strip(" \n").split(" ")[0]
+ rank2.append(n)
+
+
+N = len(rank1)
+
+i = incr
+
+while i < N+incr:
+ l = len(set(rank1[:i]) & set(rank2[:i]))
+ print i, l
+ i += incr
+
+
+
diff --git a/structure/correlations/rank_utils.c b/structure/correlations/rank_utils.c
new file mode 100644
index 0000000..4b8ef41
--- /dev/null
+++ b/structure/correlations/rank_utils.c
@@ -0,0 +1,217 @@
+/**
+ *
+ * Some utility functions to be used with tune_rho, compute_rho and
+ * the like
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <time.h>
+
+#include "rank_utils.h"
+
+void load_ranking(char *fname, int *N, double **R){
+
+ char buff[256];
+ int size = 10;
+ FILE *f;
+
+ if (*R == NULL){
+ *R = malloc(size * sizeof (double));
+ }
+ f = fopen(fname, "r");
+ if (!f){
+ printf("Unable to open file: %s!!! Exiting\n");
+ exit(1);
+ }
+
+ *N = 0;
+
+ while (fgets(buff, 255, f)){
+ if (* N == size){
+ size += 10;
+ *R = realloc(*R, size * sizeof(double));
+ }
+ (*R)[*N] = atof(buff);
+ *N += 1;
+ }
+}
+
+double avg_array(double *v, int N){
+
+ double sum = 0.0;
+ int i;
+
+ for (i = 0; i < N; i ++){
+ sum += v[i];
+ }
+ return sum/N;
+}
+
+double compute_C(double *R1, double *R2, int N){
+ double mu1, mu2, sum1, sum2;
+
+ mu1 = avg_array(R1, N);
+ mu2 = avg_array(R2, N);
+
+ sum1 = mu1 * N;
+ sum2 = mu2 * N;
+
+ return N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2;
+}
+
+
+double compute_D(double *R1, double *R2, int N){
+
+ double mu1, mu2, s1, s2;
+ int i;
+
+ mu1 = avg_array(R1, N);
+ mu2 = avg_array(R1, N);
+
+ s1 = s2 = 0.0;
+
+ for (i=0 ; i < N; i ++){
+ s1 += pow((R1[i] - mu1), 2);
+ s2 += pow((R2[i] - mu2), 2);
+ }
+
+ return sqrt(s1 * s2);
+}
+
+
+double compute_rho(double *R1, double *R2, int N, int *pairing){
+
+ double rho = 0;
+ int i;
+
+ for (i=0; i < N; i ++){
+ rho += R1[i] * R2[ pairing[i] ];
+ }
+
+ rho = (rho + compute_C(R1, R2, N))/ compute_D(R1, R2, N);
+ return rho;
+}
+
+void dump_ranking(double *R, int N){
+ int i;
+
+ for (i=0; i < N; i ++){
+ printf("%d: %f\n", i, R[i] );
+ }
+}
+
+
+void init_pairing_natural(int *pairing, int N){
+ int i;
+
+ for (i = 0; i< N; i ++){
+ pairing[i] = i;
+ }
+}
+
+void init_pairing_inverse(int *pairing, int N){
+ int i;
+
+ for (i = 0; i< N; i ++){
+ pairing[i] = N-i-1;
+ }
+}
+
+void select_pairing(int *pairing, int N, int argc, char *argv[], int pos){
+
+ if (argc < pos + 1 || !strncasecmp("rnd", argv[pos], 3)){
+ init_pairing_random(pairing, N);
+ }
+ else if (!strncasecmp("nat", argv[pos], 3)){
+ init_pairing_natural(pairing, N);
+ }
+ else if (!strncasecmp("inv", argv[pos], 3)){
+ init_pairing_inverse(pairing, N);
+ }
+ else{
+ printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[pos]);
+ exit(1);
+ }
+
+}
+
+
+void shuffle_sequence(int *s, int N){
+
+ int i, j, tmp;
+
+ for (i=N-1; i>=0; i--){
+ j = rand() % (i+1);
+ tmp = s[j];
+ s[j] = s[i];
+ s[i] = tmp;
+ }
+}
+
+
+
+void init_pairing_random(int *pairing, int N){
+
+ init_pairing_natural(pairing, N);
+ shuffle_sequence(pairing, N);
+
+}
+
+/* Loads a pairing from a file, in the format:
+ *
+ * rank1 rank2
+ * ...........
+ */
+void load_pairing(int **pairing, int N, char *fname){
+
+ FILE *f;
+ int i, j, num;
+ char buff[256];
+ char *ptr;
+
+ f = fopen(fname, "r");
+ if (!f){
+ printf("Error opening file \"%s\"!!!! Exiting....\n", fname);
+ exit(2);
+ }
+
+ if (*pairing == NULL){
+ *pairing = malloc(N * sizeof(int));
+ init_pairing_natural(*pairing, N);
+ }
+
+ num = 0;
+ while(num < N){
+ fgets(buff, 255, f);
+ if (buff[0] == '#')
+ continue;
+ ptr = strtok(buff, " "); /* read the first node */
+ i = atoi(ptr);
+ ptr = strtok(NULL, " "); /* read the second node */
+ j = atoi(ptr);
+ (*pairing)[i] = j;
+ num += 1;
+ }
+}
+
+
+void dump_pairing(int *pairing, int N){
+ int i;
+
+ for(i=0; i< N; i ++){
+ printf("%d %d\n", i, pairing[i]);
+ }
+}
+
+void copy_pairing(int *p1, int *p2, int N){
+ int i;
+
+ for (i=0 ; i < N; i ++){
+ p2[i] = p1[i];
+ }
+}
diff --git a/structure/correlations/rank_utils.h b/structure/correlations/rank_utils.h
new file mode 100644
index 0000000..521582a
--- /dev/null
+++ b/structure/correlations/rank_utils.h
@@ -0,0 +1,44 @@
+#ifndef __RANK_UTILS_H__
+#define __RANK_UTILS_H__
+
+/* Load a ranking */
+void load_ranking(char *fname, int *N, double **R);
+
+/* Compute the average of an array */
+double avg_array(double *v, int N);
+
+/* Compute the term "C" in the rho correlation coefficient */
+double compute_C(double *R1, double *R2, int N);
+
+/* Compute the term "D" in the rho correlation coefficient */
+double compute_D(double *R1, double *R2, int N);
+
+/* Compute the Spearman's rank correlation coefficient \rho */
+double compute_rho(double *R1, double *R2, int N, int *pairing);
+
+void dump_ranking(double *R, int N);
+
+void init_pairing_natural(int *pairing, int N);
+
+void init_pairing_inverse(int *pairing, int N);
+
+void init_pairing_random(int *pairing, int N);
+
+void select_pairing(int *pairing, int N, int argc, char *argv[], int pos);
+
+void shuffle_sequence(int *s, int N);
+
+void dump_pairing(int *pairing, int N);
+
+void copy_pairing(int *pairing1, int *pairing2, int N);
+
+
+#endif /* __RANK_UTILS_H__ */
+
+
+
+
+
+
+
+
diff --git a/structure/metrics/aggregate_layers_w.py b/structure/metrics/aggregate_layers_w.py
new file mode 100644
index 0000000..cd5ea1d
--- /dev/null
+++ b/structure/metrics/aggregate_layers_w.py
@@ -0,0 +1,37 @@
+####
+##
+## Aggregate the layers of a multiplex, weigthing each edge according
+## to the number of times it occurs in the different layers
+##
+
+import sys
+import networkx as net
+
+
+if len(sys.argv) < 3:
+ print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv(0)
+ sys.exit(1)
+
+G = net.Graph()
+
+lines = open(sys.argv[1], "r").readlines()
+
+for l in lines:
+ s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ G.add_edge(s,d)
+ G[s][d]['weigth'] = 1
+
+for f in sys.argv[2:]:
+ lines = open(f, "r").readlines()
+ for l in lines:
+ s,d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ if (G.has_edge(s,d)):
+ G[s][d]['weigth'] += 1
+ else:
+ G.add_edge(s,d)
+ G[s][d]['weigth'] = 1
+
+for s,d in G.edges():
+ print s,d, G[s][d]['weigth']
+
+
diff --git a/structure/metrics/avg_edge_overlap.py b/structure/metrics/avg_edge_overlap.py
new file mode 100644
index 0000000..e68fd8b
--- /dev/null
+++ b/structure/metrics/avg_edge_overlap.py
@@ -0,0 +1,47 @@
+####
+##
+## Compute the average edge overlap of a multiplex, i.e. the average
+## number of layers in which an edge is present
+##
+##
+
+import sys
+
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+ sys.exit(1)
+
+max_N = -1
+
+all_edges = {}
+
+layer_ID = -1
+
+for layer in sys.argv[1:]:
+ layer_ID += 1
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ if s > d:
+ tmp = s
+ s = d
+ d = tmp
+ if (s,d) in all_edges:
+ all_edges[(s,d)].append(layer_ID)
+ else:
+ all_edges[(s,d)] = [layer_ID]
+
+K = len(all_edges.keys())
+M = len(sys.argv) - 1
+
+numer = 0
+
+for k in all_edges.keys():
+ numer += len(set(all_edges[(k)]))
+
+
+print 1.0 * numer / K, 1.0 * numer / (K * M)
+
diff --git a/structure/metrics/cartography_from_columns.py b/structure/metrics/cartography_from_columns.py
new file mode 100644
index 0000000..a2343ed
--- /dev/null
+++ b/structure/metrics/cartography_from_columns.py
@@ -0,0 +1,44 @@
+####
+##
+## Make a cartography (i.e., sum and participation coefficient) based
+## on the values found at the given column numbers of the file given
+## as input, e.g.:
+##
+## cartography_cols.py FILEIN 1 5 8 14
+##
+## makes the assumption that the system has 4 layers, and that the
+## quantities involved in the cartography are at the 2nd, 6th, 9th and
+## 15th column of FILEIN
+##
+##
+##
+
+import sys
+
+if len(sys.argv) < 4:
+ print "Usage: %s <filein> <col1> <col2> [<col3> <col4>...]" % sys.argv[0]
+ sys.exit(1)
+
+filein=sys.argv[1]
+cols = [int(x) for x in sys.argv[2:]]
+
+M = len(cols)
+
+with open(filein,"r") as f:
+ for l in f:
+ if l[0] == "#":
+ continue
+ elems = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split(" ")]
+ sum_elems = 0
+ part = 0
+ for c in cols:
+ val = elems[c]
+ sum_elems += val
+ part += val*val
+ if sum_elems > 0:
+ part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems))
+ else:
+ part = 0.0
+ print elems[0], sum_elems, part
+
+
diff --git a/structure/metrics/cartography_from_deg_vectors.py b/structure/metrics/cartography_from_deg_vectors.py
new file mode 100644
index 0000000..c40d701
--- /dev/null
+++ b/structure/metrics/cartography_from_deg_vectors.py
@@ -0,0 +1,37 @@
+####
+##
+## Take as input a file containing, on each line, the degree vector of
+## a node of the multiplex, and compute the multiplex cartography
+## diagram
+##
+##
+
+import sys
+
+if len(sys.argv) < 2:
+ print "Usage: %s <node_deg_vectors>" % sys.argv[0]
+ sys.exit(1)
+
+filein=sys.argv[1]
+
+M = -1
+
+with open(filein,"r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+ elems = [int(x) for x in l.strip(" \n").split(" ")]
+ if (M == -1):
+ M = len(elems)
+ sum_elems = 0
+ part = 0
+ for val in elems:
+ sum_elems += val
+ part += val*val
+ if sum_elems > 0:
+ part = M * 1.0 / (M -1) * (1 - part * 1.0 / (sum_elems * sum_elems))
+ else:
+ part = 0.0
+ print sum_elems, part
+
+
diff --git a/structure/metrics/cartography_from_layers.py b/structure/metrics/cartography_from_layers.py
new file mode 100644
index 0000000..b9c4584
--- /dev/null
+++ b/structure/metrics/cartography_from_layers.py
@@ -0,0 +1,54 @@
+####
+##
+## Compute the participation coefficient of each node of a multiplex
+##
+##
+
+import sys
+import networkx as net
+import collections
+from compiler.ast import flatten
+
+
+
+if len(sys.argv) < 3:
+ print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0]
+ sys.exit(1)
+
+
+layers = []
+
+for f in sys.argv[1:]:
+ G = net.Graph(net.read_edgelist(f, nodetype=int))
+ layers.append(G)
+
+nodes = flatten([x for j in layers for x in j.nodes()])
+#nodes.sort()
+nodes = set(nodes)
+
+M = len(layers)
+
+#print nodes
+
+for n in nodes:
+ deg_alpha_square = 0
+ deg = 0
+ col = 0
+ print n,
+ for l in layers:
+ val = l.degree([n])
+ if not val:
+ col = 2* col
+ continue
+ col *= 2
+ col += 1
+ val = val[n]
+ deg += val
+ deg_alpha_square += val*val
+ if deg > 0:
+ print deg, 1.0 * M / (M-1) * (1.0 - 1.0 * deg_alpha_square/(deg * deg)) , col
+ else:
+ print 0 , 0, col
+
+
+
diff --git a/structure/metrics/edge_overlap.py b/structure/metrics/edge_overlap.py
new file mode 100644
index 0000000..1b27f55
--- /dev/null
+++ b/structure/metrics/edge_overlap.py
@@ -0,0 +1,43 @@
+####
+##
+## Compute the edge overlap of each edge of the multiplex, i.e. the
+## number of times that each edge appears in the multiplex
+##
+##
+
+import sys
+
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> [<layer2>...]" % sys.argv[0]
+ sys.exit(1)
+
+max_N = -1
+
+all_edges = {}
+
+layer_ID = -1
+
+for layer in sys.argv[1:]:
+ layer_ID += 1
+ with open(layer, "r") as lines:
+ for l in lines:
+ if l[0] == "#":
+ continue
+ s, d = [int(x) for x in l.strip(" \n").split(" ")[:2]]
+ if s > d:
+ tmp = s
+ s = d
+ d = tmp
+ if (s,d) in all_edges:
+ all_edges[(s,d)].append(layer_ID)
+ else:
+ all_edges[(s,d)] = [layer_ID]
+
+K = len(all_edges.keys())
+
+for k in all_edges.keys():
+ val = len(set(all_edges[(k)]))
+ s,d = k
+ print s, d, val
+
diff --git a/structure/metrics/intersect_layers.py b/structure/metrics/intersect_layers.py
new file mode 100644
index 0000000..a9da00c
--- /dev/null
+++ b/structure/metrics/intersect_layers.py
@@ -0,0 +1,47 @@
+####
+##
+## Take a certain number of networks as input and give as output the
+## corresponding intersection graph, i.e. the graph in which an edge
+## exists between i and j if and only if it exists in ALL the graphs
+## given as input
+##
+
+
+import sys
+
+
+if len(sys.argv)< 3:
+ print "Usage: %s <file1> <file2> [<file3>....]" % sys.argv[0]
+ sys.exit(1)
+
+
+graphs = {}
+
+num = 0
+
+for fname in sys.argv[1:]:
+
+ lines = open(fname).readlines()
+ graphs[num] = []
+ for l in lines:
+ s,d = [int(x) for x in l.strip(" \n").split(" ")][:2]
+ if s > d:
+ graphs[num].append((d,s))
+ else:
+ graphs[num].append((d,s))
+ num += 1
+
+#print graphs
+
+
+for edge in graphs[0]:
+ to_print = True
+ for i in range(1, num):
+ if edge not in graphs[i]:
+ to_print = False
+ break
+ if to_print:
+ s,d = edge
+ print s,d
+
+
diff --git a/structure/metrics/overlap_degree.py b/structure/metrics/overlap_degree.py
new file mode 100644
index 0000000..fa69434
--- /dev/null
+++ b/structure/metrics/overlap_degree.py
@@ -0,0 +1,47 @@
+####
+##
+## Compute the overlapping degree for each node and the corresponding
+## z-score
+##
+##
+
+import sys
+import numpy
+
+
+if len(sys.argv) < 2:
+ print "Usage: %s <layer1> <layer2> [<layer3>...]" % sys.argv[0]
+ sys.exit(1)
+
+
+nodes = {}
+
+for f in sys.argv[1:]:
+
+ lines = open(f).readlines()
+
+ for l in lines:
+ if l[0] == "#":
+ continue
+ s,d = [int(x) for x in l.strip(" \n").split(" ")]
+ if nodes.has_key(s):
+ nodes[s] +=1
+ else:
+ nodes[s] = 1
+ if nodes.has_key(d):
+ nodes[d] +=1
+ else:
+ nodes[d] = 1
+
+
+degrees = nodes.values()
+avg_deg = numpy.mean(degrees)
+std_deg = numpy.std(degrees)
+
+#print avg_deg, std_deg
+
+keys = nodes.keys()
+keys.sort()
+
+for n in keys:
+ print n, nodes[n], (nodes[n] - avg_deg)/std_deg
diff --git a/structure/reinforcement/reinforcement.py b/structure/reinforcement/reinforcement.py
new file mode 100644
index 0000000..5939e59
--- /dev/null
+++ b/structure/reinforcement/reinforcement.py
@@ -0,0 +1,61 @@
+import networkx as nx
+import sys
+
+
+if __name__ == "__main__":
+
+
+ if len(sys.argv) < 6:
+ print "Usage: %s <layer1> <layer2> <N bins> <min_value> <max_value>" % sys.argv[0]
+ sys.exit(1)
+
+
+ filename_a = sys.argv[1]
+ filename_b = sys.argv[2]
+
+ intervals=int(sys.argv[3])
+ minvalue=float(sys.argv[4])
+ maxvalue=float(sys.argv[5])
+
+
+ tot_a = []
+ pos_a = []
+ for t in range (intervals):
+ tot_a.append(0)
+ pos_a.append(0)
+
+
+
+
+ fa=open(filename_a, 'r')
+ Ga=nx.read_adjlist(fa)
+
+
+ fb=open(filename_b, 'r')
+ Gb=nx.read_weighted_edgelist(fb)
+
+
+
+
+ for u,v in Gb.edges():
+ Gbw=Gb[u][v]['weight']
+ for i in range (intervals):
+ a=minvalue+float(maxvalue-minvalue)*float(i)/intervals
+ b=minvalue+float(maxvalue-minvalue)*float(i+1)/intervals
+ if (Gbw>a and Gbw<b):
+ tot_a[i]+=1
+ break
+ if (Ga.has_edge(u,v)==True):
+ pos_a[i]+=1
+
+ freq_a=[]
+ for i in range (intervals):
+ if (tot_a[i]>0):
+ freq_a.append(float(pos_a[i])/tot_a[i])
+ else:
+ freq_a.append(0)
+ print "#bin_minvalue bin_maxvalue frequence"
+ for i in range (intervals):
+ a=minvalue+float(maxvalue-minvalue)*float(i)/intervals
+ b=minvalue+float(maxvalue-minvalue)*float(i+1)/intervals
+ print a, b, freq_a[i]