# This file is part of MAMMULT: Metrics And Models for Multilayer Networks # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or (at # your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . #### ## ## Get two rankings and a pairing, and compute the corresponding ## Spearman's rho rank correlation coefficient ## ## ## ## import sys import random import math #import scipy.stats ## ## Compute the constant C of the Spearman's rho correlation coefficient in the draft ## def compute_C(rank1, rank2): N = len(rank1) [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] [sum1, sum2] = [1.0 * sum(x.values()) for x in [rank1, rank2]] C = N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2 #print mu1, mu2, sum1, sum2, C return C ## ## Compute the constant D of the Spearman's rho correlation coefficient in the draft ## def compute_D(rank1, rank2): [mu1, mu2] = [1.0 * sum(x.values()) / len(x.values()) for x in [rank1, rank2]] s1 = sum([math.pow((x-mu1), 2) for x in rank1.values()]) s2 = sum([math.pow((x-mu2), 2) for x in rank2.values()]) D = math.sqrt(s1 * s2) return D def compute_rho(rank1, rank2, pairing, C, D): rho = 0 for s,t in pairing: rho += rank1[s] * rank2[t] rho = (rho + C) / D return rho if len(sys.argv) < 3: print "Usage: %s []" % sys.argv[0] sys.exit(1) rank1 = {} rank2 = {} lines = open(sys.argv[1], "r").readlines() i = 0 for l in lines: if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines continue r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] rank1[i] = r i += 1 lines = open(sys.argv[2], "r").readlines() i = 0 for l in lines: if l[0] == "#" or l.strip(" \n").split(" ") == []: ## Strip comments and empty lines continue r = [float(x) if "." in x or "e" in x else int(x) for x in l.strip(" \n").split(" ")][0] rank2[i] = r i += 1 N1 = len(rank1) N2 = len(rank2) if (N1 != N2): print "The two files must have the same number of nodes!!!!!" sys.exit(2) C = compute_C(rank1, rank2) D = compute_D(rank1, rank2) ## We start from a random pairing, and compute the corresponding value of rho pairing = [] if len(sys.argv) > 3: lines = open(sys.argv[3], "r").readlines() for l in lines: if l[0] == "#" or l.strip(" \n").split(" ") == []: continue p1, p2 = [int(x) for x in l.strip(" \n").split(" ")][:2] pairing.append((p1, p2)) else: for i in range (N1): pairing.append((i,i)) if len(pairing) != N1: print "ERROR !!! The pairing should be of the same length of the ranking files!!!" sys.exit(1) rho = compute_rho(rank1, rank2, pairing, C, D) print rho