From 3aee2fd43e3059a699af2b63c6f2395e5a55e515 Mon Sep 17 00:00:00 2001 From: KatolaZ Date: Wed, 27 Sep 2017 15:06:31 +0100 Subject: First commit on github -- NetBunch 1.0 --- src/gn/gn.c | 495 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 495 insertions(+) create mode 100644 src/gn/gn.c (limited to 'src/gn/gn.c') diff --git a/src/gn/gn.c b/src/gn/gn.c new file mode 100644 index 0000000..d69008d --- /dev/null +++ b/src/gn/gn.c @@ -0,0 +1,495 @@ +/** + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see + * <http://www.gnu.org/licenses/>. + * + * (c) Vincenzo Nicosia 2009-2017 -- <v.nicosia@qmul.ac.uk> + * + * This file is part of NetBunch, a package for complex network + * analysis and modelling. For more information please visit: + * + * http://www.complex-networks.net/ + * + * If you use this software, please add a reference to + * + * V. Latora, V. Nicosia, G. Russo + * "Complex Networks: Principles, Methods and Applications" + * Cambridge University Press (2017) + * ISBN: 9781107103184 + * + *********************************************************************** + * + * This program implements the Girvan-Newman algorithm for community + * detection, based on the removal of edges with largest betweenness. + * + * + * References: + * + * [1] M. Girvan and M. E. J. Newman. "Community structure in social + * and biological networks". P. Natl. Acad. Sci. USA 99 (2002), + * 7821--7826. 
+ * + */ + + +#include +#include +#include +#include +#include + +#include "utils.h" + +void usage(char *argv[]){ + + printf("********************************************************************\n" + "** **\n" + "** -*- gn -*- **\n" + "** **\n" + "** Find the communities of the input graph 'graph_in' using **\n" + "** the Girvan-Newman algorithm (successive removal of edges **\n" + "** with high betweeneess). **\n" + "** **\n" + "** The input file 'graph_in' is an edge-list: **\n" + "** **\n" + "** I_1 J_1 **\n" + "** I_2 J_2 **\n" + "** I_3 J_3 **\n" + "** ... ... **\n" + "** I_K J_K **\n" + "** **\n" + "** If 'graph_in' is equal to '-' (dash), read the file from **\n" + "** the standard input (STDIN). **\n" + "** **\n" + "** The program prints on STDOUT the partition corresponding **\n" + "** to the largest value of modularity, in the format: **\n" + "** **\n" + "** node_1 comm_1 **\n" + "** node_2 comm_2 **\n" + "** node_3 comm_3 **\n" + "** ..... **\n" + "** **\n" + "** where 'comm_1' is the community to which 'node_1' belongs. **\n" + "** **\n" + "** The program prints on STDERR the number of communities and **\n" + "** the value of modularity obtained after the removal of each **\n" + "** edge, in the format: **\n" + "** **\n" + "** **\n" + "** ## nc: NUM_COMM Q_max: Q_MAX **\n" + "** nc_1 Q_1 **\n" + "** nc_2 Q_2 **\n" + "** nc_3 Q_3 **\n" + "** ... **\n" + "** **\n" + "** where 'nc_1', 'nc_2', 'nc_3', etc. is the number of **\n" + "** communities (connected components) remaining after the **\n" + "** 1st, 2nd, 3rd, etc. edge has been removed, and 'Q_1', **\n" + "** 'Q_2', 'Q_3', etc. are the value of the modularity **\n" + "** function of the corresponding node partition. The first **\n" + "** output line reports the number of communities NUM_COMM **\n" + "** and corresponding value of modularity Q_MAX of the best **\n" + "** partition found. 
**\n" + "** **\n" + "********************************************************************\n" + " This is Free Software - You can use and distribute it under \n" + " the terms of the GNU General Public License, version 3 or later\n\n" + " Please visit http://www.complex-networks.net for more information\n\n" + " (c) Vincenzo Nicosia 2009-2017 (v.nicosia@qmul.ac.uk)\n" + "********************************************************************\n\n" + ); + printf("Usage: %s \n", argv[0]); + exit(1); +} + + +void add_predecessor(unsigned int **pred, unsigned int k){ + + (*pred)[0] += 1; + *pred = realloc(*pred, ((*pred)[0] + 1) * sizeof(unsigned int)); + (*pred)[ (*pred)[0] ] = k; +} + + + +/* + * + * Compute node and edge betweenness, based on shortest paths + * originating on the "num" nodes specified in "nlist". "edge_bet" + * should be an appropriately allocated (and initialised to zero!!!!) + * vector of length equal to J_slap, and will contain the values of + * edge betweenness. + * + */ +double* compute_bet_dependency_active(unsigned int N, unsigned int *J_slap, unsigned int *r_slap, + double *edge_bet, char *active){ + + static unsigned int *marked, **preds, *dist, *nj; + static double *delta, *cB; + int i, j, k, w, idx, cur_node; + double val; + unsigned int d; + unsigned int n, nd, ndp; + unsigned int edge_pos; + + if (!dist) + dist = malloc(N * sizeof(unsigned int)); + if (!marked) + marked = malloc(N * sizeof(unsigned int)); + if (!preds) + preds = malloc(N * sizeof(unsigned int *)); + if (!nj) + nj = malloc(N * sizeof(unsigned int)); + if (!delta) + delta = malloc(N * sizeof(double)); + if (!cB) + cB = malloc(N * sizeof(double)); + + for (i=0; i 0){ + for(i = n; i< n+nd; i ++){ + cur_node = marked[i]; + for (k=r_slap[cur_node]; k=1; k--){ + w = marked[k]; + for (idx=1; idx <= preds[w][0]; idx ++ ){ + i = preds[w][idx]; + val = 1.0 * nj[i] / nj[w] * (1 + delta[w]); + delta[i] += val; + /* Now we should update the betweenness of the edge (i,w) in + the 
appropriate position of the vector edge_bet*/ + find_neigh_in_Jslap(J_slap, r_slap, N, i, w, &edge_pos); + edge_bet[edge_pos] += val; + find_neigh_in_Jslap(J_slap, r_slap, N, w, i, &edge_pos); + edge_bet[edge_pos] += val; + } + cB[w] += delta[w]; + } + } + //free(marked); + return cB; +} + + + +/** + * + * Depth-First search on the node i.... + * + */ +int dfs_active(unsigned int i, unsigned int *J_slap, unsigned int *r_slap, + unsigned int N, unsigned int nc, unsigned int *ic, unsigned int *f, + char reset, char *active){ + + static unsigned int time; + unsigned int j, s; + + if(reset){ + time = 0; + } + + ic[i] = nc; + s = 1; + time += 1; + + for(j=r_slap[i]; j= max && active[i % K]){ + max = v[i % K]; + pos_max = i % K; + } + } + return pos_max; +} + + +unsigned int find_pos_max(double *v, unsigned int K, char *active){ + + unsigned int i; + double max; + unsigned int base, pos_max; + + base = 0; + while(! active[base] ) + base = (base + 1) % K; + max = v[base]; + pos_max = base; + + for(i=base; i= max && active[i % K]){ + max = v[i % K]; + pos_max = i % K; + } + } + return pos_max; +} + + +/* This function compute the modularity function of the partition + 'part'...*/ +double compute_modularity(unsigned int *J_slap, unsigned int *r_slap, unsigned int N, + unsigned int *part, unsigned int nc){ + static double *e, *a; + + unsigned int i, j, n, K; + unsigned int ci, cj; + double Q; + + if(!e) + e = malloc((N+1) * sizeof(double)); + if(!a) + a = malloc((N+1) * sizeof(double)); + + memset(e, 0, (N+1) * sizeof(double)); + memset(a, 0, (N+1) * sizeof(double)); + + K = r_slap[N]; + + for (i=0; i 0){ + if (Q > Q_max){ + Q_max = Q; + nc_max = nc; + memcpy (best_part, ic, N * sizeof(unsigned int)); + } + } + else{ + Q_max = Q; + memcpy(best_part, ic, N*sizeof(unsigned int)); + } + } + /* Return the best partition */ + fprintf(stdout, "### nc: %d Q_max: %g\n", nc_max, Q_max); + free(f); + free(ic); + free(sizes); + free(edge_bet); + free(active); + return best_part; +} + + 
+ +void dump_partition(unsigned int *p, unsigned int N){ + + unsigned int i; + for(i=0; i