/** * This program is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see * . * * (c) Vincenzo Nicosia 2009-2017 -- * * This file is part of NetBunch, a package for complex network * analysis and modelling. For more information please visit: * * http://www.complex-networks.net/ * * If you use this software, please add a reference to * * V. Latora, V. Nicosia, G. Russo * "Complex Networks: Principles, Methods and Applications" * Cambridge University Press (2017) * ISBN: 9781107103184 * *********************************************************************** * * This program finds the communities in a graph using the * label-propagation algorithm proposed by Raghavan, Albert, and * Kumara. * * References: * * [1] U. N. Raghavan, R. Albert, and S. Kumara. "Near linear time * algorithm to detect community structures in large-scale * networks". Phys. Rev. E 76 (2007), 036106. * */ #include #include #include #include #include #include "iltree.h" #include "utils.h" typedef struct{ int label; int freq; } label_freq_t; #define MODE_SYNC 0x0 #define MODE_ASYNC 0x1 /* Usage */ void usage(char *argv[]){ printf("********************************************************************\n" "** **\n" "** -*- label_prop -*- **\n" "** **\n" "** Find the communities in 'graph_in' using the label **\n" "** propagation algorithm. **\n" "** **\n" "** The first parameter is used to choose between synchronous **\n" "** (SYNC) and asynchronous (ASYNC) update. **\n" "** **\n" "** The input file 'graph_in' is an edge-list. **\n" "** If 'graph_in' is equal to '-' (dash), read the file from **\n" "** the standard input (STDIN). **\n" "** **\n" "** If 'max_epochs' is specified, the program stops after **\n" "** 'max_epochs' epochs (useful in conjunction with SYNC, to **\n" "** exit from loops). **\n" "** **\n" "** The program prints on STDOUT the partition obtained when **\n" "** no more label flips are possible, in the format: **\n" "** **\n" "** node_1 comm_1 **\n" "** node_2 comm_2 **\n" "** node_3 comm_3 **\n" "** ..... **\n" "** **\n" "** where 'comm_1' is the community to which 'node_1' belongs. **\n" "** **\n" "** The program prints on STDERR one line for each epoch, **\n" "** in the format: **\n" "** **\n" "** epoch_1 Q_1 flips_1 **\n" "** epoch_2 Q_2 flips_2 **\n" "** ..... **\n" "** **\n" "** where 'epoch_i' is the epoch number, 'Q_i' is the modularity **\n" "** of the partition found at that epoch, and 'flips_i' is the **\n" "** number of label flips occurred in 'epoch_i'. **\n" "** **\n" "********************************************************************\n" " This is Free Software - You can use and distribute it under \n" " the terms of the GNU General Public License, version 3 or later\n\n" " Please visit http://www.complex-networks.net for more information\n\n" " (c) Vincenzo Nicosia 2009-2017 (v.nicosia@qmul.ac.uk)\n" "********************************************************************\n\n" ); printf("Usage: %s [SYNC|ASYNC] []\n\n" , argv[0]); } /* Compare the frequency of two labels and return a value which allows to sort them in reverse order (i.e., -v, if v=f1-f2) */ int compare_label_freq_reverse(const void *e1, const void *e2){ label_freq_t v1, v2; v1 = *((label_freq_t*)e1); v2 = *((label_freq_t*)e2); return - (v1.freq - v2.freq); } /* get the most common label in neighs (that is the list of the k neighbours of a node) */ unsigned int get_most_common_label(unsigned int *neighs, unsigned int k, unsigned int *labels, unsigned int ref_label, int *is_max){ static label_freq_t *neigh_labels = NULL; static int size = 0; int num, i, j, max_freq; if (size < k){ size = k; neigh_labels = realloc(neigh_labels, size * sizeof(label_freq_t)); } neigh_labels[0].label = labels[neighs[0]]; neigh_labels[0].freq = 1; num = 1; for (i=1; i increase the counter */ neigh_labels[j].freq += 1; } } /* Now we sort the array neigh_labels */ qsort(neigh_labels, num, sizeof(label_freq_t), compare_label_freq_reverse); /* we determine how many neighbours have the maximum freq*/ max_freq = neigh_labels[0].freq; i = 1; while(i < num && neigh_labels[i].freq == max_freq){ i ++; } /* check whether ref_label is one of the most common labels */ *is_max = 0; for (j=0; j 0 && epochs > max_epochs) break; cont = 0; if (epochs > 0){ Q= modularity(J_slap, r_slap, N, K, labels, N); fprintf(stderr, "%d %g %g\n", epochs, Q, (double)num_flips); } num_flips = 0; epochs += 1; for (i=N-1; i>=0; i--){ j = rand() % (i+1); tmp = ids[j]; /* This is the id to be considered */ ids[j] = ids[i]; ids[i] = tmp; k = r_slap[tmp + 1] - r_slap[tmp]; new_label = get_most_common_label(J_slap+r_slap[tmp], k,labels, labels[tmp], &is_max); /* Stop criterion: if the new label is not equal to the old one, continue to another epoch */ if (mode == MODE_ASYNC && labels[tmp] != new_label){ labels[tmp] = new_label; cont = 1; num_flips += 1 ; } if (mode == MODE_SYNC){ next_labels[tmp] = new_label; if (labels[tmp] != next_labels[tmp]){ cont = 1; num_flips += 1; } } } if (mode == MODE_SYNC){ /* Now we can swap labels and next_labels */ tmp_labels = labels; labels = next_labels; next_labels = tmp_labels; } } free(ids); *num_epochs = epochs - 1; if (mode == MODE_SYNC){ free(next_labels); } return labels; } int main(int argc, char *argv[]){ unsigned int N, K, nc; unsigned int *J_slap, *r_slap, *labels, *label_count, num_epochs, max_epochs; FILE *filein; double Q; char mode; if (argc < 3){ usage(argv); exit(1); } srand(time(NULL)); if (!strcmp(argv[1], "-")){ /* take the input from STDIN */ filein = stdin; } else { filein = openfile_or_exit(argv[2], "r", 2); } read_slap(filein, &K, &N, &J_slap, &r_slap); fclose(filein); if (!my_strcasecmp(argv[1], "sync")){ mode = MODE_SYNC; } else{ mode = MODE_ASYNC; } if (argc > 3) max_epochs = atoi(argv[3]); else max_epochs = 0; labels = label_propagation(J_slap, r_slap, N, K, &num_epochs, max_epochs, mode); label_count = malloc(N * sizeof(unsigned int)); nc = normalise_labels(labels, N, label_count); Q= modularity(J_slap, r_slap, N, K, labels, nc); printf("### nc: %d Q_max: %f Epochs: %d\n", nc, Q, num_epochs); dump_partition(labels, label_count, N); free(J_slap); free(r_slap); free(label_count); free(labels); }