/**
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see
* <http://www.gnu.org/licenses/>.
*
* (c) Vincenzo Nicosia 2009-2017 -- <v.nicosia@qmul.ac.uk>
*
* This file is part of NetBunch, a package for complex network
* analysis and modelling. For more information please visit:
*
* http://www.complex-networks.net/
*
* If you use this software, please add a reference to
*
* V. Latora, V. Nicosia, G. Russo
* "Complex Networks: Principles, Methods and Applications"
* Cambridge University Press (2017)
* ISBN: 9781107103184
*
***********************************************************************
*
* This program finds the communities in a graph using the
* label-propagation algorithm proposed by Raghavan, Albert, and
* Kumara.
*
* References:
*
* [1] U. N. Raghavan, R. Albert, and S. Kumara. "Near linear time
* algorithm to detect community structures in large-scale
* networks". Phys. Rev. E 76 (2007), 036106.
*
*/
#include
#include
#include
#include
#include
#include "iltree.h"
#include "utils.h"
/* One (label, occurrence count) pair, used by get_most_common_label()
   to tally how often each label appears among the neighbours of a
   node. Arrays of these records are sorted by decreasing 'freq' with
   compare_label_freq_reverse(). */
typedef struct{
/* label value, copied from the labels[] entry of a neighbour */
int label;
/* number of neighbours seen so far that carry this label */
int freq;
} label_freq_t;
/* Update schedules understood by label_propagation(). */
/* Synchronous update: the labels computed during an epoch are stored in
   a separate array, which replaces the current labels only at the end
   of the epoch. */
#define MODE_SYNC 0x0
/* Asynchronous update: the label of a node is overwritten as soon as
   its new value has been computed. */
#define MODE_ASYNC 0x1
/*
 * Print the program banner, the description of the parameters and of
 * the output formats, and the command-line synopsis on STDOUT.
 *
 * argv: the argument vector received by main(); only argv[0] (the
 *       program name) is used, to build the synopsis line.
 *
 * The function only prints: the caller decides whether to terminate
 * the program afterwards.
 */
void usage(char *argv[]){
  printf("********************************************************************\n"
         "** **\n"
         "** -*- label_prop -*- **\n"
         "** **\n"
         "** Find the communities in 'graph_in' using the label **\n"
         "** propagation algorithm. **\n"
         "** **\n"
         "** The first parameter is used to choose between synchronous **\n"
         "** (SYNC) and asynchronous (ASYNC) update. **\n"
         "** **\n"
         "** The input file 'graph_in' is an edge-list. **\n"
         "** If 'graph_in' is equal to '-' (dash), read the file from **\n"
         "** the standard input (STDIN). **\n"
         "** **\n"
         "** If 'max_epochs' is specified, the program stops after **\n"
         "** 'max_epochs' epochs (useful in conjunction with SYNC, to **\n"
         "** exit from loops). **\n"
         "** **\n"
         "** The program prints on STDOUT the partition obtained when **\n"
         "** no more label flips are possible, in the format: **\n"
         "** **\n"
         "** node_1 comm_1 **\n"
         "** node_2 comm_2 **\n"
         "** node_3 comm_3 **\n"
         "** ..... **\n"
         "** **\n"
         "** where 'comm_1' is the community to which 'node_1' belongs. **\n"
         "** **\n"
         "** The program prints on STDERR one line for each epoch, **\n"
         "** in the format: **\n"
         "** **\n"
         "** epoch_1 Q_1 flips_1 **\n"
         "** epoch_2 Q_2 flips_2 **\n"
         "** ..... **\n"
         "** **\n"
         "** where 'epoch_i' is the epoch number, 'Q_i' is the modularity **\n"
         "** of the partition found at that epoch, and 'flips_i' is the **\n"
         "** number of label flips occurred in 'epoch_i'. **\n"
         "** **\n"
         "********************************************************************\n"
         " This is Free Software - You can use and distribute it under \n"
         " the terms of the GNU General Public License, version 3 or later\n\n"
         " Please visit http://www.complex-networks.net for more information\n\n"
         " (c) Vincenzo Nicosia 2009-2017 (v.nicosia@qmul.ac.uk)\n"
         "********************************************************************\n\n"
         );
  /* The synopsis names every positional argument read by main():
     the update mode, the mandatory input graph and the optional
     epoch limit. */
  printf("Usage: %s [SYNC|ASYNC] <graph_in> [<max_epochs>]\n\n", argv[0]);
}
/*
 * qsort() comparator which sorts label_freq_t records by decreasing
 * frequency.
 *
 * e1, e2: pointers to the two label_freq_t records to compare.
 *
 * Returns a negative value if the record pointed to by e1 has a higher
 * 'freq' than the one pointed to by e2 (so it is placed first), a
 * positive value if it has a lower 'freq', and 0 if the two
 * frequencies are equal.
 *
 * The result is computed through comparisons rather than through a
 * subtraction, so it cannot overflow for any pair of 'freq' values,
 * and the records are read through const pointers without being
 * copied.
 */
int compare_label_freq_reverse(const void *e1, const void *e2){
  const label_freq_t *first = e1;
  const label_freq_t *second = e2;

  return (second->freq > first->freq) - (second->freq < first->freq);
}
/* get the most common label in neighs (that is the list of the k
neighbours of a node) */
unsigned int get_most_common_label(unsigned int *neighs, unsigned int k,
unsigned int *labels, unsigned int ref_label,
int *is_max){
static label_freq_t *neigh_labels = NULL;
static int size = 0;
int num, i, j, max_freq;
if (size < k){
size = k;
neigh_labels = realloc(neigh_labels, size * sizeof(label_freq_t));
}
neigh_labels[0].label = labels[neighs[0]];
neigh_labels[0].freq = 1;
num = 1;
for (i=1; i increase the counter */
neigh_labels[j].freq += 1;
}
}
/* Now we sort the array neigh_labels */
qsort(neigh_labels, num, sizeof(label_freq_t), compare_label_freq_reverse);
/* we determine how many neighbours have the maximum freq*/
max_freq = neigh_labels[0].freq;
i = 1;
while(i < num && neigh_labels[i].freq == max_freq){
i ++;
}
/* check whether ref_label is one of the most common labels */
*is_max = 0;
for (j=0; j 0 && epochs > max_epochs)
break;
cont = 0;
if (epochs > 0){
Q= modularity(J_slap, r_slap, N, K, labels, N);
fprintf(stderr, "%d %g %g\n", epochs, Q, (double)num_flips);
}
num_flips = 0;
epochs += 1;
for (i=N-1; i>=0; i--){
j = rand() % (i+1);
tmp = ids[j]; /* This is the id to be considered */
ids[j] = ids[i];
ids[i] = tmp;
k = r_slap[tmp + 1] - r_slap[tmp];
new_label = get_most_common_label(J_slap+r_slap[tmp], k,labels, labels[tmp], &is_max);
/* Stop criterion: if the new label is not equal to the
old one, continue to another epoch */
if (mode == MODE_ASYNC && labels[tmp] != new_label){
labels[tmp] = new_label;
cont = 1;
num_flips += 1 ;
}
if (mode == MODE_SYNC){
next_labels[tmp] = new_label;
if (labels[tmp] != next_labels[tmp]){
cont = 1;
num_flips += 1;
}
}
}
if (mode == MODE_SYNC){
/* Now we can swap labels and next_labels */
tmp_labels = labels;
labels = next_labels;
next_labels = tmp_labels;
}
}
free(ids);
*num_epochs = epochs - 1;
if (mode == MODE_SYNC){
free(next_labels);
}
return labels;
}
/*
 * Program entry point.
 *
 * Command line:  argv[1] = update mode, argv[2] = input graph ('-'
 * for STDIN), argv[3] = optional maximum number of epochs.
 *
 * The program loads the graph with read_slap(), runs
 * label_propagation() on it, renumbers the resulting labels with
 * normalise_labels(), computes the modularity of the partition, and
 * prints a summary line followed by the partition itself on STDOUT.
 *
 * If fewer than two arguments are given, the usage message is printed
 * and the program exits with status 1.
 */
int main(int argc, char *argv[]){
  unsigned int N, K, nc;
  unsigned int *J_slap, *r_slap, *labels, *label_count, num_epochs, max_epochs;
  FILE *filein;
  double Q;
  char mode;

  if (argc < 3){
    usage(argv);
    exit(1);
  }

  /* label propagation draws random numbers with rand() */
  srand(time(NULL));

  /* The input graph is the SECOND argument: a dash selects STDIN,
     anything else is the name of the file to open. */
  if (!strcmp(argv[2], "-")){
    /* take the input from STDIN */
    filein = stdin;
  }
  else {
    filein = openfile_or_exit(argv[2], "r", 2);
  }
  read_slap(filein, &K, &N, &J_slap, &r_slap);
  fclose(filein);

  /* Any first argument which my_strcasecmp() does not match with
     "sync" selects the asynchronous update. */
  if (!my_strcasecmp(argv[1], "sync")){
    mode = MODE_SYNC;
  }
  else{
    mode = MODE_ASYNC;
  }

  /* max_epochs == 0 means that no limit on the number of epochs is
     requested */
  if (argc > 3)
    max_epochs = atoi(argv[3]);
  else
    max_epochs = 0;

  labels = label_propagation(J_slap, r_slap, N, K, &num_epochs, max_epochs, mode);

  label_count = malloc(N * sizeof(unsigned int));
  /* malloc() is allowed to return NULL for a zero-sized request, so a
     NULL result is an error only if the graph has at least one node */
  if (label_count == NULL && N > 0){
    fprintf(stderr, "Error: unable to allocate memory for %u label counters\n", N);
    exit(EXIT_FAILURE);
  }
  nc = normalise_labels(labels, N, label_count);
  Q= modularity(J_slap, r_slap, N, K, labels, nc);

  /* nc and num_epochs are unsigned: print them with %u */
  printf("### nc: %u Q_max: %f Epochs: %u\n", nc, Q, num_epochs);
  dump_partition(labels, label_count, N);

  free(J_slap);
  free(r_slap);
  free(label_count);
  free(labels);
  return 0;
}