/**
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see
* .
*
* (c) Vincenzo Nicosia 2009-2017 --
*
* This file is part of NetBunch, a package for complex network
* analysis and modelling. For more information please visit:
*
* http://www.complex-networks.net/
*
* If you use this software, please add a reference to
*
* V. Latora, V. Nicosia, G. Russo
* "Complex Networks: Principles, Methods and Applications"
* Cambridge University Press (2017)
* ISBN: 9781107103184
*
***********************************************************************
*
* Enumerate all the three-nodes subgraphs in a directed network, and
* compute the significance of their number with respect to the
* corresponding configuration model ensemble.
*
* References:
*
* [1] R. Milo et al. "Network Motifs: Simple Building Blocks of
* Complex Networks". Science 298 (2002), 824-827.
*
* [2] R. Milo et al. "Superfamilies of evolved and designed
* networks." Science 303 (2004), 1538-1542
*
*/
#include
#include
#include
#include
#include
#include "utils.h"
void usage(char *argv[]){
printf("********************************************************************\n"
"** **\n"
"** -*- f3m -*- **\n"
"** **\n"
"** Count all the 3-node subgraphs of a directed graph given as **\n"
"** input, and compute the relevance (z-score) of each motif **\n"
"** with respect to the corresponding configuration model graph **\n"
"** ensemble. **\n"
"** **\n"
"** The file 'graph_in' contains the edge list of the graph. **\n"
"** **\n"
"** The program prints on STDOUT one line for each of the 13 **\n"
"** motifs, in the format **\n"
"** **\n"
"** motif count mean_rnd std_rnd z-score **\n"
"** **\n"
"** where 'motif' is the motif number (an integer between 1 and **\n"
"** 13), 'count' is the number of subgraphs of that type found **\n"
"** in 'graph_in', 'mean_rnd' is the average number of those **\n"
"** subgraphs found in the randomised realisations of the graph, **\n"
"** 'std_rnd' is the standard deviation associated to 'avg_rnd', **\n"
"** and 'z-score' is the normalised deviation of 'count' from **\n"
"** 'mean_rnd'. **\n"
"** **\n"
"** If the (optional) parameter 'num_random' is provided, use **\n"
"** that number of random realisations to compute the z-score. **\n"
"** **\n"
"********************************************************************\n"
" This is Free Software - You can use and distribute it under \n"
" the terms of the GNU General Public License, version 3 or later\n\n"
" Please visit http://www.complex-networks.net for more information\n\n"
" (c) Vincenzo Nicosia 2009-2017 (v.nicosia@qmul.ac.uk)\n"
"********************************************************************\n\n"
);
printf("Usage: %s []\n", argv[0]);
}
#define MIN(x, y) ((x) < (y) ? (x) : (y))
typedef struct{
unsigned int N;
unsigned int K;
unsigned int *J_slap;
unsigned int *r_slap;
} graph_t;
typedef struct{
double f_count_real[13];
int num_rnd;
double **f_count_rnd;
} mstats_t;
char perm12[3][3] = {{0, 1, 0},
{1, 0, 0},
{0, 0, 1}};
char perm13[3][3] = {{0, 0, 1},
{0, 1, 0},
{1, 0, 0}};
char perm23[3][3] = {{1, 0, 0},
{0, 0, 1},
{0, 1, 0}};
void shuffle_list(unsigned int *v, unsigned int K){
int i, pos;
for(i=K-1; i>=0; i--){
pos = rand() % K;
if (pos != i){
v[i] ^= v[pos];
v[pos] ^= v[i];
v[i] ^= v[pos];
}
}
}
int is_simple_graph(unsigned int *J_slap, unsigned int *r_slap, unsigned int K,
unsigned int N){
int i, j;
for(i=0; i r_slap[i] && J_slap[j] == J_slap[j-1]) /* or a double edge... */
return 0;
}
}
return 1;
}
int is_loop_free(unsigned int *J_slap, unsigned int *r_slap, unsigned int K,
unsigned int N){
int i, j;
for(i=0; i n2){
n1 ^= n2;
n2 ^= n1;
n1 ^= n2;
}
perm = n1 + (n2<<2);
switch(perm){
case (1 + (2<<2)): /* permute 1 with 2 */
apply_perm_3(m, perm12);
break;
case (1 + (3<<2)): /* permute 1 with 3 */
apply_perm_3(m, perm13);
break;
case (2 + (3<<2)): /* permute 2 with 3 */
apply_perm_3(m, perm23);
break;
}
}
/* Load the input graph. We construct two versions of the graph,
i.e. the directed versions G_out ( containing the list of
out-neighbours of each node) and the underlying undirected graph
G_u
N.B.: This is quite inefficient at the moment, since it reads the
file twice, and could be replaced by one call to read_ij and two
appropriate calls to convert_ij2slap.... */
void load_graph(FILE *fin, graph_t *G_u, graph_t *G_out){
/*FIXME!!!! WE CANNOT REWIND THE STANDARD OUTPUT !!!!! */
read_slap(fin, &(G_u->K), &(G_u->N), &(G_u->J_slap), &(G_u->r_slap));
sort_neighbours(G_u->J_slap, G_u->r_slap, G_u->N);
rewind(fin);
read_slap_dir(fin, &(G_out->K), &(G_out->N), &(G_out->J_slap), &(G_out->r_slap));
sort_neighbours(G_out->J_slap, G_out->r_slap, G_out->N);
rewind(fin);
}
void dump_matrix_3(char m[3][3]){
int i, j;
for(i=0; i<3; i++){
for(j=0; j<3; j++){
printf("%d ", m[i][j]);
}
printf("\n");
}
}
int motif_number(char m[3][3]){
char m0[3][3];
char m1[3][3];
char m2[3][3];
char m3[3][3];
int v, v0, v1, v2, v3, v4, v5;
int i,j;
for(i=0; i<3; i++){
for(j=0; j<3; j++){
m0[i][j] = m[i][j];
}
}
if (row_value(m[0]) == 0){
permute_matrix_3(m0, 1, 2);
}
if (row_value(m0[1]) == 0){
permute_matrix_3(m0, 2, 3);
}
for(i=0; i<3; i++){
for(j=0; j<3; j++){
m1[i][j] = m0[i][j];
m2[i][j] = m0[i][j];
m3[i][j] = m0[i][j];
}
}
/* We consider here all the 6 possible permutations... */
/* {0, 1, 2} */
v0 = matrix_value(m0);
/* {1, 0, 2} */
permute_matrix_3(m1, 1, 2);
v1 = matrix_value(m1);
/* {2, 1, 0} */
permute_matrix_3(m2, 1, 3);
v2 = matrix_value(m2);
/* {0, 2, 1} */
permute_matrix_3(m3, 2, 3);
v3 = matrix_value(m3);
/* {1, 2, 0} */
permute_matrix_3(m2, 1, 2);
v4 = matrix_value(m2);
/* {2, 0, 1} */
permute_matrix_3(m3, 1, 2);
v5 = matrix_value(m3);
v = MIN (MIN( MIN( MIN( MIN( v0, v1), v2), v3), v4), v5);
switch(v){
case 6:
return 0;
case 12:
return 1;
case 14:
return 2;
case 36:
return 3;
case 38:
return 4;
case 46:
return 5;
case 74:
return 6;
case 78:
return 7;
case 98:
return 8;
case 102:
return 9;
case 108:
return 10;
case 110:
return 11;
case 238:
return 12;
default:
fprintf(stderr, "No motif with number %d! Exiting\n", v);
dump_matrix_3(m);
exit(5);
}
}
int get_motif_3(int n1, int n2, int n3, graph_t *G_out){
char m[3][3];
unsigned int n[3] = {n1, n2, n3};
int i, j, v;
for(i=0; i<3; i++){
for (j=0; j<3; j++){
if (is_neigh(G_out->J_slap, G_out->r_slap, G_out->N,
n[i], n[j])){
m[i][j] = 1;
}
else{
m[i][j] = 0;
}
}
}
v = motif_number(m);
return v;
}
void find_subgraphs_3(graph_t *G_u, graph_t *G_out, double *f_cnt){
int i, j, k, n1, n2;
int val;
for (i=0; iN; i++){
for(n1 = G_u->r_slap[i]; n1r_slap[i+1]; n1++){
/* j is a first-neighbour of i in G_u */
j = G_u->J_slap[n1];
/* avoid multiple entries in the J_slap vector */
if (n1 > G_u->r_slap[i] && j == G_u->J_slap[n1-1])
continue;
for(n2 = n1+1; n2 < G_u->r_slap[i+1]; n2++){
/* and k is another first neighbour of i in G_u */
k = G_u->J_slap[n2];
/* avoid multiple entries in the J_slap vector */
if (n2 > n1+1 && k == G_u->J_slap[n2-1])
continue;
/* now, if j and k are connected by an edge, we consider this
triangle only if iJ_slap, G_u->r_slap, G_u->N, j, k) &&
(j < i || k < j || k < i)) || (j==k))
continue;
val = get_motif_3(i, j, k, G_out);
f_cnt[val] +=1;
}
}
}
}
void init_graph(graph_t *G1){
G1->J_slap = G1->r_slap = NULL;
}
void init_stats(mstats_t *st, int n_rand){
int i;
st->f_count_rnd = malloc(n_rand * sizeof(double*));
st->num_rnd = n_rand;
for(i=0; i<13; i++){
st->f_count_real[i] = 0;
}
for(i=0; if_count_rnd[i] = malloc(13 * sizeof(double));
memset(st->f_count_rnd[i], 0, 13 * sizeof(double));
}
}
void compute_rnd_st_mean_std(mstats_t *st, double *mean, double *std){
double sum[13], sum2[13];
double val, n;
int i, j;
n = st->num_rnd;
for (i=0; i<13; i++){
sum[i] = sum2[i] = 0;
}
if (n == 0)
return;
for(i=0; if_count_rnd[i][j];
sum[j] += val;
sum2[j] += val*val;
}
}
for(i=0; i<13; i++){
mean[i] = sum[i] / n;
if (sum2[i] > 0)
std[i] = sqrt(sum2[i] * 1.0/(n-1) - 1.0/( n * (n-1)) * sum[i]*sum[i]);
else
std[i] = 0.0;
}
}
void dump_stats(mstats_t *st){
int i;
double v_mean[13], v_std[13], x;
memset(v_mean, 0, 13 * sizeof(double));
memset(v_std, 0, 13 * sizeof(double));
compute_rnd_st_mean_std(st, v_mean, v_std);
for(i=0; i<13; i++){
x = st->f_count_real[i];
if (v_std[i] > 0)
printf("%-2d %12.0f %15.2f %10.3f %+10.3f\n", i+1, x,
v_mean[i], v_std[i], 1.0 * (x - v_mean[i])/v_std[i] );
else
printf("%-2d %12.0f %15.2f %10.3f %+10.3f\n", i+1, x,
v_mean[i], v_std[i], 0.0);
}
}
void randomise_graph(graph_t *G_out, graph_t *RNDG_out, graph_t *RNDG_u){
static unsigned int *I, *J;
static unsigned int I_size, J_size;
unsigned int *tmp;
if (!I || I_size < 2*G_out->K){
tmp = realloc(I, G_out -> K * 2 * sizeof(unsigned int));
VALID_PTR_OR_EXIT(tmp, 3);
I = tmp;
I_size = 2*G_out->K;
}
if (!J || J_size < 2*G_out->K){
tmp = realloc(J, G_out -> K * 2 * sizeof(unsigned int));
VALID_PTR_OR_EXIT(tmp, 3);
J = tmp;
J_size = 2*G_out->K;
}
if (RNDG_out->J_slap){
free(RNDG_out->J_slap);
RNDG_out->J_slap = NULL;
}
RNDG_out->J_slap = sample_conf_model_smart(G_out->J_slap, G_out->r_slap, G_out->K, G_out->N);
tmp = realloc(RNDG_out->r_slap, (G_out->N + 1) * sizeof(unsigned int));
VALID_PTR_OR_EXIT(tmp, 19);
RNDG_out->r_slap = tmp;
memcpy(RNDG_out->r_slap, G_out->r_slap, (G_out->N + 1) * sizeof(unsigned int));
RNDG_out->N = G_out->N;
RNDG_out->K = G_out->K;
convert_slap2ij(RNDG_out->J_slap, RNDG_out->r_slap, RNDG_out->N, I, J);
/* copy J at the end of I */
memcpy(&(I[G_out->K]), J, G_out->K * sizeof(unsigned int));
/* copy I at the end of J */
memcpy(&(J[G_out->K]), I, G_out->K * sizeof(unsigned int));
RNDG_u->N = convert_ij2slap(I, J, 2*G_out->K, & (RNDG_u->r_slap), &(RNDG_u->J_slap));
RNDG_u->K = 2 * G_out->K;
sort_neighbours(RNDG_u->J_slap, RNDG_u->r_slap, RNDG_u->N);
sort_neighbours(RNDG_out->J_slap, RNDG_out->r_slap, RNDG_out->N);
if (!is_loop_free(RNDG_u->J_slap, RNDG_u->r_slap, RNDG_u->K, RNDG_u->N)){
fprintf(stderr, "Error!!!! The undirected version of the graph is not loop-free!!!!\n");
exit(23);
}
}
int main(int argc, char *argv[]){
graph_t G_u, G_out, RNDG_u, RNDG_out;
mstats_t st;
FILE *filein;
unsigned int nr;
int i;
if(argc < 2){
usage(argv);
exit(1);
}
filein = openfile_or_exit(argv[1], "r", 2);
if (argc > 2){
nr = atoi(argv[2]);
}
else{
nr = 0;
}
init_stats(&st, nr);
init_graph(&G_u);
init_graph(&G_out);
load_graph(filein, &G_u, &G_out);
fclose(filein);
find_subgraphs_3(&G_u, &G_out, st.f_count_real);
srand(time(NULL));
/* Now we create n_r random networks with the same degree
distribution, and we perform motifs analysis on each of them */
init_graph(&RNDG_out);
init_graph(&RNDG_u);
for(i=0; i