First commit of MAMMULT code

author: KatolaZ <katolaz@yahoo.it> 2015-10-19 16:23:00 +0100
committer: KatolaZ <katolaz@yahoo.it> 2015-10-19 16:23:00 +0100
commit: df8386f75b0538075d72d52693836bb8878f505b (patch)
tree: 704c2a0836f8b9fd9f470c12b6ae05637c431468 /models/correlations
parent: 363274e79eade464247089c105260bc34940da07 (diff)
7 files changed, 1033 insertions, 0 deletions
diff --git a/models/correlations/Makefile b/models/correlations/Makefile
new file mode 100644
index 0000000..b3828aa
--- /dev/null
+++ b/models/correlations/Makefile
@@ -0,0 +1,15 @@
+CFLAGS="-O3"
+CC="gcc"
+GSL_FLAGS=-lgsl -lgslcblas -lm
+MFLAG=-lm
+
+all: tune_qnn_adaptive tune_rho 
+
+tune_qnn_adaptive: tune_qnn_adaptive.c rank_utils.c fit_utils.c
+	$(CC) $(CFLAGS) -o tune_qnn_adaptive tune_qnn_adaptive.c rank_utils.c fit_utils.c $(GSL_FLAGS)
+
+tune_rho: tune_rho.c rank_utils.c 
+	$(CC) $(CFLAGS) -o tune_rho tune_rho.c rank_utils.c $(MFLAG) 
+
+clean:
+	rm tune_rho tune_qnn_adaptive
diff --git a/models/correlations/fit_utils.c b/models/correlations/fit_utils.c
new file mode 100644
index 0000000..e9e3954
--- /dev/null
+++ b/models/correlations/fit_utils.c
@@ -0,0 +1,382 @@
+/**
+ *
+ * Fit a given sequence with a power-law function
+ *
+ * a*x^{b}
+ *
+ * The fit is actually performed as a linear fit on the
+ * exponential-binned log-log distribution
+ *
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gsl/gsl_fit.h>
+
+/**
+ * 
+ * Load a sequence from a file, which contains one element on each line
+ *
+ */
+
+void load_sequence(char *fname, double **v, int *N){
+
+  int size;
+  char buff[256];
+  
+  FILE *f;
+  
+  f = fopen(fname, "r");
+  if (!f){
+    fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
+    exit(1);
+  }
+  
+  *N =0;
+  size = 10;
+  if (*v == NULL)
+    *v = malloc(size * sizeof(double));
+  else{
+    *v = realloc(*v, size * sizeof(double));
+  }
+  
+  while (fgets(buff, 255, f)){
+    (*v)[*N] = atof(buff);
+    *N += 1;
+    if (*N == size){
+      size += 10;
+      *v = realloc(*v, size * sizeof(double));
+    }
+  }
+  *v = realloc(*v, (*N) * sizeof(double));
+  fclose(f);
+}
+
+
+/**
+ *
+ * Load a sequence, getting the "col"-th column of the input file
+ *
+ */
+
+void load_sequence_col(char *fname, double **v, int *N, int col){
+
+  int size, n;
+  char buff[256];
+  char *ptr;
+
+  FILE *f;
+  
+  f = fopen(fname, "r");
+  if (!f){
+    fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
+    exit(1);
+  }
+  
+  *N =0;
+  size = 10;
+  if (*v == NULL)
+    *v = malloc(size * sizeof(double));
+  else{
+    *v = realloc(*v, size * sizeof(double));
+  }
+  
+  while (fgets(buff, 255, f)){
+    ptr = strtok(buff, " ");
+    if (col > 0){
+      n = 0;
+      while (n<col){
+        ptr = strtok(NULL, " ");
+        n += 1;
+      }
+    }
+    (*v)[*N] = atof(ptr);
+    *N += 1;
+
+    if (*N == size){
+      size += 10;
+      *v = realloc(*v, size * sizeof(double));
+    }
+  }
+  *v = realloc(*v, (*N) * sizeof(double));
+  fclose(f);
+}
+
+
+/**
+ *
+ * Make the exponential binning of a distribution, with a giving
+ * exponent alpha. The value of y[i] is the number of elements of v
+ * whose value lies between x[i-1] and x[i]...
+ *
+ */
+
+void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){
+  
+  double min_v, max_v, val, last_val;
+  int i, j, size, num_x;
+  double last_size, new_size;
+  
+  min_v = max_v = v[0];
+  
+  for (i=1; i<N; i ++){
+    if (v[i] > max_v)
+      max_v = v[i];
+    else if (v[i] > 0 && v[i] < min_v)
+      min_v = v[i];
+  }
+
+  size = 10;
+  if (*x == NULL){
+    *x = malloc(size * sizeof(double));
+  }
+  else{
+    *x = realloc(*x, size * sizeof(double));
+  }
+  
+  val = min_v;
+  last_size = min_v;
+  (*x)[0] = min_v;
+  num_x = 1;
+
+  while(val < max_v){
+    new_size = last_size * alpha;
+    val = last_size + new_size;
+    last_size = new_size;
+    last_val = val;
+    (*x)[num_x] = val;
+    num_x += 1;
+    if (num_x == size){
+      size += 10;
+      *x = realloc(*x, size * sizeof(double));
+    }
+  }
+  
+  if (*y == NULL){
+    *y = malloc(num_x * sizeof(double));
+  }
+  else{
+    *y = realloc(*y, num_x * sizeof(double));
+  }
+  for (i=0; i < num_x; i++){
+    (*y)[i] = 0;
+  }
+
+  
+
+  for(i=0; i <N; i ++){
+    j = 0;
+    while(v[i] > (*x)[j]){
+      j ++;
+    }
+    (*y)[j] += 1;
+  }
+  *num = num_x;
+}
+
+/**
+ *
+ * Make the exponential binning of a distribution, with a giving
+ * exponent alpha. The value of y[i] is the average of the values in
+ * the vector "w" whose corresponding v lies between x[i-1] and  x[i]...
+ *
+ */
+
+
+void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){
+  
+  double min_v, max_v, val, last_val;
+  int i, j, size, num_x;
+  double last_size, new_size;
+  int *cnt;
+
+
+  min_v = max_v = v[0];
+  
+  for (i=1; i<N; i ++){
+    if (v[i] > max_v)
+      max_v = v[i];
+    else if (v[i] > 0 && v[i] < min_v)
+      min_v = v[i];
+  }
+
+  size = 10;
+  if (*x == NULL){
+    *x = malloc(size * sizeof(double));
+  }
+  else{
+    *x = realloc(*x, size * sizeof(double));
+  }
+  
+  val = min_v;
+  last_size = min_v;
+  (*x)[0] = min_v;
+  num_x = 1;
+
+  while(val < max_v){
+    new_size = last_size * alpha;
+    val = last_size + new_size;
+    last_size = new_size;
+    last_val = val;
+    (*x)[num_x] = val;
+    num_x += 1;
+    if (num_x == size){
+      size += 10;
+      *x = realloc(*x, size * sizeof(double));
+    }
+  }
+  
+  
+  cnt = malloc(num_x * sizeof(int));
+  
+  if (*y == NULL){
+    *y = malloc(num_x * sizeof(double));
+  }
+  else{
+    *y = realloc(*y, num_x * sizeof(double));
+  }
+  for (i=0; i < num_x; i++){
+    (*y)[i] = 0;
+    cnt[i] = 0;
+  }
+
+  for(i=0; i <N; i ++){
+    j = 0;
+    while(j < num_x && v[i] > (*x)[j]){
+      j ++;
+    }
+    if(j == num_x){
+      printf("Error!!!!! Trying to assing a non-existing bin!!! -- fit_utils.c:exp_bin_avg!!!\n");
+      exit(37);
+    }
+    (*y)[j] += w[i];
+    cnt[j] += 1;
+  }
+  *num = num_x;
+  
+  for(i=0; i< num_x; i++){
+    if (cnt[i] > 0){
+      (*y)[i] = (*y)[i] / cnt[i];
+    }
+  }
+  free(cnt);
+}
+
+
+/**
+ *
+ * Print a distribution on stdout
+ *
+ */
+
+void dump_distr(double *x, double *y, int N){
+  int i;
+  
+  for(i=0; i<N; i ++){
+    printf("%g %g\n", x[i], y[i]);
+  }
+  
+}
+
+
+/**
+ * Compact a distribution, leaving only the pairs (x_i, y_i) for which
+ * y_i > 0
+ *
+ */
+
+void compact_distr(double *x, double *y, int *num){
+  
+  int i, j;
+  
+  i = j = 0;
+  while(j < *num){
+    while(j < *num && y[j] == 0){
+      j ++;
+    }
+    if (j==*num){
+      break;
+    }
+    x[i] = x[j];
+    y[i] = y[j];
+    i ++;
+    j ++;
+  }
+  *num = i;
+}
+
+
+/**
+ *
+ * Apply the function "f" on all the elemnts of a vector, in-place
+ *
+ */
+
+void map_vec(double *v, int N, double (*f)(double)){
+  int i;
+  
+  for (i=0; i<N; i ++){
+    v[i] = f(v[i]);
+  }
+}
+
+
+/**
+ *
+ * Normalize a distribution, dividing each y[i] for the width of the
+ * corresponding bin (i.e., x[i] - x[i-1])
+ *
+ */
+void normalize_distr(double *x, double *y, int num){
+  
+  int i;
+  
+  for(i=1; i<num; i ++){
+    y[i] /= (x[i] - x[i-1]);
+  }
+}
+
+/**
+ *
+ * take two vectors (k and q) and a pairing, and compute the best
+ * power-law fit "a*k^{mu}" of qnn(k)
+ *
+ */
+
+void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a, 
+                       double *corr){
+
+  static int  *num;
+  static double *q_pair, *x, *y;
+
+  int i;
+  int fit_num;
+  double c0, c1, cov00, cov01, cov11, sqsum;
+  
+  if (q_pair == NULL){
+    q_pair = malloc(N * sizeof(double));
+  }
+
+  for(i=0; i<N; i++){
+    q_pair[i] = q[pairing[i]];
+  }
+
+  exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num);
+  //normalize_distr(x,y, fit_num);
+  compact_distr(x, y, &fit_num);
+  
+  map_vec(x, fit_num, log);
+  map_vec(y, fit_num, log);
+  gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum);
+
+  //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11);
+  //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11));
+  *a = c0;
+  *mu = c1;
+  *corr = cov01/sqrt(cov00 * cov11);
+}
+
diff --git a/models/correlations/fit_utils.h b/models/correlations/fit_utils.h
new file mode 100644
index 0000000..183f47c
--- /dev/null
+++ b/models/correlations/fit_utils.h
@@ -0,0 +1,25 @@
+#ifndef __FIT_UTILS_H__
+#define __FIT_UTILS_H__
+
+
+void load_sequence(char *fname, double **v, int *N);
+
+void load_sequence_col(char *fname, double **v, int *N, int col);
+
+void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num);
+
+void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num);
+
+void dump_distr(double *x, double *y, int N);
+
+void compact_distr(double *x, double *y, int *num);
+
+void map_vec(double *v, int N, double (*f)(double));
+
+void normalize_distr(double *x, double *y, int num);
+
+void fit_current_trend(double *k, double *q, int N, int *pairing, 
+                       double *mu, double *a, double *corr);
+
+
+#endif /* __FIT_UTILS_H__ */
diff --git a/models/correlations/rank_utils.c b/models/correlations/rank_utils.c
new file mode 100644
index 0000000..4b8ef41
--- /dev/null
+++ b/models/correlations/rank_utils.c
@@ -0,0 +1,217 @@
+/**
+ *
+ * Some utility functions to be used with tune_rho, compute_rho and
+ * the like
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <time.h>
+
+#include "rank_utils.h"
+
+void load_ranking(char *fname, int *N, double **R){
+  
+  char buff[256];
+  int size = 10;
+  FILE *f;
+  
+  if (*R == NULL){
+    *R = malloc(size * sizeof (double));
+  }
+  f = fopen(fname, "r");
+  if (!f){
+    printf("Unable to open file: %s!!! Exiting\n");
+    exit(1);
+  }
+  
+  *N = 0;
+ 
+  while (fgets(buff, 255, f)){
+    if (* N == size){
+      size += 10;
+      *R = realloc(*R, size * sizeof(double));
+    }
+    (*R)[*N] = atof(buff);
+    *N += 1;
+  }
+}
+
+double avg_array(double *v, int N){
+  
+  double sum = 0.0;
+  int i;
+  
+  for (i = 0; i < N; i ++){
+    sum += v[i];
+  }
+  return sum/N;
+}
+
+double compute_C(double *R1, double *R2, int N){
+  double mu1, mu2, sum1, sum2;
+
+  mu1 = avg_array(R1, N);
+  mu2 = avg_array(R2, N);
+  
+  sum1 = mu1 * N;
+  sum2 = mu2 * N;
+  
+  return N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2;
+}
+
+
+double compute_D(double *R1, double *R2, int N){
+  
+  double mu1, mu2, s1, s2;
+  int i;
+
+  mu1 = avg_array(R1, N);
+  mu2 = avg_array(R1, N);
+  
+  s1 = s2 = 0.0;
+  
+  for (i=0 ; i < N; i ++){
+    s1 += pow((R1[i] - mu1), 2);
+    s2 += pow((R2[i] - mu2), 2);
+  }
+  
+  return sqrt(s1 * s2);
+}
+
+
+double compute_rho(double *R1, double *R2, int N, int *pairing){
+  
+  double rho = 0;
+  int i;
+  
+  for (i=0; i < N; i ++){
+    rho += R1[i] * R2[ pairing[i] ];
+  }
+
+  rho = (rho + compute_C(R1, R2, N))/ compute_D(R1, R2, N);
+  return rho;
+}
+
+void dump_ranking(double *R, int N){
+  int i;
+
+  for (i=0; i < N; i ++){
+    printf("%d: %f\n", i, R[i] );
+  }
+}
+
+
+void init_pairing_natural(int *pairing, int N){
+  int i;
+
+  for (i = 0; i< N; i ++){
+    pairing[i] = i;
+  }
+}
+
+void init_pairing_inverse(int *pairing, int N){
+  int i;
+
+  for (i = 0; i< N; i ++){
+    pairing[i] = N-i-1;
+  }
+}
+
+void select_pairing(int *pairing, int N, int argc, char *argv[], int pos){
+
+  if (argc < pos + 1 || !strncasecmp("rnd", argv[pos], 3)){
+    init_pairing_random(pairing, N);
+  }
+  else if (!strncasecmp("nat", argv[pos], 3)){
+    init_pairing_natural(pairing, N);
+  }
+  else if (!strncasecmp("inv", argv[pos], 3)){
+    init_pairing_inverse(pairing, N);
+  }
+  else{
+    printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[pos]);
+    exit(1);
+  }
+
+}
+
+
+void shuffle_sequence(int *s, int N){
+  
+  int i, j, tmp;
+
+  for (i=N-1; i>=0; i--){
+    j = rand() % (i+1);
+    tmp = s[j];
+    s[j] = s[i];
+    s[i] = tmp;
+  }
+}
+
+
+
+void init_pairing_random(int *pairing, int N){
+
+  init_pairing_natural(pairing, N);
+  shuffle_sequence(pairing, N);
+
+}
+
+/* Loads a pairing from a file, in the format:
+ * 
+ * rank1 rank2
+ * ...........
+ */
+void load_pairing(int **pairing, int N, char *fname){
+  
+  FILE *f;
+  int i, j, num;
+  char buff[256];
+  char *ptr;
+
+  f = fopen(fname, "r");
+  if (!f){
+    printf("Error opening file \"%s\"!!!! Exiting....\n", fname);
+    exit(2);
+  }
+
+  if (*pairing == NULL){
+    *pairing = malloc(N * sizeof(int));
+    init_pairing_natural(*pairing, N);
+  }
+  
+  num = 0;
+  while(num < N){
+    fgets(buff, 255, f);
+    if (buff[0] == '#')
+      continue;
+    ptr = strtok(buff, " "); /* read the first node */
+    i = atoi(ptr);
+    ptr = strtok(NULL, " "); /* read the second node */
+    j = atoi(ptr);
+    (*pairing)[i] = j;
+    num += 1;
+  }
+}
+
+
+void dump_pairing(int *pairing, int N){
+  int i;
+
+  for(i=0; i< N; i ++){
+    printf("%d %d\n", i, pairing[i]);
+  }
+}
+
+void copy_pairing(int *p1, int *p2, int N){
+  int i;
+  
+  for (i=0 ; i < N; i ++){
+    p2[i] = p1[i];
+  }
+}
diff --git a/models/correlations/rank_utils.h b/models/correlations/rank_utils.h
new file mode 100644
index 0000000..521582a
--- /dev/null
+++ b/models/correlations/rank_utils.h
@@ -0,0 +1,44 @@
+#ifndef __RANK_UTILS_H__
+#define __RANK_UTILS_H__
+
+/* Load a ranking */
+void load_ranking(char *fname, int *N, double **R);
+
+/* Compute the average of an array */
+double avg_array(double *v, int N);
+
+/* Compute the term "C" in the rho correlation coefficient */
+double compute_C(double *R1, double *R2, int N);
+
+/* Compute the term "D" in the rho correlation coefficient */
+double compute_D(double *R1, double *R2, int N);
+
+/* Compute the Spearman's rank correlation coefficient \rho */
+double compute_rho(double *R1, double *R2, int N, int *pairing);
+
+void dump_ranking(double *R, int N);
+
+void init_pairing_natural(int *pairing, int N);
+
+void init_pairing_inverse(int *pairing, int N);
+
+void init_pairing_random(int *pairing, int N);
+
+void select_pairing(int *pairing, int N, int argc, char *argv[], int pos);
+
+void shuffle_sequence(int *s, int N);
+
+void dump_pairing(int *pairing, int N);
+
+void copy_pairing(int *pairing1, int *pairing2, int N);
+
+
+#endif /* __RANK_UTILS_H__ */
+
+
+
+
+
+
+
+
diff --git a/models/correlations/tune_qnn_adaptive.c b/models/correlations/tune_qnn_adaptive.c
new file mode 100644
index 0000000..71643a1
--- /dev/null
+++ b/models/correlations/tune_qnn_adaptive.c
@@ -0,0 +1,208 @@
+/**
+ *
+ * Tune the inter-layer degree correlation function \bar{q}(k) of a
+ * two-layer multiplex, in order to make it as similar as possible to
+ * a power law with exponent $\mu$, where $\mu$ is given as input
+ *
+ * This version of the program is (or, better, "shoud be") able to
+ * tune also the value of the pre-factor "a"
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <strings.h>
+#include <time.h>
+
+#include "rank_utils.h"
+
+
+
+
+inline double compute_delta(double q, double k, double mu, double a){
+  
+  return fabs(log(q) - mu * log(k) - log(a));
+  //return fabs (q - a*pow(k,mu));
+}
+
+
+
+
+
+
+void tune_qnn_adaptive(double *R1, double *R2, int N, int *pairing, double eps, 
+              double beta, double mu_target){
+  
+  double act_mu, test_mu, F, F_new, F_old;
+  double delta1_old, delta2_old, delta1_new, delta2_new;
+  double val, prob;
+  double mu, a, err, dummy_a;
+  int *new_pairing;
+  int p1, p2, tmp_val;
+  int tot;
+  char swap = 0;
+ 
+  new_pairing = malloc(N * sizeof(int));
+  copy_pairing(pairing, new_pairing, N);
+
+  a = 1.0;
+  
+  F = 10000;
+  
+  //fprintf("%f %f %f %f %f\n", eps, beta, mu_target, act_mu, F);
+  
+  tot = 0;
+  while (F > eps){
+    /* sample two positions of "pairing" and shuffle them in "new_pairing" */
+
+    p1 = rand() % N;
+    p2 = rand() % N;
+    while (p2 == p1){
+      p2 = rand() % N;
+    }
+    tmp_val = new_pairing[p1];
+    new_pairing[p1] = new_pairing[p2];
+    new_pairing[p2] = tmp_val;
+    
+    delta1_old = compute_delta(R2[pairing[p1]], R1[p1], mu_target, a);
+    delta2_old = compute_delta(R2[pairing[p2]], R1[p2], mu_target, a);
+    
+    delta1_new = compute_delta(R2[new_pairing[p1]], R1[p1], mu_target, a);
+    delta2_new = compute_delta(R2[new_pairing[p2]], R1[p2], mu_target, a);
+    
+    //F_new = log(delta1_new * delta2_new);
+    //F_old = log(delta1_old * delta2_old);
+    
+    F_new = delta1_new + delta2_new;
+    F_old = delta1_old + delta2_old; 
+    
+    
+    if (F_new <= F_old){/* Accept this swap with probability 1 */
+      tmp_val = pairing[p1];
+      pairing[p1] = pairing[p2];
+      pairing[p2] = tmp_val;
+      //act_mu = test_mu;
+      swap = 1;
+    }
+    else{/* Accept the swap with a certain probability */
+      val = 1.0 * rand() / RAND_MAX;
+
+      //prob = pow(fabs(F - F_new)/(F+F_new), beta);
+      prob = exp(-(F_new - F_old)/beta);
+      //fprintf(stderr, "-- %g %g %g -> %f \n", F_new, F_old, F_new - F_old, prob);
+      if (val < prob){ /* Accept the swap */
+        tmp_val = pairing[p1];
+        pairing[p1] = pairing[p2];
+        pairing[p2] = tmp_val;
+        //act_mu = test_mu;
+        swap = 1;
+      }
+      else{ /* Rollback the swap */
+        tmp_val = new_pairing[p1];
+        new_pairing[p1] = new_pairing[p2];
+        new_pairing[p2] = tmp_val;
+        swap = 0;
+      }
+      
+    }
+    
+
+    ///F = fabs(act_mu - mu_target);
+    ///if (tot % 200 == 0){
+    //fprintf(stderr, "%d %g\n", tot, act_mu);
+
+    //fprintf(stderr, "%d: %f %f ---- %f %f ---- %f %f ---- %d \n",
+    //tot, delta1_old, delta2_old, delta1_new, delta2_new, F_old, F_new, swap);
+    
+    ///}
+    tot += 1;
+    if (tot %200 == 0){
+      fit_current_trend(R1, R2, N, pairing, &mu, &a, &err);
+      fprintf(stderr, "mu: %g a: %g corr: %g\n", mu, a, err);
+      //a = a - 0.01 *(a - exp(a));
+      a = exp(a);
+    }
+    fit_current_trend(R1, R2, N, pairing, &mu, &dummy_a, &err);
+    F = fabs(mu - mu_target);
+  }
+}
+
+void dump_qnn(double *R1, double *R2, int N, int *pairing){
+  
+  int i;
+  int *qnn, *num;
+
+  qnn = malloc(N * sizeof(int));
+  num = malloc(N * sizeof(int));
+
+  for (i=0; i<N; i++){
+    qnn[i]=0;
+    num[i]=0;
+  }
+  
+  for (i=0; i<N; i++){
+    qnn[(int)(R1[i])] += R2[pairing[i]];
+    num[(int)(R1[i])] += 1;
+  }
+  
+  for(i=0; i<N; i++){
+    if (num[i] >0){
+      printf("%d %f\n", i, 1.0*qnn[i]/num[i]);
+    }
+  }
+  free(num);
+  free(qnn);
+}
+
+
+void dump_degs(double *R1, double *R2, int N, int *pairing){
+
+  int i;
+
+  for(i=0; i<N; i++){
+    printf("%g %g\n", R1[i], R2[pairing[i]]);
+  }
+
+}
+
+int main (int argc, char *argv[]){
+
+  int N1, N2;
+  double *R1 = NULL;
+  double *R2 = NULL;
+  double eps, beta, mu_target;
+  
+  int *pairing = NULL;
+
+  srand(time(NULL));
+  
+  if (argc < 6){
+    printf("Usage: %s <degs1> <degs2> <mu> <eps> <beta> [RND|NAT|INV]\n", argv[0]);
+    exit(1);
+  }
+  
+  mu_target = atof(argv[3]);
+  eps = atof(argv[4]);
+  beta = atof(argv[5]);
+  
+  load_ranking(argv[1], &N1, &R1);
+  load_ranking(argv[2], &N2, &R2);
+  
+  if (N1 != N2){
+    printf("Error!!!! The two files must have the same number of lines!!!! Exiting...\n");
+    exit(1);
+  }
+  
+  pairing = malloc(N1 * sizeof(int));
+
+  select_pairing(pairing, N1, argc, argv, 6);
+  
+  tune_qnn_adaptive(R1, R2, N1, pairing, eps, beta, mu_target);
+
+  dump_pairing(pairing, N1);
+  //dump_qnn(R1, R2, N1, pairing);
+  //dump_degs(R1, R2, N1, pairing);
+  
+}
diff --git a/models/correlations/tune_rho.c b/models/correlations/tune_rho.c
new file mode 100644
index 0000000..4495b0e
--- /dev/null
+++ b/models/correlations/tune_rho.c
@@ -0,0 +1,142 @@
+/**
+ *
+ * Tune the Spearman's \rho correlation coefficient between two rankings
+ * given as input, using a simulated-anneling procedure
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <strings.h>
+#include <time.h>
+
+#include "rank_utils.h"
+
+//void select_pairing(int *pairing, int N, int argc, char *argv[]){
+//
+//  if (argc < 7 || !strncasecmp("rnd", argv[6], 3)){
+//    init_pairing_random(pairing, N);
+//  }
+//  else if (!strncasecmp("nat", argv[6], 3)){
+//    init_pairing_natural(pairing, N);
+//  }
+//  else if (!strncasecmp("inv", argv[6], 3)){
+//    init_pairing_inverse(pairing, N);
+//  }
+//  else{
+//    printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[6]);
+//    exit(1);
+//  }
+//
+//}
+
+void tune_rho(double *R1, double *R2, int N, int *pairing, double eps, 
+              double beta, double rho_target){
+  
+  double act_rho, test_rho, F, F_new;
+  double val, prob;
+  int *new_pairing;
+  int p1, p2, tmp_val;
+  int tot;
+ 
+  new_pairing = malloc(N * sizeof(int));
+  copy_pairing(pairing, new_pairing, N);
+  
+  act_rho = compute_rho(R1, R2, N, pairing);
+  
+  F = fabs(act_rho - rho_target);
+  
+  //fprintf("%f %f %f %f %f\n", eps, beta, rho_target, act_rho, F);
+  
+  tot = 0;
+  while (F > eps){
+    /* sample two positions of "pairing" and shuffle them in "new_pairing" */
+    p1 = rand() % N;
+    p2 = rand() % N;
+    while (p2 == p1){
+      p2 = rand() % N;
+    }
+    tmp_val = new_pairing[p1];
+    new_pairing[p1] = new_pairing[p2];
+    new_pairing[p2] = tmp_val;
+    test_rho = compute_rho(R1, R2, N, new_pairing);
+    F_new = fabs(test_rho - rho_target);
+    if (F_new < F){/* Accept this swap with probability 1 */
+      tmp_val = pairing[p1];
+      pairing[p1] = pairing[p2];
+      pairing[p2] = tmp_val;
+      act_rho = test_rho;
+    }
+    else{/* Accept the swap with a certain probability */
+      val = 1.0 * rand() / RAND_MAX;
+
+      //prob = pow(fabs(F - F_new)/(F+F_new), beta);
+      prob = exp(-(F_new - F)/beta);
+      //fprintf(stderr, "-- %f\n", prob);
+      if (val < prob){ /* Accept the swap */
+        tmp_val = pairing[p1];
+        pairing[p1] = pairing[p2];
+        pairing[p2] = tmp_val;
+        act_rho = test_rho;
+      }
+      else{ /* Rollback the swap */
+        tmp_val = new_pairing[p1];
+        new_pairing[p1] = new_pairing[p2];
+        new_pairing[p2] = tmp_val;
+      }
+      
+    }
+    F = fabs(act_rho - rho_target);
+    if (tot % 200 == 0){
+      fprintf(stderr, "%d %g\n", tot, act_rho);
+    }
+    tot += 1;
+  }
+}
+
+
+
+
+int main (int argc, char *argv[]){
+
+  int N1, N2;
+  double *R1 = NULL;
+  double *R2 = NULL;
+  double eps, beta, rho, rho_target;
+  
+  int *pairing = NULL;
+
+  srand(time(NULL));
+  
+  if (argc < 6){
+    printf("Usage: %s <rank1> <rank2> <rho> <eps> <beta> [RND|NAT|INV]\n", argv[0]);
+    exit(1);
+  }
+  
+  rho_target = atof(argv[3]);
+  eps = atof(argv[4]);
+  beta = atof(argv[5]);
+  
+  load_ranking(argv[1], &N1, &R1);
+  load_ranking(argv[2], &N2, &R2);
+  
+  if (N1 != N2){
+    printf("Error!!!! The two files must have the same number of lines!!!! Exiting...\n");
+    exit(1);
+  }
+  
+  pairing = malloc(N1 * sizeof(int));
+
+  select_pairing(pairing, N1, argc, argv, 6);
+  
+  rho = compute_rho(R1, R2, N1, pairing);
+  
+  fprintf(stderr, "%g\n", rho);
+  
+  tune_rho(R1, R2, N1, pairing, eps, beta, rho_target);
+
+  dump_pairing(pairing, N1);
+  
+}
author	KatolaZ <katolaz@yahoo.it>	2015-10-19 16:23:00 +0100
committer	KatolaZ <katolaz@yahoo.it>	2015-10-19 16:23:00 +0100
commit	df8386f75b0538075d72d52693836bb8878f505b (patch)
tree	704c2a0836f8b9fd9f470c12b6ae05637c431468 /models/correlations
parent	363274e79eade464247089c105260bc34940da07 (diff)