diff options
author | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
---|---|---|
committer | KatolaZ <katolaz@yahoo.it> | 2015-10-19 16:23:00 +0100 |
commit | df8386f75b0538075d72d52693836bb8878f505b (patch) | |
tree | 704c2a0836f8b9fd9f470c12b6ae05637c431468 /models/correlations | |
parent | 363274e79eade464247089c105260bc34940da07 (diff) |
First commit of MAMMULT code
Diffstat (limited to 'models/correlations')
-rw-r--r-- | models/correlations/Makefile | 15 | ||||
-rw-r--r-- | models/correlations/fit_utils.c | 382 | ||||
-rw-r--r-- | models/correlations/fit_utils.h | 25 | ||||
-rw-r--r-- | models/correlations/rank_utils.c | 217 | ||||
-rw-r--r-- | models/correlations/rank_utils.h | 44 | ||||
-rw-r--r-- | models/correlations/tune_qnn_adaptive.c | 208 | ||||
-rw-r--r-- | models/correlations/tune_rho.c | 142 |
7 files changed, 1033 insertions, 0 deletions
diff --git a/models/correlations/Makefile b/models/correlations/Makefile new file mode 100644 index 0000000..b3828aa --- /dev/null +++ b/models/correlations/Makefile @@ -0,0 +1,15 @@ +CFLAGS="-O3" +CC="gcc" +GSL_FLAGS=-lgsl -lgslcblas -lm +MFLAG=-lm + +all: tune_qnn_adaptive tune_rho + +tune_qnn_adaptive: tune_qnn_adaptive.c rank_utils.c fit_utils.c + $(CC) $(CFLAGS) -o tune_qnn_adaptive tune_qnn_adaptive.c rank_utils.c fit_utils.c $(GSL_FLAGS) + +tune_rho: tune_rho.c rank_utils.c + $(CC) $(CFLAGS) -o tune_rho tune_rho.c rank_utils.c $(MFLAG) + +clean: + rm tune_rho tune_qnn_adaptive diff --git a/models/correlations/fit_utils.c b/models/correlations/fit_utils.c new file mode 100644 index 0000000..e9e3954 --- /dev/null +++ b/models/correlations/fit_utils.c @@ -0,0 +1,382 @@ +/** + * + * Fit a given sequence with a power-law function + * + * a*x^{b} + * + * The fit is actually performed as a linear fit on the + * exponential-binned log-log distribution + * + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <gsl/gsl_fit.h> + +/** + * + * Load a sequence from a file, which contains one element on each line + * + */ + +void load_sequence(char *fname, double **v, int *N){ + + int size; + char buff[256]; + + FILE *f; + + f = fopen(fname, "r"); + if (!f){ + fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname); + exit(1); + } + + *N =0; + size = 10; + if (*v == NULL) + *v = malloc(size * sizeof(double)); + else{ + *v = realloc(*v, size * sizeof(double)); + } + + while (fgets(buff, 255, f)){ + (*v)[*N] = atof(buff); + *N += 1; + if (*N == size){ + size += 10; + *v = realloc(*v, size * sizeof(double)); + } + } + *v = realloc(*v, (*N) * sizeof(double)); + fclose(f); +} + + +/** + * + * Load a sequence, getting the "col"-th column of the input file + * + */ + +void load_sequence_col(char *fname, double **v, int *N, int col){ + + int size, n; + char buff[256]; + char *ptr; + + FILE *f; + + f = fopen(fname, "r"); + if (!f){ + fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname); + exit(1); + } + + *N =0; + size = 10; + if (*v == NULL) + *v = malloc(size * sizeof(double)); + else{ + *v = realloc(*v, size * sizeof(double)); + } + + while (fgets(buff, 255, f)){ + ptr = strtok(buff, " "); + if (col > 0){ + n = 0; + while (n<col){ + ptr = strtok(NULL, " "); + n += 1; + } + } + (*v)[*N] = atof(ptr); + *N += 1; + + if (*N == size){ + size += 10; + *v = realloc(*v, size * sizeof(double)); + } + } + *v = realloc(*v, (*N) * sizeof(double)); + fclose(f); +} + + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the number of elements of v + * whose value lies between x[i-1] and x[i]... + * + */ + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){ + + double min_v, max_v, val, last_val; + int i, j, size, num_x; + double last_size, new_size; + + min_v = max_v = v[0]; + + for (i=1; i<N; i ++){ + if (v[i] > max_v) + max_v = v[i]; + else if (v[i] > 0 && v[i] < min_v) + min_v = v[i]; + } + + size = 10; + if (*x == NULL){ + *x = malloc(size * sizeof(double)); + } + else{ + *x = realloc(*x, size * sizeof(double)); + } + + val = min_v; + last_size = min_v; + (*x)[0] = min_v; + num_x = 1; + + while(val < max_v){ + new_size = last_size * alpha; + val = last_size + new_size; + last_size = new_size; + last_val = val; + (*x)[num_x] = val; + num_x += 1; + if (num_x == size){ + size += 10; + *x = realloc(*x, size * sizeof(double)); + } + } + + if (*y == NULL){ + *y = malloc(num_x * sizeof(double)); + } + else{ + *y = realloc(*y, num_x * sizeof(double)); + } + for (i=0; i < num_x; i++){ + (*y)[i] = 0; + } + + + + for(i=0; i <N; i ++){ + j = 0; + while(v[i] > (*x)[j]){ + j ++; + } + (*y)[j] += 1; + } + *num = num_x; +} + +/** + * + * Make the exponential binning of a distribution, with a giving + * exponent alpha. The value of y[i] is the average of the values in + * the vector "w" whose corresponding v lies between x[i-1] and x[i]... + * + */ + + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){ + + double min_v, max_v, val, last_val; + int i, j, size, num_x; + double last_size, new_size; + int *cnt; + + + min_v = max_v = v[0]; + + for (i=1; i<N; i ++){ + if (v[i] > max_v) + max_v = v[i]; + else if (v[i] > 0 && v[i] < min_v) + min_v = v[i]; + } + + size = 10; + if (*x == NULL){ + *x = malloc(size * sizeof(double)); + } + else{ + *x = realloc(*x, size * sizeof(double)); + } + + val = min_v; + last_size = min_v; + (*x)[0] = min_v; + num_x = 1; + + while(val < max_v){ + new_size = last_size * alpha; + val = last_size + new_size; + last_size = new_size; + last_val = val; + (*x)[num_x] = val; + num_x += 1; + if (num_x == size){ + size += 10; + *x = realloc(*x, size * sizeof(double)); + } + } + + + cnt = malloc(num_x * sizeof(int)); + + if (*y == NULL){ + *y = malloc(num_x * sizeof(double)); + } + else{ + *y = realloc(*y, num_x * sizeof(double)); + } + for (i=0; i < num_x; i++){ + (*y)[i] = 0; + cnt[i] = 0; + } + + for(i=0; i <N; i ++){ + j = 0; + while(j < num_x && v[i] > (*x)[j]){ + j ++; + } + if(j == num_x){ + printf("Error!!!!! Trying to assing a non-existing bin!!! -- fit_utils.c:exp_bin_avg!!!\n"); + exit(37); + } + (*y)[j] += w[i]; + cnt[j] += 1; + } + *num = num_x; + + for(i=0; i< num_x; i++){ + if (cnt[i] > 0){ + (*y)[i] = (*y)[i] / cnt[i]; + } + } + free(cnt); +} + + +/** + * + * Print a distribution on stdout + * + */ + +void dump_distr(double *x, double *y, int N){ + int i; + + for(i=0; i<N; i ++){ + printf("%g %g\n", x[i], y[i]); + } + +} + + +/** + * Compact a distribution, leaving only the pairs (x_i, y_i) for which + * y_i > 0 + * + */ + +void compact_distr(double *x, double *y, int *num){ + + int i, j; + + i = j = 0; + while(j < *num){ + while(j < *num && y[j] == 0){ + j ++; + } + if (j==*num){ + break; + } + x[i] = x[j]; + y[i] = y[j]; + i ++; + j ++; + } + *num = i; +} + + +/** + * + * Apply the function "f" on all the elemnts of a vector, in-place + * + */ + +void map_vec(double *v, int N, double (*f)(double)){ + int i; + + for (i=0; i<N; i ++){ + v[i] = f(v[i]); + } +} + + +/** + * + * Normalize a distribution, dividing each y[i] for the width of the + * corresponding bin (i.e., x[i] - x[i-1]) + * + */ +void normalize_distr(double *x, double *y, int num){ + + int i; + + for(i=1; i<num; i ++){ + y[i] /= (x[i] - x[i-1]); + } +} + +/** + * + * take two vectors (k and q) and a pairing, and compute the best + * power-law fit "a*k^{mu}" of qnn(k) + * + */ + +void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a, + double *corr){ + + static int *num; + static double *q_pair, *x, *y; + + int i; + int fit_num; + double c0, c1, cov00, cov01, cov11, sqsum; + + if (q_pair == NULL){ + q_pair = malloc(N * sizeof(double)); + } + + for(i=0; i<N; i++){ + q_pair[i] = q[pairing[i]]; + } + + exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num); + //normalize_distr(x,y, fit_num); + compact_distr(x, y, &fit_num); + + map_vec(x, fit_num, log); + map_vec(y, fit_num, log); + gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum); + + //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11); + //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11)); + *a = c0; + *mu = c1; + *corr = cov01/sqrt(cov00 * cov11); +} + diff --git a/models/correlations/fit_utils.h b/models/correlations/fit_utils.h new file mode 100644 index 0000000..183f47c --- /dev/null +++ b/models/correlations/fit_utils.h @@ -0,0 +1,25 @@ +#ifndef __FIT_UTILS_H__ +#define __FIT_UTILS_H__ + + +void load_sequence(char *fname, double **v, int *N); + +void load_sequence_col(char *fname, double **v, int *N, int col); + +void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num); + +void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num); + +void dump_distr(double *x, double *y, int N); + +void compact_distr(double *x, double *y, int *num); + +void map_vec(double *v, int N, double (*f)(double)); + +void normalize_distr(double *x, double *y, int num); + +void fit_current_trend(double *k, double *q, int N, int *pairing, + double *mu, double *a, double *corr); + + +#endif /* __FIT_UTILS_H__ */ diff --git a/models/correlations/rank_utils.c b/models/correlations/rank_utils.c new file mode 100644 index 0000000..4b8ef41 --- /dev/null +++ b/models/correlations/rank_utils.c @@ -0,0 +1,217 @@ +/** + * + * Some utility functions to be used with tune_rho, compute_rho and + * the like + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> +#include <time.h> + +#include "rank_utils.h" + +void load_ranking(char *fname, int *N, double **R){ + + char buff[256]; + int size = 10; + FILE *f; + + if (*R == NULL){ + *R = malloc(size * sizeof (double)); + } + f = fopen(fname, "r"); + if (!f){ + printf("Unable to open file: %s!!! Exiting\n"); + exit(1); + } + + *N = 0; + + while (fgets(buff, 255, f)){ + if (* N == size){ + size += 10; + *R = realloc(*R, size * sizeof(double)); + } + (*R)[*N] = atof(buff); + *N += 1; + } +} + +double avg_array(double *v, int N){ + + double sum = 0.0; + int i; + + for (i = 0; i < N; i ++){ + sum += v[i]; + } + return sum/N; +} + +double compute_C(double *R1, double *R2, int N){ + double mu1, mu2, sum1, sum2; + + mu1 = avg_array(R1, N); + mu2 = avg_array(R2, N); + + sum1 = mu1 * N; + sum2 = mu2 * N; + + return N * mu1 * mu2 - mu2 * sum1 - mu1 * sum2; +} + + +double compute_D(double *R1, double *R2, int N){ + + double mu1, mu2, s1, s2; + int i; + + mu1 = avg_array(R1, N); + mu2 = avg_array(R1, N); + + s1 = s2 = 0.0; + + for (i=0 ; i < N; i ++){ + s1 += pow((R1[i] - mu1), 2); + s2 += pow((R2[i] - mu2), 2); + } + + return sqrt(s1 * s2); +} + + +double compute_rho(double *R1, double *R2, int N, int *pairing){ + + double rho = 0; + int i; + + for (i=0; i < N; i ++){ + rho += R1[i] * R2[ pairing[i] ]; + } + + rho = (rho + compute_C(R1, R2, N))/ compute_D(R1, R2, N); + return rho; +} + +void dump_ranking(double *R, int N){ + int i; + + for (i=0; i < N; i ++){ + printf("%d: %f\n", i, R[i] ); + } +} + + +void init_pairing_natural(int *pairing, int N){ + int i; + + for (i = 0; i< N; i ++){ + pairing[i] = i; + } +} + +void init_pairing_inverse(int *pairing, int N){ + int i; + + for (i = 0; i< N; i ++){ + pairing[i] = N-i-1; + } +} + +void select_pairing(int *pairing, int N, int argc, char *argv[], int pos){ + + if (argc < pos + 1 || !strncasecmp("rnd", argv[pos], 3)){ + init_pairing_random(pairing, N); + } + else if (!strncasecmp("nat", argv[pos], 3)){ + init_pairing_natural(pairing, N); + } + else if (!strncasecmp("inv", argv[pos], 3)){ + init_pairing_inverse(pairing, N); + } + else{ + printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[pos]); + exit(1); + } + +} + + +void shuffle_sequence(int *s, int N){ + + int i, j, tmp; + + for (i=N-1; i>=0; i--){ + j = rand() % (i+1); + tmp = s[j]; + s[j] = s[i]; + s[i] = tmp; + } +} + + + +void init_pairing_random(int *pairing, int N){ + + init_pairing_natural(pairing, N); + shuffle_sequence(pairing, N); + +} + +/* Loads a pairing from a file, in the format: + * + * rank1 rank2 + * ........... + */ +void load_pairing(int **pairing, int N, char *fname){ + + FILE *f; + int i, j, num; + char buff[256]; + char *ptr; + + f = fopen(fname, "r"); + if (!f){ + printf("Error opening file \"%s\"!!!! Exiting....\n", fname); + exit(2); + } + + if (*pairing == NULL){ + *pairing = malloc(N * sizeof(int)); + init_pairing_natural(*pairing, N); + } + + num = 0; + while(num < N){ + fgets(buff, 255, f); + if (buff[0] == '#') + continue; + ptr = strtok(buff, " "); /* read the first node */ + i = atoi(ptr); + ptr = strtok(NULL, " "); /* read the second node */ + j = atoi(ptr); + (*pairing)[i] = j; + num += 1; + } +} + + +void dump_pairing(int *pairing, int N){ + int i; + + for(i=0; i< N; i ++){ + printf("%d %d\n", i, pairing[i]); + } +} + +void copy_pairing(int *p1, int *p2, int N){ + int i; + + for (i=0 ; i < N; i ++){ + p2[i] = p1[i]; + } +} diff --git a/models/correlations/rank_utils.h b/models/correlations/rank_utils.h new file mode 100644 index 0000000..521582a --- /dev/null +++ b/models/correlations/rank_utils.h @@ -0,0 +1,44 @@ +#ifndef __RANK_UTILS_H__ +#define __RANK_UTILS_H__ + +/* Load a ranking */ +void load_ranking(char *fname, int *N, double **R); + +/* Compute the average of an array */ +double avg_array(double *v, int N); + +/* Compute the term "C" in the rho correlation coefficient */ +double compute_C(double *R1, double *R2, int N); + +/* Compute the term "D" in the rho correlation coefficient */ +double compute_D(double *R1, double *R2, int N); + +/* Compute the Spearman's rank correlation coefficient \rho */ +double compute_rho(double *R1, double *R2, int N, int *pairing); + +void dump_ranking(double *R, int N); + +void init_pairing_natural(int *pairing, int N); + +void init_pairing_inverse(int *pairing, int N); + +void init_pairing_random(int *pairing, int N); + +void select_pairing(int *pairing, int N, int argc, char *argv[], int pos); + +void shuffle_sequence(int *s, int N); + +void dump_pairing(int *pairing, int N); + +void copy_pairing(int *pairing1, int *pairing2, int N); + + +#endif /* __RANK_UTILS_H__ */ + + + + + + + + diff --git a/models/correlations/tune_qnn_adaptive.c b/models/correlations/tune_qnn_adaptive.c new file mode 100644 index 0000000..71643a1 --- /dev/null +++ b/models/correlations/tune_qnn_adaptive.c @@ -0,0 +1,208 @@ +/** + * + * Tune the inter-layer degree correlation function \bar{q}(k) of a + * two-layer multiplex, in order to make it as similar as possible to + * a power law with exponent $\mu$, where $\mu$ is given as input + * + * This version of the program is (or, better, "shoud be") able to + * tune also the value of the pre-factor "a" + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <strings.h> +#include <time.h> + +#include "rank_utils.h" + + + + +inline double compute_delta(double q, double k, double mu, double a){ + + return fabs(log(q) - mu * log(k) - log(a)); + //return fabs (q - a*pow(k,mu)); +} + + + + + + +void tune_qnn_adaptive(double *R1, double *R2, int N, int *pairing, double eps, + double beta, double mu_target){ + + double act_mu, test_mu, F, F_new, F_old; + double delta1_old, delta2_old, delta1_new, delta2_new; + double val, prob; + double mu, a, err, dummy_a; + int *new_pairing; + int p1, p2, tmp_val; + int tot; + char swap = 0; + + new_pairing = malloc(N * sizeof(int)); + copy_pairing(pairing, new_pairing, N); + + a = 1.0; + + F = 10000; + + //fprintf("%f %f %f %f %f\n", eps, beta, mu_target, act_mu, F); + + tot = 0; + while (F > eps){ + /* sample two positions of "pairing" and shuffle them in "new_pairing" */ + + p1 = rand() % N; + p2 = rand() % N; + while (p2 == p1){ + p2 = rand() % N; + } + tmp_val = new_pairing[p1]; + new_pairing[p1] = new_pairing[p2]; + new_pairing[p2] = tmp_val; + + delta1_old = compute_delta(R2[pairing[p1]], R1[p1], mu_target, a); + delta2_old = compute_delta(R2[pairing[p2]], R1[p2], mu_target, a); + + delta1_new = compute_delta(R2[new_pairing[p1]], R1[p1], mu_target, a); + delta2_new = compute_delta(R2[new_pairing[p2]], R1[p2], mu_target, a); + + //F_new = log(delta1_new * delta2_new); + //F_old = log(delta1_old * delta2_old); + + F_new = delta1_new + delta2_new; + F_old = delta1_old + delta2_old; + + + if (F_new <= F_old){/* Accept this swap with probability 1 */ + tmp_val = pairing[p1]; + pairing[p1] = pairing[p2]; + pairing[p2] = tmp_val; + //act_mu = test_mu; + swap = 1; + } + else{/* Accept the swap with a certain probability */ + val = 1.0 * rand() / RAND_MAX; + + //prob = pow(fabs(F - F_new)/(F+F_new), beta); + prob = exp(-(F_new - F_old)/beta); + //fprintf(stderr, "-- %g %g %g -> %f \n", F_new, F_old, F_new - F_old, prob); + if (val < prob){ /* Accept the swap */ + tmp_val = pairing[p1]; + pairing[p1] = pairing[p2]; + pairing[p2] = tmp_val; + //act_mu = test_mu; + swap = 1; + } + else{ /* Rollback the swap */ + tmp_val = new_pairing[p1]; + new_pairing[p1] = new_pairing[p2]; + new_pairing[p2] = tmp_val; + swap = 0; + } + + } + + + ///F = fabs(act_mu - mu_target); + ///if (tot % 200 == 0){ + //fprintf(stderr, "%d %g\n", tot, act_mu); + + //fprintf(stderr, "%d: %f %f ---- %f %f ---- %f %f ---- %d \n", + //tot, delta1_old, delta2_old, delta1_new, delta2_new, F_old, F_new, swap); + + ///} + tot += 1; + if (tot %200 == 0){ + fit_current_trend(R1, R2, N, pairing, &mu, &a, &err); + fprintf(stderr, "mu: %g a: %g corr: %g\n", mu, a, err); + //a = a - 0.01 *(a - exp(a)); + a = exp(a); + } + fit_current_trend(R1, R2, N, pairing, &mu, &dummy_a, &err); + F = fabs(mu - mu_target); + } +} + +void dump_qnn(double *R1, double *R2, int N, int *pairing){ + + int i; + int *qnn, *num; + + qnn = malloc(N * sizeof(int)); + num = malloc(N * sizeof(int)); + + for (i=0; i<N; i++){ + qnn[i]=0; + num[i]=0; + } + + for (i=0; i<N; i++){ + qnn[(int)(R1[i])] += R2[pairing[i]]; + num[(int)(R1[i])] += 1; + } + + for(i=0; i<N; i++){ + if (num[i] >0){ + printf("%d %f\n", i, 1.0*qnn[i]/num[i]); + } + } + free(num); + free(qnn); +} + + +void dump_degs(double *R1, double *R2, int N, int *pairing){ + + int i; + + for(i=0; i<N; i++){ + printf("%g %g\n", R1[i], R2[pairing[i]]); + } + +} + +int main (int argc, char *argv[]){ + + int N1, N2; + double *R1 = NULL; + double *R2 = NULL; + double eps, beta, mu_target; + + int *pairing = NULL; + + srand(time(NULL)); + + if (argc < 6){ + printf("Usage: %s <degs1> <degs2> <mu> <eps> <beta> [RND|NAT|INV]\n", argv[0]); + exit(1); + } + + mu_target = atof(argv[3]); + eps = atof(argv[4]); + beta = atof(argv[5]); + + load_ranking(argv[1], &N1, &R1); + load_ranking(argv[2], &N2, &R2); + + if (N1 != N2){ + printf("Error!!!! The two files must have the same number of lines!!!! Exiting...\n"); + exit(1); + } + + pairing = malloc(N1 * sizeof(int)); + + select_pairing(pairing, N1, argc, argv, 6); + + tune_qnn_adaptive(R1, R2, N1, pairing, eps, beta, mu_target); + + dump_pairing(pairing, N1); + //dump_qnn(R1, R2, N1, pairing); + //dump_degs(R1, R2, N1, pairing); + +} diff --git a/models/correlations/tune_rho.c b/models/correlations/tune_rho.c new file mode 100644 index 0000000..4495b0e --- /dev/null +++ b/models/correlations/tune_rho.c @@ -0,0 +1,142 @@ +/** + * + * Tune the Spearman's \rho correlation coefficient between two rankings + * given as input, using a simulated-anneling procedure + * + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <strings.h> +#include <time.h> + +#include "rank_utils.h" + +//void select_pairing(int *pairing, int N, int argc, char *argv[]){ +// +// if (argc < 7 || !strncasecmp("rnd", argv[6], 3)){ +// init_pairing_random(pairing, N); +// } +// else if (!strncasecmp("nat", argv[6], 3)){ +// init_pairing_natural(pairing, N); +// } +// else if (!strncasecmp("inv", argv[6], 3)){ +// init_pairing_inverse(pairing, N); +// } +// else{ +// printf ("Pairing strategy \"%s\" unknown!!! Exiting...\n", argv[6]); +// exit(1); +// } +// +//} + +void tune_rho(double *R1, double *R2, int N, int *pairing, double eps, + double beta, double rho_target){ + + double act_rho, test_rho, F, F_new; + double val, prob; + int *new_pairing; + int p1, p2, tmp_val; + int tot; + + new_pairing = malloc(N * sizeof(int)); + copy_pairing(pairing, new_pairing, N); + + act_rho = compute_rho(R1, R2, N, pairing); + + F = fabs(act_rho - rho_target); + + //fprintf("%f %f %f %f %f\n", eps, beta, rho_target, act_rho, F); + + tot = 0; + while (F > eps){ + /* sample two positions of "pairing" and shuffle them in "new_pairing" */ + p1 = rand() % N; + p2 = rand() % N; + while (p2 == p1){ + p2 = rand() % N; + } + tmp_val = new_pairing[p1]; + new_pairing[p1] = new_pairing[p2]; + new_pairing[p2] = tmp_val; + test_rho = compute_rho(R1, R2, N, new_pairing); + F_new = fabs(test_rho - rho_target); + if (F_new < F){/* Accept this swap with probability 1 */ + tmp_val = pairing[p1]; + pairing[p1] = pairing[p2]; + pairing[p2] = tmp_val; + act_rho = test_rho; + } + else{/* Accept the swap with a certain probability */ + val = 1.0 * rand() / RAND_MAX; + + //prob = pow(fabs(F - F_new)/(F+F_new), beta); + prob = exp(-(F_new - F)/beta); + //fprintf(stderr, "-- %f\n", prob); + if (val < prob){ /* Accept the swap */ + tmp_val = pairing[p1]; + pairing[p1] = pairing[p2]; + pairing[p2] = tmp_val; + act_rho = test_rho; + } + else{ /* Rollback the swap */ + tmp_val = new_pairing[p1]; + new_pairing[p1] = new_pairing[p2]; + new_pairing[p2] = tmp_val; + } + + } + F = fabs(act_rho - rho_target); + if (tot % 200 == 0){ + fprintf(stderr, "%d %g\n", tot, act_rho); + } + tot += 1; + } +} + + + + +int main (int argc, char *argv[]){ + + int N1, N2; + double *R1 = NULL; + double *R2 = NULL; + double eps, beta, rho, rho_target; + + int *pairing = NULL; + + srand(time(NULL)); + + if (argc < 6){ + printf("Usage: %s <rank1> <rank2> <rho> <eps> <beta> [RND|NAT|INV]\n", argv[0]); + exit(1); + } + + rho_target = atof(argv[3]); + eps = atof(argv[4]); + beta = atof(argv[5]); + + load_ranking(argv[1], &N1, &R1); + load_ranking(argv[2], &N2, &R2); + + if (N1 != N2){ + printf("Error!!!! The two files must have the same number of lines!!!! Exiting...\n"); + exit(1); + } + + pairing = malloc(N1 * sizeof(int)); + + select_pairing(pairing, N1, argc, argv, 6); + + rho = compute_rho(R1, R2, N1, pairing); + + fprintf(stderr, "%g\n", rho); + + tune_rho(R1, R2, N1, pairing, eps, beta, rho_target); + + dump_pairing(pairing, N1); + +} |