summaryrefslogtreecommitdiff
path: root/structure/correlations/fit_utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'structure/correlations/fit_utils.c')
-rw-r--r--structure/correlations/fit_utils.c382
1 files changed, 382 insertions, 0 deletions
diff --git a/structure/correlations/fit_utils.c b/structure/correlations/fit_utils.c
new file mode 100644
index 0000000..e9e3954
--- /dev/null
+++ b/structure/correlations/fit_utils.c
@@ -0,0 +1,382 @@
+/**
+ *
+ * Fit a given sequence with a power-law function
+ *
+ * a*x^{b}
+ *
+ * The fit is actually performed as a linear fit on the
+ * exponential-binned log-log distribution
+ *
+ *
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <gsl/gsl_fit.h>
+
+/**
+ *
+ * Load a sequence from a file, which contains one element on each line
+ *
+ */
+
+void load_sequence(char *fname, double **v, int *N){
+
+ int size;
+ char buff[256];
+
+ FILE *f;
+
+ f = fopen(fname, "r");
+ if (!f){
+ fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
+ exit(1);
+ }
+
+ *N =0;
+ size = 10;
+ if (*v == NULL)
+ *v = malloc(size * sizeof(double));
+ else{
+ *v = realloc(*v, size * sizeof(double));
+ }
+
+ while (fgets(buff, 255, f)){
+ (*v)[*N] = atof(buff);
+ *N += 1;
+ if (*N == size){
+ size += 10;
+ *v = realloc(*v, size * sizeof(double));
+ }
+ }
+ *v = realloc(*v, (*N) * sizeof(double));
+ fclose(f);
+}
+
+
+/**
+ *
+ * Load a sequence, getting the "col"-th column of the input file
+ *
+ */
+
+void load_sequence_col(char *fname, double **v, int *N, int col){
+
+ int size, n;
+ char buff[256];
+ char *ptr;
+
+ FILE *f;
+
+ f = fopen(fname, "r");
+ if (!f){
+ fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
+ exit(1);
+ }
+
+ *N =0;
+ size = 10;
+ if (*v == NULL)
+ *v = malloc(size * sizeof(double));
+ else{
+ *v = realloc(*v, size * sizeof(double));
+ }
+
+ while (fgets(buff, 255, f)){
+ ptr = strtok(buff, " ");
+ if (col > 0){
+ n = 0;
+ while (n<col){
+ ptr = strtok(NULL, " ");
+ n += 1;
+ }
+ }
+ (*v)[*N] = atof(ptr);
+ *N += 1;
+
+ if (*N == size){
+ size += 10;
+ *v = realloc(*v, size * sizeof(double));
+ }
+ }
+ *v = realloc(*v, (*N) * sizeof(double));
+ fclose(f);
+}
+
+
+/**
+ *
+ * Make the exponential binning of a distribution, with a giving
+ * exponent alpha. The value of y[i] is the number of elements of v
+ * whose value lies between x[i-1] and x[i]...
+ *
+ */
+
+void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){
+
+ double min_v, max_v, val, last_val;
+ int i, j, size, num_x;
+ double last_size, new_size;
+
+ min_v = max_v = v[0];
+
+ for (i=1; i<N; i ++){
+ if (v[i] > max_v)
+ max_v = v[i];
+ else if (v[i] > 0 && v[i] < min_v)
+ min_v = v[i];
+ }
+
+ size = 10;
+ if (*x == NULL){
+ *x = malloc(size * sizeof(double));
+ }
+ else{
+ *x = realloc(*x, size * sizeof(double));
+ }
+
+ val = min_v;
+ last_size = min_v;
+ (*x)[0] = min_v;
+ num_x = 1;
+
+ while(val < max_v){
+ new_size = last_size * alpha;
+ val = last_size + new_size;
+ last_size = new_size;
+ last_val = val;
+ (*x)[num_x] = val;
+ num_x += 1;
+ if (num_x == size){
+ size += 10;
+ *x = realloc(*x, size * sizeof(double));
+ }
+ }
+
+ if (*y == NULL){
+ *y = malloc(num_x * sizeof(double));
+ }
+ else{
+ *y = realloc(*y, num_x * sizeof(double));
+ }
+ for (i=0; i < num_x; i++){
+ (*y)[i] = 0;
+ }
+
+
+
+ for(i=0; i <N; i ++){
+ j = 0;
+ while(v[i] > (*x)[j]){
+ j ++;
+ }
+ (*y)[j] += 1;
+ }
+ *num = num_x;
+}
+
+/**
+ *
+ * Make the exponential binning of a distribution, with a giving
+ * exponent alpha. The value of y[i] is the average of the values in
+ * the vector "w" whose corresponding v lies between x[i-1] and x[i]...
+ *
+ */
+
+
+void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){
+
+ double min_v, max_v, val, last_val;
+ int i, j, size, num_x;
+ double last_size, new_size;
+ int *cnt;
+
+
+ min_v = max_v = v[0];
+
+ for (i=1; i<N; i ++){
+ if (v[i] > max_v)
+ max_v = v[i];
+ else if (v[i] > 0 && v[i] < min_v)
+ min_v = v[i];
+ }
+
+ size = 10;
+ if (*x == NULL){
+ *x = malloc(size * sizeof(double));
+ }
+ else{
+ *x = realloc(*x, size * sizeof(double));
+ }
+
+ val = min_v;
+ last_size = min_v;
+ (*x)[0] = min_v;
+ num_x = 1;
+
+ while(val < max_v){
+ new_size = last_size * alpha;
+ val = last_size + new_size;
+ last_size = new_size;
+ last_val = val;
+ (*x)[num_x] = val;
+ num_x += 1;
+ if (num_x == size){
+ size += 10;
+ *x = realloc(*x, size * sizeof(double));
+ }
+ }
+
+
+ cnt = malloc(num_x * sizeof(int));
+
+ if (*y == NULL){
+ *y = malloc(num_x * sizeof(double));
+ }
+ else{
+ *y = realloc(*y, num_x * sizeof(double));
+ }
+ for (i=0; i < num_x; i++){
+ (*y)[i] = 0;
+ cnt[i] = 0;
+ }
+
+ for(i=0; i <N; i ++){
+ j = 0;
+ while(j < num_x && v[i] > (*x)[j]){
+ j ++;
+ }
+ if(j == num_x){
+ printf("Error!!!!! Trying to assing a non-existing bin!!! -- fit_utils.c:exp_bin_avg!!!\n");
+ exit(37);
+ }
+ (*y)[j] += w[i];
+ cnt[j] += 1;
+ }
+ *num = num_x;
+
+ for(i=0; i< num_x; i++){
+ if (cnt[i] > 0){
+ (*y)[i] = (*y)[i] / cnt[i];
+ }
+ }
+ free(cnt);
+}
+
+
+/**
+ *
+ * Print a distribution on stdout
+ *
+ */
+
+void dump_distr(double *x, double *y, int N){
+ int i;
+
+ for(i=0; i<N; i ++){
+ printf("%g %g\n", x[i], y[i]);
+ }
+
+}
+
+
+/**
+ * Compact a distribution, leaving only the pairs (x_i, y_i) for which
+ * y_i > 0
+ *
+ */
+
+void compact_distr(double *x, double *y, int *num){
+
+ int i, j;
+
+ i = j = 0;
+ while(j < *num){
+ while(j < *num && y[j] == 0){
+ j ++;
+ }
+ if (j==*num){
+ break;
+ }
+ x[i] = x[j];
+ y[i] = y[j];
+ i ++;
+ j ++;
+ }
+ *num = i;
+}
+
+
+/**
+ *
+ * Apply the function "f" on all the elemnts of a vector, in-place
+ *
+ */
+
+void map_vec(double *v, int N, double (*f)(double)){
+ int i;
+
+ for (i=0; i<N; i ++){
+ v[i] = f(v[i]);
+ }
+}
+
+
+/**
+ *
+ * Normalize a distribution, dividing each y[i] for the width of the
+ * corresponding bin (i.e., x[i] - x[i-1])
+ *
+ */
+void normalize_distr(double *x, double *y, int num){
+
+ int i;
+
+ for(i=1; i<num; i ++){
+ y[i] /= (x[i] - x[i-1]);
+ }
+}
+
+/**
+ *
+ * take two vectors (k and q) and a pairing, and compute the best
+ * power-law fit "a*k^{mu}" of qnn(k)
+ *
+ */
+
+void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a,
+ double *corr){
+
+ static int *num;
+ static double *q_pair, *x, *y;
+
+ int i;
+ int fit_num;
+ double c0, c1, cov00, cov01, cov11, sqsum;
+
+ if (q_pair == NULL){
+ q_pair = malloc(N * sizeof(double));
+ }
+
+ for(i=0; i<N; i++){
+ q_pair[i] = q[pairing[i]];
+ }
+
+ exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num);
+ //normalize_distr(x,y, fit_num);
+ compact_distr(x, y, &fit_num);
+
+ map_vec(x, fit_num, log);
+ map_vec(y, fit_num, log);
+ gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum);
+
+ //fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11);
+ //fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11));
+ *a = c0;
+ *mu = c1;
+ *corr = cov01/sqrt(cov00 * cov11);
+}
+