diff --git a/histogram.c b/histogram.c new file mode 100644 index 0000000000000000000000000000000000000000..0995ac2004a9d024e9140bcf0af897a2f7f4143a --- /dev/null +++ b/histogram.c @@ -0,0 +1,138 @@ +#include <float.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "histogram.h" + +/* + * Simple histogram routines. +int main(int argc, char *argv[]) { + double *values; + int nvalues; + if (histread(argv[1], &values, &nvalues)) { + printf("## Read %d values from %s\n", nvalues, argv[1]); + struct histogram *h = calloc(1, sizeof(struct histogram)); + histmake(nvalues, values, h); + printf("## Created cumulative histogram with %d values:\n", h->nvalues); + printf("# value sum\n"); + for (int k = 0; k < h->nvalues; k++) { + printf("%f %24.17g\n", h->values[k], h->sums[k]); + } + free(h); + free(values); + return 0; + } + return 1; +} +*/ + +/** + * Create a histogram by binning a number of data values. + * + * The histogram is returned in a histogram struct, which includes the basic + * histogram (with NHIST values) and a normalized cumulative version (with + * h->nvals values, as the zero bins are coalesced). + * + * @param nvalues number of values. + * @param values the data values to bin into a histogram. + * @param h the #histogram. + */ +void histmake(int nvalues, double *values, struct histogram *h) { + + /* Find the minimum and maximum values. */ + double dmin = DBL_MAX; + double dmax = -DBL_MAX; + for (int i = 0; i < nvalues; i++) { + dmin = fmin(dmin, values[i]); + dmax = fmax(dmax, values[i]); + } + + /* Form the fixed width bin histogram. */ + double scale = (double)NHIST / (dmax - dmin); + h->width = 1.0 / scale; + h->zero = dmin - h->width / 2.0; + double sum = 0.0; + for (int i = 0; i < nvalues; i++) { + int idiff = (int)round(scale * ((double)values[i] - dmin)); + h->hist[idiff] += 1; + sum++; + } + + double norm = 1.0 / sum; + + /* Form cumulative sums and count used bins. */ + sum = 0.0; + int lastbin = 0; + int usedbins = 0; + for (int i = 0; i < NHIST; i++) { + + /* Zero bins are folded into a range. */ + if (h->hist[i] > 0) { + + /* Value is mid of bin. */ + h->values[usedbins] = 0.5 * ((lastbin * h->width + h->zero) + + ((i + 1) * h->width + h->zero)); + sum += h->hist[i] * norm; + h->sums[usedbins] = sum; + usedbins++; + lastbin = i + 1; + } + } + h->nvalues = usedbins; +} + +/** + * Read in data to histogram. Assumes a simple text file with one value per + * line. + * + * @param file the name of the file to read. + * @param values the extracted values. Free when not needed. + * @param nvalues the number of values. + * + * @result 1 for successful, 0 otherwise. + */ +int histread(const char *file, double **values, int *nvalues) { + + *values = NULL; + *nvalues = 0; + + FILE *infile = fopen(file, "r"); + if (infile == NULL) { + printf("Failed to open sizes file: %s\n", file); + return 0; + } + + /* Initial space for data. */ + int nvals = 0; + int maxvals = 1024; + double *vals = malloc(maxvals * sizeof(double)); + + char line[132]; + while (!feof(infile)) { + if (fgets(line, 132, infile) != NULL) { + if (line[0] != '#') { + int n = sscanf(line, "%lf", &vals[nvals]); + if (n != 1) { + printf("Incorrect no. of values %s\n", line); + fclose(infile); + free(vals); + return 0; + } + nvals++; + + /* Make more space. */ + if (nvals > maxvals) { + maxvals += 1024; + vals = realloc(vals, maxvals * sizeof(double)); + } + } + } + } + fclose(infile); + + *values = vals; + *nvalues = nvals; + return 1; +} diff --git a/histogram.h b/histogram.h new file mode 100644 index 0000000000000000000000000000000000000000..9e281696949ac05976d0d1437a327d5ee7d2d518 --- /dev/null +++ b/histogram.h @@ -0,0 +1,44 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2021 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_HISTOGRAM_H +#define SWIFT_HISTOGRAM_H + +/* Bins in a histogram. Note this must be an even number. */ +#define NHIST 2048 + +/** Histogram structure. */ +struct histogram { + /* Raw histogram. NHIST counts in hist and the value associated with a bin + * is index*width+zero. */ + double width; + double zero; + int hist[NHIST]; + + /* Normalized cumulative histogram. Empty bins are joined into a larger one, + * so values are the bin centre, sums the sum to that bin and nvalues the + * number of bins that have been populated, can be less than NHIST. */ + double values[NHIST]; + double sums[NHIST]; + int nvalues; +}; + +int histread(const char *file, double **values, int *nvalues); +void histmake(int nvalues, double *values, struct histogram *h); + +#endif