Skip to content
Snippets Groups Projects
Commit 4c90b0c9 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Add randoms based on a CDF stored in a text file with columns bin-low bin-high...

Add randoms based on a CDF stored in a text file with columns bin-low bin-high cumulative-value. The values should be normalised to the range 0 to 1.
parent fe6ad7b8
No related branches found
No related tags found
1 merge request: !6 Version with faked data
...@@ -285,28 +285,77 @@ static double gauss_rand_upper(void) { ...@@ -285,28 +285,77 @@ static double gauss_rand_upper(void) {
* @param nr_nodes the number of ranks that will be used. * @param nr_nodes the number of ranks that will be used.
* @param nr_logs the number of logs to generate per rank. * @param nr_logs the number of logs to generate per rank.
* @param size bytes per message, unless random when this is the maximum * @param size bytes per message, unless random when this is the maximum
* and the minimum is 1 for uniform and 2.5 sigma for gaussian. * and the minimum is 1 for uniform, if using a gaussian
* distribution the value is a 2.5 sigma, for a CDF based
* selection this is just a scale factor of the values.
* @param random whether to use random sizes. * @param random whether to use random sizes.
* @param seed the random seed, use same for fixed sequences. * @param seed the random seed, use same for fixed sequences.
* @param uniform whether to use a uniform distribution. * @param uniform whether to use a uniform distribution other gaussian, unless
* cdf is defined, in which case this parameter is ignored.
* @param cdf text file containing a normalized CDF to use as a basis for inverse
* transform sampling of the randoms. NULL for no file.
*/ */
void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random, void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
long int seed, int uniform) { long int seed, int uniform, const char *cdf) {
/* Each rank exchanges messages with all the others and each "log" has the /* Only used for CDF, may need to increase these. */
* same size. */ int nvals = 0;
double imin[256], imax[256], value[256];
/* Note that each rank exchanges messages with all the others and each "log"
* has the same size. */
/* Set seed. */
if (random) srand48(seed); if (random) srand48(seed);
/* Check for CDF. This should be based on some real distribution, the format
* is same as output from TOPCAT, i.e. bin-low, bin-high, value space
* separated values. Note the value column should be normalised into the
* range 0 to 1 so that it maps into a uniform random distribution. */
if (cdf != NULL) {
FILE *infile = fopen(cdf, "r");
if (infile == NULL) error("Failed to open CDF file: %s", cdf);
char line[132];
while (!feof(infile)) {
if (fgets(line, 132, infile) != NULL) {
if (line[0] != '#') {
int nread = sscanf(line, "%lf %lf %lf", &imin[nvals], &imax[nvals],
&value[nvals]);
if (nread == 3) nvals++;
}
}
}
fclose(infile);
}
/* Message tags increment with across rank logs. */
int tag = 1; int tag = 1;
for (int k = 0; k < nr_logs; k++) { for (int k = 0; k < nr_logs; k++) {
/* Set size for this messages. */ /* Set size for this messages. */
int logsize = size; int logsize = size;
if (random) { if (random) {
if (uniform) { if (cdf) {
/* CDF randoms. */
double rand = drand48();
/* Binary search for containing bin for this rand. */
unsigned int lower = 0;
unsigned int upper = nvals;
unsigned int middle = 0;
while (lower < upper) {
middle = (upper + lower) / 2;
if (rand > value[middle])
lower = middle + 1;
else
upper = middle;
}
logsize = 0.5 * (imax[middle] + imin[middle]);
} else if (uniform) {
/* Uniform randoms in the range 0 to 1 */
logsize = (drand48() * (double)size) + 1; logsize = (drand48() * (double)size) + 1;
} else { } else {
// Gaussian so no maximum, assume size is 2.5 sigma. // Gaussian randoms so no maximum, assume size is 2.5 sigma.
logsize = (gauss_rand_upper() * (double)size * 0.25) + 1; logsize = (gauss_rand_upper() * (double)size * 0.25) + 1;
} }
} }
......
...@@ -102,6 +102,6 @@ int mpiuse_nr_ranks(void); ...@@ -102,6 +102,6 @@ int mpiuse_nr_ranks(void);
void mpiuse_dump_logs(int nranks, const char *logfile); void mpiuse_dump_logs(int nranks, const char *logfile);
void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random, void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
long int seed, int uniform); long int seed, int uniform, const char *cdf);
#endif /* SWIFT_MPIUSE_H */ #endif /* SWIFT_MPIUSE_H */
...@@ -38,8 +38,8 @@ static int verbose = 0; ...@@ -38,8 +38,8 @@ static int verbose = 0;
/* Set a data pattern and check we get this back, slow... */ /* Set a data pattern and check we get this back, slow... */
static int datacheck = 0; static int datacheck = 0;
/* Fixed seed for pseudorandoms. */ /* Default seed for pseudorandoms. */
static long int seed = 1987654321; static long int default_seed = 1987654321;
/* The local queues. */ /* The local queues. */
static struct mpiuse_log_entry **volatile reqs_queue; static struct mpiuse_log_entry **volatile reqs_queue;
...@@ -339,11 +339,13 @@ static void pick_logs(void) { ...@@ -339,11 +339,13 @@ static void pick_logs(void) {
* @brief usage help. * @brief usage help.
*/ */
static void usage(char *argv[]) { static void usage(char *argv[]) {
fprintf(stderr, "Usage: %s [-vf] nr_messages logfile.dat\n", fprintf(stderr, "Usage: %s [-vfgcx] nr_messages logfile.dat\n",
argv[0]); argv[0]);
fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes), " fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes/scale), \n"
"-r uniform random from 1 to size, " "\t[-r uniform random from 1 to size, | \n"
"-r -g half gaussian random from 1 with 2.5 sigma size.\n"); "\t-r -g half gaussian random from 1 with 2.5 sigma size., | \n"
"\t-r -c <file> use cdf from file, size is a scale factor.,] \n"
"\t-x random seed\n");
fflush(stderr); fflush(stderr);
} }
...@@ -368,28 +370,36 @@ int main(int argc, char *argv[]) { ...@@ -368,28 +370,36 @@ int main(int argc, char *argv[]) {
/* Handle the command-line, we expect the number of messages to exchange per /* Handle the command-line, we expect the number of messages to exchange per
* rank an output log and some options, the interesting ones are a size and * rank an output log and some options, the interesting ones are a size and
* whether to use a random selection (with a fixed seed). */ * whether to use a random selections of various kinds. */
int size = 1024; int size = 1024;
int random = 0; int random = 0;
int uniform = 1; int uniform = 1;
char *cdf = NULL;
int opt; int opt;
while ((opt = getopt(argc, argv, "vds:rg")) != -1) { unsigned int seed = default_seed;
while ((opt = getopt(argc, argv, "vds:rgx:c:")) != -1) {
switch (opt) { switch (opt) {
case 'd': case 'd':
datacheck = 1; datacheck = 1;
break; break;
case 'c':
cdf = optarg;
break;
case 'g':
uniform = 0;
break;
case 's': case 's':
size = atoi(optarg); size = atoi(optarg);
break; break;
case 'r': case 'r':
random = 1; random = 1;
break; break;
case 'g':
uniform = 0;
break;
case 'v': case 'v':
verbose = 1; verbose = 1;
break; break;
case 'x':
seed = atol(optarg);
break;
default: default:
if (myrank == 0) usage(argv); if (myrank == 0) usage(argv);
return 1; return 1;
...@@ -408,7 +418,12 @@ int main(int argc, char *argv[]) { ...@@ -408,7 +418,12 @@ int main(int argc, char *argv[]) {
/* Generate the fake logs for the exchanges. */ /* Generate the fake logs for the exchanges. */
if (myrank == 0) { if (myrank == 0) {
if (random) { if (random) {
if (uniform) { if (cdf != NULL) {
message("Generating %d fake logs for %d ranks with randoms"
" based on cdf %s scaled by factor %d", nr_logs, nr_nodes,
cdf,size);
} else if (uniform) {
message("Generating %d fake logs for %d ranks with random distribution" message("Generating %d fake logs for %d ranks with random distribution"
" using size %d", nr_logs, nr_nodes, size); " using size %d", nr_logs, nr_nodes, size);
} else { } else {
...@@ -420,7 +435,7 @@ int main(int argc, char *argv[]) { ...@@ -420,7 +435,7 @@ int main(int argc, char *argv[]) {
nr_logs, nr_nodes, size); nr_logs, nr_nodes, size);
} }
} }
mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform); mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform, cdf);
int nranks = mpiuse_nr_ranks(); int nranks = mpiuse_nr_ranks();
/* Each rank requires its own queue, so extract them. */ /* Each rank requires its own queue, so extract them. */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment