Skip to content
Snippets Groups Projects
Commit 4c90b0c9 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Add randoms based on a CDF stored in a text file with columns bin-low bin-high...

Add randoms based on a CDF stored in a text file with columns bin-low bin-high cumulative-value. The values should be normalised to the range 0 to 1.
parent fe6ad7b8
Branches
No related tags found
1 merge request!6Version with faked data
......@@ -285,28 +285,77 @@ static double gauss_rand_upper(void) {
* @param nr_nodes the number of ranks that will be used.
* @param nr_logs the number of logs to generate per rank.
* @param size bytes per message, unless random when this is the maximum
* and the minimum is 1 for uniform and 2.5 sigma for gaussian.
* and the minimum is 1 for uniform; if using a gaussian
* distribution the value is 2.5 sigma; for a CDF based
* selection this is just a scale factor of the values.
* @param random whether to use random sizes.
* @param seed the random seed, use same for fixed sequences.
* @param uniform whether to use a uniform distribution.
* @param uniform whether to use a uniform distribution, otherwise gaussian,
* unless cdf is defined, in which case this parameter is ignored.
* @param cdf text file containing a normalized CDF to use as a basis for inverse
* transform sampling of the randoms. NULL for no file.
*/
void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
long int seed, int uniform) {
long int seed, int uniform, const char *cdf) {
/* Each rank exchanges messages with all the others and each "log" has the
* same size. */
/* Only used for CDF, may need to increase these. */
int nvals = 0;
double imin[256], imax[256], value[256];
/* Note that each rank exchanges messages with all the others and each "log"
* has the same size. */
/* Set seed. */
if (random) srand48(seed);
/* Check for CDF. This should be based on some real distribution, the format
* is same as output from TOPCAT, i.e. bin-low, bin-high, value space
* separated values. Note the value column should be normalised into the
* range 0 to 1 so that it maps into a uniform random distribution. */
if (cdf != NULL) {
FILE *infile = fopen(cdf, "r");
if (infile == NULL) error("Failed to open CDF file: %s", cdf);
char line[132];
while (!feof(infile)) {
if (fgets(line, 132, infile) != NULL) {
if (line[0] != '#') {
int nread = sscanf(line, "%lf %lf %lf", &imin[nvals], &imax[nvals],
&value[nvals]);
if (nread == 3) nvals++;
}
}
}
fclose(infile);
}
/* Message tags increment across rank logs. */
int tag = 1;
for (int k = 0; k < nr_logs; k++) {
/* Set size for this message. */
int logsize = size;
if (random) {
if (uniform) {
if (cdf) {
/* CDF randoms. */
double rand = drand48();
/* Binary search for containing bin for this rand. */
unsigned int lower = 0;
unsigned int upper = nvals;
unsigned int middle = 0;
while (lower < upper) {
middle = (upper + lower) / 2;
if (rand > value[middle])
lower = middle + 1;
else
upper = middle;
}
logsize = 0.5 * (imax[middle] + imin[middle]);
} else if (uniform) {
/* Uniform randoms in the range 0 to 1 */
logsize = (drand48() * (double)size) + 1;
} else {
// Gaussian so no maximum, assume size is 2.5 sigma.
// Gaussian randoms so no maximum, assume size is 2.5 sigma.
logsize = (gauss_rand_upper() * (double)size * 0.25) + 1;
}
}
......
......@@ -102,6 +102,6 @@ int mpiuse_nr_ranks(void);
void mpiuse_dump_logs(int nranks, const char *logfile);
void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
long int seed, int uniform);
long int seed, int uniform, const char *cdf);
#endif /* SWIFT_MPIUSE_H */
......@@ -38,8 +38,8 @@ static int verbose = 0;
/* Set a data pattern and check we get this back, slow... */
static int datacheck = 0;
/* Fixed seed for pseudorandoms. */
static long int seed = 1987654321;
/* Default seed for pseudorandoms. */
static long int default_seed = 1987654321;
/* The local queues. */
static struct mpiuse_log_entry **volatile reqs_queue;
......@@ -339,11 +339,13 @@ static void pick_logs(void) {
* @brief usage help.
*/
static void usage(char *argv[]) {
fprintf(stderr, "Usage: %s [-vf] nr_messages logfile.dat\n",
fprintf(stderr, "Usage: %s [-vfgcx] nr_messages logfile.dat\n",
argv[0]);
fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes), "
"-r uniform random from 1 to size, "
"-r -g half gaussian random from 1 with 2.5 sigma size.\n");
fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes/scale), \n"
"\t[-r uniform random from 1 to size, | \n"
"\t-r -g half gaussian random from 1 with 2.5 sigma size., | \n"
"\t-r -c <file> use cdf from file, size is a scale factor.,] \n"
"\t-x random seed\n");
fflush(stderr);
}
......@@ -368,28 +370,36 @@ int main(int argc, char *argv[]) {
/* Handle the command-line: we expect the number of messages to exchange per
* rank, an output log and some options; the interesting ones are a size and
* whether to use a random selection (with a fixed seed). */
* whether to use random selections of various kinds. */
int size = 1024;
int random = 0;
int uniform = 1;
char *cdf = NULL;
int opt;
while ((opt = getopt(argc, argv, "vds:rg")) != -1) {
unsigned int seed = default_seed;
while ((opt = getopt(argc, argv, "vds:rgx:c:")) != -1) {
switch (opt) {
case 'd':
datacheck = 1;
break;
case 'c':
cdf = optarg;
break;
case 'g':
uniform = 0;
break;
case 's':
size = atoi(optarg);
break;
case 'r':
random = 1;
break;
case 'g':
uniform = 0;
break;
case 'v':
verbose = 1;
break;
case 'x':
seed = atol(optarg);
break;
default:
if (myrank == 0) usage(argv);
return 1;
......@@ -408,7 +418,12 @@ int main(int argc, char *argv[]) {
/* Generate the fake logs for the exchanges. */
if (myrank == 0) {
if (random) {
if (uniform) {
if (cdf != NULL) {
message("Generating %d fake logs for %d ranks with randoms"
" based on cdf %s scaled by factor %d", nr_logs, nr_nodes,
cdf,size);
} else if (uniform) {
message("Generating %d fake logs for %d ranks with random distribution"
" using size %d", nr_logs, nr_nodes, size);
} else {
......@@ -420,7 +435,7 @@ int main(int argc, char *argv[]) {
nr_logs, nr_nodes, size);
}
}
mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform);
mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform, cdf);
int nranks = mpiuse_nr_ranks();
/* Each rank requires its own queue, so extract them. */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment