diff --git a/mpiuse.c b/mpiuse.c
index c194495b98c0f4f8c2fb06d52c3da45268c2f3d1..77bd4a1009172ede87c6f6623ba60934d1c39359 100644
--- a/mpiuse.c
+++ b/mpiuse.c
@@ -285,28 +285,97 @@ static double gauss_rand_upper(void) {
  * @param nr_nodes the number of ranks that will be used.
  * @param nr_logs the number of logs to generate per rank.
  * @param size bytes per message, unless random when this is the maximum
- *             and the minimum is 1 for uniform and 2.5 sigma for gaussian.
+ *             and the minimum is 1 for uniform, if using a gaussian
+ *             distribution the value is a 2.5 sigma, for a CDF based
+ *             selection this is meant as a scale factor of the values,
+ *             but note that is not currently applied.
  * @param random whether to use random sizes.
  * @param seed the random seed, use same for fixed sequences.
- * @param uniform whether to use a uniform distribution.
+ * @param uniform whether to use a uniform distribution, otherwise gaussian,
+ *                unless cdf is defined, in which case this parameter is
+ *                ignored.
+ * @param cdf text file containing a normalized CDF to use as a basis for
+ *            inverse transform sampling of the randoms. NULL for no file.
  */
 void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
-                         long int seed, int uniform) {
+                         long int seed, int uniform, const char *cdf) {
 
-  /* Each rank exchanges messages with all the others and each "log" has the
-   * same size. */
+  /* Only used for CDF, may need to increase the number of bins. */
+  enum { max_cdf_bins = 256 };
+  int nvals = 0;
+  double imin[max_cdf_bins], imax[max_cdf_bins], value[max_cdf_bins];
+
+  /* Note that each rank exchanges messages with all the others and each "log"
+   * has the same size. */
+
+  /* Set seed. */
   if (random) srand48(seed);
 
+  /* Check for CDF. This should be based on some real distribution, the format
+   * is same as output from TOPCAT, i.e. bin-low, bin-high, value space
+   * separated values. Note the value column should be normalised into the
+   * range 0 to 1 so that it maps into a uniform random distribution. */
+  if (cdf != NULL) {
+    FILE *infile = fopen(cdf, "r");
+    if (infile == NULL) error("Failed to open CDF file: %s", cdf);
+    char line[132];
+    while (fgets(line, sizeof(line), infile) != NULL) {
+      /* Skip comments and lines that do not hold three values. */
+      if (line[0] == '#') continue;
+      double low, high, cumul;
+      if (sscanf(line, "%lf %lf %lf", &low, &high, &cumul) != 3) continue;
+
+      /* Do not run off the end of the fixed size arrays. */
+      if (nvals >= max_cdf_bins)
+        error("Too many bins in CDF file: %s (maximum %d)", cdf, max_cdf_bins);
+      imin[nvals] = low;
+      imax[nvals] = high;
+      value[nvals] = cumul;
+      nvals++;
+    }
+    fclose(infile);
+    if (nvals == 0) error("No CDF bins read from file: %s", cdf);
+  }
+
+  /* Message tags increment across rank logs. */
   int tag = 1;
   for (int k = 0; k < nr_logs; k++) {
 
     /* Set size for this messages. */
     int logsize = size;
     if (random) {
-      if (uniform) {
+      if (cdf) {
+        /* CDF randoms. */
+        double deviate = drand48();
+
+        /* Binary search for the first bin whose cumulative value is not
+         * less than the deviate, that is the containing bin. On exit lower
+         * and upper are equal and index that bin. */
+        int lower = 0;
+        int upper = nvals;
+        while (lower < upper) {
+          int middle = lower + (upper - lower) / 2;
+          if (deviate > value[middle])
+            lower = middle + 1;
+          else
+            upper = middle;
+        }
+
+        /* The deviate can exceed the final value when the CDF does not
+         * reach 1, in that case use the last bin. */
+        if (lower >= nvals) lower = nvals - 1;
+
+        /* Use the bin centre, but never less than one byte.
+         * NOTE(review): size is documented as a scale factor for CDF
+         * selections, but is not applied here. */
+        logsize = (int)(0.5 * (imax[lower] + imin[lower]));
+        if (logsize < 1) logsize = 1;
+
+      } else if (uniform) {
+        /* Uniform randoms in the range 1 to size. */
         logsize = (drand48() * (double)size) + 1;
       } else {
-        // Gaussian so no maximum, assume size is 2.5 sigma.
+        // Gaussian randoms so no maximum, assume size is 2.5 sigma.
         logsize = (gauss_rand_upper() * (double)size * 0.25) + 1;
       }
     }
diff --git a/mpiuse.h b/mpiuse.h
index feae1e6f7999a30fb79c9aa4b1ed9844a59a1fce..f621698be1f8cc8cf14a029e55654ce08dfcfacf 100644
--- a/mpiuse.h
+++ b/mpiuse.h
@@ -102,6 +102,6 @@ int mpiuse_nr_ranks(void);
 void mpiuse_dump_logs(int nranks, const char *logfile);
 
 void mpiuse_log_generate(int nr_nodes, int nr_logs, int size, int random,
-                         long int seed, int uniform);
+                         long int seed, int uniform, const char *cdf);
 
 #endif /* SWIFT_MPIUSE_H */
diff --git a/swiftmpifakestepsim.c b/swiftmpifakestepsim.c
index 7d991b752bbc1fb292414f65bf5e3bfd116d1d24..80b9e5e3ef046e67e41fcdf11cc52b3ed2c74d5c 100644
--- a/swiftmpifakestepsim.c
+++ b/swiftmpifakestepsim.c
@@ -38,8 +38,8 @@ static int verbose = 0;
 /* Set a data pattern and check we get this back, slow... */
 static int datacheck = 0;
 
-/* Fixed seed for pseudorandoms. */
-static long int seed = 1987654321;
+/* Default seed for pseudorandoms. */
+static long int default_seed = 1987654321;
 
 /* The local queues. */
 static struct mpiuse_log_entry **volatile reqs_queue;
@@ -339,11 +339,15 @@ static void pick_logs(void) {
  * @brief usage help.
  */
 static void usage(char *argv[]) {
-  fprintf(stderr, "Usage: %s [-vf] nr_messages logfile.dat\n",
+  fprintf(stderr,
+          "Usage: %s [-vdrg] [-s size] [-c file] [-x seed] "
+          "nr_messages logfile.dat\n",
           argv[0]);
-  fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes), "
-          "-r uniform random from 1 to size, "
-          "-r -g half gaussian random from 1 with 2.5 sigma size.\n");
+  fprintf(stderr, " options: -v verbose, -d data check, -s size (bytes/scale), \n"
+          "\t[-r uniform random from 1 to size, | \n"
+          "\t-r -g half gaussian random from 1 with 2.5 sigma size., | \n"
+          "\t-r -c <file> use cdf from file, size is a scale factor.,] \n"
+          "\t-x random seed\n");
   fflush(stderr);
 }
 
@@ -368,28 +372,36 @@ int main(int argc, char *argv[]) {
 
   /* Handle the command-line, we expect the number of messages to exchange per
    * rank an output log and some options, the interesting ones are a size and
-   * whether to use a random selection (with a fixed seed). */
+   * whether to use random selections of various kinds. */
   int size = 1024;
   int random = 0;
   int uniform = 1;
+  const char *cdf = NULL;
   int opt;
-  while ((opt = getopt(argc, argv, "vds:rg")) != -1) {
+  long int seed = default_seed;
+  while ((opt = getopt(argc, argv, "vds:rgx:c:")) != -1) {
     switch (opt) {
       case 'd':
         datacheck = 1;
         break;
+      case 'c':
+        cdf = optarg;
+        break;
+      case 'g':
+        uniform = 0;
+        break;
       case 's':
         size = atoi(optarg);
         break;
       case 'r':
         random = 1;
         break;
-      case 'g':
-        uniform = 0;
-        break;
       case 'v':
         verbose = 1;
         break;
+      case 'x':
+        seed = atol(optarg);
+        break;
       default:
         if (myrank == 0) usage(argv);
         return 1;
@@ -408,7 +420,12 @@ int main(int argc, char *argv[]) {
   /* Generate the fake logs for the exchanges. */
   if (myrank == 0) {
     if (random) {
-      if (uniform) {
+      if (cdf != NULL) {
+        message("Generating %d fake logs for %d ranks with randoms"
+                " based on cdf %s scaled by factor %d", nr_logs, nr_nodes,
+                cdf, size);
+
+      } else if (uniform) {
         message("Generating %d fake logs for %d ranks with random distribution"
                 " using size %d", nr_logs, nr_nodes, size);
       } else {
@@ -420,7 +437,7 @@ int main(int argc, char *argv[]) {
               nr_logs, nr_nodes, size);
     }
   }
-  mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform);
+  mpiuse_log_generate(nr_nodes, nr_logs, size, random, seed, uniform, cdf);
   int nranks = mpiuse_nr_ranks();
 
   /* Each rank requires its own queue, so extract them. */