testCbrt.c 4.44 KB
Newer Older
Pedro Gonnet's avatar
Pedro Gonnet committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

20 21 22 23 24
/* Config parameters. */
#include "../config.h"

/* Standard includes. */
#include <fenv.h>
25
#include <math.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
26 27 28
#include <stdio.h>
#include <stdlib.h>

29
/* Local includes. */
Pedro Gonnet's avatar
Pedro Gonnet committed
30
#include "cbrt.h"
31
#include "clocks.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
32 33 34 35
#include "error.h"

int main(int argc, char *argv[]) {

36 37 38 39 40
  /* Initialize CPU frequency, this also starts time. */
  unsigned long long cpufreq = 0;
  clocks_set_cpufreq(cpufreq);

  /* Choke on FP-exceptions */
Josh Borrow's avatar
Josh Borrow committed
41
#ifdef HAVE_FE_ENABLE_EXCEPT
42
  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
Josh Borrow's avatar
Josh Borrow committed
43
#endif
44

Pedro Gonnet's avatar
Pedro Gonnet committed
45
  /* Some constants for this test. */
46 47 48 49
  const int num_vals = 200000000;
  const float range_min = -1e6f;
  const float range_max = 1e6f;
  const float err_rel_tol = 1e-7;
50 51
  message("executing %i runs of each command.", num_vals);

Pedro Gonnet's avatar
Pedro Gonnet committed
52
  /* Create and fill an array of floats. */
53 54 55
  float *data = NULL;
  if (posix_memalign((void **)&data, 64, num_vals * sizeof(float)) != 0)
    error("Failed to allocted memory for the test");
Pedro Gonnet's avatar
Pedro Gonnet committed
56 57 58
  for (int k = 0; k < num_vals; k++) {
    data[k] = (float)rand() / RAND_MAX;
    data[k] = (1.0f - data[k]) * range_min + data[k] * range_max;
59
    if (data[k] == 0.f) k--; /* Skip 0 to avoid spurious mistakes */
Pedro Gonnet's avatar
Pedro Gonnet committed
60
  }
61

62
  /* First run just checks for correctness. */
Pedro Gonnet's avatar
Pedro Gonnet committed
63
  for (int k = 0; k < num_vals; k++) {
64
    const double exact = cbrt(data[k]);  // computed in double just to be sure.
Pedro Gonnet's avatar
Pedro Gonnet committed
65
    const float ours = 1.0f / icbrtf(data[k]);
Josh Borrow's avatar
Josh Borrow committed
66 67
    const float err_abs = fabs(exact - ours);
    const float err_rel = 0.5f * fabs(exact - ours) / (exact + ours);
68 69 70 71 72 73

    if (err_rel > err_rel_tol && data[k] != 0.f)
      error(
          "failed for x = %.8e, exact = %.8e, ours = %.8e, err = %.3e, rel = "
          "%.3e",
          data[k], exact, ours, err_abs, err_rel);
Pedro Gonnet's avatar
Pedro Gonnet committed
74
  }
75 76

  /* Second run to check the speed of the inverse cube root. */
77
  double acc_exact = 0.0f;
78 79 80 81
  ticks tic_exact = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_exact += 1.0f / cbrtf(data[k]);
  }
82
  message("1.0f / cbrtf took %9.3f %s (acc = %18.11e).",
83 84 85
          clocks_from_ticks(getticks() - tic_exact), clocks_getunit(),
          acc_exact);

86
  double acc_ours = 0.0;
87 88 89 90
  ticks tic_ours = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_ours += icbrtf(data[k]);
  }
91
  message("icbrtf       took %9.3f %s (acc = %18.11e).",
92
          clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours);
93 94 95 96 97 98 99

  /* Third run to check the speed of the cube root. */
  acc_exact = 0.0f;
  tic_exact = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_exact += cbrtf(data[k]);
  }
100
  message("cbrtf        took %9.3f %s (acc = %18.11e).",
101 102 103 104 105 106 107 108 109
          clocks_from_ticks(getticks() - tic_exact), clocks_getunit(),
          acc_exact);

  acc_ours = 0.0f;
  tic_ours = getticks();
  for (int k = 0; k < num_vals; k++) {
    const float temp = icbrtf(data[k]);
    acc_ours += data[k] * temp * temp;
  }
110
  message("x * icbrtf^2 took %9.3f %s (acc = %18.11e).",
111
          clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours);
112 113 114 115 116 117 118 119

  /* Fourth run to check the speed of (.)^(2/3). */
  acc_exact = 0.0f;
  tic_exact = getticks();
  for (int k = 0; k < num_vals; k++) {
    const float temp = cbrtf(data[k]);
    acc_exact += temp * temp;
  }
120
  message("cbrtf^2      took %9.3f %s (acc = %18.11e).",
121 122 123 124 125 126 127 128
          clocks_from_ticks(getticks() - tic_exact), clocks_getunit(),
          acc_exact);

  acc_ours = 0.0f;
  tic_ours = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_ours += data[k] * icbrtf(data[k]);
  }
129
  message("x * icbrtf   took %9.3f %s (acc = %18.11e).",
130
          clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours);
131

132
  free(data);
Pedro Gonnet's avatar
Pedro Gonnet committed
133 134
  return 0;
}