testCbrt.c 4.37 KB
Newer Older
Pedro Gonnet's avatar
Pedro Gonnet committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

20
21
22
23
24
/* Config parameters. */
#include "../config.h"

/* Standard includes. */
#include <fenv.h>
25
#include <math.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
26
27
28
#include <stdio.h>
#include <stdlib.h>

29
/* Local includes. */
Pedro Gonnet's avatar
Pedro Gonnet committed
30
#include "cbrt.h"
31
#include "clocks.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
32
33
34
35
#include "error.h"

int main(int argc, char *argv[]) {

36
37
38
39
40
  /* Initialize CPU frequency, this also starts time. */
  unsigned long long cpufreq = 0;
  clocks_set_cpufreq(cpufreq);

  /* Choke on FP-exceptions */
Josh Borrow's avatar
Josh Borrow committed
41
#ifdef HAVE_FE_ENABLE_EXCEPT
42
  feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
Josh Borrow's avatar
Josh Borrow committed
43
#endif
44

Pedro Gonnet's avatar
Pedro Gonnet committed
45
  /* Some constants for this test. */
46
47
48
49
  const int num_vals = 200000000;
  const float range_min = -1e6f;
  const float range_max = 1e6f;
  const float err_rel_tol = 1e-7;
50
51
  message("executing %i runs of each command.", num_vals);

Pedro Gonnet's avatar
Pedro Gonnet committed
52
  /* Create and fill an array of floats. */
53
54
  float *data;
  posix_memalign((void **)&data, 64, num_vals*sizeof(float));
Pedro Gonnet's avatar
Pedro Gonnet committed
55
56
57
  for (int k = 0; k < num_vals; k++) {
    data[k] = (float)rand() / RAND_MAX;
    data[k] = (1.0f - data[k]) * range_min + data[k] * range_max;
58
    if (data[k] == 0.f) k--; /* Skip 0 to avoid spurious mistakes */
Pedro Gonnet's avatar
Pedro Gonnet committed
59
  }
60

61
  /* First run just checks for correctness. */
Pedro Gonnet's avatar
Pedro Gonnet committed
62
  for (int k = 0; k < num_vals; k++) {
63
    const double exact = cbrt(data[k]);  // computed in double just to be sure.
Pedro Gonnet's avatar
Pedro Gonnet committed
64
    const float ours = 1.0f / icbrtf(data[k]);
Josh Borrow's avatar
Josh Borrow committed
65
66
    const float err_abs = fabs(exact - ours);
    const float err_rel = 0.5f * fabs(exact - ours) / (exact + ours);
67
68
69
70
71
72

    if (err_rel > err_rel_tol && data[k] != 0.f)
      error(
          "failed for x = %.8e, exact = %.8e, ours = %.8e, err = %.3e, rel = "
          "%.3e",
          data[k], exact, ours, err_abs, err_rel);
Pedro Gonnet's avatar
Pedro Gonnet committed
73
  }
74
75

  /* Second run to check the speed of the inverse cube root. */
76
  double acc_exact = 0.0f;
77
78
79
80
  ticks tic_exact = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_exact += 1.0f / cbrtf(data[k]);
  }
81
  message("1.0f / cbrtf took %9.3f %s (acc = %18.11e).",
82
83
84
          clocks_from_ticks(getticks() - tic_exact), clocks_getunit(),
          acc_exact);

85
  double acc_ours = 0.0;
86
87
88
89
  ticks tic_ours = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_ours += icbrtf(data[k]);
  }
90
  message("icbrtf       took %9.3f %s (acc = %18.11e).",
91
          clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours);
92
93
94
95
96
97
98

  /* Third run to check the speed of the cube root. */
  acc_exact = 0.0f;
  tic_exact = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_exact += cbrtf(data[k]);
  }
99
  message("cbrtf        took %9.3f %s (acc = %18.11e).",
100
101
102
103
104
105
106
107
108
          clocks_from_ticks(getticks() - tic_exact), clocks_getunit(),
          acc_exact);

  acc_ours = 0.0f;
  tic_ours = getticks();
  for (int k = 0; k < num_vals; k++) {
    const float temp = icbrtf(data[k]);
    acc_ours += data[k] * temp * temp;
  }
109
  message("x * icbrtf^2 took %9.3f %s (acc = %18.11e).",
110
          clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours);
111
112
113
114
115
116
117
118

  /* Fourth run to check the speed of (.)^(2/3). */
  acc_exact = 0.0f;
  tic_exact = getticks();
  for (int k = 0; k < num_vals; k++) {
    const float temp = cbrtf(data[k]);
    acc_exact += temp * temp;
  }
119
  message("cbrtf^2      took %9.3f %s (acc = %18.11e).",
120
121
122
123
124
125
126
127
          clocks_from_ticks(getticks() - tic_exact), clocks_getunit(),
          acc_exact);

  acc_ours = 0.0f;
  tic_ours = getticks();
  for (int k = 0; k < num_vals; k++) {
    acc_ours += data[k] * icbrtf(data[k]);
  }
128
  message("x * icbrtf   took %9.3f %s (acc = %18.11e).",
129
          clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours);
130

131
  free(data);
Pedro Gonnet's avatar
Pedro Gonnet committed
132
133
  return 0;
}