/******************************************************************************* * This file is part of SWIFT. * Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * ******************************************************************************/ /* Config parameters. */ #include /* Standard includes. */ #include #include #include #include /* Local includes. */ #include "cbrt.h" #include "clocks.h" #include "error.h" int main(int argc, char *argv[]) { /* Initialize CPU frequency, this also starts time. */ unsigned long long cpufreq = 0; clocks_set_cpufreq(cpufreq); /* Choke on FP-exceptions */ #ifdef HAVE_FE_ENABLE_EXCEPT feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); #endif /* Some constants for this test. */ const int num_vals = 200000000; const float range_min = -1e6f; const float range_max = 1e6f; const float err_rel_tol = 1e-7; message("executing %i runs of each command.", num_vals); /* Create and fill an array of floats. */ float *data = NULL; if (posix_memalign((void **)&data, 64, num_vals * sizeof(float)) != 0) error("Failed to allocted memory for the test"); for (int k = 0; k < num_vals; k++) { data[k] = (float)rand() / (float)RAND_MAX; data[k] = (1.0f - data[k]) * range_min + data[k] * range_max; if (data[k] == 0.f) k--; /* Skip 0 to avoid spurious mistakes */ } /* First run just checks for correctness. */ for (int k = 0; k < num_vals; k++) { const double exact = cbrt(data[k]); // computed in double just to be sure. const float ours = 1.0f / icbrtf(data[k]); const float err_abs = fabs(exact - ours); const float err_rel = 0.5f * fabs(exact - ours) / (exact + ours); if (err_rel > err_rel_tol && data[k] != 0.f) error( "failed for x = %.8e, exact = %.8e, ours = %.8e, err = %.3e, rel = " "%.3e", data[k], exact, ours, err_abs, err_rel); } /* Second run to check the speed of the inverse cube root. */ double acc_exact = 0.0f; ticks tic_exact = getticks(); for (int k = 0; k < num_vals; k++) { acc_exact += 1.0f / cbrtf(data[k]); } message("1.0f / cbrtf took %9.3f %s (acc = %18.11e).", clocks_from_ticks(getticks() - tic_exact), clocks_getunit(), acc_exact); double acc_ours = 0.0; ticks tic_ours = getticks(); for (int k = 0; k < num_vals; k++) { acc_ours += icbrtf(data[k]); } message("icbrtf took %9.3f %s (acc = %18.11e).", clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours); /* Third run to check the speed of the cube root. */ acc_exact = 0.0f; tic_exact = getticks(); for (int k = 0; k < num_vals; k++) { acc_exact += cbrtf(data[k]); } message("cbrtf took %9.3f %s (acc = %18.11e).", clocks_from_ticks(getticks() - tic_exact), clocks_getunit(), acc_exact); acc_ours = 0.0f; tic_ours = getticks(); for (int k = 0; k < num_vals; k++) { const float temp = icbrtf(data[k]); acc_ours += data[k] * temp * temp; } message("x * icbrtf^2 took %9.3f %s (acc = %18.11e).", clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours); /* Fourth run to check the speed of (.)^(2/3). */ acc_exact = 0.0f; tic_exact = getticks(); for (int k = 0; k < num_vals; k++) { const float temp = cbrtf(data[k]); acc_exact += temp * temp; } message("cbrtf^2 took %9.3f %s (acc = %18.11e).", clocks_from_ticks(getticks() - tic_exact), clocks_getunit(), acc_exact); acc_ours = 0.0f; tic_ours = getticks(); for (int k = 0; k < num_vals; k++) { acc_ours += data[k] * icbrtf(data[k]); } message("x * icbrtf took %9.3f %s (acc = %18.11e).", clocks_from_ticks(getticks() - tic_ours), clocks_getunit(), acc_ours); free(data); return 0; }