Skip to content
Snippets Groups Projects
Commit b8bf8e7f authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Added a note reagrding performance of the cbrtf() function in its documentation.

parent 5faa3c0f
No related branches found
No related tags found
1 merge request!266Cube root
......@@ -26,16 +26,25 @@
/* Some standard headers. */
#include <math.h>
/* Local headers. */
#include "inline.h"
/**
* @brief Compute the inverse cube root of a single-precision floating-point
* number.
*
* This function does not care about non-finite inputs.
*
* @warning This function is faster than both gcc and Intel's `cbrtf()`
* functions on x86 systems. However, Other compilers or other architectures
* may have faster implementations of the standard function `cbrtf()` that
* will potentionally outperform this function.
*
* @param x_in The input value.
*
* @return The inverse cubic root of @c x_in. Note that this function
* does not care about non-finite inputs.
* @return The inverse cubic root of @c x_in (i.e. \f$x_{in}^{-1/3} \f$) .
*/
__attribute__((always_inline)) inline float icbrtf(float x_in) {
__attribute__((always_inline)) INLINE static float icbrtf(float x_in) {
union {
float as_float;
......@@ -43,26 +52,26 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) {
int as_int;
} cast;
// Extract the exponent.
/* Extract the exponent. */
cast.as_float = x_in;
const int exponent = ((cast.as_int & 0x7f800000) >> 23) - 127;
// Clear the exponent and sign to get the mantissa.
/* Clear the exponent and sign to get the mantissa. */
cast.as_uint = (cast.as_uint & ~0xff800000) | 0x3f800000;
const float x_norm = cast.as_float;
// Multiply by sqrt(1/2) and subtract one, should then be in the
// range [sqrt(1/2) - 1, sqrt(2) - 1).
/* Multiply by sqrt(1/2) and subtract one, should then be in the
range [sqrt(1/2) - 1, sqrt(2) - 1). */
const float x = x_norm * (float)M_SQRT1_2 - 1.0f;
// Compute the polynomial interpolant.
/* Compute the polynomial interpolant. */
float res =
9.99976591940035e-01f +
x * (-3.32901212909283e-01f +
x * (2.24361110929912e-01f +
x * (-1.88913279594895e-01f + x * 1.28384036492344e-01f)));
// Compute the new exponent and the correction factor.
/* Compute the new exponent and the correction factor. */
int exponent_new = exponent;
if (exponent_new < 0) exponent_new -= 2;
exponent_new = -exponent_new / 3;
......@@ -72,13 +81,13 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) {
5.61231024154687e-01f};
const float exponent_scale = cast.as_float * scale[exponent_rem];
// Scale the result and set the correct sign.
/* Scale the result and set the correct sign. */
res = copysignf(res * exponent_scale, x_in);
// One step of Newton iteration to refine the result.
/* One step of Newton iteration to refine the result. */
res *= (1.0f / 3.0f) * (4.0f - x_in * res * res * res);
// We're done.
/* We're done. */
return res;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment