diff --git a/src/cbrt.h b/src/cbrt.h index 2686caa90e7a18dae571e7b8381adbaa8cd63e60..54560e41d5e6ef143b839f014ce8f0c4a7624ff6 100644 --- a/src/cbrt.h +++ b/src/cbrt.h @@ -26,16 +26,25 @@ /* Some standard headers. */ #include <math.h> +/* Local headers. */ +#include "inline.h" + /** * @brief Compute the inverse cube root of a single-precision floating-point * number. * + * This function does not care about non-finite inputs. + * + * @warning This function is faster than both gcc and Intel's `cbrtf()` + * functions on x86 systems. However, Other compilers or other architectures + * may have faster implementations of the standard function `cbrtf()` that + * will potentionally outperform this function. + * * @param x_in The input value. * - * @return The inverse cubic root of @c x_in. Note that this function - * does not care about non-finite inputs. + * @return The inverse cubic root of @c x_in (i.e. \f$x_{in}^{-1/3} \f$) . */ -__attribute__((always_inline)) inline float icbrtf(float x_in) { +__attribute__((always_inline)) INLINE static float icbrtf(float x_in) { union { float as_float; @@ -43,26 +52,26 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) { int as_int; } cast; - // Extract the exponent. + /* Extract the exponent. */ cast.as_float = x_in; const int exponent = ((cast.as_int & 0x7f800000) >> 23) - 127; - // Clear the exponent and sign to get the mantissa. + /* Clear the exponent and sign to get the mantissa. */ cast.as_uint = (cast.as_uint & ~0xff800000) | 0x3f800000; const float x_norm = cast.as_float; - // Multiply by sqrt(1/2) and subtract one, should then be in the - // range [sqrt(1/2) - 1, sqrt(2) - 1). + /* Multiply by sqrt(1/2) and subtract one, should then be in the + range [sqrt(1/2) - 1, sqrt(2) - 1). */ const float x = x_norm * (float)M_SQRT1_2 - 1.0f; - // Compute the polynomial interpolant. + /* Compute the polynomial interpolant. */ float res = 9.99976591940035e-01f + x * (-3.32901212909283e-01f + x * (2.24361110929912e-01f + x * (-1.88913279594895e-01f + x * 1.28384036492344e-01f))); - // Compute the new exponent and the correction factor. + /* Compute the new exponent and the correction factor. */ int exponent_new = exponent; if (exponent_new < 0) exponent_new -= 2; exponent_new = -exponent_new / 3; @@ -72,13 +81,13 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) { 5.61231024154687e-01f}; const float exponent_scale = cast.as_float * scale[exponent_rem]; - // Scale the result and set the correct sign. + /* Scale the result and set the correct sign. */ res = copysignf(res * exponent_scale, x_in); - // One step of Newton iteration to refine the result. + /* One step of Newton iteration to refine the result. */ res *= (1.0f / 3.0f) * (4.0f - x_in * res * res * res); - // We're done. + /* We're done. */ return res; }