From b8bf8e7f6cfd739f52569394f605ae57738ec4e3 Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <matthieu.schaller@durham.ac.uk>
Date: Mon, 14 Nov 2016 15:50:34 -0700
Subject: [PATCH] Added a note reagrding performance of the cbrtf() function in
 its documentation.

---
 src/cbrt.h | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/cbrt.h b/src/cbrt.h
index 2686caa90e..54560e41d5 100644
--- a/src/cbrt.h
+++ b/src/cbrt.h
@@ -26,16 +26,25 @@
 /* Some standard headers. */
 #include <math.h>
 
+/* Local headers. */
+#include "inline.h"
+
 /**
  * @brief Compute the inverse cube root of a single-precision floating-point
  * number.
  *
+ * This function does not care about non-finite inputs.
+ *
+ * @warning This function is faster than both gcc and Intel's `cbrtf()`
+ * functions on x86 systems. However, Other compilers or other architectures
+ * may have faster implementations of the standard function `cbrtf()` that
+ * will potentionally outperform this function.
+ *
  * @param x_in The input value.
  *
- * @return The inverse cubic root of @c x_in. Note that this function
- *         does not care about non-finite inputs.
+ * @return The inverse cubic root of @c x_in (i.e. \f$x_{in}^{-1/3} \f$) .
  */
-__attribute__((always_inline)) inline float icbrtf(float x_in) {
+__attribute__((always_inline)) INLINE static float icbrtf(float x_in) {
 
   union {
     float as_float;
@@ -43,26 +52,26 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) {
     int as_int;
   } cast;
 
-  // Extract the exponent.
+  /* Extract the exponent. */
   cast.as_float = x_in;
   const int exponent = ((cast.as_int & 0x7f800000) >> 23) - 127;
 
-  // Clear the exponent and sign to get the mantissa.
+  /* Clear the exponent and sign to get the mantissa. */
   cast.as_uint = (cast.as_uint & ~0xff800000) | 0x3f800000;
   const float x_norm = cast.as_float;
 
-  // Multiply by sqrt(1/2) and subtract one, should then be in the
-  // range [sqrt(1/2) - 1, sqrt(2) - 1).
+  /* Multiply by sqrt(1/2) and subtract one, should then be in the
+     range [sqrt(1/2) - 1, sqrt(2) - 1). */
   const float x = x_norm * (float)M_SQRT1_2 - 1.0f;
 
-  // Compute the polynomial interpolant.
+  /* Compute the polynomial interpolant. */
   float res =
       9.99976591940035e-01f +
       x * (-3.32901212909283e-01f +
            x * (2.24361110929912e-01f +
                 x * (-1.88913279594895e-01f + x * 1.28384036492344e-01f)));
 
-  // Compute the new exponent and the correction factor.
+  /* Compute the new exponent and the correction factor. */
   int exponent_new = exponent;
   if (exponent_new < 0) exponent_new -= 2;
   exponent_new = -exponent_new / 3;
@@ -72,13 +81,13 @@ __attribute__((always_inline)) inline float icbrtf(float x_in) {
                                  5.61231024154687e-01f};
   const float exponent_scale = cast.as_float * scale[exponent_rem];
 
-  // Scale the result and set the correct sign.
+  /* Scale the result and set the correct sign. */
   res = copysignf(res * exponent_scale, x_in);
 
-  // One step of Newton iteration to refine the result.
+  /* One step of Newton iteration to refine the result. */
   res *= (1.0f / 3.0f) * (4.0f - x_in * res * res * res);
 
-  // We're done.
+  /* We're done. */
   return res;
 }
 
-- 
GitLab