changeset 13456:fd291325b6d5

strtod: make it more-accurate typically, and don't require libm * lib/strtod.c (_GL_ARG_NONNULL): Remove; no longer needed. Include limits.h. Don't include string.h. (HAVE_LDEXP_IN_LIBC, HAVE_RAW_DECL_STRTOD): Define to 0 if not defined. (locale_isspace): New function, so that no casts are needed to check whether *s is a space. (ldexp): Provide an unused dummy if not available. (scale_radix_exp, parse_number, underlying_strtod): New functions. (strtod): Use them. This implementation prefers to use the underlying strtod if available, falling back on our own code only to fix known bugs. This is more likely to produce an accurate result. Also, it avoids the use of libm functions. * m4/strtod.m4 (gl_FUNC_STRTOD): Don't invoke _AC_LIBOBJ_STRTOD; no longer needed. Invoke AC_LIBOBJ([strtod]); don't know why this was absent, but it caused a test failure with coreutils. (gl_PREREQ_STRTOD): Check wither ldexp can be used without linking with libm. * modules/strtod (Makefile.am, Link): libm is no longer needed. * modules/strtod-tests (Makefile.am): Likewise.
author Paul R. Eggert <eggert@cs.ucla.edu>
date Mon, 12 Jul 2010 09:14:10 -0700
parents 215879220d76
children fd542c59e379
files ChangeLog lib/strtod.c m4/strtod.m4 modules/strtod modules/strtod-tests
diffstat 5 files changed, 267 insertions(+), 223 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Jul 11 11:45:38 2010 -0700
+++ b/ChangeLog	Mon Jul 12 09:14:10 2010 -0700
@@ -1,3 +1,25 @@
+2010-07-12  Paul R. Eggert  <eggert@cs.ucla.edu>
+
+	strtod: make it more-accurate typically, and don't require libm
+	* lib/strtod.c (_GL_ARG_NONNULL): Remove; no longer needed.
+	Include limits.h.  Don't include string.h.
+	(HAVE_LDEXP_IN_LIBC, HAVE_RAW_DECL_STRTOD): Define to 0 if not defined.
+	(locale_isspace): New function, so that no casts are needed to
+	check whether *s is a space.
+	(ldexp): Provide an unused dummy if not available.
+	(scale_radix_exp, parse_number, underlying_strtod): New functions.
+	(strtod): Use them.  This implementation prefers to use the
+	underlying strtod if available, falling back on our own code
+	only to fix known bugs.  This is more likely to produce an
+	accurate result.  Also, it avoids the use of libm functions.
+	* m4/strtod.m4 (gl_FUNC_STRTOD): Don't invoke _AC_LIBOBJ_STRTOD;
+	no longer needed.  Invoke AC_LIBOBJ([strtod]); don't know why this
+	was absent, but it caused a test failure with coreutils.
+	(gl_PREREQ_STRTOD): Check wither ldexp can be used without linking
+	with libm.
+	* modules/strtod (Makefile.am, Link): libm is no longer needed.
+	* modules/strtod-tests (Makefile.am): Likewise.
+
 2010-07-11  Pádraig Brady  <P@draigBrady.com>
             Bruno Haible  <bruno@clisp.org>
 
--- a/lib/strtod.c	Sun Jul 11 11:45:38 2010 -0700
+++ b/lib/strtod.c	Mon Jul 12 09:14:10 2010 -0700
@@ -16,50 +16,189 @@
 
 #include <config.h>
 
-/* Don't use __attribute__ __nonnull__ in this compilation unit.  Otherwise gcc
-   optimizes away the nptr == NULL test below.  */
-#define _GL_ARG_NONNULL(params)
-
 #include <stdlib.h>
 
 #include <ctype.h>
 #include <errno.h>
 #include <float.h>
+#include <limits.h>
 #include <math.h>
 #include <stdbool.h>
-#include <string.h>
 
 #include "c-ctype.h"
 
+#ifndef HAVE_LDEXP_IN_LIBC
+#define HAVE_LDEXP_IN_LIBC 0
+#endif
+#ifndef HAVE_RAW_DECL_STRTOD
+#define HAVE_RAW_DECL_STRTOD 0
+#endif
+
+/* Return true if C is a space in the current locale, avoiding
+   problems with signed char and isspace.  */
+static bool
+locale_isspace (char c)
+{
+  unsigned char uc = c;
+  return isspace (uc) != 0;
+}
+
+#if !HAVE_LDEXP_IN_LIBC
+ #define ldexp dummy_ldexp
+ static double ldexp (double x, int exponent) { return x + exponent; }
+#endif
+
+/* Return X * BASE**EXPONENT.  Return an extreme value and set errno
+   to ERANGE if underflow or overflow occurs.  */
+static double
+scale_radix_exp (double x, int radix, long int exponent)
+{
+  /* If RADIX == 10, this code is neither precise nor fast; it is
+     merely a straightforward and relatively portable approximation.
+     If N == 2, this code is precise on a radix-2 implementation,
+     albeit perhaps not fast if ldexp is not in libc.  */
+
+  long int e = exponent;
+
+  if (HAVE_LDEXP_IN_LIBC && radix == 2)
+    return ldexp (x, e < INT_MIN ? INT_MIN : INT_MAX < e ? INT_MAX : e);
+  else
+    {
+      double r = x;
+
+      if (r != 0)
+        {
+          if (e < 0)
+            {
+              while (e++ != 0)
+                {
+                  r /= radix;
+                  if (r == 0 && x != 0)
+                    {
+                      errno = ERANGE;
+                      break;
+                    }
+                }
+            }
+          else
+            {
+              while (e-- != 0)
+                {
+                  if (r < -DBL_MAX / radix)
+                    {
+                      errno = ERANGE;
+                      return -HUGE_VAL;
+                    }
+                  else if (DBL_MAX / radix < r)
+                    {
+                      errno = ERANGE;
+                      return HUGE_VAL;
+                    }
+                  else
+                    r *= radix;
+                }
+            }
+        }
+
+      return r;
+    }
+}
+
+/* Parse a number at NPTR; this is a bit like strtol (NPTR, ENDPTR)
+   except there are no leading spaces or signs or "0x", and ENDPTR is
+   nonnull.  The number uses a base BASE (either 10 or 16) fraction, a
+   radix RADIX (either 10 or 2) exponent, and exponent character
+   EXPCHAR.  To convert from a number of digits to a radix exponent,
+   multiply by RADIX_MULTIPLIER (either 1 or 4).  */
+static double
+parse_number (const char *nptr,
+              int base, int radix, int radix_multiplier, char expchar,
+              char **endptr)
+{
+  const char *s = nptr;
+  bool got_dot = false;
+  long int exponent = 0;
+  double num = 0;
+
+  for (;; ++s)
+    {
+      int digit;
+      if (c_isdigit (*s))
+        digit = *s - '0';
+      else if (base == 16 && c_isxdigit (*s))
+        digit = c_tolower (*s) - ('a' - 10);
+      else if (! got_dot && *s == '.')
+        {
+          /* Record that we have found the decimal point.  */
+          got_dot = true;
+          continue;
+        }
+      else
+        /* Any other character terminates the number.  */
+        break;
+
+      /* Make sure that multiplication by base will not overflow.  */
+      if (num <= DBL_MAX / base)
+        num = num * base + digit;
+      else
+        {
+          /* The value of the digit doesn't matter, since we have already
+             gotten as many digits as can be represented in a `double'.
+             This doesn't necessarily mean the result will overflow.
+             The exponent may reduce it to within range.
+
+             We just need to record that there was another
+             digit so that we can multiply by 10 later.  */
+          exponent += radix_multiplier;
+        }
+
+      /* Keep track of the number of digits after the decimal point.
+         If we just divided by base here, we might lose precision.  */
+      if (got_dot)
+        exponent -= radix_multiplier;
+    }
+
+  if (c_tolower (*s) == expchar && ! locale_isspace (s[1]))
+    {
+      /* Add any given exponent to the implicit one.  */
+      int save = errno;
+      char *end;
+      long int value = strtol (s + 1, &end, 10);
+      errno = save;
+
+      if (s + 1 != end)
+        {
+          /* Skip past the exponent, and add in the implicit exponent,
+             resulting in an extreme value on overflow.  */
+          s = end;
+          exponent =
+            (exponent < 0
+             ? (value < LONG_MIN - exponent ? LONG_MIN : exponent + value)
+             : (LONG_MAX - exponent < value ? LONG_MAX : exponent + value));
+        }
+    }
+
+  *endptr = (char *) s;
+  return scale_radix_exp (num, radix, exponent);
+}
+
+static double underlying_strtod (const char *, char **);
+
 /* Convert NPTR to a double.  If ENDPTR is not NULL, a pointer to the
    character after the last one used in the number is put in *ENDPTR.  */
 double
 strtod (const char *nptr, char **endptr)
 {
-  const unsigned char *s;
   bool negative = false;
 
   /* The number so far.  */
   double num;
 
-  bool got_dot;                 /* Found a decimal point.  */
-  bool got_digit;               /* Seen any digits.  */
-  bool hex = false;             /* Look for hex float exponent.  */
-
-  /* The exponent of the number.  */
-  long int exponent;
-
-  if (nptr == NULL)
-    {
-      errno = EINVAL;
-      goto noconv;
-    }
-
-  /* Use unsigned char for the ctype routines.  */
-  s = (unsigned char *) nptr;
+  const char *s = nptr;
+  char *end;
 
   /* Eat whitespace.  */
-  while (isspace (*s))
+  while (locale_isspace (*s))
     ++s;
 
   /* Get the sign.  */
@@ -67,210 +206,84 @@
   if (*s == '-' || *s == '+')
     ++s;
 
-  num = 0.0;
-  got_dot = false;
-  got_digit = false;
-  exponent = 0;
-
-  /* Check for hex float.  */
-  if (*s == '0' && c_tolower (s[1]) == 'x'
-      && (c_isxdigit (s[2]) || ('.' == s[2] && c_isxdigit (s[3]))))
-    {
-      hex = true;
-      s += 2;
-      for (;; ++s)
-        {
-          if (c_isxdigit (*s))
-            {
-              got_digit = true;
+  num = underlying_strtod (s, &end);
 
-              /* Make sure that multiplication by 16 will not overflow.  */
-              if (num > DBL_MAX / 16)
-                /* The value of the digit doesn't matter, since we have already
-                   gotten as many digits as can be represented in a `double'.
-                   This doesn't necessarily mean the result will overflow.
-                   The exponent may reduce it to within range.
+  if (c_isdigit (s[*s == '.']))
+    {
+      /* If a hex float was converted incorrectly, do it ourselves.  */
+      if (*s == '0' && c_tolower (s[1]) == 'x' && end <= s + 2
+          && c_isxdigit (s[2 + (s[2] == '.')]))
+        num = parse_number (s + 2, 16, 2, 4, 'p', &end);
 
-                   We just need to record that there was another
-                   digit so that we can multiply by 16 later.  */
-                ++exponent;
-              else
-                num = ((num * 16.0)
-                       + (c_tolower (*s) - (c_isdigit (*s) ? '0' : 'a' - 10)));
-
-              /* Keep track of the number of digits after the decimal point.
-                 If we just divided by 16 here, we would lose precision.  */
-              if (got_dot)
-                --exponent;
-            }
-          else if (!got_dot && *s == '.')
-            /* Record that we have found the decimal point.  */
-            got_dot = true;
-          else
-            /* Any other character terminates the number.  */
-            break;
-        }
+      s = end;
     }
 
-  /* Not a hex float.  */
-  else
+  /* Check for infinities and NaNs.  */
+  else if (c_tolower (*s) == 'i'
+           && c_tolower (s[1]) == 'n'
+           && c_tolower (s[2]) == 'f')
     {
-      for (;; ++s)
-        {
-          if (c_isdigit (*s))
-            {
-              got_digit = true;
-
-              /* Make sure that multiplication by 10 will not overflow.  */
-              if (num > DBL_MAX * 0.1)
-                /* The value of the digit doesn't matter, since we have already
-                   gotten as many digits as can be represented in a `double'.
-                   This doesn't necessarily mean the result will overflow.
-                   The exponent may reduce it to within range.
-
-                   We just need to record that there was another
-                   digit so that we can multiply by 10 later.  */
-                ++exponent;
-              else
-                num = (num * 10.0) + (*s - '0');
-
-              /* Keep track of the number of digits after the decimal point.
-                 If we just divided by 10 here, we would lose precision.  */
-              if (got_dot)
-                --exponent;
-            }
-          else if (!got_dot && *s == '.')
-            /* Record that we have found the decimal point.  */
-            got_dot = true;
-          else
-            /* Any other character terminates the number.  */
-            break;
-        }
-    }
-
-  if (!got_digit)
-    {
-      /* Check for infinities and NaNs.  */
+      s += 3;
       if (c_tolower (*s) == 'i'
           && c_tolower (s[1]) == 'n'
-          && c_tolower (s[2]) == 'f')
-        {
-          s += 3;
-          num = HUGE_VAL;
-          if (c_tolower (*s) == 'i'
-              && c_tolower (s[1]) == 'n'
-              && c_tolower (s[2]) == 'i'
-              && c_tolower (s[3]) == 't'
-              && c_tolower (s[4]) == 'y')
-            s += 5;
-          goto valid;
-        }
-#ifdef NAN
-      else if (c_tolower (*s) == 'n'
-               && c_tolower (s[1]) == 'a'
-               && c_tolower (s[2]) == 'n')
+          && c_tolower (s[2]) == 'i'
+          && c_tolower (s[3]) == 't'
+          && c_tolower (s[4]) == 'y')
+        s += 5;
+      num = HUGE_VAL;
+    }
+  else if (c_tolower (*s) == 'n'
+           && c_tolower (s[1]) == 'a'
+           && c_tolower (s[2]) == 'n')
+    {
+      s += 3;
+      if (*s == '(')
         {
-          s += 3;
-          num = NAN;
-          /* Since nan(<n-char-sequence>) is implementation-defined,
-             we define it by ignoring <n-char-sequence>.  A nicer
-             implementation would populate the bits of the NaN
-             according to interpreting n-char-sequence as a
-             hexadecimal number, but the result is still a NaN.  */
-          if (*s == '(')
-            {
-              const unsigned char *p = s + 1;
-              while (c_isalnum (*p))
-                p++;
-              if (*p == ')')
-                s = p + 1;
-            }
-          goto valid;
+          const char *p = s + 1;
+          while (c_isalnum (*p))
+            p++;
+          if (*p == ')')
+            s = p + 1;
         }
-#endif
-      goto noconv;
+
+      /* If the underlying implementation misparsed the NaN, assume
+         its result is incorrect, and return a NaN.  Normally it's
+         better to use the underlying implementation's result, since a
+         nice implementation populates the bits of the NaN according
+         to interpreting n-char-sequence as a hexadecimal number.  */
+      if (s != end)
+        num = NAN;
+    }
+  else
+    {
+      /* No conversion could be performed.  */
+      errno = EINVAL;
+      s = nptr;
     }
 
-  if (c_tolower (*s) == (hex ? 'p' : 'e') && !isspace (s[1]))
-    {
-      /* Get the exponent specified after the `e' or `E'.  */
-      int save = errno;
-      char *end;
-      long int value;
-
-      errno = 0;
-      ++s;
-      value = strtol ((char *) s, &end, 10);
-      if (errno == ERANGE && num)
-        {
-          /* The exponent overflowed a `long int'.  It is probably a safe
-             assumption that an exponent that cannot be represented by
-             a `long int' exceeds the limits of a `double'.  */
-          if (endptr != NULL)
-            *endptr = end;
-          if (value < 0)
-            goto underflow;
-          else
-            goto overflow;
-        }
-      else if (end == (char *) s)
-        /* There was no exponent.  Reset END to point to
-           the 'e' or 'E', so *ENDPTR will be set there.  */
-        end = (char *) s - 1;
-      errno = save;
-      s = (unsigned char *) end;
-      exponent += value;
-    }
-
-  if (num == 0.0)
-    goto valid;
-
-  if (hex)
-    {
-      /* ldexp takes care of range errors.  */
-      num = ldexp (num, exponent);
-      goto valid;
-    }
-
-  /* Multiply NUM by 10 to the EXPONENT power,
-     checking for overflow and underflow.  */
-
-  if (exponent < 0)
-    {
-      if (num < DBL_MIN * pow (10.0, (double) -exponent))
-        goto underflow;
-    }
-  else if (exponent > 0)
-    {
-      if (num > DBL_MAX * pow (10.0, (double) -exponent))
-        goto overflow;
-    }
-
-  num *= pow (10.0, (double) exponent);
-
- valid:
   if (endptr != NULL)
     *endptr = (char *) s;
   return negative ? -num : num;
-
- overflow:
-  /* Return an overflow error.  */
-  if (endptr != NULL)
-    *endptr = (char *) s;
-  errno = ERANGE;
-  return negative ? -HUGE_VAL : HUGE_VAL;
+}
 
- underflow:
-  /* Return an underflow error.  */
-  if (endptr != NULL)
-    *endptr = (char *) s;
-  errno = ERANGE;
-  return negative ? -0.0 : 0.0;
-
- noconv:
-  /* There was no number.  */
-  if (endptr != NULL)
-    *endptr = (char *) nptr;
-  errno = EINVAL;
-  return 0.0;
+/* The "underlying" strtod implementation.  This must be defined
+   after strtod because it #undefs strtod.  */
+static double
+underlying_strtod (const char *nptr, char **endptr)
+{
+  if (HAVE_RAW_DECL_STRTOD)
+    {
+      /* Prefer the native strtod if available.  Usually it should
+         work and it should give more-accurate results than our
+         approximation.  */
+      #undef strtod
+      return strtod (nptr, endptr);
+    }
+  else
+    {
+      /* Approximate strtod well enough for this module.  There's no
+         need to handle anything but finite unsigned decimal
+         numbers with nonnull ENDPTR.  */
+      return parse_number (nptr, 10, 10, 1, 'e', endptr);
+    }
 }
--- a/m4/strtod.m4	Sun Jul 11 11:45:38 2010 -0700
+++ b/m4/strtod.m4	Mon Jul 12 09:14:10 2010 -0700
@@ -11,7 +11,7 @@
   dnl Don't call AC_FUNC_STRTOD, because it does not have the right guess
   dnl when cross-compiling.
   dnl Don't call AC_CHECK_FUNCS([strtod]) because it would collide with the
-  dnl ac_cv_func_strtod variable set by the AC_FUNC_STRTOD macro,
+  dnl ac_cv_func_strtod variable set by the AC_FUNC_STRTOD macro.
   AC_CHECK_DECLS_ONCE([strtod])
   if test $ac_cv_have_decl_strtod != yes; then
     HAVE_STRTOD=0
@@ -113,12 +113,26 @@
     fi
   fi
   if test $HAVE_STRTOD = 0 || test $REPLACE_STRTOD = 1; then
+    AC_LIBOBJ([strtod])
     gl_PREREQ_STRTOD
-    dnl Use undocumented macro to set POW_LIB correctly.
-    _AC_LIBOBJ_STRTOD
   fi
 ])
 
 # Prerequisites of lib/strtod.c.
-# The need for pow() is already handled by AC_FUNC_STRTOD.
-AC_DEFUN([gl_PREREQ_STRTOD], [:])
+# FIXME: This implementation is a copy of printf-frexp.m4 and should be shared.
+AC_DEFUN([gl_PREREQ_STRTOD], [
+  AC_CACHE_CHECK([whether ldexp can be used without linking with libm],
+    [gl_cv_func_ldexp_no_libm],
+    [
+      AC_TRY_LINK([#include <math.h>
+                   double x;
+                   int y;],
+                  [return ldexp (x, y) < 1;],
+        [gl_cv_func_ldexp_no_libm=yes],
+        [gl_cv_func_ldexp_no_libm=no])
+    ])
+  if test $gl_cv_func_ldexp_no_libm = yes; then
+    AC_DEFINE([HAVE_LDEXP_IN_LIBC], [1],
+      [Define if the ldexp function is available in libc.])
+  fi
+])
--- a/modules/strtod	Sun Jul 11 11:45:38 2010 -0700
+++ b/modules/strtod	Mon Jul 12 09:14:10 2010 -0700
@@ -15,14 +15,10 @@
 gl_STDLIB_MODULE_INDICATOR([strtod])
 
 Makefile.am:
-LIBS += $(POW_LIB)
 
 Include:
 <stdlib.h>
 
-Link:
-$(POW_LIB)
-
 License:
 LGPL
 
--- a/modules/strtod-tests	Sun Jul 11 11:45:38 2010 -0700
+++ b/modules/strtod-tests	Mon Jul 12 09:14:10 2010 -0700
@@ -11,6 +11,5 @@
 configure.ac:
 
 Makefile.am:
-LIBS += $(POW_LIB)
 TESTS += test-strtod
 check_PROGRAMS += test-strtod