Mercurial > gnulib
changeset 18292:24fde31e764d
mbrtowc: work around glibc bug#19932
Fix mbrtowc so that it never returns -1 in the C locale,
as this conflicts with a future version of POSIX
http://austingroupbugs.net/view.php?id=663#c2738
and causes problems with GNU grep: http://bugs.gnu.org/23234
See glibc bug 19932:
https://sourceware.org/bugzilla/show_bug.cgi?id=19932
* doc/posix-functions/mbrlen.texi (mbrlen):
* doc/posix-functions/mbrtowc.texi (mbrtowc):
Document the glibc bug.
* lib/mbrtowc.c [C_LOCALE_MAYBE_EILSEQ]:
Include hard-locale.h, locale.h.
(rpl_mbrtowc): Work around the C_LOCALE_MAYBE_EILSEQ bug,
if the bug is possible.
* m4/mbrtowc.m4 (gl_MBRTOWC_C_LOCALE): New macro.
(gl_FUNC_MBRTOWC): Use it, and define C_LOCALE_MAYBE_EILSEQ as needed.
* modules/hard-locale (License): Now LGPLv2+, for mbrtowc.
* modules/mbrtowc (Depends-on): Add hard-locale.
* modules/mbrtowc-tests (Files, TESTS): Add tests/test-mbrtowc5.sh.
* tests/test-mbrtowc.c (main): Test for bug fix if arg is '5'.
* tests/test-mbrtowc5.sh: New file.
author | Paul Eggert <eggert@penguin.cs.ucla.edu> |
---|---|
date | Sat, 09 Apr 2016 01:28:36 -0700 |
parents | efbdbdd32f55 |
children | 294fa0173b5e |
files | ChangeLog doc/posix-functions/mbrlen.texi doc/posix-functions/mbrtowc.texi lib/mbrtowc.c m4/mbrtowc.m4 modules/hard-locale modules/mbrtowc modules/mbrtowc-tests tests/test-mbrtowc.c tests/test-mbrtowc5.sh |
diffstat | 10 files changed, 128 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog Wed Apr 06 14:02:39 2016 -0700 +++ b/ChangeLog Sat Apr 09 01:28:36 2016 -0700 @@ -1,3 +1,27 @@ +2016-04-09 Paul Eggert <eggert@penguin.cs.ucla.edu> + + mbrtowc: work around glibc bug#19932 + Fix mbrtowc so that it never returns -1 in the C locale, + as this conflicts with a future version of POSIX + http://austingroupbugs.net/view.php?id=663#c2738 + and causes problems with GNU grep: http://bugs.gnu.org/23234 + See glibc bug 19932: + https://sourceware.org/bugzilla/show_bug.cgi?id=19932 + * doc/posix-functions/mbrlen.texi (mbrlen): + * doc/posix-functions/mbrtowc.texi (mbrtowc): + Document the glibc bug. + * lib/mbrtowc.c [C_LOCALE_MAYBE_EILSEQ]: + Include hard-locale.h, locale.h. + (rpl_mbrtowc): Work around the C_LOCALE_MAYBE_EILSEQ bug, + if the bug is possible. + * m4/mbrtowc.m4 (gl_MBRTOWC_C_LOCALE): New macro. + (gl_FUNC_MBRTOWC): Use it, and define C_LOCALE_MAYBE_EILSEQ as needed. + * modules/hard-locale (License): Now LGPLv2+, for mbrtowc. + * modules/mbrtowc (Depends-on): Add hard-locale. + * modules/mbrtowc-tests (Files, TESTS): Add tests/test-mbrtowc5.sh. + * tests/test-mbrtowc.c (main): Test for bug fix if arg is '5'. + * tests/test-mbrtowc5.sh: New file. + 2016-04-03 Pedro Alves <palves@redhat.com> stdint: detect good enough pre-C++11 stdint.h in C++ mode
--- a/doc/posix-functions/mbrlen.texi Wed Apr 06 14:02:39 2016 -0700 +++ b/doc/posix-functions/mbrlen.texi Sat Apr 09 01:28:36 2016 -0700 @@ -12,6 +12,10 @@ This function is missing on some platforms: Minix 3.1.8, HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5. @item +In the C or POSIX locales, this function can return @code{(size_t) -1} +and set @code{errno} to @code{EILSEQ}: +glibc 2.23. +@item This function returns 0 instead of @code{(size_t) -2} when the input is empty: glibc 2.19.
--- a/doc/posix-functions/mbrtowc.texi Wed Apr 06 14:02:39 2016 -0700 +++ b/doc/posix-functions/mbrtowc.texi Sat Apr 09 01:28:36 2016 -0700 @@ -12,6 +12,10 @@ This function is missing on some platforms: Minix 3.1.8, HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5. @item +In the C or POSIX locales, this function can return @code{(size_t) -1} +and set @code{errno} to @code{EILSEQ}: +glibc 2.23. +@item This function returns 0 instead of @code{(size_t) -2} when the input is empty: glibc 2.19.
--- a/lib/mbrtowc.c Wed Apr 06 14:02:39 2016 -0700 +++ b/lib/mbrtowc.c Sat Apr 09 01:28:36 2016 -0700 @@ -20,6 +20,11 @@ /* Specification. */ #include <wchar.h> +#if C_LOCALE_MAYBE_EILSEQ +# include "hard-locale.h" +# include <locale.h> +#endif + #if GNULIB_defined_mbstate_t /* Implement mbrtowc() on top of mbtowc(). */ @@ -328,6 +333,9 @@ size_t rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) { + size_t ret; + wchar_t wc; + # if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG if (s == NULL) { @@ -342,6 +350,9 @@ return (size_t) -2; # endif + if (! pwc) + pwc = &wc; + # if MBRTOWC_RETVAL_BUG { static mbstate_t internal_state; @@ -357,8 +368,7 @@ size_t count = 0; for (; n > 0; s++, n--) { - wchar_t wc; - size_t ret = mbrtowc (&wc, s, 1, ps); + ret = mbrtowc (&wc, s, 1, ps); if (ret == (size_t)(-1)) return (size_t)(-1); @@ -366,8 +376,7 @@ if (ret != (size_t)(-2)) { /* The multibyte character has been completed. */ - if (pwc != NULL) - *pwc = wc; + *pwc = wc; return (wc == 0 ? 0 : count); } } @@ -376,32 +385,23 @@ } # endif + ret = mbrtowc (pwc, s, n, ps); + # if MBRTOWC_NUL_RETVAL_BUG - { - wchar_t wc; - size_t ret = mbrtowc (&wc, s, n, ps); + if (ret < (size_t) -2 && !*pwc) + return 0; +# endif - if (ret != (size_t)(-1) && ret != (size_t)(-2)) - { - if (pwc != NULL) - *pwc = wc; - if (wc == 0) - ret = 0; - } - return ret; - } -# else - { -# if MBRTOWC_NULL_ARG1_BUG - wchar_t dummy; +# if C_LOCALE_MAYBE_EILSEQ + if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE)) + { + unsigned char uc = *s; + *pwc = uc; + return 1; + } +# endif - if (pwc == NULL) - pwc = &dummy; -# endif - - return mbrtowc (pwc, s, n, ps); - } -# endif + return ret; } #endif
--- a/m4/mbrtowc.m4 Wed Apr 06 14:02:39 2016 -0700 +++ b/m4/mbrtowc.m4 Sat Apr 09 01:28:36 2016 -0700 @@ -1,4 +1,4 @@ -# mbrtowc.m4 serial 26 -*- coding: utf-8 -*- +# mbrtowc.m4 serial 27 -*- coding: utf-8 -*- dnl Copyright (C) 2001-2002, 2004-2005, 2008-2016 Free Software Foundation, dnl Inc. dnl This file is free software; the Free Software Foundation @@ -40,6 +40,7 @@ gl_MBRTOWC_RETVAL gl_MBRTOWC_NUL_RETVAL gl_MBRTOWC_EMPTY_INPUT + gl_MBRTOWC_C_LOCALE case "$gl_cv_func_mbrtowc_null_arg1" in *yes) ;; *) AC_DEFINE([MBRTOWC_NULL_ARG1_BUG], [1], @@ -76,6 +77,13 @@ REPLACE_MBRTOWC=1 ;; esac + case $gl_cv_C_locale_sans_EILSEQ in + *yes) ;; + *) AC_DEFINE([C_LOCALE_MAYBE_EILSEQ], [1], + [Define to 1 if the C locale may have encoding errors.]) + REPLACE_MBRTOWC=1 + ;; + esac fi fi ]) @@ -577,6 +585,46 @@ ]) ]) +dnl Test whether mbrtowc reports encoding errors in the C locale. +dnl Although POSIX was never intended to allow this, the GNU C Library +dnl and other implementations do it. See: +dnl https://sourceware.org/bugzilla/show_bug.cgi?id=19932 + +AC_DEFUN([gl_MBRTOWC_C_LOCALE], +[ + AC_CACHE_CHECK([whether the C locale is free of encoding errors], + [gl_cv_C_locale_sans_EILSEQ], + [ + dnl Initial guess, used when cross-compiling or when no suitable locale + dnl is present. + gl_cv_C_locale_sans_EILSEQ="guessing no" + + AC_RUN_IFELSE( + [AC_LANG_PROGRAM( + [[#include <limits.h> + #include <locale.h> + #include <wchar.h> + ]], [[ + int i; + char *locale = setlocale (LC_ALL, "C"); + if (! locale) + return 1; + for (i = CHAR_MIN; i <= CHAR_MAX; i++) + { + char c = i; + wchar_t wc; + mbstate_t mbs = { 0, }; + size_t ss = mbrtowc (&wc, &c, 1, &mbs); + if (1 < ss) + return 1; + } + return 0; + ]])], + [gl_cv_C_locale_sans_EILSEQ=yes], + [gl_cv_C_locale_sans_EILSEQ=no], + [:])]) +]) + # Prerequisites of lib/mbrtowc.c. AC_DEFUN([gl_PREREQ_MBRTOWC], [ :
--- a/modules/hard-locale Wed Apr 06 14:02:39 2016 -0700 +++ b/modules/hard-locale Sat Apr 09 01:28:36 2016 -0700 @@ -20,7 +20,7 @@ "hard-locale.h" License: -GPL +LGPLv2+ Maintainer: Paul Eggert
--- a/modules/mbrtowc Wed Apr 06 14:02:39 2016 -0700 +++ b/modules/mbrtowc Sat Apr 09 01:28:36 2016 -0700 @@ -13,6 +13,7 @@ Depends-on: wchar extensions +hard-locale [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] mbsinit [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] localcharset [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1] streq [test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1]
--- a/modules/mbrtowc-tests Wed Apr 06 14:02:39 2016 -0700 +++ b/modules/mbrtowc-tests Sat Apr 09 01:28:36 2016 -0700 @@ -3,6 +3,7 @@ tests/test-mbrtowc2.sh tests/test-mbrtowc3.sh tests/test-mbrtowc4.sh +tests/test-mbrtowc5.sh tests/test-mbrtowc.c tests/test-mbrtowc-w32-1.sh tests/test-mbrtowc-w32-2.sh @@ -31,6 +32,7 @@ Makefile.am: TESTS += \ test-mbrtowc1.sh test-mbrtowc2.sh test-mbrtowc3.sh test-mbrtowc4.sh \ + test-mbrtowc5.sh \ test-mbrtowc-w32-1.sh test-mbrtowc-w32-2.sh test-mbrtowc-w32-3.sh \ test-mbrtowc-w32-4.sh test-mbrtowc-w32-5.sh TESTS_ENVIRONMENT += \ @@ -39,4 +41,3 @@ LOCALE_JA='@LOCALE_JA@' \ LOCALE_ZH_CN='@LOCALE_ZH_CN@' check_PROGRAMS += test-mbrtowc test-mbrtowc-w32 -
--- a/tests/test-mbrtowc.c Wed Apr 06 14:02:39 2016 -0700 +++ b/tests/test-mbrtowc.c Sat Apr 09 01:28:36 2016 -0700 @@ -72,6 +72,10 @@ for (c = 0; c < 0x100; c++) switch (c) { + default: + if (! (c && 1 < argc && argv[1][0] == '5')) + break; + /* Fall through. */ case '\t': case '\v': case '\f': case ' ': case '!': case '"': case '#': case '%': case '&': case '\'': case '(': case ')': case '*': @@ -93,7 +97,8 @@ case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '{': case '|': case '}': case '~': - /* c is in the ISO C "basic character set". */ + /* c is in the ISO C "basic character set", or argv[1] starts + with '5' so we are testing all nonnull bytes. */ buf[0] = c; wc = (wchar_t) 0xBADFACE; ret = mbrtowc (&wc, buf, 1, &state); @@ -334,6 +339,10 @@ ASSERT (mbsinit (&state)); } return 0; + + case '5': + /* C locale; tested above. */ + return 0; } return 1;