changeset 37318:f3764840624a

regex: port to OS X 10.8.5 en_US.UTF-8 locale. This fixes a bug when ignoring case and when comparing the titlecase letter 'Lj' (U+01C8 LATIN CAPITAL LETTER L WITH SMALL LETTER J) to the corresponding uppercase letter 'LJ' (U+01C7 LATIN CAPITAL LETTER LJ). In the OS X 10.8.5 en_US.UTF-8 locale, the titlecase letter is neither lowercase nor uppercase, but uppercasing the titlecase letter (via towupper) yields the uppercase letter, so the two letters should match when ignoring case. Problem reported by Jim Meyering in <http://debbugs.gnu.org/16911#16>. * lib/regex_internal.c (build_wcs_upper_buffer, build_upper_buffer): Don't test whether a character is lowercase before uppercasing it.
author Paul Eggert <eggert@cs.ucla.edu>
date Thu, 06 Mar 2014 21:53:50 -0800
parents 96c18a9c91ce
children 415b384389c2
files ChangeLog lib/regex_internal.c
diffstat 2 files changed, 19 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Wed Mar 05 17:51:59 2014 -0800
+++ b/ChangeLog	Thu Mar 06 21:53:50 2014 -0800
@@ -1,3 +1,17 @@
+2014-03-06  Paul Eggert  <eggert@cs.ucla.edu>
+
+	regex: port to OS X 10.8.5 en_US.UTF-8 locale
+	This fixes a bug when ignoring case and when comparing the
+	titlecase letter 'Lj' (U+01C8 LATIN CAPITAL LETTER L WITH SMALL
+	LETTER J) to the corresponding uppercase letter 'LJ' (U+01C7 LATIN
+	CAPITAL LETTER LJ).  In the OS X 10.8.5 en_US.UTF-8 locale, the
+	titlecase letter is neither lowercase nor uppercase, but
+	uppercasing the titlecase letter (via towupper) yields the
+	uppercase letter, so the two letters should match when ignoring case.
+	Problem reported by Jim Meyering in <http://debbugs.gnu.org/16911#16>.
+	* lib/regex_internal.c (build_wcs_upper_buffer, build_upper_buffer):
+	Don't test whether a character is lowercase before uppercasing it.
+
 2014-03-04  Kevin Cernekee <cernekee@gmail.com>
 
 	stdint, read-file: fix missing SIZE_MAX on Android (tiny change)
--- a/lib/regex_internal.c	Wed Mar 05 17:51:59 2014 -0800
+++ b/lib/regex_internal.c	Thu Mar 06 21:53:50 2014 -0800
@@ -311,12 +311,11 @@
 			       + byte_idx), remain_len, &pstr->cur_state);
 	  if (BE (mbclen < (size_t) -2, 1))
 	    {
-	      wchar_t wcu = wc;
-	      if (iswlower (wc))
+	      wchar_t wcu = towupper (wc);
+	      if (wcu != wc)
 		{
 		  size_t mbcdlen;
 
-		  wcu = towupper (wc);
 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
 		  if (BE (mbclen == mbcdlen, 1))
 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -381,12 +380,11 @@
 	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
 	if (BE (mbclen < (size_t) -2, 1))
 	  {
-	    wchar_t wcu = wc;
-	    if (iswlower (wc))
+	    wchar_t wcu = towupper (wc);
+	    if (wcu != wc)
 	      {
 		size_t mbcdlen;
 
-		wcu = towupper (wc);
 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
 		if (BE (mbclen == mbcdlen, 1))
 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
@@ -538,10 +536,7 @@
       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
       if (BE (pstr->trans != NULL, 0))
 	ch = pstr->trans[ch];
-      if (islower (ch))
-	pstr->mbs[char_idx] = toupper (ch);
-      else
-	pstr->mbs[char_idx] = ch;
+      pstr->mbs[char_idx] = toupper (ch);
     }
   pstr->valid_len = char_idx;
   pstr->valid_raw_len = char_idx;