changeset 17728:61d8a212a342

localcharset, localename: MS-Windows support for non-default locales * lib/localcharset.c (locale_charset) [WINDOWS_NATIVE]: Before falling back on the default system codepage, try extracting the codepage from what 'setlocale' returns. This takes into account changes of the codeset due to a non-default locale set by a previous call to 'setlocale'. * lib/localename.c (LOCALE_NAME_MAX_LENGTH) [WINDOWS_NATIVE]: Define if not already defined. (enum_locales_fn, get_lcid) [WINDOWS_NATIVE]: New functions. (gl_locale_name_thread) [WINDOWS_NATIVE]: Produce the current locale by calling 'setlocale', then converting the locale name into an LCID by calling 'get_lcid'. This takes into account changes in the current locale from the default one, in contrast to GetThreadLocale.
author Eli Zaretskii <eliz@gnu.org>
date Tue, 15 Jul 2014 12:18:48 -0700
parents 2539dbbdf52a
children 874a5cc03904
files ChangeLog lib/localcharset.c lib/localename.c
diffstat 3 files changed, 132 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Tue Jul 09 15:54:48 2013 +0900
+++ b/ChangeLog	Tue Jul 15 12:18:48 2014 -0700
@@ -1,3 +1,20 @@
+2014-07-15  Eli Zaretskii  <eliz@gnu.org>
+
+	localcharset, localename: MS-Windows support for non-default locales
+	* lib/localcharset.c (locale_charset) [WINDOWS_NATIVE]: Before
+	falling back on the default system codepage, try extracting
+	the codepage from what 'setlocale' returns.  This takes into
+	account changes of the codeset due to a non-default locale
+	set by a previous call to 'setlocale'.
+	* lib/localename.c (LOCALE_NAME_MAX_LENGTH) [WINDOWS_NATIVE]:
+	Define if not already defined.
+	(enum_locales_fn, get_lcid) [WINDOWS_NATIVE]: New functions.
+	(gl_locale_name_thread) [WINDOWS_NATIVE]: Produce the
+	current locale by calling 'setlocale', then converting the
+	locale name into an LCID by calling 'get_lcid'.  This takes
+	into account changes in the current locale from the default
+	one, in contrast to GetThreadLocale.
+
 2014-07-14  Daiki Ueno  <ueno@gnu.org>
 
 	announce-gen: avoid failure when Digest::SHA is installed
--- a/lib/localcharset.c	Tue Jul 09 15:54:48 2013 +0900
+++ b/lib/localcharset.c	Tue Jul 15 12:18:48 2014 -0700
@@ -34,6 +34,7 @@
 
 #if defined _WIN32 || defined __WIN32__
 # define WINDOWS_NATIVE
+# include <locale.h>
 #endif
 
 #if defined __EMX__
@@ -461,14 +462,34 @@
 
   static char buf[2 + 10 + 1];
 
-  /* The Windows API has a function returning the locale's codepage as a
-     number: GetACP().
-     When the output goes to a console window, it needs to be provided in
-     GetOEMCP() encoding if the console is using a raster font, or in
-     GetConsoleOutputCP() encoding if it is using a TrueType font.
-     But in GUI programs and for output sent to files and pipes, GetACP()
-     encoding is the best bet.  */
-  sprintf (buf, "CP%u", GetACP ());
+  /* The Windows API has a function returning the locale's codepage as
+     a number, but the value doesn't change according to what the
+     'setlocale' call specified.  So we use it as a last resort, in
+     case the string returned by 'setlocale' doesn't specify the
+     codepage.  */
+  char *current_locale = setlocale (LC_ALL, NULL);
+  char *pdot;
+
+  /* If they set different locales for different categories,
+     'setlocale' will return a semi-colon separated list of locale
+     values.  To make sure we use the correct one, we choose LC_CTYPE.  */
+  if (strchr (current_locale, ';'))
+    current_locale = setlocale (LC_CTYPE, NULL);
+
+  pdot = strrchr (current_locale, '.');
+  if (pdot)
+    sprintf (buf, "CP%s", pdot + 1);
+  else
+    {
+      /* The Windows API has a function returning the locale's codepage as a
+        number: GetACP().
+        When the output goes to a console window, it needs to be provided in
+        GetOEMCP() encoding if the console is using a raster font, or in
+        GetConsoleOutputCP() encoding if it is using a TrueType font.
+        But in GUI programs and for output sent to files and pipes, GetACP()
+        encoding is the best bet.  */
+      sprintf (buf, "CP%u", GetACP ());
+    }
   codeset = buf;
 
 #elif defined OS2
--- a/lib/localename.c	Tue Jul 09 15:54:48 2013 +0900
+++ b/lib/localename.c	Tue Jul 15 12:18:48 2014 -0700
@@ -60,6 +60,7 @@
 #if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Native Windows or Cygwin */
 # define WIN32_LEAN_AND_MEAN
 # include <windows.h>
+# include <winnls.h>
 /* List of language codes, sorted by value:
    0x01 LANG_ARABIC
    0x02 LANG_BULGARIAN
@@ -1124,6 +1125,9 @@
 # ifndef LOCALE_SNAME
 # define LOCALE_SNAME 0x5c
 # endif
+# ifndef LOCALE_NAME_MAX_LENGTH
+# define LOCALE_NAME_MAX_LENGTH 85
+# endif
 #endif
 
 
@@ -2502,6 +2506,68 @@
   return gl_locale_name_from_win32_LANGID (langid);
 }
 
+# ifdef WINDOWS_NATIVE
+
+/* State shared by get_lcid and its enumeration callback: LNAME is the
+   name being looked up, FOUND_LCID the matching LCID (0 if none).  */
+static LCID found_lcid;
+static char lname[LC_MAX * (LOCALE_NAME_MAX_LENGTH + 1) + 1];
+
+/* Callback for EnumSystemLocalesA; LOCALE_NUM_STR is an LCID in hex.
+   If the LCID's English "<language>_<country>" name equals LNAME up to
+   its end or its '.', store the LCID in FOUND_LCID and return FALSE to
+   stop enumerating.  ANSI API, as all buffers here are 'char'.  */
+static BOOL CALLBACK
+enum_locales_fn (LPSTR locale_num_str)
+{
+  char locval[2 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
+  LCID try_lcid = strtoul (locale_num_str, NULL, 16);
+
+  if (GetLocaleInfoA (try_lcid, LOCALE_SENGLANGUAGE,
+                      locval, LOCALE_NAME_MAX_LENGTH))
+    {
+      strcat (locval, "_");
+      if (GetLocaleInfoA (try_lcid, LOCALE_SENGCOUNTRY,
+                          locval + strlen (locval), LOCALE_NAME_MAX_LENGTH))
+        {
+          size_t locval_len = strlen (locval);
+
+          if (strncmp (locval, lname, locval_len) == 0
+              && (lname[locval_len] == '.' || lname[locval_len] == '\0'))
+            {
+              found_lcid = try_lcid;
+              return FALSE;
+            }
+        }
+    }
+  return TRUE;
+}
+
+/* Return the LCID of the supported system locale whose name matches
+   LOCALE_NAME (as returned by 'setlocale'), or 0 if there is none.
+   Keeps state in static variables without any locking.  */
+static LCID
+get_lcid (const char *locale_name)
+{
+  /* One-entry cache; names too long for LAST_LOCALE are not cached.  */
+  static LCID last_lcid;
+  static char last_locale[1000];
+
+  if (last_lcid > 0 && strcmp (locale_name, last_locale) == 0)
+    return last_lcid;
+  strncpy (lname, locale_name, sizeof (lname) - 1);
+  lname[sizeof (lname) - 1] = '\0';
+  found_lcid = 0;
+  EnumSystemLocalesA (enum_locales_fn, LCID_SUPPORTED);
+  if (found_lcid > 0 && strlen (locale_name) < sizeof (last_locale))
+    {
+      last_lcid = found_lcid;
+      strcpy (last_locale, locale_name);
+    }
+  return found_lcid;
+}
+
+# endif
 #endif
 
 
@@ -2660,6 +2726,26 @@
   const char *name = gl_locale_name_thread_unsafe (category, categoryname);
   if (name != NULL)
     return struniq (name);
+#elif defined WINDOWS_NATIVE
+  if (LC_MIN <= category && category <= LC_MAX)
+    {
+      char *locname = setlocale (category, NULL);
+
+      /* If CATEGORY is LC_ALL, the result might be a semi-colon
+        separated list of locales.  We need only one, so we take the
+        one corresponding to LC_CTYPE, as the most important for
+        character translations.  */
+      if (strchr (locname, ';'))
+       locname = setlocale (LC_CTYPE, NULL);
+
+    /* Convert locale name to LCID.  We don't want to use
+       LocaleNameToLCID because (a) it is only available since Vista,
+       and (b) it doesn't accept locale names returned by 'setlocale'.  */
+    LCID lcid = get_lcid (locname);
+
+    if (lcid > 0)
+      return gl_locale_name_from_win32_LCID (lcid);
+  }
 #endif
   return NULL;
 }