changeset 6554:a6f1111a9f1d

readline: Check console codepage for UTF-8 locale on Windows. * src/readline-4-locale.patch: Get console input codepage to check for UTF-8 locale on Windows. Be more careful with setting locale on Windows. * src/readline-2-event-hook.patch: Support non-UTF-8 locale. (Not really compatible with Octave, which expects all character arrays to be UTF-8 encoded.) * dist-files.mk: Add new file to list.
author Markus Mützel <markus.muetzel@gmx.de>
date Sun, 27 Nov 2022 19:08:31 +0100
parents 38a32b0869a9
children 67d95090a70a
files dist-files.mk src/readline-2-event-hook.patch src/readline-4-locale.patch
diffstat 3 files changed, 58 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/dist-files.mk	Sat Dec 03 12:48:38 2022 -0500
+++ b/dist-files.mk	Sun Nov 27 19:08:31 2022 +0100
@@ -668,6 +668,7 @@
   readline-1-sigwinch.patch \
   readline-2-event-hook.patch \
   readline-3-fd_set.patch \
+  readline-4-locale.patch \
   readline.mk \
   release-octave-1-gnulib.patch \
   release-octave.mk \
--- a/src/readline-2-event-hook.patch	Sat Dec 03 12:48:38 2022 -0500
+++ b/src/readline-2-event-hook.patch	Sun Nov 27 19:08:31 2022 +0100
@@ -1,7 +1,7 @@
 diff -urN readline-8.2/input.c.orig readline-8.2/input.c
 --- readline-8.2/input.c.orig	2022-04-08 21:43:24.000000000 +0200
-+++ readline-8.2/input.c	2022-11-22 16:54:55.099070500 +0100
-@@ -176,6 +176,12 @@
++++ readline-8.2/input.c	2022-12-01 19:16:34.989739900 +0100
+@@ -176,6 +178,12 @@
  static unsigned char ibuffer[512];
  static int ibuffer_len = sizeof (ibuffer) - 1;
  
@@ -14,7 +14,7 @@
  #define any_typein (push_index != pop_index)
  
  int
-@@ -306,7 +311,7 @@
+@@ -306,7 +314,7 @@
  #if defined (__MINGW32__)
    /* Use getch/_kbhit to check for available console input, in the same way
       that we read it normally. */
@@ -23,7 +23,7 @@
     result = 0;
  #endif
  
-@@ -404,7 +409,7 @@
+@@ -404,7 +412,7 @@
  
  #if defined (__MINGW32__)
    if (isatty (tty))
@@ -32,7 +32,7 @@
  #endif
  
    return 0;
-@@ -799,6 +804,139 @@
+@@ -799,6 +807,142 @@
    return (c);
  }
  
@@ -143,8 +143,11 @@
 +          unicode[1] = _win32_getch_internal (0, &is_char);
 +          utf16_code_units++;
 +        }
-+      /* convert to UTF-8 byte sequence */
-+      int len = WideCharToMultiByte (CP_UTF8, 0,
++      /* convert to a byte sequence in UTF-8 or the thread's ANSI codepage */
++      unsigned int codepage = CP_THREAD_ACP;
++      if (_rl_utf8locale)
++        codepage = CP_UTF8;
++      int len = WideCharToMultiByte (codepage, 0,
 +                                     (wchar_t *) &unicode, utf16_code_units,
 +                                     charbuf, 5, NULL, NULL);
 +      for (int i=0; i<len; i++)
@@ -172,7 +175,7 @@
  int
  rl_getc (FILE *stream)
  {
-@@ -818,8 +937,11 @@
+@@ -818,8 +962,11 @@
        /* We know at this point that _rl_caught_signal == 0 */
  
  #if defined (__MINGW32__)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/readline-4-locale.patch	Sun Nov 27 19:08:31 2022 +0100
@@ -0,0 +1,46 @@
+--- readline-8.2/nls.c.orig	2022-11-22 18:41:48.302144300 +0100
++++ readline-8.2/nls.c	2022-12-03 18:37:54.991412600 +0100
+@@ -49,6 +49,11 @@
+ 
+ #include <ctype.h>
+ 
++#if defined (_WIN32)
++#  include <windows.h>
++#  include <versionhelpers.h>
++#endif
++
+ #include "rldefs.h"
+ #include "readline.h"
+ #include "rlshell.h"
+@@ -109,7 +114,11 @@
+   char *cp;
+   size_t len;
+ 
+-#if HAVE_LANGINFO_CODESET
++#if defined (_WIN32)
++  /* On Windows, the relevant "locale" is the input codepage of the console
++     attached to this process. */
++  return (IsWindows7OrGreater () && (GetConsoleCP () == 65001));
++#elif HAVE_LANGINFO_CODESET
+   cp = nl_langinfo (CODESET);
+   return (STREQ (cp, "UTF-8") || STREQ (cp, "utf8"));
+ #else
+@@ -138,11 +147,18 @@
+ #if defined (HAVE_SETLOCALE)
+   if (lspec == 0 || *lspec == 0)
+     lspec = setlocale (LC_CTYPE, (char *)NULL);
++#if defined (_WIN32)
++  /* Setting a UTF-8 locale is not a no-op on Windows.  Instead, the
++     information about the locale is lost.  Use whatever we got at this
++     point. */
++  ret = lspec;
++#else
+   if (lspec == 0)
+     lspec = "";
+   ret = setlocale (LC_CTYPE, lspec);	/* ok, since it does not change locale */
+   if (ret == 0 || *ret == 0)
+     ret = setlocale (LC_CTYPE, (char *)NULL);
++#endif
+   if (ret == 0 || *ret == 0)
+     ret = RL_DEFAULT_LOCALE;
+ #else