# HG changeset patch # User Markus Mützel # Date 1669572511 -3600 # Node ID a6f1111a9f1d3f736b82fb10a08df7196c1d8618 # Parent 38a32b0869a9cdd28ea90fe3d00bc69682777141 readline: Check console codepage for UTF-8 locale on Windows. * src/readline-4-locale.patch: Get console input codepage to check for UTF-8 locale on Windows. Be more careful with setting locale on Windows. * src/readline-2-event-hook.patch: Support non-UTF-8 locale. (Not really compatible to Octave which expects that all character arrays are UTF-8 encoded). * dist-files.mk: Add new file to list. diff -r 38a32b0869a9 -r a6f1111a9f1d dist-files.mk --- a/dist-files.mk Sat Dec 03 12:48:38 2022 -0500 +++ b/dist-files.mk Sun Nov 27 19:08:31 2022 +0100 @@ -668,6 +668,7 @@ readline-1-sigwinch.patch \ readline-2-event-hook.patch \ readline-3-fd_set.patch \ + readline-4-locale.patch \ readline.mk \ release-octave-1-gnulib.patch \ release-octave.mk \ diff -r 38a32b0869a9 -r a6f1111a9f1d src/readline-2-event-hook.patch --- a/src/readline-2-event-hook.patch Sat Dec 03 12:48:38 2022 -0500 +++ b/src/readline-2-event-hook.patch Sun Nov 27 19:08:31 2022 +0100 @@ -1,7 +1,7 @@ diff -urN readline-8.2/input.c.orig readline-8.2/input.c --- readline-8.2/input.c.orig 2022-04-08 21:43:24.000000000 +0200 -+++ readline-8.2/input.c 2022-11-22 16:54:55.099070500 +0100 -@@ -176,6 +176,12 @@ ++++ readline-8.2/input.c 2022-12-01 19:16:34.989739900 +0100 +@@ -176,6 +178,12 @@ static unsigned char ibuffer[512]; static int ibuffer_len = sizeof (ibuffer) - 1; @@ -14,7 +14,7 @@ #define any_typein (push_index != pop_index) int -@@ -306,7 +311,7 @@ +@@ -306,7 +314,7 @@ #if defined (__MINGW32__) /* Use getch/_kbhit to check for available console input, in the same way that we read it normally. */ @@ -23,7 +23,7 @@ result = 0; #endif -@@ -404,7 +409,7 @@ +@@ -404,7 +412,7 @@ #if defined (__MINGW32__) if (isatty (tty)) @@ -32,7 +32,7 @@ #endif return 0; -@@ -799,6 +804,139 @@ +@@ -799,6 +807,142 @@ return (c); } @@ -143,8 +143,11 @@ + unicode[1] = _win32_getch_internal (0, &is_char); + utf16_code_units++; + } -+ /* convert to UTF-8 byte sequence */ -+ int len = WideCharToMultiByte (CP_UTF8, 0, ++ /* convert to current codepage or UTF-8 byte sequence */ ++ unsigned int codepage = CP_THREAD_ACP; ++ if (_rl_utf8locale) ++ codepage = CP_UTF8; ++ int len = WideCharToMultiByte (codepage, 0, + (wchar_t *) &unicode, utf16_code_units, + charbuf, 5, NULL, NULL); + for (int i=0; i + ++#if defined (_WIN32) ++# include ++# include ++#endif ++ + #include "rldefs.h" + #include "readline.h" + #include "rlshell.h" +@@ -109,7 +114,11 @@ + char *cp; + size_t len; + +-#if HAVE_LANGINFO_CODESET ++#if defined (_WIN32) ++ /* On Windows, the relevant "locale" is the selected codepage of the used ++ console. */ ++ return (IsWindows7OrGreater () && (GetConsoleCP () == 65001)); ++#elif HAVE_LANGINFO_CODESET + cp = nl_langinfo (CODESET); + return (STREQ (cp, "UTF-8") || STREQ (cp, "utf8")); + #else +@@ -138,11 +147,18 @@ + #if defined (HAVE_SETLOCALE) + if (lspec == 0 || *lspec == 0) + lspec = setlocale (LC_CTYPE, (char *)NULL); ++#if defined (_WIN32) ++ /* Setting an UTF-8 locale is not a no-op on Windows. Instead the ++ information about the locale is lost. Use whatever we got at this ++ point. */ ++ ret = lspec; ++#else + if (lspec == 0) + lspec = ""; + ret = setlocale (LC_CTYPE, lspec); /* ok, since it does not change locale */ + if (ret == 0 || *ret == 0) + ret = setlocale (LC_CTYPE, (char *)NULL); ++#endif + if (ret == 0 || *ret == 0) + ret = RL_DEFAULT_LOCALE; + #else