changeset 32572:e424a55bc9fb stable

Use own function for symbol name matching on Windows (bug #64975). * glob-match.h, glob-match.cc (symbol_match): Add new class that can efficiently match symbol names cross-platform. * call-stack.cc (call_stack::clear_global_variable_pattern), load-save.cc (matches_patterns, load_save_system::save_fields), ls-hdf5.cc (read_hdf5_data), stack-frame.cc (symbol_cleaner::clear_symbols, symbol_info_accumulator::filter), symtab.cc (symbol_table::clear_function_pattern), variables.cc (name_matches_any_pattern): Use new class to match symbol names. * variables.cc (Fclear): Update docstring to point to difference of patterns on Windows.
author Markus Mützel <markus.muetzel@gmx.de>
date Mon, 04 Dec 2023 18:24:49 +0100
parents 25fffec4ecfd
children 9f493031b539 b08a96c70196
files libinterp/corefcn/call-stack.cc libinterp/corefcn/load-save.cc libinterp/corefcn/ls-hdf5.cc libinterp/corefcn/stack-frame.cc libinterp/corefcn/symtab.cc libinterp/corefcn/variables.cc liboctave/util/glob-match.cc liboctave/util/glob-match.h
diffstat 8 files changed, 115 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/call-stack.cc	Sun Dec 10 14:56:43 2023 +0100
+++ b/libinterp/corefcn/call-stack.cc	Mon Dec 04 18:24:49 2023 +0100
@@ -840,7 +840,7 @@
 
 void call_stack::clear_global_variable_pattern (const std::string& pattern)
 {
-  glob_match pat (pattern);
+  symbol_match pat (pattern);
 
   for (auto& nm_ov : m_global_values)
     {
@@ -1064,7 +1064,7 @@
         }
       else
         {
-          glob_match pat (pattern);
+          symbol_match pat (pattern);
 
           for (auto& nm_ov : m_global_values)
             {
--- a/libinterp/corefcn/load-save.cc	Sun Dec 10 14:56:43 2023 +0100
+++ b/libinterp/corefcn/load-save.cc	Mon Dec 04 18:24:49 2023 +0100
@@ -113,7 +113,7 @@
 {
   for (int i = pat_idx; i < num_pat; i++)
     {
-      glob_match pattern (patterns[i]);
+      symbol_match pattern (patterns[i]);
 
       if (pattern.match (name))
         return true;
@@ -1003,7 +1003,7 @@
     const load_save_format& fmt,
     bool save_as_floats)
 {
-  glob_match pat (pattern);
+  symbol_match pat (pattern);
 
   std::size_t saved = 0;
 
--- a/libinterp/corefcn/ls-hdf5.cc	Sun Dec 10 14:56:43 2023 +0100
+++ b/libinterp/corefcn/ls-hdf5.cc	Mon Dec 04 18:24:49 2023 +0100
@@ -1117,7 +1117,7 @@
 
       for (int i = argv_idx; i < argc; i++)
         {
-          glob_match pattern (argv[i]);
+          symbol_match pattern (argv[i]);
           if (pattern.match (std::string (&var_name[0])))
             {
               found = true;
--- a/libinterp/corefcn/stack-frame.cc	Sun Dec 10 14:56:43 2023 +0100
+++ b/libinterp/corefcn/stack-frame.cc	Mon Dec 04 18:24:49 2023 +0100
@@ -760,7 +760,7 @@
           {
             std::string pattern = m_patterns[j];
 
-            glob_match pat (pattern);
+            symbol_match pat (pattern);
 
             for (const auto& sym : symbols)
               {
@@ -1005,7 +1005,7 @@
           {
             std::string pattern = m_patterns[j];
 
-            glob_match pat (pattern);
+            symbol_match pat (pattern);
 
             for (const auto& sym : symbols)
               {
--- a/libinterp/corefcn/symtab.cc	Sun Dec 10 14:56:43 2023 +0100
+++ b/libinterp/corefcn/symtab.cc	Mon Dec 04 18:24:49 2023 +0100
@@ -438,7 +438,7 @@
 
 void symbol_table::clear_function_pattern (const std::string& pat)
 {
-  glob_match pattern (pat);
+  symbol_match pattern (pat);
 
   auto p = m_fcn_table.begin ();
 
--- a/libinterp/corefcn/variables.cc	Sun Dec 10 14:56:43 2023 +0100
+++ b/libinterp/corefcn/variables.cc	Mon Dec 04 18:24:49 2023 +0100
@@ -948,7 +948,7 @@
             }
           else
             {
-              glob_match pattern (patstr);
+              symbol_match pattern (patstr);
 
               if (pattern.match (nm))
                 {
@@ -1161,7 +1161,8 @@
 Match the list of characters specified by @var{list}.  If the first character
 is @code{!} or @code{^}, match all characters except those specified by
 @var{list}.  For example, the pattern @code{[a-zA-Z]} will match all lowercase
-and uppercase alphabetic characters.
+and uppercase alphabetic characters.  On Windows, square brackets are matched
+literally and are not used to group characters.
 @end table
 
 For example, the command
--- a/liboctave/util/glob-match.cc	Sun Dec 10 14:56:43 2023 +0100
+++ b/liboctave/util/glob-match.cc	Mon Dec 04 18:24:49 2023 +0100
@@ -59,3 +59,75 @@
 
   return retval;
 }
+
+symbol_match::symbol_match (const std::string& pattern)
+{
+  m_pat = pattern;  // stored on every platform; read by the Windows matcher
+
+#if defined (OCTAVE_USE_WINDOWS_API)
+  m_glob = nullptr;  // matching is done by hand in match (); no glob_match
+#else
+  m_glob.reset (new glob_match (pattern));  // match () defers to glob_match
+#endif
+}
+
+bool symbol_match::match (const std::string& sym)
+{
+#if defined (OCTAVE_USE_WINDOWS_API)
+
+  // gnulib's fnmatch replacement is slow on Windows, so match by hand here.
+  // Only the wildcards '*' (any run of characters, possibly empty) and '?'
+  // (exactly one character) are special; every other character in the
+  // pattern, including '[' and ']', must match literally.  Comparison is
+  // byte-wise, which is sufficient because symbols are ASCII-only.
+  octave_idx_type pat_len = m_pat.length ();
+  octave_idx_type pat_idx = 0;
+  octave_idx_type pat_wildc_idx = -1;  // position of the last '*' seen, or -1
+  octave_idx_type sym_len = sym.length ();
+  octave_idx_type sym_idx = 0;
+  octave_idx_type sym_wildc_idx = -1;  // symbol position paired with that '*'
+
+  while (sym_idx < sym_len)
+    {
+      if (pat_idx < pat_len
+          && (m_pat[pat_idx] == '?' || m_pat[pat_idx] == sym[sym_idx]))
+        {
+          // match to '?' or exact match
+          pat_idx++;
+          sym_idx++;
+        }
+      else if (pat_idx < pat_len && m_pat[pat_idx] == '*')
+        {
+          // remember position in pattern and symbol
+          pat_wildc_idx = pat_idx;
+          sym_wildc_idx = sym_idx;
+          pat_idx++;
+        }
+      else if (pat_wildc_idx != -1)
+        {
+          // no match but previous wildcard '*'
+          // revert pat_idx to previous position
+          pat_idx = pat_wildc_idx + 1;
+          // but proceed to next character in symbol and try to match again
+          sym_wildc_idx++;
+          sym_idx = sym_wildc_idx;
+        }
+      else
+        // no exact match and no wildcard
+        return false;
+    }
+
+  // consume potentially trailing '*' in pattern
+  while (pat_idx < pat_len && m_pat[pat_idx] == '*')
+    pat_idx++;
+
+  // the symbol matches only if the whole pattern has been consumed
+  return pat_idx == pat_len;
+
+#else
+
+  return m_glob->match (sym);
+
+#endif
+}
+
--- a/liboctave/util/glob-match.h	Sun Dec 10 14:56:43 2023 +0100
+++ b/liboctave/util/glob-match.h	Mon Dec 04 18:24:49 2023 +0100
@@ -96,4 +96,36 @@
   int opts_to_fnmatch_flags (unsigned int xopts) const;
 };
 
+class
+OCTAVE_API
+symbol_match
+{
+
+// This class is meant to provide a performant implementation for symbol
+// matching on all platforms.  For Windows, that is done by manually
+// implementing matching rules for '*' and '?' wildcards.  On other platforms,
+// the matching is deferred to `fnmatch`.  That means that the matching rules
+// differ depending on the platform.  To write cross-platform compatible code
+// with this class, do not use [] groups or ranges, named character classes,
+// collating symbols, or equivalence class expressions.
+
+public:
+
+  symbol_match (const std::string& pattern);
+
+  symbol_match (const symbol_match&) = delete;  // unique_ptr member: no copy
+
+  symbol_match& operator = (const symbol_match&) = delete;  // likewise
+
+  ~symbol_match () = default;
+
+  bool match (const std::string& sym);  // true if SYM matches the pattern
+
+private:
+
+  std::string m_pat;  // the pattern as passed to the constructor
+
+  std::unique_ptr<glob_match> m_glob;  // null when OCTAVE_USE_WINDOWS_API
+};
+
 #endif