# HG changeset patch # User Markus Mützel # Date 1701710689 -3600 # Node ID e424a55bc9fb7ffc17a358a28ba9fe1793c580ba # Parent 25fffec4ecfd45b6b0f38a5dabd0b55fa8309dcb Use own function for symbol name matching on Windows (bug #64975). * glob-match.h, glob-match.cc (symbol_match): Add new class that can efficiently match symbol names cross-platform. * call-stack.cc (call_stack::clear_global_variable_pattern), load-save.cc (matches_patterns, load_save_system::save_fields), ls-hdf5.cc (read_hdf5_data), stack-frame.cc (symbol_cleaner::clear_symbols, symbol_info_accumulator::filter), symtab.cc (symbol_table::clear_function_pattern), variables.cc (name_matches_any_pattern): Use new class to match symbol names. * variables.cc (Fclear): Update docstring to point to difference of patterns on Windows. diff -r 25fffec4ecfd -r e424a55bc9fb libinterp/corefcn/call-stack.cc --- a/libinterp/corefcn/call-stack.cc Sun Dec 10 14:56:43 2023 +0100 +++ b/libinterp/corefcn/call-stack.cc Mon Dec 04 18:24:49 2023 +0100 @@ -840,7 +840,7 @@ void call_stack::clear_global_variable_pattern (const std::string& pattern) { - glob_match pat (pattern); + symbol_match pat (pattern); for (auto& nm_ov : m_global_values) { @@ -1064,7 +1064,7 @@ } else { - glob_match pat (pattern); + symbol_match pat (pattern); for (auto& nm_ov : m_global_values) { diff -r 25fffec4ecfd -r e424a55bc9fb libinterp/corefcn/load-save.cc --- a/libinterp/corefcn/load-save.cc Sun Dec 10 14:56:43 2023 +0100 +++ b/libinterp/corefcn/load-save.cc Mon Dec 04 18:24:49 2023 +0100 @@ -113,7 +113,7 @@ { for (int i = pat_idx; i < num_pat; i++) { - glob_match pattern (patterns[i]); + symbol_match pattern (patterns[i]); if (pattern.match (name)) return true; @@ -1003,7 +1003,7 @@ const load_save_format& fmt, bool save_as_floats) { - glob_match pat (pattern); + symbol_match pat (pattern); std::size_t saved = 0; diff -r 25fffec4ecfd -r e424a55bc9fb libinterp/corefcn/ls-hdf5.cc --- a/libinterp/corefcn/ls-hdf5.cc Sun Dec 10 14:56:43 2023 +0100 
+++ b/libinterp/corefcn/ls-hdf5.cc Mon Dec 04 18:24:49 2023 +0100 @@ -1117,7 +1117,7 @@ for (int i = argv_idx; i < argc; i++) { - glob_match pattern (argv[i]); + symbol_match pattern (argv[i]); if (pattern.match (std::string (&var_name[0]))) { found = true; diff -r 25fffec4ecfd -r e424a55bc9fb libinterp/corefcn/stack-frame.cc --- a/libinterp/corefcn/stack-frame.cc Sun Dec 10 14:56:43 2023 +0100 +++ b/libinterp/corefcn/stack-frame.cc Mon Dec 04 18:24:49 2023 +0100 @@ -760,7 +760,7 @@ { std::string pattern = m_patterns[j]; - glob_match pat (pattern); + symbol_match pat (pattern); for (const auto& sym : symbols) { @@ -1005,7 +1005,7 @@ { std::string pattern = m_patterns[j]; - glob_match pat (pattern); + symbol_match pat (pattern); for (const auto& sym : symbols) { diff -r 25fffec4ecfd -r e424a55bc9fb libinterp/corefcn/symtab.cc --- a/libinterp/corefcn/symtab.cc Sun Dec 10 14:56:43 2023 +0100 +++ b/libinterp/corefcn/symtab.cc Mon Dec 04 18:24:49 2023 +0100 @@ -438,7 +438,7 @@ void symbol_table::clear_function_pattern (const std::string& pat) { - glob_match pattern (pat); + symbol_match pattern (pat); auto p = m_fcn_table.begin (); diff -r 25fffec4ecfd -r e424a55bc9fb libinterp/corefcn/variables.cc --- a/libinterp/corefcn/variables.cc Sun Dec 10 14:56:43 2023 +0100 +++ b/libinterp/corefcn/variables.cc Mon Dec 04 18:24:49 2023 +0100 @@ -948,7 +948,7 @@ } else { - glob_match pattern (patstr); + symbol_match pattern (patstr); if (pattern.match (nm)) { @@ -1161,7 +1161,8 @@ Match the list of characters specified by @var{list}. If the first character is @code{!} or @code{^}, match all characters except those specified by @var{list}. For example, the pattern @code{[a-zA-Z]} will match all lowercase -and uppercase alphabetic characters. +and uppercase alphabetic characters. On Windows, square brackets are matched +literally and are not used to group characters. 
@end table For example, the command
diff -r 25fffec4ecfd -r e424a55bc9fb liboctave/util/glob-match.cc
--- a/liboctave/util/glob-match.cc Sun Dec 10 14:56:43 2023 +0100
+++ b/liboctave/util/glob-match.cc Mon Dec 04 18:24:49 2023 +0100
@@ -59,3 +59,89 @@
 
   return retval;
 }
+
+// Construct a matcher for symbol names from the glob pattern PATTERN.
+//
+// On Windows, patterns are matched by hand in symbol_match::match and m_glob
+// stays null.  On all other platforms, matching is delegated to a glob_match
+// object that is created here.
+
+symbol_match::symbol_match (const std::string& pattern)
+  : m_pat (pattern), m_glob ()
+{
+#if ! defined (OCTAVE_USE_WINDOWS_API)
+  m_glob.reset (new glob_match (pattern));
+#endif
+}
+
+// Return true if the symbol name SYM matches the stored pattern.
+//
+// On Windows, only the wildcards '*' (any sequence of characters, including
+// the empty one) and '?' (exactly one character) are special.  All other
+// characters in the pattern must match literally.  On other platforms, the
+// result is the one returned by glob_match::match.
+
+bool symbol_match::match (const std::string& sym)
+{
+#if defined (OCTAVE_USE_WINDOWS_API)
+
+  // gnulib's fnmatch replacement is slow on Windows.
+  // We don't need full POSIX compatibility to match symbol patterns.
+  // Glob patterns with '*' or '?' should be good enough.
+  // We also do not need to worry about multi-byte characters because symbols
+  // are ASCII-only.
+  octave_idx_type pat_len = m_pat.length ();
+  octave_idx_type pat_idx = 0;
+  octave_idx_type pat_wildc_idx = -1;
+  octave_idx_type sym_len = sym.length ();
+  octave_idx_type sym_idx = 0;
+  // Only read after a '*' has been seen (i.e., pat_wildc_idx != -1).
+  // Initialize it anyway to avoid "maybe uninitialized" compiler warnings.
+  octave_idx_type sym_wildc_idx = 0;
+
+  while (sym_idx < sym_len)
+    {
+      if (pat_idx < pat_len && m_pat[pat_idx] == '*')
+        {
+          // Wildcard '*': remember position in pattern and symbol.  This is
+          // checked first so that a '*' in the pattern is never consumed as
+          // a literal character.
+          pat_wildc_idx = pat_idx;
+          sym_wildc_idx = sym_idx;
+          pat_idx++;
+        }
+      else if (pat_idx < pat_len
+               && (m_pat[pat_idx] == '?' || m_pat[pat_idx] == sym[sym_idx]))
+        {
+          // match to '?' or exact match
+          pat_idx++;
+          sym_idx++;
+        }
+      else if (pat_wildc_idx != -1)
+        {
+          // no match but previous wildcard '*'
+          // revert pat_idx to previous position
+          pat_idx = pat_wildc_idx + 1;
+          // but proceed to next character in symbol and try to match again
+          sym_wildc_idx++;
+          sym_idx = sym_wildc_idx;
+        }
+      else
+        // no exact match and no wildcard
+        return false;
+    }
+
+  // consume potentially trailing '*' in pattern
+  while (pat_idx < pat_len && m_pat[pat_idx] == '*')
+    pat_idx++;
+
+  // check for remaining (unmatched) characters in pattern
+  return pat_idx == pat_len;
+
+#else
+
+  return m_glob->match (sym);
+
+#endif
+}
+
diff -r 25fffec4ecfd -r e424a55bc9fb liboctave/util/glob-match.h
--- a/liboctave/util/glob-match.h Sun Dec 10 14:56:43 2023 +0100
+++ b/liboctave/util/glob-match.h Mon Dec 04 18:24:49 2023 +0100
@@ -96,4 +96,52 @@
   int opts_to_fnmatch_flags (unsigned int xopts) const;
 };
 
+class
+OCTAVE_API
+symbol_match
+{
+
+// This class is meant to provide a performant implementation for symbol
+// matching on all platforms.  For Windows, that is done by manually
+// implementing matching rules for '*' and '?' wildcards.  On other platforms,
+// the matching is deferred to `fnmatch`.  That means that the matching rules
+// differ depending on the platform.  To write cross-platform compatible code
+// with this class, do not use [] groups or ranges, named character classes,
+// collating symbols, or equivalence class expressions.
+
+public:
+
+  symbol_match (const std::string& pattern);
+
+  // The glob_match object is owned exclusively by each symbol_match object.
+  // Copying creates a new matcher for the same pattern.
+  symbol_match (const symbol_match& in)
+    : symbol_match (in.m_pat)
+  { }
+
+  symbol_match& operator = (const symbol_match& in)
+  {
+    if (this != &in)
+      {
+        symbol_match tmp (in);
+        m_pat.swap (tmp.m_pat);
+        m_glob.swap (tmp.m_glob);
+      }
+
+    return *this;
+  }
+
+  ~symbol_match () = default;
+
+  bool match (const std::string& sym);
+
+private:
+
+  // Pattern as passed to the constructor.
+  std::string m_pat;
+
+  // Matcher used on platforms other than Windows (null on Windows).
+  std::unique_ptr<glob_match> m_glob;
+};
+
 #endif