Mercurial > octave
changeset 29091:b924b916dc91
matlab.lang.makeValidName: reimplement in C++ (patch #9998)
Rationale: `matlab.lang.makeValidName` was pure Octave code and only available
from the interpreter via `octave::feval`. This is too slow for an Octave
function ensuring a valid variable name, especially, when called many times
from C++. For example in jsondecode.
* libinterp/corefcn/utils.h: new function `octave::make_valid_name` and new
helper class `octave::make_valid_name_options`.
* libinterp/corefcn/utils.cc: new functions `F__make_valid_name__` and
`octave::make_valid_name` and new helper class
`octave::make_valid_name_options`.
* scripts/+matlab/+lang/makeValidName.m: replace Octave code implementation
by `F__make_valid_name__` call.
author | Kai T. Ohlhus <k.ohlhus@gmail.com> |
---|---|
date | Mon, 23 Nov 2020 16:11:46 +0900 |
parents | f61d1faacfca |
children | 38e22065d9ec |
files | libinterp/corefcn/utils.cc libinterp/corefcn/utils.h scripts/+matlab/+lang/makeValidName.m |
diffstat | 3 files changed, 223 insertions(+), 99 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/utils.cc Mon Nov 23 12:41:34 2020 +0900 +++ b/libinterp/corefcn/utils.cc Mon Nov 23 16:11:46 2020 +0900 @@ -133,6 +133,166 @@ namespace octave { + bool + make_valid_name (std::string& str, const make_valid_name_options& options) + { + // If `isvarname (str)`, no modifications necessary. + if (valid_identifier (str) && ! iskeyword (str)) + return false; + + // Change whitespace followed by lowercase letter to uppercase, except + // for the first + bool previous = false; + bool any_non_space = false; + for (char& c : str) + { + c = ((any_non_space && previous && std::isalpha (c)) ? std::toupper (c) + : c); + previous = std::isspace (c); + any_non_space |= (! previous); // once true, always true + } + + // Remove any whitespace. + str.erase (std::remove_if (str.begin(), str.end(), + [] (unsigned char x) + { return std::isspace(x); }), + str.end()); + if (str.empty ()) + str = options.get_prefix (); + + // Add prefix and capitalize first character, if `str` is a reserved + // keyword. + if (iskeyword (str)) + { + str[0] = std::toupper (str[0]); + str = options.get_prefix () + str; + } + + // Add prefix if first character is not a letter or underscore. + if (! std::isalpha (str[0]) && str[0] != '_') + str = options.get_prefix () + str; + + // Replace non alphanumerics or underscores + if (options.get_replacement_style () == "underscore") + for (char& c : str) + c = (std::isalnum (c) ? c : '_'); + else if (options.get_replacement_style () == "delete") + str.erase (std::remove_if (str.begin(), str.end(), + [] (unsigned char x) + { return ! std::isalnum (x) && x != '_'; }), + str.end()); + else if (options.get_replacement_style () == "hex") + { + const std::string permitted_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "_0123456789"; + // Get the first non-permitted char. + size_t pos = str.find_first_not_of (permitted_chars); + // Buffer for hex string "0xFF" (+1 for null terminator). + char hex_str[5]; + // Repeat until end of string. + while (pos != std::string::npos) + { + // Replace non-permitted char by it's hex value. + std::snprintf (hex_str, sizeof (hex_str), "0x%02X", str[pos]); + str.replace (pos, 1, hex_str); + // Get the next occurrence from the last position. + // (-1 for null terminator) + pos = str.find_first_not_of (permitted_chars, + pos + sizeof (hex_str) - 1); + } + } + + return true; + } + + make_valid_name_options::make_valid_name_options (const octave_value_list& args) + { + auto nargs = args.length (); + if (nargs == 0) + return; + + // nargs = 2, 4, 6, ... permitted + if (nargs % 2) + error ("makeValidName: property/value options must occur in pairs"); + + auto str_to_lower = [] (std::string& s) + { + std::transform (s.begin(), s.end(), s.begin(), + [] (unsigned char c) + { return std::tolower(c); }); + }; + + for (auto i = 0; i < nargs; i = i + 2) + { + std::string parameter = args(i).xstring_value ("makeValidName: " + "option argument must be a string"); + str_to_lower (parameter); + if (parameter == "replacementstyle") + { + m_replacement_style = args(i + 1).xstring_value ("makeValidName: " + "'ReplacementStyle' value must be a string"); + str_to_lower (m_replacement_style); + if ((m_replacement_style != "underscore") + && (m_replacement_style != "delete") + && (m_replacement_style != "hex")) + error ("makeValidName: invalid 'ReplacementStyle' value '%s'", + m_replacement_style.c_str ()); + } + else if (parameter == "prefix") + { + m_prefix = args(i + 1).xstring_value ("makeValidName: " + "'Prefix' value must be a string"); + if (! octave::valid_identifier (m_prefix) + || octave::iskeyword (m_prefix)) + error ("makeValidName: invalid 'Prefix' value '%s'", + m_prefix.c_str ()); + } + else + error ("makeValidName: unknown property '%s'", parameter.c_str ()); + } + } +} + +DEFUN (__make_valid_name__, args, , + doc: /* -*- texinfo -*- +@deftypefn {} {@var{varname} =} __make_valid_name__ (@var{str}) +@deftypefnx {} {@var{varname} =} __make_valid_name__ (@var{str}, @qcode{"ReplacementStyle"}) +@deftypefnx {} {@var{varname} =} __make_valid_name__ (@var{str}, @qcode{"ReplacementStyle"}, @qcode{"Prefix"}) +@deftypefnx {} {[@var{varname}, @var{ismodified}] =} __make_valid_name__ (@dots{}) +Return a valid variable name @var{varname} from input @var{str}. + +For more documentation, see @code{matlab.lang.makeValidName}. + +@seealso{isvarname, matlab.lang.makeValidName} +@end deftypefn */) +{ + auto nargin = args.length (); + if (nargin < 1) + print_usage (); + + octave::make_valid_name_options options (args.slice (1, nargin - 1)); + + if (args(0).is_string ()) + { + std::string varname = args(0).string_value (); + bool is_modified = octave::make_valid_name (varname, options); + return ovl (varname, is_modified); + } + else if (args(0).iscellstr ()) + { + Array<std::string> varnames = args(0).cellstr_value (); + Array<bool> is_modified (varnames.dims ()); + for (auto i = 0; i < varnames.numel (); i++) + is_modified(i) = octave::make_valid_name (varnames(i), options); + return ovl (varnames, is_modified); + } + else + error ("makeValidName: STR must be a string or cellstr"); +} + +namespace octave +{ // Return TRUE if F and G are both names for the same file. bool same_file (const std::string& f, const std::string& g)
--- a/libinterp/corefcn/utils.h Mon Nov 23 12:41:34 2020 +0900 +++ b/libinterp/corefcn/utils.h Mon Nov 23 16:11:46 2020 +0900 @@ -46,6 +46,63 @@ extern OCTINTERP_API bool valid_identifier (const char *s); extern OCTINTERP_API bool valid_identifier (const std::string& s); + //! Helper class for `make_valid_name` function calls. + //! + //! Extracting options separately for multiple (e.g. 1000+) function calls + //! avoids expensive repetitive parsing of the very same options. + + class + OCTINTERP_API + make_valid_name_options + { + public: + + //! Default options for `make_valid_name` function calls. + //! + //! Calling the constructor without arguments is equivalent to: + //! + //! @code{.cc} + //! make_valid_name_options (ovl ("ReplacementStyle", "underscore", + //! "Prefix", "x")); + //! @endcode + + make_valid_name_options () = default; + + //! Extract attribute-value-pairs from an octave_value_list of strings. + //! + //! If attributes occur multiple times, the rightmost pair is chosen. + //! + //! @code{.cc} + //! make_valid_name_options (ovl ("ReplacementStyle", "hex", ...)); + //! @endcode + + make_valid_name_options (const octave_value_list& args); + + //! @return ReplacementStyle, see `help matlab.lang.makeValidName`. + + const std::string& + get_replacement_style () const { return m_replacement_style; } + + //! @return Prefix, see `help matlab.lang.makeValidName`. + + const std::string& get_prefix () const { return m_prefix; } + + private: + + std::string m_replacement_style{"underscore"}; + std::string m_prefix{"x"}; + }; + + //! Modify @p str to be a valid variable name. + //! + //! @param str input string + //! @param options see also `help matlab.lang.makeValidName`. + //! + //! @return true, if @p str was modified. + + extern OCTINTERP_API bool + make_valid_name (std::string& str, const make_valid_name_options& options); + extern OCTINTERP_API bool same_file (const std::string& f, const std::string& g);
--- a/scripts/+matlab/+lang/makeValidName.m Mon Nov 23 12:41:34 2020 +0900 +++ b/scripts/+matlab/+lang/makeValidName.m Mon Nov 23 16:11:46 2020 +0900 @@ -65,102 +65,9 @@ ## @seealso{iskeyword, isvarname, matlab.lang.makeUniqueStrings} ## @end deftypefn -function [varname, ismodified] = makeValidName (str, varargin) - - if (nargin == 0) - print_usage (); - endif - - if (! ischar (str) && ! iscellstr (str)) - error ("makeValidName: STR must be a string or cellstr"); - endif - - if (mod (nargin - 1, 2) != 0) - error ("makeValidName: property/value options must occur in pairs"); - endif - - varname = cellstr (str); - ismodified = false (size (varname)); - convert2char = ischar (str); - opts = struct ("replacementstyle", "underscore", "prefix", "x"); - - for i = 1:2:numel (varargin) - if (! ischar (varargin{i})) - error ("makeValidName: option argument must be a string"); - endif - parameter = tolower (varargin{i}); - value = varargin{i+1}; - switch (parameter) - case "replacementstyle" - if (! ischar (value)) - error ('makeValidName: "ReplacementStyle" value must be a string'); - endif - value = tolower (value); - if (! any (strcmp (value, {"underscore", "delete", "hex"}))) - error ('makeValidName: invalid "ReplacementStyle" value "%s"', value); - endif - opts.replacementstyle = value; - - case "prefix" - if (! isvarname (value)) - error ('makeValidName: invalid "Prefix" value "%s"', value); - endif - opts.prefix = value; - - otherwise - error ('makeValidName: unknown property "%s"', parameter); - endswitch - endfor +function [varname, ismodified] = makeValidName (varargin) - for i = 1:numel (varname) - if (! isvarname (varname{i})) - ismodified(i) = true; - - ## Remove leading and trailing whitespace - varname{i} = strtrim (varname{i}); - if (isempty (varname{i})) - varname{i} = opts.prefix; - endif - - ## Add prefix if input is a reserved keyword - if (iskeyword (varname{i})) - varname{i} = [opts.prefix, toupper(varname{i}(1)), varname{i}(2:end)]; - endif - - ## Change whitespace followed by lowercase letter to uppercase - idx = regexp (varname{i}, '\s[a-z]'); - varname{i}(idx+1) = toupper (varname{i}(idx+1)); - - ## Remove any whitespace character - varname{i}(isspace (varname{i})) = ""; - - ## Add prefix if first character is not a letter or underscore - char1 = varname{i}(1); - if (! isalpha (char1) && char1 != "_") - varname{i} = [opts.prefix varname{i}]; - endif - - ## Replace non alphanumerics or underscores - idx = regexp (varname{i}, '[^0-9a-zA-Z_]'); - switch (opts.replacementstyle) - case "underscore" - varname{i}(idx) = "_"; - - case "delete" - varname{i}(idx) = ""; - - case "hex" - for j = numel (idx):-1:1 - varname{i} = strrep (varname{i}, varname{i}(idx(j)), - sprintf ("0x%02X",varname{i}(idx(j)))); - endfor - endswitch - endif - endfor - - if (convert2char) - varname = char (varname); - endif + [varname, ismodified] = __make_valid_name__ (varargin{:}); endfunction @@ -223,11 +130,11 @@ %!error <options must occur in pairs> matlab.lang.makeValidName ("a", "opt1") %!error <option argument must be a string> %! matlab.lang.makeValidName ("a", 1, 2) -%!error <"ReplacementStyle" value must be a string> +%!error <'ReplacementStyle' value must be a string> %! matlab.lang.makeValidName ("a", "ReplacementStyle", 1); -%!error <invalid "ReplacementStyle" value "foobar"> +%!error <invalid 'ReplacementStyle' value 'foobar'> %! matlab.lang.makeValidName ("a", "ReplacementStyle", "foobar"); -%!error <invalid "Prefix" value "1_"> +%!error <invalid 'Prefix' value '1_'> %! matlab.lang.makeValidName ("a", "Prefix", "1_"); -%!error <unknown property "foobar"> +%!error <unknown property 'foobar'> %! matlab.lang.makeValidName ("a", "foobar", 1);