Mercurial > octave
changeset 26201:750a6f9957ea
New functions makeUniqueStrings and makeValidName (bug #52596).
* scripts/+matlab/+lang/makeUniqueStrings.m: New function.
* scripts/+matlab/+lang/makeValidName.m: New function.
* scripts/module.mk: Add directory +matlab/+lang to build system.
* scripts/+matlab/+lang/module.mk: Add new functions to build system.
* NEWS: Announce new functions.
author | Guillaume Flandin <guillaume.offline@gmail.com> |
---|---|
date | Tue, 11 Dec 2018 15:19:00 +0100 |
parents | 33e6ab3e1491 |
children | 368dc1142072 |
files | NEWS scripts/+matlab/+lang/makeUniqueStrings.m scripts/+matlab/+lang/makeValidName.m scripts/+matlab/+lang/module.mk scripts/module.mk |
diffstat | 5 files changed, 457 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS Mon Dec 10 21:41:39 2018 -0800 +++ b/NEWS Tue Dec 11 15:19:00 2018 +0100 @@ -133,7 +133,7 @@ argument to print) by default. This makes more sense for EPS files which are normally embedded within other documents, and is Matlab compatible. If necessary use the "-loose" option to - reproduce figures as they appeared in previous versions of Octave. + reproduce figures as they appeared in previous versions of Octave. ** It is now possible to use files and folders containing Unicode characters in Windows. @@ -166,6 +166,8 @@ ordeig savefig uitable + matlab.lang.makeValidName + matlab.lang.makeUniqueStrings ** Legacy functions.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/+matlab/+lang/makeUniqueStrings.m Tue Dec 11 15:19:00 2018 +0100 @@ -0,0 +1,220 @@ +## Copyright (C) 2017-2018 Guillaume Flandin +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## <http://www.gnu.org/licenses/>. + +## -*- texinfo -*- +## @deftypefn {} {@var{y} =} matlab.lang.makeUniqueStrings (@var{x}) +## @deftypefnx {} {@var{y} =} matlab.lang.makeUniqueStrings (@var{x}, @var{ex}) +## @deftypefnx {} {@var{y} =} matlab.lang.makeUniqueStrings (@var{x}, @var{ex}, @var{maxlength}) +## @deftypefnx {} {[@var{y}, @var{ismodified}] =} matlab.lang.makeUniqueStrings (@dots{}) +## +## Construct a list of unique strings from a list of strings. +## +## @var{x} has to be a string or a cell array of strings. @var{y} will be of +## the same type, and all of its elements will be unique. +## +## If @var{ex} is a string or a cell array of strings, @var{y} will contain +## elements that are unique between themselves and with respect to @var{ex}. +## +## If @var{ex} is an index array or a logical array for @var{x} then it selects +## the subset of @var{x} that is made unique. Unselected elements are not +## modified. +## +## @var{maxlength} is the maximal acceptable length of the strings in @var{y}. +## +## @var{ismodified} is a logical array indicating whether each element in +## @var{x} was modified to make it unique. +## +## @seealso{unique, matlab.lang.makeValidName} +## @end deftypefn + +function [y, ismodified] = makeUniqueStrings (x, ex = {}, maxlength = namelengthmax ()) + + if (nargin == 0 || nargout > 3) + print_usage (); + endif + + ## Validate first input + if ((! ischar (x)) && (! iscellstr (x))) + error ("makeUniqueStrings: input must be a string."); + endif + converttochar = ischar (x); + y = cellstr (x); + sz = size (y); + y = y(:)'; + + ## Initialize array of strings to exclude + excludedstrings = {}; + + ## Validate first optional input + if (nargin > 1) + if (ischar (ex) || iscellstr (ex)) + excludedstrings = cellstr (ex); + elseif (islogical (ex)) + if (numel (ex) != numel (y)) + error ("makeUniqueStrings: input and logical array must have same length."); + endif + excludedstrings = y(! ex); + y = y(ex); + elseif (isnumeric (ex)) + if (any (ex <= 0 | ex > numel (y) | round (ex) != ex)) # isindex + error ("makeUniqueStrings: invalid array of indices."); + endif + excludedstrings = y(setdiff (1:numel (y), ex)); + y = y(ex); + else + error ("makeUniqueStrings: invalid input."); + endif + excludedstrings = excludedstrings(:)'; + endif + + ## Validate second optional input + if (nargin > 2) + if (! isnumeric (maxlength) + || ! isscalar (maxlength) + || ! isreal (maxlength) + || maxlength < 0 + || round (maxlength) != maxlength) + error ("makeUniqueStrings: 'maxlength' must be a positive integer."); + endif + endif + + ## Initialize second output + ismodified = false (size (y)); + + ## Truncate output strings + istruncated = false (size (y)); + if (maxlength < namelengthmax ()) + istruncated = cellfun (@(x) length (x) > maxlength, y); + for i=find (istruncated) + y{i} = y{i}(1:maxlength); + endfor + endif + + ## Make unique strings + [~, I, J] = unique (y, "first"); + for i=1: numel (I) + R = ! ismember (y{I(i)}, excludedstrings); + K = find (J == J(I(i))); + n = 1 + ceil (log10 (numel (K))); + if (length (y{K(1)}) + n > maxlength) + if (n >= maxlength) + error ("makeUniqueStrings: cannot create unique elements within 'maxlength'."); + endif + sub = y{K(1)}(1:maxlength-n); + else + sub = y{K(1)}; + endif + for k = (1 + R):numel (K) + while true + N = k - R; + proposal = [sub sprintf("_%d", N)]; + if (! ismember (proposal, [excludedstrings y(I(i+1:end))])) + y{K(k)} = proposal; + break; + else + R--; # i.e. increments N + endif + endwhile + ismodified(K(k)) = true; + endfor + endfor + + ## Return outputs with correct type and size + ismodified = ismodified | istruncated; + + if (converttochar) + y = char (y); + if (isempty (y)) + y = char (); + endif + else + if (isnumeric (ex) || islogical (ex)) + z = y; + y = cell (1, prod (sz)); + y(ex) = z; + if (isnumeric (ex)) + y(setdiff (1:prod (sz), ex)) = excludedstrings; + else + y(! ex) = excludedstrings; + endif + z = ismodified; + ismodified = false (sz); + ismodified(ex) = z; + endif + y = reshape (y, sz); + endif + +endfunction + +## Test first input +%!test +%! assert (matlab.lang.makeUniqueStrings ("a"), "a") +%! assert (matlab.lang.makeUniqueStrings ({"a","b","c"}), {"a","b","c"}) +%! assert (matlab.lang.makeUniqueStrings (''), ''); +%! assert (matlab.lang.makeUniqueStrings ({}), {}); + +## Test exclusion list +%!test +%! x = {"jwe", "Marco", "Rik", "jwe", "Kai", "jwe", "Torsten"}; +%! y = matlab.lang.makeUniqueStrings (x); +%! assert (y, {"jwe", "Marco", "Rik", "jwe_1", "Kai", "jwe_2", "Torsten"}) +%! y = matlab.lang.makeUniqueStrings (x, "Rik"); +%! assert (y, {"jwe", "Marco", "Rik_1", "jwe_1", "Kai", "jwe_2", "Torsten"}) +%! [y, m] = matlab.lang.makeUniqueStrings (x, {"Kai", "Rik"}); +%! assert (y, {"jwe", "Marco", "Rik_1", "jwe_1", "Kai_1", "jwe_2", "Torsten"}) +%! assert (m, logical ([0 0 1 1 1 1 0])); + +## Test index array +%!test +%! x = {"a", "a", "a", "b", "a", "b"}; +%! y = matlab.lang.makeUniqueStrings (x, 1:4); +%! assert (y, {"a_1", "a_2", "a_3", "b_1", "a", "b"}); +%! x(end+1) = "a"; +%! [y, m] = matlab.lang.makeUniqueStrings (x, 1:4); +%! assert (y, {"a_1", "a_2", "a_3", "b_1", "a", "b", "a"}); +%! assert (m ,logical ([1 1 1 1 0 0 0])); + +## Test logical array +%!test +%! x = {"a", "a", "a", "b", "a", "b"}; +%! [y, m] = matlab.lang.makeUniqueStrings (x, logical ([1 1 1 1 0 0])); +%! assert (y, {"a_1", "a_2", "a_3", "b_1", "a", "b"}); +%! assert (m, logical ([1 1 1 1 0 0])); + +## Test maxlength +%!test +%! x = {"maxlength", "maxlength", "maxlength", "maxlength"}; +%! [y, m] = matlab.lang.makeUniqueStrings (x, 1:3, 5); +%! assert (y, {"maxle", "max_1", "max_2", "maxlength"}); +%! assert (m, logical ([1 1 1 0])); +%!error matlab.lang.makeUniqueStrings (repmat ({"a"}, 1, 10), {}, 2) + +%!test +%! assert (matlab.lang.makeUniqueStrings ("a", {"a"}), "a_1") +%! assert (matlab.lang.makeUniqueStrings ("a", {"a","a_1"}), "a_2") +%! y = matlab.lang.makeUniqueStrings ({"a","a","a","a_6","a"}, {"a","a_3"}); +%! assert (y, {"a_1","a_2","a_4","a_6","a_5"}) + +%!test +%!error matlab.lang.makeUniqueStrings (); +%!error matlab.lang.makeUniqueStrings (1); +%!error matlab.lang.makeUniqueStrings ("a", struct ()); +%!error matlab.lang.makeUniqueStrings ("a", 2); +%!error matlab.lang.makeUniqueStrings ("a", [true false]); +%!error matlab.lang.makeUniqueStrings ("a", {}, pi); +%!error [a, b, c] = matlab.lang.makeUniqueStrings ("a");
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/+matlab/+lang/makeValidName.m Tue Dec 11 15:19:00 2018 +0100 @@ -0,0 +1,218 @@ +## Copyright (C) 2017-2018 Guillaume Flandin +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, see +## <http://www.gnu.org/licenses/>. + +## -*- texinfo -*- +## @deftypefn {} {@var{y} =} matlab.lang.makeValidName (@var{x}) +## @deftypefnx {} {@var{y} =} matlab.lang.makeValidName (@var{x}, @qcode{"ReplacementStyle"}, @var{rs}) +## @deftypefnx {} {@var{y} =} matlab.lang.makeValidName (@var{x}, @qcode{"Prefix"}, @var{p}) +## @deftypefnx {} {[@var{y}, @var{ismodified}] =} matlab.lang.makeValidName (@dots{}) +## +## Create valid variable name(s) @var{y} from @var{x}. +## +## @var{x} has to be a string or a cell array of strings. @var{y} will be of +## the same type. +## +## A valid name is a string of alphanumerics and underscores, starting with a +## letter. +## +## The @qcode{"ReplacementStyle"} option specifies how non valid characters have +## to be handled. Acceptable values are @qcode{"underscore"}, @qcode{"hex"} and +## @qcode{"delete"}. Default value, @qcode{"underscore"}, replaces all non +## valid characters with a @qcode{"_"}. @qcode{"hex"} replaces all non valid +## characters with their hexadecimal representation, while @qcode{"delete"} will +## simply remove any character that is not alphanumeric or underscore. +## Whitespace characters are always removed prior to the application of the +## @qcode{"ReplacementStyle"}. Lowercase letters following a whitespace will be +## changed to uppercase. +## +## The @qcode{"Prefix"} option specifies the string @var{p} to add as a prefix +## to the input if it does not start with a letter. @var{p} has to be a valid +## variable name itself. Its default is @qcode{"x"}. +## +## @var{ismodified} is a logical array indicating whether each element in +## @var{x} is a valid name or not (and therefore is modified in @var{y}). +## +## @seealso{iskeyword, isvarname, matlab.lang.makeUniqueStrings} +## @end deftypefn + +function [y, ismodified] = makeValidName (x, varargin) + + if (nargin == 0 || nargout > 2) + print_usage (); + endif + + if ((! ischar (x)) && (! iscellstr (x))) + error ("makeValidName: input must be a string."); + endif + + converttochar = ischar (x); + y = cellstr (x); + + ismodified = false (size (y)); + + opts = struct ("replacementstyle", "underscore", "prefix", "x"); + + for i = 1:2:numel(varargin) + if (! ischar (varargin{i})) + error ("makeValidName: input argument must be a string."); + endif + parameter = tolower (varargin{i}); + if (numel (varargin) < i+1) + error ("makeValidName: input value missing."); + endif + value = varargin{i+1}; + switch (parameter) + case "replacementstyle" + if (! ischar (value)) + error ('makeValidName: "ReplacementStyle" value must be a string.'); + endif + value = tolower (value); + if (! ismember (value, {"underscore", "hex", "delete"})) + error ('makeValidName: invalid "ReplacementStyle" value.'); + endif + opts.replacementstyle = value; + case "prefix" + if (! isvalidname (value)) + error ('makeValidName: invalid "Prefix" value.'); + endif + opts.prefix = value; + otherwise + error ("makeValidName: unknown input argument."); + endswitch + endfor + + for i = 1:numel (y) + if (! isvalidname (y{i})) + ismodified(i) = true; + ## Remove leading and trailing whitespace + y{i} = strtrim (y{i}); + if (isempty (y{i})) + y{i} = opts.prefix; + endif + + ## Check if input is a reserved keyword + if (iskeyword (y{i})) + y{i} = [opts.prefix, toupper(y{i}(1)), y{i}(2:end)]; + endif + + ## Change lowercase letter followed by whitespace to uppercase + idx = regexp (y{i},'[\s][a-z]'); + y{i}(idx+1) = toupper (y{i}(idx+1)); + + ## Remove any whitespace character + y{i}(isspace (y{i})) = ""; + + ## Add prefix if first character is not a letter + if (! isempty (regexp (y{i}(1),"[^a-zA-Z]"))) + y{i} = [opts.prefix y{i}]; + endif + + ## Replace non alphanumerics or underscores + idx = regexp (y{i},"[^0-9A-Za-z_]"); + switch (opts.replacementstyle) + case "underscore" + y{i}(idx) = "_"; + case "hex" + for j = numel (idx):-1:1 + y{i} = strrep (y{i}, y{i}(idx(j)), sprintf ("0x%02X",y{i}(idx(j)))); + endfor + case "delete" + y{i}(idx) = ""; + endswitch + endif + endfor + + if (converttochar) + y = char (y); + endif + +endfunction + + +function tf = isvalidname (x) + # use isvarname instead + tf = true; + if (! ischar (x) + || isempty (x) + || numel (x) > namelengthmax () + || ! isempty (regexp (x,"[^0-9A-Za-z_]")) + || ! isempty (regexp (x(1),"[^a-zA-Z]")) + || iskeyword (x)) + tf = false; + endif +endfunction + +## Test char vector input +%!test +%! y = matlab.lang.makeValidName ("octave"); +%! assert (y, "octave"); + +## Test cellstr input +%!test +%! y = matlab.lang.makeValidName ({"gnu", "octave"}); +%! assert (y, {"gnu", "octave"}); + +## Test default flags +%!test +%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"}; +%! y = matlab.lang.makeValidName (x); +%! assert (y, {"Octave", "x3dPlot", "GNU_Octave", "laplace__"}); + +## Test ReplacementStyle flag +%!test +%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"}; +%! y = matlab.lang.makeValidName (x, "ReplacementStyle", "underscore"); +%! assert (y, {"Octave", "x3dPlot", "GNU_Octave", "laplace__"}); +%! y = matlab.lang.makeValidName (x, "ReplacementStyle", "hex"); +%! assert (y, {"Octave", "x3dPlot", "GNU0x2FOctave", "laplace_0x2A"}); +%! y = matlab.lang.makeValidName (x, "ReplacementStyle", "delete"); +%! assert (y, {"Octave", "x3dPlot", "GNUOctave", "laplace_"}); + +## Test Prefix flag +%!test +%! assert (matlab.lang.makeValidName ({"", " "}), {"x", "x"}); +%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"}; +%! y = matlab.lang.makeValidName (x, "prefix", "oct_"); +%! assert (y, {"Octave", "oct_3dPlot", "GNU_Octave", "laplace__"}); + +## Test second output +%!test +%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"}; +%! [y, modified] = matlab.lang.makeValidName (x); +%! assert (modified, [false, true, true, true]); + +## Test lowercase letter following a whitespace +%!test +%! y = matlab.lang.makeValidName ("gnu octave"); +%! assert (y, "gnuOctave"); +%! y = matlab.lang.makeValidName (" octave "); +%! assert (y, "octave"); + +## Check for keywords +%!test +%! assert (matlab.lang.makeValidName ("for"), "xFor") +%! assert (matlab.lang.makeValidName ("For"), "For") +%!error matlab.lang.makeValidName ("for", "Prefix", "for") + +## Test input validation +%!test +%!error matlab.lang.makeValidName (); +%!error matlab.lang.makeValidName (42); +%!error matlab.lang.makeValidName ("octave", "unknown"); +%!error matlab.lang.makeValidName ("octave", "prefix"); +%!error matlab.lang.makeValidName ("octave", "prefix", "$");
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scripts/+matlab/+lang/module.mk Tue Dec 11 15:19:00 2018 +0100 @@ -0,0 +1,15 @@ +FCN_FILE_DIRS += scripts/+matlab/+lang + +%canon_reldir%_FCN_FILES = \ + %reldir%/makeUniqueStrings.m \ + %reldir%/makeValidName.m + +%canon_reldir%dir = $(fcnfiledir)/+matlab/+lang + +%canon_reldir%_DATA = $(%canon_reldir%_FCN_FILES) + +FCN_FILES += $(%canon_reldir%_FCN_FILES) + +PKG_ADD_FILES += %reldir%/PKG_ADD + +DIRSTAMP_FILES += %reldir%/$(octave_dirstamp)
--- a/scripts/module.mk Mon Dec 10 21:41:39 2018 -0800 +++ b/scripts/module.mk Tue Dec 11 15:19:00 2018 +0100 @@ -5,6 +5,7 @@ %canon_reldir%_MAINTAINERCLEANFILES = include %reldir%/+containers/module.mk +include %reldir%/+matlab/+lang/module.mk include %reldir%/audio/module.mk include %reldir%/deprecated/module.mk include %reldir%/elfun/module.mk