changeset 26201:750a6f9957ea

New functions makeUniqueStrings and makeValidName (bug #52596). * scripts/+matlab/+lang/makeUniqueStrings.m: New function. * scripts/+matlab/+lang/makeValidName.m: New function. * scripts/module.mk: Add directory +matlab/+lang to build system. * scripts/+matlab/+lang/module.mk: Add new functions to build system. * NEWS: Announce new functions.
author Guillaume Flandin <guillaume.offline@gmail.com>
date Tue, 11 Dec 2018 15:19:00 +0100
parents 33e6ab3e1491
children 368dc1142072
files NEWS scripts/+matlab/+lang/makeUniqueStrings.m scripts/+matlab/+lang/makeValidName.m scripts/+matlab/+lang/module.mk scripts/module.mk
diffstat 5 files changed, 457 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS	Mon Dec 10 21:41:39 2018 -0800
+++ b/NEWS	Tue Dec 11 15:19:00 2018 +0100
@@ -133,7 +133,7 @@
     argument to print) by default.  This makes more sense for EPS
     files which are normally embedded within other documents, and is
     Matlab compatible.  If necessary use the "-loose" option to
-    reproduce figures as they appeared in previous versions of Octave. 
+    reproduce figures as they appeared in previous versions of Octave.
 
  ** It is now possible to use files and folders containing Unicode
     characters in Windows.
@@ -166,6 +166,8 @@
       ordeig
       savefig
       uitable
+      matlab.lang.makeValidName
+      matlab.lang.makeUniqueStrings
 
  ** Legacy functions.
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/+matlab/+lang/makeUniqueStrings.m	Tue Dec 11 15:19:00 2018 +0100
@@ -0,0 +1,220 @@
+## Copyright (C) 2017-2018 Guillaume Flandin
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {} {@var{y} =} matlab.lang.makeUniqueStrings (@var{x})
+## @deftypefnx {} {@var{y} =} matlab.lang.makeUniqueStrings (@var{x}, @var{ex})
+## @deftypefnx {} {@var{y} =} matlab.lang.makeUniqueStrings (@var{x}, @var{ex}, @var{maxlength})
+## @deftypefnx {} {[@var{y}, @var{ismodified}] =} matlab.lang.makeUniqueStrings (@dots{})
+##
+## Construct a list of unique strings from a list of strings.
+##
+## @var{x} has to be a string or a cell array of strings.  @var{y} will be of
+## the same type, and all of its elements will be unique.
+##
+## If @var{ex} is a string or a cell array of strings, @var{y} will contain
+## elements that are unique between themselves and with respect to @var{ex}.
+##
+## If @var{ex} is an index array or a logical array for @var{x} then it selects
+## the subset of @var{x} that is made unique.  Unselected elements are not
+## modified.
+##
+## @var{maxlength} is the maximal acceptable length of the strings in @var{y}.
+##
+## @var{ismodified} is a logical array indicating whether each element in
+## @var{x} was modified to make it unique.
+##
+## @seealso{unique, matlab.lang.makeValidName}
+## @end deftypefn
+
+function [y, ismodified] = makeUniqueStrings (x, ex = {}, maxlength = namelengthmax ())
+
+  if (nargin == 0 || nargout > 3)
+    print_usage ();
+  endif
+
+  ##  Validate first input
+  if ((! ischar (x)) && (! iscellstr (x)))
+    error ("makeUniqueStrings: input must be a string.");
+  endif
+  converttochar = ischar (x);
+  y = cellstr (x);
+  sz = size (y);
+  y = y(:)';
+
+  ## Initialize array of strings to exclude
+  excludedstrings = {};
+
+  ## Validate first optional input
+  if (nargin > 1)
+    if (ischar (ex) || iscellstr (ex))
+      excludedstrings = cellstr (ex);
+    elseif (islogical (ex))
+      if (numel (ex) != numel (y))
+        error ("makeUniqueStrings: input and logical array must have same length.");
+      endif
+      excludedstrings = y(! ex);
+      y = y(ex);
+    elseif (isnumeric (ex))
+      if (any (ex <= 0 | ex > numel (y) | round (ex) != ex)) # isindex
+        error ("makeUniqueStrings: invalid array of indices.");
+      endif
+      excludedstrings = y(setdiff (1:numel (y), ex));
+      y = y(ex);
+    else
+      error ("makeUniqueStrings: invalid input.");
+    endif
+    excludedstrings = excludedstrings(:)';
+  endif
+
+  ## Validate second optional input
+  if (nargin > 2)
+    if (! isnumeric (maxlength)
+      || ! isscalar (maxlength)
+      || ! isreal (maxlength)
+      || maxlength < 0
+      || round (maxlength) != maxlength)
+      error ("makeUniqueStrings: 'maxlength' must be a positive integer.");
+    endif
+  endif
+
+  ## Initialize second output
+  ismodified = false (size (y));
+
+  ## Truncate output strings
+  istruncated = false (size (y));
+  if (maxlength < namelengthmax ())
+    istruncated = cellfun (@(x) length (x) > maxlength, y);
+    for i=find (istruncated)
+      y{i} = y{i}(1:maxlength);
+    endfor
+  endif
+
+  ## Make unique strings
+  [~, I, J] = unique (y, "first");
+  for i=1: numel (I)
+    R = ! ismember (y{I(i)}, excludedstrings);
+    K = find (J == J(I(i)));
+    n = 1 + ceil (log10 (numel (K)));
+    if (length (y{K(1)}) + n > maxlength)
+      if (n >= maxlength)
+        error ("makeUniqueStrings: cannot create unique elements within 'maxlength'.");
+      endif
+      sub = y{K(1)}(1:maxlength-n);
+    else
+      sub = y{K(1)};
+    endif
+    for k = (1 + R):numel (K)
+      while true
+        N = k - R;
+        proposal = [sub sprintf("_%d", N)];
+        if (! ismember (proposal, [excludedstrings y(I(i+1:end))]))
+          y{K(k)} = proposal;
+          break;
+        else
+          R--; # i.e. increments N
+        endif
+      endwhile
+      ismodified(K(k)) = true;
+    endfor
+  endfor
+
+  ## Return outputs with correct type and size
+  ismodified = ismodified | istruncated;
+
+  if (converttochar)
+    y = char (y);
+    if (isempty (y))
+      y = char ();
+    endif
+  else
+    if (isnumeric (ex) || islogical (ex))
+      z = y;
+      y = cell (1, prod (sz));
+      y(ex) = z;
+      if (isnumeric (ex))
+        y(setdiff (1:prod (sz), ex)) = excludedstrings;
+      else
+        y(! ex) = excludedstrings;
+      endif
+      z = ismodified;
+      ismodified = false (sz);
+      ismodified(ex) = z;
+    endif
+    y = reshape (y, sz);
+  endif
+
+endfunction
+
+## Test first input
+%!test
+%! assert (matlab.lang.makeUniqueStrings ("a"), "a")
+%! assert (matlab.lang.makeUniqueStrings ({"a","b","c"}), {"a","b","c"})
+%! assert (matlab.lang.makeUniqueStrings (''), '');
+%! assert (matlab.lang.makeUniqueStrings ({}), {});
+
+## Test exclusion list
+%!test
+%! x = {"jwe", "Marco", "Rik", "jwe", "Kai", "jwe", "Torsten"};
+%! y = matlab.lang.makeUniqueStrings (x);
+%! assert (y, {"jwe", "Marco", "Rik", "jwe_1", "Kai", "jwe_2", "Torsten"})
+%! y = matlab.lang.makeUniqueStrings (x, "Rik");
+%! assert (y, {"jwe", "Marco", "Rik_1", "jwe_1", "Kai", "jwe_2", "Torsten"})
+%! [y, m] = matlab.lang.makeUniqueStrings (x, {"Kai", "Rik"});
+%! assert (y, {"jwe", "Marco", "Rik_1", "jwe_1", "Kai_1", "jwe_2", "Torsten"})
+%! assert (m, logical ([0 0 1 1 1 1 0]));
+
+## Test index array
+%!test
+%! x = {"a", "a", "a", "b", "a", "b"};
+%! y = matlab.lang.makeUniqueStrings (x, 1:4);
+%! assert (y, {"a_1", "a_2", "a_3", "b_1", "a", "b"});
+%! x(end+1) = "a";
+%! [y, m] = matlab.lang.makeUniqueStrings (x, 1:4);
+%! assert (y, {"a_1", "a_2", "a_3", "b_1", "a", "b", "a"});
+%! assert (m ,logical ([1 1 1 1 0 0 0]));
+
+## Test logical array
+%!test
+%! x = {"a", "a", "a", "b", "a", "b"};
+%! [y, m] = matlab.lang.makeUniqueStrings (x, logical ([1 1 1 1 0 0]));
+%! assert (y, {"a_1", "a_2", "a_3", "b_1", "a", "b"});
+%! assert (m, logical ([1 1 1 1 0 0]));
+
+## Test maxlength
+%!test
+%! x = {"maxlength", "maxlength", "maxlength", "maxlength"};
+%! [y, m] = matlab.lang.makeUniqueStrings (x, 1:3, 5);
+%! assert (y, {"maxle", "max_1", "max_2", "maxlength"});
+%! assert (m, logical ([1 1 1 0]));
+%!error matlab.lang.makeUniqueStrings (repmat ({"a"}, 1, 10), {}, 2)
+
+%!test
+%! assert (matlab.lang.makeUniqueStrings ("a", {"a"}), "a_1")
+%! assert (matlab.lang.makeUniqueStrings ("a", {"a","a_1"}), "a_2")
+%! y = matlab.lang.makeUniqueStrings ({"a","a","a","a_6","a"}, {"a","a_3"});
+%! assert (y, {"a_1","a_2","a_4","a_6","a_5"})
+
+%!test
+%!error matlab.lang.makeUniqueStrings ();
+%!error matlab.lang.makeUniqueStrings (1);
+%!error matlab.lang.makeUniqueStrings ("a", struct ());
+%!error matlab.lang.makeUniqueStrings ("a", 2);
+%!error matlab.lang.makeUniqueStrings ("a", [true false]);
+%!error matlab.lang.makeUniqueStrings ("a", {}, pi);
+%!error [a, b, c] = matlab.lang.makeUniqueStrings ("a");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/+matlab/+lang/makeValidName.m	Tue Dec 11 15:19:00 2018 +0100
@@ -0,0 +1,218 @@
+## Copyright (C) 2017-2018 Guillaume Flandin
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {} {@var{y} =} matlab.lang.makeValidName (@var{x})
+## @deftypefnx {} {@var{y} =} matlab.lang.makeValidName (@var{x}, @qcode{"ReplacementStyle"}, @var{rs})
+## @deftypefnx {} {@var{y} =} matlab.lang.makeValidName (@var{x}, @qcode{"Prefix"}, @var{p})
+## @deftypefnx {} {[@var{y}, @var{ismodified}] =} matlab.lang.makeValidName (@dots{})
+##
+## Create valid variable name(s) @var{y} from @var{x}.
+##
+## @var{x} has to be a string or a cell array of strings.  @var{y} will be of
+## the same type.
+##
+## A valid name is a string of alphanumerics and underscores, starting with a
+## letter.
+##
+## The @qcode{"ReplacementStyle"} option specifies how non valid characters have
+## to be handled.  Acceptable values are @qcode{"underscore"}, @qcode{"hex"} and
+## @qcode{"delete"}.  Default value, @qcode{"underscore"}, replaces all non
+## valid characters with a @qcode{"_"}.  @qcode{"hex"} replaces all non valid
+## characters with their hexadecimal representation, while @qcode{"delete"} will
+## simply remove any character that is not alphanumeric or underscore.
+## Whitespace characters are always removed prior to the application of the
+## @qcode{"ReplacementStyle"}.  Lowercase letters following a whitespace will be
+## changed to uppercase.
+##
+## The @qcode{"Prefix"} option specifies the string @var{p} to add as a prefix
+## to the input if it does not start with a letter.  @var{p} has to be a valid
+## variable name itself. Its default is @qcode{"x"}.
+##
+## @var{ismodified} is a logical array indicating whether each element in
+## @var{x} is a valid name or not (and therefore is modified in @var{y}).
+##
+## @seealso{iskeyword, isvarname, matlab.lang.makeUniqueStrings}
+## @end deftypefn
+
+function [y, ismodified] = makeValidName (x, varargin)
+
+  if (nargin == 0 || nargout > 2)
+    print_usage ();
+  endif
+
+  if ((! ischar (x)) && (! iscellstr (x)))
+    error ("makeValidName: input must be a string.");
+  endif
+
+  converttochar = ischar (x);
+  y = cellstr (x);
+
+  ismodified = false (size (y));
+
+  opts = struct ("replacementstyle", "underscore", "prefix", "x");
+
+  for i = 1:2:numel(varargin)
+    if (! ischar (varargin{i}))
+      error ("makeValidName: input argument must be a string.");
+    endif
+    parameter = tolower (varargin{i});
+    if (numel (varargin) < i+1)
+      error ("makeValidName: input value missing.");
+    endif
+    value = varargin{i+1};
+    switch (parameter)
+      case "replacementstyle"
+        if (! ischar (value))
+          error ('makeValidName: "ReplacementStyle" value must be a string.');
+        endif
+        value = tolower (value);
+        if (! ismember (value, {"underscore", "hex", "delete"}))
+          error ('makeValidName: invalid "ReplacementStyle" value.');
+        endif
+        opts.replacementstyle = value;
+      case "prefix"
+        if (! isvalidname (value))
+          error ('makeValidName: invalid "Prefix" value.');
+        endif
+        opts.prefix = value;
+      otherwise
+        error ("makeValidName: unknown input argument.");
+    endswitch
+  endfor
+
+  for i = 1:numel (y)
+    if (! isvalidname (y{i}))
+      ismodified(i) = true;
+      ## Remove leading and trailing whitespace
+      y{i} = strtrim (y{i});
+      if (isempty (y{i}))
+        y{i} = opts.prefix;
+      endif
+
+      ## Check if input is a reserved keyword
+      if (iskeyword (y{i}))
+        y{i} = [opts.prefix, toupper(y{i}(1)), y{i}(2:end)];
+      endif
+
+      ## Change lowercase letter followed by whitespace to uppercase
+      idx = regexp (y{i},'[\s][a-z]');
+      y{i}(idx+1) = toupper (y{i}(idx+1));
+
+      ## Remove any whitespace character
+      y{i}(isspace (y{i})) = "";
+
+      ## Add prefix if first character is not a letter
+      if (! isempty (regexp (y{i}(1),"[^a-zA-Z]")))
+        y{i} = [opts.prefix y{i}];
+      endif
+
+      ## Replace non alphanumerics or underscores
+      idx = regexp (y{i},"[^0-9A-Za-z_]");
+      switch (opts.replacementstyle)
+        case "underscore"
+          y{i}(idx) = "_";
+        case "hex"
+          for j = numel (idx):-1:1
+            y{i} = strrep (y{i}, y{i}(idx(j)), sprintf ("0x%02X",y{i}(idx(j))));
+          endfor
+        case "delete"
+          y{i}(idx) = "";
+      endswitch
+    endif
+  endfor
+
+  if (converttochar)
+    y = char (y);
+  endif
+
+endfunction
+
+
+function tf = isvalidname (x)
+  # use isvarname instead
+  tf = true;
+  if (! ischar (x)
+    || isempty (x)
+    || numel (x) > namelengthmax ()
+    || ! isempty (regexp (x,"[^0-9A-Za-z_]"))
+    || ! isempty (regexp (x(1),"[^a-zA-Z]"))
+    || iskeyword (x))
+    tf = false;
+  endif
+endfunction
+
+## Test char vector input
+%!test
+%! y = matlab.lang.makeValidName ("octave");
+%! assert (y, "octave");
+
+## Test cellstr input
+%!test
+%! y = matlab.lang.makeValidName ({"gnu", "octave"});
+%! assert (y, {"gnu", "octave"});
+
+## Test default flags
+%!test
+%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"};
+%! y = matlab.lang.makeValidName (x);
+%! assert (y, {"Octave", "x3dPlot", "GNU_Octave", "laplace__"});
+
+## Test ReplacementStyle flag
+%!test
+%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"};
+%! y = matlab.lang.makeValidName (x, "ReplacementStyle", "underscore");
+%! assert (y, {"Octave", "x3dPlot", "GNU_Octave", "laplace__"});
+%! y = matlab.lang.makeValidName (x, "ReplacementStyle", "hex");
+%! assert (y, {"Octave", "x3dPlot", "GNU0x2FOctave", "laplace_0x2A"});
+%! y = matlab.lang.makeValidName (x, "ReplacementStyle", "delete");
+%! assert (y, {"Octave", "x3dPlot", "GNUOctave", "laplace_"});
+
+## Test Prefix flag
+%!test
+%! assert (matlab.lang.makeValidName ({"", " "}), {"x", "x"});
+%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"};
+%! y = matlab.lang.makeValidName (x, "prefix", "oct_");
+%! assert (y, {"Octave", "oct_3dPlot", "GNU_Octave", "laplace__"});
+
+## Test second output
+%!test
+%! x = {"Octave", "3d plot", "GNU/Octave", "laplace_*"};
+%! [y, modified] = matlab.lang.makeValidName (x);
+%! assert (modified, [false, true, true, true]);
+
+## Test lowercase letter following a whitespace
+%!test
+%! y = matlab.lang.makeValidName ("gnu octave");
+%! assert (y, "gnuOctave");
+%! y = matlab.lang.makeValidName (" octave  ");
+%! assert (y, "octave");
+
+## Check for keywords
+%!test
+%! assert (matlab.lang.makeValidName ("for"), "xFor")
+%! assert (matlab.lang.makeValidName ("For"), "For")
+%!error matlab.lang.makeValidName ("for", "Prefix", "for")
+
+## Test input validation
+%!test
+%!error matlab.lang.makeValidName ();
+%!error matlab.lang.makeValidName (42);
+%!error matlab.lang.makeValidName ("octave", "unknown");
+%!error matlab.lang.makeValidName ("octave", "prefix");
+%!error matlab.lang.makeValidName ("octave", "prefix", "$");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/+matlab/+lang/module.mk	Tue Dec 11 15:19:00 2018 +0100
@@ -0,0 +1,15 @@
+FCN_FILE_DIRS += scripts/+matlab/+lang
+
+%canon_reldir%_FCN_FILES = \
+  %reldir%/makeUniqueStrings.m \
+  %reldir%/makeValidName.m
+
+%canon_reldir%dir = $(fcnfiledir)/+matlab/+lang
+
+%canon_reldir%_DATA = $(%canon_reldir%_FCN_FILES)
+
+FCN_FILES += $(%canon_reldir%_FCN_FILES)
+
+PKG_ADD_FILES += %reldir%/PKG_ADD
+
+DIRSTAMP_FILES += %reldir%/$(octave_dirstamp)
--- a/scripts/module.mk	Mon Dec 10 21:41:39 2018 -0800
+++ b/scripts/module.mk	Tue Dec 11 15:19:00 2018 +0100
@@ -5,6 +5,7 @@
 %canon_reldir%_MAINTAINERCLEANFILES =
 
 include %reldir%/+containers/module.mk
+include %reldir%/+matlab/+lang/module.mk
 include %reldir%/audio/module.mk
 include %reldir%/deprecated/module.mk
 include %reldir%/elfun/module.mk