diff scripts/ode/private/fuzzy_compare.m @ 20568:fcb792acab9b

Moving ode45, odeset, odeget, and levenshtein from odepkg to core. * libinterp/corefcn/levenshtein.cc: move function from odepkg into core * libinterp/corefcn/module.mk: include levenshtein.cc * scripts/ode: move ode45, odeset, odeget, and all dependencies from odepkg into core * scripts/module.mk: include them * doc/interpreter/diffeq.txi: add documentation for ode45, odeset, odeget * NEWS: announce functions included with this changeset * scripts/help/__unimplemented__.m: removed new functions
author jcorno <jacopo.corno@gmail.com>
date Thu, 24 Sep 2015 12:58:46 +0200
parents
children 25623ef2ff4f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/ode/private/fuzzy_compare.m	Thu Sep 24 12:58:46 2015 +0200
@@ -0,0 +1,172 @@
+## Copyright (C) 2013, Roberto Porcu' <roberto.porcu@polimi.it>
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+
+## -*- texinfo -*-
+## @deftypefn {Command} {[@var{res}] =} fuzzy_compare @
+## (@var{"string1"}, @var{string_set}, [@var{correctness}])
+##
+## Compare a string with a set of strings and return the positions
+## in the set of strings of the entries that best fit the string
+## being compared.
+##
+## The distance used to compare the words is the Levenshtein distance
+## and for more details see
+## @url{http://en.wikipedia.org/wiki/Levenshtein_distance}.
+##
+## This function must be called with one output argument @var{res}
+## which contains the positions of the elements in @var{string_set}
+## which best fit the given word.  The tolerance used to decide
+## whether an element of the list fits the given word is a function
+## of the length of the word and of the minimum distance of the word
+## from all the elements of the list.  The longer the word, the
+## larger the tolerance.  The smaller the minimum, the smaller the
+## tolerance; but if the minimum is close to the length of the word,
+## the tolerance must also be small, because it means that no element
+## of the list really fits the given word.  The resulting formula is:
+##
+## @ifhtml
+## @example
+## @math{tolerance = 2 * (length-minimum) * minimum / length}
+## @end example
+## @end ifhtml
+## @ifnothtml
+## @math{tolerance = 2 * (length-minimum) * minimum / length}.
+## @end ifnothtml
+##
+## The first input argument must be a string containing the word to
+## compare.
+##
+## The second input argument must be a character array (one string
+## per row) or a cell array of strings and should contain the fields
+## to use for the comparison.
+##
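+## The third input argument is optional and represents a fixed
+## tolerance that will replace the implemented one.  It must be a
+## non-negative integer or a string; the value 0 or the string
+## @qcode{"exact"} means that only an exact match is accepted, and an
+## error is raised if there is none.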
+##
+## @seealso{odeset, odeget, levenshtein}
+## @end deftypefn
+
+function res = fuzzy_compare (string1, string_set, correctness)
+
+  ## Return the indices of the entries of STRING_SET that best match
+  ## STRING1 according to the case-insensitive Levenshtein distance.
+  ##
+  ## Inputs:
+  ##   string1     - word to look up (char array); trailing blanks are
+  ##                 stripped before comparing.
+  ##   string_set  - candidates: a cell array of strings (row or column)
+  ##                 or a char matrix with one candidate per row.
+  ##   correctness - optional.  A non-negative integer that replaces the
+  ##                 computed tolerance, or a string.  The value 0 or the
+  ##                 string "exact" (any case) demands an exact match.
+  ## Output:
+  ##   res - column vector of indices into STRING_SET, or [] when no
+  ##         candidate is within the tolerance.
+  ## Errors (all with id "OdePkg:InvalidArgument"):
+  ##   - invalid number or type of arguments;
+  ##   - more than one candidate matches STRING1 exactly;
+  ##   - an exact match was requested but none exists.
+
+  ## check on output arguments
+  if (nargout > 1)
+    error ("OdePkg:InvalidArgument", "too many output arguments");
+  endif
+
+  ## check on input arguments
+  if (nargin < 2 || nargin > 3)
+    error ("OdePkg:InvalidArgument", "wrong input arguments number");
+  endif
+
+  if (! ischar (string1)
+      || (! iscellstr (string_set) && ! ischar (string_set)))
+    ## The message is built by concatenation: additional arguments to
+    ## error() are format data, not further pieces of the template.
+    error ("OdePkg:InvalidArgument",
+           ["first argument must be a string, second argument ", ...
+            "must be an array of strings or a cell array of strings"]);
+  endif
+
+  has_correctness = (nargin == 3);
+
+  if (has_correctness)
+    if ((! isnumeric (correctness) || ! isscalar (correctness))
+        && ! ischar (correctness))
+      error ("OdePkg:InvalidArgument",
+             ["third input argument must be a positive ", ...
+              "integer or a string"]);
+    endif
+
+    if (isnumeric (correctness)
+        && (correctness < 0 || mod (correctness, 1) != 0))
+      error ("OdePkg:InvalidArgument",
+             "third input argument must be a positive integer");
+    endif
+  endif
+
+  ## Strip trailing blanks first so that they do not count towards the
+  ## word length used by the tolerance formula below.
+  string1 = deblank (string1);
+  m = length (string1);
+  needle = lower (string1);
+
+  ## Number of candidates: every element of a cell array (whatever its
+  ## orientation), or every row of a char matrix.
+  is_cell = iscellstr (string_set);
+  if (is_cell)
+    fields_nb = numel (string_set);
+  else
+    fields_nb = size (string_set, 1);
+  endif
+
+  values = inf (fields_nb, 1);
+  minimus = inf;
+
+  ## loop on every field of the list
+  for i = 1:fields_nb
+    if (is_cell)
+      candidate = deblank (string_set{i});
+    else
+      candidate = deblank (string_set(i,:));
+    endif
+    ## compute Levenshtein distance (not case sensitive); the best
+    ## distance found so far is passed as upper bound to speed up the
+    ## computation
+    values(i) = levenshtein (needle, lower (candidate), minimus);
+    minimus = min (minimus, values(i));
+  endfor
+
+  positions = find (values == minimus);
+
+  if (minimus == 0) # exact match
+    if (numel (positions) != 1)
+      error ("OdePkg:InvalidArgument",
+             "there are %d strings perfectly matching ''%s''",
+             numel (positions), string1);
+    endif
+    res = positions;
+    return;
+  endif
+
+  ## Tolerance as described in the texinfo section: a downward-opening
+  ## parabola in the minimum distance, with zeros at 0 and m and a
+  ## maximum of m/2 at m/2.  A single division is used so that no
+  ## rounded intermediate factor such as 2/(m*m) enters the result.
+  tolerance = 2 * (m - minimus) * minimus / m;
+
+  ## if the degree of correctness is fixed by the user, it will
+  ## replace the tolerance
+  if (has_correctness)
+    if (ischar (correctness))
+      exact_only = strcmpi (deblank (correctness), "exact");
+    else
+      exact_only = (correctness == 0);
+    endif
+
+    ## Reaching this point means that no candidate matched exactly.
+    if (exact_only)
+      error ("OdePkg:InvalidArgument",
+             "no exact matching for string ''%s''", string1);
+    endif
+
+    if (isnumeric (correctness))
+      tolerance = correctness;
+    endif
+  endif
+
+  ## returning the positions of the fields whose distance is not
+  ## greater than the tolerance
+  res = find (values <= tolerance);
+  if (isempty (res))
+    ## keep returning a plain [] when nothing fits
+    res = [];
+  endif
+
+endfunction
+
+## Local Variables: ***
+## mode: octave ***
+## End: ***