view libinterp/corefcn/lookup.cc @ 27918:b442ec6dda5c

use centralized file for copyright info for individual contributors * COPYRIGHT.md: New file. * In most other files, use "Copyright (C) YYYY-YYYY The Octave Project Developers" instead of tracking individual names in separate source files. The motivation is to reduce the effort required to update the notices each year. Until now, the Octave source files contained copyright notices that list individual contributors. I adopted these file-scope copyright notices because that is what everyone was doing 30 years ago in the days before distributed version control systems. But now, with many contributors and modern version control systems, having these file-scope copyright notices causes trouble when we update copyright years or refactor code. Over time, the file-scope copyright notices may become outdated as new contributions are made or code is moved from one file to another. Sometimes people contribute significant patches but do not add a line claiming copyright. Other times, people add a copyright notice for their contribution but then a later refactoring moves part or all of their contribution to another file and the notice is not moved with the code. As a practical matter, moving such notices is difficult -- determining what parts are due to a particular contributor requires a time-consuming search through the project history. Even managing the yearly update of copyright years is problematic. We have some contributors who are no longer living. Should we update the copyright dates for their contributions when we release new versions? Probably not, but we do still want to claim copyright for the project as a whole. To minimize the difficulty of maintaining the copyright notices, I would like to change Octave's sources to use what is described here: https://softwarefreedom.org/resources/2012/ManagingCopyrightInformation.html in the section "Maintaining centralized copyright notices": The centralized notice approach consolidates all copyright notices in a single location, usually a top-level file. This file should contain all of the copyright notices provided project contributors, unless the contribution was clearly insignificant. It may also credit -- without a copyright notice -- anyone who helped with the project but did not contribute code or other copyrighted material. This approach captures less information about contributions within individual files, recognizing that the DVCS is better equipped to record those details. As we mentioned before, it does have one disadvantage as compared to the file-scope approach: if a single file is separated from the distribution, the recipient won't see the contributors' copyright notices. But this can be easily remedied by including a single copyright notice in each file's header, pointing to the top-level file: Copyright YYYY-YYYY The Octave Project Developers See the COPYRIGHT file at the top-level directory of this distribution or at https://octave.org/COPYRIGHT.html. followed by the usual GPL copyright statement. For more background, see the discussion here: https://lists.gnu.org/archive/html/octave-maintainers/2020-01/msg00009.html Most files in the following directories have been skipped intentinally in this changeset: doc libgui/qterminal liboctave/external m4
author John W. Eaton <jwe@octave.org>
date Mon, 06 Jan 2020 15:38:17 -0500
parents 00f796120a6d
children 1891570abac8
line wrap: on
line source

/*

Copyright (C) 2008-2019 The Octave Project Developers

See the file COPYRIGHT.md in the top-level directory of this distribution
or <https://octave.org/COPYRIGHT.html/>.


This file is part of Octave.

Octave is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Octave is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Octave; see the file COPYING.  If not, see
<https://www.gnu.org/licenses/>.

*/

// Author: Jaroslav Hajek <highegg@gmail.com>

#if defined (HAVE_CONFIG_H)
#  include "config.h"
#endif

#include <cctype>
#include <functional>
#include <algorithm>

#include "dNDArray.h"
#include "CNDArray.h"

#include "Cell.h"
#include "defun.h"
#include "error.h"
#include "errwarn.h"
#include "ovl.h"
#include "ov.h"

static
bool
contains_char (const std::string& str, char c)
{
  return (str.find (c) != std::string::npos
          || str.find (std::toupper (c)) != std::string::npos);
}

template <typename T>
inline sortmode
get_sort_mode (const Array<T>& array,
               typename octave_sort<T>::compare_fcn_type desc_comp
               = octave_sort<T>::descending_compare)
{
  octave_idx_type n = array.numel ();
  if (n > 1 && desc_comp (array (0), array (n-1)))
    return DESCENDING;
  else
    return ASCENDING;
}

// FIXME: perhaps there should be octave_value::lookup?
// The question is, how should it behave w.r.t. the second argument's type.
// We'd need a dispatch on two arguments.  Hmmm...

#define INT_ARRAY_LOOKUP(TYPE)                                  \
  (table.is_ ## TYPE ## _type () && y.is_ ## TYPE ## _type ())  \
    retval = do_numeric_lookup (table.TYPE ## _array_value (),  \
                                y.TYPE ## _array_value (),      \
                                left_inf, right_inf,            \
                                match_idx, match_bool);
template <typename ArrayT>
static octave_value
do_numeric_lookup (const ArrayT& array, const ArrayT& values,
                   bool left_inf, bool right_inf,
                   bool match_idx, bool match_bool)
{
  octave_value retval;

  Array<octave_idx_type> idx = array.lookup (values);
  octave_idx_type n = array.numel ();
  octave_idx_type nval = values.numel ();

  // Post-process.
  if (match_bool)
    {
      boolNDArray match (idx.dims ());
      for (octave_idx_type i = 0; i < nval; i++)
        {
          octave_idx_type j = idx.xelem (i);
          match.xelem (i) = j != 0 && values(i) == array(j-1);
        }

      retval = match;
    }
  else if (match_idx || left_inf || right_inf)
    {
      if (match_idx)
        {
          NDArray ridx (idx.dims ());

          for (octave_idx_type i = 0; i < nval; i++)
            {
              octave_idx_type j = idx.xelem (i);
              ridx.xelem (i) = (j != 0 && values(i) == array(j-1)) ? j : 0;
            }

          retval = ridx;
        }
      else if (left_inf && right_inf)
        {
          // Results in valid indices.  Optimize using lazy index.
          octave_idx_type zero = 0;
          for (octave_idx_type i = 0; i < nval; i++)
            {
              octave_idx_type j = idx.xelem (i) - 1;
              idx.xelem (i) = std::max (zero, std::min (j, n-2));
            }

          retval = idx_vector (idx);
        }
      else if (left_inf)
        {
          // Results in valid indices.  Optimize using lazy index.
          octave_idx_type zero = 0;
          for (octave_idx_type i = 0; i < nval; i++)
            {
              octave_idx_type j = idx.xelem (i) - 1;
              idx.xelem (i) = std::max (zero, j);
            }

          retval = idx_vector (idx);
        }
      else if (right_inf)
        {
          NDArray ridx (idx.dims ());

          for (octave_idx_type i = 0; i < nval; i++)
            {
              octave_idx_type j = idx.xelem (i);
              ridx.xelem (i) = std::min (j, n-1);
            }

          retval = ridx;
        }
    }
  else
    retval = idx;

  return retval;
}

DEFUN (lookup, args, ,
       doc: /* -*- texinfo -*-
@deftypefn  {} {@var{idx} =} lookup (@var{table}, @var{y})
@deftypefnx {} {@var{idx} =} lookup (@var{table}, @var{y}, @var{opt})
Lookup values in a @strong{sorted} table.

This function is usually used as a prelude to interpolation.

If table is increasing, of length N and @code{idx = lookup (table, y)}, then
@code{table(idx(i)) <= y(i) < table(idx(i+1))} for all @code{y(i)} within the
table.  If @code{y(i) < table(1)} then @code{idx(i)} is 0.  If
@code{y(i) >= table(end)} or @code{isnan (y(i))} then @code{idx(i)} is N.

If the table is decreasing, then the tests are reversed.  For non-strictly
monotonic tables, empty intervals are always skipped.  The result is undefined
if @var{table} is not monotonic, or if @var{table} contains a NaN.

The complexity of the lookup is O(M*log(N)) where M is the size of @var{y}.
In the special case when @var{y} is also sorted, the complexity is
O(min (M*log(N), M+N)).

@var{table} and @var{y} can also be cell arrays of strings (or @var{y} can be a
single string).  In this case, string lookup is performed using lexicographical
comparison.

If @var{opts} is specified, it must be a string with letters indicating
additional options.

@table @code
@item m
Match.  @code{table(idx(i)) == y(i)} if @code{y(i)} occurs in table;
otherwise, @code{idx(i)} is zero.

@item b
Boolean.  @code{idx(i)} is a logical 1 or 0, indicating whether @code{y(i)}
is contained in table or not.

@item l
Left.  For numeric lookups the leftmost subinterval shall be extended to
minus infinity (i.e., all indices at least 1).

@item r
Right.  For numeric lookups the rightmost subinterval shall be extended to
infinity (i.e., all indices at most N-1).
@end table

@strong{Note}: If @var{table} is not sorted the results from @code{lookup}
will be unpredictable.
@end deftypefn */)
{
  int nargin = args.length ();

  if (nargin < 2 || nargin > 3)
    print_usage ();

  octave_value table = args(0);
  octave_value y = args(1);
  if (table.ndims () > 2 || (table.columns () > 1 && table.rows () > 1))
    warning ("lookup: table is not a vector");

  octave_value retval;

  bool num_case = ((table.isnumeric () && y.isnumeric ())
                   || (table.is_char_matrix () && y.is_char_matrix ()));
  bool str_case = table.iscellstr () && (y.is_string () || y.iscellstr ());
  bool left_inf = false;
  bool right_inf = false;
  bool match_idx = false;
  bool match_bool = false;

  if (nargin == 3)
    {
      std::string opt = args(2).xstring_value ("lookup: OPT must be a string");
      left_inf = contains_char (opt, 'l');
      right_inf = contains_char (opt, 'r');
      match_idx = contains_char (opt, 'm');
      match_bool = contains_char (opt, 'b');
      if (opt.find_first_not_of ("lrmb") != std::string::npos)
        error ("lookup: unrecognized option: %c",
               opt[opt.find_first_not_of ("lrmb")]);
    }

  if ((match_idx || match_bool) && (left_inf || right_inf))
    error ("lookup: m, b cannot be specified with l or r");
  else if (match_idx && match_bool)
    error ("lookup: only one of m or b can be specified");
  else if (str_case && (left_inf || right_inf))
    error ("lookup: l, r are not recognized for string lookups");

  if (num_case)
    {
      // In the case of a complex array, absolute values will be used for
      // compatibility (though it's not too meaningful).
      if (table.iscomplex ())
        table = table.abs ();

      if (y.iscomplex ())
        y = y.abs ();

      Array<octave_idx_type> idx;

      // PS: I learned this from data.cc
      if INT_ARRAY_LOOKUP (int8)
      else if INT_ARRAY_LOOKUP (int16)
      else if INT_ARRAY_LOOKUP (int32)
      else if INT_ARRAY_LOOKUP (int64)
      else if INT_ARRAY_LOOKUP (uint8)
      else if INT_ARRAY_LOOKUP (uint16)
      else if INT_ARRAY_LOOKUP (uint32)
      else if INT_ARRAY_LOOKUP (uint64)
      else if (table.is_char_matrix () && y.is_char_matrix ())
        retval = do_numeric_lookup (table.char_array_value (),
                                    y.char_array_value (),
                                    left_inf, right_inf,
                                    match_idx, match_bool);
      else if (table.is_single_type () || y.is_single_type ())
        retval = do_numeric_lookup (table.float_array_value (),
                                    y.float_array_value (),
                                    left_inf, right_inf,
                                    match_idx, match_bool);
      else
        retval = do_numeric_lookup (table.array_value (),
                                    y.array_value (),
                                    left_inf, right_inf,
                                    match_idx, match_bool);
    }
  else if (str_case)
    {
      Array<std::string> str_table = table.cellstr_value ();
      Array<std::string> str_y (dim_vector (1, 1));

      if (y.iscellstr ())
        str_y = y.cellstr_value ();
      else
        str_y(0) = y.string_value ();

      Array<octave_idx_type> idx = str_table.lookup (str_y);
      octave_idx_type nval = str_y.numel ();

      // Post-process.
      if (match_bool)
        {
          boolNDArray match (idx.dims ());
          for (octave_idx_type i = 0; i < nval; i++)
            {
              octave_idx_type j = idx.xelem (i);
              match.xelem (i) = j != 0 && str_y(i) == str_table(j-1);
            }

          retval = match;
        }
      else if (match_idx)
        {
          NDArray ridx (idx.dims ());
          for (octave_idx_type i = 0; i < nval; i++)
            {
              octave_idx_type j = idx.xelem (i);
              ridx.xelem (i) = (j != 0 && str_y(i) == str_table(j-1) ? j : 0);
            }

          retval = ridx;
        }
      else
        retval = idx;
    }
  else
    print_usage ();

  return retval;
}

/*
%!assert (lookup (1:3, 0.5), 0)     # value before table
%!assert (lookup (1:3, 3.5), 3)     # value after table error
%!assert (lookup (1:3, 1.5), 1)     # value within table error
%!assert (lookup (1:3, [3,2,1]), [3,2,1])
%!assert (lookup ([1:4]', [1.2, 3.5]'), [1, 3]')
%!assert (lookup ([1:4], [1.2, 3.5]'), [1, 3]')
%!assert (lookup ([1:4]', [1.2, 3.5]), [1, 3])
%!assert (lookup ([1:4], [1.2, 3.5]), [1, 3])
%!assert (lookup (1:3, [3, 2, 1]), [3, 2, 1])
%!assert (lookup ([3:-1:1], [3.5, 3, 1.2, 2.5, 2.5]), [0, 1, 2, 1, 1])
%!assert (isempty (lookup ([1:3], [])))
%!assert (isempty (lookup ([1:3]', [])))
%!assert (lookup (1:3, [1, 2; 3, 0.5]), [1, 2; 3, 0])
%!assert (lookup (1:4, [1, 1.2; 3, 2.5], "m"), [1, 0; 3, 0])
%!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "m"), [4, 0; 2, 0])
%!assert (lookup (1:4, [1, 1.2; 3, 2.5], "b"), logical ([1, 0; 3, 0]))
%!assert (lookup (4:-1:1, [1, 1.2; 3, 2.5], "b"), logical ([4, 0; 2, 0]))
%!
%!assert (lookup ({"apple","lemon","orange"}, {"banana","kiwi"; "ananas","mango"}), [1,1;0,2])
%!assert (lookup ({"apple","lemon","orange"}, "potato"), 3)
%!assert (lookup ({"orange","lemon","apple"}, "potato"), 0)
*/