Mercurial > octave

/*

We are using the pure parser interface and the reentrant lexer interface
but the Octave parser and lexer are NOT properly reentrant because both
still use many global variables.  It should be safe to create a parser
object and call it while another parser object is active (to parse a
callback function while the main interactive parser is waiting for
input, for example) if you take care to properly save and restore
(typically with an unwind_protect object) relevant global values before
and after the nested call.

*/

%top {
////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 1993-2022 The Octave Project Developers
//
// See the file COPYRIGHT.md in the top-level directory of this
// distribution or <https://octave.org/copyright/>.
//
// This file is part of Octave.
//
// Octave is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Octave is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Octave; see the file COPYING.  If not, see
// <https://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////

#if defined (HAVE_CONFIG_H)
#  include "config.h"
#endif

#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
   // This one needs to be global.
#  pragma GCC diagnostic ignored "-Wunused-function"
   // Disable these warnings for code that is generated by flex,
   // including pattern rules.  Push the current state so we can
   // restore the warning state prior to functions we define at
   // the bottom of the file.
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Wold-style-cast"
#  pragma GCC diagnostic ignored "-Wsign-compare"
#  pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
#  if defined (HAVE_WARN_IMPLICIT_FALLTHROUGH)
#    pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#  endif
#endif

// Define away the deprecated register storage class specifier to avoid
// potential warnings about it.
#if ! defined (register)
#  define register
#endif

}

%option prefix = "octave_"
%option noyywrap
%option reentrant
%option bison-bridge

%option noyyalloc
%option noyyrealloc
%option noyyfree

%x COMMAND_START
%s MATRIX_START

%x INPUT_FILE_START

%x BLOCK_COMMENT_START
%x LINE_COMMENT_START

%x DQ_STRING_START
%x SQ_STRING_START

%x FQ_IDENT_START

%{

#include <cctype>
#include <cstring>

#include <algorithm>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <stack>

#include "cmd-edit.h"
#include "lo-mappers.h"
#include "quit.h"
#include "unistd-wrappers.h"

// These would be alphabetical, but oct-parse.h must be included before
// oct-gperf.h and oct-parse.h must be included after token.h and the tree
// class declarations.  We can't include oct-parse.h in oct-gperf.h
// because it may not be protected to allow it to be included multiple
// times.

#include "Cell.h"
#include "defun.h"
#include "error.h"
#include "errwarn.h"
#include "input.h"
#include "interpreter.h"
#include "lex.h"
#include "octave.h"
#include "ov-magic-int.h"
#include "ov.h"
#include "parse.h"
#include "pt-all.h"
#include "symtab.h"
#include "token.h"
#include "utils.h"
#include "variables.h"
#include "oct-parse.h"
#include "oct-gperf.h"

// FIXME: with bison 3.x, OCTAVE_STYPE appears in the generated
// oct-parse.h file, but there is no definition for YYSTYPE, which is
// needed by the code that is generated by flex.  I can't seem to find a
// way to tell flex to use OCTAVE_STYPE instead of YYSTYPE in the code
// it generates, or to tell bison to provide the definition of YYSTYPE
// in the generated oct-parse.h file.

#if defined (OCTAVE_STYPE_IS_DECLARED) && ! defined YYSTYPE
#  define YYSTYPE OCTAVE_STYPE
#endif

// Tell the flex-generated scanner not to include <unistd.h> on its own.
#define YY_NO_UNISTD_H 1
// Replace the scanner's calls to isatty with octave_isatty_wrapper
// (presumably declared in unistd-wrappers.h; confirm).
#define isatty octave_isatty_wrapper

// Require a flex-generated scanner from flex 2.5 or newer.  The major
// and minor numbers are compared as a pair so that a later major
// release with a small minor number (3.0, say) is not rejected.
#if ! (defined (FLEX_SCANNER)                                           \
       && defined (YY_FLEX_MAJOR_VERSION)                               \
       && defined (YY_FLEX_MINOR_VERSION)                               \
       && (YY_FLEX_MAJOR_VERSION > 2                                    \
           || (YY_FLEX_MAJOR_VERSION == 2                               \
               && YY_FLEX_MINOR_VERSION >= 5)))
#error lex.l requires flex version 2.5.4 or later
#endif

// The per-scanner "extra" slot holds a pointer to an octave::base_lexer
// object; curr_lexer is shorthand for that slot (yyextra) in the code
// below.
#define YY_EXTRA_TYPE octave::base_lexer *
#define curr_lexer yyextra

// Arrange to get input via readline.
// The scanner's buffer BUF is filled by curr_lexer->fill_flex_buffer,
// which is given MAX_SIZE; its return value is stored in RESULT.

#if defined (YY_INPUT)
#  undef YY_INPUT
#endif
#define YY_INPUT(buf, result, max_size)                 \
  result = curr_lexer->fill_flex_buffer (buf, max_size)

// Try to avoid crashing out completely on fatal scanner errors.
// Fatal errors are forwarded to the fatal_error method of the lexer
// object obtained from yyget_extra (yyscanner).

#if defined (YY_FATAL_ERROR)
#  undef YY_FATAL_ERROR
#endif
#define YY_FATAL_ERROR(msg)                     \
   (yyget_extra (yyscanner))->fatal_error (msg)

// Scanner action for text that is either an operator or the start of a
// command-syntax argument.  PATTERN is passed to lexer_debug.  If
// looks_like_command_arg () is true, the whole match is given back to
// the input (yyless (0)) and COMMAND_START is pushed so the text is
// rescanned in that state; no token is returned.  Otherwise the action
// returns handle_op (TOK, false, COMPAT).
#define CMD_OR_OP(PATTERN, TOK, COMPAT)                                 \
   do                                                                   \
     {                                                                  \
       curr_lexer->lexer_debug (PATTERN);                               \
                                                                        \
       if (curr_lexer->looks_like_command_arg ())                       \
         {                                                              \
           yyless (0);                                                  \
           curr_lexer->push_start_state (COMMAND_START);                \
         }                                                              \
       else                                                             \
         return curr_lexer->handle_op (TOK, false, COMPAT);             \
     }                                                                  \
   while (0)

// Like CMD_OR_OP, but for an operator that is deprecated.  When the
// text is not treated as a command argument, warn_deprecated_operator
// is called with PATTERN, REPLACEMENT, and VERSION converted to a
// string literal (#VERSION), and then handle_op (TOK, false, true) is
// returned.
#define CMD_OR_DEPRECATED_OP(PATTERN, REPLACEMENT, VERSION, TOK)        \
   do                                                                   \
     {                                                                  \
       curr_lexer->lexer_debug (PATTERN);                               \
                                                                        \
       if (curr_lexer->looks_like_command_arg ())                       \
         {                                                              \
           yyless (0);                                                  \
           curr_lexer->push_start_state (COMMAND_START);                \
         }                                                              \
       else                                                             \
         {                                                              \
           curr_lexer->warn_deprecated_operator (PATTERN, REPLACEMENT,  \
                                                 #VERSION);             \
           /* set COMPAT to true here to avoid warning about            \
              compatibility since we've already warned about the        \
              operator being deprecated.  */                            \
           return curr_lexer->handle_op (TOK, false, true);             \
         }                                                              \
     }                                                                  \
   while (0)

// Scanner action for text that may be a unary operator, a binary
// operator, or the start of a command-syntax argument.  PATTERN is
// passed to lexer_debug.
//
// If previous_token_may_be_command () is true, the behavior is that of
// CMD_OR_OP: rescan in COMMAND_START when looks_like_command_arg () is
// true, otherwise return handle_op (TOK, false, COMPAT).
//
// Otherwise, if maybe_unput_comma_before_unary_op (TOK) is true, the
// match is given back to the input and a ',' is pushed in front of it
// (no token is returned); if not, handle_op (TOK, false, COMPAT) is
// returned.
#define CMD_OR_UNARY_OP(PATTERN, TOK, COMPAT)                           \
   do                                                                   \
     {                                                                  \
       curr_lexer->lexer_debug (PATTERN);                               \
                                                                        \
       if (curr_lexer->previous_token_may_be_command ())                \
         {                                                              \
           if (curr_lexer->looks_like_command_arg ())                   \
             {                                                          \
               yyless (0);                                              \
               curr_lexer->push_start_state (COMMAND_START);            \
             }                                                          \
           else                                                         \
             return curr_lexer->handle_op (TOK, false, COMPAT);         \
         }                                                              \
       else                                                             \
         {                                                              \
           if (curr_lexer->maybe_unput_comma_before_unary_op (TOK))     \
             {                                                          \
               yyless (0);                                              \
               curr_lexer->xunput (',');                                \
             }                                                          \
           else                                                         \
             return curr_lexer->handle_op (TOK, false, COMPAT);         \
         }                                                              \
     }                                                                  \
   while (0)

// End-of-input check used by scanner actions.  It does nothing unless
// is_push_lexer () is true.  For a push lexer, the action returns
// STATUS when at_end_of_buffer () is true, or the result of
// handle_end_of_input () when at_end_of_file () is true.  In every
// other case control falls through to the code after the macro.
#define HANDLE_EOB_OR_EOF(STATUS)                       \
   do                                                   \
     {                                                  \
       if (curr_lexer->is_push_lexer ())                \
         {                                              \
           if (curr_lexer->at_end_of_buffer ())         \
             return STATUS;                             \
                                                        \
           if (curr_lexer->at_end_of_file ())           \
             return curr_lexer->handle_end_of_input (); \
         }                                              \
     }                                                  \
   while (0)

// Action for a continuation line: advance the file position to the
// next line, then apply HANDLE_EOB_OR_EOF with status -1.
//
// If we are at the end of the buffer, ask for more input.
// If we are at the end of the file, deal with it.
// Otherwise, just keep going with the text from the current buffer.
#define HANDLE_STRING_CONTINUATION                      \
   do                                                   \
     {                                                  \
       curr_lexer->m_filepos.next_line ();              \
                                                        \
       HANDLE_EOB_OR_EOF (-1);                          \
     }                                                  \
   while (0)

// Scanner action for a numeric literal.  PATTERN is passed to
// lexer_debug; BASE is the template argument for handle_number.
//
// Three outcomes are possible:
//   * previous_token_may_be_command () and
//     space_follows_previous_token () are both true: the match is
//     given back to the input and COMMAND_START is pushed.
//   * whitespace is significant, a space follows the previous token,
//     and the previous token is not '[', '{', or a binary operator:
//     the match is given back and a ',' is pushed in front of it.
//   * otherwise: handle_number<BASE> () is returned.
// Only the last case returns a token.
#define HANDLE_NUMBER(PATTERN, BASE)                            \
  do                                                            \
    {                                                           \
     curr_lexer->lexer_debug (PATTERN);                         \
                                                                \
     if (curr_lexer->previous_token_may_be_command ()           \
         &&  curr_lexer->space_follows_previous_token ())       \
       {                                                        \
         yyless (0);                                            \
         curr_lexer->push_start_state (COMMAND_START);          \
       }                                                        \
     else                                                       \
       {                                                        \
         int tok = curr_lexer->previous_token_value ();         \
                                                                \
         if (curr_lexer->whitespace_is_significant ()           \
             && curr_lexer->space_follows_previous_token ()     \
             && ! (tok == '[' || tok == '{'                     \
                   || curr_lexer->previous_token_is_binop ()))  \
           {                                                    \
             yyless (0);                                        \
             curr_lexer->xunput (',');                          \
           }                                                    \
         else                                                   \
           return curr_lexer->handle_number<BASE> ();           \
       }                                                        \
    }                                                           \
  while (0)

// Scanner action for an identifier.  PATTERN is passed to lexer_debug.
//
// Three outcomes are possible:
//   * whitespace is significant, a space follows the previous token,
//     and the previous token is not '[', '{', or a binary operator:
//     the match is given back and a ',' is pushed in front of it.
//   * m_looking_at_decl_list is false and
//     previous_token_may_be_command () is true: the match is given
//     back and COMMAND_START is pushed.
//   * otherwise: handle_identifier () is returned.
//
// When GET_SET is true in the last case, only the first three
// characters of the match are kept (yyless (3)), the column is advanced
// by three, and m_maybe_classdef_get_set_method is cleared before
// handle_identifier is called.  (Presumably those three characters are
// a "get" or "set" prefix; confirm at the call sites.)
#define HANDLE_IDENTIFIER(pattern, get_set)                             \
   do                                                                   \
     {                                                                  \
       curr_lexer->lexer_debug (pattern);                               \
                                                                        \
       int tok = curr_lexer->previous_token_value ();                   \
                                                                        \
       if (curr_lexer->whitespace_is_significant ()                     \
           && curr_lexer->space_follows_previous_token ()               \
           && ! (tok == '[' || tok == '{'                               \
                 || curr_lexer->previous_token_is_binop ()))            \
         {                                                              \
           yyless (0);                                                  \
           curr_lexer->xunput (',');                                    \
         }                                                              \
       else                                                             \
         {                                                              \
           if (! curr_lexer->m_looking_at_decl_list                     \
               && curr_lexer->previous_token_may_be_command ())         \
             {                                                          \
               yyless (0);                                              \
               curr_lexer->push_start_state (COMMAND_START);            \
             }                                                          \
           else                                                         \
             {                                                          \
               if (get_set)                                             \
                 {                                                      \
                   yyless (3);                                          \
                   curr_lexer->m_filepos.increment_column (3);          \
                   curr_lexer->m_maybe_classdef_get_set_method = false; \
                 }                                                      \
                                                                        \
               return curr_lexer->handle_identifier ();                 \
             }                                                          \
         }                                                              \
     }                                                                  \
   while (0)

// Return true if C is a space or a horizontal tab.
static inline bool
is_space_or_tab (char c)
{
  switch (c)
    {
    case ' ':
    case '\t':
      return true;

    default:
      return false;
    }
}

// Return true if C is a space, a horizontal tab, a line feed, or a
// carriage return.
static inline bool
is_space_or_tab_or_eol (char c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;

    default:
      return false;
    }
}

OCTAVE_BEGIN_NAMESPACE(octave)

  // Return true if S is a language keyword.
  //
  // A name counts as a keyword when it is found in the keyword hash
  // table and is not one of the names that the table contains only to
  // simplify parsing.
  bool iskeyword (const std::string& s)
  {
    if (octave_kw_hash::in_word_set (s.c_str (), s.length ()) == nullptr)
      return false;

    // Function names like "set.property_name" inside classdef-style
    // class definitions are easier to parse when "set" and "get" go
    // through the keyword mechanism, but they are not really keywords
    // in the language.  The same holds for "arguments", "enumeration",
    // "events", "methods", and "properties".

    // FIXME: The following check is duplicated in Fiskeyword.
    static const char *const not_really_keywords[] =
      {
        "set", "get", "arguments", "enumeration", "events", "methods",
        "properties"
      };

    for (const char *name : not_really_keywords)
      {
        if (s == name)
          return false;
      }

    return true;
  }

OCTAVE_END_NAMESPACE(octave)

%}

D       [0-9]
D_      [0-9_]
S       [ \t]
NL      ((\n)|(\r)|(\r\n))
CCHAR   [#%]
IDENT   ([_$a-zA-Z][_$a-zA-Z0-9]*)
FQIDENT ({IDENT}({S}*\.{S}*{IDENT})*)

%{
// Decimal numbers may be real or imaginary but always create
// double precision constants initially.  Any conversion to single
// precision happens as part of an expression evaluation in the
// interpreter, not the lexer and parser.
%}

DECIMAL_DIGITS ({D}{D_}*)
EXPONENT       ([DdEe][+-]?{DECIMAL_DIGITS})
REAL_DECIMAL   ((({DECIMAL_DIGITS}\.?)|({DECIMAL_DIGITS}?\.{DECIMAL_DIGITS})){EXPONENT}?)
IMAG_DECIMAL   ({REAL_DECIMAL}[IiJj])
DECIMAL_NUMBER ({REAL_DECIMAL}|{IMAG_DECIMAL})

%{
// It is possible to specify signedness and size for binary and
// hexadecimal numbers but there is no special syntax for imaginary
// constants.  Binary and hexadecimal constants always create integer
// valued constants ({u,}int{8,16,32,64}).  If a size is not specified,
// the smallest integer type that will hold the value is used.  Negative
// values may be created with a signed size specification by applying
// twos-complement conversion (for example, 0xffs8 produces an 8-bit
// signed integer equal to -1 and 0b10000000s8 produces an 8-bit signed
// integer equal to -128).
%}

SIZE_SUFFIX        ([su](8|16|32|64))
BINARY_BITS        (0[bB][01][01_]*)
BINARY_NUMBER      ({BINARY_BITS}|{BINARY_BITS}{SIZE_SUFFIX})
HEXADECIMAL_BITS   (0[xX][0-9a-fA-F][0-9a-fA-F_]*)
HEXADECIMAL_NUMBER ({HEXADECIMAL_BITS}|{HEXADECIMAL_BITS}{SIZE_SUFFIX})

ANY_EXCEPT_NL [^\r\n]
ANY_INCLUDING_NL (.|{NL})

%%

%{
// Make script and function files start with an invalid token. This makes
// the parser go down a special path.
%}

<INPUT_FILE_START>{ANY_INCLUDING_NL} {
    // First character of a file: put it back on the input, leave the
    // INPUT_FILE_START state, and return the INPUT_FILE token.
    curr_lexer->lexer_debug ("<INPUT_FILE_START>{ANY_INCLUDING_NL}");

    curr_lexer->xunput (yytext[0]);

    // May be reset later if "function" or "classdef" appears as the
    // first token.
    curr_lexer->m_reading_script_file = true;

    curr_lexer->pop_start_state ();

    return curr_lexer->show_token (INPUT_FILE);
  }

<INPUT_FILE_START><<EOF>> {
    // Empty file: same as the rule for a first character, except that
    // there is nothing to put back on the input.
    curr_lexer->lexer_debug ("<INPUT_FILE_START><<EOF>>");

    // May be reset later if "function" or "classdef" appears as the
    // first token.
    curr_lexer->m_reading_script_file = true;

    curr_lexer->pop_start_state ();

    return curr_lexer->show_token (INPUT_FILE);
  }

%{
// Help and other command-style functions.
%}

%{
// Commands can be continued on a second line using the ellipsis.
// If an argument is in construction, it is completed.
%}

<COMMAND_START>(\.\.\.){ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<COMMAND_START>(\\.\\.\\.){ANY_EXCEPT_NL}*{NL}");

    if (curr_lexer->m_string_text.empty ())
      {
        // No argument text is pending: move to the next line and, for
        // a push lexer, stop here if the input is exhausted.
        HANDLE_STRING_CONTINUATION;
      }
    else
      {
        // An argument is under construction: give the continuation
        // marker back to the input and finish the argument first.
        yyless (0);
        curr_lexer->m_tok_end = curr_lexer->m_filepos;
        return curr_lexer->finish_command_arg ();
      }
  }

%{
// Commands normally end at the end of a line or a semicolon.
%}

<COMMAND_START>({CCHAR}{ANY_EXCEPT_NL}*)?{NL} {
    curr_lexer->lexer_debug ("<COMMAND_START>({CCHAR}{ANY_EXCEPT_NL}*)?{NL}");

    if (curr_lexer->m_string_text.empty ())
      {
        // Nothing is pending, so the command ends here: record the
        // token position, start a new line and a new statement, leave
        // COMMAND_START, and return a newline token.
        curr_lexer->update_token_positions (yyleng);

        curr_lexer->m_filepos.next_line ();
        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = true;
        curr_lexer->pop_start_state ();

        return curr_lexer->handle_token ('\n');
      }

    // An argument is under construction: give the matched text back to
    // the input and finish the argument first.
    yyless (0);
    curr_lexer->m_tok_end = curr_lexer->m_filepos;
    return curr_lexer->finish_command_arg ();
  }

<COMMAND_START>[\,\;] {
    curr_lexer->lexer_debug ("<COMMAND_START>[\\,\\;]");

    // A comma seen while the paren count is nonzero is ordinary
    // argument text.  A semicolon, or a comma with a zero paren count,
    // ends the command.
    const bool comma_is_argument_text
      = (yytext[0] == ','
         && curr_lexer->m_command_arg_paren_count != 0);

    if (comma_is_argument_text)
      {
        curr_lexer->m_string_text += yytext;
        curr_lexer->m_filepos.increment_column (yyleng);
      }
    else if (! curr_lexer->m_string_text.empty ())
      {
        // Finish the pending argument first; the separator is given
        // back to the input and scanned again afterwards.
        yyless (0);
        curr_lexer->m_tok_end = curr_lexer->m_filepos;
        return curr_lexer->finish_command_arg ();
      }
    else
      {
        curr_lexer->update_token_positions (yyleng);

        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = true;
        curr_lexer->pop_start_state ();

        return curr_lexer->handle_token (yytext[0]);
      }
  }

%{
// Unbalanced parentheses serve as pseudo-quotes: they are included in
// the final argument string, but they cause parentheses and quotes to
// be slurped into that argument as well.
%}

<COMMAND_START>[\(\[\{]* {
    // Opening brackets in a command argument: add the number matched
    // to the paren count and append them to the argument text.  The
    // debug label mirrors the rule's pattern.
    // NOTE(review): "*" permits a zero-length match; confirm whether
    // "+" was the intended quantifier.
    curr_lexer->lexer_debug ("<COMMAND_START>[\\(\\[\\{]*");

    curr_lexer->m_command_arg_paren_count += yyleng;
    curr_lexer->m_string_text += yytext;
    curr_lexer->m_filepos.increment_column (yyleng);
  }

<COMMAND_START>[\)\]\}]* {
   // Closing brackets in a command argument: subtract the number
   // matched from the paren count and append them to the argument
   // text.  The debug label mirrors the rule's pattern.
   // NOTE(review): "*" permits a zero-length match; confirm whether
   // "+" was the intended quantifier.
   curr_lexer->lexer_debug ("<COMMAND_START>[\\)\\]\\}]*");

   curr_lexer->m_command_arg_paren_count -= yyleng;
   curr_lexer->m_string_text += yytext;
   curr_lexer->m_filepos.increment_column (yyleng);
}

%{
// Handle quoted strings.  Quoted strings that are not separated by
// whitespace from other argument text are combined with that previous
// text.  For instance,
//
//   command 'text1'"text2"
//
// has a single argument text1text2, not two separate arguments.
// That's why we must test to see if we are in command argument mode
// when processing the end of a string.
%}

<COMMAND_START>[\"\'] {
    curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']");

    if (curr_lexer->m_command_arg_paren_count != 0)
      {
        // Inside unbalanced brackets the quote character is plain
        // argument text.
        curr_lexer->m_string_text += yytext;
      }
    else
      {
        // Otherwise it opens a string of the matching kind.
        const int string_state
          = (yytext[0] == '"' ? DQ_STRING_START : SQ_STRING_START);

        curr_lexer->begin_string (string_state);
      }

    curr_lexer->m_filepos.increment_column (yyleng);
  }

%{
// In standard command argument processing, whitespace separates
// arguments.  In the presence of unbalanced parentheses, it is
// incorporated into the argument.
%}

<COMMAND_START>{S}* {
    curr_lexer->lexer_debug ("<COMMAND_START>{S}*");

    if (curr_lexer->m_command_arg_paren_count != 0)
      {
        // Inside unbalanced brackets whitespace is part of the
        // argument.
        curr_lexer->m_string_text += yytext;
      }
    else if (! curr_lexer->m_string_text.empty ())
      {
        // Whitespace ends the argument under construction.  Give the
        // whitespace back to the input and finish the argument.
        yyless (0);
        curr_lexer->m_tok_end = curr_lexer->m_filepos;
        return curr_lexer->finish_command_arg ();
      }

    curr_lexer->m_filepos.increment_column (yyleng);
  }

%{
// Everything else is slurped into the command arguments.
%}

<COMMAND_START>([\.]|[^#% \t\r\n\.\,\;\"\'\(\[\{\}\]\)]*) {
    // Any other text is appended to the argument under construction.
    // The debug label mirrors the rule's pattern, including its
    // closing parenthesis.
    curr_lexer->lexer_debug ("<COMMAND_START>([\\.]|[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]*)");

    curr_lexer->m_string_text += yytext;
    curr_lexer->m_filepos.increment_column (yyleng);
  }

%{
// Whitespace inside matrix lists.
%}

<MATRIX_START>{S}* {
    // Whitespace inside a matrix list produces no token: advance the
    // column by the length of the match and record on the previous
    // token that it was followed by space.
    curr_lexer->lexer_debug ("<MATRIX_START>{S}*");

    curr_lexer->m_filepos.increment_column (yyleng);

    curr_lexer->mark_previous_token_trailing_space ();
  }

<MATRIX_START>{NL} {
    curr_lexer->lexer_debug ("<MATRIX_START>{NL}");

    curr_lexer->m_filepos.next_line ();

    if (! curr_lexer->m_nesting_level.is_paren ())
      {
        // A newline acts as a row separator: push a ';' onto the input
        // unless the previous token is already ';' or an opening '['
        // or '{'.
        const int prev_tok = curr_lexer->previous_token_value ();

        const bool separator_not_needed
          = (prev_tok == ';' || prev_tok == '[' || prev_tok == '{');

        if (! separator_not_needed)
          curr_lexer->xunput (';');
      }
    else
      curr_lexer->warn_language_extension ("bare newline inside parentheses");
  }

%{
// Continuation lines in matrix constants are handled as whitespace.
// Allow arbitrary text after the continuation marker.
%}

<MATRIX_START>\.\.\.{ANY_EXCEPT_NL}*{NL} {
    // Continuation marker (with any trailing text up to the end of the
    // line) inside a matrix list.  No token is returned.
    curr_lexer->lexer_debug ("<MATRIX_START>\\.\\.\\.{ANY_EXCEPT_NL}*{NL}");

    curr_lexer->handle_continuation ();

    // Even if there wasn't a space before or after the continuation
    // marker, treat the continuation as if it were.  But since it will
    // be transformed to a separator later anyway, there's no need to
    // actually unput a space on the input stream.

    curr_lexer->mark_previous_token_trailing_space ();
  }

%{
// For this and the next two rules, we're looking at ']', and we
// need to know if the next token is '=' or '=='.
//
// It would have been so much easier if the delimiters were simply
// different for the expression on the left hand side of the equals
// operator.
//
// It's also a pain in the ass to decide whether to insert a comma
// after seeing a ']' character...

// FIXME: we need to handle block comments here.
%}

<MATRIX_START>\] {
    // Closing ']' of a matrix list: record the token position and let
    // handle_close_bracket produce the token to return.
    curr_lexer->lexer_debug ("<MATRIX_START>\\]");

    curr_lexer->update_token_positions (yyleng);
    return curr_lexer->handle_close_bracket (']');
  }

%{
// FIXME: we need to handle block comments here.
%}

<MATRIX_START>\} {
    // Closing '}' of a cell list: record the token position and let
    // handle_close_bracket produce the token to return.  The debug
    // label mirrors the rule's pattern, which matches exactly one '}'.
    curr_lexer->lexer_debug ("<MATRIX_START>\\}");

    curr_lexer->update_token_positions (yyleng);
    return curr_lexer->handle_close_bracket ('}');
  }

\[ {
    curr_lexer->lexer_debug ("\\[");

    // Where whitespace is significant and the previous token was
    // followed by space, a '[' starts a new element unless the previous
    // token is '[', '{', or a binary operator.  In that case a ',' has
    // to be scanned before the bracket.
    bool need_comma_first = false;

    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        const int prev_tok = curr_lexer->previous_token_value ();

        const bool continues_expression
          = (prev_tok == '[' || prev_tok == '{'
             || curr_lexer->previous_token_is_binop ());

        need_comma_first = ! continues_expression;
      }

    if (! need_comma_first)
      {
        curr_lexer->update_token_positions (yyleng);

        curr_lexer->m_nesting_level.bracket ();

        curr_lexer->m_looking_at_object_index.push_front (false);

        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        // While a function is being defined and its name has not been
        // parsed yet, the bracket opens a return list.  Otherwise it
        // opens a matrix or the left-hand side of an assignment.
        const bool opens_return_list
          = (curr_lexer->m_defining_fcn
             && ! curr_lexer->m_parsed_function_name.top ());

        if (opens_return_list)
          curr_lexer->m_looking_at_return_list = true;
        else
          curr_lexer->m_looking_at_matrix_or_assign_lhs = true;

        curr_lexer->m_bracketflag++;

        curr_lexer->push_start_state (MATRIX_START);

        return curr_lexer->count_token ('[');
      }

    // Give the bracket back to the input and put a ',' in front of it.
    yyless (0);
    curr_lexer->xunput (',');
  }

\] {
    // A ']' seen outside the MATRIX_START state.
    curr_lexer->lexer_debug ("\\]");

    curr_lexer->update_token_positions (yyleng);

    // Undo the bookkeeping for one level of nesting.
    curr_lexer->m_nesting_level.remove ();

    curr_lexer->m_looking_at_object_index.pop_front ();

    curr_lexer->m_looking_for_object_index = true;
    curr_lexer->m_at_beginning_of_statement = false;

    return curr_lexer->handle_token (']');
  }

%{
// Gobble comments.  Both BLOCK_COMMENT_START and LINE_COMMENT_START
// are exclusive start states.  We try to grab a continuous series of
// line-oriented comments as a single collection of comments.
%}

%{
// Start of a block comment.  Since comment start states are exclusive,
// this pattern will not match a block comment that immediately follows
// a line-oriented comment.  All we need to do is push the matched text
// back on the input stream and push the new start state.
%}

^{S}*{CCHAR}\{{S}*{NL} {
    // A line holding only a block comment opener.  Give the whole
    // match back to the input and switch to BLOCK_COMMENT_START, where
    // the same text is scanned again.  No token is returned.
    curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}");

    yyless (0);

    curr_lexer->push_start_state (BLOCK_COMMENT_START);
  }

<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
    // Block comment opener seen in the block comment state: count the
    // line and go one nesting level deeper.
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");

    curr_lexer->m_filepos.next_line ();

    // A nonzero level means this opener is nested inside another block
    // comment.
    // NOTE(review): this assigns rather than appends, so any text
    // already in m_comment_text is replaced by a single newline;
    // confirm that this is intended.
    if (curr_lexer->m_block_comment_nesting_level)
      curr_lexer->m_comment_text = "\n";

    curr_lexer->m_block_comment_nesting_level++;

    HANDLE_EOB_OR_EOF (-1);
  }

%{
// End of a block comment.  If this block comment is nested inside
// another, wait for the outermost block comment to be closed before
// storing the comment.

// NOTE: This pattern must appear before the one below.  Both may match
// the same text and this one should take precedence over the one that
// follows.
%}

<BLOCK_COMMENT_START>^{S}*{CCHAR}\}{S}*{NL} {
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\}{S}*{NL}");

    curr_lexer->m_filepos.next_line ();

    // The comment is stored only when the level is not greater than
    // one; for a deeper level the comment text is set to a newline.
    if (curr_lexer->m_block_comment_nesting_level <= 1)
      curr_lexer->finish_comment (octave::comment_elt::block);
    else
      curr_lexer->m_comment_text = "\n";

    curr_lexer->m_block_comment_nesting_level--;

    // Once the level is back to zero, leave the block comment state
    // and use status -2 instead of -1 for the end-of-input check.
    const bool left_all_blocks
      = (curr_lexer->m_block_comment_nesting_level == 0);

    if (left_all_blocks)
      curr_lexer->pop_start_state ();

    HANDLE_EOB_OR_EOF (left_all_blocks ? -2 : -1);
  }

%{
// Body of a block comment.
%}

<BLOCK_COMMENT_START>{ANY_EXCEPT_NL}*{NL} {
    // One line of block comment text: count the line and append the
    // matched text, line ending included, to the comment being
    // collected.
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>{ANY_EXCEPT_NL}*{NL}");

    curr_lexer->m_filepos.next_line ();
    curr_lexer->m_comment_text += yytext;

    HANDLE_EOB_OR_EOF (-1);
  }

%{
// Full-line or end-of-line comment.
%}

{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    // A comment that runs to the end of the line.  Switch to
    // LINE_COMMENT_START and give the whole match back to the input so
    // it is scanned again in that state.  No token is returned.
    curr_lexer->lexer_debug ("{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    curr_lexer->push_start_state (LINE_COMMENT_START);
    yyless (0);
  }

%{
// Beginning of a block comment while we are looking at a series of
// line-oriented comments.  Finish previous comment, push current
// text back on input stream, and switch start states.

// NOTE: This pattern must appear before the one below.  Both may match
// the same text and this one should take precedence over the one that
// follows.
%}

<LINE_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
    // Block comment opener seen while collecting line comments.
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");

    // Store any line comment text collected so far.
    if (! curr_lexer->m_comment_text.empty ())
      curr_lexer->finish_comment (octave::comment_elt::full_line);

    // Replace the line comment state with the block comment state and
    // give the opener back to the input so it is scanned again there.
    curr_lexer->pop_start_state ();
    curr_lexer->push_start_state (BLOCK_COMMENT_START);
    yyless (0);
  }

%{
// Line-oriented comment.  If we are at the beginning of a line, this is
// part of a series of full-line comments.  Otherwise, this is an end of
// line comment.  We don't need to parse the matched text to determine
// whether we are looking at the start of a block comment as that
// pattern is handled above.

// NOTE: This pattern must appear before the one below.  Both may match
// the same text and this one should take precedence over the one that
// follows.
%}

<LINE_COMMENT_START>{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    // Skip leading blanks and then the run of comment characters; the
    // rest of the match is the text of the comment.

    std::size_t pos = 0;

    for (; pos < yyleng && is_space_or_tab (yytext[pos]); pos++)
      ;

    const bool have_space = (pos != 0);

    for (; pos < yyleng && (yytext[pos] == '#' || yytext[pos] == '%'); pos++)
      ;

    curr_lexer->m_comment_text += yytext + pos;

    if (curr_lexer->m_filepos.column () != 1)
      {
        // Not in column 1, so this is an end of line comment.

        if (have_space)
          curr_lexer->mark_previous_token_trailing_space ();

        curr_lexer->finish_comment (octave::comment_elt::end_of_line);

        curr_lexer->pop_start_state ();

        // The newline goes back on the input and the line count is
        // left alone, so that the usual newline actions run when it is
        // scanned again.

        curr_lexer->xunput ('\n');

        // Those actions should put the column back to 1, but keep the
        // column accurate in the meantime.
        curr_lexer->m_filepos.increment_column (yyleng);
      }
    else
      {
        // Full-line comment: keep collecting and count the line.
        curr_lexer->m_filepos.next_line ();
      }
  }

%{
// End of a series of full-line comments because some other character
// was found on the input stream.
%}

<LINE_COMMENT_START>{ANY_INCLUDING_NL} {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>{ANY_INCLUDING_NL}");

    // A single character was matched while collecting comments.  It is
    // either the ASCII 1 end-of-buffer marker or ordinary input that
    // ends the series of comments.

    if (yytext[0] == '\001')
      {
        // We are here because we are using the push parser/lexer
        // interface and we hit the end of the input buffer or file.
        // The special ASCII 1 marker is added to the input by
        // push_lexer::fill_flex_buffer.

        if (curr_lexer->pending_token_count () > 0)
          {
            // We are in the middle of parsing a command, expression,
            // etc., so set the return status so that if we are at the
            // end of the buffer we'll continue looking for more input,
            // possibly buffering a series of line oriented comments as
            // a single block.

            HANDLE_EOB_OR_EOF (-1);
          }
        else
          {
            // We are not in the process of parsing a command,
            // expression, etc., so end any current sequence of comments
            // with this full line comment, pop the start state and
            // return as if we have just finished parsing a complete
            // statement.

            curr_lexer->finish_comment (octave::comment_elt::full_line);

            curr_lexer->pop_start_state ();

            HANDLE_EOB_OR_EOF (-2);
          }
      }
    else
      {
        // End any current sequence of comments, pop the start state,
        // and unput the pending input character that ended the series
        // of comments.

        curr_lexer->finish_comment (octave::comment_elt::full_line);

        curr_lexer->pop_start_state ();

        curr_lexer->xunput (yytext[0]);
      }
  }

%{
// End of file will also end a series of full-line comments.
%}

<LINE_COMMENT_START><<EOF>> {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START><<EOF>>");

    // Input ended while collecting full-line comments: close out the
    // pending comment text as a full-line comment and leave the
    // LINE_COMMENT_START state.

    curr_lexer->finish_comment (octave::comment_elt::full_line);

    curr_lexer->pop_start_state ();
  }

%{
// Double-quoted character strings.
%}

<DQ_STRING_START>\"\" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\"");

    // Two consecutive double-quote characters inside the string stand
    // for one literal double-quote character.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '"';
  }

<DQ_STRING_START>\" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"");

    // Closing quote.  The token ends at the current position, taken
    // before the column is advanced past the quote.  (m_tok_beg was
    // recorded when the string was opened.)
    curr_lexer->m_tok_end = curr_lexer->m_filepos;
    curr_lexer->m_filepos.increment_column ();

    curr_lexer->pop_start_state ();

    // When the state we return to is COMMAND_START, nothing more is
    // done in this action and the collected text stays in
    // m_string_text.  Otherwise a DQ_STRING token is emitted.
    const bool inside_command
      = (curr_lexer->start_state () == COMMAND_START);

    if (! inside_command)
      {
        curr_lexer->m_looking_for_object_index = true;
        curr_lexer->m_at_beginning_of_statement = false;

        octave::token *str_tok
          = new octave::token (DQ_STRING, curr_lexer->m_string_text,
                               curr_lexer->m_tok_beg,
                               curr_lexer->m_tok_end);

        curr_lexer->push_token (str_tok);

        curr_lexer->m_string_text = "";

        return curr_lexer->count_token_internal (DQ_STRING);
      }
  }

<DQ_STRING_START>\\[0-7]{1,3} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");

    curr_lexer->update_token_positions (yyleng);

    // Convert the one to three octal digits that follow the backslash.
    unsigned int octal_value;
    sscanf (yytext+1, "%o", &octal_value);

    if (octal_value <= 0xff)
      curr_lexer->m_string_text += static_cast<unsigned char> (octal_value);
    else
      {
        // The value does not fit in one byte.  Report a lexical error
        // located at the octal digits.
        octave::token *err_tok
          = new octave::token (LEXICAL_ERROR,
                               "invalid octal escape sequence in character string",
                               curr_lexer->m_tok_beg, curr_lexer->m_tok_end);

        curr_lexer->push_token (err_tok);

        return curr_lexer->count_token_internal (LEXICAL_ERROR);
      }
  }

<DQ_STRING_START>\\x[0-9a-fA-F]+ {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\x[0-9a-fA-F]+");

    curr_lexer->m_filepos.increment_column (yyleng);

    // Convert the hexadecimal digits that follow the backslash and
    // the x.  The pattern places no limit on the number of digits, so
    // the value may exceed the range of the integer type.  strtoul has
    // a defined result for out-of-range input (ULONG_MAX), whereas
    // scanning such a value with sscanf and %x is undefined behavior.
    unsigned long result = std::strtoul (yytext+2, nullptr, 16);

    // Truncate the value silently instead of checking the range like
    // we do for octal above.  This is to match C/C++ where any number
    // of digits is allowed but the value is implementation-defined if
    // it exceeds the range of the character type.
    curr_lexer->m_string_text += static_cast<unsigned char> (result);
  }

<DQ_STRING_START>"\\a" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\"");

    // Backslash-a: append the alert character.  This rule and the six
    // that follow each advance the column by the length of the escape
    // and append the single character the escape names.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\a';
  }

<DQ_STRING_START>"\\b" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\"");

    // Backslash-b: backspace.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\b';
  }

<DQ_STRING_START>"\\f" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\"");

    // Backslash-f: form feed.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\f';
  }

<DQ_STRING_START>"\\n" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\"");

    // Backslash-n: newline.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\n';
  }

<DQ_STRING_START>"\\r" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\"");

    // Backslash-r: carriage return.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\r';
  }

<DQ_STRING_START>"\\t" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\"");

    // Backslash-t: horizontal tab.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\t';
  }

<DQ_STRING_START>"\\v" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\"");

    // Backslash-v: vertical tab.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\v';
  }

<DQ_STRING_START>(\.\.\.){S}*{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>(\\.\\.\\.){S}*{NL}");

    // Three dots, optional blanks, then a newline inside a
    // double-quoted string.  This form is deprecated: warn, then
    // process it with the same macro as the other continuations.

    /* FIXME: Remove support for '...' continuation in Octave 9 */
    static const char *msg = "'...' continuations in double-quoted character strings were deprecated in version 7 and will not be allowed in a future version of Octave; please use '\\' instead";

    curr_lexer->warn_deprecated_syntax (msg);

    HANDLE_STRING_CONTINUATION;
  }

<DQ_STRING_START>\\{S}+{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{S}+{NL}");

    // Backslash followed by at least one blank and then a newline.
    // The blanks after the marker are deprecated: warn, then process
    // the continuation.

    /* FIXME: Remove support for WS after line continuation in Octave 9 */
    static const char *msg = "whitespace after continuation markers in double-quoted character strings were deprecated in version 7 and will not be allowed in a future version of Octave";

    curr_lexer->warn_deprecated_syntax (msg);

    HANDLE_STRING_CONTINUATION;
  }

<DQ_STRING_START>\\{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}");

    // Backslash immediately followed by a newline: the form of string
    // continuation that produces no warning.
    HANDLE_STRING_CONTINUATION;
  }

<DQ_STRING_START>\\. {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\.");

    // Any other backslash escape: drop the backslash and keep the
    // character that follows it as is.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += yytext[1];
  }

<DQ_STRING_START>\. {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\.");

    // A single dot is matched on its own because the bulk-text pattern
    // below excludes dots (presumably so that the three-dot
    // continuation rule gets a chance to match).
    curr_lexer->m_filepos.increment_column ();
    curr_lexer->m_string_text += yytext[0];
  }

<DQ_STRING_START>[^\.\\\r\n\"]+ {
    curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\.\\\\\\r\\n\\\"]+");

    // A run of ordinary characters (no dot, backslash, CR, LF, or
    // double quote): copy it to the string text unchanged.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += yytext;
  }

<DQ_STRING_START>{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");

    // A newline was reached before the closing quote.  Report a
    // lexical error located at the current file position, then count
    // the line.
    curr_lexer->push_token
      (new octave::token (LEXICAL_ERROR,
                          "unterminated character string constant",
                          curr_lexer->m_filepos, curr_lexer->m_filepos));

    curr_lexer->m_filepos.next_line ();

    return curr_lexer->count_token_internal (LEXICAL_ERROR);
  }

%{
// Single-quoted character strings.
%}

<SQ_STRING_START>\'\' {
    curr_lexer->lexer_debug ("<SQ_STRING_START>\\'\\'");

    // Two consecutive single quotes inside the string stand for one
    // literal single-quote character.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\'';
  }

<SQ_STRING_START>\' {
    curr_lexer->lexer_debug ("<SQ_STRING_START>\\'");

    // Closing quote.  The token ends at the current position, taken
    // before the column is advanced past the quote.  (m_tok_beg was
    // recorded when the string was opened.)
    curr_lexer->m_tok_end = curr_lexer->m_filepos;
    curr_lexer->m_filepos.increment_column ();

    curr_lexer->pop_start_state ();

    // When the state we return to is COMMAND_START, nothing more is
    // done in this action and the collected text stays in
    // m_string_text.  Otherwise an SQ_STRING token is emitted.
    const bool inside_command
      = (curr_lexer->start_state () == COMMAND_START);

    if (! inside_command)
      {
        curr_lexer->m_looking_for_object_index = true;
        curr_lexer->m_at_beginning_of_statement = false;

        octave::token *str_tok
          = new octave::token (SQ_STRING, curr_lexer->m_string_text,
                               curr_lexer->m_tok_beg,
                               curr_lexer->m_tok_end);

        curr_lexer->push_token (str_tok);

        curr_lexer->m_string_text = "";

        return curr_lexer->count_token_internal (SQ_STRING);
      }
  }

<SQ_STRING_START>[^\'\n\r]+ {
    curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+");

    // A run of characters other than a single quote, LF, or CR is
    // copied to the string text unchanged; no escape processing is
    // done in this state.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += yytext;
  }

<SQ_STRING_START>{NL} {
    curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");

    // A newline was reached before the closing quote.  Report a
    // lexical error located at the current file position, then count
    // the line.
    curr_lexer->push_token
      (new octave::token (LEXICAL_ERROR,
                          "unterminated character string constant",
                          curr_lexer->m_filepos, curr_lexer->m_filepos));

    curr_lexer->m_filepos.next_line ();

    return curr_lexer->count_token_internal (LEXICAL_ERROR);
  }

%{
// Fully-qualified identifiers (used for classdef).
%}

<FQ_IDENT_START>{FQIDENT} {
    // The debug text mirrors the rule pattern, as in the other rules.
    curr_lexer->lexer_debug ("<FQ_IDENT_START>{FQIDENT}");

    // The fully-qualified identifier has been found, so leave the
    // FQ_IDENT_START state before handling it.
    curr_lexer->pop_start_state ();

    curr_lexer->update_token_positions (yyleng);

    int id_tok = curr_lexer->handle_fq_identifier ();

    // A negative value from the handler means there is no token to
    // return from this action.
    if (id_tok >= 0)
      {
        curr_lexer->m_looking_for_object_index = true;

        return curr_lexer->count_token_internal (id_tok);
      }
  }

<FQ_IDENT_START>{S}+ {
    curr_lexer->lexer_debug ("<FQ_IDENT_START>{S}+");

    // Blanks before the identifier: advance the column and record that
    // the previous token is followed by space.
    curr_lexer->m_filepos.increment_column (yyleng);

    curr_lexer->mark_previous_token_trailing_space ();
  }

<FQ_IDENT_START>(\.\.\.){ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<FQ_IDENT_START>(\\.\\.\\.){ANY_EXCEPT_NL}*{NL}");

    // Continuation line: the rest of the line is discarded and only
    // the line count is updated.
    curr_lexer->m_filepos.next_line ();
  }

<FQ_IDENT_START>{ANY_INCLUDING_NL} {
    curr_lexer->lexer_debug ("<FQ_IDENT_START>{ANY_INCLUDING_NL}");

    // If input doesn't match FQIDENT, return char and go to previous
    // start state.

    yyless (0);
    curr_lexer->pop_start_state ();
  }

{BINARY_NUMBER} {
    // Binary literal.  NOTE(review): the second macro argument is
    // presumably the numeric base; confirm in the macro definition.
    HANDLE_NUMBER ("{BINARY_NUMBER}", 2);
  }

%{
// Decimal numbers.  For expressions that are just digits followed
// directly by an element-by-element operator, don't grab the '.'
// part of the operator as part of the constant (for example, in an
// expression like "13./x").
%}

{DECIMAL_DIGITS}/\.[\*/\\^\'] |
{DECIMAL_NUMBER} {
    // The first pattern uses trailing context: digits that are directly
    // followed by a dot operator are matched without the dot, so the
    // dot stays available for the operator rule.
    HANDLE_NUMBER ("{DECIMAL_DIGITS}/\\.[\\*/\\\\^\\']|{DECIMAL_NUMBER}", 10);
  }

{HEXADECIMAL_NUMBER} {
    // Hexadecimal literal.  NOTE(review): the second macro argument is
    // presumably the numeric base; confirm in the macro definition.
    HANDLE_NUMBER ("{HEXADECIMAL_NUMBER}", 16);
  }

%{
// Eat whitespace.  Whitespace inside matrix constants is handled by
// the <MATRIX_START> start state code above.
%}

{S}+ {
    // Blanks produce no token.  Advance the column and record that the
    // previous token is followed by space; later rules query that flag
    // through space_follows_previous_token.
    curr_lexer->m_filepos.increment_column (yyleng);

    curr_lexer->mark_previous_token_trailing_space ();
  }

%{
// Continuation lines.  Allow arbitrary text after continuations.
%}

\.\.\.{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("\\.\\.\\.{ANY_EXCEPT_NL}*{NL}");

    // Three dots, any text up to the end of the line, and the newline
    // itself are all part of the match; the lexer object does the rest.
    curr_lexer->handle_continuation ();
  }

%{
// Deprecated C preprocessor style continuation markers.
%}

\\{S}*{NL} |
\\{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("\\\\{S}*{NL}|\\\\{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    // Backslash at the end of a line, optionally followed by blanks
    // and a comment.  Still accepted as a continuation marker, but a
    // deprecation warning is issued first.

    /* FIXME: Remove support for '\\' line continuation in Octave 9 */
    static const char *msg = "using continuation marker \\ outside of double quoted strings was deprecated in version 7 and will be removed from a future version of Octave, use ... instead";

    curr_lexer->warn_deprecated_syntax (msg);

    curr_lexer->handle_continuation ();
  }

%{
// End of file.
%}

<<EOF>> {
   // End of input: the value returned to the caller is whatever the
   // lexer object reports.
   return curr_lexer->handle_end_of_input ();
  }

%{
// Identifiers.

// Don't allow get and set to be recognized as keywords if they are
// followed by "(".
%}

(set|get){S}*\( {
    // NOTE(review): the second macro argument is true only for this
    // set/get pattern and false for plain identifiers; confirm its
    // meaning in the HANDLE_IDENTIFIER macro definition.
    HANDLE_IDENTIFIER ("(set|get){S}*\\(", true);
  }

{IDENT} {
    // Ordinary identifier.
    HANDLE_IDENTIFIER ("{IDENT}", false);
  }

%{
// Superclass method identifiers.
%}

{FQIDENT}{S}*@{S}*{FQIDENT} {
    curr_lexer->lexer_debug ("{FQIDENT}{S}*@{S}*{FQIDENT}");

    // Text of the form NAME @ NAME, where the blanks around the @ are
    // optional.

    if (curr_lexer->previous_token_may_be_command ())
      {
        // Put the whole match back and scan it again in the
        // COMMAND_START state.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        if (curr_lexer->m_at_beginning_of_statement)
          {
            std::string txt = yytext;

            std::size_t at_or_dot_pos = txt.find_first_of ("@.");

            if (at_or_dot_pos != std::string::npos)
              {
                std::size_t spc_pos = txt.find_first_of (" \t");

                // If a blank occurs before the first @ or dot, keep
                // only the text up to that blank (yyless returns the
                // rest to the input) and handle it as a plain
                // identifier.
                if (spc_pos != std::string::npos && spc_pos < at_or_dot_pos)
                  {
                    yyless (spc_pos);
                    curr_lexer->m_filepos.increment_column (spc_pos);

                    return curr_lexer->handle_identifier ();
                  }
              }
          }

        curr_lexer->m_looking_for_object_index = true;
        curr_lexer->m_at_beginning_of_statement = false;

        return curr_lexer->handle_superclass_identifier ();
      }
  }

%{
// Metaclass query
%}

\?{S}*{FQIDENT} {
    curr_lexer->lexer_debug ("\\?{S}*{FQIDENT}");

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Put the whole match back and scan it again in the
        // COMMAND_START state.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        curr_lexer->update_token_positions (yyleng);

        int id_tok = curr_lexer->handle_meta_identifier ();

        // A negative value from the handler means there is no token
        // to return from this action.
        if (id_tok >= 0)
          {
            curr_lexer->m_looking_for_object_index = true;

            return curr_lexer->count_token_internal (id_tok);
          }
      }
  }

\@ |
\@{S}*{FQIDENT} {
    curr_lexer->lexer_debug ("\\@|\\@{S}*{FQIDENT}");

    // Either a bare @ or an @ followed by an identifier (a function
    // handle to a named function).

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Put the whole match back and scan it again in the
        // COMMAND_START state.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        int tok_val = curr_lexer->previous_token_value ();

        if (curr_lexer->whitespace_is_significant ()
            && curr_lexer->space_follows_previous_token ()
            && ! (tok_val == '[' || tok_val == '{'
                  || curr_lexer->previous_token_is_binop ()))
          {
            // Whitespace is significant and a blank separates the
            // previous token from the @.  Put the match back and push
            // a comma onto the input in front of it.
            yyless (0);
            curr_lexer->xunput (',');
          }
        else
          {
            curr_lexer->update_token_positions (yyleng);

            curr_lexer->m_at_beginning_of_statement = false;

            std::string ident = yytext;

            if (ident == "@")
              {
                // Bare @: return it as a single-character token.
                curr_lexer->m_looking_at_function_handle++;
                curr_lexer->m_looking_for_object_index = false;

                return curr_lexer->count_token ('@');
              }
            else
              {
                // Strip the leading @ and any blanks or tabs to get
                // the function name.
                ident = ident.substr (1);
                ident.erase (std::remove_if (ident.begin (), ident.end (),
                                             is_space_or_tab), ident.end ());

                octave::token *tok;

                if (octave::iskeyword (ident))
                  tok = new octave::token (LEXICAL_ERROR,
                                           "function handles may not refer to keywords",
                                           curr_lexer->m_tok_beg,
                                           curr_lexer->m_tok_end);
                else
                  {
                    curr_lexer->m_looking_for_object_index = true;

                    tok = new octave::token (FCN_HANDLE, ident,
                                             curr_lexer->m_tok_beg,
                                             curr_lexer->m_tok_end);
                  }

                curr_lexer->push_token (tok);

                // The value returned is that of whichever token was
                // created above (error or function handle).
                return curr_lexer->count_token_internal (tok->token_value ());
              }
          }
      }
  }

%{
// A new line character.  New line characters inside matrix constants
// are handled by the <MATRIX_START> start state code above.  If closest
// nesting is inside parentheses, don't return a row separator.
%}

{NL} {
    curr_lexer->lexer_debug ("{NL}");

    if (curr_lexer->m_nesting_level.is_paren ())
      {
        // Inside parentheses the newline produces no token; it is
        // counted and reported as a language extension.
        curr_lexer->m_filepos.next_line ();

        curr_lexer->m_at_beginning_of_statement = false;
        curr_lexer->warn_language_extension
          ("bare newline inside parentheses");
      }
    else if (curr_lexer->m_nesting_level.none ()
        || curr_lexer->m_nesting_level.is_anon_fcn_body ())
      {
        // Not nested, or in the body of an anonymous function: the
        // newline is returned as a token and a new statement begins.
        curr_lexer->update_token_positions (yyleng);
        curr_lexer->m_filepos.next_line ();

        curr_lexer->m_at_beginning_of_statement = true;

        return curr_lexer->count_token ('\n');
      }
    else if (curr_lexer->m_nesting_level.is_bracket_or_brace ())
      {
        // Newlines inside brackets or braces are expected to be
        // consumed by the MATRIX_START rules, so reaching this branch
        // is reported as an internal error.
        curr_lexer->update_token_positions (yyleng);
        curr_lexer->m_filepos.next_line ();

        // Use current file position for error token.
        octave::token *tok
          = new octave::token (LEXICAL_ERROR,
                               "unexpected internal lexer error",
                               curr_lexer->m_filepos, curr_lexer->m_filepos);

        curr_lexer->push_token (tok);

        return curr_lexer->count_token_internal (LEXICAL_ERROR);
      }
  }

%{
// Single quote can either be the beginning of a string or a transpose
// operator.  The action below decides between the two from the
// previous token and from whether a space follows it.
%}

"'" {
    curr_lexer->lexer_debug ("'");

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Start of a quoted argument in a command-style call: enter
        // COMMAND_START and begin the string from there.
        curr_lexer->m_filepos.increment_column ();
        curr_lexer->push_start_state (COMMAND_START);
        curr_lexer->begin_string (SQ_STRING_START);
      }
    else if (curr_lexer->m_at_beginning_of_statement)
      {
        // At the start of a statement the quote begins a string.
        curr_lexer->m_filepos.increment_column ();
        curr_lexer->begin_string (SQ_STRING_START);
      }
    else
      {
        int tok = curr_lexer->previous_token_value ();

        if (curr_lexer->whitespace_is_significant ())
          {
            if (curr_lexer->space_follows_previous_token ())
              {
                if (tok == '[' || tok == '{'
                    || curr_lexer->previous_token_is_binop ())
                  {
                    curr_lexer->m_filepos.increment_column ();
                    curr_lexer->begin_string (SQ_STRING_START);
                  }
                else
                  {
                    // A blank separates the previous token from the
                    // quote.  Put the quote back and push a comma onto
                    // the input in front of it.
                    yyless (0);
                    curr_lexer->xunput (',');
                  }
              }
            else
              {
                if (tok == '[' || tok == '{'
                    || curr_lexer->previous_token_is_binop ()
                    || curr_lexer->previous_token_is_keyword ())
                  {
                    curr_lexer->m_filepos.increment_column ();
                    curr_lexer->begin_string (SQ_STRING_START);
                  }
                else
                  {
                    // The quote directly follows some other token, so
                    // it is the transpose operator.
                    curr_lexer->m_filepos.increment_column ();
                    return curr_lexer->count_token (HERMITIAN);
                  }
              }
          }
        else
          {
            // Whitespace is not significant: the quote begins a string
            // when there is no previous token value or the previous
            // token is an opening delimiter, a binary operator, or a
            // keyword.  Otherwise it is the transpose operator.
            if (! tok || tok == '[' || tok == '{' || tok == '('
                || curr_lexer->previous_token_is_binop ()
                || curr_lexer->previous_token_is_keyword ())
              {
                curr_lexer->m_filepos.increment_column ();
                curr_lexer->begin_string (SQ_STRING_START);
              }
            else
              {
                curr_lexer->m_filepos.increment_column ();
                return curr_lexer->count_token (HERMITIAN);
              }
          }
      }
  }

%{
// Double quotes always begin strings.  The only exception handled in
// the action below is a quote that follows a space where whitespace is
// significant: a comma is pushed onto the input first.
%}

\" {
    curr_lexer->lexer_debug ("\\\"");

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Start of a quoted argument in a command-style call: enter
        // COMMAND_START and begin the string from there.
        curr_lexer->m_filepos.increment_column ();
        curr_lexer->push_start_state (COMMAND_START);
        curr_lexer->begin_string (DQ_STRING_START);
      }
    else
      {
        int tok = curr_lexer->previous_token_value ();

        if (curr_lexer->whitespace_is_significant ())
          {
            if (curr_lexer->space_follows_previous_token ())
              {
                if (tok == '[' || tok == '{'
                    || curr_lexer->previous_token_is_binop ())
                  {
                    curr_lexer->m_filepos.increment_column ();
                    curr_lexer->begin_string (DQ_STRING_START);
                  }
                else
                  {
                    // A blank separates the previous token from the
                    // quote.  Put the quote back and push a comma onto
                    // the input in front of it.
                    yyless (0);
                    curr_lexer->xunput (',');
                  }
              }
            else
              {
                curr_lexer->m_filepos.increment_column ();
                curr_lexer->begin_string (DQ_STRING_START);
              }
          }
        else
          {
            curr_lexer->m_filepos.increment_column ();
            curr_lexer->begin_string (DQ_STRING_START);
          }
      }
  }

%{
// Other operators.
//
// Each action in this table defers to a helper macro defined earlier
// in this file.  NOTE(review): the arguments look like (debug text,
// token value, Matlab-compatibility flag) for CMD_OR_OP and (debug
// text, replacement operator, version in which the operator was
// deprecated, token value) for CMD_OR_DEPRECATED_OP -- confirm against
// the macro definitions.
%}

":"   { CMD_OR_OP (":", ':', true); }
".+"  { CMD_OR_DEPRECATED_OP (".+", "+", 7, '+'); }
".-"  { CMD_OR_DEPRECATED_OP (".-", "-", 7, '-'); }
".*"  { CMD_OR_OP (".*", EMUL, true); }
"./"  { CMD_OR_OP ("./", EDIV, true); }
".\\" { CMD_OR_OP (".\\", ELEFTDIV, true); }
".^"  { CMD_OR_OP (".^", EPOW, true); }
".**" { CMD_OR_DEPRECATED_OP (".**", ".^", 7, EPOW); }
"<="  { CMD_OR_OP ("<=", EXPR_LE, true); }
"=="  { CMD_OR_OP ("==", EXPR_EQ, true); }
"!="  { CMD_OR_OP ("!=", EXPR_NE, false); }
"~="  { CMD_OR_OP ("~=", EXPR_NE, true); }
">="  { CMD_OR_OP (">=", EXPR_GE, true); }
"&"   { CMD_OR_OP ("&", EXPR_AND, true); }
"|"   { CMD_OR_OP ("|", EXPR_OR, true); }
"<"   { CMD_OR_OP ("<", EXPR_LT, true); }
">"   { CMD_OR_OP (">", EXPR_GT, true); }
"*"   { CMD_OR_OP ("*", '*', true); }
"/"   { CMD_OR_OP ("/", '/', true); }

%{
// In Matlab, '\' may also trigger command syntax.
%}

"\\" {
    // FIXME: After backslash is no longer handled as a line
    // continuation marker outside of character strings, this
    // action may be replaced with
    //
    //   CMD_OR_OP ("\\", LEFTDIV, true);

    curr_lexer->lexer_debug ("\\");

    return curr_lexer->handle_op (LEFTDIV);
  }

"^"   { CMD_OR_OP ("^", POW, true); }
"**"  { CMD_OR_DEPRECATED_OP ("**", "^", 7, POW); }
"&&"  { CMD_OR_OP ("&&", EXPR_AND_AND, true); }
"||"  { CMD_OR_OP ("||", EXPR_OR_OR, true); }

";" {
    curr_lexer->lexer_debug (";");

    // The semicolon is passed on with a flag that is true only when
    // whitespace is not significant and the innermost entry of
    // m_looking_at_object_index is false.
    bool at_beginning_of_statement
      = (! (curr_lexer->whitespace_is_significant ()
            || curr_lexer->m_looking_at_object_index.front ()));

    return curr_lexer->handle_op (';', at_beginning_of_statement);
  }

"+" { CMD_OR_UNARY_OP ("+", '+', true); }
"-" { CMD_OR_UNARY_OP ("-", '-', true); }

"~" { CMD_OR_UNARY_OP ("~", '~', true); }
"!" { CMD_OR_UNARY_OP ("!", '!', false); }

"," {
    curr_lexer->lexer_debug (",");

    // Same flag computation as for the semicolon rule.
    bool at_beginning_of_statement
      = (! (curr_lexer->whitespace_is_significant ()
            || curr_lexer->m_looking_at_object_index.front ()));

    return curr_lexer->handle_op (',', at_beginning_of_statement);
  }

".'" {
    curr_lexer->lexer_debug (".'");

    return curr_lexer->handle_op (TRANSPOSE);
  }

"++" { CMD_OR_UNARY_OP ("++", PLUS_PLUS, false); }
"--" { CMD_OR_UNARY_OP ("--", MINUS_MINUS, false); }

"(" {
    curr_lexer->lexer_debug ("(");

    bool unput_comma = false;

    // Where whitespace is significant, a blank before the open paren
    // separates two elements unless the previous token is an opening
    // bracket or brace or a binary operator.
    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        int tok = curr_lexer->previous_token_value ();

        if (! (tok == '[' || tok == '{'
               || curr_lexer->previous_token_is_binop ()))
          unput_comma = true;
      }

    if (unput_comma)
      {
        // Put the paren back and push a comma onto the input in front
        // of it.
        yyless (0);
        curr_lexer->xunput (',');
      }
    else
      {
        curr_lexer->update_token_positions (yyleng);

        // If we are looking for an object index, then push TRUE for
        // m_looking_at_object_index.  Otherwise, just push whatever state
        // is current (so that we can pop it off the stack when we find
        // the matching close paren).

        curr_lexer->m_looking_at_object_index.push_front
          (curr_lexer->m_looking_for_object_index);

        curr_lexer->m_looking_at_indirect_ref = false;
        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        curr_lexer->m_nesting_level.paren ();

        return curr_lexer->handle_token ('(');
      }
  }

")" {
    curr_lexer->lexer_debug (")");

    curr_lexer->update_token_positions (yyleng);

    // Undo the nesting and index state that the open paren rule pushed.
    curr_lexer->m_nesting_level.remove ();

    curr_lexer->m_looking_at_object_index.pop_front ();

    curr_lexer->m_looking_for_object_index = true;
    curr_lexer->m_at_beginning_of_statement = false;

    // This paren closes the argument list of an anonymous function, so
    // what follows is recorded as the function body.
    if (curr_lexer->m_looking_at_anon_fcn_args)
      {
        curr_lexer->m_looking_at_anon_fcn_args = false;
        curr_lexer->m_nesting_level.anon_fcn_body ();
      }

    return curr_lexer->count_token (')');
  }

"." {
    curr_lexer->lexer_debug (".");

    if (curr_lexer->previous_token_may_be_command ()
        && curr_lexer->space_follows_previous_token ())
      {
        // Put the dot back and scan it again in the COMMAND_START
        // state.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        curr_lexer->update_token_positions (yyleng);

        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        return curr_lexer->handle_token ('.');
      }
  }

%{
// = and op= operators.
//
// Plain assignment goes straight to handle_op; the computed-assignment
// operators use the same helper macros as the operator table above.
// NOTE(review): the final CMD_OR_OP argument is false for every op=
// form here; it looks like a Matlab-compatibility flag -- confirm
// against the macro definition.
%}

"=" {
    curr_lexer->lexer_debug ("=");

    return curr_lexer->handle_op ('=');
  }

"+="   { CMD_OR_OP ("+=", ADD_EQ, false); }
"-="   { CMD_OR_OP ("-=", SUB_EQ, false); }
"*="   { CMD_OR_OP ("*=", MUL_EQ, false); }
"/="   { CMD_OR_OP ("/=", DIV_EQ, false); }
"\\="  { CMD_OR_OP ("\\=", LEFTDIV_EQ, false); }
".+="  { CMD_OR_DEPRECATED_OP (".+=", "+=", 7, ADD_EQ); }
".-="  { CMD_OR_DEPRECATED_OP (".-=", "-=", 7, SUB_EQ); }
".*="  { CMD_OR_OP (".*=", EMUL_EQ, false); }
"./="  { CMD_OR_OP ("./=", EDIV_EQ, false); }
".\\=" { CMD_OR_OP (".\\=", ELEFTDIV_EQ, false); }
"^="   { CMD_OR_OP ("^=", POW_EQ, false); }
"**="  { CMD_OR_DEPRECATED_OP ("**=", "^=", 7, POW_EQ); }
".^="  { CMD_OR_OP (".^=", EPOW_EQ, false); }
".**=" { CMD_OR_DEPRECATED_OP (".**=", ".^=", 7, EPOW_EQ); }
"&="   { CMD_OR_OP ("&=", AND_EQ, false); }
"|="   { CMD_OR_OP ("|=", OR_EQ, false); }

%{
// In Matlab, '{' may also trigger command syntax.
%}

"{" {
    curr_lexer->lexer_debug ("{");

    bool unput_comma = false;

    // Where whitespace is significant, a blank before the open brace
    // separates two elements unless the previous token is an opening
    // bracket or brace or a binary operator.
    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        int tok = curr_lexer->previous_token_value ();

        if (! (tok == '[' || tok == '{'
               || curr_lexer->previous_token_is_binop ()))
          unput_comma = true;
      }

    if (unput_comma)
      {
        // Put the brace back and push a comma onto the input in front
        // of it.
        yyless (0);
        curr_lexer->xunput (',');
      }
    else
      {
        curr_lexer->m_nesting_level.brace ();

        // Save the current index state so the matching close brace
        // can pop it.
        curr_lexer->m_looking_at_object_index.push_front
          (curr_lexer->m_looking_for_object_index);

        curr_lexer->m_filepos.increment_column (yyleng);
        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        curr_lexer->m_braceflag++;

        // The contents of the braces are scanned by the MATRIX_START
        // rules.
        curr_lexer->push_start_state (MATRIX_START);

        return curr_lexer->count_token ('{');
      }
  }

"}" {
    curr_lexer->lexer_debug ("}");

    curr_lexer->update_token_positions (yyleng);

    // Undo the index and nesting state pushed for the open brace.
    curr_lexer->m_looking_at_object_index.pop_front ();

    curr_lexer->m_looking_for_object_index = true;
    curr_lexer->m_at_beginning_of_statement = false;

    curr_lexer->m_nesting_level.remove ();

    return curr_lexer->handle_token ('}');
  }

%{
// Unrecognized input.  If the previous token may be a command and is
// followed by a space, parse the remainder of this statement as a
// command-style function call.  Otherwise, unrecognized input is a
// lexical error.
%}

. {
    curr_lexer->lexer_debug (".");

    // Push the matched character back and read it again through the
    // lexer's own input routine.
    curr_lexer->xunput (yytext[0]);

    int c = curr_lexer->text_yyinput ();

    // A value of 1 is the ASCII 1 marker that is added to the input at
    // the end of the buffer when the push lexer interface is used.
    if (c == 1)
      return -1;
    else if (c == EOF)
      return curr_lexer->handle_end_of_input ();
    else if (curr_lexer->previous_token_may_be_command ()
             && curr_lexer->space_follows_previous_token ())
      {
        // Put the character back and scan it again in the
        // COMMAND_START state.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        // Build an error message that shows the character in escaped
        // form together with its numeric value.
        std::ostringstream buf;

        buf << "invalid character '"
            << octave::undo_string_escape (static_cast<char> (c))
            << "' (ASCII " << c << ")";

        // Use current file position for error token.
        octave::token *tok
          = new octave::token (LEXICAL_ERROR, buf.str (),
                               curr_lexer->m_filepos, curr_lexer->m_filepos);

        curr_lexer->push_token (tok);

        curr_lexer->m_filepos.increment_column ();

        return curr_lexer->count_token_internal (LEXICAL_ERROR);
      }
  }

%{
#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
   // Disable these warnings for flex code.
#  pragma GCC diagnostic ignored "-Wold-style-cast"
#  pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
%}

%%

#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
   // Restore prevailing warning state for remainder of the file.
#  pragma GCC diagnostic pop
#endif

void *
octave_alloc (yy_size_t size, yyscan_t)
{
  // Allocate SIZE bytes with the C library allocator.  The scanner
  // handle argument is not used.
  void *block = std::malloc (size);

  return block;
}

void *
octave_realloc (void *ptr, yy_size_t size, yyscan_t)
{
  // Resize the block at PTR to SIZE bytes with the C library
  // allocator.  The scanner handle argument is not used.
  void *resized = std::realloc (ptr, size);

  return resized;
}

void
octave_free (void *ptr, yyscan_t)
{
  // Release a block obtained from octave_alloc or octave_realloc.
  // The scanner handle argument is not used.
  std::free (ptr);

  return;
}

// Write a readable representation of C to std::cerr.
//
// Characters with a visible glyph are written as is.  Control
// characters and the space character are written by name (NUL, ESC,
// SPACE, ...) or, where one exists, as the corresponding C escape
// sequence (\n, \t, ...).  DEL is written as DEL.  Any other character
// produces no output.

static void
display_character (char c)
{
  // Names for character codes 0 through 32, indexed by code.
  static const char *const ctrl_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  static const unsigned int num_ctrl_names
    = sizeof (ctrl_names) / sizeof (ctrl_names[0]);

  // The <cctype> classification functions require a value that is
  // representable as unsigned char (or EOF); passing a negative plain
  // char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc < num_ctrl_names)
    std::cerr << ctrl_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
}

OCTAVE_BEGIN_NAMESPACE(octave)

DEFUN (iskeyword, args, ,
       doc: /* -*- texinfo -*-
@deftypefn  {} {} iskeyword ()
@deftypefnx {} {} iskeyword (@var{name})
Return true if @var{name} is an Octave keyword.

If @var{name} is omitted, return a list of keywords.
@seealso{isvarname, exist}
@end deftypefn */)
{
  const int nargin = args.length ();

  if (nargin > 1)
    print_usage ();

  octave_value retval;

  if (nargin != 0)
    {
      // One argument: report whether NAME is a keyword.
      std::string name = args(0).xstring_value ("iskeyword: NAME must be a string");
      retval = iskeyword (name);
    }
  else
    {
      // No argument: return the sorted list of keywords.  The words
      // set, get, arguments, enumeration, events, methods, and
      // properties appear in the keyword table but are left out of
      // the list.
      //
      // FIXME: this exclusion list is duplicated in the iskeyword
      // function.

      auto omit_from_list = [] (const std::string& kw)
        {
          return (kw == "set" || kw == "get" || kw == "arguments"
                  || kw == "enumeration" || kw == "events"
                  || kw == "methods" || kw == "properties");
        };

      string_vector kw_list (TOTAL_KEYWORDS);

      int n_kept = 0;

      for (int idx = 0; idx < TOTAL_KEYWORDS; idx++)
        {
          std::string kword = wordlist[idx].name;

          if (! omit_from_list (kword))
            kw_list[n_kept++] = kword;
        }

      // Trim the slots that were reserved for the omitted words.
      kw_list.resize (n_kept);

      retval = Cell (kw_list.sort ());
    }

  return retval;
}

/*

%!assert (iskeyword ("for"))
%!assert (iskeyword ("fort"), false)
%!assert (iskeyword ("fft"), false)
%!assert (iskeyword ("get"), false)
%!assert (iskeyword ("set"), false)

%!error iskeyword ("A", "B")
%!error <NAME must be a string> iskeyword (1)

*/

  // Remove every scope from the frame stack, one at a time from the
  // front, leaving the stack empty.
  void
  lexical_feedback::symbol_table_context::clear (void)
  {
    for (std::size_t remaining = m_frame_stack.size (); remaining > 0;
         remaining--)
      m_frame_stack.pop_front ();
  }

  // Remove the scope at the front of the frame stack.  Popping an
  // empty stack is treated as an internal error.
  void
  lexical_feedback::symbol_table_context::pop (void)
  {
    const bool nothing_to_pop = empty ();

    if (nothing_to_pop)
      panic_impossible ();

    m_frame_stack.pop_front ();
  }

  // Return the scope at the front of the frame stack, or the
  // interpreter's current scope when the stack is empty.
  symbol_scope
  lexical_feedback::symbol_table_context::curr_scope (void) const
  {
    if (! empty ())
      return m_frame_stack.front ();

    return m_interpreter.get_current_scope ();
  }

  // Return the second entry of the frame stack when there are at
  // least two entries, the only entry when there is exactly one, and
  // a default-constructed scope when the stack is empty.
  symbol_scope
  lexical_feedback::symbol_table_context::parent_scope (void) const
  {
    const std::size_t depth = size ();

    if (depth > 1)
      return m_frame_stack[1];

    if (depth == 1)
      return m_frame_stack[0];

    return symbol_scope ();
  }

  // Destructor.  Explicitly clears m_tokens before the object goes
  // away.
  lexical_feedback::~lexical_feedback (void)
  {
    m_tokens.clear ();
  }

  // Initial setup: seed the m_looking_at_object_index stack with a
  // single false entry.
  void
  lexical_feedback::init (void)
  {
    // The closest paren, brace, or bracket nesting is not an object
    // index.
    m_looking_at_object_index.push_front (false);
  }

  // Return all lexical feedback state to its starting values: flags,
  // counters, file positions, accumulated text, and the various
  // stacks and caches.
  void
  lexical_feedback::reset (void)
  {
    // Boolean state flags.  Everything is cleared except
    // m_allow_command_syntax and m_at_beginning_of_statement, which
    // start out true.
    m_end_of_input = false;
    m_allow_command_syntax = true;
    m_at_beginning_of_statement = true;
    m_looking_at_anon_fcn_args = false;
    m_looking_at_return_list = false;
    m_looking_at_parameter_list = false;
    m_looking_at_decl_list = false;
    m_looking_at_matrix_or_assign_lhs = false;
    m_looking_for_object_index = false;
    m_looking_at_indirect_ref = false;
    m_arguments_is_keyword = false;
    m_classdef_element_names_are_keywords = false;
    m_parsing_anon_fcn_body = false;
    m_parsing_class_method = false;
    m_parsing_classdef = false;
    m_parsing_classdef_decl = false;
    m_parsing_classdef_superclass = false;
    m_maybe_classdef_get_set_method = false;
    m_parsing_classdef_get_method = false;
    m_parsing_classdef_set_method = false;
    m_quote_is_transpose = false;
    m_force_script = false;
    m_reading_fcn_file = false;
    m_reading_script_file = false;
    m_reading_classdef_file = false;
    m_buffer_function_text = false;
    // Counters and nesting depths.
    m_bracketflag = 0;
    m_braceflag = 0;
    m_looping = 0;
    m_defining_fcn = 0;
    m_looking_at_function_handle = 0;
    m_block_comment_nesting_level = 0;
    m_command_arg_paren_count = 0;
    m_token_count = 0;
    // Position tracking: the file position restarts at (1, 1) and the
    // token bounds become default-constructed positions.
    m_filepos = filepos (1, 1);
    m_tok_beg = filepos ();
    m_tok_end = filepos ();
    // Accumulated text and file/package names.
    m_string_text = "";
    m_current_input_line = "";
    m_comment_text = "";
    m_help_text = "";
    m_function_text = "";
    m_fcn_file_name = "";
    m_fcn_file_full_name = "";
    m_dir_name = "";
    m_package_name = "";
    // Leave the object-index stack holding a single false entry, the
    // same state that init () sets up.
    m_looking_at_object_index.clear ();
    m_looking_at_object_index.push_front (false);

    // Empty the stack of parsed-function-name flags.
    while (! m_parsed_function_name.empty ())
      m_parsed_function_name.pop ();

    m_symtab_context.clear ();
    m_nesting_level.reset ();
    m_tokens.clear ();
  }

  int
  lexical_feedback::previous_token_value (void) const
  {
    const token *tok = m_tokens.front ();
    return tok ? tok->token_value () : 0;
  }

  bool
  lexical_feedback::previous_token_value_is (int tok_val) const
  {
    const token *tok = m_tokens.front ();
    return tok ? tok->token_value_is (tok_val) : false;
  }

  void
  lexical_feedback::mark_previous_token_trailing_space (void)
  {
    token *tok = m_tokens.front ();
    if (tok && ! previous_token_value_is ('\n'))
      tok->mark_trailing_space ();
  }

  bool
  lexical_feedback::space_follows_previous_token (void) const
  {
    const token *tok = m_tokens.front ();
    return tok ? tok->space_follows_token () : false;
  }

  // Return true if the previous token value is one of the operator or
  // separator tokens listed below.  Note that the set is broader than
  // strict binary operators: it also contains '@', '~', '!', ',' and
  // ';'.
  bool
  lexical_feedback::previous_token_is_binop (void) const
  {
    switch (previous_token_value ())
      {
      // Single-character tokens.
      case '+': case '-': case '@': case '~': case '!':
      case ',': case ';': case '*': case '/':
      case ':': case '=':

      // Computed-assignment operators.
      case ADD_EQ: case SUB_EQ: case MUL_EQ: case DIV_EQ:
      case LEFTDIV_EQ: case POW_EQ: case EMUL_EQ: case EDIV_EQ:
      case ELEFTDIV_EQ: case EPOW_EQ: case AND_EQ: case OR_EQ:

      // Arithmetic operators.
      case EMUL: case EDIV: case ELEFTDIV: case LEFTDIV:
      case EPOW: case POW:

      // Logical and comparison operators.
      case EXPR_AND: case EXPR_AND_AND: case EXPR_OR: case EXPR_OR_OR:
      case EXPR_EQ: case EXPR_NE: case EXPR_GE: case EXPR_GT:
      case EXPR_LE: case EXPR_LT:
        return true;

      default:
        return false;
      }
  }

  bool
  lexical_feedback::previous_token_is_keyword (void) const
  {
    const token *tok = m_tokens.front ();
    return tok ? tok->iskeyword () : false;
  }

  void
  lexical_feedback::mark_as_variable (const std::string& nm)
  {
    symbol_scope scope = m_symtab_context.curr_scope ();

    if (scope)
      scope.mark_as_variable (nm);
  }

  void
  lexical_feedback::mark_as_variables (const std::list<std::string>& lst)
  {
    symbol_scope scope = m_symtab_context.curr_scope ();

    if (scope)
      scope.mark_as_variables (lst);
  }

  bool
  lexical_feedback::previous_token_may_be_command (void) const
  {
    if (! m_allow_command_syntax)
      return false;

    const token *tok = m_tokens.front ();
    return tok ? tok->may_be_command () : false;
  }

// Return true if the first non-whitespace text in S starts with
// "Copyright", "Author", or "SPDX-License-Identifier".
static bool
looks_like_copyright (const std::string& s)
{
  // Comment characters have been stripped but whitespace
  // (including newlines) remains.

  const std::size_t start = s.find_first_not_of (" \t\n\r");

  // Empty or all-whitespace text.
  if (start == std::string::npos)
    return false;

  // True if the LEN characters of S beginning at START equal WORD.
  auto begins_with = [&s, start] (const char *word, std::size_t len) -> bool
    {
      return s.compare (start, len, word) == 0;
    };

  return (begins_with ("Copyright", 9)
          || begins_with ("Author", 6)
          || begins_with ("SPDX-License-Identifier", 23));
}

// Return true if S is non-empty and its first character is '!'.
static bool
looks_like_shebang (const std::string& s)
{
  if (s.empty ())
    return false;

  return s[0] == '!';
}

  // Replace the buffer contents with INPUT, restart reading from the
  // beginning, and record EOF_ARG as the end-of-file flag.
  void
  base_lexer::input_buffer::fill (const std::string& input, bool eof_arg)
  {
    m_buffer = input;

    // Nothing has been consumed yet, so the whole buffer is left.
    m_offset = 0;
    m_chars_left = m_buffer.length ();

    m_eof = eof_arg;
  }

  // Copy the next chunk of buffered input into BUF, which has room for
  // MAX_SIZE characters, and return the number of characters stored.
  //
  // If BY_LINES is true, return chunks to the lexer line by line.  A
  // line that is longer than MAX_SIZE is returned in pieces over
  // several calls so that BUF is never overrun.
  int
  base_lexer::input_buffer::copy_chunk (char *buf, std::size_t max_size,
                                        bool by_lines)
  {
    static const char * const eol = "\n";

    // Never return more than BUF can hold or more than is left in the
    // buffer.
    std::size_t len = max_size > m_chars_left ? m_chars_left : max_size;

    if (by_lines)
      {
        std::size_t newline_pos = m_buffer.find ('\n', m_offset);

        if (newline_pos != std::string::npos)
          {
            // Stop just after the newline, unless the line does not
            // fit in BUF.  In that case return only what fits; the
            // rest of the line is returned by the following calls.
            std::size_t line_len = newline_pos - m_offset + 1;

            if (line_len < len)
              len = line_len;
          }
      }

    assert (len > 0);
    memcpy (buf, m_buffer.c_str () + m_offset, len);

    m_chars_left -= len;
    m_offset += len;

    // Make sure the final input returned to the lexer ends with a new
    // line character.

    if (m_chars_left == 0 && buf[len-1] != '\n')
      {
        if (len < max_size)
          {
            // There is enough room to plug the newline character in
            // the buffer.
            buf[len++] = '\n';
          }
        else
          {
            // There isn't enough room to plug the newline character
            // in BUF so arrange to have it returned on the next call
            // to base_lexer::read.

            // At this point we've exhausted the original input
            // (m_chars_left is zero) so we can overwrite the initial
            // buffer with a single newline character to be returned on
            // the next call.

            m_buffer = eol;
            m_chars_left = 1;
            m_offset = 0;
          }
      }

    return len;
  }

  // Destructor.  Passes the scanner object held in m_scanner to
  // yylex_destroy.
  base_lexer::~base_lexer (void)
  {
    yylex_destroy (m_scanner);
  }

  // Set up the flex scanner: initialize m_scanner with yylex_init,
  // attach this object to it as the scanner's "extra" data, and clear
  // the start state.
  void
  base_lexer::init (void)
  {
    yylex_init (&m_scanner);

    // Make base_lexer object available through yyextra in
    // flex-generated lexer.
    yyset_extra (this, m_scanner);

    clear_start_state ();
  }

  // Inside Flex-generated functions, yyg is the scanner cast to its real
  // type.  Some flex macros that we use in base_lexer member functions
  // (for example, BEGIN) use yyg.  If we could perform the actions of
  // these macros with functions instead, we could eliminate the
  // OCTAVE_YYG macro.
  //
  // OCTAVE_YYG expands to a declaration of a local variable named yyg
  // that is initialized from m_scanner, so it can only be used where
  // m_scanner is in scope.

#define OCTAVE_YYG                                                      \
  struct yyguts_t *yyg = static_cast<struct yyguts_t*> (m_scanner)

  void
  base_lexer::reset (void)
  {
    // Start off on the right foot.
    clear_start_state ();

    m_symtab_context.clear ();

    // Only ask for input from stdin if we are expecting interactive
    // input.

    if (m_interpreter.interactive ()
        && ! (m_reading_fcn_file
              || m_reading_classdef_file
              || m_reading_script_file
              || input_from_eval_string ()))
      yyrestart (stdin, m_scanner);

    lexical_feedback::reset ();

    m_comment_buf.reset ();
  }

  // Prepare to read from a file: set the m_reading_script_file flag
  // and push the INPUT_FILE_START start state.
  void
  base_lexer::prep_for_file (void)
  {
    m_reading_script_file = true;

    push_start_state (INPUT_FILE_START);
  }

  // Begin lexing a string: record the current file position as the
  // start of the token and push the start state STATE.
  void
  base_lexer::begin_string (int state)
  {
    m_tok_beg = m_filepos;

    push_start_state (state);
  }

  int
  base_lexer::handle_end_of_input (void)
  {
    lexer_debug ("<<EOF>>");

    m_tok_beg = m_filepos;
    m_tok_end = m_filepos;

    if (m_block_comment_nesting_level != 0)
      {
        warning ("block comment unterminated at end of input");

        if ((m_reading_fcn_file || m_reading_script_file || m_reading_classdef_file)
            && ! m_fcn_file_name.empty ())
          warning ("near line %d of file '%s.m'",
                   m_filepos.line (), m_fcn_file_name.c_str ());
      }

    token *tok_val = new token (END_OF_INPUT, m_tok_beg, m_tok_end);

    push_token (tok_val);

    return count_token_internal (END_OF_INPUT);
  }

  // Return the text pointer that yyget_text reports for m_scanner.
  char *
  base_lexer::flex_yytext (void)
  {
    return yyget_text (m_scanner);
  }

  // Return the length that yyget_leng reports for m_scanner.
  int
  base_lexer::flex_yyleng (void)
  {
    return yyget_leng (m_scanner);
  }

  int
  base_lexer::text_yyinput (void)
  {
    int c = yyinput (m_scanner);

    if (debug_flag ())
      {
        std::cerr << "I: ";
        display_character (c);
        std::cerr << std::endl;
      }

    // Convert CRLF into just LF and single CR into LF.

    if (c == '\r')
      {
        c = yyinput (m_scanner);

        if (debug_flag ())
          {
            std::cerr << "I: ";
            display_character (c);
            std::cerr << std::endl;
          }

        if (c != '\n')
          {
            xunput (c);
            c = '\n';
          }
      }

    return c;
  }

  // Push the character C back onto the scanner's input, using BUF as
  // the text pointer handed to yyunput.  EOF is never pushed back.
  // The character is echoed to std::cerr when debugging is on.
  void
  base_lexer::xunput (char c, char *buf)
  {
    if (c == EOF)
      return;

    if (debug_flag ())
      {
        std::cerr << "U: ";
        display_character (c);
        std::cerr << std::endl;
      }

    yyunput (c, buf, m_scanner);
  }

  // Push the character C back onto the scanner's input, using the
  // scanner's current text pointer as the buffer argument.
  void
  base_lexer::xunput (char c)
  {
    xunput (c, flex_yytext ());
  }

  void
  base_lexer::update_token_positions (int tok_len)
  {
    m_tok_beg = m_filepos;
    m_tok_end = m_filepos;

    if (tok_len > 1)
      m_tok_end.increment_column (tok_len - 1);

    m_filepos.increment_column (tok_len);
  }

  // Peek at the next input character (read it and push it straight
  // back) and return true if it is a space or tab.
  bool
  base_lexer::looking_at_space (void)
  {
    const int next_char = text_yyinput ();

    xunput (next_char);

    return is_space_or_tab (next_char);
  }

  // Return true if any entry of the m_looking_at_object_index stack
  // is true.
  bool
  base_lexer::inside_any_object_index (void)
  {
    for (const bool is_obj_idx : m_looking_at_object_index)
      if (is_obj_idx)
        return true;

    return false;
  }

  // If S is a keyword in the current context, update the lexer state
  // for that keyword, push a token for it, and return its token
  // value.  Otherwise return 0 so that the caller can treat S as an
  // ordinary identifier.
  //
  // Whenever 0 is returned, m_at_beginning_of_statement has the value
  // it had on entry.
  int
  base_lexer::make_keyword_token (const std::string& s)
  {
    // Token positions should have already been updated before this
    // function is called.

    int slen = s.length ();

    const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), slen);

    if (! kw)
      return 0;

    // Saved so that it can be restored on every path that decides S
    // is not a keyword after all.
    bool previous_at_bos = m_at_beginning_of_statement;

    // May be reset to true for some token types.
    m_at_beginning_of_statement = false;

    // Keywords that need a special kind of token set this.  All others
    // get the generic keyword token created after the switch.
    token *tok_val = nullptr;

    switch (kw->kw_id)
      {
      case break_kw:
      case catch_kw:
      case continue_kw:
      case else_kw:
      case otherwise_kw:
      case return_kw:
      case unwind_protect_cleanup_kw:
        m_at_beginning_of_statement = true;
        break;

      case persistent_kw:
      case global_kw:
        m_looking_at_decl_list = true;
        break;

      case case_kw:
      case elseif_kw:
      case until_kw:
        break;

      case end_kw:
        // 'end' is not a keyword inside an object index, or while a
        // function is being defined but we are neither in its return
        // list nor past its name.
        if (inside_any_object_index ()
            || (m_defining_fcn
                && ! (m_looking_at_return_list
                      || m_parsed_function_name.top ())))
          {
            m_at_beginning_of_statement = previous_at_bos;
            return 0;
          }

        tok_val = new token (kw->tok, token::simple_end, m_tok_beg, m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case end_try_catch_kw:
        tok_val = new token (kw->tok, token::try_catch_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case end_unwind_protect_kw:
        tok_val = new token (kw->tok, token::unwind_protect_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endfor_kw:
        tok_val = new token (kw->tok, token::for_end, m_tok_beg, m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endfunction_kw:
        tok_val = new token (kw->tok, token::function_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endif_kw:
        tok_val = new token (kw->tok, token::if_end, m_tok_beg, m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endparfor_kw:
        tok_val = new token (kw->tok, token::parfor_end, m_tok_beg, m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endswitch_kw:
        tok_val = new token (kw->tok, token::switch_end, m_tok_beg, m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endwhile_kw:
        tok_val = new token (kw->tok, token::while_end, m_tok_beg, m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endarguments_kw:
#if defined (DISABLE_ARGUMENTS_VALIDATION_BLOCK)
        // Not a keyword in this configuration; leave the statement
        // state as we found it.
        m_at_beginning_of_statement = previous_at_bos;
        return 0;
#else
        tok_val = new token (kw->tok, token::arguments_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;
#endif

      case endclassdef_kw:
        tok_val = new token (kw->tok, token::classdef_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endenumeration_kw:
        tok_val = new token (kw->tok, token::enumeration_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endevents_kw:
        tok_val = new token (kw->tok, token::events_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endmethods_kw:
        tok_val = new token (kw->tok, token::methods_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case endproperties_kw:
        tok_val = new token (kw->tok, token::properties_end, m_tok_beg,
                             m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case for_kw:
      case parfor_kw:
      case while_kw:
        m_looping++;
        break;

      case do_kw:
        m_at_beginning_of_statement = true;
        m_looping++;
        break;

      case try_kw:
      case unwind_protect_kw:
        m_at_beginning_of_statement = true;
        break;

      case if_kw:
      case switch_kw:
        break;

      case get_kw:
      case set_kw:
        // 'get' and 'set' are keywords in classdef method
        // declarations.
        if (! m_maybe_classdef_get_set_method)
          {
            m_at_beginning_of_statement = previous_at_bos;
            return 0;
          }
        break;

      case enumeration_kw:
      case events_kw:
      case methods_kw:
      case properties_kw:
        // 'enumeration', 'events', 'methods', and 'properties' are
        // keywords for classdef blocks.
        if (! m_classdef_element_names_are_keywords)
          {
            m_at_beginning_of_statement = previous_at_bos;
            return 0;
          }
        // fall through ...

      case classdef_kw:
        // 'classdef' is always a keyword.
        if (! m_force_script && m_token_count == 0 && input_from_file ())
          {
            m_reading_classdef_file = true;
            m_reading_script_file = false;
          }
        break;

      case function_kw:
        m_defining_fcn++;
        m_parsed_function_name.push (false);

        // A file whose first token is 'function' is a function file,
        // unless we were told to treat it as a script.
        if (! m_force_script && m_token_count == 0 && input_from_file ())
          {
            m_reading_fcn_file = true;
            m_reading_script_file = false;
          }

        // FIXME: should we be asking directly whether input is coming
        // from an eval string instead of that it is not coming from a
        // file?

        if (! (m_reading_fcn_file || m_reading_script_file
               || m_reading_classdef_file))
          {
            // Input must be coming from the terminal or stdin?
            m_buffer_function_text = true;
            m_function_text += (m_current_input_line + "\n");

            // FIXME: do we need to save and restore the file position
            // or just reset the line number here?  The goal is to
            // track line info for command-line functions relative
            // to the function keyword.  Should we really be setting
            // the line and column info to (1, 1) here?

            m_filepos = filepos (1, 1);
            update_token_positions (slen);
          }
        break;

      case arguments_kw:
#if defined (DISABLE_ARGUMENTS_VALIDATION_BLOCK)
        // Not a keyword in this configuration; leave the statement
        // state as we found it.
        m_at_beginning_of_statement = previous_at_bos;
        return 0;
#else
        // 'arguments' is only a keyword where the lexer has been told
        // to expect an arguments block.  Elsewhere it is an ordinary
        // identifier, so restore the statement state just as for the
        // other context-dependent keywords above.
        if (! m_arguments_is_keyword)
          {
            m_at_beginning_of_statement = previous_at_bos;
            return 0;
          }
        break;
#endif

      case spmd_kw:
        m_at_beginning_of_statement = true;
        break;

      case endspmd_kw:
        tok_val = new token (kw->tok, token::spmd_end, m_tok_beg, m_tok_end);
        m_at_beginning_of_statement = true;
        break;

      case magic_file_kw:
        {
          // The token carries the full file name when we are reading
          // a file and know its name, and "stdin" otherwise.
          if ((m_reading_fcn_file || m_reading_script_file
               || m_reading_classdef_file)
              && ! m_fcn_file_full_name.empty ())
            tok_val = new token (kw->tok, m_fcn_file_full_name,
                                 m_tok_beg, m_tok_end);
          else
            tok_val = new token (kw->tok, "stdin", m_tok_beg, m_tok_end);
        }
        break;

      case magic_line_kw:
        {
          // The token carries the line on which it begins, as a double.
          int l = m_tok_beg.line ();
          octave_value ov_value (static_cast<double> (l));
          tok_val = new token (kw->tok, ov_value, "", m_tok_beg, m_tok_end);
        }
        break;

      default:
        panic_impossible ();
      }

    if (! tok_val)
      tok_val = new token (kw->tok, true, m_tok_beg, m_tok_end);

    push_token (tok_val);

    return kw->tok;
  }

/*

## check if magic file and line keywords are working
%!assert <*62587> (ischar (__FILE__))
%!assert <*62587> (isnumeric (__LINE__))

*/

  // Return true if any of the '.'-separated components of S is a
  // keyword.
  bool
  base_lexer::fq_identifier_contains_keyword (const std::string& s)
  {
    std::size_t start = 0;

    for (;;)
      {
        const std::size_t dot = s.find ('.', start);

        // No more separators: the rest of S is the final component.
        if (dot == std::string::npos)
          return iskeyword (s.substr (start));

        if (iskeyword (s.substr (start, dot - start)))
          return true;

        // Continue with the component that follows the dot.
        start = dot + 1;
      }
  }

  // Return true if the innermost nesting level is a bracket, or if it
  // is a brace and the front of the m_looking_at_object_index stack is
  // false.
  bool
  base_lexer::whitespace_is_significant (void)
  {
    if (m_nesting_level.is_bracket ())
      return true;

    if (! m_nesting_level.is_brace ())
      return false;

    return ! m_looking_at_object_index.front ();
  }

// Return true if the LEN characters at S are long enough to hold a
// prefix plus at least one more character and begin with "0b" or
// "0B".
static inline bool
looks_like_bin (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  const char marker = s[1];

  return (marker == 'b' || marker == 'B');
}

// Return true if the LEN characters at S are long enough to hold a
// prefix plus at least one more character and begin with "0x" or
// "0X".
static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  const char marker = s[1];

  return (marker == 'x' || marker == 'X');
}

// Wrap LONG_INT_VAL in an Octave integer value that is BYTES (1, 2,
// 4, or 8) bytes wide, unsigned if UNSIGNED_VAL is true and signed
// otherwise.  Any other width is an internal error.
static inline octave_value
make_integer_value (uintmax_t long_int_val, bool unsigned_val, int bytes)
{
  // FIXME: Conversion to signed values is supposed to follow
  // twos-complement rules.  Do we need to be more careful here?

  switch (bytes)
    {
    case 1:
      return (unsigned_val
              ? octave_value (octave_uint8 (long_int_val))
              : octave_value (octave_int8
                              (static_cast<int8_t> (long_int_val))));

    case 2:
      return (unsigned_val
              ? octave_value (octave_uint16 (long_int_val))
              : octave_value (octave_int16
                              (static_cast<int16_t> (long_int_val))));

    case 4:
      return (unsigned_val
              ? octave_value (octave_uint32 (long_int_val))
              : octave_value (octave_int32
                              (static_cast<int32_t> (long_int_val))));

    case 8:
      return (unsigned_val
              ? octave_value (octave_uint64 (long_int_val))
              : octave_value (octave_int64
                              (static_cast<int64_t> (long_int_val))));

    default:
      panic_impossible ();
    }

  return octave_value ();
}

  // Handle a binary constant: 0b or 0B, then binary digits that may
  // contain underscores, then an optional 's' or 'u' size suffix.
  // Pushes a NUMBER token holding an integer value, or a
  // LEXICAL_ERROR token when there are too many digits.
  template <>
  int
  base_lexer::handle_number<2> (void)
  {
    // Skip 0[bB] prefix.
    std::string digits (flex_yytext () + 2);

    // Underscores are only digit separators.
    digits.erase (std::remove (digits.begin (), digits.end (), '_'),
                  digits.end ());

    const std::size_t suffix_pos = digits.find_first_of ("su");
    const bool have_suffix = (suffix_pos != std::string::npos);

    // Without a suffix the value is unsigned.
    bool unsigned_val = true;
    std::string size_str;

    if (have_suffix)
      {
        unsigned_val = (digits[suffix_pos] == 'u');
        size_str = digits.substr (suffix_pos+1);
        digits = digits.substr (0, suffix_pos);
      }

    const std::size_t num_digits = digits.length ();

    // Width of the result in bytes; stays negative when the digits do
    // not fit.
    int bytes = -1;

    if (have_suffix)
      {
        // The suffix fixes the width; the digits must fit in it.
        if (size_str == "8" && num_digits <= 8)
          bytes = 1;
        else if (size_str == "16" && num_digits <= 16)
          bytes = 2;
        else if (size_str == "32" && num_digits <= 32)
          bytes = 4;
        else if (size_str == "64" && num_digits <= 64)
          bytes = 8;
      }
    else
      {
        // Choose the smallest width that holds all of the digits.
        if (num_digits <= 8)
          bytes = 1;
        else if (num_digits <= 16)
          bytes = 2;
        else if (num_digits <= 32)
          bytes = 4;
        else if (num_digits <= 64)
          bytes = 8;
      }

    if (bytes < 0)
      {
        push_token (new token (LEXICAL_ERROR,
                               "too many digits for binary constant",
                               m_tok_beg, m_tok_end));

        return count_token_internal (LEXICAL_ERROR);
      }

    // FIXME: is there a better way?  Can uintmax_t be anything other
    // than long or long long?  Should we just be using uint64_t instead
    // of uintmax_t?

    errno = 0;
    char *end;
    uintmax_t long_int_val;
    if (sizeof (uintmax_t) == sizeof (unsigned long long))
      long_int_val = strtoull (digits.c_str (), &end, 2);
    else if (sizeof (uintmax_t) == sizeof (unsigned long))
      long_int_val = strtoul (digits.c_str (), &end, 2);
    else
      panic_impossible ();

    // The digit count was limited above, so a range error is treated
    // as an internal error.
    if (errno == ERANGE)
      panic_impossible ();

    octave_value ov_value
      = make_integer_value (long_int_val, unsigned_val, bytes);

    m_looking_for_object_index = false;
    m_at_beginning_of_statement = false;

    update_token_positions (flex_yyleng ());

    // The text stored with the token is the digit string only (no
    // prefix, underscores, or suffix).
    push_token (new token (NUMBER, ov_value, digits, m_tok_beg, m_tok_end));

    return count_token_internal (NUMBER);
  }

  // Return 2 raised to the number of digits in the significand of a
  // double (std::numeric_limits<double>::digits) as a 64-bit unsigned
  // integer.
  static uint64_t
  flintmax (void)
  {
    const uint64_t one = 1;

    return one << std::numeric_limits<double>::digits;
  }

  // Handle a decimal constant.  Underscores are dropped, a 'd' or 'D'
  // exponent marker is read as 'e', and a trailing i, j, I, or J makes
  // the value imaginary.  Pushes a NUMBER token and returns the result
  // of counting it.
  template <>
  int
  base_lexer::handle_number<10> (void)
  {
    bool imag = false;
    bool digits_only = true;

    char *yytxt = flex_yytext ();
    std::size_t yylng = flex_yyleng ();

    // Build a cleaned-up copy of the text that sscanf and strtoull
    // can read.
    OCTAVE_LOCAL_BUFFER (char, tmptxt, yylng + 1);
    char *p = &tmptxt[0];

    for (const char *rp = yytxt; *rp; rp++)
      {
        const char ch = *rp;

        if (ch == '_')
          {
            // Digit separator; nothing to copy.
          }
        else if (ch == 'D' || ch == 'd')
          {
            *p++ = 'e';
            digits_only = false;
          }
        else if (ch == 'I' || ch == 'J' || ch == 'i' || ch == 'j')
          {
            // Octave does not provide imaginary integers.
            digits_only = false;
            imag = true;
          }
        else
          {
            if (ch == '+' || ch == '-' || ch == '.'
                || ch == 'E' || ch == 'e')
              digits_only = false;

            *p++ = ch;
          }
      }

    *p = '\0';

    double value = 0.0;

    int nread = sscanf (tmptxt, "%lf", &value);

    // If yytext doesn't contain a valid number, we are in deep doo doo.

    assert (nread == 1);

    octave_value ov_value;

    // Use >= because > will not return true until value is greater than
    // flintmax + 2!

    if (digits_only && value >= flintmax ())
      {
        // Try reading as an unsigned 64-bit integer.  If there is a
        // range error, then create a double value.  Otherwise, create a
        // special uint64 object that will be automatically converted to
        // double unless it appears as the argument to one of the int64
        // or uint64 functions.

        errno = 0;
        char *end;
        uintmax_t long_int_val;
        if (sizeof (uintmax_t) == sizeof (unsigned long long))
          long_int_val = strtoull (tmptxt, &end, 10);
        else if (sizeof (uintmax_t) == sizeof (unsigned long))
          long_int_val = strtoul (tmptxt, &end, 10);
        else
          panic_impossible ();

        if (errno != ERANGE)
          {
            // If possible, store the value as a signed integer.

            const bool needs_unsigned
              = (long_int_val > std::numeric_limits<int64_t>::max ());

            octave_base_value *magic_int;
            if (needs_unsigned)
              magic_int = new octave_magic_uint (octave_uint64 (long_int_val));
            else
              magic_int = new octave_magic_int (octave_int64 (long_int_val));

            ov_value = octave_value (magic_int);
          }
      }

    m_looking_for_object_index = false;
    m_at_beginning_of_statement = false;

    update_token_positions (yylng);

    // No special integer value was created above, so use the double
    // (or pure imaginary) value.
    if (ov_value.is_undefined ())
      {
        if (imag)
          ov_value = octave_value (Complex (0.0, value));
        else
          ov_value = octave_value (value);
      }

    // The token keeps the original, unmodified text.
    push_token (new token (NUMBER, ov_value, yytxt, m_tok_beg, m_tok_end));

    return count_token_internal (NUMBER);
  }

  // Handle a hexadecimal constant: 0x or 0X, then hex digits that may
  // contain underscores, then an optional 's' or 'u' size suffix.
  // Pushes a NUMBER token holding an integer value, or a
  // LEXICAL_ERROR token when there are too many digits.
  template <>
  int
  base_lexer::handle_number<16> (void)
  {
    // Skip 0[xX] prefix.
    std::string yytxt (flex_yytext () + 2);

    // Underscores are only digit separators.
    yytxt.erase (std::remove (yytxt.begin (), yytxt.end (), '_'),
                 yytxt.end ());

    std::size_t pos = yytxt.find_first_of ("su");

    // Without a suffix the value is unsigned.
    bool unsigned_val = true;

    // Width of the result in bytes; stays negative when the digits do
    // not fit.
    int bytes = -1;

    if (pos == std::string::npos)
      {
        // No suffix: choose the smallest width that holds all of the
        // digits (two hex digits per byte).
        std::size_t num_digits = yytxt.length ();

        if (num_digits <= 2)
          bytes = 1;
        else if (num_digits <= 4)
          bytes = 2;
        else if (num_digits <= 8)
          bytes = 4;
        else if (num_digits <= 16)
          bytes = 8;
      }
    else
      {
        // The suffix fixes the width; the digits must fit in it.
        unsigned_val = (yytxt[pos] == 'u');
        std::string size_str = yytxt.substr (pos+1);
        yytxt = yytxt.substr (0, pos);
        std::size_t num_digits = yytxt.length ();

        if (size_str == "8" && num_digits <= 2)
          bytes = 1;
        else if (size_str == "16" && num_digits <= 4)
          bytes = 2;
        else if (size_str == "32" && num_digits <= 8)
          bytes = 4;
        else if (size_str == "64" && num_digits <= 16)
          bytes = 8;
      }

    if (bytes < 0)
      {
        token *tok
          = new token (LEXICAL_ERROR,
                       "too many digits for hexadecimal constant",
                       m_tok_beg, m_tok_end);

        push_token (tok);

        return count_token_internal (LEXICAL_ERROR);
      }

    // Assert here because if yytext doesn't contain a valid number, we
    // are in deep doo doo.  Exactly one conversion must succeed: sscanf
    // returns EOF (a nonzero value) on input failure, so testing only
    // for a nonzero status would let that case through.

    uintmax_t long_int_val = 0;
    int status = sscanf (yytxt.c_str (), "%jx", &long_int_val);
    assert (status == 1);

    octave_value ov_value
      = make_integer_value (long_int_val, unsigned_val, bytes);

    m_looking_for_object_index = false;
    m_at_beginning_of_statement = false;

    update_token_positions (flex_yyleng ());

    // The text stored with the token is the digit string only (no
    // prefix, underscores, or suffix).
    push_token (new token (NUMBER, ov_value, yytxt, m_tok_beg, m_tok_end));

    return count_token_internal (NUMBER);
  }

  // Handle a continuation marker.  The matched text starts with
  // either a single backslash or a three-character marker, optionally
  // followed by whitespace and a comment.  Any comment is stored and
  // the file position moves to the next line.
  void
  base_lexer::handle_continuation (void)
  {
    char *yytxt = flex_yytext ();
    const int yylng = flex_yyleng ();

    // A backslash marker is one character long and draws a
    // language-extension warning; otherwise the marker is three
    // characters long.
    const bool backslash_marker = (yytxt[0] == '\\');

    if (backslash_marker)
      warn_language_extension_continuation ();

    int offset = backslash_marker ? 1 : 3;

    // Skip whitespace after the marker, remembering whether there was
    // any.
    const int space_start = offset;

    while (offset < yylng && is_space_or_tab (yytxt[offset]))
      offset++;

    if (offset > space_start)
      mark_previous_token_trailing_space ();

    // Skip the run of comment characters, if any.
    const int comment_start = offset;

    while (offset < yylng && (yytxt[offset] == '#' || yytxt[offset] == '%'))
      offset++;

    if (offset > comment_start)
      {
        m_comment_text = &yytxt[offset];

        // finish_comment sets m_at_beginning_of_statement to true but
        // that is not correct if we are handling a continued
        // statement.  Preserve the current state.

        const bool saved_bos = m_at_beginning_of_statement;

        finish_comment (comment_elt::end_of_line);

        m_at_beginning_of_statement = saved_bos;
      }

    m_filepos.next_line ();
  }

  // Finish the comment accumulated in m_comment_text: possibly adopt
  // it as the help text, append it to the comment buffer with type
  // TYP (or the copyright type), then clear it and mark the lexer as
  // being at the beginning of a statement.
  void
  base_lexer::finish_comment (comment_elt::comment_type typ)
  {
    const bool copyright = looks_like_copyright (m_comment_text);

    if (copyright)
      {
        // Copyright blocks get their own comment type and are never
        // used as help text.
        typ = comment_elt::copyright;
      }
    else if (m_nesting_level.none () && m_help_text.empty ()
             && ! m_comment_text.empty ()
             && ! looks_like_shebang (m_comment_text))
      {
        // The first suitable comment outside of any nesting becomes
        // the help text.
        m_help_text = m_comment_text;
      }

    m_comment_buf.append (m_comment_text, typ);

    m_comment_text = "";

    m_at_beginning_of_statement = true;
  }

  // Update lexer state for a closing bracket of kind BRACKET_TYPE,
  // leave the current start state, and return the counted token.

  int
  base_lexer::handle_close_bracket (int bracket_type)
  {
    m_looking_at_object_index.pop_front ();

    m_looking_for_object_index = true;
    m_at_beginning_of_statement = false;

    if (! m_nesting_level.none ())
      {
        m_nesting_level.remove ();

        // Only ']' and '}' are expected here.
        switch (bracket_type)
          {
          case ']':
            m_bracketflag--;
            break;

          case '}':
            m_braceflag--;
            break;

          default:
            panic_impossible ();
            break;
          }
      }

    pop_start_state ();

    return count_token (bracket_type);
  }

  // Return true when command syntax is allowed, a space follows the
  // previous token, no space follows at the current position, and the
  // previous token may be a command.

  bool
  base_lexer::looks_like_command_arg (void)
  {
    if (! m_allow_command_syntax)
      return false;

    // Both checks are always performed, in this order.
    const bool blank_before = space_follows_previous_token ();
    const bool blank_after = looking_at_space ();

    if (! blank_before || blank_after)
      return false;

    return previous_token_may_be_command ();
  }

  int
  base_lexer::handle_superclass_identifier (void)
  {
    update_token_positions (flex_yyleng ());

    std::string txt = flex_yytext ();

    txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
               txt.end ());

    std::size_t pos = txt.find ("@");

    std::string meth = txt.substr (0, pos);
    std::string cls = txt.substr (pos + 1);

    if (iskeyword (meth) || fq_identifier_contains_keyword (cls))
      {
        token *tok
          = new token (LEXICAL_ERROR,
                       "method, class, and package names may not be keywords",
                       m_tok_beg, m_tok_end);

        push_token (tok);

        return count_token_internal (LEXICAL_ERROR);
      }

    push_token (new token (SUPERCLASSREF, meth, cls, m_tok_beg, m_tok_end));

    m_filepos.increment_column (flex_yyleng ());

    return count_token_internal (SUPERCLASSREF);
  }

  // Push a METAQUERY token for the class name that follows the leading
  // '?' in the matched text, or a LEXICAL_ERROR token when that name
  // contains a keyword.
  //
  // Token positions should already have been updated by the caller.

  int
  base_lexer::handle_meta_identifier (void)
  {
    std::string spec = flex_yytext ();

    // Drop every space and tab from the matched text.
    auto blanks_beg
      = std::remove_if (spec.begin (), spec.end (), is_space_or_tab);
    spec.erase (blanks_beg, spec.end ());

    // Everything after the leading '?' is the class name.
    const std::string cls = spec.substr (1);

    if (! fq_identifier_contains_keyword (cls))
      {
        push_token (new token (METAQUERY, cls, m_tok_beg, m_tok_end));

        m_filepos.increment_column (flex_yyleng ());

        return METAQUERY;
      }

    push_token (new token (LEXICAL_ERROR,
                           "class and package names may not be keywords",
                           m_tok_beg, m_tok_end));

    return count_token_internal (LEXICAL_ERROR);
  }

  // Push an FQ_IDENT token for the matched text with blanks removed,
  // or a LEXICAL_ERROR token when the name contains a keyword.
  //
  // Token positions should already have been updated by the caller.

  int
  base_lexer::handle_fq_identifier (void)
  {
    std::string name = flex_yytext ();

    // Drop every space and tab from the matched text.
    auto blanks_beg
      = std::remove_if (name.begin (), name.end (), is_space_or_tab);
    name.erase (blanks_beg, name.end ());

    if (! fq_identifier_contains_keyword (name))
      {
        push_token (new token (FQ_IDENT, name, m_tok_beg, m_tok_end));

        m_filepos.increment_column (flex_yyleng ());

        return FQ_IDENT;
      }

    push_token (new token (LEXICAL_ERROR,
                           "function, method, class, and package names may not be keywords",
                           m_tok_beg, m_tok_end));

    return count_token_internal (LEXICAL_ERROR);
  }

  // Figure out exactly what kind of token to return when we have seen
  // an identifier.  Handles keywords.  Return -1 if the identifier
  // should be ignored.

  int
  base_lexer::handle_identifier (void)
  {
    update_token_positions (flex_yyleng ());

    std::string ident = flex_yytext ();

    // If we are expecting a structure element, avoid recognizing
    // keywords and other special names and return STRUCT_ELT, which is
    // a string that is also a valid identifier.

    if (m_looking_at_indirect_ref)
      {
        push_token (new token (STRUCT_ELT, ident, m_tok_beg, m_tok_end));

        m_looking_for_object_index = true;

        return STRUCT_ELT;
      }

    // If ident is a keyword token, then make_keyword_token will set
    // m_at_beginning_of_statement.  For example, if tok is an IF
    // token, then m_at_beginning_of_statement will be false.

    int kw_token = make_keyword_token (ident);

    // If we have a regular keyword, return it.
    // Keywords can be followed by identifiers.

    if (kw_token)
      {
        m_looking_for_object_index = false;

        // The call to make_keyword_token set m_at_beginning_of_statement.

        return count_token_internal (kw_token);
      }

    token *tok = new token (NAME, ident, m_tok_beg, m_tok_end);

    // For compatibility with Matlab, the following symbols are
    // handled specially so that things like
    //
    //   pi +1
    //
    // are parsed as an addition expression instead of as a command-style
    // function call with the argument "+1".

    if (m_at_beginning_of_statement
        && ! (m_parsing_anon_fcn_body
              || ident == "e" || ident == "pi"
              || ident == "I" || ident == "i"
              || ident == "J" || ident == "j"
              || ident == "Inf" || ident == "inf"
              || ident == "NaN" || ident == "nan"))
      tok->mark_may_be_command ();

    push_token (tok);

    // The magic end index can't be indexed.

    if (ident != "end")
      m_looking_for_object_index = true;

    m_at_beginning_of_statement = false;

    return count_token_internal (NAME);
  }

  // Issue an "Octave:separator-insert" warning for separator SEP at the
  // current line; the file name is included when one is known.

  void
  base_lexer::maybe_warn_separator_insert (char sep)
  {
    std::string file = m_fcn_file_full_name;

    if (! file.empty ())
      {
        warning_with_id ("Octave:separator-insert",
                         "potential auto-insertion of '%c' near line %d of file %s",
                         sep, m_filepos.line (), file.c_str ());
        return;
      }

    warning_with_id ("Octave:separator-insert",
                     "potential auto-insertion of '%c' near line %d",
                     sep, m_filepos.line ());
  }

  // Issue an "Octave:language-extension" warning for the extension
  // described by MSG.  When the name of the file being read is known,
  // the warning also reports the current line number and the file name.

  void
  base_lexer::warn_language_extension (const std::string& msg)
  {
    std::string nm = m_fcn_file_full_name;

    if (nm.empty ())
      warning_with_id ("Octave:language-extension",
                       "Octave language extension used: %s",
                       msg.c_str ());
    else
      // The format previously read "offile"; the missing space made the
      // emitted warning text run the two words together.
      warning_with_id ("Octave:language-extension",
                       "Octave language extension used: %s near line %d of file %s",
                       msg.c_str (), m_filepos.line (), nm.c_str ());
  }

  // Warn about a language extension when the comment character C is
  // '#'; do nothing for any other character.

  void
  base_lexer::maybe_warn_language_extension_comment (char c)
  {
    if (c != '#')
      return;

    warn_language_extension ("# used as comment character");
  }

  // Warn that a backslash was used as a line continuation marker.

  void
  base_lexer::warn_language_extension_continuation (void)
  {
    const std::string msg ("\\ used as line continuation marker");

    warn_language_extension (msg);
  }

  // Warn that the operator OP is a language extension.  A single
  // trailing newline in OP, if present, is not included in the message.

  void
  base_lexer::warn_language_extension_operator (const std::string& op)
  {
    std::string t = op;

    // Check for an empty string before looking at the last character;
    // indexing position length-1 of an empty string is out of range.
    if (! t.empty () && t.back () == '\n')
      t.pop_back ();

    warn_language_extension (t + " used as operator");
  }

  // Issue an "Octave:deprecated-syntax" warning with text MSG; the
  // current line and file name are appended when a file name is known.

  void
  base_lexer::warn_deprecated_syntax (const std::string& msg)
  {
    if (! m_fcn_file_full_name.empty ())
      {
        warning_with_id ("Octave:deprecated-syntax",
                         "%s; near line %d of file '%s'", msg.c_str (),
                         m_filepos.line (), m_fcn_file_full_name.c_str ());
        return;
      }

    warning_with_id ("Octave:deprecated-syntax", "%s", msg.c_str ());
  }

  // Warn that DEPRECATED_OP was deprecated in VERSION and that
  // RECOMMENDED_OP should be used instead.

  void
  base_lexer::warn_deprecated_operator (const std::string& deprecated_op,
                                        const std::string& recommended_op,
                                        const std::string& version)
  {
    warn_deprecated_syntax
      ("the '" + deprecated_op
       + "' operator was deprecated in version " + version
       + " and will not be allowed in a future version of Octave; please use '"
       + recommended_op
       + "' instead");
  }

  // Make TOK the scanner's current semantic value and add it to the
  // m_tokens collection.

  void
  base_lexer::push_token (token *tok)
  {
    yyget_lval (m_scanner)->tok_val = tok;

    m_tokens.push (tok);
  }

  // Return the token stored in the scanner's current semantic value.

  token *
  base_lexer::current_token (void)
  {
    return yyget_lval (m_scanner)->tok_val;
  }

  // Return the number of tokens currently held in m_tokens.

  std::size_t
  base_lexer::pending_token_count (void) const
  {
    const std::size_t count = m_tokens.size ();

    return count;
  }

  void
  base_lexer::display_token (int tok)
  {
    switch (tok)
      {
      case '=': std::cerr << "'='\n"; break;
      case ':': std::cerr << "':'\n"; break;
      case '-': std::cerr << "'-'\n"; break;
      case '+': std::cerr << "'+'\n"; break;
      case '*': std::cerr << "'*'\n"; break;
      case '/': std::cerr << "'/'\n"; break;
      case '~': std::cerr << "'~'\n"; break;
      case '!': std::cerr << "'!'\n"; break;
      case ADD_EQ: std::cerr << "ADD_EQ\n"; break;
      case SUB_EQ: std::cerr << "SUB_EQ\n"; break;
      case MUL_EQ: std::cerr << "MUL_EQ\n"; break;
      case DIV_EQ: std::cerr << "DIV_EQ\n"; break;
      case LEFTDIV_EQ: std::cerr << "LEFTDIV_EQ\n"; break;
      case POW_EQ: std::cerr << "POW_EQ\n"; break;
      case EMUL_EQ: std::cerr << "EMUL_EQ\n"; break;
      case EDIV_EQ: std::cerr << "EDIV_EQ\n"; break;
      case ELEFTDIV_EQ: std::cerr << "ELEFTDIV_EQ\n"; break;
      case EPOW_EQ: std::cerr << "EPOW_EQ\n"; break;
      case AND_EQ: std::cerr << "AND_EQ\n"; break;
      case OR_EQ: std::cerr << "OR_EQ\n"; break;
      case EXPR_AND_AND: std::cerr << "EXPR_AND_AND\n"; break;
      case EXPR_OR_OR: std::cerr << "EXPR_OR_OR\n"; break;
      case EXPR_AND: std::cerr << "EXPR_AND\n"; break;
      case EXPR_OR: std::cerr << "EXPR_OR\n"; break;
      case EXPR_LT: std::cerr << "EXPR_LT\n"; break;
      case EXPR_LE: std::cerr << "EXPR_LE\n"; break;
      case EXPR_EQ: std::cerr << "EXPR_EQ\n"; break;
      case EXPR_NE: std::cerr << "EXPR_NE\n"; break;
      case EXPR_GE: std::cerr << "EXPR_GE\n"; break;
      case EXPR_GT: std::cerr << "EXPR_GT\n"; break;
      case LEFTDIV: std::cerr << "LEFTDIV\n"; break;
      case EMUL: std::cerr << "EMUL\n"; break;
      case EDIV: std::cerr << "EDIV\n"; break;
      case ELEFTDIV: std::cerr << "ELEFTDIV\n"; break;
      case HERMITIAN: std::cerr << "HERMITIAN\n"; break;
      case TRANSPOSE: std::cerr << "TRANSPOSE\n"; break;
      case PLUS_PLUS: std::cerr << "PLUS_PLUS\n"; break;
      case MINUS_MINUS: std::cerr << "MINUS_MINUS\n"; break;
      case POW: std::cerr << "POW\n"; break;
      case EPOW: std::cerr << "EPOW\n"; break;

      case NUMBER:
        {
          token *tok_val = current_token ();
          std::cerr << "NUMBER [";
          octave_value num = tok_val->number ();
          num.print_raw (std::cerr);
          std::cerr << "]\n";
        }
        break;

      case STRUCT_ELT:
        {
          token *tok_val = current_token ();
          std::cerr << "STRUCT_ELT [" << tok_val->text () << "]\n";
        }
        break;

      case NAME:
        {
          token *tok_val = current_token ();
          std::cerr << "NAME [" << tok_val->text () << "]\n";
        }
        break;

      case END: std::cerr << "END\n"; break;

      case DQ_STRING:
      case SQ_STRING:
        {
          token *tok_val = current_token ();

          std::cerr << (tok == DQ_STRING ? "DQ_STRING" : "SQ_STRING")
                    << " [" << tok_val->text () << "]\n";
        }
        break;

      case FOR: std::cerr << "FOR\n"; break;
      case WHILE: std::cerr << "WHILE\n"; break;
      case DO: std::cerr << "DO\n"; break;
      case UNTIL: std::cerr << "UNTIL\n"; break;
      case IF: std::cerr << "IF\n"; break;
      case ELSEIF: std::cerr << "ELSEIF\n"; break;
      case ELSE: std::cerr << "ELSE\n"; break;
      case SWITCH: std::cerr << "SWITCH\n"; break;
      case CASE: std::cerr << "CASE\n"; break;
      case OTHERWISE: std::cerr << "OTHERWISE\n"; break;
      case BREAK: std::cerr << "BREAK\n"; break;
      case CONTINUE: std::cerr << "CONTINUE\n"; break;
      case FUNC_RET: std::cerr << "FUNC_RET\n"; break;
      case UNWIND: std::cerr << "UNWIND\n"; break;
      case CLEANUP: std::cerr << "CLEANUP\n"; break;
      case TRY: std::cerr << "TRY\n"; break;
      case CATCH: std::cerr << "CATCH\n"; break;
      case GLOBAL: std::cerr << "GLOBAL\n"; break;
      case PERSISTENT: std::cerr << "PERSISTENT\n"; break;
      case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break;
      case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
      case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
      case FCN: std::cerr << "FCN\n"; break;
      case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break;
      case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break;
      case METAQUERY: std::cerr << "METAQUERY\n"; break;
      case GET: std::cerr << "GET\n"; break;
      case SET: std::cerr << "SET\n"; break;
      case PROPERTIES: std::cerr << "PROPERTIES\n"; break;
      case METHODS: std::cerr << "METHODS\n"; break;
      case EVENTS: std::cerr << "EVENTS\n"; break;
      case CLASSDEF: std::cerr << "CLASSDEF\n"; break;
      case '\n': std::cerr << "\\n\n"; break;
      case '\r': std::cerr << "\\r\n"; break;
      case '\t': std::cerr << "TAB\n"; break;
      default:
        {
          if (tok < 256 && tok > 31)
            std::cerr << static_cast<char> (tok) << "\n";
          else
            std::cerr << "UNKNOWN(" << tok << ")\n";
        }
        break;
      }
  }

  // Report MSG through error () with the prefix "fatal lexer error: ".

  void
  base_lexer::fatal_error (const char *msg)
  {
    error ("fatal lexer error: %s", msg);
  }

  // Return the lexer debug flag from the interpreter settings.

  bool
  base_lexer::debug_flag (void) const
  {
    return m_interpreter.get_settings ().lexer_debug_flag ();
  }

  // Return the display_tokens value from the interpreter settings.

  bool
  base_lexer::display_tokens (void) const
  {
    return m_interpreter.get_settings ().display_tokens ();
  }

  // Increment the token count kept in the interpreter settings and
  // then this lexer's own m_token_count.

  void
  base_lexer::increment_token_count (void)
  {
    m_interpreter.get_settings ().increment_token_count ();

    ++m_token_count;
  }

  // When the debug flag is set, print the current start state, the
  // rule PATTERN, and the matched text on std::cerr.

  void
  base_lexer::lexer_debug (const char *pattern)
  {
    if (! debug_flag ())
      return;

    std::cerr << std::endl;

    display_start_state ();

    std::cerr << "P: " << pattern << std::endl;
    std::cerr << "T: " << flex_yytext () << std::endl;
  }

  // Return the history system's input_from_tmp_file value.

  bool
  base_lexer::input_from_tmp_history_file (void)
  {
    return m_interpreter.get_history_system ().input_from_tmp_file ();
  }

  // Push STATE on start_state_stack and switch the scanner to the
  // start state that start_state () then reports.

  void
  base_lexer::push_start_state (int state)
  {
    // NOTE(review): presumably sets up the scanner data that the BEGIN
    // macro needs -- confirm against the macro definition.
    OCTAVE_YYG;

    start_state_stack.push (state);

    BEGIN (start_state ());
  }

  // Pop the top entry of start_state_stack and switch the scanner to
  // the start state that start_state () then reports.

  void
  base_lexer::pop_start_state (void)
  {
    // NOTE(review): presumably sets up the scanner data that the BEGIN
    // macro needs -- confirm against the macro definition.
    OCTAVE_YYG;

    start_state_stack.pop ();

    // NOTE(review): assumes the stack is not empty after the pop --
    // verify that start_state () is safe otherwise.
    BEGIN (start_state ());
  }

  // Empty start_state_stack and then push INITIAL, so that INITIAL is
  // the only entry left on the stack.

  void
  base_lexer::clear_start_state (void)
  {
    while (! start_state_stack.empty ())
      start_state_stack.pop ();

    push_start_state (INITIAL);
  }

  // Print "S: " followed by the name of the current start state on
  // std::cerr.

  void
  base_lexer::display_start_state (void) const
  {
    std::cerr << "S: ";

    // Used when the state matches none of the cases below.
    const char *label = "UNKNOWN START STATE!";

    switch (start_state ())
      {
      case INITIAL: label = "INITIAL"; break;
      case COMMAND_START: label = "COMMAND_START"; break;
      case MATRIX_START: label = "MATRIX_START"; break;
      case INPUT_FILE_START: label = "INPUT_FILE_START"; break;
      case BLOCK_COMMENT_START: label = "BLOCK_COMMENT_START"; break;
      case LINE_COMMENT_START: label = "LINE_COMMENT_START"; break;
      case DQ_STRING_START: label = "DQ_STRING_START"; break;
      case SQ_STRING_START: label = "SQ_STRING_START"; break;
      case FQ_IDENT_START: label = "FQ_IDENT_START"; break;
      default: break;
      }

    std::cerr << label << std::endl;
  }

  // Return true when a comma should be inserted before the unary
  // operator TOK.  That can only happen where whitespace is
  // significant and a space follows the previous token.

  bool
  base_lexer::maybe_unput_comma_before_unary_op (int tok)
  {
    const int prev_tok = previous_token_value ();

    if (! (whitespace_is_significant () && space_follows_previous_token ()))
      return false;

    // Look at the next input character, then put it back.
    int next_ch = text_yyinput ();
    xunput (next_ch);

    const bool blank_follows = is_space_or_tab (next_ch);

    // No comma directly after an opening bracket or brace.
    if (prev_tok == '[' || prev_tok == '{')
      return false;

    // No comma after a binary operator.
    if (previous_token_is_binop ())
      return false;

    // No comma when '+' or '-' is itself followed by a blank.
    if ((tok == '+' || tok == '-') && blank_follows)
      return false;

    return true;
  }

  // Push a token for operator TOK and update lexer state.  BOS is the
  // new value of m_at_beginning_of_statement; when COMPAT is false the
  // operator is reported as a language extension.

  int
  base_lexer::handle_op (int tok, bool bos, bool compat)
  {
    if (! compat)
      warn_language_extension_operator (flex_yytext ());

    update_token_positions (flex_yyleng ());

    push_token (new token (tok, m_tok_beg, m_tok_end));

    m_looking_for_object_index = false;
    m_at_beginning_of_statement = bos;

    // In a classdef declaration '<' starts the superclass list, and
    // inside that list '&' is followed by another name; both switch
    // the scanner to FQ_IDENT_START.
    if (tok == EXPR_LT)
      {
        if (m_parsing_classdef_decl)
          {
            m_parsing_classdef_superclass = true;
            push_start_state (FQ_IDENT_START);
          }
      }
    else if (tok == EXPR_AND)
      {
        if (m_parsing_classdef_superclass)
          push_start_state (FQ_IDENT_START);
      }

    return count_token_internal (tok);
  }

  // When a command argument boundary is detected, push out the current
  // argument being built.  This one seems like a good candidate for a
  // function call.

  int
  base_lexer::finish_command_arg (void)
  {
    int tok = SQ_STRING;

    token *tok_val = new token (tok, m_string_text, m_tok_beg, m_tok_end);

    m_string_text = "";
    m_command_arg_paren_count = 0;

    return handle_token (tok, tok_val);
  }

  // Push TOK_VAL, or a new token made from TOK and the current token
  // positions when TOK_VAL is null, and return the counted token.

  int
  base_lexer::handle_token (int tok, token *tok_val)
  {
    push_token (tok_val ? tok_val : new token (tok, m_tok_beg, m_tok_end));

    return count_token_internal (tok);
  }

  // Push a new token made from TOK and the current token positions,
  // and return the counted token.

  int
  base_lexer::count_token (int tok)
  {
    push_token (new token (tok, m_tok_beg, m_tok_end));

    return count_token_internal (tok);
  }

  // Increment the token count for every token except newline, then
  // return the result of show_token for TOK.

  int
  base_lexer::count_token_internal (int tok)
  {
    const bool is_newline = (tok == '\n');

    if (! is_newline)
      increment_token_count ();

    return show_token (tok);
  }

  // Display TOK when token display is enabled, display it again with an
  // "R: " prefix when the debug flag is set, and return TOK unchanged.

  int
  base_lexer::show_token (int tok)
  {
    const bool show_plain = display_tokens ();
    if (show_plain)
      display_token (tok);

    const bool show_debug = debug_flag ();
    if (show_debug)
      {
        std::cerr << "R: ";
        display_token (tok);
        std::cerr << std::endl;
      }

    return tok;
  }

  // Copy up to MAX_SIZE characters of input into BUF for the scanner,
  // first reading a new line from the input reader when the internal
  // buffer is empty.  Return the value from copy_chunk, or YY_NULL
  // when no input is available.

  int
  lexer::fill_flex_buffer (char *buf, unsigned max_size)
  {
    if (m_input_buf.empty ())
      {
        input_system& input_sys = m_interpreter.get_input_system ();

        // PS1 is used for the initial input, PS2 otherwise.
        const std::string prompt_spec
          = m_initial_input ? input_sys.PS1 () : input_sys.PS2 ();

        std::string prompt
          = command_editor::decode_prompt_string (prompt_spec);

        bool eof = false;
        m_current_input_line = m_reader.get_input (prompt, eof);

        m_input_buf.fill (m_current_input_line, eof);

        // Attempt to capture text for functions defined on the
        // command line, making sure each captured line ends with a
        // newline.
        //
        // FIXME: the handling of newline here seems a bit clumsy.
        //
        // See also comments in push_lexer::append_input.

        if (m_buffer_function_text && ! m_current_input_line.empty ())
          {
            m_function_text += m_current_input_line;

            if (m_current_input_line.back () != '\n')
              m_function_text += '\n';
          }
      }

    const int status = (m_input_buf.empty ()
                        ? YY_NULL
                        : m_input_buf.copy_chunk (buf, max_size));

    m_initial_input = false;

    return status;
  }

  // Store INPUT (with end-of-file flag EOF) in the input buffer and
  // record INPUT as the current input line.

  void
  push_lexer::append_input (const std::string& input, bool eof)
  {
    // FIXME: input may contain more than one line, so how can we
    // properly start buffering input for command-line functions?
    //
    // Currently, base_lexer::make_keyword_token starts buffering text
    // for command-line functions by setting the initial value of
    // m_function_text to m_current_input_line when function_kw is
    // recognized.  To make that work, we need to do something like
    // maintain a queue of input strings and pass them to the flex
    // buffer one line at a time, while also setting
    // m_current_input_line.  Some care will be needed if a single line
    // of input arrives in multiple calls to append_input.
    //
    // OR, should we require that the input string to append_input
    // IS a single line of input?  That seems to be what we are doing
    // here by setting m_current_input_line to input.

    m_input_buf.fill (input, eof);
    m_current_input_line = input;
  }

  // Copy up to MAX_SIZE characters of pending input into BUF for the
  // scanner.  Return the number of characters stored, or YY_NULL when
  // the buffer is empty and at end of file.

  int
  push_lexer::fill_flex_buffer (char *buf, unsigned max_size)
  {
    if (m_input_buf.empty () && ! m_input_buf.at_eof ())
      {
        // The input buffer is empty but not at end of file: insert
        // ASCII 1 as a marker for subsequent rules.  No newline is
        // inserted in this case.  The marker is written directly into
        // BUF instead of calling input_buffer::fill followed
        // immediately by input_buffer::copy_chunk.

        assert (max_size > 0);

        buf[0] = static_cast<char> (1);

        return 1;
      }

    if (m_input_buf.empty ())
      return YY_NULL;

    // Note that the copy_chunk function may append a newline
    // character to the input.

    return m_input_buf.copy_chunk (buf, max_size, true);
  }

OCTAVE_END_NAMESPACE(octave)
author	Rik <rik@octave.org>
date	Thu, 01 Dec 2022 14:23:45 -0800
parents	ef7418c5df8a
children	dfa5d9c3ae72 597f3ee61a48