view libinterp/parse-tree/lex.ll @ 22196:dd992fd74fce

put parser, lexer, and evaluator in namespace; interpreter now owns evaluator * oct-parse.in.yy, parse.h: Move parser classes to octave namespace. * lex.ll, lex.h: Move lexer classes to octave namespace. * pt-eval.h, pt-eval.cc: Move evaluator class to octave namespace. Don't define global current evaluator pointer here. * debug.cc, error.cc, input.cc, input.h, ls-mat-ascii.cc, pt-jit.cc, sighandlers.cc, utils.cc, variables.cc, ov-usr-fcn.cc, pt-assign.cc, pt-exp.h, pt-id.cc: Update for namespaces. * interpreter.cc, interpreter.h (current_evaluator): New global var. (interpreter::m_evaluator): New data member. (interpreter::~interpreter): Delete evaluator.
author John W. Eaton <jwe@octave.org>
date Tue, 12 Jul 2016 14:28:07 -0400
parents 9203833cab7d
children e43d83253e28
line wrap: on
line source

/*

Copyright (C) 1993-2015 John W. Eaton

This file is part of Octave.

Octave is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

Octave is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with Octave; see the file COPYING.  If not, see
<http://www.gnu.org/licenses/>.

*/

/*
We are using the pure parser interface and the reentrant lexer
interface but the Octave parser and lexer are NOT properly
reentrant because both still use many global variables.  It should be
safe to create a parser object and call it while another parser
object is active (to parse a callback function while the main
interactive parser is waiting for input, for example) if you take
care to properly save and restore (typically with an unwind_protect
object) relevant global values before and after the nested call.
*/

%top {
#if defined (HAVE_CONFIG_H)
#  include "config.h"
#endif

#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
// This one needs to be global.
#pragma GCC diagnostic ignored "-Wunused-function"

// Disable these warnings for code that is generated by flex, including
// pattern rules.  Push the current state so we can restore the warning
// state prior to functions we define at the bottom of the file.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma GCC diagnostic ignored "-Wsign-compare"
#endif

// Define away the deprecated register storage class specifier to avoid
// potential warnings about it.
#if ! defined (register)
#  define register
#endif

}

%option prefix = "octave_"
%option noyywrap
%option reentrant
%option bison-bridge

%option noyyalloc
%option noyyrealloc
%option noyyfree

%x COMMAND_START
%s MATRIX_START

%x INPUT_FILE_START

%x BLOCK_COMMENT_START
%x LINE_COMMENT_START

%x DQ_STRING_START
%x SQ_STRING_START

%x FQ_IDENT_START

%{

#include <cctype>
#include <cstring>

#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <stack>

#include "cmd-edit.h"
#include "lo-mappers.h"
#include "quit.h"
#include "unistd-wrappers.h"

// These would be alphabetical, but oct-parse.h must be included before
// oct-gperf.h and oct-parse.h must be included after token.h and the tree
// class declarations.  We can't include oct-parse.h in oct-gperf.h
// because it may not be protected to allow it to be included multiple
// times.

#include "Cell.h"
#include "comment-list.h"
#include "defun.h"
#include "error.h"
#include "errwarn.h"
#include "input.h"
#include "interpreter.h"
#include "lex.h"
#include "octave.h"
#include "ov.h"
#include "parse.h"
#include "pt-all.h"
#include "symtab.h"
#include "token.h"
#include "utils.h"
#include "variables.h"
#include <oct-parse.h>
#include <oct-gperf.h>

// FIXME: with bison 3.x, OCTAVE_STYPE appears in the generated
// oct-parse.h file, but there is no definition for YYSTYPE, which is
// needed by the code that is generated by flex.  I can't seem to find a
// way to tell flex to use OCTAVE_STYPE instead of YYSTYPE in the code
// it generates, or to tell bison to provide the definition of YYSTYPE
// in the generated oct-parse.h file.

#if defined (OCTAVE_STYPE_IS_DECLARED) && ! defined YYSTYPE
#  define YYSTYPE OCTAVE_STYPE
#endif

// Ask the generated scanner not to include <unistd.h> itself (flex
// honors YY_NO_UNISTD_H), and route any isatty call in the generated
// code to octave_isatty_wrapper.
// NOTE(review): the wrapper is presumably declared in
// "unistd-wrappers.h", which is included above -- confirm.
#define YY_NO_UNISTD_H 1
#define isatty octave_isatty_wrapper

// Refuse to build with something that is not flex, or with a flex that
// reports a major version below 2 or a minor version below 5.
// NOTE(review): only the major and minor numbers are tested, so the
// patch level named in the message is not actually checked, and a
// release with a larger major number but a minor number below 5 would
// be rejected.
#if ! (defined (FLEX_SCANNER) \
       && defined (YY_FLEX_MAJOR_VERSION) && YY_FLEX_MAJOR_VERSION >= 2 \
       && defined (YY_FLEX_MINOR_VERSION) && YY_FLEX_MINOR_VERSION >= 5)
#error lex.l requires flex version 2.5.4 or later
#endif

// The per-scanner "extra" pointer carries the lexer object.  Inside
// actions it is reachable through the shorthand curr_lexer.
#define YY_EXTRA_TYPE octave::base_lexer *
#define curr_lexer yyextra

// Arrange to get input via readline.
//
// Replace flex's default input routine: every buffer refill is
// delegated to the lexer object's fill_flex_buffer, whose return value
// becomes the number of characters reported to flex.

#if defined (YY_INPUT)
#  undef YY_INPUT
#endif
#define YY_INPUT(buf, result, max_size) \
  result = curr_lexer->fill_flex_buffer (buf, max_size)

// Try to avoid crashing out completely on fatal scanner errors.
//
// Replace flex's default fatal error handler with a call to the lexer
// object's fatal_error.  The lexer is fetched with yyget_extra because
// this macro is expanded in generated helper functions that have a
// yyscanner argument but where the yyextra shorthand may not be usable.

#if defined (YY_FATAL_ERROR)
#  undef YY_FATAL_ERROR
#endif
#define YY_FATAL_ERROR(msg) \
  (yyget_extra (yyscanner))->fatal_error (msg)

// CMD_OR_OP (PATTERN, TOK, COMPAT): action body for an operator that
// might instead be the start of a command-syntax argument.
//
// PATTERN is the text passed to lexer_debug.  If
// looks_like_command_arg () is true, the matched text is pushed back
// with yyless (0) and the scanner switches to the COMMAND_START state
// so the text is rescanned as command arguments.  Otherwise the
// scanner function returns handle_op_internal (TOK, false, COMPAT).
//
// The macro contains a return statement, so it can only be used inside
// a flex action.
#define CMD_OR_OP(PATTERN, TOK, COMPAT) \
 \
  do \
    { \
      curr_lexer->lexer_debug (PATTERN); \
 \
      if (curr_lexer->looks_like_command_arg ()) \
        { \
          yyless (0); \
          curr_lexer->push_start_state (COMMAND_START); \
        } \
      else \
        { \
          return curr_lexer->handle_op_internal (TOK, false, COMPAT); \
        } \
    } \
  while (0)

// CMD_OR_COMPUTED_ASSIGN_OP (PATTERN, TOK): action body for a computed
// assignment operator that might instead begin a command-syntax
// argument.
//
// When the previous token may be a command name and is followed by
// whitespace, the matched text is pushed back with yyless (0) and the
// scanner switches to the COMMAND_START state.  Otherwise the scanner
// function returns handle_language_extension_op (PATTERN, TOK, false).
//
// The macro contains a return statement, so it can only be used inside
// a flex action.
#define CMD_OR_COMPUTED_ASSIGN_OP(PATTERN, TOK) \
 \
  do \
    { \
      curr_lexer->lexer_debug (PATTERN); \
 \
      if (curr_lexer->previous_token_may_be_command () \
          && curr_lexer->space_follows_previous_token ()) \
        { \
          yyless (0); \
          curr_lexer->push_start_state (COMMAND_START); \
        } \
      else \
        { \
          return curr_lexer->handle_language_extension_op (PATTERN, TOK, \
                                                           false); \
        } \
    } \
  while (0)

// CMD_OR_UNARY_OP (PATTERN, TOK, COMPAT): action body for an operator
// that may be a unary operator, a binary operator, or the start of a
// command-syntax argument.
//
// * If the previous token may be a command name:
//     - when looks_like_command_arg () is true, the text is pushed back
//       and the scanner switches to the COMMAND_START state;
//     - otherwise the scanner function returns
//       handle_op_internal (TOK, false, COMPAT).
// * Otherwise the operator is handed to handle_unary_op (COMPAT true)
//   or handle_language_extension_unary_op (COMPAT false).  A negative
//   result means no token is produced here: the text is pushed back
//   and a ',' is inserted in front of it, with the column counter
//   decremented to compensate for the inserted character.  A
//   non-negative result is returned as the token.
//
// The macro contains return statements, so it can only be used inside
// a flex action.
#define CMD_OR_UNARY_OP(PATTERN, TOK, COMPAT) \
 \
  do \
    { \
      curr_lexer->lexer_debug (PATTERN); \
 \
      if (curr_lexer->previous_token_may_be_command ()) \
        { \
          if (curr_lexer->looks_like_command_arg ()) \
            { \
              yyless (0); \
              curr_lexer->push_start_state (COMMAND_START); \
            } \
          else \
            { \
              return curr_lexer->handle_op_internal (TOK, false, COMPAT); \
            } \
        } \
      else \
        { \
          int tok \
            = (COMPAT \
               ? curr_lexer->handle_unary_op (TOK) \
               : curr_lexer->handle_language_extension_unary_op (TOK)); \
 \
          if (tok < 0) \
            { \
              yyless (0); \
              curr_lexer->xunput (','); \
              /* Adjust for comma that was not really in the input stream. */ \
              curr_lexer->current_input_column--; \
            } \
          else \
            { \
              return tok; \
            } \
        } \
    } \
  while (0)

// HANDLE_STRING_CONTINUATION: bookkeeping for a line continuation.
//
// The macro calls decrement_promptflag (), advances input_line_number
// by one and resets current_input_column to 1.  Callers must therefore
// NOT update the line and column counters themselves.
//
// We can't rely on the trick used elsewhere of sticking ASCII 1 in
// the input buffer and recognizing it as a special case because ASCII
// 1 is a valid character for a character string.  If we are at the
// end of the buffer, ask for more input.  If we are at the end of the
// file, deal with it.  Otherwise, just keep going with the text from
// the current buffer.
//
// For a push lexer the macro may return from the scanner function: -1
// when the buffer is exhausted, or the value of handle_end_of_input ()
// at end of file.  It can therefore only be used inside a flex action.
#define HANDLE_STRING_CONTINUATION \
  do \
    { \
      curr_lexer->decrement_promptflag (); \
      curr_lexer->input_line_number++; \
      curr_lexer->current_input_column = 1; \
 \
      if (curr_lexer->is_push_lexer ()) \
        { \
          if (curr_lexer->at_end_of_buffer ()) \
            return -1; \
 \
          if (curr_lexer->at_end_of_file ()) \
            return curr_lexer->handle_end_of_input (); \
        } \
    } \
  while (0)

// When a command argument boundary is detected, push out the
// current argument being built.  This one seems like a good
// candidate for a function call.
//
// If string_text is empty there is nothing to emit and the macro does
// nothing (the break leaves only the macro's own do/while).  Otherwise
// the accumulated text is turned into an SQ_STRING token through
// handle_token, the argument buffer and the unbalanced-delimiter count
// are reset, the text matched by the current rule is pushed back with
// yyless (0) so that the same rule is matched again on the next call,
// and the scanner function returns the token.
//
// Because of that return, statements following the macro in an action
// only run when no argument was pending.

#define COMMAND_ARG_FINISH \
  do \
    { \
      if (curr_lexer->string_text.empty ()) \
        break; \
 \
      int retval = curr_lexer->handle_token (curr_lexer->string_text, \
                                             SQ_STRING); \
 \
      curr_lexer->string_text = ""; \
      curr_lexer->command_arg_paren_count = 0; \
 \
      yyless (0); \
 \
      return retval; \
    } \
  while (0)

// HANDLE_IDENTIFIER (pattern, get_set): action body for identifiers.
//
// pattern is the text passed to lexer_debug.  get_set is a boolean;
// when true, only the first three characters of the match are kept
// (yyless (3)) and maybe_classdef_get_set_method is cleared.
// NOTE(review): the three characters are presumably the "get" or "set"
// prefix of a classdef accessor name -- confirm against the rules that
// use this macro.
//
// Steps, in order:
//  1. If whitespace is significant, whitespace follows the previous
//     token, and the previous token is not '[', '{' or a binary
//     operator, the identifier starts a new element: the text is
//     pushed back and a ',' is inserted in front of it.
//     NOTE(review): this uses flex's unput directly, without the
//     column adjustment that accompanies xunput (',') elsewhere in
//     this file -- confirm that is intended.
//  2. Otherwise, if we are not in a declaration list and the previous
//     token may be a command name, the text is pushed back and the
//     scanner switches to the COMMAND_START state.
//  3. Otherwise handle_identifier () is called and, if its result is
//     non-negative, the scanner function returns
//     count_token_internal (result).  A negative result produces no
//     token and scanning simply continues.
//
// The macro contains a return statement, so it can only be used inside
// a flex action.
#define HANDLE_IDENTIFIER(pattern, get_set) \
  do \
    { \
      curr_lexer->lexer_debug (pattern); \
 \
      int tok = curr_lexer->previous_token_value (); \
 \
      if (curr_lexer->whitespace_is_significant () \
          && curr_lexer->space_follows_previous_token () \
          && ! (tok == '[' || tok == '{' \
                || curr_lexer->previous_token_is_binop ())) \
        { \
          yyless (0); \
          unput (','); \
        } \
      else \
        { \
          if (! curr_lexer->looking_at_decl_list \
              && curr_lexer->previous_token_may_be_command ()) \
            { \
              yyless (0); \
              curr_lexer->push_start_state (COMMAND_START); \
            } \
          else \
            { \
              if (get_set) \
                { \
                  yyless (3); \
                  curr_lexer->maybe_classdef_get_set_method = false; \
                } \
 \
              int id_tok = curr_lexer->handle_identifier (); \
 \
              if (id_tok >= 0) \
                return curr_lexer->count_token_internal (id_tok); \
            } \
        } \
    } \
  while (0)

// File-local state.  None of these variables is read or written in the
// part of the file shown above the rules; the notes on the first two
// are inferred from their names only.

// NOTE(review): presumably enables printing of tokens as they are
// recognized -- confirm against the code that reads it.
static bool Vdisplay_tokens = false;

// NOTE(review): presumably a running count of tokens produced --
// confirm against the code that updates it.
static unsigned int Vtoken_count = 0;

// Internal variable for lexer debugging state.
static bool lexer_debug_flag = false;

%}

/* A single decimal digit. */
D       [0-9]
/* A decimal digit or an underscore (accepted inside numeric literals). */
D_      [0-9_]
/* Horizontal whitespace: space or tab. */
S       [ \t]
/* A line terminator: LF, CR, or CR LF. */
NL      ((\n)|(\r)|(\r\n))
/* Suffix letter that marks an imaginary constant. */
Im      [iIjJ]
/* Characters that start a comment. */
CCHAR   [#%]
/* An identifier: a letter, '_' or '$', followed by letters, digits, '_' or '$'. */
IDENT   ([_$a-zA-Z][_$a-zA-Z0-9]*)
/* One or more identifiers joined by '.'. */
FQIDENT ({IDENT}(\.{IDENT})*)
/* Exponent part of a real number; 'D'/'d' is accepted as well as 'E'/'e'. */
EXPON   ([DdEe][+-]?{D}{D_}*)
/* Binary literal: 0b or 0B followed by binary digits and underscores. */
NUMBIN  (0[bB][01_]+)
/* Hexadecimal literal: 0x or 0X, a hex digit, then hex digits and underscores. */
NUMHEX  (0[xX][0-9a-fA-F][0-9a-fA-F_]*)
/* Real literal: digits with optional fraction and exponent, or a leading '.'. */
NUMREAL (({D}{D_}*\.?{D_}*{EXPON}?)|(\.{D}{D_}*{EXPON}?))
/* Any numeric literal. */
NUMBER  ({NUMREAL}|{NUMHEX}|{NUMBIN})

/* Any character other than CR or LF. */
ANY_EXCEPT_NL [^\r\n]
/* Any single character, or a complete line terminator. */
ANY_INCLUDING_NL (.|{NL})

%%

%{
// Make script and function files start with an invalid token. This makes
// the parser go down a special path.
%}

<INPUT_FILE_START>{ANY_INCLUDING_NL} {
    curr_lexer->lexer_debug ("<INPUT_FILE_START>{ANY_INCLUDING_NL}");

    // The match was only needed to trigger this rule; give the first
    // character back so the file contents are scanned normally after
    // the INPUT_FILE token.
    // NOTE(review): only yytext[0] is pushed back, so if the match was
    // a two-character CR LF line terminator the LF is not restored --
    // confirm whether that matters.
    curr_lexer->xunput (yytext[0]);

    // Assume a script file for now.  This may be reset later if
    // "function" or "classdef" appears as the first token.
    curr_lexer->reading_script_file = true;

    // Leave the INPUT_FILE_START state so this rule fires only once.
    curr_lexer->pop_start_state ();

    return curr_lexer->show_token (INPUT_FILE);
  }

<INPUT_FILE_START><<EOF>> {
    curr_lexer->lexer_debug ("<INPUT_FILE_START><<EOF>>");

    // Empty input: still emit the INPUT_FILE token, but there is no
    // character to give back.

    // Assume a script file for now.  This may be reset later if
    // "function" or "classdef" appears as the first token.
    curr_lexer->reading_script_file = true;

    // Leave the INPUT_FILE_START state so this rule fires only once.
    curr_lexer->pop_start_state ();

    return curr_lexer->show_token (INPUT_FILE);
  }

%{
// Help and other command-style functions.
%}

%{
// Commands can be continued on a second line using the ellipsis.
// If an argument is in construction, it is completed.
%}

<COMMAND_START>(\.\.\.){ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<COMMAND_START>(\\.\\.\\.){ANY_EXCEPT_NL}*{NL}");

    // If an argument is under construction, emit it first.  In that
    // case the macro pushes the matched text back and returns, so this
    // rule is matched again afterwards with an empty argument buffer.
    COMMAND_ARG_FINISH;

    // HANDLE_STRING_CONTINUATION advances input_line_number and resets
    // current_input_column itself.  Do not update them here as well,
    // or every continuation line would be counted twice and all later
    // line numbers would be off.
    HANDLE_STRING_CONTINUATION;
  }

%{
// Commands normally end at the end of a line or a semicolon.
%}

<COMMAND_START>({CCHAR}{ANY_EXCEPT_NL}*)?{NL} {
    curr_lexer->lexer_debug ("<COMMAND_START>({CCHAR}{ANY_EXCEPT_NL}*)?{NL}");

    // Emit a pending argument first; if there is one, the macro
    // returns and this rule is matched again afterwards.
    COMMAND_ARG_FINISH;

    // The command is complete: a new statement starts on the next line.
    curr_lexer->at_beginning_of_statement = true;
    curr_lexer->looking_for_object_index = false;

    curr_lexer->current_input_column = 1;
    curr_lexer->input_line_number++;

    curr_lexer->pop_start_state ();

    // An optional trailing comment is dropped; only the newline is
    // reported to the parser.
    return curr_lexer->handle_token ('\n');
  }

<COMMAND_START>[\,\;] {
    curr_lexer->lexer_debug ("<COMMAND_START>[\\,\\;]");

    // A comma seen while delimiters are still unbalanced is part of
    // the argument text.  A semicolon, or a comma outside of any open
    // delimiter, ends the command.
    const bool comma_inside_delimiters
      = (yytext[0] == ',' && curr_lexer->command_arg_paren_count != 0);

    if (comma_inside_delimiters)
      {
        curr_lexer->string_text += yytext;
        curr_lexer->current_input_column += yyleng;
      }
    else
      {
        // Emit a pending argument first; if there is one, the macro
        // returns and this rule is matched again afterwards.
        COMMAND_ARG_FINISH;

        curr_lexer->looking_for_object_index = false;
        curr_lexer->at_beginning_of_statement = true;
        curr_lexer->pop_start_state ();

        return curr_lexer->handle_token (yytext[0]);
      }
  }

%{
// Unbalanced parentheses serve as pseudo-quotes: they are included in
// the final argument string, but they cause parentheses and quotes to
// be slurped into that argument as well.
%}

<COMMAND_START>[\(\[\{]* {
    // Note: the debug label below says "+" although the pattern uses "*".
    curr_lexer->lexer_debug ("<COMMAND_START>[\\(\\[\\{]+");

    // Opening delimiters are kept in the argument text, and each one
    // raises the count of unbalanced delimiters by one.
    curr_lexer->string_text += yytext;
    curr_lexer->command_arg_paren_count += yyleng;
    curr_lexer->current_input_column += yyleng;
  }

<COMMAND_START>[\)\]\}]* {
    // Note: the debug label below says "+" although the pattern uses "*".
    curr_lexer->lexer_debug ("<COMMAND_START>[\\)\\]\\}]+");

    // Closing delimiters are kept in the argument text, and each one
    // lowers the count of unbalanced delimiters by one.
    curr_lexer->string_text += yytext;
    curr_lexer->command_arg_paren_count -= yyleng;
    curr_lexer->current_input_column += yyleng;
  }

%{
// Handle quoted strings.  Quoted strings that are not separated by
// whitespace from other argument text are combined with that previous
// text.  For instance,
//
//   command 'text1'"text2"
//
// has a single argument text1text2, not two separate arguments.
// That's why we must test to see if we are in command argument mode
// when processing the end of a string.
%}

<COMMAND_START>[\"\'] {
    curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']");

    if (curr_lexer->command_arg_paren_count != 0)
      {
        // Inside unbalanced delimiters a quote character is ordinary
        // argument text.
        curr_lexer->string_text += yytext;
      }
    else
      {
        // Otherwise it opens a quoted string of the matching kind.
        const bool is_double_quote = (yytext[0] == '"');

        curr_lexer->begin_string (is_double_quote
                                  ? DQ_STRING_START : SQ_STRING_START);
      }

    curr_lexer->current_input_column += yyleng;
  }

%{
// In standard command argument processing, whitespace separates
// arguments.  In the presence of unbalanced parentheses, it is
// incorporated into the argument.
%}

<COMMAND_START>{S}* {
    curr_lexer->lexer_debug ("<COMMAND_START>{S}*");

    if (curr_lexer->command_arg_paren_count != 0)
      {
        // Whitespace inside unbalanced delimiters belongs to the
        // argument.
        curr_lexer->string_text += yytext;
      }
    else
      {
        // Whitespace ends the current argument, if there is one.  When
        // an argument is emitted the macro returns from the scanner.
        COMMAND_ARG_FINISH;
      }

    curr_lexer->current_input_column += yyleng;
  }

%{
// Everything else is slurped into the command arguments.
%}

<COMMAND_START>([\.]|[^#% \t\r\n\.\,\;\"\'\(\[\{\}\]\)]*) {
    curr_lexer->lexer_debug ("<COMMAND_START>([\\.]|[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]*");

    // Ordinary text: account for its width, then append it to the
    // argument being built.
    curr_lexer->current_input_column += yyleng;
    curr_lexer->string_text += yytext;
  }

<MATRIX_START>{S}* {
    curr_lexer->lexer_debug ("<MATRIX_START>{S}*");

    // The whitespace is consumed here, so the column counter has to
    // advance past it, exactly as the whitespace rules for the other
    // start states do.  Without this, the column recorded for every
    // later token on the line is too small.
    curr_lexer->current_input_column += yyleng;

    // Whitespace can separate elements inside a matrix, so remember
    // that the previous token was followed by some.
    curr_lexer->mark_previous_token_trailing_space ();
  }

<MATRIX_START>{NL} {
    curr_lexer->lexer_debug ("<MATRIX_START>{NL}");

    curr_lexer->current_input_column = 1;
    curr_lexer->input_line_number++;

    if (! curr_lexer->nesting_level.is_paren ())
      {
        // Directly inside brackets or braces a newline ends the current
        // row, unless a row separator or the opening delimiter was the
        // last thing seen.
        const int prev_tok = curr_lexer->previous_token_value ();

        const bool row_boundary_already_seen
          = (prev_tok == ';' || prev_tok == '[' || prev_tok == '{');

        if (! row_boundary_already_seen)
          {
            curr_lexer->xunput (';');
            // The semicolon was never in the input stream, so undo
            // the column advance it will cause.
            curr_lexer->current_input_column--;
          }
      }
    else
      curr_lexer->warn_language_extension ("bare newline inside parentheses");
  }

%{
// For this and the next two rules, we're looking at ']', and we
// need to know if the next token is '=' or '=='.
//
// It would have been so much easier if the delimiters were simply
// different for the expression on the left hand side of the equals
// operator.
//
// It's also a pain in the ass to decide whether to insert a comma
// after seeing a ']' character...

// FIXME: we need to handle block comments here.
%}

<MATRIX_START>\] {
    curr_lexer->lexer_debug ("<MATRIX_START>\\]");

    // Drop the object-index entry that was pushed when the matching
    // opening bracket was scanned.
    curr_lexer->looking_at_object_index.pop_front ();

    // A closed bracket expression is in the middle of a statement and
    // may be followed by an index.
    curr_lexer->at_beginning_of_statement = false;
    curr_lexer->looking_for_object_index = true;

    curr_lexer->handle_close_bracket (']');

    return curr_lexer->count_token (']');
  }

%{
// FIXME: we need to handle block comments here.
%}

<MATRIX_START>\} {
    // Note: the debug label below ends in "*" although the pattern
    // matches exactly one closing brace.
    curr_lexer->lexer_debug ("<MATRIX_START>\\}*");

    // Drop the object-index entry belonging to the expression that is
    // being closed.
    curr_lexer->looking_at_object_index.pop_front ();

    // A closed brace expression is in the middle of a statement and
    // may be followed by an index.
    curr_lexer->at_beginning_of_statement = false;
    curr_lexer->looking_for_object_index = true;

    curr_lexer->handle_close_bracket ('}');

    return curr_lexer->count_token ('}');
  }

\[ {
    curr_lexer->lexer_debug ("\\[");

    // Inside a matrix, "a [b]" means two elements.  When whitespace is
    // significant and separates this bracket from a previous token that
    // is neither an opening delimiter nor a binary operator, a comma
    // has to be inserted in front of the bracket.
    bool need_separator = false;

    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        const int prev_tok = curr_lexer->previous_token_value ();

        need_separator = ! (prev_tok == '[' || prev_tok == '{'
                            || curr_lexer->previous_token_is_binop ());
      }

    if (! need_separator)
      {
        curr_lexer->nesting_level.bracket ();

        curr_lexer->looking_at_object_index.push_front (false);

        curr_lexer->current_input_column += yyleng;
        curr_lexer->looking_for_object_index = false;
        curr_lexer->at_beginning_of_statement = false;

        // While a function is being defined and its name has not been
        // parsed yet, the bracket opens the list of return values.
        // Otherwise it opens a matrix or the left side of an
        // assignment.
        if (curr_lexer->defining_func
            && ! curr_lexer->parsed_function_name.top ())
          curr_lexer->looking_at_return_list = true;
        else
          curr_lexer->looking_at_matrix_or_assign_lhs = true;

        curr_lexer->decrement_promptflag ();

        curr_lexer->bracketflag++;

        curr_lexer->push_start_state (MATRIX_START);

        return curr_lexer->count_token ('[');
      }

    // Give the bracket back and put a comma in front of it.  The comma
    // was never in the input stream, so undo the column advance it
    // will cause.
    yyless (0);
    curr_lexer->xunput (',');
    curr_lexer->current_input_column--;
  }

\] {
    curr_lexer->lexer_debug ("\\]");

    // A closing bracket seen outside of the MATRIX_START state: undo
    // one nesting level and drop the matching object-index entry.
    curr_lexer->nesting_level.remove ();

    curr_lexer->looking_at_object_index.pop_front ();

    // The closed expression is in the middle of a statement and may be
    // followed by an index.
    curr_lexer->at_beginning_of_statement = false;
    curr_lexer->looking_for_object_index = true;

    return curr_lexer->handle_token (']');
  }

%{
// Gobble comments.
%}

%{
// Start of a block comment.  If the comment marker appears immediately
// after a block of full-line comments, finish the full line comment
// block.
%}

^{S}*{CCHAR}\{{S}*{NL} {
    curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}");

    // Nothing is consumed here.  The marker line is rescanned by the
    // BLOCK_COMMENT_START rules, which do the bookkeeping.
    yyless (0);

    const bool inside_line_comments
      = (curr_lexer->start_state () == LINE_COMMENT_START);

    if (inside_line_comments)
      {
        // The marker ends a run of full-line comments: store what has
        // been collected so far and leave that state.
        const bool have_comment_text = ! curr_lexer->comment_text.empty ();

        if (have_comment_text)
          curr_lexer->finish_comment (octave_comment_elt::full_line);

        curr_lexer->pop_start_state ();
      }

    curr_lexer->decrement_promptflag ();

    curr_lexer->push_start_state (BLOCK_COMMENT_START);
  }

<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");

    curr_lexer->current_input_column = 1;
    curr_lexer->input_line_number++;

    // For a marker nested inside an already open block comment, the
    // collected comment text is replaced by a single newline.
    if (curr_lexer->block_comment_nesting_level != 0)
      curr_lexer->comment_text = "\n";

    ++curr_lexer->block_comment_nesting_level;
  }

%{
// End of a block comment.  If this block comment is nested inside
// another, wait for the outermost block comment block to be closed
// before storing the comment.
%}

<BLOCK_COMMENT_START>^{S}*{CCHAR}\}{S}*{NL} {
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\}{S}*{NL}");

    curr_lexer->current_input_column = 1;
    curr_lexer->input_line_number++;

    // Only the end of the outermost block stores the comment.  For the
    // end of a nested block the collected text is replaced by a single
    // newline.
    if (curr_lexer->block_comment_nesting_level <= 1)
      curr_lexer->finish_comment (octave_comment_elt::block);
    else
      curr_lexer->comment_text = "\n";

    // Once the last open block is closed, restore the prompt flag and
    // return to the state that was active before the comment.
    if (--curr_lexer->block_comment_nesting_level == 0)
      {
        curr_lexer->increment_promptflag ();

        curr_lexer->pop_start_state ();
      }
  }

%{
// Body of a block comment.
%}

<BLOCK_COMMENT_START>{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>{ANY_EXCEPT_NL}*{NL}");

    // Keep the whole line, including its terminator, as comment text.
    curr_lexer->comment_text += yytext;

    curr_lexer->current_input_column = 1;
    curr_lexer->input_line_number++;
  }

%{
// Full-line or end-of-line comment.
%}

{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    // A comment seen outside of any comment state.  Nothing is consumed
    // here: switch to the LINE_COMMENT_START state and give the whole
    // match back so the rule for that state processes the text.
    curr_lexer->push_start_state (LINE_COMMENT_START);
    yyless (0);
  }

<LINE_COMMENT_START>{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    // A comment that starts in column one is a full-line comment;
    // anything else trails other text on the same line.
    const bool starts_in_first_column
      = (curr_lexer->current_input_column == 1);

    curr_lexer->input_line_number++;
    curr_lexer->current_input_column = 1;

    const size_t match_len = yyleng;
    size_t pos = 0;

    // Skip the blanks in front of the comment marker.
    bool saw_leading_blank = false;
    for (; pos < match_len && (yytext[pos] == ' ' || yytext[pos] == '\t');
         pos++)
      saw_leading_blank = true;

    // Skip and count the comment marker characters.
    size_t marker_count = 0;
    for (; pos < match_len && (yytext[pos] == '#' || yytext[pos] == '%');
         pos++)
      marker_count++;

    // Everything after the markers, including the line terminator, is
    // comment text.
    curr_lexer->comment_text += &yytext[pos];

    if (! starts_in_first_column)
      {
        if (saw_leading_blank)
          curr_lexer->mark_previous_token_trailing_space ();

        curr_lexer->finish_comment (octave_comment_elt::end_of_line);

        curr_lexer->pop_start_state ();

        // The newline still ends the statement in front of the
        // comment, so hand it back and undo the line count for it.
        curr_lexer->xunput ('\n');
        curr_lexer->input_line_number--;
      }
    else if (marker_count == 1 && yytext[pos] == '{')
      {
        // A single marker followed by '{' and nothing but whitespace
        // looks like the start of a block comment.
        bool only_blanks_follow = true;

        for (size_t k = pos + 1; k < match_len; k++)
          {
            const char ch = yytext[k];

            if (! (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'))
              {
                only_blanks_follow = false;
                break;
              }
          }

        if (only_blanks_follow)
          {
            // Give the line back so it can be rescanned, and close the
            // current run of full-line comments.
            yyless (0);

            curr_lexer->finish_comment (octave_comment_elt::full_line);

            curr_lexer->pop_start_state ();
          }
      }
  }

%{
// End of a block of full-line comments.
%}

<LINE_COMMENT_START>{ANY_INCLUDING_NL} {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>{ANY_INCLUDING_NL}");

    // Reached when the comment rule for this state does not apply, so
    // the run of full-line comments is over.  Give the first character
    // back so it is scanned again in the enclosing state.
    curr_lexer->xunput (yytext[0]);

    // Store the collected comment text as a full-line comment.
    curr_lexer->finish_comment (octave_comment_elt::full_line);

    curr_lexer->pop_start_state ();
  }

%{
// End of a block of full-line comments.
%}

<LINE_COMMENT_START><<EOF>> {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START><<EOF>>");

    // End of input while collecting full-line comments: store what was
    // collected and return to the enclosing state, which then sees the
    // end of input itself.
    curr_lexer->finish_comment (octave_comment_elt::full_line);

    curr_lexer->pop_start_state ();
  }

%{
// Double-quoted character strings.
%}

<DQ_STRING_START>\"\" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\"");

    // Two consecutive double quotes stand for one literal double quote.
    curr_lexer->string_text += '"';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>\" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"");

    curr_lexer->current_input_column++;

    curr_lexer->pop_start_state ();

    // Inside command syntax the string is one piece of a larger
    // argument, so its text stays in string_text and no token is
    // produced here.  Everywhere else the string is a token of its own.
    if (curr_lexer->start_state() != COMMAND_START)
      {
        curr_lexer->looking_for_object_index = true;
        curr_lexer->at_beginning_of_statement = false;

        token *str_tok = new token (DQ_STRING,
                                    curr_lexer->string_text,
                                    curr_lexer->string_line,
                                    curr_lexer->string_column);

        curr_lexer->push_token (str_tok);

        curr_lexer->string_text = "";

        return curr_lexer->count_token_internal (DQ_STRING);
      }
  }

<DQ_STRING_START>\\[0-7]{1,3} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");

    curr_lexer->current_input_column += yyleng;

    // The "%o" conversion stores its result through a pointer to
    // unsigned int, so the destination must have that type.  The
    // pattern guarantees at least one octal digit after the backslash,
    // so the conversion cannot fail; the initializer only ensures the
    // value is never indeterminate.
    unsigned int result = 0;
    sscanf (yytext+1, "%o", &result);

    // Three octal digits can encode values up to 0777, but only values
    // that fit in a single byte are valid characters.
    if (result > 0xff)
      {
        token *tok
          = new token (LEXICAL_ERROR,
                       "invalid octal escape sequence in character string",
                       curr_lexer->input_line_number,
                       curr_lexer->current_input_column);

        curr_lexer->push_token (tok);

        return curr_lexer->count_token_internal (LEXICAL_ERROR);
      }
    else
      curr_lexer->string_text += static_cast<unsigned char> (result);
  }

<DQ_STRING_START>\\x[0-9a-fA-F]+ {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\x[0-9a-fA-F]+");

    curr_lexer->current_input_column += yyleng;

    // The "%x" conversion stores its result through a pointer to
    // unsigned int, so the destination must have that type.  The
    // pattern guarantees at least one hexadecimal digit after "\x", so
    // the conversion cannot fail; the initializer only ensures the
    // value is never indeterminate.
    unsigned int result = 0;
    sscanf (yytext+2, "%x", &result);

    // Truncate the value silently instead of checking the range like
    // we do for octal above.  This is to match C/C++ where any number
    // of digits is allowed but the value is implementation-defined if
    // it exceeds the range of the character type.
    curr_lexer->string_text += static_cast<unsigned char> (result);
  }

<DQ_STRING_START>"\\a" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\"");

    // Backslash-a: alert (bell).
    curr_lexer->string_text += '\a';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>"\\b" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\"");

    // Backslash-b: backspace.
    curr_lexer->string_text += '\b';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>"\\f" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\"");

    // Backslash-f: form feed.
    curr_lexer->string_text += '\f';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>"\\n" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\"");

    // Backslash-n: newline.
    curr_lexer->string_text += '\n';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>"\\r" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\"");

    // Backslash-r: carriage return.
    curr_lexer->string_text += '\r';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>"\\t" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\"");

    // Backslash-t: horizontal tab.
    curr_lexer->string_text += '\t';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>"\\v" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\"");

    // Backslash-v: vertical tab.
    curr_lexer->string_text += '\v';
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>(\.\.\.){S}*{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>(\\.\\.\\.){S}*{NL}");

    static const char *deprecation_text = "'...' continuations in double-quoted character strings are obsolete and will not be allowed in a future version of Octave; please use '\\' instead";

    // Mention the file and line in the warning when the text comes
    // from a file.
    const std::string file_name = curr_lexer->fcn_file_full_name;

    if (! file_name.empty ())
      warning_with_id ("Octave:deprecated-syntax",
                       "%s; near line %d of file '%s'", deprecation_text,
                       curr_lexer->input_line_number, file_name.c_str ());
    else
      warning_with_id ("Octave:deprecated-syntax", "%s", deprecation_text);

    // The continuation is still honored: the string carries on with
    // the next line.
    HANDLE_STRING_CONTINUATION;
  }

<DQ_STRING_START>\\{S}+{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{S}+{NL}");

    static const char *deprecation_text = "white space and comments after continuation markers in double-quoted character strings are obsolete and will not be allowed in a future version of Octave";

    // Mention the file and line in the warning when the text comes
    // from a file.
    const std::string file_name = curr_lexer->fcn_file_full_name;

    if (! file_name.empty ())
      warning_with_id ("Octave:deprecated-syntax",
                       "%s; near line %d of file '%s'", deprecation_text,
                       curr_lexer->input_line_number, file_name.c_str ());
    else
      warning_with_id ("Octave:deprecated-syntax", "%s", deprecation_text);

    // The continuation is still honored: the string carries on with
    // the next line.
    HANDLE_STRING_CONTINUATION;
  }

<DQ_STRING_START>\\{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}");

    // A backslash directly in front of the line terminator continues
    // the string on the next line.  Nothing is added to the string
    // text; the macro updates the line and column counters.
    HANDLE_STRING_CONTINUATION;
  }

<DQ_STRING_START>\\. {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\.");

    // Any other escaped character stands for itself: drop the
    // backslash and keep the character after it.
    curr_lexer->string_text += yytext[1];
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>\. {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\.");

    // A dot that is not part of a "..." continuation is ordinary
    // string text.
    curr_lexer->string_text += yytext[0];
    curr_lexer->current_input_column++;
  }

<DQ_STRING_START>[^\.\\\r\n\"]+ {
    curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\.\\\\\\r\\n\\\"]+");

    // A run of characters with no special meaning inside a
    // double-quoted string is copied as is.
    curr_lexer->string_text += yytext;
    curr_lexer->current_input_column += yyleng;
  }

<DQ_STRING_START>{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");

    // The line ended before the closing quote.  The error token records
    // the position of the line end, so it has to be created before the
    // line and column counters move on to the next line.
    curr_lexer->push_token (new token (LEXICAL_ERROR,
                                       "unterminated character string constant",
                                       curr_lexer->input_line_number,
                                       curr_lexer->current_input_column));

    curr_lexer->input_line_number++;
    curr_lexer->current_input_column = 1;

    return curr_lexer->count_token_internal (LEXICAL_ERROR);
  }

%{
// Single-quoted character strings.
%}

<SQ_STRING_START>\'\' {
    curr_lexer->lexer_debug ("<SQ_STRING_START>\\'\\'");

    // Two consecutive single quotes stand for one literal single quote.
    curr_lexer->string_text += '\'';
    curr_lexer->current_input_column += yyleng;
  }

<SQ_STRING_START>\' {
    curr_lexer->lexer_debug ("<SQ_STRING_START>\\'");

    curr_lexer->current_input_column++;

    curr_lexer->pop_start_state ();

    // Inside command syntax the string is one piece of a larger
    // argument, so its text stays in string_text and no token is
    // produced here.  Everywhere else the string is a token of its own.
    if (curr_lexer->start_state() != COMMAND_START)
      {
        curr_lexer->looking_for_object_index = true;
        curr_lexer->at_beginning_of_statement = false;

        token *str_tok = new token (SQ_STRING,
                                    curr_lexer->string_text,
                                    curr_lexer->string_line,
                                    curr_lexer->string_column);

        curr_lexer->push_token (str_tok);

        curr_lexer->string_text = "";

        return curr_lexer->count_token_internal (SQ_STRING);
      }
  }

<SQ_STRING_START>[^\'\n\r]+ {
    curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+");

    // Everything up to the next quote or line end is copied as is;
    // backslashes are not special in single-quoted strings.
    curr_lexer->string_text += yytext;
    curr_lexer->current_input_column += yyleng;
  }

<SQ_STRING_START>{NL} {
    curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");

    // The line ended before the closing quote.  The error token records
    // the position of the line end, so it has to be created before the
    // line and column counters move on to the next line.
    curr_lexer->push_token (new token (LEXICAL_ERROR,
                                       "unterminated character string constant",
                                       curr_lexer->input_line_number,
                                       curr_lexer->current_input_column));

    curr_lexer->input_line_number++;
    curr_lexer->current_input_column = 1;

    return curr_lexer->count_token_internal (LEXICAL_ERROR);
  }

%{
// Fully-qualified identifiers (used for classdef).
%}

<FQ_IDENT_START>{FQIDENT} {
    curr_lexer->lexer_debug ("<FQ_IDENT_START>{FQIDENT}");

    // A single fully-qualified name is expected in this state, so
    // leave it as soon as one has been matched.
    curr_lexer->pop_start_state ();

    const int fq_tok = curr_lexer->handle_fq_identifier ();

    // A negative value means that no token is to be returned here.
    if (fq_tok >= 0)
      {
        curr_lexer->looking_for_object_index = true;

        return curr_lexer->count_token_internal (fq_tok);
      }
  }

<FQ_IDENT_START>{S}+ {
    // Skip whitespace in front of the name: advance the column counter
    // and record that the previous token was followed by whitespace.
    curr_lexer->current_input_column += yyleng;

    curr_lexer->mark_previous_token_trailing_space ();
  }

<FQ_IDENT_START>. {
    // Anything else is not a fully-qualified name: give the character
    // back and leave this state so the enclosing state scans it.
    yyless (0);
    curr_lexer->pop_start_state ();
  }

%{
// Imaginary numbers.
%}

{NUMBER}{Im} {
    curr_lexer->lexer_debug ("{NUMBER}{Im}");

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // "cmd 2i": the number is really a command-syntax argument.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        const int prev_tok = curr_lexer->previous_token_value ();

        const bool after_opening_delimiter
          = (prev_tok == '[' || prev_tok == '{');

        // The number is a token of its own unless it starts a new
        // matrix element, which is the case when whitespace is
        // significant and separates it from a previous token that is
        // neither an opening delimiter nor a binary operator.
        if (! curr_lexer->whitespace_is_significant ()
            || ! curr_lexer->space_follows_previous_token ()
            || after_opening_delimiter
            || curr_lexer->previous_token_is_binop ())
          {
            curr_lexer->handle_number ();
            return curr_lexer->count_token_internal (IMAG_NUM);
          }
        else
          {
            // Give the number back with a comma in front of it.
            yyless (0);
            unput (',');
          }
      }
  }

%{
// Real numbers.  Don't grab the '.' part of a dot operator as part of
// the constant.
%}

{D}{D_}*/\.[\*/\\^\'] |
{NUMBER} {
    curr_lexer->lexer_debug ("{D}{D_}*/\\.[\\*/\\\\^\\']|{NUMBER}");

    const bool command_arg
      = (curr_lexer->previous_token_may_be_command ()
         && curr_lexer->space_follows_previous_token ());

    if (! command_arg)
      {
        const int prev = curr_lexer->previous_token_value ();

        // When whitespace is significant, a number that follows a
        // blank begins a new element unless the previous token is an
        // opening bracket/brace or a binary operator.
        const bool new_element
          = (curr_lexer->whitespace_is_significant ()
             && curr_lexer->space_follows_previous_token ()
             && prev != '[' && prev != '{'
             && ! curr_lexer->previous_token_is_binop ());

        if (! new_element)
          {
            curr_lexer->handle_number ();
            return curr_lexer->count_token_internal (NUM);
          }

        // Give the number back and put a separating comma in front
        // of it.
        yyless (0);
        unput (',');
      }
    else
      {
        // Rescan the text as the argument of a command-syntax call.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
  }

%{
// Eat whitespace.  Whitespace inside matrix constants is handled by
// the <MATRIX_START> start state code above.
%}

{S}+ {
    // Note the blank after the previous token, then account for the
    // columns occupied by the whitespace.  No token is returned.
    curr_lexer->mark_previous_token_trailing_space ();

    curr_lexer->current_input_column += yyleng;
  }

%{
// Continuation lines.  Allow arbitrary text after continuations.
%}

\.\.\.{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("\\.\\.\\.{ANY_EXCEPT_NL}*{NL}");

    // The match covers the "..." marker, any text after it, and the
    // newline.  This action returns no token.
    curr_lexer->handle_continuation ();
  }

%{
// Deprecated C preprocessor style continuation markers: a backslash
// followed only by whitespace and, optionally, a comment.
%}

\\{S}*{NL} |
\\{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("\\\\{S}*{NL}|\\\\{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    static const char *deprecation_msg = "using continuation marker \\ outside of double quoted strings is deprecated and will be removed in a future version of Octave";

    std::string file = curr_lexer->fcn_file_full_name;

    // Mention the location only when a file name is known.
    if (! file.empty ())
      warning_with_id ("Octave:deprecated-syntax",
                       "%s; near line %d of file '%s'", deprecation_msg,
                       curr_lexer->input_line_number, file.c_str ());
    else
      warning_with_id ("Octave:deprecated-syntax", "%s", deprecation_msg);

    // After warning, treat it like any other continuation.
    curr_lexer->handle_continuation ();
  }

%{
// End of file.  All of the work is delegated to
// base_lexer::handle_end_of_input.
%}

<<EOF>> {
   return curr_lexer->handle_end_of_input ();
  }

%{
// Identifiers.

// Don't allow get and set to be recognized as keywords if they are
// followed by "(".  Both rules expand the HANDLE_IDENTIFIER macro; the
// first argument is the pattern text and the second is true only for
// the get/set rule.
%}

(set|get){S}*\( {
    HANDLE_IDENTIFIER ("(set|get){S}*\\(", true);
  }

{IDENT} {
    HANDLE_IDENTIFIER ("{IDENT}", false);
  }

%{
// Superclass method identifiers.
%}

{IDENT}@{FQIDENT} {
    curr_lexer->lexer_debug ("{IDENT}@{FQIDENT}");

    if (! curr_lexer->previous_token_may_be_command ())
      {
        const int sc_tok = curr_lexer->handle_superclass_identifier ();

        // A negative result is not handed to the parser.
        if (! (sc_tok < 0))
          {
            curr_lexer->looking_for_object_index = true;

            return curr_lexer->count_token_internal (sc_tok);
          }
      }
    else
      {
        // Rescan the text as the argument of a command-syntax call.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
  }

%{
// Metaclass query
%}

\?{FQIDENT} {
    curr_lexer->lexer_debug ("\\?{FQIDENT}");

    const bool command_arg
      = (curr_lexer->previous_token_may_be_command ()
         && curr_lexer->space_follows_previous_token ());

    if (! command_arg)
      {
        const int meta_tok = curr_lexer->handle_meta_identifier ();

        // A negative result is not handed to the parser.
        if (! (meta_tok < 0))
          {
            curr_lexer->looking_for_object_index = true;

            return curr_lexer->count_token_internal (meta_tok);
          }
      }
    else
      {
        // Rescan the text as the argument of a command-syntax call.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
  }

"@" {
    curr_lexer->lexer_debug ("@");

    const bool command_arg
      = (curr_lexer->previous_token_may_be_command ()
         && curr_lexer->space_follows_previous_token ());

    if (! command_arg)
      {
        const int prev = curr_lexer->previous_token_value ();

        // When whitespace is significant, an '@' that follows a blank
        // begins a new element unless the previous token is an
        // opening bracket/brace or a binary operator.
        const bool new_element
          = (curr_lexer->whitespace_is_significant ()
             && curr_lexer->space_follows_previous_token ()
             && prev != '[' && prev != '{'
             && ! curr_lexer->previous_token_is_binop ());

        if (! new_element)
          {
            curr_lexer->current_input_column++;

            curr_lexer->looking_at_function_handle++;
            curr_lexer->at_beginning_of_statement = false;
            curr_lexer->looking_for_object_index = false;

            return curr_lexer->count_token ('@');
          }

        // Give the '@' back and put a separating comma in front of it.
        yyless (0);
        unput (',');
      }
    else
      {
        // Rescan the text as the argument of a command-syntax call.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
  }

%{
// A new line character.  New line characters inside matrix constants
// are handled by the <MATRIX_START> start state code above.  If closest
// nesting is inside parentheses, don't return a row separator.
%}

{NL} {
    curr_lexer->lexer_debug ("{NL}");

    if (curr_lexer->nesting_level.is_paren ())
      {
        // Inside parentheses the newline is swallowed; only the
        // position is updated and a language-extension warning is
        // requested.
        curr_lexer->at_beginning_of_statement = false;

        curr_lexer->current_input_column = 1;
        curr_lexer->input_line_number++;

        curr_lexer->warn_language_extension
          ("bare newline inside parentheses");
      }
    else if (curr_lexer->nesting_level.none ()
             || curr_lexer->nesting_level.is_anon_fcn_body ())
      {
        // The newline ends the statement.
        curr_lexer->at_beginning_of_statement = true;

        curr_lexer->current_input_column = 1;
        curr_lexer->input_line_number++;

        return curr_lexer->count_token ('\n');
      }
    else if (curr_lexer->nesting_level.is_bracket_or_brace ())
      {
        // Newlines inside brackets or braces belong to the
        // <MATRIX_START> rules, so arriving here is reported as an
        // error.  The token records the position before it is updated.
        curr_lexer->push_token
          (new token (LEXICAL_ERROR,
                      "unexpected internal lexer error",
                      curr_lexer->input_line_number,
                      curr_lexer->current_input_column));

        curr_lexer->current_input_column = 1;
        curr_lexer->input_line_number++;

        return curr_lexer->count_token_internal (LEXICAL_ERROR);
      }
  }

%{
// Single quote can either be the beginning of a string or a transpose
// operator.  The decision depends on the previous token, on whether a
// blank followed it, and on whether whitespace is currently
// significant.
%}

"'" {
    curr_lexer->lexer_debug ("'");

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Start of a quoted argument in a command-syntax call: enter
        // COMMAND_START first, then the string state on top of it.
        curr_lexer->current_input_column++;
        curr_lexer->push_start_state (COMMAND_START);
        curr_lexer->begin_string (SQ_STRING_START);
      }
    else if (curr_lexer->at_beginning_of_statement)
      {
        // Nothing precedes the quote in this statement, so it opens a
        // string.
        curr_lexer->current_input_column++;
        curr_lexer->begin_string (SQ_STRING_START);
      }
    else
      {
        int tok = curr_lexer->previous_token_value ();

        if (curr_lexer->whitespace_is_significant ())
          {
            if (curr_lexer->space_follows_previous_token ())
              {
                if (tok == '[' || tok == '{'
                    || curr_lexer->previous_token_is_binop ())
                  {
                    curr_lexer->current_input_column++;
                    curr_lexer->begin_string (SQ_STRING_START);
                  }
                else
                  {
                    // A blank separates two elements: give the quote
                    // back and put a comma in front of it.
                    yyless (0);
                    curr_lexer->xunput (',');
                    // Adjust for comma that was not really in the input stream.
                    curr_lexer->current_input_column--;
                  }
              }
            else
              {
                // No blank before the quote: it opens a string only
                // after an opening bracket/brace, a binary operator,
                // or a keyword; otherwise it is a transpose.
                if (tok == '[' || tok == '{'
                    || curr_lexer->previous_token_is_binop ()
                    || curr_lexer->previous_token_is_keyword ())
                  {
                    curr_lexer->current_input_column++;
                    curr_lexer->begin_string (SQ_STRING_START);
                  }
                else
                  return curr_lexer->count_token (HERMITIAN);
              }
          }
        else
          {
            // Whitespace is not significant here.  The quote opens a
            // string when there is no previous token, or after an
            // opening bracket/brace/paren, a binary operator, or a
            // keyword; otherwise it is a transpose.
            if (! tok || tok == '[' || tok == '{' || tok == '('
                || curr_lexer->previous_token_is_binop ()
                || curr_lexer->previous_token_is_keyword ())
              {
                curr_lexer->current_input_column++;
                curr_lexer->begin_string (SQ_STRING_START);
              }
            else
              return curr_lexer->count_token (HERMITIAN);
          }
      }
  }

%{
// Double quotes always begin strings.
%}

\" {
    curr_lexer->lexer_debug ("\\\"");

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Start of a quoted argument in a command-syntax call: enter
        // COMMAND_START first, then the string state on top of it.
        curr_lexer->current_input_column++;
        curr_lexer->push_start_state (COMMAND_START);
        curr_lexer->begin_string (DQ_STRING_START);
      }
    else
      {
        const int prev = curr_lexer->previous_token_value ();

        // The only case that does not open a string right away: a
        // blank before the quote where whitespace is significant and
        // the previous token is neither an opening bracket/brace nor a
        // binary operator.  Then a separating comma comes first.
        const bool new_element
          = (curr_lexer->whitespace_is_significant ()
             && curr_lexer->space_follows_previous_token ()
             && prev != '[' && prev != '{'
             && ! curr_lexer->previous_token_is_binop ());

        if (new_element)
          {
            yyless (0);
            curr_lexer->xunput (',');
            // Adjust for comma that was not really in the input stream.
            curr_lexer->current_input_column--;
          }
        else
          {
            curr_lexer->current_input_column++;
            curr_lexer->begin_string (DQ_STRING_START);
          }
      }
  }

%{
// Other operators.  Most of these rules expand one of the CMD_OR_*
// macros with the pattern text, the token to return, and a flag.
%}

":"   { CMD_OR_OP (":", ':', true); }
".+"  { CMD_OR_OP (".+", EPLUS, false); }
".-"  { CMD_OR_OP (".-", EMINUS, false); }
".*"  { CMD_OR_OP (".*", EMUL, true); }
"./"  { CMD_OR_OP ("./", EDIV, true); }
".\\" { CMD_OR_OP (".\\", ELEFTDIV, true); }
".^"  { CMD_OR_OP (".^", EPOW, true); }
".**" { CMD_OR_OP (".**", EPOW, false); }
"<="  { CMD_OR_OP ("<=", EXPR_LE, true); }
"=="  { CMD_OR_OP ("==", EXPR_EQ, true); }
"~="  { CMD_OR_OP ("~=", EXPR_NE, true); }
"!="  { CMD_OR_OP ("!=", EXPR_NE, false); }
">="  { CMD_OR_OP (">=", EXPR_GE, true); }
"&"   { CMD_OR_OP ("&", EXPR_AND, true); }
"|"   { CMD_OR_OP ("|", EXPR_OR, true); }
"<"   { CMD_OR_OP ("<", EXPR_LT, true); }
">"   { CMD_OR_OP (">", EXPR_GT, true); }
"*"   { CMD_OR_OP ("*", '*', true); }
"/"   { CMD_OR_OP ("/", '/', true); }

%{
// In Matlab, '\' may also trigger command syntax.
%}

"\\"  { return curr_lexer->handle_op ("\\", LEFTDIV); }

"^"   { CMD_OR_OP ("^", POW, true); }
"**"  { CMD_OR_OP ("**", POW, false); }
"&&"  { CMD_OR_OP ("&&", EXPR_AND_AND, true); }
"||"  { CMD_OR_OP ("||", EXPR_OR_OR, true); }

";" {
    // A semicolon begins a new statement unless whitespace is
    // significant or the innermost nesting is an object index.
    const bool starts_statement
      = (! curr_lexer->whitespace_is_significant ()
         && ! curr_lexer->looking_at_object_index.front ());

    return curr_lexer->handle_op (";", ';', starts_statement);
  }

"+" { CMD_OR_UNARY_OP ("+", '+', true); }
"-" { CMD_OR_UNARY_OP ("-", '-', true); }

"~" { CMD_OR_UNARY_OP ("~", EXPR_NOT, true); }
"!" { CMD_OR_UNARY_OP ("!", EXPR_NOT, false); }

"," {
    // A comma begins a new statement unless whitespace is significant
    // or the innermost nesting is an object index.
    const bool starts_statement
      = (! curr_lexer->whitespace_is_significant ()
         && ! curr_lexer->looking_at_object_index.front ());

    return curr_lexer->handle_op (",", ',', starts_statement);
  }

".'" {
    return curr_lexer->handle_op (".'", TRANSPOSE, false);
  }

"++" { CMD_OR_UNARY_OP ("++", PLUS_PLUS, false); }
"--" { CMD_OR_UNARY_OP ("--", MINUS_MINUS, false); }

"(" {
    curr_lexer->lexer_debug ("(");

    // When whitespace is significant, a '(' that follows a blank
    // begins a new element unless the previous token is an opening
    // bracket/brace or a binary operator.
    bool new_element = false;

    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        const int prev = curr_lexer->previous_token_value ();

        new_element = (prev != '[' && prev != '{'
                       && ! curr_lexer->previous_token_is_binop ());
      }

    if (! new_element)
      {
        // If we are looking for an object index, then push TRUE for
        // looking_at_object_index.  Otherwise, just push whatever state
        // is current (so that we can pop it off the stack when we find
        // the matching close paren).

        curr_lexer->looking_at_object_index.push_front
          (curr_lexer->looking_for_object_index);

        curr_lexer->at_beginning_of_statement = false;
        curr_lexer->looking_for_object_index = false;
        curr_lexer->looking_at_indirect_ref = false;

        curr_lexer->nesting_level.paren ();
        curr_lexer->decrement_promptflag ();

        return curr_lexer->handle_token ('(');
      }

    // Give the '(' back and put a separating comma in front of it.
    yyless (0);
    curr_lexer->xunput (',');
    // Adjust for comma that was not really in the input stream.
    curr_lexer->current_input_column--;
  }

")" {
    curr_lexer->lexer_debug (")");

    curr_lexer->current_input_column++;

    // Undo the bookkeeping done for the matching open paren.
    curr_lexer->nesting_level.remove ();
    curr_lexer->looking_at_object_index.pop_front ();

    curr_lexer->at_beginning_of_statement = false;
    curr_lexer->looking_for_object_index = true;

    // The paren that closes an anonymous function's argument list is
    // followed by the function body.
    if (curr_lexer->looking_at_anon_fcn_args)
      {
        curr_lexer->looking_at_anon_fcn_args = false;
        curr_lexer->nesting_level.anon_fcn_body ();
      }

    return curr_lexer->count_token (')');
  }

"." {
    curr_lexer->lexer_debug (".");

    const bool command_arg
      = (curr_lexer->previous_token_may_be_command ()
         && curr_lexer->space_follows_previous_token ());

    if (! command_arg)
      {
        curr_lexer->at_beginning_of_statement = false;
        curr_lexer->looking_for_object_index = false;

        return curr_lexer->handle_token ('.');
      }

    // Rescan the text as the argument of a command-syntax call.
    yyless (0);
    curr_lexer->push_start_state (COMMAND_START);
  }

%{
// = and op= operators.
//
// NOTE(review): the "**=" and ".**=" rules pass "^=" and ".^=" as the
// pattern text of CMD_OR_COMPUTED_ASSIGN_OP rather than their own
// spelling.  The macro definition is not visible here -- confirm that
// this is intentional.
%}

"=" {
    // If the previous token is a symbol, remember its name as a
    // pending local variable before handling the operator.
    curr_lexer->maybe_mark_previous_token_as_variable ();

    return curr_lexer->handle_op ("=", '=');
  }

"+="   { CMD_OR_COMPUTED_ASSIGN_OP ("+=", ADD_EQ); }
"-="   { CMD_OR_COMPUTED_ASSIGN_OP ("-=", SUB_EQ); }
"*="   { CMD_OR_COMPUTED_ASSIGN_OP ("*=", MUL_EQ); }
"/="   { CMD_OR_COMPUTED_ASSIGN_OP ("/=", DIV_EQ); }
"\\="  { CMD_OR_COMPUTED_ASSIGN_OP ("\\=", LEFTDIV_EQ); }
".+="  { CMD_OR_COMPUTED_ASSIGN_OP (".+=", ADD_EQ); }
".-="  { CMD_OR_COMPUTED_ASSIGN_OP (".-=", SUB_EQ); }
".*="  { CMD_OR_COMPUTED_ASSIGN_OP (".*=", EMUL_EQ); }
"./="  { CMD_OR_COMPUTED_ASSIGN_OP ("./=", EDIV_EQ); }
".\\=" { CMD_OR_COMPUTED_ASSIGN_OP (".\\=", ELEFTDIV_EQ); }
"^="   { CMD_OR_COMPUTED_ASSIGN_OP ("^=", POW_EQ); }
"**="  { CMD_OR_COMPUTED_ASSIGN_OP ("^=", POW_EQ); }
".^="  { CMD_OR_COMPUTED_ASSIGN_OP (".^=", EPOW_EQ); }
".**=" { CMD_OR_COMPUTED_ASSIGN_OP (".^=", EPOW_EQ); }
"&="   { CMD_OR_COMPUTED_ASSIGN_OP ("&=", AND_EQ); }
"|="   { CMD_OR_COMPUTED_ASSIGN_OP ("|=", OR_EQ); }

%{
// In Matlab, '{' may also trigger command syntax.
%}

"{" {
    curr_lexer->lexer_debug ("{");

    // When whitespace is significant, a '{' that follows a blank
    // begins a new element unless the previous token is an opening
    // bracket/brace or a binary operator.
    bool new_element = false;

    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        const int prev = curr_lexer->previous_token_value ();

        new_element = (prev != '[' && prev != '{'
                       && ! curr_lexer->previous_token_is_binop ());
      }

    if (! new_element)
      {
        curr_lexer->nesting_level.brace ();

        // Remember whether this brace is an object index so the
        // matching close brace can pop the state again.
        curr_lexer->looking_at_object_index.push_front
          (curr_lexer->looking_for_object_index);

        curr_lexer->current_input_column += yyleng;
        curr_lexer->at_beginning_of_statement = false;
        curr_lexer->looking_for_object_index = false;

        curr_lexer->decrement_promptflag ();

        curr_lexer->braceflag++;

        // The contents are scanned by the <MATRIX_START> rules.
        curr_lexer->push_start_state (MATRIX_START);

        return curr_lexer->count_token ('{');
      }

    // Give the '{' back and put a separating comma in front of it.
    yyless (0);
    curr_lexer->xunput (',');
    // Adjust for comma that was not really in the input stream.
    curr_lexer->current_input_column--;
  }

"}" {
    curr_lexer->lexer_debug ("}");

    // Undo the bookkeeping done for the matching open brace.
    curr_lexer->nesting_level.remove ();
    curr_lexer->looking_at_object_index.pop_front ();

    curr_lexer->at_beginning_of_statement = false;
    curr_lexer->looking_for_object_index = true;

    return curr_lexer->handle_token ('}');
  }

%{
// Unrecognized input is a lexical error.
%}

. {
    curr_lexer->lexer_debug (".");

    // Put the character back and read it again through text_yyinput.
    curr_lexer->xunput (yytext[0]);

    const int ch = curr_lexer->text_yyinput ();

    if (ch == 1)
      return -1;

    if (ch == EOF)
      return curr_lexer->handle_end_of_input ();

    std::ostringstream msg;

    msg << "invalid character '"
        << undo_string_escape (static_cast<char> (ch))
        << "' (ASCII " << ch << ")";

    // The error token records the position of the offending character;
    // the column is advanced only afterwards.
    curr_lexer->push_token
      (new token (LEXICAL_ERROR, msg.str (),
                  curr_lexer->input_line_number,
                  curr_lexer->current_input_column));

    curr_lexer->current_input_column++;

    return curr_lexer->count_token_internal (LEXICAL_ERROR);
  }

%{
#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
// Disable this warning for code that is generated by flex.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#endif
%}

%{
#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
// Also disable this warning for functions that are generated by flex
// after the pattern rules.
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
%}

%%

#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
// Restore prevailing warning state for remainder of the file.
#pragma GCC diagnostic pop
#endif

// Allocate SIZE bytes with std::malloc.  The scanner argument is
// unused.  Presumably this is the allocation hook called by the
// flex-generated scanner -- TODO confirm against the %option settings.
void *
octave_alloc (yy_size_t size, yyscan_t)
{
  return std::malloc (size);
}

// Resize the block at PTR to SIZE bytes with std::realloc.  The scanner
// argument is unused.  Presumably this is the reallocation hook called
// by the flex-generated scanner -- TODO confirm.
void *
octave_realloc (void *ptr, yy_size_t size, yyscan_t)
{
  return std::realloc (ptr, size);
}

// Release the block at PTR with std::free.  The scanner argument is
// unused.  Presumably this is the deallocation hook called by the
// flex-generated scanner -- TODO confirm.
void
octave_free (void *ptr, yyscan_t)
{
  std::free (ptr);
}

// Write a readable representation of C to std::cerr.
//
// Characters for which isgraph is true are written as themselves.
// Codes 0 through 32 and 127 are written as a symbolic name ("NUL",
// "ESC", "SPACE", "DEL", ...) or as a backslash escape ("\n", "\t",
// ...).  Nothing is written for any other character.
static void
display_character (char c)
{
  // Names for codes 0 (NUL) through 32 (SPACE), indexed by code.
  static const char * const low_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  // The <cctype> classification functions require a value that is
  // representable as unsigned char (or EOF); passing a negative plain
  // char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc <= 32)
    std::cerr << low_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
}

namespace octave
{
  // Return true if S is a keyword of the language.
  bool
  is_keyword (const std::string& s)
  {
    // "set" and "get" are in the lexer's word list only because that
    // simplifies parsing function names like "set.property_name"
    // inside classdef-style class definitions.  They are not really
    // keywords in the language, and neither are "enumeration",
    // "events", "methods", and "properties", so reject all of them
    // up front.

    if (s == "set" || s == "get"
        || s == "enumeration" || s == "events"
        || s == "methods" || s == "properties")
      return false;

    return octave_kw_hash::in_word_set (s.c_str (), s.length ()) != 0;
  }
}

DEFUN (iskeyword, args, ,
       doc: /* -*- texinfo -*-
@deftypefn  {} {} iskeyword ()
@deftypefnx {} {} iskeyword (@var{name})
Return true if @var{name} is an Octave keyword.

If @var{name} is omitted, return a list of keywords.
@seealso{isvarname, exist}
@end deftypefn */)
{
  octave_value retval;

  int nargin = args.length ();

  if (nargin > 1)
    print_usage ();

  if (nargin == 0)
    {
      // The lexer's word list contains a few names that are handled
      // like keywords but are not keywords of the language.  Filter
      // the list through octave::is_keyword so that every name
      // returned here is also reported as a keyword by
      // iskeyword (NAME).

      string_vector lst (TOTAL_KEYWORDS);

      int j = 0;

      for (int i = 0; i < TOTAL_KEYWORDS; i++)
        {
          std::string kword = wordlist[i].name;

          if (octave::is_keyword (kword))
            lst[j++] = kword;
        }

      // Drop the unused slots left by the filtered names.
      lst.resize (j);

      retval = Cell (lst.sort ());
    }
  else
    {
      std::string name = args(0).xstring_value ("iskeyword: NAME must be a string");
      retval = octave::is_keyword (name);
    }

  return retval;
}

/*

%!assert (iskeyword ("for"))
%!assert (iskeyword ("fort"), false)
%!assert (iskeyword ("fft"), false)
%!assert (iskeyword ("get"), false)
%!assert (iskeyword ("set"), false)

%!error iskeyword ("A", "B")
%!error <NAME must be a string> iskeyword (1)

*/

DEFUN (__display_tokens__, args, nargout,
       doc: /* -*- texinfo -*-
@deftypefn {} {} __display_tokens__ ()
Query or set the internal variable that determines whether Octave's
lexer displays tokens as they are read.
@seealso{__lexer_debug_flag__, __token_count__}
@end deftypefn */)
{
  // The query/set logic is supplied by the SET_INTERNAL_VARIABLE
  // macro, which is defined outside this file section.
  return SET_INTERNAL_VARIABLE (display_tokens);
}

DEFUN (__token_count__, , ,
       doc: /* -*- texinfo -*-
@deftypefn {} {} __token_count__ ()
Return the number of language tokens processed since Octave startup.
@seealso{__lexer_debug_flag__, __display_tokens__}
@end deftypefn */)
{
  // Wrap the current value of the counter; it is not modified here.
  octave_value count (Vtoken_count);

  return count;
}

DEFUN (__lexer_debug_flag__, args, nargout,
       doc: /* -*- texinfo -*-
@deftypefn  {} {@var{val} =} __lexer_debug_flag__ ()
@deftypefnx {} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val})
Query or set the internal flag that determines whether Octave's lexer prints
debug information as it processes an expression.
@seealso{__display_tokens__, __token_count__, __parse_debug_flag__}
@end deftypefn */)
{
  // set_internal_variable does both the query and the update.
  octave_value result
    = set_internal_variable (lexer_debug_flag, args, nargout,
                             "__lexer_debug_flag__");

  return result;
}

namespace octave
{
  lexical_feedback::~lexical_feedback (void)
  {
    tokens.clear ();
  }

  void
  lexical_feedback::init (void)
  {
    // The closest paren, brace, or bracket nesting is not an object
    // index.
    looking_at_object_index.push_front (false);
  }

  void
  lexical_feedback::reset (void)
  {
    end_of_input = false;
    at_beginning_of_statement = true;
    looking_at_anon_fcn_args = false;
    looking_at_return_list = false;
    looking_at_parameter_list = false;
    looking_at_decl_list = false;
    looking_at_initializer_expression = false;
    looking_at_matrix_or_assign_lhs = false;
    looking_for_object_index = false;
    looking_at_indirect_ref = false;
    parsing_class_method = false;
    parsing_classdef = false;
    maybe_classdef_get_set_method = false;
    parsing_classdef_get_method = false;
    parsing_classdef_set_method = false;
    quote_is_transpose = false;
    force_script = false;
    reading_fcn_file = false;
    reading_script_file = false;
    reading_classdef_file = false;
    input_line_number = 1;
    current_input_column = 1;
    bracketflag = 0;
    braceflag = 0;
    looping = 0;
    defining_func = 0;
    looking_at_function_handle = 0;
    block_comment_nesting_level = 0;
    command_arg_paren_count = 0;
    token_count = 0;
    current_input_line = "";
    comment_text = "";
    help_text = "";
    string_text = "";
    string_line = 0;
    string_column = 0;
    fcn_file_name = "";
    fcn_file_full_name = "";
    looking_at_object_index.clear ();
    looking_at_object_index.push_front (false);

    while (! parsed_function_name.empty ())
      parsed_function_name.pop ();

    pending_local_variables.clear ();
    symtab_context.clear ();
    nesting_level.reset ();
    tokens.clear ();
  }

  int
  lexical_feedback::previous_token_value (void) const
  {
    const token *tok = tokens.front ();
    return tok ? tok->token_value () : 0;
  }

  bool
  lexical_feedback::previous_token_value_is (int tok_val) const
  {
    const token *tok = tokens.front ();
    return tok ? tok->token_value_is (tok_val) : false;
  }

  void
  lexical_feedback::mark_previous_token_trailing_space (void)
  {
    token *tok = tokens.front ();
    if (tok && ! previous_token_value_is ('\n'))
      tok->mark_trailing_space ();
  }

  bool
  lexical_feedback::space_follows_previous_token (void) const
  {
    const token *tok = tokens.front ();
    return tok ? tok->space_follows_token () : false;
  }

  bool
  lexical_feedback::previous_token_is_binop (void) const
  {
    int tok = previous_token_value ();

    return (tok == '+' || tok == '-' || tok == '@'
            || tok == ',' || tok == ';' || tok == '*' || tok == '/'
            || tok == ':' || tok == '=' || tok == ADD_EQ
            || tok == AND_EQ || tok == DIV_EQ || tok == EDIV
            || tok == EDIV_EQ || tok == ELEFTDIV || tok == ELEFTDIV_EQ
            || tok == EMINUS || tok == EMUL || tok == EMUL_EQ
            || tok == EPOW || tok == EPOW_EQ || tok == EXPR_AND
            || tok == EXPR_AND_AND || tok == EXPR_EQ || tok == EXPR_GE
            || tok == EXPR_GT || tok == EXPR_LE || tok == EXPR_LT
            || tok == EXPR_NE || tok == EXPR_NOT || tok == EXPR_OR
            || tok == EXPR_OR_OR || tok == LEFTDIV || tok == LEFTDIV_EQ
            || tok == MUL_EQ || tok == OR_EQ || tok == POW
            || tok == POW_EQ || tok == SUB_EQ);
  }

  bool
  lexical_feedback::previous_token_is_keyword (void) const
  {
    const token *tok = tokens.front ();
    return tok ? tok->is_keyword () : false;
  }

  bool
  lexical_feedback::previous_token_may_be_command (void) const
  {
    const token *tok = tokens.front ();
    return tok ? tok->may_be_command () : false;
  }

  void
  lexical_feedback::maybe_mark_previous_token_as_variable (void)
  {
    token *tok = tokens.front ();

    if (tok && tok->is_symbol ())
      pending_local_variables.insert (tok->symbol_name ());
  }

  void
  lexical_feedback::mark_as_variables (const std::list<std::string>& lst)
  {
    for (std::list<std::string>::const_iterator p = lst.begin ();
         p != lst.end (); p++)
      {
        pending_local_variables.insert (*p);
      }
  }
}

// Return true if the first text in S, after any leading spaces and
// tabs, begins with "Copyright" or "Author".
//
// Return false for an empty string and for a string made up only of
// spaces and tabs.
static bool
looks_like_copyright (const std::string& s)
{
  const size_t offset = s.find_first_not_of (" \t");

  // No non-blank character at all (this also covers the empty
  // string).  Using npos as a starting position for substr or compare
  // would throw std::out_of_range, so stop here.
  if (offset == std::string::npos)
    return false;

  // compare clamps the length to the end of the string, so short
  // strings are handled without creating temporary substrings.
  return (s.compare (offset, 9, "Copyright") == 0
          || s.compare (offset, 6, "Author") == 0);
}

namespace octave
{
  // Replace the buffer contents with INPUT and restart reading from
  // its first character.  EOF_ARG is stored as the end-of-file flag.
  void
  base_lexer::input_buffer::fill (const std::string& input, bool eof_arg)
  {
    eof = eof_arg;

    buffer = input;

    // Point at our own copy of the text, not at the caller's string.
    pos = buffer.c_str ();
    chars_left = buffer.length ();
  }

  // Copy up to MAX_SIZE of the remaining characters into BUF and
  // return the number of characters stored.  When the last of the
  // text has been copied and it does not end with a newline, one is
  // supplied: either appended to BUF right away or, if BUF is full,
  // queued so that the next call delivers it.
  int
  base_lexer::input_buffer::copy_chunk (char *buf, size_t max_size)
  {
    static const char * const eol = "\n";

    size_t len = chars_left;

    if (! (max_size > chars_left))
      len = max_size;

    assert (len > 0);

    memcpy (buf, pos, len);

    pos += len;
    chars_left -= len;

    // Make sure input ends with a new line character.
    const bool exhausted = (chars_left == 0);

    if (exhausted && buf[len-1] != '\n')
      {
        if (len >= max_size)
          {
            // There isn't enough room to plug the newline character
            // in the buffer so arrange to have it returned on the next
            // call to base_lexer::read.
            pos = eol;
            chars_left = 1;
          }
        else
          {
            // There is enough room to plug the newline character in
            // the buffer.
            buf[len++] = '\n';
          }
      }

    return len;
  }

  // Destroy the flex scanner object that init created with yylex_init.
  base_lexer::~base_lexer (void)
  {
    yylex_destroy (scanner);
  }

  // Create the flex scanner and attach this object to it.
  void
  base_lexer::init (void)
  {
    // The scanner has to exist before anything can be attached to it.
    yylex_init (&scanner);

    // Make base_lexer object available through yyextra in
    // flex-generated lexer.
    yyset_extra (this, scanner);

    clear_start_state ();
  }

  // Inside Flex-generated functions, yyg is the scanner cast to its real
  // type.  Some flex macros that we use in base_lexer member functions
  // (for example, BEGIN) use yyg.  If we could perform the actions of
  // these macros with functions instead, we could eliminate the
  // OCTAVE_YYG macro.
  //
  // OCTAVE_YYG declares a local variable named yyg, so use it at most
  // once per scope and only where a member named scanner is visible.

#define OCTAVE_YYG                                                      \
  struct yyguts_t *yyg = static_cast<struct yyguts_t*> (scanner)

  // Return the lexer to its initial condition: start state, symbol
  // table context, prompt flag, lexical feedback flags, and the
  // comment buffer are all reset.
  void
  base_lexer::reset (void)
  {
    // Start off on the right foot.
    clear_start_state ();

    symtab_context.clear ();

    // We do want a prompt by default.
    promptflag (1);

    // Only ask for input from stdin if we are expecting interactive
    // input, i.e., we are not reading any kind of file or an eval
    // string.
    const bool read_from_stdin
      = (octave::application::interactive ()
         && ! reading_fcn_file
         && ! reading_classdef_file
         && ! reading_script_file
         && ! input_from_eval_string ());

    if (read_from_stdin)
      yyrestart (stdin, scanner);

    lexical_feedback::reset ();

    comment_buf.reset ();
  }

  // Prepare to scan a file: mark the input as a script file and enter
  // the INPUT_FILE_START start state.
  void
  base_lexer::prep_for_file (void)
  {
    reading_script_file = true;

    push_start_state (INPUT_FILE_START);
  }

  // Remember where the string that is about to be scanned begins and
  // switch the scanner to start state STATE.
  void
  base_lexer::begin_string (int state)
  {
    string_column = current_input_column;
    string_line = input_line_number;

    push_start_state (state);
  }

  int
  base_lexer::handle_end_of_input (void)
  {
    lexer_debug ("<<EOF>>");

    if (block_comment_nesting_level != 0)
      {
        warning ("block comment open at end of input");

        if ((reading_fcn_file || reading_script_file || reading_classdef_file)
            && ! fcn_file_name.empty ())
          warning ("near line %d of file '%s.m'",
                   input_line_number, fcn_file_name.c_str ());
      }

    return handle_token (END_OF_INPUT);
  }

  // Return the text of the current match as reported by the scanner.
  char *
  base_lexer::flex_yytext (void)
  {
    char *matched_text = yyget_text (scanner);

    return matched_text;
  }

  // Return the length of the current match as reported by the scanner.
  int
  base_lexer::flex_yyleng (void)
  {
    int matched_length = yyget_leng (scanner);

    return matched_length;
  }

  // Read one character from the scanner, translating both CRLF and a
  // lone CR into a single LF.  Each character read is echoed to
  // std::cerr when lexer debugging is enabled.
  int
  base_lexer::text_yyinput (void)
  {
    int c = yyinput (scanner);

    if (lexer_debug_flag)
      {
        std::cerr << "I: ";
        display_character (c);
        std::cerr << std::endl;
      }

    // Anything other than a carriage return is passed through as is.
    if (c != '\r')
      return c;

    // Convert CRLF into just LF and single CR into LF.

    int next = yyinput (scanner);

    if (lexer_debug_flag)
      {
        std::cerr << "I: ";
        display_character (next);
        std::cerr << std::endl;
      }

    // A lone CR: the character after it belongs to the following
    // input, so give it back.
    if (next != '\n')
      xunput (next);

    return '\n';
  }

  // Push character C back onto the scanner input, using BUF as the
  // text pointer handed to yyunput.  EOF is never pushed back.
  void
  base_lexer::xunput (char c, char *buf)
  {
    if (c == EOF)
      return;

    if (lexer_debug_flag)
      {
        std::cerr << "U: ";
        display_character (c);
        std::cerr << std::endl;
      }

    yyunput (c, buf, scanner);
  }

  // Push character C back onto the scanner input, using the current
  // match text as the buffer argument.
  void
  base_lexer::xunput (char c)
  {
    xunput (c, flex_yytext ());
  }

  // Peek at the next input character (it is read and then pushed
  // back) and report whether it is a space or a tab.
  bool
  base_lexer::looking_at_space (void)
  {
    int next = text_yyinput ();
    xunput (next);

    if (next == ' ')
      return true;

    return next == '\t';
  }

  // Return true if any entry of the looking_at_object_index list is
  // set.
  bool
  base_lexer::inside_any_object_index (void)
  {
    std::list<bool>::const_iterator it = looking_at_object_index.begin ();

    while (it != looking_at_object_index.end ())
      {
        // The first set flag settles the question.
        if (*it)
          return true;

        it++;
      }

    return false;
  }

  // Return true if NAME is a variable in SCOPE according to the symbol
  // table, or if it is listed in pending_local_variables.
  bool
  base_lexer::is_variable (const std::string& name,
                                  symbol_table::scope_id scope)
  {
    if (symbol_table::is_variable (name, scope))
      return true;

    return (pending_local_variables.find (name)
            != pending_local_variables.end ());
  }

  // Handle keywords.  If S is not in the keyword table, or if it is a
  // word that is only a keyword in some contexts ('end', 'get', 'set',
  // and the classdef block names) and should be treated as an ordinary
  // identifier here, return 0 without pushing a token.  Otherwise,
  // update the lexer state for the keyword, push a token for it, and
  // return the keyword's token ID.

  int
  base_lexer::is_keyword_token (const std::string& s)
  {
    int l = input_line_number;
    int c = current_input_column;

    int len = s.length ();

    const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len);

    if (kw)
      {
        // Saved so that it can be restored when the word turns out not
        // to be a keyword in the current context.
        bool previous_at_bos = at_beginning_of_statement;

        // May be reset to true for some token types.
        at_beginning_of_statement = false;

        // Keywords that need a special token value set this; all others
        // get a generic token after the switch.
        token *tok_val = 0;

        switch (kw->kw_id)
          {
          case break_kw:
          case catch_kw:
          case continue_kw:
          case else_kw:
          case otherwise_kw:
          case return_kw:
          case unwind_protect_cleanup_kw:
            at_beginning_of_statement = true;
            break;

          case persistent_kw:
          case global_kw:
            looking_at_decl_list = true;
            break;

          case case_kw:
          case elseif_kw:
          case until_kw:
            break;

          case end_kw:
            // Inside an index expression, or while defining a function
            // whose name has not been parsed yet (and we are not in
            // its return list), 'end' is not treated as a keyword.
            if (inside_any_object_index ()
                || (defining_func
                    && ! (looking_at_return_list
                          || parsed_function_name.top ())))
              {
                at_beginning_of_statement = previous_at_bos;
                return 0;
              }

            tok_val = new token (end_kw, token::simple_end, l, c);
            at_beginning_of_statement = true;
            break;

          case end_try_catch_kw:
            tok_val = new token (end_try_catch_kw, token::try_catch_end, l, c);
            at_beginning_of_statement = true;
            break;

          case end_unwind_protect_kw:
            tok_val = new token (end_unwind_protect_kw,
                                 token::unwind_protect_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endfor_kw:
            tok_val = new token (endfor_kw, token::for_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endfunction_kw:
            tok_val = new token (endfunction_kw, token::function_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endif_kw:
            tok_val = new token (endif_kw, token::if_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endparfor_kw:
            tok_val = new token (endparfor_kw, token::parfor_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endswitch_kw:
            tok_val = new token (endswitch_kw, token::switch_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endwhile_kw:
            tok_val = new token (endwhile_kw, token::while_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endclassdef_kw:
            tok_val = new token (endclassdef_kw, token::classdef_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endenumeration_kw:
            tok_val = new token (endenumeration_kw, token::enumeration_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endevents_kw:
            tok_val = new token (endevents_kw, token::events_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endmethods_kw:
            tok_val = new token (endmethods_kw, token::methods_end, l, c);
            at_beginning_of_statement = true;
            break;

          case endproperties_kw:
            tok_val = new token (endproperties_kw, token::properties_end, l, c);
            at_beginning_of_statement = true;
            break;


          case for_kw:
          case parfor_kw:
          case while_kw:
            decrement_promptflag ();
            looping++;
            break;

          case do_kw:
            at_beginning_of_statement = true;
            decrement_promptflag ();
            looping++;
            break;

          case try_kw:
          case unwind_protect_kw:
            at_beginning_of_statement = true;
            decrement_promptflag ();
            break;

          case if_kw:
          case switch_kw:
            decrement_promptflag ();
            break;

          case get_kw:
          case set_kw:
            // 'get' and 'set' are keywords in classdef method
            // declarations.
            if (! maybe_classdef_get_set_method)
              {
                at_beginning_of_statement = previous_at_bos;
                return 0;
              }
            break;

          case enumeration_kw:
          case events_kw:
          case methods_kw:
          case properties_kw:
            // 'enumeration', 'properties', 'methods' and 'events' are
            // keywords for classdef blocks.
            if (! parsing_classdef)
              {
                at_beginning_of_statement = previous_at_bos;
                return 0;
              }
            // fall through ...

          case classdef_kw:
            // 'classdef' is always a keyword.
            decrement_promptflag ();

            // If this is the first token of a file (and script mode is
            // not forced), the file is a classdef file, not a script.
            if (! force_script && token_count == 0 && input_from_file ())
              {
                reading_classdef_file = true;
                reading_script_file = false;
              }
            break;

          case function_kw:
            decrement_promptflag ();

            defining_func++;
            parsed_function_name.push (false);

            // If this is the first token of a file (and script mode is
            // not forced), the file is a function file, not a script.
            if (! force_script && token_count == 0 && input_from_file ())
              {
                reading_fcn_file = true;
                reading_script_file = false;
              }

            // When not reading any kind of file, line numbering starts
            // over at the function keyword.
            if (! (reading_fcn_file || reading_script_file
                   || reading_classdef_file))
              input_line_number = 1;
            break;

          case magic_file_kw:
            {
              // The token text is the full file name when reading a
              // file whose name is known, and "stdin" otherwise.
              if ((reading_fcn_file || reading_script_file
                   || reading_classdef_file)
                  && ! fcn_file_full_name.empty ())
                tok_val = new token (magic_file_kw, fcn_file_full_name, l, c);
              else
                tok_val = new token (magic_file_kw, "stdin", l, c);
            }
            break;

          case magic_line_kw:
            // The token value is the current line number.
            tok_val = new token (magic_line_kw, static_cast<double> (l),
                                 "", l, c);
            break;

          default:
            panic_impossible ();
          }

        if (! tok_val)
          tok_val = new token (kw->tok, true, l, c);

        push_token (tok_val);

        return kw->tok;
      }

    return 0;
  }

  // Split S at each '.' and return true as soon as one of the pieces
  // is recognized by is_keyword_token.  Return false if none is.
  bool
  base_lexer::fq_identifier_contains_keyword (const std::string& s)
  {
    size_t start = 0;

    for (;;)
      {
        const size_t dot = s.find ('.', start);
        const bool last_part = (dot == std::string::npos);

        // The final piece runs to the end of the string.
        std::string part
          = last_part ? s.substr (start) : s.substr (start, dot - start);

        if (is_keyword_token (part))
          return true;

        if (last_part)
          return false;

        start = dot + 1;
      }
  }

  // Whitespace is significant inside brackets, and inside braces
  // unless the front entry of looking_at_object_index is set.
  bool
  base_lexer::whitespace_is_significant (void)
  {
    if (nesting_level.is_bracket ())
      return true;

    if (! nesting_level.is_brace ())
      return false;

    return ! looking_at_object_index.front ();
  }
}

// Return true if the LEN characters at S are longer than a bare
// prefix and begin with "0b" or "0B".
static inline bool
looks_like_bin (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'b' || s[1] == 'B';
}

// Return true if the LEN characters at S are longer than a bare
// prefix and begin with "0x" or "0X".
static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}

namespace octave
{
  // Convert the numeric text of the current match to a double and push
  // a NUM token for it.  Underscores in the text are ignored.  Text
  // starting with 0x/0X is read as hexadecimal, text starting with
  // 0b/0B as binary, and anything else as a decimal number in which a
  // 'D' or 'd' exponent marker is accepted in place of 'e'.
  void
  base_lexer::handle_number (void)
  {
    double value = 0.0;
    int nread = 0;

    char *yytxt = flex_yytext ();

    // Strip any underscores, working on a private copy of the text.
    char *tmptxt = strsave (yytxt);
    char *rptr = tmptxt;
    char *wptr = tmptxt;
    while (*rptr)
      {
        *wptr = *rptr++;
        wptr += (*wptr != '_');
      }
    *wptr = '\0';

    // Measure the stripped text once instead of on every use.
    const int len = static_cast<int> (strlen (tmptxt));

    if (looks_like_hex (tmptxt, len))
      {
        // Initialized so that a failed conversion never causes an
        // indeterminate value to be read below.
        uintmax_t long_int_value = 0;

        nread = sscanf (tmptxt, "%jx", &long_int_value);

        value = static_cast<double> (long_int_value);
      }
    else if (looks_like_bin (tmptxt, len))
      {
        uintmax_t long_int_value = 0;

        // Skip the two-character prefix; looks_like_bin guarantees
        // that len > 2.  Any digit other than '1' counts as zero.
        for (int i = 2; i < len; i++)
          {
            long_int_value <<= 1;
            long_int_value += static_cast<uintmax_t> (tmptxt[i] == '1');
          }

        value = static_cast<double> (long_int_value);

        nread = 1;  // Just to pass the assert stmt below
      }
    else
      {
        // sscanf does not understand a 'D' exponent marker.
        char *idx = strpbrk (tmptxt, "Dd");

        if (idx)
          *idx = 'e';

        nread = sscanf (tmptxt, "%lf", &value);
      }

    delete [] tmptxt;

    // If yytext doesn't contain a valid number, we are in deep doo doo.

    assert (nread == 1);

    looking_for_object_index = false;
    at_beginning_of_statement = false;

    // The token keeps the original text (underscores included).
    push_token (new token (NUM, value, yytxt, input_line_number,
                           current_input_column));

    current_input_column += flex_yyleng ();
  }

  void
  base_lexer::handle_continuation (void)
  {
    char *yytxt = flex_yytext ();
    int yylng = flex_yyleng ();

    int offset = 1;
    if (yytxt[0] == '\\')
      warn_language_extension_continuation ();
    else
      offset = 3;

    bool have_space = false;
    while (offset < yylng)
      {
        char c = yytxt[offset];
        if (c == ' ' || c == '\t')
          {
            have_space = true;
            offset++;
          }
        else
          break;
      }

    if (have_space)
      mark_previous_token_trailing_space ();

    bool have_comment = false;
    while (offset < yylng)
      {
        char c = yytxt[offset];
        if (c == '#' || c == '%')
          {
            have_comment = true;
            offset++;
          }
        else
          break;
      }

    if (have_comment)
      {
        comment_text = &yytxt[offset];

        // finish_comment sets at_beginning_of_statement to true but
        // that's not be correct if we are handling a continued
        // statement.  Preserve the current state.

        bool saved_bos = at_beginning_of_statement;

        finish_comment (octave_comment_elt::end_of_line);

        at_beginning_of_statement = saved_bos;
      }

    decrement_promptflag ();
    input_line_number++;
    current_input_column = 1;
  }

  // Store the accumulated comment text in the comment buffer with type
  // TYP (or the copyright type if the text looks like a copyright
  // notice), then clear the text.  The first non-empty, non-copyright
  // comment seen outside any nesting becomes the help text.
  void
  base_lexer::finish_comment (octave_comment_elt::comment_type typ)
  {
    const bool is_copyright = looks_like_copyright (comment_text);

    const bool first_help_candidate
      = (nesting_level.none () && help_text.empty ()
         && ! comment_text.empty () && ! is_copyright);

    if (first_help_candidate)
      help_text = comment_text;

    comment_buf.append (comment_text,
                        is_copyright ? octave_comment_elt::copyright : typ);

    comment_text = "";

    at_beginning_of_statement = true;
  }

  // Leave the innermost bracket or brace nesting level, if there is
  // one, and pop the current start state.  Returns BRACKET_TYPE
  // unchanged.
  int
  base_lexer::handle_close_bracket (int bracket_type)
  {
    if (! nesting_level.none ())
      {
        nesting_level.remove ();

        switch (bracket_type)
          {
          case ']':
            bracketflag--;
            break;

          case '}':
            braceflag--;
            break;

          default:
            panic_impossible ();
            break;
          }
      }

    pop_start_state ();

    return bracket_type;
  }

  // Return true if the previous token may be a command word that was
  // followed by whitespace, and the upcoming input does not start
  // with a space or tab.
  bool
  base_lexer::looks_like_command_arg (void)
  {
    const bool space_before = space_follows_previous_token ();
    const bool space_after = looking_at_space ();

    if (! space_before || space_after)
      return false;

    return previous_token_may_be_command ();
  }

  int
  base_lexer::handle_superclass_identifier (void)
  {
    std::string meth = flex_yytext ();

    size_t pos = meth.find ("@");
    std::string cls = meth.substr (pos + 1);
    meth = meth.substr (0, pos);

    bool kw_token = (is_keyword_token (meth)
                     || fq_identifier_contains_keyword (cls));

    if (kw_token)
      {
        token *tok
          = new token (LEXICAL_ERROR,
                       "method, class, and package names may not be keywords",
                       input_line_number, current_input_column);

        push_token (tok);

        return count_token_internal (LEXICAL_ERROR);
      }

    push_token (new token (SUPERCLASSREF, meth, cls,
                           input_line_number, current_input_column));

    current_input_column += flex_yyleng ();

    return SUPERCLASSREF;
  }

  // Handle a metaclass query.  The class name is the matched text
  // without its first character.  Pushes a METAQUERY token, or a
  // LEXICAL_ERROR token if any part of the name is a keyword.
  int
  base_lexer::handle_meta_identifier (void)
  {
    const std::string txt = flex_yytext ();
    std::string cls = txt.substr (1);

    if (! fq_identifier_contains_keyword (cls))
      {
        token *tok = new token (METAQUERY, cls, input_line_number,
                                current_input_column);
        push_token (tok);

        current_input_column += flex_yyleng ();

        return METAQUERY;
      }

    push_token (new token (LEXICAL_ERROR,
                           "class and package names may not be keywords",
                           input_line_number, current_input_column));

    return count_token_internal (LEXICAL_ERROR);
  }

  // Handle a fully qualified identifier.  Pushes an FQ_IDENT token for
  // the matched text, or a LEXICAL_ERROR token if any dot-separated
  // part of it is a keyword.
  int
  base_lexer::handle_fq_identifier (void)
  {
    std::string fq_id = flex_yytext ();

    if (! fq_identifier_contains_keyword (fq_id))
      {
        token *tok = new token (FQ_IDENT, fq_id, input_line_number,
                                current_input_column);
        push_token (tok);

        current_input_column += flex_yyleng ();

        return FQ_IDENT;
      }

    push_token (new token (LEXICAL_ERROR,
                           "function, method, class, and package names may not be keywords",
                           input_line_number, current_input_column));

    return count_token_internal (LEXICAL_ERROR);
  }

  // Figure out exactly what kind of token to return when we have seen
  // an identifier.  Handles keywords.  Depending on the lexer state,
  // the result is STRUCT_ELT, FCN_HANDLE, LEXICAL_ERROR, the token ID
  // of a keyword, or NAME.

  int
  base_lexer::handle_identifier (void)
  {
    std::string ident = flex_yytext ();

    // If we are expecting a structure element, avoid recognizing
    // keywords and other special names and return STRUCT_ELT, which is
    // a string that is also a valid identifier.

    if (looking_at_indirect_ref)
      {
        push_token (new token (STRUCT_ELT, ident, input_line_number,
                               current_input_column));

        looking_for_object_index = true;

        current_input_column += flex_yyleng ();

        return STRUCT_ELT;
      }

    // If ident is a keyword token, then is_keyword_token will set
    // at_beginning_of_statement.  For example, if tok is an IF
    // token, then at_beginning_of_statement will be false.
    // is_keyword_token returns 0 when ident is not to be treated as a
    // keyword.

    int kw_token = is_keyword_token (ident);

    if (looking_at_function_handle)
      {
        if (kw_token)
          {
            token *tok
              = new token (LEXICAL_ERROR,
                           "function handles may not refer to keywords",
                           input_line_number, current_input_column);

            push_token (tok);

            return count_token_internal (LEXICAL_ERROR);
          }
        else
          {
            push_token (new token (FCN_HANDLE, ident, input_line_number,
                                   current_input_column));

            current_input_column += flex_yyleng ();
            looking_for_object_index = true;

            at_beginning_of_statement = false;

            return FCN_HANDLE;
          }
      }

    // If we have a regular keyword, return it.
    // Keywords can be followed by identifiers.

    if (kw_token)
      {
        if (kw_token >= 0)
          {
            current_input_column += flex_yyleng ();
            looking_for_object_index = false;
          }

        // The call to is_keyword_token set at_beginning_of_statement.

        return kw_token;
      }

    // Find the token in the symbol table.

    symbol_table::scope_id sid = symtab_context.curr_scope ();

    token *tok = new token (NAME, &(symbol_table::insert (ident, sid)),
                            input_line_number, current_input_column);

    // The following symbols are handled specially so that things like
    //
    //   pi +1
    //
    // are parsed as an addition expression instead of as a command-style
    // function call with the argument "+1".

    if (at_beginning_of_statement
        && (! (is_variable (ident, sid)
               || ident == "e" || ident == "pi"
               || ident == "I" || ident == "i"
               || ident == "J" || ident == "j"
               || ident == "Inf" || ident == "inf"
               || ident == "NaN" || ident == "nan")))
      tok->mark_may_be_command ();

    push_token (tok);

    current_input_column += flex_yyleng ();

    // The magic end index can't be indexed.

    if (ident != "end")
      looking_for_object_index = true;

    at_beginning_of_statement = false;

    return NAME;
  }

  // Issue the Octave:separator-insert warning for separator SEP,
  // naming the file being read when its full name is known.
  void
  base_lexer::maybe_warn_separator_insert (char sep)
  {
    const std::string& file = fcn_file_full_name;

    if (! file.empty ())
      warning_with_id ("Octave:separator-insert",
                       "potential auto-insertion of '%c' near line %d of file %s",
                       sep, input_line_number, file.c_str ());
    else
      warning_with_id ("Octave:separator-insert",
                       "potential auto-insertion of '%c' near line %d",
                       sep, input_line_number);
  }

  // Issue the Octave:single-quote-string warning, naming the file
  // being read when its full name is known.
  void
  base_lexer::warn_single_quote_string (void)
  {
    const std::string& file = fcn_file_full_name;

    if (! file.empty ())
      warning_with_id ("Octave:single-quote-string",
                       "single quote delimited string near line %d of file %s",
                       input_line_number, file.c_str ());
    else
      warning_with_id ("Octave:single-quote-string",
                       "single quote delimited string near line %d",
                       input_line_number);
  }

  // Issue the Octave:language-extension warning with description MSG.
  // When the full name of the file being read is known, the line
  // number and file name are included in the message.
  void
  base_lexer::warn_language_extension (const std::string& msg)
  {
    std::string nm = fcn_file_full_name;

    if (nm.empty ())
      warning_with_id ("Octave:language-extension",
                       "Octave language extension used: %s",
                       msg.c_str ());
    else
      warning_with_id ("Octave:language-extension",
                       "Octave language extension used: %s near line %d of file %s",
                       msg.c_str (), input_line_number, nm.c_str ());
  }

  // Warn about a language extension if the comment character C is
  // '#'.  Any other character is accepted silently.
  void
  base_lexer::maybe_warn_language_extension_comment (char c)
  {
    if (c != '#')
      return;

    warn_language_extension ("# used as comment character");
  }

  // Warn that a backslash was used as a line continuation marker.
  void
  base_lexer::warn_language_extension_continuation (void)
  {
    warn_language_extension ("\\ used as line continuation marker");
  }

  // Warn that operator OP is a language extension.  A trailing
  // newline in OP, if any, is not included in the message.
  void
  base_lexer::warn_language_extension_operator (const std::string& op)
  {
    std::string t = op;

    // Check the length first so that an empty string is never indexed.
    size_t n = t.length ();
    if (n > 0 && t[n-1] == '\n')
      t.resize (n-1);

    warn_language_extension (t + " used as operator");
  }

  // Make TOK the semantic value seen by the parser and add it to the
  // tokens container.
  void
  base_lexer::push_token (token *tok)
  {
    yyget_lval (scanner)->tok_val = tok;

    tokens.push (tok);
  }

  // Return the token currently stored as the scanner's semantic
  // value.
  token *
  base_lexer::current_token (void)
  {
    return yyget_lval (scanner)->tok_val;
  }

  // Print a readable name for token ID TOK on std::cerr.  For tokens
  // that carry a value (numbers, strings, structure elements, names),
  // the value is taken from the current token and printed in brackets.
  void
  base_lexer::display_token (int tok)
  {
    switch (tok)
      {
      case '=': std::cerr << "'='\n"; break;
      case ':': std::cerr << "':'\n"; break;
      case '-': std::cerr << "'-'\n"; break;
      case '+': std::cerr << "'+'\n"; break;
      case '*': std::cerr << "'*'\n"; break;
      case '/': std::cerr << "'/'\n"; break;
      case ADD_EQ: std::cerr << "ADD_EQ\n"; break;
      case SUB_EQ: std::cerr << "SUB_EQ\n"; break;
      case MUL_EQ: std::cerr << "MUL_EQ\n"; break;
      case DIV_EQ: std::cerr << "DIV_EQ\n"; break;
      case LEFTDIV_EQ: std::cerr << "LEFTDIV_EQ\n"; break;
      case POW_EQ: std::cerr << "POW_EQ\n"; break;
      case EMUL_EQ: std::cerr << "EMUL_EQ\n"; break;
      case EDIV_EQ: std::cerr << "EDIV_EQ\n"; break;
      case ELEFTDIV_EQ: std::cerr << "ELEFTDIV_EQ\n"; break;
      case EPOW_EQ: std::cerr << "EPOW_EQ\n"; break;
      case AND_EQ: std::cerr << "AND_EQ\n"; break;
      case OR_EQ: std::cerr << "OR_EQ\n"; break;
      case EXPR_AND_AND: std::cerr << "EXPR_AND_AND\n"; break;
      case EXPR_OR_OR: std::cerr << "EXPR_OR_OR\n"; break;
      case EXPR_AND: std::cerr << "EXPR_AND\n"; break;
      case EXPR_OR: std::cerr << "EXPR_OR\n"; break;
      case EXPR_NOT: std::cerr << "EXPR_NOT\n"; break;
      case EXPR_LT: std::cerr << "EXPR_LT\n"; break;
      case EXPR_LE: std::cerr << "EXPR_LE\n"; break;
      case EXPR_EQ: std::cerr << "EXPR_EQ\n"; break;
      case EXPR_NE: std::cerr << "EXPR_NE\n"; break;
      case EXPR_GE: std::cerr << "EXPR_GE\n"; break;
      case EXPR_GT: std::cerr << "EXPR_GT\n"; break;
      case LEFTDIV: std::cerr << "LEFTDIV\n"; break;
      case EMUL: std::cerr << "EMUL\n"; break;
      case EDIV: std::cerr << "EDIV\n"; break;
      case ELEFTDIV: std::cerr << "ELEFTDIV\n"; break;
      case EPLUS: std::cerr << "EPLUS\n"; break;
      case EMINUS: std::cerr << "EMINUS\n"; break;
      case HERMITIAN: std::cerr << "HERMITIAN\n"; break;
      case TRANSPOSE: std::cerr << "TRANSPOSE\n"; break;
      case PLUS_PLUS: std::cerr << "PLUS_PLUS\n"; break;
      case MINUS_MINUS: std::cerr << "MINUS_MINUS\n"; break;
      case POW: std::cerr << "POW\n"; break;
      case EPOW: std::cerr << "EPOW\n"; break;

      // Numeric tokens: show the numeric value as well.
      case NUM:
      case IMAG_NUM:
        {
          token *tok_val = current_token ();
          std::cerr << (tok == NUM ? "NUM" : "IMAG_NUM")
                    << " [" << tok_val->number () << "]\n";
        }
        break;

      case STRUCT_ELT:
        {
          token *tok_val = current_token ();
          std::cerr << "STRUCT_ELT [" << tok_val->text () << "]\n";
        }
        break;

      // Names: show the symbol name if the token has a symbol record.
      case NAME:
        {
          token *tok_val = current_token ();
          symbol_table::symbol_record *sr = tok_val->sym_rec ();
          std::cerr << "NAME";
          if (sr)
            std::cerr << " [" << sr->name () << "]";
          std::cerr << "\n";
        }
        break;

      case END: std::cerr << "END\n"; break;

      // String tokens: show the string text as well.
      case DQ_STRING:
      case SQ_STRING:
        {
          token *tok_val = current_token ();

          std::cerr << (tok == DQ_STRING ? "DQ_STRING" : "SQ_STRING")
                    << " [" << tok_val->text () << "]\n";
        }
        break;

      case FOR: std::cerr << "FOR\n"; break;
      case WHILE: std::cerr << "WHILE\n"; break;
      case DO: std::cerr << "DO\n"; break;
      case UNTIL: std::cerr << "UNTIL\n"; break;
      case IF: std::cerr << "IF\n"; break;
      case ELSEIF: std::cerr << "ELSEIF\n"; break;
      case ELSE: std::cerr << "ELSE\n"; break;
      case SWITCH: std::cerr << "SWITCH\n"; break;
      case CASE: std::cerr << "CASE\n"; break;
      case OTHERWISE: std::cerr << "OTHERWISE\n"; break;
      case BREAK: std::cerr << "BREAK\n"; break;
      case CONTINUE: std::cerr << "CONTINUE\n"; break;
      case FUNC_RET: std::cerr << "FUNC_RET\n"; break;
      case UNWIND: std::cerr << "UNWIND\n"; break;
      case CLEANUP: std::cerr << "CLEANUP\n"; break;
      case TRY: std::cerr << "TRY\n"; break;
      case CATCH: std::cerr << "CATCH\n"; break;
      case GLOBAL: std::cerr << "GLOBAL\n"; break;
      case PERSISTENT: std::cerr << "PERSISTENT\n"; break;
      case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break;
      case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
      case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
      case FCN: std::cerr << "FCN\n"; break;
      case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break;
      case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break;
      case METAQUERY: std::cerr << "METAQUERY\n"; break;
      case GET: std::cerr << "GET\n"; break;
      case SET: std::cerr << "SET\n"; break;
      case PROPERTIES: std::cerr << "PROPERTIES\n"; break;
      case METHODS: std::cerr << "METHODS\n"; break;
      case EVENTS: std::cerr << "EVENTS\n"; break;
      case CLASSDEF: std::cerr << "CLASSDEF\n"; break;
      case '\n': std::cerr << "\\n\n"; break;
      case '\r': std::cerr << "\\r\n"; break;
      case '\t': std::cerr << "TAB\n"; break;
      default:
        {
          // Values in the range 32..255 are shown as the character
          // itself; anything else is reported by number.
          if (tok < 256 && tok > 31)
            std::cerr << static_cast<char> (tok) << "\n";
          else
            std::cerr << "UNKNOWN(" << tok << ")\n";
        }
        break;
      }
  }

  // Report MSG through error () with a "fatal lexer error: " prefix.
  void
  base_lexer::fatal_error (const char *msg)
  {
    error ("fatal lexer error: %s", msg);
  }

  // When lexer debugging is enabled, print the current start state,
  // the name of the matched pattern (PATTERN), and the matched text
  // on std::cerr.  Otherwise do nothing.
  void
  base_lexer::lexer_debug (const char *pattern)
  {
    if (! lexer_debug_flag)
      return;

    std::cerr << std::endl;

    display_start_state ();

    std::cerr << "P: " << pattern << std::endl;
    std::cerr << "T: " << flex_yytext () << std::endl;
  }

  // Push STATE on the start state stack and make the state on top of
  // the stack the scanner's active start condition.
  void
  base_lexer::push_start_state (int state)
  {
    // The BEGIN macro below needs yyg.
    OCTAVE_YYG;

    start_state_stack.push (state);

    BEGIN (start_state ());
  }

  // Pop the start state stack and make the state now on top of the
  // stack the scanner's active start condition.
  void
  base_lexer::pop_start_state (void)
  {
    // The BEGIN macro below needs yyg.
    OCTAVE_YYG;

    start_state_stack.pop ();

    BEGIN (start_state ());
  }

  // Empty the start state stack and then push INITIAL so that it is
  // the only entry.
  void
  base_lexer::clear_start_state (void)
  {
    while (! start_state_stack.empty ())
      start_state_stack.pop ();

    push_start_state (INITIAL);
  }

  // Print the name of the current start state on std::cerr, prefixed
  // with "S: ".  Each label is the name of the start state constant
  // so that debug output can be matched against the scanner rules.
  void
  base_lexer::display_start_state (void) const
  {
    std::cerr << "S: ";

    switch (start_state ())
      {
      case INITIAL:
        std::cerr << "INITIAL" << std::endl;
        break;

      case COMMAND_START:
        std::cerr << "COMMAND_START" << std::endl;
        break;

      case MATRIX_START:
        std::cerr << "MATRIX_START" << std::endl;
        break;

      case INPUT_FILE_START:
        std::cerr << "INPUT_FILE_START" << std::endl;
        break;

      case BLOCK_COMMENT_START:
        std::cerr << "BLOCK_COMMENT_START" << std::endl;
        break;

      case LINE_COMMENT_START:
        std::cerr << "LINE_COMMENT_START" << std::endl;
        break;

      case DQ_STRING_START:
        std::cerr << "DQ_STRING_START" << std::endl;
        break;

      case SQ_STRING_START:
        std::cerr << "SQ_STRING_START" << std::endl;
        break;

      default:
        std::cerr << "UNKNOWN START STATE!" << std::endl;
        break;
      }
  }

  // Handle an ordinary operator token TOK matched by PATTERN.  BOS is
  // the value at_beginning_of_statement should have afterward.
  int
  base_lexer::handle_op (const char *pattern, int tok, bool bos)
  {
    lexer_debug (pattern);

    const bool compat = true;

    return handle_op_internal (tok, bos, compat);
  }

  // Handle an operator token TOK (matched by PATTERN) that is a
  // language extension.  BOS is the value at_beginning_of_statement
  // should have afterward.
  int
  base_lexer::handle_language_extension_op (const char *pattern, int tok,
                                                   bool bos)
  {
    lexer_debug (pattern);

    const bool compat = false;

    return handle_op_internal (tok, bos, compat);
  }

  // Decide whether the unary operator TOK should be treated as the
  // start of a new element.  This can only be true where whitespace
  // is significant and a space follows the previous token.  Only the
  // decision is returned; acting on it is left to the caller.
  bool
  base_lexer::maybe_unput_comma_before_unary_op (int tok)
  {
    int prev_tok = previous_token_value ();

    if (! (whitespace_is_significant () && space_follows_previous_token ()))
      return false;

    // Peek at the character that follows the operator.
    int c = text_yyinput ();
    xunput (c);

    const bool space_after = (c == ' ' || c == '\t');

    // Directly after an opening bracket or brace there is nothing to
    // separate.
    if (prev_tok == '[' || prev_tok == '{')
      return false;

    if (previous_token_is_binop ())
      return false;

    // A '+' or '-' with space on both sides is a binary operator.
    if ((tok == '+' || tok == '-') && space_after)
      return false;

    return true;
  }

  // Handle unary operator TOK.  Returns -1 when
  // maybe_unput_comma_before_unary_op says a separator belongs before
  // the operator; otherwise the operator is processed normally.
  int
  base_lexer::handle_unary_op (int tok, bool bos)
  {
    if (maybe_unput_comma_before_unary_op (tok))
      return -1;

    return handle_op_internal (tok, bos, true);
  }

  // Handle unary operator TOK that is a language extension.  Returns
  // -1 when maybe_unput_comma_before_unary_op says a separator belongs
  // before the operator; otherwise the operator is processed with the
  // language extension warning enabled.
  int
  base_lexer::handle_language_extension_unary_op (int tok, bool bos)
  {
    if (maybe_unput_comma_before_unary_op (tok))
      return -1;

    return handle_op_internal (tok, bos, false);
  }

  int
  base_lexer::handle_op_internal (int tok, bool bos, bool compat)
  {
    if (! compat)
      warn_language_extension_operator (flex_yytext ());

    push_token (new token (tok, input_line_number, current_input_column));

    current_input_column += flex_yyleng ();
    looking_for_object_index = false;
    at_beginning_of_statement = bos;

    return count_token_internal (tok);
  }

  // Create a token of type TOK whose text is NAME at the current
  // input position and hand it to the general token handler.
  int
  base_lexer::handle_token (const std::string& name, int tok)
  {
    return handle_token (tok, new token (tok, name, input_line_number,
                                         current_input_column));
  }

  int
  base_lexer::handle_token (int tok, token *tok_val)
  {
    if (! tok_val)
      tok_val = new token (tok, input_line_number, current_input_column);

    push_token (tok_val);

    current_input_column += flex_yyleng ();

    return count_token_internal (tok);
  }

  // Push a plain token of type TOK at the current input position and
  // count it.  The input column is left unchanged.
  int
  base_lexer::count_token (int tok)
  {
    push_token (new token (tok, input_line_number, current_input_column));

    return count_token_internal (tok);
  }

  // Update the token counters for TOK (newlines are not counted) and
  // return the result of show_token.
  int
  base_lexer::count_token_internal (int tok)
  {
    const bool is_newline = (tok == '\n');

    if (! is_newline)
      {
        Vtoken_count++;
        token_count++;
      }

    return show_token (tok);
  }

  // Display TOK if token display and/or lexer debugging is enabled,
  // then return TOK unchanged.
  int
  base_lexer::show_token (int tok)
  {
    if (Vdisplay_tokens)
      display_token (tok);

    if (! lexer_debug_flag)
      return tok;

    std::cerr << "R: ";
    display_token (tok);
    std::cerr << std::endl;

    return tok;
  }

  // Enter the FQ_IDENT_START start state.
  void
  base_lexer::enable_fq_identifier (void)
  {
    push_start_state (FQ_IDENT_START);
  }

  // Supply the scanner with up to MAX_SIZE characters in BUF.  When
  // the input buffer is empty, another line is requested from the
  // input reader first.  Returns the number of characters stored, or
  // YY_NULL if no input is available.
  int
  lexer::fill_flex_buffer (char *buf, unsigned max_size)
  {
    if (input_buf.empty ())
      {
        bool eof = false;
        current_input_line = input_reader.get_input (eof);
        input_buf.fill (current_input_line, eof);
      }

    // Still nothing to hand over.
    if (input_buf.empty ())
      return YY_NULL;

    return input_buf.copy_chunk (buf, max_size);
  }

  // Supply the scanner with up to MAX_SIZE characters in BUF.  When
  // the input buffer is empty but not at end of input, it is filled
  // with a single character whose value is 1.  Returns the number of
  // characters stored, or YY_NULL if no input is available.
  int
  push_lexer::fill_flex_buffer (char *buf, unsigned max_size)
  {
    const bool need_placeholder = (input_buf.empty () && ! input_buf.at_eof ());

    if (need_placeholder)
      input_buf.fill (std::string (1, static_cast<char> (1)), false);

    if (input_buf.empty ())
      return YY_NULL;

    return input_buf.copy_chunk (buf, max_size);
  }
}