diff libinterp/parse-tree/lex.ll @ 16259:0b5ab09dfce4

2/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:18:39 -0400
parents db7f07b22b9b
children 6c211b8cfbd9 b45a90cdb0ae
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.ll	Mon Mar 11 14:14:41 2013 -0400
+++ b/libinterp/parse-tree/lex.ll	Mon Mar 11 14:18:39 2013 -0400
@@ -194,8 +194,6 @@
     curr_lexer->input_line_number++;
     curr_lexer->current_input_column = 1;
 
-    curr_lexer->quote_is_transpose = false;
-    curr_lexer->convert_spaces_to_comma = true;
     curr_lexer->looking_for_object_index = false;
     curr_lexer->at_beginning_of_statement = true;
 
@@ -278,18 +276,12 @@
 <MATRIX_START>\] {
     curr_lexer->lexer_debug ("<MATRIX_START>\\]");
 
-    curr_lexer->scan_for_comments (yytext);
-    curr_lexer->fixup_column_count (yytext);
-
     curr_lexer->looking_at_object_index.pop_front ();
 
     curr_lexer->looking_for_object_index = true;
     curr_lexer->at_beginning_of_statement = false;
 
-    int c = yytext[yyleng-1];
-    bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
-    bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
-    int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, ']');
+    int tok_to_return = curr_lexer->handle_close_bracket (']');
 
     return curr_lexer->count_token (']');
   }
@@ -301,18 +293,12 @@
 <MATRIX_START>\} {
     curr_lexer->lexer_debug ("<MATRIX_START>\\}*");
 
-    curr_lexer->scan_for_comments (yytext);
-    curr_lexer->fixup_column_count (yytext);
-
     curr_lexer->looking_at_object_index.pop_front ();
 
     curr_lexer->looking_for_object_index = true;
     curr_lexer->at_beginning_of_statement = false;
 
-    int c = yytext[yyleng-1];
-    bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
-    bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
-    int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, '}');
+    int tok_to_return = curr_lexer->handle_close_bracket ('}');
 
     return curr_lexer->count_token ('}');
   }
@@ -325,8 +311,6 @@
     curr_lexer->looking_at_object_index.push_front (false);
 
     curr_lexer->current_input_column += yyleng;
-    curr_lexer->quote_is_transpose = false;
-    curr_lexer->convert_spaces_to_comma = true;
     curr_lexer->looking_for_object_index = false;
     curr_lexer->at_beginning_of_statement = false;
 
@@ -574,16 +558,18 @@
   }
 
 %{
-// Identifiers.  Truncate the token at the first space or tab but
-// don't write directly on yytext.
+// Identifiers.
 %}
 
 {IDENT} {
     curr_lexer->lexer_debug ("{IDENT}");
 
+    int tok = curr_lexer->previous_token_value ();
+
     if (curr_lexer->whitespace_is_significant ()
         && curr_lexer->space_follows_previous_token ()
-        && ! curr_lexer->previous_token_is_binop ())
+        && ! (tok == '[' || tok == '{'
+              || curr_lexer->previous_token_is_binop ()))
       {
         yyless (0);
         unput (',');
@@ -650,8 +636,6 @@
 
     curr_lexer->current_input_column++;
 
-    curr_lexer->quote_is_transpose = false;
-    curr_lexer->convert_spaces_to_comma = false;
     curr_lexer->looking_at_function_handle++;
     curr_lexer->looking_for_object_index = false;
     curr_lexer->at_beginning_of_statement = false;
@@ -672,9 +656,6 @@
     curr_lexer->input_line_number++;
     curr_lexer->current_input_column = 1;
 
-    curr_lexer->quote_is_transpose = false;
-    curr_lexer->convert_spaces_to_comma = true;
-
     if (curr_lexer->nesting_level.none ())
       {
         curr_lexer->at_beginning_of_statement = true;
@@ -799,19 +780,16 @@
   }
 
 ".'" {
-    curr_lexer->do_comma_insert_check ();
     return curr_lexer->handle_op (".'", TRANSPOSE, true, false);
   }
 
 "++" {
-    curr_lexer->do_comma_insert_check ();
     return curr_lexer->handle_incompatible_op
       ("++", PLUS_PLUS, true, false, true);
   }
 
 "--" {
     ;
-    curr_lexer->do_comma_insert_check ();
     return curr_lexer->handle_incompatible_op
       ("--", MINUS_MINUS, true, false, true);
   }
@@ -845,18 +823,12 @@
 
     curr_lexer->looking_at_object_index.pop_front ();
 
-    curr_lexer->quote_is_transpose = true;
-    curr_lexer->convert_spaces_to_comma
-      = (curr_lexer->nesting_level.is_bracket_or_brace ()
-         && ! curr_lexer->looking_at_anon_fcn_args);
     curr_lexer->looking_for_object_index = true;
     curr_lexer->at_beginning_of_statement = false;
 
     if (curr_lexer->looking_at_anon_fcn_args)
       curr_lexer->looking_at_anon_fcn_args = false;
 
-    curr_lexer->do_comma_insert_check ();
-
     return curr_lexer->count_token (')');
   }
 
@@ -1110,13 +1082,10 @@
       (curr_lexer->looking_for_object_index);
 
     curr_lexer->current_input_column += yyleng;
-    curr_lexer->quote_is_transpose = false;
-    curr_lexer->convert_spaces_to_comma = true;
     curr_lexer->looking_for_object_index = false;
     curr_lexer->at_beginning_of_statement = false;
 
     curr_lexer->decrement_promptflag ();
-    curr_lexer->eat_whitespace ();
 
     curr_lexer->braceflag++;
 
@@ -1506,8 +1475,6 @@
 lexical_feedback::reset (void)
 {
   end_of_input = false;
-  convert_spaces_to_comma = true;
-  do_comma_insert = false;
   at_beginning_of_statement = true;
   looking_at_anon_fcn_args = false;
   looking_at_return_list = false;
@@ -1520,7 +1487,6 @@
   parsing_class_method = false;
   maybe_classdef_get_set_method = false;
   parsing_classdef = false;
-  quote_is_transpose = false;
   force_script = false;
   reading_fcn_file = false;
   reading_script_file = false;
@@ -1784,27 +1750,6 @@
   return yyget_leng (scanner);
 }
 
-// GAG.
-//
-// If we're reading a matrix and the next character is '[', make sure
-// that we insert a comma ahead of it.
-
-void
-octave_lexer::do_comma_insert_check (void)
-{
-  bool spc_gobbled = (eat_continuation () != octave_lexer::NO_WHITESPACE);
-
-  int c = text_yyinput ();
-
-  xunput (c);
-
-  if (spc_gobbled)
-    xunput (' ');
-
-  do_comma_insert = (! looking_at_object_index.front ()
-                     && bracketflag && c == '[');
-}
-
 int
 octave_lexer::text_yyinput (void)
 {
@@ -1870,25 +1815,6 @@
   xunput (c, yytxt);
 }
 
-// If we read some newlines, we need figure out what column we're
-// really looking at.
-
-void
-octave_lexer::fixup_column_count (char *s)
-{
-  char c;
-  while ((c = *s++) != '\0')
-    {
-      if (c == '\n')
-        {
-          input_line_number++;
-          current_input_column = 1;
-        }
-      else
-        current_input_column++;
-    }
-}
-
 bool
 octave_lexer::inside_any_object_index (void)
 {
@@ -2147,342 +2073,6 @@
               != pending_local_variables.end ()));
 }
 
-// Recognize separators.  If the separator is a CRLF pair, it is
-// replaced by a single LF.
-
-bool
-octave_lexer::next_token_is_sep_op (void)
-{
-  bool retval = false;
-
-  int c = text_yyinput ();
-
-  retval = match_any (c, ",;\n]");
-
-  xunput (c);
-
-  return retval;
-}
-
-// Try to determine if the next token should be treated as a postfix
-// unary operator.  This is ugly, but it seems to do the right thing.
-
-bool
-octave_lexer::next_token_is_postfix_unary_op (bool spc_prev)
-{
-  bool un_op = false;
-
-  int c0 = text_yyinput ();
-
-  if (c0 == '\'' && ! spc_prev)
-    {
-      un_op = true;
-    }
-  else if (c0 == '.')
-    {
-      int c1 = text_yyinput ();
-      un_op = (c1 == '\'');
-      xunput (c1);
-    }
-  else if (c0 == '+')
-    {
-      int c1 = text_yyinput ();
-      un_op = (c1 == '+');
-      xunput (c1);
-    }
-  else if (c0 == '-')
-    {
-      int c1 = text_yyinput ();
-      un_op = (c1 == '-');
-      xunput (c1);
-    }
-
-  xunput (c0);
-
-  return un_op;
-}
-
-// Try to determine if the next token should be treated as a binary
-// operator.
-//
-// This kluge exists because whitespace is not always ignored inside
-// the square brackets that are used to create matrix objects (though
-// spacing only really matters in the cases that can be interpreted
-// either as binary ops or prefix unary ops: currently just +, -).
-//
-// Note that a line continuation directly following a + or - operator
-// (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
-// parsed as a binary operator.
-
-bool
-octave_lexer::next_token_is_bin_op (bool spc_prev)
-{
-  bool bin_op = false;
-
-  int c0 = text_yyinput ();
-
-  switch (c0)
-    {
-    case '+':
-    case '-':
-      {
-        int c1 = text_yyinput ();
-
-        switch (c1)
-          {
-          case '+':
-          case '-':
-            // Unary ops, spacing doesn't matter.
-            break;
-
-          case '=':
-            // Binary ops, spacing doesn't matter.
-            bin_op = true;
-            break;
-
-          default:
-            // Could be either, spacing matters.
-            bin_op = looks_like_bin_op (spc_prev, c1);
-            break;
-          }
-
-        xunput (c1);
-      }
-      break;
-
-    case ':':
-    case '/':
-    case '\\':
-    case '^':
-      // Always a binary op (may also include /=, \=, and ^=).
-      bin_op = true;
-      break;
-
-    // .+ .- ./ .\ .^ .* .**
-    case '.':
-      {
-        int c1 = text_yyinput ();
-
-        if (match_any (c1, "+-/\\^*"))
-          // Always a binary op (may also include .+=, .-=, ./=, ...).
-          bin_op = true;
-        else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
-          // A structure element reference is a binary op.
-          bin_op = true;
-
-        xunput (c1);
-      }
-      break;
-
-    // = == & && | || * **
-    case '=':
-    case '&':
-    case '|':
-    case '*':
-      // Always a binary op (may also include ==, &&, ||, **).
-      bin_op = true;
-      break;
-
-    // < <= <> > >=
-    case '<':
-    case '>':
-      // Always a binary op (may also include <=, <>, >=).
-      bin_op = true;
-      break;
-
-    // ~= !=
-    case '~':
-    case '!':
-      {
-        int c1 = text_yyinput ();
-
-        // ~ and ! can be unary ops, so require following =.
-        if (c1 == '=')
-          bin_op = true;
-
-        xunput (c1);
-      }
-      break;
-
-    default:
-      break;
-    }
-
-  xunput (c0);
-
-  return bin_op;
-}
-
-// FIXME -- we need to handle block comments here.
-
-void
-octave_lexer::scan_for_comments (const char *text)
-{
-  std::string comment_buf;
-
-  bool in_comment = false;
-  bool beginning_of_comment = false;
-
-  int len = strlen (text);
-  int i = 0;
-
-  while (i < len)
-    {
-      char c = text[i++];
-
-      switch (c)
-        {
-        case '%':
-        case '#':
-          if (in_comment)
-            {
-              if (! beginning_of_comment)
-                comment_buf += static_cast<char> (c);
-            }
-          else
-            {
-              maybe_gripe_matlab_incompatible_comment (c);
-              in_comment = true;
-              beginning_of_comment = true;
-            }
-          break;
-
-        case '\n':
-          if (in_comment)
-            {
-              comment_buf += static_cast<char> (c);
-              octave_comment_buffer::append (comment_buf);
-              comment_buf.resize (0);
-              in_comment = false;
-              beginning_of_comment = false;
-            }
-          break;
-
-        default:
-          if (in_comment)
-            {
-              comment_buf += static_cast<char> (c);
-              beginning_of_comment = false;
-            }
-          break;
-        }
-    }
-
-  if (! comment_buf.empty ())
-    octave_comment_buffer::append (comment_buf);
-}
-
-// Discard whitespace, including comments and continuations.
-
-// FIXME -- we need to handle block comments here.
-
-int
-octave_lexer::eat_whitespace (void)
-{
-  int retval = octave_lexer::NO_WHITESPACE;
-
-  std::string comment_buf;
-
-  bool in_comment = false;
-  bool beginning_of_comment = false;
-
-  int c = 0;
-
-  while ((c = text_yyinput ()) != EOF)
-    {
-      current_input_column++;
-
-      switch (c)
-        {
-        case ' ':
-        case '\t':
-          if (in_comment)
-            {
-              comment_buf += static_cast<char> (c);
-              beginning_of_comment = false;
-            }
-          retval |= octave_lexer::SPACE_OR_TAB;
-          break;
-
-        case '\n':
-          retval |= octave_lexer::NEWLINE;
-          if (in_comment)
-            {
-              comment_buf += static_cast<char> (c);
-              octave_comment_buffer::append (comment_buf);
-              comment_buf.resize (0);
-              in_comment = false;
-              beginning_of_comment = false;
-            }
-          current_input_column = 0;
-          break;
-
-        case '#':
-        case '%':
-          if (in_comment)
-            {
-              if (! beginning_of_comment)
-                comment_buf += static_cast<char> (c);
-            }
-          else
-            {
-              maybe_gripe_matlab_incompatible_comment (c);
-              in_comment = true;
-              beginning_of_comment = true;
-            }
-          break;
-
-        case '.':
-          if (in_comment)
-            {
-              comment_buf += static_cast<char> (c);
-              beginning_of_comment = false;
-              break;
-            }
-          else
-            {
-              if (have_ellipsis_continuation ())
-                break;
-              else
-                goto done;
-            }
-
-        case '\\':
-          if (in_comment)
-            {
-              comment_buf += static_cast<char> (c);
-              beginning_of_comment = false;
-              break;
-            }
-          else
-            {
-              if (have_continuation ())
-                break;
-              else
-                goto done;
-            }
-
-        default:
-          if (in_comment)
-            {
-              comment_buf += static_cast<char> (c);
-              beginning_of_comment = false;
-              break;
-            }
-          else
-            goto done;
-        }
-    }
-
-  if (! comment_buf.empty ())
-    octave_comment_buffer::append (comment_buf);
-
- done:
-  xunput (c);
-  current_input_column--;
-  return retval;
-}
-
 bool
 octave_lexer::whitespace_is_significant (void)
 {
@@ -2531,8 +2121,6 @@
 
   assert (nread == 1);
 
-  quote_is_transpose = true;
-  convert_spaces_to_comma = true;
   looking_for_object_index = false;
   at_beginning_of_statement = false;
 
@@ -2540,8 +2128,6 @@
                          current_input_column));
 
   current_input_column += flex_yyleng ();
-
-  do_comma_insert_check ();
 }
 
 void
@@ -2614,8 +2200,6 @@
 
   comment_text = "";
 
-  quote_is_transpose = false;
-  convert_spaces_to_comma = true;
   at_beginning_of_statement = true;
 
   if (! looking_at_continuation)
@@ -2742,25 +2326,6 @@
   return false;
 }
 
-// See if we have a continuation line.  If so, eat it and the leading
-// whitespace on the next line.
-
-int
-octave_lexer::eat_continuation (void)
-{
-  int retval = octave_lexer::NO_WHITESPACE;
-
-  int c = text_yyinput ();
-
-  if ((c == '.' && have_ellipsis_continuation ())
-      || (c == '\\' && have_continuation ()))
-    retval = eat_whitespace ();
-  else
-    xunput (c);
-
-  return retval;
-}
-
 int
 octave_lexer::handle_string (char delim)
 {
@@ -2826,9 +2391,6 @@
                   else
                     s = do_string_escapes (buf.str ());
 
-                  quote_is_transpose = true;
-                  convert_spaces_to_comma = true;
-
                   if (delim == '"')
                     gripe_matlab_incompatible ("\" used as string delimiter");
                   else if (delim == '\'')
@@ -2856,100 +2418,8 @@
   return LEXICAL_ERROR;
 }
 
-bool
-octave_lexer::next_token_is_assign_op (void)
-{
-  bool retval = false;
-
-  int c0 = text_yyinput ();
-
-  switch (c0)
-    {
-    case '=':
-      {
-        int c1 = text_yyinput ();
-        xunput (c1);
-        if (c1 != '=')
-          retval = true;
-      }
-      break;
-
-    case '+':
-    case '-':
-    case '*':
-    case '/':
-    case '\\':
-    case '&':
-    case '|':
-      {
-        int c1 = text_yyinput ();
-        xunput (c1);
-        if (c1 == '=')
-          retval = true;
-      }
-      break;
-
-    case '.':
-      {
-        int c1 = text_yyinput ();
-        if (match_any (c1, "+-*/\\"))
-          {
-            int c2 = text_yyinput ();
-            xunput (c2);
-            if (c2 == '=')
-              retval = true;
-          }
-        xunput (c1);
-      }
-      break;
-
-    case '>':
-      {
-        int c1 = text_yyinput ();
-        if (c1 == '>')
-          {
-            int c2 = text_yyinput ();
-            xunput (c2);
-            if (c2 == '=')
-              retval = true;
-          }
-        xunput (c1);
-      }
-      break;
-
-    case '<':
-      {
-        int c1 = text_yyinput ();
-        if (c1 == '<')
-          {
-            int c2 = text_yyinput ();
-            xunput (c2);
-            if (c2 == '=')
-              retval = true;
-          }
-        xunput (c1);
-      }
-      break;
-
-    default:
-      break;
-    }
-
-  xunput (c0);
-
-  return retval;
-}
-
-bool
-octave_lexer::next_token_is_index_op (void)
-{
-  int c = text_yyinput ();
-  xunput (c);
-  return c == '(' || c == '{';
-}
-
 int
-octave_lexer::handle_close_bracket (bool spc_gobbled, int bracket_type)
+octave_lexer::handle_close_bracket (int bracket_type)
 {
   int retval = bracket_type;
 
@@ -2967,51 +2437,9 @@
 
   pop_start_state ();
 
-  quote_is_transpose = true;
-  convert_spaces_to_comma = true;
-
   return retval;
 }
 
-void
-octave_lexer::maybe_unput_comma (int spc_gobbled)
-{
-  if (nesting_level.is_bracket ()
-      || (nesting_level.is_brace ()
-          && ! looking_at_object_index.front ()))
-    {
-      int bin_op = next_token_is_bin_op (spc_gobbled);
-
-      int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
-
-      int c1 = text_yyinput ();
-      int c2 = text_yyinput ();
-
-      xunput (c2);
-      xunput (c1);
-
-      int sep_op = next_token_is_sep_op ();
-
-      int dot_op = (c1 == '.'
-                    && (isalpha (c2) || isspace (c2) || c2 == '_'));
-
-      if (postfix_un_op || bin_op || sep_op || dot_op)
-        return;
-
-      int index_op = (c1 == '(' || c1 == '{');
-
-      // If there is no space before the indexing op, we don't insert
-      // a comma.
-
-      if (index_op && ! spc_gobbled)
-        return;
-
-      maybe_warn_separator_insert (',');
-
-      xunput (',');
-    }
-}
-
 bool
 octave_lexer::next_token_can_follow_bin_op (void)
 {
@@ -3279,8 +2707,6 @@
 int
 octave_lexer::handle_superclass_identifier (void)
 {
-  eat_continuation ();
-
   std::string pkg;
   char *yytxt = flex_yytext ();
   std::string meth = strip_trailing_whitespace (yytxt);
@@ -3309,7 +2735,6 @@
                          pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
                          input_line_number, current_input_column));
 
-  convert_spaces_to_comma = true;
   current_input_column += flex_yyleng ();
 
   return SUPERCLASSREF;
@@ -3318,8 +2743,6 @@
 int
 octave_lexer::handle_meta_identifier (void)
 {
-  eat_continuation ();
-
   std::string pkg;
   char *yytxt = flex_yytext ();
   std::string cls = strip_trailing_whitespace (yytxt).substr (1);
@@ -3343,7 +2766,6 @@
                          pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
                          input_line_number, current_input_column));
 
-  convert_spaces_to_comma = true;
   current_input_column += flex_yyleng ();
 
   return METAQUERY;
@@ -3371,15 +2793,9 @@
 
   if (looking_at_indirect_ref)
     {
-      //      do_comma_insert_check ();
-
-      //      maybe_unput_comma (spc_gobbled);
-
       push_token (new token (STRUCT_ELT, tok, input_line_number,
                              current_input_column));
 
-      quote_is_transpose = true;
-      convert_spaces_to_comma = true;
       looking_for_object_index = true;
 
       current_input_column += flex_yyleng ();
@@ -3415,8 +2831,6 @@
                                  current_input_column));
 
           current_input_column += flex_yyleng ();
-          quote_is_transpose = false;
-          convert_spaces_to_comma = true;
           looking_for_object_index = true;
 
           at_beginning_of_statement = false;
@@ -3433,8 +2847,6 @@
       if (kw_token >= 0)
         {
           current_input_column += flex_yyleng ();
-          quote_is_transpose = false;
-          convert_spaces_to_comma = true;
           looking_for_object_index = false;
         }
 
@@ -3865,8 +3277,6 @@
   push_token (new token (tok, input_line_number, current_input_column));
 
   current_input_column += flex_yyleng ();
-  quote_is_transpose = qit;
-  convert_spaces_to_comma = convert;
   looking_for_object_index = false;
   at_beginning_of_statement = bos;
 
@@ -3891,8 +3301,6 @@
   push_token (tok_val);
 
   current_input_column += flex_yyleng ();
-  quote_is_transpose = false;
-  convert_spaces_to_comma = true;
 
   return count_token_internal (tok);
 }