changeset 16273:c5e5f6ccac5d

9/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:32:53 -0400
parents 87069bd38107
children 3c265e4dba6d dd7de0345124
files libinterp/parse-tree/lex.ll libinterp/parse-tree/oct-parse.in.yy libinterp/parse-tree/parse.h libinterp/parse-tree/pt-array-list.h libinterp/parse-tree/pt-cell.h libinterp/parse-tree/pt-exp.h libinterp/parse-tree/pt-mat.h
diffstat 7 files changed, 140 insertions(+), 347 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.ll	Mon Mar 11 14:32:15 2013 -0400
+++ b/libinterp/parse-tree/lex.ll	Mon Mar 11 14:32:53 2013 -0400
@@ -51,8 +51,6 @@
 %x BLOCK_COMMENT_START
 %x LINE_COMMENT_START
 
-%x KLUGE
-
 %{
 
 #include <cctype>
@@ -253,12 +251,6 @@
       }
   }
 
-<KLUGE>@ {
-    curr_lexer->lexer_debug ("<KLUGE>@");
-    curr_lexer->pop_start_state ();
-    return curr_lexer->count_token (CHOOSE_ASSIGNMENT);
-  }
-
 %{
 // For this and the next two rules, we're looking at ']', and we
 // need to know if the next token is '=' or '=='.
@@ -802,24 +794,24 @@
 // Other operators.
 %}
 
-":"     { return curr_lexer->handle_op (":", ':'); }
-".+"    { return curr_lexer->handle_incompatible_op (".+", EPLUS); }
-".-"    { return curr_lexer->handle_incompatible_op (".-", EMINUS); }
-".*"    { return curr_lexer->handle_op (".*", EMUL); }
-"./"    { return curr_lexer->handle_op ("./", EDIV); }
-".\\"   { return curr_lexer->handle_op (".\\", ELEFTDIV); }
-".^"    { return curr_lexer->handle_op (".^", EPOW); }
-".**"   { return curr_lexer->handle_incompatible_op (".**", EPOW); }
-"<="    { return curr_lexer->handle_op ("<=", EXPR_LE); }
-"=="    { return curr_lexer->handle_op ("==", EXPR_EQ); }
-"~="    { return curr_lexer->handle_op ("~=", EXPR_NE); }
-"!="    { return curr_lexer->handle_incompatible_op ("!=", EXPR_NE); }
-">="    { return curr_lexer->handle_op (">=", EXPR_GE); }
-"&"     { return curr_lexer->handle_op ("&", EXPR_AND); }
-"|"     { return curr_lexer->handle_op ("|", EXPR_OR); }
-"<"     { return curr_lexer->handle_op ("<", EXPR_LT); }
-">"     { return curr_lexer->handle_op (">", EXPR_GT); }
-"*"     { return curr_lexer->handle_op ("*", '*'); }
+":"   { return curr_lexer->handle_op (":", ':'); }
+".+"  { return curr_lexer->handle_incompatible_op (".+", EPLUS); }
+".-"  { return curr_lexer->handle_incompatible_op (".-", EMINUS); }
+".*"  { return curr_lexer->handle_op (".*", EMUL); }
+"./"  { return curr_lexer->handle_op ("./", EDIV); }
+".\\" { return curr_lexer->handle_op (".\\", ELEFTDIV); }
+".^"  { return curr_lexer->handle_op (".^", EPOW); }
+".**" { return curr_lexer->handle_incompatible_op (".**", EPOW); }
+"<="  { return curr_lexer->handle_op ("<=", EXPR_LE); }
+"=="  { return curr_lexer->handle_op ("==", EXPR_EQ); }
+"~="  { return curr_lexer->handle_op ("~=", EXPR_NE); }
+"!="  { return curr_lexer->handle_incompatible_op ("!=", EXPR_NE); }
+">="  { return curr_lexer->handle_op (">=", EXPR_GE); }
+"&"   { return curr_lexer->handle_op ("&", EXPR_AND); }
+"|"   { return curr_lexer->handle_op ("|", EXPR_OR); }
+"<"   { return curr_lexer->handle_op ("<", EXPR_LT); }
+">"   { return curr_lexer->handle_op (">", EXPR_GT); }
+"*"   { return curr_lexer->handle_op ("*", '*'); }
 
 "/" {
     int prev_tok = curr_lexer->previous_token_value ();
@@ -838,13 +830,13 @@
       return curr_lexer->handle_op ("/", '/');
   }
 
-"\\"    { return curr_lexer->handle_op ("\\", LEFTDIV); }
-"^"     { return curr_lexer->handle_op ("^", POW); }
-"**"    { return curr_lexer->handle_incompatible_op ("**", POW); }
-"&&"    { return curr_lexer->handle_op ("&&", EXPR_AND_AND); }
-"||"    { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
-"<<"    { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); }
-">>"    { return curr_lexer->handle_incompatible_op (">>", RSHIFT); }
+"\\" { return curr_lexer->handle_op ("\\", LEFTDIV); }
+"^"  { return curr_lexer->handle_op ("^", POW); }
+"**" { return curr_lexer->handle_incompatible_op ("**", POW); }
+"&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); }
+"||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
+"<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); }
+">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); }
 
 ";" {
     bool at_beginning_of_statement
@@ -1030,233 +1022,25 @@
 // = and op= operators.
 %}
 
-"=" {
-    int tok = curr_lexer->handle_assign_op ("=", '=');
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"+=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("+=", ADD_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"-=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("-=", SUB_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"*=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("*=", MUL_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"/=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("/=", DIV_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"\\=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("\\=", LEFTDIV_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-".+=" {
-    int tok = curr_lexer->handle_incompatible_assign_op (".+=", ADD_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-".-=" {
-    int tok = curr_lexer->handle_incompatible_assign_op (".-=", SUB_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-".*=" {
-    int tok = curr_lexer->handle_incompatible_assign_op (".*=", EMUL_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"./=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("./=", EDIV_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-".\\=" {
-    int tok = curr_lexer->handle_incompatible_assign_op (".\\=", ELEFTDIV_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"^=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"**=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-".^=" {
-    int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-".**=" {
-    int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"&=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("&=", AND_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"|=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("|=", OR_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-"<<=" {
-    int tok = curr_lexer->handle_incompatible_assign_op ("<<=", LSHIFT_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
-
-">>=" {
-    int tok = curr_lexer->handle_incompatible_assign_op (">>=", RSHIFT_EQ);
-    if (tok < 0)
-      {
-        yyless (0);
-        curr_lexer->xunput ('@');
-        curr_lexer->push_start_state (KLUGE);
-      }
-    else
-      return tok;
-  }
+"="    { return curr_lexer->handle_op ("=", '='); }
+"+="   { return curr_lexer->handle_incompatible_op ("+=", ADD_EQ); }
+"-="   { return curr_lexer->handle_incompatible_op ("-=", SUB_EQ); }
+"*="   { return curr_lexer->handle_incompatible_op ("*=", MUL_EQ); }
+"/="   { return curr_lexer->handle_incompatible_op ("/=", DIV_EQ); }
+"\\="  { return curr_lexer->handle_incompatible_op ("\\=", LEFTDIV_EQ); }
+".+="  { return curr_lexer->handle_incompatible_op (".+=", ADD_EQ); }
+".-="  { return curr_lexer->handle_incompatible_op (".-=", SUB_EQ); }
+".*="  { return curr_lexer->handle_incompatible_op (".*=", EMUL_EQ); }
+"./="  { return curr_lexer->handle_incompatible_op ("./=", EDIV_EQ); }
+".\\=" { return curr_lexer->handle_incompatible_op (".\\=", ELEFTDIV_EQ); }
+"^="   { return curr_lexer->handle_incompatible_op ("^=", POW_EQ); }
+"**="  { return curr_lexer->handle_incompatible_op ("**=", POW_EQ); }
+".^="  { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); }
+".**=" { return curr_lexer->handle_incompatible_op (".**=", EPOW_EQ); }
+"&="   { return curr_lexer->handle_incompatible_op ("&=", AND_EQ); }
+"|="   { return curr_lexer->handle_incompatible_op ("|=", OR_EQ); }
+"<<="  { return curr_lexer->handle_incompatible_op ("<<=", LSHIFT_EQ); }
+">>="  { return curr_lexer->handle_incompatible_op (">>=", RSHIFT_EQ); }
 
 "{" {
     curr_lexer->lexer_debug ("{");
@@ -3031,48 +2815,6 @@
       return kw_token;
     }
 
-  // See if we have a plot keyword (title, using, with, or clear).
-
-  int c1 = text_yyinput ();
-
-  bool next_tok_is_eq = false;
-  if (c1 == '=')
-    {
-      int c2 = text_yyinput ();
-      xunput (c2);
-
-      if (c2 != '=')
-        next_tok_is_eq = true;
-    }
-
-  xunput (c1);
-
-  // Kluge alert.
-  //
-  // If we are looking at a text style function, set up to gobble its
-  // arguments.
-  //
-  // If the following token is '=', or if we are parsing a function
-  // return list or function parameter list, or if we are looking at
-  // something like [ab,cd] = foo (), force the symbol to be inserted
-  // as a variable in the current symbol table.
-
-  if (! is_variable (tok))
-    {
-      if (next_tok_is_eq
-          || looking_at_decl_list
-          || looking_at_return_list
-          || (looking_at_parameter_list
-              && ! looking_at_initializer_expression))
-        {
-          symbol_table::force_variable (tok);
-        }
-      else if (looking_at_matrix_or_assign_lhs)
-        {
-          pending_local_variables.insert (tok);
-        }
-    }
-
   // Find the token in the symbol table.  Beware the magic
   // transformation of the end keyword...
 
@@ -3298,7 +3040,6 @@
     case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
     case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
     case FCN: std::cerr << "FCN\n"; break;
-    case CHOOSE_ASSIGNMENT: std::cerr << "CHOOSE_ASSIGNMENT\n"; break;
     case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break;
     case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break;
     case METAQUERY: std::cerr << "METAQUERY\n"; break;
@@ -3406,10 +3147,6 @@
       std::cerr << "LINE_COMMENT_START" << std::endl;
       break;
 
-    case KLUGE:
-      std::cerr << "KLUGE" << std::endl;
-      break;
-
     default:
       std::cerr << "UNKNOWN START STATE!" << std::endl;
       break;
@@ -3478,24 +3215,6 @@
 }
 
 int
-octave_lexer::handle_assign_op (const char *pattern, int tok)
-{
-  lexer_debug (pattern);
-
-  return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs)
-    ? -1 : handle_op_internal (pattern, tok, false, false, false, true);
-}
-
-int
-octave_lexer::handle_incompatible_assign_op (const char *pattern, int tok)
-{
-  lexer_debug (pattern);
-
-  return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs)
-    ? -1 : handle_op_internal (pattern, tok, false, false, false, false);
-}
-
-int
 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert,
                                   bool bos, bool qit, bool compat)
 {
--- a/libinterp/parse-tree/oct-parse.in.yy	Mon Mar 11 14:32:15 2013 -0400
+++ b/libinterp/parse-tree/oct-parse.in.yy	Mon Mar 11 14:32:53 2013 -0400
@@ -230,7 +230,6 @@
 %token END_OF_INPUT LEXICAL_ERROR
 %token FCN INPUT_FILE CLASSDEF
 // %token VARARGIN VARARGOUT
-%token CHOOSE_ASSIGNMENT
 
 // Nonterminals we construct.
 %type <comment_type> stash_comment function_beg classdef_beg
@@ -670,21 +669,19 @@
 
 assign_lhs      : simple_expr
                   {
-                    $$ = new tree_argument_list ($1);
-                    $$->mark_as_simple_assign_lhs ();
-                  }
-                | matrix CHOOSE_ASSIGNMENT
-                  {
-                    tree_matrix *tmp = dynamic_cast<tree_matrix *> ($1);
-                    $$ = tmp->front ();
-                    curr_lexer->looking_at_matrix_or_assign_lhs = false;
-                    for (std::set<std::string>::const_iterator p = curr_lexer->pending_local_variables.begin ();
-                         p != curr_lexer->pending_local_variables.end ();
-                         p++)
+                    $$ = curr_parser.validate_matrix_for_assignment ($1);
+
+                    if ($$)
                       {
-                        symbol_table::force_variable (*p);
+                        curr_lexer->looking_at_matrix_or_assign_lhs = false;
+                        curr_lexer->pending_local_variables.clear ();
                       }
-                    curr_lexer->pending_local_variables.clear ();
+                    else
+                      {
+                        // validate_matrix_for_assignment deleted $1
+                        // for us.
+                        ABORT_PARSE;
+                      }
                   }
                 ;
 
@@ -721,7 +718,20 @@
                 ;
 
 expression      : simple_expr
-                  { $$ = $1; }
+                  {
+                    if ($1 && ($1->is_matrix () || $1->is_cell ()))
+                      {
+                        if (curr_parser.validate_array_list ($1))
+                          $$ = $1;
+                        else
+                          {
+                            delete $1;
+                            ABORT_PARSE;
+                          }
+                      }
+                    else
+                      $$ = $1;
+                  }
                 | assign_expr
                   { $$ = $1; }
                 | anon_fcn_handle
@@ -2932,12 +2942,64 @@
   return retval;
 }
 
+// Check each row of the matrix or cell expression E for a magic tilde
+// (~).  Report a parse error and return false if one is found.  An E
+// that is null or not a tree_array_list has no rows and is accepted.
+bool
+octave_parser::validate_array_list (tree_expression *e)
+{
+  tree_array_list *al = dynamic_cast<tree_array_list *> (e);
+  if (! al)
+    return true;
+
+  for (tree_array_list::iterator i = al->begin (); i != al->end (); i++)
+    {
+      tree_argument_list *row = *i;
+      if (row && row->has_magic_tilde ())
+        {
+          bison_error (e->is_matrix ()
+                       ? "invalid use of tilde (~) in matrix expression"
+                       : "invalid use of tilde (~) in cell expression");
+          return false;
+        }
+    }
+
+  return true;
+}
+
 tree_argument_list *
-octave_parser::validate_matrix_row (tree_argument_list *row)
+octave_parser::validate_matrix_for_assignment (tree_expression *e)
 {
-  if (row && row->has_magic_tilde ())
-    bison_error ("invalid use of tilde (~) in matrix expression");
-  return row;
+  tree_argument_list *retval = 0;
+
+  if (e->is_constant ())
+    {
+      bison_error ("invalid empty LHS in [] = ... assignment");
+      delete e;
+    }
+  else if (e->is_matrix ())
+    {
+      tree_matrix *mat = dynamic_cast<tree_matrix *> (e);
+
+      if (mat && mat->size () == 1)
+        {
+          retval = mat->front ();
+          mat->pop_front ();
+          delete e;
+        }
+      else
+        {
+          bison_error ("invalid LHS in '[LHS] = ...' assignment");
+          delete e;
+        }
+    }
+  else
+    {
+      retval = new tree_argument_list (e);
+      retval->mark_as_simple_assign_lhs ();
+    }
+
+  return retval;
 }
 
 // Finish building an array_list.
--- a/libinterp/parse-tree/parse.h	Mon Mar 11 14:32:15 2013 -0400
+++ b/libinterp/parse-tree/parse.h	Mon Mar 11 14:32:53 2013 -0400
@@ -328,8 +328,11 @@
   tree_decl_command *
   make_decl_command (int tok, token *tok_val, tree_decl_init_list *lst);
 
-  // Validate argument list forming a matrix or cell row.
-  tree_argument_list *validate_matrix_row (tree_argument_list *row);
+  // Check that no row of a matrix or cell expression uses tilde (~).
+  bool validate_array_list (tree_expression *e);
+
+  // Validate matrix object used in "[lhs] = ..." assignments.
+  tree_argument_list *validate_matrix_for_assignment (tree_expression *e);
 
   // Finish building an array_list (common action for finish_matrix
   // and finish_cell).
--- a/libinterp/parse-tree/pt-array-list.h	Mon Mar 11 14:32:15 2013 -0400
+++ b/libinterp/parse-tree/pt-array-list.h	Mon Mar 11 14:32:53 2013 -0400
@@ -36,6 +36,9 @@
 {
 public:
 
+  typedef octave_base_list<tree_argument_list *>::iterator iterator;
+  typedef octave_base_list<tree_argument_list *>::const_iterator const_iterator;
+
   tree_array_list (tree_argument_list *row = 0, int l = -1, int c = -1)
     : tree_expression (l, c), octave_base_list<tree_argument_list *> ()
   {
--- a/libinterp/parse-tree/pt-cell.h	Mon Mar 11 14:32:15 2013 -0400
+++ b/libinterp/parse-tree/pt-cell.h	Mon Mar 11 14:32:53 2013 -0400
@@ -47,6 +47,8 @@
 
   ~tree_cell (void) { }
 
+  bool is_cell (void) const { return true; }
+
   bool rvalue_ok (void) const { return true; }
 
   octave_value rvalue1 (int nargout = 1);
--- a/libinterp/parse-tree/pt-exp.h	Mon Mar 11 14:32:15 2013 -0400
+++ b/libinterp/parse-tree/pt-exp.h	Mon Mar 11 14:32:53 2013 -0400
@@ -52,7 +52,9 @@
 
   virtual bool is_constant (void) const { return false; }
 
-  virtual bool is_matrix_constant (void) const { return false; }
+  virtual bool is_matrix (void) const { return false; }
+
+  virtual bool is_cell (void) const { return false; }
 
   virtual bool is_identifier (void) const { return false; }
 
--- a/libinterp/parse-tree/pt-mat.h	Mon Mar 11 14:32:15 2013 -0400
+++ b/libinterp/parse-tree/pt-mat.h	Mon Mar 11 14:32:53 2013 -0400
@@ -50,6 +50,8 @@
 
   ~tree_matrix (void) { }
 
+  bool is_matrix (void) const { return true; }
+
   bool rvalue_ok (void) const { return true; }
 
   octave_value rvalue1 (int nargout = 1);