changeset 16263:9acb86e6ac90

4/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:28:11 -0400
parents b45a90cdb0ae
children 6077d13ddb3b 71ee3afedb69
files libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll libinterp/parse-tree/oct-parse.in.yy test/fntests.m
diffstat 4 files changed, 274 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h	Mon Mar 11 14:26:29 2013 -0400
+++ b/libinterp/parse-tree/lex.h	Mon Mar 11 14:28:11 2013 -0400
@@ -588,6 +588,15 @@
                               bool convert = false, bool bos = false,
                               bool qit = false);
 
+  bool maybe_unput_comma_before_unary_op (int tok);
+
+  int handle_unary_op (const char *pattern, int tok, bool convert = false,
+                       bool bos = false, bool qit = false);
+
+  int handle_incompatible_unary_op (const char *pattern, int tok,
+                                    bool convert = false, bool bos = false,
+                                    bool qit = false);
+
   int handle_assign_op (const char *pattern, int tok);
 
   int handle_incompatible_assign_op (const char *pattern, int tok);
--- a/libinterp/parse-tree/lex.ll	Mon Mar 11 14:26:29 2013 -0400
+++ b/libinterp/parse-tree/lex.ll	Mon Mar 11 14:28:11 2013 -0400
@@ -244,8 +244,8 @@
 
     int tok = curr_lexer->previous_token_value ();
 
-    if (! (tok == ',' || tok == ';' || tok == '[' || tok == '{'))
-      curr_lexer->xunput (',');
+    if (! (tok == ';' || tok == '[' || tok == '{'))
+      curr_lexer->xunput (';');
   }
 
 <KLUGE>@ {
@@ -301,27 +301,47 @@
 \[ {
     curr_lexer->lexer_debug ("\\[");
 
-    curr_lexer->nesting_level.bracket ();
-
-    curr_lexer->looking_at_object_index.push_front (false);
-
-    curr_lexer->current_input_column += yyleng;
-    curr_lexer->looking_for_object_index = false;
-    curr_lexer->at_beginning_of_statement = false;
-
-    if (curr_lexer->defining_func
-        && ! curr_lexer->parsed_function_name.top ())
-      curr_lexer->looking_at_return_list = true;
+    bool unput_comma = false;
+
+    if (curr_lexer->whitespace_is_significant ()
+        && curr_lexer->space_follows_previous_token ())
+      {
+        int tok = curr_lexer->previous_token_value ();
+
+        if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
+               || curr_lexer->previous_token_is_binop ()))
+          unput_comma = true;
+      }
+
+    if (unput_comma)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
     else
-      curr_lexer->looking_at_matrix_or_assign_lhs = true;
-
-    curr_lexer->decrement_promptflag ();
-
-    curr_lexer->bracketflag++;
-
-    curr_lexer->push_start_state (MATRIX_START);
-
-    return curr_lexer->count_token ('[');
+      {
+        curr_lexer->nesting_level.bracket ();
+
+        curr_lexer->looking_at_object_index.push_front (false);
+
+        curr_lexer->current_input_column += yyleng;
+        curr_lexer->looking_for_object_index = false;
+        curr_lexer->at_beginning_of_statement = false;
+
+        if (curr_lexer->defining_func
+            && ! curr_lexer->parsed_function_name.top ())
+          curr_lexer->looking_at_return_list = true;
+        else
+          curr_lexer->looking_at_matrix_or_assign_lhs = true;
+
+        curr_lexer->decrement_promptflag ();
+
+        curr_lexer->bracketflag++;
+
+        curr_lexer->push_start_state (MATRIX_START);
+
+        return curr_lexer->count_token ('[');
+      }
   }
 
 \] {
@@ -485,9 +505,12 @@
 {NUMBER}{Im} {
     curr_lexer->lexer_debug ("{NUMBER}{Im}");
 
+    int tok = curr_lexer->previous_token_value ();
+
     if (curr_lexer->whitespace_is_significant ()
         && curr_lexer->space_follows_previous_token ()
-        && ! curr_lexer->previous_token_is_binop ())
+        && ! (tok == '[' || tok == '{'
+              || curr_lexer->previous_token_is_binop ()))
       {
         yyless (0);
         unput (',');
@@ -508,9 +531,12 @@
 {NUMBER} {
     curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}");
 
+    int tok = curr_lexer->previous_token_value ();
+
     if (curr_lexer->whitespace_is_significant ()
         && curr_lexer->space_follows_previous_token ()
-        && ! curr_lexer->previous_token_is_binop ())
+        && ! (tok == '[' || tok == '{'
+              || curr_lexer->previous_token_is_binop ()))
       {
         yyless (0);
         unput (',');
@@ -571,7 +597,8 @@
       }
     else
       {
-        if (curr_lexer->previous_token_may_be_command ())
+        if (! curr_lexer->looking_at_decl_list
+            && curr_lexer->previous_token_may_be_command ())
           {
             yyless (0);
             curr_lexer->push_start_state (COMMAND_START);
@@ -696,7 +723,7 @@
           }
         else
           {
-            if (tok == ',' || tok == ';'
+            if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
                 || curr_lexer->previous_token_is_binop ())
               {
                 curr_lexer->current_input_column++;
@@ -736,7 +763,7 @@
       {
         if (curr_lexer->space_follows_previous_token ())
           {
-            if (tok == '[' || tok == '{'
+            if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
                 || curr_lexer->previous_token_is_binop ())
               {
                 curr_lexer->current_input_column++;
@@ -785,8 +812,6 @@
 "|"     { return curr_lexer->handle_op ("|", EXPR_OR); }
 "<"     { return curr_lexer->handle_op ("<", EXPR_LT); }
 ">"     { return curr_lexer->handle_op (">", EXPR_GT); }
-"+"     { return curr_lexer->handle_op ("+", '+'); }
-"-"     { return curr_lexer->handle_op ("-", '-'); }
 "*"     { return curr_lexer->handle_op ("*", '*'); }
 "/"     { return curr_lexer->handle_op ("/", '/'); }
 "\\"    { return curr_lexer->handle_op ("\\", LEFTDIV); }
@@ -796,10 +821,71 @@
 "||"    { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
 "<<"    { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); }
 ">>"    { return curr_lexer->handle_incompatible_op (">>", RSHIFT); }
-"~"     { return curr_lexer->handle_op ("~", EXPR_NOT); }
-"!"     { return curr_lexer->handle_incompatible_op ("!", EXPR_NOT); }
 ";"     { return curr_lexer->handle_op (";", ';', true, true); }
 
+"+" {
+   int tok = curr_lexer->handle_unary_op ("+", '+');
+
+    if (tok < 0)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
+    else
+      return tok;
+  }
+
+"-" {
+    int prev_tok = curr_lexer->previous_token_value ();
+    bool space_before = curr_lexer->space_follows_previous_token ();
+    int c = curr_lexer->text_yyinput ();
+    curr_lexer->xunput (c);
+    bool space_after = (c == ' ' || c == '\t');
+
+    if (space_before && ! space_after
+        && curr_lexer->previous_token_may_be_command ())
+      {
+        yyless (0);
+        curr_lexer->push_start_state (COMMAND_START);
+      }
+    else
+      {
+        int tok = curr_lexer->handle_unary_op ("-", '-');
+
+        if (tok < 0)
+          {
+            yyless (0);
+            curr_lexer->xunput (',');
+          }
+        else
+          return tok;
+      }
+  }
+
+"~" {
+    int tok = curr_lexer->handle_unary_op ("~", EXPR_NOT);
+
+    if (tok < 0)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
+    else
+      return tok;
+  }
+
+"!" {
+    int tok = curr_lexer->handle_incompatible_unary_op ("!", EXPR_NOT);
+
+    if (tok < 0)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
+    else
+      return tok;
+  }
+
 "," {
     return curr_lexer->handle_op
       (",", ',', true, ! curr_lexer->looking_at_object_index.front ());
@@ -810,35 +896,70 @@
   }
 
 "++" {
-    return curr_lexer->handle_incompatible_op
-      ("++", PLUS_PLUS, true, false, true);
+    int tok = curr_lexer->handle_incompatible_unary_op
+                ("++", PLUS_PLUS, true, false, true);
+
+    if (tok < 0)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
+    else
+      return tok;
   }
 
 "--" {
-    ;
-    return curr_lexer->handle_incompatible_op
-      ("--", MINUS_MINUS, true, false, true);
+    int tok = curr_lexer->handle_incompatible_unary_op
+                ("--", MINUS_MINUS, true, false, true);
+
+    if (tok < 0)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
+    else
+      return tok;
   }
 
 "(" {
     curr_lexer->lexer_debug ("(");
 
-    // If we are looking for an object index, then push TRUE for
-    // looking_at_object_index.  Otherwise, just push whatever state
-    // is current (so that we can pop it off the stack when we find
-    // the matching close paren).
-
-    curr_lexer->looking_at_object_index.push_front
-      (curr_lexer->looking_for_object_index);
-
-    curr_lexer->looking_at_indirect_ref = false;
-    curr_lexer->looking_for_object_index = false;
-    curr_lexer->at_beginning_of_statement = false;
-
-    curr_lexer->nesting_level.paren ();
-    curr_lexer->decrement_promptflag ();
-
-    return curr_lexer->handle_token ('(');
+    bool unput_comma = false;
+
+    if (curr_lexer->whitespace_is_significant ()
+        && curr_lexer->space_follows_previous_token ())
+      {
+        int tok = curr_lexer->previous_token_value ();
+
+        if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
+               || curr_lexer->previous_token_is_binop ()))
+          unput_comma = true;
+      }
+
+    if (unput_comma)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
+    else
+      {
+        // If we are looking for an object index, then push TRUE for
+        // looking_at_object_index.  Otherwise, just push whatever state
+        // is current (so that we can pop it off the stack when we find
+        // the matching close paren).
+
+        curr_lexer->looking_at_object_index.push_front
+          (curr_lexer->looking_for_object_index);
+
+        curr_lexer->looking_at_indirect_ref = false;
+        curr_lexer->looking_for_object_index = false;
+        curr_lexer->at_beginning_of_statement = false;
+
+        curr_lexer->nesting_level.paren ();
+        curr_lexer->decrement_promptflag ();
+
+        return curr_lexer->handle_token ('(');
+      }
   }
 
 ")" {
@@ -1102,22 +1223,42 @@
 "{" {
     curr_lexer->lexer_debug ("{");
 
-    curr_lexer->nesting_level.brace ();
-
-    curr_lexer->looking_at_object_index.push_front
-      (curr_lexer->looking_for_object_index);
-
-    curr_lexer->current_input_column += yyleng;
-    curr_lexer->looking_for_object_index = false;
-    curr_lexer->at_beginning_of_statement = false;
-
-    curr_lexer->decrement_promptflag ();
-
-    curr_lexer->braceflag++;
-
-    curr_lexer->push_start_state (MATRIX_START);
-
-    return curr_lexer->count_token ('{');
+    bool unput_comma = false;
+
+    if (curr_lexer->whitespace_is_significant ()
+        && curr_lexer->space_follows_previous_token ())
+      {
+        int tok = curr_lexer->previous_token_value ();
+
+        if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
+               || curr_lexer->previous_token_is_binop ()))
+          unput_comma = true;
+      }
+
+    if (unput_comma)
+      {
+        yyless (0);
+        curr_lexer->xunput (',');
+      }
+    else
+      {
+        curr_lexer->nesting_level.brace ();
+
+        curr_lexer->looking_at_object_index.push_front
+          (curr_lexer->looking_for_object_index);
+
+        curr_lexer->current_input_column += yyleng;
+        curr_lexer->looking_for_object_index = false;
+        curr_lexer->at_beginning_of_statement = false;
+
+        curr_lexer->decrement_promptflag ();
+
+        curr_lexer->braceflag++;
+
+        curr_lexer->push_start_state (MATRIX_START);
+
+        return curr_lexer->count_token ('{');
+      }
   }
 
 "}" {
@@ -1902,11 +2043,12 @@
           // fall through ...
 
         case persistent_kw:
+        case global_kw:
+          looking_at_decl_list = true;
           break;
 
         case case_kw:
         case elseif_kw:
-        case global_kw:
         case until_kw:
           break;
 
@@ -3259,6 +3401,8 @@
 octave_lexer::handle_op (const char *pattern, int tok, bool convert,
                          bool bos, bool qit)
 {
+  lexer_debug (pattern);
+
   return handle_op_internal (pattern, tok, convert, bos, qit, true);
 }
 
@@ -3266,9 +3410,55 @@
 octave_lexer::handle_incompatible_op (const char *pattern, int tok,
                                       bool convert, bool bos, bool qit)
 {
+  lexer_debug (pattern);
+
   return handle_op_internal (pattern, tok, convert, bos, qit, false);
 }
 
+bool
+octave_lexer::maybe_unput_comma_before_unary_op (int tok)
+{
+  int prev_tok = previous_token_value ();
+
+  bool unput_comma = false;
+
+  if (whitespace_is_significant () && space_follows_previous_token ())
+    {
+      int c = text_yyinput ();
+      xunput (c);
+
+      bool space_after = (c == ' ' || c == '\t');
+
+      if (! (prev_tok == ';' || prev_tok == ','
+             || prev_tok == '[' || prev_tok == '{'
+             || previous_token_is_binop ()
+             || ((tok == '+' || tok == '-') && space_after)))
+        unput_comma = true;
+    }
+
+  return unput_comma;
+}
+
+int
+octave_lexer::handle_unary_op (const char *pattern, int tok, bool convert,
+                               bool bos, bool qit)
+{
+  lexer_debug (pattern);
+
+  return maybe_unput_comma_before_unary_op (tok)
+    ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, true);
+}
+
+int
+octave_lexer::handle_incompatible_unary_op (const char *pattern, int tok,
+                                            bool convert, bool bos, bool qit)
+{
+  lexer_debug (pattern);
+
+  return maybe_unput_comma_before_unary_op (tok)
+    ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false);
+}
+
 int
 octave_lexer::handle_assign_op (const char *pattern, int tok)
 {
@@ -3291,8 +3481,6 @@
 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert,
                                   bool bos, bool qit, bool compat)
 {
-  lexer_debug (pattern);
-
   if (! compat)
     gripe_matlab_incompatible_operator (flex_yytext ());
 
--- a/libinterp/parse-tree/oct-parse.in.yy	Mon Mar 11 14:26:29 2013 -0400
+++ b/libinterp/parse-tree/oct-parse.in.yy	Mon Mar 11 14:28:11 2013 -0400
@@ -751,18 +751,14 @@
 // Declaration statemnts
 // =====================
 
-parsing_decl_list
-                : // empty
-                  { curr_lexer->looking_at_decl_list = true; }
-
-declaration     : GLOBAL parsing_decl_list decl1
+declaration     : GLOBAL decl1
                   {
-                    $$ = curr_parser.make_decl_command (GLOBAL, $1, $3);
+                    $$ = curr_parser.make_decl_command (GLOBAL, $1, $2);
                     curr_lexer->looking_at_decl_list = false;
                   }
-                | PERSISTENT parsing_decl_list decl1
+                | PERSISTENT decl1
                   {
-                    $$ = curr_parser.make_decl_command (PERSISTENT, $1, $3);
+                    $$ = curr_parser.make_decl_command (PERSISTENT, $1, $2);
                     curr_lexer->looking_at_decl_list = false;
                   }
                 ;
--- a/test/fntests.m	Mon Mar 11 14:26:29 2013 -0400
+++ b/test/fntests.m	Mon Mar 11 14:28:11 2013 -0400
@@ -25,6 +25,8 @@
 
 currdir = canonicalize_file_name (".");
 
+debug_on_error (true);
+
 if (nargin == 1)
   xdir = argv (){1};
 else