changeset 16267:15f55df088e7

6/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:30:57 -0400
parents 71ee3afedb69
children dbbef00202ff 488b0fef52c5
files libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll libinterp/parse-tree/token.cc libinterp/parse-tree/token.h
diffstat 4 files changed, 77 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h	Mon Mar 11 14:29:19 2013 -0400
+++ b/libinterp/parse-tree/lex.h	Mon Mar 11 14:30:57 2013 -0400
@@ -274,6 +274,8 @@
 
   bool previous_token_is_binop (void) const;
 
+  bool previous_token_is_keyword (void) const;  // true when the front entry of the token list is a keyword token
+
   bool previous_token_may_be_command (void) const;
 
   // true means that we have encountered eof on the input stream.
--- a/libinterp/parse-tree/lex.ll	Mon Mar 11 14:29:19 2013 -0400
+++ b/libinterp/parse-tree/lex.ll	Mon Mar 11 14:30:57 2013 -0400
@@ -242,10 +242,15 @@
 <MATRIX_START>{NL} {
     curr_lexer->lexer_debug ("<MATRIX_START>{NL}");
 
-    int tok = curr_lexer->previous_token_value ();
-
-    if (! (tok == ';' || tok == '[' || tok == '{'))
-      curr_lexer->xunput (';');
+    if (curr_lexer->nesting_level.is_paren ())  // presumably: innermost open delimiter is '(' -- confirm
+      curr_lexer->gripe_matlab_incompatible ("bare newline inside parentheses");
+    else
+      {
+        int tok = curr_lexer->previous_token_value ();
+
+        if (! (tok == ';' || tok == '[' || tok == '{'))  // skip when the previous token is already ';', '[' or '{'
+          curr_lexer->xunput (';');  // push a ';' back onto the input after the matched newline
+      }
   }
 
 <KLUGE>@ {
@@ -308,7 +313,7 @@
       {
         int tok = curr_lexer->previous_token_value ();
 
-        if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
+        if (! (tok == '[' || tok == '{'
                || curr_lexer->previous_token_is_binop ()))
           unput_comma = true;
       }
@@ -723,8 +728,9 @@
           }
         else
           {
-            if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
-                || curr_lexer->previous_token_is_binop ())
+            if (tok == '[' || tok == '{'
+                || curr_lexer->previous_token_is_binop ()
+                || curr_lexer->previous_token_is_keyword ())
               {
                 curr_lexer->current_input_column++;
                 int retval = curr_lexer->handle_string ('\'');
@@ -736,15 +742,16 @@
       }
     else
       {
-        if (tok == NAME || tok == NUM || tok == IMAG_NUM
-            || tok == ')' || tok == ']' || tok == '}')
-          return curr_lexer->count_token (QUOTE);
-        else
+        if (! tok || tok == '[' || tok == '{' || tok == '('
+            || curr_lexer->previous_token_is_binop ()
+            || curr_lexer->previous_token_is_keyword ())
           {
             curr_lexer->current_input_column++;
             int retval = curr_lexer->handle_string ('\'');
             return curr_lexer->count_token_internal (retval);
           }
+        else
+          return curr_lexer->count_token (QUOTE);
       }
   }
 
@@ -763,7 +770,7 @@
       {
         if (curr_lexer->space_follows_previous_token ())
           {
-            if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
+            if (tok == '[' || tok == '{'
                 || curr_lexer->previous_token_is_binop ())
               {
                 curr_lexer->current_input_column++;
@@ -813,7 +820,24 @@
 "<"     { return curr_lexer->handle_op ("<", EXPR_LT); }
 ">"     { return curr_lexer->handle_op (">", EXPR_GT); }
 "*"     { return curr_lexer->handle_op ("*", '*'); }
-"/"     { return curr_lexer->handle_op ("/", '/'); }
+
+"/" {  // choose between the '/' operator token and re-scanning in COMMAND_START
+    int prev_tok = curr_lexer->previous_token_value ();  // NOTE(review): prev_tok is never read in this action
+    bool space_before = curr_lexer->space_follows_previous_token ();
+    int c = curr_lexer->text_yyinput ();  // presumably reads the character that follows the '/' -- confirm
+    curr_lexer->xunput (c);  // hand that character straight back; it is only inspected here
+    bool space_after = (c == ' ' || c == '\t');
+
+    if (space_before && ! space_after  // space_before set, and the lookahead is neither space nor tab
+        && curr_lexer->previous_token_may_be_command ())
+      {
+        yyless (0);  // return the matched '/' to the input so it is scanned again
+        curr_lexer->push_start_state (COMMAND_START);  // no return here: scanning continues in COMMAND_START
+      }
+    else
+      return curr_lexer->handle_op ("/", '/');  // otherwise hand '/' to handle_op and return its result
+  }
+
 "\\"    { return curr_lexer->handle_op ("\\", LEFTDIV); }
 "^"     { return curr_lexer->handle_op ("^", POW); }
 "**"    { return curr_lexer->handle_incompatible_op ("**", POW); }
@@ -942,7 +966,7 @@
       {
         int tok = curr_lexer->previous_token_value ();
 
-        if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
+        if (! (tok == '[' || tok == '{'
                || curr_lexer->previous_token_is_binop ()))
           unput_comma = true;
       }
@@ -1244,7 +1268,7 @@
       {
         int tok = curr_lexer->previous_token_value ();
 
-        if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
+        if (! (tok == '[' || tok == '{'
                || curr_lexer->previous_token_is_binop ()))
           unput_comma = true;
       }
@@ -1750,6 +1774,13 @@
 }
 
 bool
+lexical_feedback::previous_token_is_keyword (void) const  // is the token at the front of the list a keyword?
+{
+  const token *tok = tokens.front ();  // NOTE(review): front () is taken to be the previous token -- confirm against push_token
+  return tok ? tok->is_keyword () : false;  // a null front pointer is reported as "not a keyword"
+}
+
+bool
 lexical_feedback::previous_token_may_be_command (void) const
 {
   const token *tok = tokens.front ();
@@ -2125,8 +2156,7 @@
           break;
 
         case endenumeration_kw:
-          tok_val = new token (endenumeration_kw, token::enumeration_end,
-                               l, c);
+          tok_val = new token (endenumeration_kw, token::enumeration_end, l, c);
           at_beginning_of_statement = true;
           break;
 
@@ -2221,7 +2251,8 @@
             if ((reading_fcn_file || reading_script_file
                  || reading_classdef_file)
                 && ! fcn_file_full_name.empty ())
-              tok_val = new token (magic_file_kw, fcn_file_full_name, l, c);
+              tok_val = new token (magic_file_kw, true,
+                                   fcn_file_full_name, l, c);
             else
               tok_val = new token (magic_file_kw, "stdin", l, c);
           }
@@ -2237,7 +2268,7 @@
         }
 
       if (! tok_val)
-        tok_val = new token (kw->tok, l, c);
+        tok_val = new token (kw->tok, true, l, c);
 
       push_token (tok_val);
 
@@ -3443,8 +3474,7 @@
 
       bool space_after = (c == ' ' || c == '\t');
 
-      if (! (prev_tok == ';' || prev_tok == ','
-             || prev_tok == '[' || prev_tok == '{'
+      if (! (prev_tok == '[' || prev_tok == '{'
              || previous_token_is_binop ()
              || ((tok == '+' || tok == '-') && space_after)))
         unput_comma = true;
--- a/libinterp/parse-tree/token.cc	Mon Mar 11 14:29:19 2013 -0400
+++ b/libinterp/parse-tree/token.cc	Mon Mar 11 14:30:57 2013 -0400
@@ -42,6 +42,16 @@
   type_tag = generic_token;
 }
 
+token::token (int tv, bool is_kw, int l, int c)  // stores token value and position only; is_kw picks the type tag
+{
+  maybe_cmd = false;
+  tspc = false;
+  line_num = l;  // line and column exactly as supplied by the caller
+  column_num = c;
+  tok_val = tv;
+  type_tag = is_kw ? keyword_token : generic_token;  // keyword_token when is_kw is true, generic_token otherwise
+}
+
 token::token (int tv, const std::string& s, int l, int c)
 {
   maybe_cmd = false;
@@ -135,6 +145,12 @@
   return num;
 }
 
+token::token_type
+token::ttype (void) const  // accessor: returns the token's stored type tag
+{
+  return type_tag;
+}
+
 token::end_tok_type
 token::ettype (void) const
 {
--- a/libinterp/parse-tree/token.h	Mon Mar 11 14:29:19 2013 -0400
+++ b/libinterp/parse-tree/token.h	Mon Mar 11 14:30:57 2013 -0400
@@ -35,6 +35,7 @@
   enum token_type
     {
       generic_token,
+      keyword_token,
       string_token,
       double_token,
       ettype_token,
@@ -62,6 +63,7 @@
     };
 
   token (int tv, int l = -1, int c = -1);
+  token (int tv, bool is_keyword, int l = -1, int c = -1);  // NOTE(review): a string literal as 2nd argument converts to bool in preference to std::string, so token (tv, "text", l, c) likely binds here -- confirm call sites
   token (int tv, const std::string& s, int l = -1, int c = -1);
   token (int tv, double d, const std::string& s = std::string (),
          int l = -1, int c = -1);
@@ -87,8 +89,14 @@
   int line (void) const { return line_num; }
   int column (void) const { return column_num; }
 
+  bool is_keyword (void) const  // true for keyword_token and also for ettype_token
+  {
+    return type_tag == keyword_token || type_tag == ettype_token;  // presumably ettype_token marks block-end keywords -- confirm
+  }
+
   std::string text (void) const;
   double number (void) const;
+  token_type ttype (void) const;  // returns the stored type_tag
   end_tok_type ettype (void) const;
   symbol_table::symbol_record *sym_rec (void);