changeset 16255:12bf6a3f8c45

Store more info in token value class. * token.h, token.cc: Store token ID and trailing space info. * lex.h, lex.ll (lexical_feedback::token_cache): Handle storing and retrieving extra info in the lexer.
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:08:50 -0400
parents a89cf57ba3a5
children b28062b977fd db7f07b22b9b
files libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll libinterp/parse-tree/token.cc libinterp/parse-tree/token.h
diffstat 4 files changed, 168 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h	Sat Mar 09 21:44:14 2013 -0500
+++ b/libinterp/parse-tree/lex.h	Mon Mar 11 14:08:50 2013 -0400
@@ -188,15 +188,36 @@
     }
 
     // Direct access.
-    token *at (size_t n) { return buffer.at (n); }
-    const token *at (size_t n) const { return buffer.at (n); }
+    token *at (size_t n)
+    {
+      return empty () ? 0 : buffer.at (n);
+    }
+
+    const token *at (size_t n) const
+    {
+      return empty () ? 0 : buffer.at (n);
+    }
 
     // Most recently pushed.
-    token *front (void) { return buffer.front (); }
-    const token *front (void) const { return buffer.front (); }
+    token *front (void)
+    {
+      return empty () ? 0 : buffer.front ();
+    }
+
+    const token *front (void) const
+    {
+      return empty () ? 0 : buffer.front ();
+    }
 
-    token *back (void) { return buffer.back (); }
-    const token *back (void) const { return buffer.back (); }
+    token *back (void)
+    {
+      return empty () ? 0 : buffer.back ();
+    }
+
+    const token *back (void) const
+    {
+      return empty () ? 0 : buffer.back ();
+    }
 
     // Number of elements currently in the buffer, max of sz.
     size_t size (void) const { return buffer.size (); }
@@ -253,6 +274,14 @@
 
   void reset (void);
 
+  int previous_token_value (void) const;
+
+  bool previous_token_value_is (int tok_val) const;
+
+  void mark_previous_token_trailing_space (void);
+
+  bool space_follows_previous_token (void) const;
+
   // true means that we have encountered eof on the input stream.
   bool end_of_input;
 
--- a/libinterp/parse-tree/lex.ll	Sat Mar 09 21:44:14 2013 -0500
+++ b/libinterp/parse-tree/lex.ll	Mon Mar 11 14:08:50 2013 -0400
@@ -562,12 +562,25 @@
     curr_lexer->input_line_number++;
     curr_lexer->current_input_column = 1;
 
+    bool have_space = false;
     size_t len = yyleng;
     size_t i = 0;
     while (i < len)
       {
         char c = yytext[i];
-        if (c == '#' || c == '%' || c == ' ' || c == '\t')
+        if (c == ' ' || c == '\t')
+          {
+            have_space = true;
+            i++;
+          }
+        else
+          break;
+      }
+
+    while (i < len)
+      {
+        char c = yytext[i];
+        if (c == '#' || c == '%')
           i++;
         else
           break;
@@ -577,6 +590,9 @@
 
     if (! full_line_comment)
       {
+        if (have_space)
+          curr_lexer->mark_previous_token_trailing_space ();
+
         curr_lexer->finish_comment (octave_comment_elt::end_of_line);
 
         curr_lexer->pop_start_state ();
@@ -627,6 +643,8 @@
 
 {S}* {
     curr_lexer->current_input_column += yyleng;
+
+    curr_lexer->mark_previous_token_trailing_space ();
   }
 
 %{
@@ -1369,6 +1387,35 @@
   tokens.clear ();
 }
 
+int
+lexical_feedback::previous_token_value (void) const
+{
+  const token *tok = tokens.front ();
+  return tok ? tok->token_value () : 0;
+}
+
+bool
+lexical_feedback::previous_token_value_is (int tok_val) const
+{
+  const token *tok = tokens.front ();
+  return tok ? tok->token_value_is (tok_val) : false;
+}
+
+void
+lexical_feedback::mark_previous_token_trailing_space (void)
+{
+  token *tok = tokens.front ();
+  if (tok && ! previous_token_value_is ('\n'))
+    tok->mark_trailing_space ();
+}
+
+bool
+lexical_feedback::space_follows_previous_token (void) const
+{
+  const token *tok = tokens.front ();
+  return tok ? tok->space_follows_token () : false;
+}
+
 static bool
 looks_like_copyright (const std::string& s)
 {
@@ -1726,72 +1773,74 @@
                             || parsed_function_name.top ()))))
             return 0;
 
-          tok_val = new token (token::simple_end, l, c);
+          tok_val = new token (end_kw, token::simple_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case end_try_catch_kw:
-          tok_val = new token (token::try_catch_end, l, c);
+          tok_val = new token (end_try_catch_kw, token::try_catch_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case end_unwind_protect_kw:
-          tok_val = new token (token::unwind_protect_end, l, c);
+          tok_val = new token (end_unwind_protect_kw,
+                               token::unwind_protect_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endfor_kw:
-          tok_val = new token (token::for_end, l, c);
+          tok_val = new token (endfor_kw, token::for_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endfunction_kw:
-          tok_val = new token (token::function_end, l, c);
+          tok_val = new token (endfunction_kw, token::function_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endif_kw:
-          tok_val = new token (token::if_end, l, c);
+          tok_val = new token (endif_kw, token::if_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endparfor_kw:
-          tok_val = new token (token::parfor_end, l, c);
+          tok_val = new token (endparfor_kw, token::parfor_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endswitch_kw:
-          tok_val = new token (token::switch_end, l, c);
+          tok_val = new token (endswitch_kw, token::switch_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endwhile_kw:
-          tok_val = new token (token::while_end, l, c);
+          tok_val = new token (endwhile_kw, token::while_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endclassdef_kw:
-          tok_val = new token (token::classdef_end, l, c);
+          tok_val = new token (endclassdef_kw, token::classdef_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endenumeration_kw:
-          tok_val = new token (token::enumeration_end, l, c);
+          tok_val = new token (endenumeration_kw, token::enumeration_end,
+                               l, c);
           at_beginning_of_statement = true;
           break;
 
         case endevents_kw:
-          tok_val = new token (token::events_end, l, c);
+          tok_val = new token (endevents_kw, token::events_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endmethods_kw:
-          tok_val = new token (token::methods_end, l, c);
+          tok_val = new token (endmethods_kw, token::methods_end, l, c);
           at_beginning_of_statement = true;
           break;
 
         case endproperties_kw:
-          tok_val = new token (token::properties_end, l, c);
+          tok_val = new token (endproperties_kw, token::properties_end, l, c);
           at_beginning_of_statement = true;
           break;
 
@@ -1871,14 +1920,15 @@
             if ((reading_fcn_file || reading_script_file
                  || reading_classdef_file)
                 && ! fcn_file_full_name.empty ())
-              tok_val = new token (fcn_file_full_name, l, c);
+              tok_val = new token (magic_file_kw, fcn_file_full_name, l, c);
             else
-              tok_val = new token ("stdin", l, c);
+              tok_val = new token (magic_file_kw, "stdin", l, c);
           }
           break;
 
         case magic_line_kw:
-          tok_val = new token (static_cast<double> (l), "", l, c);
+          tok_val = new token (magic_line_kw, static_cast<double> (l),
+                               "", l, c);
           break;
 
         default:
@@ -1886,7 +1936,7 @@
         }
 
       if (! tok_val)
-        tok_val = new token (l, c);
+        tok_val = new token (kw->tok, l, c);
 
       push_token (tok_val);
 
@@ -2285,7 +2335,7 @@
   looking_for_object_index = false;
   at_beginning_of_statement = false;
 
-  push_token (new token (value, yytxt, input_line_number,
+  push_token (new token (NUM, value, yytxt, input_line_number,
                          current_input_column));
 
   current_input_column += flex_yyleng ();
@@ -2318,6 +2368,9 @@
         break;
     }
 
+  if (have_space)
+    mark_previous_token_trailing_space ();
+
   bool have_comment = false;
   while (offset < yylng)
     {
@@ -2575,8 +2628,6 @@
                   quote_is_transpose = true;
                   convert_spaces_to_comma = true;
 
-                  push_token (new token (s, bos_line, bos_col));
-
                   if (delim == '"')
                     gripe_matlab_incompatible ("\" used as string delimiter");
                   else if (delim == '\'')
@@ -2585,7 +2636,11 @@
                   looking_for_object_index = true;
                   at_beginning_of_statement = false;
 
-                  return delim == '"' ? DQ_STRING : SQ_STRING;
+                  int tok = delim == '"' ? DQ_STRING : SQ_STRING;
+
+                  push_token (new token (tok, s, bos_line, bos_col));
+
+                  return tok;
                 }
             }
         }
@@ -3087,7 +3142,8 @@
       return LEXICAL_ERROR;
     }
 
-  push_token (new token (meth.empty () ? 0 : &(symbol_table::insert (meth)),
+  push_token (new token (SUPERCLASSREF,
+                         meth.empty () ? 0 : &(symbol_table::insert (meth)),
                          cls.empty () ? 0 : &(symbol_table::insert (cls)),
                          pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
                          input_line_number, current_input_column));
@@ -3121,7 +3177,8 @@
       return LEXICAL_ERROR;
     }
 
-  push_token (new token (cls.empty () ? 0 : &(symbol_table::insert (cls)),
+  push_token (new token (METAQUERY,
+                         cls.empty () ? 0 : &(symbol_table::insert (cls)),
                          pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
                          input_line_number, current_input_column));
 
@@ -3161,7 +3218,7 @@
 
       maybe_unput_comma (spc_gobbled);
 
-      push_token (new token (tok, input_line_number,
+      push_token (new token (STRUCT_ELT, tok, input_line_number,
                              current_input_column));
 
       quote_is_transpose = true;
@@ -3196,7 +3253,7 @@
         }
       else
         {
-          push_token (new token (tok, input_line_number,
+          push_token (new token (FCN_HANDLE, tok, input_line_number,
                                  current_input_column));
 
           current_input_column += flex_yyleng ();
@@ -3277,7 +3334,7 @@
   if (tok == "end")
     tok = "__end__";
 
-  push_token (new token (&(symbol_table::insert (tok)),
+  push_token (new token (NAME, &(symbol_table::insert (tok)),
                          input_line_number, current_input_column));
 
   // After seeing an identifer, it is ok to convert spaces to a comma
@@ -3635,7 +3692,7 @@
   if (! compat)
     gripe_matlab_incompatible_operator (flex_yytext ());
 
-  push_token (new token (input_line_number, current_input_column));
+  push_token (new token (tok, input_line_number, current_input_column));
 
   current_input_column += flex_yyleng ();
   quote_is_transpose = qit;
@@ -3649,7 +3706,8 @@
 int
 octave_lexer::handle_token (const std::string& name, int tok)
 {
-  token *tok_val = new token (name, input_line_number, current_input_column);
+  token *tok_val = new token (tok, name, input_line_number,
+                              current_input_column);
 
   return handle_token (tok, tok_val);
 }
@@ -3658,7 +3716,7 @@
 octave_lexer::handle_token (int tok, token *tok_val)
 {
   if (! tok_val)
-    tok_val = new token (input_line_number, current_input_column);
+    tok_val = new token (tok, input_line_number, current_input_column);
 
   push_token (tok_val);
 
--- a/libinterp/parse-tree/token.cc	Sat Mar 09 21:44:14 2013 -0500
+++ b/libinterp/parse-tree/token.cc	Mon Mar 11 14:08:50 2013 -0400
@@ -32,62 +32,76 @@
 #include "token.h"
 #include "utils.h"
 
-token::token (int l, int c)
+token::token (int tv, int l, int c)
 {
+  tspc = false;
   line_num = l;
   column_num = c;
+  tok_val = tv;
   type_tag = generic_token;
 }
 
-token::token (const std::string& s, int l, int c)
+token::token (int tv, const std::string& s, int l, int c)
 {
+  tspc = false;
   line_num = l;
   column_num = c;
+  tok_val = tv;
   type_tag = string_token;
   str = new std::string (s);
 }
 
-token::token (double d, const std::string& s, int l, int c)
+token::token (int tv, double d, const std::string& s, int l, int c)
 {
+  tspc = false;
   line_num = l;
   column_num = c;
+  tok_val = tv;
   type_tag = double_token;
   num = d;
   orig_text = s;
 }
 
-token::token (end_tok_type t, int l, int c)
+token::token (int tv, end_tok_type t, int l, int c)
 {
+  tspc = false;
   line_num = l;
   column_num = c;
+  tok_val = tv;
   type_tag = ettype_token;
   et = t;
 }
 
-token::token (symbol_table::symbol_record *s, int l, int c)
+token::token (int tv, symbol_table::symbol_record *s, int l, int c)
 {
+  tspc = false;
   line_num = l;
   column_num = c;
+  tok_val = tv;
   type_tag = sym_rec_token;
   sr = s;
 }
 
-token::token (symbol_table::symbol_record *cls,
+token::token (int tv, symbol_table::symbol_record *cls,
               symbol_table::symbol_record *pkg, int l, int c)
 {
+  tspc = false;
   line_num = l;
   column_num = c;
+  tok_val = tv;
   type_tag = meta_rec_token;
   mc.cr = cls;
   mc.pr = pkg;
 }
 
-token::token (symbol_table::symbol_record *mth,
+token::token (int tv, symbol_table::symbol_record *mth,
               symbol_table::symbol_record *cls,
               symbol_table::symbol_record *pkg, int l, int c)
 {
+  tspc = false;
   line_num = l;
   column_num = c;
+  tok_val = tv;
   type_tag = scls_rec_token;
   sc.mr = mth;
   sc.cr = cls;
@@ -101,21 +115,21 @@
 }
 
 std::string
-token::text (void)
+token::text (void) const
 {
   assert (type_tag == string_token);
   return *str;
 }
 
 double
-token::number (void)
+token::number (void) const
 {
   assert (type_tag == double_token);
   return num;
 }
 
 token::end_tok_type
-token::ettype (void)
+token::ettype (void) const
 {
   assert (type_tag == ettype_token);
   return et;
--- a/libinterp/parse-tree/token.h	Sat Mar 09 21:44:14 2013 -0500
+++ b/libinterp/parse-tree/token.h	Mon Mar 11 14:08:50 2013 -0400
@@ -61,26 +61,32 @@
       unwind_protect_end
     };
 
-  token (int l = -1, int c = -1);
-  token (const std::string& s, int l = -1, int c = -1);
-  token (double d, const std::string& s = std::string (),
+  token (int tv, int l = -1, int c = -1);
+  token (int tv, const std::string& s, int l = -1, int c = -1);
+  token (int tv, double d, const std::string& s = std::string (),
          int l = -1, int c = -1);
-  token (end_tok_type t, int l = -1, int c = -1);
-  token (symbol_table::symbol_record *s, int l = -1, int c = -1);
-  token (symbol_table::symbol_record *cls,
+  token (int tv, end_tok_type t, int l = -1, int c = -1);
+  token (int tv, symbol_table::symbol_record *s, int l = -1, int c = -1);
+  token (int tv, symbol_table::symbol_record *cls,
          symbol_table::symbol_record *pkg, int l = -1, int c = -1);
-  token (symbol_table::symbol_record *mth,
+  token (int tv, symbol_table::symbol_record *mth,
          symbol_table::symbol_record *cls,
          symbol_table::symbol_record *pkg, int l = -1, int c = -1);
 
   ~token (void);
 
-  int line (void) { return line_num; }
-  int column (void) { return column_num; }
+  void mark_trailing_space (void) { tspc = true; }
+  bool space_follows_token (void) const { return tspc; }
+
+  int token_value (void) const { return tok_val; }
+  bool token_value_is (int tv) const { return tv == tok_val; }
 
-  std::string text (void);
-  double number (void);
-  end_tok_type ettype (void);
+  int line (void) const { return line_num; }
+  int column (void) const { return column_num; }
+
+  std::string text (void) const;
+  double number (void) const;
+  end_tok_type ettype (void) const;
   symbol_table::symbol_record *sym_rec (void);
 
   symbol_table::symbol_record *method_rec (void);
@@ -100,8 +106,10 @@
 
   token& operator = (const token& tok);
 
+  bool tspc;
   int line_num;
   int column_num;
+  int tok_val;
   token_type type_tag;
   union
     {