changeset 27769:07ffed9878ad

store begin and end position in tokens * token.h, token.cc (token::m_beg_pos): Rename from m_pos. (token::m_end_pos): New member variable. (token::beg_pos, token::end_pos): New functions. (token::token): Accept beginning and ending positions for tokens as arguments. Change all uses but store the same position for both beginning and end in most cases. Subsequent changes will store the correct beginning and ending positions.
author John W. Eaton <jwe@octave.org>
date Mon, 02 Dec 2019 22:58:55 -0600
parents d6701f835496
children 7a06e352ac61
files libinterp/parse-tree/lex.ll libinterp/parse-tree/token.cc libinterp/parse-tree/token.h
diffstat 3 files changed, 127 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.ll	Mon Dec 02 22:37:05 2019 -0600
+++ b/libinterp/parse-tree/lex.ll	Mon Dec 02 22:58:55 2019 -0600
@@ -840,7 +840,8 @@
 
         curr_lexer->push_token (new octave::token (DQ_STRING,
                                                    curr_lexer->m_string_text,
-                                                   curr_lexer->m_beg_string));
+                                                   curr_lexer->m_beg_string,
+                                                   curr_lexer->m_filepos));
 
         curr_lexer->m_string_text = "";
 
@@ -861,7 +862,7 @@
         octave::token *tok
           = new octave::token (LEXICAL_ERROR,
                                "invalid octal escape sequence in character string",
-                               curr_lexer->m_filepos);
+                               curr_lexer->m_filepos, curr_lexer->m_filepos);
 
         curr_lexer->push_token (tok);
 
@@ -1002,7 +1003,7 @@
     octave::token *tok
       = new octave::token (LEXICAL_ERROR,
                            "unterminated character string constant",
-                           curr_lexer->m_filepos);
+                           curr_lexer->m_filepos, curr_lexer->m_filepos);
 
     curr_lexer->push_token (tok);
 
@@ -1036,7 +1037,8 @@
 
         curr_lexer->push_token (new octave::token (SQ_STRING,
                                                    curr_lexer->m_string_text,
-                                                   curr_lexer->m_beg_string));
+                                                   curr_lexer->m_beg_string,
+                                                   curr_lexer->m_filepos));
 
         curr_lexer->m_string_text = "";
 
@@ -1057,7 +1059,7 @@
     octave::token *tok
       = new octave::token (LEXICAL_ERROR,
                            "unterminated character string constant",
-                           curr_lexer->m_filepos);
+                           curr_lexer->m_filepos, curr_lexer->m_filepos);
 
     curr_lexer->push_token (tok);
 
@@ -1368,12 +1370,14 @@
                 if (kw_token)
                   tok = new octave::token (LEXICAL_ERROR,
                                            "function handles may not refer to keywords",
+                                           curr_lexer->m_filepos,
                                            curr_lexer->m_filepos);
                 else
                   {
                     curr_lexer->m_looking_for_object_index = true;
 
                     tok = new octave::token (FCN_HANDLE, ident,
+                                             curr_lexer->m_filepos,
                                              curr_lexer->m_filepos);
                   }
 
@@ -1416,7 +1420,7 @@
         octave::token *tok
           = new octave::token (LEXICAL_ERROR,
                                "unexpected internal lexer error",
-                               curr_lexer->m_filepos);
+                               curr_lexer->m_filepos, curr_lexer->m_filepos);
 
         curr_lexer->push_token (tok);
 
@@ -1806,7 +1810,7 @@
 
         octave::token *tok
           = new octave::token (LEXICAL_ERROR, buf.str (),
-                               curr_lexer->m_filepos);
+                               curr_lexer->m_filepos, curr_lexer->m_filepos);
 
         curr_lexer->push_token (tok);
 
@@ -2575,73 +2579,86 @@
                 return 0;
               }
 
-            tok_val = new token (end_kw, token::simple_end, m_filepos);
+            tok_val = new token (end_kw, token::simple_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case end_try_catch_kw:
-            tok_val = new token (end_try_catch_kw, token::try_catch_end, m_filepos);
+            tok_val = new token (end_try_catch_kw, token::try_catch_end,
+                                 m_filepos, m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case end_unwind_protect_kw:
             tok_val = new token (end_unwind_protect_kw,
-                                 token::unwind_protect_end, m_filepos);
+                                 token::unwind_protect_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endfor_kw:
-            tok_val = new token (endfor_kw, token::for_end, m_filepos);
+            tok_val = new token (endfor_kw, token::for_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endfunction_kw:
-            tok_val = new token (endfunction_kw, token::function_end, m_filepos);
+            tok_val = new token (endfunction_kw, token::function_end,
+                                 m_filepos, m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endif_kw:
-            tok_val = new token (endif_kw, token::if_end, m_filepos);
+            tok_val = new token (endif_kw, token::if_end, m_filepos, m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endparfor_kw:
-            tok_val = new token (endparfor_kw, token::parfor_end, m_filepos);
+            tok_val = new token (endparfor_kw, token::parfor_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endswitch_kw:
-            tok_val = new token (endswitch_kw, token::switch_end, m_filepos);
+            tok_val = new token (endswitch_kw, token::switch_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endwhile_kw:
-            tok_val = new token (endwhile_kw, token::while_end, m_filepos);
+            tok_val = new token (endwhile_kw, token::while_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endclassdef_kw:
-            tok_val = new token (endclassdef_kw, token::classdef_end, m_filepos);
+            tok_val = new token (endclassdef_kw, token::classdef_end,
+                                 m_filepos, m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endenumeration_kw:
-            tok_val = new token (endenumeration_kw, token::enumeration_end, m_filepos);
+            tok_val = new token (endenumeration_kw, token::enumeration_end,
+                                 m_filepos, m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endevents_kw:
-            tok_val = new token (endevents_kw, token::events_end, m_filepos);
+            tok_val = new token (endevents_kw, token::events_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endmethods_kw:
-            tok_val = new token (endmethods_kw, token::methods_end, m_filepos);
+            tok_val = new token (endmethods_kw, token::methods_end, m_filepos,
+                                 m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
           case endproperties_kw:
-            tok_val = new token (endproperties_kw, token::properties_end, m_filepos);
+            tok_val = new token (endproperties_kw, token::properties_end,
+                                 m_filepos, m_filepos);
             m_at_beginning_of_statement = true;
             break;
 
@@ -2729,9 +2746,11 @@
               if ((m_reading_fcn_file || m_reading_script_file
                    || m_reading_classdef_file)
                   && ! m_fcn_file_full_name.empty ())
-                tok_val = new token (magic_file_kw, m_fcn_file_full_name, m_filepos);
+                tok_val = new token (magic_file_kw, m_fcn_file_full_name,
+                                     m_filepos, m_filepos);
               else
-                tok_val = new token (magic_file_kw, "stdin", m_filepos);
+                tok_val = new token (magic_file_kw, "stdin", m_filepos,
+                                     m_filepos);
             }
             break;
 
@@ -2739,7 +2758,7 @@
             {
               int l = m_filepos.line ();
               tok_val = new token (magic_line_kw, static_cast<double> (l),
-                                   "", m_filepos);
+                                   "", m_filepos, m_filepos);
             }
             break;
 
@@ -2748,7 +2767,7 @@
           }
 
         if (! tok_val)
-          tok_val = new token (kw->tok, true, m_filepos);
+          tok_val = new token (kw->tok, true, m_filepos, m_filepos);
 
         push_token (tok_val);
 
@@ -2874,7 +2893,7 @@
     m_looking_for_object_index = false;
     m_at_beginning_of_statement = false;
 
-    push_token (new token (NUM, value, yytxt, m_filepos));
+    push_token (new token (NUM, value, yytxt, m_filepos, m_filepos));
 
     m_filepos.increment_column (flex_yyleng ());
   }
@@ -3018,15 +3037,14 @@
         token *tok
           = new token (LEXICAL_ERROR,
                        "method, class, and package names may not be keywords",
-                       m_filepos);
+                       m_filepos, m_filepos);
 
         push_token (tok);
 
         return count_token_internal (LEXICAL_ERROR);
       }
 
-    push_token (new token (SUPERCLASSREF, meth, cls,
-                           m_filepos));
+    push_token (new token (SUPERCLASSREF, meth, cls, m_filepos, m_filepos));
 
     m_filepos.increment_column (flex_yyleng ());
 
@@ -3048,13 +3066,13 @@
       {
         token *tok = new token (LEXICAL_ERROR,
                                 "class and package names may not be keywords",
-                                m_filepos);
+                                m_filepos, m_filepos);
         push_token (tok);
 
         return count_token_internal (LEXICAL_ERROR);
       }
 
-    push_token (new token (METAQUERY, cls, m_filepos));
+    push_token (new token (METAQUERY, cls, m_filepos, m_filepos));
 
     m_filepos.increment_column (flex_yyleng ());
 
@@ -3074,14 +3092,14 @@
         token *tok
           = new token (LEXICAL_ERROR,
                        "function, method, class, and package names may not be keywords",
-                       m_filepos);
+                       m_filepos, m_filepos);
 
         push_token (tok);
 
         return count_token_internal (LEXICAL_ERROR);
       }
 
-    push_token (new token (FQ_IDENT, txt, m_filepos));
+    push_token (new token (FQ_IDENT, txt, m_filepos, m_filepos));
 
     m_filepos.increment_column (flex_yyleng ());
 
@@ -3103,7 +3121,7 @@
 
     if (m_looking_at_indirect_ref)
       {
-        push_token (new token (STRUCT_ELT, ident, m_filepos));
+        push_token (new token (STRUCT_ELT, ident, m_filepos, m_filepos));
 
         m_looking_for_object_index = true;
 
@@ -3140,7 +3158,7 @@
 
     symbol_record sr = (scope ? scope.insert (ident) : symbol_record (ident));
 
-    token *tok = new token (NAME, sr, m_filepos);
+    token *tok = new token (NAME, sr, m_filepos, m_filepos);
 
     // The following symbols are handled specially so that things like
     //
@@ -3574,7 +3592,7 @@
     if (! compat)
       warn_language_extension_operator (flex_yytext ());
 
-    push_token (new token (tok, m_filepos));
+    push_token (new token (tok, m_filepos, m_filepos));
 
     m_filepos.increment_column (flex_yyleng ());
     m_looking_for_object_index = false;
@@ -3605,7 +3623,7 @@
   int
   base_lexer::handle_token (const std::string& name, int tok)
   {
-    token *tok_val = new token (tok, name, m_filepos);
+    token *tok_val = new token (tok, name, m_filepos, m_filepos);
 
     return handle_token (tok, tok_val);
   }
@@ -3614,7 +3632,7 @@
   base_lexer::handle_token (int tok, token *tok_val)
   {
     if (! tok_val)
-      tok_val = new token (tok, m_filepos);
+      tok_val = new token (tok, m_filepos, m_filepos);
 
     push_token (tok_val);
 
@@ -3626,7 +3644,7 @@
   int
   base_lexer::count_token (int tok)
   {
-    token *tok_val = new token (tok, m_filepos);
+    token *tok_val = new token (tok, m_filepos, m_filepos);
 
     push_token (tok_val);
 
--- a/libinterp/parse-tree/token.cc	Mon Dec 02 22:37:05 2019 -0600
+++ b/libinterp/parse-tree/token.cc	Mon Dec 02 22:58:55 2019 -0600
@@ -31,52 +31,59 @@
 
 namespace octave
 {
-  token::token (int tv, const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (generic_token), m_tok_info (),
-      m_orig_text ()
-  { }
-
-  token::token (int tv, bool is_kw, const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (is_kw ? keyword_token : generic_token),
+  token::token (int tv, const filepos& beg_pos, const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv), m_type_tag (generic_token),
       m_tok_info (), m_orig_text ()
   { }
 
-  token::token (int tv, const char *s, const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (string_token), m_tok_info (s),
+  token::token (int tv, bool is_kw, const filepos& beg_pos,
+                const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv),
+      m_type_tag (is_kw ? keyword_token : generic_token), m_tok_info (),
       m_orig_text ()
   { }
 
-  token::token (int tv, const std::string& s, const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (string_token), m_tok_info (s),
-      m_orig_text ()
+  token::token (int tv, const char *s, const filepos& beg_pos,
+                const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv), m_type_tag (string_token),
+      m_tok_info (s), m_orig_text ()
+  { }
+
+  token::token (int tv, const std::string& s, const filepos& beg_pos,
+                const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv), m_type_tag (string_token),
+      m_tok_info (s), m_orig_text ()
   { }
 
-  token::token (int tv, double d, const std::string& s, const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (double_token), m_tok_info (d),
-      m_orig_text (s)
+  token::token (int tv, double d, const std::string& s, const filepos& beg_pos,
+                const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv), m_type_tag (double_token),
+      m_tok_info (d), m_orig_text (s)
   { }
 
-  token::token (int tv, end_tok_type t, const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (ettype_token), m_tok_info (t),
-      m_orig_text ()
+  token::token (int tv, end_tok_type t, const filepos& beg_pos,
+                const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv), m_type_tag (ettype_token),
+      m_tok_info (t), m_orig_text ()
   { }
 
-  token::token (int tv, const symbol_record& sr, const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (sym_rec_token), m_tok_info (sr),
-      m_orig_text ()
+  token::token (int tv, const symbol_record& sr, const filepos& beg_pos,
+                const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv), m_type_tag (sym_rec_token),
+      m_tok_info (sr), m_orig_text ()
   { }
 
   token::token (int tv, const std::string& meth, const std::string& cls,
-                const filepos& pos)
-    : m_maybe_cmd (false), m_tspc (false), m_pos (pos),
-      m_tok_val (tv), m_type_tag (scls_name_token),
+                const filepos& beg_pos, const filepos& end_pos)
+    : m_maybe_cmd (false), m_tspc (false), m_beg_pos (beg_pos),
+      m_end_pos (end_pos), m_tok_val (tv), m_type_tag (scls_name_token),
       m_tok_info (meth, cls), m_orig_text ()
   { }
 
--- a/libinterp/parse-tree/token.h	Mon Dec 02 22:37:05 2019 -0600
+++ b/libinterp/parse-tree/token.h	Mon Dec 02 22:58:55 2019 -0600
@@ -65,15 +65,28 @@
       while_end,
     };
 
-    token (int tv, const filepos& pos);
-    token (int tv, bool is_keyword, const filepos& pos);
-    token (int tv, const char *s, const filepos& pos);
-    token (int tv, const std::string& s, const filepos& pos);
-    token (int tv, double d, const std::string& s, const filepos& pos);
-    token (int tv, end_tok_type t, const filepos& pos);
-    token (int tv, const symbol_record& s, const filepos& pos);
+    token (int tv, const filepos& beg_pos, const filepos& end_pos);
+
+    token (int tv, bool is_keyword, const filepos& beg_pos,
+           const filepos& end_pos);
+
+    token (int tv, const char *s, const filepos& beg_pos,
+           const filepos& end_pos);
+
+    token (int tv, const std::string& s, const filepos& beg_pos,
+           const filepos& end_pos);
+
+    token (int tv, double d, const std::string& s, const filepos& beg_pos,
+           const filepos& end_pos);
+
+    token (int tv, end_tok_type t, const filepos& beg_pos,
+           const filepos& end_pos);
+
+    token (int tv, const symbol_record& s, const filepos& beg_pos,
+           const filepos& end_pos);
+
     token (int tv, const std::string& mth, const std::string& cls,
-           const filepos& pos);
+           const filepos& beg_pos, const filepos& end_pos);
 
     // No copying!
 
@@ -92,8 +105,15 @@
     int token_value (void) const { return m_tok_val; }
     bool token_value_is (int tv) const { return tv == m_tok_val; }
 
-    int line (void) const { return m_pos.line (); }
-    int column (void) const { return m_pos.column (); }
+    filepos beg_pos (void) const { return m_beg_pos; }
+    filepos end_pos (void) const { return m_end_pos; }
+
+    void beg_pos (const filepos& pos) { m_beg_pos = pos; }
+    void end_pos (const filepos& pos) { m_end_pos = pos; }
+
+    // Line/column of m_beg_pos.  These will probably be removed.
+    int line (void) const { return m_beg_pos.line (); }
+    int column (void) const { return m_beg_pos.column (); }
 
     bool iskeyword (void) const
     {
@@ -129,7 +149,8 @@
 
     bool m_tspc;
 
-    filepos m_pos;
+    filepos m_beg_pos;
+    filepos m_end_pos;
 
     int m_tok_val;