changeset 28149:026bff6a54d7 stable

improve position tracking in the lexer (bug #57924)

* oct-parse.yy (base_parser::bison_error): Fix error column adjustment.

* lex.h, lex.ll (lexical_feedback::handle_language_extension_op,
lexical_feedback::handle_assign_op,
lexical_feedback::handle_language_extension_assign_op,
lexical_feedback::handle_op_internal): Delete. Replace uses with
calls to handle_op instead.
(lexical_feedback::handle_op, lexical_feedback::handle_identifier,
lexical_feedback::handle_superclass_identifier): Update token
positions inside these functions. Remove updates from rules that use
them. Consistently handle file position updates.
author John W. Eaton <jwe@octave.org>
date Tue, 10 Mar 2020 07:51:20 -0400
parents 648202bebcb0
children 38a9f6444eb0 4609d001daee
files libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll libinterp/parse-tree/oct-parse.yy
diffstat 3 files changed, 48 insertions(+), 88 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h	Mon Mar 09 10:24:07 2020 -0400
+++ b/libinterp/parse-tree/lex.h	Tue Mar 10 07:51:20 2020 -0400
@@ -740,18 +740,9 @@
 
     void display_start_state (void) const;
 
-    int handle_op (const char *pattern, int tok, bool bos = false);
-
-    int handle_language_extension_op (const char *pattern, int tok,
-                                      bool bos = false);
-
     bool maybe_unput_comma_before_unary_op (int tok);
 
-    int handle_assign_op (const char *pattern, int tok);
-
-    int handle_language_extension_assign_op (const char *pattern, int tok);
-
-    int handle_op_internal (int tok, bool bos, bool compat);
+    int handle_op (int tok, bool bos = false, bool compat = true);
 
     int finish_command_arg (void);
 
--- a/libinterp/parse-tree/lex.ll	Mon Mar 09 10:24:07 2020 -0400
+++ b/libinterp/parse-tree/lex.ll	Tue Mar 10 07:51:20 2020 -0400
@@ -177,10 +177,7 @@
            curr_lexer->push_start_state (COMMAND_START);                \
          }                                                              \
        else                                                             \
-         {                                                              \
-           curr_lexer->update_token_positions (yyleng);                 \
-           return curr_lexer->handle_op_internal (TOK, false, COMPAT);  \
-         }                                                              \
+         return curr_lexer->handle_op (TOK, false, COMPAT);             \
      }                                                                  \
    while (0)
 
@@ -196,10 +193,7 @@
            curr_lexer->push_start_state (COMMAND_START);                \
          }                                                              \
        else                                                             \
-         {                                                              \
-           curr_lexer->update_token_positions (yyleng);                 \
-           return curr_lexer->handle_language_extension_op (PATTERN, TOK, false); \
-         }                                                              \
+         return curr_lexer->handle_op (TOK, false, false);              \
      }                                                                  \
    while (0)
 
@@ -216,10 +210,7 @@
                curr_lexer->push_start_state (COMMAND_START);            \
              }                                                          \
            else                                                         \
-             {                                                          \
-               curr_lexer->update_token_positions (yyleng);             \
-               return curr_lexer->handle_op_internal (TOK, false, COMPAT); \
-             }                                                          \
+             return curr_lexer->handle_op (TOK, false, COMPAT);         \
          }                                                              \
        else                                                             \
          {                                                              \
@@ -229,10 +220,7 @@
                curr_lexer->xunput (',');                                \
              }                                                          \
            else                                                         \
-             {                                                          \
-               curr_lexer->update_token_positions (yyleng);              \
-               return curr_lexer->handle_op_internal (TOK, false, COMPAT); \
-             }                                                          \
+             return curr_lexer->handle_op (TOK, false, COMPAT);         \
          }                                                              \
      }                                                                  \
    while (0)
@@ -295,12 +283,7 @@
                    curr_lexer->m_maybe_classdef_get_set_method = false; \
                  }                                                      \
                                                                         \
-               curr_lexer->update_token_positions (yyleng);             \
-                                                                        \
-               int id_tok = curr_lexer->handle_identifier ();           \
-                                                                        \
-               if (id_tok >= 0)                                         \
-                 return curr_lexer->count_token_internal (id_tok);      \
+               return curr_lexer->handle_identifier ();                 \
              }                                                          \
          }                                                              \
      }                                                                  \
@@ -1310,24 +1293,16 @@
                   {
                     yyless (spc_pos);
                     curr_lexer->m_filepos.increment_column (spc_pos);
-                    curr_lexer->update_token_positions (yyleng);
 
                     return curr_lexer->handle_identifier ();
                   }
               }
           }
 
-        curr_lexer->update_token_positions (yyleng);
-
-        int id_tok = curr_lexer->handle_superclass_identifier ();
-
-        if (id_tok >= 0)
-          {
-            curr_lexer->m_looking_for_object_index = true;
-            curr_lexer->m_at_beginning_of_statement = false;
-
-            return curr_lexer->count_token_internal (id_tok);
-          }
+        curr_lexer->m_looking_for_object_index = true;
+        curr_lexer->m_at_beginning_of_statement = false;
+
+        return curr_lexer->handle_superclass_identifier ();
       }
   }
 
@@ -1622,7 +1597,11 @@
 // In Matlab, '\' may also trigger command syntax.
 %}
 
-"\\"  { return curr_lexer->handle_op ("\\", LEFTDIV); }
+"\\" {
+    curr_lexer->lexer_debug ("\\");
+
+    return curr_lexer->handle_op (LEFTDIV);
+  }
 
 "^"   { CMD_OR_OP ("^", POW, true); }
 "**"  { CMD_OR_OP ("**", POW, false); }
@@ -1630,11 +1609,13 @@
 "||"  { CMD_OR_OP ("||", EXPR_OR_OR, true); }
 
 ";" {
+    curr_lexer->lexer_debug (";");
+
     bool at_beginning_of_statement
       = (! (curr_lexer->whitespace_is_significant ()
             || curr_lexer->m_looking_at_object_index.front ()));
 
-    return curr_lexer->handle_op (";", ';', at_beginning_of_statement);
+    return curr_lexer->handle_op (';', at_beginning_of_statement);
   }
 
 "+" { CMD_OR_UNARY_OP ("+", '+', true); }
@@ -1644,15 +1625,19 @@
 "!" { CMD_OR_UNARY_OP ("!", EXPR_NOT, false); }
 
 "," {
+    curr_lexer->lexer_debug (",");
+
     bool at_beginning_of_statement
       = (! (curr_lexer->whitespace_is_significant ()
             || curr_lexer->m_looking_at_object_index.front ()));
 
-    return curr_lexer->handle_op (",", ',', at_beginning_of_statement);
+    return curr_lexer->handle_op (',', at_beginning_of_statement);
   }
 
 ".'" {
-    return curr_lexer->handle_op (".'", TRANSPOSE, false);
+    curr_lexer->lexer_debug (".'");
+
+    return curr_lexer->handle_op (TRANSPOSE);
   }
 
 "++" { CMD_OR_UNARY_OP ("++", PLUS_PLUS, false); }
@@ -1748,9 +1733,11 @@
 %}
 
 "=" {
+    curr_lexer->lexer_debug ("=");
+
     curr_lexer->maybe_mark_previous_token_as_variable ();
 
-    return curr_lexer->handle_op ("=", '=');
+    return curr_lexer->handle_op ('=');
   }
 
 "+="   { CMD_OR_COMPUTED_ASSIGN_OP ("+=", ADD_EQ); }
@@ -2468,7 +2455,11 @@
                    m_filepos.line (), m_fcn_file_name.c_str ());
       }
 
-    return handle_token (END_OF_INPUT);
+    token *tok_val = new token (END_OF_INPUT, m_tok_beg, m_tok_end);
+
+    push_token (tok_val);
+
+    return count_token_internal (END_OF_INPUT);
   }
 
   char *
@@ -3083,6 +3074,8 @@
   int
   base_lexer::handle_superclass_identifier (void)
   {
+    update_token_positions (flex_yyleng ());
+
     std::string txt = flex_yytext ();
 
     txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
@@ -3096,9 +3089,6 @@
     bool kw_token = (is_keyword_token (meth)
                      || fq_identifier_contains_keyword (cls));
 
-    // Token positions should have already been updated before this
-    // function is called.
-
     if (kw_token)
       {
         token *tok
@@ -3115,7 +3105,7 @@
 
     m_filepos.increment_column (flex_yyleng ());
 
-    return SUPERCLASSREF;
+    return count_token_internal (SUPERCLASSREF);
   }
 
   int
@@ -3186,10 +3176,9 @@
   int
   base_lexer::handle_identifier (void)
   {
-    // Token positions should have already been updated before this
-    // function is called.
-
-   std::string ident = flex_yytext ();
+    update_token_positions (flex_yyleng ());
+
+    std::string ident = flex_yytext ();
 
     // If we are expecting a structure element, avoid recognizing
     // keywords and other special names and return STRUCT_ELT, which is
@@ -3215,12 +3204,11 @@
 
     if (kw_token)
       {
-        if (kw_token >= 0)
-          m_looking_for_object_index = false;
+        m_looking_for_object_index = false;
 
         // The call to make_keyword_token set m_at_beginning_of_statement.
 
-        return kw_token;
+        return count_token_internal (kw_token);
       }
 
     // Find the token in the symbol table.
@@ -3257,7 +3245,7 @@
 
     m_at_beginning_of_statement = false;
 
-    return NAME;
+    return count_token_internal (NAME);
   }
 
   void
@@ -3601,23 +3589,6 @@
       }
   }
 
-  int
-  base_lexer::handle_op (const char *pattern, int tok, bool bos)
-  {
-    lexer_debug (pattern);
-
-    return handle_op_internal (tok, bos, true);
-  }
-
-  int
-  base_lexer::handle_language_extension_op (const char *pattern, int tok,
-                                                   bool bos)
-  {
-    lexer_debug (pattern);
-
-    return handle_op_internal (tok, bos, false);
-  }
-
   bool
   base_lexer::maybe_unput_comma_before_unary_op (int tok)
   {
@@ -3642,14 +3613,15 @@
   }
 
   int
-  base_lexer::handle_op_internal (int tok, bool bos, bool compat)
+  base_lexer::handle_op (int tok, bool bos, bool compat)
   {
     if (! compat)
       warn_language_extension_operator (flex_yytext ());
 
-    push_token (new token (tok, m_filepos, m_filepos));
-
-    m_filepos.increment_column (flex_yyleng ());
+    update_token_positions (flex_yyleng ());
+
+    push_token (new token (tok, m_tok_beg, m_tok_end));
+
     m_looking_for_object_index = false;
     m_at_beginning_of_statement = bos;
 
@@ -3700,8 +3672,6 @@
 
     push_token (tok_val);
 
-    m_filepos.increment_column (flex_yyleng ());
-
     return count_token_internal (tok);
   }
 
--- a/libinterp/parse-tree/oct-parse.yy	Mon Mar 09 10:24:07 2020 -0400
+++ b/libinterp/parse-tree/oct-parse.yy	Tue Mar 10 07:51:20 2020 -0400
@@ -4565,9 +4565,8 @@
       curr_line = m_lexer.m_current_input_line;
 
     // Adjust the error column for display because it is 1-based in the
-    // lexer for easier reporting and it has already been advanced to
-    // one past the end of the most recently read token.
-    err_col -= 2;
+    // lexer for easier reporting.
+    err_col--;
 
     if (! curr_line.empty ())
       {