changeset 33153:c2ab726fcc88

new functions for reporting syntax errors from the lexer

* lex.h, lex.ll (base_lexer::syntax_error): New functions. Use them to return syntax errors from the lexer to the parser.
author John W. Eaton <jwe@octave.org>
date Sat, 02 Mar 2024 16:10:00 -0500
parents 365751dd06c1
children 16c392461132
files libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll
diffstat 2 files changed, 79 insertions(+), 94 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h	Wed Mar 06 11:33:21 2024 -0500
+++ b/libinterp/parse-tree/lex.h	Sat Mar 02 16:10:00 2024 -0500
@@ -669,9 +669,9 @@
 
   int handle_superclass_identifier ();
 
-  token * make_meta_identifier_token ();
+  token * make_meta_identifier_token (const std::string& cls);
 
-  token * make_fq_identifier_token ();
+  token * make_fq_identifier_token (const std::string& ident);
 
   int handle_identifier ();
 
@@ -689,6 +689,10 @@
 
   void warn_deprecated_syntax (const std::string& msg);
 
+  int syntax_error (const std::string& msg);
+  int syntax_error (const std::string& msg, const filepos& pos);
+  int syntax_error (const std::string& msg, const filepos& beg_pos, const filepos& end_pos);
+
   void push_token (token *);
 
   token * current_token ();
--- a/libinterp/parse-tree/lex.ll	Wed Mar 06 11:33:21 2024 -0500
+++ b/libinterp/parse-tree/lex.ll	Sat Mar 02 16:10:00 2024 -0500
@@ -1011,9 +1011,7 @@
       {
         // Use location of octal digits for error token.
         std::string msg {"invalid octal escape sequence in character string"};
-        octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_tok_beg, curr_lexer->m_tok_end);
-
-        return curr_lexer->handle_token (tok);
+        return curr_lexer->syntax_error (msg);
       }
     else
       curr_lexer->m_string_text += static_cast<unsigned char> (result);
@@ -1115,11 +1113,7 @@
 
     // Use current file position for error token.
     std::string msg {"unterminated character string constant"};
-    octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_filepos, curr_lexer->m_filepos);
-
-    curr_lexer->m_filepos.next_line ();
-
-    return curr_lexer->handle_token (tok);
+    return curr_lexer->syntax_error (msg, curr_lexer->m_filepos);
   }
 
 %{
@@ -1167,11 +1161,7 @@
 
     // Use current file position for error token.
     std::string msg {"unterminated character string constant"};
-    octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_filepos, curr_lexer->m_filepos);
-
-    curr_lexer->m_filepos.next_line ();
-
-    return curr_lexer->handle_token (tok);
+    return curr_lexer->syntax_error (msg, curr_lexer->m_filepos);
   }
 
 %{
@@ -1185,7 +1175,14 @@
 
     curr_lexer->update_token_positions (yyleng);
 
-    octave::token *tok = curr_lexer->make_fq_identifier_token ();
+    std::string ident = yytext;
+
+    ident.erase (std::remove_if (ident.begin (), ident.end (), is_space_or_tab), ident.end ());
+
+    if (curr_lexer->fq_identifier_contains_keyword (ident))
+      return curr_lexer->syntax_error ("function, method, class, and package names may not be keywords");
+
+    octave::token *tok = curr_lexer->make_fq_identifier_token (ident);
 
     return curr_lexer->handle_token (tok);
   }
@@ -1341,7 +1338,17 @@
       {
         curr_lexer->update_token_positions (yyleng);
 
-        octave::token *tok = curr_lexer->make_meta_identifier_token ();
+        std::string txt = yytext;
+
+        txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab), txt.end ());
+
+        // Eliminate leading '?'
+        std::string cls = txt.substr (1);
+
+        if (curr_lexer->fq_identifier_contains_keyword (cls))
+          return curr_lexer->syntax_error ("class and package names may not be keywords");
+
+        octave::token *tok = curr_lexer->make_meta_identifier_token (cls);
 
         return curr_lexer->handle_token (tok);
       }
@@ -1395,14 +1402,12 @@
                 if (octave::iskeyword (ident))
                   {
                     std::string msg {"function handles may not refer to keywords"};
-                    tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_tok_beg, curr_lexer->m_tok_end);
+                    return curr_lexer->syntax_error (msg);
                   }
-                else
-                  {
-                    curr_lexer->m_looking_for_object_index = true;
-
-                    tok = new octave::token (FCN_HANDLE, ident, curr_lexer->m_tok_beg, curr_lexer->m_tok_end);
-                  }
+
+                curr_lexer->m_looking_for_object_index = true;
+
+                tok = new octave::token (FCN_HANDLE, ident, curr_lexer->m_tok_beg, curr_lexer->m_tok_end);
 
                 return curr_lexer->handle_token (tok);
               }
@@ -1444,9 +1449,7 @@
 
         // Use current file position for error token.
         std::string msg {"unexpected internal lexer error"};
-        octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_filepos, curr_lexer->m_filepos);
-
-        return curr_lexer->handle_token (tok);
+        return curr_lexer->syntax_error (msg, curr_lexer->m_filepos);
       }
   }
 
@@ -1844,13 +1847,9 @@
             << octave::undo_string_escape (static_cast<char> (c))
             << "' (ASCII " << c << ")";
 
-        // Use current file position for error token.
-        std::string msg {"unexpected internal lexer error"};
-        octave::token *tok = new octave::token (LEXICAL_ERROR, buf.str (), msg, curr_lexer->m_filepos, curr_lexer->m_filepos);
-
-        curr_lexer->m_filepos.increment_column ();
-
-        return curr_lexer->handle_token (tok);
+        curr_lexer->update_token_positions (yyleng);
+
+        return curr_lexer->syntax_error (buf.str ());
       }
   }
 
@@ -2471,14 +2470,13 @@
 
     if (m_block_comment_nesting_level != 0)
       {
+        std::string msg {"block comment unterminated at end of input"};
 
         if ((m_reading_fcn_file || m_reading_script_file || m_reading_classdef_file)
             && ! m_fcn_file_name.empty ())
-          error ("block comment unterminated at end of input\n"
-                 "near line %d of file '%s.m'",
-                 m_filepos.line () - 1, m_fcn_file_name.c_str ());
-        else
-          error ("block comment unterminated at end of input");
+          msg += " near line " + std::to_string (m_filepos.line () - 1) + " of file '" + m_fcn_file_name + ".m'";
+
+        syntax_error (msg);
       }
 
     token *tok = new token (END_OF_INPUT, m_tok_beg, m_tok_end);
@@ -3010,9 +3008,7 @@
     if (bytes < 0)
       {
         std::string msg {"too many digits for binary constant"};
-        token *tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end);
-
-        return handle_token (tok);
+        return syntax_error (msg);
       }
 
     // FIXME: is there a better way?  Can uintmax_t be anything other
@@ -3214,9 +3210,7 @@
     if (bytes < 0)
       {
         std::string msg {"too many digits for hexadecimal constant"};
-        token *tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end);
-
-        return handle_token (tok);
+        return syntax_error (msg);
       }
 
     // Assert here because if yytext doesn't contain a valid number, we
@@ -3375,9 +3369,7 @@
     if (iskeyword (meth) || fq_identifier_contains_keyword (cls))
       {
         std::string msg {"method, class, and package names may not be keywords"};
-        token *tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end);
-
-        return handle_token (tok);
+        return syntax_error (msg);
       }
 
     token *tok = new token (SUPERCLASSREF, meth, cls, m_tok_beg, m_tok_end);
@@ -3388,64 +3380,31 @@
   }
 
   token *
-  base_lexer::make_meta_identifier_token ()
+  base_lexer::make_meta_identifier_token (const std::string& cls)
   {
-    std::string txt = flex_yytext ();
-
-    txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
-               txt.end ());
-
-    // Eliminate leading '?'
-    std::string cls = txt.substr (1);
-
     // Token positions should have already been updated before this
     // function is called.
 
-    token *tok;
-
-    if (fq_identifier_contains_keyword (cls))
-      {
-        std::string msg {"class and package names may not be keywords"};
-        tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end);
-      }
-    else
-      {
-        m_looking_for_object_index = true;
-
-        tok = new token (METAQUERY, cls, m_tok_beg, m_tok_end);
-
-        m_filepos.increment_column (flex_yyleng ());
-      }
+    m_looking_for_object_index = true;
+
+    token *tok = new token (METAQUERY, cls, m_tok_beg, m_tok_end);
+
+    m_filepos.increment_column (flex_yyleng ());
 
     return tok;
   }
 
   token *
-  base_lexer::make_fq_identifier_token ()
+  base_lexer::make_fq_identifier_token (const std::string& ident)
   {
-    std::string txt = flex_yytext ();
-
-    txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
-               txt.end ());
-
     // Token positions should have already been updated before this
     // function is called.
 
-    token *tok;
-
-    if (fq_identifier_contains_keyword (txt))
-      {
-        std::string msg {"function, method, class, and package names may not be keywords"};
-        tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end);
-      }
-    else
-      {
-        m_looking_for_object_index = true;
-
-        tok = new token (FQ_IDENT, txt, m_tok_beg, m_tok_end);
-
-        m_filepos.increment_column (flex_yyleng ());
-      }
+    m_looking_for_object_index = true;
+
+    token *tok = new token (FQ_IDENT, ident, m_tok_beg, m_tok_end);
+
+    m_filepos.increment_column (flex_yyleng ());
 
     return tok;
   }
@@ -3598,6 +3557,28 @@
                        m_filepos.line (), m_fcn_file_full_name.c_str ());
   }
 
+  int
+  base_lexer::syntax_error (const std::string& msg)
+  {
+    return syntax_error (msg, m_tok_beg, m_tok_end);
+  }
+
+  int
+  base_lexer::syntax_error (const std::string& msg, const filepos& pos)
+  {
+    return syntax_error (msg, pos, pos);
+  }
+
+  int
+  base_lexer::syntax_error (const std::string& msg, const filepos& beg_pos, const filepos& end_pos)
+  {
+    token *tok = new token (LEXICAL_ERROR, msg, beg_pos, end_pos);
+
+    push_token (tok);
+
+    return count_token_internal (tok->token_id ());
+  }
+
   void
   base_lexer::push_token (token *tok)
   {
@@ -3750,7 +3731,7 @@
   void
   base_lexer::fatal_error (const char *msg)
   {
-    error ("fatal lexer error: %s", msg);
+    ::error ("fatal lexer error: %s", msg);
   }
 
   bool