Mercurial > octave
changeset 33153:c2ab726fcc88
new functions for reporting syntax errors from the lexer
* lex.h, lex.ll (base_lexer::syntax_error): New functions.
Use them to return syntax errors from the lexer to the parser.
field | value |
---|---|
author | John W. Eaton <jwe@octave.org> |
date | Sat, 02 Mar 2024 16:10:00 -0500 |
parents | 365751dd06c1 |
children | 16c392461132 |
files | libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll |
diffstat | 2 files changed, 79 insertions(+), 94 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h Wed Mar 06 11:33:21 2024 -0500 +++ b/libinterp/parse-tree/lex.h Sat Mar 02 16:10:00 2024 -0500 @@ -669,9 +669,9 @@ int handle_superclass_identifier (); - token * make_meta_identifier_token (); + token * make_meta_identifier_token (const std::string& cls); - token * make_fq_identifier_token (); + token * make_fq_identifier_token (const std::string& ident); int handle_identifier (); @@ -689,6 +689,10 @@ void warn_deprecated_syntax (const std::string& msg); + int syntax_error (const std::string& msg); + int syntax_error (const std::string& msg, const filepos& pos); + int syntax_error (const std::string& msg, const filepos& beg_pos, const filepos& end_pos); + void push_token (token *); token * current_token ();
--- a/libinterp/parse-tree/lex.ll Wed Mar 06 11:33:21 2024 -0500 +++ b/libinterp/parse-tree/lex.ll Sat Mar 02 16:10:00 2024 -0500 @@ -1011,9 +1011,7 @@ { // Use location of octal digits for error token. std::string msg {"invalid octal escape sequence in character string"}; - octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_tok_beg, curr_lexer->m_tok_end); - - return curr_lexer->handle_token (tok); + return curr_lexer->syntax_error (msg); } else curr_lexer->m_string_text += static_cast<unsigned char> (result); @@ -1115,11 +1113,7 @@ // Use current file position for error token. std::string msg {"unterminated character string constant"}; - octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_filepos, curr_lexer->m_filepos); - - curr_lexer->m_filepos.next_line (); - - return curr_lexer->handle_token (tok); + return curr_lexer->syntax_error (msg, curr_lexer->m_filepos); } %{ @@ -1167,11 +1161,7 @@ // Use current file position for error token. std::string msg {"unterminated character string constant"}; - octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_filepos, curr_lexer->m_filepos); - - curr_lexer->m_filepos.next_line (); - - return curr_lexer->handle_token (tok); + return curr_lexer->syntax_error (msg, curr_lexer->m_filepos); } %{ @@ -1185,7 +1175,14 @@ curr_lexer->update_token_positions (yyleng); - octave::token *tok = curr_lexer->make_fq_identifier_token (); + std::string ident = yytext; + + ident.erase (std::remove_if (ident.begin (), ident.end (), is_space_or_tab), ident.end ()); + + if (curr_lexer->fq_identifier_contains_keyword (ident)) + return curr_lexer->syntax_error ("function, method, class, and package names may not be keywords"); + + octave::token *tok = curr_lexer->make_fq_identifier_token (ident); return curr_lexer->handle_token (tok); } @@ -1341,7 +1338,17 @@ { curr_lexer->update_token_positions (yyleng); - octave::token *tok = curr_lexer->make_meta_identifier_token (); + 
std::string txt = yytext; + + txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab), txt.end ()); + + // Eliminate leading '?' + std::string cls = txt.substr (1); + + if (curr_lexer->fq_identifier_contains_keyword (cls)) + return curr_lexer->syntax_error ("class and package names may not be keywords"); + + octave::token *tok = curr_lexer->make_meta_identifier_token (cls); return curr_lexer->handle_token (tok); } @@ -1395,14 +1402,12 @@ if (octave::iskeyword (ident)) { std::string msg {"function handles may not refer to keywords"}; - tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_tok_beg, curr_lexer->m_tok_end); + return curr_lexer->syntax_error (msg); } - else - { - curr_lexer->m_looking_for_object_index = true; - - tok = new octave::token (FCN_HANDLE, ident, curr_lexer->m_tok_beg, curr_lexer->m_tok_end); - } + + curr_lexer->m_looking_for_object_index = true; + + tok = new octave::token (FCN_HANDLE, ident, curr_lexer->m_tok_beg, curr_lexer->m_tok_end); return curr_lexer->handle_token (tok); } @@ -1444,9 +1449,7 @@ // Use current file position for error token. std::string msg {"unexpected internal lexer error"}; - octave::token *tok = new octave::token (LEXICAL_ERROR, msg, curr_lexer->m_filepos, curr_lexer->m_filepos); - - return curr_lexer->handle_token (tok); + return curr_lexer->syntax_error (msg, curr_lexer->m_filepos); } } @@ -1844,13 +1847,9 @@ << octave::undo_string_escape (static_cast<char> (c)) << "' (ASCII " << c << ")"; - // Use current file position for error token. 
- std::string msg {"unexpected internal lexer error"}; - octave::token *tok = new octave::token (LEXICAL_ERROR, buf.str (), msg, curr_lexer->m_filepos, curr_lexer->m_filepos); - - curr_lexer->m_filepos.increment_column (); - - return curr_lexer->handle_token (tok); + curr_lexer->update_token_positions (yyleng); + + return curr_lexer->syntax_error (buf.str ()); } } @@ -2471,14 +2470,13 @@ if (m_block_comment_nesting_level != 0) { + std::string msg {"block comment unterminated at end of input"}; if ((m_reading_fcn_file || m_reading_script_file || m_reading_classdef_file) && ! m_fcn_file_name.empty ()) - error ("block comment unterminated at end of input\n" - "near line %d of file '%s.m'", - m_filepos.line () - 1, m_fcn_file_name.c_str ()); - else - error ("block comment unterminated at end of input"); + msg += " near line " + std::to_string (m_filepos.line () - 1) + " of file '" + m_fcn_file_name + ".m'"; + + syntax_error (msg); } token *tok = new token (END_OF_INPUT, m_tok_beg, m_tok_end); @@ -3010,9 +3008,7 @@ if (bytes < 0) { std::string msg {"too many digits for binary constant"}; - token *tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end); - - return handle_token (tok); + return syntax_error (msg); } // FIXME: is there a better way? 
Can uintmax_t be anything other @@ -3214,9 +3210,7 @@ if (bytes < 0) { std::string msg {"too many digits for hexadecimal constant"}; - token *tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end); - - return handle_token (tok); + return syntax_error (msg); } // Assert here because if yytext doesn't contain a valid number, we @@ -3375,9 +3369,7 @@ if (iskeyword (meth) || fq_identifier_contains_keyword (cls)) { std::string msg {"method, class, and package names may not be keywords"}; - token *tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end); - - return handle_token (tok); + return syntax_error (msg); } token *tok = new token (SUPERCLASSREF, meth, cls, m_tok_beg, m_tok_end); @@ -3388,64 +3380,31 @@ } token * - base_lexer::make_meta_identifier_token () + base_lexer::make_meta_identifier_token (const std::string& cls) { - std::string txt = flex_yytext (); - - txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab), - txt.end ()); - - // Eliminate leading '?' - std::string cls = txt.substr (1); - // Token positions should have already been updated before this // function is called. - token *tok; - - if (fq_identifier_contains_keyword (cls)) - { - std::string msg {"class and package names may not be keywords"}; - tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end); - } - else - { - m_looking_for_object_index = true; - - tok = new token (METAQUERY, cls, m_tok_beg, m_tok_end); - - m_filepos.increment_column (flex_yyleng ()); - } + m_looking_for_object_index = true; + + token *tok = new token (METAQUERY, cls, m_tok_beg, m_tok_end); + + m_filepos.increment_column (flex_yyleng ()); return tok; } token * - base_lexer::make_fq_identifier_token () + base_lexer::make_fq_identifier_token (const std::string& ident) { - std::string txt = flex_yytext (); - - txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab), - txt.end ()); - // Token positions should have already been updated before this // function is called. 
- token *tok; - - if (fq_identifier_contains_keyword (txt)) - { - std::string msg {"function, method, class, and package names may not be keywords"}; - tok = new token (LEXICAL_ERROR, msg, m_tok_beg, m_tok_end); - } - else - { - m_looking_for_object_index = true; - - tok = new token (FQ_IDENT, txt, m_tok_beg, m_tok_end); - - m_filepos.increment_column (flex_yyleng ()); - } + m_looking_for_object_index = true; + + token *tok = new token (FQ_IDENT, ident, m_tok_beg, m_tok_end); + + m_filepos.increment_column (flex_yyleng ()); return tok; } @@ -3598,6 +3557,28 @@ m_filepos.line (), m_fcn_file_full_name.c_str ()); } + int + base_lexer::syntax_error (const std::string& msg) + { + return syntax_error (msg, m_tok_beg, m_tok_end); + } + + int + base_lexer::syntax_error (const std::string& msg, const filepos& pos) + { + return syntax_error (msg, pos, pos); + } + + int + base_lexer::syntax_error (const std::string& msg, const filepos& beg_pos, const filepos& end_pos) + { + token *tok = new token (LEXICAL_ERROR, msg, beg_pos, end_pos); + + push_token (tok); + + return count_token_internal (tok->token_id ()); + } + void base_lexer::push_token (token *tok) { @@ -3750,7 +3731,7 @@ void base_lexer::fatal_error (const char *msg) { - error ("fatal lexer error: %s", msg); + ::error ("fatal lexer error: %s", msg); } bool