Mercurial > octave
changeset 29864:e2e493712818
improve previous change for command syntax parsing (bug #60882)
* symrec.h (symbol_record::variable): New static constant.
(symbol_record_rep::is_variable, symbol_record_rep::mark_as_variable,
symbol_record_rep::unmark_as_variable): New functions.
(symbol_record_rep::mark_formal): Also mark as variable.
(symbol_record::is_variable, symbol_record::mark_as_variable,
symbol_record::unmark_as_variable): New functions.
* symscope.h, symscope.cc (symbol_scope_rep::mark_as_variable,
symbol_scope_rep::mark_as_variables, symbol_scope_rep::is_variable,
symbol_scope::mark_as_variable, symbol_scope::mark_as_variables,
symbol_scope::is_variable): New functions.
* lex.h, lex.ll (lexical_feedback::m_pending_local_variables): Delete
member variable and all uses.
(CMD_OR_COMPUTED_ASSIGN_OP): Don't attempt Matlab compatibility for
computed assignment operators.
(lexical_feedback::mark_as_variable,
lexical_feedback::mark_as_variables, lexical_feedback::is_variable):
Store status of symbols as variables in scope objects instead of
m_pending_local_variables lists.
(base_lexer::handle_identifier): Don't check for variables when
deciding whether an identifier can be recognized as a command.
* parse.h, oct-parse.yy (parse_exception, parse_tree_validator):
New classes.
(base_parser::bison_error): New overloads that accept parse_exception
and list of parse_exception objects.
(base_parser::finish_classdef_file): Validate classdef object and
local functions and return status.
(base_parser::validate_primary_fcn, base_parser::finish_input):
New functions.
(file): Validate primary function after parser is finished reading and
parsing file.
(input): Call base_parser::finish_input to validate and accept
statement list.
* try.tst: Fix syntax in test.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Fri, 09 Jul 2021 04:05:58 -0400 |
parents | c29cfcf7a941 |
children | 6829836f86c2 |
files | libinterp/corefcn/symrec.h libinterp/corefcn/symscope.cc libinterp/corefcn/symscope.h libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll libinterp/parse-tree/oct-parse.yy libinterp/parse-tree/parse.h test/try.tst |
diffstat | 8 files changed, 376 insertions(+), 78 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/symrec.h Fri Jul 09 04:03:36 2021 -0400 +++ b/libinterp/corefcn/symrec.h Fri Jul 09 04:05:58 2021 -0400 @@ -58,6 +58,9 @@ // (symbol added to a static workspace) static const unsigned int added_static = 4; + // this symbol was recognized as a variable from syntax + static const unsigned int variable = 8; + private: class symbol_record_rep @@ -101,6 +104,11 @@ return m_storage_class & added_static; } + bool is_variable (void) const + { + return m_storage_class & variable; + } + void mark_local (void) { m_storage_class |= local; @@ -108,7 +116,8 @@ void mark_formal (void) { - m_storage_class |= formal; + // Formal parameters are also variables. + m_storage_class |= (formal | variable); } void mark_added_static (void) @@ -116,6 +125,11 @@ m_storage_class |= added_static; } + void mark_as_variable (void) + { + m_storage_class |= variable; + } + void unmark_local (void) { m_storage_class &= ~local; @@ -131,6 +145,11 @@ m_storage_class &= ~added_static; } + void unmark_as_variable (void) + { + m_storage_class &= ~variable; + } + unsigned int storage_class (void) const { return m_storage_class; } std::shared_ptr<symbol_record_rep> dup (void) const; @@ -189,14 +208,17 @@ bool is_local (void) const { return m_rep->is_local (); } bool is_formal (void) const { return m_rep->is_formal (); } bool is_added_static (void) const { return m_rep->is_added_static (); } + bool is_variable (void) const { return m_rep->is_variable (); } void mark_local (void) { m_rep->mark_local (); } void mark_formal (void) { m_rep->mark_formal (); } void mark_added_static (void) { m_rep->mark_added_static (); } + void mark_as_variable (void) { m_rep->mark_as_variable (); } void unmark_local (void) { m_rep->unmark_local (); } void unmark_formal (void) { m_rep->unmark_formal (); } void unmark_added_static (void) { m_rep->unmark_added_static (); } + void unmark_as_variable (void) { m_rep->unmark_as_variable (); } unsigned int storage_class (void) const { return m_rep->storage_class (); }
--- a/libinterp/corefcn/symscope.cc Fri Jul 09 04:03:36 2021 -0400 +++ b/libinterp/corefcn/symscope.cc Fri Jul 09 04:05:58 2021 -0400 @@ -253,6 +253,39 @@ return false; } + void symbol_scope_rep::mark_as_variable (const std::string& nm) + { + table_iterator p = m_symbols.find (nm); + + if (p != m_symbols.end ()) + p->second.mark_as_variable (); + } + + void symbol_scope_rep::mark_as_variables (const std::list<std::string>& lst) + { + for (const auto& nm : lst) + mark_as_variable (nm); + } + + bool symbol_scope_rep::is_variable (const std::string& nm) const + { + table_const_iterator p = m_symbols.find (nm); + + // FIXME: maybe we should also mark formal parameters as variables? + + if (p != m_symbols.end () && p->second.is_variable ()) + return true; + + if (is_nested ()) + { + auto t_parent = m_parent.lock (); + + return t_parent ? t_parent->is_variable (nm) : false; + } + + return false; + } + void symbol_scope_rep::update_nest (void) { auto t_parent = m_parent.lock ();
--- a/libinterp/corefcn/symscope.h Fri Jul 09 04:03:36 2021 -0400 +++ b/libinterp/corefcn/symscope.h Fri Jul 09 04:05:58 2021 -0400 @@ -292,6 +292,11 @@ bool is_relative (const std::shared_ptr<symbol_scope_rep>& scope) const; + void mark_as_variable (const std::string& nm); + void mark_as_variables (const std::list<std::string>& lst); + + bool is_variable (const std::string& nm) const; + void update_nest (void); bool look_nonlocal (const std::string& name, std::size_t offset, @@ -670,6 +675,23 @@ return m_rep ? m_rep->is_relative (scope.get_rep ()) : false; } + void mark_as_variable (const std::string& nm) + { + if (m_rep) + m_rep->mark_as_variable (nm); + } + + void mark_as_variables (const std::list<std::string>& lst) + { + if (m_rep) + m_rep->mark_as_variables (lst); + } + + bool is_variable (const std::string& nm) const + { + return m_rep ? m_rep->is_variable (nm) : false; + } + void update_nest (void) { if (m_rep)
--- a/libinterp/parse-tree/lex.h Fri Jul 09 04:03:36 2021 -0400 +++ b/libinterp/parse-tree/lex.h Fri Jul 09 04:05:58 2021 -0400 @@ -313,7 +313,6 @@ m_package_name (), m_looking_at_object_index (), m_parsed_function_name (), - m_pending_local_variables (), m_symtab_context (interp), m_nesting_level (), m_tokens () @@ -350,8 +349,6 @@ void mark_as_variable (const std::string& nm); void mark_as_variables (const std::list<std::string>& lst); - bool is_variable (const std::string& nm) const; - interpreter& m_interpreter; // true means that we have encountered eof on the input stream. @@ -515,11 +512,6 @@ // current_function_level > 0 std::stack<bool> m_parsed_function_name; - // A list of sets of identifiers that might be local variable names. - // The front of the list corresponds to the current scope. The next - // element is for the parent scope, etc. - std::list<std::set<std::string>> m_pending_local_variables; - // Track current symbol table scope and context. symbol_table_context m_symtab_context;
--- a/libinterp/parse-tree/lex.ll Fri Jul 09 04:03:36 2021 -0400 +++ b/libinterp/parse-tree/lex.ll Fri Jul 09 04:05:58 2021 -0400 @@ -183,7 +183,19 @@ } \ while (0) -#define CMD_OR_COMPUTED_ASSIGN_OP(PATTERN, TOK) \ +#if 0 +// Use the following to handle computed assignment operators +// (+=, -=, etc.) in word list commands in a way that is compatible +// with Matlab. However, that will also make it impossible to use +// these operators with a space before them: +// +// x = 1; +// x+=2; ## ok +// x+= 2; ## ok +// x +=2; ## error: invalid use of symbol as both variable and command +// x += 2; ## error: invalid use of symbol as both variable and command +// +# define CMD_OR_COMPUTED_ASSIGN_OP(PATTERN, TOK) \ do \ { \ curr_lexer->lexer_debug (PATTERN); \ @@ -198,6 +210,10 @@ return curr_lexer->handle_op (TOK, false, false); \ } \ while (0) +#else +# define CMD_OR_COMPUTED_ASSIGN_OP(PATTERN, TOK) \ + return curr_lexer->handle_op (TOK, false, false) +#endif #define CMD_OR_UNARY_OP(PATTERN, TOK, COMPAT) \ do \ @@ -2221,12 +2237,6 @@ // The closest paren, brace, or bracket nesting is not an object // index. m_looking_at_object_index.push_front (false); - - // Provide an initial set to store variables at the top-level. - // Don't clear this one when resetting lexical_feedback state. - // It should persist since the top-level scope does. Hmm maybe - // we should just use the symbol scope object for this job? - m_pending_local_variables.push_front (std::set<std::string> ()); } void @@ -2284,9 +2294,6 @@ while (! m_parsed_function_name.empty ()) m_parsed_function_name.pop (); - while (m_pending_local_variables.size () > 1) - m_pending_local_variables.pop_front (); - m_symtab_context.clear (); m_nesting_level.reset (); m_tokens.clear (); @@ -2348,6 +2355,24 @@ return tok ? tok->iskeyword () : false; } + void + lexical_feedback::mark_as_variable (const std::string& nm) + { + symbol_scope scope = m_symtab_context.curr_scope (); + + if (scope) + scope.mark_as_variable (nm); + } + + void + lexical_feedback::mark_as_variables (const std::list<std::string>& lst) + { + symbol_scope scope = m_symtab_context.curr_scope (); + + if (scope) + scope.mark_as_variables (lst); + } + bool lexical_feedback::previous_token_may_be_command (void) const { @@ -2357,37 +2382,6 @@ const token *tok = m_tokens.front (); return tok ? tok->may_be_command () : false; } - - void - lexical_feedback::mark_as_variable (const std::string& nm) - { - auto& vars = m_pending_local_variables.front (); - vars.insert (nm); - } - - void - lexical_feedback::mark_as_variables (const std::list<std::string>& lst) - { - auto& vars = m_pending_local_variables.front (); - for (const auto& nm : lst) - vars.insert (nm); - } - - bool - lexical_feedback::is_variable (const std::string& nm) const - { - if (m_interpreter.at_top_level () && m_interpreter.is_variable (nm)) - return true; - - // Search current scope, then parents. - for (const auto& vars : m_pending_local_variables) - { - if (vars.find (nm) != vars.end ()) - return true; - } - - return false; - } } static bool @@ -3599,7 +3593,6 @@ if (m_at_beginning_of_statement && ! (m_parsing_anon_fcn_body - || is_variable (ident) || ident == "e" || ident == "pi" || ident == "I" || ident == "i" || ident == "J" || ident == "j"
--- a/libinterp/parse-tree/oct-parse.yy Fri Jul 09 04:03:36 2021 -0400 +++ b/libinterp/parse-tree/oct-parse.yy Fri Jul 09 04:05:58 2021 -0400 @@ -417,19 +417,22 @@ OCTAVE_YYUSE ($2); $$ = nullptr; - std::shared_ptr<octave::tree_statement_list> tmp_lst ($1); - parser.statement_list (tmp_lst); - YYACCEPT; + + if (! parser.finish_input ($1)) + YYABORT; + else + YYACCEPT; } | simple_list END_OF_INPUT { OCTAVE_YYUSE ($2); $$ = nullptr; - lexer.m_end_of_input = true; - std::shared_ptr<octave::tree_statement_list> tmp_lst ($1); - parser.statement_list (tmp_lst); - YYACCEPT; + + if (! parser.finish_input ($1, true)) + YYABORT; + else + YYACCEPT; } | parse_error { @@ -1452,7 +1455,6 @@ { // Will get a real name later. lexer.m_symtab_context.push (octave::symbol_scope ("parser:param_list_beg")); - lexer.m_pending_local_variables.push_front (std::set<std::string> ()); lexer.m_looking_at_function_handle--; lexer.m_looking_at_anon_fcn_args = true; } @@ -1608,7 +1610,6 @@ // This scope may serve as the parent scope for local // functions in classdef files.. lexer.m_symtab_context.push (octave::symbol_scope ("parser:push_script_symtab")); - lexer.m_pending_local_variables.push_front (std::set<std::string> ()); } ; @@ -1630,9 +1631,11 @@ // Unused symbol table context. lexer.m_symtab_context.pop (); - lexer.m_pending_local_variables.pop_front (); delete $3; + + if (! parser.validate_primary_fcn ()) + YYABORT; } else { @@ -1641,6 +1644,9 @@ $4->beg_pos (), $4->end_pos ()); parser.make_script ($3, end_of_script); + + if (! parser.validate_primary_fcn ()) + YYABORT; } $$ = nullptr; @@ -1653,9 +1659,9 @@ // Unused symbol table context. lexer.m_symtab_context.pop (); - lexer.m_pending_local_variables.pop_front (); - - parser.finish_classdef_file ($3, $6); + + if (! parser.finish_classdef_file ($3, $6)) + YYABORT; $$ = nullptr; } @@ -1956,7 +1962,6 @@ // Create invalid parent scope. lexer.m_symtab_context.push (octave::symbol_scope ()); - lexer.m_pending_local_variables.push_front (std::set<std::string> ()); lexer.m_parsing_classdef = true; lexer.m_parsing_classdef_decl = true; lexer.m_classdef_element_names_are_keywords = true; @@ -2560,6 +2565,131 @@ namespace octave { + class parse_exception : public std::runtime_error + { + public: + + parse_exception (const std::string& message, + const std::string& fcn_name = "", + const std::string& file_name = "", + int line = -1, int column = -1) + : runtime_error (message), m_message (message), + m_fcn_name (fcn_name), m_file_name (file_name), + m_line (line), m_column (column) + { } + + parse_exception (const parse_exception&) = default; + + parse_exception& operator = (const parse_exception&) = default; + + ~parse_exception (void) = default; + + std::string message (void) const { return m_message; } + + // Provided for std::exception interface. + const char * what (void) const noexcept { return m_message.c_str (); } + + std::string fcn_name (void) const { return m_fcn_name; } + std::string file_name (void) const { return m_file_name; } + + int line (void) const { return m_line; } + int column (void) const { return m_column; } + + // virtual void display (std::ostream& os) const; + + private: + + std::string m_message; + + std::string m_fcn_name; + std::string m_file_name; + int m_line; + int m_column; + }; + + class parse_tree_validator : public tree_walker + { + public: + + parse_tree_validator (void) + : m_scope (), m_error_list () + { } + + parse_tree_validator (const parse_tree_validator&) = delete; + + parse_tree_validator& operator = (const parse_tree_validator&) = delete; + + ~parse_tree_validator (void) = default; + + symbol_scope get_scope (void) const { return m_scope; } + + bool ok (void) const { return m_error_list.empty (); } + + std::list<parse_exception> error_list (void) const + { + return m_error_list; + } + + void visit_octave_user_script (octave_user_script& script) + { + unwind_protect_var<symbol_scope> restore_var (m_scope, script.scope ()); + + tree_statement_list *stmt_list = script.body (); + + if (stmt_list) + stmt_list->accept (*this); + } + + void visit_octave_user_function (octave_user_function& fcn) + { + unwind_protect_var<symbol_scope> restore_var (m_scope, fcn.scope ()); + + tree_statement_list *stmt_list = fcn.body (); + + if (stmt_list) + stmt_list->accept (*this); + + std::map<std::string, octave_value> subfcns = fcn.subfunctions (); + + if (! subfcns.empty ()) + { + for (auto& nm_val : subfcns) + { + octave_user_function *subfcn + = nm_val.second.user_function_value (); + + if (subfcn) + subfcn->accept (*this); + } + } + } + + void visit_index_expression (tree_index_expression& idx_expr) + { + if (idx_expr.is_word_list_cmd ()) + { + std::string sym_nm = idx_expr.name (); + + if (m_scope.is_variable (sym_nm)) + { + std::string message + = sym_nm + ": invalid use of symbol as both variable and command"; + parse_exception pe (message, m_scope.fcn_name (), + m_scope.fcn_file_name (), + idx_expr.line (), idx_expr.column ()); + + m_error_list.push_back (pe); + } + } + } + + private: + + symbol_scope m_scope; + + std::list<parse_exception> m_error_list; + }; + std::size_t base_parser::parent_scope_info::size (void) const { @@ -2829,7 +2959,6 @@ // Will get a real name later. m_lexer.m_symtab_context.push (symbol_scope ("parser:push_fcn_symtab")); - m_lexer.m_pending_local_variables.push_front (std::set<std::string> ()); m_function_scopes.push (m_lexer.m_symtab_context.curr_scope ()); if (! m_lexer.m_reading_script_file && m_curr_fcn_depth == 0 @@ -2944,7 +3073,6 @@ symbol_scope parent_scope = m_lexer.m_symtab_context.parent_scope (); m_lexer.m_symtab_context.pop (); - m_lexer.m_pending_local_variables.pop_front (); expr->set_print_flag (false); @@ -3808,8 +3936,6 @@ } } - // Define a script. - void base_parser::make_script (tree_statement_list *cmds, tree_statement *end_script) @@ -3831,7 +3957,6 @@ cmds, m_lexer.m_help_text); m_lexer.m_symtab_context.pop (); - m_lexer.m_pending_local_variables.pop_front (); m_lexer.m_help_text = ""; sys::time now; @@ -4232,7 +4357,6 @@ base_parser::recover_from_parsing_function (void) { m_lexer.m_symtab_context.pop (); - m_lexer.m_pending_local_variables.pop_front (); if (m_lexer.m_reading_fcn_file && m_curr_fcn_depth == 0 && ! m_parsing_subfunctions) @@ -4268,7 +4392,6 @@ tree_classdef *retval = nullptr; m_lexer.m_symtab_context.pop (); - m_lexer.m_pending_local_variables.pop_front (); std::string cls_name = id->name (); @@ -4579,12 +4702,39 @@ return new tree_function_def (fcn, l, c); } - void + bool base_parser::finish_classdef_file (tree_classdef *cls, tree_statement_list *local_fcns) { - if (m_lexer.m_reading_classdef_file) - m_classdef_object = std::shared_ptr<tree_classdef> (cls); + parse_tree_validator validator; + + cls->accept (validator); + + if (local_fcns) + { + for (tree_statement *elt : *local_fcns) + { + tree_command *cmd = elt->command (); + + tree_function_def *fcn_def + = dynamic_cast<tree_function_def *> (cmd); + + fcn_def->accept (validator); + } + } + + if (! validator.ok ()) + { + delete cls; + delete local_fcns; + + bison_error (validator.error_list ()); + + return false; + } + + // Require all validations to succeed before installing any local + // functions or defining the classdef object for later use. if (local_fcns) { @@ -4599,7 +4749,8 @@ = dynamic_cast<tree_function_def *> (cmd); octave_value ov_fcn = fcn_def->function (); - octave_function *fcn = ov_fcn.function_value (); + octave_user_function *fcn = ov_fcn.user_function_value (); + std::string nm = fcn->name (); std::string file = fcn->fcn_file_name (); @@ -4608,6 +4759,12 @@ delete local_fcns; } + + // FIXME: Is it possible for the following condition to be false? + if (m_lexer.m_reading_classdef_file) + m_classdef_object = std::shared_ptr<tree_classdef> (cls); + + return true; } // Make an index expression. @@ -5167,6 +5324,23 @@ m_parse_error_msg = output_buf.str (); } + void + base_parser::bison_error (const parse_exception& pe) + { + bison_error (pe.message (), pe.line (), pe.column ()); + } + + void + base_parser::bison_error (const std::list<parse_exception>& pe_list) + { + // For now, we just report the first error found. Reporting all + // errors will require a bit more refactoring. + + parse_exception pe = pe_list.front (); + + bison_error (pe.message (), pe.line (), pe.column ()); + } + int parser::run (void) { @@ -5423,6 +5597,57 @@ return octave_value (); } + bool + base_parser::finish_input (tree_statement_list *lst, bool at_eof) + { + m_lexer.m_end_of_input = at_eof; + + if (lst) + { + parse_tree_validator validator; + + lst->accept (validator); + + if (! validator.ok ()) + { + delete lst; + + bison_error (validator.error_list ()); + + return false; + } + } + + std::shared_ptr<octave::tree_statement_list> tmp_lst (lst); + + statement_list (tmp_lst); + + return true; + } + + // Check script or function for semantic errors. + bool + base_parser::validate_primary_fcn (void) + { + octave_user_code *code = m_primary_fcn.user_code_value (); + + if (code) + { + parse_tree_validator validator; + + code->accept (validator); + + if (! validator.ok ()) + { + bison_error (validator.error_list ()); + + return false; + } + } + + return true; + } + // Maybe print a warning if an assignment expression is used as the // test in a logical expression.
--- a/libinterp/parse-tree/parse.h Fri Jul 09 04:03:36 2021 -0400 +++ b/libinterp/parse-tree/parse.h Fri Jul 09 04:05:58 2021 -0400 @@ -43,11 +43,13 @@ #include "token.h" class octave_function; +class octave_user_code; class octave_user_function; namespace octave { class comment_list; + class parse_exception; class tree; class tree_anon_fcn_handle; class tree_arg_size_spec; @@ -464,7 +466,7 @@ tree_parameter_list *ret_list, comment_list *cl); - OCTINTERP_API void + OCTINTERP_API bool finish_classdef_file (tree_classdef *cls, tree_statement_list *local_fcns); @@ -536,6 +538,8 @@ OCTINTERP_API void bison_error (const std::string& s); OCTINTERP_API void bison_error (const std::string& s, const filepos& pos); OCTINTERP_API void bison_error (const std::string& s, int line, int column); + OCTINTERP_API void bison_error (const std::list<parse_exception>& pe); + OCTINTERP_API void bison_error (const parse_exception& pe); friend OCTINTERP_API octave_value parse_fcn_file (interpreter& interp, const std::string& full_file, @@ -552,6 +556,13 @@ virtual int run (void) = 0; + // Check primary script or function generated by the parser for + // semantic errors. + OCTINTERP_API bool validate_primary_fcn (void); + + OCTINTERP_API bool finish_input (tree_statement_list *lst, + bool at_eof = false); + protected: // Contains error message if Bison-generated parser returns non-zero