Mercurial > octave
changeset 32930:ab4b7e516ff3 stable
allow help for ordinary functions to work again (bug #65220)
* comment-list.h (comment_elt::doc_string): Delete enum element. The
lexer and parser no longer attempt to tag comments as doc strings when
they are initially recognized so this tag is unused now.
(comment_elt::is_doc_string): Delete function.
(comment_list::find_doc_comment, comment_list::find_doc_string):
New functions.
* lex.h, lex.ll (lexical_feedback::m_classdef_doc_string,
lexical_feedback::m_doc_string): Delete data members and all uses.
(base_parser::finish_comment): Don't try to recognize doc string here.
* parse.h, oct-parse.yy (classdef_beg): Don't try to recognize
doc string here.
(classdef): Use stash_comment to grab comment before END instead of
calling lexer.get_comment inside rule. Don't try to recognize doc
string here.
* (base_parser::make_script): Examine comment lists attached to commands
to find doc string.
* (base_parser::make_function): Examine leading and body comment lists
to find doc string. For classdef methods, attempt Matlab compatible
behavior by looking at whether comments only use '%' characters.
* (base_parser::start_function): New argument, doc_string. Use it
instead of asking the lexer for the doc string.
* (base_parser::make_classdef): New argument, bc. Delete it on error.
Extract doc string from leading or body comment.
* help.cc (help_system::raw_help_from_symbol_table): When searching
using the full name and a class is found, only search for a
constructor if the full name contains a '.' and there is no doc string
for the class itself. If a class is found and there is no doc string
for the class itself or a constructor, generate a simple doc string
indicating that the class exists but is undocumented.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sun, 04 Feb 2024 23:21:11 -0500 |
parents | ac3121944bb7 |
children | d8c716290873 2bc78bbc3fde |
files | libinterp/corefcn/help.cc libinterp/parse-tree/comment-list.h libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll libinterp/parse-tree/oct-parse.yy libinterp/parse-tree/parse.h |
diffstat | 6 files changed, 112 insertions(+), 75 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/help.cc Sat Feb 03 19:29:53 2024 +0100 +++ b/libinterp/corefcn/help.cc Sun Feb 04 23:21:11 2024 -0500 @@ -751,16 +751,27 @@ // Look for constructor. pos = name.rfind ('.'); - std::string nm = name.substr (pos+1); - octave_value ov_meth = cls.get_method (nm); + if (pos != std::string::npos) + { + std::string nm = name.substr (pos+1); + + octave_value ov_meth = cls.get_method (nm); - if (get_help_from_fcn (nm, ov_meth, help, what, symbol_found)) - { - what = "constructor"; + if (get_help_from_fcn (nm, ov_meth, help, what, symbol_found)) + { + what = "constructor"; + return true; + } + } - return true; - } + // We found a class but no doc string for it or its constructor. + // Create a generic doc string. + + help = name + " is an undocumented class"; + what = "class"; + symbol_found = true; + return true; } cdef_package pkg = cdm.find_package (name, false, true);
--- a/libinterp/parse-tree/comment-list.h Sat Feb 03 19:29:53 2024 +0100 +++ b/libinterp/parse-tree/comment-list.h Sun Feb 04 23:21:11 2024 -0500 @@ -51,7 +51,6 @@ block, full_line, end_of_line, - doc_string, copyright }; @@ -83,7 +82,6 @@ bool is_block () const { return m_type == block; } bool is_full_line () const { return m_type == full_line; } bool is_end_of_line () const { return m_type == end_of_line; } - bool is_doc_string () const { return m_type == doc_string; } bool is_copyright () const { return m_type == copyright; } bool uses_hash_char () const { return m_uses_hash_char; } @@ -125,6 +123,25 @@ { append (comment_elt (s, t, uses_hash_char)); } comment_list * dup () const; + + // Documentation for functions is typically the first block of + // comments that doesn't look like a copyright statement. + comment_elt find_doc_comment () const + { + for (const auto& elt : *this) + { + // FIXME: should we also omit end-of-line comments? + if (! elt.is_copyright ()) + return elt; + } + + return comment_elt (); + } + + std::string find_doc_string () const + { + return find_doc_comment().text (); + } }; OCTAVE_END_NAMESPACE(octave)
--- a/libinterp/parse-tree/lex.h Sat Feb 03 19:29:53 2024 +0100 +++ b/libinterp/parse-tree/lex.h Sun Feb 04 23:21:11 2024 -0500 @@ -301,8 +301,6 @@ m_filepos (1, 1), m_tok_beg (), m_tok_end (), - m_classdef_doc_string (), - m_doc_string (), m_string_text (), m_current_input_line (), m_comment_text (), @@ -476,12 +474,6 @@ filepos m_tok_beg; filepos m_tok_end; - // Pending doc string for classdef object. - comment_elt m_classdef_doc_string; - - // Pending doc string for functions. - comment_elt m_doc_string; - // The current character string text. std::string m_string_text;
--- a/libinterp/parse-tree/lex.ll Sat Feb 03 19:29:53 2024 +0100 +++ b/libinterp/parse-tree/lex.ll Sun Feb 04 23:21:11 2024 -0500 @@ -2226,8 +2226,6 @@ m_filepos = filepos (1, 1); m_tok_beg = filepos (); m_tok_end = filepos (); - m_classdef_doc_string.reset (); - m_doc_string.reset (); m_string_text = ""; m_current_input_line = ""; m_comment_text = ""; @@ -3351,15 +3349,7 @@ void base_lexer::finish_comment (comment_elt::comment_type typ) { - bool copyright = looks_like_copyright (m_comment_text); - - if (typ != octave::comment_elt::end_of_line - && m_nesting_level.none () - && m_doc_string.empty () && ! m_comment_text.empty () - && ! copyright && ! looks_like_shebang (m_comment_text)) - m_doc_string = comment_elt (m_comment_text, typ, m_comment_uses_hash_char); - - if (copyright) + if (looks_like_copyright (m_comment_text)) typ = comment_elt::copyright; m_comment_buf.append (m_comment_text, typ, m_comment_uses_hash_char);
--- a/libinterp/parse-tree/oct-parse.yy Sat Feb 03 19:29:53 2024 +0100 +++ b/libinterp/parse-tree/oct-parse.yy Sun Feb 04 23:21:11 2024 -0500 @@ -1832,9 +1832,6 @@ YYABORT; } - lexer.m_classdef_doc_string = lexer.m_doc_string; - lexer.m_doc_string.reset (); - // Create invalid parent scope. lexer.m_symtab_context.push (octave::symbol_scope::anonymous ()); lexer.m_parsing_classdef = true; @@ -1845,23 +1842,15 @@ } ; -classdef : classdef_beg stash_comment attr_list identifier opt_sep superclass_list stash_comment class_body END +classdef : classdef_beg stash_comment attr_list identifier opt_sep superclass_list stash_comment class_body stash_comment END { OCTAVE_YYUSE ($5); - octave::comment_list *lc = $2; - octave::comment_list *tc = lexer.get_comment (); - - if (lexer.m_classdef_doc_string.empty () && $7 && ! $7->empty ()) - lexer.m_classdef_doc_string = $7->front (); - lexer.m_parsing_classdef = false; - if (! ($$ = parser.make_classdef ($1, $3, $4, $6, $8, $9, - lc, tc))) + if (! ($$ = parser.make_classdef ($1, $3, $4, $6, $8, $10, $2, $7, $9))) { - // make_classdef deleted $3, $4, $6, $8, LC, and - // TC. + // make_classdef deleted $2, $3, $4, $6, $7, $8, $9 YYABORT; } } @@ -2014,8 +2003,6 @@ properties_beg : PROPERTIES { - lexer.m_doc_string.reset (); - lexer.m_classdef_element_names_are_keywords = false; $$ = $1; } @@ -2092,8 +2079,6 @@ methods_beg : METHODS { - lexer.m_doc_string.reset (); - lexer.m_classdef_element_names_are_keywords = false; $$ = $1; } @@ -2178,8 +2163,6 @@ events_beg : EVENTS { - lexer.m_doc_string.reset (); - lexer.m_classdef_element_names_are_keywords = false; $$ = $1; } @@ -2232,8 +2215,6 @@ enumeration_beg : ENUMERATION { - lexer.m_doc_string.reset (); - lexer.m_classdef_element_names_are_keywords = false; $$ = $1; } @@ -2291,7 +2272,9 @@ ; stash_comment : // empty - { $$ = lexer.get_comment (); } + { + $$ = lexer.get_comment (); + } ; parse_error : LEXICAL_ERROR @@ -3892,6 +3875,10 @@ base_parser::make_script (tree_statement_list *cmds, tree_statement *end_script) { + // Any comments at the beginning of a script file should be + // attached to the first statement in the file or the END_SCRIPT + // statement created by the parser. + if (! cmds) cmds = new tree_statement_list (); @@ -3903,13 +3890,22 @@ script_scope.cache_fcn_file_name (m_lexer.m_fcn_file_full_name); script_scope.cache_dir_name (m_lexer.m_dir_name); + // First non-copyright comment in classdef body, before first + // properties, methods, etc. block. + + tree_statement *first_stmt = cmds->front (); + comment_list *leading_comments = first_stmt->comment_text (); + + std::string doc_string; + if (leading_comments) + doc_string = leading_comments->find_doc_string (); + octave_user_script *script = new octave_user_script (m_lexer.m_fcn_file_full_name, m_lexer.m_fcn_file_name, script_scope, - cmds, m_lexer.m_doc_string.text ()); + cmds, doc_string); m_lexer.m_symtab_context.pop (); - m_lexer.m_doc_string.reset (); sys::time now; @@ -3962,10 +3958,19 @@ tree_statement *end_fcn_stmt, comment_list *lc, comment_list *bc) { - // FIXME: maybe choose which comment to used by checking whether - // any language extensions are noticed in the entire source file, - // not just in the comments that are candidates to become the - // function doc string. + // First non-copyright comments found above and below function keyword. + comment_elt leading_doc_comment; + comment_elt body_doc_comment; + + if (lc) + leading_doc_comment = lc->find_doc_comment (); + + if (bc) + body_doc_comment = bc->find_doc_comment (); + + // Choose which comment to use for doc string. + + // For ordinary functions, use the first comment that isn't empty. // If we are looking at a classdef method and there is a comment // prior to the function keyword and another after, then @@ -3978,16 +3983,25 @@ // comments use percent '%' characters. This is // Matlab-compatible behavior. - if (m_lexer.m_parsing_classdef && ! m_lexer.m_doc_string.empty () - && bc && ! bc->empty () && ! m_lexer.m_doc_string.uses_hash_char () - && ! bc->front().uses_hash_char ()) - m_lexer.m_doc_string = bc->front (); + // FIXME: maybe choose which comment to used by checking whether + // any language extensions are noticed in the entire source file, + // not just in the comments that are candidates to become the + // function doc string. + + std::string doc_string; + + if (leading_doc_comment.empty () + || (m_lexer.m_parsing_classdef && ! body_doc_comment.empty () + && (! (leading_doc_comment.uses_hash_char () || body_doc_comment.uses_hash_char ())))) + doc_string = body_doc_comment.text (); + else + doc_string = leading_doc_comment.text (); int l = fcn_tok->line (); int c = fcn_tok->column (); octave_user_function *tmp_fcn - = start_function (id, param_list, body, end_fcn_stmt); + = start_function (id, param_list, body, end_fcn_stmt, doc_string); tree_function_def *retval = finish_function (ret_list, tmp_fcn, lc, l, c); @@ -4002,7 +4016,8 @@ base_parser::start_function (tree_identifier *id, tree_parameter_list *param_list, tree_statement_list *body, - tree_statement *end_fcn_stmt) + tree_statement *end_fcn_stmt, + const std::string& doc_string) { // We'll fill in the return list later. @@ -4118,11 +4133,9 @@ // because the doc_string of the outermost function is read first, // whereas this function is called for the innermost function first. // We could have a stack of doc_string objects in lexer. - if (! m_lexer.m_doc_string.empty () && m_curr_fcn_depth == 0) - { - fcn->document (m_lexer.m_doc_string.text ()); - m_lexer.m_doc_string.reset (); - } + if (! doc_string.empty () && m_curr_fcn_depth == 0) + fcn->document (doc_string); + if (m_lexer.m_reading_fcn_file && m_curr_fcn_depth == 0 && ! m_parsing_subfunctions) @@ -4367,7 +4380,7 @@ tree_identifier *id, tree_classdef_superclass_list *sc, tree_classdef_body *body, token *end_tok, - comment_list *lc, comment_list *tc) + comment_list *lc, comment_list *bc, comment_list *tc) { tree_classdef *retval = nullptr; @@ -4394,6 +4407,7 @@ delete sc; delete body; delete lc; + delete bc; delete tc; bison_error ("invalid classdef definition, the class name must match the filename", l, c); @@ -4406,15 +4420,26 @@ int l = tok_val->line (); int c = tok_val->column (); + // First non-copyright comments found above and below + // function keyword are candidates for the documentation + // string. Use the first one that is not empty. + + std::string doc_string; + + if (lc) + doc_string = lc->find_doc_string (); + + if (doc_string.empty () && bc) + doc_string = bc->find_doc_string (); + if (! body) body = new tree_classdef_body (); + // FIXME - pass body comment to tree_classdef constructor. + retval = new tree_classdef (m_lexer.m_symtab_context.curr_scope (), - m_lexer.m_classdef_doc_string.text (), - a, id, sc, body, lc, tc, + doc_string, a, id, sc, body, lc, tc, m_curr_package_name, full_name, l, c); - - m_lexer.m_classdef_doc_string.reset (); } else { @@ -4423,6 +4448,7 @@ delete sc; delete body; delete lc; + delete bc; delete tc; end_token_error (end_tok, token::switch_end);
--- a/libinterp/parse-tree/parse.h Sat Feb 03 19:29:53 2024 +0100 +++ b/libinterp/parse-tree/parse.h Sun Feb 04 23:21:11 2024 -0500 @@ -394,7 +394,8 @@ // Begin defining a function. OCTINTERP_API octave_user_function * start_function (tree_identifier *id, tree_parameter_list *param_list, - tree_statement_list *body, tree_statement *end_function); + tree_statement_list *body, tree_statement *end_function, + const std::string& doc_string); // Create a no-op statement for end_function. OCTINTERP_API tree_statement * @@ -456,7 +457,7 @@ make_classdef (token *tok_val, tree_classdef_attribute_list *a, tree_identifier *id, tree_classdef_superclass_list *sc, tree_classdef_body *body, token *end_tok, - comment_list *lc, comment_list *tc); + comment_list *lc, comment_list *bc, comment_list *tc); OCTINTERP_API tree_classdef_properties_block * make_classdef_properties_block (token *tok_val,