changeset 32931:d8c716290873

maint: merge stable to default
author John W. Eaton <jwe@octave.org>
date Sun, 04 Feb 2024 23:24:58 -0500
parents de39818dca5e (current diff) ab4b7e516ff3 (diff)
children da8fda0b7dd8 b216fd94f5cc
files libinterp/parse-tree/comment-list.h libinterp/parse-tree/lex.h libinterp/parse-tree/oct-parse.yy
diffstat 6 files changed, 112 insertions(+), 75 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/help.cc	Sun Feb 04 14:52:17 2024 -0500
+++ b/libinterp/corefcn/help.cc	Sun Feb 04 23:24:58 2024 -0500
@@ -751,16 +751,27 @@
       // Look for constructor.
 
       pos = name.rfind ('.');
-      std::string nm = name.substr (pos+1);
 
-      octave_value ov_meth = cls.get_method (nm);
+      if (pos != std::string::npos)
+        {
+          std::string nm = name.substr (pos+1);
+
+          octave_value ov_meth = cls.get_method (nm);
 
-      if (get_help_from_fcn (nm, ov_meth, help, what, symbol_found))
-        {
-          what = "constructor";
+          if (get_help_from_fcn (nm, ov_meth, help, what, symbol_found))
+            {
+              what = "constructor";
+              return true;
+            }
+        }
 
-          return true;
-        }
+      // We found a class but no doc string for it or its constructor.
+      // Create a generic doc string.
+
+      help = name + " is an undocumented class";
+      what = "class";
+      symbol_found = true;
+      return true;
     }
 
   cdef_package pkg = cdm.find_package (name, false, true);
--- a/libinterp/parse-tree/comment-list.h	Sun Feb 04 14:52:17 2024 -0500
+++ b/libinterp/parse-tree/comment-list.h	Sun Feb 04 23:24:58 2024 -0500
@@ -50,7 +50,6 @@
     block,
     full_line,
     end_of_line,
-    doc_string,
     copyright
   };
 
@@ -82,7 +81,6 @@
   bool is_block () const { return m_type == block; }
   bool is_full_line () const { return m_type == full_line; }
   bool is_end_of_line () const { return m_type == end_of_line; }
-  bool is_doc_string () const { return m_type == doc_string; }
   bool is_copyright () const { return m_type == copyright; }
   bool uses_hash_char () const { return m_uses_hash_char; }
 
@@ -123,6 +121,25 @@
   { append (comment_elt (s, t, uses_hash_char)); }
 
   comment_list * dup () const;
+
+  // Documentation for functions is typically the first block of
+  // comments that doesn't look like a copyright statement.
+  comment_elt find_doc_comment () const
+  {
+    for (const auto& elt : *this)
+      {
+        // FIXME: should we also omit end-of-line comments?
+        if (! elt.is_copyright ())
+          return elt;
+      }
+
+    return comment_elt ();
+  }
+
+  std::string find_doc_string () const
+  {
+    return find_doc_comment().text ();
+  }
 };
 
 OCTAVE_END_NAMESPACE(octave)
--- a/libinterp/parse-tree/lex.h	Sun Feb 04 14:52:17 2024 -0500
+++ b/libinterp/parse-tree/lex.h	Sun Feb 04 23:24:58 2024 -0500
@@ -300,8 +300,6 @@
       m_filepos (1, 1),
       m_tok_beg (),
       m_tok_end (),
-      m_classdef_doc_string (),
-      m_doc_string (),
       m_string_text (),
       m_current_input_line (),
       m_comment_text (),
@@ -475,12 +473,6 @@
   filepos m_tok_beg;
   filepos m_tok_end;
 
-  // Pending doc string for classdef object.
-  comment_elt m_classdef_doc_string;
-
-  // Pending doc string for functions.
-  comment_elt m_doc_string;
-
   // The current character string text.
   std::string m_string_text;
 
--- a/libinterp/parse-tree/lex.ll	Sun Feb 04 14:52:17 2024 -0500
+++ b/libinterp/parse-tree/lex.ll	Sun Feb 04 23:24:58 2024 -0500
@@ -2226,8 +2226,6 @@
     m_filepos = filepos (1, 1);
     m_tok_beg = filepos ();
     m_tok_end = filepos ();
-    m_classdef_doc_string.reset ();
-    m_doc_string.reset ();
     m_string_text = "";
     m_current_input_line = "";
     m_comment_text = "";
@@ -3351,15 +3349,7 @@
   void
   base_lexer::finish_comment (comment_elt::comment_type typ)
   {
-    bool copyright = looks_like_copyright (m_comment_text);
-
-    if (typ != octave::comment_elt::end_of_line
-        && m_nesting_level.none ()
-        && m_doc_string.empty () && ! m_comment_text.empty ()
-        && ! copyright && ! looks_like_shebang (m_comment_text))
-      m_doc_string = comment_elt (m_comment_text, typ, m_comment_uses_hash_char);
-
-    if (copyright)
+    if (looks_like_copyright (m_comment_text))
       typ = comment_elt::copyright;
 
     m_comment_buf.append (m_comment_text, typ, m_comment_uses_hash_char);
--- a/libinterp/parse-tree/oct-parse.yy	Sun Feb 04 14:52:17 2024 -0500
+++ b/libinterp/parse-tree/oct-parse.yy	Sun Feb 04 23:24:58 2024 -0500
@@ -1832,9 +1832,6 @@
                         YYABORT;
                       }
 
-                    lexer.m_classdef_doc_string = lexer.m_doc_string;
-                    lexer.m_doc_string.reset ();
-
                     // Create invalid parent scope.
                     lexer.m_symtab_context.push (octave::symbol_scope::anonymous ());
                     lexer.m_parsing_classdef = true;
@@ -1845,23 +1842,15 @@
                   }
                 ;
 
-classdef        : classdef_beg stash_comment attr_list identifier opt_sep superclass_list stash_comment class_body END
+classdef        : classdef_beg stash_comment attr_list identifier opt_sep superclass_list stash_comment class_body stash_comment END
                   {
                     OCTAVE_YYUSE ($5);
 
-                    octave::comment_list *lc = $2;
-                    octave::comment_list *tc = lexer.get_comment ();
-
-                    if (lexer.m_classdef_doc_string.empty () && $7 && ! $7->empty ())
-                      lexer.m_classdef_doc_string = $7->front ();
-
                     lexer.m_parsing_classdef = false;
 
-                    if (! ($$ = parser.make_classdef ($1, $3, $4, $6, $8, $9,
-                                                      lc, tc)))
+                    if (! ($$ = parser.make_classdef ($1, $3, $4, $6, $8, $10, $2, $7, $9)))
                       {
-                        // make_classdef deleted $3, $4, $6, $8, LC, and
-                        // TC.
+                        // make_classdef deleted $2, $3, $4, $6, $7, $8, $9
                         YYABORT;
                       }
                   }
@@ -2014,8 +2003,6 @@
 
 properties_beg  : PROPERTIES
                   {
-                    lexer.m_doc_string.reset ();
-
                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
                   }
@@ -2092,8 +2079,6 @@
 
 methods_beg     : METHODS
                   {
-                    lexer.m_doc_string.reset ();
-
                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
                   }
@@ -2178,8 +2163,6 @@
 
 events_beg      : EVENTS
                   {
-                    lexer.m_doc_string.reset ();
-
                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
                   }
@@ -2232,8 +2215,6 @@
 
 enumeration_beg : ENUMERATION
                   {
-                    lexer.m_doc_string.reset ();
-
                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
                   }
@@ -2291,7 +2272,9 @@
                 ;
 
 stash_comment   : // empty
-                  { $$ = lexer.get_comment (); }
+                  {
+                    $$ = lexer.get_comment ();
+                  }
                 ;
 
 parse_error     : LEXICAL_ERROR
@@ -3892,6 +3875,10 @@
   base_parser::make_script (tree_statement_list *cmds,
                             tree_statement *end_script)
   {
+    // Any comments at the beginning of a script file should be
+    // attached to the first statement in the file or the END_SCRIPT
+    // statement created by the parser.
+
     if (! cmds)
       cmds = new tree_statement_list ();
 
@@ -3903,13 +3890,22 @@
     script_scope.cache_fcn_file_name (m_lexer.m_fcn_file_full_name);
     script_scope.cache_dir_name (m_lexer.m_dir_name);
 
+    // First non-copyright comment in classdef body, before first
+    // properties, methods, etc. block.
+
+    tree_statement *first_stmt = cmds->front ();
+    comment_list *leading_comments = first_stmt->comment_text ();
+
+    std::string doc_string;
+    if (leading_comments)
+      doc_string = leading_comments->find_doc_string ();
+
     octave_user_script *script
       = new octave_user_script (m_lexer.m_fcn_file_full_name,
                                 m_lexer.m_fcn_file_name, script_scope,
-                                cmds, m_lexer.m_doc_string.text ());
+                                cmds, doc_string);
 
     m_lexer.m_symtab_context.pop ();
-    m_lexer.m_doc_string.reset ();
 
     sys::time now;
 
@@ -3962,10 +3958,19 @@
                               tree_statement *end_fcn_stmt,
                               comment_list *lc, comment_list *bc)
   {
-    // FIXME: maybe choose which comment to used by checking whether
-    // any language extensions are noticed in the entire source file,
-    // not just in the comments that are candidates to become the
-    // function doc string.
+    // First non-copyright comments found above and below function keyword.
+    comment_elt leading_doc_comment;
+    comment_elt body_doc_comment;
+
+    if (lc)
+      leading_doc_comment = lc->find_doc_comment ();
+
+    if (bc)
+      body_doc_comment = bc->find_doc_comment ();
+
+    // Choose which comment to use for doc string.
+
+    // For ordinary functions, use the first comment that isn't empty.
 
     // If we are looking at a classdef method and there is a comment
     // prior to the function keyword and another after, then
@@ -3978,16 +3983,25 @@
     //     comments use percent '%' characters.  This is
     //     Matlab-compatible behavior.
 
-    if (m_lexer.m_parsing_classdef && ! m_lexer.m_doc_string.empty ()
-        && bc && ! bc->empty () && ! m_lexer.m_doc_string.uses_hash_char ()
-        && ! bc->front().uses_hash_char ())
-      m_lexer.m_doc_string = bc->front ();
+    // FIXME: maybe choose which comment to used by checking whether
+    // any language extensions are noticed in the entire source file,
+    // not just in the comments that are candidates to become the
+    // function doc string.
+
+    std::string doc_string;
+
+    if (leading_doc_comment.empty ()
+        || (m_lexer.m_parsing_classdef && ! body_doc_comment.empty ()
+            && (! (leading_doc_comment.uses_hash_char () || body_doc_comment.uses_hash_char ()))))
+      doc_string = body_doc_comment.text ();
+    else
+      doc_string = leading_doc_comment.text ();
 
     int l = fcn_tok->line ();
     int c = fcn_tok->column ();
 
     octave_user_function *tmp_fcn
-      = start_function (id, param_list, body, end_fcn_stmt);
+      = start_function (id, param_list, body, end_fcn_stmt, doc_string);
 
     tree_function_def *retval = finish_function (ret_list, tmp_fcn, lc, l, c);
 
@@ -4002,7 +4016,8 @@
   base_parser::start_function (tree_identifier *id,
                                tree_parameter_list *param_list,
                                tree_statement_list *body,
-                               tree_statement *end_fcn_stmt)
+                               tree_statement *end_fcn_stmt,
+                               const std::string& doc_string)
   {
     // We'll fill in the return list later.
 
@@ -4118,11 +4133,9 @@
     // because the doc_string of the outermost function is read first,
     // whereas this function is called for the innermost function first.
     // We could have a stack of doc_string objects in lexer.
-    if (! m_lexer.m_doc_string.empty () && m_curr_fcn_depth == 0)
-      {
-        fcn->document (m_lexer.m_doc_string.text ());
-        m_lexer.m_doc_string.reset ();
-      }
+    if (! doc_string.empty () && m_curr_fcn_depth == 0)
+      fcn->document (doc_string);
+
 
     if (m_lexer.m_reading_fcn_file && m_curr_fcn_depth == 0
         && ! m_parsing_subfunctions)
@@ -4367,7 +4380,7 @@
                               tree_identifier *id,
                               tree_classdef_superclass_list *sc,
                               tree_classdef_body *body, token *end_tok,
-                              comment_list *lc, comment_list *tc)
+                              comment_list *lc, comment_list *bc, comment_list *tc)
   {
     tree_classdef *retval = nullptr;
 
@@ -4394,6 +4407,7 @@
         delete sc;
         delete body;
         delete lc;
+        delete bc;
         delete tc;
 
         bison_error ("invalid classdef definition, the class name must match the filename", l, c);
@@ -4406,15 +4420,26 @@
             int l = tok_val->line ();
             int c = tok_val->column ();
 
+            // First non-copyright comments found above and below
+            // function keyword are candidates for the documentation
+            // string.  Use the first one that is not empty.
+
+            std::string doc_string;
+
+            if (lc)
+              doc_string = lc->find_doc_string ();
+
+            if (doc_string.empty () && bc)
+              doc_string = bc->find_doc_string ();
+
             if (! body)
               body = new tree_classdef_body ();
 
+            // FIXME - pass body comment to tree_classdef constructor.
+
             retval = new tree_classdef (m_lexer.m_symtab_context.curr_scope (),
-                                        m_lexer.m_classdef_doc_string.text (),
-                                        a, id, sc, body, lc, tc,
+                                        doc_string, a, id, sc, body, lc, tc,
                                         m_curr_package_name, full_name, l, c);
-
-            m_lexer.m_classdef_doc_string.reset ();
           }
         else
           {
@@ -4423,6 +4448,7 @@
             delete sc;
             delete body;
             delete lc;
+            delete bc;
             delete tc;
 
             end_token_error (end_tok, token::switch_end);
--- a/libinterp/parse-tree/parse.h	Sun Feb 04 14:52:17 2024 -0500
+++ b/libinterp/parse-tree/parse.h	Sun Feb 04 23:24:58 2024 -0500
@@ -394,7 +394,8 @@
   // Begin defining a function.
   OCTINTERP_API octave_user_function *
   start_function (tree_identifier *id, tree_parameter_list *param_list,
-                  tree_statement_list *body, tree_statement *end_function);
+                  tree_statement_list *body, tree_statement *end_function,
+                  const std::string& doc_string);
 
   // Create a no-op statement for end_function.
   OCTINTERP_API tree_statement *
@@ -456,7 +457,7 @@
   make_classdef (token *tok_val, tree_classdef_attribute_list *a,
                  tree_identifier *id, tree_classdef_superclass_list *sc,
                  tree_classdef_body *body, token *end_tok,
-                 comment_list *lc, comment_list *tc);
+                 comment_list *lc, comment_list *bc, comment_list *tc);
 
   OCTINTERP_API tree_classdef_properties_block *
   make_classdef_properties_block (token *tok_val,