Mercurial > octave

--- a/libinterp/parse-tree/comment-list.h	Fri Feb 02 20:55:11 2024 -0500
+++ b/libinterp/parse-tree/comment-list.h	Fri Feb 02 23:47:25 2024 -0500
@@ -54,11 +54,12 @@
     copyright
   };

-  comment_elt (const std::string& s = "", comment_type t = unknown)
-    : m_text (s), m_type (t) { }
+  comment_elt (const std::string& s = "", comment_type t = unknown, bool uses_hash_char = false)
+    : m_text (s), m_type (t), m_uses_hash_char (uses_hash_char) { }

   comment_elt (const comment_elt& oc)
-    : m_text (oc.m_text), m_type (oc.m_type) { }
+    : m_text (oc.m_text), m_type (oc.m_type), m_uses_hash_char (oc.m_uses_hash_char)
+  { }

   comment_elt& operator = (const comment_elt& oc)
   {
@@ -66,11 +67,14 @@
       {
         m_text = oc.m_text;
         m_type = oc.m_type;
+        m_uses_hash_char = oc.m_uses_hash_char;
       }

     return *this;
   }

+  bool empty () const { return m_text.empty (); }
+
   std::string text () const { return m_text; }

   comment_type type () const { return m_type; }
@@ -80,6 +84,14 @@
   bool is_end_of_line () const { return m_type == end_of_line; }
   bool is_doc_string () const { return m_type == doc_string; }
   bool is_copyright () const { return m_type == copyright; }
+  bool uses_hash_char () const { return m_uses_hash_char; }
+
+  void reset ()
+  {
+    m_text = "";
+    m_type = unknown;
+    m_uses_hash_char = false;
+  }

   ~comment_elt () = default;

@@ -90,6 +102,10 @@

   // The type of comment.
   comment_type m_type;
+
+  // TRUE means a line comment uses '#' or a block comment uses at least
+  // one '#' delimiter.
+  bool m_uses_hash_char;
 };

 class comment_list : public base_list<comment_elt>
@@ -102,8 +118,9 @@
   { base_list<comment_elt>::append (elt); }

   void append (const std::string& s,
-               comment_elt::comment_type t = comment_elt::unknown)
-  { append (comment_elt (s, t)); }
+               comment_elt::comment_type t = comment_elt::unknown,
+               bool uses_hash_char = false)
+  { append (comment_elt (s, t, uses_hash_char)); }

   comment_list * dup () const;
 };
--- a/libinterp/parse-tree/lex.h	Fri Feb 02 20:55:11 2024 -0500
+++ b/libinterp/parse-tree/lex.h	Fri Feb 02 23:47:25 2024 -0500
@@ -288,6 +288,7 @@
       m_reading_script_file (false),
       m_reading_classdef_file (false),
       m_buffer_function_text (false),
+      m_comment_uses_hash_char (false),
       m_bracketflag (0),
       m_braceflag (0),
       m_looping (0),
@@ -299,11 +300,11 @@
       m_filepos (1, 1),
       m_tok_beg (),
       m_tok_end (),
+      m_classdef_doc_string (),
+      m_doc_string (),
       m_string_text (),
       m_current_input_line (),
       m_comment_text (),
-      m_classdef_help_text (),
-      m_help_text (),
       m_function_text (),
       m_fcn_file_name (),
       m_fcn_file_full_name (),
@@ -435,6 +436,10 @@
   // parsing.
   bool m_buffer_function_text;

+  // TRUE means a line comment uses '#' or a block comment uses at least
+  // one '#' delimiter.
+  bool m_comment_uses_hash_char;
+
   // square bracket level count.
   int m_bracketflag;

@@ -470,21 +475,22 @@
   filepos m_tok_beg;
   filepos m_tok_end;

+  // Pending doc string for classdef object.
+  comment_elt m_classdef_doc_string;
+
+  // Pending doc string for functions.
+  comment_elt m_doc_string;
+
   // The current character string text.
   std::string m_string_text;

   // The current line of input.
   std::string m_current_input_line;

-  // The current comment text.
+  // The text of the current comment, used to gather comment lines
+  // before storing in m_comment_buf.
   std::string m_comment_text;

-  // The current classdef help text.
-  std::string m_classdef_help_text;
-
-  // The current help text.
-  std::string m_help_text;
-
   // The text of functions entered on the command line.
   std::string m_function_text;

@@ -570,12 +576,12 @@

     ~comment_buffer () { delete m_comment_list; }

-    void append (const std::string& s, comment_elt::comment_type t)
+    void append (const std::string& s, comment_elt::comment_type t, bool uses_hash_char)
     {
       if (! m_comment_list)
         m_comment_list = new comment_list ();

-      m_comment_list->append (s, t);
+      m_comment_list->append (s, t, uses_hash_char);
     }

     // Caller is expected to delete the returned value.
@@ -674,6 +680,8 @@

   int handle_identifier ();

+  void check_comment_for_hash_char (const char *txt, std::size_t len);
+
   void maybe_warn_separator_insert (char sep);

   void warn_language_extension (const std::string& msg);
--- a/libinterp/parse-tree/lex.ll	Fri Feb 02 20:55:11 2024 -0500
+++ b/libinterp/parse-tree/lex.ll	Fri Feb 02 23:47:25 2024 -0500
@@ -454,8 +454,9 @@

     curr_lexer->m_filepos.next_line ();
     curr_lexer->m_looking_for_object_index = false;
-    curr_lexer->m_at_beginning_of_statement = true;
     curr_lexer->pop_start_state ();
+    curr_lexer->m_comment_uses_hash_char = yytext[0] == '#';
+    curr_lexer->finish_comment (octave::comment_elt::end_of_line);

     return curr_lexer->handle_token ('\n');
   }
@@ -734,6 +735,8 @@

     if (curr_lexer->m_block_comment_nesting_level)
       curr_lexer->m_comment_text = "\n";
+    else
+      curr_lexer->check_comment_for_hash_char (yytext, yyleng);

     curr_lexer->m_block_comment_nesting_level++;

@@ -758,7 +761,10 @@
     if (curr_lexer->m_block_comment_nesting_level > 1)
       curr_lexer->m_comment_text = "\n";
     else
-      curr_lexer->finish_comment (octave::comment_elt::block);
+      {
+        curr_lexer->check_comment_for_hash_char (yytext, yyleng);
+        curr_lexer->finish_comment (octave::comment_elt::block);
+      }

     curr_lexer->m_block_comment_nesting_level--;

@@ -844,8 +850,25 @@

     bool have_space = (i > 0);

-    while (i < yyleng && (yytext[i] == '#' || yytext[i] == '%'))
-      i++;
+    bool first = true;
+
+    while (i < yyleng)
+      {
+        char c = yytext[i];
+
+        if (c == '#' || c == '%')
+          {
+            if (first && c == '#')
+              {
+                curr_lexer->m_comment_uses_hash_char = true;
+                first = false;
+              }
+
+            i++;
+          }
+        else
+          break;
+      }

     curr_lexer->m_comment_text += &yytext[i];

@@ -2191,6 +2214,7 @@
     m_reading_script_file = false;
     m_reading_classdef_file = false;
     m_buffer_function_text = false;
+    m_comment_uses_hash_char = false;
     m_bracketflag = 0;
     m_braceflag = 0;
     m_looping = 0;
@@ -2202,11 +2226,11 @@
     m_filepos = filepos (1, 1);
     m_tok_beg = filepos ();
     m_tok_end = filepos ();
+    m_classdef_doc_string.reset ();
+    m_doc_string.reset ();
     m_string_text = "";
     m_current_input_line = "";
     m_comment_text = "";
-    m_classdef_help_text = "";
-    m_help_text = "";
     m_function_text = "";
     m_fcn_file_name = "";
     m_fcn_file_full_name = "";
@@ -3286,11 +3310,19 @@
       mark_previous_token_trailing_space ();

     bool have_comment = false;
+    bool first = true;
     while (offset < yylng)
       {
         char c = yytxt[offset];
+
         if (c == '#' || c == '%')
           {
+            if (first && c == '#')
+              {
+                m_comment_uses_hash_char = true;
+                first = false;
+              }
+
             have_comment = true;
             offset++;
           }
@@ -3323,17 +3355,17 @@

     if (typ != octave::comment_elt::end_of_line
         && m_nesting_level.none ()
-        && m_help_text.empty () && ! m_comment_text.empty ()
+        && m_doc_string.empty () && ! m_comment_text.empty ()
         && ! copyright && ! looks_like_shebang (m_comment_text))
-      m_help_text = m_comment_text;
+      m_doc_string = comment_elt (m_comment_text, typ, m_comment_uses_hash_char);

     if (copyright)
       typ = comment_elt::copyright;

-    m_comment_buf.append (m_comment_text, typ);
+    m_comment_buf.append (m_comment_text, typ, m_comment_uses_hash_char);

     m_comment_text = "";
-
+    m_comment_uses_hash_char = false;
     m_at_beginning_of_statement = true;
   }

@@ -3544,6 +3576,27 @@
   }

+  // Examine the comment delimiter text TXT (LEN characters long) and
+  // set m_comment_uses_hash_char if the first character that is not
+  // accepted by is_space_or_tab is '#'.  Does nothing if the flag is
+  // already set, so a '#' noticed earlier in the same comment is not
+  // overwritten by a later delimiter that uses '%'.
   void
+  base_lexer::check_comment_for_hash_char (const char *txt, std::size_t len)
+  {
+    if (m_comment_uses_hash_char)
+      return;
+
+    std::size_t i = 0;
+    while (i < len && is_space_or_tab (txt[i]))
+      i++;
+
+    // Stop at LEN instead of relying on TXT being NUL-terminated: if
+    // TXT is nothing but whitespace, I equals LEN and TXT[I] is past
+    // the end of the text we were given.
+    m_comment_uses_hash_char = (i < len && txt[i] == '#');
+  }
+
+  void
   base_lexer::maybe_warn_separator_insert (char sep)
   {
     std::string nm = m_fcn_file_full_name;
--- a/libinterp/parse-tree/oct-parse.yy	Fri Feb 02 20:55:11 2024 -0500
+++ b/libinterp/parse-tree/oct-parse.yy	Fri Feb 02 23:47:25 2024 -0500
@@ -1832,8 +1832,8 @@
                         YYABORT;
                       }

-                    lexer.m_classdef_help_text = lexer.m_help_text;
-                    lexer.m_help_text = "";
+                    lexer.m_classdef_doc_string = lexer.m_doc_string;
+                    lexer.m_doc_string.reset ();

                     // Create invalid parent scope.
                     lexer.m_symtab_context.push (octave::symbol_scope::anonymous ());
@@ -1847,16 +1847,13 @@

 classdef        : classdef_beg stash_comment attr_list identifier opt_sep superclass_list stash_comment class_body END
                   {
-                    OCTAVE_YYUSE ($4);
+                    OCTAVE_YYUSE ($5);

                     octave::comment_list *lc = $2;
                     octave::comment_list *tc = lexer.get_comment ();

-                    if (lexer.m_classdef_help_text.empty () && $7 && ! $7->empty ())
-                      {
-                        const octave::comment_elt& elt = $7->front ();
-                        lexer.m_classdef_help_text = elt.text ();
-                      }
+                    if (lexer.m_classdef_doc_string.empty () && $7 && ! $7->empty ())
+                      lexer.m_classdef_doc_string = $7->front ();

                     lexer.m_parsing_classdef = false;

@@ -2017,7 +2014,7 @@

 properties_beg  : PROPERTIES
                   {
-                    lexer.m_help_text = "";
+                    lexer.m_doc_string.reset ();

                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
@@ -2095,7 +2092,7 @@

 methods_beg     : METHODS
                   {
-                    lexer.m_help_text = "";
+                    lexer.m_doc_string.reset ();

                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
@@ -2181,7 +2178,7 @@

 events_beg      : EVENTS
                   {
-                    lexer.m_help_text = "";
+                    lexer.m_doc_string.reset ();

                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
@@ -2235,7 +2232,7 @@

 enumeration_beg : ENUMERATION
                   {
-                    lexer.m_help_text = "";
+                    lexer.m_doc_string.reset ();

                     lexer.m_classdef_element_names_are_keywords = false;
                     $$ = $1;
@@ -3909,10 +3906,10 @@
     octave_user_script *script
       = new octave_user_script (m_lexer.m_fcn_file_full_name,
                                 m_lexer.m_fcn_file_name, script_scope,
-                                cmds, m_lexer.m_help_text);
+                                cmds, m_lexer.m_doc_string.text ());

     m_lexer.m_symtab_context.pop ();
-    m_lexer.m_help_text = "";
+    m_lexer.m_doc_string.reset ();

     sys::time now;

@@ -3965,16 +3962,26 @@
                               tree_statement *end_fcn_stmt,
                               comment_list *lc, comment_list *bc)
   {
+    // FIXME: maybe choose which comment to use by checking whether
+    // any language extensions are noticed in the entire source file,
+    // not just in the comments that are candidates to become the
+    // function doc string.
+
     // If we are looking at a classdef method and there is a comment
-    // prior to the function keyword and another after, choose the one
-    // inside the function definition for compatibility with Matlab.
-
-    if (m_lexer.m_parsing_classdef && ! m_lexer.m_help_text.empty () && bc && ! bc->empty ())
-      {
-        const octave::comment_elt& elt = bc->front ();
-        m_lexer.m_help_text = elt.text ();
-      }
-
+    // prior to the function keyword and another after, then
+    //
+    //   * Choose the one outside the function definition if either of
+    //     the comments uses hash '#' characters.  This is the preferred
+    //     Octave style.
+    //
+    //   * Choose the one inside the function definition if both
+    //     comments use percent '%' characters.  This is
+    //     Matlab-compatible behavior.
+
+    if (m_lexer.m_parsing_classdef && ! m_lexer.m_doc_string.empty ()
+        && bc && ! bc->empty () && ! m_lexer.m_doc_string.uses_hash_char ()
+        && ! bc->front().uses_hash_char ())
+      m_lexer.m_doc_string = bc->front ();

     int l = fcn_tok->line ();
     int c = fcn_tok->column ();
@@ -4106,16 +4113,15 @@
                  id_name.c_str (), m_lexer.m_fcn_file_full_name.c_str ());
       }

-    // Record help text for functions other than nested functions.
+    // Record doc string for functions other than nested functions.
     // We cannot currently record help for nested functions (bug #46008)
     // because the doc_string of the outermost function is read first,
     // whereas this function is called for the innermost function first.
-    // We could have a stack of help_text in lexer.
-    if (! m_lexer.m_help_text.empty () && m_curr_fcn_depth == 0)
+    // We could have a stack of doc_string objects in lexer.
+    if (! m_lexer.m_doc_string.empty () && m_curr_fcn_depth == 0)
       {
-        fcn->document (m_lexer.m_help_text);
-
-        m_lexer.m_help_text = "";
+        fcn->document (m_lexer.m_doc_string.text ());
+        m_lexer.m_doc_string.reset ();
       }

     if (m_lexer.m_reading_fcn_file && m_curr_fcn_depth == 0
@@ -4404,11 +4410,11 @@
               body = new tree_classdef_body ();

             retval = new tree_classdef (m_lexer.m_symtab_context.curr_scope (),
-                                        m_lexer.m_classdef_help_text,
+                                        m_lexer.m_classdef_doc_string.text (),
                                         a, id, sc, body, lc, tc,
                                         m_curr_package_name, full_name, l, c);

-            m_lexer.m_classdef_help_text = "";
+            m_lexer.m_classdef_doc_string.reset ();
           }
         else
           {