# HG changeset patch # User John W. Eaton # Date 1706935645 18000 # Node ID 8e039e5258823abe6b9ad52ed92c34226047d193 # Parent c96d0470a582db337620207dce95772adb1115f8# Parent 319dcef8962d23ffe45fab734a60a05430d076db maint: merge default to bytecode-interpreter diff -r c96d0470a582 -r 8e039e525882 libinterp/parse-tree/comment-list.h --- a/libinterp/parse-tree/comment-list.h Fri Feb 02 20:55:11 2024 -0500 +++ b/libinterp/parse-tree/comment-list.h Fri Feb 02 23:47:25 2024 -0500 @@ -54,11 +54,12 @@ copyright }; - comment_elt (const std::string& s = "", comment_type t = unknown) - : m_text (s), m_type (t) { } + comment_elt (const std::string& s = "", comment_type t = unknown, bool uses_hash_char = false) + : m_text (s), m_type (t), m_uses_hash_char (uses_hash_char) { } comment_elt (const comment_elt& oc) - : m_text (oc.m_text), m_type (oc.m_type) { } + : m_text (oc.m_text), m_type (oc.m_type), m_uses_hash_char (oc.m_uses_hash_char) + { } comment_elt& operator = (const comment_elt& oc) { @@ -66,11 +67,14 @@ { m_text = oc.m_text; m_type = oc.m_type; + m_uses_hash_char = oc.m_uses_hash_char; } return *this; } + bool empty () const { return m_text.empty (); } + std::string text () const { return m_text; } comment_type type () const { return m_type; } @@ -80,6 +84,14 @@ bool is_end_of_line () const { return m_type == end_of_line; } bool is_doc_string () const { return m_type == doc_string; } bool is_copyright () const { return m_type == copyright; } + bool uses_hash_char () const { return m_uses_hash_char; } + + void reset () + { + m_text = ""; + m_type = unknown; + m_uses_hash_char = false; + } ~comment_elt () = default; @@ -90,6 +102,10 @@ // The type of comment. comment_type m_type; + + // TRUE means a line comment uses '#' or a block comment used at least + // one '#' delimiter. 
+ bool m_uses_hash_char; }; class comment_list : public base_list @@ -102,8 +118,9 @@ { base_list::append (elt); } void append (const std::string& s, - comment_elt::comment_type t = comment_elt::unknown) - { append (comment_elt (s, t)); } + comment_elt::comment_type t = comment_elt::unknown, + bool uses_hash_char = false) + { append (comment_elt (s, t, uses_hash_char)); } comment_list * dup () const; }; diff -r c96d0470a582 -r 8e039e525882 libinterp/parse-tree/lex.h --- a/libinterp/parse-tree/lex.h Fri Feb 02 20:55:11 2024 -0500 +++ b/libinterp/parse-tree/lex.h Fri Feb 02 23:47:25 2024 -0500 @@ -288,6 +288,7 @@ m_reading_script_file (false), m_reading_classdef_file (false), m_buffer_function_text (false), + m_comment_uses_hash_char (false), m_bracketflag (0), m_braceflag (0), m_looping (0), @@ -299,11 +300,11 @@ m_filepos (1, 1), m_tok_beg (), m_tok_end (), + m_classdef_doc_string (), + m_doc_string (), m_string_text (), m_current_input_line (), m_comment_text (), - m_classdef_help_text (), - m_help_text (), m_function_text (), m_fcn_file_name (), m_fcn_file_full_name (), @@ -435,6 +436,10 @@ // parsing. bool m_buffer_function_text; + // TRUE means a line comment uses '#' or a block comment used at least + // one '#' delimiter. + bool m_comment_uses_hash_char; + // square bracket level count. int m_bracketflag; @@ -470,21 +475,22 @@ filepos m_tok_beg; filepos m_tok_end; + // Pending doc string for classdef object. + comment_elt m_classdef_doc_string; + + // Pending doc string for functions. + comment_elt m_doc_string; + // The current character string text. std::string m_string_text; // The current line of input. std::string m_current_input_line; - // The current comment text. + // The text of the current comment, used to gather comment lines + // before storing in m_comment_buf. std::string m_comment_text; - // The current classdef help text. - std::string m_classdef_help_text; - - // The current help text. 
- std::string m_help_text; - // The text of functions entered on the command line. std::string m_function_text; @@ -570,12 +576,12 @@ ~comment_buffer () { delete m_comment_list; } - void append (const std::string& s, comment_elt::comment_type t) + void append (const std::string& s, comment_elt::comment_type t, bool uses_hash_char) { if (! m_comment_list) m_comment_list = new comment_list (); - m_comment_list->append (s, t); + m_comment_list->append (s, t, uses_hash_char); } // Caller is expected to delete the returned value. @@ -674,6 +680,8 @@ int handle_identifier (); + void check_comment_for_hash_char (const char *txt, std::size_t len); + void maybe_warn_separator_insert (char sep); void warn_language_extension (const std::string& msg); diff -r c96d0470a582 -r 8e039e525882 libinterp/parse-tree/lex.ll --- a/libinterp/parse-tree/lex.ll Fri Feb 02 20:55:11 2024 -0500 +++ b/libinterp/parse-tree/lex.ll Fri Feb 02 23:47:25 2024 -0500 @@ -454,8 +454,9 @@ curr_lexer->m_filepos.next_line (); curr_lexer->m_looking_for_object_index = false; - curr_lexer->m_at_beginning_of_statement = true; curr_lexer->pop_start_state (); + curr_lexer->m_comment_uses_hash_char = yytext[0] == '#'; + curr_lexer->finish_comment (octave::comment_elt::end_of_line); return curr_lexer->handle_token ('\n'); } @@ -734,6 +735,8 @@ if (curr_lexer->m_block_comment_nesting_level) curr_lexer->m_comment_text = "\n"; + else + curr_lexer->check_comment_for_hash_char (yytext, yyleng); curr_lexer->m_block_comment_nesting_level++; @@ -758,7 +761,10 @@ if (curr_lexer->m_block_comment_nesting_level > 1) curr_lexer->m_comment_text = "\n"; else - curr_lexer->finish_comment (octave::comment_elt::block); + { + curr_lexer->check_comment_for_hash_char (yytext, yyleng); + curr_lexer->finish_comment (octave::comment_elt::block); + } curr_lexer->m_block_comment_nesting_level--; @@ -844,8 +850,25 @@ bool have_space = (i > 0); - while (i < yyleng && (yytext[i] == '#' || yytext[i] == '%')) - i++; + bool first = true; + + 
while (i < yyleng) + { + char c = yytext[i]; + + if (c == '#' || c == '%') + { + if (first && c == '#') + { + curr_lexer->m_comment_uses_hash_char = true; + first = false; + } + + i++; + } + else + break; + } curr_lexer->m_comment_text += &yytext[i]; @@ -2191,6 +2214,7 @@ m_reading_script_file = false; m_reading_classdef_file = false; m_buffer_function_text = false; + m_comment_uses_hash_char = false; m_bracketflag = 0; m_braceflag = 0; m_looping = 0; @@ -2202,11 +2226,11 @@ m_filepos = filepos (1, 1); m_tok_beg = filepos (); m_tok_end = filepos (); + m_classdef_doc_string.reset (); + m_doc_string.reset (); m_string_text = ""; m_current_input_line = ""; m_comment_text = ""; - m_classdef_help_text = ""; - m_help_text = ""; m_function_text = ""; m_fcn_file_name = ""; m_fcn_file_full_name = ""; @@ -3286,11 +3310,19 @@ mark_previous_token_trailing_space (); bool have_comment = false; + bool first = true; while (offset < yylng) { char c = yytxt[offset]; + if (c == '#' || c == '%') { + if (first && c == '#') + { + m_comment_uses_hash_char = true; + first = false; + } + have_comment = true; offset++; } @@ -3323,17 +3355,17 @@ if (typ != octave::comment_elt::end_of_line && m_nesting_level.none () - && m_help_text.empty () && ! m_comment_text.empty () + && m_doc_string.empty () && ! m_comment_text.empty () && ! copyright && ! 
looks_like_shebang (m_comment_text)) - m_help_text = m_comment_text; + m_doc_string = comment_elt (m_comment_text, typ, m_comment_uses_hash_char); if (copyright) typ = comment_elt::copyright; - m_comment_buf.append (m_comment_text, typ); + m_comment_buf.append (m_comment_text, typ, m_comment_uses_hash_char); m_comment_text = ""; - + m_comment_uses_hash_char = false; m_at_beginning_of_statement = true; } @@ -3544,6 +3576,19 @@ } void + base_lexer::check_comment_for_hash_char (const char *txt, std::size_t len) + { + if (m_comment_uses_hash_char) + return; + + std::size_t i = 0; + while (i < len && is_space_or_tab (txt[i])) + i++; + + m_comment_uses_hash_char = txt[i] == '#'; + } + + void base_lexer::maybe_warn_separator_insert (char sep) { std::string nm = m_fcn_file_full_name; diff -r c96d0470a582 -r 8e039e525882 libinterp/parse-tree/oct-parse.yy --- a/libinterp/parse-tree/oct-parse.yy Fri Feb 02 20:55:11 2024 -0500 +++ b/libinterp/parse-tree/oct-parse.yy Fri Feb 02 23:47:25 2024 -0500 @@ -1832,8 +1832,8 @@ YYABORT; } - lexer.m_classdef_help_text = lexer.m_help_text; - lexer.m_help_text = ""; + lexer.m_classdef_doc_string = lexer.m_doc_string; + lexer.m_doc_string.reset (); // Create invalid parent scope. lexer.m_symtab_context.push (octave::symbol_scope::anonymous ()); @@ -1847,16 +1847,13 @@ classdef : classdef_beg stash_comment attr_list identifier opt_sep superclass_list stash_comment class_body END { - OCTAVE_YYUSE ($4); + OCTAVE_YYUSE ($5); octave::comment_list *lc = $2; octave::comment_list *tc = lexer.get_comment (); - if (lexer.m_classdef_help_text.empty () && $7 && ! $7->empty ()) - { - const octave::comment_elt& elt = $7->front (); - lexer.m_classdef_help_text = elt.text (); - } + if (lexer.m_classdef_doc_string.empty () && $7 && ! 
$7->empty ()) + lexer.m_classdef_doc_string = $7->front (); lexer.m_parsing_classdef = false; @@ -2017,7 +2014,7 @@ properties_beg : PROPERTIES { - lexer.m_help_text = ""; + lexer.m_doc_string.reset (); lexer.m_classdef_element_names_are_keywords = false; $$ = $1; @@ -2095,7 +2092,7 @@ methods_beg : METHODS { - lexer.m_help_text = ""; + lexer.m_doc_string.reset (); lexer.m_classdef_element_names_are_keywords = false; $$ = $1; @@ -2181,7 +2178,7 @@ events_beg : EVENTS { - lexer.m_help_text = ""; + lexer.m_doc_string.reset (); lexer.m_classdef_element_names_are_keywords = false; $$ = $1; @@ -2235,7 +2232,7 @@ enumeration_beg : ENUMERATION { - lexer.m_help_text = ""; + lexer.m_doc_string.reset (); lexer.m_classdef_element_names_are_keywords = false; $$ = $1; @@ -3909,10 +3906,10 @@ octave_user_script *script = new octave_user_script (m_lexer.m_fcn_file_full_name, m_lexer.m_fcn_file_name, script_scope, - cmds, m_lexer.m_help_text); + cmds, m_lexer.m_doc_string.text ()); m_lexer.m_symtab_context.pop (); - m_lexer.m_help_text = ""; + m_lexer.m_doc_string.reset (); sys::time now; @@ -3965,16 +3962,26 @@ tree_statement *end_fcn_stmt, comment_list *lc, comment_list *bc) { + // FIXME: maybe choose which comment to use by checking whether + // any language extensions are noticed in the entire source file, + // not just in the comments that are candidates to become the + // function doc string. + // If we are looking at a classdef method and there is a comment - // prior to the function keyword and another after, choose the one - // inside the function definition for compatibility with Matlab. - - if (m_lexer.m_parsing_classdef && ! m_lexer.m_help_text.empty () && bc && ! bc->empty ()) - { - const octave::comment_elt& elt = bc->front (); - m_lexer.m_help_text = elt.text (); - } - + // prior to the function keyword and another after, then + // + // * Choose the one outside the function definition if either of + // the comments uses hash '#' characters. 
This is the preferred + // Octave style. + // + // * Choose the one inside the function definition if both + // comments use percent '%' characters. This is + // Matlab-compatible behavior. + + if (m_lexer.m_parsing_classdef && ! m_lexer.m_doc_string.empty () + && bc && ! bc->empty () && ! m_lexer.m_doc_string.uses_hash_char () + && ! bc->front().uses_hash_char ()) + m_lexer.m_doc_string = bc->front (); int l = fcn_tok->line (); int c = fcn_tok->column (); @@ -4106,16 +4113,15 @@ id_name.c_str (), m_lexer.m_fcn_file_full_name.c_str ()); } - // Record help text for functions other than nested functions. + // Record doc string for functions other than nested functions. // We cannot currently record help for nested functions (bug #46008) // because the doc_string of the outermost function is read first, // whereas this function is called for the innermost function first. - // We could have a stack of help_text in lexer. - if (! m_lexer.m_help_text.empty () && m_curr_fcn_depth == 0) + // We could have a stack of doc_string objects in lexer. + if (! m_lexer.m_doc_string.empty () && m_curr_fcn_depth == 0) { - fcn->document (m_lexer.m_help_text); - - m_lexer.m_help_text = ""; + fcn->document (m_lexer.m_doc_string.text ()); + m_lexer.m_doc_string.reset (); } if (m_lexer.m_reading_fcn_file && m_curr_fcn_depth == 0 @@ -4404,11 +4410,11 @@ body = new tree_classdef_body (); retval = new tree_classdef (m_lexer.m_symtab_context.curr_scope (), - m_lexer.m_classdef_help_text, + m_lexer.m_classdef_doc_string.text (), a, id, sc, body, lc, tc, m_curr_package_name, full_name, l, c); - m_lexer.m_classdef_help_text = ""; + m_lexer.m_classdef_doc_string.reset (); } else {