# HG changeset patch # User John W. Eaton # Date 1208465089 14400 # Node ID 4e2eafef689cbfa417edd52fa1fcd43a8e60c7e8 # Parent 87eda1f8faaa7bf03d5ee85bfe893cb6bd4756f3 unify comment and help text processing in lex.l and parse.y diff -r 87eda1f8faaa -r 4e2eafef689c src/ChangeLog --- a/src/ChangeLog Wed Apr 16 22:08:15 2008 -0400 +++ b/src/ChangeLog Thu Apr 17 16:44:49 2008 -0400 @@ -1,3 +1,29 @@ +2008-04-17 John W. Eaton + + * parse.y (looks_like_copyright): Handle leading whitespace. + (class stdio_stream_reader): New class. + (skip_white_space): New function. + (gobble_leading_white_space): New arg, EOF. Change all uses. + Use skip_white_space and grab_comment_block to process comments. + (process_leading_comments): Delete. + (parse_fcn_file): Call gobble_leading_white_space instead of + process_leading_comments. + Skip parsing if gobble_leading_white_space detects EOF. + * lex.l (process_comment): Delete CCHAR arg. Change all uses. + ({CCHAR}): Unput comment character before calling process_comment. + (grab_comment_block): Rename from grab_help_text. Don't discard + spaces from comment text. Update input_line_number and + current_input_column as characters are read. Warn only once about + incompatible comment characters in a given block of comments. + Use stream_reader class instead of accessing yyinput and yyunput + directly. + (class flex_stream_reader): New class. + (process_comment): Use grab_comment_block to handle all comments. + Don't call maybe_gripe_matlab_incompatible_comment. + Always store comment text returned by grab_comment_block. + * lex.h (grab_comment_block): Provide decl. + (class stream_reader): New class. + 2008-04-16 John W. Eaton * parse.y (Fautoload, Fmfilename): Call diff -r 87eda1f8faaa -r 4e2eafef689c src/lex.h --- a/src/lex.h Wed Apr 16 22:08:15 2008 -0400 +++ b/src/lex.h Thu Apr 17 16:44:49 2008 -0400 @@ -134,6 +134,26 @@ lexical_feedback& operator = (const lexical_feedback&); }; +class +stream_reader +{ +public: + virtual int getc (void) = 0; + virtual int ungetc (int c) = 0; + +protected: + stream_reader (void) { } + ~stream_reader (void) { } + +private: + + // No copying! + stream_reader (const stream_reader&); + stream_reader& operator = (const stream_reader&); +}; + +extern std::string grab_comment_block (stream_reader& reader, bool& eof); + // TRUE means that we have encountered EOF on the input stream. extern bool parser_end_of_input; diff -r 87eda1f8faaa -r 4e2eafef689c src/lex.l --- a/src/lex.l Wed Apr 16 22:08:15 2008 -0400 +++ b/src/lex.l Thu Apr 17 16:44:49 2008 -0400 @@ -244,8 +244,7 @@ static int is_keyword_token (const std::string& s); static void prep_for_function (void); static void prep_for_nested_function (void); -static std::string grab_help_text (bool& eof); -static int process_comment (char cchar, bool& eof); +static int process_comment (bool& eof); static bool match_any (char c, const char *s); static bool next_token_is_sep_op (void); static bool next_token_is_bin_op (bool spc_prev); @@ -631,7 +630,8 @@ {CCHAR} { bool eof = false; - int tok = process_comment (yytext[0], eof); + yyunput (yytext[0], yytext); + int tok = process_comment (eof); if (eof) TOK_RETURN (END_OF_INPUT); else if (tok > 0) @@ -1134,33 +1134,29 @@ val = Matrix (); } -// Grab the help text from an function file. - -// FIXME -- gobble_leading_white_space() in parse.y -// duplicates some of this code! - -static std::string -grab_help_text (bool& eof) +std::string +grab_comment_block (stream_reader& reader, bool& eof) { std::string buf; - bool begin_comment = true; - bool in_comment = true; - bool discard_space = true; + // TRUE means we are at the beginning of a comment block. + bool begin_comment = false; + + // TRUE means we are currently reading a comment block. + bool in_comment = false; + + bool warned_incompatible = false; int c = 0; - while ((c = yyinput ()) != EOF) + while ((c = reader.getc ()) != EOF) { + current_input_column++; + if (begin_comment) { if (c == '%' || c == '#') continue; - else if (discard_space && c == ' ') - { - discard_space = false; - continue; - } else begin_comment = false; } @@ -1171,26 +1167,36 @@ if (c == '\n') { + input_line_number++; + current_input_column = 0; + in_comment = false; - discard_space = true; } } else { switch (c) { + case ' ': + case '\t': + break; + case '#': + if (! warned_incompatible) + { + warned_incompatible = true; + maybe_gripe_matlab_incompatible_comment (c); + } + // fall through... + case '%': - maybe_gripe_matlab_incompatible_comment (yytext[0]); in_comment = true; begin_comment = true; break; - case ' ': - case '\t': - break; - default: + current_input_column--; + reader.ungetc (c); goto done; } } @@ -1201,14 +1207,24 @@ if (c == EOF) eof = true; - if (c && ! eof) - yyunput (c, yytext); - return buf; } +class +flex_stream_reader : public stream_reader +{ +public: + flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { } + + int getc (void) { return ::yyinput (); } + int ungetc (int c) { ::yyunput (c, buf); return 0; } + +private: + char *buf; +}; + static int -process_comment (char cchar, bool& eof) +process_comment (bool& eof) { eof = false; @@ -1217,44 +1233,24 @@ if (! help_buf.empty ()) help_txt = help_buf.top (); + flex_stream_reader flex_reader (yytext); + + std::string txt = grab_comment_block (flex_reader, eof); + if (help_txt.empty () && nesting_level.none ()) { - std::string txt = grab_help_text (eof); - if (! help_buf.empty ()) help_buf.pop (); help_buf.push (txt); - - octave_comment_buffer::append (txt); } - else - { - std::string buf; - - bool begin_comment = true; - - int c; - while ((c = yyinput ()) != EOF && c != '\n') - { - if (begin_comment && (c == '#' || c == '%')) - ; /* Skip leading comment characters. */ - else - buf += static_cast (c); - } - - octave_comment_buffer::append (buf); - - if (c == EOF) - eof = true; - } + + octave_comment_buffer::append (txt); current_input_column = 1; lexer_flags.quote_is_transpose = false; lexer_flags.convert_spaces_to_comma = true; - maybe_gripe_matlab_incompatible_comment (cchar); - if (YY_START == COMMAND_START) BEGIN (INITIAL); diff -r 87eda1f8faaa -r 4e2eafef689c src/parse.y --- a/src/parse.y Wed Apr 16 22:08:15 2008 -0400 +++ b/src/parse.y Thu Apr 17 16:44:49 2008 -0400 @@ -2822,10 +2822,16 @@ static bool looks_like_copyright (const std::string& s) { - // Perhaps someday we will want to do more here, so leave this as a - // separate function. - - return (s.substr (0, 9) == "Copyright"); + bool retval = false; + + if (! s.empty ()) + { + size_t offset = s.find_first_not_of (" \t"); + + retval = (s.substr (offset, 9) == "Copyright"); + } + + return retval; } static int @@ -2849,117 +2855,89 @@ return c; } -// Eat whitespace and comments from FFILE, returning the text of the -// comments read if it doesn't look like a copyright notice. The -// parser line and column number information is updated. Processing -// stops at the first non-whitespace character that is not part of a -// comment. - -static std::string -gobble_leading_white_space (FILE *ffile) +class +stdio_stream_reader : public stream_reader { - std::string help_txt; - - // TRUE means we have already seen the first block of comments. - bool first_comments_seen = false; - - // TRUE means we are at the beginning of a comment block. - bool begin_comment = false; - - // TRUE means we have already cached the help text. - bool have_help_text = false; - - // TRUE means we are currently reading a comment block. - bool in_comment = false; - - // TRUE means we should discard the first space from the input - // (used to strip leading spaces from the help text). - bool discard_space = true; - - int c; - - while ((c = text_getc (ffile)) != EOF) +public: + stdio_stream_reader (FILE *f_arg) : stream_reader (), f (f_arg) { } + + int getc (void) { return ::text_getc (f); } + int ungetc (int c) { return ::ungetc (c, f); } + +private: + FILE *f; +}; + +static bool +skip_white_space (stream_reader& reader) +{ + int c = 0; + + while ((c = reader.getc ()) != EOF) { - current_input_column++; - - if (begin_comment) - { - if (c == '%' || c == '#') - continue; - else if (discard_space && c == ' ') - { - discard_space = false; - continue; - } - else - begin_comment = false; - } - - if (in_comment) + switch (c) { - if (! have_help_text) - { - first_comments_seen = true; - help_txt += static_cast (c); - } - - if (c == '\n') - { - input_line_number++; - current_input_column = 0; - - in_comment = false; - discard_space = true; - } - } - else - { - switch (c) - { - case '\n': - input_line_number++; - current_input_column = 0; - // fall through... - - case ' ': - case '\t': - if (first_comments_seen && ! have_help_text) - { - if (looks_like_copyright (help_txt)) - help_txt.resize (0); - - if (! help_txt.empty ()) - have_help_text = true; - } - break; - - case '%': - case '#': - begin_comment = true; - in_comment = true; - break; - - default: - current_input_column--; - ungetc (c, ffile); - goto done; - } + case ' ': + case '\t': + current_input_column++; + break; + + case '\n': + input_line_number++; + current_input_column = 0; + break; + + default: + current_input_column--; + reader.ungetc (c); + goto done; } } done: - return help_txt; + return (c == EOF); } -static void -process_leading_comments (FILE *fptr) +static std::string +gobble_leading_white_space (FILE *ffile, bool& eof) { - std::string txt = gobble_leading_white_space (fptr); - - help_buf.push (txt); - - octave_comment_buffer::append (txt); + std::string help_txt; + + eof = false; + + // TRUE means we have already cached the help text. + bool have_help_text = false; + + std::string txt; + + stdio_stream_reader stdio_reader (ffile); + + while (true) + { + eof = skip_white_space (stdio_reader); + + if (eof) + break; + + txt = grab_comment_block (stdio_reader, eof); + + if (txt.empty ()) + break; + + if (! (have_help_text || looks_like_copyright (txt))) + { + help_txt = txt; + have_help_text = true; + } + + octave_comment_buffer::append (txt); + + if (eof) + break; + } + + return help_txt; } static bool @@ -3046,66 +3024,74 @@ if (ffile) { - process_leading_comments (ffile); - - std::string file_type; - - bool parsing_script = false; - - if (! force_script && looking_at_function_keyword (ffile)) - { - file_type = "function"; - - unwind_protect_int (Vecho_executing_commands); - unwind_protect_bool (reading_fcn_file); - unwind_protect_bool (get_input_from_eval_string); - unwind_protect_bool (parser_end_of_input); - - Vecho_executing_commands = ECHO_OFF; - reading_fcn_file = true; - get_input_from_eval_string = false; - parser_end_of_input = false; - } - else + bool eof; + + std::string help_txt = gobble_leading_white_space (ffile, eof); + + if (! help_txt.empty ()) + help_buf.push (help_txt); + + if (! eof) { - file_type = "script"; - - // The value of `reading_fcn_file' will be restored to the - // proper value when we unwind from this frame. - reading_fcn_file = old_reading_fcn_file_state; - - unwind_protect_bool (reading_script_file); - - reading_script_file = true; - - parsing_script = true; + std::string file_type; + + bool parsing_script = false; + + if (! force_script && looking_at_function_keyword (ffile)) + { + file_type = "function"; + + unwind_protect_int (Vecho_executing_commands); + unwind_protect_bool (reading_fcn_file); + unwind_protect_bool (get_input_from_eval_string); + unwind_protect_bool (parser_end_of_input); + + Vecho_executing_commands = ECHO_OFF; + reading_fcn_file = true; + get_input_from_eval_string = false; + parser_end_of_input = false; + } + else + { + file_type = "script"; + + // The value of `reading_fcn_file' will be restored to the + // proper value when we unwind from this frame. + reading_fcn_file = old_reading_fcn_file_state; + + unwind_protect_bool (reading_script_file); + + reading_script_file = true; + + parsing_script = true; + } + + YY_BUFFER_STATE old_buf = current_buffer (); + YY_BUFFER_STATE new_buf = create_buffer (ffile); + + unwind_protect::add (restore_input_buffer, old_buf); + unwind_protect::add (delete_input_buffer, new_buf); + + switch_to_buffer (new_buf); + + unwind_protect_ptr (curr_fcn_ptr); + curr_fcn_ptr = 0; + + reset_parser (); + + if (parsing_script) + prep_lexer_for_script (); + + lexer_flags.parsing_class_method = ! dispatch_type.empty (); + + int status = yyparse (); + + fcn_ptr = curr_fcn_ptr; + + if (status != 0) + error ("parse error while reading %s file %s", + file_type.c_str(), ff.c_str ()); } - - YY_BUFFER_STATE old_buf = current_buffer (); - YY_BUFFER_STATE new_buf = create_buffer (ffile); - - unwind_protect::add (restore_input_buffer, old_buf); - unwind_protect::add (delete_input_buffer, new_buf); - - switch_to_buffer (new_buf); - - unwind_protect_ptr (curr_fcn_ptr); - curr_fcn_ptr = 0; - - reset_parser (); - - if (parsing_script) - prep_lexer_for_script (); - - lexer_flags.parsing_class_method = ! dispatch_type.empty (); - - int status = yyparse (); - - fcn_ptr = curr_fcn_ptr; - - if (status != 0) - error ("parse error while reading %s file %s", - file_type.c_str(), ff.c_str ()); } else if (require_file) error ("no such file, `%s'", ff.c_str ()); @@ -3135,7 +3121,8 @@ { unwind_protect::add (safe_fclose, fptr); - retval = gobble_leading_white_space (fptr); + bool eof; + retval = gobble_leading_white_space (fptr, eof); if (retval.empty ()) {