changeset 7720:4e2eafef689c

unify comment and help text processing in lex.l and parse.y
author John W. Eaton <jwe@octave.org>
date Thu, 17 Apr 2008 16:44:49 -0400
parents 87eda1f8faaa
children 9369589f2ba5
files src/ChangeLog src/lex.h src/lex.l src/parse.y
diffstat 4 files changed, 247 insertions(+), 218 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Wed Apr 16 22:08:15 2008 -0400
+++ b/src/ChangeLog	Thu Apr 17 16:44:49 2008 -0400
@@ -1,3 +1,29 @@
+2008-04-17  John W. Eaton  <jwe@octave.org>
+
+	* parse.y (looks_like_copyright): Handle leading whitespace.
+	(class stdio_stream_reader): New class.
+	(skip_white_space): New function.
+	(gobble_leading_white_space): New arg, EOF.  Change all uses.
+	Use skip_white_space and grab_comment_block to process comments.
+	(process_leading_comments): Delete.
+	(parse_fcn_file): Call gobble_leading_white_space instead of
+	process_leading_comments.
+	Skip parsing if gobble_leading_white_space detects EOF.
+	* lex.l (process_comment): Delete CCHAR arg.  Change all uses.
+	({CCHAR}): Unput comment character before calling process_comment.
+	(grab_comment_block): Rename from grab_help_text.  Don't discard
+	spaces from comment text.  Update input_line_number and
+	current_input_column as characters are read.  Warn only once about
+	incompatible comment characters in a given block of comments.
+	Use stream_reader class instead of accessing yyinput and yyunput
+	directly.
+	(class flex_stream_reader): New class.
+	(process_comment): Use grab_comment_block to handle all comments.
+	Don't call maybe_gripe_matlab_incompatible_comment.
+	Always store comment text returned by grab_comment_block.
+	* lex.h (grab_comment_block): Provide decl.
+	(class stream_reader): New class.
+
 2008-04-16  John W. Eaton  <jwe@octave.org>
 
 	* parse.y (Fautoload, Fmfilename): Call
--- a/src/lex.h	Wed Apr 16 22:08:15 2008 -0400
+++ b/src/lex.h	Thu Apr 17 16:44:49 2008 -0400
@@ -134,6 +134,26 @@
   lexical_feedback& operator = (const lexical_feedback&);
 };
 
+class
+stream_reader
+{
+public:
+  virtual int getc (void) = 0;
+  virtual int ungetc (int c) = 0;
+
+protected:
+  stream_reader (void) { }
+  ~stream_reader (void) { }
+
+private:
+
+  // No copying!
+  stream_reader (const stream_reader&);
+  stream_reader& operator = (const stream_reader&);
+};
+
+extern std::string grab_comment_block (stream_reader& reader, bool& eof);
+
 // TRUE means that we have encountered EOF on the input stream.
 extern bool parser_end_of_input;
 
--- a/src/lex.l	Wed Apr 16 22:08:15 2008 -0400
+++ b/src/lex.l	Thu Apr 17 16:44:49 2008 -0400
@@ -244,8 +244,7 @@
 static int is_keyword_token (const std::string& s);
 static void prep_for_function (void);
 static void prep_for_nested_function (void);
-static std::string grab_help_text (bool& eof);
-static int process_comment (char cchar, bool& eof);
+static int process_comment (bool& eof);
 static bool match_any (char c, const char *s);
 static bool next_token_is_sep_op (void);
 static bool next_token_is_bin_op (bool spc_prev);
@@ -631,7 +630,8 @@
 
 {CCHAR} {
     bool eof = false;
-    int tok = process_comment (yytext[0], eof);
+    yyunput (yytext[0], yytext);
+    int tok = process_comment (eof);
     if (eof)
       TOK_RETURN (END_OF_INPUT);
     else if (tok > 0)
@@ -1134,33 +1134,29 @@
     val = Matrix ();
 }
 
-// Grab the help text from an function file.
-
-// FIXME -- gobble_leading_white_space() in parse.y
-// duplicates some of this code!
-
-static std::string
-grab_help_text (bool& eof)
+std::string
+grab_comment_block (stream_reader& reader, bool& eof)
 {
   std::string buf;
 
-  bool begin_comment = true;
-  bool in_comment = true;
-  bool discard_space = true;
+  // TRUE means we are at the beginning of a comment block.
+  bool begin_comment = false;
+
+  // TRUE means we are currently reading a comment block.
+  bool in_comment = false;
+
+  bool warned_incompatible = false;
 
   int c = 0;
 
-  while ((c = yyinput ()) != EOF)
+  while ((c = reader.getc ()) != EOF)
     {
+      current_input_column++;
+
       if (begin_comment)
 	{
 	  if (c == '%' || c == '#')
 	    continue;
-	  else if (discard_space && c == ' ')
-	    {
-	      discard_space = false;
-	      continue;
-	    }
 	  else
 	    begin_comment = false;
 	}	
@@ -1171,26 +1167,36 @@
 
 	  if (c == '\n')
 	    {
+	      input_line_number++;
+	      current_input_column = 0;
+
 	      in_comment = false;
-	      discard_space = true;
 	    }
 	}
       else
 	{
 	  switch (c)
 	    {
+	    case ' ':
+	    case '\t':
+	      break;
+
 	    case '#':
+	      if (! warned_incompatible)
+		{
+		  warned_incompatible = true;
+		  maybe_gripe_matlab_incompatible_comment (c);
+		}
+	      // fall through...
+
 	    case '%':
-	      maybe_gripe_matlab_incompatible_comment (yytext[0]);
 	      in_comment = true;
 	      begin_comment = true;
 	      break;
 
-	    case ' ':
-	    case '\t':
-	      break;
-
 	    default:
+	      current_input_column--;
+	      reader.ungetc (c);
 	      goto done;
 	    }
 	}
@@ -1201,14 +1207,24 @@
   if (c == EOF)
     eof = true;
 
-  if (c && ! eof)
-    yyunput (c, yytext);
-
   return buf;
 }
 
+class
+flex_stream_reader : public stream_reader
+{
+public:
+  flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { }
+
+  int getc (void) { return ::yyinput (); }
+  int ungetc (int c) { ::yyunput (c, buf); return 0; }
+  
+private:
+  char *buf;
+};
+
 static int
-process_comment (char cchar, bool& eof)
+process_comment (bool& eof)
 {
   eof = false;
 
@@ -1217,44 +1233,24 @@
   if (! help_buf.empty ())
     help_txt = help_buf.top ();
 
+  flex_stream_reader flex_reader (yytext);
+
+  std::string txt = grab_comment_block (flex_reader, eof);
+
   if (help_txt.empty () && nesting_level.none ())
     {
-      std::string txt = grab_help_text (eof);
-
       if (! help_buf.empty ())
 	help_buf.pop ();
 
       help_buf.push (txt);
-
-      octave_comment_buffer::append (txt);
     }
-  else
-    {
-      std::string buf;
-
-      bool begin_comment = true;
-
-      int c;
-      while ((c = yyinput ()) != EOF && c != '\n')
-	{
-	  if (begin_comment && (c == '#' || c == '%'))
-	    ; /* Skip leading comment characters. */
-	  else
-	    buf += static_cast<char> (c);
-	}
-
-      octave_comment_buffer::append (buf);
-
-      if (c == EOF)
-	eof = true;
-    }
+
+  octave_comment_buffer::append (txt);
 
   current_input_column = 1;
   lexer_flags.quote_is_transpose = false;
   lexer_flags.convert_spaces_to_comma = true;
 
-  maybe_gripe_matlab_incompatible_comment (cchar);
-
   if (YY_START == COMMAND_START)
     BEGIN (INITIAL);
 
--- a/src/parse.y	Wed Apr 16 22:08:15 2008 -0400
+++ b/src/parse.y	Thu Apr 17 16:44:49 2008 -0400
@@ -2822,10 +2822,16 @@
 static bool
 looks_like_copyright (const std::string& s)
 {
-  // Perhaps someday we will want to do more here, so leave this as a
-  // separate function.
-
-  return (s.substr (0, 9) == "Copyright");
+  bool retval = false;
+
+  if (! s.empty ())
+    {
+      size_t offset = s.find_first_not_of (" \t");
+  
+      retval = (s.substr (offset, 9) == "Copyright");
+    }
+
+  return retval;
 }
 
 static int
@@ -2849,117 +2855,89 @@
   return c;
 }
 
-// Eat whitespace and comments from FFILE, returning the text of the
-// comments read if it doesn't look like a copyright notice.  The
-// parser line and column number information is updated.  Processing
-// stops at the first non-whitespace character that is not part of a
-// comment.
-
-static std::string
-gobble_leading_white_space (FILE *ffile)
+class
+stdio_stream_reader : public stream_reader
 {
-  std::string help_txt;
-
-  // TRUE means we have already seen the first block of comments.
-  bool first_comments_seen = false;
-
-  // TRUE means we are at the beginning of a comment block.
-  bool begin_comment = false;
-
-  // TRUE means we have already cached the help text.
-  bool have_help_text = false;
-
-  // TRUE means we are currently reading a comment block.
-  bool in_comment = false;
-
-  // TRUE means we should discard the first space from the input
-  // (used to strip leading spaces from the help text).
-  bool discard_space = true;
-
-  int c;
-
-  while ((c = text_getc (ffile)) != EOF)
+public:
+  stdio_stream_reader (FILE *f_arg) : stream_reader (), f (f_arg) { }
+
+  int getc (void) { return ::text_getc (f); }
+  int ungetc (int c) { return ::ungetc (c, f); }
+  
+private:
+  FILE *f;
+};
+
+static bool
+skip_white_space (stream_reader& reader)
+{
+  int c = 0;
+
+  while ((c = reader.getc ()) != EOF)
     {
-      current_input_column++;
-
-      if (begin_comment)
-	{
-	  if (c == '%' || c == '#')
-	    continue;
-	  else if (discard_space && c == ' ')
-	    {
-	      discard_space = false;
-	      continue;
-	    }
-	  else
-	    begin_comment = false;
-	}
-
-      if (in_comment)
+      switch (c)
 	{
-	  if (! have_help_text)
-	    {
-	      first_comments_seen = true;
-	      help_txt += static_cast<char> (c);
-	    }
-
-	  if (c == '\n')
-	    {
-	      input_line_number++;
-	      current_input_column = 0;
-
-	      in_comment = false;
-	      discard_space = true;
-	    }
-	}
-      else
-	{
-	  switch (c)
-	    {
-	    case '\n':
-	      input_line_number++;
-	      current_input_column = 0;
-	      // fall through...
-
-	    case ' ':
-	    case '\t':
-	      if (first_comments_seen && ! have_help_text)
-		{
-		  if (looks_like_copyright (help_txt))
-		    help_txt.resize (0);
-
-		  if (! help_txt.empty ())
-		    have_help_text = true;
-		}
-	      break;
-
-	    case '%':
-	    case '#':
-	      begin_comment = true;
-	      in_comment = true;
-	      break;
-
-	    default:
-	      current_input_column--;
-	      ungetc (c, ffile);
-	      goto done;
-	    }
+	case ' ':
+	case '\t':
+	  current_input_column++;
+	  break;
+
+	case '\n':
+	  input_line_number++;
+	  current_input_column = 0;
+	  break;
+
+	default:
+	  current_input_column--;
+	  reader.ungetc (c);
+	  goto done;
 	}
     }
 
  done:
 
-  return help_txt;
+  return (c == EOF);
 }
 
-static void
-process_leading_comments (FILE *fptr)
+static std::string
+gobble_leading_white_space (FILE *ffile, bool& eof)
 {
-  std::string txt = gobble_leading_white_space (fptr);
-
-  help_buf.push (txt);
-
-  octave_comment_buffer::append (txt);
+  std::string help_txt;
+
+  eof = false;
+
+  // TRUE means we have already cached the help text.
+  bool have_help_text = false;
+
+  std::string txt;
+
+  stdio_stream_reader stdio_reader (ffile);
+
+  while (true)
+    {
+      eof = skip_white_space (stdio_reader);
+
+      if (eof)
+	break;
+
+      txt = grab_comment_block (stdio_reader, eof);
+
+      if (txt.empty ())
+	break;
+
+      if (! (have_help_text || looks_like_copyright (txt)))
+	{
+	  help_txt = txt;
+	  have_help_text = true;
+	}
+
+      octave_comment_buffer::append (txt);
+
+      if (eof)
+	break;
+    }
+
+  return help_txt;
 }
 
 static bool
@@ -3046,66 +3024,74 @@
 
   if (ffile)
     {
-      process_leading_comments (ffile);
-
-      std::string file_type;
-
-      bool parsing_script = false;
-
-      if (! force_script && looking_at_function_keyword (ffile))
-	{
-	  file_type = "function";
-
-	  unwind_protect_int (Vecho_executing_commands);
-	  unwind_protect_bool (reading_fcn_file);
-	  unwind_protect_bool (get_input_from_eval_string);
-	  unwind_protect_bool (parser_end_of_input);
-
-	  Vecho_executing_commands = ECHO_OFF;
-	  reading_fcn_file = true;
-	  get_input_from_eval_string = false;
-	  parser_end_of_input = false;
-	}
-      else
+      bool eof;
+
+      std::string help_txt = gobble_leading_white_space (ffile, eof);
+
+      if (! help_txt.empty ())
+	help_buf.push (help_txt);
+
+      if (! eof)
 	{
-	  file_type = "script";
-
-	  // The value of `reading_fcn_file' will be restored to the
-	  // proper value when we unwind from this frame.
-	  reading_fcn_file = old_reading_fcn_file_state;
-
-	  unwind_protect_bool (reading_script_file);
-
-	  reading_script_file = true;
-
-	  parsing_script = true;
+	  std::string file_type;
+
+	  bool parsing_script = false;
+
+	  if (! force_script && looking_at_function_keyword (ffile))
+	    {
+	      file_type = "function";
+
+	      unwind_protect_int (Vecho_executing_commands);
+	      unwind_protect_bool (reading_fcn_file);
+	      unwind_protect_bool (get_input_from_eval_string);
+	      unwind_protect_bool (parser_end_of_input);
+
+	      Vecho_executing_commands = ECHO_OFF;
+	      reading_fcn_file = true;
+	      get_input_from_eval_string = false;
+	      parser_end_of_input = false;
+	    }
+	  else
+	    {
+	      file_type = "script";
+
+	      // The value of `reading_fcn_file' will be restored to the
+	      // proper value when we unwind from this frame.
+	      reading_fcn_file = old_reading_fcn_file_state;
+
+	      unwind_protect_bool (reading_script_file);
+
+	      reading_script_file = true;
+
+	      parsing_script = true;
+	    }
+
+	  YY_BUFFER_STATE old_buf = current_buffer ();
+	  YY_BUFFER_STATE new_buf = create_buffer (ffile);
+
+	  unwind_protect::add (restore_input_buffer, old_buf);
+	  unwind_protect::add (delete_input_buffer, new_buf);
+
+	  switch_to_buffer (new_buf);
+
+	  unwind_protect_ptr (curr_fcn_ptr);
+	  curr_fcn_ptr = 0;
+
+	  reset_parser ();
+
+	  if (parsing_script)
+	    prep_lexer_for_script ();
+
+	  lexer_flags.parsing_class_method = ! dispatch_type.empty ();
+
+	  int status = yyparse ();
+
+	  fcn_ptr = curr_fcn_ptr;
+
+	  if (status != 0)
+	    error ("parse error while reading %s file %s",
+		   file_type.c_str(), ff.c_str ());
 	}
-
-      YY_BUFFER_STATE old_buf = current_buffer ();
-      YY_BUFFER_STATE new_buf = create_buffer (ffile);
-
-      unwind_protect::add (restore_input_buffer, old_buf);
-      unwind_protect::add (delete_input_buffer, new_buf);
-
-      switch_to_buffer (new_buf);
-
-      unwind_protect_ptr (curr_fcn_ptr);
-      curr_fcn_ptr = 0;
-
-      reset_parser ();
-
-      if (parsing_script)
-	prep_lexer_for_script ();
-
-      lexer_flags.parsing_class_method = ! dispatch_type.empty ();
-
-      int status = yyparse ();
-
-      fcn_ptr = curr_fcn_ptr;
-
-      if (status != 0)
-	error ("parse error while reading %s file %s",
-	       file_type.c_str(), ff.c_str ());
     }
   else if (require_file)
     error ("no such file, `%s'", ff.c_str ());
@@ -3135,7 +3121,8 @@
 	{
 	  unwind_protect::add (safe_fclose, fptr);
 
-	  retval = gobble_leading_white_space (fptr);
+	  bool eof;
+	  retval = gobble_leading_white_space (fptr, eof);
 
 	  if (retval.empty ())
 	    {