changeset 7723:74f5e0c7de9e

first pass at handling block comments
author John W. Eaton <jwe@octave.org>
date Sat, 19 Apr 2008 01:59:31 -0400
parents c3bb0b7a4261
children 932b0cf51834
files src/ChangeLog src/lex.h src/lex.l src/parse.y
diffstat 4 files changed, 247 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Fri Apr 18 13:29:50 2008 -0400
+++ b/src/ChangeLog	Sat Apr 19 01:59:31 2008 -0400
@@ -1,5 +1,20 @@
 2008-04-18  John W. Eaton  <jwe@octave.org>
 
+	* lex.l, lex.h (process_comment): New arg, start_in_block.  Call
+	grab_block_comment if start_in_block is true.  Change all uses.
+	* lex.l (grab_block_comment): New function.
+	(grab_comment_block): New arg, at_bol.  Change all uses.
+	Call grab_block_comment if we find the start of a block comment.
+	(block_comment_nesting_level): New static variable.
+	(^{S}*{CCHAR}\{{S}*{NL}): New rule.
+	(<<EOF>>): Warn about open block comments.
+	(reset_parser): Set block_comment_nesting_level to zero.
+	* parse.y (parse_fcn_file): Stash help text from
+	gobble_leading_white_space after calling reset_parser.
+	(text_getc): Keep track of input_line_number here.
+	(skip_white_space): Don't increment input_line_number here.
+	* lex.l (grab_comment_block): Or here.
+
 	* lex.l (Vdisplay_tokens): New static variable.
 	(F__display_tokens__): New function.
 	(display_token): New function.
@@ -8,7 +23,9 @@
 	(<COMMAND_START>[\"\'], "'", \"): Move handle_string outside of
 	COUNT_TOK_AND_RETURN macro parameter list.
 	(handle_identifier): Don't use macros to return token values here.
-
+	(<MATRIX_START>{S}*{COMMENT}{SNLCMT}*|<MATRIX_START>{S}*{NL}{SNLCMT}*):
+	Recognize block comments here.
+	
 	* pr-output.cc (Fdisp): If nargout > 0, produce an sq-string
 	unless arg is a dq-string.
 
--- a/src/lex.h	Fri Apr 18 13:29:50 2008 -0400
+++ b/src/lex.h	Sat Apr 19 01:59:31 2008 -0400
@@ -152,7 +152,8 @@
   stream_reader& operator = (const stream_reader&);
 };
 
-extern std::string grab_comment_block (stream_reader& reader, bool& eof);
+extern std::string
+grab_comment_block (stream_reader& reader, bool at_bol, bool& eof);
 
 // TRUE means that we have encountered EOF on the input stream.
 extern bool parser_end_of_input;
--- a/src/lex.l	Fri Apr 18 13:29:50 2008 -0400
+++ b/src/lex.l	Sat Apr 19 01:59:31 2008 -0400
@@ -248,6 +248,10 @@
 
 static unsigned int Vtoken_count = 0;
 
+// The current depth of block comment nesting.  Zero means we are not
+// inside a block comment.
+static int block_comment_nesting_level = 0;
+
 // Forward declarations for functions defined at the bottom of this
 // file.
 
@@ -256,7 +260,7 @@
 static int is_keyword_token (const std::string& s);
 static void prep_for_function (void);
 static void prep_for_nested_function (void);
-static int process_comment (bool& eof);
+static int process_comment (bool start_in_block, bool& eof);
 static bool match_any (char c, const char *s);
 static bool next_token_is_sep_op (void);
 static bool next_token_is_bin_op (bool spc_prev);
@@ -367,6 +371,8 @@
 //
 // It's also a pain in the ass to decide whether to insert a comma
 // after seeing a ']' character...
+
+// FIXME -- we need to handle block comments here.
 %}
 
 <MATRIX_START>{SNLCMT}*\]{S}* {
@@ -381,6 +387,10 @@
     COUNT_TOK_AND_RETURN (tok_to_return);
   }
 
+%{
+// FIXME -- we need to handle block comments here.
+%}
+
 <MATRIX_START>{SNLCMT}*\}{S}* {
     scan_for_comments (yytext);
     fixup_column_count (yytext);
@@ -455,6 +465,8 @@
 // Semicolons are handled as row seprators in matrix constants.  If we
 // don't eat whitespace here we can end up inserting too many
 // semicolons.
+
+// FIXME -- we need to handle block comments here.
 %}
 
 <MATRIX_START>{SNLCMT}*;{SNLCMT}* {
@@ -470,6 +482,8 @@
 // In some cases, new lines can also become row separators.  If we
 // don't eat whitespace here we can end up inserting too many
 // semicolons.
+
+// FIXME -- we need to handle block comments here.
 %}
 
 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* |
@@ -565,6 +579,16 @@
 %}
 
 <<EOF>> {
+    if (block_comment_nesting_level != 0)
+      {
+	warning ("block comment open at end of input");
+
+	if ((reading_fcn_file || reading_script_file)
+	    && ! curr_fcn_file_name.empty ())
+	  warning ("near line %d of file `%s.m'",
+		   input_line_number, curr_fcn_file_name.c_str ());
+      }
+
     TOK_RETURN (END_OF_INPUT);
   }
 
@@ -647,9 +671,11 @@
 %} 
 
 {CCHAR} {
+    yyunput (yytext[0], yytext);
+
     bool eof = false;
-    yyunput (yytext[0], yytext);
-    int tok = process_comment (eof);
+    int tok = process_comment (false, eof);
+
     if (eof)
       TOK_RETURN (END_OF_INPUT);
     else if (tok > 0)
@@ -657,6 +683,18 @@
   }
 
 %{
+// Block comments.
+%}
+
+^{S}*{CCHAR}\{{S}*{NL} {
+    current_input_column = 1;
+    block_comment_nesting_level++;
+    promptflag--;
+    bool eof = false;
+    process_comment (true, eof);
+  }
+
+%{
 // Other operators.
 %}
 
@@ -827,6 +865,9 @@
   // We do want a prompt by default.
   promptflag = 1;
 
+  // We are not in a block comment.
+  block_comment_nesting_level = 0;
+
   // Error may have occurred inside some brackets, braces, or parentheses.
   nesting_level.clear ();
 
@@ -1152,8 +1193,115 @@
     val = Matrix ();
 }
 
+static std::string
+grab_block_comment (stream_reader& reader, bool& eof)
+{
+  // Collect block comment text from READER.  "%{"/"#{" and "%}"/"#}"
+  // followed only by blanks and a newline raise or lower the nesting
+  // level; at level zero, append grab_comment_block's text and return.
+  std::string buf;
+  bool at_bol = true;
+  bool look_for_marker = false;
+  bool warned_incompatible = false;
+  int c = 0;
+
+  while ((c = reader.getc ()) != EOF)
+    {
+      current_input_column++;
+      if (look_for_marker)
+	{
+	  at_bol = false;
+	  look_for_marker = false;
+	  if (c == '{' || c == '}')
+	    {
+	      std::string tmp_buf (1, static_cast<char> (c));
+	      int type = c;
+	      bool done = false;
+
+	      // Once DONE is set, the loop test still reads one more
+	      // character; it is examined after the loop, not discarded.
+	      while ((c = reader.getc ()) != EOF && ! done)
+		{
+		  current_input_column++;
+		  switch (c)
+		    {
+		    case ' ':
+		    case '\t':
+		      tmp_buf += static_cast<char> (c);
+		      break;
+
+		    case '\n':
+		      {
+			current_input_column = 0;
+			at_bol = true;
+			done = true;
+
+			if (type == '{')
+			  {
+			    block_comment_nesting_level++;
+			    promptflag--;
+			  }
+			else
+			  {
+			    block_comment_nesting_level--;
+			    promptflag++;
+			    if (block_comment_nesting_level == 0)
+			      {
+				buf += grab_comment_block (reader, true, eof);
+				return buf;
+			      }
+			  }
+		      }
+		      break;
+
+		    default:
+		      at_bol = false;
+		      tmp_buf += static_cast<char> (c);
+		      buf += tmp_buf;
+		      done = true;
+		      break;
+		    }
+		}
+	    }
+	}
+
+      // The marker scan above may have stopped at end of input.
+      if (c == EOF)
+	break;
+
+      if (at_bol && (c == '%' || c == '#'))
+        {
+          if (c == '#' && ! warned_incompatible)
+	    {
+	      warned_incompatible = true;
+	      maybe_gripe_matlab_incompatible_comment (c);
+	    }
+	  at_bol = false;
+	  look_for_marker = true;
+	}
+      else
+	{
+	  // NOTE(review): AT_BOL is not cleared here, so a comment character
+	  // later in the line still starts a marker check -- confirm intent.
+	  buf += static_cast<char> (c);
+	  if (c == '\n')
+	    {
+	      current_input_column = 0;
+	      at_bol = true;
+	    }
+	}
+    }
+
+  // Only reached when input ended before the outermost block closed.
+  if (c == EOF)
+    eof = true;
+
+  return buf;
+}
+
 std::string
-grab_comment_block (stream_reader& reader, bool& eof)
+grab_comment_block (stream_reader& reader, bool at_bol,
+		    bool& eof)
 {
   std::string buf;
 
@@ -1174,9 +1322,59 @@
       if (begin_comment)
 	{
 	  if (c == '%' || c == '#')
-	    continue;
+	    {
+	      at_bol = false;
+	      continue;
+	    }
+	  else if (at_bol && c == '{')
+	    {
+	      std::string tmp_buf (1, static_cast<char> (c));
+
+	      bool done = false;
+
+	      while ((c = reader.getc ()) != EOF && ! done)
+		{
+		  current_input_column++;
+
+		  switch (c)
+		    {
+		    case ' ':
+		    case '\t':
+		      tmp_buf += static_cast<char> (c);
+		      break;
+
+		    case '\n':
+		      {
+			current_input_column = 0;
+			at_bol = true;
+			done = true;
+
+			block_comment_nesting_level++;
+			promptflag--;
+
+			buf += grab_block_comment (reader, eof);
+
+			in_comment = false;
+
+			if (eof)
+			  goto done;
+		      }
+		      break;
+
+		    default:
+		      at_bol = false;
+		      tmp_buf += static_cast<char> (c);
+		      buf += tmp_buf;
+		      done = true;
+		      break;
+		    }
+		}
+	    }
 	  else
-	    begin_comment = false;
+	    {
+	      at_bol = false;
+	      begin_comment = false;
+	    }
 	}	
 
       if (in_comment)
@@ -1185,9 +1383,8 @@
 
 	  if (c == '\n')
 	    {
-	      input_line_number++;
+	      at_bol = true;
 	      current_input_column = 0;
-
 	      in_comment = false;
 	    }
 	}
@@ -1242,7 +1439,7 @@
 };
 
 static int
-process_comment (bool& eof)
+process_comment (bool start_in_block, bool& eof)
 {
   eof = false;
 
@@ -1253,7 +1450,12 @@
 
   flex_stream_reader flex_reader (yytext);
 
-  std::string txt = grab_comment_block (flex_reader, eof);
+  // START_IN_BLOCK is true when the opening line of a block comment has
+  // already been matched; otherwise we are looking at an ordinary comment.
+
+  std::string txt = start_in_block
+    ? grab_block_comment (flex_reader, eof)
+    : grab_comment_block (flex_reader, false, eof);
 
   if (help_txt.empty () && nesting_level.none ())
     {
@@ -1509,6 +1711,8 @@
   return retval;
 }
 
+// FIXME -- we need to handle block comments here.
+
 static void
 scan_for_comments (const char *text)
 {
@@ -1593,6 +1797,8 @@
 //  ATE_SPACE_OR_TAB : space or tab in input
 //  ATE_NEWLINE      : bare new line in input
 
+// FIXME -- we need to handle block comments here.
+
 static yum_yum
 eat_whitespace (void)
 {
@@ -1782,6 +1988,8 @@
 // If non-whitespace characters are found before comment
 // characters, return 0.  Otherwise, return 1.
 
+// FIXME -- we need to handle block comments here.
+
 static bool
 have_continuation (bool trailing_comments_ok)
 {
--- a/src/parse.y	Fri Apr 18 13:29:50 2008 -0400
+++ b/src/parse.y	Sat Apr 19 01:59:31 2008 -0400
@@ -2845,12 +2845,16 @@
     {
       c = getc (f);
 
-      if (c != '\n')
+      if (c == '\n')
+        input_line_number++;
+      else
 	{
 	  ungetc (c, f);
 	  c = '\r';
 	}
     }
+  else if (c == '\n')
+    input_line_number++;
 
   return c;
 }
@@ -2883,7 +2887,6 @@
 	  break;
 
 	case '\n':
-	  input_line_number++;
 	  current_input_column = 0;
 	  break;
 
@@ -2920,7 +2923,7 @@
       if (eof)
 	break;
 
-      txt = grab_comment_block (stdio_reader, eof);
+      txt = grab_comment_block (stdio_reader, true, eof);
 
       if (txt.empty ())
 	break;
@@ -3028,9 +3031,6 @@
 
       std::string help_txt = gobble_leading_white_space (ffile, eof);
 
-      if (! help_txt.empty ())
-	help_buf.push (help_txt);
-
       if (! eof)
 	{
 	  std::string file_type;
@@ -3079,6 +3079,9 @@
 
 	  reset_parser ();
 
+	  if (! help_txt.empty ())
+	    help_buf.push (help_txt);
+
 	  if (parsing_script)
 	    prep_lexer_for_script ();