changeset 8745:6dc61981d18b

better handling of object indexing in lexer
author John W. Eaton <jwe@octave.org>
date Sun, 15 Feb 2009 16:31:16 -0500
parents 4142982c66c6
children 5dd06f19e9be
files src/ChangeLog src/lex.h src/lex.l src/parse.y
diffstat 4 files changed, 238 insertions(+), 115 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Sun Feb 15 19:07:05 2009 -0500
+++ b/src/ChangeLog	Sun Feb 15 16:31:16 2009 -0500
@@ -1,3 +1,16 @@
+2009-02-15  John W. Eaton  <jwe@octave.org>
+
+	* lex.h, lex.l (lexer_flags.looking_at_object_index): Now a
+	std::list<bool> object instead of an int.
+	Push TRUE to list at start of object index.  Push FALSE at
+	beginning of matrix list.  Pop value at end of object index or
+	matrix list.
+	(lexer_flags.looking_for_object_index): New data member.
+	Set it as needed in rules.
+	(inside_any_object_index): New function.
+	* parse.y (begin_obj_idx, cancel_obj_idx): Delete non-terminals
+	and all uses.
+
 2009-02-13 Ben Abbott <bpabott@mac.com>
 
 	* graphics.h.in (class axes::properties): Initialize ticklength.
--- a/src/lex.h	Sun Feb 15 19:07:05 2009 -0500
+++ b/src/lex.h	Sun Feb 15 16:31:16 2009 -0500
@@ -24,6 +24,8 @@
 #if !defined (octave_lex_h)
 #define octave_lex_h 1
 
+#include <list>
+
 // FIXME -- these input buffer things should be members of a
 // parser input stream class.
 
@@ -104,8 +106,12 @@
   // multi-value assignment statement.
   bool looking_at_matrix_or_assign_lhs;
 
-  // Nonzero means we're parsing an indexing operation for an object.
-  int looking_at_object_index;
+  // If the front of the list is TRUE, the closest paren, brace, or
+  // bracket nesting is an index for an object.
+  std::list<bool> looking_at_object_index;
+
+  // TRUE if the token just seen can be indexed; FALSE until we see one.
+  bool looking_for_object_index;
 
   // GAG.  Stupid kludge so that [[1,2][3,4]] will work.
   bool do_comma_insert;
--- a/src/lex.l	Sun Feb 15 19:07:05 2009 -0500
+++ b/src/lex.l	Sun Feb 15 16:31:16 2009 -0500
@@ -164,6 +164,7 @@
       current_input_column += yyleng; \
       lexer_flags.quote_is_transpose = false; \
       lexer_flags.convert_spaces_to_comma = convert; \
+      lexer_flags.looking_for_object_index = false; \
       COUNT_TOK_AND_RETURN (tok); \
     } \
   while (0)
@@ -171,8 +172,8 @@
 #define XBIN_OP_RETURN(tok, convert) \
   do \
     { \
-	gripe_matlab_incompatible_operator (yytext); \
-        BIN_OP_RETURN (tok, convert); \
+      gripe_matlab_incompatible_operator (yytext); \
+      BIN_OP_RETURN (tok, convert); \
     } \
   while (0)
 
@@ -362,15 +363,20 @@
     BEGIN (INITIAL);
     input_line_number++;
     current_input_column = 1;
+
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
     lexer_flags.doing_rawcommand = false;
+    lexer_flags.looking_for_object_index = false;
+
     COUNT_TOK_AND_RETURN ('\n');
   }
 
 <COMMAND_START>[\;\,] {
     LEXER_DEBUG ("<COMMAND_START>[\\;\\,]");
 
+    lexer_flags.looking_for_object_index = false;
+
     if (lexer_flags.doing_rawcommand)
       TOK_PUSH_AND_RETURN (yytext, SQ_STRING);
 
@@ -387,6 +393,7 @@
 
     current_input_column++;
     int tok = handle_string (yytext[0], true);
+
     COUNT_TOK_AND_RETURN (tok);
   }
 
@@ -394,6 +401,9 @@
     LEXER_DEBUG ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*");
 
     std::string tok = strip_trailing_whitespace (yytext);
+
+    lexer_flags.looking_for_object_index = false;
+
     TOK_PUSH_AND_RETURN (tok, SQ_STRING);
   }
 
@@ -416,12 +426,19 @@
 
     scan_for_comments (yytext);
     fixup_column_count (yytext);
+
+    lexer_flags.looking_at_object_index.pop_front ();
+
+    lexer_flags.looking_for_object_index = true;
+
     int c = yytext[yyleng-1];
     int cont_is_spc = eat_continuation ();
     bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
     int tok_to_return = handle_close_bracket (spc_gobbled, ']');
+
     if (spc_gobbled)
-      yyunput (' ', yytext);
+      xunput (' ', yytext);
+
     COUNT_TOK_AND_RETURN (tok_to_return);
   }
 
@@ -434,12 +451,19 @@
 
     scan_for_comments (yytext);
     fixup_column_count (yytext);
+
+    lexer_flags.looking_at_object_index.pop_front ();
+
+    lexer_flags.looking_for_object_index = true;
+
     int c = yytext[yyleng-1];
     int cont_is_spc = eat_continuation ();
     bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
     int tok_to_return = handle_close_bracket (spc_gobbled, '}');
+
     if (spc_gobbled)
-      yyunput (' ', yytext);
+      xunput (' ', yytext);
+
     COUNT_TOK_AND_RETURN (tok_to_return);
   }
 
@@ -458,12 +482,16 @@
 
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
-
-    if ((tmp & ATE_NEWLINE) == ATE_NEWLINE)
+    lexer_flags.looking_for_object_index = false;
+
+    if (! lexer_flags.looking_at_object_index.front ())
       {
-	maybe_warn_separator_insert (';');
-
-	yyunput (';', yytext);
+	if ((tmp & ATE_NEWLINE) == ATE_NEWLINE)
+	  {
+	    maybe_warn_separator_insert (';');
+
+	    xunput (';', yytext);
+	  }
       }
 
     COUNT_TOK_AND_RETURN (',');
@@ -482,27 +510,31 @@
     current_input_column += yyleng;
 
     int tmp = eat_continuation ();
-    bool bin_op = next_token_is_bin_op (true);
-    bool postfix_un_op = next_token_is_postfix_unary_op (true);
-    bool sep_op = next_token_is_sep_op ();
-
-    if (! (postfix_un_op || bin_op || sep_op)
-	&& nesting_level.is_bracket_or_brace ()
-	&& lexer_flags.convert_spaces_to_comma)
+
+    if (! lexer_flags.looking_at_object_index.front ())
       {
-	if ((tmp & ATE_NEWLINE) == ATE_NEWLINE)
+	bool bin_op = next_token_is_bin_op (true);
+	bool postfix_un_op = next_token_is_postfix_unary_op (true);
+	bool sep_op = next_token_is_sep_op ();
+
+	if (! (postfix_un_op || bin_op || sep_op)
+	    && nesting_level.is_bracket_or_brace ()
+	    && lexer_flags.convert_spaces_to_comma)
 	  {
-	    maybe_warn_separator_insert (';');
-
-	    yyunput (';', yytext);
+	    if ((tmp & ATE_NEWLINE) == ATE_NEWLINE)
+	      {
+		maybe_warn_separator_insert (';');
+
+		xunput (';', yytext);
+	      }
+
+	    lexer_flags.quote_is_transpose = false;
+	    lexer_flags.convert_spaces_to_comma = true;
+
+	    maybe_warn_separator_insert (',');
+
+	    COUNT_TOK_AND_RETURN (',');
 	  }
-
-	lexer_flags.quote_is_transpose = false;
-	lexer_flags.convert_spaces_to_comma = true;
-
-	maybe_warn_separator_insert (',');
-
-	COUNT_TOK_AND_RETURN (',');
       }
   }
 
@@ -520,8 +552,11 @@
     scan_for_comments (yytext);
     fixup_column_count (yytext);
     eat_whitespace ();
+
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
+    lexer_flags.looking_for_object_index = false;
+
     COUNT_TOK_AND_RETURN (';');
   }
 
@@ -547,7 +582,8 @@
     if (nesting_level.none ())
       return LEXICAL_ERROR;
 
-    if (nesting_level.is_bracket_or_brace ())
+    if (! lexer_flags.looking_at_object_index.front ()
+	&& nesting_level.is_bracket_or_brace ())
       {
 	maybe_warn_separator_insert (';');
 
@@ -560,9 +596,12 @@
 
     nesting_level.bracket ();
 
+    lexer_flags.looking_at_object_index.push_front (false);
+
     current_input_column += yyleng;
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
+    lexer_flags.looking_for_object_index = false;
 
     if (lexer_flags.defining_func && ! lexer_flags.parsed_function_name)
       lexer_flags.looking_at_return_list = true;
@@ -582,6 +621,10 @@
 
     nesting_level.remove ();
 
+    lexer_flags.looking_at_object_index.pop_front ();
+
+    lexer_flags.looking_for_object_index = true;
+
     TOK_RETURN (']');
   }
 
@@ -664,7 +707,11 @@
     int id_tok = handle_identifier ();
 
     if (id_tok >= 0)
-      COUNT_TOK_AND_RETURN (id_tok);
+      {
+        lexer_flags.looking_for_object_index = true;
+
+        COUNT_TOK_AND_RETURN (id_tok);
+      }
   }
 
 %{
@@ -675,9 +722,12 @@
     LEXER_DEBUG ("@");
 
     current_input_column++;
+
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = false;
     lexer_flags.looking_at_function_handle++;
+    lexer_flags.looking_for_object_index = false;
+
     COUNT_TOK_AND_RETURN ('@');
   }
 
@@ -734,6 +784,7 @@
 
     current_input_column++;
     int tok = handle_string ('"');
+
     COUNT_TOK_AND_RETURN (tok);
 }
 
@@ -744,6 +795,8 @@
 {CCHAR} {
     LEXER_DEBUG ("{CCHAR}");
 
+    lexer_flags.looking_for_object_index = false;
+
     xunput (yytext[0], yytext);
 
     bool eof = false;
@@ -762,6 +815,8 @@
 ^{S}*{CCHAR}\{{S}*{NL} {
     LEXER_DEBUG ("^{S}*{CCHAR}\\{{S}*{NL}");
 
+    lexer_flags.looking_for_object_index = false;
+
     input_line_number++;
     current_input_column = 1;
     block_comment_nesting_level++;
@@ -822,9 +877,20 @@
 "(" {
     LEXER_DEBUG ("(");
 
+    // If we are looking for an object index, then push TRUE for
+    // looking_at_object_index.  Otherwise, push FALSE so that there
+    // is still an entry to pop off the stack when we find the
+    // matching close paren.
+
+    lexer_flags.looking_at_object_index.push_front
+      (lexer_flags.looking_for_object_index);
+
     lexer_flags.looking_at_indirect_ref = false;
+    lexer_flags.looking_for_object_index = false;
+
     nesting_level.paren ();
     promptflag--;
+
     TOK_RETURN ('(');
   }
 
@@ -833,13 +899,25 @@
 
     nesting_level.remove ();
     current_input_column++;
+
+    lexer_flags.looking_at_object_index.pop_front ();
+
     lexer_flags.quote_is_transpose = true;
     lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace ();
+    lexer_flags.looking_for_object_index = true;
+
     do_comma_insert_check ();
+
     COUNT_TOK_AND_RETURN (')');
   }
 
-"."     { LEXER_DEBUG ("."); TOK_RETURN ('.'); }
+"." {
+    LEXER_DEBUG (".");
+
+    lexer_flags.looking_for_object_index = false;
+
+    TOK_RETURN ('.');
+  }
 
 "+="	{ LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false); }
 "-="	{ LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false); }
@@ -863,9 +941,13 @@
 
     nesting_level.brace ();
 
+    lexer_flags.looking_at_object_index.push_front
+      (lexer_flags.looking_for_object_index);
+
     current_input_column += yyleng;
     lexer_flags.quote_is_transpose = false;
     lexer_flags.convert_spaces_to_comma = true;
+    lexer_flags.looking_for_object_index = false;
 
     promptflag--;
     eat_whitespace ();
@@ -878,6 +960,10 @@
 "}" {
     LEXER_DEBUG ("}");
 
+    lexer_flags.looking_at_object_index.pop_front ();
+
+    lexer_flags.looking_for_object_index = true;
+
     nesting_level.remove ();
 
     TOK_RETURN ('}');
@@ -927,9 +1013,10 @@
   xunput (c, yytext);
 
   if (spc_gobbled)
-    yyunput (' ', yytext);
-
-  lexer_flags.do_comma_insert = (lexer_flags.bracketflag && c == '[');
+    xunput (' ', yytext);
+
+  lexer_flags.do_comma_insert = (! lexer_flags.looking_at_object_index.front ()
+				 && lexer_flags.bracketflag && c == '[');
 }
 
 // Fix things up for errors or interrupts.  The parser is never called
@@ -991,30 +1078,6 @@
   lexer_flags.init ();
 }
 
-static int
-text_yyinput (void)
-{
-  int c = yyinput ();
-
-  // Convert CRLF into just LF and single CR into LF.
-
-  if (c == '\r')
-    {
-      c = yyinput ();
-
-      if (c != '\n')
-	{
-	  yyunput (c, yytext);
-	  c = '\n';
-	}
-    }
-
-  if (c == '\n')
-    input_line_number++;
-
-  return c;
-}
-
 static void
 display_character (char c)
 {
@@ -1160,6 +1223,45 @@
 	break;
       }
 }
+
+static int
+text_yyinput (void)
+{
+  int c = yyinput ();
+
+  if (lexer_debug_flag)
+    {
+      std::cerr << "I: ";
+      display_character (c);
+      std::cerr << std::endl;
+    }
+
+  // Convert CRLF into just LF and single CR into LF.
+
+  if (c == '\r')
+    {
+      c = yyinput ();
+
+      if (lexer_debug_flag)
+	{
+	  std::cerr << "I: ";
+	  display_character (c);
+	  std::cerr << std::endl;
+	}
+
+      if (c != '\n')
+	{
+	  xunput (c, yytext);
+	  c = '\n';
+	}
+    }
+
+  if (c == '\n')
+    input_line_number++;
+
+  return c;
+}
+
 static void
 xunput (char c, char *buf)
 {
@@ -1277,6 +1379,24 @@
   token_stack.push (yylval.tok_val);
 }
 
+static bool
+inside_any_object_index (void)
+{
+  bool retval = false;
+
+  for (std::list<bool>::const_iterator i = lexer_flags.looking_at_object_index.begin ();
+       i != lexer_flags.looking_at_object_index.end (); i++)
+    {
+      if (*i)
+	{
+	  retval = true;
+	  break;
+	}
+    }
+
+  return retval;
+}
+
 // Handle keywords.  Return -1 if the keyword should be ignored.
 
 static int
@@ -1310,7 +1430,7 @@
  	  break;
 
 	case end_kw:
-	  if (lexer_flags.looking_at_object_index
+	  if (inside_any_object_index ()
 	      || (lexer_flags.defining_func
 		  && ! (lexer_flags.looking_at_return_list
 			|| lexer_flags.parsed_function_name)))
@@ -2211,6 +2331,7 @@
 
   lexer_flags.quote_is_transpose = true;
   lexer_flags.convert_spaces_to_comma = true;
+  lexer_flags.looking_for_object_index = true;
 
   yylval.tok_val = new token (value, yytext, input_line_number,
 			      current_input_column);
@@ -2446,6 +2567,8 @@
 		  else if (delim == '\'')
 		    gripe_single_quote_string ();
 
+                  lexer_flags.looking_for_object_index = true;
+
 		  return delim == '"' ? DQ_STRING : SQ_STRING;
 		}
 	    }
@@ -2583,7 +2706,7 @@
 	   && lexer_flags.convert_spaces_to_comma
 	   && (nesting_level.is_bracket ()
 	       || (nesting_level.is_brace ()
-		   && ! lexer_flags.looking_at_object_index)))
+		   && ! lexer_flags.looking_at_object_index.front ())))
     {
       bool index_op = next_token_is_index_op ();
 
@@ -2607,7 +2730,7 @@
 	    {
 	      maybe_warn_separator_insert (',');
 
-	      yyunput (',', yytext);
+	      xunput (',', yytext);
 	      return retval;
 	    }
 	}
@@ -2624,7 +2747,7 @@
 {
   if (nesting_level.is_bracket ()
       || (nesting_level.is_brace ()
-	  && ! lexer_flags.looking_at_object_index))
+	  && ! lexer_flags.looking_at_object_index.front ()))
     {
       int bin_op = next_token_is_bin_op (spc_gobbled);
 
@@ -2654,7 +2777,7 @@
 
       maybe_warn_separator_insert (',');
 
-      yyunput (',', yytext);
+      xunput (',', yytext);
     }
 }
 
@@ -2779,12 +2902,14 @@
 	{
 	  lexer_flags.pending_local_variables.insert (tok);
 	}
-      else if (! (next_tok_is_paren || lexer_flags.looking_at_object_index))
+      else if (! (next_tok_is_paren
+		  || lexer_flags.looking_at_object_index.front ()))
 	{
 	  BEGIN (COMMAND_START);
 	}
 
-      if (is_rawcommand_name (tok) && ! lexer_flags.looking_at_object_index)
+      if (is_rawcommand_name (tok)
+	  && ! lexer_flags.looking_at_object_index.front ())
 	{
 	  lexer_flags.doing_rawcommand = true;
 	  BEGIN (COMMAND_START);
@@ -2855,7 +2980,13 @@
   looking_at_matrix_or_assign_lhs = false;
 
   // Not parsing an object index.
-  looking_at_object_index = 0;
+  while (! looking_at_object_index.empty ())
+    looking_at_object_index.pop_front ();
+
+  looking_at_object_index.push_front (false);
+
+  // Object index not possible until we've seen something.
+  looking_for_object_index = false;
 
   // No need to do comma insert or convert spaces to comma at
   // beginning of input. 
--- a/src/parse.y	Sun Feb 15 19:07:05 2009 -0500
+++ b/src/parse.y	Sun Feb 15 16:31:16 2009 -0500
@@ -694,58 +694,31 @@
 		  { lexer_flags.looking_at_indirect_ref = true; }
 		;
 
-// Two more rules for lexical feedback.  To avoid reduce/reduce
-// conflicts, We use begin_obj_idx after every postfix_expr on the RHS
-// of a rule, then cancel that as soon as possible for cases when we
-// are not actually parsing an index expression.  Since all of those
-// cases are simple tokens that don't involve examining the value of 
-// lexer_flags.looking_at_object_index, I think we should be OK.
-
-begin_obj_idx	: // empty
-		  { lexer_flags.looking_at_object_index++; }
-		;
-
-cancel_obj_idx	: // empty
-		  { lexer_flags.looking_at_object_index--; }
-		;
-
 postfix_expr	: primary_expr
 		  { $$ = $1; }
-		| postfix_expr begin_obj_idx '(' ')'
-		  {
-		    $$ = make_index_expression ($1, 0, '(');
-		    lexer_flags.looking_at_object_index--;
-		  }
-		| postfix_expr begin_obj_idx '(' arg_list ')'
-		  {
-		    $$ = make_index_expression ($1, $4, '(');
-		    lexer_flags.looking_at_object_index--;
-		  }
-		| postfix_expr begin_obj_idx '{' '}'
-		  {
-		    $$ = make_index_expression ($1, 0, '{');
-		    lexer_flags.looking_at_object_index--;
-		  }
-		| postfix_expr begin_obj_idx '{' arg_list '}'
-		  {
-		    $$ = make_index_expression ($1, $4, '{');
-		    lexer_flags.looking_at_object_index--;
-		  }
-		| postfix_expr begin_obj_idx PLUS_PLUS cancel_obj_idx
-		  { $$ = make_postfix_op (PLUS_PLUS, $1, $3); }
-		| postfix_expr begin_obj_idx MINUS_MINUS cancel_obj_idx
-		  { $$ = make_postfix_op (MINUS_MINUS, $1, $3); }
-		| postfix_expr begin_obj_idx QUOTE cancel_obj_idx
-		  { $$ = make_postfix_op (QUOTE, $1, $3); }
-		| postfix_expr begin_obj_idx TRANSPOSE cancel_obj_idx
-		  { $$ = make_postfix_op (TRANSPOSE, $1, $3); }
-		| postfix_expr begin_obj_idx indirect_ref_op cancel_obj_idx STRUCT_ELT
-		  { $$ = make_indirect_ref ($1, $5->text ()); }
-		| postfix_expr begin_obj_idx indirect_ref_op cancel_obj_idx '(' expression ')'
-		  { $$ = make_indirect_ref ($1, $6); }
+		| postfix_expr '(' ')'
+		  { $$ = make_index_expression ($1, 0, '('); }
+		| postfix_expr '(' arg_list ')'
+		  { $$ = make_index_expression ($1, $3, '('); }
+		| postfix_expr '{' '}'
+		  { $$ = make_index_expression ($1, 0, '{'); }
+		| postfix_expr '{' arg_list '}'
+		  { $$ = make_index_expression ($1, $3, '{'); }
+		| postfix_expr PLUS_PLUS
+		  { $$ = make_postfix_op (PLUS_PLUS, $1, $2); }
+		| postfix_expr MINUS_MINUS
+		  { $$ = make_postfix_op (MINUS_MINUS, $1, $2); }
+		| postfix_expr QUOTE
+		  { $$ = make_postfix_op (QUOTE, $1, $2); }
+		| postfix_expr TRANSPOSE
+		  { $$ = make_postfix_op (TRANSPOSE, $1, $2); }
+		| postfix_expr indirect_ref_op STRUCT_ELT
+		  { $$ = make_indirect_ref ($1, $3->text ()); }
+		| postfix_expr indirect_ref_op '(' expression ')'
+		  { $$ = make_indirect_ref ($1, $4); }
 		;
 
-prefix_expr	: postfix_expr begin_obj_idx cancel_obj_idx
+prefix_expr	: postfix_expr
 		  { $$ = $1; }
 		| binary_expr
 		  { $$ = $1; }