# HG changeset patch # User John W. Eaton # Date 1234733476 18000 # Node ID 6dc61981d18b50d6b41a9833f1fd192e8a64c8b1 # Parent 4142982c66c65382a975fea6fff8eb3ac2293c7d better handling of object indexing in lexer diff -r 4142982c66c6 -r 6dc61981d18b src/ChangeLog --- a/src/ChangeLog Sun Feb 15 19:07:05 2009 -0500 +++ b/src/ChangeLog Sun Feb 15 16:31:16 2009 -0500 @@ -1,3 +1,16 @@ +2009-02-15 John W. Eaton + + * lex.h, lex.l (lexer_flags.looking_at_object_index): Now a + std::list<bool> object instead of an int. + Push TRUE to list at start of object index. Push FALSE at + beginning of matrix list. Pop value at end of object index or + matrix list. + (lexer_flags.looking_for_object_index): New data member. + Set it as needed in rules. + (inside_any_object_index): New function. + * parse.y (begin_obj_idx, cancel_obj_idx): Delete non-terminals + and all uses. + 2009-02-13 Ben Abbott * graphics.h.in (class axes::properties): Initialize ticklength. diff -r 4142982c66c6 -r 6dc61981d18b src/lex.h --- a/src/lex.h Sun Feb 15 19:07:05 2009 -0500 +++ b/src/lex.h Sun Feb 15 16:31:16 2009 -0500 @@ -24,6 +24,8 @@ #if !defined (octave_lex_h) #define octave_lex_h 1 +#include <list> + // FIXME -- these input buffer things should be members of a // parser input stream class. @@ -104,8 +106,12 @@ // multi-value assignment statement. bool looking_at_matrix_or_assign_lhs; - // Nonzero means we're parsing an indexing operation for an object. - int looking_at_object_index; + // If the front of the list is TRUE, the closest paren, brace, or + // bracket nesting is an index for an object. + std::list<bool> looking_at_object_index; + + // Object index not possible until we've seen something. + bool looking_for_object_index; // GAG. Stupid kludge so that [[1,2][3,4]] will work. 
bool do_comma_insert; diff -r 4142982c66c6 -r 6dc61981d18b src/lex.l --- a/src/lex.l Sun Feb 15 19:07:05 2009 -0500 +++ b/src/lex.l Sun Feb 15 16:31:16 2009 -0500 @@ -164,6 +164,7 @@ current_input_column += yyleng; \ lexer_flags.quote_is_transpose = false; \ lexer_flags.convert_spaces_to_comma = convert; \ + lexer_flags.looking_for_object_index = false; \ COUNT_TOK_AND_RETURN (tok); \ } \ while (0) @@ -171,8 +172,8 @@ #define XBIN_OP_RETURN(tok, convert) \ do \ { \ - gripe_matlab_incompatible_operator (yytext); \ - BIN_OP_RETURN (tok, convert); \ + gripe_matlab_incompatible_operator (yytext); \ + BIN_OP_RETURN (tok, convert); \ } \ while (0) @@ -362,15 +363,20 @@ BEGIN (INITIAL); input_line_number++; current_input_column = 1; + lexer_flags.quote_is_transpose = false; lexer_flags.convert_spaces_to_comma = true; lexer_flags.doing_rawcommand = false; + lexer_flags.looking_for_object_index = false; + COUNT_TOK_AND_RETURN ('\n'); } [\;\,] { LEXER_DEBUG ("[\\;\\,]"); + lexer_flags.looking_for_object_index = false; + if (lexer_flags.doing_rawcommand) TOK_PUSH_AND_RETURN (yytext, SQ_STRING); @@ -387,6 +393,7 @@ current_input_column++; int tok = handle_string (yytext[0], true); + COUNT_TOK_AND_RETURN (tok); } @@ -394,6 +401,9 @@ LEXER_DEBUG ("[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); std::string tok = strip_trailing_whitespace (yytext); + + lexer_flags.looking_for_object_index = false; + TOK_PUSH_AND_RETURN (tok, SQ_STRING); } @@ -416,12 +426,19 @@ scan_for_comments (yytext); fixup_column_count (yytext); + + lexer_flags.looking_at_object_index.pop_front (); + + lexer_flags.looking_for_object_index = true; + int c = yytext[yyleng-1]; int cont_is_spc = eat_continuation (); bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); int tok_to_return = handle_close_bracket (spc_gobbled, ']'); + if (spc_gobbled) - yyunput (' ', yytext); + xunput (' ', yytext); + COUNT_TOK_AND_RETURN (tok_to_return); } @@ -434,12 +451,19 @@ scan_for_comments (yytext); 
fixup_column_count (yytext); + + lexer_flags.looking_at_object_index.pop_front (); + + lexer_flags.looking_for_object_index = true; + int c = yytext[yyleng-1]; int cont_is_spc = eat_continuation (); bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); int tok_to_return = handle_close_bracket (spc_gobbled, '}'); + if (spc_gobbled) - yyunput (' ', yytext); + xunput (' ', yytext); + COUNT_TOK_AND_RETURN (tok_to_return); } @@ -458,12 +482,16 @@ lexer_flags.quote_is_transpose = false; lexer_flags.convert_spaces_to_comma = true; - - if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) + lexer_flags.looking_for_object_index = false; + + if (! lexer_flags.looking_at_object_index.front ()) { - maybe_warn_separator_insert (';'); - - yyunput (';', yytext); + if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) + { + maybe_warn_separator_insert (';'); + + xunput (';', yytext); + } } COUNT_TOK_AND_RETURN (','); @@ -482,27 +510,31 @@ current_input_column += yyleng; int tmp = eat_continuation (); - bool bin_op = next_token_is_bin_op (true); - bool postfix_un_op = next_token_is_postfix_unary_op (true); - bool sep_op = next_token_is_sep_op (); - - if (! (postfix_un_op || bin_op || sep_op) - && nesting_level.is_bracket_or_brace () - && lexer_flags.convert_spaces_to_comma) + + if (! lexer_flags.looking_at_object_index.front ()) { - if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) + bool bin_op = next_token_is_bin_op (true); + bool postfix_un_op = next_token_is_postfix_unary_op (true); + bool sep_op = next_token_is_sep_op (); + + if (! 
(postfix_un_op || bin_op || sep_op) + && nesting_level.is_bracket_or_brace () + && lexer_flags.convert_spaces_to_comma) { - maybe_warn_separator_insert (';'); - - yyunput (';', yytext); + if ((tmp & ATE_NEWLINE) == ATE_NEWLINE) + { + maybe_warn_separator_insert (';'); + + xunput (';', yytext); + } + + lexer_flags.quote_is_transpose = false; + lexer_flags.convert_spaces_to_comma = true; + + maybe_warn_separator_insert (','); + + COUNT_TOK_AND_RETURN (','); } - - lexer_flags.quote_is_transpose = false; - lexer_flags.convert_spaces_to_comma = true; - - maybe_warn_separator_insert (','); - - COUNT_TOK_AND_RETURN (','); } } @@ -520,8 +552,11 @@ scan_for_comments (yytext); fixup_column_count (yytext); eat_whitespace (); + lexer_flags.quote_is_transpose = false; lexer_flags.convert_spaces_to_comma = true; + lexer_flags.looking_for_object_index = false; + COUNT_TOK_AND_RETURN (';'); } @@ -547,7 +582,8 @@ if (nesting_level.none ()) return LEXICAL_ERROR; - if (nesting_level.is_bracket_or_brace ()) + if (! lexer_flags.looking_at_object_index.front () + && nesting_level.is_bracket_or_brace ()) { maybe_warn_separator_insert (';'); @@ -560,9 +596,12 @@ nesting_level.bracket (); + lexer_flags.looking_at_object_index.push_front (false); + current_input_column += yyleng; lexer_flags.quote_is_transpose = false; lexer_flags.convert_spaces_to_comma = true; + lexer_flags.looking_for_object_index = false; if (lexer_flags.defining_func && ! 
lexer_flags.parsed_function_name) lexer_flags.looking_at_return_list = true; @@ -582,6 +621,10 @@ nesting_level.remove (); + lexer_flags.looking_at_object_index.pop_front (); + + lexer_flags.looking_for_object_index = true; + TOK_RETURN (']'); } @@ -664,7 +707,11 @@ int id_tok = handle_identifier (); if (id_tok >= 0) - COUNT_TOK_AND_RETURN (id_tok); + { + lexer_flags.looking_for_object_index = true; + + COUNT_TOK_AND_RETURN (id_tok); + } } %{ @@ -675,9 +722,12 @@ LEXER_DEBUG ("@"); current_input_column++; + lexer_flags.quote_is_transpose = false; lexer_flags.convert_spaces_to_comma = false; lexer_flags.looking_at_function_handle++; + lexer_flags.looking_for_object_index = false; + COUNT_TOK_AND_RETURN ('@'); } @@ -734,6 +784,7 @@ current_input_column++; int tok = handle_string ('"'); + COUNT_TOK_AND_RETURN (tok); } @@ -744,6 +795,8 @@ {CCHAR} { LEXER_DEBUG ("{CCHAR}"); + lexer_flags.looking_for_object_index = false; + xunput (yytext[0], yytext); bool eof = false; @@ -762,6 +815,8 @@ ^{S}*{CCHAR}\{{S}*{NL} { LEXER_DEBUG ("^{S}*{CCHAR}\\{{S}*{NL}"); + lexer_flags.looking_for_object_index = false; + input_line_number++; current_input_column = 1; block_comment_nesting_level++; @@ -822,9 +877,20 @@ "(" { LEXER_DEBUG ("("); + // If we are looking for an object index, then push TRUE for + // looking_at_object_index. Otherwise, just push whatever state + // is current (so that we can pop it off the stack when we find + // the matching close paren). 
+ + lexer_flags.looking_at_object_index.push_front + (lexer_flags.looking_for_object_index); + lexer_flags.looking_at_indirect_ref = false; + lexer_flags.looking_for_object_index = false; + nesting_level.paren (); promptflag--; + TOK_RETURN ('('); } @@ -833,13 +899,25 @@ nesting_level.remove (); current_input_column++; + + lexer_flags.looking_at_object_index.pop_front (); + lexer_flags.quote_is_transpose = true; lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace (); + lexer_flags.looking_for_object_index = true; + do_comma_insert_check (); + COUNT_TOK_AND_RETURN (')'); } -"." { LEXER_DEBUG ("."); TOK_RETURN ('.'); } +"." { + LEXER_DEBUG ("."); + + lexer_flags.looking_for_object_index = false; + + TOK_RETURN ('.'); + } "+=" { LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false); } "-=" { LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false); } @@ -863,9 +941,13 @@ nesting_level.brace (); + lexer_flags.looking_at_object_index.push_front + (lexer_flags.looking_for_object_index); + current_input_column += yyleng; lexer_flags.quote_is_transpose = false; lexer_flags.convert_spaces_to_comma = true; + lexer_flags.looking_for_object_index = false; promptflag--; eat_whitespace (); @@ -878,6 +960,10 @@ "}" { LEXER_DEBUG ("}"); + lexer_flags.looking_at_object_index.pop_front (); + + lexer_flags.looking_for_object_index = true; + nesting_level.remove (); TOK_RETURN ('}'); @@ -927,9 +1013,10 @@ xunput (c, yytext); if (spc_gobbled) - yyunput (' ', yytext); - - lexer_flags.do_comma_insert = (lexer_flags.bracketflag && c == '['); + xunput (' ', yytext); + + lexer_flags.do_comma_insert = (! lexer_flags.looking_at_object_index.front () + && lexer_flags.bracketflag && c == '['); } // Fix things up for errors or interrupts. The parser is never called @@ -991,30 +1078,6 @@ lexer_flags.init (); } -static int -text_yyinput (void) -{ - int c = yyinput (); - - // Convert CRLF into just LF and single CR into LF. 
- - if (c == '\r') - { - c = yyinput (); - - if (c != '\n') - { - yyunput (c, yytext); - c = '\n'; - } - } - - if (c == '\n') - input_line_number++; - - return c; -} - static void display_character (char c) { @@ -1160,6 +1223,45 @@ break; } } + +static int +text_yyinput (void) +{ + int c = yyinput (); + + if (lexer_debug_flag) + { + std::cerr << "I: "; + display_character (c); + std::cerr << std::endl; + } + + // Convert CRLF into just LF and single CR into LF. + + if (c == '\r') + { + c = yyinput (); + + if (lexer_debug_flag) + { + std::cerr << "I: "; + display_character (c); + std::cerr << std::endl; + } + + if (c != '\n') + { + xunput (c, yytext); + c = '\n'; + } + } + + if (c == '\n') + input_line_number++; + + return c; +} + static void xunput (char c, char *buf) { @@ -1277,6 +1379,24 @@ token_stack.push (yylval.tok_val); } +static bool +inside_any_object_index (void) +{ + bool retval = false; + + for (std::list<bool>::const_iterator i = lexer_flags.looking_at_object_index.begin (); + i != lexer_flags.looking_at_object_index.end (); i++) + { + if (*i) + { + retval = true; + break; + } + } + + return retval; +} + // Handle keywords. Return -1 if the keyword should be ignored. static int @@ -1310,7 +1430,7 @@ break; case end_kw: - if (lexer_flags.looking_at_object_index + if (inside_any_object_index () || (lexer_flags.defining_func && ! (lexer_flags.looking_at_return_list || lexer_flags.parsed_function_name))) @@ -2211,6 +2331,7 @@ lexer_flags.quote_is_transpose = true; lexer_flags.convert_spaces_to_comma = true; + lexer_flags.looking_for_object_index = true; yylval.tok_val = new token (value, yytext, input_line_number, current_input_column); @@ -2446,6 +2567,8 @@ else if (delim == '\'') gripe_single_quote_string (); + lexer_flags.looking_for_object_index = true; + return delim == '"' ? DQ_STRING : SQ_STRING; } } @@ -2583,7 +2706,7 @@ && lexer_flags.convert_spaces_to_comma && (nesting_level.is_bracket () || (nesting_level.is_brace () - && ! 
lexer_flags.looking_at_object_index))) + && ! lexer_flags.looking_at_object_index.front ()))) { bool index_op = next_token_is_index_op (); @@ -2607,7 +2730,7 @@ { maybe_warn_separator_insert (','); - yyunput (',', yytext); + xunput (',', yytext); return retval; } } @@ -2624,7 +2747,7 @@ { if (nesting_level.is_bracket () || (nesting_level.is_brace () - && ! lexer_flags.looking_at_object_index)) + && ! lexer_flags.looking_at_object_index.front ())) { int bin_op = next_token_is_bin_op (spc_gobbled); @@ -2654,7 +2777,7 @@ maybe_warn_separator_insert (','); - yyunput (',', yytext); + xunput (',', yytext); } } @@ -2779,12 +2902,14 @@ { lexer_flags.pending_local_variables.insert (tok); } - else if (! (next_tok_is_paren || lexer_flags.looking_at_object_index)) + else if (! (next_tok_is_paren + || lexer_flags.looking_at_object_index.front ())) { BEGIN (COMMAND_START); } - if (is_rawcommand_name (tok) && ! lexer_flags.looking_at_object_index) + if (is_rawcommand_name (tok) + && ! lexer_flags.looking_at_object_index.front ()) { lexer_flags.doing_rawcommand = true; BEGIN (COMMAND_START); @@ -2855,7 +2980,13 @@ looking_at_matrix_or_assign_lhs = false; // Not parsing an object index. - looking_at_object_index = 0; + while (! looking_at_object_index.empty ()) + looking_at_object_index.pop_front (); + + looking_at_object_index.push_front (false); + + // Object index not possible until we've seen something. + looking_for_object_index = false; // No need to do comma insert or convert spaces to comma at // beginning of input. diff -r 4142982c66c6 -r 6dc61981d18b src/parse.y --- a/src/parse.y Sun Feb 15 19:07:05 2009 -0500 +++ b/src/parse.y Sun Feb 15 16:31:16 2009 -0500 @@ -694,58 +694,31 @@ { lexer_flags.looking_at_indirect_ref = true; } ; -// Two more rules for lexical feedback. 
To avoid reduce/reduce -// conflicts, We use begin_obj_idx after every postfix_expr on the RHS -// of a rule, then cancel that as soon as possible for cases when we -// are not actually parsing an index expression. Since all of those -// cases are simple tokens that don't involve examining the value of -// lexer_flags.looking_at_object_index, I think we should be OK. - -begin_obj_idx : // empty - { lexer_flags.looking_at_object_index++; } - ; - -cancel_obj_idx : // empty - { lexer_flags.looking_at_object_index--; } - ; - postfix_expr : primary_expr { $$ = $1; } - | postfix_expr begin_obj_idx '(' ')' - { - $$ = make_index_expression ($1, 0, '('); - lexer_flags.looking_at_object_index--; - } - | postfix_expr begin_obj_idx '(' arg_list ')' - { - $$ = make_index_expression ($1, $4, '('); - lexer_flags.looking_at_object_index--; - } - | postfix_expr begin_obj_idx '{' '}' - { - $$ = make_index_expression ($1, 0, '{'); - lexer_flags.looking_at_object_index--; - } - | postfix_expr begin_obj_idx '{' arg_list '}' - { - $$ = make_index_expression ($1, $4, '{'); - lexer_flags.looking_at_object_index--; - } - | postfix_expr begin_obj_idx PLUS_PLUS cancel_obj_idx - { $$ = make_postfix_op (PLUS_PLUS, $1, $3); } - | postfix_expr begin_obj_idx MINUS_MINUS cancel_obj_idx - { $$ = make_postfix_op (MINUS_MINUS, $1, $3); } - | postfix_expr begin_obj_idx QUOTE cancel_obj_idx - { $$ = make_postfix_op (QUOTE, $1, $3); } - | postfix_expr begin_obj_idx TRANSPOSE cancel_obj_idx - { $$ = make_postfix_op (TRANSPOSE, $1, $3); } - | postfix_expr begin_obj_idx indirect_ref_op cancel_obj_idx STRUCT_ELT - { $$ = make_indirect_ref ($1, $5->text ()); } - | postfix_expr begin_obj_idx indirect_ref_op cancel_obj_idx '(' expression ')' - { $$ = make_indirect_ref ($1, $6); } + | postfix_expr '(' ')' + { $$ = make_index_expression ($1, 0, '('); } + | postfix_expr '(' arg_list ')' + { $$ = make_index_expression ($1, $3, '('); } + | postfix_expr '{' '}' + { $$ = make_index_expression ($1, 0, '{'); } + | 
postfix_expr '{' arg_list '}' + { $$ = make_index_expression ($1, $3, '{'); } + | postfix_expr PLUS_PLUS + { $$ = make_postfix_op (PLUS_PLUS, $1, $2); } + | postfix_expr MINUS_MINUS + { $$ = make_postfix_op (MINUS_MINUS, $1, $2); } + | postfix_expr QUOTE + { $$ = make_postfix_op (QUOTE, $1, $2); } + | postfix_expr TRANSPOSE + { $$ = make_postfix_op (TRANSPOSE, $1, $2); } + | postfix_expr indirect_ref_op STRUCT_ELT + { $$ = make_indirect_ref ($1, $3->text ()); } + | postfix_expr indirect_ref_op '(' expression ')' + { $$ = make_indirect_ref ($1, $4); } ; -prefix_expr : postfix_expr begin_obj_idx cancel_obj_idx +prefix_expr : postfix_expr { $$ = $1; } | binary_expr { $$ = $1; }