# HG changeset patch # User Michael C. Grant # Date 1389027724 18000 # Node ID 615fdd2238c1837c23b52684054cb64f0c3a5672 # Parent 4d90e104bf357f9e8310cbb0242db8c94dab4a6f improve compatibility of command syntax parsing (bug #41032) * lex.h, lex.ll (lexical_feedback::command_arg_paren_count): New data member. (lexical_feedback::lexical_feedback): Initialize it. (lexical_feedback::reset): Reset it. (COMMAND_ARG_FINISH): New macro. Rewrite COMMAND_START patterns to improve Matlab compatibility of command syntax parsing. (\", \'): Don't return token if start state is COMMAND_START. * close.m: Fix test. diff -r 4d90e104bf35 -r 615fdd2238c1 libinterp/parse-tree/lex.h --- a/libinterp/parse-tree/lex.h Sun Jan 05 17:43:18 2014 -0600 +++ b/libinterp/parse-tree/lex.h Mon Jan 06 12:02:04 2014 -0500 @@ -268,9 +268,9 @@ input_line_number (1), current_input_column (1), bracketflag (0), braceflag (0), looping (0), defining_func (0), looking_at_function_handle (0), - block_comment_nesting_level (0), token_count (0), - current_input_line (), comment_text (), help_text (), - string_text (), string_line (0), string_column (0), + block_comment_nesting_level (0), command_arg_paren_count (0), + token_count (0), current_input_line (), comment_text (), + help_text (), string_text (), string_line (0), string_column (0), fcn_file_name (), fcn_file_full_name (), looking_at_object_index (), parsed_function_name (), pending_local_variables (), symtab_context (), nesting_level (), tokens () @@ -389,6 +389,9 @@ // nestng level for blcok comments. int block_comment_nesting_level; + // Parenthesis count for command argument parsing. + int command_arg_paren_count; + // Count of tokens recognized by this lexer since initialized or // since the last reset. size_t token_count; diff -r 4d90e104bf35 -r 615fdd2238c1 libinterp/parse-tree/lex.ll --- a/libinterp/parse-tree/lex.ll Sun Jan 05 17:43:18 2014 -0600 +++ b/libinterp/parse-tree/lex.ll Mon Jan 06 12:02:04 2014 -0500 @@ -232,6 +232,27 @@ } \ while (0) +// When a command argument boundary is detected, push out the +// current argument being built. This one seems like a good +// candidate for a function call. + +#define COMMAND_ARG_FINISH \ + do \ + { \ + if (curr_lexer->string_text.empty ()) \ + break; \ + \ + int retval = curr_lexer->handle_token (curr_lexer->string_text, \ + SQ_STRING); \ + \ + curr_lexer->string_text = ""; \ + curr_lexer->command_arg_paren_count = 0; \ + \ + yyless (0); \ + \ + return retval; \ + } \ + while (0) static bool Vdisplay_tokens = false; @@ -283,54 +304,129 @@ // Help and other command-style functions. %} -{NL} { - curr_lexer->lexer_debug ("{NL}"); +%{ +// Commands can be continued on a second line using the ellipsis. +// If an argument is in construction, it is completed. +%} + +(\.\.\.)[^\r\n]*{NL} { + curr_lexer->lexer_debug ("(\\.\\.\\.)[^\\r\\n]*{NL}"); + + COMMAND_ARG_FINISH; + + curr_lexer->input_line_number++; + curr_lexer->current_input_column = 1; + + HANDLE_STRING_CONTINUATION; + } + +%{ +// Commands normally end at the end of a line or a semicolon. +%} + +({CCHAR}[^\r\n]*)?{NL} { + curr_lexer->lexer_debug ("({CCHAR}[^\\r\\n]*)?{NL}"); + + COMMAND_ARG_FINISH; curr_lexer->input_line_number++; curr_lexer->current_input_column = 1; - curr_lexer->looking_for_object_index = false; curr_lexer->at_beginning_of_statement = true; - curr_lexer->pop_start_state (); - return curr_lexer->count_token ('\n'); + return curr_lexer->handle_token ('\n'); + } + +[\,\;] { + curr_lexer->lexer_debug( "[\\,\\;]" ); + + if (yytext[0] != ',' || curr_lexer->command_arg_paren_count == 0) + { + COMMAND_ARG_FINISH; + curr_lexer->looking_for_object_index = false; + curr_lexer->at_beginning_of_statement = true; + curr_lexer->pop_start_state (); + return curr_lexer->handle_token (yytext[0]); + } + else + curr_lexer->string_text += yytext; + + curr_lexer->current_input_column += yyleng; } -[\;\,] { - curr_lexer->lexer_debug ("[\\;\\,]"); - - curr_lexer->looking_for_object_index = false; - curr_lexer->at_beginning_of_statement = true; - - curr_lexer->pop_start_state (); - - if (strcmp (yytext, ",") == 0) - return curr_lexer->handle_token (','); - else - return curr_lexer->handle_token (';'); +%{ +// Unbalanced parentheses serve as pseudo-quotes: they are included in +// the final argument string, but they cause parentheses and quotes to +// be slurped into that argument as well. +%} + +[\(\[\{]+ { + curr_lexer->lexer_debug ("[\\(\\[\\{]+"); + + curr_lexer->command_arg_paren_count += yyleng; + curr_lexer->string_text += yytext; + curr_lexer->current_input_column += yyleng; } +[\)\]\}]+ { + curr_lexer->lexer_debug ("[\\)\\]\\}]+"); + + curr_lexer->command_arg_paren_count -= yyleng; + curr_lexer->string_text += yytext; + curr_lexer->current_input_column += yyleng; +} + +%{ +// Handle quoted strings. Quoted strings that are not separated by +// whitespace from other argument text are combined with that previous +// text. For instance, +// +// command 'text1'"text2" +// +// has a single argument text1text2, not two separate arguments. +// That's why we must test to see if we are in command argument mode +// when processing the end of a string. +%} + [\"\'] { curr_lexer->lexer_debug ("[\\\"\\']"); - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->current_input_column++; - - curr_lexer->begin_string (yytext[0] == '"' - ? DQ_STRING_START : SQ_STRING_START); + if (curr_lexer->command_arg_paren_count == 0) + curr_lexer->begin_string (yytext[0] == '"' + ? DQ_STRING_START : SQ_STRING_START); + else + curr_lexer->string_text += yytext; + + curr_lexer->current_input_column += yyleng; } -[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { - curr_lexer->lexer_debug ("[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); - - std::string tok = strip_trailing_whitespace (yytext); - - curr_lexer->looking_for_object_index = false; - curr_lexer->at_beginning_of_statement = false; - - return curr_lexer->handle_token (tok, SQ_STRING); +%{ +// In standard command argument processing, whitespace separates +// arguments. In the presence of unbalanced parentheses, it is +// incorporated into the argument. +%} + +{S}+ { + curr_lexer->lexer_debug ("{S}+"); + + if (curr_lexer->command_arg_paren_count == 0) + COMMAND_ARG_FINISH; + else + curr_lexer->string_text += yytext; + + curr_lexer->current_input_column += yyleng; + } + +%{ +// Everything else is slurped into the command arguments. +%} + +([\.]|[^#% \t\r\n\,\;\"\'\(\[\{\}\]\)]+) { + curr_lexer->lexer_debug ("[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]+"); + + curr_lexer->string_text += yytext; + curr_lexer->current_input_column += yyleng; } {S}* { @@ -678,17 +774,20 @@ curr_lexer->pop_start_state (); - curr_lexer->looking_for_object_index = true; - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->push_token (new token (DQ_STRING, - curr_lexer->string_text, - curr_lexer->string_line, - curr_lexer->string_column)); - - curr_lexer->string_text = ""; - - return curr_lexer->count_token_internal (DQ_STRING); + if (curr_lexer->start_state() != COMMAND_START) + { + curr_lexer->looking_for_object_index = true; + curr_lexer->at_beginning_of_statement = false; + + curr_lexer->push_token (new token (DQ_STRING, + curr_lexer->string_text, + curr_lexer->string_line, + curr_lexer->string_column)); + + curr_lexer->string_text = ""; + + return curr_lexer->count_token_internal (DQ_STRING); + } } \\[0-7]{1,3} { @@ -861,17 +960,20 @@ curr_lexer->pop_start_state (); - curr_lexer->looking_for_object_index = true; - curr_lexer->at_beginning_of_statement = false; - - curr_lexer->push_token (new token (SQ_STRING, - curr_lexer->string_text, - curr_lexer->string_line, - curr_lexer->string_column)); - - curr_lexer->string_text = ""; - - return curr_lexer->count_token_internal (SQ_STRING); + if (curr_lexer->start_state() != COMMAND_START) + { + curr_lexer->looking_for_object_index = true; + curr_lexer->at_beginning_of_statement = false; + + curr_lexer->push_token (new token (SQ_STRING, + curr_lexer->string_text, + curr_lexer->string_line, + curr_lexer->string_column)); + + curr_lexer->string_text = ""; + + return curr_lexer->count_token_internal (SQ_STRING); + } } [^\'\n\r]+ { @@ -1849,6 +1951,7 @@ fcn_file_full_name = ""; looking_at_object_index.clear (); looking_at_object_index.push_front (false); + command_arg_paren_count = 0; while (! parsed_function_name.empty ()) parsed_function_name.pop (); @@ -3265,3 +3368,4 @@ return status; } + diff -r 4d90e104bf35 -r 615fdd2238c1 scripts/plot/util/close.m --- a/scripts/plot/util/close.m Sun Jan 05 17:43:18 2014 -0600 +++ b/scripts/plot/util/close.m Mon Jan 06 12:02:04 2014 -0500 @@ -105,5 +105,4 @@ %!error close ({"all"}) %!error close ("all_and_more") %!error close (-1) -%!error close "all" hid" - +%!error close all hid