comparison libinterp/parse-tree/lex.ll @ 18223:615fdd2238c1 gui-release

improve compatibility of command syntax parsing (bug #41032) * lex.h, lex.ll (lexical_feedback::command_arg_paren_count): New data member. (lexical_feedback::lexical_feedback): Initialize it. (lexical_feedback::reset): Reset it. (COMMAND_ARG_FINISH): New macro. Rewrite COMMAND_START patterns to improve Matlab compatibility of command syntax parsing. (<DQ_STRING_START>\", <SQ_STRING_START>\'): Don't return token if start state is COMMAND_START. * close.m: Fix test.
author Michael C. Grant <mcg@cvxr.com>
date Mon, 06 Jan 2014 12:02:04 -0500
parents 7721e78b1337
children 1af5ee5f3076 2eb26867bf66
comparison
equal deleted inserted replaced
18222:4d90e104bf35 18223:615fdd2238c1
230 return curr_lexer->handle_end_of_input (); \ 230 return curr_lexer->handle_end_of_input (); \
231 } \ 231 } \
232 } \ 232 } \
233 while (0) 233 while (0)
234 234
235 // When a command argument boundary is detected, return the argument
236 // built so far as an SQ_STRING token and rescan the boundary text
237 // (yyless (0)). This macro is a good candidate for a function.
238
239 #define COMMAND_ARG_FINISH \
240 do \
241 { \
242 if (curr_lexer->string_text.empty ()) \
243 break; \
244 \
245 int retval = curr_lexer->handle_token (curr_lexer->string_text, \
246 SQ_STRING); \
247 \
248 curr_lexer->string_text = ""; \
249 curr_lexer->command_arg_paren_count = 0; \
250 \
251 yyless (0); \
252 \
253 return retval; \
254 } \
255 while (0)
235 256
236 static bool Vdisplay_tokens = false; 257 static bool Vdisplay_tokens = false;
237 258
238 static unsigned int Vtoken_count = 0; 259 static unsigned int Vtoken_count = 0;
239 260
281 302
282 %{ 303 %{
283 // Help and other command-style functions. 304 // Help and other command-style functions.
284 %} 305 %}
285 306
286 <COMMAND_START>{NL} { 307 %{
287 curr_lexer->lexer_debug ("<COMMAND_START>{NL}"); 308 // Commands can be continued on a second line using the ellipsis.
309 // If an argument is under construction, it is completed first.
310 %}
311
312 <COMMAND_START>(\.\.\.)[^\r\n]*{NL} {
313 curr_lexer->lexer_debug ("<COMMAND_START>(\\.\\.\\.)[^\\r\\n]*{NL}");
314
315 COMMAND_ARG_FINISH;
288 316
289 curr_lexer->input_line_number++; 317 curr_lexer->input_line_number++;
290 curr_lexer->current_input_column = 1; 318 curr_lexer->current_input_column = 1;
291 319
320 HANDLE_STRING_CONTINUATION;
321 }
322
323 %{
324 // Commands normally end at a newline, a semicolon, or a comma outside unbalanced parentheses.
325 %}
326
327 <COMMAND_START>({CCHAR}[^\r\n]*)?{NL} {
328 curr_lexer->lexer_debug ("<COMMAND_START>({CCHAR}[^\\r\\n]*)?{NL}");
329
330 COMMAND_ARG_FINISH;
331
332 curr_lexer->input_line_number++;
333 curr_lexer->current_input_column = 1;
292 curr_lexer->looking_for_object_index = false; 334 curr_lexer->looking_for_object_index = false;
293 curr_lexer->at_beginning_of_statement = true; 335 curr_lexer->at_beginning_of_statement = true;
294
295 curr_lexer->pop_start_state (); 336 curr_lexer->pop_start_state ();
296 337
297 return curr_lexer->count_token ('\n'); 338 return curr_lexer->handle_token ('\n');
298 } 339 }
299 340
300 <COMMAND_START>[\;\,] { 341 <COMMAND_START>[\,\;] {
301 curr_lexer->lexer_debug ("<COMMAND_START>[\\;\\,]"); 342 curr_lexer->lexer_debug( "<COMMAND_START>[\\,\\;]" );
302 343
303 curr_lexer->looking_for_object_index = false; 344 if (yytext[0] != ',' || curr_lexer->command_arg_paren_count == 0)
304 curr_lexer->at_beginning_of_statement = true; 345 {
305 346 COMMAND_ARG_FINISH;
306 curr_lexer->pop_start_state (); 347 curr_lexer->looking_for_object_index = false;
307 348 curr_lexer->at_beginning_of_statement = true;
308 if (strcmp (yytext, ",") == 0) 349 curr_lexer->pop_start_state ();
309 return curr_lexer->handle_token (','); 350 return curr_lexer->handle_token (yytext[0]);
351 }
310 else 352 else
311 return curr_lexer->handle_token (';'); 353 curr_lexer->string_text += yytext;
312 } 354
355 curr_lexer->current_input_column += yyleng;
356 }
357
358 %{
359 // Unbalanced parentheses serve as pseudo-quotes: they are included in
360 // the final argument string, but they cause whitespace, commas, and
361 // quotes to be slurped into that argument as well.
362 %}
363
364 <COMMAND_START>[\(\[\{]+ {
365 curr_lexer->lexer_debug ("<COMMAND_START>[\\(\\[\\{]+");
366
367 curr_lexer->command_arg_paren_count += yyleng;
368 curr_lexer->string_text += yytext;
369 curr_lexer->current_input_column += yyleng;
370 }
371
372 <COMMAND_START>[\)\]\}]+ {
373 curr_lexer->lexer_debug ("<COMMAND_START>[\\)\\]\\}]+");
374
375 curr_lexer->command_arg_paren_count -= yyleng;
376 curr_lexer->string_text += yytext;
377 curr_lexer->current_input_column += yyleng;
378 }
379
380 %{
381 // Handle quoted strings. Quoted strings that are not separated by
382 // whitespace from other argument text are combined with that previous
383 // text. For instance,
384 //
385 // command 'text1'"text2"
386 //
387 // has a single argument text1text2, not two separate arguments.
388 // That's why we must test to see if we are in command argument mode
389 // when processing the end of a string.
390 %}
313 391
314 <COMMAND_START>[\"\'] { 392 <COMMAND_START>[\"\'] {
315 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']"); 393 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']");
316 394
317 curr_lexer->at_beginning_of_statement = false; 395 if (curr_lexer->command_arg_paren_count == 0)
318 396 curr_lexer->begin_string (yytext[0] == '"'
319 curr_lexer->current_input_column++; 397 ? DQ_STRING_START : SQ_STRING_START);
320 398 else
321 curr_lexer->begin_string (yytext[0] == '"' 399 curr_lexer->string_text += yytext;
322 ? DQ_STRING_START : SQ_STRING_START); 400
323 } 401 curr_lexer->current_input_column += yyleng;
324 402 }
325 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { 403
326 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); 404 %{
327 405 // In standard command argument processing, whitespace separates
328 std::string tok = strip_trailing_whitespace (yytext); 406 // arguments. In the presence of unbalanced parentheses, it is
329 407 // incorporated into the argument.
330 curr_lexer->looking_for_object_index = false; 408 %}
331 curr_lexer->at_beginning_of_statement = false; 409
332 410 <COMMAND_START>{S}+ {
333 return curr_lexer->handle_token (tok, SQ_STRING); 411 curr_lexer->lexer_debug ("<COMMAND_START>{S}+");
412
413 if (curr_lexer->command_arg_paren_count == 0)
414 COMMAND_ARG_FINISH;
415 else
416 curr_lexer->string_text += yytext;
417
418 curr_lexer->current_input_column += yyleng;
419 }
420
421 %{
422 // Everything else is slurped into the command arguments.
423 %}
424
425 <COMMAND_START>([\.]|[^#% \t\r\n\,\;\"\'\(\[\{\}\]\)]+) {
426 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]+");
427
428 curr_lexer->string_text += yytext;
429 curr_lexer->current_input_column += yyleng;
334 } 430 }
335 431
336 <MATRIX_START>{S}* { 432 <MATRIX_START>{S}* {
337 curr_lexer->lexer_debug ("<MATRIX_START>{S}*"); 433 curr_lexer->lexer_debug ("<MATRIX_START>{S}*");
338 434
676 772
677 curr_lexer->current_input_column++; 773 curr_lexer->current_input_column++;
678 774
679 curr_lexer->pop_start_state (); 775 curr_lexer->pop_start_state ();
680 776
681 curr_lexer->looking_for_object_index = true; 777 if (curr_lexer->start_state() != COMMAND_START)
682 curr_lexer->at_beginning_of_statement = false; 778 {
683 779 curr_lexer->looking_for_object_index = true;
684 curr_lexer->push_token (new token (DQ_STRING, 780 curr_lexer->at_beginning_of_statement = false;
685 curr_lexer->string_text, 781
686 curr_lexer->string_line, 782 curr_lexer->push_token (new token (DQ_STRING,
687 curr_lexer->string_column)); 783 curr_lexer->string_text,
688 784 curr_lexer->string_line,
689 curr_lexer->string_text = ""; 785 curr_lexer->string_column));
690 786
691 return curr_lexer->count_token_internal (DQ_STRING); 787 curr_lexer->string_text = "";
788
789 return curr_lexer->count_token_internal (DQ_STRING);
790 }
692 } 791 }
693 792
694 <DQ_STRING_START>\\[0-7]{1,3} { 793 <DQ_STRING_START>\\[0-7]{1,3} {
695 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); 794 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");
696 795
859 958
860 curr_lexer->current_input_column++; 959 curr_lexer->current_input_column++;
861 960
862 curr_lexer->pop_start_state (); 961 curr_lexer->pop_start_state ();
863 962
864 curr_lexer->looking_for_object_index = true; 963 if (curr_lexer->start_state() != COMMAND_START)
865 curr_lexer->at_beginning_of_statement = false; 964 {
866 965 curr_lexer->looking_for_object_index = true;
867 curr_lexer->push_token (new token (SQ_STRING, 966 curr_lexer->at_beginning_of_statement = false;
868 curr_lexer->string_text, 967
869 curr_lexer->string_line, 968 curr_lexer->push_token (new token (SQ_STRING,
870 curr_lexer->string_column)); 969 curr_lexer->string_text,
871 970 curr_lexer->string_line,
872 curr_lexer->string_text = ""; 971 curr_lexer->string_column));
873 972
874 return curr_lexer->count_token_internal (SQ_STRING); 973 curr_lexer->string_text = "";
974
975 return curr_lexer->count_token_internal (SQ_STRING);
976 }
875 } 977 }
876 978
877 <SQ_STRING_START>[^\'\n\r]+ { 979 <SQ_STRING_START>[^\'\n\r]+ {
878 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+"); 980 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+");
879 981
1847 string_column = 0; 1949 string_column = 0;
1848 fcn_file_name = ""; 1950 fcn_file_name = "";
1849 fcn_file_full_name = ""; 1951 fcn_file_full_name = "";
1850 looking_at_object_index.clear (); 1952 looking_at_object_index.clear ();
1851 looking_at_object_index.push_front (false); 1953 looking_at_object_index.push_front (false);
1954 command_arg_paren_count = 0;
1852 1955
1853 while (! parsed_function_name.empty ()) 1956 while (! parsed_function_name.empty ())
1854 parsed_function_name.pop (); 1957 parsed_function_name.pop ();
1855 1958
1856 nesting_level.reset (); 1959 nesting_level.reset ();
3263 fatal_error ("octave_base_lexer::fill_flex_buffer failed"); 3366 fatal_error ("octave_base_lexer::fill_flex_buffer failed");
3264 } 3367 }
3265 3368
3266 return status; 3369 return status;
3267 } 3370 }
3371