Mercurial > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 18223:615fdd2238c1 gui-release
improve compatibility of command syntax parsing (bug #41032)
* lex.h, lex.ll (lexical_feedback::command_arg_paren_count):
New data member.
(lexical_feedback::lexical_feedback): Initialize it.
(lexical_feedback::reset): Reset it.
(COMMAND_ARG_FINISH): New macro.
Rewrite COMMAND_START patterns to improve Matlab compatibility of
command syntax parsing.
(<DQ_STRING_START>\", <SQ_STRING_START>\'): Don't return token if
start state is COMMAND_START.
* close.m: Fix test.
author | Michael C. Grant <mcg@cvxr.com> |
---|---|
date | Mon, 06 Jan 2014 12:02:04 -0500 |
parents | 7721e78b1337 |
children | 1af5ee5f3076 2eb26867bf66 |
comparison
equal
deleted
inserted
replaced
18222:4d90e104bf35 | 18223:615fdd2238c1 |
---|---|
230 return curr_lexer->handle_end_of_input (); \ | 230 return curr_lexer->handle_end_of_input (); \ |
231 } \ | 231 } \ |
232 } \ | 232 } \ |
233 while (0) | 233 while (0) |
234 | 234 |
235 // When a command argument boundary is detected, push out the | |
236 // current argument being built. This macro seems like a good | |
237 // candidate for conversion to a function. | |
238 | |
239 #define COMMAND_ARG_FINISH \ | |
240 do \ | |
241 { \ | |
242 if (curr_lexer->string_text.empty ()) \ | |
243 break; \ | |
244 \ | |
245 int retval = curr_lexer->handle_token (curr_lexer->string_text, \ | |
246 SQ_STRING); \ | |
247 \ | |
248 curr_lexer->string_text = ""; \ | |
249 curr_lexer->command_arg_paren_count = 0; \ | |
250 \ | |
251 yyless (0); \ | |
252 \ | |
253 return retval; \ | |
254 } \ | |
255 while (0) | |
235 | 256 |
236 static bool Vdisplay_tokens = false; | 257 static bool Vdisplay_tokens = false; |
237 | 258 |
238 static unsigned int Vtoken_count = 0; | 259 static unsigned int Vtoken_count = 0; |
239 | 260 |
281 | 302 |
282 %{ | 303 %{ |
283 // Help and other command-style functions. | 304 // Help and other command-style functions. |
284 %} | 305 %} |
285 | 306 |
286 <COMMAND_START>{NL} { | 307 %{ |
287 curr_lexer->lexer_debug ("<COMMAND_START>{NL}"); | 308 // Commands can be continued on a second line using the ellipsis. |
309 // If an argument is under construction, it is completed. | |
310 %} | |
311 | |
312 <COMMAND_START>(\.\.\.)[^\r\n]*{NL} { | |
313 curr_lexer->lexer_debug ("<COMMAND_START>(\\.\\.\\.)[^\\r\\n]*{NL}"); | |
314 | |
315 COMMAND_ARG_FINISH; | |
288 | 316 |
289 curr_lexer->input_line_number++; | 317 curr_lexer->input_line_number++; |
290 curr_lexer->current_input_column = 1; | 318 curr_lexer->current_input_column = 1; |
291 | 319 |
320 HANDLE_STRING_CONTINUATION; | |
321 } | |
322 | |
323 %{ | |
324 // Commands normally end at the end of a line, or at a semicolon or comma. | |
325 %} | |
326 | |
327 <COMMAND_START>({CCHAR}[^\r\n]*)?{NL} { | |
328 curr_lexer->lexer_debug ("<COMMAND_START>({CCHAR}[^\\r\\n]*)?{NL}"); | |
329 | |
330 COMMAND_ARG_FINISH; | |
331 | |
332 curr_lexer->input_line_number++; | |
333 curr_lexer->current_input_column = 1; | |
292 curr_lexer->looking_for_object_index = false; | 334 curr_lexer->looking_for_object_index = false; |
293 curr_lexer->at_beginning_of_statement = true; | 335 curr_lexer->at_beginning_of_statement = true; |
294 | |
295 curr_lexer->pop_start_state (); | 336 curr_lexer->pop_start_state (); |
296 | 337 |
297 return curr_lexer->count_token ('\n'); | 338 return curr_lexer->handle_token ('\n'); |
298 } | 339 } |
299 | 340 |
300 <COMMAND_START>[\;\,] { | 341 <COMMAND_START>[\,\;] { |
301 curr_lexer->lexer_debug ("<COMMAND_START>[\\;\\,]"); | 342 curr_lexer->lexer_debug( "<COMMAND_START>[\\,\\;]" ); |
302 | 343 |
303 curr_lexer->looking_for_object_index = false; | 344 if (yytext[0] != ',' || curr_lexer->command_arg_paren_count == 0) |
304 curr_lexer->at_beginning_of_statement = true; | 345 { |
305 | 346 COMMAND_ARG_FINISH; |
306 curr_lexer->pop_start_state (); | 347 curr_lexer->looking_for_object_index = false; |
307 | 348 curr_lexer->at_beginning_of_statement = true; |
308 if (strcmp (yytext, ",") == 0) | 349 curr_lexer->pop_start_state (); |
309 return curr_lexer->handle_token (','); | 350 return curr_lexer->handle_token (yytext[0]); |
351 } | |
310 else | 352 else |
311 return curr_lexer->handle_token (';'); | 353 curr_lexer->string_text += yytext; |
312 } | 354 |
355 curr_lexer->current_input_column += yyleng; | |
356 } | |
357 | |
358 %{ | |
359 // Unbalanced parentheses serve as pseudo-quotes: they are included in | |
360 // the final argument string, but they also cause whitespace, commas, | |
361 // quotes, and further parentheses to be slurped into that argument. | |
362 %} | |
363 | |
364 <COMMAND_START>[\(\[\{]+ { | |
365 curr_lexer->lexer_debug ("<COMMAND_START>[\\(\\[\\{]+"); | |
366 | |
367 curr_lexer->command_arg_paren_count += yyleng; | |
368 curr_lexer->string_text += yytext; | |
369 curr_lexer->current_input_column += yyleng; | |
370 } | |
371 | |
372 <COMMAND_START>[\)\]\}]+ { | |
373 curr_lexer->lexer_debug ("<COMMAND_START>[\\)\\]\\}]+"); | |
374 | |
375 curr_lexer->command_arg_paren_count -= yyleng; | |
376 curr_lexer->string_text += yytext; | |
377 curr_lexer->current_input_column += yyleng; | |
378 } | |
379 | |
380 %{ | |
381 // Handle quoted strings. Quoted strings that are not separated by | |
382 // whitespace from other argument text are combined with that previous | |
383 // text. For instance, | |
384 // | |
385 // command 'text1'"text2" | |
386 // | |
387 // has a single argument text1text2, not two separate arguments. | |
388 // That's why we must test to see if we are in command argument mode | |
389 // when processing the end of a string. | |
390 %} | |
313 | 391 |
314 <COMMAND_START>[\"\'] { | 392 <COMMAND_START>[\"\'] { |
315 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']"); | 393 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']"); |
316 | 394 |
317 curr_lexer->at_beginning_of_statement = false; | 395 if (curr_lexer->command_arg_paren_count == 0) |
318 | 396 curr_lexer->begin_string (yytext[0] == '"' |
319 curr_lexer->current_input_column++; | 397 ? DQ_STRING_START : SQ_STRING_START); |
320 | 398 else |
321 curr_lexer->begin_string (yytext[0] == '"' | 399 curr_lexer->string_text += yytext; |
322 ? DQ_STRING_START : SQ_STRING_START); | 400 |
323 } | 401 curr_lexer->current_input_column += yyleng; |
324 | 402 } |
325 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { | 403 |
326 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); | 404 %{ |
327 | 405 // In standard command argument processing, whitespace separates |
328 std::string tok = strip_trailing_whitespace (yytext); | 406 // arguments. In the presence of unbalanced parentheses, it is |
329 | 407 // incorporated into the argument. |
330 curr_lexer->looking_for_object_index = false; | 408 %} |
331 curr_lexer->at_beginning_of_statement = false; | 409 |
332 | 410 <COMMAND_START>{S}+ { |
333 return curr_lexer->handle_token (tok, SQ_STRING); | 411 curr_lexer->lexer_debug ("<COMMAND_START>{S}+"); |
412 | |
413 if (curr_lexer->command_arg_paren_count == 0) | |
414 COMMAND_ARG_FINISH; | |
415 else | |
416 curr_lexer->string_text += yytext; | |
417 | |
418 curr_lexer->current_input_column += yyleng; | |
419 } | |
420 | |
421 %{ | |
422 // Everything else is slurped into the command arguments. | |
423 %} | |
424 | |
425 <COMMAND_START>([\.]|[^#% \t\r\n\,\;\"\'\(\[\{\}\]\)]+) { | |
426 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]+"); | |
427 | |
428 curr_lexer->string_text += yytext; | |
429 curr_lexer->current_input_column += yyleng; | |
334 } | 430 } |
335 | 431 |
336 <MATRIX_START>{S}* { | 432 <MATRIX_START>{S}* { |
337 curr_lexer->lexer_debug ("<MATRIX_START>{S}*"); | 433 curr_lexer->lexer_debug ("<MATRIX_START>{S}*"); |
338 | 434 |
676 | 772 |
677 curr_lexer->current_input_column++; | 773 curr_lexer->current_input_column++; |
678 | 774 |
679 curr_lexer->pop_start_state (); | 775 curr_lexer->pop_start_state (); |
680 | 776 |
681 curr_lexer->looking_for_object_index = true; | 777 if (curr_lexer->start_state() != COMMAND_START) |
682 curr_lexer->at_beginning_of_statement = false; | 778 { |
683 | 779 curr_lexer->looking_for_object_index = true; |
684 curr_lexer->push_token (new token (DQ_STRING, | 780 curr_lexer->at_beginning_of_statement = false; |
685 curr_lexer->string_text, | 781 |
686 curr_lexer->string_line, | 782 curr_lexer->push_token (new token (DQ_STRING, |
687 curr_lexer->string_column)); | 783 curr_lexer->string_text, |
688 | 784 curr_lexer->string_line, |
689 curr_lexer->string_text = ""; | 785 curr_lexer->string_column)); |
690 | 786 |
691 return curr_lexer->count_token_internal (DQ_STRING); | 787 curr_lexer->string_text = ""; |
788 | |
789 return curr_lexer->count_token_internal (DQ_STRING); | |
790 } | |
692 } | 791 } |
693 | 792 |
694 <DQ_STRING_START>\\[0-7]{1,3} { | 793 <DQ_STRING_START>\\[0-7]{1,3} { |
695 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); | 794 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); |
696 | 795 |
859 | 958 |
860 curr_lexer->current_input_column++; | 959 curr_lexer->current_input_column++; |
861 | 960 |
862 curr_lexer->pop_start_state (); | 961 curr_lexer->pop_start_state (); |
863 | 962 |
864 curr_lexer->looking_for_object_index = true; | 963 if (curr_lexer->start_state() != COMMAND_START) |
865 curr_lexer->at_beginning_of_statement = false; | 964 { |
866 | 965 curr_lexer->looking_for_object_index = true; |
867 curr_lexer->push_token (new token (SQ_STRING, | 966 curr_lexer->at_beginning_of_statement = false; |
868 curr_lexer->string_text, | 967 |
869 curr_lexer->string_line, | 968 curr_lexer->push_token (new token (SQ_STRING, |
870 curr_lexer->string_column)); | 969 curr_lexer->string_text, |
871 | 970 curr_lexer->string_line, |
872 curr_lexer->string_text = ""; | 971 curr_lexer->string_column)); |
873 | 972 |
874 return curr_lexer->count_token_internal (SQ_STRING); | 973 curr_lexer->string_text = ""; |
974 | |
975 return curr_lexer->count_token_internal (SQ_STRING); | |
976 } | |
875 } | 977 } |
876 | 978 |
877 <SQ_STRING_START>[^\'\n\r]+ { | 979 <SQ_STRING_START>[^\'\n\r]+ { |
878 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+"); | 980 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+"); |
879 | 981 |
1847 string_column = 0; | 1949 string_column = 0; |
1848 fcn_file_name = ""; | 1950 fcn_file_name = ""; |
1849 fcn_file_full_name = ""; | 1951 fcn_file_full_name = ""; |
1850 looking_at_object_index.clear (); | 1952 looking_at_object_index.clear (); |
1851 looking_at_object_index.push_front (false); | 1953 looking_at_object_index.push_front (false); |
1954 command_arg_paren_count = 0; | |
1852 | 1955 |
1853 while (! parsed_function_name.empty ()) | 1956 while (! parsed_function_name.empty ()) |
1854 parsed_function_name.pop (); | 1957 parsed_function_name.pop (); |
1855 | 1958 |
1856 nesting_level.reset (); | 1959 nesting_level.reset (); |
3263 fatal_error ("octave_base_lexer::fill_flex_buffer failed"); | 3366 fatal_error ("octave_base_lexer::fill_flex_buffer failed"); |
3264 } | 3367 } |
3265 | 3368 |
3266 return status; | 3369 return status; |
3267 } | 3370 } |
3371 |