Mercurial > octave-dspies
comparison src/lex.l @ 8535:75e6ab186761
lexer debugging functions
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 19 Jan 2009 16:53:30 -0500 |
parents | 0e0bd07e6ae2 |
children | de1b944d5306 |
comparison
equal
deleted
inserted
replaced
8534:0eb83938c8bc | 8535:75e6ab186761 |
---|---|
116 do \ | 116 do \ |
117 { \ | 117 { \ |
118 int tok_val = tok; \ | 118 int tok_val = tok; \ |
119 if (Vdisplay_tokens) \ | 119 if (Vdisplay_tokens) \ |
120 display_token (tok_val); \ | 120 display_token (tok_val); \ |
121 if (lexer_debug_flag) \ | |
122 { \ | |
123 std::cerr << "R: "; \ | |
124 display_token (tok_val); \ | |
125 std::cerr << std::endl; \ | |
126 } \ | |
121 return tok_val; \ | 127 return tok_val; \ |
122 } \ | 128 } \ |
123 while (0) | 129 while (0) |
124 | 130 |
125 #define COUNT_TOK_AND_RETURN(tok) \ | 131 #define COUNT_TOK_AND_RETURN(tok) \ |
168 gripe_matlab_incompatible_operator (yytext); \ | 174 gripe_matlab_incompatible_operator (yytext); \ |
169 BIN_OP_RETURN (tok, convert); \ | 175 BIN_OP_RETURN (tok, convert); \ |
170 } \ | 176 } \ |
171 while (0) | 177 while (0) |
172 | 178 |
// Trace hook placed at the top of each rule action.  When the internal
// lexer_debug_flag is set, hand PATTERN (the text of the rule, supplied
// by the caller as a string) and the current yytext to lexer_debug.
// Wrapped in do/while (0) so that it behaves as a single statement.
#define LEXER_DEBUG(pattern) \
  do \
    { \
      if (lexer_debug_flag) \
        { \
          lexer_debug ((pattern), yytext); \
        } \
    } \
  while (0)
186 | |
173 // TRUE means that we have encountered EOF on the input stream. | 187 // TRUE means that we have encountered EOF on the input stream. |
174 bool parser_end_of_input = false; | 188 bool parser_end_of_input = false; |
175 | 189 |
176 // Flags that need to be shared between the lexer and parser. | 190 // Flags that need to be shared between the lexer and parser. |
177 lexical_feedback lexer_flags; | 191 lexical_feedback lexer_flags; |
250 static unsigned int Vtoken_count = 0; | 264 static unsigned int Vtoken_count = 0; |
251 | 265 |
252 // The start state that was in effect when the beginning of a block | 266 // The start state that was in effect when the beginning of a block |
253 // comment was noticed. | 267 // comment was noticed. |
254 static int block_comment_nesting_level = 0; | 268 static int block_comment_nesting_level = 0; |
269 | |
270 // Internal variable for lexer debugging state. | |
271 static bool lexer_debug_flag = false; | |
255 | 272 |
256 // Forward declarations for functions defined at the bottom of this | 273 // Forward declarations for functions defined at the bottom of this |
257 // file. | 274 // file. |
258 | 275 |
259 static int text_yyinput (void); | 276 static int text_yyinput (void); |
283 static void gripe_matlab_incompatible (const std::string& msg); | 300 static void gripe_matlab_incompatible (const std::string& msg); |
284 static void maybe_gripe_matlab_incompatible_comment (char c); | 301 static void maybe_gripe_matlab_incompatible_comment (char c); |
285 static void gripe_matlab_incompatible_continuation (void); | 302 static void gripe_matlab_incompatible_continuation (void); |
286 static void gripe_matlab_incompatible_operator (const std::string& op); | 303 static void gripe_matlab_incompatible_operator (const std::string& op); |
287 static void display_token (int tok); | 304 static void display_token (int tok); |
305 static void lexer_debug (const char *pattern, const char *text); | |
288 | 306 |
289 %} | 307 %} |
290 | 308 |
291 D [0-9] | 309 D [0-9] |
292 S [ \t] | 310 S [ \t] |
306 EXPON ([DdEe][+-]?{D}+) | 324 EXPON ([DdEe][+-]?{D}+) |
307 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) | 325 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) |
308 %% | 326 %% |
309 | 327 |
310 <SCRIPT_FILE_BEGIN>. { | 328 <SCRIPT_FILE_BEGIN>. { |
329 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>."); | |
330 | |
311 BEGIN (INITIAL); | 331 BEGIN (INITIAL); |
312 xunput (yytext[0], yytext); | 332 xunput (yytext[0], yytext); |
313 COUNT_TOK_AND_RETURN (SCRIPT); | 333 COUNT_TOK_AND_RETURN (SCRIPT); |
314 } | 334 } |
315 | 335 |
316 <NESTED_FUNCTION_END>. { | 336 <NESTED_FUNCTION_END>. { |
337 LEXER_DEBUG ("<NESTED_FUNCTION_END>."); | |
338 | |
317 BEGIN (NESTED_FUNCTION_BEGIN); | 339 BEGIN (NESTED_FUNCTION_BEGIN); |
318 xunput (yytext[0], yytext); | 340 xunput (yytext[0], yytext); |
319 COUNT_TOK_AND_RETURN (';'); | 341 COUNT_TOK_AND_RETURN (';'); |
320 } | 342 } |
321 | 343 |
322 <NESTED_FUNCTION_BEGIN>. { | 344 <NESTED_FUNCTION_BEGIN>. { |
345 LEXER_DEBUG ("<NESTED_FUNCTION_BEGIN>."); | |
346 | |
323 BEGIN (INITIAL); | 347 BEGIN (INITIAL); |
324 xunput (yytext[0], yytext); | 348 xunput (yytext[0], yytext); |
325 prep_for_nested_function (); | 349 prep_for_nested_function (); |
326 COUNT_TOK_AND_RETURN (FCN); | 350 COUNT_TOK_AND_RETURN (FCN); |
327 } | 351 } |
331 // stuff needs to be simplified. May require some changes in the | 355 // stuff needs to be simplified. May require some changes in the |
332 // parser too. | 356 // parser too. |
333 %} | 357 %} |
334 | 358 |
335 <COMMAND_START>{NL} { | 359 <COMMAND_START>{NL} { |
360 LEXER_DEBUG ("<COMMAND_START>{NL}"); | |
361 | |
336 BEGIN (INITIAL); | 362 BEGIN (INITIAL); |
337 input_line_number++; | 363 input_line_number++; |
338 current_input_column = 1; | 364 current_input_column = 1; |
339 lexer_flags.quote_is_transpose = false; | 365 lexer_flags.quote_is_transpose = false; |
340 lexer_flags.convert_spaces_to_comma = true; | 366 lexer_flags.convert_spaces_to_comma = true; |
341 lexer_flags.doing_rawcommand = false; | 367 lexer_flags.doing_rawcommand = false; |
342 COUNT_TOK_AND_RETURN ('\n'); | 368 COUNT_TOK_AND_RETURN ('\n'); |
343 } | 369 } |
344 | 370 |
345 <COMMAND_START>[\;\,] { | 371 <COMMAND_START>[\;\,] { |
372 LEXER_DEBUG ("<COMMAND_START>[\\;\\,]"); | |
373 | |
346 if (lexer_flags.doing_rawcommand) | 374 if (lexer_flags.doing_rawcommand) |
347 TOK_PUSH_AND_RETURN (yytext, SQ_STRING); | 375 TOK_PUSH_AND_RETURN (yytext, SQ_STRING); |
348 | 376 |
349 BEGIN (INITIAL); | 377 BEGIN (INITIAL); |
350 | 378 |
353 else | 381 else |
354 TOK_RETURN (';'); | 382 TOK_RETURN (';'); |
355 } | 383 } |
356 | 384 |
357 <COMMAND_START>[\"\'] { | 385 <COMMAND_START>[\"\'] { |
386 LEXER_DEBUG ("<COMMAND_START>[\\\"\\']"); | |
387 | |
358 current_input_column++; | 388 current_input_column++; |
359 int tok = handle_string (yytext[0], true); | 389 int tok = handle_string (yytext[0], true); |
360 COUNT_TOK_AND_RETURN (tok); | 390 COUNT_TOK_AND_RETURN (tok); |
361 } | 391 } |
362 | 392 |
363 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { | 393 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { |
394 LEXER_DEBUG ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); | |
395 | |
364 std::string tok = strip_trailing_whitespace (yytext); | 396 std::string tok = strip_trailing_whitespace (yytext); |
365 TOK_PUSH_AND_RETURN (tok, SQ_STRING); | 397 TOK_PUSH_AND_RETURN (tok, SQ_STRING); |
366 } | 398 } |
367 | 399 |
368 %{ | 400 %{ |
378 | 410 |
379 // FIXME -- we need to handle block comments here. | 411 // FIXME -- we need to handle block comments here. |
380 %} | 412 %} |
381 | 413 |
382 <MATRIX_START>{SNLCMT}*\]{S}* { | 414 <MATRIX_START>{SNLCMT}*\]{S}* { |
415 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\]{S}*"); | |
416 | |
383 scan_for_comments (yytext); | 417 scan_for_comments (yytext); |
384 fixup_column_count (yytext); | 418 fixup_column_count (yytext); |
385 int c = yytext[yyleng-1]; | 419 int c = yytext[yyleng-1]; |
386 int cont_is_spc = eat_continuation (); | 420 int cont_is_spc = eat_continuation (); |
387 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | 421 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); |
394 %{ | 428 %{ |
395 // FIXME -- we need to handle block comments here. | 429 // FIXME -- we need to handle block comments here. |
396 %} | 430 %} |
397 | 431 |
398 <MATRIX_START>{SNLCMT}*\}{S}* { | 432 <MATRIX_START>{SNLCMT}*\}{S}* { |
433 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\}{S}*"); | |
434 | |
399 scan_for_comments (yytext); | 435 scan_for_comments (yytext); |
400 fixup_column_count (yytext); | 436 fixup_column_count (yytext); |
401 int c = yytext[yyleng-1]; | 437 int c = yytext[yyleng-1]; |
402 int cont_is_spc = eat_continuation (); | 438 int cont_is_spc = eat_continuation (); |
403 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | 439 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); |
412 // check for continuations here we can end up inserting too many | 448 // check for continuations here we can end up inserting too many |
413 // commas. | 449 // commas. |
414 %} | 450 %} |
415 | 451 |
416 <MATRIX_START>{S}*\,{S}* { | 452 <MATRIX_START>{S}*\,{S}* { |
453 LEXER_DEBUG ("<MATRIX_START>{S}*\\,{S}*"); | |
454 | |
417 current_input_column += yyleng; | 455 current_input_column += yyleng; |
418 | 456 |
419 int tmp = eat_continuation (); | 457 int tmp = eat_continuation (); |
420 | 458 |
421 lexer_flags.quote_is_transpose = false; | 459 lexer_flags.quote_is_transpose = false; |
437 // constants so we just eat them. If we don't check for continuations | 475 // constants so we just eat them. If we don't check for continuations |
438 // here we can end up inserting too many commas. | 476 // here we can end up inserting too many commas. |
439 %} | 477 %} |
440 | 478 |
441 <MATRIX_START>{S}+ { | 479 <MATRIX_START>{S}+ { |
480 LEXER_DEBUG ("<MATRIX_START>{S}+"); | |
481 | |
442 current_input_column += yyleng; | 482 current_input_column += yyleng; |
443 | 483 |
444 int tmp = eat_continuation (); | 484 int tmp = eat_continuation (); |
445 bool bin_op = next_token_is_bin_op (true); | 485 bool bin_op = next_token_is_bin_op (true); |
446 bool postfix_un_op = next_token_is_postfix_unary_op (true); | 486 bool postfix_un_op = next_token_is_postfix_unary_op (true); |
473 | 513 |
474 // FIXME -- we need to handle block comments here. | 514 // FIXME -- we need to handle block comments here. |
475 %} | 515 %} |
476 | 516 |
477 <MATRIX_START>{SNLCMT}*;{SNLCMT}* { | 517 <MATRIX_START>{SNLCMT}*;{SNLCMT}* { |
518 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*;{SNLCMT}*"); | |
519 | |
478 scan_for_comments (yytext); | 520 scan_for_comments (yytext); |
479 fixup_column_count (yytext); | 521 fixup_column_count (yytext); |
480 eat_whitespace (); | 522 eat_whitespace (); |
481 lexer_flags.quote_is_transpose = false; | 523 lexer_flags.quote_is_transpose = false; |
482 lexer_flags.convert_spaces_to_comma = true; | 524 lexer_flags.convert_spaces_to_comma = true; |
491 // FIXME -- we need to handle block comments here. | 533 // FIXME -- we need to handle block comments here. |
492 %} | 534 %} |
493 | 535 |
494 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* | | 536 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* | |
495 <MATRIX_START>{S}*{NL}{SNLCMT}* { | 537 <MATRIX_START>{S}*{NL}{SNLCMT}* { |
538 LEXER_DEBUG ("<MATRIX_START>{S}*{COMMENT}{SNLCMT}*|<MATRIX_START>{S}*{NL}{SNLCMT}*"); | |
539 | |
496 scan_for_comments (yytext); | 540 scan_for_comments (yytext); |
497 fixup_column_count (yytext); | 541 fixup_column_count (yytext); |
498 eat_whitespace (); | 542 eat_whitespace (); |
499 | 543 |
500 lexer_flags.quote_is_transpose = false; | 544 lexer_flags.quote_is_transpose = false; |
510 COUNT_TOK_AND_RETURN (';'); | 554 COUNT_TOK_AND_RETURN (';'); |
511 } | 555 } |
512 } | 556 } |
513 | 557 |
514 \[{S}* { | 558 \[{S}* { |
559 LEXER_DEBUG ("\\[{S}*"); | |
560 | |
515 nesting_level.bracket (); | 561 nesting_level.bracket (); |
516 | 562 |
517 current_input_column += yyleng; | 563 current_input_column += yyleng; |
518 lexer_flags.quote_is_transpose = false; | 564 lexer_flags.quote_is_transpose = false; |
519 lexer_flags.convert_spaces_to_comma = true; | 565 lexer_flags.convert_spaces_to_comma = true; |
530 BEGIN (MATRIX_START); | 576 BEGIN (MATRIX_START); |
531 COUNT_TOK_AND_RETURN ('['); | 577 COUNT_TOK_AND_RETURN ('['); |
532 } | 578 } |
533 | 579 |
534 \] { | 580 \] { |
581 LEXER_DEBUG ("\\]"); | |
582 | |
535 nesting_level.remove (); | 583 nesting_level.remove (); |
536 | 584 |
537 TOK_RETURN (']'); | 585 TOK_RETURN (']'); |
538 } | 586 } |
539 | 587 |
540 %{ | 588 %{ |
541 // Imaginary numbers. | 589 // Imaginary numbers. |
542 %} | 590 %} |
543 | 591 |
544 {NUMBER}{Im} { | 592 {NUMBER}{Im} { |
593 LEXER_DEBUG ("{NUMBER}{Im}"); | |
594 | |
545 handle_number (); | 595 handle_number (); |
546 COUNT_TOK_AND_RETURN (IMAG_NUM); | 596 COUNT_TOK_AND_RETURN (IMAG_NUM); |
547 } | 597 } |
548 | 598 |
549 %{ | 599 %{ |
550 // Real numbers. Don't grab the `.' part of a dot operator as part of | 600 // Real numbers. Don't grab the `.' part of a dot operator as part of |
551 // the constant. | 601 // the constant. |
552 %} | 602 %} |
553 | 603 |
554 {D}+/\.[\*/\\^'] | | 604 {D}+/\.[\*/\\^\'] | |
555 {NUMBER} { | 605 {NUMBER} { |
606 LEXER_DEBUG ("{D}+/\\.[\\*/\\\\^\\']|{NUMBER}"); | |
556 handle_number (); | 607 handle_number (); |
557 COUNT_TOK_AND_RETURN (NUM); | 608 COUNT_TOK_AND_RETURN (NUM); |
558 } | 609 } |
559 | 610 |
560 %{ | 611 %{ |
570 // Continuation lines. Allow comments after continuations. | 621 // Continuation lines. Allow comments after continuations. |
571 %} | 622 %} |
572 | 623 |
573 {CONT}{S}*{NL} | | 624 {CONT}{S}*{NL} | |
574 {CONT}{S}*{COMMENT} { | 625 {CONT}{S}*{COMMENT} { |
626 LEXER_DEBUG ("{CONT}{S}*{NL}|{CONT}{S}*{COMMENT}"); | |
627 | |
575 if (yytext[0] == '\\') | 628 if (yytext[0] == '\\') |
576 gripe_matlab_incompatible_continuation (); | 629 gripe_matlab_incompatible_continuation (); |
577 scan_for_comments (yytext); | 630 scan_for_comments (yytext); |
578 promptflag--; | 631 promptflag--; |
579 input_line_number++; | 632 input_line_number++; |
583 %{ | 636 %{ |
584 // End of file. | 637 // End of file. |
585 %} | 638 %} |
586 | 639 |
587 <<EOF>> { | 640 <<EOF>> { |
641 LEXER_DEBUG ("<<EOF>>"); | |
642 | |
588 if (block_comment_nesting_level != 0) | 643 if (block_comment_nesting_level != 0) |
589 { | 644 { |
590 warning ("block comment open at end of input"); | 645 warning ("block comment open at end of input"); |
591 | 646 |
592 if ((reading_fcn_file || reading_script_file) | 647 if ((reading_fcn_file || reading_script_file) |
602 // Identifiers. Truncate the token at the first space or tab but | 657 // Identifiers. Truncate the token at the first space or tab but |
603 // don't write directly on yytext. | 658 // don't write directly on yytext. |
604 %} | 659 %} |
605 | 660 |
606 {IDENT}{S}* { | 661 {IDENT}{S}* { |
662 LEXER_DEBUG ("{IDENT}{S}*"); | |
663 | |
607 int id_tok = handle_identifier (); | 664 int id_tok = handle_identifier (); |
608 | 665 |
609 if (id_tok >= 0) | 666 if (id_tok >= 0) |
610 COUNT_TOK_AND_RETURN (id_tok); | 667 COUNT_TOK_AND_RETURN (id_tok); |
611 } | 668 } |
613 %{ | 670 %{ |
614 // Function handles. | 671 // Function handles. |
615 %} | 672 %} |
616 | 673 |
617 "@" { | 674 "@" { |
675 LEXER_DEBUG ("@"); | |
676 | |
618 current_input_column++; | 677 current_input_column++; |
619 lexer_flags.quote_is_transpose = false; | 678 lexer_flags.quote_is_transpose = false; |
620 lexer_flags.convert_spaces_to_comma = false; | 679 lexer_flags.convert_spaces_to_comma = false; |
621 lexer_flags.looking_at_function_handle++; | 680 lexer_flags.looking_at_function_handle++; |
622 COUNT_TOK_AND_RETURN ('@'); | 681 COUNT_TOK_AND_RETURN ('@'); |
627 // are handled by the <MATRIX_START> start state code above. If closest | 686 // are handled by the <MATRIX_START> start state code above. If closest |
628 // nesting is inside parentheses, don't return a row separator. | 687 // nesting is inside parentheses, don't return a row separator. |
629 %} | 688 %} |
630 | 689 |
631 {NL} { | 690 {NL} { |
691 LEXER_DEBUG ("{NL}"); | |
692 | |
632 input_line_number++; | 693 input_line_number++; |
633 current_input_column = 1; | 694 current_input_column = 1; |
634 lexer_flags.quote_is_transpose = false; | 695 lexer_flags.quote_is_transpose = false; |
635 lexer_flags.convert_spaces_to_comma = true; | 696 lexer_flags.convert_spaces_to_comma = true; |
636 if (nesting_level.none ()) | 697 if (nesting_level.none ()) |
645 // Single quote can either be the beginning of a string or a transpose | 706 // Single quote can either be the beginning of a string or a transpose |
646 // operator. | 707 // operator. |
647 %} | 708 %} |
648 | 709 |
649 "'" { | 710 "'" { |
711 LEXER_DEBUG ("'"); | |
712 | |
650 current_input_column++; | 713 current_input_column++; |
651 lexer_flags.convert_spaces_to_comma = true; | 714 lexer_flags.convert_spaces_to_comma = true; |
652 | 715 |
653 if (lexer_flags.quote_is_transpose) | 716 if (lexer_flags.quote_is_transpose) |
654 { | 717 { |
665 %{ | 728 %{ |
666 // Double quotes always begin strings. | 729 // Double quotes always begin strings. |
667 %} | 730 %} |
668 | 731 |
669 \" { | 732 \" { |
733 LEXER_DEBUG ("\""); | |
734 | |
670 current_input_column++; | 735 current_input_column++; |
671 int tok = handle_string ('"'); | 736 int tok = handle_string ('"'); |
672 COUNT_TOK_AND_RETURN (tok); | 737 COUNT_TOK_AND_RETURN (tok); |
673 } | 738 } |
674 | 739 |
676 // Gobble comments. If closest nesting is inside parentheses, don't | 741 // Gobble comments. If closest nesting is inside parentheses, don't |
677 // return a new line. | 742 // return a new line. |
678 %} | 743 %} |
679 | 744 |
680 {CCHAR} { | 745 {CCHAR} { |
746 LEXER_DEBUG ("{CCHAR}"); | |
747 | |
681 xunput (yytext[0], yytext); | 748 xunput (yytext[0], yytext); |
682 | 749 |
683 bool eof = false; | 750 bool eof = false; |
684 int tok = process_comment (false, eof); | 751 int tok = process_comment (false, eof); |
685 | 752 |
692 %{ | 759 %{ |
693 // Block comments. | 760 // Block comments. |
694 %} | 761 %} |
695 | 762 |
696 ^{S}*{CCHAR}\{{S}*{NL} { | 763 ^{S}*{CCHAR}\{{S}*{NL} { |
764 LEXER_DEBUG ("^{S}*{CCHAR}\\{{S}*{NL}"); | |
765 | |
697 input_line_number++; | 766 input_line_number++; |
698 current_input_column = 1; | 767 current_input_column = 1; |
699 block_comment_nesting_level++; | 768 block_comment_nesting_level++; |
700 promptflag--; | 769 promptflag--; |
701 bool eof = false; | 770 bool eof = false; |
704 | 773 |
705 %{ | 774 %{ |
706 // Other operators. | 775 // Other operators. |
707 %} | 776 %} |
708 | 777 |
709 ":" { BIN_OP_RETURN (':', false); } | 778 ":" { LEXER_DEBUG (":"); BIN_OP_RETURN (':', false); } |
710 | 779 |
711 ".+" { XBIN_OP_RETURN (EPLUS, false); } | 780 ".+" { LEXER_DEBUG (".+"); XBIN_OP_RETURN (EPLUS, false); } |
712 ".-" { XBIN_OP_RETURN (EMINUS, false); } | 781 ".-" { LEXER_DEBUG (".-"); XBIN_OP_RETURN (EMINUS, false); } |
713 ".*" { BIN_OP_RETURN (EMUL, false); } | 782 ".*" { LEXER_DEBUG (".*"); BIN_OP_RETURN (EMUL, false); } |
714 "./" { BIN_OP_RETURN (EDIV, false); } | 783 "./" { LEXER_DEBUG ("./"); BIN_OP_RETURN (EDIV, false); } |
715 ".\\" { BIN_OP_RETURN (ELEFTDIV, false); } | 784 ".\\" { LEXER_DEBUG (".\\"); BIN_OP_RETURN (ELEFTDIV, false); } |
716 ".^" { BIN_OP_RETURN (EPOW, false); } | 785 ".^" { LEXER_DEBUG (".^"); BIN_OP_RETURN (EPOW, false); } |
717 ".**" { XBIN_OP_RETURN (EPOW, false); } | 786 ".**" { LEXER_DEBUG (".**"); XBIN_OP_RETURN (EPOW, false); } |
718 ".'" { do_comma_insert_check (); BIN_OP_RETURN (TRANSPOSE, true); } | 787 ".'" { LEXER_DEBUG (".'"); do_comma_insert_check (); BIN_OP_RETURN (TRANSPOSE, true); } |
719 "++" { do_comma_insert_check (); XBIN_OP_RETURN (PLUS_PLUS, true); } | 788 "++" { LEXER_DEBUG ("++"); do_comma_insert_check (); XBIN_OP_RETURN (PLUS_PLUS, true); } |
720 "--" { do_comma_insert_check (); XBIN_OP_RETURN (MINUS_MINUS, true); } | 789 "--" { LEXER_DEBUG ("--"); do_comma_insert_check (); XBIN_OP_RETURN (MINUS_MINUS, true); } |
721 "<=" { BIN_OP_RETURN (EXPR_LE, false); } | 790 "<=" { LEXER_DEBUG ("<="); BIN_OP_RETURN (EXPR_LE, false); } |
722 "==" { BIN_OP_RETURN (EXPR_EQ, false); } | 791 "==" { LEXER_DEBUG ("=="); BIN_OP_RETURN (EXPR_EQ, false); } |
723 "~=" { BIN_OP_RETURN (EXPR_NE, false); } | 792 "~=" { LEXER_DEBUG ("~="); BIN_OP_RETURN (EXPR_NE, false); } |
724 "!=" { XBIN_OP_RETURN (EXPR_NE, false); } | 793 "!=" { LEXER_DEBUG ("!="); XBIN_OP_RETURN (EXPR_NE, false); } |
725 ">=" { BIN_OP_RETURN (EXPR_GE, false); } | 794 ">=" { LEXER_DEBUG (">="); BIN_OP_RETURN (EXPR_GE, false); } |
726 "&" { BIN_OP_RETURN (EXPR_AND, false); } | 795 "&" { LEXER_DEBUG ("&"); BIN_OP_RETURN (EXPR_AND, false); } |
727 "|" { BIN_OP_RETURN (EXPR_OR, false); } | 796 "|" { LEXER_DEBUG ("|"); BIN_OP_RETURN (EXPR_OR, false); } |
728 "<" { BIN_OP_RETURN (EXPR_LT, false); } | 797 "<" { LEXER_DEBUG ("<"); BIN_OP_RETURN (EXPR_LT, false); } |
729 ">" { BIN_OP_RETURN (EXPR_GT, false); } | 798 ">" { LEXER_DEBUG (">"); BIN_OP_RETURN (EXPR_GT, false); } |
730 "+" { BIN_OP_RETURN ('+', false); } | 799 "+" { LEXER_DEBUG ("+"); BIN_OP_RETURN ('+', false); } |
731 "-" { BIN_OP_RETURN ('-', false); } | 800 "-" { LEXER_DEBUG ("-"); BIN_OP_RETURN ('-', false); } |
732 "*" { BIN_OP_RETURN ('*', false); } | 801 "*" { LEXER_DEBUG ("*"); BIN_OP_RETURN ('*', false); } |
733 "/" { BIN_OP_RETURN ('/', false); } | 802 "/" { LEXER_DEBUG ("/"); BIN_OP_RETURN ('/', false); } |
734 "\\" { BIN_OP_RETURN (LEFTDIV, false); } | 803 "\\" { LEXER_DEBUG ("\\"); BIN_OP_RETURN (LEFTDIV, false); } |
735 ";" { BIN_OP_RETURN (';', true); } | 804 ";" { LEXER_DEBUG (";"); BIN_OP_RETURN (';', true); } |
736 "," { BIN_OP_RETURN (',', true); } | 805 "," { LEXER_DEBUG (","); BIN_OP_RETURN (',', true); } |
737 "^" { BIN_OP_RETURN (POW, false); } | 806 "^" { LEXER_DEBUG ("^"); BIN_OP_RETURN (POW, false); } |
738 "**" { XBIN_OP_RETURN (POW, false); } | 807 "**" { LEXER_DEBUG ("**"); XBIN_OP_RETURN (POW, false); } |
739 "=" { BIN_OP_RETURN ('=', true); } | 808 "=" { LEXER_DEBUG ("="); BIN_OP_RETURN ('=', true); } |
740 "&&" { BIN_OP_RETURN (EXPR_AND_AND, false); } | 809 "&&" { LEXER_DEBUG ("&&"); BIN_OP_RETURN (EXPR_AND_AND, false); } |
741 "||" { BIN_OP_RETURN (EXPR_OR_OR, false); } | 810 "||" { LEXER_DEBUG ("||"); BIN_OP_RETURN (EXPR_OR_OR, false); } |
742 "<<" { XBIN_OP_RETURN (LSHIFT, false); } | 811 "<<" { LEXER_DEBUG ("<<"); XBIN_OP_RETURN (LSHIFT, false); } |
743 ">>" { XBIN_OP_RETURN (RSHIFT, false); } | 812 ">>" { LEXER_DEBUG (">>"); XBIN_OP_RETURN (RSHIFT, false); } |
744 | 813 |
745 {NOT} { | 814 {NOT} { |
815 LEXER_DEBUG ("{NOT}"); | |
816 | |
746 if (yytext[0] == '~') | 817 if (yytext[0] == '~') |
747 BIN_OP_RETURN (EXPR_NOT, false); | 818 BIN_OP_RETURN (EXPR_NOT, false); |
748 else | 819 else |
749 XBIN_OP_RETURN (EXPR_NOT, false); | 820 XBIN_OP_RETURN (EXPR_NOT, false); |
750 } | 821 } |
751 | 822 |
752 "(" { | 823 "(" { |
824 LEXER_DEBUG ("("); | |
825 | |
753 lexer_flags.looking_at_indirect_ref = false; | 826 lexer_flags.looking_at_indirect_ref = false; |
754 nesting_level.paren (); | 827 nesting_level.paren (); |
755 promptflag--; | 828 promptflag--; |
756 TOK_RETURN ('('); | 829 TOK_RETURN ('('); |
757 } | 830 } |
758 | 831 |
759 ")" { | 832 ")" { |
833 LEXER_DEBUG (")"); | |
834 | |
760 nesting_level.remove (); | 835 nesting_level.remove (); |
761 current_input_column++; | 836 current_input_column++; |
762 lexer_flags.quote_is_transpose = true; | 837 lexer_flags.quote_is_transpose = true; |
763 lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace (); | 838 lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace (); |
764 do_comma_insert_check (); | 839 do_comma_insert_check (); |
765 COUNT_TOK_AND_RETURN (')'); | 840 COUNT_TOK_AND_RETURN (')'); |
766 } | 841 } |
767 | 842 |
768 "." { TOK_RETURN ('.'); } | 843 "." { LEXER_DEBUG ("."); TOK_RETURN ('.'); } |
769 | 844 |
770 "+=" { XBIN_OP_RETURN (ADD_EQ, false); } | 845 "+=" { LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false); } |
771 "-=" { XBIN_OP_RETURN (SUB_EQ, false); } | 846 "-=" { LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false); } |
772 "*=" { XBIN_OP_RETURN (MUL_EQ, false); } | 847 "*=" { LEXER_DEBUG ("*="); XBIN_OP_RETURN (MUL_EQ, false); } |
773 "/=" { XBIN_OP_RETURN (DIV_EQ, false); } | 848 "/=" { LEXER_DEBUG ("/="); XBIN_OP_RETURN (DIV_EQ, false); } |
774 "\\=" { XBIN_OP_RETURN (LEFTDIV_EQ, false); } | 849 "\\=" { LEXER_DEBUG ("\\="); XBIN_OP_RETURN (LEFTDIV_EQ, false); } |
775 ".+=" { XBIN_OP_RETURN (ADD_EQ, false); } | 850 ".+=" { LEXER_DEBUG (".+="); XBIN_OP_RETURN (ADD_EQ, false); } |
776 ".-=" { XBIN_OP_RETURN (SUB_EQ, false); } | 851 ".-=" { LEXER_DEBUG (".-="); XBIN_OP_RETURN (SUB_EQ, false); } |
777 ".*=" { XBIN_OP_RETURN (EMUL_EQ, false); } | 852 ".*=" { LEXER_DEBUG (".*="); XBIN_OP_RETURN (EMUL_EQ, false); } |
778 "./=" { XBIN_OP_RETURN (EDIV_EQ, false); } | 853 "./=" { LEXER_DEBUG ("./="); XBIN_OP_RETURN (EDIV_EQ, false); } |
779 ".\\=" { XBIN_OP_RETURN (ELEFTDIV_EQ, false); } | 854 ".\\=" { LEXER_DEBUG (".\\="); XBIN_OP_RETURN (ELEFTDIV_EQ, false); } |
780 {POW}= { XBIN_OP_RETURN (POW_EQ, false); } | 855 {POW}= { LEXER_DEBUG ("{POW}="); XBIN_OP_RETURN (POW_EQ, false); } |
781 {EPOW}= { XBIN_OP_RETURN (EPOW_EQ, false); } | 856 {EPOW}= { LEXER_DEBUG ("{EPOW}="); XBIN_OP_RETURN (EPOW_EQ, false); } |
782 "&=" { XBIN_OP_RETURN (AND_EQ, false); } | 857 "&=" { LEXER_DEBUG ("&="); XBIN_OP_RETURN (AND_EQ, false); } |
783 "|=" { XBIN_OP_RETURN (OR_EQ, false); } | 858 "|=" { LEXER_DEBUG ("|="); XBIN_OP_RETURN (OR_EQ, false); } |
784 "<<=" { XBIN_OP_RETURN (LSHIFT_EQ, false); } | 859 "<<=" { LEXER_DEBUG ("<<="); XBIN_OP_RETURN (LSHIFT_EQ, false); } |
785 ">>=" { XBIN_OP_RETURN (RSHIFT_EQ, false); } | 860 ">>=" { LEXER_DEBUG (">>="); XBIN_OP_RETURN (RSHIFT_EQ, false); } |
786 | 861 |
787 \{{S}* { | 862 \{{S}* { |
863 LEXER_DEBUG ("\\{{S}*"); | |
864 | |
788 nesting_level.brace (); | 865 nesting_level.brace (); |
789 | 866 |
790 current_input_column += yyleng; | 867 current_input_column += yyleng; |
791 lexer_flags.quote_is_transpose = false; | 868 lexer_flags.quote_is_transpose = false; |
792 lexer_flags.convert_spaces_to_comma = true; | 869 lexer_flags.convert_spaces_to_comma = true; |
798 BEGIN (MATRIX_START); | 875 BEGIN (MATRIX_START); |
799 COUNT_TOK_AND_RETURN ('{'); | 876 COUNT_TOK_AND_RETURN ('{'); |
800 } | 877 } |
801 | 878 |
802 "}" { | 879 "}" { |
880 LEXER_DEBUG ("}"); | |
881 | |
803 nesting_level.remove (); | 882 nesting_level.remove (); |
804 | 883 |
805 TOK_RETURN ('}'); | 884 TOK_RETURN ('}'); |
806 } | 885 } |
807 | 886 |
808 %{ | 887 %{ |
809 // Unrecognized input is a lexical error. | 888 // Unrecognized input is a lexical error. |
810 %} | 889 %} |
811 | 890 |
812 . { | 891 . { |
892 LEXER_DEBUG ("."); | |
893 | |
813 // EOF happens here if we are parsing nested functions. | 894 // EOF happens here if we are parsing nested functions. |
814 | 895 |
815 xunput (yytext[0], yytext); | 896 xunput (yytext[0], yytext); |
816 | 897 |
817 int c = text_yyinput (); | 898 int c = text_yyinput (); |
934 | 1015 |
935 return c; | 1016 return c; |
936 } | 1017 } |
937 | 1018 |
// Write a readable representation of the character C to std::cerr.
//
// A character for which isgraph is true is written as itself.  The
// codes 0 through 32 and 127 are written by name: the C escape
// sequence where one exists (\a \b \t \n \v \f \r), otherwise the
// ASCII abbreviation (NUL, SOH, ..., US, SPACE, DEL).  Any other
// non-graphic byte is written as a hexadecimal escape of the form
// \xHH, so that no character is ever displayed as an empty string.

static void
display_character (char c)
{
  // The <cctype> classification functions require a value that is
  // representable as unsigned char (or EOF); passing a negative plain
  // char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    {
      std::cerr << c;
      return;
    }

  // Display names for the codes 0 through 32, indexed by code.
  static const char * const low_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO", "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
      "SPACE"
    };

  if (uc <= 32)
    std::cerr << low_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
  else
    {
      // Non-graphic byte with no name in the table above.
      static const char hex_digits[] = "0123456789abcdef";

      std::cerr << "\\x" << hex_digits[uc >> 4] << hex_digits[uc & 0xf];
    }
}
1164 static void | |
939 xunput (char c, char *buf) | 1165 xunput (char c, char *buf) |
940 { | 1166 { |
1167 if (lexer_debug_flag) | |
1168 { | |
1169 std::cerr << "U: "; | |
1170 display_character (c); | |
1171 std::cerr << std::endl; | |
1172 } | |
1173 | |
941 if (c == '\n') | 1174 if (c == '\n') |
942 input_line_number--; | 1175 input_line_number--; |
943 | 1176 |
944 yyunput (c, buf); | 1177 yyunput (c, buf); |
945 } | 1178 } |
1503 // initially looking at a block comment. | 1736 // initially looking at a block comment. |
1504 | 1737 |
1505 std::string txt = start_in_block | 1738 std::string txt = start_in_block |
1506 ? grab_block_comment (flex_reader, eof) | 1739 ? grab_block_comment (flex_reader, eof) |
1507 : grab_comment_block (flex_reader, false, eof); | 1740 : grab_comment_block (flex_reader, false, eof); |
1741 | |
1742 if (lexer_debug_flag) | |
1743 std::cerr << "C: " << txt << std::endl; | |
1508 | 1744 |
1509 if (help_txt.empty () && nesting_level.none ()) | 1745 if (help_txt.empty () && nesting_level.none ()) |
1510 { | 1746 { |
1511 if (! help_buf.empty ()) | 1747 if (! help_buf.empty ()) |
1512 help_buf.pop (); | 1748 help_buf.pop (); |
2825 } | 3061 } |
2826 break; | 3062 break; |
2827 } | 3063 } |
2828 } | 3064 } |
2829 | 3065 |
3066 static void | |
3067 display_state (void) | |
3068 { | |
3069 std::cerr << "S: "; | |
3070 | |
3071 switch (YY_START) | |
3072 { | |
3073 case INITIAL: | |
3074 std::cerr << "INITIAL" << std::endl; | |
3075 break; | |
3076 | |
3077 case COMMAND_START: | |
3078 std::cerr << "COMMAND_START" << std::endl; | |
3079 break; | |
3080 | |
3081 case MATRIX_START: | |
3082 std::cerr << "MATRIX_START" << std::endl; | |
3083 break; | |
3084 | |
3085 case SCRIPT_FILE_BEGIN: | |
3086 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl; | |
3087 break; | |
3088 | |
3089 case NESTED_FUNCTION_END: | |
3090 std::cerr << "NESTED_FUNCTION_END" << std::endl; | |
3091 break; | |
3092 | |
3093 case NESTED_FUNCTION_BEGIN: | |
3094 std::cerr << "NESTED_FUNCTION_BEGIN" << std::endl; | |
3095 break; | |
3096 | |
3097 default: | |
3098 std::cerr << "UNKNOWN START STATE!" << std::endl; | |
3099 break; | |
3100 } | |
3101 } | |
3102 | |
3103 static void | |
3104 lexer_debug (const char *pattern, const char *text) | |
3105 { | |
3106 std::cerr << std::endl; | |
3107 | |
3108 display_state (); | |
3109 | |
3110 std::cerr << "P: " << pattern << std::endl; | |
3111 std::cerr << "T: " << text << std::endl; | |
3112 } | |
3113 | |
2830 DEFUN (__display_tokens__, args, nargout, | 3114 DEFUN (__display_tokens__, args, nargout, |
2831 "-*- texinfo -*-\n\ | 3115 "-*- texinfo -*-\n\ |
2832 @deftypefn {Built-in Function} {} __display_tokens__\n\ | 3116 @deftypefn {Built-in Function} {} __display_tokens__\n\ |
2833 Query or set the internal variable that determines whether Octave's\n\ | 3117 Query or set the internal variable that determines whether Octave's\n\ |
2834 lexer displays tokens as they are read.\n\ | 3118 lexer displays tokens as they are read.\n\ |
2844 @end deftypefn") | 3128 @end deftypefn") |
2845 { | 3129 { |
2846 return octave_value (Vtoken_count); | 3130 return octave_value (Vtoken_count); |
2847 } | 3131 } |
2848 | 3132 |
3133 DEFUN (__lexer_debug_flag__, args, nargout, | |
3134 "Undocumented internal function.") | |
3135 { | |
3136 octave_value retval; | |
3137 | |
3138 retval = set_internal_variable (lexer_debug_flag, args, nargout, | |
3139 "__lexer_debug_flag__"); | |
3140 | |
3141 return retval; | |
3142 } | |
3143 | |
2849 /* | 3144 /* |
2850 ;;; Local Variables: *** | 3145 ;;; Local Variables: *** |
2851 ;;; mode: C++ *** | 3146 ;;; mode: C++ *** |
2852 ;;; End: *** | 3147 ;;; End: *** |
2853 */ | 3148 */ |