comparison src/lex.l @ 8535:75e6ab186761

lexer debugging functions
author John W. Eaton <jwe@octave.org>
date Mon, 19 Jan 2009 16:53:30 -0500
parents 0e0bd07e6ae2
children de1b944d5306
comparison
equal deleted inserted replaced
8534:0eb83938c8bc 8535:75e6ab186761
116 do \ 116 do \
117 { \ 117 { \
118 int tok_val = tok; \ 118 int tok_val = tok; \
119 if (Vdisplay_tokens) \ 119 if (Vdisplay_tokens) \
120 display_token (tok_val); \ 120 display_token (tok_val); \
121 if (lexer_debug_flag) \
122 { \
123 std::cerr << "R: "; \
124 display_token (tok_val); \
125 std::cerr << std::endl; \
126 } \
121 return tok_val; \ 127 return tok_val; \
122 } \ 128 } \
123 while (0) 129 while (0)
124 130
125 #define COUNT_TOK_AND_RETURN(tok) \ 131 #define COUNT_TOK_AND_RETURN(tok) \
168 gripe_matlab_incompatible_operator (yytext); \ 174 gripe_matlab_incompatible_operator (yytext); \
169 BIN_OP_RETURN (tok, convert); \ 175 BIN_OP_RETURN (tok, convert); \
170 } \ 176 } \
171 while (0) 177 while (0)
172 178
// Trace helper for rule actions: when lexer_debug_flag is set, pass
// PATTERN and the current yytext to lexer_debug.  The do/while (0)
// wrapper lets the macro be used as a single statement.
#define LEXER_DEBUG(pattern) \
  do { if (lexer_debug_flag) lexer_debug (pattern, yytext); } while (0)
186
173 // TRUE means that we have encountered EOF on the input stream. 187 // TRUE means that we have encountered EOF on the input stream.
174 bool parser_end_of_input = false; 188 bool parser_end_of_input = false;
175 189
176 // Flags that need to be shared between the lexer and parser. 190 // Flags that need to be shared between the lexer and parser.
177 lexical_feedback lexer_flags; 191 lexical_feedback lexer_flags;
250 static unsigned int Vtoken_count = 0; 264 static unsigned int Vtoken_count = 0;
251 265
// Number of block comments currently open.  A nonzero value at end
// of input means a block comment was left unterminated.
254 static int block_comment_nesting_level = 0; 268 static int block_comment_nesting_level = 0;
269
270 // Internal variable for lexer debugging state.
271 static bool lexer_debug_flag = false;
255 272
256 // Forward declarations for functions defined at the bottom of this 273 // Forward declarations for functions defined at the bottom of this
257 // file. 274 // file.
258 275
259 static int text_yyinput (void); 276 static int text_yyinput (void);
283 static void gripe_matlab_incompatible (const std::string& msg); 300 static void gripe_matlab_incompatible (const std::string& msg);
284 static void maybe_gripe_matlab_incompatible_comment (char c); 301 static void maybe_gripe_matlab_incompatible_comment (char c);
285 static void gripe_matlab_incompatible_continuation (void); 302 static void gripe_matlab_incompatible_continuation (void);
286 static void gripe_matlab_incompatible_operator (const std::string& op); 303 static void gripe_matlab_incompatible_operator (const std::string& op);
287 static void display_token (int tok); 304 static void display_token (int tok);
305 static void lexer_debug (const char *pattern, const char *text);
288 306
289 %} 307 %}
290 308
291 D [0-9] 309 D [0-9]
292 S [ \t] 310 S [ \t]
306 EXPON ([DdEe][+-]?{D}+) 324 EXPON ([DdEe][+-]?{D}+)
307 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) 325 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+))
308 %% 326 %%
309 327
310 <SCRIPT_FILE_BEGIN>. { 328 <SCRIPT_FILE_BEGIN>. {
329 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>.");
330
311 BEGIN (INITIAL); 331 BEGIN (INITIAL);
312 xunput (yytext[0], yytext); 332 xunput (yytext[0], yytext);
313 COUNT_TOK_AND_RETURN (SCRIPT); 333 COUNT_TOK_AND_RETURN (SCRIPT);
314 } 334 }
315 335
316 <NESTED_FUNCTION_END>. { 336 <NESTED_FUNCTION_END>. {
337 LEXER_DEBUG ("<NESTED_FUNCTION_END>.");
338
317 BEGIN (NESTED_FUNCTION_BEGIN); 339 BEGIN (NESTED_FUNCTION_BEGIN);
318 xunput (yytext[0], yytext); 340 xunput (yytext[0], yytext);
319 COUNT_TOK_AND_RETURN (';'); 341 COUNT_TOK_AND_RETURN (';');
320 } 342 }
321 343
322 <NESTED_FUNCTION_BEGIN>. { 344 <NESTED_FUNCTION_BEGIN>. {
345 LEXER_DEBUG ("<NESTED_FUNCTION_BEGIN>.");
346
323 BEGIN (INITIAL); 347 BEGIN (INITIAL);
324 xunput (yytext[0], yytext); 348 xunput (yytext[0], yytext);
325 prep_for_nested_function (); 349 prep_for_nested_function ();
326 COUNT_TOK_AND_RETURN (FCN); 350 COUNT_TOK_AND_RETURN (FCN);
327 } 351 }
331 // stuff needs to be simplified. May require some changes in the 355 // stuff needs to be simplified. May require some changes in the
332 // parser too. 356 // parser too.
333 %} 357 %}
334 358
335 <COMMAND_START>{NL} { 359 <COMMAND_START>{NL} {
360 LEXER_DEBUG ("<COMMAND_START>{NL}");
361
336 BEGIN (INITIAL); 362 BEGIN (INITIAL);
337 input_line_number++; 363 input_line_number++;
338 current_input_column = 1; 364 current_input_column = 1;
339 lexer_flags.quote_is_transpose = false; 365 lexer_flags.quote_is_transpose = false;
340 lexer_flags.convert_spaces_to_comma = true; 366 lexer_flags.convert_spaces_to_comma = true;
341 lexer_flags.doing_rawcommand = false; 367 lexer_flags.doing_rawcommand = false;
342 COUNT_TOK_AND_RETURN ('\n'); 368 COUNT_TOK_AND_RETURN ('\n');
343 } 369 }
344 370
345 <COMMAND_START>[\;\,] { 371 <COMMAND_START>[\;\,] {
372 LEXER_DEBUG ("<COMMAND_START>[\\;\\,]");
373
346 if (lexer_flags.doing_rawcommand) 374 if (lexer_flags.doing_rawcommand)
347 TOK_PUSH_AND_RETURN (yytext, SQ_STRING); 375 TOK_PUSH_AND_RETURN (yytext, SQ_STRING);
348 376
349 BEGIN (INITIAL); 377 BEGIN (INITIAL);
350 378
353 else 381 else
354 TOK_RETURN (';'); 382 TOK_RETURN (';');
355 } 383 }
356 384
357 <COMMAND_START>[\"\'] { 385 <COMMAND_START>[\"\'] {
386 LEXER_DEBUG ("<COMMAND_START>[\\\"\\']");
387
358 current_input_column++; 388 current_input_column++;
359 int tok = handle_string (yytext[0], true); 389 int tok = handle_string (yytext[0], true);
360 COUNT_TOK_AND_RETURN (tok); 390 COUNT_TOK_AND_RETURN (tok);
361 } 391 }
362 392
363 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { 393 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* {
394 LEXER_DEBUG ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*");
395
364 std::string tok = strip_trailing_whitespace (yytext); 396 std::string tok = strip_trailing_whitespace (yytext);
365 TOK_PUSH_AND_RETURN (tok, SQ_STRING); 397 TOK_PUSH_AND_RETURN (tok, SQ_STRING);
366 } 398 }
367 399
368 %{ 400 %{
378 410
379 // FIXME -- we need to handle block comments here. 411 // FIXME -- we need to handle block comments here.
380 %} 412 %}
381 413
382 <MATRIX_START>{SNLCMT}*\]{S}* { 414 <MATRIX_START>{SNLCMT}*\]{S}* {
415 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\]{S}*");
416
383 scan_for_comments (yytext); 417 scan_for_comments (yytext);
384 fixup_column_count (yytext); 418 fixup_column_count (yytext);
385 int c = yytext[yyleng-1]; 419 int c = yytext[yyleng-1];
386 int cont_is_spc = eat_continuation (); 420 int cont_is_spc = eat_continuation ();
387 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); 421 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
394 %{ 428 %{
395 // FIXME -- we need to handle block comments here. 429 // FIXME -- we need to handle block comments here.
396 %} 430 %}
397 431
398 <MATRIX_START>{SNLCMT}*\}{S}* { 432 <MATRIX_START>{SNLCMT}*\}{S}* {
433 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\}{S}*");
434
399 scan_for_comments (yytext); 435 scan_for_comments (yytext);
400 fixup_column_count (yytext); 436 fixup_column_count (yytext);
401 int c = yytext[yyleng-1]; 437 int c = yytext[yyleng-1];
402 int cont_is_spc = eat_continuation (); 438 int cont_is_spc = eat_continuation ();
403 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); 439 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
412 // check for continuations here we can end up inserting too many 448 // check for continuations here we can end up inserting too many
413 // commas. 449 // commas.
414 %} 450 %}
415 451
416 <MATRIX_START>{S}*\,{S}* { 452 <MATRIX_START>{S}*\,{S}* {
453 LEXER_DEBUG ("<MATRIX_START>{S}*\\,{S}*");
454
417 current_input_column += yyleng; 455 current_input_column += yyleng;
418 456
419 int tmp = eat_continuation (); 457 int tmp = eat_continuation ();
420 458
421 lexer_flags.quote_is_transpose = false; 459 lexer_flags.quote_is_transpose = false;
437 // constants so we just eat them. If we don't check for continuations 475 // constants so we just eat them. If we don't check for continuations
438 // here we can end up inserting too many commas. 476 // here we can end up inserting too many commas.
439 %} 477 %}
440 478
441 <MATRIX_START>{S}+ { 479 <MATRIX_START>{S}+ {
480 LEXER_DEBUG ("<MATRIX_START>{S}+");
481
442 current_input_column += yyleng; 482 current_input_column += yyleng;
443 483
444 int tmp = eat_continuation (); 484 int tmp = eat_continuation ();
445 bool bin_op = next_token_is_bin_op (true); 485 bool bin_op = next_token_is_bin_op (true);
446 bool postfix_un_op = next_token_is_postfix_unary_op (true); 486 bool postfix_un_op = next_token_is_postfix_unary_op (true);
473 513
474 // FIXME -- we need to handle block comments here. 514 // FIXME -- we need to handle block comments here.
475 %} 515 %}
476 516
477 <MATRIX_START>{SNLCMT}*;{SNLCMT}* { 517 <MATRIX_START>{SNLCMT}*;{SNLCMT}* {
518 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*;{SNLCMT}*");
519
478 scan_for_comments (yytext); 520 scan_for_comments (yytext);
479 fixup_column_count (yytext); 521 fixup_column_count (yytext);
480 eat_whitespace (); 522 eat_whitespace ();
481 lexer_flags.quote_is_transpose = false; 523 lexer_flags.quote_is_transpose = false;
482 lexer_flags.convert_spaces_to_comma = true; 524 lexer_flags.convert_spaces_to_comma = true;
491 // FIXME -- we need to handle block comments here. 533 // FIXME -- we need to handle block comments here.
492 %} 534 %}
493 535
494 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* | 536 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* |
495 <MATRIX_START>{S}*{NL}{SNLCMT}* { 537 <MATRIX_START>{S}*{NL}{SNLCMT}* {
538 LEXER_DEBUG ("<MATRIX_START>{S}*{COMMENT}{SNLCMT}*|<MATRIX_START>{S}*{NL}{SNLCMT}*");
539
496 scan_for_comments (yytext); 540 scan_for_comments (yytext);
497 fixup_column_count (yytext); 541 fixup_column_count (yytext);
498 eat_whitespace (); 542 eat_whitespace ();
499 543
500 lexer_flags.quote_is_transpose = false; 544 lexer_flags.quote_is_transpose = false;
510 COUNT_TOK_AND_RETURN (';'); 554 COUNT_TOK_AND_RETURN (';');
511 } 555 }
512 } 556 }
513 557
514 \[{S}* { 558 \[{S}* {
559 LEXER_DEBUG ("\\[{S}*");
560
515 nesting_level.bracket (); 561 nesting_level.bracket ();
516 562
517 current_input_column += yyleng; 563 current_input_column += yyleng;
518 lexer_flags.quote_is_transpose = false; 564 lexer_flags.quote_is_transpose = false;
519 lexer_flags.convert_spaces_to_comma = true; 565 lexer_flags.convert_spaces_to_comma = true;
530 BEGIN (MATRIX_START); 576 BEGIN (MATRIX_START);
531 COUNT_TOK_AND_RETURN ('['); 577 COUNT_TOK_AND_RETURN ('[');
532 } 578 }
533 579
534 \] { 580 \] {
581 LEXER_DEBUG ("\\]");
582
535 nesting_level.remove (); 583 nesting_level.remove ();
536 584
537 TOK_RETURN (']'); 585 TOK_RETURN (']');
538 } 586 }
539 587
540 %{ 588 %{
541 // Imaginary numbers. 589 // Imaginary numbers.
542 %} 590 %}
543 591
544 {NUMBER}{Im} { 592 {NUMBER}{Im} {
593 LEXER_DEBUG ("{NUMBER}{Im}");
594
545 handle_number (); 595 handle_number ();
546 COUNT_TOK_AND_RETURN (IMAG_NUM); 596 COUNT_TOK_AND_RETURN (IMAG_NUM);
547 } 597 }
548 598
549 %{ 599 %{
550 // Real numbers. Don't grab the `.' part of a dot operator as part of 600 // Real numbers. Don't grab the `.' part of a dot operator as part of
551 // the constant. 601 // the constant.
552 %} 602 %}
553 603
{D}+/\.[\*/\\^\'] |
{NUMBER} {
    // The label mirrors the rule pattern.  Every backslash in the
    // pattern is doubled for the C string, so the escaped backslash
    // inside the character class is written as four backslashes.
    LEXER_DEBUG ("{D}+/\\.[\\*/\\\\^\\']|{NUMBER}");

    handle_number ();
    COUNT_TOK_AND_RETURN (NUM);
  }
559 610
560 %{ 611 %{
570 // Continuation lines. Allow comments after continuations. 621 // Continuation lines. Allow comments after continuations.
571 %} 622 %}
572 623
573 {CONT}{S}*{NL} | 624 {CONT}{S}*{NL} |
574 {CONT}{S}*{COMMENT} { 625 {CONT}{S}*{COMMENT} {
626 LEXER_DEBUG ("{CONT}{S}*{NL}|{CONT}{S}*{COMMENT}");
627
575 if (yytext[0] == '\\') 628 if (yytext[0] == '\\')
576 gripe_matlab_incompatible_continuation (); 629 gripe_matlab_incompatible_continuation ();
577 scan_for_comments (yytext); 630 scan_for_comments (yytext);
578 promptflag--; 631 promptflag--;
579 input_line_number++; 632 input_line_number++;
583 %{ 636 %{
584 // End of file. 637 // End of file.
585 %} 638 %}
586 639
587 <<EOF>> { 640 <<EOF>> {
641 LEXER_DEBUG ("<<EOF>>");
642
588 if (block_comment_nesting_level != 0) 643 if (block_comment_nesting_level != 0)
589 { 644 {
590 warning ("block comment open at end of input"); 645 warning ("block comment open at end of input");
591 646
592 if ((reading_fcn_file || reading_script_file) 647 if ((reading_fcn_file || reading_script_file)
602 // Identifiers. Truncate the token at the first space or tab but 657 // Identifiers. Truncate the token at the first space or tab but
603 // don't write directly on yytext. 658 // don't write directly on yytext.
604 %} 659 %}
605 660
606 {IDENT}{S}* { 661 {IDENT}{S}* {
662 LEXER_DEBUG ("{IDENT}{S}*");
663
607 int id_tok = handle_identifier (); 664 int id_tok = handle_identifier ();
608 665
609 if (id_tok >= 0) 666 if (id_tok >= 0)
610 COUNT_TOK_AND_RETURN (id_tok); 667 COUNT_TOK_AND_RETURN (id_tok);
611 } 668 }
613 %{ 670 %{
614 // Function handles. 671 // Function handles.
615 %} 672 %}
616 673
617 "@" { 674 "@" {
675 LEXER_DEBUG ("@");
676
618 current_input_column++; 677 current_input_column++;
619 lexer_flags.quote_is_transpose = false; 678 lexer_flags.quote_is_transpose = false;
620 lexer_flags.convert_spaces_to_comma = false; 679 lexer_flags.convert_spaces_to_comma = false;
621 lexer_flags.looking_at_function_handle++; 680 lexer_flags.looking_at_function_handle++;
622 COUNT_TOK_AND_RETURN ('@'); 681 COUNT_TOK_AND_RETURN ('@');
627 // are handled by the <MATRIX_START> start state code above. If closest 686 // are handled by the <MATRIX_START> start state code above. If closest
628 // nesting is inside parentheses, don't return a row separator. 687 // nesting is inside parentheses, don't return a row separator.
629 %} 688 %}
630 689
631 {NL} { 690 {NL} {
691 LEXER_DEBUG ("{NL}");
692
632 input_line_number++; 693 input_line_number++;
633 current_input_column = 1; 694 current_input_column = 1;
634 lexer_flags.quote_is_transpose = false; 695 lexer_flags.quote_is_transpose = false;
635 lexer_flags.convert_spaces_to_comma = true; 696 lexer_flags.convert_spaces_to_comma = true;
636 if (nesting_level.none ()) 697 if (nesting_level.none ())
645 // Single quote can either be the beginning of a string or a transpose 706 // Single quote can either be the beginning of a string or a transpose
646 // operator. 707 // operator.
647 %} 708 %}
648 709
649 "'" { 710 "'" {
711 LEXER_DEBUG ("'");
712
650 current_input_column++; 713 current_input_column++;
651 lexer_flags.convert_spaces_to_comma = true; 714 lexer_flags.convert_spaces_to_comma = true;
652 715
653 if (lexer_flags.quote_is_transpose) 716 if (lexer_flags.quote_is_transpose)
654 { 717 {
665 %{ 728 %{
666 // Double quotes always begin strings. 729 // Double quotes always begin strings.
667 %} 730 %}
668 731
669 \" { 732 \" {
733 LEXER_DEBUG ("\"");
734
670 current_input_column++; 735 current_input_column++;
671 int tok = handle_string ('"'); 736 int tok = handle_string ('"');
672 COUNT_TOK_AND_RETURN (tok); 737 COUNT_TOK_AND_RETURN (tok);
673 } 738 }
674 739
676 // Gobble comments. If closest nesting is inside parentheses, don't 741 // Gobble comments. If closest nesting is inside parentheses, don't
677 // return a new line. 742 // return a new line.
678 %} 743 %}
679 744
680 {CCHAR} { 745 {CCHAR} {
746 LEXER_DEBUG ("{CCHAR}");
747
681 xunput (yytext[0], yytext); 748 xunput (yytext[0], yytext);
682 749
683 bool eof = false; 750 bool eof = false;
684 int tok = process_comment (false, eof); 751 int tok = process_comment (false, eof);
685 752
692 %{ 759 %{
693 // Block comments. 760 // Block comments.
694 %} 761 %}
695 762
696 ^{S}*{CCHAR}\{{S}*{NL} { 763 ^{S}*{CCHAR}\{{S}*{NL} {
764 LEXER_DEBUG ("^{S}*{CCHAR}\\{{S}*{NL}");
765
697 input_line_number++; 766 input_line_number++;
698 current_input_column = 1; 767 current_input_column = 1;
699 block_comment_nesting_level++; 768 block_comment_nesting_level++;
700 promptflag--; 769 promptflag--;
701 bool eof = false; 770 bool eof = false;
704 773
705 %{ 774 %{
706 // Other operators. 775 // Other operators.
707 %} 776 %}
708 777
709 ":" { BIN_OP_RETURN (':', false); } 778 ":" { LEXER_DEBUG (":"); BIN_OP_RETURN (':', false); }
710 779
711 ".+" { XBIN_OP_RETURN (EPLUS, false); } 780 ".+" { LEXER_DEBUG (".+"); XBIN_OP_RETURN (EPLUS, false); }
712 ".-" { XBIN_OP_RETURN (EMINUS, false); } 781 ".-" { LEXER_DEBUG (".-"); XBIN_OP_RETURN (EMINUS, false); }
713 ".*" { BIN_OP_RETURN (EMUL, false); } 782 ".*" { LEXER_DEBUG (".*"); BIN_OP_RETURN (EMUL, false); }
714 "./" { BIN_OP_RETURN (EDIV, false); } 783 "./" { LEXER_DEBUG ("./"); BIN_OP_RETURN (EDIV, false); }
715 ".\\" { BIN_OP_RETURN (ELEFTDIV, false); } 784 ".\\" { LEXER_DEBUG (".\\"); BIN_OP_RETURN (ELEFTDIV, false); }
716 ".^" { BIN_OP_RETURN (EPOW, false); } 785 ".^" { LEXER_DEBUG (".^"); BIN_OP_RETURN (EPOW, false); }
717 ".**" { XBIN_OP_RETURN (EPOW, false); } 786 ".**" { LEXER_DEBUG (".**"); XBIN_OP_RETURN (EPOW, false); }
718 ".'" { do_comma_insert_check (); BIN_OP_RETURN (TRANSPOSE, true); } 787 ".'" { LEXER_DEBUG (".'"); do_comma_insert_check (); BIN_OP_RETURN (TRANSPOSE, true); }
719 "++" { do_comma_insert_check (); XBIN_OP_RETURN (PLUS_PLUS, true); } 788 "++" { LEXER_DEBUG ("++"); do_comma_insert_check (); XBIN_OP_RETURN (PLUS_PLUS, true); }
720 "--" { do_comma_insert_check (); XBIN_OP_RETURN (MINUS_MINUS, true); } 789 "--" { LEXER_DEBUG ("--"); do_comma_insert_check (); XBIN_OP_RETURN (MINUS_MINUS, true); }
721 "<=" { BIN_OP_RETURN (EXPR_LE, false); } 790 "<=" { LEXER_DEBUG ("<="); BIN_OP_RETURN (EXPR_LE, false); }
722 "==" { BIN_OP_RETURN (EXPR_EQ, false); } 791 "==" { LEXER_DEBUG ("=="); BIN_OP_RETURN (EXPR_EQ, false); }
723 "~=" { BIN_OP_RETURN (EXPR_NE, false); } 792 "~=" { LEXER_DEBUG ("~="); BIN_OP_RETURN (EXPR_NE, false); }
724 "!=" { XBIN_OP_RETURN (EXPR_NE, false); } 793 "!=" { LEXER_DEBUG ("!="); XBIN_OP_RETURN (EXPR_NE, false); }
725 ">=" { BIN_OP_RETURN (EXPR_GE, false); } 794 ">=" { LEXER_DEBUG (">="); BIN_OP_RETURN (EXPR_GE, false); }
726 "&" { BIN_OP_RETURN (EXPR_AND, false); } 795 "&" { LEXER_DEBUG ("&"); BIN_OP_RETURN (EXPR_AND, false); }
727 "|" { BIN_OP_RETURN (EXPR_OR, false); } 796 "|" { LEXER_DEBUG ("|"); BIN_OP_RETURN (EXPR_OR, false); }
728 "<" { BIN_OP_RETURN (EXPR_LT, false); } 797 "<" { LEXER_DEBUG ("<"); BIN_OP_RETURN (EXPR_LT, false); }
729 ">" { BIN_OP_RETURN (EXPR_GT, false); } 798 ">" { LEXER_DEBUG (">"); BIN_OP_RETURN (EXPR_GT, false); }
730 "+" { BIN_OP_RETURN ('+', false); } 799 "+" { LEXER_DEBUG ("+"); BIN_OP_RETURN ('+', false); }
731 "-" { BIN_OP_RETURN ('-', false); } 800 "-" { LEXER_DEBUG ("-"); BIN_OP_RETURN ('-', false); }
732 "*" { BIN_OP_RETURN ('*', false); } 801 "*" { LEXER_DEBUG ("*"); BIN_OP_RETURN ('*', false); }
733 "/" { BIN_OP_RETURN ('/', false); } 802 "/" { LEXER_DEBUG ("/"); BIN_OP_RETURN ('/', false); }
734 "\\" { BIN_OP_RETURN (LEFTDIV, false); } 803 "\\" { LEXER_DEBUG ("\\"); BIN_OP_RETURN (LEFTDIV, false); }
735 ";" { BIN_OP_RETURN (';', true); } 804 ";" { LEXER_DEBUG (";"); BIN_OP_RETURN (';', true); }
736 "," { BIN_OP_RETURN (',', true); } 805 "," { LEXER_DEBUG (","); BIN_OP_RETURN (',', true); }
737 "^" { BIN_OP_RETURN (POW, false); } 806 "^" { LEXER_DEBUG ("^"); BIN_OP_RETURN (POW, false); }
738 "**" { XBIN_OP_RETURN (POW, false); } 807 "**" { LEXER_DEBUG ("**"); XBIN_OP_RETURN (POW, false); }
739 "=" { BIN_OP_RETURN ('=', true); } 808 "=" { LEXER_DEBUG ("="); BIN_OP_RETURN ('=', true); }
740 "&&" { BIN_OP_RETURN (EXPR_AND_AND, false); } 809 "&&" { LEXER_DEBUG ("&&"); BIN_OP_RETURN (EXPR_AND_AND, false); }
741 "||" { BIN_OP_RETURN (EXPR_OR_OR, false); } 810 "||" { LEXER_DEBUG ("||"); BIN_OP_RETURN (EXPR_OR_OR, false); }
742 "<<" { XBIN_OP_RETURN (LSHIFT, false); } 811 "<<" { LEXER_DEBUG ("<<"); XBIN_OP_RETURN (LSHIFT, false); }
743 ">>" { XBIN_OP_RETURN (RSHIFT, false); } 812 ">>" { LEXER_DEBUG (">>"); XBIN_OP_RETURN (RSHIFT, false); }
744 813
745 {NOT} { 814 {NOT} {
815 LEXER_DEBUG ("{NOT}");
816
746 if (yytext[0] == '~') 817 if (yytext[0] == '~')
747 BIN_OP_RETURN (EXPR_NOT, false); 818 BIN_OP_RETURN (EXPR_NOT, false);
748 else 819 else
749 XBIN_OP_RETURN (EXPR_NOT, false); 820 XBIN_OP_RETURN (EXPR_NOT, false);
750 } 821 }
751 822
752 "(" { 823 "(" {
824 LEXER_DEBUG ("(");
825
753 lexer_flags.looking_at_indirect_ref = false; 826 lexer_flags.looking_at_indirect_ref = false;
754 nesting_level.paren (); 827 nesting_level.paren ();
755 promptflag--; 828 promptflag--;
756 TOK_RETURN ('('); 829 TOK_RETURN ('(');
757 } 830 }
758 831
759 ")" { 832 ")" {
833 LEXER_DEBUG (")");
834
760 nesting_level.remove (); 835 nesting_level.remove ();
761 current_input_column++; 836 current_input_column++;
762 lexer_flags.quote_is_transpose = true; 837 lexer_flags.quote_is_transpose = true;
763 lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace (); 838 lexer_flags.convert_spaces_to_comma = nesting_level.is_bracket_or_brace ();
764 do_comma_insert_check (); 839 do_comma_insert_check ();
765 COUNT_TOK_AND_RETURN (')'); 840 COUNT_TOK_AND_RETURN (')');
766 } 841 }
767 842
768 "." { TOK_RETURN ('.'); } 843 "." { LEXER_DEBUG ("."); TOK_RETURN ('.'); }
769 844
770 "+=" { XBIN_OP_RETURN (ADD_EQ, false); } 845 "+=" { LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false); }
771 "-=" { XBIN_OP_RETURN (SUB_EQ, false); } 846 "-=" { LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false); }
772 "*=" { XBIN_OP_RETURN (MUL_EQ, false); } 847 "*=" { LEXER_DEBUG ("*="); XBIN_OP_RETURN (MUL_EQ, false); }
773 "/=" { XBIN_OP_RETURN (DIV_EQ, false); } 848 "/=" { LEXER_DEBUG ("/="); XBIN_OP_RETURN (DIV_EQ, false); }
774 "\\=" { XBIN_OP_RETURN (LEFTDIV_EQ, false); } 849 "\\=" { LEXER_DEBUG ("\\="); XBIN_OP_RETURN (LEFTDIV_EQ, false); }
775 ".+=" { XBIN_OP_RETURN (ADD_EQ, false); } 850 ".+=" { LEXER_DEBUG (".+="); XBIN_OP_RETURN (ADD_EQ, false); }
776 ".-=" { XBIN_OP_RETURN (SUB_EQ, false); } 851 ".-=" { LEXER_DEBUG (".-="); XBIN_OP_RETURN (SUB_EQ, false); }
777 ".*=" { XBIN_OP_RETURN (EMUL_EQ, false); } 852 ".*=" { LEXER_DEBUG (".*="); XBIN_OP_RETURN (EMUL_EQ, false); }
778 "./=" { XBIN_OP_RETURN (EDIV_EQ, false); } 853 "./=" { LEXER_DEBUG ("./="); XBIN_OP_RETURN (EDIV_EQ, false); }
779 ".\\=" { XBIN_OP_RETURN (ELEFTDIV_EQ, false); } 854 ".\\=" { LEXER_DEBUG (".\\="); XBIN_OP_RETURN (ELEFTDIV_EQ, false); }
780 {POW}= { XBIN_OP_RETURN (POW_EQ, false); } 855 {POW}= { LEXER_DEBUG ("{POW}="); XBIN_OP_RETURN (POW_EQ, false); }
781 {EPOW}= { XBIN_OP_RETURN (EPOW_EQ, false); } 856 {EPOW}= { LEXER_DEBUG ("{EPOW}="); XBIN_OP_RETURN (EPOW_EQ, false); }
782 "&=" { XBIN_OP_RETURN (AND_EQ, false); } 857 "&=" { LEXER_DEBUG ("&="); XBIN_OP_RETURN (AND_EQ, false); }
783 "|=" { XBIN_OP_RETURN (OR_EQ, false); } 858 "|=" { LEXER_DEBUG ("|="); XBIN_OP_RETURN (OR_EQ, false); }
784 "<<=" { XBIN_OP_RETURN (LSHIFT_EQ, false); } 859 "<<=" { LEXER_DEBUG ("<<="); XBIN_OP_RETURN (LSHIFT_EQ, false); }
785 ">>=" { XBIN_OP_RETURN (RSHIFT_EQ, false); } 860 ">>=" { LEXER_DEBUG (">>="); XBIN_OP_RETURN (RSHIFT_EQ, false); }
786 861
787 \{{S}* { 862 \{{S}* {
863 LEXER_DEBUG ("\\{{S}*");
864
788 nesting_level.brace (); 865 nesting_level.brace ();
789 866
790 current_input_column += yyleng; 867 current_input_column += yyleng;
791 lexer_flags.quote_is_transpose = false; 868 lexer_flags.quote_is_transpose = false;
792 lexer_flags.convert_spaces_to_comma = true; 869 lexer_flags.convert_spaces_to_comma = true;
798 BEGIN (MATRIX_START); 875 BEGIN (MATRIX_START);
799 COUNT_TOK_AND_RETURN ('{'); 876 COUNT_TOK_AND_RETURN ('{');
800 } 877 }
801 878
802 "}" { 879 "}" {
880 LEXER_DEBUG ("}");
881
803 nesting_level.remove (); 882 nesting_level.remove ();
804 883
805 TOK_RETURN ('}'); 884 TOK_RETURN ('}');
806 } 885 }
807 886
808 %{ 887 %{
809 // Unrecognized input is a lexical error. 888 // Unrecognized input is a lexical error.
810 %} 889 %}
811 890
812 . { 891 . {
892 LEXER_DEBUG (".");
893
813 // EOF happens here if we are parsing nested functions. 894 // EOF happens here if we are parsing nested functions.
814 895
815 xunput (yytext[0], yytext); 896 xunput (yytext[0], yytext);
816 897
817 int c = text_yyinput (); 898 int c = text_yyinput ();
934 1015
935 return c; 1016 return c;
936 } 1017 }
937 1018
// Write a readable representation of C to std::cerr.
//
// Characters for which isgraph is true are written unchanged.  Codes
// 0 through 32 are written using a short name or C escape ("NUL",
// "\\t", "SPACE", ...), 127 is written as "DEL", and any other
// non-graphic value is written as a two digit hex escape so that it
// never produces empty output.

static void
display_character (char c)
{
  // Printable names for character codes 0 through 32, indexed by code.
  static const char * const control_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  // The <ctype.h> functions require a value representable as unsigned
  // char (or EOF); passing a negative plain char is undefined.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc <= 32)
    std::cerr << control_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
  else
    {
      static const char hex_digits[] = "0123456789abcdef";

      std::cerr << "\\x" << hex_digits[uc >> 4] << hex_digits[uc & 0x0f];
    }
}
1164 static void
939 xunput (char c, char *buf) 1165 xunput (char c, char *buf)
940 { 1166 {
1167 if (lexer_debug_flag)
1168 {
1169 std::cerr << "U: ";
1170 display_character (c);
1171 std::cerr << std::endl;
1172 }
1173
941 if (c == '\n') 1174 if (c == '\n')
942 input_line_number--; 1175 input_line_number--;
943 1176
944 yyunput (c, buf); 1177 yyunput (c, buf);
945 } 1178 }
1503 // initially looking at a block comment. 1736 // initially looking at a block comment.
1504 1737
1505 std::string txt = start_in_block 1738 std::string txt = start_in_block
1506 ? grab_block_comment (flex_reader, eof) 1739 ? grab_block_comment (flex_reader, eof)
1507 : grab_comment_block (flex_reader, false, eof); 1740 : grab_comment_block (flex_reader, false, eof);
1741
1742 if (lexer_debug_flag)
1743 std::cerr << "C: " txt << std::endl;
1508 1744
1509 if (help_txt.empty () && nesting_level.none ()) 1745 if (help_txt.empty () && nesting_level.none ())
1510 { 1746 {
1511 if (! help_buf.empty ()) 1747 if (! help_buf.empty ())
1512 help_buf.pop (); 1748 help_buf.pop ();
2825 } 3061 }
2826 break; 3062 break;
2827 } 3063 }
2828 } 3064 }
2829 3065
3066 static void
3067 display_state (void)
3068 {
3069 std::cerr << "S: ";
3070
3071 switch (YY_START)
3072 {
3073 case INITIAL:
3074 std::cerr << "INITIAL" << std::endl;
3075 break;
3076
3077 case COMMAND_START:
3078 std::cerr << "COMMAND_START" << std::endl;
3079 break;
3080
3081 case MATRIX_START:
3082 std::cerr << "MATRIX_START" << std::endl;
3083 break;
3084
3085 case SCRIPT_FILE_BEGIN:
3086 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl;
3087 break;
3088
3089 case NESTED_FUNCTION_END:
3090 std::cerr << "NESTED_FUNCTION_END" << std::endl;
3091 break;
3092
3093 case NESTED_FUNCTION_BEGIN:
3094 std::cerr << "NESTED_FUNCTION_BEGIN" << std::endl;
3095 break;
3096
3097 default:
3098 std::cerr << "UNKNOWN START STATE!" << std::endl;
3099 break;
3100 }
3101 }
3102
3103 static void
3104 lexer_debug (const char *pattern, const char *text)
3105 {
3106 std::cerr << std::endl;
3107
3108 display_state ();
3109
3110 std::cerr << "P: " << pattern << std::endl;
3111 std::cerr << "T: " << text << std::endl;
3112 }
3113
2830 DEFUN (__display_tokens__, args, nargout, 3114 DEFUN (__display_tokens__, args, nargout,
2831 "-*- texinfo -*-\n\ 3115 "-*- texinfo -*-\n\
2832 @deftypefn {Built-in Function} {} __display_tokens__\n\ 3116 @deftypefn {Built-in Function} {} __display_tokens__\n\
2833 Query or set the internal variable that determines whether Octave's\n\ 3117 Query or set the internal variable that determines whether Octave's\n\
2834 lexer displays tokens as they are read.\n\ 3118 lexer displays tokens as they are read.\n\
2844 @end deftypefn") 3128 @end deftypefn")
2845 { 3129 {
2846 return octave_value (Vtoken_count); 3130 return octave_value (Vtoken_count);
2847 } 3131 }
2848 3132
3133 DEFUN (__lexer_debug_flag__, args, nargout,
3134 "Undocumented internal function.")
3135 {
3136 octave_value retval;
3137
3138 retval = set_internal_variable (lexer_debug_flag, args, nargout,
3139 "__lexer_debug_flag__");
3140
3141 return retval;
3142 }
3143
2849 /* 3144 /*
2850 ;;; Local Variables: *** 3145 ;;; Local Variables: ***
2851 ;;; mode: C++ *** 3146 ;;; mode: C++ ***
2852 ;;; End: *** 3147 ;;; End: ***
2853 */ 3148 */