comparison libinterp/parse-tree/lex.ll @ 16904:f29dd5a7591d

more tweaks for parsing character strings

* lex.ll (<SQ_STRING_START>\'\', <SQ_STRING_START>\', <SQ_STRING_START>[^\'\n\r]+): New patterns to replace <SQ_STRING_START>[^\'\n\r]*\'.
(<SQ_STRING_START>[^\'\n\r]*\'): Delete.
Attempt to correctly update input position for all patterns.
(<DQ_STRING_START>\\{NL}): Only check for EOB or EOF if we are using the push lexer interface.
author John W. Eaton <jwe@octave.org>
date Fri, 05 Jul 2013 13:56:21 -0400
parents f21194531877
children bc12849bb6cc
comparison
equal deleted inserted replaced
16903:f21194531877 16904:f29dd5a7591d
636 } 636 }
637 637
638 <DQ_STRING_START>\" { 638 <DQ_STRING_START>\" {
639 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\""); 639 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"");
640 640
641 curr_lexer->current_input_column++;
642
641 curr_lexer->pop_start_state (); 643 curr_lexer->pop_start_state ();
642 644
643 curr_lexer->looking_for_object_index = true; 645 curr_lexer->looking_for_object_index = true;
644 curr_lexer->at_beginning_of_statement = false; 646 curr_lexer->at_beginning_of_statement = false;
645 647
654 } 656 }
655 657
656 <DQ_STRING_START>\\[0-7]{1,3} { 658 <DQ_STRING_START>\\[0-7]{1,3} {
657 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); 659 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");
658 660
661 curr_lexer->current_input_column += yyleng;
662
659 int result; 663 int result;
660 sscanf (yytext+1, "%o", &result); 664 sscanf (yytext+1, "%o", &result);
661 665
662 if (result > 0xff) 666 if (result > 0xff)
663 error ("invalid octal escape sequence in character string"); 667 error ("invalid octal escape sequence in character string");
666 } 670 }
667 671
668 <DQ_STRING_START>"\\a" { 672 <DQ_STRING_START>"\\a" {
669 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\""); 673 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\"");
670 674
675 curr_lexer->current_input_column += yyleng;
671 curr_lexer->string_text += '\a'; 676 curr_lexer->string_text += '\a';
672 } 677 }
673 678
674 <DQ_STRING_START>"\\b" { 679 <DQ_STRING_START>"\\b" {
675 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\""); 680 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\"");
676 681
682 curr_lexer->current_input_column += yyleng;
677 curr_lexer->string_text += '\b'; 683 curr_lexer->string_text += '\b';
678 } 684 }
679 685
680 <DQ_STRING_START>"\\f" { 686 <DQ_STRING_START>"\\f" {
681 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\""); 687 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\"");
682 688
689 curr_lexer->current_input_column += yyleng;
683 curr_lexer->string_text += '\f'; 690 curr_lexer->string_text += '\f';
684 } 691 }
685 692
686 <DQ_STRING_START>"\\n" { 693 <DQ_STRING_START>"\\n" {
687 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\""); 694 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\"");
688 695
696 curr_lexer->current_input_column += yyleng;
689 curr_lexer->string_text += '\n'; 697 curr_lexer->string_text += '\n';
690 } 698 }
691 699
692 <DQ_STRING_START>"\\r" { 700 <DQ_STRING_START>"\\r" {
693 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\""); 701 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\"");
694 702
703 curr_lexer->current_input_column += yyleng;
695 curr_lexer->string_text += '\r'; 704 curr_lexer->string_text += '\r';
696 } 705 }
697 706
698 <DQ_STRING_START>"\\t" { 707 <DQ_STRING_START>"\\t" {
699 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\""); 708 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\"");
700 709
710 curr_lexer->current_input_column += yyleng;
701 curr_lexer->string_text += '\t'; 711 curr_lexer->string_text += '\t';
702 } 712 }
703 713
704 <DQ_STRING_START>"\\v" { 714 <DQ_STRING_START>"\\v" {
705 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\""); 715 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\"");
706 716
717 curr_lexer->current_input_column += yyleng;
707 curr_lexer->string_text += '\v'; 718 curr_lexer->string_text += '\v';
708 } 719 }
709 720
710 <DQ_STRING_START>\\{NL} { 721 <DQ_STRING_START>\\{NL} {
711 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}"); 722 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}");
712 723
713 curr_lexer->decrement_promptflag (); 724 curr_lexer->decrement_promptflag ();
714 curr_lexer->input_line_number++; 725 curr_lexer->input_line_number++;
715 curr_lexer->current_input_column = 1; 726 curr_lexer->current_input_column = 1;
716 727
717 // We can't rely on the trick used elsewhere of sticking ASCII 1 728 if (curr_lexer->is_push_lexer ())
718 // in the intput buffer and recognizing it as a special case 729 {
719 // because ASCII 1 is a valid character for a character string. 730 // We can't rely on the trick used elsewhere of sticking ASCII
720 731 // 1 in the input buffer and recognizing it as a special case
721 if (curr_lexer->at_end_of_buffer ()) 732 // because ASCII 1 is a valid character for a character
722 return -1; 733 // string. If we are at the end of the buffer, ask for more
723 734 // input. If we are at the end of the file, deal with it.
724 if (curr_lexer->at_end_of_file ()) 735 // Otherwise, just keep going with the text from the current
725 return curr_lexer->handle_end_of_input (); 736 // buffer.
726 737
727 // Otherwise, just keep going with the text from the current buffer. 738 if (curr_lexer->at_end_of_buffer ())
739 return -1;
740
741 if (curr_lexer->at_end_of_file ())
742 return curr_lexer->handle_end_of_input ();
743 }
728 } 744 }
729 745
730 <DQ_STRING_START>\\. { 746 <DQ_STRING_START>\\. {
731 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\."); 747 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\.");
732 748
749 curr_lexer->current_input_column += yyleng;
733 curr_lexer->string_text += yytext[1]; 750 curr_lexer->string_text += yytext[1];
734 } 751 }
735 752
736 <DQ_STRING_START>[^\\\r\n\"]+ { 753 <DQ_STRING_START>[^\\\r\n\"]+ {
737 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\\\\\r\\n\\\"]+"); 754 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\\\\\r\\n\\\"]+");
738 755
756 curr_lexer->current_input_column += yyleng;
739 curr_lexer->string_text += yytext; 757 curr_lexer->string_text += yytext;
740 } 758 }
741 759
742 <DQ_STRING_START>{NL} { 760 <DQ_STRING_START>{NL} {
743 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); 761 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");
752 770
753 %{ 771 %{
754 // Single-quoted character strings. 772 // Single-quoted character strings.
755 %} 773 %}
756 774
757 <SQ_STRING_START>[^\'\n\r]*\' { 775 <SQ_STRING_START>\'\' {
758 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]*\\'"); 776 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'\\'");
759 777
760 yytext[yyleng-1] = 0; 778 curr_lexer->current_input_column += yyleng;
779 curr_lexer->string_text += '\'';
780 }
781
782 <SQ_STRING_START>\' {
783 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'");
784
785 curr_lexer->current_input_column++;
786
787 curr_lexer->pop_start_state ();
788
789 curr_lexer->looking_for_object_index = true;
790 curr_lexer->at_beginning_of_statement = false;
791
792 curr_lexer->push_token (new token (SQ_STRING,
793 curr_lexer->string_text,
794 curr_lexer->string_line,
795 curr_lexer->string_column));
796
797 curr_lexer->string_text = "";
798
799 return curr_lexer->count_token_internal (SQ_STRING);
800 }
801
802 <SQ_STRING_START>[^\'\n\r]+ {
803 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+");
804
805 curr_lexer->current_input_column += yyleng;
761 curr_lexer->string_text += yytext; 806 curr_lexer->string_text += yytext;
762
763 curr_lexer->current_input_column += yyleng;
764
765 int c = curr_lexer->text_yyinput ();
766
767 if (c == '\'')
768 {
769 curr_lexer->string_text += c;
770
771 curr_lexer->current_input_column++;
772 }
773 else
774 {
775 curr_lexer->xunput (c);
776
777 curr_lexer->pop_start_state ();
778
779 curr_lexer->looking_for_object_index = true;
780 curr_lexer->at_beginning_of_statement = false;
781
782 curr_lexer->push_token (new token (SQ_STRING,
783 curr_lexer->string_text,
784 curr_lexer->string_line,
785 curr_lexer->string_column));
786
787 curr_lexer->string_text = "";
788
789 return curr_lexer->count_token_internal (SQ_STRING);
790 }
791 } 807 }
792 808
793 <SQ_STRING_START>{NL} { 809 <SQ_STRING_START>{NL} {
794 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); 810 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");
795 811