Mercurial > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 16275:dd7de0345124
10/10 commits reworking the lexer
This and the previous nine changesets completely revise the way
significant whitespace is handled inside matrix and cell lists. Now,
instead of indefinite lookahead, we keep track of all tokens read and
look back to previous tokens and ahead at most one character. If the
surrounding context changes the parse, we either unput the text for the
current token and alter the start state, unput an additional character
(typically a comma or semicolon), or both.
Comments are now handled by a separate set of exclusive start states
and there is no explicit handling of comments within any other
patterns.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 11 Mar 2013 14:40:29 -0400 |
parents | c5e5f6ccac5d |
children | 26fd237a3427 8cb65fd72164 |
comparison
equal
deleted
inserted
replaced
16273:c5e5f6ccac5d | 16275:dd7de0345124 |
---|---|
128 #undef YY_FATAL_ERROR | 128 #undef YY_FATAL_ERROR |
129 #endif | 129 #endif |
130 #define YY_FATAL_ERROR(msg) \ | 130 #define YY_FATAL_ERROR(msg) \ |
131 (yyget_extra (yyscanner))->fatal_error (msg) | 131 (yyget_extra (yyscanner))->fatal_error (msg) |
132 | 132 |
133 #define CMD_OR_OP(PATTERN, TOK, COMPAT) \ | |
134 \ | |
135 do \ | |
136 { \ | |
137 curr_lexer->lexer_debug (PATTERN); \ | |
138 \ | |
139 if (curr_lexer->looks_like_command_arg ()) \ | |
140 { \ | |
141 yyless (0); \ | |
142 curr_lexer->push_start_state (COMMAND_START); \ | |
143 } \ | |
144 else \ | |
145 { \ | |
146 return curr_lexer->handle_op_internal (TOK, false, COMPAT); \ | |
147 } \ | |
148 } \ | |
149 while (0) | |
150 | |
151 #define CMD_OR_UNARY_OP(PATTERN, TOK, COMPAT) \ | |
152 \ | |
153 do \ | |
154 { \ | |
155 curr_lexer->lexer_debug (PATTERN); \ | |
156 \ | |
157 if (curr_lexer->looks_like_command_arg ()) \ | |
158 { \ | |
159 yyless (0); \ | |
160 curr_lexer->push_start_state (COMMAND_START); \ | |
161 } \ | |
162 else \ | |
163 { \ | |
164 int tok \ | |
165 = (COMPAT \ | |
166 ? curr_lexer->handle_unary_op (TOK) \ | |
167 : curr_lexer->handle_incompatible_unary_op (TOK)); \ | |
168 \ | |
169 if (tok < 0) \ | |
170 { \ | |
171 yyless (0); \ | |
172 curr_lexer->xunput (','); \ | |
173 } \ | |
174 else \ | |
175 { \ | |
176 return tok; \ | |
177 } \ | |
178 } \ | |
179 } \ | |
180 while (0) | |
181 | |
133 static bool Vdisplay_tokens = false; | 182 static bool Vdisplay_tokens = false; |
134 | 183 |
135 static unsigned int Vtoken_count = 0; | 184 static unsigned int Vtoken_count = 0; |
136 | 185 |
137 // Internal variable for lexer debugging state. | 186 // Internal variable for lexer debugging state. |
296 } | 345 } |
297 | 346 |
298 \[ { | 347 \[ { |
299 curr_lexer->lexer_debug ("\\["); | 348 curr_lexer->lexer_debug ("\\["); |
300 | 349 |
301 bool unput_comma = false; | 350 bool unput_comma = false; |
302 | 351 |
303 if (curr_lexer->whitespace_is_significant () | 352 if (curr_lexer->whitespace_is_significant () |
304 && curr_lexer->space_follows_previous_token ()) | 353 && curr_lexer->space_follows_previous_token ()) |
305 { | 354 { |
306 int tok = curr_lexer->previous_token_value (); | 355 int tok = curr_lexer->previous_token_value (); |
526 | 575 |
527 {D}+/\.[\*/\\^\'] | | 576 {D}+/\.[\*/\\^\'] | |
528 {NUMBER} { | 577 {NUMBER} { |
529 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); | 578 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); |
530 | 579 |
531 int tok = curr_lexer->previous_token_value (); | 580 int tok = curr_lexer->previous_token_value (); |
532 | 581 |
533 if (curr_lexer->whitespace_is_significant () | 582 if (curr_lexer->whitespace_is_significant () |
534 && curr_lexer->space_follows_previous_token () | 583 && curr_lexer->space_follows_previous_token () |
535 && ! (tok == '[' || tok == '{' | 584 && ! (tok == '[' || tok == '{' |
536 || curr_lexer->previous_token_is_binop ())) | 585 || curr_lexer->previous_token_is_binop ())) |
537 { | 586 { |
538 yyless (0); | 587 yyless (0); |
539 unput (','); | 588 unput (','); |
540 } | 589 } |
541 else | 590 else |
542 { | 591 { |
543 curr_lexer->handle_number (); | 592 curr_lexer->handle_number (); |
544 return curr_lexer->count_token_internal (NUM); | 593 return curr_lexer->count_token_internal (NUM); |
545 } | 594 } |
546 } | 595 } |
547 | 596 |
548 %{ | 597 %{ |
549 // Eat whitespace. Whitespace inside matrix constants is handled by | 598 // Eat whitespace. Whitespace inside matrix constants is handled by |
550 // the <MATRIX_START> start state code above. | 599 // the <MATRIX_START> start state code above. |
644 | 693 |
645 return curr_lexer->count_token_internal (METAQUERY); | 694 return curr_lexer->count_token_internal (METAQUERY); |
646 } | 695 } |
647 } | 696 } |
648 | 697 |
649 %{ | |
650 // Function handles and superclass references | |
651 %} | |
652 | |
653 "@" { | 698 "@" { |
654 curr_lexer->lexer_debug ("@"); | 699 curr_lexer->lexer_debug ("@"); |
655 | 700 |
656 curr_lexer->current_input_column++; | 701 curr_lexer->current_input_column++; |
657 | 702 |
658 curr_lexer->looking_at_function_handle++; | 703 curr_lexer->looking_at_function_handle++; |
659 curr_lexer->looking_for_object_index = false; | 704 curr_lexer->looking_for_object_index = false; |
660 curr_lexer->at_beginning_of_statement = false; | 705 curr_lexer->at_beginning_of_statement = false; |
661 | 706 |
662 return curr_lexer->count_token ('@'); | 707 return curr_lexer->count_token ('@'); |
663 | |
664 } | 708 } |
665 | 709 |
666 %{ | 710 %{ |
667 // A new line character. New line characters inside matrix constants | 711 // A new line character. New line characters inside matrix constants |
668 // are handled by the <MATRIX_START> start state code above. If closest | 712 // are handled by the <MATRIX_START> start state code above. If closest |
695 %} | 739 %} |
696 | 740 |
697 "'" { | 741 "'" { |
698 curr_lexer->lexer_debug ("'"); | 742 curr_lexer->lexer_debug ("'"); |
699 | 743 |
700 int tok = curr_lexer->previous_token_value (); | 744 if (curr_lexer->previous_token_may_be_command () |
701 | 745 && curr_lexer->space_follows_previous_token ()) |
702 bool transpose = false; | 746 { |
703 | 747 yyless (0); |
704 if (curr_lexer->whitespace_is_significant ()) | 748 curr_lexer->push_start_state (COMMAND_START); |
705 { | 749 } |
706 if (curr_lexer->space_follows_previous_token ()) | 750 else |
751 { | |
752 int tok = curr_lexer->previous_token_value (); | |
753 | |
754 bool transpose = false; | |
755 | |
756 if (curr_lexer->whitespace_is_significant ()) | |
707 { | 757 { |
708 if (tok == '[' || tok == '{' | 758 if (curr_lexer->space_follows_previous_token ()) |
709 || curr_lexer->previous_token_is_binop ()) | |
710 { | 759 { |
711 curr_lexer->current_input_column++; | 760 if (tok == '[' || tok == '{' |
712 int retval = curr_lexer->handle_string ('\''); | 761 || curr_lexer->previous_token_is_binop ()) |
713 return curr_lexer->count_token_internal (retval); | 762 { |
763 curr_lexer->current_input_column++; | |
764 int retval = curr_lexer->handle_string ('\''); | |
765 return curr_lexer->count_token_internal (retval); | |
766 } | |
767 else | |
768 { | |
769 yyless (0); | |
770 curr_lexer->xunput (','); | |
771 } | |
714 } | 772 } |
715 else | 773 else |
716 { | 774 { |
717 yyless (0); | 775 if (tok == '[' || tok == '{' |
718 curr_lexer->xunput (','); | 776 || curr_lexer->previous_token_is_binop () |
777 || curr_lexer->previous_token_is_keyword ()) | |
778 { | |
779 curr_lexer->current_input_column++; | |
780 int retval = curr_lexer->handle_string ('\''); | |
781 return curr_lexer->count_token_internal (retval); | |
782 } | |
783 else | |
784 return curr_lexer->count_token (QUOTE); | |
719 } | 785 } |
720 } | 786 } |
721 else | 787 else |
722 { | 788 { |
723 if (tok == '[' || tok == '{' | 789 if (! tok || tok == '[' || tok == '{' || tok == '(' |
724 || curr_lexer->previous_token_is_binop () | 790 || curr_lexer->previous_token_is_binop () |
725 || curr_lexer->previous_token_is_keyword ()) | 791 || curr_lexer->previous_token_is_keyword ()) |
726 { | 792 { |
727 curr_lexer->current_input_column++; | 793 curr_lexer->current_input_column++; |
728 int retval = curr_lexer->handle_string ('\''); | 794 int retval = curr_lexer->handle_string ('\''); |
730 } | 796 } |
731 else | 797 else |
732 return curr_lexer->count_token (QUOTE); | 798 return curr_lexer->count_token (QUOTE); |
733 } | 799 } |
734 } | 800 } |
735 else | |
736 { | |
737 if (! tok || tok == '[' || tok == '{' || tok == '(' | |
738 || curr_lexer->previous_token_is_binop () | |
739 || curr_lexer->previous_token_is_keyword ()) | |
740 { | |
741 curr_lexer->current_input_column++; | |
742 int retval = curr_lexer->handle_string ('\''); | |
743 return curr_lexer->count_token_internal (retval); | |
744 } | |
745 else | |
746 return curr_lexer->count_token (QUOTE); | |
747 } | |
748 } | 801 } |
749 | 802 |
750 %{ | 803 %{ |
751 // Double quotes always begin strings. | 804 // Double quotes always begin strings. |
752 %} | 805 %} |
753 | 806 |
754 \" { | 807 \" { |
755 curr_lexer->lexer_debug ("\""); | 808 curr_lexer->lexer_debug ("\""); |
756 | 809 |
757 int tok = curr_lexer->previous_token_value (); | 810 if (curr_lexer->previous_token_may_be_command () |
758 | 811 && curr_lexer->space_follows_previous_token ()) |
759 bool transpose = false; | 812 { |
760 | 813 yyless (0); |
761 if (curr_lexer->whitespace_is_significant ()) | 814 curr_lexer->push_start_state (COMMAND_START); |
762 { | 815 } |
763 if (curr_lexer->space_follows_previous_token ()) | 816 else |
817 { | |
818 int tok = curr_lexer->previous_token_value (); | |
819 | |
820 bool transpose = false; | |
821 | |
822 if (curr_lexer->whitespace_is_significant ()) | |
764 { | 823 { |
765 if (tok == '[' || tok == '{' | 824 if (curr_lexer->space_follows_previous_token ()) |
766 || curr_lexer->previous_token_is_binop ()) | 825 { |
826 if (tok == '[' || tok == '{' | |
827 || curr_lexer->previous_token_is_binop ()) | |
828 { | |
829 curr_lexer->current_input_column++; | |
830 int retval = curr_lexer->handle_string ('"'); | |
831 return curr_lexer->count_token_internal (retval); | |
832 } | |
833 else | |
834 { | |
835 yyless (0); | |
836 curr_lexer->xunput (','); | |
837 } | |
838 } | |
839 else | |
767 { | 840 { |
768 curr_lexer->current_input_column++; | 841 curr_lexer->current_input_column++; |
769 int retval = curr_lexer->handle_string ('"'); | 842 int retval = curr_lexer->handle_string ('"'); |
770 return curr_lexer->count_token_internal (retval); | 843 return curr_lexer->count_token_internal (retval); |
771 } | |
772 else | |
773 { | |
774 yyless (0); | |
775 curr_lexer->xunput (','); | |
776 } | 844 } |
777 } | 845 } |
778 else | 846 else |
779 { | 847 { |
780 curr_lexer->current_input_column++; | 848 curr_lexer->current_input_column++; |
781 int retval = curr_lexer->handle_string ('"'); | 849 int retval = curr_lexer->handle_string ('"'); |
782 return curr_lexer->count_token_internal (retval); | 850 return curr_lexer->count_token_internal (retval); |
783 } | 851 } |
784 } | 852 } |
785 else | |
786 { | |
787 curr_lexer->current_input_column++; | |
788 int retval = curr_lexer->handle_string ('"'); | |
789 return curr_lexer->count_token_internal (retval); | |
790 } | |
791 } | 853 } |
792 | 854 |
793 %{ | 855 %{ |
794 // Other operators. | 856 // Other operators. |
795 %} | 857 %} |
796 | 858 |
797 ":" { return curr_lexer->handle_op (":", ':'); } | 859 ":" { CMD_OR_OP (":", ':', true); } |
798 ".+" { return curr_lexer->handle_incompatible_op (".+", EPLUS); } | 860 ".+" { CMD_OR_OP (".+", EPLUS, false); } |
799 ".-" { return curr_lexer->handle_incompatible_op (".-", EMINUS); } | 861 ".-" { CMD_OR_OP (".-", EMINUS, false); } |
800 ".*" { return curr_lexer->handle_op (".*", EMUL); } | 862 ".*" { CMD_OR_OP (".*", EMUL, true); } |
801 "./" { return curr_lexer->handle_op ("./", EDIV); } | 863 "./" { CMD_OR_OP ("./", EDIV, true); } |
802 ".\\" { return curr_lexer->handle_op (".\\", ELEFTDIV); } | 864 ".\\" { CMD_OR_OP (".\\", ELEFTDIV, true); } |
803 ".^" { return curr_lexer->handle_op (".^", EPOW); } | 865 ".^" { CMD_OR_OP (".^", EPOW, true); } |
804 ".**" { return curr_lexer->handle_incompatible_op (".**", EPOW); } | 866 ".**" { CMD_OR_OP (".**", EPOW, false); } |
805 "<=" { return curr_lexer->handle_op ("<=", EXPR_LE); } | 867 "<=" { CMD_OR_OP ("<=", EXPR_LE, true); } |
806 "==" { return curr_lexer->handle_op ("==", EXPR_EQ); } | 868 "==" { CMD_OR_OP ("==", EXPR_EQ, true); } |
807 "~=" { return curr_lexer->handle_op ("~=", EXPR_NE); } | 869 "~=" { CMD_OR_OP ("~=", EXPR_NE, true); } |
808 "!=" { return curr_lexer->handle_incompatible_op ("!=", EXPR_NE); } | 870 "!=" { CMD_OR_OP ("!=", EXPR_NE, false); } |
809 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); } | 871 ">=" { CMD_OR_OP (">=", EXPR_GE, true); } |
810 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } | 872 "&" { CMD_OR_OP ("&", EXPR_AND, true); } |
811 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } | 873 "|" { CMD_OR_OP ("|", EXPR_OR, true); } |
812 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } | 874 "<" { CMD_OR_OP ("<", EXPR_LT, true); } |
813 ">" { return curr_lexer->handle_op (">", EXPR_GT); } | 875 ">" { CMD_OR_OP (">", EXPR_GT, true); } |
814 "*" { return curr_lexer->handle_op ("*", '*'); } | 876 "*" { CMD_OR_OP ("*", '*', true); } |
815 | 877 "/" { CMD_OR_OP ("/", '/', true); } |
816 "/" { | 878 |
817 int prev_tok = curr_lexer->previous_token_value (); | 879 %{ |
818 bool space_before = curr_lexer->space_follows_previous_token (); | 880 // In Matlab, '\' may also trigger command syntax. |
819 int c = curr_lexer->text_yyinput (); | 881 %} |
820 curr_lexer->xunput (c); | 882 |
821 bool space_after = (c == ' ' || c == '\t'); | 883 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } |
822 | 884 |
823 if (space_before && ! space_after | 885 "^" { CMD_OR_OP ("^", POW, true); } |
824 && curr_lexer->previous_token_may_be_command ()) | 886 "**" { CMD_OR_OP ("**", POW, false); } |
825 { | 887 "&&" { CMD_OR_OP ("&&", EXPR_AND_AND, true); } |
826 yyless (0); | 888 "||" { CMD_OR_OP ("||", EXPR_OR_OR, true); } |
827 curr_lexer->push_start_state (COMMAND_START); | 889 "<<" { CMD_OR_OP ("<<", LSHIFT, false); } |
828 } | 890 ">>" { CMD_OR_OP (">>", RSHIFT, false); } |
829 else | |
830 return curr_lexer->handle_op ("/", '/'); | |
831 } | |
832 | |
833 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } | |
834 "^" { return curr_lexer->handle_op ("^", POW); } | |
835 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } | |
836 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } | |
837 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } | |
838 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } | |
839 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } | |
840 | 891 |
841 ";" { | 892 ";" { |
842 bool at_beginning_of_statement | 893 bool at_beginning_of_statement |
843 = (! (curr_lexer->whitespace_is_significant () | 894 = (! (curr_lexer->whitespace_is_significant () |
844 || curr_lexer->looking_at_object_index.front ())); | 895 || curr_lexer->looking_at_object_index.front ())); |
845 | 896 |
846 return curr_lexer->handle_op (";", ';', true, at_beginning_of_statement); | 897 return curr_lexer->handle_op (";", ';', at_beginning_of_statement); |
847 } | 898 } |
848 | 899 |
849 "+" { | 900 "+" { CMD_OR_UNARY_OP ("+", '+', true); } |
850 int tok = curr_lexer->handle_unary_op ("+", '+'); | 901 "-" { CMD_OR_UNARY_OP ("-", '-', true); } |
851 | 902 |
852 if (tok < 0) | 903 "~" { CMD_OR_UNARY_OP ("~", EXPR_NOT, true); } |
853 { | 904 "!" { CMD_OR_UNARY_OP ("!", EXPR_NOT, false); } |
854 yyless (0); | |
855 curr_lexer->xunput (','); | |
856 } | |
857 else | |
858 return tok; | |
859 } | |
860 | |
861 "-" { | |
862 int prev_tok = curr_lexer->previous_token_value (); | |
863 bool space_before = curr_lexer->space_follows_previous_token (); | |
864 int c = curr_lexer->text_yyinput (); | |
865 curr_lexer->xunput (c); | |
866 bool space_after = (c == ' ' || c == '\t'); | |
867 | |
868 if (space_before && ! space_after | |
869 && curr_lexer->previous_token_may_be_command ()) | |
870 { | |
871 yyless (0); | |
872 curr_lexer->push_start_state (COMMAND_START); | |
873 } | |
874 else | |
875 { | |
876 int tok = curr_lexer->handle_unary_op ("-", '-'); | |
877 | |
878 if (tok < 0) | |
879 { | |
880 yyless (0); | |
881 curr_lexer->xunput (','); | |
882 } | |
883 else | |
884 return tok; | |
885 } | |
886 } | |
887 | |
888 "~" { | |
889 int tok = curr_lexer->handle_unary_op ("~", EXPR_NOT); | |
890 | |
891 if (tok < 0) | |
892 { | |
893 yyless (0); | |
894 curr_lexer->xunput (','); | |
895 } | |
896 else | |
897 return tok; | |
898 } | |
899 | |
900 "!" { | |
901 int tok = curr_lexer->handle_incompatible_unary_op ("!", EXPR_NOT); | |
902 | |
903 if (tok < 0) | |
904 { | |
905 yyless (0); | |
906 curr_lexer->xunput (','); | |
907 } | |
908 else | |
909 return tok; | |
910 } | |
911 | 905 |
912 "," { | 906 "," { |
913 bool at_beginning_of_statement | 907 bool at_beginning_of_statement |
914 = (! (curr_lexer->whitespace_is_significant () | 908 = (! (curr_lexer->whitespace_is_significant () |
915 || curr_lexer->looking_at_object_index.front ())); | 909 || curr_lexer->looking_at_object_index.front ())); |
916 | 910 |
917 return curr_lexer->handle_op | 911 return curr_lexer->handle_op (",", ',', at_beginning_of_statement); |
918 (",", ',', true, at_beginning_of_statement); | |
919 } | 912 } |
920 | 913 |
921 ".'" { | 914 ".'" { |
922 return curr_lexer->handle_op (".'", TRANSPOSE, true, false); | 915 return curr_lexer->handle_op (".'", TRANSPOSE, false); |
923 } | 916 } |
924 | 917 |
925 "++" { | 918 "++" { |
926 int tok = curr_lexer->handle_incompatible_unary_op | 919 curr_lexer->lexer_debug ("++"); |
927 ("++", PLUS_PLUS, true, false, true); | 920 |
921 int tok = curr_lexer->handle_incompatible_unary_op (PLUS_PLUS, false); | |
928 | 922 |
929 if (tok < 0) | 923 if (tok < 0) |
930 { | 924 { |
931 yyless (0); | 925 yyless (0); |
932 curr_lexer->xunput (','); | 926 curr_lexer->xunput (','); |
934 else | 928 else |
935 return tok; | 929 return tok; |
936 } | 930 } |
937 | 931 |
938 "--" { | 932 "--" { |
939 int tok = curr_lexer->handle_incompatible_unary_op | 933 curr_lexer->lexer_debug ("--"); |
940 ("--", MINUS_MINUS, true, false, true); | 934 |
935 int tok = curr_lexer->handle_incompatible_unary_op (MINUS_MINUS, false); | |
941 | 936 |
942 if (tok < 0) | 937 if (tok < 0) |
943 { | 938 { |
944 yyless (0); | 939 yyless (0); |
945 curr_lexer->xunput (','); | 940 curr_lexer->xunput (','); |
1039 ".**=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); } | 1034 ".**=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); } |
1040 "&=" { return curr_lexer->handle_incompatible_op ("&=", AND_EQ); } | 1035 "&=" { return curr_lexer->handle_incompatible_op ("&=", AND_EQ); } |
1041 "|=" { return curr_lexer->handle_incompatible_op ("|=", OR_EQ); } | 1036 "|=" { return curr_lexer->handle_incompatible_op ("|=", OR_EQ); } |
1042 "<<=" { return curr_lexer->handle_incompatible_op ("<<=", LSHIFT_EQ); } | 1037 "<<=" { return curr_lexer->handle_incompatible_op ("<<=", LSHIFT_EQ); } |
1043 ">>=" { return curr_lexer->handle_incompatible_op (">>=", RSHIFT_EQ); } | 1038 ">>=" { return curr_lexer->handle_incompatible_op (">>=", RSHIFT_EQ); } |
1039 | |
1040 %{ | |
1041 // In Matlab, '{' may also trigger command syntax. | |
1042 %} | |
1044 | 1043 |
1045 "{" { | 1044 "{" { |
1046 curr_lexer->lexer_debug ("{"); | 1045 curr_lexer->lexer_debug ("{"); |
1047 | 1046 |
1048 bool unput_comma = false; | 1047 bool unput_comma = false; |
1794 | 1793 |
1795 xunput (c, yytxt); | 1794 xunput (c, yytxt); |
1796 } | 1795 } |
1797 | 1796 |
1798 bool | 1797 bool |
1798 octave_lexer::looking_at_space (void) | |
1799 { | |
1800 int c = text_yyinput (); | |
1801 xunput (c); | |
1802 return (c == ' ' || c == '\t'); | |
1803 } | |
1804 | |
1805 bool | |
1799 octave_lexer::inside_any_object_index (void) | 1806 octave_lexer::inside_any_object_index (void) |
1800 { | 1807 { |
1801 bool retval = false; | 1808 bool retval = false; |
1802 | 1809 |
1803 for (std::list<bool>::const_iterator i = looking_at_object_index.begin (); | 1810 for (std::list<bool>::const_iterator i = looking_at_object_index.begin (); |
2451 } | 2458 } |
2452 | 2459 |
2453 bool | 2460 bool |
2454 octave_lexer::looks_like_command_arg (void) | 2461 octave_lexer::looks_like_command_arg (void) |
2455 { | 2462 { |
2456 bool retval = true; | 2463 bool space_before = space_follows_previous_token (); |
2457 | 2464 bool space_after = looking_at_space (); |
2458 int c0 = text_yyinput (); | 2465 |
2459 | 2466 return (space_before && ! space_after |
2460 switch (c0) | 2467 && previous_token_may_be_command ()); |
2461 { | |
2462 // = == | |
2463 case '=': | |
2464 { | |
2465 int c1 = text_yyinput (); | |
2466 | |
2467 if (c1 == '=') | |
2468 { | |
2469 int c2 = text_yyinput (); | |
2470 | |
2471 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2472 && next_token_can_follow_bin_op ()) | |
2473 retval = false; | |
2474 | |
2475 xunput (c2); | |
2476 } | |
2477 else | |
2478 retval = false; | |
2479 | |
2480 xunput (c1); | |
2481 } | |
2482 break; | |
2483 | |
2484 case '(': | |
2485 case '{': | |
2486 // Indexing. | |
2487 retval = false; | |
2488 break; | |
2489 | |
2490 case '\n': | |
2491 // EOL. | |
2492 break; | |
2493 | |
2494 case '\'': | |
2495 case '"': | |
2496 // Beginning of a character string. | |
2497 break; | |
2498 | |
2499 // + - ++ -- += -= | |
2500 case '+': | |
2501 case '-': | |
2502 { | |
2503 int c1 = text_yyinput (); | |
2504 | |
2505 switch (c1) | |
2506 { | |
2507 case '\n': | |
2508 // EOL. | |
2509 case '+': | |
2510 case '-': | |
2511 // Unary ops, spacing doesn't matter. | |
2512 break; | |
2513 | |
2514 case '\t': | |
2515 case ' ': | |
2516 { | |
2517 if (next_token_can_follow_bin_op ()) | |
2518 retval = false; | |
2519 } | |
2520 break; | |
2521 | |
2522 case '=': | |
2523 { | |
2524 int c2 = text_yyinput (); | |
2525 | |
2526 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2527 && next_token_can_follow_bin_op ()) | |
2528 retval = false; | |
2529 | |
2530 xunput (c2); | |
2531 } | |
2532 break; | |
2533 } | |
2534 | |
2535 xunput (c1); | |
2536 } | |
2537 break; | |
2538 | |
2539 case ':': | |
2540 case '/': | |
2541 case '\\': | |
2542 case '^': | |
2543 { | |
2544 int c1 = text_yyinput (); | |
2545 | |
2546 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
2547 && next_token_can_follow_bin_op ()) | |
2548 retval = false; | |
2549 | |
2550 xunput (c1); | |
2551 } | |
2552 break; | |
2553 | |
2554 // .+ .- ./ .\ .^ .* .** | |
2555 case '.': | |
2556 { | |
2557 int c1 = text_yyinput (); | |
2558 | |
2559 if (match_any (c1, "+-/\\^*")) | |
2560 { | |
2561 int c2 = text_yyinput (); | |
2562 | |
2563 if (c2 == '=') | |
2564 { | |
2565 int c3 = text_yyinput (); | |
2566 | |
2567 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t') | |
2568 && next_token_can_follow_bin_op ()) | |
2569 retval = false; | |
2570 | |
2571 xunput (c3); | |
2572 } | |
2573 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2574 && next_token_can_follow_bin_op ()) | |
2575 retval = false; | |
2576 | |
2577 xunput (c2); | |
2578 } | |
2579 else if (! match_any (c1, ",;\n") | |
2580 && (! isdigit (c1) && c1 != ' ' && c1 != '\t' | |
2581 && c1 != '.')) | |
2582 { | |
2583 // Structure reference. FIXME -- is this a complete check? | |
2584 | |
2585 retval = false; | |
2586 } | |
2587 | |
2588 xunput (c1); | |
2589 } | |
2590 break; | |
2591 | |
2592 // & && | || * ** | |
2593 case '&': | |
2594 case '|': | |
2595 case '*': | |
2596 { | |
2597 int c1 = text_yyinput (); | |
2598 | |
2599 if (c1 == c0) | |
2600 { | |
2601 int c2 = text_yyinput (); | |
2602 | |
2603 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2604 && next_token_can_follow_bin_op ()) | |
2605 retval = false; | |
2606 | |
2607 xunput (c2); | |
2608 } | |
2609 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
2610 && next_token_can_follow_bin_op ()) | |
2611 retval = false; | |
2612 | |
2613 xunput (c1); | |
2614 } | |
2615 break; | |
2616 | |
2617 // < <= > >= | |
2618 case '<': | |
2619 case '>': | |
2620 { | |
2621 int c1 = text_yyinput (); | |
2622 | |
2623 if (c1 == '=') | |
2624 { | |
2625 int c2 = text_yyinput (); | |
2626 | |
2627 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2628 && next_token_can_follow_bin_op ()) | |
2629 retval = false; | |
2630 | |
2631 xunput (c2); | |
2632 } | |
2633 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
2634 && next_token_can_follow_bin_op ()) | |
2635 retval = false; | |
2636 | |
2637 xunput (c1); | |
2638 } | |
2639 break; | |
2640 | |
2641 // ~= != | |
2642 case '~': | |
2643 case '!': | |
2644 { | |
2645 int c1 = text_yyinput (); | |
2646 | |
2647 // ~ and ! can be unary ops, so require following =. | |
2648 if (c1 == '=') | |
2649 { | |
2650 int c2 = text_yyinput (); | |
2651 | |
2652 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
2653 && next_token_can_follow_bin_op ()) | |
2654 retval = false; | |
2655 | |
2656 xunput (c2); | |
2657 } | |
2658 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
2659 && next_token_can_follow_bin_op ()) | |
2660 retval = false; | |
2661 | |
2662 xunput (c1); | |
2663 } | |
2664 break; | |
2665 | |
2666 default: | |
2667 break; | |
2668 } | |
2669 | |
2670 xunput (c0); | |
2671 | |
2672 return retval; | |
2673 } | 2468 } |
2674 | 2469 |
2675 int | 2470 int |
2676 octave_lexer::handle_superclass_identifier (void) | 2471 octave_lexer::handle_superclass_identifier (void) |
2677 { | 2472 { |
3152 break; | 2947 break; |
3153 } | 2948 } |
3154 } | 2949 } |
3155 | 2950 |
3156 int | 2951 int |
3157 octave_lexer::handle_op (const char *pattern, int tok, bool convert, | 2952 octave_lexer::handle_op (const char *pattern, int tok, bool bos) |
3158 bool bos, bool qit) | |
3159 { | 2953 { |
3160 lexer_debug (pattern); | 2954 lexer_debug (pattern); |
3161 | 2955 |
3162 return handle_op_internal (pattern, tok, convert, bos, qit, true); | 2956 return handle_op_internal (tok, bos, true); |
3163 } | 2957 } |
3164 | 2958 |
3165 int | 2959 int |
3166 octave_lexer::handle_incompatible_op (const char *pattern, int tok, | 2960 octave_lexer::handle_incompatible_op (const char *pattern, int tok, bool bos) |
3167 bool convert, bool bos, bool qit) | |
3168 { | 2961 { |
3169 lexer_debug (pattern); | 2962 lexer_debug (pattern); |
3170 | 2963 |
3171 return handle_op_internal (pattern, tok, convert, bos, qit, false); | 2964 return handle_op_internal (tok, bos, false); |
3172 } | 2965 } |
3173 | 2966 |
3174 bool | 2967 bool |
3175 octave_lexer::maybe_unput_comma_before_unary_op (int tok) | 2968 octave_lexer::maybe_unput_comma_before_unary_op (int tok) |
3176 { | 2969 { |
3193 | 2986 |
3194 return unput_comma; | 2987 return unput_comma; |
3195 } | 2988 } |
3196 | 2989 |
3197 int | 2990 int |
3198 octave_lexer::handle_unary_op (const char *pattern, int tok, bool convert, | 2991 octave_lexer::handle_unary_op (int tok, bool bos) |
3199 bool bos, bool qit) | 2992 { |
3200 { | |
3201 lexer_debug (pattern); | |
3202 | |
3203 return maybe_unput_comma_before_unary_op (tok) | 2993 return maybe_unput_comma_before_unary_op (tok) |
3204 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, true); | 2994 ? -1 : handle_op_internal (tok, bos, true); |
3205 } | 2995 } |
3206 | 2996 |
3207 int | 2997 int |
3208 octave_lexer::handle_incompatible_unary_op (const char *pattern, int tok, | 2998 octave_lexer::handle_incompatible_unary_op (int tok, bool bos) |
3209 bool convert, bool bos, bool qit) | 2999 { |
3210 { | |
3211 lexer_debug (pattern); | |
3212 | |
3213 return maybe_unput_comma_before_unary_op (tok) | 3000 return maybe_unput_comma_before_unary_op (tok) |
3214 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false); | 3001 ? -1 : handle_op_internal (tok, bos, false); |
3215 } | 3002 } |
3216 | 3003 |
3217 int | 3004 int |
3218 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, | 3005 octave_lexer::handle_op_internal (int tok, bool bos, bool compat) |
3219 bool bos, bool qit, bool compat) | |
3220 { | 3006 { |
3221 if (! compat) | 3007 if (! compat) |
3222 gripe_matlab_incompatible_operator (flex_yytext ()); | 3008 gripe_matlab_incompatible_operator (flex_yytext ()); |
3223 | 3009 |
3224 push_token (new token (tok, input_line_number, current_input_column)); | 3010 push_token (new token (tok, input_line_number, current_input_column)); |