Mercurial > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 17577:c702371ff6df classdef
maint: periodic merge of default to classdef
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sat, 05 Oct 2013 11:22:09 -0400 |
parents | 498b2dd1bd56 f0edd6c752e9 |
children | 93b3d03b05e7 |
comparison
equal
deleted
inserted
replaced
17414:20d1b911b4e7 | 17577:c702371ff6df |
---|---|
48 | 48 |
49 %x INPUT_FILE_START | 49 %x INPUT_FILE_START |
50 | 50 |
51 %x BLOCK_COMMENT_START | 51 %x BLOCK_COMMENT_START |
52 %x LINE_COMMENT_START | 52 %x LINE_COMMENT_START |
53 | |
54 %x DQ_STRING_START | |
55 %x SQ_STRING_START | |
53 | 56 |
54 %{ | 57 %{ |
55 | 58 |
56 #include <cctype> | 59 #include <cctype> |
57 #include <cstring> | 60 #include <cstring> |
203 } \ | 206 } \ |
204 } \ | 207 } \ |
205 } \ | 208 } \ |
206 while (0) | 209 while (0) |
207 | 210 |
211 // We can't rely on the trick used elsewhere of sticking ASCII 1 in | |
212 // the input buffer and recognizing it as a special case because ASCII | |
213 // 1 is a valid character for a character string. In the push lexer, | |
214 // if we are at the end of the buffer, return -1 to ask for more input; | |
215 // if we are at the end of the file, handle the end of input. Otherwise, | |
216 // just keep going with the text from the current buffer. | |
217 #define HANDLE_STRING_CONTINUATION \ | |
218 do \ | |
219 { \ | |
220 curr_lexer->decrement_promptflag (); \ | |
221 curr_lexer->input_line_number++; \ | |
222 curr_lexer->current_input_column = 1; \ | |
223 \ | |
224 if (curr_lexer->is_push_lexer ()) \ | |
225 { \ | |
226 if (curr_lexer->at_end_of_buffer ()) \ | |
227 return -1; \ | |
228 \ | |
229 if (curr_lexer->at_end_of_file ()) \ | |
230 return curr_lexer->handle_end_of_input (); \ | |
231 } \ | |
232 } \ | |
233 while (0) | |
234 | |
235 | |
208 static bool Vdisplay_tokens = false; | 236 static bool Vdisplay_tokens = false; |
209 | 237 |
210 static unsigned int Vtoken_count = 0; | 238 static unsigned int Vtoken_count = 0; |
211 | 239 |
212 // Internal variable for lexer debugging state. | 240 // Internal variable for lexer debugging state. |
220 %} | 248 %} |
221 | 249 |
222 D [0-9] | 250 D [0-9] |
223 S [ \t] | 251 S [ \t] |
224 NL ((\n)|(\r)|(\r\n)) | 252 NL ((\n)|(\r)|(\r\n)) |
225 CONT ((\.\.\.)|(\\)) | |
226 Im [iIjJ] | 253 Im [iIjJ] |
227 CCHAR [#%] | 254 CCHAR [#%] |
228 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) | 255 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) |
229 EXPON ([DdEe][+-]?{D}+) | 256 EXPON ([DdEe][+-]?{D}+) |
230 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) | 257 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) |
288 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']"); | 315 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']"); |
289 | 316 |
290 curr_lexer->at_beginning_of_statement = false; | 317 curr_lexer->at_beginning_of_statement = false; |
291 | 318 |
292 curr_lexer->current_input_column++; | 319 curr_lexer->current_input_column++; |
293 int tok = curr_lexer->handle_string (yytext[0]); | 320 |
294 | 321 curr_lexer->begin_string (yytext[0] == '"' |
295 return curr_lexer->count_token_internal (tok); | 322 ? DQ_STRING_START : SQ_STRING_START); |
296 } | 323 } |
297 | 324 |
298 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { | 325 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { |
299 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); | 326 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); |
300 | 327 |
447 // after a block of full-line comments, finish the full line comment | 474 // after a block of full-line comments, finish the full line comment |
448 // block. | 475 // block. |
449 %} | 476 %} |
450 | 477 |
451 ^{S}*{CCHAR}\{{S}*{NL} { | 478 ^{S}*{CCHAR}\{{S}*{NL} { |
452 curr_lexer->lexer_debug ("^{S}*{CCHAR}\{{S}*{NL}"); | 479 curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}"); |
453 | 480 |
454 yyless (0); | 481 yyless (0); |
455 | 482 |
456 if (curr_lexer->start_state () == LINE_COMMENT_START) | 483 if (curr_lexer->start_state () == LINE_COMMENT_START) |
457 { | 484 { |
466 curr_lexer->push_start_state (BLOCK_COMMENT_START); | 493 curr_lexer->push_start_state (BLOCK_COMMENT_START); |
467 | 494 |
468 } | 495 } |
469 | 496 |
470 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} { | 497 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} { |
471 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL}"); | 498 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}"); |
472 | 499 |
473 curr_lexer->input_line_number++; | 500 curr_lexer->input_line_number++; |
474 curr_lexer->current_input_column = 1; | 501 curr_lexer->current_input_column = 1; |
475 | 502 |
476 if (curr_lexer->block_comment_nesting_level) | 503 if (curr_lexer->block_comment_nesting_level) |
617 curr_lexer->xunput (yytext[0]); | 644 curr_lexer->xunput (yytext[0]); |
618 | 645 |
619 curr_lexer->finish_comment (octave_comment_elt::full_line); | 646 curr_lexer->finish_comment (octave_comment_elt::full_line); |
620 | 647 |
621 curr_lexer->pop_start_state (); | 648 curr_lexer->pop_start_state (); |
649 } | |
650 | |
651 %{ | |
652 // Double-quoted character strings. | |
653 %} | |
654 | |
655 <DQ_STRING_START>\"\" { | |
656 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\""); | |
657 | |
658 curr_lexer->current_input_column += yyleng; | |
659 curr_lexer->string_text += '"'; | |
660 } | |
661 | |
662 <DQ_STRING_START>\" { | |
663 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\""); | |
664 | |
665 curr_lexer->current_input_column++; | |
666 | |
667 curr_lexer->pop_start_state (); | |
668 | |
669 curr_lexer->looking_for_object_index = true; | |
670 curr_lexer->at_beginning_of_statement = false; | |
671 | |
672 curr_lexer->push_token (new token (DQ_STRING, | |
673 curr_lexer->string_text, | |
674 curr_lexer->string_line, | |
675 curr_lexer->string_column)); | |
676 | |
677 curr_lexer->string_text = ""; | |
678 | |
679 return curr_lexer->count_token_internal (DQ_STRING); | |
680 } | |
681 | |
682 <DQ_STRING_START>\\[0-7]{1,3} { | |
683 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); | |
684 | |
685 curr_lexer->current_input_column += yyleng; | |
686 | |
687 int result; | |
688 sscanf (yytext+1, "%o", &result); | |
689 | |
690 if (result > 0xff) | |
691 error ("invalid octal escape sequence in character string"); | |
692 else | |
693 curr_lexer->string_text += static_cast<unsigned char> (result); | |
694 } | |
695 | |
696 <DQ_STRING_START>\\x[0-9a-fA-F]+ { | |
697 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\x[0-9a-fA-F]+"); | |
698 | |
699 curr_lexer->current_input_column += yyleng; | |
700 | |
701 int result; | |
702 sscanf (yytext+2, "%x", &result); | |
703 | |
704 // Truncate the value silently instead of checking the range like | |
705 // we do for octal above. This is to match C/C++ where any number | |
706 // of digits is allowed but the value is implementation-defined if | |
707 // it exceeds the range of the character type. | |
708 curr_lexer->string_text += static_cast<unsigned char> (result); | |
709 } | |
710 | |
711 <DQ_STRING_START>"\\a" { | |
712 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\""); | |
713 | |
714 curr_lexer->current_input_column += yyleng; | |
715 curr_lexer->string_text += '\a'; | |
716 } | |
717 | |
718 <DQ_STRING_START>"\\b" { | |
719 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\""); | |
720 | |
721 curr_lexer->current_input_column += yyleng; | |
722 curr_lexer->string_text += '\b'; | |
723 } | |
724 | |
725 <DQ_STRING_START>"\\f" { | |
726 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\""); | |
727 | |
728 curr_lexer->current_input_column += yyleng; | |
729 curr_lexer->string_text += '\f'; | |
730 } | |
731 | |
732 <DQ_STRING_START>"\\n" { | |
733 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\""); | |
734 | |
735 curr_lexer->current_input_column += yyleng; | |
736 curr_lexer->string_text += '\n'; | |
737 } | |
738 | |
739 <DQ_STRING_START>"\\r" { | |
740 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\""); | |
741 | |
742 curr_lexer->current_input_column += yyleng; | |
743 curr_lexer->string_text += '\r'; | |
744 } | |
745 | |
746 <DQ_STRING_START>"\\t" { | |
747 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\""); | |
748 | |
749 curr_lexer->current_input_column += yyleng; | |
750 curr_lexer->string_text += '\t'; | |
751 } | |
752 | |
753 <DQ_STRING_START>"\\v" { | |
754 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\""); | |
755 | |
756 curr_lexer->current_input_column += yyleng; | |
757 curr_lexer->string_text += '\v'; | |
758 } | |
759 | |
760 <DQ_STRING_START>(\.\.\.){S}*{NL} | | |
761 <DQ_STRING_START>(\.\.\.){S}*{CCHAR}.*{NL} { | |
762 curr_lexer->lexer_debug ("<DQ_STRING_START>(\\.\\.\\.){S}*{NL}|<DQ_STRING_START>(\\.\\.\\.){S}*{CCHAR}.*{NL}"); | |
763 | |
764 static const char *msg = "'...' continuations in double-quoted character strings are obsolete and will not be allowed in a future version of Octave; please use '\\' instead"; | |
765 | |
766 std::string nm = curr_lexer->fcn_file_full_name; | |
767 | |
768 if (nm.empty ()) | |
769 warning_with_id ("Octave:deprecated-syntax", "%s", msg); | |
770 else | |
771 warning_with_id ("Octave:deprecated-syntax", | |
772 "%s; near line %d of file '%s'", msg, | |
773 curr_lexer->input_line_number, nm.c_str ()); | |
774 | |
775 HANDLE_STRING_CONTINUATION; | |
776 } | |
777 | |
778 <DQ_STRING_START>\\{S}+{NL} | | |
779 <DQ_STRING_START>\\{S}*{CCHAR}.*{NL} { | |
780 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{S}+{NL}|<DQ_STRING_START>\\\\{S}*{CCHAR}.*{NL}"); | |
781 | |
782 static const char *msg = "white space and comments after continuation markers in double-quoted character strings are obsolete and will not be allowed in a future version of Octave"; | |
783 | |
784 std::string nm = curr_lexer->fcn_file_full_name; | |
785 | |
786 if (nm.empty ()) | |
787 warning_with_id ("Octave:deprecated-syntax", "%s", msg); | |
788 else | |
789 warning_with_id ("Octave:deprecated-syntax", | |
790 "%s; near line %d of file '%s'", msg, | |
791 curr_lexer->input_line_number, nm.c_str ()); | |
792 | |
793 HANDLE_STRING_CONTINUATION; | |
794 } | |
795 | |
796 <DQ_STRING_START>\\{NL} { | |
797 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}"); | |
798 | |
799 HANDLE_STRING_CONTINUATION; | |
800 } | |
801 | |
802 <DQ_STRING_START>\\. { | |
803 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\."); | |
804 | |
805 curr_lexer->current_input_column += yyleng; | |
806 curr_lexer->string_text += yytext[1]; | |
807 } | |
808 | |
809 <DQ_STRING_START>\. { | |
810 curr_lexer->lexer_debug ("<DQ_STRING_START>\\."); | |
811 | |
812 curr_lexer->current_input_column++; | |
813 curr_lexer->string_text += yytext[0]; | |
814 } | |
815 | |
816 <DQ_STRING_START>[^\.\\\r\n\"]+ { | |
817 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\.\\\\\\r\\n\\\"]+"); | |
818 | |
819 curr_lexer->current_input_column += yyleng; | |
820 curr_lexer->string_text += yytext; | |
821 } | |
822 | |
823 <DQ_STRING_START>{NL} { | |
824 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); | |
825 | |
826 curr_lexer->input_line_number++; | |
827 curr_lexer->current_input_column = 1; | |
828 | |
829 error ("unterminated character string constant"); | |
830 | |
831 return LEXICAL_ERROR; | |
832 } | |
833 | |
834 %{ | |
835 // Single-quoted character strings. | |
836 %} | |
837 | |
838 <SQ_STRING_START>\'\' { | |
839 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'\\'"); | |
840 | |
841 curr_lexer->current_input_column += yyleng; | |
842 curr_lexer->string_text += '\''; | |
843 } | |
844 | |
845 <SQ_STRING_START>\' { | |
846 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'"); | |
847 | |
848 curr_lexer->current_input_column++; | |
849 | |
850 curr_lexer->pop_start_state (); | |
851 | |
852 curr_lexer->looking_for_object_index = true; | |
853 curr_lexer->at_beginning_of_statement = false; | |
854 | |
855 curr_lexer->push_token (new token (SQ_STRING, | |
856 curr_lexer->string_text, | |
857 curr_lexer->string_line, | |
858 curr_lexer->string_column)); | |
859 | |
860 curr_lexer->string_text = ""; | |
861 | |
862 return curr_lexer->count_token_internal (SQ_STRING); | |
863 } | |
864 | |
865 <SQ_STRING_START>[^\'\n\r]+ { | |
866 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+"); | |
867 | |
868 curr_lexer->current_input_column += yyleng; | |
869 curr_lexer->string_text += yytext; | |
870 } | |
871 | |
872 <SQ_STRING_START>{NL} { | |
873 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); | |
874 | |
875 curr_lexer->input_line_number++; | |
876 curr_lexer->current_input_column = 1; | |
877 | |
878 error ("unterminated character string constant"); | |
879 | |
880 return LEXICAL_ERROR; | |
622 } | 881 } |
623 | 882 |
624 %{ | 883 %{ |
625 // Imaginary numbers. | 884 // Imaginary numbers. |
626 %} | 885 %} |
659 // the constant. | 918 // the constant. |
660 %} | 919 %} |
661 | 920 |
662 {D}+/\.[\*/\\^\'] | | 921 {D}+/\.[\*/\\^\'] | |
663 {NUMBER} { | 922 {NUMBER} { |
664 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); | 923 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\\\^\\']|{NUMBER}"); |
665 | 924 |
666 if (curr_lexer->previous_token_may_be_command () | 925 if (curr_lexer->previous_token_may_be_command () |
667 && curr_lexer->space_follows_previous_token ()) | 926 && curr_lexer->space_follows_previous_token ()) |
668 { | 927 { |
669 yyless (0); | 928 yyless (0); |
699 | 958 |
700 curr_lexer->mark_previous_token_trailing_space (); | 959 curr_lexer->mark_previous_token_trailing_space (); |
701 } | 960 } |
702 | 961 |
703 %{ | 962 %{ |
704 // Continuation lines. Allow comments after continuations. | 963 // Continuation lines. Allow arbitrary text after continuations. |
705 %} | 964 %} |
706 | 965 |
707 {CONT}{S}*{NL} | | 966 \.\.\..*{NL} { |
708 {CONT}{S}*{CCHAR}.*{NL} { | 967 curr_lexer->lexer_debug ("\\.\\.\\..*{NL}"); |
709 curr_lexer->lexer_debug ("{CONT}{S}*{NL}|{CONT}{S}*{CCHAR}.*{NL}"); | 968 |
969 curr_lexer->handle_continuation (); | |
970 } | |
971 | |
972 %{ | |
973 // Deprecated C preprocessor style continuation markers. | |
974 %} | |
975 | |
976 \\{S}*{NL} | | |
977 \\{S}*{CCHAR}.*{NL} { | |
978 curr_lexer->lexer_debug ("\\\\{S}*{NL}|\\\\{S}*{CCHAR}.*{NL}"); | |
979 | |
980 static const char *msg = "using continuation marker \\ outside of double quoted strings is deprecated and will be removed in a future version of Octave"; | |
981 | |
982 std::string nm = curr_lexer->fcn_file_full_name; | |
983 | |
984 if (nm.empty ()) | |
985 warning_with_id ("Octave:deprecated-syntax", "%s", msg); | |
986 else | |
987 warning_with_id ("Octave:deprecated-syntax", | |
988 "%s; near line %d of file '%s'", msg, | |
989 curr_lexer->input_line_number, nm.c_str ()); | |
710 | 990 |
711 curr_lexer->handle_continuation (); | 991 curr_lexer->handle_continuation (); |
712 } | 992 } |
713 | 993 |
714 %{ | 994 %{ |
865 curr_lexer->lexer_debug ("'"); | 1145 curr_lexer->lexer_debug ("'"); |
866 | 1146 |
867 if (curr_lexer->previous_token_may_be_command () | 1147 if (curr_lexer->previous_token_may_be_command () |
868 && curr_lexer->space_follows_previous_token ()) | 1148 && curr_lexer->space_follows_previous_token ()) |
869 { | 1149 { |
870 yyless (0); | 1150 curr_lexer->current_input_column++; |
871 curr_lexer->push_start_state (COMMAND_START); | 1151 curr_lexer->push_start_state (COMMAND_START); |
1152 curr_lexer->begin_string (SQ_STRING_START); | |
872 } | 1153 } |
873 else if (curr_lexer->at_beginning_of_statement) | 1154 else if (curr_lexer->at_beginning_of_statement) |
874 { | 1155 { |
875 curr_lexer->current_input_column++; | 1156 curr_lexer->current_input_column++; |
876 int retval = curr_lexer->handle_string ('\''); | 1157 curr_lexer->begin_string (SQ_STRING_START); |
877 return curr_lexer->count_token_internal (retval); | |
878 } | 1158 } |
879 else | 1159 else |
880 { | 1160 { |
881 int tok = curr_lexer->previous_token_value (); | 1161 int tok = curr_lexer->previous_token_value (); |
882 | 1162 |
886 { | 1166 { |
887 if (tok == '[' || tok == '{' | 1167 if (tok == '[' || tok == '{' |
888 || curr_lexer->previous_token_is_binop ()) | 1168 || curr_lexer->previous_token_is_binop ()) |
889 { | 1169 { |
890 curr_lexer->current_input_column++; | 1170 curr_lexer->current_input_column++; |
891 int retval = curr_lexer->handle_string ('\''); | 1171 curr_lexer->begin_string (SQ_STRING_START); |
892 return curr_lexer->count_token_internal (retval); | |
893 } | 1172 } |
894 else | 1173 else |
895 { | 1174 { |
896 yyless (0); | 1175 yyless (0); |
897 curr_lexer->xunput (','); | 1176 curr_lexer->xunput (','); |
904 if (tok == '[' || tok == '{' | 1183 if (tok == '[' || tok == '{' |
905 || curr_lexer->previous_token_is_binop () | 1184 || curr_lexer->previous_token_is_binop () |
906 || curr_lexer->previous_token_is_keyword ()) | 1185 || curr_lexer->previous_token_is_keyword ()) |
907 { | 1186 { |
908 curr_lexer->current_input_column++; | 1187 curr_lexer->current_input_column++; |
909 int retval = curr_lexer->handle_string ('\''); | 1188 curr_lexer->begin_string (SQ_STRING_START); |
910 return curr_lexer->count_token_internal (retval); | |
911 } | 1189 } |
912 else | 1190 else |
913 return curr_lexer->count_token (HERMITIAN); | 1191 return curr_lexer->count_token (HERMITIAN); |
914 } | 1192 } |
915 } | 1193 } |
918 if (! tok || tok == '[' || tok == '{' || tok == '(' | 1196 if (! tok || tok == '[' || tok == '{' || tok == '(' |
919 || curr_lexer->previous_token_is_binop () | 1197 || curr_lexer->previous_token_is_binop () |
920 || curr_lexer->previous_token_is_keyword ()) | 1198 || curr_lexer->previous_token_is_keyword ()) |
921 { | 1199 { |
922 curr_lexer->current_input_column++; | 1200 curr_lexer->current_input_column++; |
923 int retval = curr_lexer->handle_string ('\''); | 1201 curr_lexer->begin_string (SQ_STRING_START); |
924 return curr_lexer->count_token_internal (retval); | |
925 } | 1202 } |
926 else | 1203 else |
927 return curr_lexer->count_token (HERMITIAN); | 1204 return curr_lexer->count_token (HERMITIAN); |
928 } | 1205 } |
929 } | 1206 } |
932 %{ | 1209 %{ |
933 // Double quotes always begin strings. | 1210 // Double quotes always begin strings. |
934 %} | 1211 %} |
935 | 1212 |
936 \" { | 1213 \" { |
937 curr_lexer->lexer_debug ("\""); | 1214 curr_lexer->lexer_debug ("\\\""); |
938 | 1215 |
939 if (curr_lexer->previous_token_may_be_command () | 1216 if (curr_lexer->previous_token_may_be_command () |
940 && curr_lexer->space_follows_previous_token ()) | 1217 && curr_lexer->space_follows_previous_token ()) |
941 { | 1218 { |
942 yyless (0); | 1219 curr_lexer->current_input_column++; |
943 curr_lexer->push_start_state (COMMAND_START); | 1220 curr_lexer->push_start_state (COMMAND_START); |
1221 curr_lexer->begin_string (DQ_STRING_START); | |
944 } | 1222 } |
945 else | 1223 else |
946 { | 1224 { |
947 int tok = curr_lexer->previous_token_value (); | 1225 int tok = curr_lexer->previous_token_value (); |
948 | 1226 |
952 { | 1230 { |
953 if (tok == '[' || tok == '{' | 1231 if (tok == '[' || tok == '{' |
954 || curr_lexer->previous_token_is_binop ()) | 1232 || curr_lexer->previous_token_is_binop ()) |
955 { | 1233 { |
956 curr_lexer->current_input_column++; | 1234 curr_lexer->current_input_column++; |
957 int retval = curr_lexer->handle_string ('"'); | 1235 curr_lexer->begin_string (DQ_STRING_START); |
958 return curr_lexer->count_token_internal (retval); | |
959 } | 1236 } |
960 else | 1237 else |
961 { | 1238 { |
962 yyless (0); | 1239 yyless (0); |
963 curr_lexer->xunput (','); | 1240 curr_lexer->xunput (','); |
966 } | 1243 } |
967 } | 1244 } |
968 else | 1245 else |
969 { | 1246 { |
970 curr_lexer->current_input_column++; | 1247 curr_lexer->current_input_column++; |
971 int retval = curr_lexer->handle_string ('"'); | 1248 curr_lexer->begin_string (DQ_STRING_START); |
972 return curr_lexer->count_token_internal (retval); | |
973 } | 1249 } |
974 } | 1250 } |
975 else | 1251 else |
976 { | 1252 { |
977 curr_lexer->current_input_column++; | 1253 curr_lexer->current_input_column++; |
978 int retval = curr_lexer->handle_string ('"'); | 1254 curr_lexer->begin_string (DQ_STRING_START); |
979 return curr_lexer->count_token_internal (retval); | |
980 } | 1255 } |
981 } | 1256 } |
982 } | 1257 } |
983 | 1258 |
984 %{ | 1259 %{ |
1555 block_comment_nesting_level = 0; | 1830 block_comment_nesting_level = 0; |
1556 token_count = 0; | 1831 token_count = 0; |
1557 current_input_line = ""; | 1832 current_input_line = ""; |
1558 comment_text = ""; | 1833 comment_text = ""; |
1559 help_text = ""; | 1834 help_text = ""; |
1835 string_text = ""; | |
1836 string_line = 0; | |
1837 string_column = 0; | |
1560 fcn_file_name = ""; | 1838 fcn_file_name = ""; |
1561 fcn_file_full_name = ""; | 1839 fcn_file_full_name = ""; |
1562 looking_at_object_index.clear (); | 1840 looking_at_object_index.clear (); |
1563 looking_at_object_index.push_front (false); | 1841 looking_at_object_index.push_front (false); |
1564 | 1842 |
1769 reading_script_file = true; | 2047 reading_script_file = true; |
1770 | 2048 |
1771 push_start_state (INPUT_FILE_START); | 2049 push_start_state (INPUT_FILE_START); |
1772 } | 2050 } |
1773 | 2051 |
2052 void | |
2053 octave_base_lexer::begin_string (int state) | |
2054 { | |
2055 string_line = input_line_number; | |
2056 string_column = current_input_column; | |
2057 | |
2058 push_start_state (state); | |
2059 } | |
2060 | |
1774 int | 2061 int |
1775 octave_base_lexer::handle_end_of_input (void) | 2062 octave_base_lexer::handle_end_of_input (void) |
1776 { | 2063 { |
1777 lexer_debug ("<<EOF>>"); | 2064 lexer_debug ("<<EOF>>"); |
1778 | 2065 |
2257 comment_text = ""; | 2544 comment_text = ""; |
2258 | 2545 |
2259 at_beginning_of_statement = true; | 2546 at_beginning_of_statement = true; |
2260 } | 2547 } |
2261 | 2548 |
2262 // We have seen a backslash and need to find out if it should be | |
2263 // treated as a continuation character. If so, this eats it, up to | |
2264 // and including the new line character. | |
2265 // | |
2266 // Match whitespace only, followed by a comment character or newline. | |
2267 // Once a comment character is found, discard all input until newline. | |
2268 // If non-whitespace characters are found before comment | |
2269 // characters, return false. Otherwise, return true. | |
2270 | |
2271 // FIXME -- we need to handle block comments here. | |
2272 | |
2273 bool | |
2274 octave_base_lexer::have_continuation (bool trailing_comments_ok) | |
2275 { | |
2276 std::ostringstream buf; | |
2277 | |
2278 std::string comment_buf; | |
2279 | |
2280 bool in_comment = false; | |
2281 bool beginning_of_comment = false; | |
2282 | |
2283 int c = 0; | |
2284 | |
2285 while ((c = text_yyinput ()) != EOF) | |
2286 { | |
2287 buf << static_cast<char> (c); | |
2288 | |
2289 switch (c) | |
2290 { | |
2291 case ' ': | |
2292 case '\t': | |
2293 if (in_comment) | |
2294 { | |
2295 comment_buf += static_cast<char> (c); | |
2296 beginning_of_comment = false; | |
2297 } | |
2298 break; | |
2299 | |
2300 case '%': | |
2301 case '#': | |
2302 if (trailing_comments_ok) | |
2303 { | |
2304 if (in_comment) | |
2305 { | |
2306 if (! beginning_of_comment) | |
2307 comment_buf += static_cast<char> (c); | |
2308 } | |
2309 else | |
2310 { | |
2311 maybe_gripe_matlab_incompatible_comment (c); | |
2312 in_comment = true; | |
2313 beginning_of_comment = true; | |
2314 } | |
2315 } | |
2316 else | |
2317 goto cleanup; | |
2318 break; | |
2319 | |
2320 case '\n': | |
2321 if (in_comment) | |
2322 { | |
2323 comment_buf += static_cast<char> (c); | |
2324 octave_comment_buffer::append (comment_buf); | |
2325 } | |
2326 current_input_column = 0; | |
2327 decrement_promptflag (); | |
2328 gripe_matlab_incompatible_continuation (); | |
2329 return true; | |
2330 | |
2331 default: | |
2332 if (in_comment) | |
2333 { | |
2334 comment_buf += static_cast<char> (c); | |
2335 beginning_of_comment = false; | |
2336 } | |
2337 else | |
2338 goto cleanup; | |
2339 break; | |
2340 } | |
2341 } | |
2342 | |
2343 xunput (c); | |
2344 return false; | |
2345 | |
2346 cleanup: | |
2347 | |
2348 std::string s = buf.str (); | |
2349 | |
2350 int len = s.length (); | |
2351 while (len--) | |
2352 xunput (s[len]); | |
2353 | |
2354 return false; | |
2355 } | |
2356 | |
2357 // We have seen a '.' and need to see if it is the start of a | |
2358 // continuation. If so, this eats it, up to and including the new | |
2359 // line character. | |
2360 | |
2361 bool | |
2362 octave_base_lexer::have_ellipsis_continuation (bool trailing_comments_ok) | |
2363 { | |
2364 char c1 = text_yyinput (); | |
2365 if (c1 == '.') | |
2366 { | |
2367 char c2 = text_yyinput (); | |
2368 if (c2 == '.' && have_continuation (trailing_comments_ok)) | |
2369 return true; | |
2370 else | |
2371 { | |
2372 xunput (c2); | |
2373 xunput (c1); | |
2374 } | |
2375 } | |
2376 else | |
2377 xunput (c1); | |
2378 | |
2379 return false; | |
2380 } | |
2381 | |
2382 int | |
2383 octave_base_lexer::handle_string (char delim) | |
2384 { | |
2385 std::ostringstream buf; | |
2386 | |
2387 int bos_line = input_line_number; | |
2388 int bos_col = current_input_column; | |
2389 | |
2390 int c; | |
2391 int escape_pending = 0; | |
2392 | |
2393 while ((c = text_yyinput ()) != EOF) | |
2394 { | |
2395 current_input_column++; | |
2396 | |
2397 if (c == '\\') | |
2398 { | |
2399 if (delim == '\'' || escape_pending) | |
2400 { | |
2401 buf << static_cast<char> (c); | |
2402 escape_pending = 0; | |
2403 } | |
2404 else | |
2405 { | |
2406 if (have_continuation (false)) | |
2407 escape_pending = 0; | |
2408 else | |
2409 { | |
2410 buf << static_cast<char> (c); | |
2411 escape_pending = 1; | |
2412 } | |
2413 } | |
2414 continue; | |
2415 } | |
2416 else if (c == '.') | |
2417 { | |
2418 if (delim == '\'' || ! have_ellipsis_continuation (false)) | |
2419 buf << static_cast<char> (c); | |
2420 } | |
2421 else if (c == '\n') | |
2422 { | |
2423 error ("unterminated string constant"); | |
2424 break; | |
2425 } | |
2426 else if (c == delim) | |
2427 { | |
2428 if (escape_pending) | |
2429 buf << static_cast<char> (c); | |
2430 else | |
2431 { | |
2432 c = text_yyinput (); | |
2433 if (c == delim) | |
2434 { | |
2435 buf << static_cast<char> (c); | |
2436 } | |
2437 else | |
2438 { | |
2439 std::string s; | |
2440 xunput (c); | |
2441 | |
2442 if (delim == '\'') | |
2443 s = buf.str (); | |
2444 else | |
2445 s = do_string_escapes (buf.str ()); | |
2446 | |
2447 if (delim == '"') | |
2448 gripe_matlab_incompatible ("\" used as string delimiter"); | |
2449 else if (delim == '\'') | |
2450 gripe_single_quote_string (); | |
2451 | |
2452 looking_for_object_index = true; | |
2453 at_beginning_of_statement = false; | |
2454 | |
2455 int tok = delim == '"' ? DQ_STRING : SQ_STRING; | |
2456 | |
2457 push_token (new token (tok, s, bos_line, bos_col)); | |
2458 | |
2459 return tok; | |
2460 } | |
2461 } | |
2462 } | |
2463 else | |
2464 { | |
2465 buf << static_cast<char> (c); | |
2466 } | |
2467 | |
2468 escape_pending = 0; | |
2469 } | |
2470 | |
2471 return LEXICAL_ERROR; | |
2472 } | |
2473 | |
2474 int | 2549 int |
2475 octave_base_lexer::handle_close_bracket (int bracket_type) | 2550 octave_base_lexer::handle_close_bracket (int bracket_type) |
2476 { | 2551 { |
2477 int retval = bracket_type; | 2552 int retval = bracket_type; |
2478 | 2553 |
2636 // The call to is_keyword_token set at_beginning_of_statement. | 2711 // The call to is_keyword_token set at_beginning_of_statement. |
2637 | 2712 |
2638 return kw_token; | 2713 return kw_token; |
2639 } | 2714 } |
2640 | 2715 |
2641 // Find the token in the symbol table. Beware the magic | 2716 // Find the token in the symbol table. |
2642 // transformation of the end keyword... | |
2643 | |
2644 if (tok == "end") | |
2645 tok = "__end__"; | |
2646 | 2717 |
2647 symbol_table::scope_id sid = symtab_context.curr_scope (); | 2718 symbol_table::scope_id sid = symtab_context.curr_scope (); |
2648 | 2719 |
2649 token *tok_val = new token (NAME, &(symbol_table::insert (tok, sid)), | 2720 token *tok_val = new token (NAME, &(symbol_table::insert (tok, sid)), |
2650 input_line_number, current_input_column); | 2721 input_line_number, current_input_column); |
2667 | 2738 |
2668 push_token (tok_val); | 2739 push_token (tok_val); |
2669 | 2740 |
2670 current_input_column += flex_yyleng (); | 2741 current_input_column += flex_yyleng (); |
2671 | 2742 |
2672 if (tok != "__end__") | 2743 // The magic end index can't be indexed. |
2744 | |
2745 if (tok != "end") | |
2673 looking_for_object_index = true; | 2746 looking_for_object_index = true; |
2674 | 2747 |
2675 at_beginning_of_statement = false; | 2748 at_beginning_of_statement = false; |
2676 | 2749 |
2677 return NAME; | 2750 return NAME; |
2978 | 3051 |
2979 case LINE_COMMENT_START: | 3052 case LINE_COMMENT_START: |
2980 std::cerr << "LINE_COMMENT_START" << std::endl; | 3053 std::cerr << "LINE_COMMENT_START" << std::endl; |
2981 break; | 3054 break; |
2982 | 3055 |
3056 case DQ_STRING_START: | |
3057 std::cerr << "DQ_STRING_START" << std::endl; | |
3058 break; | |
3059 | |
3060 case SQ_STRING_START: | |
3061 std::cerr << "SQ_STRING_START" << std::endl; | |
3062 break; | |
3063 | |
2983 default: | 3064 default: |
2984 std::cerr << "UNKNOWN START STATE!" << std::endl; | 3065 std::cerr << "UNKNOWN START STATE!" << std::endl; |
2985 break; | 3066 break; |
2986 } | 3067 } |
2987 } | 3068 } |