comparison libinterp/parse-tree/lex.ll @ 17577:c702371ff6df classdef

maint: periodic merge of default to classdef
author John W. Eaton <jwe@octave.org>
date Sat, 05 Oct 2013 11:22:09 -0400
parents 498b2dd1bd56 f0edd6c752e9
children 93b3d03b05e7
comparison
equal deleted inserted replaced
17414:20d1b911b4e7 17577:c702371ff6df
48 48
49 %x INPUT_FILE_START 49 %x INPUT_FILE_START
50 50
51 %x BLOCK_COMMENT_START 51 %x BLOCK_COMMENT_START
52 %x LINE_COMMENT_START 52 %x LINE_COMMENT_START
53
54 %x DQ_STRING_START
55 %x SQ_STRING_START
53 56
54 %{ 57 %{
55 58
56 #include <cctype> 59 #include <cctype>
57 #include <cstring> 60 #include <cstring>
203 } \ 206 } \
204 } \ 207 } \
205 } \ 208 } \
206 while (0) 209 while (0)
207 210
211 // We can't rely on the trick used elsewhere of sticking ASCII 1 in
212 // the input buffer and recognizing it as a special case because ASCII
213 // 1 is a valid character for a character string. If we are at the
214 // end of the buffer, ask for more input. If we are at the end of the
215 // file, deal with it. Otherwise, just keep going with the text from
216 // the current buffer.
//
// HANDLE_STRING_CONTINUATION is written to be expanded inside a lexer
// action.  It always decrements the prompt flag, increments
// input_line_number, and resets current_input_column to 1.  Only when
// the lexer is a push lexer does it return from the enclosing action:
// with -1 if at_end_of_buffer () is true, otherwise with the result
// of handle_end_of_input () if at_end_of_file () is true.  The
// do { ... } while (0) wrapper makes the expansion a single statement.
217 #define HANDLE_STRING_CONTINUATION \
218 do \
219 { \
220 curr_lexer->decrement_promptflag (); \
221 curr_lexer->input_line_number++; \
222 curr_lexer->current_input_column = 1; \
223 \
224 if (curr_lexer->is_push_lexer ()) \
225 { \
226 if (curr_lexer->at_end_of_buffer ()) \
227 return -1; \
228 \
229 if (curr_lexer->at_end_of_file ()) \
230 return curr_lexer->handle_end_of_input (); \
231 } \
232 } \
233 while (0)
234
235
208 static bool Vdisplay_tokens = false; 236 static bool Vdisplay_tokens = false;
209 237
210 static unsigned int Vtoken_count = 0; 238 static unsigned int Vtoken_count = 0;
211 239
212 // Internal variable for lexer debugging state. 240 // Internal variable for lexer debugging state.
220 %} 248 %}
221 249
222 D [0-9] 250 D [0-9]
223 S [ \t] 251 S [ \t]
224 NL ((\n)|(\r)|(\r\n)) 252 NL ((\n)|(\r)|(\r\n))
225 CONT ((\.\.\.)|(\\))
226 Im [iIjJ] 253 Im [iIjJ]
227 CCHAR [#%] 254 CCHAR [#%]
228 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) 255 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*)
229 EXPON ([DdEe][+-]?{D}+) 256 EXPON ([DdEe][+-]?{D}+)
230 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) 257 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+))
288 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']"); 315 curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']");
289 316
290 curr_lexer->at_beginning_of_statement = false; 317 curr_lexer->at_beginning_of_statement = false;
291 318
292 curr_lexer->current_input_column++; 319 curr_lexer->current_input_column++;
293 int tok = curr_lexer->handle_string (yytext[0]); 320
294 321 curr_lexer->begin_string (yytext[0] == '"'
295 return curr_lexer->count_token_internal (tok); 322 ? DQ_STRING_START : SQ_STRING_START);
296 } 323 }
297 324
298 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { 325 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* {
299 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); 326 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*");
300 327
447 // after a block of full-line comments, finish the full line comment 474 // after a block of full-line comments, finish the full line comment
448 // block. 475 // block.
449 %} 476 %}
450 477
451 ^{S}*{CCHAR}\{{S}*{NL} { 478 ^{S}*{CCHAR}\{{S}*{NL} {
452 curr_lexer->lexer_debug ("^{S}*{CCHAR}\{{S}*{NL}"); 479 curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}");
453 480
454 yyless (0); 481 yyless (0);
455 482
456 if (curr_lexer->start_state () == LINE_COMMENT_START) 483 if (curr_lexer->start_state () == LINE_COMMENT_START)
457 { 484 {
466 curr_lexer->push_start_state (BLOCK_COMMENT_START); 493 curr_lexer->push_start_state (BLOCK_COMMENT_START);
467 494
468 } 495 }
469 496
470 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} { 497 <BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
471 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL}"); 498 curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");
472 499
473 curr_lexer->input_line_number++; 500 curr_lexer->input_line_number++;
474 curr_lexer->current_input_column = 1; 501 curr_lexer->current_input_column = 1;
475 502
476 if (curr_lexer->block_comment_nesting_level) 503 if (curr_lexer->block_comment_nesting_level)
617 curr_lexer->xunput (yytext[0]); 644 curr_lexer->xunput (yytext[0]);
618 645
619 curr_lexer->finish_comment (octave_comment_elt::full_line); 646 curr_lexer->finish_comment (octave_comment_elt::full_line);
620 647
621 curr_lexer->pop_start_state (); 648 curr_lexer->pop_start_state ();
649 }
650
651 %{
652 // Double-quoted character strings.
653 %}
654
655 <DQ_STRING_START>\"\" {
    // Two consecutive double quotes inside a double-quoted string add a
    // single literal '"' to the accumulated text; the string stays open.
656 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\"");
657
658 curr_lexer->current_input_column += yyleng;
659 curr_lexer->string_text += '"';
660 }
661
662 <DQ_STRING_START>\" {
    // A lone double quote ends the string: leave the string start state,
    // emit a DQ_STRING token holding the accumulated text, and clear the
    // accumulator for the next string.
663 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"");
664
665 curr_lexer->current_input_column++;
666
667 curr_lexer->pop_start_state ();
668
669 curr_lexer->looking_for_object_index = true;
670 curr_lexer->at_beginning_of_statement = false;
671
    // string_line and string_column are passed as the token position
    // rather than the current input position.
672 curr_lexer->push_token (new token (DQ_STRING,
673 curr_lexer->string_text,
674 curr_lexer->string_line,
675 curr_lexer->string_column));
676
677 curr_lexer->string_text = "";
678
679 return curr_lexer->count_token_internal (DQ_STRING);
680 }
681
682 <DQ_STRING_START>\\[0-7]{1,3} {
    // Octal escape: a backslash followed by one to three octal digits.
    // The decoded value is appended to the string as a single byte.
683 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");
684
685 curr_lexer->current_input_column += yyleng;
686
    // The %o conversion stores through a pointer to unsigned int, so the
    // destination must be unsigned.  Initialize it so it is never read
    // uninitialized.
687 unsigned int result = 0;
688 sscanf (yytext+1, "%o", &result);
689
    // Three octal digits can encode values up to 0777, which does not
    // fit in one byte, so the range has to be checked.
690 if (result > 0xff)
691 error ("invalid octal escape sequence in character string");
692 else
693 curr_lexer->string_text += static_cast<unsigned char> (result);
694 }
695
696 <DQ_STRING_START>\\x[0-9a-fA-F]+ {
    // Hexadecimal escape: "\x" followed by one or more hex digits.  The
    // decoded value is appended to the string as a single byte.
697 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\x[0-9a-fA-F]+");
698
699 curr_lexer->current_input_column += yyleng;
700
    // The %x conversion stores through a pointer to unsigned int, so the
    // destination must be unsigned.  Initialize it so it is never read
    // uninitialized.
701 unsigned int result = 0;
702 sscanf (yytext+2, "%x", &result);
703
704 // Truncate the value silently instead of checking the range like
705 // we do for octal above. This is to match C/C++ where any number
706 // of digits is allowed but the value is implementation-defined if
707 // it exceeds the range of the character type.
    // NOTE(review): a digit run too long for unsigned int is outside what
    // sscanf guarantees -- confirm whether such input needs handling.
708 curr_lexer->string_text += static_cast<unsigned char> (result);
709 }
710
711 <DQ_STRING_START>"\\a" {
    // Backslash-a: append the '\a' character.
712 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\"");
713
714 curr_lexer->current_input_column += yyleng;
715 curr_lexer->string_text += '\a';
716 }
717
718 <DQ_STRING_START>"\\b" {
    // Backslash-b: append the '\b' character.
719 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\"");
720
721 curr_lexer->current_input_column += yyleng;
722 curr_lexer->string_text += '\b';
723 }
724
725 <DQ_STRING_START>"\\f" {
    // Backslash-f: append the '\f' character.
726 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\"");
727
728 curr_lexer->current_input_column += yyleng;
729 curr_lexer->string_text += '\f';
730 }
731
732 <DQ_STRING_START>"\\n" {
    // Backslash-n: append the '\n' character.
733 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\"");
734
735 curr_lexer->current_input_column += yyleng;
736 curr_lexer->string_text += '\n';
737 }
738
739 <DQ_STRING_START>"\\r" {
    // Backslash-r: append the '\r' character.
740 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\"");
741
742 curr_lexer->current_input_column += yyleng;
743 curr_lexer->string_text += '\r';
744 }
745
746 <DQ_STRING_START>"\\t" {
    // Backslash-t: append the '\t' character.
747 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\"");
748
749 curr_lexer->current_input_column += yyleng;
750 curr_lexer->string_text += '\t';
751 }
752
753 <DQ_STRING_START>"\\v" {
    // Backslash-v: append the '\v' character.
754 curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\"");
755
756 curr_lexer->current_input_column += yyleng;
757 curr_lexer->string_text += '\v';
758 }
759
760 <DQ_STRING_START>(\.\.\.){S}*{NL} |
761 <DQ_STRING_START>(\.\.\.){S}*{CCHAR}.*{NL} {
    // "..." followed by optional whitespace and then either a newline or
    // a comment running to the newline.  Inside a double-quoted string
    // this is still accepted as a line continuation, but a
    // deprecated-syntax warning is issued first.
762 curr_lexer->lexer_debug ("<DQ_STRING_START>(\\.\\.\\.){S}*{NL}|<DQ_STRING_START>(\\.\\.\\.){S}*{CCHAR}.*{NL}");
763
764 static const char *msg = "'...' continuations in double-quoted character strings are obsolete and will not be allowed in a future version of Octave; please use '\\' instead";
765
766 std::string nm = curr_lexer->fcn_file_full_name;
767
    // Include the line number and file name in the warning only when a
    // file name is recorded.
768 if (nm.empty ())
769 warning_with_id ("Octave:deprecated-syntax", "%s", msg);
770 else
771 warning_with_id ("Octave:deprecated-syntax",
772 "%s; near line %d of file '%s'", msg,
773 curr_lexer->input_line_number, nm.c_str ());
774
    // Nothing is appended to string_text; the matched text is dropped.
775 HANDLE_STRING_CONTINUATION;
776 }
777
778 <DQ_STRING_START>\\{S}+{NL} |
779 <DQ_STRING_START>\\{S}*{CCHAR}.*{NL} {
    // A backslash followed by whitespace before the newline, or by
    // optional whitespace and a comment running to the newline.  This is
    // accepted as a line continuation, but a deprecated-syntax warning is
    // issued first.
780 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{S}+{NL}|<DQ_STRING_START>\\\\{S}*{CCHAR}.*{NL}");
781
782 static const char *msg = "white space and comments after continuation markers in double-quoted character strings are obsolete and will not be allowed in a future version of Octave";
783
784 std::string nm = curr_lexer->fcn_file_full_name;
785
    // Include the line number and file name in the warning only when a
    // file name is recorded.
786 if (nm.empty ())
787 warning_with_id ("Octave:deprecated-syntax", "%s", msg);
788 else
789 warning_with_id ("Octave:deprecated-syntax",
790 "%s; near line %d of file '%s'", msg,
791 curr_lexer->input_line_number, nm.c_str ());
792
    // Nothing is appended to string_text; the matched text is dropped.
793 HANDLE_STRING_CONTINUATION;
794 }
795
796 <DQ_STRING_START>\\{NL} {
    // A backslash immediately followed by a newline is a line
    // continuation: nothing is appended to string_text and no warning
    // is issued.
797 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}");
798
799 HANDLE_STRING_CONTINUATION;
800 }
801
802 <DQ_STRING_START>\\. {
    // A backslash followed by any other single character (the pattern
    // '.' does not match a newline): the backslash is dropped and the
    // character after it is appended unchanged.
803 curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\.");
804
805 curr_lexer->current_input_column += yyleng;
806 curr_lexer->string_text += yytext[1];
807 }
808
809 <DQ_STRING_START>\. {
    // A single period is appended literally.  Periods are matched one at
    // a time here because the bulk-text pattern excludes them, which
    // lets the longer "..." continuation patterns match when they apply.
810 curr_lexer->lexer_debug ("<DQ_STRING_START>\\.");
811
812 curr_lexer->current_input_column++;
813 curr_lexer->string_text += yytext[0];
814 }
815
816 <DQ_STRING_START>[^\.\\\r\n\"]+ {
    // A run of ordinary characters (anything except period, backslash,
    // carriage return, newline, or double quote) is appended verbatim.
817 curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\.\\\\\\r\\n\\\"]+");
818
819 curr_lexer->current_input_column += yyleng;
820 curr_lexer->string_text += yytext;
821 }
822
823 <DQ_STRING_START>{NL} {
    // A newline reached while still inside the string (not consumed by
    // one of the continuation rules) means the string was never closed:
    // update the position counters, report the error, and return
    // LEXICAL_ERROR.
    // NOTE(review): this action does not pop the string start state or
    // clear string_text itself -- confirm that error recovery resets them.
824 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");
825
826 curr_lexer->input_line_number++;
827 curr_lexer->current_input_column = 1;
828
829 error ("unterminated character string constant");
830
831 return LEXICAL_ERROR;
832 }
833
834 %{
835 // Single-quoted character strings.
836 %}
837
838 <SQ_STRING_START>\'\' {
    // Two consecutive single quotes inside a single-quoted string add a
    // single literal "'" to the accumulated text; the string stays open.
839 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'\\'");
840
841 curr_lexer->current_input_column += yyleng;
842 curr_lexer->string_text += '\'';
843 }
844
845 <SQ_STRING_START>\' {
    // A lone single quote ends the string: leave the string start state,
    // emit an SQ_STRING token holding the accumulated text, and clear the
    // accumulator for the next string.
846 curr_lexer->lexer_debug ("<SQ_STRING_START>\\'");
847
848 curr_lexer->current_input_column++;
849
850 curr_lexer->pop_start_state ();
851
852 curr_lexer->looking_for_object_index = true;
853 curr_lexer->at_beginning_of_statement = false;
854
    // string_line and string_column are passed as the token position
    // rather than the current input position.
855 curr_lexer->push_token (new token (SQ_STRING,
856 curr_lexer->string_text,
857 curr_lexer->string_line,
858 curr_lexer->string_column));
859
860 curr_lexer->string_text = "";
861
862 return curr_lexer->count_token_internal (SQ_STRING);
863 }
864
865 <SQ_STRING_START>[^\'\n\r]+ {
    // A run of characters other than single quote, newline, or carriage
    // return is appended verbatim.  This rule does no escape processing,
    // so backslashes are kept as ordinary characters.
866 curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+");
867
868 curr_lexer->current_input_column += yyleng;
869 curr_lexer->string_text += yytext;
870 }
871
872 <SQ_STRING_START>{NL} {
    // A newline reached while still inside a single-quoted string means
    // the string was never closed: update the position counters, report
    // the error, and return LEXICAL_ERROR.
    // NOTE(review): this action does not pop the string start state or
    // clear string_text itself -- confirm that error recovery resets them.
873 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");
874
875 curr_lexer->input_line_number++;
876 curr_lexer->current_input_column = 1;
877
878 error ("unterminated character string constant");
879
880 return LEXICAL_ERROR;
622 } 881 }
623 882
624 %{ 883 %{
625 // Imaginary numbers. 884 // Imaginary numbers.
626 %} 885 %}
659 // the constant. 918 // the constant.
660 %} 919 %}
661 920
662 {D}+/\.[\*/\\^\'] | 921 {D}+/\.[\*/\\^\'] |
663 {NUMBER} { 922 {NUMBER} {
664 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); 923 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\\\^\\']|{NUMBER}");
665 924
666 if (curr_lexer->previous_token_may_be_command () 925 if (curr_lexer->previous_token_may_be_command ()
667 && curr_lexer->space_follows_previous_token ()) 926 && curr_lexer->space_follows_previous_token ())
668 { 927 {
669 yyless (0); 928 yyless (0);
699 958
700 curr_lexer->mark_previous_token_trailing_space (); 959 curr_lexer->mark_previous_token_trailing_space ();
701 } 960 }
702 961
703 %{ 962 %{
704 // Continuation lines. Allow comments after continuations. 963 // Continuation lines. Allow arbitrary text after continuations.
705 %} 964 %}
706 965
707 {CONT}{S}*{NL} | 966 \.\.\..*{NL} {
708 {CONT}{S}*{CCHAR}.*{NL} { 967 curr_lexer->lexer_debug ("\\.\\.\\..*{NL}");
709 curr_lexer->lexer_debug ("{CONT}{S}*{NL}|{CONT}{S}*{CCHAR}.*{NL}"); 968
969 curr_lexer->handle_continuation ();
970 }
971
972 %{
973 // Deprecated C preprocessor style continuation markers.
974 %}
975
976 \\{S}*{NL} |
977 \\{S}*{CCHAR}.*{NL} {
978 curr_lexer->lexer_debug ("\\\\{S}*{NL}|\\\\{S}*{CCHAR}.*{NL}");
979
980 static const char *msg = "using continuation marker \\ outside of double quoted strings is deprecated and will be removed in a future version of Octave";
981
982 std::string nm = curr_lexer->fcn_file_full_name;
983
984 if (nm.empty ())
985 warning_with_id ("Octave:deprecated-syntax", "%s", msg);
986 else
987 warning_with_id ("Octave:deprecated-syntax",
988 "%s; near line %d of file '%s'", msg,
989 curr_lexer->input_line_number, nm.c_str ());
710 990
711 curr_lexer->handle_continuation (); 991 curr_lexer->handle_continuation ();
712 } 992 }
713 993
714 %{ 994 %{
865 curr_lexer->lexer_debug ("'"); 1145 curr_lexer->lexer_debug ("'");
866 1146
867 if (curr_lexer->previous_token_may_be_command () 1147 if (curr_lexer->previous_token_may_be_command ()
868 && curr_lexer->space_follows_previous_token ()) 1148 && curr_lexer->space_follows_previous_token ())
869 { 1149 {
870 yyless (0); 1150 curr_lexer->current_input_column++;
871 curr_lexer->push_start_state (COMMAND_START); 1151 curr_lexer->push_start_state (COMMAND_START);
1152 curr_lexer->begin_string (SQ_STRING_START);
872 } 1153 }
873 else if (curr_lexer->at_beginning_of_statement) 1154 else if (curr_lexer->at_beginning_of_statement)
874 { 1155 {
875 curr_lexer->current_input_column++; 1156 curr_lexer->current_input_column++;
876 int retval = curr_lexer->handle_string ('\''); 1157 curr_lexer->begin_string (SQ_STRING_START);
877 return curr_lexer->count_token_internal (retval);
878 } 1158 }
879 else 1159 else
880 { 1160 {
881 int tok = curr_lexer->previous_token_value (); 1161 int tok = curr_lexer->previous_token_value ();
882 1162
886 { 1166 {
887 if (tok == '[' || tok == '{' 1167 if (tok == '[' || tok == '{'
888 || curr_lexer->previous_token_is_binop ()) 1168 || curr_lexer->previous_token_is_binop ())
889 { 1169 {
890 curr_lexer->current_input_column++; 1170 curr_lexer->current_input_column++;
891 int retval = curr_lexer->handle_string ('\''); 1171 curr_lexer->begin_string (SQ_STRING_START);
892 return curr_lexer->count_token_internal (retval);
893 } 1172 }
894 else 1173 else
895 { 1174 {
896 yyless (0); 1175 yyless (0);
897 curr_lexer->xunput (','); 1176 curr_lexer->xunput (',');
904 if (tok == '[' || tok == '{' 1183 if (tok == '[' || tok == '{'
905 || curr_lexer->previous_token_is_binop () 1184 || curr_lexer->previous_token_is_binop ()
906 || curr_lexer->previous_token_is_keyword ()) 1185 || curr_lexer->previous_token_is_keyword ())
907 { 1186 {
908 curr_lexer->current_input_column++; 1187 curr_lexer->current_input_column++;
909 int retval = curr_lexer->handle_string ('\''); 1188 curr_lexer->begin_string (SQ_STRING_START);
910 return curr_lexer->count_token_internal (retval);
911 } 1189 }
912 else 1190 else
913 return curr_lexer->count_token (HERMITIAN); 1191 return curr_lexer->count_token (HERMITIAN);
914 } 1192 }
915 } 1193 }
918 if (! tok || tok == '[' || tok == '{' || tok == '(' 1196 if (! tok || tok == '[' || tok == '{' || tok == '('
919 || curr_lexer->previous_token_is_binop () 1197 || curr_lexer->previous_token_is_binop ()
920 || curr_lexer->previous_token_is_keyword ()) 1198 || curr_lexer->previous_token_is_keyword ())
921 { 1199 {
922 curr_lexer->current_input_column++; 1200 curr_lexer->current_input_column++;
923 int retval = curr_lexer->handle_string ('\''); 1201 curr_lexer->begin_string (SQ_STRING_START);
924 return curr_lexer->count_token_internal (retval);
925 } 1202 }
926 else 1203 else
927 return curr_lexer->count_token (HERMITIAN); 1204 return curr_lexer->count_token (HERMITIAN);
928 } 1205 }
929 } 1206 }
932 %{ 1209 %{
933 // Double quotes always begin strings. 1210 // Double quotes always begin strings.
934 %} 1211 %}
935 1212
936 \" { 1213 \" {
937 curr_lexer->lexer_debug ("\""); 1214 curr_lexer->lexer_debug ("\\\"");
938 1215
939 if (curr_lexer->previous_token_may_be_command () 1216 if (curr_lexer->previous_token_may_be_command ()
940 && curr_lexer->space_follows_previous_token ()) 1217 && curr_lexer->space_follows_previous_token ())
941 { 1218 {
942 yyless (0); 1219 curr_lexer->current_input_column++;
943 curr_lexer->push_start_state (COMMAND_START); 1220 curr_lexer->push_start_state (COMMAND_START);
1221 curr_lexer->begin_string (DQ_STRING_START);
944 } 1222 }
945 else 1223 else
946 { 1224 {
947 int tok = curr_lexer->previous_token_value (); 1225 int tok = curr_lexer->previous_token_value ();
948 1226
952 { 1230 {
953 if (tok == '[' || tok == '{' 1231 if (tok == '[' || tok == '{'
954 || curr_lexer->previous_token_is_binop ()) 1232 || curr_lexer->previous_token_is_binop ())
955 { 1233 {
956 curr_lexer->current_input_column++; 1234 curr_lexer->current_input_column++;
957 int retval = curr_lexer->handle_string ('"'); 1235 curr_lexer->begin_string (DQ_STRING_START);
958 return curr_lexer->count_token_internal (retval);
959 } 1236 }
960 else 1237 else
961 { 1238 {
962 yyless (0); 1239 yyless (0);
963 curr_lexer->xunput (','); 1240 curr_lexer->xunput (',');
966 } 1243 }
967 } 1244 }
968 else 1245 else
969 { 1246 {
970 curr_lexer->current_input_column++; 1247 curr_lexer->current_input_column++;
971 int retval = curr_lexer->handle_string ('"'); 1248 curr_lexer->begin_string (DQ_STRING_START);
972 return curr_lexer->count_token_internal (retval);
973 } 1249 }
974 } 1250 }
975 else 1251 else
976 { 1252 {
977 curr_lexer->current_input_column++; 1253 curr_lexer->current_input_column++;
978 int retval = curr_lexer->handle_string ('"'); 1254 curr_lexer->begin_string (DQ_STRING_START);
979 return curr_lexer->count_token_internal (retval);
980 } 1255 }
981 } 1256 }
982 } 1257 }
983 1258
984 %{ 1259 %{
1555 block_comment_nesting_level = 0; 1830 block_comment_nesting_level = 0;
1556 token_count = 0; 1831 token_count = 0;
1557 current_input_line = ""; 1832 current_input_line = "";
1558 comment_text = ""; 1833 comment_text = "";
1559 help_text = ""; 1834 help_text = "";
1835 string_text = "";
1836 string_line = 0;
1837 string_column = 0;
1560 fcn_file_name = ""; 1838 fcn_file_name = "";
1561 fcn_file_full_name = ""; 1839 fcn_file_full_name = "";
1562 looking_at_object_index.clear (); 1840 looking_at_object_index.clear ();
1563 looking_at_object_index.push_front (false); 1841 looking_at_object_index.push_front (false);
1564 1842
1769 reading_script_file = true; 2047 reading_script_file = true;
1770 2048
1771 push_start_state (INPUT_FILE_START); 2049 push_start_state (INPUT_FILE_START);
1772 } 2050 }
1773 2051
// Begin lexing a character string: record the current line and column
// in string_line and string_column, then push STATE as the new start
// state.  The callers visible in the rules pass DQ_STRING_START or
// SQ_STRING_START.
2052 void
2053 octave_base_lexer::begin_string (int state)
2054 {
2055 string_line = input_line_number;
2056 string_column = current_input_column;
2057
2058 push_start_state (state);
2059 }
2060
1774 int 2061 int
1775 octave_base_lexer::handle_end_of_input (void) 2062 octave_base_lexer::handle_end_of_input (void)
1776 { 2063 {
1777 lexer_debug ("<<EOF>>"); 2064 lexer_debug ("<<EOF>>");
1778 2065
2257 comment_text = ""; 2544 comment_text = "";
2258 2545
2259 at_beginning_of_statement = true; 2546 at_beginning_of_statement = true;
2260 } 2547 }
2261 2548
2262 // We have seen a backslash and need to find out if it should be
2263 // treated as a continuation character. If so, this eats it, up to
2264 // and including the new line character.
2265 //
2266 // Match whitespace only, followed by a comment character or newline.
2267 // Once a comment character is found, discard all input until newline.
2268 // If non-whitespace characters are found before comment
2269 // characters, return 0. Otherwise, return 1.
2270
2271 // FIXME -- we need to handle block comments here.
2272
2273 bool
2274 octave_base_lexer::have_continuation (bool trailing_comments_ok)
2275 {
    // BUF records every character read so that all of them can be pushed
    // back if this turns out not to be a continuation.
2276 std::ostringstream buf;
2277
    // COMMENT_BUF collects the text of a trailing comment, if any.
2278 std::string comment_buf;
2279
    // BEGINNING_OF_COMMENT stays true while only the leading run of
    // comment characters has been seen; those are not stored.
2280 bool in_comment = false;
2281 bool beginning_of_comment = false;
2282
2283 int c = 0;
2284
2285 while ((c = text_yyinput ()) != EOF)
2286 {
2287 buf << static_cast<char> (c);
2288
2289 switch (c)
2290 {
    // Whitespace is skipped outside a comment and stored inside one.
2291 case ' ':
2292 case '\t':
2293 if (in_comment)
2294 {
2295 comment_buf += static_cast<char> (c);
2296 beginning_of_comment = false;
2297 }
2298 break;
2299
    // A comment character starts a trailing comment when comments are
    // allowed; otherwise this is not a continuation.
2300 case '%':
2301 case '#':
2302 if (trailing_comments_ok)
2303 {
2304 if (in_comment)
2305 {
2306 if (! beginning_of_comment)
2307 comment_buf += static_cast<char> (c);
2308 }
2309 else
2310 {
2311 maybe_gripe_matlab_incompatible_comment (c);
2312 in_comment = true;
2313 beginning_of_comment = true;
2314 }
2315 }
2316 else
2317 goto cleanup;
2318 break;
2319
    // A newline completes the continuation; the characters read so far
    // stay consumed and any collected comment is saved.
2320 case '\n':
2321 if (in_comment)
2322 {
2323 comment_buf += static_cast<char> (c);
2324 octave_comment_buffer::append (comment_buf);
2325 }
2326 current_input_column = 0;
2327 decrement_promptflag ();
2328 gripe_matlab_incompatible_continuation ();
2329 return true;
2330
    // Any other character is comment text inside a comment; outside one
    // it means this is not a continuation.
2331 default:
2332 if (in_comment)
2333 {
2334 comment_buf += static_cast<char> (c);
2335 beginning_of_comment = false;
2336 }
2337 else
2338 goto cleanup;
2339 break;
2340 }
2341 }
2342
    // The loop only exits here when text_yyinput returned EOF.
    // NOTE(review): this pushes back the EOF value only, not the
    // characters saved in BUF -- confirm that is intended.
2343 xunput (c);
2344 return false;
2345
2346 cleanup:
2347
    // Not a continuation: push back everything that was read, last
    // character first.
2348 std::string s = buf.str ();
2349
2350 int len = s.length ();
2351 while (len--)
2352 xunput (s[len]);
2353
2354 return false;
2355 }
2356
2357 // We have seen a '.' and need to see if it is the start of a
2358 // continuation. If so, this eats it, up to and including the new
2359 // line character.
2360
2361 bool
2362 octave_base_lexer::have_ellipsis_continuation (bool trailing_comments_ok)
2363 {
    // The caller has already consumed the first '.'.  Read up to two more
    // characters; return true only if both are '.' and have_continuation
    // accepts the rest of the line.  Otherwise push back what was read
    // here (in reverse order) and return false.
    // NOTE(review): the result of text_yyinput is stored in a char, so an
    // EOF return value cannot be told apart from a character -- confirm.
2364 char c1 = text_yyinput ();
2365 if (c1 == '.')
2366 {
2367 char c2 = text_yyinput ();
2368 if (c2 == '.' && have_continuation (trailing_comments_ok))
2369 return true;
2370 else
2371 {
2372 xunput (c2);
2373 xunput (c1);
2374 }
2375 }
2376 else
2377 xunput (c1);
2378
2379 return false;
2380 }
2381
2382 int
2383 octave_base_lexer::handle_string (char delim)
2384 {
    // Read a character string by hand, one character at a time, up to the
    // closing DELIM.  On success a DQ_STRING or SQ_STRING token is pushed
    // and its token value returned; otherwise LEXICAL_ERROR is returned.
2385 std::ostringstream buf;
2386
    // Remember where the string began for the token position.
2387 int bos_line = input_line_number;
2388 int bos_col = current_input_column;
2389
2390 int c;
    // Nonzero when the previous character was an unescaped backslash in a
    // double-quoted string.
2391 int escape_pending = 0;
2392
2393 while ((c = text_yyinput ()) != EOF)
2394 {
2395 current_input_column++;
2396
2397 if (c == '\\')
2398 {
    // In single-quoted strings, or directly after another backslash, the
    // backslash is stored as ordinary text.
2399 if (delim == '\'' || escape_pending)
2400 {
2401 buf << static_cast<char> (c);
2402 escape_pending = 0;
2403 }
2404 else
2405 {
    // Otherwise it is either a line continuation, which is consumed, or
    // the start of an escape sequence, which is stored for
    // do_string_escapes to process later.
2406 if (have_continuation (false))
2407 escape_pending = 0;
2408 else
2409 {
2410 buf << static_cast<char> (c);
2411 escape_pending = 1;
2412 }
2413 }
    // Skip the reset of escape_pending at the bottom of the loop.
2414 continue;
2415 }
2416 else if (c == '.')
2417 {
    // A '.' is stored unless it starts a "..." continuation in a
    // double-quoted string.
2418 if (delim == '\'' || ! have_ellipsis_continuation (false))
2419 buf << static_cast<char> (c);
2420 }
2421 else if (c == '\n')
2422 {
    // A newline before the closing delimiter is an error; leaving the loop
    // leads to the LEXICAL_ERROR return below.
2423 error ("unterminated string constant");
2424 break;
2425 }
2426 else if (c == delim)
2427 {
    // A delimiter that follows a pending backslash is part of the string.
2428 if (escape_pending)
2429 buf << static_cast<char> (c);
2430 else
2431 {
    // Look one character ahead: a doubled delimiter stands for one literal
    // delimiter, anything else ends the string.
2432 c = text_yyinput ();
2433 if (c == delim)
2434 {
2435 buf << static_cast<char> (c);
2436 }
2437 else
2438 {
2439 std::string s;
2440 xunput (c);
2441
    // Escape sequences are processed only for double-quoted strings.
2442 if (delim == '\'')
2443 s = buf.str ();
2444 else
2445 s = do_string_escapes (buf.str ());
2446
2447 if (delim == '"')
2448 gripe_matlab_incompatible ("\" used as string delimiter");
2449 else if (delim == '\'')
2450 gripe_single_quote_string ();
2451
2452 looking_for_object_index = true;
2453 at_beginning_of_statement = false;
2454
2455 int tok = delim == '"' ? DQ_STRING : SQ_STRING;
2456
2457 push_token (new token (tok, s, bos_line, bos_col));
2458
2459 return tok;
2460 }
2461 }
2462 }
2463 else
2464 {
2465 buf << static_cast<char> (c);
2466 }
2467
2468 escape_pending = 0;
2469 }
2470
    // Reached on EOF or after the unterminated-string error above.
2471 return LEXICAL_ERROR;
2472 }
2473
2474 int 2549 int
2475 octave_base_lexer::handle_close_bracket (int bracket_type) 2550 octave_base_lexer::handle_close_bracket (int bracket_type)
2476 { 2551 {
2477 int retval = bracket_type; 2552 int retval = bracket_type;
2478 2553
2636 // The call to is_keyword_token set at_beginning_of_statement. 2711 // The call to is_keyword_token set at_beginning_of_statement.
2637 2712
2638 return kw_token; 2713 return kw_token;
2639 } 2714 }
2640 2715
2641 // Find the token in the symbol table. Beware the magic 2716 // Find the token in the symbol table.
2642 // transformation of the end keyword...
2643
2644 if (tok == "end")
2645 tok = "__end__";
2646 2717
2647 symbol_table::scope_id sid = symtab_context.curr_scope (); 2718 symbol_table::scope_id sid = symtab_context.curr_scope ();
2648 2719
2649 token *tok_val = new token (NAME, &(symbol_table::insert (tok, sid)), 2720 token *tok_val = new token (NAME, &(symbol_table::insert (tok, sid)),
2650 input_line_number, current_input_column); 2721 input_line_number, current_input_column);
2667 2738
2668 push_token (tok_val); 2739 push_token (tok_val);
2669 2740
2670 current_input_column += flex_yyleng (); 2741 current_input_column += flex_yyleng ();
2671 2742
2672 if (tok != "__end__") 2743 // The magic end index can't be indexed.
2744
2745 if (tok != "end")
2673 looking_for_object_index = true; 2746 looking_for_object_index = true;
2674 2747
2675 at_beginning_of_statement = false; 2748 at_beginning_of_statement = false;
2676 2749
2677 return NAME; 2750 return NAME;
2978 3051
2979 case LINE_COMMENT_START: 3052 case LINE_COMMENT_START:
2980 std::cerr << "LINE_COMMENT_START" << std::endl; 3053 std::cerr << "LINE_COMMENT_START" << std::endl;
2981 break; 3054 break;
2982 3055
3056 case DQ_STRING_START:
3057 std::cerr << "DQ_STRING_START" << std::endl;
3058 break;
3059
3060 case SQ_STRING_START:
3061 std::cerr << "SQ_STRING_START" << std::endl;
3062 break;
3063
2983 default: 3064 default:
2984 std::cerr << "UNKNOWN START STATE!" << std::endl; 3065 std::cerr << "UNKNOWN START STATE!" << std::endl;
2985 break; 3066 break;
2986 } 3067 }
2987 } 3068 }