comparison libinterp/parse-tree/lex.ll @ 16263:9acb86e6ac90

4/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:28:11 -0400
parents b45a90cdb0ae
children 6077d13ddb3b 71ee3afedb69
comparison
equal deleted inserted replaced
16261:b45a90cdb0ae 16263:9acb86e6ac90
242 <MATRIX_START>{NL} { 242 <MATRIX_START>{NL} {
243 curr_lexer->lexer_debug ("<MATRIX_START>{NL}"); 243 curr_lexer->lexer_debug ("<MATRIX_START>{NL}");
244 244
245 int tok = curr_lexer->previous_token_value (); 245 int tok = curr_lexer->previous_token_value ();
246 246
247 if (! (tok == ',' || tok == ';' || tok == '[' || tok == '{')) 247 if (! (tok == ';' || tok == '[' || tok == '{'))
248 curr_lexer->xunput (','); 248 curr_lexer->xunput (';');
249 } 249 }
250 250
251 <KLUGE>@ { 251 <KLUGE>@ {
252 curr_lexer->lexer_debug ("<KLUGE>@"); 252 curr_lexer->lexer_debug ("<KLUGE>@");
253 curr_lexer->pop_start_state (); 253 curr_lexer->pop_start_state ();
299 } 299 }
300 300
301 \[ { 301 \[ {
302 curr_lexer->lexer_debug ("\\["); 302 curr_lexer->lexer_debug ("\\[");
303 303
304 curr_lexer->nesting_level.bracket (); 304 bool unput_comma = false;
305 305
306 curr_lexer->looking_at_object_index.push_front (false); 306 if (curr_lexer->whitespace_is_significant ()
307 307 && curr_lexer->space_follows_previous_token ())
308 curr_lexer->current_input_column += yyleng; 308 {
309 curr_lexer->looking_for_object_index = false; 309 int tok = curr_lexer->previous_token_value ();
310 curr_lexer->at_beginning_of_statement = false; 310
311 311 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
312 if (curr_lexer->defining_func 312 || curr_lexer->previous_token_is_binop ()))
313 && ! curr_lexer->parsed_function_name.top ()) 313 unput_comma = true;
314 curr_lexer->looking_at_return_list = true; 314 }
315
316 if (unput_comma)
317 {
318 yyless (0);
319 curr_lexer->xunput (',');
320 }
315 else 321 else
316 curr_lexer->looking_at_matrix_or_assign_lhs = true; 322 {
317 323 curr_lexer->nesting_level.bracket ();
318 curr_lexer->decrement_promptflag (); 324
319 325 curr_lexer->looking_at_object_index.push_front (false);
320 curr_lexer->bracketflag++; 326
321 327 curr_lexer->current_input_column += yyleng;
322 curr_lexer->push_start_state (MATRIX_START); 328 curr_lexer->looking_for_object_index = false;
323 329 curr_lexer->at_beginning_of_statement = false;
324 return curr_lexer->count_token ('['); 330
331 if (curr_lexer->defining_func
332 && ! curr_lexer->parsed_function_name.top ())
333 curr_lexer->looking_at_return_list = true;
334 else
335 curr_lexer->looking_at_matrix_or_assign_lhs = true;
336
337 curr_lexer->decrement_promptflag ();
338
339 curr_lexer->bracketflag++;
340
341 curr_lexer->push_start_state (MATRIX_START);
342
343 return curr_lexer->count_token ('[');
344 }
325 } 345 }
326 346
327 \] { 347 \] {
328 curr_lexer->lexer_debug ("\\]"); 348 curr_lexer->lexer_debug ("\\]");
329 349
483 %} 503 %}
484 504
485 {NUMBER}{Im} { 505 {NUMBER}{Im} {
486 curr_lexer->lexer_debug ("{NUMBER}{Im}"); 506 curr_lexer->lexer_debug ("{NUMBER}{Im}");
487 507
508 int tok = curr_lexer->previous_token_value ();
509
488 if (curr_lexer->whitespace_is_significant () 510 if (curr_lexer->whitespace_is_significant ()
489 && curr_lexer->space_follows_previous_token () 511 && curr_lexer->space_follows_previous_token ()
490 && ! curr_lexer->previous_token_is_binop ()) 512 && ! (tok == '[' || tok == '{'
513 || curr_lexer->previous_token_is_binop ()))
491 { 514 {
492 yyless (0); 515 yyless (0);
493 unput (','); 516 unput (',');
494 } 517 }
495 else 518 else
506 529
507 {D}+/\.[\*/\\^\'] | 530 {D}+/\.[\*/\\^\'] |
508 {NUMBER} { 531 {NUMBER} {
509 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); 532 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}");
510 533
534 int tok = curr_lexer->previous_token_value ();
535
511 if (curr_lexer->whitespace_is_significant () 536 if (curr_lexer->whitespace_is_significant ()
512 && curr_lexer->space_follows_previous_token () 537 && curr_lexer->space_follows_previous_token ()
513 && ! curr_lexer->previous_token_is_binop ()) 538 && ! (tok == '[' || tok == '{'
539 || curr_lexer->previous_token_is_binop ()))
514 { 540 {
515 yyless (0); 541 yyless (0);
516 unput (','); 542 unput (',');
517 } 543 }
518 else 544 else
569 yyless (0); 595 yyless (0);
570 unput (','); 596 unput (',');
571 } 597 }
572 else 598 else
573 { 599 {
574 if (curr_lexer->previous_token_may_be_command ()) 600 if (! curr_lexer->looking_at_decl_list
601 && curr_lexer->previous_token_may_be_command ())
575 { 602 {
576 yyless (0); 603 yyless (0);
577 curr_lexer->push_start_state (COMMAND_START); 604 curr_lexer->push_start_state (COMMAND_START);
578 } 605 }
579 else 606 else
694 curr_lexer->xunput (','); 721 curr_lexer->xunput (',');
695 } 722 }
696 } 723 }
697 else 724 else
698 { 725 {
699 if (tok == ',' || tok == ';' 726 if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
700 || curr_lexer->previous_token_is_binop ()) 727 || curr_lexer->previous_token_is_binop ())
701 { 728 {
702 curr_lexer->current_input_column++; 729 curr_lexer->current_input_column++;
703 int retval = curr_lexer->handle_string ('\''); 730 int retval = curr_lexer->handle_string ('\'');
704 return curr_lexer->count_token_internal (retval); 731 return curr_lexer->count_token_internal (retval);
734 761
735 if (curr_lexer->whitespace_is_significant ()) 762 if (curr_lexer->whitespace_is_significant ())
736 { 763 {
737 if (curr_lexer->space_follows_previous_token ()) 764 if (curr_lexer->space_follows_previous_token ())
738 { 765 {
739 if (tok == '[' || tok == '{' 766 if (tok == ',' || tok == ';' || tok == '[' || tok == '{'
740 || curr_lexer->previous_token_is_binop ()) 767 || curr_lexer->previous_token_is_binop ())
741 { 768 {
742 curr_lexer->current_input_column++; 769 curr_lexer->current_input_column++;
743 int retval = curr_lexer->handle_string ('"'); 770 int retval = curr_lexer->handle_string ('"');
744 return curr_lexer->count_token_internal (retval); 771 return curr_lexer->count_token_internal (retval);
783 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); } 810 ">=" { return curr_lexer->handle_op (">=", EXPR_GE); }
784 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } 811 "&" { return curr_lexer->handle_op ("&", EXPR_AND); }
785 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } 812 "|" { return curr_lexer->handle_op ("|", EXPR_OR); }
786 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } 813 "<" { return curr_lexer->handle_op ("<", EXPR_LT); }
787 ">" { return curr_lexer->handle_op (">", EXPR_GT); } 814 ">" { return curr_lexer->handle_op (">", EXPR_GT); }
788 "+" { return curr_lexer->handle_op ("+", '+'); }
789 "-" { return curr_lexer->handle_op ("-", '-'); }
790 "*" { return curr_lexer->handle_op ("*", '*'); } 815 "*" { return curr_lexer->handle_op ("*", '*'); }
791 "/" { return curr_lexer->handle_op ("/", '/'); } 816 "/" { return curr_lexer->handle_op ("/", '/'); }
792 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } 817 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); }
793 "^" { return curr_lexer->handle_op ("^", POW); } 818 "^" { return curr_lexer->handle_op ("^", POW); }
794 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } 819 "**" { return curr_lexer->handle_incompatible_op ("**", POW); }
795 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } 820 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); }
796 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } 821 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
797 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } 822 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); }
798 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } 823 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); }
799 "~" { return curr_lexer->handle_op ("~", EXPR_NOT); }
800 "!" { return curr_lexer->handle_incompatible_op ("!", EXPR_NOT); }
801 ";" { return curr_lexer->handle_op (";", ';', true, true); } 824 ";" { return curr_lexer->handle_op (";", ';', true, true); }
825
826 "+" {
827 int tok = curr_lexer->handle_unary_op ("+", '+');
828
829 if (tok < 0)
830 {
831 yyless (0);
832 curr_lexer->xunput (',');
833 }
834 else
835 return tok;
836 }
837
838 "-" {
839 int prev_tok = curr_lexer->previous_token_value ();
840 bool space_before = curr_lexer->space_follows_previous_token ();
841 int c = curr_lexer->text_yyinput ();
842 curr_lexer->xunput (c);
843 bool space_after = (c == ' ' || c == '\t');
844
845 if (space_before && ! space_after
846 && curr_lexer->previous_token_may_be_command ())
847 {
848 yyless (0);
849 curr_lexer->push_start_state (COMMAND_START);
850 }
851 else
852 {
853 int tok = curr_lexer->handle_unary_op ("-", '-');
854
855 if (tok < 0)
856 {
857 yyless (0);
858 curr_lexer->xunput (',');
859 }
860 else
861 return tok;
862 }
863 }
864
865 "~" {
866 int tok = curr_lexer->handle_unary_op ("~", EXPR_NOT);
867
868 if (tok < 0)
869 {
870 yyless (0);
871 curr_lexer->xunput (',');
872 }
873 else
874 return tok;
875 }
876
877 "!" {
878 int tok = curr_lexer->handle_incompatible_unary_op ("!", EXPR_NOT);
879
880 if (tok < 0)
881 {
882 yyless (0);
883 curr_lexer->xunput (',');
884 }
885 else
886 return tok;
887 }
802 888
803 "," { 889 "," {
804 return curr_lexer->handle_op 890 return curr_lexer->handle_op
805 (",", ',', true, ! curr_lexer->looking_at_object_index.front ()); 891 (",", ',', true, ! curr_lexer->looking_at_object_index.front ());
806 } 892 }
808 ".'" { 894 ".'" {
809 return curr_lexer->handle_op (".'", TRANSPOSE, true, false); 895 return curr_lexer->handle_op (".'", TRANSPOSE, true, false);
810 } 896 }
811 897
812 "++" { 898 "++" {
813 return curr_lexer->handle_incompatible_op 899 int tok = curr_lexer->handle_incompatible_unary_op
814 ("++", PLUS_PLUS, true, false, true); 900 ("++", PLUS_PLUS, true, false, true);
901
902 if (tok < 0)
903 {
904 yyless (0);
905 curr_lexer->xunput (',');
906 }
907 else
908 return tok;
815 } 909 }
816 910
817 "--" { 911 "--" {
818 ; 912 int tok = curr_lexer->handle_incompatible_unary_op
819 return curr_lexer->handle_incompatible_op 913 ("--", MINUS_MINUS, true, false, true);
820 ("--", MINUS_MINUS, true, false, true); 914
915 if (tok < 0)
916 {
917 yyless (0);
918 curr_lexer->xunput (',');
919 }
920 else
921 return tok;
821 } 922 }
822 923
823 "(" { 924 "(" {
824 curr_lexer->lexer_debug ("("); 925 curr_lexer->lexer_debug ("(");
825 926
826 // If we are looking for an object index, then push TRUE for 927 bool unput_comma = false;
827 // looking_at_object_index. Otherwise, just push whatever state 928
828 // is current (so that we can pop it off the stack when we find 929 if (curr_lexer->whitespace_is_significant ()
829 // the matching close paren). 930 && curr_lexer->space_follows_previous_token ())
830 931 {
831 curr_lexer->looking_at_object_index.push_front 932 int tok = curr_lexer->previous_token_value ();
832 (curr_lexer->looking_for_object_index); 933
833 934 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
834 curr_lexer->looking_at_indirect_ref = false; 935 || curr_lexer->previous_token_is_binop ()))
835 curr_lexer->looking_for_object_index = false; 936 unput_comma = true;
836 curr_lexer->at_beginning_of_statement = false; 937 }
837 938
838 curr_lexer->nesting_level.paren (); 939 if (unput_comma)
839 curr_lexer->decrement_promptflag (); 940 {
840 941 yyless (0);
841 return curr_lexer->handle_token ('('); 942 curr_lexer->xunput (',');
943 }
944 else
945 {
946 // If we are looking for an object index, then push TRUE for
947 // looking_at_object_index. Otherwise, just push whatever state
948 // is current (so that we can pop it off the stack when we find
949 // the matching close paren).
950
951 curr_lexer->looking_at_object_index.push_front
952 (curr_lexer->looking_for_object_index);
953
954 curr_lexer->looking_at_indirect_ref = false;
955 curr_lexer->looking_for_object_index = false;
956 curr_lexer->at_beginning_of_statement = false;
957
958 curr_lexer->nesting_level.paren ();
959 curr_lexer->decrement_promptflag ();
960
961 return curr_lexer->handle_token ('(');
962 }
842 } 963 }
843 964
844 ")" { 965 ")" {
845 curr_lexer->lexer_debug (")"); 966 curr_lexer->lexer_debug (")");
846 967
1100 } 1221 }
1101 1222
1102 "{" { 1223 "{" {
1103 curr_lexer->lexer_debug ("{"); 1224 curr_lexer->lexer_debug ("{");
1104 1225
1105 curr_lexer->nesting_level.brace (); 1226 bool unput_comma = false;
1106 1227
1107 curr_lexer->looking_at_object_index.push_front 1228 if (curr_lexer->whitespace_is_significant ()
1108 (curr_lexer->looking_for_object_index); 1229 && curr_lexer->space_follows_previous_token ())
1109 1230 {
1110 curr_lexer->current_input_column += yyleng; 1231 int tok = curr_lexer->previous_token_value ();
1111 curr_lexer->looking_for_object_index = false; 1232
1112 curr_lexer->at_beginning_of_statement = false; 1233 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{'
1113 1234 || curr_lexer->previous_token_is_binop ()))
1114 curr_lexer->decrement_promptflag (); 1235 unput_comma = true;
1115 1236 }
1116 curr_lexer->braceflag++; 1237
1117 1238 if (unput_comma)
1118 curr_lexer->push_start_state (MATRIX_START); 1239 {
1119 1240 yyless (0);
1120 return curr_lexer->count_token ('{'); 1241 curr_lexer->xunput (',');
1242 }
1243 else
1244 {
1245 curr_lexer->nesting_level.brace ();
1246
1247 curr_lexer->looking_at_object_index.push_front
1248 (curr_lexer->looking_for_object_index);
1249
1250 curr_lexer->current_input_column += yyleng;
1251 curr_lexer->looking_for_object_index = false;
1252 curr_lexer->at_beginning_of_statement = false;
1253
1254 curr_lexer->decrement_promptflag ();
1255
1256 curr_lexer->braceflag++;
1257
1258 curr_lexer->push_start_state (MATRIX_START);
1259
1260 return curr_lexer->count_token ('{');
1261 }
1121 } 1262 }
1122 1263
1123 "}" { 1264 "}" {
1124 curr_lexer->lexer_debug ("}"); 1265 curr_lexer->lexer_debug ("}");
1125 1266
1900 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d", 2041 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d",
1901 input_line_number); 2042 input_line_number);
1902 // fall through ... 2043 // fall through ...
1903 2044
1904 case persistent_kw: 2045 case persistent_kw:
2046 case global_kw:
2047 looking_at_decl_list = true;
1905 break; 2048 break;
1906 2049
1907 case case_kw: 2050 case case_kw:
1908 case elseif_kw: 2051 case elseif_kw:
1909 case global_kw:
1910 case until_kw: 2052 case until_kw:
1911 break; 2053 break;
1912 2054
1913 case end_kw: 2055 case end_kw:
1914 if (inside_any_object_index () 2056 if (inside_any_object_index ()
3257 3399
3258 int 3400 int
3259 octave_lexer::handle_op (const char *pattern, int tok, bool convert, 3401 octave_lexer::handle_op (const char *pattern, int tok, bool convert,
3260 bool bos, bool qit) 3402 bool bos, bool qit)
3261 { 3403 {
3404 lexer_debug (pattern);
3405
3262 return handle_op_internal (pattern, tok, convert, bos, qit, true); 3406 return handle_op_internal (pattern, tok, convert, bos, qit, true);
3263 } 3407 }
3264 3408
3265 int 3409 int
3266 octave_lexer::handle_incompatible_op (const char *pattern, int tok, 3410 octave_lexer::handle_incompatible_op (const char *pattern, int tok,
3267 bool convert, bool bos, bool qit) 3411 bool convert, bool bos, bool qit)
3268 { 3412 {
3413 lexer_debug (pattern);
3414
3269 return handle_op_internal (pattern, tok, convert, bos, qit, false); 3415 return handle_op_internal (pattern, tok, convert, bos, qit, false);
3416 }
3417
3418 bool
3419 octave_lexer::maybe_unput_comma_before_unary_op (int tok)
3420 {
3421 int prev_tok = previous_token_value ();
3422
3423 bool unput_comma = false;
3424
3425 if (whitespace_is_significant () && space_follows_previous_token ())
3426 {
3427 int c = text_yyinput ();
3428 xunput (c);
3429
3430 bool space_after = (c == ' ' || c == '\t');
3431
3432 if (! (prev_tok == ';' || prev_tok == ','
3433 || prev_tok == '[' || prev_tok == '{'
3434 || previous_token_is_binop ()
3435 || ((tok == '+' || tok == '-') && space_after)))
3436 unput_comma = true;
3437 }
3438
3439 return unput_comma;
3440 }
3441
3442 int
3443 octave_lexer::handle_unary_op (const char *pattern, int tok, bool convert,
3444 bool bos, bool qit)
3445 {
3446 lexer_debug (pattern);
3447
3448 return maybe_unput_comma_before_unary_op (tok)
3449 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, true);
3450 }
3451
3452 int
3453 octave_lexer::handle_incompatible_unary_op (const char *pattern, int tok,
3454 bool convert, bool bos, bool qit)
3455 {
3456 lexer_debug (pattern);
3457
3458 return maybe_unput_comma_before_unary_op (tok)
3459 ? -1 : handle_op_internal (pattern, tok, convert, bos, qit, false);
3270 } 3460 }
3271 3461
3272 int 3462 int
3273 octave_lexer::handle_assign_op (const char *pattern, int tok) 3463 octave_lexer::handle_assign_op (const char *pattern, int tok)
3274 { 3464 {
3289 3479
3290 int 3480 int
3291 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, 3481 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert,
3292 bool bos, bool qit, bool compat) 3482 bool bos, bool qit, bool compat)
3293 { 3483 {
3294 lexer_debug (pattern);
3295
3296 if (! compat) 3484 if (! compat)
3297 gripe_matlab_incompatible_operator (flex_yytext ()); 3485 gripe_matlab_incompatible_operator (flex_yytext ());
3298 3486
3299 push_token (new token (tok, input_line_number, current_input_column)); 3487 push_token (new token (tok, input_line_number, current_input_column));
3300 3488