comparison libinterp/parse-tree/lex.ll @ 16257:db7f07b22b9b

1/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:14:41 -0400
parents 12bf6a3f8c45
children 5c32368509a2 0b5ab09dfce4
comparison
equal deleted inserted replaced
16255:12bf6a3f8c45 16257:db7f07b22b9b
48 48
49 %x INPUT_FILE_START 49 %x INPUT_FILE_START
50 50
51 %x BLOCK_COMMENT_START 51 %x BLOCK_COMMENT_START
52 %x LINE_COMMENT_START 52 %x LINE_COMMENT_START
53
54 %x KLUGE
53 55
54 %{ 56 %{
55 57
56 #include <cctype> 58 #include <cctype>
57 #include <cstring> 59 #include <cstring>
222 curr_lexer->at_beginning_of_statement = false; 224 curr_lexer->at_beginning_of_statement = false;
223 225
224 curr_lexer->current_input_column++; 226 curr_lexer->current_input_column++;
225 int tok = curr_lexer->handle_string (yytext[0]); 227 int tok = curr_lexer->handle_string (yytext[0]);
226 228
227 return curr_lexer->count_token (tok); 229 return curr_lexer->count_token_internal (tok);
228 } 230 }
229 231
230 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* { 232 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* {
231 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*"); 233 curr_lexer->lexer_debug ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*");
232 234
234 236
235 curr_lexer->looking_for_object_index = false; 237 curr_lexer->looking_for_object_index = false;
236 curr_lexer->at_beginning_of_statement = false; 238 curr_lexer->at_beginning_of_statement = false;
237 239
238 return curr_lexer->handle_token (tok, SQ_STRING); 240 return curr_lexer->handle_token (tok, SQ_STRING);
241 }
242
243 <MATRIX_START>{S}* {
244 curr_lexer->lexer_debug ("<MATRIX_START>{S}*");
245
246 curr_lexer->mark_previous_token_trailing_space ();
247 }
248
249 <MATRIX_START>{NL} {
250 curr_lexer->lexer_debug ("<MATRIX_START>{NL}");
251
252 int tok = curr_lexer->previous_token_value ();
253
254 if (! (tok == ';' || tok == '[' || tok == '{'))
255 curr_lexer->xunput (',');
256 }
257
258 <KLUGE>@ {
259 curr_lexer->lexer_debug ("<KLUGE>@");
260 curr_lexer->pop_start_state ();
261 return curr_lexer->count_token (CHOOSE_ASSIGNMENT);
239 } 262 }
240 263
241 %{ 264 %{
242 // For this and the next two rules, we're looking at ']', and we 265 // For this and the next two rules, we're looking at ']', and we
243 // need to know if the next token is '=' or '=='. 266 // need to know if the next token is '=' or '=='.
250 // after seeing a ']' character... 273 // after seeing a ']' character...
251 274
252 // FIXME -- we need to handle block comments here. 275 // FIXME -- we need to handle block comments here.
253 %} 276 %}
254 277
255 <MATRIX_START>{SNLCMT}*\]{S}* { 278 <MATRIX_START>\] {
256 curr_lexer->lexer_debug ("<MATRIX_START>{SNLCMT}*\\]{S}*"); 279 curr_lexer->lexer_debug ("<MATRIX_START>\\]");
257 280
258 curr_lexer->scan_for_comments (yytext); 281 curr_lexer->scan_for_comments (yytext);
259 curr_lexer->fixup_column_count (yytext); 282 curr_lexer->fixup_column_count (yytext);
260 283
261 curr_lexer->looking_at_object_index.pop_front (); 284 curr_lexer->looking_at_object_index.pop_front ();
266 int c = yytext[yyleng-1]; 289 int c = yytext[yyleng-1];
267 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE); 290 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
268 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); 291 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
269 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, ']'); 292 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, ']');
270 293
271 if (spc_gobbled) 294 return curr_lexer->count_token (']');
272 curr_lexer->xunput (' ');
273
274 return curr_lexer->count_token (tok_to_return);
275 } 295 }
276 296
277 %{ 297 %{
278 // FIXME -- we need to handle block comments here. 298 // FIXME -- we need to handle block comments here.
279 %} 299 %}
280 300
281 <MATRIX_START>{SNLCMT}*\}{S}* { 301 <MATRIX_START>\} {
282 curr_lexer->lexer_debug ("<MATRIX_START>{SNLCMT}*\\}{S}*"); 302 curr_lexer->lexer_debug ("<MATRIX_START>\\}*");
283 303
284 curr_lexer->scan_for_comments (yytext); 304 curr_lexer->scan_for_comments (yytext);
285 curr_lexer->fixup_column_count (yytext); 305 curr_lexer->fixup_column_count (yytext);
286 306
287 curr_lexer->looking_at_object_index.pop_front (); 307 curr_lexer->looking_at_object_index.pop_front ();
292 int c = yytext[yyleng-1]; 312 int c = yytext[yyleng-1];
293 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE); 313 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
294 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); 314 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
295 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, '}'); 315 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, '}');
296 316
297 if (spc_gobbled) 317 return curr_lexer->count_token ('}');
298 curr_lexer->xunput (' '); 318 }
299 319
300 return curr_lexer->count_token (tok_to_return); 320 \[ {
301 } 321 curr_lexer->lexer_debug ("\\[");
302
303 %{
304 // Commas are element separators in matrix constants. If we don't
305 // check for continuations here we can end up inserting too many
306 // commas.
307 %}
308
309 <MATRIX_START>{S}*\,{S}* {
310 curr_lexer->lexer_debug ("<MATRIX_START>{S}*\\,{S}*");
311
312 curr_lexer->current_input_column += yyleng;
313
314 int tmp = curr_lexer->eat_continuation ();
315
316 curr_lexer->quote_is_transpose = false;
317 curr_lexer->convert_spaces_to_comma = true;
318 curr_lexer->looking_for_object_index = false;
319 curr_lexer->at_beginning_of_statement = false;
320
321 if (! curr_lexer->looking_at_object_index.front ())
322 {
323 if ((tmp & octave_lexer::NEWLINE) == octave_lexer::NEWLINE)
324 {
325 curr_lexer->maybe_warn_separator_insert (';');
326
327 curr_lexer->xunput (';');
328 }
329 }
330
331 return curr_lexer->count_token (',');
332 }
333
334 %{
335 // In some cases, spaces in matrix constants can turn into commas.
336 // If commas are required, spaces are not important in matrix
337 // constants so we just eat them. If we don't check for continuations
338 // here we can end up inserting too many commas.
339 %}
340
341 <MATRIX_START>{S}+ {
342 curr_lexer->lexer_debug ("<MATRIX_START>{S}+");
343
344 curr_lexer->current_input_column += yyleng;
345
346 curr_lexer->at_beginning_of_statement = false;
347
348 int tmp = curr_lexer->eat_continuation ();
349
350 if (! curr_lexer->looking_at_object_index.front ())
351 {
352 bool bin_op = curr_lexer->next_token_is_bin_op (true);
353 bool postfix_un_op = curr_lexer->next_token_is_postfix_unary_op (true);
354 bool sep_op = curr_lexer->next_token_is_sep_op ();
355
356 if (! (postfix_un_op || bin_op || sep_op)
357 && curr_lexer->nesting_level.is_bracket_or_brace ()
358 && curr_lexer->convert_spaces_to_comma)
359 {
360 if ((tmp & octave_lexer::NEWLINE) == octave_lexer::NEWLINE)
361 {
362 curr_lexer->maybe_warn_separator_insert (';');
363
364 curr_lexer->xunput (';');
365 }
366
367 curr_lexer->quote_is_transpose = false;
368 curr_lexer->convert_spaces_to_comma = true;
369 curr_lexer->looking_for_object_index = false;
370
371 curr_lexer->maybe_warn_separator_insert (',');
372
373 return curr_lexer->count_token (',');
374 }
375 }
376 }
377
378 %{
379 // Semicolons are handled as row separators in matrix constants. If we
380 // don't eat whitespace here we can end up inserting too many
381 // semicolons.
382
383 // FIXME -- we need to handle block comments here.
384 %}
385
386 <MATRIX_START>{SNLCMT}*;{SNLCMT}* {
387 curr_lexer->lexer_debug ("<MATRIX_START>{SNLCMT}*;{SNLCMT}*");
388
389 curr_lexer->scan_for_comments (yytext);
390 curr_lexer->fixup_column_count (yytext);
391 curr_lexer->eat_whitespace ();
392
393 curr_lexer->quote_is_transpose = false;
394 curr_lexer->convert_spaces_to_comma = true;
395 curr_lexer->looking_for_object_index = false;
396 curr_lexer->at_beginning_of_statement = false;
397
398 return curr_lexer->count_token (';');
399 }
400
401 %{
402 // In some cases, new lines can also become row separators. If we
403 // don't eat whitespace here we can end up inserting too many
404 // semicolons.
405
406 // FIXME -- we need to handle block comments here.
407 %}
408
409 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* |
410 <MATRIX_START>{S}*{NL}{SNLCMT}* {
411 curr_lexer->lexer_debug ("<MATRIX_START>{S}*{COMMENT}{SNLCMT}*|<MATRIX_START>{S}*{NL}{SNLCMT}*");
412
413 curr_lexer->scan_for_comments (yytext);
414 curr_lexer->fixup_column_count (yytext);
415 curr_lexer->eat_whitespace ();
416
417 curr_lexer->quote_is_transpose = false;
418 curr_lexer->convert_spaces_to_comma = true;
419 curr_lexer->at_beginning_of_statement = false;
420
421 if (curr_lexer->nesting_level.none ())
422 return LEXICAL_ERROR;
423
424 if (! curr_lexer->looking_at_object_index.front ()
425 && curr_lexer->nesting_level.is_bracket_or_brace ())
426 {
427 curr_lexer->maybe_warn_separator_insert (';');
428
429 return curr_lexer->count_token (';');
430 }
431 }
432
433 \[{S}* {
434 curr_lexer->lexer_debug ("\\[{S}*");
435 322
436 curr_lexer->nesting_level.bracket (); 323 curr_lexer->nesting_level.bracket ();
437 324
438 curr_lexer->looking_at_object_index.push_front (false); 325 curr_lexer->looking_at_object_index.push_front (false);
439 326
448 curr_lexer->looking_at_return_list = true; 335 curr_lexer->looking_at_return_list = true;
449 else 336 else
450 curr_lexer->looking_at_matrix_or_assign_lhs = true; 337 curr_lexer->looking_at_matrix_or_assign_lhs = true;
451 338
452 curr_lexer->decrement_promptflag (); 339 curr_lexer->decrement_promptflag ();
453 curr_lexer->eat_whitespace ();
454 340
455 curr_lexer->bracketflag++; 341 curr_lexer->bracketflag++;
456 342
457 curr_lexer->push_start_state (MATRIX_START); 343 curr_lexer->push_start_state (MATRIX_START);
458 344
618 %} 504 %}
619 505
620 {NUMBER}{Im} { 506 {NUMBER}{Im} {
621 curr_lexer->lexer_debug ("{NUMBER}{Im}"); 507 curr_lexer->lexer_debug ("{NUMBER}{Im}");
622 508
623 curr_lexer->handle_number (); 509 if (curr_lexer->whitespace_is_significant ()
624 return curr_lexer->count_token (IMAG_NUM); 510 && curr_lexer->space_follows_previous_token ()
511 && ! curr_lexer->previous_token_is_binop ())
512 {
513 yyless (0);
514 unput (',');
515 }
516 else
517 {
518 curr_lexer->handle_number ();
519 return curr_lexer->count_token_internal (IMAG_NUM);
520 }
625 } 521 }
626 522
627 %{ 523 %{
628 // Real numbers. Don't grab the '.' part of a dot operator as part of 524 // Real numbers. Don't grab the '.' part of a dot operator as part of
629 // the constant. 525 // the constant.
630 %} 526 %}
631 527
632 {D}+/\.[\*/\\^\'] | 528 {D}+/\.[\*/\\^\'] |
633 {NUMBER} { 529 {NUMBER} {
634 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}"); 530 curr_lexer->lexer_debug ("{D}+/\\.[\\*/\\^\\']|{NUMBER}");
635 curr_lexer->handle_number (); 531
636 return curr_lexer->count_token (NUM); 532 if (curr_lexer->whitespace_is_significant ()
533 && curr_lexer->space_follows_previous_token ()
534 && ! curr_lexer->previous_token_is_binop ())
535 {
536 yyless (0);
537 unput (',');
538 }
539 else
540 {
541 curr_lexer->handle_number ();
542 return curr_lexer->count_token_internal (NUM);
543 }
637 } 544 }
638 545
639 %{ 546 %{
640 // Eat whitespace. Whitespace inside matrix constants is handled by 547 // Eat whitespace. Whitespace inside matrix constants is handled by
641 // the <MATRIX_START> start state code above. 548 // the <MATRIX_START> start state code above.
669 %{ 576 %{
670 // Identifiers. Truncate the token at the first space or tab but 577 // Identifiers. Truncate the token at the first space or tab but
671 // don't write directly on yytext. 578 // don't write directly on yytext.
672 %} 579 %}
673 580
674 {IDENT}{S}* { 581 {IDENT} {
675 curr_lexer->lexer_debug ("{IDENT}{S}*"); 582 curr_lexer->lexer_debug ("{IDENT}");
676 583
677 int id_tok = curr_lexer->handle_identifier (); 584 if (curr_lexer->whitespace_is_significant ()
678 585 && curr_lexer->space_follows_previous_token ()
679 if (id_tok >= 0) 586 && ! curr_lexer->previous_token_is_binop ())
680 return curr_lexer->count_token (id_tok); 587 {
588 yyless (0);
589 unput (',');
590 }
591 else
592 {
593 if (curr_lexer->previous_token_may_be_command ())
594 {
595 yyless (0);
596 curr_lexer->push_start_state (COMMAND_START);
597 }
598 else
599 {
600 int id_tok = curr_lexer->handle_identifier ();
601
602 if (id_tok >= 0)
603 return curr_lexer->count_token_internal (id_tok);
604 }
605 }
681 } 606 }
682 607
683 %{ 608 %{
684 // Superclass method identifiers. 609 // Superclass method identifiers.
685 %} 610 %}
692 617
693 if (id_tok >= 0) 618 if (id_tok >= 0)
694 { 619 {
695 curr_lexer->looking_for_object_index = true; 620 curr_lexer->looking_for_object_index = true;
696 621
697 return curr_lexer->count_token (SUPERCLASSREF); 622 return curr_lexer->count_token_internal (SUPERCLASSREF);
698 } 623 }
699 } 624 }
700 625
701 %{ 626 %{
702 // Metaclass query 627 // Metaclass query
710 635
711 if (id_tok >= 0) 636 if (id_tok >= 0)
712 { 637 {
713 curr_lexer->looking_for_object_index = true; 638 curr_lexer->looking_for_object_index = true;
714 639
715 return curr_lexer->count_token (METAQUERY); 640 return curr_lexer->count_token_internal (METAQUERY);
716 } 641 }
717 } 642 }
718 643
719 %{ 644 %{
720 // Function handles and superclass references 645 // Function handles and superclass references
771 696
772 "'" { 697 "'" {
773 curr_lexer->lexer_debug ("'"); 698 curr_lexer->lexer_debug ("'");
774 699
775 curr_lexer->current_input_column++; 700 curr_lexer->current_input_column++;
776 curr_lexer->convert_spaces_to_comma = true; 701
777 702 int tok = curr_lexer->previous_token_value ();
778 if (curr_lexer->quote_is_transpose) 703
779 { 704 bool transpose = false;
780 curr_lexer->do_comma_insert_check (); 705
781 return curr_lexer->count_token (QUOTE); 706 if (curr_lexer->whitespace_is_significant ())
707 {
708 if (curr_lexer->space_follows_previous_token ())
709 {
710 if (tok == '[' || tok == '{'
711 || curr_lexer->previous_token_is_binop ())
712 {
713 int retval = curr_lexer->handle_string ('\'');
714 return curr_lexer->count_token_internal (retval);
715 }
716 else
717 {
718 yyless (0);
719 curr_lexer->xunput (',');
720 }
721 }
722 else
723 {
724 if (tok == ',' || tok == ';'
725 || curr_lexer->previous_token_is_binop ())
726 {
727 int retval = curr_lexer->handle_string ('\'');
728 return curr_lexer->count_token_internal (retval);
729 }
730 else
731 return curr_lexer->count_token (QUOTE);
732 }
782 } 733 }
783 else 734 else
784 { 735 {
785 int tok = curr_lexer->handle_string ('\''); 736 if (tok == NAME || tok == NUM || tok == IMAG_NUM
786 return curr_lexer->count_token (tok); 737 || tok == ')' || tok == ']' || tok == '}')
738 return curr_lexer->count_token (QUOTE);
739 else
740 {
741 int retval = curr_lexer->handle_string ('\'');
742 return curr_lexer->count_token_internal (retval);
743 }
787 } 744 }
788 } 745 }
789 746
790 %{ 747 %{
791 // Double quotes always begin strings. 748 // Double quotes always begin strings.
795 curr_lexer->lexer_debug ("\""); 752 curr_lexer->lexer_debug ("\"");
796 753
797 curr_lexer->current_input_column++; 754 curr_lexer->current_input_column++;
798 int tok = curr_lexer->handle_string ('"'); 755 int tok = curr_lexer->handle_string ('"');
799 756
800 return curr_lexer->count_token (tok); 757 return curr_lexer->count_token_internal (tok);
801 } 758 }
802 759
803 %{ 760 %{
804 // Other operators. 761 // Other operators.
805 %} 762 %}
826 "*" { return curr_lexer->handle_op ("*", '*'); } 783 "*" { return curr_lexer->handle_op ("*", '*'); }
827 "/" { return curr_lexer->handle_op ("/", '/'); } 784 "/" { return curr_lexer->handle_op ("/", '/'); }
828 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } 785 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); }
829 "^" { return curr_lexer->handle_op ("^", POW); } 786 "^" { return curr_lexer->handle_op ("^", POW); }
830 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } 787 "**" { return curr_lexer->handle_incompatible_op ("**", POW); }
831 "=" { return curr_lexer->handle_op ("=", '=', true, false); }
832 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } 788 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); }
833 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } 789 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
834 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); } 790 "<<" { return curr_lexer->handle_incompatible_op ("<<", LSHIFT); }
835 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); } 791 ">>" { return curr_lexer->handle_incompatible_op (">>", RSHIFT); }
836 "~" { return curr_lexer->handle_op ("~", EXPR_NOT); } 792 "~" { return curr_lexer->handle_op ("~", EXPR_NOT); }
912 868
913 return curr_lexer->handle_token ('.'); 869 return curr_lexer->handle_token ('.');
914 } 870 }
915 871
916 %{ 872 %{
917 // op= operators. 873 // = and op= operators.
918 %} 874 %}
919 875
920 "+=" { return curr_lexer->handle_incompatible_op ("+=", ADD_EQ); } 876 "=" {
921 "-=" { return curr_lexer->handle_incompatible_op ("-=", SUB_EQ); } 877 int tok = curr_lexer->handle_assign_op ("=", '=');
922 "*=" { return curr_lexer->handle_incompatible_op ("*=", MUL_EQ); } 878 if (tok < 0)
923 "/=" { return curr_lexer->handle_incompatible_op ("/=", DIV_EQ); } 879 {
924 "\\=" { return curr_lexer->handle_incompatible_op ("\\=", LEFTDIV_EQ); } 880 yyless (0);
925 ".+=" { return curr_lexer->handle_incompatible_op (".+=", ADD_EQ); } 881 curr_lexer->xunput ('@');
926 ".-=" { return curr_lexer->handle_incompatible_op (".-=", SUB_EQ); } 882 curr_lexer->push_start_state (KLUGE);
927 ".*=" { return curr_lexer->handle_incompatible_op (".*=", EMUL_EQ); } 883 }
928 "./=" { return curr_lexer->handle_incompatible_op ("./=", EDIV_EQ); } 884 else
929 ".\\=" { return curr_lexer->handle_incompatible_op (".\\=", ELEFTDIV_EQ); } 885 return tok;
930 "^=" { return curr_lexer->handle_incompatible_op ("^=", POW_EQ); } 886 }
931 "**=" { return curr_lexer->handle_incompatible_op ("^=", POW_EQ); } 887
932 ".^=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); } 888 "+=" {
933 ".**=" { return curr_lexer->handle_incompatible_op (".^=", EPOW_EQ); } 889 int tok = curr_lexer->handle_incompatible_assign_op ("+=", ADD_EQ);
934 "&=" { return curr_lexer->handle_incompatible_op ("&=", AND_EQ); } 890 if (tok < 0)
935 "|=" { return curr_lexer->handle_incompatible_op ("|=", OR_EQ); } 891 {
936 "<<=" { return curr_lexer->handle_incompatible_op ("<<=", LSHIFT_EQ); } 892 yyless (0);
937 ">>=" { return curr_lexer->handle_incompatible_op (">>=", RSHIFT_EQ); } 893 curr_lexer->xunput ('@');
938 894 curr_lexer->push_start_state (KLUGE);
939 \{{S}* { 895 }
940 curr_lexer->lexer_debug ("\\{{S}*"); 896 else
897 return tok;
898 }
899
900 "-=" {
901 int tok = curr_lexer->handle_incompatible_assign_op ("-=", SUB_EQ);
902 if (tok < 0)
903 {
904 yyless (0);
905 curr_lexer->xunput ('@');
906 curr_lexer->push_start_state (KLUGE);
907 }
908 else
909 return tok;
910 }
911
912 "*=" {
913 int tok = curr_lexer->handle_incompatible_assign_op ("*=", MUL_EQ);
914 if (tok < 0)
915 {
916 yyless (0);
917 curr_lexer->xunput ('@');
918 curr_lexer->push_start_state (KLUGE);
919 }
920 else
921 return tok;
922 }
923
924 "/=" {
925 int tok = curr_lexer->handle_incompatible_assign_op ("/=", DIV_EQ);
926 if (tok < 0)
927 {
928 yyless (0);
929 curr_lexer->xunput ('@');
930 curr_lexer->push_start_state (KLUGE);
931 }
932 else
933 return tok;
934 }
935
936 "\\=" {
937 int tok = curr_lexer->handle_incompatible_assign_op ("\\=", LEFTDIV_EQ);
938 if (tok < 0)
939 {
940 yyless (0);
941 curr_lexer->xunput ('@');
942 curr_lexer->push_start_state (KLUGE);
943 }
944 else
945 return tok;
946 }
947
948 ".+=" {
949 int tok = curr_lexer->handle_incompatible_assign_op (".+=", ADD_EQ);
950 if (tok < 0)
951 {
952 yyless (0);
953 curr_lexer->xunput ('@');
954 curr_lexer->push_start_state (KLUGE);
955 }
956 else
957 return tok;
958 }
959
960 ".-=" {
961 int tok = curr_lexer->handle_incompatible_assign_op (".-=", SUB_EQ);
962 if (tok < 0)
963 {
964 yyless (0);
965 curr_lexer->xunput ('@');
966 curr_lexer->push_start_state (KLUGE);
967 }
968 else
969 return tok;
970 }
971
972 ".*=" {
973 int tok = curr_lexer->handle_incompatible_assign_op (".*=", EMUL_EQ);
974 if (tok < 0)
975 {
976 yyless (0);
977 curr_lexer->xunput ('@');
978 curr_lexer->push_start_state (KLUGE);
979 }
980 else
981 return tok;
982 }
983
984 "./=" {
985 int tok = curr_lexer->handle_incompatible_assign_op ("./=", EDIV_EQ);
986 if (tok < 0)
987 {
988 yyless (0);
989 curr_lexer->xunput ('@');
990 curr_lexer->push_start_state (KLUGE);
991 }
992 else
993 return tok;
994 }
995
996 ".\\=" {
997 int tok = curr_lexer->handle_incompatible_assign_op (".\\=", ELEFTDIV_EQ);
998 if (tok < 0)
999 {
1000 yyless (0);
1001 curr_lexer->xunput ('@');
1002 curr_lexer->push_start_state (KLUGE);
1003 }
1004 else
1005 return tok;
1006 }
1007
1008 "^=" {
1009 int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ);
1010 if (tok < 0)
1011 {
1012 yyless (0);
1013 curr_lexer->xunput ('@');
1014 curr_lexer->push_start_state (KLUGE);
1015 }
1016 else
1017 return tok;
1018 }
1019
1020 "**=" {
1021 int tok = curr_lexer->handle_incompatible_assign_op ("^=", POW_EQ);
1022 if (tok < 0)
1023 {
1024 yyless (0);
1025 curr_lexer->xunput ('@');
1026 curr_lexer->push_start_state (KLUGE);
1027 }
1028 else
1029 return tok;
1030 }
1031
1032 ".^=" {
1033 int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ);
1034 if (tok < 0)
1035 {
1036 yyless (0);
1037 curr_lexer->xunput ('@');
1038 curr_lexer->push_start_state (KLUGE);
1039 }
1040 else
1041 return tok;
1042 }
1043
1044 ".**=" {
1045 int tok = curr_lexer->handle_incompatible_assign_op (".^=", EPOW_EQ);
1046 if (tok < 0)
1047 {
1048 yyless (0);
1049 curr_lexer->xunput ('@');
1050 curr_lexer->push_start_state (KLUGE);
1051 }
1052 else
1053 return tok;
1054 }
1055
1056 "&=" {
1057 int tok = curr_lexer->handle_incompatible_assign_op ("&=", AND_EQ);
1058 if (tok < 0)
1059 {
1060 yyless (0);
1061 curr_lexer->xunput ('@');
1062 curr_lexer->push_start_state (KLUGE);
1063 }
1064 else
1065 return tok;
1066 }
1067
1068 "|=" {
1069 int tok = curr_lexer->handle_incompatible_assign_op ("|=", OR_EQ);
1070 if (tok < 0)
1071 {
1072 yyless (0);
1073 curr_lexer->xunput ('@');
1074 curr_lexer->push_start_state (KLUGE);
1075 }
1076 else
1077 return tok;
1078 }
1079
1080 "<<=" {
1081 int tok = curr_lexer->handle_incompatible_assign_op ("<<=", LSHIFT_EQ);
1082 if (tok < 0)
1083 {
1084 yyless (0);
1085 curr_lexer->xunput ('@');
1086 curr_lexer->push_start_state (KLUGE);
1087 }
1088 else
1089 return tok;
1090 }
1091
1092 ">>=" {
1093 int tok = curr_lexer->handle_incompatible_assign_op (">>=", RSHIFT_EQ);
1094 if (tok < 0)
1095 {
1096 yyless (0);
1097 curr_lexer->xunput ('@');
1098 curr_lexer->push_start_state (KLUGE);
1099 }
1100 else
1101 return tok;
1102 }
1103
1104 "{" {
1105 curr_lexer->lexer_debug ("{");
941 1106
942 curr_lexer->nesting_level.brace (); 1107 curr_lexer->nesting_level.brace ();
943 1108
944 curr_lexer->looking_at_object_index.push_front 1109 curr_lexer->looking_at_object_index.push_front
945 (curr_lexer->looking_for_object_index); 1110 (curr_lexer->looking_for_object_index);
1414 { 1579 {
1415 const token *tok = tokens.front (); 1580 const token *tok = tokens.front ();
1416 return tok ? tok->space_follows_token () : false; 1581 return tok ? tok->space_follows_token () : false;
1417 } 1582 }
1418 1583
1584 bool
1585 lexical_feedback::previous_token_is_binop (void) const
1586 {
1587 int tok = previous_token_value ();
1588
1589 return (tok == '+' || tok == '-' || tok == '@'
1590 || tok == ',' || tok == ';' || tok == '*' || tok == '/'
1591 || tok == ':' || tok == '=' || tok == ADD_EQ
1592 || tok == AND_EQ || tok == DIV_EQ || tok == EDIV
1593 || tok == EDIV_EQ || tok == ELEFTDIV || tok == ELEFTDIV_EQ
1594 || tok == EMINUS || tok == EMUL || tok == EMUL_EQ
1595 || tok == EPOW || tok == EPOW_EQ || tok == EXPR_AND
1596 || tok == EXPR_AND_AND || tok == EXPR_EQ || tok == EXPR_GE
1597 || tok == EXPR_GT || tok == EXPR_LE || tok == EXPR_LT
1598 || tok == EXPR_NE || tok == EXPR_NOT || tok == EXPR_OR
1599 || tok == EXPR_OR_OR || tok == LEFTDIV || tok == LEFTDIV_EQ
1600 || tok == LSHIFT || tok == LSHIFT_EQ || tok == MUL_EQ
1601 || tok == OR_EQ || tok == POW || tok == POW_EQ
1602 || tok == RSHIFT || tok == RSHIFT_EQ || tok == SUB_EQ);
1603 }
1604
1605 bool
1606 lexical_feedback::previous_token_may_be_command (void) const
1607 {
1608 const token *tok = tokens.front ();
1609 return tok ? tok->may_be_command () : false;
1610 }
1611
1419 static bool 1612 static bool
1420 looks_like_copyright (const std::string& s) 1613 looks_like_copyright (const std::string& s)
1421 { 1614 {
1422 bool retval = false; 1615 bool retval = false;
1423 1616
2288 xunput (c); 2481 xunput (c);
2289 current_input_column--; 2482 current_input_column--;
2290 return retval; 2483 return retval;
2291 } 2484 }
2292 2485
2486 bool
2487 octave_lexer::whitespace_is_significant (void)
2488 {
2489 return (nesting_level.is_bracket ()
2490 || (nesting_level.is_brace ()
2491 && ! looking_at_object_index.front ()));
2492 }
2493
2293 static inline bool 2494 static inline bool
2294 looks_like_hex (const char *s, int len) 2495 looks_like_hex (const char *s, int len)
2295 { 2496 {
2296 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')); 2497 return (len > 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X'));
2297 } 2498 }
2347 octave_lexer::handle_continuation (void) 2548 octave_lexer::handle_continuation (void)
2348 { 2549 {
2349 char *yytxt = flex_yytext (); 2550 char *yytxt = flex_yytext ();
2350 int yylng = flex_yyleng (); 2551 int yylng = flex_yyleng ();
2351 2552
2352 size_t offset = 1; 2553 int offset = 1;
2353 if (yytxt[0] == '\\') 2554 if (yytxt[0] == '\\')
2354 gripe_matlab_incompatible_continuation (); 2555 gripe_matlab_incompatible_continuation ();
2355 else 2556 else
2356 offset = 3; 2557 offset = 3;
2357 2558
2764 panic_impossible (); 2965 panic_impossible ();
2765 } 2966 }
2766 2967
2767 pop_start_state (); 2968 pop_start_state ();
2768 2969
2769 if (bracket_type == ']'
2770 && next_token_is_assign_op ()
2771 && ! looking_at_return_list)
2772 {
2773 retval = CLOSE_BRACE;
2774 }
2775 else if ((bracketflag || braceflag)
2776 && convert_spaces_to_comma
2777 && (nesting_level.is_bracket ()
2778 || (nesting_level.is_brace ()
2779 && ! looking_at_object_index.front ())))
2780 {
2781 bool index_op = next_token_is_index_op ();
2782
2783 // Don't insert comma if we are looking at something like
2784 //
2785 // [x{i}{j}] or [x{i}(j)]
2786 //
2787 // but do if we are looking at
2788 //
2789 // [x{i} {j}] or [x{i} (j)]
2790
2791 if (spc_gobbled || ! (bracket_type == '}' && index_op))
2792 {
2793 bool bin_op = next_token_is_bin_op (spc_gobbled);
2794
2795 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2796
2797 bool sep_op = next_token_is_sep_op ();
2798
2799 if (! (postfix_un_op || bin_op || sep_op))
2800 {
2801 maybe_warn_separator_insert (',');
2802
2803 xunput (',');
2804 return retval;
2805 }
2806 }
2807 }
2808
2809 quote_is_transpose = true; 2970 quote_is_transpose = true;
2810 convert_spaces_to_comma = true; 2971 convert_spaces_to_comma = true;
2811 2972
2812 return retval; 2973 return retval;
2813 } 2974 }
3193 // should be ignored. 3354 // should be ignored.
3194 3355
3195 int 3356 int
3196 octave_lexer::handle_identifier (void) 3357 octave_lexer::handle_identifier (void)
3197 { 3358 {
3198 bool at_bos = at_beginning_of_statement;
3199
3200 char *yytxt = flex_yytext (); 3359 char *yytxt = flex_yytext ();
3201 3360
3202 std::string tok = strip_trailing_whitespace (yytxt); 3361 std::string tok = yytxt;
3203 3362
3204 int c = yytxt[flex_yyleng()-1]; 3363 int c = yytxt[flex_yyleng()-1];
3205 3364
3206 bool cont_is_spc = (eat_continuation () != octave_lexer::NO_WHITESPACE); 3365 bool spc_gobbled = false;
3207
3208 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
3209 3366
3210 // If we are expecting a structure element, avoid recognizing 3367 // If we are expecting a structure element, avoid recognizing
3211 // keywords and other special names and return STRUCT_ELT, which is 3368 // keywords and other special names and return STRUCT_ELT, which is
3212 // a string that is also a valid identifier. But first, we have to 3369 // a string that is also a valid identifier. But first, we have to
3213 // decide whether to insert a comma. 3370 // decide whether to insert a comma.
3214 3371
3215 if (looking_at_indirect_ref) 3372 if (looking_at_indirect_ref)
3216 { 3373 {
3217 do_comma_insert_check (); 3374 // do_comma_insert_check ();
3218 3375
3219 maybe_unput_comma (spc_gobbled); 3376 // maybe_unput_comma (spc_gobbled);
3220 3377
3221 push_token (new token (STRUCT_ELT, tok, input_line_number, 3378 push_token (new token (STRUCT_ELT, tok, input_line_number,
3222 current_input_column)); 3379 current_input_column));
3223 3380
3224 quote_is_transpose = true; 3381 quote_is_transpose = true;
3225 convert_spaces_to_comma = true; 3382 convert_spaces_to_comma = true;
3226 looking_for_object_index = true; 3383 looking_for_object_index = true;
3227 3384
3228 current_input_column += flex_yyleng (); 3385 current_input_column += flex_yyleng ();
3229 3386
3387 at_beginning_of_statement = false;
3388
3230 return STRUCT_ELT; 3389 return STRUCT_ELT;
3231 } 3390 }
3232
3233 at_beginning_of_statement = false;
3234 3391
3235 // The is_keyword_token may reset 3392 // The is_keyword_token may reset
3236 // at_beginning_of_statement. For example, if it sees 3393 // at_beginning_of_statement. For example, if it sees
3237 // an else token, then the next token is at the beginning of a 3394 // an else token, then the next token is at the beginning of a
3238 // statement. 3395 // statement.
3239 3396
3397 // May set beginning_of_statement to true.
3240 int kw_token = is_keyword_token (tok); 3398 int kw_token = is_keyword_token (tok);
3241 3399
3242 // If we found a keyword token, then the beginning_of_statement flag 3400 // If we found a keyword token, then the beginning_of_statement flag
3243 // is already set. Otherwise, we won't be at the beginning of a 3401 // is already set. Otherwise, we won't be at the beginning of a
3244 // statement. 3402 // statement.
3258 3416
3259 current_input_column += flex_yyleng (); 3417 current_input_column += flex_yyleng ();
3260 quote_is_transpose = false; 3418 quote_is_transpose = false;
3261 convert_spaces_to_comma = true; 3419 convert_spaces_to_comma = true;
3262 looking_for_object_index = true; 3420 looking_for_object_index = true;
3421
3422 at_beginning_of_statement = false;
3263 3423
3264 return FCN_HANDLE; 3424 return FCN_HANDLE;
3265 } 3425 }
3266 } 3426 }
3267 3427
3307 // something like [ab,cd] = foo (), force the symbol to be inserted 3467 // something like [ab,cd] = foo (), force the symbol to be inserted
3308 // as a variable in the current symbol table. 3468 // as a variable in the current symbol table.
3309 3469
3310 if (! is_variable (tok)) 3470 if (! is_variable (tok))
3311 { 3471 {
3312 if (at_bos && spc_gobbled && can_be_command (tok) 3472 if (next_tok_is_eq
3313 && looks_like_command_arg ()) 3473 || looking_at_decl_list
3314 { 3474 || looking_at_return_list
3315 push_start_state (COMMAND_START); 3475 || (looking_at_parameter_list
3316 } 3476 && ! looking_at_initializer_expression))
3317 else if (next_tok_is_eq
3318 || looking_at_decl_list
3319 || looking_at_return_list
3320 || (looking_at_parameter_list
3321 && ! looking_at_initializer_expression))
3322 { 3477 {
3323 symbol_table::force_variable (tok); 3478 symbol_table::force_variable (tok);
3324 } 3479 }
3325 else if (looking_at_matrix_or_assign_lhs) 3480 else if (looking_at_matrix_or_assign_lhs)
3326 { 3481 {
3332 // transformation of the end keyword... 3487 // transformation of the end keyword...
3333 3488
3334 if (tok == "end") 3489 if (tok == "end")
3335 tok = "__end__"; 3490 tok = "__end__";
3336 3491
3337 push_token (new token (NAME, &(symbol_table::insert (tok)), 3492 token *tok_val = new token (NAME, &(symbol_table::insert (tok)),
3338 input_line_number, current_input_column)); 3493 input_line_number, current_input_column);
3339 3494
3340 // After seeing an identifer, it is ok to convert spaces to a comma 3495 if (at_beginning_of_statement)
3341 // (if needed). 3496 tok_val->mark_may_be_command ();
3342 3497
3343 convert_spaces_to_comma = true; 3498 push_token (tok_val);
3344
3345 if (! (next_tok_is_eq || start_state () == COMMAND_START))
3346 {
3347 quote_is_transpose = true;
3348
3349 do_comma_insert_check ();
3350
3351 maybe_unput_comma (spc_gobbled);
3352 }
3353 3499
3354 current_input_column += flex_yyleng (); 3500 current_input_column += flex_yyleng ();
3355 3501
3356 if (tok != "__end__") 3502 if (tok != "__end__")
3357 looking_for_object_index = true; 3503 looking_for_object_index = true;
3504
3505 at_beginning_of_statement = false;
3358 3506
3359 return NAME; 3507 return NAME;
3360 } 3508 }
3361 3509
3362 void 3510 void
3553 case PERSISTENT: std::cerr << "PERSISTENT\n"; break; 3701 case PERSISTENT: std::cerr << "PERSISTENT\n"; break;
3554 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; 3702 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break;
3555 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; 3703 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
3556 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; 3704 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
3557 case FCN: std::cerr << "FCN\n"; break; 3705 case FCN: std::cerr << "FCN\n"; break;
3558 case CLOSE_BRACE: std::cerr << "CLOSE_BRACE\n"; break; 3706 case CHOOSE_ASSIGNMENT: std::cerr << "CHOOSE_ASSIGNMENT\n"; break;
3559 case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break; 3707 case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break;
3560 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break; 3708 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break;
3561 case METAQUERY: std::cerr << "METAQUERY\n"; break; 3709 case METAQUERY: std::cerr << "METAQUERY\n"; break;
3562 case GET: std::cerr << "GET\n"; break; 3710 case GET: std::cerr << "GET\n"; break;
3563 case SET: std::cerr << "SET\n"; break; 3711 case SET: std::cerr << "SET\n"; break;
3661 3809
3662 case LINE_COMMENT_START: 3810 case LINE_COMMENT_START:
3663 std::cerr << "LINE_COMMENT_START" << std::endl; 3811 std::cerr << "LINE_COMMENT_START" << std::endl;
3664 break; 3812 break;
3665 3813
3814 case KLUGE:
3815 std::cerr << "KLUGE" << std::endl;
3816 break;
3817
3666 default: 3818 default:
3667 std::cerr << "UNKNOWN START STATE!" << std::endl; 3819 std::cerr << "UNKNOWN START STATE!" << std::endl;
3668 break; 3820 break;
3669 } 3821 }
3670 } 3822 }
// Thin wrapper around handle_op_internal: PATTERN, TOK, CONVERT, BOS
// and QIT are forwarded unchanged, and the final argument (named
// `compat' in handle_op_internal's signature) is fixed to false.
// Returns whatever handle_op_internal returns.
3679 int 3831 int
3680 octave_lexer::handle_incompatible_op (const char *pattern, int tok, 3832 octave_lexer::handle_incompatible_op (const char *pattern, int tok,
3681 bool convert, bool bos, bool qit) 3833 bool convert, bool bos, bool qit)
3682 { 3834 {
3683 return handle_op_internal (pattern, tok, convert, bos, qit, false); 3835 return handle_op_internal (pattern, tok, convert, bos, qit, false);
3836 }
3837
// Handle an assignment operator token TOK matched by PATTERN.
//
// PATTERN is first passed to lexer_debug.  If the previous token is
// ']' and looking_at_matrix_or_assign_lhs is set, the function returns
// -1 without calling handle_op_internal.  Otherwise it returns the
// result of handle_op_internal with convert, bos and qit all false and
// compat true.
//
// NOTE(review): the meaning of the -1 return is decided by the calling
// lexer rule, which is not visible here -- confirm how callers treat it.
3838 int
3839 octave_lexer::handle_assign_op (const char *pattern, int tok)
3840 {
3841 lexer_debug (pattern);
3842
// -1 is returned instead of TOK when this operator directly follows a
// closing ']' while looking_at_matrix_or_assign_lhs is true.
3843 return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs)
3844 ? -1 : handle_op_internal (pattern, tok, false, false, false, true);
3845 }
3846
// Same flow as handle_assign_op, except that the final (compat)
// argument passed to handle_op_internal is false instead of true.
//
// PATTERN is passed to lexer_debug.  If the previous token is ']' and
// looking_at_matrix_or_assign_lhs is set, -1 is returned; otherwise the
// result of handle_op_internal (convert, bos and qit all false) is
// returned.
//
// NOTE(review): the meaning of the -1 return is decided by the calling
// lexer rule, which is not visible here -- confirm how callers treat it.
3847 int
3848 octave_lexer::handle_incompatible_assign_op (const char *pattern, int tok)
3849 {
3850 lexer_debug (pattern);
3851
3852 return (previous_token_value_is (']') && looking_at_matrix_or_assign_lhs)
3853 ? -1 : handle_op_internal (pattern, tok, false, false, false, false);
3684 } 3854 }
3685 3855
3686 int 3856 int
3687 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert, 3857 octave_lexer::handle_op_internal (const char *pattern, int tok, bool convert,
3688 bool bos, bool qit, bool compat) 3858 bool bos, bool qit, bool compat)
3722 3892
3723 current_input_column += flex_yyleng (); 3893 current_input_column += flex_yyleng ();
3724 quote_is_transpose = false; 3894 quote_is_transpose = false;
3725 convert_spaces_to_comma = true; 3895 convert_spaces_to_comma = true;
3726 3896
3727 return count_token (tok); 3897 return count_token_internal (tok);
3728 } 3898 }
3729 3899
// Create a token object for TOK, stamped with input_line_number and
// current_input_column, hand it to push_token, and then return the
// result of count_token_internal (TOK).
//
// NOTE(review): the token is allocated with a raw `new' and the pointer
// is given to push_token; presumably push_token takes ownership --
// confirm.
3730 int 3900 int
3731 octave_lexer::count_token (int tok) 3901 octave_lexer::count_token (int tok)
3902 {
3903 token *tok_val = new token (tok, input_line_number, current_input_column);
3904
3905 push_token (tok_val);
3906
// The counting itself is delegated to count_token_internal.
3907 return count_token_internal (tok);
3908 }
3909
3910 int
3911 octave_lexer::count_token_internal (int tok)
3732 { 3912 {
3733 if (tok != '\n') 3913 if (tok != '\n')
3734 { 3914 {
3735 Vtoken_count++; 3915 Vtoken_count++;
3736 token_count++; 3916 token_count++;