comparison libinterp/parse-tree/lex.ll @ 16261:b45a90cdb0ae

3/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:26:29 -0400
parents 0b5ab09dfce4
children 072c97b0e1c5 9acb86e6ac90
comparison
equal deleted inserted replaced
16259:0b5ab09dfce4 16261:b45a90cdb0ae
147 %} 147 %}
148 148
149 D [0-9] 149 D [0-9]
150 S [ \t] 150 S [ \t]
151 NL ((\n)|(\r)|(\r\n)) 151 NL ((\n)|(\r)|(\r\n))
152 SNL ({S}|{NL}) 152 CONT ((\.\.\.)|(\\))
153 EL (\.\.\.)
154 BS (\\)
155 CONT ({EL}|{BS})
156 Im [iIjJ] 153 Im [iIjJ]
157 CCHAR [#%] 154 CCHAR [#%]
158 COMMENT ({CCHAR}.*{NL})
159 SNLCMT ({SNL}|{COMMENT})
160 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) 155 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*)
161 EXPON ([DdEe][+-]?{D}+) 156 EXPON ([DdEe][+-]?{D}+)
162 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) 157 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+))
163 158
164 ANY_INCLUDING_NL (.|{NL}) 159 ANY_INCLUDING_NL (.|{NL})
247 <MATRIX_START>{NL} { 242 <MATRIX_START>{NL} {
248 curr_lexer->lexer_debug ("<MATRIX_START>{NL}"); 243 curr_lexer->lexer_debug ("<MATRIX_START>{NL}");
249 244
250 int tok = curr_lexer->previous_token_value (); 245 int tok = curr_lexer->previous_token_value ();
251 246
252 if (! (tok == ';' || tok == '[' || tok == '{')) 247 if (! (tok == ',' || tok == ';' || tok == '[' || tok == '{'))
253 curr_lexer->xunput (','); 248 curr_lexer->xunput (',');
254 } 249 }
255 250
256 <KLUGE>@ { 251 <KLUGE>@ {
257 curr_lexer->lexer_debug ("<KLUGE>@"); 252 curr_lexer->lexer_debug ("<KLUGE>@");
593 588
594 %{ 589 %{
595 // Superclass method identifiers. 590 // Superclass method identifiers.
596 %} 591 %}
597 592
598 {IDENT}@{IDENT}{S}* | 593 {IDENT}@{IDENT} |
599 {IDENT}@{IDENT}.{IDENT}{S}* { 594 {IDENT}@{IDENT}.{IDENT} {
600 curr_lexer->lexer_debug ("{IDENT}@{IDENT}{S}*|{IDENT}@{IDENT}.{IDENT}{S}*"); 595 curr_lexer->lexer_debug ("{IDENT}@{IDENT}|{IDENT}@{IDENT}.{IDENT}");
601 596
602 int id_tok = curr_lexer->handle_superclass_identifier (); 597 int id_tok = curr_lexer->handle_superclass_identifier ();
603 598
604 if (id_tok >= 0) 599 if (id_tok >= 0)
605 { 600 {
611 606
612 %{ 607 %{
613 // Metaclass query 608 // Metaclass query
614 %} 609 %}
615 610
616 \?{IDENT}{S}* | 611 \?{IDENT} |
617 \?{IDENT}\.{IDENT}{S}* { 612 \?{IDENT}\.{IDENT} {
618 curr_lexer->lexer_debug ("\\?{IDENT}{S}*|\\?{IDENT}\\.{IDENT}{S}*"); 613 curr_lexer->lexer_debug ("\\?{IDENT}|\\?{IDENT}\\.{IDENT}");
619 614
620 int id_tok = curr_lexer->handle_meta_identifier (); 615 int id_tok = curr_lexer->handle_meta_identifier ();
621 616
622 if (id_tok >= 0) 617 if (id_tok >= 0)
623 { 618 {
676 %} 671 %}
677 672
678 "'" { 673 "'" {
679 curr_lexer->lexer_debug ("'"); 674 curr_lexer->lexer_debug ("'");
680 675
681 curr_lexer->current_input_column++;
682
683 int tok = curr_lexer->previous_token_value (); 676 int tok = curr_lexer->previous_token_value ();
684 677
685 bool transpose = false; 678 bool transpose = false;
686 679
687 if (curr_lexer->whitespace_is_significant ()) 680 if (curr_lexer->whitespace_is_significant ())
689 if (curr_lexer->space_follows_previous_token ()) 682 if (curr_lexer->space_follows_previous_token ())
690 { 683 {
691 if (tok == '[' || tok == '{' 684 if (tok == '[' || tok == '{'
692 || curr_lexer->previous_token_is_binop ()) 685 || curr_lexer->previous_token_is_binop ())
693 { 686 {
687 curr_lexer->current_input_column++;
694 int retval = curr_lexer->handle_string ('\''); 688 int retval = curr_lexer->handle_string ('\'');
695 return curr_lexer->count_token_internal (retval); 689 return curr_lexer->count_token_internal (retval);
696 } 690 }
697 else 691 else
698 { 692 {
703 else 697 else
704 { 698 {
705 if (tok == ',' || tok == ';' 699 if (tok == ',' || tok == ';'
706 || curr_lexer->previous_token_is_binop ()) 700 || curr_lexer->previous_token_is_binop ())
707 { 701 {
702 curr_lexer->current_input_column++;
708 int retval = curr_lexer->handle_string ('\''); 703 int retval = curr_lexer->handle_string ('\'');
709 return curr_lexer->count_token_internal (retval); 704 return curr_lexer->count_token_internal (retval);
710 } 705 }
711 else 706 else
712 return curr_lexer->count_token (QUOTE); 707 return curr_lexer->count_token (QUOTE);
717 if (tok == NAME || tok == NUM || tok == IMAG_NUM 712 if (tok == NAME || tok == NUM || tok == IMAG_NUM
718 || tok == ')' || tok == ']' || tok == '}') 713 || tok == ')' || tok == ']' || tok == '}')
719 return curr_lexer->count_token (QUOTE); 714 return curr_lexer->count_token (QUOTE);
720 else 715 else
721 { 716 {
717 curr_lexer->current_input_column++;
722 int retval = curr_lexer->handle_string ('\''); 718 int retval = curr_lexer->handle_string ('\'');
723 return curr_lexer->count_token_internal (retval); 719 return curr_lexer->count_token_internal (retval);
724 } 720 }
725 } 721 }
726 } 722 }
730 %} 726 %}
731 727
732 \" { 728 \" {
733 curr_lexer->lexer_debug ("\""); 729 curr_lexer->lexer_debug ("\"");
734 730
735 curr_lexer->current_input_column++; 731 int tok = curr_lexer->previous_token_value ();
736 int tok = curr_lexer->handle_string ('"'); 732
737 733 bool transpose = false;
738 return curr_lexer->count_token_internal (tok); 734
739 } 735 if (curr_lexer->whitespace_is_significant ())
736 {
737 if (curr_lexer->space_follows_previous_token ())
738 {
739 if (tok == '[' || tok == '{'
740 || curr_lexer->previous_token_is_binop ())
741 {
742 curr_lexer->current_input_column++;
743 int retval = curr_lexer->handle_string ('"');
744 return curr_lexer->count_token_internal (retval);
745 }
746 else
747 {
748 yyless (0);
749 curr_lexer->xunput (',');
750 }
751 }
752 else
753 {
754 curr_lexer->current_input_column++;
755 int retval = curr_lexer->handle_string ('"');
756 return curr_lexer->count_token_internal (retval);
757 }
758 }
759 else
760 {
761 curr_lexer->current_input_column++;
762 int retval = curr_lexer->handle_string ('"');
763 return curr_lexer->count_token_internal (retval);
764 }
765 }
740 766
741 %{ 767 %{
742 // Other operators. 768 // Other operators.
743 %} 769 %}
744 770
2779 octave_lexer::handle_identifier (void) 2805 octave_lexer::handle_identifier (void)
2780 { 2806 {
2781 char *yytxt = flex_yytext (); 2807 char *yytxt = flex_yytext ();
2782 2808
2783 std::string tok = yytxt; 2809 std::string tok = yytxt;
2784
2785 int c = yytxt[flex_yyleng()-1];
2786
2787 bool spc_gobbled = false;
2788 2810
2789 // If we are expecting a structure element, avoid recognizing 2811 // If we are expecting a structure element, avoid recognizing
2790 // keywords and other special names and return STRUCT_ELT, which is 2812 // keywords and other special names and return STRUCT_ELT, which is
2791 // a string that is also a valid identifier. But first, we have to 2813 // a string that is also a valid identifier. But first, we have to
2792 // decide whether to insert a comma. 2814 // decide whether to insert a comma.