comparison libinterp/parse-tree/lex.ll @ 16267:15f55df088e7

6/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:30:57 -0400
parents 71ee3afedb69
children dbbef00202ff 488b0fef52c5
comparison
equal deleted inserted replaced
16265:71ee3afedb69 16267:15f55df088e7
240 } 240 }
241 241
242 <MATRIX_START>{NL} { 242 <MATRIX_START>{NL} {
243 curr_lexer->lexer_debug ("<MATRIX_START>{NL}"); 243 curr_lexer->lexer_debug ("<MATRIX_START>{NL}");
244 244
245 int tok = curr_lexer->previous_token_value (); 245 if (curr_lexer->nesting_level.is_paren ())
246 246 curr_lexer->gripe_matlab_incompatible ("bare newline inside parentheses");
247 if (! (tok == ';' || tok == '[' || tok == '{')) 247 else
248 curr_lexer->xunput (';'); 248 {
249 int tok = curr_lexer->previous_token_value ();
250
251 if (! (tok == ';' || tok == '[' || tok == '{'))
252 curr_lexer->xunput (';');
253 }
249 } 254 }
250 255
251 <KLUGE>@ { 256 <KLUGE>@ {
252 curr_lexer->lexer_debug ("<KLUGE>@"); 257 curr_lexer->lexer_debug ("<KLUGE>@");
253 curr_lexer->pop_start_state (); 258 curr_lexer->pop_start_state ();
306 if (curr_lexer->whitespace_is_significant () 311 if (curr_lexer->whitespace_is_significant ()
307 && curr_lexer->space_follows_previous_token ()) 312 && curr_lexer->space_follows_previous_token ())
308 { 313 {
309 int tok = curr_lexer->previous_token_value (); 314 int tok = curr_lexer->previous_token_value ();
310 315
311 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' 316 if (! (tok == '[' || tok == '{'
312 || curr_lexer->previous_token_is_binop ())) 317 || curr_lexer->previous_token_is_binop ()))
313 unput_comma = true; 318 unput_comma = true;
314 } 319 }
315 320
316 if (unput_comma) 321 if (unput_comma)
721 curr_lexer->xunput (','); 726 curr_lexer->xunput (',');
722 } 727 }
723 } 728 }
724 else 729 else
725 { 730 {
726 if (tok == ',' || tok == ';' || tok == '[' || tok == '{' 731 if (tok == '[' || tok == '{'
727 || curr_lexer->previous_token_is_binop ()) 732 || curr_lexer->previous_token_is_binop ()
733 || curr_lexer->previous_token_is_keyword ())
728 { 734 {
729 curr_lexer->current_input_column++; 735 curr_lexer->current_input_column++;
730 int retval = curr_lexer->handle_string ('\''); 736 int retval = curr_lexer->handle_string ('\'');
731 return curr_lexer->count_token_internal (retval); 737 return curr_lexer->count_token_internal (retval);
732 } 738 }
734 return curr_lexer->count_token (QUOTE); 740 return curr_lexer->count_token (QUOTE);
735 } 741 }
736 } 742 }
737 else 743 else
738 { 744 {
739 if (tok == NAME || tok == NUM || tok == IMAG_NUM 745 if (! tok || tok == '[' || tok == '{' || tok == '('
740 || tok == ')' || tok == ']' || tok == '}') 746 || curr_lexer->previous_token_is_binop ()
741 return curr_lexer->count_token (QUOTE); 747 || curr_lexer->previous_token_is_keyword ())
742 else
743 { 748 {
744 curr_lexer->current_input_column++; 749 curr_lexer->current_input_column++;
745 int retval = curr_lexer->handle_string ('\''); 750 int retval = curr_lexer->handle_string ('\'');
746 return curr_lexer->count_token_internal (retval); 751 return curr_lexer->count_token_internal (retval);
747 } 752 }
753 else
754 return curr_lexer->count_token (QUOTE);
748 } 755 }
749 } 756 }
750 757
751 %{ 758 %{
752 // Double quotes always begin strings. 759 // Double quotes always begin strings.
761 768
762 if (curr_lexer->whitespace_is_significant ()) 769 if (curr_lexer->whitespace_is_significant ())
763 { 770 {
764 if (curr_lexer->space_follows_previous_token ()) 771 if (curr_lexer->space_follows_previous_token ())
765 { 772 {
766 if (tok == ',' || tok == ';' || tok == '[' || tok == '{' 773 if (tok == '[' || tok == '{'
767 || curr_lexer->previous_token_is_binop ()) 774 || curr_lexer->previous_token_is_binop ())
768 { 775 {
769 curr_lexer->current_input_column++; 776 curr_lexer->current_input_column++;
770 int retval = curr_lexer->handle_string ('"'); 777 int retval = curr_lexer->handle_string ('"');
771 return curr_lexer->count_token_internal (retval); 778 return curr_lexer->count_token_internal (retval);
811 "&" { return curr_lexer->handle_op ("&", EXPR_AND); } 818 "&" { return curr_lexer->handle_op ("&", EXPR_AND); }
812 "|" { return curr_lexer->handle_op ("|", EXPR_OR); } 819 "|" { return curr_lexer->handle_op ("|", EXPR_OR); }
813 "<" { return curr_lexer->handle_op ("<", EXPR_LT); } 820 "<" { return curr_lexer->handle_op ("<", EXPR_LT); }
814 ">" { return curr_lexer->handle_op (">", EXPR_GT); } 821 ">" { return curr_lexer->handle_op (">", EXPR_GT); }
815 "*" { return curr_lexer->handle_op ("*", '*'); } 822 "*" { return curr_lexer->handle_op ("*", '*'); }
816 "/" { return curr_lexer->handle_op ("/", '/'); } 823
824 "/" {
825 int prev_tok = curr_lexer->previous_token_value ();
826 bool space_before = curr_lexer->space_follows_previous_token ();
827 int c = curr_lexer->text_yyinput ();
828 curr_lexer->xunput (c);
829 bool space_after = (c == ' ' || c == '\t');
830
831 if (space_before && ! space_after
832 && curr_lexer->previous_token_may_be_command ())
833 {
834 yyless (0);
835 curr_lexer->push_start_state (COMMAND_START);
836 }
837 else
838 return curr_lexer->handle_op ("/", '/');
839 }
840
817 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } 841 "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); }
818 "^" { return curr_lexer->handle_op ("^", POW); } 842 "^" { return curr_lexer->handle_op ("^", POW); }
819 "**" { return curr_lexer->handle_incompatible_op ("**", POW); } 843 "**" { return curr_lexer->handle_incompatible_op ("**", POW); }
820 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); } 844 "&&" { return curr_lexer->handle_op ("&&", EXPR_AND_AND); }
821 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); } 845 "||" { return curr_lexer->handle_op ("||", EXPR_OR_OR); }
940 if (curr_lexer->whitespace_is_significant () 964 if (curr_lexer->whitespace_is_significant ()
941 && curr_lexer->space_follows_previous_token ()) 965 && curr_lexer->space_follows_previous_token ())
942 { 966 {
943 int tok = curr_lexer->previous_token_value (); 967 int tok = curr_lexer->previous_token_value ();
944 968
945 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' 969 if (! (tok == '[' || tok == '{'
946 || curr_lexer->previous_token_is_binop ())) 970 || curr_lexer->previous_token_is_binop ()))
947 unput_comma = true; 971 unput_comma = true;
948 } 972 }
949 973
950 if (unput_comma) 974 if (unput_comma)
1242 if (curr_lexer->whitespace_is_significant () 1266 if (curr_lexer->whitespace_is_significant ()
1243 && curr_lexer->space_follows_previous_token ()) 1267 && curr_lexer->space_follows_previous_token ())
1244 { 1268 {
1245 int tok = curr_lexer->previous_token_value (); 1269 int tok = curr_lexer->previous_token_value ();
1246 1270
1247 if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' 1271 if (! (tok == '[' || tok == '{'
1248 || curr_lexer->previous_token_is_binop ())) 1272 || curr_lexer->previous_token_is_binop ()))
1249 unput_comma = true; 1273 unput_comma = true;
1250 } 1274 }
1251 1275
1252 if (unput_comma) 1276 if (unput_comma)
1748 || tok == OR_EQ || tok == POW || tok == POW_EQ 1772 || tok == OR_EQ || tok == POW || tok == POW_EQ
1749 || tok == RSHIFT || tok == RSHIFT_EQ || tok == SUB_EQ); 1773 || tok == RSHIFT || tok == RSHIFT_EQ || tok == SUB_EQ);
1750 } 1774 }
1751 1775
1752 bool 1776 bool
1777 lexical_feedback::previous_token_is_keyword (void) const
1778 {
1779 const token *tok = tokens.front ();
1780 return tok ? tok->is_keyword () : false;
1781 }
1782
1783 bool
1753 lexical_feedback::previous_token_may_be_command (void) const 1784 lexical_feedback::previous_token_may_be_command (void) const
1754 { 1785 {
1755 const token *tok = tokens.front (); 1786 const token *tok = tokens.front ();
1756 return tok ? tok->may_be_command () : false; 1787 return tok ? tok->may_be_command () : false;
1757 } 1788 }
2123 tok_val = new token (endclassdef_kw, token::classdef_end, l, c); 2154 tok_val = new token (endclassdef_kw, token::classdef_end, l, c);
2124 at_beginning_of_statement = true; 2155 at_beginning_of_statement = true;
2125 break; 2156 break;
2126 2157
2127 case endenumeration_kw: 2158 case endenumeration_kw:
2128 tok_val = new token (endenumeration_kw, token::enumeration_end, 2159 tok_val = new token (endenumeration_kw, token::enumeration_end, l, c);
2129 l, c);
2130 at_beginning_of_statement = true; 2160 at_beginning_of_statement = true;
2131 break; 2161 break;
2132 2162
2133 case endevents_kw: 2163 case endevents_kw:
2134 tok_val = new token (endevents_kw, token::events_end, l, c); 2164 tok_val = new token (endevents_kw, token::events_end, l, c);
2219 case magic_file_kw: 2249 case magic_file_kw:
2220 { 2250 {
2221 if ((reading_fcn_file || reading_script_file 2251 if ((reading_fcn_file || reading_script_file
2222 || reading_classdef_file) 2252 || reading_classdef_file)
2223 && ! fcn_file_full_name.empty ()) 2253 && ! fcn_file_full_name.empty ())
2224 tok_val = new token (magic_file_kw, fcn_file_full_name, l, c); 2254 tok_val = new token (magic_file_kw, true,
2255 fcn_file_full_name, l, c);
2225 else 2256 else
2226 tok_val = new token (magic_file_kw, "stdin", l, c); 2257 tok_val = new token (magic_file_kw, "stdin", l, c);
2227 } 2258 }
2228 break; 2259 break;
2229 2260
2235 default: 2266 default:
2236 panic_impossible (); 2267 panic_impossible ();
2237 } 2268 }
2238 2269
2239 if (! tok_val) 2270 if (! tok_val)
2240 tok_val = new token (kw->tok, l, c); 2271 tok_val = new token (kw->tok, true, l, c);
2241 2272
2242 push_token (tok_val); 2273 push_token (tok_val);
2243 2274
2244 return kw->tok; 2275 return kw->tok;
2245 } 2276 }
3441 int c = text_yyinput (); 3472 int c = text_yyinput ();
3442 xunput (c); 3473 xunput (c);
3443 3474
3444 bool space_after = (c == ' ' || c == '\t'); 3475 bool space_after = (c == ' ' || c == '\t');
3445 3476
3446 if (! (prev_tok == ';' || prev_tok == ',' 3477 if (! (prev_tok == '[' || prev_tok == '{'
3447 || prev_tok == '[' || prev_tok == '{'
3448 || previous_token_is_binop () 3478 || previous_token_is_binop ()
3449 || ((tok == '+' || tok == '-') && space_after))) 3479 || ((tok == '+' || tok == '-') && space_after)))
3450 unput_comma = true; 3480 unput_comma = true;
3451 } 3481 }
3452 3482