comparison libinterp/parse-tree/lex.ll @ 16259:0b5ab09dfce4

2/10 commits reworking the lexer
author John W. Eaton <jwe@octave.org>
date Mon, 11 Mar 2013 14:18:39 -0400
parents db7f07b22b9b
children 6c211b8cfbd9 b45a90cdb0ae
comparison
equal deleted inserted replaced
16257:db7f07b22b9b 16259:0b5ab09dfce4
192 curr_lexer->lexer_debug ("<COMMAND_START>{NL}"); 192 curr_lexer->lexer_debug ("<COMMAND_START>{NL}");
193 193
194 curr_lexer->input_line_number++; 194 curr_lexer->input_line_number++;
195 curr_lexer->current_input_column = 1; 195 curr_lexer->current_input_column = 1;
196 196
197 curr_lexer->quote_is_transpose = false;
198 curr_lexer->convert_spaces_to_comma = true;
199 curr_lexer->looking_for_object_index = false; 197 curr_lexer->looking_for_object_index = false;
200 curr_lexer->at_beginning_of_statement = true; 198 curr_lexer->at_beginning_of_statement = true;
201 199
202 curr_lexer->pop_start_state (); 200 curr_lexer->pop_start_state ();
203 201
276 %} 274 %}
277 275
278 <MATRIX_START>\] { 276 <MATRIX_START>\] {
279 curr_lexer->lexer_debug ("<MATRIX_START>\\]"); 277 curr_lexer->lexer_debug ("<MATRIX_START>\\]");
280 278
281 curr_lexer->scan_for_comments (yytext);
282 curr_lexer->fixup_column_count (yytext);
283
284 curr_lexer->looking_at_object_index.pop_front (); 279 curr_lexer->looking_at_object_index.pop_front ();
285 280
286 curr_lexer->looking_for_object_index = true; 281 curr_lexer->looking_for_object_index = true;
287 curr_lexer->at_beginning_of_statement = false; 282 curr_lexer->at_beginning_of_statement = false;
288 283
289 int c = yytext[yyleng-1]; 284 int tok_to_return = curr_lexer->handle_close_bracket (']');
290 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
291 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
292 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, ']');
293 285
294 return curr_lexer->count_token (']'); 286 return curr_lexer->count_token (']');
295 } 287 }
296 288
297 %{ 289 %{
299 %} 291 %}
300 292
301 <MATRIX_START>\} { 293 <MATRIX_START>\} {
302 curr_lexer->lexer_debug ("<MATRIX_START>\\}*"); 294 curr_lexer->lexer_debug ("<MATRIX_START>\\}*");
303 295
304 curr_lexer->scan_for_comments (yytext);
305 curr_lexer->fixup_column_count (yytext);
306
307 curr_lexer->looking_at_object_index.pop_front (); 296 curr_lexer->looking_at_object_index.pop_front ();
308 297
309 curr_lexer->looking_for_object_index = true; 298 curr_lexer->looking_for_object_index = true;
310 curr_lexer->at_beginning_of_statement = false; 299 curr_lexer->at_beginning_of_statement = false;
311 300
312 int c = yytext[yyleng-1]; 301 int tok_to_return = curr_lexer->handle_close_bracket ('}');
313 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
314 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
315 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, '}');
316 302
317 return curr_lexer->count_token ('}'); 303 return curr_lexer->count_token ('}');
318 } 304 }
319 305
320 \[ { 306 \[ {
323 curr_lexer->nesting_level.bracket (); 309 curr_lexer->nesting_level.bracket ();
324 310
325 curr_lexer->looking_at_object_index.push_front (false); 311 curr_lexer->looking_at_object_index.push_front (false);
326 312
327 curr_lexer->current_input_column += yyleng; 313 curr_lexer->current_input_column += yyleng;
328 curr_lexer->quote_is_transpose = false;
329 curr_lexer->convert_spaces_to_comma = true;
330 curr_lexer->looking_for_object_index = false; 314 curr_lexer->looking_for_object_index = false;
331 curr_lexer->at_beginning_of_statement = false; 315 curr_lexer->at_beginning_of_statement = false;
332 316
333 if (curr_lexer->defining_func 317 if (curr_lexer->defining_func
334 && ! curr_lexer->parsed_function_name.top ()) 318 && ! curr_lexer->parsed_function_name.top ())
572 <<EOF>> { 556 <<EOF>> {
573 return curr_lexer->handle_end_of_input (); 557 return curr_lexer->handle_end_of_input ();
574 } 558 }
575 559
576 %{ 560 %{
577 // Identifiers. Truncate the token at the first space or tab but 561 // Identifiers.
578 // don't write directly on yytext.
579 %} 562 %}
580 563
581 {IDENT} { 564 {IDENT} {
582 curr_lexer->lexer_debug ("{IDENT}"); 565 curr_lexer->lexer_debug ("{IDENT}");
583 566
567 int tok = curr_lexer->previous_token_value ();
568
584 if (curr_lexer->whitespace_is_significant () 569 if (curr_lexer->whitespace_is_significant ()
585 && curr_lexer->space_follows_previous_token () 570 && curr_lexer->space_follows_previous_token ()
586 && ! curr_lexer->previous_token_is_binop ()) 571 && ! (tok == '[' || tok == '{'
572 || curr_lexer->previous_token_is_binop ()))
587 { 573 {
588 yyless (0); 574 yyless (0);
589 unput (','); 575 unput (',');
590 } 576 }
591 else 577 else
648 "@" { 634 "@" {
649 curr_lexer->lexer_debug ("@"); 635 curr_lexer->lexer_debug ("@");
650 636
651 curr_lexer->current_input_column++; 637 curr_lexer->current_input_column++;
652 638
653 curr_lexer->quote_is_transpose = false;
654 curr_lexer->convert_spaces_to_comma = false;
655 curr_lexer->looking_at_function_handle++; 639 curr_lexer->looking_at_function_handle++;
656 curr_lexer->looking_for_object_index = false; 640 curr_lexer->looking_for_object_index = false;
657 curr_lexer->at_beginning_of_statement = false; 641 curr_lexer->at_beginning_of_statement = false;
658 642
659 return curr_lexer->count_token ('@'); 643 return curr_lexer->count_token ('@');
669 {NL} { 653 {NL} {
670 curr_lexer->lexer_debug ("{NL}"); 654 curr_lexer->lexer_debug ("{NL}");
671 655
672 curr_lexer->input_line_number++; 656 curr_lexer->input_line_number++;
673 curr_lexer->current_input_column = 1; 657 curr_lexer->current_input_column = 1;
674
675 curr_lexer->quote_is_transpose = false;
676 curr_lexer->convert_spaces_to_comma = true;
677 658
678 if (curr_lexer->nesting_level.none ()) 659 if (curr_lexer->nesting_level.none ())
679 { 660 {
680 curr_lexer->at_beginning_of_statement = true; 661 curr_lexer->at_beginning_of_statement = true;
681 return curr_lexer->count_token ('\n'); 662 return curr_lexer->count_token ('\n');
797 return curr_lexer->handle_op 778 return curr_lexer->handle_op
798 (",", ',', true, ! curr_lexer->looking_at_object_index.front ()); 779 (",", ',', true, ! curr_lexer->looking_at_object_index.front ());
799 } 780 }
800 781
801 ".'" { 782 ".'" {
802 curr_lexer->do_comma_insert_check ();
803 return curr_lexer->handle_op (".'", TRANSPOSE, true, false); 783 return curr_lexer->handle_op (".'", TRANSPOSE, true, false);
804 } 784 }
805 785
806 "++" { 786 "++" {
807 curr_lexer->do_comma_insert_check ();
808 return curr_lexer->handle_incompatible_op 787 return curr_lexer->handle_incompatible_op
809 ("++", PLUS_PLUS, true, false, true); 788 ("++", PLUS_PLUS, true, false, true);
810 } 789 }
811 790
812 "--" { 791 "--" {
813 ; 792 ;
814 curr_lexer->do_comma_insert_check ();
815 return curr_lexer->handle_incompatible_op 793 return curr_lexer->handle_incompatible_op
816 ("--", MINUS_MINUS, true, false, true); 794 ("--", MINUS_MINUS, true, false, true);
817 } 795 }
818 796
819 "(" { 797 "(" {
843 curr_lexer->nesting_level.remove (); 821 curr_lexer->nesting_level.remove ();
844 curr_lexer->current_input_column++; 822 curr_lexer->current_input_column++;
845 823
846 curr_lexer->looking_at_object_index.pop_front (); 824 curr_lexer->looking_at_object_index.pop_front ();
847 825
848 curr_lexer->quote_is_transpose = true;
849 curr_lexer->convert_spaces_to_comma
850 = (curr_lexer->nesting_level.is_bracket_or_brace ()
851 && ! curr_lexer->looking_at_anon_fcn_args);
852 curr_lexer->looking_for_object_index = true; 826 curr_lexer->looking_for_object_index = true;
853 curr_lexer->at_beginning_of_statement = false; 827 curr_lexer->at_beginning_of_statement = false;
854 828
855 if (curr_lexer->looking_at_anon_fcn_args) 829 if (curr_lexer->looking_at_anon_fcn_args)
856 curr_lexer->looking_at_anon_fcn_args = false; 830 curr_lexer->looking_at_anon_fcn_args = false;
857
858 curr_lexer->do_comma_insert_check ();
859 831
860 return curr_lexer->count_token (')'); 832 return curr_lexer->count_token (')');
861 } 833 }
862 834
863 "." { 835 "." {
1108 1080
1109 curr_lexer->looking_at_object_index.push_front 1081 curr_lexer->looking_at_object_index.push_front
1110 (curr_lexer->looking_for_object_index); 1082 (curr_lexer->looking_for_object_index);
1111 1083
1112 curr_lexer->current_input_column += yyleng; 1084 curr_lexer->current_input_column += yyleng;
1113 curr_lexer->quote_is_transpose = false;
1114 curr_lexer->convert_spaces_to_comma = true;
1115 curr_lexer->looking_for_object_index = false; 1085 curr_lexer->looking_for_object_index = false;
1116 curr_lexer->at_beginning_of_statement = false; 1086 curr_lexer->at_beginning_of_statement = false;
1117 1087
1118 curr_lexer->decrement_promptflag (); 1088 curr_lexer->decrement_promptflag ();
1119 curr_lexer->eat_whitespace ();
1120 1089
1121 curr_lexer->braceflag++; 1090 curr_lexer->braceflag++;
1122 1091
1123 curr_lexer->push_start_state (MATRIX_START); 1092 curr_lexer->push_start_state (MATRIX_START);
1124 1093
1504 1473
1505 void 1474 void
1506 lexical_feedback::reset (void) 1475 lexical_feedback::reset (void)
1507 { 1476 {
1508 end_of_input = false; 1477 end_of_input = false;
1509 convert_spaces_to_comma = true;
1510 do_comma_insert = false;
1511 at_beginning_of_statement = true; 1478 at_beginning_of_statement = true;
1512 looking_at_anon_fcn_args = false; 1479 looking_at_anon_fcn_args = false;
1513 looking_at_return_list = false; 1480 looking_at_return_list = false;
1514 looking_at_parameter_list = false; 1481 looking_at_parameter_list = false;
1515 looking_at_decl_list = false; 1482 looking_at_decl_list = false;
1518 looking_for_object_index = false; 1485 looking_for_object_index = false;
1519 looking_at_indirect_ref = false; 1486 looking_at_indirect_ref = false;
1520 parsing_class_method = false; 1487 parsing_class_method = false;
1521 maybe_classdef_get_set_method = false; 1488 maybe_classdef_get_set_method = false;
1522 parsing_classdef = false; 1489 parsing_classdef = false;
1523 quote_is_transpose = false;
1524 force_script = false; 1490 force_script = false;
1525 reading_fcn_file = false; 1491 reading_fcn_file = false;
1526 reading_script_file = false; 1492 reading_script_file = false;
1527 reading_classdef_file = false; 1493 reading_classdef_file = false;
1528 input_line_number = 1; 1494 input_line_number = 1;
1782 octave_lexer::flex_yyleng (void) 1748 octave_lexer::flex_yyleng (void)
1783 { 1749 {
1784 return yyget_leng (scanner); 1750 return yyget_leng (scanner);
1785 } 1751 }
1786 1752
1787 // GAG.
1788 //
1789 // If we're reading a matrix and the next character is '[', make sure
1790 // that we insert a comma ahead of it.
1791
1792 void
1793 octave_lexer::do_comma_insert_check (void)
1794 {
1795 bool spc_gobbled = (eat_continuation () != octave_lexer::NO_WHITESPACE);
1796
1797 int c = text_yyinput ();
1798
1799 xunput (c);
1800
1801 if (spc_gobbled)
1802 xunput (' ');
1803
1804 do_comma_insert = (! looking_at_object_index.front ()
1805 && bracketflag && c == '[');
1806 }
1807
1808 int 1753 int
1809 octave_lexer::text_yyinput (void) 1754 octave_lexer::text_yyinput (void)
1810 { 1755 {
1811 int c = yyinput (scanner); 1756 int c = yyinput (scanner);
1812 1757
1866 octave_lexer::xunput (char c) 1811 octave_lexer::xunput (char c)
1867 { 1812 {
1868 char *yytxt = flex_yytext (); 1813 char *yytxt = flex_yytext ();
1869 1814
1870 xunput (c, yytxt); 1815 xunput (c, yytxt);
1871 }
1872
1873 // If we read some newlines, we need figure out what column we're
1874 // really looking at.
1875
1876 void
1877 octave_lexer::fixup_column_count (char *s)
1878 {
1879 char c;
1880 while ((c = *s++) != '\0')
1881 {
1882 if (c == '\n')
1883 {
1884 input_line_number++;
1885 current_input_column = 1;
1886 }
1887 else
1888 current_input_column++;
1889 }
1890 } 1816 }
1891 1817
1892 bool 1818 bool
1893 octave_lexer::inside_any_object_index (void) 1819 octave_lexer::inside_any_object_index (void)
1894 { 1820 {
2145 return (symbol_table::is_variable (name) 2071 return (symbol_table::is_variable (name)
2146 || (pending_local_variables.find (name) 2072 || (pending_local_variables.find (name)
2147 != pending_local_variables.end ())); 2073 != pending_local_variables.end ()));
2148 } 2074 }
2149 2075
2150 // Recognize separators. If the separator is a CRLF pair, it is
2151 // replaced by a single LF.
2152
2153 bool
2154 octave_lexer::next_token_is_sep_op (void)
2155 {
2156 bool retval = false;
2157
2158 int c = text_yyinput ();
2159
2160 retval = match_any (c, ",;\n]");
2161
2162 xunput (c);
2163
2164 return retval;
2165 }
2166
2167 // Try to determine if the next token should be treated as a postfix
2168 // unary operator. This is ugly, but it seems to do the right thing.
2169
2170 bool
2171 octave_lexer::next_token_is_postfix_unary_op (bool spc_prev)
2172 {
2173 bool un_op = false;
2174
2175 int c0 = text_yyinput ();
2176
2177 if (c0 == '\'' && ! spc_prev)
2178 {
2179 un_op = true;
2180 }
2181 else if (c0 == '.')
2182 {
2183 int c1 = text_yyinput ();
2184 un_op = (c1 == '\'');
2185 xunput (c1);
2186 }
2187 else if (c0 == '+')
2188 {
2189 int c1 = text_yyinput ();
2190 un_op = (c1 == '+');
2191 xunput (c1);
2192 }
2193 else if (c0 == '-')
2194 {
2195 int c1 = text_yyinput ();
2196 un_op = (c1 == '-');
2197 xunput (c1);
2198 }
2199
2200 xunput (c0);
2201
2202 return un_op;
2203 }
2204
2205 // Try to determine if the next token should be treated as a binary
2206 // operator.
2207 //
2208 // This kluge exists because whitespace is not always ignored inside
2209 // the square brackets that are used to create matrix objects (though
2210 // spacing only really matters in the cases that can be interpreted
2211 // either as binary ops or prefix unary ops: currently just +, -).
2212 //
2213 // Note that a line continuation directly following a + or - operator
2214 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
2215 // parsed as a binary operator.
2216
2217 bool
2218 octave_lexer::next_token_is_bin_op (bool spc_prev)
2219 {
2220 bool bin_op = false;
2221
2222 int c0 = text_yyinput ();
2223
2224 switch (c0)
2225 {
2226 case '+':
2227 case '-':
2228 {
2229 int c1 = text_yyinput ();
2230
2231 switch (c1)
2232 {
2233 case '+':
2234 case '-':
2235 // Unary ops, spacing doesn't matter.
2236 break;
2237
2238 case '=':
2239 // Binary ops, spacing doesn't matter.
2240 bin_op = true;
2241 break;
2242
2243 default:
2244 // Could be either, spacing matters.
2245 bin_op = looks_like_bin_op (spc_prev, c1);
2246 break;
2247 }
2248
2249 xunput (c1);
2250 }
2251 break;
2252
2253 case ':':
2254 case '/':
2255 case '\\':
2256 case '^':
2257 // Always a binary op (may also include /=, \=, and ^=).
2258 bin_op = true;
2259 break;
2260
2261 // .+ .- ./ .\ .^ .* .**
2262 case '.':
2263 {
2264 int c1 = text_yyinput ();
2265
2266 if (match_any (c1, "+-/\\^*"))
2267 // Always a binary op (may also include .+=, .-=, ./=, ...).
2268 bin_op = true;
2269 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
2270 // A structure element reference is a binary op.
2271 bin_op = true;
2272
2273 xunput (c1);
2274 }
2275 break;
2276
2277 // = == & && | || * **
2278 case '=':
2279 case '&':
2280 case '|':
2281 case '*':
2282 // Always a binary op (may also include ==, &&, ||, **).
2283 bin_op = true;
2284 break;
2285
2286 // < <= <> > >=
2287 case '<':
2288 case '>':
2289 // Always a binary op (may also include <=, <>, >=).
2290 bin_op = true;
2291 break;
2292
2293 // ~= !=
2294 case '~':
2295 case '!':
2296 {
2297 int c1 = text_yyinput ();
2298
2299 // ~ and ! can be unary ops, so require following =.
2300 if (c1 == '=')
2301 bin_op = true;
2302
2303 xunput (c1);
2304 }
2305 break;
2306
2307 default:
2308 break;
2309 }
2310
2311 xunput (c0);
2312
2313 return bin_op;
2314 }
2315
2316 // FIXME -- we need to handle block comments here.
2317
2318 void
2319 octave_lexer::scan_for_comments (const char *text)
2320 {
2321 std::string comment_buf;
2322
2323 bool in_comment = false;
2324 bool beginning_of_comment = false;
2325
2326 int len = strlen (text);
2327 int i = 0;
2328
2329 while (i < len)
2330 {
2331 char c = text[i++];
2332
2333 switch (c)
2334 {
2335 case '%':
2336 case '#':
2337 if (in_comment)
2338 {
2339 if (! beginning_of_comment)
2340 comment_buf += static_cast<char> (c);
2341 }
2342 else
2343 {
2344 maybe_gripe_matlab_incompatible_comment (c);
2345 in_comment = true;
2346 beginning_of_comment = true;
2347 }
2348 break;
2349
2350 case '\n':
2351 if (in_comment)
2352 {
2353 comment_buf += static_cast<char> (c);
2354 octave_comment_buffer::append (comment_buf);
2355 comment_buf.resize (0);
2356 in_comment = false;
2357 beginning_of_comment = false;
2358 }
2359 break;
2360
2361 default:
2362 if (in_comment)
2363 {
2364 comment_buf += static_cast<char> (c);
2365 beginning_of_comment = false;
2366 }
2367 break;
2368 }
2369 }
2370
2371 if (! comment_buf.empty ())
2372 octave_comment_buffer::append (comment_buf);
2373 }
2374
2375 // Discard whitespace, including comments and continuations.
2376
2377 // FIXME -- we need to handle block comments here.
2378
2379 int
2380 octave_lexer::eat_whitespace (void)
2381 {
2382 int retval = octave_lexer::NO_WHITESPACE;
2383
2384 std::string comment_buf;
2385
2386 bool in_comment = false;
2387 bool beginning_of_comment = false;
2388
2389 int c = 0;
2390
2391 while ((c = text_yyinput ()) != EOF)
2392 {
2393 current_input_column++;
2394
2395 switch (c)
2396 {
2397 case ' ':
2398 case '\t':
2399 if (in_comment)
2400 {
2401 comment_buf += static_cast<char> (c);
2402 beginning_of_comment = false;
2403 }
2404 retval |= octave_lexer::SPACE_OR_TAB;
2405 break;
2406
2407 case '\n':
2408 retval |= octave_lexer::NEWLINE;
2409 if (in_comment)
2410 {
2411 comment_buf += static_cast<char> (c);
2412 octave_comment_buffer::append (comment_buf);
2413 comment_buf.resize (0);
2414 in_comment = false;
2415 beginning_of_comment = false;
2416 }
2417 current_input_column = 0;
2418 break;
2419
2420 case '#':
2421 case '%':
2422 if (in_comment)
2423 {
2424 if (! beginning_of_comment)
2425 comment_buf += static_cast<char> (c);
2426 }
2427 else
2428 {
2429 maybe_gripe_matlab_incompatible_comment (c);
2430 in_comment = true;
2431 beginning_of_comment = true;
2432 }
2433 break;
2434
2435 case '.':
2436 if (in_comment)
2437 {
2438 comment_buf += static_cast<char> (c);
2439 beginning_of_comment = false;
2440 break;
2441 }
2442 else
2443 {
2444 if (have_ellipsis_continuation ())
2445 break;
2446 else
2447 goto done;
2448 }
2449
2450 case '\\':
2451 if (in_comment)
2452 {
2453 comment_buf += static_cast<char> (c);
2454 beginning_of_comment = false;
2455 break;
2456 }
2457 else
2458 {
2459 if (have_continuation ())
2460 break;
2461 else
2462 goto done;
2463 }
2464
2465 default:
2466 if (in_comment)
2467 {
2468 comment_buf += static_cast<char> (c);
2469 beginning_of_comment = false;
2470 break;
2471 }
2472 else
2473 goto done;
2474 }
2475 }
2476
2477 if (! comment_buf.empty ())
2478 octave_comment_buffer::append (comment_buf);
2479
2480 done:
2481 xunput (c);
2482 current_input_column--;
2483 return retval;
2484 }
2485
2486 bool 2076 bool
2487 octave_lexer::whitespace_is_significant (void) 2077 octave_lexer::whitespace_is_significant (void)
2488 { 2078 {
2489 return (nesting_level.is_bracket () 2079 return (nesting_level.is_bracket ()
2490 || (nesting_level.is_brace () 2080 || (nesting_level.is_brace ()
2529 2119
2530 // If yytext doesn't contain a valid number, we are in deep doo doo. 2120 // If yytext doesn't contain a valid number, we are in deep doo doo.
2531 2121
2532 assert (nread == 1); 2122 assert (nread == 1);
2533 2123
2534 quote_is_transpose = true;
2535 convert_spaces_to_comma = true;
2536 looking_for_object_index = false; 2124 looking_for_object_index = false;
2537 at_beginning_of_statement = false; 2125 at_beginning_of_statement = false;
2538 2126
2539 push_token (new token (NUM, value, yytxt, input_line_number, 2127 push_token (new token (NUM, value, yytxt, input_line_number,
2540 current_input_column)); 2128 current_input_column));
2541 2129
2542 current_input_column += flex_yyleng (); 2130 current_input_column += flex_yyleng ();
2543
2544 do_comma_insert_check ();
2545 } 2131 }
2546 2132
2547 void 2133 void
2548 octave_lexer::handle_continuation (void) 2134 octave_lexer::handle_continuation (void)
2549 { 2135 {
2612 2198
2613 octave_comment_buffer::append (comment_text, typ); 2199 octave_comment_buffer::append (comment_text, typ);
2614 2200
2615 comment_text = ""; 2201 comment_text = "";
2616 2202
2617 quote_is_transpose = false;
2618 convert_spaces_to_comma = true;
2619 at_beginning_of_statement = true; 2203 at_beginning_of_statement = true;
2620 2204
2621 if (! looking_at_continuation) 2205 if (! looking_at_continuation)
2622 xunput ('\n'); 2206 xunput ('\n');
2623 } 2207 }
2738 } 2322 }
2739 else 2323 else
2740 xunput (c1); 2324 xunput (c1);
2741 2325
2742 return false; 2326 return false;
2743 }
2744
2745 // See if we have a continuation line. If so, eat it and the leading
2746 // whitespace on the next line.
2747
2748 int
2749 octave_lexer::eat_continuation (void)
2750 {
2751 int retval = octave_lexer::NO_WHITESPACE;
2752
2753 int c = text_yyinput ();
2754
2755 if ((c == '.' && have_ellipsis_continuation ())
2756 || (c == '\\' && have_continuation ()))
2757 retval = eat_whitespace ();
2758 else
2759 xunput (c);
2760
2761 return retval;
2762 } 2327 }
2763 2328
2764 int 2329 int
2765 octave_lexer::handle_string (char delim) 2330 octave_lexer::handle_string (char delim)
2766 { 2331 {
2824 if (delim == '\'') 2389 if (delim == '\'')
2825 s = buf.str (); 2390 s = buf.str ();
2826 else 2391 else
2827 s = do_string_escapes (buf.str ()); 2392 s = do_string_escapes (buf.str ());
2828 2393
2829 quote_is_transpose = true;
2830 convert_spaces_to_comma = true;
2831
2832 if (delim == '"') 2394 if (delim == '"')
2833 gripe_matlab_incompatible ("\" used as string delimiter"); 2395 gripe_matlab_incompatible ("\" used as string delimiter");
2834 else if (delim == '\'') 2396 else if (delim == '\'')
2835 gripe_single_quote_string (); 2397 gripe_single_quote_string ();
2836 2398
2854 } 2416 }
2855 2417
2856 return LEXICAL_ERROR; 2418 return LEXICAL_ERROR;
2857 } 2419 }
2858 2420
2859 bool
2860 octave_lexer::next_token_is_assign_op (void)
2861 {
2862 bool retval = false;
2863
2864 int c0 = text_yyinput ();
2865
2866 switch (c0)
2867 {
2868 case '=':
2869 {
2870 int c1 = text_yyinput ();
2871 xunput (c1);
2872 if (c1 != '=')
2873 retval = true;
2874 }
2875 break;
2876
2877 case '+':
2878 case '-':
2879 case '*':
2880 case '/':
2881 case '\\':
2882 case '&':
2883 case '|':
2884 {
2885 int c1 = text_yyinput ();
2886 xunput (c1);
2887 if (c1 == '=')
2888 retval = true;
2889 }
2890 break;
2891
2892 case '.':
2893 {
2894 int c1 = text_yyinput ();
2895 if (match_any (c1, "+-*/\\"))
2896 {
2897 int c2 = text_yyinput ();
2898 xunput (c2);
2899 if (c2 == '=')
2900 retval = true;
2901 }
2902 xunput (c1);
2903 }
2904 break;
2905
2906 case '>':
2907 {
2908 int c1 = text_yyinput ();
2909 if (c1 == '>')
2910 {
2911 int c2 = text_yyinput ();
2912 xunput (c2);
2913 if (c2 == '=')
2914 retval = true;
2915 }
2916 xunput (c1);
2917 }
2918 break;
2919
2920 case '<':
2921 {
2922 int c1 = text_yyinput ();
2923 if (c1 == '<')
2924 {
2925 int c2 = text_yyinput ();
2926 xunput (c2);
2927 if (c2 == '=')
2928 retval = true;
2929 }
2930 xunput (c1);
2931 }
2932 break;
2933
2934 default:
2935 break;
2936 }
2937
2938 xunput (c0);
2939
2940 return retval;
2941 }
2942
2943 bool
2944 octave_lexer::next_token_is_index_op (void)
2945 {
2946 int c = text_yyinput ();
2947 xunput (c);
2948 return c == '(' || c == '{';
2949 }
2950
2951 int 2421 int
2952 octave_lexer::handle_close_bracket (bool spc_gobbled, int bracket_type) 2422 octave_lexer::handle_close_bracket (int bracket_type)
2953 { 2423 {
2954 int retval = bracket_type; 2424 int retval = bracket_type;
2955 2425
2956 if (! nesting_level.none ()) 2426 if (! nesting_level.none ())
2957 { 2427 {
2965 panic_impossible (); 2435 panic_impossible ();
2966 } 2436 }
2967 2437
2968 pop_start_state (); 2438 pop_start_state ();
2969 2439
2970 quote_is_transpose = true;
2971 convert_spaces_to_comma = true;
2972
2973 return retval; 2440 return retval;
2974 }
2975
2976 void
2977 octave_lexer::maybe_unput_comma (int spc_gobbled)
2978 {
2979 if (nesting_level.is_bracket ()
2980 || (nesting_level.is_brace ()
2981 && ! looking_at_object_index.front ()))
2982 {
2983 int bin_op = next_token_is_bin_op (spc_gobbled);
2984
2985 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2986
2987 int c1 = text_yyinput ();
2988 int c2 = text_yyinput ();
2989
2990 xunput (c2);
2991 xunput (c1);
2992
2993 int sep_op = next_token_is_sep_op ();
2994
2995 int dot_op = (c1 == '.'
2996 && (isalpha (c2) || isspace (c2) || c2 == '_'));
2997
2998 if (postfix_un_op || bin_op || sep_op || dot_op)
2999 return;
3000
3001 int index_op = (c1 == '(' || c1 == '{');
3002
3003 // If there is no space before the indexing op, we don't insert
3004 // a comma.
3005
3006 if (index_op && ! spc_gobbled)
3007 return;
3008
3009 maybe_warn_separator_insert (',');
3010
3011 xunput (',');
3012 }
3013 } 2441 }
3014 2442
3015 bool 2443 bool
3016 octave_lexer::next_token_can_follow_bin_op (void) 2444 octave_lexer::next_token_can_follow_bin_op (void)
3017 { 2445 {
3277 } 2705 }
3278 2706
3279 int 2707 int
3280 octave_lexer::handle_superclass_identifier (void) 2708 octave_lexer::handle_superclass_identifier (void)
3281 { 2709 {
3282 eat_continuation ();
3283
3284 std::string pkg; 2710 std::string pkg;
3285 char *yytxt = flex_yytext (); 2711 char *yytxt = flex_yytext ();
3286 std::string meth = strip_trailing_whitespace (yytxt); 2712 std::string meth = strip_trailing_whitespace (yytxt);
3287 size_t pos = meth.find ("@"); 2713 size_t pos = meth.find ("@");
3288 std::string cls = meth.substr (pos).substr (1); 2714 std::string cls = meth.substr (pos).substr (1);
3307 meth.empty () ? 0 : &(symbol_table::insert (meth)), 2733 meth.empty () ? 0 : &(symbol_table::insert (meth)),
3308 cls.empty () ? 0 : &(symbol_table::insert (cls)), 2734 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3309 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), 2735 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3310 input_line_number, current_input_column)); 2736 input_line_number, current_input_column));
3311 2737
3312 convert_spaces_to_comma = true;
3313 current_input_column += flex_yyleng (); 2738 current_input_column += flex_yyleng ();
3314 2739
3315 return SUPERCLASSREF; 2740 return SUPERCLASSREF;
3316 } 2741 }
3317 2742
3318 int 2743 int
3319 octave_lexer::handle_meta_identifier (void) 2744 octave_lexer::handle_meta_identifier (void)
3320 { 2745 {
3321 eat_continuation ();
3322
3323 std::string pkg; 2746 std::string pkg;
3324 char *yytxt = flex_yytext (); 2747 char *yytxt = flex_yytext ();
3325 std::string cls = strip_trailing_whitespace (yytxt).substr (1); 2748 std::string cls = strip_trailing_whitespace (yytxt).substr (1);
3326 size_t pos = cls.find ("."); 2749 size_t pos = cls.find (".");
3327 2750
3341 push_token (new token (METAQUERY, 2764 push_token (new token (METAQUERY,
3342 cls.empty () ? 0 : &(symbol_table::insert (cls)), 2765 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3343 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), 2766 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3344 input_line_number, current_input_column)); 2767 input_line_number, current_input_column));
3345 2768
3346 convert_spaces_to_comma = true;
3347 current_input_column += flex_yyleng (); 2769 current_input_column += flex_yyleng ();
3348 2770
3349 return METAQUERY; 2771 return METAQUERY;
3350 } 2772 }
3351 2773
3369 // a string that is also a valid identifier. But first, we have to 2791 // a string that is also a valid identifier. But first, we have to
3370 // decide whether to insert a comma. 2792 // decide whether to insert a comma.
3371 2793
3372 if (looking_at_indirect_ref) 2794 if (looking_at_indirect_ref)
3373 { 2795 {
3374 // do_comma_insert_check ();
3375
3376 // maybe_unput_comma (spc_gobbled);
3377
3378 push_token (new token (STRUCT_ELT, tok, input_line_number, 2796 push_token (new token (STRUCT_ELT, tok, input_line_number,
3379 current_input_column)); 2797 current_input_column));
3380 2798
3381 quote_is_transpose = true;
3382 convert_spaces_to_comma = true;
3383 looking_for_object_index = true; 2799 looking_for_object_index = true;
3384 2800
3385 current_input_column += flex_yyleng (); 2801 current_input_column += flex_yyleng ();
3386 2802
3387 at_beginning_of_statement = false; 2803 at_beginning_of_statement = false;
3413 { 2829 {
3414 push_token (new token (FCN_HANDLE, tok, input_line_number, 2830 push_token (new token (FCN_HANDLE, tok, input_line_number,
3415 current_input_column)); 2831 current_input_column));
3416 2832
3417 current_input_column += flex_yyleng (); 2833 current_input_column += flex_yyleng ();
3418 quote_is_transpose = false;
3419 convert_spaces_to_comma = true;
3420 looking_for_object_index = true; 2834 looking_for_object_index = true;
3421 2835
3422 at_beginning_of_statement = false; 2836 at_beginning_of_statement = false;
3423 2837
3424 return FCN_HANDLE; 2838 return FCN_HANDLE;
3431 if (kw_token) 2845 if (kw_token)
3432 { 2846 {
3433 if (kw_token >= 0) 2847 if (kw_token >= 0)
3434 { 2848 {
3435 current_input_column += flex_yyleng (); 2849 current_input_column += flex_yyleng ();
3436 quote_is_transpose = false;
3437 convert_spaces_to_comma = true;
3438 looking_for_object_index = false; 2850 looking_for_object_index = false;
3439 } 2851 }
3440 2852
3441 return kw_token; 2853 return kw_token;
3442 } 2854 }
3863 gripe_matlab_incompatible_operator (flex_yytext ()); 3275 gripe_matlab_incompatible_operator (flex_yytext ());
3864 3276
3865 push_token (new token (tok, input_line_number, current_input_column)); 3277 push_token (new token (tok, input_line_number, current_input_column));
3866 3278
3867 current_input_column += flex_yyleng (); 3279 current_input_column += flex_yyleng ();
3868 quote_is_transpose = qit;
3869 convert_spaces_to_comma = convert;
3870 looking_for_object_index = false; 3280 looking_for_object_index = false;
3871 at_beginning_of_statement = bos; 3281 at_beginning_of_statement = bos;
3872 3282
3873 return count_token (tok); 3283 return count_token (tok);
3874 } 3284 }
3889 tok_val = new token (tok, input_line_number, current_input_column); 3299 tok_val = new token (tok, input_line_number, current_input_column);
3890 3300
3891 push_token (tok_val); 3301 push_token (tok_val);
3892 3302
3893 current_input_column += flex_yyleng (); 3303 current_input_column += flex_yyleng ();
3894 quote_is_transpose = false;
3895 convert_spaces_to_comma = true;
3896 3304
3897 return count_token_internal (tok); 3305 return count_token_internal (tok);
3898 } 3306 }
3899 3307
3900 int 3308 int