Mercurial > octave-nkf
comparison libinterp/parse-tree/lex.ll @ 16259:0b5ab09dfce4
2/10 commits reworking the lexer
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 11 Mar 2013 14:18:39 -0400 |
parents | db7f07b22b9b |
children | 6c211b8cfbd9 b45a90cdb0ae |
comparison
equal
deleted
inserted
replaced
16257:db7f07b22b9b | 16259:0b5ab09dfce4 |
---|---|
192 curr_lexer->lexer_debug ("<COMMAND_START>{NL}"); | 192 curr_lexer->lexer_debug ("<COMMAND_START>{NL}"); |
193 | 193 |
194 curr_lexer->input_line_number++; | 194 curr_lexer->input_line_number++; |
195 curr_lexer->current_input_column = 1; | 195 curr_lexer->current_input_column = 1; |
196 | 196 |
197 curr_lexer->quote_is_transpose = false; | |
198 curr_lexer->convert_spaces_to_comma = true; | |
199 curr_lexer->looking_for_object_index = false; | 197 curr_lexer->looking_for_object_index = false; |
200 curr_lexer->at_beginning_of_statement = true; | 198 curr_lexer->at_beginning_of_statement = true; |
201 | 199 |
202 curr_lexer->pop_start_state (); | 200 curr_lexer->pop_start_state (); |
203 | 201 |
276 %} | 274 %} |
277 | 275 |
278 <MATRIX_START>\] { | 276 <MATRIX_START>\] { |
279 curr_lexer->lexer_debug ("<MATRIX_START>\\]"); | 277 curr_lexer->lexer_debug ("<MATRIX_START>\\]"); |
280 | 278 |
281 curr_lexer->scan_for_comments (yytext); | |
282 curr_lexer->fixup_column_count (yytext); | |
283 | |
284 curr_lexer->looking_at_object_index.pop_front (); | 279 curr_lexer->looking_at_object_index.pop_front (); |
285 | 280 |
286 curr_lexer->looking_for_object_index = true; | 281 curr_lexer->looking_for_object_index = true; |
287 curr_lexer->at_beginning_of_statement = false; | 282 curr_lexer->at_beginning_of_statement = false; |
288 | 283 |
289 int c = yytext[yyleng-1]; | 284 int tok_to_return = curr_lexer->handle_close_bracket (']'); |
290 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE); | |
291 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
292 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, ']'); | |
293 | 285 |
294 return curr_lexer->count_token (']'); | 286 return curr_lexer->count_token (']'); |
295 } | 287 } |
296 | 288 |
297 %{ | 289 %{ |
299 %} | 291 %} |
300 | 292 |
301 <MATRIX_START>\} { | 293 <MATRIX_START>\} { |
302 curr_lexer->lexer_debug ("<MATRIX_START>\\}*"); | 294 curr_lexer->lexer_debug ("<MATRIX_START>\\}*"); |
303 | 295 |
304 curr_lexer->scan_for_comments (yytext); | |
305 curr_lexer->fixup_column_count (yytext); | |
306 | |
307 curr_lexer->looking_at_object_index.pop_front (); | 296 curr_lexer->looking_at_object_index.pop_front (); |
308 | 297 |
309 curr_lexer->looking_for_object_index = true; | 298 curr_lexer->looking_for_object_index = true; |
310 curr_lexer->at_beginning_of_statement = false; | 299 curr_lexer->at_beginning_of_statement = false; |
311 | 300 |
312 int c = yytext[yyleng-1]; | 301 int tok_to_return = curr_lexer->handle_close_bracket ('}'); |
313 bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE); | |
314 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
315 int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, '}'); | |
316 | 302 |
317 return curr_lexer->count_token ('}'); | 303 return curr_lexer->count_token ('}'); |
318 } | 304 } |
319 | 305 |
320 \[ { | 306 \[ { |
323 curr_lexer->nesting_level.bracket (); | 309 curr_lexer->nesting_level.bracket (); |
324 | 310 |
325 curr_lexer->looking_at_object_index.push_front (false); | 311 curr_lexer->looking_at_object_index.push_front (false); |
326 | 312 |
327 curr_lexer->current_input_column += yyleng; | 313 curr_lexer->current_input_column += yyleng; |
328 curr_lexer->quote_is_transpose = false; | |
329 curr_lexer->convert_spaces_to_comma = true; | |
330 curr_lexer->looking_for_object_index = false; | 314 curr_lexer->looking_for_object_index = false; |
331 curr_lexer->at_beginning_of_statement = false; | 315 curr_lexer->at_beginning_of_statement = false; |
332 | 316 |
333 if (curr_lexer->defining_func | 317 if (curr_lexer->defining_func |
334 && ! curr_lexer->parsed_function_name.top ()) | 318 && ! curr_lexer->parsed_function_name.top ()) |
572 <<EOF>> { | 556 <<EOF>> { |
573 return curr_lexer->handle_end_of_input (); | 557 return curr_lexer->handle_end_of_input (); |
574 } | 558 } |
575 | 559 |
576 %{ | 560 %{ |
577 // Identifiers. Truncate the token at the first space or tab but | 561 // Identifiers. |
578 // don't write directly on yytext. | |
579 %} | 562 %} |
580 | 563 |
581 {IDENT} { | 564 {IDENT} { |
582 curr_lexer->lexer_debug ("{IDENT}"); | 565 curr_lexer->lexer_debug ("{IDENT}"); |
583 | 566 |
567 int tok = curr_lexer->previous_token_value (); | |
568 | |
584 if (curr_lexer->whitespace_is_significant () | 569 if (curr_lexer->whitespace_is_significant () |
585 && curr_lexer->space_follows_previous_token () | 570 && curr_lexer->space_follows_previous_token () |
586 && ! curr_lexer->previous_token_is_binop ()) | 571 && ! (tok == '[' || tok == '{' |
572 || curr_lexer->previous_token_is_binop ())) | |
587 { | 573 { |
588 yyless (0); | 574 yyless (0); |
589 unput (','); | 575 unput (','); |
590 } | 576 } |
591 else | 577 else |
648 "@" { | 634 "@" { |
649 curr_lexer->lexer_debug ("@"); | 635 curr_lexer->lexer_debug ("@"); |
650 | 636 |
651 curr_lexer->current_input_column++; | 637 curr_lexer->current_input_column++; |
652 | 638 |
653 curr_lexer->quote_is_transpose = false; | |
654 curr_lexer->convert_spaces_to_comma = false; | |
655 curr_lexer->looking_at_function_handle++; | 639 curr_lexer->looking_at_function_handle++; |
656 curr_lexer->looking_for_object_index = false; | 640 curr_lexer->looking_for_object_index = false; |
657 curr_lexer->at_beginning_of_statement = false; | 641 curr_lexer->at_beginning_of_statement = false; |
658 | 642 |
659 return curr_lexer->count_token ('@'); | 643 return curr_lexer->count_token ('@'); |
669 {NL} { | 653 {NL} { |
670 curr_lexer->lexer_debug ("{NL}"); | 654 curr_lexer->lexer_debug ("{NL}"); |
671 | 655 |
672 curr_lexer->input_line_number++; | 656 curr_lexer->input_line_number++; |
673 curr_lexer->current_input_column = 1; | 657 curr_lexer->current_input_column = 1; |
674 | |
675 curr_lexer->quote_is_transpose = false; | |
676 curr_lexer->convert_spaces_to_comma = true; | |
677 | 658 |
678 if (curr_lexer->nesting_level.none ()) | 659 if (curr_lexer->nesting_level.none ()) |
679 { | 660 { |
680 curr_lexer->at_beginning_of_statement = true; | 661 curr_lexer->at_beginning_of_statement = true; |
681 return curr_lexer->count_token ('\n'); | 662 return curr_lexer->count_token ('\n'); |
797 return curr_lexer->handle_op | 778 return curr_lexer->handle_op |
798 (",", ',', true, ! curr_lexer->looking_at_object_index.front ()); | 779 (",", ',', true, ! curr_lexer->looking_at_object_index.front ()); |
799 } | 780 } |
800 | 781 |
801 ".'" { | 782 ".'" { |
802 curr_lexer->do_comma_insert_check (); | |
803 return curr_lexer->handle_op (".'", TRANSPOSE, true, false); | 783 return curr_lexer->handle_op (".'", TRANSPOSE, true, false); |
804 } | 784 } |
805 | 785 |
806 "++" { | 786 "++" { |
807 curr_lexer->do_comma_insert_check (); | |
808 return curr_lexer->handle_incompatible_op | 787 return curr_lexer->handle_incompatible_op |
809 ("++", PLUS_PLUS, true, false, true); | 788 ("++", PLUS_PLUS, true, false, true); |
810 } | 789 } |
811 | 790 |
812 "--" { | 791 "--" { |
813 ; | 792 ; |
814 curr_lexer->do_comma_insert_check (); | |
815 return curr_lexer->handle_incompatible_op | 793 return curr_lexer->handle_incompatible_op |
816 ("--", MINUS_MINUS, true, false, true); | 794 ("--", MINUS_MINUS, true, false, true); |
817 } | 795 } |
818 | 796 |
819 "(" { | 797 "(" { |
843 curr_lexer->nesting_level.remove (); | 821 curr_lexer->nesting_level.remove (); |
844 curr_lexer->current_input_column++; | 822 curr_lexer->current_input_column++; |
845 | 823 |
846 curr_lexer->looking_at_object_index.pop_front (); | 824 curr_lexer->looking_at_object_index.pop_front (); |
847 | 825 |
848 curr_lexer->quote_is_transpose = true; | |
849 curr_lexer->convert_spaces_to_comma | |
850 = (curr_lexer->nesting_level.is_bracket_or_brace () | |
851 && ! curr_lexer->looking_at_anon_fcn_args); | |
852 curr_lexer->looking_for_object_index = true; | 826 curr_lexer->looking_for_object_index = true; |
853 curr_lexer->at_beginning_of_statement = false; | 827 curr_lexer->at_beginning_of_statement = false; |
854 | 828 |
855 if (curr_lexer->looking_at_anon_fcn_args) | 829 if (curr_lexer->looking_at_anon_fcn_args) |
856 curr_lexer->looking_at_anon_fcn_args = false; | 830 curr_lexer->looking_at_anon_fcn_args = false; |
857 | |
858 curr_lexer->do_comma_insert_check (); | |
859 | 831 |
860 return curr_lexer->count_token (')'); | 832 return curr_lexer->count_token (')'); |
861 } | 833 } |
862 | 834 |
863 "." { | 835 "." { |
1108 | 1080 |
1109 curr_lexer->looking_at_object_index.push_front | 1081 curr_lexer->looking_at_object_index.push_front |
1110 (curr_lexer->looking_for_object_index); | 1082 (curr_lexer->looking_for_object_index); |
1111 | 1083 |
1112 curr_lexer->current_input_column += yyleng; | 1084 curr_lexer->current_input_column += yyleng; |
1113 curr_lexer->quote_is_transpose = false; | |
1114 curr_lexer->convert_spaces_to_comma = true; | |
1115 curr_lexer->looking_for_object_index = false; | 1085 curr_lexer->looking_for_object_index = false; |
1116 curr_lexer->at_beginning_of_statement = false; | 1086 curr_lexer->at_beginning_of_statement = false; |
1117 | 1087 |
1118 curr_lexer->decrement_promptflag (); | 1088 curr_lexer->decrement_promptflag (); |
1119 curr_lexer->eat_whitespace (); | |
1120 | 1089 |
1121 curr_lexer->braceflag++; | 1090 curr_lexer->braceflag++; |
1122 | 1091 |
1123 curr_lexer->push_start_state (MATRIX_START); | 1092 curr_lexer->push_start_state (MATRIX_START); |
1124 | 1093 |
1504 | 1473 |
1505 void | 1474 void |
1506 lexical_feedback::reset (void) | 1475 lexical_feedback::reset (void) |
1507 { | 1476 { |
1508 end_of_input = false; | 1477 end_of_input = false; |
1509 convert_spaces_to_comma = true; | |
1510 do_comma_insert = false; | |
1511 at_beginning_of_statement = true; | 1478 at_beginning_of_statement = true; |
1512 looking_at_anon_fcn_args = false; | 1479 looking_at_anon_fcn_args = false; |
1513 looking_at_return_list = false; | 1480 looking_at_return_list = false; |
1514 looking_at_parameter_list = false; | 1481 looking_at_parameter_list = false; |
1515 looking_at_decl_list = false; | 1482 looking_at_decl_list = false; |
1518 looking_for_object_index = false; | 1485 looking_for_object_index = false; |
1519 looking_at_indirect_ref = false; | 1486 looking_at_indirect_ref = false; |
1520 parsing_class_method = false; | 1487 parsing_class_method = false; |
1521 maybe_classdef_get_set_method = false; | 1488 maybe_classdef_get_set_method = false; |
1522 parsing_classdef = false; | 1489 parsing_classdef = false; |
1523 quote_is_transpose = false; | |
1524 force_script = false; | 1490 force_script = false; |
1525 reading_fcn_file = false; | 1491 reading_fcn_file = false; |
1526 reading_script_file = false; | 1492 reading_script_file = false; |
1527 reading_classdef_file = false; | 1493 reading_classdef_file = false; |
1528 input_line_number = 1; | 1494 input_line_number = 1; |
1782 octave_lexer::flex_yyleng (void) | 1748 octave_lexer::flex_yyleng (void) |
1783 { | 1749 { |
1784 return yyget_leng (scanner); | 1750 return yyget_leng (scanner); |
1785 } | 1751 } |
1786 | 1752 |
1787 // GAG. | |
1788 // | |
1789 // If we're reading a matrix and the next character is '[', make sure | |
1790 // that we insert a comma ahead of it. | |
1791 | |
1792 void | |
1793 octave_lexer::do_comma_insert_check (void) | |
1794 { | |
1795 bool spc_gobbled = (eat_continuation () != octave_lexer::NO_WHITESPACE); | |
1796 | |
1797 int c = text_yyinput (); | |
1798 | |
1799 xunput (c); | |
1800 | |
1801 if (spc_gobbled) | |
1802 xunput (' '); | |
1803 | |
1804 do_comma_insert = (! looking_at_object_index.front () | |
1805 && bracketflag && c == '['); | |
1806 } | |
1807 | |
1808 int | 1753 int |
1809 octave_lexer::text_yyinput (void) | 1754 octave_lexer::text_yyinput (void) |
1810 { | 1755 { |
1811 int c = yyinput (scanner); | 1756 int c = yyinput (scanner); |
1812 | 1757 |
1866 octave_lexer::xunput (char c) | 1811 octave_lexer::xunput (char c) |
1867 { | 1812 { |
1868 char *yytxt = flex_yytext (); | 1813 char *yytxt = flex_yytext (); |
1869 | 1814 |
1870 xunput (c, yytxt); | 1815 xunput (c, yytxt); |
1871 } | |
1872 | |
1873 // If we read some newlines, we need figure out what column we're | |
1874 // really looking at. | |
1875 | |
1876 void | |
1877 octave_lexer::fixup_column_count (char *s) | |
1878 { | |
1879 char c; | |
1880 while ((c = *s++) != '\0') | |
1881 { | |
1882 if (c == '\n') | |
1883 { | |
1884 input_line_number++; | |
1885 current_input_column = 1; | |
1886 } | |
1887 else | |
1888 current_input_column++; | |
1889 } | |
1890 } | 1816 } |
1891 | 1817 |
1892 bool | 1818 bool |
1893 octave_lexer::inside_any_object_index (void) | 1819 octave_lexer::inside_any_object_index (void) |
1894 { | 1820 { |
2145 return (symbol_table::is_variable (name) | 2071 return (symbol_table::is_variable (name) |
2146 || (pending_local_variables.find (name) | 2072 || (pending_local_variables.find (name) |
2147 != pending_local_variables.end ())); | 2073 != pending_local_variables.end ())); |
2148 } | 2074 } |
2149 | 2075 |
2150 // Recognize separators. If the separator is a CRLF pair, it is | |
2151 // replaced by a single LF. | |
2152 | |
2153 bool | |
2154 octave_lexer::next_token_is_sep_op (void) | |
2155 { | |
2156 bool retval = false; | |
2157 | |
2158 int c = text_yyinput (); | |
2159 | |
2160 retval = match_any (c, ",;\n]"); | |
2161 | |
2162 xunput (c); | |
2163 | |
2164 return retval; | |
2165 } | |
2166 | |
2167 // Try to determine if the next token should be treated as a postfix | |
2168 // unary operator. This is ugly, but it seems to do the right thing. | |
2169 | |
2170 bool | |
2171 octave_lexer::next_token_is_postfix_unary_op (bool spc_prev) | |
2172 { | |
2173 bool un_op = false; | |
2174 | |
2175 int c0 = text_yyinput (); | |
2176 | |
2177 if (c0 == '\'' && ! spc_prev) | |
2178 { | |
2179 un_op = true; | |
2180 } | |
2181 else if (c0 == '.') | |
2182 { | |
2183 int c1 = text_yyinput (); | |
2184 un_op = (c1 == '\''); | |
2185 xunput (c1); | |
2186 } | |
2187 else if (c0 == '+') | |
2188 { | |
2189 int c1 = text_yyinput (); | |
2190 un_op = (c1 == '+'); | |
2191 xunput (c1); | |
2192 } | |
2193 else if (c0 == '-') | |
2194 { | |
2195 int c1 = text_yyinput (); | |
2196 un_op = (c1 == '-'); | |
2197 xunput (c1); | |
2198 } | |
2199 | |
2200 xunput (c0); | |
2201 | |
2202 return un_op; | |
2203 } | |
2204 | |
2205 // Try to determine if the next token should be treated as a binary | |
2206 // operator. | |
2207 // | |
2208 // This kluge exists because whitespace is not always ignored inside | |
2209 // the square brackets that are used to create matrix objects (though | |
2210 // spacing only really matters in the cases that can be interpreted | |
2211 // either as binary ops or prefix unary ops: currently just +, -). | |
2212 // | |
2213 // Note that a line continuation directly following a + or - operator | |
2214 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be | |
2215 // parsed as a binary operator. | |
2216 | |
2217 bool | |
2218 octave_lexer::next_token_is_bin_op (bool spc_prev) | |
2219 { | |
2220 bool bin_op = false; | |
2221 | |
2222 int c0 = text_yyinput (); | |
2223 | |
2224 switch (c0) | |
2225 { | |
2226 case '+': | |
2227 case '-': | |
2228 { | |
2229 int c1 = text_yyinput (); | |
2230 | |
2231 switch (c1) | |
2232 { | |
2233 case '+': | |
2234 case '-': | |
2235 // Unary ops, spacing doesn't matter. | |
2236 break; | |
2237 | |
2238 case '=': | |
2239 // Binary ops, spacing doesn't matter. | |
2240 bin_op = true; | |
2241 break; | |
2242 | |
2243 default: | |
2244 // Could be either, spacing matters. | |
2245 bin_op = looks_like_bin_op (spc_prev, c1); | |
2246 break; | |
2247 } | |
2248 | |
2249 xunput (c1); | |
2250 } | |
2251 break; | |
2252 | |
2253 case ':': | |
2254 case '/': | |
2255 case '\\': | |
2256 case '^': | |
2257 // Always a binary op (may also include /=, \=, and ^=). | |
2258 bin_op = true; | |
2259 break; | |
2260 | |
2261 // .+ .- ./ .\ .^ .* .** | |
2262 case '.': | |
2263 { | |
2264 int c1 = text_yyinput (); | |
2265 | |
2266 if (match_any (c1, "+-/\\^*")) | |
2267 // Always a binary op (may also include .+=, .-=, ./=, ...). | |
2268 bin_op = true; | |
2269 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.') | |
2270 // A structure element reference is a binary op. | |
2271 bin_op = true; | |
2272 | |
2273 xunput (c1); | |
2274 } | |
2275 break; | |
2276 | |
2277 // = == & && | || * ** | |
2278 case '=': | |
2279 case '&': | |
2280 case '|': | |
2281 case '*': | |
2282 // Always a binary op (may also include ==, &&, ||, **). | |
2283 bin_op = true; | |
2284 break; | |
2285 | |
2286 // < <= <> > >= | |
2287 case '<': | |
2288 case '>': | |
2289 // Always a binary op (may also include <=, <>, >=). | |
2290 bin_op = true; | |
2291 break; | |
2292 | |
2293 // ~= != | |
2294 case '~': | |
2295 case '!': | |
2296 { | |
2297 int c1 = text_yyinput (); | |
2298 | |
2299 // ~ and ! can be unary ops, so require following =. | |
2300 if (c1 == '=') | |
2301 bin_op = true; | |
2302 | |
2303 xunput (c1); | |
2304 } | |
2305 break; | |
2306 | |
2307 default: | |
2308 break; | |
2309 } | |
2310 | |
2311 xunput (c0); | |
2312 | |
2313 return bin_op; | |
2314 } | |
2315 | |
2316 // FIXME -- we need to handle block comments here. | |
2317 | |
2318 void | |
2319 octave_lexer::scan_for_comments (const char *text) | |
2320 { | |
2321 std::string comment_buf; | |
2322 | |
2323 bool in_comment = false; | |
2324 bool beginning_of_comment = false; | |
2325 | |
2326 int len = strlen (text); | |
2327 int i = 0; | |
2328 | |
2329 while (i < len) | |
2330 { | |
2331 char c = text[i++]; | |
2332 | |
2333 switch (c) | |
2334 { | |
2335 case '%': | |
2336 case '#': | |
2337 if (in_comment) | |
2338 { | |
2339 if (! beginning_of_comment) | |
2340 comment_buf += static_cast<char> (c); | |
2341 } | |
2342 else | |
2343 { | |
2344 maybe_gripe_matlab_incompatible_comment (c); | |
2345 in_comment = true; | |
2346 beginning_of_comment = true; | |
2347 } | |
2348 break; | |
2349 | |
2350 case '\n': | |
2351 if (in_comment) | |
2352 { | |
2353 comment_buf += static_cast<char> (c); | |
2354 octave_comment_buffer::append (comment_buf); | |
2355 comment_buf.resize (0); | |
2356 in_comment = false; | |
2357 beginning_of_comment = false; | |
2358 } | |
2359 break; | |
2360 | |
2361 default: | |
2362 if (in_comment) | |
2363 { | |
2364 comment_buf += static_cast<char> (c); | |
2365 beginning_of_comment = false; | |
2366 } | |
2367 break; | |
2368 } | |
2369 } | |
2370 | |
2371 if (! comment_buf.empty ()) | |
2372 octave_comment_buffer::append (comment_buf); | |
2373 } | |
2374 | |
2375 // Discard whitespace, including comments and continuations. | |
2376 | |
2377 // FIXME -- we need to handle block comments here. | |
2378 | |
2379 int | |
2380 octave_lexer::eat_whitespace (void) | |
2381 { | |
2382 int retval = octave_lexer::NO_WHITESPACE; | |
2383 | |
2384 std::string comment_buf; | |
2385 | |
2386 bool in_comment = false; | |
2387 bool beginning_of_comment = false; | |
2388 | |
2389 int c = 0; | |
2390 | |
2391 while ((c = text_yyinput ()) != EOF) | |
2392 { | |
2393 current_input_column++; | |
2394 | |
2395 switch (c) | |
2396 { | |
2397 case ' ': | |
2398 case '\t': | |
2399 if (in_comment) | |
2400 { | |
2401 comment_buf += static_cast<char> (c); | |
2402 beginning_of_comment = false; | |
2403 } | |
2404 retval |= octave_lexer::SPACE_OR_TAB; | |
2405 break; | |
2406 | |
2407 case '\n': | |
2408 retval |= octave_lexer::NEWLINE; | |
2409 if (in_comment) | |
2410 { | |
2411 comment_buf += static_cast<char> (c); | |
2412 octave_comment_buffer::append (comment_buf); | |
2413 comment_buf.resize (0); | |
2414 in_comment = false; | |
2415 beginning_of_comment = false; | |
2416 } | |
2417 current_input_column = 0; | |
2418 break; | |
2419 | |
2420 case '#': | |
2421 case '%': | |
2422 if (in_comment) | |
2423 { | |
2424 if (! beginning_of_comment) | |
2425 comment_buf += static_cast<char> (c); | |
2426 } | |
2427 else | |
2428 { | |
2429 maybe_gripe_matlab_incompatible_comment (c); | |
2430 in_comment = true; | |
2431 beginning_of_comment = true; | |
2432 } | |
2433 break; | |
2434 | |
2435 case '.': | |
2436 if (in_comment) | |
2437 { | |
2438 comment_buf += static_cast<char> (c); | |
2439 beginning_of_comment = false; | |
2440 break; | |
2441 } | |
2442 else | |
2443 { | |
2444 if (have_ellipsis_continuation ()) | |
2445 break; | |
2446 else | |
2447 goto done; | |
2448 } | |
2449 | |
2450 case '\\': | |
2451 if (in_comment) | |
2452 { | |
2453 comment_buf += static_cast<char> (c); | |
2454 beginning_of_comment = false; | |
2455 break; | |
2456 } | |
2457 else | |
2458 { | |
2459 if (have_continuation ()) | |
2460 break; | |
2461 else | |
2462 goto done; | |
2463 } | |
2464 | |
2465 default: | |
2466 if (in_comment) | |
2467 { | |
2468 comment_buf += static_cast<char> (c); | |
2469 beginning_of_comment = false; | |
2470 break; | |
2471 } | |
2472 else | |
2473 goto done; | |
2474 } | |
2475 } | |
2476 | |
2477 if (! comment_buf.empty ()) | |
2478 octave_comment_buffer::append (comment_buf); | |
2479 | |
2480 done: | |
2481 xunput (c); | |
2482 current_input_column--; | |
2483 return retval; | |
2484 } | |
2485 | |
2486 bool | 2076 bool |
2487 octave_lexer::whitespace_is_significant (void) | 2077 octave_lexer::whitespace_is_significant (void) |
2488 { | 2078 { |
2489 return (nesting_level.is_bracket () | 2079 return (nesting_level.is_bracket () |
2490 || (nesting_level.is_brace () | 2080 || (nesting_level.is_brace () |
2529 | 2119 |
2530 // If yytext doesn't contain a valid number, we are in deep doo doo. | 2120 // If yytext doesn't contain a valid number, we are in deep doo doo. |
2531 | 2121 |
2532 assert (nread == 1); | 2122 assert (nread == 1); |
2533 | 2123 |
2534 quote_is_transpose = true; | |
2535 convert_spaces_to_comma = true; | |
2536 looking_for_object_index = false; | 2124 looking_for_object_index = false; |
2537 at_beginning_of_statement = false; | 2125 at_beginning_of_statement = false; |
2538 | 2126 |
2539 push_token (new token (NUM, value, yytxt, input_line_number, | 2127 push_token (new token (NUM, value, yytxt, input_line_number, |
2540 current_input_column)); | 2128 current_input_column)); |
2541 | 2129 |
2542 current_input_column += flex_yyleng (); | 2130 current_input_column += flex_yyleng (); |
2543 | |
2544 do_comma_insert_check (); | |
2545 } | 2131 } |
2546 | 2132 |
2547 void | 2133 void |
2548 octave_lexer::handle_continuation (void) | 2134 octave_lexer::handle_continuation (void) |
2549 { | 2135 { |
2612 | 2198 |
2613 octave_comment_buffer::append (comment_text, typ); | 2199 octave_comment_buffer::append (comment_text, typ); |
2614 | 2200 |
2615 comment_text = ""; | 2201 comment_text = ""; |
2616 | 2202 |
2617 quote_is_transpose = false; | |
2618 convert_spaces_to_comma = true; | |
2619 at_beginning_of_statement = true; | 2203 at_beginning_of_statement = true; |
2620 | 2204 |
2621 if (! looking_at_continuation) | 2205 if (! looking_at_continuation) |
2622 xunput ('\n'); | 2206 xunput ('\n'); |
2623 } | 2207 } |
2738 } | 2322 } |
2739 else | 2323 else |
2740 xunput (c1); | 2324 xunput (c1); |
2741 | 2325 |
2742 return false; | 2326 return false; |
2743 } | |
2744 | |
2745 // See if we have a continuation line. If so, eat it and the leading | |
2746 // whitespace on the next line. | |
2747 | |
2748 int | |
2749 octave_lexer::eat_continuation (void) | |
2750 { | |
2751 int retval = octave_lexer::NO_WHITESPACE; | |
2752 | |
2753 int c = text_yyinput (); | |
2754 | |
2755 if ((c == '.' && have_ellipsis_continuation ()) | |
2756 || (c == '\\' && have_continuation ())) | |
2757 retval = eat_whitespace (); | |
2758 else | |
2759 xunput (c); | |
2760 | |
2761 return retval; | |
2762 } | 2327 } |
2763 | 2328 |
2764 int | 2329 int |
2765 octave_lexer::handle_string (char delim) | 2330 octave_lexer::handle_string (char delim) |
2766 { | 2331 { |
2824 if (delim == '\'') | 2389 if (delim == '\'') |
2825 s = buf.str (); | 2390 s = buf.str (); |
2826 else | 2391 else |
2827 s = do_string_escapes (buf.str ()); | 2392 s = do_string_escapes (buf.str ()); |
2828 | 2393 |
2829 quote_is_transpose = true; | |
2830 convert_spaces_to_comma = true; | |
2831 | |
2832 if (delim == '"') | 2394 if (delim == '"') |
2833 gripe_matlab_incompatible ("\" used as string delimiter"); | 2395 gripe_matlab_incompatible ("\" used as string delimiter"); |
2834 else if (delim == '\'') | 2396 else if (delim == '\'') |
2835 gripe_single_quote_string (); | 2397 gripe_single_quote_string (); |
2836 | 2398 |
2854 } | 2416 } |
2855 | 2417 |
2856 return LEXICAL_ERROR; | 2418 return LEXICAL_ERROR; |
2857 } | 2419 } |
2858 | 2420 |
2859 bool | |
2860 octave_lexer::next_token_is_assign_op (void) | |
2861 { | |
2862 bool retval = false; | |
2863 | |
2864 int c0 = text_yyinput (); | |
2865 | |
2866 switch (c0) | |
2867 { | |
2868 case '=': | |
2869 { | |
2870 int c1 = text_yyinput (); | |
2871 xunput (c1); | |
2872 if (c1 != '=') | |
2873 retval = true; | |
2874 } | |
2875 break; | |
2876 | |
2877 case '+': | |
2878 case '-': | |
2879 case '*': | |
2880 case '/': | |
2881 case '\\': | |
2882 case '&': | |
2883 case '|': | |
2884 { | |
2885 int c1 = text_yyinput (); | |
2886 xunput (c1); | |
2887 if (c1 == '=') | |
2888 retval = true; | |
2889 } | |
2890 break; | |
2891 | |
2892 case '.': | |
2893 { | |
2894 int c1 = text_yyinput (); | |
2895 if (match_any (c1, "+-*/\\")) | |
2896 { | |
2897 int c2 = text_yyinput (); | |
2898 xunput (c2); | |
2899 if (c2 == '=') | |
2900 retval = true; | |
2901 } | |
2902 xunput (c1); | |
2903 } | |
2904 break; | |
2905 | |
2906 case '>': | |
2907 { | |
2908 int c1 = text_yyinput (); | |
2909 if (c1 == '>') | |
2910 { | |
2911 int c2 = text_yyinput (); | |
2912 xunput (c2); | |
2913 if (c2 == '=') | |
2914 retval = true; | |
2915 } | |
2916 xunput (c1); | |
2917 } | |
2918 break; | |
2919 | |
2920 case '<': | |
2921 { | |
2922 int c1 = text_yyinput (); | |
2923 if (c1 == '<') | |
2924 { | |
2925 int c2 = text_yyinput (); | |
2926 xunput (c2); | |
2927 if (c2 == '=') | |
2928 retval = true; | |
2929 } | |
2930 xunput (c1); | |
2931 } | |
2932 break; | |
2933 | |
2934 default: | |
2935 break; | |
2936 } | |
2937 | |
2938 xunput (c0); | |
2939 | |
2940 return retval; | |
2941 } | |
2942 | |
2943 bool | |
2944 octave_lexer::next_token_is_index_op (void) | |
2945 { | |
2946 int c = text_yyinput (); | |
2947 xunput (c); | |
2948 return c == '(' || c == '{'; | |
2949 } | |
2950 | |
2951 int | 2421 int |
2952 octave_lexer::handle_close_bracket (bool spc_gobbled, int bracket_type) | 2422 octave_lexer::handle_close_bracket (int bracket_type) |
2953 { | 2423 { |
2954 int retval = bracket_type; | 2424 int retval = bracket_type; |
2955 | 2425 |
2956 if (! nesting_level.none ()) | 2426 if (! nesting_level.none ()) |
2957 { | 2427 { |
2965 panic_impossible (); | 2435 panic_impossible (); |
2966 } | 2436 } |
2967 | 2437 |
2968 pop_start_state (); | 2438 pop_start_state (); |
2969 | 2439 |
2970 quote_is_transpose = true; | |
2971 convert_spaces_to_comma = true; | |
2972 | |
2973 return retval; | 2440 return retval; |
2974 } | |
2975 | |
2976 void | |
2977 octave_lexer::maybe_unput_comma (int spc_gobbled) | |
2978 { | |
2979 if (nesting_level.is_bracket () | |
2980 || (nesting_level.is_brace () | |
2981 && ! looking_at_object_index.front ())) | |
2982 { | |
2983 int bin_op = next_token_is_bin_op (spc_gobbled); | |
2984 | |
2985 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
2986 | |
2987 int c1 = text_yyinput (); | |
2988 int c2 = text_yyinput (); | |
2989 | |
2990 xunput (c2); | |
2991 xunput (c1); | |
2992 | |
2993 int sep_op = next_token_is_sep_op (); | |
2994 | |
2995 int dot_op = (c1 == '.' | |
2996 && (isalpha (c2) || isspace (c2) || c2 == '_')); | |
2997 | |
2998 if (postfix_un_op || bin_op || sep_op || dot_op) | |
2999 return; | |
3000 | |
3001 int index_op = (c1 == '(' || c1 == '{'); | |
3002 | |
3003 // If there is no space before the indexing op, we don't insert | |
3004 // a comma. | |
3005 | |
3006 if (index_op && ! spc_gobbled) | |
3007 return; | |
3008 | |
3009 maybe_warn_separator_insert (','); | |
3010 | |
3011 xunput (','); | |
3012 } | |
3013 } | 2441 } |
3014 | 2442 |
3015 bool | 2443 bool |
3016 octave_lexer::next_token_can_follow_bin_op (void) | 2444 octave_lexer::next_token_can_follow_bin_op (void) |
3017 { | 2445 { |
3277 } | 2705 } |
3278 | 2706 |
3279 int | 2707 int |
3280 octave_lexer::handle_superclass_identifier (void) | 2708 octave_lexer::handle_superclass_identifier (void) |
3281 { | 2709 { |
3282 eat_continuation (); | |
3283 | |
3284 std::string pkg; | 2710 std::string pkg; |
3285 char *yytxt = flex_yytext (); | 2711 char *yytxt = flex_yytext (); |
3286 std::string meth = strip_trailing_whitespace (yytxt); | 2712 std::string meth = strip_trailing_whitespace (yytxt); |
3287 size_t pos = meth.find ("@"); | 2713 size_t pos = meth.find ("@"); |
3288 std::string cls = meth.substr (pos).substr (1); | 2714 std::string cls = meth.substr (pos).substr (1); |
3307 meth.empty () ? 0 : &(symbol_table::insert (meth)), | 2733 meth.empty () ? 0 : &(symbol_table::insert (meth)), |
3308 cls.empty () ? 0 : &(symbol_table::insert (cls)), | 2734 cls.empty () ? 0 : &(symbol_table::insert (cls)), |
3309 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | 2735 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), |
3310 input_line_number, current_input_column)); | 2736 input_line_number, current_input_column)); |
3311 | 2737 |
3312 convert_spaces_to_comma = true; | |
3313 current_input_column += flex_yyleng (); | 2738 current_input_column += flex_yyleng (); |
3314 | 2739 |
3315 return SUPERCLASSREF; | 2740 return SUPERCLASSREF; |
3316 } | 2741 } |
3317 | 2742 |
3318 int | 2743 int |
3319 octave_lexer::handle_meta_identifier (void) | 2744 octave_lexer::handle_meta_identifier (void) |
3320 { | 2745 { |
3321 eat_continuation (); | |
3322 | |
3323 std::string pkg; | 2746 std::string pkg; |
3324 char *yytxt = flex_yytext (); | 2747 char *yytxt = flex_yytext (); |
3325 std::string cls = strip_trailing_whitespace (yytxt).substr (1); | 2748 std::string cls = strip_trailing_whitespace (yytxt).substr (1); |
3326 size_t pos = cls.find ("."); | 2749 size_t pos = cls.find ("."); |
3327 | 2750 |
3341 push_token (new token (METAQUERY, | 2764 push_token (new token (METAQUERY, |
3342 cls.empty () ? 0 : &(symbol_table::insert (cls)), | 2765 cls.empty () ? 0 : &(symbol_table::insert (cls)), |
3343 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | 2766 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), |
3344 input_line_number, current_input_column)); | 2767 input_line_number, current_input_column)); |
3345 | 2768 |
3346 convert_spaces_to_comma = true; | |
3347 current_input_column += flex_yyleng (); | 2769 current_input_column += flex_yyleng (); |
3348 | 2770 |
3349 return METAQUERY; | 2771 return METAQUERY; |
3350 } | 2772 } |
3351 | 2773 |
3369 // a string that is also a valid identifier. But first, we have to | 2791 // a string that is also a valid identifier. But first, we have to |
3370 // decide whether to insert a comma. | 2792 // decide whether to insert a comma. |
3371 | 2793 |
3372 if (looking_at_indirect_ref) | 2794 if (looking_at_indirect_ref) |
3373 { | 2795 { |
3374 // do_comma_insert_check (); | |
3375 | |
3376 // maybe_unput_comma (spc_gobbled); | |
3377 | |
3378 push_token (new token (STRUCT_ELT, tok, input_line_number, | 2796 push_token (new token (STRUCT_ELT, tok, input_line_number, |
3379 current_input_column)); | 2797 current_input_column)); |
3380 | 2798 |
3381 quote_is_transpose = true; | |
3382 convert_spaces_to_comma = true; | |
3383 looking_for_object_index = true; | 2799 looking_for_object_index = true; |
3384 | 2800 |
3385 current_input_column += flex_yyleng (); | 2801 current_input_column += flex_yyleng (); |
3386 | 2802 |
3387 at_beginning_of_statement = false; | 2803 at_beginning_of_statement = false; |
3413 { | 2829 { |
3414 push_token (new token (FCN_HANDLE, tok, input_line_number, | 2830 push_token (new token (FCN_HANDLE, tok, input_line_number, |
3415 current_input_column)); | 2831 current_input_column)); |
3416 | 2832 |
3417 current_input_column += flex_yyleng (); | 2833 current_input_column += flex_yyleng (); |
3418 quote_is_transpose = false; | |
3419 convert_spaces_to_comma = true; | |
3420 looking_for_object_index = true; | 2834 looking_for_object_index = true; |
3421 | 2835 |
3422 at_beginning_of_statement = false; | 2836 at_beginning_of_statement = false; |
3423 | 2837 |
3424 return FCN_HANDLE; | 2838 return FCN_HANDLE; |
3431 if (kw_token) | 2845 if (kw_token) |
3432 { | 2846 { |
3433 if (kw_token >= 0) | 2847 if (kw_token >= 0) |
3434 { | 2848 { |
3435 current_input_column += flex_yyleng (); | 2849 current_input_column += flex_yyleng (); |
3436 quote_is_transpose = false; | |
3437 convert_spaces_to_comma = true; | |
3438 looking_for_object_index = false; | 2850 looking_for_object_index = false; |
3439 } | 2851 } |
3440 | 2852 |
3441 return kw_token; | 2853 return kw_token; |
3442 } | 2854 } |
3863 gripe_matlab_incompatible_operator (flex_yytext ()); | 3275 gripe_matlab_incompatible_operator (flex_yytext ()); |
3864 | 3276 |
3865 push_token (new token (tok, input_line_number, current_input_column)); | 3277 push_token (new token (tok, input_line_number, current_input_column)); |
3866 | 3278 |
3867 current_input_column += flex_yyleng (); | 3279 current_input_column += flex_yyleng (); |
3868 quote_is_transpose = qit; | |
3869 convert_spaces_to_comma = convert; | |
3870 looking_for_object_index = false; | 3280 looking_for_object_index = false; |
3871 at_beginning_of_statement = bos; | 3281 at_beginning_of_statement = bos; |
3872 | 3282 |
3873 return count_token (tok); | 3283 return count_token (tok); |
3874 } | 3284 } |
3889 tok_val = new token (tok, input_line_number, current_input_column); | 3299 tok_val = new token (tok, input_line_number, current_input_column); |
3890 | 3300 |
3891 push_token (tok_val); | 3301 push_token (tok_val); |
3892 | 3302 |
3893 current_input_column += flex_yyleng (); | 3303 current_input_column += flex_yyleng (); |
3894 quote_is_transpose = false; | |
3895 convert_spaces_to_comma = true; | |
3896 | 3304 |
3897 return count_token_internal (tok); | 3305 return count_token_internal (tok); |
3898 } | 3306 } |
3899 | 3307 |
3900 int | 3308 int |