Mercurial > octave-nkf
comparison src/lex.l @ 9474:25ed2d6aacf6
Parse nested functions more accurately.
author | David Grundberg <individ@acc.umu.se> |
---|---|
date | Thu, 30 Jul 2009 11:52:58 -0400 |
parents | 29563379fa9b |
children | d9b25c5b8ee5 |
comparison
equal
deleted
inserted
replaced
9473:833109a9f37f | 9474:25ed2d6aacf6 |
---|---|
26 | 26 |
27 %s COMMAND_START | 27 %s COMMAND_START |
28 %s MATRIX_START | 28 %s MATRIX_START |
29 | 29 |
30 %x SCRIPT_FILE_BEGIN | 30 %x SCRIPT_FILE_BEGIN |
31 | 31 %x FUNCTION_FILE_BEGIN |
32 %x NESTED_FUNCTION_END | |
33 %x NESTED_FUNCTION_BEGIN | |
34 | 32 |
35 %{ | 33 %{ |
36 #ifdef HAVE_CONFIG_H | 34 #ifdef HAVE_CONFIG_H |
37 #include <config.h> | 35 #include <config.h> |
38 #endif | 36 #endif |
280 static int text_yyinput (void); | 278 static int text_yyinput (void); |
281 static void xunput (char c, char *buf); | 279 static void xunput (char c, char *buf); |
282 static void fixup_column_count (char *s); | 280 static void fixup_column_count (char *s); |
283 static void do_comma_insert_check (void); | 281 static void do_comma_insert_check (void); |
284 static int is_keyword_token (const std::string& s); | 282 static int is_keyword_token (const std::string& s); |
285 static void prep_for_function (void); | |
286 static void prep_for_nested_function (void); | |
287 static int process_comment (bool start_in_block, bool& eof); | 283 static int process_comment (bool start_in_block, bool& eof); |
288 static bool match_any (char c, const char *s); | 284 static bool match_any (char c, const char *s); |
289 static bool next_token_is_sep_op (void); | 285 static bool next_token_is_sep_op (void); |
290 static bool next_token_is_bin_op (bool spc_prev); | 286 static bool next_token_is_bin_op (bool spc_prev); |
291 static bool next_token_is_postfix_unary_op (bool spc_prev); | 287 static bool next_token_is_postfix_unary_op (bool spc_prev); |
327 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) | 323 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) |
328 EXPON ([DdEe][+-]?{D}+) | 324 EXPON ([DdEe][+-]?{D}+) |
329 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) | 325 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) |
330 %% | 326 %% |
331 | 327 |
328 %{ | |
329 // Make script and function files start with a bogus token. This makes | |
330 // the parser go down a special path. | |
331 %} | |
332 | |
332 <SCRIPT_FILE_BEGIN>. { | 333 <SCRIPT_FILE_BEGIN>. { |
333 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>."); | 334 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>."); |
334 | 335 |
335 BEGIN (INITIAL); | 336 BEGIN (INITIAL); |
336 xunput (yytext[0], yytext); | 337 xunput (yytext[0], yytext); |
337 COUNT_TOK_AND_RETURN (SCRIPT); | 338 COUNT_TOK_AND_RETURN (SCRIPT_FILE); |
338 } | 339 } |
339 | 340 |
340 <NESTED_FUNCTION_END>. { | 341 <FUNCTION_FILE_BEGIN>. { |
341 LEXER_DEBUG ("<NESTED_FUNCTION_END>."); | 342 LEXER_DEBUG ("<FUNCTION_FILE_BEGIN>."); |
342 | |
343 BEGIN (NESTED_FUNCTION_BEGIN); | |
344 xunput (yytext[0], yytext); | |
345 | |
346 lexer_flags.at_beginning_of_statement = true; | |
347 | |
348 COUNT_TOK_AND_RETURN (';'); | |
349 } | |
350 | |
351 <NESTED_FUNCTION_BEGIN>. { | |
352 LEXER_DEBUG ("<NESTED_FUNCTION_BEGIN>."); | |
353 | 343 |
354 BEGIN (INITIAL); | 344 BEGIN (INITIAL); |
355 xunput (yytext[0], yytext); | 345 xunput (yytext[0], yytext); |
356 | 346 COUNT_TOK_AND_RETURN (FUNCTION_FILE); |
357 prep_for_nested_function (); | |
358 | |
359 COUNT_TOK_AND_RETURN (FCN); | |
360 } | 347 } |
361 | 348 |
362 %{ | 349 %{ |
363 // Help and other command-style functions. | 350 // Help and other command-style functions. |
364 %} | 351 %} |
1002 %} | 989 %} |
1003 | 990 |
1004 . { | 991 . { |
1005 LEXER_DEBUG ("."); | 992 LEXER_DEBUG ("."); |
1006 | 993 |
1007 // EOF happens here if we are parsing nested functions. | |
1008 | |
1009 xunput (yytext[0], yytext); | 994 xunput (yytext[0], yytext); |
1010 | 995 |
1011 int c = text_yyinput (); | 996 int c = text_yyinput (); |
1012 | 997 |
1013 if (c != EOF) | 998 if (c != EOF) |
1056 { | 1041 { |
1057 // Start off on the right foot. | 1042 // Start off on the right foot. |
1058 BEGIN (INITIAL); | 1043 BEGIN (INITIAL); |
1059 | 1044 |
1060 parser_end_of_input = false; | 1045 parser_end_of_input = false; |
1061 end_tokens_expected = 0; | |
1062 | 1046 |
1063 while (! symtab_context.empty ()) | 1047 while (! symtab_context.empty ()) |
1064 symtab_context.pop (); | 1048 symtab_context.pop (); |
1065 | |
1066 symbol_table::reset_parent_scope (); | |
1067 | 1049 |
1068 // We do want a prompt by default. | 1050 // We do want a prompt by default. |
1069 promptflag = 1; | 1051 promptflag = 1; |
1070 | 1052 |
1071 // We are not in a block comment. | 1053 // We are not in a block comment. |
1377 | 1359 |
1378 void | 1360 void |
1379 delete_input_buffer (void *buf) | 1361 delete_input_buffer (void *buf) |
1380 { | 1362 { |
1381 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); | 1363 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); |
1382 } | |
1383 | |
1384 static void | |
1385 prep_for_function (void) | |
1386 { | |
1387 end_tokens_expected++; | |
1388 | |
1389 promptflag--; | |
1390 | |
1391 lexer_flags.defining_func = true; | |
1392 lexer_flags.parsed_function_name = false; | |
1393 | |
1394 if (! (reading_fcn_file || reading_script_file)) | |
1395 input_line_number = 1; | |
1396 } | |
1397 | |
1398 static void | |
1399 prep_for_nested_function (void) | |
1400 { | |
1401 lexer_flags.parsing_nested_function = 1; | |
1402 help_buf.push (std::string ()); | |
1403 prep_for_function (); | |
1404 // We're still only expecting one end token for this set of functions. | |
1405 end_tokens_expected--; | |
1406 yylval.tok_val = new token (input_line_number, current_input_column); | |
1407 token_stack.push (yylval.tok_val); | |
1408 } | 1364 } |
1409 | 1365 |
1410 static bool | 1366 static bool |
1411 inside_any_object_index (void) | 1367 inside_any_object_index (void) |
1412 { | 1368 { |
1464 if (inside_any_object_index () | 1420 if (inside_any_object_index () |
1465 || (lexer_flags.defining_func | 1421 || (lexer_flags.defining_func |
1466 && ! (lexer_flags.looking_at_return_list | 1422 && ! (lexer_flags.looking_at_return_list |
1467 || lexer_flags.parsed_function_name))) | 1423 || lexer_flags.parsed_function_name))) |
1468 return 0; | 1424 return 0; |
1469 else | 1425 |
1470 { | 1426 yylval.tok_val = new token (token::simple_end, l, c); |
1471 if (reading_fcn_file && end_tokens_expected == 1) | 1427 lexer_flags.at_beginning_of_statement = true; |
1472 return -1; | |
1473 else | |
1474 { | |
1475 yylval.tok_val = new token (token::simple_end, l, c); | |
1476 lexer_flags.at_beginning_of_statement = true; | |
1477 end_tokens_expected--; | |
1478 } | |
1479 } | |
1480 break; | 1428 break; |
1481 | 1429 |
1482 case end_try_catch_kw: | 1430 case end_try_catch_kw: |
1483 yylval.tok_val = new token (token::try_catch_end, l, c); | 1431 yylval.tok_val = new token (token::try_catch_end, l, c); |
1484 lexer_flags.at_beginning_of_statement = true; | 1432 lexer_flags.at_beginning_of_statement = true; |
1485 end_tokens_expected--; | |
1486 break; | 1433 break; |
1487 | 1434 |
1488 case end_unwind_protect_kw: | 1435 case end_unwind_protect_kw: |
1489 yylval.tok_val = new token (token::unwind_protect_end, l, c); | 1436 yylval.tok_val = new token (token::unwind_protect_end, l, c); |
1490 lexer_flags.at_beginning_of_statement = true; | 1437 lexer_flags.at_beginning_of_statement = true; |
1491 end_tokens_expected--; | |
1492 break; | 1438 break; |
1493 | 1439 |
1494 case endfor_kw: | 1440 case endfor_kw: |
1495 yylval.tok_val = new token (token::for_end, l, c); | 1441 yylval.tok_val = new token (token::for_end, l, c); |
1496 lexer_flags.at_beginning_of_statement = true; | 1442 lexer_flags.at_beginning_of_statement = true; |
1497 end_tokens_expected--; | |
1498 break; | 1443 break; |
1499 | 1444 |
1500 case endfunction_kw: | 1445 case endfunction_kw: |
1501 { | 1446 yylval.tok_val = new token (token::function_end, l, c); |
1502 if (reading_fcn_file && end_tokens_expected == 1) | 1447 lexer_flags.at_beginning_of_statement = true; |
1503 return -1; | |
1504 else | |
1505 { | |
1506 yylval.tok_val = new token (token::function_end, l, c); | |
1507 lexer_flags.at_beginning_of_statement = true; | |
1508 end_tokens_expected--; | |
1509 } | |
1510 } | |
1511 break; | 1448 break; |
1512 | 1449 |
1513 case endif_kw: | 1450 case endif_kw: |
1514 yylval.tok_val = new token (token::if_end, l, c); | 1451 yylval.tok_val = new token (token::if_end, l, c); |
1515 lexer_flags.at_beginning_of_statement = true; | 1452 lexer_flags.at_beginning_of_statement = true; |
1516 end_tokens_expected--; | |
1517 break; | 1453 break; |
1518 | 1454 |
1519 case endswitch_kw: | 1455 case endswitch_kw: |
1520 yylval.tok_val = new token (token::switch_end, l, c); | 1456 yylval.tok_val = new token (token::switch_end, l, c); |
1521 lexer_flags.at_beginning_of_statement = true; | 1457 lexer_flags.at_beginning_of_statement = true; |
1522 end_tokens_expected--; | |
1523 break; | 1458 break; |
1524 | 1459 |
1525 case endwhile_kw: | 1460 case endwhile_kw: |
1526 yylval.tok_val = new token (token::while_end, l, c); | 1461 yylval.tok_val = new token (token::while_end, l, c); |
1527 lexer_flags.at_beginning_of_statement = true; | 1462 lexer_flags.at_beginning_of_statement = true; |
1528 end_tokens_expected--; | |
1529 break; | 1463 break; |
1530 | 1464 |
1531 case for_kw: | 1465 case for_kw: |
1532 case while_kw: | 1466 case while_kw: |
1533 end_tokens_expected++; | |
1534 promptflag--; | 1467 promptflag--; |
1535 lexer_flags.looping++; | 1468 lexer_flags.looping++; |
1536 break; | 1469 break; |
1537 | 1470 |
1538 case do_kw: | 1471 case do_kw: |
1542 break; | 1475 break; |
1543 | 1476 |
1544 case try_kw: | 1477 case try_kw: |
1545 case unwind_protect_kw: | 1478 case unwind_protect_kw: |
1546 lexer_flags.at_beginning_of_statement = true; | 1479 lexer_flags.at_beginning_of_statement = true; |
1547 end_tokens_expected++; | |
1548 promptflag--; | 1480 promptflag--; |
1549 break; | 1481 break; |
1550 | 1482 |
1551 case if_kw: | 1483 case if_kw: |
1552 case switch_kw: | 1484 case switch_kw: |
1553 end_tokens_expected++; | |
1554 promptflag--; | 1485 promptflag--; |
1555 break; | 1486 break; |
1556 | 1487 |
1557 case function_kw: | 1488 case function_kw: |
1558 { | 1489 promptflag--; |
1559 if (lexer_flags.defining_func) | 1490 |
1560 { | 1491 lexer_flags.defining_func = true; |
1561 if (reading_fcn_file) | 1492 lexer_flags.parsed_function_name = false; |
1562 { | 1493 |
1563 if (lexer_flags.parsing_nested_function) | 1494 if (! (reading_fcn_file || reading_script_file)) |
1564 { | 1495 input_line_number = 1; |
1565 BEGIN (NESTED_FUNCTION_END); | |
1566 | |
1567 yylval.tok_val = new token (token::function_end, l, c); | |
1568 token_stack.push (yylval.tok_val); | |
1569 | |
1570 lexer_flags.at_beginning_of_statement = true; | |
1571 | |
1572 return END; | |
1573 } | |
1574 else | |
1575 { | |
1576 prep_for_nested_function (); | |
1577 | |
1578 return FCN; | |
1579 } | |
1580 } | |
1581 else | |
1582 { | |
1583 error ("nested functions not implemented in this context"); | |
1584 | |
1585 if ((reading_fcn_file || reading_script_file) | |
1586 && ! curr_fcn_file_name.empty ()) | |
1587 error ("near line %d of file `%s.m'", | |
1588 input_line_number, curr_fcn_file_name.c_str ()); | |
1589 else | |
1590 error ("near line %d", input_line_number); | |
1591 | |
1592 return LEXICAL_ERROR; | |
1593 } | |
1594 } | |
1595 else | |
1596 prep_for_function (); | |
1597 } | |
1598 break; | 1496 break; |
1599 | 1497 |
1600 case magic_file_kw: | 1498 case magic_file_kw: |
1601 { | 1499 { |
1602 if ((reading_fcn_file || reading_script_file) | 1500 if ((reading_fcn_file || reading_script_file) |
3252 looping = 0; | 3150 looping = 0; |
3253 | 3151 |
3254 // Not initially defining a function. | 3152 // Not initially defining a function. |
3255 defining_func = false; | 3153 defining_func = false; |
3256 parsed_function_name = false; | 3154 parsed_function_name = false; |
3257 parsing_nested_function = 0; | |
3258 parsing_class_method = false; | 3155 parsing_class_method = false; |
3259 | 3156 |
3260 // Not initially looking at a function handle. | 3157 // Not initially looking at a function handle. |
3261 looking_at_function_handle = 0; | 3158 looking_at_function_handle = 0; |
3262 | 3159 |
3339 | 3236 |
3340 return retval; | 3237 return retval; |
3341 } | 3238 } |
3342 | 3239 |
3343 void | 3240 void |
3344 prep_lexer_for_script (void) | 3241 prep_lexer_for_script_file (void) |
3345 { | 3242 { |
3346 BEGIN (SCRIPT_FILE_BEGIN); | 3243 BEGIN (SCRIPT_FILE_BEGIN); |
3244 } | |
3245 | |
3246 void | |
3247 prep_lexer_for_function_file (void) | |
3248 { | |
3249 BEGIN (FUNCTION_FILE_BEGIN); | |
3347 } | 3250 } |
3348 | 3251 |
3349 static void | 3252 static void |
3350 maybe_warn_separator_insert (char sep) | 3253 maybe_warn_separator_insert (char sep) |
3351 { | 3254 { |
3493 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; | 3396 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; |
3494 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; | 3397 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; |
3495 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; | 3398 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; |
3496 case FCN: std::cerr << "FCN\n"; break; | 3399 case FCN: std::cerr << "FCN\n"; break; |
3497 case CLOSE_BRACE: std::cerr << "CLOSE_BRACE\n"; break; | 3400 case CLOSE_BRACE: std::cerr << "CLOSE_BRACE\n"; break; |
3401 case SCRIPT_FILE: std::cerr << "SCRIPT_FILE\n"; break; | |
3402 case FUNCTION_FILE: std::cerr << "FUNCTION_FILE\n"; break; | |
3498 case '\n': std::cerr << "\\n\n"; break; | 3403 case '\n': std::cerr << "\\n\n"; break; |
3499 case '\r': std::cerr << "\\r\n"; break; | 3404 case '\r': std::cerr << "\\r\n"; break; |
3500 case '\t': std::cerr << "TAB\n"; break; | 3405 case '\t': std::cerr << "TAB\n"; break; |
3501 default: | 3406 default: |
3502 { | 3407 { |
3530 | 3435 |
3531 case SCRIPT_FILE_BEGIN: | 3436 case SCRIPT_FILE_BEGIN: |
3532 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl; | 3437 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl; |
3533 break; | 3438 break; |
3534 | 3439 |
3535 case NESTED_FUNCTION_END: | 3440 case FUNCTION_FILE_BEGIN: |
3536 std::cerr << "NESTED_FUNCTION_END" << std::endl; | 3441 std::cerr << "FUNCTION_FILE_BEGIN" << std::endl; |
3537 break; | |
3538 | |
3539 case NESTED_FUNCTION_BEGIN: | |
3540 std::cerr << "NESTED_FUNCTION_BEGIN" << std::endl; | |
3541 break; | 3442 break; |
3542 | 3443 |
3543 default: | 3444 default: |
3544 std::cerr << "UNKNOWN START STATE!" << std::endl; | 3445 std::cerr << "UNKNOWN START STATE!" << std::endl; |
3545 break; | 3446 break; |