comparison src/lex.l @ 9474:25ed2d6aacf6

Parse nested functions more accurately.
author David Grundberg <individ@acc.umu.se>
date Thu, 30 Jul 2009 11:52:58 -0400
parents 29563379fa9b
children d9b25c5b8ee5
comparison
equal deleted inserted replaced
9473:833109a9f37f 9474:25ed2d6aacf6
26 26
27 %s COMMAND_START 27 %s COMMAND_START
28 %s MATRIX_START 28 %s MATRIX_START
29 29
30 %x SCRIPT_FILE_BEGIN 30 %x SCRIPT_FILE_BEGIN
31 31 %x FUNCTION_FILE_BEGIN
32 %x NESTED_FUNCTION_END
33 %x NESTED_FUNCTION_BEGIN
34 32
35 %{ 33 %{
36 #ifdef HAVE_CONFIG_H 34 #ifdef HAVE_CONFIG_H
37 #include <config.h> 35 #include <config.h>
38 #endif 36 #endif
280 static int text_yyinput (void); 278 static int text_yyinput (void);
281 static void xunput (char c, char *buf); 279 static void xunput (char c, char *buf);
282 static void fixup_column_count (char *s); 280 static void fixup_column_count (char *s);
283 static void do_comma_insert_check (void); 281 static void do_comma_insert_check (void);
284 static int is_keyword_token (const std::string& s); 282 static int is_keyword_token (const std::string& s);
285 static void prep_for_function (void);
286 static void prep_for_nested_function (void);
287 static int process_comment (bool start_in_block, bool& eof); 283 static int process_comment (bool start_in_block, bool& eof);
288 static bool match_any (char c, const char *s); 284 static bool match_any (char c, const char *s);
289 static bool next_token_is_sep_op (void); 285 static bool next_token_is_sep_op (void);
290 static bool next_token_is_bin_op (bool spc_prev); 286 static bool next_token_is_bin_op (bool spc_prev);
291 static bool next_token_is_postfix_unary_op (bool spc_prev); 287 static bool next_token_is_postfix_unary_op (bool spc_prev);
327 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) 323 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*)
328 EXPON ([DdEe][+-]?{D}+) 324 EXPON ([DdEe][+-]?{D}+)
329 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) 325 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+))
330 %% 326 %%
331 327
328 %{
329 // Make script and function files start with a bogus token. This makes
330 // the parser go down a special path.
331 %}
332
332 <SCRIPT_FILE_BEGIN>. { 333 <SCRIPT_FILE_BEGIN>. {
333 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>."); 334 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>.");
334 335
335 BEGIN (INITIAL); 336 BEGIN (INITIAL);
336 xunput (yytext[0], yytext); 337 xunput (yytext[0], yytext);
337 COUNT_TOK_AND_RETURN (SCRIPT); 338 COUNT_TOK_AND_RETURN (SCRIPT_FILE);
338 } 339 }
339 340
340 <NESTED_FUNCTION_END>. { 341 <FUNCTION_FILE_BEGIN>. {
341 LEXER_DEBUG ("<NESTED_FUNCTION_END>."); 342 LEXER_DEBUG ("<FUNCTION_FILE_BEGIN>.");
342
343 BEGIN (NESTED_FUNCTION_BEGIN);
344 xunput (yytext[0], yytext);
345
346 lexer_flags.at_beginning_of_statement = true;
347
348 COUNT_TOK_AND_RETURN (';');
349 }
350
351 <NESTED_FUNCTION_BEGIN>. {
352 LEXER_DEBUG ("<NESTED_FUNCTION_BEGIN>.");
353 343
354 BEGIN (INITIAL); 344 BEGIN (INITIAL);
355 xunput (yytext[0], yytext); 345 xunput (yytext[0], yytext);
356 346 COUNT_TOK_AND_RETURN (FUNCTION_FILE);
357 prep_for_nested_function ();
358
359 COUNT_TOK_AND_RETURN (FCN);
360 } 347 }
361 348
362 %{ 349 %{
363 // Help and other command-style functions. 350 // Help and other command-style functions.
364 %} 351 %}
1002 %} 989 %}
1003 990
1004 . { 991 . {
1005 LEXER_DEBUG ("."); 992 LEXER_DEBUG (".");
1006 993
1007 // EOF happens here if we are parsing nested functions.
1008
1009 xunput (yytext[0], yytext); 994 xunput (yytext[0], yytext);
1010 995
1011 int c = text_yyinput (); 996 int c = text_yyinput ();
1012 997
1013 if (c != EOF) 998 if (c != EOF)
1056 { 1041 {
1057 // Start off on the right foot. 1042 // Start off on the right foot.
1058 BEGIN (INITIAL); 1043 BEGIN (INITIAL);
1059 1044
1060 parser_end_of_input = false; 1045 parser_end_of_input = false;
1061 end_tokens_expected = 0;
1062 1046
1063 while (! symtab_context.empty ()) 1047 while (! symtab_context.empty ())
1064 symtab_context.pop (); 1048 symtab_context.pop ();
1065
1066 symbol_table::reset_parent_scope ();
1067 1049
1068 // We do want a prompt by default. 1050 // We do want a prompt by default.
1069 promptflag = 1; 1051 promptflag = 1;
1070 1052
1071 // We are not in a block comment. 1053 // We are not in a block comment.
1377 1359
1378 void 1360 void
1379 delete_input_buffer (void *buf) 1361 delete_input_buffer (void *buf)
1380 { 1362 {
1381 delete_buffer (static_cast<YY_BUFFER_STATE> (buf)); 1363 delete_buffer (static_cast<YY_BUFFER_STATE> (buf));
1382 }
1383
1384 static void
1385 prep_for_function (void)
1386 {
1387 end_tokens_expected++;
1388
1389 promptflag--;
1390
1391 lexer_flags.defining_func = true;
1392 lexer_flags.parsed_function_name = false;
1393
1394 if (! (reading_fcn_file || reading_script_file))
1395 input_line_number = 1;
1396 }
1397
1398 static void
1399 prep_for_nested_function (void)
1400 {
1401 lexer_flags.parsing_nested_function = 1;
1402 help_buf.push (std::string ());
1403 prep_for_function ();
1404 // We're still only expecting one end token for this set of functions.
1405 end_tokens_expected--;
1406 yylval.tok_val = new token (input_line_number, current_input_column);
1407 token_stack.push (yylval.tok_val);
1408 } 1364 }
1409 1365
1410 static bool 1366 static bool
1411 inside_any_object_index (void) 1367 inside_any_object_index (void)
1412 { 1368 {
1464 if (inside_any_object_index () 1420 if (inside_any_object_index ()
1465 || (lexer_flags.defining_func 1421 || (lexer_flags.defining_func
1466 && ! (lexer_flags.looking_at_return_list 1422 && ! (lexer_flags.looking_at_return_list
1467 || lexer_flags.parsed_function_name))) 1423 || lexer_flags.parsed_function_name)))
1468 return 0; 1424 return 0;
1469 else 1425
1470 { 1426 yylval.tok_val = new token (token::simple_end, l, c);
1471 if (reading_fcn_file && end_tokens_expected == 1) 1427 lexer_flags.at_beginning_of_statement = true;
1472 return -1;
1473 else
1474 {
1475 yylval.tok_val = new token (token::simple_end, l, c);
1476 lexer_flags.at_beginning_of_statement = true;
1477 end_tokens_expected--;
1478 }
1479 }
1480 break; 1428 break;
1481 1429
1482 case end_try_catch_kw: 1430 case end_try_catch_kw:
1483 yylval.tok_val = new token (token::try_catch_end, l, c); 1431 yylval.tok_val = new token (token::try_catch_end, l, c);
1484 lexer_flags.at_beginning_of_statement = true; 1432 lexer_flags.at_beginning_of_statement = true;
1485 end_tokens_expected--;
1486 break; 1433 break;
1487 1434
1488 case end_unwind_protect_kw: 1435 case end_unwind_protect_kw:
1489 yylval.tok_val = new token (token::unwind_protect_end, l, c); 1436 yylval.tok_val = new token (token::unwind_protect_end, l, c);
1490 lexer_flags.at_beginning_of_statement = true; 1437 lexer_flags.at_beginning_of_statement = true;
1491 end_tokens_expected--;
1492 break; 1438 break;
1493 1439
1494 case endfor_kw: 1440 case endfor_kw:
1495 yylval.tok_val = new token (token::for_end, l, c); 1441 yylval.tok_val = new token (token::for_end, l, c);
1496 lexer_flags.at_beginning_of_statement = true; 1442 lexer_flags.at_beginning_of_statement = true;
1497 end_tokens_expected--;
1498 break; 1443 break;
1499 1444
1500 case endfunction_kw: 1445 case endfunction_kw:
1501 { 1446 yylval.tok_val = new token (token::function_end, l, c);
1502 if (reading_fcn_file && end_tokens_expected == 1) 1447 lexer_flags.at_beginning_of_statement = true;
1503 return -1;
1504 else
1505 {
1506 yylval.tok_val = new token (token::function_end, l, c);
1507 lexer_flags.at_beginning_of_statement = true;
1508 end_tokens_expected--;
1509 }
1510 }
1511 break; 1448 break;
1512 1449
1513 case endif_kw: 1450 case endif_kw:
1514 yylval.tok_val = new token (token::if_end, l, c); 1451 yylval.tok_val = new token (token::if_end, l, c);
1515 lexer_flags.at_beginning_of_statement = true; 1452 lexer_flags.at_beginning_of_statement = true;
1516 end_tokens_expected--;
1517 break; 1453 break;
1518 1454
1519 case endswitch_kw: 1455 case endswitch_kw:
1520 yylval.tok_val = new token (token::switch_end, l, c); 1456 yylval.tok_val = new token (token::switch_end, l, c);
1521 lexer_flags.at_beginning_of_statement = true; 1457 lexer_flags.at_beginning_of_statement = true;
1522 end_tokens_expected--;
1523 break; 1458 break;
1524 1459
1525 case endwhile_kw: 1460 case endwhile_kw:
1526 yylval.tok_val = new token (token::while_end, l, c); 1461 yylval.tok_val = new token (token::while_end, l, c);
1527 lexer_flags.at_beginning_of_statement = true; 1462 lexer_flags.at_beginning_of_statement = true;
1528 end_tokens_expected--;
1529 break; 1463 break;
1530 1464
1531 case for_kw: 1465 case for_kw:
1532 case while_kw: 1466 case while_kw:
1533 end_tokens_expected++;
1534 promptflag--; 1467 promptflag--;
1535 lexer_flags.looping++; 1468 lexer_flags.looping++;
1536 break; 1469 break;
1537 1470
1538 case do_kw: 1471 case do_kw:
1542 break; 1475 break;
1543 1476
1544 case try_kw: 1477 case try_kw:
1545 case unwind_protect_kw: 1478 case unwind_protect_kw:
1546 lexer_flags.at_beginning_of_statement = true; 1479 lexer_flags.at_beginning_of_statement = true;
1547 end_tokens_expected++;
1548 promptflag--; 1480 promptflag--;
1549 break; 1481 break;
1550 1482
1551 case if_kw: 1483 case if_kw:
1552 case switch_kw: 1484 case switch_kw:
1553 end_tokens_expected++;
1554 promptflag--; 1485 promptflag--;
1555 break; 1486 break;
1556 1487
1557 case function_kw: 1488 case function_kw:
1558 { 1489 promptflag--;
1559 if (lexer_flags.defining_func) 1490
1560 { 1491 lexer_flags.defining_func = true;
1561 if (reading_fcn_file) 1492 lexer_flags.parsed_function_name = false;
1562 { 1493
1563 if (lexer_flags.parsing_nested_function) 1494 if (! (reading_fcn_file || reading_script_file))
1564 { 1495 input_line_number = 1;
1565 BEGIN (NESTED_FUNCTION_END);
1566
1567 yylval.tok_val = new token (token::function_end, l, c);
1568 token_stack.push (yylval.tok_val);
1569
1570 lexer_flags.at_beginning_of_statement = true;
1571
1572 return END;
1573 }
1574 else
1575 {
1576 prep_for_nested_function ();
1577
1578 return FCN;
1579 }
1580 }
1581 else
1582 {
1583 error ("nested functions not implemented in this context");
1584
1585 if ((reading_fcn_file || reading_script_file)
1586 && ! curr_fcn_file_name.empty ())
1587 error ("near line %d of file `%s.m'",
1588 input_line_number, curr_fcn_file_name.c_str ());
1589 else
1590 error ("near line %d", input_line_number);
1591
1592 return LEXICAL_ERROR;
1593 }
1594 }
1595 else
1596 prep_for_function ();
1597 }
1598 break; 1496 break;
1599 1497
1600 case magic_file_kw: 1498 case magic_file_kw:
1601 { 1499 {
1602 if ((reading_fcn_file || reading_script_file) 1500 if ((reading_fcn_file || reading_script_file)
3252 looping = 0; 3150 looping = 0;
3253 3151
3254 // Not initially defining a function. 3152 // Not initially defining a function.
3255 defining_func = false; 3153 defining_func = false;
3256 parsed_function_name = false; 3154 parsed_function_name = false;
3257 parsing_nested_function = 0;
3258 parsing_class_method = false; 3155 parsing_class_method = false;
3259 3156
3260 // Not initiallly looking at a function handle. 3157 // Not initiallly looking at a function handle.
3261 looking_at_function_handle = 0; 3158 looking_at_function_handle = 0;
3262 3159
3339 3236
3340 return retval; 3237 return retval;
3341 } 3238 }
3342 3239
3343 void 3240 void
3344 prep_lexer_for_script (void) 3241 prep_lexer_for_script_file (void)
3345 { 3242 {
3346 BEGIN (SCRIPT_FILE_BEGIN); 3243 BEGIN (SCRIPT_FILE_BEGIN);
3244 }
3245
3246 void
3247 prep_lexer_for_function_file (void)
3248 {
3249 BEGIN (FUNCTION_FILE_BEGIN);
3347 } 3250 }
3348 3251
3349 static void 3252 static void
3350 maybe_warn_separator_insert (char sep) 3253 maybe_warn_separator_insert (char sep)
3351 { 3254 {
3493 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break; 3396 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break;
3494 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break; 3397 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
3495 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break; 3398 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
3496 case FCN: std::cerr << "FCN\n"; break; 3399 case FCN: std::cerr << "FCN\n"; break;
3497 case CLOSE_BRACE: std::cerr << "CLOSE_BRACE\n"; break; 3400 case CLOSE_BRACE: std::cerr << "CLOSE_BRACE\n"; break;
3401 case SCRIPT_FILE: std::cerr << "SCRIPT_FILE\n"; break;
3402 case FUNCTION_FILE: std::cerr << "FUNCTION_FILE\n"; break;
3498 case '\n': std::cerr << "\\n\n"; break; 3403 case '\n': std::cerr << "\\n\n"; break;
3499 case '\r': std::cerr << "\\r\n"; break; 3404 case '\r': std::cerr << "\\r\n"; break;
3500 case '\t': std::cerr << "TAB\n"; break; 3405 case '\t': std::cerr << "TAB\n"; break;
3501 default: 3406 default:
3502 { 3407 {
3530 3435
3531 case SCRIPT_FILE_BEGIN: 3436 case SCRIPT_FILE_BEGIN:
3532 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl; 3437 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl;
3533 break; 3438 break;
3534 3439
3535 case NESTED_FUNCTION_END: 3440 case FUNCTION_FILE_BEGIN:
3536 std::cerr << "NESTED_FUNCTION_END" << std::endl; 3441 std::cerr << "FUNCTION_FILE_BEGIN" << std::endl;
3537 break;
3538
3539 case NESTED_FUNCTION_BEGIN:
3540 std::cerr << "NESTED_FUNCTION_BEGIN" << std::endl;
3541 break; 3442 break;
3542 3443
3543 default: 3444 default:
3544 std::cerr << "UNKNOWN START STATE!" << std::endl; 3445 std::cerr << "UNKNOWN START STATE!" << std::endl;
3545 break; 3446 break;