Mercurial > octave
comparison libinterp/parse-tree/lex.ll @ 16119:b31eb56f4d84
maint: reorder class definitions in lex.ll
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Tue, 26 Feb 2013 12:01:54 -0500 |
parents | f8e463523229 |
children | 4b68eb9b98b0 |
comparison
equal
deleted
inserted
replaced
16118:f8e463523229 | 16119:b31eb56f4d84 |
---|---|
1419 if (! eof) | 1419 if (! eof) |
1420 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); | 1420 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); |
1421 } | 1421 } |
1422 | 1422 |
1423 return status; | 1423 return status; |
1424 } | |
1425 | |
1426 DEFUN (__display_tokens__, args, nargout, | |
1427 "-*- texinfo -*-\n\ | |
1428 @deftypefn {Built-in Function} {} __display_tokens__ ()\n\ | |
1429 Query or set the internal variable that determines whether Octave's\n\ | |
1430 lexer displays tokens as they are read.\n\ | |
1431 @end deftypefn") | |
1432 { | |
1433 return SET_INTERNAL_VARIABLE (display_tokens); | |
1434 } | |
1435 | |
1436 DEFUN (__token_count__, , , | |
1437 "-*- texinfo -*-\n\ | |
1438 @deftypefn {Built-in Function} {} __token_count__ ()\n\ | |
1439 Number of language tokens processed since Octave startup.\n\ | |
1440 @end deftypefn") | |
1441 { | |
1442 return octave_value (Vtoken_count); | |
1443 } | |
1444 | |
// Built-in that queries or sets lexer_debug_flag, the switch that makes
// text_yyinput and xunput trace the characters they read and push back.
// The previous value is returned.
DEFUN (__lexer_debug_flag__, args, nargout,
  "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val})\n\
Undocumented internal function.\n\
@end deftypefn")
{
  return set_internal_variable (lexer_debug_flag, args, nargout,
                                "__lexer_debug_flag__");
}
1458 | |
1459 class | |
1460 flex_stream_reader : public stream_reader | |
1461 { | |
1462 public: | |
1463 flex_stream_reader (lexical_feedback *l, char *buf_arg) | |
1464 : stream_reader (), lexer (l), buf (buf_arg) | |
1465 { } | |
1466 | |
1467 int getc (void) { return lexer->text_yyinput (); } | |
1468 int ungetc (int c) { lexer->xunput (c, buf); return 0; } | |
1469 | |
1470 private: | |
1471 | |
1472 // No copying! | |
1473 | |
1474 flex_stream_reader (const flex_stream_reader&); | |
1475 | |
1476 flex_stream_reader& operator = (const flex_stream_reader&); | |
1477 | |
1478 lexical_feedback *lexer; | |
1479 | |
1480 char *buf; | |
1481 }; | |
1482 | |
1483 lexical_feedback::~lexical_feedback (void) | |
1484 { | |
1485 // Clear out the stack of token info used to track line and | |
1486 // column numbers. | |
1487 | |
1488 while (! token_stack.empty ()) | |
1489 { | |
1490 delete token_stack.top (); | |
1491 token_stack.pop (); | |
1492 } | |
1493 } | |
1494 | |
1495 // GAG. | |
1496 // | |
1497 // If we're reading a matrix and the next character is '[', make sure | |
1498 // that we insert a comma ahead of it. | |
1499 | |
1500 void | |
1501 lexical_feedback::do_comma_insert_check (void) | |
1502 { | |
1503 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE); | |
1504 | |
1505 int c = text_yyinput (); | |
1506 | |
1507 xunput (c, yytext); | |
1508 | |
1509 if (spc_gobbled) | |
1510 xunput (' ', yytext); | |
1511 | |
1512 do_comma_insert = (! looking_at_object_index.front () | |
1513 && bracketflag && c == '['); | |
1514 } | |
1515 | |
1516 int | |
1517 lexical_feedback::text_yyinput (void) | |
1518 { | |
1519 int c = yyinput (); | |
1520 | |
1521 if (lexer_debug_flag) | |
1522 { | |
1523 std::cerr << "I: "; | |
1524 display_character (c); | |
1525 std::cerr << std::endl; | |
1526 } | |
1527 | |
1528 // Convert CRLF into just LF and single CR into LF. | |
1529 | |
1530 if (c == '\r') | |
1531 { | |
1532 c = yyinput (); | |
1533 | |
1534 if (lexer_debug_flag) | |
1535 { | |
1536 std::cerr << "I: "; | |
1537 display_character (c); | |
1538 std::cerr << std::endl; | |
1539 } | |
1540 | |
1541 if (c != '\n') | |
1542 { | |
1543 xunput (c, yytext); | |
1544 c = '\n'; | |
1545 } | |
1546 } | |
1547 | |
1548 if (c == '\n') | |
1549 input_line_number++; | |
1550 | |
1551 return c; | |
1552 } | |
1553 | |
1554 void | |
1555 lexical_feedback::xunput (char c, char *buf) | |
1556 { | |
1557 if (lexer_debug_flag) | |
1558 { | |
1559 std::cerr << "U: "; | |
1560 display_character (c); | |
1561 std::cerr << std::endl; | |
1562 } | |
1563 | |
1564 if (c == '\n') | |
1565 input_line_number--; | |
1566 | |
1567 yyunput (c, buf); | |
1568 } | |
1569 | |
1570 // If we read some newlines, we need figure out what column we're | |
1571 // really looking at. | |
1572 | |
1573 void | |
1574 lexical_feedback::fixup_column_count (char *s) | |
1575 { | |
1576 char c; | |
1577 while ((c = *s++) != '\0') | |
1578 { | |
1579 if (c == '\n') | |
1580 { | |
1581 input_line_number++; | |
1582 current_input_column = 1; | |
1583 } | |
1584 else | |
1585 current_input_column++; | |
1586 } | |
1587 } | |
1588 | |
1589 bool | |
1590 lexical_feedback::inside_any_object_index (void) | |
1591 { | |
1592 bool retval = false; | |
1593 | |
1594 for (std::list<bool>::const_iterator i = looking_at_object_index.begin (); | |
1595 i != looking_at_object_index.end (); i++) | |
1596 { | |
1597 if (*i) | |
1598 { | |
1599 retval = true; | |
1600 break; | |
1601 } | |
1602 } | |
1603 | |
1604 return retval; | |
1605 } | |
1606 | |
1607 // Handle keywords. Return -1 if the keyword should be ignored. | |
1608 | |
1609 int | |
1610 lexical_feedback::is_keyword_token (const std::string& s) | |
1611 { | |
1612 int l = input_line_number; | |
1613 int c = current_input_column; | |
1614 | |
1615 int len = s.length (); | |
1616 | |
1617 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); | |
1618 | |
1619 if (kw) | |
1620 { | |
1621 yylval.tok_val = 0; | |
1622 | |
1623 switch (kw->kw_id) | |
1624 { | |
1625 case break_kw: | |
1626 case catch_kw: | |
1627 case continue_kw: | |
1628 case else_kw: | |
1629 case otherwise_kw: | |
1630 case return_kw: | |
1631 case unwind_protect_cleanup_kw: | |
1632 at_beginning_of_statement = true; | |
1633 break; | |
1634 | |
1635 case static_kw: | |
1636 if ((reading_fcn_file || reading_script_file | |
1637 || reading_classdef_file) | |
1638 && ! curr_fcn_file_full_name.empty ()) | |
1639 warning_with_id ("Octave:deprecated-keyword", | |
1640 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'", | |
1641 input_line_number, | |
1642 curr_fcn_file_full_name.c_str ()); | |
1643 else | |
1644 warning_with_id ("Octave:deprecated-keyword", | |
1645 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d", | |
1646 input_line_number); | |
1647 // fall through ... | |
1648 | |
1649 case persistent_kw: | |
1650 break; | |
1651 | |
1652 case case_kw: | |
1653 case elseif_kw: | |
1654 case global_kw: | |
1655 case until_kw: | |
1656 break; | |
1657 | |
1658 case end_kw: | |
1659 if (inside_any_object_index () | |
1660 || (! reading_classdef_file | |
1661 && (defining_func | |
1662 && ! (looking_at_return_list | |
1663 || parsed_function_name.top ())))) | |
1664 return 0; | |
1665 | |
1666 yylval.tok_val = new token (token::simple_end, l, c); | |
1667 at_beginning_of_statement = true; | |
1668 break; | |
1669 | |
1670 case end_try_catch_kw: | |
1671 yylval.tok_val = new token (token::try_catch_end, l, c); | |
1672 at_beginning_of_statement = true; | |
1673 break; | |
1674 | |
1675 case end_unwind_protect_kw: | |
1676 yylval.tok_val = new token (token::unwind_protect_end, l, c); | |
1677 at_beginning_of_statement = true; | |
1678 break; | |
1679 | |
1680 case endfor_kw: | |
1681 yylval.tok_val = new token (token::for_end, l, c); | |
1682 at_beginning_of_statement = true; | |
1683 break; | |
1684 | |
1685 case endfunction_kw: | |
1686 yylval.tok_val = new token (token::function_end, l, c); | |
1687 at_beginning_of_statement = true; | |
1688 break; | |
1689 | |
1690 case endif_kw: | |
1691 yylval.tok_val = new token (token::if_end, l, c); | |
1692 at_beginning_of_statement = true; | |
1693 break; | |
1694 | |
1695 case endparfor_kw: | |
1696 yylval.tok_val = new token (token::parfor_end, l, c); | |
1697 at_beginning_of_statement = true; | |
1698 break; | |
1699 | |
1700 case endswitch_kw: | |
1701 yylval.tok_val = new token (token::switch_end, l, c); | |
1702 at_beginning_of_statement = true; | |
1703 break; | |
1704 | |
1705 case endwhile_kw: | |
1706 yylval.tok_val = new token (token::while_end, l, c); | |
1707 at_beginning_of_statement = true; | |
1708 break; | |
1709 | |
1710 case endclassdef_kw: | |
1711 yylval.tok_val = new token (token::classdef_end, l, c); | |
1712 at_beginning_of_statement = true; | |
1713 break; | |
1714 | |
1715 case endenumeration_kw: | |
1716 yylval.tok_val = new token (token::enumeration_end, l, c); | |
1717 at_beginning_of_statement = true; | |
1718 break; | |
1719 | |
1720 case endevents_kw: | |
1721 yylval.tok_val = new token (token::events_end, l, c); | |
1722 at_beginning_of_statement = true; | |
1723 break; | |
1724 | |
1725 case endmethods_kw: | |
1726 yylval.tok_val = new token (token::methods_end, l, c); | |
1727 at_beginning_of_statement = true; | |
1728 break; | |
1729 | |
1730 case endproperties_kw: | |
1731 yylval.tok_val = new token (token::properties_end, l, c); | |
1732 at_beginning_of_statement = true; | |
1733 break; | |
1734 | |
1735 | |
1736 case for_kw: | |
1737 case parfor_kw: | |
1738 case while_kw: | |
1739 promptflag--; | |
1740 looping++; | |
1741 break; | |
1742 | |
1743 case do_kw: | |
1744 at_beginning_of_statement = true; | |
1745 promptflag--; | |
1746 looping++; | |
1747 break; | |
1748 | |
1749 case try_kw: | |
1750 case unwind_protect_kw: | |
1751 at_beginning_of_statement = true; | |
1752 promptflag--; | |
1753 break; | |
1754 | |
1755 case if_kw: | |
1756 case switch_kw: | |
1757 promptflag--; | |
1758 break; | |
1759 | |
1760 case get_kw: | |
1761 case set_kw: | |
1762 // 'get' and 'set' are keywords in classdef method | |
1763 // declarations. | |
1764 if (! maybe_classdef_get_set_method) | |
1765 return 0; | |
1766 break; | |
1767 | |
1768 case enumeration_kw: | |
1769 case events_kw: | |
1770 case methods_kw: | |
1771 case properties_kw: | |
1772 // 'properties', 'methods' and 'events' are keywords for | |
1773 // classdef blocks. | |
1774 if (! parsing_classdef) | |
1775 return 0; | |
1776 // fall through ... | |
1777 | |
1778 case classdef_kw: | |
1779 // 'classdef' is always a keyword. | |
1780 promptflag--; | |
1781 break; | |
1782 | |
1783 case function_kw: | |
1784 promptflag--; | |
1785 | |
1786 defining_func++; | |
1787 parsed_function_name.push (false); | |
1788 | |
1789 if (! (reading_fcn_file || reading_script_file | |
1790 || reading_classdef_file)) | |
1791 input_line_number = 1; | |
1792 break; | |
1793 | |
1794 case magic_file_kw: | |
1795 { | |
1796 if ((reading_fcn_file || reading_script_file | |
1797 || reading_classdef_file) | |
1798 && ! curr_fcn_file_full_name.empty ()) | |
1799 yylval.tok_val = new token (curr_fcn_file_full_name, l, c); | |
1800 else | |
1801 yylval.tok_val = new token ("stdin", l, c); | |
1802 } | |
1803 break; | |
1804 | |
1805 case magic_line_kw: | |
1806 yylval.tok_val = new token (static_cast<double> (l), "", l, c); | |
1807 break; | |
1808 | |
1809 default: | |
1810 panic_impossible (); | |
1811 } | |
1812 | |
1813 if (! yylval.tok_val) | |
1814 yylval.tok_val = new token (l, c); | |
1815 | |
1816 token_stack.push (yylval.tok_val); | |
1817 | |
1818 return kw->tok; | |
1819 } | |
1820 | |
1821 return 0; | |
1822 } | |
1823 | |
1824 bool | |
1825 lexical_feedback::is_variable (const std::string& name) | |
1826 { | |
1827 return (symbol_table::is_variable (name) | |
1828 || (pending_local_variables.find (name) | |
1829 != pending_local_variables.end ())); | |
1830 } | |
1831 | |
// Read the body of a block comment from READER and return the text
// collected.
//
// A '%' or '#' at the beginning of a line is not stored; it only arms a
// check of the following character.  If that character is '{' or '}'
// and nothing but spaces or tabs follows up to the newline, the line is
// a block marker: '{' increments block_comment_nesting_level and
// decrements promptflag, '}' does the opposite.  When a '}' marker
// brings the nesting level back to 0, the result of
// grab_comment_block (reader, true, eof) is appended and the function
// returns.  Every other character is appended to the returned string.
//
// EOF is set to true when the outer loop stops because the reader
// returned EOF.
1832 std::string | |
1833 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof) | |
1834 { | |
1835 std::string buf; | |
1836 | |
// at_bol: the next character read is the first one on its line.
// look_for_marker: the previous character was a '%' or '#' at the
// beginning of a line, so the current one may be '{' or '}'.
1837 bool at_bol = true; | |
1838 bool look_for_marker = false; | |
1839 | |
// Limits the Matlab-incompatibility gripe for '#' to one per call.
1840 bool warned_incompatible = false; | |
1841 | |
1842 int c = 0; | |
1843 | |
1844 while ((c = reader.getc ()) != EOF) | |
1845 { | |
1846 current_input_column++; | |
1847 | |
1848 if (look_for_marker) | |
1849 { | |
1850 at_bol = false; | |
1851 look_for_marker = false; | |
1852 | |
1853 if (c == '{' || c == '}') | |
1854 { | |
// tmp_buf holds the candidate marker text; it is copied into buf only
// if the line turns out not to be a marker (the default case below).
1855 std::string tmp_buf (1, static_cast<char> (c)); | |
1856 | |
1857 int type = c; | |
1858 | |
1859 bool done = false; | |
1860 | |
// NOTE(review): the condition reads a character before it tests
// `done', so once a case below sets `done' one more character has been
// consumed into c when the loop exits; that character is then handled
// by the code that follows this `if (look_for_marker)' block.  If it
// is EOF, it appears to be appended to buf as a char -- confirm
// whether that is intended.
1861 while ((c = reader.getc ()) != EOF && ! done) | |
1862 { | |
1863 current_input_column++; | |
1864 | |
1865 switch (c) | |
1866 { | |
1867 case ' ': | |
1868 case '\t': | |
1869 tmp_buf += static_cast<char> (c); | |
1870 break; | |
1871 | |
1872 case '\n': | |
1873 { | |
1874 current_input_column = 0; | |
1875 at_bol = true; | |
1876 done = true; | |
1877 | |
1878 if (type == '{') | |
1879 { | |
1880 block_comment_nesting_level++; | |
1881 promptflag--; | |
1882 } | |
1883 else | |
1884 { | |
1885 block_comment_nesting_level--; | |
1886 promptflag++; | |
1887 | |
// The outermost block has been closed: collect any comment lines that
// follow it and finish.
1888 if (block_comment_nesting_level == 0) | |
1889 { | |
1890 buf += grab_comment_block (reader, true, eof); | |
1891 | |
1892 return buf; | |
1893 } | |
1894 } | |
1895 } | |
1896 break; | |
1897 | |
// Something other than blanks followed the brace, so this is ordinary
// comment text rather than a marker line.
1898 default: | |
1899 at_bol = false; | |
1900 tmp_buf += static_cast<char> (c); | |
1901 buf += tmp_buf; | |
1902 done = true; | |
1903 break; | |
1904 } | |
1905 } | |
1906 } | |
1907 } | |
1908 | |
1909 if (at_bol && (c == '%' || c == '#')) | |
1910 { | |
1911 if (c == '#' && ! warned_incompatible) | |
1912 { | |
1913 warned_incompatible = true; | |
1914 maybe_gripe_matlab_incompatible_comment (c); | |
1915 } | |
1916 | |
1917 at_bol = false; | |
1918 look_for_marker = true; | |
1919 } | |
1920 else | |
1921 { | |
1922 buf += static_cast<char> (c); | |
1923 | |
1924 if (c == '\n') | |
1925 { | |
1926 current_input_column = 0; | |
1927 at_bol = true; | |
1928 } | |
1929 } | |
1930 } | |
1931 | |
1932 if (c == EOF) | |
1933 eof = true; | |
1934 | |
1935 return buf; | |
1936 } | |
1937 | |
// Read consecutive comment lines from READER and return their text.
//
// Outside a comment, spaces and tabs are skipped and a '#' or '%'
// starts a comment; any other character is handed back with
// reader.ungetc and scanning stops.  Directly after the comment
// character, further '%' or '#' characters are skipped, and a '{'
// (with AT_BOL still true) followed only by blanks up to the newline
// starts a block comment that is read by grab_block_comment.  Inside a
// comment every character is appended to the result; a newline ends
// the comment line.
//
// AT_BOL is the caller's initial value for the beginning-of-line flag
// used by the '{' check.  EOF is set to true when scanning ends with c
// equal to EOF.
1938 std::string | |
1939 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol, | |
1940 bool& eof) | |
1941 { | |
1942 std::string buf; | |
1943 | |
1944 // TRUE means we are at the beginning of a comment block. | |
1945 bool begin_comment = false; | |
1946 | |
1947 // TRUE means we are currently reading a comment block. | |
1948 bool in_comment = false; | |
1949 | |
// Limits the Matlab-incompatibility gripe for '#' to one per call.
1950 bool warned_incompatible = false; | |
1951 | |
1952 int c = 0; | |
1953 | |
1954 while ((c = reader.getc ()) != EOF) | |
1955 { | |
1956 current_input_column++; | |
1957 | |
1958 if (begin_comment) | |
1959 { | |
// Extra comment characters right after the first one are dropped.
1960 if (c == '%' || c == '#') | |
1961 { | |
1962 at_bol = false; | |
1963 continue; | |
1964 } | |
1965 else if (at_bol && c == '{') | |
1966 { | |
// tmp_buf holds the candidate marker text; it is copied into buf only
// if the line turns out not to be a block-comment opener.
1967 std::string tmp_buf (1, static_cast<char> (c)); | |
1968 | |
1969 bool done = false; | |
1970 | |
// NOTE(review): as in grab_block_comment, the condition reads a
// character before testing `done', so one extra character is consumed
// into c when the loop ends because `done' was set.
1971 while ((c = reader.getc ()) != EOF && ! done) | |
1972 { | |
1973 current_input_column++; | |
1974 | |
1975 switch (c) | |
1976 { | |
1977 case ' ': | |
1978 case '\t': | |
1979 tmp_buf += static_cast<char> (c); | |
1980 break; | |
1981 | |
1982 case '\n': | |
1983 { | |
1984 current_input_column = 0; | |
1985 at_bol = true; | |
1986 done = true; | |
1987 | |
1988 block_comment_nesting_level++; | |
1989 promptflag--; | |
1990 | |
1991 buf += grab_block_comment (reader, eof); | |
1992 | |
1993 in_comment = false; | |
1994 | |
// This jumps to the function-level label `done:', not the local
// variable of the same name.
1995 if (eof) | |
1996 goto done; | |
1997 } | |
1998 break; | |
1999 | |
2000 default: | |
2001 at_bol = false; | |
2002 tmp_buf += static_cast<char> (c); | |
2003 buf += tmp_buf; | |
2004 done = true; | |
2005 break; | |
2006 } | |
2007 } | |
2008 } | |
2009 else | |
2010 { | |
2011 at_bol = false; | |
2012 begin_comment = false; | |
2013 } | |
2014 } | |
2015 | |
2016 if (in_comment) | |
2017 { | |
2018 buf += static_cast<char> (c); | |
2019 | |
2020 if (c == '\n') | |
2021 { | |
2022 at_bol = true; | |
2023 current_input_column = 0; | |
2024 in_comment = false; | |
2025 | |
2026 // FIXME -- bailing out here prevents things like | |
2027 // | |
2028 // octave> # comment | |
2029 // octave> x = 1 | |
2030 // | |
2031 // from failing at the command line, while still | |
2032 // allowing blocks of comments to be grabbed properly | |
2033 // for function doc strings. But only the first line of | |
2034 // a multi-line doc string will be picked up for | |
2035 // functions defined on the command line. We need a | |
2036 // better way of collecting these comments... | |
2037 if (! (reading_fcn_file || reading_script_file)) | |
2038 goto done; | |
2039 } | |
2040 } | |
2041 else | |
2042 { | |
2043 switch (c) | |
2044 { | |
2045 case ' ': | |
2046 case '\t': | |
2047 break; | |
2048 | |
2049 case '#': | |
2050 if (! warned_incompatible) | |
2051 { | |
2052 warned_incompatible = true; | |
2053 maybe_gripe_matlab_incompatible_comment (c); | |
2054 } | |
2055 // fall through... | |
2056 | |
2057 case '%': | |
2058 in_comment = true; | |
2059 begin_comment = true; | |
2060 break; | |
2061 | |
// Not part of a comment: give the character back and stop.
2062 default: | |
2063 current_input_column--; | |
2064 reader.ungetc (c); | |
2065 goto done; | |
2066 } | |
2067 } | |
2068 } | |
2069 | |
2070 done: | |
2071 | |
2072 if (c == EOF) | |
2073 eof = true; | |
2074 | |
2075 return buf; | |
2076 } | |
2077 | |
2078 int | |
2079 lexical_feedback::process_comment (bool start_in_block, bool& eof) | |
2080 { | |
2081 eof = false; | |
2082 | |
2083 std::string help_txt; | |
2084 | |
2085 if (! help_buf.empty ()) | |
2086 help_txt = help_buf.top (); | |
2087 | |
2088 flex_stream_reader flex_reader (this, yytext); | |
2089 | |
2090 // process_comment is only supposed to be called when we are not | |
2091 // initially looking at a block comment. | |
2092 | |
2093 std::string txt = start_in_block | |
2094 ? grab_block_comment (flex_reader, eof) | |
2095 : grab_comment_block (flex_reader, false, eof); | |
2096 | |
2097 if (lexer_debug_flag) | |
2098 std::cerr << "C: " << txt << std::endl; | |
2099 | |
2100 if (help_txt.empty () && nesting_level.none ()) | |
2101 { | |
2102 if (! help_buf.empty ()) | |
2103 help_buf.pop (); | |
2104 | |
2105 help_buf.push (txt); | |
2106 } | |
2107 | |
2108 octave_comment_buffer::append (txt); | |
2109 | |
2110 current_input_column = 1; | |
2111 quote_is_transpose = false; | |
2112 convert_spaces_to_comma = true; | |
2113 at_beginning_of_statement = true; | |
2114 | |
2115 if (YY_START == COMMAND_START) | |
2116 BEGIN (INITIAL); | |
2117 | |
2118 if (nesting_level.none ()) | |
2119 return '\n'; | |
2120 else if (nesting_level.is_bracket_or_brace ()) | |
2121 return ';'; | |
2122 else | |
2123 return 0; | |
2124 } | |
2125 | |
2126 // Recognize separators. If the separator is a CRLF pair, it is | |
2127 // replaced by a single LF. | |
2128 | |
2129 bool | |
2130 lexical_feedback::next_token_is_sep_op (void) | |
2131 { | |
2132 bool retval = false; | |
2133 | |
2134 int c = text_yyinput (); | |
2135 | |
2136 retval = match_any (c, ",;\n]"); | |
2137 | |
2138 xunput (c, yytext); | |
2139 | |
2140 return retval; | |
2141 } | |
2142 | |
2143 // Try to determine if the next token should be treated as a postfix | |
2144 // unary operator. This is ugly, but it seems to do the right thing. | |
2145 | |
2146 bool | |
2147 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev) | |
2148 { | |
2149 bool un_op = false; | |
2150 | |
2151 int c0 = text_yyinput (); | |
2152 | |
2153 if (c0 == '\'' && ! spc_prev) | |
2154 { | |
2155 un_op = true; | |
2156 } | |
2157 else if (c0 == '.') | |
2158 { | |
2159 int c1 = text_yyinput (); | |
2160 un_op = (c1 == '\''); | |
2161 xunput (c1, yytext); | |
2162 } | |
2163 else if (c0 == '+') | |
2164 { | |
2165 int c1 = text_yyinput (); | |
2166 un_op = (c1 == '+'); | |
2167 xunput (c1, yytext); | |
2168 } | |
2169 else if (c0 == '-') | |
2170 { | |
2171 int c1 = text_yyinput (); | |
2172 un_op = (c1 == '-'); | |
2173 xunput (c1, yytext); | |
2174 } | |
2175 | |
2176 xunput (c0, yytext); | |
2177 | |
2178 return un_op; | |
2179 } | |
2180 | |
2181 // Try to determine if the next token should be treated as a binary | |
2182 // operator. | |
2183 // | |
2184 // This kluge exists because whitespace is not always ignored inside | |
2185 // the square brackets that are used to create matrix objects (though | |
2186 // spacing only really matters in the cases that can be interpreted | |
2187 // either as binary ops or prefix unary ops: currently just +, -). | |
2188 // | |
2189 // Note that a line continuation directly following a + or - operator | |
2190 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be | |
2191 // parsed as a binary operator. | |
2192 | |
2193 bool | |
2194 lexical_feedback::next_token_is_bin_op (bool spc_prev) | |
2195 { | |
2196 bool bin_op = false; | |
2197 | |
2198 int c0 = text_yyinput (); | |
2199 | |
2200 switch (c0) | |
2201 { | |
2202 case '+': | |
2203 case '-': | |
2204 { | |
2205 int c1 = text_yyinput (); | |
2206 | |
2207 switch (c1) | |
2208 { | |
2209 case '+': | |
2210 case '-': | |
2211 // Unary ops, spacing doesn't matter. | |
2212 break; | |
2213 | |
2214 case '=': | |
2215 // Binary ops, spacing doesn't matter. | |
2216 bin_op = true; | |
2217 break; | |
2218 | |
2219 default: | |
2220 // Could be either, spacing matters. | |
2221 bin_op = looks_like_bin_op (spc_prev, c1); | |
2222 break; | |
2223 } | |
2224 | |
2225 xunput (c1, yytext); | |
2226 } | |
2227 break; | |
2228 | |
2229 case ':': | |
2230 case '/': | |
2231 case '\\': | |
2232 case '^': | |
2233 // Always a binary op (may also include /=, \=, and ^=). | |
2234 bin_op = true; | |
2235 break; | |
2236 | |
2237 // .+ .- ./ .\ .^ .* .** | |
2238 case '.': | |
2239 { | |
2240 int c1 = text_yyinput (); | |
2241 | |
2242 if (match_any (c1, "+-/\\^*")) | |
2243 // Always a binary op (may also include .+=, .-=, ./=, ...). | |
2244 bin_op = true; | |
2245 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.') | |
2246 // A structure element reference is a binary op. | |
2247 bin_op = true; | |
2248 | |
2249 xunput (c1, yytext); | |
2250 } | |
2251 break; | |
2252 | |
2253 // = == & && | || * ** | |
2254 case '=': | |
2255 case '&': | |
2256 case '|': | |
2257 case '*': | |
2258 // Always a binary op (may also include ==, &&, ||, **). | |
2259 bin_op = true; | |
2260 break; | |
2261 | |
2262 // < <= <> > >= | |
2263 case '<': | |
2264 case '>': | |
2265 // Always a binary op (may also include <=, <>, >=). | |
2266 bin_op = true; | |
2267 break; | |
2268 | |
2269 // ~= != | |
2270 case '~': | |
2271 case '!': | |
2272 { | |
2273 int c1 = text_yyinput (); | |
2274 | |
2275 // ~ and ! can be unary ops, so require following =. | |
2276 if (c1 == '=') | |
2277 bin_op = true; | |
2278 | |
2279 xunput (c1, yytext); | |
2280 } | |
2281 break; | |
2282 | |
2283 default: | |
2284 break; | |
2285 } | |
2286 | |
2287 xunput (c0, yytext); | |
2288 | |
2289 return bin_op; | |
2290 } | |
2291 | |
2292 // FIXME -- we need to handle block comments here. | |
2293 | |
2294 void | |
2295 lexical_feedback::scan_for_comments (const char *text) | |
2296 { | |
2297 std::string comment_buf; | |
2298 | |
2299 bool in_comment = false; | |
2300 bool beginning_of_comment = false; | |
2301 | |
2302 int len = strlen (text); | |
2303 int i = 0; | |
2304 | |
2305 while (i < len) | |
2306 { | |
2307 char c = text[i++]; | |
2308 | |
2309 switch (c) | |
2310 { | |
2311 case '%': | |
2312 case '#': | |
2313 if (in_comment) | |
2314 { | |
2315 if (! beginning_of_comment) | |
2316 comment_buf += static_cast<char> (c); | |
2317 } | |
2318 else | |
2319 { | |
2320 maybe_gripe_matlab_incompatible_comment (c); | |
2321 in_comment = true; | |
2322 beginning_of_comment = true; | |
2323 } | |
2324 break; | |
2325 | |
2326 case '\n': | |
2327 if (in_comment) | |
2328 { | |
2329 comment_buf += static_cast<char> (c); | |
2330 octave_comment_buffer::append (comment_buf); | |
2331 comment_buf.resize (0); | |
2332 in_comment = false; | |
2333 beginning_of_comment = false; | |
2334 } | |
2335 break; | |
2336 | |
2337 default: | |
2338 if (in_comment) | |
2339 { | |
2340 comment_buf += static_cast<char> (c); | |
2341 beginning_of_comment = false; | |
2342 } | |
2343 break; | |
2344 } | |
2345 } | |
2346 | |
2347 if (! comment_buf.empty ()) | |
2348 octave_comment_buffer::append (comment_buf); | |
2349 } | |
2350 | |
2351 // Discard whitespace, including comments and continuations. | |
2352 | |
2353 // FIXME -- we need to handle block comments here. | |
2354 | |
// The result is NO_WHITESPACE, or a combination of SPACE_OR_TAB and
// NEWLINE or-ed together according to what was consumed.  Note that
// spaces and tabs inside a comment also set SPACE_OR_TAB.  Comment
// text is appended to octave_comment_buffer.  The first character that
// is not whitespace, comment or continuation is pushed back.
2355 int | |
2356 lexical_feedback::eat_whitespace (void) | |
2357 { | |
2358 int retval = lexical_feedback::NO_WHITESPACE; | |
2359 | |
2360 std::string comment_buf; | |
2361 | |
// in_comment: we are between a comment character and the newline.
// beginning_of_comment: only comment characters have been seen so far
// in this comment; further '#' or '%' are not stored while it is true.
2362 bool in_comment = false; | |
2363 bool beginning_of_comment = false; | |
2364 | |
2365 int c = 0; | |
2366 | |
2367 while ((c = text_yyinput ()) != EOF) | |
2368 { | |
2369 current_input_column++; | |
2370 | |
2371 switch (c) | |
2372 { | |
2373 case ' ': | |
2374 case '\t': | |
2375 if (in_comment) | |
2376 { | |
2377 comment_buf += static_cast<char> (c); | |
2378 beginning_of_comment = false; | |
2379 } | |
2380 retval |= lexical_feedback::SPACE_OR_TAB; | |
2381 break; | |
2382 | |
2383 case '\n': | |
2384 retval |= lexical_feedback::NEWLINE; | |
2385 if (in_comment) | |
2386 { | |
2387 comment_buf += static_cast<char> (c); | |
2388 octave_comment_buffer::append (comment_buf); | |
2389 comment_buf.resize (0); | |
2390 in_comment = false; | |
2391 beginning_of_comment = false; | |
2392 } | |
2393 current_input_column = 0; | |
2394 break; | |
2395 | |
2396 case '#': | |
2397 case '%': | |
2398 if (in_comment) | |
2399 { | |
2400 if (! beginning_of_comment) | |
2401 comment_buf += static_cast<char> (c); | |
2402 } | |
2403 else | |
2404 { | |
2405 maybe_gripe_matlab_incompatible_comment (c); | |
2406 in_comment = true; | |
2407 beginning_of_comment = true; | |
2408 } | |
2409 break; | |
2410 | |
// Outside a comment a '.' is whitespace only when it starts an
// ellipsis continuation; otherwise it ends the scan.
2411 case '.': | |
2412 if (in_comment) | |
2413 { | |
2414 comment_buf += static_cast<char> (c); | |
2415 beginning_of_comment = false; | |
2416 break; | |
2417 } | |
2418 else | |
2419 { | |
2420 if (have_ellipsis_continuation ()) | |
2421 break; | |
2422 else | |
2423 goto done; | |
2424 } | |
2425 | |
// Likewise for a backslash continuation.
2426 case '\\': | |
2427 if (in_comment) | |
2428 { | |
2429 comment_buf += static_cast<char> (c); | |
2430 beginning_of_comment = false; | |
2431 break; | |
2432 } | |
2433 else | |
2434 { | |
2435 if (have_continuation ()) | |
2436 break; | |
2437 else | |
2438 goto done; | |
2439 } | |
2440 | |
2441 default: | |
2442 if (in_comment) | |
2443 { | |
2444 comment_buf += static_cast<char> (c); | |
2445 beginning_of_comment = false; | |
2446 break; | |
2447 } | |
2448 else | |
2449 goto done; | |
2450 } | |
2451 } | |
2452 | |
// Reached only when the loop ended on EOF: flush a comment that was
// not terminated by a newline.
2453 if (! comment_buf.empty ()) | |
2454 octave_comment_buffer::append (comment_buf); | |
2455 | |
// NOTE(review): the EOF exit falls through to this label as well, so
// in that case the EOF value itself is passed to xunput (which takes a
// char) and the column is decremented although it was not incremented
// for EOF -- confirm that this is intended.
2456 done: | |
2457 xunput (c, yytext); | |
2458 current_input_column--; | |
2459 return retval; | |
2460 } | |
2461 | |
// Return true if the LEN-character text S is longer than two
// characters and begins with "0x" or "0X".

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
2467 | |
2468 void | |
2469 lexical_feedback::handle_number (void) | |
2470 { | |
2471 double value = 0.0; | |
2472 int nread = 0; | |
2473 | |
2474 if (looks_like_hex (yytext, strlen (yytext))) | |
2475 { | |
2476 unsigned long ival; | |
2477 | |
2478 nread = sscanf (yytext, "%lx", &ival); | |
2479 | |
2480 value = static_cast<double> (ival); | |
2481 } | |
2482 else | |
2483 { | |
2484 char *tmp = strsave (yytext); | |
2485 | |
2486 char *idx = strpbrk (tmp, "Dd"); | |
2487 | |
2488 if (idx) | |
2489 *idx = 'e'; | |
2490 | |
2491 nread = sscanf (tmp, "%lf", &value); | |
2492 | |
2493 delete [] tmp; | |
2494 } | |
2495 | |
2496 // If yytext doesn't contain a valid number, we are in deep doo doo. | |
2497 | |
2498 assert (nread == 1); | |
2499 | |
2500 quote_is_transpose = true; | |
2501 convert_spaces_to_comma = true; | |
2502 looking_for_object_index = false; | |
2503 at_beginning_of_statement = false; | |
2504 | |
2505 yylval.tok_val = new token (value, yytext, input_line_number, | |
2506 current_input_column); | |
2507 | |
2508 token_stack.push (yylval.tok_val); | |
2509 | |
2510 current_input_column += yyleng; | |
2511 | |
2512 do_comma_insert_check (); | |
2513 } | |
2514 | |
2515 // We have seen a backslash and need to find out if it should be | |
2516 // treated as a continuation character. If so, this eats it, up to | |
2517 // and including the new line character. | |
2518 // | |
2519 // Match whitespace only, followed by a comment character or newline. | |
2520 // Once a comment character is found, discard all input until newline. | |
2521 // If non-whitespace characters are found before comment | |
2522 // characters, return false. Otherwise, return true. | |
2523 | |
2524 // FIXME -- we need to handle block comments here. | |
2525 | |
// A comment character counts only when TRAILING_COMMENTS_OK is true;
// otherwise it is treated like any other non-whitespace character.
// On success the comment text (if any) is appended to
// octave_comment_buffer, current_input_column is reset to 0,
// promptflag is decremented and
// gripe_matlab_incompatible_continuation is called.
2526 bool | |
2527 lexical_feedback::have_continuation (bool trailing_comments_ok) | |
2528 { | |
// Records every character read so that the cleanup code can push them
// all back, in reverse order, when this is not a continuation.
2529 std::ostringstream buf; | |
2530 | |
2531 std::string comment_buf; | |
2532 | |
// in_comment: a comment character has been seen on this line.
// beginning_of_comment: only comment characters have been seen so far
// in this comment; further '%' or '#' are not stored while it is true.
2533 bool in_comment = false; | |
2534 bool beginning_of_comment = false; | |
2535 | |
2536 int c = 0; | |
2537 | |
2538 while ((c = text_yyinput ()) != EOF) | |
2539 { | |
2540 buf << static_cast<char> (c); | |
2541 | |
2542 switch (c) | |
2543 { | |
2544 case ' ': | |
2545 case '\t': | |
2546 if (in_comment) | |
2547 { | |
2548 comment_buf += static_cast<char> (c); | |
2549 beginning_of_comment = false; | |
2550 } | |
2551 break; | |
2552 | |
2553 case '%': | |
2554 case '#': | |
2555 if (trailing_comments_ok) | |
2556 { | |
2557 if (in_comment) | |
2558 { | |
2559 if (! beginning_of_comment) | |
2560 comment_buf += static_cast<char> (c); | |
2561 } | |
2562 else | |
2563 { | |
2564 maybe_gripe_matlab_incompatible_comment (c); | |
2565 in_comment = true; | |
2566 beginning_of_comment = true; | |
2567 } | |
2568 } | |
2569 else | |
2570 goto cleanup; | |
2571 break; | |
2572 | |
// The newline completes the continuation.
2573 case '\n': | |
2574 if (in_comment) | |
2575 { | |
2576 comment_buf += static_cast<char> (c); | |
2577 octave_comment_buffer::append (comment_buf); | |
2578 } | |
2579 current_input_column = 0; | |
2580 promptflag--; | |
2581 gripe_matlab_incompatible_continuation (); | |
2582 return true; | |
2583 | |
2584 default: | |
2585 if (in_comment) | |
2586 { | |
2587 comment_buf += static_cast<char> (c); | |
2588 beginning_of_comment = false; | |
2589 } | |
2590 else | |
2591 goto cleanup; | |
2592 break; | |
2593 } | |
2594 } | |
2595 | |
// NOTE(review): this is the EOF exit.  Only c (EOF) is pushed back
// here; unlike the cleanup path below, the characters recorded in buf
// are not restored -- confirm whether that is intended.
2596 xunput (c, yytext); | |
2597 return false; | |
2598 | |
2599 cleanup: | |
2600 | |
// Not a continuation: restore the input by pushing back everything
// that was read, last character first.
2601 std::string s = buf.str (); | |
2602 | |
2603 int len = s.length (); | |
2604 while (len--) | |
2605 xunput (s[len], yytext); | |
2606 | |
2607 return false; | |
2608 } | |
2609 | |
2610 // We have seen a '.' and need to see if it is the start of a | |
2611 // continuation. If so, this eats it, up to and including the new | |
2612 // line character. | |
2613 | |
2614 bool | |
2615 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok) | |
2616 { | |
2617 char c1 = text_yyinput (); | |
2618 if (c1 == '.') | |
2619 { | |
2620 char c2 = text_yyinput (); | |
2621 if (c2 == '.' && have_continuation (trailing_comments_ok)) | |
2622 return true; | |
2623 else | |
2624 { | |
2625 xunput (c2, yytext); | |
2626 xunput (c1, yytext); | |
2627 } | |
2628 } | |
2629 else | |
2630 xunput (c1, yytext); | |
2631 | |
2632 return false; | |
2633 } | |
2634 | |
2635 // See if we have a continuation line. If so, eat it and the leading | |
2636 // whitespace on the next line. | |
2637 | |
2638 int | |
2639 lexical_feedback::eat_continuation (void) | |
2640 { | |
2641 int retval = lexical_feedback::NO_WHITESPACE; | |
2642 | |
2643 int c = text_yyinput (); | |
2644 | |
2645 if ((c == '.' && have_ellipsis_continuation ()) | |
2646 || (c == '\\' && have_continuation ())) | |
2647 retval = eat_whitespace (); | |
2648 else | |
2649 xunput (c, yytext); | |
2650 | |
2651 return retval; | |
2652 } | |
2653 | |
// Read the body of a string constant delimited by DELIM (a single or
// double quote) from the input.  The first unescaped, undoubled DELIM
// seen ends the string, so presumably the opening delimiter has
// already been consumed by the caller.
//
// On success the string text is stored in a new token (pushed on
// token_stack and assigned to yylval.tok_val) and DQ_STRING or
// SQ_STRING is returned.  LEXICAL_ERROR is returned if a newline is
// seen inside the string or the input ends first.

int
lexical_feedback::handle_string (char delim)
{
  std::ostringstream buf;

  // Line and column as they are on entry; recorded in the token.
  int bos_line = input_line_number;
  int bos_col = current_input_column;

  int c;
  // Nonzero when the previous character was a backslash that starts
  // an escape sequence (double-quoted strings only).
  int escape_pending = 0;

  while ((c = text_yyinput ()) != EOF)
    {
      current_input_column++;

      if (c == '\\')
        {
          // In single-quoted strings a backslash is an ordinary
          // character.  A backslash following a pending escape is
          // the second half of "\\".
          if (delim == '\'' || escape_pending)
            {
              buf << static_cast<char> (c);
              escape_pending = 0;
            }
          else
            {
              // A backslash at the end of the line continues the
              // string on the next line; otherwise it starts an
              // escape sequence.
              if (have_continuation (false))
                escape_pending = 0;
              else
                {
                  buf << static_cast<char> (c);
                  escape_pending = 1;
                }
            }
          // Skip the reset of escape_pending at the bottom of the loop.
          continue;
        }
      else if (c == '.')
        {
          // "..." continues a double-quoted string; in every other
          // case the dot is part of the string.
          if (delim == '\'' || ! have_ellipsis_continuation (false))
            buf << static_cast<char> (c);
        }
      else if (c == '\n')
        {
          error ("unterminated string constant");
          break;
        }
      else if (c == delim)
        {
          if (escape_pending)
            buf << static_cast<char> (c);
          else
            {
              // A doubled delimiter stands for one literal delimiter
              // character; anything else ends the string.
              c = text_yyinput ();
              if (c == delim)
                {
                  buf << static_cast<char> (c);
                }
              else
                {
                  std::string s;
                  xunput (c, yytext);

                  // Escape sequences are only processed in
                  // double-quoted strings.
                  if (delim == '\'')
                    s = buf.str ();
                  else
                    s = do_string_escapes (buf.str ());

                  quote_is_transpose = true;
                  convert_spaces_to_comma = true;

                  yylval.tok_val = new token (s, bos_line, bos_col);
                  token_stack.push (yylval.tok_val);

                  if (delim == '"')
                    gripe_matlab_incompatible ("\" used as string delimiter");
                  else if (delim == '\'')
                    gripe_single_quote_string ();

                  looking_for_object_index = true;
                  at_beginning_of_statement = false;

                  return delim == '"' ? DQ_STRING : SQ_STRING;
                }
            }
        }
      else
        {
          buf << static_cast<char> (c);
        }

      escape_pending = 0;
    }

  // Reached on newline inside the string or on EOF.
  return LEXICAL_ERROR;
}
2747 | |
2748 bool | |
2749 lexical_feedback::next_token_is_assign_op (void) | |
2750 { | |
2751 bool retval = false; | |
2752 | |
2753 int c0 = text_yyinput (); | |
2754 | |
2755 switch (c0) | |
2756 { | |
2757 case '=': | |
2758 { | |
2759 int c1 = text_yyinput (); | |
2760 xunput (c1, yytext); | |
2761 if (c1 != '=') | |
2762 retval = true; | |
2763 } | |
2764 break; | |
2765 | |
2766 case '+': | |
2767 case '-': | |
2768 case '*': | |
2769 case '/': | |
2770 case '\\': | |
2771 case '&': | |
2772 case '|': | |
2773 { | |
2774 int c1 = text_yyinput (); | |
2775 xunput (c1, yytext); | |
2776 if (c1 == '=') | |
2777 retval = true; | |
2778 } | |
2779 break; | |
2780 | |
2781 case '.': | |
2782 { | |
2783 int c1 = text_yyinput (); | |
2784 if (match_any (c1, "+-*/\\")) | |
2785 { | |
2786 int c2 = text_yyinput (); | |
2787 xunput (c2, yytext); | |
2788 if (c2 == '=') | |
2789 retval = true; | |
2790 } | |
2791 xunput (c1, yytext); | |
2792 } | |
2793 break; | |
2794 | |
2795 case '>': | |
2796 { | |
2797 int c1 = text_yyinput (); | |
2798 if (c1 == '>') | |
2799 { | |
2800 int c2 = text_yyinput (); | |
2801 xunput (c2, yytext); | |
2802 if (c2 == '=') | |
2803 retval = true; | |
2804 } | |
2805 xunput (c1, yytext); | |
2806 } | |
2807 break; | |
2808 | |
2809 case '<': | |
2810 { | |
2811 int c1 = text_yyinput (); | |
2812 if (c1 == '<') | |
2813 { | |
2814 int c2 = text_yyinput (); | |
2815 xunput (c2, yytext); | |
2816 if (c2 == '=') | |
2817 retval = true; | |
2818 } | |
2819 xunput (c1, yytext); | |
2820 } | |
2821 break; | |
2822 | |
2823 default: | |
2824 break; | |
2825 } | |
2826 | |
2827 xunput (c0, yytext); | |
2828 | |
2829 return retval; | |
2830 } | |
2831 | |
2832 bool | |
2833 lexical_feedback::next_token_is_index_op (void) | |
2834 { | |
2835 int c = text_yyinput (); | |
2836 xunput (c, yytext); | |
2837 return c == '(' || c == '{'; | |
2838 } | |
2839 | |
// Handle a closing ']' or '}'.  SPC_GOBBLED tells whether whitespace
// followed the bracket; BRACKET_TYPE is the bracket character itself.
//
// Updates the nesting bookkeeping, switches the scanner back to the
// INITIAL start state once no brackets or braces remain open, and, if
// we are still inside a matrix or cell list, may push a ',' back onto
// the input so that whitespace-separated elements are parsed as
// separate elements.
//
// Returns BRACKET_TYPE, except that a ']' followed by an assignment
// operator (outside of a return list) yields CLOSE_BRACE.

int
lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type)
{
  int retval = bracket_type;

  if (! nesting_level.none ())
    {
      nesting_level.remove ();

      if (bracket_type == ']')
        bracketflag--;
      else if (bracket_type == '}')
        braceflag--;
      else
        panic_impossible ();
    }

  // No open brackets or braces remain.
  if (bracketflag == 0 && braceflag == 0)
    BEGIN (INITIAL);

  if (bracket_type == ']'
      && next_token_is_assign_op ()
      && ! looking_at_return_list)
    {
      retval = CLOSE_BRACE;
    }
  else if ((bracketflag || braceflag)
           && convert_spaces_to_comma
           && (nesting_level.is_bracket ()
               || (nesting_level.is_brace ()
                   && ! looking_at_object_index.front ())))
    {
      bool index_op = next_token_is_index_op ();

      // Don't insert comma if we are looking at something like
      //
      //   [x{i}{j}] or [x{i}(j)]
      //
      // but do if we are looking at
      //
      //   [x{i} {j}] or [x{i} (j)]

      if (spc_gobbled || ! (bracket_type == '}' && index_op))
        {
          bool bin_op = next_token_is_bin_op (spc_gobbled);

          bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);

          bool sep_op = next_token_is_sep_op ();

          if (! (postfix_un_op || bin_op || sep_op))
            {
              maybe_warn_separator_insert (',');

              // Note that this path returns without touching
              // quote_is_transpose or convert_spaces_to_comma.
              xunput (',', yytext);
              return retval;
            }
        }
    }

  quote_is_transpose = true;
  convert_spaces_to_comma = true;

  return retval;
}
2905 | |
2906 void | |
2907 lexical_feedback::maybe_unput_comma (int spc_gobbled) | |
2908 { | |
2909 if (nesting_level.is_bracket () | |
2910 || (nesting_level.is_brace () | |
2911 && ! looking_at_object_index.front ())) | |
2912 { | |
2913 int bin_op = next_token_is_bin_op (spc_gobbled); | |
2914 | |
2915 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
2916 | |
2917 int c1 = text_yyinput (); | |
2918 int c2 = text_yyinput (); | |
2919 | |
2920 xunput (c2, yytext); | |
2921 xunput (c1, yytext); | |
2922 | |
2923 int sep_op = next_token_is_sep_op (); | |
2924 | |
2925 int dot_op = (c1 == '.' | |
2926 && (isalpha (c2) || isspace (c2) || c2 == '_')); | |
2927 | |
2928 if (postfix_un_op || bin_op || sep_op || dot_op) | |
2929 return; | |
2930 | |
2931 int index_op = (c1 == '(' || c1 == '{'); | |
2932 | |
2933 // If there is no space before the indexing op, we don't insert | |
2934 // a comma. | |
2935 | |
2936 if (index_op && ! spc_gobbled) | |
2937 return; | |
2938 | |
2939 maybe_warn_separator_insert (','); | |
2940 | |
2941 xunput (',', yytext); | |
2942 } | |
2943 } | |
2944 | |
2945 bool | |
2946 lexical_feedback::next_token_can_follow_bin_op (void) | |
2947 { | |
2948 std::stack<char> buf; | |
2949 | |
2950 int c = EOF; | |
2951 | |
2952 // Skip whitespace in current statement on current line | |
2953 while (true) | |
2954 { | |
2955 c = text_yyinput (); | |
2956 | |
2957 buf.push (c); | |
2958 | |
2959 if (match_any (c, ",;\n") || (c != ' ' && c != '\t')) | |
2960 break; | |
2961 } | |
2962 | |
2963 // Restore input. | |
2964 while (! buf.empty ()) | |
2965 { | |
2966 xunput (buf.top (), yytext); | |
2967 | |
2968 buf.pop (); | |
2969 } | |
2970 | |
2971 return (isalnum (c) || match_any (c, "!\"'(-[_{~")); | |
2972 } | |
2973 | |
// Return false for the names of the built-in constants listed below
// and true for anything else.  These names are never treated as
// commands, to avoid surprises when parsing things like "NaN ^2".

static bool
can_be_command (const std::string& tok)
{
  static const char * const never_commands[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan"
    };

  const size_t count = sizeof (never_commands) / sizeof (never_commands[0]);

  for (size_t k = 0; k < count; k++)
    {
      if (tok == never_commands[k])
        return false;
    }

  return true;
}
2986 | |
2987 bool | |
2988 lexical_feedback::looks_like_command_arg (void) | |
2989 { | |
2990 bool retval = true; | |
2991 | |
2992 int c0 = text_yyinput (); | |
2993 | |
2994 switch (c0) | |
2995 { | |
2996 // = == | |
2997 case '=': | |
2998 { | |
2999 int c1 = text_yyinput (); | |
3000 | |
3001 if (c1 == '=') | |
3002 { | |
3003 int c2 = text_yyinput (); | |
3004 | |
3005 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3006 && next_token_can_follow_bin_op ()) | |
3007 retval = false; | |
3008 | |
3009 xunput (c2, yytext); | |
3010 } | |
3011 else | |
3012 retval = false; | |
3013 | |
3014 xunput (c1, yytext); | |
3015 } | |
3016 break; | |
3017 | |
3018 case '(': | |
3019 case '{': | |
3020 // Indexing. | |
3021 retval = false; | |
3022 break; | |
3023 | |
3024 case '\n': | |
3025 // EOL. | |
3026 break; | |
3027 | |
3028 case '\'': | |
3029 case '"': | |
3030 // Beginning of a character string. | |
3031 break; | |
3032 | |
3033 // + - ++ -- += -= | |
3034 case '+': | |
3035 case '-': | |
3036 { | |
3037 int c1 = text_yyinput (); | |
3038 | |
3039 switch (c1) | |
3040 { | |
3041 case '\n': | |
3042 // EOL. | |
3043 case '+': | |
3044 case '-': | |
3045 // Unary ops, spacing doesn't matter. | |
3046 break; | |
3047 | |
3048 case '\t': | |
3049 case ' ': | |
3050 { | |
3051 if (next_token_can_follow_bin_op ()) | |
3052 retval = false; | |
3053 } | |
3054 break; | |
3055 | |
3056 case '=': | |
3057 { | |
3058 int c2 = text_yyinput (); | |
3059 | |
3060 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3061 && next_token_can_follow_bin_op ()) | |
3062 retval = false; | |
3063 | |
3064 xunput (c2, yytext); | |
3065 } | |
3066 break; | |
3067 } | |
3068 | |
3069 xunput (c1, yytext); | |
3070 } | |
3071 break; | |
3072 | |
3073 case ':': | |
3074 case '/': | |
3075 case '\\': | |
3076 case '^': | |
3077 { | |
3078 int c1 = text_yyinput (); | |
3079 | |
3080 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3081 && next_token_can_follow_bin_op ()) | |
3082 retval = false; | |
3083 | |
3084 xunput (c1, yytext); | |
3085 } | |
3086 break; | |
3087 | |
3088 // .+ .- ./ .\ .^ .* .** | |
3089 case '.': | |
3090 { | |
3091 int c1 = text_yyinput (); | |
3092 | |
3093 if (match_any (c1, "+-/\\^*")) | |
3094 { | |
3095 int c2 = text_yyinput (); | |
3096 | |
3097 if (c2 == '=') | |
3098 { | |
3099 int c3 = text_yyinput (); | |
3100 | |
3101 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t') | |
3102 && next_token_can_follow_bin_op ()) | |
3103 retval = false; | |
3104 | |
3105 xunput (c3, yytext); | |
3106 } | |
3107 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3108 && next_token_can_follow_bin_op ()) | |
3109 retval = false; | |
3110 | |
3111 xunput (c2, yytext); | |
3112 } | |
3113 else if (! match_any (c1, ",;\n") | |
3114 && (! isdigit (c1) && c1 != ' ' && c1 != '\t' | |
3115 && c1 != '.')) | |
3116 { | |
3117 // Structure reference. FIXME -- is this a complete check? | |
3118 | |
3119 retval = false; | |
3120 } | |
3121 | |
3122 xunput (c1, yytext); | |
3123 } | |
3124 break; | |
3125 | |
3126 // & && | || * ** | |
3127 case '&': | |
3128 case '|': | |
3129 case '*': | |
3130 { | |
3131 int c1 = text_yyinput (); | |
3132 | |
3133 if (c1 == c0) | |
3134 { | |
3135 int c2 = text_yyinput (); | |
3136 | |
3137 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3138 && next_token_can_follow_bin_op ()) | |
3139 retval = false; | |
3140 | |
3141 xunput (c2, yytext); | |
3142 } | |
3143 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3144 && next_token_can_follow_bin_op ()) | |
3145 retval = false; | |
3146 | |
3147 xunput (c1, yytext); | |
3148 } | |
3149 break; | |
3150 | |
3151 // < <= > >= | |
3152 case '<': | |
3153 case '>': | |
3154 { | |
3155 int c1 = text_yyinput (); | |
3156 | |
3157 if (c1 == '=') | |
3158 { | |
3159 int c2 = text_yyinput (); | |
3160 | |
3161 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3162 && next_token_can_follow_bin_op ()) | |
3163 retval = false; | |
3164 | |
3165 xunput (c2, yytext); | |
3166 } | |
3167 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3168 && next_token_can_follow_bin_op ()) | |
3169 retval = false; | |
3170 | |
3171 xunput (c1, yytext); | |
3172 } | |
3173 break; | |
3174 | |
3175 // ~= != | |
3176 case '~': | |
3177 case '!': | |
3178 { | |
3179 int c1 = text_yyinput (); | |
3180 | |
3181 // ~ and ! can be unary ops, so require following =. | |
3182 if (c1 == '=') | |
3183 { | |
3184 int c2 = text_yyinput (); | |
3185 | |
3186 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3187 && next_token_can_follow_bin_op ()) | |
3188 retval = false; | |
3189 | |
3190 xunput (c2, yytext); | |
3191 } | |
3192 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3193 && next_token_can_follow_bin_op ()) | |
3194 retval = false; | |
3195 | |
3196 xunput (c1, yytext); | |
3197 } | |
3198 break; | |
3199 | |
3200 default: | |
3201 break; | |
3202 } | |
3203 | |
3204 xunput (c0, yytext); | |
3205 | |
3206 return retval; | |
3207 } | |
3208 | |
3209 int | |
3210 lexical_feedback::handle_superclass_identifier (void) | |
3211 { | |
3212 eat_continuation (); | |
3213 | |
3214 std::string pkg; | |
3215 std::string meth = strip_trailing_whitespace (yytext); | |
3216 size_t pos = meth.find ("@"); | |
3217 std::string cls = meth.substr (pos).substr (1); | |
3218 meth = meth.substr (0, pos - 1); | |
3219 | |
3220 pos = cls.find ("."); | |
3221 if (pos != std::string::npos) | |
3222 { | |
3223 pkg = cls.substr (pos).substr (1); | |
3224 cls = cls.substr (0, pos - 1); | |
3225 } | |
3226 | |
3227 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls) | |
3228 || is_keyword_token (pkg)); | |
3229 if (kw_token) | |
3230 { | |
3231 error ("method, class and package names may not be keywords"); | |
3232 return LEXICAL_ERROR; | |
3233 } | |
3234 | |
3235 yylval.tok_val | |
3236 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)), | |
3237 cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3238 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3239 input_line_number, | |
3240 current_input_column); | |
3241 token_stack.push (yylval.tok_val); | |
3242 | |
3243 convert_spaces_to_comma = true; | |
3244 current_input_column += yyleng; | |
3245 | |
3246 return SUPERCLASSREF; | |
3247 } | |
3248 | |
3249 int | |
3250 lexical_feedback::handle_meta_identifier (void) | |
3251 { | |
3252 eat_continuation (); | |
3253 | |
3254 std::string pkg; | |
3255 std::string cls = strip_trailing_whitespace (yytext).substr (1); | |
3256 size_t pos = cls.find ("."); | |
3257 | |
3258 if (pos != std::string::npos) | |
3259 { | |
3260 pkg = cls.substr (pos).substr (1); | |
3261 cls = cls.substr (0, pos - 1); | |
3262 } | |
3263 | |
3264 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg); | |
3265 if (kw_token) | |
3266 { | |
3267 error ("class and package names may not be keywords"); | |
3268 return LEXICAL_ERROR; | |
3269 } | |
3270 | |
3271 yylval.tok_val | |
3272 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3273 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3274 input_line_number, | |
3275 current_input_column); | |
3276 | |
3277 token_stack.push (yylval.tok_val); | |
3278 | |
3279 convert_spaces_to_comma = true; | |
3280 current_input_column += yyleng; | |
3281 | |
3282 return METAQUERY; | |
3283 } | |
3284 | |
// Figure out exactly what kind of token to return when we have seen
// an identifier.  Handles keywords.  Return -1 if the identifier
// should be ignored.
//
// Depending on the lexer state this returns STRUCT_ELT, FCN_HANDLE,
// a keyword token (whatever is_keyword_token returns), NAME, or
// LEXICAL_ERROR.  For every result except a keyword a new token is
// stored in yylval.tok_val and pushed on token_stack.

int
lexical_feedback::handle_identifier (void)
{
  // Remember the flag; it is cleared (and possibly set again by
  // is_keyword_token) below.
  bool at_bos = at_beginning_of_statement;

  std::string tok = strip_trailing_whitespace (yytext);

  // Last character of the matched text, used to detect trailing
  // whitespace.
  int c = yytext[yyleng-1];

  bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE);

  int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');

  // If we are expecting a structure element, avoid recognizing
  // keywords and other special names and return STRUCT_ELT, which is
  // a string that is also a valid identifier.  But first, we have to
  // decide whether to insert a comma.

  if (looking_at_indirect_ref)
    {
      do_comma_insert_check ();

      maybe_unput_comma (spc_gobbled);

      yylval.tok_val = new token (tok, input_line_number,
                                  current_input_column);

      token_stack.push (yylval.tok_val);

      quote_is_transpose = true;
      convert_spaces_to_comma = true;
      looking_for_object_index = true;

      current_input_column += yyleng;

      return STRUCT_ELT;
    }

  at_beginning_of_statement = false;

  // The is_keyword_token may reset
  // at_beginning_of_statement.  For example, if it sees
  // an else token, then the next token is at the beginning of a
  // statement.

  int kw_token = is_keyword_token (tok);

  // If we found a keyword token, then the beginning_of_statement flag
  // is already set.  Otherwise, we won't be at the beginning of a
  // statement.

  if (looking_at_function_handle)
    {
      if (kw_token)
        {
          error ("function handles may not refer to keywords");

          return LEXICAL_ERROR;
        }
      else
        {
          yylval.tok_val = new token (tok, input_line_number,
                                      current_input_column);

          token_stack.push (yylval.tok_val);

          current_input_column += yyleng;
          quote_is_transpose = false;
          convert_spaces_to_comma = true;
          looking_for_object_index = true;

          return FCN_HANDLE;
        }
    }

  // If we have a regular keyword, return it.
  // Keywords can be followed by identifiers.

  if (kw_token)
    {
      // A negative value is returned unchanged, without updating the
      // column count or the lexer flags.
      if (kw_token >= 0)
        {
          current_input_column += yyleng;
          quote_is_transpose = false;
          convert_spaces_to_comma = true;
          looking_for_object_index = false;
        }

      return kw_token;
    }

  // Peek ahead to see whether the identifier is followed by a single
  // '=' (assignment) rather than '==' (comparison).  Both characters
  // are pushed back.

  int c1 = text_yyinput ();

  bool next_tok_is_eq = false;
  if (c1 == '=')
    {
      int c2 = text_yyinput ();
      xunput (c2, yytext);

      if (c2 != '=')
        next_tok_is_eq = true;
    }

  xunput (c1, yytext);

  // Kluge alert.
  //
  // If we are looking at a text style function, set up to gobble its
  // arguments.
  //
  // If the following token is '=', or if we are parsing a function
  // return list or function parameter list, or if we are looking at
  // something like [ab,cd] = foo (), force the symbol to be inserted
  // as a variable in the current symbol table.

  if (! is_variable (tok))
    {
      if (at_bos && spc_gobbled && can_be_command (tok)
          && looks_like_command_arg ())
        {
          BEGIN (COMMAND_START);
        }
      else if (next_tok_is_eq
               || looking_at_decl_list
               || looking_at_return_list
               || (looking_at_parameter_list
                   && ! looking_at_initializer_expression))
        {
          symbol_table::force_variable (tok);
        }
      else if (looking_at_matrix_or_assign_lhs)
        {
          pending_local_variables.insert (tok);
        }
    }

  // Find the token in the symbol table.  Beware the magic
  // transformation of the end keyword...

  if (tok == "end")
    tok = "__end__";

  yylval.tok_val = new token (&(symbol_table::insert (tok)),
                              input_line_number,
                              current_input_column);

  token_stack.push (yylval.tok_val);

  // After seeing an identifier, it is ok to convert spaces to a comma
  // (if needed).

  convert_spaces_to_comma = true;

  if (! (next_tok_is_eq || YY_START == COMMAND_START))
    {
      quote_is_transpose = true;

      do_comma_insert_check ();

      maybe_unput_comma (spc_gobbled);
    }

  current_input_column += yyleng;

  if (tok != "__end__")
    looking_for_object_index = true;

  return NAME;
}
3460 | |
3461 void | |
3462 lexical_feedback::maybe_warn_separator_insert (char sep) | |
3463 { | |
3464 std::string nm = curr_fcn_file_full_name; | |
3465 | |
3466 if (nm.empty ()) | |
3467 warning_with_id ("Octave:separator-insert", | |
3468 "potential auto-insertion of '%c' near line %d", | |
3469 sep, input_line_number); | |
3470 else | |
3471 warning_with_id ("Octave:separator-insert", | |
3472 "potential auto-insertion of '%c' near line %d of file %s", | |
3473 sep, input_line_number, nm.c_str ()); | |
3474 } | |
3475 | |
3476 void | |
3477 lexical_feedback::gripe_single_quote_string (void) | |
3478 { | |
3479 std::string nm = curr_fcn_file_full_name; | |
3480 | |
3481 if (nm.empty ()) | |
3482 warning_with_id ("Octave:single-quote-string", | |
3483 "single quote delimited string near line %d", | |
3484 input_line_number); | |
3485 else | |
3486 warning_with_id ("Octave:single-quote-string", | |
3487 "single quote delimited string near line %d of file %s", | |
3488 input_line_number, nm.c_str ()); | |
3489 } | |
3490 | |
3491 void | |
3492 lexical_feedback::gripe_matlab_incompatible (const std::string& msg) | |
3493 { | |
3494 std::string nm = curr_fcn_file_full_name; | |
3495 | |
3496 if (nm.empty ()) | |
3497 warning_with_id ("Octave:matlab-incompatible", | |
3498 "potential Matlab compatibility problem: %s", | |
3499 msg.c_str ()); | |
3500 else | |
3501 warning_with_id ("Octave:matlab-incompatible", | |
3502 "potential Matlab compatibility problem: %s near line %d offile %s", | |
3503 msg.c_str (), input_line_number, nm.c_str ()); | |
3504 } | |
3505 | |
3506 void | |
3507 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c) | |
3508 { | |
3509 if (c == '#') | |
3510 gripe_matlab_incompatible ("# used as comment character"); | |
3511 } | |
3512 | |
3513 void | |
3514 lexical_feedback::gripe_matlab_incompatible_continuation (void) | |
3515 { | |
3516 gripe_matlab_incompatible ("\\ used as line continuation marker"); | |
3517 } | |
3518 | |
3519 void | |
3520 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op) | |
3521 { | |
3522 std::string t = op; | |
3523 int n = t.length (); | |
3524 if (t[n-1] == '\n') | |
3525 t.resize (n-1); | |
3526 gripe_matlab_incompatible (t + " used as operator"); | |
1424 } | 3527 } |
1425 | 3528 |
1426 void | 3529 void |
1427 lexical_feedback::display_token (int tok) | 3530 lexical_feedback::display_token (int tok) |
1428 { | 3531 { |
1590 display_state (YY_START); | 3693 display_state (YY_START); |
1591 | 3694 |
1592 std::cerr << "P: " << pattern << std::endl; | 3695 std::cerr << "P: " << pattern << std::endl; |
1593 std::cerr << "T: " << text << std::endl; | 3696 std::cerr << "T: " << text << std::endl; |
1594 } | 3697 } |
1595 | |
// Built-in function __display_tokens__: query or set the internal
// display_tokens variable through the SET_INTERNAL_VARIABLE macro.

DEFUN (__display_tokens__, args, nargout,
  "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {} __display_tokens__ ()\n\
Query or set the internal variable that determines whether Octave's\n\
lexer displays tokens as they are read.\n\
@end deftypefn")
{
  return SET_INTERNAL_VARIABLE (display_tokens);
}
1605 | |
// Built-in function __token_count__: return the current value of
// Vtoken_count.  Takes no arguments.

DEFUN (__token_count__, , ,
  "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {} __token_count__ ()\n\
Number of language tokens processed since Octave startup.\n\
@end deftypefn")
{
  return octave_value (Vtoken_count);
}
1614 | |
// Built-in function __lexer_debug_flag__: query or set the
// lexer_debug_flag variable via set_internal_variable and return
// whatever that call returns.

DEFUN (__lexer_debug_flag__, args, nargout,
  "-*- texinfo -*-\n\
@deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val})\n\
Undocumented internal function.\n\
@end deftypefn")
{
  octave_value retval;

  retval = set_internal_variable (lexer_debug_flag, args, nargout,
                                  "__lexer_debug_flag__");

  return retval;
}
1628 | |
1629 class | |
1630 flex_stream_reader : public stream_reader | |
1631 { | |
1632 public: | |
1633 flex_stream_reader (lexical_feedback *l, char *buf_arg) | |
1634 : stream_reader (), lexer (l), buf (buf_arg) | |
1635 { } | |
1636 | |
1637 int getc (void) { return lexer->text_yyinput (); } | |
1638 int ungetc (int c) { lexer->xunput (c, buf); return 0; } | |
1639 | |
1640 private: | |
1641 | |
1642 // No copying! | |
1643 | |
1644 flex_stream_reader (const flex_stream_reader&); | |
1645 | |
1646 flex_stream_reader& operator = (const flex_stream_reader&); | |
1647 | |
1648 lexical_feedback *lexer; | |
1649 | |
1650 char *buf; | |
1651 }; | |
1652 | |
1653 lexical_feedback::~lexical_feedback (void) | |
1654 { | |
1655 // Clear out the stack of token info used to track line and | |
1656 // column numbers. | |
1657 | |
1658 while (! token_stack.empty ()) | |
1659 { | |
1660 delete token_stack.top (); | |
1661 token_stack.pop (); | |
1662 } | |
1663 } | |
1664 | |
1665 // GAG. | |
1666 // | |
1667 // If we're reading a matrix and the next character is '[', make sure | |
1668 // that we insert a comma ahead of it. | |
1669 | |
1670 void | |
1671 lexical_feedback::do_comma_insert_check (void) | |
1672 { | |
1673 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE); | |
1674 | |
1675 int c = text_yyinput (); | |
1676 | |
1677 xunput (c, yytext); | |
1678 | |
1679 if (spc_gobbled) | |
1680 xunput (' ', yytext); | |
1681 | |
1682 do_comma_insert = (! looking_at_object_index.front () | |
1683 && bracketflag && c == '['); | |
1684 } | |
1685 | |
1686 int | |
1687 lexical_feedback::text_yyinput (void) | |
1688 { | |
1689 int c = yyinput (); | |
1690 | |
1691 if (lexer_debug_flag) | |
1692 { | |
1693 std::cerr << "I: "; | |
1694 display_character (c); | |
1695 std::cerr << std::endl; | |
1696 } | |
1697 | |
1698 // Convert CRLF into just LF and single CR into LF. | |
1699 | |
1700 if (c == '\r') | |
1701 { | |
1702 c = yyinput (); | |
1703 | |
1704 if (lexer_debug_flag) | |
1705 { | |
1706 std::cerr << "I: "; | |
1707 display_character (c); | |
1708 std::cerr << std::endl; | |
1709 } | |
1710 | |
1711 if (c != '\n') | |
1712 { | |
1713 xunput (c, yytext); | |
1714 c = '\n'; | |
1715 } | |
1716 } | |
1717 | |
1718 if (c == '\n') | |
1719 input_line_number++; | |
1720 | |
1721 return c; | |
1722 } | |
1723 | |
1724 void | |
1725 lexical_feedback::xunput (char c, char *buf) | |
1726 { | |
1727 if (lexer_debug_flag) | |
1728 { | |
1729 std::cerr << "U: "; | |
1730 display_character (c); | |
1731 std::cerr << std::endl; | |
1732 } | |
1733 | |
1734 if (c == '\n') | |
1735 input_line_number--; | |
1736 | |
1737 yyunput (c, buf); | |
1738 } | |
1739 | |
1740 // If we read some newlines, we need figure out what column we're | |
1741 // really looking at. | |
1742 | |
1743 void | |
1744 lexical_feedback::fixup_column_count (char *s) | |
1745 { | |
1746 char c; | |
1747 while ((c = *s++) != '\0') | |
1748 { | |
1749 if (c == '\n') | |
1750 { | |
1751 input_line_number++; | |
1752 current_input_column = 1; | |
1753 } | |
1754 else | |
1755 current_input_column++; | |
1756 } | |
1757 } | |
1758 | |
1759 bool | |
1760 lexical_feedback::inside_any_object_index (void) | |
1761 { | |
1762 bool retval = false; | |
1763 | |
1764 for (std::list<bool>::const_iterator i = looking_at_object_index.begin (); | |
1765 i != looking_at_object_index.end (); i++) | |
1766 { | |
1767 if (*i) | |
1768 { | |
1769 retval = true; | |
1770 break; | |
1771 } | |
1772 } | |
1773 | |
1774 return retval; | |
1775 } | |
1776 | |
1777 // Handle keywords. Return -1 if the keyword should be ignored. | |
1778 | |
1779 int | |
1780 lexical_feedback::is_keyword_token (const std::string& s) | |
1781 { | |
1782 int l = input_line_number; | |
1783 int c = current_input_column; | |
1784 | |
1785 int len = s.length (); | |
1786 | |
1787 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); | |
1788 | |
1789 if (kw) | |
1790 { | |
1791 yylval.tok_val = 0; | |
1792 | |
1793 switch (kw->kw_id) | |
1794 { | |
1795 case break_kw: | |
1796 case catch_kw: | |
1797 case continue_kw: | |
1798 case else_kw: | |
1799 case otherwise_kw: | |
1800 case return_kw: | |
1801 case unwind_protect_cleanup_kw: | |
1802 at_beginning_of_statement = true; | |
1803 break; | |
1804 | |
1805 case static_kw: | |
1806 if ((reading_fcn_file || reading_script_file | |
1807 || reading_classdef_file) | |
1808 && ! curr_fcn_file_full_name.empty ()) | |
1809 warning_with_id ("Octave:deprecated-keyword", | |
1810 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'", | |
1811 input_line_number, | |
1812 curr_fcn_file_full_name.c_str ()); | |
1813 else | |
1814 warning_with_id ("Octave:deprecated-keyword", | |
1815 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d", | |
1816 input_line_number); | |
1817 // fall through ... | |
1818 | |
1819 case persistent_kw: | |
1820 break; | |
1821 | |
1822 case case_kw: | |
1823 case elseif_kw: | |
1824 case global_kw: | |
1825 case until_kw: | |
1826 break; | |
1827 | |
1828 case end_kw: | |
1829 if (inside_any_object_index () | |
1830 || (! reading_classdef_file | |
1831 && (defining_func | |
1832 && ! (looking_at_return_list | |
1833 || parsed_function_name.top ())))) | |
1834 return 0; | |
1835 | |
1836 yylval.tok_val = new token (token::simple_end, l, c); | |
1837 at_beginning_of_statement = true; | |
1838 break; | |
1839 | |
1840 case end_try_catch_kw: | |
1841 yylval.tok_val = new token (token::try_catch_end, l, c); | |
1842 at_beginning_of_statement = true; | |
1843 break; | |
1844 | |
1845 case end_unwind_protect_kw: | |
1846 yylval.tok_val = new token (token::unwind_protect_end, l, c); | |
1847 at_beginning_of_statement = true; | |
1848 break; | |
1849 | |
1850 case endfor_kw: | |
1851 yylval.tok_val = new token (token::for_end, l, c); | |
1852 at_beginning_of_statement = true; | |
1853 break; | |
1854 | |
1855 case endfunction_kw: | |
1856 yylval.tok_val = new token (token::function_end, l, c); | |
1857 at_beginning_of_statement = true; | |
1858 break; | |
1859 | |
1860 case endif_kw: | |
1861 yylval.tok_val = new token (token::if_end, l, c); | |
1862 at_beginning_of_statement = true; | |
1863 break; | |
1864 | |
1865 case endparfor_kw: | |
1866 yylval.tok_val = new token (token::parfor_end, l, c); | |
1867 at_beginning_of_statement = true; | |
1868 break; | |
1869 | |
1870 case endswitch_kw: | |
1871 yylval.tok_val = new token (token::switch_end, l, c); | |
1872 at_beginning_of_statement = true; | |
1873 break; | |
1874 | |
1875 case endwhile_kw: | |
1876 yylval.tok_val = new token (token::while_end, l, c); | |
1877 at_beginning_of_statement = true; | |
1878 break; | |
1879 | |
1880 case endclassdef_kw: | |
1881 yylval.tok_val = new token (token::classdef_end, l, c); | |
1882 at_beginning_of_statement = true; | |
1883 break; | |
1884 | |
1885 case endenumeration_kw: | |
1886 yylval.tok_val = new token (token::enumeration_end, l, c); | |
1887 at_beginning_of_statement = true; | |
1888 break; | |
1889 | |
1890 case endevents_kw: | |
1891 yylval.tok_val = new token (token::events_end, l, c); | |
1892 at_beginning_of_statement = true; | |
1893 break; | |
1894 | |
1895 case endmethods_kw: | |
1896 yylval.tok_val = new token (token::methods_end, l, c); | |
1897 at_beginning_of_statement = true; | |
1898 break; | |
1899 | |
1900 case endproperties_kw: | |
1901 yylval.tok_val = new token (token::properties_end, l, c); | |
1902 at_beginning_of_statement = true; | |
1903 break; | |
1904 | |
1905 | |
1906 case for_kw: | |
1907 case parfor_kw: | |
1908 case while_kw: | |
1909 promptflag--; | |
1910 looping++; | |
1911 break; | |
1912 | |
1913 case do_kw: | |
1914 at_beginning_of_statement = true; | |
1915 promptflag--; | |
1916 looping++; | |
1917 break; | |
1918 | |
1919 case try_kw: | |
1920 case unwind_protect_kw: | |
1921 at_beginning_of_statement = true; | |
1922 promptflag--; | |
1923 break; | |
1924 | |
1925 case if_kw: | |
1926 case switch_kw: | |
1927 promptflag--; | |
1928 break; | |
1929 | |
1930 case get_kw: | |
1931 case set_kw: | |
1932 // 'get' and 'set' are keywords in classdef method | |
1933 // declarations. | |
1934 if (! maybe_classdef_get_set_method) | |
1935 return 0; | |
1936 break; | |
1937 | |
1938 case enumeration_kw: | |
1939 case events_kw: | |
1940 case methods_kw: | |
1941 case properties_kw: | |
1942 // 'properties', 'methods' and 'events' are keywords for | |
1943 // classdef blocks. | |
1944 if (! parsing_classdef) | |
1945 return 0; | |
1946 // fall through ... | |
1947 | |
1948 case classdef_kw: | |
1949 // 'classdef' is always a keyword. | |
1950 promptflag--; | |
1951 break; | |
1952 | |
1953 case function_kw: | |
1954 promptflag--; | |
1955 | |
1956 defining_func++; | |
1957 parsed_function_name.push (false); | |
1958 | |
1959 if (! (reading_fcn_file || reading_script_file | |
1960 || reading_classdef_file)) | |
1961 input_line_number = 1; | |
1962 break; | |
1963 | |
1964 case magic_file_kw: | |
1965 { | |
1966 if ((reading_fcn_file || reading_script_file | |
1967 || reading_classdef_file) | |
1968 && ! curr_fcn_file_full_name.empty ()) | |
1969 yylval.tok_val = new token (curr_fcn_file_full_name, l, c); | |
1970 else | |
1971 yylval.tok_val = new token ("stdin", l, c); | |
1972 } | |
1973 break; | |
1974 | |
1975 case magic_line_kw: | |
1976 yylval.tok_val = new token (static_cast<double> (l), "", l, c); | |
1977 break; | |
1978 | |
1979 default: | |
1980 panic_impossible (); | |
1981 } | |
1982 | |
1983 if (! yylval.tok_val) | |
1984 yylval.tok_val = new token (l, c); | |
1985 | |
1986 token_stack.push (yylval.tok_val); | |
1987 | |
1988 return kw->tok; | |
1989 } | |
1990 | |
1991 return 0; | |
1992 } | |
1993 | |
1994 bool | |
1995 lexical_feedback::is_variable (const std::string& name) | |
1996 { | |
1997 return (symbol_table::is_variable (name) | |
1998 || (pending_local_variables.find (name) | |
1999 != pending_local_variables.end ())); | |
2000 } | |
2001 | |
2002 std::string | |
2003 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof) | |
2004 { | |
2005 std::string buf; | |
2006 | |
2007 bool at_bol = true; | |
2008 bool look_for_marker = false; | |
2009 | |
2010 bool warned_incompatible = false; | |
2011 | |
2012 int c = 0; | |
2013 | |
2014 while ((c = reader.getc ()) != EOF) | |
2015 { | |
2016 current_input_column++; | |
2017 | |
2018 if (look_for_marker) | |
2019 { | |
2020 at_bol = false; | |
2021 look_for_marker = false; | |
2022 | |
2023 if (c == '{' || c == '}') | |
2024 { | |
2025 std::string tmp_buf (1, static_cast<char> (c)); | |
2026 | |
2027 int type = c; | |
2028 | |
2029 bool done = false; | |
2030 | |
2031 while ((c = reader.getc ()) != EOF && ! done) | |
2032 { | |
2033 current_input_column++; | |
2034 | |
2035 switch (c) | |
2036 { | |
2037 case ' ': | |
2038 case '\t': | |
2039 tmp_buf += static_cast<char> (c); | |
2040 break; | |
2041 | |
2042 case '\n': | |
2043 { | |
2044 current_input_column = 0; | |
2045 at_bol = true; | |
2046 done = true; | |
2047 | |
2048 if (type == '{') | |
2049 { | |
2050 block_comment_nesting_level++; | |
2051 promptflag--; | |
2052 } | |
2053 else | |
2054 { | |
2055 block_comment_nesting_level--; | |
2056 promptflag++; | |
2057 | |
2058 if (block_comment_nesting_level == 0) | |
2059 { | |
2060 buf += grab_comment_block (reader, true, eof); | |
2061 | |
2062 return buf; | |
2063 } | |
2064 } | |
2065 } | |
2066 break; | |
2067 | |
2068 default: | |
2069 at_bol = false; | |
2070 tmp_buf += static_cast<char> (c); | |
2071 buf += tmp_buf; | |
2072 done = true; | |
2073 break; | |
2074 } | |
2075 } | |
2076 } | |
2077 } | |
2078 | |
2079 if (at_bol && (c == '%' || c == '#')) | |
2080 { | |
2081 if (c == '#' && ! warned_incompatible) | |
2082 { | |
2083 warned_incompatible = true; | |
2084 maybe_gripe_matlab_incompatible_comment (c); | |
2085 } | |
2086 | |
2087 at_bol = false; | |
2088 look_for_marker = true; | |
2089 } | |
2090 else | |
2091 { | |
2092 buf += static_cast<char> (c); | |
2093 | |
2094 if (c == '\n') | |
2095 { | |
2096 current_input_column = 0; | |
2097 at_bol = true; | |
2098 } | |
2099 } | |
2100 } | |
2101 | |
2102 if (c == EOF) | |
2103 eof = true; | |
2104 | |
2105 return buf; | |
2106 } | |
2107 | |
2108 std::string | |
2109 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol, | |
2110 bool& eof) | |
2111 { | |
2112 std::string buf; | |
2113 | |
2114 // TRUE means we are at the beginning of a comment block. | |
2115 bool begin_comment = false; | |
2116 | |
2117 // TRUE means we are currently reading a comment block. | |
2118 bool in_comment = false; | |
2119 | |
2120 bool warned_incompatible = false; | |
2121 | |
2122 int c = 0; | |
2123 | |
2124 while ((c = reader.getc ()) != EOF) | |
2125 { | |
2126 current_input_column++; | |
2127 | |
2128 if (begin_comment) | |
2129 { | |
2130 if (c == '%' || c == '#') | |
2131 { | |
2132 at_bol = false; | |
2133 continue; | |
2134 } | |
2135 else if (at_bol && c == '{') | |
2136 { | |
2137 std::string tmp_buf (1, static_cast<char> (c)); | |
2138 | |
2139 bool done = false; | |
2140 | |
2141 while ((c = reader.getc ()) != EOF && ! done) | |
2142 { | |
2143 current_input_column++; | |
2144 | |
2145 switch (c) | |
2146 { | |
2147 case ' ': | |
2148 case '\t': | |
2149 tmp_buf += static_cast<char> (c); | |
2150 break; | |
2151 | |
2152 case '\n': | |
2153 { | |
2154 current_input_column = 0; | |
2155 at_bol = true; | |
2156 done = true; | |
2157 | |
2158 block_comment_nesting_level++; | |
2159 promptflag--; | |
2160 | |
2161 buf += grab_block_comment (reader, eof); | |
2162 | |
2163 in_comment = false; | |
2164 | |
2165 if (eof) | |
2166 goto done; | |
2167 } | |
2168 break; | |
2169 | |
2170 default: | |
2171 at_bol = false; | |
2172 tmp_buf += static_cast<char> (c); | |
2173 buf += tmp_buf; | |
2174 done = true; | |
2175 break; | |
2176 } | |
2177 } | |
2178 } | |
2179 else | |
2180 { | |
2181 at_bol = false; | |
2182 begin_comment = false; | |
2183 } | |
2184 } | |
2185 | |
2186 if (in_comment) | |
2187 { | |
2188 buf += static_cast<char> (c); | |
2189 | |
2190 if (c == '\n') | |
2191 { | |
2192 at_bol = true; | |
2193 current_input_column = 0; | |
2194 in_comment = false; | |
2195 | |
2196 // FIXME -- bailing out here prevents things like | |
2197 // | |
2198 // octave> # comment | |
2199 // octave> x = 1 | |
2200 // | |
2201 // from failing at the command line, while still | |
2202 // allowing blocks of comments to be grabbed properly | |
2203 // for function doc strings. But only the first line of | |
2204 // a mult-line doc string will be picked up for | |
2205 // functions defined on the command line. We need a | |
2206 // better way of collecting these comments... | |
2207 if (! (reading_fcn_file || reading_script_file)) | |
2208 goto done; | |
2209 } | |
2210 } | |
2211 else | |
2212 { | |
2213 switch (c) | |
2214 { | |
2215 case ' ': | |
2216 case '\t': | |
2217 break; | |
2218 | |
2219 case '#': | |
2220 if (! warned_incompatible) | |
2221 { | |
2222 warned_incompatible = true; | |
2223 maybe_gripe_matlab_incompatible_comment (c); | |
2224 } | |
2225 // fall through... | |
2226 | |
2227 case '%': | |
2228 in_comment = true; | |
2229 begin_comment = true; | |
2230 break; | |
2231 | |
2232 default: | |
2233 current_input_column--; | |
2234 reader.ungetc (c); | |
2235 goto done; | |
2236 } | |
2237 } | |
2238 } | |
2239 | |
2240 done: | |
2241 | |
2242 if (c == EOF) | |
2243 eof = true; | |
2244 | |
2245 return buf; | |
2246 } | |
2247 | |
2248 int | |
2249 lexical_feedback::process_comment (bool start_in_block, bool& eof) | |
2250 { | |
2251 eof = false; | |
2252 | |
2253 std::string help_txt; | |
2254 | |
2255 if (! help_buf.empty ()) | |
2256 help_txt = help_buf.top (); | |
2257 | |
2258 flex_stream_reader flex_reader (this, yytext); | |
2259 | |
2260 // process_comment is only supposed to be called when we are not | |
2261 // initially looking at a block comment. | |
2262 | |
2263 std::string txt = start_in_block | |
2264 ? grab_block_comment (flex_reader, eof) | |
2265 : grab_comment_block (flex_reader, false, eof); | |
2266 | |
2267 if (lexer_debug_flag) | |
2268 std::cerr << "C: " << txt << std::endl; | |
2269 | |
2270 if (help_txt.empty () && nesting_level.none ()) | |
2271 { | |
2272 if (! help_buf.empty ()) | |
2273 help_buf.pop (); | |
2274 | |
2275 help_buf.push (txt); | |
2276 } | |
2277 | |
2278 octave_comment_buffer::append (txt); | |
2279 | |
2280 current_input_column = 1; | |
2281 quote_is_transpose = false; | |
2282 convert_spaces_to_comma = true; | |
2283 at_beginning_of_statement = true; | |
2284 | |
2285 if (YY_START == COMMAND_START) | |
2286 BEGIN (INITIAL); | |
2287 | |
2288 if (nesting_level.none ()) | |
2289 return '\n'; | |
2290 else if (nesting_level.is_bracket_or_brace ()) | |
2291 return ';'; | |
2292 else | |
2293 return 0; | |
2294 } | |
2295 | |
2296 // Recognize separators. If the separator is a CRLF pair, it is | |
2297 // replaced by a single LF. | |
2298 | |
2299 bool | |
2300 lexical_feedback::next_token_is_sep_op (void) | |
2301 { | |
2302 bool retval = false; | |
2303 | |
2304 int c = text_yyinput (); | |
2305 | |
2306 retval = match_any (c, ",;\n]"); | |
2307 | |
2308 xunput (c, yytext); | |
2309 | |
2310 return retval; | |
2311 } | |
2312 | |
2313 // Try to determine if the next token should be treated as a postfix | |
2314 // unary operator. This is ugly, but it seems to do the right thing. | |
2315 | |
2316 bool | |
2317 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev) | |
2318 { | |
2319 bool un_op = false; | |
2320 | |
2321 int c0 = text_yyinput (); | |
2322 | |
2323 if (c0 == '\'' && ! spc_prev) | |
2324 { | |
2325 un_op = true; | |
2326 } | |
2327 else if (c0 == '.') | |
2328 { | |
2329 int c1 = text_yyinput (); | |
2330 un_op = (c1 == '\''); | |
2331 xunput (c1, yytext); | |
2332 } | |
2333 else if (c0 == '+') | |
2334 { | |
2335 int c1 = text_yyinput (); | |
2336 un_op = (c1 == '+'); | |
2337 xunput (c1, yytext); | |
2338 } | |
2339 else if (c0 == '-') | |
2340 { | |
2341 int c1 = text_yyinput (); | |
2342 un_op = (c1 == '-'); | |
2343 xunput (c1, yytext); | |
2344 } | |
2345 | |
2346 xunput (c0, yytext); | |
2347 | |
2348 return un_op; | |
2349 } | |
2350 | |
2351 // Try to determine if the next token should be treated as a binary | |
2352 // operator. | |
2353 // | |
2354 // This kluge exists because whitespace is not always ignored inside | |
2355 // the square brackets that are used to create matrix objects (though | |
2356 // spacing only really matters in the cases that can be interpreted | |
2357 // either as binary ops or prefix unary ops: currently just +, -). | |
2358 // | |
2359 // Note that a line continuation directly following a + or - operator | |
2360 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be | |
2361 // parsed as a binary operator. | |
2362 | |
2363 bool | |
2364 lexical_feedback::next_token_is_bin_op (bool spc_prev) | |
2365 { | |
2366 bool bin_op = false; | |
2367 | |
2368 int c0 = text_yyinput (); | |
2369 | |
2370 switch (c0) | |
2371 { | |
2372 case '+': | |
2373 case '-': | |
2374 { | |
2375 int c1 = text_yyinput (); | |
2376 | |
2377 switch (c1) | |
2378 { | |
2379 case '+': | |
2380 case '-': | |
2381 // Unary ops, spacing doesn't matter. | |
2382 break; | |
2383 | |
2384 case '=': | |
2385 // Binary ops, spacing doesn't matter. | |
2386 bin_op = true; | |
2387 break; | |
2388 | |
2389 default: | |
2390 // Could be either, spacing matters. | |
2391 bin_op = looks_like_bin_op (spc_prev, c1); | |
2392 break; | |
2393 } | |
2394 | |
2395 xunput (c1, yytext); | |
2396 } | |
2397 break; | |
2398 | |
2399 case ':': | |
2400 case '/': | |
2401 case '\\': | |
2402 case '^': | |
2403 // Always a binary op (may also include /=, \=, and ^=). | |
2404 bin_op = true; | |
2405 break; | |
2406 | |
2407 // .+ .- ./ .\ .^ .* .** | |
2408 case '.': | |
2409 { | |
2410 int c1 = text_yyinput (); | |
2411 | |
2412 if (match_any (c1, "+-/\\^*")) | |
2413 // Always a binary op (may also include .+=, .-=, ./=, ...). | |
2414 bin_op = true; | |
2415 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.') | |
2416 // A structure element reference is a binary op. | |
2417 bin_op = true; | |
2418 | |
2419 xunput (c1, yytext); | |
2420 } | |
2421 break; | |
2422 | |
2423 // = == & && | || * ** | |
2424 case '=': | |
2425 case '&': | |
2426 case '|': | |
2427 case '*': | |
2428 // Always a binary op (may also include ==, &&, ||, **). | |
2429 bin_op = true; | |
2430 break; | |
2431 | |
2432 // < <= <> > >= | |
2433 case '<': | |
2434 case '>': | |
2435 // Always a binary op (may also include <=, <>, >=). | |
2436 bin_op = true; | |
2437 break; | |
2438 | |
2439 // ~= != | |
2440 case '~': | |
2441 case '!': | |
2442 { | |
2443 int c1 = text_yyinput (); | |
2444 | |
2445 // ~ and ! can be unary ops, so require following =. | |
2446 if (c1 == '=') | |
2447 bin_op = true; | |
2448 | |
2449 xunput (c1, yytext); | |
2450 } | |
2451 break; | |
2452 | |
2453 default: | |
2454 break; | |
2455 } | |
2456 | |
2457 xunput (c0, yytext); | |
2458 | |
2459 return bin_op; | |
2460 } | |
2461 | |
2462 // FIXME -- we need to handle block comments here. | |
2463 | |
2464 void | |
2465 lexical_feedback::scan_for_comments (const char *text) | |
2466 { | |
2467 std::string comment_buf; | |
2468 | |
2469 bool in_comment = false; | |
2470 bool beginning_of_comment = false; | |
2471 | |
2472 int len = strlen (text); | |
2473 int i = 0; | |
2474 | |
2475 while (i < len) | |
2476 { | |
2477 char c = text[i++]; | |
2478 | |
2479 switch (c) | |
2480 { | |
2481 case '%': | |
2482 case '#': | |
2483 if (in_comment) | |
2484 { | |
2485 if (! beginning_of_comment) | |
2486 comment_buf += static_cast<char> (c); | |
2487 } | |
2488 else | |
2489 { | |
2490 maybe_gripe_matlab_incompatible_comment (c); | |
2491 in_comment = true; | |
2492 beginning_of_comment = true; | |
2493 } | |
2494 break; | |
2495 | |
2496 case '\n': | |
2497 if (in_comment) | |
2498 { | |
2499 comment_buf += static_cast<char> (c); | |
2500 octave_comment_buffer::append (comment_buf); | |
2501 comment_buf.resize (0); | |
2502 in_comment = false; | |
2503 beginning_of_comment = false; | |
2504 } | |
2505 break; | |
2506 | |
2507 default: | |
2508 if (in_comment) | |
2509 { | |
2510 comment_buf += static_cast<char> (c); | |
2511 beginning_of_comment = false; | |
2512 } | |
2513 break; | |
2514 } | |
2515 } | |
2516 | |
2517 if (! comment_buf.empty ()) | |
2518 octave_comment_buffer::append (comment_buf); | |
2519 } | |
2520 | |
2521 // Discard whitespace, including comments and continuations. | |
2522 | |
2523 // FIXME -- we need to handle block comments here. | |
2524 | |
2525 int | |
2526 lexical_feedback::eat_whitespace (void) | |
2527 { | |
2528 int retval = lexical_feedback::NO_WHITESPACE; | |
2529 | |
2530 std::string comment_buf; | |
2531 | |
2532 bool in_comment = false; | |
2533 bool beginning_of_comment = false; | |
2534 | |
2535 int c = 0; | |
2536 | |
2537 while ((c = text_yyinput ()) != EOF) | |
2538 { | |
2539 current_input_column++; | |
2540 | |
2541 switch (c) | |
2542 { | |
2543 case ' ': | |
2544 case '\t': | |
2545 if (in_comment) | |
2546 { | |
2547 comment_buf += static_cast<char> (c); | |
2548 beginning_of_comment = false; | |
2549 } | |
2550 retval |= lexical_feedback::SPACE_OR_TAB; | |
2551 break; | |
2552 | |
2553 case '\n': | |
2554 retval |= lexical_feedback::NEWLINE; | |
2555 if (in_comment) | |
2556 { | |
2557 comment_buf += static_cast<char> (c); | |
2558 octave_comment_buffer::append (comment_buf); | |
2559 comment_buf.resize (0); | |
2560 in_comment = false; | |
2561 beginning_of_comment = false; | |
2562 } | |
2563 current_input_column = 0; | |
2564 break; | |
2565 | |
2566 case '#': | |
2567 case '%': | |
2568 if (in_comment) | |
2569 { | |
2570 if (! beginning_of_comment) | |
2571 comment_buf += static_cast<char> (c); | |
2572 } | |
2573 else | |
2574 { | |
2575 maybe_gripe_matlab_incompatible_comment (c); | |
2576 in_comment = true; | |
2577 beginning_of_comment = true; | |
2578 } | |
2579 break; | |
2580 | |
2581 case '.': | |
2582 if (in_comment) | |
2583 { | |
2584 comment_buf += static_cast<char> (c); | |
2585 beginning_of_comment = false; | |
2586 break; | |
2587 } | |
2588 else | |
2589 { | |
2590 if (have_ellipsis_continuation ()) | |
2591 break; | |
2592 else | |
2593 goto done; | |
2594 } | |
2595 | |
2596 case '\\': | |
2597 if (in_comment) | |
2598 { | |
2599 comment_buf += static_cast<char> (c); | |
2600 beginning_of_comment = false; | |
2601 break; | |
2602 } | |
2603 else | |
2604 { | |
2605 if (have_continuation ()) | |
2606 break; | |
2607 else | |
2608 goto done; | |
2609 } | |
2610 | |
2611 default: | |
2612 if (in_comment) | |
2613 { | |
2614 comment_buf += static_cast<char> (c); | |
2615 beginning_of_comment = false; | |
2616 break; | |
2617 } | |
2618 else | |
2619 goto done; | |
2620 } | |
2621 } | |
2622 | |
2623 if (! comment_buf.empty ()) | |
2624 octave_comment_buffer::append (comment_buf); | |
2625 | |
2626 done: | |
2627 xunput (c, yytext); | |
2628 current_input_column--; | |
2629 return retval; | |
2630 } | |
2631 | |
// Return true if the LEN-character string S is longer than two
// characters and starts with "0x" or "0X".

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2)
    return false;

  if (s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
2637 | |
2638 void | |
2639 lexical_feedback::handle_number (void) | |
2640 { | |
2641 double value = 0.0; | |
2642 int nread = 0; | |
2643 | |
2644 if (looks_like_hex (yytext, strlen (yytext))) | |
2645 { | |
2646 unsigned long ival; | |
2647 | |
2648 nread = sscanf (yytext, "%lx", &ival); | |
2649 | |
2650 value = static_cast<double> (ival); | |
2651 } | |
2652 else | |
2653 { | |
2654 char *tmp = strsave (yytext); | |
2655 | |
2656 char *idx = strpbrk (tmp, "Dd"); | |
2657 | |
2658 if (idx) | |
2659 *idx = 'e'; | |
2660 | |
2661 nread = sscanf (tmp, "%lf", &value); | |
2662 | |
2663 delete [] tmp; | |
2664 } | |
2665 | |
2666 // If yytext doesn't contain a valid number, we are in deep doo doo. | |
2667 | |
2668 assert (nread == 1); | |
2669 | |
2670 quote_is_transpose = true; | |
2671 convert_spaces_to_comma = true; | |
2672 looking_for_object_index = false; | |
2673 at_beginning_of_statement = false; | |
2674 | |
2675 yylval.tok_val = new token (value, yytext, input_line_number, | |
2676 current_input_column); | |
2677 | |
2678 token_stack.push (yylval.tok_val); | |
2679 | |
2680 current_input_column += yyleng; | |
2681 | |
2682 do_comma_insert_check (); | |
2683 } | |
2684 | |
2685 // We have seen a backslash and need to find out if it should be | |
2686 // treated as a continuation character. If so, this eats it, up to | |
2687 // and including the new line character. | |
2688 // | |
2689 // Match whitespace only, followed by a comment character or newline. | |
2690 // Once a comment character is found, discard all input until newline. | |
2691 // If non-whitespace characters are found before comment | |
2692 // characters, return 0. Otherwise, return 1. | |
2693 | |
2694 // FIXME -- we need to handle block comments here. | |
2695 | |
2696 bool | |
2697 lexical_feedback::have_continuation (bool trailing_comments_ok) | |
2698 { | |
2699 std::ostringstream buf; | |
2700 | |
2701 std::string comment_buf; | |
2702 | |
2703 bool in_comment = false; | |
2704 bool beginning_of_comment = false; | |
2705 | |
2706 int c = 0; | |
2707 | |
2708 while ((c = text_yyinput ()) != EOF) | |
2709 { | |
2710 buf << static_cast<char> (c); | |
2711 | |
2712 switch (c) | |
2713 { | |
2714 case ' ': | |
2715 case '\t': | |
2716 if (in_comment) | |
2717 { | |
2718 comment_buf += static_cast<char> (c); | |
2719 beginning_of_comment = false; | |
2720 } | |
2721 break; | |
2722 | |
2723 case '%': | |
2724 case '#': | |
2725 if (trailing_comments_ok) | |
2726 { | |
2727 if (in_comment) | |
2728 { | |
2729 if (! beginning_of_comment) | |
2730 comment_buf += static_cast<char> (c); | |
2731 } | |
2732 else | |
2733 { | |
2734 maybe_gripe_matlab_incompatible_comment (c); | |
2735 in_comment = true; | |
2736 beginning_of_comment = true; | |
2737 } | |
2738 } | |
2739 else | |
2740 goto cleanup; | |
2741 break; | |
2742 | |
2743 case '\n': | |
2744 if (in_comment) | |
2745 { | |
2746 comment_buf += static_cast<char> (c); | |
2747 octave_comment_buffer::append (comment_buf); | |
2748 } | |
2749 current_input_column = 0; | |
2750 promptflag--; | |
2751 gripe_matlab_incompatible_continuation (); | |
2752 return true; | |
2753 | |
2754 default: | |
2755 if (in_comment) | |
2756 { | |
2757 comment_buf += static_cast<char> (c); | |
2758 beginning_of_comment = false; | |
2759 } | |
2760 else | |
2761 goto cleanup; | |
2762 break; | |
2763 } | |
2764 } | |
2765 | |
2766 xunput (c, yytext); | |
2767 return false; | |
2768 | |
2769 cleanup: | |
2770 | |
2771 std::string s = buf.str (); | |
2772 | |
2773 int len = s.length (); | |
2774 while (len--) | |
2775 xunput (s[len], yytext); | |
2776 | |
2777 return false; | |
2778 } | |
2779 | |
2780 // We have seen a '.' and need to see if it is the start of a | |
2781 // continuation. If so, this eats it, up to and including the new | |
2782 // line character. | |
2783 | |
2784 bool | |
2785 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok) | |
2786 { | |
2787 char c1 = text_yyinput (); | |
2788 if (c1 == '.') | |
2789 { | |
2790 char c2 = text_yyinput (); | |
2791 if (c2 == '.' && have_continuation (trailing_comments_ok)) | |
2792 return true; | |
2793 else | |
2794 { | |
2795 xunput (c2, yytext); | |
2796 xunput (c1, yytext); | |
2797 } | |
2798 } | |
2799 else | |
2800 xunput (c1, yytext); | |
2801 | |
2802 return false; | |
2803 } | |
2804 | |
2805 // See if we have a continuation line. If so, eat it and the leading | |
2806 // whitespace on the next line. | |
2807 | |
2808 int | |
2809 lexical_feedback::eat_continuation (void) | |
2810 { | |
2811 int retval = lexical_feedback::NO_WHITESPACE; | |
2812 | |
2813 int c = text_yyinput (); | |
2814 | |
2815 if ((c == '.' && have_ellipsis_continuation ()) | |
2816 || (c == '\\' && have_continuation ())) | |
2817 retval = eat_whitespace (); | |
2818 else | |
2819 xunput (c, yytext); | |
2820 | |
2821 return retval; | |
2822 } | |
2823 | |
2824 int | |
2825 lexical_feedback::handle_string (char delim) | |
2826 { | |
2827 std::ostringstream buf; | |
2828 | |
2829 int bos_line = input_line_number; | |
2830 int bos_col = current_input_column; | |
2831 | |
2832 int c; | |
2833 int escape_pending = 0; | |
2834 | |
2835 while ((c = text_yyinput ()) != EOF) | |
2836 { | |
2837 current_input_column++; | |
2838 | |
2839 if (c == '\\') | |
2840 { | |
2841 if (delim == '\'' || escape_pending) | |
2842 { | |
2843 buf << static_cast<char> (c); | |
2844 escape_pending = 0; | |
2845 } | |
2846 else | |
2847 { | |
2848 if (have_continuation (false)) | |
2849 escape_pending = 0; | |
2850 else | |
2851 { | |
2852 buf << static_cast<char> (c); | |
2853 escape_pending = 1; | |
2854 } | |
2855 } | |
2856 continue; | |
2857 } | |
2858 else if (c == '.') | |
2859 { | |
2860 if (delim == '\'' || ! have_ellipsis_continuation (false)) | |
2861 buf << static_cast<char> (c); | |
2862 } | |
2863 else if (c == '\n') | |
2864 { | |
2865 error ("unterminated string constant"); | |
2866 break; | |
2867 } | |
2868 else if (c == delim) | |
2869 { | |
2870 if (escape_pending) | |
2871 buf << static_cast<char> (c); | |
2872 else | |
2873 { | |
2874 c = text_yyinput (); | |
2875 if (c == delim) | |
2876 { | |
2877 buf << static_cast<char> (c); | |
2878 } | |
2879 else | |
2880 { | |
2881 std::string s; | |
2882 xunput (c, yytext); | |
2883 | |
2884 if (delim == '\'') | |
2885 s = buf.str (); | |
2886 else | |
2887 s = do_string_escapes (buf.str ()); | |
2888 | |
2889 quote_is_transpose = true; | |
2890 convert_spaces_to_comma = true; | |
2891 | |
2892 yylval.tok_val = new token (s, bos_line, bos_col); | |
2893 token_stack.push (yylval.tok_val); | |
2894 | |
2895 if (delim == '"') | |
2896 gripe_matlab_incompatible ("\" used as string delimiter"); | |
2897 else if (delim == '\'') | |
2898 gripe_single_quote_string (); | |
2899 | |
2900 looking_for_object_index = true; | |
2901 at_beginning_of_statement = false; | |
2902 | |
2903 return delim == '"' ? DQ_STRING : SQ_STRING; | |
2904 } | |
2905 } | |
2906 } | |
2907 else | |
2908 { | |
2909 buf << static_cast<char> (c); | |
2910 } | |
2911 | |
2912 escape_pending = 0; | |
2913 } | |
2914 | |
2915 return LEXICAL_ERROR; | |
2916 } | |
2917 | |
2918 bool | |
2919 lexical_feedback::next_token_is_assign_op (void) | |
2920 { | |
2921 bool retval = false; | |
2922 | |
2923 int c0 = text_yyinput (); | |
2924 | |
2925 switch (c0) | |
2926 { | |
2927 case '=': | |
2928 { | |
2929 int c1 = text_yyinput (); | |
2930 xunput (c1, yytext); | |
2931 if (c1 != '=') | |
2932 retval = true; | |
2933 } | |
2934 break; | |
2935 | |
2936 case '+': | |
2937 case '-': | |
2938 case '*': | |
2939 case '/': | |
2940 case '\\': | |
2941 case '&': | |
2942 case '|': | |
2943 { | |
2944 int c1 = text_yyinput (); | |
2945 xunput (c1, yytext); | |
2946 if (c1 == '=') | |
2947 retval = true; | |
2948 } | |
2949 break; | |
2950 | |
2951 case '.': | |
2952 { | |
2953 int c1 = text_yyinput (); | |
2954 if (match_any (c1, "+-*/\\")) | |
2955 { | |
2956 int c2 = text_yyinput (); | |
2957 xunput (c2, yytext); | |
2958 if (c2 == '=') | |
2959 retval = true; | |
2960 } | |
2961 xunput (c1, yytext); | |
2962 } | |
2963 break; | |
2964 | |
2965 case '>': | |
2966 { | |
2967 int c1 = text_yyinput (); | |
2968 if (c1 == '>') | |
2969 { | |
2970 int c2 = text_yyinput (); | |
2971 xunput (c2, yytext); | |
2972 if (c2 == '=') | |
2973 retval = true; | |
2974 } | |
2975 xunput (c1, yytext); | |
2976 } | |
2977 break; | |
2978 | |
2979 case '<': | |
2980 { | |
2981 int c1 = text_yyinput (); | |
2982 if (c1 == '<') | |
2983 { | |
2984 int c2 = text_yyinput (); | |
2985 xunput (c2, yytext); | |
2986 if (c2 == '=') | |
2987 retval = true; | |
2988 } | |
2989 xunput (c1, yytext); | |
2990 } | |
2991 break; | |
2992 | |
2993 default: | |
2994 break; | |
2995 } | |
2996 | |
2997 xunput (c0, yytext); | |
2998 | |
2999 return retval; | |
3000 } | |
3001 | |
3002 bool | |
3003 lexical_feedback::next_token_is_index_op (void) | |
3004 { | |
3005 int c = text_yyinput (); | |
3006 xunput (c, yytext); | |
3007 return c == '(' || c == '{'; | |
3008 } | |
3009 | |
3010 int | |
3011 lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type) | |
3012 { | |
3013 int retval = bracket_type; | |
3014 | |
3015 if (! nesting_level.none ()) | |
3016 { | |
3017 nesting_level.remove (); | |
3018 | |
3019 if (bracket_type == ']') | |
3020 bracketflag--; | |
3021 else if (bracket_type == '}') | |
3022 braceflag--; | |
3023 else | |
3024 panic_impossible (); | |
3025 } | |
3026 | |
3027 if (bracketflag == 0 && braceflag == 0) | |
3028 BEGIN (INITIAL); | |
3029 | |
3030 if (bracket_type == ']' | |
3031 && next_token_is_assign_op () | |
3032 && ! looking_at_return_list) | |
3033 { | |
3034 retval = CLOSE_BRACE; | |
3035 } | |
3036 else if ((bracketflag || braceflag) | |
3037 && convert_spaces_to_comma | |
3038 && (nesting_level.is_bracket () | |
3039 || (nesting_level.is_brace () | |
3040 && ! looking_at_object_index.front ()))) | |
3041 { | |
3042 bool index_op = next_token_is_index_op (); | |
3043 | |
3044 // Don't insert comma if we are looking at something like | |
3045 // | |
3046 // [x{i}{j}] or [x{i}(j)] | |
3047 // | |
3048 // but do if we are looking at | |
3049 // | |
3050 // [x{i} {j}] or [x{i} (j)] | |
3051 | |
3052 if (spc_gobbled || ! (bracket_type == '}' && index_op)) | |
3053 { | |
3054 bool bin_op = next_token_is_bin_op (spc_gobbled); | |
3055 | |
3056 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
3057 | |
3058 bool sep_op = next_token_is_sep_op (); | |
3059 | |
3060 if (! (postfix_un_op || bin_op || sep_op)) | |
3061 { | |
3062 maybe_warn_separator_insert (','); | |
3063 | |
3064 xunput (',', yytext); | |
3065 return retval; | |
3066 } | |
3067 } | |
3068 } | |
3069 | |
3070 quote_is_transpose = true; | |
3071 convert_spaces_to_comma = true; | |
3072 | |
3073 return retval; | |
3074 } | |
3075 | |
3076 void | |
3077 lexical_feedback::maybe_unput_comma (int spc_gobbled) | |
3078 { | |
3079 if (nesting_level.is_bracket () | |
3080 || (nesting_level.is_brace () | |
3081 && ! looking_at_object_index.front ())) | |
3082 { | |
3083 int bin_op = next_token_is_bin_op (spc_gobbled); | |
3084 | |
3085 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled); | |
3086 | |
3087 int c1 = text_yyinput (); | |
3088 int c2 = text_yyinput (); | |
3089 | |
3090 xunput (c2, yytext); | |
3091 xunput (c1, yytext); | |
3092 | |
3093 int sep_op = next_token_is_sep_op (); | |
3094 | |
3095 int dot_op = (c1 == '.' | |
3096 && (isalpha (c2) || isspace (c2) || c2 == '_')); | |
3097 | |
3098 if (postfix_un_op || bin_op || sep_op || dot_op) | |
3099 return; | |
3100 | |
3101 int index_op = (c1 == '(' || c1 == '{'); | |
3102 | |
3103 // If there is no space before the indexing op, we don't insert | |
3104 // a comma. | |
3105 | |
3106 if (index_op && ! spc_gobbled) | |
3107 return; | |
3108 | |
3109 maybe_warn_separator_insert (','); | |
3110 | |
3111 xunput (',', yytext); | |
3112 } | |
3113 } | |
3114 | |
3115 bool | |
3116 lexical_feedback::next_token_can_follow_bin_op (void) | |
3117 { | |
3118 std::stack<char> buf; | |
3119 | |
3120 int c = EOF; | |
3121 | |
3122 // Skip whitespace in current statement on current line | |
3123 while (true) | |
3124 { | |
3125 c = text_yyinput (); | |
3126 | |
3127 buf.push (c); | |
3128 | |
3129 if (match_any (c, ",;\n") || (c != ' ' && c != '\t')) | |
3130 break; | |
3131 } | |
3132 | |
3133 // Restore input. | |
3134 while (! buf.empty ()) | |
3135 { | |
3136 xunput (buf.top (), yytext); | |
3137 | |
3138 buf.pop (); | |
3139 } | |
3140 | |
3141 return (isalnum (c) || match_any (c, "!\"'(-[_{~")); | |
3142 } | |
3143 | |
// Return false for the names listed below and true for anything else.
// These names are not allowed to be treated as commands to avoid
// surprises when parsing things like "NaN ^2".

static bool
can_be_command (const std::string& tok)
{
  static const char * const excluded[] =
    {
      "e",
      "I", "i",
      "J", "j",
      "Inf", "inf",
      "NaN", "nan"
    };

  const size_t n = sizeof (excluded) / sizeof (excluded[0]);

  for (size_t k = 0; k < n; k++)
    {
      if (tok == excluded[k])
        return false;
    }

  return true;
}
3156 | |
3157 bool | |
3158 lexical_feedback::looks_like_command_arg (void) | |
3159 { | |
3160 bool retval = true; | |
3161 | |
3162 int c0 = text_yyinput (); | |
3163 | |
3164 switch (c0) | |
3165 { | |
3166 // = == | |
3167 case '=': | |
3168 { | |
3169 int c1 = text_yyinput (); | |
3170 | |
3171 if (c1 == '=') | |
3172 { | |
3173 int c2 = text_yyinput (); | |
3174 | |
3175 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3176 && next_token_can_follow_bin_op ()) | |
3177 retval = false; | |
3178 | |
3179 xunput (c2, yytext); | |
3180 } | |
3181 else | |
3182 retval = false; | |
3183 | |
3184 xunput (c1, yytext); | |
3185 } | |
3186 break; | |
3187 | |
3188 case '(': | |
3189 case '{': | |
3190 // Indexing. | |
3191 retval = false; | |
3192 break; | |
3193 | |
3194 case '\n': | |
3195 // EOL. | |
3196 break; | |
3197 | |
3198 case '\'': | |
3199 case '"': | |
3200 // Beginning of a character string. | |
3201 break; | |
3202 | |
3203 // + - ++ -- += -= | |
3204 case '+': | |
3205 case '-': | |
3206 { | |
3207 int c1 = text_yyinput (); | |
3208 | |
3209 switch (c1) | |
3210 { | |
3211 case '\n': | |
3212 // EOL. | |
3213 case '+': | |
3214 case '-': | |
3215 // Unary ops, spacing doesn't matter. | |
3216 break; | |
3217 | |
3218 case '\t': | |
3219 case ' ': | |
3220 { | |
3221 if (next_token_can_follow_bin_op ()) | |
3222 retval = false; | |
3223 } | |
3224 break; | |
3225 | |
3226 case '=': | |
3227 { | |
3228 int c2 = text_yyinput (); | |
3229 | |
3230 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3231 && next_token_can_follow_bin_op ()) | |
3232 retval = false; | |
3233 | |
3234 xunput (c2, yytext); | |
3235 } | |
3236 break; | |
3237 } | |
3238 | |
3239 xunput (c1, yytext); | |
3240 } | |
3241 break; | |
3242 | |
3243 case ':': | |
3244 case '/': | |
3245 case '\\': | |
3246 case '^': | |
3247 { | |
3248 int c1 = text_yyinput (); | |
3249 | |
3250 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3251 && next_token_can_follow_bin_op ()) | |
3252 retval = false; | |
3253 | |
3254 xunput (c1, yytext); | |
3255 } | |
3256 break; | |
3257 | |
3258 // .+ .- ./ .\ .^ .* .** | |
3259 case '.': | |
3260 { | |
3261 int c1 = text_yyinput (); | |
3262 | |
3263 if (match_any (c1, "+-/\\^*")) | |
3264 { | |
3265 int c2 = text_yyinput (); | |
3266 | |
3267 if (c2 == '=') | |
3268 { | |
3269 int c3 = text_yyinput (); | |
3270 | |
3271 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t') | |
3272 && next_token_can_follow_bin_op ()) | |
3273 retval = false; | |
3274 | |
3275 xunput (c3, yytext); | |
3276 } | |
3277 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3278 && next_token_can_follow_bin_op ()) | |
3279 retval = false; | |
3280 | |
3281 xunput (c2, yytext); | |
3282 } | |
3283 else if (! match_any (c1, ",;\n") | |
3284 && (! isdigit (c1) && c1 != ' ' && c1 != '\t' | |
3285 && c1 != '.')) | |
3286 { | |
3287 // Structure reference. FIXME -- is this a complete check? | |
3288 | |
3289 retval = false; | |
3290 } | |
3291 | |
3292 xunput (c1, yytext); | |
3293 } | |
3294 break; | |
3295 | |
3296 // & && | || * ** | |
3297 case '&': | |
3298 case '|': | |
3299 case '*': | |
3300 { | |
3301 int c1 = text_yyinput (); | |
3302 | |
3303 if (c1 == c0) | |
3304 { | |
3305 int c2 = text_yyinput (); | |
3306 | |
3307 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3308 && next_token_can_follow_bin_op ()) | |
3309 retval = false; | |
3310 | |
3311 xunput (c2, yytext); | |
3312 } | |
3313 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3314 && next_token_can_follow_bin_op ()) | |
3315 retval = false; | |
3316 | |
3317 xunput (c1, yytext); | |
3318 } | |
3319 break; | |
3320 | |
3321 // < <= > >= | |
3322 case '<': | |
3323 case '>': | |
3324 { | |
3325 int c1 = text_yyinput (); | |
3326 | |
3327 if (c1 == '=') | |
3328 { | |
3329 int c2 = text_yyinput (); | |
3330 | |
3331 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3332 && next_token_can_follow_bin_op ()) | |
3333 retval = false; | |
3334 | |
3335 xunput (c2, yytext); | |
3336 } | |
3337 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3338 && next_token_can_follow_bin_op ()) | |
3339 retval = false; | |
3340 | |
3341 xunput (c1, yytext); | |
3342 } | |
3343 break; | |
3344 | |
3345 // ~= != | |
3346 case '~': | |
3347 case '!': | |
3348 { | |
3349 int c1 = text_yyinput (); | |
3350 | |
3351 // ~ and ! can be unary ops, so require following =. | |
3352 if (c1 == '=') | |
3353 { | |
3354 int c2 = text_yyinput (); | |
3355 | |
3356 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t') | |
3357 && next_token_can_follow_bin_op ()) | |
3358 retval = false; | |
3359 | |
3360 xunput (c2, yytext); | |
3361 } | |
3362 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t') | |
3363 && next_token_can_follow_bin_op ()) | |
3364 retval = false; | |
3365 | |
3366 xunput (c1, yytext); | |
3367 } | |
3368 break; | |
3369 | |
3370 default: | |
3371 break; | |
3372 } | |
3373 | |
3374 xunput (c0, yytext); | |
3375 | |
3376 return retval; | |
3377 } | |
3378 | |
3379 int | |
3380 lexical_feedback::handle_superclass_identifier (void) | |
3381 { | |
3382 eat_continuation (); | |
3383 | |
3384 std::string pkg; | |
3385 std::string meth = strip_trailing_whitespace (yytext); | |
3386 size_t pos = meth.find ("@"); | |
3387 std::string cls = meth.substr (pos).substr (1); | |
3388 meth = meth.substr (0, pos - 1); | |
3389 | |
3390 pos = cls.find ("."); | |
3391 if (pos != std::string::npos) | |
3392 { | |
3393 pkg = cls.substr (pos).substr (1); | |
3394 cls = cls.substr (0, pos - 1); | |
3395 } | |
3396 | |
3397 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls) | |
3398 || is_keyword_token (pkg)); | |
3399 if (kw_token) | |
3400 { | |
3401 error ("method, class and package names may not be keywords"); | |
3402 return LEXICAL_ERROR; | |
3403 } | |
3404 | |
3405 yylval.tok_val | |
3406 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)), | |
3407 cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3408 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3409 input_line_number, | |
3410 current_input_column); | |
3411 token_stack.push (yylval.tok_val); | |
3412 | |
3413 convert_spaces_to_comma = true; | |
3414 current_input_column += yyleng; | |
3415 | |
3416 return SUPERCLASSREF; | |
3417 } | |
3418 | |
3419 int | |
3420 lexical_feedback::handle_meta_identifier (void) | |
3421 { | |
3422 eat_continuation (); | |
3423 | |
3424 std::string pkg; | |
3425 std::string cls = strip_trailing_whitespace (yytext).substr (1); | |
3426 size_t pos = cls.find ("."); | |
3427 | |
3428 if (pos != std::string::npos) | |
3429 { | |
3430 pkg = cls.substr (pos).substr (1); | |
3431 cls = cls.substr (0, pos - 1); | |
3432 } | |
3433 | |
3434 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg); | |
3435 if (kw_token) | |
3436 { | |
3437 error ("class and package names may not be keywords"); | |
3438 return LEXICAL_ERROR; | |
3439 } | |
3440 | |
3441 yylval.tok_val | |
3442 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)), | |
3443 pkg.empty () ? 0 : &(symbol_table::insert (pkg)), | |
3444 input_line_number, | |
3445 current_input_column); | |
3446 | |
3447 token_stack.push (yylval.tok_val); | |
3448 | |
3449 convert_spaces_to_comma = true; | |
3450 current_input_column += yyleng; | |
3451 | |
3452 return METAQUERY; | |
3453 } | |
3454 | |
3455 // Figure out exactly what kind of token to return when we have seen | |
3456 // an identifier. Handles keywords. Return -1 if the identifier | |
3457 // should be ignored. | |
3458 | |
3459 int | |
3460 lexical_feedback::handle_identifier (void) | |
3461 { | |
3462 bool at_bos = at_beginning_of_statement; | |
3463 | |
3464 std::string tok = strip_trailing_whitespace (yytext); | |
3465 | |
3466 int c = yytext[yyleng-1]; | |
3467 | |
3468 bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE); | |
3469 | |
3470 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
3471 | |
3472 // If we are expecting a structure element, avoid recognizing | |
3473 // keywords and other special names and return STRUCT_ELT, which is | |
3474 // a string that is also a valid identifier. But first, we have to | |
3475 // decide whether to insert a comma. | |
3476 | |
3477 if (looking_at_indirect_ref) | |
3478 { | |
3479 do_comma_insert_check (); | |
3480 | |
3481 maybe_unput_comma (spc_gobbled); | |
3482 | |
3483 yylval.tok_val = new token (tok, input_line_number, | |
3484 current_input_column); | |
3485 | |
3486 token_stack.push (yylval.tok_val); | |
3487 | |
3488 quote_is_transpose = true; | |
3489 convert_spaces_to_comma = true; | |
3490 looking_for_object_index = true; | |
3491 | |
3492 current_input_column += yyleng; | |
3493 | |
3494 return STRUCT_ELT; | |
3495 } | |
3496 | |
3497 at_beginning_of_statement = false; | |
3498 | |
3499 // The is_keyword_token may reset | |
3500 // at_beginning_of_statement. For example, if it sees | |
3501 // an else token, then the next token is at the beginning of a | |
3502 // statement. | |
3503 | |
3504 int kw_token = is_keyword_token (tok); | |
3505 | |
3506 // If we found a keyword token, then the beginning_of_statement flag | |
3507 // is already set. Otherwise, we won't be at the beginning of a | |
3508 // statement. | |
3509 | |
3510 if (looking_at_function_handle) | |
3511 { | |
3512 if (kw_token) | |
3513 { | |
3514 error ("function handles may not refer to keywords"); | |
3515 | |
3516 return LEXICAL_ERROR; | |
3517 } | |
3518 else | |
3519 { | |
3520 yylval.tok_val = new token (tok, input_line_number, | |
3521 current_input_column); | |
3522 | |
3523 token_stack.push (yylval.tok_val); | |
3524 | |
3525 current_input_column += yyleng; | |
3526 quote_is_transpose = false; | |
3527 convert_spaces_to_comma = true; | |
3528 looking_for_object_index = true; | |
3529 | |
3530 return FCN_HANDLE; | |
3531 } | |
3532 } | |
3533 | |
3534 // If we have a regular keyword, return it. | |
3535 // Keywords can be followed by identifiers. | |
3536 | |
3537 if (kw_token) | |
3538 { | |
3539 if (kw_token >= 0) | |
3540 { | |
3541 current_input_column += yyleng; | |
3542 quote_is_transpose = false; | |
3543 convert_spaces_to_comma = true; | |
3544 looking_for_object_index = false; | |
3545 } | |
3546 | |
3547 return kw_token; | |
3548 } | |
3549 | |
3550 // See if we have a plot keyword (title, using, with, or clear). | |
3551 | |
3552 int c1 = text_yyinput (); | |
3553 | |
3554 bool next_tok_is_eq = false; | |
3555 if (c1 == '=') | |
3556 { | |
3557 int c2 = text_yyinput (); | |
3558 xunput (c2, yytext); | |
3559 | |
3560 if (c2 != '=') | |
3561 next_tok_is_eq = true; | |
3562 } | |
3563 | |
3564 xunput (c1, yytext); | |
3565 | |
3566 // Kluge alert. | |
3567 // | |
3568 // If we are looking at a text style function, set up to gobble its | |
3569 // arguments. | |
3570 // | |
3571 // If the following token is '=', or if we are parsing a function | |
3572 // return list or function parameter list, or if we are looking at | |
3573 // something like [ab,cd] = foo (), force the symbol to be inserted | |
3574 // as a variable in the current symbol table. | |
3575 | |
3576 if (! is_variable (tok)) | |
3577 { | |
3578 if (at_bos && spc_gobbled && can_be_command (tok) | |
3579 && looks_like_command_arg ()) | |
3580 { | |
3581 BEGIN (COMMAND_START); | |
3582 } | |
3583 else if (next_tok_is_eq | |
3584 || looking_at_decl_list | |
3585 || looking_at_return_list | |
3586 || (looking_at_parameter_list | |
3587 && ! looking_at_initializer_expression)) | |
3588 { | |
3589 symbol_table::force_variable (tok); | |
3590 } | |
3591 else if (looking_at_matrix_or_assign_lhs) | |
3592 { | |
3593 pending_local_variables.insert (tok); | |
3594 } | |
3595 } | |
3596 | |
3597 // Find the token in the symbol table. Beware the magic | |
3598 // transformation of the end keyword... | |
3599 | |
3600 if (tok == "end") | |
3601 tok = "__end__"; | |
3602 | |
3603 yylval.tok_val = new token (&(symbol_table::insert (tok)), | |
3604 input_line_number, | |
3605 current_input_column); | |
3606 | |
3607 token_stack.push (yylval.tok_val); | |
3608 | |
3609 // After seeing an identifer, it is ok to convert spaces to a comma | |
3610 // (if needed). | |
3611 | |
3612 convert_spaces_to_comma = true; | |
3613 | |
3614 if (! (next_tok_is_eq || YY_START == COMMAND_START)) | |
3615 { | |
3616 quote_is_transpose = true; | |
3617 | |
3618 do_comma_insert_check (); | |
3619 | |
3620 maybe_unput_comma (spc_gobbled); | |
3621 } | |
3622 | |
3623 current_input_column += yyleng; | |
3624 | |
3625 if (tok != "__end__") | |
3626 looking_for_object_index = true; | |
3627 | |
3628 return NAME; | |
3629 } | |
3630 | |
3631 void | |
3632 lexical_feedback::maybe_warn_separator_insert (char sep) | |
3633 { | |
3634 std::string nm = curr_fcn_file_full_name; | |
3635 | |
3636 if (nm.empty ()) | |
3637 warning_with_id ("Octave:separator-insert", | |
3638 "potential auto-insertion of '%c' near line %d", | |
3639 sep, input_line_number); | |
3640 else | |
3641 warning_with_id ("Octave:separator-insert", | |
3642 "potential auto-insertion of '%c' near line %d of file %s", | |
3643 sep, input_line_number, nm.c_str ()); | |
3644 } | |
3645 | |
3646 void | |
3647 lexical_feedback::gripe_single_quote_string (void) | |
3648 { | |
3649 std::string nm = curr_fcn_file_full_name; | |
3650 | |
3651 if (nm.empty ()) | |
3652 warning_with_id ("Octave:single-quote-string", | |
3653 "single quote delimited string near line %d", | |
3654 input_line_number); | |
3655 else | |
3656 warning_with_id ("Octave:single-quote-string", | |
3657 "single quote delimited string near line %d of file %s", | |
3658 input_line_number, nm.c_str ()); | |
3659 } | |
3660 | |
3661 void | |
3662 lexical_feedback::gripe_matlab_incompatible (const std::string& msg) | |
3663 { | |
3664 std::string nm = curr_fcn_file_full_name; | |
3665 | |
3666 if (nm.empty ()) | |
3667 warning_with_id ("Octave:matlab-incompatible", | |
3668 "potential Matlab compatibility problem: %s", | |
3669 msg.c_str ()); | |
3670 else | |
3671 warning_with_id ("Octave:matlab-incompatible", | |
3672 "potential Matlab compatibility problem: %s near line %d offile %s", | |
3673 msg.c_str (), input_line_number, nm.c_str ()); | |
3674 } | |
3675 | |
3676 void | |
3677 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c) | |
3678 { | |
3679 if (c == '#') | |
3680 gripe_matlab_incompatible ("# used as comment character"); | |
3681 } | |
3682 | |
3683 void | |
3684 lexical_feedback::gripe_matlab_incompatible_continuation (void) | |
3685 { | |
3686 gripe_matlab_incompatible ("\\ used as line continuation marker"); | |
3687 } | |
3688 | |
3689 void | |
3690 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op) | |
3691 { | |
3692 std::string t = op; | |
3693 int n = t.length (); | |
3694 if (t[n-1] == '\n') | |
3695 t.resize (n-1); | |
3696 gripe_matlab_incompatible (t + " used as operator"); | |
3697 } |