comparison libinterp/parse-tree/lex.ll @ 16119:b31eb56f4d84

maint: reorder class definitions in lex.ll
author John W. Eaton <jwe@octave.org>
date Tue, 26 Feb 2013 12:01:54 -0500
parents f8e463523229
children 4b68eb9b98b0
comparison
equal deleted inserted replaced
16118:f8e463523229 16119:b31eb56f4d84
1419 if (! eof) 1419 if (! eof)
1420 YY_FATAL_ERROR ("octave_read () in flex scanner failed"); 1420 YY_FATAL_ERROR ("octave_read () in flex scanner failed");
1421 } 1421 }
1422 1422
1423 return status; 1423 return status;
1424 }
1425
1426 DEFUN (__display_tokens__, args, nargout,
1427 "-*- texinfo -*-\n\
1428 @deftypefn {Built-in Function} {} __display_tokens__ ()\n\
1429 Query or set the internal variable that determines whether Octave's\n\
1430 lexer displays tokens as they are read.\n\
1431 @end deftypefn")
1432 {
1433 return SET_INTERNAL_VARIABLE (display_tokens);
1434 }
1435
1436 DEFUN (__token_count__, , ,
1437 "-*- texinfo -*-\n\
1438 @deftypefn {Built-in Function} {} __token_count__ ()\n\
1439 Number of language tokens processed since Octave startup.\n\
1440 @end deftypefn")
1441 {
1442 return octave_value (Vtoken_count);
1443 }
1444
1445 DEFUN (__lexer_debug_flag__, args, nargout,
1446 "-*- texinfo -*-\n\
1447 @deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val}))\n\
1448 Undocumented internal function.\n\
1449 @end deftypefn")
1450 {
1451 octave_value retval;
1452
1453 retval = set_internal_variable (lexer_debug_flag, args, nargout,
1454 "__lexer_debug_flag__");
1455
1456 return retval;
1457 }
1458
1459 class
1460 flex_stream_reader : public stream_reader
1461 {
1462 public:
1463 flex_stream_reader (lexical_feedback *l, char *buf_arg)
1464 : stream_reader (), lexer (l), buf (buf_arg)
1465 { }
1466
1467 int getc (void) { return lexer->text_yyinput (); }
1468 int ungetc (int c) { lexer->xunput (c, buf); return 0; }
1469
1470 private:
1471
1472 // No copying!
1473
1474 flex_stream_reader (const flex_stream_reader&);
1475
1476 flex_stream_reader& operator = (const flex_stream_reader&);
1477
1478 lexical_feedback *lexer;
1479
1480 char *buf;
1481 };
1482
1483 lexical_feedback::~lexical_feedback (void)
1484 {
1485 // Clear out the stack of token info used to track line and
1486 // column numbers.
1487
1488 while (! token_stack.empty ())
1489 {
1490 delete token_stack.top ();
1491 token_stack.pop ();
1492 }
1493 }
1494
1495 // GAG.
1496 //
1497 // If we're reading a matrix and the next character is '[', make sure
1498 // that we insert a comma ahead of it.
1499
1500 void
1501 lexical_feedback::do_comma_insert_check (void)
1502 {
1503 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE);
1504
1505 int c = text_yyinput ();
1506
1507 xunput (c, yytext);
1508
1509 if (spc_gobbled)
1510 xunput (' ', yytext);
1511
1512 do_comma_insert = (! looking_at_object_index.front ()
1513 && bracketflag && c == '[');
1514 }
1515
1516 int
1517 lexical_feedback::text_yyinput (void)
1518 {
1519 int c = yyinput ();
1520
1521 if (lexer_debug_flag)
1522 {
1523 std::cerr << "I: ";
1524 display_character (c);
1525 std::cerr << std::endl;
1526 }
1527
1528 // Convert CRLF into just LF and single CR into LF.
1529
1530 if (c == '\r')
1531 {
1532 c = yyinput ();
1533
1534 if (lexer_debug_flag)
1535 {
1536 std::cerr << "I: ";
1537 display_character (c);
1538 std::cerr << std::endl;
1539 }
1540
1541 if (c != '\n')
1542 {
1543 xunput (c, yytext);
1544 c = '\n';
1545 }
1546 }
1547
1548 if (c == '\n')
1549 input_line_number++;
1550
1551 return c;
1552 }
1553
1554 void
1555 lexical_feedback::xunput (char c, char *buf)
1556 {
1557 if (lexer_debug_flag)
1558 {
1559 std::cerr << "U: ";
1560 display_character (c);
1561 std::cerr << std::endl;
1562 }
1563
1564 if (c == '\n')
1565 input_line_number--;
1566
1567 yyunput (c, buf);
1568 }
1569
1570 // If we read some newlines, we need figure out what column we're
1571 // really looking at.
1572
1573 void
1574 lexical_feedback::fixup_column_count (char *s)
1575 {
1576 char c;
1577 while ((c = *s++) != '\0')
1578 {
1579 if (c == '\n')
1580 {
1581 input_line_number++;
1582 current_input_column = 1;
1583 }
1584 else
1585 current_input_column++;
1586 }
1587 }
1588
1589 bool
1590 lexical_feedback::inside_any_object_index (void)
1591 {
1592 bool retval = false;
1593
1594 for (std::list<bool>::const_iterator i = looking_at_object_index.begin ();
1595 i != looking_at_object_index.end (); i++)
1596 {
1597 if (*i)
1598 {
1599 retval = true;
1600 break;
1601 }
1602 }
1603
1604 return retval;
1605 }
1606
1607 // Handle keywords. Return -1 if the keyword should be ignored.
1608
1609 int
1610 lexical_feedback::is_keyword_token (const std::string& s)
1611 {
1612 int l = input_line_number;
1613 int c = current_input_column;
1614
1615 int len = s.length ();
1616
1617 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len);
1618
1619 if (kw)
1620 {
1621 yylval.tok_val = 0;
1622
1623 switch (kw->kw_id)
1624 {
1625 case break_kw:
1626 case catch_kw:
1627 case continue_kw:
1628 case else_kw:
1629 case otherwise_kw:
1630 case return_kw:
1631 case unwind_protect_cleanup_kw:
1632 at_beginning_of_statement = true;
1633 break;
1634
1635 case static_kw:
1636 if ((reading_fcn_file || reading_script_file
1637 || reading_classdef_file)
1638 && ! curr_fcn_file_full_name.empty ())
1639 warning_with_id ("Octave:deprecated-keyword",
1640 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'",
1641 input_line_number,
1642 curr_fcn_file_full_name.c_str ());
1643 else
1644 warning_with_id ("Octave:deprecated-keyword",
1645 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d",
1646 input_line_number);
1647 // fall through ...
1648
1649 case persistent_kw:
1650 break;
1651
1652 case case_kw:
1653 case elseif_kw:
1654 case global_kw:
1655 case until_kw:
1656 break;
1657
1658 case end_kw:
1659 if (inside_any_object_index ()
1660 || (! reading_classdef_file
1661 && (defining_func
1662 && ! (looking_at_return_list
1663 || parsed_function_name.top ()))))
1664 return 0;
1665
1666 yylval.tok_val = new token (token::simple_end, l, c);
1667 at_beginning_of_statement = true;
1668 break;
1669
1670 case end_try_catch_kw:
1671 yylval.tok_val = new token (token::try_catch_end, l, c);
1672 at_beginning_of_statement = true;
1673 break;
1674
1675 case end_unwind_protect_kw:
1676 yylval.tok_val = new token (token::unwind_protect_end, l, c);
1677 at_beginning_of_statement = true;
1678 break;
1679
1680 case endfor_kw:
1681 yylval.tok_val = new token (token::for_end, l, c);
1682 at_beginning_of_statement = true;
1683 break;
1684
1685 case endfunction_kw:
1686 yylval.tok_val = new token (token::function_end, l, c);
1687 at_beginning_of_statement = true;
1688 break;
1689
1690 case endif_kw:
1691 yylval.tok_val = new token (token::if_end, l, c);
1692 at_beginning_of_statement = true;
1693 break;
1694
1695 case endparfor_kw:
1696 yylval.tok_val = new token (token::parfor_end, l, c);
1697 at_beginning_of_statement = true;
1698 break;
1699
1700 case endswitch_kw:
1701 yylval.tok_val = new token (token::switch_end, l, c);
1702 at_beginning_of_statement = true;
1703 break;
1704
1705 case endwhile_kw:
1706 yylval.tok_val = new token (token::while_end, l, c);
1707 at_beginning_of_statement = true;
1708 break;
1709
1710 case endclassdef_kw:
1711 yylval.tok_val = new token (token::classdef_end, l, c);
1712 at_beginning_of_statement = true;
1713 break;
1714
1715 case endenumeration_kw:
1716 yylval.tok_val = new token (token::enumeration_end, l, c);
1717 at_beginning_of_statement = true;
1718 break;
1719
1720 case endevents_kw:
1721 yylval.tok_val = new token (token::events_end, l, c);
1722 at_beginning_of_statement = true;
1723 break;
1724
1725 case endmethods_kw:
1726 yylval.tok_val = new token (token::methods_end, l, c);
1727 at_beginning_of_statement = true;
1728 break;
1729
1730 case endproperties_kw:
1731 yylval.tok_val = new token (token::properties_end, l, c);
1732 at_beginning_of_statement = true;
1733 break;
1734
1735
1736 case for_kw:
1737 case parfor_kw:
1738 case while_kw:
1739 promptflag--;
1740 looping++;
1741 break;
1742
1743 case do_kw:
1744 at_beginning_of_statement = true;
1745 promptflag--;
1746 looping++;
1747 break;
1748
1749 case try_kw:
1750 case unwind_protect_kw:
1751 at_beginning_of_statement = true;
1752 promptflag--;
1753 break;
1754
1755 case if_kw:
1756 case switch_kw:
1757 promptflag--;
1758 break;
1759
1760 case get_kw:
1761 case set_kw:
1762 // 'get' and 'set' are keywords in classdef method
1763 // declarations.
1764 if (! maybe_classdef_get_set_method)
1765 return 0;
1766 break;
1767
1768 case enumeration_kw:
1769 case events_kw:
1770 case methods_kw:
1771 case properties_kw:
1772 // 'properties', 'methods' and 'events' are keywords for
1773 // classdef blocks.
1774 if (! parsing_classdef)
1775 return 0;
1776 // fall through ...
1777
1778 case classdef_kw:
1779 // 'classdef' is always a keyword.
1780 promptflag--;
1781 break;
1782
1783 case function_kw:
1784 promptflag--;
1785
1786 defining_func++;
1787 parsed_function_name.push (false);
1788
1789 if (! (reading_fcn_file || reading_script_file
1790 || reading_classdef_file))
1791 input_line_number = 1;
1792 break;
1793
1794 case magic_file_kw:
1795 {
1796 if ((reading_fcn_file || reading_script_file
1797 || reading_classdef_file)
1798 && ! curr_fcn_file_full_name.empty ())
1799 yylval.tok_val = new token (curr_fcn_file_full_name, l, c);
1800 else
1801 yylval.tok_val = new token ("stdin", l, c);
1802 }
1803 break;
1804
1805 case magic_line_kw:
1806 yylval.tok_val = new token (static_cast<double> (l), "", l, c);
1807 break;
1808
1809 default:
1810 panic_impossible ();
1811 }
1812
1813 if (! yylval.tok_val)
1814 yylval.tok_val = new token (l, c);
1815
1816 token_stack.push (yylval.tok_val);
1817
1818 return kw->tok;
1819 }
1820
1821 return 0;
1822 }
1823
1824 bool
1825 lexical_feedback::is_variable (const std::string& name)
1826 {
1827 return (symbol_table::is_variable (name)
1828 || (pending_local_variables.find (name)
1829 != pending_local_variables.end ()));
1830 }
1831
1832 std::string
1833 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof)
1834 {
1835 std::string buf;
1836
1837 bool at_bol = true;
1838 bool look_for_marker = false;
1839
1840 bool warned_incompatible = false;
1841
1842 int c = 0;
1843
1844 while ((c = reader.getc ()) != EOF)
1845 {
1846 current_input_column++;
1847
1848 if (look_for_marker)
1849 {
1850 at_bol = false;
1851 look_for_marker = false;
1852
1853 if (c == '{' || c == '}')
1854 {
1855 std::string tmp_buf (1, static_cast<char> (c));
1856
1857 int type = c;
1858
1859 bool done = false;
1860
1861 while ((c = reader.getc ()) != EOF && ! done)
1862 {
1863 current_input_column++;
1864
1865 switch (c)
1866 {
1867 case ' ':
1868 case '\t':
1869 tmp_buf += static_cast<char> (c);
1870 break;
1871
1872 case '\n':
1873 {
1874 current_input_column = 0;
1875 at_bol = true;
1876 done = true;
1877
1878 if (type == '{')
1879 {
1880 block_comment_nesting_level++;
1881 promptflag--;
1882 }
1883 else
1884 {
1885 block_comment_nesting_level--;
1886 promptflag++;
1887
1888 if (block_comment_nesting_level == 0)
1889 {
1890 buf += grab_comment_block (reader, true, eof);
1891
1892 return buf;
1893 }
1894 }
1895 }
1896 break;
1897
1898 default:
1899 at_bol = false;
1900 tmp_buf += static_cast<char> (c);
1901 buf += tmp_buf;
1902 done = true;
1903 break;
1904 }
1905 }
1906 }
1907 }
1908
1909 if (at_bol && (c == '%' || c == '#'))
1910 {
1911 if (c == '#' && ! warned_incompatible)
1912 {
1913 warned_incompatible = true;
1914 maybe_gripe_matlab_incompatible_comment (c);
1915 }
1916
1917 at_bol = false;
1918 look_for_marker = true;
1919 }
1920 else
1921 {
1922 buf += static_cast<char> (c);
1923
1924 if (c == '\n')
1925 {
1926 current_input_column = 0;
1927 at_bol = true;
1928 }
1929 }
1930 }
1931
1932 if (c == EOF)
1933 eof = true;
1934
1935 return buf;
1936 }
1937
1938 std::string
1939 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol,
1940 bool& eof)
1941 {
1942 std::string buf;
1943
1944 // TRUE means we are at the beginning of a comment block.
1945 bool begin_comment = false;
1946
1947 // TRUE means we are currently reading a comment block.
1948 bool in_comment = false;
1949
1950 bool warned_incompatible = false;
1951
1952 int c = 0;
1953
1954 while ((c = reader.getc ()) != EOF)
1955 {
1956 current_input_column++;
1957
1958 if (begin_comment)
1959 {
1960 if (c == '%' || c == '#')
1961 {
1962 at_bol = false;
1963 continue;
1964 }
1965 else if (at_bol && c == '{')
1966 {
1967 std::string tmp_buf (1, static_cast<char> (c));
1968
1969 bool done = false;
1970
1971 while ((c = reader.getc ()) != EOF && ! done)
1972 {
1973 current_input_column++;
1974
1975 switch (c)
1976 {
1977 case ' ':
1978 case '\t':
1979 tmp_buf += static_cast<char> (c);
1980 break;
1981
1982 case '\n':
1983 {
1984 current_input_column = 0;
1985 at_bol = true;
1986 done = true;
1987
1988 block_comment_nesting_level++;
1989 promptflag--;
1990
1991 buf += grab_block_comment (reader, eof);
1992
1993 in_comment = false;
1994
1995 if (eof)
1996 goto done;
1997 }
1998 break;
1999
2000 default:
2001 at_bol = false;
2002 tmp_buf += static_cast<char> (c);
2003 buf += tmp_buf;
2004 done = true;
2005 break;
2006 }
2007 }
2008 }
2009 else
2010 {
2011 at_bol = false;
2012 begin_comment = false;
2013 }
2014 }
2015
2016 if (in_comment)
2017 {
2018 buf += static_cast<char> (c);
2019
2020 if (c == '\n')
2021 {
2022 at_bol = true;
2023 current_input_column = 0;
2024 in_comment = false;
2025
2026 // FIXME -- bailing out here prevents things like
2027 //
2028 // octave> # comment
2029 // octave> x = 1
2030 //
2031 // from failing at the command line, while still
2032 // allowing blocks of comments to be grabbed properly
2033 // for function doc strings. But only the first line of
2034 // a mult-line doc string will be picked up for
2035 // functions defined on the command line. We need a
2036 // better way of collecting these comments...
2037 if (! (reading_fcn_file || reading_script_file))
2038 goto done;
2039 }
2040 }
2041 else
2042 {
2043 switch (c)
2044 {
2045 case ' ':
2046 case '\t':
2047 break;
2048
2049 case '#':
2050 if (! warned_incompatible)
2051 {
2052 warned_incompatible = true;
2053 maybe_gripe_matlab_incompatible_comment (c);
2054 }
2055 // fall through...
2056
2057 case '%':
2058 in_comment = true;
2059 begin_comment = true;
2060 break;
2061
2062 default:
2063 current_input_column--;
2064 reader.ungetc (c);
2065 goto done;
2066 }
2067 }
2068 }
2069
2070 done:
2071
2072 if (c == EOF)
2073 eof = true;
2074
2075 return buf;
2076 }
2077
2078 int
2079 lexical_feedback::process_comment (bool start_in_block, bool& eof)
2080 {
2081 eof = false;
2082
2083 std::string help_txt;
2084
2085 if (! help_buf.empty ())
2086 help_txt = help_buf.top ();
2087
2088 flex_stream_reader flex_reader (this, yytext);
2089
2090 // process_comment is only supposed to be called when we are not
2091 // initially looking at a block comment.
2092
2093 std::string txt = start_in_block
2094 ? grab_block_comment (flex_reader, eof)
2095 : grab_comment_block (flex_reader, false, eof);
2096
2097 if (lexer_debug_flag)
2098 std::cerr << "C: " << txt << std::endl;
2099
2100 if (help_txt.empty () && nesting_level.none ())
2101 {
2102 if (! help_buf.empty ())
2103 help_buf.pop ();
2104
2105 help_buf.push (txt);
2106 }
2107
2108 octave_comment_buffer::append (txt);
2109
2110 current_input_column = 1;
2111 quote_is_transpose = false;
2112 convert_spaces_to_comma = true;
2113 at_beginning_of_statement = true;
2114
2115 if (YY_START == COMMAND_START)
2116 BEGIN (INITIAL);
2117
2118 if (nesting_level.none ())
2119 return '\n';
2120 else if (nesting_level.is_bracket_or_brace ())
2121 return ';';
2122 else
2123 return 0;
2124 }
2125
2126 // Recognize separators. If the separator is a CRLF pair, it is
2127 // replaced by a single LF.
2128
2129 bool
2130 lexical_feedback::next_token_is_sep_op (void)
2131 {
2132 bool retval = false;
2133
2134 int c = text_yyinput ();
2135
2136 retval = match_any (c, ",;\n]");
2137
2138 xunput (c, yytext);
2139
2140 return retval;
2141 }
2142
2143 // Try to determine if the next token should be treated as a postfix
2144 // unary operator. This is ugly, but it seems to do the right thing.
2145
2146 bool
2147 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev)
2148 {
2149 bool un_op = false;
2150
2151 int c0 = text_yyinput ();
2152
2153 if (c0 == '\'' && ! spc_prev)
2154 {
2155 un_op = true;
2156 }
2157 else if (c0 == '.')
2158 {
2159 int c1 = text_yyinput ();
2160 un_op = (c1 == '\'');
2161 xunput (c1, yytext);
2162 }
2163 else if (c0 == '+')
2164 {
2165 int c1 = text_yyinput ();
2166 un_op = (c1 == '+');
2167 xunput (c1, yytext);
2168 }
2169 else if (c0 == '-')
2170 {
2171 int c1 = text_yyinput ();
2172 un_op = (c1 == '-');
2173 xunput (c1, yytext);
2174 }
2175
2176 xunput (c0, yytext);
2177
2178 return un_op;
2179 }
2180
2181 // Try to determine if the next token should be treated as a binary
2182 // operator.
2183 //
2184 // This kluge exists because whitespace is not always ignored inside
2185 // the square brackets that are used to create matrix objects (though
2186 // spacing only really matters in the cases that can be interpreted
2187 // either as binary ops or prefix unary ops: currently just +, -).
2188 //
2189 // Note that a line continuation directly following a + or - operator
2190 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
2191 // parsed as a binary operator.
2192
2193 bool
2194 lexical_feedback::next_token_is_bin_op (bool spc_prev)
2195 {
2196 bool bin_op = false;
2197
2198 int c0 = text_yyinput ();
2199
2200 switch (c0)
2201 {
2202 case '+':
2203 case '-':
2204 {
2205 int c1 = text_yyinput ();
2206
2207 switch (c1)
2208 {
2209 case '+':
2210 case '-':
2211 // Unary ops, spacing doesn't matter.
2212 break;
2213
2214 case '=':
2215 // Binary ops, spacing doesn't matter.
2216 bin_op = true;
2217 break;
2218
2219 default:
2220 // Could be either, spacing matters.
2221 bin_op = looks_like_bin_op (spc_prev, c1);
2222 break;
2223 }
2224
2225 xunput (c1, yytext);
2226 }
2227 break;
2228
2229 case ':':
2230 case '/':
2231 case '\\':
2232 case '^':
2233 // Always a binary op (may also include /=, \=, and ^=).
2234 bin_op = true;
2235 break;
2236
2237 // .+ .- ./ .\ .^ .* .**
2238 case '.':
2239 {
2240 int c1 = text_yyinput ();
2241
2242 if (match_any (c1, "+-/\\^*"))
2243 // Always a binary op (may also include .+=, .-=, ./=, ...).
2244 bin_op = true;
2245 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
2246 // A structure element reference is a binary op.
2247 bin_op = true;
2248
2249 xunput (c1, yytext);
2250 }
2251 break;
2252
2253 // = == & && | || * **
2254 case '=':
2255 case '&':
2256 case '|':
2257 case '*':
2258 // Always a binary op (may also include ==, &&, ||, **).
2259 bin_op = true;
2260 break;
2261
2262 // < <= <> > >=
2263 case '<':
2264 case '>':
2265 // Always a binary op (may also include <=, <>, >=).
2266 bin_op = true;
2267 break;
2268
2269 // ~= !=
2270 case '~':
2271 case '!':
2272 {
2273 int c1 = text_yyinput ();
2274
2275 // ~ and ! can be unary ops, so require following =.
2276 if (c1 == '=')
2277 bin_op = true;
2278
2279 xunput (c1, yytext);
2280 }
2281 break;
2282
2283 default:
2284 break;
2285 }
2286
2287 xunput (c0, yytext);
2288
2289 return bin_op;
2290 }
2291
2292 // FIXME -- we need to handle block comments here.
2293
2294 void
2295 lexical_feedback::scan_for_comments (const char *text)
2296 {
2297 std::string comment_buf;
2298
2299 bool in_comment = false;
2300 bool beginning_of_comment = false;
2301
2302 int len = strlen (text);
2303 int i = 0;
2304
2305 while (i < len)
2306 {
2307 char c = text[i++];
2308
2309 switch (c)
2310 {
2311 case '%':
2312 case '#':
2313 if (in_comment)
2314 {
2315 if (! beginning_of_comment)
2316 comment_buf += static_cast<char> (c);
2317 }
2318 else
2319 {
2320 maybe_gripe_matlab_incompatible_comment (c);
2321 in_comment = true;
2322 beginning_of_comment = true;
2323 }
2324 break;
2325
2326 case '\n':
2327 if (in_comment)
2328 {
2329 comment_buf += static_cast<char> (c);
2330 octave_comment_buffer::append (comment_buf);
2331 comment_buf.resize (0);
2332 in_comment = false;
2333 beginning_of_comment = false;
2334 }
2335 break;
2336
2337 default:
2338 if (in_comment)
2339 {
2340 comment_buf += static_cast<char> (c);
2341 beginning_of_comment = false;
2342 }
2343 break;
2344 }
2345 }
2346
2347 if (! comment_buf.empty ())
2348 octave_comment_buffer::append (comment_buf);
2349 }
2350
2351 // Discard whitespace, including comments and continuations.
2352
2353 // FIXME -- we need to handle block comments here.
2354
2355 int
2356 lexical_feedback::eat_whitespace (void)
2357 {
2358 int retval = lexical_feedback::NO_WHITESPACE;
2359
2360 std::string comment_buf;
2361
2362 bool in_comment = false;
2363 bool beginning_of_comment = false;
2364
2365 int c = 0;
2366
2367 while ((c = text_yyinput ()) != EOF)
2368 {
2369 current_input_column++;
2370
2371 switch (c)
2372 {
2373 case ' ':
2374 case '\t':
2375 if (in_comment)
2376 {
2377 comment_buf += static_cast<char> (c);
2378 beginning_of_comment = false;
2379 }
2380 retval |= lexical_feedback::SPACE_OR_TAB;
2381 break;
2382
2383 case '\n':
2384 retval |= lexical_feedback::NEWLINE;
2385 if (in_comment)
2386 {
2387 comment_buf += static_cast<char> (c);
2388 octave_comment_buffer::append (comment_buf);
2389 comment_buf.resize (0);
2390 in_comment = false;
2391 beginning_of_comment = false;
2392 }
2393 current_input_column = 0;
2394 break;
2395
2396 case '#':
2397 case '%':
2398 if (in_comment)
2399 {
2400 if (! beginning_of_comment)
2401 comment_buf += static_cast<char> (c);
2402 }
2403 else
2404 {
2405 maybe_gripe_matlab_incompatible_comment (c);
2406 in_comment = true;
2407 beginning_of_comment = true;
2408 }
2409 break;
2410
2411 case '.':
2412 if (in_comment)
2413 {
2414 comment_buf += static_cast<char> (c);
2415 beginning_of_comment = false;
2416 break;
2417 }
2418 else
2419 {
2420 if (have_ellipsis_continuation ())
2421 break;
2422 else
2423 goto done;
2424 }
2425
2426 case '\\':
2427 if (in_comment)
2428 {
2429 comment_buf += static_cast<char> (c);
2430 beginning_of_comment = false;
2431 break;
2432 }
2433 else
2434 {
2435 if (have_continuation ())
2436 break;
2437 else
2438 goto done;
2439 }
2440
2441 default:
2442 if (in_comment)
2443 {
2444 comment_buf += static_cast<char> (c);
2445 beginning_of_comment = false;
2446 break;
2447 }
2448 else
2449 goto done;
2450 }
2451 }
2452
2453 if (! comment_buf.empty ())
2454 octave_comment_buffer::append (comment_buf);
2455
2456 done:
2457 xunput (c, yytext);
2458 current_input_column--;
2459 return retval;
2460 }
2461
// Return true if the LEN-character text S starts with "0x" or "0X"
// and has at least one more character after that prefix.

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
2467
2468 void
2469 lexical_feedback::handle_number (void)
2470 {
2471 double value = 0.0;
2472 int nread = 0;
2473
2474 if (looks_like_hex (yytext, strlen (yytext)))
2475 {
2476 unsigned long ival;
2477
2478 nread = sscanf (yytext, "%lx", &ival);
2479
2480 value = static_cast<double> (ival);
2481 }
2482 else
2483 {
2484 char *tmp = strsave (yytext);
2485
2486 char *idx = strpbrk (tmp, "Dd");
2487
2488 if (idx)
2489 *idx = 'e';
2490
2491 nread = sscanf (tmp, "%lf", &value);
2492
2493 delete [] tmp;
2494 }
2495
2496 // If yytext doesn't contain a valid number, we are in deep doo doo.
2497
2498 assert (nread == 1);
2499
2500 quote_is_transpose = true;
2501 convert_spaces_to_comma = true;
2502 looking_for_object_index = false;
2503 at_beginning_of_statement = false;
2504
2505 yylval.tok_val = new token (value, yytext, input_line_number,
2506 current_input_column);
2507
2508 token_stack.push (yylval.tok_val);
2509
2510 current_input_column += yyleng;
2511
2512 do_comma_insert_check ();
2513 }
2514
2515 // We have seen a backslash and need to find out if it should be
2516 // treated as a continuation character. If so, this eats it, up to
2517 // and including the new line character.
2518 //
2519 // Match whitespace only, followed by a comment character or newline.
2520 // Once a comment character is found, discard all input until newline.
2521 // If non-whitespace characters are found before comment
2522 // characters, return 0. Otherwise, return 1.
2523
2524 // FIXME -- we need to handle block comments here.
2525
2526 bool
2527 lexical_feedback::have_continuation (bool trailing_comments_ok)
2528 {
2529 std::ostringstream buf;
2530
2531 std::string comment_buf;
2532
2533 bool in_comment = false;
2534 bool beginning_of_comment = false;
2535
2536 int c = 0;
2537
2538 while ((c = text_yyinput ()) != EOF)
2539 {
2540 buf << static_cast<char> (c);
2541
2542 switch (c)
2543 {
2544 case ' ':
2545 case '\t':
2546 if (in_comment)
2547 {
2548 comment_buf += static_cast<char> (c);
2549 beginning_of_comment = false;
2550 }
2551 break;
2552
2553 case '%':
2554 case '#':
2555 if (trailing_comments_ok)
2556 {
2557 if (in_comment)
2558 {
2559 if (! beginning_of_comment)
2560 comment_buf += static_cast<char> (c);
2561 }
2562 else
2563 {
2564 maybe_gripe_matlab_incompatible_comment (c);
2565 in_comment = true;
2566 beginning_of_comment = true;
2567 }
2568 }
2569 else
2570 goto cleanup;
2571 break;
2572
2573 case '\n':
2574 if (in_comment)
2575 {
2576 comment_buf += static_cast<char> (c);
2577 octave_comment_buffer::append (comment_buf);
2578 }
2579 current_input_column = 0;
2580 promptflag--;
2581 gripe_matlab_incompatible_continuation ();
2582 return true;
2583
2584 default:
2585 if (in_comment)
2586 {
2587 comment_buf += static_cast<char> (c);
2588 beginning_of_comment = false;
2589 }
2590 else
2591 goto cleanup;
2592 break;
2593 }
2594 }
2595
2596 xunput (c, yytext);
2597 return false;
2598
2599 cleanup:
2600
2601 std::string s = buf.str ();
2602
2603 int len = s.length ();
2604 while (len--)
2605 xunput (s[len], yytext);
2606
2607 return false;
2608 }
2609
2610 // We have seen a '.' and need to see if it is the start of a
2611 // continuation. If so, this eats it, up to and including the new
2612 // line character.
2613
2614 bool
2615 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok)
2616 {
2617 char c1 = text_yyinput ();
2618 if (c1 == '.')
2619 {
2620 char c2 = text_yyinput ();
2621 if (c2 == '.' && have_continuation (trailing_comments_ok))
2622 return true;
2623 else
2624 {
2625 xunput (c2, yytext);
2626 xunput (c1, yytext);
2627 }
2628 }
2629 else
2630 xunput (c1, yytext);
2631
2632 return false;
2633 }
2634
2635 // See if we have a continuation line. If so, eat it and the leading
2636 // whitespace on the next line.
2637
2638 int
2639 lexical_feedback::eat_continuation (void)
2640 {
2641 int retval = lexical_feedback::NO_WHITESPACE;
2642
2643 int c = text_yyinput ();
2644
2645 if ((c == '.' && have_ellipsis_continuation ())
2646 || (c == '\\' && have_continuation ()))
2647 retval = eat_whitespace ();
2648 else
2649 xunput (c, yytext);
2650
2651 return retval;
2652 }
2653
2654 int
2655 lexical_feedback::handle_string (char delim)
2656 {
2657 std::ostringstream buf;
2658
2659 int bos_line = input_line_number;
2660 int bos_col = current_input_column;
2661
2662 int c;
2663 int escape_pending = 0;
2664
2665 while ((c = text_yyinput ()) != EOF)
2666 {
2667 current_input_column++;
2668
2669 if (c == '\\')
2670 {
2671 if (delim == '\'' || escape_pending)
2672 {
2673 buf << static_cast<char> (c);
2674 escape_pending = 0;
2675 }
2676 else
2677 {
2678 if (have_continuation (false))
2679 escape_pending = 0;
2680 else
2681 {
2682 buf << static_cast<char> (c);
2683 escape_pending = 1;
2684 }
2685 }
2686 continue;
2687 }
2688 else if (c == '.')
2689 {
2690 if (delim == '\'' || ! have_ellipsis_continuation (false))
2691 buf << static_cast<char> (c);
2692 }
2693 else if (c == '\n')
2694 {
2695 error ("unterminated string constant");
2696 break;
2697 }
2698 else if (c == delim)
2699 {
2700 if (escape_pending)
2701 buf << static_cast<char> (c);
2702 else
2703 {
2704 c = text_yyinput ();
2705 if (c == delim)
2706 {
2707 buf << static_cast<char> (c);
2708 }
2709 else
2710 {
2711 std::string s;
2712 xunput (c, yytext);
2713
2714 if (delim == '\'')
2715 s = buf.str ();
2716 else
2717 s = do_string_escapes (buf.str ());
2718
2719 quote_is_transpose = true;
2720 convert_spaces_to_comma = true;
2721
2722 yylval.tok_val = new token (s, bos_line, bos_col);
2723 token_stack.push (yylval.tok_val);
2724
2725 if (delim == '"')
2726 gripe_matlab_incompatible ("\" used as string delimiter");
2727 else if (delim == '\'')
2728 gripe_single_quote_string ();
2729
2730 looking_for_object_index = true;
2731 at_beginning_of_statement = false;
2732
2733 return delim == '"' ? DQ_STRING : SQ_STRING;
2734 }
2735 }
2736 }
2737 else
2738 {
2739 buf << static_cast<char> (c);
2740 }
2741
2742 escape_pending = 0;
2743 }
2744
2745 return LEXICAL_ERROR;
2746 }
2747
2748 bool
2749 lexical_feedback::next_token_is_assign_op (void)
2750 {
2751 bool retval = false;
2752
2753 int c0 = text_yyinput ();
2754
2755 switch (c0)
2756 {
2757 case '=':
2758 {
2759 int c1 = text_yyinput ();
2760 xunput (c1, yytext);
2761 if (c1 != '=')
2762 retval = true;
2763 }
2764 break;
2765
2766 case '+':
2767 case '-':
2768 case '*':
2769 case '/':
2770 case '\\':
2771 case '&':
2772 case '|':
2773 {
2774 int c1 = text_yyinput ();
2775 xunput (c1, yytext);
2776 if (c1 == '=')
2777 retval = true;
2778 }
2779 break;
2780
2781 case '.':
2782 {
2783 int c1 = text_yyinput ();
2784 if (match_any (c1, "+-*/\\"))
2785 {
2786 int c2 = text_yyinput ();
2787 xunput (c2, yytext);
2788 if (c2 == '=')
2789 retval = true;
2790 }
2791 xunput (c1, yytext);
2792 }
2793 break;
2794
2795 case '>':
2796 {
2797 int c1 = text_yyinput ();
2798 if (c1 == '>')
2799 {
2800 int c2 = text_yyinput ();
2801 xunput (c2, yytext);
2802 if (c2 == '=')
2803 retval = true;
2804 }
2805 xunput (c1, yytext);
2806 }
2807 break;
2808
2809 case '<':
2810 {
2811 int c1 = text_yyinput ();
2812 if (c1 == '<')
2813 {
2814 int c2 = text_yyinput ();
2815 xunput (c2, yytext);
2816 if (c2 == '=')
2817 retval = true;
2818 }
2819 xunput (c1, yytext);
2820 }
2821 break;
2822
2823 default:
2824 break;
2825 }
2826
2827 xunput (c0, yytext);
2828
2829 return retval;
2830 }
2831
2832 bool
2833 lexical_feedback::next_token_is_index_op (void)
2834 {
2835 int c = text_yyinput ();
2836 xunput (c, yytext);
2837 return c == '(' || c == '{';
2838 }
2839
2840 int
2841 lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type)
2842 {
2843 int retval = bracket_type;
2844
2845 if (! nesting_level.none ())
2846 {
2847 nesting_level.remove ();
2848
2849 if (bracket_type == ']')
2850 bracketflag--;
2851 else if (bracket_type == '}')
2852 braceflag--;
2853 else
2854 panic_impossible ();
2855 }
2856
2857 if (bracketflag == 0 && braceflag == 0)
2858 BEGIN (INITIAL);
2859
2860 if (bracket_type == ']'
2861 && next_token_is_assign_op ()
2862 && ! looking_at_return_list)
2863 {
2864 retval = CLOSE_BRACE;
2865 }
2866 else if ((bracketflag || braceflag)
2867 && convert_spaces_to_comma
2868 && (nesting_level.is_bracket ()
2869 || (nesting_level.is_brace ()
2870 && ! looking_at_object_index.front ())))
2871 {
2872 bool index_op = next_token_is_index_op ();
2873
2874 // Don't insert comma if we are looking at something like
2875 //
2876 // [x{i}{j}] or [x{i}(j)]
2877 //
2878 // but do if we are looking at
2879 //
2880 // [x{i} {j}] or [x{i} (j)]
2881
2882 if (spc_gobbled || ! (bracket_type == '}' && index_op))
2883 {
2884 bool bin_op = next_token_is_bin_op (spc_gobbled);
2885
2886 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2887
2888 bool sep_op = next_token_is_sep_op ();
2889
2890 if (! (postfix_un_op || bin_op || sep_op))
2891 {
2892 maybe_warn_separator_insert (',');
2893
2894 xunput (',', yytext);
2895 return retval;
2896 }
2897 }
2898 }
2899
2900 quote_is_transpose = true;
2901 convert_spaces_to_comma = true;
2902
2903 return retval;
2904 }
2905
2906 void
2907 lexical_feedback::maybe_unput_comma (int spc_gobbled)
2908 {
2909 if (nesting_level.is_bracket ()
2910 || (nesting_level.is_brace ()
2911 && ! looking_at_object_index.front ()))
2912 {
2913 int bin_op = next_token_is_bin_op (spc_gobbled);
2914
2915 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2916
2917 int c1 = text_yyinput ();
2918 int c2 = text_yyinput ();
2919
2920 xunput (c2, yytext);
2921 xunput (c1, yytext);
2922
2923 int sep_op = next_token_is_sep_op ();
2924
2925 int dot_op = (c1 == '.'
2926 && (isalpha (c2) || isspace (c2) || c2 == '_'));
2927
2928 if (postfix_un_op || bin_op || sep_op || dot_op)
2929 return;
2930
2931 int index_op = (c1 == '(' || c1 == '{');
2932
2933 // If there is no space before the indexing op, we don't insert
2934 // a comma.
2935
2936 if (index_op && ! spc_gobbled)
2937 return;
2938
2939 maybe_warn_separator_insert (',');
2940
2941 xunput (',', yytext);
2942 }
2943 }
2944
2945 bool
2946 lexical_feedback::next_token_can_follow_bin_op (void)
2947 {
2948 std::stack<char> buf;
2949
2950 int c = EOF;
2951
2952 // Skip whitespace in current statement on current line
2953 while (true)
2954 {
2955 c = text_yyinput ();
2956
2957 buf.push (c);
2958
2959 if (match_any (c, ",;\n") || (c != ' ' && c != '\t'))
2960 break;
2961 }
2962
2963 // Restore input.
2964 while (! buf.empty ())
2965 {
2966 xunput (buf.top (), yytext);
2967
2968 buf.pop ();
2969 }
2970
2971 return (isalnum (c) || match_any (c, "!\"'(-[_{~"));
2972 }
2973
// Return false for the names of built-in constants so that they are
// never parsed as commands; this avoids surprises for input such as
// "NaN ^2".  Any other name yields true.

static bool
can_be_command (const std::string& tok)
{
  static const char *const constant_names[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan", 0
    };

  for (const char *const *name = constant_names; *name; name++)
    {
      if (tok == *name)
        return false;
    }

  return true;
}
2986
// Decide whether the text that follows an identifier looks like the
// argument of a command-syntax call.
//
// The result starts out true and is set to false when the upcoming
// characters look like indexing, an assignment, a structure reference,
// or an operator that is followed by a blank and then by something
// that can follow a binary operator.  Every character that is read is
// pushed back before returning.
2987 bool
2988 lexical_feedback::looks_like_command_arg (void)
2989 {
2990 bool retval = true;
2991
2992 int c0 = text_yyinput ();
2993
2994 switch (c0)
2995 {
2996 // = ==
2997 case '=':
2998 {
2999 int c1 = text_yyinput ();
3000
3001 if (c1 == '=')
3002 {
3003 int c2 = text_yyinput ();
3004
// The look-ahead helper is called while c2 is still consumed, so it
// examines what follows the blank.
3005 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3006 && next_token_can_follow_bin_op ())
3007 retval = false;
3008
3009 xunput (c2, yytext);
3010 }
3011 else
// A single '=' is treated as not being a command argument.
3012 retval = false;
3013
3014 xunput (c1, yytext);
3015 }
3016 break;
3017
3018 case '(':
3019 case '{':
3020 // Indexing.
3021 retval = false;
3022 break;
3023
3024 case '\n':
3025 // EOL.
3026 break;
3027
3028 case '\'':
3029 case '"':
3030 // Beginning of a character string.
3031 break;
3032
3033 // + - ++ -- += -=
3034 case '+':
3035 case '-':
3036 {
3037 int c1 = text_yyinput ();
3038
3039 switch (c1)
3040 {
3041 case '\n':
3042 // EOL.
3043 case '+':
3044 case '-':
3045 // Unary ops, spacing doesn't matter.
3046 break;
3047
3048 case '\t':
3049 case ' ':
3050 {
3051 if (next_token_can_follow_bin_op ())
3052 retval = false;
3053 }
3054 break;
3055
3056 case '=':
3057 {
3058 int c2 = text_yyinput ();
3059
3060 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3061 && next_token_can_follow_bin_op ())
3062 retval = false;
3063
3064 xunput (c2, yytext);
3065 }
3066 break;
3067 }
3068
3069 xunput (c1, yytext);
3070 }
3071 break;
3072
3073 case ':':
3074 case '/':
3075 case '\\':
3076 case '^':
3077 {
3078 int c1 = text_yyinput ();
3079
3080 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3081 && next_token_can_follow_bin_op ())
3082 retval = false;
3083
3084 xunput (c1, yytext);
3085 }
3086 break;
3087
3088 // .+ .- ./ .\ .^ .* .**
3089 case '.':
3090 {
3091 int c1 = text_yyinput ();
3092
3093 if (match_any (c1, "+-/\\^*"))
3094 {
3095 int c2 = text_yyinput ();
3096
3097 if (c2 == '=')
3098 {
3099 int c3 = text_yyinput ();
3100
3101 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t')
3102 && next_token_can_follow_bin_op ())
3103 retval = false;
3104
3105 xunput (c3, yytext);
3106 }
3107 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3108 && next_token_can_follow_bin_op ())
3109 retval = false;
3110
3111 xunput (c2, yytext);
3112 }
3113 else if (! match_any (c1, ",;\n")
3114 && (! isdigit (c1) && c1 != ' ' && c1 != '\t'
3115 && c1 != '.'))
3116 {
3117 // Structure reference. FIXME -- is this a complete check?
3118
3119 retval = false;
3120 }
3121
3122 xunput (c1, yytext);
3123 }
3124 break;
3125
3126 // & && | || * **
3127 case '&':
3128 case '|':
3129 case '*':
3130 {
3131 int c1 = text_yyinput ();
3132
// A doubled character (&&, ||, **) is checked one character later.
3133 if (c1 == c0)
3134 {
3135 int c2 = text_yyinput ();
3136
3137 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3138 && next_token_can_follow_bin_op ())
3139 retval = false;
3140
3141 xunput (c2, yytext);
3142 }
3143 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3144 && next_token_can_follow_bin_op ())
3145 retval = false;
3146
3147 xunput (c1, yytext);
3148 }
3149 break;
3150
3151 // < <= > >=
3152 case '<':
3153 case '>':
3154 {
3155 int c1 = text_yyinput ();
3156
3157 if (c1 == '=')
3158 {
3159 int c2 = text_yyinput ();
3160
3161 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3162 && next_token_can_follow_bin_op ())
3163 retval = false;
3164
3165 xunput (c2, yytext);
3166 }
3167 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3168 && next_token_can_follow_bin_op ())
3169 retval = false;
3170
3171 xunput (c1, yytext);
3172 }
3173 break;
3174
3175 // ~= !=
3176 case '~':
3177 case '!':
3178 {
3179 int c1 = text_yyinput ();
3180
3181 // ~ and ! can be unary ops, so require following =.
3182 if (c1 == '=')
3183 {
3184 int c2 = text_yyinput ();
3185
3186 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3187 && next_token_can_follow_bin_op ())
3188 retval = false;
3189
3190 xunput (c2, yytext);
3191 }
3192 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3193 && next_token_can_follow_bin_op ())
3194 retval = false;
3195
3196 xunput (c1, yytext);
3197 }
3198 break;
3199
3200 default:
3201 break;
3202 }
3203
// Restore the first character so the input is left as it was found.
3204 xunput (c0, yytext);
3205
3206 return retval;
3207 }
3208
3209 int
3210 lexical_feedback::handle_superclass_identifier (void)
3211 {
3212 eat_continuation ();
3213
3214 std::string pkg;
3215 std::string meth = strip_trailing_whitespace (yytext);
3216 size_t pos = meth.find ("@");
3217 std::string cls = meth.substr (pos).substr (1);
3218 meth = meth.substr (0, pos - 1);
3219
3220 pos = cls.find (".");
3221 if (pos != std::string::npos)
3222 {
3223 pkg = cls.substr (pos).substr (1);
3224 cls = cls.substr (0, pos - 1);
3225 }
3226
3227 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls)
3228 || is_keyword_token (pkg));
3229 if (kw_token)
3230 {
3231 error ("method, class and package names may not be keywords");
3232 return LEXICAL_ERROR;
3233 }
3234
3235 yylval.tok_val
3236 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)),
3237 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3238 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3239 input_line_number,
3240 current_input_column);
3241 token_stack.push (yylval.tok_val);
3242
3243 convert_spaces_to_comma = true;
3244 current_input_column += yyleng;
3245
3246 return SUPERCLASSREF;
3247 }
3248
3249 int
3250 lexical_feedback::handle_meta_identifier (void)
3251 {
3252 eat_continuation ();
3253
3254 std::string pkg;
3255 std::string cls = strip_trailing_whitespace (yytext).substr (1);
3256 size_t pos = cls.find (".");
3257
3258 if (pos != std::string::npos)
3259 {
3260 pkg = cls.substr (pos).substr (1);
3261 cls = cls.substr (0, pos - 1);
3262 }
3263
3264 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg);
3265 if (kw_token)
3266 {
3267 error ("class and package names may not be keywords");
3268 return LEXICAL_ERROR;
3269 }
3270
3271 yylval.tok_val
3272 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)),
3273 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3274 input_line_number,
3275 current_input_column);
3276
3277 token_stack.push (yylval.tok_val);
3278
3279 convert_spaces_to_comma = true;
3280 current_input_column += yyleng;
3281
3282 return METAQUERY;
3283 }
3284
3285 // Figure out exactly what kind of token to return when we have seen
3286 // an identifier. Handles keywords. Return -1 if the identifier
3287 // should be ignored.
//
// Possible results visible here are STRUCT_ELT, FCN_HANDLE,
// LEXICAL_ERROR, the value returned by is_keyword_token, and NAME.
3288
3289 int
3290 lexical_feedback::handle_identifier (void)
3291 {
// Remember whether we were at the beginning of a statement; the flag
// itself is cleared further down.
3292 bool at_bos = at_beginning_of_statement;
3293
3294 std::string tok = strip_trailing_whitespace (yytext);
3295
// Last character of the matched text, used to detect trailing
// whitespace.
3296 int c = yytext[yyleng-1];
3297
3298 bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE);
3299
3300 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
3301
3302 // If we are expecting a structure element, avoid recognizing
3303 // keywords and other special names and return STRUCT_ELT, which is
3304 // a string that is also a valid identifier. But first, we have to
3305 // decide whether to insert a comma.
3306
3307 if (looking_at_indirect_ref)
3308 {
3309 do_comma_insert_check ();
3310
3311 maybe_unput_comma (spc_gobbled);
3312
3313 yylval.tok_val = new token (tok, input_line_number,
3314 current_input_column);
3315
3316 token_stack.push (yylval.tok_val);
3317
3318 quote_is_transpose = true;
3319 convert_spaces_to_comma = true;
3320 looking_for_object_index = true;
3321
3322 current_input_column += yyleng;
3323
3324 return STRUCT_ELT;
3325 }
3326
3327 at_beginning_of_statement = false;
3328
3329 // The is_keyword_token may reset
3330 // at_beginning_of_statement. For example, if it sees
3331 // an else token, then the next token is at the beginning of a
3332 // statement.
3333
3334 int kw_token = is_keyword_token (tok);
3335
3336 // If we found a keyword token, then the beginning_of_statement flag
3337 // is already set. Otherwise, we won't be at the beginning of a
3338 // statement.
3339
3340 if (looking_at_function_handle)
3341 {
3342 if (kw_token)
3343 {
3344 error ("function handles may not refer to keywords");
3345
3346 return LEXICAL_ERROR;
3347 }
3348 else
3349 {
3350 yylval.tok_val = new token (tok, input_line_number,
3351 current_input_column);
3352
3353 token_stack.push (yylval.tok_val);
3354
3355 current_input_column += yyleng;
3356 quote_is_transpose = false;
3357 convert_spaces_to_comma = true;
3358 looking_for_object_index = true;
3359
3360 return FCN_HANDLE;
3361 }
3362 }
3363
3364 // If we have a regular keyword, return it.
3365 // Keywords can be followed by identifiers.
3366
3367 if (kw_token)
3368 {
// A negative value is returned as is, without updating the column or
// the lexer flags.
3369 if (kw_token >= 0)
3370 {
3371 current_input_column += yyleng;
3372 quote_is_transpose = false;
3373 convert_spaces_to_comma = true;
3374 looking_for_object_index = false;
3375 }
3376
3377 return kw_token;
3378 }
3379
3380 // Peek ahead to see whether the next token is a single '=' (and not '==').
3381
3382 int c1 = text_yyinput ();
3383
3384 bool next_tok_is_eq = false;
3385 if (c1 == '=')
3386 {
3387 int c2 = text_yyinput ();
3388 xunput (c2, yytext);
3389
3390 if (c2 != '=')
3391 next_tok_is_eq = true;
3392 }
3393
3394 xunput (c1, yytext);
3395
3396 // Kluge alert.
3397 //
3398 // If we are looking at a text style function, set up to gobble its
3399 // arguments.
3400 //
3401 // If the following token is '=', or if we are parsing a function
3402 // return list or function parameter list, or if we are looking at
3403 // something like [ab,cd] = foo (), force the symbol to be inserted
3404 // as a variable in the current symbol table.
3405
3406 if (! is_variable (tok))
3407 {
3408 if (at_bos && spc_gobbled && can_be_command (tok)
3409 && looks_like_command_arg ())
3410 {
3411 BEGIN (COMMAND_START);
3412 }
3413 else if (next_tok_is_eq
3414 || looking_at_decl_list
3415 || looking_at_return_list
3416 || (looking_at_parameter_list
3417 && ! looking_at_initializer_expression))
3418 {
3419 symbol_table::force_variable (tok);
3420 }
3421 else if (looking_at_matrix_or_assign_lhs)
3422 {
3423 pending_local_variables.insert (tok);
3424 }
3425 }
3426
3427 // Find the token in the symbol table. Beware the magic
3428 // transformation of the end keyword...
3429
3430 if (tok == "end")
3431 tok = "__end__";
3432
3433 yylval.tok_val = new token (&(symbol_table::insert (tok)),
3434 input_line_number,
3435 current_input_column);
3436
3437 token_stack.push (yylval.tok_val);
3438
3439 // After seeing an identifier, it is ok to convert spaces to a comma
3440 // (if needed).
3441
3442 convert_spaces_to_comma = true;
3443
// No comma handling when an assignment follows or when the start
// state was just switched to COMMAND_START.
3444 if (! (next_tok_is_eq || YY_START == COMMAND_START))
3445 {
3446 quote_is_transpose = true;
3447
3448 do_comma_insert_check ();
3449
3450 maybe_unput_comma (spc_gobbled);
3451 }
3452
3453 current_input_column += yyleng;
3454
3455 if (tok != "__end__")
3456 looking_for_object_index = true;
3457
3458 return NAME;
3459 }
3460
3461 void
3462 lexical_feedback::maybe_warn_separator_insert (char sep)
3463 {
3464 std::string nm = curr_fcn_file_full_name;
3465
3466 if (nm.empty ())
3467 warning_with_id ("Octave:separator-insert",
3468 "potential auto-insertion of '%c' near line %d",
3469 sep, input_line_number);
3470 else
3471 warning_with_id ("Octave:separator-insert",
3472 "potential auto-insertion of '%c' near line %d of file %s",
3473 sep, input_line_number, nm.c_str ());
3474 }
3475
3476 void
3477 lexical_feedback::gripe_single_quote_string (void)
3478 {
3479 std::string nm = curr_fcn_file_full_name;
3480
3481 if (nm.empty ())
3482 warning_with_id ("Octave:single-quote-string",
3483 "single quote delimited string near line %d",
3484 input_line_number);
3485 else
3486 warning_with_id ("Octave:single-quote-string",
3487 "single quote delimited string near line %d of file %s",
3488 input_line_number, nm.c_str ());
3489 }
3490
3491 void
3492 lexical_feedback::gripe_matlab_incompatible (const std::string& msg)
3493 {
3494 std::string nm = curr_fcn_file_full_name;
3495
3496 if (nm.empty ())
3497 warning_with_id ("Octave:matlab-incompatible",
3498 "potential Matlab compatibility problem: %s",
3499 msg.c_str ());
3500 else
3501 warning_with_id ("Octave:matlab-incompatible",
3502 "potential Matlab compatibility problem: %s near line %d offile %s",
3503 msg.c_str (), input_line_number, nm.c_str ());
3504 }
3505
3506 void
3507 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c)
3508 {
3509 if (c == '#')
3510 gripe_matlab_incompatible ("# used as comment character");
3511 }
3512
3513 void
3514 lexical_feedback::gripe_matlab_incompatible_continuation (void)
3515 {
3516 gripe_matlab_incompatible ("\\ used as line continuation marker");
3517 }
3518
3519 void
3520 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op)
3521 {
3522 std::string t = op;
3523 int n = t.length ();
3524 if (t[n-1] == '\n')
3525 t.resize (n-1);
3526 gripe_matlab_incompatible (t + " used as operator");
1424 } 3527 }
1425 3528
1426 void 3529 void
1427 lexical_feedback::display_token (int tok) 3530 lexical_feedback::display_token (int tok)
1428 { 3531 {
1590 display_state (YY_START); 3693 display_state (YY_START);
1591 3694
1592 std::cerr << "P: " << pattern << std::endl; 3695 std::cerr << "P: " << pattern << std::endl;
1593 std::cerr << "T: " << text << std::endl; 3696 std::cerr << "T: " << text << std::endl;
1594 } 3697 }
1595
// Built-in function __display_tokens__: queries or sets the internal
// variable display_tokens through the SET_INTERNAL_VARIABLE macro.
1596 DEFUN (__display_tokens__, args, nargout,
1597 "-*- texinfo -*-\n\
1598 @deftypefn {Built-in Function} {} __display_tokens__ ()\n\
1599 Query or set the internal variable that determines whether Octave's\n\
1600 lexer displays tokens as they are read.\n\
1601 @end deftypefn")
1602 {
1603 return SET_INTERNAL_VARIABLE (display_tokens);
1604 }
1605
// Built-in function __token_count__: takes no arguments and returns
// the current value of Vtoken_count wrapped in an octave_value.
1606 DEFUN (__token_count__, , ,
1607 "-*- texinfo -*-\n\
1608 @deftypefn {Built-in Function} {} __token_count__ ()\n\
1609 Number of language tokens processed since Octave startup.\n\
1610 @end deftypefn")
1611 {
1612 return octave_value (Vtoken_count);
1613 }
1614
1615 DEFUN (__lexer_debug_flag__, args, nargout,
1616 "-*- texinfo -*-\n\
1617 @deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val}))\n\
1618 Undocumented internal function.\n\
1619 @end deftypefn")
1620 {
1621 octave_value retval;
1622
1623 retval = set_internal_variable (lexer_debug_flag, args, nargout,
1624 "__lexer_debug_flag__");
1625
1626 return retval;
1627 }
1628
1629 class
1630 flex_stream_reader : public stream_reader
1631 {
1632 public:
1633 flex_stream_reader (lexical_feedback *l, char *buf_arg)
1634 : stream_reader (), lexer (l), buf (buf_arg)
1635 { }
1636
1637 int getc (void) { return lexer->text_yyinput (); }
1638 int ungetc (int c) { lexer->xunput (c, buf); return 0; }
1639
1640 private:
1641
1642 // No copying!
1643
1644 flex_stream_reader (const flex_stream_reader&);
1645
1646 flex_stream_reader& operator = (const flex_stream_reader&);
1647
1648 lexical_feedback *lexer;
1649
1650 char *buf;
1651 };
1652
1653 lexical_feedback::~lexical_feedback (void)
1654 {
1655 // Clear out the stack of token info used to track line and
1656 // column numbers.
1657
1658 while (! token_stack.empty ())
1659 {
1660 delete token_stack.top ();
1661 token_stack.pop ();
1662 }
1663 }
1664
1665 // GAG.
1666 //
1667 // If we're reading a matrix and the next character is '[', make sure
1668 // that we insert a comma ahead of it.
1669
1670 void
1671 lexical_feedback::do_comma_insert_check (void)
1672 {
1673 bool spc_gobbled = (eat_continuation () != lexical_feedback::NO_WHITESPACE);
1674
1675 int c = text_yyinput ();
1676
1677 xunput (c, yytext);
1678
1679 if (spc_gobbled)
1680 xunput (' ', yytext);
1681
1682 do_comma_insert = (! looking_at_object_index.front ()
1683 && bracketflag && c == '[');
1684 }
1685
1686 int
1687 lexical_feedback::text_yyinput (void)
1688 {
1689 int c = yyinput ();
1690
1691 if (lexer_debug_flag)
1692 {
1693 std::cerr << "I: ";
1694 display_character (c);
1695 std::cerr << std::endl;
1696 }
1697
1698 // Convert CRLF into just LF and single CR into LF.
1699
1700 if (c == '\r')
1701 {
1702 c = yyinput ();
1703
1704 if (lexer_debug_flag)
1705 {
1706 std::cerr << "I: ";
1707 display_character (c);
1708 std::cerr << std::endl;
1709 }
1710
1711 if (c != '\n')
1712 {
1713 xunput (c, yytext);
1714 c = '\n';
1715 }
1716 }
1717
1718 if (c == '\n')
1719 input_line_number++;
1720
1721 return c;
1722 }
1723
1724 void
1725 lexical_feedback::xunput (char c, char *buf)
1726 {
1727 if (lexer_debug_flag)
1728 {
1729 std::cerr << "U: ";
1730 display_character (c);
1731 std::cerr << std::endl;
1732 }
1733
1734 if (c == '\n')
1735 input_line_number--;
1736
1737 yyunput (c, buf);
1738 }
1739
1740 // If we read some newlines, we need figure out what column we're
1741 // really looking at.
1742
1743 void
1744 lexical_feedback::fixup_column_count (char *s)
1745 {
1746 char c;
1747 while ((c = *s++) != '\0')
1748 {
1749 if (c == '\n')
1750 {
1751 input_line_number++;
1752 current_input_column = 1;
1753 }
1754 else
1755 current_input_column++;
1756 }
1757 }
1758
1759 bool
1760 lexical_feedback::inside_any_object_index (void)
1761 {
1762 bool retval = false;
1763
1764 for (std::list<bool>::const_iterator i = looking_at_object_index.begin ();
1765 i != looking_at_object_index.end (); i++)
1766 {
1767 if (*i)
1768 {
1769 retval = true;
1770 break;
1771 }
1772 }
1773
1774 return retval;
1775 }
1776
1777 // Handle keywords. Return -1 if the keyword should be ignored.
//
// Looks S up in the keyword table.  Returns 0 when S is not a keyword
// or when the keyword is not active in the current context.  For a
// recognized keyword, the lexer state is updated as needed, a token is
// stored in yylval.tok_val and pushed on token_stack, and the token id
// from the keyword table entry is returned.
1778
1779 int
1780 lexical_feedback::is_keyword_token (const std::string& s)
1781 {
// Capture the position now; input_line_number may be reset below.
1782 int l = input_line_number;
1783 int c = current_input_column;
1784
1785 int len = s.length ();
1786
1787 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len);
1788
1789 if (kw)
1790 {
// Cases that need a special token assign it below; all others get a
// plain position token after the switch.
1791 yylval.tok_val = 0;
1792
1793 switch (kw->kw_id)
1794 {
1795 case break_kw:
1796 case catch_kw:
1797 case continue_kw:
1798 case else_kw:
1799 case otherwise_kw:
1800 case return_kw:
1801 case unwind_protect_cleanup_kw:
1802 at_beginning_of_statement = true;
1803 break;
1804
1805 case static_kw:
1806 if ((reading_fcn_file || reading_script_file
1807 || reading_classdef_file)
1808 && ! curr_fcn_file_full_name.empty ())
1809 warning_with_id ("Octave:deprecated-keyword",
1810 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d of file '%s'",
1811 input_line_number,
1812 curr_fcn_file_full_name.c_str ());
1813 else
1814 warning_with_id ("Octave:deprecated-keyword",
1815 "the 'static' keyword is obsolete and will be removed from a future version of Octave; please use 'persistent' instead; near line %d",
1816 input_line_number);
1817 // fall through ...
1818
1819 case persistent_kw:
1820 break;
1821
1822 case case_kw:
1823 case elseif_kw:
1824 case global_kw:
1825 case until_kw:
1826 break;
1827
1828 case end_kw:
// In these contexts "end" is not reported as a keyword.
1829 if (inside_any_object_index ()
1830 || (! reading_classdef_file
1831 && (defining_func
1832 && ! (looking_at_return_list
1833 || parsed_function_name.top ()))))
1834 return 0;
1835
1836 yylval.tok_val = new token (token::simple_end, l, c);
1837 at_beginning_of_statement = true;
1838 break;
1839
1840 case end_try_catch_kw:
1841 yylval.tok_val = new token (token::try_catch_end, l, c);
1842 at_beginning_of_statement = true;
1843 break;
1844
1845 case end_unwind_protect_kw:
1846 yylval.tok_val = new token (token::unwind_protect_end, l, c);
1847 at_beginning_of_statement = true;
1848 break;
1849
1850 case endfor_kw:
1851 yylval.tok_val = new token (token::for_end, l, c);
1852 at_beginning_of_statement = true;
1853 break;
1854
1855 case endfunction_kw:
1856 yylval.tok_val = new token (token::function_end, l, c);
1857 at_beginning_of_statement = true;
1858 break;
1859
1860 case endif_kw:
1861 yylval.tok_val = new token (token::if_end, l, c);
1862 at_beginning_of_statement = true;
1863 break;
1864
1865 case endparfor_kw:
1866 yylval.tok_val = new token (token::parfor_end, l, c);
1867 at_beginning_of_statement = true;
1868 break;
1869
1870 case endswitch_kw:
1871 yylval.tok_val = new token (token::switch_end, l, c);
1872 at_beginning_of_statement = true;
1873 break;
1874
1875 case endwhile_kw:
1876 yylval.tok_val = new token (token::while_end, l, c);
1877 at_beginning_of_statement = true;
1878 break;
1879
1880 case endclassdef_kw:
1881 yylval.tok_val = new token (token::classdef_end, l, c);
1882 at_beginning_of_statement = true;
1883 break;
1884
1885 case endenumeration_kw:
1886 yylval.tok_val = new token (token::enumeration_end, l, c);
1887 at_beginning_of_statement = true;
1888 break;
1889
1890 case endevents_kw:
1891 yylval.tok_val = new token (token::events_end, l, c);
1892 at_beginning_of_statement = true;
1893 break;
1894
1895 case endmethods_kw:
1896 yylval.tok_val = new token (token::methods_end, l, c);
1897 at_beginning_of_statement = true;
1898 break;
1899
1900 case endproperties_kw:
1901 yylval.tok_val = new token (token::properties_end, l, c);
1902 at_beginning_of_statement = true;
1903 break;
1904
1905
1906 case for_kw:
1907 case parfor_kw:
1908 case while_kw:
1909 promptflag--;
1910 looping++;
1911 break;
1912
1913 case do_kw:
1914 at_beginning_of_statement = true;
1915 promptflag--;
1916 looping++;
1917 break;
1918
1919 case try_kw:
1920 case unwind_protect_kw:
1921 at_beginning_of_statement = true;
1922 promptflag--;
1923 break;
1924
1925 case if_kw:
1926 case switch_kw:
1927 promptflag--;
1928 break;
1929
1930 case get_kw:
1931 case set_kw:
1932 // 'get' and 'set' are keywords in classdef method
1933 // declarations.
1934 if (! maybe_classdef_get_set_method)
1935 return 0;
1936 break;
1937
1938 case enumeration_kw:
1939 case events_kw:
1940 case methods_kw:
1941 case properties_kw:
1942 // 'enumeration', 'events', 'methods' and 'properties' are
1943 // keywords only while parsing a classdef.
1944 if (! parsing_classdef)
1945 return 0;
1946 // fall through ...
1947
1948 case classdef_kw:
1949 // 'classdef' is always a keyword.
1950 promptflag--;
1951 break;
1952
1953 case function_kw:
1954 promptflag--;
1955
1956 defining_func++;
1957 parsed_function_name.push (false);
1958
// When not reading from a file, line counting restarts at 1.
1959 if (! (reading_fcn_file || reading_script_file
1960 || reading_classdef_file))
1961 input_line_number = 1;
1962 break;
1963
1964 case magic_file_kw:
1965 {
1966 if ((reading_fcn_file || reading_script_file
1967 || reading_classdef_file)
1968 && ! curr_fcn_file_full_name.empty ())
1969 yylval.tok_val = new token (curr_fcn_file_full_name, l, c);
1970 else
1971 yylval.tok_val = new token ("stdin", l, c);
1972 }
1973 break;
1974
1975 case magic_line_kw:
1976 yylval.tok_val = new token (static_cast<double> (l), "", l, c);
1977 break;
1978
1979 default:
1980 panic_impossible ();
1981 }
1982
1983 if (! yylval.tok_val)
1984 yylval.tok_val = new token (l, c);
1985
1986 token_stack.push (yylval.tok_val);
1987
1988 return kw->tok;
1989 }
1990
1991 return 0;
1992 }
1993
1994 bool
1995 lexical_feedback::is_variable (const std::string& name)
1996 {
1997 return (symbol_table::is_variable (name)
1998 || (pending_local_variables.find (name)
1999 != pending_local_variables.end ()));
2000 }
2001
// Read the body of a block comment from READER and return the text
// collected.  A '%' or '#' at the beginning of a line followed by '{'
// or '}' (optionally followed by blanks or tabs and then a newline)
// raises or lowers block_comment_nesting_level.  When the level drops
// to zero, the text of any comment block that follows is appended via
// grab_comment_block and the result is returned.  EOF is set to true
// when the end of input is reached here.
2002 std::string
2003 lexical_feedback::grab_block_comment (stream_reader& reader, bool& eof)
2004 {
2005 std::string buf;
2006
2007 bool at_bol = true;
2008 bool look_for_marker = false;
2009
2010 bool warned_incompatible = false;
2011
2012 int c = 0;
2013
2014 while ((c = reader.getc ()) != EOF)
2015 {
2016 current_input_column++;
2017
// The previous character was a comment character at the beginning of
// a line; see whether this one starts a block marker.
2018 if (look_for_marker)
2019 {
2020 at_bol = false;
2021 look_for_marker = false;
2022
2023 if (c == '{' || c == '}')
2024 {
2025 std::string tmp_buf (1, static_cast<char> (c));
2026
2027 int type = c;
2028
2029 bool done = false;
2030
// Note that the loop condition reads one more character before DONE
// is tested, so on exit C holds the character after the marker line
// and is handled by the code below this block.
2031 while ((c = reader.getc ()) != EOF && ! done)
2032 {
2033 current_input_column++;
2034
2035 switch (c)
2036 {
2037 case ' ':
2038 case '\t':
2039 tmp_buf += static_cast<char> (c);
2040 break;
2041
2042 case '\n':
2043 {
2044 current_input_column = 0;
2045 at_bol = true;
2046 done = true;
2047
2048 if (type == '{')
2049 {
2050 block_comment_nesting_level++;
2051 promptflag--;
2052 }
2053 else
2054 {
2055 block_comment_nesting_level--;
2056 promptflag++;
2057
2058 if (block_comment_nesting_level == 0)
2059 {
2060 buf += grab_comment_block (reader, true, eof);
2061
2062 return buf;
2063 }
2064 }
2065 }
2066 break;
2067
2068 default:
// Not a marker line after all; keep the text that was set aside.
2069 at_bol = false;
2070 tmp_buf += static_cast<char> (c);
2071 buf += tmp_buf;
2072 done = true;
2073 break;
2074 }
2075 }
2076 }
2077 }
2078
2079 if (at_bol && (c == '%' || c == '#'))
2080 {
2081 if (c == '#' && ! warned_incompatible)
2082 {
2083 warned_incompatible = true;
2084 maybe_gripe_matlab_incompatible_comment (c);
2085 }
2086
2087 at_bol = false;
2088 look_for_marker = true;
2089 }
2090 else
2091 {
2092 buf += static_cast<char> (c);
2093
2094 if (c == '\n')
2095 {
2096 current_input_column = 0;
2097 at_bol = true;
2098 }
2099 }
2100 }
2101
2102 if (c == EOF)
2103 eof = true;
2104
2105 return buf;
2106 }
2107
// Read consecutive comment lines from READER and return their text
// with the leading comment characters removed.  AT_BOL says whether
// reading starts at the beginning of a line.  A '{' right after the
// comment character at the beginning of a line, followed only by
// blanks or tabs and a newline, starts a block comment that is read
// with grab_block_comment.  The first character that does not belong
// to a comment is pushed back to READER.  EOF is set to true when the
// end of input is reached here.
2108 std::string
2109 lexical_feedback::grab_comment_block (stream_reader& reader, bool at_bol,
2110 bool& eof)
2111 {
2112 std::string buf;
2113
2114 // TRUE means we are at the beginning of a comment block.
2115 bool begin_comment = false;
2116
2117 // TRUE means we are currently reading a comment block.
2118 bool in_comment = false;
2119
2120 bool warned_incompatible = false;
2121
2122 int c = 0;
2123
2124 while ((c = reader.getc ()) != EOF)
2125 {
2126 current_input_column++;
2127
2128 if (begin_comment)
2129 {
// Additional comment characters at the start are skipped.
2130 if (c == '%' || c == '#')
2131 {
2132 at_bol = false;
2133 continue;
2134 }
2135 else if (at_bol && c == '{')
2136 {
2137 std::string tmp_buf (1, static_cast<char> (c));
2138
2139 bool done = false;
2140
// Note that the loop condition reads one more character before DONE
// is tested, so on exit C holds the character that follows.
2141 while ((c = reader.getc ()) != EOF && ! done)
2142 {
2143 current_input_column++;
2144
2145 switch (c)
2146 {
2147 case ' ':
2148 case '\t':
2149 tmp_buf += static_cast<char> (c);
2150 break;
2151
2152 case '\n':
2153 {
2154 current_input_column = 0;
2155 at_bol = true;
2156 done = true;
2157
2158 block_comment_nesting_level++;
2159 promptflag--;
2160
2161 buf += grab_block_comment (reader, eof);
2162
2163 in_comment = false;
2164
2165 if (eof)
2166 goto done;
2167 }
2168 break;
2169
2170 default:
2171 at_bol = false;
2172 tmp_buf += static_cast<char> (c);
2173 buf += tmp_buf;
2174 done = true;
2175 break;
2176 }
2177 }
2178 }
2179 else
2180 {
2181 at_bol = false;
2182 begin_comment = false;
2183 }
2184 }
2185
2186 if (in_comment)
2187 {
2188 buf += static_cast<char> (c);
2189
2190 if (c == '\n')
2191 {
2192 at_bol = true;
2193 current_input_column = 0;
2194 in_comment = false;
2195
2196 // FIXME -- bailing out here prevents things like
2197 //
2198 // octave> # comment
2199 // octave> x = 1
2200 //
2201 // from failing at the command line, while still
2202 // allowing blocks of comments to be grabbed properly
2203 // for function doc strings. But only the first line of
2204 // a multi-line doc string will be picked up for
2205 // functions defined on the command line. We need a
2206 // better way of collecting these comments...
2207 if (! (reading_fcn_file || reading_script_file))
2208 goto done;
2209 }
2210 }
2211 else
2212 {
2213 switch (c)
2214 {
2215 case ' ':
2216 case '\t':
2217 break;
2218
2219 case '#':
2220 if (! warned_incompatible)
2221 {
2222 warned_incompatible = true;
2223 maybe_gripe_matlab_incompatible_comment (c);
2224 }
2225 // fall through...
2226
2227 case '%':
2228 in_comment = true;
2229 begin_comment = true;
2230 break;
2231
2232 default:
// Not part of a comment: undo the column count and return the
// character to the reader.
2233 current_input_column--;
2234 reader.ungetc (c);
2235 goto done;
2236 }
2237 }
2238 }
2239
2240 done:
2241
2242 if (c == EOF)
2243 eof = true;
2244
2245 return buf;
2246 }
2247
2248 int
2249 lexical_feedback::process_comment (bool start_in_block, bool& eof)
2250 {
2251 eof = false;
2252
2253 std::string help_txt;
2254
2255 if (! help_buf.empty ())
2256 help_txt = help_buf.top ();
2257
2258 flex_stream_reader flex_reader (this, yytext);
2259
2260 // process_comment is only supposed to be called when we are not
2261 // initially looking at a block comment.
2262
2263 std::string txt = start_in_block
2264 ? grab_block_comment (flex_reader, eof)
2265 : grab_comment_block (flex_reader, false, eof);
2266
2267 if (lexer_debug_flag)
2268 std::cerr << "C: " << txt << std::endl;
2269
2270 if (help_txt.empty () && nesting_level.none ())
2271 {
2272 if (! help_buf.empty ())
2273 help_buf.pop ();
2274
2275 help_buf.push (txt);
2276 }
2277
2278 octave_comment_buffer::append (txt);
2279
2280 current_input_column = 1;
2281 quote_is_transpose = false;
2282 convert_spaces_to_comma = true;
2283 at_beginning_of_statement = true;
2284
2285 if (YY_START == COMMAND_START)
2286 BEGIN (INITIAL);
2287
2288 if (nesting_level.none ())
2289 return '\n';
2290 else if (nesting_level.is_bracket_or_brace ())
2291 return ';';
2292 else
2293 return 0;
2294 }
2295
2296 // Recognize separators. If the separator is a CRLF pair, it is
2297 // replaced by a single LF.
2298
2299 bool
2300 lexical_feedback::next_token_is_sep_op (void)
2301 {
2302 bool retval = false;
2303
2304 int c = text_yyinput ();
2305
2306 retval = match_any (c, ",;\n]");
2307
2308 xunput (c, yytext);
2309
2310 return retval;
2311 }
2312
2313 // Try to determine if the next token should be treated as a postfix
2314 // unary operator. This is ugly, but it seems to do the right thing.
2315
2316 bool
2317 lexical_feedback::next_token_is_postfix_unary_op (bool spc_prev)
2318 {
2319 bool un_op = false;
2320
2321 int c0 = text_yyinput ();
2322
2323 if (c0 == '\'' && ! spc_prev)
2324 {
2325 un_op = true;
2326 }
2327 else if (c0 == '.')
2328 {
2329 int c1 = text_yyinput ();
2330 un_op = (c1 == '\'');
2331 xunput (c1, yytext);
2332 }
2333 else if (c0 == '+')
2334 {
2335 int c1 = text_yyinput ();
2336 un_op = (c1 == '+');
2337 xunput (c1, yytext);
2338 }
2339 else if (c0 == '-')
2340 {
2341 int c1 = text_yyinput ();
2342 un_op = (c1 == '-');
2343 xunput (c1, yytext);
2344 }
2345
2346 xunput (c0, yytext);
2347
2348 return un_op;
2349 }
2350
2351 // Try to determine if the next token should be treated as a binary
2352 // operator.
2353 //
2354 // This kluge exists because whitespace is not always ignored inside
2355 // the square brackets that are used to create matrix objects (though
2356 // spacing only really matters in the cases that can be interpreted
2357 // either as binary ops or prefix unary ops: currently just +, -).
2358 //
2359 // Note that a line continuation directly following a + or - operator
2360 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
2361 // parsed as a binary operator.
2362
2363 bool
2364 lexical_feedback::next_token_is_bin_op (bool spc_prev)
2365 {
2366 bool bin_op = false;
2367
2368 int c0 = text_yyinput ();
2369
2370 switch (c0)
2371 {
2372 case '+':
2373 case '-':
2374 {
2375 int c1 = text_yyinput ();
2376
2377 switch (c1)
2378 {
2379 case '+':
2380 case '-':
2381 // Unary ops, spacing doesn't matter.
2382 break;
2383
2384 case '=':
2385 // Binary ops, spacing doesn't matter.
2386 bin_op = true;
2387 break;
2388
2389 default:
2390 // Could be either, spacing matters.
2391 bin_op = looks_like_bin_op (spc_prev, c1);
2392 break;
2393 }
2394
2395 xunput (c1, yytext);
2396 }
2397 break;
2398
2399 case ':':
2400 case '/':
2401 case '\\':
2402 case '^':
2403 // Always a binary op (may also include /=, \=, and ^=).
2404 bin_op = true;
2405 break;
2406
2407 // .+ .- ./ .\ .^ .* .**
2408 case '.':
2409 {
2410 int c1 = text_yyinput ();
2411
2412 if (match_any (c1, "+-/\\^*"))
2413 // Always a binary op (may also include .+=, .-=, ./=, ...).
2414 bin_op = true;
2415 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
2416 // A structure element reference is a binary op.
2417 bin_op = true;
2418
2419 xunput (c1, yytext);
2420 }
2421 break;
2422
2423 // = == & && | || * **
2424 case '=':
2425 case '&':
2426 case '|':
2427 case '*':
2428 // Always a binary op (may also include ==, &&, ||, **).
2429 bin_op = true;
2430 break;
2431
2432 // < <= <> > >=
2433 case '<':
2434 case '>':
2435 // Always a binary op (may also include <=, <>, >=).
2436 bin_op = true;
2437 break;
2438
2439 // ~= !=
2440 case '~':
2441 case '!':
2442 {
2443 int c1 = text_yyinput ();
2444
2445 // ~ and ! can be unary ops, so require following =.
2446 if (c1 == '=')
2447 bin_op = true;
2448
2449 xunput (c1, yytext);
2450 }
2451 break;
2452
2453 default:
2454 break;
2455 }
2456
2457 xunput (c0, yytext);
2458
2459 return bin_op;
2460 }
2461
2462 // FIXME -- we need to handle block comments here.
2463
2464 void
2465 lexical_feedback::scan_for_comments (const char *text)
2466 {
2467 std::string comment_buf;
2468
2469 bool in_comment = false;
2470 bool beginning_of_comment = false;
2471
2472 int len = strlen (text);
2473 int i = 0;
2474
2475 while (i < len)
2476 {
2477 char c = text[i++];
2478
2479 switch (c)
2480 {
2481 case '%':
2482 case '#':
2483 if (in_comment)
2484 {
2485 if (! beginning_of_comment)
2486 comment_buf += static_cast<char> (c);
2487 }
2488 else
2489 {
2490 maybe_gripe_matlab_incompatible_comment (c);
2491 in_comment = true;
2492 beginning_of_comment = true;
2493 }
2494 break;
2495
2496 case '\n':
2497 if (in_comment)
2498 {
2499 comment_buf += static_cast<char> (c);
2500 octave_comment_buffer::append (comment_buf);
2501 comment_buf.resize (0);
2502 in_comment = false;
2503 beginning_of_comment = false;
2504 }
2505 break;
2506
2507 default:
2508 if (in_comment)
2509 {
2510 comment_buf += static_cast<char> (c);
2511 beginning_of_comment = false;
2512 }
2513 break;
2514 }
2515 }
2516
2517 if (! comment_buf.empty ())
2518 octave_comment_buffer::append (comment_buf);
2519 }
2520
// Discard whitespace, including comments and continuations.
//
// Characters are read with text_yyinput until one is found that is
// not a space, tab, newline, comment text, or part of a line
// continuation.  That character is handed back with xunput.  Comment
// text is collected and passed to octave_comment_buffer::append.
//
// The return value starts as lexical_feedback::NO_WHITESPACE and has
// lexical_feedback::SPACE_OR_TAB and/or lexical_feedback::NEWLINE
// OR'ed in for each such character that was read.

// FIXME -- we need to handle block comments here.

int
lexical_feedback::eat_whitespace (void)
{
  int retval = lexical_feedback::NO_WHITESPACE;

  std::string comment_buf;

  // BEGINNING_OF_COMMENT is true only while we are still reading the
  // run of comment characters that opened the current comment.
  bool in_comment = false;
  bool beginning_of_comment = false;

  int c = 0;

  while ((c = text_yyinput ()) != EOF)
    {
      current_input_column++;

      switch (c)
        {
        case ' ':
        case '\t':
          if (in_comment)
            {
              comment_buf += static_cast<char> (c);
              beginning_of_comment = false;
            }
          // Note: the flag is set even for blanks inside a comment.
          retval |= lexical_feedback::SPACE_OR_TAB;
          break;

        case '\n':
          retval |= lexical_feedback::NEWLINE;
          if (in_comment)
            {
              // The newline ends the comment and is stored with it.
              comment_buf += static_cast<char> (c);
              octave_comment_buffer::append (comment_buf);
              comment_buf.resize (0);
              in_comment = false;
              beginning_of_comment = false;
            }
          current_input_column = 0;
          break;

        case '#':
        case '%':
          if (in_comment)
            {
              // Comment characters directly after the opening one are
              // dropped; later ones are kept as comment text.
              if (! beginning_of_comment)
                comment_buf += static_cast<char> (c);
            }
          else
            {
              maybe_gripe_matlab_incompatible_comment (c);
              in_comment = true;
              beginning_of_comment = true;
            }
          break;

        case '.':
          if (in_comment)
            {
              comment_buf += static_cast<char> (c);
              beginning_of_comment = false;
              break;
            }
          else
            {
              // Outside a comment a '.' is only skipped when it starts
              // an ellipsis continuation; otherwise we are finished.
              if (have_ellipsis_continuation ())
                break;
              else
                goto done;
            }

        case '\\':
          if (in_comment)
            {
              comment_buf += static_cast<char> (c);
              beginning_of_comment = false;
              break;
            }
          else
            {
              // Likewise for a backslash continuation.
              if (have_continuation ())
                break;
              else
                goto done;
            }

        default:
          if (in_comment)
            {
              comment_buf += static_cast<char> (c);
              beginning_of_comment = false;
              break;
            }
          else
            goto done;
        }
    }

  // Reached only when the loop ended on EOF: flush a comment that was
  // not terminated by a newline.  The "goto done" exits skip this.
  if (! comment_buf.empty ())
    octave_comment_buffer::append (comment_buf);

 done:
  // Hand back the character that stopped the scan (EOF when the loop
  // ran off the end of the input) and undo its column increment.
  xunput (c, yytext);
  current_input_column--;
  return retval;
}
2631
// Return true if the first LEN characters of S begin with "0x" or
// "0X" and at least one more character follows the prefix.

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return s[1] == 'x' || s[1] == 'X';
}
2637
2638 void
2639 lexical_feedback::handle_number (void)
2640 {
2641 double value = 0.0;
2642 int nread = 0;
2643
2644 if (looks_like_hex (yytext, strlen (yytext)))
2645 {
2646 unsigned long ival;
2647
2648 nread = sscanf (yytext, "%lx", &ival);
2649
2650 value = static_cast<double> (ival);
2651 }
2652 else
2653 {
2654 char *tmp = strsave (yytext);
2655
2656 char *idx = strpbrk (tmp, "Dd");
2657
2658 if (idx)
2659 *idx = 'e';
2660
2661 nread = sscanf (tmp, "%lf", &value);
2662
2663 delete [] tmp;
2664 }
2665
2666 // If yytext doesn't contain a valid number, we are in deep doo doo.
2667
2668 assert (nread == 1);
2669
2670 quote_is_transpose = true;
2671 convert_spaces_to_comma = true;
2672 looking_for_object_index = false;
2673 at_beginning_of_statement = false;
2674
2675 yylval.tok_val = new token (value, yytext, input_line_number,
2676 current_input_column);
2677
2678 token_stack.push (yylval.tok_val);
2679
2680 current_input_column += yyleng;
2681
2682 do_comma_insert_check ();
2683 }
2684
// We have seen a backslash and need to find out if it should be
// treated as a continuation character.  If so, this eats it, up to
// and including the new line character.
//
// Match whitespace only, followed by a comment character or newline.
// Once a comment character is found, the rest of the line is collected
// as comment text and passed to octave_comment_buffer::append when the
// newline is reached.  If non-whitespace characters are found before
// comment characters (or a comment character is found while
// TRAILING_COMMENTS_OK is false), everything read here is handed back
// with xunput and false is returned.  Otherwise, return true.

// FIXME -- we need to handle block comments here.

bool
lexical_feedback::have_continuation (bool trailing_comments_ok)
{
  // Every character read, so that it can be restored on failure.
  std::ostringstream buf;

  std::string comment_buf;

  bool in_comment = false;
  bool beginning_of_comment = false;

  int c = 0;

  while ((c = text_yyinput ()) != EOF)
    {
      buf << static_cast<char> (c);

      switch (c)
        {
        case ' ':
        case '\t':
          if (in_comment)
            {
              comment_buf += static_cast<char> (c);
              beginning_of_comment = false;
            }
          break;

        case '%':
        case '#':
          if (trailing_comments_ok)
            {
              if (in_comment)
                {
                  // Comment characters directly after the opening one
                  // are not stored.
                  if (! beginning_of_comment)
                    comment_buf += static_cast<char> (c);
                }
              else
                {
                  maybe_gripe_matlab_incompatible_comment (c);
                  in_comment = true;
                  beginning_of_comment = true;
                }
            }
          else
            goto cleanup;
          break;

        case '\n':
          // Found the end of the line: this is a continuation.
          if (in_comment)
            {
              comment_buf += static_cast<char> (c);
              octave_comment_buffer::append (comment_buf);
            }
          current_input_column = 0;
          promptflag--;
          gripe_matlab_incompatible_continuation ();
          return true;

        default:
          if (in_comment)
            {
              comment_buf += static_cast<char> (c);
              beginning_of_comment = false;
            }
          else
            goto cleanup;
          break;
        }
    }

  // EOF before a newline was seen.
  // NOTE(review): only the EOF value is handed back here; the
  // characters collected in BUF are not restored as they are in the
  // cleanup path below -- confirm that this is intended.
  xunput (c, yytext);
  return false;

 cleanup:

  // Not a continuation: hand back everything that was read, last
  // character first.
  std::string s = buf.str ();

  int len = s.length ();
  while (len--)
    xunput (s[len], yytext);

  return false;
}
2779
2780 // We have seen a '.' and need to see if it is the start of a
2781 // continuation. If so, this eats it, up to and including the new
2782 // line character.
2783
2784 bool
2785 lexical_feedback::have_ellipsis_continuation (bool trailing_comments_ok)
2786 {
2787 char c1 = text_yyinput ();
2788 if (c1 == '.')
2789 {
2790 char c2 = text_yyinput ();
2791 if (c2 == '.' && have_continuation (trailing_comments_ok))
2792 return true;
2793 else
2794 {
2795 xunput (c2, yytext);
2796 xunput (c1, yytext);
2797 }
2798 }
2799 else
2800 xunput (c1, yytext);
2801
2802 return false;
2803 }
2804
2805 // See if we have a continuation line. If so, eat it and the leading
2806 // whitespace on the next line.
2807
2808 int
2809 lexical_feedback::eat_continuation (void)
2810 {
2811 int retval = lexical_feedback::NO_WHITESPACE;
2812
2813 int c = text_yyinput ();
2814
2815 if ((c == '.' && have_ellipsis_continuation ())
2816 || (c == '\\' && have_continuation ()))
2817 retval = eat_whitespace ();
2818 else
2819 xunput (c, yytext);
2820
2821 return retval;
2822 }
2823
// Read the body of a character string whose opening delimiter DELIM
// (a single or double quote) has already been seen.
//
// On success a token holding the string is stored in yylval.tok_val
// and pushed on token_stack, and DQ_STRING or SQ_STRING is returned
// according to DELIM.  If a newline or EOF is reached before the
// closing delimiter, LEXICAL_ERROR is returned (a newline also
// reports "unterminated string constant").

int
lexical_feedback::handle_string (char delim)
{
  std::ostringstream buf;

  // Position of the beginning of the string, used for the token.
  int bos_line = input_line_number;
  int bos_col = current_input_column;

  int c;
  // Nonzero when the previous character was an unescaped backslash
  // that was copied into BUF.
  int escape_pending = 0;

  while ((c = text_yyinput ()) != EOF)
    {
      current_input_column++;

      if (c == '\\')
        {
          if (delim == '\'' || escape_pending)
            {
              // Backslashes are copied as-is in single-quoted strings,
              // and so is the second backslash of a pair.
              buf << static_cast<char> (c);
              escape_pending = 0;
            }
          else
            {
              // In a double-quoted string the backslash is dropped if
              // it begins a line continuation; otherwise it is kept
              // and marks the next character as escaped.
              if (have_continuation (false))
                escape_pending = 0;
              else
                {
                  buf << static_cast<char> (c);
                  escape_pending = 1;
                }
            }
          // Skip the reset of ESCAPE_PENDING at the bottom of the loop.
          continue;
        }
      else if (c == '.')
        {
          // In a double-quoted string a '.' is dropped when it begins
          // an ellipsis continuation; otherwise it is copied.
          if (delim == '\'' || ! have_ellipsis_continuation (false))
            buf << static_cast<char> (c);
        }
      else if (c == '\n')
        {
          error ("unterminated string constant");
          break;
        }
      else if (c == delim)
        {
          if (escape_pending)
            buf << static_cast<char> (c);
          else
            {
              c = text_yyinput ();
              if (c == delim)
                {
                  // A doubled delimiter stands for one literal
                  // delimiter character.
                  buf << static_cast<char> (c);
                }
              else
                {
                  // End of the string: hand back the lookahead
                  // character and build the token.
                  std::string s;
                  xunput (c, yytext);

                  // Only double-quoted strings are passed through
                  // do_string_escapes.
                  if (delim == '\'')
                    s = buf.str ();
                  else
                    s = do_string_escapes (buf.str ());

                  quote_is_transpose = true;
                  convert_spaces_to_comma = true;

                  yylval.tok_val = new token (s, bos_line, bos_col);
                  token_stack.push (yylval.tok_val);

                  if (delim == '"')
                    gripe_matlab_incompatible ("\" used as string delimiter");
                  else if (delim == '\'')
                    gripe_single_quote_string ();

                  looking_for_object_index = true;
                  at_beginning_of_statement = false;

                  return delim == '"' ? DQ_STRING : SQ_STRING;
                }
            }
        }
      else
        {
          buf << static_cast<char> (c);
        }

      escape_pending = 0;
    }

  return LEXICAL_ERROR;
}
2917
2918 bool
2919 lexical_feedback::next_token_is_assign_op (void)
2920 {
2921 bool retval = false;
2922
2923 int c0 = text_yyinput ();
2924
2925 switch (c0)
2926 {
2927 case '=':
2928 {
2929 int c1 = text_yyinput ();
2930 xunput (c1, yytext);
2931 if (c1 != '=')
2932 retval = true;
2933 }
2934 break;
2935
2936 case '+':
2937 case '-':
2938 case '*':
2939 case '/':
2940 case '\\':
2941 case '&':
2942 case '|':
2943 {
2944 int c1 = text_yyinput ();
2945 xunput (c1, yytext);
2946 if (c1 == '=')
2947 retval = true;
2948 }
2949 break;
2950
2951 case '.':
2952 {
2953 int c1 = text_yyinput ();
2954 if (match_any (c1, "+-*/\\"))
2955 {
2956 int c2 = text_yyinput ();
2957 xunput (c2, yytext);
2958 if (c2 == '=')
2959 retval = true;
2960 }
2961 xunput (c1, yytext);
2962 }
2963 break;
2964
2965 case '>':
2966 {
2967 int c1 = text_yyinput ();
2968 if (c1 == '>')
2969 {
2970 int c2 = text_yyinput ();
2971 xunput (c2, yytext);
2972 if (c2 == '=')
2973 retval = true;
2974 }
2975 xunput (c1, yytext);
2976 }
2977 break;
2978
2979 case '<':
2980 {
2981 int c1 = text_yyinput ();
2982 if (c1 == '<')
2983 {
2984 int c2 = text_yyinput ();
2985 xunput (c2, yytext);
2986 if (c2 == '=')
2987 retval = true;
2988 }
2989 xunput (c1, yytext);
2990 }
2991 break;
2992
2993 default:
2994 break;
2995 }
2996
2997 xunput (c0, yytext);
2998
2999 return retval;
3000 }
3001
3002 bool
3003 lexical_feedback::next_token_is_index_op (void)
3004 {
3005 int c = text_yyinput ();
3006 xunput (c, yytext);
3007 return c == '(' || c == '{';
3008 }
3009
// Handle a closing ']' or '}' (BRACKET_TYPE).  SPC_GOBBLED says
// whether whitespace was consumed after the bracket.
//
// Updates the nesting bookkeeping, switches the scanner back to the
// INITIAL start state once no brackets or braces remain open, and,
// when still inside an enclosing bracket or brace, may push a ','
// back onto the input so that adjacent elements are separated.
//
// Returns BRACKET_TYPE, except that a ']' that is followed by an
// assignment operator while not looking at a return list yields
// CLOSE_BRACE.

int
lexical_feedback::handle_close_bracket (bool spc_gobbled, int bracket_type)
{
  int retval = bracket_type;

  if (! nesting_level.none ())
    {
      nesting_level.remove ();

      if (bracket_type == ']')
        bracketflag--;
      else if (bracket_type == '}')
        braceflag--;
      else
        panic_impossible ();
    }

  if (bracketflag == 0 && braceflag == 0)
    BEGIN (INITIAL);

  if (bracket_type == ']'
      && next_token_is_assign_op ()
      && ! looking_at_return_list)
    {
      retval = CLOSE_BRACE;
    }
  else if ((bracketflag || braceflag)
           && convert_spaces_to_comma
           && (nesting_level.is_bracket ()
               || (nesting_level.is_brace ()
                   && ! looking_at_object_index.front ())))
    {
      bool index_op = next_token_is_index_op ();

      // Don't insert comma if we are looking at something like
      //
      //   [x{i}{j}] or [x{i}(j)]
      //
      // but do if we are looking at
      //
      //   [x{i} {j}] or [x{i} (j)]

      if (spc_gobbled || ! (bracket_type == '}' && index_op))
        {
          bool bin_op = next_token_is_bin_op (spc_gobbled);

          bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);

          bool sep_op = next_token_is_sep_op ();

          if (! (postfix_un_op || bin_op || sep_op))
            {
              maybe_warn_separator_insert (',');

              // Note: this early return leaves quote_is_transpose and
              // convert_spaces_to_comma as they were.
              xunput (',', yytext);
              return retval;
            }
        }
    }

  quote_is_transpose = true;
  convert_spaces_to_comma = true;

  return retval;
}
3075
3076 void
3077 lexical_feedback::maybe_unput_comma (int spc_gobbled)
3078 {
3079 if (nesting_level.is_bracket ()
3080 || (nesting_level.is_brace ()
3081 && ! looking_at_object_index.front ()))
3082 {
3083 int bin_op = next_token_is_bin_op (spc_gobbled);
3084
3085 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
3086
3087 int c1 = text_yyinput ();
3088 int c2 = text_yyinput ();
3089
3090 xunput (c2, yytext);
3091 xunput (c1, yytext);
3092
3093 int sep_op = next_token_is_sep_op ();
3094
3095 int dot_op = (c1 == '.'
3096 && (isalpha (c2) || isspace (c2) || c2 == '_'));
3097
3098 if (postfix_un_op || bin_op || sep_op || dot_op)
3099 return;
3100
3101 int index_op = (c1 == '(' || c1 == '{');
3102
3103 // If there is no space before the indexing op, we don't insert
3104 // a comma.
3105
3106 if (index_op && ! spc_gobbled)
3107 return;
3108
3109 maybe_warn_separator_insert (',');
3110
3111 xunput (',', yytext);
3112 }
3113 }
3114
3115 bool
3116 lexical_feedback::next_token_can_follow_bin_op (void)
3117 {
3118 std::stack<char> buf;
3119
3120 int c = EOF;
3121
3122 // Skip whitespace in current statement on current line
3123 while (true)
3124 {
3125 c = text_yyinput ();
3126
3127 buf.push (c);
3128
3129 if (match_any (c, ",;\n") || (c != ' ' && c != '\t'))
3130 break;
3131 }
3132
3133 // Restore input.
3134 while (! buf.empty ())
3135 {
3136 xunput (buf.top (), yytext);
3137
3138 buf.pop ();
3139 }
3140
3141 return (isalnum (c) || match_any (c, "!\"'(-[_{~"));
3142 }
3143
// Return false for the names e, I, i, J, j, Inf, inf, NaN and nan,
// and true for anything else.
//
// Don't allow these names to be treated as commands to avoid
// surprises when parsing things like "NaN ^2".

static bool
can_be_command (const std::string& tok)
{
  static const char *const never_commands[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan"
    };

  const size_t count = sizeof (never_commands) / sizeof (never_commands[0]);

  for (size_t k = 0; k < count; k++)
    {
      if (tok == never_commands[k])
        return false;
    }

  return true;
}
3156
// Peek at the input that follows a word and decide whether it looks
// like the start of a command-syntax argument (true) or not (false).
// handle_identifier uses a true result to switch the scanner to the
// COMMAND_START state.
//
// The result starts out true and is set to false when the input
// begins with:
//
//   * '=' that is not part of "==";
//   * '(' or '{' (indexing);
//   * '.' followed by something that looks like a structure reference;
//   * an operator that is followed by a space or tab and then by a
//     character for which next_token_can_follow_bin_op returns true.
//
// All characters read here are handed back with xunput, so the input
// is examined but not consumed.

bool
lexical_feedback::looks_like_command_arg (void)
{
  bool retval = true;

  int c0 = text_yyinput ();

  switch (c0)
    {
    // = ==
    case '=':
      {
        int c1 = text_yyinput ();

        if (c1 == '=')
          {
            int c2 = text_yyinput ();

            // "== " followed by a possible operand.
            if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
                && next_token_can_follow_bin_op ())
              retval = false;

            xunput (c2, yytext);
          }
        else
          // A lone '=' is an assignment.
          retval = false;

        xunput (c1, yytext);
      }
      break;

    case '(':
    case '{':
      // Indexing.
      retval = false;
      break;

    case '\n':
      // EOL.
      break;

    case '\'':
    case '"':
      // Beginning of a character string.
      break;

    // + - ++ -- += -=
    case '+':
    case '-':
      {
        int c1 = text_yyinput ();

        switch (c1)
          {
          case '\n':
            // EOL.
          case '+':
          case '-':
            // Unary ops, spacing doesn't matter.
            break;

          case '\t':
          case ' ':
            {
              // "+ " or "- " followed by a possible operand.
              if (next_token_can_follow_bin_op ())
                retval = false;
            }
            break;

          case '=':
            {
              int c2 = text_yyinput ();

              // "+= " or "-= " followed by a possible operand.
              if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
                  && next_token_can_follow_bin_op ())
                retval = false;

              xunput (c2, yytext);
            }
            break;
          }

        xunput (c1, yytext);
      }
      break;

    case ':':
    case '/':
    case '\\':
    case '^':
      {
        int c1 = text_yyinput ();

        if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
            && next_token_can_follow_bin_op ())
          retval = false;

        xunput (c1, yytext);
      }
      break;

    // .+ .- ./ .\ .^ .* .**
    case '.':
      {
        int c1 = text_yyinput ();

        if (match_any (c1, "+-/\\^*"))
          {
            int c2 = text_yyinput ();

            if (c2 == '=')
              {
                int c3 = text_yyinput ();

                if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t')
                    && next_token_can_follow_bin_op ())
                  retval = false;

                xunput (c3, yytext);
              }
            else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
                     && next_token_can_follow_bin_op ())
              retval = false;

            xunput (c2, yytext);
          }
        else if (! match_any (c1, ",;\n")
                 && (! isdigit (c1) && c1 != ' ' && c1 != '\t'
                     && c1 != '.'))
          {
            // Structure reference.  FIXME -- is this a complete check?

            retval = false;
          }

        xunput (c1, yytext);
      }
      break;

    // & && | || * **
    case '&':
    case '|':
    case '*':
      {
        int c1 = text_yyinput ();

        if (c1 == c0)
          {
            // Doubled form: look past the second character.
            int c2 = text_yyinput ();

            if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
                && next_token_can_follow_bin_op ())
              retval = false;

            xunput (c2, yytext);
          }
        else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
                 && next_token_can_follow_bin_op ())
          retval = false;

        xunput (c1, yytext);
      }
      break;

    // < <= > >=
    case '<':
    case '>':
      {
        int c1 = text_yyinput ();

        if (c1 == '=')
          {
            int c2 = text_yyinput ();

            if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
                && next_token_can_follow_bin_op ())
              retval = false;

            xunput (c2, yytext);
          }
        else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
                 && next_token_can_follow_bin_op ())
          retval = false;

        xunput (c1, yytext);
      }
      break;

    // ~= !=
    case '~':
    case '!':
      {
        int c1 = text_yyinput ();

        // ~ and ! can be unary ops, so require following =.
        if (c1 == '=')
          {
            int c2 = text_yyinput ();

            if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
                && next_token_can_follow_bin_op ())
              retval = false;

            xunput (c2, yytext);
          }
        else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
                 && next_token_can_follow_bin_op ())
          retval = false;

        xunput (c1, yytext);
      }
      break;

    default:
      break;
    }

  xunput (c0, yytext);

  return retval;
}
3378
3379 int
3380 lexical_feedback::handle_superclass_identifier (void)
3381 {
3382 eat_continuation ();
3383
3384 std::string pkg;
3385 std::string meth = strip_trailing_whitespace (yytext);
3386 size_t pos = meth.find ("@");
3387 std::string cls = meth.substr (pos).substr (1);
3388 meth = meth.substr (0, pos - 1);
3389
3390 pos = cls.find (".");
3391 if (pos != std::string::npos)
3392 {
3393 pkg = cls.substr (pos).substr (1);
3394 cls = cls.substr (0, pos - 1);
3395 }
3396
3397 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls)
3398 || is_keyword_token (pkg));
3399 if (kw_token)
3400 {
3401 error ("method, class and package names may not be keywords");
3402 return LEXICAL_ERROR;
3403 }
3404
3405 yylval.tok_val
3406 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)),
3407 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3408 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3409 input_line_number,
3410 current_input_column);
3411 token_stack.push (yylval.tok_val);
3412
3413 convert_spaces_to_comma = true;
3414 current_input_column += yyleng;
3415
3416 return SUPERCLASSREF;
3417 }
3418
3419 int
3420 lexical_feedback::handle_meta_identifier (void)
3421 {
3422 eat_continuation ();
3423
3424 std::string pkg;
3425 std::string cls = strip_trailing_whitespace (yytext).substr (1);
3426 size_t pos = cls.find (".");
3427
3428 if (pos != std::string::npos)
3429 {
3430 pkg = cls.substr (pos).substr (1);
3431 cls = cls.substr (0, pos - 1);
3432 }
3433
3434 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg);
3435 if (kw_token)
3436 {
3437 error ("class and package names may not be keywords");
3438 return LEXICAL_ERROR;
3439 }
3440
3441 yylval.tok_val
3442 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)),
3443 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3444 input_line_number,
3445 current_input_column);
3446
3447 token_stack.push (yylval.tok_val);
3448
3449 convert_spaces_to_comma = true;
3450 current_input_column += yyleng;
3451
3452 return METAQUERY;
3453 }
3454
// Figure out exactly what kind of token to return when we have seen
// an identifier.  Handles keywords.
//
// Returns STRUCT_ELT when a structure element is expected, FCN_HANDLE
// when a function handle is expected, the value from is_keyword_token
// when that is nonzero, LEXICAL_ERROR when a function handle names a
// keyword, and NAME otherwise.  A negative value from is_keyword_token
// is returned without updating the column or state flags.

int
lexical_feedback::handle_identifier (void)
{
  // Remember whether we were at the beginning of a statement; the
  // member flag is cleared below.
  bool at_bos = at_beginning_of_statement;

  std::string tok = strip_trailing_whitespace (yytext);

  // Last character of the matched text.
  int c = yytext[yyleng-1];

  bool cont_is_spc = (eat_continuation () != lexical_feedback::NO_WHITESPACE);

  // True if whitespace followed the identifier, either as the last
  // matched character or after a continuation.
  int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');

  // If we are expecting a structure element, avoid recognizing
  // keywords and other special names and return STRUCT_ELT, which is
  // a string that is also a valid identifier.  But first, we have to
  // decide whether to insert a comma.

  if (looking_at_indirect_ref)
    {
      do_comma_insert_check ();

      maybe_unput_comma (spc_gobbled);

      yylval.tok_val = new token (tok, input_line_number,
                                  current_input_column);

      token_stack.push (yylval.tok_val);

      quote_is_transpose = true;
      convert_spaces_to_comma = true;
      looking_for_object_index = true;

      current_input_column += yyleng;

      return STRUCT_ELT;
    }

  at_beginning_of_statement = false;

  // The is_keyword_token may reset
  // at_beginning_of_statement.  For example, if it sees
  // an else token, then the next token is at the beginning of a
  // statement.

  int kw_token = is_keyword_token (tok);

  // If we found a keyword token, then the beginning_of_statement flag
  // is already set.  Otherwise, we won't be at the beginning of a
  // statement.

  if (looking_at_function_handle)
    {
      if (kw_token)
        {
          error ("function handles may not refer to keywords");

          return LEXICAL_ERROR;
        }
      else
        {
          yylval.tok_val = new token (tok, input_line_number,
                                      current_input_column);

          token_stack.push (yylval.tok_val);

          current_input_column += yyleng;
          quote_is_transpose = false;
          convert_spaces_to_comma = true;
          looking_for_object_index = true;

          return FCN_HANDLE;
        }
    }

  // If we have a regular keyword, return it.
  // Keywords can be followed by identifiers.

  if (kw_token)
    {
      if (kw_token >= 0)
        {
          current_input_column += yyleng;
          quote_is_transpose = false;
          convert_spaces_to_comma = true;
          looking_for_object_index = false;
        }

      return kw_token;
    }

  // Peek ahead to see whether the identifier is followed by a single
  // '=' (an assignment) as opposed to "==".

  int c1 = text_yyinput ();

  bool next_tok_is_eq = false;
  if (c1 == '=')
    {
      int c2 = text_yyinput ();
      xunput (c2, yytext);

      if (c2 != '=')
        next_tok_is_eq = true;
    }

  xunput (c1, yytext);

  // Kluge alert.
  //
  // If we are looking at a text style function, set up to gobble its
  // arguments.
  //
  // If the following token is '=', or if we are parsing a function
  // return list or function parameter list, or if we are looking at
  // something like [ab,cd] = foo (), force the symbol to be inserted
  // as a variable in the current symbol table.

  if (! is_variable (tok))
    {
      if (at_bos && spc_gobbled && can_be_command (tok)
          && looks_like_command_arg ())
        {
          BEGIN (COMMAND_START);
        }
      else if (next_tok_is_eq
               || looking_at_decl_list
               || looking_at_return_list
               || (looking_at_parameter_list
                   && ! looking_at_initializer_expression))
        {
          symbol_table::force_variable (tok);
        }
      else if (looking_at_matrix_or_assign_lhs)
        {
          pending_local_variables.insert (tok);
        }
    }

  // Find the token in the symbol table.  Beware the magic
  // transformation of the end keyword...

  if (tok == "end")
    tok = "__end__";

  yylval.tok_val = new token (&(symbol_table::insert (tok)),
                              input_line_number,
                              current_input_column);

  token_stack.push (yylval.tok_val);

  // After seeing an identifier, it is ok to convert spaces to a comma
  // (if needed).

  convert_spaces_to_comma = true;

  if (! (next_tok_is_eq || YY_START == COMMAND_START))
    {
      quote_is_transpose = true;

      do_comma_insert_check ();

      maybe_unput_comma (spc_gobbled);
    }

  current_input_column += yyleng;

  if (tok != "__end__")
    looking_for_object_index = true;

  return NAME;
}
3630
3631 void
3632 lexical_feedback::maybe_warn_separator_insert (char sep)
3633 {
3634 std::string nm = curr_fcn_file_full_name;
3635
3636 if (nm.empty ())
3637 warning_with_id ("Octave:separator-insert",
3638 "potential auto-insertion of '%c' near line %d",
3639 sep, input_line_number);
3640 else
3641 warning_with_id ("Octave:separator-insert",
3642 "potential auto-insertion of '%c' near line %d of file %s",
3643 sep, input_line_number, nm.c_str ());
3644 }
3645
3646 void
3647 lexical_feedback::gripe_single_quote_string (void)
3648 {
3649 std::string nm = curr_fcn_file_full_name;
3650
3651 if (nm.empty ())
3652 warning_with_id ("Octave:single-quote-string",
3653 "single quote delimited string near line %d",
3654 input_line_number);
3655 else
3656 warning_with_id ("Octave:single-quote-string",
3657 "single quote delimited string near line %d of file %s",
3658 input_line_number, nm.c_str ());
3659 }
3660
3661 void
3662 lexical_feedback::gripe_matlab_incompatible (const std::string& msg)
3663 {
3664 std::string nm = curr_fcn_file_full_name;
3665
3666 if (nm.empty ())
3667 warning_with_id ("Octave:matlab-incompatible",
3668 "potential Matlab compatibility problem: %s",
3669 msg.c_str ());
3670 else
3671 warning_with_id ("Octave:matlab-incompatible",
3672 "potential Matlab compatibility problem: %s near line %d offile %s",
3673 msg.c_str (), input_line_number, nm.c_str ());
3674 }
3675
3676 void
3677 lexical_feedback::maybe_gripe_matlab_incompatible_comment (char c)
3678 {
3679 if (c == '#')
3680 gripe_matlab_incompatible ("# used as comment character");
3681 }
3682
3683 void
3684 lexical_feedback::gripe_matlab_incompatible_continuation (void)
3685 {
3686 gripe_matlab_incompatible ("\\ used as line continuation marker");
3687 }
3688
3689 void
3690 lexical_feedback::gripe_matlab_incompatible_operator (const std::string& op)
3691 {
3692 std::string t = op;
3693 int n = t.length ();
3694 if (t[n-1] == '\n')
3695 t.resize (n-1);
3696 gripe_matlab_incompatible (t + " used as operator");
3697 }