Mercurial > octave
changeset 27778:2f8559459314
store beginning and ending location info for tokens recognized by lexer
* lex.h, lex.ll: Attempt to store the beginning and ending file
positions for each token created by the lexer.
(lexical_feedback::m_tok_beg, lexical_feedback::m_tok_end): New data members.
(lexical_feedback::m_beg_string): Delete variable. Replace all uses
with m_tok_beg.
(base_lexer::update_token_positions): New function. Use in rules to
set m_tok_beg and m_tok_end and to advance m_filepos.
(base_lexer::handle_unary_op,
base_lexer::handle_language_extension_unary_op): Delete. Replace uses
with direct calls to other handle_op functions.
* parse.h, oct-parse.yy (INPUT_FILE): Declare as tok_val token instead
of dummy_type.
(base_parser::bison_error): Eliminate default arguments. Provide
version that accepts filepos value.
(base_parser::make_anon_fcn_handle): Accept filepos position of @
token. Change caller.
(base_parser::make_end): Accept begin and end position for END token.
Change all uses.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 02 Dec 2019 23:43:27 -0600 |
parents | 3b6920ee4383 |
children | 1e6389ae4060 |
files | libinterp/parse-tree/lex.h libinterp/parse-tree/lex.ll libinterp/parse-tree/oct-parse.yy libinterp/parse-tree/parse.h |
diffstat | 4 files changed, 212 insertions(+), 168 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/parse-tree/lex.h Thu Dec 05 22:01:02 2019 -0600 +++ b/libinterp/parse-tree/lex.h Mon Dec 02 23:43:27 2019 -0600 @@ -296,7 +296,8 @@ m_command_arg_paren_count (0), m_token_count (0), m_filepos (), - m_beg_string (), + m_tok_beg (), + m_tok_end (), m_string_text (), m_current_input_line (), m_comment_text (), @@ -464,8 +465,12 @@ // The current position in the file (line and column). filepos m_filepos; - // The position of the beginning of the current character string. - filepos m_beg_string; + // The positions of the beginning and end of the current token after + // calling update_token_positions. Also used apart from + // update_token_positions to handle the beginning and end of + // character strings. + filepos m_tok_beg; + filepos m_tok_end; // The current character string text. std::string m_string_text; @@ -639,6 +644,8 @@ void xunput (char c); + void update_token_positions (int tok_len); + bool looking_at_space (void); bool inside_any_object_index (void); @@ -737,10 +744,6 @@ bool maybe_unput_comma_before_unary_op (int tok); - int handle_unary_op (int tok, bool bos = false); - - int handle_language_extension_unary_op (int tok, bool bos = false); - int handle_assign_op (const char *pattern, int tok); int handle_language_extension_assign_op (const char *pattern, int tok);
--- a/libinterp/parse-tree/lex.ll Thu Dec 05 22:01:02 2019 -0600 +++ b/libinterp/parse-tree/lex.ll Mon Dec 02 23:43:27 2019 -0600 @@ -173,6 +173,7 @@ } \ else \ { \ + curr_lexer->update_token_positions (yyleng); \ return curr_lexer->handle_op_internal (TOK, false, COMPAT); \ } \ } \ @@ -191,8 +192,8 @@ } \ else \ { \ - return curr_lexer->handle_language_extension_op (PATTERN, TOK, \ - false); \ + curr_lexer->update_token_positions (yyleng); \ + return curr_lexer->handle_language_extension_op (PATTERN, TOK, false); \ } \ } \ while (0) @@ -211,26 +212,21 @@ } \ else \ { \ + curr_lexer->update_token_positions (yyleng); \ return curr_lexer->handle_op_internal (TOK, false, COMPAT); \ } \ } \ else \ { \ - int tok \ - = (COMPAT \ - ? curr_lexer->handle_unary_op (TOK) \ - : curr_lexer->handle_language_extension_unary_op (TOK)); \ - \ - if (tok < 0) \ + if (curr_lexer->maybe_unput_comma_before_unary_op (TOK)) \ { \ yyless (0); \ curr_lexer->xunput (','); \ - /* Adjust for comma that was not really in the input stream. */ \ - curr_lexer->m_filepos.decrement_column (); \ } \ else \ { \ - return tok; \ + curr_lexer->update_token_positions (yyleng); \ + return curr_lexer->handle_op_internal (TOK, false, COMPAT); \ } \ } \ } \ @@ -286,9 +282,12 @@ if (get_set) \ { \ yyless (3); \ + curr_lexer->m_filepos.increment_column (3); \ curr_lexer->m_maybe_classdef_get_set_method = false; \ } \ \ + curr_lexer->update_token_positions (yyleng); \ + \ int id_tok = curr_lexer->handle_identifier (); \ \ if (id_tok >= 0) \ @@ -377,7 +376,7 @@ if (! curr_lexer->m_string_text.empty ()) { yyless (0); - + curr_lexer->m_tok_end = curr_lexer->m_filepos; return curr_lexer->finish_command_arg (); } @@ -394,10 +393,12 @@ if (! 
curr_lexer->m_string_text.empty ()) { yyless (0); - + curr_lexer->m_tok_end = curr_lexer->m_filepos; return curr_lexer->finish_command_arg (); } + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_filepos.next_line (); curr_lexer->m_looking_for_object_index = false; curr_lexer->m_at_beginning_of_statement = true; @@ -414,10 +415,12 @@ if (! curr_lexer->m_string_text.empty ()) { yyless (0); - + curr_lexer->m_tok_end = curr_lexer->m_filepos; return curr_lexer->finish_command_arg (); } + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_looking_for_object_index = false; curr_lexer->m_at_beginning_of_statement = true; curr_lexer->pop_start_state (); @@ -425,9 +428,10 @@ return curr_lexer->handle_token (yytext[0]); } else - curr_lexer->m_string_text += yytext; - - curr_lexer->m_filepos.increment_column (yyleng); + { + curr_lexer->m_string_text += yytext; + curr_lexer->m_filepos.increment_column (yyleng); + } } %{ @@ -490,7 +494,7 @@ if (! curr_lexer->m_string_text.empty ()) { yyless (0); - + curr_lexer->m_tok_end = curr_lexer->m_filepos; return curr_lexer->finish_command_arg (); } } @@ -531,11 +535,7 @@ int tok = curr_lexer->previous_token_value (); if (! (tok == ';' || tok == '[' || tok == '{')) - { - curr_lexer->xunput (';'); - // Adjust for semicolon that was not really in the input stream. - curr_lexer->m_filepos.decrement_column (); - } + curr_lexer->xunput (';'); } } @@ -556,6 +556,7 @@ <MATRIX_START>\] { curr_lexer->lexer_debug ("<MATRIX_START>\\]"); + curr_lexer->update_token_positions (yyleng); return curr_lexer->handle_close_bracket (']'); } @@ -566,6 +567,7 @@ <MATRIX_START>\} { curr_lexer->lexer_debug ("<MATRIX_START>\\}*"); + curr_lexer->update_token_positions (yyleng); return curr_lexer->handle_close_bracket ('}'); } @@ -588,16 +590,15 @@ { yyless (0); curr_lexer->xunput (','); - // Adjust for comma that was not really in the input stream. 
- curr_lexer->m_filepos.decrement_column (); } else { + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_nesting_level.bracket (); curr_lexer->m_looking_at_object_index.push_front (false); - curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_looking_for_object_index = false; curr_lexer->m_at_beginning_of_statement = false; @@ -618,6 +619,8 @@ \] { curr_lexer->lexer_debug ("\\]"); + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_nesting_level.remove (); curr_lexer->m_looking_at_object_index.pop_front (); @@ -831,6 +834,8 @@ <DQ_STRING_START>\" { curr_lexer->lexer_debug ("<DQ_STRING_START>\\\""); + // m_tok_beg was set when we started parsing the string. + curr_lexer->m_tok_end = curr_lexer->m_filepos; curr_lexer->m_filepos.increment_column (); curr_lexer->pop_start_state (); @@ -842,8 +847,8 @@ curr_lexer->push_token (new octave::token (DQ_STRING, curr_lexer->m_string_text, - curr_lexer->m_beg_string, - curr_lexer->m_filepos)); + curr_lexer->m_tok_beg, + curr_lexer->m_tok_end)); curr_lexer->m_string_text = ""; @@ -854,17 +859,18 @@ <DQ_STRING_START>\\[0-7]{1,3} { curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}"); - curr_lexer->m_filepos.increment_column (yyleng); + curr_lexer->update_token_positions (yyleng); unsigned int result; sscanf (yytext+1, "%o", &result); if (result > 0xff) { + // Use location of octal digits for error token. octave::token *tok = new octave::token (LEXICAL_ERROR, "invalid octal escape sequence in character string", - curr_lexer->m_filepos, curr_lexer->m_filepos); + curr_lexer->m_tok_beg, curr_lexer->m_tok_end); curr_lexer->push_token (tok); @@ -1002,6 +1008,7 @@ <DQ_STRING_START>{NL} { curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); + // Use current file position for error token. 
octave::token *tok = new octave::token (LEXICAL_ERROR, "unterminated character string constant", @@ -1028,6 +1035,8 @@ <SQ_STRING_START>\' { curr_lexer->lexer_debug ("<SQ_STRING_START>\\'"); + // m_tok_beg was set when we started parsing the string. + curr_lexer->m_tok_end = curr_lexer->m_filepos; curr_lexer->m_filepos.increment_column (); curr_lexer->pop_start_state (); @@ -1039,8 +1048,8 @@ curr_lexer->push_token (new octave::token (SQ_STRING, curr_lexer->m_string_text, - curr_lexer->m_beg_string, - curr_lexer->m_filepos)); + curr_lexer->m_tok_beg, + curr_lexer->m_tok_end)); curr_lexer->m_string_text = ""; @@ -1058,6 +1067,7 @@ <SQ_STRING_START>{NL} { curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); + // Use current file position for error token. octave::token *tok = new octave::token (LEXICAL_ERROR, "unterminated character string constant", @@ -1079,6 +1089,8 @@ curr_lexer->pop_start_state (); + curr_lexer->update_token_positions (yyleng); + int id_tok = curr_lexer->handle_fq_identifier (); if (id_tok >= 0) @@ -1137,8 +1149,6 @@ { yyless (0); unput (','); - // Adjust for comma that was not really in the input stream. - curr_lexer->m_filepos.decrement_column (); } else { @@ -1174,8 +1184,6 @@ { yyless (0); unput (','); - // Adjust for comma that was not really in the input stream. - curr_lexer->m_filepos.decrement_column (); } else { @@ -1278,11 +1286,16 @@ if (spc_pos != std::string::npos && spc_pos < at_or_dot_pos) { yyless (spc_pos); + curr_lexer->m_filepos.increment_column (spc_pos); + curr_lexer->update_token_positions (yyleng); + return curr_lexer->handle_identifier (); } } } + curr_lexer->update_token_positions (yyleng); + int id_tok = curr_lexer->handle_superclass_identifier (); if (id_tok >= 0) @@ -1310,6 +1323,8 @@ } else { + curr_lexer->update_token_positions (yyleng); + int id_tok = curr_lexer->handle_meta_identifier (); if (id_tok >= 0) @@ -1342,12 +1357,11 @@ { yyless (0); unput (','); - // Adjust for comma that was not really in the input stream. 
- curr_lexer->m_filepos.decrement_column (); } else { - curr_lexer->m_filepos.increment_column (yyleng); + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_at_beginning_of_statement = false; std::string ident = yytext; @@ -1372,15 +1386,15 @@ if (kw_token) tok = new octave::token (LEXICAL_ERROR, "function handles may not refer to keywords", - curr_lexer->m_filepos, - curr_lexer->m_filepos); + curr_lexer->m_tok_beg, + curr_lexer->m_tok_end); else { curr_lexer->m_looking_for_object_index = true; tok = new octave::token (FCN_HANDLE, ident, - curr_lexer->m_filepos, - curr_lexer->m_filepos); + curr_lexer->m_tok_beg, + curr_lexer->m_tok_end); } curr_lexer->push_token (tok); @@ -1411,6 +1425,7 @@ else if (curr_lexer->m_nesting_level.none () || curr_lexer->m_nesting_level.is_anon_fcn_body ()) { + curr_lexer->update_token_positions (yyleng); curr_lexer->m_filepos.next_line (); curr_lexer->m_at_beginning_of_statement = true; @@ -1419,6 +1434,10 @@ } else if (curr_lexer->m_nesting_level.is_bracket_or_brace ()) { + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_filepos.next_line (); + + // Use current file position for error token. octave::token *tok = new octave::token (LEXICAL_ERROR, "unexpected internal lexer error", @@ -1426,8 +1445,6 @@ curr_lexer->push_token (tok); - curr_lexer->m_filepos.next_line (); - return curr_lexer->count_token_internal (LEXICAL_ERROR); } } @@ -1470,8 +1487,6 @@ { yyless (0); curr_lexer->xunput (','); - // Adjust for comma that was not really in the input stream. - curr_lexer->m_filepos.decrement_column (); } } else @@ -1540,8 +1555,6 @@ { yyless (0); curr_lexer->xunput (','); - // Adjust for comma that was not really in the input stream. 
- curr_lexer->m_filepos.decrement_column (); } } else @@ -1625,6 +1638,8 @@ "(" { curr_lexer->lexer_debug ("("); + curr_lexer->update_token_positions (yyleng); + bool unput_comma = false; if (curr_lexer->whitespace_is_significant () @@ -1641,11 +1656,11 @@ { yyless (0); curr_lexer->xunput (','); - // Adjust for comma that was not really in the input stream. - curr_lexer->m_filepos.decrement_column (); } else { + curr_lexer->update_token_positions (yyleng); + // If we are looking for an object index, then push TRUE for // m_looking_at_object_index. Otherwise, just push whatever state // is current (so that we can pop it off the stack when we find @@ -1667,8 +1682,9 @@ ")" { curr_lexer->lexer_debug (")"); + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_nesting_level.remove (); - curr_lexer->m_filepos.increment_column (); curr_lexer->m_looking_at_object_index.pop_front (); @@ -1695,6 +1711,8 @@ } else { + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_looking_for_object_index = false; curr_lexer->m_at_beginning_of_statement = false; @@ -1752,8 +1770,6 @@ { yyless (0); curr_lexer->xunput (','); - // Adjust for comma that was not really in the input stream. - curr_lexer->m_filepos.decrement_column (); } else { @@ -1777,6 +1793,8 @@ "}" { curr_lexer->lexer_debug ("}"); + curr_lexer->update_token_positions (yyleng); + curr_lexer->m_looking_at_object_index.pop_front (); curr_lexer->m_looking_for_object_index = true; @@ -1810,6 +1828,7 @@ << octave::undo_string_escape (static_cast<char> (c)) << "' (ASCII " << c << ")"; + // Use current file position for error token. 
octave::token *tok = new octave::token (LEXICAL_ERROR, buf.str (), curr_lexer->m_filepos, curr_lexer->m_filepos); @@ -2168,7 +2187,8 @@ m_command_arg_paren_count = 0; m_token_count = 0; m_filepos = filepos (); - m_beg_string = filepos (); + m_tok_beg = filepos (); + m_tok_end = filepos (); m_string_text = ""; m_current_input_line = ""; m_comment_text = ""; @@ -2402,7 +2422,7 @@ void base_lexer::begin_string (int state) { - m_beg_string = m_filepos; + m_tok_beg = m_filepos; push_start_state (state); } @@ -2412,6 +2432,9 @@ { lexer_debug ("<<EOF>>"); + m_tok_beg = m_filepos; + m_tok_end = m_filepos; + if (m_block_comment_nesting_level != 0) { warning ("block comment open at end of input"); @@ -2496,7 +2519,19 @@ xunput (c, yytxt); } - bool + void + base_lexer::update_token_positions (int tok_len) + { + m_tok_beg = m_filepos; + m_tok_end = m_filepos; + + if (tok_len > 1) + m_tok_end.increment_column (tok_len - 1); + + m_filepos.increment_column (tok_len); + } + +bool base_lexer::looking_at_space (void) { int c = text_yyinput (); @@ -2544,9 +2579,9 @@ int base_lexer::make_keyword_token (const std::string& s) { - int len = s.length (); - - const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); + int slen = s.length (); + + const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), slen); if (! kw) return 0; @@ -2556,6 +2591,8 @@ // May be reset to true for some token types. 
m_at_beginning_of_statement = false; + update_token_positions (slen); + token *tok_val = nullptr; switch (kw->kw_id) @@ -2590,86 +2627,83 @@ return 0; } - tok_val = new token (end_kw, token::simple_end, m_filepos, - m_filepos); + tok_val = new token (end_kw, token::simple_end, m_tok_beg, m_tok_end); m_at_beginning_of_statement = true; break; case end_try_catch_kw: - tok_val = new token (end_try_catch_kw, token::try_catch_end, - m_filepos, m_filepos); + tok_val = new token (end_try_catch_kw, token::try_catch_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case end_unwind_protect_kw: tok_val = new token (end_unwind_protect_kw, - token::unwind_protect_end, m_filepos, - m_filepos); + token::unwind_protect_end, m_tok_beg, m_tok_end); m_at_beginning_of_statement = true; break; case endfor_kw: - tok_val = new token (endfor_kw, token::for_end, m_filepos, - m_filepos); + tok_val = new token (endfor_kw, token::for_end, m_tok_beg, m_tok_end); m_at_beginning_of_statement = true; break; case endfunction_kw: - tok_val = new token (endfunction_kw, token::function_end, - m_filepos, m_filepos); + tok_val = new token (endfunction_kw, token::function_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case endif_kw: - tok_val = new token (endif_kw, token::if_end, m_filepos, m_filepos); + tok_val = new token (endif_kw, token::if_end, m_tok_beg, m_tok_end); m_at_beginning_of_statement = true; break; case endparfor_kw: - tok_val = new token (endparfor_kw, token::parfor_end, m_filepos, - m_filepos); + tok_val = new token (endparfor_kw, token::parfor_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case endswitch_kw: - tok_val = new token (endswitch_kw, token::switch_end, m_filepos, - m_filepos); + tok_val = new token (endswitch_kw, token::switch_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case endwhile_kw: - tok_val = new token (endwhile_kw, token::while_end, m_filepos, - m_filepos); + 
tok_val = new token (endwhile_kw, token::while_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case endclassdef_kw: - tok_val = new token (endclassdef_kw, token::classdef_end, - m_filepos, m_filepos); + tok_val = new token (endclassdef_kw, token::classdef_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case endenumeration_kw: tok_val = new token (endenumeration_kw, token::enumeration_end, - m_filepos, m_filepos); + m_tok_beg, m_tok_end); m_at_beginning_of_statement = true; break; case endevents_kw: - tok_val = new token (endevents_kw, token::events_end, m_filepos, - m_filepos); + tok_val = new token (endevents_kw, token::events_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case endmethods_kw: - tok_val = new token (endmethods_kw, token::methods_end, m_filepos, - m_filepos); + tok_val = new token (endmethods_kw, token::methods_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; case endproperties_kw: - tok_val = new token (endproperties_kw, token::properties_end, - m_filepos, m_filepos); + tok_val = new token (endproperties_kw, token::properties_end, m_tok_beg, + m_tok_end); m_at_beginning_of_statement = true; break; @@ -2748,7 +2782,9 @@ // or just reset the line number here? The goal is to // track line info for command-line functions relative // to the function keyword. - m_filepos.line (1); + + m_filepos = filepos (); + update_token_positions (slen); } break; @@ -2758,18 +2794,17 @@ || m_reading_classdef_file) && ! 
m_fcn_file_full_name.empty ()) tok_val = new token (magic_file_kw, m_fcn_file_full_name, - m_filepos, m_filepos); + m_tok_beg, m_tok_end); else - tok_val = new token (magic_file_kw, "stdin", m_filepos, - m_filepos); + tok_val = new token (magic_file_kw, "stdin", m_tok_beg, m_tok_end); } break; case magic_line_kw: { - int l = m_filepos.line (); + int l = m_tok_beg.line (); tok_val = new token (magic_line_kw, static_cast<double> (l), - "", m_filepos, m_filepos); + "", m_tok_beg, m_tok_end); } break; @@ -2778,7 +2813,7 @@ } if (! tok_val) - tok_val = new token (kw->tok, true, m_filepos, m_filepos); + tok_val = new token (kw->tok, true, m_tok_beg, m_tok_end); push_token (tok_val); @@ -2901,9 +2936,9 @@ m_looking_for_object_index = false; m_at_beginning_of_statement = false; - push_token (new token (NUM, value, yytxt, m_filepos, m_filepos)); - - m_filepos.increment_column (flex_yyleng ()); + update_token_positions (flex_yyleng ()); + + push_token (new token (NUM, value, yytxt, m_tok_beg, m_tok_end)); } void @@ -2992,8 +3027,6 @@ m_looking_for_object_index = true; m_at_beginning_of_statement = false; - m_filepos.increment_column (); - if (! m_nesting_level.none ()) { m_nesting_level.remove (); @@ -3040,19 +3073,22 @@ bool kw_token = (is_keyword_token (meth) || fq_identifier_contains_keyword (cls)); + // Token positions should have already been updated before this + // function is called. + if (kw_token) { token *tok = new token (LEXICAL_ERROR, "method, class, and package names may not be keywords", - m_filepos, m_filepos); + m_tok_beg, m_tok_end); push_token (tok); return count_token_internal (LEXICAL_ERROR); } - push_token (new token (SUPERCLASSREF, meth, cls, m_filepos, m_filepos)); + push_token (new token (SUPERCLASSREF, meth, cls, m_tok_beg, m_tok_end)); m_filepos.increment_column (flex_yyleng ()); @@ -3070,17 +3106,20 @@ // Eliminate leading '?' 
std::string cls = txt.substr (1); + // Token positions should have already been updated before this + // function is called. + if (fq_identifier_contains_keyword (cls)) { token *tok = new token (LEXICAL_ERROR, "class and package names may not be keywords", - m_filepos, m_filepos); + m_tok_beg, m_tok_end); push_token (tok); return count_token_internal (LEXICAL_ERROR); } - push_token (new token (METAQUERY, cls, m_filepos, m_filepos)); + push_token (new token (METAQUERY, cls, m_tok_beg, m_tok_end)); m_filepos.increment_column (flex_yyleng ()); @@ -3095,19 +3134,22 @@ txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab), txt.end ()); + // Token positions should have already been updated before this + // function is called. + if (fq_identifier_contains_keyword (txt)) { token *tok = new token (LEXICAL_ERROR, "function, method, class, and package names may not be keywords", - m_filepos, m_filepos); + m_tok_beg, m_tok_end); push_token (tok); return count_token_internal (LEXICAL_ERROR); } - push_token (new token (FQ_IDENT, txt, m_filepos, m_filepos)); + push_token (new token (FQ_IDENT, txt, m_tok_beg, m_tok_end)); m_filepos.increment_column (flex_yyleng ()); @@ -3121,7 +3163,10 @@ int base_lexer::handle_identifier (void) { - std::string ident = flex_yytext (); + // Token positions should have already been updated before this + // function is called. 
+ + std::string ident = flex_yytext (); // If we are expecting a structure element, avoid recognizing // keywords and other special names and return STRUCT_ELT, which is @@ -3129,12 +3174,10 @@ if (m_looking_at_indirect_ref) { - push_token (new token (STRUCT_ELT, ident, m_filepos, m_filepos)); + push_token (new token (STRUCT_ELT, ident, m_tok_beg, m_tok_end)); m_looking_for_object_index = true; - m_filepos.increment_column (flex_yyleng ()); - return STRUCT_ELT; } @@ -3150,10 +3193,7 @@ if (kw_token) { if (kw_token >= 0) - { - m_filepos.increment_column (flex_yyleng ()); - m_looking_for_object_index = false; - } + m_looking_for_object_index = false; // The call to make_keyword_token set m_at_beginning_of_statement. @@ -3166,7 +3206,7 @@ symbol_record sr = (scope ? scope.insert (ident) : symbol_record (ident)); - token *tok = new token (NAME, sr, m_filepos, m_filepos); + token *tok = new token (NAME, sr, m_tok_beg, m_tok_end); // The following symbols are handled specially so that things like // @@ -3187,8 +3227,6 @@ push_token (tok); - m_filepos.increment_column (flex_yyleng ()); - // The magic end index can't be indexed. if (ident != "end") @@ -3581,20 +3619,6 @@ } int - base_lexer::handle_unary_op (int tok, bool bos) - { - return maybe_unput_comma_before_unary_op (tok) - ? -1 : handle_op_internal (tok, bos, true); - } - - int - base_lexer::handle_language_extension_unary_op (int tok, bool bos) - { - return maybe_unput_comma_before_unary_op (tok) - ? -1 : handle_op_internal (tok, bos, false); - } - - int base_lexer::handle_op_internal (int tok, bool bos, bool compat) { if (! compat) @@ -3637,7 +3661,7 @@ { int tok = SQ_STRING; - token *tok_val = new token (tok, m_string_text, m_filepos, m_filepos); + token *tok_val = new token (tok, m_string_text, m_tok_beg, m_tok_end); m_string_text = ""; m_command_arg_paren_count = 0; @@ -3649,7 +3673,7 @@ base_lexer::handle_token (int tok, token *tok_val) { if (! 
tok_val) - tok_val = new token (tok, m_filepos, m_filepos); + tok_val = new token (tok, m_tok_beg, m_tok_end); push_token (tok_val); @@ -3661,7 +3685,7 @@ int base_lexer::count_token (int tok) { - token *tok_val = new token (tok, m_filepos, m_filepos); + token *tok_val = new token (tok, m_tok_beg, m_tok_end); push_token (tok_val);
--- a/libinterp/parse-tree/oct-parse.yy Thu Dec 05 22:01:02 2019 -0600 +++ b/libinterp/parse-tree/oct-parse.yy Mon Dec 02 23:43:27 2019 -0600 @@ -220,10 +220,10 @@ %token <tok_val> GET SET %token <tok_val> FCN %token <tok_val> LEXICAL_ERROR +%token <tok_val> END_OF_INPUT // Other tokens. -%token<dummy_type> END_OF_INPUT -%token<dummy_type> INPUT_FILE +%token <dummy_type> INPUT_FILE // %token VARARGIN VARARGOUT // Nonterminals we construct. @@ -390,6 +390,8 @@ } | simple_list END_OF_INPUT { + YYUSE ($2); + $$ = nullptr; lexer.m_end_of_input = true; parser.statement_list (std::shared_ptr<octave::tree_statement_list> ($1)); @@ -633,9 +635,7 @@ anon_fcn_handle : '@' param_list anon_fcn_begin expression { - YYUSE ($1); - - $$ = parser.make_anon_fcn_handle ($2, $4); + $$ = parser.make_anon_fcn_handle ($2, $4, $1->beg_pos ()); if (! $$) { // make_anon_fcn_handle deleted $2 and $4. @@ -1570,7 +1570,7 @@ { octave::tree_statement *end_of_script = parser.make_end ("endscript", true, - lexer.m_filepos.line (), lexer.m_filepos.column ()); + $4->beg_pos (), $4->end_pos ()); parser.make_script ($3, end_of_script); } @@ -1581,6 +1581,7 @@ { YYUSE ($2); YYUSE ($5); + YYUSE ($7); // Unused symbol table context. lexer.m_symtab_context.pop (); @@ -1641,7 +1642,7 @@ if (parser.end_token_ok ($1, octave::token::function_end)) $$ = parser.make_end ("endfunction", false, - $1->line (), $1->column ()); + $1->beg_pos (), $1->end_pos ()); else { parser.end_token_error ($1, octave::token::function_end); @@ -1679,7 +1680,7 @@ } $$ = parser.make_end ("endfunction", true, - lexer.m_filepos.line (), lexer.m_filepos.column ()); + $1->beg_pos (), $1->end_pos ()); } ; @@ -2439,7 +2440,7 @@ + "' command matched by '" + end_token_as_string (tok->ettype ()) + "'"); - bison_error (msg, tok->line (), tok->column ()); + bison_error (msg, tok->beg_pos ()); } // Check to see that end tokens are properly matched. 
@@ -2558,12 +2559,9 @@ tree_anon_fcn_handle * base_parser::make_anon_fcn_handle (tree_parameter_list *param_list, - tree_expression *expr) + tree_expression *expr, + const filepos& at_pos) { - // FIXME: need to get these from the location of the @ symbol. - int l = m_lexer.m_filepos.line (); - int c = m_lexer.m_filepos.column (); - // FIXME: We need to examine EXPR and issue an error if any // sub-expression contains an assignment, compound assignment, // increment, or decrement operator. @@ -2590,9 +2588,12 @@ fcn_scope.mark_static (); + int at_line = at_pos.line (); + int at_column = at_pos.column (); + tree_anon_fcn_handle *retval = new tree_anon_fcn_handle (param_list, expr, fcn_scope, - parent_scope, l, c); + parent_scope, at_line, at_column); std::ostringstream buf; @@ -2607,7 +2608,7 @@ buf << ": *terminal input*"; else if (m_lexer.input_from_eval_string ()) buf << ": *eval string*"; - buf << ": line: " << l << " column: " << c; + buf << ": line: " << at_line << " column: " << at_column; std::string scope_name = buf.str (); @@ -3356,7 +3357,8 @@ delete lhs; delete rhs; - bison_error ("computed multiple assignment not allowed", l, c); + bison_error ("computed multiple assignment not allowed", + eq_tok->beg_pos ()); return nullptr; } @@ -3376,7 +3378,8 @@ delete lhs; delete rhs; - bison_error ("invalid assignment to keyword \"" + kw + "\"", l, c); + bison_error ("invalid assignment to keyword \"" + kw + "\"", + eq_tok->beg_pos ()); return nullptr; } @@ -3397,7 +3400,7 @@ delete rhs; bison_error ("invalid assignment to keyword \"" + kw + "\"", - l, c); + eq_tok->beg_pos ()); return nullptr; } @@ -3632,8 +3635,12 @@ } tree_statement * - base_parser::make_end (const std::string& type, bool eof, int l, int c) + base_parser::make_end (const std::string& type, bool eof, + const filepos& beg_pos, const filepos& /*end_pos*/) { + int l = beg_pos.line (); + int c = beg_pos.column (); + return make_statement (new tree_no_op_command (type, eof, l, c)); } @@ -4517,11 
+4524,20 @@ } void - base_parser::bison_error (const std::string& str, int l, int c) + base_parser::bison_error (const std::string& str) { - int err_line = l < 0 ? m_lexer.m_filepos.line () : l; - int err_col = c < 0 ? m_lexer.m_filepos.column () - 1 : c; - + bison_error (str, m_lexer.m_filepos); + } + + void + base_parser::bison_error (const std::string& str, const filepos& pos) + { + bison_error (str, pos.line (), pos.column ()); + } + + void + base_parser::bison_error (const std::string& str, int err_line, int err_col) + { std::ostringstream output_buf; if (m_lexer.m_reading_fcn_file || m_lexer.m_reading_script_file @@ -4541,8 +4557,6 @@ if (m_lexer.m_reading_fcn_file || m_lexer.m_reading_script_file || m_lexer.m_reading_classdef_file) curr_line = get_file_line (m_lexer.m_fcn_file_full_name, err_line); - else - curr_line = m_lexer.m_filepos.line (); if (! curr_line.empty ()) {
--- a/libinterp/parse-tree/parse.h Thu Dec 05 22:01:02 2019 -0600 +++ b/libinterp/parse-tree/parse.h Mon Dec 02 23:43:27 2019 -0600 @@ -216,7 +216,7 @@ // Build an anonymous function handle. tree_anon_fcn_handle * make_anon_fcn_handle (tree_parameter_list *param_list, - tree_expression * expr); + tree_expression * expr, const filepos& at_pos); // Build a colon expression. tree_expression * @@ -331,7 +331,8 @@ tree_statement_list *body, tree_statement *end_function); // Create a no-op statement for end_function. - tree_statement * make_end (const std::string& type, bool eof, int l, int c); + tree_statement * make_end (const std::string& type, bool eof, + const filepos& beg_pos, const filepos& end_pos); // Do most of the work for defining a function. octave_user_function * @@ -451,7 +452,9 @@ void disallow_command_syntax (void); // Generic error messages. - void bison_error (const std::string& s, int l = -1, int c = -1); + void bison_error (const std::string& s); + void bison_error (const std::string& s, const filepos& pos); + void bison_error (const std::string& s, int line, int column); friend octave_value parse_fcn_file (interpreter& interp, const std::string& full_file,