# HG changeset patch # User John W. Eaton # Date 1575332428 21600 # Node ID 0ca7f17de0416cc703acf6b71d0db5b83297b6d2 # Parent dd93e1fdc7dbed214a57f9efb0245537426a5cf1 use file position object to track line and column info in lexer and parser * filepos.h: New file. * libinterp/parse-tree/module.mk: Update. * lex.h, lex.ll (lexical_feedback::m_filepos): New member variable for tracking file position. (lexical_feedback::m_input_line_number, lexical_feedback::m_current_input_column): Delete. Replace all uses with operations on m_filepos. diff -r dd93e1fdc7db -r 0ca7f17de041 libinterp/parse-tree/filepos.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libinterp/parse-tree/filepos.h Mon Dec 02 18:20:28 2019 -0600 @@ -0,0 +1,73 @@ +/* + +Copyright (C) 2019 John W. Eaton + +This file is part of Octave. + +Octave is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Octave is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<https://www.gnu.org/licenses/>. + +*/ + +#if ! 
defined (octave_filepos_h) +#define octave_filepos_h 1 + +#include "octave-config.h" + +namespace octave +{ + class filepos /* A line number and column number pair; both default to 1. */ + { + public: + + filepos (int l = 1, int c = 1) : m_line (l), m_column (c) { } + + filepos (const filepos&) = default; + + filepos& operator = (const filepos&) = default; + + ~filepos (void) = default; + + void set (int l, int c) + { + m_line = l; + m_column = c; + } + + void line (int l) { m_line = l; } + void column (int c) { m_column = c; } + + int line (void) const { return m_line; } + int column (void) const { return m_column; } + + void increment_line (int val = 1) { m_line += val; } + void increment_column (int val = 1) { m_column += val; } + + void decrement_line (int val = 1) { m_line -= val; } + void decrement_column (int val = 1) { m_column -= val; } + + void next_line (void) /* Advance the line by one and reset the column to 1. */ + { + m_line++; + m_column = 1; + } + + private: + + int m_line; + int m_column; + }; +} + +#endif diff -r dd93e1fdc7db -r 0ca7f17de041 libinterp/parse-tree/lex.h --- a/libinterp/parse-tree/lex.h Mon Dec 02 15:23:01 2019 -0600 +++ b/libinterp/parse-tree/lex.h Mon Dec 02 18:20:28 2019 -0600 @@ -31,6 +31,7 @@ #include #include "comment-list.h" +#include "filepos.h" #include "input.h" #include "symscope.h" #include "token.h" @@ -286,8 +287,6 @@ m_reading_script_file (false), m_reading_classdef_file (false), m_buffer_function_text (false), - m_input_line_number (1), - m_current_input_column (1), m_bracketflag (0), m_braceflag (0), m_looping (0), @@ -296,6 +295,7 @@ m_block_comment_nesting_level (0), m_command_arg_paren_count (0), m_token_count (0), + m_filepos (), m_current_input_line (), m_comment_text (), m_help_text (), @@ -437,12 +437,6 @@ // parsing. bool m_buffer_function_text; - // the current input line number. - int m_input_line_number; - - // the column of the current token. - int m_current_input_column; - // square bracket level count. int m_bracketflag; @@ -468,6 +462,9 @@ // since the last reset. 
size_t m_token_count; + // The current position in the file (line and column). + filepos m_filepos; + // The current line of input. std::string m_current_input_line; diff -r dd93e1fdc7db -r 0ca7f17de041 libinterp/parse-tree/lex.ll --- a/libinterp/parse-tree/lex.ll Mon Dec 02 15:23:01 2019 -0600 +++ b/libinterp/parse-tree/lex.ll Mon Dec 02 18:20:28 2019 -0600 @@ -226,7 +226,7 @@ yyless (0); \ curr_lexer->xunput (','); \ /* Adjust for comma that was not really in the input stream. */ \ - curr_lexer->m_current_input_column--; \ + curr_lexer->m_filepos.decrement_column (); \ } \ else \ { \ @@ -245,8 +245,7 @@ #define HANDLE_STRING_CONTINUATION \ do \ { \ - curr_lexer->m_input_line_number++; \ - curr_lexer->m_current_input_column = 1; \ + curr_lexer->m_filepos.next_line (); \ \ if (curr_lexer->is_push_lexer ()) \ { \ @@ -411,8 +410,7 @@ COMMAND_ARG_FINISH; - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); curr_lexer->m_looking_for_object_index = false; curr_lexer->m_at_beginning_of_statement = true; curr_lexer->pop_start_state (); @@ -434,7 +432,7 @@ else curr_lexer->m_string_text += yytext; - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); } %{ @@ -448,7 +446,7 @@ curr_lexer->m_command_arg_paren_count += yyleng; curr_lexer->m_string_text += yytext; - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); } [\)\]\}]* { @@ -456,7 +454,7 @@ curr_lexer->m_command_arg_paren_count -= yyleng; curr_lexer->m_string_text += yytext; - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); } %{ @@ -480,7 +478,7 @@ else curr_lexer->m_string_text += yytext; - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); } %{ @@ -497,7 +495,7 @@ else curr_lexer->m_string_text += yytext; - curr_lexer->m_current_input_column += yyleng; + 
curr_lexer->m_filepos.increment_column (yyleng); } %{ @@ -508,13 +506,13 @@ curr_lexer->lexer_debug ("([\\.]|[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]*"); curr_lexer->m_string_text += yytext; - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); } {S}* { curr_lexer->lexer_debug ("{S}*"); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->mark_previous_token_trailing_space (); } @@ -522,8 +520,7 @@ {NL} { curr_lexer->lexer_debug ("{NL}"); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); if (curr_lexer->m_nesting_level.is_paren ()) curr_lexer->warn_language_extension ("bare newline inside parentheses"); @@ -535,7 +532,7 @@ { curr_lexer->xunput (';'); // Adjust for semicolon that was not really in the input stream. - curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } } } @@ -590,7 +587,7 @@ yyless (0); curr_lexer->xunput (','); // Adjust for comma that was not really in the input stream. 
- curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } else { @@ -598,7 +595,7 @@ curr_lexer->m_looking_at_object_index.push_front (false); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_looking_for_object_index = false; curr_lexer->m_at_beginning_of_statement = false; @@ -653,8 +650,7 @@ ^{S}*{CCHAR}\{{S}*{NL} { curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}"); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); if (curr_lexer->m_block_comment_nesting_level) curr_lexer->m_comment_text = "\n"; @@ -675,8 +671,7 @@ ^{S}*{CCHAR}\}{S}*{NL} { curr_lexer->lexer_debug ("^{S}*{CCHAR}\\}{S}*{NL}"); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); if (curr_lexer->m_block_comment_nesting_level > 1) curr_lexer->m_comment_text = "\n"; @@ -696,8 +691,7 @@ {ANY_EXCEPT_NL}*{NL} { curr_lexer->lexer_debug ("{ANY_EXCEPT_NL}*{NL}"); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); curr_lexer->m_comment_text += yytext; } @@ -762,10 +756,9 @@ curr_lexer->m_comment_text += &yytext[i]; - if (curr_lexer->m_current_input_column == 1) + if (curr_lexer->m_filepos.column () == 1) { - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); } else { @@ -787,7 +780,7 @@ // The next action should recognize a newline character and set // the input column back to 1, but we should try to keep the // input column location accurate anyway, so update here. 
- curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); } } @@ -829,14 +822,14 @@ \"\" { curr_lexer->lexer_debug ("\\\"\\\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '"'; } \" { curr_lexer->lexer_debug ("\\\""); - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->pop_start_state (); @@ -845,10 +838,12 @@ curr_lexer->m_looking_for_object_index = true; curr_lexer->m_at_beginning_of_statement = false; + octave::filepos pos (curr_lexer->m_string_line, + curr_lexer->m_string_column); + curr_lexer->push_token (new octave::token (DQ_STRING, curr_lexer->m_string_text, - curr_lexer->m_string_line, - curr_lexer->m_string_column)); + pos)); curr_lexer->m_string_text = ""; @@ -859,7 +854,7 @@ \\[0-7]{1,3} { curr_lexer->lexer_debug ("\\\\[0-7]{1,3}"); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); unsigned int result; sscanf (yytext+1, "%o", &result); @@ -869,8 +864,7 @@ octave::token *tok = new octave::token (LEXICAL_ERROR, "invalid octal escape sequence in character string", - curr_lexer->m_input_line_number, - curr_lexer->m_current_input_column); + curr_lexer->m_filepos); curr_lexer->push_token (tok); @@ -883,7 +877,7 @@ \\x[0-9a-fA-F]+ { curr_lexer->lexer_debug ("\\\\x[0-9a-fA-F]+"); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); unsigned int result; sscanf (yytext+2, "%x", &result); @@ -898,49 +892,49 @@ "\\a" { curr_lexer->lexer_debug ("\"\\\\a\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\a'; } "\\b" { curr_lexer->lexer_debug ("\"\\\\b\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\b'; } "\\f" { curr_lexer->lexer_debug 
("\"\\\\f\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\f'; } "\\n" { curr_lexer->lexer_debug ("\"\\\\n\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\n'; } "\\r" { curr_lexer->lexer_debug ("\"\\\\r\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\r'; } "\\t" { curr_lexer->lexer_debug ("\"\\\\t\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\t'; } "\\v" { curr_lexer->lexer_debug ("\"\\\\v\""); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\v'; } @@ -956,7 +950,7 @@ else warning_with_id ("Octave:deprecated-syntax", "%s; near line %d of file '%s'", msg, - curr_lexer->m_input_line_number, nm.c_str ()); + curr_lexer->m_filepos.line (), nm.c_str ()); HANDLE_STRING_CONTINUATION; } @@ -973,7 +967,7 @@ else warning_with_id ("Octave:deprecated-syntax", "%s; near line %d of file '%s'", msg, - curr_lexer->m_input_line_number, nm.c_str ()); + curr_lexer->m_filepos.line (), nm.c_str ()); HANDLE_STRING_CONTINUATION; } @@ -987,21 +981,21 @@ \\. { curr_lexer->lexer_debug ("\\\\."); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += yytext[1]; } \. 
{ curr_lexer->lexer_debug ("\\."); - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->m_string_text += yytext[0]; } [^\.\\\r\n\"]+ { curr_lexer->lexer_debug ("[^\\.\\\\\\r\\n\\\"]+"); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += yytext; } @@ -1011,13 +1005,11 @@ octave::token *tok = new octave::token (LEXICAL_ERROR, "unterminated character string constant", - curr_lexer->m_input_line_number, - curr_lexer->m_current_input_column); + curr_lexer->m_filepos); curr_lexer->push_token (tok); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); return curr_lexer->count_token_internal (LEXICAL_ERROR); } @@ -1029,14 +1021,14 @@ \'\' { curr_lexer->lexer_debug ("\\'\\'"); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += '\''; } \' { curr_lexer->lexer_debug ("\\'"); - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->pop_start_state (); @@ -1045,10 +1037,12 @@ curr_lexer->m_looking_for_object_index = true; curr_lexer->m_at_beginning_of_statement = false; + octave::filepos pos (curr_lexer->m_string_line, + curr_lexer->m_string_column); + curr_lexer->push_token (new octave::token (SQ_STRING, curr_lexer->m_string_text, - curr_lexer->m_string_line, - curr_lexer->m_string_column)); + pos)); curr_lexer->m_string_text = ""; @@ -1059,7 +1053,7 @@ [^\'\n\r]+ { curr_lexer->lexer_debug ("[^\\'\\n\\r]+"); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_string_text += yytext; } @@ -1069,13 +1063,11 @@ octave::token *tok = new octave::token (LEXICAL_ERROR, "unterminated character string constant", - curr_lexer->m_input_line_number, - curr_lexer->m_current_input_column); + curr_lexer->m_filepos); 
curr_lexer->push_token (tok); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); return curr_lexer->count_token_internal (LEXICAL_ERROR); } @@ -1102,7 +1094,7 @@ {S}+ { curr_lexer->lexer_debug ("{S}+"); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->mark_previous_token_trailing_space (); } @@ -1110,8 +1102,7 @@ (\.\.\.){ANY_EXCEPT_NL}*{NL} { curr_lexer->lexer_debug ("(\\.\\.\\.){ANY_EXCEPT_NL}*{NL}"); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); } {ANY_INCLUDING_NL} { @@ -1149,7 +1140,7 @@ yyless (0); unput (','); // Adjust for comma that was not really in the input stream. - curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } else { @@ -1186,7 +1177,7 @@ yyless (0); unput (','); // Adjust for comma that was not really in the input stream. - curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } else { @@ -1202,7 +1193,7 @@ %} {S}+ { - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->mark_previous_token_trailing_space (); } @@ -1234,7 +1225,7 @@ else warning_with_id ("Octave:deprecated-syntax", "%s; near line %d of file '%s'", msg, - curr_lexer->m_input_line_number, nm.c_str ()); + curr_lexer->m_filepos.line (), nm.c_str ()); curr_lexer->handle_continuation (); } @@ -1354,11 +1345,11 @@ yyless (0); unput (','); // Adjust for comma that was not really in the input stream. 
- curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } else { - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_at_beginning_of_statement = false; std::string ident = yytext; @@ -1383,15 +1374,13 @@ if (kw_token) tok = new octave::token (LEXICAL_ERROR, "function handles may not refer to keywords", - curr_lexer->m_input_line_number, - curr_lexer->m_current_input_column); + curr_lexer->m_filepos); else { curr_lexer->m_looking_for_object_index = true; tok = new octave::token (FCN_HANDLE, ident, - curr_lexer->m_input_line_number, - curr_lexer->m_current_input_column); + curr_lexer->m_filepos); } curr_lexer->push_token (tok); @@ -1413,8 +1402,7 @@ if (curr_lexer->m_nesting_level.is_paren ()) { - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); curr_lexer->m_at_beginning_of_statement = false; curr_lexer->warn_language_extension @@ -1423,8 +1411,7 @@ else if (curr_lexer->m_nesting_level.none () || curr_lexer->m_nesting_level.is_anon_fcn_body ()) { - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); curr_lexer->m_at_beginning_of_statement = true; @@ -1435,13 +1422,11 @@ octave::token *tok = new octave::token (LEXICAL_ERROR, "unexpected internal lexer error", - curr_lexer->m_input_line_number, - curr_lexer->m_current_input_column); + curr_lexer->m_filepos); curr_lexer->push_token (tok); - curr_lexer->m_input_line_number++; - curr_lexer->m_current_input_column = 1; + curr_lexer->m_filepos.next_line (); return curr_lexer->count_token_internal (LEXICAL_ERROR); } @@ -1458,13 +1443,13 @@ if (curr_lexer->previous_token_may_be_command () && curr_lexer->space_follows_previous_token ()) { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->push_start_state (COMMAND_START); curr_lexer->begin_string (SQ_STRING_START); 
} else if (curr_lexer->m_at_beginning_of_statement) { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->begin_string (SQ_STRING_START); } else @@ -1478,7 +1463,7 @@ if (tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ()) { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->begin_string (SQ_STRING_START); } else @@ -1486,7 +1471,7 @@ yyless (0); curr_lexer->xunput (','); // Adjust for comma that was not really in the input stream. - curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } } else @@ -1495,12 +1480,12 @@ || curr_lexer->previous_token_is_binop () || curr_lexer->previous_token_is_keyword ()) { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->begin_string (SQ_STRING_START); } else { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); return curr_lexer->count_token (HERMITIAN); } } @@ -1511,12 +1496,12 @@ || curr_lexer->previous_token_is_binop () || curr_lexer->previous_token_is_keyword ()) { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->begin_string (SQ_STRING_START); } else { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); return curr_lexer->count_token (HERMITIAN); } } @@ -1533,7 +1518,7 @@ if (curr_lexer->previous_token_may_be_command () && curr_lexer->space_follows_previous_token ()) { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->push_start_state (COMMAND_START); curr_lexer->begin_string (DQ_STRING_START); } @@ -1548,7 +1533,7 @@ if (tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ()) { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->begin_string (DQ_STRING_START); } else @@ -1556,18 +1541,18 @@ yyless (0); curr_lexer->xunput (','); // 
Adjust for comma that was not really in the input stream. - curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } } else { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->begin_string (DQ_STRING_START); } } else { - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->begin_string (DQ_STRING_START); } } @@ -1657,7 +1642,7 @@ yyless (0); curr_lexer->xunput (','); // Adjust for comma that was not really in the input stream. - curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } else { @@ -1683,7 +1668,7 @@ curr_lexer->lexer_debug (")"); curr_lexer->m_nesting_level.remove (); - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); curr_lexer->m_looking_at_object_index.pop_front (); @@ -1768,7 +1753,7 @@ yyless (0); curr_lexer->xunput (','); // Adjust for comma that was not really in the input stream. - curr_lexer->m_current_input_column--; + curr_lexer->m_filepos.decrement_column (); } else { @@ -1777,7 +1762,7 @@ curr_lexer->m_looking_at_object_index.push_front (curr_lexer->m_looking_for_object_index); - curr_lexer->m_current_input_column += yyleng; + curr_lexer->m_filepos.increment_column (yyleng); curr_lexer->m_looking_for_object_index = false; curr_lexer->m_at_beginning_of_statement = false; @@ -1827,12 +1812,11 @@ octave::token *tok = new octave::token (LEXICAL_ERROR, buf.str (), - curr_lexer->m_input_line_number, - curr_lexer->m_current_input_column); + curr_lexer->m_filepos); curr_lexer->push_token (tok); - curr_lexer->m_current_input_column++; + curr_lexer->m_filepos.increment_column (); return curr_lexer->count_token_internal (LEXICAL_ERROR); } @@ -2175,8 +2159,6 @@ m_reading_script_file = false; m_reading_classdef_file = false; m_buffer_function_text = false; - m_input_line_number = 1; - m_current_input_column = 1; m_bracketflag = 0; m_braceflag = 0; m_looping = 0; @@ -2185,6 
+2167,7 @@ m_block_comment_nesting_level = 0; m_command_arg_paren_count = 0; m_token_count = 0; + m_filepos = filepos (); m_current_input_line = ""; m_comment_text = ""; m_help_text = ""; @@ -2420,8 +2403,8 @@ void base_lexer::begin_string (int state) { - m_string_line = m_input_line_number; - m_string_column = m_current_input_column; + m_string_line = m_filepos.line (); + m_string_column = m_filepos.column (); push_start_state (state); } @@ -2438,7 +2421,7 @@ if ((m_reading_fcn_file || m_reading_script_file || m_reading_classdef_file) && ! m_fcn_file_name.empty ()) warning ("near line %d of file '%s.m'", - m_input_line_number, m_fcn_file_name.c_str ()); + m_filepos.line (), m_fcn_file_name.c_str ()); } return handle_token (END_OF_INPUT); @@ -2555,9 +2538,6 @@ int base_lexer::is_keyword_token (const std::string& s) { - int l = m_input_line_number; - int c = m_current_input_column; - int len = s.length (); const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len); @@ -2603,73 +2583,73 @@ return 0; } - tok_val = new token (end_kw, token::simple_end, l, c); + tok_val = new token (end_kw, token::simple_end, m_filepos); m_at_beginning_of_statement = true; break; case end_try_catch_kw: - tok_val = new token (end_try_catch_kw, token::try_catch_end, l, c); + tok_val = new token (end_try_catch_kw, token::try_catch_end, m_filepos); m_at_beginning_of_statement = true; break; case end_unwind_protect_kw: tok_val = new token (end_unwind_protect_kw, - token::unwind_protect_end, l, c); + token::unwind_protect_end, m_filepos); m_at_beginning_of_statement = true; break; case endfor_kw: - tok_val = new token (endfor_kw, token::for_end, l, c); + tok_val = new token (endfor_kw, token::for_end, m_filepos); m_at_beginning_of_statement = true; break; case endfunction_kw: - tok_val = new token (endfunction_kw, token::function_end, l, c); + tok_val = new token (endfunction_kw, token::function_end, m_filepos); m_at_beginning_of_statement = true; break; case endif_kw: - tok_val = 
new token (endif_kw, token::if_end, l, c); + tok_val = new token (endif_kw, token::if_end, m_filepos); m_at_beginning_of_statement = true; break; case endparfor_kw: - tok_val = new token (endparfor_kw, token::parfor_end, l, c); + tok_val = new token (endparfor_kw, token::parfor_end, m_filepos); m_at_beginning_of_statement = true; break; case endswitch_kw: - tok_val = new token (endswitch_kw, token::switch_end, l, c); + tok_val = new token (endswitch_kw, token::switch_end, m_filepos); m_at_beginning_of_statement = true; break; case endwhile_kw: - tok_val = new token (endwhile_kw, token::while_end, l, c); + tok_val = new token (endwhile_kw, token::while_end, m_filepos); m_at_beginning_of_statement = true; break; case endclassdef_kw: - tok_val = new token (endclassdef_kw, token::classdef_end, l, c); + tok_val = new token (endclassdef_kw, token::classdef_end, m_filepos); m_at_beginning_of_statement = true; break; case endenumeration_kw: - tok_val = new token (endenumeration_kw, token::enumeration_end, l, c); + tok_val = new token (endenumeration_kw, token::enumeration_end, m_filepos); m_at_beginning_of_statement = true; break; case endevents_kw: - tok_val = new token (endevents_kw, token::events_end, l, c); + tok_val = new token (endevents_kw, token::events_end, m_filepos); m_at_beginning_of_statement = true; break; case endmethods_kw: - tok_val = new token (endmethods_kw, token::methods_end, l, c); + tok_val = new token (endmethods_kw, token::methods_end, m_filepos); m_at_beginning_of_statement = true; break; case endproperties_kw: - tok_val = new token (endproperties_kw, token::properties_end, l, c); + tok_val = new token (endproperties_kw, token::properties_end, m_filepos); m_at_beginning_of_statement = true; break; @@ -2744,7 +2724,11 @@ m_buffer_function_text = true; m_function_text += (m_current_input_line + "\n"); - m_input_line_number = 1; + // FIXME: do we need to save and restore the file position + // or just reset the line number here? 
The goal is to + // track line info for command-line functions relative + // to the function keyword. + m_filepos.line (1); } break; @@ -2753,15 +2737,18 @@ if ((m_reading_fcn_file || m_reading_script_file || m_reading_classdef_file) && ! m_fcn_file_full_name.empty ()) - tok_val = new token (magic_file_kw, m_fcn_file_full_name, l, c); + tok_val = new token (magic_file_kw, m_fcn_file_full_name, m_filepos); else - tok_val = new token (magic_file_kw, "stdin", l, c); + tok_val = new token (magic_file_kw, "stdin", m_filepos); } break; case magic_line_kw: - tok_val = new token (magic_line_kw, static_cast<double> (l), - "", l, c); + { + int l = m_filepos.line (); + tok_val = new token (magic_line_kw, static_cast<double> (l), + "", m_filepos); + } break; default: @@ -2769,7 +2756,7 @@ } if (! tok_val) - tok_val = new token (kw->tok, true, l, c); + tok_val = new token (kw->tok, true, m_filepos); push_token (tok_val); @@ -2895,10 +2882,9 @@ m_looking_for_object_index = false; m_at_beginning_of_statement = false; - push_token (new token (NUM, value, yytxt, m_input_line_number, - m_current_input_column)); - - m_current_input_column += flex_yyleng (); + push_token (new token (NUM, value, yytxt, m_filepos)); + + m_filepos.increment_column (flex_yyleng ()); } void @@ -2957,8 +2943,7 @@ m_at_beginning_of_statement = saved_bos; } - m_input_line_number++; - m_current_input_column = 1; + m_filepos.next_line (); } void @@ -2988,7 +2973,7 @@ m_looking_for_object_index = true; m_at_beginning_of_statement = false; - m_current_input_column++; + m_filepos.increment_column (); if (! 
m_nesting_level.none ()) { @@ -3041,7 +3026,7 @@ token *tok = new token (LEXICAL_ERROR, "method, class, and package names may not be keywords", - m_input_line_number, m_current_input_column); + m_filepos); push_token (tok); @@ -3049,9 +3034,9 @@ } push_token (new token (SUPERCLASSREF, meth, cls, - m_input_line_number, m_current_input_column)); - - m_current_input_column += flex_yyleng (); + m_filepos)); + + m_filepos.increment_column (flex_yyleng ()); return SUPERCLASSREF; } @@ -3071,16 +3056,15 @@ { token *tok = new token (LEXICAL_ERROR, "class and package names may not be keywords", - m_input_line_number, m_current_input_column); + m_filepos); push_token (tok); return count_token_internal (LEXICAL_ERROR); } - push_token (new token (METAQUERY, cls, m_input_line_number, - m_current_input_column)); - - m_current_input_column += flex_yyleng (); + push_token (new token (METAQUERY, cls, m_filepos)); + + m_filepos.increment_column (flex_yyleng ()); return METAQUERY; } @@ -3098,17 +3082,16 @@ token *tok = new token (LEXICAL_ERROR, "function, method, class, and package names may not be keywords", - m_input_line_number, m_current_input_column); + m_filepos); push_token (tok); return count_token_internal (LEXICAL_ERROR); } - push_token (new token (FQ_IDENT, txt, m_input_line_number, - m_current_input_column)); - - m_current_input_column += flex_yyleng (); + push_token (new token (FQ_IDENT, txt, m_filepos)); + + m_filepos.increment_column (flex_yyleng ()); return FQ_IDENT; } @@ -3128,12 +3111,11 @@ if (m_looking_at_indirect_ref) { - push_token (new token (STRUCT_ELT, ident, m_input_line_number, - m_current_input_column)); + push_token (new token (STRUCT_ELT, ident, m_filepos)); m_looking_for_object_index = true; - m_current_input_column += flex_yyleng (); + m_filepos.increment_column (flex_yyleng ()); return STRUCT_ELT; } @@ -3151,7 +3133,7 @@ { if (kw_token >= 0) { - m_current_input_column += flex_yyleng (); + m_filepos.increment_column (flex_yyleng ()); 
m_looking_for_object_index = false; } @@ -3166,7 +3148,7 @@ symbol_record sr = (scope ? scope.insert (ident) : symbol_record (ident)); - token *tok = new token (NAME, sr, m_input_line_number, m_current_input_column); + token *tok = new token (NAME, sr, m_filepos); // The following symbols are handled specially so that things like // @@ -3187,7 +3169,7 @@ push_token (tok); - m_current_input_column += flex_yyleng (); + m_filepos.increment_column (flex_yyleng ()); // The magic end index can't be indexed. @@ -3207,11 +3189,11 @@ if (nm.empty ()) warning_with_id ("Octave:separator-insert", "potential auto-insertion of '%c' near line %d", - sep, m_input_line_number); + sep, m_filepos.line ()); else warning_with_id ("Octave:separator-insert", "potential auto-insertion of '%c' near line %d of file %s", - sep, m_input_line_number, nm.c_str ()); + sep, m_filepos.line (), nm.c_str ()); } void @@ -3222,11 +3204,11 @@ if (nm.empty ()) warning_with_id ("Octave:single-quote-string", "single quote delimited string near line %d", - m_input_line_number); + m_filepos.line ()); else warning_with_id ("Octave:single-quote-string", "single quote delimited string near line %d of file %s", - m_input_line_number, nm.c_str ()); + m_filepos.line (), nm.c_str ()); } void @@ -3241,7 +3223,7 @@ else warning_with_id ("Octave:language-extension", "Octave language extension used: %s near line %d offile %s", - msg.c_str (), m_input_line_number, nm.c_str ()); + msg.c_str (), m_filepos.line (), nm.c_str ()); } void @@ -3600,9 +3582,9 @@ if (! 
compat) warn_language_extension_operator (flex_yytext ()); - push_token (new token (tok, m_input_line_number, m_current_input_column)); - - m_current_input_column += flex_yyleng (); + push_token (new token (tok, m_filepos)); + + m_filepos.increment_column (flex_yyleng ()); m_looking_for_object_index = false; m_at_beginning_of_statement = bos; @@ -3631,8 +3613,7 @@ int base_lexer::handle_token (const std::string& name, int tok) { - token *tok_val = new token (tok, name, m_input_line_number, - m_current_input_column); + token *tok_val = new token (tok, name, m_filepos); return handle_token (tok, tok_val); } @@ -3641,11 +3622,11 @@ base_lexer::handle_token (int tok, token *tok_val) { if (! tok_val) - tok_val = new token (tok, m_input_line_number, m_current_input_column); + tok_val = new token (tok, m_filepos); push_token (tok_val); - m_current_input_column += flex_yyleng (); + m_filepos.increment_column (flex_yyleng ()); return count_token_internal (tok); } @@ -3653,7 +3634,7 @@ int base_lexer::count_token (int tok) { - token *tok_val = new token (tok, m_input_line_number, m_current_input_column); + token *tok_val = new token (tok, m_filepos); push_token (tok_val); diff -r dd93e1fdc7db -r 0ca7f17de041 libinterp/parse-tree/module.mk --- a/libinterp/parse-tree/module.mk Mon Dec 02 15:23:01 2019 -0600 +++ b/libinterp/parse-tree/module.mk Mon Dec 02 18:20:28 2019 -0600 @@ -2,6 +2,7 @@ %reldir%/anon-fcn-validator.h \ %reldir%/bp-table.h \ %reldir%/comment-list.h \ + %reldir%/filepos.h \ %reldir%/jit-ir.h \ %reldir%/jit-typeinfo.h \ %reldir%/jit-util.h \ diff -r dd93e1fdc7db -r 0ca7f17de041 libinterp/parse-tree/oct-parse.yy --- a/libinterp/parse-tree/oct-parse.yy Mon Dec 02 15:23:01 2019 -0600 +++ b/libinterp/parse-tree/oct-parse.yy Mon Dec 02 18:20:28 2019 -0600 @@ -1570,8 +1570,7 @@ { octave::tree_statement *end_of_script = parser.make_end ("endscript", true, - lexer.m_input_line_number, - lexer.m_current_input_column); + lexer.m_filepos.line (), lexer.m_filepos.column 
()); parser.make_script ($3, end_of_script); } @@ -1680,8 +1679,7 @@ } $$ = parser.make_end ("endfunction", true, - lexer.m_input_line_number, - lexer.m_current_input_column); + lexer.m_filepos.line (), lexer.m_filepos.column ()); } ; @@ -2563,8 +2561,8 @@ tree_expression *expr) { // FIXME: need to get these from the location of the @ symbol. - int l = m_lexer.m_input_line_number; - int c = m_lexer.m_current_input_column; + int l = m_lexer.m_filepos.line (); + int c = m_lexer.m_filepos.column (); // FIXME: We need to examine EXPR and issue an error if any // sub-expression contains an assignment, compound assignment, @@ -4521,8 +4519,8 @@ void base_parser::bison_error (const std::string& str, int l, int c) { - int err_line = l < 0 ? m_lexer.m_input_line_number : l; - int err_col = c < 0 ? m_lexer.m_current_input_column - 1 : c; + int err_line = l < 0 ? m_lexer.m_filepos.line () : l; + int err_col = c < 0 ? m_lexer.m_filepos.column () - 1 : c; std::ostringstream output_buf;
diff -r dd93e1fdc7db -r 0ca7f17de041 libinterp/parse-tree/token.cc --- a/libinterp/parse-tree/token.cc Mon Dec 02 15:23:01 2019 -0600 +++ b/libinterp/parse-tree/token.cc Mon Dec 02 18:20:28 2019 -0600 @@ -31,51 +31,51 @@ namespace octave { - token::token (int tv, int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num (l), m_column_num (c), + token::token (int tv, const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (generic_token), m_tok_info (), m_orig_text () { } - token::token (int tv, bool is_kw, int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num (l), m_column_num (c), + token::token (int tv, bool is_kw, const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (is_kw ? keyword_token : generic_token), m_tok_info (), m_orig_text () { } - token::token (int tv, const char *s, int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num (l), m_column_num (c), + token::token (int tv, const char *s, const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (string_token), m_tok_info (s), m_orig_text () { } - token::token (int tv, const std::string& s, int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num (l), m_column_num (c), + token::token (int tv, const std::string& s, const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (string_token), m_tok_info (s), m_orig_text () { } - token::token (int tv, double d, const std::string& s, int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num (l), m_column_num (c), + token::token (int tv, double d, const std::string& s, const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (double_token), m_tok_info (d), m_orig_text (s) { } - token::token (int tv, end_tok_type t, int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num
(l), m_column_num (c), + token::token (int tv, end_tok_type t, const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (ettype_token), m_tok_info (t), m_orig_text () { } - token::token (int tv, const symbol_record& sr, int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num (l), m_column_num (c), + token::token (int tv, const symbol_record& sr, const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (sym_rec_token), m_tok_info (sr), m_orig_text () { } token::token (int tv, const std::string& meth, const std::string& cls, - int l, int c) - : m_maybe_cmd (false), m_tspc (false), m_line_num (l), m_column_num (c), + const filepos& pos) + : m_maybe_cmd (false), m_tspc (false), m_pos (pos), m_tok_val (tv), m_type_tag (scls_name_token), m_tok_info (meth, cls), m_orig_text () { } diff -r dd93e1fdc7db -r 0ca7f17de041 libinterp/parse-tree/token.h --- a/libinterp/parse-tree/token.h Mon Dec 02 15:23:01 2019 -0600 +++ b/libinterp/parse-tree/token.h Mon Dec 02 18:20:28 2019 -0600 @@ -27,6 +27,7 @@ #include +#include "filepos.h" #include "symrec.h" namespace octave @@ -64,15 +65,15 @@ while_end, }; - token (int tv, int l, int c); - token (int tv, bool is_keyword, int l, int c); - token (int tv, const char *s, int l, int c); - token (int tv, const std::string& s, int l, int c); - token (int tv, double d, const std::string& s, int l, int c); - token (int tv, end_tok_type t, int l, int c); - token (int tv, const symbol_record& s, int l, int c); + token (int tv, const filepos& pos); + token (int tv, bool is_keyword, const filepos& pos); + token (int tv, const char *s, const filepos& pos); + token (int tv, const std::string& s, const filepos& pos); + token (int tv, double d, const std::string& s, const filepos& pos); + token (int tv, end_tok_type t, const filepos& pos); + token (int tv, const symbol_record& s, const filepos& pos); token (int tv, const std::string& mth, const 
std::string& cls, - int l, int c); + const filepos& pos); // No copying! @@ -91,8 +92,8 @@ int token_value (void) const { return m_tok_val; } bool token_value_is (int tv) const { return tv == m_tok_val; } - int line (void) const { return m_line_num; } - int column (void) const { return m_column_num; } + int line (void) const { return m_pos.line (); } + int column (void) const { return m_pos.column (); } bool iskeyword (void) const { @@ -128,9 +129,7 @@ bool m_tspc; - int m_line_num; - - int m_column_num; + filepos m_pos; int m_tok_val;