changeset 26708:606605d0cd31

allow spaces in FQ_IDENT, SUPERCLASSREF, and METAQUERY tokens in lexer. This change is part of a fix for bug #47680. * lex.ll (FQIDENT): Allow spaces around the '.' character. Allow spaces around the '@' character when recognizing superclass method identifiers. Allow FQIDENT on either side of the '@' character, not just after it. Allow spaces between '?' and FQIDENT when recognizing a metaclass query. (base_lexer::handle_superclass_identifier, base_lexer::handle_meta_identifier, base_lexer::handle_fq_identifier): Strip whitespace from text before creating tokens. (is_space_or_tab): New static function. * debug.cc: Update test for change in lexer. We now have an ambiguity between superclass references and things like "dbstop @ftp". WDMD?
author John W. Eaton <jwe@octave.org>
date Sat, 09 Feb 2019 15:14:24 +0000
parents f35db7d5b7a4
children 17e7d310def8
files libinterp/corefcn/debug.cc libinterp/parse-tree/lex.ll
diffstat 2 files changed, 33 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/debug.cc	Fri Feb 08 06:35:38 2019 +0000
+++ b/libinterp/corefcn/debug.cc	Sat Feb 09 15:14:24 2019 +0000
@@ -529,8 +529,8 @@
 %! endif
 %! unwind_protect
 %!   dbclear all;   # Clear out breakpoints before test
-%!   dbstop @ftp/dir;
-%!   dbstop @audioplayer/set 70;
+%!   dbstop ("@ftp/dir");
+%!   dbstop ("@audioplayer/set", "70");
 %!   dbstop quantile>__quantile__;
 %!   dbstop ls;
 %!   s = dbstatus;
--- a/libinterp/parse-tree/lex.ll	Fri Feb 08 06:35:38 2019 +0000
+++ b/libinterp/parse-tree/lex.ll	Sat Feb 09 15:14:24 2019 +0000
@@ -332,7 +332,7 @@
 Im      [iIjJ]
 CCHAR   [#%]
 IDENT   ([_$a-zA-Z][_$a-zA-Z0-9]*)
-FQIDENT ({IDENT}(\.{IDENT})*)
+FQIDENT ({IDENT}({S}*\.{S}*{IDENT})*)
 EXPON   ([DdEe][+-]?{D}{D_}*)
 NUMBIN  (0[bB][01_]+)
 NUMHEX  (0[xX][0-9a-fA-F][0-9a-fA-F_]*)
@@ -1079,7 +1079,7 @@
 %}
 
 <FQ_IDENT_START>{FQIDENT} {
-    curr_lexer->lexer_debug ("<FQ_IDENT_START>{FQIDENT}");
+    curr_lexer->lexer_debug ("<FQ_IDENT_START>{FQIDENT}{S}*");
     curr_lexer->pop_start_state ();
 
     int id_tok = curr_lexer->handle_fq_identifier ();
@@ -1099,6 +1099,8 @@
   }
 
 <FQ_IDENT_START>. {
+    // If input doesn't match FQIDENT, return char and go to previous
+    // start state.
     yyless (0);
     curr_lexer->pop_start_state ();
   }
@@ -1245,8 +1247,8 @@
 // Superclass method identifiers.
 %}
 
-{IDENT}@{FQIDENT} {
-    curr_lexer->lexer_debug ("{IDENT}@{FQIDENT}");
+{FQIDENT}{S}*@{S}*{FQIDENT} {
+    curr_lexer->lexer_debug ("{FQIDENT}{S}*@{S}*{FQIDENT}");
 
     if (curr_lexer->previous_token_may_be_command ())
       {
@@ -1270,8 +1272,8 @@
 // Metaclass query
 %}
 
-\?{FQIDENT} {
-    curr_lexer->lexer_debug ("\\?{FQIDENT}");
+\?{S}*{FQIDENT} {
+    curr_lexer->lexer_debug ("\\?{S}*{FQIDENT}");
 
     if (curr_lexer->previous_token_may_be_command ()
         &&  curr_lexer->space_follows_previous_token ())
@@ -1800,6 +1802,12 @@
   std::free (ptr);
 }
 
+static inline bool
+is_space_or_tab (char c)
+{
+  return c == ' ' || c == '\t';
+}
+
 static void
 display_character (char c)
 {
@@ -2967,6 +2975,9 @@
   {
     std::string txt = flex_yytext ();
 
+    txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
+               txt.end ());
+
     size_t pos = txt.find ("@");
 
     std::string meth_or_obj = txt.substr (0, pos);
@@ -2998,7 +3009,13 @@
   int
   base_lexer::handle_meta_identifier (void)
   {
-    std::string cls = std::string(flex_yytext ()).substr (1);
+    std::string txt = flex_yytext ();
+
+    txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
+               txt.end ());
+
+    // Eliminate leading '?'
+    std::string cls = txt.substr (1);
 
     if (fq_identifier_contains_keyword (cls))
       {
@@ -3021,9 +3038,12 @@
   int
   base_lexer::handle_fq_identifier (void)
   {
-    std::string fq_id = flex_yytext ();
-
-    if (fq_identifier_contains_keyword (fq_id))
+    std::string txt = flex_yytext ();
+
+    txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
+               txt.end ());
+
+    if (fq_identifier_contains_keyword (txt))
       {
         token *tok
           = new token (LEXICAL_ERROR,
@@ -3035,7 +3055,7 @@
         return count_token_internal (LEXICAL_ERROR);
       }
 
-    push_token (new token (FQ_IDENT, fq_id, m_input_line_number,
+    push_token (new token (FQ_IDENT, txt, m_input_line_number,
                            m_current_input_column));
 
     m_current_input_column += flex_yyleng ();