# HG changeset patch # User John W. Eaton # Date 1363026389 14400 # Node ID b45a90cdb0aeb72e5f917a8a85e78b2e5a032f22 # Parent 0b5ab09dfce46f625fc3e639b3ea596941f3b0a8 3/10 commits reworking the lexer diff -r 0b5ab09dfce4 -r b45a90cdb0ae libinterp/parse-tree/lex.ll --- a/libinterp/parse-tree/lex.ll Mon Mar 11 14:18:39 2013 -0400 +++ b/libinterp/parse-tree/lex.ll Mon Mar 11 14:26:29 2013 -0400 @@ -149,14 +149,9 @@ D [0-9] S [ \t] NL ((\n)|(\r)|(\r\n)) -SNL ({S}|{NL}) -EL (\.\.\.) -BS (\\) -CONT ({EL}|{BS}) +CONT ((\.\.\.)|(\\)) Im [iIjJ] CCHAR [#%] -COMMENT ({CCHAR}.*{NL}) -SNLCMT ({SNL}|{COMMENT}) IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*) EXPON ([DdEe][+-]?{D}+) NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+)) @@ -249,7 +244,7 @@ int tok = curr_lexer->previous_token_value (); - if (! (tok == ';' || tok == '[' || tok == '{')) + if (! (tok == ',' || tok == ';' || tok == '[' || tok == '{')) curr_lexer->xunput (','); } @@ -595,9 +590,9 @@ // Superclass method identifiers. %} -{IDENT}@{IDENT}{S}* | -{IDENT}@{IDENT}.{IDENT}{S}* { - curr_lexer->lexer_debug ("{IDENT}@{IDENT}{S}*|{IDENT}@{IDENT}.{IDENT}{S}*"); +{IDENT}@{IDENT} | +{IDENT}@{IDENT}.{IDENT} { + curr_lexer->lexer_debug ("{IDENT}@{IDENT}|{IDENT}@{IDENT}.{IDENT}"); int id_tok = curr_lexer->handle_superclass_identifier (); @@ -613,9 +608,9 @@ // Metaclass query %} -\?{IDENT}{S}* | -\?{IDENT}\.{IDENT}{S}* { - curr_lexer->lexer_debug ("\\?{IDENT}{S}*|\\?{IDENT}\\.{IDENT}{S}*"); +\?{IDENT} | +\?{IDENT}\.{IDENT} { + curr_lexer->lexer_debug ("\\?{IDENT}|\\?{IDENT}\\.{IDENT}"); int id_tok = curr_lexer->handle_meta_identifier (); @@ -678,8 +673,6 @@ "'" { curr_lexer->lexer_debug ("'"); - curr_lexer->current_input_column++; - int tok = curr_lexer->previous_token_value (); bool transpose = false; @@ -691,6 +684,7 @@ if (tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ()) { + curr_lexer->current_input_column++; int retval = curr_lexer->handle_string ('\''); return curr_lexer->count_token_internal (retval); } @@ -705,6 +699,7 @@ if (tok == ',' || tok == ';' || curr_lexer->previous_token_is_binop ()) { + curr_lexer->current_input_column++; int retval = curr_lexer->handle_string ('\''); return curr_lexer->count_token_internal (retval); } @@ -719,6 +714,7 @@ return curr_lexer->count_token (QUOTE); else { + curr_lexer->current_input_column++; int retval = curr_lexer->handle_string ('\''); return curr_lexer->count_token_internal (retval); } @@ -732,11 +728,41 @@ \" { curr_lexer->lexer_debug ("\""); - curr_lexer->current_input_column++; - int tok = curr_lexer->handle_string ('"'); - - return curr_lexer->count_token_internal (tok); -} + int tok = curr_lexer->previous_token_value (); + + bool transpose = false; + + if (curr_lexer->whitespace_is_significant ()) + { + if (curr_lexer->space_follows_previous_token ()) + { + if (tok == '[' || tok == '{' + || curr_lexer->previous_token_is_binop ()) + { + curr_lexer->current_input_column++; + int retval = curr_lexer->handle_string ('"'); + return curr_lexer->count_token_internal (retval); + } + else + { + yyless (0); + curr_lexer->xunput (','); + } + } + else + { + curr_lexer->current_input_column++; + int retval = curr_lexer->handle_string ('"'); + return curr_lexer->count_token_internal (retval); + } + } + else + { + curr_lexer->current_input_column++; + int retval = curr_lexer->handle_string ('"'); + return curr_lexer->count_token_internal (retval); + } + } %{ // Other operators. @@ -2782,10 +2808,6 @@ std::string tok = yytxt; - int c = yytxt[flex_yyleng()-1]; - - bool spc_gobbled = false; - // If we are expecting a structure element, avoid recognizing // keywords and other special names and return STRUCT_ELT, which is // a string that is also a valid identifier. But first, we have to