# HG changeset patch # User John W. Eaton # Date 1363026657 14400 # Node ID 15f55df088e74334ee5c2dd117905146c9b1854c # Parent 71ee3afedb694ab9dd886960bf2f7cc4b9c5a379 6/10 commits reworking the lexer diff -r 71ee3afedb69 -r 15f55df088e7 libinterp/parse-tree/lex.h --- a/libinterp/parse-tree/lex.h Mon Mar 11 14:29:19 2013 -0400 +++ b/libinterp/parse-tree/lex.h Mon Mar 11 14:30:57 2013 -0400 @@ -274,6 +274,8 @@ bool previous_token_is_binop (void) const; + bool previous_token_is_keyword (void) const; + bool previous_token_may_be_command (void) const; // true means that we have encountered eof on the input stream. diff -r 71ee3afedb69 -r 15f55df088e7 libinterp/parse-tree/lex.ll --- a/libinterp/parse-tree/lex.ll Mon Mar 11 14:29:19 2013 -0400 +++ b/libinterp/parse-tree/lex.ll Mon Mar 11 14:30:57 2013 -0400 @@ -242,10 +242,15 @@ {NL} { curr_lexer->lexer_debug ("{NL}"); - int tok = curr_lexer->previous_token_value (); - - if (! (tok == ';' || tok == '[' || tok == '{')) - curr_lexer->xunput (';'); + if (curr_lexer->nesting_level.is_paren ()) + curr_lexer->gripe_matlab_incompatible ("bare newline inside parentheses"); + else + { + int tok = curr_lexer->previous_token_value (); + + if (! (tok == ';' || tok == '[' || tok == '{')) + curr_lexer->xunput (';'); + } } @ { @@ -308,7 +313,7 @@ { int tok = curr_lexer->previous_token_value (); - if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' + if (! (tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ())) unput_comma = true; } @@ -723,8 +728,9 @@ } else { - if (tok == ',' || tok == ';' || tok == '[' || tok == '{' - || curr_lexer->previous_token_is_binop ()) + if (tok == '[' || tok == '{' + || curr_lexer->previous_token_is_binop () + || curr_lexer->previous_token_is_keyword ()) { curr_lexer->current_input_column++; int retval = curr_lexer->handle_string ('\''); @@ -736,15 +742,16 @@ } else { - if (tok == NAME || tok == NUM || tok == IMAG_NUM - || tok == ')' || tok == ']' || tok == '}') - return curr_lexer->count_token (QUOTE); - else + if (! tok || tok == '[' || tok == '{' || tok == '(' + || curr_lexer->previous_token_is_binop () + || curr_lexer->previous_token_is_keyword ()) { curr_lexer->current_input_column++; int retval = curr_lexer->handle_string ('\''); return curr_lexer->count_token_internal (retval); } + else + return curr_lexer->count_token (QUOTE); } } @@ -763,7 +770,7 @@ { if (curr_lexer->space_follows_previous_token ()) { - if (tok == ',' || tok == ';' || tok == '[' || tok == '{' + if (tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ()) { curr_lexer->current_input_column++; @@ -813,7 +820,24 @@ "<" { return curr_lexer->handle_op ("<", EXPR_LT); } ">" { return curr_lexer->handle_op (">", EXPR_GT); } "*" { return curr_lexer->handle_op ("*", '*'); } -"/" { return curr_lexer->handle_op ("/", '/'); } + +"/" { + int prev_tok = curr_lexer->previous_token_value (); + bool space_before = curr_lexer->space_follows_previous_token (); + int c = curr_lexer->text_yyinput (); + curr_lexer->xunput (c); + bool space_after = (c == ' ' || c == '\t'); + + if (space_before && ! space_after + && curr_lexer->previous_token_may_be_command ()) + { + yyless (0); + curr_lexer->push_start_state (COMMAND_START); + } + else + return curr_lexer->handle_op ("/", '/'); + } + "\\" { return curr_lexer->handle_op ("\\", LEFTDIV); } "^" { return curr_lexer->handle_op ("^", POW); } "**" { return curr_lexer->handle_incompatible_op ("**", POW); } @@ -942,7 +966,7 @@ { int tok = curr_lexer->previous_token_value (); - if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' + if (! (tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ())) unput_comma = true; } @@ -1244,7 +1268,7 @@ { int tok = curr_lexer->previous_token_value (); - if (! (tok == ';' || tok == ',' || tok == '[' || tok == '{' + if (! (tok == '[' || tok == '{' || curr_lexer->previous_token_is_binop ())) unput_comma = true; } @@ -1750,6 +1774,13 @@ } bool +lexical_feedback::previous_token_is_keyword (void) const +{ + const token *tok = tokens.front (); + return tok ? tok->is_keyword () : false; +} + +bool lexical_feedback::previous_token_may_be_command (void) const { const token *tok = tokens.front (); @@ -2125,8 +2156,7 @@ break; case endenumeration_kw: - tok_val = new token (endenumeration_kw, token::enumeration_end, - l, c); + tok_val = new token (endenumeration_kw, token::enumeration_end, l, c); at_beginning_of_statement = true; break; @@ -2221,7 +2251,8 @@ if ((reading_fcn_file || reading_script_file || reading_classdef_file) && ! fcn_file_full_name.empty ()) - tok_val = new token (magic_file_kw, fcn_file_full_name, l, c); + tok_val = new token (magic_file_kw, true, + fcn_file_full_name, l, c); else tok_val = new token (magic_file_kw, "stdin", l, c); } @@ -2237,7 +2268,7 @@ } if (! tok_val) - tok_val = new token (kw->tok, l, c); + tok_val = new token (kw->tok, true, l, c); push_token (tok_val); @@ -3443,8 +3474,7 @@ bool space_after = (c == ' ' || c == '\t'); - if (! (prev_tok == ';' || prev_tok == ',' - || prev_tok == '[' || prev_tok == '{' + if (! (prev_tok == '[' || prev_tok == '{' || previous_token_is_binop () || ((tok == '+' || tok == '-') && space_after))) unput_comma = true; diff -r 71ee3afedb69 -r 15f55df088e7 libinterp/parse-tree/token.cc --- a/libinterp/parse-tree/token.cc Mon Mar 11 14:29:19 2013 -0400 +++ b/libinterp/parse-tree/token.cc Mon Mar 11 14:30:57 2013 -0400 @@ -42,6 +42,16 @@ type_tag = generic_token; } +token::token (int tv, bool is_kw, int l, int c) +{ + maybe_cmd = false; + tspc = false; + line_num = l; + column_num = c; + tok_val = tv; + type_tag = is_kw ? keyword_token : generic_token; +} + token::token (int tv, const std::string& s, int l, int c) { maybe_cmd = false; @@ -135,6 +145,12 @@ return num; } +token::token_type +token::ttype (void) const +{ + return type_tag; +} + token::end_tok_type token::ettype (void) const { diff -r 71ee3afedb69 -r 15f55df088e7 libinterp/parse-tree/token.h --- a/libinterp/parse-tree/token.h Mon Mar 11 14:29:19 2013 -0400 +++ b/libinterp/parse-tree/token.h Mon Mar 11 14:30:57 2013 -0400 @@ -35,6 +35,7 @@ enum token_type { generic_token, + keyword_token, string_token, double_token, ettype_token, @@ -62,6 +63,7 @@ }; token (int tv, int l = -1, int c = -1); + token (int tv, bool is_keyword, int l = -1, int c = -1); token (int tv, const std::string& s, int l = -1, int c = -1); token (int tv, double d, const std::string& s = std::string (), int l = -1, int c = -1); @@ -87,8 +89,14 @@ int line (void) const { return line_num; } int column (void) const { return column_num; } + bool is_keyword (void) const + { + return type_tag == keyword_token || type_tag == ettype_token; + } + std::string text (void) const; double number (void) const; + token_type ttype (void) const; end_tok_type ettype (void) const; symbol_table::symbol_record *sym_rec (void);