octave-nkf: libinterp/parse-tree/lex.ll comparison

comparison libinterp/parse-tree/lex.ll @ 16259:0b5ab09dfce4

2/10 commits reworking the lexer

author	John W. Eaton <jwe@octave.org>
date	Mon, 11 Mar 2013 14:18:39 -0400
parents	db7f07b22b9b
children	6c211b8cfbd9 b45a90cdb0ae

comparison

Legend: equal, deleted, inserted, replaced

-:db7f07b22b9b
+:0b5ab09dfce4
 curr_lexer->lexer_debug ("<COMMAND_START>{NL}");
 curr_lexer->input_line_number++;
 curr_lexer->current_input_column = 1;
-curr_lexer->quote_is_transpose = false;
-curr_lexer->convert_spaces_to_comma = true;
 curr_lexer->looking_for_object_index = false;
 curr_lexer->at_beginning_of_statement = true;
 curr_lexer->pop_start_state ();
 %}
 <MATRIX_START>\] {
 curr_lexer->lexer_debug ("<MATRIX_START>\\]");
-curr_lexer->scan_for_comments (yytext);
-curr_lexer->fixup_column_count (yytext);
 curr_lexer->looking_at_object_index.pop_front ();
 curr_lexer->looking_for_object_index = true;
 curr_lexer->at_beginning_of_statement = false;
-int c = yytext[yyleng-1];
+int tok_to_return = curr_lexer->handle_close_bracket (']');
-bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
-bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
-int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, ']');
 return curr_lexer->count_token (']');
 }
 %{
 %}
 <MATRIX_START>\} {
 curr_lexer->lexer_debug ("<MATRIX_START>\\}*");
-curr_lexer->scan_for_comments (yytext);
-curr_lexer->fixup_column_count (yytext);
 curr_lexer->looking_at_object_index.pop_front ();
 curr_lexer->looking_for_object_index = true;
 curr_lexer->at_beginning_of_statement = false;
-int c = yytext[yyleng-1];
+int tok_to_return = curr_lexer->handle_close_bracket ('}');
-bool cont_is_spc = (curr_lexer->eat_continuation () != octave_lexer::NO_WHITESPACE);
-bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
-int tok_to_return = curr_lexer->handle_close_bracket (spc_gobbled, '}');
 return curr_lexer->count_token ('}');
 }
 \[ {
 curr_lexer->nesting_level.bracket ();
 curr_lexer->looking_at_object_index.push_front (false);
 curr_lexer->current_input_column += yyleng;
-curr_lexer->quote_is_transpose = false;
-curr_lexer->convert_spaces_to_comma = true;
 curr_lexer->looking_for_object_index = false;
 curr_lexer->at_beginning_of_statement = false;
 if (curr_lexer->defining_func
 && ! curr_lexer->parsed_function_name.top ())
 <<EOF>> {
 return curr_lexer->handle_end_of_input ();
 }
 %{
-// Identifiers.  Truncate the token at the first space or tab but
+// Identifiers.
-// don't write directly on yytext.
 %}
 {IDENT} {
 curr_lexer->lexer_debug ("{IDENT}");
+int tok = curr_lexer->previous_token_value ();
 if (curr_lexer->whitespace_is_significant ()
 && curr_lexer->space_follows_previous_token ()
-&& ! curr_lexer->previous_token_is_binop ())
+&& ! (tok == '[' || tok == '{'
+|| curr_lexer->previous_token_is_binop ()))
 {
 yyless (0);
 unput (',');
 }
 else
 "@" {
 curr_lexer->lexer_debug ("@");
 curr_lexer->current_input_column++;
-curr_lexer->quote_is_transpose = false;
-curr_lexer->convert_spaces_to_comma = false;
 curr_lexer->looking_at_function_handle++;
 curr_lexer->looking_for_object_index = false;
 curr_lexer->at_beginning_of_statement = false;
 return curr_lexer->count_token ('@');
 {NL} {
 curr_lexer->lexer_debug ("{NL}");
 curr_lexer->input_line_number++;
 curr_lexer->current_input_column = 1;
-curr_lexer->quote_is_transpose = false;
-curr_lexer->convert_spaces_to_comma = true;
 if (curr_lexer->nesting_level.none ())
 {
 curr_lexer->at_beginning_of_statement = true;
 return curr_lexer->count_token ('\n');
 return curr_lexer->handle_op
 (",", ',', true, ! curr_lexer->looking_at_object_index.front ());
 }
 ".'" {
-curr_lexer->do_comma_insert_check ();
 return curr_lexer->handle_op (".'", TRANSPOSE, true, false);
 }
 "++" {
-curr_lexer->do_comma_insert_check ();
 return curr_lexer->handle_incompatible_op
 ("++", PLUS_PLUS, true, false, true);
 }
 "--" {
 ;
-curr_lexer->do_comma_insert_check ();
 return curr_lexer->handle_incompatible_op
 ("--", MINUS_MINUS, true, false, true);
 }
 "(" {
 curr_lexer->nesting_level.remove ();
 curr_lexer->current_input_column++;
 curr_lexer->looking_at_object_index.pop_front ();
-curr_lexer->quote_is_transpose = true;
-curr_lexer->convert_spaces_to_comma
-= (curr_lexer->nesting_level.is_bracket_or_brace ()
-&& ! curr_lexer->looking_at_anon_fcn_args);
 curr_lexer->looking_for_object_index = true;
 curr_lexer->at_beginning_of_statement = false;
 if (curr_lexer->looking_at_anon_fcn_args)
 curr_lexer->looking_at_anon_fcn_args = false;
-curr_lexer->do_comma_insert_check ();
 return curr_lexer->count_token (')');
 }
 "." {
 curr_lexer->looking_at_object_index.push_front
 (curr_lexer->looking_for_object_index);
 curr_lexer->current_input_column += yyleng;
-curr_lexer->quote_is_transpose = false;
-curr_lexer->convert_spaces_to_comma = true;
 curr_lexer->looking_for_object_index = false;
 curr_lexer->at_beginning_of_statement = false;
 curr_lexer->decrement_promptflag ();
-curr_lexer->eat_whitespace ();
 curr_lexer->braceflag++;
 curr_lexer->push_start_state (MATRIX_START);
 void
 lexical_feedback::reset (void)
 {
 end_of_input = false;
-convert_spaces_to_comma = true;
-do_comma_insert = false;
 at_beginning_of_statement = true;
 looking_at_anon_fcn_args = false;
 looking_at_return_list = false;
 looking_at_parameter_list = false;
 looking_at_decl_list = false;
 looking_for_object_index = false;
 looking_at_indirect_ref = false;
 parsing_class_method = false;
 maybe_classdef_get_set_method = false;
 parsing_classdef = false;
-quote_is_transpose = false;
 force_script = false;
 reading_fcn_file = false;
 reading_script_file = false;
 reading_classdef_file = false;
 input_line_number = 1;
 octave_lexer::flex_yyleng (void)
 {
 return yyget_leng (scanner);
 }
-// GAG.
-//
-// If we're reading a matrix and the next character is '[', make sure
-// that we insert a comma ahead of it.
-void
-octave_lexer::do_comma_insert_check (void)
-{
-bool spc_gobbled = (eat_continuation () != octave_lexer::NO_WHITESPACE);
-int c = text_yyinput ();
-xunput (c);
-if (spc_gobbled)
-xunput (' ');
-do_comma_insert = (! looking_at_object_index.front ()
-&& bracketflag && c == '[');
-}
 int
 octave_lexer::text_yyinput (void)
 {
 int c = yyinput (scanner);
 octave_lexer::xunput (char c)
 {
 char *yytxt = flex_yytext ();
 xunput (c, yytxt);
-}
-// If we read some newlines, we need figure out what column we're
-// really looking at.
-void
-octave_lexer::fixup_column_count (char *s)
-{
-char c;
-while ((c = *s++) != '\0')
-{
-if (c == '\n')
-{
-input_line_number++;
-current_input_column = 1;
-}
-else
-current_input_column++;
-}
 }
 bool
 octave_lexer::inside_any_object_index (void)
 {
 return (symbol_table::is_variable (name)
 || (pending_local_variables.find (name)
 != pending_local_variables.end ()));
 }
-// Recognize separators.  If the separator is a CRLF pair, it is
-// replaced by a single LF.
-bool
-octave_lexer::next_token_is_sep_op (void)
-{
-bool retval = false;
-int c = text_yyinput ();
-retval = match_any (c, ",;\n]");
-xunput (c);
-return retval;
-}
-// Try to determine if the next token should be treated as a postfix
-// unary operator.  This is ugly, but it seems to do the right thing.
-bool
-octave_lexer::next_token_is_postfix_unary_op (bool spc_prev)
-{
-bool un_op = false;
-int c0 = text_yyinput ();
-if (c0 == '\'' && ! spc_prev)
-{
-un_op = true;
-}
-else if (c0 == '.')
-{
-int c1 = text_yyinput ();
-un_op = (c1 == '\'');
-xunput (c1);
-}
-else if (c0 == '+')
-{
-int c1 = text_yyinput ();
-un_op = (c1 == '+');
-xunput (c1);
-}
-else if (c0 == '-')
-{
-int c1 = text_yyinput ();
-un_op = (c1 == '-');
-xunput (c1);
-}
-xunput (c0);
-return un_op;
-}
-// Try to determine if the next token should be treated as a binary
-// operator.
-//
-// This kluge exists because whitespace is not always ignored inside
-// the square brackets that are used to create matrix objects (though
-// spacing only really matters in the cases that can be interpreted
-// either as binary ops or prefix unary ops: currently just +, -).
-//
-// Note that a line continuation directly following a + or - operator
-// (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
-// parsed as a binary operator.
-bool
-octave_lexer::next_token_is_bin_op (bool spc_prev)
-{
-bool bin_op = false;
-int c0 = text_yyinput ();
-switch (c0)
-{
-case '+':
-case '-':
-{
-int c1 = text_yyinput ();
-switch (c1)
-{
-case '+':
-case '-':
-// Unary ops, spacing doesn't matter.
-break;
-case '=':
-// Binary ops, spacing doesn't matter.
-bin_op = true;
-break;
-default:
-// Could be either, spacing matters.
-bin_op = looks_like_bin_op (spc_prev, c1);
-break;
-}
-xunput (c1);
-}
-break;
-case ':':
-case '/':
-case '\\':
-case '^':
-// Always a binary op (may also include /=, \=, and ^=).
-bin_op = true;
-break;
-// .+ .- ./ .\ .^ .* .**
-case '.':
-{
-int c1 = text_yyinput ();
-if (match_any (c1, "+-/\\^*"))
-// Always a binary op (may also include .+=, .-=, ./=, ...).
-bin_op = true;
-else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
-// A structure element reference is a binary op.
-bin_op = true;
-xunput (c1);
-}
-break;
-// = == & && | || * **
-case '=':
-case '&':
-case '|':
-case '*':
-// Always a binary op (may also include ==, &&, ||, **).
-bin_op = true;
-break;
-// < <= <> > >=
-case '<':
-case '>':
-// Always a binary op (may also include <=, <>, >=).
-bin_op = true;
-break;
-// ~= !=
-case '~':
-case '!':
-{
-int c1 = text_yyinput ();
-// ~ and ! can be unary ops, so require following =.
-if (c1 == '=')
-bin_op = true;
-xunput (c1);
-}
-break;
-default:
-break;
-}
-xunput (c0);
-return bin_op;
-}
-// FIXME -- we need to handle block comments here.
-void
-octave_lexer::scan_for_comments (const char *text)
-{
-std::string comment_buf;
-bool in_comment = false;
-bool beginning_of_comment = false;
-int len = strlen (text);
-int i = 0;
-while (i < len)
-{
-char c = text[i++];
-switch (c)
-{
-case '%':
-case '#':
-if (in_comment)
-{
-if (! beginning_of_comment)
-comment_buf += static_cast<char> (c);
-}
-else
-{
-maybe_gripe_matlab_incompatible_comment (c);
-in_comment = true;
-beginning_of_comment = true;
-}
-break;
-case '\n':
-if (in_comment)
-{
-comment_buf += static_cast<char> (c);
-octave_comment_buffer::append (comment_buf);
-comment_buf.resize (0);
-in_comment = false;
-beginning_of_comment = false;
-}
-break;
-default:
-if (in_comment)
-{
-comment_buf += static_cast<char> (c);
-beginning_of_comment = false;
-}
-break;
-}
-}
-if (! comment_buf.empty ())
-octave_comment_buffer::append (comment_buf);
-}
-// Discard whitespace, including comments and continuations.
-// FIXME -- we need to handle block comments here.
-int
-octave_lexer::eat_whitespace (void)
-{
-int retval = octave_lexer::NO_WHITESPACE;
-std::string comment_buf;
-bool in_comment = false;
-bool beginning_of_comment = false;
-int c = 0;
-while ((c = text_yyinput ()) != EOF)
-{
-current_input_column++;
-switch (c)
-{
-case ' ':
-case '\t':
-if (in_comment)
-{
-comment_buf += static_cast<char> (c);
-beginning_of_comment = false;
-}
-retval |= octave_lexer::SPACE_OR_TAB;
-break;
-case '\n':
-retval |= octave_lexer::NEWLINE;
-if (in_comment)
-{
-comment_buf += static_cast<char> (c);
-octave_comment_buffer::append (comment_buf);
-comment_buf.resize (0);
-in_comment = false;
-beginning_of_comment = false;
-}
-current_input_column = 0;
-break;
-case '#':
-case '%':
-if (in_comment)
-{
-if (! beginning_of_comment)
-comment_buf += static_cast<char> (c);
-}
-else
-{
-maybe_gripe_matlab_incompatible_comment (c);
-in_comment = true;
-beginning_of_comment = true;
-}
-break;
-case '.':
-if (in_comment)
-{
-comment_buf += static_cast<char> (c);
-beginning_of_comment = false;
-break;
-}
-else
-{
-if (have_ellipsis_continuation ())
-break;
-else
-goto done;
-}
-case '\\':
-if (in_comment)
-{
-comment_buf += static_cast<char> (c);
-beginning_of_comment = false;
-break;
-}
-else
-{
-if (have_continuation ())
-break;
-else
-goto done;
-}
-default:
-if (in_comment)
-{
-comment_buf += static_cast<char> (c);
-beginning_of_comment = false;
-break;
-}
-else
-goto done;
-}
-}
-if (! comment_buf.empty ())
-octave_comment_buffer::append (comment_buf);
-done:
-xunput (c);
-current_input_column--;
-return retval;
-}
 bool
 octave_lexer::whitespace_is_significant (void)
 {
 return (nesting_level.is_bracket ()
 || (nesting_level.is_brace ()
 // If yytext doesn't contain a valid number, we are in deep doo doo.
 assert (nread == 1);
-quote_is_transpose = true;
-convert_spaces_to_comma = true;
 looking_for_object_index = false;
 at_beginning_of_statement = false;
 push_token (new token (NUM, value, yytxt, input_line_number,
 current_input_column));
 current_input_column += flex_yyleng ();
-do_comma_insert_check ();
 }
 void
 octave_lexer::handle_continuation (void)
 {
 octave_comment_buffer::append (comment_text, typ);
 comment_text = "";
-quote_is_transpose = false;
-convert_spaces_to_comma = true;
 at_beginning_of_statement = true;
 if (! looking_at_continuation)
 xunput ('\n');
 }
 }
 else
 xunput (c1);
 return false;
-}
-// See if we have a continuation line.  If so, eat it and the leading
-// whitespace on the next line.
-int
-octave_lexer::eat_continuation (void)
-{
-int retval = octave_lexer::NO_WHITESPACE;
-int c = text_yyinput ();
-if ((c == '.' && have_ellipsis_continuation ())
-|| (c == '\\' && have_continuation ()))
-retval = eat_whitespace ();
-else
-xunput (c);
-return retval;
 }
 int
 octave_lexer::handle_string (char delim)
 {
 if (delim == '\'')
 s = buf.str ();
 else
 s = do_string_escapes (buf.str ());
-quote_is_transpose = true;
-convert_spaces_to_comma = true;
 if (delim == '"')
 gripe_matlab_incompatible ("\" used as string delimiter");
 else if (delim == '\'')
 gripe_single_quote_string ();
 }
 return LEXICAL_ERROR;
 }
-bool
-octave_lexer::next_token_is_assign_op (void)
-{
-bool retval = false;
-int c0 = text_yyinput ();
-switch (c0)
-{
-case '=':
-{
-int c1 = text_yyinput ();
-xunput (c1);
-if (c1 != '=')
-retval = true;
-}
-break;
-case '+':
-case '-':
-case '*':
-case '/':
-case '\\':
-case '&':
-case '|':
-{
-int c1 = text_yyinput ();
-xunput (c1);
-if (c1 == '=')
-retval = true;
-}
-break;
-case '.':
-{
-int c1 = text_yyinput ();
-if (match_any (c1, "+-*/\\"))
-{
-int c2 = text_yyinput ();
-xunput (c2);
-if (c2 == '=')
-retval = true;
-}
-xunput (c1);
-}
-break;
-case '>':
-{
-int c1 = text_yyinput ();
-if (c1 == '>')
-{
-int c2 = text_yyinput ();
-xunput (c2);
-if (c2 == '=')
-retval = true;
-}
-xunput (c1);
-}
-break;
-case '<':
-{
-int c1 = text_yyinput ();
-if (c1 == '<')
-{
-int c2 = text_yyinput ();
-xunput (c2);
-if (c2 == '=')
-retval = true;
-}
-xunput (c1);
-}
-break;
-default:
-break;
-}
-xunput (c0);
-return retval;
-}
-bool
-octave_lexer::next_token_is_index_op (void)
-{
-int c = text_yyinput ();
-xunput (c);
-return c == '(' || c == '{';
-}
 int
-octave_lexer::handle_close_bracket (bool spc_gobbled, int bracket_type)
+octave_lexer::handle_close_bracket (int bracket_type)
 {
 int retval = bracket_type;
 if (! nesting_level.none ())
 {
 panic_impossible ();
 }
 pop_start_state ();
-quote_is_transpose = true;
-convert_spaces_to_comma = true;
 return retval;
-}
-void
-octave_lexer::maybe_unput_comma (int spc_gobbled)
-{
-if (nesting_level.is_bracket ()
-|| (nesting_level.is_brace ()
-&& ! looking_at_object_index.front ()))
-{
-int bin_op = next_token_is_bin_op (spc_gobbled);
-int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
-int c1 = text_yyinput ();
-int c2 = text_yyinput ();
-xunput (c2);
-xunput (c1);
-int sep_op = next_token_is_sep_op ();
-int dot_op = (c1 == '.'
-&& (isalpha (c2) || isspace (c2) || c2 == '_'));
-if (postfix_un_op || bin_op || sep_op || dot_op)
-return;
-int index_op = (c1 == '(' || c1 == '{');
-// If there is no space before the indexing op, we don't insert
-// a comma.
-if (index_op && ! spc_gobbled)
-return;
-maybe_warn_separator_insert (',');
-xunput (',');
-}
 }
 bool
 octave_lexer::next_token_can_follow_bin_op (void)
 {
 }
 int
 octave_lexer::handle_superclass_identifier (void)
 {
-eat_continuation ();
 std::string pkg;
 char *yytxt = flex_yytext ();
 std::string meth = strip_trailing_whitespace (yytxt);
 size_t pos = meth.find ("@");
 std::string cls = meth.substr (pos).substr (1);
 meth.empty () ? 0 : &(symbol_table::insert (meth)),
 cls.empty () ? 0 : &(symbol_table::insert (cls)),
 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
 input_line_number, current_input_column));
-convert_spaces_to_comma = true;
 current_input_column += flex_yyleng ();
 return SUPERCLASSREF;
 }
 int
 octave_lexer::handle_meta_identifier (void)
 {
-eat_continuation ();
 std::string pkg;
 char *yytxt = flex_yytext ();
 std::string cls = strip_trailing_whitespace (yytxt).substr (1);
 size_t pos = cls.find (".");
 push_token (new token (METAQUERY,
 cls.empty () ? 0 : &(symbol_table::insert (cls)),
 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
 input_line_number, current_input_column));
-convert_spaces_to_comma = true;
 current_input_column += flex_yyleng ();
 return METAQUERY;
 }
 // a string that is also a valid identifier.  But first, we have to
 // decide whether to insert a comma.
 if (looking_at_indirect_ref)
 {
-//      do_comma_insert_check ();
-//      maybe_unput_comma (spc_gobbled);
 push_token (new token (STRUCT_ELT, tok, input_line_number,
 current_input_column));
-quote_is_transpose = true;
-convert_spaces_to_comma = true;
 looking_for_object_index = true;
 current_input_column += flex_yyleng ();
 at_beginning_of_statement = false;
 {
 push_token (new token (FCN_HANDLE, tok, input_line_number,
 current_input_column));
 current_input_column += flex_yyleng ();
-quote_is_transpose = false;
-convert_spaces_to_comma = true;
 looking_for_object_index = true;
 at_beginning_of_statement = false;
 return FCN_HANDLE;
 if (kw_token)
 {
 if (kw_token >= 0)
 {
 current_input_column += flex_yyleng ();
-quote_is_transpose = false;
-convert_spaces_to_comma = true;
 looking_for_object_index = false;
 }
 return kw_token;
 }
 gripe_matlab_incompatible_operator (flex_yytext ());
 push_token (new token (tok, input_line_number, current_input_column));
 current_input_column += flex_yyleng ();
-quote_is_transpose = qit;
-convert_spaces_to_comma = convert;
 looking_for_object_index = false;
 at_beginning_of_statement = bos;
 return count_token (tok);
 }
 tok_val = new token (tok, input_line_number, current_input_column);
 push_token (tok_val);
 current_input_column += flex_yyleng ();
-quote_is_transpose = false;
-convert_spaces_to_comma = true;
 return count_token_internal (tok);
 }
 int

Mercurial > octave-nkf

comparison libinterp/parse-tree/lex.ll @ 16259:0b5ab09dfce4