comparison libinterp/parse-tree/lex.ll @ 15195:2fc554ffbc28

split libinterp from src * libinterp: New directory. Move all files from src directory here except Makefile.am, main.cc, main-cli.cc, mkoctfile.in.cc, mkoctfile.in.sh, octave-config.in.cc, octave-config.in.sh. * libinterp/Makefile.am: New file, extracted from src/Makefile.am. * src/Makefile.am: Delete everything except targets and definitions needed to build and link main and utility programs. * Makefile.am (SUBDIRS): Include libinterp in the list. * autogen.sh: Run config-module.sh in libinterp/dldfcn directory, not src/dldfcn directory. * configure.ac (AC_CONFIG_SRCDIR): Use libinterp/octave.cc, not src/octave.cc. (DL_LDFLAGS, LIBOCTINTERP): Use libinterp, not src. (AC_CONFIG_FILES): Include libinterp/Makefile in the list. * find-docstring-files.sh: Look in libinterp, not src. * gui/src/Makefile.am (liboctgui_la_CPPFLAGS): Find header files in libinterp, not src.
author John W. Eaton <jwe@octave.org>
date Sat, 18 Aug 2012 16:23:39 -0400
parents src/parse-tree/lex.ll@28f5f4a4a80a
children 947cf10c94da 049e8bbff782
comparison
equal deleted inserted replaced
15194:0f0b795044c3 15195:2fc554ffbc28
1 /*
2
3 Copyright (C) 1993-2012 John W. Eaton
4
5 This file is part of Octave.
6
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11
12 Octave is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20
21 */
22
23 %option prefix = "octave_"
24
25 %top {
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29
30 }
31
32 %s COMMAND_START
33 %s MATRIX_START
34
35 %x SCRIPT_FILE_BEGIN
36 %x FUNCTION_FILE_BEGIN
37
38 %{
39
40 #include <cctype>
41 #include <cstring>
42
43 #include <iostream>
44 #include <set>
45 #include <sstream>
46 #include <string>
47 #include <stack>
48
49 #include <sys/types.h>
50 #include <unistd.h>
51
52 #include "cmd-edit.h"
53 #include "quit.h"
54 #include "lo-mappers.h"
55
56 // These would be alphabetical, but y.tab.h must be included before
57 // oct-gperf.h and y.tab.h must be included after token.h and the tree
58 // class declarations. We can't include y.tab.h in oct-gperf.h
59 // because it may not be protected to allow it to be included multiple
60 // times.
61
62 #include "Cell.h"
63 #include "comment-list.h"
64 #include "defun.h"
65 #include "error.h"
66 #include "gripes.h"
67 #include "input.h"
68 #include "lex.h"
69 #include "ov.h"
70 #include "parse.h"
71 #include "parse-private.h"
72 #include "pt-all.h"
73 #include "symtab.h"
74 #include "token.h"
75 #include "toplev.h"
76 #include "utils.h"
77 #include "variables.h"
78 #include <oct-parse.h>
79 #include <oct-gperf.h>
80
81 #if defined (GNULIB_NAMESPACE)
82 // Calls to the following functions appear in the generated output from
83 // flex without the namespace tag. Redefine them so we will use them
84 // via the gnulib namespace.
85 #define fprintf GNULIB_NAMESPACE::fprintf
86 #define fwrite GNULIB_NAMESPACE::fwrite
87 #define isatty GNULIB_NAMESPACE::isatty
88 #define malloc GNULIB_NAMESPACE::malloc
89 #define realloc GNULIB_NAMESPACE::realloc
90 #endif
91
92 #if ! (defined (FLEX_SCANNER) \
93 && defined (YY_FLEX_MAJOR_VERSION) && YY_FLEX_MAJOR_VERSION >= 2 \
94 && defined (YY_FLEX_MINOR_VERSION) && YY_FLEX_MINOR_VERSION >= 5)
95 #error lex.l requires flex version 2.5.4 or later
96 #endif
97
98 #define yylval octave_lval
99
100 // Arrange to get input via readline.
101
// Replacement for flex's default reader: the scanner buffer is filled by
// calling octave_read (buf, max_size), whose return value is stored in
// RESULT.  A negative return value is reported through YY_FATAL_ERROR.
102 #ifdef YY_INPUT
103 #undef YY_INPUT
104 #endif
105 #define YY_INPUT(buf, result, max_size) \
106 if ((result = octave_read (buf, max_size)) < 0) \
107 YY_FATAL_ERROR ("octave_read () in flex scanner failed");
108
109 // Try to avoid crashing out completely on fatal scanner errors.
110 // The call to yy_fatal_error should never happen, but it avoids a
111 // `static function defined but not used' warning from gcc.
112
// Fatal-error hook used by the generated scanner: MSG is first reported
// with error (), then OCTAVE_QUIT is executed, and only after that is
// yy_fatal_error called with the same message.
113 #ifdef YY_FATAL_ERROR
114 #undef YY_FATAL_ERROR
115 #endif
116 #define YY_FATAL_ERROR(msg) \
117 do \
118 { \
119 error (msg); \
120 OCTAVE_QUIT; \
121 yy_fatal_error (msg); \
122 } \
123 while (0)
124
// Return TOK from the enclosing function.  TOK is evaluated exactly once
// into a local.  When Vdisplay_tokens is set the token is passed to
// display_token; when lexer_debug_flag is set it is additionally shown
// on std::cerr after an "R: " prefix.
125 #define DISPLAY_TOK_AND_RETURN(tok) \
126 do \
127 { \
128 int tok_val = tok; \
129 if (Vdisplay_tokens) \
130 display_token (tok_val); \
131 if (lexer_debug_flag) \
132 { \
133 std::cerr << "R: "; \
134 display_token (tok_val); \
135 std::cerr << std::endl; \
136 } \
137 return tok_val; \
138 } \
139 while (0)
140
// Increment the Vtoken_count counter, then return TOK via
// DISPLAY_TOK_AND_RETURN.
141 #define COUNT_TOK_AND_RETURN(tok) \
142 do \
143 { \
144 Vtoken_count++; \
145 DISPLAY_TOK_AND_RETURN (tok); \
146 } \
147 while (0)
148
// Return TOK after advancing current_input_column by the length of the
// matched text (yyleng) and putting two lexer flags in their default
// state: quote_is_transpose = false, convert_spaces_to_comma = true.
149 #define TOK_RETURN(tok) \
150 do \
151 { \
152 current_input_column += yyleng; \
153 lexer_flags.quote_is_transpose = false; \
154 lexer_flags.convert_spaces_to_comma = true; \
155 COUNT_TOK_AND_RETURN (tok); \
156 } \
157 while (0)
158
// Allocate a token holding NAME together with the current line and
// column, store it in yylval.tok_val, and remember it on token_stack
// (reset_parser deletes everything on that stack).  Then return TOK via
// TOK_RETURN.
159 #define TOK_PUSH_AND_RETURN(name, tok) \
160 do \
161 { \
162 yylval.tok_val = new token (name, input_line_number, \
163 current_input_column); \
164 token_stack.push (yylval.tok_val); \
165 TOK_RETURN (tok); \
166 } \
167 while (0)
168
// Return operator token TOK.  A token recording only the current line
// and column is allocated, stored in yylval.tok_val and pushed on
// token_stack; the column is then advanced by yyleng.  The remaining
// arguments are copied into lexer_flags:
//   CONVERT -> convert_spaces_to_comma
//   BOS     -> at_beginning_of_statement
//   QIT     -> quote_is_transpose
// looking_for_object_index is always cleared.
169 #define BIN_OP_RETURN_INTERNAL(tok, convert, bos, qit) \
170 do \
171 { \
172 yylval.tok_val = new token (input_line_number, current_input_column); \
173 token_stack.push (yylval.tok_val); \
174 current_input_column += yyleng; \
175 lexer_flags.quote_is_transpose = qit; \
176 lexer_flags.convert_spaces_to_comma = convert; \
177 lexer_flags.looking_for_object_index = false; \
178 lexer_flags.at_beginning_of_statement = bos; \
179 COUNT_TOK_AND_RETURN (tok); \
180 } \
181 while (0)
182
// Same as BIN_OP_RETURN_INTERNAL, except that the matched operator text
// (yytext) is first passed to gripe_matlab_incompatible_operator.
183 #define XBIN_OP_RETURN_INTERNAL(tok, convert, bos, qit) \
184 do \
185 { \
186 gripe_matlab_incompatible_operator (yytext); \
187 BIN_OP_RETURN_INTERNAL (tok, convert, bos, qit); \
188 } \
189 while (0)
190
// BIN_OP_RETURN_INTERNAL with QIT fixed to false, i.e. a quote that
// follows the operator is not treated as a transpose.
191 #define BIN_OP_RETURN(tok, convert, bos) \
192 do \
193 { \
194 BIN_OP_RETURN_INTERNAL (tok, convert, bos, false); \
195 } \
196 while (0)
197
// BIN_OP_RETURN for operators that trigger the Matlab-incompatibility
// gripe: the matched text is reported through
// gripe_matlab_incompatible_operator before the token is returned with
// quote_is_transpose cleared.
#define XBIN_OP_RETURN(tok, convert, bos) \
  do \
    { \
      XBIN_OP_RETURN_INTERNAL (tok, convert, bos, false); \
    } \
  while (0)
205
// When lexer_debug_flag is set, pass the rule's PATTERN string and the
// text it matched (yytext) to lexer_debug; otherwise do nothing.
206 #define LEXER_DEBUG(pattern) \
207 do \
208 { \
209 if (lexer_debug_flag) \
210 lexer_debug (pattern, yytext); \
211 } \
212 while (0)
213
214 // TRUE means that we have encountered EOF on the input stream.
215 bool parser_end_of_input = false;
216
217 // Flags that need to be shared between the lexer and parser.
218 lexical_feedback lexer_flags;
219
220 // Stack to hold tokens so that we can delete them when the parser is
221 // reset and avoid growing forever just because we are stashing some
222 // information. This has to appear before lex.h is included, because
223 // one of the macros defined there uses token_stack.
224 //
225 // FIXME -- this should really be static, but that causes
226 // problems on some systems.
227 std::stack <token*> token_stack;
228
229 // Did eat_whitespace() eat a space or tab, or a newline, or both?
230
231 typedef int yum_yum;
232
233 const yum_yum ATE_NOTHING = 0;
234 const yum_yum ATE_SPACE_OR_TAB = 1;
235 const yum_yum ATE_NEWLINE = 2;
236
237 // Is the closest nesting level a square bracket, squiggly brace or a paren?
238
// Remembers which grouping delimiters are currently open, innermost
// last, so the scanner can ask what kind of construct encloses the
// current position.
//
// bracket (), brace () and paren () record an opening '[', '{' or '('.
// remove () forgets the innermost delimiter and is a no-op when nothing
// is open.  clear () forgets everything.  The is_* and none () queries
// inspect the innermost entry only and never modify the object, so they
// are const and may be used through a const reference.

class bracket_brace_paren_nesting_level
{
public:

  bracket_brace_paren_nesting_level (void) : context () { }

  ~bracket_brace_paren_nesting_level (void) { }

  // Record a newly opened delimiter.
  void bracket (void) { context.push (BRACKET); }
  void brace (void) { context.push (BRACE); }
  void paren (void) { context.push (PAREN); }

  // Is the innermost open delimiter of the given kind?  All of these
  // return false when nothing is open.
  bool is_bracket (void) const { return innermost_is (BRACKET); }
  bool is_brace (void) const { return innermost_is (BRACE); }
  bool is_paren (void) const { return innermost_is (PAREN); }

  bool is_bracket_or_brace (void) const
  { return innermost_is (BRACKET) || innermost_is (BRACE); }

  // True when no delimiter is open.
  bool none (void) const { return context.empty (); }

  // Forget the innermost delimiter, if there is one.
  void remove (void) { if (! context.empty ()) context.pop (); }

  // Forget every open delimiter.
  void clear (void) { while (! context.empty ()) context.pop (); }

private:

  // Shared test behind the is_* queries: the stack must be non-empty
  // before top () may be inspected.
  bool innermost_is (int kind) const
  { return ! context.empty () && context.top () == kind; }

  std::stack<int> context;

  static const int BRACKET;
  static const int BRACE;
  static const int PAREN;

  // No copying: declared but intentionally not defined.

  bracket_brace_paren_nesting_level (const bracket_brace_paren_nesting_level&);

  bracket_brace_paren_nesting_level&
  operator = (const bracket_brace_paren_nesting_level&);
};

const int bracket_brace_paren_nesting_level::BRACKET = 1;
const int bracket_brace_paren_nesting_level::BRACE = 2;
const int bracket_brace_paren_nesting_level::PAREN = 3;
286
287 static bracket_brace_paren_nesting_level nesting_level;
288
289 static bool Vdisplay_tokens = false;
290
291 static unsigned int Vtoken_count = 0;
292
293 // Nesting depth of block comments: incremented each time the start of
294 // a block comment is seen; zero means no block comment is open.
295 static int block_comment_nesting_level = 0;
296
297 // Internal variable for lexer debugging state.
298 static bool lexer_debug_flag = false;
299
300 // Forward declarations for functions defined at the bottom of this
301 // file.
302
303 static int text_yyinput (void);
304 static void xunput (char c, char *buf);
305 static void fixup_column_count (char *s);
306 static void do_comma_insert_check (void);
307 static int is_keyword_token (const std::string& s);
308 static int process_comment (bool start_in_block, bool& eof);
309 static bool match_any (char c, const char *s);
310 static bool next_token_is_sep_op (void);
311 static bool next_token_is_bin_op (bool spc_prev);
312 static bool next_token_is_postfix_unary_op (bool spc_prev);
313 static std::string strip_trailing_whitespace (char *s);
314 static void handle_number (void);
315 static int handle_string (char delim);
316 static int handle_close_bracket (bool spc_gobbled, int bracket_type);
317 static int handle_superclass_identifier (void);
318 static int handle_meta_identifier (void);
319 static int handle_identifier (void);
320 static bool have_continuation (bool trailing_comments_ok = true);
321 static bool have_ellipsis_continuation (bool trailing_comments_ok = true);
322 static void scan_for_comments (const char *);
323 static yum_yum eat_whitespace (void);
324 static yum_yum eat_continuation (void);
325 static void maybe_warn_separator_insert (char sep);
326 static void gripe_single_quote_string (void);
327 static void gripe_matlab_incompatible (const std::string& msg);
328 static void maybe_gripe_matlab_incompatible_comment (char c);
329 static void gripe_matlab_incompatible_continuation (void);
330 static void gripe_matlab_incompatible_operator (const std::string& op);
331 static void display_token (int tok);
332 static void lexer_debug (const char *pattern, const char *text);
333
334 %}
335
336 D [0-9]
337 S [ \t]
338 NL ((\n)|(\r)|(\r\n))
339 SNL ({S}|{NL})
340 EL (\.\.\.)
341 BS (\\)
342 CONT ({EL}|{BS})
343 Im [iIjJ]
344 CCHAR [#%]
345 COMMENT ({CCHAR}.*{NL})
346 SNLCMT ({SNL}|{COMMENT})
347 NOT ((\~)|(\!))
348 POW ((\*\*)|(\^))
349 EPOW (\.{POW})
350 IDENT ([_$a-zA-Z][_$a-zA-Z0-9]*)
351 EXPON ([DdEe][+-]?{D}+)
352 NUMBER (({D}+\.?{D}*{EXPON}?)|(\.{D}+{EXPON}?)|(0[xX][0-9a-fA-F]+))
353 %%
354
355 %{
356 // Make script and function files start with a bogus token. This makes
357 // the parser go down a special path.
358 %}
359
360 <SCRIPT_FILE_BEGIN>. {
// Active only in the exclusive SCRIPT_FILE_BEGIN start state.  Switch
// to INITIAL, hand the matched character to xunput so it is not lost
// (NOTE(review): xunput is defined later in the file; presumably it
// pushes the character back onto the input), and return the
// SCRIPT_FILE marker token.
361 LEXER_DEBUG ("<SCRIPT_FILE_BEGIN>.");
362
363 BEGIN (INITIAL);
364 xunput (yytext[0], yytext);
365 COUNT_TOK_AND_RETURN (SCRIPT_FILE);
366 }
367
368 <FUNCTION_FILE_BEGIN>. {
// Active only in the exclusive FUNCTION_FILE_BEGIN start state.  Switch
// to INITIAL, hand the matched character to xunput so it is not lost
// (NOTE(review): presumably pushed back onto the input), and return the
// FUNCTION_FILE marker token.
369 LEXER_DEBUG ("<FUNCTION_FILE_BEGIN>.");
370
371 BEGIN (INITIAL);
372 xunput (yytext[0], yytext);
373 COUNT_TOK_AND_RETURN (FUNCTION_FILE);
374 }
375
376 %{
377 // Help and other command-style functions.
378 %}
379
380 <COMMAND_START>{NL} {
// A newline ends a command-syntax line: go back to INITIAL, count the
// line, restart the column at 1, reset the lexer flags for the start of
// a new statement, and return '\n'.
381 LEXER_DEBUG ("<COMMAND_START>{NL}");
382
383 BEGIN (INITIAL);
384 input_line_number++;
385 current_input_column = 1;
386
387 lexer_flags.quote_is_transpose = false;
388 lexer_flags.convert_spaces_to_comma = true;
389 lexer_flags.looking_for_object_index = false;
390 lexer_flags.at_beginning_of_statement = true;
391
392 COUNT_TOK_AND_RETURN ('\n');
393 }
394
<COMMAND_START>[\;\,] {
    LEXER_DEBUG ("<COMMAND_START>[\\;\\,]");

    // A ';' or ',' ends the command-syntax argument list: what follows
    // is scanned in the INITIAL state as the start of a new statement.
    BEGIN (INITIAL);

    lexer_flags.looking_for_object_index = false;
    lexer_flags.at_beginning_of_statement = true;

    // The pattern matches exactly one character, so look at it directly.
    if (yytext[0] == ';')
      TOK_RETURN (';');
    else
      TOK_RETURN (',');
  }
408
409 <COMMAND_START>[\"\'] {
// A quote character in command syntax starts a string.  The column is
// advanced past the opening quote and handle_string is called with that
// quote character as the delimiter; whatever token it yields is
// returned.
410 LEXER_DEBUG ("<COMMAND_START>[\\\"\\']");
411
412 lexer_flags.at_beginning_of_statement = false;
413
414 current_input_column++;
415 int tok = handle_string (yytext[0]);
416
417 COUNT_TOK_AND_RETURN (tok);
418 }
419
420 <COMMAND_START>[^#% \t\r\n\;\,\"\'][^ \t\r\n\;\,]*{S}* {
// Any other word in command syntax (plus the blanks/tabs after it) is
// returned as an SQ_STRING token whose text is the match with
// strip_trailing_whitespace applied.
421 LEXER_DEBUG ("<COMMAND_START>[^#% \\t\\r\\n\\;\\,\\\"\\'][^ \\t\\r\\n\\;\\,]*{S}*");
422
423 std::string tok = strip_trailing_whitespace (yytext);
424
425 lexer_flags.looking_for_object_index = false;
426 lexer_flags.at_beginning_of_statement = false;
427
428 TOK_PUSH_AND_RETURN (tok, SQ_STRING);
429 }
430
431 %{
432 // For this and the next two rules, we're looking at ']', and we
433 // need to know if the next token is `=' or `=='.
434 //
435 // It would have been so much easier if the delimiters were simply
436 // different for the expression on the left hand side of the equals
437 // operator.
438 //
439 // It's also a pain in the ass to decide whether to insert a comma
440 // after seeing a ']' character...
441
442 // FIXME -- we need to handle block comments here.
443 %}
444
445 <MATRIX_START>{SNLCMT}*\]{S}* {
// Closing ']' inside a matrix list, together with any whitespace,
// newlines and comments before it and any blanks/tabs after it.
446 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\]{S}*");
447
448 scan_for_comments (yytext);
449 fixup_column_count (yytext);
450
451 lexer_flags.looking_at_object_index.pop_front ();
452
453 lexer_flags.looking_for_object_index = true;
454 lexer_flags.at_beginning_of_statement = false;
455
// Whitespace counts as following the bracket when the match ended in a
// blank or tab, or when eat_continuation returned a nonzero value.
456 int c = yytext[yyleng-1];
457 int cont_is_spc = eat_continuation ();
458 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
459 int tok_to_return = handle_close_bracket (spc_gobbled, ']');
460
// NOTE(review): a single blank is handed to xunput here, presumably so
// that the next scan still sees whitespace after the bracket -- confirm
// against xunput's definition later in the file.
461 if (spc_gobbled)
462 xunput (' ', yytext);
463
464 COUNT_TOK_AND_RETURN (tok_to_return);
465 }
466
467 %{
468 // FIXME -- we need to handle block comments here.
469 %}
470
471 <MATRIX_START>{SNLCMT}*\}{S}* {
// Closing '}' inside a cell list, together with any whitespace,
// newlines and comments before it and any blanks/tabs after it.
// Mirrors the ']' rule; only the delimiter passed to
// handle_close_bracket differs.
472 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*\\}{S}*");
473
474 scan_for_comments (yytext);
475 fixup_column_count (yytext);
476
477 lexer_flags.looking_at_object_index.pop_front ();
478
479 lexer_flags.looking_for_object_index = true;
480 lexer_flags.at_beginning_of_statement = false;
481
// Whitespace counts as following the brace when the match ended in a
// blank or tab, or when eat_continuation returned a nonzero value.
482 int c = yytext[yyleng-1];
483 int cont_is_spc = eat_continuation ();
484 bool spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
485 int tok_to_return = handle_close_bracket (spc_gobbled, '}');
486
// NOTE(review): a single blank is handed to xunput here, presumably so
// that the next scan still sees whitespace after the brace -- confirm.
487 if (spc_gobbled)
488 xunput (' ', yytext);
489
490 COUNT_TOK_AND_RETURN (tok_to_return);
491 }
492
493 %{
494 // Commas are element separators in matrix constants. If we don't
495 // check for continuations here we can end up inserting too many
496 // commas.
497 %}
498
499 <MATRIX_START>{S}*\,{S}* {
// Explicit ',' element separator, with surrounding blanks/tabs.  A
// continuation that follows is consumed by eat_continuation.
500 LEXER_DEBUG ("<MATRIX_START>{S}*\\,{S}*");
501
502 current_input_column += yyleng;
503
504 int tmp = eat_continuation ();
505
506 lexer_flags.quote_is_transpose = false;
507 lexer_flags.convert_spaces_to_comma = true;
508 lexer_flags.looking_for_object_index = false;
509 lexer_flags.at_beginning_of_statement = false;
510
// Outside an index expression, if eat_continuation reported that a
// newline was consumed (ATE_NEWLINE bit set), warn and hand a ';' to
// xunput so that a row separator is scanned after this comma.
511 if (! lexer_flags.looking_at_object_index.front ())
512 {
513 if ((tmp & ATE_NEWLINE) == ATE_NEWLINE)
514 {
515 maybe_warn_separator_insert (';');
516
517 xunput (';', yytext);
518 }
519 }
520
521 COUNT_TOK_AND_RETURN (',');
522 }
523
524 %{
525 // In some cases, spaces in matrix constants can turn into commas.
526 // If commas are required, spaces are not important in matrix
527 // constants so we just eat them. If we don't check for continuations
528 // here we can end up inserting too many commas.
529 %}
530
531 <MATRIX_START>{S}+ {
// Blanks/tabs inside a matrix list.  They are turned into a ','
// element separator only when every condition below holds; otherwise
// the action ends without returning a token.
532 LEXER_DEBUG ("<MATRIX_START>{S}+");
533
534 current_input_column += yyleng;
535
536 lexer_flags.at_beginning_of_statement = false;
537
538 int tmp = eat_continuation ();
539
540 if (! lexer_flags.looking_at_object_index.front ())
541 {
542 bool bin_op = next_token_is_bin_op (true);
543 bool postfix_un_op = next_token_is_postfix_unary_op (true);
544 bool sep_op = next_token_is_sep_op ();
545
// Insert a comma only if the next token is not an operator or a
// separator, the innermost delimiter is [] or {}, and space-to-comma
// conversion is currently enabled.
546 if (! (postfix_un_op || bin_op || sep_op)
547 && nesting_level.is_bracket_or_brace ()
548 && lexer_flags.convert_spaces_to_comma)
549 {
// If eat_continuation consumed a newline, also hand a ';' to xunput.
550 if ((tmp & ATE_NEWLINE) == ATE_NEWLINE)
551 {
552 maybe_warn_separator_insert (';');
553
554 xunput (';', yytext);
555 }
556
557 lexer_flags.quote_is_transpose = false;
558 lexer_flags.convert_spaces_to_comma = true;
559 lexer_flags.looking_for_object_index = false;
560
561 maybe_warn_separator_insert (',');
562
563 COUNT_TOK_AND_RETURN (',');
564 }
565 }
566 }
567
568 %{
569 // Semicolons are handled as row separators in matrix constants. If we
570 // don't eat whitespace here we can end up inserting too many
571 // semicolons.
572
573 // FIXME -- we need to handle block comments here.
574 %}
575
576 <MATRIX_START>{SNLCMT}*;{SNLCMT}* {
// Explicit ';' row separator, with any whitespace, newlines and
// comments around it.  eat_whitespace is called afterwards and its
// result is ignored.
577 LEXER_DEBUG ("<MATRIX_START>{SNLCMT}*;{SNLCMT}*");
578
579 scan_for_comments (yytext);
580 fixup_column_count (yytext);
581 eat_whitespace ();
582
583 lexer_flags.quote_is_transpose = false;
584 lexer_flags.convert_spaces_to_comma = true;
585 lexer_flags.looking_for_object_index = false;
586 lexer_flags.at_beginning_of_statement = false;
587
588 COUNT_TOK_AND_RETURN (';');
589 }
590
591 %{
592 // In some cases, new lines can also become row separators. If we
593 // don't eat whitespace here we can end up inserting too many
594 // semicolons.
595
596 // FIXME -- we need to handle block comments here.
597 %}
598
599 <MATRIX_START>{S}*{COMMENT}{SNLCMT}* |
600 <MATRIX_START>{S}*{NL}{SNLCMT}* {
// A comment or newline (and anything blank that follows) inside a
// matrix list.  With no delimiter open this is a LEXICAL_ERROR.
// Directly inside [] or {} and outside an index expression it becomes
// a ';' row separator; in all other cases no token is returned.
601 LEXER_DEBUG ("<MATRIX_START>{S}*{COMMENT}{SNLCMT}*|<MATRIX_START>{S}*{NL}{SNLCMT}*");
602
603 scan_for_comments (yytext);
604 fixup_column_count (yytext);
605 eat_whitespace ();
606
607 lexer_flags.quote_is_transpose = false;
608 lexer_flags.convert_spaces_to_comma = true;
609 lexer_flags.at_beginning_of_statement = false;
610
611 if (nesting_level.none ())
612 return LEXICAL_ERROR;
613
614 if (! lexer_flags.looking_at_object_index.front ()
615 && nesting_level.is_bracket_or_brace ())
616 {
617 maybe_warn_separator_insert (';');
618
619 COUNT_TOK_AND_RETURN (';');
620 }
621 }
622
623 \[{S}* {
// Opening '[' (plus trailing blanks/tabs): record a bracket nesting
// level, push false on the looking_at_object_index stack, and switch to
// the MATRIX_START state.
624 LEXER_DEBUG ("\\[{S}*");
625
626 nesting_level.bracket ();
627
628 lexer_flags.looking_at_object_index.push_front (false);
629
630 current_input_column += yyleng;
631 lexer_flags.quote_is_transpose = false;
632 lexer_flags.convert_spaces_to_comma = true;
633 lexer_flags.looking_for_object_index = false;
634 lexer_flags.at_beginning_of_statement = false;
635
// While defining a function whose name has not been parsed yet, the
// bracket opens a return list; otherwise it opens a matrix or the
// left-hand side of an assignment.
636 if (lexer_flags.defining_func
637 && ! lexer_flags.parsed_function_name.top ())
638 lexer_flags.looking_at_return_list = true;
639 else
640 lexer_flags.looking_at_matrix_or_assign_lhs = true;
641
642 promptflag--;
643 eat_whitespace ();
644
645 lexer_flags.bracketflag++;
646 BEGIN (MATRIX_START);
647 COUNT_TOK_AND_RETURN ('[');
648 }
649
650 \] {
// A ']' matched by the general rule rather than by a <MATRIX_START>
// rule: drop the innermost nesting level and the matching
// looking_at_object_index entry, then return ']'.
651 LEXER_DEBUG ("\\]");
652
653 nesting_level.remove ();
654
655 lexer_flags.looking_at_object_index.pop_front ();
656
657 lexer_flags.looking_for_object_index = true;
658 lexer_flags.at_beginning_of_statement = false;
659
660 TOK_RETURN (']');
661 }
662
663 %{
664 // Imaginary numbers.
665 %}
666
667 {NUMBER}{Im} {
// A numeric literal immediately followed by one of i, I, j, J.  The
// text is processed by handle_number and reported as IMAG_NUM.
668 LEXER_DEBUG ("{NUMBER}{Im}");
669
670 handle_number ();
671 COUNT_TOK_AND_RETURN (IMAG_NUM);
672 }
673
674 %{
675 // Real numbers. Don't grab the `.' part of a dot operator as part of
676 // the constant.
677 %}
678
679 {D}+/\.[\*/\\^\'] |
680 {NUMBER} {
// The first pattern uses trailing context: digits followed by a dot
// operator (.* ./ .\ .^ .') match as the digits alone, leaving the '.'
// for the operator rule.  Everything else is a plain {NUMBER}.
681 LEXER_DEBUG ("{D}+/\\.[\\*/\\^\\']|{NUMBER}");
682 handle_number ();
683 COUNT_TOK_AND_RETURN (NUM);
684 }
685
686 %{
687 // Eat whitespace. Whitespace inside matrix constants is handled by
688 // the <MATRIX_START> start state code above.
689 %}
690
691 {S}* {
// Blanks/tabs outside matrix lists produce no token; only the column
// counter is advanced.
692 current_input_column += yyleng;
693 }
694
695 %{
696 // Continuation lines. Allow comments after continuations.
697 %}
698
699 {CONT}{S}*{NL} |
700 {CONT}{S}*{COMMENT} {
// A continuation marker ("..." or "\") followed by end of line or a
// comment.  No token is returned; the line counter is advanced and the
// column restarts at 1.
701 LEXER_DEBUG ("{CONT}{S}*{NL}|{CONT}{S}*{COMMENT}");
702
// The backslash form triggers the Matlab-incompatibility gripe.
703 if (yytext[0] == '\\')
704 gripe_matlab_incompatible_continuation ();
705 scan_for_comments (yytext);
706 promptflag--;
707 input_line_number++;
708 current_input_column = 1;
709 }
710
711 %{
712 // End of file.
713 %}
714
715 <<EOF>> {
// End of input.  If a block comment is still open, warn about it; when
// reading a function, script or classdef file with a known name, also
// report the line number and file.  END_OF_INPUT is returned either
// way.
716 LEXER_DEBUG ("<<EOF>>");
717
718 if (block_comment_nesting_level != 0)
719 {
720 warning ("block comment open at end of input");
721
722 if ((reading_fcn_file || reading_script_file || reading_classdef_file)
723 && ! curr_fcn_file_name.empty ())
724 warning ("near line %d of file `%s.m'",
725 input_line_number, curr_fcn_file_name.c_str ());
726 }
727
728 TOK_RETURN (END_OF_INPUT);
729 }
730
731 %{
732 // Identifiers. Truncate the token at the first space or tab but
733 // don't write directly on yytext.
734 %}
735
736 {IDENT}{S}* {
// An identifier plus trailing blanks/tabs.  handle_identifier decides
// which token it is; a negative result means no token is returned from
// this action.
737 LEXER_DEBUG ("{IDENT}{S}*");
738
739 int id_tok = handle_identifier ();
740
741 if (id_tok >= 0)
742 COUNT_TOK_AND_RETURN (id_tok);
743 }
744
745 %{
746 // Superclass method identifiers.
747 %}
748
{IDENT}@{IDENT}{S}* |
{IDENT}@{IDENT}\.{IDENT}{S}* {
    // Superclass method reference: METHOD@CLASS or METHOD@PACKAGE.CLASS.
    //
    // The '.' between package and class is escaped.  An unescaped '.'
    // is flex's "any character except newline" wildcard, so the rule
    // would also have swallowed input such as a@b+c as one token.  The
    // metaclass query rule spells its dot the same way.
    LEXER_DEBUG ("{IDENT}@{IDENT}{S}*|{IDENT}@{IDENT}\\.{IDENT}{S}*");

    int id_tok = handle_superclass_identifier ();

    // A negative result means no token is returned from this action.
    if (id_tok >= 0)
      {
        lexer_flags.looking_for_object_index = true;

        COUNT_TOK_AND_RETURN (SUPERCLASSREF);
      }
  }
762
763 %{
764 // Metaclass query
765 %}
766
767 \?{IDENT}{S}* |
768 \?{IDENT}\.{IDENT}{S}* {
// Metaclass query: ?CLASS or ?PACKAGE.CLASS.  handle_meta_identifier
// processes the text; when it returns a non-negative value the
// METAQUERY token is returned, otherwise no token is returned from
// this action.
769 LEXER_DEBUG ("\\?{IDENT}{S}*|\\?{IDENT}\\.{IDENT}{S}*");
770
771 int id_tok = handle_meta_identifier ();
772
773 if (id_tok >= 0)
774 {
775 lexer_flags.looking_for_object_index = true;
776
777 COUNT_TOK_AND_RETURN (METAQUERY);
778 }
779 }
780
781 %{
782 // Function handles and superclass references
783 %}
784
785 "@" {
// A lone '@'.  Space-to-comma conversion is turned off and the
// looking_at_function_handle counter is incremented before '@' is
// returned.
786 LEXER_DEBUG ("@");
787
788 current_input_column++;
789
790 lexer_flags.quote_is_transpose = false;
791 lexer_flags.convert_spaces_to_comma = false;
792 lexer_flags.looking_at_function_handle++;
793 lexer_flags.looking_for_object_index = false;
794 lexer_flags.at_beginning_of_statement = false;
795
796 COUNT_TOK_AND_RETURN ('@');
797
798 }
799
800 %{
801 // A new line character. New line characters inside matrix constants
802 // are handled by the <MATRIX_START> start state code above. If closest
803 // nesting is inside parentheses, don't return a row separator.
804 %}
805
806 {NL} {
// Newline outside the <MATRIX_START> state.  What happens depends on
// the innermost open delimiter:
//   none      -> '\n' is returned and a new statement begins;
//   ( )       -> no token, only a Matlab-incompatibility gripe;
//   [ ] / { } -> LEXICAL_ERROR.
807 LEXER_DEBUG ("{NL}");
808
809 input_line_number++;
810 current_input_column = 1;
811
812 lexer_flags.quote_is_transpose = false;
813 lexer_flags.convert_spaces_to_comma = true;
814
815 if (nesting_level.none ())
816 {
817 lexer_flags.at_beginning_of_statement = true;
818 COUNT_TOK_AND_RETURN ('\n');
819 }
820 else if (nesting_level.is_paren ())
821 {
822 lexer_flags.at_beginning_of_statement = false;
823 gripe_matlab_incompatible ("bare newline inside parentheses");
824 }
825 else if (nesting_level.is_bracket_or_brace ())
826 return LEXICAL_ERROR;
827 }
828
829 %{
830 // Single quote can either be the beginning of a string or a transpose
831 // operator.
832 %}
833
834 "'" {
// lexer_flags.quote_is_transpose, set by the preceding token, decides
// the meaning: when true the quote is the QUOTE (transpose) operator,
// otherwise it opens a string that handle_string scans.
835 LEXER_DEBUG ("'");
836
837 current_input_column++;
838 lexer_flags.convert_spaces_to_comma = true;
839
840 if (lexer_flags.quote_is_transpose)
841 {
842 do_comma_insert_check ();
843 COUNT_TOK_AND_RETURN (QUOTE);
844 }
845 else
846 {
847 int tok = handle_string ('\'');
848 COUNT_TOK_AND_RETURN (tok);
849 }
850 }
851
852 %{
853 // Double quotes always begin strings.
854 %}
855
856 \" {
// Advance past the opening quote and let handle_string scan the rest
// with '"' as the delimiter; its token is returned.
857 LEXER_DEBUG ("\"");
858
859 current_input_column++;
860 int tok = handle_string ('"');
861
862 COUNT_TOK_AND_RETURN (tok);
863 }
864
865 %{
866 // Gobble comments.
867 %}
868
869 {CCHAR} {
// A comment character ('#' or '%').  It is handed back through xunput
// and process_comment is then called with start_in_block = false.
// END_OF_INPUT is returned if process_comment set eof; otherwise a
// positive result is returned as the token, and any other result
// produces no token.
870 LEXER_DEBUG ("{CCHAR}");
871
872 lexer_flags.looking_for_object_index = false;
873
874 xunput (yytext[0], yytext);
875
876 bool eof = false;
877 int tok = process_comment (false, eof);
878
879 if (eof)
880 TOK_RETURN (END_OF_INPUT);
881 else if (tok > 0)
882 COUNT_TOK_AND_RETURN (tok);
883 }
884
885 %{
886 // Block comments.
887 %}
888
889 ^{S}*{CCHAR}\{{S}*{NL} {
// Start of a block comment: a line holding only a comment character
// followed by '{'.  The nesting depth is incremented and
// process_comment is called with start_in_block = true; its return
// value and the eof flag are not examined here.
890 LEXER_DEBUG ("^{S}*{CCHAR}\\{{S}*{NL}");
891
892 lexer_flags.looking_for_object_index = false;
893
894 input_line_number++;
895 current_input_column = 1;
896 block_comment_nesting_level++;
897 promptflag--;
898
899 bool eof = false;
900 process_comment (true, eof);
901 }
902
903 %{
904 // Other operators.
905 %}
906
907 ":" { LEXER_DEBUG (":"); BIN_OP_RETURN (':', false, false); }
  /* Operator table.  In BIN_OP_RETURN (tok, convert, bos), CONVERT is
     stored in lexer_flags.convert_spaces_to_comma and BOS in
     lexer_flags.at_beginning_of_statement.  The XBIN_ forms additionally
     pass the operator text to gripe_matlab_incompatible_operator.  The
     postfix operators .' ++ -- run do_comma_insert_check first.  */
908
909 ".+" { LEXER_DEBUG (".+"); XBIN_OP_RETURN (EPLUS, false, false); }
910 ".-" { LEXER_DEBUG (".-"); XBIN_OP_RETURN (EMINUS, false, false); }
911 ".*" { LEXER_DEBUG (".*"); BIN_OP_RETURN (EMUL, false, false); }
912 "./" { LEXER_DEBUG ("./"); BIN_OP_RETURN (EDIV, false, false); }
913 ".\\" { LEXER_DEBUG (".\\"); BIN_OP_RETURN (ELEFTDIV, false, false); }
914 ".^" { LEXER_DEBUG (".^"); BIN_OP_RETURN (EPOW, false, false); }
915 ".**" { LEXER_DEBUG (".**"); XBIN_OP_RETURN (EPOW, false, false); }
916 ".'" { LEXER_DEBUG (".'"); do_comma_insert_check (); BIN_OP_RETURN (TRANSPOSE, true, false); }
917 "++" { LEXER_DEBUG ("++"); do_comma_insert_check (); XBIN_OP_RETURN_INTERNAL (PLUS_PLUS, true, false, true); }
918 "--" { LEXER_DEBUG ("--"); do_comma_insert_check (); XBIN_OP_RETURN_INTERNAL (MINUS_MINUS, true, false, true); }
919 "<=" { LEXER_DEBUG ("<="); BIN_OP_RETURN (EXPR_LE, false, false); }
920 "==" { LEXER_DEBUG ("=="); BIN_OP_RETURN (EXPR_EQ, false, false); }
921 "~=" { LEXER_DEBUG ("~="); BIN_OP_RETURN (EXPR_NE, false, false); }
922 "!=" { LEXER_DEBUG ("!="); XBIN_OP_RETURN (EXPR_NE, false, false); }
923 ">=" { LEXER_DEBUG (">="); BIN_OP_RETURN (EXPR_GE, false, false); }
924 "&" { LEXER_DEBUG ("&"); BIN_OP_RETURN (EXPR_AND, false, false); }
925 "|" { LEXER_DEBUG ("|"); BIN_OP_RETURN (EXPR_OR, false, false); }
926 "<" { LEXER_DEBUG ("<"); BIN_OP_RETURN (EXPR_LT, false, false); }
927 ">" { LEXER_DEBUG (">"); BIN_OP_RETURN (EXPR_GT, false, false); }
928 "+" { LEXER_DEBUG ("+"); BIN_OP_RETURN ('+', false, false); }
929 "-" { LEXER_DEBUG ("-"); BIN_OP_RETURN ('-', false, false); }
930 "*" { LEXER_DEBUG ("*"); BIN_OP_RETURN ('*', false, false); }
931 "/" { LEXER_DEBUG ("/"); BIN_OP_RETURN ('/', false, false); }
932 "\\" { LEXER_DEBUG ("\\"); BIN_OP_RETURN (LEFTDIV, false, false); }
933 ";" { LEXER_DEBUG (";"); BIN_OP_RETURN (';', true, true); }
934 "," { LEXER_DEBUG (","); BIN_OP_RETURN (',', true, ! lexer_flags.looking_at_object_index.front ()); }
935 "^" { LEXER_DEBUG ("^"); BIN_OP_RETURN (POW, false, false); }
936 "**" { LEXER_DEBUG ("**"); XBIN_OP_RETURN (POW, false, false); }
937 "=" { LEXER_DEBUG ("="); BIN_OP_RETURN ('=', true, false); }
938 "&&" { LEXER_DEBUG ("&&"); BIN_OP_RETURN (EXPR_AND_AND, false, false); }
939 "||" { LEXER_DEBUG ("||"); BIN_OP_RETURN (EXPR_OR_OR, false, false); }
940 "<<" { LEXER_DEBUG ("<<"); XBIN_OP_RETURN (LSHIFT, false, false); }
941 ">>" { LEXER_DEBUG (">>"); XBIN_OP_RETURN (RSHIFT, false, false); }
942
943 {NOT} {
// Logical negation.  Both spellings yield EXPR_NOT; the '!' spelling
// goes through XBIN_OP_RETURN and so also triggers the
// Matlab-incompatibility gripe.
944 LEXER_DEBUG ("{NOT}");
945
946 if (yytext[0] == '~')
947 BIN_OP_RETURN (EXPR_NOT, false, false);
948 else
949 XBIN_OP_RETURN (EXPR_NOT, false, false);
950 }
951
952 "(" {
// Opening parenthesis: remember on the looking_at_object_index stack
// whether it starts an index expression, record a paren nesting level,
// and return '('.
953 LEXER_DEBUG ("(");
954
955 // If we are looking for an object index, then push TRUE for
956 // looking_at_object_index. Otherwise, just push whatever state
957 // is current (so that we can pop it off the stack when we find
958 // the matching close paren).
959
960 lexer_flags.looking_at_object_index.push_front
961 (lexer_flags.looking_for_object_index);
962
963 lexer_flags.looking_at_indirect_ref = false;
964 lexer_flags.looking_for_object_index = false;
965 lexer_flags.at_beginning_of_statement = false;
966
967 nesting_level.paren ();
968 promptflag--;
969
970 TOK_RETURN ('(');
971 }
972
")" {
    LEXER_DEBUG (")");

    // Leave the innermost nesting level and discard the matching
    // looking_at_object_index entry.
    nesting_level.remove ();
    current_input_column++;

    lexer_flags.looking_at_object_index.pop_front ();

    // After ')' a quote is a transpose and the result may be indexed.
    lexer_flags.quote_is_transpose = true;
    lexer_flags.looking_for_object_index = true;
    lexer_flags.at_beginning_of_statement = false;

    // Blanks separate elements only when this ')' returns us to the
    // inside of [] or {} and it did not close the argument list of an
    // anonymous function.
    lexer_flags.convert_spaces_to_comma
      = (! lexer_flags.looking_at_anon_fcn_args
         && nesting_level.is_bracket_or_brace ());

    // Whatever it was before, the argument list is finished now.
    lexer_flags.looking_at_anon_fcn_args = false;

    do_comma_insert_check ();

    COUNT_TOK_AND_RETURN (')');
  }
995
996 "." {
// A lone '.' that is not part of a number or a dot operator is
// returned as the '.' token.
997 LEXER_DEBUG (".");
998
999 lexer_flags.looking_for_object_index = false;
1000 lexer_flags.at_beginning_of_statement = false;
1001
1002 TOK_RETURN ('.');
1003 }
1004
1005 "+=" { LEXER_DEBUG ("+="); XBIN_OP_RETURN (ADD_EQ, false, false); }
  /* Computed-assignment operators.  Every one of them goes through
     XBIN_OP_RETURN and therefore triggers the Matlab-incompatibility
     gripe.  Note that ".+=" and ".-=" return the same tokens as "+="
     and "-=".  */
1006 "-=" { LEXER_DEBUG ("-="); XBIN_OP_RETURN (SUB_EQ, false, false); }
1007 "*=" { LEXER_DEBUG ("*="); XBIN_OP_RETURN (MUL_EQ, false, false); }
1008 "/=" { LEXER_DEBUG ("/="); XBIN_OP_RETURN (DIV_EQ, false, false); }
1009 "\\=" { LEXER_DEBUG ("\\="); XBIN_OP_RETURN (LEFTDIV_EQ, false, false); }
1010 ".+=" { LEXER_DEBUG (".+="); XBIN_OP_RETURN (ADD_EQ, false, false); }
1011 ".-=" { LEXER_DEBUG (".-="); XBIN_OP_RETURN (SUB_EQ, false, false); }
1012 ".*=" { LEXER_DEBUG (".*="); XBIN_OP_RETURN (EMUL_EQ, false, false); }
1013 "./=" { LEXER_DEBUG ("./="); XBIN_OP_RETURN (EDIV_EQ, false, false); }
1014 ".\\=" { LEXER_DEBUG (".\\="); XBIN_OP_RETURN (ELEFTDIV_EQ, false, false); }
1015 {POW}= { LEXER_DEBUG ("{POW}="); XBIN_OP_RETURN (POW_EQ, false, false); }
1016 {EPOW}= { LEXER_DEBUG ("{EPOW}="); XBIN_OP_RETURN (EPOW_EQ, false, false); }
1017 "&=" { LEXER_DEBUG ("&="); XBIN_OP_RETURN (AND_EQ, false, false); }
1018 "|=" { LEXER_DEBUG ("|="); XBIN_OP_RETURN (OR_EQ, false, false); }
1019 "<<=" { LEXER_DEBUG ("<<="); XBIN_OP_RETURN (LSHIFT_EQ, false, false); }
1020 ">>=" { LEXER_DEBUG (">>="); XBIN_OP_RETURN (RSHIFT_EQ, false, false); }
1021
1022 \{{S}* {
// Opening '{' (plus trailing blanks/tabs): record a brace nesting
// level, push the current looking_for_object_index value on the
// looking_at_object_index stack, and switch to the MATRIX_START state.
1023 LEXER_DEBUG ("\\{{S}*");
1024
1025 nesting_level.brace ();
1026
1027 lexer_flags.looking_at_object_index.push_front
1028 (lexer_flags.looking_for_object_index);
1029
1030 current_input_column += yyleng;
1031 lexer_flags.quote_is_transpose = false;
1032 lexer_flags.convert_spaces_to_comma = true;
1033 lexer_flags.looking_for_object_index = false;
1034 lexer_flags.at_beginning_of_statement = false;
1035
1036 promptflag--;
1037 eat_whitespace ();
1038
1039 lexer_flags.braceflag++;
1040 BEGIN (MATRIX_START);
1041 COUNT_TOK_AND_RETURN ('{');
1042 }
1043
1044 "}" {
// A '}' matched by the general rule rather than by a <MATRIX_START>
// rule: discard the matching looking_at_object_index entry and the
// innermost nesting level, then return '}'.
1045 LEXER_DEBUG ("}");
1046
1047 lexer_flags.looking_at_object_index.pop_front ();
1048
1049 lexer_flags.looking_for_object_index = true;
1050 lexer_flags.at_beginning_of_statement = false;
1051
1052 nesting_level.remove ();
1053
1054 TOK_RETURN ('}');
1055 }
1056
1057 %{
1058 // Unrecognized input is a lexical error.
1059 %}
1060
1061 . {
// Catch-all for characters no other rule accepts.  The character is
// handed to xunput and then read again with text_yyinput.  EOF yields
// END_OF_INPUT; anything else is reported with its line and column and
// yields LEXICAL_ERROR.
1062 LEXER_DEBUG (".");
1063
1064 xunput (yytext[0], yytext);
1065
1066 int c = text_yyinput ();
1067
1068 if (c != EOF)
1069 {
1070 current_input_column++;
1071
1072 error ("invalid character `%s' (ASCII %d) near line %d, column %d",
1073 undo_string_escape (static_cast<char> (c)), c,
1074 input_line_number, current_input_column);
1075
1076 return LEXICAL_ERROR;
1077 }
1078 else
1079 TOK_RETURN (END_OF_INPUT);
1080 }
1081
1082 %%
1083
1084 // GAG.
1085 //
1086 // If we're reading a matrix and the next character is '[', make sure
1087 // that we insert a comma ahead of it.
1088
1089 void
1090 do_comma_insert_check (void)
1091 {
1092 int spc_gobbled = eat_continuation ();
1093
1094 int c = text_yyinput ();
1095
1096 xunput (c, yytext);
1097
1098 if (spc_gobbled)
1099 xunput (' ', yytext);
1100
1101 lexer_flags.do_comma_insert = (! lexer_flags.looking_at_object_index.front ()
1102 && lexer_flags.bracketflag && c == '[');
1103 }
1104
1105 // Fix things up for errors or interrupts. The parser is never called
1106 // recursively, so it is always safe to reinitialize its state before
1107 // doing any parsing.
1108
1109 void
1110 reset_parser (void)
1111 {
1112 // Start off on the right foot.
1113 BEGIN (INITIAL);
1114
1115 parser_end_of_input = false;
1116
1117 parser_symtab_context.clear ();
1118
1119 // We do want a prompt by default.
1120 promptflag = 1;
1121
1122 // We are not in a block comment.
1123 block_comment_nesting_level = 0;
1124
1125 // Error may have occurred inside some brackets, braces, or parentheses.
1126 nesting_level.clear ();
1127
1128 // Clear out the stack of token info used to track line and column
1129 // numbers.
1130 while (! token_stack.empty ())
1131 {
1132 delete token_stack.top ();
1133 token_stack.pop ();
1134 }
1135
1136 // Can be reset by defining a function.
1137 if (! (reading_script_file || reading_fcn_file || reading_classdef_file))
1138 {
1139 current_input_column = 1;
1140 input_line_number = command_editor::current_command_number ();
1141 }
1142
1143 // Only ask for input from stdin if we are expecting interactive
1144 // input.
1145
1146 if (! quitting_gracefully
1147 && (interactive || forced_interactive)
1148 && ! (reading_fcn_file
1149 || reading_classdef_file
1150 || reading_script_file
1151 || get_input_from_eval_string
1152 || input_from_startup_file))
1153 yyrestart (stdin);
1154
1155 // Clear the buffer for help text.
1156 while (! help_buf.empty ())
1157 help_buf.pop ();
1158
1159 // Reset other flags.
1160 lexer_flags.init ();
1161 }
1162
// Write a printable representation of C to std::cerr (used by the
// lexer debugging output).
//
// Characters for which isgraph is true are written as-is.  Codes 0
// through 32 and 127 are written using a mnemonic ("NUL", "\\n",
// "SPACE", "DEL", ...).  Anything else produces no output.

static void
display_character (char c)
{
  // Mnemonics for character codes 0..32, indexed by code.
  static const char * const names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  // The <cctype> functions require a value representable as unsigned
  // char (or EOF); passing a negative plain char is undefined.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc <= 32)
    std::cerr << names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
}
1308
1309 static int
1310 text_yyinput (void)
1311 {
1312 int c = yyinput ();
1313
1314 if (lexer_debug_flag)
1315 {
1316 std::cerr << "I: ";
1317 display_character (c);
1318 std::cerr << std::endl;
1319 }
1320
1321 // Convert CRLF into just LF and single CR into LF.
1322
1323 if (c == '\r')
1324 {
1325 c = yyinput ();
1326
1327 if (lexer_debug_flag)
1328 {
1329 std::cerr << "I: ";
1330 display_character (c);
1331 std::cerr << std::endl;
1332 }
1333
1334 if (c != '\n')
1335 {
1336 xunput (c, yytext);
1337 c = '\n';
1338 }
1339 }
1340
1341 if (c == '\n')
1342 input_line_number++;
1343
1344 return c;
1345 }
1346
1347 static void
1348 xunput (char c, char *buf)
1349 {
1350 if (lexer_debug_flag)
1351 {
1352 std::cerr << "U: ";
1353 display_character (c);
1354 std::cerr << std::endl;
1355 }
1356
1357 if (c == '\n')
1358 input_line_number--;
1359
1360 yyunput (c, buf);
1361 }
1362
1363 // If we read some newlines, we need figure out what column we're
1364 // really looking at.
1365
1366 static void
1367 fixup_column_count (char *s)
1368 {
1369 char c;
1370 while ((c = *s++) != '\0')
1371 {
1372 if (c == '\n')
1373 {
1374 input_line_number++;
1375 current_input_column = 1;
1376 }
1377 else
1378 current_input_column++;
1379 }
1380 }
1381
// Defined here so that we don't have to link to libfl.a.  Always
// returns 1.

int
yywrap (void)
{
  const int no_more_input = 1;

  return no_more_input;
}
1389
1390 // Tell us all what the current buffer is.
1391
1392 YY_BUFFER_STATE
1393 current_buffer (void)
1394 {
1395 return YY_CURRENT_BUFFER;
1396 }
1397
1398 // Create a new buffer.
1399
1400 YY_BUFFER_STATE
1401 create_buffer (FILE *f)
1402 {
1403 return yy_create_buffer (f, YY_BUF_SIZE);
1404 }
1405
1406 // Start reading a new buffer.
1407
1408 void
1409 switch_to_buffer (YY_BUFFER_STATE buf)
1410 {
1411 yy_switch_to_buffer (buf);
1412 }
1413
1414 // Delete a buffer.
1415
1416 void
1417 delete_buffer (YY_BUFFER_STATE buf)
1418 {
1419 yy_delete_buffer (buf);
1420
1421 // Prevent invalid yyin from being used by yyrestart.
1422 if (! current_buffer ())
1423 yyin = 0;
1424 }
1425
1426 // Delete all buffers from the stack.
1427 void
1428 clear_all_buffers (void)
1429 {
1430 while (current_buffer ())
1431 octave_pop_buffer_state ();
1432 }
1433
1434 void
1435 cleanup_parser (void)
1436 {
1437 reset_parser ();
1438
1439 clear_all_buffers ();
1440 }
1441
1442 // Restore a buffer (for unwind-prot).
1443
1444 void
1445 restore_input_buffer (void *buf)
1446 {
1447 switch_to_buffer (static_cast<YY_BUFFER_STATE> (buf));
1448 }
1449
1450 // Delete a buffer (for unwind-prot).
1451
1452 void
1453 delete_input_buffer (void *buf)
1454 {
1455 delete_buffer (static_cast<YY_BUFFER_STATE> (buf));
1456 }
1457
1458 static bool
1459 inside_any_object_index (void)
1460 {
1461 bool retval = false;
1462
1463 for (std::list<bool>::const_iterator i = lexer_flags.looking_at_object_index.begin ();
1464 i != lexer_flags.looking_at_object_index.end (); i++)
1465 {
1466 if (*i)
1467 {
1468 retval = true;
1469 break;
1470 }
1471 }
1472
1473 return retval;
1474 }
1475
1476 // Handle keywords. Return -1 if the keyword should be ignored.
1477
1478 static int
1479 is_keyword_token (const std::string& s)
1480 {
1481 int l = input_line_number;
1482 int c = current_input_column;
1483
1484 int len = s.length ();
1485
1486 const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), len);
1487
1488 if (kw)
1489 {
1490 yylval.tok_val = 0;
1491
1492 switch (kw->kw_id)
1493 {
1494 case break_kw:
1495 case catch_kw:
1496 case continue_kw:
1497 case else_kw:
1498 case otherwise_kw:
1499 case return_kw:
1500 case unwind_protect_cleanup_kw:
1501 lexer_flags.at_beginning_of_statement = true;
1502 break;
1503
1504 case static_kw:
1505 if ((reading_fcn_file || reading_script_file
1506 || reading_classdef_file)
1507 && ! curr_fcn_file_full_name.empty ())
1508 warning_with_id ("Octave:deprecated-keyword",
1509 "the `static' keyword is obsolete and will be removed from a future version of Octave; please use `persistent' instead; near line %d of file `%s'",
1510 input_line_number,
1511 curr_fcn_file_full_name.c_str ());
1512 else
1513 warning_with_id ("Octave:deprecated-keyword",
1514 "the `static' keyword is obsolete and will be removed from a future version of Octave; please use `persistent' instead; near line %d",
1515 input_line_number);
1516 // fall through ...
1517
1518 case persistent_kw:
1519 break;
1520
1521 case case_kw:
1522 case elseif_kw:
1523 case global_kw:
1524 case until_kw:
1525 break;
1526
1527 case end_kw:
1528 if (inside_any_object_index ()
1529 || (! reading_classdef_file
1530 && (lexer_flags.defining_func
1531 && ! (lexer_flags.looking_at_return_list
1532 || lexer_flags.parsed_function_name.top ()))))
1533 return 0;
1534
1535 yylval.tok_val = new token (token::simple_end, l, c);
1536 lexer_flags.at_beginning_of_statement = true;
1537 break;
1538
1539 case end_try_catch_kw:
1540 yylval.tok_val = new token (token::try_catch_end, l, c);
1541 lexer_flags.at_beginning_of_statement = true;
1542 break;
1543
1544 case end_unwind_protect_kw:
1545 yylval.tok_val = new token (token::unwind_protect_end, l, c);
1546 lexer_flags.at_beginning_of_statement = true;
1547 break;
1548
1549 case endfor_kw:
1550 yylval.tok_val = new token (token::for_end, l, c);
1551 lexer_flags.at_beginning_of_statement = true;
1552 break;
1553
1554 case endfunction_kw:
1555 yylval.tok_val = new token (token::function_end, l, c);
1556 lexer_flags.at_beginning_of_statement = true;
1557 break;
1558
1559 case endif_kw:
1560 yylval.tok_val = new token (token::if_end, l, c);
1561 lexer_flags.at_beginning_of_statement = true;
1562 break;
1563
1564 case endparfor_kw:
1565 yylval.tok_val = new token (token::parfor_end, l, c);
1566 lexer_flags.at_beginning_of_statement = true;
1567 break;
1568
1569 case endswitch_kw:
1570 yylval.tok_val = new token (token::switch_end, l, c);
1571 lexer_flags.at_beginning_of_statement = true;
1572 break;
1573
1574 case endwhile_kw:
1575 yylval.tok_val = new token (token::while_end, l, c);
1576 lexer_flags.at_beginning_of_statement = true;
1577 break;
1578
1579 case endclassdef_kw:
1580 yylval.tok_val = new token (token::classdef_end, l, c);
1581 lexer_flags.at_beginning_of_statement = true;
1582 break;
1583
1584 case endenumeration_kw:
1585 yylval.tok_val = new token (token::enumeration_end, l, c);
1586 lexer_flags.at_beginning_of_statement = true;
1587 break;
1588
1589 case endevents_kw:
1590 yylval.tok_val = new token (token::events_end, l, c);
1591 lexer_flags.at_beginning_of_statement = true;
1592 break;
1593
1594 case endmethods_kw:
1595 yylval.tok_val = new token (token::methods_end, l, c);
1596 lexer_flags.at_beginning_of_statement = true;
1597 break;
1598
1599 case endproperties_kw:
1600 yylval.tok_val = new token (token::properties_end, l, c);
1601 lexer_flags.at_beginning_of_statement = true;
1602 break;
1603
1604
1605 case for_kw:
1606 case parfor_kw:
1607 case while_kw:
1608 promptflag--;
1609 lexer_flags.looping++;
1610 break;
1611
1612 case do_kw:
1613 lexer_flags.at_beginning_of_statement = true;
1614 promptflag--;
1615 lexer_flags.looping++;
1616 break;
1617
1618 case try_kw:
1619 case unwind_protect_kw:
1620 lexer_flags.at_beginning_of_statement = true;
1621 promptflag--;
1622 break;
1623
1624 case if_kw:
1625 case switch_kw:
1626 promptflag--;
1627 break;
1628
1629 case get_kw:
1630 case set_kw:
1631 // 'get' and 'set' are keywords in classdef method
1632 // declarations.
1633 if (! lexer_flags.maybe_classdef_get_set_method)
1634 return 0;
1635 break;
1636
1637 case enumeration_kw:
1638 case events_kw:
1639 case methods_kw:
1640 case properties_kw:
1641 // 'properties', 'methods' and 'events' are keywords for
1642 // classdef blocks.
1643 if (! lexer_flags.parsing_classdef)
1644 return 0;
1645 // fall through ...
1646
1647 case classdef_kw:
1648 // 'classdef' is always a keyword.
1649 promptflag--;
1650 break;
1651
1652 case function_kw:
1653 promptflag--;
1654
1655 lexer_flags.defining_func++;
1656 lexer_flags.parsed_function_name.push (false);
1657
1658 if (! (reading_fcn_file || reading_script_file
1659 || reading_classdef_file))
1660 input_line_number = 1;
1661 break;
1662
1663 case magic_file_kw:
1664 {
1665 if ((reading_fcn_file || reading_script_file
1666 || reading_classdef_file)
1667 && ! curr_fcn_file_full_name.empty ())
1668 yylval.tok_val = new token (curr_fcn_file_full_name, l, c);
1669 else
1670 yylval.tok_val = new token ("stdin", l, c);
1671 }
1672 break;
1673
1674 case magic_line_kw:
1675 yylval.tok_val = new token (static_cast<double> (l), "", l, c);
1676 break;
1677
1678 default:
1679 panic_impossible ();
1680 }
1681
1682 if (! yylval.tok_val)
1683 yylval.tok_val = new token (l, c);
1684
1685 token_stack.push (yylval.tok_val);
1686
1687 return kw->tok;
1688 }
1689
1690 return 0;
1691 }
1692
1693 static bool
1694 is_variable (const std::string& name)
1695 {
1696 return (symbol_table::is_variable (name)
1697 || (lexer_flags.pending_local_variables.find (name)
1698 != lexer_flags.pending_local_variables.end ()));
1699 }
1700
// Collect the text of a block comment from READER and return it.
//
// A line is treated as a block marker when it starts with '%' or '#',
// immediately followed by '{' or '}', followed by only blanks/tabs up
// to the newline.  An opening marker increments
// block_comment_nesting_level (and decrements promptflag); a closing
// marker does the reverse.  When the nesting level drops to zero, the
// comment text that follows is appended via grab_comment_block and the
// result is returned.  EOF is set to true if the input ends first.
//
// Note that a '%' or '#' seen at the beginning of a line is never
// copied to the returned text, whether or not a marker follows.
1701 static std::string
1702 grab_block_comment (stream_reader& reader, bool& eof)
1703 {
1704 std::string buf;
1705
// at_bol: the next character read is the first on its line.
// look_for_marker: the previous character was a '%' or '#' at the
// beginning of a line, so this one may be '{' or '}'.
1706 bool at_bol = true;
1707 bool look_for_marker = false;
1708
// Only gripe about '#' comments once per call.
1709 bool warned_incompatible = false;
1710
1711 int c = 0;
1712
1713 while ((c = reader.getc ()) != EOF)
1714 {
1715 current_input_column++;
1716
1717 if (look_for_marker)
1718 {
1719 at_bol = false;
1720 look_for_marker = false;
1721
1722 if (c == '{' || c == '}')
1723 {
// tmp_buf holds the brace and any blanks after it, in case this
// turns out not to be a marker line after all.
1724 std::string tmp_buf (1, static_cast<char> (c));
1725
1726 int type = c;
1727
1728 bool done = false;
1729
// NOTE(review): the loop condition reads one more character before
// DONE is tested, so on exit C holds a look-ahead character that is
// handled by the code following this if-block (its column is not
// counted).  If that read returns EOF, C == EOF also falls through to
// that code -- confirm whether appending it to BUF is intended.
1730 while ((c = reader.getc ()) != EOF && ! done)
1731 {
1732 current_input_column++;
1733
1734 switch (c)
1735 {
1736 case ' ':
1737 case '\t':
1738 tmp_buf += static_cast<char> (c);
1739 break;
1740
1741 case '\n':
1742 {
// Only blanks followed the brace: this line is a block marker.
1743 current_input_column = 0;
1744 at_bol = true;
1745 done = true;
1746
1747 if (type == '{')
1748 {
1749 block_comment_nesting_level++;
1750 promptflag--;
1751 }
1752 else
1753 {
1754 block_comment_nesting_level--;
1755 promptflag++;
1756
// Outermost block closed: pick up any comment text that follows
// and finish.
1757 if (block_comment_nesting_level == 0)
1758 {
1759 buf += grab_comment_block (reader, true, eof);
1760
1761 return buf;
1762 }
1763 }
1764 }
1765 break;
1766
1767 default:
// Something other than blanks followed the brace: not a marker, so
// keep the text read so far as part of the comment.
1768 at_bol = false;
1769 tmp_buf += static_cast<char> (c);
1770 buf += tmp_buf;
1771 done = true;
1772 break;
1773 }
1774 }
1775 }
1776 }
1777
1778 if (at_bol && (c == '%' || c == '#'))
1779 {
1780 if (c == '#' && ! warned_incompatible)
1781 {
1782 warned_incompatible = true;
1783 maybe_gripe_matlab_incompatible_comment (c);
1784 }
1785
// Possible start of a marker; the next character decides.
1786 at_bol = false;
1787 look_for_marker = true;
1788 }
1789 else
1790 {
1791 buf += static_cast<char> (c);
1792
1793 if (c == '\n')
1794 {
1795 current_input_column = 0;
1796 at_bol = true;
1797 }
1798 }
1799 }
1800
1801 if (c == EOF)
1802 eof = true;
1803
1804 return buf;
1805 }
1806
// Collect consecutive comment lines from READER and return their text.
//
// AT_BOL tells whether the reader is positioned at the beginning of a
// line.  Blanks and tabs outside of a comment are skipped; the first
// other non-comment character is pushed back onto the reader and ends
// the scan.  The comment characters that introduce a comment are not
// copied to the result.  A '{' at the beginning of a line right after
// a comment character, followed only by blanks up to the newline,
// starts a block comment that is collected by grab_block_comment.
// EOF is set to true if the input ends while scanning.
1807 std::string
1808 grab_comment_block (stream_reader& reader, bool at_bol,
1809 bool& eof)
1810 {
1811 std::string buf;
1812
1813 // TRUE means we are at the beginning of a comment block.
1814 bool begin_comment = false;
1815
1816 // TRUE means we are currently reading a comment block.
1817 bool in_comment = false;
1818
// Only gripe about '#' comments once per call.
1819 bool warned_incompatible = false;
1820
1821 int c = 0;
1822
1823 while ((c = reader.getc ()) != EOF)
1824 {
1825 current_input_column++;
1826
1827 if (begin_comment)
1828 {
// Additional comment characters directly after the first one are
// skipped.
1829 if (c == '%' || c == '#')
1830 {
1831 at_bol = false;
1832 continue;
1833 }
1834 else if (at_bol && c == '{')
1835 {
// tmp_buf holds the brace and any blanks after it, in case this
// turns out not to be a block-comment marker.
1836 std::string tmp_buf (1, static_cast<char> (c));
1837
1838 bool done = false;
1839
// NOTE(review): as in grab_block_comment, the loop condition reads
// one more character before DONE is tested; that look-ahead
// character (possibly EOF) is handled by the code after this
// if/else chain.
1840 while ((c = reader.getc ()) != EOF && ! done)
1841 {
1842 current_input_column++;
1843
1844 switch (c)
1845 {
1846 case ' ':
1847 case '\t':
1848 tmp_buf += static_cast<char> (c);
1849 break;
1850
1851 case '\n':
1852 {
// Marker line confirmed: enter the block comment and collect it.
1853 current_input_column = 0;
1854 at_bol = true;
1855 done = true;
1856
1857 block_comment_nesting_level++;
1858 promptflag--;
1859
1860 buf += grab_block_comment (reader, eof);
1861
1862 in_comment = false;
1863
1864 if (eof)
1865 goto done;
1866 }
1867 break;
1868
1869 default:
// Not a marker line: keep the text as ordinary comment text.
1870 at_bol = false;
1871 tmp_buf += static_cast<char> (c);
1872 buf += tmp_buf;
1873 done = true;
1874 break;
1875 }
1876 }
1877 }
1878 else
1879 {
1880 at_bol = false;
1881 begin_comment = false;
1882 }
1883 }
1884
1885 if (in_comment)
1886 {
1887 buf += static_cast<char> (c);
1888
1889 if (c == '\n')
1890 {
1891 at_bol = true;
1892 current_input_column = 0;
1893 in_comment = false;
1894
1895 // FIXME -- bailing out here prevents things like
1896 //
1897 // octave> # comment
1898 // octave> x = 1
1899 //
1900 // from failing at the command line, while still
1901 // allowing blocks of comments to be grabbed properly
1902 // for function doc strings. But only the first line of
1903 // a mult-line doc string will be picked up for
1904 // functions defined on the command line. We need a
1905 // better way of collecting these comments...
1906 if (! (reading_fcn_file || reading_script_file))
1907 goto done;
1908 }
1909 }
1910 else
1911 {
1912 switch (c)
1913 {
// Blanks between comment lines are skipped.
1914 case ' ':
1915 case '\t':
1916 break;
1917
1918 case '#':
1919 if (! warned_incompatible)
1920 {
1921 warned_incompatible = true;
1922 maybe_gripe_matlab_incompatible_comment (c);
1923 }
1924 // fall through...
1925
1926 case '%':
1927 in_comment = true;
1928 begin_comment = true;
1929 break;
1930
1931 default:
// First character that is not part of a comment: give it back to
// the reader and stop.
1932 current_input_column--;
1933 reader.ungetc (c);
1934 goto done;
1935 }
1936 }
1937 }
1938
1939 done:
1940
1941 if (c == EOF)
1942 eof = true;
1943
1944 return buf;
1945 }
1946
1947 class
1948 flex_stream_reader : public stream_reader
1949 {
1950 public:
1951 flex_stream_reader (char *buf_arg) : stream_reader (), buf (buf_arg) { }
1952
1953 int getc (void) { return ::text_yyinput (); }
1954 int ungetc (int c) { ::xunput (c, buf); return 0; }
1955
1956 private:
1957
1958 // No copying!
1959
1960 flex_stream_reader (const flex_stream_reader&);
1961
1962 flex_stream_reader& operator = (const flex_stream_reader&);
1963
1964 char *buf;
1965 };
1966
1967 static int
1968 process_comment (bool start_in_block, bool& eof)
1969 {
1970 eof = false;
1971
1972 std::string help_txt;
1973
1974 if (! help_buf.empty ())
1975 help_txt = help_buf.top ();
1976
1977 flex_stream_reader flex_reader (yytext);
1978
1979 // process_comment is only supposed to be called when we are not
1980 // initially looking at a block comment.
1981
1982 std::string txt = start_in_block
1983 ? grab_block_comment (flex_reader, eof)
1984 : grab_comment_block (flex_reader, false, eof);
1985
1986 if (lexer_debug_flag)
1987 std::cerr << "C: " << txt << std::endl;
1988
1989 if (help_txt.empty () && nesting_level.none ())
1990 {
1991 if (! help_buf.empty ())
1992 help_buf.pop ();
1993
1994 help_buf.push (txt);
1995 }
1996
1997 octave_comment_buffer::append (txt);
1998
1999 current_input_column = 1;
2000 lexer_flags.quote_is_transpose = false;
2001 lexer_flags.convert_spaces_to_comma = true;
2002 lexer_flags.at_beginning_of_statement = true;
2003
2004 if (YY_START == COMMAND_START)
2005 BEGIN (INITIAL);
2006
2007 if (nesting_level.none ())
2008 return '\n';
2009 else if (nesting_level.is_bracket_or_brace ())
2010 return ';';
2011 else
2012 return 0;
2013 }
2014
// Return true if the character C occurs in the NUL-terminated string
// S.  The terminating NUL itself never matches.

static bool
match_any (char c, const char *s)
{
  for (const char *p = s; *p != '\0'; ++p)
    {
      if (*p == c)
        return true;
    }

  return false;
}
2029
// Decide from the surrounding spacing whether an operator should be
// treated as binary.  SPC_PREV says whether whitespace preceded the
// operator, NEXT_CHAR is the character following it.  For example,
//
//   [ 1 + 2 ]  or  [ 1+ 2]  or  [ 1+2 ]  ==>  binary
//
//   [ 1 +2 ]  ==>  unary

static bool
looks_like_bin_op (bool spc_prev, int next_char)
{
  // No space in front: always binary.
  if (! spc_prev)
    return true;

  // Space in front: binary only if there is space behind as well.
  return (next_char == ' ' || next_char == '\t');
}
2045
2046 // Recognize separators. If the separator is a CRLF pair, it is
2047 // replaced by a single LF.
2048
2049 static bool
2050 next_token_is_sep_op (void)
2051 {
2052 bool retval = false;
2053
2054 int c = text_yyinput ();
2055
2056 retval = match_any (c, ",;\n]");
2057
2058 xunput (c, yytext);
2059
2060 return retval;
2061 }
2062
2063 // Try to determine if the next token should be treated as a postfix
2064 // unary operator. This is ugly, but it seems to do the right thing.
2065
2066 static bool
2067 next_token_is_postfix_unary_op (bool spc_prev)
2068 {
2069 bool un_op = false;
2070
2071 int c0 = text_yyinput ();
2072
2073 if (c0 == '\'' && ! spc_prev)
2074 {
2075 un_op = true;
2076 }
2077 else if (c0 == '.')
2078 {
2079 int c1 = text_yyinput ();
2080 un_op = (c1 == '\'');
2081 xunput (c1, yytext);
2082 }
2083 else if (c0 == '+')
2084 {
2085 int c1 = text_yyinput ();
2086 un_op = (c1 == '+');
2087 xunput (c1, yytext);
2088 }
2089 else if (c0 == '-')
2090 {
2091 int c1 = text_yyinput ();
2092 un_op = (c1 == '-');
2093 xunput (c1, yytext);
2094 }
2095
2096 xunput (c0, yytext);
2097
2098 return un_op;
2099 }
2100
2101 // Try to determine if the next token should be treated as a binary
2102 // operator.
2103 //
2104 // This kluge exists because whitespace is not always ignored inside
2105 // the square brackets that are used to create matrix objects (though
2106 // spacing only really matters in the cases that can be interpreted
2107 // either as binary ops or prefix unary ops: currently just +, -).
2108 //
2109 // Note that a line continuation directly following a + or - operator
2110 // (e.g., the characters '[' 'a' ' ' '+' '\' LFD 'b' ']') will be
2111 // parsed as a binary operator.
2112
2113 static bool
2114 next_token_is_bin_op (bool spc_prev)
2115 {
2116 bool bin_op = false;
2117
2118 int c0 = text_yyinput ();
2119
2120 switch (c0)
2121 {
2122 case '+':
2123 case '-':
2124 {
2125 int c1 = text_yyinput ();
2126
2127 switch (c1)
2128 {
2129 case '+':
2130 case '-':
2131 // Unary ops, spacing doesn't matter.
2132 break;
2133
2134 case '=':
2135 // Binary ops, spacing doesn't matter.
2136 bin_op = true;
2137 break;
2138
2139 default:
2140 // Could be either, spacing matters.
2141 bin_op = looks_like_bin_op (spc_prev, c1);
2142 break;
2143 }
2144
2145 xunput (c1, yytext);
2146 }
2147 break;
2148
2149 case ':':
2150 case '/':
2151 case '\\':
2152 case '^':
2153 // Always a binary op (may also include /=, \=, and ^=).
2154 bin_op = true;
2155 break;
2156
2157 // .+ .- ./ .\ .^ .* .**
2158 case '.':
2159 {
2160 int c1 = text_yyinput ();
2161
2162 if (match_any (c1, "+-/\\^*"))
2163 // Always a binary op (may also include .+=, .-=, ./=, ...).
2164 bin_op = true;
2165 else if (! isdigit (c1) && c1 != ' ' && c1 != '\t' && c1 != '.')
2166 // A structure element reference is a binary op.
2167 bin_op = true;
2168
2169 xunput (c1, yytext);
2170 }
2171 break;
2172
2173 // = == & && | || * **
2174 case '=':
2175 case '&':
2176 case '|':
2177 case '*':
2178 // Always a binary op (may also include ==, &&, ||, **).
2179 bin_op = true;
2180 break;
2181
2182 // < <= <> > >=
2183 case '<':
2184 case '>':
2185 // Always a binary op (may also include <=, <>, >=).
2186 bin_op = true;
2187 break;
2188
2189 // ~= !=
2190 case '~':
2191 case '!':
2192 {
2193 int c1 = text_yyinput ();
2194
2195 // ~ and ! can be unary ops, so require following =.
2196 if (c1 == '=')
2197 bin_op = true;
2198
2199 xunput (c1, yytext);
2200 }
2201 break;
2202
2203 default:
2204 break;
2205 }
2206
2207 xunput (c0, yytext);
2208
2209 return bin_op;
2210 }
2211
// Used to delete trailing white space from tokens.
//
// Returns a copy of S cut off at the first blank or tab (the whole
// string if it contains neither).

static std::string
strip_trailing_whitespace (char *s)
{
  const std::string text (s);

  const size_t cut = text.find_first_of (" \t");

  return (cut == std::string::npos) ? text : text.substr (0, cut);
}
2226
2227 // FIXME -- we need to handle block comments here.
2228
2229 static void
2230 scan_for_comments (const char *text)
2231 {
2232 std::string comment_buf;
2233
2234 bool in_comment = false;
2235 bool beginning_of_comment = false;
2236
2237 int len = strlen (text);
2238 int i = 0;
2239
2240 while (i < len)
2241 {
2242 char c = text[i++];
2243
2244 switch (c)
2245 {
2246 case '%':
2247 case '#':
2248 if (in_comment)
2249 {
2250 if (! beginning_of_comment)
2251 comment_buf += static_cast<char> (c);
2252 }
2253 else
2254 {
2255 maybe_gripe_matlab_incompatible_comment (c);
2256 in_comment = true;
2257 beginning_of_comment = true;
2258 }
2259 break;
2260
2261 case '\n':
2262 if (in_comment)
2263 {
2264 comment_buf += static_cast<char> (c);
2265 octave_comment_buffer::append (comment_buf);
2266 comment_buf.resize (0);
2267 in_comment = false;
2268 beginning_of_comment = false;
2269 }
2270 break;
2271
2272 default:
2273 if (in_comment)
2274 {
2275 comment_buf += static_cast<char> (c);
2276 beginning_of_comment = false;
2277 }
2278 break;
2279 }
2280 }
2281
2282 if (! comment_buf.empty ())
2283 octave_comment_buffer::append (comment_buf);
2284 }
2285
2286 // Discard whitespace, including comments and continuations.
2287 //
2288 // Return value is logical OR of the following values:
2289 //
2290 // ATE_NOTHING : no spaces to eat
2291 // ATE_SPACE_OR_TAB : space or tab in input
2292 // ATE_NEWLINE : bare new line in input
2293
2294 // FIXME -- we need to handle block comments here.
2295
2296 static yum_yum
2297 eat_whitespace (void)
2298 {
2299 yum_yum retval = ATE_NOTHING;
2300
2301 std::string comment_buf;
2302
2303 bool in_comment = false;
2304 bool beginning_of_comment = false;
2305
2306 int c = 0;
2307
2308 while ((c = text_yyinput ()) != EOF)
2309 {
2310 current_input_column++;
2311
2312 switch (c)
2313 {
2314 case ' ':
2315 case '\t':
2316 if (in_comment)
2317 {
2318 comment_buf += static_cast<char> (c);
2319 beginning_of_comment = false;
2320 }
2321 retval |= ATE_SPACE_OR_TAB;
2322 break;
2323
2324 case '\n':
2325 retval |= ATE_NEWLINE;
2326 if (in_comment)
2327 {
2328 comment_buf += static_cast<char> (c);
2329 octave_comment_buffer::append (comment_buf);
2330 comment_buf.resize (0);
2331 in_comment = false;
2332 beginning_of_comment = false;
2333 }
2334 current_input_column = 0;
2335 break;
2336
2337 case '#':
2338 case '%':
2339 if (in_comment)
2340 {
2341 if (! beginning_of_comment)
2342 comment_buf += static_cast<char> (c);
2343 }
2344 else
2345 {
2346 maybe_gripe_matlab_incompatible_comment (c);
2347 in_comment = true;
2348 beginning_of_comment = true;
2349 }
2350 break;
2351
2352 case '.':
2353 if (in_comment)
2354 {
2355 comment_buf += static_cast<char> (c);
2356 beginning_of_comment = false;
2357 break;
2358 }
2359 else
2360 {
2361 if (have_ellipsis_continuation ())
2362 break;
2363 else
2364 goto done;
2365 }
2366
2367 case '\\':
2368 if (in_comment)
2369 {
2370 comment_buf += static_cast<char> (c);
2371 beginning_of_comment = false;
2372 break;
2373 }
2374 else
2375 {
2376 if (have_continuation ())
2377 break;
2378 else
2379 goto done;
2380 }
2381
2382 default:
2383 if (in_comment)
2384 {
2385 comment_buf += static_cast<char> (c);
2386 beginning_of_comment = false;
2387 break;
2388 }
2389 else
2390 goto done;
2391 }
2392 }
2393
2394 if (! comment_buf.empty ())
2395 octave_comment_buffer::append (comment_buf);
2396
2397 done:
2398 xunput (c, yytext);
2399 current_input_column--;
2400 return retval;
2401 }
2402
// Return true if the LEN characters at S start with "0x" or "0X" and
// at least one more character follows that prefix.

static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  return (s[1] == 'x' || s[1] == 'X');
}
2408
2409 static void
2410 handle_number (void)
2411 {
2412 double value = 0.0;
2413 int nread = 0;
2414
2415 if (looks_like_hex (yytext, strlen (yytext)))
2416 {
2417 unsigned long ival;
2418
2419 nread = sscanf (yytext, "%lx", &ival);
2420
2421 value = static_cast<double> (ival);
2422 }
2423 else
2424 {
2425 char *tmp = strsave (yytext);
2426
2427 char *idx = strpbrk (tmp, "Dd");
2428
2429 if (idx)
2430 *idx = 'e';
2431
2432 nread = sscanf (tmp, "%lf", &value);
2433
2434 delete [] tmp;
2435 }
2436
2437 // If yytext doesn't contain a valid number, we are in deep doo doo.
2438
2439 assert (nread == 1);
2440
2441 lexer_flags.quote_is_transpose = true;
2442 lexer_flags.convert_spaces_to_comma = true;
2443 lexer_flags.looking_for_object_index = false;
2444 lexer_flags.at_beginning_of_statement = false;
2445
2446 yylval.tok_val = new token (value, yytext, input_line_number,
2447 current_input_column);
2448
2449 token_stack.push (yylval.tok_val);
2450
2451 current_input_column += yyleng;
2452
2453 do_comma_insert_check ();
2454 }
2455
2456 // We have seen a backslash and need to find out if it should be
2457 // treated as a continuation character. If so, this eats it, up to
2458 // and including the new line character.
2459 //
2460 // Match whitespace only, followed by a comment character or newline.
2461 // Once a comment character is found, discard all input until newline.
2462 // If non-whitespace characters are found before comment
2463 // characters, return 0. Otherwise, return 1.
2464
2465 // FIXME -- we need to handle block comments here.
2466
2467 static bool
2468 have_continuation (bool trailing_comments_ok)
2469 {
2470 std::ostringstream buf;
2471
2472 std::string comment_buf;
2473
2474 bool in_comment = false;
2475 bool beginning_of_comment = false;
2476
2477 int c = 0;
2478
2479 while ((c = text_yyinput ()) != EOF)
2480 {
2481 buf << static_cast<char> (c);
2482
2483 switch (c)
2484 {
2485 case ' ':
2486 case '\t':
2487 if (in_comment)
2488 {
2489 comment_buf += static_cast<char> (c);
2490 beginning_of_comment = false;
2491 }
2492 break;
2493
2494 case '%':
2495 case '#':
2496 if (trailing_comments_ok)
2497 {
2498 if (in_comment)
2499 {
2500 if (! beginning_of_comment)
2501 comment_buf += static_cast<char> (c);
2502 }
2503 else
2504 {
2505 maybe_gripe_matlab_incompatible_comment (c);
2506 in_comment = true;
2507 beginning_of_comment = true;
2508 }
2509 }
2510 else
2511 goto cleanup;
2512 break;
2513
2514 case '\n':
2515 if (in_comment)
2516 {
2517 comment_buf += static_cast<char> (c);
2518 octave_comment_buffer::append (comment_buf);
2519 }
2520 current_input_column = 0;
2521 promptflag--;
2522 gripe_matlab_incompatible_continuation ();
2523 return true;
2524
2525 default:
2526 if (in_comment)
2527 {
2528 comment_buf += static_cast<char> (c);
2529 beginning_of_comment = false;
2530 }
2531 else
2532 goto cleanup;
2533 break;
2534 }
2535 }
2536
2537 xunput (c, yytext);
2538 return false;
2539
2540 cleanup:
2541
2542 std::string s = buf.str ();
2543
2544 int len = s.length ();
2545 while (len--)
2546 xunput (s[len], yytext);
2547
2548 return false;
2549 }
2550
2551 // We have seen a `.' and need to see if it is the start of a
2552 // continuation. If so, this eats it, up to and including the new
2553 // line character.
2554
2555 static bool
2556 have_ellipsis_continuation (bool trailing_comments_ok)
2557 {
2558 char c1 = text_yyinput ();
2559 if (c1 == '.')
2560 {
2561 char c2 = text_yyinput ();
2562 if (c2 == '.' && have_continuation (trailing_comments_ok))
2563 return true;
2564 else
2565 {
2566 xunput (c2, yytext);
2567 xunput (c1, yytext);
2568 }
2569 }
2570 else
2571 xunput (c1, yytext);
2572
2573 return false;
2574 }
2575
2576 // See if we have a continuation line. If so, eat it and the leading
2577 // whitespace on the next line.
2578 //
2579 // Return value is the same as described for eat_whitespace().
2580
2581 static yum_yum
2582 eat_continuation (void)
2583 {
2584 int retval = ATE_NOTHING;
2585
2586 int c = text_yyinput ();
2587
2588 if ((c == '.' && have_ellipsis_continuation ())
2589 || (c == '\\' && have_continuation ()))
2590 retval = eat_whitespace ();
2591 else
2592 xunput (c, yytext);
2593
2594 return retval;
2595 }
2596
2597 static int
2598 handle_string (char delim)
2599 {
2600 std::ostringstream buf;
2601
2602 int bos_line = input_line_number;
2603 int bos_col = current_input_column;
2604
2605 int c;
2606 int escape_pending = 0;
2607
2608 while ((c = text_yyinput ()) != EOF)
2609 {
2610 current_input_column++;
2611
2612 if (c == '\\')
2613 {
2614 if (delim == '\'' || escape_pending)
2615 {
2616 buf << static_cast<char> (c);
2617 escape_pending = 0;
2618 }
2619 else
2620 {
2621 if (have_continuation (false))
2622 escape_pending = 0;
2623 else
2624 {
2625 buf << static_cast<char> (c);
2626 escape_pending = 1;
2627 }
2628 }
2629 continue;
2630 }
2631 else if (c == '.')
2632 {
2633 if (delim == '\'' || ! have_ellipsis_continuation (false))
2634 buf << static_cast<char> (c);
2635 }
2636 else if (c == '\n')
2637 {
2638 error ("unterminated string constant");
2639 break;
2640 }
2641 else if (c == delim)
2642 {
2643 if (escape_pending)
2644 buf << static_cast<char> (c);
2645 else
2646 {
2647 c = text_yyinput ();
2648 if (c == delim)
2649 {
2650 buf << static_cast<char> (c);
2651 }
2652 else
2653 {
2654 std::string s;
2655 xunput (c, yytext);
2656
2657 if (delim == '\'')
2658 s = buf.str ();
2659 else
2660 s = do_string_escapes (buf.str ());
2661
2662 lexer_flags.quote_is_transpose = true;
2663 lexer_flags.convert_spaces_to_comma = true;
2664
2665 yylval.tok_val = new token (s, bos_line, bos_col);
2666 token_stack.push (yylval.tok_val);
2667
2668 if (delim == '"')
2669 gripe_matlab_incompatible ("\" used as string delimiter");
2670 else if (delim == '\'')
2671 gripe_single_quote_string ();
2672
2673 lexer_flags.looking_for_object_index = true;
2674 lexer_flags.at_beginning_of_statement = false;
2675
2676 return delim == '"' ? DQ_STRING : SQ_STRING;
2677 }
2678 }
2679 }
2680 else
2681 {
2682 buf << static_cast<char> (c);
2683 }
2684
2685 escape_pending = 0;
2686 }
2687
2688 return LEXICAL_ERROR;
2689 }
2690
2691 static bool
2692 next_token_is_assign_op (void)
2693 {
2694 bool retval = false;
2695
2696 int c0 = text_yyinput ();
2697
2698 switch (c0)
2699 {
2700 case '=':
2701 {
2702 int c1 = text_yyinput ();
2703 xunput (c1, yytext);
2704 if (c1 != '=')
2705 retval = true;
2706 }
2707 break;
2708
2709 case '+':
2710 case '-':
2711 case '*':
2712 case '/':
2713 case '\\':
2714 case '&':
2715 case '|':
2716 {
2717 int c1 = text_yyinput ();
2718 xunput (c1, yytext);
2719 if (c1 == '=')
2720 retval = true;
2721 }
2722 break;
2723
2724 case '.':
2725 {
2726 int c1 = text_yyinput ();
2727 if (match_any (c1, "+-*/\\"))
2728 {
2729 int c2 = text_yyinput ();
2730 xunput (c2, yytext);
2731 if (c2 == '=')
2732 retval = true;
2733 }
2734 xunput (c1, yytext);
2735 }
2736 break;
2737
2738 case '>':
2739 {
2740 int c1 = text_yyinput ();
2741 if (c1 == '>')
2742 {
2743 int c2 = text_yyinput ();
2744 xunput (c2, yytext);
2745 if (c2 == '=')
2746 retval = true;
2747 }
2748 xunput (c1, yytext);
2749 }
2750 break;
2751
2752 case '<':
2753 {
2754 int c1 = text_yyinput ();
2755 if (c1 == '<')
2756 {
2757 int c2 = text_yyinput ();
2758 xunput (c2, yytext);
2759 if (c2 == '=')
2760 retval = true;
2761 }
2762 xunput (c1, yytext);
2763 }
2764 break;
2765
2766 default:
2767 break;
2768 }
2769
2770 xunput (c0, yytext);
2771
2772 return retval;
2773 }
2774
2775 static bool
2776 next_token_is_index_op (void)
2777 {
2778 int c = text_yyinput ();
2779 xunput (c, yytext);
2780 return c == '(' || c == '{';
2781 }
2782
2783 static int
2784 handle_close_bracket (bool spc_gobbled, int bracket_type)
2785 {
2786 int retval = bracket_type;
2787
2788 if (! nesting_level.none ())
2789 {
2790 nesting_level.remove ();
2791
2792 if (bracket_type == ']')
2793 lexer_flags.bracketflag--;
2794 else if (bracket_type == '}')
2795 lexer_flags.braceflag--;
2796 else
2797 panic_impossible ();
2798 }
2799
2800 if (lexer_flags.bracketflag == 0 && lexer_flags.braceflag == 0)
2801 BEGIN (INITIAL);
2802
2803 if (bracket_type == ']'
2804 && next_token_is_assign_op ()
2805 && ! lexer_flags.looking_at_return_list)
2806 {
2807 retval = CLOSE_BRACE;
2808 }
2809 else if ((lexer_flags.bracketflag || lexer_flags.braceflag)
2810 && lexer_flags.convert_spaces_to_comma
2811 && (nesting_level.is_bracket ()
2812 || (nesting_level.is_brace ()
2813 && ! lexer_flags.looking_at_object_index.front ())))
2814 {
2815 bool index_op = next_token_is_index_op ();
2816
2817 // Don't insert comma if we are looking at something like
2818 //
2819 // [x{i}{j}] or [x{i}(j)]
2820 //
2821 // but do if we are looking at
2822 //
2823 // [x{i} {j}] or [x{i} (j)]
2824
2825 if (spc_gobbled || ! (bracket_type == '}' && index_op))
2826 {
2827 bool bin_op = next_token_is_bin_op (spc_gobbled);
2828
2829 bool postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2830
2831 bool sep_op = next_token_is_sep_op ();
2832
2833 if (! (postfix_un_op || bin_op || sep_op))
2834 {
2835 maybe_warn_separator_insert (',');
2836
2837 xunput (',', yytext);
2838 return retval;
2839 }
2840 }
2841 }
2842
2843 lexer_flags.quote_is_transpose = true;
2844 lexer_flags.convert_spaces_to_comma = true;
2845
2846 return retval;
2847 }
2848
2849 static void
2850 maybe_unput_comma (int spc_gobbled)
2851 {
2852 if (nesting_level.is_bracket ()
2853 || (nesting_level.is_brace ()
2854 && ! lexer_flags.looking_at_object_index.front ()))
2855 {
2856 int bin_op = next_token_is_bin_op (spc_gobbled);
2857
2858 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled);
2859
2860 int c1 = text_yyinput ();
2861 int c2 = text_yyinput ();
2862
2863 xunput (c2, yytext);
2864 xunput (c1, yytext);
2865
2866 int sep_op = next_token_is_sep_op ();
2867
2868 int dot_op = (c1 == '.'
2869 && (isalpha (c2) || isspace (c2) || c2 == '_'));
2870
2871 if (postfix_un_op || bin_op || sep_op || dot_op)
2872 return;
2873
2874 int index_op = (c1 == '(' || c1 == '{');
2875
2876 // If there is no space before the indexing op, we don't insert
2877 // a comma.
2878
2879 if (index_op && ! spc_gobbled)
2880 return;
2881
2882 maybe_warn_separator_insert (',');
2883
2884 xunput (',', yytext);
2885 }
2886 }
2887
2888 static bool
2889 next_token_can_follow_bin_op (void)
2890 {
2891 std::stack<char> buf;
2892
2893 int c = EOF;
2894
2895 // Skip whitespace in current statement on current line
2896 while (true)
2897 {
2898 c = text_yyinput ();
2899
2900 buf.push (c);
2901
2902 if (match_any (c, ",;\n") || (c != ' ' && c != '\t'))
2903 break;
2904 }
2905
2906 // Restore input.
2907 while (! buf.empty ())
2908 {
2909 xunput (buf.top (), yytext);
2910
2911 buf.pop ();
2912 }
2913
2914 return (isalnum (c) || match_any (c, "!\"'(-[_{~"));
2915 }
2916
// Return false for names that must never be treated as commands, so
// that input such as "NaN ^2" is not parsed as command syntax; return
// true for any other name.

static bool
can_be_command (const std::string& tok)
{
  static const char *const never_commands[] =
    {
      "e", "I", "i", "J", "j", "Inf", "inf", "NaN", "nan"
    };

  const size_t count = sizeof (never_commands) / sizeof (never_commands[0]);

  for (size_t idx = 0; idx < count; idx++)
    {
      if (tok == never_commands[idx])
        return false;
    }

  return true;
}
2929
2930 static bool
2931 looks_like_command_arg (void)
2932 {
2933 bool retval = true;
2934
2935 int c0 = text_yyinput ();
2936
2937 switch (c0)
2938 {
2939 // = ==
2940 case '=':
2941 {
2942 int c1 = text_yyinput ();
2943
2944 if (c1 == '=')
2945 {
2946 int c2 = text_yyinput ();
2947
2948 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
2949 && next_token_can_follow_bin_op ())
2950 retval = false;
2951
2952 xunput (c2, yytext);
2953 }
2954 else
2955 retval = false;
2956
2957 xunput (c1, yytext);
2958 }
2959 break;
2960
2961 case '(':
2962 case '{':
2963 // Indexing.
2964 retval = false;
2965 break;
2966
2967 case '\n':
2968 // EOL.
2969 break;
2970
2971 case '\'':
2972 case '"':
2973 // Beginning of a character string.
2974 break;
2975
2976 // + - ++ -- += -=
2977 case '+':
2978 case '-':
2979 {
2980 int c1 = text_yyinput ();
2981
2982 switch (c1)
2983 {
2984 case '\n':
2985 // EOL.
2986 case '+':
2987 case '-':
2988 // Unary ops, spacing doesn't matter.
2989 break;
2990
2991 case '\t':
2992 case ' ':
2993 {
2994 if (next_token_can_follow_bin_op ())
2995 retval = false;
2996 }
2997 break;
2998
2999 case '=':
3000 {
3001 int c2 = text_yyinput ();
3002
3003 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3004 && next_token_can_follow_bin_op ())
3005 retval = false;
3006
3007 xunput (c2, yytext);
3008 }
3009 break;
3010 }
3011
3012 xunput (c1, yytext);
3013 }
3014 break;
3015
3016 case ':':
3017 case '/':
3018 case '\\':
3019 case '^':
3020 {
3021 int c1 = text_yyinput ();
3022
3023 if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3024 && next_token_can_follow_bin_op ())
3025 retval = false;
3026
3027 xunput (c1, yytext);
3028 }
3029 break;
3030
3031 // .+ .- ./ .\ .^ .* .**
3032 case '.':
3033 {
3034 int c1 = text_yyinput ();
3035
3036 if (match_any (c1, "+-/\\^*"))
3037 {
3038 int c2 = text_yyinput ();
3039
3040 if (c2 == '=')
3041 {
3042 int c3 = text_yyinput ();
3043
3044 if (! match_any (c3, ",;\n") && (c3 == ' ' || c3 == '\t')
3045 && next_token_can_follow_bin_op ())
3046 retval = false;
3047
3048 xunput (c3, yytext);
3049 }
3050 else if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3051 && next_token_can_follow_bin_op ())
3052 retval = false;
3053
3054 xunput (c2, yytext);
3055 }
3056 else if (! match_any (c1, ",;\n")
3057 && (! isdigit (c1) && c1 != ' ' && c1 != '\t'
3058 && c1 != '.'))
3059 {
3060 // Structure reference. FIXME -- is this a complete check?
3061
3062 retval = false;
3063 }
3064
3065 xunput (c1, yytext);
3066 }
3067 break;
3068
3069 // & && | || * **
3070 case '&':
3071 case '|':
3072 case '*':
3073 {
3074 int c1 = text_yyinput ();
3075
3076 if (c1 == c0)
3077 {
3078 int c2 = text_yyinput ();
3079
3080 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3081 && next_token_can_follow_bin_op ())
3082 retval = false;
3083
3084 xunput (c2, yytext);
3085 }
3086 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3087 && next_token_can_follow_bin_op ())
3088 retval = false;
3089
3090 xunput (c1, yytext);
3091 }
3092 break;
3093
3094 // < <= > >=
3095 case '<':
3096 case '>':
3097 {
3098 int c1 = text_yyinput ();
3099
3100 if (c1 == '=')
3101 {
3102 int c2 = text_yyinput ();
3103
3104 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3105 && next_token_can_follow_bin_op ())
3106 retval = false;
3107
3108 xunput (c2, yytext);
3109 }
3110 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3111 && next_token_can_follow_bin_op ())
3112 retval = false;
3113
3114 xunput (c1, yytext);
3115 }
3116 break;
3117
3118 // ~= !=
3119 case '~':
3120 case '!':
3121 {
3122 int c1 = text_yyinput ();
3123
3124 // ~ and ! can be unary ops, so require following =.
3125 if (c1 == '=')
3126 {
3127 int c2 = text_yyinput ();
3128
3129 if (! match_any (c2, ",;\n") && (c2 == ' ' || c2 == '\t')
3130 && next_token_can_follow_bin_op ())
3131 retval = false;
3132
3133 xunput (c2, yytext);
3134 }
3135 else if (! match_any (c1, ",;\n") && (c1 == ' ' || c1 == '\t')
3136 && next_token_can_follow_bin_op ())
3137 retval = false;
3138
3139 xunput (c1, yytext);
3140 }
3141 break;
3142
3143 default:
3144 break;
3145 }
3146
3147 xunput (c0, yytext);
3148
3149 return retval;
3150 }
3151
3152 static int
3153 handle_superclass_identifier (void)
3154 {
3155 eat_continuation ();
3156
3157 std::string pkg;
3158 std::string meth = strip_trailing_whitespace (yytext);
3159 size_t pos = meth.find ("@");
3160 std::string cls = meth.substr (pos).substr (1);
3161 meth = meth.substr (0, pos - 1);
3162
3163 pos = cls.find (".");
3164 if (pos != std::string::npos)
3165 {
3166 pkg = cls.substr (pos).substr (1);
3167 cls = cls.substr (0, pos - 1);
3168 }
3169
3170 int kw_token = (is_keyword_token (meth) || is_keyword_token (cls)
3171 || is_keyword_token (pkg));
3172 if (kw_token)
3173 {
3174 error ("method, class and package names may not be keywords");
3175 return LEXICAL_ERROR;
3176 }
3177
3178 yylval.tok_val
3179 = new token (meth.empty () ? 0 : &(symbol_table::insert (meth)),
3180 cls.empty () ? 0 : &(symbol_table::insert (cls)),
3181 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3182 input_line_number, current_input_column);
3183 token_stack.push (yylval.tok_val);
3184
3185 lexer_flags.convert_spaces_to_comma = true;
3186 current_input_column += yyleng;
3187
3188 return SUPERCLASSREF;
3189 }
3190
3191 static int
3192 handle_meta_identifier (void)
3193 {
3194 eat_continuation ();
3195
3196 std::string pkg;
3197 std::string cls = strip_trailing_whitespace (yytext).substr (1);
3198 size_t pos = cls.find (".");
3199
3200 if (pos != std::string::npos)
3201 {
3202 pkg = cls.substr (pos).substr (1);
3203 cls = cls.substr (0, pos - 1);
3204 }
3205
3206 int kw_token = is_keyword_token (cls) || is_keyword_token (pkg);
3207 if (kw_token)
3208 {
3209 error ("class and package names may not be keywords");
3210 return LEXICAL_ERROR;
3211 }
3212
3213 yylval.tok_val
3214 = new token (cls.empty () ? 0 : &(symbol_table::insert (cls)),
3215 pkg.empty () ? 0 : &(symbol_table::insert (pkg)),
3216 input_line_number, current_input_column);
3217
3218 token_stack.push (yylval.tok_val);
3219
3220 lexer_flags.convert_spaces_to_comma = true;
3221 current_input_column += yyleng;
3222
3223 return METAQUERY;
3224 }
3225
3226 // Figure out exactly what kind of token to return when we have seen
3227 // an identifier. Handles keywords. Return -1 if the identifier
3228 // should be ignored.
3229
3230 static int
3231 handle_identifier (void)
3232 {
3233 bool at_bos = lexer_flags.at_beginning_of_statement;
3234
3235 std::string tok = strip_trailing_whitespace (yytext);
3236
3237 int c = yytext[yyleng-1];
3238
3239 int cont_is_spc = eat_continuation ();
3240
3241 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t');
3242
3243 // If we are expecting a structure element, avoid recognizing
3244 // keywords and other special names and return STRUCT_ELT, which is
3245 // a string that is also a valid identifier. But first, we have to
3246 // decide whether to insert a comma.
3247
3248 if (lexer_flags.looking_at_indirect_ref)
3249 {
3250 do_comma_insert_check ();
3251
3252 maybe_unput_comma (spc_gobbled);
3253
3254 yylval.tok_val = new token (tok, input_line_number,
3255 current_input_column);
3256
3257 token_stack.push (yylval.tok_val);
3258
3259 lexer_flags.quote_is_transpose = true;
3260 lexer_flags.convert_spaces_to_comma = true;
3261 lexer_flags.looking_for_object_index = true;
3262
3263 current_input_column += yyleng;
3264
3265 return STRUCT_ELT;
3266 }
3267
3268 lexer_flags.at_beginning_of_statement = false;
3269
3270 // The is_keyword_token may reset
3271 // lexer_flags.at_beginning_of_statement. For example, if it sees
3272 // an else token, then the next token is at the beginning of a
3273 // statement.
3274
3275 int kw_token = is_keyword_token (tok);
3276
3277 // If we found a keyword token, then the beginning_of_statement flag
3278 // is already set. Otherwise, we won't be at the beginning of a
3279 // statement.
3280
3281 if (lexer_flags.looking_at_function_handle)
3282 {
3283 if (kw_token)
3284 {
3285 error ("function handles may not refer to keywords");
3286
3287 return LEXICAL_ERROR;
3288 }
3289 else
3290 {
3291 yylval.tok_val = new token (tok, input_line_number,
3292 current_input_column);
3293
3294 token_stack.push (yylval.tok_val);
3295
3296 current_input_column += yyleng;
3297 lexer_flags.quote_is_transpose = false;
3298 lexer_flags.convert_spaces_to_comma = true;
3299 lexer_flags.looking_for_object_index = true;
3300
3301 return FCN_HANDLE;
3302 }
3303 }
3304
3305 // If we have a regular keyword, return it.
3306 // Keywords can be followed by identifiers.
3307
3308 if (kw_token)
3309 {
3310 if (kw_token >= 0)
3311 {
3312 current_input_column += yyleng;
3313 lexer_flags.quote_is_transpose = false;
3314 lexer_flags.convert_spaces_to_comma = true;
3315 lexer_flags.looking_for_object_index = false;
3316 }
3317
3318 return kw_token;
3319 }
3320
3321 // See if we have a plot keyword (title, using, with, or clear).
3322
3323 int c1 = text_yyinput ();
3324
3325 bool next_tok_is_eq = false;
3326 if (c1 == '=')
3327 {
3328 int c2 = text_yyinput ();
3329 xunput (c2, yytext);
3330
3331 if (c2 != '=')
3332 next_tok_is_eq = true;
3333 }
3334
3335 xunput (c1, yytext);
3336
3337 // Kluge alert.
3338 //
3339 // If we are looking at a text style function, set up to gobble its
3340 // arguments.
3341 //
3342 // If the following token is `=', or if we are parsing a function
3343 // return list or function parameter list, or if we are looking at
3344 // something like [ab,cd] = foo (), force the symbol to be inserted
3345 // as a variable in the current symbol table.
3346
3347 if (! is_variable (tok))
3348 {
3349 if (at_bos && spc_gobbled && can_be_command (tok)
3350 && looks_like_command_arg ())
3351 {
3352 BEGIN (COMMAND_START);
3353 }
3354 else if (next_tok_is_eq
3355 || lexer_flags.looking_at_decl_list
3356 || lexer_flags.looking_at_return_list
3357 || (lexer_flags.looking_at_parameter_list
3358 && ! lexer_flags.looking_at_initializer_expression))
3359 {
3360 symbol_table::force_variable (tok);
3361 }
3362 else if (lexer_flags.looking_at_matrix_or_assign_lhs)
3363 {
3364 lexer_flags.pending_local_variables.insert (tok);
3365 }
3366 }
3367
3368 // Find the token in the symbol table. Beware the magic
3369 // transformation of the end keyword...
3370
3371 if (tok == "end")
3372 tok = "__end__";
3373
3374 yylval.tok_val = new token (&(symbol_table::insert (tok)),
3375 input_line_number, current_input_column);
3376
3377 token_stack.push (yylval.tok_val);
3378
3379 // After seeing an identifer, it is ok to convert spaces to a comma
3380 // (if needed).
3381
3382 lexer_flags.convert_spaces_to_comma = true;
3383
3384 if (! (next_tok_is_eq || YY_START == COMMAND_START))
3385 {
3386 lexer_flags.quote_is_transpose = true;
3387
3388 do_comma_insert_check ();
3389
3390 maybe_unput_comma (spc_gobbled);
3391 }
3392
3393 current_input_column += yyleng;
3394
3395 if (tok != "__end__")
3396 lexer_flags.looking_for_object_index = true;
3397
3398 return NAME;
3399 }
3400
3401 void
3402 lexical_feedback::init (void)
3403 {
3404 // Not initially defining a matrix list.
3405 bracketflag = 0;
3406
3407 // Not initially defining a cell array list.
3408 braceflag = 0;
3409
3410 // Not initially inside a loop or if statement.
3411 looping = 0;
3412
3413 // Not initially defining a function.
3414 defining_func = 0;
3415
3416 // Not parsing an object index.
3417 while (! parsed_function_name.empty ())
3418 parsed_function_name.pop ();
3419
3420 parsing_class_method = false;
3421
3422 // Not initially defining a class with classdef.
3423 maybe_classdef_get_set_method = false;
3424 parsing_classdef = false;
3425
3426 // Not initiallly looking at a function handle.
3427 looking_at_function_handle = 0;
3428
3429 // Not initiallly looking at an anonymous function argument list.
3430 looking_at_anon_fcn_args = 0;
3431
3432 // Not parsing a function return, parameter, or declaration list.
3433 looking_at_return_list = false;
3434 looking_at_parameter_list = false;
3435 looking_at_decl_list = false;
3436
3437 // Not looking at an argument list initializer expression.
3438 looking_at_initializer_expression = false;
3439
3440 // Not parsing a matrix or the left hand side of multi-value
3441 // assignment statement.
3442 looking_at_matrix_or_assign_lhs = false;
3443
3444 // Not parsing an object index.
3445 while (! looking_at_object_index.empty ())
3446 looking_at_object_index.pop_front ();
3447
3448 looking_at_object_index.push_front (false);
3449
3450 // Object index not possible until we've seen something.
3451 looking_for_object_index = false;
3452
3453 // Yes, we are at the beginning of a statement.
3454 at_beginning_of_statement = true;
3455
3456 // No need to do comma insert or convert spaces to comma at
3457 // beginning of input.
3458 convert_spaces_to_comma = true;
3459 do_comma_insert = false;
3460
3461 // Not initially looking at indirect references.
3462 looking_at_indirect_ref = false;
3463
3464 // Quote marks strings intially.
3465 quote_is_transpose = false;
3466
3467 // Set of identifiers that might be local variable names is empty.
3468 pending_local_variables.clear ();
3469 }
3470
3471 bool
3472 is_keyword (const std::string& s)
3473 {
3474 // Parsing function names like "set.property_name" inside
3475 // classdef-style class definitions is simplified by handling the
3476 // "set" and "get" portions of the names using the same mechanism as
3477 // is used for keywords. However, they are not really keywords in
3478 // the language, so omit them from the list of possible keywords.
3479
3480 return (octave_kw_hash::in_word_set (s.c_str (), s.length ()) != 0
3481 && ! (s == "set" || s == "get"));
3482 }
3483
3484 DEFUN (iskeyword, args, ,
3485 "-*- texinfo -*-\n\
3486 @deftypefn {Built-in Function} {} iskeyword ()\n\
3487 @deftypefnx {Built-in Function} {} iskeyword (@var{name})\n\
3488 Return true if @var{name} is an Octave keyword. If @var{name}\n\
3489 is omitted, return a list of keywords.\n\
3490 @seealso{isvarname, exist}\n\
3491 @end deftypefn")
3492 {
3493 octave_value retval;
3494
3495 int argc = args.length () + 1;
3496
3497 string_vector argv = args.make_argv ("iskeyword");
3498
3499 if (error_state)
3500 return retval;
3501
3502 if (argc == 1)
3503 {
3504 // Neither set and get are keywords. See the note in the
3505 // is_keyword function for additional details.
3506
3507 string_vector lst (TOTAL_KEYWORDS);
3508
3509 int j = 0;
3510
3511 for (int i = 0; i < TOTAL_KEYWORDS; i++)
3512 {
3513 std::string tmp = wordlist[i].name;
3514
3515 if (! (tmp == "set" || tmp == "get"))
3516 lst[j++] = tmp;
3517 }
3518
3519 lst.resize (j);
3520
3521 retval = Cell (lst.sort ());
3522 }
3523 else if (argc == 2)
3524 {
3525 retval = is_keyword (argv[1]);
3526 }
3527 else
3528 print_usage ();
3529
3530 return retval;
3531 }
3532
3533 /*
3534
3535 %!assert (iskeyword ("for"))
3536 %!assert (iskeyword ("fort"), false)
3537 %!assert (iskeyword ("fft"), false)
3538
3539 */
3540
// Put the scanner into the SCRIPT_FILE_BEGIN start state.
3541 void
3542 prep_lexer_for_script_file (void)
3543 {
3544 BEGIN (SCRIPT_FILE_BEGIN);
3545 }
3546
// Put the scanner into the FUNCTION_FILE_BEGIN start state.
3547 void
3548 prep_lexer_for_function_file (void)
3549 {
3550 BEGIN (FUNCTION_FILE_BEGIN);
3551 }
3552
3553 static void
3554 maybe_warn_separator_insert (char sep)
3555 {
3556 std::string nm = curr_fcn_file_full_name;
3557
3558 if (nm.empty ())
3559 warning_with_id ("Octave:separator-insert",
3560 "potential auto-insertion of `%c' near line %d",
3561 sep, input_line_number);
3562 else
3563 warning_with_id ("Octave:separator-insert",
3564 "potential auto-insertion of `%c' near line %d of file %s",
3565 sep, input_line_number, nm.c_str ());
3566 }
3567
3568 static void
3569 gripe_single_quote_string (void)
3570 {
3571 std::string nm = curr_fcn_file_full_name;
3572
3573 if (nm.empty ())
3574 warning_with_id ("Octave:single-quote-string",
3575 "single quote delimited string near line %d",
3576 input_line_number);
3577 else
3578 warning_with_id ("Octave:single-quote-string",
3579 "single quote delimited string near line %d of file %s",
3580 input_line_number, nm.c_str ());
3581 }
3582
3583 static void
3584 gripe_matlab_incompatible (const std::string& msg)
3585 {
3586 std::string nm = curr_fcn_file_full_name;
3587
3588 if (nm.empty ())
3589 warning_with_id ("Octave:matlab-incompatible",
3590 "potential Matlab compatibility problem: %s",
3591 msg.c_str ());
3592 else
3593 warning_with_id ("Octave:matlab-incompatible",
3594 "potential Matlab compatibility problem: %s near line %d offile %s",
3595 msg.c_str (), input_line_number, nm.c_str ());
3596 }
3597
3598 static void
3599 maybe_gripe_matlab_incompatible_comment (char c)
3600 {
3601 if (c == '#')
3602 gripe_matlab_incompatible ("# used as comment character");
3603 }
3604
3605 static void
3606 gripe_matlab_incompatible_continuation (void)
3607 {
3608 gripe_matlab_incompatible ("\\ used as line continuation marker");
3609 }
3610
3611 static void
3612 gripe_matlab_incompatible_operator (const std::string& op)
3613 {
3614 std::string t = op;
3615 int n = t.length ();
3616 if (t[n-1] == '\n')
3617 t.resize (n-1);
3618 gripe_matlab_incompatible (t + " used as operator");
3619 }
3620
3621 static void
3622 display_token (int tok)
3623 {
3624 switch (tok)
3625 {
3626 case '=': std::cerr << "'='\n"; break;
3627 case ':': std::cerr << "':'\n"; break;
3628 case '-': std::cerr << "'-'\n"; break;
3629 case '+': std::cerr << "'+'\n"; break;
3630 case '*': std::cerr << "'*'\n"; break;
3631 case '/': std::cerr << "'/'\n"; break;
3632 case ADD_EQ: std::cerr << "ADD_EQ\n"; break;
3633 case SUB_EQ: std::cerr << "SUB_EQ\n"; break;
3634 case MUL_EQ: std::cerr << "MUL_EQ\n"; break;
3635 case DIV_EQ: std::cerr << "DIV_EQ\n"; break;
3636 case LEFTDIV_EQ: std::cerr << "LEFTDIV_EQ\n"; break;
3637 case POW_EQ: std::cerr << "POW_EQ\n"; break;
3638 case EMUL_EQ: std::cerr << "EMUL_EQ\n"; break;
3639 case EDIV_EQ: std::cerr << "EDIV_EQ\n"; break;
3640 case ELEFTDIV_EQ: std::cerr << "ELEFTDIV_EQ\n"; break;
3641 case EPOW_EQ: std::cerr << "EPOW_EQ\n"; break;
3642 case AND_EQ: std::cerr << "AND_EQ\n"; break;
3643 case OR_EQ: std::cerr << "OR_EQ\n"; break;
3644 case LSHIFT_EQ: std::cerr << "LSHIFT_EQ\n"; break;
3645 case RSHIFT_EQ: std::cerr << "RSHIFT_EQ\n"; break;
3646 case LSHIFT: std::cerr << "LSHIFT\n"; break;
3647 case RSHIFT: std::cerr << "RSHIFT\n"; break;
3648 case EXPR_AND_AND: std::cerr << "EXPR_AND_AND\n"; break;
3649 case EXPR_OR_OR: std::cerr << "EXPR_OR_OR\n"; break;
3650 case EXPR_AND: std::cerr << "EXPR_AND\n"; break;
3651 case EXPR_OR: std::cerr << "EXPR_OR\n"; break;
3652 case EXPR_NOT: std::cerr << "EXPR_NOT\n"; break;
3653 case EXPR_LT: std::cerr << "EXPR_LT\n"; break;
3654 case EXPR_LE: std::cerr << "EXPR_LE\n"; break;
3655 case EXPR_EQ: std::cerr << "EXPR_EQ\n"; break;
3656 case EXPR_NE: std::cerr << "EXPR_NE\n"; break;
3657 case EXPR_GE: std::cerr << "EXPR_GE\n"; break;
3658 case EXPR_GT: std::cerr << "EXPR_GT\n"; break;
3659 case LEFTDIV: std::cerr << "LEFTDIV\n"; break;
3660 case EMUL: std::cerr << "EMUL\n"; break;
3661 case EDIV: std::cerr << "EDIV\n"; break;
3662 case ELEFTDIV: std::cerr << "ELEFTDIV\n"; break;
3663 case EPLUS: std::cerr << "EPLUS\n"; break;
3664 case EMINUS: std::cerr << "EMINUS\n"; break;
3665 case QUOTE: std::cerr << "QUOTE\n"; break;
3666 case TRANSPOSE: std::cerr << "TRANSPOSE\n"; break;
3667 case PLUS_PLUS: std::cerr << "PLUS_PLUS\n"; break;
3668 case MINUS_MINUS: std::cerr << "MINUS_MINUS\n"; break;
3669 case POW: std::cerr << "POW\n"; break;
3670 case EPOW: std::cerr << "EPOW\n"; break;
3671
3672 case NUM:
3673 case IMAG_NUM:
3674 std::cerr << (tok == NUM ? "NUM" : "IMAG_NUM")
3675 << " [" << yylval.tok_val->number () << "]\n";
3676 break;
3677
3678 case STRUCT_ELT:
3679 std::cerr << "STRUCT_ELT [" << yylval.tok_val->text () << "]\n"; break;
3680
3681 case NAME:
3682 {
3683 symbol_table::symbol_record *sr = yylval.tok_val->sym_rec ();
3684 std::cerr << "NAME";
3685 if (sr)
3686 std::cerr << " [" << sr->name () << "]";
3687 std::cerr << "\n";
3688 }
3689 break;
3690
3691 case END: std::cerr << "END\n"; break;
3692
3693 case DQ_STRING:
3694 case SQ_STRING:
3695 std::cerr << (tok == DQ_STRING ? "DQ_STRING" : "SQ_STRING")
3696 << " [" << yylval.tok_val->text () << "]\n";
3697 break;
3698
3699 case FOR: std::cerr << "FOR\n"; break;
3700 case WHILE: std::cerr << "WHILE\n"; break;
3701 case DO: std::cerr << "DO\n"; break;
3702 case UNTIL: std::cerr << "UNTIL\n"; break;
3703 case IF: std::cerr << "IF\n"; break;
3704 case ELSEIF: std::cerr << "ELSEIF\n"; break;
3705 case ELSE: std::cerr << "ELSE\n"; break;
3706 case SWITCH: std::cerr << "SWITCH\n"; break;
3707 case CASE: std::cerr << "CASE\n"; break;
3708 case OTHERWISE: std::cerr << "OTHERWISE\n"; break;
3709 case BREAK: std::cerr << "BREAK\n"; break;
3710 case CONTINUE: std::cerr << "CONTINUE\n"; break;
3711 case FUNC_RET: std::cerr << "FUNC_RET\n"; break;
3712 case UNWIND: std::cerr << "UNWIND\n"; break;
3713 case CLEANUP: std::cerr << "CLEANUP\n"; break;
3714 case TRY: std::cerr << "TRY\n"; break;
3715 case CATCH: std::cerr << "CATCH\n"; break;
3716 case GLOBAL: std::cerr << "GLOBAL\n"; break;
3717 case PERSISTENT: std::cerr << "PERSISTENT\n"; break;
3718 case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break;
3719 case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
3720 case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
3721 case FCN: std::cerr << "FCN\n"; break;
3722 case CLOSE_BRACE: std::cerr << "CLOSE_BRACE\n"; break;
3723 case SCRIPT_FILE: std::cerr << "SCRIPT_FILE\n"; break;
3724 case FUNCTION_FILE: std::cerr << "FUNCTION_FILE\n"; break;
3725 case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break;
3726 case METAQUERY: std::cerr << "METAQUERY\n"; break;
3727 case GET: std::cerr << "GET\n"; break;
3728 case SET: std::cerr << "SET\n"; break;
3729 case PROPERTIES: std::cerr << "PROPERTIES\n"; break;
3730 case METHODS: std::cerr << "METHODS\n"; break;
3731 case EVENTS: std::cerr << "EVENTS\n"; break;
3732 case CLASSDEF: std::cerr << "CLASSDEF\n"; break;
3733 case '\n': std::cerr << "\\n\n"; break;
3734 case '\r': std::cerr << "\\r\n"; break;
3735 case '\t': std::cerr << "TAB\n"; break;
3736 default:
3737 {
3738 if (tok < 256)
3739 std::cerr << static_cast<char> (tok) << "\n";
3740 else
3741 std::cerr << "UNKNOWN(" << tok << ")\n";
3742 }
3743 break;
3744 }
3745 }
3746
3747 static void
3748 display_state (void)
3749 {
3750 std::cerr << "S: ";
3751
3752 switch (YY_START)
3753 {
3754 case INITIAL:
3755 std::cerr << "INITIAL" << std::endl;
3756 break;
3757
3758 case COMMAND_START:
3759 std::cerr << "COMMAND_START" << std::endl;
3760 break;
3761
3762 case MATRIX_START:
3763 std::cerr << "MATRIX_START" << std::endl;
3764 break;
3765
3766 case SCRIPT_FILE_BEGIN:
3767 std::cerr << "SCRIPT_FILE_BEGIN" << std::endl;
3768 break;
3769
3770 case FUNCTION_FILE_BEGIN:
3771 std::cerr << "FUNCTION_FILE_BEGIN" << std::endl;
3772 break;
3773
3774 default:
3775 std::cerr << "UNKNOWN START STATE!" << std::endl;
3776 break;
3777 }
3778 }
3779
3780 static void
3781 lexer_debug (const char *pattern, const char *text)
3782 {
3783 std::cerr << std::endl;
3784
3785 display_state ();
3786
3787 std::cerr << "P: " << pattern << std::endl;
3788 std::cerr << "T: " << text << std::endl;
3789 }
3790
3791 DEFUN (__display_tokens__, args, nargout,
3792 "-*- texinfo -*-\n\
3793 @deftypefn {Built-in Function} {} __display_tokens__ ()\n\
3794 Query or set the internal variable that determines whether Octave's\n\
3795 lexer displays tokens as they are read.\n\
3796 @end deftypefn")
3797 {
3798 return SET_INTERNAL_VARIABLE (display_tokens);
3799 }
3800
3801 DEFUN (__token_count__, , ,
3802 "-*- texinfo -*-\n\
3803 @deftypefn {Built-in Function} {} __token_count__ ()\n\
3804 Number of language tokens processed since Octave startup.\n\
3805 @end deftypefn")
3806 {
3807 return octave_value (Vtoken_count);
3808 }
3809
3810 DEFUN (__lexer_debug_flag__, args, nargout,
3811 "-*- texinfo -*-\n\
3812 @deftypefn {Built-in Function} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val}))\n\
3813 Undocumented internal function.\n\
3814 @end deftypefn")
3815 {
3816 octave_value retval;
3817
3818 retval = set_internal_variable (lexer_debug_flag, args, nargout,
3819 "__lexer_debug_flag__");
3820
3821 return retval;
3822 }