Mercurial > octave-nkf
comparison src/lex.l @ 1001:641c05eaed01
[project @ 1994-12-23 06:17:30 by jwe]
author | jwe |
---|---|
date | Fri, 23 Dec 1994 06:17:30 +0000 |
parents | 18be848f10a9 |
children | dfe01093f657 |
comparison
equal
deleted
inserted
replaced
1000:de0df9547e08 | 1001:641c05eaed01 |
---|---|
110 static void grab_help_text (void); | 110 static void grab_help_text (void); |
111 static int match_any (char c, char *s); | 111 static int match_any (char c, char *s); |
112 static int next_token_is_bin_op (int spc_prev, char *yytext); | 112 static int next_token_is_bin_op (int spc_prev, char *yytext); |
113 static int next_token_is_postfix_unary_op (int spc_prev, char *yytext); | 113 static int next_token_is_postfix_unary_op (int spc_prev, char *yytext); |
114 static char *strip_trailing_whitespace (char *s); | 114 static char *strip_trailing_whitespace (char *s); |
115 static void eat_whitespace (void); | |
116 static void handle_number (char *yytext); | 115 static void handle_number (char *yytext); |
117 static int handle_string (char delim, int text_style = 0); | 116 static int handle_string (char delim, int text_style = 0); |
118 static int handle_close_brace (char *yytext); | 117 static int handle_close_brace (int spc_gobbled); |
119 static int handle_identifier (char *s, int next_tok_is_eq); | 118 static int handle_identifier (char *tok, int spc_gobbled); |
119 static int have_continuation (void); | |
120 static int have_ellipsis_continuation (void); | |
121 static int eat_whitespace (void); | |
122 static int eat_continuation (void); | |
120 | 123 |
121 %} | 124 %} |
122 | 125 |
123 D [0-9] | 126 D [0-9] |
124 S [ \t] | 127 S [ \t] |
201 // It's also a pain in the ass to decide whether to insert a comma | 204 // It's also a pain in the ass to decide whether to insert a comma |
202 // after seeing a ']' character... | 205 // after seeing a ']' character... |
203 %} | 206 %} |
204 | 207 |
205 <MATRIX>{SNL}*\]{S}* { | 208 <MATRIX>{SNL}*\]{S}* { |
206 return handle_close_brace (yytext); | 209 fixup_column_count (yytext); |
210 int c = yytext[yyleng-1]; | |
211 int cont_is_spc = eat_continuation (); | |
212 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
213 return handle_close_brace (spc_gobbled); | |
207 } | 214 } |
208 | 215 |
209 %{ | 216 %{ |
210 // Commas are element separators in matrix constants. | 217 // Commas are element separators in matrix constants. |
211 %} | 218 %} |
232 TOK_RETURN (','); | 239 TOK_RETURN (','); |
233 } | 240 } |
234 } | 241 } |
235 | 242 |
236 %{ | 243 %{ |
237 // Semicolons are both handled as row separators in matrix constants. | 244 // Semicolons are handled as row separators in matrix constants. |
238 %} | 245 %} |
239 | 246 |
240 <MATRIX>{SNLCMT}*;{SNLCMT}* { | 247 <MATRIX>{SNLCMT}*;{SNLCMT}* { |
241 fixup_column_count (yytext); | 248 fixup_column_count (yytext); |
249 eat_whitespace (); | |
242 quote_is_transpose = 0; | 250 quote_is_transpose = 0; |
243 cant_be_identifier = 0; | 251 cant_be_identifier = 0; |
244 convert_spaces_to_comma = 1; | 252 convert_spaces_to_comma = 1; |
245 return ';'; | 253 return ';'; |
246 } | 254 } |
292 TOK_RETURN ('['); | 300 TOK_RETURN ('['); |
293 } | 301 } |
294 } | 302 } |
295 | 303 |
296 \] { | 304 \] { |
297 promptflag++; | |
298 | |
299 if (! nesting_level.empty ()) | 305 if (! nesting_level.empty ()) |
300 nesting_level.pop (); | 306 nesting_level.pop (); |
301 | 307 |
302 if (plotting && ! past_plot_range) | 308 if (plotting && ! past_plot_range) |
303 { | 309 { |
371 | 377 |
372 {IDENT}{S}* { | 378 {IDENT}{S}* { |
373 static char *tok = 0; | 379 static char *tok = 0; |
374 delete [] tok; | 380 delete [] tok; |
375 tok = strip_trailing_whitespace (yytext); | 381 tok = strip_trailing_whitespace (yytext); |
376 int c = yyinput (); | 382 current_input_column += yyleng; |
377 unput (c); | 383 int c = yytext[yyleng-1]; |
378 return handle_identifier (tok, (c == '=')); | 384 int cont_is_spc = eat_continuation (); |
385 int spc_gobbled = (cont_is_spc || c == ' ' || c == '\t'); | |
386 return handle_identifier (tok, spc_gobbled); | |
379 } | 387 } |
380 | 388 |
381 %{ | 389 %{ |
382 // A new line character. New line characters inside matrix constants | 390 // A new line character. New line characters inside matrix constants |
383 // are handled by the <MATRIX> start state code above. If closest | 391 // are handled by the <MATRIX> start state code above. If closest |
532 TOK_RETURN ('('); | 540 TOK_RETURN ('('); |
533 } | 541 } |
534 | 542 |
535 ")" { | 543 ")" { |
536 if (! nesting_level.empty ()) | 544 if (! nesting_level.empty ()) |
537 { | 545 nesting_level.pop (); |
538 nesting_level.pop (); | 546 |
539 promptflag++; | |
540 } | |
541 do_comma_insert_check (); | |
542 current_input_column++; | 547 current_input_column++; |
543 cant_be_identifier = 1; | 548 cant_be_identifier = 1; |
544 quote_is_transpose = 1; | 549 quote_is_transpose = 1; |
545 convert_spaces_to_comma = (! nesting_level.empty () | 550 convert_spaces_to_comma = (! nesting_level.empty () |
546 && nesting_level.top () == BRACE); | 551 && nesting_level.top () == BRACE); |
552 do_comma_insert_check (); | |
547 return ')'; | 553 return ')'; |
548 } | 554 } |
549 | 555 |
550 %{ | 556 %{ |
551 // We return everything else as single character tokens, which should | 557 // We return everything else as single character tokens, which should |
562 // that we insert a comma ahead of it. | 568 // that we insert a comma ahead of it. |
563 | 569 |
564 void | 570 void |
565 do_comma_insert_check (void) | 571 do_comma_insert_check (void) |
566 { | 572 { |
573 int spc_gobbled = eat_continuation (); | |
567 int c = yyinput (); | 574 int c = yyinput (); |
568 yyunput (c, yytext); | 575 yyunput (c, yytext); |
576 if (spc_gobbled) | |
577 yyunput (' ', yytext); | |
569 do_comma_insert = (braceflag && c == '['); | 578 do_comma_insert = (braceflag && c == '['); |
570 } | 579 } |
571 | 580 |
572 // Fix things up for errors or interrupts. The parser is never called | 581 // Fix things up for errors or interrupts. The parser is never called |
573 // recursively, so it is always safe to reinitialize its state before | 582 // recursively, so it is always safe to reinitialize its state before |
1068 token_stack.push (yylval.tok_val); | 1077 token_stack.push (yylval.tok_val); |
1069 return ALL_VA_ARGS; | 1078 return ALL_VA_ARGS; |
1070 } | 1079 } |
1071 | 1080 |
1072 if (end_found) | 1081 if (end_found) |
1073 { | 1082 return END; |
1074 if (! defining_func && ! looping) | |
1075 promptflag++; | |
1076 return END; | |
1077 } | |
1078 | 1083 |
1079 return 0; | 1084 return 0; |
1080 } | 1085 } |
1081 | 1086 |
1082 // Try to find an identifier. All binding to global or builtin | 1087 // Try to find an identifier. All binding to global or builtin |
1316 *t = '\0'; | 1321 *t = '\0'; |
1317 | 1322 |
1318 return retval; | 1323 return retval; |
1319 } | 1324 } |
1320 | 1325 |
1321 static void | 1326 // Discard whitespace, including comments and continuations. |
1327 // Return 1 if whitespace appeared in the input, 0 otherwise. | |
1328 | |
1329 static int | |
1322 eat_whitespace (void) | 1330 eat_whitespace (void) |
1323 { | 1331 { |
1332 int retval = 0; | |
1324 int in_comment = 0; | 1333 int in_comment = 0; |
1325 int c; | 1334 int c; |
1326 while ((c = yyinput ()) != EOF) | 1335 while ((c = yyinput ()) != EOF) |
1327 { | 1336 { |
1328 current_input_column++; | 1337 current_input_column++; |
1329 | 1338 |
1330 switch (c) | 1339 switch (c) |
1331 { | 1340 { |
1332 case ' ': | 1341 case ' ': |
1333 case '\t': | 1342 case '\t': |
1343 retval = 1; | |
1334 break; | 1344 break; |
1335 | 1345 |
1336 case '\n': | 1346 case '\n': |
1347 retval = 1; | |
1337 in_comment = 0; | 1348 in_comment = 0; |
1338 current_input_column = 0; | 1349 current_input_column = 0; |
1339 break; | 1350 break; |
1340 | 1351 |
1341 case '#': | 1352 case '#': |
1342 case '%': | 1353 case '%': |
1343 in_comment = 1; | 1354 in_comment = 1; |
1344 break; | 1355 break; |
1356 | |
1357 case '.': | |
1358 if (in_comment) | |
1359 break; | |
1360 else | |
1361 { | |
1362 if (have_ellipsis_continuation ()) | |
1363 break; | |
1364 else | |
1365 goto done; | |
1366 } | |
1367 | |
1368 case '\\': | |
1369 if (in_comment) | |
1370 break; | |
1371 else | |
1372 { | |
1373 if (have_continuation ()) | |
1374 break; | |
1375 else | |
1376 goto done; | |
1377 } | |
1345 | 1378 |
1346 default: | 1379 default: |
1347 if (in_comment) | 1380 if (in_comment) |
1348 break; | 1381 break; |
1349 else | 1382 else |
1351 } | 1384 } |
1352 } | 1385 } |
1353 | 1386 |
1354 done: | 1387 done: |
1355 yyunput (c, yytext); | 1388 yyunput (c, yytext); |
1356 return; | 1389 return retval; |
1357 } | 1390 } |
1358 | 1391 |
1359 static void | 1392 static void |
1360 handle_number (char *yytext) | 1393 handle_number (char *yytext) |
1361 { | 1394 { |
1381 current_input_column += yyleng; | 1414 current_input_column += yyleng; |
1382 | 1415 |
1383 do_comma_insert_check (); | 1416 do_comma_insert_check (); |
1384 } | 1417 } |
1385 | 1418 |
1419 // We have seen a backslash and need to find out if it should be | |
1420 // treated as a continuation character. If so, this eats it, up to | |
1421 // and including the new line character. | |
1422 // | |
1386 // Match whitespace only, followed by a comment character or newline. | 1423 // Match whitespace only, followed by a comment character or newline. |
1387 // Once a comment character is found, discard all input until newline. | 1424 // Once a comment character is found, discard all input until newline. |
1388 // If non-whitespace characters are found before comment | 1425 // If non-whitespace characters are found before comment |
1389 // characters, return 0. Otherwise, return 1. | 1426 // characters, return 0. Otherwise, return 1. |
1390 | 1427 |
1410 in_comment = 1; | 1447 in_comment = 1; |
1411 break; | 1448 break; |
1412 | 1449 |
1413 case '\n': | 1450 case '\n': |
1414 current_input_column = 0; | 1451 current_input_column = 0; |
1452 promptflag--; | |
1415 return 1; | 1453 return 1; |
1416 | 1454 |
1417 default: | 1455 default: |
1418 if (in_comment) | 1456 if (in_comment) |
1419 break; | 1457 break; |
1436 yyunput (c, yytext); | 1474 yyunput (c, yytext); |
1437 | 1475 |
1438 return 0; | 1476 return 0; |
1439 } | 1477 } |
1440 | 1478 |
1479 // We have seen a `.' and need to see if it is the start of a | |
1480 // continuation. If so, this eats it, up to and including the new | |
1481 // line character. | |
1482 | |
1441 static int | 1483 static int |
1442 have_ellipsis_continuation (void) | 1484 have_ellipsis_continuation (void) |
1443 { | 1485 { |
1444 char c1 = yyinput (); | 1486 char c1 = yyinput (); |
1445 if (c1 == '.') | 1487 if (c1 == '.') |
1457 yyunput (c1, yytext); | 1499 yyunput (c1, yytext); |
1458 | 1500 |
1459 return 0; | 1501 return 0; |
1460 } | 1502 } |
1461 | 1503 |
1504 // See if we have a continuation line. If so, eat it and the leading | |
1505 // whitespace on the next line. | |
1506 // Return 1 if whitespace appeared in the input, 0 otherwise. | |
1507 | |
1508 static int | |
1509 eat_continuation (void) | |
1510 { | |
1511 int retval = 0; | |
1512 int c = yyinput (); | |
1513 if ((c == '.' && have_ellipsis_continuation ()) | |
1514 || (c == '\\' && have_continuation ())) | |
1515 retval = eat_whitespace (); | |
1516 else | |
1517 yyunput (c, yytext); | |
1518 | |
1519 return retval; | |
1520 } | |
1521 | |
1462 static int | 1522 static int |
1463 handle_string (char delim, int text_style) | 1523 handle_string (char delim, int text_style) |
1464 { | 1524 { |
1465 ostrstream buf; | 1525 ostrstream buf; |
1466 | 1526 |
1471 { | 1531 { |
1472 current_input_column++; | 1532 current_input_column++; |
1473 | 1533 |
1474 if (c == '\\') | 1534 if (c == '\\') |
1475 { | 1535 { |
1476 if (have_continuation ()) | 1536 if (! have_continuation ()) |
1477 promptflag--; | |
1478 else | |
1479 buf << (char) c; | 1537 buf << (char) c; |
1480 goto next; | 1538 goto next; |
1481 } | 1539 } |
1482 else if (c == '.') | 1540 else if (c == '.') |
1483 { | 1541 { |
1484 if (have_ellipsis_continuation ()) | 1542 if (! have_ellipsis_continuation ()) |
1485 promptflag--; | |
1486 else | |
1487 buf << (char) c; | 1543 buf << (char) c; |
1488 goto next; | 1544 goto next; |
1489 } | 1545 } |
1490 else if (c == '\n') | 1546 else if (c == '\n') |
1491 { | 1547 { |
1546 | 1602 |
1547 return LEXICAL_ERROR; | 1603 return LEXICAL_ERROR; |
1548 } | 1604 } |
1549 | 1605 |
1550 static int | 1606 static int |
1551 handle_close_brace (char *yytext) | 1607 handle_close_brace (int spc_gobbled) |
1552 { | 1608 { |
1553 fixup_column_count (yytext); | |
1554 | |
1555 if (! nesting_level.empty ()) | 1609 if (! nesting_level.empty ()) |
1556 { | 1610 { |
1557 nesting_level.pop (); | 1611 nesting_level.pop (); |
1558 braceflag--; | 1612 braceflag--; |
1559 } | 1613 } |
1560 | 1614 |
1561 if (braceflag == 0) | 1615 if (braceflag == 0) |
1562 { | 1616 BEGIN 0; |
1563 if (! defining_func) | 1617 |
1564 promptflag++; | |
1565 BEGIN 0; | |
1566 } | |
1567 | |
1568 int c1 = yyinput (); | 1618 int c1 = yyinput (); |
1569 | |
1570 if (c1 == '=') | 1619 if (c1 == '=') |
1571 { | 1620 { |
1572 quote_is_transpose = 0; | 1621 quote_is_transpose = 0; |
1573 cant_be_identifier = 0; | 1622 cant_be_identifier = 0; |
1574 convert_spaces_to_comma = 1; | 1623 convert_spaces_to_comma = 1; |
1586 { | 1635 { |
1587 unput (c1); | 1636 unput (c1); |
1588 | 1637 |
1589 if (braceflag && user_pref.whitespace_in_literal_matrix != 2) | 1638 if (braceflag && user_pref.whitespace_in_literal_matrix != 2) |
1590 { | 1639 { |
1591 int c0 = yytext[yyleng-1]; | 1640 int bin_op = next_token_is_bin_op (spc_gobbled, yytext); |
1592 int spc_prev = (c0 == ' ' || c0 == '\t'); | |
1593 int bin_op = next_token_is_bin_op (spc_prev, yytext); | |
1594 int postfix_un_op = next_token_is_postfix_unary_op | 1641 int postfix_un_op = next_token_is_postfix_unary_op |
1595 (spc_prev, yytext); | 1642 (spc_gobbled, yytext); |
1596 | 1643 |
1597 int other_op = match_any (c1, ",;\n]"); | 1644 int other_op = match_any (c1, ",;\n]"); |
1598 | 1645 |
1599 if (! (postfix_un_op || bin_op || other_op | 1646 if (! (postfix_un_op || bin_op || other_op |
1600 || nesting_level.empty ()) | 1647 || nesting_level.empty ()) |
1615 | 1662 |
1616 // Figure out exactly what kind of token to return when we have seen | 1663 // Figure out exactly what kind of token to return when we have seen |
1617 // an identifier. Handles keywords. | 1664 // an identifier. Handles keywords. |
1618 | 1665 |
1619 static int | 1666 static int |
1620 handle_identifier (char *tok, int next_tok_is_eq) | 1667 handle_identifier (char *tok, int spc_gobbled) |
1621 { | 1668 { |
1622 // It is almost always an error for an identifier to be followed | 1669 // It is almost always an error for an identifier to be followed |
1623 // directly by another identifier. Special cases are handled below. | 1670 // directly by another identifier. Special cases are handled below. |
1624 | 1671 |
1625 cant_be_identifier = 1; | 1672 cant_be_identifier = 1; |
1640 int kw_token = is_keyword (tok); | 1687 int kw_token = is_keyword (tok); |
1641 if (kw_token) | 1688 if (kw_token) |
1642 { | 1689 { |
1643 if (kw_token == STYLE) | 1690 if (kw_token == STYLE) |
1644 { | 1691 { |
1645 current_input_column += yyleng; | |
1646 quote_is_transpose = 0; | 1692 quote_is_transpose = 0; |
1647 convert_spaces_to_comma = 1; | 1693 convert_spaces_to_comma = 1; |
1648 return kw_token; | 1694 return kw_token; |
1649 } | 1695 } |
1650 else | 1696 else |
1682 BEGIN HELP_FCN; | 1728 BEGIN HELP_FCN; |
1683 else if (strcmp (tok, "set") == 0) | 1729 else if (strcmp (tok, "set") == 0) |
1684 doing_set = 1; | 1730 doing_set = 1; |
1685 } | 1731 } |
1686 | 1732 |
1733 int c = yyinput (); | |
1734 yyunput (c, yytext); | |
1735 int next_tok_is_eq = (c == '='); | |
1736 | |
1687 // Make sure we put the return values of a function in the symbol | 1737 // Make sure we put the return values of a function in the symbol |
1688 // table that is local to the function. | 1738 // table that is local to the function. |
1689 | 1739 |
1690 if (next_tok_is_eq && defining_func && maybe_screwed) | 1740 if (next_tok_is_eq && defining_func && maybe_screwed) |
1691 curr_sym_tab = tmp_local_sym_tab; | 1741 curr_sym_tab = tmp_local_sym_tab; |
1700 | 1750 |
1701 // After seeing an identifier, it is ok to convert spaces to a comma | 1751 // After seeing an identifier, it is ok to convert spaces to a comma |
1702 // (if needed). | 1752 // (if needed). |
1703 | 1753 |
1704 convert_spaces_to_comma = 1; | 1754 convert_spaces_to_comma = 1; |
1705 current_input_column += yyleng; | |
1706 | 1755 |
1707 // If we are defining a function and we have not seen the parameter | 1756 // If we are defining a function and we have not seen the parameter |
1708 // list yet and the next token is `=', return a token that represents | 1757 // list yet and the next token is `=', return a token that represents |
1709 // the only return value for the function. For example, | 1758 // the only return value for the function. For example, |
1710 // | 1759 // |
1733 | 1782 |
1734 if (user_pref.whitespace_in_literal_matrix != 2 | 1783 if (user_pref.whitespace_in_literal_matrix != 2 |
1735 && ! nesting_level.empty () | 1784 && ! nesting_level.empty () |
1736 && nesting_level.top () == BRACE) | 1785 && nesting_level.top () == BRACE) |
1737 { | 1786 { |
1738 int c0 = yytext[yyleng-1]; | 1787 int bin_op = next_token_is_bin_op (spc_gobbled, yytext); |
1739 int spc_prev = (c0 == ' ' || c0 == '\t'); | 1788 |
1740 int bin_op = next_token_is_bin_op (spc_prev, yytext); | 1789 int postfix_un_op = next_token_is_postfix_unary_op (spc_gobbled, |
1741 | |
1742 int postfix_un_op = next_token_is_postfix_unary_op (spc_prev, | |
1743 yytext); | 1790 yytext); |
1744 | 1791 |
1745 int c1 = yyinput (); | 1792 int c1 = yyinput (); |
1746 unput (c1); | 1793 unput (c1); |
1747 int other_op = match_any (c1, ".,;\n]"); | 1794 int other_op = match_any (c1, ".,;\n]"); |
1748 int index_op = (c1 == '(' | 1795 int index_op = (c1 == '(' |
1749 && (user_pref.whitespace_in_literal_matrix == 0 | 1796 && (user_pref.whitespace_in_literal_matrix == 0 |
1750 || ! spc_prev)); | 1797 || ! spc_gobbled)); |
1751 | 1798 |
1752 if (! (postfix_un_op || bin_op || other_op || index_op)) | 1799 if (! (postfix_un_op || bin_op || other_op || index_op)) |
1753 unput (','); | 1800 unput (','); |
1754 } | 1801 } |
1755 | 1802 |