Mercurial > octave
comparison libinterp/parse-tree/lex.ll @ 27769:07ffed9878ad
store begin and end position in tokens
* token.h, token.cc (token::m_beg_pos): Rename from m_pos.
(token::m_end_pos): New member variable.
(token::beg_pos, token::end_pos): New functions.
(token::token): Accept beginning and ending positions for tokens as
arguments. Change all uses but store the same position for both
beginning and end in most cases. Subsequent changes will store the
correct beginning and ending positions.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 02 Dec 2019 22:58:55 -0600 |
parents | d6701f835496 |
children | 7a06e352ac61 |
comparison
equal
deleted
inserted
replaced
27768:d6701f835496 | 27769:07ffed9878ad |
---|---|
838 curr_lexer->m_looking_for_object_index = true; | 838 curr_lexer->m_looking_for_object_index = true; |
839 curr_lexer->m_at_beginning_of_statement = false; | 839 curr_lexer->m_at_beginning_of_statement = false; |
840 | 840 |
841 curr_lexer->push_token (new octave::token (DQ_STRING, | 841 curr_lexer->push_token (new octave::token (DQ_STRING, |
842 curr_lexer->m_string_text, | 842 curr_lexer->m_string_text, |
843 curr_lexer->m_beg_string)); | 843 curr_lexer->m_beg_string, |
| 844 curr_lexer->m_filepos)); |
844 | 845 |
845 curr_lexer->m_string_text = ""; | 846 curr_lexer->m_string_text = ""; |
846 | 847 |
847 return curr_lexer->count_token_internal (DQ_STRING); | 848 return curr_lexer->count_token_internal (DQ_STRING); |
848 } | 849 } |
859 if (result > 0xff) | 860 if (result > 0xff) |
860 { | 861 { |
861 octave::token *tok | 862 octave::token *tok |
862 = new octave::token (LEXICAL_ERROR, | 863 = new octave::token (LEXICAL_ERROR, |
863 "invalid octal escape sequence in character string", | 864 "invalid octal escape sequence in character string", |
864 curr_lexer->m_filepos); | 865 curr_lexer->m_filepos, curr_lexer->m_filepos); |
865 | 866 |
866 curr_lexer->push_token (tok); | 867 curr_lexer->push_token (tok); |
867 | 868 |
868 return curr_lexer->count_token_internal (LEXICAL_ERROR); | 869 return curr_lexer->count_token_internal (LEXICAL_ERROR); |
869 } | 870 } |
1000 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); | 1001 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); |
1001 | 1002 |
1002 octave::token *tok | 1003 octave::token *tok |
1003 = new octave::token (LEXICAL_ERROR, | 1004 = new octave::token (LEXICAL_ERROR, |
1004 "unterminated character string constant", | 1005 "unterminated character string constant", |
1005 curr_lexer->m_filepos); | 1006 curr_lexer->m_filepos, curr_lexer->m_filepos); |
1006 | 1007 |
1007 curr_lexer->push_token (tok); | 1008 curr_lexer->push_token (tok); |
1008 | 1009 |
1009 curr_lexer->m_filepos.next_line (); | 1010 curr_lexer->m_filepos.next_line (); |
1010 | 1011 |
1034 curr_lexer->m_looking_for_object_index = true; | 1035 curr_lexer->m_looking_for_object_index = true; |
1035 curr_lexer->m_at_beginning_of_statement = false; | 1036 curr_lexer->m_at_beginning_of_statement = false; |
1036 | 1037 |
1037 curr_lexer->push_token (new octave::token (SQ_STRING, | 1038 curr_lexer->push_token (new octave::token (SQ_STRING, |
1038 curr_lexer->m_string_text, | 1039 curr_lexer->m_string_text, |
1039 curr_lexer->m_beg_string)); | 1040 curr_lexer->m_beg_string, |
| 1041 curr_lexer->m_filepos)); |
1040 | 1042 |
1041 curr_lexer->m_string_text = ""; | 1043 curr_lexer->m_string_text = ""; |
1042 | 1044 |
1043 return curr_lexer->count_token_internal (SQ_STRING); | 1045 return curr_lexer->count_token_internal (SQ_STRING); |
1044 } | 1046 } |
1055 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); | 1057 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); |
1056 | 1058 |
1057 octave::token *tok | 1059 octave::token *tok |
1058 = new octave::token (LEXICAL_ERROR, | 1060 = new octave::token (LEXICAL_ERROR, |
1059 "unterminated character string constant", | 1061 "unterminated character string constant", |
1060 curr_lexer->m_filepos); | 1062 curr_lexer->m_filepos, curr_lexer->m_filepos); |
1061 | 1063 |
1062 curr_lexer->push_token (tok); | 1064 curr_lexer->push_token (tok); |
1063 | 1065 |
1064 curr_lexer->m_filepos.next_line (); | 1066 curr_lexer->m_filepos.next_line (); |
1065 | 1067 |
1366 octave::token *tok; | 1368 octave::token *tok; |
1367 | 1369 |
1368 if (kw_token) | 1370 if (kw_token) |
1369 tok = new octave::token (LEXICAL_ERROR, | 1371 tok = new octave::token (LEXICAL_ERROR, |
1370 "function handles may not refer to keywords", | 1372 "function handles may not refer to keywords", |
| 1373 curr_lexer->m_filepos, |
1371 curr_lexer->m_filepos); | 1374 curr_lexer->m_filepos); |
1372 else | 1375 else |
1373 { | 1376 { |
1374 curr_lexer->m_looking_for_object_index = true; | 1377 curr_lexer->m_looking_for_object_index = true; |
1375 | 1378 |
1376 tok = new octave::token (FCN_HANDLE, ident, | 1379 tok = new octave::token (FCN_HANDLE, ident, |
| 1380 curr_lexer->m_filepos, |
1377 curr_lexer->m_filepos); | 1381 curr_lexer->m_filepos); |
1378 } | 1382 } |
1379 | 1383 |
1380 curr_lexer->push_token (tok); | 1384 curr_lexer->push_token (tok); |
1381 | 1385 |
1414 else if (curr_lexer->m_nesting_level.is_bracket_or_brace ()) | 1418 else if (curr_lexer->m_nesting_level.is_bracket_or_brace ()) |
1415 { | 1419 { |
1416 octave::token *tok | 1420 octave::token *tok |
1417 = new octave::token (LEXICAL_ERROR, | 1421 = new octave::token (LEXICAL_ERROR, |
1418 "unexpected internal lexer error", | 1422 "unexpected internal lexer error", |
1419 curr_lexer->m_filepos); | 1423 curr_lexer->m_filepos, curr_lexer->m_filepos); |
1420 | 1424 |
1421 curr_lexer->push_token (tok); | 1425 curr_lexer->push_token (tok); |
1422 | 1426 |
1423 curr_lexer->m_filepos.next_line (); | 1427 curr_lexer->m_filepos.next_line (); |
1424 | 1428 |
1804 << octave::undo_string_escape (static_cast<char> (c)) | 1808 << octave::undo_string_escape (static_cast<char> (c)) |
1805 << "' (ASCII " << c << ")"; | 1809 << "' (ASCII " << c << ")"; |
1806 | 1810 |
1807 octave::token *tok | 1811 octave::token *tok |
1808 = new octave::token (LEXICAL_ERROR, buf.str (), | 1812 = new octave::token (LEXICAL_ERROR, buf.str (), |
1809 curr_lexer->m_filepos); | 1813 curr_lexer->m_filepos, curr_lexer->m_filepos); |
1810 | 1814 |
1811 curr_lexer->push_token (tok); | 1815 curr_lexer->push_token (tok); |
1812 | 1816 |
1813 curr_lexer->m_filepos.increment_column (); | 1817 curr_lexer->m_filepos.increment_column (); |
1814 | 1818 |
2573 { | 2577 { |
2574 m_at_beginning_of_statement = previous_at_bos; | 2578 m_at_beginning_of_statement = previous_at_bos; |
2575 return 0; | 2579 return 0; |
2576 } | 2580 } |
2577 | 2581 |
2578 tok_val = new token (end_kw, token::simple_end, m_filepos); | 2582 tok_val = new token (end_kw, token::simple_end, m_filepos, |
| 2583 m_filepos); |
2579 m_at_beginning_of_statement = true; | 2584 m_at_beginning_of_statement = true; |
2580 break; | 2585 break; |
2581 | 2586 |
2582 case end_try_catch_kw: | 2587 case end_try_catch_kw: |
2583 tok_val = new token (end_try_catch_kw, token::try_catch_end, m_filepos); | 2588 tok_val = new token (end_try_catch_kw, token::try_catch_end, |
| 2589 m_filepos, m_filepos); |
2584 m_at_beginning_of_statement = true; | 2590 m_at_beginning_of_statement = true; |
2585 break; | 2591 break; |
2586 | 2592 |
2587 case end_unwind_protect_kw: | 2593 case end_unwind_protect_kw: |
2588 tok_val = new token (end_unwind_protect_kw, | 2594 tok_val = new token (end_unwind_protect_kw, |
2589 token::unwind_protect_end, m_filepos); | 2595 token::unwind_protect_end, m_filepos, |
| 2596 m_filepos); |
2590 m_at_beginning_of_statement = true; | 2597 m_at_beginning_of_statement = true; |
2591 break; | 2598 break; |
2592 | 2599 |
2593 case endfor_kw: | 2600 case endfor_kw: |
2594 tok_val = new token (endfor_kw, token::for_end, m_filepos); | 2601 tok_val = new token (endfor_kw, token::for_end, m_filepos, |
| 2602 m_filepos); |
2595 m_at_beginning_of_statement = true; | 2603 m_at_beginning_of_statement = true; |
2596 break; | 2604 break; |
2597 | 2605 |
2598 case endfunction_kw: | 2606 case endfunction_kw: |
2599 tok_val = new token (endfunction_kw, token::function_end, m_filepos); | 2607 tok_val = new token (endfunction_kw, token::function_end, |
| 2608 m_filepos, m_filepos); |
2600 m_at_beginning_of_statement = true; | 2609 m_at_beginning_of_statement = true; |
2601 break; | 2610 break; |
2602 | 2611 |
2603 case endif_kw: | 2612 case endif_kw: |
2604 tok_val = new token (endif_kw, token::if_end, m_filepos); | 2613 tok_val = new token (endif_kw, token::if_end, m_filepos, m_filepos); |
2605 m_at_beginning_of_statement = true; | 2614 m_at_beginning_of_statement = true; |
2606 break; | 2615 break; |
2607 | 2616 |
2608 case endparfor_kw: | 2617 case endparfor_kw: |
2609 tok_val = new token (endparfor_kw, token::parfor_end, m_filepos); | 2618 tok_val = new token (endparfor_kw, token::parfor_end, m_filepos, |
| 2619 m_filepos); |
2610 m_at_beginning_of_statement = true; | 2620 m_at_beginning_of_statement = true; |
2611 break; | 2621 break; |
2612 | 2622 |
2613 case endswitch_kw: | 2623 case endswitch_kw: |
2614 tok_val = new token (endswitch_kw, token::switch_end, m_filepos); | 2624 tok_val = new token (endswitch_kw, token::switch_end, m_filepos, |
| 2625 m_filepos); |
2615 m_at_beginning_of_statement = true; | 2626 m_at_beginning_of_statement = true; |
2616 break; | 2627 break; |
2617 | 2628 |
2618 case endwhile_kw: | 2629 case endwhile_kw: |
2619 tok_val = new token (endwhile_kw, token::while_end, m_filepos); | 2630 tok_val = new token (endwhile_kw, token::while_end, m_filepos, |
| 2631 m_filepos); |
2620 m_at_beginning_of_statement = true; | 2632 m_at_beginning_of_statement = true; |
2621 break; | 2633 break; |
2622 | 2634 |
2623 case endclassdef_kw: | 2635 case endclassdef_kw: |
2624 tok_val = new token (endclassdef_kw, token::classdef_end, m_filepos); | 2636 tok_val = new token (endclassdef_kw, token::classdef_end, |
| 2637 m_filepos, m_filepos); |
2625 m_at_beginning_of_statement = true; | 2638 m_at_beginning_of_statement = true; |
2626 break; | 2639 break; |
2627 | 2640 |
2628 case endenumeration_kw: | 2641 case endenumeration_kw: |
2629 tok_val = new token (endenumeration_kw, token::enumeration_end, m_filepos); | 2642 tok_val = new token (endenumeration_kw, token::enumeration_end, |
| 2643 m_filepos, m_filepos); |
2630 m_at_beginning_of_statement = true; | 2644 m_at_beginning_of_statement = true; |
2631 break; | 2645 break; |
2632 | 2646 |
2633 case endevents_kw: | 2647 case endevents_kw: |
2634 tok_val = new token (endevents_kw, token::events_end, m_filepos); | 2648 tok_val = new token (endevents_kw, token::events_end, m_filepos, |
| 2649 m_filepos); |
2635 m_at_beginning_of_statement = true; | 2650 m_at_beginning_of_statement = true; |
2636 break; | 2651 break; |
2637 | 2652 |
2638 case endmethods_kw: | 2653 case endmethods_kw: |
2639 tok_val = new token (endmethods_kw, token::methods_end, m_filepos); | 2654 tok_val = new token (endmethods_kw, token::methods_end, m_filepos, |
| 2655 m_filepos); |
2640 m_at_beginning_of_statement = true; | 2656 m_at_beginning_of_statement = true; |
2641 break; | 2657 break; |
2642 | 2658 |
2643 case endproperties_kw: | 2659 case endproperties_kw: |
2644 tok_val = new token (endproperties_kw, token::properties_end, m_filepos); | 2660 tok_val = new token (endproperties_kw, token::properties_end, |
| 2661 m_filepos, m_filepos); |
2645 m_at_beginning_of_statement = true; | 2662 m_at_beginning_of_statement = true; |
2646 break; | 2663 break; |
2647 | 2664 |
2648 | 2665 |
2649 case for_kw: | 2666 case for_kw: |
2727 case magic_file_kw: | 2744 case magic_file_kw: |
2728 { | 2745 { |
2729 if ((m_reading_fcn_file || m_reading_script_file | 2746 if ((m_reading_fcn_file || m_reading_script_file |
2730 || m_reading_classdef_file) | 2747 || m_reading_classdef_file) |
2731 && ! m_fcn_file_full_name.empty ()) | 2748 && ! m_fcn_file_full_name.empty ()) |
2732 tok_val = new token (magic_file_kw, m_fcn_file_full_name, m_filepos); | 2749 tok_val = new token (magic_file_kw, m_fcn_file_full_name, |
| 2750 m_filepos, m_filepos); |
2733 else | 2751 else |
2734 tok_val = new token (magic_file_kw, "stdin", m_filepos); | 2752 tok_val = new token (magic_file_kw, "stdin", m_filepos, |
| 2753 m_filepos); |
2735 } | 2754 } |
2736 break; | 2755 break; |
2737 | 2756 |
2738 case magic_line_kw: | 2757 case magic_line_kw: |
2739 { | 2758 { |
2740 int l = m_filepos.line (); | 2759 int l = m_filepos.line (); |
2741 tok_val = new token (magic_line_kw, static_cast<double> (l), | 2760 tok_val = new token (magic_line_kw, static_cast<double> (l), |
2742 "", m_filepos); | 2761 "", m_filepos, m_filepos); |
2743 } | 2762 } |
2744 break; | 2763 break; |
2745 | 2764 |
2746 default: | 2765 default: |
2747 panic_impossible (); | 2766 panic_impossible (); |
2748 } | 2767 } |
2749 | 2768 |
2750 if (! tok_val) | 2769 if (! tok_val) |
2751 tok_val = new token (kw->tok, true, m_filepos); | 2770 tok_val = new token (kw->tok, true, m_filepos, m_filepos); |
2752 | 2771 |
2753 push_token (tok_val); | 2772 push_token (tok_val); |
2754 | 2773 |
2755 return kw->tok; | 2774 return kw->tok; |
2756 } | 2775 } |
2872 assert (nread == 1); | 2891 assert (nread == 1); |
2873 | 2892 |
2874 m_looking_for_object_index = false; | 2893 m_looking_for_object_index = false; |
2875 m_at_beginning_of_statement = false; | 2894 m_at_beginning_of_statement = false; |
2876 | 2895 |
2877 push_token (new token (NUM, value, yytxt, m_filepos)); | 2896 push_token (new token (NUM, value, yytxt, m_filepos, m_filepos)); |
2878 | 2897 |
2879 m_filepos.increment_column (flex_yyleng ()); | 2898 m_filepos.increment_column (flex_yyleng ()); |
2880 } | 2899 } |
2881 | 2900 |
2882 void | 2901 void |
3016 if (kw_token) | 3035 if (kw_token) |
3017 { | 3036 { |
3018 token *tok | 3037 token *tok |
3019 = new token (LEXICAL_ERROR, | 3038 = new token (LEXICAL_ERROR, |
3020 "method, class, and package names may not be keywords", | 3039 "method, class, and package names may not be keywords", |
3021 m_filepos); | 3040 m_filepos, m_filepos); |
3022 | 3041 |
3023 push_token (tok); | 3042 push_token (tok); |
3024 | 3043 |
3025 return count_token_internal (LEXICAL_ERROR); | 3044 return count_token_internal (LEXICAL_ERROR); |
3026 } | 3045 } |
3027 | 3046 |
3028 push_token (new token (SUPERCLASSREF, meth, cls, | 3047 push_token (new token (SUPERCLASSREF, meth, cls, m_filepos, m_filepos)); |
3029 m_filepos)); | |
3030 | 3048 |
3031 m_filepos.increment_column (flex_yyleng ()); | 3049 m_filepos.increment_column (flex_yyleng ()); |
3032 | 3050 |
3033 return SUPERCLASSREF; | 3051 return SUPERCLASSREF; |
3034 } | 3052 } |
3046 | 3064 |
3047 if (fq_identifier_contains_keyword (cls)) | 3065 if (fq_identifier_contains_keyword (cls)) |
3048 { | 3066 { |
3049 token *tok = new token (LEXICAL_ERROR, | 3067 token *tok = new token (LEXICAL_ERROR, |
3050 "class and package names may not be keywords", | 3068 "class and package names may not be keywords", |
3051 m_filepos); | 3069 m_filepos, m_filepos); |
3052 push_token (tok); | 3070 push_token (tok); |
3053 | 3071 |
3054 return count_token_internal (LEXICAL_ERROR); | 3072 return count_token_internal (LEXICAL_ERROR); |
3055 } | 3073 } |
3056 | 3074 |
3057 push_token (new token (METAQUERY, cls, m_filepos)); | 3075 push_token (new token (METAQUERY, cls, m_filepos, m_filepos)); |
3058 | 3076 |
3059 m_filepos.increment_column (flex_yyleng ()); | 3077 m_filepos.increment_column (flex_yyleng ()); |
3060 | 3078 |
3061 return METAQUERY; | 3079 return METAQUERY; |
3062 } | 3080 } |
3072 if (fq_identifier_contains_keyword (txt)) | 3090 if (fq_identifier_contains_keyword (txt)) |
3073 { | 3091 { |
3074 token *tok | 3092 token *tok |
3075 = new token (LEXICAL_ERROR, | 3093 = new token (LEXICAL_ERROR, |
3076 "function, method, class, and package names may not be keywords", | 3094 "function, method, class, and package names may not be keywords", |
3077 m_filepos); | 3095 m_filepos, m_filepos); |
3078 | 3096 |
3079 push_token (tok); | 3097 push_token (tok); |
3080 | 3098 |
3081 return count_token_internal (LEXICAL_ERROR); | 3099 return count_token_internal (LEXICAL_ERROR); |
3082 } | 3100 } |
3083 | 3101 |
3084 push_token (new token (FQ_IDENT, txt, m_filepos)); | 3102 push_token (new token (FQ_IDENT, txt, m_filepos, m_filepos)); |
3085 | 3103 |
3086 m_filepos.increment_column (flex_yyleng ()); | 3104 m_filepos.increment_column (flex_yyleng ()); |
3087 | 3105 |
3088 return FQ_IDENT; | 3106 return FQ_IDENT; |
3089 } | 3107 } |
3101 // keywords and other special names and return STRUCT_ELT, which is | 3119 // keywords and other special names and return STRUCT_ELT, which is |
3102 // a string that is also a valid identifier. | 3120 // a string that is also a valid identifier. |
3103 | 3121 |
3104 if (m_looking_at_indirect_ref) | 3122 if (m_looking_at_indirect_ref) |
3105 { | 3123 { |
3106 push_token (new token (STRUCT_ELT, ident, m_filepos)); | 3124 push_token (new token (STRUCT_ELT, ident, m_filepos, m_filepos)); |
3107 | 3125 |
3108 m_looking_for_object_index = true; | 3126 m_looking_for_object_index = true; |
3109 | 3127 |
3110 m_filepos.increment_column (flex_yyleng ()); | 3128 m_filepos.increment_column (flex_yyleng ()); |
3111 | 3129 |
3138 | 3156 |
3139 symbol_scope scope = m_symtab_context.curr_scope (); | 3157 symbol_scope scope = m_symtab_context.curr_scope (); |
3140 | 3158 |
3141 symbol_record sr = (scope ? scope.insert (ident) : symbol_record (ident)); | 3159 symbol_record sr = (scope ? scope.insert (ident) : symbol_record (ident)); |
3142 | 3160 |
3143 token *tok = new token (NAME, sr, m_filepos); | 3161 token *tok = new token (NAME, sr, m_filepos, m_filepos); |
3144 | 3162 |
3145 // The following symbols are handled specially so that things like | 3163 // The following symbols are handled specially so that things like |
3146 // | 3164 // |
3147 // pi +1 | 3165 // pi +1 |
3148 // | 3166 // |
3572 base_lexer::handle_op_internal (int tok, bool bos, bool compat) | 3590 base_lexer::handle_op_internal (int tok, bool bos, bool compat) |
3573 { | 3591 { |
3574 if (! compat) | 3592 if (! compat) |
3575 warn_language_extension_operator (flex_yytext ()); | 3593 warn_language_extension_operator (flex_yytext ()); |
3576 | 3594 |
3577 push_token (new token (tok, m_filepos)); | 3595 push_token (new token (tok, m_filepos, m_filepos)); |
3578 | 3596 |
3579 m_filepos.increment_column (flex_yyleng ()); | 3597 m_filepos.increment_column (flex_yyleng ()); |
3580 m_looking_for_object_index = false; | 3598 m_looking_for_object_index = false; |
3581 m_at_beginning_of_statement = bos; | 3599 m_at_beginning_of_statement = bos; |
3582 | 3600 |
3603 } | 3621 } |
3604 | 3622 |
3605 int | 3623 int |
3606 base_lexer::handle_token (const std::string& name, int tok) | 3624 base_lexer::handle_token (const std::string& name, int tok) |
3607 { | 3625 { |
3608 token *tok_val = new token (tok, name, m_filepos); | 3626 token *tok_val = new token (tok, name, m_filepos, m_filepos); |
3609 | 3627 |
3610 return handle_token (tok, tok_val); | 3628 return handle_token (tok, tok_val); |
3611 } | 3629 } |
3612 | 3630 |
3613 int | 3631 int |
3614 base_lexer::handle_token (int tok, token *tok_val) | 3632 base_lexer::handle_token (int tok, token *tok_val) |
3615 { | 3633 { |
3616 if (! tok_val) | 3634 if (! tok_val) |
3617 tok_val = new token (tok, m_filepos); | 3635 tok_val = new token (tok, m_filepos, m_filepos); |
3618 | 3636 |
3619 push_token (tok_val); | 3637 push_token (tok_val); |
3620 | 3638 |
3621 m_filepos.increment_column (flex_yyleng ()); | 3639 m_filepos.increment_column (flex_yyleng ()); |
3622 | 3640 |
3624 } | 3642 } |
3625 | 3643 |
3626 int | 3644 int |
3627 base_lexer::count_token (int tok) | 3645 base_lexer::count_token (int tok) |
3628 { | 3646 { |
3629 token *tok_val = new token (tok, m_filepos); | 3647 token *tok_val = new token (tok, m_filepos, m_filepos); |
3630 | 3648 |
3631 push_token (tok_val); | 3649 push_token (tok_val); |
3632 | 3650 |
3633 return count_token_internal (tok); | 3651 return count_token_internal (tok); |
3634 } | 3652 } |