comparison libinterp/parse-tree/lex.ll @ 27769:07ffed9878ad

store begin and end position in tokens

* token.h, token.cc (token::m_beg_pos): Rename from m_pos.
(token::m_end_pos): New member variable.
(token::beg_pos, token::end_pos): New functions.
(token::token): Accept beginning and ending positions for tokens as arguments.
Change all uses but store the same position for both beginning and end in most cases.
Subsequent changes will store the correct beginning and ending positions.
author John W. Eaton <jwe@octave.org>
date Mon, 02 Dec 2019 22:58:55 -0600
parents d6701f835496
children 7a06e352ac61
comparison
equal deleted inserted replaced
27768:d6701f835496 27769:07ffed9878ad
838 curr_lexer->m_looking_for_object_index = true; 838 curr_lexer->m_looking_for_object_index = true;
839 curr_lexer->m_at_beginning_of_statement = false; 839 curr_lexer->m_at_beginning_of_statement = false;
840 840
841 curr_lexer->push_token (new octave::token (DQ_STRING, 841 curr_lexer->push_token (new octave::token (DQ_STRING,
842 curr_lexer->m_string_text, 842 curr_lexer->m_string_text,
843 curr_lexer->m_beg_string)); 843 curr_lexer->m_beg_string,
844 curr_lexer->m_filepos));
844 845
845 curr_lexer->m_string_text = ""; 846 curr_lexer->m_string_text = "";
846 847
847 return curr_lexer->count_token_internal (DQ_STRING); 848 return curr_lexer->count_token_internal (DQ_STRING);
848 } 849 }
859 if (result > 0xff) 860 if (result > 0xff)
860 { 861 {
861 octave::token *tok 862 octave::token *tok
862 = new octave::token (LEXICAL_ERROR, 863 = new octave::token (LEXICAL_ERROR,
863 "invalid octal escape sequence in character string", 864 "invalid octal escape sequence in character string",
864 curr_lexer->m_filepos); 865 curr_lexer->m_filepos, curr_lexer->m_filepos);
865 866
866 curr_lexer->push_token (tok); 867 curr_lexer->push_token (tok);
867 868
868 return curr_lexer->count_token_internal (LEXICAL_ERROR); 869 return curr_lexer->count_token_internal (LEXICAL_ERROR);
869 } 870 }
1000 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}"); 1001 curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");
1001 1002
1002 octave::token *tok 1003 octave::token *tok
1003 = new octave::token (LEXICAL_ERROR, 1004 = new octave::token (LEXICAL_ERROR,
1004 "unterminated character string constant", 1005 "unterminated character string constant",
1005 curr_lexer->m_filepos); 1006 curr_lexer->m_filepos, curr_lexer->m_filepos);
1006 1007
1007 curr_lexer->push_token (tok); 1008 curr_lexer->push_token (tok);
1008 1009
1009 curr_lexer->m_filepos.next_line (); 1010 curr_lexer->m_filepos.next_line ();
1010 1011
1034 curr_lexer->m_looking_for_object_index = true; 1035 curr_lexer->m_looking_for_object_index = true;
1035 curr_lexer->m_at_beginning_of_statement = false; 1036 curr_lexer->m_at_beginning_of_statement = false;
1036 1037
1037 curr_lexer->push_token (new octave::token (SQ_STRING, 1038 curr_lexer->push_token (new octave::token (SQ_STRING,
1038 curr_lexer->m_string_text, 1039 curr_lexer->m_string_text,
1039 curr_lexer->m_beg_string)); 1040 curr_lexer->m_beg_string,
1041 curr_lexer->m_filepos));
1040 1042
1041 curr_lexer->m_string_text = ""; 1043 curr_lexer->m_string_text = "";
1042 1044
1043 return curr_lexer->count_token_internal (SQ_STRING); 1045 return curr_lexer->count_token_internal (SQ_STRING);
1044 } 1046 }
1055 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}"); 1057 curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");
1056 1058
1057 octave::token *tok 1059 octave::token *tok
1058 = new octave::token (LEXICAL_ERROR, 1060 = new octave::token (LEXICAL_ERROR,
1059 "unterminated character string constant", 1061 "unterminated character string constant",
1060 curr_lexer->m_filepos); 1062 curr_lexer->m_filepos, curr_lexer->m_filepos);
1061 1063
1062 curr_lexer->push_token (tok); 1064 curr_lexer->push_token (tok);
1063 1065
1064 curr_lexer->m_filepos.next_line (); 1066 curr_lexer->m_filepos.next_line ();
1065 1067
1366 octave::token *tok; 1368 octave::token *tok;
1367 1369
1368 if (kw_token) 1370 if (kw_token)
1369 tok = new octave::token (LEXICAL_ERROR, 1371 tok = new octave::token (LEXICAL_ERROR,
1370 "function handles may not refer to keywords", 1372 "function handles may not refer to keywords",
1373 curr_lexer->m_filepos,
1371 curr_lexer->m_filepos); 1374 curr_lexer->m_filepos);
1372 else 1375 else
1373 { 1376 {
1374 curr_lexer->m_looking_for_object_index = true; 1377 curr_lexer->m_looking_for_object_index = true;
1375 1378
1376 tok = new octave::token (FCN_HANDLE, ident, 1379 tok = new octave::token (FCN_HANDLE, ident,
1380 curr_lexer->m_filepos,
1377 curr_lexer->m_filepos); 1381 curr_lexer->m_filepos);
1378 } 1382 }
1379 1383
1380 curr_lexer->push_token (tok); 1384 curr_lexer->push_token (tok);
1381 1385
1414 else if (curr_lexer->m_nesting_level.is_bracket_or_brace ()) 1418 else if (curr_lexer->m_nesting_level.is_bracket_or_brace ())
1415 { 1419 {
1416 octave::token *tok 1420 octave::token *tok
1417 = new octave::token (LEXICAL_ERROR, 1421 = new octave::token (LEXICAL_ERROR,
1418 "unexpected internal lexer error", 1422 "unexpected internal lexer error",
1419 curr_lexer->m_filepos); 1423 curr_lexer->m_filepos, curr_lexer->m_filepos);
1420 1424
1421 curr_lexer->push_token (tok); 1425 curr_lexer->push_token (tok);
1422 1426
1423 curr_lexer->m_filepos.next_line (); 1427 curr_lexer->m_filepos.next_line ();
1424 1428
1804 << octave::undo_string_escape (static_cast<char> (c)) 1808 << octave::undo_string_escape (static_cast<char> (c))
1805 << "' (ASCII " << c << ")"; 1809 << "' (ASCII " << c << ")";
1806 1810
1807 octave::token *tok 1811 octave::token *tok
1808 = new octave::token (LEXICAL_ERROR, buf.str (), 1812 = new octave::token (LEXICAL_ERROR, buf.str (),
1809 curr_lexer->m_filepos); 1813 curr_lexer->m_filepos, curr_lexer->m_filepos);
1810 1814
1811 curr_lexer->push_token (tok); 1815 curr_lexer->push_token (tok);
1812 1816
1813 curr_lexer->m_filepos.increment_column (); 1817 curr_lexer->m_filepos.increment_column ();
1814 1818
2573 { 2577 {
2574 m_at_beginning_of_statement = previous_at_bos; 2578 m_at_beginning_of_statement = previous_at_bos;
2575 return 0; 2579 return 0;
2576 } 2580 }
2577 2581
2578 tok_val = new token (end_kw, token::simple_end, m_filepos); 2582 tok_val = new token (end_kw, token::simple_end, m_filepos,
2583 m_filepos);
2579 m_at_beginning_of_statement = true; 2584 m_at_beginning_of_statement = true;
2580 break; 2585 break;
2581 2586
2582 case end_try_catch_kw: 2587 case end_try_catch_kw:
2583 tok_val = new token (end_try_catch_kw, token::try_catch_end, m_filepos); 2588 tok_val = new token (end_try_catch_kw, token::try_catch_end,
2589 m_filepos, m_filepos);
2584 m_at_beginning_of_statement = true; 2590 m_at_beginning_of_statement = true;
2585 break; 2591 break;
2586 2592
2587 case end_unwind_protect_kw: 2593 case end_unwind_protect_kw:
2588 tok_val = new token (end_unwind_protect_kw, 2594 tok_val = new token (end_unwind_protect_kw,
2589 token::unwind_protect_end, m_filepos); 2595 token::unwind_protect_end, m_filepos,
2596 m_filepos);
2590 m_at_beginning_of_statement = true; 2597 m_at_beginning_of_statement = true;
2591 break; 2598 break;
2592 2599
2593 case endfor_kw: 2600 case endfor_kw:
2594 tok_val = new token (endfor_kw, token::for_end, m_filepos); 2601 tok_val = new token (endfor_kw, token::for_end, m_filepos,
2602 m_filepos);
2595 m_at_beginning_of_statement = true; 2603 m_at_beginning_of_statement = true;
2596 break; 2604 break;
2597 2605
2598 case endfunction_kw: 2606 case endfunction_kw:
2599 tok_val = new token (endfunction_kw, token::function_end, m_filepos); 2607 tok_val = new token (endfunction_kw, token::function_end,
2608 m_filepos, m_filepos);
2600 m_at_beginning_of_statement = true; 2609 m_at_beginning_of_statement = true;
2601 break; 2610 break;
2602 2611
2603 case endif_kw: 2612 case endif_kw:
2604 tok_val = new token (endif_kw, token::if_end, m_filepos); 2613 tok_val = new token (endif_kw, token::if_end, m_filepos, m_filepos);
2605 m_at_beginning_of_statement = true; 2614 m_at_beginning_of_statement = true;
2606 break; 2615 break;
2607 2616
2608 case endparfor_kw: 2617 case endparfor_kw:
2609 tok_val = new token (endparfor_kw, token::parfor_end, m_filepos); 2618 tok_val = new token (endparfor_kw, token::parfor_end, m_filepos,
2619 m_filepos);
2610 m_at_beginning_of_statement = true; 2620 m_at_beginning_of_statement = true;
2611 break; 2621 break;
2612 2622
2613 case endswitch_kw: 2623 case endswitch_kw:
2614 tok_val = new token (endswitch_kw, token::switch_end, m_filepos); 2624 tok_val = new token (endswitch_kw, token::switch_end, m_filepos,
2625 m_filepos);
2615 m_at_beginning_of_statement = true; 2626 m_at_beginning_of_statement = true;
2616 break; 2627 break;
2617 2628
2618 case endwhile_kw: 2629 case endwhile_kw:
2619 tok_val = new token (endwhile_kw, token::while_end, m_filepos); 2630 tok_val = new token (endwhile_kw, token::while_end, m_filepos,
2631 m_filepos);
2620 m_at_beginning_of_statement = true; 2632 m_at_beginning_of_statement = true;
2621 break; 2633 break;
2622 2634
2623 case endclassdef_kw: 2635 case endclassdef_kw:
2624 tok_val = new token (endclassdef_kw, token::classdef_end, m_filepos); 2636 tok_val = new token (endclassdef_kw, token::classdef_end,
2637 m_filepos, m_filepos);
2625 m_at_beginning_of_statement = true; 2638 m_at_beginning_of_statement = true;
2626 break; 2639 break;
2627 2640
2628 case endenumeration_kw: 2641 case endenumeration_kw:
2629 tok_val = new token (endenumeration_kw, token::enumeration_end, m_filepos); 2642 tok_val = new token (endenumeration_kw, token::enumeration_end,
2643 m_filepos, m_filepos);
2630 m_at_beginning_of_statement = true; 2644 m_at_beginning_of_statement = true;
2631 break; 2645 break;
2632 2646
2633 case endevents_kw: 2647 case endevents_kw:
2634 tok_val = new token (endevents_kw, token::events_end, m_filepos); 2648 tok_val = new token (endevents_kw, token::events_end, m_filepos,
2649 m_filepos);
2635 m_at_beginning_of_statement = true; 2650 m_at_beginning_of_statement = true;
2636 break; 2651 break;
2637 2652
2638 case endmethods_kw: 2653 case endmethods_kw:
2639 tok_val = new token (endmethods_kw, token::methods_end, m_filepos); 2654 tok_val = new token (endmethods_kw, token::methods_end, m_filepos,
2655 m_filepos);
2640 m_at_beginning_of_statement = true; 2656 m_at_beginning_of_statement = true;
2641 break; 2657 break;
2642 2658
2643 case endproperties_kw: 2659 case endproperties_kw:
2644 tok_val = new token (endproperties_kw, token::properties_end, m_filepos); 2660 tok_val = new token (endproperties_kw, token::properties_end,
2661 m_filepos, m_filepos);
2645 m_at_beginning_of_statement = true; 2662 m_at_beginning_of_statement = true;
2646 break; 2663 break;
2647 2664
2648 2665
2649 case for_kw: 2666 case for_kw:
2727 case magic_file_kw: 2744 case magic_file_kw:
2728 { 2745 {
2729 if ((m_reading_fcn_file || m_reading_script_file 2746 if ((m_reading_fcn_file || m_reading_script_file
2730 || m_reading_classdef_file) 2747 || m_reading_classdef_file)
2731 && ! m_fcn_file_full_name.empty ()) 2748 && ! m_fcn_file_full_name.empty ())
2732 tok_val = new token (magic_file_kw, m_fcn_file_full_name, m_filepos); 2749 tok_val = new token (magic_file_kw, m_fcn_file_full_name,
2750 m_filepos, m_filepos);
2733 else 2751 else
2734 tok_val = new token (magic_file_kw, "stdin", m_filepos); 2752 tok_val = new token (magic_file_kw, "stdin", m_filepos,
2753 m_filepos);
2735 } 2754 }
2736 break; 2755 break;
2737 2756
2738 case magic_line_kw: 2757 case magic_line_kw:
2739 { 2758 {
2740 int l = m_filepos.line (); 2759 int l = m_filepos.line ();
2741 tok_val = new token (magic_line_kw, static_cast<double> (l), 2760 tok_val = new token (magic_line_kw, static_cast<double> (l),
2742 "", m_filepos); 2761 "", m_filepos, m_filepos);
2743 } 2762 }
2744 break; 2763 break;
2745 2764
2746 default: 2765 default:
2747 panic_impossible (); 2766 panic_impossible ();
2748 } 2767 }
2749 2768
2750 if (! tok_val) 2769 if (! tok_val)
2751 tok_val = new token (kw->tok, true, m_filepos); 2770 tok_val = new token (kw->tok, true, m_filepos, m_filepos);
2752 2771
2753 push_token (tok_val); 2772 push_token (tok_val);
2754 2773
2755 return kw->tok; 2774 return kw->tok;
2756 } 2775 }
2872 assert (nread == 1); 2891 assert (nread == 1);
2873 2892
2874 m_looking_for_object_index = false; 2893 m_looking_for_object_index = false;
2875 m_at_beginning_of_statement = false; 2894 m_at_beginning_of_statement = false;
2876 2895
2877 push_token (new token (NUM, value, yytxt, m_filepos)); 2896 push_token (new token (NUM, value, yytxt, m_filepos, m_filepos));
2878 2897
2879 m_filepos.increment_column (flex_yyleng ()); 2898 m_filepos.increment_column (flex_yyleng ());
2880 } 2899 }
2881 2900
2882 void 2901 void
3016 if (kw_token) 3035 if (kw_token)
3017 { 3036 {
3018 token *tok 3037 token *tok
3019 = new token (LEXICAL_ERROR, 3038 = new token (LEXICAL_ERROR,
3020 "method, class, and package names may not be keywords", 3039 "method, class, and package names may not be keywords",
3021 m_filepos); 3040 m_filepos, m_filepos);
3022 3041
3023 push_token (tok); 3042 push_token (tok);
3024 3043
3025 return count_token_internal (LEXICAL_ERROR); 3044 return count_token_internal (LEXICAL_ERROR);
3026 } 3045 }
3027 3046
3028 push_token (new token (SUPERCLASSREF, meth, cls, 3047 push_token (new token (SUPERCLASSREF, meth, cls, m_filepos, m_filepos));
3029 m_filepos));
3030 3048
3031 m_filepos.increment_column (flex_yyleng ()); 3049 m_filepos.increment_column (flex_yyleng ());
3032 3050
3033 return SUPERCLASSREF; 3051 return SUPERCLASSREF;
3034 } 3052 }
3046 3064
3047 if (fq_identifier_contains_keyword (cls)) 3065 if (fq_identifier_contains_keyword (cls))
3048 { 3066 {
3049 token *tok = new token (LEXICAL_ERROR, 3067 token *tok = new token (LEXICAL_ERROR,
3050 "class and package names may not be keywords", 3068 "class and package names may not be keywords",
3051 m_filepos); 3069 m_filepos, m_filepos);
3052 push_token (tok); 3070 push_token (tok);
3053 3071
3054 return count_token_internal (LEXICAL_ERROR); 3072 return count_token_internal (LEXICAL_ERROR);
3055 } 3073 }
3056 3074
3057 push_token (new token (METAQUERY, cls, m_filepos)); 3075 push_token (new token (METAQUERY, cls, m_filepos, m_filepos));
3058 3076
3059 m_filepos.increment_column (flex_yyleng ()); 3077 m_filepos.increment_column (flex_yyleng ());
3060 3078
3061 return METAQUERY; 3079 return METAQUERY;
3062 } 3080 }
3072 if (fq_identifier_contains_keyword (txt)) 3090 if (fq_identifier_contains_keyword (txt))
3073 { 3091 {
3074 token *tok 3092 token *tok
3075 = new token (LEXICAL_ERROR, 3093 = new token (LEXICAL_ERROR,
3076 "function, method, class, and package names may not be keywords", 3094 "function, method, class, and package names may not be keywords",
3077 m_filepos); 3095 m_filepos, m_filepos);
3078 3096
3079 push_token (tok); 3097 push_token (tok);
3080 3098
3081 return count_token_internal (LEXICAL_ERROR); 3099 return count_token_internal (LEXICAL_ERROR);
3082 } 3100 }
3083 3101
3084 push_token (new token (FQ_IDENT, txt, m_filepos)); 3102 push_token (new token (FQ_IDENT, txt, m_filepos, m_filepos));
3085 3103
3086 m_filepos.increment_column (flex_yyleng ()); 3104 m_filepos.increment_column (flex_yyleng ());
3087 3105
3088 return FQ_IDENT; 3106 return FQ_IDENT;
3089 } 3107 }
3101 // keywords and other special names and return STRUCT_ELT, which is 3119 // keywords and other special names and return STRUCT_ELT, which is
3102 // a string that is also a valid identifier. 3120 // a string that is also a valid identifier.
3103 3121
3104 if (m_looking_at_indirect_ref) 3122 if (m_looking_at_indirect_ref)
3105 { 3123 {
3106 push_token (new token (STRUCT_ELT, ident, m_filepos)); 3124 push_token (new token (STRUCT_ELT, ident, m_filepos, m_filepos));
3107 3125
3108 m_looking_for_object_index = true; 3126 m_looking_for_object_index = true;
3109 3127
3110 m_filepos.increment_column (flex_yyleng ()); 3128 m_filepos.increment_column (flex_yyleng ());
3111 3129
3138 3156
3139 symbol_scope scope = m_symtab_context.curr_scope (); 3157 symbol_scope scope = m_symtab_context.curr_scope ();
3140 3158
3141 symbol_record sr = (scope ? scope.insert (ident) : symbol_record (ident)); 3159 symbol_record sr = (scope ? scope.insert (ident) : symbol_record (ident));
3142 3160
3143 token *tok = new token (NAME, sr, m_filepos); 3161 token *tok = new token (NAME, sr, m_filepos, m_filepos);
3144 3162
3145 // The following symbols are handled specially so that things like 3163 // The following symbols are handled specially so that things like
3146 // 3164 //
3147 // pi +1 3165 // pi +1
3148 // 3166 //
3572 base_lexer::handle_op_internal (int tok, bool bos, bool compat) 3590 base_lexer::handle_op_internal (int tok, bool bos, bool compat)
3573 { 3591 {
3574 if (! compat) 3592 if (! compat)
3575 warn_language_extension_operator (flex_yytext ()); 3593 warn_language_extension_operator (flex_yytext ());
3576 3594
3577 push_token (new token (tok, m_filepos)); 3595 push_token (new token (tok, m_filepos, m_filepos));
3578 3596
3579 m_filepos.increment_column (flex_yyleng ()); 3597 m_filepos.increment_column (flex_yyleng ());
3580 m_looking_for_object_index = false; 3598 m_looking_for_object_index = false;
3581 m_at_beginning_of_statement = bos; 3599 m_at_beginning_of_statement = bos;
3582 3600
3603 } 3621 }
3604 3622
3605 int 3623 int
3606 base_lexer::handle_token (const std::string& name, int tok) 3624 base_lexer::handle_token (const std::string& name, int tok)
3607 { 3625 {
3608 token *tok_val = new token (tok, name, m_filepos); 3626 token *tok_val = new token (tok, name, m_filepos, m_filepos);
3609 3627
3610 return handle_token (tok, tok_val); 3628 return handle_token (tok, tok_val);
3611 } 3629 }
3612 3630
3613 int 3631 int
3614 base_lexer::handle_token (int tok, token *tok_val) 3632 base_lexer::handle_token (int tok, token *tok_val)
3615 { 3633 {
3616 if (! tok_val) 3634 if (! tok_val)
3617 tok_val = new token (tok, m_filepos); 3635 tok_val = new token (tok, m_filepos, m_filepos);
3618 3636
3619 push_token (tok_val); 3637 push_token (tok_val);
3620 3638
3621 m_filepos.increment_column (flex_yyleng ()); 3639 m_filepos.increment_column (flex_yyleng ());
3622 3640
3624 } 3642 }
3625 3643
3626 int 3644 int
3627 base_lexer::count_token (int tok) 3645 base_lexer::count_token (int tok)
3628 { 3646 {
3629 token *tok_val = new token (tok, m_filepos); 3647 token *tok_val = new token (tok, m_filepos, m_filepos);
3630 3648
3631 push_token (tok_val); 3649 push_token (tok_val);
3632 3650
3633 return count_token_internal (tok); 3651 return count_token_internal (tok);
3634 } 3652 }