changeset 32444:f5dee2d8173e

New opcode for i:s and e Specialized opcode that deals with 'i','j','I','J' and 'e' when used as the imaginary unit or natural log base, to push faster. As with PUSH_PI. Being assigned to is used as a heuristic for if the id:s are used as variables or as mathematical constants. A fallback covers the cases where the guess is wrong or the user has overridden the builtin functions. * pt-bytecode-vm.cc: New opcodes * pt-bytecode-vm.h: New fields for checking for overrides * pt-bytecode-walk.cc: Emit new opcodes * pt-bytecode-walk.h: New field m_set_assigned_ids * pt-bytecode.h: New opcodes PUSH_I, PUSH_E
author Petter T. <petter.vilhelm@gmail.com>
date Fri, 27 Oct 2023 18:19:56 +0200
parents 0abebb3f41e4
children 4d6615bca5b4
files libinterp/parse-tree/pt-bytecode-vm.cc libinterp/parse-tree/pt-bytecode-vm.h libinterp/parse-tree/pt-bytecode-walk.cc libinterp/parse-tree/pt-bytecode-walk.h libinterp/parse-tree/pt-bytecode.h
diffstat 5 files changed, 232 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/parse-tree/pt-bytecode-vm.cc	Fri Oct 27 18:19:50 2023 +0200
+++ b/libinterp/parse-tree/pt-bytecode-vm.cc	Fri Oct 27 18:19:56 2023 +0200
@@ -366,6 +366,8 @@
           CASE_START (FORCE_ASSIGN)               PSLOT() CASE_END ()
           CASE_START (PUSH_SLOT_NARGOUT1)         PSLOT() CASE_END ()
           CASE_START (PUSH_PI)                    PSLOT() CASE_END ()
+          CASE_START (PUSH_I)                     PSLOT() CASE_END ()
+          CASE_START (PUSH_E)                     PSLOT() CASE_END ()
           CASE_START (PUSH_SLOT_NARGOUT1_SPECIAL) PSLOT() CASE_END ()
           CASE_START (PUSH_SLOT_INDEXED)          PSLOT() CASE_END ()
           CASE_START (PUSH_FCN_HANDLE)            PSLOT() CASE_END ()
@@ -845,6 +847,14 @@
 static octave_value ov_dbl_1 {1.0};
 static octave_value ov_dbl_2 {2.0};
 
+static octave_value ov_i {Complex (0.0, 1.0)};
+#if defined (M_E)
+  static octave_value ov_e {M_E};
+#else
+  // Initialized in vm::vm()
+  static octave_value ov_e;
+#endif
+
 // TODO: Push non-nil and nil ov instead of true false to make some checks
 //       faster? Would they be faster?
 
@@ -1035,6 +1045,8 @@
       &&neq_cst,
       &&pow_cst_dbl,
       &&pow_cst,
+      &&push_i,
+      &&push_e,
     };
 
   if (OCTAVE_UNLIKELY (m_profiler_enabled))
@@ -1894,7 +1906,7 @@
 
     INSTR opcode = static_cast<INSTR> (*(ip - 2 + wide_opcode_offset));
     if (opcode == INSTR::PUSH_SLOT_NARGOUT1 ||
-        opcode == INSTR::PUSH_PI)
+        opcode == INSTR::PUSH_PI || opcode == INSTR::PUSH_I || opcode == INSTR::PUSH_E)
       nargout = 1;
     else if (opcode == INSTR::PUSH_SLOT_NARGOUT0)
       nargout = 0;
@@ -2225,6 +2237,84 @@
 }
 DISPATCH();
 
+push_i:
+// Specialization to push i (the imaginary unit) fast as a scalar.
+//
+// If the user use i as a variable opcode PUSH_SLOT_NARGOUT1
+// is used instead.
+{
+  int slot = arg0;
+
+  octave_value &ov = bsp[slot].ov;
+  // If the slot value is not a function cache we do a
+  // PUSH_SLOT_NARGOUT1 which will most likely put a
+  // function cache in the slot (unless the user has done a
+  // "i = 123;" or whatever).
+  if (OCTAVE_UNLIKELY (!ov.is_function_cache ()))
+    {
+      goto push_slot_nargout1;
+    }
+
+  // We need to check so the user has not defined some i function
+  octave_function *fcn;
+  try
+    {
+      octave_fcn_cache &cache = REP (octave_fcn_cache, ov);
+      fcn = cache.get_cached_fcn_if_fresh ();
+      if (! fcn)
+        fcn = cache.get_cached_fcn (static_cast<octave_value*> (nullptr), static_cast<octave_value*> (nullptr));
+    }
+  CATCH_EXECUTION_EXCEPTION // parse errors might throw in classdefs
+
+  if (OCTAVE_UNLIKELY (fcn != m_i_builtin_fn))
+    {
+      goto push_slot_nargout1;
+    }
+
+  // The user wanna push i ...
+  PUSH_OV (ov_i);
+}
+DISPATCH();
+
+push_e:
+// Specialization to push e fast as a scalar.
+//
+// If the user use 'e' as a variable opcode PUSH_SLOT_NARGOUT1
+// is used instead.
+{
+  int slot = arg0;
+
+  octave_value &ov = bsp[slot].ov;
+  // If the slot value is not a function cache we do a
+  // PUSH_SLOT_NARGOUT1 which will most likely put a
+  // function cache in the slot (unless the user has done a
+  // "e = 123;" or whatever).
+  if (OCTAVE_UNLIKELY (!ov.is_function_cache ()))
+    {
+      goto push_slot_nargout1;
+    }
+
+  // We need to check so the user has not defined some pi function
+  octave_function *fcn;
+  try
+    {
+      octave_fcn_cache &cache = REP (octave_fcn_cache, ov);
+      fcn = cache.get_cached_fcn_if_fresh ();
+      if (! fcn)
+        fcn = cache.get_cached_fcn (static_cast<octave_value*> (nullptr), static_cast<octave_value*> (nullptr));
+    }
+  CATCH_EXECUTION_EXCEPTION // parse errors might throw in classdefs
+
+  if (OCTAVE_UNLIKELY (fcn != m_e_builtin_fn))
+    {
+      goto push_slot_nargout1;
+    }
+
+  // The user wanna push e...
+  PUSH_OV (ov_e);
+}
+DISPATCH();
+
   {
     // TODO: Too much code. Should be broken out?
 
@@ -6668,10 +6758,16 @@
   m_fn_bool_not = m_ti->lookup_unary_op (octave_value::unary_op::op_not, m_bool_typeid);
 
   m_pi_builtin_fn = m_symtab->find_built_in_function ("pi").function_value ();
-  // If the platform has no M_PI we need to initialize ov_pi
+  m_i_builtin_fn = m_symtab->find_built_in_function ("i").function_value ();
+  m_e_builtin_fn = m_symtab->find_built_in_function ("e").function_value ();
+  // If the platform has no M_PI, M_E we need to initialize ov_pi and ov_e
 #if !defined (M_PI)
   ov_pi = 4.0 * atan (1.0);
 #endif
+#if !defined (M_E)
+  ov_e = exp (1.0);
+#endif
+
 }
 
 // If there are too many return values we can't just move them since the stacks will overlap so we
--- a/libinterp/parse-tree/pt-bytecode-vm.h	Fri Oct 27 18:19:50 2023 +0200
+++ b/libinterp/parse-tree/pt-bytecode-vm.h	Fri Oct 27 18:19:56 2023 +0200
@@ -501,6 +501,8 @@
   type_info::unary_op_fcn m_fn_bool_not = nullptr;
 
   octave_function * m_pi_builtin_fn = nullptr;
+  octave_function * m_i_builtin_fn = nullptr;
+  octave_function * m_e_builtin_fn = nullptr;
 
   static int constexpr m_scalar_typeid = 2;
   static int constexpr m_matrix_typeid = 4;
--- a/libinterp/parse-tree/pt-bytecode-walk.cc	Fri Oct 27 18:19:50 2023 +0200
+++ b/libinterp/parse-tree/pt-bytecode-walk.cc	Fri Oct 27 18:19:56 2023 +0200
@@ -48,6 +48,8 @@
       ERR("Internal VM compiler consistency check failed, " #cond);             \
   } while ((0))
 
+#define CHECK_NONNULL(ptr) if (!ptr) error ("Unexpected null %d", __LINE__)
+
 // Compiles an anonymous function.
 //
 // The compilation need to happen at runtime since the values of the variables are embedded into the bytecode
@@ -184,6 +186,104 @@
   }
 }
 
+// Class to walk the tree and collect most id:s that are assigned to.
+// E.g. any assign to index expression "pi(3) = 4;" is not collected.
+// This walker is used to try to figure out if any "i" identifier is used
+// as a variable or the imaginary unit.
+class find_assigned_ids_walker : tree_walker
+{
+public:
+  static std::set<std::string>
+  find_ids (octave_user_code &e)
+  {
+    find_assigned_ids_walker walker;
+    e.accept (walker);
+
+    return walker.m_set_of_ids;
+  }
+
+  std::set<std::string> m_set_of_ids; // ... that are assigned to
+
+  void visit_simple_assignment (tree_simple_assignment &t)
+  {
+    auto lhs = t.left_hand_side ();
+
+    if (lhs->is_identifier ())
+      m_set_of_ids.insert (lhs->name ());
+
+    t.right_hand_side ()->accept (*this);
+  }
+
+  void visit_multi_assignment (tree_multi_assignment &t)
+  {
+    octave::tree_argument_list *lhs = t.left_hand_side ();
+    if (!lhs)
+      return;
+
+    for (auto it = lhs->begin (); it != lhs->end (); it++)
+      {
+        if ((*it)->is_identifier ())
+          m_set_of_ids.insert ((*it)->name ());
+      }
+
+    t.right_hand_side ()->accept (*this);
+  }
+
+  void visit_simple_for_command (tree_simple_for_command& cmd)
+  {
+    tree_expression *lhs = cmd.left_hand_side ();
+    if (lhs->is_identifier ())
+      m_set_of_ids.insert (lhs->name ());
+
+    tree_statement_list *list = cmd.body ();
+    if (list)
+      list->accept (*this);
+  }
+
+  void visit_complex_for_command (tree_complex_for_command& cmd)
+  {
+    octave::tree_argument_list *lhs = cmd.left_hand_side ();
+
+    CHECK (lhs);
+    CHECK (lhs->size () == 2);
+
+    auto p = lhs->begin ();
+    tree_expression *val = *p++;
+    tree_expression *key = *p++;
+
+    CHECK (val); CHECK (key);
+
+    CHECK (val->is_identifier ());
+    CHECK (key->is_identifier ());
+
+    m_set_of_ids.insert (val->name ());
+    m_set_of_ids.insert (key->name ());
+
+    tree_statement_list *list = cmd.body ();
+    if (list)
+      list->accept (*this);
+  }
+
+  void visit_octave_user_function (octave_user_function& fcn)
+  {
+    octave::tree_parameter_list *paras = fcn.parameter_list ();
+    if (paras)
+      {
+        for (auto it = paras->begin (); it != paras->end (); it++)
+          {
+            CHECK_NONNULL (*it);
+            CHECK ((*it)->ident ());
+            m_set_of_ids.insert ((*it)->name ());
+          }
+      }
+
+    // Walk body
+    tree_statement_list *cmd_list = fcn.body ();
+    if (cmd_list)
+      cmd_list->accept (*this);
+  }
+};
+
 // Class to walk the tree and see if a index expression has
 // an end in it.
 //
@@ -463,8 +563,6 @@
 
 #define SLOT(name) get_slot (name)
 
-#define CHECK_NONNULL(ptr) if (!ptr) error ("Unexpected null %d", __LINE__)
-
 #define PUSH_ID_BEGIN_INDEXED(slot, idx, narg, is_obj) \
   m_indexed_id.push_back ({slot, idx, narg, is_obj})
 #define POP_ID_BEING_INDEXED() m_indexed_id.pop_back ()
@@ -2738,6 +2836,17 @@
         }
     }
 
+  // If the id:s "i", "j", "I","J" or "e" are used we try to figure out if they are used as variables or as
+  // the imaginary unit, to choose between PUSH_I, PUSH_E (specialized) or a generic push op-code.
+  bool ije_used = m_map_locals_to_slot.find ("i") != m_map_locals_to_slot.end ();
+  ije_used |= m_map_locals_to_slot.find ("j") != m_map_locals_to_slot.end ();
+  ije_used |= m_map_locals_to_slot.find ("I") != m_map_locals_to_slot.end ();
+  ije_used |= m_map_locals_to_slot.find ("J") != m_map_locals_to_slot.end ();
+  ije_used |= m_map_locals_to_slot.find ("e") != m_map_locals_to_slot.end ();
+
+  if (ije_used)
+    m_set_assigned_ids = find_assigned_ids_walker::find_ids (fcn);
+
   CHECK (! (m_is_anon && m_n_nested_fn));
 
   // Add code to initialize variables in anonymous functions that took their value from
@@ -4391,7 +4500,23 @@
           // Push the local at its slot number to the stack
           MAYBE_PUSH_WIDE_OPEXT (slot);
           if (name == "pi")
-            PUSH_CODE (INSTR::PUSH_PI);
+            PUSH_CODE (INSTR::PUSH_PI); // Specialization that pushes pi fast
+          else if (name == "i" || name == "I" || name == "j" || name == "J")
+            {
+              // If the id is assigned to anywhere in the function, we don't use the 
+              // specialization.
+              if (m_set_assigned_ids.find (name) == m_set_assigned_ids.end ())
+                PUSH_CODE (INSTR::PUSH_I); // Specialization that pushes imaginary unit fast
+              else
+                PUSH_CODE (INSTR::PUSH_SLOT_NARGOUT1);
+            }
+          else if (name == "e")
+            {
+              if (m_set_assigned_ids.find (name) == m_set_assigned_ids.end ())
+                PUSH_CODE (INSTR::PUSH_E); // Specialization that pushes e fast
+              else
+                PUSH_CODE (INSTR::PUSH_SLOT_NARGOUT1);
+            }
           else
             PUSH_CODE (INSTR::PUSH_SLOT_NARGOUT1);
           PUSH_SLOT (slot);
--- a/libinterp/parse-tree/pt-bytecode-walk.h	Fri Oct 27 18:19:50 2023 +0200
+++ b/libinterp/parse-tree/pt-bytecode-walk.h	Fri Oct 27 18:19:56 2023 +0200
@@ -168,6 +168,8 @@
 
     std::vector<nesting_statement> m_nesting_statement;
 
+    std::set<std::string> m_set_assigned_ids; // Only populated if i,j,I,J,e are ids
+
     // For "end" in indexing expression we need to know what variable is
     // being indexed.
     std::vector<id_being_indexed> m_indexed_id;
--- a/libinterp/parse-tree/pt-bytecode.h	Fri Oct 27 18:19:50 2023 +0200
+++ b/libinterp/parse-tree/pt-bytecode.h	Fri Oct 27 18:19:56 2023 +0200
@@ -216,6 +216,8 @@
   NEQ_CST,
   POW_CST_DBL,
   POW_CST,
+  PUSH_I,
+  PUSH_E,
 };
 
 enum class unwind_entry_type