changeset 14935:5801e031a3b5

Place releases after last use and generalize dom visiting
author Max Brister <max@2bass.com>
date Mon, 04 Jun 2012 13:10:44 -0500
parents 4ec5f49cdfe4
children 32deb562ae77
files src/pt-jit.cc src/pt-jit.h
diffstat 2 files changed, 483 insertions(+), 181 deletions(-) [+]
line wrap: on
line diff
--- a/src/pt-jit.cc	Sun Jun 03 16:30:21 2012 -0500
+++ b/src/pt-jit.cc	Mon Jun 04 13:10:44 2012 -0500
@@ -339,30 +339,30 @@
   fn = create_function ("octave_jit_grab_any", any, any);
                         
   engine->addGlobalMapping (fn, reinterpret_cast<void*>(&octave_jit_grab_any));
-  grab_fn.add_overload (fn, false, any, any);
+  grab_fn.add_overload (fn, false, false, any, any);
   grab_fn.stash_name ("grab");
 
   // grab scalar
   fn = create_identity (scalar);
-  grab_fn.add_overload (fn, false, scalar, scalar);
+  grab_fn.add_overload (fn, false, false, scalar, scalar);
 
   // grab index
   fn = create_identity (index);
-  grab_fn.add_overload (fn, false, index, index);
+  grab_fn.add_overload (fn, false, false, index, index);
 
   // release any
   fn = create_function ("octave_jit_release_any", void_t, any->to_llvm ());
   engine->addGlobalMapping (fn, reinterpret_cast<void*>(&octave_jit_release_any));
-  release_fn.add_overload (fn, false, 0, any);
+  release_fn.add_overload (fn, false, false, 0, any);
   release_fn.stash_name ("release");
 
   // release scalar
   fn = create_identity (scalar);
-  release_fn.add_overload (fn, false, 0, scalar);
+  release_fn.add_overload (fn, false, false, 0, scalar);
 
   // release index
   fn = create_identity (index);
-  release_fn.add_overload (fn, false, 0, index);
+  release_fn.add_overload (fn, false, false, 0, index);
 
   // now for binary scalar operations
   // FIXME: Finish all operations
@@ -401,7 +401,7 @@
     builder.CreateRet (zero);
   }
   llvm::verifyFunction (*fn);
-  for_init_fn.add_overload (fn, false, index, range);
+  for_init_fn.add_overload (fn, false, false, index, range);
 
   // bounds check for for loop
   for_check_fn.stash_name ("for_check");
@@ -417,7 +417,7 @@
     builder.CreateRet (ret);
   }
   llvm::verifyFunction (*fn);
-  for_check_fn.add_overload (fn, false, boolean, range, index);
+  for_check_fn.add_overload (fn, false, false, boolean, range, index);
 
   // index variabe for for loop
   for_index_fn.stash_name ("for_index");
@@ -437,7 +437,7 @@
     builder.CreateRet (ret);
   }
   llvm::verifyFunction (*fn);
-  for_index_fn.add_overload (fn, false, scalar, range, index);
+  for_index_fn.add_overload (fn, false, false, scalar, range, index);
 
   // logically true
   // FIXME: Check for NaN
@@ -450,14 +450,14 @@
     builder.CreateRet (ret);
   }
   llvm::verifyFunction (*fn);
-  logically_true.add_overload (fn, true, boolean, scalar);
+  logically_true.add_overload (fn, true, false, boolean, scalar);
 
   fn = create_function ("octave_logically_true_bool", boolean, boolean);
   body = llvm::BasicBlock::Create (context, "body", fn);
   builder.SetInsertPoint (body);
   builder.CreateRet (fn->arg_begin ());
   llvm::verifyFunction (*fn);
-  logically_true.add_overload (fn, false, boolean, boolean);
+  logically_true.add_overload (fn, false, false, boolean, boolean);
   logically_true.stash_name ("logically_true");
 
   casts[any->type_id ()].stash_name ("(any)");
@@ -466,20 +466,20 @@
   // cast any <- scalar
   fn = create_function ("octave_jit_cast_any_scalar", any, scalar);
   engine->addGlobalMapping (fn, reinterpret_cast<void*> (&octave_jit_cast_any_scalar));
-  casts[any->type_id ()].add_overload (fn, false, any, scalar);
+  casts[any->type_id ()].add_overload (fn, false, false, any, scalar);
 
   // cast scalar <- any
   fn = create_function ("octave_jit_cast_scalar_any", scalar, any);
   engine->addGlobalMapping (fn, reinterpret_cast<void*> (&octave_jit_cast_scalar_any));
-  casts[scalar->type_id ()].add_overload (fn, false, scalar, any);
+  casts[scalar->type_id ()].add_overload (fn, false, false, scalar, any);
 
   // cast any <- any
   fn = create_identity (any);
-  casts[any->type_id ()].add_overload (fn, false, any, any);
+  casts[any->type_id ()].add_overload (fn, false, false, any, any);
 
   // cast scalar <- scalar
   fn = create_identity (scalar);
-  casts[scalar->type_id ()].add_overload (fn, false, scalar, scalar);
+  casts[scalar->type_id ()].add_overload (fn, false, false, scalar, scalar);
 }
 
 void
@@ -494,7 +494,7 @@
                                         ty->to_llvm ());
   engine->addGlobalMapping (fn, call);
 
-  jit_function::overload ol (fn, false, 0, string, ty);
+  jit_function::overload ol (fn, false, true, 0, string, ty);
   print_fn.add_overload (ol);
 }
 
@@ -517,7 +517,7 @@
   builder.CreateRet (ret);
   llvm::verifyFunction (*fn);
 
-  jit_function::overload ol(fn, false, ty, ty, ty);
+  jit_function::overload ol(fn, false, false, ty, ty, ty);
   binary_ops[op].add_overload (ol);
 }
 
@@ -539,7 +539,7 @@
   builder.CreateRet (ret);
   llvm::verifyFunction (*fn);
 
-  jit_function::overload ol (fn, false, boolean, ty, ty);
+  jit_function::overload ol (fn, false, false, boolean, ty, ty);
   binary_ops[op].add_overload (ol);
 }
 
@@ -561,7 +561,7 @@
   builder.CreateRet (ret);
   llvm::verifyFunction (*fn);
 
-  jit_function::overload ol (fn, false, boolean, ty, ty);
+  jit_function::overload ol (fn, false, false, boolean, ty, ty);
   binary_ops[op].add_overload (ol);
 }
 
@@ -666,6 +666,13 @@
 
 // -------------------- jit_instruction --------------------
 void
+jit_instruction::remove (void)
+{
+  if (mparent)
+    mparent->remove (mlocation);
+}
+
+void
 jit_instruction::push_variable (void)
 {
   if (tag ())
@@ -715,23 +722,40 @@
 jit_block::prepend (jit_instruction *instr)
 {
   instructions.push_front (instr);
-  instr->stash_parent (this);
+  instr->stash_parent (this, instructions.begin ());
   return instr;
 }
 
 jit_instruction *
+jit_block::prepend_after_phi (jit_instruction *instr)
+{
+  // FIXME: Make this O(1)
+  for (iterator iter = begin (); iter != end (); ++iter)
+    {
+      jit_instruction *temp = *iter;
+      if (! temp->is_phi ())
+        {
+          insert_before (iter, instr);
+          return instr;
+        }
+    }
+
+  return append (instr);
+}
+
+jit_instruction *
 jit_block::append (jit_instruction *instr)
 {
   instructions.push_back (instr);
-  instr->stash_parent (this);
+  instr->stash_parent (this, --instructions.end ());
   return instr;
 }
 
 jit_instruction *
 jit_block::insert_before (iterator loc, jit_instruction *instr)
 {
-  instructions.insert (loc, instr);
-  instr->stash_parent (this);
+  iterator iloc = instructions.insert (loc, instr);
+  instr->stash_parent (this, iloc);
   return instr;
 }
 
@@ -739,8 +763,8 @@
 jit_block::insert_after (iterator loc, jit_instruction *instr)
 {
   ++loc;
-  instructions.insert (loc, instr);
-  instr->stash_parent (this);
+  iterator iloc = instructions.insert (loc, instr);
+  instr->stash_parent (this, iloc);
   return instr;
 }
 
@@ -751,7 +775,7 @@
     return 0;
 
   jit_instruction *last = instructions.back ();
-  return dynamic_cast<jit_terminator *> (last);
+  return last->to_terminator ();
 }
 
 jit_block *
@@ -925,61 +949,8 @@
 }
 
 void
-jit_block::finish_phi (jit_block *apred)
-{
-  size_t pred_idx = pred_index (apred);
-  for (iterator iter = begin (); iter != end ()
-         && dynamic_cast<jit_phi *> (*iter); ++iter)
-    {
-      jit_instruction *phi = *iter;
-      jit_variable *var = phi->tag ();
-      phi->stash_argument (pred_idx, var->top ());
-    }
-}
-
-void
-jit_block::do_construct_ssa (jit_convert& convert, size_t visit_count)
+jit_block::pop_all (void)
 {
-  if (mvisit_count > visit_count)
-    return;
-  ++mvisit_count;
-
-  for (iterator iter = begin (); iter != end (); ++iter)
-    {
-      jit_instruction *instr = *iter;
-      bool isphi = dynamic_cast<jit_phi *> (instr);
-
-      if (! isphi)
-        {
-          for (size_t i = 0; i < instr->argument_count (); ++i)
-            {
-              jit_variable *var;
-              var = dynamic_cast<jit_variable *> (instr->argument (i));
-              if (var)
-                instr->stash_argument (i, var->top ());
-            }
-
-          // FIXME: Remove need for jit_store_argument dynamic cast
-          jit_variable *tag = instr->tag ();
-          if (tag && tag->has_top ()
-              && ! dynamic_cast<jit_store_argument *> (instr))
-            {
-              jit_call *rel = convert.create<jit_call> (jit_typeinfo::release,
-                                                        tag->top ());
-              insert_after (iter, rel);
-              ++iter;
-            }
-        }
-
-      instr->push_variable ();
-    }
-
-  for (size_t i = 0; i < succ_count (); ++i)
-    succ (i)->finish_phi (this);
-
-  for (size_t i = 0; i < dom_succ.size (); ++i)
-    dom_succ[i]->do_construct_ssa (convert, visit_count);
-
   for (iterator iter = begin (); iter != end (); ++iter)
     {
       jit_instruction *instr = *iter;
@@ -1021,6 +992,18 @@
 
 // -------------------- jit_call --------------------
 bool
+jit_call::dead (void) const
+{
+  return ! has_side_effects () && use_count () == 0;
+}
+
+bool
+jit_call::almost_dead (void) const
+{
+  return ! has_side_effects () && use_count () <= 1;
+}
+
+bool
 jit_call::infer (void)
 {
   // FIXME: explain algorithm
@@ -1082,10 +1065,6 @@
        iter != constants.end (); ++iter)
     append_users (*iter);
 
-#ifdef OCTAVE_JIT_DEBUG
-  print_blocks ("octave jit ir");
-#endif
-
   // FIXME: Describe algorithm here
   while (worklist.size ())
     {
@@ -1096,6 +1075,8 @@
         append_users (next);
     }
 
+  place_releases ();
+
 #ifdef OCTAVE_JIT_DEBUG
   std::cout << "-------------------- Compiling tree --------------------\n";
   std::cout << tee.str_print_code () << std::endl;
@@ -1107,7 +1088,7 @@
   for (jit_block::iterator iter = entry_block->begin ();
        iter != entry_block->end (); ++iter)
     {
-      if (jit_extract_argument *extract = dynamic_cast<jit_extract_argument *> (*iter))
+      if (jit_extract_argument *extract = (*iter)->to_extract_argument ())
         arguments.push_back (std::make_pair (extract->name (), true));
     }
 
@@ -1714,7 +1695,7 @@
   entry_block->compute_df ();
   entry_block->create_dom_tree ();
 
-  // insert phi nodes where needed
+  // insert phi nodes where needed, this is done on a per variable basis
   for (vmap_t::iterator iter = vmap.begin (); iter != vmap.end (); ++iter)
     {
       jit_block::df_set visited, added_phi;
@@ -1748,13 +1729,59 @@
         }
     }
 
-  entry_block->construct_ssa (*this);
+  entry_block->visit_dom (&jit_convert::do_construct_ssa, &jit_block::pop_all);
 }
 
 void
-jit_convert::finish_breaks (jit_block *dest, const break_list& lst)
+jit_convert::do_construct_ssa (jit_block& block)
 {
-  for (break_list::const_iterator iter = lst.begin (); iter != lst.end ();
+  // replace variables with their current SSA value
+  for (jit_block::iterator iter = block.begin (); iter != block.end (); ++iter)
+    {
+      jit_instruction *instr = *iter;
+      if (! instr->is_phi ())
+        {
+          for (size_t i = 0; i < instr->argument_count (); ++i)
+            {
+              jit_variable *var;
+              var = instr->argument_variable (i);
+              assert (var == instr->argument (i)->to_variable ());
+              assert (var == dynamic_cast<jit_variable *> (instr->argument (i)));
+              if (var)
+                instr->stash_argument (i, var->top ());
+            }
+        }
+
+      instr->push_variable ();
+    }
+
+  // finish phi nodes of sucessors
+  for (size_t i = 0; i < block.succ_count (); ++i)
+    {
+      jit_block *finish = block.succ (i);
+      size_t pred_idx = finish->pred_index (&block);
+
+      for (jit_block::iterator iter = finish->begin (); iter != finish->end ()
+             && (*iter)->is_phi (); ++iter)
+        {
+          jit_instruction *phi = *iter;
+          jit_variable *var = phi->tag ();
+          phi->stash_argument (pred_idx, var->top ());
+        }
+    }
+}
+
+void
+jit_convert::place_releases (void)
+{
+  jit_convert::release_placer placer (*this);
+  entry_block->visit_dom (placer, &jit_block::pop_all);
+}
+
+void
+jit_convert::finish_breaks (jit_block *dest, const block_list& lst)
+{
+  for (block_list::const_iterator iter = lst.begin (); iter != lst.end ();
        ++iter)
     {
       jit_block *b = *iter;
@@ -1762,6 +1789,42 @@
     }
 }
 
+// -------------------- jit_convert::release_placer --------------------
+void
+jit_convert::release_placer::operator() (jit_block& block)
+{
+  for (jit_block::iterator iter = block.begin (); iter != block.end (); ++iter)
+    {
+      jit_instruction *instr = *iter;
+      for (size_t i = 0; i < instr->argument_count (); ++i)
+        {
+          jit_instruction *arg = instr->argument_instruction (i);
+          if (arg && arg->tag ())
+            {
+              jit_variable *tag = arg->tag ();
+              tag->stash_last_use (instr);
+            }
+        }
+
+      jit_variable *tag = instr->tag ();
+      if (tag && ! (instr->is_phi () || instr->is_store_argument ())
+          && tag->has_top ())
+        {
+          jit_instruction *last_use = tag->last_use ();
+          jit_call *release = convert.create<jit_call> (jit_typeinfo::release,
+                                                        tag->top ());
+          release->infer ();
+          if (last_use && last_use->parent () == &block
+              && ! last_use->is_phi ())
+            block.insert_after (last_use->location (), release);
+          else
+            block.prepend_after_phi (release);
+        }
+
+      instr->push_variable ();
+    }
+}
+
 // -------------------- jit_convert::convert_llvm --------------------
 llvm::Function *
 jit_convert::convert_llvm::convert (llvm::Module *module,
@@ -1812,42 +1875,10 @@
         {
           jit_block& block = **biter;
           for (jit_block::iterator piter = block.begin ();
-               piter != block.end () && dynamic_cast<jit_phi *> (*piter); ++piter)
+               piter != block.end () && (*piter)->is_phi (); ++piter)
             {
-              // our phi nodes don't have to have the same incomming type,
-              // so we do casts here
               jit_instruction *phi = *piter;
-              jit_block *pblock = phi->parent ();
-              llvm::PHINode *llvm_phi = llvm::cast<llvm::PHINode> (phi->to_llvm ());
-              for (size_t i = 0; i < phi->argument_count (); ++i)
-                {
-                  llvm::BasicBlock *pred = pblock->pred_llvm (i);
-                  if (phi->argument_type_llvm (i) == phi->type_llvm ())
-                    {
-                      llvm_phi->addIncoming (phi->argument_llvm (i), pred);
-                    }
-                  else
-                    {
-                      // add cast right before pred terminator
-                      builder.SetInsertPoint (--pred->end ());
-
-                      const jit_function::overload& ol
-                        = jit_typeinfo::cast (phi->type (),
-                                              phi->argument_type (i));
-                      if (! ol.function)
-                        {
-                          std::stringstream ss;
-                          ss << "No cast for phi(" << i << "): ";
-                          phi->print (ss);
-                          fail (ss.str ());
-                        }
-
-                      llvm::Value *casted;
-                      casted = builder.CreateCall (ol.function,
-                                                   phi->argument_llvm (i));
-                      llvm_phi->addIncoming (casted, pred);
-                    }
-                }
+              finish_phi (phi);
             }
         }
 
@@ -1864,6 +1895,87 @@
 }
 
 void
+jit_convert::convert_llvm::finish_phi (jit_instruction *phi)
+{
+  jit_block *pblock = phi->parent ();
+  llvm::PHINode *llvm_phi = llvm::cast<llvm::PHINode> (phi->to_llvm ());
+
+  bool can_remove = llvm_phi->use_empty ();
+  if (! can_remove && llvm_phi->hasOneUse () && phi->use_count () == 1)
+    {
+      jit_instruction *user = phi->first_use ()->user ();
+      can_remove = user->is_call (); // must be a remove
+    }
+
+  if (can_remove)
+    {
+      // replace with releases along each incomming branch
+      while (! llvm_phi->use_empty ())
+        {
+          llvm::Instruction *llvm_instr;
+          llvm_instr = llvm::cast<llvm::Instruction> (llvm_phi->use_back ());
+          llvm_instr->eraseFromParent ();
+        }
+
+      llvm_phi->eraseFromParent ();
+      phi->stash_llvm (0);
+
+      for (size_t i = 0; i < phi->argument_count (); ++i)
+        {
+          jit_value *arg = phi->argument (i);
+          if (arg->has_llvm () && phi->argument_type (i) != phi->type ())
+            {
+              llvm::BasicBlock *pred = pblock->pred_llvm (i);
+              builder.SetInsertPoint (--pred->end ());
+              const jit_function::overload& ol
+                = jit_typeinfo::get_release (phi->argument_type (i));
+              if (! ol.function)
+                {
+                  std::stringstream ss;
+                  ss << "No release for phi(" << i << "): ";
+                  phi->print (ss);
+                  fail (ss.str ());
+                }
+
+              builder.CreateCall (ol.function, phi->argument_llvm (i));
+            }
+        }
+    }
+  else
+    {
+      for (size_t i = 0; i < phi->argument_count (); ++i)
+        {
+          llvm::BasicBlock *pred = pblock->pred_llvm (i);
+          if (phi->argument_type (i) == phi->type ())
+            {
+              llvm_phi->addIncoming (phi->argument_llvm (i), pred);
+            }
+          else
+            {
+              // add cast right before pred terminator
+              builder.SetInsertPoint (--pred->end ());
+
+              const jit_function::overload& ol
+                = jit_typeinfo::cast (phi->type (),
+                                      phi->argument_type (i));
+              if (! ol.function)
+                {
+                  std::stringstream ss;
+                  ss << "No cast for phi(" << i << "): ";
+                  phi->print (ss);
+                  fail (ss.str ());
+                }
+
+              llvm::Value *casted;
+              casted = builder.CreateCall (ol.function,
+                                           phi->argument_llvm (i));
+              llvm_phi->addIncoming (casted, pred);
+            }
+        }
+    }
+}
+
+void
 jit_convert::convert_llvm::visit (jit_const_string& cs)
 {
   cs.stash_llvm (builder.CreateGlobalStringPtr (cs.value ()));
--- a/src/pt-jit.h	Sun Jun 03 16:30:21 2012 -0500
+++ b/src/pt-jit.h	Mon Jun 04 13:10:44 2012 -0500
@@ -163,19 +163,20 @@
 jit_function
 {
 public:
-  struct overload
+  struct
+  overload
   {
     overload (void) : function (0), can_error (true), result (0) {}
 
-    overload (llvm::Function *f, bool e, jit_type *r, jit_type *arg0) :
-      function (f), can_error (e), result (r), arguments (1)
+    overload (llvm::Function *f, bool e, bool s, jit_type *r, jit_type *arg0) :
+      function (f), can_error (e), side_effects (s), result (r), arguments (1)
     {
       arguments[0] = arg0;
     }
 
-    overload (llvm::Function *f, bool e, jit_type *r, jit_type *arg0,
-              jit_type *arg1) : function (f), can_error (e), result (r),
-                                arguments (2)
+    overload (llvm::Function *f, bool e, bool s, jit_type *r, jit_type *arg0,
+              jit_type *arg1) : function (f), can_error (e), side_effects (s),
+                                result (r), arguments (2)
     {
       arguments[0] = arg0;
       arguments[1] = arg1;
@@ -183,6 +184,7 @@
 
     llvm::Function *function;
     bool can_error;
+    bool side_effects;
     jit_type *result;
     std::vector<jit_type*> arguments;
   };
@@ -192,16 +194,16 @@
     add_overload (func, func.arguments);
   }
 
-  void add_overload (llvm::Function *f, bool e, jit_type *r, jit_type *arg0)
+  void add_overload (llvm::Function *f, bool e, bool s, jit_type *r, jit_type *arg0)
   {
-    overload ol (f, e, r, arg0);
+    overload ol (f, e, s, r, arg0);
     add_overload (ol);
   }
 
-  void add_overload (llvm::Function *f, bool e, jit_type *r, jit_type *arg0,
+  void add_overload (llvm::Function *f, bool e, bool s, jit_type *r, jit_type *arg0,
                      jit_type *arg1)
   {
-    overload ol (f, e, r, arg0, arg1);
+    overload ol (f, e, s, r, arg0, arg1);
     add_overload (ol);
   }
 
@@ -293,6 +295,11 @@
     return instance->release_fn;
   }
 
+  static const jit_function::overload& get_release (jit_type *type)
+  {
+    return instance->release_fn.get_overload (type);
+  }
+
   static const jit_function& print_value (void)
   {
     return instance->print_fn;
@@ -490,19 +497,54 @@
 // We convert the octave parse tree to this IR directly.
 
 #define JIT_VISIT_IR_NOTEMPLATE                 \
-  JIT_METH(block);                              \
-  JIT_METH(break);                              \
-  JIT_METH(cond_break);                         \
-  JIT_METH(call);                               \
-  JIT_METH(extract_argument);                   \
-  JIT_METH(store_argument);                     \
-  JIT_METH(phi);                                \
+  JIT_METH(block)                               \
+  JIT_METH(break)                               \
+  JIT_METH(cond_break)                          \
+  JIT_METH(call)                                \
+  JIT_METH(extract_argument)                    \
+  JIT_METH(store_argument)                      \
+  JIT_METH(phi)                                 \
   JIT_METH(variable)
 
+#define JIT_VISIT_IR_CONST                      \
+  JIT_METH(const_scalar)                        \
+  JIT_METH(const_index)                         \
+  JIT_METH(const_string)                        \
+  JIT_METH(const_range)
+
+#define JIT_VISIT_IR_ABSTRACT                   \
+  JIT_METH(instruction)                         \
+  JIT_METH(terminator)
+
 #define JIT_VISIT_IR_CLASSES                    \
-  JIT_VISIT_IR_NOTEMPLATE;                      \
+  JIT_VISIT_IR_NOTEMPLATE                       \
   JIT_VISIT_IR_CONST
 
+#define JIT_VISIT_IR_ALL                        \
+  JIT_VISIT_IR_CLASSES                          \
+  JIT_VISIT_IR_ABSTRACT
+
+// forward declare all ir classes
+#define JIT_METH(cname)                         \
+  class jit_ ## cname;
+
+JIT_VISIT_IR_ABSTRACT
+JIT_VISIT_IR_NOTEMPLATE
+
+#undef JIT_METH
+
+// constants are typedefs from jit_const
+template <typename T, jit_type *(*EXTRACT_T)(void), typename PASS_T = T,
+          bool QUOTE=false>
+class jit_const;
+
+typedef jit_const<double, jit_typeinfo::get_scalar> jit_const_scalar;
+typedef jit_const<octave_idx_type, jit_typeinfo::get_index> jit_const_index;
+
+typedef jit_const<std::string, jit_typeinfo::get_string, const std::string&, true>
+jit_const_string;
+typedef jit_const<jit_range, jit_typeinfo::get_range, const jit_range&>
+jit_const_range;
 
 class jit_ir_walker;
 class jit_use;
@@ -566,6 +608,22 @@
   {
     llvm_value = compiled;
   }
+
+#define JIT_METH(cname)                                         \
+  virtual bool is_ ## cname (void) const                        \
+  { return false; }                                             \
+                                                                \
+  virtual jit_ ## cname *to_ ## cname (void)                    \
+  { return 0; }                                                 \
+                                                                \
+  virtual const jit_ ## cname *to_ ## cname (void) const        \
+  { return 0; }
+
+JIT_VISIT_IR_NOTEMPLATE
+JIT_VISIT_IR_ABSTRACT
+
+#undef JIT_METH
+
 protected:
   std::ostream& print_indent (std::ostream& os, size_t indent) const
   {
@@ -583,8 +641,15 @@
 
 std::ostream& operator<< (std::ostream& os, const jit_value& value);
 
-class jit_instruction;
-class jit_block;
+#define JIT_GEN_CASTS(cname)                                    \
+  virtual bool is_ ## cname (void) const                        \
+  { return true; }                                              \
+                                                                \
+  virtual jit_ ## cname *to_ ## cname (void)                    \
+  { return this; }                                              \
+                                                                \
+  virtual const jit_ ## cname *to_ ## cname (void) const        \
+  { return this; }
 
 class
 jit_use
@@ -615,6 +680,8 @@
 
   jit_block *user_parent (void) const;
 
+  std::list<jit_block *> user_parent_location (void) const;
+
   void stash_value (jit_value *avalue, jit_instruction *auser = 0,
                     size_t aindex = -1)
   {
@@ -668,8 +735,6 @@
   size_t mindex;
 };
 
-class jit_variable;
-
 class
 jit_instruction : public jit_value
 {
@@ -738,6 +803,20 @@
     return argument_type (i)->to_llvm ();
   }
 
+  // generate functions of form argument_type where type is any subclass of
+  // jit_value
+#define JIT_METH(cname)                                 \
+  jit_ ## cname *argument_ ## cname (size_t i) const    \
+  {                                                     \
+    jit_value *arg = argument (i);                      \
+    return arg ? arg->to_ ## cname () : 0;              \
+  }
+
+JIT_VISIT_IR_ABSTRACT
+JIT_VISIT_IR_NOTEMPLATE
+
+#undef JIT_METH
+
   std::ostream& print_argument (std::ostream& os, size_t i) const
   {
     if (argument (i))
@@ -770,8 +849,14 @@
   const std::vector<jit_type *>& argument_types (void) const
   { return already_infered; }
 
+  virtual bool dead (void) const { return false; }
+
+  virtual bool almost_dead (void) const { return false; }
+
   virtual bool infer (void) { return false; }
 
+  void remove (void);
+
   void push_variable (void);
 
   void pop_variable (void);
@@ -780,17 +865,25 @@
 
   jit_block *parent (void) const { return mparent; }
 
+  std::list<jit_instruction *>::iterator location (void) const
+  {
+    return mlocation;
+  }
+
   llvm::BasicBlock *parent_llvm (void) const;
 
-  void stash_parent (jit_block *aparent)
+  void stash_parent (jit_block *aparent,
+                     std::list<jit_instruction *>::iterator alocation)
   {
-    assert (! mparent);
     mparent = aparent;
+    mlocation = alocation;
   }
 
   jit_variable *tag (void) const;
 
   void stash_tag (jit_variable *atag);
+
+  JIT_GEN_CASTS (instruction)
 protected:
   std::vector<jit_type *> already_infered;
 private:
@@ -809,14 +902,15 @@
 
   size_t id;
   jit_block *mparent;
+  std::list<jit_instruction *>::iterator mlocation;
 };
 
 // defnie accept methods for subclasses
 #define JIT_VALUE_ACCEPT(clname)                \
   virtual void accept (jit_ir_walker& walker);
 
-template <typename T, jit_type *(*EXTRACT_T)(void), typename PASS_T = T,
-          bool QUOTE=false>
+template <typename T, jit_type *(*EXTRACT_T)(void), typename PASS_T,
+          bool QUOTE>
 class
 jit_const : public jit_instruction
 {
@@ -847,24 +941,6 @@
   T mvalue;
 };
 
-typedef jit_const<double, jit_typeinfo::get_scalar> jit_const_scalar;
-typedef jit_const<octave_idx_type, jit_typeinfo::get_index> jit_const_index;
-
-typedef jit_const<std::string, jit_typeinfo::get_string, const std::string&, true>
-jit_const_string;
-typedef jit_const<jit_range, jit_typeinfo::get_range, const jit_range&>
-jit_const_range;
-
-#define JIT_VISIT_IR_CONST                      \
-  JIT_METH(const_scalar);                       \
-  JIT_METH(const_index);                        \
-  JIT_METH(const_string);                       \
-  JIT_METH(const_range)
-
-class jit_terminator;
-class jit_phi;
-class jit_convert;
-
 class
 jit_block : public jit_value
 {
@@ -884,6 +960,8 @@
 
   jit_instruction *prepend (jit_instruction *instr);
 
+  jit_instruction *prepend_after_phi (jit_instruction *instr);
+
   jit_instruction *append (jit_instruction *instr);
 
   jit_instruction *insert_before (iterator loc, jit_instruction *instr);
@@ -892,7 +970,9 @@
 
   void remove (jit_block::iterator iter)
   {
+    jit_instruction *instr = *iter;
     instructions.erase (iter);
+    instr->stash_parent (0, instructions.end ());
   }
 
   jit_terminator *terminator (void) const;
@@ -1001,11 +1081,18 @@
     create_dom_tree (mvisit_count);
   }
 
-  void construct_ssa (jit_convert& convert)
+  // visit blocks in the order of the dominator tree
+  // inorder - Run on the root first, then children
+  // postorder - Run on children first, then the root
+  template <typename func_type0, typename func_type1>
+  void visit_dom (func_type0 inorder, func_type1 postorder)
   {
-    do_construct_ssa (convert, mvisit_count);
+    do_visit_dom (mvisit_count, inorder, postorder);
   }
 
+  // call pop_varaible on all instructions
+  void pop_all (void);
+
   virtual std::ostream& print (std::ostream& os, size_t indent) const
   {
     print_indent (os, indent) << mname << ":        %pred = ";
@@ -1036,19 +1123,19 @@
   llvm::BasicBlock *to_llvm (void) const;
 
   JIT_VALUE_ACCEPT (block)
+  JIT_GEN_CASTS (block)
 private:
   void compute_df (size_t visit_count);
 
   bool update_idom (size_t visit_count);
 
-  void finish_phi (jit_block *pred);
-
-  void do_construct_ssa (jit_convert& convert, size_t visit_count);
-
   void create_dom_tree (size_t visit_count);
 
   jit_block *idom_intersect (jit_block *b);
 
+  template <typename func_type0, typename func_type1>
+  void do_visit_dom (size_t visit_count, func_type0 inorder, func_type1 postorder);
+
   static const size_t NO_ID = static_cast<size_t> (-1);
   size_t mvisit_count;
   size_t mid;
@@ -1060,6 +1147,54 @@
   mutable std::vector<llvm::BasicBlock *> mpred_llvm;
 };
 
+// allow regular function pointers as well as pointers to members
+template <typename func_type>
+class jit_block_callback
+{
+public:
+  jit_block_callback (func_type afunction) : function (afunction) {}
+
+  void operator() (jit_block& block)
+  {
+    function (block);
+  }
+private:
+  func_type function;
+};
+
+template <>
+class jit_block_callback<void (jit_block::*)(void)>
+{
+public:
+  typedef void (jit_block::*func_type)(void);
+
+  jit_block_callback (func_type afunction) : function (afunction) {}
+
+  void operator() (jit_block& ablock)
+  {
+    (ablock.*function) ();
+  }
+private:
+  func_type function;
+};
+
+template <typename func_type0, typename func_type1>
+void
+jit_block::do_visit_dom (size_t visit_count, func_type0 inorder, func_type1 postorder)
+{
+  if (mvisit_count > visit_count)
+    return;
+  mvisit_count = visit_count + 1;
+
+  jit_block_callback<func_type0> inorder_cb (inorder);
+  inorder_cb (*this);
+
+  for (size_t i = 0; i < dom_succ.size (); ++i)
+    dom_succ[i]->do_visit_dom (visit_count, inorder, postorder);
+
+  jit_block_callback<func_type1> postorder_cb (postorder);
+  postorder_cb (*this);
+}
 
 
 // A non-ssa variable
@@ -1067,7 +1202,7 @@
 jit_variable : public jit_value
 {
 public:
-  jit_variable (const std::string& aname) : mname (aname) {}
+  jit_variable (const std::string& aname) : mname (aname), mlast_use (0) {}
 
   const std::string &name (void) const { return mname; }
 
@@ -1083,9 +1218,10 @@
     return value_stack.top ();
   }
 
-  void push (jit_value *v)
+  void push (jit_instruction *v)
   {
     value_stack.push (v);
+    mlast_use = v;
   }
 
   void pop (void)
@@ -1093,6 +1229,16 @@
     value_stack.pop ();
   }
 
+  jit_instruction *last_use (void) const
+  {
+    return mlast_use;
+  }
+
+  void stash_last_use (jit_instruction *instr)
+  {
+    mlast_use = instr;
+  }
+
   // blocks in which we are used
   void use_blocks (jit_block::df_set& result)
   {
@@ -1110,9 +1256,11 @@
   }
 
   JIT_VALUE_ACCEPT (variable)
+  JIT_GEN_CASTS (variable)
 private:
   std::string mname;
   std::stack<jit_value *> value_stack;
+  jit_instruction *mlast_use;
 };
 
 class
@@ -1125,6 +1273,16 @@
     stash_tag (avariable);
   }
 
+  virtual bool dead (void) const
+  {
+    return use_count () == 0;
+  }
+
+  virtual bool almost_dead (void) const
+  {
+    return use_count () <= 1;
+  }
+
   virtual bool infer (void)
   {
     jit_type *infered = 0;
@@ -1167,6 +1325,7 @@
   }
 
   JIT_VALUE_ACCEPT (phi);
+  JIT_GEN_CASTS (phi)
 };
 
 class
@@ -1197,6 +1356,8 @@
   }
 
   virtual size_t sucessor_count (void) const = 0;
+
+  JIT_GEN_CASTS (terminator)
 };
 
 class
@@ -1220,6 +1381,7 @@
   }
 
   JIT_VALUE_ACCEPT (break)
+  JIT_GEN_CASTS (break)
 };
 
 class
@@ -1258,6 +1420,7 @@
   }
 
   JIT_VALUE_ACCEPT (cond_break)
+  JIT_GEN_CASTS (cond_break)
 };
 
 class
@@ -1280,6 +1443,11 @@
 
   const jit_function& function (void) const { return mfunction; }
 
+  bool has_side_effects (void) const
+  {
+    return overload ().side_effects;
+  }
+
   const jit_function::overload& overload (void) const
   {
     return mfunction.get_overload (argument_types ());
@@ -1302,9 +1470,14 @@
     return os << ")";
   }
 
+  virtual bool dead (void) const;
+
+  virtual bool almost_dead (void) const;
+
   virtual bool infer (void);
 
   JIT_VALUE_ACCEPT (call)
+  JIT_GEN_CASTS (call)
 private:
   const jit_function& mfunction;
 };
@@ -1339,6 +1512,7 @@
   }
 
   JIT_VALUE_ACCEPT (extract_argument)
+  JIT_GEN_CASTS (extract_argument)
 };
 
 class
@@ -1385,6 +1559,7 @@
   }
 
   JIT_VALUE_ACCEPT (store_argument)
+  JIT_GEN_CASTS (store_argument)
 };
 
 class
@@ -1394,7 +1569,7 @@
   virtual ~jit_ir_walker () {}
 
 #define JIT_METH(clname) \
-  virtual void visit (jit_ ## clname&) = 0
+  virtual void visit (jit_ ## clname&) = 0;
 
   JIT_VISIT_IR_CLASSES;
 
@@ -1548,6 +1723,9 @@
     return ret;
   }
 private:
+  typedef std::list<jit_block *> block_list;
+  typedef block_list::iterator block_iterator;
+
   std::vector<std::pair<std::string, bool> > arguments;
   type_bound_vector bounds;
 
@@ -1597,6 +1775,10 @@
 
   void construct_ssa (jit_block *final_block);
 
+  static void do_construct_ssa (jit_block& block);
+
+  void place_releases (void);
+
   void print_blocks (const std::string& header)
   {
     std::cout << "-------------------- " << header << " --------------------\n";
@@ -1621,13 +1803,20 @@
     std::cout << std::endl;
   }
 
-  typedef std::list<jit_block *> break_list;
+  bool breaking; // true if we are breaking OR continuing
+  block_list breaks;
+  block_list continues;
+
+  void finish_breaks (jit_block *dest, const block_list& lst);
 
-  bool breaking; // true if we are breaking OR continuing
-  break_list breaks;
-  break_list continues;
+  struct release_placer
+  {
+    release_placer (jit_convert& aconvert) : convert (aconvert) {}
 
-  void finish_breaks (jit_block *dest, const break_list& lst);
+    jit_convert& convert;
+
+    void operator() (jit_block& block);
+  };
 
   // this case is much simpler, just convert from the jit ir to llvm
   class
@@ -1648,6 +1837,7 @@
     // name -> llvm argument
     std::map<std::string, llvm::Value *> arguments;
 
+    void finish_phi (jit_instruction *phi);
 
     void visit (jit_value *jvalue)
     {