changeset 174:4286209da98e

profiling push - fixed tree caching issue, 15% impr
author Scott Chacon <schacon@gmail.com>
date Wed, 03 Jun 2009 11:45:17 -0700
parents bc7b0d080031
children cee0473e67ca
files __init__.py dulwich/objects.py dulwich/repo.py git_handler.py
diffstat 4 files changed, 72 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/__init__.py	Wed Jun 03 09:59:16 2009 -0700
+++ b/__init__.py	Wed Jun 03 11:45:17 2009 -0700
@@ -40,7 +40,23 @@
     # fetch the initial git data
     git = GitHandler(dest_repo, ui)
     git.remote_add('origin', git_url)
-    git.fetch('origin')
+    
+    import cProfile, pstats
+    import lsprofcalltree
+    prof = cProfile.Profile()
+    prof = prof.runctx("git.fetch('origin')", globals(), locals())
+    stats = pstats.Stats(prof)
+    k = lsprofcalltree.KCacheGrind(prof)
+    data = open('/tmp/prof.kgrind', 'w+')
+    k.output(data)
+    data.close()
+    stats.sort_stats("cumulative")  # or "time" to rank by internal time
+    stats.print_stats(80)  # 80 = how many to print
+    # The rest is optional.
+    #stats.print_callees()
+    #stats.print_callers()
+    
+    #git.fetch('origin')
     
     # checkout the tip
     node = git.remote_head('origin')
--- a/dulwich/objects.py	Wed Jun 03 09:59:16 2009 -0700
+++ b/dulwich/objects.py	Wed Jun 03 11:45:17 2009 -0700
@@ -389,7 +389,6 @@
         count = count + 20
     return ret
 
-
 class Tree(ShaFile):
     """A Git tree object"""
 
@@ -457,7 +456,14 @@
 
     def _parse_text(self):
         """Grab the entries in the tree"""
-        self._entries = parse_tree(self._text)
+        tc = TreeCache()
+        parsed_tree = tc.get(self.id)
+        if parsed_tree:
+            self._entries = parsed_tree
+        else:
+            tree_ent = parse_tree(self._text)
+            tc.put(self.id, tree_ent)
+            self._entries = tree_ent
         self._needs_parsing = False
 
     def serialize(self):
@@ -657,6 +663,50 @@
         "Returns the zone the author time is in.")
 
 
+class TreeCache:
+    """ Singleton cache of parsed trees, keyed by tree sha """
+
+    class __impl:
+        """ Implementation of the singleton interface """
+        def __init__(self):
+            self.trees = {}
+        
+        def get(self, sha):
+            """ Return the cached tree for sha, or None if absent """
+            if sha in self.trees:
+                return self.trees[sha]
+            else:
+                return None
+
+        def put(self, sha, tree):
+            """ Store tree in the cache under sha """
+            self.trees[sha] = tree
+
+        def size(self):
+            return len(self.trees)
+
+    # storage for the instance reference
+    __instance = None
+
+    def __init__(self):
+        """ Create singleton instance """
+        # Check whether we already have an instance
+        if TreeCache.__instance is None:
+            # Create and remember instance
+            TreeCache.__instance = TreeCache.__impl()
+
+        # Store instance reference as the only member in the handle
+        self.__dict__['_TreeCache__instance'] = TreeCache.__instance
+
+    def __getattr__(self, attr):
+        """ Delegate access to implementation """
+        return getattr(self.__instance, attr)
+
+    def __setattr__(self, attr, value):
+        """ Delegate access to implementation """
+        return setattr(self.__instance, attr, value)
+        
+      
 type_map = {
     BLOB_ID : Blob,
     TREE_ID : Tree,
--- a/dulwich/repo.py	Wed Jun 03 09:59:16 2009 -0700
+++ b/dulwich/repo.py	Wed Jun 03 11:45:17 2009 -0700
@@ -323,14 +323,8 @@
     def commit(self, sha):
         return self._get_object(sha, Commit)
 
-    # we call this a lot on import, so we're caching it a bit
-    already_parsed_trees = {}
     def tree(self, sha):
-        if sha in self.already_parsed_trees:
-            return self.already_parsed_trees[sha]
-        tree = self._get_object(sha, Tree)
-        self.already_parsed_trees[sha] = tree
-        return tree
+        return self._get_object(sha, Tree)
 
     def tag(self, sha):
         return self._get_object(sha, Tag)
--- a/git_handler.py	Wed Jun 03 09:59:16 2009 -0700
+++ b/git_handler.py	Wed Jun 03 11:45:17 2009 -0700
@@ -416,13 +416,12 @@
                     entry[2] = sha
                 sha_group.append(entry[2])
                 tree_data.append(entry)
-            print sha_group
-            
+
+            # calculating a sha for the tree, so we don't write it twice
             listsha = make_sha()
             for s in sha_group:
                 listsha.update(s)
             listsha = listsha.hexdigest()
-            print listsha
             
             if listsha in self.written_trees:
                 tree_shas[dirnm] = self.written_trees[listsha]