Mercurial > hg-git
changeset 171:88e413d853ee
fixed serious speed issue with rename detection
author | Scott Chacon <schacon@gmail.com> |
---|---|
date | Wed, 03 Jun 2009 09:22:17 -0700 |
parents | 92e708d6e3a1 |
children | ac92cdc45ceb |
files | TODO.txt __init__.py git_handler.py lsprofcalltree.py |
diffstat | 4 files changed, 93 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/TODO.txt Tue Jun 02 21:27:19 2009 -0700 +++ b/TODO.txt Wed Jun 03 09:22:17 2009 -0700 @@ -30,6 +30,7 @@ SPEED/EFFICIENCY ================ +* cache rename detection on push (seems to be very expensive) * switch object mapping to hg->git since the many to one is that direction * don't send blobs/trees already on server (thin pack) * packfile creation benchmarking (seems to take a while sometimes)
--- a/__init__.py Tue Jun 02 21:27:19 2009 -0700 +++ b/__init__.py Wed Jun 03 09:22:17 2009 -0700 @@ -49,14 +49,19 @@ def gpush(ui, repo, remote_name='origin', branch=None): git = GitHandler(repo, ui) import cProfile, pstats + import lsprofcalltree prof = cProfile.Profile() prof = prof.runctx("git.push(remote_name)", globals(), locals()) stats = pstats.Stats(prof) + k = lsprofcalltree.KCacheGrind(prof) + data = open('/tmp/prof.kgrind', 'w+') + k.output(data) + data.close() stats.sort_stats("time") # Or cumulative stats.print_stats(80) # 80 = how many to print # The rest is optional. - # stats.print_callees() - # stats.print_callers() + #stats.print_callees() + #stats.print_callers() def gimport(ui, repo, remote_name=None): git = GitHandler(repo, ui)
--- a/git_handler.py Tue Jun 02 21:27:19 2009 -0700 +++ b/git_handler.py Wed Jun 03 09:22:17 2009 -0700 @@ -187,6 +187,7 @@ return dict(filter(is_local_head, refs.items())) def export_git_objects(self): + self.manifest_renames = {} self.ui.status(_("importing Hg objects into Git\n")) total = len(self.repo.changelog) if total: @@ -331,16 +332,24 @@ trees = {} man = ctx.manifest() renames = [] - for filenm in man.keys(): + for filenm, nodesha in man.iteritems(): + file_id = hex(nodesha) # write blob if not in our git database - fctx = ctx.filectx(filenm) - rename = fctx.renamed() - if rename: - filerename, sha = rename + fctx = ctx.filectx(filenm) + filerename = None + if file_id in self.manifest_renames: + filerename = self.manifest_renames[file_id] + else: + rename = fctx.renamed() + if rename: + filerename, sha = rename + self.manifest_renames[file_id] = filerename + else: + self.manifest_renames[file_id] = None + if filerename: renames.append((filerename, filenm)) is_exec = 'x' in fctx.flags() is_link = 'l' in fctx.flags() - file_id = hex(fctx.filenode()) blob_sha = self.map_git_get(file_id) if not blob_sha: blob_sha = self.git.write_blob(fctx.data()) # writing new blobs to git
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lsprofcalltree.py Wed Jun 03 09:22:17 2009 -0700 @@ -0,0 +1,70 @@ + + +def label(code): + if isinstance(code, str): + return ('~', 0, code) # built-in functions ('~' sorts at the end) + else: + return '%s %s:%d' % (code.co_name, code.co_filename, code.co_firstlineno) + + + +class KCacheGrind(object): + def __init__(self, profiler): + self.data = profiler.getstats() + self.out_file = None + + def output(self, out_file): + self.out_file = out_file + print >> out_file, 'events: Ticks' + self._print_summary() + for entry in self.data: + self._entry(entry) + + def _print_summary(self): + max_cost = 0 + for entry in self.data: + totaltime = int(entry.totaltime * 1000) + max_cost = max(max_cost, totaltime) + print >> self.out_file, 'summary: %d' % (max_cost,) + + def _entry(self, entry): + out_file = self.out_file + code = entry.code + inlinetime = int(entry.inlinetime * 1000) + #print >> out_file, 'ob=%s' % (code.co_filename,) + if isinstance(code, str): + print >> out_file, 'fi=~' + else: + print >> out_file, 'fi=%s' % (code.co_filename,) + print >> out_file, 'fn=%s' % (label(code),) + if isinstance(code, str): + print >> out_file, '0 ', inlinetime + else: + print >> out_file, '%d %d' % (code.co_firstlineno, inlinetime) + # recursive calls are counted in entry.calls + if entry.calls: + calls = entry.calls + else: + calls = [] + if isinstance(code, str): + lineno = 0 + else: + lineno = code.co_firstlineno + for subentry in calls: + self._subentry(lineno, subentry) + print >> out_file + + def _subentry(self, lineno, subentry): + out_file = self.out_file + code = subentry.code + totaltime = int(subentry.totaltime * 1000) + #print >> out_file, 'cob=%s' % (code.co_filename,) + print >> out_file, 'cfn=%s' % (label(code),) + if isinstance(code, str): + print >> out_file, 'cfi=~' + print >> out_file, 'calls=%d 0' % (subentry.callcount,) + else: + print >> out_file, 'cfi=%s' % (code.co_filename,) + print >> out_file, 'calls=%d %d' % ( + subentry.callcount, code.co_firstlineno) + print >> out_file, '%d %d' % (lineno, totaltime)