annotate git_handler.py @ 9:7e776864b301

sorts the commits topologically before converting
author Scott Chacon <schacon@gmail.com>
date Sat, 25 Apr 2009 20:56:03 -0700
parents 2548735d24ef
children 66860f141788
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
89992b6d2eef mapping parents properly now
Scott Chacon <schacon@gmail.com>
parents: 6
diff changeset
1 import os, errno, sys, time, datetime, pickle, copy
5
d6c443a91b18 refactored the git handling stuff out into another class
Scott Chacon <schacon@gmail.com>
parents:
diff changeset
2 import dulwich
d6c443a91b18 refactored the git handling stuff out into another class
Scott Chacon <schacon@gmail.com>
parents:
diff changeset
3 from dulwich.repo import Repo
d6c443a91b18 refactored the git handling stuff out into another class
Scott Chacon <schacon@gmail.com>
parents:
diff changeset
4 from dulwich.client import SimpleFetchGraphWalker
d6c443a91b18 refactored the git handling stuff out into another class
Scott Chacon <schacon@gmail.com>
parents:
diff changeset
5 from hgext import bookmarks
d6c443a91b18 refactored the git handling stuff out into another class
Scott Chacon <schacon@gmail.com>
parents:
diff changeset
6 from mercurial.i18n import _
6
c77197123d95 importing basic, mostly stubbed changesets
Scott Chacon <schacon@gmail.com>
parents: 5
diff changeset
7 from mercurial.node import bin, hex, nullid
c77197123d95 importing basic, mostly stubbed changesets
Scott Chacon <schacon@gmail.com>
parents: 5
diff changeset
8 from mercurial import hg, util, context, error
5
d6c443a91b18 refactored the git handling stuff out into another class
Scott Chacon <schacon@gmail.com>
parents:
diff changeset
9
d6c443a91b18 refactored the git handling stuff out into another class
Scott Chacon <schacon@gmail.com>
parents:
diff changeset
class GitHandler(object):
    """Import Git history into a Mercurial repository.

    The Git objects are read through dulwich from a ``git`` directory under
    the Mercurial repo's path.  A ``git-mapfile`` (one ``<gitsha> <hgsha>``
    pair per line) records which Mercurial changeset each Git commit was
    converted to.
    """

    def __init__(self, dest_repo, ui):
        """Store the repo and ui, then open the Git repo and load the map.

        dest_repo -- the Mercurial repository commits are imported into
        ui        -- the Mercurial ui object used for output and config
        """
        self.repo = dest_repo
        self.ui = ui
        self.load_git()
        self.load_map()

    def load_git(self):
        """Open the dulwich repository stored in ``<repo.path>/git``."""
        git_dir = os.path.join(self.repo.path, 'git')
        self.git = Repo(git_dir)

    def load_map(self):
        """Populate ``self._map`` (git sha -> hg sha) from ``git-mapfile``.

        A missing map file leaves the map empty.  Blank lines are skipped so
        a stray trailing newline does not abort the load.
        """
        self._map = {}
        if not os.path.exists(self.repo.join('git-mapfile')):
            return
        fp = self.repo.opener('git-mapfile')
        try:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                gitsha, hgsha = line.split(' ', 1)
                self._map[gitsha] = hgsha
        finally:
            fp.close()

    def save_map(self):
        """Write ``self._map`` back to ``git-mapfile``, one pair per line."""
        fp = self.repo.opener('git-mapfile', 'w+')
        try:
            for gitsha, hgsha in self._map.items():
                fp.write("%s %s\n" % (gitsha, hgsha))
        finally:
            # close even when a write fails so the handle is not leaked
            fp.close()

    def fetch(self, git_url):
        """Fetch from ``git_url``, import the new commits and save the map."""
        # interpolate after translating so the catalog lookup sees a
        # constant message id
        self.ui.status(_("fetching from git url: %s\n") % git_url)
        self.export_git_objects()
        self.fetch_pack(git_url)
        self.import_git_objects()
        self.save_map()

    def fetch_pack(self, git_url):
        """Download a pack from ``git_url`` into the local Git object store.

        The pack file is always closed; it is only committed to the object
        store, and the refs only updated, when the transfer succeeded.  Any
        exception raised by the transfer propagates to the caller.
        """
        client, path = self.get_transport_and_path(git_url)
        graphwalker = SimpleFetchGraphWalker(self.git.heads().values(),
                                             self.git.get_parents)
        f, commit = self.git.object_store.add_pack()
        try:
            determine_wants = self.git.object_store.determine_wants_all
            refs = client.fetch_pack(path, determine_wants, graphwalker,
                                     f.write, sys.stdout.write)
        finally:
            f.close()
        commit()
        self.git.set_refs(refs)

    def import_git_objects(self):
        """Convert every commit reachable from the Git heads into Hg."""
        self.ui.status(_("importing Git objects into Hg\n"))

        # start the walk from every head sha
        todo = list(self.git.heads().values())
        done = set()
        convert_list = {}

        # traverse the heads getting a list of all the unique commits
        # TODO : stop when we hit a SHA we've already imported
        while todo:
            sha = todo.pop()
            assert isinstance(sha, str)
            if sha in done:
                continue
            done.add(sha)
            commit = self.git.commit(sha)
            convert_list[sha] = commit
            todo.extend([p for p in commit.parents if p not in done])

        # order the commits topologically; import_git_commit looks each
        # parent up in self._map, so parents have to be converted first
        commits = TopoSort(convert_list).items()

        # import each of the commits, oldest first
        for csha in commits:
            self.import_git_commit(convert_list[csha])

        # TODO : update Hg bookmarks (possibly named heads?)
        # until then just show what the bookmarks extension currently has
        self.ui.status("%s\n" % (bookmarks.parse(self.repo),))

    def import_git_commit(self, commit):
        """Commit one Git commit to the Hg repo and record it in the map.

        Raises KeyError when a parent of ``commit`` has no entry in the map
        yet (see the TODO below).
        """
        self.ui.status(_("importing: %s\n") % commit.id)

        # TODO : have to handle merge contexts at some point (two parent files, etc)
        def getfilectx(repo, memctx, f):
            (e, sha, data) = self.git.get_file(commit, f)
            e = ''  # TODO : make this a real mode
            return context.memfilectx(f, data, 'l' in e, 'x' in e, None)

        # forty zeros stand for "no parent"
        p1 = "0" * 40
        p2 = "0" * 40
        # TODO : do something if parent is not mapped yet!
        if len(commit.parents) > 0:
            p1 = self._map[commit.parents[0]]
        if len(commit.parents) > 1:
            p2 = self._map[commit.parents[1]]
        if len(commit.parents) > 2:
            # TODO : map extra parents to the extras file
            pass

        files = self.git.get_files_changed(commit)

        extra = {}
        text = commit.message
        # NOTE(review): fromtimestamp() renders local time, so the timezone
        # recorded in the Git commit is not carried over -- confirm intent
        date = datetime.datetime.fromtimestamp(
            commit.author_time).strftime("%Y-%m-%d %H:%M:%S")
        ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
                             commit.author, date, extra)
        node = self.repo.commitctx(ctx)

        # map the Git sha to the changeset commitctx() just created rather
        # than to whatever happens to be the changelog tip
        self._map[commit.id] = hex(node)

    def getfilectx(self, source, repo, memctx, f):
        # NOTE(review): `files` and `copies` are not defined in this method
        # or anywhere in the visible module, so calling this raises
        # NameError.  Nothing visible calls it; import_git_commit uses its
        # own nested getfilectx instead.
        v = files[f]
        data = source.getfile(f, v)
        e = source.getmode(f, v)
        return context.memfilectx(f, data, 'l' in e, 'x' in e, copies.get(f))

    def export_git_objects(self):
        """Placeholder: exporting Hg changesets to Git is not implemented."""
        pass

    def check_bookmarks(self):
        """Warn when the bookmarks extension is not configured."""
        # config() yields None when the key is absent, which is the case
        # that needs the warning
        if self.ui.config('extensions', 'hgext.bookmarks') is None:
            self.ui.warn("YOU NEED TO SETUP BOOKMARKS\n")

    def get_transport_and_path(self, uri):
        """Return a ``(client, path)`` pair for ``uri``.

        ``git://`` and ``git+ssh://`` URIs are split into host and path and
        get a TCP or SSH client; anything else is handed unchanged to a
        subprocess client.
        """
        from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient
        for handler, transport in (("git://", TCPGitClient),
                                   ("git+ssh://", SSHGitClient)):
            if uri.startswith(handler):
                host, path = uri[len(handler):].split("/", 1)
                return transport(host), "/" + path
        # if its not git or git+ssh, try a local url..
        return SubprocessGitClient(), uri
9
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
150
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
151
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
152 """
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
153 Tarjan's algorithm and topological sorting implementation in Python
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
154 by Paul Harrison
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
155 Public domain, do with it as you will
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
156 """
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
157 class TopoSort(object):
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
158
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
def __init__(self, commitdict):
    """Sort ``commitdict`` and keep the first entry of every sorted level.

    commitdict -- dictionary handed unchanged to robust_topological_sort()
    """
    self._sorted = self.robust_topological_sort(commitdict)
    # each level is indexable; only its first element is kept as the sha
    self._shas = [level[0] for level in self._sorted]
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
164
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
def items(self):
    """Return the sorted shas in reverse order as a new list.

    The stored list is left untouched, so calling this repeatedly always
    gives the same order (an in-place reverse would flip it on every call).
    """
    return self._shas[::-1]
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
168
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
def strongly_connected_components(self, graph):
    """ Find the strongly connected components in a graph using
        Tarjan's algorithm.

        graph should be a dictionary mapping node names to objects
        whose ``parents`` attribute lists the successor node names.

        Returns a list of tuples, one per component, in the order the
        components are completed (a node's successors come before it).

        The depth-first walk keeps its own stack of frames instead of
        recursing, so a long chain of commits cannot exceed Python's
        recursion limit.
        """

    result = []
    stack = []
    low = {}
    # one frame per node being visited:
    # (node, visit number, position in `stack`, iterator over successors)
    frames = []

    def enter(node):
        num = len(low)
        low[node] = num
        frames.append((node, num, len(stack), iter(graph[node].parents)))
        stack.append(node)

    for root in graph:
        if root in low:
            continue
        enter(root)

        while frames:
            node, num, stack_pos, successors = frames[-1]

            descended = False
            for successor in successors:
                if successor not in low:
                    # unvisited: walk into it first, resume this frame later
                    enter(successor)
                    descended = True
                    break
                low[node] = min(low[node], low[successor])
            if descended:
                continue

            # every successor of node has been handled
            frames.pop()
            if num == low[node]:
                component = tuple(stack[stack_pos:])
                del stack[stack_pos:]
                result.append(component)
                # finished nodes must never lower another node's value
                for item in component:
                    low[item] = len(graph)

            if frames:
                # fold the finished node's value into the node that led here
                caller = frames[-1][0]
                low[caller] = min(low[caller], low[node])

    return result
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
204
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
205
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
def topological_sort(self, graph):
    """Return the nodes of ``graph`` with every node ahead of its successors.

    graph maps each node to an iterable of its successor nodes.  Nodes that
    sit on a cycle never become ready and are left out of the result.
    """
    # number of not-yet-emitted predecessors for each node
    incoming = dict.fromkeys(graph, 0)
    for node in graph:
        for successor in graph[node]:
            incoming[successor] += 1

    pending = [node for node in graph if incoming[node] == 0]
    ordered = []
    while pending:
        current = pending.pop()
        ordered.append(current)
        for successor in graph[current]:
            incoming[successor] -= 1
            if incoming[successor] == 0:
                pending.append(successor)

    return ordered
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
227
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
228
7e776864b301 sorts the commits topologically before converting
Scott Chacon <schacon@gmail.com>
parents: 8
diff changeset
def robust_topological_sort(self, graph):
    """Topologically sort the strongly connected components of `graph`.

    The graph is first split into strongly connected components via
    `self.strongly_connected_components`; those components then become
    the nodes of a second graph, which is handed to
    `self.topological_sort`.

    `graph` maps each node to an object whose `parents` attribute lists
    the nodes it points at.  Components are used as dictionary keys, so
    they must be hashable.  Returns whatever `self.topological_sort`
    produces for the component graph, i.e. a list of components.
    """
    sccs = self.strongly_connected_components(graph)

    # Reverse lookup: which component does a given node belong to?
    owner = {}
    for scc in sccs:
        owner.update((member, scc) for member in scc)

    # One (initially edge-less) entry per component.
    condensed = dict((scc, []) for scc in sccs)

    # Project every node-level edge onto the components, dropping the
    # edges that stay inside a single component.
    for node in graph:
        source = owner[node]
        for parent in graph[node].parents:
            target = owner[parent]
            if source == target:
                continue
            condensed[source].append(target)

    return self.topological_sort(condensed)