annotate hggit/git2hg.py @ 794:1350e43e662f

git2hg.find_incoming: move graph traversal into a function. This is preparation for upcoming changes to find_incoming that will allow it to import certain Git branches as Mercurial named branches.
author Siddharth Agarwal <sid0@fb.com>
date Wed, 15 Oct 2014 14:21:09 -0700
parents 388944fca782
children c19835c3c60d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
788
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
1 # git2hg.py - convert Git repositories and commits to Mercurial ones
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
2
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
3 from dulwich.objects import Commit, Tag
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
4
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
def find_incoming(git_object_store, git_map, refs):
    '''work out which Git commits still have to be imported

    git_object_store is a dulwich object store.
    git_map is a map whose keys are the Git commits already imported.
    refs is a map of refs to the SHAs we're interested in.

    Returns a GitIncomingResult built from the list of commits to import
    (every commit listed after its not-yet-imported parents) and the cache
    of commit objects that were loaded while walking the graph.'''

    done = set()
    commit_cache = {}

    def unseen(sha):
        '''true if sha has been neither emitted by this run nor imported'''
        return sha not in done and sha not in git_map

    def commitdate(sha):
        '''sort key for heads: commit time minus the commit timezone'''
        commit = git_object_store[sha]
        return commit.commit_time - commit.commit_timezone

    def get_heads(refs):
        '''list the distinct head commit shas named by refs

        Tags are followed until a non-tag object is reached. The result is
        ordered by commitdate, largest first.'''
        heads = []
        seenheads = set()
        for sha in refs.itervalues():
            # refs may name objects on the server that were never pulled
            # down, so skip anything missing from the local store
            if sha not in git_object_store:
                continue
            obj = git_object_store[sha]
            while isinstance(obj, Tag):
                # a tag stores (type, sha) of its target; keep following
                _, sha = obj.object
                obj = git_object_store[sha]
            if not isinstance(obj, Commit) or sha in seenheads:
                continue
            seenheads.add(sha)
            heads.append(sha)

        return sorted(heads, key=commitdate, reverse=True)

    def get_unseen_commits(todo):
        '''get all unseen commits reachable from todo in topological order

        'unseen' means not reachable from the done set and not in the git map.
        todo is used as a stack and is emptied; the done set grows with
        every commit that is returned.'''
        ordered = []
        while todo:
            sha = todo[-1]
            if not unseen(sha):
                todo.pop()
                continue
            assert isinstance(sha, str)
            try:
                obj = commit_cache[sha]
            except KeyError:
                obj = commit_cache[sha] = git_object_store[sha]
            assert isinstance(obj, Commit)
            for parent in obj.parents:
                if unseen(parent):
                    # handle this parent first; the commit itself stays on
                    # the stack and is revisited once the parent is done
                    todo.append(parent)
                    break
            else:
                # no outstanding parents left, so the commit can be emitted
                ordered.append(sha)
                done.add(sha)
                todo.pop()

        return ordered

    heads = get_heads(refs)
    return GitIncomingResult(get_unseen_commits(heads), commit_cache)
77416ddca136 git2hg: return a struct from find_incoming
Siddharth Agarwal <sid0@fb.com>
parents: 788
diff changeset
74
77416ddca136 git2hg: return a struct from find_incoming
Siddharth Agarwal <sid0@fb.com>
parents: 788
diff changeset
class GitIncomingResult(object):
    '''plain record for the values returned by find_incoming

    commits and commit_cache are kept exactly as passed in (no copies are
    made) and exposed as attributes of the same names.'''
    def __init__(self, commits, commit_cache):
        self.commits, self.commit_cache = commits, commit_cache