annotate hggit/git2hg.py @ 795:c19835c3c60d

git_handler: move extract_hg_metadata into git2hg. This function doesn't depend on self at all, so moving it to git2hg is straightforward.
author Siddharth Agarwal <sid0@fb.com>
date Wed, 15 Oct 2014 16:54:50 -0700
parents 1350e43e662f
children 7ff4913e72df
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
788
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
1 # git2hg.py - convert Git repositories and commits to Mercurial ones
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
2
795
c19835c3c60d git_handler: move extract_hg_metadata into git2hg
Siddharth Agarwal <sid0@fb.com>
parents: 794
diff changeset
3 import urllib
788
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
4 from dulwich.objects import Commit, Tag
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
5
e734d71cc558 git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff changeset
def find_incoming(git_object_store, git_map, refs):
    '''find what commits need to be imported

    git_object_store is a dulwich object store.
    git_map is a map with keys being Git commits that have already been imported
    refs is a map of refs to SHAs that we're interested in.

    Returns a GitIncomingResult whose ``commits`` attribute lists the SHAs of
    the commits to import, ordered so that every commit appears after all of
    its not-yet-imported parents, and whose ``commit_cache`` attribute maps
    each SHA loaded during the traversal to its commit object.'''

    # SHAs already emitted into the result; shared with get_unseen_commits
    done = set()
    # sha -> commit object, filled lazily while walking the graph
    commit_cache = {}

    # sort by commit date
    def commitdate(sha):
        '''sort key for a head: its commit time minus its timezone offset'''
        obj = git_object_store[sha]
        return obj.commit_time-obj.commit_timezone

    # get a list of all the head shas
    def get_heads(refs):
        '''return the distinct commit SHAs the refs point at, newest first

        Tags are peeled until a non-tag object is reached; refs that do not
        resolve to a commit, or whose object is missing locally, are
        skipped.'''
        todo = []
        seenheads = set()
        for sha in refs.itervalues():
            # refs could contain refs on the server that we haven't pulled down
            # the objects for
            if sha in git_object_store:
                obj = git_object_store[sha]
                # follow (possibly nested) annotated tags to their target
                while isinstance(obj, Tag):
                    obj_type, sha = obj.object
                    obj = git_object_store[sha]
                if isinstance(obj, Commit) and sha not in seenheads:
                    seenheads.add(sha)
                    todo.append(sha)

        # newest head first; the traversal consumes the list from its end,
        # so the head with the smallest commitdate is walked first
        todo.sort(key=commitdate, reverse=True)
        return todo

    def get_unseen_commits(todo):
        '''get all unseen commits reachable from todo in topological order

        'unseen' means not reachable from the done set and not in the git map.
        Mutates todo and the done set in the process.'''
        commits = []
        # todo is used as an explicit stack: the entry being examined stays
        # on it until all of its parents have been emitted
        while todo:
            sha = todo[-1]
            if sha in done or sha in git_map:
                todo.pop()
                continue
            assert isinstance(sha, str)
            if sha in commit_cache:
                obj = commit_cache[sha]
            else:
                obj = git_object_store[sha]
                commit_cache[sha] = obj
            assert isinstance(obj, Commit)
            for p in obj.parents:
                if p not in done and p not in git_map:
                    todo.append(p)
                    # process parents of a commit before processing the
                    # commit itself, and come back to this commit later
                    break
            else:
                # no unseen parent is left, so the commit can be emitted
                commits.append(sha)
                done.add(sha)
                todo.pop()

        return commits

    todo = get_heads(refs)
    commits = get_unseen_commits(todo)

    return GitIncomingResult(commits, commit_cache)
77416ddca136 git2hg: return a struct from find_incoming
Siddharth Agarwal <sid0@fb.com>
parents: 788
diff changeset
75
77416ddca136 git2hg: return a struct from find_incoming
Siddharth Agarwal <sid0@fb.com>
parents: 788
diff changeset
class GitIncomingResult(object):
    '''struct to store result from find_incoming

    commits is the value passed as the first constructor argument;
    find_incoming passes the list of commit SHAs to import.
    commit_cache is the value passed as the second constructor argument;
    find_incoming passes its map of SHA to loaded commit object.'''
    def __init__(self, commits, commit_cache):
        # both values are stored as given, without copying
        self.commits = commits
        self.commit_cache = commit_cache
795
c19835c3c60d git_handler: move extract_hg_metadata into git2hg
Siddharth Agarwal <sid0@fb.com>
parents: 794
diff changeset
81
c19835c3c60d git_handler: move extract_hg_metadata into git2hg
Siddharth Agarwal <sid0@fb.com>
parents: 794
diff changeset
def extract_hg_metadata(message, git_extra):
    '''extract Mercurial metadata from a Git commit message and extra fields

    message is the Git commit message. If it contains a line consisting of
    --HG--, the text before that line becomes the returned message and the
    text after it is parsed one line at a time:

      rename : <before> => <after>   records renames[after] = before
      branch : <name>                records the branch name
      extra : <key> : <value>        records extra[key] = unquoted value

    Empty lines are skipped, and the first line that does not contain ' : '
    ends the parsing.

    git_extra is an iterable of (field, data) pairs. 'HG:rename' and
    'HG:extra' fields hold '<before>:<after>' and '<key>:<value>' with both
    halves URL-quoted; other 'HG:' fields are ignored. Every other field is
    stored in extra under the URL-quoted key 'GIT<n>-<field>' with its data
    URL-quoted, where <n> counts such fields in order of appearance.

    Entries that lack their inner separator are skipped rather than raising
    ValueError.

    Returns a tuple (message, renames, branch, extra); branch is False when
    no branch was recorded.'''
    renames = {}
    extra = {}
    branch = False

    split = message.split("\n--HG--\n", 1)
    if len(split) == 2:
        message, meta = split
        for line in meta.split("\n"):
            if line == '':
                continue

            if ' : ' not in line:
                # anything that is not "command : data" ends the metadata
                break
            command, data = line.split(" : ", 1)

            # partition() splits at the first separator like split(sep, 1),
            # but reports a missing separator instead of failing to unpack
            if command == 'rename':
                before, sep, after = data.partition(" => ")
                if sep:
                    renames[after] = before
            elif command == 'branch':
                branch = data
            elif command == 'extra':
                k, sep, v = data.partition(" : ")
                if sep:
                    extra[k] = urllib.unquote(v)

    git_fn = 0
    for field, data in git_extra:
        if field.startswith('HG:'):
            command = field[3:]
            if command == 'rename':
                before, sep, after = data.partition(':')
                if sep:
                    renames[urllib.unquote(after)] = urllib.unquote(before)
            elif command == 'extra':
                k, sep, v = data.partition(':')
                if sep:
                    extra[urllib.unquote(k)] = urllib.unquote(v)
        else:
            # preserve ordering in Git by using an incrementing integer for
            # each field. Note that extra metadata in Git is an ordered list
            # of pairs.
            hg_field = 'GIT%d-%s' % (git_fn, field)
            git_fn += 1
            extra[urllib.quote(hg_field)] = urllib.quote(data)

    return (message, renames, branch, extra)