Mercurial > hg-git
annotate hggit/git2hg.py @ 795:c19835c3c60d
git_handler: move extract_hg_metadata into git2hg
This function doesn't depend on self at all, so moving it to git2hg is
straightforward.
author | Siddharth Agarwal <sid0@fb.com> |
---|---|
date | Wed, 15 Oct 2014 16:54:50 -0700 |
parents | 1350e43e662f |
children | 7ff4913e72df |
rev | line source |
---|---|
788
e734d71cc558
git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff
changeset
|
1 # git2hg.py - convert Git repositories and commits to Mercurial ones |
e734d71cc558
git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff
changeset
|
2 |
795
c19835c3c60d
git_handler: move extract_hg_metadata into git2hg
Siddharth Agarwal <sid0@fb.com>
parents:
794
diff
changeset
|
3 import urllib |
788
e734d71cc558
git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff
changeset
|
4 from dulwich.objects import Commit, Tag |
e734d71cc558
git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff
changeset
|
5 |
e734d71cc558
git_handler: move get_git_incoming to a separate module
Siddharth Agarwal <sid0@fb.com>
parents:
diff
changeset
|
def find_incoming(git_object_store, git_map, refs):
    '''find what commits need to be imported

    git_object_store is a dulwich object store.
    git_map is a map with keys being Git commits that have already been imported
    refs is a map of refs to SHAs that we're interested in.

    Returns a GitIncomingResult whose commits attribute lists the SHAs to
    import, parents before children, and whose commit_cache attribute maps
    each SHA loaded during the traversal to its commit object.'''

    done = set()
    commit_cache = {}

    # sort by commit date
    def commitdate(sha):
        '''sort key for a commit: its commit_time minus its commit_timezone'''
        obj = git_object_store[sha]
        return obj.commit_time - obj.commit_timezone

    # get a list of all the head shas
    def get_heads(refs):
        '''return the distinct commit SHAs the refs point at

        Tags are peeled until a non-tag object is reached; refs that do not
        end up at a commit are ignored. The result is sorted by commitdate,
        highest first.'''
        todo = []
        seenheads = set()
        for sha in refs.itervalues():
            # refs could contain refs on the server that we haven't pulled down
            # the objects for
            if sha in git_object_store:
                obj = git_object_store[sha]
                while isinstance(obj, Tag):
                    # follow the tag to the object it points at
                    _obj_type, sha = obj.object
                    obj = git_object_store[sha]
                if isinstance(obj, Commit) and sha not in seenheads:
                    seenheads.add(sha)
                    todo.append(sha)

        todo.sort(key=commitdate, reverse=True)
        return todo

    def get_unseen_commits(todo):
        '''get all unseen commits reachable from todo in topological order

        'unseen' means not reachable from the done set and not in the git map.
        Mutates todo and the done set in the process.'''
        commits = []
        # todo is used as a stack: the entry at the end is examined first
        while todo:
            sha = todo[-1]
            if sha in done or sha in git_map:
                todo.pop()
                continue
            assert isinstance(sha, str)
            if sha in commit_cache:
                obj = commit_cache[sha]
            else:
                obj = git_object_store[sha]
                commit_cache[sha] = obj
            assert isinstance(obj, Commit)
            for p in obj.parents:
                if p not in done and p not in git_map:
                    todo.append(p)
                    # process parents of a commit before processing the
                    # commit itself, and come back to this commit later
                    break
            else:
                # no unseen parent is left, so the commit itself can be emitted
                commits.append(sha)
                done.add(sha)
                todo.pop()

        return commits

    todo = get_heads(refs)
    commits = get_unseen_commits(todo)

    return GitIncomingResult(commits, commit_cache)
77416ddca136
git2hg: return a struct from find_incoming
Siddharth Agarwal <sid0@fb.com>
parents:
788
diff
changeset
|
75 |
77416ddca136
git2hg: return a struct from find_incoming
Siddharth Agarwal <sid0@fb.com>
parents:
788
diff
changeset
|
class GitIncomingResult(object):
    '''struct to store result from find_incoming'''
    def __init__(self, commits, commit_cache):
        # the commits that need to be imported, as passed in by the caller
        self.commits = commits
        # the commit cache built while finding those commits
        self.commit_cache = commit_cache
795
c19835c3c60d
git_handler: move extract_hg_metadata into git2hg
Siddharth Agarwal <sid0@fb.com>
parents:
794
diff
changeset
|
81 |
c19835c3c60d
git_handler: move extract_hg_metadata into git2hg
Siddharth Agarwal <sid0@fb.com>
parents:
794
diff
changeset
|
def extract_hg_metadata(message, git_extra):
    '''extract Mercurial metadata stored in a Git commit

    message is the Git commit message. If it contains a line consisting of
    --HG--, everything after that line is parsed as "command : data" lines
    and removed from the returned message. Parsing stops at the first
    non-empty line that does not contain " : ".
    git_extra is an iterable of (field, data) pairs. Fields starting with
    "HG:" carry URL-quoted rename and extra data; every other field is
    stored in extra under a URL-quoted "GIT<n>-<field>" key with a
    URL-quoted value.

    Returns a (message, renames, branch, extra) tuple. renames maps the new
    path to the old path, branch is False when no branch line was found, and
    extra is a dict.

    Raises ValueError if a rename or extra entry lacks its separator.'''
    # urllib's quoting helpers live in urllib.parse on Python 3; resolve them
    # here so the function works on either interpreter.
    try:
        from urllib import quote, unquote
    except ImportError:
        from urllib.parse import quote, unquote

    renames = {}
    extra = {}
    branch = False

    split = message.split("\n--HG--\n", 1)
    if len(split) == 2:
        message, meta = split
        for line in meta.split("\n"):
            if line == '':
                continue

            if ' : ' not in line:
                # anything that is not "command : data" ends the metadata
                break
            command, data = line.split(" : ", 1)

            if command == 'rename':
                before, after = data.split(" => ", 1)
                renames[after] = before
            elif command == 'branch':
                branch = data
            elif command == 'extra':
                k, v = data.split(" : ", 1)
                extra[k] = unquote(v)

    git_fn = 0
    for field, data in git_extra:
        if field.startswith('HG:'):
            command = field[3:]
            if command == 'rename':
                before, after = data.split(':', 1)
                renames[unquote(after)] = unquote(before)
            elif command == 'extra':
                k, v = data.split(':', 1)
                extra[unquote(k)] = unquote(v)
        else:
            # preserve ordering in Git by using an incrementing integer for
            # each field. Note that extra metadata in Git is an ordered list
            # of pairs.
            hg_field = 'GIT%d-%s' % (git_fn, field)
            git_fn += 1
            extra[quote(hg_field)] = quote(data)

    return (message, renames, branch, extra)