Mercurial > hg-git
view git_handler.py @ 215:b5d4d1552765
add some annotations for test coverage
author | Abderrahim Kitouni <a.kitouni@gmail.com> |
---|---|
date | Tue, 07 Jul 2009 17:46:14 +0100 |
parents | 61471faeb7fd |
children | 8d961ea5fc5a |
line wrap: on
line source
import os, sys, math, urllib, re import toposort from dulwich.repo import Repo from dulwich.client import SimpleFetchGraphWalker from hgext import bookmarks from mercurial.i18n import _ from mercurial.node import hex, bin, nullid from mercurial import context from dulwich.misc import make_sha from dulwich.objects import ( Blob, Commit, Tag, Tree, format_timezone, ) class GitHandler(object): def __init__(self, dest_repo, ui): self.repo = dest_repo self.ui = ui self.mapfile = 'git-mapfile' self.tagsfile = 'git-tags' if ui.config('git', 'intree'): self.gitdir = self.repo.wjoin('.git') else: self.gitdir = self.repo.join('git') self.paths = ui.configitems('paths') self.init_if_missing() self.load_git() self.load_map() self.load_tags() # make the git data directory def init_if_missing(self): if not os.path.exists(self.gitdir): os.mkdir(self.gitdir) Repo.init_bare(self.gitdir) def load_git(self): self.git = Repo(self.gitdir) ## FILE LOAD AND SAVE METHODS def map_set(self, gitsha, hgsha): self._map_git[gitsha] = hgsha self._map_hg[hgsha] = gitsha def map_hg_get(self, gitsha): return self._map_git.get(gitsha) def map_git_get(self, hgsha): return self._map_hg.get(hgsha) def load_map(self): self._map_git = {} self._map_hg = {} if os.path.exists(self.repo.join(self.mapfile)): for line in self.repo.opener(self.mapfile): gitsha, hgsha = line.strip().split(' ', 1) self._map_git[gitsha] = hgsha self._map_hg[hgsha] = gitsha def save_map(self): file = self.repo.opener(self.mapfile, 'w+', atomictemp=True) for gitsha, hgsha in sorted(self._map_git.iteritems()): file.write("%s %s\n" % (gitsha, hgsha)) file.rename() def load_tags(self): self.tags = {} if os.path.exists(self.repo.join(self.tagsfile)): for line in self.repo.opener(self.tagsfile): sha, name = line.strip().split(' ', 1) self.tags[name] = sha def save_tags(self): file = self.repo.opener(self.tagsfile, 'w+', atomictemp=True) for name, sha in sorted(self.tags.iteritems()): if not self.repo.tagtype(name) == 'global': file.write("%s %s\n" % (sha, name)) file.rename() ## END FILE LOAD AND SAVE METHODS ## COMMANDS METHODS def import_commits(self, remote_name): self.import_git_objects(remote_name) self.save_map() def fetch(self, remote): self.ui.status(_("fetching from : %s\n") % remote) self.export_git_objects() refs = self.fetch_pack(remote) remote_name = self.remote_name(remote) if refs: self.import_git_objects(remote_name, refs) self.import_tags(refs) self.update_hg_bookmarks(refs) if remote_name: self.update_remote_branches(remote_name, refs) elif not self.paths: # intial cloning self.update_remote_branches('default', refs) else: self.ui.status(_("nothing new on the server\n")) self.save_map() def export_commits(self, export_objects=True): if export_objects: self.export_git_objects() self.export_hg_tags() self.update_references() self.save_map() def push(self, remote): self.fetch(remote) # get and convert objects if they already exist self.ui.status(_("pushing to : %s\n") % remote) self.export_commits(False) changed_refs = self.upload_pack(remote) remote_name = self.remote_name(remote) if remote_name and changed_refs: for ref, sha in changed_refs.iteritems(): self.ui.status(" "+ remote_name + "::" + ref + " => GIT:" + sha[0:8] + "\n") self.update_remote_branches(remote_name, changed_refs) def clear(self): mapfile = self.repo.join(self.mapfile) if os.path.exists(self.gitdir): for root, dirs, files in os.walk(self.gitdir, topdown=False): for name in files: os.remove(os.path.join(root, name)) for name in dirs: os.rmdir(os.path.join(root, name)) os.rmdir(self.gitdir) if os.path.exists(mapfile): os.remove(mapfile) ## CHANGESET CONVERSION METHODS def export_git_objects(self): self.previous_entries = {} self.written_trees = {} self.ui.status(_("importing Hg objects into Git\n")) nodes = [self.repo.lookup(n) for n in self.repo] export = [node for node in nodes if not hex(node) in self._map_hg] total = len(export) if total: magnitude = int(math.log(total, 10)) + 1 else: magnitude = 1 for i, rev in enumerate(export): if i%100 == 0: self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total)) ctx = self.repo.changectx(rev) state = ctx.extra().get('hg-git', None) if state == 'octopus': self.ui.debug("revision %d is a part of octopus explosion\n" % ctx.rev()) continue self.export_hg_commit(rev) self.save_map() # convert this commit into git objects # go through the manifest, convert all blobs/trees we don't have # write the commit object (with metadata info) def export_hg_commit(self, rev): def is_octopus_part(ctx): return ctx.extra().get('hg-git', None) in set(['octopus', 'octopus-done']) self.ui.note(_("converting revision %s\n") % rev) oldenc = self.swap_out_encoding() # make sure parents are converted first ctx = self.repo.changectx(rev) extra = ctx.extra() parents = [] if extra.get('hg-git', None) == 'octopus-done': # implode octopus parents part = ctx while is_octopus_part(part): (p1, p2) = part.parents() assert not is_octopus_part(p1) parents.append(p1) part = p2 parents.append(p2) else: parents = ctx.parents() for parent in parents: p_node = parent.node() if p_node != nullid and not hex(p_node) in self._map_hg: self.export_hg_commit(p_rev) tree_sha, renames = self.write_git_tree(ctx) commit = {} commit['tree'] = tree_sha (time, timezone) = ctx.date() if 'author' in extra: author = extra['author'] else: # hg authors might not have emails author = ctx.user() # check for git author pattern compliance regex = re.compile('^(.*?) \<(.*?)\>(.*)$') a = regex.match(author) if a: name = a.group(1) email = a.group(2) if len(a.group(3)) > 0: name += ' ext:(' + urllib.quote(a.group(3)) + ')' author = name + ' <' + email + '>' else: author = author + ' <none@none>' commit['author'] = author + ' ' + str(int(time)) + ' ' + format_timezone(-timezone) if 'message' in extra: commit['message'] = extra['message'] else: message = ctx.description() commit['message'] = ctx.description() + "\n" if 'committer' in extra: # fixup timezone (name_timestamp, timezone) = extra['committer'].rsplit(' ', 1) try: timezone = format_timezone(-int(timezone)) commit['committer'] = '%s %s' % (name_timestamp, timezone) except ValueError: #pragma: no cover self.ui.warn(_("Ignoring committer in extra, invalid timezone in r%d: '%s'.\n") % (ctx, timezone)) if 'encoding' in extra: commit['encoding'] = extra['encoding'] # HG EXTRA INFORMATION add_extras = False extra_message = '' if not ctx.branch() == 'default': add_extras = True extra_message += "branch : " + ctx.branch() + "\n" if renames: add_extras = True for oldfile, newfile in renames: extra_message += "rename : " + oldfile + " => " + newfile + "\n" for key, value in extra.iteritems(): if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'): continue else: add_extras = True extra_message += "extra : " + key + " : " + urllib.quote(value) + "\n" if add_extras: commit['message'] += "\n--HG--\n" + extra_message commit['parents'] = [] for parent in parents: hgsha = hex(parent.node()) git_sha = self.map_git_get(hgsha) if git_sha: commit['parents'].append(git_sha) commit_sha = self.git.write_commit_hash(commit) # writing new blobs to git self.map_set(commit_sha, ctx.hex()) self.swap_out_encoding(oldenc) return commit_sha def write_git_tree(self, ctx): trees = {} man = ctx.manifest() ctx_id = hex(ctx.node()) renames = [] for filenm, nodesha in man.iteritems(): file_id = hex(nodesha) if ctx_id not in self.previous_entries: self.previous_entries[ctx_id] = {} self.previous_entries[ctx_id][filenm] = file_id # write blob if not in our git database fctx = ctx.filectx(filenm) same_as_last = False for par in ctx.parents(): par_id = hex(par.node()) if par_id in self.previous_entries: if filenm in self.previous_entries[par_id]: if self.previous_entries[par_id][filenm] == file_id: same_as_last = True if not same_as_last: rename = fctx.renamed() if rename: filerename, sha = rename renames.append((filerename, filenm)) is_exec = 'x' in fctx.flags() is_link = 'l' in fctx.flags() blob_sha = self.map_git_get(file_id) if not blob_sha: blob_sha = self.git.write_blob(fctx.data()) # writing new blobs to git self.map_set(blob_sha, file_id) parts = filenm.split('/') if len(parts) > 1: # get filename and path for leading subdir filepath = parts[-1:][0] dirpath = "/".join([v for v in parts[0:-1]]) + '/' # get subdir name and path for parent dir parpath = '/' nparpath = '/' for part in parts[0:-1]: if nparpath == '/': nparpath = part + '/' else: nparpath += part + '/' treeentry = ['tree', part + '/', nparpath] if parpath not in trees: trees[parpath] = [] if treeentry not in trees[parpath]: trees[parpath].append( treeentry ) parpath = nparpath # set file entry fileentry = ['blob', filepath, blob_sha, is_exec, is_link] if dirpath not in trees: trees[dirpath] = [] trees[dirpath].append(fileentry) else: fileentry = ['blob', parts[0], blob_sha, is_exec, is_link] if '/' not in trees: trees['/'] = [] trees['/'].append(fileentry) dirs = trees.keys() if dirs: # sort by tree depth, so we write the deepest trees first dirs.sort(lambda a, b: len(b.split('/'))-len(a.split('/'))) dirs.remove('/') dirs.append('/') else: # manifest is empty => make empty root tree trees['/'] = [] dirs = ['/'] # write all the trees tree_sha = None tree_shas = {} for dirnm in dirs: tree_data = [] sha_group = [] # calculating a sha for the tree, so we don't write it twice listsha = make_sha() for entry in trees[dirnm]: # replace tree path with tree SHA if entry[0] == 'tree': sha = tree_shas[entry[2]] entry[2] = sha listsha.update(entry[1]) listsha.update(entry[2]) tree_data.append(entry) listsha = listsha.hexdigest() if listsha in self.written_trees: tree_sha = self.written_trees[listsha] tree_shas[dirnm] = tree_sha else: tree_sha = self.git.write_tree_array(tree_data) # writing new trees to git tree_shas[dirnm] = tree_sha self.written_trees[listsha] = tree_sha return (tree_sha, renames) # should be the last root tree sha def import_git_objects(self, remote_name=None, refs=None): self.ui.status(_("importing Git objects into Hg\n")) # import heads and fetched tags as remote references todo = [] done = set() convert_list = {} # get a list of all the head shas if refs: for head, sha in refs.iteritems(): todo.append(sha) elif remote_name: todo = self.git.remote_refs(remote_name).values()[:] else: todo = self.git.heads().values()[:] # traverse the heads getting a list of all the unique commits while todo: sha = todo.pop() assert isinstance(sha, str) if sha in done: continue done.add(sha) obj = self.git.get_object(sha) if isinstance (obj, Commit): convert_list[sha] = obj todo.extend([p for p in obj.parents if p not in done]) if isinstance(obj, Tag): (obj_type, obj_sha) = obj.get_object() obj = self.git.get_object(obj_sha) if isinstance (obj, Commit): convert_list[sha] = obj todo.extend([p for p in obj.parents if p not in done]) # sort the commits commits = toposort.TopoSort(convert_list).items() commits = [commit for commit in commits if not commit in self._map_git] # import each of the commits, oldest first total = len(commits) magnitude = int(math.log(total, 10)) + 1 if total else 1 for i, csha in enumerate(commits): if i%100 == 0: self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total)) commit = convert_list[csha] self.import_git_commit(commit) def import_git_commit(self, commit): self.ui.debug(_("importing: %s\n") % commit.id) # TODO: Do something less coarse-grained than try/except on the # get_file call for removed files (strip_message, hg_renames, hg_branch, extra) = self.extract_hg_metadata(commit.message) # get a list of the changed, added, removed files files = self.git.get_files_changed(commit) date = (commit.author_time, -commit.author_timezone) text = strip_message try: text.decode('utf-8') except UnicodeDecodeError: extra['message'] = text text = self.decode_guess(text, commit._encoding) author = commit.author # convert extra data back to the end if ' ext:' in commit.author: regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$') m = regex.match(commit.author) if m: name = m.group(1) ex = urllib.unquote(m.group(2)) email = m.group(3) author = name + ' <' + email + '>' + ex if ' <none@none>' in commit.author: author = commit.author[:-12] try: author.decode('utf-8') except UnicodeDecodeError: extra['author'] = author author = self.decode_guess(author, commit._encoding) oldenc = self.swap_out_encoding() def getfilectx(repo, memctx, f): try: (mode, sha, data) = self.git.get_file(commit, f) e = self.convert_git_int_mode(mode) except TypeError: raise IOError() if f in hg_renames: copied_path = hg_renames[f] else: copied_path = None return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path) gparents = map(self.map_hg_get, commit.parents) p1, p2 = (nullid, nullid) octopus = False if len(gparents) > 1: # merge, possibly octopus def commit_octopus(p1, p2): ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, author, date, {'hg-git': 'octopus'}) return hex(self.repo.commitctx(ctx)) octopus = len(gparents) > 2 p2 = gparents.pop() p1 = gparents.pop() while len(gparents) > 0: p2 = commit_octopus(p1, p2) p1 = gparents.pop() else: if gparents: p1 = gparents.pop() files = list(set(files)) pa = None if not (p2 == nullid): node1 = self.repo.changectx(p1) node2 = self.repo.changectx(p2) pa = node1.ancestor(node2) # if named branch, add to extra if hg_branch: extra['branch'] = hg_branch # if committer is different than author, add it to extra if not commit._author_raw == commit._committer_raw: extra['committer'] = "%s %d %d" % (commit.committer, commit.commit_time, -commit.commit_timezone) if commit._encoding: extra['encoding'] = commit._encoding if hg_branch: extra['branch'] = hg_branch if octopus: extra['hg-git'] ='octopus-done' ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, author, date, extra) node = self.repo.commit_import_ctx(ctx, pa) self.swap_out_encoding(oldenc) # save changeset to mapping file cs = hex(node) self.map_set(commit.id, cs) ## PACK UPLOADING AND FETCHING def upload_pack(self, remote): client, path = self.get_transport_and_path(remote) changed = self.get_changed_refs genpack = self.generate_pack_contents try: self.ui.status(_("creating and sending data\n")) changed_refs = client.send_pack(path, changed, genpack) return changed_refs except: # TODO: remove try/except or do something useful here raise # TODO: for now, we'll just push all heads that match remote heads # * we should have specified push, tracking branches and --all # takes a dict of refs:shas from the server and returns what should be # pushed up def get_changed_refs(self, refs): keys = refs.keys() changed = {} if not keys: return None # TODO: this is a huge hack if keys[0] == 'capabilities^{}': # nothing on the server yet - first push if not 'master' in self.repo.tags(): tip = self.repo.lookup('tip') changed['refs/heads/master'] = self.map_git_get(hex(tip)) for tag, sha in self.tags.iteritems(): tag_name = 'refs/tags/' + tag if tag_name not in refs: changed[tag_name] = self.map_git_get(sha) for ref_name in keys: parts = ref_name.split('/') if parts[0] == 'refs' and parts[1] == 'heads': # strip off 'refs/heads' head = "/".join([v for v in parts[2:]]) try: local_ref = self.repo.lookup(head) remote_ref = self.map_hg_get(refs[ref_name]) if remote_ref: remotectx = self.repo[remote_ref] localctx = self.repo[local_ref] if remotectx.ancestor(localctx) == remotectx: # fast forward push changed[ref_name] = self.map_git_get(hex(local_ref)) else: # XXX: maybe abort completely ui.warn('not pushing branch %s, please merge'% head) except RepoError: #pragma: no cover # remote_ref is not here pass # Also push any local branches not on the server yet for head in self.local_heads(): ref = 'refs/heads/' + head if not ref in refs: node = self.repo.lookup(head) changed[ref] = self.map_git_get(hex(node)) return changed # takes a list of shas the server wants and shas the server has # and generates a list of commit shas we need to push up def generate_pack_contents(self, want, have): graph_walker = SimpleFetchGraphWalker(want, self.git.get_parents) next = graph_walker.next() shas = set() while next: if next in have: graph_walker.ack(next) else: shas.add(next) next = graph_walker.next() seen = [] # so now i have the shas, need to turn them into a list of # tuples (sha, path) for ALL the objects i'm sending # TODO: don't send blobs or trees they already have def get_objects(tree, path): changes = list() changes.append((tree, path)) for (mode, name, sha) in tree.entries(): if mode == 0160000: # TODO: properly handle submodules and document what 57344 means continue if sha in seen: continue obj = self.git.get_object(sha) seen.append(sha) if isinstance (obj, Blob): changes.append((obj, path + name)) elif isinstance(obj, Tree): changes.extend(get_objects(obj, path + name + '/')) return changes objects = [] for commit_sha in shas: commit = self.git.commit(commit_sha) objects.append((commit, 'commit')) tree = self.git.get_object(commit.tree) objects.extend( get_objects(tree, '/') ) return objects def fetch_pack(self, remote_name): client, path = self.get_transport_and_path(remote_name) graphwalker = SimpleFetchGraphWalker(self.git.heads().values(), self.git.get_parents) f, commit = self.git.object_store.add_pack() try: determine_wants = self.git.object_store.determine_wants_all refs = client.fetch_pack(path, determine_wants, graphwalker, f.write, sys.stdout.write) f.close() commit() return refs finally: f.close() ## REFERENCES HANDLING def update_references(self): heads = self.local_heads() # Create a local Git branch name for each # Mercurial bookmark. for key in heads: self.git.set_ref('refs/heads/' + key, heads[key]) def export_hg_tags(self): for tag, sha in self.repo.tags().iteritems(): if self.repo.tagtype(tag) in ('global', 'git'): self.git.set_ref('refs/tags/' + tag, self.map_git_get(hex(sha))) self.tags[tag] = hex(sha) def local_heads(self): try: bms = bookmarks.parse(self.repo) return dict([(bm, self.map_git_get(hex(bms[bm]))) for bm in bms]) except AttributeError: #pragma: no cover return {} def import_tags(self, refs): keys = refs.keys() if not keys: return for k in keys[:]: ref_name = k parts = k.split('/') if parts[0] == 'refs' and parts[1] == 'tags': ref_name = "/".join([v for v in parts[2:]]) if ref_name[-3:] == '^{}': ref_name = ref_name[:-3] if not ref_name in self.repo.tags(): obj = self.git.get_object(refs[k]) sha = None if isinstance (obj, Commit): # lightweight sha = self.map_hg_get(refs[k]) self.tags[ref_name] = sha elif isinstance (obj, Tag): # annotated (obj_type, obj_sha) = obj.get_object() obj = self.git.get_object(obj_sha) if isinstance (obj, Commit): sha = self.map_hg_get(obj_sha) # TODO: better handling for annotated tags self.tags[ref_name] = sha self.save_tags() def update_hg_bookmarks(self, refs): try: bms = bookmarks.parse(self.repo) heads = dict([(ref[11:],refs[ref]) for ref in refs if ref.startswith('refs/heads/')]) for head, sha in heads.iteritems(): hgsha = bin(self.map_hg_get(sha)) if not head in bms: # new branch bms[head] = hgsha else: bm = self.repo[bms[head]] if bm.ancestor(self.repo[hgsha]) == bm: # fast forward bms[head] = hgsha if heads: bookmarks.write(self.repo, bms) except AttributeError: self.ui.warn(_('creating bookmarks failed, do you have' ' bookmarks enabled?\n')) def update_remote_branches(self, remote_name, refs): heads = dict([(ref[11:],refs[ref]) for ref in refs if ref.startswith('refs/heads/')]) for head, sha in heads.iteritems(): hgsha = bin(self.map_hg_get(sha)) tag = '%s/%s' % (remote_name, head) self.repo.tag(tag, hgsha, '', True, None, None) self.git.set_remote_refs(refs, remote_name) ## UTILITY FUNCTIONS def convert_git_int_mode(self, mode): # TODO: make these into constants convert = { 0100644: '', 0100755: 'x', 0120000: 'l'} if mode in convert: return convert[mode] return '' def extract_hg_metadata(self, message): split = message.split("\n\n--HG--\n", 1) renames = {} extra = {} branch = False if len(split) == 2: message, meta = split lines = meta.split("\n") for line in lines: if line == '': continue command, data = line.split(" : ", 1) if command == 'rename': before, after = data.split(" => ", 1) renames[after] = before if command == 'branch': branch = data if command == 'extra': before, after = data.split(" : ", 1) extra[before] = urllib.unquote(after) return (message, renames, branch, extra) def remote_name(self, remote): names = [name for name, path in self.paths if path == remote] if names: return names[0] # Stolen from hgsubversion def swap_out_encoding(self, new_encoding='UTF-8'): try: from mercurial import encoding old = encoding.encoding encoding.encoding = new_encoding except ImportError: old = hgutil._encoding hgutil._encoding = new_encoding return old def decode_guess(self, string, encoding): # text is not valid utf-8, try to make sense of it if encoding: try: return string.decode(encoding).encode('utf-8') except UnicodeDecodeError: pass try: return string.decode('latin-1').encode('utf-8') except UnicodeDecodeError: return string.decode('ascii', 'replace').encode('utf-8') def get_transport_and_path(self, uri): from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient for handler, transport in (("git://", TCPGitClient), ("git@", SSHGitClient), ("git+ssh://", SSHGitClient)): if uri.startswith(handler): host, path = uri[len(handler):].split("/", 1) return transport(host), '/' + path # if its not git or git+ssh, try a local url.. return SubprocessGitClient(), uri