# HG changeset patch # User Kevin Bullock # Date 1254339589 18000 # Node ID 505d7cdca19838bfb270193e0709154a2dad5c19 # Parent 3e0eb85a83a70dee1b34778df810a55a58d4a8dc package with distutils (patch tweaked slightly by Augie Fackler) diff -r 3e0eb85a83a7 -r 505d7cdca198 .gitignore --- a/.gitignore Fri Sep 25 22:44:05 2009 -0400 +++ b/.gitignore Wed Sep 30 14:39:49 2009 -0500 @@ -1,1 +1,5 @@ *.pyc +tests/*.err +build +dist +*.egg-info diff -r 3e0eb85a83a7 -r 505d7cdca198 .hgignore --- a/.hgignore Fri Sep 25 22:44:05 2009 -0400 +++ b/.hgignore Wed Sep 30 14:39:49 2009 -0500 @@ -1,4 +1,6 @@ syntax: glob - *.pyc tests/*.err +build +dist +*.egg-info diff -r 3e0eb85a83a7 -r 505d7cdca198 __init__.py --- a/__init__.py Fri Sep 25 22:44:05 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -# git.py - git server bridge -# -# Copyright 2008 Scott Chacon -# also some code (and help) borrowed from durin42 -# -# This software may be used and distributed according to the terms -# of the GNU General Public License, incorporated herein by reference. - -'''push and pull from a Git server - -This extension lets you communicate (push and pull) with a Git server. -This way you can use Git hosting for your project or collaborate with a -project that is in Git. A bridger of worlds, this plugin be. - -Try hg clone git:// or hg clone git+ssh:// -''' - -from mercurial import commands, extensions, hg, util -from mercurial.i18n import _ - -from dulwich.repo import Repo -from dulwich.errors import NotGitRepository - -import gitrepo, hgrepo -from git_handler import GitHandler - -# support for `hg clone git://github.com/defunkt/facebox.git` -# also hg clone git+ssh://git@github.com/schacon/simplegit.git -hg.schemes['git'] = gitrepo -hg.schemes['git+ssh'] = gitrepo - -_oldlocal = hg.schemes['file'] - -def _local(path): - p = util.drop_scheme('file', path) - try: - Repo(p) - return gitrepo - except NotGitRepository: - return _oldlocal(path) - -hg.schemes['file'] = _local - -def reposetup(ui, repo): - klass = hgrepo.generate_repo_subclass(repo.__class__) - repo.__class__ = klass - -def gimport(ui, repo, remote_name=None): - git = GitHandler(repo, ui) - git.import_commits(remote_name) - -def gexport(ui, repo): - git = GitHandler(repo, ui) - git.export_commits() - -def gclear(ui, repo): - repo.ui.status(_("clearing out the git cache data\n")) - git = GitHandler(repo, ui) - git.clear() - -cmdtable = { - "gimport": - (gimport, [], _('hg gimport')), - "gexport": - (gexport, [], _('hg gexport')), - "gclear": - (gclear, [], _('Clears out the Git cached data')), -} diff -r 3e0eb85a83a7 -r 505d7cdca198 git_handler.py --- a/git_handler.py Fri Sep 25 22:44:05 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,844 +0,0 @@ -import os, sys, math, urllib, re -import toposort - -from dulwich.errors import HangupException -from dulwich.index import commit_tree -from dulwich.objects import Blob, Commit, Tag, Tree, parse_timezone -from dulwich.pack import create_delta, apply_delta -from dulwich.repo import Repo - -from hgext import bookmarks -from mercurial.i18n import _ -from mercurial.node import hex, bin, nullid -from mercurial import context, util as hgutil - -try: - from mercurial.error import RepoError -except ImportError: - from mercurial.repo import RepoError - - -class GitHandler(object): - - def __init__(self, dest_repo, ui): - self.repo = dest_repo - self.ui = ui - self.mapfile = 'git-mapfile' - self.tagsfile = 'git-tags' - - if ui.config('git', 'intree'): - self.gitdir = self.repo.wjoin('.git') - else: - self.gitdir = self.repo.join('git') - - self.paths = ui.configitems('paths') - - self.init_if_missing() - self.load_git() - self.load_map() - self.load_tags() - - # make the git data directory - def init_if_missing(self): - if not os.path.exists(self.gitdir): - os.mkdir(self.gitdir) - Repo.init_bare(self.gitdir) - - def load_git(self): - self.git = Repo(self.gitdir) - - ## FILE LOAD AND SAVE METHODS - - def map_set(self, gitsha, hgsha): - self._map_git[gitsha] = hgsha - self._map_hg[hgsha] = gitsha - - def map_hg_get(self, gitsha): - return self._map_git.get(gitsha) - - def map_git_get(self, hgsha): - return self._map_hg.get(hgsha) - - def load_map(self): - self._map_git = {} - self._map_hg = {} - if os.path.exists(self.repo.join(self.mapfile)): - for line in self.repo.opener(self.mapfile): - gitsha, hgsha = line.strip().split(' ', 1) - self._map_git[gitsha] = hgsha - self._map_hg[hgsha] = gitsha - - def save_map(self): - file = self.repo.opener(self.mapfile, 'w+', atomictemp=True) - for hgsha, gitsha in sorted(self._map_hg.iteritems()): - file.write("%s %s\n" % (gitsha, hgsha)) - file.rename() - - - def load_tags(self): - self.tags = {} - if os.path.exists(self.repo.join(self.tagsfile)): - for line in self.repo.opener(self.tagsfile): - sha, name = line.strip().split(' ', 1) - self.tags[name] = sha - - def save_tags(self): - file = self.repo.opener(self.tagsfile, 'w+', atomictemp=True) - for name, sha in sorted(self.tags.iteritems()): - if not self.repo.tagtype(name) == 'global': - file.write("%s %s\n" % (sha, name)) - file.rename() - - ## END FILE LOAD AND SAVE METHODS - - ## COMMANDS METHODS - - def import_commits(self, remote_name): - self.import_git_objects(remote_name) - self.save_map() - - def fetch(self, remote, heads): - self.export_commits() - refs = self.fetch_pack(remote, heads) - remote_name = self.remote_name(remote) - - if refs: - self.import_git_objects(remote_name, refs) - self.import_tags(refs) - self.update_hg_bookmarks(refs) - if remote_name: - self.update_remote_branches(remote_name, refs) - elif not self.paths: - # intial cloning - self.update_remote_branches('default', refs) - else: - self.ui.status(_("nothing new on the server\n")) - - self.save_map() - - def export_commits(self): - try: - self.export_git_objects() - self.export_hg_tags() - self.update_references() - finally: - self.save_map() - - def get_refs(self, remote): - self.export_commits() - client, path = self.get_transport_and_path(remote) - old_refs = {} - new_refs = {} - def changed(refs): - old_refs.update(refs) - to_push = set(self.local_heads().values() + self.tags.values()) - new_refs.update(self.get_changed_refs(refs, to_push, True)) - # don't push anything - return {} - - try: - client.send_pack(path, changed, None) - - changed_refs = [ref for ref, sha in new_refs.iteritems() - if sha != old_refs.get(ref)] - new = [bin(self.map_hg_get(new_refs[ref])) for ref in changed_refs] - old = dict( (bin(self.map_hg_get(old_refs[r])), 1) - for r in changed_refs if r in old_refs) - - return old, new - except HangupException: - raise hgutil.Abort("the remote end hung up unexpectedly") - - def push(self, remote, revs, force): - self.export_commits() - changed_refs = self.upload_pack(remote, revs, force) - remote_name = self.remote_name(remote) - - if remote_name and changed_refs: - for ref, sha in changed_refs.iteritems(): - self.ui.status(" "+ remote_name + "::" + ref + " => GIT:" + sha[0:8] + "\n") - - self.update_remote_branches(remote_name, changed_refs) - - def clear(self): - mapfile = self.repo.join(self.mapfile) - if os.path.exists(self.gitdir): - for root, dirs, files in os.walk(self.gitdir, topdown=False): - for name in files: - os.remove(os.path.join(root, name)) - for name in dirs: - os.rmdir(os.path.join(root, name)) - os.rmdir(self.gitdir) - if os.path.exists(mapfile): - os.remove(mapfile) - - ## CHANGESET CONVERSION METHODS - - def export_git_objects(self): - self.ui.status(_("importing Hg objects into Git\n")) - nodes = [self.repo.lookup(n) for n in self.repo] - export = [node for node in nodes if not hex(node) in self._map_hg] - total = len(export) - if total: - magnitude = int(math.log(total, 10)) + 1 - else: - magnitude = 1 - for i, rev in enumerate(export): - if i%100 == 0: - self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total)) - - ctx = self.repo.changectx(rev) - state = ctx.extra().get('hg-git', None) - if state == 'octopus': - self.ui.debug("revision %d is a part of octopus explosion\n" % ctx.rev()) - continue - self.export_hg_commit(rev) - - # convert this commit into git objects - # go through the manifest, convert all blobs/trees we don't have - # write the commit object (with metadata info) - def export_hg_commit(self, rev): - self.ui.note(_("converting revision %s\n") % rev) - - oldenc = self.swap_out_encoding() - - ctx = self.repo.changectx(rev) - extra = ctx.extra() - - commit = Commit() - - (time, timezone) = ctx.date() - commit.author = self.get_git_author(ctx) - commit.author_time = int(time) - commit.author_timezone = -timezone - - if 'committer' in extra: - # fixup timezone - (name, timestamp, timezone) = extra['committer'].rsplit(' ', 2) - commit.committer = name - commit.commit_time = timestamp - - # work around a timezone format change - if int(timezone) % 60 != 0: #pragma: no cover - timezone = parse_timezone(timezone) - else: - timezone = -int(timezone) - commit.commit_timezone = timezone - else: - commit.committer = commit.author - commit.commit_time = commit.author_time - commit.commit_timezone = commit.author_timezone - - commit.parents = [] - for parent in self.get_git_parents(ctx): - hgsha = hex(parent.node()) - git_sha = self.map_git_get(hgsha) - if git_sha: - commit.parents.append(git_sha) - - commit.message = self.get_git_message(ctx) - - if 'encoding' in extra: - commit.encoding = extra['encoding'] - - tree_sha = commit_tree(self.git.object_store, self.iterblobs(ctx)) - commit.tree = tree_sha - - self.git.object_store.add_object(commit) - self.map_set(commit.id, ctx.hex()) - - self.swap_out_encoding(oldenc) - return commit.id - - def get_git_author(self, ctx): - # hg authors might not have emails - author = ctx.user() - - # check for git author pattern compliance - regex = re.compile('^(.*?) \<(.*?)\>(.*)$') - a = regex.match(author) - - if a: - name = a.group(1) - email = a.group(2) - if len(a.group(3)) > 0: - name += ' ext:(' + urllib.quote(a.group(3)) + ')' - author = name + ' <' + email + '>' - else: - author = author + ' ' - - if 'author' in ctx.extra(): - author = apply_delta(author, ctx.extra()['author']) - - return author - - def get_git_parents(self, ctx): - def is_octopus_part(ctx): - return ctx.extra().get('hg-git', None) in ('octopus', 'octopus-done') - - parents = [] - if ctx.extra().get('hg-git', None) == 'octopus-done': - # implode octopus parents - part = ctx - while is_octopus_part(part): - (p1, p2) = part.parents() - assert not is_octopus_part(p1) - parents.append(p1) - part = p2 - parents.append(p2) - else: - parents = ctx.parents() - - return parents - - def get_git_message(self, ctx): - extra = ctx.extra() - - message = ctx.description() + "\n" - if 'message' in extra: - message = apply_delta(message, extra['message']) - - # HG EXTRA INFORMATION - add_extras = False - extra_message = '' - if not ctx.branch() == 'default': - add_extras = True - extra_message += "branch : " + ctx.branch() + "\n" - - renames = [] - for f in ctx.files(): - if f not in ctx.manifest(): - continue - rename = ctx.filectx(f).renamed() - if rename: - renames.append((rename[0], f)) - - if renames: - add_extras = True - for oldfile, newfile in renames: - extra_message += "rename : " + oldfile + " => " + newfile + "\n" - - for key, value in extra.iteritems(): - if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'): - continue - else: - add_extras = True - extra_message += "extra : " + key + " : " + urllib.quote(value) + "\n" - - if add_extras: - message += "\n--HG--\n" + extra_message - - return message - - def iterblobs(self, ctx): - for f in ctx: - fctx = ctx[f] - blobid = self.map_git_get(hex(fctx.filenode())) - - if not blobid: - blob = Blob.from_string(fctx.data()) - self.git.object_store.add_object(blob) - self.map_set(blob.id, hex(fctx.filenode())) - blobid = blob.id - - if 'l' in ctx.flags(f): - mode = 0120000 - elif 'x' in ctx.flags(f): - mode = 0100755 - else: - mode = 0100644 - - yield f, blobid, mode - - def import_git_objects(self, remote_name=None, refs=None): - self.ui.status(_("importing Git objects into Hg\n")) - # import heads and fetched tags as remote references - todo = [] - done = set() - convert_list = {} - - # get a list of all the head shas - if refs: - for head, sha in refs.iteritems(): - # refs contains all the refs in the server, not just the ones - # we are pulling - if sha in self.git.object_store: - todo.append(sha) - else: - todo = self.git.refs.values()[:] - - # traverse the heads getting a list of all the unique commits - while todo: - sha = todo.pop() - assert isinstance(sha, str) - if sha in done: - continue - done.add(sha) - obj = self.git.get_object(sha) - if isinstance (obj, Commit): - convert_list[sha] = obj - todo.extend([p for p in obj.parents if p not in done]) - if isinstance(obj, Tag): - (obj_type, obj_sha) = obj.get_object() - obj = self.git.get_object(obj_sha) - if isinstance (obj, Commit): - convert_list[sha] = obj - todo.extend([p for p in obj.parents if p not in done]) - - # sort the commits - commits = toposort.TopoSort(convert_list).items() - - commits = [commit for commit in commits if not commit in self._map_git] - # import each of the commits, oldest first - total = len(commits) - if total: - magnitude = int(math.log(total, 10)) + 1 - else: - magnitude = 1 - for i, csha in enumerate(commits): - if i%100 == 0: - self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total)) - commit = convert_list[csha] - self.import_git_commit(commit) - - def import_git_commit(self, commit): - self.ui.debug(_("importing: %s\n") % commit.id) - # TODO: Do something less coarse-grained than try/except on the - # get_file call for removed files - - (strip_message, hg_renames, hg_branch, extra) = self.extract_hg_metadata(commit.message) - - # get a list of the changed, added, removed files - files = self.get_files_changed(commit) - - date = (commit.author_time, -commit.author_timezone) - text = strip_message - - origtext = text - try: - text.decode('utf-8') - except UnicodeDecodeError: - text = self.decode_guess(text, commit.encoding) - - text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n') - if text + '\n' != origtext: - extra['message'] = create_delta(text +'\n', origtext) - - author = commit.author - - # convert extra data back to the end - if ' ext:' in commit.author: - regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$') - m = regex.match(commit.author) - if m: - name = m.group(1) - ex = urllib.unquote(m.group(2)) - email = m.group(3) - author = name + ' <' + email + '>' + ex - - if ' ' in commit.author: - author = commit.author[:-12] - - try: - author.decode('utf-8') - except UnicodeDecodeError: - origauthor = author - author = self.decode_guess(author, commit.encoding) - extra['author'] = create_delta(author, origauthor) - - oldenc = self.swap_out_encoding() - - def getfilectx(repo, memctx, f): - try: - (mode, sha, data) = self.get_file(commit, f) - e = self.convert_git_int_mode(mode) - except (TypeError, KeyError): - raise IOError() - if f in hg_renames: - copied_path = hg_renames[f] - else: - copied_path = None - return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path) - - gparents = map(self.map_hg_get, commit.parents) - p1, p2 = (nullid, nullid) - octopus = False - - if len(gparents) > 1: - # merge, possibly octopus - def commit_octopus(p1, p2): - ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, - author, date, {'hg-git': 'octopus'}) - return hex(self.repo.commitctx(ctx)) - - octopus = len(gparents) > 2 - p2 = gparents.pop() - p1 = gparents.pop() - while len(gparents) > 0: - p2 = commit_octopus(p1, p2) - p1 = gparents.pop() - else: - if gparents: - p1 = gparents.pop() - - files = list(set(files)) - - pa = None - if not (p2 == nullid): - node1 = self.repo.changectx(p1) - node2 = self.repo.changectx(p2) - pa = node1.ancestor(node2) - - # if named branch, add to extra - if hg_branch: - extra['branch'] = hg_branch - - # if committer is different than author, add it to extra - if commit.author != commit.committer \ - or commit.author_time != commit.commit_time \ - or commit.author_timezone != commit.commit_timezone: - extra['committer'] = "%s %d %d" % (commit.committer, commit.commit_time, -commit.commit_timezone) - - if commit.encoding: - extra['encoding'] = commit.encoding - - if hg_branch: - extra['branch'] = hg_branch - - if octopus: - extra['hg-git'] ='octopus-done' - - ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, - author, date, extra) - - node = self.repo.commitctx(ctx) - - self.swap_out_encoding(oldenc) - - # save changeset to mapping file - cs = hex(node) - self.map_set(commit.id, cs) - - ## PACK UPLOADING AND FETCHING - - def upload_pack(self, remote, revs, force): - client, path = self.get_transport_and_path(remote) - def changed(refs): - to_push = revs or set(self.local_heads().values() + self.tags.values()) - return self.get_changed_refs(refs, to_push, force) - - genpack = self.git.object_store.generate_pack_contents - try: - self.ui.status(_("creating and sending data\n")) - changed_refs = client.send_pack(path, changed, genpack) - return changed_refs - except HangupException: - raise hgutil.Abort("the remote end hung up unexpectedly") - - def get_changed_refs(self, refs, revs, force): - new_refs = refs.copy() - - #The remote repo is empty and the local one doesn't have bookmarks/tags - if refs.keys()[0] == 'capabilities^{}': - del new_refs['capabilities^{}'] - if not self.local_heads(): - tip = hex(self.repo.lookup('tip')) - bookmarks.bookmark(self.ui, self.repo, 'master', tip) - bookmarks.setcurrent(self.repo, 'master') - new_refs['refs/heads/master'] = self.map_git_get(tip) - - for rev in revs: - ctx = self.repo[rev] - heads = [t for t in ctx.tags() if t in self.local_heads()] - tags = [t for t in ctx.tags() if t in self.tags] - - if not (heads or tags): - raise hgutil.Abort("revision %s cannot be pushed since" - " it doesn't have a ref" % ctx) - - for r in heads + tags: - if r in heads: - ref = 'refs/heads/'+r - else: - ref = 'refs/tags/'+r - - if ref not in refs: - new_refs[ref] = self.map_git_get(ctx.hex()) - elif new_refs[ref] in self._map_git: - rctx = self.repo[self.map_hg_get(new_refs[ref])] - if rctx.ancestor(ctx) == rctx or force: - new_refs[ref] = self.map_git_get(ctx.hex()) - else: - raise hgutil.Abort("pushing %s overwrites %s" - % (ref, ctx)) - else: - raise hgutil.Abort("%s changed on the server, please pull " - "and merge before pushing" % ref) - - return new_refs - - - def fetch_pack(self, remote_name, heads): - client, path = self.get_transport_and_path(remote_name) - graphwalker = self.git.get_graph_walker() - def determine_wants(refs): - if heads: - want = [] - for h in heads: - r = [ref for ref in refs if ref.endswith('/'+h)] - if not r: - raise hgutil.Abort("ref %s not found on remote server") - elif len(r) == 1: - want.append(refs[r[0]]) - else: - raise hgutil.Abort("ambiguous reference %s: %r"%(h, r)) - else: - want = [sha for ref, sha in refs.iteritems() - if not ref.endswith('^{}')] - return want - f, commit = self.git.object_store.add_pack() - try: - return client.fetch_pack(path, determine_wants, graphwalker, f.write, self.ui.status) - except HangupException: - raise hgutil.Abort("the remote end hung up unexpectedly") - finally: - commit() - - ## REFERENCES HANDLING - - def update_references(self): - heads = self.local_heads() - - # Create a local Git branch name for each - # Mercurial bookmark. - for key in heads: - self.git.refs['refs/heads/' + key] = self.map_git_get(heads[key]) - - def export_hg_tags(self): - for tag, sha in self.repo.tags().iteritems(): - if self.repo.tagtype(tag) in ('global', 'git'): - self.git.refs['refs/tags/' + tag] = self.map_git_get(hex(sha)) - self.tags[tag] = hex(sha) - - def local_heads(self): - try: - bms = bookmarks.parse(self.repo) - return dict([(bm, hex(bms[bm])) for bm in bms]) - except AttributeError: #pragma: no cover - return {} - - def import_tags(self, refs): - keys = refs.keys() - if not keys: - return - for k in keys[:]: - ref_name = k - parts = k.split('/') - if parts[0] == 'refs' and parts[1] == 'tags': - ref_name = "/".join([v for v in parts[2:]]) - # refs contains all the refs in the server, not just - # the ones we are pulling - if refs[k] not in self.git.object_store: - continue - if ref_name[-3:] == '^{}': - ref_name = ref_name[:-3] - if not ref_name in self.repo.tags(): - obj = self.git.get_object(refs[k]) - sha = None - if isinstance (obj, Commit): # lightweight - sha = self.map_hg_get(refs[k]) - self.tags[ref_name] = sha - elif isinstance (obj, Tag): # annotated - (obj_type, obj_sha) = obj.get_object() - obj = self.git.get_object(obj_sha) - if isinstance (obj, Commit): - sha = self.map_hg_get(obj_sha) - # TODO: better handling for annotated tags - self.tags[ref_name] = sha - self.save_tags() - - def update_hg_bookmarks(self, refs): - try: - bms = bookmarks.parse(self.repo) - heads = dict([(ref[11:],refs[ref]) for ref in refs - if ref.startswith('refs/heads/')]) - - for head, sha in heads.iteritems(): - # refs contains all the refs in the server, not just - # the ones we are pulling - if sha not in self.git.object_store: - continue - hgsha = bin(self.map_hg_get(sha)) - if not head in bms: - # new branch - bms[head] = hgsha - else: - bm = self.repo[bms[head]] - if bm.ancestor(self.repo[hgsha]) == bm: - # fast forward - bms[head] = hgsha - if heads: - bookmarks.write(self.repo, bms) - - except AttributeError: - self.ui.warn(_('creating bookmarks failed, do you have' - ' bookmarks enabled?\n')) - - def update_remote_branches(self, remote_name, refs): - heads = dict([(ref[11:],refs[ref]) for ref in refs - if ref.startswith('refs/heads/')]) - - for head, sha in heads.iteritems(): - # refs contains all the refs in the server, not just the ones - # we are pulling - if sha not in self.git.object_store: - continue - hgsha = bin(self.map_hg_get(sha)) - tag = '%s/%s' % (remote_name, head) - self.repo.tag(tag, hgsha, '', True, None, None) - - for ref_name in refs: - if ref_name.startswith('refs/heads'): - new_ref = 'refs/remotes/%s/%s' % (remote_name, ref_name[10:]) - self.git.refs[new_ref] = refs[ref_name] - elif ref_name.startswith('refs/tags'): - self.git.refs[ref_name] = refs[ref_name] - - - ## UTILITY FUNCTIONS - - def convert_git_int_mode(self, mode): - # TODO: make these into constants - convert = { - 0100644: '', - 0100755: 'x', - 0120000: 'l'} - if mode in convert: - return convert[mode] - return '' - - def extract_hg_metadata(self, message): - split = message.split("\n--HG--\n", 1) - renames = {} - extra = {} - branch = False - if len(split) == 2: - message, meta = split - lines = meta.split("\n") - for line in lines: - if line == '': - continue - - command, data = line.split(" : ", 1) - - if command == 'rename': - before, after = data.split(" => ", 1) - renames[after] = before - if command == 'branch': - branch = data - if command == 'extra': - before, after = data.split(" : ", 1) - extra[before] = urllib.unquote(after) - return (message, renames, branch, extra) - - def get_file(self, commit, f): - otree = self.git.tree(commit.tree) - parts = f.split('/') - for part in parts: - (mode, sha) = otree[part] - obj = self.git.get_object(sha) - if isinstance (obj, Blob): - return (mode, sha, obj._text) - elif isinstance(obj, Tree): - otree = obj - - def get_files_changed(self, commit): - def filenames(basetree, comptree, prefix): - basefiles = set() - changes = list() - csha = None - cmode = None - if basetree is not None: - for (bmode, bname, bsha) in basetree.entries(): - if bmode == 0160000: # TODO: properly handle submodules - continue - basefiles.add(bname) - bobj = self.git.get_object(bsha) - if comptree is not None: - if bname in comptree: - (cmode, csha) = comptree[bname] - else: - (cmode, csha) = (None, None) - if not ((csha == bsha) and (cmode == bmode)): - if isinstance (bobj, Blob): - changes.append (prefix + bname) - elif isinstance(bobj, Tree): - ctree = None - if csha: - ctree = self.git.get_object(csha) - changes.extend(filenames(bobj, - ctree, - prefix + bname + '/')) - - # handle removals - if comptree is not None: - for (bmode, bname, bsha) in comptree.entries(): - if bmode == 0160000: # TODO: handle submodles - continue - if bname not in basefiles: - bobj = self.git.get_object(bsha) - if isinstance(bobj, Blob): - changes.append(prefix + bname) - elif isinstance(bobj, Tree): - changes.extend(filenames(None, bobj, - prefix + bname + '/')) - return changes - - all_changes = list() - otree = self.git.tree(commit.tree) - if len(commit.parents) == 0: - all_changes = filenames(otree, None, '') - for parent in commit.parents: - pcommit = self.git.commit(parent) - ptree = self.git.tree(pcommit.tree) - all_changes.extend(filenames(otree, ptree, '')) - - return all_changes - - def remote_name(self, remote): - names = [name for name, path in self.paths if path == remote] - if names: - return names[0] - - # Stolen from hgsubversion - def swap_out_encoding(self, new_encoding='UTF-8'): - try: - from mercurial import encoding - old = encoding.encoding - encoding.encoding = new_encoding - except ImportError: - old = hgutil._encoding - hgutil._encoding = new_encoding - return old - - def decode_guess(self, string, encoding): - # text is not valid utf-8, try to make sense of it - if encoding: - try: - return string.decode(encoding).encode('utf-8') - except UnicodeDecodeError: - pass - - try: - return string.decode('latin-1').encode('utf-8') - except UnicodeDecodeError: - return string.decode('ascii', 'replace').encode('utf-8') - - def get_transport_and_path(self, uri): - from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient - for handler, transport in (("git://", TCPGitClient), ("git@", SSHGitClient), ("git+ssh://", SSHGitClient)): - if uri.startswith(handler): - host, path = uri[len(handler):].split("/", 1) - return transport(host, thin_packs=False), '/' + path - # if its not git or git+ssh, try a local url.. - return SubprocessGitClient(thin_packs=False), uri diff -r 3e0eb85a83a7 -r 505d7cdca198 gitrepo.py --- a/gitrepo.py Fri Sep 25 22:44:05 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -from mercurial import repo, util -from git_handler import GitHandler - -class gitrepo(repo.repository): - capabilities = ['lookup'] - def __init__(self, ui, path, create): - if create: # pragma: no cover - raise util.Abort('Cannot create a git repository.') - self.path = path - def lookup(self, key): - if isinstance(key, str): - return key - -instance = gitrepo diff -r 3e0eb85a83a7 -r 505d7cdca198 hggit/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hggit/__init__.py Wed Sep 30 14:39:49 2009 -0500 @@ -0,0 +1,68 @@ +# git.py - git server bridge +# +# Copyright 2008 Scott Chacon +# also some code (and help) borrowed from durin42 +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. + +'''push and pull from a Git server + +This extension lets you communicate (push and pull) with a Git server. +This way you can use Git hosting for your project or collaborate with a +project that is in Git. A bridger of worlds, this plugin be. + +Try hg clone git:// or hg clone git+ssh:// +''' + +from mercurial import commands, extensions, hg, util +from mercurial.i18n import _ + +from dulwich.repo import Repo +from dulwich.errors import NotGitRepository + +import gitrepo, hgrepo +from git_handler import GitHandler + +# support for `hg clone git://github.com/defunkt/facebox.git` +# also hg clone git+ssh://git@github.com/schacon/simplegit.git +hg.schemes['git'] = gitrepo +hg.schemes['git+ssh'] = gitrepo + +_oldlocal = hg.schemes['file'] + +def _local(path): + p = util.drop_scheme('file', path) + try: + Repo(p) + return gitrepo + except NotGitRepository: + return _oldlocal(path) + +hg.schemes['file'] = _local + +def reposetup(ui, repo): + klass = hgrepo.generate_repo_subclass(repo.__class__) + repo.__class__ = klass + +def gimport(ui, repo, remote_name=None): + git = GitHandler(repo, ui) + git.import_commits(remote_name) + +def gexport(ui, repo): + git = GitHandler(repo, ui) + git.export_commits() + +def gclear(ui, repo): + repo.ui.status(_("clearing out the git cache data\n")) + git = GitHandler(repo, ui) + git.clear() + +cmdtable = { + "gimport": + (gimport, [], _('hg gimport')), + "gexport": + (gexport, [], _('hg gexport')), + "gclear": + (gclear, [], _('Clears out the Git cached data')), +} diff -r 3e0eb85a83a7 -r 505d7cdca198 hggit/git_handler.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hggit/git_handler.py Wed Sep 30 14:39:49 2009 -0500 @@ -0,0 +1,844 @@ +import os, sys, math, urllib, re +import toposort + +from dulwich.errors import HangupException +from dulwich.index import commit_tree +from dulwich.objects import Blob, Commit, Tag, Tree, parse_timezone +from dulwich.pack import create_delta, apply_delta +from dulwich.repo import Repo + +from hgext import bookmarks +from mercurial.i18n import _ +from mercurial.node import hex, bin, nullid +from mercurial import context, util as hgutil + +try: + from mercurial.error import RepoError +except ImportError: + from mercurial.repo import RepoError + + +class GitHandler(object): + + def __init__(self, dest_repo, ui): + self.repo = dest_repo + self.ui = ui + self.mapfile = 'git-mapfile' + self.tagsfile = 'git-tags' + + if ui.config('git', 'intree'): + self.gitdir = self.repo.wjoin('.git') + else: + self.gitdir = self.repo.join('git') + + self.paths = ui.configitems('paths') + + self.init_if_missing() + self.load_git() + self.load_map() + self.load_tags() + + # make the git data directory + def init_if_missing(self): + if not os.path.exists(self.gitdir): + os.mkdir(self.gitdir) + Repo.init_bare(self.gitdir) + + def load_git(self): + self.git = Repo(self.gitdir) + + ## FILE LOAD AND SAVE METHODS + + def map_set(self, gitsha, hgsha): + self._map_git[gitsha] = hgsha + self._map_hg[hgsha] = gitsha + + def map_hg_get(self, gitsha): + return self._map_git.get(gitsha) + + def map_git_get(self, hgsha): + return self._map_hg.get(hgsha) + + def load_map(self): + self._map_git = {} + self._map_hg = {} + if os.path.exists(self.repo.join(self.mapfile)): + for line in self.repo.opener(self.mapfile): + gitsha, hgsha = line.strip().split(' ', 1) + self._map_git[gitsha] = hgsha + self._map_hg[hgsha] = gitsha + + def save_map(self): + file = self.repo.opener(self.mapfile, 'w+', atomictemp=True) + for hgsha, gitsha in sorted(self._map_hg.iteritems()): + file.write("%s %s\n" % (gitsha, hgsha)) + file.rename() + + + def load_tags(self): + self.tags = {} + if os.path.exists(self.repo.join(self.tagsfile)): + for line in self.repo.opener(self.tagsfile): + sha, name = line.strip().split(' ', 1) + self.tags[name] = sha + + def save_tags(self): + file = self.repo.opener(self.tagsfile, 'w+', atomictemp=True) + for name, sha in sorted(self.tags.iteritems()): + if not self.repo.tagtype(name) == 'global': + file.write("%s %s\n" % (sha, name)) + file.rename() + + ## END FILE LOAD AND SAVE METHODS + + ## COMMANDS METHODS + + def import_commits(self, remote_name): + self.import_git_objects(remote_name) + self.save_map() + + def fetch(self, remote, heads): + self.export_commits() + refs = self.fetch_pack(remote, heads) + remote_name = self.remote_name(remote) + + if refs: + self.import_git_objects(remote_name, refs) + self.import_tags(refs) + self.update_hg_bookmarks(refs) + if remote_name: + self.update_remote_branches(remote_name, refs) + elif not self.paths: + # intial cloning + self.update_remote_branches('default', refs) + else: + self.ui.status(_("nothing new on the server\n")) + + self.save_map() + + def export_commits(self): + try: + self.export_git_objects() + self.export_hg_tags() + self.update_references() + finally: + self.save_map() + + def get_refs(self, remote): + self.export_commits() + client, path = self.get_transport_and_path(remote) + old_refs = {} + new_refs = {} + def changed(refs): + old_refs.update(refs) + to_push = set(self.local_heads().values() + self.tags.values()) + new_refs.update(self.get_changed_refs(refs, to_push, True)) + # don't push anything + return {} + + try: + client.send_pack(path, changed, None) + + changed_refs = [ref for ref, sha in new_refs.iteritems() + if sha != old_refs.get(ref)] + new = [bin(self.map_hg_get(new_refs[ref])) for ref in changed_refs] + old = dict( (bin(self.map_hg_get(old_refs[r])), 1) + for r in changed_refs if r in old_refs) + + return old, new + except HangupException: + raise hgutil.Abort("the remote end hung up unexpectedly") + + def push(self, remote, revs, force): + self.export_commits() + changed_refs = self.upload_pack(remote, revs, force) + remote_name = self.remote_name(remote) + + if remote_name and changed_refs: + for ref, sha in changed_refs.iteritems(): + self.ui.status(" "+ remote_name + "::" + ref + " => GIT:" + sha[0:8] + "\n") + + self.update_remote_branches(remote_name, changed_refs) + + def clear(self): + mapfile = self.repo.join(self.mapfile) + if os.path.exists(self.gitdir): + for root, dirs, files in os.walk(self.gitdir, topdown=False): + for name in files: + os.remove(os.path.join(root, name)) + for name in dirs: + os.rmdir(os.path.join(root, name)) + os.rmdir(self.gitdir) + if os.path.exists(mapfile): + os.remove(mapfile) + + ## CHANGESET CONVERSION METHODS + + def export_git_objects(self): + self.ui.status(_("importing Hg objects into Git\n")) + nodes = [self.repo.lookup(n) for n in self.repo] + export = [node for node in nodes if not hex(node) in self._map_hg] + total = len(export) + if total: + magnitude = int(math.log(total, 10)) + 1 + else: + magnitude = 1 + for i, rev in enumerate(export): + if i%100 == 0: + self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total)) + + ctx = self.repo.changectx(rev) + state = ctx.extra().get('hg-git', None) + if state == 'octopus': + self.ui.debug("revision %d is a part of octopus explosion\n" % ctx.rev()) + continue + self.export_hg_commit(rev) + + # convert this commit into git objects + # go through the manifest, convert all blobs/trees we don't have + # write the commit object (with metadata info) + def export_hg_commit(self, rev): + self.ui.note(_("converting revision %s\n") % rev) + + oldenc = self.swap_out_encoding() + + ctx = self.repo.changectx(rev) + extra = ctx.extra() + + commit = Commit() + + (time, timezone) = ctx.date() + commit.author = self.get_git_author(ctx) + commit.author_time = int(time) + commit.author_timezone = -timezone + + if 'committer' in extra: + # fixup timezone + (name, timestamp, timezone) = extra['committer'].rsplit(' ', 2) + commit.committer = name + commit.commit_time = timestamp + + # work around a timezone format change + if int(timezone) % 60 != 0: #pragma: no cover + timezone = parse_timezone(timezone) + else: + timezone = -int(timezone) + commit.commit_timezone = timezone + else: + commit.committer = commit.author + commit.commit_time = commit.author_time + commit.commit_timezone = commit.author_timezone + + commit.parents = [] + for parent in self.get_git_parents(ctx): + hgsha = hex(parent.node()) + git_sha = self.map_git_get(hgsha) + if git_sha: + commit.parents.append(git_sha) + + commit.message = self.get_git_message(ctx) + + if 'encoding' in extra: + commit.encoding = extra['encoding'] + + tree_sha = commit_tree(self.git.object_store, self.iterblobs(ctx)) + commit.tree = tree_sha + + self.git.object_store.add_object(commit) + self.map_set(commit.id, ctx.hex()) + + self.swap_out_encoding(oldenc) + return commit.id + + def get_git_author(self, ctx): + # hg authors might not have emails + author = ctx.user() + + # check for git author pattern compliance + regex = re.compile('^(.*?) \<(.*?)\>(.*)$') + a = regex.match(author) + + if a: + name = a.group(1) + email = a.group(2) + if len(a.group(3)) > 0: + name += ' ext:(' + urllib.quote(a.group(3)) + ')' + author = name + ' <' + email + '>' + else: + author = author + ' ' + + if 'author' in ctx.extra(): + author = apply_delta(author, ctx.extra()['author']) + + return author + + def get_git_parents(self, ctx): + def is_octopus_part(ctx): + return ctx.extra().get('hg-git', None) in ('octopus', 'octopus-done') + + parents = [] + if ctx.extra().get('hg-git', None) == 'octopus-done': + # implode octopus parents + part = ctx + while is_octopus_part(part): + (p1, p2) = part.parents() + assert not is_octopus_part(p1) + parents.append(p1) + part = p2 + parents.append(p2) + else: + parents = ctx.parents() + + return parents + + def get_git_message(self, ctx): + extra = ctx.extra() + + message = ctx.description() + "\n" + if 'message' in extra: + message = apply_delta(message, extra['message']) + + # HG EXTRA INFORMATION + add_extras = False + extra_message = '' + if not ctx.branch() == 'default': + add_extras = True + extra_message += "branch : " + ctx.branch() + "\n" + + renames = [] + for f in ctx.files(): + if f not in ctx.manifest(): + continue + rename = ctx.filectx(f).renamed() + if rename: + renames.append((rename[0], f)) + + if renames: + add_extras = True + for oldfile, newfile in renames: + extra_message += "rename : " + oldfile + " => " + newfile + "\n" + + for key, value in extra.iteritems(): + if key in ('author', 'committer', 'encoding', 'message', 'branch', 'hg-git'): + continue + else: + add_extras = True + extra_message += "extra : " + key + " : " + urllib.quote(value) + "\n" + + if add_extras: + message += "\n--HG--\n" + extra_message + + return message + + def iterblobs(self, ctx): + for f in ctx: + fctx = ctx[f] + blobid = self.map_git_get(hex(fctx.filenode())) + + if not blobid: + blob = Blob.from_string(fctx.data()) + self.git.object_store.add_object(blob) + self.map_set(blob.id, hex(fctx.filenode())) + blobid = blob.id + + if 'l' in ctx.flags(f): + mode = 0120000 + elif 'x' in ctx.flags(f): + mode = 0100755 + else: + mode = 0100644 + + yield f, blobid, mode + + def import_git_objects(self, remote_name=None, refs=None): + self.ui.status(_("importing Git objects into Hg\n")) + # import heads and fetched tags as remote references + todo = [] + done = set() + convert_list = {} + + # get a list of all the head shas + if refs: + for head, sha in refs.iteritems(): + # refs contains all the refs in the server, not just the ones + # we are pulling + if sha in self.git.object_store: + todo.append(sha) + else: + todo = self.git.refs.values()[:] + + # traverse the heads getting a list of all the unique commits + while todo: + sha = todo.pop() + assert isinstance(sha, str) + if sha in done: + continue + done.add(sha) + obj = self.git.get_object(sha) + if isinstance (obj, Commit): + convert_list[sha] = obj + todo.extend([p for p in obj.parents if p not in done]) + if isinstance(obj, Tag): + (obj_type, obj_sha) = obj.get_object() + obj = self.git.get_object(obj_sha) + if isinstance (obj, Commit): + convert_list[sha] = obj + todo.extend([p for p in obj.parents if p not in done]) + + # sort the commits + commits = toposort.TopoSort(convert_list).items() + + commits = [commit for commit in commits if not commit in self._map_git] + # import each of the commits, oldest first + total = len(commits) + if total: + magnitude = int(math.log(total, 10)) + 1 + else: + magnitude = 1 + for i, csha in enumerate(commits): + if i%100 == 0: + self.ui.status(_("at: %*d/%d\n") % (magnitude, i, total)) + commit = convert_list[csha] + self.import_git_commit(commit) + + def import_git_commit(self, commit): + self.ui.debug(_("importing: %s\n") % commit.id) + # TODO: Do something less coarse-grained than try/except on the + # get_file call for removed files + + (strip_message, hg_renames, hg_branch, extra) = self.extract_hg_metadata(commit.message) + + # get a list of the changed, added, removed files + files = self.get_files_changed(commit) + + date = (commit.author_time, -commit.author_timezone) + text = strip_message + + origtext = text + try: + text.decode('utf-8') + except UnicodeDecodeError: + text = self.decode_guess(text, commit.encoding) + + text = '\n'.join([l.rstrip() for l in text.splitlines()]).strip('\n') + if text + '\n' != origtext: + extra['message'] = create_delta(text +'\n', origtext) + + author = commit.author + + # convert extra data back to the end + if ' ext:' in commit.author: + regex = re.compile('^(.*?)\ ext:\((.*)\) <(.*)\>$') + m = regex.match(commit.author) + if m: + name = m.group(1) + ex = urllib.unquote(m.group(2)) + email = m.group(3) + author = name + ' <' + email + '>' + ex + + if ' ' in commit.author: + author = commit.author[:-12] + + try: + author.decode('utf-8') + except UnicodeDecodeError: + origauthor = author + author = self.decode_guess(author, commit.encoding) + extra['author'] = create_delta(author, origauthor) + + oldenc = self.swap_out_encoding() + + def getfilectx(repo, memctx, f): + try: + (mode, sha, data) = self.get_file(commit, f) + e = self.convert_git_int_mode(mode) + except (TypeError, KeyError): + raise IOError() + if f in hg_renames: + copied_path = hg_renames[f] + else: + copied_path = None + return context.memfilectx(f, data, 'l' in e, 'x' in e, copied_path) + + gparents = map(self.map_hg_get, commit.parents) + p1, p2 = (nullid, nullid) + octopus = False + + if len(gparents) > 1: + # merge, possibly octopus + def commit_octopus(p1, p2): + ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, + author, date, {'hg-git': 'octopus'}) + return hex(self.repo.commitctx(ctx)) + + octopus = len(gparents) > 2 + p2 = gparents.pop() + p1 = gparents.pop() + while len(gparents) > 0: + p2 = commit_octopus(p1, p2) + p1 = gparents.pop() + else: + if gparents: + p1 = gparents.pop() + + files = list(set(files)) + + pa = None + if not (p2 == nullid): + node1 = self.repo.changectx(p1) + node2 = self.repo.changectx(p2) + pa = node1.ancestor(node2) + + # if named branch, add to extra + if hg_branch: + extra['branch'] = hg_branch + + # if committer is different than author, add it to extra + if commit.author != commit.committer \ + or commit.author_time != commit.commit_time \ + or commit.author_timezone != commit.commit_timezone: + extra['committer'] = "%s %d %d" % (commit.committer, commit.commit_time, -commit.commit_timezone) + + if commit.encoding: + extra['encoding'] = commit.encoding + + if hg_branch: + extra['branch'] = hg_branch + + if octopus: + extra['hg-git'] ='octopus-done' + + ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx, + author, date, extra) + + node = self.repo.commitctx(ctx) + + self.swap_out_encoding(oldenc) + + # save changeset to mapping file + cs = hex(node) + self.map_set(commit.id, cs) + + ## PACK UPLOADING AND FETCHING + + def upload_pack(self, remote, revs, force): + client, path = self.get_transport_and_path(remote) + def changed(refs): + to_push = revs or set(self.local_heads().values() + self.tags.values()) + return self.get_changed_refs(refs, to_push, force) + + genpack = self.git.object_store.generate_pack_contents + try: + self.ui.status(_("creating and sending data\n")) + changed_refs = client.send_pack(path, changed, genpack) + return changed_refs + except HangupException: + raise hgutil.Abort("the remote end hung up unexpectedly") + + def get_changed_refs(self, refs, revs, force): + new_refs = refs.copy() + + #The remote repo is empty and the local one doesn't have bookmarks/tags + if refs.keys()[0] == 'capabilities^{}': + del new_refs['capabilities^{}'] + if not self.local_heads(): + tip = hex(self.repo.lookup('tip')) + bookmarks.bookmark(self.ui, self.repo, 'master', tip) + bookmarks.setcurrent(self.repo, 'master') + new_refs['refs/heads/master'] = self.map_git_get(tip) + + for rev in revs: + ctx = self.repo[rev] + heads = [t for t in ctx.tags() if t in self.local_heads()] + tags = [t for t in ctx.tags() if t in self.tags] + + if not (heads or tags): + raise hgutil.Abort("revision %s cannot be pushed since" + " it doesn't have a ref" % ctx) + + for r in heads + tags: + if r in heads: + ref = 'refs/heads/'+r + else: + ref = 'refs/tags/'+r + + if ref not in refs: + new_refs[ref] = self.map_git_get(ctx.hex()) + elif new_refs[ref] in self._map_git: + rctx = self.repo[self.map_hg_get(new_refs[ref])] + if rctx.ancestor(ctx) == rctx or force: + new_refs[ref] = self.map_git_get(ctx.hex()) + else: + raise hgutil.Abort("pushing %s overwrites %s" + % (ref, ctx)) + else: + raise hgutil.Abort("%s changed on the server, please pull " + "and merge before pushing" % ref) + + return new_refs + + + def fetch_pack(self, remote_name, heads): + client, path = self.get_transport_and_path(remote_name) + graphwalker = self.git.get_graph_walker() + def determine_wants(refs): + if heads: + want = [] + for h in heads: + r = [ref for ref in refs if ref.endswith('/'+h)] + if not r: + raise hgutil.Abort("ref %s not found on remote server") + elif len(r) == 1: + want.append(refs[r[0]]) + else: + raise hgutil.Abort("ambiguous reference %s: %r"%(h, r)) + else: + want = [sha for ref, sha in refs.iteritems() + if not ref.endswith('^{}')] + return want + f, commit = self.git.object_store.add_pack() + try: + return client.fetch_pack(path, determine_wants, graphwalker, f.write, self.ui.status) + except HangupException: + raise hgutil.Abort("the remote end hung up unexpectedly") + finally: + commit() + + ## REFERENCES HANDLING + + def update_references(self): + heads = self.local_heads() + + # Create a local Git branch name for each + # Mercurial bookmark. + for key in heads: + self.git.refs['refs/heads/' + key] = self.map_git_get(heads[key]) + + def export_hg_tags(self): + for tag, sha in self.repo.tags().iteritems(): + if self.repo.tagtype(tag) in ('global', 'git'): + self.git.refs['refs/tags/' + tag] = self.map_git_get(hex(sha)) + self.tags[tag] = hex(sha) + + def local_heads(self): + try: + bms = bookmarks.parse(self.repo) + return dict([(bm, hex(bms[bm])) for bm in bms]) + except AttributeError: #pragma: no cover + return {} + + def import_tags(self, refs): + keys = refs.keys() + if not keys: + return + for k in keys[:]: + ref_name = k + parts = k.split('/') + if parts[0] == 'refs' and parts[1] == 'tags': + ref_name = "/".join([v for v in parts[2:]]) + # refs contains all the refs in the server, not just + # the ones we are pulling + if refs[k] not in self.git.object_store: + continue + if ref_name[-3:] == '^{}': + ref_name = ref_name[:-3] + if not ref_name in self.repo.tags(): + obj = self.git.get_object(refs[k]) + sha = None + if isinstance (obj, Commit): # lightweight + sha = self.map_hg_get(refs[k]) + self.tags[ref_name] = sha + elif isinstance (obj, Tag): # annotated + (obj_type, obj_sha) = obj.get_object() + obj = self.git.get_object(obj_sha) + if isinstance (obj, Commit): + sha = self.map_hg_get(obj_sha) + # TODO: better handling for annotated tags + self.tags[ref_name] = sha + self.save_tags() + + def update_hg_bookmarks(self, refs): + try: + bms = bookmarks.parse(self.repo) + heads = dict([(ref[11:],refs[ref]) for ref in refs + if ref.startswith('refs/heads/')]) + + for head, sha in heads.iteritems(): + # refs contains all the refs in the server, not just + # the ones we are pulling + if sha not in self.git.object_store: + continue + hgsha = bin(self.map_hg_get(sha)) + if not head in bms: + # new branch + bms[head] = hgsha + else: + bm = self.repo[bms[head]] + if bm.ancestor(self.repo[hgsha]) == bm: + # fast forward + bms[head] = hgsha + if heads: + bookmarks.write(self.repo, bms) + + except AttributeError: + self.ui.warn(_('creating bookmarks failed, do you have' + ' bookmarks enabled?\n')) + + def update_remote_branches(self, remote_name, refs): + heads = dict([(ref[11:],refs[ref]) for ref in refs + if ref.startswith('refs/heads/')]) + + for head, sha in heads.iteritems(): + # refs contains all the refs in the server, not just the ones + # we are pulling + if sha not in self.git.object_store: + continue + hgsha = bin(self.map_hg_get(sha)) + tag = '%s/%s' % (remote_name, head) + self.repo.tag(tag, hgsha, '', True, None, None) + + for ref_name in refs: + if ref_name.startswith('refs/heads'): + new_ref = 'refs/remotes/%s/%s' % (remote_name, ref_name[10:]) + self.git.refs[new_ref] = refs[ref_name] + elif ref_name.startswith('refs/tags'): + self.git.refs[ref_name] = refs[ref_name] + + + ## UTILITY FUNCTIONS + + def convert_git_int_mode(self, mode): + # TODO: make these into constants + convert = { + 0100644: '', + 0100755: 'x', + 0120000: 'l'} + if mode in convert: + return convert[mode] + return '' + + def extract_hg_metadata(self, message): + split = message.split("\n--HG--\n", 1) + renames = {} + extra = {} + branch = False + if len(split) == 2: + message, meta = split + lines = meta.split("\n") + for line in lines: + if line == '': + continue + + command, data = line.split(" : ", 1) + + if command == 'rename': + before, after = data.split(" => ", 1) + renames[after] = before + if command == 'branch': + branch = data + if command == 'extra': + before, after = data.split(" : ", 1) + extra[before] = urllib.unquote(after) + return (message, renames, branch, extra) + + def get_file(self, commit, f): + otree = self.git.tree(commit.tree) + parts = f.split('/') + for part in parts: + (mode, sha) = otree[part] + obj = self.git.get_object(sha) + if isinstance (obj, Blob): + return (mode, sha, obj._text) + elif isinstance(obj, Tree): + otree = obj + + def get_files_changed(self, commit): + def filenames(basetree, comptree, prefix): + basefiles = set() + changes = list() + csha = None + cmode = None + if basetree is not None: + for (bmode, bname, bsha) in basetree.entries(): + if bmode == 0160000: # TODO: properly handle submodules + continue + basefiles.add(bname) + bobj = self.git.get_object(bsha) + if comptree is not None: + if bname in comptree: + (cmode, csha) = comptree[bname] + else: + (cmode, csha) = (None, None) + if not ((csha == bsha) and (cmode == bmode)): + if isinstance (bobj, Blob): + changes.append (prefix + bname) + elif isinstance(bobj, Tree): + ctree = None + if csha: + ctree = self.git.get_object(csha) + changes.extend(filenames(bobj, + ctree, + prefix + bname + '/')) + + # handle removals + if comptree is not None: + for (bmode, bname, bsha) in comptree.entries(): + if bmode == 0160000: # TODO: handle submodles + continue + if bname not in basefiles: + bobj = self.git.get_object(bsha) + if isinstance(bobj, Blob): + changes.append(prefix + bname) + elif isinstance(bobj, Tree): + changes.extend(filenames(None, bobj, + prefix + bname + '/')) + return changes + + all_changes = list() + otree = self.git.tree(commit.tree) + if len(commit.parents) == 0: + all_changes = filenames(otree, None, '') + for parent in commit.parents: + pcommit = self.git.commit(parent) + ptree = self.git.tree(pcommit.tree) + all_changes.extend(filenames(otree, ptree, '')) + + return all_changes + + def remote_name(self, remote): + names = [name for name, path in self.paths if path == remote] + if names: + return names[0] + + # Stolen from hgsubversion + def swap_out_encoding(self, new_encoding='UTF-8'): + try: + from mercurial import encoding + old = encoding.encoding + encoding.encoding = new_encoding + except ImportError: + old = hgutil._encoding + hgutil._encoding = new_encoding + return old + + def decode_guess(self, string, encoding): + # text is not valid utf-8, try to make sense of it + if encoding: + try: + return string.decode(encoding).encode('utf-8') + except UnicodeDecodeError: + pass + + try: + return string.decode('latin-1').encode('utf-8') + except UnicodeDecodeError: + return string.decode('ascii', 'replace').encode('utf-8') + + def get_transport_and_path(self, uri): + from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient + for handler, transport in (("git://", TCPGitClient), ("git@", SSHGitClient), ("git+ssh://", SSHGitClient)): + if uri.startswith(handler): + host, path = uri[len(handler):].split("/", 1) + return transport(host, thin_packs=False), '/' + path + # if its not git or git+ssh, try a local url.. + return SubprocessGitClient(thin_packs=False), uri diff -r 3e0eb85a83a7 -r 505d7cdca198 hggit/gitrepo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hggit/gitrepo.py Wed Sep 30 14:39:49 2009 -0500 @@ -0,0 +1,14 @@ +from mercurial import repo, util +from git_handler import GitHandler + +class gitrepo(repo.repository): + capabilities = ['lookup'] + def __init__(self, ui, path, create): + if create: # pragma: no cover + raise util.Abort('Cannot create a git repository.') + self.path = path + def lookup(self, key): + if isinstance(key, str): + return key + +instance = gitrepo diff -r 3e0eb85a83a7 -r 505d7cdca198 hggit/hgrepo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hggit/hgrepo.py Wed Sep 30 14:39:49 2009 -0500 @@ -0,0 +1,64 @@ +from mercurial.node import bin + +from git_handler import GitHandler +from gitrepo import gitrepo + + +def generate_repo_subclass(baseclass): + class hgrepo(baseclass): + def pull(self, remote, heads=None, force=False): + if isinstance(remote, gitrepo): + git = GitHandler(self, self.ui) + git.fetch(remote.path, heads) + else: #pragma: no cover + return super(hgrepo, self).pull(remote, heads, force) + + def push(self, remote, force=False, revs=None): + if isinstance(remote, gitrepo): + git = GitHandler(self, self.ui) + git.push(remote.path, revs, force) + else: #pragma: no cover + return super(hgrepo, self).push(remote, force, revs) + + def findoutgoing(self, remote, base=None, heads=None, force=False): + if isinstance(remote, gitrepo): + git = GitHandler(self, self.ui) + base, heads = git.get_refs(remote.path) + out, h = super(hgrepo, self).findoutgoing(remote, base, heads, force) + return out + else: #pragma: no cover + return super(hgrepo, self).findoutgoing(remote, base, heads, force) + + def _findtags(self): + (tags, tagtypes) = super(hgrepo, self)._findtags() + + git = GitHandler(self, self.ui) + for tag, rev in git.tags.iteritems(): + if tag in tags: + continue + + tags[tag] = bin(rev) + tagtypes[tag] = 'git' + + return (tags, tagtypes) + + def tags(self): + if not hasattr(self, 'tagscache'): + # mercurial 1.4 + return super(hgrepo, self).tags() + + if self.tagscache: + return self.tagscache + + git = GitHandler(self, self.ui) + tagscache = super(hgrepo, self).tags() + for tag, rev in git.tags.iteritems(): + if tag in tagscache: + continue + + tagscache[tag] = bin(rev) + self._tagstypecache[tag] = 'git' + + return tagscache + + return hgrepo diff -r 3e0eb85a83a7 -r 505d7cdca198 hggit/toposort.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hggit/toposort.py Wed Sep 30 14:39:49 2009 -0500 @@ -0,0 +1,159 @@ +'' +""" + Tarjan's algorithm and topological sorting implementation in Python + by Paul Harrison + Public domain, do with it as you will +""" +class TopoSort(object): + + def __init__(self, commitdict): + self._sorted = self.robust_topological_sort(commitdict) + self._shas = [] + for level in self._sorted: + for sha in level: + self._shas.append(sha) + + def items(self): + self._shas.reverse() + return self._shas + + def strongly_connected_components(self, graph): + """ Find the strongly connected components in a graph using + Tarjan's algorithm. + + graph should be a dictionary mapping node names to + lists of successor nodes. + """ + + result = [ ] + stack = [ ] + low = { } + + def visit(node): + if node in low: return + + num = len(low) + low[node] = num + stack_pos = len(stack) + stack.append(node) + + for successor in graph[node].parents: + visit(successor) + low[node] = min(low[node], low[successor]) + + if num == low[node]: + component = tuple(stack[stack_pos:]) + del stack[stack_pos:] + result.append(component) + for item in component: + low[item] = len(graph) + + for node in graph: + visit(node) + + return result + + def strongly_connected_components_non(self, G): + """Returns a list of strongly connected components in G. + + Uses Tarjan's algorithm with Nuutila's modifications. + Nonrecursive version of algorithm. + + References: + + R. Tarjan (1972). Depth-first search and linear graph algorithms. + SIAM Journal of Computing 1(2):146-160. + + E. Nuutila and E. Soisalon-Soinen (1994). + On finding the strongly connected components in a directed graph. + Information Processing Letters 49(1): 9-14. + + """ + preorder={} + lowlink={} + scc_found={} + scc_queue = [] + scc_list=[] + i=0 # Preorder counter + for source in G: + if source not in scc_found: + queue=[source] + while queue: + v=queue[-1] + if v not in preorder: + i=i+1 + preorder[v]=i + done=1 + v_nbrs=G[v] + for w in v_nbrs.parents: + if w not in preorder: + queue.append(w) + done=0 + break + if done==1: + lowlink[v]=preorder[v] + for w in v_nbrs.parents: + if w not in scc_found: + if preorder[w]>preorder[v]: + lowlink[v]=min([lowlink[v],lowlink[w]]) + else: + lowlink[v]=min([lowlink[v],preorder[w]]) + queue.pop() + if lowlink[v]==preorder[v]: + scc_found[v]=True + scc=(v,) + while scc_queue and preorder[scc_queue[-1]]>preorder[v]: + k=scc_queue.pop() + scc_found[k]=True + scc.append(k) + scc_list.append(scc) + else: + scc_queue.append(v) + scc_list.sort(lambda x, y: cmp(len(y),len(x))) + return scc_list + + def topological_sort(self, graph): + count = { } + for node in graph: + count[node] = 0 + for node in graph: + for successor in graph[node]: + count[successor] += 1 + + ready = [ node for node in graph if count[node] == 0 ] + + result = [ ] + while ready: + node = ready.pop(-1) + result.append(node) + + for successor in graph[node]: + count[successor] -= 1 + if count[successor] == 0: + ready.append(successor) + + return result + + def robust_topological_sort(self, graph): + """ First identify strongly connected components, + then perform a topological sort on these components. """ + + components = self.strongly_connected_components_non(graph) + + node_component = { } + for component in components: + for node in component: + node_component[node] = component + + component_graph = { } + for component in components: + component_graph[component] = [ ] + + for node in graph: + node_c = node_component[node] + for successor in graph[node].parents: + successor_c = node_component[successor] + if node_c != successor_c: + component_graph[node_c].append(successor_c) + + return self.topological_sort(component_graph) diff -r 3e0eb85a83a7 -r 505d7cdca198 hgrepo.py --- a/hgrepo.py Fri Sep 25 22:44:05 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -from mercurial.node import bin - -from git_handler import GitHandler -from gitrepo import gitrepo - - -def generate_repo_subclass(baseclass): - class hgrepo(baseclass): - def pull(self, remote, heads=None, force=False): - if isinstance(remote, gitrepo): - git = GitHandler(self, self.ui) - git.fetch(remote.path, heads) - else: #pragma: no cover - return super(hgrepo, self).pull(remote, heads, force) - - def push(self, remote, force=False, revs=None): - if isinstance(remote, gitrepo): - git = GitHandler(self, self.ui) - git.push(remote.path, revs, force) - else: #pragma: no cover - return super(hgrepo, self).push(remote, force, revs) - - def findoutgoing(self, remote, base=None, heads=None, force=False): - if isinstance(remote, gitrepo): - git = GitHandler(self, self.ui) - base, heads = git.get_refs(remote.path) - out, h = super(hgrepo, self).findoutgoing(remote, base, heads, force) - return out - else: #pragma: no cover - return super(hgrepo, self).findoutgoing(remote, base, heads, force) - - def _findtags(self): - (tags, tagtypes) = super(hgrepo, self)._findtags() - - git = GitHandler(self, self.ui) - for tag, rev in git.tags.iteritems(): - if tag in tags: - continue - - tags[tag] = bin(rev) - tagtypes[tag] = 'git' - - return (tags, tagtypes) - - def tags(self): - if not hasattr(self, 'tagscache'): - # mercurial 1.4 - return super(hgrepo, self).tags() - - if self.tagscache: - return self.tagscache - - git = GitHandler(self, self.ui) - tagscache = super(hgrepo, self).tags() - for tag, rev in git.tags.iteritems(): - if tag in tagscache: - continue - - tagscache[tag] = bin(rev) - self._tagstypecache[tag] = 'git' - - return tagscache - - return hgrepo diff -r 3e0eb85a83a7 -r 505d7cdca198 setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/setup.py Wed Sep 30 14:39:49 2009 -0500 @@ -0,0 +1,23 @@ +try: + from setuptools import setup +except: + from distutils.core import setup + +setup( + name='hg-git', + version='0.1.0', + author='Scott Chacon', + maintainer='Augie Fackler', + maintainer_email='durin42@gmail.com', + url='http://hg-git.github.com/', + description='push and pull from a Git server using Mercurial', + long_description=""" +This extension lets you communicate (push and pull) with a Git server. +This way you can use Git hosting for your project or collaborate with a +project that is in Git. A bridger of worlds, this plugin be. + """.strip(), + keywords='hg git mercurial', + license='GPLv2', + packages=['hggit'], + install_requires=['dulwich>=0.4.0'], +) diff -r 3e0eb85a83a7 -r 505d7cdca198 toposort.py --- a/toposort.py Fri Sep 25 22:44:05 2009 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,159 +0,0 @@ -'' -""" - Tarjan's algorithm and topological sorting implementation in Python - by Paul Harrison - Public domain, do with it as you will -""" -class TopoSort(object): - - def __init__(self, commitdict): - self._sorted = self.robust_topological_sort(commitdict) - self._shas = [] - for level in self._sorted: - for sha in level: - self._shas.append(sha) - - def items(self): - self._shas.reverse() - return self._shas - - def strongly_connected_components(self, graph): - """ Find the strongly connected components in a graph using - Tarjan's algorithm. - - graph should be a dictionary mapping node names to - lists of successor nodes. - """ - - result = [ ] - stack = [ ] - low = { } - - def visit(node): - if node in low: return - - num = len(low) - low[node] = num - stack_pos = len(stack) - stack.append(node) - - for successor in graph[node].parents: - visit(successor) - low[node] = min(low[node], low[successor]) - - if num == low[node]: - component = tuple(stack[stack_pos:]) - del stack[stack_pos:] - result.append(component) - for item in component: - low[item] = len(graph) - - for node in graph: - visit(node) - - return result - - def strongly_connected_components_non(self, G): - """Returns a list of strongly connected components in G. - - Uses Tarjan's algorithm with Nuutila's modifications. - Nonrecursive version of algorithm. - - References: - - R. Tarjan (1972). Depth-first search and linear graph algorithms. - SIAM Journal of Computing 1(2):146-160. - - E. Nuutila and E. Soisalon-Soinen (1994). - On finding the strongly connected components in a directed graph. - Information Processing Letters 49(1): 9-14. - - """ - preorder={} - lowlink={} - scc_found={} - scc_queue = [] - scc_list=[] - i=0 # Preorder counter - for source in G: - if source not in scc_found: - queue=[source] - while queue: - v=queue[-1] - if v not in preorder: - i=i+1 - preorder[v]=i - done=1 - v_nbrs=G[v] - for w in v_nbrs.parents: - if w not in preorder: - queue.append(w) - done=0 - break - if done==1: - lowlink[v]=preorder[v] - for w in v_nbrs.parents: - if w not in scc_found: - if preorder[w]>preorder[v]: - lowlink[v]=min([lowlink[v],lowlink[w]]) - else: - lowlink[v]=min([lowlink[v],preorder[w]]) - queue.pop() - if lowlink[v]==preorder[v]: - scc_found[v]=True - scc=(v,) - while scc_queue and preorder[scc_queue[-1]]>preorder[v]: - k=scc_queue.pop() - scc_found[k]=True - scc.append(k) - scc_list.append(scc) - else: - scc_queue.append(v) - scc_list.sort(lambda x, y: cmp(len(y),len(x))) - return scc_list - - def topological_sort(self, graph): - count = { } - for node in graph: - count[node] = 0 - for node in graph: - for successor in graph[node]: - count[successor] += 1 - - ready = [ node for node in graph if count[node] == 0 ] - - result = [ ] - while ready: - node = ready.pop(-1) - result.append(node) - - for successor in graph[node]: - count[successor] -= 1 - if count[successor] == 0: - ready.append(successor) - - return result - - def robust_topological_sort(self, graph): - """ First identify strongly connected components, - then perform a topological sort on these components. """ - - components = self.strongly_connected_components_non(graph) - - node_component = { } - for component in components: - for node in component: - node_component[node] = component - - component_graph = { } - for component in components: - component_graph[component] = [ ] - - for node in graph: - node_c = node_component[node] - for successor in graph[node].parents: - successor_c = node_component[successor] - if node_c != successor_c: - component_graph[node_c].append(successor_c) - - return self.topological_sort(component_graph)