diff hggit/verify.py @ 693:9b194d7c9c03

verify: add new command to verify the contents of a Mercurial rev. Since the Git to Mercurial conversion process is incremental, it's at risk of missing files, or recording files the wrong way, or recording the wrong commit metadata. Add a command called 'gverify' that can verify the contents of a particular Mercurial rev against the corresponding Git commit. Currently, this is limited to checking file names, flags and contents, but this can be made as robust as desired. Further additions will probably require refactoring git_handler.py a bit though. This function is pretty fast: on a Linux machine with a warm cache, verifying a repository with around 50,000 files takes just 20 seconds. There is scope for further improvement through parallelization, but conducting tree walks in parallel is non-trivial with the current worker infrastructure in Mercurial.
author Siddharth Agarwal <sid0@fb.com>
date Wed, 26 Feb 2014 14:19:24 -0800
parents
children 1e3e4ff9a25a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hggit/verify.py	Wed Feb 26 14:19:24 2014 -0800
@@ -0,0 +1,103 @@
+# verify.py - verify Mercurial revisions
+#
+# Copyright 2014 Facebook.
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import stat
+
+from mercurial import command
+from mercurial import error
+from mercurial import util as hgutil
+from mercurial.node import hex, bin, nullid
+from mercurial.i18n import _
+from mercurial import scmutil
+
+from dulwich import diff_tree
+from dulwich.objects import Commit, S_IFGITLINK
+
+def verify(ui, repo, **opts):
+    '''verify that a Mercurial rev matches the corresponding Git rev
+
+    Given a Mercurial revision that has a corresponding Git revision in the map,
+    this attempts to answer whether that revision has the same contents as the
+    corresponding Git revision.
+
+    '''
+    hgctx = scmutil.revsingle(repo, opts.get('rev'), '.')
+
+    handler = repo.githandler
+
+    gitsha = handler.map_git_get(hgctx.hex())
+    if not gitsha:
+        # TODO deal better with commits in the middle of octopus merges
+        raise hgutil.Abort(_('no git commit found for rev %s') % hgctx,
+                           hint=_('if this is an octopus merge, verify against the last rev'))
+
+    try:
+        gitcommit = handler.git.get_object(gitsha)
+    except KeyError:
+        raise hgutil.Abort(_('git equivalent %s for rev %s not found!') %
+                           (gitsha, hgctx))
+    if not isinstance(gitcommit, Commit):
+        raise hgutil.Abort(_('git equivalent %s for rev %s is not a commit!') %
+                           (gitsha, hgctx))
+
+    ui.status(_('verifying rev %s against git commit %s\n') % (hgctx, gitsha))
+    failed = False
+
+    # TODO check commit message and other metadata
+
+    dirkind = stat.S_IFDIR
+
+    hgfiles = set(hgctx)
+    # TODO deal with submodules
+    hgfiles.discard('.hgsubstate')
+    hgfiles.discard('.hgsub')
+    gitfiles = set()
+
+    i = 0
+    for gitfile, dummy in diff_tree.walk_trees(handler.git.object_store,
+                                               gitcommit.tree, None):
+        if gitfile.mode == dirkind:
+            continue
+        # TODO deal with submodules
+        if (gitfile.mode == S_IFGITLINK or gitfile.path == '.hgsubstate'
+            or gitfile.path == '.hgsub'):
+            continue
+        ui.progress('verify', i, total=len(hgfiles))
+        i += 1
+        gitfiles.add(gitfile.path)
+
+        try:
+            fctx = hgctx[gitfile.path]
+        except error.LookupError:
+            # we'll deal with this at the end
+            continue
+
+        hgflags = fctx.flags()
+        gitflags = handler.convert_git_int_mode(gitfile.mode)
+        if hgflags != gitflags:
+            ui.write(_("file has different flags: %s (hg '%s', git '%s')\n") %
+                     (gitfile.path, hgflags, gitflags))
+            failed = True
+        if fctx.data() != handler.git[gitfile.sha].data:
+            ui.write(_('difference in: %s\n') % gitfile.path)
+            failed = True
+
+    ui.progress('verify', None, total=len(hgfiles))
+
+    if hgfiles != gitfiles:
+        failed = True
+        missing = gitfiles - hgfiles
+        for f in sorted(missing):
+            ui.write(_('file found in git but not hg: %s\n') % f)
+        unexpected = hgfiles - gitfiles
+        for f in sorted(unexpected):
+            ui.write(_('file found in hg but not git: %s\n') % f)
+
+    if failed:
+        return 1
+    else:
+        return 0