changeset 693:9b194d7c9c03

verify: add new command to verify the contents of a Mercurial rev. Since the Git to Mercurial conversion process is incremental, it's at risk of missing files, or recording files the wrong way, or recording the wrong commit metadata. Add a command called 'gverify' that can verify the contents of a particular Mercurial rev against the corresponding Git commit. Currently, this is limited to checking file names, flags and contents, but this can be made as robust as desired. Further additions will probably require refactoring git_handler.py a bit though. This function is pretty fast: on a Linux machine with a warm cache, verifying a repository with around 50,000 files takes just 20 seconds. There is scope for further improvement through parallelization, but conducting tree walks in parallel is non-trivial with the current worker infrastructure in Mercurial.
author Siddharth Agarwal <sid0@fb.com>
date Wed, 26 Feb 2014 14:19:24 -0800
parents cf3bb80a666e
children 1e3e4ff9a25a
files hggit/__init__.py hggit/verify.py tests/test-convergedmerge.t tests/test-empty-working-tree.t tests/test-git-clone.t tests/test-git-submodules.t tests/test-octopus.t tests/test-verify-fail.t
diffstat 8 files changed, 196 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/hggit/__init__.py	Tue Feb 25 20:01:42 2014 -0800
+++ b/hggit/__init__.py	Wed Feb 26 14:19:24 2014 -0800
@@ -43,6 +43,7 @@
 
 import gitrepo, hgrepo
 from git_handler import GitHandler
+import verify
 
 testedwith = '2.0.2 2.1.2 2.2.3 2.8.1'
 buglink = 'https://bitbucket.org/durin42/hg-git/issues'
@@ -223,5 +224,7 @@
   "gclear":
       (gclear, [], _('Clears out the Git cached data')),
   "git-cleanup": (git_cleanup, [], _(
-        "Cleans up git repository after history editing"))
+        "Cleans up git repository after history editing")),
+  "gverify": (verify.verify,
+    [('r', 'rev', '', _('revision to verify'), _('REV'))], _('[-r REV]')),
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hggit/verify.py	Wed Feb 26 14:19:24 2014 -0800
@@ -0,0 +1,103 @@
+# verify.py - verify Mercurial revisions
+#
+# Copyright 2014 Facebook.
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import stat
+
+from mercurial import command
+from mercurial import error
+from mercurial import util as hgutil
+from mercurial.node import hex, bin, nullid
+from mercurial.i18n import _
+from mercurial import scmutil
+
+from dulwich import diff_tree
+from dulwich.objects import Commit, S_IFGITLINK
+
+def verify(ui, repo, **opts):
+    '''verify that a Mercurial rev matches the corresponding Git rev
+
+    Given a Mercurial revision that has a corresponding Git revision in the map,
+    this attempts to answer whether that revision has the same contents as the
+    corresponding Git revision.
+
+    '''
+    hgctx = scmutil.revsingle(repo, opts.get('rev'), '.')  # '.' is the fallback when no rev option was given
+    # everything git-related (rev map, object store, mode conversion) goes through the handler
+    handler = repo.githandler
+
+    gitsha = handler.map_git_get(hgctx.hex())  # git sha recorded for this hg node, falsy if unmapped
+    if not gitsha:
+        # TODO deal better with commits in the middle of octopus merges
+        raise hgutil.Abort(_('no git commit found for rev %s') % hgctx,
+                           hint=_('if this is an octopus merge, verify against the last rev'))
+    # a map entry alone is not enough: the object must exist in the git repo and be a commit
+    try:
+        gitcommit = handler.git.get_object(gitsha)
+    except KeyError:
+        raise hgutil.Abort(_('git equivalent %s for rev %s not found!') %
+                           (gitsha, hgctx))
+    if not isinstance(gitcommit, Commit):
+        raise hgutil.Abort(_('git equivalent %s for rev %s is not a commit!') %
+                           (gitsha, hgctx))
+
+    ui.status(_('verifying rev %s against git commit %s\n') % (hgctx, gitsha))
+    failed = False  # set on any reported mismatch; decides the return value
+
+    # TODO check commit message and other metadata
+
+    dirkind = stat.S_IFDIR  # git entries with exactly this mode are skipped in the walk below
+
+    hgfiles = set(hgctx)  # file names on the hg side, compared against gitfiles at the end
+    # TODO deal with submodules
+    hgfiles.discard('.hgsubstate')
+    hgfiles.discard('.hgsub')
+    gitfiles = set()  # paths collected while walking the git tree
+    # walk the commit's tree with no second tree (None); dummy is the unused second item of each pair
+    i = 0  # count of git files processed so far, reported as progress position
+    for gitfile, dummy in diff_tree.walk_trees(handler.git.object_store,
+                                               gitcommit.tree, None):
+        if gitfile.mode == dirkind:
+            continue
+        # TODO deal with submodules
+        if (gitfile.mode == S_IFGITLINK or gitfile.path == '.hgsubstate'
+            or gitfile.path == '.hgsub'):
+            continue
+        ui.progress('verify', i, total=len(hgfiles))  # NOTE: total is the hg file count while i counts git files
+        i += 1
+        gitfiles.add(gitfile.path)
+
+        try:
+            fctx = hgctx[gitfile.path]
+        except error.LookupError:
+            # path is absent from the hg rev; reported by the set comparison after the loop
+            continue
+        # flags first: the git mode is converted by the handler before comparing with the hg flags
+        hgflags = fctx.flags()
+        gitflags = handler.convert_git_int_mode(gitfile.mode)
+        if hgflags != gitflags:
+            ui.write(_("file has different flags: %s (hg '%s', git '%s')\n") %
+                     (gitfile.path, hgflags, gitflags))
+            failed = True
+        if fctx.data() != handler.git[gitfile.sha].data:  # full content comparison of hg file vs git object
+            ui.write(_('difference in: %s\n') % gitfile.path)
+            failed = True
+
+    ui.progress('verify', None, total=len(hgfiles))  # position None: presumably ends the progress topic
+    # finally compare the two name sets to report files present on only one side
+    if hgfiles != gitfiles:
+        failed = True
+        missing = gitfiles - hgfiles
+        for f in sorted(missing):
+            ui.write(_('file found in git but not hg: %s\n') % f)
+        unexpected = hgfiles - gitfiles
+        for f in sorted(unexpected):
+            ui.write(_('file found in hg but not git: %s\n') % f)
+
+    if failed:
+        return 1  # at least one mismatch was written out above
+    else:
+        return 0  # names, flags and contents all matched
--- a/tests/test-convergedmerge.t	Tue Feb 25 20:01:42 2014 -0800
+++ b/tests/test-convergedmerge.t	Wed Feb 26 14:19:24 2014 -0800
@@ -75,3 +75,5 @@
   o  0   5d1a6b64f9d0   1970-01-01 00:00 +0000   test
        origin
   
+  $ hg -R hgrepo2 gverify
+  verifying rev eaa21d002113 against git commit efe74cc1e0ede609e3ab5983e61d780a10177ef3
--- a/tests/test-empty-working-tree.t	Tue Feb 25 20:01:42 2014 -0800
+++ b/tests/test-empty-working-tree.t	Wed Feb 26 14:19:24 2014 -0800
@@ -16,6 +16,8 @@
   $ cd hgrepo
   $ hg log -r tip --template 'files: {files}\n'
   files: 
+  $ hg gverify
+  verifying rev 2c7bb41124ca against git commit 678256865a8c85ae925bf834369264193c88f8de
 
   $ hg gclear
   clearing out the git cache data
--- a/tests/test-git-clone.t	Tue Feb 25 20:01:42 2014 -0800
+++ b/tests/test-git-clone.t	Wed Feb 26 14:19:24 2014 -0800
@@ -33,3 +33,5 @@
 we should have some bookmarks
   $ hg -R hgrepo book
    * master                    1:7bcd915dc873
+  $ hg -R hgrepo gverify
+  verifying rev 7bcd915dc873 against git commit 9497a4ee62e16ee641860d7677cdb2589ea15554
--- a/tests/test-git-submodules.t	Tue Feb 25 20:01:42 2014 -0800
+++ b/tests/test-git-submodules.t	Wed Feb 26 14:19:24 2014 -0800
@@ -161,12 +161,16 @@
   6e4ad8da50204560c00fa25e4987eb2e239029ba subrepo
   $ hg cat -r 1 .hgsub
   subrepo = [git]../gitsubrepo
+  $ hg gverify -r 1
+  verifying rev 2f69b1b8a6f8 against git commit e42b08b3cb7069b4594a4ee1d9cb641ee47b2355
 
 (change subrepo commit)
   $ hg cat -r 2 .hgsubstate
   aa2ead20c29b5cc6256408e1d9ef704870033afb subrepo
   $ hg cat -r 2 .hgsub
   subrepo = [git]../gitsubrepo
+  $ hg gverify -r 2
+  verifying rev 914937cccdbe against git commit a000567ceefbd9a2ce364e0dea6e298010b02b6d
 
 (add another subrepo)
   $ hg cat -r 3 .hgsubstate
@@ -175,6 +179,8 @@
   $ hg cat -r 3 .hgsub
   subrepo = [git]../gitsubrepo
   subrepo2 = [git]../gitsubrepo
+  $ hg gverify -r 3
+  verifying rev 6264517ddb98 against git commit 6e219527869fa40eb6ffbdd013cd86d576b26b01
 
 (replace subrepo with file)
   $ hg cat -r 4 .hgsubstate
@@ -183,6 +189,8 @@
   subrepo2 = [git]../gitsubrepo
   $ hg cat -r 4 subrepo
   subrepo
+  $ hg gverify -r 4
+  verifying rev e233b0858578 against git commit f6436a472da00f581d8d257e9bbaf3c358a5e88c
 
 (replace file with subrepo)
   $ hg cat -r 5 .hgsubstate
@@ -194,12 +202,16 @@
   $ hg cat -r 5 alpha
   alpha: no such file in rev 97f89374a0ce
   [1]
+  $ hg gverify -r 5
+  verifying rev 97f89374a0ce against git commit 88171163bf4795b5570924e51d5f8ede33f8bc28
 
 (remove all subrepos)
   $ hg cat -r 6 .hgsub .hgsubstate
   .hgsub: no such file in rev 827c0345b7d1
   .hgsubstate: no such file in rev 827c0345b7d1
   [1]
+  $ hg gverify -r 6
+  verifying rev 827c0345b7d1 against git commit d3c472800f9d11baa6615971a3179fd441869173
 
   $ hg gclear
   clearing out the git cache data
--- a/tests/test-octopus.t	Tue Feb 25 20:01:42 2014 -0800
+++ b/tests/test-octopus.t	Wed Feb 26 14:19:24 2014 -0800
@@ -97,6 +97,12 @@
   o  0   3442585be8a6   2007-01-01 00:00 +0000   test
        add alpha
   
+  $ hg gverify -r 9
+  verifying rev 9c1d2aac0643 against git commit b32ff845df61df998206b630e4370a44f9b36845
+  $ hg gverify -r 8
+  abort: no git commit found for rev c5ea839ce0aa
+  (if this is an octopus merge, verify against the last rev)
+  [255]
 
   $ hg gclear
   clearing out the git cache data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-verify-fail.t	Wed Feb 26 14:19:24 2014 -0800
@@ -0,0 +1,65 @@
+Other tests make sure that gverify passes. This makes sure that gverify detects
+inconsistencies. Since hg-git is ostensibly correct, we artificially create
+inconsistencies by placing different Mercurial and Git repos in the right spots.
+
+  $ . "$TESTDIR/testutil"
+  $ git init gitrepo
+  Initialized empty Git repository in $TESTTMP/gitrepo/.git/
+  $ cd gitrepo
+  $ echo normalf > normalf
+  $ echo missingf > missingf
+  $ echo differentf > differentf
+(executable in git, non-executable in hg)
+  $ echo exef > exef
+  $ chmod +x exef
+(symlink in hg, regular file in git)
+equivalent to 'echo -n foo > linkf', but that doesn't work on OS X
+  $ printf foo > linkf
+  $ git add normalf missingf differentf exef linkf
+  $ fn_git_commit -m 'add files'
+  $ cd ..
+
+  $ hg init hgrepo
+  $ cd hgrepo
+  $ echo normalf > normalf
+  $ echo differentf2 > differentf
+  $ echo unexpectedf > unexpectedf
+  $ echo exef > exef
+  $ ln -s foo linkf
+  $ hg add normalf differentf unexpectedf exef linkf
+  $ fn_hg_commit -m 'add files'
+  $ git clone --mirror ../gitrepo .hg/git
+  Cloning into bare repository '.hg/git'...
+  done.
+  $ echo "$(cd ../gitrepo && git rev-parse HEAD) $(hg log -r . --template '{node}')" >> .hg/git-mapfile
+  $ hg gverify
+  verifying rev 3f1601c3cf54 against git commit 039c1cd9fdda382c9d1e8ec85de6b5b59518ca80
+  difference in: differentf
+  file has different flags: exef (hg '', git 'x')
+  file has different flags: linkf (hg 'l', git '')
+  file found in git but not hg: missingf
+  file found in hg but not git: unexpectedf
+  [1]
+
+  $ echo newf > newf
+  $ hg add newf
+  $ fn_hg_commit -m 'new hg commit'
+  $ hg gverify
+  abort: no git commit found for rev 4e582b4eb862
+  (if this is an octopus merge, verify against the last rev)
+  [255]
+
+invalid git SHA
+  $ echo "ffffffffffffffffffffffffffffffffffffffff $(hg log -r . --template '{node}')" >> .hg/git-mapfile
+  $ hg gverify
+  abort: git equivalent ffffffffffffffffffffffffffffffffffffffff for rev 4e582b4eb862 not found!
+  [255]
+
+git SHA is not a commit
+  $ echo new2 >> newf
+  $ fn_hg_commit -m 'new hg commit 2'
+this gets the tree pointed to by the commit at HEAD
+  $ echo "$(cd ../gitrepo && git show --format=%T HEAD | head -n 1) $(hg log -r . --template '{node}')" >> .hg/git-mapfile
+  $ hg gverify
+  abort: git equivalent f477b00e4a9907617f346a529cc0fe9ba5d6f6d3 for rev 5c2eb98af3e2 is not a commit!
+  [255]