diff hggit/git_handler.py @ 448:e58a6d0b80e2

Remove illegal characters from username/email. Includes a doctest and tests in test-hg/author for the correct behavior.
author Keshav Kini <keshav.kini@gmail.com>
date Wed, 15 Feb 2012 09:30:06 +0800
parents 1189e52ba27c
children 163ac98569d3
line wrap: on
line diff
--- a/hggit/git_handler.py	Fri Jan 27 22:48:55 2012 -0500
+++ b/hggit/git_handler.py	Wed Feb 15 09:30:06 2012 +0800
@@ -352,7 +352,50 @@
         return commit.id
 
     def get_valid_git_username_email(self, name):
-        return name.lstrip('< ').rstrip('> ')
+        r"""Sanitize usernames and emails to fit git's restrictions.
+
+        The following is taken from the man page of git's fast-import
+        command:
+
+            [...] Likewise LF means one (and only one) linefeed [...]
+
+            committer
+                The committer command indicates who made this commit,
+                and when they made it.
+
+                Here <name> is the person's display name (for example
+                "Com M Itter") and <email> is the person's email address
+                ("cm@example.com[1]"). LT and GT are the literal
+                less-than (\x3c) and greater-than (\x3e) symbols. These
+                are required to delimit the email address from the other
+                fields in the line. Note that <name> and <email> are
+                free-form and may contain any sequence of bytes, except
+                LT, GT and LF. <name> is typically UTF-8 encoded.
+
+        Accordingly, this function makes sure that there are none of the
+        characters <, >, or \n in any string which will be used for
+        a git username or email. Before this, it first removes left
+        angle brackets and spaces from the beginning, and right angle
+        brackets and spaces from the end, of this string, to convert
+        such things as " <john@doe.com> " to "john@doe.com" for
+        convenience.
+
+        TESTS:
+
+        >>> from mercurial.ui import ui
+        >>> g = GitHandler('', ui()).get_valid_git_username_email
+        >>> g('John Doe')
+        'John Doe'
+        >>> g('john@doe.com')
+        'john@doe.com'
+        >>> g(' <john@doe.com> ')
+        'john@doe.com'
+        >>> g('    <random<\n<garbage\n>  > > ')
+        'random???garbage?'
+        >>> g('Typo in hgrc >but.hg-git@handles.it.gracefully>')
+        'Typo in hgrc ?but.hg-git@handles.it.gracefully'
+        """
+        return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))
 
     def get_git_author(self, ctx):
         # hg authors might not have emails
@@ -363,8 +406,8 @@
         a = regex.match(author)
 
         if a:
-            name = a.group(1)
-            email = a.group(2)
+            name = self.get_valid_git_username_email(a.group(1))
+            email = self.get_valid_git_username_email(a.group(2))
             if a.group(3) != None and len(a.group(3)) != 0:
                 name += ' ext:(' + urllib.quote(a.group(3)) + ')'
             author = self.get_valid_git_username_email(name) + ' <' + self.get_valid_git_username_email(email) + '>'
@@ -724,7 +767,7 @@
             ctx = self.repo[rev]
             if getattr(ctx, 'bookmarks', None):
                 labels = lambda c: ctx.tags() + [
-                                fltr for fltr, bm 
+                                fltr for fltr, bm
                                 in self._filter_for_bookmarks(ctx.bookmarks())
                             ]
             else:
@@ -861,7 +904,7 @@
                 bms = bookmarks.parse(self.repo)
             else:
                 bms = self.repo._bookmarks
-            return dict([(filtered_bm, hex(bms[bm])) for 
+            return dict([(filtered_bm, hex(bms[bm])) for
                         filtered_bm, bm in self._filter_for_bookmarks(bms)])
         except AttributeError: #pragma: no cover
             return {}
@@ -930,7 +973,7 @@
                 real_branch_names = self.repo.branchmap()
                 bms = dict(
                     (
-                        bm_name + self.branch_bookmark_suffix 
+                        bm_name + self.branch_bookmark_suffix
                             if bm_name in real_branch_names
                         else bm_name,
                         bms[bm_name]