changeset 118:b3be536e3f50

handles git commit encoding fields now
author Scott Chacon <schacon@gmail.com>
date Mon, 11 May 2009 16:03:57 -0700
parents 93980820bba1
children 70381c36c113 b51381c6fab8 6b5925d56ecd f2dfb2bed724 366052cae03c
files TODO.txt dulwich/objects.py dulwich/repo.py git_handler.py
diffstat 4 files changed, 21 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/TODO.txt	Mon May 11 11:35:30 2009 -0700
+++ b/TODO.txt	Mon May 11 16:03:57 2009 -0700
@@ -23,7 +23,6 @@
 MAPPING ISSUES
 ==============
 Created in Git:
-* encoding field / utf-8
 * octopus merge explode/implode
 
 WEBSITE
@@ -35,6 +34,6 @@
 SPEED/EFFICIENCY
 ================
 * switch object mapping to hg->git since the many to one is that direction
-* don't send blobs/trees already on server
+* don't send blobs/trees already on server (thin pack)
 * packfile creation benchmarking (seems to take a while sometimes)
   - at least provide status output
--- a/dulwich/objects.py	Mon May 11 11:35:30 2009 -0700
+++ b/dulwich/objects.py	Mon May 11 16:03:57 2009 -0700
@@ -41,6 +41,7 @@
 PARENT_ID = "parent"
 AUTHOR_ID = "author"
 COMMITTER_ID = "committer"
+ENCODING_ID = "encoding"
 OBJECT_ID = "object"
 TYPE_ID = "type"
 TAGGER_ID = "tagger"
@@ -589,9 +590,18 @@
             while text[count] != '\n':
                 count += 1
             count += 1
-        assert text[count] == '\n', "There must be a new line after the headers"
+        self._encoding = None
+        if not text[count] == '\n':
+            # There can be an encoding field.
+            if text[count:].startswith(ENCODING_ID):
+                count += len(ENCODING_ID)
+                assert text[count] == ' ', "Invalid encoding, " \
+                     "%s must be followed by space not %s" % (ENCODING_ID, text[count])
+                count += 1
+                self._encoding = text[count:].split("\n", 1)[0]
+                while text[count] != "\n":
+                    count += 1
         count += 1
-        # XXX: There can be an encoding field.
         self._message = text[count:]
         self._needs_parsing = False
 
--- a/dulwich/repo.py	Mon May 11 11:35:30 2009 -0700
+++ b/dulwich/repo.py	Mon May 11 16:03:57 2009 -0700
@@ -357,6 +357,8 @@
             commit_data += 'parent ' + parent + "\n"
         commit_data += 'author ' + commit['author'] + "\n"
         commit_data += 'committer ' + commit['committer'] + "\n"
+        if 'encoding' in commit:
+            commit_data += 'encoding ' + commit['encoding'] + "\n"
         commit_data += "\n"
         commit_data += commit['message']
         sha = self.write_object('commit', commit_data)
--- a/git_handler.py	Mon May 11 11:35:30 2009 -0700
+++ b/git_handler.py	Mon May 11 16:03:57 2009 -0700
@@ -210,6 +210,8 @@
         extra = ctx.extra()
         if 'committer' in extra:
             commit['committer'] = extra['committer']
+        if 'encoding' in extra:
+            commit['encoding'] = extra['encoding']
 
         # HG EXTRA INFORMATION
         add_extras = False
@@ -584,9 +586,12 @@
         if not commit._author_raw == commit._committer_raw:
             extra['committer'] = commit._committer_raw
 
+        if commit._encoding:
+            extra['encoding'] = commit._encoding
+
         if hg_branch:
             extra['branch'] = hg_branch
-                
+
         text = strip_message
         date = datetime.datetime.fromtimestamp(commit.author_time).strftime("%Y-%m-%d %H:%M:%S")
         ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,