# HG changeset patch # User Mark Brand # Date 1286920471 -7200 # Node ID 1a6731196918f2f0ec0378af8d3134881c71c49f # Parent 5d9e7369c6ed1e0725adb21f3f21497977a8e888 upgrade package vmime diff -r 5d9e7369c6ed -r 1a6731196918 src/vmime-1-fastforward.patch --- a/src/vmime-1-fastforward.patch Wed Oct 13 06:18:33 2010 +1100 +++ b/src/vmime-1-fastforward.patch Tue Oct 12 23:54:31 2010 +0200 @@ -17671,3 +17671,547 @@ // TODO: UUEncode VMIME_TEST_SUITE_END + +commit fb268637f2f06d710f0ef475d71a8d4034a28e6c +Author: vincent-richard +Date: Tue Oct 12 17:10:58 2010 +0000 + + Better RFC-2047 encoding. + + git-svn-id: https://vmime.svn.sourceforge.net/svnroot/vmime/trunk@567 5301114d-f842-0410-bbdd-996ee0417009 + +diff --git a/src/utility/encoder/qpEncoder.cpp b/src/utility/encoder/qpEncoder.cpp +index e20be9f..aa95022 100644 +--- a/src/utility/encoder/qpEncoder.cpp ++++ b/src/utility/encoder/qpEncoder.cpp +@@ -51,10 +51,52 @@ const std::vector qpEncoder::getAvailableProperties() const + + + +-// Encoding table ++// Hex-encoding table + const unsigned char qpEncoder::sm_hexDigits[] = "0123456789ABCDEF"; + +-// Decoding table ++ ++// RFC-2047 encoding table: we always encode RFC-2047 using the restricted ++// charset, that is the one used for 'phrase' in From/To/Cc/... headers. ++// ++// " The set of characters that may be used in a "Q"-encoded 'encoded-word' ++// is restricted to: . " ++// ++// Two special cases: ++// - encode space (32) as underscore (95) ++// - encode underscore as hex (=5F) ++// ++// This is a quick lookup table: ++// '1' means "encode", '0' means "no encoding" ++// ++const unsigned char qpEncoder::sm_RFC2047EncodeTable[] = ++{ ++ /* 0 NUL */ 1, /* 1 SOH */ 1, /* 2 STX */ 1, /* 3 ETX */ 1, /* 4 EOT */ 1, /* 5 ENQ */ 1, ++ /* 6 ACK */ 1, /* 7 BEL */ 1, /* 8 BS */ 1, /* 9 TAB */ 1, /* 10 LF */ 1, /* 11 VT */ 1, ++ /* 12 FF */ 1, /* 13 CR */ 1, /* 14 SO */ 1, /* 15 SI */ 1, /* 16 DLE */ 1, /* 17 DC1 */ 1, ++ /* 18 DC2 */ 1, /* 19 DC3 */ 1, /* 20 DC4 */ 1, /* 21 NAK */ 1, /* 22 SYN */ 1, /* 23 ETB */ 1, ++ /* 24 CAN */ 1, /* 25 EM */ 1, /* 26 SUB */ 1, /* 27 ESC */ 1, /* 28 FS */ 1, /* 29 GS */ 1, ++ /* 30 RS */ 1, /* 31 US */ 1, /* 32 SPACE*/ 1, /* 33 ! */ 0, /* 34 " */ 1, /* 35 # */ 1, ++ /* 36 $ */ 1, /* 37 % */ 1, /* 38 & */ 1, /* 39 ' */ 1, /* 40 ( */ 1, /* 41 ) */ 1, ++ /* 42 * */ 0, /* 43 + */ 0, /* 44 , */ 1, /* 45 - */ 0, /* 46 . */ 1, /* 47 / */ 0, ++ /* 48 0 */ 0, /* 49 1 */ 0, /* 50 2 */ 0, /* 51 3 */ 0, /* 52 4 */ 0, /* 53 5 */ 0, ++ /* 54 6 */ 0, /* 55 7 */ 0, /* 56 8 */ 0, /* 57 9 */ 0, /* 58 : */ 1, /* 59 ; */ 1, ++ /* 60 < */ 1, /* 61 = */ 1, /* 62 > */ 1, /* 63 ? */ 1, /* 64 @ */ 1, /* 65 A */ 0, ++ /* 66 B */ 0, /* 67 C */ 0, /* 68 D */ 0, /* 69 E */ 0, /* 70 F */ 0, /* 71 G */ 0, ++ /* 72 H */ 0, /* 73 I */ 0, /* 74 J */ 0, /* 75 K */ 0, /* 76 L */ 0, /* 77 M */ 0, ++ /* 78 N */ 0, /* 79 O */ 0, /* 80 P */ 0, /* 81 Q */ 0, /* 82 R */ 0, /* 83 S */ 0, ++ /* 84 T */ 0, /* 85 U */ 0, /* 86 V */ 0, /* 87 W */ 0, /* 88 X */ 0, /* 89 Y */ 0, ++ /* 90 Z */ 0, /* 91 [ */ 1, /* 92 " */ 1, /* 93 ] */ 1, /* 94 ^ */ 1, /* 95 _ */ 1, ++ /* 96 ` */ 1, /* 97 a */ 0, /* 98 b */ 0, /* 99 c */ 0, /* 100 d */ 0, /* 101 e */ 0, ++ /* 102 f */ 0, /* 103 g */ 0, /* 104 h */ 0, /* 105 i */ 0, /* 106 j */ 0, /* 107 k */ 0, ++ /* 108 l */ 0, /* 109 m */ 0, /* 110 n */ 0, /* 111 o */ 0, /* 112 p */ 0, /* 113 q */ 0, ++ /* 114 r */ 0, /* 115 s */ 0, /* 116 t */ 0, /* 117 u */ 0, /* 118 v */ 0, /* 119 w */ 0, ++ /* 120 x */ 0, /* 121 y */ 0, /* 122 z */ 0, /* 123 { */ 1, /* 124 | */ 1, /* 125 } */ 1, ++ /* 126 ~ */ 1, /* 127 DEL */ 1 ++}; ++ ++ ++// Hex-decoding table + const unsigned char qpEncoder::sm_hexDecodeTable[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +@@ -76,6 +118,36 @@ const unsigned char qpEncoder::sm_hexDecodeTable[256] = + }; + + ++// static ++bool qpEncoder::RFC2047_isEncodingNeededForChar(const unsigned char c) ++{ ++ return (c >= 128 || sm_RFC2047EncodeTable[c] != 0); ++} ++ ++ ++// static ++int qpEncoder::RFC2047_getEncodedLength(const unsigned char c) ++{ ++ if (c >= 128 || sm_RFC2047EncodeTable[c] != 0) ++ { ++ if (c == 32) // space ++ { ++ // Encoded as "_" ++ return 1; ++ } ++ else ++ { ++ // Hex encoding ++ return 3; ++ } ++ } ++ else ++ { ++ return 1; // no encoding ++ } ++} ++ ++ + #ifndef VMIME_BUILDING_DOC + + #define QP_ENCODE_HEX(x) \ +@@ -83,7 +155,7 @@ const unsigned char qpEncoder::sm_hexDecodeTable[256] = + outBuffer[outBufferPos + 1] = sm_hexDigits[x >> 4]; \ + outBuffer[outBufferPos + 2] = sm_hexDigits[x & 0xF]; \ + outBufferPos += 3; \ +- curCol += 3; ++ curCol += 3 + + #define QP_WRITE(s, x, l) s.write(reinterpret_cast (x), l) + +@@ -145,34 +217,51 @@ utility::stream::size_type qpEncoder::encode(utility::inputStream& in, + // Get the next char and encode it + const unsigned char c = static_cast (buffer[bufferPos++]); + +- switch (c) +- { +- case '.': ++ if (rfc2047) + { +- if (!rfc2047 && curCol == 0) ++ if (c >= 128 || sm_RFC2047EncodeTable[c] != 0) + { +- // If a '.' appears at the beginning of a line, we encode it to +- // to avoid problems with SMTP servers... ("\r\n.\r\n" means the +- // end of data transmission). +- QP_ENCODE_HEX('.') +- continue; ++ if (c == 32) // space ++ { ++ // RFC-2047, Page 5, 4.2. The "Q" encoding: ++ // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be ++ // represented as "_" (underscore, ASCII 95.). >> ++ outBuffer[outBufferPos++] = '_'; ++ ++curCol; ++ } ++ else ++ { ++ // Other characters: '=' + hexadecimal encoding ++ QP_ENCODE_HEX(c); ++ } ++ } ++ else ++ { ++ // No encoding ++ outBuffer[outBufferPos++] = c; ++ ++curCol; + } +- +- outBuffer[outBufferPos++] = '.'; +- ++curCol; +- break; + } +- case ' ': ++ else + { +- // RFC-2047, Page 5, 4.2. The "Q" encoding: +- // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be +- // represented as "_" (underscore, ASCII 95.). >> +- if (rfc2047) ++ switch (c) + { +- outBuffer[outBufferPos++] = '_'; ++ case 46: // . ++ { ++ if (curCol == 0) ++ { ++ // If a '.' appears at the beginning of a line, we encode it to ++ // to avoid problems with SMTP servers... ("\r\n.\r\n" means the ++ // end of data transmission). ++ QP_ENCODE_HEX('.'); ++ continue; ++ } ++ ++ outBuffer[outBufferPos++] = '.'; + ++curCol; ++ break; + } +- else ++ case 32: // space + { + // Need to get more data? + if (bufferPos >= bufferLength) +@@ -192,100 +281,74 @@ utility::stream::size_type qpEncoder::encode(utility::inputStream& in, + outBuffer[outBufferPos++] = ' '; + ++curCol; + } +- } + +- break; +- } +- case '\t': +- { +- QP_ENCODE_HEX(c) +- break; +- } +- case '\r': +- case '\n': +- { +- // Text mode (where using CRLF or LF or ... does not +- // care for a new line...) +- if (text) +- { +- outBuffer[outBufferPos++] = c; +- ++curCol; ++ break; + } +- // Binary mode (where CR and LF bytes are important!) +- else ++ case 9: // TAB + { +- QP_ENCODE_HEX(c) +- } +- +- break; +- } +- case '=': +- { +- QP_ENCODE_HEX('=') +- break; +- } +- // RFC-2047 'especials' characters +- case ',': +- case ';': +- case ':': +- case '_': +- case '@': +- case '(': +- case ')': +- case '<': +- case '>': +- case '[': +- case ']': +- case '"': +- { +- if (rfc2047) +- { +- QP_ENCODE_HEX(c) ++ QP_ENCODE_HEX(c); ++ break; + } +- else ++ case 13: // CR ++ case 10: // LF + { +- outBuffer[outBufferPos++] = c; +- ++curCol; +- } ++ // Text mode (where using CRLF or LF or ... does not ++ // care for a new line...) ++ if (text) ++ { ++ outBuffer[outBufferPos++] = c; ++ ++curCol; ++ } ++ // Binary mode (where CR and LF bytes are important!) ++ else ++ { ++ QP_ENCODE_HEX(c); ++ } + +- break; +- } +- /* +- Rule #2: (Literal representation) Octets with decimal values of 33 +- through 60 inclusive, and 62 through 126, inclusive, MAY be +- represented as the ASCII characters which correspond to those +- octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN +- through TILDE, respectively). +- */ +- default: +- { +- //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126)) +- if (c >= 33 && c <= 126 && c != 61 && c != 63) +- { +- outBuffer[outBufferPos++] = c; +- ++curCol; ++ break; + } +- // Other characters: '=' + hexadecimal encoding +- else ++ case 61: // = + { +- QP_ENCODE_HEX(c) ++ QP_ENCODE_HEX('='); ++ break; + } ++ /* ++ Rule #2: (Literal representation) Octets with decimal values of 33 ++ through 60 inclusive, and 62 through 126, inclusive, MAY be ++ represented as the ASCII characters which correspond to those ++ octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN ++ through TILDE, respectively). ++ */ ++ default: ++ ++ //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126)) ++ if (c >= 33 && c <= 126 && c != 61 && c != 63) ++ { ++ outBuffer[outBufferPos++] = c; ++ ++curCol; ++ } ++ // Other characters: '=' + hexadecimal encoding ++ else ++ { ++ QP_ENCODE_HEX(c); ++ } + +- break; +- } ++ break; + +- } ++ } // switch (c) + +- // Soft line break : "=\r\n" +- if (cutLines && curCol >= maxLineLength - 1) +- { +- outBuffer[outBufferPos] = '='; +- outBuffer[outBufferPos + 1] = '\r'; +- outBuffer[outBufferPos + 2] = '\n'; ++ // Soft line break : "=\r\n" ++ if (cutLines && curCol >= maxLineLength - 1) ++ { ++ outBuffer[outBufferPos] = '='; ++ outBuffer[outBufferPos + 1] = '\r'; ++ outBuffer[outBufferPos + 2] = '\n'; + +- outBufferPos += 3; +- curCol = 0; +- } ++ outBufferPos += 3; ++ curCol = 0; ++ } ++ ++ } // !rfc2047 + + ++inTotal; + +diff --git a/src/wordEncoder.cpp b/src/wordEncoder.cpp +index 22994ed..67bd7a1 100644 +--- a/src/wordEncoder.cpp ++++ b/src/wordEncoder.cpp +@@ -150,29 +150,9 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength) + while ((inputCount == 0 || outputCount < maxLength) && (inputCount < remaining)) + { + const unsigned char c = m_buffer[m_pos + inputCount]; +- bool encoded = true; +- +- switch (c) +- { +- case ',': +- case ';': +- case ':': +- case '_': +- case '=': +- +- encoded = true; +- break; +- +- default: +- +- if (c >= 33 && c <= 126 && c != 61) +- encoded = false; +- +- break; +- } + + inputCount++; +- outputCount += (encoded ? 3 : 1); ++ outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c); + } + + // Encode chunk +@@ -217,28 +197,7 @@ const string wordEncoder::getNextChunk(const string::size_type maxLength) + for (string::size_type i = 0, n = encodeBytes.length() ; i < n ; ++i) + { + const unsigned char c = encodeBytes[i]; +- bool encoded = true; +- +- switch (c) +- { +- case ',': +- case ';': +- case ':': +- case '_': +- case '=': +- +- encoded = true; +- break; +- +- default: +- +- if (c >= 33 && c <= 126 && c != 61) +- encoded = false; +- +- break; +- } +- +- outputCount += (encoded ? 3 : 1); ++ outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c); + } + } + +diff --git a/vmime/utility/encoder/qpEncoder.hpp b/vmime/utility/encoder/qpEncoder.hpp +index 098b4c8..a969126 100644 +--- a/vmime/utility/encoder/qpEncoder.hpp ++++ b/vmime/utility/encoder/qpEncoder.hpp +@@ -47,10 +47,14 @@ public: + + const std::vector getAvailableProperties() const; + ++ static bool RFC2047_isEncodingNeededForChar(const unsigned char c); ++ static int RFC2047_getEncodedLength(const unsigned char c); ++ + protected: + + static const unsigned char sm_hexDigits[17]; + static const unsigned char sm_hexDecodeTable[256]; ++ static const unsigned char sm_RFC2047EncodeTable[128]; + }; + + + +commit 3c46d1a864399d924a4a7c8a0cfdd348ecfd5fbc +Author: vincent-richard +Date: Tue Oct 12 20:01:34 2010 +0000 + + Fixed missing whitespace in text parsing. + + git-svn-id: https://vmime.svn.sourceforge.net/svnroot/vmime/trunk@568 5301114d-f842-0410-bbdd-996ee0417009 + +diff --git a/src/text.cpp b/src/text.cpp +index a2fe060..2454456 100644 +--- a/src/text.cpp ++++ b/src/text.cpp +@@ -299,6 +299,12 @@ void text::createFromString(const string& in, const charset& ch) + } + else + { ++ if (count) ++ { ++ ref w = getWordAt(getWordCount() - 1); ++ w->getBuffer() += ' '; ++ } ++ + appendWord(vmime::create (chunk, ch)); + + prevIs8bit = true; +@@ -314,6 +320,12 @@ void text::createFromString(const string& in, const charset& ch) + } + else + { ++ if (count) ++ { ++ ref w = getWordAt(getWordCount() - 1); ++ w->getBuffer() += ' '; ++ } ++ + appendWord(vmime::create + (chunk, charset(charsets::US_ASCII))); + +diff --git a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp +index b455d91..c60da5a 100644 +--- a/tests/parser/textTest.cpp ++++ b/tests/parser/textTest.cpp +@@ -47,6 +47,9 @@ VMIME_TEST_SUITE_BEGIN + VMIME_TEST(testWordGenerateQuote) + VMIME_TEST(testWordGenerateSpecialCharsets) + VMIME_TEST(testWordGenerateSpecials) ++ ++ VMIME_TEST(testWhitespace) ++ VMIME_TEST(testWhitespaceMBox) + VMIME_TEST_LIST_END + + +@@ -141,9 +144,9 @@ VMIME_TEST_SUITE_BEGIN + t2.createFromString(s2, c2); + + VASSERT_EQ("2.1", 3, t2.getWordCount()); +- VASSERT_EQ("2.2", "some ASCII characters and special chars:", t2.getWordAt(0)->getBuffer()); ++ VASSERT_EQ("2.2", "some ASCII characters and special chars: ", t2.getWordAt(0)->getBuffer()); + VASSERT_EQ("2.3", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(0)->getCharset()); +- VASSERT_EQ("2.4", "\xf1\xf2\xf3\xf4", t2.getWordAt(1)->getBuffer()); ++ VASSERT_EQ("2.4", "\xf1\xf2\xf3\xf4 ", t2.getWordAt(1)->getBuffer()); + VASSERT_EQ("2.5", c2, t2.getWordAt(1)->getCharset()); + VASSERT_EQ("2.6", "and then more ASCII chars.", t2.getWordAt(2)->getBuffer()); + VASSERT_EQ("2.7", vmime::charset(vmime::charsets::US_ASCII), t2.getWordAt(2)->getCharset()); +@@ -378,5 +381,52 @@ VMIME_TEST_SUITE_BEGIN + vmime::word("\x22\xC3\x9Cml\xC3\xA4ute\x22", vmime::charset("UTF-8")).generate()); + } + ++ void testWhitespace() ++ { ++ // Create ++ vmime::text text; ++ text.createFromString("Achim Br\xc3\xa4ndt", vmime::charsets::UTF_8); ++ ++ VASSERT_EQ("1", 2, text.getWordCount()); ++ VASSERT_EQ("2", "Achim ", text.getWordAt(0)->getBuffer()); ++ VASSERT_EQ("3", "us-ascii", text.getWordAt(0)->getCharset()); ++ VASSERT_EQ("4", "Br\xc3\xa4ndt", text.getWordAt(1)->getBuffer()); ++ VASSERT_EQ("5", "utf-8", text.getWordAt(1)->getCharset()); ++ ++ // Generate ++ VASSERT_EQ("6", "Achim =?utf-8?Q?Br=C3=A4ndt?=", text.generate()); ++ ++ // Parse ++ text.parse("=?us-ascii?Q?Achim_?= =?utf-8?Q?Br=C3=A4ndt?="); ++ ++ VASSERT_EQ("7", 2, text.getWordCount()); ++ VASSERT_EQ("8", "Achim ", text.getWordAt(0)->getBuffer()); ++ VASSERT_EQ("9", "us-ascii", text.getWordAt(0)->getCharset()); ++ VASSERT_EQ("10", "Br\xc3\xa4ndt", text.getWordAt(1)->getBuffer()); ++ VASSERT_EQ("11", "utf-8", text.getWordAt(1)->getCharset()); ++ } ++ ++ void testWhitespaceMBox() ++ { ++ // Space MUST be encoded inside a word ++ vmime::mailbox mbox(vmime::text("Achim Br\xc3\xa4ndt", vmime::charsets::UTF_8), "me@vmime.org"); ++ VASSERT_EQ("generate1", "=?us-ascii?Q?Achim_?= =?utf-8?Q?Br=C3=A4ndt?= ", mbox.generate()); ++ ++ vmime::text txt; ++ txt.appendWord(vmime::create ("Achim ", "us-ascii")); ++ txt.appendWord(vmime::create ("Br\xc3\xa4ndt", "utf-8")); ++ mbox = vmime::mailbox(txt, "me@vmime.org"); ++ VASSERT_EQ("generate2", "=?us-ascii?Q?Achim_?= =?utf-8?Q?Br=C3=A4ndt?= ", mbox.generate()); ++ ++ mbox.parse("=?us-ascii?Q?Achim?= =?utf-8?Q?Br=C3=A4ndt?= "); ++ VASSERT_EQ("parse.name.count", 2, mbox.getName().getWordCount()); ++ VASSERT_EQ("parse.name.word1.buffer", "Achim", mbox.getName().getWordAt(0)->getBuffer()); ++ VASSERT_EQ("parse.name.word1.charset", "us-ascii", mbox.getName().getWordAt(0)->getCharset()); ++ VASSERT_EQ("parse.name.word2.buffer", "Br\xc3\xa4ndt", mbox.getName().getWordAt(1)->getBuffer()); ++ VASSERT_EQ("parse.name.word2.charset", "utf-8", mbox.getName().getWordAt(1)->getCharset()); ++ ++ VASSERT_EQ("parse.email", "me@vmime.org", mbox.getEmail()); ++ } ++ + VMIME_TEST_SUITE_END +