changeset 1003:964d56843408

upgrade package vmime to vmime-0.9.1-svn-r553 r552 | vincent-richard | 2010-05-21 09:41:15 +0200 (Fri, 21 May 2010) | 1 line Always encode special charsets. r553 | vincent-richard | 2010-05-21 11:32:42 +0200 (Fri, 21 May 2010) | 1 line Rewritten doc.
author Mark Brand <mabrand@mabrand.nl>
date Sat, 22 May 2010 13:25:49 +0200
parents 92bbc989fb89
children 4aa88ca6d470
files src/vmime-0.9.1-svn-r553-20100521.patch
diffstat 1 files changed, 280 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/vmime-0.9.1-svn-r553-20100521.patch	Sat May 22 13:25:49 2010 +0200
@@ -0,0 +1,280 @@
+This file is part of mingw-cross-env.
+See doc/index.html for further information.
+
+diff -urN a/src/utility/stringUtils.cpp b/src/utility/stringUtils.cpp
+--- a/src/utility/stringUtils.cpp	2010-05-22 13:23:38.947821913 +0200
++++ b/src/utility/stringUtils.cpp	2010-05-22 13:24:17.295331088 +0200
+@@ -151,6 +151,24 @@
+ }
+ 
+ 
++string::size_type stringUtils::findFirstNonASCIIchar
++	(const string::const_iterator begin, const string::const_iterator end)
++{
++	string::size_type pos = string::npos;  // stays npos when no character is rejected
++
++	for (string::const_iterator i = begin ; i != end ; ++i)
++	{
++		if (!parserHelpers::isAscii(*i))  // stop at the first character isAscii() rejects
++		{
++			pos = i - begin;  // offset is relative to 'begin'
++			break;
++		}
++	}
++
++	return pos;
++}
++
++
+ const string stringUtils::unquote(const string& str)
+ {
+ 	if (str.length() < 2)
+diff -urN a/src/word.cpp b/src/word.cpp
+--- a/src/word.cpp	2010-05-22 13:23:38.947821913 +0200
++++ b/src/word.cpp	2010-05-22 13:24:17.263325063 +0200
+@@ -336,30 +336,22 @@
+ 	if (state == NULL)
+ 		state = &defaultGeneratorState;
+ 
+-	// Calculate the number of ASCII chars to check whether encoding is needed
+-	// and _which_ encoding to use.
+-	const string::size_type asciiCount =
+-		utility::stringUtils::countASCIIchars(m_buffer.begin(), m_buffer.end());
++	// Find out if encoding is forced or required by contents + charset
++	bool encodingNeeded = (flags & text::FORCE_ENCODING) != 0;
+ 
+-	bool noEncoding = (flags & text::FORCE_NO_ENCODING) ||
+-	    (!(flags & text::FORCE_ENCODING) && asciiCount == m_buffer.length());
+-
+-	if (!(flags & text::FORCE_NO_ENCODING) &&
+-	    m_buffer.find_first_of("\n\r") != string::npos)
+-	{
+-		// Force encoding when there are only ASCII chars, but there is
+-		// also at least one of '\n' or '\r' (header fields)
+-		noEncoding = false;
+-	}
++	if ((flags & text::FORCE_NO_ENCODING) != 0)
++		encodingNeeded = false;  // explicit opt-out takes precedence over FORCE_ENCODING and auto-detection
++	else if (encodingNeeded == false)
++		encodingNeeded = wordEncoder::isEncodingNeeded(m_buffer, m_charset);  // not forced: decide from contents + charset
+ 
+ 	// If possible and requested (with flag), quote the buffer (no folding is performed).
+ 	// Quoting is possible if and only if:
+-	//  - the whole buffer is ASCII-only
++	//  - the buffer does not need to be encoded
+ 	//  - the buffer does not contain quoting character (")
+ 	//  - there is enough remaining space on the current line to hold the whole buffer
+-	if (!noEncoding &&
++	if (!encodingNeeded &&
+ 	    (flags & text::QUOTE_IF_POSSIBLE) &&
+-	    asciiCount == m_buffer.length() &&
++	    !encodingNeeded &&
+ 	    m_buffer.find('"') == string::npos &&
+ 	    (curLineLength + 2 /* 2 x " */ + m_buffer.length()) < maxLineLength)
+ 	{
+@@ -367,7 +359,7 @@
+ 		curLineLength += 2 + m_buffer.length();
+ 	}
+ 	// We will fold lines without encoding them.
+-	else if (noEncoding)
++	else if (!encodingNeeded)
+ 	{
+ 		string::const_iterator lastWSpos = m_buffer.end(); // last white-space position
+ 		string::const_iterator curLineStart = m_buffer.begin(); // current line start
+diff -urN a/src/wordEncoder.cpp b/src/wordEncoder.cpp
+--- a/src/wordEncoder.cpp	2010-05-22 13:23:38.947821913 +0200
++++ b/src/wordEncoder.cpp	2010-05-22 13:24:17.263325063 +0200
+@@ -260,17 +260,75 @@
+ }
+ 
+ 
++// Explicitly force encoding for some charsets (one entry = charset name fragment + encoding)
++struct CharsetEncodingEntry
++{
++	CharsetEncodingEntry(const std::string& charset_, const wordEncoder::Encoding encoding_)
++		: charset(charset_), encoding(encoding_)
++	{
++	}
++
++	std::string charset;  // searched as a substring of the lower-cased charset name
++	wordEncoder::Encoding encoding;  // encoding associated with charsets matching 'charset'
++};
++
++CharsetEncodingEntry g_charsetEncodingMap[] =
++{
++	// Use QP encoding for ISO-8859-x charsets (fragments are lower-case: they are searched in a lower-cased name)
++	CharsetEncodingEntry("iso-8859",     wordEncoder::ENCODING_QP),
++	CharsetEncodingEntry("iso8859",      wordEncoder::ENCODING_QP),
++
++	// RFC-1468 states:
++	//   " ISO-2022-JP may also be used in MIME Part 2 headers.  The "B"
++	//     encoding should be used with ISO-2022-JP text. "
++	// Use Base64 encoding for all ISO-2022 charsets.
++	CharsetEncodingEntry("iso-2022",     wordEncoder::ENCODING_B64),
++	CharsetEncodingEntry("iso2022",      wordEncoder::ENCODING_B64),
++
++	// Sentinel: lookups iterate over (element count - 1) entries, so this last one is never matched
++	CharsetEncodingEntry("", wordEncoder::ENCODING_AUTO)
++};
++
++
++// static
++bool wordEncoder::isEncodingNeeded(const string& buffer, const charset& charset)
++{
++	// A charset matching a non-AUTO entry of g_charsetEncodingMap is always encoded, whatever the buffer holds
++	const string cset = utility::stringUtils::toLower(charset.getName());
++
++	for (unsigned int i = 0 ; i < (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1 ; ++i)
++	{
++		if (cset.find(g_charsetEncodingMap[i].charset) != string::npos)
++		{
++			if (g_charsetEncodingMap[i].encoding != wordEncoder::ENCODING_AUTO)
++				return true;
++		}
++	}
++
++	// Encoding is needed as soon as the buffer contains at least one non-ASCII char
++	if (utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos)
++		return true;
++
++	// Force encoding when there are only ASCII chars, but there is
++	// also at least one of '\n' or '\r' (header fields)
++	if (buffer.find_first_of("\n\r") != string::npos)
++		return true;
++
++	return false;
++}
++
++
+ // static
+ wordEncoder::Encoding wordEncoder::guessBestEncoding
+ 	(const string& buffer, const charset& charset)
+ {
+-	// If the charset is ISO-8859-x, set to QP encoding
++	// Special treatment for some charsets
+ 	const string cset = utility::stringUtils::toLower(charset.getName());
+ 
+-	if (cset.find("iso-8859") != string::npos ||
+-	    cset.find("iso8859") != string::npos)
++	for (unsigned int i = 0 ; i < (sizeof(g_charsetEncodingMap) / sizeof(g_charsetEncodingMap[0])) - 1 ; ++i)
+ 	{
+-		return ENCODING_QP;
++		if (cset.find(g_charsetEncodingMap[i].charset) != string::npos)
++			return g_charsetEncodingMap[i].encoding;
+ 	}
+ 
+ 	// Use Base64 if more than 40% non-ASCII, or Quoted-Printable else (default)
+diff -urN a/tests/parser/textTest.cpp b/tests/parser/textTest.cpp
+--- a/tests/parser/textTest.cpp	2010-05-22 13:23:38.955319215 +0200
++++ b/tests/parser/textTest.cpp	2010-05-22 13:24:17.295331088 +0200
+@@ -44,6 +44,8 @@
+ 		VMIME_TEST(testWordGenerateSpace)
+ 		VMIME_TEST(testWordGenerateSpace2)
+ 		VMIME_TEST(testWordGenerateMultiBytes)
++		VMIME_TEST(testWordGenerateQuote)
++		VMIME_TEST(testWordGenerateSpecialCharsets)
+ 	VMIME_TEST_LIST_END
+ 
+ 
+@@ -335,9 +337,38 @@
+ 		VASSERT_EQ("1", "=?utf-8?Q?aaa?==?utf-8?Q?=C3=A9?==?utf-8?Q?zzz?=",
+ 			cleanGeneratedWords(vmime::word("aaa\xc3\xa9zzz", vmime::charset("utf-8")).generate(16)));
+ 
+-		VASSERT_EQ("1", "=?utf-8?Q?aaa=C3=A9?==?utf-8?Q?zzz?=",
++		VASSERT_EQ("2", "=?utf-8?Q?aaa=C3=A9?==?utf-8?Q?zzz?=",
+ 			cleanGeneratedWords(vmime::word("aaa\xc3\xa9zzz", vmime::charset("utf-8")).generate(17)));
+ 	}
+ 
++	void testWordGenerateQuote()  // checks when QUOTE_IF_POSSIBLE yields a quoted string vs. an encoded word
++	{
++		std::string str;
++		vmime::utility::outputStreamStringAdapter os(str);
++
++		// ASCII-only text is quotable: the output is the input wrapped in double quotes
++		str.clear();
++		vmime::word("Quoted text").generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL);
++		VASSERT_EQ("1", "\"Quoted text\"", cleanGeneratedWords(str));
++
++		// Text with CR/LF is not quotable: a Q-encoded word is expected instead
++		str.clear();
++		vmime::word("Non-quotable\ntext", "us-ascii").generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL);
++		VASSERT_EQ("2", "=?us-ascii?Q?Non-quotable=0Atext?=", cleanGeneratedWords(str));
++
++		// Text with non-ASCII chars is not quotable: a Q-encoded word is expected as well
++		str.clear();
++		vmime::word("Non-quotable text \xc3\xa9").generate(os, 1000, 0, NULL, vmime::text::QUOTE_IF_POSSIBLE, NULL);
++		VASSERT_EQ("3", "=?UTF-8?Q?Non-quotable_text_=C3=A9?=", cleanGeneratedWords(str));
++	}
++
++	void testWordGenerateSpecialCharsets()
++	{
++		// ISO-2022-JP only uses 7-bit chars but should be encoded in Base64. NOTE(review): input uses literal '^' '[' rather than ESC (0x1B) - confirm intended
++		VASSERT_EQ("1", "=?iso-2022-jp?B?XlskQiVRITwlPSVKJWshJiU9JVUlSCUmJSclIl5bKEI=?=",
++			cleanGeneratedWords(vmime::word("^[$B%Q!<%=%J%k!&%=%U%H%&%'%\"^[(B",
++				vmime::charset("iso-2022-jp")).generate(100)));
++	}
++
+ VMIME_TEST_SUITE_END
+ 
+diff -urN a/vmime/component.hpp b/vmime/component.hpp
+--- a/vmime/component.hpp	2010-05-22 13:23:38.967821097 +0200
++++ b/vmime/component.hpp	2010-05-22 13:24:17.295331088 +0200
+@@ -32,8 +32,8 @@
+ {
+ 
+ 
+-/** This abstract class is the base for all the classes in the library.
+-  * It defines the methods for parsing and generating all the components.
++/** This abstract class is the base class for all the components of a message.
++  * It defines methods for parsing and generating a component.
+   */
+ 
+ class component : public object
+diff -urN a/vmime/utility/stringUtils.hpp b/vmime/utility/stringUtils.hpp
+--- a/vmime/utility/stringUtils.hpp	2010-05-22 13:23:39.023322447 +0200
++++ b/vmime/utility/stringUtils.hpp	2010-05-22 13:24:17.295331088 +0200
+@@ -104,6 +104,14 @@
+ 	  */
+ 	static string::size_type countASCIIchars(const string::const_iterator begin, const string::const_iterator end);
+ 
++	/** Returns the position of the first non 7-bit US-ASCII character in a string.
++	  *
++	  * @param begin start position
++	  * @param end end position
++	  * @return offset from begin of the first non-ASCII character, or string::npos if there is none
++	  */
++	static string::size_type findFirstNonASCIIchar(const string::const_iterator begin, const string::const_iterator end);
++
+ 	/** Convert the specified value to a string value.
+ 	  *
+ 	  * @param value to convert
+diff -urN a/vmime/wordEncoder.hpp b/vmime/wordEncoder.hpp
+--- a/vmime/wordEncoder.hpp	2010-05-22 13:23:39.027319211 +0200
++++ b/vmime/wordEncoder.hpp	2010-05-22 13:24:17.295331088 +0200
+@@ -73,12 +73,23 @@
+ 	  */
+ 	Encoding getEncoding() const;
+ 
+-private:
++	/** Test whether RFC-2047 encoding is needed (special charset, non-ASCII char, or CR/LF in buffer).
++	  *
++	  * @param buffer buffer to analyze
++	  * @param charset charset of the buffer
++	  * @return true if encoding is needed, false otherwise.
++	  */
++	static bool isEncodingNeeded(const string& buffer, const charset& charset);
+ 
++	/** Guess the best RFC-2047 encoding to use for the specified buffer.
++	  *
++	  * @param buffer buffer to analyze
++	  * @param charset charset of the buffer
++	  * @return RFC-2047 encoding
++	  */
+ 	static Encoding guessBestEncoding(const string& buffer, const charset& charset);
+ 
+-	void guessBestEncoding();
+-
++private:
+ 
+ 	string m_buffer;
+ 	string::size_type m_pos;