changeset 11127:edaef4685b82

Update to Unicode 5.1.0.
author Bruno Haible <bruno@clisp.org>
date Sun, 08 Feb 2009 16:13:18 +0100
parents 7210d4209fd1
children 5bfb3f92b613
files ChangeLog lib/gen-uni-tables.c
diffstat 2 files changed, 188 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Feb 08 16:11:56 2009 +0100
+++ b/ChangeLog	Sun Feb 08 16:13:18 2009 +0100
@@ -1,6 +1,181 @@
 2009-02-08  Bruno Haible  <bruno@clisp.org>
 
 	Update to Unicode 5.1.0.
+	* lib/gen-uni-tables.c (is_property_alphabetic): Include
+	U+2185..U+2188.
+	(is_property_default_ignorable_code_point): Don't include characters
+	of category Cc or Cs, not-a-characters, U+0600..U+0603, U+06DD, U+070F.
+	* lib/unictype/bidi_of.h: Regenerated.
+	* lib/unictype/blocks.h: Regenerated.
+	* lib/unictype/categ_C.h: Regenerated.
+	* lib/unictype/categ_Cf.h: Regenerated.
+	* lib/unictype/categ_Cn.h: Regenerated.
+	* lib/unictype/categ_L.h: Regenerated.
+	* lib/unictype/categ_Ll.h: Regenerated.
+	* lib/unictype/categ_Lm.h: Regenerated.
+	* lib/unictype/categ_Lo.h: Regenerated.
+	* lib/unictype/categ_Lu.h: Regenerated.
+	* lib/unictype/categ_M.h: Regenerated.
+	* lib/unictype/categ_Mc.h: Regenerated.
+	* lib/unictype/categ_Me.h: Regenerated.
+	* lib/unictype/categ_Mn.h: Regenerated.
+	* lib/unictype/categ_N.h: Regenerated.
+	* lib/unictype/categ_Nd.h: Regenerated.
+	* lib/unictype/categ_Nl.h: Regenerated.
+	* lib/unictype/categ_No.h: Regenerated.
+	* lib/unictype/categ_P.h: Regenerated.
+	* lib/unictype/categ_Pd.h: Regenerated.
+	* lib/unictype/categ_Pe.h: Regenerated.
+	* lib/unictype/categ_Pf.h: Regenerated.
+	* lib/unictype/categ_Pi.h: Regenerated.
+	* lib/unictype/categ_Po.h: Regenerated.
+	* lib/unictype/categ_Ps.h: Regenerated.
+	* lib/unictype/categ_S.h: Regenerated.
+	* lib/unictype/categ_Sk.h: Regenerated.
+	* lib/unictype/categ_Sm.h: Regenerated.
+	* lib/unictype/categ_So.h: Regenerated.
+	* lib/unictype/categ_of.h: Regenerated.
+	* lib/unictype/combining.h: Regenerated.
+	* lib/unictype/ctype_alnum.h: Regenerated.
+	* lib/unictype/ctype_alpha.h: Regenerated.
+	* lib/unictype/ctype_graph.h: Regenerated.
+	* lib/unictype/ctype_lower.h: Regenerated.
+	* lib/unictype/ctype_print.h: Regenerated.
+	* lib/unictype/ctype_punct.h: Regenerated.
+	* lib/unictype/ctype_upper.h: Regenerated.
+	* lib/unictype/decdigit.h: Regenerated.
+	* lib/unictype/digit.h: Regenerated.
+	* lib/unictype/mirror.h: Regenerated.
+	* lib/unictype/numeric.h: Regenerated.
+	* lib/unictype/pr_alphabetic.h: Regenerated.
+	* lib/unictype/pr_bidi_arabic_digit.h: Regenerated.
+	* lib/unictype/pr_bidi_arabic_right_to_left.h: Regenerated.
+	* lib/unictype/pr_bidi_boundary_neutral.h: Regenerated.
+	* lib/unictype/pr_bidi_eur_num_terminator.h: Regenerated.
+	* lib/unictype/pr_bidi_left_to_right.h: Regenerated.
+	* lib/unictype/pr_bidi_non_spacing_mark.h: Regenerated.
+	* lib/unictype/pr_bidi_other_neutral.h: Regenerated.
+	* lib/unictype/pr_combining.h: Regenerated.
+	* lib/unictype/pr_dash.h: Regenerated.
+	* lib/unictype/pr_decimal_digit.h: Regenerated.
+	* lib/unictype/pr_default_ignorable_code_point.h: Regenerated.
+	* lib/unictype/pr_deprecated.h: Regenerated.
+	* lib/unictype/pr_diacritic.h: Regenerated.
+	* lib/unictype/pr_extender.h: Regenerated.
+	* lib/unictype/pr_format_control.h: Regenerated.
+	* lib/unictype/pr_grapheme_base.h: Regenerated.
+	* lib/unictype/pr_grapheme_extend.h: Regenerated.
+	* lib/unictype/pr_grapheme_link.h: Regenerated.
+	* lib/unictype/pr_id_continue.h: Regenerated.
+	* lib/unictype/pr_id_start.h: Regenerated.
+	* lib/unictype/pr_ideographic.h: Regenerated.
+	* lib/unictype/pr_ignorable_control.h: Regenerated.
+	* lib/unictype/pr_lowercase.h: Regenerated.
+	* lib/unictype/pr_math.h: Regenerated.
+	* lib/unictype/pr_numeric.h: Regenerated.
+	* lib/unictype/pr_other_alphabetic.h: Regenerated.
+	* lib/unictype/pr_other_default_ignorable_code_point.h: Regenerated.
+	* lib/unictype/pr_other_grapheme_extend.h: Regenerated.
+	* lib/unictype/pr_other_id_continue.h: Regenerated.
+	* lib/unictype/pr_other_lowercase.h: Regenerated.
+	* lib/unictype/pr_other_math.h: Regenerated.
+	* lib/unictype/pr_punctuation.h: Regenerated.
+	* lib/unictype/pr_sentence_terminal.h: Regenerated.
+	* lib/unictype/pr_soft_dotted.h: Regenerated.
+	* lib/unictype/pr_terminal_punctuation.h: Regenerated.
+	* lib/unictype/pr_unassigned_code_value.h: Regenerated.
+	* lib/unictype/pr_unified_ideograph.h: Regenerated.
+	* lib/unictype/pr_uppercase.h: Regenerated.
+	* lib/unictype/pr_xid_continue.h: Regenerated.
+	* lib/unictype/pr_xid_start.h: Regenerated.
+	* lib/unictype/pr_zero_width.h: Regenerated.
+	* lib/unictype/scripts.h: Regenerated.
+	* lib/unictype/scripts_byname.gperf: Regenerated.
+	* lib/unictype/sy_java_ident.h: Regenerated.
+	* lib/unilbrk/lbrkprop1.h: Regenerated.
+	* lib/unilbrk/lbrkprop2.h: Regenerated.
+	* tests/unictype/test-categ_C.c: Regenerated.
+	* tests/unictype/test-categ_Cf.c: Regenerated.
+	* tests/unictype/test-categ_Cn.c: Regenerated.
+	* tests/unictype/test-categ_L.c: Regenerated.
+	* tests/unictype/test-categ_Ll.c: Regenerated.
+	* tests/unictype/test-categ_Lm.c: Regenerated.
+	* tests/unictype/test-categ_Lo.c: Regenerated.
+	* tests/unictype/test-categ_Lu.c: Regenerated.
+	* tests/unictype/test-categ_M.c: Regenerated.
+	* tests/unictype/test-categ_Mc.c: Regenerated.
+	* tests/unictype/test-categ_Me.c: Regenerated.
+	* tests/unictype/test-categ_Mn.c: Regenerated.
+	* tests/unictype/test-categ_N.c: Regenerated.
+	* tests/unictype/test-categ_Nd.c: Regenerated.
+	* tests/unictype/test-categ_Nl.c: Regenerated.
+	* tests/unictype/test-categ_No.c: Regenerated.
+	* tests/unictype/test-categ_P.c: Regenerated.
+	* tests/unictype/test-categ_Pd.c: Regenerated.
+	* tests/unictype/test-categ_Pe.c: Regenerated.
+	* tests/unictype/test-categ_Pf.c: Regenerated.
+	* tests/unictype/test-categ_Pi.c: Regenerated.
+	* tests/unictype/test-categ_Po.c: Regenerated.
+	* tests/unictype/test-categ_Ps.c: Regenerated.
+	* tests/unictype/test-categ_S.c: Regenerated.
+	* tests/unictype/test-categ_Sk.c: Regenerated.
+	* tests/unictype/test-categ_Sm.c: Regenerated.
+	* tests/unictype/test-categ_So.c: Regenerated.
+	* tests/unictype/test-ctype_alnum.c: Regenerated.
+	* tests/unictype/test-ctype_alpha.c: Regenerated.
+	* tests/unictype/test-ctype_graph.c: Regenerated.
+	* tests/unictype/test-ctype_lower.c: Regenerated.
+	* tests/unictype/test-ctype_print.c: Regenerated.
+	* tests/unictype/test-ctype_punct.c: Regenerated.
+	* tests/unictype/test-ctype_upper.c: Regenerated.
+	* tests/unictype/test-decdigit.h: Regenerated.
+	* tests/unictype/test-digit.h: Regenerated.
+	* tests/unictype/test-numeric.h: Regenerated.
+	* tests/unictype/test-pr_alphabetic.c: Regenerated.
+	* tests/unictype/test-pr_bidi_arabic_digit.c: Regenerated.
+	* tests/unictype/test-pr_bidi_arabic_right_to_left.c: Regenerated.
+	* tests/unictype/test-pr_bidi_boundary_neutral.c: Regenerated.
+	* tests/unictype/test-pr_bidi_eur_num_terminator.c: Regenerated.
+	* tests/unictype/test-pr_bidi_left_to_right.c: Regenerated.
+	* tests/unictype/test-pr_bidi_non_spacing_mark.c: Regenerated.
+	* tests/unictype/test-pr_bidi_other_neutral.c: Regenerated.
+	* tests/unictype/test-pr_combining.c: Regenerated.
+	* tests/unictype/test-pr_dash.c: Regenerated.
+	* tests/unictype/test-pr_decimal_digit.c: Regenerated.
+	* tests/unictype/test-pr_default_ignorable_code_point.c: Regenerated.
+	* tests/unictype/test-pr_deprecated.c: Regenerated.
+	* tests/unictype/test-pr_diacritic.c: Regenerated.
+	* tests/unictype/test-pr_extender.c: Regenerated.
+	* tests/unictype/test-pr_format_control.c: Regenerated.
+	* tests/unictype/test-pr_grapheme_base.c: Regenerated.
+	* tests/unictype/test-pr_grapheme_extend.c: Regenerated.
+	* tests/unictype/test-pr_grapheme_link.c: Regenerated.
+	* tests/unictype/test-pr_id_continue.c: Regenerated.
+	* tests/unictype/test-pr_id_start.c: Regenerated.
+	* tests/unictype/test-pr_ideographic.c: Regenerated.
+	* tests/unictype/test-pr_ignorable_control.c: Regenerated.
+	* tests/unictype/test-pr_lowercase.c: Regenerated.
+	* tests/unictype/test-pr_math.c: Regenerated.
+	* tests/unictype/test-pr_numeric.c: Regenerated.
+	* tests/unictype/test-pr_other_alphabetic.c: Regenerated.
+	* tests/unictype/test-pr_other_default_ignorable_code_point.c:
+	Regenerated.
+	* tests/unictype/test-pr_other_grapheme_extend.c: Regenerated.
+	* tests/unictype/test-pr_other_id_continue.c: Regenerated.
+	* tests/unictype/test-pr_other_lowercase.c: Regenerated.
+	* tests/unictype/test-pr_other_math.c: Regenerated.
+	* tests/unictype/test-pr_punctuation.c: Regenerated.
+	* tests/unictype/test-pr_sentence_terminal.c: Regenerated.
+	* tests/unictype/test-pr_soft_dotted.c: Regenerated.
+	* tests/unictype/test-pr_terminal_punctuation.c: Regenerated.
+	* tests/unictype/test-pr_unassigned_code_value.c: Regenerated.
+	* tests/unictype/test-pr_unified_ideograph.c: Regenerated.
+	* tests/unictype/test-pr_uppercase.c: Regenerated.
+	* tests/unictype/test-pr_xid_continue.c: Regenerated.
+	* tests/unictype/test-pr_xid_start.c: Regenerated.
+	* tests/unictype/test-pr_zero_width.c: Regenerated.
+
+	Update to Unicode 5.1.0.
 	* lib/uniwidth/width.c (nonspacing_table_data): Add U+0487,
 	U+0616..U+061A, U+0A51, U+0A75, U+0B44, U+0B62..U+0B63, U+0C62..U+0C63,
 	U+0D44, U+0D62..U+0D63, U+1033..U+1035, U+103A, U+103D..U+103E,
--- a/lib/gen-uni-tables.c	Sun Feb 08 16:11:56 2009 +0100
+++ b/lib/gen-uni-tables.c	Sun Feb 08 16:13:18 2009 +0100
@@ -25,7 +25,7 @@
                       /usr/local/share/Unidata/PropList-3.0.1.txt \
                       /usr/local/share/Unidata/EastAsianWidth.txt \
                       /usr/local/share/Unidata/LineBreak.txt \
-                      5.0.0
+                      5.1.0
  */
 
 #include <stdbool.h>
@@ -2766,6 +2766,7 @@
        Alphabetic but not as having property Other_Alphabetic.  */
     || (ch >= 0x16EE && ch <= 0x16F0) /* RUNIC SYMBOLS */
     || (ch >= 0x2160 && ch <= 0x2182) /* ROMAN NUMERALS */
+    || (ch >= 0x2185 && ch <= 0x2188) /* ROMAN NUMERALS */
     || (ch >= 0x24D0 && ch <= 0x24E9) /* CIRCLED LATIN SMALL LETTER */
     || (ch == 0x3007) /* IDEOGRAPHIC NUMBER ZERO */
     || (ch >= 0x3021 && ch <= 0x3029) /* HANGZHOU NUMERAL */
@@ -2804,12 +2805,10 @@
 {
   bool result1 =
     (is_category_Cf (ch)
-     && !(ch >= 0xFFF9 && ch <= 0xFFFB)) /* Annotations */
-    || ((is_category_Cc (ch) || is_category_Cs (ch))
-	&& !is_property_white_space (ch))
+     && !(ch >= 0xFFF9 && ch <= 0xFFFB) /* Annotations */
+     && !((ch >= 0x0600 && ch <= 0x0603) || ch == 0x06DD || ch == 0x070F))
     || ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0)
-    || ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0)
-    || is_property_not_a_character (ch);
+    || ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0);
   bool result2 =
     ((unicode_properties[ch] & (1ULL << PROP_DEFAULT_IGNORABLE_CODE_POINT)) != 0);
 
@@ -6351,15 +6350,15 @@
  * compile-command: "
    gcc -O -Wall gen-uni-tables.c -Iunictype -o gen-uni-tables && \
    ./gen-uni-tables \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/UnicodeData.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/PropList.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/DerivedCoreProperties.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Scripts.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/Blocks.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/UnicodeData.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/PropList.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/DerivedCoreProperties.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/Scripts.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/Blocks.txt \
         /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/3.0.1/PropList-3.0.1.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/EastAsianWidth.txt \
-        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.0.0/ucd/LineBreak.txt \
-        5.0.0
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/EastAsianWidth.txt \
+        /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/LineBreak.txt \
+        5.1.0
    "
  * End:
  */