changeset 19273:638b6d1fdf36 ueno/unicode-9.0.0

libunistring: update to Unicode 9.0.0 * lib/gen-uni-tables.c (fill_properties): Recognize Sentence_Terminal and Prepended_Concatenation_Mark. (is_property_default_ignorable_code_point): Exclude U+08E2. (fill_arabicshaping): Allow missing whitespace when parsing; recognize "AFRICAN FEH", "AFRICAN QAF", and "AFRICAN NOON". (output_blocks): Increase the element size of the level1 table to accommodate more blocks. (get_lbp): Recognize ZWJ, E_Base, and E_Modifier characters; update each class according to the standard. (get_wbp): Recognize ZWJ, E_Base, E_Modifier, Glue_After_Zwj, and E_Base_GAZ characters. (output_gbp_table): Recognize ZWJ, E_Base, E_Modifier, Glue_After_Zwj, and E_Base_GAZ characters. * lib/unictype.in.h (UC_JOINING_GROUP_AFRICAN_FEH, UC_JOINING_GROUP_AFRICAN_QAF, UC_JOINING_GROUP_AFRICAN_NOON): New enum values. * lib/unilbrk/lbrktables.h (LBP_ZWJ, LBP_EB, LBP_EM): New enum values. * lib/unilbrk/lbrktables.c (unilbrk_table): Extend the table with LBP_ZWJ, LBP_EB, and LBP_EM. * lib/uniwbrk.in.h (WBP_ZWJ, WBP_EB, WBP_EM, WBP_GAZ, WBP_EBG): New enum values. * lib/uniwbrk/u-wordbreaks.h: Implement WB3c, WB15, and WB16. * lib/uniwbrk/wbrktable.h (uniwbrk_prop_index): New variable declaration. * lib/uniwbrk/wbrktable.c (uniwbrk_prop_index): New variable. (uniwbrk_table): Implement WB14. * tests/uniwbrk/test-uc-wordbreaks.c (wordbreakproperty_to_string): Check WBP_ZWJ, WBP_EB, WBP_EM, WBP_GAZ, and WBP_EBG. * modules/unigbrk/u{32,16,8}-grapheme-breaks: No longer depend on uc-is-grapheme-break. * modules/unigbrk/uc-grapheme-breaks: New module. * modules/unigbrk/uc-grapheme-breaks-tests: New module. * lib/unigbrk.in.h (GBP_ZWJ, GBP_EB, GBP_EM, GBP_GAZ, GBP_EBG): New enum values. (uc_grapheme_breaks): New function, replacing uc_is_grapheme_break. * lib/unigbrk/u-grapheme-breaks.h: New file. * lib/unigbrk/u{32,16,8}-grapheme-breaks.c: Rewrite using u-grapheme-breaks.h instead of uc_is_grapheme_break. * lib/unigbrk/uc-grapheme-breaks.c: New file. 
* lib/unigbrk/uc-is-grapheme-break.c: Partially update to TR29 rev 29. * tests/unigbrk/test-uc-gbrk-prop.c (graphemebreakproperty_to_string): Check GBP_ZWJ, GBP_EB, GBP_EM, GBP_GAZ, and GBP_EBG. * tests/unigbrk/test-uc-grapheme-breaks.c: New test. * tests/unigbrk/test-uc-is-grapheme-break.c (graphemebreakproperty_to_string): Check GBP_ZWJ, GBP_EB, GBP_EM, GBP_GAZ, and GBP_EBG. (main): Skip unsupported rules involving 3 or more characters, namely GB10, GB12, and GB13. * lib/uniwidth/width.c (nonspacing_table_data): Update.
author Daiki Ueno <ueno@gnu.org>
date Wed, 12 Oct 2016 17:40:37 +0200
parents c20fd8143023
children
files lib/gen-uni-tables.c lib/unictype.in.h lib/unigbrk.in.h lib/unigbrk/u-grapheme-breaks.h lib/unigbrk/u16-grapheme-breaks.c lib/unigbrk/u32-grapheme-breaks.c lib/unigbrk/u8-grapheme-breaks.c lib/unigbrk/uc-gbrk-prop.c lib/unigbrk/uc-grapheme-breaks.c lib/unigbrk/uc-is-grapheme-break.c lib/unilbrk/lbrktables.c lib/unilbrk/lbrktables.h lib/uniname/gen-uninames.lisp lib/uniwbrk.in.h lib/uniwbrk/u-wordbreaks.h lib/uniwbrk/wbrktable.c lib/uniwbrk/wbrktable.h lib/uniwidth/width.c modules/unigbrk/u16-grapheme-breaks modules/unigbrk/u32-grapheme-breaks modules/unigbrk/u8-grapheme-breaks modules/unigbrk/uc-grapheme-breaks modules/unigbrk/uc-grapheme-breaks-tests tests/unigbrk/test-uc-gbrk-prop.c tests/unigbrk/test-uc-grapheme-breaks.c tests/unigbrk/test-uc-grapheme-breaks.sh tests/unigbrk/test-uc-is-grapheme-break.c tests/uniwbrk/test-uc-wordbreaks.c tests/uniwidth/test-uc_width2.sh
diffstat 29 files changed, 991 insertions(+), 250 deletions(-) [+]
line wrap: on
line diff
--- a/lib/gen-uni-tables.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/gen-uni-tables.c	Wed Oct 12 17:40:37 2016 +0200
@@ -32,7 +32,7 @@
                       /usr/local/share/Unidata/CompositionExclusions.txt \
                       /usr/local/share/Unidata/SpecialCasing.txt \
                       /usr/local/share/Unidata/CaseFolding.txt \
-                      8.0.0
+                      9.0.0
  */
 
 #include <assert.h>
@@ -2591,6 +2591,7 @@
   PROP_VARIATION_SELECTOR,
   PROP_PATTERN_WHITE_SPACE,
   PROP_PATTERN_SYNTAX,
+  PROP_PREPENDED_CONCATENATION_MARK,
   /* DerivedCoreProperties.txt */
   PROP_MATH,
   PROP_ALPHABETIC,
@@ -2692,10 +2693,11 @@
       PROP ("Logical_Order_Exception", PROP_LOGICAL_ORDER_EXCEPTION)
       PROP ("Other_ID_Start", PROP_OTHER_ID_START)
       PROP ("Other_ID_Continue", PROP_OTHER_ID_CONTINUE)
-      PROP ("STerm", PROP_STERM)
+      PROP ("Sentence_Terminal", PROP_STERM)
       PROP ("Variation_Selector", PROP_VARIATION_SELECTOR)
       PROP ("Pattern_White_Space", PROP_PATTERN_WHITE_SPACE)
       PROP ("Pattern_Syntax", PROP_PATTERN_SYNTAX)
+      PROP ("Prepended_Concatenation_Mark", PROP_PREPENDED_CONCATENATION_MARK)
       /* DerivedCoreProperties.txt */
       PROP ("Math", PROP_MATH)
       PROP ("Alphabetic", PROP_ALPHABETIC)
@@ -2890,7 +2892,8 @@
      && !((ch >= 0x0600 && ch <= 0x0605) || ch == 0x06DD || ch == 0x070F)
      /* For some reason, the following are not listed as having property
         Default_Ignorable_Code_Point.  */
-     && !(ch == 0x110BD))
+     && !(ch == 0x110BD)
+     && !(ch == 0x8E2))
     || ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0)
     || ((unicode_properties[ch] & (1ULL << PROP_VARIATION_SELECTOR)) != 0);
   bool result2 =
@@ -3787,7 +3790,10 @@
   UC_JOINING_GROUP_MANICHAEAN_FIVE,       /* Manichaean_Five */
   UC_JOINING_GROUP_MANICHAEAN_TEN,        /* Manichaean_Ten */
   UC_JOINING_GROUP_MANICHAEAN_TWENTY,     /* Manichaean_Twenty */
-  UC_JOINING_GROUP_MANICHAEAN_HUNDRED     /* Manichaean_Hundred */
+  UC_JOINING_GROUP_MANICHAEAN_HUNDRED,    /* Manichaean_Hundred */
+  UC_JOINING_GROUP_AFRICAN_FEH,           /* African_Feh */
+  UC_JOINING_GROUP_AFRICAN_QAF,           /* African_Qaf */
+  UC_JOINING_GROUP_AFRICAN_NOON           /* African_Noon */
 };
 
 static uint8_t unicode_joining_group[0x110000];
@@ -3815,30 +3821,26 @@
   lineno = 0;
   for (;;)
     {
-      char buf[100+1];
-      char separator1[100+1];
-      char padding1[100+1];
-      char schematic_name[100+1];
-      char separator2[100+1];
-      char padding2[100+1];
-      char joining_type_name[100+1];
-      char separator3[100+1];
-      char padding3[100+1];
-      char joining_group_name[100+1];
+      char buf[200+1];
+      char separator1[200+1];
+      char schematic_name[200+1];
+      char separator2[200+1];
+      char joining_type_name[200+1];
+      char separator3[200+1];
+      char joining_group_name[200+1];
       int joining_type;
       int joining_group;
 
       lineno++;
-      if (fscanf (stream, "%100[^\n]\n", buf) < 1)
+      if (fscanf (stream, "%200[^\n]\n", buf) < 1)
         break;
 
       if (buf[0] == '\0' || buf[0] == '#')
         continue;
 
-      if (sscanf (buf, "%X%[;]%[ ]%[^;]%[;]%[ ]%[^;]%[;]%[ ]%100[^\n]",
-                  &i, separator1, padding1, schematic_name, separator2,
-                  padding2, joining_type_name, separator3, padding3,
-                  joining_group_name) != 10)
+      if (sscanf (buf, "%X%[; ]%[^;]%[; ]%[^;]%[; ]%100[^\n]",
+                  &i, separator1, schematic_name, separator2, joining_type_name,
+                  separator3, joining_group_name) != 7)
         {
           fprintf (stderr, "parse error in '%s':%d\n",
                    arabicshaping_filename, lineno);
@@ -3955,6 +3957,9 @@
       TRY(UC_JOINING_GROUP_MANICHAEAN_TEN,        "MANICHAEAN TEN")
       TRY(UC_JOINING_GROUP_MANICHAEAN_TWENTY,     "MANICHAEAN TWENTY")
       TRY(UC_JOINING_GROUP_MANICHAEAN_HUNDRED,    "MANICHAEAN HUNDRED")
+      TRY(UC_JOINING_GROUP_AFRICAN_FEH,           "AFRICAN FEH")
+      TRY(UC_JOINING_GROUP_AFRICAN_QAF,           "AFRICAN QAF")
+      TRY(UC_JOINING_GROUP_AFRICAN_NOON,          "AFRICAN NOON")
 #undef TRY
       else
         {
@@ -4264,6 +4269,9 @@
   TRY(UC_JOINING_GROUP_MANICHAEAN_TEN)
   TRY(UC_JOINING_GROUP_MANICHAEAN_TWENTY)
   TRY(UC_JOINING_GROUP_MANICHAEAN_HUNDRED)
+  TRY(UC_JOINING_GROUP_AFRICAN_FEH)
+  TRY(UC_JOINING_GROUP_AFRICAN_QAF)
+  TRY(UC_JOINING_GROUP_AFRICAN_NOON)
 #undef TRY
   abort ();
 }
@@ -4901,7 +4909,7 @@
   fprintf (stream, "};\n");
   fprintf (stream, "#define blocks_level1_shift %d\n", shift);
   fprintf (stream, "#define blocks_level1_threshold 0x%04X\n", threshold);
-  fprintf (stream, "static const uint8_t blocks_level1[%d * 2] =\n",
+  fprintf (stream, "static const uint16_t blocks_level1[%d * 2] =\n",
            threshold >> shift);
   fprintf (stream, "{\n");
   for (i1 = 0; i1 < (threshold >> shift); i1++)
@@ -6292,22 +6300,22 @@
 
 enum
 {
-  /* Values >= 27 are resolved at run time. */
-  LBP_BK = 27, /* mandatory break */
+  /* Values >= 30 are resolved at run time. */
+  LBP_BK = 30, /* mandatory break */
 /*LBP_CR,         carriage return - not used here because it's a DOSism */
 /*LBP_LF,         line feed - not used here because it's a DOSism */
-  LBP_CM = 28, /* attached characters and combining marks */
+  LBP_CM = 31, /* attached characters and combining marks */
 /*LBP_NL,         next line - not used here because it's equivalent to LBP_BK */
 /*LBP_SG,         surrogates - not used here because they are not characters */
   LBP_WJ =  0, /* word joiner */
-  LBP_ZW = 29, /* zero width space */
+  LBP_ZW = 32, /* zero width space */
   LBP_GL =  1, /* non-breaking (glue) */
-  LBP_SP = 30, /* space */
+  LBP_SP = 33, /* space */
   LBP_B2 =  2, /* break opportunity before and after */
   LBP_BA =  3, /* break opportunity after */
   LBP_BB =  4, /* break opportunity before */
   LBP_HY =  5, /* hyphen */
-  LBP_CB = 31, /* contingent break opportunity */
+  LBP_CB = 34, /* contingent break opportunity */
   LBP_CL =  6, /* closing punctuation */
   LBP_CP =  7, /* closing parenthesis */
   LBP_EX =  8, /* exclamation/interrogation */
@@ -6320,7 +6328,7 @@
   LBP_PO = 15, /* postfix (numeric) */
   LBP_PR = 16, /* prefix (numeric) */
   LBP_SY = 17, /* symbols allowing breaks */
-  LBP_AI = 32, /* ambiguous (alphabetic or ideograph) */
+  LBP_AI = 35, /* ambiguous (alphabetic or ideograph) */
   LBP_AL = 18, /* ordinary alphabetic and symbol characters */
 /*LBP_CJ,         conditional Japanese starter, resolved to NS */
   LBP_H2 = 19, /* Hangul LV syllable */
@@ -6331,8 +6339,11 @@
   LBP_JV = 23, /* Hangul V Jamo */
   LBP_JT = 24, /* Hangul T Jamo */
   LBP_RI = 26, /* regional indicator */
-  LBP_SA = 33, /* complex context (South East Asian) */
-  LBP_XX = 34  /* unknown */
+  LBP_SA = 36, /* complex context (South East Asian) */
+  LBP_ZWJ = 27, /* zero width joiner */
+  LBP_EB = 28, /* emoji base */
+  LBP_EM = 29, /* emoji modifier */
+  LBP_XX = 37  /* unknown */
 };
 
 /* Returns the line breaking classification for ch, as a bit mask.  */
@@ -6363,6 +6374,45 @@
       if (ch == 0x200B /* ZERO WIDTH SPACE */)
         attr |= (int64_t) 1 << LBP_ZW;
 
+      /* zero width joiner */
+      if (ch == 0x200D /* ZERO WIDTH JOINER */)
+        attr |= (int64_t) 1 << LBP_ZWJ;
+
+      /* emoji base */
+      if (ch == 0x261D /* WHITE UP POINTING INDEX */
+          || ch == 0x26F9 /* PERSON WITH BALL */
+          || (ch >= 0x270A && ch <= 0x270D) /* RAISED FIST..WRITING HAND */
+          || ch == 0x1F385 /* FATHER CHRISTMAS */
+          || (ch >= 0x1F3C3 && ch <= 0x1F3C4) /* RUNNER..SURFER */
+          || (ch >= 0x1F3CA && ch <= 0x1F3CB) /* SWIMMER..WEIGHT LIFTER */
+          || (ch >= 0x1F442 && ch <= 0x1F443) /* EAR..NOSE */
+          || (ch >= 0x1F446 && ch <= 0x1F450) /* WHITE UP POINTING BACKHAND INDEX..OPEN HANDS SIGN */
+          || (ch >= 0x1F466 && ch <= 0x1F469) /* BOY..WOMAN */
+          || ch == 0x1F46E /* POLICE OFFICER */
+          || (ch >= 0x1F470 && ch <= 0x1F478) /* BRIDE WITH VEIL..PRINCESS */
+          || ch == 0x1F47C /* BABY ANGEL */
+          || (ch >= 0x1F481 && ch <= 0x1F483) /* INFORMATION DESK PERSON..DANCER */
+          || (ch >= 0x1F485 && ch <= 0x1F487) /* NAIL POLISH..HAIRCUT */
+          || ch == 0x1F4AA /* FLEXED BICEPS */
+          || ch == 0x1F575 /* SLEUTH OR SPY */
+          || ch == 0x1F57A /* MAN DANCING */
+          || ch == 0x1F590 /* RAISED HAND WITH FINGERS SPLAYED */
+          || (ch >= 0x1F595 && ch <= 0x1F596) /* REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS */
+          || (ch >= 0x1F645 && ch <= 0x1F647) /* FACE WITH NO GOOD GESTURE..PERSON BOWING DEEPLY */
+          || (ch >= 0x1F64B && ch <= 0x1F64F) /* HAPPY PERSON RAISING ONE HAND..PERSON WITH FOLDED HANDS */
+          || ch == 0x1F6A3 /* ROWBOAT */
+          || (ch >= 0x1F6B4 && ch <= 0x1F6B6) /* BICYCLIST..PEDESTRIAN */
+          || ch == 0x1F6C0 /* BATH */
+          || (ch >= 0x1F918 && ch <= 0x1F91E) /* SIGN OF THE HORNS..HAND WITH INDEX AND MIDDLE FINGERS CROSSED */
+          || ch == 0x1F926 /* FACE PALM */
+          || ch == 0x1F930 /* PREGNANT WOMAN */
+          || (ch >= 0x1F933 && ch <= 0x1F939) /* SELFIE..JUGGLING */
+          || (ch >= 0x1F93C && ch <= 0x1F93E) /* WRESTLERS..HANDBALL */)
+        attr |= (int64_t) 1 << LBP_EB;
+
+      if ((ch >= 0x1F3FB && ch <= 0x1F3FF) /* EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 */)
+        attr |= (int64_t) 1 << LBP_EM;
+
       /* non-breaking (glue) */
       if (ch == 0x00A0 /* NO-BREAK SPACE */
           || ch == 0x202F /* NARROW NO-BREAK SPACE */
@@ -6496,6 +6546,8 @@
           || ch == 0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER */
           || (ch >= 0x2E0E && ch <= 0x2E15) /* EDITORIAL CORONIS .. UPWARDS ANCORA */
           || ch == 0x2E17 /* DOUBLE OBLIQUE HYPHEN */
+          || ch == 0x2E43 /* DASH WITH LEFT UPTURN */
+          || ch == 0x2E44 /* DOUBLE SUSPENSION MARK */
           || ch == 0x2E3C /* STENOGRAPHIC FULL STOP */
           || ch == 0x2E3D /* VERTICAL SIX DOTS */
           || ch == 0x2E3E /* WIGGLY VERTICAL LINE */
@@ -6554,12 +6606,15 @@
           || ch == 0x1123B /* KHOJKI SECTION MARK */
           || ch == 0x1123C /* KHOJKI DOUBLE SECTION MARK */
           || ch == 0x112A9 /* MULTANI SECTION MARK */
+          || (ch >= 0x1144B && ch <= 0x1144E) /* NEWA DANDA..NEWA GAP FILLER */
+          || ch == 0x1145B /* NEWA PLACEHOLDER MARK */
           || ch == 0x115C2 /* SIDDHAM DANDA */
           || ch == 0x115C3 /* SIDDHAM DOUBLE DANDA */
           || (ch >= 0x115C9 && ch <= 0x115D7) /* SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES */
           || ch == 0x11641 /* MODI DANDA */
           || ch == 0x11642 /* MODI DOUBLE DANDA */
           || (ch >= 0x1173C && ch <= 0x1173E) /* AHOM SIGN SMALL SECTION..AHOM SIGN RULAI */
+          || (ch >= 0x11C41 && ch <= 0x11C45) /* BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 */
           || ch == 0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON */
           || ch == 0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON */
           || ch == 0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON */
@@ -6598,7 +6653,9 @@
           || ch == 0x1806 /* MONGOLIAN TODO SOFT HYPHEN */
           || ch == 0x11175 /* MAHAJANI SECTION MARK */
           || ch == 0x111DB /* SHARADA SIGN SIDDHAM */
-          || ch == 0x115C1 /* SIDDHAM SIGN SIDDHAM */)
+          || ch == 0x115C1 /* SIDDHAM SIGN SIDDHAM */
+          || (ch >= 0x11660 && ch <= 0x1166C) /* MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT */
+          || ch == 0x11C70 /* MARCHEN HEAD MARK */)
         attr |= (int64_t) 1 << LBP_BB;
 
       /* hyphen */
@@ -6676,7 +6733,8 @@
           || ch == 0xFF01 /* FULLWIDTH EXCLAMATION MARK */
           || ch == 0xFF1F /* FULLWIDTH QUESTION MARK */
           || ch == 0x115C4 /* SIDDHAM SEPARATOR DOT */
-          || ch == 0x115C5 /* SIDDHAM SEPARATOR BAR */)
+          || ch == 0x115C5 /* SIDDHAM SEPARATOR BAR */
+          || ch == 0x11C71 /* MARCHEN MARK SHAD */)
         attr |= (int64_t) 1 << LBP_EX;
 
       /* inseparable */
@@ -6717,6 +6775,7 @@
           || ch == 0xFF70 /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */
           || ch == 0xFF9E /* HALFWIDTH KATAKANA VOICED SOUND MARK */
           || ch == 0xFF9F /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */
+          || ch == 0x16FE0 /* TANGUT ITERATION MARK */
           || ch == 0x1F679 /* HEAVY INTERROBANG ORNAMENT */
           || ch == 0x1F67A /* SANS-SERIF INTERROBANG ORNAMENT */
           || ch == 0x1F67B /* HEAVY SANS-SERIF INTERROBANG ORNAMENT */
@@ -6737,7 +6796,8 @@
           || ch == 0x13286 /* EGYPTIAN HIEROGLYPH O036A */
           || ch == 0x13288 /* EGYPTIAN HIEROGLYPH O036C */
           || ch == 0x13379 /* EGYPTIAN HIEROGLYPH V011A */
-          || ch == 0x145CE /* ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK */)
+          || ch == 0x145CE /* ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK */
+          || (ch >= 0x1E95E && ch <= 0x1E95F) /* ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK */)
         attr |= (int64_t) 1 << LBP_OP;
 
       /* ambiguous quotation */
@@ -6905,9 +6965,10 @@
           || (unicode_attributes[ch].category[0] == 'C'
               && (unicode_attributes[ch].category[1] == 'c'
                   || unicode_attributes[ch].category[1] == 'f')
-              && ch != 0x110BD /* KAITHI NUMBER SIGN */)
+              && ch != 0x110BD /* KAITHI NUMBER SIGN */
+              && ch != 0x08E2 /* ARABIC DISPUTED END OF AYAH */)
           || ch == 0x3035 /* VERTICAL KANA REPEAT MARK LOWER HALF */)
-        if (!(attr & (((int64_t) 1 << LBP_BK) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_WJ) | ((int64_t) 1 << LBP_ZW))))
+        if (!(attr & (((int64_t) 1 << LBP_BK) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_WJ) | ((int64_t) 1 << LBP_ZW) | ((int64_t) 1 << LBP_ZWJ))))
           attr |= (int64_t) 1 << LBP_CM;
 
       /* ideographic */
@@ -6983,6 +7044,7 @@
           || ch == 0x270B /* RAISED HAND */
           || ch == 0x270C /* VICTORY HAND */
           || ch == 0x270D /* WRITING HAND */
+          || ch == 0x2764 /* HEAVY BLACK HEART */
           || (ch >= 0x2E80 && ch <= 0x2FFF) /* CJK RADICAL, KANGXI RADICAL, IDEOGRAPHIC DESCRIPTION */
           || (ch >= 0x3040 && ch <= 0x309F) /* HIRAGANA */
           || (ch >= 0x30A0 && ch <= 0x30FF) /* KATAKANA */
@@ -7046,6 +7108,15 @@
           || ch == 0xFFE3 /* FULLWIDTH MACRON */
           || ch == 0xFFE4 /* FULLWIDTH BROKEN BAR */
           /* Extra characters for compatibility with Unicode LineBreak.txt.  */
+          || ch == 0xFF66 /* Halfwidth Katakana */
+          || (ch >= 0xFF71 && ch <= 0xFF9D) /* Halfwidth Katakana */
+          || (ch >= 0xFFA0 && ch <= 0xFFBE) /* Halfwidth Hangul */
+          || (ch >= 0xFFC2 && ch <= 0xFFC7) /* Halfwidth Hangul */
+          || (ch >= 0xFFCA && ch <= 0xFFCF) /* Halfwidth Hangul */
+          || (ch >= 0xFFD2 && ch <= 0xFFD7) /* Halfwidth Hangul */
+          || (ch >= 0xFFDA && ch <= 0xFFDC) /* Halfwidth Hangul */
+          || (ch >= 0x17000 && ch <= 0x187EC) /* Tangut Ideograph */
+          || (ch >= 0x18800 && ch <= 0x18AF2) /* Tangut Ideograph */
           || (ch >= 0x1B000 && ch <= 0x1B001) /* Kana Supplement */
           || (ch >= 0x1F000 && ch <= 0x1F02B) /* Mahjong Tiles */
           || (ch >= 0x1F030 && ch <= 0x1F093) /* Domino Tiles */
@@ -7064,14 +7135,14 @@
               && !(ch >= 0x1F5D4 && ch <= 0x1F5DB)
               && !(ch >= 0x1F5F4 && ch <= 0x1F5F9))
           || (ch >= 0x1F600 && ch <= 0x1F64F) /* Emoticons */
-          || (ch >= 0x1F680 && ch <= 0x1F6D0) /* Transport and Map Symbols */
+          || (ch >= 0x1F680 && ch <= 0x1F6DF) /* Transport and Map Symbols */
           || (ch >= 0x1F6E0 && ch <= 0x1F6EC) /* Transport and Map Symbols */
-          || (ch >= 0x1F6F0 && ch <= 0x1F6F3) /* Transport and Map Symbols */
+          || (ch >= 0x1F6F0 && ch <= 0x1F6F6) /* Transport and Map Symbols */
           || (ch >= 0x1F900 && ch <= 0x1F9FF) /* Supplemental Symbols and Pictographs */
           || (ch >= 0x2A700 && ch <= 0x2B734) /* CJK Ideograph Extension C */
           || (ch >= 0x2B740 && ch <= 0x2B81D) /* CJK Ideograph Extension D */
           || (ch >= 0x2B820 && ch <= 0x2CEAF) /* CJK Ideograph Extension E */)
-        if (!(attr & (((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_CM))))
+        if (!(attr & (((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_CM) | ((int64_t) 1 << LBP_EB))))
           {
             /* ambiguous (ideograph) ? */
             if ((unicode_width[ch] != NULL
@@ -7134,13 +7205,14 @@
           || ch == 0x0605 /* ARABIC NUMBER MARK ABOVE */
           || ch == 0x06DD /* ARABIC END OF AYAH */
           || ch == 0x070F /* SYRIAC ABBREVIATION MARK */
+          || ch == 0x08E2 /* ARABIC DISPUTED END OF AYAH */
           || ch == 0x2061 /* FUNCTION APPLICATION */
           || ch == 0x2062 /* INVISIBLE TIMES */
           || ch == 0x2063 /* INVISIBLE SEPARATOR */
           || ch == 0x2064 /* INVISIBLE PLUS */
           /* Extra characters for compatibility with Unicode LineBreak.txt.  */
           || ch == 0x110BD /* KAITHI NUMBER SIGN */)
-        if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) | ((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) | ((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) | ((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) | ((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) | ((int64_t) 1 << LBP_HL) | ((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) | ((int64_t) 1 << LBP_JT) | ((int64_t) 1 << LBP_RI) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID)))
+        if (!(attr & (((int64_t) 1 << LBP_GL) | ((int64_t) 1 << LBP_B2) | ((int64_t) 1 << LBP_BA) | ((int64_t) 1 << LBP_BB) | ((int64_t) 1 << LBP_HY) | ((int64_t) 1 << LBP_CB) | ((int64_t) 1 << LBP_CL) | ((int64_t) 1 << LBP_CP) | ((int64_t) 1 << LBP_EX) | ((int64_t) 1 << LBP_IN) | ((int64_t) 1 << LBP_NS) | ((int64_t) 1 << LBP_OP) | ((int64_t) 1 << LBP_QU) | ((int64_t) 1 << LBP_IS) | ((int64_t) 1 << LBP_NU) | ((int64_t) 1 << LBP_PO) | ((int64_t) 1 << LBP_PR) | ((int64_t) 1 << LBP_SY) | ((int64_t) 1 << LBP_H2) | ((int64_t) 1 << LBP_H3) | ((int64_t) 1 << LBP_HL) | ((int64_t) 1 << LBP_JL) | ((int64_t) 1 << LBP_JV) | ((int64_t) 1 << LBP_JT) | ((int64_t) 1 << LBP_RI) | ((int64_t) 1 << LBP_SA) | ((int64_t) 1 << LBP_ID) | ((int64_t) 1 << LBP_EB) | ((int64_t) 1 << LBP_EM)))
             && ch != 0x3035 /* VERTICAL KANA REPEAT MARK LOWER HALF */)
           {
             /* ambiguous (alphabetic) ? */
@@ -7192,7 +7264,11 @@
                 || ch == 0x2574 /* BOX DRAWINGS LIGHT LEFT */
                 || ch == 0x2616 /* WHITE SHOGI PIECE */
                 || ch == 0x2617 /* BLACK SHOGI PIECE */
+                || ch == 0x2757 /* HEAVY EXCLAMATION MARK SYMBOL */
+                || ch == 0x2B55 /* HEAVY LARGE CIRCLE */
                 || ch == 0x1F10B /* DINGBAT CIRCLED SANS-SERIF DIGIT ZERO */
+                || ch == 0x1F18E /* NEGATIVE SQUARED AB */
+                || (ch >= 0x1F191 && ch <= 0x1F19A) /* SQUARED CL..SQUARED VS */
                 || ch == 0x1F10C /* DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO */)
               attr |= (int64_t) 1 << LBP_AI;
             else
@@ -7206,6 +7282,38 @@
       if ((ch >= 0x3400 && ch <= 0x4DBF) /* CJK Unified Ideographs Extension A */
           || (ch >= 0x4E00 && ch <= 0x9FFF) /* CJK Unified Ideographs */
           || (ch >= 0xF900 && ch <= 0xFAFF) /* CJK Compatibility Ideographs */
+          || (ch >= 0x1F02C && ch <= 0x1F02F) /* reserved */
+          || (ch >= 0x1F094 && ch <= 0x1F09F) /* reserved */
+          || (ch >= 0x1F0AF && ch <= 0x1F0B0) /* reserved */
+          || ch == 0x1F0C0 /* reserved */
+          || ch == 0x1F0D0 /* reserved */
+          || (ch >= 0x1F0F6 && ch <= 0x1F0FF) /* reserved */
+          || (ch >= 0x1F10D && ch <= 0x1F10F) /* reserved */
+          || ch == 0x1F12F /* reserved */
+          || (ch >= 0x1F16C && ch <= 0x1F16F) /* reserved */
+          || (ch >= 0x1F1AD && ch <= 0x1F1E5) /* reserved */
+          || (ch >= 0x1F203 && ch <= 0x1F20F) /* reserved */
+          || (ch >= 0x1F23C && ch <= 0x1F23F) /* reserved */
+          || (ch >= 0x1F249 && ch <= 0x1F24F) /* reserved */
+          || (ch >= 0x1F252 && ch <= 0x1F2FF) /* reserved */
+          || (ch >= 0x1F6D3 && ch <= 0x1F6DF) /* reserved */
+          || (ch >= 0x1F6ED && ch <= 0x1F6EF) /* reserved */
+          || (ch >= 0x1F6F7 && ch <= 0x1F6FF) /* reserved */
+          || (ch >= 0x1F774 && ch <= 0x1F77F) /* reserved */
+          || (ch >= 0x1F7D5 && ch <= 0x1F7FF) /* reserved */
+          || (ch >= 0x1F80C && ch <= 0x1F80F) /* reserved */
+          || (ch >= 0x1F848 && ch <= 0x1F84F) /* reserved */
+          || (ch >= 0x1F85A && ch <= 0x1F85F) /* reserved */
+          || (ch >= 0x1F888 && ch <= 0x1F88F) /* reserved */
+          || (ch >= 0x1F8AE && ch <= 0x1F90F) /* reserved */
+          || ch == 0x1F91F /* reserved */
+          || ch == 0x1F93F /* reserved */
+          || (ch >= 0x1F928 && ch <= 0x1F92F) /* reserved */
+          || (ch >= 0x1F931 && ch <= 0x1F932) /* reserved */
+          || (ch >= 0x1F94C && ch <= 0x1F94F) /* reserved */
+          || (ch >= 0x1F95F && ch <= 0x1F97F) /* reserved */
+          || (ch >= 0x1F992 && ch <= 0x1F9BF) /* reserved */
+          || (ch >= 0x1F9C1 && ch <= 0x1FFFD) /* reserved */
           || (ch >= 0x20000 && ch <= 0x2A6FF) /* CJK Unified Ideographs Extension B */
           || (ch >= 0x2A700 && ch <= 0x2F7FF) /* CJK Unified Ideographs Extension C,
                                                  Supplementary Ideographic Plane (Plane 2) outside of blocks */
@@ -7270,6 +7378,9 @@
           PRINT_BIT(attr,LBP_JT);
           PRINT_BIT(attr,LBP_RI);
           PRINT_BIT(attr,LBP_SA);
+          PRINT_BIT(attr,LBP_ZWJ);
+          PRINT_BIT(attr,LBP_EB);
+          PRINT_BIT(attr,LBP_EM);
           PRINT_BIT(attr,LBP_XX);
 #undef PRINT_BIT
           fprintf (stream, "\n");
@@ -7386,6 +7497,9 @@
       TRY(LBP_JT)
       TRY(LBP_RI)
       TRY(LBP_SA)
+      TRY(LBP_ZWJ)
+      TRY(LBP_EB)
+      TRY(LBP_EM)
       TRY(LBP_XX)
 #undef TRY
       else if (strcmp (field1, "LF") == 0) value = LBP_BK;
@@ -7469,6 +7583,9 @@
           PRINT_BIT(attr,LBP_JT);
           PRINT_BIT(attr,LBP_RI);
           PRINT_BIT(attr,LBP_SA);
+          PRINT_BIT(attr,LBP_ZWJ);
+          PRINT_BIT(attr,LBP_EB);
+          PRINT_BIT(attr,LBP_EM);
           PRINT_BIT(attr,LBP_XX);
 #undef PRINT_BIT
           fprintf (stream, "\n");
@@ -7643,6 +7760,9 @@
           CASE(LBP_JT);
           CASE(LBP_RI);
           CASE(LBP_SA);
+          CASE(LBP_ZWJ);
+          CASE(LBP_EB);
+          CASE(LBP_EM);
           CASE(LBP_XX);
 #undef CASE
           default:
@@ -7745,7 +7865,12 @@
   WBP_RI           = 13,
   WBP_DQ           = 14,
   WBP_SQ           = 15,
-  WBP_HL           = 16
+  WBP_HL           = 16,
+  WBP_ZWJ          = 17,
+  WBP_EB           = 18,
+  WBP_EM           = 19,
+  WBP_GAZ          = 20,
+  WBP_EBG          = 21
 };
 
 /* Returns the word breaking property for ch, as a bit mask.  */
@@ -7768,13 +7893,15 @@
         attr |= 1 << WBP_NEWLINE;
 
       if (((unicode_properties[ch] >> PROP_GRAPHEME_EXTEND) & 1) != 0
+          || ((unicode_properties[ch] >> PROP_OTHER_GRAPHEME_EXTEND) & 1) != 0
           || (unicode_attributes[ch].category != NULL
               && strcmp (unicode_attributes[ch].category, "Mc") == 0))
         attr |= 1 << WBP_EXTEND;
 
       if (unicode_attributes[ch].category != NULL
           && strcmp (unicode_attributes[ch].category, "Cf") == 0
-          && ch != 0x200B && ch != 0x200C && ch != 0x200D)
+          && ch != 0x200B && ch != 0x200C && ch != 0x200D
+          && !(ch >= 0xe0020 && ch <= 0xe007f))
         attr |= 1 << WBP_FORMAT;
 
       if ((unicode_scripts[ch] < numscripts
@@ -7816,8 +7943,9 @@
           && ch != 0x066C)
         attr |= 1 << WBP_NUMERIC;
 
-      if (unicode_attributes[ch].category != NULL
-          && strcmp (unicode_attributes[ch].category, "Pc") == 0)
+      if ((unicode_attributes[ch].category != NULL
+           && strcmp (unicode_attributes[ch].category, "Pc") == 0)
+          || ch == 0x202F /* NARROW NO-BREAK SPACE */)
         attr |= 1 << WBP_EXTENDNUMLET;
 
       if (((get_lbp (ch) >> LBP_RI) & 1) != 0)
@@ -7828,6 +7956,20 @@
 
       if (ch == 0x0027)
         attr |= 1 << WBP_SQ;
+
+      if (ch == 0x200D)
+        attr |= 1 << WBP_ZWJ;
+
+      if (ch >= 0x1F466 && ch <= 0x1F469)
+        attr |= 1 << WBP_EBG;
+      else if (((get_lbp (ch) >> LBP_EB) & 1) != 0)
+        attr |= 1 << WBP_EB;
+
+      if (((get_lbp (ch) >> LBP_EM) & 1) != 0)
+        attr |= 1 << WBP_EM;
+
+      if (ch == 0x2764 || ch == 0x1F48B || ch == 0x1F5E8)
+        attr |= 1 << WBP_GAZ;
     }
 
   if (attr == 0)
@@ -7881,6 +8023,16 @@
             fprintf (stream, " Single_Quote");
           if (attr & (1 << WBP_HL))
             fprintf (stream, " Hebrew_Letter");
+          if (attr & (1 << WBP_ZWJ))
+            fprintf (stream, " ZWJ");
+          if (attr & (1 << WBP_EB))
+            fprintf (stream, " E_Base");
+          if (attr & (1 << WBP_EM))
+            fprintf (stream, " E_Modifier");
+          if (attr & (1 << WBP_GAZ))
+            fprintf (stream, " Glue_After_Zwj");
+          if (attr & (1 << WBP_EBG))
+            fprintf (stream, " E_Base_GAZ");
          fprintf (stream, "\n");
         }
     }
@@ -7970,6 +8122,11 @@
       PROP ("Double_Quote", WBP_DQ)
       PROP ("Single_Quote", WBP_SQ)
       PROP ("Hebrew_Letter", WBP_HL)
+      PROP ("ZWJ", WBP_ZWJ)
+      PROP ("E_Base", WBP_EB)
+      PROP ("E_Modifier", WBP_EM)
+      PROP ("Glue_After_Zwj", WBP_GAZ)
+      PROP ("E_Base_GAZ", WBP_EBG)
 #undef PROP
         {
           fprintf (stderr, "unknown property value '%s' in '%s'\n", propname,
@@ -8019,6 +8176,11 @@
           PROP ("Double_Quote", WBP_DQ)
           PROP ("Single_Quote", WBP_SQ)
           PROP ("Hebrew_Letter", WBP_HL)
+          PROP ("ZWJ", WBP_ZWJ)
+          PROP ("E_Base", WBP_EB)
+          PROP ("E_Modifier", WBP_EM)
+          PROP ("Glue_After_Zwj", WBP_GAZ)
+          PROP ("E_Base_GAZ", WBP_EBG)
 #undef PROP
           fprintf (stream, " ??");
           fprintf (stream, "\n");
@@ -8174,6 +8336,11 @@
           CASE(WBP_DQ);
           CASE(WBP_SQ);
           CASE(WBP_HL);
+          CASE(WBP_ZWJ);
+          CASE(WBP_EB);
+          CASE(WBP_EM);
+          CASE(WBP_GAZ);
+          CASE(WBP_EBG);
 #undef CASE
           default:
             abort ();
@@ -8238,7 +8405,7 @@
 /* ========================================================================= */
 
 /* Grapheme break property.
-   Updated for Unicode TR #29 revision 17.  */
+   Updated for Unicode TR #29 revision 29.  */
 
 /* Possible values of the Grapheme_Cluster_Break property.  */
 enum
@@ -8255,7 +8422,12 @@
   GBP_T            = 9,
   GBP_LV           = 10,
   GBP_LVT          = 11,
-  GBP_RI           = 12
+  GBP_RI           = 12,
+  GBP_ZWJ          = 13,
+  GBP_EB           = 14,
+  GBP_EM           = 15,
+  GBP_GAZ          = 16,
+  GBP_EBG          = 17
 };
 
 /* Construction of sparse 3-level tables.  */
@@ -8327,6 +8499,11 @@
       CASE (GBP_LV)
       CASE (GBP_LVT)
       CASE (GBP_RI)
+      CASE (GBP_ZWJ)
+      CASE (GBP_EB)
+      CASE (GBP_EM)
+      CASE (GBP_GAZ)
+      CASE (GBP_EBG)
 #undef CASE
         default:
           abort ();
@@ -8396,7 +8573,7 @@
   fprintf (stream, "  {\n");
   fprintf (stream, "    int level1[%zu];\n", t.level1_size);
   fprintf (stream, "    short level2[%zu << %d];\n", t.level2_size, t.q);
-  fprintf (stream, "    unsigned char level3[(%zu << %d) / 2];\n",
+  fprintf (stream, "    unsigned char level3[%zu << %d];\n",
            t.level3_size, t.p);
   fprintf (stream, "  }\n");
   fprintf (stream, "unigbrkprop =\n");
@@ -8434,7 +8611,7 @@
         fprintf (stream, " %5d", -1);
       else
         fprintf (stream, " %5zu",
-                 (offset - level3_offset) / sizeof (uint8_t) / 2);
+                 (offset - level3_offset) / sizeof (uint8_t));
       if (i+1 < t.level2_size << t.q)
         fprintf (stream, ",");
     }
@@ -8442,19 +8619,43 @@
     fprintf (stream, "\n ");
   fprintf (stream, " },\n");
   fprintf (stream, "  {");
-  if (t.level3_size << t.p > 8)
+  if (t.level3_size << t.p > 4)
     fprintf (stream, "\n   ");
-  for (i = 0; i < (t.level3_size << t.p) / 2; i++)
-    {
-      unsigned char *p = (unsigned char *) (t.result + level3_offset);
-      unsigned char value0 = p[i * 2];
-      unsigned char value1 = p[i * 2 + 1];
-      if (i > 0 && (i % 8) == 0)
+  for (i = 0; i < t.level3_size << t.p; i++)
+    {
+      unsigned char value = ((unsigned char *) (t.result + level3_offset))[i];
+      const char *value_string;
+      switch (value)
+        {
+#define CASE(x) case x: value_string = #x; break;
+      CASE (GBP_OTHER)
+      CASE (GBP_CR)
+      CASE (GBP_LF)
+      CASE (GBP_CONTROL)
+      CASE (GBP_EXTEND)
+      CASE (GBP_PREPEND)
+      CASE (GBP_SPACINGMARK)
+      CASE (GBP_L)
+      CASE (GBP_V)
+      CASE (GBP_T)
+      CASE (GBP_LV)
+      CASE (GBP_LVT)
+      CASE (GBP_RI)
+      CASE (GBP_ZWJ)
+      CASE (GBP_EB)
+      CASE (GBP_EM)
+      CASE (GBP_GAZ)
+      CASE (GBP_EBG)
+#undef CASE
+          default:
+            abort ();
+        }
+      if (i > 0 && (i % 4) == 0)
         fprintf (stream, "\n   ");
-      fprintf (stream, " 0x%02x%s", (value1 << 4) + value0,
-               (i+1 < (t.level3_size << t.p) / 2 ? "," : ""));
-    }
-  if (t.level3_size << t.p > 8)
+      fprintf (stream, " %s%s", value_string,
+               (i+1 < t.level3_size << t.p ? "," : ""));
+    }
+  if (t.level3_size << t.p > 4)
     fprintf (stream, "\n ");
   fprintf (stream, " }\n");
   fprintf (stream, "};\n");
@@ -8525,6 +8726,11 @@
       PROP ("LV", GBP_LV)
       PROP ("LVT", GBP_LVT)
       PROP ("Regional_Indicator", GBP_RI)
+      PROP ("ZWJ", GBP_ZWJ)
+      PROP ("E_Base", GBP_EB)
+      PROP ("E_Modifier", GBP_EM)
+      PROP ("Glue_After_Zwj", GBP_GAZ)
+      PROP ("E_Base_GAZ", GBP_EBG)
 #undef PROP
         {
           fprintf (stderr, "unknown property value '%s' in %s:%d\n", propname,
--- a/lib/unictype.in.h	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unictype.in.h	Wed Oct 12 17:40:37 2016 +0200
@@ -551,7 +551,10 @@
   UC_JOINING_GROUP_MANICHAEAN_FIVE,       /* Manichaean_Five */
   UC_JOINING_GROUP_MANICHAEAN_TEN,        /* Manichaean_Ten */
   UC_JOINING_GROUP_MANICHAEAN_TWENTY,     /* Manichaean_Twenty */
-  UC_JOINING_GROUP_MANICHAEAN_HUNDRED     /* Manichaean_Hundred */
+  UC_JOINING_GROUP_MANICHAEAN_HUNDRED,    /* Manichaean_Hundred */
+  UC_JOINING_GROUP_AFRICAN_FEH,           /* African_Feh */
+  UC_JOINING_GROUP_AFRICAN_QAF,           /* African_Qaf */
+  UC_JOINING_GROUP_AFRICAN_NOON           /* African_Noon */
 };
 
 /* Return the name of a joining group.  */
--- a/lib/unigbrk.in.h	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unigbrk.in.h	Wed Oct 12 17:40:37 2016 +0200
@@ -52,7 +52,12 @@
   GBP_T            = 9,
   GBP_LV           = 10,
   GBP_LVT          = 11,
-  GBP_RI           = 12
+  GBP_RI           = 12,
+  GBP_ZWJ          = 13,
+  GBP_EB           = 14,
+  GBP_EM           = 15,
+  GBP_GAZ          = 16,
+  GBP_EBG          = 17
 };
 
 /* Return the Grapheme_Cluster_Break property of a Unicode character. */
@@ -118,6 +123,8 @@
        u32_grapheme_breaks (const uint32_t *s, size_t n, char *p);
 extern void
        ulc_grapheme_breaks (const char *s, size_t n, char *p);
+extern void
+       uc_grapheme_breaks (const ucs4_t *s, size_t n, char *p);
 
 /* ========================================================================= */
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/unigbrk/u-grapheme-breaks.h	Wed Oct 12 17:40:37 2016 +0200
@@ -0,0 +1,122 @@
+/* Grapheme cluster break function.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+/* Set P[i] = 1 where a grapheme cluster starts in S[0..N-1], else 0.  */
+void
+FUNC (const UNIT *s, size_t n, char *p)
+{
+  if (n > 0)
+    {
+      const UNIT *s_end = s + n;
+
+      /* Grapheme Cluster break property of the last character.
+         -1 at the very beginning of the string.  */
+      int last_char_prop = -1;
+
+      /* Like last_char_prop, but left unchanged by Extend characters that
+         follow E_Base or E_Base_GAZ (needed for GB10).  -1 at the start.  */
+      int last_compchar_prop = -1;
+
+      size_t ri_count = 0;  /* length of the RI run ending at the last character */
+
+      /* Don't break inside multibyte characters.  */
+      memset (p, 0, n);
+
+      while (s < s_end)
+        {
+          ucs4_t uc;
+          int count = U_MBTOUC (&uc, s, s_end - s);
+          int prop = uc_graphemeclusterbreak_property (uc);
+
+          /* Break at the start of the string (GB1).  */
+          if (last_char_prop < 0)
+            *p = 1;
+          else
+            {
+              /* No break between CR and LF (GB3).  */
+              if (last_char_prop == GBP_CR && prop == GBP_LF)
+                /* *p = 0 */;
+              /* Break before and after newlines (GB4, GB5).  */
+              else if ((last_char_prop == GBP_CR
+                        || last_char_prop == GBP_LF
+                        || last_char_prop == GBP_CONTROL)
+                       || (prop == GBP_CR
+                           || prop == GBP_LF
+                           || prop == GBP_CONTROL))
+                *p = 1;
+              /* No break between Hangul syllable sequences (GB6, GB7, GB8).  */
+              else if ((last_char_prop == GBP_L
+                        && (prop == GBP_L
+                            || prop == GBP_V
+                            || prop == GBP_LV
+                            || prop == GBP_LVT))
+                       || ((last_char_prop == GBP_LV
+                            || last_char_prop == GBP_V)
+                           && (prop == GBP_V
+                               || prop == GBP_T))
+                       || ((last_char_prop == GBP_LVT
+                            || last_char_prop == GBP_T)
+                           && prop == GBP_T))
+                /* *p = 0 */;
+              /* No break before extending characters or ZWJ (GB9).  */
+              else if (prop == GBP_EXTEND || prop == GBP_ZWJ)
+                /* *p = 0 */;
+              /* No break before SpacingMarks (GB9a).  */
+              else if (prop == GBP_SPACINGMARK)
+                /* *p = 0 */;
+              /* No break after Prepend characters (GB9b).  */
+              else if (last_char_prop == GBP_PREPEND)
+                /* *p = 0 */;
+              /* No break within emoji modifier sequences (GB10).  */
+              else if ((last_compchar_prop == GBP_EB
+                        || last_compchar_prop == GBP_EBG)
+                       && prop == GBP_EM)
+                /* *p = 0 */;
+              /* No break within emoji zwj sequences (GB11).  */
+              else if (last_char_prop == GBP_ZWJ
+                       && (prop == GBP_GAZ
+                           || prop == GBP_EBG))
+                /* *p = 0 */;
+              /* No break between RI if there is an odd number of RI
+                 characters before (GB12, GB13).  */
+              else if (prop == GBP_RI)
+                {
+                  if (ri_count % 2 == 0)
+                    *p = 1;
+                  /* else *p = 0; */
+                }
+              /* Break everywhere (GB999).  */
+              else
+                *p = 1;
+            }
+
+          last_char_prop = prop;
+
+          if (!(prop == GBP_EXTEND
+                && (last_compchar_prop == GBP_EB
+                    || last_compchar_prop == GBP_EBG)))
+            last_compchar_prop = prop;
+
+          if (prop == GBP_RI)
+            ri_count++;
+          else
+            ri_count = 0;
+
+          s += count;
+          p += count;
+        }
+    }
+}
--- a/lib/unigbrk/u16-grapheme-breaks.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unigbrk/u16-grapheme-breaks.c	Wed Oct 12 17:40:37 2016 +0200
@@ -20,25 +20,11 @@
 /* Specification.  */
 #include "unigbrk.h"
 
+#include <string.h>
+
 #include "unistr.h"
 
-void
-u16_grapheme_breaks (const uint16_t *s, size_t n, char *p)
-{
-  ucs4_t prev;
-  int mblen;
-
-  prev = 0;
-  for (; n > 0; s += mblen, p += mblen, n -= mblen)
-    {
-      ucs4_t next;
-
-      mblen = u16_mbtouc (&next, s, n);
-
-      p[0] = uc_is_grapheme_break (prev, next);
-      if (mblen > 1)
-        p[1] = 0;
-
-      prev = next;
-    }
-}
+#define FUNC u16_grapheme_breaks
+#define UNIT uint16_t
+#define U_MBTOUC u16_mbtouc
+#include "u-grapheme-breaks.h"
--- a/lib/unigbrk/u32-grapheme-breaks.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unigbrk/u32-grapheme-breaks.c	Wed Oct 12 17:40:37 2016 +0200
@@ -20,23 +20,11 @@
 /* Specification.  */
 #include "unigbrk.h"
 
+#include <string.h>
+
 #include "unistr.h"
 
-void
-u32_grapheme_breaks (const uint32_t *s, size_t n, char *p)
-{
-  ucs4_t prev;
-  size_t i;
-
-  prev = 0;
-  for (i = 0; i < n; i++)
-    {
-      ucs4_t next;
-
-      u32_mbtouc (&next, &s[i], 1);
-
-      p[i] = uc_is_grapheme_break (prev, next);
-
-      prev = next;
-    }
-}
+#define FUNC u32_grapheme_breaks
+#define UNIT uint32_t
+#define U_MBTOUC u32_mbtouc
+#include "u-grapheme-breaks.h"
--- a/lib/unigbrk/u8-grapheme-breaks.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unigbrk/u8-grapheme-breaks.c	Wed Oct 12 17:40:37 2016 +0200
@@ -21,26 +21,11 @@
 /* Specification.  */
 #include "unigbrk.h"
 
+#include <string.h>
+
 #include "unistr.h"
 
-void
-u8_grapheme_breaks (const uint8_t *s, size_t n, char *p)
-{
-  ucs4_t prev;
-  int mblen;
-
-  prev = 0;
-  for (; n > 0; s += mblen, p += mblen, n -= mblen)
-    {
-      ucs4_t next;
-      int i;
-
-      mblen = u8_mbtouc (&next, s, n);
-
-      p[0] = uc_is_grapheme_break (prev, next);
-      for (i = 1; i < mblen; i++)
-        p[i] = 0;
-
-      prev = next;
-    }
-}
+#define FUNC u8_grapheme_breaks
+#define UNIT uint8_t
+#define U_MBTOUC u8_mbtouc
+#include "u-grapheme-breaks.h"
--- a/lib/unigbrk/uc-gbrk-prop.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unigbrk/uc-gbrk-prop.c	Wed Oct 12 17:40:37 2016 +0200
@@ -36,8 +36,7 @@
           if (lookup2 >= 0)
             {
               unsigned int index3 = uc & gbrkprop_header_4;
-              unsigned char lookup3 = unigbrkprop.level3[lookup2 + index3 / 2];
-              return (lookup3 >> ((uc & 1) << 2)) & 0x0f;
+              return unigbrkprop.level3[lookup2 + index3];
             }
         }
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/unigbrk/uc-grapheme-breaks.c	Wed Oct 12 17:40:37 2016 +0200
@@ -0,0 +1,39 @@
+/* Grapheme cluster breaks function.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+   Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unigbrk.h"
+
+#include <string.h>
+
+#include "unistr.h"
+
+/* Store *S in *PUC and return 1, the number of units consumed; N is unused.
+   Similar to u32_mbtouc_unsafe(), but doesn't check invalid characters.  */
+static int
+uc_grapheme_breaks_mbtouc (ucs4_t *puc, const ucs4_t *s, size_t n)
+{
+  *puc = *s;
+  return 1;
+}
+
+#define FUNC uc_grapheme_breaks
+#define UNIT ucs4_t
+#define U_MBTOUC uc_grapheme_breaks_mbtouc
+#include "u-grapheme-breaks.h"
--- a/lib/unigbrk/uc-is-grapheme-break.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unigbrk/uc-is-grapheme-break.c	Wed Oct 12 17:40:37 2016 +0200
@@ -47,19 +47,22 @@
    /* GB8 */                                                            \
    ((A) == GBP_LVT || (A) == GBP_T) && (B) == GBP_T ? false :           \
                                                                         \
-   /* GB8a */								\
-   (A) == GBP_RI && (B) == GBP_RI ? false :				\
-									\
    /* GB9 */                                                            \
-   (B) == GBP_EXTEND ? false :                                          \
+   (B) == GBP_EXTEND || (B) == GBP_ZWJ ? false :                        \
                                                                         \
    /* GB9a */                                                           \
    (B) == GBP_SPACINGMARK ? false :                                     \
                                                                         \
    /* GB9b */                                                           \
-   (A) == GBP_PREPEND ? false                                           \
+   (A) == GBP_PREPEND ? false :                                         \
+                                                                        \
+   /* GB10 -- incomplete */                                             \
+   ((A) == GBP_EB || (A) == GBP_EBG) && (B) == GBP_EM ? false :         \
                                                                         \
-   /* GB10 */                                                           \
+   /* GB11 */                                                           \
+   (A) == GBP_ZWJ && ((B) == GBP_GAZ || (B) == GBP_EBG) ? false         \
+                                                                        \
+   /* GB999 */                                                          \
    : true)
 
 #define UC_GRAPHEME_BREAKS_FOR(A)                                       \
@@ -75,9 +78,14 @@
    | (UC_IS_GRAPHEME_BREAK(A, GBP_T)           << GBP_T)                \
    | (UC_IS_GRAPHEME_BREAK(A, GBP_LV)          << GBP_LV)               \
    | (UC_IS_GRAPHEME_BREAK(A, GBP_LVT)         << GBP_LVT)              \
-   | (UC_IS_GRAPHEME_BREAK(A, GBP_RI)          << GBP_RI))
+   | (UC_IS_GRAPHEME_BREAK(A, GBP_RI)          << GBP_RI)               \
+   | (UC_IS_GRAPHEME_BREAK(A, GBP_ZWJ)         << GBP_ZWJ)              \
+   | (UC_IS_GRAPHEME_BREAK(A, GBP_EB)          << GBP_EB)               \
+   | (UC_IS_GRAPHEME_BREAK(A, GBP_EM)          << GBP_EM)               \
+   | (UC_IS_GRAPHEME_BREAK(A, GBP_GAZ)         << GBP_GAZ)              \
+   | (UC_IS_GRAPHEME_BREAK(A, GBP_EBG)         << GBP_EBG))
 
-static const unsigned short int gb_table[13] =
+static const unsigned long int gb_table[18] =
   {
     UC_GRAPHEME_BREAKS_FOR(0),  /* GBP_OTHER */
     UC_GRAPHEME_BREAKS_FOR(1),  /* GBP_CR */
@@ -92,6 +100,11 @@
     UC_GRAPHEME_BREAKS_FOR(10), /* GBP_LV */
     UC_GRAPHEME_BREAKS_FOR(11), /* GBP_LVT */
     UC_GRAPHEME_BREAKS_FOR(12), /* GBP_RI */
+    UC_GRAPHEME_BREAKS_FOR(13), /* GBP_ZWJ */
+    UC_GRAPHEME_BREAKS_FOR(14), /* GBP_EB */
+    UC_GRAPHEME_BREAKS_FOR(15), /* GBP_EM */
+    UC_GRAPHEME_BREAKS_FOR(16), /* GBP_GAZ */
+    UC_GRAPHEME_BREAKS_FOR(17), /* GBP_EBG */
   };
 
 bool
--- a/lib/unilbrk/lbrktables.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unilbrk/lbrktables.c	Wed Oct 12 17:40:37 2016 +0200
@@ -23,37 +23,40 @@
 /* Define unilbrkprop, table of line breaking properties.  */
 #include "unilbrk/lbrkprop2.h"
 
-const unsigned char unilbrk_table[27][27] =
+const unsigned char unilbrk_table[30][30] =
 {
                                 /* after */
-        /* WJ GL B2 BA BB HY CL CP EX IN NS OP QU IS NU PO PR SY AL H2 H3 ID JL JV JT HL RI */
-/* WJ */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, I, I, },
-/* GL */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, I, I, },
-/* B2 */ { P, I, P, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, D, },
-/* BA */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, D, },
-/* BB */ { P, I, I, I, I, I, P, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, I, I, },
-/* HY */ { P, D, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, D, D, },
-/* CL */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D, D, D, D, D, D, },
-/* CP */ { P, I, D, I, D, I, P, P, P, D, P, D, I, P, I, I, I, P, I, D, D, D, D, D, D, I, D, },
-/* EX */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, D, },
-/* IN */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, D, },
-/* NS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, D, D, },
-/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, },
-/* QU */ { P, I, I, I, I, I, P, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I, I, I, I, I, I, },
-/* IS */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, D, D, },
-/* NU */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, I, I, P, I, D, D, D, D, D, D, I, D, },
-/* PO */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, I, D, },
-/* PR */ { P, I, D, I, D, I, P, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I, I, I, I, I, D, },
-/* SY */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, I, D, },
-/* AL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, I, D, },
-/* H2 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, D, D, },
-/* H3 */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, D, D, },
-/* ID */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, D, D, D, },
-/* JL */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, I, I, D, I, I, D, D, D, },
-/* JV */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, D, D, },
-/* JT */ { P, I, D, I, D, I, P, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, D, D, },
-/* HL */ { P, I, D, I, D, I, P, P, P, I, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, I, D, },
-/* RI */ { P, I, D, I, D, I, P, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, I, D, I, },
+        /*  WJ  GL  B2  BA  BB  HY  CL  CP  EX  IN  NS  OP  QU  IS  NU  PO  PR  SY  AL  H2  H3  ID  JL  JV  JT  HL  RI  ZWJ EB  EM  */
+/*  WJ */ {  P,  I,  I,  I,  I,  I,  P,  P,  P,  I,  I,  I,  I,  P,  I,  I,  I,  P,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I, },
+/*  GL */ {  P,  I,  I,  I,  I,  I,  P,  P,  P,  I,  I,  I,  I,  P,  I,  I,  I,  P,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I, },
+/*  B2 */ {  P,  I,  P,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  BA */ {  P,  D,  D,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  BB */ {  P,  I,  I,  I,  I,  I,  P,  P,  P,  I,  I,  I,  I,  P,  I,  I,  I,  P,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I, },
+/*  HY */ {  P,  D,  D,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  I,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  CL */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  P,  D,  I,  P,  D,  I,  I,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  CP */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  P,  D,  I,  P,  I,  I,  I,  P,  I,  D,  D,  D,  D,  D,  D,  I,  D,  I,  D,  D, },
+/*  EX */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  IN */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  NS */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  OP */ {  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P,  P, },
+/*  QU */ {  P,  I,  I,  I,  I,  I,  P,  P,  P,  I,  I,  P,  I,  P,  I,  I,  I,  P,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I,  I, },
+/*  IS */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  I,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  NU */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  I,  I,  P,  I,  I,  I,  P,  I,  D,  D,  D,  D,  D,  D,  I,  D,  I,  D,  D, },
+/*  PO */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  I,  I,  I,  P,  I,  D,  D,  P,  I,  D,  D,  D,  D,  D,  D,  I,  D,  I,  D,  D, },
+/*  PR */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  I,  I,  I,  P,  I,  D,  D,  P,  I,  I,  I,  I,  I,  I,  I,  I,  D,  I,  I,  I, },
+/*  SY */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  I,  D,  D,  P,  D,  D,  D,  D,  D,  D,  D,  I,  D,  I,  D,  D, },
+/*  AL */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  I,  I,  P,  I,  D,  D,  P,  I,  D,  D,  D,  D,  D,  D,  I,  D,  I,  D,  D, },
+/*  H2 */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  I,  I,  D,  D,  I,  D,  D, },
+/*  H3 */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  I,  D,  D,  I,  D,  D, },
+/*  ID */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  D,  D,  D,  I,  D,  D, },
+/*  JL */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  I,  I,  D,  I,  I,  D,  D,  D,  I,  D,  D, },
+/*  JV */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  I,  I,  D,  D,  I,  D,  D, },
+/*  JT */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  I,  D,  D,  I,  D,  D, },
+/*  HL */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  I,  I,  P,  I,  D,  D,  P,  I,  D,  D,  D,  D,  D,  D,  I,  D,  I,  D,  D, },
+/*  RI */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  D,  D,  D,  P,  D,  D,  D,  D,  D,  D,  I,  D,  I,  I,  D,  D, },
+/* ZWJ */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  D,  I,  D,  I,  P,  D,  D,  D,  P,  D,  D,  D,  I,  D,  D,  I,  D,  I,  I,  I,  I, },
+/*  EB */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  I,  D,  I,  I,  D,  I, },
+/*  EM */ {  P,  I,  D,  I,  D,  I,  P,  P,  P,  I,  I,  D,  I,  P,  D,  I,  D,  P,  D,  D,  D,  D,  D,  D,  I,  D,  I,  I,  D,  D, },
 /* "" */
 /* before */
 };
--- a/lib/unilbrk/lbrktables.h	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/unilbrk/lbrktables.h	Wed Oct 12 17:40:37 2016 +0200
@@ -21,22 +21,22 @@
 
 enum
 {
-  /* Values >= 27 are resolved at run time. */
-  LBP_BK = 27, /* mandatory break */
+  /* Values >= 30 are resolved at run time. */
+  LBP_BK = 30, /* mandatory break */
 /*LBP_CR,         carriage return - not used here because it's a DOSism */
 /*LBP_LF,         line feed - not used here because it's a DOSism */
-  LBP_CM = 28, /* attached characters and combining marks */
+  LBP_CM = 31, /* attached characters and combining marks */
 /*LBP_NL,         next line - not used here because it's equivalent to LBP_BK */
 /*LBP_SG,         surrogates - not used here because they are not characters */
   LBP_WJ =  0, /* word joiner */
-  LBP_ZW = 29, /* zero width space */
+  LBP_ZW = 32, /* zero width space */
   LBP_GL =  1, /* non-breaking (glue) */
-  LBP_SP = 30, /* space */
+  LBP_SP = 33, /* space */
   LBP_B2 =  2, /* break opportunity before and after */
   LBP_BA =  3, /* break opportunity after */
   LBP_BB =  4, /* break opportunity before */
   LBP_HY =  5, /* hyphen */
-  LBP_CB = 31, /* contingent break opportunity */
+  LBP_CB = 34, /* contingent break opportunity */
   LBP_CL =  6, /* closing punctuation */
   LBP_CP =  7, /* closing parenthesis */
   LBP_EX =  8, /* exclamation/interrogation */
@@ -49,7 +49,7 @@
   LBP_PO = 15, /* postfix (numeric) */
   LBP_PR = 16, /* prefix (numeric) */
   LBP_SY = 17, /* symbols allowing breaks */
-  LBP_AI = 32, /* ambiguous (alphabetic or ideograph) */
+  LBP_AI = 35, /* ambiguous (alphabetic or ideograph) */
   LBP_AL = 18, /* ordinary alphabetic and symbol characters */
 /*LBP_CJ,         conditional Japanese starters, resolved to NS */
   LBP_H2 = 19, /* Hangul LV syllable */
@@ -60,8 +60,11 @@
   LBP_JV = 23, /* Hangul V Jamo */
   LBP_JT = 24, /* Hangul T Jamo */
   LBP_RI = 26, /* regional indicator */
-  LBP_SA = 33, /* complex context (South East Asian) */
-  LBP_XX = 34  /* unknown */
+  LBP_SA = 36, /* complex context (South East Asian) */
+  LBP_ZWJ = 27, /* zero width joiner */
+  LBP_EB = 28, /* emoji base */
+  LBP_EM = 29, /* emoji modifier */
+  LBP_XX = 37  /* unknown */
 };
 
 #include "lbrkprop1.h"
@@ -92,7 +95,7 @@
 #define I 2  /* indirect break opportunity, '%' in table 7.3 of UTR #14 */
 #define P 3  /* prohibited break,           '^' in table 7.3 of UTR #14 */
 
-extern const unsigned char unilbrk_table[27][27];
+extern const unsigned char unilbrk_table[30][30];
 
 /* We don't support line breaking of complex-context dependent characters
    (Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */
--- a/lib/uniname/gen-uninames.lisp	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/uniname/gen-uninames.lisp	Wed Oct 12 17:40:37 2016 +0200
@@ -196,7 +196,7 @@
         ) ) )
         (format ostream "};~%")
         |#
-        (format ostream "static const struct { uint16_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[~D] = {~%"
+        (format ostream "static const struct { uint32_t extra_offset; uint16_t ind_offset; } unicode_name_by_length[~D] = {~%"
                         (1+ (length words-by-length))
         )
         (let ((extra-offset 0)
--- a/lib/uniwbrk.in.h	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/uniwbrk.in.h	Wed Oct 12 17:40:37 2016 +0200
@@ -53,7 +53,12 @@
   WBP_RI           = 13,
   WBP_DQ           = 14,
   WBP_SQ           = 15,
-  WBP_HL           = 16
+  WBP_HL           = 16,
+  WBP_ZWJ          = 17,
+  WBP_EB           = 18,
+  WBP_EM           = 19,
+  WBP_GAZ          = 20,
+  WBP_EBG          = 21
 };
 
 /* Return the Word_Break property of a Unicode character.  */
--- a/lib/uniwbrk/u-wordbreaks.h	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/uniwbrk/u-wordbreaks.h	Wed Oct 12 17:40:37 2016 +0200
@@ -39,6 +39,8 @@
          -1 at the very beginning of the string.  */
       int secondlast_compchar_prop = -1;
 
+      size_t ri_count = 0;
+
       /* Don't break inside multibyte characters.  */
       memset (p, 0, n);
 
@@ -51,10 +53,10 @@
           /* No break at the start of the string.  */
           if (last_char_prop >= 0)
             {
-              /* No break between CR and LF.  */
+              /* No break between CR and LF (WB3).  */
               if (last_char_prop == WBP_CR && prop == WBP_LF)
                 /* *p = 0 */;
-              /* Break before and after newlines.  */
+              /* Break before and after newlines (WB3a, WB3b).  */
               else if ((last_char_prop == WBP_CR
                         || last_char_prop == WBP_LF
                         || last_char_prop == WBP_NEWLINE)
@@ -62,8 +64,12 @@
                            || prop == WBP_LF
                            || prop == WBP_NEWLINE))
                 *p = 1;
+              /* No break within emoji zwj sequence (WB3c).  */
+              else if (last_char_prop == WBP_ZWJ &&
+                       (prop == WBP_GAZ || prop == WBP_EBG))
+                /* *p = 0 */;
               /* Ignore Format and Extend characters.  */
-              else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT))
+              else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT || prop == WBP_ZWJ))
                 {
                   /* No break in these situations (see UAX #29):
 
@@ -75,16 +81,8 @@
                   Numeric × (MidNum | MidNumLet | SQ)      Numeric      (WB12)
                                                         HL × DQ HL      (WB7b)
                                                         HL DQ × HL      (WB7c)
-                                   (ALetter | HL) × (ALetter | HL)      (WB5)
-                                          (ALetter | HL) × Numeric      (WB9)
-                                          Numeric × (ALetter | HL)      (WB10)
-                                                 Numeric × Numeric      (WB8)
-                                                      HL × SQ           (WB7a)
-                                                Katakana × Katakana     (WB13)
-                     (ALetter | HL | Numeric | Katakana) × ExtendNumLet (WB13a)
-                                            ExtendNumLet × ExtendNumLet (WB13a)
-                    ExtendNumLet × (ALetter | HL | Numeric | Katakana)  (WB13b)
-                               Regional_Indicator × Regional_Indicator  (WB13c)
+                                                ^ (RI RI)* RI × RI      (WB15)
+                                            [^RI] (RI RI)* RI × RI      (WB16)
                    */
                   /* No break across certain punctuation.  Also, disable word
                      breaks that were recognized earlier (due to lookahead of
@@ -108,27 +106,29 @@
                       *last_compchar_ptr = 0;
                       /* *p = 0; */
                     }
-                  /* Break after Format and Extend characters.  */
+                  /* Break before RI, if odd number of RI's are
+                     preceding (WB15, WB16).  */
+                  else if (last_compchar_prop == WBP_RI && prop == WBP_RI)
+                    {
+                      if (ri_count % 2 == 0)
+                        *p = 1;
+                      /* else *p = 0 */
+                    }
+                  /* Break after Format and Extend character.  */
                   else if (last_compchar_prop == WBP_EXTEND
                            || last_compchar_prop == WBP_FORMAT)
                     *p = 1;
                   else
                     {
-                      /* Normalize property value to table index,
-                         skipping 5 properties: WBP_EXTEND,
-                         WBP_FORMAT, WBP_NEWLINE, WBP_CR, and
-                         WBP_LF.  */
-                      int last_compchar_prop_index = last_compchar_prop;
-                      int prop_index = prop;
+                      int last_compchar_index =
+                        uniwbrk_prop_index[last_compchar_prop];
+                      int index = uniwbrk_prop_index[prop];
 
-                      if (last_compchar_prop_index >= WBP_EXTEND)
-                        last_compchar_prop_index -= 5;
-
-                      if (prop_index >= WBP_EXTEND)
-                        prop_index -= 5;
-
+                      /* Break between unknown pair (WB999).  */
+                      if (last_compchar_index < 0 || index < 0)
+                        *p = 1;
                       /* Perform a single table lookup.  */
-                      if (uniwbrk_table[last_compchar_prop_index][prop_index])
+                      else if (uniwbrk_table[last_compchar_index][index])
                         *p = 1;
                       /* else *p = 0; */
                     }
@@ -136,17 +136,23 @@
             }
 
           last_char_prop = prop;
-          /* Ignore Format and Extend characters, except at the start
-             of the line.  */
+
+          /* Ignore Format, Extend, and ZWJ characters, except at
+             the start of the line.  */
           if (last_compchar_prop < 0
               || last_compchar_prop == WBP_CR
               || last_compchar_prop == WBP_LF
               || last_compchar_prop == WBP_NEWLINE
-              || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
+              || !(prop == WBP_EXTEND || prop == WBP_FORMAT || prop == WBP_ZWJ))
             {
               secondlast_compchar_prop = last_compchar_prop;
               last_compchar_prop = prop;
               last_compchar_ptr = p;
+
+              if (prop == WBP_RI)
+                ri_count++;
+              else
+                ri_count = 0;
             }
 
           s += count;
--- a/lib/uniwbrk/wbrktable.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/uniwbrk/wbrktable.c	Wed Oct 12 17:40:37 2016 +0200
@@ -20,6 +20,32 @@
 /* Specification.  */
 #include "wbrktable.h"
 
+const int uniwbrk_prop_index[22] = /* WBP_* value -> uniwbrk_table index */
+{
+   0, /* WBP_OTHER */
+   1, /* WBP_KATAKANA */
+   2, /* WBP_ALETTER */
+   3, /* WBP_MIDNUMLET */
+   4, /* WBP_MIDLETTER */
+   5, /* WBP_MIDNUM */
+   6, /* WBP_NUMERIC */
+   7, /* WBP_EXTENDNUMLET */
+  -1, /* WBP_EXTEND (not in uniwbrk_table) */
+  -1, /* WBP_FORMAT (not in uniwbrk_table) */
+  -1, /* WBP_NEWLINE (not in uniwbrk_table) */
+  -1, /* WBP_CR (not in uniwbrk_table) */
+  -1, /* WBP_LF (not in uniwbrk_table) */
+  -1, /* WBP_RI (not in uniwbrk_table) */
+   8, /* WBP_DQ */
+   9, /* WBP_SQ */
+  10, /* WBP_HL */
+  -1, /* WBP_ZWJ (not in uniwbrk_table) */
+  11, /* WBP_EB */
+  12, /* WBP_EM */
+  -1, /* WBP_GAZ (not in uniwbrk_table) */
+  13  /* WBP_EBG */
+};
+
 /* This table contains the following rules (see UAX #29):
 
                            last         current
@@ -33,24 +59,30 @@
 (ALetter | HL | Numeric | Katakana) × ExtendNumLet                    (WB13a)
                        ExtendNumLet × ExtendNumLet                    (WB13a)
                    ExtendNumLet × (ALetter | HL | Numeric | Katakana) (WB13b)
-                 Regional_Indicator × Regional_Indicator              (WB13c)
+                     (E_Base | EBG) × E_Modifier                      (WB14)
+
+   Note that the following rules are not handled here but in the loop in u-wordbreaks.h:
+   - Rules that need to look back or ahead at a second character (WB6, WB7, WB7b, WB7c, WB11, WB12)
+   - Rules with a higher precedence than the "ignore" rule (WB4), such as WB3c
  */
 
-const unsigned char uniwbrk_table[12][12] =
-{        /* current:      OTHER        MIDNUMLET    NUMERIC     DQ         */
-         /*                 KATAKANA     MIDLETTER    EXTENDNUMLET  SQ     */
-         /*                   ALETTER      MIDNUM           RI          HL */
+const unsigned char uniwbrk_table[14][14] =
+{        /* current:        OTHER       MIDNUMLET   NUMERIC     SQ          EM      */
+         /*                     KATAKANA    MIDLETTER   EXNUMLET    HL          EBG */
+         /*                         ALETTER     MIDNUM      DQ          EB          */
   /* last */
-  /* WBP_OTHER */        {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
-  /* WBP_KATAKANA */     {  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,  1,  1 },
-  /* WBP_ALETTER */      {  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  1,  0 },
-  /* WBP_MIDNUMLET */    {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
-  /* WBP_MIDLETTER */    {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
-  /* WBP_MIDNUM */       {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
-  /* WBP_NUMERIC */      {  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  1,  0 },
-  /* WBP_EXTENDNUMLET */ {  1,  0,  0,  1,  1,  1,  0,  0,  1,  1,  1,  0 },
-  /* WBP_RI */           {  1,  1,  1,  1,  1,  1,  1,  1,  0,  1,  1,  1 },
-  /* WBP_DQ */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
-  /* WBP_SQ */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
-  /* WBP_HL */           {  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  0,  0 }
+  /* WBP_OTHER */        {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
+  /* WBP_KATAKANA */     {  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  1 },
+  /* WBP_ALETTER */      {  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  0,  1,  1,  1 },
+  /* WBP_MIDNUMLET */    {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
+  /* WBP_MIDLETTER */    {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
+  /* WBP_MIDNUM */       {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
+  /* WBP_NUMERIC */      {  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  0,  1,  1,  1 },
+  /* WBP_EXTENDNUMLET */ {  1,  0,  0,  1,  1,  1,  0,  0,  1,  1,  0,  1,  1,  1 },
+  /* WBP_DQ */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
+  /* WBP_SQ */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
+  /* WBP_HL */           {  1,  1,  0,  1,  1,  1,  0,  0,  1,  0,  0,  1,  1,  1 },
+  /* WBP_EB */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  1 },
+  /* WBP_EM */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
+  /* WBP_EBG */          {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  1 }
 };
--- a/lib/uniwbrk/wbrktable.h	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/uniwbrk/wbrktable.h	Wed Oct 12 17:40:37 2016 +0200
@@ -15,4 +15,5 @@
    You should have received a copy of the GNU Lesser General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
 
-extern const unsigned char uniwbrk_table[12][12];
+extern const int uniwbrk_prop_index[22];
+extern const unsigned char uniwbrk_table[14][14];
--- a/lib/uniwidth/width.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/lib/uniwidth/width.c	Wed Oct 12 17:40:37 2016 +0200
@@ -32,7 +32,7 @@
  * - Zero width characters; generated from
  *   "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
  */
-static const unsigned char nonspacing_table_data[36*64] = {
+static const unsigned char nonspacing_table_data[38*64] = {
   /* 0x0000-0x01ff */
   0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0000-0x003f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0040-0x007f */
@@ -73,7 +73,7 @@
   0x00, 0x00, 0xc0, 0xfb, 0xef, 0x3e, 0x00, 0x00, /* 0x0800-0x083f */
   0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, /* 0x0840-0x087f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */
-  0x00, 0x00, 0x00, 0x00, 0xf8, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
+  0x00, 0x00, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
   0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, /* 0x0900-0x093f */
   0xfe, 0x21, 0xfe, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0940-0x097f */
   0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */
@@ -135,7 +135,7 @@
   /* 0x1800-0x19ff */
   0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1840-0x187f */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */
+  0x60, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18c0-0x18ff */
   0x00, 0x00, 0x00, 0x00, 0x87, 0x01, 0x04, 0x0e, /* 0x1900-0x193f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1940-0x197f */
@@ -158,7 +158,7 @@
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d00-0x1d3f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d40-0x1d7f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d80-0x1dbf */
-  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xf0, /* 0x1dc0-0x1dff */
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xf8, /* 0x1dc0-0x1dff */
   /* 0x2000-0x21ff */
   0x00, 0xf8, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x00, /* 0x2000-0x203f */
   0x00, 0x00, 0x00, 0x00, 0xdf, 0xff, 0x00, 0x00, /* 0x2040-0x207f */
@@ -199,7 +199,7 @@
   0x44, 0x08, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, /* 0xa800-0xa83f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa840-0xa87f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa880-0xa8bf */
-  0x10, 0x00, 0x00, 0x00, 0xff, 0xff, 0x03, 0x00, /* 0xa8c0-0xa8ff */
+  0x30, 0x00, 0x00, 0x00, 0xff, 0xff, 0x03, 0x00, /* 0xa8c0-0xa8ff */
   0x00, 0x00, 0x00, 0x00, 0xc0, 0x3f, 0x00, 0x00, /* 0xa900-0xa93f */
   0x80, 0xff, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa940-0xa97f */
   0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x13, /* 0xa980-0xa9bf */
@@ -268,7 +268,7 @@
   0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x7f, /* 0x11180-0x111bf */
   0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x111c0-0x111ff */
   /* 0x11200-0x113ff */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xd3, 0x00, /* 0x11200-0x1123f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xd3, 0x40, /* 0x11200-0x1123f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11240-0x1127f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11280-0x112bf */
   0x00, 0x00, 0x00, 0x80, 0xf8, 0x07, 0x00, 0x00, /* 0x112c0-0x112ff */
@@ -277,8 +277,8 @@
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11380-0x113bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x113c0-0x113ff */
   /* 0x11400-0x115ff */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11400-0x1143f */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11440-0x1147f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* 0x11400-0x1143f */
+  0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11440-0x1147f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x85, /* 0x11480-0x114bf */
   0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x114c0-0x114ff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11500-0x1153f */
@@ -294,6 +294,15 @@
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11740-0x1177f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11780-0x117bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x117c0-0x117ff */
+  /* 0x11c00-0x11dff */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, /* 0x11c00-0x11c3f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11c40-0x11c7f */
+  0x00, 0x00, 0xfc, 0xff, 0xff, 0xfc, 0x6d, 0x00, /* 0x11c80-0x11cbf */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11cc0-0x11cff */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11d00-0x11d3f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11d40-0x11d7f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11d80-0x11dbf */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x11dc0-0x11dff */
   /* 0x16a00-0x16bff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16a00-0x16a3f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16a40-0x16a7f */
@@ -348,13 +357,22 @@
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1db40-0x1db7f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1db80-0x1dbbf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1dbc0-0x1dbff */
+  /* 0x1e000-0x1e1ff */
+  0x7f, 0xff, 0xff, 0xf9, 0xdb, 0x07, 0x00, 0x00, /* 0x1e000-0x1e03f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e040-0x1e07f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e080-0x1e0bf */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e0c0-0x1e0ff */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e100-0x1e13f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e140-0x1e17f */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e180-0x1e1bf */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e1c0-0x1e1ff */
   /* 0x1e800-0x1e9ff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e800-0x1e83f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e840-0x1e87f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e880-0x1e8bf */
   0x00, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e8c0-0x1e8ff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e900-0x1e93f */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e940-0x1e97f */
+  0xf0, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e940-0x1e97f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1e980-0x1e9bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0x1e9c0-0x1e9ff */
 };
@@ -376,20 +394,20 @@
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0xe000-0xefff */
   -1, -1, -1, -1, -1, 20, -1, 21, /* 0xf000-0xffff */
   22, 23, -1, -1, -1, 24, -1, -1, /* 0x10000-0x10fff */
-  25, 26, 27, 28, -1, -1, -1, -1, /* 0x11000-0x11fff */
+  25, 26, 27, 28, -1, -1, 29, -1, /* 0x11000-0x11fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x12000-0x12fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x13000-0x13fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x14000-0x14fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x15000-0x15fff */
-  -1, -1, -1, -1, -1, 29, -1, 30, /* 0x16000-0x16fff */
+  -1, -1, -1, -1, -1, 30, -1, 31, /* 0x16000-0x16fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x17000-0x17fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x18000-0x18fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x19000-0x19fff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x1a000-0x1afff */
-  -1, -1, -1, -1, -1, -1, 31, -1, /* 0x1b000-0x1bfff */
+  -1, -1, -1, -1, -1, -1, 32, -1, /* 0x1b000-0x1bfff */
   -1, -1, -1, -1, -1, -1, -1, -1, /* 0x1c000-0x1cfff */
-  32, 33, -1, -1, -1, 34, -1, -1, /* 0x1d000-0x1dfff */
-  -1, -1, -1, -1, 35, -1, -1, -1  /* 0x1e000-0x1efff */
+  33, 34, -1, -1, -1, 35, -1, -1, /* 0x1d000-0x1dfff */
+  36, -1, -1, -1, 37, -1, -1, -1  /* 0x1e000-0x1efff */
 };
 
 /* Determine number of column positions required for UC.  */
--- a/modules/unigbrk/u16-grapheme-breaks	Sun Oct 29 16:22:41 2017 -0700
+++ b/modules/unigbrk/u16-grapheme-breaks	Wed Oct 12 17:40:37 2016 +0200
@@ -3,10 +3,11 @@
 
 Files:
 lib/unigbrk/u16-grapheme-breaks.c
-tests/macros.h
+lib/unigbrk/u-grapheme-breaks.h
 
 Depends-on:
-unigbrk/uc-is-grapheme-break
+unigbrk/base
+unigbrk/uc-gbrk-prop
 unistr/u16-mbtouc
 
 configure.ac:
--- a/modules/unigbrk/u32-grapheme-breaks	Sun Oct 29 16:22:41 2017 -0700
+++ b/modules/unigbrk/u32-grapheme-breaks	Wed Oct 12 17:40:37 2016 +0200
@@ -3,10 +3,11 @@
 
 Files:
 lib/unigbrk/u32-grapheme-breaks.c
-tests/macros.h
+lib/unigbrk/u-grapheme-breaks.h
 
 Depends-on:
-unigbrk/uc-is-grapheme-break
+unigbrk/base
+unigbrk/uc-gbrk-prop
 unistr/u32-mbtouc
 
 configure.ac:
--- a/modules/unigbrk/u8-grapheme-breaks	Sun Oct 29 16:22:41 2017 -0700
+++ b/modules/unigbrk/u8-grapheme-breaks	Wed Oct 12 17:40:37 2016 +0200
@@ -3,10 +3,11 @@
 
 Files:
 lib/unigbrk/u8-grapheme-breaks.c
-tests/macros.h
+lib/unigbrk/u-grapheme-breaks.h
 
 Depends-on:
-unigbrk/uc-is-grapheme-break
+unigbrk/base
+unigbrk/uc-gbrk-prop
 unistr/u8-mbtouc
 
 configure.ac:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/unigbrk/uc-grapheme-breaks	Wed Oct 12 17:40:37 2016 +0200
@@ -0,0 +1,28 @@
+Description:
+Find grapheme cluster breaks.
+
+Files:
+lib/unigbrk/uc-grapheme-breaks.c
+lib/unigbrk/u-grapheme-breaks.h
+
+Depends-on:
+unigbrk/base
+unigbrk/uc-gbrk-prop
+
+configure.ac:
+gl_MODULE_INDICATOR([unigbrk/uc-grapheme-breaks])
+gl_LIBUNISTRING_MODULE([0.9.6], [unigbrk/uc-grapheme-breaks])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNIGBRK_UC_GRAPHEME_BREAKS
+lib_SOURCES += unigbrk/uc-grapheme-breaks.c
+endif
+
+Include:
+"unigbrk.h"
+
+License:
+LGPLv3+ or GPLv2
+
+Maintainer:
+Ben Pfaff, Daiki Ueno
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modules/unigbrk/uc-grapheme-breaks-tests	Wed Oct 12 17:40:37 2016 +0200
@@ -0,0 +1,15 @@
+Files:
+tests/unigbrk/test-uc-grapheme-breaks.c
+tests/unigbrk/test-uc-grapheme-breaks.sh
+tests/unigbrk/GraphemeBreakTest.txt
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += unigbrk/test-uc-grapheme-breaks.sh
+check_PROGRAMS += test-uc-grapheme-breaks
+test_uc_grapheme_breaks_SOURCES = unigbrk/test-uc-grapheme-breaks.c
+test_uc_grapheme_breaks_LDADD = $(LDADD) $(LIBUNISTRING)
--- a/tests/unigbrk/test-uc-gbrk-prop.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/tests/unigbrk/test-uc-gbrk-prop.c	Wed Oct 12 17:40:37 2016 +0200
@@ -51,6 +51,11 @@
       CASE(LV)
       CASE(LVT)
       CASE(RI)
+      CASE(ZWJ)
+      CASE(EB)
+      CASE(EM)
+      CASE(GAZ)
+      CASE(EBG)
     }
   abort ();
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/unigbrk/test-uc-grapheme-breaks.c	Wed Oct 12 17:40:37 2016 +0200
@@ -0,0 +1,191 @@
+/* Grapheme cluster break function test.
+   Copyright (C) 2010-2017 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. */
+
+#include <config.h>
+
+/* Specification. */
+#include <unigbrk.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "macros.h"
+
+/* Return the name of grapheme cluster break property GBP, or abort.  */
+static const char *
+graphemebreakproperty_to_string (int gbp)
+{
+  switch (gbp)
+    {
+#define CASE(VALUE) case GBP_##VALUE: return #VALUE;
+      CASE(OTHER)
+      CASE(CR)
+      CASE(LF)
+      CASE(CONTROL)
+      CASE(EXTEND)
+      CASE(PREPEND)
+      CASE(SPACINGMARK)
+      CASE(L)
+      CASE(V)
+      CASE(T)
+      CASE(LV)
+      CASE(LVT)
+      CASE(RI)
+      CASE(ZWJ)
+      CASE(EB)
+      CASE(EM)
+      CASE(GAZ)
+      CASE(EBG)
+    }
+  abort ();
+}
+
+/* Check that uc_grapheme_breaks on the N characters at S yields the breaks
+   in EXPECTED ('#' = break before the character).  On mismatch, report
+   FILENAME:LINENO with the input and both break vectors, then abort.  */
+static void
+test_uc_grapheme_breaks (const char *expected, ucs4_t *s, size_t n,
+                          const char *filename, int lineno)
+{
+  char breaks[16];
+  size_t i;
+
+  ASSERT (n <= 16);
+
+  uc_grapheme_breaks (s, n, breaks);
+  for (i = 0; i < n; i++)
+    if (breaks[i] != (expected[i] == '#'))
+      {
+        size_t j;
+
+        fprintf (stderr, "%s:%d: wrong grapheme breaks:\n", filename, lineno);
+        fprintf (stderr, "   input:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, " %02x", (unsigned int) s[j]);
+        putc ('\n', stderr);
+        fprintf (stderr, "expected:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, "  %d", expected[j] == '#');
+        putc ('\n', stderr);
+        fprintf (stderr, "  actual:");
+        for (j = 0; j < n; j++)
+          fprintf (stderr, "  %d", breaks[j]);
+        putc ('\n', stderr);
+
+        abort ();
+      }
+}
+
+/* Check every test line of GraphemeBreakTest.txt (path given as argv[1])
+   against uc_grapheme_breaks.  Usage, I/O and syntax errors exit with 1.  */
+int
+main (int argc, char *argv[])
+{
+  const char *filename;
+  char line[1024];
+  FILE *stream;
+  int lineno = 0;
+
+  if (argc != 2)
+    {
+      fprintf (stderr, "usage: %s FILENAME\n"
+               "where FILENAME is the location of the GraphemeBreakTest.txt\n"
+               "test file.\n", argv[0]);
+      exit (1);
+    }
+
+  filename = argv[1];
+  stream = fopen (filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", filename);
+      exit (1);
+    }
+
+  while (fgets (line, sizeof line, stream))
+    {
+      char *comment;
+      const char *p;
+      ucs4_t s[16];
+      char breaks[16];
+      size_t i = 0;
+
+      lineno++;
+      /* Strip the trailing comment; skip lines that are then blank.  */
+      comment = strchr (line, '#');
+      if (comment != NULL)
+        *comment = '\0';
+      if (line[strspn (line, " \t\r\n")] == '\0')
+        continue;
+
+      p = line;
+      do
+        {
+          /* Never write past the fixed-size S and BREAKS arrays.  */
+          if (i >= sizeof s / sizeof s[0])
+            {
+              fprintf (stderr, "%s:%d: too many code points\n", filename, lineno);
+              exit (1);
+            }
+          p += strspn (p, " \t\r\n");
+          if (!strncmp (p, "\303\267" /* ÷ */, 2))
+            {
+              breaks[i] = '#';
+              p += 2;
+            }
+          else if (!strncmp (p, "\303\227" /* × */, 2))
+            {
+              breaks[i] = '_';
+              p += 2;
+            }
+          else
+            {
+              fprintf (stderr, "%s:%d.%d: syntax error expecting '÷' or '×'\n",
+                       filename, lineno, (int) (p - line + 1));
+              exit (1);
+            }
+          p += strspn (p, " \t\r\n");
+          if (*p == '\0')
+            s[i] = 0;
+          else
+            {
+              unsigned int next_int;
+              int n;
+
+              if (sscanf (p, "%x%n", &next_int, &n) != 1)
+                {
+                  fprintf (stderr, "%s:%d.%d: syntax error at '%s' "
+                           "expecting hexadecimal Unicode code point number\n",
+                           filename, lineno, (int) (p - line + 1), p);
+                  exit (1);
+                }
+              p += n;
+              s[i] = next_int;
+            }
+          p += strspn (p, " \t\r\n");
+          i++;
+        }
+      while (*p != '\0');
+
+      test_uc_grapheme_breaks (breaks, s, i, filename, lineno);
+    }
+
+  fclose (stream);
+  return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/unigbrk/test-uc-grapheme-breaks.sh	Wed Oct 12 17:40:37 2016 +0200
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+./test-uc-grapheme-breaks${EXEEXT} "${srcdir}/unigbrk/GraphemeBreakTest.txt"
--- a/tests/unigbrk/test-uc-is-grapheme-break.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/tests/unigbrk/test-uc-is-grapheme-break.c	Wed Oct 12 17:40:37 2016 +0200
@@ -45,6 +45,11 @@
       CASE(LV)
       CASE(LVT)
       CASE(RI)
+      CASE(ZWJ)
+      CASE(EB)
+      CASE(EM)
+      CASE(GAZ)
+      CASE(EBG)
     }
   abort ();
 }
@@ -81,6 +86,8 @@
       char *comment;
       const char *p;
       ucs4_t prev;
+      int last_compchar_prop;
+      size_t ri_count;
 
       lineno++;
 
@@ -90,6 +97,8 @@
       if (line[strspn (line, " \t\r\n")] == '\0')
         continue;
 
+      last_compchar_prop = -1;
+      ri_count = 0;
       prev = 0;
       p = line;
       do
@@ -135,7 +144,30 @@
               next = next_int;
             }
 
-          if (uc_is_grapheme_break (prev, next) != should_break)
+          if ((last_compchar_prop == GBP_EB
+               || last_compchar_prop == GBP_EBG)
+              && uc_graphemeclusterbreak_property (next) == GBP_EM)
+            {
+              int prev_gbp = uc_graphemeclusterbreak_property (prev);
+              int next_gbp = uc_graphemeclusterbreak_property (next);
+              fprintf (stderr, "%s:%d: skipping GB10: should join U+%04X (%s) "
+                       "and U+%04X (%s)\n",
+                       filename, lineno,
+                       prev, graphemebreakproperty_to_string (prev_gbp),
+                       next, graphemebreakproperty_to_string (next_gbp));
+            }
+          else if (uc_graphemeclusterbreak_property (next) == GBP_RI
+                   && ri_count % 2 != 0)
+            {
+              int prev_gbp = uc_graphemeclusterbreak_property (prev);
+              int next_gbp = uc_graphemeclusterbreak_property (next);
+              fprintf (stderr, "%s:%d: skipping GB12: should join U+%04X (%s) "
+                       "and U+%04X (%s)\n",
+                       filename, lineno,
+                       prev, graphemebreakproperty_to_string (prev_gbp),
+                       next, graphemebreakproperty_to_string (next_gbp));
+            }
+          else if (uc_is_grapheme_break (prev, next) != should_break)
             {
               int prev_gbp = uc_graphemeclusterbreak_property (prev);
               int next_gbp = uc_graphemeclusterbreak_property (next);
@@ -150,6 +182,16 @@
 
           p += strspn (p, " \t\r\n");
           prev = next;
+
+          if (!(uc_graphemeclusterbreak_property (next) == GBP_EXTEND
+                && (last_compchar_prop == GBP_EB
+                    || last_compchar_prop == GBP_EBG)))
+            last_compchar_prop = uc_graphemeclusterbreak_property (next);
+
+          if (uc_graphemeclusterbreak_property (next) == GBP_RI)
+            ri_count++;
+          else
+            ri_count = 0;
         }
       while (*p != '\0');
     }
--- a/tests/uniwbrk/test-uc-wordbreaks.c	Sun Oct 29 16:22:41 2017 -0700
+++ b/tests/uniwbrk/test-uc-wordbreaks.c	Wed Oct 12 17:40:37 2016 +0200
@@ -51,6 +51,11 @@
       CASE(DQ)
       CASE(SQ)
       CASE(HL)
+      CASE(ZWJ)
+      CASE(EB)
+      CASE(EM)
+      CASE(GAZ)
+      CASE(EBG)
     }
   abort ();
 }
--- a/tests/uniwidth/test-uc_width2.sh	Sun Oct 29 16:22:41 2017 -0700
+++ b/tests/uniwidth/test-uc_width2.sh	Wed Oct 12 17:40:37 2016 +0200
@@ -65,8 +65,8 @@
 0829..082D	0
 082E..0858	A
 0859..085B	0
-085C..08E2	A
-08E3..0902	0
+085C..08D3	A
+08D4..0902	0
 0903..0939	A
 093A		0
 093B		A
@@ -251,7 +251,9 @@
 17DD		0
 17DE..180A	A
 180B..180E	0
-180F..18A8	A
+180F..1884	A
+1885..1886	0
+1887..18A8	A
 18A9		0
 18AA..191F	A
 1920..1922	0
@@ -327,8 +329,8 @@
 1CF8..1CF9	0
 1CFA..1DBF	A
 1DC0..1DF5	0
-1DF6..1DFB	A
-1DFC..1DFF	0
+1DF6..1DFA	A
+1DFB..1DFF	0
 1E00..200A	A
 200B..200F	0
 2010..2029	A
@@ -376,8 +378,8 @@
 A80C..A824	A
 A825..A826	0
 A827..A8C3	A
-A8C4		0
-A8C5..A8DF	A
+A8C4..A8C5	0
+A8C6..A8DF	A
 A8E0..A8F1	0
 A8F2..A925	A
 A926..A92D	0
@@ -493,7 +495,9 @@
 11234		0
 11235		1
 11236..11237	0
-11238..112DE	1
+11238..1123D	1
+1123E		0
+1123F..112DE	1
 112DF		0
 112E0..112E2	1
 112E3..112EA	0
@@ -507,7 +511,13 @@
 11366..1136C	0
 1136D..1136F	1
 11370..11374	0
-11375..114B2	1
+11375..11437	1
+11438..1143F	0
+11440..11441	1
+11442..11444	0
+11445		1
+11446		0
+11447..114B2	1
 114B3..114B8	0
 114B9		1
 114BA		0
@@ -543,7 +553,19 @@
 11722..11725	0
 11726		1
 11727..1172B	0
-1172C..16AEF	1
+1172C..11C2F	1
+11C30..11C36	0
+11C37		1
+11C38..11C3D	0
+11C3E..11C91	1
+11C92..11CA7	0
+11CA8..11CA9	1
+11CAA..11CB0	0
+11CB1		1
+11CB2..11CB3	0
+11CB4		1
+11CB5..11CB6	0
+11CB7..16AEF	1
 16AF0..16AF4	0
 16AF5..16B2F	1
 16B30..16B36	0
@@ -575,9 +597,21 @@
 1DA9B..1DA9F	0
 1DAA0		1
 1DAA1..1DAAF	0
-1DAB0..1E8CF	1
+1DAB0..1DFFF	1
+1E000..1E006	0
+1E007		1
+1E008..1E018	0
+1E019..1E01A	1
+1E01B..1E021	0
+1E022		1
+1E023..1E024	0
+1E025		1
+1E026..1E02A	0
+1E02B..1E8CF	1
 1E8D0..1E8D6	0
-1E8D7..1FFFF	1
+1E8D7..1E943	1
+1E944..1E94A	0
+1E94B..1FFFF	1
 20000..3FFFF	2
 40000..E0000	1
 E0001		0