# HG changeset patch # User Rafael Laboissiere # Date 1668525940 10800 # Node ID 864d29fb932b8f18befa29bb8b3fbc3cfd8ddb05 # Parent 1bbcaa97b2dd92c62fa53fc9ec57378fa308a46c Add test for regexp match on UTF-8 string (patch #10295). * libinterp/corefcn/regexp.cc: Add test with UTF-8 encoded character vector. (grafted from 75cbfaf709cbeb144dbc17140379bd94151e7889) diff -r 1bbcaa97b2dd -r 864d29fb932b libinterp/corefcn/regexp.cc --- a/libinterp/corefcn/regexp.cc Sat Nov 19 00:01:26 2022 -0500 +++ b/libinterp/corefcn/regexp.cc Tue Nov 15 12:25:40 2022 -0300 @@ -919,6 +919,10 @@ %!assert (regexp ('abcabc', 'abc$'), 4) %!assert (regexp ('abcabc', '^abc$'), zeros (1,0)) +## UTF-8 test with character vector "âé🙂ïõù" +%!assert (regexp (char ([195, 162, 195, 169, 240, 159, 153, 130, 195, 175, ... +%! 195, 181, 195, 185]), "."), [1, 3, 5, 9, 11, 13]) + %!test %! [s, e, te, m, t] = regexp (' No Match ', 'f(.*)uck'); %! assert (s, zeros (1,0));