changeset 31482:75cbfaf709cb

Add test for regexp match on UTF-8 string (patch #10295). * libinterp/corefcn/regexp.cc: Add test with UTF-8 encoded character vector.
author Rafael Laboissiere <rafael@laboissiere.net>
date Tue, 15 Nov 2022 12:25:40 -0300
parents cfa938be2999
children 601b08ce0c00
files libinterp/corefcn/regexp.cc
diffstat 1 files changed, 4 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/regexp.cc	Sat Nov 19 00:03:30 2022 -0500
+++ b/libinterp/corefcn/regexp.cc	Tue Nov 15 12:25:40 2022 -0300
@@ -919,6 +919,10 @@
 %!assert (regexp ('abcabc', 'abc$'), 4)
 %!assert (regexp ('abcabc', '^abc$'), zeros (1,0))
 
+## UTF-8 test with character vector "âé🙂ïõù"
+%!assert (regexp (char ([195, 162, 195, 169, 240, 159, 153, 130, 195, 175, ...
+%!                       195, 181, 195, 185]), "."), [1, 3, 5, 9, 11, 13])
+
 %!test
 %! [s, e, te, m, t] = regexp (' No Match ', 'f(.*)uck');
 %! assert (s, zeros (1,0));