Mercurial > octave-nkf
comparison src/DLD-FUNCTIONS/regexp.cc @ 7893:eb9ccb44ea41
make regexp(...,'once') matlab compatible
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Wed, 18 Jun 2008 21:00:06 +0200 |
parents | b166043585a8 |
children | 0ef13e15319b |
comparison
equal
deleted
inserted
replaced
7892:7ca2735d74c2 | 7893:eb9ccb44ea41 |
---|---|
81 typedef std::list<regexp_elem>::const_iterator const_iterator; | 81 typedef std::list<regexp_elem>::const_iterator const_iterator; |
82 | 82 |
83 static int | 83 static int |
84 octregexp_list (const octave_value_list &args, const std::string &nm, | 84 octregexp_list (const octave_value_list &args, const std::string &nm, |
85 bool case_insensitive, std::list<regexp_elem> &lst, | 85 bool case_insensitive, std::list<regexp_elem> &lst, |
86 string_vector &named, int &nopts) | 86 string_vector &named, int &nopts, bool &once) |
87 { | 87 { |
88 int sz = 0; | 88 int sz = 0; |
89 #if defined (HAVE_REGEX) || defined (HAVE_PCRE) | 89 #if defined (HAVE_REGEX) || defined (HAVE_PCRE) |
90 int nargin = args.length(); | 90 int nargin = args.length(); |
91 bool once = false; | |
92 bool lineanchors = false; | 91 bool lineanchors = false; |
93 bool dotexceptnewline = false; | 92 bool dotexceptnewline = false; |
94 bool freespacing = false; | 93 bool freespacing = false; |
95 | 94 |
96 nopts = nargin - 2; | 95 nopts = nargin - 2; |
| | 96 once = false; |
97 | 97 |
98 std::string buffer = args(0).string_value (); | 98 std::string buffer = args(0).string_value (); |
99 if (error_state) | 99 if (error_state) |
100 { | 100 { |
101 gripe_wrong_type_arg (nm.c_str(), args(0)); | 101 gripe_wrong_type_arg (nm.c_str(), args(0)); |
449 octave_value_list retval; | 449 octave_value_list retval; |
450 int nargin = args.length(); | 450 int nargin = args.length(); |
451 std::list<regexp_elem> lst; | 451 std::list<regexp_elem> lst; |
452 string_vector named; | 452 string_vector named; |
453 int nopts; | 453 int nopts; |
454 int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts); | 454 bool once; |
| | 455 int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once); |
455 | 456 |
456 if (! error_state) | 457 if (! error_state) |
457 { | 458 { |
458 // Converted the linked list in the correct form for the return values | 459 // Converted the linked list in the correct form for the return values |
459 | 460 |
480 } | 481 } |
481 #else | 482 #else |
482 retval(5) = Octave_map(); | 483 retval(5) = Octave_map(); |
483 #endif | 484 #endif |
484 | 485 |
485 Cell t (dim_vector(1, sz)); | 486 if (once) |
486 i = 0; | 487 retval(4) = sz ? lst.front ().t : Cell(); |
487 for (const_iterator p = lst.begin(); p != lst.end(); p++) | 488 else |
488 t(i++) = p->t; | 489 { |
489 retval(4) = t; | 490 Cell t (dim_vector(1, sz)); |
490 | 491 i = 0; |
491 Cell m (dim_vector(1, sz)); | 492 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
492 i = 0; | 493 t(i++) = p->t; |
493 for (const_iterator p = lst.begin(); p != lst.end(); p++) | 494 retval(4) = t; |
494 m(i++) = p->m; | 495 } |
495 retval(3) = m; | 496 |
496 | 497 if (once) |
497 | 498 retval(3) = sz ? lst.front ().m : std::string(); |
498 Cell te (dim_vector(1, sz)); | 499 else |
499 i = 0; | 500 { |
500 for (const_iterator p = lst.begin(); p != lst.end(); p++) | 501 Cell m (dim_vector(1, sz)); |
501 te(i++) = p->te; | 502 i = 0; |
502 retval(2) = te; | 503 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
503 | 504 m(i++) = p->m; |
504 NDArray e (dim_vector(1, sz)); | 505 retval(3) = m; |
505 i = 0; | 506 } |
506 for (const_iterator p = lst.begin(); p != lst.end(); p++) | 507 |
507 e(i++) = p->e; | 508 if (once) |
508 retval(1) = e; | 509 retval(2) = sz ? lst.front ().te : Matrix(); |
509 | 510 else |
| | 511 { |
| | 512 Cell te (dim_vector(1, sz)); |
| | 513 i = 0; |
| | 514 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
| | 515 te(i++) = p->te; |
| | 516 retval(2) = te; |
| | 517 } |
| | 518 |
| | 519 if (once) |
| | 520 { |
| | 521 if (sz) |
| | 522 retval(1) = lst.front ().e; |
| | 523 else |
| | 524 retval(1) = Matrix(); |
| | 525 } |
| | 526 else |
| | 527 { |
| | 528 NDArray e (dim_vector(1, sz)); |
| | 529 i = 0; |
| | 530 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
| | 531 e(i++) = p->e; |
| | 532 retval(1) = e; |
| | 533 } |
| | 534 |
| | 535 if (once) |
| | 536 { |
| | 537 if (sz) |
| | 538 retval(0) = lst.front ().s; |
| | 539 else |
| | 540 retval(0) = Matrix(); |
| | 541 } |
| | 542 else |
| | 543 { |
510 NDArray s (dim_vector(1, sz)); | 544 NDArray s (dim_vector(1, sz)); |
511 i = 0; | 545 i = 0; |
512 for (const_iterator p = lst.begin(); p != lst.end(); p++) | 546 for (const_iterator p = lst.begin(); p != lst.end(); p++) |
513 s(i++) = p->s; | 547 s(i++) = p->s; |
514 retval(0) = s; | 548 retval(0) = s; |
| | 549 } |
515 | 550 |
516 // Alter the order of the output arguments | 551 // Alter the order of the output arguments |
517 if (nopts > 0) | 552 if (nopts > 0) |
518 { | 553 { |
519 int n = 0; | 554 int n = 0; |
909 | 944 |
910 %!test | 945 %!test |
911 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once'); | 946 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once'); |
912 %! assert (s,1) | 947 %! assert (s,1) |
913 %! assert (e,5) | 948 %! assert (e,5) |
914 %! assert (size(te), [1,1]) | 949 %! assert (isempty(te)) |
915 %! assert (isempty(te{1})) | 950 %! assert (m,'short') |
916 %! assert (m{1},'short') | 951 %! assert (isempty(t)) |
917 %! ## Matlab gives [1,0] here but that seems wrong. | |
918 %! assert (size(t), [1,1]) | |
919 | 952 |
920 %!test | 953 %!test |
921 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | 954 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); |
922 %! assert (s,1) | 955 %! assert (s,1) |
923 %! assert (e,5) | 956 %! assert (e,5) |
924 %! assert (size(te), [1,1]) | 957 %! assert (isempty(te)) |
925 %! assert (isempty(te{1})) | 958 %! assert (m,'short') |
926 %! assert (m{1},'short') | 959 %! assert (isempty(t)) |
927 %! ## Matlab gives [1,0] here but that seems wrong. | |
928 %! assert (size(t), [1,1]) | |
929 | 960 |
930 %!testif HAVE_PCRE | 961 %!testif HAVE_PCRE |
931 %! ## This test is expected to fail if PCRE is not installed | 962 %! ## This test is expected to fail if PCRE is not installed |
932 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); | 963 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); |
933 %! assert (s,1) | 964 %! assert (s,1) |
1085 | 1116 |
1086 %!test | 1117 %!test |
1087 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once'); | 1118 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once'); |
1088 %! assert (s,1) | 1119 %! assert (s,1) |
1089 %! assert (e,5) | 1120 %! assert (e,5) |
1090 %! assert (size(te), [1,1]) | 1121 %! assert (isempty(te)) |
1091 %! assert (isempty(te{1})) | 1122 %! assert (m,'ShoRt') |
1092 %! assert (m{1},'ShoRt') | 1123 %! assert (isempty(t)) |
1093 %! ## Matlab gives [1,0] here but that seems wrong. | |
1094 %! assert (size(t), [1,1]) | |
1095 | 1124 |
1096 %!test | 1125 %!test |
1097 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | 1126 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); |
1098 %! assert (s,1) | 1127 %! assert (s,1) |
1099 %! assert (e,5) | 1128 %! assert (e,5) |
1100 %! assert (size(te), [1,1]) | 1129 %! assert (isempty(te)) |
1101 %! assert (isempty(te{1})) | 1130 %! assert (m,'ShoRt') |
1102 %! assert (m{1},'ShoRt') | 1131 %! assert (isempty(t)) |
1103 %! ## Matlab gives [1,0] here but that seems wrong. | |
1104 %! assert (size(t), [1,1]) | |
1105 | 1132 |
1106 %!testif HAVE_PCRE | 1133 %!testif HAVE_PCRE |
1107 %! ## This test is expected to fail if PCRE is not installed | 1134 %! ## This test is expected to fail if PCRE is not installed |
1108 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); | 1135 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); |
1109 %! assert (s,1) | 1136 %! assert (s,1) |
1235 if (tokens > 0) | 1262 if (tokens > 0) |
1236 { | 1263 { |
1237 std::list<regexp_elem> lst; | 1264 std::list<regexp_elem> lst; |
1238 string_vector named; | 1265 string_vector named; |
1239 int nopts; | 1266 int nopts; |
1240 int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts); | 1267 bool once; |
| | 1268 int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once); |
1241 | 1269 |
1242 if (error_state) | 1270 if (error_state) |
1243 return retval; | 1271 return retval; |
1244 if (sz == 0) | 1272 if (sz == 0) |
1245 { | 1273 { |
1321 else | 1349 else |
1322 { | 1350 { |
1323 std::list<regexp_elem> lst; | 1351 std::list<regexp_elem> lst; |
1324 string_vector named; | 1352 string_vector named; |
1325 int nopts; | 1353 int nopts; |
1326 int sz = octregexp_list (regexpargs, nm, false, lst, named,nopts); | 1354 bool once; |
| | 1355 int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once); |
1327 | 1356 |
1328 if (error_state) | 1357 if (error_state) |
1329 return retval; | 1358 return retval; |
1330 if (sz == 0) | 1359 if (sz == 0) |
1331 { | 1360 { |