comparison src/DLD-FUNCTIONS/regexp.cc @ 7893:eb9ccb44ea41

make regexp(...,'once') matlab compatible
author Jaroslav Hajek <highegg@gmail.com>
date Wed, 18 Jun 2008 21:00:06 +0200
parents b166043585a8
children 0ef13e15319b
comparison
equal deleted inserted replaced
7892:7ca2735d74c2 7893:eb9ccb44ea41
81 typedef std::list<regexp_elem>::const_iterator const_iterator; 81 typedef std::list<regexp_elem>::const_iterator const_iterator;
82 82
83 static int 83 static int
84 octregexp_list (const octave_value_list &args, const std::string &nm, 84 octregexp_list (const octave_value_list &args, const std::string &nm,
85 bool case_insensitive, std::list<regexp_elem> &lst, 85 bool case_insensitive, std::list<regexp_elem> &lst,
86 string_vector &named, int &nopts) 86 string_vector &named, int &nopts, bool &once)
87 { 87 {
88 int sz = 0; 88 int sz = 0;
89 #if defined (HAVE_REGEX) || defined (HAVE_PCRE) 89 #if defined (HAVE_REGEX) || defined (HAVE_PCRE)
90 int nargin = args.length(); 90 int nargin = args.length();
91 bool once = false;
92 bool lineanchors = false; 91 bool lineanchors = false;
93 bool dotexceptnewline = false; 92 bool dotexceptnewline = false;
94 bool freespacing = false; 93 bool freespacing = false;
95 94
96 nopts = nargin - 2; 95 nopts = nargin - 2;
96 once = false;
97 97
98 std::string buffer = args(0).string_value (); 98 std::string buffer = args(0).string_value ();
99 if (error_state) 99 if (error_state)
100 { 100 {
101 gripe_wrong_type_arg (nm.c_str(), args(0)); 101 gripe_wrong_type_arg (nm.c_str(), args(0));
449 octave_value_list retval; 449 octave_value_list retval;
450 int nargin = args.length(); 450 int nargin = args.length();
451 std::list<regexp_elem> lst; 451 std::list<regexp_elem> lst;
452 string_vector named; 452 string_vector named;
453 int nopts; 453 int nopts;
454 int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts); 454 bool once;
455 int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once);
455 456
456 if (! error_state) 457 if (! error_state)
457 { 458 {
458 // Converted the linked list in the correct form for the return values 459 // Converted the linked list in the correct form for the return values
459 460
480 } 481 }
481 #else 482 #else
482 retval(5) = Octave_map(); 483 retval(5) = Octave_map();
483 #endif 484 #endif
484 485
485 Cell t (dim_vector(1, sz)); 486 if (once)
486 i = 0; 487 retval(4) = sz ? lst.front ().t : Cell();
487 for (const_iterator p = lst.begin(); p != lst.end(); p++) 488 else
488 t(i++) = p->t; 489 {
489 retval(4) = t; 490 Cell t (dim_vector(1, sz));
490 491 i = 0;
491 Cell m (dim_vector(1, sz)); 492 for (const_iterator p = lst.begin(); p != lst.end(); p++)
492 i = 0; 493 t(i++) = p->t;
493 for (const_iterator p = lst.begin(); p != lst.end(); p++) 494 retval(4) = t;
494 m(i++) = p->m; 495 }
495 retval(3) = m; 496
496 497 if (once)
497 498 retval(3) = sz ? lst.front ().m : std::string();
498 Cell te (dim_vector(1, sz)); 499 else
499 i = 0; 500 {
500 for (const_iterator p = lst.begin(); p != lst.end(); p++) 501 Cell m (dim_vector(1, sz));
501 te(i++) = p->te; 502 i = 0;
502 retval(2) = te; 503 for (const_iterator p = lst.begin(); p != lst.end(); p++)
503 504 m(i++) = p->m;
504 NDArray e (dim_vector(1, sz)); 505 retval(3) = m;
505 i = 0; 506 }
506 for (const_iterator p = lst.begin(); p != lst.end(); p++) 507
507 e(i++) = p->e; 508 if (once)
508 retval(1) = e; 509 retval(2) = sz ? lst.front ().te : Matrix();
509 510 else
511 {
512 Cell te (dim_vector(1, sz));
513 i = 0;
514 for (const_iterator p = lst.begin(); p != lst.end(); p++)
515 te(i++) = p->te;
516 retval(2) = te;
517 }
518
519 if (once)
520 {
521 if (sz)
522 retval(1) = lst.front ().e;
523 else
524 retval(1) = Matrix();
525 }
526 else
527 {
528 NDArray e (dim_vector(1, sz));
529 i = 0;
530 for (const_iterator p = lst.begin(); p != lst.end(); p++)
531 e(i++) = p->e;
532 retval(1) = e;
533 }
534
535 if (once)
536 {
537 if (sz)
538 retval(0) = lst.front ().s;
539 else
540 retval(0) = Matrix();
541 }
542 else
543 {
510 NDArray s (dim_vector(1, sz)); 544 NDArray s (dim_vector(1, sz));
511 i = 0; 545 i = 0;
512 for (const_iterator p = lst.begin(); p != lst.end(); p++) 546 for (const_iterator p = lst.begin(); p != lst.end(); p++)
513 s(i++) = p->s; 547 s(i++) = p->s;
514 retval(0) = s; 548 retval(0) = s;
549 }
515 550
516 // Alter the order of the output arguments 551 // Alter the order of the output arguments
517 if (nopts > 0) 552 if (nopts > 0)
518 { 553 {
519 int n = 0; 554 int n = 0;
909 944
910 %!test 945 %!test
911 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once'); 946 %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once');
912 %! assert (s,1) 947 %! assert (s,1)
913 %! assert (e,5) 948 %! assert (e,5)
914 %! assert (size(te), [1,1]) 949 %! assert (isempty(te))
915 %! assert (isempty(te{1})) 950 %! assert (m,'short')
916 %! assert (m{1},'short') 951 %! assert (isempty(t))
917 %! ## Matlab gives [1,0] here but that seems wrong.
918 %! assert (size(t), [1,1])
919 952
920 %!test 953 %!test
921 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); 954 %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
922 %! assert (s,1) 955 %! assert (s,1)
923 %! assert (e,5) 956 %! assert (e,5)
924 %! assert (size(te), [1,1]) 957 %! assert (isempty(te))
925 %! assert (isempty(te{1})) 958 %! assert (m,'short')
926 %! assert (m{1},'short') 959 %! assert (isempty(t))
927 %! ## Matlab gives [1,0] here but that seems wrong.
928 %! assert (size(t), [1,1])
929 960
930 %!testif HAVE_PCRE 961 %!testif HAVE_PCRE
931 %! ## This test is expected to fail if PCRE is not installed 962 %! ## This test is expected to fail if PCRE is not installed
932 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); 963 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)');
933 %! assert (s,1) 964 %! assert (s,1)
1085 1116
1086 %!test 1117 %!test
1087 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once'); 1118 %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once');
1088 %! assert (s,1) 1119 %! assert (s,1)
1089 %! assert (e,5) 1120 %! assert (e,5)
1090 %! assert (size(te), [1,1]) 1121 %! assert (isempty(te))
1091 %! assert (isempty(te{1})) 1122 %! assert (m,'ShoRt')
1092 %! assert (m{1},'ShoRt') 1123 %! assert (isempty(t))
1093 %! ## Matlab gives [1,0] here but that seems wrong.
1094 %! assert (size(t), [1,1])
1095 1124
1096 %!test 1125 %!test
1097 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); 1126 %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1098 %! assert (s,1) 1127 %! assert (s,1)
1099 %! assert (e,5) 1128 %! assert (e,5)
1100 %! assert (size(te), [1,1]) 1129 %! assert (isempty(te))
1101 %! assert (isempty(te{1})) 1130 %! assert (m,'ShoRt')
1102 %! assert (m{1},'ShoRt') 1131 %! assert (isempty(t))
1103 %! ## Matlab gives [1,0] here but that seems wrong.
1104 %! assert (size(t), [1,1])
1105 1132
1106 %!testif HAVE_PCRE 1133 %!testif HAVE_PCRE
1107 %! ## This test is expected to fail if PCRE is not installed 1134 %! ## This test is expected to fail if PCRE is not installed
1108 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); 1135 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)');
1109 %! assert (s,1) 1136 %! assert (s,1)
1235 if (tokens > 0) 1262 if (tokens > 0)
1236 { 1263 {
1237 std::list<regexp_elem> lst; 1264 std::list<regexp_elem> lst;
1238 string_vector named; 1265 string_vector named;
1239 int nopts; 1266 int nopts;
1240 int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts); 1267 bool once;
1268 int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once);
1241 1269
1242 if (error_state) 1270 if (error_state)
1243 return retval; 1271 return retval;
1244 if (sz == 0) 1272 if (sz == 0)
1245 { 1273 {
1321 else 1349 else
1322 { 1350 {
1323 std::list<regexp_elem> lst; 1351 std::list<regexp_elem> lst;
1324 string_vector named; 1352 string_vector named;
1325 int nopts; 1353 int nopts;
1326 int sz = octregexp_list (regexpargs, nm, false, lst, named,nopts); 1354 bool once;
1355 int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once);
1327 1356
1328 if (error_state) 1357 if (error_state)
1329 return retval; 1358 return retval;
1330 if (sz == 0) 1359 if (sz == 0)
1331 { 1360 {