# HG changeset patch
# User John W. Eaton <jwe@octave.org>
# Date 1318273316 14400
# Node ID 583940a28bfd1cd6ce3dec5a6f06de8ff8ea02a8
# Parent  1f075333d9e8fbb020d9dff678b8c82bf2491d93
handle "split" option for regexp

* regexp.cc (octregexp_list, octregexp, Fregexp):
Handle "split" option.
(octregexp): Consolidate multiple loops for gathering outputs.
(Fregexp): New tests.

diff -r 1f075333d9e8 -r 583940a28bfd src/DLD-FUNCTIONS/regexp.cc
--- a/src/DLD-FUNCTIONS/regexp.cc	Mon Oct 10 11:12:34 2011 -0700
+++ b/src/DLD-FUNCTIONS/regexp.cc	Mon Oct 10 15:01:56 2011 -0400
@@ -168,9 +168,10 @@
           freespacing = true;
           nopts--;
         }
-      else if (str.find ("start", 0) && str.find ("end", 0) &&
-               str.find ("tokenextents", 0) && str.find ("match", 0) &&
-               str.find ("tokens", 0) && str.find ("names", 0))
+      else if (str.find ("start", 0) && str.find ("end", 0)
+               && str.find ("tokenextents", 0) && str.find ("match", 0)
+               && str.find ("tokens", 0) && str.find ("names", 0)
+               && str.find ("split", 0))
         error ("%s: unrecognized option", nm.c_str ());
     }
 
@@ -524,6 +525,8 @@
       octave_idx_type i = 0;
       octave_scalar_map nmap;
 
+      retval.resize (7);
+
       if (sz == 1)
         {
           for (int j = 0; j < named.length (); j++)
@@ -547,78 +550,64 @@
           retval(5) = nmap;
         }
 
+      std::string buffer = args(0).string_value ();
+
       if (once)
-        retval(4) = sz ? lst.front ().t : Cell ();
+        {
+          retval(4) = sz ? lst.front ().t : Cell ();
+          retval(3) = sz ? lst.front ().m : std::string ();
+          retval(2) = sz ? lst.front ().te : Matrix ();
+
+          if (sz)
+            {
+              double e = lst.front ().e;
+              double s = lst.front ().s;
+
+              Cell sp (dim_vector (1, 2));
+              sp(0) = buffer.substr (0, s-1);
+              sp(1) = buffer.substr (e);
+
+              retval(6) = sp;
+              retval(1) = e;
+              retval(0) = s;
+            }
+          else
+            {
+              retval(6) = buffer;
+              retval(1) = Matrix ();
+              retval(0) = Matrix ();
+            }
+        }
       else
         {
           Cell t (dim_vector (1, sz));
-
-          i = 0;
-          for (const_iterator p = lst.begin (); p != lst.end (); p++)
-            t(i++) = p->t;
-
-          retval(4) = t;
-        }
-
-      if (once)
-        retval(3) = sz ? lst.front ().m : std::string ();
-      else
-        {
           Cell m (dim_vector (1, sz));
-
-          i = 0;
-          for (const_iterator p = lst.begin (); p != lst.end (); p++)
-            m(i++) = p->m;
-
-          retval(3) = m;
-        }
-
-      if (once)
-        retval(2) = sz ? lst.front ().te : Matrix ();
-      else
-        {
           Cell te (dim_vector (1, sz));
+          NDArray e (dim_vector (1, sz));
+          NDArray s (dim_vector (1, sz));
+          Cell sp (dim_vector (1, sz+1));
+          size_t sp_start = 0;
 
           i = 0;
           for (const_iterator p = lst.begin (); p != lst.end (); p++)
-            te(i++) = p->te;
-
-          retval(2) = te;
-        }
-
-      if (once)
-        {
-          if (sz)
-            retval(1) = lst.front ().e;
-          else
-            retval(1) = Matrix ();
-        }
-      else
-        {
-          NDArray e (dim_vector (1, sz));
+            {
+              t(i) = p->t;
+              m(i) = p->m;
+              te(i) = p->te;
+              e(i) = p->e;
+              s(i) = p->s;
+              sp(i) = buffer.substr (sp_start, p->s-sp_start-1);
+              sp_start = p->e;
+              i++;
+            }
 
-          i = 0;
-          for (const_iterator p = lst.begin (); p != lst.end (); p++)
-            e(i++) = p->e;
-
-          retval(1) = e;
-        }
+          sp(i) = buffer.substr (sp_start);
 
-      if (once)
-        {
-          if (sz)
-            retval(0) = lst.front ().s;
-          else
-            retval(0) = Matrix ();
-        }
-      else
-        {
-          NDArray s (dim_vector (1, sz));
-
-          i = 0;
-          for (const_iterator p = lst.begin (); p != lst.end (); p++)
-            s(i++) = p->s;
-
+          retval(6) = sp;
+          retval(4) = t;
+          retval(3) = m;
+          retval(2) = te;
+          retval(1) = e;
           retval(0) = s;
         }
 
@@ -661,6 +650,8 @@
                 k = 4;
               else if (str.find ("names", 0) == 0)
                 k = 5;
+              else if (str.find ("split", 0) == 0)
+                k = 6;
 
               new_retval(n++) = retval(k);
               arg_used[k] = true;
@@ -947,6 +938,8 @@
 A structure containing the text of each matched named token, with the name\n\
 being used as the fieldname.  A named token is denoted by\n\
 @code{(?<name>@dots{})}.\n\
+@item sp\n\
+A cell array of the text not returned by match.\n\
 @end table\n\
 \n\
 Particular output arguments, or the order of the output arguments, can be\n\
@@ -961,6 +954,7 @@
 @item @tab 'match'        @tab @var{m}  @tab\n\
 @item @tab 'tokens'       @tab @var{t}  @tab\n\
 @item @tab 'names'        @tab @var{nm} @tab\n\
+@item @tab 'split'        @tab @var{sp} @tab\n\
 @end multitable\n\
 \n\
 Additional arguments are summarized below.\n\
@@ -1200,6 +1194,45 @@
 %! assert(regexp("qit",'q(?=u*)','match'), {'q'})
 %! assert(regexp('thingamabob','(?<=a)b'), 9)
 
+## Tests for split option.
+%!shared str
+%! str = "foo bar foo";
+%!test
+%! [a, b] = regexp (str, "f..", "match", "split");
+%! assert (a, {"foo", "foo"});
+%! assert (b, {"", " bar ", ""});
+%!test
+%! [a, b] = regexp (str, "f..", "match", "split", "once");
+%! assert (a, "foo");
+%! assert (b, {"", " bar foo"});
+%!test
+%! [a, b] = regexp (str, "fx.", "match", "split");
+%! assert (a, cell (1, 0));
+%! assert (b, {"foo bar foo"});
+%!test
+%! [a, b] = regexp (str, "fx.", "match", "split", "once");
+%! assert (a, "");
+%! assert (b, "foo bar foo")
+
+%!shared str
+%! str = "foo bar";
+%!test
+%! [a, b] = regexp (str, "f..", "match", "split");
+%! assert (a, {"foo"});
+%! assert (b, {"", " bar"});
+%!test
+%! [a, b] = regexp (str, "b..", "match", "split");
+%! assert (a, {"bar"});
+%! assert (b, {"foo ", ""});
+%!test
+%! [a, b] = regexp (str, "x", "match", "split");
+%! assert (a, cell (1, 0));
+%! assert (b, {"foo bar"});
+%!test
+%! [a, b] = regexp (str, "[o]+", "match", "split");
+%! assert (a, {"oo"});
+%! assert (b, {"f", " bar"});
+
 */
 
 DEFUN_DLD (regexpi, args, nargout,
@@ -1395,7 +1428,7 @@
       const std::string opt = args(i).string_value ();
       if (opt != "tokenize" && opt != "start" && opt != "end"
           && opt != "tokenextents" && opt != "match" && opt != "tokens"
-          && opt != "names"  && opt != "warnings")
+          && opt != "names"  && opt != "split" && opt != "warnings")
         {
           regexpargs(len++) = args(i);
         }