changeset 10518:fcafe0e9bd58

Handle repeated matches in the list of matches returned by pcre
author David Bateman <dbateman@free.fr>
date Tue, 13 Apr 2010 23:48:01 +0200
parents 9cdd6c8c05a4
children f6959aff84ca
files src/ChangeLog src/DLD-FUNCTIONS/regexp.cc
diffstat 2 files changed, 30 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Tue Apr 13 20:26:08 2010 +0300
+++ b/src/ChangeLog	Tue Apr 13 23:48:01 2010 +0200
@@ -1,3 +1,8 @@
+2010-04-13  David Bateman  <dbateman@free.fr>
+
+	* DLD-FUNCTIONS/regexp.cc (octregexp_list): Handle repeated matches
+	in the list of matches returned by pcre.
+
 2010-04-13  Shai Ayal  <shaiay@users.sourceforge.net>
 
 	* DLD-FUNCTIONS/fltk_backend.cc (plot_window::set_currentpoint,
--- a/src/DLD-FUNCTIONS/regexp.cc	Tue Apr 13 20:26:08 2010 +0300
+++ b/src/DLD-FUNCTIONS/regexp.cc	Tue Apr 13 23:48:01 2010 +0200
@@ -429,13 +429,16 @@
               int pos_match = 0;
               Matrix te(matches-1,2);
               for (int i = 1; i < matches; i++)
-                {
-                  if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
+                if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
+                  if (i == 1 || ovector[2*i] != ovector[2*i-2]
+                      || ovector[2*i-1] != ovector[2*i+1])
                     {
-                      te(pos_match,0) = double (ovector[2*i]+1);
-                      te(pos_match++,1) = double (ovector[2*i+1]);
+                      if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
+                        {
+                          te(pos_match,0) = double (ovector[2*i]+1);
+                          te(pos_match++,1) = double (ovector[2*i+1]);
+                        }
                     }
-                }
               te.resize(pos_match,2);
               s = double (ovector[0]+1);
               e = double (ovector[1]);
@@ -452,19 +455,26 @@
               }
 
               Cell cell_t (dim_vector(1,pos_match));
+              string_vector named_tokens(nnames);
+              int pos_offset = 0;
               pos_match = 0;
               for (int i = 1; i < matches; i++)
                 if (ovector[2*i] >= 0 && ovector[2*i+1] > 0)
-                  cell_t(pos_match++) = std::string(*(listptr+i));
+                  if (i == 1 || ovector[2*i] != ovector[2*i-2]
+                      || ovector[2*i-1] != ovector[2*i+1])
+                    {
+                      if (namecount > 0)
+                        named_tokens(named_idx(i-pos_offset-1)) = 
+                          std::string(*(listptr+nidx[i-pos_offset-1]));    
+                      cell_t(pos_match++) = 
+                        std::string(*(listptr+i));
+                    }
+                  else
+                    pos_offset++;
 
               m =  std::string(*listptr);
               t = cell_t;
 
-              string_vector named_tokens(nnames);
-              if (namecount > 0)
-                for (int i = 0; i < pos_match; i++)
-                  named_tokens(named_idx(i)) = std::string(*(listptr+nidx[i]));
-
               pcre_free_substring_list(listptr);
 
               regexp_elem new_elem (named_tokens, t, m, te, s, e);
@@ -1131,6 +1141,10 @@
 %! assert (nm.last{1},'Davis');
 %! assert (nm.last{2},'Rogers');
 
+%!testif HAVE_PCRE
+%! # Parentheses inside a named token (i.e. (int)) used to cause a problem
+%! assert (regexp('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int'));
+
 %!assert(regexp("abc\nabc",'.'),[1:7])
 %!assert(regexp("abc\nabc",'.','dotall'),[1:7])
 %!testif HAVE_PCRE