changeset 17614:ab0ca58c72b1

exclude: add support for posix regexps This commit adds support for POSIX extended regular expressions and fixes a long-standing memory leak (pattern buffer was never freed). It also implements a new interface function to read exclude patterns from a FILE, which passes an additional parameter to its callback function, thereby allowing to preserve its state between invocations. * lib/exclude.c (struct patopts): Pack regex and pattern into union. (pattern_buffer): New struct. (exclude): New member patbuf. (exclude_add_pattern_buffer): New function. (free_exclude_segment): Free regexps. (free_exclude): Free allocated pattern buffers. (exclude_patopts): New function. (file_pattern_matches): Use exclude_patopts. (add_exclude): support regexps. (add_exclude_fp): New function. (add_exclude_file): Rewrite using add_exclude_fp. (fnmatch_pattern_has_wildcards): Support posix extended regexps. * lib/exclude.h (EXCLUDE_REGEX, EXCLUDE_ALLOC): New flags. (add_exclude_fp) (exclude_add_pattern_buffer): New prototypes. * modules/exclude: Depends on regex and filename.
author Sergey Poznyakoff <gray@gnu.org.ua>
date Sat, 15 Feb 2014 19:21:04 +0200
parents c18fb7a002f4
children 621c25ada8d5
files ChangeLog lib/exclude.c lib/exclude.h modules/exclude
diffstat 4 files changed, 207 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Thu Feb 20 06:56:42 2014 -0700
+++ b/ChangeLog	Sat Feb 15 19:21:04 2014 +0200
@@ -1,3 +1,35 @@
+2014-02-21  Sergey Poznyakoff  <gray@gnu.org.ua>
+
+	exclude: add support for posix regexps
+
+	This commit adds support for POSIX extended regular expressions
+	and fixes a long-standing memory leak (pattern buffer was never
+	freed).  It also implements a new interface function to read
+	exclude patterns from a FILE, which passes an additional parameter
+	to its callback function, thereby allowing to preserve its state
+	between invocations.
+
+	* lib/exclude.c (struct patopts): Pack regex and pattern into union.
+	(pattern_buffer): New struct.
+	(exclude): New member patbuf.
+	(exclude_add_pattern_buffer): New function.
+	(free_exclude_segment): Free regexps.
+	(free_exclude): Free allocated pattern buffers.
+	(exclude_patopts): New function.
+	(file_pattern_matches): Use exclude_patopts.
+	(add_exclude): support regexps.
+	(add_exclude_fp): New function.
+	(add_exclude_file): Rewrite using add_exclude_fp.
+	(fnmatch_pattern_has_wildcards): Support posix extended regexps.
+	* lib/exclude.h (EXCLUDE_REGEX, EXCLUDE_ALLOC): New flags.
+	(add_exclude_fp)
+	(add_exclude_file): Rewrite using add_exclude_fp.
+	(fnmatch_pattern_has_wildcards): Support posix extended regexps.
+	* lib/exclude.h (EXCLUDE_REGEX, EXCLUDE_ALLOC): New flags.
+	(add_exclude_fp)
+	(exclude_add_pattern_buffer): New prototypes.
+	* modules/exclude: Depends on regex and filename.
+
 2014-02-20  Eric Blake  <eblake@redhat.com>
 
 	maintainer-makefiles: use $(SED) for syntax check
--- a/lib/exclude.c	Thu Feb 20 06:56:42 2014 -0700
+++ b/lib/exclude.c	Sat Feb 15 19:21:04 2014 +0200
@@ -32,6 +32,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <wctype.h>
+#include <regex.h>
 
 #include "exclude.h"
 #include "hash.h"
@@ -39,6 +40,7 @@
 #include "fnmatch.h"
 #include "xalloc.h"
 #include "verify.h"
+#include "filename.h"
 
 #if USE_UNLOCKED_IO
 # include "unlocked-io.h"
@@ -73,8 +75,12 @@
 
 struct patopts
   {
-    char const *pattern;
     int options;
+    union
+    {
+      char const *pattern;
+      regex_t re;
+    } v;
   };
 
 /* An array of pattern-options pairs.  */
@@ -104,13 +110,33 @@
     } v;
   };
 
+struct pattern_buffer
+  {
+    struct pattern_buffer *next;
+    char *base;
+  };
+
 /* The exclude structure keeps a singly-linked list of exclude segments,
    maintained in reverse order.  */
 struct exclude
   {
     struct exclude_segment *head;
+    struct pattern_buffer *patbuf;
   };
 
+/* Register BUF in the pattern buffer list of EX.  ADD_FUNC (see
+   add_exclude_file and add_exclude_fp below) can use this function
+   if it modifies the pattern, to ensure the allocated memory will be
+   properly reclaimed upon calling free_exclude. */
+void
+exclude_add_pattern_buffer (struct exclude *ex, char *buf)
+{
+  struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
+  pbuf->base = buf;
+  pbuf->next = ex->patbuf;
+  ex->patbuf = pbuf;
+}
+
 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
    Return false if STR definitely does not have wildcards.  */
 bool
@@ -120,8 +146,20 @@
     {
       switch (*str++)
         {
+	case '.':
+	case '{':
+	case '}':
+	case '(':
+	case ')':
+	  if (options & EXCLUDE_REGEX)
+	    return true;
+	  break;
+
         case '\\':
-          str += ! (options & FNM_NOESCAPE) && *str;
+	  if (options & EXCLUDE_REGEX)
+	    continue;
+	  else
+	    str += ! (options & FNM_NOESCAPE) && *str;
           break;
 
         case '+': case '@': case '!':
@@ -243,9 +281,16 @@
 static void
 free_exclude_segment (struct exclude_segment *seg)
 {
+  size_t i;
+
   switch (seg->type)
     {
     case exclude_pattern:
+      for (i = 0; i < seg->v.pat.exclude_count; i++)
+	{
+	  if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
+	    regfree (&seg->v.pat.exclude[i].v.re);
+	}
       free (seg->v.pat.exclude);
       break;
 
@@ -261,12 +306,23 @@
 free_exclude (struct exclude *ex)
 {
   struct exclude_segment *seg;
+  struct pattern_buffer *pbuf;
+
   for (seg = ex->head; seg; )
     {
       struct exclude_segment *next = seg->next;
       free_exclude_segment (seg);
       seg = next;
     }
+
+  for (pbuf = ex->patbuf; pbuf; )
+    {
+      struct pattern_buffer *next = pbuf->next;
+      free (pbuf->base);
+      free (pbuf);
+      pbuf = next;
+    }
+
   free (ex);
 }
 
@@ -331,11 +387,21 @@
   if (! (options & EXCLUDE_ANCHORED))
     for (p = f; *p && ! matched; p++)
       if (*p == '/' && p[1] != '/')
-        matched = ((*matcher) (pattern, p + 1, options) == 0);
+	matched = ((*matcher) (pattern, p + 1, options) == 0);
 
   return matched;
 }
 
+bool
+exclude_patopts (struct patopts const *opts, char const *f)
+{
+  int options = opts->options;
+
+  return (options & EXCLUDE_REGEX)
+          ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
+          : exclude_fnmatch (opts->v.pattern, f, options);
+}
+
 /* Return true if the exclude_pattern segment SEG matches F.  */
 
 static bool
@@ -347,9 +413,7 @@
 
   for (i = 0; i < exclude_count; i++)
     {
-      char const *pattern = exclude[i].pattern;
-      int options = exclude[i].options;
-      if (exclude_fnmatch (pattern, f, options))
+      if (exclude_patopts (exclude + i, f))
         return true;
     }
   return false;
@@ -454,17 +518,17 @@
 add_exclude (struct exclude *ex, char const *pattern, int options)
 {
   struct exclude_segment *seg;
+  struct exclude_pattern *pat;
+  struct patopts *patopts;
 
-  if ((options & EXCLUDE_WILDCARDS)
+  if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
       && fnmatch_pattern_has_wildcards (pattern, options))
     {
-      struct exclude_pattern *pat;
-      struct patopts *patopts;
+      if (! (ex->head && ex->head->type == exclude_pattern
+	     && ((ex->head->options & EXCLUDE_INCLUDE)
+		 == (options & EXCLUDE_INCLUDE))))
+	new_exclude_segment (ex, exclude_pattern, options);
 
-      if (! (ex->head && ex->head->type == exclude_pattern
-             && ((ex->head->options & EXCLUDE_INCLUDE)
-                 == (options & EXCLUDE_INCLUDE))))
-        new_exclude_segment (ex, exclude_pattern, options);
       seg = ex->head;
 
       pat = &seg->v.pat;
@@ -472,8 +536,51 @@
         pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
                                    sizeof *pat->exclude);
       patopts = &pat->exclude[pat->exclude_count++];
-      patopts->pattern = pattern;
+
       patopts->options = options;
+      if (options & EXCLUDE_REGEX)
+	{
+	  int rc;
+	  int cflags = REG_NOSUB|REG_EXTENDED|
+	               ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
+
+	  if (options & FNM_LEADING_DIR)
+	    {
+	      char *tmp;
+	      size_t len = strlen (pattern);
+
+	      while (len > 0 && ISSLASH (pattern[len-1]))
+		--len;
+
+	      if (len == 0)
+		rc = 1;
+	      else
+		{
+		  tmp = xmalloc (len + 7);
+		  memcpy (tmp, pattern, len);
+		  strcpy (tmp + len, "(/.*)?");
+		  rc = regcomp (&patopts->v.re, tmp, cflags);
+		  free (tmp);
+		}
+	    }
+	  else
+	    rc = regcomp (&patopts->v.re, pattern, cflags);
+
+	  if (rc)
+	    {
+	      pat->exclude_count--;
+	      return;
+	    }
+	}
+      else
+	{
+	  if (options & EXCLUDE_ALLOC)
+	    {
+	      pattern = xstrdup (pattern);
+	      exclude_add_pattern_buffer (ex, (char*) pattern);
+	    }
+	  patopts->v.pattern = pattern;
+	}
     }
   else
     {
@@ -498,45 +605,39 @@
 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
    OPTIONS.  LINE_END terminates each pattern in the file.  If
    LINE_END is a space character, ignore trailing spaces and empty
-   lines in FILE.  Return -1 on failure, 0 on success.  */
+   lines in FP.  Return -1 on failure, 0 on success.  */
 
 int
-add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
-                  struct exclude *ex, char const *file_name, int options,
-                  char line_end)
+add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
+		struct exclude *ex, FILE *fp, int options,
+		char line_end,
+		void *data)
 {
-  bool use_stdin = file_name[0] == '-' && !file_name[1];
-  FILE *in;
   char *buf = NULL;
   char *p;
-  char const *pattern;
+  char *pattern;
   char const *lim;
   size_t buf_alloc = 0;
   size_t buf_count = 0;
   int c;
   int e = 0;
 
-  if (use_stdin)
-    in = stdin;
-  else if (! (in = fopen (file_name, "r")))
-    return -1;
-
-  while ((c = getc (in)) != EOF)
+  while ((c = getc (fp)) != EOF)
     {
       if (buf_count == buf_alloc)
         buf = x2realloc (buf, &buf_alloc);
       buf[buf_count++] = c;
     }
 
-  if (ferror (in))
-    e = errno;
-
-  if (!use_stdin && fclose (in) != 0)
+  if (ferror (fp))
     e = errno;
 
   buf = xrealloc (buf, buf_count + 1);
   buf[buf_count] = line_end;
   lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
+
+  exclude_add_pattern_buffer (ex, buf);
+
   pattern = buf;
 
   for (p = buf; p < lim; p++)
@@ -554,7 +655,7 @@
           }
 
         *pattern_end = '\0';
-        (*add_func) (ex, pattern, options);
+        (*add_func) (ex, pattern, options, data);
 
       next_pattern:
         pattern = p + 1;
@@ -563,3 +664,32 @@
   errno = e;
   return e ? -1 : 0;
 }
+
+static void
+call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
+{
+  void (*addfn) (struct exclude *, char const *, int) = data;
+  addfn (ex, pattern, options);
+}
+
+int
+add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
+		  struct exclude *ex, char const *file_name, int options,
+		  char line_end)
+{
+  bool use_stdin = file_name[0] == '-' && !file_name[1];
+  FILE *in;
+  int rc = 0;
+
+  if (use_stdin)
+    in = stdin;
+  else if (! (in = fopen (file_name, "r")))
+    return -1;
+
+  rc = add_exclude_fp (call_addfn, ex, in, options, line_end, add_func);
+
+  if (!use_stdin && fclose (in) != 0)
+    rc = -1;
+
+  return rc;
+}
--- a/lib/exclude.h	Thu Feb 20 06:56:42 2014 -0700
+++ b/lib/exclude.h	Sat Feb 15 19:21:04 2014 +0200
@@ -20,6 +20,7 @@
 #define _GL_EXCLUDE_H 1
 
 #include <stdbool.h>
+#include <stdio.h>
 
 /* Written by Paul Eggert <eggert@twinsun.com>
    and Sergey Poznyakoff <gray@gnu.org> */
@@ -37,6 +38,12 @@
    option, these characters are ordinary and fnmatch is not used.  */
 #define EXCLUDE_WILDCARDS (1 << 28)
 
+/* Patterns are POSIX extended regular expressions */
+#define EXCLUDE_REGEX     (1 << 27)
+
+/* Allocate storage for the pattern */
+#define EXCLUDE_ALLOC     (1 << 26)
+
 struct exclude;
 
 bool fnmatch_pattern_has_wildcards (const char *, int) _GL_ATTRIBUTE_PURE;
@@ -46,7 +53,10 @@
 void add_exclude (struct exclude *, char const *, int);
 int add_exclude_file (void (*) (struct exclude *, char const *, int),
                       struct exclude *, char const *, int, char);
+int add_exclude_fp (void (*) (struct exclude *, char const *, int, void *),
+		    struct exclude *, FILE *, int, char, void *);
 bool excluded_file_name (struct exclude const *, char const *);
-bool exclude_fnmatch (char const *pattern, char const *f, int options);
+void exclude_add_pattern_buffer (struct exclude *ex, char *buf);
+bool exclude_fnmatch (char const *, char const *, int);
 
 #endif /* _GL_EXCLUDE_H */
--- a/modules/exclude	Thu Feb 20 06:56:42 2014 -0700
+++ b/modules/exclude	Sat Feb 15 19:21:04 2014 +0200
@@ -6,10 +6,12 @@
 lib/exclude.c
 
 Depends-on:
+filename
 fnmatch
 hash
 mbscasecmp
 mbuiter
+regex
 stdbool
 verify
 xalloc