view lib/regex-quote.c @ 37166:fe129ca366e4

regex-quote: fix buffer access out of bounds http://lists.gnu.org/archive/html/bug-gnulib/2013-09/msg00001.html * lib/regex-quote.c (regex_quote_spec_pcre): Fix typo that resulted in an out-of-bounds read.
author Anton Ovchinnikov <revolver112@gmail.com>
date Wed, 04 Sep 2013 17:09:39 -0700
parents c741bc27922a
children 344018b6e5d7
line wrap: on
line source

/* Construct a regular expression from a literal string.
   Copyright (C) 1995, 2010-2013 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2010.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include "regex-quote.h"

#include <string.h>

#include "mbuiter.h"
#include "xalloc.h"

/* Characters that are special in a BRE.  A backslash is put in front of
   each of them when a string is quoted.  regex_quote_spec_gnu also uses
   this table as the base set to which it appends further characters.  */
static const char bre_special[] = "$^.*[]\\";

/* Characters that are special in an ERE: the BRE characters above followed
   by '+', '?', '{', '}', '(', ')' and '|'.  */
static const char ere_special[] = "$^.*[]\\+?{}()|";

struct regex_quote_spec
regex_quote_spec_posix (int cflags, bool anchored)
{
  struct regex_quote_spec result;

  strcpy (result.special, cflags != 0 ? ere_special : bre_special);
  result.multibyte = true;
  result.anchored = anchored;

  return result;
}

/* Syntax bit values, defined in GNU <regex.h>.  We don't include it here,
   otherwise this module would need to depend on gnulib module 'regex'.
   Only the bits that regex_quote_spec_gnu tests are reproduced; the values
   are copies and therefore have to agree with those in <regex.h>.  */
#define RE_BK_PLUS_QM    0x00000002
#define RE_INTERVALS     0x00000200
#define RE_LIMITED_OPS   0x00000400
#define RE_NEWLINE_ALT   0x00000800
#define RE_NO_BK_BRACES  0x00001000
#define RE_NO_BK_PARENS  0x00002000
#define RE_NO_BK_VBAR    0x00008000

struct regex_quote_spec
regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored)
{
  struct regex_quote_spec result;
  char *p;

  p = result.special;
  memcpy (p, bre_special, sizeof (bre_special) - 1);
  p += sizeof (bre_special) - 1;
  if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0)
    {
      *p++ = '+';
      *p++ = '?';
    }
  if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0)
    {
      *p++ = '{';
      *p++ = '}';
    }
  if ((syntax & RE_NO_BK_PARENS) != 0)
    {
      *p++ = '(';
      *p++ = ')';
    }
  if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0)
    *p++ = '|';
  if ((syntax & RE_NEWLINE_ALT) != 0)
    *p++ = '\n';
  *p = '\0';

  result.multibyte = true;
  result.anchored = anchored;

  return result;
}

/* Characters that are special in a PCRE.  regex_quote_spec_pcre appends
   whitespace characters and '#' to this set when PCRE_EXTENDED is given.  */
static const char pcre_special[] = "$^.*[]\\+?{}()|";

/* Options bit values, defined in <pcre.h>.  We don't include it here, because
   it is not a standard header.  The values are copies and therefore have to
   agree with those in <pcre.h>.  */
#define PCRE_ANCHORED 0x00000010
#define PCRE_EXTENDED 0x00000008

struct regex_quote_spec
regex_quote_spec_pcre (int options, bool anchored)
{
  struct regex_quote_spec result;
  char *p;

  p = result.special;
  memcpy (p, pcre_special, sizeof (pcre_special) - 1);
  p += sizeof (pcre_special) - 1;
  if (options & PCRE_EXTENDED)
    {
      *p++ = ' ';
      *p++ = '\t';
      *p++ = '\n';
      *p++ = '\v';
      *p++ = '\f';
      *p++ = '\r';
      *p++ = '#';
    }
  *p = '\0';

  /* PCRE regular expressions consist of UTF-8 characters of options contains
     PCRE_UTF8 and of single bytes otherwise.  */
  result.multibyte = false;
  /* If options contains PCRE_ANCHORED, the anchoring is implicit.  */
  result.anchored = (options & PCRE_ANCHORED ? 0 : anchored);

  return result;
}

/* Return the number of bytes needed for the quoted form of STRING under
   SPEC, not counting a terminating NUL.  */
size_t
regex_quote_length (const char *string, const struct regex_quote_spec *spec)
{
  const char *to_escape = spec->special;
  /* An anchored result is wrapped in '^' at the beginning and '$' at the
     end.  */
  size_t total = spec->anchored ? 2 : 0;

  if (!spec->multibyte)
    {
      const char *s;

      /* Every byte costs one output byte, plus one more for the backslash
         when it is special.  */
      for (s = string; *s != '\0'; s++)
        total += (strchr (to_escape, *s) != NULL ? 2 : 1);
    }
  else
    {
      mbui_iterator_t iter;

      for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
        {
          size_t nbytes = mb_len (mbui_cur (iter));

          /* We know that the special set contains only ASCII characters,
             so only single-byte characters need to be looked up.  */
          if (nbytes == 1
              && strchr (to_escape, * mbui_cur_ptr (iter)) != NULL)
            total += 1;
          total += nbytes;
        }
    }

  return total;
}

/* Write the quoted form of STRING under SPEC to the buffer at P and return
   a pointer just past the last byte written.  No terminating NUL is stored.
   The buffer must have room for at least regex_quote_length (STRING, SPEC)
   bytes.

   Every character found in SPEC->special is preceded by a backslash.  When
   SPEC->anchored is set, the output is wrapped in '^' ... '$'.  */
char *
regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec)
{
  const char *special = spec->special;

  if (spec->anchored)
    *p++ = '^';
  if (spec->multibyte)
    {
      mbui_iterator_t iter;

      for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
        {
          /* We know that special contains only ASCII characters.  */
          if (mb_len (mbui_cur (iter)) == 1
              && strchr (special, * mbui_cur_ptr (iter)))
            *p++ = '\\';
          memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
          p += mb_len (mbui_cur (iter));
        }
    }
  else
    {
      const char *iter;

      for (iter = string; *iter != '\0'; iter++)
        {
          if (strchr (special, *iter))
            *p++ = '\\';
          /* Do not advance ITER here: the loop header already does.  A
             second increment would skip every other byte and could step
             over the terminating NUL, reading past the end of STRING.  */
          *p++ = *iter;
        }
    }
  if (spec->anchored)
    *p++ = '$';

  return p;
}

/* Return the quoted form of STRING under SPEC as a NUL-terminated string in
   storage obtained with XNMALLOC.  Presumably the caller owns the result
   and is responsible for freeing it.  */
char *
regex_quote (const char *string, const struct regex_quote_spec *spec)
{
  /* One extra byte for the terminating NUL, which regex_quote_copy does not
     write itself.  */
  char *buffer = XNMALLOC (regex_quote_length (string, spec) + 1, char);
  char *end = regex_quote_copy (buffer, string, spec);

  *end = '\0';
  return buffer;
}