Mercurial > octave
diff liboctave/regexp.h @ 14024:fc9f204faea0
refactor regexp (bug #34440)
* liboctave/regexp.h, liboctave/regexp.cc: New files.
Provide classes and functions for regular expressions.
Adapted from src/DLD-FUNCTIONS/regexp.cc.
* regex-match.h, regex-match.cc: Delete.
* liboctave/Makefile.am (INCS, LIBOCTAVE_CXX_SOURCES): Update.
* variables.cc (name_matches_any_pattern): Use new regexp class.
* symtab.h (symbol_table::regexp_global_variables,
symbol_table::do_clear_variable_regexp, symbol_table::do_regexp):
Likewise.
* DLD-FUNCTIONS/regexp.cc (parse_options): New function.
(octregexp, octcellregexp, octregexprep): Extract matching code for
use in new regexp class. Use new regexp class to provide required
functionality.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Sun, 11 Dec 2011 22:19:57 -0500 |
parents | liboctave/regex-match.h@12df7854fa7c |
children | 72c96de7a403 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/liboctave/regexp.h Sun Dec 11 22:19:57 2011 -0500 @@ -0,0 +1,281 @@ +/* + +Copyright (C) 2011 John W. Eaton +Copyright (C) 2005-2011 David Bateman + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + +#if !defined (octave_regexp_match_h) +#define octave_regexp_match_h 1 + +#include <list> +#include <sstream> +#include <string> + +#include "Array.h" +#include "Matrix.h" +#include "base-list.h" +#include "str-vec.h" + +class regexp +{ +public: + + class opts; + class match_data; + + regexp (const std::string& pat = "", + const regexp::opts& opt = regexp::opts (), + const std::string& w = "regexp") + : pattern (pat), options (opt), data (0), named_pats (), + nnames (0), named_idx (), who (w) + { + compile_internal (); + } + + regexp (const regexp& rx) + : pattern (rx.pattern), data (rx.data), named_pats (rx.named_pats), + nnames (rx.nnames), named_idx (rx.named_idx) + { } + + regexp& operator = (const regexp& rx) + { + if (this != &rx) + { + pattern = rx.pattern; + data = rx.data; + named_pats = rx.named_pats; + nnames = rx.nnames; + named_idx = rx.named_idx; + } + + return *this; + } + + ~regexp (void) { free (); } + + void compile (const std::string& pat, + const regexp::opts& opt = regexp::opts ()) + { + pattern = pat; + options = opt; + compile_internal (); + } + + match_data match (const std::string& buffer); 
+ + bool is_match (const std::string& buffer); + + Array<bool> is_match (const string_vector& buffer); + + std::string replace (const std::string& buffer, + const std::string& replacement); + + struct opts + { + public: + + opts (void) + : x_case_insensitive (false), x_dotexceptnewline (false), + x_freespacing (false), x_lineanchors (false), x_once (false) { } + + opts (const opts& o) + : x_case_insensitive (o.x_case_insensitive), + x_dotexceptnewline (o.x_dotexceptnewline), + x_freespacing (o.x_freespacing), + x_lineanchors (o.x_lineanchors), + x_once (o.x_once) + { } + + opts& operator = (const opts& o) + { + if (this != &o) + { + x_case_insensitive = o.x_case_insensitive; + x_dotexceptnewline = o.x_dotexceptnewline; + x_freespacing = o.x_freespacing; + x_lineanchors = o.x_lineanchors; + x_once = o.x_once; + } + + return *this; + } + + ~opts (void) { } + + void case_insensitive (bool val) { x_case_insensitive = val; } + void dotexceptnewline (bool val) { x_dotexceptnewline = val; } + void freespacing (bool val) { x_freespacing = val; } + void lineanchors (bool val) { x_lineanchors = val; } + void once (bool val) { x_once = val; } + + bool case_insensitive (void) const { return x_case_insensitive; } + bool dotexceptnewline (void) const { return x_dotexceptnewline; } + bool freespacing (void) const { return x_freespacing; } + bool lineanchors (void) const { return x_lineanchors; } + bool once (void) const { return x_once; } + + private: + + bool x_case_insensitive; + bool x_dotexceptnewline; + bool x_freespacing; + bool x_lineanchors; + bool x_once; + }; + + class match_element + { + public: + + match_element (const string_vector& nt, const string_vector& t, + const std::string& ms, const Matrix& te, + double s, double e) + : x_match_string (ms), x_named_tokens (nt), x_tokens (t), + x_token_extents (te), x_start (s), x_end (e) + { } + + match_element (const match_element &a) + : x_match_string (a.x_match_string), + x_named_tokens (a.x_named_tokens), x_tokens 
(a.x_tokens), + x_token_extents (a.x_token_extents), + x_start (a.x_start), x_end (a.x_end) + { } + + std::string match_string (void) const { return x_match_string; } + string_vector named_tokens (void) const { return x_named_tokens; } + string_vector tokens (void) const { return x_tokens; } + Matrix token_extents (void) const { return x_token_extents; } + double start (void) const { return x_start; } + double end (void) const { return x_end; } + + private: + + std::string x_match_string; + string_vector x_named_tokens; + string_vector x_tokens; + Matrix x_token_extents; + double x_start; + double x_end; + }; + + class match_data : public octave_base_list<match_element> + { + public: + + match_data (void) + : octave_base_list<match_element> (), named_pats () + { } + + match_data (const std::list<match_element>& l, const string_vector& np) + : octave_base_list<match_element> (l), named_pats (np) + { } + + match_data (const match_data& rx_lst) + : octave_base_list<match_element> (rx_lst), + named_pats (rx_lst.named_pats) + { } + + match_data& operator = (const match_data& rx_lst) + { + if (this != &rx_lst) + { + octave_base_list<match_element>::operator = (rx_lst); + named_pats = rx_lst.named_pats; + } + + return *this; + } + + ~match_data (void) { } + + string_vector named_patterns (void) { return named_pats; } + + private: + + string_vector named_pats; + }; + +private: + + // The pattern we've been asked to match. + std::string pattern; + + opts options; + + // Internal data describing the regular expression. 
+ void *data; + + std::string m; + string_vector named_pats; + int nnames; + Array<int> named_idx; + std::string who; + + void free (void); + + void compile_internal (void); +}; + +inline regexp::match_data +regexp_match (const std::string& pat, + const std::string& buffer, + const regexp::opts& opt = regexp::opts (), + const std::string& who = "regexp") +{ + regexp rx (pat, opt, who); + + return rx.match (buffer); +} + +inline bool +is_regexp_match (const std::string& pat, + const std::string& buffer, + const regexp::opts& opt = regexp::opts (), + const std::string& who = "regexp") +{ + regexp rx (pat, opt, who); + + return rx.is_match (buffer); +} + +inline Array<bool> +is_regexp_match (const std::string& pat, + const string_vector& buffer, + const regexp::opts& opt = regexp::opts (), + const std::string& who = "regexp") +{ + regexp rx (pat, opt, who); + + return rx.is_match (buffer); +} + +inline std::string +regexp_replace (const std::string& pat, + const std::string& buffer, + const std::string& replacement, + const regexp::opts& opt = regexp::opts (), + const std::string& who = "regexp") +{ + regexp rx (pat, opt, who); + + return rx.replace (buffer, replacement); +} + +#endif