From da17cddd8fe5ab873de06eede24a6faa13e5eab9 Mon Sep 17 00:00:00 2001
From: Boris Kolpackov
Date: Tue, 5 Jul 2011 18:08:01 +0200
Subject: Actual regex implementation

---
 cutl/re.hxx    | 245 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 cutl/re/re.cxx | 175 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 420 insertions(+)
 create mode 100644 cutl/re.hxx
 create mode 100644 cutl/re/re.cxx

(limited to 'cutl')

diff --git a/cutl/re.hxx b/cutl/re.hxx
new file mode 100644
index 0000000..57fa258
--- /dev/null
+++ b/cutl/re.hxx
@@ -0,0 +1,245 @@
+// file      : cutl/re.hxx
+// author    : Boris Kolpackov
+// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef CUTL_RE_HXX
+#define CUTL_RE_HXX
+
+#include
+#include // std::ostream
+
+#include
+#include
+
+namespace cutl
+{
+  namespace re
+  {
+    struct LIBCUTL_EXPORT format: exception // Invalid expression error.
+    {
+      virtual
+      ~format () throw ();
+
+      format (std::string const& e, std::string const& d)
+          : regex_ (e), description_ (d) // e: expression, d: what is wrong.
+      {
+      }
+
+      std::string const&
+      regex () const // The expression that was rejected.
+      {
+        return regex_;
+      }
+
+      std::string const&
+      description () const // Why it was rejected.
+      {
+        return description_;
+      }
+
+      virtual char const*
+      what () const throw (); // Same text as description ().
+
+    private:
+      std::string regex_;
+      std::string description_;
+    };
+
+    // Regular expression pattern.
+    //
+    struct LIBCUTL_EXPORT regex
+    {
+      ~regex ();
+
+      regex ()
+          : impl_ (0)
+      {
+        init (0); // No pattern: default-constructed impl.
+      }
+
+      explicit
+      regex (std::string const& s)
+          : impl_ (0)
+      {
+        init (&s); // Throws format if s is not a valid regex.
+      }
+
+      regex&
+      operator= (std::string const& s)
+      {
+        init (&s); // Throws format if s is not a valid regex.
+        return *this;
+      }
+
+      regex (regex const&);
+
+      regex&
+      operator= (regex const&);
+
+    public:
+      bool
+      match (std::string const&) const;
+
+      bool
+      search (std::string const&) const;
+
+      std::string
+      replace (std::string const& s,
+               std::string const& sub,
+               bool first_only = false) const; // Passes format_first_only.
+
+    public:
+      std::string
+      str () const;
+
+      bool
+      empty () const;
+
+    private:
+      void
+      init (std::string const*); // 0: no pattern (default ctor only).
+
+    private:
+      struct impl;
+      impl* impl_; // Owned; deleted in ~regex ().
+    };
+
+    LIBCUTL_EXPORT std::ostream&
+    operator<< (std::ostream&, regex const&);
+
+    // Regular expression pattern and substitution.
+    //
+    struct LIBCUTL_EXPORT regexsub
+    {
+      typedef re::regex regex_type;
+
+      regexsub ()
+      {
+      }
+
+      // Expression is of the form /regex/substitution/ where '/' can
+      // be replaced with any delimiter. Delimiters must be escaped in
+      // regex and substitution using back slashes (e.g., "\/"). Back
+      // slashes themselves can be escaped using the double back slash
+      // sequence (e.g., "\\").
+      //
+      explicit
+      regexsub (std::string const& e)
+      {
+        init (e);
+      }
+
+      regexsub (std::string const& regex, std::string const& sub)
+          : regex_ (regex), sub_ (sub)
+      {
+      }
+
+      regexsub (regex_type const& regex, std::string const& sub)
+          : regex_ (regex), sub_ (sub)
+      {
+      }
+
+      regexsub&
+      operator= (std::string const& e)
+      {
+        init (e);
+        return *this;
+      }
+
+    public:
+      bool
+      match (std::string const& s) const
+      {
+        return regex_.match (s);
+      }
+
+      bool
+      search (std::string const& s) const
+      {
+        return regex_.search (s);
+      }
+
+      std::string
+      replace (std::string const& s, bool first_only = false) const
+      {
+        return regex_.replace (s, sub_, first_only);
+      }
+
+    public:
+      const regex_type&
+      regex () const
+      {
+        return regex_;
+      }
+
+      const std::string&
+      substitution () const
+      {
+        return sub_;
+      }
+
+      bool
+      empty () const
+      {
+        return sub_.empty () && regex_.empty ();
+      }
+
+    private:
+      void
+      init (std::string const&); // Parses /regex/substitution/.
+
+    private:
+      regex_type regex_;
+      std::string sub_;
+    };
+
+    // Once-off regex execution (the regex is rebuilt on every call).
+    //
+    inline bool
+    match (std::string const& s, std::string const& regex)
+    {
+      re::regex r (regex);
+      return r.match (s);
+    }
+
+    inline bool
+    search (std::string const& s, std::string const& regex)
+    {
+      re::regex r (regex);
+      return r.search (s);
+    }
+
+    inline std::string
+    replace (std::string const& s,
+             std::string const& regex,
+             std::string const& sub,
+             bool first_only = false)
+    {
+      re::regex r (regex);
+      return r.replace (s, sub, first_only);
+    }
+
+    inline std::string
+    replace (std::string const& s,
+             std::string const& regexsub, // /regex/subst/
+             bool first_only = false)
+    {
+      re::regexsub r (regexsub);
+      return r.replace (s, first_only);
+    }
+
+    // Utility function for parsing expressions in the form /regex/subst/
+    // where '/' can be replaced with any delimiter. This function handles
+    // escaping.
It returns the position of the next delimiter and stores
+    // the unescaped chunk in result or throws the format exception if
+    // the expression is empty or the closing delimiter is missing.
+    //
+    LIBCUTL_EXPORT std::string::size_type
+    parse (std::string const& s,
+           std::string::size_type start,
+           std::string& result);
+  }
+}
+
+#endif // CUTL_RE_HXX
diff --git a/cutl/re/re.cxx b/cutl/re/re.cxx
new file mode 100644
index 0000000..9cd2f2e
--- /dev/null
+++ b/cutl/re/re.cxx
@@ -0,0 +1,175 @@
+// file      : cutl/re/re.cxx
+// author    : Boris Kolpackov
+// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
+// license   : MIT; see accompanying LICENSE file
+
+#include
+
+#include
+
+#include
+
+using namespace std;
+
+namespace cutl
+{
+  namespace re
+  {
+    //
+    // format
+    //
+
+    format::
+    ~format () throw ()
+    {
+    }
+
+    char const* format::
+    what () const throw ()
+    {
+      return description_.c_str ();
+    }
+
+    //
+    // regex
+    //
+    struct regex::impl
+    {
+      impl () {}
+      impl (string const& s): r (s, tr1::regex_constants::ECMAScript) {}
+      impl (tr1::regex const& r): r (r) {}
+
+      tr1::regex r;
+    };
+
+    regex::
+    ~regex ()
+    {
+      delete impl_;
+    }
+
+    regex::
+    regex (regex const& r)
+        : impl_ (new impl (r.impl_->r))
+    {
+    }
+
+    regex& regex::
+    operator= (regex const& r)
+    {
+      impl_->r = r.impl_->r;
+      return *this;
+    }
+
+    void regex::
+    init (string const* s)
+    {
+      try
+      {
+        if (impl_ == 0) // First call, made from a constructor.
+          impl_ = s == 0 ? new impl : new impl (*s);
+        else
+          impl_->r = *s; // s != 0 here: only the default ctor passes 0.
+      }
+      catch (tr1::regex_error const& e)
+      {
+        throw format (s == 0 ? "" : *s, e.what ());
+      }
+    }
+
+    bool regex::
+    match (string const& s) const
+    {
+      return tr1::regex_match (s, impl_->r);
+    }
+
+    bool regex::
+    search (string const& s) const
+    {
+      return tr1::regex_search (s, impl_->r);
+    }
+
+    string regex::
+    replace (string const& s, string const& sub, bool first_only) const
+    {
+      tr1::regex_constants::match_flag_type f (
+        tr1::regex_constants::format_default);
+
+      if (first_only)
+        f |= tr1::regex_constants::format_first_only;
+
+      return regex_replace (s, impl_->r, sub, f);
+    }
+
+    string regex::
+    str () const
+    {
+      return impl_->r.str ();
+    }
+
+    bool regex::
+    empty () const
+    {
+      return impl_->r.empty ();
+    }
+
+    ostream&
+    operator<< (ostream& os, regex const& r)
+    {
+      return os << r.str ().c_str ();
+    }
+
+    //
+    // regexsub
+    //
+    void regexsub::
+    init (string const& s)
+    {
+      string r;
+      string::size_type p (parse (s, 0, r)); // p: second delimiter.
+      regex_ = r; // Throws format if r is not a valid regex.
+      p = parse (s, p, sub_); // p: third (closing) delimiter.
+      if (p + 1 < s.size ()) // Anything after the closing delimiter?
+        throw format (s, "junk after third delimiter");
+    }
+
+    //
+    // parse()
+    //
+    string::size_type
+    parse (string const& s, string::size_type p, string& r)
+    {
+      r.clear ();
+      string::size_type n (s.size ());
+
+      if (p >= n)
+        throw format (s, "empty expression");
+
+      char d (s[p++]); // The delimiter; p is now past it.
+
+      for (; p < n; ++p)
+      {
+        if (s[p] == d)
+          break;
+
+        if (s[p] == '\\')
+        {
+          if (++p < n)
+          {
+            if (s[p] != d && s[p] != '\\') // Not \d or \\: keep the '\'.
+              r += '\\';
+            r += s[p];
+          }
+          // else: s ends in a lone back slash; p == n, n + 1 after ++p.
+        }
+        else
+          r += s[p];
+      }
+
+      if (p >= n) // Not ==: p is n + 1 after a trailing lone back slash.
+        throw format (s, "missing closing delimiter");
+
+      return p;
+    }
+  }
+}
-- 
cgit v1.1