aboutsummaryrefslogtreecommitdiff
path: root/cutl
diff options
context:
space:
mode:
Diffstat (limited to 'cutl')
-rw-r--r--cutl/re.hxx245
-rw-r--r--cutl/re/re.cxx175
2 files changed, 420 insertions, 0 deletions
diff --git a/cutl/re.hxx b/cutl/re.hxx
new file mode 100644
index 0000000..57fa258
--- /dev/null
+++ b/cutl/re.hxx
@@ -0,0 +1,245 @@
+// file : cutl/re.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#ifndef CUTL_RE_HXX
+#define CUTL_RE_HXX
+
+#include <string>
+#include <iosfwd> // std::ostream
+
+#include <cutl/exception.hxx>
+#include <cutl/details/export.hxx>
+
+namespace cutl
+{
+ namespace re
+ {
+ // Exception thrown when a regular expression (or a /regex/subst/
+ // expression) cannot be parsed. It carries the offending expression
+ // text together with a description of the problem.
+ //
+ struct LIBCUTL_EXPORT format: exception
+ {
+ virtual
+ ~format () throw ();
+
+ // e is the expression that was rejected and d is the description
+ // of the error. Both strings are copied into the exception.
+ //
+ format (std::string const& e, std::string const& d)
+ : regex_ (e), description_ (d)
+ {
+ }
+
+ // The expression text passed to the constructor.
+ //
+ std::string const&
+ regex () const
+ {
+ return regex_;
+ }
+
+ // The error description passed to the constructor.
+ //
+ std::string const&
+ description () const
+ {
+ return description_;
+ }
+
+ // Defined in re.cxx; returns the description as a C string.
+ //
+ virtual char const*
+ what () const throw ();
+
+ private:
+ std::string regex_;
+ std::string description_;
+ };
+
+ // Regular expression pattern.
+ //
+ // The underlying regex object is kept behind an opaque pointer (impl_)
+ // whose type is only defined in re.cxx. Every constructor allocates the
+ // implementation object, so the member functions can use impl_ without
+ // checking it. Construction and assignment from a string throw the
+ // format exception if the string is not a valid expression.
+ //
+ struct LIBCUTL_EXPORT regex
+ {
+ ~regex ();
+
+ // Create a pattern with a default-constructed underlying regex.
+ //
+ regex ()
+ : impl_ (0)
+ {
+ init (0);
+ }
+
+ // Compile s. Throws format if s is not a valid expression.
+ //
+ explicit
+ regex (std::string const& s)
+ : impl_ (0)
+ {
+ init (&s);
+ }
+
+ // Recompile this pattern from s. Throws format if s is not a valid
+ // expression.
+ //
+ regex&
+ operator= (std::string const& s)
+ {
+ init (&s);
+ return *this;
+ }
+
+ // Copying duplicates the underlying regex object (see re.cxx).
+ //
+ regex (regex const&);
+
+ regex&
+ operator= (regex const&);
+
+ public:
+ // Implemented in terms of regex_match() on the underlying regex.
+ //
+ bool
+ match (std::string const&) const;
+
+ // Implemented in terms of regex_search() on the underlying regex.
+ //
+ bool
+ search (std::string const&) const;
+
+ // Return a copy of s with matches replaced according to the format
+ // string sub. If first_only is true, the format_first_only flag is
+ // passed to the underlying regex_replace() call.
+ //
+ std::string
+ replace (std::string const& s,
+ std::string const& sub,
+ bool first_only = false) const;
+
+ public:
+ // The expression text as reported by the underlying regex.
+ //
+ std::string
+ str () const;
+
+ // Result of empty() on the underlying regex.
+ //
+ bool
+ empty () const;
+
+ private:
+ // Create or reassign the implementation. A null pointer requests a
+ // default-constructed regex and is only passed by the default
+ // constructor (that is, while impl_ is still 0).
+ //
+ void
+ init (std::string const*);
+
+ private:
+ struct impl;
+ impl* impl_;
+ };
+
+ // Write the expression text (as returned by regex::str()) to the
+ // stream and return the stream.
+ //
+ LIBCUTL_EXPORT std::ostream&
+ operator<< (std::ostream&, regex const&);
+
+ // Regular expression pattern and substitution.
+ //
+ // Bundles a compiled regex with the substitution (format) string that
+ // replace() applies.
+ //
+ struct LIBCUTL_EXPORT regexsub
+ {
+ typedef re::regex regex_type;
+
+ // Create an object with a default-constructed pattern and an empty
+ // substitution.
+ //
+ regexsub ()
+ {
+ }
+
+ // Expression is of the form /regex/substitution/ where '/' can
+ // be replaced with any delimiter. Delimiters must be escaped in
+ // regex and substitution using backslashes (e.g., "\/"). Backslashes
+ // themselves can be escaped using the double backslash sequence
+ // (e.g., "\\"). Throws format if the expression is malformed.
+ //
+ explicit
+ regexsub (std::string const& e)
+ {
+ init (e);
+ }
+
+ // Pattern and substitution given separately (no delimiters and no
+ // unescaping). Throws format if regex is not a valid expression.
+ //
+ regexsub (std::string const& regex, std::string const& sub)
+ : regex_ (regex), sub_ (sub)
+ {
+ }
+
+ // As above but with an already compiled pattern, which is copied.
+ //
+ regexsub (regex_type const& regex, std::string const& sub)
+ : regex_ (regex), sub_ (sub)
+ {
+ }
+
+ // Reinitialize from a /regex/substitution/ expression. Throws format
+ // if the expression is malformed.
+ //
+ regexsub&
+ operator= (std::string const& e)
+ {
+ init (e);
+ return *this;
+ }
+
+ public:
+ // Forwarded to the pattern's match().
+ //
+ bool
+ match (std::string const& s) const
+ {
+ return regex_.match (s);
+ }
+
+ // Forwarded to the pattern's search().
+ //
+ bool
+ search (std::string const& s) const
+ {
+ return regex_.search (s);
+ }
+
+ // Apply the stored substitution to s using the pattern's replace().
+ //
+ std::string
+ replace (std::string const& s, bool first_only = false) const
+ {
+ return regex_.replace (s, sub_, first_only);
+ }
+
+ public:
+ const regex_type&
+ regex () const
+ {
+ return regex_;
+ }
+
+ const std::string&
+ substitution () const
+ {
+ return sub_;
+ }
+
+ // True only if both the substitution and the pattern are empty.
+ //
+ bool
+ empty () const
+ {
+ return sub_.empty () && regex_.empty ();
+ }
+
+ private:
+ // Parse a /regex/substitution/ expression (defined in re.cxx).
+ //
+ void
+ init (std::string const&);
+
+ private:
+ regex_type regex_;
+ std::string sub_;
+ };
+
+ // Once-off regex execution.
+ //
+ // Compile regex and test s against it with regex::match(). The
+ // pattern is compiled on every call; throws format if it is invalid.
+ //
+ inline bool
+ match (std::string const& s, std::string const& regex)
+ {
+   return re::regex (regex).match (s);
+ }
+
+ // Compile regex and test s against it with regex::search(). The
+ // pattern is compiled on every call; throws format if it is invalid.
+ //
+ inline bool
+ search (std::string const& s, std::string const& regex)
+ {
+   return re::regex (regex).search (s);
+ }
+
+ // Compile regex and return the result of regex::replace() for s with
+ // the substitution sub. The pattern is compiled on every call; throws
+ // format if it is invalid.
+ //
+ inline std::string
+ replace (std::string const& s,
+          std::string const& regex,
+          std::string const& sub,
+          bool first_only = false)
+ {
+   return re::regex (regex).replace (s, sub, first_only);
+ }
+
+ // Same as above except that the pattern and the substitution are
+ // given as a single /regex/subst/ expression. The expression is
+ // parsed on every call; throws format if it is malformed.
+ //
+ inline std::string
+ replace (std::string const& s,
+          std::string const& regexsub, // /regex/subst/
+          bool first_only = false)
+ {
+   return re::regexsub (regexsub).replace (s, first_only);
+ }
+
+ // Utility function for parsing expressions in the form /regex/subst/
+ // where '/' can be replaced with any delimiter. The character at
+ // position start is taken as the delimiter and the text that follows
+ // it, up to the next unescaped delimiter, is the chunk. This function
+ // handles escaping: an escaped delimiter and an escaped backslash are
+ // stored without the escaping backslash while any other escape
+ // sequence is stored unchanged. It returns the position of the next
+ // delimiter and stores the unescaped chunk in result (which is cleared
+ // first) or throws the format exception if the expression is invalid
+ // (start is past the end of s or the closing delimiter is missing).
+ //
+ LIBCUTL_EXPORT std::string::size_type
+ parse (std::string const& s,
+ std::string::size_type start,
+ std::string& result);
+ }
+}
+
+#endif // CUTL_RE_HXX
diff --git a/cutl/re/re.cxx b/cutl/re/re.cxx
new file mode 100644
index 0000000..9cd2f2e
--- /dev/null
+++ b/cutl/re/re.cxx
@@ -0,0 +1,175 @@
+// file : cutl/re/re.cxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2009-2011 Code Synthesis Tools CC
+// license : MIT; see accompanying LICENSE file
+
+#include <ostream>
+
+#include <cutl/re.hxx>
+
+#include <cutl/details/boost/tr1/regex.hpp>
+
+using namespace std;
+
+namespace cutl
+{
+ namespace re
+ {
+ //
+ // format
+ //
+
+ // Out-of-line definition of the virtual destructor. The body is empty;
+ // the string members are destroyed implicitly.
+ //
+ format::
+ ~format () throw ()
+ {
+ }
+
+ char const* format::
+ what () const throw ()
+ {
+ return description_.c_str ();
+ }
+
+ //
+ // regex
+ //
+ // Implementation object behind regex::impl_; holds the TR1 regex.
+ //
+ struct regex::impl
+ {
+   tr1::regex r;
+
+   // Default-constructed underlying regex.
+   //
+   impl () {}
+
+   // Compile pattern using the ECMAScript grammar.
+   //
+   impl (string const& pattern)
+     : r (pattern, tr1::regex_constants::ECMAScript) {}
+
+   // Copy an already compiled regex.
+   //
+   impl (tr1::regex const& other): r (other) {}
+ };
+
+ // Free the implementation object allocated by the constructors.
+ //
+ regex::
+ ~regex ()
+ {
+ delete impl_;
+ }
+
+ // Copy constructor: allocate a new implementation object holding a
+ // copy of r's underlying regex.
+ //
+ regex::
+ regex (regex const& r)
+ : impl_ (new impl (r.impl_->r))
+ {
+ }
+
+ // Copy assignment: assign r's underlying regex to the one held by the
+ // existing implementation object; impl_ itself is not reallocated.
+ //
+ regex& regex::
+ operator= (regex const& r)
+ {
+ impl_->r = r.impl_->r;
+ return *this;
+ }
+
+ // Create the implementation object or, if it already exists, assign
+ // it a new expression. A null s requests a default-constructed regex
+ // and is only expected while impl_ is still 0 (default constructor).
+ // Errors reported by the underlying library as regex_error are
+ // translated to the format exception.
+ //
+ void regex::
+ init (string const* s)
+ {
+   try
+   {
+     if (impl_ != 0)
+       impl_->r = *s;          // Reassign the existing regex.
+     else if (s != 0)
+       impl_ = new impl (*s);  // First-time compilation.
+     else
+       impl_ = new impl;       // Default-constructed regex.
+   }
+   catch (tr1::regex_error const& e)
+   {
+     throw format (s == 0 ? "" : *s, e.what ());
+   }
+ }
+
+ // Return the result of tr1::regex_match() for s and this pattern.
+ //
+ bool regex::
+ match (string const& s) const
+ {
+   tr1::regex const& pattern (impl_->r);
+   return tr1::regex_match (s, pattern);
+ }
+
+ // Return the result of tr1::regex_search() for s and this pattern.
+ //
+ bool regex::
+ search (string const& s) const
+ {
+   tr1::regex const& pattern (impl_->r);
+   return tr1::regex_search (s, pattern);
+ }
+
+ // Return a copy of s with matches of this pattern replaced according
+ // to the format string sub. If first_only is true, format_first_only
+ // is added to the flags passed to regex_replace().
+ //
+ string regex::
+ replace (string const& s, string const& sub, bool first_only) const
+ {
+   tr1::regex_constants::match_flag_type f (
+     tr1::regex_constants::format_default);
+
+   if (first_only)
+     f |= tr1::regex_constants::format_first_only;
+
+   // Qualify the call, as match() and search() do, instead of relying
+   // on argument-dependent lookup (std is also an associated namespace
+   // here because of the std::string arguments).
+   //
+   return tr1::regex_replace (s, impl_->r, sub, f);
+ }
+
+ // Return the expression text as reported by the underlying regex's
+ // str().
+ //
+ string regex::
+ str () const
+ {
+ return impl_->r.str ();
+ }
+
+ // Return the result of empty() on the underlying regex.
+ //
+ bool regex::
+ empty () const
+ {
+ return impl_->r.empty ();
+ }
+
+ // Write the expression text of r to os as a C string and return os.
+ //
+ ostream&
+ operator<< (ostream& os, regex const& r)
+ {
+   string const text (r.str ());
+   os << text.c_str ();
+   return os;
+ }
+
+ //
+ // regexsub
+ //
+ // Initialize from an expression of the form /regex/substitution/.
+ //
+ // Throws format if either part is malformed, if the regex does not
+ // compile, or if anything follows the third delimiter. The members
+ // are only updated once the whole expression has been validated, so
+ // a format exception leaves the object as it was.
+ //
+ void regexsub::
+ init (string const& s)
+ {
+   // Regex part: text between the first and the second delimiter.
+   //
+   string r;
+   string::size_type p (parse (s, 0, r));
+   regex_type rx (r); // Throws format if r is not a valid expression.
+
+   // Substitution part: the second delimiter (at p) opens it.
+   //
+   string sub;
+   p = parse (s, p, sub);
+
+   // p is now the position of the third delimiter.
+   //
+   if (p + 1 < s.size ())
+     throw format (s, "junk after third delimiter");
+
+   // Everything checked out; commit.
+   //
+   regex_ = rx;
+   sub_.swap (sub);
+ }
+
+ //
+ // parse()
+ //
+ // Extract one delimited chunk from s.
+ //
+ // The character at position p is the delimiter. The text that follows
+ // it, up to the next unescaped delimiter, is stored in r (which is
+ // cleared first) with escaped delimiters and escaped backslashes
+ // unescaped; any other escape sequence is stored unchanged. Returns
+ // the position of the closing delimiter.
+ //
+ // Throws format if p is at or past the end of s ("empty expression")
+ // or if the end of s is reached before the closing delimiter,
+ // including when s ends with a dangling backslash ("missing closing
+ // delimiter").
+ //
+ string::size_type
+ parse (string const& s, string::size_type p, string& r)
+ {
+   r.clear ();
+   string::size_type n (s.size ());
+
+   if (p >= n)
+     throw format (s, "empty expression");
+
+   char d (s[p++]);
+
+   for (; p < n; ++p)
+   {
+     char c (s[p]);
+
+     if (c == d)
+       break;
+
+     if (c != '\\')
+     {
+       r += c;
+       continue;
+     }
+
+     // Escape sequence. If the backslash is the last character, stop
+     // here with p == n so that this is reported as a missing delimiter
+     // rather than letting the loop increment move p past the end.
+     //
+     if (++p == n)
+       break;
+
+     // Keep the backslash unless it escapes the delimiter or another
+     // backslash.
+     //
+     if (s[p] != d && s[p] != '\\')
+       r += '\\';
+
+     r += s[p];
+   }
+
+   if (p >= n)
+     throw format (s, "missing closing delimiter");
+
+   return p;
+ }
+ }
+}