1 files changed, 434 insertions, 0 deletions
diff --git a/libcutl/re/re.cxx b/libcutl/re/re.cxx
new file mode 100644
index 0000000..0bcbe96
--- /dev/null
+++ b/libcutl/re/re.cxx
@@ -0,0 +1,434 @@
+// file      : libcutl/re/re.cxx
+// license   : MIT; see accompanying LICENSE file
+
+#include <libcutl/re.hxx>
+
+#include <regex>
+#include <locale>
+#include <cstddef> // size_t
+
+using namespace std;
+
+namespace cutl
+{
+  namespace re
+  {
+    //
+    // format_base
+    //
+
+    char const* format_base::
+    what () const noexcept
+    {
+      return description_.c_str ();
+    }
+
+    //
+    // basic_regex
+    //
+    template <typename C>
+    struct basic_regex<C>::impl
+    {
+      typedef basic_string<C> string_type;
+      typedef std::basic_regex<C> regex_type;
+      typedef typename regex_type::flag_type flag_type;
+
+      impl () {}
+      impl (regex_type const& r): r (r) {}
+      impl (string_type const& s, bool icase)
+      {
+        flag_type f (std::regex_constants::ECMAScript);
+
+        if (icase)
+          f |= std::regex_constants::icase;
+
+        r.assign (s, f);
+      }
+
+      regex_type r;
+    };
+
+    template <>
+    LIBCUTL_EXPORT basic_regex<char>::
+    ~basic_regex ()
+    {
+      delete impl_;
+    }
+
+    template <>
+    LIBCUTL_EXPORT basic_regex<wchar_t>::
+    ~basic_regex ()
+    {
+      delete impl_;
+    }
+
+    template <>
+    LIBCUTL_EXPORT basic_regex<char>::
+    basic_regex (basic_regex const& r)
+        : str_ (r.str_), impl_ (new impl (r.impl_->r))
+    {
+    }
+
+    template <>
+    LIBCUTL_EXPORT basic_regex<wchar_t>::
+    basic_regex (basic_regex const& r)
+        : str_ (r.str_), impl_ (new impl (r.impl_->r))
+    {
+    }
+
+    template <>
+    LIBCUTL_EXPORT basic_regex<char>& basic_regex<char>::
+    operator= (basic_regex const& r)
+    {
+      string_type tmp (r.str_);
+      impl_->r = r.impl_->r;
+      str_.swap (tmp);
+      return *this;
+    }
+
+    template <>
+    LIBCUTL_EXPORT basic_regex<wchar_t>& basic_regex<wchar_t>::
+    operator= (basic_regex const& r)
+    {
+      string_type tmp (r.str_);
+      impl_->r = r.impl_->r;
+      str_.swap (tmp);
+      return *this;
+    }
+
+    template <>
+    LIBCUTL_EXPORT void basic_regex<char>::
+    init (string_type const* s, bool icase)
+    {
+      string_type tmp (s == 0 ? string_type () : *s);
+
+      try
+      {
+        if (impl_ == 0)
+          impl_ = s == 0 ? new impl : new impl (*s, icase);
+        else
+        {
+          impl::flag_type f (std::regex_constants::ECMAScript);
+
+          if (icase)
+            f |= std::regex_constants::icase;
+
+          impl_->r.assign (*s, f);
+        }
+      }
+      catch (std::regex_error const& e)
+      {
+        throw basic_format<char> (s == 0 ? "" : *s, e.what ());
+      }
+
+      str_.swap (tmp);
+    }
+
+    template <>
+    LIBCUTL_EXPORT void basic_regex<wchar_t>::
+    init (string_type const* s, bool icase)
+    {
+      string_type tmp (s == 0 ? string_type () : *s);
+
+      try
+      {
+        if (impl_ == 0)
+          impl_ = s == 0 ? new impl : new impl (*s, icase);
+        else
+        {
+          impl::flag_type f (std::regex_constants::ECMAScript);
+
+          if (icase)
+            f |= std::regex_constants::icase;
+
+          impl_->r.assign (*s, f);
+        }
+      }
+      catch (std::regex_error const& e)
+      {
+        throw basic_format<wchar_t> (s == 0 ? L"" : *s, e.what ());
+      }
+
+      str_.swap (tmp);
+    }
+
+    template <>
+    LIBCUTL_EXPORT bool basic_regex<char>::
+    match (string_type const& s) const
+    {
+      return std::regex_match (s, impl_->r);
+    }
+
+    template <>
+    LIBCUTL_EXPORT bool basic_regex<wchar_t>::
+    match (string_type const& s) const
+    {
+      return std::regex_match (s, impl_->r);
+    }
+
+    template <>
+    LIBCUTL_EXPORT bool basic_regex<char>::
+    search (string_type const& s) const
+    {
+      return std::regex_search (s, impl_->r);
+    }
+
+    template <>
+    LIBCUTL_EXPORT bool basic_regex<wchar_t>::
+    search (string_type const& s) const
+    {
+      return std::regex_search (s, impl_->r);
+    }
+
+    // If we are using C++11 regex then extend the standard ECMA-262
+    // substitution escape sequences with a subset of Perl sequences:
+    //
+    // \\, \u, \l, \U, \L, \E, \1, ..., \9
+    //
+    // Notes and limitations:
+    //
+    // - The only valid regex_constants flags are match_default,
+    //   format_first_only (format_no_copy can easily be supported).
+    //
+    // - If backslash doesn't start any of the listed sequences then it is
+    //   silently dropped and the following character is copied as is.
+    //
+    // - The character case conversion is performed according to the global
+    //   C++ locale (which is, unless changed, is the same as C locale and
+    //   both default to the POSIX locale aka "C").
+    //
+    template <typename C>
+    static basic_string<C>
+    regex_replace_ex (const basic_string<C>& s,
+                      const std::basic_regex<C>& re,
+                      const basic_string<C>& fmt,
+                      std::regex_constants::match_flag_type flags)
+    {
+      using string_type = basic_string<C>;
+      using str_it      = typename string_type::const_iterator;
+      using regex_it    = regex_iterator<str_it>;
+
+      bool first_only ((flags & regex_constants::format_first_only) ==
+                       regex_constants::format_first_only);
+
+      locale cl; // Copy of the global C++ locale.
+      string_type r;
+
+      // Beginning of the last unmatched substring.
+      //
+      str_it ub (s.begin ());
+
+      for (regex_it b (s.begin (), s.end (), re, flags), i (b), e; i != e; ++i)
+      {
+        const match_results<str_it>& m (*i);
+
+        // Copy the preceeding unmatched substring, save the beginning of the
+        // one that follows.
+        //
+        r.append (ub, m.prefix ().second);
+        ub = m.suffix ().first;
+
+        if (first_only && i != b)
+          r.append (m[0].first, m[0].second); // Append matched substring.
+        else
+        {
+          // The standard implementation calls m.format() here. We perform our
+          // own formatting.
+          //
+          // Note that we are using char type literals with the assumption
+          // that being ASCII characters they will be properly "widened" to
+          // the corresponding literals of the C template parameter type.
+          //
+          auto digit = [] (C c) -> int
+          {
+            return c >= '0' && c <= '9' ? c - '0' : -1;
+          };
+
+          enum class case_conv {none, upper, lower, upper_once, lower_once}
+          mode (case_conv::none);
+
+          auto conv_chr = [&mode, &cl] (C c) -> C
+          {
+            switch (mode)
+            {
+            case case_conv::upper_once: mode = case_conv::none; // Fall through.
+            case case_conv::upper:      c = toupper (c, cl); break;
+            case case_conv::lower_once: mode = case_conv::none; // Fall through.
+            case case_conv::lower:      c = tolower (c, cl); break;
+            case case_conv::none:       break;
+            }
+            return c;
+          };
+
+          auto append_chr = [&r, &conv_chr] (C c)
+          {
+            r.push_back (conv_chr (c));
+          };
+
+          auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e)
+          {
+            // Optimize for the common case.
+            //
+            if (mode == case_conv::none)
+              r.append (b, e);
+            else
+            {
+              for (str_it i (b); i != e; ++i)
+                r.push_back (conv_chr (*i));
+            }
+          };
+
+          size_t n (fmt.size ());
+          for (size_t i (0); i < n; ++i)
+          {
+            C c (fmt[i]);
+
+            switch (c)
+            {
+            case '$':
+              {
+                // Check if this is a $-based escape sequence. Interpret it
+                // accordingly if that's the case, treat '$' as a regular
+                // character otherwise.
+                //
+                c = fmt[++i]; // '\0' if last.
+
+                switch (c)
+                {
+                case '$': append_chr (c); break;
+                case '&': append_str (m[0].first, m[0].second); break;
+                case '`':
+                  {
+                    append_str (m.prefix ().first, m.prefix ().second);
+                    break;
+                  }
+                case '\'':
+                  {
+                    append_str (m.suffix ().first, m.suffix ().second);
+                    break;
+                  }
+                default:
+                  {
+                    // Check if this is a sub-expression 1-based index ($n or
+                    // $nn). Append the matching substring if that's the case.
+                    // Treat '$' as a regular character otherwise. Index
+                    // greater than the sub-expression count is silently
+                    // ignored.
+                    //
+                    int si (digit (c));
+                    if (si >= 0)
+                    {
+                      int d;
+                      if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last.
+                      {
+                        si = si * 10 + d;
+                        ++i;
+                      }
+                    }
+
+                    if (si > 0)
+                    {
+                      // m[0] refers to the matched substring.
+                      //
+                      if (static_cast<size_t> (si) < m.size ())
+                        append_str (m[si].first, m[si].second);
+                    }
+                    else
+                    {
+                      // Not a $-based escape sequence so treat '$' as a
+                      // regular character.
+                      //
+                      --i;
+                      append_chr ('$');
+                    }
+
+                    break;
+                  }
+                }
+
+                break;
+              }
+            case '\\':
+              {
+                c = fmt[++i]; // '\0' if last.
+
+                switch (c)
+                {
+                case '\\': append_chr (c); break;
+
+                case 'u': mode = case_conv::upper_once; break;
+                case 'l': mode = case_conv::lower_once; break;
+                case 'U': mode = case_conv::upper;      break;
+                case 'L': mode = case_conv::lower;      break;
+                case 'E': mode = case_conv::none;       break;
+                default:
+                  {
+                    // Check if this is a sub-expression 1-based index. Append
+                    // the matching substring if that's the case, Skip '\\'
+                    // otherwise. Index greater than the sub-expression count
+                    // is silently ignored.
+                    //
+                    int si (digit (c));
+                    if (si > 0)
+                    {
+                      // m[0] refers to the matched substring.
+                      //
+                      if (static_cast<size_t> (si) < m.size ())
+                        append_str (m[si].first, m[si].second);
+                    }
+                    else
+                      --i;
+
+                    break;
+                  }
+                }
+
+                break;
+              }
+            default:
+              {
+                // Append a regular character.
+                //
+                append_chr (c);
+                break;
+              }
+            }
+          }
+        }
+      }
+
+      r.append (ub, s.end ()); // Append the rightmost non-matched substring.
+      return r;
+    }
+
+    template <>
+    LIBCUTL_EXPORT string basic_regex<char>::
+    replace (string_type const& s,
+             string_type const& sub,
+             bool first_only) const
+    {
+      std::regex_constants::match_flag_type f (
+        std::regex_constants::format_default);
+
+      if (first_only)
+        f |= std::regex_constants::format_first_only;
+
+      return regex_replace_ex (s, impl_->r, sub, f);
+    }
+
+    template <>
+    LIBCUTL_EXPORT wstring basic_regex<wchar_t>::
+    replace (string_type const& s,
+             string_type const& sub,
+             bool first_only) const
+    {
+      std::regex_constants::match_flag_type f (
+        std::regex_constants::format_default);
+
+      if (first_only)
+        f |= std::regex_constants::format_first_only;
+
+      return regex_replace_ex (s, impl_->r, sub, f);
+    }
+  }
+}