From 8e761289a2446367267c6c0d9a26e734f0f78306 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 16 Dec 2020 20:29:05 +0300 Subject: Get rid of legacy build systems and rename cutl/ to libcutl/ --- libcutl/re/re.cxx | 462 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 462 insertions(+) create mode 100644 libcutl/re/re.cxx (limited to 'libcutl/re/re.cxx') diff --git a/libcutl/re/re.cxx b/libcutl/re/re.cxx new file mode 100644 index 0000000..524c416 --- /dev/null +++ b/libcutl/re/re.cxx @@ -0,0 +1,462 @@ +// file : libcutl/re/re.cxx +// license : MIT; see accompanying LICENSE file + +#include + +// It is either C++11 regex or Boost. +// +// Note that some compiler/runtime combinations don't have usable C++11 +// regex. For example Clang 3.5 with libstdc++ from GCC 4.9. In this case you +// can fall back to using Boost regex by passing -DLIBCUTL_BOOST_REGEX +// preprocessor option when building libcutl. +// +// @@ Should this rather be a (custom) config.* variable? +// +#if !defined(LIBCUTL_BOOST_REGEX) +# include +# include +# include // size_t +#else +# ifndef LIBCUTL_BOOST_REGEX +# define LIBCUTL_BOOST_REGEX +# endif +# include +#endif + +using namespace std; + +namespace cutl +{ + namespace re + { +#ifdef LIBCUTL_BOOST_REGEX + namespace ire = std::tr1; +#else + namespace ire = std; +#endif + + // + // format_base + // + + char const* format_base:: + what () const noexcept + { + return description_.c_str (); + } + + // + // basic_regex + // + template + struct basic_regex::impl + { + typedef basic_string string_type; + typedef ire::basic_regex regex_type; + typedef typename regex_type::flag_type flag_type; + + impl () {} + impl (regex_type const& r): r (r) {} + impl (string_type const& s, bool icase) + { + flag_type f (ire::regex_constants::ECMAScript); + + if (icase) + f |= ire::regex_constants::icase; + + r.assign (s, f); + } + + regex_type r; + }; + + template <> + LIBCUTL_EXPORT basic_regex:: + ~basic_regex () + { + delete impl_; + } + + template <> + LIBCUTL_EXPORT basic_regex:: + ~basic_regex () + { + delete impl_; + } + + template <> + LIBCUTL_EXPORT basic_regex:: + basic_regex (basic_regex const& r) + : str_ (r.str_), impl_ (new impl (r.impl_->r)) + { + } + + template <> + LIBCUTL_EXPORT basic_regex:: + basic_regex (basic_regex const& r) + : str_ (r.str_), impl_ (new impl (r.impl_->r)) + { + } + + template <> + LIBCUTL_EXPORT basic_regex& basic_regex:: + operator= (basic_regex const& r) + { + string_type tmp (r.str_); + impl_->r = r.impl_->r; + str_.swap (tmp); + return *this; + } + + template <> + LIBCUTL_EXPORT basic_regex& basic_regex:: + operator= (basic_regex const& r) + { + string_type tmp (r.str_); + impl_->r = r.impl_->r; + str_.swap (tmp); + return *this; + } + + template <> + LIBCUTL_EXPORT void basic_regex:: + init (string_type const* s, bool icase) + { + string_type tmp (s == 0 ? string_type () : *s); + + try + { + if (impl_ == 0) + impl_ = s == 0 ? new impl : new impl (*s, icase); + else + { + impl::flag_type f (ire::regex_constants::ECMAScript); + + if (icase) + f |= ire::regex_constants::icase; + + impl_->r.assign (*s, f); + } + } + catch (ire::regex_error const& e) + { + throw basic_format (s == 0 ? "" : *s, e.what ()); + } + + str_.swap (tmp); + } + + template <> + LIBCUTL_EXPORT void basic_regex:: + init (string_type const* s, bool icase) + { + string_type tmp (s == 0 ? string_type () : *s); + + try + { + if (impl_ == 0) + impl_ = s == 0 ? new impl : new impl (*s, icase); + else + { + impl::flag_type f (ire::regex_constants::ECMAScript); + + if (icase) + f |= ire::regex_constants::icase; + + impl_->r.assign (*s, f); + } + } + catch (ire::regex_error const& e) + { + throw basic_format (s == 0 ? L"" : *s, e.what ()); + } + + str_.swap (tmp); + } + + template <> + LIBCUTL_EXPORT bool basic_regex:: + match (string_type const& s) const + { + return ire::regex_match (s, impl_->r); + } + + template <> + LIBCUTL_EXPORT bool basic_regex:: + match (string_type const& s) const + { + return ire::regex_match (s, impl_->r); + } + + template <> + LIBCUTL_EXPORT bool basic_regex:: + search (string_type const& s) const + { + return ire::regex_search (s, impl_->r); + } + + template <> + LIBCUTL_EXPORT bool basic_regex:: + search (string_type const& s) const + { + return ire::regex_search (s, impl_->r); + } + + // If we are using C++11 regex then extend the standard ECMA-262 + // substitution escape sequences with a subset of Perl sequences: + // + // \\, \u, \l, \U, \L, \E, \1, ..., \9 + // + // Notes and limitations: + // + // - The only valid regex_constants flags are match_default, + // format_first_only (format_no_copy can easily be supported). + // + // - If backslash doesn't start any of the listed sequences then it is + // silently dropped and the following character is copied as is. + // + // - The character case conversion is performed according to the global + // C++ locale (which is, unless changed, is the same as C locale and + // both default to the POSIX locale aka "C"). + // + template + static basic_string + regex_replace_ex (const basic_string& s, + const ire::basic_regex& re, + const basic_string& fmt, + ire::regex_constants::match_flag_type flags) + { +#ifdef LIBCUTL_BOOST_REGEX + // Boost regex already does what we need. + // + return ire::regex_replace (s, re, fmt, flags); +#else + using string_type = basic_string; + using str_it = typename string_type::const_iterator; + using regex_it = regex_iterator; + + bool first_only ((flags & regex_constants::format_first_only) == + regex_constants::format_first_only); + + locale cl; // Copy of the global C++ locale. + string_type r; + + // Beginning of the last unmatched substring. + // + str_it ub (s.begin ()); + + for (regex_it b (s.begin (), s.end (), re, flags), i (b), e; i != e; ++i) + { + const match_results& m (*i); + + // Copy the preceeding unmatched substring, save the beginning of the + // one that follows. + // + r.append (ub, m.prefix ().second); + ub = m.suffix ().first; + + if (first_only && i != b) + r.append (m[0].first, m[0].second); // Append matched substring. + else + { + // The standard implementation calls m.format() here. We perform our + // own formatting. + // + // Note that we are using char type literals with the assumption + // that being ASCII characters they will be properly "widened" to + // the corresponding literals of the C template parameter type. + // + auto digit = [] (C c) -> int + { + return c >= '0' && c <= '9' ? c - '0' : -1; + }; + + enum class case_conv {none, upper, lower, upper_once, lower_once} + mode (case_conv::none); + + auto conv_chr = [&mode, &cl] (C c) -> C + { + switch (mode) + { + case case_conv::upper_once: mode = case_conv::none; // Fall through. + case case_conv::upper: c = toupper (c, cl); break; + case case_conv::lower_once: mode = case_conv::none; // Fall through. + case case_conv::lower: c = tolower (c, cl); break; + case case_conv::none: break; + } + return c; + }; + + auto append_chr = [&r, &conv_chr] (C c) + { + r.push_back (conv_chr (c)); + }; + + auto append_str = [&r, &mode, &conv_chr] (str_it b, str_it e) + { + // Optimize for the common case. + // + if (mode == case_conv::none) + r.append (b, e); + else + { + for (str_it i (b); i != e; ++i) + r.push_back (conv_chr (*i)); + } + }; + + size_t n (fmt.size ()); + for (size_t i (0); i < n; ++i) + { + C c (fmt[i]); + + switch (c) + { + case '$': + { + // Check if this is a $-based escape sequence. Interpret it + // accordingly if that's the case, treat '$' as a regular + // character otherwise. + // + c = fmt[++i]; // '\0' if last. + + switch (c) + { + case '$': append_chr (c); break; + case '&': append_str (m[0].first, m[0].second); break; + case '`': + { + append_str (m.prefix ().first, m.prefix ().second); + break; + } + case '\'': + { + append_str (m.suffix ().first, m.suffix ().second); + break; + } + default: + { + // Check if this is a sub-expression 1-based index ($n or + // $nn). Append the matching substring if that's the case. + // Treat '$' as a regular character otherwise. Index + // greater than the sub-expression count is silently + // ignored. + // + int si (digit (c)); + if (si >= 0) + { + int d; + if ((d = digit (fmt[i + 1])) >= 0) // '\0' if last. + { + si = si * 10 + d; + ++i; + } + } + + if (si > 0) + { + // m[0] refers to the matched substring. + // + if (static_cast (si) < m.size ()) + append_str (m[si].first, m[si].second); + } + else + { + // Not a $-based escape sequence so treat '$' as a + // regular character. + // + --i; + append_chr ('$'); + } + + break; + } + } + + break; + } + case '\\': + { + c = fmt[++i]; // '\0' if last. + + switch (c) + { + case '\\': append_chr (c); break; + + case 'u': mode = case_conv::upper_once; break; + case 'l': mode = case_conv::lower_once; break; + case 'U': mode = case_conv::upper; break; + case 'L': mode = case_conv::lower; break; + case 'E': mode = case_conv::none; break; + default: + { + // Check if this is a sub-expression 1-based index. Append + // the matching substring if that's the case, Skip '\\' + // otherwise. Index greater than the sub-expression count + // is silently ignored. + // + int si (digit (c)); + if (si > 0) + { + // m[0] refers to the matched substring. + // + if (static_cast (si) < m.size ()) + append_str (m[si].first, m[si].second); + } + else + --i; + + break; + } + } + + break; + } + default: + { + // Append a regular character. + // + append_chr (c); + break; + } + } + } + } + } + + r.append (ub, s.end ()); // Append the rightmost non-matched substring. + return r; +#endif + } + + template <> + LIBCUTL_EXPORT string basic_regex:: + replace (string_type const& s, + string_type const& sub, + bool first_only) const + { + ire::regex_constants::match_flag_type f ( + ire::regex_constants::format_default); + + if (first_only) + f |= ire::regex_constants::format_first_only; + + return regex_replace_ex (s, impl_->r, sub, f); + } + + template <> + LIBCUTL_EXPORT wstring basic_regex:: + replace (string_type const& s, + string_type const& sub, + bool first_only) const + { + ire::regex_constants::match_flag_type f ( + ire::regex_constants::format_default); + + if (first_only) + f |= ire::regex_constants::format_first_only; + + return regex_replace_ex (s, impl_->r, sub, f); + } + } +} -- cgit v1.1